From f694bb71b7ea7841a5b5db3d884dfda5a3f78023 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Fri, 9 Sep 2022 11:30:06 -0500
Subject: Strip number suffix from instance name to consolidate services that
 traces are spread over (#13729)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The problem with many services is that it makes it hard to find which service has the trace you want, see https://github.com/jaegertracing/jaeger-ui/issues/985

Previously, we split traces out into services based on their instance name like `matrix.org client_reader-1`, etc., but there are many worker instances of the same `client_reader`, so there is a lot to click through.

With this PR, all of the traces are just collected under the worker type like `client_reader`, `event_persister` 😇

Note: A Synapse worker instance name is an opaque string, with the number convention only being our own thing for the `matrix.org` deployment. But it seems pretty sensible to group things this way.
---
 synapse/logging/opentracing.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'synapse')

diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index 482316a1ff..adf3f54770 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -203,6 +203,9 @@ if TYPE_CHECKING:
 
 # Helper class
 
+# Matches the number suffix in an instance name like "matrix.org client_reader-8"
+STRIP_INSTANCE_NUMBER_SUFFIX_REGEX = re.compile(r"[_-]?\d+$")
+
 
 class _DummyTagNames:
     """wrapper of opentracings tags. We need to have them if we
@@ -441,9 +444,17 @@ def init_tracer(hs: "HomeServer") -> None:
 
     from jaeger_client.metrics.prometheus import PrometheusMetricsFactory
 
+    # Instance names are opaque strings but by stripping off the number suffix,
+    # we can get something that looks like a "worker type", e.g.
+    # "client_reader-1" -> "client_reader" so we don't spread the traces across
+    # so many services.
+    instance_name_by_type = re.sub(
+        STRIP_INSTANCE_NUMBER_SUFFIX_REGEX, "", hs.get_instance_name()
+    )
+
     config = JaegerConfig(
         config=hs.config.tracing.jaeger_config,
-        service_name=f"{hs.config.server.server_name} {hs.get_instance_name()}",
+        service_name=f"{hs.config.server.server_name} {instance_name_by_type}",
         scope_manager=LogContextScopeManager(),
         metrics_factory=PrometheusMetricsFactory(),
     )
-- 
cgit 1.5.1


From a911ffb42cc88adc8084a04acf6fd651efba278f Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Fri, 9 Sep 2022 11:31:37 -0500
Subject: Tag trace with instance name (#13761)

We tag the Synapse instance name so that it's an easy jumping off point into the logs. Can also be used to filter for an instance that is under load.

As suggested by @clokep and @reivilibre in,

 - https://github.com/matrix-org/synapse/pull/13729#discussion_r964719258
 - https://github.com/matrix-org/synapse/pull/13729#discussion_r964733578
---
 changelog.d/13761.misc         | 1 +
 synapse/api/auth.py            | 7 +++++++
 synapse/logging/opentracing.py | 6 ++++--
 3 files changed, 12 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/13761.misc

(limited to 'synapse')

diff --git a/changelog.d/13761.misc b/changelog.d/13761.misc
new file mode 100644
index 0000000000..f7aa8c459a
--- /dev/null
+++ b/changelog.d/13761.misc
@@ -0,0 +1 @@
+Tag traces with the instance name to be able to easily jump into the right logs and filter traces by instance.
diff --git a/synapse/api/auth.py b/synapse/api/auth.py
index 8e54ef84b2..4a75eb6b21 100644
--- a/synapse/api/auth.py
+++ b/synapse/api/auth.py
@@ -32,6 +32,7 @@ from synapse.appservice import ApplicationService
 from synapse.http import get_request_user_agent
 from synapse.http.site import SynapseRequest
 from synapse.logging.opentracing import (
+    SynapseTags,
     active_span,
     force_tracing,
     start_active_span,
@@ -161,6 +162,12 @@ class Auth:
                 parent_span.set_tag(
                     "authenticated_entity", requester.authenticated_entity
                 )
+                # We tag the Synapse instance name so that it's an easy jumping
+                # off point into the logs. Can also be used to filter for an
+                # instance that is under load.
+                parent_span.set_tag(
+                    SynapseTags.INSTANCE_NAME, self.hs.get_instance_name()
+                )
                 parent_span.set_tag("user_id", requester.user.to_string())
                 if requester.device_id is not None:
                     parent_span.set_tag("device_id", requester.device_id)
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index adf3f54770..ca2735dd6d 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -298,6 +298,8 @@ class SynapseTags:
     # Whether the sync response has new data to be returned to the client.
     SYNC_RESULT = "sync.new_data"
 
+    INSTANCE_NAME = "instance_name"
+
     # incoming HTTP request ID  (as written in the logs)
     REQUEST_ID = "request_id"
 
@@ -1043,11 +1045,11 @@ def trace_servlet(
             # with JsonResource).
             scope.span.set_operation_name(request.request_metrics.name)
 
-            # set the tags *after* the servlet completes, in case it decided to
-            # prioritise the span (tags will get dropped on unprioritised spans)
             request_tags[
                 SynapseTags.REQUEST_TAG
             ] = request.request_metrics.start_context.tag
 
+            # set the tags *after* the servlet completes, in case it decided to
+            # prioritise the span (tags will get dropped on unprioritised spans)
             for k, v in request_tags.items():
                 scope.span.set_tag(k, v)
-- 
cgit 1.5.1


From 4c4889cac0e6f7df4689287b9fddea1bf8b15b7f Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Fri, 9 Sep 2022 19:00:21 +0100
Subject: Concurrently collect room unread counts for push badges (#13765)

Most of the time this function is heavily cached, but when that isn't
the case, fetching the counts room by room slows down push delivery for
users with many (thousands of) rooms.

Signed off by Nick @ Beeper.
---
 changelog.d/13765.misc     |  1 +
 synapse/push/push_tools.py | 13 ++++++++++---
 2 files changed, 11 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/13765.misc

(limited to 'synapse')

diff --git a/changelog.d/13765.misc b/changelog.d/13765.misc
new file mode 100644
index 0000000000..fdda5cf3b6
--- /dev/null
+++ b/changelog.d/13765.misc
@@ -0,0 +1 @@
+Concurrently fetch room push actions when calculating badge counts. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/push/push_tools.py b/synapse/push/push_tools.py
index 6661887d9f..658bf373b7 100644
--- a/synapse/push/push_tools.py
+++ b/synapse/push/push_tools.py
@@ -17,6 +17,7 @@ from synapse.events import EventBase
 from synapse.push.presentable_names import calculate_room_name, name_from_member_event
 from synapse.storage.controllers import StorageControllers
 from synapse.storage.databases.main import DataStore
+from synapse.util.async_helpers import concurrently_execute
 
 
 async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) -> int:
@@ -25,13 +26,19 @@ async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) -
 
     badge = len(invites)
 
-    for room_id in joins:
-        notifs = await (
-            store.get_unread_event_push_actions_by_room_for_user(
+    room_notifs = []
+
+    async def get_room_unread_count(room_id: str) -> None:
+        room_notifs.append(
+            await store.get_unread_event_push_actions_by_room_for_user(
                 room_id,
                 user_id,
             )
         )
+
+    await concurrently_execute(get_room_unread_count, joins, 10)
+
+    for notifs in room_notifs:
         if notifs.notify_count == 0:
             continue
 
-- 
cgit 1.5.1


From ebfeac7c5ded851a2639911ec6adf9d0fcdb029a Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 12 Sep 2022 11:03:42 +0100
Subject: Check if Rust lib needs rebuilding. (#13759)

This protects against the common mistake of failing to remember to rebuild Rust code after making changes.
---
 changelog.d/13759.misc         |  1 +
 rust/Cargo.toml                |  4 ++
 rust/build.rs                  | 45 ++++++++++++++++++++++
 rust/src/lib.rs                | 10 ++++-
 stubs/synapse/synapse_rust.pyi |  1 +
 synapse/__init__.py            |  5 +++
 synapse/util/rust.py           | 84 ++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 149 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/13759.misc
 create mode 100644 rust/build.rs
 create mode 100644 synapse/util/rust.py

(limited to 'synapse')

diff --git a/changelog.d/13759.misc b/changelog.d/13759.misc
new file mode 100644
index 0000000000..f91c512483
--- /dev/null
+++ b/changelog.d/13759.misc
@@ -0,0 +1 @@
+Add a check for editable installs if the Rust library needs rebuilding.
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 0a9760cafc..deddf3cec2 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -19,3 +19,7 @@ name = "synapse.synapse_rust"
 
 [dependencies]
 pyo3 = { version = "0.16.5", features = ["extension-module", "macros", "abi3", "abi3-py37"] }
+
+[build-dependencies]
+blake2 = "0.10.4"
+hex = "0.4.3"
diff --git a/rust/build.rs b/rust/build.rs
new file mode 100644
index 0000000000..2117975e56
--- /dev/null
+++ b/rust/build.rs
@@ -0,0 +1,45 @@
+//! This build script calculates the hash of all files in the `src/`
+//! directory and adds it as an environment variable during build time.
+//!
+//! This is used so that the python code can detect when the built native module
+//! does not match the source in-tree, helping to detect the case where the
+//! source has been updated but the library hasn't been rebuilt.
+
+use std::path::PathBuf;
+
+use blake2::{Blake2b512, Digest};
+
+fn main() -> Result<(), std::io::Error> {
+    let mut dirs = vec![PathBuf::from("src")];
+    // `dirs` holds directories still to be listed; files found go into `paths`.
+    let mut paths = Vec::new();
+    while let Some(path) = dirs.pop() {
+        let mut entries = std::fs::read_dir(path)?
+            .map(|res| res.map(|e| e.path()))
+            .collect::<Result<Vec<_>, std::io::Error>>()?;
+
+        entries.sort();
+
+        for entry in entries {
+            if entry.is_dir() {
+                dirs.push(entry)
+            } else {
+                paths.push(entry.to_str().expect("valid rust paths").to_string());
+            }
+        }
+    }
+    // Hash in sorted path order so the digest does not depend on listing order.
+    paths.sort();
+
+    let mut hasher = Blake2b512::new();
+    // All file contents are fed into one hasher, in that sorted order.
+    for path in paths {
+        let bytes = std::fs::read(path)?;
+        hasher.update(bytes);
+    }
+    // Hand the hex digest to rustc as the `SYNAPSE_RUST_DIGEST` env variable.
+    let hex_digest = hex::encode(hasher.finalize());
+    println!("cargo:rustc-env=SYNAPSE_RUST_DIGEST={hex_digest}");
+
+    Ok(())
+}
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index 142fc2ed93..ba42465fb8 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -1,5 +1,13 @@
 use pyo3::prelude::*;
 
+/// Returns the hex digest of the Rust source files, as computed by `build.rs`
+/// at compile time and baked in via the `SYNAPSE_RUST_DIGEST` env variable.
+/// Used by python to detect if the rust library is outdated.
+#[pyfunction]
+fn get_rust_file_digest() -> &'static str {
+    env!("SYNAPSE_RUST_DIGEST")
+}
+
 /// Formats the sum of two numbers as string.
 #[pyfunction]
 #[pyo3(text_signature = "(a, b, /)")]
@@ -11,6 +19,6 @@ fn sum_as_string(a: usize, b: usize) -> PyResult<String> {
 #[pymodule]
 fn synapse_rust(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(sum_as_string, m)?)?;
-
+    m.add_function(wrap_pyfunction!(get_rust_file_digest, m)?)?;
     Ok(())
 }
diff --git a/stubs/synapse/synapse_rust.pyi b/stubs/synapse/synapse_rust.pyi
index 5b51ba05d7..8658d3138f 100644
--- a/stubs/synapse/synapse_rust.pyi
+++ b/stubs/synapse/synapse_rust.pyi
@@ -1 +1,2 @@
 def sum_as_string(a: int, b: int) -> str: ...
+def get_rust_file_digest() -> str: ...
diff --git a/synapse/__init__.py b/synapse/__init__.py
index b1369aca8f..1bed6393bd 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -20,6 +20,8 @@ import json
 import os
 import sys
 
+from synapse.util.rust import check_rust_lib_up_to_date
+
 # Check that we're not running on an unsupported Python version.
 if sys.version_info < (3, 7):
     print("Synapse requires Python 3.7 or above.")
@@ -78,3 +80,6 @@ if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
     from synapse.util.patch_inline_callbacks import do_patch
 
     do_patch()
+
+
+check_rust_lib_up_to_date()
diff --git a/synapse/util/rust.py b/synapse/util/rust.py
new file mode 100644
index 0000000000..30ecb9ffd9
--- /dev/null
+++ b/synapse/util/rust.py
@@ -0,0 +1,84 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+from hashlib import blake2b
+
+import synapse
+from synapse.synapse_rust import get_rust_file_digest
+
+
+def check_rust_lib_up_to_date() -> None:
+    """For editable installs, check that the built Rust library matches the
+    sources in `rust/src`; raises `Exception` if it needs to be rebuilt.
+    """
+
+    if not _dist_is_editable():
+        return
+
+    synapse_dir = os.path.dirname(synapse.__file__)
+    synapse_root = os.path.abspath(os.path.join(synapse_dir, ".."))
+
+    # Double check we've not gone into site-packages...
+    if os.path.basename(synapse_root) == "site-packages":
+        return
+
+    # ... and it looks like a python project root (checked there, not in the CWD).
+    if not os.path.exists(os.path.join(synapse_root, "pyproject.toml")):
+        return
+
+    # Hash the in-tree Rust sources; the built module carries its own digest.
+    digest = _hash_rust_files_in_directory(os.path.join(synapse_root, "rust", "src"))
+
+    if digest != get_rust_file_digest():
+        raise Exception("Rust module outdated. Please rebuild using `poetry install`")
+
+
+def _hash_rust_files_in_directory(directory: str) -> str:
+    """Return the BLAKE2b hex digest of all files under `directory` (recursive)."""
+
+    root = os.path.abspath(directory)
+
+    file_paths = []
+
+    pending = [root]
+    while pending:
+        current = pending.pop()
+        with os.scandir(current) as listing:
+            for entry in listing:
+                if not entry.is_dir():
+                    file_paths.append(entry.path)
+                else:
+                    pending.append(entry.path)
+
+    # Sort so the digest does not depend on the order entries were listed in.
+    file_paths.sort()
+
+    hasher = blake2b()
+
+    for file_path in file_paths:
+        with open(file_path, "rb") as f:
+            hasher.update(f.read())
+
+    return hasher.hexdigest()
+
+
+def _dist_is_editable() -> bool:
+    """Report whether `matrix-synapse.egg-link` exists on any `sys.path` entry."""
+    # The presence of that egg-link file is treated as "editable install".
+    egg_link_name = "matrix-synapse.egg-link"
+    return any(
+        os.path.isfile(os.path.join(entry, egg_link_name)) for entry in sys.path
+    )
-- 
cgit 1.5.1


From da41a7cd618d11b05c2c04c39068fd4b1e1b7894 Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Mon, 12 Sep 2022 12:58:33 +0100
Subject: Remove check current state membership up to date (#13745)

* Remove checks for membership column in current_state_events
* Add schema script to force through the
  `current_state_events_membership` background job

Contributed by Nick @ Beeper (@fizzadar).
---
 changelog.d/13745.misc                             |   1 +
 synapse/storage/databases/main/roommember.py       | 202 +++++----------------
 ...force_update_current_state_events_membership.py |  52 ++++++
 3 files changed, 100 insertions(+), 155 deletions(-)
 create mode 100644 changelog.d/13745.misc
 create mode 100644 synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py

(limited to 'synapse')

diff --git a/changelog.d/13745.misc b/changelog.d/13745.misc
new file mode 100644
index 0000000000..e97a789c0e
--- /dev/null
+++ b/changelog.d/13745.misc
@@ -0,0 +1 @@
+Remove old queries to join room memberships to current state events. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 6e1ff5626b..fdb4684e12 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -32,10 +32,7 @@ import attr
 
 from synapse.api.constants import EventTypes, Membership
 from synapse.metrics import LaterGauge
-from synapse.metrics.background_process_metrics import (
-    run_as_background_process,
-    wrap_as_background_process,
-)
+from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
 from synapse.storage.database import (
     DatabasePool,
@@ -91,16 +88,6 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         # at a time. Keyed by room_id.
         self._joined_host_linearizer = Linearizer("_JoinedHostsCache")
 
-        # Is the current_state_events.membership up to date? Or is the
-        # background update still running?
-        self._current_state_events_membership_up_to_date = False
-
-        txn = db_conn.cursor(
-            txn_name="_check_safe_current_state_events_membership_updated"
-        )
-        self._check_safe_current_state_events_membership_updated_txn(txn)
-        txn.close()
-
         if (
             self.hs.config.worker.run_background_tasks
             and self.hs.config.metrics.metrics_flags.known_servers
@@ -157,34 +144,6 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         self._known_servers_count = max([count, 1])
         return self._known_servers_count
 
-    def _check_safe_current_state_events_membership_updated_txn(
-        self, txn: LoggingTransaction
-    ) -> None:
-        """Checks if it is safe to assume the new current_state_events
-        membership column is up to date
-        """
-
-        pending_update = self.db_pool.simple_select_one_txn(
-            txn,
-            table="background_updates",
-            keyvalues={"update_name": _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME},
-            retcols=["update_name"],
-            allow_none=True,
-        )
-
-        self._current_state_events_membership_up_to_date = not pending_update
-
-        # If the update is still running, reschedule to run.
-        if pending_update:
-            self._clock.call_later(
-                15.0,
-                run_as_background_process,
-                "_check_safe_current_state_events_membership_updated",
-                self.db_pool.runInteraction,
-                "_check_safe_current_state_events_membership_updated",
-                self._check_safe_current_state_events_membership_updated_txn,
-            )
-
     @cached(max_entries=100000, iterable=True)
     async def get_users_in_room(self, room_id: str) -> List[str]:
         """
@@ -212,31 +171,14 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         `get_current_hosts_in_room()` and so we can re-use the cache but it's
         not horrible to have here either.
         """
-        # If we can assume current_state_events.membership is up to date
-        # then we can avoid a join, which is a Very Good Thing given how
-        # frequently this function gets called.
-        if self._current_state_events_membership_up_to_date:
-            sql = """
-                SELECT c.state_key FROM current_state_events as c
-                /* Get the depth of the event from the events table */
-                INNER JOIN events AS e USING (event_id)
-                WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ?
-                /* Sorted by lowest depth first */
-                ORDER BY e.depth ASC;
-            """
-        else:
-            sql = """
-                SELECT c.state_key FROM room_memberships as m
-                /* Get the depth of the event from the events table */
-                INNER JOIN events AS e USING (event_id)
-                INNER JOIN current_state_events as c
-                ON m.event_id = c.event_id
-                AND m.room_id = c.room_id
-                AND m.user_id = c.state_key
-                WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?
-                /* Sorted by lowest depth first */
-                ORDER BY e.depth ASC;
-            """
+        sql = """
+            SELECT c.state_key FROM current_state_events as c
+            /* Get the depth of the event from the events table */
+            INNER JOIN events AS e USING (event_id)
+            WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ?
+            /* Sorted by lowest depth first */
+            ORDER BY e.depth ASC;
+        """
 
         txn.execute(sql, (room_id, Membership.JOIN))
         return [r[0] for r in txn]
@@ -353,28 +295,14 @@ class RoomMemberWorkerStore(EventsWorkerStore):
             # We do this all in one transaction to keep the cache small.
             # FIXME: get rid of this when we have room_stats
 
-            # If we can assume current_state_events.membership is up to date
-            # then we can avoid a join, which is a Very Good Thing given how
-            # frequently this function gets called.
-            if self._current_state_events_membership_up_to_date:
-                # Note, rejected events will have a null membership field, so
-                # we we manually filter them out.
-                sql = """
-                    SELECT count(*), membership FROM current_state_events
-                    WHERE type = 'm.room.member' AND room_id = ?
-                        AND membership IS NOT NULL
-                    GROUP BY membership
-                """
-            else:
-                sql = """
-                    SELECT count(*), m.membership FROM room_memberships as m
-                    INNER JOIN current_state_events as c
-                    ON m.event_id = c.event_id
-                    AND m.room_id = c.room_id
-                    AND m.user_id = c.state_key
-                    WHERE c.type = 'm.room.member' AND c.room_id = ?
-                    GROUP BY m.membership
-                """
+            # Note, rejected events will have a null membership field, so
+            # we manually filter them out.
+            sql = """
+                SELECT count(*), membership FROM current_state_events
+                WHERE type = 'm.room.member' AND room_id = ?
+                    AND membership IS NOT NULL
+                GROUP BY membership
+            """
 
             txn.execute(sql, (room_id,))
             res: Dict[str, MemberSummary] = {}
@@ -383,30 +311,18 @@ class RoomMemberWorkerStore(EventsWorkerStore):
 
             # we order by membership and then fairly arbitrarily by event_id so
             # heroes are consistent
-            if self._current_state_events_membership_up_to_date:
-                # Note, rejected events will have a null membership field, so
-                # we we manually filter them out.
-                sql = """
-                    SELECT state_key, membership, event_id
-                    FROM current_state_events
-                    WHERE type = 'm.room.member' AND room_id = ?
-                        AND membership IS NOT NULL
-                    ORDER BY
-                        CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
-                        event_id ASC
-                    LIMIT ?
-                """
-            else:
-                sql = """
-                    SELECT c.state_key, m.membership, c.event_id
-                    FROM room_memberships as m
-                    INNER JOIN current_state_events as c USING (room_id, event_id)
-                    WHERE c.type = 'm.room.member' AND c.room_id = ?
-                    ORDER BY
-                        CASE m.membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
-                        c.event_id ASC
-                    LIMIT ?
-                """
+            # Note, rejected events will have a null membership field, so
+            # we manually filter them out.
+            sql = """
+                SELECT state_key, membership, event_id
+                FROM current_state_events
+                WHERE type = 'm.room.member' AND room_id = ?
+                    AND membership IS NOT NULL
+                ORDER BY
+                    CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
+                    event_id ASC
+                LIMIT ?
+            """
 
             # 6 is 5 (number of heroes) plus 1, in case one of them is the calling user.
             txn.execute(sql, (room_id, Membership.JOIN, Membership.INVITE, 6))
@@ -649,27 +565,15 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         # We use `current_state_events` here and not `local_current_membership`
         # as a) this gets called with remote users and b) this only gets called
         # for rooms the server is participating in.
-        if self._current_state_events_membership_up_to_date:
-            sql = """
-                SELECT room_id, e.instance_name, e.stream_ordering
-                FROM current_state_events AS c
-                INNER JOIN events AS e USING (room_id, event_id)
-                WHERE
-                    c.type = 'm.room.member'
-                    AND c.state_key = ?
-                    AND c.membership = ?
-            """
-        else:
-            sql = """
-                SELECT room_id, e.instance_name, e.stream_ordering
-                FROM current_state_events AS c
-                INNER JOIN room_memberships AS m USING (room_id, event_id)
-                INNER JOIN events AS e USING (room_id, event_id)
-                WHERE
-                    c.type = 'm.room.member'
-                    AND c.state_key = ?
-                    AND m.membership = ?
-            """
+        sql = """
+            SELECT room_id, e.instance_name, e.stream_ordering
+            FROM current_state_events AS c
+            INNER JOIN events AS e USING (room_id, event_id)
+            WHERE
+                c.type = 'm.room.member'
+                AND c.state_key = ?
+                AND c.membership = ?
+        """
 
         txn.execute(sql, (user_id, Membership.JOIN))
         return frozenset(
@@ -707,27 +611,15 @@ class RoomMemberWorkerStore(EventsWorkerStore):
             user_ids,
         )
 
-        if self._current_state_events_membership_up_to_date:
-            sql = f"""
-                SELECT c.state_key, room_id, e.instance_name, e.stream_ordering
-                FROM current_state_events AS c
-                INNER JOIN events AS e USING (room_id, event_id)
-                WHERE
-                    c.type = 'm.room.member'
-                    AND c.membership = ?
-                    AND {clause}
-            """
-        else:
-            sql = f"""
-                SELECT c.state_key, room_id, e.instance_name, e.stream_ordering
-                FROM current_state_events AS c
-                INNER JOIN room_memberships AS m USING (room_id, event_id)
-                INNER JOIN events AS e USING (room_id, event_id)
-                WHERE
-                    c.type = 'm.room.member'
-                    AND m.membership = ?
-                    AND {clause}
-            """
+        sql = f"""
+            SELECT c.state_key, room_id, e.instance_name, e.stream_ordering
+            FROM current_state_events AS c
+            INNER JOIN events AS e USING (room_id, event_id)
+            WHERE
+                c.type = 'm.room.member'
+                AND c.membership = ?
+                AND {clause}
+        """
 
         txn.execute(sql, [Membership.JOIN] + args)
 
diff --git a/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py b/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py
new file mode 100644
index 0000000000..b5853d125c
--- /dev/null
+++ b/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py
@@ -0,0 +1,52 @@
+# Copyright 2022 Beeper
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+Forces through the `current_state_events_membership` background job so checks
+for its completion can be removed.
+
+Note the background job must still remain defined in the database class.
+"""
+
+
+def run_upgrade(cur, database_engine, *args, **kwargs):
+    """Force through the `current_state_events_membership` background update.
+
+    Does nothing unless that update is still listed in `background_updates`.
+    """
+    cur.execute("SELECT update_name FROM background_updates")
+    pending_updates = {row[0] for row in cur.fetchall()}
+
+    # No pending background job so nothing to do here
+    if "current_state_events_membership" not in pending_updates:
+        return
+
+    # Copy membership from room_memberships onto every current_state_events row.
+    # This may take a while; it was originally a background update from 2019.
+    populate_membership_sql = """
+        UPDATE current_state_events
+        SET membership = (
+            SELECT membership FROM room_memberships
+            WHERE event_id = current_state_events.event_id
+        )
+        """
+    cur.execute(populate_membership_sql)
+
+    # Finally, delete the background job because we've handled it above
+    remove_job_sql = """
+        DELETE FROM background_updates
+        WHERE update_name = 'current_state_events_membership'
+        """
+    cur.execute(remove_job_sql)
-- 
cgit 1.5.1


From cdbb6412327b542e0dead792717fe58253291131 Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Tue, 13 Sep 2022 08:16:37 +0100
Subject: Add receipts event stream ordering (#13703)

---
 changelog.d/13703.misc                             |  1 +
 synapse/_scripts/synapse_port_db.py                |  2 +
 synapse/storage/databases/main/receipts.py         | 74 +++++++++++++++++++++-
 .../delta/72/05receipts_event_stream_ordering.sql  | 19 ++++++
 4 files changed, 95 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/13703.misc
 create mode 100644 synapse/storage/schema/main/delta/72/05receipts_event_stream_ordering.sql

(limited to 'synapse')

diff --git a/changelog.d/13703.misc b/changelog.d/13703.misc
new file mode 100644
index 0000000000..685a29b17d
--- /dev/null
+++ b/changelog.d/13703.misc
@@ -0,0 +1 @@
+Add & populate `event_stream_ordering` column on receipts table for future optimisation of push action processing. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 543bba27c2..30983c47fb 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -67,6 +67,7 @@ from synapse.storage.databases.main.media_repository import (
 )
 from synapse.storage.databases.main.presence import PresenceBackgroundUpdateStore
 from synapse.storage.databases.main.pusher import PusherWorkerStore
+from synapse.storage.databases.main.receipts import ReceiptsBackgroundUpdateStore
 from synapse.storage.databases.main.registration import (
     RegistrationBackgroundUpdateStore,
     find_max_generated_user_id_localpart,
@@ -203,6 +204,7 @@ class Store(
     PushRuleStore,
     PusherWorkerStore,
     PresenceBackgroundUpdateStore,
+    ReceiptsBackgroundUpdateStore,
 ):
     def execute(self, f: Callable[..., R], *args: Any, **kwargs: Any) -> Awaitable[R]:
         return self.db_pool.runInteraction(f.__name__, f, *args, **kwargs)
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index 3838409519..719a12b0ae 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -675,6 +675,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
             values={
                 "stream_id": stream_id,
                 "event_id": event_id,
+                "event_stream_ordering": stream_ordering,
                 "data": json_encoder.encode(data),
             },
             # receipts_linearized has a unique constraint on
@@ -830,5 +831,76 @@ class ReceiptsWorkerStore(SQLBaseStore):
         )
 
 
-class ReceiptsStore(ReceiptsWorkerStore):
+class ReceiptsBackgroundUpdateStore(SQLBaseStore):
+    POPULATE_RECEIPT_EVENT_STREAM_ORDERING = "populate_event_stream_ordering"
+
+    def __init__(
+        self,
+        database: DatabasePool,
+        db_conn: LoggingDatabaseConnection,
+        hs: "HomeServer",
+    ):
+        super().__init__(database, db_conn, hs)
+
+        self.db_pool.updates.register_background_update_handler(
+            self.POPULATE_RECEIPT_EVENT_STREAM_ORDERING,
+            self._populate_receipt_event_stream_ordering,
+        )
+
+    async def _populate_receipt_event_stream_ordering(
+        self, progress: JsonDict, batch_size: int
+    ) -> int:
+        """Populate `receipts_linearized.event_stream_ordering` from `events`,
+        handling one `batch_size`-wide window of `stream_id`s per call."""
+        def _populate_receipt_event_stream_ordering_txn(
+            txn: LoggingTransaction,
+        ) -> bool:
+            if "max_stream_id" in progress:
+                max_stream_id = progress["max_stream_id"]
+            else:
+                txn.execute("SELECT max(stream_id) FROM receipts_linearized")
+                res = txn.fetchone()
+                if res is None or res[0] is None:
+                    # The table has no receipts, so there is nothing to do.
+                    return True
+                max_stream_id = res[0]
+
+            start = progress.get("stream_id", 0)
+            stop = start + batch_size
+
+            sql = """
+                UPDATE receipts_linearized
+                SET event_stream_ordering = (
+                    SELECT stream_ordering
+                    FROM events
+                    WHERE event_id = receipts_linearized.event_id
+                )
+                WHERE stream_id >= ? AND stream_id < ?
+            """
+            txn.execute(sql, (start, stop))
+
+            # Persist the next window start and the upper bound for later runs.
+            self.db_pool.updates._background_update_progress_txn(
+                txn,
+                self.POPULATE_RECEIPT_EVENT_STREAM_ORDERING,
+                {"stream_id": stop, "max_stream_id": max_stream_id},
+            )
+
+            # The window is half-open, so we are done only once it passes the max.
+            return stop > max_stream_id
+
+        finished = await self.db_pool.runInteraction(
+            "_populate_receipt_event_stream_ordering_txn",
+            _populate_receipt_event_stream_ordering_txn,
+        )
+
+        if finished:
+            await self.db_pool.updates._end_background_update(
+                self.POPULATE_RECEIPT_EVENT_STREAM_ORDERING
+            )
+
+        return batch_size
+
+
+class ReceiptsStore(ReceiptsWorkerStore, ReceiptsBackgroundUpdateStore):
     pass
diff --git a/synapse/storage/schema/main/delta/72/05receipts_event_stream_ordering.sql b/synapse/storage/schema/main/delta/72/05receipts_event_stream_ordering.sql
new file mode 100644
index 0000000000..2a822f4509
--- /dev/null
+++ b/synapse/storage/schema/main/delta/72/05receipts_event_stream_ordering.sql
@@ -0,0 +1,19 @@
+/* Copyright 2022 Beeper
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE receipts_linearized ADD COLUMN event_stream_ordering BIGINT;
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+  ('populate_event_stream_ordering', '{}');
-- 
cgit 1.5.1


From b60d47ab2c55580fc1941497964cd33c27838231 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 13 Sep 2022 10:53:11 +0100
Subject: Updates to the schema dump script (#13770)

---
 changelog.d/13770.misc                             |  1 +
 scripts-dev/make_full_schema.sh                    | 48 ++++++++--------------
 .../storage/schema/state/delta/30/state_stream.sql |  4 ++
 3 files changed, 21 insertions(+), 32 deletions(-)
 create mode 100644 changelog.d/13770.misc

(limited to 'synapse')

diff --git a/changelog.d/13770.misc b/changelog.d/13770.misc
new file mode 100644
index 0000000000..36ac91400a
--- /dev/null
+++ b/changelog.d/13770.misc
@@ -0,0 +1 @@
+Update the script which makes full schema dumps.
diff --git a/scripts-dev/make_full_schema.sh b/scripts-dev/make_full_schema.sh
index f0e22d4ca2..61394360ce 100755
--- a/scripts-dev/make_full_schema.sh
+++ b/scripts-dev/make_full_schema.sh
@@ -9,8 +9,10 @@
 export PGHOST="localhost"
 POSTGRES_DB_NAME="synapse_full_schema.$$"
 
-SQLITE_FULL_SCHEMA_OUTPUT_FILE="full.sql.sqlite"
-POSTGRES_FULL_SCHEMA_OUTPUT_FILE="full.sql.postgres"
+SQLITE_SCHEMA_FILE="schema.sql.sqlite"
+SQLITE_ROWS_FILE="rows.sql.sqlite"
+POSTGRES_SCHEMA_FILE="full.sql.postgres"
+POSTGRES_ROWS_FILE="rows.sql.postgres"
 
 REQUIRED_DEPS=("matrix-synapse" "psycopg2")
 
@@ -22,7 +24,7 @@ usage() {
   echo "  Username to connect to local postgres instance. The password will be requested"
   echo "  during script execution."
   echo "-c"
-  echo "  CI mode. Enables coverage tracking and prints every command that the script runs."
+  echo "  CI mode. Prints every command that the script runs."
   echo "-o <path>"
   echo "  Directory to output full schema files to."
   echo "-h"
@@ -37,11 +39,6 @@ while getopts "p:co:h" opt; do
     c)
       # Print all commands that are being executed
       set -x
-
-      # Modify required dependencies for coverage
-      REQUIRED_DEPS+=("coverage" "coverage-enable-subprocess")
-
-      COVERAGE=1
       ;;
     o)
       command -v realpath > /dev/null || (echo "The -o flag requires the 'realpath' binary to be installed" && exit 1)
@@ -102,6 +99,7 @@ SQLITE_DB=$TMPDIR/homeserver.db
 POSTGRES_CONFIG=$TMPDIR/postgres.conf
 
 # Ensure these files are delete on script exit
+# TODO: the trap should also drop the temp postgres DB
 trap 'rm -rf $TMPDIR' EXIT
 
 cat > "$SQLITE_CONFIG" <<EOF
@@ -147,48 +145,34 @@ python -m synapse.app.homeserver --generate-keys -c "$SQLITE_CONFIG"
 
 # Make sure the SQLite3 database is using the latest schema and has no pending background update.
 echo "Running db background jobs..."
-synapse/_scripts/update_synapse_database.py --database-config --run-background-updates "$SQLITE_CONFIG"
+synapse/_scripts/update_synapse_database.py --database-config "$SQLITE_CONFIG" --run-background-updates
 
 # Create the PostgreSQL database.
 echo "Creating postgres database..."
 createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_DB_NAME"
 
-echo "Copying data from SQLite3 to Postgres with synapse_port_db..."
-if [ -z "$COVERAGE" ]; then
-  # No coverage needed
-  synapse/_scripts/synapse_port_db.py --sqlite-database "$SQLITE_DB" --postgres-config "$POSTGRES_CONFIG"
-else
-  # Coverage desired
-  coverage run synapse/_scripts/synapse_port_db.py --sqlite-database "$SQLITE_DB" --postgres-config "$POSTGRES_CONFIG"
-fi
+echo "Running db background jobs..."
+synapse/_scripts/update_synapse_database.py --database-config "$POSTGRES_CONFIG" --run-background-updates
+
 
 # Delete schema_version, applied_schema_deltas and applied_module_schemas tables
 # Also delete any shadow tables from fts4
-# This needs to be done after synapse_port_db is run
 echo "Dropping unwanted db tables..."
 SQL="
 DROP TABLE schema_version;
 DROP TABLE applied_schema_deltas;
 DROP TABLE applied_module_schemas;
-DROP TABLE event_search_content;
-DROP TABLE event_search_segments;
-DROP TABLE event_search_segdir;
-DROP TABLE event_search_docsize;
-DROP TABLE event_search_stat;
-DROP TABLE user_directory_search_content;
-DROP TABLE user_directory_search_segments;
-DROP TABLE user_directory_search_segdir;
-DROP TABLE user_directory_search_docsize;
-DROP TABLE user_directory_search_stat;
 "
 sqlite3 "$SQLITE_DB" <<< "$SQL"
 psql "$POSTGRES_DB_NAME" -w <<< "$SQL"
 
-echo "Dumping SQLite3 schema to '$OUTPUT_DIR/$SQLITE_FULL_SCHEMA_OUTPUT_FILE'..."
-sqlite3 "$SQLITE_DB" ".dump" > "$OUTPUT_DIR/$SQLITE_FULL_SCHEMA_OUTPUT_FILE"
+echo "Dumping SQLite3 schema to '$OUTPUT_DIR/$SQLITE_SCHEMA_FILE' and '$OUTPUT_DIR/$SQLITE_ROWS_FILE'..."
+sqlite3 "$SQLITE_DB" ".schema --indent" > "$OUTPUT_DIR/$SQLITE_SCHEMA_FILE"
+sqlite3 "$SQLITE_DB" ".dump --data-only --nosys" > "$OUTPUT_DIR/$SQLITE_ROWS_FILE"
 
-echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_FULL_SCHEMA_OUTPUT_FILE'..."
-pg_dump --format=plain --no-tablespaces --no-acl --no-owner $POSTGRES_DB_NAME | sed -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_FULL_SCHEMA_OUTPUT_FILE"
+echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE' and '$OUTPUT_DIR/$POSTGRES_ROWS_FILE'..."
+pg_dump --format=plain --schema-only         --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE"
+pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_ROWS_FILE"
 
 echo "Cleaning up temporary Postgres database..."
 dropdb $POSTGRES_DB_NAME
diff --git a/synapse/storage/schema/state/delta/30/state_stream.sql b/synapse/storage/schema/state/delta/30/state_stream.sql
index e85699e82e..bdaf8b02d5 100644
--- a/synapse/storage/schema/state/delta/30/state_stream.sql
+++ b/synapse/storage/schema/state/delta/30/state_stream.sql
@@ -26,6 +26,10 @@
  * (event, state) pair, we can use that stream_ordering to identify when
  * the new state was assigned for the event.
  */
+
+/* NB: This table belongs to the `main` logical database; it should not be present
+ * in `state`.
+ */
 CREATE TABLE IF NOT EXISTS ex_outlier_stream(
     event_stream_ordering BIGINT PRIMARY KEY NOT NULL,
     event_id TEXT NOT NULL,
-- 
cgit 1.5.1


From 12dacecabd27680dc77c17724953ecda0801b5ea Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Tue, 13 Sep 2022 16:14:28 +0200
Subject: Make sequence `cache_invalidation_stream_seq` begin at `2` (#13766)

Signed-off-by: Mathieu Velten <mathieuv@matrix.org>
Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
---
 changelog.d/13766.bugfix                           |  1 +
 synapse/storage/schema/__init__.py                 |  1 +
 ...8begin_cache_invalidation_seq_at_2.sql.postgres | 23 ++++++++++++++++++++++
 3 files changed, 25 insertions(+)
 create mode 100644 changelog.d/13766.bugfix
 create mode 100644 synapse/storage/schema/main/delta/72/08begin_cache_invalidation_seq_at_2.sql.postgres

(limited to 'synapse')

diff --git a/changelog.d/13766.bugfix b/changelog.d/13766.bugfix
new file mode 100644
index 0000000000..c708e54f9c
--- /dev/null
+++ b/changelog.d/13766.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where the `cache_invalidation_stream_seq` sequence would begin at 1 instead of 2.
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 256f745dc0..32cda5e3ba 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -76,6 +76,7 @@ Changes in SCHEMA_VERSION = 72:
     - event_edges.(room_id, is_state) are no longer written to.
     - Tables related to groups are dropped.
     - Unused column application_services_state.last_txn is dropped
+    - Cache invalidation stream id sequence now begins at 2 to match code expectation.
 """
 
 
diff --git a/synapse/storage/schema/main/delta/72/08begin_cache_invalidation_seq_at_2.sql.postgres b/synapse/storage/schema/main/delta/72/08begin_cache_invalidation_seq_at_2.sql.postgres
new file mode 100644
index 0000000000..69931fe971
--- /dev/null
+++ b/synapse/storage/schema/main/delta/72/08begin_cache_invalidation_seq_at_2.sql.postgres
@@ -0,0 +1,23 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ 
+
+-- The sequence needs to begin at 2 because a bunch of code assumes that
+-- get_next_id_txn will return values >= 2, cf this comment:
+-- https://github.com/matrix-org/synapse/blob/b93bd95e8ab64d27ae26841020f62ee61272a5f2/synapse/storage/util/id_generators.py#L344
+
+SELECT setval('cache_invalidation_stream_seq', (
+    SELECT COALESCE(MAX(last_value), 1) FROM cache_invalidation_stream_seq
+));
-- 
cgit 1.5.1


From 21687ec189f404bcee98ae61b008afc8c5094400 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Wed, 14 Sep 2022 08:28:12 +0000
Subject: Fix a long-standing spec compliance bug where Synapse would accept a
 trailing slash on the end of `/get_missing_events` federation requests.
 (#13789)

* Don't accept a trailing slash on the end of /get_missing_events

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 changelog.d/13789.bugfix                          | 1 +
 synapse/federation/transport/server/federation.py | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/13789.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13789.bugfix b/changelog.d/13789.bugfix
new file mode 100644
index 0000000000..9e1e3e0fa7
--- /dev/null
+++ b/changelog.d/13789.bugfix
@@ -0,0 +1 @@
+Fix a long-standing spec compliance bug where Synapse would accept a trailing slash on the end of `/get_missing_events` federation requests.
\ No newline at end of file
diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py
index f7884bfbe0..6bb4659c4c 100644
--- a/synapse/federation/transport/server/federation.py
+++ b/synapse/federation/transport/server/federation.py
@@ -549,8 +549,7 @@ class FederationClientKeysClaimServlet(BaseFederationServerServlet):
 
 
 class FederationGetMissingEventsServlet(BaseFederationServerServlet):
-    # TODO(paul): Why does this path alone end with "/?" optional?
-    PATH = "/get_missing_events/(?P<room_id>[^/]*)/?"
+    PATH = "/get_missing_events/(?P<room_id>[^/]*)"
 
     async def on_POST(
         self,
-- 
cgit 1.5.1


From c73774467edb04c372caecb9e843542654f7610b Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Wed, 14 Sep 2022 10:42:57 +0100
Subject: Fix bug in device list caching when remote users leave rooms (#13749)

When a remote user leaves the last room shared with the homeserver, we
have to mark their device list as unsubscribed, otherwise we would hold
on to a stale device list in our cache. Crucially, the device list would
remain cached even after the remote user rejoined the room, which could
lead to E2EE failures until the next change to the remote user's device
list.

Fixes #13651.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/13749.bugfix                      |  1 +
 synapse/handlers/device.py                    | 11 -----------
 synapse/handlers/e2e_keys.py                  | 26 ++++++++++++++++++++++++++
 synapse/storage/controllers/persist_events.py | 20 +++++++++++++++++---
 tests/handlers/test_e2e_keys.py               |  8 +++++++-
 5 files changed, 51 insertions(+), 15 deletions(-)
 create mode 100644 changelog.d/13749.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13749.bugfix b/changelog.d/13749.bugfix
new file mode 100644
index 0000000000..8ffafec07b
--- /dev/null
+++ b/changelog.d/13749.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where device lists would remain cached when remote users left and rejoined the last room shared with the local homeserver.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index c5ac169644..901e2310b7 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -45,7 +45,6 @@ from synapse.types import (
     JsonDict,
     StreamKeyType,
     StreamToken,
-    UserID,
     get_domain_from_id,
     get_verify_key_from_cross_signing_key,
 )
@@ -324,8 +323,6 @@ class DeviceHandler(DeviceWorkerHandler):
             self.device_list_updater.incoming_device_list_update,
         )
 
-        hs.get_distributor().observe("user_left_room", self.user_left_room)
-
         # Whether `_handle_new_device_update_async` is currently processing.
         self._handle_new_device_update_is_processing = False
 
@@ -569,14 +566,6 @@ class DeviceHandler(DeviceWorkerHandler):
             StreamKeyType.DEVICE_LIST, position, users=[from_user_id]
         )
 
-    async def user_left_room(self, user: UserID, room_id: str) -> None:
-        user_id = user.to_string()
-        room_ids = await self.store.get_rooms_for_user(user_id)
-        if not room_ids:
-            # We no longer share rooms with this user, so we'll no longer
-            # receive device updates. Mark this in DB.
-            await self.store.mark_remote_user_device_list_as_unsubscribed(user_id)
-
     async def store_dehydrated_device(
         self,
         user_id: str,
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index ec81639c78..8eed63ccf3 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -175,6 +175,32 @@ class E2eKeysHandler:
                     user_ids_not_in_cache,
                     remote_results,
                 ) = await self.store.get_user_devices_from_cache(query_list)
+
+                # Check that the homeserver still shares a room with all cached users.
+                # Note that this check may be slightly racy when a remote user leaves a
+                # room after we have fetched their cached device list. In the worst case
+                # we will do extra federation queries for devices that we had cached.
+                cached_users = set(remote_results.keys())
+                valid_cached_users = (
+                    await self.store.get_users_server_still_shares_room_with(
+                        remote_results.keys()
+                    )
+                )
+                invalid_cached_users = cached_users - valid_cached_users
+                if invalid_cached_users:
+                    # Fix up results. If we get here, there is either a bug in device
+                    # list tracking, or we hit the race mentioned above.
+                    user_ids_not_in_cache.update(invalid_cached_users)
+                    for invalid_user_id in invalid_cached_users:
+                        remote_results.pop(invalid_user_id)
+                    # This log message may be removed if it turns out it's almost
+                    # entirely triggered by races.
+                    logger.error(
+                        "Devices for %s were cached, but the server no longer shares "
+                        "any rooms with them. The cached device lists are stale.",
+                        invalid_cached_users,
+                    )
+
                 for user_id, devices in remote_results.items():
                     user_devices = results.setdefault(user_id, {})
                     for device_id, device in devices.items():
diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py
index dad3731b9b..501dbbc990 100644
--- a/synapse/storage/controllers/persist_events.py
+++ b/synapse/storage/controllers/persist_events.py
@@ -598,9 +598,9 @@ class EventsPersistenceStorageController:
             # room
             state_delta_for_room: Dict[str, DeltaState] = {}
 
-            # Set of remote users which were in rooms the server has left. We
-            # should check if we still share any rooms and if not we mark their
-            # device lists as stale.
+            # Set of remote users which were in rooms the server has left or who may
+            # have left rooms the server is in. We should check if we still share any
+            # rooms and if not we mark their device lists as stale.
             potentially_left_users: Set[str] = set()
 
             if not backfilled:
@@ -725,6 +725,20 @@ class EventsPersistenceStorageController:
                                 current_state = {}
                                 delta.no_longer_in_room = True
 
+                            # Add all remote users that might have left rooms.
+                            potentially_left_users.update(
+                                user_id
+                                for event_type, user_id in delta.to_delete
+                                if event_type == EventTypes.Member
+                                and not self.is_mine_id(user_id)
+                            )
+                            potentially_left_users.update(
+                                user_id
+                                for event_type, user_id in delta.to_insert.keys()
+                                if event_type == EventTypes.Member
+                                and not self.is_mine_id(user_id)
+                            )
+
                             state_delta_for_room[room_id] = delta
 
             await self.persist_events_store._persist_events_and_state_updates(
diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py
index 1e6ad4b663..95698bc275 100644
--- a/tests/handlers/test_e2e_keys.py
+++ b/tests/handlers/test_e2e_keys.py
@@ -891,6 +891,12 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
             new_callable=mock.MagicMock,
             return_value=make_awaitable(["some_room_id"]),
         )
+        mock_get_users = mock.patch.object(
+            self.store,
+            "get_users_server_still_shares_room_with",
+            new_callable=mock.MagicMock,
+            return_value=make_awaitable({remote_user_id}),
+        )
         mock_request = mock.patch.object(
             self.hs.get_federation_client(),
             "query_user_devices",
@@ -898,7 +904,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
             return_value=make_awaitable(response_body),
         )
 
-        with mock_get_rooms, mock_request as mocked_federation_request:
+        with mock_get_rooms, mock_get_users, mock_request as mocked_federation_request:
             # Make the first query and sanity check it succeeds.
             response_1 = self.get_success(
                 e2e_handler.query_devices(
-- 
cgit 1.5.1


From 51a77e990b7a59e460ab22a2788ab8c3506b9a2c Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 14 Sep 2022 14:16:12 +0100
Subject: Remove incorrect migration file from `state` logical DB (#13788)

* Remove incorrect migration file from `state` logical DB

The table `ex_outlier_stream` is part of the `main` logical DB; it
should not have been created in the `state` logical DB. We remove this
migration now as a tidy-up.

Note: we cannot `DROP TABLE IF EXISTS ex_outlier_stream` in a new
migration, because some (most) instances of Synapse host both of these
logical DBs on the same DB cluster.

* Changelog
---
 changelog.d/13788.misc                             |  1 +
 .../storage/schema/state/delta/30/state_stream.sql | 37 ----------------------
 2 files changed, 1 insertion(+), 37 deletions(-)
 create mode 100644 changelog.d/13788.misc
 delete mode 100644 synapse/storage/schema/state/delta/30/state_stream.sql

(limited to 'synapse')

diff --git a/changelog.d/13788.misc b/changelog.d/13788.misc
new file mode 100644
index 0000000000..7263b1ac52
--- /dev/null
+++ b/changelog.d/13788.misc
@@ -0,0 +1 @@
+Remove an old, incorrect migration file.
diff --git a/synapse/storage/schema/state/delta/30/state_stream.sql b/synapse/storage/schema/state/delta/30/state_stream.sql
deleted file mode 100644
index bdaf8b02d5..0000000000
--- a/synapse/storage/schema/state/delta/30/state_stream.sql
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright 2016 OpenMarket Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-/* We used to create a table called current_state_resets, but this is no
- * longer used and is removed in delta 54.
- */
-
-/* The outlier events that have aquired a state group typically through
- * backfill. This is tracked separately to the events table, as assigning a
- * state group change the position of the existing event in the stream
- * ordering.
- * However since a stream_ordering is assigned in persist_event for the
- * (event, state) pair, we can use that stream_ordering to identify when
- * the new state was assigned for the event.
- */
-
-/* NB: This table belongs to the `main` logical database; it should not be present
- * in `state`.
- */
-CREATE TABLE IF NOT EXISTS ex_outlier_stream(
-    event_stream_ordering BIGINT PRIMARY KEY NOT NULL,
-    event_id TEXT NOT NULL,
-    state_group BIGINT NOT NULL
-);
-- 
cgit 1.5.1


From eaed4e6113f5ed40056fa02ae922cb273d02be6e Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Wed, 14 Sep 2022 16:33:54 +0200
Subject: Remove unused method in `synapse.api.auth.Auth`. (#13795)

Clean-up from b19060a29b4f73897847db2aba5d03ec819086e0 (#13094)
and 73af10f419346a5f2d70131ac1ed8e69942edca0 (#13093) which removed
all callers.
---
 changelog.d/13795.misc | 1 +
 synapse/api/auth.py    | 9 ---------
 2 files changed, 1 insertion(+), 9 deletions(-)
 create mode 100644 changelog.d/13795.misc

(limited to 'synapse')

diff --git a/changelog.d/13795.misc b/changelog.d/13795.misc
new file mode 100644
index 0000000000..20d90cc130
--- /dev/null
+++ b/changelog.d/13795.misc
@@ -0,0 +1 @@
+Remove unused method in `synapse.api.auth.Auth`.
diff --git a/synapse/api/auth.py b/synapse/api/auth.py
index 4a75eb6b21..3d7f986ac7 100644
--- a/synapse/api/auth.py
+++ b/synapse/api/auth.py
@@ -459,15 +459,6 @@ class Auth:
             )
             raise InvalidClientTokenError("Invalid access token passed.")
 
-    def get_appservice_by_req(self, request: SynapseRequest) -> ApplicationService:
-        token = self.get_access_token_from_request(request)
-        service = self.store.get_app_service_by_token(token)
-        if not service:
-            logger.warning("Unrecognised appservice access token.")
-            raise InvalidClientTokenError()
-        request.requester = create_requester(service.sender, app_service=service)
-        return service
-
     async def is_server_admin(self, requester: Requester) -> bool:
         """Check if the given user is a local server admin.
 
-- 
cgit 1.5.1


From cf65433de26ecce551c64e56d9ee8435c99defab Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Wed, 14 Sep 2022 15:29:05 +0000
Subject: Fix a memory leak when running the unit tests. (#13798)

---
 changelog.d/13798.misc          |  1 +
 synapse/util/caches/__init__.py |  3 ++-
 synapse/util/metrics.py         | 10 +++++-----
 3 files changed, 8 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/13798.misc

(limited to 'synapse')

diff --git a/changelog.d/13798.misc b/changelog.d/13798.misc
new file mode 100644
index 0000000000..e4ec2d77d6
--- /dev/null
+++ b/changelog.d/13798.misc
@@ -0,0 +1 @@
+Fix a memory leak when running the unit tests.
\ No newline at end of file
diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py
index 35c0be08b0..f7c3a6794e 100644
--- a/synapse/util/caches/__init__.py
+++ b/synapse/util/caches/__init__.py
@@ -205,8 +205,9 @@ def register_cache(
         add_resizable_cache(cache_name, resize_callback)
 
     metric = CacheMetric(cache, cache_type, cache_name, collect_callback)
+    metric_name = "cache_%s_%s" % (cache_type, cache_name)
     caches_by_name[cache_name] = cache
-    CACHE_METRIC_REGISTRY.register_hook(metric.collect)
+    CACHE_METRIC_REGISTRY.register_hook(metric_name, metric.collect)
     return metric
 
 
diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py
index 9687120ebf..165480bdbe 100644
--- a/synapse/util/metrics.py
+++ b/synapse/util/metrics.py
@@ -15,7 +15,7 @@
 import logging
 from functools import wraps
 from types import TracebackType
-from typing import Awaitable, Callable, Generator, List, Optional, Type, TypeVar
+from typing import Awaitable, Callable, Dict, Generator, Optional, Type, TypeVar
 
 from prometheus_client import CollectorRegistry, Counter, Metric
 from typing_extensions import Concatenate, ParamSpec, Protocol
@@ -220,21 +220,21 @@ class DynamicCollectorRegistry(CollectorRegistry):
 
     def __init__(self) -> None:
         super().__init__()
-        self._pre_update_hooks: List[Callable[[], None]] = []
+        self._pre_update_hooks: Dict[str, Callable[[], None]] = {}
 
     def collect(self) -> Generator[Metric, None, None]:
         """
         Collects metrics, calling pre-update hooks first.
         """
 
-        for pre_update_hook in self._pre_update_hooks:
+        for pre_update_hook in self._pre_update_hooks.values():
             pre_update_hook()
 
         yield from super().collect()
 
-    def register_hook(self, hook: Callable[[], None]) -> None:
+    def register_hook(self, metric_name: str, hook: Callable[[], None]) -> None:
         """
         Registers a hook that is called before metric collection.
         """
 
-        self._pre_update_hooks.append(hook)
+        self._pre_update_hooks[metric_name] = hook
-- 
cgit 1.5.1


From 6302753012927b63feddc71dd287e2d3554707d4 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Wed, 14 Sep 2022 15:53:18 +0000
Subject: Deduplicate `is_server_notices_room`. (#13780)

---
 changelog.d/13780.misc                       |  1 +
 synapse/handlers/message.py                  | 10 +---------
 synapse/handlers/room_member.py              | 10 +---------
 synapse/storage/databases/main/roommember.py | 17 +++++++++++++++++
 4 files changed, 20 insertions(+), 18 deletions(-)
 create mode 100644 changelog.d/13780.misc

(limited to 'synapse')

diff --git a/changelog.d/13780.misc b/changelog.d/13780.misc
new file mode 100644
index 0000000000..1bcac51cad
--- /dev/null
+++ b/changelog.d/13780.misc
@@ -0,0 +1 @@
+Deduplicate `is_server_notices_room`.
\ No newline at end of file
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 72157d5a36..e07cda133a 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -752,20 +752,12 @@ class EventCreationHandler:
         if builder.type == EventTypes.Member:
             membership = builder.content.get("membership", None)
             if membership == Membership.JOIN:
-                return await self._is_server_notices_room(builder.room_id)
+                return await self.store.is_server_notice_room(builder.room_id)
             elif membership == Membership.LEAVE:
                 # the user is always allowed to leave (but not kick people)
                 return builder.state_key == requester.user.to_string()
         return False
 
-    async def _is_server_notices_room(self, room_id: str) -> bool:
-        if self.config.servernotices.server_notices_mxid is None:
-            return False
-        is_server_notices_room = await self.store.check_local_user_in_room(
-            user_id=self.config.servernotices.server_notices_mxid, room_id=room_id
-        )
-        return is_server_notices_room
-
     async def assert_accepted_privacy_policy(self, requester: Requester) -> None:
         """Check if a user has accepted the privacy policy
 
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 5d4adf5bfd..8d01f4bf2b 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -837,7 +837,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                 old_membership == Membership.INVITE
                 and effective_membership_state == Membership.LEAVE
             ):
-                is_blocked = await self._is_server_notice_room(room_id)
+                is_blocked = await self.store.is_server_notice_room(room_id)
                 if is_blocked:
                     raise SynapseError(
                         HTTPStatus.FORBIDDEN,
@@ -1617,14 +1617,6 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
 
         return False
 
-    async def _is_server_notice_room(self, room_id: str) -> bool:
-        if self._server_notices_mxid is None:
-            return False
-        is_server_notices_room = await self.store.check_local_user_in_room(
-            user_id=self._server_notices_mxid, room_id=room_id
-        )
-        return is_server_notices_room
-
 
 class RoomMemberMasterHandler(RoomMemberHandler):
     def __init__(self, hs: "HomeServer"):
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index fdb4684e12..a8d224602a 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -88,6 +88,8 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         # at a time. Keyed by room_id.
         self._joined_host_linearizer = Linearizer("_JoinedHostsCache")
 
+        self._server_notices_mxid = hs.config.servernotices.server_notices_mxid
+
         if (
             self.hs.config.worker.run_background_tasks
             and self.hs.config.metrics.metrics_flags.known_servers
@@ -504,6 +506,21 @@ class RoomMemberWorkerStore(EventsWorkerStore):
 
         return membership == Membership.JOIN
 
+    async def is_server_notice_room(self, room_id: str) -> bool:
+        """
+        Determines whether the given room is a 'Server Notices' room, used for
+        sending server notices to a user.
+
+        This is determined by seeing whether the server notices user is present
+        in the room.
+        """
+        if self._server_notices_mxid is None:
+            return False
+        is_server_notices_room = await self.check_local_user_in_room(
+            user_id=self._server_notices_mxid, room_id=room_id
+        )
+        return is_server_notices_room
+
     async def get_local_current_membership_for_user_in_room(
         self, user_id: str, room_id: str
     ) -> Tuple[Optional[str], Optional[str]]:
-- 
cgit 1.5.1


From f2d12ccabef17faa0bf6b34fbb6d944849afc4d4 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 14 Sep 2022 12:01:42 -0400
Subject: Use partial indices on SQLite. (#13802)

Partial indices have been supported since SQLite 3.8, but Synapse
now requires >= 3.27, so we can enable support for them.

This requires rebuilding previous indices which were partial on
PostgreSQL, but not on SQLite.
---
 changelog.d/13802.misc                             |  1 +
 synapse/storage/background_updates.py              |  6 +--
 .../storage/databases/main/event_push_actions.py   |  1 -
 .../main/delta/72/09partial_indices.sql.sqlite     | 56 ++++++++++++++++++++++
 4 files changed, 59 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/13802.misc
 create mode 100644 synapse/storage/schema/main/delta/72/09partial_indices.sql.sqlite

(limited to 'synapse')

diff --git a/changelog.d/13802.misc b/changelog.d/13802.misc
new file mode 100644
index 0000000000..0d55071326
--- /dev/null
+++ b/changelog.d/13802.misc
@@ -0,0 +1 @@
+Use partial indices on SQLite.
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index 555b4e77d2..cf1eabc437 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -581,9 +581,6 @@ class BackgroundUpdater:
         def create_index_sqlite(conn: Connection) -> None:
             # Sqlite doesn't support concurrent creation of indexes.
             #
-            # We don't use partial indices on SQLite as it wasn't introduced
-            # until 3.8, and wheezy and CentOS 7 have 3.7
-            #
             # We assume that sqlite doesn't give us invalid indices; however
             # we may still end up with the index existing but the
             # background_updates not having been recorded if synapse got shut
@@ -591,12 +588,13 @@ class BackgroundUpdater:
             # has supported CREATE TABLE|INDEX IF NOT EXISTS since 3.3.0.)
             sql = (
                 "CREATE %(unique)s INDEX IF NOT EXISTS %(name)s ON %(table)s"
-                " (%(columns)s)"
+                " (%(columns)s) %(where_clause)s"
             ) % {
                 "unique": "UNIQUE" if unique else "",
                 "name": index_name,
                 "table": table,
                 "columns": ", ".join(columns),
+                "where_clause": "WHERE " + where_clause if where_clause else "",
             }
 
             c = conn.cursor()
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index f4a07de2a3..3a3fb8c507 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -1255,7 +1255,6 @@ class EventPushActionsStore(EventPushActionsWorkerStore):
             table="event_push_actions",
             columns=["highlight", "stream_ordering"],
             where_clause="highlight=0",
-            psql_only=True,
         )
 
     async def get_push_actions_for_user(
diff --git a/synapse/storage/schema/main/delta/72/09partial_indices.sql.sqlite b/synapse/storage/schema/main/delta/72/09partial_indices.sql.sqlite
new file mode 100644
index 0000000000..c8dfdf0218
--- /dev/null
+++ b/synapse/storage/schema/main/delta/72/09partial_indices.sql.sqlite
@@ -0,0 +1,56 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ 
+-- Rebuild, on SQLite, the indices which are partial indices on PostgreSQL but
+-- which were previously created as full (non-partial) indices on SQLite.
+
+-- Drop each index that was added with register_background_index_update AND specified
+-- a where_clause (that existed before this delta).
+
+-- From events_bg_updates.py
+DROP INDEX IF EXISTS event_contains_url_index;
+-- There is also a redactions_censored_redacts index, but that gets dropped.
+DROP INDEX IF EXISTS redactions_have_censored_ts;
+-- There is also a PostgreSQL only index (event_contains_url_index2)
+-- which gets renamed to event_contains_url_index.
+
+-- From roommember.py
+DROP INDEX IF EXISTS room_memberships_user_room_forgotten;
+
+-- From presence.py
+DROP INDEX IF EXISTS presence_stream_state_not_offline_idx;
+
+-- From media_repository.py
+DROP INDEX IF EXISTS local_media_repository_url_idx;
+
+-- From event_push_actions.py
+DROP INDEX IF EXISTS event_push_actions_highlights_index;
+-- There's also an event_push_actions_stream_highlight_index which was previously
+-- PostgreSQL-only.
+
+-- From state.py
+DROP INDEX IF EXISTS current_state_events_member_index;
+
+-- Re-insert the background jobs to re-create the indices.
+INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
+  (7209, 'event_contains_url_index', '{}', NULL),
+  (7209, 'redactions_have_censored_ts_idx', '{}', NULL),
+  (7209, 'room_membership_forgotten_idx', '{}', NULL),
+  (7209, 'presence_stream_not_offline_index', '{}', NULL),
+  (7209, 'local_media_repository_url_idx', '{}', NULL),
+  (7209, 'event_push_actions_highlights_index', '{}', NULL),
+  (7209, 'event_push_actions_stream_highlight_index', '{}', NULL),
+  (7209, 'current_state_members_idx', '{}', NULL)
+ON CONFLICT (update_name) DO NOTHING;
-- 
cgit 1.5.1


From 666ae877292d4747b9441105e3df8558f7a335c0 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 14 Sep 2022 13:11:16 -0400
Subject: Update event push action and receipt tables to support threads.
 (#13753)

Adds a `thread_id` column to the `event_push_actions`, `event_push_actions_staging`,
and `event_push_summary` tables. This will allow notifications to be segmented by the thread
in a future pull request. The `thread_id` column stores the root event ID or the special
value `"main"`.

The `thread_id` column for `event_push_actions` and `event_push_summary` is
backfilled with `"main"` for all existing rows. New entries into `event_push_actions`
and `event_push_actions_staging` will get the proper thread ID.

`receipts_linearized` and `receipts_graph` also gain a `thread_id` column, which is similar,
except `NULL` is a special value meaning the receipt is "unthreaded".

See MSC3771 and MSC3773 for where this data will be useful.
---
 changelog.d/13753.misc                             |   1 +
 synapse/push/bulk_push_rule_evaluator.py           |  29 ++---
 .../storage/databases/main/event_push_actions.py   | 121 ++++++++++++++++++++-
 synapse/storage/databases/main/events.py           |   4 +-
 synapse/storage/databases/main/receipts.py         |  20 ++++
 synapse/storage/schema/__init__.py                 |   6 +-
 .../main/delta/72/06thread_notifications.sql       |  30 +++++
 .../main/delta/72/07thread_receipts.sql.postgres   |  30 +++++
 .../main/delta/72/07thread_receipts.sql.sqlite     |  70 ++++++++++++
 .../schema/main/delta/72/08thread_receipts.sql     |  20 ++++
 tests/replication/slave/storage/test_events.py     |   1 +
 11 files changed, 312 insertions(+), 20 deletions(-)
 create mode 100644 changelog.d/13753.misc
 create mode 100644 synapse/storage/schema/main/delta/72/06thread_notifications.sql
 create mode 100644 synapse/storage/schema/main/delta/72/07thread_receipts.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/72/07thread_receipts.sql.sqlite
 create mode 100644 synapse/storage/schema/main/delta/72/08thread_receipts.sql

(limited to 'synapse')

diff --git a/changelog.d/13753.misc b/changelog.d/13753.misc
new file mode 100644
index 0000000000..63de2eb9f9
--- /dev/null
+++ b/changelog.d/13753.misc
@@ -0,0 +1 @@
+Preparatory work for storing thread IDs for notifications and receipts.
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index d1caf8a0f7..3846fbc5f0 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -198,7 +198,7 @@ class BulkPushRuleEvaluator:
         return pl_event.content if pl_event else {}, sender_level
 
     async def _get_mutual_relations(
-        self, event: EventBase, rules: Iterable[Tuple[PushRule, bool]]
+        self, parent_id: str, rules: Iterable[Tuple[PushRule, bool]]
     ) -> Dict[str, Set[Tuple[str, str]]]:
         """
         Fetch event metadata for events which related to the same event as the given event.
@@ -206,7 +206,7 @@ class BulkPushRuleEvaluator:
         If the given event has no relation information, returns an empty dictionary.
 
         Args:
-            event_id: The event ID which is targeted by relations.
+            parent_id: The event ID which is targeted by relations.
             rules: The push rules which will be processed for this event.
 
         Returns:
@@ -220,12 +220,6 @@ class BulkPushRuleEvaluator:
         if not self._relations_match_enabled:
             return {}
 
-        # If the event does not have a relation, then cannot have any mutual
-        # relations.
-        relation = relation_from_event(event)
-        if not relation:
-            return {}
-
         # Pre-filter to figure out which relation types are interesting.
         rel_types = set()
         for rule, enabled in rules:
@@ -246,9 +240,7 @@ class BulkPushRuleEvaluator:
             return {}
 
         # If any valid rules were found, fetch the mutual relations.
-        return await self.store.get_mutual_event_relations(
-            relation.parent_id, rel_types
-        )
+        return await self.store.get_mutual_event_relations(parent_id, rel_types)
 
     @measure_func("action_for_event_by_user")
     async def action_for_event_by_user(
@@ -281,9 +273,17 @@ class BulkPushRuleEvaluator:
             sender_power_level,
         ) = await self._get_power_levels_and_sender_level(event, context)
 
-        relations = await self._get_mutual_relations(
-            event, itertools.chain(*rules_by_user.values())
-        )
+        relation = relation_from_event(event)
+        # If the event does not have a relation, then cannot have any mutual
+        # relations or thread ID.
+        relations = {}
+        thread_id = "main"
+        if relation:
+            relations = await self._get_mutual_relations(
+                relation.parent_id, itertools.chain(*rules_by_user.values())
+            )
+            if relation.rel_type == RelationTypes.THREAD:
+                thread_id = relation.parent_id
 
         evaluator = PushRuleEvaluatorForEvent(
             event,
@@ -352,6 +352,7 @@ class BulkPushRuleEvaluator:
             event.event_id,
             actions_by_user,
             count_as_unread,
+            thread_id,
         )
 
 
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 3a3fb8c507..6b8668d2dc 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -98,6 +98,7 @@ from synapse.storage.database import (
 )
 from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
 from synapse.storage.databases.main.stream import StreamWorkerStore
+from synapse.types import JsonDict
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached
 
@@ -232,6 +233,104 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             replaces_index="event_push_summary_user_rm",
         )
 
+        self.db_pool.updates.register_background_index_update(
+            "event_push_summary_unique_index2",
+            index_name="event_push_summary_unique_index2",
+            table="event_push_summary",
+            columns=["user_id", "room_id", "thread_id"],
+            unique=True,
+        )
+
+        self.db_pool.updates.register_background_update_handler(
+            "event_push_backfill_thread_id",
+            self._background_backfill_thread_id,
+        )
+
+    async def _background_backfill_thread_id(
+        self, progress: JsonDict, batch_size: int
+    ) -> int:
+        """
+        Fill in the thread_id field for event_push_actions and event_push_summary.
+
+        This is preparatory so that it can be made non-nullable in the future.
+
+        Because all current (null) data is done in an unthreaded manner this
+        simply assumes it is on the "main" timeline. Since event_push_actions
+        are periodically cleared it is not possible to correctly re-calculate
+        the thread_id.
+        """
+        event_push_actions_done = progress.get("event_push_actions_done", False)
+
+        def add_thread_id_txn(
+            txn: LoggingTransaction, table_name: str, start_stream_ordering: int
+        ) -> int:
+            sql = f"""
+            SELECT stream_ordering
+            FROM {table_name}
+            WHERE
+                thread_id IS NULL
+                AND stream_ordering > ?
+            ORDER BY stream_ordering
+            LIMIT ?
+            """
+            txn.execute(sql, (start_stream_ordering, batch_size))
+
+            # No more rows to process.
+            rows = txn.fetchall()
+            if not rows:
+                progress[f"{table_name}_done"] = True
+                self.db_pool.updates._background_update_progress_txn(
+                    txn, "event_push_backfill_thread_id", progress
+                )
+                return 0
+
+            # Update the thread ID for any of those rows.
+            max_stream_ordering = rows[-1][0]
+
+            sql = f"""
+            UPDATE {table_name}
+            SET thread_id = 'main'
+            WHERE stream_ordering <= ? AND thread_id IS NULL
+            """
+            txn.execute(sql, (max_stream_ordering,))
+
+            # Update progress.
+            processed_rows = txn.rowcount
+            progress[f"max_{table_name}_stream_ordering"] = max_stream_ordering
+            self.db_pool.updates._background_update_progress_txn(
+                txn, "event_push_backfill_thread_id", progress
+            )
+
+            return processed_rows
+
+        # First update the event_push_actions table, then the event_push_summary table.
+        #
+        # Note that the event_push_actions_staging table is ignored since it is
+        # assumed that items in that table will only exist for a short period of
+        # time.
+        if not event_push_actions_done:
+            result = await self.db_pool.runInteraction(
+                "event_push_backfill_thread_id",
+                add_thread_id_txn,
+                "event_push_actions",
+                progress.get("max_event_push_actions_stream_ordering", 0),
+            )
+        else:
+            result = await self.db_pool.runInteraction(
+                "event_push_backfill_thread_id",
+                add_thread_id_txn,
+                "event_push_summary",
+                progress.get("max_event_push_summary_stream_ordering", 0),
+            )
+
+            # Only done after the event_push_summary table is done.
+            if not result:
+                await self.db_pool.updates._end_background_update(
+                    "event_push_backfill_thread_id"
+                )
+
+        return result
+
     @cached(tree=True, max_entries=5000)
     async def get_unread_event_push_actions_by_room_for_user(
         self,
@@ -670,6 +769,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         event_id: str,
         user_id_actions: Dict[str, Collection[Union[Mapping, str]]],
         count_as_unread: bool,
+        thread_id: str,
     ) -> None:
         """Add the push actions for the event to the push action staging area.
 
@@ -678,6 +778,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             user_id_actions: A mapping of user_id to list of push actions, where
                 an action can either be a string or dict.
             count_as_unread: Whether this event should increment unread counts.
+            thread_id: The thread this event belongs to (the thread root's event ID), or "main".
         """
         if not user_id_actions:
             return
@@ -686,7 +787,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         # can be used to insert into the `event_push_actions_staging` table.
         def _gen_entry(
             user_id: str, actions: Collection[Union[Mapping, str]]
-        ) -> Tuple[str, str, str, int, int, int]:
+        ) -> Tuple[str, str, str, int, int, int, str]:
             is_highlight = 1 if _action_has_highlight(actions) else 0
             notif = 1 if "notify" in actions else 0
             return (
@@ -696,11 +797,20 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 notif,  # notif column
                 is_highlight,  # highlight column
                 int(count_as_unread),  # unread column
+                thread_id,  # thread_id column
             )
 
         await self.db_pool.simple_insert_many(
             "event_push_actions_staging",
-            keys=("event_id", "user_id", "actions", "notif", "highlight", "unread"),
+            keys=(
+                "event_id",
+                "user_id",
+                "actions",
+                "notif",
+                "highlight",
+                "unread",
+                "thread_id",
+            ),
             values=[
                 _gen_entry(user_id, actions)
                 for user_id, actions in user_id_actions.items()
@@ -981,6 +1091,8 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             )
 
             # Replace the previous summary with the new counts.
+            #
+            # TODO(threads): Upsert per-thread instead of setting them all to main.
             self.db_pool.simple_upsert_txn(
                 txn,
                 table="event_push_summary",
@@ -990,6 +1102,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                     "unread_count": unread_count,
                     "stream_ordering": old_rotate_stream_ordering,
                     "last_receipt_stream_ordering": stream_ordering,
+                    "thread_id": "main",
                 },
             )
 
@@ -1138,17 +1251,19 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
         logger.info("Rotating notifications, handling %d rows", len(summaries))
 
+        # TODO(threads): Update on a per-thread basis.
         self.db_pool.simple_upsert_many_txn(
             txn,
             table="event_push_summary",
             key_names=("user_id", "room_id"),
             key_values=[(user_id, room_id) for user_id, room_id in summaries],
-            value_names=("notif_count", "unread_count", "stream_ordering"),
+            value_names=("notif_count", "unread_count", "stream_ordering", "thread_id"),
             value_values=[
                 (
                     summary.notif_count,
                     summary.unread_count,
                     summary.stream_ordering,
+                    "main",
                 )
                 for summary in summaries.values()
             ],
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index a4010ee28d..c0b4080e4b 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -2192,9 +2192,9 @@ class PersistEventsStore:
         sql = """
             INSERT INTO event_push_actions (
                 room_id, event_id, user_id, actions, stream_ordering,
-                topological_ordering, notif, highlight, unread
+                topological_ordering, notif, highlight, unread, thread_id
             )
-            SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight, unread
+            SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight, unread, thread_id
             FROM event_push_actions_staging
             WHERE event_id = ?
         """
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index 719a12b0ae..ddb8e80b69 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -113,6 +113,24 @@ class ReceiptsWorkerStore(SQLBaseStore):
             prefilled_cache=receipts_stream_prefill,
         )
 
+        self.db_pool.updates.register_background_index_update(
+            "receipts_linearized_unique_index",
+            index_name="receipts_linearized_unique_index",
+            table="receipts_linearized",
+            columns=["room_id", "receipt_type", "user_id"],
+            where_clause="thread_id IS NULL",
+            unique=True,
+        )
+
+        self.db_pool.updates.register_background_index_update(
+            "receipts_graph_unique_index",
+            index_name="receipts_graph_unique_index",
+            table="receipts_graph",
+            columns=["room_id", "receipt_type", "user_id"],
+            where_clause="thread_id IS NULL",
+            unique=True,
+        )
+
     def get_max_receipt_stream_id(self) -> int:
         """Get the current max stream ID for receipts stream"""
         return self._receipts_id_gen.get_current_token()
@@ -677,6 +695,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
                 "event_id": event_id,
                 "event_stream_ordering": stream_ordering,
                 "data": json_encoder.encode(data),
+                "thread_id": None,
             },
             # receipts_linearized has a unique constraint on
             # (user_id, room_id, receipt_type), so no need to lock
@@ -824,6 +843,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
             values={
                 "event_ids": json_encoder.encode(event_ids),
                 "data": json_encoder.encode(data),
+                "thread_id": None,
             },
             # receipts_graph has a unique constraint on
             # (user_id, room_id, receipt_type), so no need to lock
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 32cda5e3ba..38c9532bfd 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 72  # remember to update the list below when updating
+SCHEMA_VERSION = 73  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -77,6 +77,10 @@ Changes in SCHEMA_VERSION = 72:
     - Tables related to groups are dropped.
     - Unused column application_services_state.last_txn is dropped
     - Cache invalidation stream id sequence now begins at 2 to match code expectation.
+
+Changes in SCHEMA_VERSION = 73:
+    - thread_id column is added to event_push_actions, event_push_actions_staging,
+      event_push_summary, receipts_linearized, and receipts_graph.
 """
 
 
diff --git a/synapse/storage/schema/main/delta/72/06thread_notifications.sql b/synapse/storage/schema/main/delta/72/06thread_notifications.sql
new file mode 100644
index 0000000000..2f4f5dac7a
--- /dev/null
+++ b/synapse/storage/schema/main/delta/72/06thread_notifications.sql
@@ -0,0 +1,30 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Add a nullable column for thread ID to the event push actions tables; this
+-- will be filled in with a default value for any previously existing rows.
+--
+-- After migration this can be made non-nullable.
+
+ALTER TABLE event_push_actions_staging ADD COLUMN thread_id TEXT;
+ALTER TABLE event_push_actions ADD COLUMN thread_id TEXT;
+ALTER TABLE event_push_summary ADD COLUMN thread_id TEXT;
+
+-- Update the unique index for `event_push_summary`.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7006, 'event_push_summary_unique_index2', '{}');
+
+INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
+  (7006, 'event_push_backfill_thread_id', '{}', 'event_push_summary_unique_index2');
diff --git a/synapse/storage/schema/main/delta/72/07thread_receipts.sql.postgres b/synapse/storage/schema/main/delta/72/07thread_receipts.sql.postgres
new file mode 100644
index 0000000000..55fff9e278
--- /dev/null
+++ b/synapse/storage/schema/main/delta/72/07thread_receipts.sql.postgres
@@ -0,0 +1,30 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Add a nullable column for thread ID to the receipts table; this allows a
+-- receipt per user, per room, as well as an unthreaded receipt (corresponding
+-- to a null thread ID).
+
+ALTER TABLE receipts_linearized ADD COLUMN thread_id TEXT;
+ALTER TABLE receipts_graph ADD COLUMN thread_id TEXT;
+
+-- Rebuild the unique constraint with the thread_id.
+ALTER TABLE receipts_linearized
+    ADD CONSTRAINT receipts_linearized_uniqueness_thread
+        UNIQUE (room_id, receipt_type, user_id, thread_id);
+
+ALTER TABLE receipts_graph
+    ADD CONSTRAINT receipts_graph_uniqueness_thread
+        UNIQUE (room_id, receipt_type, user_id, thread_id);
diff --git a/synapse/storage/schema/main/delta/72/07thread_receipts.sql.sqlite b/synapse/storage/schema/main/delta/72/07thread_receipts.sql.sqlite
new file mode 100644
index 0000000000..232f67deb4
--- /dev/null
+++ b/synapse/storage/schema/main/delta/72/07thread_receipts.sql.sqlite
@@ -0,0 +1,70 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Allow multiple receipts per user per room via a nullable thread_id column.
+--
+-- SQLite doesn't support modifying the constraints on an existing table, so
+-- the tables must be recreated.
+
+-- Create the new tables.
+CREATE TABLE receipts_linearized_new (
+    stream_id BIGINT NOT NULL,
+    room_id TEXT NOT NULL,
+    receipt_type TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    event_id TEXT NOT NULL,
+    thread_id TEXT,
+    event_stream_ordering BIGINT,
+    data TEXT NOT NULL,
+    CONSTRAINT receipts_linearized_uniqueness UNIQUE (room_id, receipt_type, user_id),
+    CONSTRAINT receipts_linearized_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id)
+);
+
+CREATE TABLE receipts_graph_new (
+    room_id TEXT NOT NULL,
+    receipt_type TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    event_ids TEXT NOT NULL,
+    thread_id TEXT,
+    data TEXT NOT NULL,
+    CONSTRAINT receipts_graph_uniqueness UNIQUE (room_id, receipt_type, user_id),
+    CONSTRAINT receipts_graph_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id)
+);
+
+-- Drop the old indexes.
+DROP INDEX IF EXISTS receipts_linearized_id;
+DROP INDEX IF EXISTS receipts_linearized_room_stream;
+DROP INDEX IF EXISTS receipts_linearized_user;
+
+-- Copy the data.
+INSERT INTO receipts_linearized_new (stream_id, room_id, receipt_type, user_id, event_id, event_stream_ordering, data)
+    SELECT stream_id, room_id, receipt_type, user_id, event_id, event_stream_ordering, data
+    FROM receipts_linearized;
+INSERT INTO receipts_graph_new (room_id, receipt_type, user_id, event_ids, data)
+    SELECT room_id, receipt_type, user_id, event_ids, data
+    FROM receipts_graph;
+
+-- Drop the old tables.
+DROP TABLE receipts_linearized;
+DROP TABLE receipts_graph;
+
+-- Rename the tables.
+ALTER TABLE receipts_linearized_new RENAME TO receipts_linearized;
+ALTER TABLE receipts_graph_new RENAME TO receipts_graph;
+
+-- Create the indices.
+CREATE INDEX receipts_linearized_id ON receipts_linearized( stream_id );
+CREATE INDEX receipts_linearized_room_stream ON receipts_linearized( room_id, stream_id );
+CREATE INDEX receipts_linearized_user ON receipts_linearized( user_id );
diff --git a/synapse/storage/schema/main/delta/72/08thread_receipts.sql b/synapse/storage/schema/main/delta/72/08thread_receipts.sql
new file mode 100644
index 0000000000..e35b021f31
--- /dev/null
+++ b/synapse/storage/schema/main/delta/72/08thread_receipts.sql
@@ -0,0 +1,20 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7007, 'receipts_linearized_unique_index', '{}');
+
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7007, 'receipts_graph_unique_index', '{}');
diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py
index 531a0db2d0..49a21e2e85 100644
--- a/tests/replication/slave/storage/test_events.py
+++ b/tests/replication/slave/storage/test_events.py
@@ -404,6 +404,7 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase):
                 event.event_id,
                 {user_id: actions for user_id, actions in push_actions},
                 False,
+                "main",
             )
         )
         return event, context
-- 
cgit 1.5.1


From 957e3d74fc70f92bb9ed3c709f87752bf77a8c90 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Wed, 14 Sep 2022 13:57:50 -0500
Subject: Keep track when we try and fail to process a pulled event (#13589)

We can follow-up this PR with:

 1. Only try to backfill from an event if we haven't tried recently -> https://github.com/matrix-org/synapse/issues/13622
 1. When we decide to backfill that event again, process it in the background so it doesn't block and make `/messages` slow when we know it will probably fail again -> https://github.com/matrix-org/synapse/issues/13623
 1. Generally track failures everywhere we try and fail to pull an event over federation -> https://github.com/matrix-org/synapse/issues/13700

Fix https://github.com/matrix-org/synapse/issues/13621

Part of https://github.com/matrix-org/synapse/issues/13356

Mentioned in [internal doc](https://docs.google.com/document/d/1lvUoVfYUiy6UaHB6Rb4HicjaJAU40-APue9Q4vzuW3c/edit#bookmark=id.qv7cj51sv9i5)
---
 changelog.d/13589.feature                          |   1 +
 synapse/handlers/federation_event.py               |   7 +
 synapse/storage/databases/main/event_federation.py |  45 +++++
 synapse/storage/databases/main/events.py           |  32 ++-
 synapse/storage/schema/__init__.py                 |   2 +
 .../main/delta/73/01event_failed_pull_attempts.sql |  29 +++
 tests/handlers/test_federation_event.py            | 222 +++++++++++++++++++++
 7 files changed, 329 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/13589.feature
 create mode 100644 synapse/storage/schema/main/delta/73/01event_failed_pull_attempts.sql

(limited to 'synapse')

diff --git a/changelog.d/13589.feature b/changelog.d/13589.feature
new file mode 100644
index 0000000000..78fa1ddb52
--- /dev/null
+++ b/changelog.d/13589.feature
@@ -0,0 +1 @@
+Keep track of when we attempt to backfill an event but fail so we can intelligently back off in the future.
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index ace7adcffb..9e065e1116 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -862,6 +862,9 @@ class FederationEventHandler:
             self._sanity_check_event(event)
         except SynapseError as err:
             logger.warning("Event %s failed sanity check: %s", event_id, err)
+            await self._store.record_event_failed_pull_attempt(
+                event.room_id, event_id, str(err)
+            )
             return
 
         try:
@@ -897,6 +900,10 @@ class FederationEventHandler:
                     backfilled=backfilled,
                 )
         except FederationError as e:
+            await self._store.record_event_failed_pull_attempt(
+                event.room_id, event_id, str(e)
+            )
+
             if e.code == 403:
                 logger.warning("Pulled event %s failed history check.", event_id)
             else:
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index ca47a22bf1..ef477978ed 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -1294,6 +1294,51 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
 
         return event_id_results
 
+    @trace
+    async def record_event_failed_pull_attempt(
+        self, room_id: str, event_id: str, cause: str
+    ) -> None:
+        """
+        Record when we fail to pull an event over federation.
+
+        This information allows us to be more intelligent when we decide to
+        retry (we don't need to fail over and over) and we can process that
+        event in the background so we don't block on it each time.
+
+        Args:
+            room_id: The room where the event failed to pull from
+            event_id: The event that failed to be fetched or processed
+            cause: The error message or reason that we failed to pull the event
+        """
+        await self.db_pool.runInteraction(
+            "record_event_failed_pull_attempt",
+            self._record_event_failed_pull_attempt_upsert_txn,
+            room_id,
+            event_id,
+            cause,
+            db_autocommit=True,  # Safe as it's a single upsert
+        )
+
+    def _record_event_failed_pull_attempt_upsert_txn(
+        self,
+        txn: LoggingTransaction,
+        room_id: str,
+        event_id: str,
+        cause: str,
+    ) -> None:
+        sql = """
+            INSERT INTO event_failed_pull_attempts (
+                room_id, event_id, num_attempts, last_attempt_ts, last_cause
+            )
+                VALUES (?, ?, ?, ?, ?)
+            ON CONFLICT (room_id, event_id) DO UPDATE SET
+                num_attempts=event_failed_pull_attempts.num_attempts + 1,
+                last_attempt_ts=EXCLUDED.last_attempt_ts,
+                last_cause=EXCLUDED.last_cause;
+        """
+
+        txn.execute(sql, (room_id, event_id, 1, self._clock.time_msec(), cause))
+
     async def get_missing_events(
         self,
         room_id: str,
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index c0b4080e4b..1b54a2eb57 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -2435,17 +2435,31 @@ class PersistEventsStore:
             "DELETE FROM event_backward_extremities"
             " WHERE event_id = ? AND room_id = ?"
         )
+        backward_extremity_tuples_to_remove = [
+            (ev.event_id, ev.room_id)
+            for ev in events
+            if not ev.internal_metadata.is_outlier()
+            # If we encountered an event with no prev_events, then we might
+            # as well remove it now because it won't ever have anything else
+            # to backfill from.
+            or len(ev.prev_event_ids()) == 0
+        ]
         txn.execute_batch(
             query,
-            [
-                (ev.event_id, ev.room_id)
-                for ev in events
-                if not ev.internal_metadata.is_outlier()
-                # If we encountered an event with no prev_events, then we might
-                # as well remove it now because it won't ever have anything else
-                # to backfill from.
-                or len(ev.prev_event_ids()) == 0
-            ],
+            backward_extremity_tuples_to_remove,
+        )
+
+        # Clear out the failed backfill attempts after we successfully pulled
+        # the event. Since we no longer need these events as backward
+        # extremities, it also means that they won't be backfilled from again so
+        # we no longer need to store the backfill attempts around it.
+        query = """
+            DELETE FROM event_failed_pull_attempts
+            WHERE event_id = ? and room_id = ?
+        """
+        txn.execute_batch(
+            query,
+            backward_extremity_tuples_to_remove,
         )
 
 
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 38c9532bfd..68e055c664 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -81,6 +81,8 @@ Changes in SCHEMA_VERSION = 72:
 Changes in SCHEMA_VERSION = 73;
     - thread_id column is added to event_push_actions, event_push_actions_staging
       event_push_summary, receipts_linearized, and receipts_graph.
+    - Add table `event_failed_pull_attempts` to keep track when we fail to pull
+      events over federation.
 """
 
 
diff --git a/synapse/storage/schema/main/delta/73/01event_failed_pull_attempts.sql b/synapse/storage/schema/main/delta/73/01event_failed_pull_attempts.sql
new file mode 100644
index 0000000000..d397ee1082
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/01event_failed_pull_attempts.sql
@@ -0,0 +1,29 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- Add a table that keeps track of when we failed to pull an event over
+-- federation (via /backfill, `/event`, `/get_missing_events`, etc). This allows
+-- us to be more intelligent when we decide to retry (we don't need to fail over
+-- and over) and we can process that event in the background so we don't block
+-- on it each time.
+CREATE TABLE IF NOT EXISTS event_failed_pull_attempts(
+    room_id TEXT NOT NULL REFERENCES rooms (room_id),
+    event_id TEXT NOT NULL,
+    num_attempts INT NOT NULL,
+    last_attempt_ts BIGINT NOT NULL,
+    last_cause TEXT NOT NULL,
+    PRIMARY KEY (room_id, event_id)
+);
diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py
index 51c8dd6498..b5b89405a4 100644
--- a/tests/handlers/test_federation_event.py
+++ b/tests/handlers/test_federation_event.py
@@ -227,3 +227,225 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase):
 
         if prev_exists_as_outlier:
             self.mock_federation_transport_client.get_event.assert_not_called()
+
+    def test_process_pulled_event_records_failed_backfill_attempts(
+        self,
+    ) -> None:
+        """
+        Test to make sure that failed backfill attempts for an event are
+        recorded in the `event_failed_pull_attempts` table.
+
+        In this test, we pretend we are processing a "pulled" event via
+        backfill. The pulled event has a fake `prev_event` which our server has
+        obviously never seen before so it attempts to request the state at that
+        `prev_event` which expectedly fails because it's a fake event. Because
+        the server can't fetch the state at the missing `prev_event`, the
+        "pulled" event fails the history check and fails to process.
+
+        We check that we correctly record the number of failed pull attempts
+        of the pulled event and as a sanity check, that the "pulled" event isn't
+        persisted.
+        """
+        OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}"
+        main_store = self.hs.get_datastores().main
+
+        # Create the room
+        user_id = self.register_user("kermit", "test")
+        tok = self.login("kermit", "test")
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
+        room_version = self.get_success(main_store.get_room_version(room_id))
+
+        # We expect an outbound request to /state_ids, so stub that out
+        self.mock_federation_transport_client.get_room_state_ids.return_value = make_awaitable(
+            {
+                # Mimic the other server not knowing about the state at all.
+                # We want to cause Synapse to throw an error (`Unable to get
+                # missing prev_event $fake_prev_event`) and fail to backfill
+                # the pulled event.
+                "pdu_ids": [],
+                "auth_chain_ids": [],
+            }
+        )
+        # We also expect an outbound request to /state
+        self.mock_federation_transport_client.get_room_state.return_value = make_awaitable(
+            StateRequestResponse(
+                # Mimic the other server not knowing about the state at all.
+                # We want to cause Synapse to throw an error (`Unable to get
+                # missing prev_event $fake_prev_event`) and fail to backfill
+                # the pulled event.
+                auth_events=[],
+                state=[],
+            )
+        )
+
+        pulled_event = make_event_from_dict(
+            self.add_hashes_and_signatures_from_other_server(
+                {
+                    "type": "test_regular_type",
+                    "room_id": room_id,
+                    "sender": OTHER_USER,
+                    "prev_events": [
+                        # The fake prev event will make the pulled event fail
+                        # the history check (`Unable to get missing prev_event
+                        # $fake_prev_event`)
+                        "$fake_prev_event"
+                    ],
+                    "auth_events": [],
+                    "origin_server_ts": 1,
+                    "depth": 12,
+                    "content": {"body": "pulled"},
+                }
+            ),
+            room_version,
+        )
+
+        # The function under test: try to process the pulled event
+        with LoggingContext("test"):
+            self.get_success(
+                self.hs.get_federation_event_handler()._process_pulled_event(
+                    self.OTHER_SERVER_NAME, pulled_event, backfilled=True
+                )
+            )
+
+        # Make sure our failed pull attempt was recorded
+        backfill_num_attempts = self.get_success(
+            main_store.db_pool.simple_select_one_onecol(
+                table="event_failed_pull_attempts",
+                keyvalues={"event_id": pulled_event.event_id},
+                retcol="num_attempts",
+            )
+        )
+        self.assertEqual(backfill_num_attempts, 1)
+
+        # The function under test: try to process the pulled event again
+        with LoggingContext("test"):
+            self.get_success(
+                self.hs.get_federation_event_handler()._process_pulled_event(
+                    self.OTHER_SERVER_NAME, pulled_event, backfilled=True
+                )
+            )
+
+        # Make sure our second failed pull attempt was recorded (`num_attempts` was incremented)
+        backfill_num_attempts = self.get_success(
+            main_store.db_pool.simple_select_one_onecol(
+                table="event_failed_pull_attempts",
+                keyvalues={"event_id": pulled_event.event_id},
+                retcol="num_attempts",
+            )
+        )
+        self.assertEqual(backfill_num_attempts, 2)
+
+        # And as a sanity check, make sure the event was not persisted through all of this.
+        persisted = self.get_success(
+            main_store.get_event(pulled_event.event_id, allow_none=True)
+        )
+        self.assertIsNone(
+            persisted,
+            "pulled event that fails the history check should not be persisted at all",
+        )
+
+    def test_process_pulled_event_clears_backfill_attempts_after_being_successfully_persisted(
+        self,
+    ) -> None:
+        """
+        Test to make sure that failed pull attempts
+        (`event_failed_pull_attempts` table) for an event are cleared after the
+        event is successfully persisted.
+
+        In this test, we pretend we are processing a "pulled" event via
+        backfill. The pulled event successfully processes and the backward
+        extremities are updated along with clearing out any failed pull attempts
+        for those old extremities.
+
+        We check that we correctly cleared failed pull attempts of the
+        pulled event.
+        """
+        OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}"
+        main_store = self.hs.get_datastores().main
+
+        # Create the room
+        user_id = self.register_user("kermit", "test")
+        tok = self.login("kermit", "test")
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
+        room_version = self.get_success(main_store.get_room_version(room_id))
+
+        # allow the remote user to send state events
+        self.helper.send_state(
+            room_id,
+            "m.room.power_levels",
+            {"events_default": 0, "state_default": 0},
+            tok=tok,
+        )
+
+        # add the remote user to the room
+        member_event = self.get_success(
+            event_injection.inject_member_event(self.hs, room_id, OTHER_USER, "join")
+        )
+
+        initial_state_map = self.get_success(
+            main_store.get_partial_current_state_ids(room_id)
+        )
+
+        auth_event_ids = [
+            initial_state_map[("m.room.create", "")],
+            initial_state_map[("m.room.power_levels", "")],
+            member_event.event_id,
+        ]
+
+        pulled_event = make_event_from_dict(
+            self.add_hashes_and_signatures_from_other_server(
+                {
+                    "type": "test_regular_type",
+                    "room_id": room_id,
+                    "sender": OTHER_USER,
+                    "prev_events": [member_event.event_id],
+                    "auth_events": auth_event_ids,
+                    "origin_server_ts": 1,
+                    "depth": 12,
+                    "content": {"body": "pulled"},
+                }
+            ),
+            room_version,
+        )
+
+        # Fake the "pulled" event failing to backfill once so we can test
+        # if it's cleared out later on.
+        self.get_success(
+            main_store.record_event_failed_pull_attempt(
+                pulled_event.room_id, pulled_event.event_id, "fake cause"
+            )
+        )
+        # Make sure we have a failed pull attempt recorded for the pulled event
+        backfill_num_attempts = self.get_success(
+            main_store.db_pool.simple_select_one_onecol(
+                table="event_failed_pull_attempts",
+                keyvalues={"event_id": pulled_event.event_id},
+                retcol="num_attempts",
+            )
+        )
+        self.assertEqual(backfill_num_attempts, 1)
+
+        # The function under test: try to process the pulled event
+        with LoggingContext("test"):
+            self.get_success(
+                self.hs.get_federation_event_handler()._process_pulled_event(
+                    self.OTHER_SERVER_NAME, pulled_event, backfilled=True
+                )
+            )
+
+        # Make sure the failed pull attempts for the pulled event are cleared
+        backfill_num_attempts = self.get_success(
+            main_store.db_pool.simple_select_one_onecol(
+                table="event_failed_pull_attempts",
+                keyvalues={"event_id": pulled_event.event_id},
+                retcol="num_attempts",
+                allow_none=True,
+            )
+        )
+        self.assertIsNone(backfill_num_attempts)
+
+        # And as a sanity check, make sure the "pulled" event was persisted.
+        persisted = self.get_success(
+            main_store.get_event(pulled_event.event_id, allow_none=True)
+        )
+        self.assertIsNotNone(persisted, "pulled event was not persisted at all")
-- 
cgit 1.5.1


From 918c74bfb57e3ca4d300ed9a3bfb99b99126f821 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Thu, 15 Sep 2022 13:57:16 +0100
Subject: Add a `MXCUri` class to make working with mxc uri's easier. (#13162)

---
 changelog.d/13162.misc                    |   1 +
 poetry.lock                               |  10 +--
 pyproject.toml                            |   2 +-
 synapse/rest/media/v1/media_repository.py |   6 +-
 synapse/rest/media/v1/upload_resource.py  |   6 +-
 tests/rest/media/test_media_retention.py  | 102 +++++++++++-------------------
 6 files changed, 53 insertions(+), 74 deletions(-)
 create mode 100644 changelog.d/13162.misc

(limited to 'synapse')

diff --git a/changelog.d/13162.misc b/changelog.d/13162.misc
new file mode 100644
index 0000000000..b0d7c05e74
--- /dev/null
+++ b/changelog.d/13162.misc
@@ -0,0 +1 @@
+Bump the minimum dependency of `matrix_common` to 1.3.0 to make use of the `MXCUri` class. Use `MXCUri` to simplify media retention test code.
\ No newline at end of file
diff --git a/poetry.lock b/poetry.lock
index cdc69f8ea9..291f3c51e6 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -524,11 +524,11 @@ python-versions = ">=3.7"
 
 [[package]]
 name = "matrix-common"
-version = "1.2.1"
+version = "1.3.0"
 description = "Common utilities for Synapse, Sydent and Sygnal"
 category = "main"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 
 [package.dependencies]
 attrs = "*"
@@ -1625,7 +1625,7 @@ url_preview = ["lxml"]
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.7.1"
-content-hash = "79cfa09d59f9f8b5ef24318fb860df1915f54328692aa56d04331ecbdd92a8cb"
+content-hash = "1b14fc274d9e2a495a7f864150f3ffcf4d9f585e09a67e53301ae4ef3c2f3e48"
 
 [metadata.files]
 attrs = [
@@ -2113,8 +2113,8 @@ markupsafe = [
     {file = "MarkupSafe-2.1.0.tar.gz", hash = "sha256:80beaf63ddfbc64a0452b841d8036ca0611e049650e20afcb882f5d3c266d65f"},
 ]
 matrix-common = [
-    {file = "matrix_common-1.2.1-py3-none-any.whl", hash = "sha256:946709c405944a0d4b1d73207b77eb064b6dbfc5d70a69471320b06d8ce98b20"},
-    {file = "matrix_common-1.2.1.tar.gz", hash = "sha256:a99dcf02a6bd95b24a5a61b354888a2ac92bf2b4b839c727b8dd9da2cdfa3853"},
+    {file = "matrix_common-1.3.0-py3-none-any.whl", hash = "sha256:524e2785b9b03be4d15f3a8a6b857c5b6af68791ffb1b9918f0ad299abc4db20"},
+    {file = "matrix_common-1.3.0.tar.gz", hash = "sha256:62e121cccd9f243417b57ec37a76dc44aeb198a7a5c67afd6b8275992ff2abd1"},
 ]
 matrix-synapse-ldap3 = [
     {file = "matrix-synapse-ldap3-0.2.2.tar.gz", hash = "sha256:b388d95693486eef69adaefd0fd9e84463d52fe17b0214a00efcaa669b73cb74"},
diff --git a/pyproject.toml b/pyproject.toml
index 157385ad8a..8e50dd2852 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -164,7 +164,7 @@ typing-extensions = ">=3.10.0.1"
 cryptography = ">=3.4.7"
 # ijson 3.1.4 fixes a bug with "." in property names
 ijson = ">=3.1.4"
-matrix-common = "^1.2.1"
+matrix-common = "^1.3.0"
 # We need packaging.requirements.Requirement, added in 16.1.
 packaging = ">=16.1"
 # At the time of writing, we only use functions from the version `importlib.metadata`
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index 9dd3c8d4bb..328c0c5477 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -19,6 +19,8 @@ import shutil
 from io import BytesIO
 from typing import IO, TYPE_CHECKING, Dict, List, Optional, Set, Tuple
 
+from matrix_common.types.mxc_uri import MXCUri
+
 import twisted.internet.error
 import twisted.web.http
 from twisted.internet.defer import Deferred
@@ -186,7 +188,7 @@ class MediaRepository:
         content: IO,
         content_length: int,
         auth_user: UserID,
-    ) -> str:
+    ) -> MXCUri:
         """Store uploaded content for a local user and return the mxc URL
 
         Args:
@@ -219,7 +221,7 @@ class MediaRepository:
 
         await self._generate_thumbnails(None, media_id, media_id, media_type)
 
-        return "mxc://%s/%s" % (self.server_name, media_id)
+        return MXCUri(self.server_name, media_id)
 
     async def get_local_media(
         self, request: SynapseRequest, media_id: str, name: Optional[str]
diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py
index e73e431dc9..97548b54e5 100644
--- a/synapse/rest/media/v1/upload_resource.py
+++ b/synapse/rest/media/v1/upload_resource.py
@@ -101,6 +101,8 @@ class UploadResource(DirectServeJsonResource):
             # the default 404, as that would just be confusing.
             raise SynapseError(400, "Bad content")
 
-        logger.info("Uploaded content with URI %r", content_uri)
+        logger.info("Uploaded content with URI '%s'", content_uri)
 
-        respond_with_json(request, 200, {"content_uri": content_uri}, send_cors=True)
+        respond_with_json(
+            request, 200, {"content_uri": str(content_uri)}, send_cors=True
+        )
diff --git a/tests/rest/media/test_media_retention.py b/tests/rest/media/test_media_retention.py
index 14af07c5af..23f227aed6 100644
--- a/tests/rest/media/test_media_retention.py
+++ b/tests/rest/media/test_media_retention.py
@@ -13,7 +13,9 @@
 # limitations under the License.
 
 import io
-from typing import Iterable, Optional, Tuple
+from typing import Iterable, Optional
+
+from matrix_common.types.mxc_uri import MXCUri
 
 from twisted.test.proto_helpers import MemoryReactor
 
@@ -63,9 +65,9 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
             last_accessed_ms: Optional[int],
             is_quarantined: Optional[bool] = False,
             is_protected: Optional[bool] = False,
-        ) -> str:
+        ) -> MXCUri:
             # "Upload" some media to the local media store
-            mxc_uri = self.get_success(
+            mxc_uri: MXCUri = self.get_success(
                 media_repository.create_content(
                     media_type="text/plain",
                     upload_name=None,
@@ -75,13 +77,11 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
                 )
             )
 
-            media_id = mxc_uri.split("/")[-1]
-
             # Set the last recently accessed time for this media
             if last_accessed_ms is not None:
                 self.get_success(
                     self.store.update_cached_last_access_time(
-                        local_media=(media_id,),
+                        local_media=(mxc_uri.media_id,),
                         remote_media=(),
                         time_ms=last_accessed_ms,
                     )
@@ -92,7 +92,7 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
                 self.get_success(
                     self.store.quarantine_media_by_id(
                         server_name=self.hs.config.server.server_name,
-                        media_id=media_id,
+                        media_id=mxc_uri.media_id,
                         quarantined_by="@theadmin:test",
                     )
                 )
@@ -101,18 +101,18 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
                 # Mark this media as protected from quarantine
                 self.get_success(
                     self.store.mark_local_media_as_safe(
-                        media_id=media_id,
+                        media_id=mxc_uri.media_id,
                         safe=True,
                     )
                 )
 
-            return media_id
+            return mxc_uri
 
         def _cache_remote_media_and_set_attributes(
             media_id: str,
             last_accessed_ms: Optional[int],
             is_quarantined: Optional[bool] = False,
-        ) -> str:
+        ) -> MXCUri:
             # Pretend to cache some remote media
             self.get_success(
                 self.store.store_cached_remote_media(
@@ -146,7 +146,7 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
                     )
                 )
 
-            return media_id
+            return MXCUri(self.remote_server_name, media_id)
 
         # Start with the local media store
         self.local_recently_accessed_media = _create_media_and_set_attributes(
@@ -214,28 +214,16 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
         # Remote media should be unaffected.
         self._assert_if_mxc_uris_purged(
             purged=[
-                (
-                    self.hs.config.server.server_name,
-                    self.local_not_recently_accessed_media,
-                ),
-                (self.hs.config.server.server_name, self.local_never_accessed_media),
+                self.local_not_recently_accessed_media,
+                self.local_never_accessed_media,
             ],
             not_purged=[
-                (self.hs.config.server.server_name, self.local_recently_accessed_media),
-                (
-                    self.hs.config.server.server_name,
-                    self.local_not_recently_accessed_quarantined_media,
-                ),
-                (
-                    self.hs.config.server.server_name,
-                    self.local_not_recently_accessed_protected_media,
-                ),
-                (self.remote_server_name, self.remote_recently_accessed_media),
-                (self.remote_server_name, self.remote_not_recently_accessed_media),
-                (
-                    self.remote_server_name,
-                    self.remote_not_recently_accessed_quarantined_media,
-                ),
+                self.local_recently_accessed_media,
+                self.local_not_recently_accessed_quarantined_media,
+                self.local_not_recently_accessed_protected_media,
+                self.remote_recently_accessed_media,
+                self.remote_not_recently_accessed_media,
+                self.remote_not_recently_accessed_quarantined_media,
             ],
         )
 
@@ -261,49 +249,35 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
         # Remote media accessed <30 days ago should still exist.
         self._assert_if_mxc_uris_purged(
             purged=[
-                (self.remote_server_name, self.remote_not_recently_accessed_media),
+                self.remote_not_recently_accessed_media,
             ],
             not_purged=[
-                (self.remote_server_name, self.remote_recently_accessed_media),
-                (self.hs.config.server.server_name, self.local_recently_accessed_media),
-                (
-                    self.hs.config.server.server_name,
-                    self.local_not_recently_accessed_media,
-                ),
-                (
-                    self.hs.config.server.server_name,
-                    self.local_not_recently_accessed_quarantined_media,
-                ),
-                (
-                    self.hs.config.server.server_name,
-                    self.local_not_recently_accessed_protected_media,
-                ),
-                (
-                    self.remote_server_name,
-                    self.remote_not_recently_accessed_quarantined_media,
-                ),
-                (self.hs.config.server.server_name, self.local_never_accessed_media),
+                self.remote_recently_accessed_media,
+                self.local_recently_accessed_media,
+                self.local_not_recently_accessed_media,
+                self.local_not_recently_accessed_quarantined_media,
+                self.local_not_recently_accessed_protected_media,
+                self.remote_not_recently_accessed_quarantined_media,
+                self.local_never_accessed_media,
             ],
         )
 
     def _assert_if_mxc_uris_purged(
-        self, purged: Iterable[Tuple[str, str]], not_purged: Iterable[Tuple[str, str]]
+        self, purged: Iterable[MXCUri], not_purged: Iterable[MXCUri]
     ) -> None:
-        def _assert_mxc_uri_purge_state(
-            server_name: str, media_id: str, expect_purged: bool
-        ) -> None:
+        def _assert_mxc_uri_purge_state(mxc_uri: MXCUri, expect_purged: bool) -> None:
             """Given an MXC URI, assert whether it has been purged or not."""
-            if server_name == self.hs.config.server.server_name:
+            if mxc_uri.server_name == self.hs.config.server.server_name:
                 found_media_dict = self.get_success(
-                    self.store.get_local_media(media_id)
+                    self.store.get_local_media(mxc_uri.media_id)
                 )
             else:
                 found_media_dict = self.get_success(
-                    self.store.get_cached_remote_media(server_name, media_id)
+                    self.store.get_cached_remote_media(
+                        mxc_uri.server_name, mxc_uri.media_id
+                    )
                 )
 
-            mxc_uri = f"mxc://{server_name}/{media_id}"
-
             if expect_purged:
                 self.assertIsNone(
                     found_media_dict, msg=f"{mxc_uri} unexpectedly not purged"
@@ -315,7 +289,7 @@ class MediaRetentionTestCase(unittest.HomeserverTestCase):
                 )
 
         # Assert that the given MXC URIs have either been correctly purged or not.
-        for server_name, media_id in purged:
-            _assert_mxc_uri_purge_state(server_name, media_id, expect_purged=True)
-        for server_name, media_id in not_purged:
-            _assert_mxc_uri_purge_state(server_name, media_id, expect_purged=False)
+        for mxc_uri in purged:
+            _assert_mxc_uri_purge_state(mxc_uri, expect_purged=True)
+        for mxc_uri in not_purged:
+            _assert_mxc_uri_purge_state(mxc_uri, expect_purged=False)
-- 
cgit 1.5.1


From 742f9f9d78490f7f16bdb607a8f61ca258d520ef Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 15 Sep 2022 18:36:02 +0100
Subject: A third batch of Pydantic validation for rest/client/account.py
 (#13736)

---
 changelog.d/13736.feature        |  1 +
 synapse/rest/client/account.py   | 65 ++++++++++++++++++++++------------------
 synapse/rest/client/models.py    | 28 +++++++++--------
 tests/rest/client/test_models.py | 29 ++++++++++++++++--
 4 files changed, 78 insertions(+), 45 deletions(-)
 create mode 100644 changelog.d/13736.feature

(limited to 'synapse')

diff --git a/changelog.d/13736.feature b/changelog.d/13736.feature
new file mode 100644
index 0000000000..60a63c1009
--- /dev/null
+++ b/changelog.d/13736.feature
@@ -0,0 +1 @@
+Improve validation of request bodies for the following client-server API endpoints: [`/account/3pid/add`](https://spec.matrix.org/v1.3/client-server-api/#post_matrixclientv3account3pidadd), [`/account/3pid/bind`](https://spec.matrix.org/v1.3/client-server-api/#post_matrixclientv3account3pidbind), [`/account/3pid/delete`](https://spec.matrix.org/v1.3/client-server-api/#post_matrixclientv3account3piddelete) and [`/account/3pid/unbind`](https://spec.matrix.org/v1.3/client-server-api/#post_matrixclientv3account3pidunbind).
diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py
index a09aaf3448..2db2a04f95 100644
--- a/synapse/rest/client/account.py
+++ b/synapse/rest/client/account.py
@@ -19,6 +19,7 @@ from typing import TYPE_CHECKING, List, Optional, Tuple
 from urllib.parse import urlparse
 
 from pydantic import StrictBool, StrictStr, constr
+from typing_extensions import Literal
 
 from twisted.web.server import Request
 
@@ -43,6 +44,7 @@ from synapse.metrics import threepid_send_requests
 from synapse.push.mailer import Mailer
 from synapse.rest.client.models import (
     AuthenticationData,
+    ClientSecretStr,
     EmailRequestTokenBody,
     MsisdnRequestTokenBody,
 )
@@ -627,6 +629,11 @@ class ThreepidAddRestServlet(RestServlet):
         self.auth = hs.get_auth()
         self.auth_handler = hs.get_auth_handler()
 
+    class PostBody(RequestBodyModel):
+        auth: Optional[AuthenticationData] = None
+        client_secret: ClientSecretStr
+        sid: StrictStr
+
     @interactive_auth_handler
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         if not self.hs.config.registration.enable_3pid_changes:
@@ -636,22 +643,17 @@ class ThreepidAddRestServlet(RestServlet):
 
         requester = await self.auth.get_user_by_req(request)
         user_id = requester.user.to_string()
-        body = parse_json_object_from_request(request)
-
-        assert_params_in_dict(body, ["client_secret", "sid"])
-        sid = body["sid"]
-        client_secret = body["client_secret"]
-        assert_valid_client_secret(client_secret)
+        body = parse_and_validate_json_object_from_request(request, self.PostBody)
 
         await self.auth_handler.validate_user_via_ui_auth(
             requester,
             request,
-            body,
+            body.dict(exclude_unset=True),
             "add a third-party identifier to your account",
         )
 
         validation_session = await self.identity_handler.validate_threepid_session(
-            client_secret, sid
+            body.client_secret, body.sid
         )
         if validation_session:
             await self.auth_handler.add_threepid(
@@ -676,23 +678,20 @@ class ThreepidBindRestServlet(RestServlet):
         self.identity_handler = hs.get_identity_handler()
         self.auth = hs.get_auth()
 
-    async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
-        body = parse_json_object_from_request(request)
+    class PostBody(RequestBodyModel):
+        client_secret: ClientSecretStr
+        id_access_token: StrictStr
+        id_server: StrictStr
+        sid: StrictStr
 
-        assert_params_in_dict(
-            body, ["id_server", "sid", "id_access_token", "client_secret"]
-        )
-        id_server = body["id_server"]
-        sid = body["sid"]
-        id_access_token = body["id_access_token"]
-        client_secret = body["client_secret"]
-        assert_valid_client_secret(client_secret)
+    async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
+        body = parse_and_validate_json_object_from_request(request, self.PostBody)
 
         requester = await self.auth.get_user_by_req(request)
         user_id = requester.user.to_string()
 
         await self.identity_handler.bind_threepid(
-            client_secret, sid, user_id, id_server, id_access_token
+            body.client_secret, body.sid, user_id, body.id_server, body.id_access_token
         )
 
         return 200, {}
@@ -708,23 +707,27 @@ class ThreepidUnbindRestServlet(RestServlet):
         self.auth = hs.get_auth()
         self.datastore = self.hs.get_datastores().main
 
+    class PostBody(RequestBodyModel):
+        address: StrictStr
+        id_server: Optional[StrictStr] = None
+        medium: Literal["email", "msisdn"]
+
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         """Unbind the given 3pid from a specific identity server, or identity servers that are
         known to have this 3pid bound
         """
         requester = await self.auth.get_user_by_req(request)
-        body = parse_json_object_from_request(request)
-        assert_params_in_dict(body, ["medium", "address"])
-
-        medium = body.get("medium")
-        address = body.get("address")
-        id_server = body.get("id_server")
+        body = parse_and_validate_json_object_from_request(request, self.PostBody)
 
         # Attempt to unbind the threepid from an identity server. If id_server is None, try to
         # unbind from all identity servers this threepid has been added to in the past
         result = await self.identity_handler.try_unbind_threepid(
             requester.user.to_string(),
-            {"address": address, "medium": medium, "id_server": id_server},
+            {
+                "address": body.address,
+                "medium": body.medium,
+                "id_server": body.id_server,
+            },
         )
         return 200, {"id_server_unbind_result": "success" if result else "no-support"}
 
@@ -738,21 +741,25 @@ class ThreepidDeleteRestServlet(RestServlet):
         self.auth = hs.get_auth()
         self.auth_handler = hs.get_auth_handler()
 
+    class PostBody(RequestBodyModel):
+        address: StrictStr
+        id_server: Optional[StrictStr] = None
+        medium: Literal["email", "msisdn"]
+
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         if not self.hs.config.registration.enable_3pid_changes:
             raise SynapseError(
                 400, "3PID changes are disabled on this server", Codes.FORBIDDEN
             )
 
-        body = parse_json_object_from_request(request)
-        assert_params_in_dict(body, ["medium", "address"])
+        body = parse_and_validate_json_object_from_request(request, self.PostBody)
 
         requester = await self.auth.get_user_by_req(request)
         user_id = requester.user.to_string()
 
         try:
             ret = await self.auth_handler.delete_threepid(
-                user_id, body["medium"], body["address"], body.get("id_server")
+                user_id, body.medium, body.address, body.id_server
             )
         except Exception:
             # NB. This endpoint should succeed if there is nothing to
diff --git a/synapse/rest/client/models.py b/synapse/rest/client/models.py
index 6278450c70..3d7940b0fc 100644
--- a/synapse/rest/client/models.py
+++ b/synapse/rest/client/models.py
@@ -36,18 +36,20 @@ class AuthenticationData(RequestBodyModel):
     type: Optional[StrictStr] = None
 
 
-class ThreePidRequestTokenBody(RequestBodyModel):
-    if TYPE_CHECKING:
-        client_secret: StrictStr
-    else:
-        # See also assert_valid_client_secret()
-        client_secret: constr(
-            regex="[0-9a-zA-Z.=_-]",  # noqa: F722
-            min_length=0,
-            max_length=255,
-            strict=True,
-        )
+if TYPE_CHECKING:
+    ClientSecretStr = StrictStr
+else:
+    # See also assert_valid_client_secret()
+    ClientSecretStr = constr(
+        regex="[0-9a-zA-Z.=_-]",  # noqa: F722
+        min_length=1,
+        max_length=255,
+        strict=True,
+    )
+
 
+class ThreepidRequestTokenBody(RequestBodyModel):
+    client_secret: ClientSecretStr
     id_server: Optional[StrictStr]
     id_access_token: Optional[StrictStr]
     next_link: Optional[StrictStr]
@@ -62,7 +64,7 @@ class ThreePidRequestTokenBody(RequestBodyModel):
         return token
 
 
-class EmailRequestTokenBody(ThreePidRequestTokenBody):
+class EmailRequestTokenBody(ThreepidRequestTokenBody):
     email: StrictStr
 
     # Canonicalise the email address. The addresses are all stored canonicalised
@@ -80,6 +82,6 @@ else:
     ISO3116_1_Alpha_2 = constr(regex="[A-Z]{2}", strict=True)
 
 
-class MsisdnRequestTokenBody(ThreePidRequestTokenBody):
+class MsisdnRequestTokenBody(ThreepidRequestTokenBody):
     country: ISO3116_1_Alpha_2
     phone_number: StrictStr
diff --git a/tests/rest/client/test_models.py b/tests/rest/client/test_models.py
index a9da00665e..0b8fcb0c47 100644
--- a/tests/rest/client/test_models.py
+++ b/tests/rest/client/test_models.py
@@ -11,14 +11,37 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import unittest
+import unittest as stdlib_unittest
 
-from pydantic import ValidationError
+from pydantic import BaseModel, ValidationError
+from typing_extensions import Literal
 
 from synapse.rest.client.models import EmailRequestTokenBody
 
 
-class EmailRequestTokenBodyTestCase(unittest.TestCase):
+class ThreepidMediumEnumTestCase(stdlib_unittest.TestCase):
+    class Model(BaseModel):
+        medium: Literal["email", "msisdn"]
+
+    def test_accepts_valid_medium_string(self) -> None:
+        """Sanity check that Pydantic behaves sensibly with a Literal-of-str
+
+        This is arguably more of a test of how Pydantic validates a `Literal` of
+        string values than of any Synapse code.
+        """
+        model = self.Model.parse_obj({"medium": "email"})
+        self.assertEqual(model.medium, "email")
+
+    def test_rejects_invalid_medium_value(self) -> None:
+        with self.assertRaises(ValidationError):
+            self.Model.parse_obj({"medium": "interpretive_dance"})
+
+    def test_rejects_invalid_medium_type(self) -> None:
+        with self.assertRaises(ValidationError):
+            self.Model.parse_obj({"medium": 123})
+
+
+class EmailRequestTokenBodyTestCase(stdlib_unittest.TestCase):
     base_request = {
         "client_secret": "hunter2",
         "email": "alice@wonderland.com",
-- 
cgit 1.5.1


From b2b0c8527957d89b36c0eafea70347c200c1d294 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 15 Sep 2022 14:28:48 -0400
Subject: Support providing an index predicate for upserts. (#13822)

This is useful to upsert against a table which has a unique
partial index while avoiding conflicts.
---
 changelog.d/13822.misc                |  1 +
 synapse/storage/background_updates.py |  1 +
 synapse/storage/database.py           | 30 +++++++++++++++++++++++-------
 3 files changed, 25 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/13822.misc

(limited to 'synapse')

diff --git a/changelog.d/13822.misc b/changelog.d/13822.misc
new file mode 100644
index 0000000000..dbc77cbcfa
--- /dev/null
+++ b/changelog.d/13822.misc
@@ -0,0 +1 @@
+Support providing an index predicate clause when doing upserts.
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index cf1eabc437..bf5e7ee7be 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -533,6 +533,7 @@ class BackgroundUpdater:
             index_name: name of index to add
             table: table to add index to
             columns: columns/expressions to include in index
+            where_clause: A WHERE clause to specify a partial unique index.
             unique: true to make a UNIQUE index
             psql_only: true to only create this index on psql databases (useful
                 for virtual sqlite tables)
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index e881bff7fb..921cd4dc5e 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -1191,6 +1191,7 @@ class DatabasePool:
         keyvalues: Dict[str, Any],
         values: Dict[str, Any],
         insertion_values: Optional[Dict[str, Any]] = None,
+        where_clause: Optional[str] = None,
         lock: bool = True,
     ) -> bool:
         """
@@ -1203,6 +1204,7 @@ class DatabasePool:
             keyvalues: The unique key tables and their new values
             values: The nonunique columns and their new values
             insertion_values: additional key/values to use only when inserting
+            where_clause: An index predicate to apply to the upsert.
             lock: True to lock the table when doing the upsert. Unused when performing
                 a native upsert.
         Returns:
@@ -1213,7 +1215,12 @@ class DatabasePool:
 
         if table not in self._unsafe_to_upsert_tables:
             return self.simple_upsert_txn_native_upsert(
-                txn, table, keyvalues, values, insertion_values=insertion_values
+                txn,
+                table,
+                keyvalues,
+                values,
+                insertion_values=insertion_values,
+                where_clause=where_clause,
             )
         else:
             return self.simple_upsert_txn_emulated(
@@ -1222,6 +1229,7 @@ class DatabasePool:
                 keyvalues,
                 values,
                 insertion_values=insertion_values,
+                where_clause=where_clause,
                 lock=lock,
             )
 
@@ -1232,6 +1240,7 @@ class DatabasePool:
         keyvalues: Dict[str, Any],
         values: Dict[str, Any],
         insertion_values: Optional[Dict[str, Any]] = None,
+        where_clause: Optional[str] = None,
         lock: bool = True,
     ) -> bool:
         """
@@ -1240,6 +1249,7 @@ class DatabasePool:
             keyvalues: The unique key tables and their new values
             values: The nonunique columns and their new values
             insertion_values: additional key/values to use only when inserting
+            where_clause: An index predicate to apply to the upsert.
             lock: True to lock the table when doing the upsert.
         Returns:
             Returns True if a row was inserted or updated (i.e. if `values` is
@@ -1259,14 +1269,17 @@ class DatabasePool:
             else:
                 return "%s = ?" % (key,)
 
+        # Generate a where clause of each keyvalue and optionally the provided
+        # index predicate.
+        where = [_getwhere(k) for k in keyvalues]
+        if where_clause:
+            where.append(where_clause)
+
         if not values:
             # If `values` is empty, then all of the values we care about are in
             # the unique key, so there is nothing to UPDATE. We can just do a
             # SELECT instead to see if it exists.
-            sql = "SELECT 1 FROM %s WHERE %s" % (
-                table,
-                " AND ".join(_getwhere(k) for k in keyvalues),
-            )
+            sql = "SELECT 1 FROM %s WHERE %s" % (table, " AND ".join(where))
             sqlargs = list(keyvalues.values())
             txn.execute(sql, sqlargs)
             if txn.fetchall():
@@ -1277,7 +1290,7 @@ class DatabasePool:
             sql = "UPDATE %s SET %s WHERE %s" % (
                 table,
                 ", ".join("%s = ?" % (k,) for k in values),
-                " AND ".join(_getwhere(k) for k in keyvalues),
+                " AND ".join(where),
             )
             sqlargs = list(values.values()) + list(keyvalues.values())
 
@@ -1307,6 +1320,7 @@ class DatabasePool:
         keyvalues: Dict[str, Any],
         values: Dict[str, Any],
         insertion_values: Optional[Dict[str, Any]] = None,
+        where_clause: Optional[str] = None,
     ) -> bool:
         """
         Use the native UPSERT functionality in PostgreSQL.
@@ -1316,6 +1330,7 @@ class DatabasePool:
             keyvalues: The unique key tables and their new values
             values: The nonunique columns and their new values
             insertion_values: additional key/values to use only when inserting
+            where_clause: An index predicate to apply to the upsert.
 
         Returns:
             Returns True if a row was inserted or updated (i.e. if `values` is
@@ -1331,11 +1346,12 @@ class DatabasePool:
             allvalues.update(values)
             latter = "UPDATE SET " + ", ".join(k + "=EXCLUDED." + k for k in values)
 
-        sql = ("INSERT INTO %s (%s) VALUES (%s) ON CONFLICT (%s) DO %s") % (
+        sql = "INSERT INTO %s (%s) VALUES (%s) ON CONFLICT (%s) %s DO %s" % (
             table,
             ", ".join(k for k in allvalues),
             ", ".join("?" for _ in allvalues),
             ", ".join(k for k in keyvalues),
+            f"WHERE {where_clause}" if where_clause else "",
             latter,
         )
         txn.execute(sql, list(allvalues.values()))
-- 
cgit 1.5.1


From 140af0cdb653bc2fef9474af06a5c5b525073998 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Thu, 15 Sep 2022 14:40:49 -0500
Subject: Record any exception when processing a pulled event (#13814)

Part of https://github.com/matrix-org/synapse/issues/13700 and https://github.com/matrix-org/synapse/issues/13356

Follow-up to https://github.com/matrix-org/synapse/pull/13589
---
 changelog.d/13589.feature            |  2 +-
 changelog.d/13814.feature            |  1 +
 synapse/handlers/federation_event.py | 10 ++++++++++
 3 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/13814.feature

(limited to 'synapse')

diff --git a/changelog.d/13589.feature b/changelog.d/13589.feature
index 78fa1ddb52..a5ea2bc82e 100644
--- a/changelog.d/13589.feature
+++ b/changelog.d/13589.feature
@@ -1 +1 @@
-Keep track when we attempt to backfill an event but fail so we can intelligently back-off in the future.
+Keep track when we fail to process a pulled event over federation so we can intelligently back-off in the future.
diff --git a/changelog.d/13814.feature b/changelog.d/13814.feature
new file mode 100644
index 0000000000..a5ea2bc82e
--- /dev/null
+++ b/changelog.d/13814.feature
@@ -0,0 +1 @@
+Keep track when we fail to process a pulled event over federation so we can intelligently back-off in the future.
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 9e065e1116..efcdb84057 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -866,6 +866,11 @@ class FederationEventHandler:
                 event.room_id, event_id, str(err)
             )
             return
+        except Exception as exc:
+            await self._store.record_event_failed_pull_attempt(
+                event.room_id, event_id, str(exc)
+            )
+            raise exc
 
         try:
             try:
@@ -908,6 +913,11 @@ class FederationEventHandler:
                 logger.warning("Pulled event %s failed history check.", event_id)
             else:
                 raise
+        except Exception as exc:
+            await self._store.record_event_failed_pull_attempt(
+                event.room_id, event_id, str(exc)
+            )
+            raise exc
 
     @trace
     async def _compute_event_context_with_maybe_missing_prevs(
-- 
cgit 1.5.1


From 5093cbf88da1c439f5bf16b7a4cf19246781bd93 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Thu, 15 Sep 2022 15:32:25 -0500
Subject: Be able to correlate timeouts in reverse-proxy layer in front of
 Synapse (pull request ID from header) (#13801)

Fix https://github.com/matrix-org/synapse/issues/13685

New config:

```diff
  listeners:
    - port: 8008
      tls: false
      type: http
      x_forwarded: true
+     request_id_header: "cf-ray"
      bind_addresses: ['::1', '127.0.0.1', '0.0.0.0']
```
---
 changelog.d/13801.feature                        |  1 +
 docs/reverse_proxy.md                            |  4 ++++
 docs/usage/configuration/config_documentation.md | 11 ++++++++++-
 synapse/config/server.py                         | 13 ++++++++++---
 synapse/http/site.py                             | 14 +++++++++++++-
 5 files changed, 38 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/13801.feature

(limited to 'synapse')

diff --git a/changelog.d/13801.feature b/changelog.d/13801.feature
new file mode 100644
index 0000000000..d7cedfd302
--- /dev/null
+++ b/changelog.d/13801.feature
@@ -0,0 +1 @@
+Add `listeners[x].request_id_header` config to specify which request header to extract and use as the request ID in order to correlate requests from a reverse-proxy.
diff --git a/docs/reverse_proxy.md b/docs/reverse_proxy.md
index d1618e8155..4e7a1d4435 100644
--- a/docs/reverse_proxy.md
+++ b/docs/reverse_proxy.md
@@ -45,6 +45,10 @@ listens to traffic on localhost. (Do not change `bind_addresses` to `127.0.0.1`
 when using a containerized Synapse, as that will prevent it from responding
 to proxied traffic.)
 
+Optionally, you can also set
+[`request_id_header`](../usage/configuration/config_documentation.md#listeners)
+so that the server extracts and re-uses the same request ID format that the
+reverse proxy is using.
 
 ## Reverse-proxy configuration examples
 
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index cd546041b2..69d305b62e 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -434,7 +434,16 @@ Sub-options for each listener include:
 * `tls`: set to true to enable TLS for this listener. Will use the TLS key/cert specified in tls_private_key_path / tls_certificate_path.
 
 * `x_forwarded`: Only valid for an 'http' listener. Set to true to use the X-Forwarded-For header as the client IP. Useful when Synapse is
-   behind a reverse-proxy.
+   behind a [reverse-proxy](../../reverse_proxy.md).
+
+* `request_id_header`: The header extracted from each incoming request that is
+   used as the basis for the request ID. The request ID is used in
+   [logs](../administration/request_log.md#request-log-format) and tracing to
+   correlate and match up requests. When unset, Synapse will automatically
+   generate sequential request IDs. This option is useful when Synapse is behind
+   a [reverse-proxy](../../reverse_proxy.md).
+
+   _Added in Synapse 1.68.0._
 
 * `resources`: Only valid for an 'http' listener. A list of resources to host
    on this port. Sub-options for each resource are:
diff --git a/synapse/config/server.py b/synapse/config/server.py
index c91df636d9..f2353ce5fb 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -206,6 +206,7 @@ class HttpListenerConfig:
     resources: List[HttpResourceConfig] = attr.Factory(list)
     additional_resources: Dict[str, dict] = attr.Factory(dict)
     tag: Optional[str] = None
+    request_id_header: Optional[str] = None
 
 
 @attr.s(slots=True, frozen=True, auto_attribs=True)
@@ -520,9 +521,11 @@ class ServerConfig(Config):
         ):
             raise ConfigError("allowed_avatar_mimetypes must be a list")
 
-        self.listeners = [
-            parse_listener_def(i, x) for i, x in enumerate(config.get("listeners", []))
-        ]
+        listeners = config.get("listeners", [])
+        if not isinstance(listeners, list):
+            raise ConfigError("Expected a list", ("listeners",))
+
+        self.listeners = [parse_listener_def(i, x) for i, x in enumerate(listeners)]
 
         # no_tls is not really supported any more, but let's grandfather it in
         # here.
@@ -889,6 +892,9 @@ def read_gc_thresholds(
 
 def parse_listener_def(num: int, listener: Any) -> ListenerConfig:
     """parse a listener config from the config file"""
+    if not isinstance(listener, dict):
+        raise ConfigError("Expected a dictionary", ("listeners", str(num)))
+
     listener_type = listener["type"]
     # Raise a helpful error if direct TCP replication is still configured.
     if listener_type == "replication":
@@ -928,6 +934,7 @@ def parse_listener_def(num: int, listener: Any) -> ListenerConfig:
             resources=resources,
             additional_resources=listener.get("additional_resources", {}),
             tag=listener.get("tag"),
+            request_id_header=listener.get("request_id_header"),
         )
 
     return ListenerConfig(port, bind_addresses, listener_type, tls, http_config)
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 1155f3f610..55a6afce35 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -72,10 +72,12 @@ class SynapseRequest(Request):
         site: "SynapseSite",
         *args: Any,
         max_request_body_size: int = 1024,
+        request_id_header: Optional[str] = None,
         **kw: Any,
     ):
         super().__init__(channel, *args, **kw)
         self._max_request_body_size = max_request_body_size
+        self.request_id_header = request_id_header
         self.synapse_site = site
         self.reactor = site.reactor
         self._channel = channel  # this is used by the tests
@@ -172,7 +174,14 @@ class SynapseRequest(Request):
         self._opentracing_span = span
 
     def get_request_id(self) -> str:
-        return "%s-%i" % (self.get_method(), self.request_seq)
+        request_id_value = None
+        if self.request_id_header:
+            request_id_value = self.getHeader(self.request_id_header)
+
+        if request_id_value is None:
+            request_id_value = str(self.request_seq)
+
+        return "%s-%s" % (self.get_method(), request_id_value)
 
     def get_redacted_uri(self) -> str:
         """Gets the redacted URI associated with the request (or placeholder if the URI
@@ -611,12 +620,15 @@ class SynapseSite(Site):
         proxied = config.http_options.x_forwarded
         request_class = XForwardedForRequest if proxied else SynapseRequest
 
+        request_id_header = config.http_options.request_id_header
+
         def request_factory(channel: HTTPChannel, queued: bool) -> Request:
             return request_class(
                 channel,
                 self,
                 max_request_body_size=max_request_body_size,
                 queued=queued,
+                request_id_header=request_id_header,
             )
 
         self.requestFactory = request_factory  # type: ignore
-- 
cgit 1.5.1


From b73cbb82157d9666e8d667733afebc0d09ed858c Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Fri, 16 Sep 2022 12:45:04 +0100
Subject: Avoid putting rejected events in room state (#13723)

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/13723.bugfix                |   1 +
 synapse/state/v2.py                     |  15 ++
 tests/handlers/test_federation_event.py | 399 ++++++++++++++++++++++++++++++++
 3 files changed, 415 insertions(+)
 create mode 100644 changelog.d/13723.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13723.bugfix b/changelog.d/13723.bugfix
new file mode 100644
index 0000000000..a23174d31d
--- /dev/null
+++ b/changelog.d/13723.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where previously rejected events could end up in room state because they pass auth checks given the current state of the room.
diff --git a/synapse/state/v2.py b/synapse/state/v2.py
index af03851c71..1b9d7d8457 100644
--- a/synapse/state/v2.py
+++ b/synapse/state/v2.py
@@ -577,6 +577,21 @@ async def _iterative_auth_checks(
                 if ev.rejected_reason is None:
                     auth_events[key] = event_map[ev_id]
 
+        if event.rejected_reason is not None:
+            # Do not admit previously rejected events into state.
+            # TODO: This isn't spec compliant. Events that were previously rejected due
+            #       to failing auth checks at their state, but pass auth checks during
+            #       state resolution should be accepted. Synapse does not handle the
+            #       change of rejection status well, so we preserve the previous
+            #       rejection status for now.
+            #
+            #       Note that events rejected for non-state reasons, such as having the
+            #       wrong auth events, should remain rejected.
+            #
+            #       https://spec.matrix.org/v1.2/rooms/v9/#rejected-events
+            #       https://github.com/matrix-org/synapse/issues/13797
+            continue
+
         try:
             event_auth.check_state_dependent_auth_rules(
                 event,
diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py
index b5b89405a4..918010cddb 100644
--- a/tests/handlers/test_federation_event.py
+++ b/tests/handlers/test_federation_event.py
@@ -11,14 +11,23 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Optional
 from unittest import mock
 
+from synapse.api.errors import AuthError
+from synapse.api.room_versions import RoomVersion
+from synapse.event_auth import (
+    check_state_dependent_auth_rules,
+    check_state_independent_auth_rules,
+)
 from synapse.events import make_event_from_dict
 from synapse.events.snapshot import EventContext
 from synapse.federation.transport.client import StateRequestResponse
 from synapse.logging.context import LoggingContext
 from synapse.rest import admin
 from synapse.rest.client import login, room
+from synapse.state.v2 import _mainline_sort, _reverse_topological_power_sort
+from synapse.types import JsonDict
 
 from tests import unittest
 from tests.test_utils import event_injection, make_awaitable
@@ -449,3 +458,393 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase):
             main_store.get_event(pulled_event.event_id, allow_none=True)
         )
         self.assertIsNotNone(persisted, "pulled event was not persisted at all")
+
+    def test_process_pulled_event_with_rejected_missing_state(self) -> None:
+        """Ensure that we correctly handle pulled events with missing state containing a
+        rejected state event
+
+        In this test, we pretend we are processing a "pulled" event (eg, via backfill
+        or get_missing_events). The pulled event has a prev_event we haven't previously
+        seen, so the server requests the state at that prev_event. We expect the server
+        to make a /state request.
+
+        We simulate a remote server whose /state includes a rejected kick event for a
+        local user. Notably, the kick event is rejected only because it cites a rejected
+        auth event and would otherwise be accepted based on the room state. During state
+        resolution, we re-run auth and can potentially introduce such rejected events
+        into the state if we are not careful.
+
+        We check that the pulled event is correctly persisted, and that the state
+        afterwards does not include the rejected kick.
+        """
+        # The DAG we are testing looks like:
+        #
+        #                 ...
+        #                  |
+        #                  v
+        #       remote admin user joins
+        #                |   |
+        #        +-------+   +-------+
+        #        |                   |
+        #        |          rejected power levels
+        #        |           from remote server
+        #        |                   |
+        #        |                   v
+        #        |       rejected kick of local user
+        #        v           from remote server
+        # new power levels           |
+        #        |                   v
+        #        |             missing event
+        #        |           from remote server
+        #        |                   |
+        #        +-------+   +-------+
+        #                |   |
+        #                v   v
+        #             pulled event
+        #          from remote server
+        #
+        # (arrows are in the opposite direction to prev_events.)
+
+        OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}"
+        main_store = self.hs.get_datastores().main
+
+        # Create the room.
+        kermit_user_id = self.register_user("kermit", "test")
+        kermit_tok = self.login("kermit", "test")
+        room_id = self.helper.create_room_as(
+            room_creator=kermit_user_id, tok=kermit_tok
+        )
+        room_version = self.get_success(main_store.get_room_version(room_id))
+
+        # Add another local user to the room. This user is going to be kicked in a
+        # rejected event.
+        bert_user_id = self.register_user("bert", "test")
+        bert_tok = self.login("bert", "test")
+        self.helper.join(room_id, user=bert_user_id, tok=bert_tok)
+
+        # Allow the remote user to kick bert.
+        # The remote user is going to send a rejected power levels event later on and we
+        # need state resolution to order it before another power levels event kermit is
+        # going to send later on. Hence we give both users the same power level, so that
+        # ties are broken by `origin_server_ts`.
+        self.helper.send_state(
+            room_id,
+            "m.room.power_levels",
+            {"users": {kermit_user_id: 100, OTHER_USER: 100}},
+            tok=kermit_tok,
+        )
+
+        # Add the remote user to the room.
+        other_member_event = self.get_success(
+            event_injection.inject_member_event(self.hs, room_id, OTHER_USER, "join")
+        )
+
+        initial_state_map = self.get_success(
+            main_store.get_partial_current_state_ids(room_id)
+        )
+        create_event = self.get_success(
+            main_store.get_event(initial_state_map[("m.room.create", "")])
+        )
+        bert_member_event = self.get_success(
+            main_store.get_event(initial_state_map[("m.room.member", bert_user_id)])
+        )
+        power_levels_event = self.get_success(
+            main_store.get_event(initial_state_map[("m.room.power_levels", "")])
+        )
+
+        # We now need a rejected state event that will fail
+        # `check_state_independent_auth_rules` but pass
+        # `check_state_dependent_auth_rules`.
+
+        # First, we create a power levels event that we pretend the remote server has
+        # accepted, but the local homeserver will reject.
+        next_depth = 100
+        next_timestamp = other_member_event.origin_server_ts + 100
+        rejected_power_levels_event = make_event_from_dict(
+            self.add_hashes_and_signatures_from_other_server(
+                {
+                    "type": "m.room.power_levels",
+                    "state_key": "",
+                    "room_id": room_id,
+                    "sender": OTHER_USER,
+                    "prev_events": [other_member_event.event_id],
+                    "auth_events": [
+                        initial_state_map[("m.room.create", "")],
+                        initial_state_map[("m.room.power_levels", "")],
+                        # The event will be rejected because of the duplicated auth
+                        # event.
+                        other_member_event.event_id,
+                        other_member_event.event_id,
+                    ],
+                    "origin_server_ts": next_timestamp,
+                    "depth": next_depth,
+                    "content": power_levels_event.content,
+                }
+            ),
+            room_version,
+        )
+        next_depth += 1
+        next_timestamp += 100
+
+        with LoggingContext("send_rejected_power_levels_event"):
+            self.get_success(
+                self.hs.get_federation_event_handler()._process_pulled_event(
+                    self.OTHER_SERVER_NAME,
+                    rejected_power_levels_event,
+                    backfilled=False,
+                )
+            )
+            self.assertEqual(
+                self.get_success(
+                    main_store.get_rejection_reason(
+                        rejected_power_levels_event.event_id
+                    )
+                ),
+                "auth_error",
+            )
+
+        # Then we create a kick event for a local user that cites the rejected power
+        # levels event in its auth events. The kick event will be rejected solely
+        # because of the rejected auth event and would otherwise be accepted.
+        rejected_kick_event = make_event_from_dict(
+            self.add_hashes_and_signatures_from_other_server(
+                {
+                    "type": "m.room.member",
+                    "state_key": bert_user_id,
+                    "room_id": room_id,
+                    "sender": OTHER_USER,
+                    "prev_events": [rejected_power_levels_event.event_id],
+                    "auth_events": [
+                        initial_state_map[("m.room.create", "")],
+                        rejected_power_levels_event.event_id,
+                        initial_state_map[("m.room.member", bert_user_id)],
+                        initial_state_map[("m.room.member", OTHER_USER)],
+                    ],
+                    "origin_server_ts": next_timestamp,
+                    "depth": next_depth,
+                    "content": {"membership": "leave"},
+                }
+            ),
+            room_version,
+        )
+        next_depth += 1
+        next_timestamp += 100
+
+        # The kick event must fail the state-independent auth rules, but pass the
+        # state-dependent auth rules, so that it has a chance of making it through state
+        # resolution.
+        self.get_failure(
+            check_state_independent_auth_rules(main_store, rejected_kick_event),
+            AuthError,
+        )
+        check_state_dependent_auth_rules(
+            rejected_kick_event,
+            [create_event, power_levels_event, other_member_event, bert_member_event],
+        )
+
+        # The kick event must also win over the original member event during state
+        # resolution.
+        self.assertEqual(
+            self.get_success(
+                _mainline_sort(
+                    self.clock,
+                    room_id,
+                    event_ids=[
+                        bert_member_event.event_id,
+                        rejected_kick_event.event_id,
+                    ],
+                    resolved_power_event_id=power_levels_event.event_id,
+                    event_map={
+                        bert_member_event.event_id: bert_member_event,
+                        rejected_kick_event.event_id: rejected_kick_event,
+                    },
+                    state_res_store=main_store,
+                )
+            ),
+            [bert_member_event.event_id, rejected_kick_event.event_id],
+            "The rejected kick event will not be applied after bert's join event "
+            "during state resolution. The test setup is incorrect.",
+        )
+
+        with LoggingContext("send_rejected_kick_event"):
+            self.get_success(
+                self.hs.get_federation_event_handler()._process_pulled_event(
+                    self.OTHER_SERVER_NAME, rejected_kick_event, backfilled=False
+                )
+            )
+            self.assertEqual(
+                self.get_success(
+                    main_store.get_rejection_reason(rejected_kick_event.event_id)
+                ),
+                "auth_error",
+            )
+
+        # We need another power levels event which will win over the rejected one during
+        # state resolution, otherwise we hit other issues where we end up with a
+        # rejected power levels event during state resolution.
+        self.reactor.advance(100)  # ensure the `origin_server_ts` is larger
+        new_power_levels_event = self.get_success(
+            main_store.get_event(
+                self.helper.send_state(
+                    room_id,
+                    "m.room.power_levels",
+                    {"users": {kermit_user_id: 100, OTHER_USER: 100, bert_user_id: 1}},
+                    tok=kermit_tok,
+                )["event_id"]
+            )
+        )
+        self.assertEqual(
+            self.get_success(
+                _reverse_topological_power_sort(
+                    self.clock,
+                    room_id,
+                    event_ids=[
+                        new_power_levels_event.event_id,
+                        rejected_power_levels_event.event_id,
+                    ],
+                    event_map={},
+                    state_res_store=main_store,
+                    full_conflicted_set=set(),
+                )
+            ),
+            [rejected_power_levels_event.event_id, new_power_levels_event.event_id],
+            "The power levels events will not have the desired ordering during state "
+            "resolution. The test setup is incorrect.",
+        )
+
+        # Create a missing event, so that the local homeserver has to do a `/state` or
+        # `/state_ids` request to pull state from the remote homeserver.
+        missing_event = make_event_from_dict(
+            self.add_hashes_and_signatures_from_other_server(
+                {
+                    "type": "m.room.message",
+                    "room_id": room_id,
+                    "sender": OTHER_USER,
+                    "prev_events": [rejected_kick_event.event_id],
+                    "auth_events": [
+                        initial_state_map[("m.room.create", "")],
+                        initial_state_map[("m.room.power_levels", "")],
+                        initial_state_map[("m.room.member", OTHER_USER)],
+                    ],
+                    "origin_server_ts": next_timestamp,
+                    "depth": next_depth,
+                    "content": {"msgtype": "m.text", "body": "foo"},
+                }
+            ),
+            room_version,
+        )
+        next_depth += 1
+        next_timestamp += 100
+
+        # The pulled event has two prev events, one of which is missing. We will make a
+        # `/state` or `/state_ids` request to the remote homeserver to ask it for the
+        # state before the missing prev event.
+        pulled_event = make_event_from_dict(
+            self.add_hashes_and_signatures_from_other_server(
+                {
+                    "type": "m.room.message",
+                    "room_id": room_id,
+                    "sender": OTHER_USER,
+                    "prev_events": [
+                        new_power_levels_event.event_id,
+                        missing_event.event_id,
+                    ],
+                    "auth_events": [
+                        initial_state_map[("m.room.create", "")],
+                        new_power_levels_event.event_id,
+                        initial_state_map[("m.room.member", OTHER_USER)],
+                    ],
+                    "origin_server_ts": next_timestamp,
+                    "depth": next_depth,
+                    "content": {"msgtype": "m.text", "body": "bar"},
+                }
+            ),
+            room_version,
+        )
+        next_depth += 1
+        next_timestamp += 100
+
+        # Prepare the response for the `/state` or `/state_ids` request.
+        # The remote server believes bert has been kicked, while the local server does
+        # not.
+        state_before_missing_event = self.get_success(
+            main_store.get_events_as_list(initial_state_map.values())
+        )
+        state_before_missing_event = [
+            event
+            for event in state_before_missing_event
+            if event.event_id != bert_member_event.event_id
+        ]
+        state_before_missing_event.append(rejected_kick_event)
+
+        # We have to bump the clock a bit, to keep the retry logic in
+        # `FederationClient.get_pdu` happy
+        self.reactor.advance(60000)
+        with LoggingContext("send_pulled_event"):
+
+            async def get_event(
+                destination: str, event_id: str, timeout: Optional[int] = None
+            ) -> JsonDict:
+                self.assertEqual(destination, self.OTHER_SERVER_NAME)
+                self.assertEqual(event_id, missing_event.event_id)
+                return {"pdus": [missing_event.get_pdu_json()]}
+
+            async def get_room_state_ids(
+                destination: str, room_id: str, event_id: str
+            ) -> JsonDict:
+                self.assertEqual(destination, self.OTHER_SERVER_NAME)
+                self.assertEqual(event_id, missing_event.event_id)
+                return {
+                    "pdu_ids": [event.event_id for event in state_before_missing_event],
+                    "auth_chain_ids": [],
+                }
+
+            async def get_room_state(
+                room_version: RoomVersion, destination: str, room_id: str, event_id: str
+            ) -> StateRequestResponse:
+                self.assertEqual(destination, self.OTHER_SERVER_NAME)
+                self.assertEqual(event_id, missing_event.event_id)
+                return StateRequestResponse(
+                    state=state_before_missing_event,
+                    auth_events=[],
+                )
+
+            self.mock_federation_transport_client.get_event.side_effect = get_event
+            self.mock_federation_transport_client.get_room_state_ids.side_effect = (
+                get_room_state_ids
+            )
+            self.mock_federation_transport_client.get_room_state.side_effect = (
+                get_room_state
+            )
+
+            self.get_success(
+                self.hs.get_federation_event_handler()._process_pulled_event(
+                    self.OTHER_SERVER_NAME, pulled_event, backfilled=False
+                )
+            )
+            self.assertIsNone(
+                self.get_success(
+                    main_store.get_rejection_reason(pulled_event.event_id)
+                ),
+                "Pulled event was unexpectedly rejected, likely due to a problem with "
+                "the test setup.",
+            )
+            self.assertEqual(
+                {pulled_event.event_id},
+                self.get_success(
+                    main_store.have_events_in_timeline([pulled_event.event_id])
+                ),
+                "Pulled event was not persisted, likely due to a problem with the test "
+                "setup.",
+            )
+
+            # We must not accept rejected events into the room state, so we expect bert
+            # to not be kicked, even if the remote server believes so.
+            new_state_map = self.get_success(
+                main_store.get_partial_current_state_ids(room_id)
+            )
+            self.assertEqual(
+                new_state_map[("m.room.member", bert_user_id)],
+                bert_member_event.event_id,
+                "Rejected kick event unexpectedly became part of room state.",
+            )
-- 
cgit 1.5.1


From 74f60cec92c5aff87d6e74d177e95ec5f1a69f2b Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Fri, 16 Sep 2022 14:29:03 +0200
Subject: Add an admin API endpoint to find a user based on its external ID in
 an auth provider. (#13810)

---
 changelog.d/13810.feature        |  1 +
 docs/admin_api/user_admin_api.md | 38 ++++++++++++++++++
 synapse/rest/admin/__init__.py   |  2 +
 synapse/rest/admin/users.py      | 27 +++++++++++++
 tests/rest/admin/test_user.py    | 87 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 155 insertions(+)
 create mode 100644 changelog.d/13810.feature

(limited to 'synapse')

diff --git a/changelog.d/13810.feature b/changelog.d/13810.feature
new file mode 100644
index 0000000000..f0258af661
--- /dev/null
+++ b/changelog.d/13810.feature
@@ -0,0 +1 @@
+Add an admin API endpoint to find a user based on its external ID in an auth provider.
diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md
index 975f05c929..3625c7b6c5 100644
--- a/docs/admin_api/user_admin_api.md
+++ b/docs/admin_api/user_admin_api.md
@@ -1155,3 +1155,41 @@ GET /_synapse/admin/v1/username_available?username=$localpart
 
 The request and response format is the same as the
 [/_matrix/client/r0/register/available](https://matrix.org/docs/spec/client_server/r0.6.0#get-matrix-client-r0-register-available) API.
+
+### Find a user based on their ID in an auth provider
+
+The API is:
+
+```
+GET /_synapse/admin/v1/auth_providers/$provider/users/$external_id
+```
+
+When a user matches the given ID for the given provider, an HTTP code `200` with a response body like the following is returned:
+
+```json
+{
+    "user_id": "@hello:example.org"
+}
+```
+
+**Parameters**
+
+The following parameters should be set in the URL:
+
+- `provider` - The ID of the authentication provider, as advertised by the [`GET /_matrix/client/v3/login`](https://spec.matrix.org/latest/client-server-api/#get_matrixclientv3login) API in the `m.login.sso` authentication method.
+- `external_id` - The user ID from the authentication provider. Usually corresponds to the `sub` claim for OIDC providers, or to the `uid` attestation for SAML2 providers.
+
+The `external_id` may have characters that are not URL-safe (typically `/`, `:` or `@`), so it is advised to URL-encode those parameters.
+
+**Errors**
+
+Returns a `404` HTTP status code if no user was found, with a response body like this:
+
+```json
+{
+    "errcode":"M_NOT_FOUND",
+    "error":"User not found"
+}
+```
+
+_Added in Synapse 1.68.0._
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index bac754e1b1..885669f9c7 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -80,6 +80,7 @@ from synapse.rest.admin.users import (
     SearchUsersRestServlet,
     ShadowBanRestServlet,
     UserAdminServlet,
+    UserByExternalId,
     UserMembershipRestServlet,
     UserRegisterServlet,
     UserRestServletV2,
@@ -275,6 +276,7 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     ListDestinationsRestServlet(hs).register(http_server)
     RoomMessagesRestServlet(hs).register(http_server)
     RoomTimestampToEventRestServlet(hs).register(http_server)
+    UserByExternalId(hs).register(http_server)
 
     # Some servlets only get registered for the main process.
     if hs.config.worker.worker_app is None:
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 78ee9b6532..2ca6b2d08a 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -1156,3 +1156,30 @@ class AccountDataRestServlet(RestServlet):
                 "rooms": by_room_data,
             },
         }
+
+
+class UserByExternalId(RestServlet):
+    """Find a user based on an external ID from an auth provider"""
+
+    PATTERNS = admin_patterns(
+        "/auth_providers/(?P<provider>[^/]*)/users/(?P<external_id>[^/]*)"
+    )
+
+    def __init__(self, hs: "HomeServer"):
+        self._auth = hs.get_auth()
+        self._store = hs.get_datastores().main
+
+    async def on_GET(
+        self,
+        request: SynapseRequest,
+        provider: str,
+        external_id: str,
+    ) -> Tuple[int, JsonDict]:
+        await assert_requester_is_admin(self._auth, request)
+
+        user_id = await self._store.get_user_by_external_id(provider, external_id)
+
+        if user_id is None:
+            raise NotFoundError("User not found")
+
+        return HTTPStatus.OK, {"user_id": user_id}
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index ec5ccf6fca..9f536ceeb3 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -4140,3 +4140,90 @@ class AccountDataTestCase(unittest.HomeserverTestCase):
             {"b": 2},
             channel.json_body["account_data"]["rooms"]["test_room"]["m.per_room"],
         )
+
+
+class UsersByExternalIdTestCase(unittest.HomeserverTestCase):
+
+    servlets = [
+        synapse.rest.admin.register_servlets,
+        login.register_servlets,
+    ]
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.store = hs.get_datastores().main
+
+        self.admin_user = self.register_user("admin", "pass", admin=True)
+        self.admin_user_tok = self.login("admin", "pass")
+
+        self.other_user = self.register_user("user", "pass")
+        self.get_success(
+            self.store.record_user_external_id(
+                "the-auth-provider", "the-external-id", self.other_user
+            )
+        )
+        self.get_success(
+            self.store.record_user_external_id(
+                "another-auth-provider", "a:complex@external/id", self.other_user
+            )
+        )
+
+    def test_no_auth(self) -> None:
+        """Try to lookup a user without authentication."""
+        url = (
+            "/_synapse/admin/v1/auth_providers/the-auth-provider/users/the-external-id"
+        )
+
+        channel = self.make_request(
+            "GET",
+            url,
+        )
+
+        self.assertEqual(401, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"])
+
+    def test_binding_does_not_exist(self) -> None:
+        """Tests that a lookup for an external ID that does not exist returns a 404"""
+        url = "/_synapse/admin/v1/auth_providers/the-auth-provider/users/unknown-id"
+
+        channel = self.make_request(
+            "GET",
+            url,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(404, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"])
+
+    def test_success(self) -> None:
+        """Tests a successful external ID lookup"""
+        url = (
+            "/_synapse/admin/v1/auth_providers/the-auth-provider/users/the-external-id"
+        )
+
+        channel = self.make_request(
+            "GET",
+            url,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(
+            {"user_id": self.other_user},
+            channel.json_body,
+        )
+
+    def test_success_urlencoded(self) -> None:
+        """Tests a successful external ID lookup with an url-encoded ID"""
+        url = "/_synapse/admin/v1/auth_providers/another-auth-provider/users/a%3Acomplex%40external%2Fid"
+
+        channel = self.make_request(
+            "GET",
+            url,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(
+            {"user_id": self.other_user},
+            channel.json_body,
+        )
-- 
cgit 1.5.1


From d64e85197af31f5642f64ae1d86f5a0c74050fec Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Fri, 16 Sep 2022 16:16:05 +0100
Subject: Remove error spam when users query the keys of departed remote users
 (#13826)

The error message introduced in #13749 has turned out to be very spammy.
Remove it for now.
---
 changelog.d/13826.bugfix     |  1 +
 synapse/handlers/e2e_keys.py | 21 ++++++++++++---------
 2 files changed, 13 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/13826.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13826.bugfix b/changelog.d/13826.bugfix
new file mode 100644
index 0000000000..8ffafec07b
--- /dev/null
+++ b/changelog.d/13826.bugfix
@@ -0,0 +1 @@
+Fix a long standing bug where device lists would remain cached when remote users left and rejoined the last room shared with the local homeserver.
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 8eed63ccf3..09a2492afc 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -188,18 +188,21 @@ class E2eKeysHandler:
                 )
                 invalid_cached_users = cached_users - valid_cached_users
                 if invalid_cached_users:
-                    # Fix up results. If we get here, there is either a bug in device
-                    # list tracking, or we hit the race mentioned above.
+                    # Fix up results. If we get here, it means there was either a bug in
+                    # device list tracking, or we hit the race mentioned above.
+                    # TODO: In practice, this path is hit fairly often in existing
+                    #       deployments when clients query the keys of departed remote
+                    #       users. A background update to mark the appropriate device
+                    #       lists as unsubscribed is needed.
+                    #       https://github.com/matrix-org/synapse/issues/13651
+                    # Note that this currently introduces a failure mode when clients
+                    # are trying to decrypt old messages from a remote user whose
+                    # homeserver is no longer available. We may want to consider falling
+                    # back to the cached data when we fail to retrieve a device list
+                    # over federation for such remote users.
                     user_ids_not_in_cache.update(invalid_cached_users)
                     for invalid_user_id in invalid_cached_users:
                         remote_results.pop(invalid_user_id)
-                    # This log message may be removed if it turns out it's almost
-                    # entirely triggered by races.
-                    logger.error(
-                        "Devices for %s were cached, but the server no longer shares "
-                        "any rooms with them. The cached device lists are stale.",
-                        invalid_cached_users,
-                    )
 
                 for user_id, devices in remote_results.items():
                     user_devices = results.setdefault(user_id, {})
-- 
cgit 1.5.1


From 44be42338e032a50e5fc3d6c69be4055f33cb26c Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Fri, 16 Sep 2022 10:56:56 -0500
Subject: Add support to purge rows from MSC2716 and other tables when purging
 a room (#13825)

`event_failed_pull_attempts` added in https://github.com/matrix-org/synapse/pull/13589

MSC2716 related tables added in:

 - https://github.com/matrix-org/synapse/pull/10245/files#diff-3d42dfb44d02f7de3aada105e0bdc1cc9dd7f953cbf0f36c5d0f50827bf0320aR1
    - Renamed in https://github.com/matrix-org/synapse/pull/10838/files#diff-2730bfbe9e688b55e46f9371aefe67dac2bd2b2b7d9d6b92774eea1fcfae156dR1
 - https://github.com/matrix-org/synapse/pull/10498/files#diff-c52bbfbb5921a3f6f023b24343668479d966fac164f13b7c39d2197ce3afa7a5R1
---
 changelog.d/13825.bugfix                           |  1 +
 synapse/storage/databases/main/purge_events.py     |  5 +++++
 synapse/storage/schema/__init__.py                 |  2 ++
 .../delta/73/02room_id_indexes_for_purging.sql     | 22 ++++++++++++++++++++++
 4 files changed, 30 insertions(+)
 create mode 100644 changelog.d/13825.bugfix
 create mode 100644 synapse/storage/schema/main/delta/73/02room_id_indexes_for_purging.sql

(limited to 'synapse')

diff --git a/changelog.d/13825.bugfix b/changelog.d/13825.bugfix
new file mode 100644
index 0000000000..626fc6349f
--- /dev/null
+++ b/changelog.d/13825.bugfix
@@ -0,0 +1 @@
+Delete associated data from `event_failed_pull_attempts`, `insertion_events`, `insertion_event_extremities`, `insertion_event_edges`, `batch_events` when purging the room.
diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py
index f6822707e4..9213ce0b5a 100644
--- a/synapse/storage/databases/main/purge_events.py
+++ b/synapse/storage/databases/main/purge_events.py
@@ -419,6 +419,7 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
             "event_forward_extremities",
             "event_push_actions",
             "event_search",
+            "event_failed_pull_attempts",
             "partial_state_events",
             "events",
             "federation_inbound_events_staging",
@@ -441,6 +442,10 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
             "e2e_room_keys",
             "event_push_summary",
             "pusher_throttle",
+            "insertion_events",
+            "insertion_event_extremities",
+            "insertion_event_edges",
+            "batch_events",
             "room_account_data",
             "room_tags",
             # "rooms" happens last, to keep the foreign keys in the other tables
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 68e055c664..f29424d17a 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -83,6 +83,8 @@ Changes in SCHEMA_VERSION = 73;
       event_push_summary, receipts_linearized, and receipts_graph.
     - Add table `event_failed_pull_attempts` to keep track when we fail to pull
       events over federation.
+    - Add indexes to various tables (`event_failed_pull_attempts`, `insertion_events`,
+      `batch_events`) to make it easy to delete all associated rows when purging a room.
 """
 
 
diff --git a/synapse/storage/schema/main/delta/73/02room_id_indexes_for_purging.sql b/synapse/storage/schema/main/delta/73/02room_id_indexes_for_purging.sql
new file mode 100644
index 0000000000..6d38bdd430
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/02room_id_indexes_for_purging.sql
@@ -0,0 +1,22 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Add index so we can easily purge all rows from a given `room_id`
+CREATE INDEX IF NOT EXISTS event_failed_pull_attempts_room_id ON event_failed_pull_attempts(room_id);
+
+-- MSC2716 related tables:
+-- Add indexes so we can easily purge all rows from a given `room_id`
+CREATE INDEX IF NOT EXISTS insertion_events_room_id ON insertion_events(room_id);
+CREATE INDEX IF NOT EXISTS batch_events_room_id ON batch_events(room_id);
-- 
cgit 1.5.1


From c802ef14119b21cfdf8f5a9c246b695c98c0f718 Mon Sep 17 00:00:00 2001
From: Denis <dakariakin@gmail.com>
Date: Tue, 20 Sep 2022 10:44:38 +0200
Subject: Don't include redundant prev_state in new events (#13791)

---
 changelog.d/13791.removal               | 1 +
 synapse/events/builder.py               | 1 -
 synapse/federation/federation_client.py | 3 ---
 3 files changed, 1 insertion(+), 4 deletions(-)
 create mode 100644 changelog.d/13791.removal

(limited to 'synapse')

diff --git a/changelog.d/13791.removal b/changelog.d/13791.removal
new file mode 100644
index 0000000000..283226b63e
--- /dev/null
+++ b/changelog.d/13791.removal
@@ -0,0 +1 @@
+Don't include redundant `prev_state` in new events. Contributed by Denis Kariakin (@dakariakin).
diff --git a/synapse/events/builder.py b/synapse/events/builder.py
index 746bd3978d..e2ee10dd3d 100644
--- a/synapse/events/builder.py
+++ b/synapse/events/builder.py
@@ -167,7 +167,6 @@ class EventBuilder:
             "content": self.content,
             "unsigned": self.unsigned,
             "depth": depth,
-            "prev_state": [],
         }
 
         if self.is_state():
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 4a4289ee7c..464672a3da 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -906,9 +906,6 @@ class FederationClient(FederationBase):
             # The protoevent received over the JSON wire may not have all
             # the required fields. Lets just gloss over that because
             # there's some we never care about
-            if "prev_state" not in pdu_dict:
-                pdu_dict["prev_state"] = []
-
             ev = builder.create_local_event_from_event_dict(
                 self._clock,
                 self.hostname,
-- 
cgit 1.5.1


From 42d261c32f13e2de7494a0ade77c1f7b646af1fe Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 20 Sep 2022 12:10:31 +0100
Subject: Port the push rule classes to Rust. (#13768)

---
 .rustfmt.toml                               |   1 +
 changelog.d/13768.misc                      |   1 +
 rust/Cargo.toml                             |  10 +-
 rust/src/lib.rs                             |   9 +-
 rust/src/push/base_rules.rs                 | 335 ++++++++++++++++
 rust/src/push/mod.rs                        | 502 ++++++++++++++++++++++++
 stubs/synapse/synapse_rust.pyi              |   2 -
 stubs/synapse/synapse_rust/__init__.pyi     |   2 +
 stubs/synapse/synapse_rust/push.pyi         |  37 ++
 synapse/handlers/push_rules.py              |   5 +-
 synapse/push/baserules.py                   | 583 ----------------------------
 synapse/push/bulk_push_rule_evaluator.py    |   7 +-
 synapse/push/clientformat.py                |   5 +-
 synapse/storage/databases/main/push_rule.py |  23 +-
 tests/handlers/test_deactivate_account.py   |  27 +-
 15 files changed, 932 insertions(+), 617 deletions(-)
 create mode 100644 .rustfmt.toml
 create mode 100644 changelog.d/13768.misc
 create mode 100644 rust/src/push/base_rules.rs
 create mode 100644 rust/src/push/mod.rs
 delete mode 100644 stubs/synapse/synapse_rust.pyi
 create mode 100644 stubs/synapse/synapse_rust/__init__.pyi
 create mode 100644 stubs/synapse/synapse_rust/push.pyi
 delete mode 100644 synapse/push/baserules.py

(limited to 'synapse')

diff --git a/.rustfmt.toml b/.rustfmt.toml
new file mode 100644
index 0000000000..bf96e7743d
--- /dev/null
+++ b/.rustfmt.toml
@@ -0,0 +1 @@
+group_imports = "StdExternalCrate"
diff --git a/changelog.d/13768.misc b/changelog.d/13768.misc
new file mode 100644
index 0000000000..28bddb7059
--- /dev/null
+++ b/changelog.d/13768.misc
@@ -0,0 +1 @@
+Port push rules to using Rust.
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index deddf3cec2..8dc5f93ff1 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -18,7 +18,15 @@ crate-type = ["cdylib"]
 name = "synapse.synapse_rust"
 
 [dependencies]
-pyo3 = { version = "0.16.5", features = ["extension-module", "macros", "abi3", "abi3-py37"] }
+anyhow = "1.0.63"
+lazy_static = "1.4.0"
+log = "0.4.17"
+pyo3 = { version = "0.17.1", features = ["extension-module", "macros", "anyhow", "abi3", "abi3-py37"] }
+pyo3-log = "0.7.0"
+pythonize = "0.17.0"
+regex = "1.6.0"
+serde = { version = "1.0.144", features = ["derive"] }
+serde_json = "1.0.85"
 
 [build-dependencies]
 blake2 = "0.10.4"
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index ba42465fb8..c7b60e58a7 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -1,5 +1,7 @@
 use pyo3::prelude::*;
 
+pub mod push;
+
 /// Returns the hash of all the rust source files at the time it was compiled.
 ///
 /// Used by python to detect if the rust library is outdated.
@@ -17,8 +19,13 @@ fn sum_as_string(a: usize, b: usize) -> PyResult<String> {
 
 /// The entry point for defining the Python module.
 #[pymodule]
-fn synapse_rust(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
+fn synapse_rust(py: Python<'_>, m: &PyModule) -> PyResult<()> {
+    pyo3_log::init();
+
     m.add_function(wrap_pyfunction!(sum_as_string, m)?)?;
     m.add_function(wrap_pyfunction!(get_rust_file_digest, m)?)?;
+
+    push::register_module(py, m)?;
+
     Ok(())
 }
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
new file mode 100644
index 0000000000..7c62bc4849
--- /dev/null
+++ b/rust/src/push/base_rules.rs
@@ -0,0 +1,335 @@
+// Copyright 2022 The Matrix.org Foundation C.I.C.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Contains the definitions of the "base" push rules.
+
+use std::borrow::Cow;
+use std::collections::HashMap;
+
+use lazy_static::lazy_static;
+use serde_json::Value;
+
+use super::KnownCondition;
+use crate::push::Action;
+use crate::push::Condition;
+use crate::push::EventMatchCondition;
+use crate::push::PushRule;
+use crate::push::SetTweak;
+use crate::push::TweakValue;
+
+const HIGHLIGHT_ACTION: Action = Action::SetTweak(SetTweak {
+    set_tweak: Cow::Borrowed("highlight"),
+    value: None,
+    other_keys: Value::Null,
+});
+
+const HIGHLIGHT_FALSE_ACTION: Action = Action::SetTweak(SetTweak {
+    set_tweak: Cow::Borrowed("highlight"),
+    value: Some(TweakValue::Other(Value::Bool(false))),
+    other_keys: Value::Null,
+});
+
+const SOUND_ACTION: Action = Action::SetTweak(SetTweak {
+    set_tweak: Cow::Borrowed("sound"),
+    value: Some(TweakValue::String(Cow::Borrowed("default"))),
+    other_keys: Value::Null,
+});
+
+const RING_ACTION: Action = Action::SetTweak(SetTweak {
+    set_tweak: Cow::Borrowed("sound"),
+    value: Some(TweakValue::String(Cow::Borrowed("ring"))),
+    other_keys: Value::Null,
+});
+
+pub const BASE_PREPEND_OVERRIDE_RULES: &[PushRule] = &[PushRule {
+    rule_id: Cow::Borrowed("global/override/.m.rule.master"),
+    priority_class: 5,
+    conditions: Cow::Borrowed(&[]),
+    actions: Cow::Borrowed(&[Action::DontNotify]),
+    default: true,
+    default_enabled: false,
+}];
+
+pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
+    PushRule {
+        rule_id: Cow::Borrowed("global/override/.m.rule.suppress_notices"),
+        priority_class: 5,
+        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch(
+            EventMatchCondition {
+                key: Cow::Borrowed("content.msgtype"),
+                pattern: Some(Cow::Borrowed("m.notice")),
+                pattern_type: None,
+            },
+        ))]),
+        actions: Cow::Borrowed(&[Action::DontNotify]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/override/.m.rule.invite_for_me"),
+        priority_class: 5,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Some(Cow::Borrowed("m.room.member")),
+                pattern_type: None,
+            })),
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("content.membership"),
+                pattern: Some(Cow::Borrowed("invite")),
+                pattern_type: None,
+            })),
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("state_key"),
+                pattern: None,
+                pattern_type: Some(Cow::Borrowed("user_id")),
+            })),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION, SOUND_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/override/.m.rule.member_event"),
+        priority_class: 5,
+        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch(
+            EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Some(Cow::Borrowed("m.room.member")),
+                pattern_type: None,
+            },
+        ))]),
+        actions: Cow::Borrowed(&[Action::DontNotify]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/override/.m.rule.contains_display_name"),
+        priority_class: 5,
+        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::ContainsDisplayName)]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION, SOUND_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/override/.m.rule.roomnotif"),
+        priority_class: 5,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::SenderNotificationPermission {
+                key: Cow::Borrowed("room"),
+            }),
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("content.body"),
+                pattern: Some(Cow::Borrowed("@room")),
+                pattern_type: None,
+            })),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/override/.m.rule.tombstone"),
+        priority_class: 5,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Some(Cow::Borrowed("m.room.tombstone")),
+                pattern_type: None,
+            })),
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("state_key"),
+                pattern: Some(Cow::Borrowed("")),
+                pattern_type: None,
+            })),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/override/.m.rule.reaction"),
+        priority_class: 5,
+        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch(
+            EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Some(Cow::Borrowed("m.reaction")),
+                pattern_type: None,
+            },
+        ))]),
+        actions: Cow::Borrowed(&[Action::DontNotify]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/override/.org.matrix.msc3786.rule.room.server_acl"),
+        priority_class: 5,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Some(Cow::Borrowed("m.room.server_acl")),
+                pattern_type: None,
+            })),
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("state_key"),
+                pattern: Some(Cow::Borrowed("")),
+                pattern_type: None,
+            })),
+        ]),
+        actions: Cow::Borrowed(&[]),
+        default: true,
+        default_enabled: true,
+    },
+];
+
+pub const BASE_APPEND_CONTENT_RULES: &[PushRule] = &[PushRule {
+    rule_id: Cow::Borrowed("global/content/.m.rule.contains_user_name"),
+    priority_class: 4,
+    conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch(
+        EventMatchCondition {
+            key: Cow::Borrowed("content.body"),
+            pattern: None,
+            pattern_type: Some(Cow::Borrowed("user_localpart")),
+        },
+    ))]),
+    actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION, SOUND_ACTION]),
+    default: true,
+    default_enabled: true,
+}];
+
+pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.m.rule.call"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch(
+            EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Some(Cow::Borrowed("m.call.invite")),
+                pattern_type: None,
+            },
+        ))]),
+        actions: Cow::Borrowed(&[Action::Notify, RING_ACTION, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.m.rule.room_one_to_one"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Some(Cow::Borrowed("m.room.message")),
+                pattern_type: None,
+            })),
+            Condition::Known(KnownCondition::RoomMemberCount {
+                is: Some(Cow::Borrowed("2")),
+            }),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.m.rule.encrypted_room_one_to_one"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Some(Cow::Borrowed("m.room.encrypted")),
+                pattern_type: None,
+            })),
+            Condition::Known(KnownCondition::RoomMemberCount {
+                is: Some(Cow::Borrowed("2")),
+            }),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.org.matrix.msc3772.thread_reply"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::RelationMatch {
+            rel_type: Cow::Borrowed("m.thread"),
+            sender: None,
+            sender_type: Some(Cow::Borrowed("user_id")),
+        })]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.m.rule.message"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch(
+            EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Some(Cow::Borrowed("m.room.message")),
+                pattern_type: None,
+            },
+        ))]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.m.rule.encrypted"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch(
+            EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Some(Cow::Borrowed("m.room.encrypted")),
+                pattern_type: None,
+            },
+        ))]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.im.vector.jitsi"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Some(Cow::Borrowed("im.vector.modular.widgets")),
+                pattern_type: None,
+            })),
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("content.type"),
+                pattern: Some(Cow::Borrowed("jitsi")),
+                pattern_type: None,
+            })),
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("state_key"),
+                pattern: Some(Cow::Borrowed("*")),
+                pattern_type: None,
+            })),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+];
+
+lazy_static! {
+    pub static ref BASE_RULES_BY_ID: HashMap<&'static str, &'static PushRule> =
+        BASE_PREPEND_OVERRIDE_RULES
+            .iter()
+            .chain(BASE_APPEND_OVERRIDE_RULES.iter())
+            .chain(BASE_APPEND_CONTENT_RULES.iter())
+            .chain(BASE_APPEND_UNDERRIDE_RULES.iter())
+            .map(|rule| { (&*rule.rule_id, rule) })
+            .collect();
+}
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
new file mode 100644
index 0000000000..de6764e7c5
--- /dev/null
+++ b/rust/src/push/mod.rs
@@ -0,0 +1,502 @@
+// Copyright 2022 The Matrix.org Foundation C.I.C.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! An implementation of Matrix push rules.
+//!
+//! The `Cow<_>` type is used extensively within this module to allow creating
+//! the base rules as constants (Rust constants cannot currently hold values
+//! that require heap allocation).
+//!
+//! ---
+//!
+//! Push rules are the system used to determine which events trigger a push (and
+//! a bump in notification counts).
+//!
+//! This consists of a list of "push rules" for each user, where a push rule is a
+//! pair of "conditions" and "actions". When a user receives an event Synapse
+//! iterates over the list of push rules until it finds one where all the conditions
+//! match the event, at which point "actions" describe the outcome (e.g. notify,
+//! highlight, etc).
+//!
+//! Push rules are split up into 5 different "kinds" (aka "priority classes"), which
+//! are run in order:
+//!     1. Override — highest priority rules, e.g. always ignore notices
+//!     2. Content — content specific rules, e.g. @ notifications
+//!     3. Room — per room rules, e.g. enable/disable notifications for all messages
+//!        in a room
+//!     4. Sender — per sender rules, e.g. never notify for messages from a given
+//!        user
+//!     5. Underride — the lowest priority "default" rules, e.g. notify for every
+//!        message.
+//!
+//! The set of "base rules" are the list of rules that every user has by default. A
+//! user can modify their copy of the push rules in one of three ways:
+//!
+//!     1. Adding a new push rule of a certain kind
+//!     2. Changing the actions of a base rule
+//!     3. Enabling/disabling a base rule.
+//!
+//! The base rules are split into whether they come before or after a particular
+//! kind, so the order of push rule evaluation would be: base rules for before
+//! "override" kind, user defined "override" rules, base rules after "override"
+//! kind, etc, etc.
+
+use std::borrow::Cow;
+use std::collections::{BTreeMap, HashMap, HashSet};
+
+use anyhow::{Context, Error};
+use log::warn;
+use pyo3::prelude::*;
+use pythonize::pythonize;
+use serde::de::Error as _;
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+
+mod base_rules;
+
+/// Called when registering modules with python.
+pub fn register_module(py: Python<'_>, m: &PyModule) -> PyResult<()> {
+    let child_module = PyModule::new(py, "push")?;
+    child_module.add_class::<PushRule>()?;
+    child_module.add_class::<PushRules>()?;
+    child_module.add_class::<FilteredPushRules>()?;
+    child_module.add_function(wrap_pyfunction!(get_base_rule_ids, m)?)?;
+
+    m.add_submodule(child_module)?;
+
+    // We need to manually add the module to sys.modules to make `from
+    // synapse.synapse_rust import push` work.
+    py.import("sys")?
+        .getattr("modules")?
+        .set_item("synapse.synapse_rust.push", child_module)?;
+
+    Ok(())
+}
+
+#[pyfunction]
+fn get_base_rule_ids() -> HashSet<&'static str> {
+    base_rules::BASE_RULES_BY_ID.keys().copied().collect()
+}
+
+/// A single push rule for a user.
+#[derive(Debug, Clone)]
+#[pyclass(frozen)]
+pub struct PushRule {
+    /// A unique ID for this rule
+    pub rule_id: Cow<'static, str>,
+    /// The "kind" of push rule this is (see `PRIORITY_CLASS_MAP` in Python)
+    #[pyo3(get)]
+    pub priority_class: i32,
+    /// The conditions that must all match for actions to be applied
+    pub conditions: Cow<'static, [Condition]>,
+    /// The actions to apply if all conditions are met
+    pub actions: Cow<'static, [Action]>,
+    /// Whether this is a base rule
+    #[pyo3(get)]
+    pub default: bool,
+    /// Whether this is enabled by default
+    #[pyo3(get)]
+    pub default_enabled: bool,
+}
+
+#[pymethods]
+impl PushRule {
+    #[staticmethod]
+    pub fn from_db(
+        rule_id: String,
+        priority_class: i32,
+        conditions: &str,
+        actions: &str,
+    ) -> Result<PushRule, Error> {
+        let conditions = serde_json::from_str(conditions).context("parsing conditions")?;
+        let actions = serde_json::from_str(actions).context("parsing actions")?;
+
+        Ok(PushRule {
+            rule_id: Cow::Owned(rule_id),
+            priority_class,
+            conditions,
+            actions,
+            default: false,
+            default_enabled: true,
+        })
+    }
+
+    #[getter]
+    fn rule_id(&self) -> &str {
+        &self.rule_id
+    }
+
+    #[getter]
+    fn actions(&self) -> Vec<Action> {
+        self.actions.clone().into_owned()
+    }
+
+    #[getter]
+    fn conditions(&self) -> Vec<Condition> {
+        self.conditions.clone().into_owned()
+    }
+
+    fn __repr__(&self) -> String {
+        format!(
+            "<PushRule rule_id={}, conditions={:?}, actions={:?}>",
+            self.rule_id, self.conditions, self.actions
+        )
+    }
+}
+
+/// The "action" Synapse should perform for a matching push rule.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Action {
+    DontNotify,
+    Notify,
+    Coalesce,
+    SetTweak(SetTweak),
+
+    // An unrecognized custom action.
+    Unknown(Value),
+}
+
+impl IntoPy<PyObject> for Action {
+    fn into_py(self, py: Python<'_>) -> PyObject {
+        // When we pass the `Action` struct to Python we want it to be converted
+        // to a dict. We use `pythonize`, which converts the struct using the
+        // `serde` serialization.
+        pythonize(py, &self).expect("valid action")
+    }
+}
+
+/// The body of a `SetTweak` push action.
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+pub struct SetTweak {
+    set_tweak: Cow<'static, str>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    value: Option<TweakValue>,
+
+    // This picks up any other fields that may have been added by clients.
+    // These get added when we convert the `Action` to a python object.
+    #[serde(flatten)]
+    other_keys: Value,
+}
+
+/// The value of a `set_tweak`.
+///
+/// We need this (rather than using `serde_json::Value` directly) so that we can
+/// use `&'static str` in the value when defining the constant base rules.
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+#[serde(untagged)]
+pub enum TweakValue {
+    String(Cow<'static, str>),
+    Other(Value),
+}
+
+impl Serialize for Action {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        match self {
+            Action::DontNotify => serializer.serialize_str("dont_notify"),
+            Action::Notify => serializer.serialize_str("notify"),
+            Action::Coalesce => serializer.serialize_str("coalesce"),
+            Action::SetTweak(tweak) => tweak.serialize(serializer),
+            Action::Unknown(value) => value.serialize(serializer),
+        }
+    }
+}
+
+/// Simple helper enum for deserializing `Action` from JSON.
+#[derive(Deserialize)]
+#[serde(untagged)]
+enum ActionDeserializeHelper {
+    Str(String),
+    SetTweak(SetTweak),
+    Unknown(Value),
+}
+
+impl<'de> Deserialize<'de> for Action {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        let helper: ActionDeserializeHelper = Deserialize::deserialize(deserializer)?;
+        match helper {
+            ActionDeserializeHelper::Str(s) => match &*s {
+                "dont_notify" => Ok(Action::DontNotify),
+                "notify" => Ok(Action::Notify),
+                "coalesce" => Ok(Action::Coalesce),
+                _ => Err(D::Error::custom("unrecognized action")),
+            },
+            ActionDeserializeHelper::SetTweak(set_tweak) => Ok(Action::SetTweak(set_tweak)),
+            ActionDeserializeHelper::Unknown(value) => Ok(Action::Unknown(value)),
+        }
+    }
+}
+
+/// A condition used in push rules to match against an event.
+///
+/// We need this split as `serde` doesn't give us the ability to have a
+/// "catchall" variant in tagged enums.
+#[derive(Serialize, Deserialize, Debug, Clone)]
+#[serde(untagged)]
+pub enum Condition {
+    /// A recognized condition that we can match against
+    Known(KnownCondition),
+    /// An unrecognized condition that we ignore.
+    Unknown(Value),
+}
+
+/// The set of "known" conditions that we can handle.
+#[derive(Serialize, Deserialize, Debug, Clone)]
+#[serde(rename_all = "snake_case")]
+#[serde(tag = "kind")]
+pub enum KnownCondition {
+    EventMatch(EventMatchCondition),
+    ContainsDisplayName,
+    RoomMemberCount {
+        #[serde(skip_serializing_if = "Option::is_none")]
+        is: Option<Cow<'static, str>>,
+    },
+    SenderNotificationPermission {
+        key: Cow<'static, str>,
+    },
+    #[serde(rename = "org.matrix.msc3772.relation_match")]
+    RelationMatch {
+        rel_type: Cow<'static, str>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        sender: Option<Cow<'static, str>>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        sender_type: Option<Cow<'static, str>>,
+    },
+}
+
+impl IntoPy<PyObject> for Condition {
+    fn into_py(self, py: Python<'_>) -> PyObject {
+        pythonize(py, &self).expect("valid condition")
+    }
+}
+
+/// The body of a [`KnownCondition::EventMatch`]
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct EventMatchCondition {
+    key: Cow<'static, str>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pattern: Option<Cow<'static, str>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pattern_type: Option<Cow<'static, str>>,
+}
+
+/// The collection of push rules for a user.
+#[derive(Debug, Clone, Default)]
+#[pyclass(frozen)]
+struct PushRules {
+    /// Custom push rules that override a base rule.
+    overridden_base_rules: HashMap<Cow<'static, str>, PushRule>,
+
+    /// Custom rules that come between the prepend/append override base rules.
+    override_rules: Vec<PushRule>,
+    /// Custom rules that come before the base content rules.
+    content: Vec<PushRule>,
+    /// Custom rules that come before the base room rules.
+    room: Vec<PushRule>,
+    /// Custom rules that come before the base sender rules.
+    sender: Vec<PushRule>,
+    /// Custom rules that come before the base underride rules.
+    underride: Vec<PushRule>,
+}
+
+#[pymethods]
+impl PushRules {
+    #[new]
+    fn new(rules: Vec<PushRule>) -> PushRules {
+        let mut push_rules: PushRules = Default::default();
+
+        for rule in rules {
+            if let Some(&o) = base_rules::BASE_RULES_BY_ID.get(&*rule.rule_id) {
+                push_rules.overridden_base_rules.insert(
+                    rule.rule_id.clone(),
+                    PushRule {
+                        actions: rule.actions.clone(),
+                        ..o.clone()
+                    },
+                );
+
+                continue;
+            }
+
+            match rule.priority_class {
+                5 => push_rules.override_rules.push(rule),
+                4 => push_rules.content.push(rule),
+                3 => push_rules.room.push(rule),
+                2 => push_rules.sender.push(rule),
+                1 => push_rules.underride.push(rule),
+                _ => {
+                    warn!(
+                        "Unrecognized priority class for rule {}: {}",
+                        rule.rule_id, rule.priority_class
+                    );
+                }
+            }
+        }
+
+        push_rules
+    }
+
+    /// Returns the list of all rules, including base rules, in the order they
+    /// should be executed in.
+    fn rules(&self) -> Vec<PushRule> {
+        self.iter().cloned().collect()
+    }
+}
+
+impl PushRules {
+    /// Iterates over all the rules, including base rules, in the order they
+    /// should be executed in.
+    pub fn iter(&self) -> impl Iterator<Item = &PushRule> {
+        base_rules::BASE_PREPEND_OVERRIDE_RULES
+            .iter()
+            .chain(self.override_rules.iter())
+            .chain(base_rules::BASE_APPEND_OVERRIDE_RULES.iter())
+            .chain(self.content.iter())
+            .chain(base_rules::BASE_APPEND_CONTENT_RULES.iter())
+            .chain(self.room.iter())
+            .chain(self.sender.iter())
+            .chain(self.underride.iter())
+            .chain(base_rules::BASE_APPEND_UNDERRIDE_RULES.iter())
+            .map(|rule| {
+                self.overridden_base_rules
+                    .get(&*rule.rule_id)
+                    .unwrap_or(rule)
+            })
+    }
+}
+
+/// A wrapper around `PushRules` that checks the enabled state of rules and
+/// filters out disabled experimental rules.
+#[derive(Debug, Clone, Default)]
+#[pyclass(frozen)]
+pub struct FilteredPushRules {
+    push_rules: PushRules,
+    enabled_map: BTreeMap<String, bool>,
+    msc3786_enabled: bool,
+    msc3772_enabled: bool,
+}
+
+#[pymethods]
+impl FilteredPushRules {
+    #[new]
+    fn py_new(
+        push_rules: PushRules,
+        enabled_map: BTreeMap<String, bool>,
+        msc3786_enabled: bool,
+        msc3772_enabled: bool,
+    ) -> Self {
+        Self {
+            push_rules,
+            enabled_map,
+            msc3786_enabled,
+            msc3772_enabled,
+        }
+    }
+
+    /// Returns the list of all rules and their enabled state, including base
+    /// rules, in the order they should be executed in.
+    fn rules(&self) -> Vec<(PushRule, bool)> {
+        self.iter().map(|(r, e)| (r.clone(), e)).collect()
+    }
+}
+
+impl FilteredPushRules {
+    /// Iterates over all the rules and their enabled state, including base
+    /// rules, in the order they should be executed in.
+    fn iter(&self) -> impl Iterator<Item = (&PushRule, bool)> {
+        self.push_rules
+            .iter()
+            .filter(|rule| {
+                // Ignore disabled experimental push rules
+                if !self.msc3786_enabled
+                    && rule.rule_id == "global/override/.org.matrix.msc3786.rule.room.server_acl"
+                {
+                    return false;
+                }
+
+                if !self.msc3772_enabled
+                    && rule.rule_id == "global/underride/.org.matrix.msc3772.thread_reply"
+                {
+                    return false;
+                }
+
+                true
+            })
+            .map(|r| {
+                let enabled = *self
+                    .enabled_map
+                    .get(&*r.rule_id)
+                    .unwrap_or(&r.default_enabled);
+                (r, enabled)
+            })
+    }
+}
+
+#[test]
+fn test_serialize_condition() {
+    let condition = Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+        key: "content.body".into(),
+        pattern: Some("coffee".into()),
+        pattern_type: None,
+    }));
+
+    let json = serde_json::to_string(&condition).unwrap();
+    assert_eq!(
+        json,
+        r#"{"kind":"event_match","key":"content.body","pattern":"coffee"}"#
+    )
+}
+
+#[test]
+fn test_deserialize_condition() {
+    let json = r#"{"kind":"event_match","key":"content.body","pattern":"coffee"}"#;
+
+    let _: Condition = serde_json::from_str(json).unwrap();
+}
+
+#[test]
+fn test_deserialize_custom_condition() {
+    let json = r#"{"kind":"custom_tag"}"#;
+
+    let condition: Condition = serde_json::from_str(json).unwrap();
+    assert!(matches!(condition, Condition::Unknown(_)));
+
+    let new_json = serde_json::to_string(&condition).unwrap();
+    assert_eq!(json, new_json);
+}
+
+#[test]
+fn test_deserialize_action() {
+    let _: Action = serde_json::from_str(r#""notify""#).unwrap();
+    let _: Action = serde_json::from_str(r#""dont_notify""#).unwrap();
+    let _: Action = serde_json::from_str(r#""coalesce""#).unwrap();
+    let _: Action = serde_json::from_str(r#"{"set_tweak": "highlight"}"#).unwrap();
+}
+
+#[test]
+fn test_custom_action() {
+    let json = r#"{"some_custom":"action_fields"}"#;
+
+    let action: Action = serde_json::from_str(json).unwrap();
+    assert!(matches!(action, Action::Unknown(_)));
+
+    let new_json = serde_json::to_string(&action).unwrap();
+    assert_eq!(json, new_json);
+}
diff --git a/stubs/synapse/synapse_rust.pyi b/stubs/synapse/synapse_rust.pyi
deleted file mode 100644
index 8658d3138f..0000000000
--- a/stubs/synapse/synapse_rust.pyi
+++ /dev/null
@@ -1,2 +0,0 @@
-def sum_as_string(a: int, b: int) -> str: ...
-def get_rust_file_digest() -> str: ...
diff --git a/stubs/synapse/synapse_rust/__init__.pyi b/stubs/synapse/synapse_rust/__init__.pyi
new file mode 100644
index 0000000000..8658d3138f
--- /dev/null
+++ b/stubs/synapse/synapse_rust/__init__.pyi
@@ -0,0 +1,2 @@
+def sum_as_string(a: int, b: int) -> str: ...
+def get_rust_file_digest() -> str: ...
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
new file mode 100644
index 0000000000..93c4e69d42
--- /dev/null
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -0,0 +1,37 @@
+from typing import Any, Collection, Dict, Mapping, Sequence, Tuple, Union
+
+from synapse.types import JsonDict
+
+class PushRule:
+    @property
+    def rule_id(self) -> str: ...
+    @property
+    def priority_class(self) -> int: ...
+    @property
+    def conditions(self) -> Sequence[Mapping[str, str]]: ...
+    @property
+    def actions(self) -> Sequence[Union[Mapping[str, Any], str]]: ...
+    @property
+    def default(self) -> bool: ...
+    @property
+    def default_enabled(self) -> bool: ...
+    @staticmethod
+    def from_db(
+        rule_id: str, priority_class: int, conditions: str, actions: str
+    ) -> "PushRule": ...
+
+class PushRules:
+    def __init__(self, rules: Collection[PushRule]): ...
+    def rules(self) -> Collection[PushRule]: ...
+
+class FilteredPushRules:
+    def __init__(
+        self,
+        push_rules: PushRules,
+        enabled_map: Dict[str, bool],
+        msc3786_enabled: bool,
+        msc3772_enabled: bool,
+    ): ...
+    def rules(self) -> Collection[Tuple[PushRule, bool]]: ...
+
+def get_base_rule_ids() -> Collection[str]: ...
diff --git a/synapse/handlers/push_rules.py b/synapse/handlers/push_rules.py
index 2599160bcc..1219672a59 100644
--- a/synapse/handlers/push_rules.py
+++ b/synapse/handlers/push_rules.py
@@ -16,14 +16,17 @@ from typing import TYPE_CHECKING, List, Optional, Union
 import attr
 
 from synapse.api.errors import SynapseError, UnrecognizedRequestError
-from synapse.push.baserules import BASE_RULE_IDS
 from synapse.storage.push_rule import RuleNotFoundException
+from synapse.synapse_rust.push import get_base_rule_ids
 from synapse.types import JsonDict
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
 
 
+BASE_RULE_IDS = get_base_rule_ids()
+
+
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class RuleSpec:
     scope: str
diff --git a/synapse/push/baserules.py b/synapse/push/baserules.py
deleted file mode 100644
index 440205e80c..0000000000
--- a/synapse/push/baserules.py
+++ /dev/null
@@ -1,583 +0,0 @@
-# Copyright 2015, 2016 OpenMarket Ltd
-# Copyright 2017 New Vector Ltd
-# Copyright 2019 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Push rules is the system used to determine which events trigger a push (and a
-bump in notification counts).
-
-This consists of a list of "push rules" for each user, where a push rule is a
-pair of "conditions" and "actions". When a user receives an event Synapse
-iterates over the list of push rules until it finds one where all the conditions
-match the event, at which point "actions" describe the outcome (e.g. notify,
-highlight, etc).
-
-Push rules are split up into 5 different "kinds" (aka "priority classes"), which
-are run in order:
-    1. Override — highest priority rules, e.g. always ignore notices
-    2. Content — content specific rules, e.g. @ notifications
-    3. Room — per room rules, e.g. enable/disable notifications for all messages
-       in a room
-    4. Sender — per sender rules, e.g. never notify for messages from a given
-       user
-    5. Underride — the lowest priority "default" rules, e.g. notify for every
-       message.
-
-The set of "base rules" are the list of rules that every user has by default. A
-user can modify their copy of the push rules in one of three ways:
-
-    1. Adding a new push rule of a certain kind
-    2. Changing the actions of a base rule
-    3. Enabling/disabling a base rule.
-
-The base rules are split into whether they come before or after a particular
-kind, so the order of push rule evaluation would be: base rules for before
-"override" kind, user defined "override" rules, base rules after "override"
-kind, etc, etc.
-"""
-
-import itertools
-import logging
-from typing import Dict, Iterator, List, Mapping, Sequence, Tuple, Union
-
-import attr
-
-from synapse.config.experimental import ExperimentalConfig
-from synapse.push.rulekinds import PRIORITY_CLASS_MAP
-
-logger = logging.getLogger(__name__)
-
-
-@attr.s(auto_attribs=True, slots=True, frozen=True)
-class PushRule:
-    """A push rule
-
-    Attributes:
-        rule_id: a unique ID for this rule
-        priority_class: what "kind" of push rule this is (see
-            `PRIORITY_CLASS_MAP` for mapping between int and kind)
-        conditions: the sequence of conditions that all need to match
-        actions: the actions to apply if all conditions are met
-        default: is this a base rule?
-        default_enabled: is this enabled by default?
-    """
-
-    rule_id: str
-    priority_class: int
-    conditions: Sequence[Mapping[str, str]]
-    actions: Sequence[Union[str, Mapping]]
-    default: bool = False
-    default_enabled: bool = True
-
-
-@attr.s(auto_attribs=True, slots=True, frozen=True, weakref_slot=False)
-class PushRules:
-    """A collection of push rules for an account.
-
-    Can be iterated over, producing push rules in priority order.
-    """
-
-    # A mapping from rule ID to push rule that overrides a base rule. These will
-    # be returned instead of the base rule.
-    overriden_base_rules: Dict[str, PushRule] = attr.Factory(dict)
-
-    # The following stores the custom push rules at each priority class.
-    #
-    # We keep these separate (rather than combining into one big list) to avoid
-    # copying the base rules around all the time.
-    override: List[PushRule] = attr.Factory(list)
-    content: List[PushRule] = attr.Factory(list)
-    room: List[PushRule] = attr.Factory(list)
-    sender: List[PushRule] = attr.Factory(list)
-    underride: List[PushRule] = attr.Factory(list)
-
-    def __iter__(self) -> Iterator[PushRule]:
-        # When iterating over the push rules we need to return the base rules
-        # interspersed at the correct spots.
-        for rule in itertools.chain(
-            BASE_PREPEND_OVERRIDE_RULES,
-            self.override,
-            BASE_APPEND_OVERRIDE_RULES,
-            self.content,
-            BASE_APPEND_CONTENT_RULES,
-            self.room,
-            self.sender,
-            self.underride,
-            BASE_APPEND_UNDERRIDE_RULES,
-        ):
-            # Check if a base rule has been overriden by a custom rule. If so
-            # return that instead.
-            override_rule = self.overriden_base_rules.get(rule.rule_id)
-            if override_rule:
-                yield override_rule
-            else:
-                yield rule
-
-    def __len__(self) -> int:
-        # The length is mostly used by caches to get a sense of "size" / amount
-        # of memory this object is using, so we only count the number of custom
-        # rules.
-        return (
-            len(self.overriden_base_rules)
-            + len(self.override)
-            + len(self.content)
-            + len(self.room)
-            + len(self.sender)
-            + len(self.underride)
-        )
-
-
-@attr.s(auto_attribs=True, slots=True, frozen=True, weakref_slot=False)
-class FilteredPushRules:
-    """A wrapper around `PushRules` that filters out disabled experimental push
-    rules, and includes the "enabled" state for each rule when iterated over.
-    """
-
-    push_rules: PushRules
-    enabled_map: Dict[str, bool]
-    experimental_config: ExperimentalConfig
-
-    def __iter__(self) -> Iterator[Tuple[PushRule, bool]]:
-        for rule in self.push_rules:
-            if not _is_experimental_rule_enabled(
-                rule.rule_id, self.experimental_config
-            ):
-                continue
-
-            enabled = self.enabled_map.get(rule.rule_id, rule.default_enabled)
-
-            yield rule, enabled
-
-    def __len__(self) -> int:
-        return len(self.push_rules)
-
-
-DEFAULT_EMPTY_PUSH_RULES = PushRules()
-
-
-def compile_push_rules(rawrules: List[PushRule]) -> PushRules:
-    """Given a set of custom push rules return a `PushRules` instance (which
-    includes the base rules).
-    """
-
-    if not rawrules:
-        # Fast path to avoid allocating empty lists when there are no custom
-        # rules for the user.
-        return DEFAULT_EMPTY_PUSH_RULES
-
-    rules = PushRules()
-
-    for rule in rawrules:
-        # We need to decide which bucket each custom push rule goes into.
-
-        # If it has the same ID as a base rule then it overrides that...
-        overriden_base_rule = BASE_RULES_BY_ID.get(rule.rule_id)
-        if overriden_base_rule:
-            rules.overriden_base_rules[rule.rule_id] = attr.evolve(
-                overriden_base_rule, actions=rule.actions
-            )
-            continue
-
-        # ... otherwise it gets added to the appropriate priority class bucket
-        collection: List[PushRule]
-        if rule.priority_class == 5:
-            collection = rules.override
-        elif rule.priority_class == 4:
-            collection = rules.content
-        elif rule.priority_class == 3:
-            collection = rules.room
-        elif rule.priority_class == 2:
-            collection = rules.sender
-        elif rule.priority_class == 1:
-            collection = rules.underride
-        elif rule.priority_class <= 0:
-            logger.info(
-                "Got rule with priority class less than zero, but doesn't override a base rule: %s",
-                rule,
-            )
-            continue
-        else:
-            # We log and continue here so as not to break event sending
-            logger.error("Unknown priority class: %", rule.priority_class)
-            continue
-
-        collection.append(rule)
-
-    return rules
-
-
-def _is_experimental_rule_enabled(
-    rule_id: str, experimental_config: ExperimentalConfig
-) -> bool:
-    """Used by `FilteredPushRules` to filter out experimental rules when they
-    have not been enabled.
-    """
-    if (
-        rule_id == "global/override/.org.matrix.msc3786.rule.room.server_acl"
-        and not experimental_config.msc3786_enabled
-    ):
-        return False
-    if (
-        rule_id == "global/underride/.org.matrix.msc3772.thread_reply"
-        and not experimental_config.msc3772_enabled
-    ):
-        return False
-    return True
-
-
-BASE_APPEND_CONTENT_RULES = [
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["content"],
-        rule_id="global/content/.m.rule.contains_user_name",
-        conditions=[
-            {
-                "kind": "event_match",
-                "key": "content.body",
-                # Match the localpart of the requester's MXID.
-                "pattern_type": "user_localpart",
-            }
-        ],
-        actions=[
-            "notify",
-            {"set_tweak": "sound", "value": "default"},
-            {"set_tweak": "highlight"},
-        ],
-    )
-]
-
-
-BASE_PREPEND_OVERRIDE_RULES = [
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["override"],
-        rule_id="global/override/.m.rule.master",
-        default_enabled=False,
-        conditions=[],
-        actions=["dont_notify"],
-    )
-]
-
-
-BASE_APPEND_OVERRIDE_RULES = [
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["override"],
-        rule_id="global/override/.m.rule.suppress_notices",
-        conditions=[
-            {
-                "kind": "event_match",
-                "key": "content.msgtype",
-                "pattern": "m.notice",
-                "_cache_key": "_suppress_notices",
-            }
-        ],
-        actions=["dont_notify"],
-    ),
-    # NB. .m.rule.invite_for_me must be higher prio than .m.rule.member_event
-    # otherwise invites will be matched by .m.rule.member_event
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["override"],
-        rule_id="global/override/.m.rule.invite_for_me",
-        conditions=[
-            {
-                "kind": "event_match",
-                "key": "type",
-                "pattern": "m.room.member",
-                "_cache_key": "_member",
-            },
-            {
-                "kind": "event_match",
-                "key": "content.membership",
-                "pattern": "invite",
-                "_cache_key": "_invite_member",
-            },
-            # Match the requester's MXID.
-            {"kind": "event_match", "key": "state_key", "pattern_type": "user_id"},
-        ],
-        actions=[
-            "notify",
-            {"set_tweak": "sound", "value": "default"},
-            {"set_tweak": "highlight", "value": False},
-        ],
-    ),
-    # Will we sometimes want to know about people joining and leaving?
-    # Perhaps: if so, this could be expanded upon. Seems the most usual case
-    # is that we don't though. We add this override rule so that even if
-    # the room rule is set to notify, we don't get notifications about
-    # join/leave/avatar/displayname events.
-    # See also: https://matrix.org/jira/browse/SYN-607
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["override"],
-        rule_id="global/override/.m.rule.member_event",
-        conditions=[
-            {
-                "kind": "event_match",
-                "key": "type",
-                "pattern": "m.room.member",
-                "_cache_key": "_member",
-            }
-        ],
-        actions=["dont_notify"],
-    ),
-    # This was changed from underride to override so it's closer in priority
-    # to the content rules where the user name highlight rule lives. This
-    # way a room rule is lower priority than both but a custom override rule
-    # is higher priority than both.
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["override"],
-        rule_id="global/override/.m.rule.contains_display_name",
-        conditions=[{"kind": "contains_display_name"}],
-        actions=[
-            "notify",
-            {"set_tweak": "sound", "value": "default"},
-            {"set_tweak": "highlight"},
-        ],
-    ),
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["override"],
-        rule_id="global/override/.m.rule.roomnotif",
-        conditions=[
-            {
-                "kind": "event_match",
-                "key": "content.body",
-                "pattern": "@room",
-                "_cache_key": "_roomnotif_content",
-            },
-            {
-                "kind": "sender_notification_permission",
-                "key": "room",
-                "_cache_key": "_roomnotif_pl",
-            },
-        ],
-        actions=["notify", {"set_tweak": "highlight", "value": True}],
-    ),
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["override"],
-        rule_id="global/override/.m.rule.tombstone",
-        conditions=[
-            {
-                "kind": "event_match",
-                "key": "type",
-                "pattern": "m.room.tombstone",
-                "_cache_key": "_tombstone",
-            },
-            {
-                "kind": "event_match",
-                "key": "state_key",
-                "pattern": "",
-                "_cache_key": "_tombstone_statekey",
-            },
-        ],
-        actions=["notify", {"set_tweak": "highlight", "value": True}],
-    ),
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["override"],
-        rule_id="global/override/.m.rule.reaction",
-        conditions=[
-            {
-                "kind": "event_match",
-                "key": "type",
-                "pattern": "m.reaction",
-                "_cache_key": "_reaction",
-            }
-        ],
-        actions=["dont_notify"],
-    ),
-    # XXX: This is an experimental rule that is only enabled if msc3786_enabled
-    # is enabled, if it is not the rule gets filtered out in _load_rules() in
-    # PushRulesWorkerStore
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["override"],
-        rule_id="global/override/.org.matrix.msc3786.rule.room.server_acl",
-        conditions=[
-            {
-                "kind": "event_match",
-                "key": "type",
-                "pattern": "m.room.server_acl",
-                "_cache_key": "_room_server_acl",
-            },
-            {
-                "kind": "event_match",
-                "key": "state_key",
-                "pattern": "",
-                "_cache_key": "_room_server_acl_state_key",
-            },
-        ],
-        actions=[],
-    ),
-]
-
-
-BASE_APPEND_UNDERRIDE_RULES = [
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["underride"],
-        rule_id="global/underride/.m.rule.call",
-        conditions=[
-            {
-                "kind": "event_match",
-                "key": "type",
-                "pattern": "m.call.invite",
-                "_cache_key": "_call",
-            }
-        ],
-        actions=[
-            "notify",
-            {"set_tweak": "sound", "value": "ring"},
-            {"set_tweak": "highlight", "value": False},
-        ],
-    ),
-    # XXX: once m.direct is standardised everywhere, we should use it to detect
-    # a DM from the user's perspective rather than this heuristic.
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["underride"],
-        rule_id="global/underride/.m.rule.room_one_to_one",
-        conditions=[
-            {"kind": "room_member_count", "is": "2", "_cache_key": "member_count"},
-            {
-                "kind": "event_match",
-                "key": "type",
-                "pattern": "m.room.message",
-                "_cache_key": "_message",
-            },
-        ],
-        actions=[
-            "notify",
-            {"set_tweak": "sound", "value": "default"},
-            {"set_tweak": "highlight", "value": False},
-        ],
-    ),
-    # XXX: this is going to fire for events which aren't m.room.messages
-    # but are encrypted (e.g. m.call.*)...
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["underride"],
-        rule_id="global/underride/.m.rule.encrypted_room_one_to_one",
-        conditions=[
-            {"kind": "room_member_count", "is": "2", "_cache_key": "member_count"},
-            {
-                "kind": "event_match",
-                "key": "type",
-                "pattern": "m.room.encrypted",
-                "_cache_key": "_encrypted",
-            },
-        ],
-        actions=[
-            "notify",
-            {"set_tweak": "sound", "value": "default"},
-            {"set_tweak": "highlight", "value": False},
-        ],
-    ),
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["underride"],
-        rule_id="global/underride/.org.matrix.msc3772.thread_reply",
-        conditions=[
-            {
-                "kind": "org.matrix.msc3772.relation_match",
-                "rel_type": "m.thread",
-                # Match the requester's MXID.
-                "sender_type": "user_id",
-            }
-        ],
-        actions=["notify", {"set_tweak": "highlight", "value": False}],
-    ),
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["underride"],
-        rule_id="global/underride/.m.rule.message",
-        conditions=[
-            {
-                "kind": "event_match",
-                "key": "type",
-                "pattern": "m.room.message",
-                "_cache_key": "_message",
-            }
-        ],
-        actions=["notify", {"set_tweak": "highlight", "value": False}],
-    ),
-    # XXX: this is going to fire for events which aren't m.room.messages
-    # but are encrypted (e.g. m.call.*)...
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["underride"],
-        rule_id="global/underride/.m.rule.encrypted",
-        conditions=[
-            {
-                "kind": "event_match",
-                "key": "type",
-                "pattern": "m.room.encrypted",
-                "_cache_key": "_encrypted",
-            }
-        ],
-        actions=["notify", {"set_tweak": "highlight", "value": False}],
-    ),
-    PushRule(
-        default=True,
-        priority_class=PRIORITY_CLASS_MAP["underride"],
-        rule_id="global/underride/.im.vector.jitsi",
-        conditions=[
-            {
-                "kind": "event_match",
-                "key": "type",
-                "pattern": "im.vector.modular.widgets",
-                "_cache_key": "_type_modular_widgets",
-            },
-            {
-                "kind": "event_match",
-                "key": "content.type",
-                "pattern": "jitsi",
-                "_cache_key": "_content_type_jitsi",
-            },
-            {
-                "kind": "event_match",
-                "key": "state_key",
-                "pattern": "*",
-                "_cache_key": "_is_state_event",
-            },
-        ],
-        actions=["notify", {"set_tweak": "highlight", "value": False}],
-    ),
-]
-
-
-BASE_RULE_IDS = set()
-
-BASE_RULES_BY_ID: Dict[str, PushRule] = {}
-
-for r in BASE_APPEND_CONTENT_RULES:
-    BASE_RULE_IDS.add(r.rule_id)
-    BASE_RULES_BY_ID[r.rule_id] = r
-
-for r in BASE_PREPEND_OVERRIDE_RULES:
-    BASE_RULE_IDS.add(r.rule_id)
-    BASE_RULES_BY_ID[r.rule_id] = r
-
-for r in BASE_APPEND_OVERRIDE_RULES:
-    BASE_RULE_IDS.add(r.rule_id)
-    BASE_RULES_BY_ID[r.rule_id] = r
-
-for r in BASE_APPEND_UNDERRIDE_RULES:
-    BASE_RULE_IDS.add(r.rule_id)
-    BASE_RULES_BY_ID[r.rule_id] = r
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 3846fbc5f0..404379ef67 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -37,11 +37,11 @@ from synapse.events.snapshot import EventContext
 from synapse.state import POWER_KEY
 from synapse.storage.databases.main.roommember import EventIdMembership
 from synapse.storage.state import StateFilter
+from synapse.synapse_rust.push import FilteredPushRules, PushRule
 from synapse.util.caches import register_cache
 from synapse.util.metrics import measure_func
 from synapse.visibility import filter_event_for_clients_with_state
 
-from .baserules import FilteredPushRules, PushRule
 from .push_rule_evaluator import PushRuleEvaluatorForEvent
 
 if TYPE_CHECKING:
@@ -280,7 +280,8 @@ class BulkPushRuleEvaluator:
         thread_id = "main"
         if relation:
             relations = await self._get_mutual_relations(
-                relation.parent_id, itertools.chain(*rules_by_user.values())
+                relation.parent_id,
+                itertools.chain(*(r.rules() for r in rules_by_user.values())),
             )
             if relation.rel_type == RelationTypes.THREAD:
                 thread_id = relation.parent_id
@@ -333,7 +334,7 @@ class BulkPushRuleEvaluator:
                 # current user, it'll be added to the dict later.
                 actions_by_user[uid] = []
 
-            for rule, enabled in rules:
+            for rule, enabled in rules.rules():
                 if not enabled:
                     continue
 
diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py
index 73618d9234..ebc13beda1 100644
--- a/synapse/push/clientformat.py
+++ b/synapse/push/clientformat.py
@@ -16,10 +16,9 @@ import copy
 from typing import Any, Dict, List, Optional
 
 from synapse.push.rulekinds import PRIORITY_CLASS_INVERSE_MAP, PRIORITY_CLASS_MAP
+from synapse.synapse_rust.push import FilteredPushRules, PushRule
 from synapse.types import UserID
 
-from .baserules import FilteredPushRules, PushRule
-
 
 def format_push_rules_for_user(
     user: UserID, ruleslist: FilteredPushRules
@@ -34,7 +33,7 @@ def format_push_rules_for_user(
 
     rules["global"] = _add_empty_priority_class_arrays(rules["global"])
 
-    for r, enabled in ruleslist:
+    for r, enabled in ruleslist.rules():
         template_name = _priority_class_to_template_name(r.priority_class)
 
         rulearray = rules["global"][template_name]
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index 5079edd1e0..ed17b2e70c 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -30,9 +30,8 @@ from typing import (
 
 from synapse.api.errors import StoreError
 from synapse.config.homeserver import ExperimentalConfig
-from synapse.push.baserules import FilteredPushRules, PushRule, compile_push_rules
 from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
-from synapse.storage._base import SQLBaseStore, db_to_json
+from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
     DatabasePool,
     LoggingDatabaseConnection,
@@ -51,6 +50,7 @@ from synapse.storage.util.id_generators import (
     IdGenerator,
     StreamIdGenerator,
 )
+from synapse.synapse_rust.push import FilteredPushRules, PushRule, PushRules
 from synapse.types import JsonDict
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached, cachedList
@@ -72,18 +72,25 @@ def _load_rules(
     """
 
     ruleslist = [
-        PushRule(
+        PushRule.from_db(
             rule_id=rawrule["rule_id"],
             priority_class=rawrule["priority_class"],
-            conditions=db_to_json(rawrule["conditions"]),
-            actions=db_to_json(rawrule["actions"]),
+            conditions=rawrule["conditions"],
+            actions=rawrule["actions"],
         )
         for rawrule in rawrules
     ]
 
-    push_rules = compile_push_rules(ruleslist)
+    push_rules = PushRules(
+        ruleslist,
+    )
 
-    filtered_rules = FilteredPushRules(push_rules, enabled_map, experimental_config)
+    filtered_rules = FilteredPushRules(
+        push_rules,
+        enabled_map,
+        msc3786_enabled=experimental_config.msc3786_enabled,
+        msc3772_enabled=experimental_config.msc3772_enabled,
+    )
 
     return filtered_rules
 
@@ -845,7 +852,7 @@ class PushRuleStore(PushRulesWorkerStore):
         user_push_rules = await self.get_push_rules_for_user(user_id)
 
         # Get rules relating to the old room and copy them to the new room
-        for rule, enabled in user_push_rules:
+        for rule, enabled in user_push_rules.rules():
             if not enabled:
                 continue
 
diff --git a/tests/handlers/test_deactivate_account.py b/tests/handlers/test_deactivate_account.py
index 7b9b711521..bce65fab7d 100644
--- a/tests/handlers/test_deactivate_account.py
+++ b/tests/handlers/test_deactivate_account.py
@@ -15,11 +15,11 @@
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.constants import AccountDataTypes
-from synapse.push.baserules import PushRule
 from synapse.push.rulekinds import PRIORITY_CLASS_MAP
 from synapse.rest import admin
 from synapse.rest.client import account, login
 from synapse.server import HomeServer
+from synapse.synapse_rust.push import PushRule
 from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
@@ -161,20 +161,15 @@ class DeactivateAccountTestCase(HomeserverTestCase):
             self._store.get_push_rules_for_user(self.user)
         )
         # Filter out default rules; we don't care
-        push_rules = [r for r, _ in filtered_push_rules if self._is_custom_rule(r)]
+        push_rules = [
+            r for r, _ in filtered_push_rules.rules() if self._is_custom_rule(r)
+        ]
         # Check our rule made it
-        self.assertEqual(
-            push_rules,
-            [
-                PushRule(
-                    rule_id="personal.override.rule1",
-                    priority_class=5,
-                    conditions=[],
-                    actions=[],
-                )
-            ],
-            push_rules,
-        )
+        self.assertEqual(len(push_rules), 1)
+        self.assertEqual(push_rules[0].rule_id, "personal.override.rule1")
+        self.assertEqual(push_rules[0].priority_class, 5)
+        self.assertEqual(push_rules[0].conditions, [])
+        self.assertEqual(push_rules[0].actions, [])
 
         # Request the deactivation of our account
         self._deactivate_my_account()
@@ -183,7 +178,9 @@ class DeactivateAccountTestCase(HomeserverTestCase):
             self._store.get_push_rules_for_user(self.user)
         )
         # Filter out default rules; we don't care
-        push_rules = [r for r, _ in filtered_push_rules if self._is_custom_rule(r)]
+        push_rules = [
+            r for r, _ in filtered_push_rules.rules() if self._is_custom_rule(r)
+        ]
         # Check our rule no longer exists
         self.assertEqual(push_rules, [], push_rules)
 
-- 
cgit 1.5.1


From fff9b955fa39bda2cca1fa726b561c7886e746a1 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 20 Sep 2022 14:14:12 +0100
Subject: Generate separate snapshots for logical databases (#13792)

* Generate separate snapshots for sqlite, postgres and common
* Cleanup postgres dbs in the TRAP
* Say which logical DB we're applying updates to
* Run background updates on the state DB
* Add new option for accepting a SCHEMA_NUMBER
---
 changelog.d/13792.misc                      |   1 +
 scripts-dev/make_full_schema.sh             | 166 +++++++++++++++++++++-------
 synapse/_scripts/update_synapse_database.py |  14 ++-
 synapse/storage/background_updates.py       |   5 +-
 4 files changed, 140 insertions(+), 46 deletions(-)
 create mode 100644 changelog.d/13792.misc

(limited to 'synapse')

diff --git a/changelog.d/13792.misc b/changelog.d/13792.misc
new file mode 100644
index 0000000000..36ac91400a
--- /dev/null
+++ b/changelog.d/13792.misc
@@ -0,0 +1 @@
+Update the script which makes full schema dumps.
diff --git a/scripts-dev/make_full_schema.sh b/scripts-dev/make_full_schema.sh
index 61394360ce..d8cd06ee4f 100755
--- a/scripts-dev/make_full_schema.sh
+++ b/scripts-dev/make_full_schema.sh
@@ -2,23 +2,16 @@
 #
 # This script generates SQL files for creating a brand new Synapse DB with the latest
 # schema, on both SQLite3 and Postgres.
-#
-# It does so by having Synapse generate an up-to-date SQLite DB, then running
-# synapse_port_db to convert it to Postgres. It then dumps the contents of both.
 
 export PGHOST="localhost"
-POSTGRES_DB_NAME="synapse_full_schema.$$"
-
-SQLITE_SCHEMA_FILE="schema.sql.sqlite"
-SQLITE_ROWS_FILE="rows.sql.sqlite"
-POSTGRES_SCHEMA_FILE="full.sql.postgres"
-POSTGRES_ROWS_FILE="rows.sql.postgres"
-
+POSTGRES_MAIN_DB_NAME="synapse_full_schema_main.$$"
+POSTGRES_COMMON_DB_NAME="synapse_full_schema_common.$$"
+POSTGRES_STATE_DB_NAME="synapse_full_schema_state.$$"
 REQUIRED_DEPS=("matrix-synapse" "psycopg2")
 
 usage() {
   echo
-  echo "Usage: $0 -p <postgres_username> -o <path> [-c] [-n] [-h]"
+  echo "Usage: $0 -p <postgres_username> -o <path> [-c] [-n <schema number>] [-h]"
   echo
   echo "-p <postgres_username>"
   echo "  Username to connect to local postgres instance. The password will be requested"
@@ -27,11 +20,16 @@ usage() {
   echo "  CI mode. Prints every command that the script runs."
   echo "-o <path>"
   echo "  Directory to output full schema files to."
+  echo "-n <schema number>"
+  echo "  Schema number for the new snapshot. Used to set the location of files within "
+  echo "  the output directory, mimicking that of synapse/storage/schemas."
+  echo "  Defaults to 9999."
   echo "-h"
   echo "  Display this help text."
 }
 
-while getopts "p:co:h" opt; do
+SCHEMA_NUMBER="9999"
+while getopts "p:co:hn:" opt; do
   case $opt in
     p)
       export PGUSER=$OPTARG
@@ -48,6 +46,9 @@ while getopts "p:co:h" opt; do
       usage
       exit
       ;;
+    n)
+      SCHEMA_NUMBER="$OPTARG"
+      ;;
     \?)
       echo "ERROR: Invalid option: -$OPTARG" >&2
       usage
@@ -95,12 +96,21 @@ cd "$(dirname "$0")/.."
 TMPDIR=$(mktemp -d)
 KEY_FILE=$TMPDIR/test.signing.key # default Synapse signing key path
 SQLITE_CONFIG=$TMPDIR/sqlite.conf
-SQLITE_DB=$TMPDIR/homeserver.db
+SQLITE_MAIN_DB=$TMPDIR/main.db
+SQLITE_STATE_DB=$TMPDIR/state.db
+SQLITE_COMMON_DB=$TMPDIR/common.db
 POSTGRES_CONFIG=$TMPDIR/postgres.conf
 
 # Ensure these files are delete on script exit
-# TODO: the trap should also drop the temp postgres DB
-trap 'rm -rf $TMPDIR' EXIT
+cleanup() {
+  echo "Cleaning up temporary sqlite database and config files..."
+  rm -r "$TMPDIR"
+  echo "Cleaning up temporary Postgres database..."
+  dropdb --if-exists "$POSTGRES_COMMON_DB_NAME"
+  dropdb --if-exists "$POSTGRES_MAIN_DB_NAME"
+  dropdb --if-exists "$POSTGRES_STATE_DB_NAME"
+}
+trap 'cleanup' EXIT
 
 cat > "$SQLITE_CONFIG" <<EOF
 server_name: "test"
@@ -110,10 +120,22 @@ macaroon_secret_key: "abcde"
 
 report_stats: false
 
-database:
-  name: "sqlite3"
-  args:
-    database: "$SQLITE_DB"
+databases:
+  common:
+    name: "sqlite3"
+    data_stores: []
+    args:
+      database: "$SQLITE_COMMON_DB"
+  main:
+    name: "sqlite3"
+    data_stores: ["main"]
+    args:
+      database: "$SQLITE_MAIN_DB"
+  state:
+    name: "sqlite3"
+    data_stores: ["state"]
+    args:
+      database: "$SQLITE_STATE_DB"
 
 # Suppress the key server warning.
 trusted_key_servers: []
@@ -127,13 +149,32 @@ macaroon_secret_key: "abcde"
 
 report_stats: false
 
-database:
-  name: "psycopg2"
-  args:
-    user: "$PGUSER"
-    host: "$PGHOST"
-    password: "$PGPASSWORD"
-    database: "$POSTGRES_DB_NAME"
+databases:
+  common:
+    name: "psycopg2"
+    data_stores: []
+    args:
+      user: "$PGUSER"
+      host: "$PGHOST"
+      password: "$PGPASSWORD"
+      database: "$POSTGRES_COMMON_DB_NAME"
+  main:
+    name: "psycopg2"
+    data_stores: ["main"]
+    args:
+      user: "$PGUSER"
+      host: "$PGHOST"
+      password: "$PGPASSWORD"
+      database: "$POSTGRES_MAIN_DB_NAME"
+  state:
+    name: "psycopg2"
+    data_stores: ["state"]
+    args:
+      user: "$PGUSER"
+      host: "$PGHOST"
+      password: "$PGPASSWORD"
+      database: "$POSTGRES_STATE_DB_NAME"
+
 
 # Suppress the key server warning.
 trusted_key_servers: []
@@ -148,33 +189,76 @@ echo "Running db background jobs..."
 synapse/_scripts/update_synapse_database.py --database-config "$SQLITE_CONFIG" --run-background-updates
 
 # Create the PostgreSQL database.
-echo "Creating postgres database..."
-createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_DB_NAME"
+echo "Creating postgres databases..."
+createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_COMMON_DB_NAME"
+createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_MAIN_DB_NAME"
+createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_STATE_DB_NAME"
 
 echo "Running db background jobs..."
 synapse/_scripts/update_synapse_database.py --database-config "$POSTGRES_CONFIG" --run-background-updates
 
 
-# Delete schema_version, applied_schema_deltas and applied_module_schemas tables
-# Also delete any shadow tables from fts4
 echo "Dropping unwanted db tables..."
-SQL="
+
+# Some common tables are created and updated by Synapse itself and do not belong in the
+# schema.
+DROP_APP_MANAGED_TABLES="
 DROP TABLE schema_version;
+DROP TABLE schema_compat_version;
 DROP TABLE applied_schema_deltas;
 DROP TABLE applied_module_schemas;
 "
-sqlite3 "$SQLITE_DB" <<< "$SQL"
-psql "$POSTGRES_DB_NAME" -w <<< "$SQL"
+# Other common tables are not created by Synapse and do belong in the schema.
+# TODO: we could derive DROP_COMMON_TABLES from the dump of the common-only DB. But
+#       since there's only one table there, I haven't bothered to do so.
+DROP_COMMON_TABLES="$DROP_APP_MANAGED_TABLES
+DROP TABLE background_updates;
+"
+
+sqlite3 "$SQLITE_COMMON_DB" <<< "$DROP_APP_MANAGED_TABLES"
+sqlite3 "$SQLITE_MAIN_DB" <<< "$DROP_COMMON_TABLES"
+sqlite3 "$SQLITE_STATE_DB" <<< "$DROP_COMMON_TABLES"
+psql "$POSTGRES_COMMON_DB_NAME" -w <<< "$DROP_APP_MANAGED_TABLES"
+psql "$POSTGRES_MAIN_DB_NAME" -w <<< "$DROP_COMMON_TABLES"
+psql "$POSTGRES_STATE_DB_NAME" -w <<< "$DROP_COMMON_TABLES"
+
+# For Reasons(TM), SQLite's `.schema` also dumps out "shadow tables", the implementation
+# details behind full text search tables. Omit these from the dumps.
+
+sqlite3 "$SQLITE_MAIN_DB" <<< "
+DROP TABLE event_search_content;
+DROP TABLE event_search_segments;
+DROP TABLE event_search_segdir;
+DROP TABLE event_search_docsize;
+DROP TABLE event_search_stat;
+DROP TABLE user_directory_search_content;
+DROP TABLE user_directory_search_segments;
+DROP TABLE user_directory_search_segdir;
+DROP TABLE user_directory_search_docsize;
+DROP TABLE user_directory_search_stat;
+"
 
-echo "Dumping SQLite3 schema to '$OUTPUT_DIR/$SQLITE_SCHEMA_FILE' and '$OUTPUT_DIR/$SQLITE_ROWS_FILE'..."
-sqlite3 "$SQLITE_DB" ".schema --indent" > "$OUTPUT_DIR/$SQLITE_SCHEMA_FILE"
-sqlite3 "$SQLITE_DB" ".dump --data-only --nosys" > "$OUTPUT_DIR/$SQLITE_ROWS_FILE"
+echo "Dumping SQLite3 schema..."
+
+mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schema/$SCHEMA_NUMBER"
+sqlite3 "$SQLITE_COMMON_DB" ".schema --indent"           > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_MAIN_DB"   ".schema --indent"           > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_MAIN_DB"   ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_STATE_DB"  ".schema --indent"           > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_STATE_DB"  ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
+
+cleanup_pg_schema() {
+   sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d'
+}
 
-echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE' and '$OUTPUT_DIR/$POSTGRES_ROWS_FILE'..."
-pg_dump --format=plain --schema-only         --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE"
-pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_ROWS_FILE"
+echo "Dumping Postgres schema..."
 
-echo "Cleaning up temporary Postgres database..."
-dropdb $POSTGRES_DB_NAME
+pg_dump --format=plain --schema-only         --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema  > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --schema-only         --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME"   | cleanup_pg_schema  > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME"   | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --schema-only         --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME"  | cleanup_pg_schema  > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME"  | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
 
 echo "Done! Files dumped to: $OUTPUT_DIR"
diff --git a/synapse/_scripts/update_synapse_database.py b/synapse/_scripts/update_synapse_database.py
index b4aeae6dd5..fb1fb83f50 100755
--- a/synapse/_scripts/update_synapse_database.py
+++ b/synapse/_scripts/update_synapse_database.py
@@ -48,10 +48,13 @@ class MockHomeserver(HomeServer):
 
 
 def run_background_updates(hs: HomeServer) -> None:
-    store = hs.get_datastores().main
+    main = hs.get_datastores().main
+    state = hs.get_datastores().state
 
     async def run_background_updates() -> None:
-        await store.db_pool.updates.run_background_updates(sleep=False)
+        await main.db_pool.updates.run_background_updates(sleep=False)
+        if state:
+            await state.db_pool.updates.run_background_updates(sleep=False)
         # Stop the reactor to exit the script once every background update is run.
         reactor.stop()
 
@@ -97,8 +100,11 @@ def main() -> None:
     # Load, process and sanity-check the config.
     hs_config = yaml.safe_load(args.database_config)
 
-    if "database" not in hs_config:
-        sys.stderr.write("The configuration file must have a 'database' section.\n")
+    if "database" not in hs_config and "databases" not in hs_config:
+        sys.stderr.write(
+            "The configuration file must have a 'database' or 'databases' section. "
+            "See https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#database"
+        )
         sys.exit(4)
 
     config = HomeServerConfig()
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index bf5e7ee7be..2056ecb2c3 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -285,7 +285,10 @@ class BackgroundUpdater:
         back_to_back_failures = 0
 
         try:
-            logger.info("Starting background schema updates")
+            logger.info(
+                "Starting background schema updates for database %s",
+                self._database_name,
+            )
             while self.enabled:
                 try:
                     result = await self.do_next_background_update(sleep)
-- 
cgit 1.5.1


From 85fc7ea1a1fb38424923dd1ff117405aea04c33c Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Tue, 20 Sep 2022 15:18:07 +0200
Subject: Remove the `complete_sso_login` method from the Module API which was
 deprecated in Synapse 1.13.0. (#13843)

Signed-off-by: Quentin Gliech <quenting@element.io>
---
 changelog.d/13843.removal      |  1 +
 synapse/handlers/auth.py       | 34 +---------------------------------
 synapse/module_api/__init__.py | 25 -------------------------
 3 files changed, 2 insertions(+), 58 deletions(-)
 create mode 100644 changelog.d/13843.removal

(limited to 'synapse')

diff --git a/changelog.d/13843.removal b/changelog.d/13843.removal
new file mode 100644
index 0000000000..f6caaa8895
--- /dev/null
+++ b/changelog.d/13843.removal
@@ -0,0 +1 @@
+Remove the `complete_sso_login` method from the Module API which was deprecated in Synapse 1.13.0.
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 0327fc57a4..eacd631ee0 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -63,7 +63,6 @@ from synapse.http.server import finish_request, respond_with_html
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import defer_to_thread
 from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.storage.roommember import ProfileInfo
 from synapse.types import JsonDict, Requester, UserID
 from synapse.util import stringutils as stringutils
 from synapse.util.async_helpers import delay_cancellation, maybe_awaitable
@@ -1687,41 +1686,10 @@ class AuthHandler:
             respond_with_html(request, 403, self._sso_account_deactivated_template)
             return
 
-        profile = await self.store.get_profileinfo(
+        user_profile_data = await self.store.get_profileinfo(
             UserID.from_string(registered_user_id).localpart
         )
 
-        self._complete_sso_login(
-            registered_user_id,
-            auth_provider_id,
-            request,
-            client_redirect_url,
-            extra_attributes,
-            new_user=new_user,
-            user_profile_data=profile,
-            auth_provider_session_id=auth_provider_session_id,
-        )
-
-    def _complete_sso_login(
-        self,
-        registered_user_id: str,
-        auth_provider_id: str,
-        request: Request,
-        client_redirect_url: str,
-        extra_attributes: Optional[JsonDict] = None,
-        new_user: bool = False,
-        user_profile_data: Optional[ProfileInfo] = None,
-        auth_provider_session_id: Optional[str] = None,
-    ) -> None:
-        """
-        The synchronous portion of complete_sso_login.
-
-        This exists purely for backwards compatibility of synapse.module_api.ModuleApi.
-        """
-
-        if user_profile_data is None:
-            user_profile_data = ProfileInfo(None, None)
-
         # Store any extra attributes which will be passed in the login response.
         # Note that this is per-user so it may overwrite a previous value, this
         # is considered OK since the newest SSO attributes should be most valid.
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 87ba154cb7..9287c0fb8d 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -836,31 +836,6 @@ class ModuleApi:
             self._store.db_pool.runInteraction(desc, func, *args, **kwargs)  # type: ignore[arg-type]
         )
 
-    def complete_sso_login(
-        self, registered_user_id: str, request: SynapseRequest, client_redirect_url: str
-    ) -> None:
-        """Complete a SSO login by redirecting the user to a page to confirm whether they
-        want their access token sent to `client_redirect_url`, or redirect them to that
-        URL with a token directly if the URL matches with one of the whitelisted clients.
-
-        This is deprecated in favor of complete_sso_login_async.
-
-        Added in Synapse v1.11.1.
-
-        Args:
-            registered_user_id: The MXID that has been registered as a previous step of
-                of this SSO login.
-            request: The request to respond to.
-            client_redirect_url: The URL to which to offer to redirect the user (or to
-                redirect them directly if whitelisted).
-        """
-        self._auth_handler._complete_sso_login(
-            registered_user_id,
-            "<unknown>",
-            request,
-            client_redirect_url,
-        )
-
     async def complete_sso_login_async(
         self,
         registered_user_id: str,
-- 
cgit 1.5.1


From 16e1a9d9a7884967da390ef967b942a5e35e8f6c Mon Sep 17 00:00:00 2001
From: Peter Scheu <32014443+peterscheu-aceart@users.noreply.github.com>
Date: Wed, 21 Sep 2022 15:08:16 +0200
Subject: Correct documentation for map_user_attributes of OpenID Mapping
 Providers (#13836)

Co-authored-by: David Robertson <davidr@element.io>
---
 changelog.d/13836.doc         |  1 +
 docs/sso_mapping_providers.md | 12 +++++++++---
 synapse/handlers/sso.py       |  3 +++
 3 files changed, 13 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/13836.doc

(limited to 'synapse')

diff --git a/changelog.d/13836.doc b/changelog.d/13836.doc
new file mode 100644
index 0000000000..f2edab00f4
--- /dev/null
+++ b/changelog.d/13836.doc
@@ -0,0 +1 @@
+Fix a mistake in sso_mapping_providers.md: `map_user_attributes` is expected to return `display_name` not `displayname`.
diff --git a/docs/sso_mapping_providers.md b/docs/sso_mapping_providers.md
index 817499149f..9f5e5fbbe1 100644
--- a/docs/sso_mapping_providers.md
+++ b/docs/sso_mapping_providers.md
@@ -73,8 +73,8 @@ A custom mapping provider must specify the following methods:
 * `async def map_user_attributes(self, userinfo, token, failures)`
     - This method must be async.
     - Arguments:
-      - `userinfo` - A `authlib.oidc.core.claims.UserInfo` object to extract user
-                     information from.
+      - `userinfo` - An [`authlib.oidc.core.claims.UserInfo`](https://docs.authlib.org/en/latest/specs/oidc.html#authlib.oidc.core.UserInfo)
+                     object to extract user information from.
       - `token` - A dictionary which includes information necessary to make
                   further requests to the OpenID provider.
       - `failures` - An `int` that represents the amount of times the returned
@@ -91,7 +91,13 @@ A custom mapping provider must specify the following methods:
         `None`, the user is prompted to pick their own username. This is only used
         during a user's first login. Once a localpart has been associated with a
         remote user ID (see `get_remote_user_id`) it cannot be updated.
-      - `displayname`: An optional string, the display name for the user.
+      - `confirm_localpart`: A boolean. If set to `True`, when a `localpart`
+        string is returned from this method, Synapse will prompt the user to
+        either accept this localpart or pick their own username. Otherwise this
+        option has no effect. If omitted, defaults to `False`.
+      - `display_name`: An optional string, the display name for the user.
+      - `emails`: A list of strings, the email address(es) to associate with
+        this user. If omitted, defaults to an empty list.
 * `async def get_extra_attributes(self, userinfo, token)`
     - This method must be async.
     - Arguments:
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index 1e171f3f71..6bc1cbd787 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -128,6 +128,9 @@ class SsoIdentityProvider(Protocol):
 
 @attr.s(auto_attribs=True)
 class UserAttributes:
+    # NB: This struct is documented in docs/sso_mapping_providers.md so that users can
+    # populate it with data from their own mapping providers.
+
     # the localpart of the mxid that the mapper has assigned to the user.
     # if `None`, the mapper has not picked a userid, and the user should be prompted to
     # enter one.
-- 
cgit 1.5.1


From 6bd8763804dc0987c7ecd37bcb5ebff465fffa29 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Wed, 21 Sep 2022 15:32:01 +0200
Subject: Add cache invalidation across workers to module API (#13667)

Signed-off-by: Mathieu Velten <mathieuv@matrix.org>
---
 changelog.d/13667.feature                          |  1 +
 scripts-dev/mypy_synapse_plugin.py                 |  4 +-
 synapse/module_api/__init__.py                     | 33 ++++++++-
 synapse/storage/_base.py                           | 23 +++++--
 synapse/storage/databases/main/cache.py            | 20 ++++--
 synapse/util/caches/descriptors.py                 | 14 ++--
 .../replication/test_module_cache_invalidation.py  | 79 ++++++++++++++++++++++
 7 files changed, 153 insertions(+), 21 deletions(-)
 create mode 100644 changelog.d/13667.feature
 create mode 100644 tests/replication/test_module_cache_invalidation.py

(limited to 'synapse')

diff --git a/changelog.d/13667.feature b/changelog.d/13667.feature
new file mode 100644
index 0000000000..a0b3cfe18c
--- /dev/null
+++ b/changelog.d/13667.feature
@@ -0,0 +1 @@
+Add cache invalidation across workers to module API.
diff --git a/scripts-dev/mypy_synapse_plugin.py b/scripts-dev/mypy_synapse_plugin.py
index d08517a953..2c377533c0 100644
--- a/scripts-dev/mypy_synapse_plugin.py
+++ b/scripts-dev/mypy_synapse_plugin.py
@@ -29,7 +29,7 @@ class SynapsePlugin(Plugin):
         self, fullname: str
     ) -> Optional[Callable[[MethodSigContext], CallableType]]:
         if fullname.startswith(
-            "synapse.util.caches.descriptors._CachedFunction.__call__"
+            "synapse.util.caches.descriptors.CachedFunction.__call__"
         ) or fullname.startswith(
             "synapse.util.caches.descriptors._LruCachedFunction.__call__"
         ):
@@ -38,7 +38,7 @@ class SynapsePlugin(Plugin):
 
 
 def cached_function_method_signature(ctx: MethodSigContext) -> CallableType:
-    """Fixes the `_CachedFunction.__call__` signature to be correct.
+    """Fixes the `CachedFunction.__call__` signature to be correct.
 
     It already has *almost* the correct signature, except:
 
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 9287c0fb8d..59755bff6d 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -125,7 +125,7 @@ from synapse.types import (
 )
 from synapse.util import Clock
 from synapse.util.async_helpers import maybe_awaitable
-from synapse.util.caches.descriptors import cached
+from synapse.util.caches.descriptors import CachedFunction, cached
 from synapse.util.frozenutils import freeze
 
 if TYPE_CHECKING:
@@ -836,6 +836,37 @@ class ModuleApi:
             self._store.db_pool.runInteraction(desc, func, *args, **kwargs)  # type: ignore[arg-type]
         )
 
+    def register_cached_function(self, cached_func: CachedFunction) -> None:
+        """Register a cached function that should be invalidated across workers.
+        Invalidation local to a worker can be done directly using `cached_func.invalidate`,
+        however invalidation that needs to go to other workers needs to call `invalidate_cache`
+        on the module API instead.
+
+        Args:
+            cached_func: The cached function that will be registered to receive invalidation
+            locally and from other workers.
+        """
+        self._store.register_external_cached_function(
+            f"{cached_func.__module__}.{cached_func.__name__}", cached_func
+        )
+
+    async def invalidate_cache(
+        self, cached_func: CachedFunction, keys: Tuple[Any, ...]
+    ) -> None:
+        """Invalidate a cache entry of a cached function across workers. The cached function
+        needs to be registered on all workers first with `register_cached_function`.
+
+        Args:
+            cached_func: The cached function that needs an invalidation
+            keys: keys of the entry to invalidate, usually matching the arguments of the
+            cached function.
+        """
+        cached_func.invalidate(keys)
+        await self._store.send_invalidation_to_replication(
+            f"{cached_func.__module__}.{cached_func.__name__}",
+            keys,
+        )
+
     async def complete_sso_login_async(
         self,
         registered_user_id: str,
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index e30f9c76d4..303a5d5298 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -15,12 +15,13 @@
 # limitations under the License.
 import logging
 from abc import ABCMeta
-from typing import TYPE_CHECKING, Any, Collection, Iterable, Optional, Union
+from typing import TYPE_CHECKING, Any, Collection, Dict, Iterable, Optional, Union
 
 from synapse.storage.database import make_in_list_sql_clause  # noqa: F401; noqa: F401
 from synapse.storage.database import DatabasePool, LoggingDatabaseConnection
 from synapse.types import get_domain_from_id
 from synapse.util import json_decoder
+from synapse.util.caches.descriptors import CachedFunction
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -47,6 +48,8 @@ class SQLBaseStore(metaclass=ABCMeta):
         self.database_engine = database.engine
         self.db_pool = database
 
+        self.external_cached_functions: Dict[str, CachedFunction] = {}
+
     def process_replication_rows(
         self,
         stream_name: str,
@@ -95,7 +98,7 @@ class SQLBaseStore(metaclass=ABCMeta):
 
     def _attempt_to_invalidate_cache(
         self, cache_name: str, key: Optional[Collection[Any]]
-    ) -> None:
+    ) -> bool:
         """Attempts to invalidate the cache of the given name, ignoring if the
         cache doesn't exist. Mainly used for invalidating caches on workers,
         where they may not have the cache.
@@ -113,9 +116,12 @@ class SQLBaseStore(metaclass=ABCMeta):
         try:
             cache = getattr(self, cache_name)
         except AttributeError:
-            # We probably haven't pulled in the cache in this worker,
-            # which is fine.
-            return
+            # Check if an externally defined module cache has been registered
+            cache = self.external_cached_functions.get(cache_name)
+            if not cache:
+                # We probably haven't pulled in the cache in this worker,
+                # which is fine.
+                return False
 
         if key is None:
             cache.invalidate_all()
@@ -125,6 +131,13 @@ class SQLBaseStore(metaclass=ABCMeta):
             invalidate_method = getattr(cache, "invalidate_local", cache.invalidate)
             invalidate_method(tuple(key))
 
+        return True
+
+    def register_external_cached_function(
+        self, cache_name: str, func: CachedFunction
+    ) -> None:
+        self.external_cached_functions[cache_name] = func
+
 
 def db_to_json(db_content: Union[memoryview, bytes, bytearray, str]) -> Any:
     """
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index 12e9a42382..2c421151c1 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -33,7 +33,7 @@ from synapse.storage.database import (
 )
 from synapse.storage.engines import PostgresEngine
 from synapse.storage.util.id_generators import MultiWriterIdGenerator
-from synapse.util.caches.descriptors import _CachedFunction
+from synapse.util.caches.descriptors import CachedFunction
 from synapse.util.iterutils import batch_iter
 
 if TYPE_CHECKING:
@@ -269,9 +269,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
             return
 
         cache_func.invalidate(keys)
-        await self.db_pool.runInteraction(
-            "invalidate_cache_and_stream",
-            self._send_invalidation_to_replication,
+        await self.send_invalidation_to_replication(
             cache_func.__name__,
             keys,
         )
@@ -279,7 +277,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
     def _invalidate_cache_and_stream(
         self,
         txn: LoggingTransaction,
-        cache_func: _CachedFunction,
+        cache_func: CachedFunction,
         keys: Tuple[Any, ...],
     ) -> None:
         """Invalidates the cache and adds it to the cache stream so slaves
@@ -293,7 +291,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
         self._send_invalidation_to_replication(txn, cache_func.__name__, keys)
 
     def _invalidate_all_cache_and_stream(
-        self, txn: LoggingTransaction, cache_func: _CachedFunction
+        self, txn: LoggingTransaction, cache_func: CachedFunction
     ) -> None:
         """Invalidates the entire cache and adds it to the cache stream so slaves
         will know to invalidate their caches.
@@ -334,6 +332,16 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
                 txn, CURRENT_STATE_CACHE_NAME, [room_id]
             )
 
+    async def send_invalidation_to_replication(
+        self, cache_name: str, keys: Optional[Collection[Any]]
+    ) -> None:
+        await self.db_pool.runInteraction(
+            "send_invalidation_to_replication",
+            self._send_invalidation_to_replication,
+            cache_name,
+            keys,
+        )
+
     def _send_invalidation_to_replication(
         self, txn: LoggingTransaction, cache_name: str, keys: Optional[Iterable[Any]]
     ) -> None:
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 10aff4d04a..3909f1caea 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -53,7 +53,7 @@ CacheKey = Union[Tuple, Any]
 F = TypeVar("F", bound=Callable[..., Any])
 
 
-class _CachedFunction(Generic[F]):
+class CachedFunction(Generic[F]):
     invalidate: Any = None
     invalidate_all: Any = None
     prefill: Any = None
@@ -242,7 +242,7 @@ class LruCacheDescriptor(_CacheDescriptorBase):
 
             return ret2
 
-        wrapped = cast(_CachedFunction, _wrapped)
+        wrapped = cast(CachedFunction, _wrapped)
         wrapped.cache = cache
         obj.__dict__[self.name] = wrapped
 
@@ -363,7 +363,7 @@ class DeferredCacheDescriptor(_CacheDescriptorBase):
 
             return make_deferred_yieldable(ret)
 
-        wrapped = cast(_CachedFunction, _wrapped)
+        wrapped = cast(CachedFunction, _wrapped)
 
         if self.num_args == 1:
             assert not self.tree
@@ -572,7 +572,7 @@ def cached(
     iterable: bool = False,
     prune_unread_entries: bool = True,
     name: Optional[str] = None,
-) -> Callable[[F], _CachedFunction[F]]:
+) -> Callable[[F], CachedFunction[F]]:
     func = lambda orig: DeferredCacheDescriptor(
         orig,
         max_entries=max_entries,
@@ -585,7 +585,7 @@ def cached(
         name=name,
     )
 
-    return cast(Callable[[F], _CachedFunction[F]], func)
+    return cast(Callable[[F], CachedFunction[F]], func)
 
 
 def cachedList(
@@ -594,7 +594,7 @@ def cachedList(
     list_name: str,
     num_args: Optional[int] = None,
     name: Optional[str] = None,
-) -> Callable[[F], _CachedFunction[F]]:
+) -> Callable[[F], CachedFunction[F]]:
     """Creates a descriptor that wraps a function in a `DeferredCacheListDescriptor`.
 
     Used to do batch lookups for an already created cache. One of the arguments
@@ -631,7 +631,7 @@ def cachedList(
         name=name,
     )
 
-    return cast(Callable[[F], _CachedFunction[F]], func)
+    return cast(Callable[[F], CachedFunction[F]], func)
 
 
 def _get_cache_key_builder(
diff --git a/tests/replication/test_module_cache_invalidation.py b/tests/replication/test_module_cache_invalidation.py
new file mode 100644
index 0000000000..b93cae67d3
--- /dev/null
+++ b/tests/replication/test_module_cache_invalidation.py
@@ -0,0 +1,79 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+
+import synapse
+from synapse.module_api import cached
+
+from tests.replication._base import BaseMultiWorkerStreamTestCase
+
+logger = logging.getLogger(__name__)
+
+FIRST_VALUE = "one"
+SECOND_VALUE = "two"
+
+KEY = "mykey"
+
+
+class TestCache:
+    current_value = FIRST_VALUE
+
+    @cached()
+    async def cached_function(self, user_id: str) -> str:
+        return self.current_value
+
+
+class ModuleCacheInvalidationTestCase(BaseMultiWorkerStreamTestCase):
+    servlets = [
+        synapse.rest.admin.register_servlets,
+    ]
+
+    def test_module_cache_full_invalidation(self):
+        main_cache = TestCache()
+        self.hs.get_module_api().register_cached_function(main_cache.cached_function)
+
+        worker_hs = self.make_worker_hs("synapse.app.generic_worker")
+
+        worker_cache = TestCache()
+        worker_hs.get_module_api().register_cached_function(
+            worker_cache.cached_function
+        )
+
+        self.assertEqual(FIRST_VALUE, self.get_success(main_cache.cached_function(KEY)))
+        self.assertEqual(
+            FIRST_VALUE, self.get_success(worker_cache.cached_function(KEY))
+        )
+
+        main_cache.current_value = SECOND_VALUE
+        worker_cache.current_value = SECOND_VALUE
+        # No invalidation yet, should return the cached value on both the main process and the worker
+        self.assertEqual(FIRST_VALUE, self.get_success(main_cache.cached_function(KEY)))
+        self.assertEqual(
+            FIRST_VALUE, self.get_success(worker_cache.cached_function(KEY))
+        )
+
+        # Full invalidation on the main process, should be replicated on the worker, which
+        # should return the updated value too
+        self.get_success(
+            self.hs.get_module_api().invalidate_cache(
+                main_cache.cached_function, (KEY,)
+            )
+        )
+
+        self.assertEqual(
+            SECOND_VALUE, self.get_success(main_cache.cached_function(KEY))
+        )
+        self.assertEqual(
+            SECOND_VALUE, self.get_success(worker_cache.cached_function(KEY))
+        )
-- 
cgit 1.5.1


From 8ae42ab8fa3c6b52d74c24daa7ca75a478fa4fbb Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Wed, 21 Sep 2022 15:39:01 +0100
Subject: Support enabling/disabling pushers (from MSC3881) (#13799)

Partial implementation of MSC3881
---
 changelog.d/13799.feature                          |   1 +
 synapse/_scripts/synapse_port_db.py                |   1 +
 synapse/config/experimental.py                     |   3 +
 synapse/handlers/register.py                       |   4 +-
 synapse/push/__init__.py                           |   2 +
 synapse/push/pusherpool.py                         |  81 ++++++++---
 synapse/replication/tcp/client.py                  |  10 +-
 synapse/rest/admin/users.py                        |   4 +-
 synapse/rest/client/pusher.py                      |  18 ++-
 synapse/storage/databases/main/pusher.py           |  69 ++++++----
 .../schema/main/delta/73/02add_pusher_enabled.sql  |  16 +++
 tests/push/test_email.py                           |   4 +-
 tests/push/test_http.py                            | 148 +++++++++++++++++++--
 tests/replication/test_pusher_shard.py             |   2 +-
 tests/rest/admin/test_user.py                      |   2 +-
 15 files changed, 294 insertions(+), 71 deletions(-)
 create mode 100644 changelog.d/13799.feature
 create mode 100644 synapse/storage/schema/main/delta/73/02add_pusher_enabled.sql

(limited to 'synapse')

diff --git a/changelog.d/13799.feature b/changelog.d/13799.feature
new file mode 100644
index 0000000000..6c8e5cffe2
--- /dev/null
+++ b/changelog.d/13799.feature
@@ -0,0 +1 @@
+Add experimental support for [MSC3881: Remotely toggle push notifications for another client](https://github.com/matrix-org/matrix-spec-proposals/pull/3881).
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 30983c47fb..450ba462ba 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -111,6 +111,7 @@ BOOLEAN_COLUMNS = {
     "e2e_fallback_keys_json": ["used"],
     "access_tokens": ["used"],
     "device_lists_changes_in_room": ["converted_to_destinations"],
+    "pushers": ["enabled"],
 }
 
 
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 702b81e636..f4541a8db0 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -93,3 +93,6 @@ class ExperimentalConfig(Config):
 
         # MSC3852: Expose last seen user agent field on /_matrix/client/v3/devices.
         self.msc3852_enabled: bool = experimental.get("msc3852_enabled", False)
+
+        # MSC3881: Remotely toggle push notifications for another client
+        self.msc3881_enabled: bool = experimental.get("msc3881_enabled", False)
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index 20ec22105a..cfcadb34db 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -997,7 +997,7 @@ class RegistrationHandler:
             assert user_tuple
             token_id = user_tuple.token_id
 
-            await self.pusher_pool.add_pusher(
+            await self.pusher_pool.add_or_update_pusher(
                 user_id=user_id,
                 access_token=token_id,
                 kind="email",
@@ -1005,7 +1005,7 @@ class RegistrationHandler:
                 app_display_name="Email Notifications",
                 device_display_name=threepid["address"],
                 pushkey=threepid["address"],
-                lang=None,  # We don't know a user's language here
+                lang=None,
                 data={},
             )
 
diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py
index 57c4d70466..ac99d35a7e 100644
--- a/synapse/push/__init__.py
+++ b/synapse/push/__init__.py
@@ -116,6 +116,7 @@ class PusherConfig:
     last_stream_ordering: int
     last_success: Optional[int]
     failing_since: Optional[int]
+    enabled: bool
 
     def as_dict(self) -> Dict[str, Any]:
         """Information that can be retrieved about a pusher after creation."""
@@ -128,6 +129,7 @@ class PusherConfig:
             "lang": self.lang,
             "profile_tag": self.profile_tag,
             "pushkey": self.pushkey,
+            "enabled": self.enabled,
         }
 
 
diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py
index 1e0ef44fc7..2597898cf4 100644
--- a/synapse/push/pusherpool.py
+++ b/synapse/push/pusherpool.py
@@ -94,7 +94,7 @@ class PusherPool:
             return
         run_as_background_process("start_pushers", self._start_pushers)
 
-    async def add_pusher(
+    async def add_or_update_pusher(
         self,
         user_id: str,
         access_token: Optional[int],
@@ -106,6 +106,7 @@ class PusherPool:
         lang: Optional[str],
         data: JsonDict,
         profile_tag: str = "",
+        enabled: bool = True,
     ) -> Optional[Pusher]:
         """Creates a new pusher and adds it to the pool
 
@@ -147,9 +148,20 @@ class PusherPool:
                 last_stream_ordering=last_stream_ordering,
                 last_success=None,
                 failing_since=None,
+                enabled=enabled,
             )
         )
 
+        # Before we actually persist the pusher, we check if the user already has one
+        # for this app ID and pushkey. If so, we want to keep the access token in place,
+        # since this could be one device modifying (e.g. enabling/disabling) another
+        # device's pusher.
+        existing_config = await self._get_pusher_config_for_user_by_app_id_and_pushkey(
+            user_id, app_id, pushkey
+        )
+        if existing_config:
+            access_token = existing_config.access_token
+
         await self.store.add_pusher(
             user_id=user_id,
             access_token=access_token,
@@ -163,8 +175,9 @@ class PusherPool:
             data=data,
             last_stream_ordering=last_stream_ordering,
             profile_tag=profile_tag,
+            enabled=enabled,
         )
-        pusher = await self.start_pusher_by_id(app_id, pushkey, user_id)
+        pusher = await self.process_pusher_change_by_id(app_id, pushkey, user_id)
 
         return pusher
 
@@ -276,10 +289,25 @@ class PusherPool:
         except Exception:
             logger.exception("Exception in pusher on_new_receipts")
 
-    async def start_pusher_by_id(
+    async def _get_pusher_config_for_user_by_app_id_and_pushkey(
+        self, user_id: str, app_id: str, pushkey: str
+    ) -> Optional[PusherConfig]:
+        resultlist = await self.store.get_pushers_by_app_id_and_pushkey(app_id, pushkey)
+
+        pusher_config = None
+        for r in resultlist:
+            if r.user_name == user_id:
+                pusher_config = r
+
+        return pusher_config
+
+    async def process_pusher_change_by_id(
         self, app_id: str, pushkey: str, user_id: str
     ) -> Optional[Pusher]:
-        """Look up the details for the given pusher, and start it
+        """Look up the details for the given pusher, and either start it if its
+        "enabled" flag is True, or try to stop it otherwise.
+
+        If the pusher is new and its "enabled" flag is False, the stop is a noop.
 
         Returns:
             The pusher started, if any
@@ -290,12 +318,13 @@ class PusherPool:
         if not self._pusher_shard_config.should_handle(self._instance_name, user_id):
             return None
 
-        resultlist = await self.store.get_pushers_by_app_id_and_pushkey(app_id, pushkey)
+        pusher_config = await self._get_pusher_config_for_user_by_app_id_and_pushkey(
+            user_id, app_id, pushkey
+        )
 
-        pusher_config = None
-        for r in resultlist:
-            if r.user_name == user_id:
-                pusher_config = r
+        if pusher_config and not pusher_config.enabled:
+            self.maybe_stop_pusher(app_id, pushkey, user_id)
+            return None
 
         pusher = None
         if pusher_config:
@@ -305,7 +334,7 @@ class PusherPool:
 
     async def _start_pushers(self) -> None:
         """Start all the pushers"""
-        pushers = await self.store.get_all_pushers()
+        pushers = await self.store.get_enabled_pushers()
 
         # Stagger starting up the pushers so we don't completely drown the
         # process on start up.
@@ -363,6 +392,8 @@ class PusherPool:
 
         synapse_pushers.labels(type(pusher).__name__, pusher.app_id).inc()
 
+        logger.info("Starting pusher %s / %s", pusher.user_id, appid_pushkey)
+
         # Check if there *may* be push to process. We do this as this check is a
         # lot cheaper to do than actually fetching the exact rows we need to
         # push.
@@ -382,16 +413,7 @@ class PusherPool:
         return pusher
 
     async def remove_pusher(self, app_id: str, pushkey: str, user_id: str) -> None:
-        appid_pushkey = "%s:%s" % (app_id, pushkey)
-
-        byuser = self.pushers.get(user_id, {})
-
-        if appid_pushkey in byuser:
-            logger.info("Stopping pusher %s / %s", user_id, appid_pushkey)
-            pusher = byuser.pop(appid_pushkey)
-            pusher.on_stop()
-
-            synapse_pushers.labels(type(pusher).__name__, pusher.app_id).dec()
+        self.maybe_stop_pusher(app_id, pushkey, user_id)
 
         # We can only delete pushers on master.
         if self._remove_pusher_client:
@@ -402,3 +424,22 @@ class PusherPool:
             await self.store.delete_pusher_by_app_id_pushkey_user_id(
                 app_id, pushkey, user_id
             )
+
+    def maybe_stop_pusher(self, app_id: str, pushkey: str, user_id: str) -> None:
+        """Stops a pusher with the given app ID and push key if one is running.
+
+        Args:
+            app_id: the pusher's app ID.
+            pushkey: the pusher's push key.
+            user_id: the user the pusher belongs to. Used for the lookup and logging.
+        """
+        appid_pushkey = "%s:%s" % (app_id, pushkey)
+
+        byuser = self.pushers.get(user_id, {})
+
+        if appid_pushkey in byuser:
+            logger.info("Stopping pusher %s / %s", user_id, appid_pushkey)
+            pusher = byuser.pop(appid_pushkey)
+            pusher.on_stop()
+
+            synapse_pushers.labels(type(pusher).__name__, pusher.app_id).dec()
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index e4f2201c92..cf9cd6833b 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -189,7 +189,9 @@ class ReplicationDataHandler:
                 if row.deleted:
                     self.stop_pusher(row.user_id, row.app_id, row.pushkey)
                 else:
-                    await self.start_pusher(row.user_id, row.app_id, row.pushkey)
+                    await self.process_pusher_change(
+                        row.user_id, row.app_id, row.pushkey
+                    )
         elif stream_name == EventsStream.NAME:
             # We shouldn't get multiple rows per token for events stream, so
             # we don't need to optimise this for multiple rows.
@@ -334,13 +336,15 @@ class ReplicationDataHandler:
         logger.info("Stopping pusher %r / %r", user_id, key)
         pusher.on_stop()
 
-    async def start_pusher(self, user_id: str, app_id: str, pushkey: str) -> None:
+    async def process_pusher_change(
+        self, user_id: str, app_id: str, pushkey: str
+    ) -> None:
         if not self._notify_pushers:
             return
 
         key = "%s:%s" % (app_id, pushkey)
         logger.info("Starting pusher %r / %r", user_id, key)
-        await self._pusher_pool.start_pusher_by_id(app_id, pushkey, user_id)
+        await self._pusher_pool.process_pusher_change_by_id(app_id, pushkey, user_id)
 
 
 class FederationSenderHandler:
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 2ca6b2d08a..1274773d7e 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -375,7 +375,7 @@ class UserRestServletV2(RestServlet):
                         and self.hs.config.email.email_notif_for_new_users
                         and medium == "email"
                     ):
-                        await self.pusher_pool.add_pusher(
+                        await self.pusher_pool.add_or_update_pusher(
                             user_id=user_id,
                             access_token=None,
                             kind="email",
@@ -383,7 +383,7 @@ class UserRestServletV2(RestServlet):
                             app_display_name="Email Notifications",
                             device_display_name=address,
                             pushkey=address,
-                            lang=None,  # We don't know a user's language here
+                            lang=None,
                             data={},
                         )
 
diff --git a/synapse/rest/client/pusher.py b/synapse/rest/client/pusher.py
index 9a1f10f4be..c9f76125dc 100644
--- a/synapse/rest/client/pusher.py
+++ b/synapse/rest/client/pusher.py
@@ -42,6 +42,7 @@ class PushersRestServlet(RestServlet):
         super().__init__()
         self.hs = hs
         self.auth = hs.get_auth()
+        self._msc3881_enabled = self.hs.config.experimental.msc3881_enabled
 
     async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
@@ -51,9 +52,14 @@ class PushersRestServlet(RestServlet):
             user.to_string()
         )
 
-        filtered_pushers = [p.as_dict() for p in pushers]
+        pusher_dicts = [p.as_dict() for p in pushers]
 
-        return 200, {"pushers": filtered_pushers}
+        for pusher in pusher_dicts:
+            if self._msc3881_enabled:
+                pusher["org.matrix.msc3881.enabled"] = pusher["enabled"]
+            del pusher["enabled"]
+
+        return 200, {"pushers": pusher_dicts}
 
 
 class PushersSetRestServlet(RestServlet):
@@ -65,6 +71,7 @@ class PushersSetRestServlet(RestServlet):
         self.auth = hs.get_auth()
         self.notifier = hs.get_notifier()
         self.pusher_pool = self.hs.get_pusherpool()
+        self._msc3881_enabled = self.hs.config.experimental.msc3881_enabled
 
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
@@ -103,6 +110,10 @@ class PushersSetRestServlet(RestServlet):
         if "append" in content:
             append = content["append"]
 
+        enabled = True
+        if self._msc3881_enabled and "org.matrix.msc3881.enabled" in content:
+            enabled = content["org.matrix.msc3881.enabled"]
+
         if not append:
             await self.pusher_pool.remove_pushers_by_app_id_and_pushkey_not_user(
                 app_id=content["app_id"],
@@ -111,7 +122,7 @@ class PushersSetRestServlet(RestServlet):
             )
 
         try:
-            await self.pusher_pool.add_pusher(
+            await self.pusher_pool.add_or_update_pusher(
                 user_id=user.to_string(),
                 access_token=requester.access_token_id,
                 kind=content["kind"],
@@ -122,6 +133,7 @@ class PushersSetRestServlet(RestServlet):
                 lang=content["lang"],
                 data=content["data"],
                 profile_tag=content.get("profile_tag", ""),
+                enabled=enabled,
             )
         except PusherConfigException as pce:
             raise SynapseError(
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
index bd0cfa7f32..ee55b8c4a9 100644
--- a/synapse/storage/databases/main/pusher.py
+++ b/synapse/storage/databases/main/pusher.py
@@ -89,6 +89,11 @@ class PusherWorkerStore(SQLBaseStore):
                 )
                 continue
 
+            # If we're using SQLite, then boolean values are integers. This is
+            # troublesome since some code using the return value of this method might
+            # expect it to be a boolean, or will expose it to clients (in responses).
+            r["enabled"] = bool(r["enabled"])
+
             yield PusherConfig(**r)
 
     async def get_pushers_by_app_id_and_pushkey(
@@ -100,38 +105,52 @@ class PusherWorkerStore(SQLBaseStore):
         return await self.get_pushers_by({"user_name": user_id})
 
     async def get_pushers_by(self, keyvalues: Dict[str, Any]) -> Iterator[PusherConfig]:
-        ret = await self.db_pool.simple_select_list(
-            "pushers",
-            keyvalues,
-            [
-                "id",
-                "user_name",
-                "access_token",
-                "profile_tag",
-                "kind",
-                "app_id",
-                "app_display_name",
-                "device_display_name",
-                "pushkey",
-                "ts",
-                "lang",
-                "data",
-                "last_stream_ordering",
-                "last_success",
-                "failing_since",
-            ],
+        """Retrieve pushers that match the given criteria.
+
+        Args:
+            keyvalues: A {column: value} dictionary.
+
+        Returns:
+            The pushers for which the given columns have the given values.
+        """
+
+        def get_pushers_by_txn(txn: LoggingTransaction) -> List[Dict[str, Any]]:
+            # We could technically use simple_select_list here, but we need to call
+            # COALESCE on the 'enabled' column. While it is technically possible to give
+            # simple_select_list the whole `COALESCE(...) AS ...` as a column name, it
+            # feels a bit hacky, so it's probably better to just inline the query.
+            sql = """
+            SELECT
+                id, user_name, access_token, profile_tag, kind, app_id,
+                app_display_name, device_display_name, pushkey, ts, lang, data,
+                last_stream_ordering, last_success, failing_since,
+                COALESCE(enabled, TRUE) AS enabled
+            FROM pushers
+            """
+
+            sql += "WHERE %s" % (" AND ".join("%s = ?" % (k,) for k in keyvalues),)
+
+            txn.execute(sql, list(keyvalues.values()))
+
+            return self.db_pool.cursor_to_dict(txn)
+
+        ret = await self.db_pool.runInteraction(
             desc="get_pushers_by",
+            func=get_pushers_by_txn,
         )
+
         return self._decode_pushers_rows(ret)
 
-    async def get_all_pushers(self) -> Iterator[PusherConfig]:
-        def get_pushers(txn: LoggingTransaction) -> Iterator[PusherConfig]:
-            txn.execute("SELECT * FROM pushers")
+    async def get_enabled_pushers(self) -> Iterator[PusherConfig]:
+        def get_enabled_pushers_txn(txn: LoggingTransaction) -> Iterator[PusherConfig]:
+            txn.execute("SELECT * FROM pushers WHERE COALESCE(enabled, TRUE)")
             rows = self.db_pool.cursor_to_dict(txn)
 
             return self._decode_pushers_rows(rows)
 
-        return await self.db_pool.runInteraction("get_all_pushers", get_pushers)
+        return await self.db_pool.runInteraction(
+            "get_enabled_pushers", get_enabled_pushers_txn
+        )
 
     async def get_all_updated_pushers_rows(
         self, instance_name: str, last_id: int, current_id: int, limit: int
@@ -476,6 +495,7 @@ class PusherStore(PusherWorkerStore):
         data: Optional[JsonDict],
         last_stream_ordering: int,
         profile_tag: str = "",
+        enabled: bool = True,
     ) -> None:
         async with self._pushers_id_gen.get_next() as stream_id:
             # no need to lock because `pushers` has a unique key on
@@ -494,6 +514,7 @@ class PusherStore(PusherWorkerStore):
                     "last_stream_ordering": last_stream_ordering,
                     "profile_tag": profile_tag,
                     "id": stream_id,
+                    "enabled": enabled,
                 },
                 desc="add_pusher",
                 lock=False,
diff --git a/synapse/storage/schema/main/delta/73/02add_pusher_enabled.sql b/synapse/storage/schema/main/delta/73/02add_pusher_enabled.sql
new file mode 100644
index 0000000000..dba3b4900b
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/02add_pusher_enabled.sql
@@ -0,0 +1,16 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE pushers ADD COLUMN enabled BOOLEAN;
\ No newline at end of file
diff --git a/tests/push/test_email.py b/tests/push/test_email.py
index 7a3b0d6755..fd14568f55 100644
--- a/tests/push/test_email.py
+++ b/tests/push/test_email.py
@@ -114,7 +114,7 @@ class EmailPusherTests(HomeserverTestCase):
         )
 
         self.pusher = self.get_success(
-            self.hs.get_pusherpool().add_pusher(
+            self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=self.user_id,
                 access_token=self.token_id,
                 kind="email",
@@ -136,7 +136,7 @@ class EmailPusherTests(HomeserverTestCase):
         """
         with self.assertRaises(SynapseError) as cm:
             self.get_success_or_raise(
-                self.hs.get_pusherpool().add_pusher(
+                self.hs.get_pusherpool().add_or_update_pusher(
                     user_id=self.user_id,
                     access_token=self.token_id,
                     kind="email",
diff --git a/tests/push/test_http.py b/tests/push/test_http.py
index d9c68cdd2d..af67d84463 100644
--- a/tests/push/test_http.py
+++ b/tests/push/test_http.py
@@ -19,8 +19,8 @@ from twisted.test.proto_helpers import MemoryReactor
 
 import synapse.rest.admin
 from synapse.logging.context import make_deferred_yieldable
-from synapse.push import PusherConfigException
-from synapse.rest.client import login, push_rule, receipts, room
+from synapse.push import PusherConfig, PusherConfigException
+from synapse.rest.client import login, push_rule, pusher, receipts, room
 from synapse.server import HomeServer
 from synapse.types import JsonDict
 from synapse.util import Clock
@@ -35,6 +35,7 @@ class HTTPPusherTests(HomeserverTestCase):
         login.register_servlets,
         receipts.register_servlets,
         push_rule.register_servlets,
+        pusher.register_servlets,
     ]
     user_id = True
     hijack_auth = False
@@ -74,7 +75,7 @@ class HTTPPusherTests(HomeserverTestCase):
 
         def test_data(data: Optional[JsonDict]) -> None:
             self.get_failure(
-                self.hs.get_pusherpool().add_pusher(
+                self.hs.get_pusherpool().add_or_update_pusher(
                     user_id=user_id,
                     access_token=token_id,
                     kind="http",
@@ -119,7 +120,7 @@ class HTTPPusherTests(HomeserverTestCase):
         token_id = user_tuple.token_id
 
         self.get_success(
-            self.hs.get_pusherpool().add_pusher(
+            self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=user_id,
                 access_token=token_id,
                 kind="http",
@@ -235,7 +236,7 @@ class HTTPPusherTests(HomeserverTestCase):
         token_id = user_tuple.token_id
 
         self.get_success(
-            self.hs.get_pusherpool().add_pusher(
+            self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=user_id,
                 access_token=token_id,
                 kind="http",
@@ -355,7 +356,7 @@ class HTTPPusherTests(HomeserverTestCase):
         token_id = user_tuple.token_id
 
         self.get_success(
-            self.hs.get_pusherpool().add_pusher(
+            self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=user_id,
                 access_token=token_id,
                 kind="http",
@@ -441,7 +442,7 @@ class HTTPPusherTests(HomeserverTestCase):
         token_id = user_tuple.token_id
 
         self.get_success(
-            self.hs.get_pusherpool().add_pusher(
+            self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=user_id,
                 access_token=token_id,
                 kind="http",
@@ -518,7 +519,7 @@ class HTTPPusherTests(HomeserverTestCase):
         token_id = user_tuple.token_id
 
         self.get_success(
-            self.hs.get_pusherpool().add_pusher(
+            self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=user_id,
                 access_token=token_id,
                 kind="http",
@@ -624,7 +625,7 @@ class HTTPPusherTests(HomeserverTestCase):
         token_id = user_tuple.token_id
 
         self.get_success(
-            self.hs.get_pusherpool().add_pusher(
+            self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=user_id,
                 access_token=token_id,
                 kind="http",
@@ -728,18 +729,38 @@ class HTTPPusherTests(HomeserverTestCase):
         )
         self.assertEqual(channel.code, 200, channel.json_body)
 
-    def _make_user_with_pusher(self, username: str) -> Tuple[str, str]:
+    def _make_user_with_pusher(
+        self, username: str, enabled: bool = True
+    ) -> Tuple[str, str]:
+        """Registers a user and creates a pusher for them.
+
+        Args:
+            username: the localpart of the new user's Matrix ID.
+            enabled: whether to create the pusher in an enabled or disabled state.
+        """
         user_id = self.register_user(username, "pass")
         access_token = self.login(username, "pass")
 
         # Register the pusher
+        self._set_pusher(user_id, access_token, enabled)
+
+        return user_id, access_token
+
+    def _set_pusher(self, user_id: str, access_token: str, enabled: bool) -> None:
+        """Creates or updates the pusher for the given user.
+
+        Args:
+            user_id: the user's Matrix ID.
+            access_token: the access token associated with the pusher.
+            enabled: whether to enable or disable the pusher.
+        """
         user_tuple = self.get_success(
             self.hs.get_datastores().main.get_user_by_access_token(access_token)
         )
         token_id = user_tuple.token_id
 
         self.get_success(
-            self.hs.get_pusherpool().add_pusher(
+            self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=user_id,
                 access_token=token_id,
                 kind="http",
@@ -749,11 +770,10 @@ class HTTPPusherTests(HomeserverTestCase):
                 pushkey="a@example.com",
                 lang=None,
                 data={"url": "http://example.com/_matrix/push/v1/notify"},
+                enabled=enabled,
             )
         )
 
-        return user_id, access_token
-
     def test_dont_notify_rule_overrides_message(self) -> None:
         """
         The override push rule will suppress notification
@@ -791,3 +811,105 @@ class HTTPPusherTests(HomeserverTestCase):
         # The user sends a message back (sends a notification)
         self.helper.send(room, body="Hello", tok=access_token)
         self.assertEqual(len(self.push_attempts), 1)
+
+    @override_config({"experimental_features": {"msc3881_enabled": True}})
+    def test_disable(self) -> None:
+        """Tests that disabling a pusher means it's not pushed to anymore."""
+        user_id, access_token = self._make_user_with_pusher("user")
+        other_user_id, other_access_token = self._make_user_with_pusher("otheruser")
+
+        room = self.helper.create_room_as(user_id, tok=access_token)
+        self.helper.join(room=room, user=other_user_id, tok=other_access_token)
+
+        # Send a message and check that it generated a push.
+        self.helper.send(room, body="Hi!", tok=other_access_token)
+        self.assertEqual(len(self.push_attempts), 1)
+
+        # Disable the pusher.
+        self._set_pusher(user_id, access_token, enabled=False)
+
+        # Send another message and check that it did not generate a push.
+        self.helper.send(room, body="Hi!", tok=other_access_token)
+        self.assertEqual(len(self.push_attempts), 1)
+
+        # Get the pushers for the user and check that it is marked as disabled.
+        channel = self.make_request("GET", "/pushers", access_token=access_token)
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(len(channel.json_body["pushers"]), 1)
+
+        enabled = channel.json_body["pushers"][0]["org.matrix.msc3881.enabled"]
+        self.assertFalse(enabled)
+        self.assertTrue(isinstance(enabled, bool))
+
+    @override_config({"experimental_features": {"msc3881_enabled": True}})
+    def test_enable(self) -> None:
+        """Tests that enabling a disabled pusher means it gets pushed to."""
+        # Create the user with the pusher already disabled.
+        user_id, access_token = self._make_user_with_pusher("user", enabled=False)
+        other_user_id, other_access_token = self._make_user_with_pusher("otheruser")
+
+        room = self.helper.create_room_as(user_id, tok=access_token)
+        self.helper.join(room=room, user=other_user_id, tok=other_access_token)
+
+        # Send a message and check that it did not generate a push.
+        self.helper.send(room, body="Hi!", tok=other_access_token)
+        self.assertEqual(len(self.push_attempts), 0)
+
+        # Enable the pusher.
+        self._set_pusher(user_id, access_token, enabled=True)
+
+        # Send another message and check that it did generate a push.
+        self.helper.send(room, body="Hi!", tok=other_access_token)
+        self.assertEqual(len(self.push_attempts), 1)
+
+        # Get the pushers for the user and check that it is marked as enabled.
+        channel = self.make_request("GET", "/pushers", access_token=access_token)
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(len(channel.json_body["pushers"]), 1)
+
+        enabled = channel.json_body["pushers"][0]["org.matrix.msc3881.enabled"]
+        self.assertTrue(enabled)
+        self.assertTrue(isinstance(enabled, bool))
+
+    @override_config({"experimental_features": {"msc3881_enabled": True}})
+    def test_null_enabled(self) -> None:
+        """Tests that a pusher that has an 'enabled' column set to NULL (eg pushers
+        created before the column was introduced) is considered enabled.
+        """
+        # We intentionally set 'enabled' to None so that it's stored as NULL in the
+        # database.
+        user_id, access_token = self._make_user_with_pusher("user", enabled=None)  # type: ignore[arg-type]
+
+        channel = self.make_request("GET", "/pushers", access_token=access_token)
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(len(channel.json_body["pushers"]), 1)
+        self.assertTrue(channel.json_body["pushers"][0]["org.matrix.msc3881.enabled"])
+
+    def test_update_different_device_access_token(self) -> None:
+        """Tests that if we create a pusher from one device, then update it from another
+        device, the access token associated with the pusher stays the same.
+        """
+        # Create a user with a pusher.
+        user_id, access_token = self._make_user_with_pusher("user")
+
+        # Get the token ID for the current access token, since that's what we store in
+        # the pushers table.
+        user_tuple = self.get_success(
+            self.hs.get_datastores().main.get_user_by_access_token(access_token)
+        )
+        token_id = user_tuple.token_id
+
+        # Generate a new access token, and update the pusher with it.
+        new_token = self.login("user", "pass")
+        self._set_pusher(user_id, new_token, enabled=False)
+
+        # Get the current list of pushers for the user.
+        ret = self.get_success(
+            self.hs.get_datastores().main.get_pushers_by({"user_name": user_id})
+        )
+        pushers: List[PusherConfig] = list(ret)
+
+        # Check that we still have one pusher, and that the access token associated with
+        # it didn't change.
+        self.assertEqual(len(pushers), 1)
+        self.assertEqual(pushers[0].access_token, token_id)
diff --git a/tests/replication/test_pusher_shard.py b/tests/replication/test_pusher_shard.py
index 8f4f6688ce..59fea93e49 100644
--- a/tests/replication/test_pusher_shard.py
+++ b/tests/replication/test_pusher_shard.py
@@ -55,7 +55,7 @@ class PusherShardTestCase(BaseMultiWorkerStreamTestCase):
         token_id = user_dict.token_id
 
         self.get_success(
-            self.hs.get_pusherpool().add_pusher(
+            self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=user_id,
                 access_token=token_id,
                 kind="http",
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index 9f536ceeb3..1847e6ad6b 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -2839,7 +2839,7 @@ class PushersRestTestCase(unittest.HomeserverTestCase):
         token_id = user_tuple.token_id
 
         self.get_success(
-            self.hs.get_pusherpool().add_pusher(
+            self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=self.other_user,
                 access_token=token_id,
                 kind="http",
-- 
cgit 1.5.1


From 0fd2f2d46064efd37284a36d5b478815d69ddd96 Mon Sep 17 00:00:00 2001
From: Hugh Nimmo-Smith <hughns@users.noreply.github.com>
Date: Wed, 21 Sep 2022 16:12:29 +0100
Subject: Implementation of MSC3882 login token request (#13722)

---
 changelog.d/13722.feature                     |   1 +
 synapse/config/experimental.py                |   7 ++
 synapse/rest/__init__.py                      |   2 +
 synapse/rest/client/login_token_request.py    |  94 ++++++++++++++++++
 synapse/rest/client/versions.py               |   2 +
 tests/rest/client/test_login_token_request.py | 132 ++++++++++++++++++++++++++
 6 files changed, 238 insertions(+)
 create mode 100644 changelog.d/13722.feature
 create mode 100644 synapse/rest/client/login_token_request.py
 create mode 100644 tests/rest/client/test_login_token_request.py

(limited to 'synapse')

diff --git a/changelog.d/13722.feature b/changelog.d/13722.feature
new file mode 100644
index 0000000000..588d143c0f
--- /dev/null
+++ b/changelog.d/13722.feature
@@ -0,0 +1 @@
+Experimental implementation of MSC3882 to allow an existing device/session to generate a login token for use on a new device/session.
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index f4541a8db0..bf27f6c101 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -96,3 +96,10 @@ class ExperimentalConfig(Config):
 
         # MSC3881: Remotely toggle push notifications for another client
         self.msc3881_enabled: bool = experimental.get("msc3881_enabled", False)
+
+        # MSC3882: Allow an existing session to sign in a new session
+        self.msc3882_enabled: bool = experimental.get("msc3882_enabled", False)
+        self.msc3882_ui_auth: bool = experimental.get("msc3882_ui_auth", True)
+        self.msc3882_token_timeout = self.parse_duration(
+            experimental.get("msc3882_token_timeout", "5m")
+        )
diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py
index b712215112..9a2ab99ede 100644
--- a/synapse/rest/__init__.py
+++ b/synapse/rest/__init__.py
@@ -30,6 +30,7 @@ from synapse.rest.client import (
     keys,
     knock,
     login as v1_login,
+    login_token_request,
     logout,
     mutual_rooms,
     notifications,
@@ -130,3 +131,4 @@ class ClientRestResource(JsonResource):
 
         # unstable
         mutual_rooms.register_servlets(hs, client_resource)
+        login_token_request.register_servlets(hs, client_resource)
diff --git a/synapse/rest/client/login_token_request.py b/synapse/rest/client/login_token_request.py
new file mode 100644
index 0000000000..ca5c54bf17
--- /dev/null
+++ b/synapse/rest/client/login_token_request.py
@@ -0,0 +1,94 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import TYPE_CHECKING, Tuple
+
+from synapse.http.server import HttpServer
+from synapse.http.servlet import RestServlet, parse_json_object_from_request
+from synapse.http.site import SynapseRequest
+from synapse.rest.client._base import client_patterns, interactive_auth_handler
+from synapse.types import JsonDict
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+class LoginTokenRequestServlet(RestServlet):
+    """
+    Get a token that can be used with `m.login.token` to log in a second device.
+
+    Request:
+
+    POST /login/token HTTP/1.1
+    Content-Type: application/json
+
+    {}
+
+    Response:
+
+    HTTP/1.1 200 OK
+    {
+        "login_token": "ABDEFGH",
+        "expires_in": 3600
+    }
+    """
+
+    PATTERNS = client_patterns("/login/token$")
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__()
+        self.auth = hs.get_auth()
+        self.store = hs.get_datastores().main
+        self.clock = hs.get_clock()
+        self.server_name = hs.config.server.server_name
+        self.macaroon_gen = hs.get_macaroon_generator()
+        self.auth_handler = hs.get_auth_handler()
+        self.token_timeout = hs.config.experimental.msc3882_token_timeout
+        self.ui_auth = hs.config.experimental.msc3882_ui_auth
+
+    @interactive_auth_handler
+    async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request)
+        body = parse_json_object_from_request(request)
+
+        if self.ui_auth:
+            await self.auth_handler.validate_user_via_ui_auth(
+                requester,
+                request,
+                body,
+                "issue a new access token for your account",
+                can_skip_ui_auth=False,  # Don't allow skipping of UI auth
+            )
+
+        login_token = self.macaroon_gen.generate_short_term_login_token(
+            user_id=requester.user.to_string(),
+            auth_provider_id="org.matrix.msc3882.login_token_request",
+            duration_in_ms=self.token_timeout,
+        )
+
+        return (
+            200,
+            {
+                "login_token": login_token,
+                "expires_in": self.token_timeout // 1000,
+            },
+        )
+
+
+def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
+    if hs.config.experimental.msc3882_enabled:
+        LoginTokenRequestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index c516cda95d..c3488f4330 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -105,6 +105,8 @@ class VersionsRestServlet(RestServlet):
                     "org.matrix.msc3440.stable": True,  # TODO: remove when "v1.3" is added above
                     # Allows moderators to fetch redacted event content as described in MSC2815
                     "fi.mau.msc2815": self.config.experimental.msc2815_enabled,
+                    # Adds support for login token requests as per MSC3882
+                    "org.matrix.msc3882": self.config.experimental.msc3882_enabled,
                 },
             },
         )
diff --git a/tests/rest/client/test_login_token_request.py b/tests/rest/client/test_login_token_request.py
new file mode 100644
index 0000000000..d5bb16c98d
--- /dev/null
+++ b/tests/rest/client/test_login_token_request.py
@@ -0,0 +1,132 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.rest import admin
+from synapse.rest.client import login, login_token_request
+from synapse.server import HomeServer
+from synapse.util import Clock
+
+from tests import unittest
+from tests.unittest import override_config
+
+
+class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase):
+
+    servlets = [
+        login.register_servlets,
+        admin.register_servlets,
+        login_token_request.register_servlets,
+    ]
+
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
+        self.hs = self.setup_test_homeserver()
+        self.hs.config.registration.enable_registration = True
+        self.hs.config.registration.registrations_require_3pid = []
+        self.hs.config.registration.auto_join_rooms = []
+        self.hs.config.captcha.enable_registration_captcha = False
+
+        return self.hs
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.user = "user123"
+        self.password = "password"
+
+    def test_disabled(self) -> None:
+        channel = self.make_request("POST", "/login/token", {}, access_token=None)
+        self.assertEqual(channel.code, 400)
+
+        self.register_user(self.user, self.password)
+        token = self.login(self.user, self.password)
+
+        channel = self.make_request("POST", "/login/token", {}, access_token=token)
+        self.assertEqual(channel.code, 400)
+
+    @override_config({"experimental_features": {"msc3882_enabled": True}})
+    def test_require_auth(self) -> None:
+        channel = self.make_request("POST", "/login/token", {}, access_token=None)
+        self.assertEqual(channel.code, 401)
+
+    @override_config({"experimental_features": {"msc3882_enabled": True}})
+    def test_uia_on(self) -> None:
+        user_id = self.register_user(self.user, self.password)
+        token = self.login(self.user, self.password)
+
+        channel = self.make_request("POST", "/login/token", {}, access_token=token)
+        self.assertEqual(channel.code, 401)
+        self.assertIn({"stages": ["m.login.password"]}, channel.json_body["flows"])
+
+        session = channel.json_body["session"]
+
+        uia = {
+            "auth": {
+                "type": "m.login.password",
+                "identifier": {"type": "m.id.user", "user": self.user},
+                "password": self.password,
+                "session": session,
+            },
+        }
+
+        channel = self.make_request("POST", "/login/token", uia, access_token=token)
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(channel.json_body["expires_in"], 300)
+
+        login_token = channel.json_body["login_token"]
+
+        channel = self.make_request(
+            "POST",
+            "/login",
+            content={"type": "m.login.token", "token": login_token},
+        )
+        self.assertEqual(channel.code, 200, channel.result)
+        self.assertEqual(channel.json_body["user_id"], user_id)
+
+    @override_config(
+        {"experimental_features": {"msc3882_enabled": True, "msc3882_ui_auth": False}}
+    )
+    def test_uia_off(self) -> None:
+        user_id = self.register_user(self.user, self.password)
+        token = self.login(self.user, self.password)
+
+        channel = self.make_request("POST", "/login/token", {}, access_token=token)
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(channel.json_body["expires_in"], 300)
+
+        login_token = channel.json_body["login_token"]
+
+        channel = self.make_request(
+            "POST",
+            "/login",
+            content={"type": "m.login.token", "token": login_token},
+        )
+        self.assertEqual(channel.code, 200, channel.result)
+        self.assertEqual(channel.json_body["user_id"], user_id)
+
+    @override_config(
+        {
+            "experimental_features": {
+                "msc3882_enabled": True,
+                "msc3882_ui_auth": False,
+                "msc3882_token_timeout": "15s",
+            }
+        }
+    )
+    def test_expires_in(self) -> None:
+        self.register_user(self.user, self.password)
+        token = self.login(self.user, self.password)
+
+        channel = self.make_request("POST", "/login/token", {}, access_token=token)
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(channel.json_body["expires_in"], 15)
-- 
cgit 1.5.1


From ccca14140a019c2e0430f95d78fa075efd8d535f Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Wed, 21 Sep 2022 16:31:53 +0100
Subject: Track device IDs for pushers (#13831)

Second half of the MSC3881 implementation
---
 changelog.d/13831.feature                          |  1 +
 synapse/push/__init__.py                           |  2 +
 synapse/push/pusherpool.py                         | 10 ++-
 synapse/rest/client/pusher.py                      |  3 +
 synapse/storage/databases/main/pusher.py           | 73 +++++++++++++++++++++-
 .../schema/main/delta/73/03pusher_device_id.sql    | 20 ++++++
 tests/push/test_http.py                            | 55 ++++++++++++++--
 7 files changed, 154 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/13831.feature
 create mode 100644 synapse/storage/schema/main/delta/73/03pusher_device_id.sql

(limited to 'synapse')

diff --git a/changelog.d/13831.feature b/changelog.d/13831.feature
new file mode 100644
index 0000000000..6c8e5cffe2
--- /dev/null
+++ b/changelog.d/13831.feature
@@ -0,0 +1 @@
+Add experimental support for [MSC3881: Remotely toggle push notifications for another client](https://github.com/matrix-org/matrix-spec-proposals/pull/3881).
diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py
index ac99d35a7e..a0c760239d 100644
--- a/synapse/push/__init__.py
+++ b/synapse/push/__init__.py
@@ -117,6 +117,7 @@ class PusherConfig:
     last_success: Optional[int]
     failing_since: Optional[int]
     enabled: bool
+    device_id: Optional[str]
 
     def as_dict(self) -> Dict[str, Any]:
         """Information that can be retrieved about a pusher after creation."""
@@ -130,6 +131,7 @@ class PusherConfig:
             "profile_tag": self.profile_tag,
             "pushkey": self.pushkey,
             "enabled": self.enabled,
+            "device_id": self.device_id,
         }
 
 
diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py
index 2597898cf4..e2648cbc93 100644
--- a/synapse/push/pusherpool.py
+++ b/synapse/push/pusherpool.py
@@ -107,6 +107,7 @@ class PusherPool:
         data: JsonDict,
         profile_tag: str = "",
         enabled: bool = True,
+        device_id: Optional[str] = None,
     ) -> Optional[Pusher]:
         """Creates a new pusher and adds it to the pool
 
@@ -149,18 +150,20 @@ class PusherPool:
                 last_success=None,
                 failing_since=None,
                 enabled=enabled,
+                device_id=device_id,
             )
         )
 
         # Before we actually persist the pusher, we check if the user already has one
-        # for this app ID and pushkey. If so, we want to keep the access token in place,
-        # since this could be one device modifying (e.g. enabling/disabling) another
-        # device's pusher.
+        # for this app ID and pushkey. If so, we want to keep the access token and
+        # device ID in place, since this could be one device modifying (e.g.
+        # enabling/disabling) another device's pusher.
         existing_config = await self._get_pusher_config_for_user_by_app_id_and_pushkey(
             user_id, app_id, pushkey
         )
         if existing_config:
             access_token = existing_config.access_token
+            device_id = existing_config.device_id
 
         await self.store.add_pusher(
             user_id=user_id,
@@ -176,6 +179,7 @@ class PusherPool:
             last_stream_ordering=last_stream_ordering,
             profile_tag=profile_tag,
             enabled=enabled,
+            device_id=device_id,
         )
         pusher = await self.process_pusher_change_by_id(app_id, pushkey, user_id)
 
diff --git a/synapse/rest/client/pusher.py b/synapse/rest/client/pusher.py
index c9f76125dc..975eef2144 100644
--- a/synapse/rest/client/pusher.py
+++ b/synapse/rest/client/pusher.py
@@ -57,7 +57,9 @@ class PushersRestServlet(RestServlet):
         for pusher in pusher_dicts:
             if self._msc3881_enabled:
                 pusher["org.matrix.msc3881.enabled"] = pusher["enabled"]
+                pusher["org.matrix.msc3881.device_id"] = pusher["device_id"]
             del pusher["enabled"]
+            del pusher["device_id"]
 
         return 200, {"pushers": pusher_dicts}
 
@@ -134,6 +136,7 @@ class PushersSetRestServlet(RestServlet):
                 data=content["data"],
                 profile_tag=content.get("profile_tag", ""),
                 enabled=enabled,
+                device_id=requester.device_id,
             )
         except PusherConfigException as pce:
             raise SynapseError(
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
index ee55b8c4a9..01206950a9 100644
--- a/synapse/storage/databases/main/pusher.py
+++ b/synapse/storage/databases/main/pusher.py
@@ -124,7 +124,7 @@ class PusherWorkerStore(SQLBaseStore):
                 id, user_name, access_token, profile_tag, kind, app_id,
                 app_display_name, device_display_name, pushkey, ts, lang, data,
                 last_stream_ordering, last_success, failing_since,
-                COALESCE(enabled, TRUE) AS enabled
+                COALESCE(enabled, TRUE) AS enabled, device_id
             FROM pushers
             """
 
@@ -477,7 +477,74 @@ class PusherWorkerStore(SQLBaseStore):
         return number_deleted
 
 
-class PusherStore(PusherWorkerStore):
+class PusherBackgroundUpdatesStore(SQLBaseStore):
+    def __init__(
+        self,
+        database: DatabasePool,
+        db_conn: LoggingDatabaseConnection,
+        hs: "HomeServer",
+    ):
+        super().__init__(database, db_conn, hs)
+
+        self.db_pool.updates.register_background_update_handler(
+            "set_device_id_for_pushers", self._set_device_id_for_pushers
+        )
+
+    async def _set_device_id_for_pushers(
+        self, progress: JsonDict, batch_size: int
+    ) -> int:
+        """Background update to populate the device_id column of the pushers table."""
+        last_pusher_id = progress.get("pusher_id", 0)
+
+        def set_device_id_for_pushers_txn(txn: LoggingTransaction) -> int:
+            txn.execute(
+                """
+                    SELECT p.id, at.device_id
+                    FROM pushers AS p
+                    INNER JOIN access_tokens AS at
+                        ON p.access_token = at.id
+                    WHERE
+                        p.access_token IS NOT NULL
+                        AND at.device_id IS NOT NULL
+                        AND p.id > ?
+                    ORDER BY p.id
+                    LIMIT ?
+                """,
+                (last_pusher_id, batch_size),
+            )
+
+            rows = self.db_pool.cursor_to_dict(txn)
+            if len(rows) == 0:
+                return 0
+
+            self.db_pool.simple_update_many_txn(
+                txn=txn,
+                table="pushers",
+                key_names=("id",),
+                key_values=[(row["id"],) for row in rows],
+                value_names=("device_id",),
+                value_values=[(row["device_id"],) for row in rows],
+            )
+
+            self.db_pool.updates._background_update_progress_txn(
+                txn, "set_device_id_for_pushers", {"pusher_id": rows[-1]["id"]}
+            )
+
+            return len(rows)
+
+        nb_processed = await self.db_pool.runInteraction(
+            "set_device_id_for_pushers", set_device_id_for_pushers_txn
+        )
+
+        if nb_processed < batch_size:
+            await self.db_pool.updates._end_background_update(
+                "set_device_id_for_pushers"
+            )
+
+        return nb_processed
+
+
+class PusherStore(PusherWorkerStore, PusherBackgroundUpdatesStore):
     def get_pushers_stream_token(self) -> int:
         return self._pushers_id_gen.get_current_token()
 
@@ -496,6 +563,7 @@ class PusherStore(PusherWorkerStore):
         last_stream_ordering: int,
         profile_tag: str = "",
         enabled: bool = True,
+        device_id: Optional[str] = None,
     ) -> None:
         async with self._pushers_id_gen.get_next() as stream_id:
             # no need to lock because `pushers` has a unique key on
@@ -515,6 +583,7 @@ class PusherStore(PusherWorkerStore):
                     "profile_tag": profile_tag,
                     "id": stream_id,
                     "enabled": enabled,
+                    "device_id": device_id,
                 },
                 desc="add_pusher",
                 lock=False,
diff --git a/synapse/storage/schema/main/delta/73/03pusher_device_id.sql b/synapse/storage/schema/main/delta/73/03pusher_device_id.sql
new file mode 100644
index 0000000000..1b4ffbeebe
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/03pusher_device_id.sql
@@ -0,0 +1,20 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Add a device_id column to track the device ID that created the pusher. It's NULLable
+-- on purpose, because a) it might not be possible to track down the device that created
+-- old pushers (pushers.access_token and access_tokens.device_id are both NULLable), and
+-- b) access tokens retrieved via the admin API don't have a device associated to them.
+ALTER TABLE pushers ADD COLUMN device_id TEXT;
\ No newline at end of file
diff --git a/tests/push/test_http.py b/tests/push/test_http.py
index af67d84463..b383b8401f 100644
--- a/tests/push/test_http.py
+++ b/tests/push/test_http.py
@@ -22,6 +22,7 @@ from synapse.logging.context import make_deferred_yieldable
 from synapse.push import PusherConfig, PusherConfigException
 from synapse.rest.client import login, push_rule, pusher, receipts, room
 from synapse.server import HomeServer
+from synapse.storage.databases.main.registration import TokenLookupResult
 from synapse.types import JsonDict
 from synapse.util import Clock
 
@@ -771,6 +772,7 @@ class HTTPPusherTests(HomeserverTestCase):
                 lang=None,
                 data={"url": "http://example.com/_matrix/push/v1/notify"},
                 enabled=enabled,
+                device_id=user_tuple.device_id,
             )
         )
 
@@ -885,19 +887,21 @@ class HTTPPusherTests(HomeserverTestCase):
         self.assertEqual(len(channel.json_body["pushers"]), 1)
         self.assertTrue(channel.json_body["pushers"][0]["org.matrix.msc3881.enabled"])
 
-    def test_update_different_device_access_token(self) -> None:
+    def test_update_different_device_access_token_device_id(self) -> None:
         """Tests that if we create a pusher from one device, the update it from another
-        device, the access token associated with the pusher stays the same.
+        device, the access token and device ID associated with the pusher stay the
+        same.
         """
         # Create a user with a pusher.
         user_id, access_token = self._make_user_with_pusher("user")
 
         # Get the token ID for the current access token, since that's what we store in
-        # the pushers table.
+        # the pushers table. Also get the device ID from it.
         user_tuple = self.get_success(
             self.hs.get_datastores().main.get_user_by_access_token(access_token)
         )
         token_id = user_tuple.token_id
+        device_id = user_tuple.device_id
 
         # Generate a new access token, and update the pusher with it.
         new_token = self.login("user", "pass")
@@ -909,7 +913,48 @@ class HTTPPusherTests(HomeserverTestCase):
         )
         pushers: List[PusherConfig] = list(ret)
 
-        # Check that we still have one pusher, and that the access token associated with
-        # it didn't change.
+        # Check that we still have one pusher, and that the access token and device ID
+        # associated with it didn't change.
         self.assertEqual(len(pushers), 1)
         self.assertEqual(pushers[0].access_token, token_id)
+        self.assertEqual(pushers[0].device_id, device_id)
+
+    @override_config({"experimental_features": {"msc3881_enabled": True}})
+    def test_device_id(self) -> None:
+        """Tests that a pusher created with a given device ID shows that device ID in
+        GET /pushers requests.
+        """
+        self.register_user("user", "pass")
+        access_token = self.login("user", "pass")
+
+        # We create the pusher with an HTTP request rather than with
+        # _make_user_with_pusher so that we can test the device ID is correctly set when
+        # creating a pusher via an API call.
+        self.make_request(
+            method="POST",
+            path="/pushers/set",
+            content={
+                "kind": "http",
+                "app_id": "m.http",
+                "app_display_name": "HTTP Push Notifications",
+                "device_display_name": "pushy push",
+                "pushkey": "a@example.com",
+                "lang": "en",
+                "data": {"url": "http://example.com/_matrix/push/v1/notify"},
+            },
+            access_token=access_token,
+        )
+
+        # Look up the user info for the access token so we can compare the device ID.
+        lookup_result: TokenLookupResult = self.get_success(
+            self.hs.get_datastores().main.get_user_by_access_token(access_token)
+        )
+
+        # Get the user's pushers and check the pusher has the correct device ID.
+        channel = self.make_request("GET", "/pushers", access_token=access_token)
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(len(channel.json_body["pushers"]), 1)
+        self.assertEqual(
+            channel.json_body["pushers"][0]["org.matrix.msc3881.device_id"],
+            lookup_result.device_id,
+        )
-- 
cgit 1.5.1


From efabf44c7652095a0e3d9d9083fc8359cdde3854 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Wed, 21 Sep 2022 17:18:44 +0100
Subject: Add version flag for MSC3881 (#13860)

---
 changelog.d/13860.feature       | 1 +
 synapse/rest/client/versions.py | 2 ++
 2 files changed, 3 insertions(+)
 create mode 100644 changelog.d/13860.feature

(limited to 'synapse')

diff --git a/changelog.d/13860.feature b/changelog.d/13860.feature
new file mode 100644
index 0000000000..6c8e5cffe2
--- /dev/null
+++ b/changelog.d/13860.feature
@@ -0,0 +1 @@
+Add experimental support for [MSC3881: Remotely toggle push notifications for another client](https://github.com/matrix-org/matrix-spec-proposals/pull/3881).
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index c3488f4330..b3917a5abc 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -107,6 +107,8 @@ class VersionsRestServlet(RestServlet):
                     "fi.mau.msc2815": self.config.experimental.msc2815_enabled,
                     # Adds support for login token requests as per MSC3882
                     "org.matrix.msc3882": self.config.experimental.msc3882_enabled,
+                    # Adds support for remotely enabling/disabling pushers, as per MSC3881
+                    "org.matrix.msc3881": self.config.experimental.msc3881_enabled,
                 },
             },
         )
-- 
cgit 1.5.1


From 1a1abdda42551dad3aadc04a169c25f4cc651a2c Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 21 Sep 2022 22:23:44 +0100
Subject: Last batch of Pydantic for synapse/rest/client/account.py (#13832)

* Validation for `/add_threepid/msisdn/submit_token`

* Don't validate deprecated endpoint

* Changelog
---
 changelog.d/13832.feature      |  1 +
 synapse/rest/client/account.py | 19 +++++++++++++------
 2 files changed, 14 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/13832.feature

(limited to 'synapse')

diff --git a/changelog.d/13832.feature b/changelog.d/13832.feature
new file mode 100644
index 0000000000..1dc1d66efe
--- /dev/null
+++ b/changelog.d/13832.feature
@@ -0,0 +1 @@
+Improve validation for the unspecced, internal-only `_matrix/client/unstable/add_threepid/msisdn/submit_token` endpoint.
diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py
index 2db2a04f95..44f622bcce 100644
--- a/synapse/rest/client/account.py
+++ b/synapse/rest/client/account.py
@@ -534,6 +534,11 @@ class AddThreepidMsisdnSubmitTokenServlet(RestServlet):
         "/add_threepid/msisdn/submit_token$", releases=(), unstable=True
     )
 
+    class PostBody(RequestBodyModel):
+        client_secret: ClientSecretStr
+        sid: StrictStr
+        token: StrictStr
+
     def __init__(self, hs: "HomeServer"):
         super().__init__()
         self.config = hs.config
@@ -549,16 +554,14 @@ class AddThreepidMsisdnSubmitTokenServlet(RestServlet):
                 "instead.",
             )
 
-        body = parse_json_object_from_request(request)
-        assert_params_in_dict(body, ["client_secret", "sid", "token"])
-        assert_valid_client_secret(body["client_secret"])
+        body = parse_and_validate_json_object_from_request(request, self.PostBody)
 
         # Proxy submit_token request to msisdn threepid delegate
         response = await self.identity_handler.proxy_msisdn_submit_token(
             self.config.registration.account_threepid_delegate_msisdn,
-            body["client_secret"],
-            body["sid"],
-            body["token"],
+            body.client_secret,
+            body.sid,
+            body.token,
         )
         return 200, response
 
@@ -581,6 +584,10 @@ class ThreepidRestServlet(RestServlet):
 
         return 200, {"threepids": threepids}
 
+    # NOTE(dmr): I have chosen not to use Pydantic to parse this request's body, because
+    # the endpoint is deprecated. (If you really want to, you could do this by reusing
+    # ThreepidBindRestServlet.PostBody with an `alias_generator` to handle
+    # `threePidCreds` versus `three_pid_creds`.)
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         if not self.hs.config.registration.enable_3pid_changes:
             raise SynapseError(
-- 
cgit 1.5.1


From b7272b73aa38dcb19c9b075514f963390358113d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 22 Sep 2022 08:47:49 -0400
Subject: Properly paginate forward in the /relations API. (#13840)

This fixes a bug where the `/relations` API with `dir=f` would
skip the first item of each page (except the first page), causing
incomplete data to be returned to the client.
---
 changelog.d/13840.bugfix                    |  1 +
 synapse/storage/databases/main/relations.py | 38 +++++++++++++++++++++--------
 synapse/storage/databases/main/stream.py    |  6 ++---
 tests/rest/client/test_relations.py         | 29 +++++++++++++++++++++-
 4 files changed, 60 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/13840.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13840.bugfix b/changelog.d/13840.bugfix
new file mode 100644
index 0000000000..0f014439a8
--- /dev/null
+++ b/changelog.d/13840.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse v1.53.0 where the experimental implementation of [MSC3715](https://github.com/matrix-org/matrix-spec-proposals/pull/3715) would give incorrect results when paginating forward.
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index 7bd27790eb..898947af95 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -51,6 +51,8 @@ class _RelatedEvent:
     event_id: str
     # The sender of the related event.
     sender: str
+    topological_ordering: Optional[int]
+    stream_ordering: int
 
 
 class RelationsWorkerStore(SQLBaseStore):
@@ -91,6 +93,9 @@ class RelationsWorkerStore(SQLBaseStore):
         # it. The `event_id` must match the `event.event_id`.
         assert event.event_id == event_id
 
+        # Ensure bad limits aren't being passed in.
+        assert limit >= 0
+
         where_clause = ["relates_to_id = ?", "room_id = ?"]
         where_args: List[Union[str, int]] = [event.event_id, room_id]
         is_redacted = event.internal_metadata.is_redacted()
@@ -139,21 +144,34 @@ class RelationsWorkerStore(SQLBaseStore):
         ) -> Tuple[List[_RelatedEvent], Optional[StreamToken]]:
             txn.execute(sql, where_args + [limit + 1])
 
-            last_topo_id = None
-            last_stream_id = None
             events = []
-            for row in txn:
+            for event_id, relation_type, sender, topo_ordering, stream_ordering in txn:
                 # Do not include edits for redacted events as they leak event
                 # content.
-                if not is_redacted or row[1] != RelationTypes.REPLACE:
-                    events.append(_RelatedEvent(row[0], row[2]))
-                last_topo_id = row[3]
-                last_stream_id = row[4]
+                if not is_redacted or relation_type != RelationTypes.REPLACE:
+                    events.append(
+                        _RelatedEvent(event_id, sender, topo_ordering, stream_ordering)
+                    )
 
-            # If there are more events, generate the next pagination key.
+            # If there are more events, generate the next pagination key from the
+            # last event returned.
             next_token = None
-            if len(events) > limit and last_topo_id and last_stream_id:
-                next_key = RoomStreamToken(last_topo_id, last_stream_id)
+            if len(events) > limit:
+                # Instead of using the last row (which tells us there is more
+                # data), use the last row to be returned.
+                events = events[:limit]
+
+                topo = events[-1].topological_ordering
+                token = events[-1].stream_ordering
+                if direction == "b":
+                    # Tokens are positions between events.
+                    # This token points *after* the last event in the chunk.
+                    # We need it to point to the event before it in the chunk
+                    # when we are going backwards so we subtract one from the
+                    # stream part.
+                    token -= 1
+                next_key = RoomStreamToken(topo, token)
+
                 if from_token:
                     next_token = from_token.copy_and_replace(
                         StreamKeyType.ROOM, next_key
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 3f9bfaeac5..530f04e149 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -1334,15 +1334,15 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
 
         if rows:
             topo = rows[-1].topological_ordering
-            toke = rows[-1].stream_ordering
+            token = rows[-1].stream_ordering
             if direction == "b":
                 # Tokens are positions between events.
                 # This token points *after* the last event in the chunk.
                 # We need it to point to the event before it in the chunk
                 # when we are going backwards so we subtract one from the
                 # stream part.
-                toke -= 1
-            next_token = RoomStreamToken(topo, toke)
+                token -= 1
+            next_token = RoomStreamToken(topo, token)
         else:
             # TODO (erikj): We should work out what to do here instead.
             next_token = to_token if to_token else from_token
diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py
index 651f4f415d..d33e34d829 100644
--- a/tests/rest/client/test_relations.py
+++ b/tests/rest/client/test_relations.py
@@ -788,6 +788,7 @@ class RelationPaginationTestCase(BaseRelationsTestCase):
             channel.json_body["chunk"][0],
         )
 
+    @unittest.override_config({"experimental_features": {"msc3715_enabled": True}})
     def test_repeated_paginate_relations(self) -> None:
         """Test that if we paginate using a limit and tokens then we get the
         expected events.
@@ -809,7 +810,7 @@ class RelationPaginationTestCase(BaseRelationsTestCase):
 
             channel = self.make_request(
                 "GET",
-                f"/_matrix/client/v1/rooms/{self.room}/relations/{self.parent_id}?limit=1{from_token}",
+                f"/_matrix/client/v1/rooms/{self.room}/relations/{self.parent_id}?limit=3{from_token}",
                 access_token=self.user_token,
             )
             self.assertEqual(200, channel.code, channel.json_body)
@@ -827,6 +828,32 @@ class RelationPaginationTestCase(BaseRelationsTestCase):
         found_event_ids.reverse()
         self.assertEqual(found_event_ids, expected_event_ids)
 
+        # Test forward pagination.
+        prev_token = ""
+        found_event_ids = []
+        for _ in range(20):
+            from_token = ""
+            if prev_token:
+                from_token = "&from=" + prev_token
+
+            channel = self.make_request(
+                "GET",
+                f"/_matrix/client/v1/rooms/{self.room}/relations/{self.parent_id}?org.matrix.msc3715.dir=f&limit=3{from_token}",
+                access_token=self.user_token,
+            )
+            self.assertEqual(200, channel.code, channel.json_body)
+
+            found_event_ids.extend(e["event_id"] for e in channel.json_body["chunk"])
+            next_batch = channel.json_body.get("next_batch")
+
+            self.assertNotEqual(prev_token, next_batch)
+            prev_token = next_batch
+
+            if not prev_token:
+                break
+
+        self.assertEqual(found_event_ids, expected_event_ids)
+
     def test_pagination_from_sync_and_messages(self) -> None:
         """Pagination tokens from /sync and /messages can be used to paginate /relations."""
         channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "A")
-- 
cgit 1.5.1


From c06b2b714262825e1d2510b62c38fdeda339f6dc Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Fri, 23 Sep 2022 10:47:16 +0000
Subject: Faster Remote Room Joins: tell remote homeservers that we are unable
 to authorise them if they query a room which has partial state on our server.
 (#13823)

---
 changelog.d/13823.misc                  |  1 +
 synapse/api/errors.py                   |  6 ++++++
 synapse/config/experimental.py          |  3 ++-
 synapse/federation/federation_server.py | 11 +++--------
 synapse/handlers/event_auth.py          | 31 ++++++++++++++++++++++++++----
 synapse/handlers/federation.py          | 34 +++++++++++++--------------------
 synapse/handlers/federation_event.py    |  2 +-
 synapse/handlers/receipts.py            |  2 +-
 synapse/handlers/room_summary.py        |  6 ++----
 synapse/handlers/typing.py              |  2 +-
 tests/handlers/test_typing.py           |  2 +-
 11 files changed, 58 insertions(+), 42 deletions(-)
 create mode 100644 changelog.d/13823.misc

(limited to 'synapse')

diff --git a/changelog.d/13823.misc b/changelog.d/13823.misc
new file mode 100644
index 0000000000..527d79f4b2
--- /dev/null
+++ b/changelog.d/13823.misc
@@ -0,0 +1 @@
+Faster Remote Room Joins: tell remote homeservers that we are unable to authorise them if they query a room which has partial state on our server.
\ No newline at end of file
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index e6dea89c6d..1c6b53aa24 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -100,6 +100,12 @@ class Codes(str, Enum):
 
     UNREDACTED_CONTENT_DELETED = "FI.MAU.MSC2815_UNREDACTED_CONTENT_DELETED"
 
+    # Returned for federation requests where we can't process a request as we
+    # can't ensure the sending server is in a room which is partial-stated on
+    # our side.
+    # Part of MSC3895.
+    UNABLE_DUE_TO_PARTIAL_STATE = "ORG.MATRIX.MSC3895_UNABLE_DUE_TO_PARTIAL_STATE"
+
 
 class CodeMessageException(RuntimeError):
     """An exception with integer code and message string attributes.
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index bf27f6c101..595eb007a5 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -63,7 +63,8 @@ class ExperimentalConfig(Config):
         # MSC3706 (server-side support for partial state in /send_join responses)
         self.msc3706_enabled: bool = experimental.get("msc3706_enabled", False)
 
-        # experimental support for faster joins over federation (msc2775, msc3706)
+        # experimental support for faster joins over federation
+        # (MSC2775, MSC3706, MSC3895)
         # requires a target server with msc3706_enabled enabled.
         self.faster_joins_enabled: bool = experimental.get("faster_joins", False)
 
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 3bf84cf625..907940e19e 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -530,13 +530,10 @@ class FederationServer(FederationBase):
     async def on_room_state_request(
         self, origin: str, room_id: str, event_id: str
     ) -> Tuple[int, JsonDict]:
+        await self._event_auth_handler.assert_host_in_room(room_id, origin)
         origin_host, _ = parse_server_name(origin)
         await self.check_server_matches_acl(origin_host, room_id)
 
-        in_room = await self._event_auth_handler.check_host_in_room(room_id, origin)
-        if not in_room:
-            raise AuthError(403, "Host not in room.")
-
         # we grab the linearizer to protect ourselves from servers which hammer
         # us. In theory we might already have the response to this query
         # in the cache so we could return it without waiting for the linearizer
@@ -560,13 +557,10 @@ class FederationServer(FederationBase):
         if not event_id:
             raise NotImplementedError("Specify an event")
 
+        await self._event_auth_handler.assert_host_in_room(room_id, origin)
         origin_host, _ = parse_server_name(origin)
         await self.check_server_matches_acl(origin_host, room_id)
 
-        in_room = await self._event_auth_handler.check_host_in_room(room_id, origin)
-        if not in_room:
-            raise AuthError(403, "Host not in room.")
-
         resp = await self._state_ids_resp_cache.wrap(
             (room_id, event_id),
             self._on_state_ids_request_compute,
@@ -955,6 +949,7 @@ class FederationServer(FederationBase):
         self, origin: str, room_id: str, event_id: str
     ) -> Tuple[int, Dict[str, Any]]:
         async with self._server_linearizer.queue((origin, room_id)):
+            await self._event_auth_handler.assert_host_in_room(room_id, origin)
             origin_host, _ = parse_server_name(origin)
             await self.check_server_matches_acl(origin_host, room_id)
 
diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py
index c3ddc5d182..8249ca1ed2 100644
--- a/synapse/handlers/event_auth.py
+++ b/synapse/handlers/event_auth.py
@@ -31,7 +31,6 @@ from synapse.events import EventBase
 from synapse.events.builder import EventBuilder
 from synapse.events.snapshot import EventContext
 from synapse.types import StateMap, get_domain_from_id
-from synapse.util.metrics import Measure
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -156,9 +155,33 @@ class EventAuthHandler:
             Codes.UNABLE_TO_GRANT_JOIN,
         )
 
-    async def check_host_in_room(self, room_id: str, host: str) -> bool:
-        with Measure(self._clock, "check_host_in_room"):
-            return await self._store.is_host_joined(room_id, host)
+    async def is_host_in_room(self, room_id: str, host: str) -> bool:
+        return await self._store.is_host_joined(room_id, host)
+
+    async def assert_host_in_room(
+        self, room_id: str, host: str, allow_partial_state_rooms: bool = False
+    ) -> None:
+        """
+        Asserts that the host is in the room, or raises an AuthError.
+
+        If the room is partial-stated, we raise an AuthError with the
+        UNABLE_DUE_TO_PARTIAL_STATE error code, unless `allow_partial_state_rooms` is true.
+
+        If allow_partial_state_rooms is True and the room is partial-stated,
+        this function may return an incorrect result as we are not able to fully
+        track server membership in a room without full state.
+        """
+        if not allow_partial_state_rooms and await self._store.is_partial_state_room(
+            room_id
+        ):
+            raise AuthError(
+                403,
+                "Unable to authorise you right now; room is partial-stated here.",
+                errcode=Codes.UNABLE_DUE_TO_PARTIAL_STATE,
+            )
+
+        if not await self.is_host_in_room(room_id, host):
+            raise AuthError(403, "Host not in room.")
 
     async def check_restricted_join_rules(
         self,
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index dd4b9f66d1..583d5ecd77 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -804,7 +804,7 @@ class FederationHandler:
             )
 
         # now check that we are *still* in the room
-        is_in_room = await self._event_auth_handler.check_host_in_room(
+        is_in_room = await self._event_auth_handler.is_host_in_room(
             room_id, self.server_name
         )
         if not is_in_room:
@@ -1150,9 +1150,7 @@ class FederationHandler:
     async def on_backfill_request(
         self, origin: str, room_id: str, pdu_list: List[str], limit: int
     ) -> List[EventBase]:
-        in_room = await self._event_auth_handler.check_host_in_room(room_id, origin)
-        if not in_room:
-            raise AuthError(403, "Host not in room.")
+        await self._event_auth_handler.assert_host_in_room(room_id, origin)
 
         # Synapse asks for 100 events per backfill request. Do not allow more.
         limit = min(limit, 100)
@@ -1198,21 +1196,17 @@ class FederationHandler:
             event_id, allow_none=True, allow_rejected=True
         )
 
-        if event:
-            in_room = await self._event_auth_handler.check_host_in_room(
-                event.room_id, origin
-            )
-            if not in_room:
-                raise AuthError(403, "Host not in room.")
-
-            events = await filter_events_for_server(
-                self._storage_controllers, origin, [event]
-            )
-            event = events[0]
-            return event
-        else:
+        if not event:
             return None
 
+        await self._event_auth_handler.assert_host_in_room(event.room_id, origin)
+
+        events = await filter_events_for_server(
+            self._storage_controllers, origin, [event]
+        )
+        event = events[0]
+        return event
+
     async def on_get_missing_events(
         self,
         origin: str,
@@ -1221,9 +1215,7 @@ class FederationHandler:
         latest_events: List[str],
         limit: int,
     ) -> List[EventBase]:
-        in_room = await self._event_auth_handler.check_host_in_room(room_id, origin)
-        if not in_room:
-            raise AuthError(403, "Host not in room.")
+        await self._event_auth_handler.assert_host_in_room(room_id, origin)
 
         # Only allow up to 20 events to be retrieved per request.
         limit = min(limit, 20)
@@ -1257,7 +1249,7 @@ class FederationHandler:
             "state_key": target_user_id,
         }
 
-        if await self._event_auth_handler.check_host_in_room(room_id, self.hs.hostname):
+        if await self._event_auth_handler.is_host_in_room(room_id, self.hs.hostname):
             room_version_obj = await self.store.get_room_version(room_id)
             builder = self.event_builder_factory.for_room_version(
                 room_version_obj, event_dict
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index efcdb84057..2d7cde7506 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -238,7 +238,7 @@ class FederationEventHandler:
         #
         # Note that if we were never in the room then we would have already
         # dropped the event, since we wouldn't know the room version.
-        is_in_room = await self._event_auth_handler.check_host_in_room(
+        is_in_room = await self._event_auth_handler.is_host_in_room(
             room_id, self._server_name
         )
         if not is_in_room:
diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py
index d2bdb9c8be..afaf3261df 100644
--- a/synapse/handlers/receipts.py
+++ b/synapse/handlers/receipts.py
@@ -70,7 +70,7 @@ class ReceiptsHandler:
             # If we're not in the room just ditch the event entirely. This is
             # probably an old server that has come back and thinks we're still in
             # the room (or we've been rejoined to the room by a state reset).
-            is_in_room = await self.event_auth_handler.check_host_in_room(
+            is_in_room = await self.event_auth_handler.is_host_in_room(
                 room_id, self.server_name
             )
             if not is_in_room:
diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py
index ebd445adca..8d08625237 100644
--- a/synapse/handlers/room_summary.py
+++ b/synapse/handlers/room_summary.py
@@ -609,7 +609,7 @@ class RoomSummaryHandler:
         # If this is a request over federation, check if the host is in the room or
         # has a user who could join the room.
         elif origin:
-            if await self._event_auth_handler.check_host_in_room(
+            if await self._event_auth_handler.is_host_in_room(
                 room_id, origin
             ) or await self._store.is_host_invited(room_id, origin):
                 return True
@@ -624,9 +624,7 @@ class RoomSummaryHandler:
                     await self._event_auth_handler.get_rooms_that_allow_join(state_ids)
                 )
                 for space_id in allowed_rooms:
-                    if await self._event_auth_handler.check_host_in_room(
-                        space_id, origin
-                    ):
+                    if await self._event_auth_handler.is_host_in_room(space_id, origin):
                         return True
 
         logger.info(
diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py
index a4cd8b8f0c..0d8466af11 100644
--- a/synapse/handlers/typing.py
+++ b/synapse/handlers/typing.py
@@ -340,7 +340,7 @@ class TypingWriterHandler(FollowerTypingHandler):
         # If we're not in the room just ditch the event entirely. This is
         # probably an old server that has come back and thinks we're still in
         # the room (or we've been rejoined to the room by a state reset).
-        is_in_room = await self.event_auth_handler.check_host_in_room(
+        is_in_room = await self.event_auth_handler.is_host_in_room(
             room_id, self.server_name
         )
         if not is_in_room:
diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py
index 8adba29d7f..1a247f12e8 100644
--- a/tests/handlers/test_typing.py
+++ b/tests/handlers/test_typing.py
@@ -129,7 +129,7 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
         async def check_host_in_room(room_id: str, server_name: str) -> bool:
             return room_id == ROOM_ID
 
-        hs.get_event_auth_handler().check_host_in_room = check_host_in_room
+        hs.get_event_auth_handler().is_host_in_room = check_host_in_room
 
         async def get_current_hosts_in_room(room_id: str):
             return {member.domain for member in self.room_members}
-- 
cgit 1.5.1


From 03c2bfb7f89d637930da52723161ce74d4f89233 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Fri, 23 Sep 2022 13:44:03 +0100
Subject: Send device list updates out to servers in partially joined rooms
 (#13874)

Use the provided list of servers in the room from the `/send_join`
response, since we will not know which users are in the room.  This
isn't sufficient to ensure that all remote servers receive the right
device list updates, since the `/send_join` response may be inaccurate
or we may calculate the membership state of new users in the room
incorrectly.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/13874.misc                 |  1 +
 synapse/handlers/device.py             |  6 ++++-
 synapse/storage/controllers/state.py   | 44 +++++++++++++++++++++++++++++++++-
 synapse/storage/databases/main/room.py | 17 +++++++++++++
 4 files changed, 66 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/13874.misc

(limited to 'synapse')

diff --git a/changelog.d/13874.misc b/changelog.d/13874.misc
new file mode 100644
index 0000000000..499e488c35
--- /dev/null
+++ b/changelog.d/13874.misc
@@ -0,0 +1 @@
+Faster room joins: Send device list updates to most servers in rooms with partial state.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 901e2310b7..6566b3bf3d 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -688,11 +688,15 @@ class DeviceHandler(DeviceWorkerHandler):
                     # Ignore any users that aren't ours
                     if self.hs.is_mine_id(user_id):
                         hosts = set(
-                            await self._storage_controllers.state.get_current_hosts_in_room(
+                            await self._storage_controllers.state.get_current_hosts_in_room_or_partial_state_approximation(
                                 room_id
                             )
                         )
                         hosts.discard(self.server_name)
+                        # For rooms with partial state, `hosts` is merely an
+                        # approximation. When we transition to a full state room, we
+                        # will have to send out device list updates to any servers we
+                        # missed.
 
                     # Check if we've already sent this update to some hosts
                     if current_stream_id == stream_id:
diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py
index bbe568bf05..b1aa17047c 100644
--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@@ -23,6 +23,7 @@ from typing import (
     List,
     Mapping,
     Optional,
+    Sequence,
     Tuple,
 )
 
@@ -524,12 +525,53 @@ class StateStorageController:
         return state_map.get(key)
 
     async def get_current_hosts_in_room(self, room_id: str) -> List[str]:
-        """Get current hosts in room based on current state."""
+        """Get current hosts in room based on current state.
+
+        Blocks until we have full state for the given room. Blocking only occurs for
+        rooms with partial state.
+
+        Returns:
+            A list of hosts in the room, sorted by longest in the room first (i.e.
+            sorted by join with the lowest depth first).
+        """
 
         await self._partial_state_room_tracker.await_full_state(room_id)
 
         return await self.stores.main.get_current_hosts_in_room(room_id)
 
+    async def get_current_hosts_in_room_or_partial_state_approximation(
+        self, room_id: str
+    ) -> Sequence[str]:
+        """Get an approximation of current hosts in room based on current state.
+
+        For rooms with full state, this is equivalent to `get_current_hosts_in_room`,
+        with the same order of results.
+
+        For rooms with partial state, no blocking occurs. Instead, the list of hosts
+        in the room at the time of joining is combined with the list of hosts which
+        joined the room afterwards. The returned list may include hosts that are not
+        actually in the room and exclude hosts that are in the room, since we may
+        calculate state incorrectly during the partial state phase. The order of results
+        is arbitrary for rooms with partial state.
+        """
+        # We have to read this list first to mitigate races with un-partial stating.
+        # This will be empty for rooms with full state.
+        hosts_at_join = await self.stores.main.get_partial_state_servers_at_join(
+            room_id
+        )
+
+        hosts_from_state = await self.stores.main.get_current_hosts_in_room(room_id)
+        hosts_from_state_set = set(hosts_from_state)
+
+        # First take the list of hosts based on the current state.
+        # For rooms with partial state, this will be missing most hosts.
+        hosts = list(hosts_from_state)
+        # Then add in the list of hosts in the room at the time we joined.
+        # This will be an empty list for rooms with full state.
+        hosts.extend(host for host in hosts_at_join if host not in hosts_from_state_set)
+
+        return hosts
+
     async def get_users_in_room_with_profiles(
         self, room_id: str
     ) -> Dict[str, ProfileInfo]:
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index bef66f1992..5dd116d766 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -25,6 +25,7 @@ from typing import (
     List,
     Mapping,
     Optional,
+    Sequence,
     Tuple,
     Union,
     cast,
@@ -1133,6 +1134,22 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
             get_rooms_for_retention_period_in_range_txn,
         )
 
+    async def get_partial_state_servers_at_join(self, room_id: str) -> Sequence[str]:
+        """Gets the list of servers in a partial state room at the time we joined it.
+
+        Returns:
+            The `servers_in_room` list from the `/send_join` response for partial state
+            rooms. May not be accurate or complete, as it comes from a remote
+            homeserver.
+            An empty list for full state rooms.
+        """
+        return await self.db_pool.simple_select_onecol(
+            "partial_state_rooms_servers",
+            keyvalues={"room_id": room_id},
+            retcol="server_name",
+            desc="get_partial_state_servers_at_join",
+        )
+
     async def get_partial_state_rooms_and_servers(
         self,
     ) -> Mapping[str, Collection[str]]:
-- 
cgit 1.5.1


From efd108b45d1706526416bc9a6f89463b5ff4506a Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 23 Sep 2022 10:33:28 -0400
Subject: Accept & store thread IDs for receipts (implement MSC3771). (#13782)

Updates the `/receipts` endpoint and receipt EDU handler to parse a
`thread_id` from the body and insert it in the database.
---
 changelog.d/13782.feature                      |  1 +
 synapse/config/experimental.py                 |  2 +
 synapse/handlers/receipts.py                   | 23 ++++++-
 synapse/replication/tcp/client.py              |  3 +-
 synapse/replication/tcp/streams/_base.py       |  1 +
 synapse/rest/client/read_marker.py             |  2 +
 synapse/rest/client/receipts.py                | 14 ++++-
 synapse/rest/client/versions.py                |  2 +
 synapse/storage/database.py                    |  2 +
 synapse/storage/databases/main/receipts.py     | 87 +++++++++++++++++++-------
 synapse/types.py                               |  1 +
 tests/federation/test_federation_sender.py     | 21 ++++++-
 tests/handlers/test_appservice.py              |  1 +
 tests/replication/slave/storage/test_events.py |  2 +-
 tests/replication/tcp/streams/test_receipts.py | 15 ++++-
 tests/storage/test_event_push_actions.py       |  1 +
 tests/storage/test_receipts.py                 | 36 ++++++++---
 17 files changed, 173 insertions(+), 41 deletions(-)
 create mode 100644 changelog.d/13782.feature

(limited to 'synapse')

diff --git a/changelog.d/13782.feature b/changelog.d/13782.feature
new file mode 100644
index 0000000000..d0cb902dff
--- /dev/null
+++ b/changelog.d/13782.feature
@@ -0,0 +1 @@
+Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)).
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 595eb007a5..933779c23a 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -83,6 +83,8 @@ class ExperimentalConfig(Config):
         # MSC3786 (Add a default push rule to ignore m.room.server_acl events)
         self.msc3786_enabled: bool = experimental.get("msc3786_enabled", False)
 
+        # MSC3771: Thread read receipts
+        self.msc3771_enabled: bool = experimental.get("msc3771_enabled", False)
         # MSC3772: A push rule for mutual relations.
         self.msc3772_enabled: bool = experimental.get("msc3772_enabled", False)
 
diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py
index afaf3261df..4768a34c07 100644
--- a/synapse/handlers/receipts.py
+++ b/synapse/handlers/receipts.py
@@ -63,6 +63,8 @@ class ReceiptsHandler:
         self.clock = self.hs.get_clock()
         self.state = hs.get_state_handler()
 
+        self._msc3771_enabled = hs.config.experimental.msc3771_enabled
+
     async def _received_remote_receipt(self, origin: str, content: JsonDict) -> None:
         """Called when we receive an EDU of type m.receipt from a remote HS."""
         receipts = []
@@ -91,13 +93,23 @@ class ReceiptsHandler:
                         )
                         continue
 
+                    # Check if these receipts apply to a thread.
+                    thread_id = None
+                    data = user_values.get("data", {})
+                    if self._msc3771_enabled and isinstance(data, dict):
+                        thread_id = data.get("thread_id")
+                        # If the thread ID is invalid, consider it missing.
+                        if not isinstance(thread_id, str):
+                            thread_id = None
+
                     receipts.append(
                         ReadReceipt(
                             room_id=room_id,
                             receipt_type=receipt_type,
                             user_id=user_id,
                             event_ids=user_values["event_ids"],
-                            data=user_values.get("data", {}),
+                            thread_id=thread_id,
+                            data=data,
                         )
                     )
 
@@ -114,6 +126,7 @@ class ReceiptsHandler:
                 receipt.receipt_type,
                 receipt.user_id,
                 receipt.event_ids,
+                receipt.thread_id,
                 receipt.data,
             )
 
@@ -146,7 +159,12 @@ class ReceiptsHandler:
         return True
 
     async def received_client_receipt(
-        self, room_id: str, receipt_type: str, user_id: str, event_id: str
+        self,
+        room_id: str,
+        receipt_type: str,
+        user_id: str,
+        event_id: str,
+        thread_id: Optional[str],
     ) -> None:
         """Called when a client tells us a local user has read up to the given
         event_id in the room.
@@ -156,6 +174,7 @@ class ReceiptsHandler:
             receipt_type=receipt_type,
             user_id=user_id,
             event_ids=[event_id],
+            thread_id=thread_id,
             data={"ts": int(self.clock.time_msec())},
         )
 
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index cf9cd6833b..b2522f98ca 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -427,7 +427,8 @@ class FederationSenderHandler:
                 receipt.receipt_type,
                 receipt.user_id,
                 [receipt.event_id],
-                receipt.data,
+                thread_id=receipt.thread_id,
+                data=receipt.data,
             )
             await self.federation_sender.send_read_receipt(receipt_info)
 
diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py
index 398bebeaa6..e01155ad59 100644
--- a/synapse/replication/tcp/streams/_base.py
+++ b/synapse/replication/tcp/streams/_base.py
@@ -361,6 +361,7 @@ class ReceiptsStream(Stream):
         receipt_type: str
         user_id: str
         event_id: str
+        thread_id: Optional[str]
         data: dict
 
     NAME = "receipts"
diff --git a/synapse/rest/client/read_marker.py b/synapse/rest/client/read_marker.py
index 5e53096539..852838515c 100644
--- a/synapse/rest/client/read_marker.py
+++ b/synapse/rest/client/read_marker.py
@@ -83,6 +83,8 @@ class ReadMarkerRestServlet(RestServlet):
                     receipt_type,
                     user_id=requester.user.to_string(),
                     event_id=event_id,
+                    # Setting the thread ID is not possible with the /read_markers endpoint.
+                    thread_id=None,
                 )
 
         return 200, {}
diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py
index 5b7fad7402..f3ff156abe 100644
--- a/synapse/rest/client/receipts.py
+++ b/synapse/rest/client/receipts.py
@@ -49,6 +49,7 @@ class ReceiptRestServlet(RestServlet):
             ReceiptTypes.READ_PRIVATE,
             ReceiptTypes.FULLY_READ,
         }
+        self._msc3771_enabled = hs.config.experimental.msc3771_enabled
 
     async def on_POST(
         self, request: SynapseRequest, room_id: str, receipt_type: str, event_id: str
@@ -61,7 +62,17 @@ class ReceiptRestServlet(RestServlet):
                 f"Receipt type must be {', '.join(self._known_receipt_types)}",
             )
 
-        parse_json_object_from_request(request, allow_empty_body=False)
+        body = parse_json_object_from_request(request)
+
+        # Pull the thread ID, if one exists.
+        thread_id = None
+        if self._msc3771_enabled:
+            if "thread_id" in body:
+                thread_id = body.get("thread_id")
+                if not thread_id or not isinstance(thread_id, str):
+                    raise SynapseError(
+                        400, "thread_id field must be a non-empty string"
+                    )
 
         await self.presence_handler.bump_presence_active_time(requester.user)
 
@@ -77,6 +88,7 @@ class ReceiptRestServlet(RestServlet):
                 receipt_type,
                 user_id=requester.user.to_string(),
                 event_id=event_id,
+                thread_id=thread_id,
             )
 
         return 200, {}
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index b3917a5abc..c95b0d6f19 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -103,6 +103,8 @@ class VersionsRestServlet(RestServlet):
                     "org.matrix.msc3030": self.config.experimental.msc3030_enabled,
                     # Adds support for thread relations, per MSC3440.
                     "org.matrix.msc3440.stable": True,  # TODO: remove when "v1.3" is added above
+                    # Support for thread read receipts.
+                    "org.matrix.msc3771": self.config.experimental.msc3771_enabled,
                     # Allows moderators to fetch redacted event content as described in MSC2815
                     "fi.mau.msc2815": self.config.experimental.msc2815_enabled,
                     # Adds support for login token requests as per MSC3882
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 921cd4dc5e..9d116f6925 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -95,6 +95,8 @@ UNIQUE_INDEX_BACKGROUND_UPDATES = {
     "local_media_repository_thumbnails": "local_media_repository_thumbnails_method_idx",
     "remote_media_cache_thumbnails": "remote_media_repository_thumbnails_method_idx",
     "event_push_summary": "event_push_summary_unique_index",
+    "receipts_linearized": "receipts_linearized_unique_index",
+    "receipts_graph": "receipts_graph_unique_index",
 }
 
 
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index ddb8e80b69..52fe0db924 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -540,7 +540,9 @@ class ReceiptsWorkerStore(SQLBaseStore):
 
     async def get_all_updated_receipts(
         self, instance_name: str, last_id: int, current_id: int, limit: int
-    ) -> Tuple[List[Tuple[int, list]], int, bool]:
+    ) -> Tuple[
+        List[Tuple[int, Tuple[str, str, str, str, Optional[str], JsonDict]]], int, bool
+    ]:
         """Get updates for receipts replication stream.
 
         Args:
@@ -567,9 +569,13 @@ class ReceiptsWorkerStore(SQLBaseStore):
 
         def get_all_updated_receipts_txn(
             txn: LoggingTransaction,
-        ) -> Tuple[List[Tuple[int, list]], int, bool]:
+        ) -> Tuple[
+            List[Tuple[int, Tuple[str, str, str, str, Optional[str], JsonDict]]],
+            int,
+            bool,
+        ]:
             sql = """
-                SELECT stream_id, room_id, receipt_type, user_id, event_id, data
+                SELECT stream_id, room_id, receipt_type, user_id, event_id, thread_id, data
                 FROM receipts_linearized
                 WHERE ? < stream_id AND stream_id <= ?
                 ORDER BY stream_id ASC
@@ -578,8 +584,8 @@ class ReceiptsWorkerStore(SQLBaseStore):
             txn.execute(sql, (last_id, current_id, limit))
 
             updates = cast(
-                List[Tuple[int, list]],
-                [(r[0], r[1:5] + (db_to_json(r[5]),)) for r in txn],
+                List[Tuple[int, Tuple[str, str, str, str, Optional[str], JsonDict]]],
+                [(r[0], r[1:6] + (db_to_json(r[6]),)) for r in txn],
             )
 
             limited = False
@@ -631,6 +637,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
         receipt_type: str,
         user_id: str,
         event_id: str,
+        thread_id: Optional[str],
         data: JsonDict,
         stream_id: int,
     ) -> Optional[int]:
@@ -657,12 +664,27 @@ class ReceiptsWorkerStore(SQLBaseStore):
         # We don't want to clobber receipts for more recent events, so we
         # have to compare orderings of existing receipts
         if stream_ordering is not None:
-            sql = (
-                "SELECT stream_ordering, event_id FROM events"
-                " INNER JOIN receipts_linearized AS r USING (event_id, room_id)"
-                " WHERE r.room_id = ? AND r.receipt_type = ? AND r.user_id = ?"
+            if thread_id is None:
+                thread_clause = "r.thread_id IS NULL"
+                thread_args: Tuple[str, ...] = ()
+            else:
+                thread_clause = "r.thread_id = ?"
+                thread_args = (thread_id,)
+
+            sql = f"""
+            SELECT stream_ordering, event_id FROM events
+            INNER JOIN receipts_linearized AS r USING (event_id, room_id)
+            WHERE r.room_id = ? AND r.receipt_type = ? AND r.user_id = ? AND {thread_clause}
+            """
+            txn.execute(
+                sql,
+                (
+                    room_id,
+                    receipt_type,
+                    user_id,
+                )
+                + thread_args,
             )
-            txn.execute(sql, (room_id, receipt_type, user_id))
 
             for so, eid in txn:
                 if int(so) >= stream_ordering:
@@ -682,21 +704,28 @@ class ReceiptsWorkerStore(SQLBaseStore):
             self._receipts_stream_cache.entity_has_changed, room_id, stream_id
         )
 
+        keyvalues = {
+            "room_id": room_id,
+            "receipt_type": receipt_type,
+            "user_id": user_id,
+        }
+        where_clause = ""
+        if thread_id is None:
+            where_clause = "thread_id IS NULL"
+        else:
+            keyvalues["thread_id"] = thread_id
+
         self.db_pool.simple_upsert_txn(
             txn,
             table="receipts_linearized",
-            keyvalues={
-                "room_id": room_id,
-                "receipt_type": receipt_type,
-                "user_id": user_id,
-            },
+            keyvalues=keyvalues,
             values={
                 "stream_id": stream_id,
                 "event_id": event_id,
                 "event_stream_ordering": stream_ordering,
                 "data": json_encoder.encode(data),
-                "thread_id": None,
             },
+            where_clause=where_clause,
             # receipts_linearized has a unique constraint on
             # (user_id, room_id, receipt_type), so no need to lock
             lock=False,
@@ -748,6 +777,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
         receipt_type: str,
         user_id: str,
         event_ids: List[str],
+        thread_id: Optional[str],
         data: dict,
     ) -> Optional[Tuple[int, int]]:
         """Insert a receipt, either from local client or remote server.
@@ -780,6 +810,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
                 receipt_type,
                 user_id,
                 linearized_event_id,
+                thread_id,
                 data,
                 stream_id=stream_id,
                 # Read committed is actually beneficial here because we check for a receipt with
@@ -794,7 +825,8 @@ class ReceiptsWorkerStore(SQLBaseStore):
 
         now = self._clock.time_msec()
         logger.debug(
-            "RR for event %s in %s (%i ms old)",
+            "Receipt %s for event %s in %s (%i ms old)",
+            receipt_type,
             linearized_event_id,
             room_id,
             now - event_ts,
@@ -807,6 +839,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
             receipt_type,
             user_id,
             event_ids,
+            thread_id,
             data,
         )
 
@@ -821,6 +854,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
         receipt_type: str,
         user_id: str,
         event_ids: List[str],
+        thread_id: Optional[str],
         data: JsonDict,
     ) -> None:
         assert self._can_write_to_receipts
@@ -832,19 +866,26 @@ class ReceiptsWorkerStore(SQLBaseStore):
         # FIXME: This shouldn't invalidate the whole cache
         txn.call_after(self._get_linearized_receipts_for_room.invalidate, (room_id,))
 
+        keyvalues = {
+            "room_id": room_id,
+            "receipt_type": receipt_type,
+            "user_id": user_id,
+        }
+        where_clause = ""
+        if thread_id is None:
+            where_clause = "thread_id IS NULL"
+        else:
+            keyvalues["thread_id"] = thread_id
+
         self.db_pool.simple_upsert_txn(
             txn,
             table="receipts_graph",
-            keyvalues={
-                "room_id": room_id,
-                "receipt_type": receipt_type,
-                "user_id": user_id,
-            },
+            keyvalues=keyvalues,
             values={
                 "event_ids": json_encoder.encode(event_ids),
                 "data": json_encoder.encode(data),
-                "thread_id": None,
             },
+            where_clause=where_clause,
             # receipts_graph has a unique constraint on
             # (user_id, room_id, receipt_type), so no need to lock
             lock=False,
diff --git a/synapse/types.py b/synapse/types.py
index ec44601f54..773f0438d5 100644
--- a/synapse/types.py
+++ b/synapse/types.py
@@ -835,6 +835,7 @@ class ReadReceipt:
     receipt_type: str
     user_id: str
     event_ids: List[str]
+    thread_id: Optional[str]
     data: JsonDict
 
 
diff --git a/tests/federation/test_federation_sender.py b/tests/federation/test_federation_sender.py
index a5aa500ef8..f1e357764f 100644
--- a/tests/federation/test_federation_sender.py
+++ b/tests/federation/test_federation_sender.py
@@ -49,7 +49,12 @@ class FederationSenderReceiptsTestCases(HomeserverTestCase):
 
         sender = self.hs.get_federation_sender()
         receipt = ReadReceipt(
-            "room_id", "m.read", "user_id", ["event_id"], {"ts": 1234}
+            "room_id",
+            "m.read",
+            "user_id",
+            ["event_id"],
+            thread_id=None,
+            data={"ts": 1234},
         )
         self.successResultOf(defer.ensureDeferred(sender.send_read_receipt(receipt)))
 
@@ -89,7 +94,12 @@ class FederationSenderReceiptsTestCases(HomeserverTestCase):
 
         sender = self.hs.get_federation_sender()
         receipt = ReadReceipt(
-            "room_id", "m.read", "user_id", ["event_id"], {"ts": 1234}
+            "room_id",
+            "m.read",
+            "user_id",
+            ["event_id"],
+            thread_id=None,
+            data={"ts": 1234},
         )
         self.successResultOf(defer.ensureDeferred(sender.send_read_receipt(receipt)))
 
@@ -121,7 +131,12 @@ class FederationSenderReceiptsTestCases(HomeserverTestCase):
 
         # send the second RR
         receipt = ReadReceipt(
-            "room_id", "m.read", "user_id", ["other_id"], {"ts": 1234}
+            "room_id",
+            "m.read",
+            "user_id",
+            ["other_id"],
+            thread_id=None,
+            data={"ts": 1234},
         )
         self.successResultOf(defer.ensureDeferred(sender.send_read_receipt(receipt)))
         self.pump()
diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py
index b17af2725b..af24c4984d 100644
--- a/tests/handlers/test_appservice.py
+++ b/tests/handlers/test_appservice.py
@@ -447,6 +447,7 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase):
                     receipt_type="m.read",
                     user_id=self.local_user,
                     event_ids=[f"$eventid_{i}"],
+                    thread_id=None,
                     data={},
                 )
             )
diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py
index 49a21e2e85..efd92793c0 100644
--- a/tests/replication/slave/storage/test_events.py
+++ b/tests/replication/slave/storage/test_events.py
@@ -171,7 +171,7 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase):
         if send_receipt:
             self.get_success(
                 self.master_store.insert_receipt(
-                    ROOM_ID, ReceiptTypes.READ, USER_ID_2, [event1.event_id], {}
+                    ROOM_ID, ReceiptTypes.READ, USER_ID_2, [event1.event_id], None, {}
                 )
             )
 
diff --git a/tests/replication/tcp/streams/test_receipts.py b/tests/replication/tcp/streams/test_receipts.py
index eb00117845..ede6d0c118 100644
--- a/tests/replication/tcp/streams/test_receipts.py
+++ b/tests/replication/tcp/streams/test_receipts.py
@@ -33,7 +33,12 @@ class ReceiptsStreamTestCase(BaseStreamTestCase):
         # tell the master to send a new receipt
         self.get_success(
             self.hs.get_datastores().main.insert_receipt(
-                "!room:blue", "m.read", USER_ID, ["$event:blue"], {"a": 1}
+                "!room:blue",
+                "m.read",
+                USER_ID,
+                ["$event:blue"],
+                thread_id=None,
+                data={"a": 1},
             )
         )
         self.replicate()
@@ -48,6 +53,7 @@ class ReceiptsStreamTestCase(BaseStreamTestCase):
         self.assertEqual("m.read", row.receipt_type)
         self.assertEqual(USER_ID, row.user_id)
         self.assertEqual("$event:blue", row.event_id)
+        self.assertIsNone(row.thread_id)
         self.assertEqual({"a": 1}, row.data)
 
         # Now let's disconnect and insert some data.
@@ -57,7 +63,12 @@ class ReceiptsStreamTestCase(BaseStreamTestCase):
 
         self.get_success(
             self.hs.get_datastores().main.insert_receipt(
-                "!room2:blue", "m.read", USER_ID, ["$event2:foo"], {"a": 2}
+                "!room2:blue",
+                "m.read",
+                USER_ID,
+                ["$event2:foo"],
+                thread_id=None,
+                data={"a": 2},
             )
         )
         self.replicate()
diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py
index fc43d7edd1..08c74b93e3 100644
--- a/tests/storage/test_event_push_actions.py
+++ b/tests/storage/test_event_push_actions.py
@@ -106,6 +106,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
                     "m.read",
                     user_id=user_id,
                     event_ids=[event_id],
+                    thread_id=None,
                     data={},
                 )
             )
diff --git a/tests/storage/test_receipts.py b/tests/storage/test_receipts.py
index c89bfff241..9459ee1705 100644
--- a/tests/storage/test_receipts.py
+++ b/tests/storage/test_receipts.py
@@ -131,13 +131,18 @@ class ReceiptTestCase(HomeserverTestCase):
         # Send public read receipt for the first event
         self.get_success(
             self.store.insert_receipt(
-                self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_1_id], {}
+                self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_1_id], None, {}
             )
         )
         # Send private read receipt for the second event
         self.get_success(
             self.store.insert_receipt(
-                self.room_id1, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event1_2_id], {}
+                self.room_id1,
+                ReceiptTypes.READ_PRIVATE,
+                OUR_USER_ID,
+                [event1_2_id],
+                None,
+                {},
             )
         )
 
@@ -164,7 +169,7 @@ class ReceiptTestCase(HomeserverTestCase):
         # Test receipt updating
         self.get_success(
             self.store.insert_receipt(
-                self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_2_id], {}
+                self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_2_id], None, {}
             )
         )
         res = self.get_success(
@@ -180,7 +185,12 @@ class ReceiptTestCase(HomeserverTestCase):
         # Test new room is reflected in what the method returns
         self.get_success(
             self.store.insert_receipt(
-                self.room_id2, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event2_1_id], {}
+                self.room_id2,
+                ReceiptTypes.READ_PRIVATE,
+                OUR_USER_ID,
+                [event2_1_id],
+                None,
+                {},
             )
         )
         res = self.get_success(
@@ -202,13 +212,18 @@ class ReceiptTestCase(HomeserverTestCase):
         # Send public read receipt for the first event
         self.get_success(
             self.store.insert_receipt(
-                self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_1_id], {}
+                self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_1_id], None, {}
             )
         )
         # Send private read receipt for the second event
         self.get_success(
             self.store.insert_receipt(
-                self.room_id1, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event1_2_id], {}
+                self.room_id1,
+                ReceiptTypes.READ_PRIVATE,
+                OUR_USER_ID,
+                [event1_2_id],
+                None,
+                {},
             )
         )
 
@@ -241,7 +256,7 @@ class ReceiptTestCase(HomeserverTestCase):
         # Test receipt updating
         self.get_success(
             self.store.insert_receipt(
-                self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_2_id], {}
+                self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_2_id], None, {}
             )
         )
         res = self.get_success(
@@ -259,7 +274,12 @@ class ReceiptTestCase(HomeserverTestCase):
         # Test new room is reflected in what the method returns
         self.get_success(
             self.store.insert_receipt(
-                self.room_id2, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event2_1_id], {}
+                self.room_id2,
+                ReceiptTypes.READ_PRIVATE,
+                OUR_USER_ID,
+                [event2_1_id],
+                None,
+                {},
             )
         )
         res = self.get_success(
-- 
cgit 1.5.1


From db868db594c1a8a0baa3686b60f1c49c0d4be371 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Fri, 23 Sep 2022 11:49:39 -0500
Subject: Fix access token leak to logs from proxyagent (#13855)

This can happen specifically with an application service `/transactions/10722?access_token=leaked` request

Fix https://github.com/matrix-org/synapse/issues/13010

---

Saw an example leak in https://github.com/matrix-org/synapse/issues/13423#issuecomment-1205348482

```
2022-08-04 14:47:57,925 - synapse.http.client - 401 - DEBUG - as-sender-signal-1 - Sending request PUT http://localhost:29328/transactions/10722?access_token=<redacted>
2022-08-04 14:47:57,926 - synapse.http.proxyagent - 223 - DEBUG - as-sender-signal-1 - Requesting b'http://localhost:29328/transactions/10722?access_token=leaked' via <HostnameEndpoint localhost:29328>
```
---
 changelog.d/13855.bugfix   | 1 +
 synapse/http/proxyagent.py | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/13855.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13855.bugfix b/changelog.d/13855.bugfix
new file mode 100644
index 0000000000..5ea8539bd8
--- /dev/null
+++ b/changelog.d/13855.bugfix
@@ -0,0 +1 @@
+Fix access token leak to logs from proxy agent.
diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py
index b2a50c9105..1f8227896f 100644
--- a/synapse/http/proxyagent.py
+++ b/synapse/http/proxyagent.py
@@ -36,6 +36,7 @@ from twisted.web.error import SchemeNotSupported
 from twisted.web.http_headers import Headers
 from twisted.web.iweb import IAgent, IBodyProducer, IPolicyForHTTPS
 
+from synapse.http import redact_uri
 from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint, ProxyCredentials
 from synapse.types import ISynapseReactor
 
@@ -220,7 +221,11 @@ class ProxyAgent(_AgentBase):
                 self._reactor, parsed_uri.host, parsed_uri.port, **self._endpoint_kwargs
             )
 
-        logger.debug("Requesting %s via %s", uri, endpoint)
+        logger.debug(
+            "Requesting %s via %s",
+            redact_uri(uri.decode("ascii", errors="replace")),
+            endpoint,
+        )
 
         if parsed_uri.scheme == b"https":
             tls_connection_creator = self._policy_for_https.creatorForNetloc(
-- 
cgit 1.5.1


From f49f73c0da5502792c65d3de1ffd352ceb6af562 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Fri, 23 Sep 2022 17:55:15 +0100
Subject: Faster room joins: Avoid blocking `/keys/changes` (#13888)

Part of the work for #12993.

Once #12993 is fully resolved, we expect `/keys/changes` to behave
sensibly when joined to a room with partial state.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/13888.misc               | 1 +
 synapse/handlers/device.py           | 7 +++++--
 synapse/storage/controllers/state.py | 7 ++++++-
 3 files changed, 12 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/13888.misc

(limited to 'synapse')

diff --git a/changelog.d/13888.misc b/changelog.d/13888.misc
new file mode 100644
index 0000000000..4ffd9bcede
--- /dev/null
+++ b/changelog.d/13888.misc
@@ -0,0 +1 @@
+Faster room joins: Avoid waiting for full state when processing `/keys/changes` requests.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 6566b3bf3d..bad262731c 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -195,7 +195,9 @@ class DeviceWorkerHandler:
         possibly_changed = set(changed)
         possibly_left = set()
         for room_id in rooms_changed:
-            current_state_ids = await self._state_storage.get_current_state_ids(room_id)
+            current_state_ids = await self._state_storage.get_current_state_ids(
+                room_id, await_full_state=False
+            )
 
             # The user may have left the room
             # TODO: Check if they actually did or if we were just invited.
@@ -234,7 +236,8 @@ class DeviceWorkerHandler:
 
             # mapping from event_id -> state_dict
             prev_state_ids = await self._state_storage.get_state_ids_for_events(
-                event_ids
+                event_ids,
+                await_full_state=False,
             )
 
             # Check if we've joined the room? If so we just blindly add all the users to
diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py
index b1aa17047c..bb60130afe 100644
--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@@ -407,6 +407,7 @@ class StateStorageController:
         self,
         room_id: str,
         state_filter: Optional[StateFilter] = None,
+        await_full_state: bool = True,
         on_invalidate: Optional[Callable[[], None]] = None,
     ) -> StateMap[str]:
         """Get the current state event ids for a room based on the
@@ -419,13 +420,17 @@ class StateStorageController:
             room_id: The room to get the state IDs of. state_filter: The state
             filter used to fetch state from the
                 database.
+            await_full_state: if true, will block if we do not yet have complete
+               state for the room.
             on_invalidate: Callback for when the `get_current_state_ids` cache
                 for the room gets invalidated.
 
         Returns:
             The current state of the room.
         """
-        if not state_filter or state_filter.must_await_full_state(self._is_mine_id):
+        if await_full_state and (
+            not state_filter or state_filter.must_await_full_state(self._is_mine_id)
+        ):
             await self._partial_state_room_tracker.await_full_state(room_id)
 
         if state_filter and not state_filter.is_full():
-- 
cgit 1.5.1


From ac1a31740b6d0dfda4d57a25762aaddfde981caf Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Fri, 23 Sep 2022 14:01:29 -0500
Subject: Only try to backfill event if we haven't tried before recently
 (#13635)

Only try to backfill event if we haven't tried before recently (exponential backoff). No need to keep trying the same backfill point that fails over and over.

Fix https://github.com/matrix-org/synapse/issues/13622
Fix https://github.com/matrix-org/synapse/issues/8451

Follow-up to https://github.com/matrix-org/synapse/pull/13589

Part of https://github.com/matrix-org/synapse/issues/13356
---
 changelog.d/13635.feature                          |   1 +
 synapse/handlers/federation.py                     |   4 +-
 synapse/storage/databases/main/event_federation.py | 188 ++++++--
 tests/storage/test_event_federation.py             | 481 ++++++++++++++++++++-
 4 files changed, 626 insertions(+), 48 deletions(-)
 create mode 100644 changelog.d/13635.feature

(limited to 'synapse')

diff --git a/changelog.d/13635.feature b/changelog.d/13635.feature
new file mode 100644
index 0000000000..d86bf7ed80
--- /dev/null
+++ b/changelog.d/13635.feature
@@ -0,0 +1 @@
+Exponentially back off from backfilling the same event over and over.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 583d5ecd77..e1a4265a64 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -226,9 +226,7 @@ class FederationHandler:
         """
         backwards_extremities = [
             _BackfillPoint(event_id, depth, _BackfillPointType.BACKWARDS_EXTREMITY)
-            for event_id, depth in await self.store.get_oldest_event_ids_with_depth_in_room(
-                room_id
-            )
+            for event_id, depth in await self.store.get_backfill_points_in_room(room_id)
         ]
 
         insertion_events_to_be_backfilled: List[_BackfillPoint] = []
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index ef477978ed..3251fca6fb 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import datetime
 import itertools
 import logging
 from queue import Empty, PriorityQueue
@@ -43,7 +44,7 @@ from synapse.storage.database import (
 )
 from synapse.storage.databases.main.events_worker import EventsWorkerStore
 from synapse.storage.databases.main.signatures import SignatureWorkerStore
-from synapse.storage.engines import PostgresEngine
+from synapse.storage.engines import PostgresEngine, Sqlite3Engine
 from synapse.types import JsonDict
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached
@@ -72,6 +73,13 @@ pdus_pruned_from_federation_queue = Counter(
 
 logger = logging.getLogger(__name__)
 
+BACKFILL_EVENT_BACKOFF_UPPER_BOUND_SECONDS: int = int(
+    datetime.timedelta(days=7).total_seconds()
+)
+BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_SECONDS: int = int(
+    datetime.timedelta(hours=1).total_seconds()
+)
+
 
 # All the info we need while iterating the DAG while backfilling
 @attr.s(frozen=True, slots=True, auto_attribs=True)
@@ -715,96 +723,189 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
 
     @trace
     @tag_args
-    async def get_oldest_event_ids_with_depth_in_room(
-        self, room_id: str
+    async def get_backfill_points_in_room(
+        self,
+        room_id: str,
     ) -> List[Tuple[str, int]]:
-        """Gets the oldest events(backwards extremities) in the room along with the
-        aproximate depth.
-
-        We use this function so that we can compare and see if someones current
-        depth at their current scrollback is within pagination range of the
-        event extremeties. If the current depth is close to the depth of given
-        oldest event, we can trigger a backfill.
+        """
+        Gets the oldest events (backwards extremities) in the room along with the
+        approximate depth. Sorted by depth, highest to lowest (descending).
 
         Args:
             room_id: Room where we want to find the oldest events
 
         Returns:
-            List of (event_id, depth) tuples
+            List of (event_id, depth) tuples. Sorted by depth, highest to lowest
+            (descending)
         """
 
-        def get_oldest_event_ids_with_depth_in_room_txn(
+        def get_backfill_points_in_room_txn(
             txn: LoggingTransaction, room_id: str
         ) -> List[Tuple[str, int]]:
-            # Assemble a dictionary with event_id -> depth for the oldest events
+            # Assemble a tuple lookup of event_id -> depth for the oldest events
             # we know of in the room. Backwards extremeties are the oldest
             # events we know of in the room but we only know of them because
-            # some other event referenced them by prev_event and aren't peristed
-            # in our database yet (meaning we don't know their depth
-            # specifically). So we need to look for the aproximate depth from
+            # some other event referenced them by prev_event and aren't
+            # persisted in our database yet (meaning we don't know their depth
+            # specifically). So we need to look for the approximate depth from
             # the events connected to the current backwards extremeties.
             sql = """
-                SELECT b.event_id, MAX(e.depth) FROM events as e
+                SELECT backward_extrem.event_id, event.depth FROM events AS event
                 /**
                  * Get the edge connections from the event_edges table
                  * so we can see whether this event's prev_events points
                  * to a backward extremity in the next join.
                  */
-                INNER JOIN event_edges as g
-                ON g.event_id = e.event_id
+                INNER JOIN event_edges AS edge
+                ON edge.event_id = event.event_id
                 /**
                  * We find the "oldest" events in the room by looking for
                  * events connected to backwards extremeties (oldest events
                  * in the room that we know of so far).
                  */
-                INNER JOIN event_backward_extremities as b
-                ON g.prev_event_id = b.event_id
-                WHERE b.room_id = ? AND g.is_state is ?
-                GROUP BY b.event_id
+                INNER JOIN event_backward_extremities AS backward_extrem
+                ON edge.prev_event_id = backward_extrem.event_id
+                /**
+                 * We use this info to make sure we don't retry to use a backfill point
+                 * if we've already attempted to backfill from it recently.
+                 */
+                LEFT JOIN event_failed_pull_attempts AS failed_backfill_attempt_info
+                ON
+                    failed_backfill_attempt_info.room_id = backward_extrem.room_id
+                    AND failed_backfill_attempt_info.event_id = backward_extrem.event_id
+                WHERE
+                    backward_extrem.room_id = ?
+                    /* We only care about non-state edges because we used to use
+                     * `event_edges` for two different sorts of "edges" (the current
+                     * event DAG, but also a link to the previous state, for state
+                     * events). These legacy state event edges can be distinguished by
+                     * `is_state` and are removed from the codebase and schema but
+                     * because the schema change is in a background update, it's not
+                     * necessarily safe to assume that it will have been completed.
+                     */
+                    AND edge.is_state is ? /* False */
+                    /**
+                     * Exponential back-off (up to the upper bound) so we don't retry the
+                     * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc.
+                     *
+                     * We use `1 << n` as a power of 2 equivalent for compatibility
+                     * with older SQLites. The left shift equivalent only works with
+                     * powers of 2 because left shift is a binary operation (base-2).
+                     * Otherwise, we would use `power(2, n)` or the power operator, `2^n`.
+                     */
+                    AND (
+                        failed_backfill_attempt_info.event_id IS NULL
+                        OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((1 << failed_backfill_attempt_info.num_attempts) * ? /* step */, ? /* upper bound */)
+                    )
+                /**
+                 * Sort from highest to the lowest depth. Then tie-break on
+                 * alphabetical order of the event_ids so we get a consistent
+                 * ordering which is nice when asserting things in tests.
+                 */
+                ORDER BY event.depth DESC, backward_extrem.event_id DESC
             """
 
-            txn.execute(sql, (room_id, False))
+            if isinstance(self.database_engine, PostgresEngine):
+                least_function = "least"
+            elif isinstance(self.database_engine, Sqlite3Engine):
+                least_function = "min"
+            else:
+                raise RuntimeError("Unknown database engine")
+
+            txn.execute(
+                sql % (least_function,),
+                (
+                    room_id,
+                    False,
+                    self._clock.time_msec(),
+                    1000 * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_SECONDS,
+                    1000 * BACKFILL_EVENT_BACKOFF_UPPER_BOUND_SECONDS,
+                ),
+            )
 
             return cast(List[Tuple[str, int]], txn.fetchall())
 
         return await self.db_pool.runInteraction(
-            "get_oldest_event_ids_with_depth_in_room",
-            get_oldest_event_ids_with_depth_in_room_txn,
+            "get_backfill_points_in_room",
+            get_backfill_points_in_room_txn,
             room_id,
         )
 
     @trace
     async def get_insertion_event_backward_extremities_in_room(
-        self, room_id: str
+        self,
+        room_id: str,
     ) -> List[Tuple[str, int]]:
-        """Get the insertion events we know about that we haven't backfilled yet.
-
-        We use this function so that we can compare and see if someones current
-        depth at their current scrollback is within pagination range of the
-        insertion event. If the current depth is close to the depth of given
-        insertion event, we can trigger a backfill.
+        """
+        Get the insertion events we know about that we haven't backfilled yet
+        along with the approximate depth. Sorted by depth, highest to lowest
+        (descending).
 
         Args:
             room_id: Room where we want to find the oldest events
 
         Returns:
-            List of (event_id, depth) tuples
+            List of (event_id, depth) tuples. Sorted by depth, highest to lowest
+            (descending)
         """
 
         def get_insertion_event_backward_extremities_in_room_txn(
             txn: LoggingTransaction, room_id: str
         ) -> List[Tuple[str, int]]:
             sql = """
-                SELECT b.event_id, MAX(e.depth) FROM insertion_events as i
+                SELECT
+                    insertion_event_extremity.event_id, event.depth
                 /* We only want insertion events that are also marked as backwards extremities */
-                INNER JOIN insertion_event_extremities as b USING (event_id)
+                FROM insertion_event_extremities AS insertion_event_extremity
                 /* Get the depth of the insertion event from the events table */
-                INNER JOIN events AS e USING (event_id)
-                WHERE b.room_id = ?
-                GROUP BY b.event_id
+                INNER JOIN events AS event USING (event_id)
+                /**
+                 * We use this info to make sure we don't retry to use a backfill point
+                 * if we've already attempted to backfill from it recently.
+                 */
+                LEFT JOIN event_failed_pull_attempts AS failed_backfill_attempt_info
+                ON
+                    failed_backfill_attempt_info.room_id = insertion_event_extremity.room_id
+                    AND failed_backfill_attempt_info.event_id = insertion_event_extremity.event_id
+                WHERE
+                    insertion_event_extremity.room_id = ?
+                    /**
+                     * Exponential back-off (up to the upper bound) so we don't retry the
+                     * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc
+                     *
+                     * We use `1 << n` as a power of 2 equivalent for compatibility
+                     * with older SQLites. The left shift equivalent only works with
+                     * powers of 2 because left shift is a binary operation (base-2).
+                     * Otherwise, we would use `power(2, n)` or the power operator, `2^n`.
+                     */
+                    AND (
+                        failed_backfill_attempt_info.event_id IS NULL
+                        OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((1 << failed_backfill_attempt_info.num_attempts) * ? /* step */, ? /* upper bound */)
+                    )
+                /**
+                 * Sort from highest to the lowest depth. Then tie-break on
+                 * alphabetical order of the event_ids so we get a consistent
+                 * ordering which is nice when asserting things in tests.
+                 */
+                ORDER BY event.depth DESC, insertion_event_extremity.event_id DESC
             """
 
-            txn.execute(sql, (room_id,))
+            if isinstance(self.database_engine, PostgresEngine):
+                least_function = "least"
+            elif isinstance(self.database_engine, Sqlite3Engine):
+                least_function = "min"
+            else:
+                raise RuntimeError("Unknown database engine")
+
+            txn.execute(
+                sql % (least_function,),
+                (
+                    room_id,
+                    self._clock.time_msec(),
+                    1000 * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_SECONDS,
+                    1000 * BACKFILL_EVENT_BACKOFF_UPPER_BOUND_SECONDS,
+                ),
+            )
             return cast(List[Tuple[str, int]], txn.fetchall())
 
         return await self.db_pool.runInteraction(
@@ -1539,7 +1640,12 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
         self,
         room_id: str,
     ) -> Optional[Tuple[str, str]]:
-        """Get the next event ID in the staging area for the given room."""
+        """
+        Get the next event ID in the staging area for the given room.
+
+        Returns:
+            Tuple of the `origin` and `event_id`
+        """
 
         def _get_next_staged_event_id_for_room_txn(
             txn: LoggingTransaction,
diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index a6679e1312..85739c464e 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -12,25 +12,38 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Tuple, Union
+import datetime
+from typing import Dict, List, Tuple, Union
 
 import attr
 from parameterized import parameterized
 
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.api.constants import EventTypes
 from synapse.api.room_versions import (
     KNOWN_ROOM_VERSIONS,
     EventFormatVersions,
     RoomVersion,
 )
 from synapse.events import _EventInternalMetadata
-from synapse.util import json_encoder
+from synapse.server import HomeServer
+from synapse.storage.database import LoggingTransaction
+from synapse.types import JsonDict
+from synapse.util import Clock, json_encoder
 
 import tests.unittest
 import tests.utils
 
 
+@attr.s(auto_attribs=True, frozen=True, slots=True)
+class _BackfillSetupInfo:
+    room_id: str
+    depth_map: Dict[str, int]
+
+
 class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
 
     def test_get_prev_events_for_room(self):
@@ -571,11 +584,471 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         )
         self.assertEqual(count, 1)
 
-        _, event_id = self.get_success(
+        next_staged_event_info = self.get_success(
             self.store.get_next_staged_event_id_for_room(room_id)
         )
+        assert next_staged_event_info
+        _, event_id = next_staged_event_info
         self.assertEqual(event_id, "$fake_event_id_500")
 
+    def _setup_room_for_backfill_tests(self) -> _BackfillSetupInfo:
+        """
+        Sets up a room with various events and backward extremities to test
+        backfill functions against.
+
+        Returns:
+            _BackfillSetupInfo including the `room_id` to test against and
+            `depth_map` of events in the room
+        """
+        room_id = "!backfill-room-test:some-host"
+
+        # The silly graph we use to test grabbing backward extremities,
+        # where the top is the oldest events.
+        #    1 (oldest)
+        #    |
+        #    2 ⹁
+        #    |  \
+        #    |   [b1, b2, b3]
+        #    |   |
+        #    |   A
+        #    |  /
+        #    3 {
+        #    |  \
+        #    |   [b4, b5, b6]
+        #    |   |
+        #    |   B
+        #    |  /
+        #    4 ´
+        #    |
+        #    5 (newest)
+
+        event_graph: Dict[str, List[str]] = {
+            "1": [],
+            "2": ["1"],
+            "3": ["2", "A"],
+            "4": ["3", "B"],
+            "5": ["4"],
+            "A": ["b1", "b2", "b3"],
+            "b1": ["2"],
+            "b2": ["2"],
+            "b3": ["2"],
+            "B": ["b4", "b5", "b6"],
+            "b4": ["3"],
+            "b5": ["3"],
+            "b6": ["3"],
+        }
+
+        depth_map: Dict[str, int] = {
+            "1": 1,
+            "2": 2,
+            "b1": 3,
+            "b2": 3,
+            "b3": 3,
+            "A": 4,
+            "3": 5,
+            "b4": 6,
+            "b5": 6,
+            "b6": 6,
+            "B": 7,
+            "4": 8,
+            "5": 9,
+        }
+
+        # The events we have persisted on our server.
+        # The rest are events in the room but not backfilled yet.
+        our_server_events = {"5", "4", "B", "3", "A"}
+
+        complete_event_dict_map: Dict[str, JsonDict] = {}
+        stream_ordering = 0
+        for (event_id, prev_event_ids) in event_graph.items():
+            depth = depth_map[event_id]
+
+            complete_event_dict_map[event_id] = {
+                "event_id": event_id,
+                "type": "test_regular_type",
+                "room_id": room_id,
+                "sender": "@sender",
+                "prev_event_ids": prev_event_ids,
+                "auth_event_ids": [],
+                "origin_server_ts": stream_ordering,
+                "depth": depth,
+                "stream_ordering": stream_ordering,
+                "content": {"body": "event" + event_id},
+            }
+
+            stream_ordering += 1
+
+        def populate_db(txn: LoggingTransaction):
+            # Insert the room to satisfy the foreign key constraint of
+            # `event_failed_pull_attempts`
+            self.store.db_pool.simple_insert_txn(
+                txn,
+                "rooms",
+                {
+                    "room_id": room_id,
+                    "creator": "room_creator_user_id",
+                    "is_public": True,
+                    "room_version": "6",
+                },
+            )
+
+            # Insert our server events
+            for event_id in our_server_events:
+                event_dict = complete_event_dict_map[event_id]
+
+                self.store.db_pool.simple_insert_txn(
+                    txn,
+                    table="events",
+                    values={
+                        "event_id": event_dict.get("event_id"),
+                        "type": event_dict.get("type"),
+                        "room_id": event_dict.get("room_id"),
+                        "depth": event_dict.get("depth"),
+                        "topological_ordering": event_dict.get("depth"),
+                        "stream_ordering": event_dict.get("stream_ordering"),
+                        "processed": True,
+                        "outlier": False,
+                    },
+                )
+
+            # Insert the event edges
+            for event_id in our_server_events:
+                for prev_event_id in event_graph[event_id]:
+                    self.store.db_pool.simple_insert_txn(
+                        txn,
+                        table="event_edges",
+                        values={
+                            "event_id": event_id,
+                            "prev_event_id": prev_event_id,
+                            "room_id": room_id,
+                        },
+                    )
+
+            # Insert the backward extremities
+            prev_events_of_our_events = {
+                prev_event_id
+                for our_server_event in our_server_events
+                for prev_event_id in complete_event_dict_map[our_server_event][
+                    "prev_event_ids"
+                ]
+            }
+            backward_extremities = prev_events_of_our_events - our_server_events
+            for backward_extremity in backward_extremities:
+                self.store.db_pool.simple_insert_txn(
+                    txn,
+                    table="event_backward_extremities",
+                    values={
+                        "event_id": backward_extremity,
+                        "room_id": room_id,
+                    },
+                )
+
+        self.get_success(
+            self.store.db_pool.runInteraction(
+                "_setup_room_for_backfill_tests_populate_db",
+                populate_db,
+            )
+        )
+
+        return _BackfillSetupInfo(room_id=room_id, depth_map=depth_map)
+
+    def test_get_backfill_points_in_room(self):
+        """
+        Test to make sure we get some backfill points
+        """
+        setup_info = self._setup_room_for_backfill_tests()
+        room_id = setup_info.room_id
+
+        backfill_points = self.get_success(
+            self.store.get_backfill_points_in_room(room_id)
+        )
+        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
+        self.assertListEqual(
+            backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2", "b1"]
+        )
+
+    def test_get_backfill_points_in_room_excludes_events_we_have_attempted(
+        self,
+    ):
+        """
+        Test to make sure that events we have attempted to backfill (and within
+        backoff timeout duration) do not show up as an event to backfill again.
+        """
+        setup_info = self._setup_room_for_backfill_tests()
+        room_id = setup_info.room_id
+
+        # Record some attempts to backfill these events which will make
+        # `get_backfill_points_in_room` exclude them because we
+        # haven't passed the backoff interval.
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(room_id, "b5", "fake cause")
+        )
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(room_id, "b4", "fake cause")
+        )
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(room_id, "b3", "fake cause")
+        )
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(room_id, "b2", "fake cause")
+        )
+
+        # No time has passed since we attempted to backfill ^
+
+        backfill_points = self.get_success(
+            self.store.get_backfill_points_in_room(room_id)
+        )
+        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
+        # Only the backfill points that we didn't record earlier exist here.
+        self.assertListEqual(backfill_event_ids, ["b6", "2", "b1"])
+
+    def test_get_backfill_points_in_room_attempted_event_retry_after_backoff_duration(
+        self,
+    ):
+        """
+        Test to make sure after we fake attempt to backfill event "b1" many times,
+        we can retry and see "b1" again after the backoff timeout duration
+        has elapsed.
+        """
+        setup_info = self._setup_room_for_backfill_tests()
+        room_id = setup_info.room_id
+
+        # Record some attempts to backfill these events which will make
+        # `get_backfill_points_in_room` exclude them because we
+        # haven't passed the backoff interval.
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(room_id, "b3", "fake cause")
+        )
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause")
+        )
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause")
+        )
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause")
+        )
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause")
+        )
+
+        # Now advance time by 2 hours and we should only be able to see "b3"
+        # because we have waited long enough for the single attempt (2^1 hours)
+        # but we still shouldn't see "b1" because we haven't waited long enough
+        # for this many attempts. We didn't do anything to "b2" so it should be
+        # visible regardless.
+        self.reactor.advance(datetime.timedelta(hours=2).total_seconds())
+
+        # Make sure that "b1" is not in the list because we've
+        # already attempted many times
+        backfill_points = self.get_success(
+            self.store.get_backfill_points_in_room(room_id)
+        )
+        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
+        self.assertListEqual(backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2"])
+
+        # Now advance time by 20 hours (above 2^4 because we made 4 attempts) and
+        # see if we can now backfill it
+        self.reactor.advance(datetime.timedelta(hours=20).total_seconds())
+
+        # Try again after we advanced enough time and we should see "b1" again
+        backfill_points = self.get_success(
+            self.store.get_backfill_points_in_room(room_id)
+        )
+        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
+        self.assertListEqual(
+            backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2", "b1"]
+        )
+
+    def _setup_room_for_insertion_backfill_tests(self) -> _BackfillSetupInfo:
+        """
+        Sets up a room with various insertion event backward extremities to test
+        backfill functions against.
+
+        Returns:
+            _BackfillSetupInfo including the `room_id` to test against and
+            `depth_map` of events in the room
+        """
+        room_id = "!backfill-room-test:some-host"
+
+        depth_map: Dict[str, int] = {
+            "1": 1,
+            "2": 2,
+            "insertion_eventA": 3,
+            "3": 4,
+            "insertion_eventB": 5,
+            "4": 6,
+            "5": 7,
+        }
+
+        def populate_db(txn: LoggingTransaction):
+            # Insert the room to satisfy the foreign key constraint of
+            # `event_failed_pull_attempts`
+            self.store.db_pool.simple_insert_txn(
+                txn,
+                "rooms",
+                {
+                    "room_id": room_id,
+                    "creator": "room_creator_user_id",
+                    "is_public": True,
+                    "room_version": "6",
+                },
+            )
+
+            # Insert our server events
+            stream_ordering = 0
+            for event_id, depth in depth_map.items():
+                self.store.db_pool.simple_insert_txn(
+                    txn,
+                    table="events",
+                    values={
+                        "event_id": event_id,
+                        "type": EventTypes.MSC2716_INSERTION
+                        if event_id.startswith("insertion_event")
+                        else "test_regular_type",
+                        "room_id": room_id,
+                        "depth": depth,
+                        "topological_ordering": depth,
+                        "stream_ordering": stream_ordering,
+                        "processed": True,
+                        "outlier": False,
+                    },
+                )
+
+                if event_id.startswith("insertion_event"):
+                    self.store.db_pool.simple_insert_txn(
+                        txn,
+                        table="insertion_event_extremities",
+                        values={
+                            "event_id": event_id,
+                            "room_id": room_id,
+                        },
+                    )
+
+                stream_ordering += 1
+
+        self.get_success(
+            self.store.db_pool.runInteraction(
+                "_setup_room_for_insertion_backfill_tests_populate_db",
+                populate_db,
+            )
+        )
+
+        return _BackfillSetupInfo(room_id=room_id, depth_map=depth_map)
+
+    def test_get_insertion_event_backward_extremities_in_room(self):
+        """
+        Test to make sure insertion event backward extremities are returned.
+        """
+        setup_info = self._setup_room_for_insertion_backfill_tests()
+        room_id = setup_info.room_id
+
+        backfill_points = self.get_success(
+            self.store.get_insertion_event_backward_extremities_in_room(room_id)
+        )
+        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
+        self.assertListEqual(
+            backfill_event_ids, ["insertion_eventB", "insertion_eventA"]
+        )
+
+    def test_get_insertion_event_backward_extremities_in_room_excludes_events_we_have_attempted(
+        self,
+    ):
+        """
+        Test to make sure that insertion events we have attempted to backfill
+        (and within backoff timeout duration) do not show up as an event to
+        backfill again.
+        """
+        setup_info = self._setup_room_for_insertion_backfill_tests()
+        room_id = setup_info.room_id
+
+        # Record some attempts to backfill these events which will make
+        # `get_insertion_event_backward_extremities_in_room` exclude them
+        # because we haven't passed the backoff interval.
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(
+                room_id, "insertion_eventA", "fake cause"
+            )
+        )
+
+        # No time has passed since we attempted to backfill ^
+
+        backfill_points = self.get_success(
+            self.store.get_insertion_event_backward_extremities_in_room(room_id)
+        )
+        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
+        # Only the backfill points that we didn't record earlier exist here.
+        self.assertListEqual(backfill_event_ids, ["insertion_eventB"])
+
+    def test_get_insertion_event_backward_extremities_in_room_attempted_event_retry_after_backoff_duration(
+        self,
+    ):
+        """
+        Test to make sure that after we fake attempt to backfill event
+        "insertion_eventA" many times, we can retry and see
+        "insertion_eventA" again after the backoff timeout duration has been
+        exceeded.
+        """
+        setup_info = self._setup_room_for_insertion_backfill_tests()
+        room_id = setup_info.room_id
+
+        # Record some attempts to backfill these events which will make
+        # `get_insertion_event_backward_extremities_in_room` exclude them
+        # because we haven't passed the backoff interval.
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(
+                room_id, "insertion_eventB", "fake cause"
+            )
+        )
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(
+                room_id, "insertion_eventA", "fake cause"
+            )
+        )
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(
+                room_id, "insertion_eventA", "fake cause"
+            )
+        )
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(
+                room_id, "insertion_eventA", "fake cause"
+            )
+        )
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(
+                room_id, "insertion_eventA", "fake cause"
+            )
+        )
+
+        # Now advance time by 2 hours and we should only be able to see
+        # "insertion_eventB" because we have waited long enough for the single
+        # attempt (2^1 hours) but we still shouldn't see "insertion_eventA"
+        # because we haven't waited long enough for this many attempts.
+        self.reactor.advance(datetime.timedelta(hours=2).total_seconds())
+
+        # Make sure that "insertion_eventA" is not in the list because we've
+        # already attempted many times
+        backfill_points = self.get_success(
+            self.store.get_insertion_event_backward_extremities_in_room(room_id)
+        )
+        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
+        self.assertListEqual(backfill_event_ids, ["insertion_eventB"])
+
+        # Now advance time by 20 hours (above 2^4 because we made 4 attempts) and
+        # see if we can now backfill it
+        self.reactor.advance(datetime.timedelta(hours=20).total_seconds())
+
+        # Try at "insertion_eventA" again after we advanced enough time and we
+        # should see "insertion_eventA" again
+        backfill_points = self.get_success(
+            self.store.get_insertion_event_backward_extremities_in_room(room_id)
+        )
+        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
+        self.assertListEqual(
+            backfill_event_ids, ["insertion_eventB", "insertion_eventA"]
+        )
+
 
 @attr.s
 class FakeEvent:
-- 
cgit 1.5.1


From dcdd50e458e7f6c77e1ca28afb300d9f0ab490b3 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 26 Sep 2022 13:30:00 +0100
Subject: Fix mypy errors with latest canonicaljson (#13905)

* Lockfile: update canonicaljson 1.6.0 -> 1.6.3

* Fix mypy errors with latest canonicaljson

The change to `_encode_json_bytes` definition wasn't sufficient:

```
synapse/http/server.py:751: error: Incompatible types in assignment (expression has type "Callable[[Arg(object, 'json_object')], bytes]", variable has type "Callable[[Arg(object, 'data')], bytes]")  [assignment]
```

Which I think is mypy warning us that the two functions accept different
sets of kwargs. Fair enough!

* Changelog
---
 changelog.d/13905.misc | 1 +
 poetry.lock            | 9 +++++----
 synapse/http/server.py | 4 ++--
 3 files changed, 8 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/13905.misc

(limited to 'synapse')

diff --git a/changelog.d/13905.misc b/changelog.d/13905.misc
new file mode 100644
index 0000000000..efe3bed5f1
--- /dev/null
+++ b/changelog.d/13905.misc
@@ -0,0 +1 @@
+Fix mypy errors with canonicaljson 1.6.3.
diff --git a/poetry.lock b/poetry.lock
index 291f3c51e6..0f6d1cfa69 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -95,14 +95,15 @@ webencodings = "*"
 
 [[package]]
 name = "canonicaljson"
-version = "1.6.0"
+version = "1.6.3"
 description = "Canonical JSON"
 category = "main"
 optional = false
-python-versions = "~=3.7"
+python-versions = ">=3.7"
 
 [package.dependencies]
 simplejson = ">=3.14.0"
+typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.8\""}
 
 [package.extras]
 frozendict = ["frozendict (>=1.0)"]
@@ -1682,8 +1683,8 @@ bleach = [
     {file = "bleach-4.1.0.tar.gz", hash = "sha256:0900d8b37eba61a802ee40ac0061f8c2b5dee29c1927dd1d233e075ebf5a71da"},
 ]
 canonicaljson = [
-    {file = "canonicaljson-1.6.0-py3-none-any.whl", hash = "sha256:7230c2a2a3db07874f622af84effe41a655e07bf23734830e18a454e65d5b998"},
-    {file = "canonicaljson-1.6.0.tar.gz", hash = "sha256:8739d5fd91aca7281d425660ae65af7663808c8177778965f67e90b16a2b2427"},
+    {file = "canonicaljson-1.6.3-py3-none-any.whl", hash = "sha256:6ba3cf1702fa3d209b3e915a4e9a3e4ef194f1e8fca189c1f0b7a2a7686a27e6"},
+    {file = "canonicaljson-1.6.3.tar.gz", hash = "sha256:ca59760bc274a899a0da75809d6909ae43e5123381fd6ef040a44d1952c0b448"},
 ]
 certifi = [
     {file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"},
diff --git a/synapse/http/server.py b/synapse/http/server.py
index 6068a94b40..bcbfac2c9f 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -705,7 +705,7 @@ class _ByteProducer:
         self._request = None
 
 
-def _encode_json_bytes(json_object: Any) -> bytes:
+def _encode_json_bytes(json_object: object) -> bytes:
     """
     Encode an object into JSON. Returns an iterator of bytes.
     """
@@ -746,7 +746,7 @@ def respond_with_json(
         return None
 
     if canonical_json:
-        encoder = encode_canonical_json
+        encoder: Callable[[object], bytes] = encode_canonical_json
     else:
         encoder = _encode_json_bytes
 
-- 
cgit 1.5.1


From 6b4593a80fa2fd9ec8e1ec82fad74f3b7fbb9ba3 Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Mon, 26 Sep 2022 16:26:35 +0100
Subject: Simplify cache invalidation after event persist txn (#13796)

This moves all the invalidations into a single place and de-duplicates
the code involved in invalidating caches for a given event by using
the base class method.
---
 changelog.d/13796.misc                   |   1 +
 synapse/storage/_base.py                 |   3 +
 synapse/storage/databases/main/cache.py  |  34 +++++---
 synapse/storage/databases/main/events.py | 133 +++++++------------------------
 4 files changed, 52 insertions(+), 119 deletions(-)
 create mode 100644 changelog.d/13796.misc

(limited to 'synapse')

diff --git a/changelog.d/13796.misc b/changelog.d/13796.misc
new file mode 100644
index 0000000000..9ed1662394
--- /dev/null
+++ b/changelog.d/13796.misc
@@ -0,0 +1 @@
+Use shared methods for cache invalidation when persisting events, remove duplicate codepaths. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 303a5d5298..313e8aca7d 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -91,6 +91,9 @@ class SQLBaseStore(metaclass=ABCMeta):
             self._attempt_to_invalidate_cache(
                 "get_user_in_room_with_profile", (room_id, user_id)
             )
+            self._attempt_to_invalidate_cache(
+                "get_rooms_for_user_with_stream_ordering", (user_id,)
+            )
 
         # Purge other caches based on room state.
         self._attempt_to_invalidate_cache("get_room_summary", (room_id,))
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index 2c421151c1..db6ce83a2b 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -223,15 +223,16 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
         # process triggering the invalidation is responsible for clearing any external
         # cached objects.
         self._invalidate_local_get_event_cache(event_id)
-        self.have_seen_event.invalidate((room_id, event_id))
 
-        self.get_latest_event_ids_in_room.invalidate((room_id,))
-
-        self.get_unread_event_push_actions_by_room_for_user.invalidate((room_id,))
+        self._attempt_to_invalidate_cache("have_seen_event", (room_id, event_id))
+        self._attempt_to_invalidate_cache("get_latest_event_ids_in_room", (room_id,))
+        self._attempt_to_invalidate_cache(
+            "get_unread_event_push_actions_by_room_for_user", (room_id,)
+        )
 
         # The `_get_membership_from_event_id` is immutable, except for the
         # case where we look up an event *before* persisting it.
-        self._get_membership_from_event_id.invalidate((event_id,))
+        self._attempt_to_invalidate_cache("_get_membership_from_event_id", (event_id,))
 
         if not backfilled:
             self._events_stream_cache.entity_has_changed(room_id, stream_ordering)
@@ -240,19 +241,26 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
             self._invalidate_local_get_event_cache(redacts)
             # Caches which might leak edits must be invalidated for the event being
             # redacted.
-            self.get_relations_for_event.invalidate((redacts,))
-            self.get_applicable_edit.invalidate((redacts,))
+            self._attempt_to_invalidate_cache("get_relations_for_event", (redacts,))
+            self._attempt_to_invalidate_cache("get_applicable_edit", (redacts,))
 
         if etype == EventTypes.Member:
             self._membership_stream_cache.entity_has_changed(state_key, stream_ordering)
-            self.get_invited_rooms_for_local_user.invalidate((state_key,))
+            self._attempt_to_invalidate_cache(
+                "get_invited_rooms_for_local_user", (state_key,)
+            )
 
         if relates_to:
-            self.get_relations_for_event.invalidate((relates_to,))
-            self.get_aggregation_groups_for_event.invalidate((relates_to,))
-            self.get_applicable_edit.invalidate((relates_to,))
-            self.get_thread_summary.invalidate((relates_to,))
-            self.get_thread_participated.invalidate((relates_to,))
+            self._attempt_to_invalidate_cache("get_relations_for_event", (relates_to,))
+            self._attempt_to_invalidate_cache(
+                "get_aggregation_groups_for_event", (relates_to,)
+            )
+            self._attempt_to_invalidate_cache("get_applicable_edit", (relates_to,))
+            self._attempt_to_invalidate_cache("get_thread_summary", (relates_to,))
+            self._attempt_to_invalidate_cache("get_thread_participated", (relates_to,))
+            self._attempt_to_invalidate_cache(
+                "get_mutual_event_relations_for_rel_type", (relates_to,)
+            )
 
     async def invalidate_cache_and_stream(
         self, cache_name: str, keys: Tuple[Any, ...]
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 1b54a2eb57..2e156a4a11 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -35,7 +35,7 @@ import attr
 from prometheus_client import Counter
 
 import synapse.metrics
-from synapse.api.constants import EventContentFields, EventTypes, RelationTypes
+from synapse.api.constants import EventContentFields, EventTypes
 from synapse.api.errors import Codes, SynapseError
 from synapse.api.room_versions import RoomVersions
 from synapse.events import EventBase, relation_from_event
@@ -410,6 +410,31 @@ class PersistEventsStore:
         assert min_stream_order
         assert max_stream_order
 
+        # Once the txn completes, invalidate all of the relevant caches. Note that we do this
+        # up here because it captures all the events_and_contexts before any are removed.
+        for event, _ in events_and_contexts:
+            self.store.invalidate_get_event_cache_after_txn(txn, event.event_id)
+            if event.redacts:
+                self.store.invalidate_get_event_cache_after_txn(txn, event.redacts)
+
+            relates_to = None
+            relation = relation_from_event(event)
+            if relation:
+                relates_to = relation.parent_id
+
+            assert event.internal_metadata.stream_ordering is not None
+            txn.call_after(
+                self.store._invalidate_caches_for_event,
+                event.internal_metadata.stream_ordering,
+                event.event_id,
+                event.room_id,
+                event.type,
+                getattr(event, "state_key", None),
+                event.redacts,
+                relates_to,
+                backfilled=False,
+            )
+
         self._update_forward_extremities_txn(
             txn,
             new_forward_extremities=new_forward_extremities,
@@ -459,6 +484,7 @@ class PersistEventsStore:
 
         # We call this last as it assumes we've inserted the events into
         # room_memberships, where applicable.
+        # NB: This function invalidates all state related caches
         self._update_current_state_txn(txn, state_delta_for_room, min_stream_order)
 
     def _persist_event_auth_chain_txn(
@@ -1172,13 +1198,6 @@ class PersistEventsStore:
             )
 
             # Invalidate the various caches
-
-            for member in members_changed:
-                txn.call_after(
-                    self.store.get_rooms_for_user_with_stream_ordering.invalidate,
-                    (member,),
-                )
-
             self.store._invalidate_state_caches_and_stream(
                 txn, room_id, members_changed
             )
@@ -1222,9 +1241,6 @@ class PersistEventsStore:
             self.db_pool.simple_delete_txn(
                 txn, table="event_forward_extremities", keyvalues={"room_id": room_id}
             )
-            txn.call_after(
-                self.store.get_latest_event_ids_in_room.invalidate, (room_id,)
-            )
 
         self.db_pool.simple_insert_many_txn(
             txn,
@@ -1294,8 +1310,6 @@ class PersistEventsStore:
         """
         depth_updates: Dict[str, int] = {}
         for event, context in events_and_contexts:
-            # Remove the any existing cache entries for the event_ids
-            self.store.invalidate_get_event_cache_after_txn(txn, event.event_id)
             # Then update the `stream_ordering` position to mark the latest
             # event as the front of the room. This should not be done for
             # backfilled events because backfilled events have negative
@@ -1697,16 +1711,7 @@ class PersistEventsStore:
         txn.async_call_after(prefill)
 
     def _store_redaction(self, txn: LoggingTransaction, event: EventBase) -> None:
-        """Invalidate the caches for the redacted event.
-
-        Note that these caches are also cleared as part of event replication in
-        _invalidate_caches_for_event.
-        """
         assert event.redacts is not None
-        self.store.invalidate_get_event_cache_after_txn(txn, event.redacts)
-        txn.call_after(self.store.get_relations_for_event.invalidate, (event.redacts,))
-        txn.call_after(self.store.get_applicable_edit.invalidate, (event.redacts,))
-
         self.db_pool.simple_upsert_txn(
             txn,
             table="redactions",
@@ -1807,34 +1812,6 @@ class PersistEventsStore:
 
         for event in events:
             assert event.internal_metadata.stream_ordering is not None
-            txn.call_after(
-                self.store._membership_stream_cache.entity_has_changed,
-                event.state_key,
-                event.internal_metadata.stream_ordering,
-            )
-            txn.call_after(
-                self.store.get_invited_rooms_for_local_user.invalidate,
-                (event.state_key,),
-            )
-            txn.call_after(
-                self.store.get_local_users_in_room.invalidate,
-                (event.room_id,),
-            )
-            txn.call_after(
-                self.store.get_number_joined_users_in_room.invalidate,
-                (event.room_id,),
-            )
-            txn.call_after(
-                self.store.get_user_in_room_with_profile.invalidate,
-                (event.room_id, event.state_key),
-            )
-
-            # The `_get_membership_from_event_id` is immutable, except for the
-            # case where we look up an event *before* persisting it.
-            txn.call_after(
-                self.store._get_membership_from_event_id.invalidate,
-                (event.event_id,),
-            )
 
             # We update the local_current_membership table only if the event is
             # "current", i.e., its something that has just happened.
@@ -1883,35 +1860,6 @@ class PersistEventsStore:
             },
         )
 
-        txn.call_after(
-            self.store.get_relations_for_event.invalidate, (relation.parent_id,)
-        )
-        txn.call_after(
-            self.store.get_aggregation_groups_for_event.invalidate,
-            (relation.parent_id,),
-        )
-        txn.call_after(
-            self.store.get_mutual_event_relations_for_rel_type.invalidate,
-            (relation.parent_id,),
-        )
-
-        if relation.rel_type == RelationTypes.REPLACE:
-            txn.call_after(
-                self.store.get_applicable_edit.invalidate, (relation.parent_id,)
-            )
-
-        if relation.rel_type == RelationTypes.THREAD:
-            txn.call_after(
-                self.store.get_thread_summary.invalidate, (relation.parent_id,)
-            )
-            # It should be safe to only invalidate the cache if the user has not
-            # previously participated in the thread, but that's difficult (and
-            # potentially error-prone) so it is always invalidated.
-            txn.call_after(
-                self.store.get_thread_participated.invalidate,
-                (relation.parent_id, event.sender),
-            )
-
     def _handle_insertion_event(
         self, txn: LoggingTransaction, event: EventBase
     ) -> None:
@@ -2213,28 +2161,6 @@ class PersistEventsStore:
                 ),
             )
 
-            room_to_event_ids: Dict[str, List[str]] = {}
-            for e in non_outlier_events:
-                room_to_event_ids.setdefault(e.room_id, []).append(e.event_id)
-
-            for room_id, event_ids in room_to_event_ids.items():
-                rows = self.db_pool.simple_select_many_txn(
-                    txn,
-                    table="event_push_actions_staging",
-                    column="event_id",
-                    iterable=event_ids,
-                    keyvalues={},
-                    retcols=("user_id",),
-                )
-
-                user_ids = {row["user_id"] for row in rows}
-
-                for user_id in user_ids:
-                    txn.call_after(
-                        self.store.get_unread_event_push_actions_by_room_for_user.invalidate,
-                        (room_id, user_id),
-                    )
-
         # Now we delete the staging area for *all* events that were being
         # persisted.
         txn.execute_batch(
@@ -2249,11 +2175,6 @@ class PersistEventsStore:
     def _remove_push_actions_for_event_id_txn(
         self, txn: LoggingTransaction, room_id: str, event_id: str
     ) -> None:
-        # Sad that we have to blow away the cache for the whole room here
-        txn.call_after(
-            self.store.get_unread_event_push_actions_by_room_for_user.invalidate,
-            (room_id,),
-        )
         txn.execute(
             "DELETE FROM event_push_actions WHERE room_id = ? AND event_id = ?",
             (room_id, event_id),
-- 
cgit 1.5.1


From 41461fd4d63e55d1812f0688ca58a88e7200a1d7 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Mon, 26 Sep 2022 17:33:32 +0200
Subject: typing: check origin server of typing event against room's servers
 (#13830)

This is also using the partial state approximation if needed so we do
not block here during a fast join.

Signed-off-by: Mathieu Velten <mathieuv@matrix.org>
Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
---
 changelog.d/13830.bugfix      | 1 +
 synapse/handlers/typing.py    | 7 +++++--
 tests/handlers/test_typing.py | 4 ++++
 3 files changed, 10 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/13830.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13830.bugfix b/changelog.d/13830.bugfix
new file mode 100644
index 0000000000..e6215806cd
--- /dev/null
+++ b/changelog.d/13830.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where typing events would be accepted from remote servers not present in a room. Also fix a bug where incoming typing events would cause other incoming events to get stuck during a fast join.
diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py
index 0d8466af11..f953691669 100644
--- a/synapse/handlers/typing.py
+++ b/synapse/handlers/typing.py
@@ -362,11 +362,14 @@ class TypingWriterHandler(FollowerTypingHandler):
             )
             return
 
-        domains = await self._storage_controllers.state.get_current_hosts_in_room(
+        # Let's check that the origin server is in the room before accepting the typing
+        # event. We don't want to block waiting on a partial state so take an
+        # approximation if needed.
+        domains = await self._storage_controllers.state.get_current_hosts_in_room_or_partial_state_approximation(
             room_id
         )
 
-        if self.server_name in domains:
+        if user.domain in domains:
             logger.info("Got typing update from %s: %r", user_id, content)
             now = self.clock.time_msec()
             self._member_typing_until[member] = now + FEDERATION_TIMEOUT
diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py
index 1a247f12e8..9c821b3042 100644
--- a/tests/handlers/test_typing.py
+++ b/tests/handlers/test_typing.py
@@ -138,6 +138,10 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
             get_current_hosts_in_room
         )
 
+        hs.get_storage_controllers().state.get_current_hosts_in_room_or_partial_state_approximation = (
+            get_current_hosts_in_room
+        )
+
         async def get_users_in_room(room_id: str):
             return {str(u) for u in self.room_members}
 
-- 
cgit 1.5.1


From 0a38c7ec6d46b6e51bfa53ff44e51637d3c63f5c Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 26 Sep 2022 18:28:32 +0100
Subject: Snapshot schema 72 (#13873)

Including another batch of fixes to the schema dump script
---
 changelog.d/13873.misc                             |    1 +
 scripts-dev/make_full_schema.sh                    |   60 +-
 synapse/storage/database.py                        |    8 +
 synapse/storage/engines/_base.py                   |   23 +-
 synapse/storage/engines/postgres.py                |   12 +-
 synapse/storage/engines/sqlite.py                  |   21 +-
 synapse/storage/prepare_database.py                |    8 +-
 .../common/full_schemas/72/full.sql.postgres       |    8 +
 .../schema/common/full_schemas/72/full.sql.sqlite  |    6 +
 .../schema/main/full_schemas/72/full.sql.postgres  | 1344 ++++++++++++++++++++
 .../schema/main/full_schemas/72/full.sql.sqlite    |  646 ++++++++++
 .../schema/state/full_schemas/72/full.sql.postgres |   30 +
 .../schema/state/full_schemas/72/full.sql.sqlite   |   20 +
 13 files changed, 2165 insertions(+), 22 deletions(-)
 create mode 100644 changelog.d/13873.misc
 create mode 100644 synapse/storage/schema/common/full_schemas/72/full.sql.postgres
 create mode 100644 synapse/storage/schema/common/full_schemas/72/full.sql.sqlite
 create mode 100644 synapse/storage/schema/main/full_schemas/72/full.sql.postgres
 create mode 100644 synapse/storage/schema/main/full_schemas/72/full.sql.sqlite
 create mode 100644 synapse/storage/schema/state/full_schemas/72/full.sql.postgres
 create mode 100644 synapse/storage/schema/state/full_schemas/72/full.sql.sqlite

(limited to 'synapse')

diff --git a/changelog.d/13873.misc b/changelog.d/13873.misc
new file mode 100644
index 0000000000..f4342482f0
--- /dev/null
+++ b/changelog.d/13873.misc
@@ -0,0 +1 @@
+Create a new snapshot of the database schema.
diff --git a/scripts-dev/make_full_schema.sh b/scripts-dev/make_full_schema.sh
index d8cd06ee4f..e2bc1640bb 100755
--- a/scripts-dev/make_full_schema.sh
+++ b/scripts-dev/make_full_schema.sh
@@ -26,6 +26,9 @@ usage() {
   echo "  Defaults to 9999."
   echo "-h"
   echo "  Display this help text."
+  echo ""
+  echo "  NB: make sure to run this against the *oldest* supported version of postgres,"
+  echo "  or else pg_dump might output non-backwards-compatible syntax."
 }
 
 SCHEMA_NUMBER="9999"
@@ -240,25 +243,54 @@ DROP TABLE user_directory_search_stat;
 
 echo "Dumping SQLite3 schema..."
 
-mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schema/$SCHEMA_NUMBER"
-sqlite3 "$SQLITE_COMMON_DB" ".schema --indent"           > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
-sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
-sqlite3 "$SQLITE_MAIN_DB"   ".schema --indent"           > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
-sqlite3 "$SQLITE_MAIN_DB"   ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
-sqlite3 "$SQLITE_STATE_DB"  ".schema --indent"           > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
-sqlite3 "$SQLITE_STATE_DB"  ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
+mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schemas/$SCHEMA_NUMBER"
+sqlite3 "$SQLITE_COMMON_DB" ".schema"                    > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_MAIN_DB"   ".schema"                    > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_MAIN_DB"   ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_STATE_DB"  ".schema"                    > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
+sqlite3 "$SQLITE_STATE_DB"  ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
 
 cleanup_pg_schema() {
-   sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d'
+  # Cleanup as follows:
+  # - Remove empty lines. pg_dump likes to output a lot of these.
+  # - Remove comment-only lines. pg_dump also likes to output a lot of these to visually
+  #   separate tables etc.
+  # - Remove "public." prefix --- the schema name.
+  # - Remove "SET" commands. Last time I ran this, the output commands were
+  #     SET statement_timeout = 0;
+  #     SET lock_timeout = 0;
+  #     SET idle_in_transaction_session_timeout = 0;
+  #     SET client_encoding = 'UTF8';
+  #     SET standard_conforming_strings = on;
+  #     SET check_function_bodies = false;
+  #     SET xmloption = content;
+  #     SET client_min_messages = warning;
+  #     SET row_security = off;
+  #     SET default_table_access_method = heap;
+  # - Very carefully remove specific SELECT statements. We CANNOT blanket remove all
+  #   SELECT statements because some of those have side-effects which we do want in the
+  #   schema. Last time I ran this, the only SELECTS were
+  #     SELECT pg_catalog.set_config('search_path', '', false);
+  #   and
+  #     SELECT pg_catalog.setval(text, bigint, bool);
+  #   We do want to remove the former, but the latter is important. If the last argument
+  #   is `true` or omitted, this marks the given integer as having been consumed, so
+  #   it will NOT be returned as the nextval.
+   sed -e '/^$/d' \
+   -e '/^--/d' \
+   -e 's/public\.//g' \
+   -e '/^SET /d' \
+   -e '/^SELECT pg_catalog.set_config/d'
 }
 
 echo "Dumping Postgres schema..."
 
-pg_dump --format=plain --schema-only         --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema  > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
-pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
-pg_dump --format=plain --schema-only         --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME"   | cleanup_pg_schema  > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
-pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME"   | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
-pg_dump --format=plain --schema-only         --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME"  | cleanup_pg_schema  > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
-pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME"  | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --schema-only         --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema  > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --schema-only         --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME"   | cleanup_pg_schema  > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME"   | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --schema-only         --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME"  | cleanup_pg_schema  > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
+pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME"  | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
 
 echo "Done! Files dumped to: $OUTPUT_DIR"
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 9d116f6925..6cc88aad32 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -393,6 +393,14 @@ class LoggingTransaction:
     def executemany(self, sql: str, *args: Any) -> None:
         self._do_execute(self.txn.executemany, sql, *args)
 
+    def executescript(self, sql: str) -> None:
+        if isinstance(self.database_engine, Sqlite3Engine):
+            self._do_execute(self.txn.executescript, sql)  # type: ignore[attr-defined]
+        else:
+            raise NotImplementedError(
+                f"executescript only exists for sqlite driver, not {type(self.database_engine)}"
+            )
+
     def _make_sql_one_line(self, sql: str) -> str:
         "Strip newlines out of SQL so that the loggers in the DB are on one line"
         return " ".join(line.strip() for line in sql.splitlines() if line.strip())
diff --git a/synapse/storage/engines/_base.py b/synapse/storage/engines/_base.py
index 0d16a419a4..70e594a68f 100644
--- a/synapse/storage/engines/_base.py
+++ b/synapse/storage/engines/_base.py
@@ -32,9 +32,10 @@ class IncorrectDatabaseSetup(RuntimeError):
 
 
 ConnectionType = TypeVar("ConnectionType", bound=Connection)
+CursorType = TypeVar("CursorType", bound=Cursor)
 
 
-class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta):
+class BaseDatabaseEngine(Generic[ConnectionType, CursorType], metaclass=abc.ABCMeta):
     def __init__(self, module: DBAPI2Module, config: Mapping[str, Any]):
         self.module = module
 
@@ -64,7 +65,7 @@ class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta):
         ...
 
     @abc.abstractmethod
-    def check_new_database(self, txn: Cursor) -> None:
+    def check_new_database(self, txn: CursorType) -> None:
         """Gets called when setting up a brand new database. This allows us to
         apply stricter checks on new databases versus existing database.
         """
@@ -124,3 +125,21 @@ class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta):
         Note: This has no effect on SQLite3, as transactions are SERIALIZABLE by default.
         """
         ...
+
+    @staticmethod
+    @abc.abstractmethod
+    def executescript(cursor: CursorType, script: str) -> None:
+        """Execute a chunk of SQL containing multiple semicolon-delimited statements.
+
+        This is not provided by DBAPI2, and so needs engine-specific support.
+        """
+        ...
+
+    @classmethod
+    def execute_script_file(cls, cursor: CursorType, filepath: str) -> None:
+        """Execute a file containing multiple semicolon-delimited SQL statements.
+
+        DBAPI2 has no such call, so the file's contents are passed to `executescript`.
+        """
+        with open(filepath, "rt") as f:
+            cls.executescript(cursor, f.read())
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index 7f7d006ac2..d8c0f64d9a 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -31,7 +31,9 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
-class PostgresEngine(BaseDatabaseEngine[psycopg2.extensions.connection]):
+class PostgresEngine(
+    BaseDatabaseEngine[psycopg2.extensions.connection, psycopg2.extensions.cursor]
+):
     def __init__(self, database_config: Mapping[str, Any]):
         super().__init__(psycopg2, database_config)
         psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
@@ -212,3 +214,11 @@ class PostgresEngine(BaseDatabaseEngine[psycopg2.extensions.connection]):
         else:
             isolation_level = self.isolation_level_map[isolation_level]
         return conn.set_isolation_level(isolation_level)
+
+    @staticmethod
+    def executescript(cursor: psycopg2.extensions.cursor, script: str) -> None:
+        """Execute a chunk of SQL containing multiple semicolon-delimited statements.
+
+        Psycopg2 seems happy to do this in DBAPI2's `execute()` function.
+        """
+        cursor.execute(script)
diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py
index 095ae0a096..faa574dbfd 100644
--- a/synapse/storage/engines/sqlite.py
+++ b/synapse/storage/engines/sqlite.py
@@ -24,7 +24,7 @@ if TYPE_CHECKING:
     from synapse.storage.database import LoggingDatabaseConnection
 
 
-class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection]):
+class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection, sqlite3.Cursor]):
     def __init__(self, database_config: Mapping[str, Any]):
         super().__init__(sqlite3, database_config)
 
@@ -120,6 +120,25 @@ class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection]):
         # All transactions are SERIALIZABLE by default in sqlite
         pass
 
+    @staticmethod
+    def executescript(cursor: sqlite3.Cursor, script: str) -> None:
+        """Execute a chunk of SQL containing multiple semicolon-delimited statements.
+
+        Python's built-in SQLite driver does not allow you to do this with DBAPI2's
+        `execute`:
+
+        > execute() will only execute a single SQL statement. If you try to execute more
+        > than one statement with it, it will raise a Warning. Use executescript() if
+        > you want to execute multiple SQL statements with one call.
+
+        Though the docs for `executescript` warn:
+
+        > If there is a pending transaction, an implicit COMMIT statement is executed
+        > first. No other implicit transaction control is performed; any transaction
+        > control must be added to sql_script.
+        """
+        cursor.executescript(script)
+
 
 # Following functions taken from: https://github.com/coleifer/peewee
 
diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index 09a2b58f4c..3acdb39da7 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -266,7 +266,7 @@ def _setup_new_database(
             ".sql." + specific
         ):
             logger.debug("Applying schema %s", entry.absolute_path)
-            executescript(cur, entry.absolute_path)
+            database_engine.execute_script_file(cur, entry.absolute_path)
 
     cur.execute(
         "INSERT INTO schema_version (version, upgraded) VALUES (?,?)",
@@ -517,7 +517,7 @@ def _upgrade_existing_database(
                         UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path
                     )
                 logger.info("Applying schema %s", relative_path)
-                executescript(cur, absolute_path)
+                database_engine.execute_script_file(cur, absolute_path)
             elif ext == specific_engine_extension and root_name.endswith(".sql"):
                 # A .sql file specific to our engine; just read and execute it
                 if is_worker:
@@ -525,7 +525,7 @@ def _upgrade_existing_database(
                         UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path
                     )
                 logger.info("Applying engine-specific schema %s", relative_path)
-                executescript(cur, absolute_path)
+                database_engine.execute_script_file(cur, absolute_path)
             elif ext in specific_engine_extensions and root_name.endswith(".sql"):
                 # A .sql file for a different engine; skip it.
                 continue
@@ -666,7 +666,7 @@ def _get_or_create_schema_state(
 ) -> Optional[_SchemaState]:
     # Bluntly try creating the schema_version tables.
     sql_path = os.path.join(schema_path, "common", "schema_version.sql")
-    executescript(txn, sql_path)
+    database_engine.execute_script_file(txn, sql_path)
 
     txn.execute("SELECT version, upgraded FROM schema_version")
     row = txn.fetchone()
diff --git a/synapse/storage/schema/common/full_schemas/72/full.sql.postgres b/synapse/storage/schema/common/full_schemas/72/full.sql.postgres
new file mode 100644
index 0000000000..f0e546f052
--- /dev/null
+++ b/synapse/storage/schema/common/full_schemas/72/full.sql.postgres
@@ -0,0 +1,8 @@
+CREATE TABLE background_updates (
+    update_name text NOT NULL,
+    progress_json text NOT NULL,
+    depends_on text,
+    ordering integer DEFAULT 0 NOT NULL
+);
+ALTER TABLE ONLY background_updates
+    ADD CONSTRAINT background_updates_uniqueness UNIQUE (update_name);
diff --git a/synapse/storage/schema/common/full_schemas/72/full.sql.sqlite b/synapse/storage/schema/common/full_schemas/72/full.sql.sqlite
new file mode 100644
index 0000000000..d5a2c04aa9
--- /dev/null
+++ b/synapse/storage/schema/common/full_schemas/72/full.sql.sqlite
@@ -0,0 +1,6 @@
+CREATE TABLE background_updates (
+    update_name text NOT NULL,
+    progress_json text NOT NULL,
+    depends_on text, ordering INT NOT NULL DEFAULT 0,
+    CONSTRAINT background_updates_uniqueness UNIQUE (update_name)
+);
diff --git a/synapse/storage/schema/main/full_schemas/72/full.sql.postgres b/synapse/storage/schema/main/full_schemas/72/full.sql.postgres
new file mode 100644
index 0000000000..d421fd9ab9
--- /dev/null
+++ b/synapse/storage/schema/main/full_schemas/72/full.sql.postgres
@@ -0,0 +1,1344 @@
+CREATE FUNCTION check_partial_state_events() RETURNS trigger
+    LANGUAGE plpgsql
+    AS $$
+            BEGIN
+                IF EXISTS (
+                    SELECT 1 FROM events
+                    WHERE events.event_id = NEW.event_id
+                       AND events.room_id != NEW.room_id
+                ) THEN
+                    RAISE EXCEPTION 'Incorrect room_id in partial_state_events';
+                END IF;
+                RETURN NEW;
+            END;
+            $$;
+CREATE TABLE access_tokens (
+    id bigint NOT NULL,
+    user_id text NOT NULL,
+    device_id text,
+    token text NOT NULL,
+    valid_until_ms bigint,
+    puppets_user_id text,
+    last_validated bigint,
+    refresh_token_id bigint,
+    used boolean
+);
+CREATE TABLE account_data (
+    user_id text NOT NULL,
+    account_data_type text NOT NULL,
+    stream_id bigint NOT NULL,
+    content text NOT NULL,
+    instance_name text
+);
+CREATE SEQUENCE account_data_sequence
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+CREATE TABLE account_validity (
+    user_id text NOT NULL,
+    expiration_ts_ms bigint NOT NULL,
+    email_sent boolean NOT NULL,
+    renewal_token text,
+    token_used_ts_ms bigint
+);
+CREATE TABLE application_services_state (
+    as_id text NOT NULL,
+    state character varying(5),
+    read_receipt_stream_id bigint,
+    presence_stream_id bigint,
+    to_device_stream_id bigint,
+    device_list_stream_id bigint
+);
+CREATE SEQUENCE application_services_txn_id_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+CREATE TABLE application_services_txns (
+    as_id text NOT NULL,
+    txn_id bigint NOT NULL,
+    event_ids text NOT NULL
+);
+CREATE TABLE appservice_room_list (
+    appservice_id text NOT NULL,
+    network_id text NOT NULL,
+    room_id text NOT NULL
+);
+CREATE TABLE appservice_stream_position (
+    lock character(1) DEFAULT 'X'::bpchar NOT NULL,
+    stream_ordering bigint,
+    CONSTRAINT appservice_stream_position_lock_check CHECK ((lock = 'X'::bpchar))
+);
+CREATE TABLE batch_events (
+    event_id text NOT NULL,
+    room_id text NOT NULL,
+    batch_id text NOT NULL
+);
+CREATE TABLE blocked_rooms (
+    room_id text NOT NULL,
+    user_id text NOT NULL
+);
+CREATE TABLE cache_invalidation_stream_by_instance (
+    stream_id bigint NOT NULL,
+    instance_name text NOT NULL,
+    cache_func text NOT NULL,
+    keys text[],
+    invalidation_ts bigint
+);
+CREATE SEQUENCE cache_invalidation_stream_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+CREATE TABLE current_state_delta_stream (
+    stream_id bigint NOT NULL,
+    room_id text NOT NULL,
+    type text NOT NULL,
+    state_key text NOT NULL,
+    event_id text,
+    prev_event_id text,
+    instance_name text
+);
+CREATE TABLE current_state_events (
+    event_id text NOT NULL,
+    room_id text NOT NULL,
+    type text NOT NULL,
+    state_key text NOT NULL,
+    membership text
+);
+CREATE TABLE dehydrated_devices (
+    user_id text NOT NULL,
+    device_id text NOT NULL,
+    device_data text NOT NULL
+);
+CREATE TABLE deleted_pushers (
+    stream_id bigint NOT NULL,
+    app_id text NOT NULL,
+    pushkey text NOT NULL,
+    user_id text NOT NULL
+);
+CREATE TABLE destination_rooms (
+    destination text NOT NULL,
+    room_id text NOT NULL,
+    stream_ordering bigint NOT NULL
+);
+CREATE TABLE destinations (
+    destination text NOT NULL,
+    retry_last_ts bigint,
+    retry_interval bigint,
+    failure_ts bigint,
+    last_successful_stream_ordering bigint
+);
+CREATE TABLE device_auth_providers (
+    user_id text NOT NULL,
+    device_id text NOT NULL,
+    auth_provider_id text NOT NULL,
+    auth_provider_session_id text NOT NULL
+);
+CREATE TABLE device_federation_inbox (
+    origin text NOT NULL,
+    message_id text NOT NULL,
+    received_ts bigint NOT NULL,
+    instance_name text
+);
+CREATE TABLE device_federation_outbox (
+    destination text NOT NULL,
+    stream_id bigint NOT NULL,
+    queued_ts bigint NOT NULL,
+    messages_json text NOT NULL,
+    instance_name text
+);
+CREATE TABLE device_inbox (
+    user_id text NOT NULL,
+    device_id text NOT NULL,
+    stream_id bigint NOT NULL,
+    message_json text NOT NULL,
+    instance_name text
+);
+CREATE SEQUENCE device_inbox_sequence
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+CREATE TABLE device_lists_changes_in_room (
+    user_id text NOT NULL,
+    device_id text NOT NULL,
+    room_id text NOT NULL,
+    stream_id bigint NOT NULL,
+    converted_to_destinations boolean NOT NULL,
+    opentracing_context text
+);
+CREATE TABLE device_lists_outbound_last_success (
+    destination text NOT NULL,
+    user_id text NOT NULL,
+    stream_id bigint NOT NULL
+);
+CREATE TABLE device_lists_outbound_pokes (
+    destination text NOT NULL,
+    stream_id bigint NOT NULL,
+    user_id text NOT NULL,
+    device_id text NOT NULL,
+    sent boolean NOT NULL,
+    ts bigint NOT NULL,
+    opentracing_context text
+);
+CREATE TABLE device_lists_remote_cache (
+    user_id text NOT NULL,
+    device_id text NOT NULL,
+    content text NOT NULL
+);
+CREATE TABLE device_lists_remote_extremeties (
+    user_id text NOT NULL,
+    stream_id text NOT NULL
+);
+CREATE TABLE device_lists_remote_resync (
+    user_id text NOT NULL,
+    added_ts bigint NOT NULL
+);
+CREATE TABLE device_lists_stream (
+    stream_id bigint NOT NULL,
+    user_id text NOT NULL,
+    device_id text NOT NULL
+);
+CREATE TABLE devices (
+    user_id text NOT NULL,
+    device_id text NOT NULL,
+    display_name text,
+    last_seen bigint,
+    ip text,
+    user_agent text,
+    hidden boolean DEFAULT false
+);
+CREATE TABLE e2e_cross_signing_keys (
+    user_id text NOT NULL,
+    keytype text NOT NULL,
+    keydata text NOT NULL,
+    stream_id bigint NOT NULL
+);
+CREATE TABLE e2e_cross_signing_signatures (
+    user_id text NOT NULL,
+    key_id text NOT NULL,
+    target_user_id text NOT NULL,
+    target_device_id text NOT NULL,
+    signature text NOT NULL
+);
+CREATE TABLE e2e_device_keys_json (
+    user_id text NOT NULL,
+    device_id text NOT NULL,
+    ts_added_ms bigint NOT NULL,
+    key_json text NOT NULL
+);
+CREATE TABLE e2e_fallback_keys_json (
+    user_id text NOT NULL,
+    device_id text NOT NULL,
+    algorithm text NOT NULL,
+    key_id text NOT NULL,
+    key_json text NOT NULL,
+    used boolean DEFAULT false NOT NULL
+);
+CREATE TABLE e2e_one_time_keys_json (
+    user_id text NOT NULL,
+    device_id text NOT NULL,
+    algorithm text NOT NULL,
+    key_id text NOT NULL,
+    ts_added_ms bigint NOT NULL,
+    key_json text NOT NULL
+);
+CREATE TABLE e2e_room_keys (
+    user_id text NOT NULL,
+    room_id text NOT NULL,
+    session_id text NOT NULL,
+    version bigint NOT NULL,
+    first_message_index integer,
+    forwarded_count integer,
+    is_verified boolean,
+    session_data text NOT NULL
+);
+CREATE TABLE e2e_room_keys_versions (
+    user_id text NOT NULL,
+    version bigint NOT NULL,
+    algorithm text NOT NULL,
+    auth_data text NOT NULL,
+    deleted smallint DEFAULT 0 NOT NULL,
+    etag bigint
+);
+CREATE TABLE erased_users (
+    user_id text NOT NULL
+);
+CREATE TABLE event_auth (
+    event_id text NOT NULL,
+    auth_id text NOT NULL,
+    room_id text NOT NULL
+);
+CREATE SEQUENCE event_auth_chain_id
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+CREATE TABLE event_auth_chain_links (
+    origin_chain_id bigint NOT NULL,
+    origin_sequence_number bigint NOT NULL,
+    target_chain_id bigint NOT NULL,
+    target_sequence_number bigint NOT NULL
+);
+CREATE TABLE event_auth_chain_to_calculate (
+    event_id text NOT NULL,
+    room_id text NOT NULL,
+    type text NOT NULL,
+    state_key text NOT NULL
+);
+CREATE TABLE event_auth_chains (
+    event_id text NOT NULL,
+    chain_id bigint NOT NULL,
+    sequence_number bigint NOT NULL
+);
+CREATE TABLE event_backward_extremities (
+    event_id text NOT NULL,
+    room_id text NOT NULL
+);
+CREATE TABLE event_edges (
+    event_id text NOT NULL,
+    prev_event_id text NOT NULL,
+    room_id text,
+    is_state boolean DEFAULT false NOT NULL
+);
+CREATE TABLE event_expiry (
+    event_id text NOT NULL,
+    expiry_ts bigint NOT NULL
+);
+CREATE TABLE event_forward_extremities (
+    event_id text NOT NULL,
+    room_id text NOT NULL
+);
+CREATE TABLE event_json (
+    event_id text NOT NULL,
+    room_id text NOT NULL,
+    internal_metadata text NOT NULL,
+    json text NOT NULL,
+    format_version integer
+);
+CREATE TABLE event_labels (
+    event_id text NOT NULL,
+    label text NOT NULL,
+    room_id text NOT NULL,
+    topological_ordering bigint NOT NULL
+);
+CREATE TABLE event_push_actions (
+    room_id text NOT NULL,
+    event_id text NOT NULL,
+    user_id text NOT NULL,
+    profile_tag character varying(32),
+    actions text NOT NULL,
+    topological_ordering bigint,
+    stream_ordering bigint,
+    notif smallint,
+    highlight smallint,
+    unread smallint,
+    thread_id text
+);
+CREATE TABLE event_push_actions_staging (
+    event_id text NOT NULL,
+    user_id text NOT NULL,
+    actions text NOT NULL,
+    notif smallint NOT NULL,
+    highlight smallint NOT NULL,
+    unread smallint,
+    thread_id text
+);
+CREATE TABLE event_push_summary (
+    user_id text NOT NULL,
+    room_id text NOT NULL,
+    notif_count bigint NOT NULL,
+    stream_ordering bigint NOT NULL,
+    unread_count bigint,
+    last_receipt_stream_ordering bigint,
+    thread_id text
+);
+CREATE TABLE event_push_summary_last_receipt_stream_id (
+    lock character(1) DEFAULT 'X'::bpchar NOT NULL,
+    stream_id bigint NOT NULL,
+    CONSTRAINT event_push_summary_last_receipt_stream_id_lock_check CHECK ((lock = 'X'::bpchar))
+);
+CREATE TABLE event_push_summary_stream_ordering (
+    lock character(1) DEFAULT 'X'::bpchar NOT NULL,
+    stream_ordering bigint NOT NULL,
+    CONSTRAINT event_push_summary_stream_ordering_lock_check CHECK ((lock = 'X'::bpchar))
+);
+CREATE TABLE event_relations (
+    event_id text NOT NULL,
+    relates_to_id text NOT NULL,
+    relation_type text NOT NULL,
+    aggregation_key text
+);
+CREATE TABLE event_reports (
+    id bigint NOT NULL,
+    received_ts bigint NOT NULL,
+    room_id text NOT NULL,
+    event_id text NOT NULL,
+    user_id text NOT NULL,
+    reason text,
+    content text
+);
+CREATE TABLE event_search (
+    event_id text,
+    room_id text,
+    sender text,
+    key text,
+    vector tsvector,
+    origin_server_ts bigint,
+    stream_ordering bigint
+);
+CREATE TABLE event_to_state_groups (
+    event_id text NOT NULL,
+    state_group bigint NOT NULL
+);
+CREATE TABLE event_txn_id (
+    event_id text NOT NULL,
+    room_id text NOT NULL,
+    user_id text NOT NULL,
+    token_id bigint NOT NULL,
+    txn_id text NOT NULL,
+    inserted_ts bigint NOT NULL
+);
+CREATE TABLE events (
+    topological_ordering bigint NOT NULL,
+    event_id text NOT NULL,
+    type text NOT NULL,
+    room_id text NOT NULL,
+    content text,
+    unrecognized_keys text,
+    processed boolean NOT NULL,
+    outlier boolean NOT NULL,
+    depth bigint DEFAULT 0 NOT NULL,
+    origin_server_ts bigint,
+    received_ts bigint,
+    sender text,
+    contains_url boolean,
+    instance_name text,
+    stream_ordering bigint,
+    state_key text,
+    rejection_reason text
+);
+CREATE SEQUENCE events_backfill_stream_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+CREATE SEQUENCE events_stream_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+CREATE TABLE ex_outlier_stream (
+    event_stream_ordering bigint NOT NULL,
+    event_id text NOT NULL,
+    state_group bigint NOT NULL,
+    instance_name text
+);
+CREATE TABLE federation_inbound_events_staging (
+    origin text NOT NULL,
+    room_id text NOT NULL,
+    event_id text NOT NULL,
+    received_ts bigint NOT NULL,
+    event_json text NOT NULL,
+    internal_metadata text NOT NULL
+);
+CREATE TABLE federation_stream_position (
+    type text NOT NULL,
+    stream_id bigint NOT NULL,
+    instance_name text DEFAULT 'master'::text NOT NULL
+);
+CREATE TABLE ignored_users (
+    ignorer_user_id text NOT NULL,
+    ignored_user_id text NOT NULL
+);
+CREATE TABLE insertion_event_edges (
+    event_id text NOT NULL,
+    room_id text NOT NULL,
+    insertion_prev_event_id text NOT NULL
+);
+CREATE TABLE insertion_event_extremities (
+    event_id text NOT NULL,
+    room_id text NOT NULL
+);
+CREATE TABLE insertion_events (
+    event_id text NOT NULL,
+    room_id text NOT NULL,
+    next_batch_id text NOT NULL
+);
+CREATE TABLE instance_map (
+    instance_id integer NOT NULL,
+    instance_name text NOT NULL
+);
+CREATE SEQUENCE instance_map_instance_id_seq
+    AS integer
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+ALTER SEQUENCE instance_map_instance_id_seq OWNED BY instance_map.instance_id;
+CREATE TABLE local_current_membership (
+    room_id text NOT NULL,
+    user_id text NOT NULL,
+    event_id text NOT NULL,
+    membership text NOT NULL
+);
+CREATE TABLE local_media_repository (
+    media_id text,
+    media_type text,
+    media_length integer,
+    created_ts bigint,
+    upload_name text,
+    user_id text,
+    quarantined_by text,
+    url_cache text,
+    last_access_ts bigint,
+    safe_from_quarantine boolean DEFAULT false NOT NULL
+);
+CREATE TABLE local_media_repository_thumbnails (
+    media_id text,
+    thumbnail_width integer,
+    thumbnail_height integer,
+    thumbnail_type text,
+    thumbnail_method text,
+    thumbnail_length integer
+);
+CREATE TABLE local_media_repository_url_cache (
+    url text,
+    response_code integer,
+    etag text,
+    expires_ts bigint,
+    og text,
+    media_id text,
+    download_ts bigint
+);
+CREATE TABLE monthly_active_users (
+    user_id text NOT NULL,
+    "timestamp" bigint NOT NULL
+);
+CREATE TABLE open_id_tokens (
+    token text NOT NULL,
+    ts_valid_until_ms bigint NOT NULL,
+    user_id text NOT NULL
+);
+CREATE TABLE partial_state_events (
+    room_id text NOT NULL,
+    event_id text NOT NULL
+);
+CREATE TABLE partial_state_rooms (
+    room_id text NOT NULL
+);
+CREATE TABLE partial_state_rooms_servers (
+    room_id text NOT NULL,
+    server_name text NOT NULL
+);
+CREATE TABLE presence (
+    user_id text NOT NULL,
+    state character varying(20),
+    status_msg text,
+    mtime bigint
+);
+CREATE TABLE presence_stream (
+    stream_id bigint,
+    user_id text,
+    state text,
+    last_active_ts bigint,
+    last_federation_update_ts bigint,
+    last_user_sync_ts bigint,
+    status_msg text,
+    currently_active boolean,
+    instance_name text
+);
+CREATE SEQUENCE presence_stream_sequence
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+CREATE TABLE profiles (
+    user_id text NOT NULL,
+    displayname text,
+    avatar_url text
+);
+CREATE TABLE push_rules (
+    id bigint NOT NULL,
+    user_name text NOT NULL,
+    rule_id text NOT NULL,
+    priority_class smallint NOT NULL,
+    priority integer DEFAULT 0 NOT NULL,
+    conditions text NOT NULL,
+    actions text NOT NULL
+);
+CREATE TABLE push_rules_enable (
+    id bigint NOT NULL,
+    user_name text NOT NULL,
+    rule_id text NOT NULL,
+    enabled smallint
+);
+CREATE TABLE push_rules_stream (
+    stream_id bigint NOT NULL,
+    event_stream_ordering bigint NOT NULL,
+    user_id text NOT NULL,
+    rule_id text NOT NULL,
+    op text NOT NULL,
+    priority_class smallint,
+    priority integer,
+    conditions text,
+    actions text
+);
+CREATE TABLE pusher_throttle (
+    pusher bigint NOT NULL,
+    room_id text NOT NULL,
+    last_sent_ts bigint,
+    throttle_ms bigint
+);
+CREATE TABLE pushers (
+    id bigint NOT NULL,
+    user_name text NOT NULL,
+    access_token bigint,
+    profile_tag text NOT NULL,
+    kind text NOT NULL,
+    app_id text NOT NULL,
+    app_display_name text NOT NULL,
+    device_display_name text NOT NULL,
+    pushkey text NOT NULL,
+    ts bigint NOT NULL,
+    lang text,
+    data text,
+    last_stream_ordering bigint,
+    last_success bigint,
+    failing_since bigint
+);
+CREATE TABLE ratelimit_override (
+    user_id text NOT NULL,
+    messages_per_second bigint,
+    burst_count bigint
+);
+CREATE TABLE receipts_graph (
+    room_id text NOT NULL,
+    receipt_type text NOT NULL,
+    user_id text NOT NULL,
+    event_ids text NOT NULL,
+    data text NOT NULL,
+    thread_id text
+);
+CREATE TABLE receipts_linearized (
+    stream_id bigint NOT NULL,
+    room_id text NOT NULL,
+    receipt_type text NOT NULL,
+    user_id text NOT NULL,
+    event_id text NOT NULL,
+    data text NOT NULL,
+    instance_name text,
+    event_stream_ordering bigint,
+    thread_id text
+);
+CREATE SEQUENCE receipts_sequence
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+CREATE TABLE received_transactions (
+    transaction_id text,
+    origin text,
+    ts bigint,
+    response_code integer,
+    response_json bytea,
+    has_been_referenced smallint DEFAULT 0
+);
+CREATE TABLE redactions (
+    event_id text NOT NULL,
+    redacts text NOT NULL,
+    have_censored boolean DEFAULT false NOT NULL,
+    received_ts bigint
+);
+CREATE TABLE refresh_tokens (
+    id bigint NOT NULL,
+    user_id text NOT NULL,
+    device_id text NOT NULL,
+    token text NOT NULL,
+    next_token_id bigint,
+    expiry_ts bigint,
+    ultimate_session_expiry_ts bigint
+);
+CREATE TABLE registration_tokens (
+    token text NOT NULL,
+    uses_allowed integer,
+    pending integer NOT NULL,
+    completed integer NOT NULL,
+    expiry_time bigint
+);
+CREATE TABLE rejections (
+    event_id text NOT NULL,
+    reason text NOT NULL,
+    last_check text NOT NULL
+);
+CREATE TABLE remote_media_cache (
+    media_origin text,
+    media_id text,
+    media_type text,
+    created_ts bigint,
+    upload_name text,
+    media_length integer,
+    filesystem_id text,
+    last_access_ts bigint,
+    quarantined_by text
+);
+CREATE TABLE remote_media_cache_thumbnails (
+    media_origin text,
+    media_id text,
+    thumbnail_width integer,
+    thumbnail_height integer,
+    thumbnail_method text,
+    thumbnail_type text,
+    thumbnail_length integer,
+    filesystem_id text
+);
+CREATE TABLE room_account_data (
+    user_id text NOT NULL,
+    room_id text NOT NULL,
+    account_data_type text NOT NULL,
+    stream_id bigint NOT NULL,
+    content text NOT NULL,
+    instance_name text
+);
+CREATE TABLE room_alias_servers (
+    room_alias text NOT NULL,
+    server text NOT NULL
+);
+CREATE TABLE room_aliases (
+    room_alias text NOT NULL,
+    room_id text NOT NULL,
+    creator text
+);
+CREATE TABLE room_depth (
+    room_id text NOT NULL,
+    min_depth bigint
+);
+CREATE TABLE room_memberships (
+    event_id text NOT NULL,
+    user_id text NOT NULL,
+    sender text NOT NULL,
+    room_id text NOT NULL,
+    membership text NOT NULL,
+    forgotten integer DEFAULT 0,
+    display_name text,
+    avatar_url text
+);
+CREATE TABLE room_retention (
+    room_id text NOT NULL,
+    event_id text NOT NULL,
+    min_lifetime bigint,
+    max_lifetime bigint
+);
+CREATE TABLE room_stats_current (
+    room_id text NOT NULL,
+    current_state_events integer NOT NULL,
+    joined_members integer NOT NULL,
+    invited_members integer NOT NULL,
+    left_members integer NOT NULL,
+    banned_members integer NOT NULL,
+    local_users_in_room integer NOT NULL,
+    completed_delta_stream_id bigint NOT NULL,
+    knocked_members integer
+);
+CREATE TABLE room_stats_earliest_token (
+    room_id text NOT NULL,
+    token bigint NOT NULL
+);
+CREATE TABLE room_stats_state (
+    room_id text NOT NULL,
+    name text,
+    canonical_alias text,
+    join_rules text,
+    history_visibility text,
+    encryption text,
+    avatar text,
+    guest_access text,
+    is_federatable boolean,
+    topic text,
+    room_type text
+);
+CREATE TABLE room_tags (
+    user_id text NOT NULL,
+    room_id text NOT NULL,
+    tag text NOT NULL,
+    content text NOT NULL
+);
+CREATE TABLE room_tags_revisions (
+    user_id text NOT NULL,
+    room_id text NOT NULL,
+    stream_id bigint NOT NULL,
+    instance_name text
+);
+CREATE TABLE rooms (
+    room_id text NOT NULL,
+    is_public boolean,
+    creator text,
+    room_version text,
+    has_auth_chain_index boolean
+);
+CREATE TABLE server_keys_json (
+    server_name text NOT NULL,
+    key_id text NOT NULL,
+    from_server text NOT NULL,
+    ts_added_ms bigint NOT NULL,
+    ts_valid_until_ms bigint NOT NULL,
+    key_json bytea NOT NULL
+);
+CREATE TABLE server_signature_keys (
+    server_name text,
+    key_id text,
+    from_server text,
+    ts_added_ms bigint,
+    verify_key bytea,
+    ts_valid_until_ms bigint
+);
+CREATE TABLE sessions (
+    session_type text NOT NULL,
+    session_id text NOT NULL,
+    value text NOT NULL,
+    expiry_time_ms bigint NOT NULL
+);
+CREATE TABLE state_events (
+    event_id text NOT NULL,
+    room_id text NOT NULL,
+    type text NOT NULL,
+    state_key text NOT NULL,
+    prev_state text
+);
+CREATE TABLE stats_incremental_position (
+    lock character(1) DEFAULT 'X'::bpchar NOT NULL,
+    stream_id bigint NOT NULL,
+    CONSTRAINT stats_incremental_position_lock_check CHECK ((lock = 'X'::bpchar))
+);
+CREATE TABLE stream_ordering_to_exterm (
+    stream_ordering bigint NOT NULL,
+    room_id text NOT NULL,
+    event_id text NOT NULL
+);
+CREATE TABLE stream_positions (
+    stream_name text NOT NULL,
+    instance_name text NOT NULL,
+    stream_id bigint NOT NULL
+);
+CREATE TABLE threepid_guest_access_tokens (
+    medium text,
+    address text,
+    guest_access_token text,
+    first_inviter text
+);
+CREATE TABLE threepid_validation_session (
+    session_id text NOT NULL,
+    medium text NOT NULL,
+    address text NOT NULL,
+    client_secret text NOT NULL,
+    last_send_attempt bigint NOT NULL,
+    validated_at bigint
+);
+CREATE TABLE threepid_validation_token (
+    token text NOT NULL,
+    session_id text NOT NULL,
+    next_link text,
+    expires bigint NOT NULL
+);
+CREATE TABLE ui_auth_sessions (
+    session_id text NOT NULL,
+    creation_time bigint NOT NULL,
+    serverdict text NOT NULL,
+    clientdict text NOT NULL,
+    uri text NOT NULL,
+    method text NOT NULL,
+    description text NOT NULL
+);
+CREATE TABLE ui_auth_sessions_credentials (
+    session_id text NOT NULL,
+    stage_type text NOT NULL,
+    result text NOT NULL
+);
+CREATE TABLE ui_auth_sessions_ips (
+    session_id text NOT NULL,
+    ip text NOT NULL,
+    user_agent text NOT NULL
+);
+CREATE TABLE user_daily_visits (
+    user_id text NOT NULL,
+    device_id text,
+    "timestamp" bigint NOT NULL,
+    user_agent text
+);
+CREATE TABLE user_directory (
+    user_id text NOT NULL,
+    room_id text,
+    display_name text,
+    avatar_url text
+);
+CREATE TABLE user_directory_search (
+    user_id text NOT NULL,
+    vector tsvector
+);
+CREATE TABLE user_directory_stream_pos (
+    lock character(1) DEFAULT 'X'::bpchar NOT NULL,
+    stream_id bigint,
+    CONSTRAINT user_directory_stream_pos_lock_check CHECK ((lock = 'X'::bpchar))
+);
+CREATE TABLE user_external_ids (
+    auth_provider text NOT NULL,
+    external_id text NOT NULL,
+    user_id text NOT NULL
+);
+CREATE TABLE user_filters (
+    user_id text NOT NULL,
+    filter_id bigint NOT NULL,
+    filter_json bytea NOT NULL
+);
+CREATE SEQUENCE user_id_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+CREATE TABLE user_ips (
+    user_id text NOT NULL,
+    access_token text NOT NULL,
+    device_id text,
+    ip text NOT NULL,
+    user_agent text NOT NULL,
+    last_seen bigint NOT NULL
+);
+CREATE TABLE user_signature_stream (
+    stream_id bigint NOT NULL,
+    from_user_id text NOT NULL,
+    user_ids text NOT NULL
+);
+CREATE TABLE user_stats_current (
+    user_id text NOT NULL,
+    joined_rooms bigint NOT NULL,
+    completed_delta_stream_id bigint NOT NULL
+);
+CREATE TABLE user_threepid_id_server (
+    user_id text NOT NULL,
+    medium text NOT NULL,
+    address text NOT NULL,
+    id_server text NOT NULL
+);
+CREATE TABLE user_threepids (
+    user_id text NOT NULL,
+    medium text NOT NULL,
+    address text NOT NULL,
+    validated_at bigint NOT NULL,
+    added_at bigint NOT NULL
+);
+CREATE TABLE users (
+    name text,
+    password_hash text,
+    creation_ts bigint,
+    admin smallint DEFAULT 0 NOT NULL,
+    upgrade_ts bigint,
+    is_guest smallint DEFAULT 0 NOT NULL,
+    appservice_id text,
+    consent_version text,
+    consent_server_notice_sent text,
+    user_type text,
+    deactivated smallint DEFAULT 0 NOT NULL,
+    shadow_banned boolean,
+    consent_ts bigint
+);
+CREATE TABLE users_in_public_rooms (
+    user_id text NOT NULL,
+    room_id text NOT NULL
+);
+CREATE TABLE users_pending_deactivation (
+    user_id text NOT NULL
+);
+CREATE TABLE users_to_send_full_presence_to (
+    user_id text NOT NULL,
+    presence_stream_id bigint
+);
+CREATE TABLE users_who_share_private_rooms (
+    user_id text NOT NULL,
+    other_user_id text NOT NULL,
+    room_id text NOT NULL
+);
+CREATE TABLE worker_locks (
+    lock_name text NOT NULL,
+    lock_key text NOT NULL,
+    instance_name text NOT NULL,
+    token text NOT NULL,
+    last_renewed_ts bigint NOT NULL
+);
+ALTER TABLE ONLY instance_map ALTER COLUMN instance_id SET DEFAULT nextval('instance_map_instance_id_seq'::regclass);
+ALTER TABLE ONLY access_tokens
+    ADD CONSTRAINT access_tokens_pkey PRIMARY KEY (id);
+ALTER TABLE ONLY access_tokens
+    ADD CONSTRAINT access_tokens_token_key UNIQUE (token);
+ALTER TABLE ONLY account_data
+    ADD CONSTRAINT account_data_uniqueness UNIQUE (user_id, account_data_type);
+ALTER TABLE ONLY account_validity
+    ADD CONSTRAINT account_validity_pkey PRIMARY KEY (user_id);
+ALTER TABLE ONLY application_services_state
+    ADD CONSTRAINT application_services_state_pkey PRIMARY KEY (as_id);
+ALTER TABLE ONLY application_services_txns
+    ADD CONSTRAINT application_services_txns_as_id_txn_id_key UNIQUE (as_id, txn_id);
+ALTER TABLE ONLY appservice_stream_position
+    ADD CONSTRAINT appservice_stream_position_lock_key UNIQUE (lock);
+ALTER TABLE ONLY current_state_events
+    ADD CONSTRAINT current_state_events_event_id_key UNIQUE (event_id);
+ALTER TABLE ONLY current_state_events
+    ADD CONSTRAINT current_state_events_room_id_type_state_key_key UNIQUE (room_id, type, state_key);
+ALTER TABLE ONLY dehydrated_devices
+    ADD CONSTRAINT dehydrated_devices_pkey PRIMARY KEY (user_id);
+ALTER TABLE ONLY destination_rooms
+    ADD CONSTRAINT destination_rooms_pkey PRIMARY KEY (destination, room_id);
+ALTER TABLE ONLY destinations
+    ADD CONSTRAINT destinations_pkey PRIMARY KEY (destination);
+ALTER TABLE ONLY devices
+    ADD CONSTRAINT device_uniqueness UNIQUE (user_id, device_id);
+ALTER TABLE ONLY e2e_device_keys_json
+    ADD CONSTRAINT e2e_device_keys_json_uniqueness UNIQUE (user_id, device_id);
+ALTER TABLE ONLY e2e_fallback_keys_json
+    ADD CONSTRAINT e2e_fallback_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm);
+ALTER TABLE ONLY e2e_one_time_keys_json
+    ADD CONSTRAINT e2e_one_time_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm, key_id);
+ALTER TABLE ONLY event_auth_chain_to_calculate
+    ADD CONSTRAINT event_auth_chain_to_calculate_pkey PRIMARY KEY (event_id);
+ALTER TABLE ONLY event_auth_chains
+    ADD CONSTRAINT event_auth_chains_pkey PRIMARY KEY (event_id);
+ALTER TABLE ONLY event_backward_extremities
+    ADD CONSTRAINT event_backward_extremities_event_id_room_id_key UNIQUE (event_id, room_id);
+ALTER TABLE ONLY event_expiry
+    ADD CONSTRAINT event_expiry_pkey PRIMARY KEY (event_id);
+ALTER TABLE ONLY event_forward_extremities
+    ADD CONSTRAINT event_forward_extremities_event_id_room_id_key UNIQUE (event_id, room_id);
+ALTER TABLE ONLY event_push_actions
+    ADD CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag);
+ALTER TABLE ONLY event_json
+    ADD CONSTRAINT event_json_event_id_key UNIQUE (event_id);
+ALTER TABLE ONLY event_labels
+    ADD CONSTRAINT event_labels_pkey PRIMARY KEY (event_id, label);
+ALTER TABLE ONLY event_push_summary_last_receipt_stream_id
+    ADD CONSTRAINT event_push_summary_last_receipt_stream_id_lock_key UNIQUE (lock);
+ALTER TABLE ONLY event_push_summary_stream_ordering
+    ADD CONSTRAINT event_push_summary_stream_ordering_lock_key UNIQUE (lock);
+ALTER TABLE ONLY event_reports
+    ADD CONSTRAINT event_reports_pkey PRIMARY KEY (id);
+ALTER TABLE ONLY event_to_state_groups
+    ADD CONSTRAINT event_to_state_groups_event_id_key UNIQUE (event_id);
+ALTER TABLE ONLY events
+    ADD CONSTRAINT events_event_id_key UNIQUE (event_id);
+ALTER TABLE ONLY ex_outlier_stream
+    ADD CONSTRAINT ex_outlier_stream_pkey PRIMARY KEY (event_stream_ordering);
+ALTER TABLE ONLY instance_map
+    ADD CONSTRAINT instance_map_pkey PRIMARY KEY (instance_id);
+ALTER TABLE ONLY local_media_repository
+    ADD CONSTRAINT local_media_repository_media_id_key UNIQUE (media_id);
+ALTER TABLE ONLY user_threepids
+    ADD CONSTRAINT medium_address UNIQUE (medium, address);
+ALTER TABLE ONLY open_id_tokens
+    ADD CONSTRAINT open_id_tokens_pkey PRIMARY KEY (token);
+ALTER TABLE ONLY partial_state_events
+    ADD CONSTRAINT partial_state_events_event_id_key UNIQUE (event_id);
+ALTER TABLE ONLY partial_state_rooms
+    ADD CONSTRAINT partial_state_rooms_pkey PRIMARY KEY (room_id);
+ALTER TABLE ONLY partial_state_rooms_servers
+    ADD CONSTRAINT partial_state_rooms_servers_room_id_server_name_key UNIQUE (room_id, server_name);
+ALTER TABLE ONLY presence
+    ADD CONSTRAINT presence_user_id_key UNIQUE (user_id);
+ALTER TABLE ONLY profiles
+    ADD CONSTRAINT profiles_user_id_key UNIQUE (user_id);
+ALTER TABLE ONLY push_rules_enable
+    ADD CONSTRAINT push_rules_enable_pkey PRIMARY KEY (id);
+ALTER TABLE ONLY push_rules_enable
+    ADD CONSTRAINT push_rules_enable_user_name_rule_id_key UNIQUE (user_name, rule_id);
+ALTER TABLE ONLY push_rules
+    ADD CONSTRAINT push_rules_pkey PRIMARY KEY (id);
+ALTER TABLE ONLY push_rules
+    ADD CONSTRAINT push_rules_user_name_rule_id_key UNIQUE (user_name, rule_id);
+ALTER TABLE ONLY pusher_throttle
+    ADD CONSTRAINT pusher_throttle_pkey PRIMARY KEY (pusher, room_id);
+ALTER TABLE ONLY pushers
+    ADD CONSTRAINT pushers2_app_id_pushkey_user_name_key UNIQUE (app_id, pushkey, user_name);
+ALTER TABLE ONLY pushers
+    ADD CONSTRAINT pushers2_pkey PRIMARY KEY (id);
+ALTER TABLE ONLY receipts_graph
+    ADD CONSTRAINT receipts_graph_uniqueness UNIQUE (room_id, receipt_type, user_id);
+ALTER TABLE ONLY receipts_graph
+    ADD CONSTRAINT receipts_graph_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id);
+ALTER TABLE ONLY receipts_linearized
+    ADD CONSTRAINT receipts_linearized_uniqueness UNIQUE (room_id, receipt_type, user_id);
+ALTER TABLE ONLY receipts_linearized
+    ADD CONSTRAINT receipts_linearized_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id);
+ALTER TABLE ONLY received_transactions
+    ADD CONSTRAINT received_transactions_transaction_id_origin_key UNIQUE (transaction_id, origin);
+ALTER TABLE ONLY redactions
+    ADD CONSTRAINT redactions_event_id_key UNIQUE (event_id);
+ALTER TABLE ONLY refresh_tokens
+    ADD CONSTRAINT refresh_tokens_pkey PRIMARY KEY (id);
+ALTER TABLE ONLY refresh_tokens
+    ADD CONSTRAINT refresh_tokens_token_key UNIQUE (token);
+ALTER TABLE ONLY registration_tokens
+    ADD CONSTRAINT registration_tokens_token_key UNIQUE (token);
+ALTER TABLE ONLY rejections
+    ADD CONSTRAINT rejections_event_id_key UNIQUE (event_id);
+ALTER TABLE ONLY remote_media_cache
+    ADD CONSTRAINT remote_media_cache_media_origin_media_id_key UNIQUE (media_origin, media_id);
+ALTER TABLE ONLY room_account_data
+    ADD CONSTRAINT room_account_data_uniqueness UNIQUE (user_id, room_id, account_data_type);
+ALTER TABLE ONLY room_aliases
+    ADD CONSTRAINT room_aliases_room_alias_key UNIQUE (room_alias);
+ALTER TABLE ONLY room_depth
+    ADD CONSTRAINT room_depth_room_id_key UNIQUE (room_id);
+ALTER TABLE ONLY room_memberships
+    ADD CONSTRAINT room_memberships_event_id_key UNIQUE (event_id);
+ALTER TABLE ONLY room_retention
+    ADD CONSTRAINT room_retention_pkey PRIMARY KEY (room_id, event_id);
+ALTER TABLE ONLY room_stats_current
+    ADD CONSTRAINT room_stats_current_pkey PRIMARY KEY (room_id);
+ALTER TABLE ONLY room_tags_revisions
+    ADD CONSTRAINT room_tag_revisions_uniqueness UNIQUE (user_id, room_id);
+ALTER TABLE ONLY room_tags
+    ADD CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag);
+ALTER TABLE ONLY rooms
+    ADD CONSTRAINT rooms_pkey PRIMARY KEY (room_id);
+ALTER TABLE ONLY server_keys_json
+    ADD CONSTRAINT server_keys_json_uniqueness UNIQUE (server_name, key_id, from_server);
+ALTER TABLE ONLY server_signature_keys
+    ADD CONSTRAINT server_signature_keys_server_name_key_id_key UNIQUE (server_name, key_id);
+ALTER TABLE ONLY sessions
+    ADD CONSTRAINT sessions_session_type_session_id_key UNIQUE (session_type, session_id);
+ALTER TABLE ONLY state_events
+    ADD CONSTRAINT state_events_event_id_key UNIQUE (event_id);
+ALTER TABLE ONLY stats_incremental_position
+    ADD CONSTRAINT stats_incremental_position_lock_key UNIQUE (lock);
+ALTER TABLE ONLY threepid_validation_session
+    ADD CONSTRAINT threepid_validation_session_pkey PRIMARY KEY (session_id);
+ALTER TABLE ONLY threepid_validation_token
+    ADD CONSTRAINT threepid_validation_token_pkey PRIMARY KEY (token);
+ALTER TABLE ONLY ui_auth_sessions_credentials
+    ADD CONSTRAINT ui_auth_sessions_credentials_session_id_stage_type_key UNIQUE (session_id, stage_type);
+ALTER TABLE ONLY ui_auth_sessions_ips
+    ADD CONSTRAINT ui_auth_sessions_ips_session_id_ip_user_agent_key UNIQUE (session_id, ip, user_agent);
+ALTER TABLE ONLY ui_auth_sessions
+    ADD CONSTRAINT ui_auth_sessions_session_id_key UNIQUE (session_id);
+ALTER TABLE ONLY user_directory_stream_pos
+    ADD CONSTRAINT user_directory_stream_pos_lock_key UNIQUE (lock);
+ALTER TABLE ONLY user_external_ids
+    ADD CONSTRAINT user_external_ids_auth_provider_external_id_key UNIQUE (auth_provider, external_id);
+ALTER TABLE ONLY user_stats_current
+    ADD CONSTRAINT user_stats_current_pkey PRIMARY KEY (user_id);
+ALTER TABLE ONLY users
+    ADD CONSTRAINT users_name_key UNIQUE (name);
+ALTER TABLE ONLY users_to_send_full_presence_to
+    ADD CONSTRAINT users_to_send_full_presence_to_pkey PRIMARY KEY (user_id);
+CREATE INDEX access_tokens_device_id ON access_tokens USING btree (user_id, device_id);
+CREATE INDEX account_data_stream_id ON account_data USING btree (user_id, stream_id);
+CREATE INDEX application_services_txns_id ON application_services_txns USING btree (as_id);
+CREATE UNIQUE INDEX appservice_room_list_idx ON appservice_room_list USING btree (appservice_id, network_id, room_id);
+CREATE INDEX batch_events_batch_id ON batch_events USING btree (batch_id);
+CREATE UNIQUE INDEX blocked_rooms_idx ON blocked_rooms USING btree (room_id);
+CREATE UNIQUE INDEX cache_invalidation_stream_by_instance_id ON cache_invalidation_stream_by_instance USING btree (stream_id);
+CREATE INDEX cache_invalidation_stream_by_instance_instance_index ON cache_invalidation_stream_by_instance USING btree (instance_name, stream_id);
+CREATE UNIQUE INDEX chunk_events_event_id ON batch_events USING btree (event_id);
+CREATE INDEX current_state_delta_stream_idx ON current_state_delta_stream USING btree (stream_id);
+CREATE INDEX current_state_events_member_index ON current_state_events USING btree (state_key) WHERE (type = 'm.room.member'::text);
+CREATE INDEX deleted_pushers_stream_id ON deleted_pushers USING btree (stream_id);
+CREATE INDEX destination_rooms_room_id ON destination_rooms USING btree (room_id);
+CREATE INDEX device_auth_providers_devices ON device_auth_providers USING btree (user_id, device_id);
+CREATE INDEX device_auth_providers_sessions ON device_auth_providers USING btree (auth_provider_id, auth_provider_session_id);
+CREATE INDEX device_federation_inbox_sender_id ON device_federation_inbox USING btree (origin, message_id);
+CREATE INDEX device_federation_outbox_destination_id ON device_federation_outbox USING btree (destination, stream_id);
+CREATE INDEX device_federation_outbox_id ON device_federation_outbox USING btree (stream_id);
+CREATE INDEX device_inbox_stream_id_user_id ON device_inbox USING btree (stream_id, user_id);
+CREATE INDEX device_inbox_user_stream_id ON device_inbox USING btree (user_id, device_id, stream_id);
+CREATE UNIQUE INDEX device_lists_changes_in_stream_id ON device_lists_changes_in_room USING btree (stream_id, room_id);
+CREATE INDEX device_lists_changes_in_stream_id_unconverted ON device_lists_changes_in_room USING btree (stream_id) WHERE (NOT converted_to_destinations);
+CREATE UNIQUE INDEX device_lists_outbound_last_success_unique_idx ON device_lists_outbound_last_success USING btree (destination, user_id);
+CREATE INDEX device_lists_outbound_pokes_id ON device_lists_outbound_pokes USING btree (destination, stream_id);
+CREATE INDEX device_lists_outbound_pokes_stream ON device_lists_outbound_pokes USING btree (stream_id);
+CREATE INDEX device_lists_outbound_pokes_user ON device_lists_outbound_pokes USING btree (destination, user_id);
+CREATE UNIQUE INDEX device_lists_remote_cache_unique_id ON device_lists_remote_cache USING btree (user_id, device_id);
+CREATE UNIQUE INDEX device_lists_remote_extremeties_unique_idx ON device_lists_remote_extremeties USING btree (user_id);
+CREATE UNIQUE INDEX device_lists_remote_resync_idx ON device_lists_remote_resync USING btree (user_id);
+CREATE INDEX device_lists_remote_resync_ts_idx ON device_lists_remote_resync USING btree (added_ts);
+CREATE INDEX device_lists_stream_id ON device_lists_stream USING btree (stream_id, user_id);
+CREATE INDEX device_lists_stream_user_id ON device_lists_stream USING btree (user_id, device_id);
+CREATE UNIQUE INDEX e2e_cross_signing_keys_idx ON e2e_cross_signing_keys USING btree (user_id, keytype, stream_id);
+CREATE UNIQUE INDEX e2e_cross_signing_keys_stream_idx ON e2e_cross_signing_keys USING btree (stream_id);
+CREATE INDEX e2e_cross_signing_signatures2_idx ON e2e_cross_signing_signatures USING btree (user_id, target_user_id, target_device_id);
+CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions USING btree (user_id, version);
+CREATE UNIQUE INDEX e2e_room_keys_with_version_idx ON e2e_room_keys USING btree (user_id, version, room_id, session_id);
+CREATE UNIQUE INDEX erased_users_user ON erased_users USING btree (user_id);
+CREATE INDEX ev_b_extrem_id ON event_backward_extremities USING btree (event_id);
+CREATE INDEX ev_b_extrem_room ON event_backward_extremities USING btree (room_id);
+CREATE INDEX ev_edges_prev_id ON event_edges USING btree (prev_event_id);
+CREATE INDEX ev_extrem_id ON event_forward_extremities USING btree (event_id);
+CREATE INDEX ev_extrem_room ON event_forward_extremities USING btree (room_id);
+CREATE INDEX evauth_edges_id ON event_auth USING btree (event_id);
+CREATE INDEX event_auth_chain_links_idx ON event_auth_chain_links USING btree (origin_chain_id, target_chain_id);
+CREATE INDEX event_auth_chain_to_calculate_rm_id ON event_auth_chain_to_calculate USING btree (room_id);
+CREATE UNIQUE INDEX event_auth_chains_c_seq_index ON event_auth_chains USING btree (chain_id, sequence_number);
+CREATE INDEX event_contains_url_index ON events USING btree (room_id, topological_ordering, stream_ordering) WHERE ((contains_url = true) AND (outlier = false));
+CREATE UNIQUE INDEX event_edges_event_id_prev_event_id_idx ON event_edges USING btree (event_id, prev_event_id);
+CREATE INDEX event_expiry_expiry_ts_idx ON event_expiry USING btree (expiry_ts);
+CREATE INDEX event_labels_room_id_label_idx ON event_labels USING btree (room_id, label, topological_ordering);
+CREATE INDEX event_push_actions_highlights_index ON event_push_actions USING btree (user_id, room_id, topological_ordering, stream_ordering) WHERE (highlight = 1);
+CREATE INDEX event_push_actions_rm_tokens ON event_push_actions USING btree (user_id, room_id, topological_ordering, stream_ordering);
+CREATE INDEX event_push_actions_room_id_user_id ON event_push_actions USING btree (room_id, user_id);
+CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging USING btree (event_id);
+CREATE INDEX event_push_actions_stream_highlight_index ON event_push_actions USING btree (highlight, stream_ordering) WHERE (highlight = 0);
+CREATE INDEX event_push_actions_stream_ordering ON event_push_actions USING btree (stream_ordering, user_id);
+CREATE INDEX event_push_actions_u_highlight ON event_push_actions USING btree (user_id, stream_ordering);
+CREATE UNIQUE INDEX event_push_summary_unique_index ON event_push_summary USING btree (user_id, room_id);
+CREATE UNIQUE INDEX event_push_summary_unique_index2 ON event_push_summary USING btree (user_id, room_id, thread_id);
+CREATE UNIQUE INDEX event_relations_id ON event_relations USING btree (event_id);
+CREATE INDEX event_relations_relates ON event_relations USING btree (relates_to_id, relation_type, aggregation_key);
+CREATE INDEX event_search_ev_ridx ON event_search USING btree (room_id);
+CREATE UNIQUE INDEX event_search_event_id_idx ON event_search USING btree (event_id);
+CREATE INDEX event_search_fts_idx ON event_search USING gin (vector);
+CREATE INDEX event_to_state_groups_sg_index ON event_to_state_groups USING btree (state_group);
+CREATE UNIQUE INDEX event_txn_id_event_id ON event_txn_id USING btree (event_id);
+CREATE INDEX event_txn_id_ts ON event_txn_id USING btree (inserted_ts);
+CREATE UNIQUE INDEX event_txn_id_txn_id ON event_txn_id USING btree (room_id, user_id, token_id, txn_id);
+CREATE INDEX events_order_room ON events USING btree (room_id, topological_ordering, stream_ordering);
+CREATE INDEX events_room_stream ON events USING btree (room_id, stream_ordering);
+CREATE UNIQUE INDEX events_stream_ordering ON events USING btree (stream_ordering);
+CREATE INDEX events_ts ON events USING btree (origin_server_ts, stream_ordering);
+CREATE UNIQUE INDEX federation_inbound_events_staging_instance_event ON federation_inbound_events_staging USING btree (origin, event_id);
+CREATE INDEX federation_inbound_events_staging_room ON federation_inbound_events_staging USING btree (room_id, received_ts);
+CREATE UNIQUE INDEX federation_stream_position_instance ON federation_stream_position USING btree (type, instance_name);
+CREATE INDEX ignored_users_ignored_user_id ON ignored_users USING btree (ignored_user_id);
+CREATE UNIQUE INDEX ignored_users_uniqueness ON ignored_users USING btree (ignorer_user_id, ignored_user_id);
+CREATE INDEX insertion_event_edges_event_id ON insertion_event_edges USING btree (event_id);
+CREATE INDEX insertion_event_edges_insertion_prev_event_id ON insertion_event_edges USING btree (insertion_prev_event_id);
+CREATE INDEX insertion_event_edges_insertion_room_id ON insertion_event_edges USING btree (room_id);
+CREATE UNIQUE INDEX insertion_event_extremities_event_id ON insertion_event_extremities USING btree (event_id);
+CREATE INDEX insertion_event_extremities_room_id ON insertion_event_extremities USING btree (room_id);
+CREATE UNIQUE INDEX insertion_events_event_id ON insertion_events USING btree (event_id);
+CREATE INDEX insertion_events_next_batch_id ON insertion_events USING btree (next_batch_id);
+CREATE UNIQUE INDEX instance_map_idx ON instance_map USING btree (instance_name);
+CREATE UNIQUE INDEX local_current_membership_idx ON local_current_membership USING btree (user_id, room_id);
+CREATE INDEX local_current_membership_room_idx ON local_current_membership USING btree (room_id);
+CREATE UNIQUE INDEX local_media_repository_thumbn_media_id_width_height_method_key ON local_media_repository_thumbnails USING btree (media_id, thumbnail_width, thumbnail_height, thumbnail_type, thumbnail_method);
+CREATE INDEX local_media_repository_thumbnails_media_id ON local_media_repository_thumbnails USING btree (media_id);
+CREATE INDEX local_media_repository_url_cache_by_url_download_ts ON local_media_repository_url_cache USING btree (url, download_ts);
+CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache USING btree (expires_ts);
+CREATE INDEX local_media_repository_url_cache_media_idx ON local_media_repository_url_cache USING btree (media_id);
+CREATE INDEX local_media_repository_url_idx ON local_media_repository USING btree (created_ts) WHERE (url_cache IS NOT NULL);
+CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users USING btree ("timestamp");
+CREATE UNIQUE INDEX monthly_active_users_users ON monthly_active_users USING btree (user_id);
+CREATE INDEX open_id_tokens_ts_valid_until_ms ON open_id_tokens USING btree (ts_valid_until_ms);
+CREATE INDEX partial_state_events_room_id_idx ON partial_state_events USING btree (room_id);
+CREATE INDEX presence_stream_id ON presence_stream USING btree (stream_id, user_id);
+CREATE INDEX presence_stream_state_not_offline_idx ON presence_stream USING btree (state) WHERE (state <> 'offline'::text);
+CREATE INDEX presence_stream_user_id ON presence_stream USING btree (user_id);
+CREATE INDEX public_room_index ON rooms USING btree (is_public);
+CREATE INDEX push_rules_enable_user_name ON push_rules_enable USING btree (user_name);
+CREATE INDEX push_rules_stream_id ON push_rules_stream USING btree (stream_id);
+CREATE INDEX push_rules_stream_user_stream_id ON push_rules_stream USING btree (user_id, stream_id);
+CREATE INDEX push_rules_user_name ON push_rules USING btree (user_name);
+CREATE UNIQUE INDEX ratelimit_override_idx ON ratelimit_override USING btree (user_id);
+CREATE UNIQUE INDEX receipts_graph_unique_index ON receipts_graph USING btree (room_id, receipt_type, user_id) WHERE (thread_id IS NULL);
+CREATE INDEX receipts_linearized_id ON receipts_linearized USING btree (stream_id);
+CREATE INDEX receipts_linearized_room_stream ON receipts_linearized USING btree (room_id, stream_id);
+CREATE UNIQUE INDEX receipts_linearized_unique_index ON receipts_linearized USING btree (room_id, receipt_type, user_id) WHERE (thread_id IS NULL);
+CREATE INDEX receipts_linearized_user ON receipts_linearized USING btree (user_id);
+CREATE INDEX received_transactions_ts ON received_transactions USING btree (ts);
+CREATE INDEX redactions_have_censored_ts ON redactions USING btree (received_ts) WHERE (NOT have_censored);
+CREATE INDEX redactions_redacts ON redactions USING btree (redacts);
+CREATE INDEX refresh_tokens_next_token_id ON refresh_tokens USING btree (next_token_id) WHERE (next_token_id IS NOT NULL);
+CREATE UNIQUE INDEX remote_media_repository_thumbn_media_origin_id_width_height_met ON remote_media_cache_thumbnails USING btree (media_origin, media_id, thumbnail_width, thumbnail_height, thumbnail_type, thumbnail_method);
+CREATE INDEX room_account_data_stream_id ON room_account_data USING btree (user_id, stream_id);
+CREATE INDEX room_alias_servers_alias ON room_alias_servers USING btree (room_alias);
+CREATE INDEX room_aliases_id ON room_aliases USING btree (room_id);
+CREATE INDEX room_memberships_room_id ON room_memberships USING btree (room_id);
+CREATE INDEX room_memberships_user_id ON room_memberships USING btree (user_id);
+CREATE INDEX room_memberships_user_room_forgotten ON room_memberships USING btree (user_id, room_id) WHERE (forgotten = 1);
+CREATE INDEX room_retention_max_lifetime_idx ON room_retention USING btree (max_lifetime);
+CREATE UNIQUE INDEX room_stats_earliest_token_idx ON room_stats_earliest_token USING btree (room_id);
+CREATE UNIQUE INDEX room_stats_state_room ON room_stats_state USING btree (room_id);
+CREATE INDEX stream_ordering_to_exterm_idx ON stream_ordering_to_exterm USING btree (stream_ordering);
+CREATE INDEX stream_ordering_to_exterm_rm_idx ON stream_ordering_to_exterm USING btree (room_id, stream_ordering);
+CREATE UNIQUE INDEX stream_positions_idx ON stream_positions USING btree (stream_name, instance_name);
+CREATE UNIQUE INDEX threepid_guest_access_tokens_index ON threepid_guest_access_tokens USING btree (medium, address);
+CREATE INDEX threepid_validation_token_session_id ON threepid_validation_token USING btree (session_id);
+CREATE INDEX user_daily_visits_ts_idx ON user_daily_visits USING btree ("timestamp");
+CREATE INDEX user_daily_visits_uts_idx ON user_daily_visits USING btree (user_id, "timestamp");
+CREATE INDEX user_directory_room_idx ON user_directory USING btree (room_id);
+CREATE INDEX user_directory_search_fts_idx ON user_directory_search USING gin (vector);
+CREATE UNIQUE INDEX user_directory_search_user_idx ON user_directory_search USING btree (user_id);
+CREATE UNIQUE INDEX user_directory_user_idx ON user_directory USING btree (user_id);
+CREATE INDEX user_external_ids_user_id_idx ON user_external_ids USING btree (user_id);
+CREATE UNIQUE INDEX user_filters_unique ON user_filters USING btree (user_id, filter_id);
+CREATE INDEX user_ips_device_id ON user_ips USING btree (user_id, device_id, last_seen);
+CREATE INDEX user_ips_last_seen ON user_ips USING btree (user_id, last_seen);
+CREATE INDEX user_ips_last_seen_only ON user_ips USING btree (last_seen);
+CREATE UNIQUE INDEX user_ips_user_token_ip_unique_index ON user_ips USING btree (user_id, access_token, ip);
+CREATE UNIQUE INDEX user_signature_stream_idx ON user_signature_stream USING btree (stream_id);
+CREATE UNIQUE INDEX user_threepid_id_server_idx ON user_threepid_id_server USING btree (user_id, medium, address, id_server);
+CREATE INDEX user_threepids_medium_address ON user_threepids USING btree (medium, address);
+CREATE INDEX user_threepids_user_id ON user_threepids USING btree (user_id);
+CREATE INDEX users_creation_ts ON users USING btree (creation_ts);
+CREATE INDEX users_have_local_media ON local_media_repository USING btree (user_id, created_ts);
+CREATE INDEX users_in_public_rooms_r_idx ON users_in_public_rooms USING btree (room_id);
+CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms USING btree (user_id, room_id);
+CREATE INDEX users_who_share_private_rooms_o_idx ON users_who_share_private_rooms USING btree (other_user_id);
+CREATE INDEX users_who_share_private_rooms_r_idx ON users_who_share_private_rooms USING btree (room_id);
+CREATE UNIQUE INDEX users_who_share_private_rooms_u_idx ON users_who_share_private_rooms USING btree (user_id, other_user_id, room_id);
+CREATE UNIQUE INDEX worker_locks_key ON worker_locks USING btree (lock_name, lock_key);
+CREATE TRIGGER check_partial_state_events BEFORE INSERT OR UPDATE ON partial_state_events FOR EACH ROW EXECUTE PROCEDURE check_partial_state_events();
+ALTER TABLE ONLY access_tokens
+    ADD CONSTRAINT access_tokens_refresh_token_id_fkey FOREIGN KEY (refresh_token_id) REFERENCES refresh_tokens(id) ON DELETE CASCADE;
+ALTER TABLE ONLY destination_rooms
+    ADD CONSTRAINT destination_rooms_destination_fkey FOREIGN KEY (destination) REFERENCES destinations(destination);
+ALTER TABLE ONLY destination_rooms
+    ADD CONSTRAINT destination_rooms_room_id_fkey FOREIGN KEY (room_id) REFERENCES rooms(room_id);
+ALTER TABLE ONLY event_edges
+    ADD CONSTRAINT event_edges_event_id_fkey FOREIGN KEY (event_id) REFERENCES events(event_id);
+ALTER TABLE ONLY event_txn_id
+    ADD CONSTRAINT event_txn_id_event_id_fkey FOREIGN KEY (event_id) REFERENCES events(event_id) ON DELETE CASCADE;
+ALTER TABLE ONLY event_txn_id
+    ADD CONSTRAINT event_txn_id_token_id_fkey FOREIGN KEY (token_id) REFERENCES access_tokens(id) ON DELETE CASCADE;
+ALTER TABLE ONLY partial_state_events
+    ADD CONSTRAINT partial_state_events_event_id_fkey FOREIGN KEY (event_id) REFERENCES events(event_id);
+ALTER TABLE ONLY partial_state_events
+    ADD CONSTRAINT partial_state_events_room_id_fkey FOREIGN KEY (room_id) REFERENCES partial_state_rooms(room_id);
+ALTER TABLE ONLY partial_state_rooms
+    ADD CONSTRAINT partial_state_rooms_room_id_fkey FOREIGN KEY (room_id) REFERENCES rooms(room_id);
+ALTER TABLE ONLY partial_state_rooms_servers
+    ADD CONSTRAINT partial_state_rooms_servers_room_id_fkey FOREIGN KEY (room_id) REFERENCES partial_state_rooms(room_id);
+ALTER TABLE ONLY refresh_tokens
+    ADD CONSTRAINT refresh_tokens_next_token_id_fkey FOREIGN KEY (next_token_id) REFERENCES refresh_tokens(id) ON DELETE CASCADE;
+ALTER TABLE ONLY ui_auth_sessions_credentials
+    ADD CONSTRAINT ui_auth_sessions_credentials_session_id_fkey FOREIGN KEY (session_id) REFERENCES ui_auth_sessions(session_id);
+ALTER TABLE ONLY ui_auth_sessions_ips
+    ADD CONSTRAINT ui_auth_sessions_ips_session_id_fkey FOREIGN KEY (session_id) REFERENCES ui_auth_sessions(session_id);
+ALTER TABLE ONLY users_to_send_full_presence_to
+    ADD CONSTRAINT users_to_send_full_presence_to_user_id_fkey FOREIGN KEY (user_id) REFERENCES users(name);
+INSERT INTO appservice_stream_position VALUES ('X', 0);
+INSERT INTO event_push_summary_last_receipt_stream_id VALUES ('X', 0);
+INSERT INTO event_push_summary_stream_ordering VALUES ('X', 0);
+INSERT INTO federation_stream_position VALUES ('federation', -1, 'master');
+INSERT INTO federation_stream_position VALUES ('events', -1, 'master');
+INSERT INTO stats_incremental_position VALUES ('X', 1);
+INSERT INTO user_directory_stream_pos VALUES ('X', 1);
+SELECT pg_catalog.setval('account_data_sequence', 1, true);
+SELECT pg_catalog.setval('application_services_txn_id_seq', 1, false);
+SELECT pg_catalog.setval('cache_invalidation_stream_seq', 1, true);
+SELECT pg_catalog.setval('device_inbox_sequence', 1, true);
+SELECT pg_catalog.setval('event_auth_chain_id', 1, false);
+SELECT pg_catalog.setval('events_backfill_stream_seq', 1, true);
+SELECT pg_catalog.setval('events_stream_seq', 1, true);
+SELECT pg_catalog.setval('instance_map_instance_id_seq', 1, false);
+SELECT pg_catalog.setval('presence_stream_sequence', 1, true);
+SELECT pg_catalog.setval('receipts_sequence', 1, true);
+SELECT pg_catalog.setval('user_id_seq', 1, false);
diff --git a/synapse/storage/schema/main/full_schemas/72/full.sql.sqlite b/synapse/storage/schema/main/full_schemas/72/full.sql.sqlite
new file mode 100644
index 0000000000..d403baf1fb
--- /dev/null
+++ b/synapse/storage/schema/main/full_schemas/72/full.sql.sqlite
@@ -0,0 +1,646 @@
+CREATE TABLE application_services_txns( as_id TEXT NOT NULL, txn_id INTEGER NOT NULL, event_ids TEXT NOT NULL, UNIQUE(as_id, txn_id) );
+CREATE INDEX application_services_txns_id ON application_services_txns ( as_id );
+CREATE TABLE presence( user_id TEXT NOT NULL, state VARCHAR(20), status_msg TEXT, mtime BIGINT, UNIQUE (user_id) );
+CREATE TABLE users( name TEXT, password_hash TEXT, creation_ts BIGINT, admin SMALLINT DEFAULT 0 NOT NULL, upgrade_ts BIGINT, is_guest SMALLINT DEFAULT 0 NOT NULL, appservice_id TEXT, consent_version TEXT, consent_server_notice_sent TEXT, user_type TEXT DEFAULT NULL, deactivated SMALLINT DEFAULT 0 NOT NULL, shadow_banned BOOLEAN, consent_ts bigint, UNIQUE(name) );
+CREATE TABLE user_ips ( user_id TEXT NOT NULL, access_token TEXT NOT NULL, device_id TEXT, ip TEXT NOT NULL, user_agent TEXT NOT NULL, last_seen BIGINT NOT NULL );
+CREATE TABLE profiles( user_id TEXT NOT NULL, displayname TEXT, avatar_url TEXT, UNIQUE(user_id) );
+CREATE TABLE received_transactions( transaction_id TEXT, origin TEXT, ts BIGINT, response_code INTEGER, response_json bytea, has_been_referenced smallint default 0, UNIQUE (transaction_id, origin) );
+CREATE TABLE destinations( destination TEXT PRIMARY KEY, retry_last_ts BIGINT, retry_interval INTEGER , failure_ts BIGINT, last_successful_stream_ordering BIGINT);
+CREATE TABLE events( stream_ordering INTEGER PRIMARY KEY, topological_ordering BIGINT NOT NULL, event_id TEXT NOT NULL, type TEXT NOT NULL, room_id TEXT NOT NULL, content TEXT, unrecognized_keys TEXT, processed BOOL NOT NULL, outlier BOOL NOT NULL, depth BIGINT DEFAULT 0 NOT NULL, origin_server_ts BIGINT, received_ts BIGINT, sender TEXT, contains_url BOOLEAN, instance_name TEXT, state_key TEXT DEFAULT NULL, rejection_reason TEXT DEFAULT NULL, UNIQUE (event_id) );
+CREATE INDEX events_order_room ON events ( room_id, topological_ordering, stream_ordering );
+CREATE TABLE event_json( event_id TEXT NOT NULL, room_id TEXT NOT NULL, internal_metadata TEXT NOT NULL, json TEXT NOT NULL, format_version INTEGER, UNIQUE (event_id) );
+CREATE TABLE state_events( event_id TEXT NOT NULL, room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, prev_state TEXT, UNIQUE (event_id) );
+CREATE TABLE current_state_events( event_id TEXT NOT NULL, room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, membership TEXT, UNIQUE (event_id), UNIQUE (room_id, type, state_key) );
+CREATE TABLE room_memberships( event_id TEXT NOT NULL, user_id TEXT NOT NULL, sender TEXT NOT NULL, room_id TEXT NOT NULL, membership TEXT NOT NULL, forgotten INTEGER DEFAULT 0, display_name TEXT, avatar_url TEXT, UNIQUE (event_id) );
+CREATE INDEX room_memberships_room_id ON room_memberships (room_id);
+CREATE INDEX room_memberships_user_id ON room_memberships (user_id);
+CREATE TABLE rooms( room_id TEXT PRIMARY KEY NOT NULL, is_public BOOL, creator TEXT , room_version TEXT, has_auth_chain_index BOOLEAN);
+CREATE TABLE server_signature_keys( server_name TEXT, key_id TEXT, from_server TEXT, ts_added_ms BIGINT, verify_key bytea, ts_valid_until_ms BIGINT, UNIQUE (server_name, key_id) );
+CREATE TABLE rejections( event_id TEXT NOT NULL, reason TEXT NOT NULL, last_check TEXT NOT NULL, UNIQUE (event_id) );
+CREATE TABLE push_rules ( id BIGINT PRIMARY KEY, user_name TEXT NOT NULL, rule_id TEXT NOT NULL, priority_class SMALLINT NOT NULL, priority INTEGER NOT NULL DEFAULT 0, conditions TEXT NOT NULL, actions TEXT NOT NULL, UNIQUE(user_name, rule_id) );
+CREATE INDEX push_rules_user_name on push_rules (user_name);
+CREATE TABLE push_rules_enable ( id BIGINT PRIMARY KEY, user_name TEXT NOT NULL, rule_id TEXT NOT NULL, enabled SMALLINT, UNIQUE(user_name, rule_id) );
+CREATE INDEX push_rules_enable_user_name on push_rules_enable (user_name);
+CREATE TABLE event_forward_extremities( event_id TEXT NOT NULL, room_id TEXT NOT NULL, UNIQUE (event_id, room_id) );
+CREATE INDEX ev_extrem_room ON event_forward_extremities(room_id);
+CREATE INDEX ev_extrem_id ON event_forward_extremities(event_id);
+CREATE TABLE event_backward_extremities( event_id TEXT NOT NULL, room_id TEXT NOT NULL, UNIQUE (event_id, room_id) );
+CREATE INDEX ev_b_extrem_room ON event_backward_extremities(room_id);
+CREATE INDEX ev_b_extrem_id ON event_backward_extremities(event_id);
+CREATE TABLE room_depth( room_id TEXT NOT NULL, min_depth INTEGER NOT NULL, UNIQUE (room_id) );
+CREATE TABLE event_to_state_groups( event_id TEXT NOT NULL, state_group BIGINT NOT NULL, UNIQUE (event_id) );
+CREATE TABLE local_media_repository ( media_id TEXT, media_type TEXT, media_length INTEGER, created_ts BIGINT, upload_name TEXT, user_id TEXT, quarantined_by TEXT, url_cache TEXT, last_access_ts BIGINT, safe_from_quarantine BOOLEAN NOT NULL DEFAULT 0, UNIQUE (media_id) );
+CREATE TABLE remote_media_cache ( media_origin TEXT, media_id TEXT, media_type TEXT, created_ts BIGINT, upload_name TEXT, media_length INTEGER, filesystem_id TEXT, last_access_ts BIGINT, quarantined_by TEXT, UNIQUE (media_origin, media_id) );
+CREATE TABLE redactions ( event_id TEXT NOT NULL, redacts TEXT NOT NULL, have_censored BOOL NOT NULL DEFAULT false, received_ts BIGINT, UNIQUE (event_id) );
+CREATE INDEX redactions_redacts ON redactions (redacts);
+CREATE TABLE room_aliases( room_alias TEXT NOT NULL, room_id TEXT NOT NULL, creator TEXT, UNIQUE (room_alias) );
+CREATE INDEX room_aliases_id ON room_aliases(room_id);
+CREATE TABLE room_alias_servers( room_alias TEXT NOT NULL, server TEXT NOT NULL );
+CREATE INDEX room_alias_servers_alias ON room_alias_servers(room_alias);
+CREATE TABLE IF NOT EXISTS "server_keys_json" ( server_name TEXT NOT NULL, key_id TEXT NOT NULL, from_server TEXT NOT NULL, ts_added_ms BIGINT NOT NULL, ts_valid_until_ms BIGINT NOT NULL, key_json bytea NOT NULL, CONSTRAINT server_keys_json_uniqueness UNIQUE (server_name, key_id, from_server) );
+CREATE TABLE e2e_device_keys_json ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, ts_added_ms BIGINT NOT NULL, key_json TEXT NOT NULL, CONSTRAINT e2e_device_keys_json_uniqueness UNIQUE (user_id, device_id) );
+CREATE TABLE e2e_one_time_keys_json ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, algorithm TEXT NOT NULL, key_id TEXT NOT NULL, ts_added_ms BIGINT NOT NULL, key_json TEXT NOT NULL, CONSTRAINT e2e_one_time_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm, key_id) );
+CREATE TABLE IF NOT EXISTS "user_threepids" ( user_id TEXT NOT NULL, medium TEXT NOT NULL, address TEXT NOT NULL, validated_at BIGINT NOT NULL, added_at BIGINT NOT NULL, CONSTRAINT medium_address UNIQUE (medium, address) );
+CREATE INDEX user_threepids_user_id ON user_threepids(user_id);
+CREATE VIRTUAL TABLE event_search USING fts4 ( event_id, room_id, sender, key, value )
+/* event_search(event_id,room_id,sender,"key",value) */;
+CREATE TABLE room_tags( user_id TEXT NOT NULL, room_id TEXT NOT NULL, tag     TEXT NOT NULL, content TEXT NOT NULL, CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag) );
+CREATE TABLE room_tags_revisions ( user_id TEXT NOT NULL, room_id TEXT NOT NULL, stream_id BIGINT NOT NULL, instance_name TEXT, CONSTRAINT room_tag_revisions_uniqueness UNIQUE (user_id, room_id) );
+CREATE TABLE account_data( user_id TEXT NOT NULL, account_data_type TEXT NOT NULL, stream_id BIGINT NOT NULL, content TEXT NOT NULL, instance_name TEXT, CONSTRAINT account_data_uniqueness UNIQUE (user_id, account_data_type) );
+CREATE TABLE room_account_data( user_id TEXT NOT NULL, room_id TEXT NOT NULL, account_data_type TEXT NOT NULL, stream_id BIGINT NOT NULL, content TEXT NOT NULL, instance_name TEXT, CONSTRAINT room_account_data_uniqueness UNIQUE (user_id, room_id, account_data_type) );
+CREATE INDEX account_data_stream_id on account_data(user_id, stream_id);
+CREATE INDEX room_account_data_stream_id on room_account_data(user_id, stream_id);
+CREATE INDEX events_ts ON events(origin_server_ts, stream_ordering);
+CREATE TABLE event_push_actions( room_id TEXT NOT NULL, event_id TEXT NOT NULL, user_id TEXT NOT NULL, profile_tag VARCHAR(32), actions TEXT NOT NULL, topological_ordering BIGINT, stream_ordering BIGINT, notif SMALLINT, highlight SMALLINT, unread SMALLINT, thread_id TEXT, CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag) );
+CREATE INDEX event_push_actions_room_id_user_id on event_push_actions(room_id, user_id);
+CREATE INDEX events_room_stream on events(room_id, stream_ordering);
+CREATE INDEX public_room_index on rooms(is_public);
+CREATE INDEX event_push_actions_rm_tokens on event_push_actions( user_id, room_id, topological_ordering, stream_ordering );
+CREATE TABLE presence_stream( stream_id BIGINT, user_id TEXT, state TEXT, last_active_ts BIGINT, last_federation_update_ts BIGINT, last_user_sync_ts BIGINT, status_msg TEXT, currently_active BOOLEAN , instance_name TEXT);
+CREATE INDEX presence_stream_id ON presence_stream(stream_id, user_id);
+CREATE INDEX presence_stream_user_id ON presence_stream(user_id);
+CREATE TABLE push_rules_stream( stream_id BIGINT NOT NULL, event_stream_ordering BIGINT NOT NULL, user_id TEXT NOT NULL, rule_id TEXT NOT NULL, op TEXT NOT NULL, priority_class SMALLINT, priority INTEGER, conditions TEXT, actions TEXT );
+CREATE INDEX push_rules_stream_id ON push_rules_stream(stream_id);
+CREATE INDEX push_rules_stream_user_stream_id on push_rules_stream(user_id, stream_id);
+CREATE TABLE ex_outlier_stream( event_stream_ordering BIGINT PRIMARY KEY NOT NULL, event_id TEXT NOT NULL, state_group BIGINT NOT NULL , instance_name TEXT);
+CREATE TABLE threepid_guest_access_tokens( medium TEXT, address TEXT, guest_access_token TEXT, first_inviter TEXT );
+CREATE UNIQUE INDEX threepid_guest_access_tokens_index ON threepid_guest_access_tokens(medium, address);
+CREATE INDEX event_push_actions_stream_ordering on event_push_actions( stream_ordering, user_id );
+CREATE TABLE open_id_tokens ( token TEXT NOT NULL PRIMARY KEY, ts_valid_until_ms bigint NOT NULL, user_id TEXT NOT NULL, UNIQUE (token) );
+CREATE INDEX open_id_tokens_ts_valid_until_ms ON open_id_tokens(ts_valid_until_ms);
+CREATE TABLE pusher_throttle( pusher BIGINT NOT NULL, room_id TEXT NOT NULL, last_sent_ts BIGINT, throttle_ms BIGINT, PRIMARY KEY (pusher, room_id) );
+CREATE TABLE event_reports( id BIGINT NOT NULL PRIMARY KEY, received_ts BIGINT NOT NULL, room_id TEXT NOT NULL, event_id TEXT NOT NULL, user_id TEXT NOT NULL, reason TEXT, content TEXT );
+CREATE TABLE appservice_stream_position( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_ordering BIGINT, CHECK (Lock='X') );
+CREATE TABLE device_inbox ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, stream_id BIGINT NOT NULL, message_json TEXT NOT NULL , instance_name TEXT);
+CREATE INDEX device_inbox_user_stream_id ON device_inbox(user_id, device_id, stream_id);
+CREATE INDEX received_transactions_ts ON received_transactions(ts);
+CREATE TABLE device_federation_outbox ( destination TEXT NOT NULL, stream_id BIGINT NOT NULL, queued_ts BIGINT NOT NULL, messages_json TEXT NOT NULL , instance_name TEXT);
+CREATE INDEX device_federation_outbox_destination_id ON device_federation_outbox(destination, stream_id);
+CREATE TABLE device_federation_inbox ( origin TEXT NOT NULL, message_id TEXT NOT NULL, received_ts BIGINT NOT NULL , instance_name TEXT);
+CREATE INDEX device_federation_inbox_sender_id ON device_federation_inbox(origin, message_id);
+CREATE TABLE stream_ordering_to_exterm ( stream_ordering BIGINT NOT NULL, room_id TEXT NOT NULL, event_id TEXT NOT NULL );
+CREATE INDEX stream_ordering_to_exterm_idx on stream_ordering_to_exterm( stream_ordering );
+CREATE INDEX stream_ordering_to_exterm_rm_idx on stream_ordering_to_exterm( room_id, stream_ordering );
+CREATE TABLE IF NOT EXISTS "event_auth"( event_id TEXT NOT NULL, auth_id TEXT NOT NULL, room_id TEXT NOT NULL );
+CREATE INDEX evauth_edges_id ON event_auth(event_id);
+CREATE INDEX user_threepids_medium_address on user_threepids (medium, address);
+CREATE TABLE appservice_room_list( appservice_id TEXT NOT NULL, network_id TEXT NOT NULL, room_id TEXT NOT NULL );
+CREATE UNIQUE INDEX appservice_room_list_idx ON appservice_room_list( appservice_id, network_id, room_id );
+CREATE INDEX device_federation_outbox_id ON device_federation_outbox(stream_id);
+CREATE TABLE federation_stream_position( type TEXT NOT NULL, stream_id INTEGER NOT NULL , instance_name TEXT NOT NULL DEFAULT 'master');
+CREATE TABLE device_lists_remote_cache ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, content TEXT NOT NULL );
+CREATE TABLE device_lists_remote_extremeties ( user_id TEXT NOT NULL, stream_id TEXT NOT NULL );
+CREATE TABLE device_lists_stream ( stream_id BIGINT NOT NULL, user_id TEXT NOT NULL, device_id TEXT NOT NULL );
+CREATE INDEX device_lists_stream_id ON device_lists_stream(stream_id, user_id);
+CREATE TABLE device_lists_outbound_pokes ( destination TEXT NOT NULL, stream_id BIGINT NOT NULL, user_id TEXT NOT NULL, device_id TEXT NOT NULL, sent BOOLEAN NOT NULL, ts BIGINT NOT NULL , opentracing_context TEXT);
+CREATE INDEX device_lists_outbound_pokes_id ON device_lists_outbound_pokes(destination, stream_id);
+CREATE INDEX device_lists_outbound_pokes_user ON device_lists_outbound_pokes(destination, user_id);
+CREATE TABLE event_push_summary ( user_id TEXT NOT NULL, room_id TEXT NOT NULL, notif_count BIGINT NOT NULL, stream_ordering BIGINT NOT NULL , unread_count BIGINT, last_receipt_stream_ordering BIGINT, thread_id TEXT);
+CREATE TABLE event_push_summary_stream_ordering ( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_ordering BIGINT NOT NULL, CHECK (Lock='X') );
+CREATE TABLE IF NOT EXISTS "pushers" ( id BIGINT PRIMARY KEY, user_name TEXT NOT NULL, access_token BIGINT DEFAULT NULL, profile_tag TEXT NOT NULL, kind TEXT NOT NULL, app_id TEXT NOT NULL, app_display_name TEXT NOT NULL, device_display_name TEXT NOT NULL, pushkey TEXT NOT NULL, ts BIGINT NOT NULL, lang TEXT, data TEXT, last_stream_ordering INTEGER, last_success BIGINT, failing_since BIGINT, UNIQUE (app_id, pushkey, user_name) );
+CREATE INDEX device_lists_outbound_pokes_stream ON device_lists_outbound_pokes(stream_id);
+CREATE TABLE ratelimit_override ( user_id TEXT NOT NULL, messages_per_second BIGINT, burst_count BIGINT );
+CREATE UNIQUE INDEX ratelimit_override_idx ON ratelimit_override(user_id);
+CREATE TABLE current_state_delta_stream ( stream_id BIGINT NOT NULL, room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, event_id TEXT, prev_event_id TEXT , instance_name TEXT);
+CREATE INDEX current_state_delta_stream_idx ON current_state_delta_stream(stream_id);
+CREATE TABLE user_directory_stream_pos ( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_id BIGINT, CHECK (Lock='X') );
+CREATE VIRTUAL TABLE user_directory_search USING fts4 ( user_id, value )
+/* user_directory_search(user_id,value) */;
+CREATE TABLE blocked_rooms ( room_id TEXT NOT NULL, user_id TEXT NOT NULL );
+CREATE UNIQUE INDEX blocked_rooms_idx ON blocked_rooms(room_id);
+CREATE TABLE IF NOT EXISTS "local_media_repository_url_cache"( url TEXT, response_code INTEGER, etag TEXT, expires_ts BIGINT, og TEXT, media_id TEXT, download_ts BIGINT );
+CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache(expires_ts);
+CREATE INDEX local_media_repository_url_cache_by_url_download_ts ON local_media_repository_url_cache(url, download_ts);
+CREATE INDEX local_media_repository_url_cache_media_idx ON local_media_repository_url_cache(media_id);
+CREATE TABLE IF NOT EXISTS "deleted_pushers" ( stream_id BIGINT NOT NULL, app_id TEXT NOT NULL, pushkey TEXT NOT NULL, user_id TEXT NOT NULL );
+CREATE INDEX deleted_pushers_stream_id ON deleted_pushers (stream_id);
+CREATE TABLE IF NOT EXISTS "user_directory" ( user_id TEXT NOT NULL, room_id TEXT, display_name TEXT, avatar_url TEXT );
+CREATE INDEX user_directory_room_idx ON user_directory(room_id);
+CREATE UNIQUE INDEX user_directory_user_idx ON user_directory(user_id);
+CREATE TABLE event_push_actions_staging ( event_id TEXT NOT NULL, user_id TEXT NOT NULL, actions TEXT NOT NULL, notif SMALLINT NOT NULL, highlight SMALLINT NOT NULL , unread SMALLINT, thread_id TEXT);
+CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging(event_id);
+CREATE TABLE users_pending_deactivation ( user_id TEXT NOT NULL );
+CREATE TABLE user_daily_visits ( user_id TEXT NOT NULL, device_id TEXT, timestamp BIGINT NOT NULL , user_agent TEXT);
+CREATE INDEX user_daily_visits_uts_idx ON user_daily_visits(user_id, timestamp);
+CREATE INDEX user_daily_visits_ts_idx ON user_daily_visits(timestamp);
+CREATE TABLE erased_users ( user_id TEXT NOT NULL );
+CREATE UNIQUE INDEX erased_users_user ON erased_users(user_id);
+CREATE TABLE monthly_active_users ( user_id TEXT NOT NULL, timestamp BIGINT NOT NULL );
+CREATE UNIQUE INDEX monthly_active_users_users ON monthly_active_users(user_id);
+CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users(timestamp);
+CREATE TABLE IF NOT EXISTS "e2e_room_keys_versions" ( user_id TEXT NOT NULL, version BIGINT NOT NULL, algorithm TEXT NOT NULL, auth_data TEXT NOT NULL, deleted SMALLINT DEFAULT 0 NOT NULL , etag BIGINT);
+CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions(user_id, version);
+CREATE TABLE IF NOT EXISTS "e2e_room_keys" ( user_id TEXT NOT NULL, room_id TEXT NOT NULL, session_id TEXT NOT NULL, version BIGINT NOT NULL, first_message_index INT, forwarded_count INT, is_verified BOOLEAN, session_data TEXT NOT NULL );
+CREATE TABLE users_who_share_private_rooms ( user_id TEXT NOT NULL, other_user_id TEXT NOT NULL, room_id TEXT NOT NULL );
+CREATE UNIQUE INDEX users_who_share_private_rooms_u_idx ON users_who_share_private_rooms(user_id, other_user_id, room_id);
+CREATE INDEX users_who_share_private_rooms_r_idx ON users_who_share_private_rooms(room_id);
+CREATE INDEX users_who_share_private_rooms_o_idx ON users_who_share_private_rooms(other_user_id);
+CREATE TABLE user_threepid_id_server ( user_id TEXT NOT NULL, medium TEXT NOT NULL, address TEXT NOT NULL, id_server TEXT NOT NULL );
+CREATE UNIQUE INDEX user_threepid_id_server_idx ON user_threepid_id_server( user_id, medium, address, id_server );
+CREATE TABLE users_in_public_rooms ( user_id TEXT NOT NULL, room_id TEXT NOT NULL );
+CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id, room_id);
+CREATE TABLE account_validity ( user_id TEXT PRIMARY KEY, expiration_ts_ms BIGINT NOT NULL, email_sent BOOLEAN NOT NULL, renewal_token TEXT , token_used_ts_ms BIGINT);
+CREATE TABLE event_relations ( event_id TEXT NOT NULL, relates_to_id TEXT NOT NULL, relation_type TEXT NOT NULL, aggregation_key TEXT );
+CREATE UNIQUE INDEX event_relations_id ON event_relations(event_id);
+CREATE INDEX event_relations_relates ON event_relations(relates_to_id, relation_type, aggregation_key);
+CREATE TABLE room_stats_earliest_token ( room_id TEXT NOT NULL, token BIGINT NOT NULL );
+CREATE UNIQUE INDEX room_stats_earliest_token_idx ON room_stats_earliest_token(room_id);
+CREATE INDEX user_ips_device_id ON user_ips (user_id, device_id, last_seen);
+CREATE INDEX event_push_actions_u_highlight ON event_push_actions (user_id, stream_ordering);
+CREATE INDEX device_inbox_stream_id_user_id ON device_inbox (stream_id, user_id);
+CREATE INDEX device_lists_stream_user_id ON device_lists_stream (user_id, device_id);
+CREATE INDEX user_ips_last_seen ON user_ips (user_id, last_seen);
+CREATE INDEX user_ips_last_seen_only ON user_ips (last_seen);
+CREATE INDEX users_creation_ts ON users (creation_ts);
+CREATE INDEX event_to_state_groups_sg_index ON event_to_state_groups (state_group);
+CREATE UNIQUE INDEX device_lists_remote_cache_unique_id ON device_lists_remote_cache (user_id, device_id);
+CREATE UNIQUE INDEX device_lists_remote_extremeties_unique_idx ON device_lists_remote_extremeties (user_id);
+CREATE UNIQUE INDEX user_ips_user_token_ip_unique_index ON user_ips (user_id, access_token, ip);
+CREATE TABLE threepid_validation_session (  -- Validation sessions, keyed by session_id.
+    session_id TEXT PRIMARY KEY,  -- Primary key of the session.
+    medium TEXT NOT NULL,  -- Required. NOTE(review): presumably the identifier medium (e.g. email) - confirm.
+    address TEXT NOT NULL,  -- Required. NOTE(review): presumably the address being validated - confirm.
+    client_secret TEXT NOT NULL,  -- Required for every session row.
+    last_send_attempt BIGINT NOT NULL,  -- Required. NOTE(review): looks like an attempt counter - confirm.
+    validated_at BIGINT  -- Nullable. NOTE(review): presumably NULL until validation succeeds - confirm.
+);
+CREATE TABLE threepid_validation_token (  -- Validation tokens, keyed by token.
+    token TEXT PRIMARY KEY,  -- Primary key of the token row.
+    session_id TEXT NOT NULL,  -- Indexed by threepid_validation_token_session_id; no foreign key is declared here.
+    next_link TEXT,  -- Nullable.
+    expires BIGINT NOT NULL  -- Required. NOTE(review): presumably an expiry timestamp - confirm units.
+);
+CREATE INDEX threepid_validation_token_session_id ON threepid_validation_token(session_id);
+CREATE TABLE event_expiry (  -- Per-event expiry values, keyed by event_id.
+    event_id TEXT PRIMARY KEY,  -- Primary key; no foreign key to events is declared here.
+    expiry_ts BIGINT NOT NULL  -- Indexed by event_expiry_expiry_ts_idx. NOTE(review): units not shown - confirm.
+);
+CREATE INDEX event_expiry_expiry_ts_idx ON event_expiry(expiry_ts);
+CREATE TABLE event_labels (  -- Labels attached to events; each (event_id, label) pair is stored at most once.
+    event_id TEXT,  -- Part of the composite primary key.
+    label TEXT,  -- Part of the composite primary key.
+    room_id TEXT NOT NULL,  -- Leading column of event_labels_room_id_label_idx.
+    topological_ordering BIGINT NOT NULL,  -- Trailing column of event_labels_room_id_label_idx.
+    PRIMARY KEY(event_id, label)
+);
+CREATE INDEX event_labels_room_id_label_idx ON event_labels(room_id, label, topological_ordering);
+CREATE UNIQUE INDEX e2e_room_keys_with_version_idx ON e2e_room_keys(user_id, version, room_id, session_id);
+CREATE TABLE IF NOT EXISTS "devices" (  -- Device rows; at most one per (user_id, device_id).
+    user_id TEXT NOT NULL,  -- Part of the device_uniqueness constraint.
+    device_id TEXT NOT NULL,  -- Part of the device_uniqueness constraint.
+    display_name TEXT,  -- Nullable.
+    last_seen BIGINT,  -- Nullable. NOTE(review): presumably a timestamp - confirm units.
+    ip TEXT,  -- Nullable.
+    user_agent TEXT,  -- Nullable.
+    hidden BOOLEAN DEFAULT 0,  -- Defaults to 0 when not supplied; nullable (no NOT NULL).
+    CONSTRAINT device_uniqueness UNIQUE (user_id, device_id)
+);
+CREATE TABLE room_retention(  -- Retention lifetimes, keyed by (room_id, event_id).
+    room_id TEXT,  -- Part of the composite primary key.
+    event_id TEXT,  -- Part of the composite primary key.
+    min_lifetime BIGINT,  -- Nullable. NOTE(review): units not shown - confirm.
+    max_lifetime BIGINT,  -- Nullable; indexed by room_retention_max_lifetime_idx.
+
+    PRIMARY KEY(room_id, event_id)
+);
+CREATE INDEX room_retention_max_lifetime_idx on room_retention(max_lifetime);
+CREATE TABLE e2e_cross_signing_keys (
+    user_id TEXT NOT NULL,
+    -- the type of cross-signing key (master, user_signing, or self_signing)
+    keytype TEXT NOT NULL,
+    -- the full key information, as a json-encoded dict
+    keydata TEXT NOT NULL,
+    -- for keeping the keys in order, so that we can fetch the latest one
+    stream_id BIGINT NOT NULL
+);
+CREATE UNIQUE INDEX e2e_cross_signing_keys_idx ON e2e_cross_signing_keys(user_id, keytype, stream_id);
+CREATE TABLE e2e_cross_signing_signatures (
+    -- user who did the signing
+    user_id TEXT NOT NULL,
+    -- key used to sign
+    key_id TEXT NOT NULL,
+    -- user who was signed
+    target_user_id TEXT NOT NULL,
+    -- device/key that was signed
+    target_device_id TEXT NOT NULL,
+    -- the actual signature
+    signature TEXT NOT NULL
+);
+CREATE TABLE user_signature_stream (
+    -- uses the same stream ID as device list stream
+    stream_id BIGINT NOT NULL,
+    -- user who did the signing
+    from_user_id TEXT NOT NULL,
+    -- list of users who were signed, as a JSON array
+    user_ids TEXT NOT NULL
+);
+CREATE UNIQUE INDEX user_signature_stream_idx ON user_signature_stream(stream_id);
+CREATE INDEX e2e_cross_signing_signatures2_idx ON e2e_cross_signing_signatures(user_id, target_user_id, target_device_id);
+CREATE TABLE stats_incremental_position (
+    Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE,  -- Makes sure this table only has one row.
+    stream_id  BIGINT NOT NULL,
+    CHECK (Lock='X')
+);
+CREATE TABLE room_stats_current (
+    room_id TEXT NOT NULL PRIMARY KEY,
+
+    -- These are absolute counts
+    current_state_events INT NOT NULL,
+    joined_members INT NOT NULL,
+    invited_members INT NOT NULL,
+    left_members INT NOT NULL,
+    banned_members INT NOT NULL,
+
+    local_users_in_room INT NOT NULL,
+
+    -- The maximum delta stream position that this row takes into account.
+    completed_delta_stream_id BIGINT NOT NULL
+, knocked_members INT);
+CREATE TABLE user_stats_current (
+    user_id TEXT NOT NULL PRIMARY KEY,
+
+    joined_rooms BIGINT NOT NULL,
+
+    -- The maximum delta stream position that this row takes into account.
+    completed_delta_stream_id BIGINT NOT NULL
+);
+CREATE TABLE room_stats_state (
+    room_id TEXT NOT NULL,
+    name TEXT,
+    canonical_alias TEXT,
+    join_rules TEXT,
+    history_visibility TEXT,
+    encryption TEXT,
+    avatar TEXT,
+    guest_access TEXT,
+    is_federatable BOOLEAN,
+    topic TEXT
+, room_type TEXT);
+CREATE UNIQUE INDEX room_stats_state_room ON room_stats_state(room_id);
+CREATE TABLE IF NOT EXISTS "user_filters" ( user_id TEXT NOT NULL, filter_id BIGINT NOT NULL, filter_json BYTEA NOT NULL );
+CREATE UNIQUE INDEX user_filters_unique ON "user_filters" (user_id, filter_id);
+CREATE TABLE user_external_ids (  -- Maps an (auth_provider, external_id) pair to a user_id.
+    auth_provider TEXT NOT NULL,  -- Part of the uniqueness constraint below.
+    external_id TEXT NOT NULL,  -- Part of the uniqueness constraint below.
+    user_id TEXT NOT NULL,  -- Not unique here: one user_id may appear in several rows.
+    UNIQUE (auth_provider, external_id)
+);
+CREATE INDEX users_in_public_rooms_r_idx ON users_in_public_rooms(room_id);
+CREATE TABLE device_lists_remote_resync (  -- At most one row per user_id (see device_lists_remote_resync_idx).
+    user_id TEXT NOT NULL,  -- Made unique by device_lists_remote_resync_idx.
+    added_ts BIGINT NOT NULL  -- Indexed by device_lists_remote_resync_ts_idx. NOTE(review): units not shown - confirm.
+);
+CREATE UNIQUE INDEX device_lists_remote_resync_idx ON device_lists_remote_resync (user_id);
+CREATE INDEX device_lists_remote_resync_ts_idx ON device_lists_remote_resync (added_ts);
+CREATE TABLE local_current_membership (  -- At most one row per (user_id, room_id); see local_current_membership_idx.
+            room_id TEXT NOT NULL,  -- Also indexed alone by local_current_membership_room_idx.
+            user_id TEXT NOT NULL,  -- Leading column of the unique index local_current_membership_idx.
+            event_id TEXT NOT NULL,  -- Required. NOTE(review): presumably the membership event - confirm.
+            membership TEXT NOT NULL  -- Required; allowed values are not constrained by this schema.
+        );
+CREATE UNIQUE INDEX local_current_membership_idx ON local_current_membership(user_id, room_id);
+CREATE INDEX local_current_membership_room_idx ON local_current_membership(room_id);
+CREATE TABLE ui_auth_sessions(
+    session_id TEXT NOT NULL,  -- The session ID passed to the client.
+    creation_time BIGINT NOT NULL,  -- The time this session was created (epoch time in milliseconds).
+    serverdict TEXT NOT NULL,  -- A JSON dictionary of arbitrary data added by Synapse.
+    clientdict TEXT NOT NULL,  -- A JSON dictionary of arbitrary data from the client.
+    uri TEXT NOT NULL,  -- The URI the UI authentication session is using.
+    method TEXT NOT NULL,  -- The HTTP method the UI authentication session is using.
+    -- The clientdict, uri, and method make up a tuple that must be immutable
+    -- throughout the lifetime of the UI Auth session.
+    description TEXT NOT NULL,  -- A human readable description of the operation which caused the UI Auth flow to occur.
+    UNIQUE (session_id)
+);
+CREATE TABLE ui_auth_sessions_credentials(
+    session_id TEXT NOT NULL,  -- The corresponding UI Auth session.
+    stage_type TEXT NOT NULL,  -- The stage type.
+    result TEXT NOT NULL,  -- The result of the stage verification, stored as JSON.
+    UNIQUE (session_id, stage_type),
+    FOREIGN KEY (session_id)
+        REFERENCES ui_auth_sessions (session_id)
+);
+CREATE TABLE IF NOT EXISTS "device_lists_outbound_last_success" ( destination TEXT NOT NULL, user_id TEXT NOT NULL, stream_id BIGINT NOT NULL );
+CREATE UNIQUE INDEX device_lists_outbound_last_success_unique_idx ON "device_lists_outbound_last_success" (destination, user_id);
+CREATE TABLE IF NOT EXISTS "local_media_repository_thumbnails" ( media_id TEXT, thumbnail_width INTEGER, thumbnail_height INTEGER, thumbnail_type TEXT, thumbnail_method TEXT, thumbnail_length INTEGER, UNIQUE ( media_id, thumbnail_width, thumbnail_height, thumbnail_type, thumbnail_method ) );
+CREATE INDEX local_media_repository_thumbnails_media_id ON local_media_repository_thumbnails (media_id);
+CREATE TABLE IF NOT EXISTS "remote_media_cache_thumbnails" ( media_origin TEXT, media_id TEXT, thumbnail_width INTEGER, thumbnail_height INTEGER, thumbnail_method TEXT, thumbnail_type TEXT, thumbnail_length INTEGER, filesystem_id TEXT, UNIQUE ( media_origin, media_id, thumbnail_width, thumbnail_height, thumbnail_type, thumbnail_method ) );
+CREATE TABLE ui_auth_sessions_ips(
+    session_id TEXT NOT NULL,
+    ip TEXT NOT NULL,
+    user_agent TEXT NOT NULL,
+    UNIQUE (session_id, ip, user_agent),
+    FOREIGN KEY (session_id)
+        REFERENCES ui_auth_sessions (session_id)
+);
+CREATE UNIQUE INDEX federation_stream_position_instance ON federation_stream_position(type, instance_name);
+CREATE TABLE dehydrated_devices(
+    user_id TEXT NOT NULL PRIMARY KEY,
+    device_id TEXT NOT NULL,
+    device_data TEXT NOT NULL -- JSON-encoded client-defined data
+);
+CREATE TABLE e2e_fallback_keys_json (
+    user_id TEXT NOT NULL, -- The user this fallback key is for.
+    device_id TEXT NOT NULL, -- The device this fallback key is for.
+    algorithm TEXT NOT NULL, -- Which algorithm this fallback key is for.
+    key_id TEXT NOT NULL, -- An id for suppressing duplicate uploads.
+    key_json TEXT NOT NULL, -- The key as a JSON blob.
+    used BOOLEAN NOT NULL DEFAULT FALSE, -- Whether the key has been used or not.
+    CONSTRAINT e2e_fallback_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm)
+);
+CREATE TABLE destination_rooms (
+  -- the destination in question.
+  destination TEXT NOT NULL REFERENCES destinations (destination),
+  -- the ID of the room in question
+  room_id TEXT NOT NULL REFERENCES rooms (room_id),
+  -- the stream_ordering of the event
+  stream_ordering BIGINT NOT NULL,
+  PRIMARY KEY (destination, room_id)
+  -- We don't declare a foreign key on stream_ordering here because that'd mean
+  -- we'd need to either maintain an index (expensive) or do a table scan of
+  -- destination_rooms whenever we delete an event (also potentially expensive).
+  -- In addition to that, a foreign key on stream_ordering would be redundant
+  -- as this row doesn't need to refer to a specific event; if the event gets
+  -- deleted then it doesn't affect the validity of the stream_ordering here.
+);
+CREATE INDEX destination_rooms_room_id
+    ON destination_rooms (room_id);
+CREATE TABLE stream_positions (
+    stream_name TEXT NOT NULL,
+    instance_name TEXT NOT NULL,
+    stream_id BIGINT NOT NULL
+);
+CREATE UNIQUE INDEX stream_positions_idx ON stream_positions(stream_name, instance_name);
+CREATE TABLE IF NOT EXISTS "access_tokens" (
+    id BIGINT PRIMARY KEY, 
+    user_id TEXT NOT NULL, 
+    device_id TEXT, 
+    token TEXT NOT NULL,
+    valid_until_ms BIGINT,
+    puppets_user_id TEXT,
+    last_validated BIGINT, refresh_token_id BIGINT REFERENCES refresh_tokens (id) ON DELETE CASCADE, used BOOLEAN,
+    UNIQUE(token) 
+);
+CREATE INDEX access_tokens_device_id ON access_tokens (user_id, device_id);
+CREATE TABLE IF NOT EXISTS "event_txn_id" (
+    event_id TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    token_id BIGINT NOT NULL,
+    txn_id TEXT NOT NULL,
+    inserted_ts BIGINT NOT NULL,
+    FOREIGN KEY (event_id)
+        REFERENCES events (event_id) ON DELETE CASCADE,
+    FOREIGN KEY (token_id)
+        REFERENCES access_tokens (id) ON DELETE CASCADE
+);
+CREATE UNIQUE INDEX event_txn_id_event_id ON event_txn_id(event_id);
+CREATE UNIQUE INDEX event_txn_id_txn_id ON event_txn_id(room_id, user_id, token_id, txn_id);
+CREATE INDEX event_txn_id_ts ON event_txn_id(inserted_ts);
+CREATE TABLE ignored_users( ignorer_user_id TEXT NOT NULL, ignored_user_id TEXT NOT NULL );
+CREATE UNIQUE INDEX ignored_users_uniqueness ON ignored_users (ignorer_user_id, ignored_user_id);
+CREATE INDEX ignored_users_ignored_user_id ON ignored_users (ignored_user_id);
+CREATE TABLE event_auth_chains (
+  event_id TEXT PRIMARY KEY,
+  chain_id BIGINT NOT NULL,
+  sequence_number BIGINT NOT NULL
+);
+CREATE UNIQUE INDEX event_auth_chains_c_seq_index ON event_auth_chains (chain_id, sequence_number);
+CREATE TABLE event_auth_chain_links (
+  origin_chain_id BIGINT NOT NULL,
+  origin_sequence_number BIGINT NOT NULL,
+
+  target_chain_id BIGINT NOT NULL,
+  target_sequence_number BIGINT NOT NULL
+);
+CREATE INDEX event_auth_chain_links_idx ON event_auth_chain_links (origin_chain_id, target_chain_id);
+CREATE TABLE event_auth_chain_to_calculate (
+  event_id TEXT PRIMARY KEY,
+  room_id TEXT NOT NULL,
+  type TEXT NOT NULL,
+  state_key TEXT NOT NULL
+);
+CREATE INDEX event_auth_chain_to_calculate_rm_id ON event_auth_chain_to_calculate(room_id);
+CREATE TABLE users_to_send_full_presence_to(
+    -- The user ID to send full presence to.
+    user_id TEXT PRIMARY KEY,
+    -- A presence stream ID token - the current presence stream token when the row was last upserted.
+    -- If a user calls /sync and this token is part of the update they're to receive, we also include
+    -- full user presence in the response.
+    -- This allows multiple devices for a user to receive full presence whenever they next call /sync.
+    presence_stream_id BIGINT,
+    FOREIGN KEY (user_id)
+        REFERENCES users (name)
+);
+CREATE TABLE refresh_tokens (
+  id BIGINT PRIMARY KEY,
+  user_id TEXT NOT NULL,
+  device_id TEXT NOT NULL,
+  token TEXT NOT NULL,
+  -- When consumed, a new refresh token is generated, which is tracked by
+  -- this foreign key
+  next_token_id BIGINT REFERENCES refresh_tokens (id) ON DELETE CASCADE, expiry_ts BIGINT DEFAULT NULL, ultimate_session_expiry_ts BIGINT DEFAULT NULL,
+  UNIQUE(token)
+);
+CREATE TABLE worker_locks (
+    lock_name TEXT NOT NULL,
+    lock_key TEXT NOT NULL,
+    -- We write the instance name to ease manual debugging, we don't ever read
+    -- from it.
+    -- Note: instance names aren't guaranteed to be unique.
+    instance_name TEXT NOT NULL,
+    -- A random string generated each time an instance takes out a lock. Used by
+    -- the instance to tell whether the lock is still held by it (e.g. in the
+    -- case where the process stalls for a long time the lock may time out and
+    -- be taken out by another instance, at which point the original instance
+    -- can tell it no longer holds the lock as the tokens no longer match).
+    token TEXT NOT NULL,
+    last_renewed_ts BIGINT NOT NULL
+);
+CREATE UNIQUE INDEX worker_locks_key ON worker_locks (lock_name, lock_key);
+CREATE TABLE federation_inbound_events_staging (
+    origin TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+    event_id TEXT NOT NULL,
+    received_ts BIGINT NOT NULL,
+    event_json TEXT NOT NULL,
+    internal_metadata TEXT NOT NULL
+);
+CREATE INDEX federation_inbound_events_staging_room ON federation_inbound_events_staging(room_id, received_ts);
+CREATE UNIQUE INDEX federation_inbound_events_staging_instance_event ON federation_inbound_events_staging(origin, event_id);
+CREATE TABLE insertion_event_edges(
+    event_id TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+    insertion_prev_event_id TEXT NOT NULL
+);
+CREATE INDEX insertion_event_edges_insertion_room_id ON insertion_event_edges(room_id);
+CREATE INDEX insertion_event_edges_insertion_prev_event_id ON insertion_event_edges(insertion_prev_event_id);
+CREATE TABLE insertion_event_extremities(
+    event_id TEXT NOT NULL,
+    room_id TEXT NOT NULL
+);
+CREATE UNIQUE INDEX insertion_event_extremities_event_id ON insertion_event_extremities(event_id);
+CREATE INDEX insertion_event_extremities_room_id ON insertion_event_extremities(room_id);
+CREATE TABLE registration_tokens(
+    token TEXT NOT NULL,  -- The token that can be used for authentication.
+    uses_allowed INT,  -- The total number of times this token can be used. NULL if no limit.
+    pending INT NOT NULL, -- The number of in progress registrations using this token.
+    completed INT NOT NULL, -- The number of times this token has been used to complete a registration.
+    expiry_time BIGINT,  -- The latest time this token will be valid (epoch time in milliseconds). NULL if token doesn't expire.
+    UNIQUE (token)
+);
+CREATE TABLE sessions(
+    session_type TEXT NOT NULL,  -- The unique key for this type of session.
+    session_id TEXT NOT NULL,  -- The session ID passed to the client.
+    value TEXT NOT NULL, -- A JSON dictionary to persist.
+    expiry_time_ms BIGINT NOT NULL,  -- The time this session will expire (epoch time in milliseconds).
+    UNIQUE (session_type, session_id)
+);
+CREATE TABLE insertion_events(
+    event_id TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+    next_batch_id TEXT NOT NULL
+);
+CREATE UNIQUE INDEX insertion_events_event_id ON insertion_events(event_id);
+CREATE INDEX insertion_events_next_batch_id ON insertion_events(next_batch_id);
+CREATE TABLE batch_events(
+    event_id TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+    batch_id TEXT NOT NULL
+);
+CREATE UNIQUE INDEX batch_events_event_id ON batch_events(event_id);
+CREATE INDEX batch_events_batch_id ON batch_events(batch_id);
+CREATE INDEX insertion_event_edges_event_id ON insertion_event_edges(event_id);
+CREATE TABLE device_auth_providers (
+  user_id TEXT NOT NULL,
+  device_id TEXT NOT NULL,
+  auth_provider_id TEXT NOT NULL,
+  auth_provider_session_id TEXT NOT NULL
+);
+CREATE INDEX device_auth_providers_devices
+  ON device_auth_providers (user_id, device_id);
+CREATE INDEX device_auth_providers_sessions
+  ON device_auth_providers (auth_provider_id, auth_provider_session_id);
+CREATE INDEX refresh_tokens_next_token_id
+    ON refresh_tokens(next_token_id)
+    WHERE next_token_id IS NOT NULL;
+CREATE TABLE partial_state_rooms (
+    room_id TEXT PRIMARY KEY,
+    FOREIGN KEY(room_id) REFERENCES rooms(room_id)
+);
+CREATE TABLE partial_state_rooms_servers (
+    room_id TEXT NOT NULL REFERENCES partial_state_rooms(room_id),
+    server_name TEXT NOT NULL,
+    UNIQUE(room_id, server_name)
+);
+CREATE TABLE partial_state_events (
+    -- the room_id is denormalised for efficient indexing (the canonical source is `events`)
+    room_id TEXT NOT NULL REFERENCES partial_state_rooms(room_id),
+    event_id TEXT NOT NULL REFERENCES events(event_id),
+    UNIQUE(event_id)
+);
+CREATE INDEX partial_state_events_room_id_idx
+     ON partial_state_events (room_id);
+CREATE TRIGGER partial_state_events_bad_room_id
+            BEFORE INSERT ON partial_state_events
+            FOR EACH ROW
+            BEGIN
+                SELECT RAISE(ABORT, 'Incorrect room_id in partial_state_events')
+                WHERE EXISTS (
+                    SELECT 1 FROM events
+                    WHERE events.event_id = NEW.event_id
+                       AND events.room_id != NEW.room_id
+                );
+            END;
+CREATE TABLE device_lists_changes_in_room (
+    user_id TEXT NOT NULL,
+    device_id TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+
+    -- This initially matches `device_lists_stream.stream_id`. Note that we
+    -- delete older values from `device_lists_stream`, so we can't use a foreign
+    -- constraint here.
+    --
+    -- The table will contain rows with the same `stream_id` but different
+    -- `room_id`, as for each device update we store a row per room the user is
+    -- joined to. Therefore `(stream_id, room_id)` gives a unique index.
+    stream_id BIGINT NOT NULL,
+
+    -- We have a background process which goes through this table and converts
+    -- entries into rows in `device_lists_outbound_pokes`. Once we have processed
+    -- a row, we mark it as such by setting `converted_to_destinations=TRUE`.
+    converted_to_destinations BOOLEAN NOT NULL,
+    opentracing_context TEXT
+);
+CREATE UNIQUE INDEX device_lists_changes_in_stream_id ON device_lists_changes_in_room(stream_id, room_id);
+CREATE INDEX device_lists_changes_in_stream_id_unconverted ON device_lists_changes_in_room(stream_id) WHERE NOT converted_to_destinations;
+CREATE TABLE IF NOT EXISTS "event_edges" (
+  event_id TEXT NOT NULL,
+  prev_event_id TEXT NOT NULL,
+  room_id TEXT NULL,
+  is_state BOOL NOT NULL DEFAULT 0,
+  FOREIGN KEY(event_id) REFERENCES events(event_id)
+);
+CREATE UNIQUE INDEX event_edges_event_id_prev_event_id_idx
+  ON event_edges (event_id, prev_event_id);
+CREATE INDEX ev_edges_prev_id ON event_edges (prev_event_id);
+CREATE TABLE event_push_summary_last_receipt_stream_id (
+    Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE,  -- Makes sure this table only has one row.
+    stream_id BIGINT NOT NULL,
+    CHECK (Lock='X')
+);
+CREATE TABLE IF NOT EXISTS "application_services_state" (
+    as_id TEXT PRIMARY KEY NOT NULL,
+    state VARCHAR(5),
+    read_receipt_stream_id BIGINT,
+    presence_stream_id BIGINT,
+    to_device_stream_id BIGINT,
+    device_list_stream_id BIGINT
+);
+CREATE TABLE IF NOT EXISTS "receipts_linearized" (
+    stream_id BIGINT NOT NULL,
+    room_id TEXT NOT NULL,
+    receipt_type TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    event_id TEXT NOT NULL,
+    thread_id TEXT,
+    event_stream_ordering BIGINT,
+    data TEXT NOT NULL,
+    CONSTRAINT receipts_linearized_uniqueness UNIQUE (room_id, receipt_type, user_id),
+    CONSTRAINT receipts_linearized_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id)
+);
+CREATE TABLE IF NOT EXISTS "receipts_graph" (
+    room_id TEXT NOT NULL,
+    receipt_type TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    event_ids TEXT NOT NULL,
+    thread_id TEXT,
+    data TEXT NOT NULL,
+    CONSTRAINT receipts_graph_uniqueness UNIQUE (room_id, receipt_type, user_id),
+    CONSTRAINT receipts_graph_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id)
+);
+CREATE INDEX receipts_linearized_id ON receipts_linearized( stream_id );
+CREATE INDEX receipts_linearized_room_stream ON receipts_linearized( room_id, stream_id );
+CREATE INDEX receipts_linearized_user ON receipts_linearized( user_id );
+CREATE INDEX redactions_have_censored_ts ON redactions (received_ts) WHERE NOT have_censored;
+CREATE INDEX room_memberships_user_room_forgotten ON room_memberships (user_id, room_id) WHERE forgotten = 1;
+CREATE INDEX users_have_local_media ON local_media_repository (user_id, created_ts) ;
+CREATE UNIQUE INDEX e2e_cross_signing_keys_stream_idx ON e2e_cross_signing_keys (stream_id) ;
+CREATE INDEX user_external_ids_user_id_idx ON user_external_ids (user_id) ;
+CREATE INDEX presence_stream_state_not_offline_idx ON presence_stream (state) WHERE state != 'offline';
+CREATE UNIQUE INDEX event_push_summary_unique_index ON event_push_summary (user_id, room_id) ;
+CREATE UNIQUE INDEX event_push_summary_unique_index2 ON event_push_summary (user_id, room_id, thread_id) ;
+CREATE UNIQUE INDEX receipts_graph_unique_index ON receipts_graph (room_id, receipt_type, user_id) WHERE thread_id IS NULL;
+CREATE UNIQUE INDEX receipts_linearized_unique_index ON receipts_linearized (room_id, receipt_type, user_id) WHERE thread_id IS NULL;
+CREATE INDEX event_push_actions_stream_highlight_index ON event_push_actions (highlight, stream_ordering) WHERE highlight=0;
+CREATE INDEX current_state_events_member_index ON current_state_events (state_key) WHERE type='m.room.member';
+CREATE INDEX event_contains_url_index ON events (room_id, topological_ordering, stream_ordering) WHERE contains_url = true AND outlier = false;
+CREATE INDEX event_push_actions_highlights_index ON event_push_actions (user_id, room_id, topological_ordering, stream_ordering) WHERE highlight=1;
+CREATE INDEX local_media_repository_url_idx ON local_media_repository (created_ts) WHERE url_cache IS NOT NULL;
+INSERT INTO appservice_stream_position VALUES('X',0);
+INSERT INTO federation_stream_position VALUES('federation',-1,'master');
+INSERT INTO federation_stream_position VALUES('events',-1,'master');
+INSERT INTO event_push_summary_stream_ordering VALUES('X',0);
+INSERT INTO user_directory_stream_pos VALUES('X',1);
+INSERT INTO stats_incremental_position VALUES('X',1);
+INSERT INTO event_push_summary_last_receipt_stream_id VALUES('X',0);
diff --git a/synapse/storage/schema/state/full_schemas/72/full.sql.postgres b/synapse/storage/schema/state/full_schemas/72/full.sql.postgres
new file mode 100644
index 0000000000..263ade761e
--- /dev/null
+++ b/synapse/storage/schema/state/full_schemas/72/full.sql.postgres
@@ -0,0 +1,30 @@
+CREATE TABLE state_group_edges (
+    state_group bigint NOT NULL,
+    prev_state_group bigint NOT NULL
+);
+CREATE SEQUENCE state_group_id_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+CREATE TABLE state_groups (
+    id bigint NOT NULL,
+    room_id text NOT NULL,
+    event_id text NOT NULL
+);
+CREATE TABLE state_groups_state (
+    state_group bigint NOT NULL,
+    room_id text NOT NULL,
+    type text NOT NULL,
+    state_key text NOT NULL,
+    event_id text NOT NULL
+);
+ALTER TABLE ONLY state_groups_state ALTER COLUMN state_group SET (n_distinct=-0.02);
+ALTER TABLE ONLY state_groups
+    ADD CONSTRAINT state_groups_pkey PRIMARY KEY (id);
+CREATE INDEX state_group_edges_prev_idx ON state_group_edges USING btree (prev_state_group);
+CREATE UNIQUE INDEX state_group_edges_unique_idx ON state_group_edges USING btree (state_group, prev_state_group);
+CREATE INDEX state_groups_room_id_idx ON state_groups USING btree (room_id);
+CREATE INDEX state_groups_state_type_idx ON state_groups_state USING btree (state_group, type, state_key);
+SELECT pg_catalog.setval('state_group_id_seq', 1, false);
diff --git a/synapse/storage/schema/state/full_schemas/72/full.sql.sqlite b/synapse/storage/schema/state/full_schemas/72/full.sql.sqlite
new file mode 100644
index 0000000000..dda060b638
--- /dev/null
+++ b/synapse/storage/schema/state/full_schemas/72/full.sql.sqlite
@@ -0,0 +1,20 @@
+CREATE TABLE state_groups (
+    id BIGINT PRIMARY KEY,
+    room_id TEXT NOT NULL,
+    event_id TEXT NOT NULL
+);
+CREATE TABLE state_groups_state (
+    state_group BIGINT NOT NULL,
+    room_id TEXT NOT NULL,
+    type TEXT NOT NULL,
+    state_key TEXT NOT NULL,
+    event_id TEXT NOT NULL
+);
+CREATE TABLE state_group_edges (
+    state_group BIGINT NOT NULL,
+    prev_state_group BIGINT NOT NULL
+);
+CREATE INDEX state_group_edges_prev_idx ON state_group_edges (prev_state_group);
+CREATE INDEX state_groups_state_type_idx ON state_groups_state (state_group, type, state_key);
+CREATE INDEX state_groups_room_id_idx ON state_groups (room_id) ;
+CREATE UNIQUE INDEX state_group_edges_unique_idx ON state_group_edges (state_group, prev_state_group) ;
-- 
cgit 1.5.1


From 2fae1a3f7862bf38cd0b52dfd3ea3ae76794d2b7 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 26 Sep 2022 14:28:12 -0400
Subject: Improve tests for get_unread_push_actions_for_user_in_range_*.
 (#13893)

* Adds a docstring.
* Reduces a small amount of duplicated code.
* Improves tests.
---
 changelog.d/13893.feature                          |  1 +
 .../storage/databases/main/event_push_actions.py   | 38 ++++++----
 tests/storage/test_event_push_actions.py           | 88 ++++++++++++++++++----
 3 files changed, 97 insertions(+), 30 deletions(-)
 create mode 100644 changelog.d/13893.feature

(limited to 'synapse')

diff --git a/changelog.d/13893.feature b/changelog.d/13893.feature
new file mode 100644
index 0000000000..d0cb902dff
--- /dev/null
+++ b/changelog.d/13893.feature
@@ -0,0 +1 @@
+Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)).
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 6b8668d2dc..f4cdc2e399 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -559,7 +559,18 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
     def _get_receipts_by_room_txn(
         self, txn: LoggingTransaction, user_id: str
-    ) -> List[Tuple[str, int]]:
+    ) -> Dict[str, int]:
+        """
+        Generate a map of room ID to the latest stream ordering that has been
+        read by the given user.
+
+        Args:
+            txn:
+            user_id: The user to fetch receipts for.
+
+        Returns:
+            A map of room ID to stream ordering for all rooms the user has a receipt in.
+        """
         receipt_types_clause, args = make_in_list_sql_clause(
             self.database_engine,
             "receipt_type",
@@ -580,7 +591,10 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
         args.extend((user_id,))
         txn.execute(sql, args)
-        return cast(List[Tuple[str, int]], txn.fetchall())
+        return {
+            room_id: latest_stream_ordering
+            for room_id, latest_stream_ordering in txn.fetchall()
+        }
 
     async def get_unread_push_actions_for_user_in_range_for_http(
         self,
@@ -605,12 +619,10 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             The list will have between 0~limit entries.
         """
 
-        receipts_by_room = dict(
-            await self.db_pool.runInteraction(
-                "get_unread_push_actions_for_user_in_range_http_receipts",
-                self._get_receipts_by_room_txn,
-                user_id=user_id,
-            ),
+        receipts_by_room = await self.db_pool.runInteraction(
+            "get_unread_push_actions_for_user_in_range_http_receipts",
+            self._get_receipts_by_room_txn,
+            user_id=user_id,
         )
 
         def get_push_actions_txn(
@@ -679,12 +691,10 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             The list will have between 0~limit entries.
         """
 
-        receipts_by_room = dict(
-            await self.db_pool.runInteraction(
-                "get_unread_push_actions_for_user_in_range_email_receipts",
-                self._get_receipts_by_room_txn,
-                user_id=user_id,
-            ),
+        receipts_by_room = await self.db_pool.runInteraction(
+            "get_unread_push_actions_for_user_in_range_email_receipts",
+            self._get_receipts_by_room_txn,
+            user_id=user_id,
         )
 
         def get_push_actions_txn(
diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py
index 08c74b93e3..473c965e19 100644
--- a/tests/storage/test_event_push_actions.py
+++ b/tests/storage/test_event_push_actions.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Tuple
+
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.rest import admin
@@ -22,8 +24,6 @@ from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
 
-USER_ID = "@user:example.com"
-
 
 class EventPushActionsStoreTestCase(HomeserverTestCase):
     servlets = [
@@ -38,21 +38,13 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         assert persist_events_store is not None
         self.persist_events_store = persist_events_store
 
-    def test_get_unread_push_actions_for_user_in_range_for_http(self) -> None:
-        self.get_success(
-            self.store.get_unread_push_actions_for_user_in_range_for_http(
-                USER_ID, 0, 1000, 20
-            )
-        )
+    def _create_users_and_room(self) -> Tuple[str, str, str, str, str]:
+        """
+        Creates two users and a shared room.
 
-    def test_get_unread_push_actions_for_user_in_range_for_email(self) -> None:
-        self.get_success(
-            self.store.get_unread_push_actions_for_user_in_range_for_email(
-                USER_ID, 0, 1000, 20
-            )
-        )
-
-    def test_count_aggregation(self) -> None:
+        Returns:
+            Tuple of (user 1 ID, user 1 token, user 2 ID, user 2 token, room ID).
+        """
         # Create a user to receive notifications and send receipts.
         user_id = self.register_user("user1235", "pass")
         token = self.login("user1235", "pass")
@@ -65,6 +57,70 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         room_id = self.helper.create_room_as(user_id, tok=token)
         self.helper.join(room_id, other_id, tok=other_token)
 
+        return user_id, token, other_id, other_token, room_id
+
+    def test_get_unread_push_actions_for_user_in_range(self) -> None:
+        """Test getting unread push actions for HTTP and email pushers."""
+        user_id, token, _, other_token, room_id = self._create_users_and_room()
+
+        # Create two events, one of which is a highlight.
+        self.helper.send_event(
+            room_id,
+            type="m.room.message",
+            content={"msgtype": "m.text", "body": "msg"},
+            tok=other_token,
+        )
+        event_id = self.helper.send_event(
+            room_id,
+            type="m.room.message",
+            content={"msgtype": "m.text", "body": user_id},
+            tok=other_token,
+        )["event_id"]
+
+        # Fetch unread actions for HTTP pushers.
+        http_actions = self.get_success(
+            self.store.get_unread_push_actions_for_user_in_range_for_http(
+                user_id, 0, 1000, 20
+            )
+        )
+        self.assertEqual(2, len(http_actions))
+
+        # Fetch unread actions for email pushers.
+        email_actions = self.get_success(
+            self.store.get_unread_push_actions_for_user_in_range_for_email(
+                user_id, 0, 1000, 20
+            )
+        )
+        self.assertEqual(2, len(email_actions))
+
+        # Send a receipt, which should clear any actions.
+        self.get_success(
+            self.store.insert_receipt(
+                room_id,
+                "m.read",
+                user_id=user_id,
+                event_ids=[event_id],
+                thread_id=None,
+                data={},
+            )
+        )
+        http_actions = self.get_success(
+            self.store.get_unread_push_actions_for_user_in_range_for_http(
+                user_id, 0, 1000, 20
+            )
+        )
+        self.assertEqual([], http_actions)
+        email_actions = self.get_success(
+            self.store.get_unread_push_actions_for_user_in_range_for_email(
+                user_id, 0, 1000, 20
+            )
+        )
+        self.assertEqual([], email_actions)
+
+    def test_count_aggregation(self) -> None:
+        # Create a user to receive notifications and send receipts.
+        user_id, token, _, other_token, room_id = self._create_users_and_room()
+
         last_event_id: str
 
         def _assert_counts(noitf_count: int, highlight_count: int) -> None:
-- 
cgit 1.5.1


From d6b85a2a7dea2737e69d67842c2246975ec64bce Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Mon, 26 Sep 2022 23:07:02 +0100
Subject: Complement image: propagate SIGTERM to all workers (#13914)

This should mean that logs from worker processes are flushed before shutdown.

When a test completes, Complement stops the docker container, which means that
synapse will receive a SIGTERM. Currently, the `complement_fork_starter` exits
immediately (without notifying the worker processes), which means that the
workers never get a chance to flush their logs before the whole container is
vaped. We can fix this by propagating the SIGTERM to the children.
---
 changelog.d/13914.misc                 |  1 +
 synapse/app/complement_fork_starter.py | 32 ++++++++++++++++++++++++++++++--
 2 files changed, 31 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/13914.misc

(limited to 'synapse')

diff --git a/changelog.d/13914.misc b/changelog.d/13914.misc
new file mode 100644
index 0000000000..c29bc25d38
--- /dev/null
+++ b/changelog.d/13914.misc
@@ -0,0 +1 @@
+Complement image: propagate SIGTERM to all workers.
diff --git a/synapse/app/complement_fork_starter.py b/synapse/app/complement_fork_starter.py
index 89eb07df27..b22f315453 100644
--- a/synapse/app/complement_fork_starter.py
+++ b/synapse/app/complement_fork_starter.py
@@ -51,11 +51,18 @@ import argparse
 import importlib
 import itertools
 import multiprocessing
+import os
+import signal
 import sys
-from typing import Any, Callable, List
+from types import FrameType
+from typing import Any, Callable, List, Optional
 
 from twisted.internet.main import installReactor
 
+# a map from signal number to the original signal handler, before we installed our
+# custom ones. We restore these in our child processes.
+_original_signal_handlers: dict[int, Any] = {}
+
 
 class ProxiedReactor:
     """
@@ -105,6 +112,11 @@ def _worker_entrypoint(
 
     sys.argv = args
 
+    # reset the custom signal handlers that we installed, so that the children start
+    # from a clean slate.
+    for sig, handler in _original_signal_handlers.items():
+        signal.signal(sig, handler)
+
     from twisted.internet.epollreactor import EPollReactor
 
     proxy_reactor._install_real_reactor(EPollReactor())
@@ -167,13 +179,29 @@ def main() -> None:
     update_proc.join()
     print("===== PREPARED DATABASE =====", file=sys.stderr)
 
+    processes: List[multiprocessing.Process] = []
+
+    # Install signal handlers to propagate signals to all our children, so that they
+    # shut down cleanly. This also inhibits our own exit, but that's good: we want to
+    # wait until the children have exited.
+    def handle_signal(signum: int, frame: Optional[FrameType]) -> None:
+        print(
+            f"complement_fork_starter: Caught signal {signum}. Stopping children.",
+            file=sys.stderr,
+        )
+        for p in processes:
+            if p.pid:
+                os.kill(p.pid, signum)
+
+    for sig in (signal.SIGINT, signal.SIGTERM):
+        _original_signal_handlers[sig] = signal.signal(sig, handle_signal)
+
     # At this point, we've imported all the main entrypoints for all the workers.
     # Now we basically just fork() out to create the workers we need.
     # Because we're using fork(), all the workers get a clone of this launcher's
     # memory space and don't need to repeat the work of loading the code!
     # Instead of using fork() directly, we use the multiprocessing library,
     # which uses fork() on Unix platforms.
-    processes = []
     for (func, worker_args) in zip(worker_functions, args_by_worker):
         process = multiprocessing.Process(
             target=_worker_entrypoint, args=(func, proxy_reactor, worker_args)
-- 
cgit 1.5.1


From 85e161631a2ca7d495b619456221311ec1c93096 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Tue, 27 Sep 2022 11:17:23 +0100
Subject: Faster room joins: Fix spurious error when joining a room (#13872)

During a `lazy_load_members` `/sync`, we look through auth events in
rooms with partial state to find prior membership events. When such a
membership is not found, an error is logged.

Since the first join event for a user never has a prior membership event
to cite, the error would always be logged when one appeared in the room
timeline.

Avoid logging errors for such events.

Introduced in #13477.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/13872.bugfix |  1 +
 synapse/handlers/sync.py | 22 +++++++++++++++++++---
 2 files changed, 20 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/13872.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13872.bugfix b/changelog.d/13872.bugfix
new file mode 100644
index 0000000000..67d3d9e643
--- /dev/null
+++ b/changelog.d/13872.bugfix
@@ -0,0 +1 @@
+Faster room joins: Fix a bug introduced in 1.66.0 where an error would be logged when syncing after joining a room.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 5293fa4d0e..e75fc6b947 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1191,7 +1191,9 @@ class SyncHandler:
             room_id: The partial state room to find the remaining memberships for.
             members_to_fetch: The memberships to find.
             events_with_membership_auth: A mapping from user IDs to events whose auth
-                events are known to contain their membership.
+                events would contain their prior membership, if one exists.
+                Note that join events will not cite a prior membership if a user has
+                never been in a room before.
             found_state_ids: A dict from (type, state_key) -> state_event_id, containing
                 memberships that have been previously found. Entries in
                 `members_to_fetch` that have a membership in `found_state_ids` are
@@ -1201,6 +1203,10 @@ class SyncHandler:
             A dict from ("m.room.member", state_key) -> state_event_id, containing the
             memberships missing from `found_state_ids`.
 
+            When `events_with_membership_auth` contains a join event for a given user
+            which does not cite a prior membership, no membership is returned for that
+            user.
+
         Raises:
             KeyError: if `events_with_membership_auth` does not have an entry for a
                 missing membership. Memberships in `found_state_ids` do not need an
@@ -1218,8 +1224,18 @@ class SyncHandler:
             if (EventTypes.Member, member) in found_state_ids:
                 continue
 
-            missing_members.add(member)
             event_with_membership_auth = events_with_membership_auth[member]
+            is_join = (
+                event_with_membership_auth.is_state()
+                and event_with_membership_auth.type == EventTypes.Member
+                and event_with_membership_auth.state_key == member
+                and event_with_membership_auth.content.get("membership")
+                == Membership.JOIN
+            )
+            if not is_join:
+                # The event must include the desired membership as an auth event, unless
+                # it's the first join event for a given user.
+                missing_members.add(member)
             auth_event_ids.update(event_with_membership_auth.auth_event_ids())
 
         auth_events = await self.store.get_events(auth_event_ids)
@@ -1243,7 +1259,7 @@ class SyncHandler:
                     auth_event.type == EventTypes.Member
                     and auth_event.state_key == member
                 ):
-                    missing_members.remove(member)
+                    missing_members.discard(member)
                     additional_state_ids[
                         (EventTypes.Member, member)
                     ] = auth_event.event_id
-- 
cgit 1.5.1


From e8318a433356413648bd180dcfc69c29ca319fc6 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 27 Sep 2022 13:01:08 +0100
Subject: Handle the case of remote users leaving a partial join room for
 device lists (#13885)

---
 changelog.d/13885.misc                        |  1 +
 synapse/app/admin_cmd.py                      |  2 +-
 synapse/storage/controllers/persist_events.py | 71 ---------------------------
 synapse/storage/databases/main/__init__.py    |  2 +-
 synapse/storage/databases/main/devices.py     | 64 ++++++++++++++++++------
 synapse/storage/databases/main/events.py      |  6 +++
 synapse/storage/databases/main/roommember.py  | 46 +++++++++--------
 7 files changed, 85 insertions(+), 107 deletions(-)
 create mode 100644 changelog.d/13885.misc

(limited to 'synapse')

diff --git a/changelog.d/13885.misc b/changelog.d/13885.misc
new file mode 100644
index 0000000000..bc76b862df
--- /dev/null
+++ b/changelog.d/13885.misc
@@ -0,0 +1 @@
+Correctly handle a race with device lists when a remote user leaves during a partial join.
diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py
index 8a583d3ec6..3c8c00ea5b 100644
--- a/synapse/app/admin_cmd.py
+++ b/synapse/app/admin_cmd.py
@@ -53,9 +53,9 @@ logger = logging.getLogger("synapse.app.admin_cmd")
 
 class AdminCmdSlavedStore(
     SlavedFilteringStore,
-    SlavedDeviceStore,
     SlavedPushRuleStore,
     SlavedEventStore,
+    SlavedDeviceStore,
     TagsWorkerStore,
     DeviceInboxWorkerStore,
     AccountDataWorkerStore,
diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py
index 501dbbc990..709cb792ed 100644
--- a/synapse/storage/controllers/persist_events.py
+++ b/synapse/storage/controllers/persist_events.py
@@ -598,11 +598,6 @@ class EventsPersistenceStorageController:
             # room
             state_delta_for_room: Dict[str, DeltaState] = {}
 
-            # Set of remote users which were in rooms the server has left or who may
-            # have left rooms the server is in. We should check if we still share any
-            # rooms and if not we mark their device lists as stale.
-            potentially_left_users: Set[str] = set()
-
             if not backfilled:
                 with Measure(self._clock, "_calculate_state_and_extrem"):
                     # Work out the new "current state" for each room.
@@ -716,8 +711,6 @@ class EventsPersistenceStorageController:
                                 room_id,
                                 ev_ctx_rm,
                                 delta,
-                                current_state,
-                                potentially_left_users,
                             )
                             if not is_still_joined:
                                 logger.info("Server no longer in room %s", room_id)
@@ -725,20 +718,6 @@ class EventsPersistenceStorageController:
                                 current_state = {}
                                 delta.no_longer_in_room = True
 
-                            # Add all remote users that might have left rooms.
-                            potentially_left_users.update(
-                                user_id
-                                for event_type, user_id in delta.to_delete
-                                if event_type == EventTypes.Member
-                                and not self.is_mine_id(user_id)
-                            )
-                            potentially_left_users.update(
-                                user_id
-                                for event_type, user_id in delta.to_insert.keys()
-                                if event_type == EventTypes.Member
-                                and not self.is_mine_id(user_id)
-                            )
-
                             state_delta_for_room[room_id] = delta
 
             await self.persist_events_store._persist_events_and_state_updates(
@@ -749,8 +728,6 @@ class EventsPersistenceStorageController:
                 inhibit_local_membership_updates=backfilled,
             )
 
-            await self._handle_potentially_left_users(potentially_left_users)
-
         return replaced_events
 
     async def _calculate_new_extremities(
@@ -1126,8 +1103,6 @@ class EventsPersistenceStorageController:
         room_id: str,
         ev_ctx_rm: List[Tuple[EventBase, EventContext]],
         delta: DeltaState,
-        current_state: Optional[StateMap[str]],
-        potentially_left_users: Set[str],
     ) -> bool:
         """Check if the server will still be joined after the given events have
         been persised.
@@ -1137,11 +1112,6 @@ class EventsPersistenceStorageController:
             ev_ctx_rm
             delta: The delta of current state between what is in the database
                 and what the new current state will be.
-            current_state: The new current state if it already been calculated,
-                otherwise None.
-            potentially_left_users: If the server has left the room, then joined
-                remote users will be added to this set to indicate that the
-                server may no longer be sharing a room with them.
         """
 
         if not any(
@@ -1195,45 +1165,4 @@ class EventsPersistenceStorageController:
         ):
             return True
 
-        # The server will leave the room, so we go and find out which remote
-        # users will still be joined when we leave.
-        if current_state is None:
-            current_state = await self.main_store.get_partial_current_state_ids(room_id)
-            current_state = dict(current_state)
-            for key in delta.to_delete:
-                current_state.pop(key, None)
-
-            current_state.update(delta.to_insert)
-
-        remote_event_ids = [
-            event_id
-            for (
-                typ,
-                state_key,
-            ), event_id in current_state.items()
-            if typ == EventTypes.Member and not self.is_mine_id(state_key)
-        ]
-        members = await self.main_store.get_membership_from_event_ids(remote_event_ids)
-        potentially_left_users.update(
-            member.user_id
-            for member in members.values()
-            if member and member.membership == Membership.JOIN
-        )
-
         return False
-
-    async def _handle_potentially_left_users(self, user_ids: Set[str]) -> None:
-        """Given a set of remote users check if the server still shares a room with
-        them. If not then mark those users' device cache as stale.
-        """
-
-        if not user_ids:
-            return
-
-        joined_users = await self.main_store.get_users_server_still_shares_room_with(
-            user_ids
-        )
-        left_users = user_ids - joined_users
-
-        for user_id in left_users:
-            await self.main_store.mark_remote_user_device_list_as_unsubscribed(user_id)
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index 4dccbb732a..0843f10340 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -83,6 +83,7 @@ logger = logging.getLogger(__name__)
 
 class DataStore(
     EventsBackgroundUpdatesStore,
+    DeviceStore,
     RoomMemberStore,
     RoomStore,
     RoomBatchStore,
@@ -114,7 +115,6 @@ class DataStore(
     StreamWorkerStore,
     OpenIdStore,
     ClientIpWorkerStore,
-    DeviceStore,
     DeviceInboxStore,
     UserDirectoryStore,
     UserErasureStore,
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 5d700ca6c3..1151fb0cc3 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -47,6 +47,7 @@ from synapse.storage.database import (
     make_tuple_comparison_clause,
 )
 from synapse.storage.databases.main.end_to_end_keys import EndToEndKeyWorkerStore
+from synapse.storage.databases.main.roommember import RoomMemberWorkerStore
 from synapse.storage.types import Cursor
 from synapse.types import JsonDict, get_verify_key_from_cross_signing_key
 from synapse.util import json_decoder, json_encoder
@@ -70,7 +71,7 @@ DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = (
 BG_UPDATE_REMOVE_DUP_OUTBOUND_POKES = "remove_dup_outbound_pokes"
 
 
-class DeviceWorkerStore(EndToEndKeyWorkerStore):
+class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
     def __init__(
         self,
         database: DatabasePool,
@@ -985,24 +986,59 @@ class DeviceWorkerStore(EndToEndKeyWorkerStore):
             desc="mark_remote_user_device_cache_as_valid",
         )
 
+    async def handle_potentially_left_users(self, user_ids: Set[str]) -> None:
+        """Given a set of remote users check if the server still shares a room with
+        them. If not then mark those users' device cache as stale.
+        """
+
+        if not user_ids:
+            return
+
+        await self.db_pool.runInteraction(
+            "_handle_potentially_left_users",
+            self.handle_potentially_left_users_txn,
+            user_ids,
+        )
+
+    def handle_potentially_left_users_txn(
+        self,
+        txn: LoggingTransaction,
+        user_ids: Set[str],
+    ) -> None:
+        """Given a set of remote users check if the server still shares a room with
+        them. If not then mark those users' device cache as stale.
+        """
+
+        if not user_ids:
+            return
+
+        joined_users = self.get_users_server_still_shares_room_with_txn(txn, user_ids)
+        left_users = user_ids - joined_users
+
+        for user_id in left_users:
+            self.mark_remote_user_device_list_as_unsubscribed_txn(txn, user_id)
+
     async def mark_remote_user_device_list_as_unsubscribed(self, user_id: str) -> None:
         """Mark that we no longer track device lists for remote user."""
 
-        def _mark_remote_user_device_list_as_unsubscribed_txn(
-            txn: LoggingTransaction,
-        ) -> None:
-            self.db_pool.simple_delete_txn(
-                txn,
-                table="device_lists_remote_extremeties",
-                keyvalues={"user_id": user_id},
-            )
-            self._invalidate_cache_and_stream(
-                txn, self.get_device_list_last_stream_id_for_remote, (user_id,)
-            )
-
         await self.db_pool.runInteraction(
             "mark_remote_user_device_list_as_unsubscribed",
-            _mark_remote_user_device_list_as_unsubscribed_txn,
+            self.mark_remote_user_device_list_as_unsubscribed_txn,
+            user_id,
+        )
+
+    def mark_remote_user_device_list_as_unsubscribed_txn(
+        self,
+        txn: LoggingTransaction,
+        user_id: str,
+    ) -> None:
+        self.db_pool.simple_delete_txn(
+            txn,
+            table="device_lists_remote_extremeties",
+            keyvalues={"user_id": user_id},
+        )
+        self._invalidate_cache_and_stream(
+            txn, self.get_device_list_last_stream_id_for_remote, (user_id,)
         )
 
     async def get_dehydrated_device(
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 2e156a4a11..b59eb7478b 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1202,6 +1202,12 @@ class PersistEventsStore:
                 txn, room_id, members_changed
             )
 
+            # Check if any of the remote membership changes requires us to
+            # unsubscribe from their device lists.
+            self.store.handle_potentially_left_users_txn(
+                txn, {m for m in members_changed if not self.hs.is_mine_id(m)}
+            )
+
     def _upsert_room_version_txn(self, txn: LoggingTransaction, room_id: str) -> None:
         """Update the room version in the database based off current state
         events.
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index a8d224602a..8ada3cdac3 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -662,31 +662,37 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         if not user_ids:
             return set()
 
-        def _get_users_server_still_shares_room_with_txn(
-            txn: LoggingTransaction,
-        ) -> Set[str]:
-            sql = """
-                SELECT state_key FROM current_state_events
-                WHERE
-                    type = 'm.room.member'
-                    AND membership = 'join'
-                    AND %s
-                GROUP BY state_key
-            """
-
-            clause, args = make_in_list_sql_clause(
-                self.database_engine, "state_key", user_ids
-            )
+        return await self.db_pool.runInteraction(
+            "get_users_server_still_shares_room_with",
+            self.get_users_server_still_shares_room_with_txn,
+            user_ids,
+        )
 
-            txn.execute(sql % (clause,), args)
+    def get_users_server_still_shares_room_with_txn(
+        self,
+        txn: LoggingTransaction,
+        user_ids: Collection[str],
+    ) -> Set[str]:
+        if not user_ids:
+            return set()
 
-            return {row[0] for row in txn}
+        sql = """
+            SELECT state_key FROM current_state_events
+            WHERE
+                type = 'm.room.member'
+                AND membership = 'join'
+                AND %s
+            GROUP BY state_key
+        """
 
-        return await self.db_pool.runInteraction(
-            "get_users_server_still_shares_room_with",
-            _get_users_server_still_shares_room_with_txn,
+        clause, args = make_in_list_sql_clause(
+            self.database_engine, "state_key", user_ids
         )
 
+        txn.execute(sql % (clause,), args)
+
+        return {row[0] for row in txn}
+
     @cancellable
     async def get_rooms_for_user(
         self, user_id: str, on_invalidate: Optional[Callable[[], None]] = None
-- 
cgit 1.5.1


From 50c92f3a692a745d2b42f9731af4da493fa27715 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Tue, 27 Sep 2022 15:38:14 +0200
Subject: Carry IdP Session IDs through user-mapping sessions. (#13839)

Since #11482, we're saving session IDs from upstream IdPs, but we've been losing them when the user goes through a user-mapping session during account registration.
---
 changelog.d/13839.misc  | 1 +
 synapse/handlers/sso.py | 9 +++++++++
 2 files changed, 10 insertions(+)
 create mode 100644 changelog.d/13839.misc

(limited to 'synapse')

diff --git a/changelog.d/13839.misc b/changelog.d/13839.misc
new file mode 100644
index 0000000000..549872c90f
--- /dev/null
+++ b/changelog.d/13839.misc
@@ -0,0 +1 @@
+Carry IdP Session IDs through user-mapping sessions.
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index 6bc1cbd787..e035677b8a 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -147,6 +147,9 @@ class UsernameMappingSession:
     # A unique identifier for this SSO provider, e.g.  "oidc" or "saml".
     auth_provider_id: str
 
+    # An optional session ID from the IdP.
+    auth_provider_session_id: Optional[str]
+
     # user ID on the IdP server
     remote_user_id: str
 
@@ -464,6 +467,7 @@ class SsoHandler:
                         client_redirect_url,
                         next_step_url,
                         extra_login_attributes,
+                        auth_provider_session_id,
                     )
 
                 user_id = await self._register_mapped_user(
@@ -585,6 +589,7 @@ class SsoHandler:
         client_redirect_url: str,
         next_step_url: bytes,
         extra_login_attributes: Optional[JsonDict],
+        auth_provider_session_id: Optional[str],
     ) -> NoReturn:
         """Creates a UsernameMappingSession and redirects the browser
 
@@ -607,6 +612,8 @@ class SsoHandler:
             extra_login_attributes: An optional dictionary of extra
                 attributes to be provided to the client in the login response.
 
+            auth_provider_session_id: An optional session ID from the IdP.
+
         Raises:
             RedirectException
         """
@@ -615,6 +622,7 @@ class SsoHandler:
         now = self._clock.time_msec()
         session = UsernameMappingSession(
             auth_provider_id=auth_provider_id,
+            auth_provider_session_id=auth_provider_session_id,
             remote_user_id=remote_user_id,
             display_name=attributes.display_name,
             emails=attributes.emails,
@@ -968,6 +976,7 @@ class SsoHandler:
             session.client_redirect_url,
             session.extra_login_attributes,
             new_user=True,
+            auth_provider_session_id=session.auth_provider_session_id,
         )
 
     def _expire_old_sessions(self) -> None:
-- 
cgit 1.5.1


From 299b00d968ee23ba4e4806dd7c4fa97c7fcfb6f5 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 27 Sep 2022 15:17:41 +0100
Subject: Prioritize outbound to-device over device list updates (#13922)

Otherwise device list changes for large accounts can temporarily delay to-device messages.
---
 changelog.d/13922.bugfix                           |  1 +
 synapse/federation/sender/per_destination_queue.py | 29 ++++++++++++----------
 2 files changed, 17 insertions(+), 13 deletions(-)
 create mode 100644 changelog.d/13922.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13922.bugfix b/changelog.d/13922.bugfix
new file mode 100644
index 0000000000..7269d28dee
--- /dev/null
+++ b/changelog.d/13922.bugfix
@@ -0,0 +1 @@
+Fix long-standing bug where device updates could cause delays sending out to-device messages over federation.
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index 41d8b937af..084c45a95c 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -646,29 +646,32 @@ class _TransactionQueueManager:
 
         # We start by fetching device related EDUs, i.e device updates and to
         # device messages. We have to keep 2 free slots for presence and rr_edus.
-        limit = MAX_EDUS_PER_TRANSACTION - 2
-
-        device_update_edus, dev_list_id = await self.queue._get_device_update_edus(
-            limit
-        )
-
-        if device_update_edus:
-            self._device_list_id = dev_list_id
-        else:
-            self.queue._last_device_list_stream_id = dev_list_id
-
-        limit -= len(device_update_edus)
+        device_edu_limit = MAX_EDUS_PER_TRANSACTION - 2
 
+        # We prioritize to-device messages so that existing encryption channels
+        # work. We also keep a few slots spare (by reducing the limit) so that
+        # we can still trickle out some device list updates.
         (
             to_device_edus,
             device_stream_id,
-        ) = await self.queue._get_to_device_message_edus(limit)
+        ) = await self.queue._get_to_device_message_edus(device_edu_limit - 10)
 
         if to_device_edus:
             self._device_stream_id = device_stream_id
         else:
             self.queue._last_device_stream_id = device_stream_id
 
+        device_edu_limit -= len(to_device_edus)
+
+        device_update_edus, dev_list_id = await self.queue._get_device_update_edus(
+            device_edu_limit
+        )
+
+        if device_update_edus:
+            self._device_list_id = dev_list_id
+        else:
+            self.queue._last_device_list_stream_id = dev_list_id
+
         pending_edus = device_update_edus + to_device_edus
 
         # Now add the read receipt EDU.
-- 
cgit 1.5.1


From 87fe9db4675e510ea9c0234429b4773341c4e86d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 27 Sep 2022 10:47:34 -0400
Subject: Support the stable dir parameter for /relations. (#13920)

Since MSC3715 has passed FCP, the stable parameter can be used.

This currently falls back to the unstable parameter if the stable
parameter is not provided (and MSC3715 support is enabled in
the configuration).
---
 changelog.d/13920.feature           |  1 +
 synapse/rest/client/relations.py    | 24 +++++++++++++++---------
 tests/rest/client/test_relations.py |  6 ++----
 3 files changed, 18 insertions(+), 13 deletions(-)
 create mode 100644 changelog.d/13920.feature

(limited to 'synapse')

diff --git a/changelog.d/13920.feature b/changelog.d/13920.feature
new file mode 100644
index 0000000000..aee702bcd2
--- /dev/null
+++ b/changelog.d/13920.feature
@@ -0,0 +1 @@
+Support a `dir` parameter on the `/relations` endpoint per [MSC3715](https://github.com/matrix-org/matrix-doc/pull/3715).
diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py
index ce97080013..205c556f64 100644
--- a/synapse/rest/client/relations.py
+++ b/synapse/rest/client/relations.py
@@ -56,15 +56,21 @@ class RelationPaginationServlet(RestServlet):
         requester = await self.auth.get_user_by_req(request, allow_guest=True)
 
         limit = parse_integer(request, "limit", default=5)
-        if self._msc3715_enabled:
-            direction = parse_string(
-                request,
-                "org.matrix.msc3715.dir",
-                default="b",
-                allowed_values=["f", "b"],
-            )
-        else:
-            direction = "b"
+        # Fetch the direction parameter, if provided.
+        #
+        # TODO Use PaginationConfig.from_request when the unstable parameter is
+        #      no longer needed.
+        direction = parse_string(request, "dir", allowed_values=["f", "b"])
+        if direction is None:
+            if self._msc3715_enabled:
+                direction = parse_string(
+                    request,
+                    "org.matrix.msc3715.dir",
+                    default="b",
+                    allowed_values=["f", "b"],
+                )
+            else:
+                direction = "b"
         from_token_str = parse_string(request, "from")
         to_token_str = parse_string(request, "to")
 
diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py
index d33e34d829..fef3b72d76 100644
--- a/tests/rest/client/test_relations.py
+++ b/tests/rest/client/test_relations.py
@@ -728,7 +728,6 @@ class RelationsTestCase(BaseRelationsTestCase):
 
 
 class RelationPaginationTestCase(BaseRelationsTestCase):
-    @unittest.override_config({"experimental_features": {"msc3715_enabled": True}})
     def test_basic_paginate_relations(self) -> None:
         """Tests that calling pagination API correctly the latest relations."""
         channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a")
@@ -771,7 +770,7 @@ class RelationPaginationTestCase(BaseRelationsTestCase):
         channel = self.make_request(
             "GET",
             f"/_matrix/client/v1/rooms/{self.room}/relations"
-            f"/{self.parent_id}?limit=1&org.matrix.msc3715.dir=f",
+            f"/{self.parent_id}?limit=1&dir=f",
             access_token=self.user_token,
         )
         self.assertEqual(200, channel.code, channel.json_body)
@@ -788,7 +787,6 @@ class RelationPaginationTestCase(BaseRelationsTestCase):
             channel.json_body["chunk"][0],
         )
 
-    @unittest.override_config({"experimental_features": {"msc3715_enabled": True}})
     def test_repeated_paginate_relations(self) -> None:
         """Test that if we paginate using a limit and tokens then we get the
         expected events.
@@ -838,7 +836,7 @@ class RelationPaginationTestCase(BaseRelationsTestCase):
 
             channel = self.make_request(
                 "GET",
-                f"/_matrix/client/v1/rooms/{self.room}/relations/{self.parent_id}?org.matrix.msc3715.dir=f&limit=3{from_token}",
+                f"/_matrix/client/v1/rooms/{self.room}/relations/{self.parent_id}?dir=f&limit=3{from_token}",
                 access_token=self.user_token,
             )
             self.assertEqual(200, channel.code, channel.json_body)
-- 
cgit 1.5.1


From f5aaa55e2702af3cac1e195bf5d703970c24ff29 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 27 Sep 2022 17:26:35 +0100
Subject: Add new columns tracking when we partial-joined (#13892)

---
 changelog.d/13892.feature                          |  1 +
 synapse/handlers/federation.py                     | 14 +++++-
 synapse/storage/databases/main/room.py             | 52 +++++++++++++++++++++-
 .../main/delta/73/04partial_join_details.sql       | 23 ++++++++++
 4 files changed, 87 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/13892.feature
 create mode 100644 synapse/storage/schema/main/delta/73/04partial_join_details.sql

(limited to 'synapse')

diff --git a/changelog.d/13892.feature b/changelog.d/13892.feature
new file mode 100644
index 0000000000..df3f576536
--- /dev/null
+++ b/changelog.d/13892.feature
@@ -0,0 +1 @@
+Faster remote room joins: record _when_ we first partial-join to a room.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index e1a4265a64..74580f60df 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -581,7 +581,11 @@ class FederationHandler:
                 # Mark the room as having partial state.
                 # The background process is responsible for unmarking this flag,
                 # even if the join fails.
-                await self.store.store_partial_state_room(room_id, ret.servers_in_room)
+                await self.store.store_partial_state_room(
+                    room_id=room_id,
+                    servers=ret.servers_in_room,
+                    device_lists_stream_id=self.store.get_device_stream_token(),
+                )
 
             try:
                 max_stream_id = (
@@ -606,6 +610,14 @@ class FederationHandler:
                     room_id,
                 )
                 raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0)
+            else:
+                # Record the join event id for future use (when we finish the full
+                # join). We have to do this after persisting the event to keep foreign
+                # key constraints intact.
+                if ret.partial_state:
+                    await self.store.write_partial_state_rooms_join_event_id(
+                        room_id, event.event_id
+                    )
             finally:
                 # Always kick off the background process that asynchronously fetches
                 # state for the room.
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 5dd116d766..064c332fb7 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -1777,28 +1777,46 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
         self,
         room_id: str,
         servers: Collection[str],
+        device_lists_stream_id: int,
     ) -> None:
-        """Mark the given room as containing events with partial state
+        """Mark the given room as containing events with partial state.
+
+        We also store additional data that describes _when_ we first partial-joined this
+        room, which helps us to keep other homeservers in sync when we finally fully
+        join this room.
+
+        We do not include a `join_event_id` here---we need to wait for the join event
+        to be persisted first.
 
         Args:
             room_id: the ID of the room
             servers: other servers known to be in the room
+            device_lists_stream_id: the device_lists stream ID at the time when we first
+                joined the room.
         """
         await self.db_pool.runInteraction(
             "store_partial_state_room",
             self._store_partial_state_room_txn,
             room_id,
             servers,
+            device_lists_stream_id,
         )
 
     def _store_partial_state_room_txn(
-        self, txn: LoggingTransaction, room_id: str, servers: Collection[str]
+        self,
+        txn: LoggingTransaction,
+        room_id: str,
+        servers: Collection[str],
+        device_lists_stream_id: int,
     ) -> None:
         DatabasePool.simple_insert_txn(
             txn,
             table="partial_state_rooms",
             values={
                 "room_id": room_id,
+                "device_lists_stream_id": device_lists_stream_id,
+                # To be updated later once the join event is persisted.
+                "join_event_id": None,
             },
         )
         DatabasePool.simple_insert_many_txn(
@@ -1809,6 +1827,36 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
         )
         self._invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,))
 
+    async def write_partial_state_rooms_join_event_id(
+        self,
+        room_id: str,
+        join_event_id: str,
+    ) -> None:
+        """Record the join event which resulted from a partial join.
+
+        We do this separately to `store_partial_state_room` because we need to wait for
+        the join event to be persisted. Otherwise we violate a foreign key constraint.
+        """
+        await self.db_pool.runInteraction(
+            "write_partial_state_rooms_join_event_id",
+            self._write_partial_state_rooms_join_event_id,
+            room_id,
+            join_event_id,
+        )
+
+    def _write_partial_state_rooms_join_event_id(
+        self,
+        txn: LoggingTransaction,
+        room_id: str,
+        join_event_id: str,
+    ) -> None:
+        DatabasePool.simple_update_txn(
+            txn,
+            table="partial_state_rooms",
+            keyvalues={"room_id": room_id},
+            updatevalues={"join_event_id": join_event_id},
+        )
+
     async def maybe_store_room_on_outlier_membership(
         self, room_id: str, room_version: RoomVersion
     ) -> None:
diff --git a/synapse/storage/schema/main/delta/73/04partial_join_details.sql b/synapse/storage/schema/main/delta/73/04partial_join_details.sql
new file mode 100644
index 0000000000..5fb2bfe1a2
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/04partial_join_details.sql
@@ -0,0 +1,23 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- To ensure we correctly notify other homeservers about device list changes from our
+-- users after a partial join transitions to a full join, we need to know when we began
+-- the partial join. For now it's sufficient to know the device_list stream_id at the
+-- time of the partial join, and the join event created for us during a partial join.
+--
+-- Both columns are backwards compatible.
+ALTER TABLE partial_state_rooms ADD COLUMN device_lists_stream_id BIGINT NOT NULL DEFAULT 0;
+ALTER TABLE partial_state_rooms ADD COLUMN join_event_id TEXT REFERENCES events(event_id);
-- 
cgit 1.5.1


From 29269d9d3f3419a3d92cdd80dae4a37e2d99a395 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 27 Sep 2022 15:55:43 -0500
Subject: Fix `have_seen_event` cache not being invalidated (#13863)

Fix https://github.com/matrix-org/synapse/issues/13856
Fix https://github.com/matrix-org/synapse/issues/13865

> Discovered while trying to make Synapse fast enough for [this MSC2716 test for importing many batches](https://github.com/matrix-org/complement/pull/214#discussion_r741678240). As an example, disabling the `have_seen_event` cache saves 10 seconds for each `/messages` request in that MSC2716 Complement test because we're not making as many federation requests for `/state` (speeding up `have_seen_event` itself is related to https://github.com/matrix-org/synapse/issues/13625)
>
> But this will also make `/messages` faster in general so we can include it in the [faster `/messages` milestone](https://github.com/matrix-org/synapse/milestone/11).
>
> *-- https://github.com/matrix-org/synapse/issues/13856*


### The problem

`_invalidate_caches_for_event` doesn't run in monolith mode which means we never even tried to clear the `have_seen_event` and other caches. And even in worker mode, it only runs on the workers, not the master (AFAICT).

Additionally there was a bug with the key being wrong so `_invalidate_caches_for_event` never invalidates the `have_seen_event` cache even when it does run.

Because we were using the `@cachedList` wrong, it was putting items in the cache under keys like `((room_id, event_id),)` with a tuple nested in a tuple (ex. `(('!TnCIJPKzdQdUlIyXdQ:test', '$Iu0eqEBN7qcyF1S9B3oNB3I91v2o5YOgRNPwi_78s-k'),)`) and we were trying to invalidate with just `(room_id, event_id)` which did nothing.
---
 changelog.d/13863.bugfix                           |   1 +
 synapse/storage/databases/main/events_worker.py    |  40 +++---
 synapse/util/caches/descriptors.py                 |   6 +
 tests/storage/databases/main/test_events_worker.py | 152 ++++++++++++++-------
 tests/util/caches/test_descriptors.py              |  33 ++++-
 5 files changed, 165 insertions(+), 67 deletions(-)
 create mode 100644 changelog.d/13863.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13863.bugfix b/changelog.d/13863.bugfix
new file mode 100644
index 0000000000..74264a4fab
--- /dev/null
+++ b/changelog.d/13863.bugfix
@@ -0,0 +1 @@
+Fix `have_seen_event` cache not being invalidated after we persist an event, which causes inefficiencies like extra `/state` federation calls.
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 52914febf9..7cdc9fe98f 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -1474,32 +1474,38 @@ class EventsWorkerStore(SQLBaseStore):
         # the batches as big as possible.
 
         results: Set[str] = set()
-        for chunk in batch_iter(event_ids, 500):
-            r = await self._have_seen_events_dict(
-                [(room_id, event_id) for event_id in chunk]
+        for event_ids_chunk in batch_iter(event_ids, 500):
+            events_seen_dict = await self._have_seen_events_dict(
+                room_id, event_ids_chunk
+            )
+            results.update(
+                eid for (eid, have_event) in events_seen_dict.items() if have_event
             )
-            results.update(eid for ((_rid, eid), have_event) in r.items() if have_event)
 
         return results
 
-    @cachedList(cached_method_name="have_seen_event", list_name="keys")
+    @cachedList(cached_method_name="have_seen_event", list_name="event_ids")
     async def _have_seen_events_dict(
-        self, keys: Collection[Tuple[str, str]]
-    ) -> Dict[Tuple[str, str], bool]:
+        self,
+        room_id: str,
+        event_ids: Collection[str],
+    ) -> Dict[str, bool]:
         """Helper for have_seen_events
 
         Returns:
-             a dict {(room_id, event_id)-> bool}
+             a dict {event_id -> bool}
         """
         # if the event cache contains the event, obviously we've seen it.
 
         cache_results = {
-            (rid, eid)
-            for (rid, eid) in keys
-            if await self._get_event_cache.contains((eid,))
+            event_id
+            for event_id in event_ids
+            if await self._get_event_cache.contains((event_id,))
         }
         results = dict.fromkeys(cache_results, True)
-        remaining = [k for k in keys if k not in cache_results]
+        remaining = [
+            event_id for event_id in event_ids if event_id not in cache_results
+        ]
         if not remaining:
             return results
 
@@ -1511,23 +1517,21 @@ class EventsWorkerStore(SQLBaseStore):
 
             sql = "SELECT event_id FROM events AS e WHERE "
             clause, args = make_in_list_sql_clause(
-                txn.database_engine, "e.event_id", [eid for (_rid, eid) in remaining]
+                txn.database_engine, "e.event_id", remaining
             )
             txn.execute(sql + clause, args)
             found_events = {eid for eid, in txn}
 
             # ... and then we can update the results for each key
-            results.update(
-                {(rid, eid): (eid in found_events) for (rid, eid) in remaining}
-            )
+            results.update({eid: (eid in found_events) for eid in remaining})
 
         await self.db_pool.runInteraction("have_seen_events", have_seen_events_txn)
         return results
 
     @cached(max_entries=100000, tree=True)
     async def have_seen_event(self, room_id: str, event_id: str) -> bool:
-        res = await self._have_seen_events_dict(((room_id, event_id),))
-        return res[(room_id, event_id)]
+        res = await self._have_seen_events_dict(room_id, [event_id])
+        return res[event_id]
 
     def _get_current_state_event_counts_txn(
         self, txn: LoggingTransaction, room_id: str
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 3909f1caea..0391966462 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -431,6 +431,12 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase):
         cache: DeferredCache[CacheKey, Any] = cached_method.cache
         num_args = cached_method.num_args
 
+        if num_args != self.num_args:
+            raise Exception(
+                "Number of args (%s) does not match underlying cache_method_name=%s (%s)."
+                % (self.num_args, self.cached_method_name, num_args)
+            )
+
         @functools.wraps(self.orig)
         def wrapped(*args: Any, **kwargs: Any) -> "defer.Deferred[Dict]":
             # If we're passed a cache_context then we'll want to call its
diff --git a/tests/storage/databases/main/test_events_worker.py b/tests/storage/databases/main/test_events_worker.py
index 67401272ac..32a798d74b 100644
--- a/tests/storage/databases/main/test_events_worker.py
+++ b/tests/storage/databases/main/test_events_worker.py
@@ -35,66 +35,45 @@ from synapse.util import Clock
 from synapse.util.async_helpers import yieldable_gather_results
 
 from tests import unittest
+from tests.test_utils.event_injection import create_event, inject_event
 
 
 class HaveSeenEventsTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        admin.register_servlets,
+        room.register_servlets,
+        login.register_servlets,
+    ]
+
     def prepare(self, reactor, clock, hs):
+        self.hs = hs
         self.store: EventsWorkerStore = hs.get_datastores().main
 
-        # insert some test data
-        for rid in ("room1", "room2"):
-            self.get_success(
-                self.store.db_pool.simple_insert(
-                    "rooms",
-                    {"room_id": rid, "room_version": 4},
-                )
-            )
+        self.user = self.register_user("user", "pass")
+        self.token = self.login(self.user, "pass")
+        self.room_id = self.helper.create_room_as(self.user, tok=self.token)
 
         self.event_ids: List[str] = []
-        for idx, rid in enumerate(
-            (
-                "room1",
-                "room1",
-                "room1",
-                "room2",
-            )
-        ):
-            event_json = {"type": f"test {idx}", "room_id": rid}
-            event = make_event_from_dict(event_json, room_version=RoomVersions.V4)
-            event_id = event.event_id
-
-            self.get_success(
-                self.store.db_pool.simple_insert(
-                    "events",
-                    {
-                        "event_id": event_id,
-                        "room_id": rid,
-                        "topological_ordering": idx,
-                        "stream_ordering": idx,
-                        "type": event.type,
-                        "processed": True,
-                        "outlier": False,
-                    },
+        for i in range(3):
+            event = self.get_success(
+                inject_event(
+                    hs,
+                    room_version=RoomVersions.V7.identifier,
+                    room_id=self.room_id,
+                    sender=self.user,
+                    type="test_event_type",
+                    content={"body": f"foobarbaz{i}"},
                 )
             )
-            self.get_success(
-                self.store.db_pool.simple_insert(
-                    "event_json",
-                    {
-                        "event_id": event_id,
-                        "room_id": rid,
-                        "json": json.dumps(event_json),
-                        "internal_metadata": "{}",
-                        "format_version": 3,
-                    },
-                )
-            )
-            self.event_ids.append(event_id)
+
+            self.event_ids.append(event.event_id)
 
     def test_simple(self):
         with LoggingContext(name="test") as ctx:
             res = self.get_success(
-                self.store.have_seen_events("room1", [self.event_ids[0], "event19"])
+                self.store.have_seen_events(
+                    self.room_id, [self.event_ids[0], "eventdoesnotexist"]
+                )
             )
             self.assertEqual(res, {self.event_ids[0]})
 
@@ -104,7 +83,9 @@ class HaveSeenEventsTestCase(unittest.HomeserverTestCase):
         # a second lookup of the same events should cause no queries
         with LoggingContext(name="test") as ctx:
             res = self.get_success(
-                self.store.have_seen_events("room1", [self.event_ids[0], "event19"])
+                self.store.have_seen_events(
+                    self.room_id, [self.event_ids[0], "eventdoesnotexist"]
+                )
             )
             self.assertEqual(res, {self.event_ids[0]})
             self.assertEqual(ctx.get_resource_usage().db_txn_count, 0)
@@ -116,11 +97,86 @@ class HaveSeenEventsTestCase(unittest.HomeserverTestCase):
         # looking it up should now cause no db hits
         with LoggingContext(name="test") as ctx:
             res = self.get_success(
-                self.store.have_seen_events("room1", [self.event_ids[0]])
+                self.store.have_seen_events(self.room_id, [self.event_ids[0]])
             )
             self.assertEqual(res, {self.event_ids[0]})
             self.assertEqual(ctx.get_resource_usage().db_txn_count, 0)
 
+    def test_persisting_event_invalidates_cache(self):
+        """
+        Test to make sure that the `have_seen_event` cache
+        is invalidated after we persist an event and returns
+        the updated value.
+        """
+        event, event_context = self.get_success(
+            create_event(
+                self.hs,
+                room_id=self.room_id,
+                sender=self.user,
+                type="test_event_type",
+                content={"body": "garply"},
+            )
+        )
+
+        with LoggingContext(name="test") as ctx:
+            # First, check `have_seen_event` for an event we have not seen yet
+            # to prime the cache with a `false` value.
+            res = self.get_success(
+                self.store.have_seen_events(event.room_id, [event.event_id])
+            )
+            self.assertEqual(res, set())
+
+            # That should result in a single db query to lookup
+            self.assertEqual(ctx.get_resource_usage().db_txn_count, 1)
+
+        # Persist the event which should invalidate or prefill the
+        # `have_seen_event` cache so we don't return stale values.
+        persistence = self.hs.get_storage_controllers().persistence
+        self.get_success(
+            persistence.persist_event(
+                event,
+                event_context,
+            )
+        )
+
+        with LoggingContext(name="test") as ctx:
+            # Check `have_seen_event` again and we should see the updated fact
+            # that we have now seen the event after persisting it.
+            res = self.get_success(
+                self.store.have_seen_events(event.room_id, [event.event_id])
+            )
+            self.assertEqual(res, {event.event_id})
+
+            # That should result in a single db query to lookup
+            self.assertEqual(ctx.get_resource_usage().db_txn_count, 1)
+
+    def test_invalidate_cache_by_room_id(self):
+        """
+        Test to make sure that all events associated with the given `(room_id,)`
+        are invalidated in the `have_seen_event` cache.
+        """
+        with LoggingContext(name="test") as ctx:
+            # Prime the cache with some values
+            res = self.get_success(
+                self.store.have_seen_events(self.room_id, self.event_ids)
+            )
+            self.assertEqual(res, set(self.event_ids))
+
+            # That should result in a single db query to lookup
+            self.assertEqual(ctx.get_resource_usage().db_txn_count, 1)
+
+        # Clear the cache with any events associated with the `room_id`
+        self.store.have_seen_event.invalidate((self.room_id,))
+
+        with LoggingContext(name="test") as ctx:
+            res = self.get_success(
+                self.store.have_seen_events(self.room_id, self.event_ids)
+            )
+            self.assertEqual(res, set(self.event_ids))
+
+            # Since we cleared the cache, it should result in another db query to lookup
+            self.assertEqual(ctx.get_resource_usage().db_txn_count, 1)
+
 
 class EventCacheTestCase(unittest.HomeserverTestCase):
     """Test that the various layers of event cache works."""
diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py
index 48e616ac74..90861fe522 100644
--- a/tests/util/caches/test_descriptors.py
+++ b/tests/util/caches/test_descriptors.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import Set
+from typing import Iterable, Set, Tuple
 from unittest import mock
 
 from twisted.internet import defer, reactor
@@ -1008,3 +1008,34 @@ class CachedListDescriptorTestCase(unittest.TestCase):
             obj.inner_context_was_finished, "Tried to restart a finished logcontext"
         )
         self.assertEqual(current_context(), SENTINEL_CONTEXT)
+
+    def test_num_args_mismatch(self):
+        """
+        Make sure someone does not accidentally use @cachedList on a method with
+        a mismatch in the number of args to the underlying single cache method.
+        """
+
+        class Cls:
+            @descriptors.cached(tree=True)
+            def fn(self, room_id, event_id):
+                pass
+
+            # This is wrong ❌. `@cachedList` expects to be given the same number
+            # of arguments as the underlying cached function, just with one of
+            # the arguments being an iterable
+            @descriptors.cachedList(cached_method_name="fn", list_name="keys")
+            def list_fn(self, keys: Iterable[Tuple[str, str]]):
+                pass
+
+            # Corrected syntax ✅
+            #
+            # @cachedList(cached_method_name="fn", list_name="event_ids")
+            # async def list_fn(
+            #     self, room_id: str, event_ids: Collection[str],
+            # )
+
+        obj = Cls()
+
+        # Make sure this raises an error about the arg mismatch
+        with self.assertRaises(Exception):
+            obj.list_fn([("foo", "bar")])
-- 
cgit 1.5.1


From a2cf66a94d5dfd9d6496ac3e48ec9a22f17be69a Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Wed, 28 Sep 2022 02:39:03 -0700
Subject: Preparatory work for batching events to send (#13487)

This PR begins work on batching up events during the creation of a room. The PR splits out the creation and sending/persisting of the events. The first three events in the creation of the room (creating the room, joining the creator to the room, and the power levels event) are sent sequentially, while the subsequent events are created and collected to be sent at the end of the function. This is currently done by appending them to a list and then iterating over the list to send; the next step (after this PR) would be to send and persist the collected events as a batch.
---
 changelog.d/13487.misc          |   1 +
 synapse/handlers/message.py     | 175 ++++++++++++++++++++++++++--------------
 synapse/handlers/room.py        | 155 ++++++++++++++++++++++++-----------
 synapse/state/__init__.py       |  63 +++++++++++++++
 tests/rest/client/test_rooms.py |   4 +-
 5 files changed, 290 insertions(+), 108 deletions(-)
 create mode 100644 changelog.d/13487.misc

(limited to 'synapse')

diff --git a/changelog.d/13487.misc b/changelog.d/13487.misc
new file mode 100644
index 0000000000..761adc8b05
--- /dev/null
+++ b/changelog.d/13487.misc
@@ -0,0 +1 @@
+Speed up creation of DM rooms.
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index e07cda133a..062f93bc67 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -63,6 +63,7 @@ from synapse.types import (
     MutableStateMap,
     Requester,
     RoomAlias,
+    StateMap,
     StreamToken,
     UserID,
     create_requester,
@@ -567,9 +568,17 @@ class EventCreationHandler:
         outlier: bool = False,
         historical: bool = False,
         depth: Optional[int] = None,
+        state_map: Optional[StateMap[str]] = None,
+        for_batch: bool = False,
+        current_state_group: Optional[int] = None,
     ) -> Tuple[EventBase, EventContext]:
         """
-        Given a dict from a client, create a new event.
+        Given a dict from a client, create a new event. If bool for_batch is true, will
+        create an event using the prev_event_ids, and will create an event context for
+        the event using the parameters state_map and current_state_group, thus these parameters
+        must be provided in this case if for_batch is True. The subsequently created event
+        and context are suitable for being batched up and bulk persisted to the database
+        with other similarly created events.
 
         Creates an FrozenEvent object, filling out auth_events, prev_events,
         etc.
@@ -612,16 +621,27 @@ class EventCreationHandler:
             outlier: Indicates whether the event is an `outlier`, i.e. if
                 it's from an arbitrary point and floating in the DAG as
                 opposed to being inline with the current DAG.
+
             historical: Indicates whether the message is being inserted
                 back in time around some existing events. This is used to skip
                 a few checks and mark the event as backfilled.
+
             depth: Override the depth used to order the event in the DAG.
                 Should normally be set to None, which will cause the depth to be calculated
                 based on the prev_events.
 
+            state_map: A state map of previously created events, used only when creating events
+                for batch persisting
+
+            for_batch: whether the event is being created for batch persisting to the db
+
+            current_state_group: the current state group, used only for creating events for
+                batch persisting
+
         Raises:
             ResourceLimitError if server is blocked to some resource being
             exceeded
+
         Returns:
             Tuple of created event, Context
         """
@@ -693,6 +713,9 @@ class EventCreationHandler:
             auth_event_ids=auth_event_ids,
             state_event_ids=state_event_ids,
             depth=depth,
+            state_map=state_map,
+            for_batch=for_batch,
+            current_state_group=current_state_group,
         )
 
         # In an ideal world we wouldn't need the second part of this condition. However,
@@ -707,10 +730,14 @@ class EventCreationHandler:
             # federation as well as those created locally. As of room v3, aliases events
             # can be created by users that are not in the room, therefore we have to
             # tolerate them in event_auth.check().
-            prev_state_ids = await context.get_prev_state_ids(
-                StateFilter.from_types([(EventTypes.Member, None)])
-            )
-            prev_event_id = prev_state_ids.get((EventTypes.Member, event.sender))
+            if for_batch:
+                assert state_map is not None
+                prev_event_id = state_map.get((EventTypes.Member, event.sender))
+            else:
+                prev_state_ids = await context.get_prev_state_ids(
+                    StateFilter.from_types([(EventTypes.Member, None)])
+                )
+                prev_event_id = prev_state_ids.get((EventTypes.Member, event.sender))
             prev_event = (
                 await self.store.get_event(prev_event_id, allow_none=True)
                 if prev_event_id
@@ -1009,8 +1036,16 @@ class EventCreationHandler:
         auth_event_ids: Optional[List[str]] = None,
         state_event_ids: Optional[List[str]] = None,
         depth: Optional[int] = None,
+        state_map: Optional[StateMap[str]] = None,
+        for_batch: bool = False,
+        current_state_group: Optional[int] = None,
     ) -> Tuple[EventBase, EventContext]:
-        """Create a new event for a local client
+        """Create a new event for a local client. If bool for_batch is true, will
+        create an event using the prev_event_ids, and will create an event context for
+        the event using the parameters state_map and current_state_group, thus these parameters
+        must be provided in this case if for_batch is True. The subsequently created event
+        and context are suitable for being batched up and bulk persisted to the database
+        with other similarly created events.
 
         Args:
             builder:
@@ -1043,6 +1078,14 @@ class EventCreationHandler:
                 Should normally be set to None, which will cause the depth to be calculated
                 based on the prev_events.
 
+            state_map: A state map of previously created events, used only when creating events
+                for batch persisting
+
+            for_batch: whether the event is being created for batch persisting to the db
+
+            current_state_group: the current state group, used only for creating events for
+                batch persisting
+
         Returns:
             Tuple of created event, context
         """
@@ -1095,64 +1138,76 @@ class EventCreationHandler:
                 builder.type == EventTypes.Create or prev_event_ids
             ), "Attempting to create a non-m.room.create event with no prev_events"
 
-        event = await builder.build(
-            prev_event_ids=prev_event_ids,
-            auth_event_ids=auth_event_ids,
-            depth=depth,
-        )
+        if for_batch:
+            assert prev_event_ids is not None
+            assert state_map is not None
+            assert current_state_group is not None
+            auth_ids = self._event_auth_handler.compute_auth_events(builder, state_map)
+            event = await builder.build(
+                prev_event_ids=prev_event_ids, auth_event_ids=auth_ids, depth=depth
+            )
+            context = await self.state.compute_event_context_for_batched(
+                event, state_map, current_state_group
+            )
+        else:
+            event = await builder.build(
+                prev_event_ids=prev_event_ids,
+                auth_event_ids=auth_event_ids,
+                depth=depth,
+            )
 
-        # Pass on the outlier property from the builder to the event
-        # after it is created
-        if builder.internal_metadata.outlier:
-            event.internal_metadata.outlier = True
-            context = EventContext.for_outlier(self._storage_controllers)
-        elif (
-            event.type == EventTypes.MSC2716_INSERTION
-            and state_event_ids
-            and builder.internal_metadata.is_historical()
-        ):
-            # Add explicit state to the insertion event so it has state to derive
-            # from even though it's floating with no `prev_events`. The rest of
-            # the batch can derive from this state and state_group.
-            #
-            # TODO(faster_joins): figure out how this works, and make sure that the
-            #   old state is complete.
-            #   https://github.com/matrix-org/synapse/issues/13003
-            metadata = await self.store.get_metadata_for_events(state_event_ids)
-
-            state_map_for_event: MutableStateMap[str] = {}
-            for state_id in state_event_ids:
-                data = metadata.get(state_id)
-                if data is None:
-                    # We're trying to persist a new historical batch of events
-                    # with the given state, e.g. via
-                    # `RoomBatchSendEventRestServlet`. The state can be inferred
-                    # by Synapse or set directly by the client.
-                    #
-                    # Either way, we should have persisted all the state before
-                    # getting here.
-                    raise Exception(
-                        f"State event {state_id} not found in DB,"
-                        " Synapse should have persisted it before using it."
-                    )
+            # Pass on the outlier property from the builder to the event
+            # after it is created
+            if builder.internal_metadata.outlier:
+                event.internal_metadata.outlier = True
+                context = EventContext.for_outlier(self._storage_controllers)
+            elif (
+                event.type == EventTypes.MSC2716_INSERTION
+                and state_event_ids
+                and builder.internal_metadata.is_historical()
+            ):
+                # Add explicit state to the insertion event so it has state to derive
+                # from even though it's floating with no `prev_events`. The rest of
+                # the batch can derive from this state and state_group.
+                #
+                # TODO(faster_joins): figure out how this works, and make sure that the
+                #   old state is complete.
+                #   https://github.com/matrix-org/synapse/issues/13003
+                metadata = await self.store.get_metadata_for_events(state_event_ids)
+
+                state_map_for_event: MutableStateMap[str] = {}
+                for state_id in state_event_ids:
+                    data = metadata.get(state_id)
+                    if data is None:
+                        # We're trying to persist a new historical batch of events
+                        # with the given state, e.g. via
+                        # `RoomBatchSendEventRestServlet`. The state can be inferred
+                        # by Synapse or set directly by the client.
+                        #
+                        # Either way, we should have persisted all the state before
+                        # getting here.
+                        raise Exception(
+                            f"State event {state_id} not found in DB,"
+                            " Synapse should have persisted it before using it."
+                        )
 
-                if data.state_key is None:
-                    raise Exception(
-                        f"Trying to set non-state event {state_id} as state"
-                    )
+                    if data.state_key is None:
+                        raise Exception(
+                            f"Trying to set non-state event {state_id} as state"
+                        )
 
-                state_map_for_event[(data.event_type, data.state_key)] = state_id
+                    state_map_for_event[(data.event_type, data.state_key)] = state_id
 
-            context = await self.state.compute_event_context(
-                event,
-                state_ids_before_event=state_map_for_event,
-                # TODO(faster_joins): check how MSC2716 works and whether we can have
-                #   partial state here
-                #   https://github.com/matrix-org/synapse/issues/13003
-                partial_state=False,
-            )
-        else:
-            context = await self.state.compute_event_context(event)
+                context = await self.state.compute_event_context(
+                    event,
+                    state_ids_before_event=state_map_for_event,
+                    # TODO(faster_joins): check how MSC2716 works and whether we can have
+                    #   partial state here
+                    #   https://github.com/matrix-org/synapse/issues/13003
+                    partial_state=False,
+                )
+            else:
+                context = await self.state.compute_event_context(event)
 
         if requester:
             context.app_service = requester.app_service
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 33e9a87002..09a1a82e6c 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -716,7 +716,7 @@ class RoomCreationHandler:
 
         if (
             self._server_notices_mxid is not None
-            and requester.user.to_string() == self._server_notices_mxid
+            and user_id == self._server_notices_mxid
         ):
             # allow the server notices mxid to create rooms
             is_requester_admin = True
@@ -1042,7 +1042,9 @@ class RoomCreationHandler:
         creator_join_profile: Optional[JsonDict] = None,
         ratelimit: bool = True,
     ) -> Tuple[int, str, int]:
-        """Sends the initial events into a new room.
+        """Sends the initial events into a new room. Sends the room creation, membership,
+        and power level events into the room sequentially, then creates and batches up the
+        rest of the events to persist as a batch to the DB.
 
         `power_level_content_override` doesn't apply when initial state has
         power level state event content.
@@ -1053,13 +1055,21 @@ class RoomCreationHandler:
         """
 
         creator_id = creator.user.to_string()
-
         event_keys = {"room_id": room_id, "sender": creator_id, "state_key": ""}
-
         depth = 1
+        # the last event sent/persisted to the db
         last_sent_event_id: Optional[str] = None
-
-        def create(etype: str, content: JsonDict, **kwargs: Any) -> JsonDict:
+        # the most recently created event
+        prev_event: List[str] = []
+        # a map of event types, state keys -> event_ids. We collect these mappings as events are
+        # created (but not persisted to the db) to determine state for future created events
+        # (as this info can't be pulled from the db)
+        state_map: MutableStateMap[str] = {}
+        # current_state_group of last event created. Used for computing event context of
+        # events to be batched
+        current_state_group = None
+
+        def create_event_dict(etype: str, content: JsonDict, **kwargs: Any) -> JsonDict:
             e = {"type": etype, "content": content}
 
             e.update(event_keys)
@@ -1067,32 +1077,52 @@ class RoomCreationHandler:
 
             return e
 
-        async def send(etype: str, content: JsonDict, **kwargs: Any) -> int:
-            nonlocal last_sent_event_id
+        async def create_event(
+            etype: str,
+            content: JsonDict,
+            for_batch: bool,
+            **kwargs: Any,
+        ) -> Tuple[EventBase, synapse.events.snapshot.EventContext]:
             nonlocal depth
+            nonlocal prev_event
 
-            event = create(etype, content, **kwargs)
-            logger.debug("Sending %s in new room", etype)
-            # Allow these events to be sent even if the user is shadow-banned to
-            # allow the room creation to complete.
-            (
-                sent_event,
-                last_stream_id,
-            ) = await self.event_creation_handler.create_and_send_nonmember_event(
+            event_dict = create_event_dict(etype, content, **kwargs)
+
+            new_event, new_context = await self.event_creation_handler.create_event(
                 creator,
-                event,
+                event_dict,
+                prev_event_ids=prev_event,
+                depth=depth,
+                state_map=state_map,
+                for_batch=for_batch,
+                current_state_group=current_state_group,
+            )
+            depth += 1
+            prev_event = [new_event.event_id]
+            state_map[(new_event.type, new_event.state_key)] = new_event.event_id
+
+            return new_event, new_context
+
+        async def send(
+            event: EventBase,
+            context: synapse.events.snapshot.EventContext,
+            creator: Requester,
+        ) -> int:
+            nonlocal last_sent_event_id
+
+            ev = await self.event_creation_handler.handle_new_client_event(
+                requester=creator,
+                event=event,
+                context=context,
                 ratelimit=False,
                 ignore_shadow_ban=True,
-                # Note: we don't pass state_event_ids here because this triggers
-                # an additional query per event to look them up from the events table.
-                prev_event_ids=[last_sent_event_id] if last_sent_event_id else [],
-                depth=depth,
             )
 
-            last_sent_event_id = sent_event.event_id
-            depth += 1
+            last_sent_event_id = ev.event_id
 
-            return last_stream_id
+            # we know it was persisted, so must have a stream ordering
+            assert ev.internal_metadata.stream_ordering
+            return ev.internal_metadata.stream_ordering
 
         try:
             config = self._presets_dict[preset_config]
@@ -1102,9 +1132,13 @@ class RoomCreationHandler:
             )
 
         creation_content.update({"creator": creator_id})
-        await send(etype=EventTypes.Create, content=creation_content)
+        creation_event, creation_context = await create_event(
+            EventTypes.Create, creation_content, False
+        )
 
         logger.debug("Sending %s in new room", EventTypes.Member)
+        await send(creation_event, creation_context, creator)
+
         # Room create event must exist at this point
         assert last_sent_event_id is not None
         member_event_id, _ = await self.room_member_handler.update_membership(
@@ -1119,14 +1153,22 @@ class RoomCreationHandler:
             depth=depth,
         )
         last_sent_event_id = member_event_id
+        prev_event = [member_event_id]
+
+        # update the depth and state map here as the membership event has been created
+        # through a different code path
+        depth += 1
+        state_map[(EventTypes.Member, creator.user.to_string())] = member_event_id
 
         # We treat the power levels override specially as this needs to be one
         # of the first events that get sent into a room.
         pl_content = initial_state.pop((EventTypes.PowerLevels, ""), None)
         if pl_content is not None:
-            last_sent_stream_id = await send(
-                etype=EventTypes.PowerLevels, content=pl_content
+            power_event, power_context = await create_event(
+                EventTypes.PowerLevels, pl_content, False
             )
+            current_state_group = power_context._state_group
+            last_sent_stream_id = await send(power_event, power_context, creator)
         else:
             power_level_content: JsonDict = {
                 "users": {creator_id: 100},
@@ -1169,47 +1211,68 @@ class RoomCreationHandler:
             # apply those.
             if power_level_content_override:
                 power_level_content.update(power_level_content_override)
-
-            last_sent_stream_id = await send(
-                etype=EventTypes.PowerLevels, content=power_level_content
+            pl_event, pl_context = await create_event(
+                EventTypes.PowerLevels,
+                power_level_content,
+                False,
             )
+            current_state_group = pl_context._state_group
+            last_sent_stream_id = await send(pl_event, pl_context, creator)
 
+        events_to_send = []
         if room_alias and (EventTypes.CanonicalAlias, "") not in initial_state:
-            last_sent_stream_id = await send(
-                etype=EventTypes.CanonicalAlias,
-                content={"alias": room_alias.to_string()},
+            room_alias_event, room_alias_context = await create_event(
+                EventTypes.CanonicalAlias, {"alias": room_alias.to_string()}, True
             )
+            current_state_group = room_alias_context._state_group
+            events_to_send.append((room_alias_event, room_alias_context))
 
         if (EventTypes.JoinRules, "") not in initial_state:
-            last_sent_stream_id = await send(
-                etype=EventTypes.JoinRules, content={"join_rule": config["join_rules"]}
+            join_rules_event, join_rules_context = await create_event(
+                EventTypes.JoinRules,
+                {"join_rule": config["join_rules"]},
+                True,
             )
+            current_state_group = join_rules_context._state_group
+            events_to_send.append((join_rules_event, join_rules_context))
 
         if (EventTypes.RoomHistoryVisibility, "") not in initial_state:
-            last_sent_stream_id = await send(
-                etype=EventTypes.RoomHistoryVisibility,
-                content={"history_visibility": config["history_visibility"]},
+            visibility_event, visibility_context = await create_event(
+                EventTypes.RoomHistoryVisibility,
+                {"history_visibility": config["history_visibility"]},
+                True,
             )
+            current_state_group = visibility_context._state_group
+            events_to_send.append((visibility_event, visibility_context))
 
         if config["guest_can_join"]:
             if (EventTypes.GuestAccess, "") not in initial_state:
-                last_sent_stream_id = await send(
-                    etype=EventTypes.GuestAccess,
-                    content={EventContentFields.GUEST_ACCESS: GuestAccess.CAN_JOIN},
+                guest_access_event, guest_access_context = await create_event(
+                    EventTypes.GuestAccess,
+                    {EventContentFields.GUEST_ACCESS: GuestAccess.CAN_JOIN},
+                    True,
                 )
+                current_state_group = guest_access_context._state_group
+                events_to_send.append((guest_access_event, guest_access_context))
 
         for (etype, state_key), content in initial_state.items():
-            last_sent_stream_id = await send(
-                etype=etype, state_key=state_key, content=content
+            event, context = await create_event(
+                etype, content, True, state_key=state_key
             )
+            current_state_group = context._state_group
+            events_to_send.append((event, context))
 
         if config["encrypted"]:
-            last_sent_stream_id = await send(
-                etype=EventTypes.RoomEncryption,
+            encryption_event, encryption_context = await create_event(
+                EventTypes.RoomEncryption,
+                {"algorithm": RoomEncryptionAlgorithms.DEFAULT},
+                True,
                 state_key="",
-                content={"algorithm": RoomEncryptionAlgorithms.DEFAULT},
             )
+            events_to_send.append((encryption_event, encryption_context))
 
+        for event, context in events_to_send:
+            last_sent_stream_id = await send(event, context, creator)
         return last_sent_stream_id, last_sent_event_id, depth
 
     def _generate_room_id(self) -> str:
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index 3787d35b24..6f3dd0463e 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -420,6 +420,69 @@ class StateHandler:
             partial_state=partial_state,
         )
 
+    async def compute_event_context_for_batched(
+        self,
+        event: EventBase,
+        state_ids_before_event: StateMap[str],
+        current_state_group: int,
+    ) -> EventContext:
+        """
+        Generate an event context for an event that has not yet been persisted to the
+        database. Intended for use with events that are created to be persisted in a batch.
+        Args:
+            event: the event the context is being computed for
+            state_ids_before_event: a state map consisting of the state ids of the events
+            created prior to this event.
+            current_state_group: the current state group before the event.
+        """
+        state_group_before_event_prev_group = None
+        deltas_to_state_group_before_event = None
+
+        state_group_before_event = current_state_group
+
+        # if the event is not state, we are set
+        if not event.is_state():
+            return EventContext.with_state(
+                storage=self._storage_controllers,
+                state_group_before_event=state_group_before_event,
+                state_group=state_group_before_event,
+                state_delta_due_to_event={},
+                prev_group=state_group_before_event_prev_group,
+                delta_ids=deltas_to_state_group_before_event,
+                partial_state=False,
+            )
+
+        # otherwise, we'll need to create a new state group for after the event
+        key = (event.type, event.state_key)
+
+        if state_ids_before_event is not None:
+            replaces = state_ids_before_event.get(key)
+
+        if replaces and replaces != event.event_id:
+            event.unsigned["replaces_state"] = replaces
+
+        delta_ids = {key: event.event_id}
+
+        state_group_after_event = (
+            await self._state_storage_controller.store_state_group(
+                event.event_id,
+                event.room_id,
+                prev_group=state_group_before_event,
+                delta_ids=delta_ids,
+                current_state_ids=None,
+            )
+        )
+
+        return EventContext.with_state(
+            storage=self._storage_controllers,
+            state_group=state_group_after_event,
+            state_group_before_event=state_group_before_event,
+            state_delta_due_to_event=delta_ids,
+            prev_group=state_group_before_event,
+            delta_ids=delta_ids,
+            partial_state=False,
+        )
+
     @measure_func()
     async def resolve_state_groups_for_events(
         self, room_id: str, event_ids: Collection[str], await_full_state: bool = True
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index c7eb88d33f..e281aef779 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -710,7 +710,7 @@ class RoomsCreateTestCase(RoomBase):
         self.assertEqual(HTTPStatus.OK, channel.code, channel.result)
         self.assertTrue("room_id" in channel.json_body)
         assert channel.resource_usage is not None
-        self.assertEqual(44, channel.resource_usage.db_txn_count)
+        self.assertEqual(35, channel.resource_usage.db_txn_count)
 
     def test_post_room_initial_state(self) -> None:
         # POST with initial_state config key, expect new room id
@@ -723,7 +723,7 @@ class RoomsCreateTestCase(RoomBase):
         self.assertEqual(HTTPStatus.OK, channel.code, channel.result)
         self.assertTrue("room_id" in channel.json_body)
         assert channel.resource_usage is not None
-        self.assertEqual(50, channel.resource_usage.db_txn_count)
+        self.assertEqual(38, channel.resource_usage.db_txn_count)
 
     def test_post_room_visibility_key(self) -> None:
         # POST with visibility config key, expect new room id
-- 
cgit 1.5.1


From 8ab16a92edd675453c78cfd9974081e374b0f998 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Wed, 28 Sep 2022 03:11:48 -0700
Subject: Persist CreateRoom events to DB in a batch (#13800)

---
 changelog.d/13800.misc                  |   1 +
 synapse/handlers/message.py             | 663 +++++++++++++++++---------------
 synapse/handlers/room.py                |  21 +-
 synapse/handlers/room_batch.py          |   3 +-
 synapse/handlers/room_member.py         |  11 +-
 synapse/replication/http/__init__.py    |   2 +
 synapse/replication/http/send_event.py  |   4 +-
 synapse/replication/http/send_events.py | 171 ++++++++
 tests/handlers/test_message.py          |  10 +-
 tests/handlers/test_register.py         |   4 +-
 tests/storage/test_event_chain.py       |   8 +-
 tests/unittest.py                       |   4 +-
 12 files changed, 563 insertions(+), 339 deletions(-)
 create mode 100644 changelog.d/13800.misc
 create mode 100644 synapse/replication/http/send_events.py

(limited to 'synapse')

diff --git a/changelog.d/13800.misc b/changelog.d/13800.misc
new file mode 100644
index 0000000000..761adc8b05
--- /dev/null
+++ b/changelog.d/13800.misc
@@ -0,0 +1 @@
+Speed up creation of DM rooms.
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 062f93bc67..00e7645ba5 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -56,11 +56,13 @@ from synapse.logging import opentracing
 from synapse.logging.context import make_deferred_yieldable, run_in_background
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.replication.http.send_event import ReplicationSendEventRestServlet
+from synapse.replication.http.send_events import ReplicationSendEventsRestServlet
 from synapse.storage.databases.main.events import PartialStateConflictError
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
 from synapse.storage.state import StateFilter
 from synapse.types import (
     MutableStateMap,
+    PersistedEventPosition,
     Requester,
     RoomAlias,
     StateMap,
@@ -493,6 +495,7 @@ class EventCreationHandler:
             self.membership_types_to_include_profile_data_in.add(Membership.INVITE)
 
         self.send_event = ReplicationSendEventRestServlet.make_client(hs)
+        self.send_events = ReplicationSendEventsRestServlet.make_client(hs)
 
         self.request_ratelimiter = hs.get_request_ratelimiter()
 
@@ -1016,8 +1019,7 @@ class EventCreationHandler:
 
             ev = await self.handle_new_client_event(
                 requester=requester,
-                event=event,
-                context=context,
+                events_and_context=[(event, context)],
                 ratelimit=ratelimit,
                 ignore_shadow_ban=ignore_shadow_ban,
             )
@@ -1293,13 +1295,13 @@ class EventCreationHandler:
     async def handle_new_client_event(
         self,
         requester: Requester,
-        event: EventBase,
-        context: EventContext,
+        events_and_context: List[Tuple[EventBase, EventContext]],
         ratelimit: bool = True,
         extra_users: Optional[List[UserID]] = None,
         ignore_shadow_ban: bool = False,
     ) -> EventBase:
-        """Processes a new event.
+        """Processes new events. Please note that if batch persisting events, an error in
+        handling any one of these events will result in all of the events being dropped.
 
         This includes deduplicating, checking auth, persisting,
         notifying users, sending to remote servers, etc.
@@ -1309,8 +1311,7 @@ class EventCreationHandler:
 
         Args:
             requester
-            event
-            context
+            events_and_context: A list of one or more tuples of event, context to be persisted
             ratelimit
             extra_users: Any extra users to notify about event
 
@@ -1328,62 +1329,63 @@ class EventCreationHandler:
         """
         extra_users = extra_users or []
 
-        # we don't apply shadow-banning to membership events here. Invites are blocked
-        # higher up the stack, and we allow shadow-banned users to send join and leave
-        # events as normal.
-        if (
-            event.type != EventTypes.Member
-            and not ignore_shadow_ban
-            and requester.shadow_banned
-        ):
-            # We randomly sleep a bit just to annoy the requester.
-            await self.clock.sleep(random.randint(1, 10))
-            raise ShadowBanError()
+        for event, context in events_and_context:
+            # we don't apply shadow-banning to membership events here. Invites are blocked
+            # higher up the stack, and we allow shadow-banned users to send join and leave
+            # events as normal.
+            if (
+                event.type != EventTypes.Member
+                and not ignore_shadow_ban
+                and requester.shadow_banned
+            ):
+                # We randomly sleep a bit just to annoy the requester.
+                await self.clock.sleep(random.randint(1, 10))
+                raise ShadowBanError()
 
-        if event.is_state():
-            prev_event = await self.deduplicate_state_event(event, context)
-            if prev_event is not None:
-                logger.info(
-                    "Not bothering to persist state event %s duplicated by %s",
-                    event.event_id,
-                    prev_event.event_id,
-                )
-                return prev_event
+            if event.is_state():
+                prev_event = await self.deduplicate_state_event(event, context)
+                if prev_event is not None:
+                    logger.info(
+                        "Not bothering to persist state event %s duplicated by %s",
+                        event.event_id,
+                        prev_event.event_id,
+                    )
+                    return prev_event
 
-        if event.internal_metadata.is_out_of_band_membership():
-            # the only sort of out-of-band-membership events we expect to see here are
-            # invite rejections and rescinded knocks that we have generated ourselves.
-            assert event.type == EventTypes.Member
-            assert event.content["membership"] == Membership.LEAVE
-        else:
-            try:
-                validate_event_for_room_version(event)
-                await self._event_auth_handler.check_auth_rules_from_context(
-                    event, context
-                )
-            except AuthError as err:
-                logger.warning("Denying new event %r because %s", event, err)
-                raise err
+            if event.internal_metadata.is_out_of_band_membership():
+                # the only sort of out-of-band-membership events we expect to see here are
+                # invite rejections and rescinded knocks that we have generated ourselves.
+                assert event.type == EventTypes.Member
+                assert event.content["membership"] == Membership.LEAVE
+            else:
+                try:
+                    validate_event_for_room_version(event)
+                    await self._event_auth_handler.check_auth_rules_from_context(
+                        event, context
+                    )
+                except AuthError as err:
+                    logger.warning("Denying new event %r because %s", event, err)
+                    raise err
 
-        # Ensure that we can round trip before trying to persist in db
-        try:
-            dump = json_encoder.encode(event.content)
-            json_decoder.decode(dump)
-        except Exception:
-            logger.exception("Failed to encode content: %r", event.content)
-            raise
+            # Ensure that we can round trip before trying to persist in db
+            try:
+                dump = json_encoder.encode(event.content)
+                json_decoder.decode(dump)
+            except Exception:
+                logger.exception("Failed to encode content: %r", event.content)
+                raise
 
         # We now persist the event (and update the cache in parallel, since we
         # don't want to block on it).
+        event, context = events_and_context[0]
         try:
             result, _ = await make_deferred_yieldable(
                 gather_results(
                     (
                         run_in_background(
-                            self._persist_event,
+                            self._persist_events,
                             requester=requester,
-                            event=event,
-                            context=context,
+                            events_and_context=events_and_context,
                             ratelimit=ratelimit,
                             extra_users=extra_users,
                         ),
@@ -1407,45 +1409,47 @@ class EventCreationHandler:
 
         return result
 
-    async def _persist_event(
+    async def _persist_events(
         self,
         requester: Requester,
-        event: EventBase,
-        context: EventContext,
+        events_and_context: List[Tuple[EventBase, EventContext]],
         ratelimit: bool = True,
         extra_users: Optional[List[UserID]] = None,
     ) -> EventBase:
-        """Actually persists the event. Should only be called by
+        """Actually persists new events. Should only be called by
         `handle_new_client_event`, and see its docstring for documentation of
-        the arguments.
+        the arguments. Please note that if batch persisting events, an error in
+        handling any one of these events will result in all of the events being dropped.
 
         PartialStateConflictError: if attempting to persist a partial state event in
             a room that has been un-partial stated.
         """
 
-        # Skip push notification actions for historical messages
-        # because we don't want to notify people about old history back in time.
-        # The historical messages also do not have the proper `context.current_state_ids`
-        # and `state_groups` because they have `prev_events` that aren't persisted yet
-        # (historical messages persisted in reverse-chronological order).
-        if not event.internal_metadata.is_historical():
-            with opentracing.start_active_span("calculate_push_actions"):
-                await self._bulk_push_rule_evaluator.action_for_event_by_user(
-                    event, context
-                )
+        for event, context in events_and_context:
+            # Skip push notification actions for historical messages
+            # because we don't want to notify people about old history back in time.
+            # The historical messages also do not have the proper `context.current_state_ids`
+            # and `state_groups` because they have `prev_events` that aren't persisted yet
+            # (historical messages persisted in reverse-chronological order).
+            if not event.internal_metadata.is_historical():
+                with opentracing.start_active_span("calculate_push_actions"):
+                    await self._bulk_push_rule_evaluator.action_for_event_by_user(
+                        event, context
+                    )
 
         try:
             # If we're a worker we need to hit out to the master.
-            writer_instance = self._events_shard_config.get_instance(event.room_id)
+            first_event, _ = events_and_context[0]
+            writer_instance = self._events_shard_config.get_instance(
+                first_event.room_id
+            )
             if writer_instance != self._instance_name:
                 try:
-                    result = await self.send_event(
+                    result = await self.send_events(
                         instance_name=writer_instance,
-                        event_id=event.event_id,
+                        events_and_context=events_and_context,
                         store=self.store,
                         requester=requester,
-                        event=event,
-                        context=context,
                         ratelimit=ratelimit,
                         extra_users=extra_users,
                     )
@@ -1455,6 +1459,11 @@ class EventCreationHandler:
                     raise
                 stream_id = result["stream_id"]
                 event_id = result["event_id"]
+
+                # If we batch persisted events we return the last persisted event, otherwise
+                # we return the one event that was persisted
+                event, _ = events_and_context[-1]
+
                 if event_id != event.event_id:
                     # If we get a different event back then it means that its
                     # been de-duplicated, so we replace the given event with the
@@ -1467,15 +1476,19 @@ class EventCreationHandler:
                     event.internal_metadata.stream_ordering = stream_id
                 return event
 
-            event = await self.persist_and_notify_client_event(
-                requester, event, context, ratelimit=ratelimit, extra_users=extra_users
+            event = await self.persist_and_notify_client_events(
+                requester,
+                events_and_context,
+                ratelimit=ratelimit,
+                extra_users=extra_users,
             )
 
             return event
         except Exception:
-            # Ensure that we actually remove the entries in the push actions
-            # staging area, if we calculated them.
-            await self.store.remove_push_actions_from_staging(event.event_id)
+            for event, _ in events_and_context:
+                # Ensure that we actually remove the entries in the push actions
+                # staging area, if we calculated them.
+                await self.store.remove_push_actions_from_staging(event.event_id)
             raise
 
     async def cache_joined_hosts_for_event(
@@ -1569,23 +1582,26 @@ class EventCreationHandler:
                 Codes.BAD_ALIAS,
             )
 
-    async def persist_and_notify_client_event(
+    async def persist_and_notify_client_events(
         self,
         requester: Requester,
-        event: EventBase,
-        context: EventContext,
+        events_and_context: List[Tuple[EventBase, EventContext]],
         ratelimit: bool = True,
         extra_users: Optional[List[UserID]] = None,
     ) -> EventBase:
-        """Called when we have fully built the event, have already
-        calculated the push actions for the event, and checked auth.
+        """Called when we have fully built the events, have already
+        calculated the push actions for the events, and checked auth.
 
         This should only be run on the instance in charge of persisting events.
 
+        Please note that if batch persisting events, an error in
+        handling any one of these events will result in all of the events being dropped.
+
         Returns:
-            The persisted event. This may be different than the given event if
-            it was de-duplicated (e.g. because we had already persisted an
-            event with the same transaction ID.)
+            The persisted event, if one event is passed in, or the last event in the
+            list in the case of batch persisting. If only one event was persisted, the
+            returned event may be different than the given event if it was de-duplicated
+            (e.g. because we had already persisted an event with the same transaction ID.)
 
         Raises:
             PartialStateConflictError: if attempting to persist a partial state event in
@@ -1593,277 +1609,297 @@ class EventCreationHandler:
         """
         extra_users = extra_users or []
 
-        assert self._storage_controllers.persistence is not None
-        assert self._events_shard_config.should_handle(
-            self._instance_name, event.room_id
-        )
+        for event, context in events_and_context:
+            assert self._events_shard_config.should_handle(
+                self._instance_name, event.room_id
+            )
 
-        if ratelimit:
-            # We check if this is a room admin redacting an event so that we
-            # can apply different ratelimiting. We do this by simply checking
-            # it's not a self-redaction (to avoid having to look up whether the
-            # user is actually admin or not).
-            is_admin_redaction = False
-            if event.type == EventTypes.Redaction:
-                assert event.redacts is not None
+            if ratelimit:
+                # We check if this is a room admin redacting an event so that we
+                # can apply different ratelimiting. We do this by simply checking
+                # it's not a self-redaction (to avoid having to look up whether the
+                # user is actually admin or not).
+                is_admin_redaction = False
+                if event.type == EventTypes.Redaction:
+                    assert event.redacts is not None
+
+                    original_event = await self.store.get_event(
+                        event.redacts,
+                        redact_behaviour=EventRedactBehaviour.as_is,
+                        get_prev_content=False,
+                        allow_rejected=False,
+                        allow_none=True,
+                    )
 
-                original_event = await self.store.get_event(
-                    event.redacts,
-                    redact_behaviour=EventRedactBehaviour.as_is,
-                    get_prev_content=False,
-                    allow_rejected=False,
-                    allow_none=True,
+                    is_admin_redaction = bool(
+                        original_event and event.sender != original_event.sender
+                    )
+
+                await self.request_ratelimiter.ratelimit(
+                    requester, is_admin_redaction=is_admin_redaction
                 )
 
-                is_admin_redaction = bool(
-                    original_event and event.sender != original_event.sender
+            # run checks/actions on event based on type
+            if event.type == EventTypes.Member and event.membership == Membership.JOIN:
+                (
+                    current_membership,
+                    _,
+                ) = await self.store.get_local_current_membership_for_user_in_room(
+                    event.state_key, event.room_id
                 )
+                if current_membership != Membership.JOIN:
+                    self._notifier.notify_user_joined_room(
+                        event.event_id, event.room_id
+                    )
 
-            await self.request_ratelimiter.ratelimit(
-                requester, is_admin_redaction=is_admin_redaction
-            )
+            await self._maybe_kick_guest_users(event, context)
 
-        if event.type == EventTypes.Member and event.membership == Membership.JOIN:
-            (
-                current_membership,
-                _,
-            ) = await self.store.get_local_current_membership_for_user_in_room(
-                event.state_key, event.room_id
-            )
-            if current_membership != Membership.JOIN:
-                self._notifier.notify_user_joined_room(event.event_id, event.room_id)
+            if event.type == EventTypes.CanonicalAlias:
+                # Validate a newly added alias or newly added alt_aliases.
 
-        await self._maybe_kick_guest_users(event, context)
+                original_alias = None
+                original_alt_aliases: object = []
 
-        if event.type == EventTypes.CanonicalAlias:
-            # Validate a newly added alias or newly added alt_aliases.
+                original_event_id = event.unsigned.get("replaces_state")
+                if original_event_id:
+                    original_alias_event = await self.store.get_event(original_event_id)
 
-            original_alias = None
-            original_alt_aliases: object = []
+                    if original_alias_event:
+                        original_alias = original_alias_event.content.get("alias", None)
+                        original_alt_aliases = original_alias_event.content.get(
+                            "alt_aliases", []
+                        )
 
-            original_event_id = event.unsigned.get("replaces_state")
-            if original_event_id:
-                original_event = await self.store.get_event(original_event_id)
+                # Check the alias is currently valid (if it has changed).
+                room_alias_str = event.content.get("alias", None)
+                directory_handler = self.hs.get_directory_handler()
+                if room_alias_str and room_alias_str != original_alias:
+                    await self._validate_canonical_alias(
+                        directory_handler, room_alias_str, event.room_id
+                    )
 
-                if original_event:
-                    original_alias = original_event.content.get("alias", None)
-                    original_alt_aliases = original_event.content.get("alt_aliases", [])
-
-            # Check the alias is currently valid (if it has changed).
-            room_alias_str = event.content.get("alias", None)
-            directory_handler = self.hs.get_directory_handler()
-            if room_alias_str and room_alias_str != original_alias:
-                await self._validate_canonical_alias(
-                    directory_handler, room_alias_str, event.room_id
-                )
+                # Check that alt_aliases is the proper form.
+                alt_aliases = event.content.get("alt_aliases", [])
+                if not isinstance(alt_aliases, (list, tuple)):
+                    raise SynapseError(
+                        400,
+                        "The alt_aliases property must be a list.",
+                        Codes.INVALID_PARAM,
+                    )
 
-            # Check that alt_aliases is the proper form.
-            alt_aliases = event.content.get("alt_aliases", [])
-            if not isinstance(alt_aliases, (list, tuple)):
-                raise SynapseError(
-                    400, "The alt_aliases property must be a list.", Codes.INVALID_PARAM
-                )
+                # If the old version of alt_aliases is of an unknown form,
+                # completely replace it.
+                if not isinstance(original_alt_aliases, (list, tuple)):
+                    # TODO: check that the original_alt_aliases' entries are all strings
+                    original_alt_aliases = []
+
+                # Check that each alias is currently valid.
+                new_alt_aliases = set(alt_aliases) - set(original_alt_aliases)
+                if new_alt_aliases:
+                    for alias_str in new_alt_aliases:
+                        await self._validate_canonical_alias(
+                            directory_handler, alias_str, event.room_id
+                        )
 
-            # If the old version of alt_aliases is of an unknown form,
-            # completely replace it.
-            if not isinstance(original_alt_aliases, (list, tuple)):
-                # TODO: check that the original_alt_aliases' entries are all strings
-                original_alt_aliases = []
+            federation_handler = self.hs.get_federation_handler()
 
-            # Check that each alias is currently valid.
-            new_alt_aliases = set(alt_aliases) - set(original_alt_aliases)
-            if new_alt_aliases:
-                for alias_str in new_alt_aliases:
-                    await self._validate_canonical_alias(
-                        directory_handler, alias_str, event.room_id
+            if event.type == EventTypes.Member:
+                if event.content["membership"] == Membership.INVITE:
+                    event.unsigned[
+                        "invite_room_state"
+                    ] = await self.store.get_stripped_room_state_from_event_context(
+                        context,
+                        self.room_prejoin_state_types,
+                        membership_user_id=event.sender,
                     )
 
-        federation_handler = self.hs.get_federation_handler()
+                    invitee = UserID.from_string(event.state_key)
+                    if not self.hs.is_mine(invitee):
+                        # TODO: Can we add signature from remote server in a nicer
+                        # way? If we have been invited by a remote server, we need
+                        # to get them to sign the event.
 
-        if event.type == EventTypes.Member:
-            if event.content["membership"] == Membership.INVITE:
-                event.unsigned[
-                    "invite_room_state"
-                ] = await self.store.get_stripped_room_state_from_event_context(
-                    context,
-                    self.room_prejoin_state_types,
-                    membership_user_id=event.sender,
-                )
+                        returned_invite = await federation_handler.send_invite(
+                            invitee.domain, event
+                        )
+                        event.unsigned.pop("room_state", None)
 
-                invitee = UserID.from_string(event.state_key)
-                if not self.hs.is_mine(invitee):
-                    # TODO: Can we add signature from remote server in a nicer
-                    # way? If we have been invited by a remote server, we need
-                    # to get them to sign the event.
+                        # TODO: Make sure the signatures actually are correct.
+                        event.signatures.update(returned_invite.signatures)
 
-                    returned_invite = await federation_handler.send_invite(
-                        invitee.domain, event
+                if event.content["membership"] == Membership.KNOCK:
+                    event.unsigned[
+                        "knock_room_state"
+                    ] = await self.store.get_stripped_room_state_from_event_context(
+                        context,
+                        self.room_prejoin_state_types,
                     )
-                    event.unsigned.pop("room_state", None)
 
-                    # TODO: Make sure the signatures actually are correct.
-                    event.signatures.update(returned_invite.signatures)
+            if event.type == EventTypes.Redaction:
+                assert event.redacts is not None
 
-            if event.content["membership"] == Membership.KNOCK:
-                event.unsigned[
-                    "knock_room_state"
-                ] = await self.store.get_stripped_room_state_from_event_context(
-                    context,
-                    self.room_prejoin_state_types,
+                original_event = await self.store.get_event(
+                    event.redacts,
+                    redact_behaviour=EventRedactBehaviour.as_is,
+                    get_prev_content=False,
+                    allow_rejected=False,
+                    allow_none=True,
                 )
 
-        if event.type == EventTypes.Redaction:
-            assert event.redacts is not None
+                room_version = await self.store.get_room_version_id(event.room_id)
+                room_version_obj = KNOWN_ROOM_VERSIONS[room_version]
 
-            original_event = await self.store.get_event(
-                event.redacts,
-                redact_behaviour=EventRedactBehaviour.as_is,
-                get_prev_content=False,
-                allow_rejected=False,
-                allow_none=True,
-            )
+                # we can make some additional checks now if we have the original event.
+                if original_event:
+                    if original_event.type == EventTypes.Create:
+                        raise AuthError(403, "Redacting create events is not permitted")
 
-            room_version = await self.store.get_room_version_id(event.room_id)
-            room_version_obj = KNOWN_ROOM_VERSIONS[room_version]
-
-            # we can make some additional checks now if we have the original event.
-            if original_event:
-                if original_event.type == EventTypes.Create:
-                    raise AuthError(403, "Redacting create events is not permitted")
-
-                if original_event.room_id != event.room_id:
-                    raise SynapseError(400, "Cannot redact event from a different room")
-
-                if original_event.type == EventTypes.ServerACL:
-                    raise AuthError(403, "Redacting server ACL events is not permitted")
-
-                # Add a little safety stop-gap to prevent people from trying to
-                # redact MSC2716 related events when they're in a room version
-                # which does not support it yet. We allow people to use MSC2716
-                # events in existing room versions but only from the room
-                # creator since it does not require any changes to the auth
-                # rules and in effect, the redaction algorithm . In the
-                # supported room version, we add the `historical` power level to
-                # auth the MSC2716 related events and adjust the redaction
-                # algorthim to keep the `historical` field around (redacting an
-                # event should only strip fields which don't affect the
-                # structural protocol level).
-                is_msc2716_event = (
-                    original_event.type == EventTypes.MSC2716_INSERTION
-                    or original_event.type == EventTypes.MSC2716_BATCH
-                    or original_event.type == EventTypes.MSC2716_MARKER
-                )
-                if not room_version_obj.msc2716_historical and is_msc2716_event:
-                    raise AuthError(
-                        403,
-                        "Redacting MSC2716 events is not supported in this room version",
-                    )
+                    if original_event.room_id != event.room_id:
+                        raise SynapseError(
+                            400, "Cannot redact event from a different room"
+                        )
 
-            event_types = event_auth.auth_types_for_event(event.room_version, event)
-            prev_state_ids = await context.get_prev_state_ids(
-                StateFilter.from_types(event_types)
-            )
+                    if original_event.type == EventTypes.ServerACL:
+                        raise AuthError(
+                            403, "Redacting server ACL events is not permitted"
+                        )
 
-            auth_events_ids = self._event_auth_handler.compute_auth_events(
-                event, prev_state_ids, for_verification=True
-            )
-            auth_events_map = await self.store.get_events(auth_events_ids)
-            auth_events = {(e.type, e.state_key): e for e in auth_events_map.values()}
+                    # Add a little safety stop-gap to prevent people from trying to
+                    # redact MSC2716 related events when they're in a room version
+                    # which does not support it yet. We allow people to use MSC2716
+                    # events in existing room versions but only from the room
+                    # creator since it does not require any changes to the auth
+                    # rules and in effect, the redaction algorithm. In the
+                    # supported room version, we add the `historical` power level to
+                    # auth the MSC2716 related events and adjust the redaction
+                    # algorithm to keep the `historical` field around (redacting an
+                    # event should only strip fields which don't affect the
+                    # structural protocol level).
+                    is_msc2716_event = (
+                        original_event.type == EventTypes.MSC2716_INSERTION
+                        or original_event.type == EventTypes.MSC2716_BATCH
+                        or original_event.type == EventTypes.MSC2716_MARKER
+                    )
+                    if not room_version_obj.msc2716_historical and is_msc2716_event:
+                        raise AuthError(
+                            403,
+                            "Redacting MSC2716 events is not supported in this room version",
+                        )
 
-            if event_auth.check_redaction(
-                room_version_obj, event, auth_events=auth_events
-            ):
-                # this user doesn't have 'redact' rights, so we need to do some more
-                # checks on the original event. Let's start by checking the original
-                # event exists.
-                if not original_event:
-                    raise NotFoundError("Could not find event %s" % (event.redacts,))
-
-                if event.user_id != original_event.user_id:
-                    raise AuthError(403, "You don't have permission to redact events")
-
-                # all the checks are done.
-                event.internal_metadata.recheck_redaction = False
-
-        if event.type == EventTypes.Create:
-            prev_state_ids = await context.get_prev_state_ids()
-            if prev_state_ids:
-                raise AuthError(403, "Changing the room create event is forbidden")
-
-        if event.type == EventTypes.MSC2716_INSERTION:
-            room_version = await self.store.get_room_version_id(event.room_id)
-            room_version_obj = KNOWN_ROOM_VERSIONS[room_version]
-
-            create_event = await self.store.get_create_event_for_room(event.room_id)
-            room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR)
-
-            # Only check an insertion event if the room version
-            # supports it or the event is from the room creator.
-            if room_version_obj.msc2716_historical or (
-                self.config.experimental.msc2716_enabled
-                and event.sender == room_creator
-            ):
-                next_batch_id = event.content.get(
-                    EventContentFields.MSC2716_NEXT_BATCH_ID
+                event_types = event_auth.auth_types_for_event(event.room_version, event)
+                prev_state_ids = await context.get_prev_state_ids(
+                    StateFilter.from_types(event_types)
                 )
-                conflicting_insertion_event_id = None
-                if next_batch_id:
-                    conflicting_insertion_event_id = (
-                        await self.store.get_insertion_event_id_by_batch_id(
-                            event.room_id, next_batch_id
+
+                auth_events_ids = self._event_auth_handler.compute_auth_events(
+                    event, prev_state_ids, for_verification=True
+                )
+                auth_events_map = await self.store.get_events(auth_events_ids)
+                auth_events = {
+                    (e.type, e.state_key): e for e in auth_events_map.values()
+                }
+
+                if event_auth.check_redaction(
+                    room_version_obj, event, auth_events=auth_events
+                ):
+                    # this user doesn't have 'redact' rights, so we need to do some more
+                    # checks on the original event. Let's start by checking the original
+                    # event exists.
+                    if not original_event:
+                        raise NotFoundError(
+                            "Could not find event %s" % (event.redacts,)
                         )
+
+                    if event.user_id != original_event.user_id:
+                        raise AuthError(
+                            403, "You don't have permission to redact events"
+                        )
+
+                    # all the checks are done.
+                    event.internal_metadata.recheck_redaction = False
+
+            if event.type == EventTypes.Create:
+                prev_state_ids = await context.get_prev_state_ids()
+                if prev_state_ids:
+                    raise AuthError(403, "Changing the room create event is forbidden")
+
+            if event.type == EventTypes.MSC2716_INSERTION:
+                room_version = await self.store.get_room_version_id(event.room_id)
+                room_version_obj = KNOWN_ROOM_VERSIONS[room_version]
+
+                create_event = await self.store.get_create_event_for_room(event.room_id)
+                room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR)
+
+                # Only check an insertion event if the room version
+                # supports it or the event is from the room creator.
+                if room_version_obj.msc2716_historical or (
+                    self.config.experimental.msc2716_enabled
+                    and event.sender == room_creator
+                ):
+                    next_batch_id = event.content.get(
+                        EventContentFields.MSC2716_NEXT_BATCH_ID
                     )
-                if conflicting_insertion_event_id is not None:
-                    # The current insertion event that we're processing is invalid
-                    # because an insertion event already exists in the room with the
-                    # same next_batch_id. We can't allow multiple because the batch
-                    # pointing will get weird, e.g. we can't determine which insertion
-                    # event the batch event is pointing to.
-                    raise SynapseError(
-                        HTTPStatus.BAD_REQUEST,
-                        "Another insertion event already exists with the same next_batch_id",
-                        errcode=Codes.INVALID_PARAM,
-                    )
+                    conflicting_insertion_event_id = None
+                    if next_batch_id:
+                        conflicting_insertion_event_id = (
+                            await self.store.get_insertion_event_id_by_batch_id(
+                                event.room_id, next_batch_id
+                            )
+                        )
+                    if conflicting_insertion_event_id is not None:
+                        # The current insertion event that we're processing is invalid
+                        # because an insertion event already exists in the room with the
+                        # same next_batch_id. We can't allow multiple because the batch
+                        # pointing will get weird, e.g. we can't determine which insertion
+                        # event the batch event is pointing to.
+                        raise SynapseError(
+                            HTTPStatus.BAD_REQUEST,
+                            "Another insertion event already exists with the same next_batch_id",
+                            errcode=Codes.INVALID_PARAM,
+                        )
 
-        # Mark any `m.historical` messages as backfilled so they don't appear
-        # in `/sync` and have the proper decrementing `stream_ordering` as we import
-        backfilled = False
-        if event.internal_metadata.is_historical():
-            backfilled = True
+            # Mark any `m.historical` messages as backfilled so they don't appear
+            # in `/sync` and have the proper decrementing `stream_ordering` as we import
+            backfilled = False
+            if event.internal_metadata.is_historical():
+                backfilled = True
 
-        # Note that this returns the event that was persisted, which may not be
-        # the same as we passed in if it was deduplicated due transaction IDs.
+        assert self._storage_controllers.persistence is not None
         (
-            event,
-            event_pos,
+            persisted_events,
             max_stream_token,
-        ) = await self._storage_controllers.persistence.persist_event(
-            event, context=context, backfilled=backfilled
+        ) = await self._storage_controllers.persistence.persist_events(
+            events_and_context, backfilled=backfilled
         )
 
-        if self._ephemeral_events_enabled:
-            # If there's an expiry timestamp on the event, schedule its expiry.
-            self._message_handler.maybe_schedule_expiry(event)
+        for event in persisted_events:
+            if self._ephemeral_events_enabled:
+                # If there's an expiry timestamp on the event, schedule its expiry.
+                self._message_handler.maybe_schedule_expiry(event)
 
-        async def _notify() -> None:
-            try:
-                await self.notifier.on_new_room_event(
-                    event, event_pos, max_stream_token, extra_users=extra_users
-                )
-            except Exception:
-                logger.exception(
-                    "Error notifying about new room event %s",
-                    event.event_id,
-                )
+            stream_ordering = event.internal_metadata.stream_ordering
+            assert stream_ordering is not None
+            pos = PersistedEventPosition(self._instance_name, stream_ordering)
+
+            async def _notify() -> None:
+                try:
+                    await self.notifier.on_new_room_event(
+                        event, pos, max_stream_token, extra_users=extra_users
+                    )
+                except Exception:
+                    logger.exception(
+                        "Error notifying about new room event %s",
+                        event.event_id,
+                    )
 
-        run_in_background(_notify)
+            run_in_background(_notify)
 
-        if event.type == EventTypes.Message:
-            # We don't want to block sending messages on any presence code. This
-            # matters as sometimes presence code can take a while.
-            run_in_background(self._bump_active_time, requester.user)
+            if event.type == EventTypes.Message:
+                # We don't want to block sending messages on any presence code. This
+                # matters as sometimes presence code can take a while.
+                run_in_background(self._bump_active_time, requester.user)
 
-        return event
+        return persisted_events[-1]
 
     async def _maybe_kick_guest_users(
         self, event: EventBase, context: EventContext
@@ -1952,8 +1988,7 @@ class EventCreationHandler:
                 # shadow-banned user.
                 await self.handle_new_client_event(
                     requester,
-                    event,
-                    context,
+                    events_and_context=[(event, context)],
                     ratelimit=False,
                     ignore_shadow_ban=True,
                 )
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 09a1a82e6c..b220238e55 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -301,8 +301,7 @@ class RoomCreationHandler:
         # now send the tombstone
         await self.event_creation_handler.handle_new_client_event(
             requester=requester,
-            event=tombstone_event,
-            context=tombstone_context,
+            events_and_context=[(tombstone_event, tombstone_context)],
         )
 
         state_filter = StateFilter.from_types(
@@ -1057,8 +1056,10 @@ class RoomCreationHandler:
         creator_id = creator.user.to_string()
         event_keys = {"room_id": room_id, "sender": creator_id, "state_key": ""}
         depth = 1
+
         # the last event sent/persisted to the db
         last_sent_event_id: Optional[str] = None
+
         # the most recently created event
         prev_event: List[str] = []
         # a map of event types, state keys -> event_ids. We collect these mappings this as events are
@@ -1112,8 +1113,7 @@ class RoomCreationHandler:
 
             ev = await self.event_creation_handler.handle_new_client_event(
                 requester=creator,
-                event=event,
-                context=context,
+                events_and_context=[(event, context)],
                 ratelimit=False,
                 ignore_shadow_ban=True,
             )
@@ -1152,7 +1152,6 @@ class RoomCreationHandler:
             prev_event_ids=[last_sent_event_id],
             depth=depth,
         )
-        last_sent_event_id = member_event_id
         prev_event = [member_event_id]
 
         # update the depth and state map here as the membership event has been created
@@ -1168,7 +1167,7 @@ class RoomCreationHandler:
                 EventTypes.PowerLevels, pl_content, False
             )
             current_state_group = power_context._state_group
-            last_sent_stream_id = await send(power_event, power_context, creator)
+            await send(power_event, power_context, creator)
         else:
             power_level_content: JsonDict = {
                 "users": {creator_id: 100},
@@ -1217,7 +1216,7 @@ class RoomCreationHandler:
                 False,
             )
             current_state_group = pl_context._state_group
-            last_sent_stream_id = await send(pl_event, pl_context, creator)
+            await send(pl_event, pl_context, creator)
 
         events_to_send = []
         if room_alias and (EventTypes.CanonicalAlias, "") not in initial_state:
@@ -1271,9 +1270,11 @@ class RoomCreationHandler:
             )
             events_to_send.append((encryption_event, encryption_context))
 
-        for event, context in events_to_send:
-            last_sent_stream_id = await send(event, context, creator)
-        return last_sent_stream_id, last_sent_event_id, depth
+        last_event = await self.event_creation_handler.handle_new_client_event(
+            creator, events_to_send, ignore_shadow_ban=True
+        )
+        assert last_event.internal_metadata.stream_ordering is not None
+        return last_event.internal_metadata.stream_ordering, last_event.event_id, depth
 
     def _generate_room_id(self) -> str:
         """Generates a random room ID.
diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py
index 1414e575d6..411a6fb22f 100644
--- a/synapse/handlers/room_batch.py
+++ b/synapse/handlers/room_batch.py
@@ -379,8 +379,7 @@ class RoomBatchHandler:
                 await self.create_requester_for_user_id_from_app_service(
                     event.sender, app_service_requester.app_service
                 ),
-                event=event,
-                context=context,
+                events_and_context=[(event, context)],
             )
 
         return event_ids
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 8d01f4bf2b..88158822e0 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -432,8 +432,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         with opentracing.start_active_span("handle_new_client_event"):
             result_event = await self.event_creation_handler.handle_new_client_event(
                 requester,
-                event,
-                context,
+                events_and_context=[(event, context)],
                 extra_users=[target],
                 ratelimit=ratelimit,
             )
@@ -1252,7 +1251,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                 raise SynapseError(403, "This room has been blocked on this server")
 
         event = await self.event_creation_handler.handle_new_client_event(
-            requester, event, context, extra_users=[target_user], ratelimit=ratelimit
+            requester,
+            events_and_context=[(event, context)],
+            extra_users=[target_user],
+            ratelimit=ratelimit,
         )
 
         prev_member_event_id = prev_state_ids.get(
@@ -1860,8 +1862,7 @@ class RoomMemberMasterHandler(RoomMemberHandler):
 
         result_event = await self.event_creation_handler.handle_new_client_event(
             requester,
-            event,
-            context,
+            events_and_context=[(event, context)],
             extra_users=[UserID.from_string(target_user)],
         )
         # we know it was persisted, so must have a stream ordering
diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py
index 53aa7fa4c6..ac9a92240a 100644
--- a/synapse/replication/http/__init__.py
+++ b/synapse/replication/http/__init__.py
@@ -25,6 +25,7 @@ from synapse.replication.http import (
     push,
     register,
     send_event,
+    send_events,
     state,
     streams,
 )
@@ -43,6 +44,7 @@ class ReplicationRestResource(JsonResource):
 
     def register_servlets(self, hs: "HomeServer") -> None:
         send_event.register_servlets(hs, self)
+        send_events.register_servlets(hs, self)
         federation.register_servlets(hs, self)
         presence.register_servlets(hs, self)
         membership.register_servlets(hs, self)
diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py
index 486f04723c..4215a1c1bc 100644
--- a/synapse/replication/http/send_event.py
+++ b/synapse/replication/http/send_event.py
@@ -141,8 +141,8 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint):
             "Got event to send with ID: %s into room: %s", event.event_id, event.room_id
         )
 
-        event = await self.event_creation_handler.persist_and_notify_client_event(
-            requester, event, context, ratelimit=ratelimit, extra_users=extra_users
+        event = await self.event_creation_handler.persist_and_notify_client_events(
+            requester, [(event, context)], ratelimit=ratelimit, extra_users=extra_users
         )
 
         return (
diff --git a/synapse/replication/http/send_events.py b/synapse/replication/http/send_events.py
new file mode 100644
index 0000000000..8889bbb644
--- /dev/null
+++ b/synapse/replication/http/send_events.py
@@ -0,0 +1,171 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import TYPE_CHECKING, List, Tuple
+
+from twisted.web.server import Request
+
+from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
+from synapse.events import EventBase, make_event_from_dict
+from synapse.events.snapshot import EventContext
+from synapse.http.server import HttpServer
+from synapse.http.servlet import parse_json_object_from_request
+from synapse.replication.http._base import ReplicationEndpoint
+from synapse.types import JsonDict, Requester, UserID
+from synapse.util.metrics import Measure
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+    from synapse.storage.databases.main import DataStore
+
+logger = logging.getLogger(__name__)
+
+
+class ReplicationSendEventsRestServlet(ReplicationEndpoint):
+    """Handles batches of newly created events on workers, including persisting and
+    notifying.
+
+    The API looks like:
+
+        POST /_synapse/replication/send_events/:txn_id
+
+        {
+            "events": [{
+                "event": { .. serialized event .. },
+                "room_version": .., // "1", "2", "3", etc: the version of the room
+                            // containing the event
+                "event_format_version": .., // 1,2,3 etc: the event format version
+                "internal_metadata": { .. serialized internal_metadata .. },
+                "outlier": true|false,
+                "rejected_reason": ..,   // The event.rejected_reason field
+                "context": { .. serialized event context .. },
+                "requester": { .. serialized requester .. },
+                "ratelimit": true,
+            }]
+        }
+
+        200 OK
+
+        { "stream_id": 12345, "event_id": "$abcdef..." }
+
+    Responds with a 409 when a `PartialStateConflictError` is raised due to an event
+    context that needs to be recomputed due to the un-partial stating of a room.
+
+    """
+
+    NAME = "send_events"
+    PATH_ARGS = ()
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__(hs)
+
+        self.event_creation_handler = hs.get_event_creation_handler()
+        self.store = hs.get_datastores().main
+        self._storage_controllers = hs.get_storage_controllers()
+        self.clock = hs.get_clock()
+
+    @staticmethod
+    async def _serialize_payload(  # type: ignore[override]
+        events_and_context: List[Tuple[EventBase, EventContext]],
+        store: "DataStore",
+        requester: Requester,
+        ratelimit: bool,
+        extra_users: List[UserID],
+    ) -> JsonDict:
+        """
+        Args:
+            store
+            requester
+            events_and_context
+            ratelimit
+        """
+        serialized_events = []
+
+        for event, context in events_and_context:
+            serialized_context = await context.serialize(event, store)
+            serialized_event = {
+                "event": event.get_pdu_json(),
+                "room_version": event.room_version.identifier,
+                "event_format_version": event.format_version,
+                "internal_metadata": event.internal_metadata.get_dict(),
+                "outlier": event.internal_metadata.is_outlier(),
+                "rejected_reason": event.rejected_reason,
+                "context": serialized_context,
+                "requester": requester.serialize(),
+                "ratelimit": ratelimit,
+                "extra_users": [u.to_string() for u in extra_users],
+            }
+            serialized_events.append(serialized_event)
+
+        payload = {"events": serialized_events}
+
+        return payload
+
+    async def _handle_request(  # type: ignore[override]
+        self, request: Request
+    ) -> Tuple[int, JsonDict]:
+        with Measure(self.clock, "repl_send_events_parse"):
+            payload = parse_json_object_from_request(request)
+            events_and_context = []
+            events = payload["events"]
+
+            for event_payload in events:
+                event_dict = event_payload["event"]
+                room_ver = KNOWN_ROOM_VERSIONS[event_payload["room_version"]]
+                internal_metadata = event_payload["internal_metadata"]
+                rejected_reason = event_payload["rejected_reason"]
+
+                event = make_event_from_dict(
+                    event_dict, room_ver, internal_metadata, rejected_reason
+                )
+                event.internal_metadata.outlier = event_payload["outlier"]
+
+                requester = Requester.deserialize(
+                    self.store, event_payload["requester"]
+                )
+                context = EventContext.deserialize(
+                    self._storage_controllers, event_payload["context"]
+                )
+
+                ratelimit = event_payload["ratelimit"]
+                events_and_context.append((event, context))
+
+                extra_users = [
+                    UserID.from_string(u) for u in event_payload["extra_users"]
+                ]
+
+                logger.info(
+                    "Got batch of events to send, last ID of batch is: %s, sending into room: %s",
+                    event.event_id,
+                    event.room_id,
+                )
+
+            last_event = (
+                await self.event_creation_handler.persist_and_notify_client_events(
+                    requester, events_and_context, ratelimit, extra_users
+                )
+            )
+
+        return (
+            200,
+            {
+                "stream_id": last_event.internal_metadata.stream_ordering,
+                "event_id": last_event.event_id,
+            },
+        )
+
+
+def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
+    ReplicationSendEventsRestServlet(hs).register(http_server)
diff --git a/tests/handlers/test_message.py b/tests/handlers/test_message.py
index 986b50ce0c..99384837d0 100644
--- a/tests/handlers/test_message.py
+++ b/tests/handlers/test_message.py
@@ -105,7 +105,10 @@ class EventCreationTestCase(unittest.HomeserverTestCase):
         event1, context = self._create_duplicate_event(txn_id)
 
         ret_event1 = self.get_success(
-            self.handler.handle_new_client_event(self.requester, event1, context)
+            self.handler.handle_new_client_event(
+                self.requester,
+                events_and_context=[(event1, context)],
+            )
         )
         stream_id1 = ret_event1.internal_metadata.stream_ordering
 
@@ -118,7 +121,10 @@ class EventCreationTestCase(unittest.HomeserverTestCase):
         self.assertNotEqual(event1.event_id, event2.event_id)
 
         ret_event2 = self.get_success(
-            self.handler.handle_new_client_event(self.requester, event2, context)
+            self.handler.handle_new_client_event(
+                self.requester,
+                events_and_context=[(event2, context)],
+            )
         )
         stream_id2 = ret_event2.internal_metadata.stream_ordering
 
diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py
index 86b3d51975..765df75d91 100644
--- a/tests/handlers/test_register.py
+++ b/tests/handlers/test_register.py
@@ -497,7 +497,9 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
             )
         )
         self.get_success(
-            event_creation_handler.handle_new_client_event(requester, event, context)
+            event_creation_handler.handle_new_client_event(
+                requester, events_and_context=[(event, context)]
+            )
         )
 
         # Register a second user, which won't be be in the room (or even have an invite)
diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py
index a0ce077a99..de9f4af2de 100644
--- a/tests/storage/test_event_chain.py
+++ b/tests/storage/test_event_chain.py
@@ -531,7 +531,9 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
             )
         )
         self.get_success(
-            event_handler.handle_new_client_event(self.requester, event, context)
+            event_handler.handle_new_client_event(
+                self.requester, events_and_context=[(event, context)]
+            )
         )
         state1 = set(self.get_success(context.get_current_state_ids()).values())
 
@@ -549,7 +551,9 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
             )
         )
         self.get_success(
-            event_handler.handle_new_client_event(self.requester, event, context)
+            event_handler.handle_new_client_event(
+                self.requester, events_and_context=[(event, context)]
+            )
         )
         state2 = set(self.get_success(context.get_current_state_ids()).values())
 
diff --git a/tests/unittest.py b/tests/unittest.py
index 00cb023198..5116be338e 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -734,7 +734,9 @@ class HomeserverTestCase(TestCase):
             event.internal_metadata.soft_failed = True
 
         self.get_success(
-            event_creator.handle_new_client_event(requester, event, context)
+            event_creator.handle_new_client_event(
+                requester, events_and_context=[(event, context)]
+            )
         )
 
         return event.event_id
-- 
cgit 1.5.1


From 6caa3030835f879724c003a5b0dc66a6285451d8 Mon Sep 17 00:00:00 2001
From: Kateřina Churanová <katerina.churanova@gmail.com>
Date: Wed, 28 Sep 2022 14:31:53 +0200
Subject: fix: Push notifications for invite over federation (#13719)

---
 changelog.d/13719.bugfix                      |  1 +
 synapse/events/__init__.py                    |  4 ++++
 synapse/handlers/federation.py                | 13 ++++++++++---
 synapse/handlers/federation_event.py          |  1 +
 synapse/push/bulk_push_rule_evaluator.py      | 10 +++++++---
 synapse/push/push_rule_evaluator.py           | 16 ++++++++--------
 synapse/storage/controllers/persist_events.py | 10 ++++++----
 synapse/storage/databases/main/events.py      | 10 +++++-----
 8 files changed, 42 insertions(+), 23 deletions(-)
 create mode 100644 changelog.d/13719.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13719.bugfix b/changelog.d/13719.bugfix
new file mode 100644
index 0000000000..4318f4daff
--- /dev/null
+++ b/changelog.d/13719.bugfix
@@ -0,0 +1 @@
+Send invite push notifications for invite over federation.
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index b2c9119fd0..030c3ca408 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -289,6 +289,10 @@ class _EventInternalMetadata:
         """
         return self._dict.get("historical", False)
 
+    def is_notifiable(self) -> bool:
+        """Whether this event can trigger a push notification"""
+        return not self.is_outlier() or self.is_out_of_band_membership()
+
 
 class EventBase(metaclass=abc.ABCMeta):
     @property
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 74580f60df..8f847ff845 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -149,6 +149,7 @@ class FederationHandler:
         self.http_client = hs.get_proxied_blacklisted_http_client()
         self._replication = hs.get_replication_data_handler()
         self._federation_event_handler = hs.get_federation_event_handler()
+        self._bulk_push_rule_evaluator = hs.get_bulk_push_rule_evaluator()
 
         self._clean_room_for_join_client = ReplicationCleanRoomRestServlet.make_client(
             hs
@@ -956,9 +957,15 @@ class FederationHandler:
         )
 
         context = EventContext.for_outlier(self._storage_controllers)
-        await self._federation_event_handler.persist_events_and_notify(
-            event.room_id, [(event, context)]
-        )
+
+        await self._bulk_push_rule_evaluator.action_for_event_by_user(event, context)
+        try:
+            await self._federation_event_handler.persist_events_and_notify(
+                event.room_id, [(event, context)]
+            )
+        except Exception:
+            await self.store.remove_push_actions_from_staging(event.event_id)
+            raise
 
         return event
 
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 2d7cde7506..3fac256881 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -2170,6 +2170,7 @@ class FederationEventHandler:
         if instance != self._instance_name:
             # Limit the number of events sent over replication. We choose 200
             # here as that is what we default to in `max_request_body_size(..)`
+            result = {}
             try:
                 for batch in batch_iter(event_and_contexts, 200):
                     result = await self._send_events(
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 404379ef67..32313e3bcf 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -173,7 +173,11 @@ class BulkPushRuleEvaluator:
 
     async def _get_power_levels_and_sender_level(
         self, event: EventBase, context: EventContext
-    ) -> Tuple[dict, int]:
+    ) -> Tuple[dict, Optional[int]]:
+        # Power levels and the sender's level cannot be retrieved for an outlier event.
+        if event.internal_metadata.is_outlier():
+            return {}, None
+
         event_types = auth_types_for_event(event.room_version, event)
         prev_state_ids = await context.get_prev_state_ids(
             StateFilter.from_types(event_types)
@@ -250,8 +254,8 @@ class BulkPushRuleEvaluator:
         should increment the unread count, and insert the results into the
         event_push_actions_staging table.
         """
-        if event.internal_metadata.is_outlier():
-            # This can happen due to out of band memberships
+        if not event.internal_metadata.is_notifiable():
+            # Push rules can't be processed here for events that aren't notifiable.
             return
 
         # Disable counting as unread unless the experimental configuration is
diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py
index 3c5632cd91..f8176c5a42 100644
--- a/synapse/push/push_rule_evaluator.py
+++ b/synapse/push/push_rule_evaluator.py
@@ -42,18 +42,18 @@ IS_GLOB = re.compile(r"[\?\*\[\]]")
 INEQUALITY_EXPR = re.compile("^([=<>]*)([0-9]*)$")
 
 
-def _room_member_count(
-    ev: EventBase, condition: Mapping[str, Any], room_member_count: int
-) -> bool:
+def _room_member_count(condition: Mapping[str, Any], room_member_count: int) -> bool:
     return _test_ineq_condition(condition, room_member_count)
 
 
 def _sender_notification_permission(
-    ev: EventBase,
     condition: Mapping[str, Any],
-    sender_power_level: int,
+    sender_power_level: Optional[int],
     power_levels: Dict[str, Union[int, Dict[str, int]]],
 ) -> bool:
+    if sender_power_level is None:
+        return False
+
     notif_level_key = condition.get("key")
     if notif_level_key is None:
         return False
@@ -129,7 +129,7 @@ class PushRuleEvaluatorForEvent:
         self,
         event: EventBase,
         room_member_count: int,
-        sender_power_level: int,
+        sender_power_level: Optional[int],
         power_levels: Dict[str, Union[int, Dict[str, int]]],
         relations: Dict[str, Set[Tuple[str, str]]],
         relations_match_enabled: bool,
@@ -198,10 +198,10 @@ class PushRuleEvaluatorForEvent:
         elif condition["kind"] == "contains_display_name":
             return self._contains_display_name(display_name)
         elif condition["kind"] == "room_member_count":
-            return _room_member_count(self._event, condition, self._room_member_count)
+            return _room_member_count(condition, self._room_member_count)
         elif condition["kind"] == "sender_notification_permission":
             return _sender_notification_permission(
-                self._event, condition, self._sender_power_level, self._power_levels
+                condition, self._sender_power_level, self._power_levels
             )
         elif (
             condition["kind"] == "org.matrix.msc3772.relation_match"
diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py
index 709cb792ed..06e71a8053 100644
--- a/synapse/storage/controllers/persist_events.py
+++ b/synapse/storage/controllers/persist_events.py
@@ -423,16 +423,18 @@ class EventsPersistenceStorageController:
         for d in ret_vals:
             replaced_events.update(d)
 
-        events = []
+        persisted_events = []
         for event, _ in events_and_contexts:
             existing_event_id = replaced_events.get(event.event_id)
             if existing_event_id:
-                events.append(await self.main_store.get_event(existing_event_id))
+                persisted_events.append(
+                    await self.main_store.get_event(existing_event_id)
+                )
             else:
-                events.append(event)
+                persisted_events.append(event)
 
         return (
-            events,
+            persisted_events,
             self.main_store.get_room_max_token(),
         )
 
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index b59eb7478b..bb489b8189 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -2134,13 +2134,13 @@ class PersistEventsStore:
                 appear in events_and_context.
         """
 
-        # Only non outlier events will have push actions associated with them,
+        # Only notifiable events will have push actions associated with them,
         # so let's filter them out. (This makes joining large rooms faster, as
         # these queries took seconds to process all the state events).
-        non_outlier_events = [
+        notifiable_events = [
             event
             for event, _ in events_and_contexts
-            if not event.internal_metadata.is_outlier()
+            if event.internal_metadata.is_notifiable()
         ]
 
         sql = """
@@ -2153,7 +2153,7 @@ class PersistEventsStore:
             WHERE event_id = ?
         """
 
-        if non_outlier_events:
+        if notifiable_events:
             txn.execute_batch(
                 sql,
                 (
@@ -2163,7 +2163,7 @@ class PersistEventsStore:
                         event.depth,
                         event.event_id,
                     )
-                    for event in non_outlier_events
+                    for event in notifiable_events
                 ),
             )
 
-- 
cgit 1.5.1


From 4b17a5ace846d82b09fccce79da77a8207a6765f Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 28 Sep 2022 14:42:43 +0100
Subject: Handle remote device list updates during partial join (#13913)

c.f. #12993 (comment), point 3

This stores all device list updates that we receive while partial joins are ongoing, and processes them once we have the full state.

Note: We don't actually process the device lists in the same way as if we weren't partially joined. Instead of updating the device list remote cache, we simply notify local users that a change in the remote user's devices has happened. I think this is safe because, if the local user requests the keys for the remote user and we don't have them, we'll simply fetch them as normal.
---
 changelog.d/13913.misc                             |  1 +
 synapse/handlers/device.py                         | 62 ++++++++++++++++++++++
 synapse/handlers/federation.py                     |  4 ++
 synapse/storage/databases/main/devices.py          | 55 +++++++++++++++++++
 synapse/storage/databases/main/room.py             | 20 +++++++
 .../delta/73/04pending_device_list_updates.sql     | 28 ++++++++++
 6 files changed, 170 insertions(+)
 create mode 100644 changelog.d/13913.misc
 create mode 100644 synapse/storage/schema/main/delta/73/04pending_device_list_updates.sql

(limited to 'synapse')

diff --git a/changelog.d/13913.misc b/changelog.d/13913.misc
new file mode 100644
index 0000000000..30b4401049
--- /dev/null
+++ b/changelog.d/13913.misc
@@ -0,0 +1 @@
+Faster remote room joins: correctly handle remote device list updates during a partial join.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index bad262731c..f2ef591103 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -309,6 +309,17 @@ class DeviceWorkerHandler:
             "self_signing_key": self_signing_key,
         }
 
+    async def handle_room_un_partial_stated(self, room_id: str) -> None:
+        """Handles sending appropriate device list updates in a room that has
+        gone from partial to full state.
+        """
+
+        # TODO(faster_joins): worker mode support
+        #   https://github.com/matrix-org/synapse/issues/12994
+        logger.error(
+            "Trying handling device list state for partial join: not supported on workers."
+        )
+
 
 class DeviceHandler(DeviceWorkerHandler):
     def __init__(self, hs: "HomeServer"):
@@ -746,6 +757,15 @@ class DeviceHandler(DeviceWorkerHandler):
         finally:
             self._handle_new_device_update_is_processing = False
 
+    async def handle_room_un_partial_stated(self, room_id: str) -> None:
+        """Handles sending appropriate device list updates in a room that has
+        gone from partial to full state.
+        """
+
+        # We defer to the device list updater implementation as we're on the
+        # right worker.
+        await self.device_list_updater.handle_room_un_partial_stated(room_id)
+
 
 def _update_device_from_client_ips(
     device: JsonDict, client_ips: Mapping[Tuple[str, str], Mapping[str, Any]]
@@ -836,6 +856,16 @@ class DeviceListUpdater:
             )
             return
 
+        # Check if we are partially joining any rooms. If so we need to store
+        # all device list updates so that we can handle them correctly once we
+        # know who is in the room.
+        partial_rooms = await self.store.get_partial_state_rooms_and_servers()
+        if partial_rooms:
+            await self.store.add_remote_device_list_to_pending(
+                user_id,
+                device_id,
+            )
+
         room_ids = await self.store.get_rooms_for_user(user_id)
         if not room_ids:
             # We don't share any rooms with this user. Ignore update, as we
@@ -1175,3 +1205,35 @@ class DeviceListUpdater:
             device_ids.append(verify_key.version)
 
         return device_ids
+
+    async def handle_room_un_partial_stated(self, room_id: str) -> None:
+        """Handles sending appropriate device list updates in a room that has
+        gone from partial to full state.
+        """
+
+        pending_updates = (
+            await self.store.get_pending_remote_device_list_updates_for_room(room_id)
+        )
+
+        for user_id, device_id in pending_updates:
+            logger.info(
+                "Got pending device list update in room %s: %s / %s",
+                room_id,
+                user_id,
+                device_id,
+            )
+            position = await self.store.add_device_change_to_streams(
+                user_id,
+                [device_id],
+                room_ids=[room_id],
+            )
+
+            if not position:
+                # This should only happen if there are no updates, which
+                # shouldn't happen when we've passed in a non-empty set of
+                # device IDs.
+                continue
+
+            self.device_handler.notifier.on_new_event(
+                StreamKeyType.DEVICE_LIST, position, rooms=[room_id]
+            )
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 8f847ff845..360ab6fee2 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -149,6 +149,7 @@ class FederationHandler:
         self.http_client = hs.get_proxied_blacklisted_http_client()
         self._replication = hs.get_replication_data_handler()
         self._federation_event_handler = hs.get_federation_event_handler()
+        self._device_handler = hs.get_device_handler()
         self._bulk_push_rule_evaluator = hs.get_bulk_push_rule_evaluator()
 
         self._clean_room_for_join_client = ReplicationCleanRoomRestServlet.make_client(
@@ -1631,6 +1632,9 @@ class FederationHandler:
                 #   https://github.com/matrix-org/synapse/issues/12994
                 await self.state_handler.update_current_state(room_id)
 
+                logger.info("Handling any pending device list updates")
+                await self._device_handler.handle_room_un_partial_stated(room_id)
+
                 logger.info("Clearing partial-state flag for %s", room_id)
                 success = await self.store.clear_partial_state_room(room_id)
                 if success:
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 1151fb0cc3..1e562d4a40 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1995,3 +1995,58 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
                 add_device_list_outbound_pokes_txn,
                 stream_ids,
             )
+
+    async def add_remote_device_list_to_pending(
+        self, user_id: str, device_id: str
+    ) -> None:
+        """Add a device list update to the table tracking remote device list
+        updates during partial joins.
+        """
+
+        async with self._device_list_id_gen.get_next() as stream_id:  # type: ignore[attr-defined]
+            await self.db_pool.simple_upsert(
+                table="device_lists_remote_pending",
+                keyvalues={
+                    "user_id": user_id,
+                    "device_id": device_id,
+                },
+                values={"stream_id": stream_id},
+                desc="add_remote_device_list_to_pending",
+            )
+
+    async def get_pending_remote_device_list_updates_for_room(
+        self, room_id: str
+    ) -> Collection[Tuple[str, str]]:
+        """Get the set of remote device list updates from the pending table for
+        the room.
+        """
+
+        min_device_stream_id = await self.db_pool.simple_select_one_onecol(
+            table="partial_state_rooms",
+            keyvalues={
+                "room_id": room_id,
+            },
+            retcol="device_lists_stream_id",
+            desc="get_pending_remote_device_list_updates_for_room_device",
+        )
+
+        sql = """
+            SELECT user_id, device_id FROM device_lists_remote_pending AS d
+            INNER JOIN current_state_events AS c ON
+                type = 'm.room.member'
+                AND state_key = user_id
+                AND membership = 'join'
+            WHERE
+                room_id = ? AND stream_id > ?
+        """
+
+        def get_pending_remote_device_list_updates_for_room_txn(
+            txn: LoggingTransaction,
+        ) -> Collection[Tuple[str, str]]:
+            txn.execute(sql, (room_id, min_device_stream_id))
+            return cast(Collection[Tuple[str, str]], txn.fetchall())
+
+        return await self.db_pool.runInteraction(
+            "get_pending_remote_device_list_updates_for_room",
+            get_pending_remote_device_list_updates_for_room_txn,
+        )
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 064c332fb7..672c9a03fc 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -1217,6 +1217,26 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
         )
         self._invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,))
 
+        # We now delete anything from `device_lists_remote_pending` with a
+        # stream ID less than or equal to the minimum
+        # `partial_state_rooms.device_lists_stream_id`, as we no longer need them.
+        device_lists_stream_id = DatabasePool.simple_select_one_onecol_txn(
+            txn,
+            table="partial_state_rooms",
+            keyvalues={},
+            retcol="MIN(device_lists_stream_id)",
+            allow_none=True,
+        )
+        if device_lists_stream_id is None:
+            # There are no rooms being currently partially joined, so we delete everything.
+            txn.execute("DELETE FROM device_lists_remote_pending")
+        else:
+            sql = """
+                DELETE FROM device_lists_remote_pending
+                WHERE stream_id <= ?
+            """
+            txn.execute(sql, (device_lists_stream_id,))
+
     @cached()
     async def is_partial_state_room(self, room_id: str) -> bool:
         """Checks if this room has partial state.
diff --git a/synapse/storage/schema/main/delta/73/04pending_device_list_updates.sql b/synapse/storage/schema/main/delta/73/04pending_device_list_updates.sql
new file mode 100644
index 0000000000..dbd78d677d
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/04pending_device_list_updates.sql
@@ -0,0 +1,28 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Stores remote device lists we have received for remote users while a partial
+-- join is in progress.
+--
+-- This allows us to replay any device list updates if it turns out the remote
+-- user was in the partially joined room
+CREATE TABLE device_lists_remote_pending(
+    stream_id BIGINT PRIMARY KEY,
+    user_id TEXT NOT NULL,
+    device_id TEXT NOT NULL
+);
+
+-- We only keep the most recent update for a given user/device pair.
+CREATE UNIQUE INDEX device_lists_remote_pending_user_device_id ON device_lists_remote_pending(user_id, device_id);
-- 
cgit 1.5.1


From 7766bd5b354cd4ea1a33351ba320e54a14d3aeac Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 28 Sep 2022 10:58:25 -0400
Subject: Stop returning an unused column when handling new receipts. (#13933)

---
 changelog.d/13933.feature                            | 1 +
 synapse/storage/databases/main/event_push_actions.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/13933.feature

(limited to 'synapse')

diff --git a/changelog.d/13933.feature b/changelog.d/13933.feature
new file mode 100644
index 0000000000..d0cb902dff
--- /dev/null
+++ b/changelog.d/13933.feature
@@ -0,0 +1 @@
+Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)).
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index f4cdc2e399..3e4b4485d6 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -1053,7 +1053,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         )
 
         sql = """
-            SELECT r.stream_id, r.room_id, r.user_id, e.stream_ordering
+            SELECT r.room_id, r.user_id, e.stream_ordering
             FROM receipts_linearized AS r
             INNER JOIN events AS e USING (event_id)
             WHERE ? < r.stream_id AND r.stream_id <= ? AND user_id LIKE ?
@@ -1078,7 +1078,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
         # For each new read receipt we delete push actions from before it and
         # recalculate the summary.
-        for _, room_id, user_id, stream_ordering in rows:
+        for room_id, user_id, stream_ordering in rows:
             # Only handle our own read receipts.
             if not self.hs.is_mine_id(user_id):
                 continue
-- 
cgit 1.5.1


From 1386ce4735019ea6e918591509ee58a82c9c635c Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 28 Sep 2022 11:01:41 -0400
Subject: Revert "Stop returning an unused column when handling new receipts.
 (#13933)" (#13935)

This reverts commit 7766bd5b354cd4ea1a33351ba320e54a14d3aeac (#13933).

The unused column is actually used, but much further down in the function.
---
 changelog.d/13933.feature                            | 1 -
 synapse/storage/databases/main/event_push_actions.py | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)
 delete mode 100644 changelog.d/13933.feature

(limited to 'synapse')

diff --git a/changelog.d/13933.feature b/changelog.d/13933.feature
deleted file mode 100644
index d0cb902dff..0000000000
--- a/changelog.d/13933.feature
+++ /dev/null
@@ -1 +0,0 @@
-Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)).
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 3e4b4485d6..f4cdc2e399 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -1053,7 +1053,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         )
 
         sql = """
-            SELECT r.room_id, r.user_id, e.stream_ordering
+            SELECT r.stream_id, r.room_id, r.user_id, e.stream_ordering
             FROM receipts_linearized AS r
             INNER JOIN events AS e USING (event_id)
             WHERE ? < r.stream_id AND r.stream_id <= ? AND user_id LIKE ?
@@ -1078,7 +1078,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
         # For each new read receipt we delete push actions from before it and
         # recalculate the summary.
-        for room_id, user_id, stream_ordering in rows:
+        for _, room_id, user_id, stream_ordering in rows:
             # Only handle our own read receipts.
             if not self.hs.is_mine_id(user_id):
                 continue
-- 
cgit 1.5.1


From df8b91ed2bba4995c59a5b067e3b252ab90c9a5e Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Wed, 28 Sep 2022 15:26:16 -0500
Subject: Limit and filter the number of backfill points to get from the
 database (#13879)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is no need to grab thousands of backfill points when we only need 5 to make the `/backfill` request with. We need to grab a few extra in case the first few aren't visible in the history.

Previously, we grabbed thousands of backfill points from the database, then sorted and filtered them in the app. Fetching the 4.6k backfill points for `#matrix:matrix.org` from the database takes ~50ms - ~570ms so it's not like this saves a lot of time 🤷. But it might save us more time now that `get_backfill_points_in_room`/`get_insertion_event_backward_extremities_in_room` are more complicated after https://github.com/matrix-org/synapse/pull/13635

This PR moves the filtering and limiting to the SQL query so we just have less data to work with in the first place.

Part of https://github.com/matrix-org/synapse/issues/13356
---
 changelog.d/13879.misc                             |   1 +
 synapse/handlers/federation.py                     | 109 ++++++++++++---------
 synapse/storage/databases/main/event_federation.py |  90 ++++++++++++++---
 tests/storage/test_event_federation.py             |  80 ++++++++++-----
 4 files changed, 198 insertions(+), 82 deletions(-)
 create mode 100644 changelog.d/13879.misc

(limited to 'synapse')

diff --git a/changelog.d/13879.misc b/changelog.d/13879.misc
new file mode 100644
index 0000000000..3cc2a2420f
--- /dev/null
+++ b/changelog.d/13879.misc
@@ -0,0 +1 @@
+Only pull relevant backfill points from the database based on the current depth and limit (instead of all) every time we want to `/backfill`.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 360ab6fee2..500c1c16d0 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -38,7 +38,7 @@ from signedjson.sign import verify_signed_json
 from unpaddedbase64 import decode_base64
 
 from synapse import event_auth
-from synapse.api.constants import EventContentFields, EventTypes, Membership
+from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership
 from synapse.api.errors import (
     AuthError,
     CodeMessageException,
@@ -211,7 +211,7 @@ class FederationHandler:
         current_depth: int,
         limit: int,
         *,
-        processing_start_time: int,
+        processing_start_time: Optional[int],
     ) -> bool:
         """
         Checks whether the `current_depth` is at or approaching any backfill
@@ -223,12 +223,23 @@ class FederationHandler:
             room_id: The room to backfill in.
             current_depth: The depth to check at for any upcoming backfill points.
             limit: The max number of events to request from the remote federated server.
-            processing_start_time: The time when `maybe_backfill` started
-                processing. Only used for timing.
+            processing_start_time: The time when `maybe_backfill` started processing.
+                Only used for timing. If `None`, no timing observation will be made.
         """
         backwards_extremities = [
             _BackfillPoint(event_id, depth, _BackfillPointType.BACKWARDS_EXTREMITY)
-            for event_id, depth in await self.store.get_backfill_points_in_room(room_id)
+            for event_id, depth in await self.store.get_backfill_points_in_room(
+                room_id=room_id,
+                current_depth=current_depth,
+                # We only need to end up with 5 extremities combined with the
+                # insertion event extremities to make the `/backfill` request
+                # but fetch an order of magnitude more to make sure there is
+                # enough even after we filter them by whether visible in the
+                # history. This isn't fool-proof as all backfill points within
+                # our limit could be filtered out but seems like a good amount
+                # to try with at least.
+                limit=50,
+            )
         ]
 
         insertion_events_to_be_backfilled: List[_BackfillPoint] = []
@@ -236,7 +247,12 @@ class FederationHandler:
             insertion_events_to_be_backfilled = [
                 _BackfillPoint(event_id, depth, _BackfillPointType.INSERTION_PONT)
                 for event_id, depth in await self.store.get_insertion_event_backward_extremities_in_room(
-                    room_id
+                    room_id=room_id,
+                    current_depth=current_depth,
+                    # We only need to end up with 5 extremities combined with
+                    # the backfill points to make the `/backfill` request ...
+                    # (see the other comment above for more context).
+                    limit=50,
                 )
             ]
         logger.debug(
@@ -245,10 +261,6 @@ class FederationHandler:
             insertion_events_to_be_backfilled,
         )
 
-        if not backwards_extremities and not insertion_events_to_be_backfilled:
-            logger.debug("Not backfilling as no extremeties found.")
-            return False
-
         # we now have a list of potential places to backpaginate from. We prefer to
         # start with the most recent (ie, max depth), so let's sort the list.
         sorted_backfill_points: List[_BackfillPoint] = sorted(
@@ -269,6 +281,33 @@ class FederationHandler:
             sorted_backfill_points,
         )
 
+        # If we have no backfill points lower than the `current_depth` then
+        # either we can a) bail or b) still attempt to backfill. We opt to try
+        # backfilling anyway just in case we do get relevant events.
+        if not sorted_backfill_points and current_depth != MAX_DEPTH:
+            logger.debug(
+                "_maybe_backfill_inner: all backfill points are *after* current depth. Trying again with later backfill points."
+            )
+            return await self._maybe_backfill_inner(
+                room_id=room_id,
+                # We use `MAX_DEPTH` so that we find all backfill points next
+                # time (all events are below the `MAX_DEPTH`)
+                current_depth=MAX_DEPTH,
+                limit=limit,
+                # We don't want to start another timing observation from this
+                # nested recursive call. The top-most call can record the time
+                # overall otherwise the smaller one will throw off the results.
+                processing_start_time=None,
+            )
+
+        # Even after recursing with `MAX_DEPTH`, we didn't find any
+        # backward extremities to backfill from.
+        if not sorted_backfill_points:
+            logger.debug(
+                "_maybe_backfill_inner: Not backfilling as no backward extremeties found."
+            )
+            return False
+
         # If we're approaching an extremity we trigger a backfill, otherwise we
         # no-op.
         #
@@ -278,47 +317,16 @@ class FederationHandler:
         # chose more than one times the limit in case of failure, but choosing a
         # much larger factor will result in triggering a backfill request much
         # earlier than necessary.
-        #
-        # XXX: shouldn't we do this *after* the filter by depth below? Again, we don't
-        # care about events that have happened after our current position.
-        #
-        max_depth = sorted_backfill_points[0].depth
-        if current_depth - 2 * limit > max_depth:
+        max_depth_of_backfill_points = sorted_backfill_points[0].depth
+        if current_depth - 2 * limit > max_depth_of_backfill_points:
             logger.debug(
                 "Not backfilling as we don't need to. %d < %d - 2 * %d",
-                max_depth,
+                max_depth_of_backfill_points,
                 current_depth,
                 limit,
             )
             return False
 
-        # We ignore extremities that have a greater depth than our current depth
-        # as:
-        #    1. we don't really care about getting events that have happened
-        #       after our current position; and
-        #    2. we have likely previously tried and failed to backfill from that
-        #       extremity, so to avoid getting "stuck" requesting the same
-        #       backfill repeatedly we drop those extremities.
-        #
-        # However, we need to check that the filtered extremities are non-empty.
-        # If they are empty then either we can a) bail or b) still attempt to
-        # backfill. We opt to try backfilling anyway just in case we do get
-        # relevant events.
-        #
-        filtered_sorted_backfill_points = [
-            t for t in sorted_backfill_points if t.depth <= current_depth
-        ]
-        if filtered_sorted_backfill_points:
-            logger.debug(
-                "_maybe_backfill_inner: backfill points before current depth: %s",
-                filtered_sorted_backfill_points,
-            )
-            sorted_backfill_points = filtered_sorted_backfill_points
-        else:
-            logger.debug(
-                "_maybe_backfill_inner: all backfill points are *after* current depth. Backfilling anyway."
-            )
-
         # For performance's sake, we only want to paginate from a particular extremity
         # if we can actually see the events we'll get. Otherwise, we'd just spend a lot
         # of resources to get redacted events. We check each extremity in turn and
@@ -452,10 +460,15 @@ class FederationHandler:
 
             return False
 
-        processing_end_time = self.clock.time_msec()
-        backfill_processing_before_timer.observe(
-            (processing_end_time - processing_start_time) / 1000
-        )
+        # If we have the `processing_start_time`, then we can make an
+        # observation. We wouldn't have the `processing_start_time` in the case
+        # where `_maybe_backfill_inner` is recursively called to find any
+        # backfill points regardless of `current_depth`.
+        if processing_start_time is not None:
+            processing_end_time = self.clock.time_msec()
+            backfill_processing_before_timer.observe(
+                (processing_end_time - processing_start_time) / 1000
+            )
 
         success = await try_backfill(likely_domains)
         if success:
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 3251fca6fb..17f2fd4458 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -726,17 +726,35 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
     async def get_backfill_points_in_room(
         self,
         room_id: str,
+        current_depth: int,
+        limit: int,
     ) -> List[Tuple[str, int]]:
         """
-        Gets the oldest events(backwards extremities) in the room along with the
-        approximate depth. Sorted by depth, highest to lowest (descending).
+        Get the backward extremities to backfill from in the room along with the
+        approximate depth.
+
+        Only returns events that are at a depth lower than or
+        equal to the `current_depth`. Sorted by depth, highest to lowest (descending)
+        so the closest events to the `current_depth` are first in the list.
+
+        We ignore extremities that are newer than the user's current scroll position
+        (ie, those with depth greater than `current_depth`) as:
+            1. we don't really care about getting events that have happened
+               after our current position; and
+            2. by the nature of paginating and scrolling back, we have likely
+               previously tried and failed to backfill from that extremity, so
+               to avoid getting "stuck" requesting the same backfill repeatedly
+               we drop those extremities.
 
         Args:
             room_id: Room where we want to find the oldest events
+            current_depth: The depth at the user's current scrollback position
+            limit: The max number of backfill points to return
 
         Returns:
             List of (event_id, depth) tuples. Sorted by depth, highest to lowest
-            (descending)
+            (descending) so the closest events to the `current_depth` are first
+            in the list.
         """
 
         def get_backfill_points_in_room_txn(
@@ -784,6 +802,18 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
                      * necessarily safe to assume that it will have been completed.
                      */
                     AND edge.is_state is ? /* False */
+                    /**
+                     * We only want backwards extremities that are older than or at
+                     * the same position of the given `current_depth` (where older
+                     * means less than the given depth) because we're looking backwards
+                     * from the `current_depth` when backfilling.
+                     *
+                     *                         current_depth (ignore events that come after this, ignore 2-4)
+                     *                         |
+                     *                         ▼
+                     * <oldest-in-time> [0]<--[1]<--[2]<--[3]<--[4] <newest-in-time>
+                     */
+                    AND event.depth <= ? /* current_depth */
                     /**
                      * Exponential back-off (up to the upper bound) so we don't retry the
                      * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc.
@@ -798,11 +828,13 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
                         OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((1 << failed_backfill_attempt_info.num_attempts) * ? /* step */, ? /* upper bound */)
                     )
                 /**
-                 * Sort from highest to the lowest depth. Then tie-break on
-                 * alphabetical order of the event_ids so we get a consistent
-                 * ordering which is nice when asserting things in tests.
+                 * Sort from highest (closest to the `current_depth`) to the lowest depth
+                 * because the closest are most relevant to backfill from first.
+                 * Then tie-break on alphabetical order of the event_ids so we get a
+                 * consistent ordering which is nice when asserting things in tests.
                  */
                 ORDER BY event.depth DESC, backward_extrem.event_id DESC
+                LIMIT ?
             """
 
             if isinstance(self.database_engine, PostgresEngine):
@@ -817,9 +849,11 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
                 (
                     room_id,
                     False,
+                    current_depth,
                     self._clock.time_msec(),
                     1000 * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_SECONDS,
                     1000 * BACKFILL_EVENT_BACKOFF_UPPER_BOUND_SECONDS,
+                    limit,
                 ),
             )
 
@@ -835,18 +869,34 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
     async def get_insertion_event_backward_extremities_in_room(
         self,
         room_id: str,
+        current_depth: int,
+        limit: int,
     ) -> List[Tuple[str, int]]:
         """
         Get the insertion events we know about that we haven't backfilled yet
-        along with the approximate depth. Sorted by depth, highest to lowest
-        (descending).
+        along with the approximate depth. Only returns insertion events that are
+        at a depth lower than or equal to the `current_depth`. Sorted by depth,
+        highest to lowest (descending) so the closest events to the
+        `current_depth` are first in the list.
+
+        We ignore insertion events that are newer than the user's current scroll
+        position (ie, those with depth greater than `current_depth`) as:
+            1. we don't really care about getting events that have happened
+               after our current position; and
+            2. by the nature of paginating and scrolling back, we have likely
+               previously tried and failed to backfill from that insertion event, so
+               to avoid getting "stuck" requesting the same backfill repeatedly
+               we drop those insertion events.
 
         Args:
             room_id: Room where we want to find the oldest events
+            current_depth: The depth at the user's current scrollback position
+            limit: The max number of insertion event extremities to return
 
         Returns:
             List of (event_id, depth) tuples. Sorted by depth, highest to lowest
-            (descending)
+            (descending) so the closest events to the `current_depth` are first
+            in the list.
         """
 
         def get_insertion_event_backward_extremities_in_room_txn(
@@ -869,6 +919,18 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
                     AND failed_backfill_attempt_info.event_id = insertion_event_extremity.event_id
                 WHERE
                     insertion_event_extremity.room_id = ?
+                    /**
+                     * We only want extremities that are older than or at
+                     * the same position of the given `current_depth` (where older
+                     * means less than the given depth) because we're looking backwards
+                     * from the `current_depth` when backfilling.
+                     *
+                     *                         current_depth (ignore events that come after this, ignore 2-4)
+                     *                         |
+                     *                         ▼
+                     * <oldest-in-time> [0]<--[1]<--[2]<--[3]<--[4] <newest-in-time>
+                     */
+                    AND event.depth <= ? /* current_depth */
                     /**
                      * Exponential back-off (up to the upper bound) so we don't retry the
                      * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc
@@ -883,11 +945,13 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
                         OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((1 << failed_backfill_attempt_info.num_attempts) * ? /* step */, ? /* upper bound */)
                     )
                 /**
-                 * Sort from highest to the lowest depth. Then tie-break on
-                 * alphabetical order of the event_ids so we get a consistent
-                 * ordering which is nice when asserting things in tests.
+                 * Sort from highest (closest to the `current_depth`) to the lowest depth
+                 * because the closest are most relevant to backfill from first.
+                 * Then tie-break on alphabetical order of the event_ids so we get a
+                 * consistent ordering which is nice when asserting things in tests.
                  */
                 ORDER BY event.depth DESC, insertion_event_extremity.event_id DESC
+                LIMIT ?
             """
 
             if isinstance(self.database_engine, PostgresEngine):
@@ -901,9 +965,11 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
                 sql % (least_function,),
                 (
                     room_id,
+                    current_depth,
                     self._clock.time_msec(),
                     1000 * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_SECONDS,
                     1000 * BACKFILL_EVENT_BACKOFF_UPPER_BOUND_SECONDS,
+                    limit,
                 ),
             )
             return cast(List[Tuple[str, int]], txn.fetchall())
diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index 85739c464e..398f338b66 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -754,19 +754,31 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
 
     def test_get_backfill_points_in_room(self):
         """
-        Test to make sure we get some backfill points
+        Test to make sure only backfill points that are older and come before
+        the `current_depth` are returned.
         """
         setup_info = self._setup_room_for_backfill_tests()
         room_id = setup_info.room_id
+        depth_map = setup_info.depth_map
 
+        # Try at "B"
         backfill_points = self.get_success(
-            self.store.get_backfill_points_in_room(room_id)
+            self.store.get_backfill_points_in_room(room_id, depth_map["B"], limit=100)
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
         self.assertListEqual(
             backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2", "b1"]
         )
 
+        # Try at "A"
+        backfill_points = self.get_success(
+            self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100)
+        )
+        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
+        # Event "2" has a depth of 2 but is not included here because we only
+        # know the approximate depth of 5 from our event "3".
+        self.assertListEqual(backfill_event_ids, ["b3", "b2", "b1"])
+
     def test_get_backfill_points_in_room_excludes_events_we_have_attempted(
         self,
     ):
@@ -776,6 +788,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         """
         setup_info = self._setup_room_for_backfill_tests()
         room_id = setup_info.room_id
+        depth_map = setup_info.depth_map
 
         # Record some attempts to backfill these events which will make
         # `get_backfill_points_in_room` exclude them because we
@@ -795,8 +808,9 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
 
         # No time has passed since we attempted to backfill ^
 
+        # Try at "B"
         backfill_points = self.get_success(
-            self.store.get_backfill_points_in_room(room_id)
+            self.store.get_backfill_points_in_room(room_id, depth_map["B"], limit=100)
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
         # Only the backfill points that we didn't record earlier exist here.
@@ -812,6 +826,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         """
         setup_info = self._setup_room_for_backfill_tests()
         room_id = setup_info.room_id
+        depth_map = setup_info.depth_map
 
         # Record some attempts to backfill these events which will make
         # `get_backfill_points_in_room` exclude them because we
@@ -839,26 +854,24 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         # visible regardless.
         self.reactor.advance(datetime.timedelta(hours=2).total_seconds())
 
-        # Make sure that "b1" is not in the list because we've
+        # Try at "A" and make sure that "b1" is not in the list because we've
         # already attempted many times
         backfill_points = self.get_success(
-            self.store.get_backfill_points_in_room(room_id)
+            self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100)
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
-        self.assertListEqual(backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2"])
+        self.assertListEqual(backfill_event_ids, ["b3", "b2"])
 
         # Now advance time by 20 hours (above 2^4 because we made 4 attemps) and
         # see if we can now backfill it
         self.reactor.advance(datetime.timedelta(hours=20).total_seconds())
 
-        # Try again after we advanced enough time and we should see "b3" again
+        # Try at "A" again after we advanced enough time and we should see "b1" again
         backfill_points = self.get_success(
-            self.store.get_backfill_points_in_room(room_id)
+            self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100)
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
-        self.assertListEqual(
-            backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2", "b1"]
-        )
+        self.assertListEqual(backfill_event_ids, ["b3", "b2", "b1"])
 
     def _setup_room_for_insertion_backfill_tests(self) -> _BackfillSetupInfo:
         """
@@ -938,19 +951,35 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
 
     def test_get_insertion_event_backward_extremities_in_room(self):
         """
-        Test to make sure insertion event backward extremities are returned.
+        Test to make sure only insertion event backward extremities that are
+        older and come before the `current_depth` are returned.
         """
         setup_info = self._setup_room_for_insertion_backfill_tests()
         room_id = setup_info.room_id
+        depth_map = setup_info.depth_map
 
+        # Try at "insertion_eventB"
         backfill_points = self.get_success(
-            self.store.get_insertion_event_backward_extremities_in_room(room_id)
+            self.store.get_insertion_event_backward_extremities_in_room(
+                room_id, depth_map["insertion_eventB"], limit=100
+            )
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
         self.assertListEqual(
             backfill_event_ids, ["insertion_eventB", "insertion_eventA"]
         )
 
+        # Try at "insertion_eventA"
+        backfill_points = self.get_success(
+            self.store.get_insertion_event_backward_extremities_in_room(
+                room_id, depth_map["insertion_eventA"], limit=100
+            )
+        )
+        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
+        # Event "2" has a depth of 2 but is not included here because we only
+        # know the approximate depth of 5 from our event "3".
+        self.assertListEqual(backfill_event_ids, ["insertion_eventA"])
+
     def test_get_insertion_event_backward_extremities_in_room_excludes_events_we_have_attempted(
         self,
     ):
@@ -961,6 +990,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         """
         setup_info = self._setup_room_for_insertion_backfill_tests()
         room_id = setup_info.room_id
+        depth_map = setup_info.depth_map
 
         # Record some attempts to backfill these events which will make
         # `get_insertion_event_backward_extremities_in_room` exclude them
@@ -973,8 +1003,11 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
 
         # No time has passed since we attempted to backfill ^
 
+        # Try at "insertion_eventB"
         backfill_points = self.get_success(
-            self.store.get_insertion_event_backward_extremities_in_room(room_id)
+            self.store.get_insertion_event_backward_extremities_in_room(
+                room_id, depth_map["insertion_eventB"], limit=100
+            )
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
         # Only the backfill points that we didn't record earlier exist here.
@@ -991,6 +1024,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         """
         setup_info = self._setup_room_for_insertion_backfill_tests()
         room_id = setup_info.room_id
+        depth_map = setup_info.depth_map
 
         # Record some attempts to backfill these events which will make
         # `get_backfill_points_in_room` exclude them because we
@@ -1027,13 +1061,15 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         # because we haven't waited long enough for this many attempts.
         self.reactor.advance(datetime.timedelta(hours=2).total_seconds())
 
-        # Make sure that "insertion_eventA" is not in the list because we've
-        # already attempted many times
+        # Try at "insertion_eventA" and make sure that "insertion_eventA" is not
+        # in the list because we've already attempted many times
         backfill_points = self.get_success(
-            self.store.get_insertion_event_backward_extremities_in_room(room_id)
+            self.store.get_insertion_event_backward_extremities_in_room(
+                room_id, depth_map["insertion_eventA"], limit=100
+            )
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
-        self.assertListEqual(backfill_event_ids, ["insertion_eventB"])
+        self.assertListEqual(backfill_event_ids, [])
 
         # Now advance time by 20 hours (above 2^4 because we made 4 attemps) and
         # see if we can now backfill it
@@ -1042,12 +1078,12 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         # Try at "insertion_eventA" again after we advanced enough time and we
         # should see "insertion_eventA" again
         backfill_points = self.get_success(
-            self.store.get_insertion_event_backward_extremities_in_room(room_id)
+            self.store.get_insertion_event_backward_extremities_in_room(
+                room_id, depth_map["insertion_eventA"], limit=100
+            )
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
-        self.assertListEqual(
-            backfill_event_ids, ["insertion_eventB", "insertion_eventA"]
-        )
+        self.assertListEqual(backfill_event_ids, ["insertion_eventA"])
 
 
 @attr.s
-- 
cgit 1.5.1


From 5f659d4a88e602ca8519984808dcf4df036c781b Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 28 Sep 2022 23:22:35 +0100
Subject: Handle local device list updates during partial join (#13934)

---
 changelog.d/13934.misc                    |  1 +
 synapse/handlers/device.py                | 84 ++++++++++++++++++++++++++++++-
 synapse/storage/databases/main/devices.py | 55 +++++++++++++++-----
 synapse/storage/databases/main/room.py    | 16 ++++++
 4 files changed, 141 insertions(+), 15 deletions(-)
 create mode 100644 changelog.d/13934.misc

(limited to 'synapse')

diff --git a/changelog.d/13934.misc b/changelog.d/13934.misc
new file mode 100644
index 0000000000..6610a9f567
--- /dev/null
+++ b/changelog.d/13934.misc
@@ -0,0 +1 @@
+Correctly handle sending local device list updates to remote servers during a partial join.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index f2ef591103..03082fce42 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -762,10 +762,90 @@ class DeviceHandler(DeviceWorkerHandler):
         gone from partial to full state.
         """
 
-        # We defer to the device list updater implementation as we're on the
-        # right worker.
+        # We defer to the device list updater to handle pending remote device
+        # list updates.
         await self.device_list_updater.handle_room_un_partial_stated(room_id)
 
+        # Replay local updates.
+        (
+            join_event_id,
+            device_lists_stream_id,
+        ) = await self.store.get_join_event_id_and_device_lists_stream_id_for_partial_state(
+            room_id
+        )
+
+        # Get the local device list changes that have happened in the room since
+        # we started joining. If there are no updates there's nothing left to do.
+        changes = await self.store.get_device_list_changes_in_room(
+            room_id, device_lists_stream_id
+        )
+        local_changes = {(u, d) for u, d in changes if self.hs.is_mine_id(u)}
+        if not local_changes:
+            return
+
+        # Note: We have persisted the full state at this point, we just haven't
+        # cleared the `partial_room` flag.
+        join_state_ids = await self._state_storage.get_state_ids_for_event(
+            join_event_id, await_full_state=False
+        )
+        current_state_ids = await self.store.get_partial_current_state_ids(room_id)
+
+        # Now we need to work out all servers that might have been in the room
+        # at any point during our join.
+
+        # First we look for any membership states that have changed between the
+        # initial join and now...
+        all_keys = set(join_state_ids)
+        all_keys.update(current_state_ids)
+
+        potentially_changed_hosts = set()
+        for etype, state_key in all_keys:
+            if etype != EventTypes.Member:
+                continue
+
+            prev = join_state_ids.get((etype, state_key))
+            current = current_state_ids.get((etype, state_key))
+
+            if prev != current:
+                potentially_changed_hosts.add(get_domain_from_id(state_key))
+
+        # ... then we add all the hosts that are currently joined to the room...
+        current_hosts_in_room = await self.store.get_current_hosts_in_room(room_id)
+        potentially_changed_hosts.update(current_hosts_in_room)
+
+        # ... and finally we remove any hosts that we were told about, as we
+        # will have sent device list updates to those hosts when they happened.
+        known_hosts_at_join = await self.store.get_partial_state_servers_at_join(
+            room_id
+        )
+        potentially_changed_hosts.difference_update(known_hosts_at_join)
+
+        potentially_changed_hosts.discard(self.server_name)
+
+        if not potentially_changed_hosts:
+            # Nothing to do.
+            return
+
+        logger.info(
+            "Found %d changed hosts to send device list updates to",
+            len(potentially_changed_hosts),
+        )
+
+        for user_id, device_id in local_changes:
+            await self.store.add_device_list_outbound_pokes(
+                user_id=user_id,
+                device_id=device_id,
+                room_id=room_id,
+                stream_id=None,
+                hosts=potentially_changed_hosts,
+                context=None,
+            )
+
+        # Notify things that device lists need to be sent out.
+        self.notifier.notify_replication()
+        for host in potentially_changed_hosts:
+            self.federation_sender.send_device_messages(host, immediate=False)
+
 
 def _update_device_from_client_ips(
     device: JsonDict, client_ips: Mapping[Tuple[str, str], Mapping[str, Any]]
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 1e562d4a40..18358eca46 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1307,6 +1307,33 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
 
         return changes
 
+    async def get_device_list_changes_in_room(
+        self, room_id: str, min_stream_id: int
+    ) -> Collection[Tuple[str, str]]:
+        """Get all device list changes that happened in the room since the given
+        stream ID.
+
+        Returns:
+            Collection of user ID/device ID tuples of all devices that have
+            changed
+        """
+
+        sql = """
+            SELECT DISTINCT user_id, device_id FROM device_lists_changes_in_room
+            WHERE room_id = ? AND stream_id > ?
+        """
+
+        def get_device_list_changes_in_room_txn(
+            txn: LoggingTransaction,
+        ) -> Collection[Tuple[str, str]]:
+            txn.execute(sql, (room_id, min_stream_id))
+            return cast(Collection[Tuple[str, str]], txn.fetchall())
+
+        return await self.db_pool.runInteraction(
+            "get_device_list_changes_in_room",
+            get_device_list_changes_in_room_txn,
+        )
+
 
 class DeviceBackgroundUpdateStore(SQLBaseStore):
     def __init__(
@@ -1946,14 +1973,15 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         user_id: str,
         device_id: str,
         room_id: str,
-        stream_id: int,
+        stream_id: Optional[int],
         hosts: Collection[str],
         context: Optional[Dict[str, str]],
     ) -> None:
         """Queue the device update to be sent to the given set of hosts,
         calculated from the room ID.
 
-        Marks the associated row in `device_lists_changes_in_room` as handled.
+        Marks the associated row in `device_lists_changes_in_room` as handled,
+        if `stream_id` is provided.
         """
 
         def add_device_list_outbound_pokes_txn(
@@ -1969,17 +1997,18 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
                     context=context,
                 )
 
-            self.db_pool.simple_update_txn(
-                txn,
-                table="device_lists_changes_in_room",
-                keyvalues={
-                    "user_id": user_id,
-                    "device_id": device_id,
-                    "stream_id": stream_id,
-                    "room_id": room_id,
-                },
-                updatevalues={"converted_to_destinations": True},
-            )
+            if stream_id:
+                self.db_pool.simple_update_txn(
+                    txn,
+                    table="device_lists_changes_in_room",
+                    keyvalues={
+                        "user_id": user_id,
+                        "device_id": device_id,
+                        "stream_id": stream_id,
+                        "room_id": room_id,
+                    },
+                    updatevalues={"converted_to_destinations": True},
+                )
 
         if not hosts:
             # If there are no hosts then we don't try and generate stream IDs.
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 672c9a03fc..059eef5c22 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -1256,6 +1256,22 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
 
         return entry is not None
 
+    async def get_join_event_id_and_device_lists_stream_id_for_partial_state(
+        self, room_id: str
+    ) -> Tuple[str, int]:
+        """Get the event ID of the initial join that started the partial
+        join, and the device list stream ID at the point we started the partial
+        join.
+        """
+
+        result = await self.db_pool.simple_select_one(
+            table="partial_state_rooms",
+            keyvalues={"room_id": room_id},
+            retcols=("join_event_id", "device_lists_stream_id"),
+            desc="get_join_event_id_for_partial_state",
+        )
+        return result["join_event_id"], result["device_lists_stream_id"]
+
 
 class _BackgroundUpdates:
     REMOVE_TOMESTONED_ROOMS_BG_UPDATE = "remove_tombstoned_rooms_from_directory"
-- 
cgit 1.5.1


From 73ecff7e9ed456c64368296858d17d4b393c9f9a Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Thu, 29 Sep 2022 10:00:02 +0000
Subject: Improve backfill robustness by trying more servers. (#13890)

Co-authored-by: Eric Eastwood <erice@element.io>
---
 changelog.d/13890.misc         |  1 +
 synapse/handlers/federation.py | 33 +++++++++++++++++++++++++++++++--
 2 files changed, 32 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/13890.misc

(limited to 'synapse')

diff --git a/changelog.d/13890.misc b/changelog.d/13890.misc
new file mode 100644
index 0000000000..bf76cf7be7
--- /dev/null
+++ b/changelog.d/13890.misc
@@ -0,0 +1 @@
+Improve backfill robustness by trying more servers when we get a `4xx` error back.
\ No newline at end of file
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 500c1c16d0..b866258298 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -417,6 +417,15 @@ class FederationHandler:
 
         async def try_backfill(domains: Collection[str]) -> bool:
             # TODO: Should we try multiple of these at a time?
+
+            # Number of contacted remote homeservers that have denied our backfill
+            # request with a 4xx code.
+            denied_count = 0
+
+            # Maximum number of contacted remote homeservers that can deny our
+            # backfill request with 4xx codes before we give up.
+            max_denied_count = 5
+
             for dom in domains:
                 # We don't want to ask our own server for information we don't have
                 if dom == self.server_name:
@@ -435,13 +444,33 @@ class FederationHandler:
                     continue
                 except HttpResponseException as e:
                     if 400 <= e.code < 500:
-                        raise e.to_synapse_error()
+                        logger.warning(
+                            "Backfill denied from %s because %s [%d/%d]",
+                            dom,
+                            e,
+                            denied_count,
+                            max_denied_count,
+                        )
+                        denied_count += 1
+                        if denied_count >= max_denied_count:
+                            return False
+                        continue
 
                     logger.info("Failed to backfill from %s because %s", dom, e)
                     continue
                 except CodeMessageException as e:
                     if 400 <= e.code < 500:
-                        raise
+                        logger.warning(
+                            "Backfill denied from %s because %s [%d/%d]",
+                            dom,
+                            e,
+                            denied_count,
+                            max_denied_count,
+                        )
+                        denied_count += 1
+                        if denied_count >= max_denied_count:
+                            return False
+                        continue
 
                     logger.info("Failed to backfill from %s because %s", dom, e)
                     continue
-- 
cgit 1.5.1


From 99a7e7e0230cba5d00ec204926edae89d4b6b8c3 Mon Sep 17 00:00:00 2001
From: Nicolas Werner <89468146+nico-famedly@users.noreply.github.com>
Date: Thu, 29 Sep 2022 10:57:00 +0000
Subject: Always send default and rule_id to clients (#13904)

---
 changelog.d/13904.bugfix     | 1 +
 synapse/push/clientformat.py | 6 ++----
 2 files changed, 3 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/13904.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13904.bugfix b/changelog.d/13904.bugfix
new file mode 100644
index 0000000000..397a3108ac
--- /dev/null
+++ b/changelog.d/13904.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in 1.66 where some required fields in the pushrules sent to clients were not present anymore. Contributed by Nico.
diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py
index ebc13beda1..7095ae83f9 100644
--- a/synapse/push/clientformat.py
+++ b/synapse/push/clientformat.py
@@ -102,10 +102,8 @@ def _rule_to_template(rule: PushRule) -> Optional[Dict[str, Any]]:
         # with PRIORITY_CLASS_INVERSE_MAP.
         raise ValueError("Unexpected template_name: %s" % (template_name,))
 
-    if unscoped_rule_id:
-        templaterule["rule_id"] = unscoped_rule_id
-    if rule.default:
-        templaterule["default"] = True
+    templaterule["rule_id"] = unscoped_rule_id
+    templaterule["default"] = rule.default
     return templaterule
 
 
-- 
cgit 1.5.1


From 568016929f3d22f632cb9145429fa45754a8d59f Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 29 Sep 2022 07:07:31 -0400
Subject: Clarify that a method returns only unthreaded receipts. (#13937)

By renaming it and updating the docstring.

Additionally, refactors a method which is used only by tests.
---
 changelog.d/13937.feature                          |  1 +
 .../storage/databases/main/event_push_actions.py   | 12 +---
 synapse/storage/databases/main/receipts.py         | 36 ++---------
 tests/storage/test_receipts.py                     | 74 +++++++++++-----------
 4 files changed, 47 insertions(+), 76 deletions(-)
 create mode 100644 changelog.d/13937.feature

(limited to 'synapse')

diff --git a/changelog.d/13937.feature b/changelog.d/13937.feature
new file mode 100644
index 0000000000..d0cb902dff
--- /dev/null
+++ b/changelog.d/13937.feature
@@ -0,0 +1 @@
+Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)).
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index f4cdc2e399..7e0ffef7d3 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -366,14 +366,11 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         user_id: str,
     ) -> NotifCounts:
         # Get the stream ordering of the user's latest receipt in the room.
-        result = self.get_last_receipt_for_user_txn(
+        result = self.get_last_unthreaded_receipt_for_user_txn(
             txn,
             user_id,
             room_id,
-            receipt_types=(
-                ReceiptTypes.READ,
-                ReceiptTypes.READ_PRIVATE,
-            ),
+            receipt_types=(ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE),
         )
 
         if result:
@@ -574,10 +571,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         receipt_types_clause, args = make_in_list_sql_clause(
             self.database_engine,
             "receipt_type",
-            (
-                ReceiptTypes.READ,
-                ReceiptTypes.READ_PRIVATE,
-            ),
+            (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE),
         )
 
         sql = f"""
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index 52fe0db924..246f78ac1f 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -135,34 +135,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
         """Get the current max stream ID for receipts stream"""
         return self._receipts_id_gen.get_current_token()
 
-    async def get_last_receipt_event_id_for_user(
-        self, user_id: str, room_id: str, receipt_types: Collection[str]
-    ) -> Optional[str]:
-        """
-        Fetch the event ID for the latest receipt in a room with one of the given receipt types.
-
-        Args:
-            user_id: The user to fetch receipts for.
-            room_id: The room ID to fetch the receipt for.
-            receipt_type: The receipt types to fetch.
-
-        Returns:
-            The latest receipt, if one exists.
-        """
-        result = await self.db_pool.runInteraction(
-            "get_last_receipt_event_id_for_user",
-            self.get_last_receipt_for_user_txn,
-            user_id,
-            room_id,
-            receipt_types,
-        )
-        if not result:
-            return None
-
-        event_id, _ = result
-        return event_id
-
-    def get_last_receipt_for_user_txn(
+    def get_last_unthreaded_receipt_for_user_txn(
         self,
         txn: LoggingTransaction,
         user_id: str,
@@ -170,13 +143,13 @@ class ReceiptsWorkerStore(SQLBaseStore):
         receipt_types: Collection[str],
     ) -> Optional[Tuple[str, int]]:
         """
-        Fetch the event ID and stream_ordering for the latest receipt in a room
-        with one of the given receipt types.
+        Fetch the event ID and stream_ordering for the latest unthreaded receipt
+        in a room with one of the given receipt types.
 
         Args:
             user_id: The user to fetch receipts for.
             room_id: The room ID to fetch the receipt for.
-            receipt_type: The receipt types to fetch.
+            receipt_types: The receipt types to fetch.
 
         Returns:
             The event ID and stream ordering of the latest receipt, if one exists.
@@ -193,6 +166,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
             WHERE {clause}
             AND user_id = ?
             AND room_id = ?
+            AND thread_id IS NULL
             ORDER BY stream_ordering DESC
             LIMIT 1
         """
diff --git a/tests/storage/test_receipts.py b/tests/storage/test_receipts.py
index 9459ee1705..81253d0361 100644
--- a/tests/storage/test_receipts.py
+++ b/tests/storage/test_receipts.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Collection, Optional
 
 from synapse.api.constants import ReceiptTypes
 from synapse.types import UserID, create_requester
@@ -84,6 +85,33 @@ class ReceiptTestCase(HomeserverTestCase):
             )
         )
 
+    def get_last_unthreaded_receipt(
+        self, receipt_types: Collection[str], room_id: Optional[str] = None
+    ) -> Optional[str]:
+        """
+        Fetch the event ID for the latest unthreaded receipt in the test room for the test user.
+
+        Args:
+            receipt_types: The receipt types to fetch.
+
+        Returns:
+            The latest receipt, if one exists.
+        """
+        result = self.get_success(
+            self.store.db_pool.runInteraction(
+                "get_last_receipt_event_id_for_user",
+                self.store.get_last_unthreaded_receipt_for_user_txn,
+                OUR_USER_ID,
+                room_id or self.room_id1,
+                receipt_types,
+            )
+        )
+        if not result:
+            return None
+
+        event_id, _ = result
+        return event_id
+
     def test_return_empty_with_no_data(self) -> None:
         res = self.get_success(
             self.store.get_receipts_for_user(
@@ -107,16 +135,10 @@ class ReceiptTestCase(HomeserverTestCase):
         )
         self.assertEqual(res, {})
 
-        res = self.get_success(
-            self.store.get_last_receipt_event_id_for_user(
-                OUR_USER_ID,
-                self.room_id1,
-                [
-                    ReceiptTypes.READ,
-                    ReceiptTypes.READ_PRIVATE,
-                ],
-            )
+        res = self.get_last_unthreaded_receipt(
+            [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE]
         )
+
         self.assertEqual(res, None)
 
     def test_get_receipts_for_user(self) -> None:
@@ -228,29 +250,17 @@ class ReceiptTestCase(HomeserverTestCase):
         )
 
         # Test we get the latest event when we want both private and public receipts
-        res = self.get_success(
-            self.store.get_last_receipt_event_id_for_user(
-                OUR_USER_ID,
-                self.room_id1,
-                [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE],
-            )
+        res = self.get_last_unthreaded_receipt(
+            [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE]
         )
         self.assertEqual(res, event1_2_id)
 
         # Test we get the older event when we want only public receipt
-        res = self.get_success(
-            self.store.get_last_receipt_event_id_for_user(
-                OUR_USER_ID, self.room_id1, [ReceiptTypes.READ]
-            )
-        )
+        res = self.get_last_unthreaded_receipt([ReceiptTypes.READ])
         self.assertEqual(res, event1_1_id)
 
         # Test we get the latest event when we want only the private receipt
-        res = self.get_success(
-            self.store.get_last_receipt_event_id_for_user(
-                OUR_USER_ID, self.room_id1, [ReceiptTypes.READ_PRIVATE]
-            )
-        )
+        res = self.get_last_unthreaded_receipt([ReceiptTypes.READ_PRIVATE])
         self.assertEqual(res, event1_2_id)
 
         # Test receipt updating
@@ -259,11 +269,7 @@ class ReceiptTestCase(HomeserverTestCase):
                 self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_2_id], None, {}
             )
         )
-        res = self.get_success(
-            self.store.get_last_receipt_event_id_for_user(
-                OUR_USER_ID, self.room_id1, [ReceiptTypes.READ]
-            )
-        )
+        res = self.get_last_unthreaded_receipt([ReceiptTypes.READ])
         self.assertEqual(res, event1_2_id)
 
         # Send some events into the second room
@@ -282,11 +288,7 @@ class ReceiptTestCase(HomeserverTestCase):
                 {},
             )
         )
-        res = self.get_success(
-            self.store.get_last_receipt_event_id_for_user(
-                OUR_USER_ID,
-                self.room_id2,
-                [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE],
-            )
+        res = self.get_last_unthreaded_receipt(
+            [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], room_id=self.room_id2
         )
         self.assertEqual(res, event2_1_id)
-- 
cgit 1.5.1


From e5fdf16d4680b00ca8120ddb697bd14ab89fdf0c Mon Sep 17 00:00:00 2001
From: Hugh Nimmo-Smith <hughns@users.noreply.github.com>
Date: Thu, 29 Sep 2022 12:22:27 +0100
Subject: Expose MSC3882 only under an unstable endpoint. (#13868)

---
 changelog.d/13868.misc                        |  1 +
 synapse/rest/client/login_token_request.py    |  4 +++-
 tests/rest/client/test_login_token_request.py | 16 +++++++++-------
 3 files changed, 13 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/13868.misc

(limited to 'synapse')

diff --git a/changelog.d/13868.misc b/changelog.d/13868.misc
new file mode 100644
index 0000000000..d7a99c042a
--- /dev/null
+++ b/changelog.d/13868.misc
@@ -0,0 +1 @@
+Fix unstable MSC3882 endpoint being incorrectly available on stable API versions.
\ No newline at end of file
diff --git a/synapse/rest/client/login_token_request.py b/synapse/rest/client/login_token_request.py
index ca5c54bf17..277b20fb63 100644
--- a/synapse/rest/client/login_token_request.py
+++ b/synapse/rest/client/login_token_request.py
@@ -47,7 +47,9 @@ class LoginTokenRequestServlet(RestServlet):
     }
     """
 
-    PATTERNS = client_patterns("/login/token$")
+    PATTERNS = client_patterns(
+        "/org.matrix.msc3882/login/token$", releases=[], v1=False, unstable=True
+    )
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/tests/rest/client/test_login_token_request.py b/tests/rest/client/test_login_token_request.py
index d5bb16c98d..c2e1e08811 100644
--- a/tests/rest/client/test_login_token_request.py
+++ b/tests/rest/client/test_login_token_request.py
@@ -22,6 +22,8 @@ from synapse.util import Clock
 from tests import unittest
 from tests.unittest import override_config
 
+endpoint = "/_matrix/client/unstable/org.matrix.msc3882/login/token"
+
 
 class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase):
 
@@ -45,18 +47,18 @@ class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase):
         self.password = "password"
 
     def test_disabled(self) -> None:
-        channel = self.make_request("POST", "/login/token", {}, access_token=None)
+        channel = self.make_request("POST", endpoint, {}, access_token=None)
         self.assertEqual(channel.code, 400)
 
         self.register_user(self.user, self.password)
         token = self.login(self.user, self.password)
 
-        channel = self.make_request("POST", "/login/token", {}, access_token=token)
+        channel = self.make_request("POST", endpoint, {}, access_token=token)
         self.assertEqual(channel.code, 400)
 
     @override_config({"experimental_features": {"msc3882_enabled": True}})
     def test_require_auth(self) -> None:
-        channel = self.make_request("POST", "/login/token", {}, access_token=None)
+        channel = self.make_request("POST", endpoint, {}, access_token=None)
         self.assertEqual(channel.code, 401)
 
     @override_config({"experimental_features": {"msc3882_enabled": True}})
@@ -64,7 +66,7 @@ class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase):
         user_id = self.register_user(self.user, self.password)
         token = self.login(self.user, self.password)
 
-        channel = self.make_request("POST", "/login/token", {}, access_token=token)
+        channel = self.make_request("POST", endpoint, {}, access_token=token)
         self.assertEqual(channel.code, 401)
         self.assertIn({"stages": ["m.login.password"]}, channel.json_body["flows"])
 
@@ -79,7 +81,7 @@ class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase):
             },
         }
 
-        channel = self.make_request("POST", "/login/token", uia, access_token=token)
+        channel = self.make_request("POST", endpoint, uia, access_token=token)
         self.assertEqual(channel.code, 200)
         self.assertEqual(channel.json_body["expires_in"], 300)
 
@@ -100,7 +102,7 @@ class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase):
         user_id = self.register_user(self.user, self.password)
         token = self.login(self.user, self.password)
 
-        channel = self.make_request("POST", "/login/token", {}, access_token=token)
+        channel = self.make_request("POST", endpoint, {}, access_token=token)
         self.assertEqual(channel.code, 200)
         self.assertEqual(channel.json_body["expires_in"], 300)
 
@@ -127,6 +129,6 @@ class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase):
         self.register_user(self.user, self.password)
         token = self.login(self.user, self.password)
 
-        channel = self.make_request("POST", "/login/token", {}, access_token=token)
+        channel = self.make_request("POST", endpoint, {}, access_token=token)
         self.assertEqual(channel.code, 200)
         self.assertEqual(channel.json_body["expires_in"], 15)
-- 
cgit 1.5.1


From 8625ad80994d6049a778b5d1ef65c8d1b1042c74 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 29 Sep 2022 07:22:41 -0400
Subject: Explicit cast to enforce type hints. (#13939)

---
 changelog.d/13939.feature                            | 1 +
 synapse/storage/databases/main/event_push_actions.py | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/13939.feature

(limited to 'synapse')

diff --git a/changelog.d/13939.feature b/changelog.d/13939.feature
new file mode 100644
index 0000000000..d0cb902dff
--- /dev/null
+++ b/changelog.d/13939.feature
@@ -0,0 +1 @@
+Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)).
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 7e0ffef7d3..3fdf128d9e 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -1068,7 +1068,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 limit,
             ),
         )
-        rows = txn.fetchall()
+        rows = cast(List[Tuple[int, str, str, int]], txn.fetchall())
 
         # For each new read receipt we delete push actions from before it and
         # recalculate the summary.
@@ -1113,18 +1113,18 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         # We always update `event_push_summary_last_receipt_stream_id` to
         # ensure that we don't rescan the same receipts for remote users.
 
-        upper_limit = max_receipts_stream_id
+        receipts_last_processed_stream_id = max_receipts_stream_id
         if len(rows) >= limit:
             # If we pulled out a limited number of rows we only update the
             # position to the last receipt we processed, so we continue
             # processing the rest next iteration.
-            upper_limit = rows[-1][0]
+            receipts_last_processed_stream_id = rows[-1][0]
 
         self.db_pool.simple_update_txn(
             txn,
             table="event_push_summary_last_receipt_stream_id",
             keyvalues={},
-            updatevalues={"stream_id": upper_limit},
+            updatevalues={"stream_id": receipts_last_processed_stream_id},
         )
 
         return len(rows) < limit
-- 
cgit 1.5.1


From be76cd8200b18f3c68b895f85ac7ef5b0ddc2466 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 29 Sep 2022 14:23:24 +0100
Subject: Allow admins to require a manual approval process before new accounts
 can be used (using MSC3866) (#13556)

---
 changelog.d/13556.feature                          |   1 +
 synapse/_scripts/synapse_port_db.py                |   2 +-
 synapse/api/constants.py                           |  11 ++
 synapse/api/errors.py                              |  16 ++
 synapse/config/experimental.py                     |  19 +++
 synapse/handlers/admin.py                          |   5 +
 synapse/handlers/auth.py                           |  11 ++
 synapse/handlers/register.py                       |   8 +
 synapse/replication/http/register.py               |   5 +
 synapse/rest/admin/users.py                        |  43 ++++-
 synapse/rest/client/login.py                       |  37 +++-
 synapse/rest/client/register.py                    |  22 ++-
 synapse/storage/databases/main/__init__.py         |   9 +-
 synapse/storage/databases/main/registration.py     | 150 +++++++++++++++--
 .../main/delta/73/03users_approved_column.sql      |  20 +++
 tests/rest/admin/test_user.py                      | 186 ++++++++++++++++++++-
 tests/rest/client/test_auth.py                     |  33 +++-
 tests/rest/client/test_login.py                    |  41 +++++
 tests/rest/client/test_register.py                 |  32 +++-
 tests/rest/client/utils.py                         |  12 +-
 tests/storage/test_registration.py                 | 102 ++++++++++-
 21 files changed, 731 insertions(+), 34 deletions(-)
 create mode 100644 changelog.d/13556.feature
 create mode 100644 synapse/storage/schema/main/delta/73/03users_approved_column.sql

(limited to 'synapse')

diff --git a/changelog.d/13556.feature b/changelog.d/13556.feature
new file mode 100644
index 0000000000..f9d63db6c0
--- /dev/null
+++ b/changelog.d/13556.feature
@@ -0,0 +1 @@
+Allow server admins to require a manual approval process before new accounts can be used (using [MSC3866](https://github.com/matrix-org/matrix-spec-proposals/pull/3866)).
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 450ba462ba..5fa599e70e 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -107,7 +107,7 @@ BOOLEAN_COLUMNS = {
     "redactions": ["have_censored"],
     "room_stats_state": ["is_federatable"],
     "local_media_repository": ["safe_from_quarantine"],
-    "users": ["shadow_banned"],
+    "users": ["shadow_banned", "approved"],
     "e2e_fallback_keys_json": ["used"],
     "access_tokens": ["used"],
     "device_lists_changes_in_room": ["converted_to_destinations"],
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index c178ddf070..c031903b1a 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -269,3 +269,14 @@ class PublicRoomsFilterFields:
 
     GENERIC_SEARCH_TERM: Final = "generic_search_term"
     ROOM_TYPES: Final = "room_types"
+
+
+class ApprovalNoticeMedium:
+    """Identifier for the medium this server will use to serve notice of approval for a
+    specific user's registration.
+
+    As defined in https://github.com/matrix-org/matrix-spec-proposals/blob/babolivier/m_not_approved/proposals/3866-user-not-approved-error.md
+    """
+
+    NONE = "org.matrix.msc3866.none"
+    EMAIL = "org.matrix.msc3866.email"
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index 1c6b53aa24..c606207569 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -106,6 +106,8 @@ class Codes(str, Enum):
     # Part of MSC3895.
     UNABLE_DUE_TO_PARTIAL_STATE = "ORG.MATRIX.MSC3895_UNABLE_DUE_TO_PARTIAL_STATE"
 
+    USER_AWAITING_APPROVAL = "ORG.MATRIX.MSC3866_USER_AWAITING_APPROVAL"
+
 
 class CodeMessageException(RuntimeError):
     """An exception with integer code and message string attributes.
@@ -566,6 +568,20 @@ class UnredactedContentDeletedError(SynapseError):
         return cs_error(self.msg, self.errcode, **extra)
 
 
+class NotApprovedError(SynapseError):
+    def __init__(
+        self,
+        msg: str,
+        approval_notice_medium: str,
+    ):
+        super().__init__(
+            code=403,
+            msg=msg,
+            errcode=Codes.USER_AWAITING_APPROVAL,
+            additional_fields={"approval_notice_medium": approval_notice_medium},
+        )
+
+
 def cs_error(msg: str, code: str = Codes.UNKNOWN, **kwargs: Any) -> "JsonDict":
     """Utility method for constructing an error response for client-server
     interactions.
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 933779c23a..31834fb27d 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -14,10 +14,25 @@
 
 from typing import Any
 
+import attr
+
 from synapse.config._base import Config
 from synapse.types import JsonDict
 
 
+@attr.s(auto_attribs=True, frozen=True, slots=True)
+class MSC3866Config:
+    """Configuration for MSC3866 (mandating approval for new users)"""
+
+    # Whether the base support for the approval process is enabled. This includes the
+    # ability for administrators to check and update the approval of users, even if no
+    # approval is currently required.
+    enabled: bool = False
+    # Whether to require that new users are approved by an admin before their account
+    # can be used. Note that this setting is ignored if 'enabled' is false.
+    require_approval_for_new_accounts: bool = False
+
+
 class ExperimentalConfig(Config):
     """Config section for enabling experimental features"""
 
@@ -97,6 +112,10 @@ class ExperimentalConfig(Config):
         # MSC3852: Expose last seen user agent field on /_matrix/client/v3/devices.
         self.msc3852_enabled: bool = experimental.get("msc3852_enabled", False)
 
+        # MSC3866: M_USER_AWAITING_APPROVAL error code
+        raw_msc3866_config = experimental.get("msc3866", {})
+        self.msc3866 = MSC3866Config(**raw_msc3866_config)
+
         # MSC3881: Remotely toggle push notifications for another client
         self.msc3881_enabled: bool = experimental.get("msc3881_enabled", False)
 
diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py
index cf9f19608a..f2989cc4a2 100644
--- a/synapse/handlers/admin.py
+++ b/synapse/handlers/admin.py
@@ -32,6 +32,7 @@ class AdminHandler:
         self.store = hs.get_datastores().main
         self._storage_controllers = hs.get_storage_controllers()
         self._state_storage_controller = self._storage_controllers.state
+        self._msc3866_enabled = hs.config.experimental.msc3866.enabled
 
     async def get_whois(self, user: UserID) -> JsonDict:
         connections = []
@@ -75,6 +76,10 @@ class AdminHandler:
             "is_guest",
         }
 
+        if self._msc3866_enabled:
+            # Only include the approved flag if support for MSC3866 is enabled.
+            user_info_to_return.add("approved")
+
         # Restrict returned keys to a known set.
         user_info_dict = {
             key: value
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index eacd631ee0..f5f0e0e7a7 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -1009,6 +1009,17 @@ class AuthHandler:
             return res[0]
         return None
 
+    async def is_user_approved(self, user_id: str) -> bool:
+        """Checks if a user is approved and therefore can be allowed to log in.
+
+        Args:
+            user_id: the user to check the approval status of.
+
+        Returns:
+            A boolean that is True if the user is approved, False otherwise.
+        """
+        return await self.store.is_user_approved(user_id)
+
     async def _find_user_id_and_pwd_hash(
         self, user_id: str
     ) -> Optional[Tuple[str, str]]:
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index cfcadb34db..ca1c7a1866 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -220,6 +220,7 @@ class RegistrationHandler:
         by_admin: bool = False,
         user_agent_ips: Optional[List[Tuple[str, str]]] = None,
         auth_provider_id: Optional[str] = None,
+        approved: bool = False,
     ) -> str:
         """Registers a new client on the server.
 
@@ -246,6 +247,8 @@ class RegistrationHandler:
             user_agent_ips: Tuples of user-agents and IP addresses used
                 during the registration process.
             auth_provider_id: The SSO IdP the user used, if any.
+            approved: True if the new user should be considered already
+                approved by an administrator.
         Returns:
             The registered user_id.
         Raises:
@@ -307,6 +310,7 @@ class RegistrationHandler:
                 user_type=user_type,
                 address=address,
                 shadow_banned=shadow_banned,
+                approved=approved,
             )
 
             profile = await self.store.get_profileinfo(localpart)
@@ -695,6 +699,7 @@ class RegistrationHandler:
         user_type: Optional[str] = None,
         address: Optional[str] = None,
         shadow_banned: bool = False,
+        approved: bool = False,
     ) -> None:
         """Register user in the datastore.
 
@@ -713,6 +718,7 @@ class RegistrationHandler:
                 api.constants.UserTypes, or None for a normal user.
             address: the IP address used to perform the registration.
             shadow_banned: Whether to shadow-ban the user
+            approved: Whether to mark the user as approved by an administrator
         """
         if self.hs.config.worker.worker_app:
             await self._register_client(
@@ -726,6 +732,7 @@ class RegistrationHandler:
                 user_type=user_type,
                 address=address,
                 shadow_banned=shadow_banned,
+                approved=approved,
             )
         else:
             await self.store.register_user(
@@ -738,6 +745,7 @@ class RegistrationHandler:
                 admin=admin,
                 user_type=user_type,
                 shadow_banned=shadow_banned,
+                approved=approved,
             )
 
             # Only call the account validity module(s) on the main process, to avoid
diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py
index 6c8f8388fd..61abb529c8 100644
--- a/synapse/replication/http/register.py
+++ b/synapse/replication/http/register.py
@@ -51,6 +51,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint):
         user_type: Optional[str],
         address: Optional[str],
         shadow_banned: bool,
+        approved: bool,
     ) -> JsonDict:
         """
         Args:
@@ -68,6 +69,8 @@ class ReplicationRegisterServlet(ReplicationEndpoint):
                 or None for a normal user.
             address: the IP address used to perform the regitration.
             shadow_banned: Whether to shadow-ban the user
+            approved: Whether the user should be considered already approved by an
+                administrator.
         """
         return {
             "password_hash": password_hash,
@@ -79,6 +82,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint):
             "user_type": user_type,
             "address": address,
             "shadow_banned": shadow_banned,
+            "approved": approved,
         }
 
     async def _handle_request(  # type: ignore[override]
@@ -99,6 +103,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint):
             user_type=content["user_type"],
             address=content["address"],
             shadow_banned=content["shadow_banned"],
+            approved=content["approved"],
         )
 
         return 200, {}
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 1274773d7e..15ac2059aa 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -69,6 +69,7 @@ class UsersRestServletV2(RestServlet):
         self.store = hs.get_datastores().main
         self.auth = hs.get_auth()
         self.admin_handler = hs.get_admin_handler()
+        self._msc3866_enabled = hs.config.experimental.msc3866.enabled
 
     async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         await assert_requester_is_admin(self.auth, request)
@@ -95,6 +96,13 @@ class UsersRestServletV2(RestServlet):
         guests = parse_boolean(request, "guests", default=True)
         deactivated = parse_boolean(request, "deactivated", default=False)
 
+        # If support for MSC3866 is not enabled, apply no filtering based on the
+        # `approved` column.
+        if self._msc3866_enabled:
+            approved = parse_boolean(request, "approved", default=True)
+        else:
+            approved = True
+
         order_by = parse_string(
             request,
             "order_by",
@@ -115,8 +123,22 @@ class UsersRestServletV2(RestServlet):
         direction = parse_string(request, "dir", default="f", allowed_values=("f", "b"))
 
         users, total = await self.store.get_users_paginate(
-            start, limit, user_id, name, guests, deactivated, order_by, direction
+            start,
+            limit,
+            user_id,
+            name,
+            guests,
+            deactivated,
+            order_by,
+            direction,
+            approved,
         )
+
+        # If support for MSC3866 is not enabled, don't show the approval flag.
+        if not self._msc3866_enabled:
+            for user in users:
+                del user["approved"]
+
         ret = {"users": users, "total": total}
         if (start + limit) < total:
             ret["next_token"] = str(start + len(users))
@@ -163,6 +185,7 @@ class UserRestServletV2(RestServlet):
         self.deactivate_account_handler = hs.get_deactivate_account_handler()
         self.registration_handler = hs.get_registration_handler()
         self.pusher_pool = hs.get_pusherpool()
+        self._msc3866_enabled = hs.config.experimental.msc3866.enabled
 
     async def on_GET(
         self, request: SynapseRequest, user_id: str
@@ -239,6 +262,15 @@ class UserRestServletV2(RestServlet):
                 HTTPStatus.BAD_REQUEST, "'deactivated' parameter is not of type boolean"
             )
 
+        approved: Optional[bool] = None
+        if "approved" in body and self._msc3866_enabled:
+            approved = body["approved"]
+            if not isinstance(approved, bool):
+                raise SynapseError(
+                    HTTPStatus.BAD_REQUEST,
+                    "'approved' parameter is not of type boolean",
+                )
+
         # convert List[Dict[str, str]] into List[Tuple[str, str]]
         if external_ids is not None:
             new_external_ids = [
@@ -343,6 +375,9 @@ class UserRestServletV2(RestServlet):
             if "user_type" in body:
                 await self.store.set_user_type(target_user, user_type)
 
+            if approved is not None:
+                await self.store.update_user_approval_status(target_user, approved)
+
             user = await self.admin_handler.get_user(target_user)
             assert user is not None
 
@@ -355,6 +390,10 @@ class UserRestServletV2(RestServlet):
             if password is not None:
                 password_hash = await self.auth_handler.hash(password)
 
+            new_user_approved = True
+            if self._msc3866_enabled and approved is not None:
+                new_user_approved = approved
+
             user_id = await self.registration_handler.register_user(
                 localpart=target_user.localpart,
                 password_hash=password_hash,
@@ -362,6 +401,7 @@ class UserRestServletV2(RestServlet):
                 default_display_name=displayname,
                 user_type=user_type,
                 by_admin=True,
+                approved=new_user_approved,
             )
 
             if threepids is not None:
@@ -550,6 +590,7 @@ class UserRegisterServlet(RestServlet):
             user_type=user_type,
             default_display_name=displayname,
             by_admin=True,
+            approved=True,
         )
 
         result = await register._create_registration_details(user_id, body)
diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py
index 0437c87d8d..f554586ac3 100644
--- a/synapse/rest/client/login.py
+++ b/synapse/rest/client/login.py
@@ -28,7 +28,14 @@ from typing import (
 
 from typing_extensions import TypedDict
 
-from synapse.api.errors import Codes, InvalidClientTokenError, LoginError, SynapseError
+from synapse.api.constants import ApprovalNoticeMedium
+from synapse.api.errors import (
+    Codes,
+    InvalidClientTokenError,
+    LoginError,
+    NotApprovedError,
+    SynapseError,
+)
 from synapse.api.ratelimiting import Ratelimiter
 from synapse.api.urls import CLIENT_API_PREFIX
 from synapse.appservice import ApplicationService
@@ -55,11 +62,11 @@ logger = logging.getLogger(__name__)
 
 class LoginResponse(TypedDict, total=False):
     user_id: str
-    access_token: str
+    access_token: Optional[str]
     home_server: str
     expires_in_ms: Optional[int]
     refresh_token: Optional[str]
-    device_id: str
+    device_id: Optional[str]
     well_known: Optional[Dict[str, Any]]
 
 
@@ -92,6 +99,12 @@ class LoginRestServlet(RestServlet):
             hs.config.registration.refreshable_access_token_lifetime is not None
         )
 
+        # Whether we need to check if the user has been approved or not.
+        self._require_approval = (
+            hs.config.experimental.msc3866.enabled
+            and hs.config.experimental.msc3866.require_approval_for_new_accounts
+        )
+
         self.auth = hs.get_auth()
 
         self.clock = hs.get_clock()
@@ -220,6 +233,14 @@ class LoginRestServlet(RestServlet):
         except KeyError:
             raise SynapseError(400, "Missing JSON keys.")
 
+        if self._require_approval:
+            approved = await self.auth_handler.is_user_approved(result["user_id"])
+            if not approved:
+                raise NotApprovedError(
+                    msg="This account is pending approval by a server administrator.",
+                    approval_notice_medium=ApprovalNoticeMedium.NONE,
+                )
+
         well_known_data = self._well_known_builder.get_well_known()
         if well_known_data:
             result["well_known"] = well_known_data
@@ -356,6 +377,16 @@ class LoginRestServlet(RestServlet):
                 errcode=Codes.INVALID_PARAM,
             )
 
+        if self._require_approval:
+            approved = await self.auth_handler.is_user_approved(user_id)
+            if not approved:
+                # If the user isn't approved (and needs to be) we won't allow them to
+                # actually log in, so we don't want to create a device/access token.
+                return LoginResponse(
+                    user_id=user_id,
+                    home_server=self.hs.hostname,
+                )
+
         initial_display_name = login_submission.get("initial_device_display_name")
         (
             device_id,
diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py
index 20bab20c8f..de810ae3ec 100644
--- a/synapse/rest/client/register.py
+++ b/synapse/rest/client/register.py
@@ -21,10 +21,15 @@ from twisted.web.server import Request
 import synapse
 import synapse.api.auth
 import synapse.types
-from synapse.api.constants import APP_SERVICE_REGISTRATION_TYPE, LoginType
+from synapse.api.constants import (
+    APP_SERVICE_REGISTRATION_TYPE,
+    ApprovalNoticeMedium,
+    LoginType,
+)
 from synapse.api.errors import (
     Codes,
     InteractiveAuthIncompleteError,
+    NotApprovedError,
     SynapseError,
     ThreepidValidationError,
     UnrecognizedRequestError,
@@ -414,6 +419,11 @@ class RegisterRestServlet(RestServlet):
             hs.config.registration.inhibit_user_in_use_error
         )
 
+        self._require_approval = (
+            hs.config.experimental.msc3866.enabled
+            and hs.config.experimental.msc3866.require_approval_for_new_accounts
+        )
+
         self._registration_flows = _calculate_registration_flows(
             hs.config, self.auth_handler
         )
@@ -734,6 +744,12 @@ class RegisterRestServlet(RestServlet):
                 access_token=return_dict.get("access_token"),
             )
 
+            if self._require_approval:
+                raise NotApprovedError(
+                    msg="This account needs to be approved by an administrator before it can be used.",
+                    approval_notice_medium=ApprovalNoticeMedium.NONE,
+                )
+
         return 200, return_dict
 
     async def _do_appservice_registration(
@@ -778,7 +794,9 @@ class RegisterRestServlet(RestServlet):
             "user_id": user_id,
             "home_server": self.hs.hostname,
         }
-        if not params.get("inhibit_login", False):
+        # We don't want to log the user in if we're going to deny them access because
+        # they need to be approved first.
+        if not params.get("inhibit_login", False) and not self._require_approval:
             device_id = params.get("device_id")
             initial_display_name = params.get("initial_device_display_name")
             (
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index 0843f10340..a62b4abd4e 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -203,6 +203,7 @@ class DataStore(
         deactivated: bool = False,
         order_by: str = UserSortOrder.USER_ID.value,
         direction: str = "f",
+        approved: bool = True,
     ) -> Tuple[List[JsonDict], int]:
         """Function to retrieve a paginated list of users from
         users list. This will return a json list of users and the
@@ -217,6 +218,7 @@ class DataStore(
             deactivated: whether to include deactivated users
             order_by: the sort order of the returned list
             direction: sort ascending or descending
+            approved: whether to include approved users
         Returns:
             A tuple of a list of mappings from user to information and a count of total users.
         """
@@ -249,6 +251,11 @@ class DataStore(
             if not deactivated:
                 filters.append("deactivated = 0")
 
+            if not approved:
+                # We ignore NULL values for the approved flag because these should only
+                # be already existing users that we consider as already approved.
+                filters.append("approved IS FALSE")
+
             where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else ""
 
             sql_base = f"""
@@ -262,7 +269,7 @@ class DataStore(
 
             sql = f"""
                 SELECT name, user_type, is_guest, admin, deactivated, shadow_banned,
-                displayname, avatar_url, creation_ts * 1000 as creation_ts
+                displayname, avatar_url, creation_ts * 1000 as creation_ts, approved
                 {sql_base}
                 ORDER BY {order_by_column} {order}, u.name ASC
                 LIMIT ? OFFSET ?
diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py
index ac821878b0..2996d6bb4d 100644
--- a/synapse/storage/databases/main/registration.py
+++ b/synapse/storage/databases/main/registration.py
@@ -166,27 +166,49 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
     @cached()
     async def get_user_by_id(self, user_id: str) -> Optional[Dict[str, Any]]:
         """Deprecated: use get_userinfo_by_id instead"""
-        return await self.db_pool.simple_select_one(
-            table="users",
-            keyvalues={"name": user_id},
-            retcols=[
-                "name",
-                "password_hash",
-                "is_guest",
-                "admin",
-                "consent_version",
-                "consent_ts",
-                "consent_server_notice_sent",
-                "appservice_id",
-                "creation_ts",
-                "user_type",
-                "deactivated",
-                "shadow_banned",
-            ],
-            allow_none=True,
+
+        def get_user_by_id_txn(txn: LoggingTransaction) -> Optional[Dict[str, Any]]:
+            # We could technically use simple_select_one here, but it would not perform
+            # the COALESCEs (unless hacked into the column names), which could yield
+            # confusing results.
+            txn.execute(
+                """
+                SELECT
+                    name, password_hash, is_guest, admin, consent_version, consent_ts,
+                    consent_server_notice_sent, appservice_id, creation_ts, user_type,
+                    deactivated, COALESCE(shadow_banned, FALSE) AS shadow_banned,
+                    COALESCE(approved, TRUE) AS approved
+                FROM users
+                WHERE name = ?
+                """,
+                (user_id,),
+            )
+
+            rows = self.db_pool.cursor_to_dict(txn)
+
+            if len(rows) == 0:
+                return None
+
+            return rows[0]
+
+        row = await self.db_pool.runInteraction(
             desc="get_user_by_id",
+            func=get_user_by_id_txn,
         )
 
+        if row is not None:
+            # If we're using SQLite our boolean values will be integers. Because we
+            # present some of this data as is to e.g. server admins via REST APIs, we
+            # want to make sure we're returning the right type of data.
+            # Note: when adding a column name to this list, be wary of NULLable columns,
+            # since NULL values will be turned into False.
+            boolean_columns = ["admin", "deactivated", "shadow_banned", "approved"]
+            for column in boolean_columns:
+                if not isinstance(row[column], bool):
+                    row[column] = bool(row[column])
+
+        return row
+
     async def get_userinfo_by_id(self, user_id: str) -> Optional[UserInfo]:
         """Get a UserInfo object for a user by user ID.
 
@@ -1779,6 +1801,40 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
 
         return res if res else False
 
+    @cached()
+    async def is_user_approved(self, user_id: str) -> bool:
+        """Checks if a user is approved and therefore can be allowed to log in.
+
+        If the user's 'approved' column is NULL, we consider it as true given it means
+        the user was registered when support for an approval flow was either disabled
+        or nonexistent.
+
+        Args:
+            user_id: the user to check the approval status of.
+
+        Returns:
+            A boolean that is True if the user is approved, False otherwise.
+        """
+
+        def is_user_approved_txn(txn: LoggingTransaction) -> bool:
+            txn.execute(
+                """
+                SELECT COALESCE(approved, TRUE) AS approved FROM users WHERE name = ?
+                """,
+                (user_id,),
+            )
+
+            rows = self.db_pool.cursor_to_dict(txn)
+
+            # We cast to bool because the value returned by the database engine might
+            # be an integer if we're using SQLite.
+            return bool(rows[0]["approved"])
+
+        return await self.db_pool.runInteraction(
+            desc="is_user_pending_approval",
+            func=is_user_approved_txn,
+        )
+
 
 class RegistrationBackgroundUpdateStore(RegistrationWorkerStore):
     def __init__(
@@ -1916,6 +1972,29 @@ class RegistrationBackgroundUpdateStore(RegistrationWorkerStore):
         self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,))
         txn.call_after(self.is_guest.invalidate, (user_id,))
 
+    def update_user_approval_status_txn(
+        self, txn: LoggingTransaction, user_id: str, approved: bool
+    ) -> None:
+        """Set the user's 'approved' flag to the given value.
+
+        The boolean is turned into an int because the column is a smallint.
+
+        Args:
+            txn: the current database transaction.
+            user_id: the user to update the flag for.
+            approved: the value to set the flag to.
+        """
+        self.db_pool.simple_update_one_txn(
+            txn=txn,
+            table="users",
+            keyvalues={"name": user_id},
+            updatevalues={"approved": approved},
+        )
+
+        # Invalidate the caches of methods that read the value of the 'approved' flag.
+        self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,))
+        self._invalidate_cache_and_stream(txn, self.is_user_approved, (user_id,))
+
 
 class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore):
     def __init__(
@@ -1933,6 +2012,13 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore):
         self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id")
         self._refresh_tokens_id_gen = IdGenerator(db_conn, "refresh_tokens", "id")
 
+        # If support for MSC3866 is enabled and configured to require approval for new
+        # account, we will create new users with an 'approved' flag set to false.
+        self._require_approval = (
+            hs.config.experimental.msc3866.enabled
+            and hs.config.experimental.msc3866.require_approval_for_new_accounts
+        )
+
     async def add_access_token_to_user(
         self,
         user_id: str,
@@ -2065,6 +2151,7 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore):
         admin: bool = False,
         user_type: Optional[str] = None,
         shadow_banned: bool = False,
+        approved: bool = False,
     ) -> None:
         """Attempts to register an account.
 
@@ -2083,6 +2170,8 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore):
                 or None for a normal user.
             shadow_banned: Whether the user is shadow-banned, i.e. they may be
                 told their requests succeeded but we ignore them.
+            approved: Whether to treat the user as already approved by an
+                administrator.
 
         Raises:
             StoreError if the user_id could not be registered.
@@ -2099,6 +2188,7 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore):
             admin,
             user_type,
             shadow_banned,
+            approved,
         )
 
     def _register_user(
@@ -2113,11 +2203,14 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore):
         admin: bool,
         user_type: Optional[str],
         shadow_banned: bool,
+        approved: bool,
     ) -> None:
         user_id_obj = UserID.from_string(user_id)
 
         now = int(self._clock.time())
 
+        user_approved = approved or not self._require_approval
+
         try:
             if was_guest:
                 # Ensure that the guest user actually exists
@@ -2143,6 +2236,7 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore):
                         "admin": 1 if admin else 0,
                         "user_type": user_type,
                         "shadow_banned": shadow_banned,
+                        "approved": user_approved,
                     },
                 )
             else:
@@ -2158,6 +2252,7 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore):
                         "admin": 1 if admin else 0,
                         "user_type": user_type,
                         "shadow_banned": shadow_banned,
+                        "approved": user_approved,
                     },
                 )
 
@@ -2503,6 +2598,25 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore):
             start_or_continue_validation_session_txn,
         )
 
+    async def update_user_approval_status(
+        self, user_id: UserID, approved: bool
+    ) -> None:
+        """Set the user's 'approved' flag to the given value.
+
+        The boolean is passed as-is to update_user_approval_status_txn, which writes
+        it to the 'approved' column (declared as BOOLEAN).
+
+        Args:
+            user_id: the user to update the flag for.
+            approved: the value to set the flag to.
+        """
+        await self.db_pool.runInteraction(
+            "update_user_approval_status",
+            self.update_user_approval_status_txn,
+            user_id.to_string(),
+            approved,
+        )
+
 
 def find_max_generated_user_id_localpart(cur: Cursor) -> int:
     """
diff --git a/synapse/storage/schema/main/delta/73/03users_approved_column.sql b/synapse/storage/schema/main/delta/73/03users_approved_column.sql
new file mode 100644
index 0000000000..5328d592ea
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/03users_approved_column.sql
@@ -0,0 +1,20 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Add a column to the users table to track whether the user needs to be approved by an
+-- administrator.
+-- A NULL column means the user was created before this feature was supported by Synapse,
+-- and should be considered as TRUE.
+ALTER TABLE users ADD COLUMN approved BOOLEAN;
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index 1847e6ad6b..4c1ce33463 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -25,10 +25,10 @@ from parameterized import parameterized, parameterized_class
 from twisted.test.proto_helpers import MemoryReactor
 
 import synapse.rest.admin
-from synapse.api.constants import UserTypes
+from synapse.api.constants import ApprovalNoticeMedium, LoginType, UserTypes
 from synapse.api.errors import Codes, HttpResponseException, ResourceLimitError
 from synapse.api.room_versions import RoomVersions
-from synapse.rest.client import devices, login, logout, profile, room, sync
+from synapse.rest.client import devices, login, logout, profile, register, room, sync
 from synapse.rest.media.v1.filepath import MediaFilePaths
 from synapse.server import HomeServer
 from synapse.types import JsonDict, UserID
@@ -578,6 +578,16 @@ class UsersListTestCase(unittest.HomeserverTestCase):
         _search_test(None, "foo", "user_id")
         _search_test(None, "bar", "user_id")
 
+    @override_config(
+        {
+            "experimental_features": {
+                "msc3866": {
+                    "enabled": True,
+                    "require_approval_for_new_accounts": True,
+                }
+            }
+        }
+    )
     def test_invalid_parameter(self) -> None:
         """
         If parameters are invalid, an error is returned.
@@ -623,6 +633,16 @@ class UsersListTestCase(unittest.HomeserverTestCase):
         self.assertEqual(400, channel.code, msg=channel.json_body)
         self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"])
 
+        # invalid approved
+        channel = self.make_request(
+            "GET",
+            self.url + "?approved=not_bool",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(400, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"])
+
         # unkown order_by
         channel = self.make_request(
             "GET",
@@ -841,6 +861,69 @@ class UsersListTestCase(unittest.HomeserverTestCase):
         self._order_test([self.admin_user, user1, user2], "creation_ts", "f")
         self._order_test([user2, user1, self.admin_user], "creation_ts", "b")
 
+    @override_config(
+        {
+            "experimental_features": {
+                "msc3866": {
+                    "enabled": True,
+                    "require_approval_for_new_accounts": True,
+                }
+            }
+        }
+    )
+    def test_filter_out_approved(self) -> None:
+        """Tests that the endpoint can filter out approved users."""
+        # Create our users.
+        self._create_users(2)
+
+        # Get the list of users.
+        channel = self.make_request(
+            "GET",
+            self.url,
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(200, channel.code, channel.result)
+
+        # Exclude the admin, because we don't want to accidentally un-approve the admin.
+        non_admin_user_ids = [
+            user["name"]
+            for user in channel.json_body["users"]
+            if user["name"] != self.admin_user
+        ]
+
+        self.assertEqual(2, len(non_admin_user_ids), non_admin_user_ids)
+
+        # Select a user and un-approve them. We do this rather than the other way around
+        # because, since these users are created by an admin, we consider them already
+        # approved.
+        not_approved_user = non_admin_user_ids[0]
+
+        channel = self.make_request(
+            "PUT",
+            f"/_synapse/admin/v2/users/{not_approved_user}",
+            {"approved": False},
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(200, channel.code, channel.result)
+
+        # Now get the list of users again, this time filtering out approved users.
+        channel = self.make_request(
+            "GET",
+            self.url + "?approved=false",
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(200, channel.code, channel.result)
+
+        non_admin_user_ids = [
+            user["name"]
+            for user in channel.json_body["users"]
+            if user["name"] != self.admin_user
+        ]
+
+        # We should only have our unapproved user now.
+        self.assertEqual(1, len(non_admin_user_ids), non_admin_user_ids)
+        self.assertEqual(not_approved_user, non_admin_user_ids[0])
+
     def _order_test(
         self,
         expected_user_list: List[str],
@@ -1272,6 +1355,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         synapse.rest.admin.register_servlets,
         login.register_servlets,
         sync.register_servlets,
+        register.register_servlets,
     ]
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
@@ -2536,6 +2620,104 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         # Ensure they're still alive
         self.assertEqual(0, channel.json_body["deactivated"])
 
+    @override_config(
+        {
+            "experimental_features": {
+                "msc3866": {
+                    "enabled": True,
+                    "require_approval_for_new_accounts": True,
+                }
+            }
+        }
+    )
+    def test_approve_account(self) -> None:
+        """Tests that approving an account correctly sets the approved flag for the user."""
+        url = self.url_prefix % "@bob:test"
+
+        # Create the user using the client-server API since otherwise the user will be
+        # marked as approved automatically.
+        channel = self.make_request(
+            "POST",
+            "register",
+            {
+                "username": "bob",
+                "password": "test",
+                "auth": {"type": LoginType.DUMMY},
+            },
+        )
+        self.assertEqual(403, channel.code, channel.result)
+        self.assertEqual(Codes.USER_AWAITING_APPROVAL, channel.json_body["errcode"])
+        self.assertEqual(
+            ApprovalNoticeMedium.NONE, channel.json_body["approval_notice_medium"]
+        )
+
+        # Get user
+        channel = self.make_request(
+            "GET",
+            url,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertIs(False, channel.json_body["approved"])
+
+        # Approve user
+        channel = self.make_request(
+            "PUT",
+            url,
+            access_token=self.admin_user_tok,
+            content={"approved": True},
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertIs(True, channel.json_body["approved"])
+
+        # Check that the user is now approved
+        channel = self.make_request(
+            "GET",
+            url,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertIs(True, channel.json_body["approved"])
+
+    @override_config(
+        {
+            "experimental_features": {
+                "msc3866": {
+                    "enabled": True,
+                    "require_approval_for_new_accounts": True,
+                }
+            }
+        }
+    )
+    def test_register_approved(self) -> None:
+        url = self.url_prefix % "@bob:test"
+
+        # Create user
+        channel = self.make_request(
+            "PUT",
+            url,
+            access_token=self.admin_user_tok,
+            content={"password": "abc123", "approved": True},
+        )
+
+        self.assertEqual(201, channel.code, msg=channel.json_body)
+        self.assertEqual("@bob:test", channel.json_body["name"])
+        self.assertEqual(1, channel.json_body["approved"])
+
+        # Get user
+        channel = self.make_request(
+            "GET",
+            url,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual("@bob:test", channel.json_body["name"])
+        self.assertEqual(1, channel.json_body["approved"])
+
     def _is_erased(self, user_id: str, expect: bool) -> None:
         """Assert that the user is erased or not"""
         d = self.store.is_user_erased(user_id)
diff --git a/tests/rest/client/test_auth.py b/tests/rest/client/test_auth.py
index 05355c7fb6..090cef5216 100644
--- a/tests/rest/client/test_auth.py
+++ b/tests/rest/client/test_auth.py
@@ -20,7 +20,8 @@ from twisted.test.proto_helpers import MemoryReactor
 from twisted.web.resource import Resource
 
 import synapse.rest.admin
-from synapse.api.constants import LoginType
+from synapse.api.constants import ApprovalNoticeMedium, LoginType
+from synapse.api.errors import Codes
 from synapse.handlers.ui_auth.checkers import UserInteractiveAuthChecker
 from synapse.rest.client import account, auth, devices, login, logout, register
 from synapse.rest.synapse.client import build_synapse_client_resource_tree
@@ -567,6 +568,36 @@ class UIAuthTests(unittest.HomeserverTestCase):
             body={"auth": {"session": session_id}},
         )
 
+    @skip_unless(HAS_OIDC, "requires OIDC")
+    @override_config(
+        {
+            "oidc_config": TEST_OIDC_CONFIG,
+            "experimental_features": {
+                "msc3866": {
+                    "enabled": True,
+                    "require_approval_for_new_accounts": True,
+                }
+            },
+        }
+    )
+    def test_sso_not_approved(self) -> None:
+        """Tests that if we register a user via SSO while requiring approval for new
+        accounts, we still raise the correct error before logging the user in.
+        """
+        login_resp = self.helper.login_via_oidc("username", expected_status=403)
+
+        self.assertEqual(login_resp["errcode"], Codes.USER_AWAITING_APPROVAL)
+        self.assertEqual(
+            ApprovalNoticeMedium.NONE, login_resp["approval_notice_medium"]
+        )
+
+        # Check that we didn't register a device for the user during the login attempt.
+        devices = self.get_success(
+            self.hs.get_datastores().main.get_devices_by_user("@username:test")
+        )
+
+        self.assertEqual(len(devices), 0)
+
 
 class RefreshAuthTests(unittest.HomeserverTestCase):
     servlets = [
diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py
index e2a4d98275..e801ba8c8b 100644
--- a/tests/rest/client/test_login.py
+++ b/tests/rest/client/test_login.py
@@ -23,6 +23,8 @@ from twisted.test.proto_helpers import MemoryReactor
 from twisted.web.resource import Resource
 
 import synapse.rest.admin
+from synapse.api.constants import ApprovalNoticeMedium, LoginType
+from synapse.api.errors import Codes
 from synapse.appservice import ApplicationService
 from synapse.rest.client import devices, login, logout, register
 from synapse.rest.client.account import WhoamiRestServlet
@@ -94,6 +96,7 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase):
         logout.register_servlets,
         devices.register_servlets,
         lambda hs, http_server: WhoamiRestServlet(hs).register(http_server),
+        register.register_servlets,
     ]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
@@ -406,6 +409,44 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase):
         self.assertEqual(channel.code, 400)
         self.assertEqual(channel.json_body["errcode"], "M_INVALID_PARAM")
 
+    @override_config(
+        {
+            "experimental_features": {
+                "msc3866": {
+                    "enabled": True,
+                    "require_approval_for_new_accounts": True,
+                }
+            }
+        }
+    )
+    def test_require_approval(self) -> None:
+        channel = self.make_request(
+            "POST",
+            "register",
+            {
+                "username": "kermit",
+                "password": "monkey",
+                "auth": {"type": LoginType.DUMMY},
+            },
+        )
+        self.assertEqual(403, channel.code, channel.result)
+        self.assertEqual(Codes.USER_AWAITING_APPROVAL, channel.json_body["errcode"])
+        self.assertEqual(
+            ApprovalNoticeMedium.NONE, channel.json_body["approval_notice_medium"]
+        )
+
+        params = {
+            "type": LoginType.PASSWORD,
+            "identifier": {"type": "m.id.user", "user": "kermit"},
+            "password": "monkey",
+        }
+        channel = self.make_request("POST", LOGIN_URL, params)
+        self.assertEqual(403, channel.code, channel.result)
+        self.assertEqual(Codes.USER_AWAITING_APPROVAL, channel.json_body["errcode"])
+        self.assertEqual(
+            ApprovalNoticeMedium.NONE, channel.json_body["approval_notice_medium"]
+        )
+
 
 @skip_unless(has_saml2 and HAS_OIDC, "Requires SAML2 and OIDC")
 class MultiSSOTestCase(unittest.HomeserverTestCase):
diff --git a/tests/rest/client/test_register.py b/tests/rest/client/test_register.py
index b781875d52..11cf3939d8 100644
--- a/tests/rest/client/test_register.py
+++ b/tests/rest/client/test_register.py
@@ -22,7 +22,11 @@ import pkg_resources
 from twisted.test.proto_helpers import MemoryReactor
 
 import synapse.rest.admin
-from synapse.api.constants import APP_SERVICE_REGISTRATION_TYPE, LoginType
+from synapse.api.constants import (
+    APP_SERVICE_REGISTRATION_TYPE,
+    ApprovalNoticeMedium,
+    LoginType,
+)
 from synapse.api.errors import Codes
 from synapse.appservice import ApplicationService
 from synapse.rest.client import account, account_validity, login, logout, register, sync
@@ -765,6 +769,32 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase):
         self.assertEqual(channel.code, 400, channel.json_body)
         self.assertEqual(channel.json_body["errcode"], Codes.USER_IN_USE)
 
+    @override_config(
+        {
+            "experimental_features": {
+                "msc3866": {
+                    "enabled": True,
+                    "require_approval_for_new_accounts": True,
+                }
+            }
+        }
+    )
+    def test_require_approval(self) -> None:
+        channel = self.make_request(
+            "POST",
+            "register",
+            {
+                "username": "kermit",
+                "password": "monkey",
+                "auth": {"type": LoginType.DUMMY},
+            },
+        )
+        self.assertEqual(403, channel.code, channel.result)
+        self.assertEqual(Codes.USER_AWAITING_APPROVAL, channel.json_body["errcode"])
+        self.assertEqual(
+            ApprovalNoticeMedium.NONE, channel.json_body["approval_notice_medium"]
+        )
+
 
 class AccountValidityTestCase(unittest.HomeserverTestCase):
 
diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py
index dd26145bf8..c249a42bb6 100644
--- a/tests/rest/client/utils.py
+++ b/tests/rest/client/utils.py
@@ -543,8 +543,12 @@ class RestHelper:
 
         return channel.json_body
 
-    def login_via_oidc(self, remote_user_id: str) -> JsonDict:
-        """Log in (as a new user) via OIDC
+    def login_via_oidc(
+        self,
+        remote_user_id: str,
+        expected_status: int = 200,
+    ) -> JsonDict:
+        """Log in via OIDC
 
         Returns the result of the final token login.
 
@@ -578,7 +582,9 @@ class RestHelper:
             "/login",
             content={"type": "m.login.token", "token": login_token},
         )
-        assert channel.code == HTTPStatus.OK
+        assert (
+            channel.code == expected_status
+        ), f"unexpected status in response: {channel.code}"
         return channel.json_body
 
     def auth_via_oidc(
diff --git a/tests/storage/test_registration.py b/tests/storage/test_registration.py
index 853a93afab..05ea802008 100644
--- a/tests/storage/test_registration.py
+++ b/tests/storage/test_registration.py
@@ -16,9 +16,10 @@ from twisted.test.proto_helpers import MemoryReactor
 from synapse.api.constants import UserTypes
 from synapse.api.errors import ThreepidValidationError
 from synapse.server import HomeServer
+from synapse.types import JsonDict, UserID
 from synapse.util import Clock
 
-from tests.unittest import HomeserverTestCase
+from tests.unittest import HomeserverTestCase, override_config
 
 
 class RegistrationStoreTestCase(HomeserverTestCase):
@@ -48,6 +49,7 @@ class RegistrationStoreTestCase(HomeserverTestCase):
                 "user_type": None,
                 "deactivated": 0,
                 "shadow_banned": 0,
+                "approved": 1,
             },
             (self.get_success(self.store.get_user_by_id(self.user_id))),
         )
@@ -166,3 +168,101 @@ class RegistrationStoreTestCase(HomeserverTestCase):
             ThreepidValidationError,
         )
         self.assertEqual(e.value.msg, "Validation token not found or has expired", e)
+
+
+class ApprovalRequiredRegistrationTestCase(HomeserverTestCase):
+    def default_config(self) -> JsonDict:
+        config = super().default_config()
+
+        # If there's already some config for this feature in the default config, it
+        # means we're overriding it with @override_config. In this case we don't want
+        # to do anything more with it.
+        msc3866_config = config.get("experimental_features", {}).get("msc3866")
+        if msc3866_config is not None:
+            return config
+
+        # Require approval for all new accounts.
+        config["experimental_features"] = {
+            "msc3866": {
+                "enabled": True,
+                "require_approval_for_new_accounts": True,
+            }
+        }
+        return config
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.store = hs.get_datastores().main
+        self.user_id = "@my-user:test"
+        self.pwhash = "{xx1}123456789"
+
+    @override_config(
+        {
+            "experimental_features": {
+                "msc3866": {
+                    "enabled": True,
+                    "require_approval_for_new_accounts": False,
+                }
+            }
+        }
+    )
+    def test_approval_not_required(self) -> None:
+        """Tests that if we don't require approval for new accounts, newly created
+        accounts are automatically marked as approved.
+        """
+        self.get_success(self.store.register_user(self.user_id, self.pwhash))
+
+        user = self.get_success(self.store.get_user_by_id(self.user_id))
+        assert user is not None
+        self.assertTrue(user["approved"])
+
+        approved = self.get_success(self.store.is_user_approved(self.user_id))
+        self.assertTrue(approved)
+
+    def test_approval_required(self) -> None:
+        """Tests that if we require approval for new accounts, newly created accounts
+        are not automatically marked as approved.
+        """
+        self.get_success(self.store.register_user(self.user_id, self.pwhash))
+
+        user = self.get_success(self.store.get_user_by_id(self.user_id))
+        assert user is not None
+        self.assertFalse(user["approved"])
+
+        approved = self.get_success(self.store.is_user_approved(self.user_id))
+        self.assertFalse(approved)
+
+    def test_override(self) -> None:
+        """Tests that if we require approval for new accounts, but we explicitly say the
+        new user should be considered approved, they're marked as approved.
+        """
+        self.get_success(
+            self.store.register_user(
+                self.user_id,
+                self.pwhash,
+                approved=True,
+            )
+        )
+
+        user = self.get_success(self.store.get_user_by_id(self.user_id))
+        self.assertIsNotNone(user)
+        assert user is not None
+        self.assertEqual(user["approved"], 1)
+
+        approved = self.get_success(self.store.is_user_approved(self.user_id))
+        self.assertTrue(approved)
+
+    def test_approve_user(self) -> None:
+        """Tests that approving the user updates their approval status."""
+        self.get_success(self.store.register_user(self.user_id, self.pwhash))
+
+        approved = self.get_success(self.store.is_user_approved(self.user_id))
+        self.assertFalse(approved)
+
+        self.get_success(
+            self.store.update_user_approval_status(
+                UserID.from_string(self.user_id), True
+            )
+        )
+
+        approved = self.get_success(self.store.is_user_approved(self.user_id))
+        self.assertTrue(approved)
-- 
cgit 1.5.1


From a466164647b969efd2e85168144cd75693443c05 Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Thu, 29 Sep 2022 14:55:12 +0100
Subject: Optimise get_rooms_for_user (drop with_stream_ordering) (#13787)

---
 changelog.d/13787.misc                       |   1 +
 synapse/handlers/device.py                   |   6 +-
 synapse/handlers/sync.py                     |  14 +---
 synapse/storage/_base.py                     |   1 +
 synapse/storage/databases/main/cache.py      |   1 +
 synapse/storage/databases/main/roommember.py | 117 +++++++++++++--------------
 tests/handlers/test_sync.py                  |   1 +
 7 files changed, 66 insertions(+), 75 deletions(-)
 create mode 100644 changelog.d/13787.misc

(limited to 'synapse')

diff --git a/changelog.d/13787.misc b/changelog.d/13787.misc
new file mode 100644
index 0000000000..a9b93717f0
--- /dev/null
+++ b/changelog.d/13787.misc
@@ -0,0 +1 @@
+Optimise get rooms for user calls. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 03082fce42..f9cc5bddbc 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -273,11 +273,9 @@ class DeviceWorkerHandler:
             possibly_left = possibly_changed | possibly_left
 
             # Double check if we still share rooms with the given user.
-            users_rooms = await self.store.get_rooms_for_users_with_stream_ordering(
-                possibly_left
-            )
+            users_rooms = await self.store.get_rooms_for_users(possibly_left)
             for changed_user_id, entries in users_rooms.items():
-                if any(e.room_id in room_ids for e in entries):
+                if any(rid in room_ids for rid in entries):
                     possibly_left.discard(changed_user_id)
                 else:
                     possibly_joined.discard(changed_user_id)
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index e75fc6b947..4abb9b6127 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1490,16 +1490,14 @@ class SyncHandler:
                 since_token.device_list_key
             )
             if changed_users is not None:
-                result = await self.store.get_rooms_for_users_with_stream_ordering(
-                    changed_users
-                )
+                result = await self.store.get_rooms_for_users(changed_users)
 
                 for changed_user_id, entries in result.items():
                     # Check if the changed user shares any rooms with the user,
                     # or if the changed user is the syncing user (as we always
                     # want to include device list updates of their own devices).
                     if user_id == changed_user_id or any(
-                        e.room_id in joined_rooms for e in entries
+                        rid in joined_rooms for rid in entries
                     ):
                         users_that_have_changed.add(changed_user_id)
             else:
@@ -1533,13 +1531,9 @@ class SyncHandler:
                 newly_left_users.update(left_users)
 
             # Remove any users that we still share a room with.
-            left_users_rooms = (
-                await self.store.get_rooms_for_users_with_stream_ordering(
-                    newly_left_users
-                )
-            )
+            left_users_rooms = await self.store.get_rooms_for_users(newly_left_users)
             for user_id, entries in left_users_rooms.items():
-                if any(e.room_id in joined_rooms for e in entries):
+                if any(rid in joined_rooms for rid in entries):
                     newly_left_users.discard(user_id)
 
             return DeviceListUpdates(
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 313e8aca7d..bf42aeb8d1 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -94,6 +94,7 @@ class SQLBaseStore(metaclass=ABCMeta):
             self._attempt_to_invalidate_cache(
                 "get_rooms_for_user_with_stream_ordering", (user_id,)
             )
+            self._attempt_to_invalidate_cache("get_rooms_for_user", (user_id,))
 
         # Purge other caches based on room state.
         self._attempt_to_invalidate_cache("get_room_summary", (room_id,))
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index db6ce83a2b..3b8ed1f7ee 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -205,6 +205,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
                 self.get_rooms_for_user_with_stream_ordering.invalidate(
                     (data.state_key,)
                 )
+                self.get_rooms_for_user.invalidate((data.state_key,))
         else:
             raise Exception("Unknown events stream row type %s" % (row.type,))
 
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 8ada3cdac3..982e1f08e3 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -15,7 +15,6 @@
 import logging
 from typing import (
     TYPE_CHECKING,
-    Callable,
     Collection,
     Dict,
     FrozenSet,
@@ -52,7 +51,6 @@ from synapse.types import JsonDict, PersistedEventPosition, StateMap, get_domain
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches import intern_string
 from synapse.util.caches.descriptors import _CacheContext, cached, cachedList
-from synapse.util.cancellation import cancellable
 from synapse.util.iterutils import batch_iter
 from synapse.util.metrics import Measure
 
@@ -600,58 +598,6 @@ class RoomMemberWorkerStore(EventsWorkerStore):
             for room_id, instance, stream_id in txn
         )
 
-    @cachedList(
-        cached_method_name="get_rooms_for_user_with_stream_ordering",
-        list_name="user_ids",
-    )
-    async def get_rooms_for_users_with_stream_ordering(
-        self, user_ids: Collection[str]
-    ) -> Dict[str, FrozenSet[GetRoomsForUserWithStreamOrdering]]:
-        """A batched version of `get_rooms_for_user_with_stream_ordering`.
-
-        Returns:
-            Map from user_id to set of rooms that is currently in.
-        """
-        return await self.db_pool.runInteraction(
-            "get_rooms_for_users_with_stream_ordering",
-            self._get_rooms_for_users_with_stream_ordering_txn,
-            user_ids,
-        )
-
-    def _get_rooms_for_users_with_stream_ordering_txn(
-        self, txn: LoggingTransaction, user_ids: Collection[str]
-    ) -> Dict[str, FrozenSet[GetRoomsForUserWithStreamOrdering]]:
-
-        clause, args = make_in_list_sql_clause(
-            self.database_engine,
-            "c.state_key",
-            user_ids,
-        )
-
-        sql = f"""
-            SELECT c.state_key, room_id, e.instance_name, e.stream_ordering
-            FROM current_state_events AS c
-            INNER JOIN events AS e USING (room_id, event_id)
-            WHERE
-                c.type = 'm.room.member'
-                AND c.membership = ?
-                AND {clause}
-        """
-
-        txn.execute(sql, [Membership.JOIN] + args)
-
-        result: Dict[str, Set[GetRoomsForUserWithStreamOrdering]] = {
-            user_id: set() for user_id in user_ids
-        }
-        for user_id, room_id, instance, stream_id in txn:
-            result[user_id].add(
-                GetRoomsForUserWithStreamOrdering(
-                    room_id, PersistedEventPosition(instance, stream_id)
-                )
-            )
-
-        return {user_id: frozenset(v) for user_id, v in result.items()}
-
     async def get_users_server_still_shares_room_with(
         self, user_ids: Collection[str]
     ) -> Set[str]:
@@ -693,19 +639,68 @@ class RoomMemberWorkerStore(EventsWorkerStore):
 
         return {row[0] for row in txn}
 
-    @cancellable
-    async def get_rooms_for_user(
-        self, user_id: str, on_invalidate: Optional[Callable[[], None]] = None
-    ) -> FrozenSet[str]:
+    @cached(max_entries=500000, iterable=True)
+    async def get_rooms_for_user(self, user_id: str) -> FrozenSet[str]:
         """Returns a set of room_ids the user is currently joined to.
 
         If a remote user only returns rooms this server is currently
         participating in.
         """
-        rooms = await self.get_rooms_for_user_with_stream_ordering(
-            user_id, on_invalidate=on_invalidate
+        rooms = self.get_rooms_for_user_with_stream_ordering.cache.get_immediate(
+            (user_id,),
+            None,
+            update_metrics=False,
+        )
+        if rooms:
+            return frozenset(r.room_id for r in rooms)
+
+        room_ids = await self.db_pool.simple_select_onecol(
+            table="current_state_events",
+            keyvalues={
+                "type": EventTypes.Member,
+                "membership": Membership.JOIN,
+                "state_key": user_id,
+            },
+            retcol="room_id",
+            desc="get_rooms_for_user",
         )
-        return frozenset(r.room_id for r in rooms)
+
+        return frozenset(room_ids)
+
+    @cachedList(
+        cached_method_name="get_rooms_for_user",
+        list_name="user_ids",
+    )
+    async def get_rooms_for_users(
+        self, user_ids: Collection[str]
+    ) -> Dict[str, FrozenSet[str]]:
+        """A batched version of `get_rooms_for_user`.
+
+        Returns:
+            Map from user_id to the set of rooms that user is currently in.
+        """
+
+        rows = await self.db_pool.simple_select_many_batch(
+            table="current_state_events",
+            column="state_key",
+            iterable=user_ids,
+            retcols=(
+                "state_key",
+                "room_id",
+            ),
+            keyvalues={
+                "type": EventTypes.Member,
+                "membership": Membership.JOIN,
+            },
+            desc="get_rooms_for_users",
+        )
+
+        user_rooms: Dict[str, Set[str]] = {user_id: set() for user_id in user_ids}
+
+        for row in rows:
+            user_rooms[row["state_key"]].add(row["room_id"])
+
+        return {key: frozenset(rooms) for key, rooms in user_rooms.items()}
 
     @cached(max_entries=10000)
     async def does_pair_of_users_share_a_room(
diff --git a/tests/handlers/test_sync.py b/tests/handlers/test_sync.py
index e3f38fbcc5..ab5c101eb7 100644
--- a/tests/handlers/test_sync.py
+++ b/tests/handlers/test_sync.py
@@ -159,6 +159,7 @@ class SyncTestCase(tests.unittest.HomeserverTestCase):
 
         # Blow away caches (supported room versions can only change due to a restart).
         self.store.get_rooms_for_user_with_stream_ordering.invalidate_all()
+        self.store.get_rooms_for_user.invalidate_all()
         self.get_success(self.store._get_event_cache.clear())
         self.store._event_ref.clear()
 
-- 
cgit 1.5.1


From ebd9e2dac6495a1857617d1a76c9259a988f8bb4 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 29 Sep 2022 16:12:09 +0100
Subject: Implement push rule evaluation in Rust. (#13838)

---
 changelog.d/13838.misc                   |   1 +
 rust/Cargo.toml                          |   4 +-
 rust/benches/evaluator.rs                | 149 ++++++++++++
 rust/benches/glob.rs                     |  40 ++++
 rust/build.rs                            |   2 +-
 rust/src/push/base_rules.rs              |   1 +
 rust/src/push/evaluator.rs               | 374 +++++++++++++++++++++++++++++++
 rust/src/push/mod.rs                     |  28 ++-
 rust/src/push/utils.rs                   | 215 ++++++++++++++++++
 stubs/synapse/synapse_rust/push.pyi      |  19 +-
 synapse/push/bulk_push_rule_evaluator.py |  44 ++--
 synapse/push/httppusher.py               |  39 +++-
 synapse/push/push_rule_evaluator.py      | 361 -----------------------------
 tests/push/test_push_rule_evaluator.py   |  20 +-
 14 files changed, 894 insertions(+), 403 deletions(-)
 create mode 100644 changelog.d/13838.misc
 create mode 100644 rust/benches/evaluator.rs
 create mode 100644 rust/benches/glob.rs
 create mode 100644 rust/src/push/evaluator.rs
 create mode 100644 rust/src/push/utils.rs
 delete mode 100644 synapse/push/push_rule_evaluator.py

(limited to 'synapse')

diff --git a/changelog.d/13838.misc b/changelog.d/13838.misc
new file mode 100644
index 0000000000..28bddb7059
--- /dev/null
+++ b/changelog.d/13838.misc
@@ -0,0 +1 @@
+Port push rules to using Rust.
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 44263bf77e..cffaa5b51b 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -11,7 +11,9 @@ rust-version = "1.58.1"
 
 [lib]
 name = "synapse"
-crate-type = ["cdylib"]
+# We generate a `cdylib` for Python and a standard `lib` for running
+# tests/benchmarks.
+crate-type = ["lib", "cdylib"]
 
 [package.metadata.maturin]
 # This is where we tell maturin where to place the built library.
diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs
new file mode 100644
index 0000000000..ed411461d1
--- /dev/null
+++ b/rust/benches/evaluator.rs
@@ -0,0 +1,149 @@
+// Copyright 2022 The Matrix.org Foundation C.I.C.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#![feature(test)]
+use synapse::push::{
+    evaluator::PushRuleEvaluator, Condition, EventMatchCondition, FilteredPushRules, PushRules,
+};
+use test::Bencher;
+
+extern crate test;
+
+#[bench]
+fn bench_match_exact(b: &mut Bencher) {
+    let flattened_keys = [
+        ("type".to_string(), "m.text".to_string()),
+        ("room_id".to_string(), "!room:server".to_string()),
+        ("content.body".to_string(), "test message".to_string()),
+    ]
+    .into_iter()
+    .collect();
+
+    let eval = PushRuleEvaluator::py_new(
+        flattened_keys,
+        10,      // room_member_count
+        Some(0), // sender_power_level (an `Option<i64>`)
+        Default::default(),
+        Default::default(),
+        true,
+    )
+    .unwrap();
+
+    let condition = Condition::Known(synapse::push::KnownCondition::EventMatch(
+        EventMatchCondition {
+            key: "room_id".into(),
+            pattern: Some("!room:server".into()),
+            pattern_type: None,
+        },
+    ));
+
+    let matched = eval.match_condition(&condition, None, None).unwrap();
+    assert!(matched, "Didn't match");
+
+    b.iter(|| eval.match_condition(&condition, None, None).unwrap());
+}
+
+#[bench]
+fn bench_match_word(b: &mut Bencher) {
+    let flattened_keys = [
+        ("type".to_string(), "m.text".to_string()),
+        ("room_id".to_string(), "!room:server".to_string()),
+        ("content.body".to_string(), "test message".to_string()),
+    ]
+    .into_iter()
+    .collect();
+
+    let eval = PushRuleEvaluator::py_new(
+        flattened_keys,
+        10,      // room_member_count
+        Some(0), // sender_power_level (an `Option<i64>`)
+        Default::default(),
+        Default::default(),
+        true,
+    )
+    .unwrap();
+
+    let condition = Condition::Known(synapse::push::KnownCondition::EventMatch(
+        EventMatchCondition {
+            key: "content.body".into(),
+            pattern: Some("test".into()),
+            pattern_type: None,
+        },
+    ));
+
+    let matched = eval.match_condition(&condition, None, None).unwrap();
+    assert!(matched, "Didn't match");
+
+    b.iter(|| eval.match_condition(&condition, None, None).unwrap());
+}
+
+#[bench]
+fn bench_match_word_miss(b: &mut Bencher) {
+    let flattened_keys = [
+        ("type".to_string(), "m.text".to_string()),
+        ("room_id".to_string(), "!room:server".to_string()),
+        ("content.body".to_string(), "test message".to_string()),
+    ]
+    .into_iter()
+    .collect();
+
+    let eval = PushRuleEvaluator::py_new(
+        flattened_keys,
+        10,      // room_member_count
+        Some(0), // sender_power_level (an `Option<i64>`)
+        Default::default(),
+        Default::default(),
+        true,
+    )
+    .unwrap();
+
+    let condition = Condition::Known(synapse::push::KnownCondition::EventMatch(
+        EventMatchCondition {
+            key: "content.body".into(),
+            pattern: Some("foobar".into()),
+            pattern_type: None,
+        },
+    ));
+
+    let matched = eval.match_condition(&condition, None, None).unwrap();
+    assert!(!matched, "Unexpectedly matched"); // this bench measures the miss path
+
+    b.iter(|| eval.match_condition(&condition, None, None).unwrap());
+}
+
+#[bench]
+fn bench_eval_message(b: &mut Bencher) {
+    let flattened_keys = [
+        ("type".to_string(), "m.text".to_string()),
+        ("room_id".to_string(), "!room:server".to_string()),
+        ("content.body".to_string(), "test message".to_string()),
+    ]
+    .into_iter()
+    .collect();
+
+    let eval = PushRuleEvaluator::py_new(
+        flattened_keys,
+        10,      // room_member_count
+        Some(0), // sender_power_level (an `Option<i64>`)
+        Default::default(),
+        Default::default(),
+        true,
+    )
+    .unwrap();
+
+    let rules =
+        FilteredPushRules::py_new(PushRules::new(Vec::new()), Default::default(), false, false);
+
+    b.iter(|| eval.run(&rules, Some("bob"), Some("person")));
+}
diff --git a/rust/benches/glob.rs b/rust/benches/glob.rs
new file mode 100644
index 0000000000..b6697d9285
--- /dev/null
+++ b/rust/benches/glob.rs
@@ -0,0 +1,40 @@
+// Copyright 2022 The Matrix.org Foundation C.I.C.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#![feature(test)]
+
+use synapse::push::utils::{glob_to_regex, GlobMatchType};
+use test::Bencher;
+
+extern crate test;
+
+#[bench]
+fn bench_whole(b: &mut Bencher) {
+    b.iter(|| glob_to_regex("test", GlobMatchType::Whole));
+}
+
+#[bench]
+fn bench_word(b: &mut Bencher) {
+    b.iter(|| glob_to_regex("test", GlobMatchType::Word));
+}
+
+#[bench]
+fn bench_whole_wildcard_run(b: &mut Bencher) {
+    b.iter(|| glob_to_regex("test***??*?*?foo", GlobMatchType::Whole));
+}
+
+#[bench]
+fn bench_word_wildcard_run(b: &mut Bencher) {
+    b.iter(|| glob_to_regex("test***??*?*?foo", GlobMatchType::Word)); // word-anchored variant
+}
diff --git a/rust/build.rs b/rust/build.rs
index 2117975e56..ef370e6b41 100644
--- a/rust/build.rs
+++ b/rust/build.rs
@@ -22,7 +22,7 @@ fn main() -> Result<(), std::io::Error> {
 
         for entry in entries {
             if entry.is_dir() {
-                dirs.push(entry)
+                dirs.push(entry);
             } else {
                 paths.push(entry.to_str().expect("valid rust paths").to_string());
             }
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index 7c62bc4849..bb59676bde 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -262,6 +262,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[
         priority_class: 1,
         conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::RelationMatch {
             rel_type: Cow::Borrowed("m.thread"),
+            event_type_pattern: None,
             sender: None,
             sender_type: Some(Cow::Borrowed("user_id")),
         })]),
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
new file mode 100644
index 0000000000..efe88ec76e
--- /dev/null
+++ b/rust/src/push/evaluator.rs
@@ -0,0 +1,374 @@
+// Copyright 2022 The Matrix.org Foundation C.I.C.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::{
+    borrow::Cow,
+    collections::{BTreeMap, BTreeSet},
+};
+
+use anyhow::{Context, Error};
+use lazy_static::lazy_static;
+use log::warn;
+use pyo3::prelude::*;
+use regex::Regex;
+
+use super::{
+    utils::{get_glob_matcher, get_localpart_from_id, GlobMatchType},
+    Action, Condition, EventMatchCondition, FilteredPushRules, KnownCondition,
+};
+
+lazy_static! {
+    /// Used to parse the `is` clause in the room member count condition.
+    static ref INEQUALITY_EXPR: Regex = Regex::new(r"^([=<>]*)([0-9]+)$").expect("valid regex");
+}
+
+/// Allows running a set of push rules against a particular event.
+#[pyclass]
+pub struct PushRuleEvaluator {
+    /// A mapping of "flattened" keys to string values in the event, e.g.
+    /// includes things like "type" and "content.msgtype".
+    flattened_keys: BTreeMap<String, String>,
+
+    /// The "content.body", if any.
+    body: String,
+
+    /// The number of users in the room.
+    room_member_count: u64,
+
+    /// The `notifications` section of the current power levels in the room.
+    notification_power_levels: BTreeMap<String, i64>,
+
+    /// The relations related to the event as a mapping from relation type to
+    /// set of sender/event type 2-tuples.
+    relations: BTreeMap<String, BTreeSet<(String, String)>>,
+
+    /// Is running "relation" conditions enabled?
+    relation_match_enabled: bool,
+
+    /// The power level of the sender of the event, or None if event is an
+    /// outlier.
+    sender_power_level: Option<i64>,
+}
+
+#[pymethods]
+impl PushRuleEvaluator {
+    /// Create a new `PushRuleEvaluator`. See struct docstring for details.
+    #[new]
+    pub fn py_new(
+        flattened_keys: BTreeMap<String, String>,
+        room_member_count: u64,
+        sender_power_level: Option<i64>,
+        notification_power_levels: BTreeMap<String, i64>,
+        relations: BTreeMap<String, BTreeSet<(String, String)>>,
+        relation_match_enabled: bool,
+    ) -> Result<Self, Error> {
+        let body = flattened_keys
+            .get("content.body")
+            .cloned()
+            .unwrap_or_default();
+
+        Ok(PushRuleEvaluator {
+            flattened_keys,
+            body,
+            room_member_count,
+            notification_power_levels,
+            relations,
+            relation_match_enabled,
+            sender_power_level,
+        })
+    }
+
+    /// Run the evaluator with the given push rules, for the given user ID and
+    /// display name of the user.
+    ///
+    /// Passing in None will skip evaluating rules matching user ID and display
+    /// name.
+    ///
+    /// Returns the set of actions, if any, that match (filtering out any
+    /// `dont_notify` actions).
+    pub fn run(
+        &self,
+        push_rules: &FilteredPushRules,
+        user_id: Option<&str>,
+        display_name: Option<&str>,
+    ) -> Vec<Action> {
+        'outer: for (push_rule, enabled) in push_rules.iter() {
+            if !enabled {
+                continue;
+            }
+
+            for condition in push_rule.conditions.iter() {
+                match self.match_condition(condition, user_id, display_name) {
+                    Ok(true) => {}
+                    Ok(false) => continue 'outer,
+                    Err(err) => {
+                        warn!("Condition match failed {err}");
+                        continue 'outer;
+                    }
+                }
+            }
+
+            let actions = push_rule
+                .actions
+                .iter()
+                // Filter out "dont_notify" actions, as we don't store them.
+                .filter(|a| **a != Action::DontNotify)
+                .cloned()
+                .collect();
+
+            return actions;
+        }
+
+        Vec::new()
+    }
+
+    /// Check if the given condition matches.
+    fn matches(
+        &self,
+        condition: Condition,
+        user_id: Option<&str>,
+        display_name: Option<&str>,
+    ) -> bool {
+        match self.match_condition(&condition, user_id, display_name) {
+            Ok(true) => true,
+            Ok(false) => false,
+            Err(err) => {
+                warn!("Condition match failed {err}");
+                false
+            }
+        }
+    }
+}
+
+impl PushRuleEvaluator {
+    /// Match a given `Condition` for a push rule.
+    pub fn match_condition(
+        &self,
+        condition: &Condition,
+        user_id: Option<&str>,
+        display_name: Option<&str>,
+    ) -> Result<bool, Error> {
+        let known_condition = match condition {
+            Condition::Known(known) => known,
+            Condition::Unknown(_) => {
+                return Ok(false);
+            }
+        };
+
+        let result = match known_condition {
+            KnownCondition::EventMatch(event_match) => {
+                self.match_event_match(event_match, user_id)?
+            }
+            KnownCondition::ContainsDisplayName => {
+                if let Some(dn) = display_name {
+                    if !dn.is_empty() {
+                        get_glob_matcher(dn, GlobMatchType::Word)?.is_match(&self.body)?
+                    } else {
+                        // We specifically ignore empty display names, as otherwise
+                        // they would always match.
+                        false
+                    }
+                } else {
+                    false
+                }
+            }
+            KnownCondition::RoomMemberCount { is } => {
+                if let Some(is) = is {
+                    self.match_member_count(is)?
+                } else {
+                    false
+                }
+            }
+            KnownCondition::SenderNotificationPermission { key } => {
+                if let Some(sender_power_level) = &self.sender_power_level {
+                    let required_level = self
+                        .notification_power_levels
+                        .get(key.as_ref())
+                        .copied()
+                        .unwrap_or(50);
+
+                    *sender_power_level >= required_level
+                } else {
+                    false
+                }
+            }
+            KnownCondition::RelationMatch {
+                rel_type,
+                event_type_pattern,
+                sender,
+                sender_type,
+            } => {
+                self.match_relations(rel_type, sender, sender_type, user_id, event_type_pattern)?
+            }
+        };
+
+        Ok(result)
+    }
+
+    /// Evaluates a relation condition.
+    fn match_relations(
+        &self,
+        rel_type: &str,
+        sender: &Option<Cow<str>>,
+        sender_type: &Option<Cow<str>>,
+        user_id: Option<&str>,
+        event_type_pattern: &Option<Cow<str>>,
+    ) -> Result<bool, Error> {
+        // First check if relation matching is enabled...
+        if !self.relation_match_enabled {
+            return Ok(false);
+        }
+
+        // ... and if there are any relations to match against.
+        let relations = if let Some(relations) = self.relations.get(rel_type) {
+            relations
+        } else {
+            return Ok(false);
+        };
+
+        // Extract the sender pattern from the condition
+        let sender_pattern = if let Some(sender) = sender {
+            Some(sender.as_ref())
+        } else if let Some(sender_type) = sender_type {
+            if sender_type == "user_id" {
+                if let Some(user_id) = user_id {
+                    Some(user_id)
+                } else {
+                    return Ok(false);
+                }
+            } else {
+                warn!("Unrecognized sender_type: {sender_type}");
+                return Ok(false);
+            }
+        } else {
+            None
+        };
+
+        let mut sender_compiled_pattern = if let Some(pattern) = sender_pattern {
+            Some(get_glob_matcher(pattern, GlobMatchType::Whole)?)
+        } else {
+            None
+        };
+
+        let mut type_compiled_pattern = if let Some(pattern) = event_type_pattern {
+            Some(get_glob_matcher(pattern, GlobMatchType::Whole)?)
+        } else {
+            None
+        };
+
+        for (relation_sender, event_type) in relations {
+            if let Some(pattern) = &mut sender_compiled_pattern {
+                if !pattern.is_match(relation_sender)? {
+                    continue;
+                }
+            }
+
+            if let Some(pattern) = &mut type_compiled_pattern {
+                if !pattern.is_match(event_type)? {
+                    continue;
+                }
+            }
+
+            return Ok(true);
+        }
+
+        Ok(false)
+    }
+
+    /// Evaluates an `event_match` condition.
+    fn match_event_match(
+        &self,
+        event_match: &EventMatchCondition,
+        user_id: Option<&str>,
+    ) -> Result<bool, Error> {
+        let pattern = if let Some(pattern) = &event_match.pattern {
+            pattern
+        } else if let Some(pattern_type) = &event_match.pattern_type {
+            // The `pattern_type` can either be "user_id" or "user_localpart",
+            // either way if we don't have a `user_id` then the condition can't
+            // match.
+            let user_id = if let Some(user_id) = user_id {
+                user_id
+            } else {
+                return Ok(false);
+            };
+
+            match &**pattern_type {
+                "user_id" => user_id,
+                "user_localpart" => get_localpart_from_id(user_id)?,
+                _ => return Ok(false),
+            }
+        } else {
+            return Ok(false);
+        };
+
+        let haystack = if let Some(haystack) = self.flattened_keys.get(&*event_match.key) {
+            haystack
+        } else {
+            return Ok(false);
+        };
+
+        // For the content.body we match against "words", but for everything
+        // else we match against the entire value.
+        let match_type = if event_match.key == "content.body" {
+            GlobMatchType::Word
+        } else {
+            GlobMatchType::Whole
+        };
+
+        let mut compiled_pattern = get_glob_matcher(pattern, match_type)?;
+        compiled_pattern.is_match(haystack)
+    }
+
+    /// Match the member count against an 'is' condition
+    /// The `is` condition can be things like '>2', '==3' or even just '4'.
+    fn match_member_count(&self, is: &str) -> Result<bool, Error> {
+        let captures = INEQUALITY_EXPR.captures(is).context("bad 'is' clause")?;
+        let ineq = captures.get(1).map_or("==", |m| m.as_str());
+        let rhs: u64 = captures
+            .get(2)
+            .context("missing number")?
+            .as_str()
+            .parse()?;
+
+        let matches = match ineq {
+            "" | "==" => self.room_member_count == rhs,
+            "<" => self.room_member_count < rhs,
+            ">" => self.room_member_count > rhs,
+            ">=" => self.room_member_count >= rhs,
+            "<=" => self.room_member_count <= rhs,
+            _ => false,
+        };
+
+        Ok(matches)
+    }
+}
+
+#[test]
+fn push_rule_evaluator() {
+    let mut flattened_keys = BTreeMap::new();
+    flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string());
+    let evaluator = PushRuleEvaluator::py_new(
+        flattened_keys,
+        10,
+        Some(0),
+        BTreeMap::new(),
+        BTreeMap::new(),
+        true,
+    )
+    .unwrap();
+
+    let result = evaluator.run(&FilteredPushRules::default(), None, Some("bob"));
+    assert_eq!(result.len(), 3);
+}
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index de6764e7c5..30fffc31ad 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -42,7 +42,6 @@
 //!
 //! The set of "base rules" are the list of rules that every user has by default. A
 //! user can modify their copy of the push rules in one of three ways:
-//!
 //!     1. Adding a new push rule of a certain kind
 //!     2. Changing the actions of a base rule
 //!     3. Enabling/disabling a base rule.
@@ -58,12 +57,16 @@ use std::collections::{BTreeMap, HashMap, HashSet};
 use anyhow::{Context, Error};
 use log::warn;
 use pyo3::prelude::*;
-use pythonize::pythonize;
+use pythonize::{depythonize, pythonize};
 use serde::de::Error as _;
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 
+use self::evaluator::PushRuleEvaluator;
+
 mod base_rules;
+pub mod evaluator;
+pub mod utils;
 
 /// Called when registering modules with python.
 pub fn register_module(py: Python<'_>, m: &PyModule) -> PyResult<()> {
@@ -71,6 +74,7 @@ pub fn register_module(py: Python<'_>, m: &PyModule) -> PyResult<()> {
     child_module.add_class::<PushRule>()?;
     child_module.add_class::<PushRules>()?;
     child_module.add_class::<FilteredPushRules>()?;
+    child_module.add_class::<PushRuleEvaluator>()?;
     child_module.add_function(wrap_pyfunction!(get_base_rule_ids, m)?)?;
 
     m.add_submodule(child_module)?;
@@ -274,6 +278,8 @@ pub enum KnownCondition {
     #[serde(rename = "org.matrix.msc3772.relation_match")]
     RelationMatch {
         rel_type: Cow<'static, str>,
+        #[serde(skip_serializing_if = "Option::is_none", rename = "type")]
+        event_type_pattern: Option<Cow<'static, str>>,
         #[serde(skip_serializing_if = "Option::is_none")]
         sender: Option<Cow<'static, str>>,
         #[serde(skip_serializing_if = "Option::is_none")]
@@ -287,20 +293,26 @@ impl IntoPy<PyObject> for Condition {
     }
 }
 
+impl<'source> FromPyObject<'source> for Condition {
+    fn extract(ob: &'source PyAny) -> PyResult<Self> {
+        Ok(depythonize(ob)?)
+    }
+}
+
 /// The body of a [`Condition::EventMatch`]
 #[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct EventMatchCondition {
-    key: Cow<'static, str>,
+    pub key: Cow<'static, str>,
     #[serde(skip_serializing_if = "Option::is_none")]
-    pattern: Option<Cow<'static, str>>,
+    pub pattern: Option<Cow<'static, str>>,
     #[serde(skip_serializing_if = "Option::is_none")]
-    pattern_type: Option<Cow<'static, str>>,
+    pub pattern_type: Option<Cow<'static, str>>,
 }
 
 /// The collection of push rules for a user.
 #[derive(Debug, Clone, Default)]
 #[pyclass(frozen)]
-struct PushRules {
+pub struct PushRules {
     /// Custom push rules that override a base rule.
     overridden_base_rules: HashMap<Cow<'static, str>, PushRule>,
 
@@ -319,7 +331,7 @@ struct PushRules {
 #[pymethods]
 impl PushRules {
     #[new]
-    fn new(rules: Vec<PushRule>) -> PushRules {
+    pub fn new(rules: Vec<PushRule>) -> PushRules {
         let mut push_rules: PushRules = Default::default();
 
         for rule in rules {
@@ -396,7 +408,7 @@ pub struct FilteredPushRules {
 #[pymethods]
 impl FilteredPushRules {
     #[new]
-    fn py_new(
+    pub fn py_new(
         push_rules: PushRules,
         enabled_map: BTreeMap<String, bool>,
         msc3786_enabled: bool,
diff --git a/rust/src/push/utils.rs b/rust/src/push/utils.rs
new file mode 100644
index 0000000000..8759340473
--- /dev/null
+++ b/rust/src/push/utils.rs
@@ -0,0 +1,215 @@
+// Copyright 2022 The Matrix.org Foundation C.I.C.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use anyhow::bail;
+use anyhow::Context;
+use anyhow::Error;
+use lazy_static::lazy_static;
+use regex;
+use regex::Regex;
+use regex::RegexBuilder;
+
+lazy_static! {
+    /// Matches runs of non-wildcard characters followed by wildcard characters.
+    static ref WILDCARD_RUN: Regex = Regex::new(r"([^\?\*]*)([\?\*]*)").expect("valid regex");
+}
+
+/// Extract the localpart from a Matrix style ID
+pub(crate) fn get_localpart_from_id(id: &str) -> Result<&str, Error> {
+    let (localpart, _) = id
+        .split_once(':')
+        .with_context(|| format!("ID does not contain colon: {id}"))?;
+
+    // We need to strip off the first character, which is the ID type.
+    if localpart.is_empty() {
+        bail!("Invalid ID {id}");
+    }
+
+    Ok(&localpart[1..])
+}
+
+/// Used by `glob_to_regex` to specify what to match the regex against.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum GlobMatchType {
+    /// The generated regex will match against the entire input.
+    Whole,
+    /// The generated regex will match against words.
+    Word,
+}
+
+/// Convert a "glob" style expression to a regex, anchoring either to the entire
+/// input or to individual words.
+pub fn glob_to_regex(glob: &str, match_type: GlobMatchType) -> Result<Regex, Error> {
+    let mut chunks = Vec::new();
+
+    // Patterns with wildcards must be simplified to avoid performance cliffs
+    // - The glob `?**?**?` is equivalent to the glob `???*`
+    // - The glob `???*` is equivalent to the regex `.{3,}`
+    for captures in WILDCARD_RUN.captures_iter(glob) {
+        if let Some(chunk) = captures.get(1) {
+            chunks.push(regex::escape(chunk.as_str()));
+        }
+
+        if let Some(wildcards) = captures.get(2) {
+            if wildcards.as_str() == "" {
+                continue;
+            }
+
+            let question_marks = wildcards.as_str().chars().filter(|c| *c == '?').count();
+
+            if wildcards.as_str().contains('*') {
+                chunks.push(format!(".{{{question_marks},}}"));
+            } else {
+                chunks.push(format!(".{{{question_marks}}}"));
+            }
+        }
+    }
+
+    let joined = chunks.join("");
+
+    let regex_str = match match_type {
+        GlobMatchType::Whole => format!(r"\A{joined}\z"),
+
+        // `^|\W` and `\W|$` handle the case where `pattern` starts or ends with a non-word
+        // character.
+        GlobMatchType::Word => format!(r"(?:^|\b|\W){joined}(?:\b|\W|$)"),
+    };
+
+    Ok(RegexBuilder::new(&regex_str)
+        .case_insensitive(true)
+        .build()?)
+}
+
+/// Compiles the glob into a `Matcher`.
+pub fn get_glob_matcher(glob: &str, match_type: GlobMatchType) -> Result<Matcher, Error> {
+    // There are a number of shortcuts we can make if the glob doesn't contain a
+    // wild card.
+    let matcher = if glob.contains(['*', '?']) {
+        let regex = glob_to_regex(glob, match_type)?;
+        Matcher::Regex(regex)
+    } else if match_type == GlobMatchType::Whole {
+        // If there aren't any wildcards and we're matching the whole thing,
+        // then we simply can do a case-insensitive string match.
+        Matcher::Whole(glob.to_lowercase())
+    } else {
+        // Otherwise, if we're matching against words then we can first check
+        // if the haystack contains the glob at all.
+        Matcher::Word {
+            word: glob.to_lowercase(),
+            regex: None,
+        }
+    };
+
+    Ok(matcher)
+}
+
+/// Matches against a glob
+pub enum Matcher {
+    /// Plain regex matching.
+    Regex(Regex),
+
+    /// Case-insensitive equality.
+    Whole(String),
+
+    /// Word matching. `regex` is a cache of calling [`glob_to_regex`] on word.
+    Word { word: String, regex: Option<Regex> },
+}
+
+impl Matcher {
+    /// Checks if the glob matches the given haystack.
+    pub fn is_match(&mut self, haystack: &str) -> Result<bool, Error> {
+        // We want to do case-insensitive matching, so we convert to
+        // lowercase first.
+        let haystack = haystack.to_lowercase();
+
+        match self {
+            Matcher::Regex(regex) => Ok(regex.is_match(&haystack)),
+            Matcher::Whole(whole) => Ok(whole == &haystack),
+            Matcher::Word { word, regex } => {
+                // If we're looking for a literal word, then we first check if
+                // the haystack contains the word as a substring.
+                if !haystack.contains(&*word) {
+                    return Ok(false);
+                }
+
+                // If it does contain the word as a substring, then we need to
+                // check if it is an actual word by testing it against the regex.
+                let regex = if let Some(regex) = regex {
+                    regex
+                } else {
+                    let compiled_regex = glob_to_regex(word, GlobMatchType::Word)?;
+                    regex.insert(compiled_regex)
+                };
+
+                Ok(regex.is_match(&haystack))
+            }
+        }
+    }
+}
+
+#[test]
+fn test_get_domain_from_id() {
+    get_localpart_from_id("").unwrap_err();
+    get_localpart_from_id(":").unwrap_err();
+    get_localpart_from_id(":asd").unwrap_err();
+    get_localpart_from_id("::as::asad").unwrap_err();
+
+    assert_eq!(get_localpart_from_id("@test:foo").unwrap(), "test");
+    assert_eq!(get_localpart_from_id("@:").unwrap(), "");
+    assert_eq!(get_localpart_from_id("@test:foo:907").unwrap(), "test");
+}
+
+#[test]
+fn tset_glob() -> Result<(), Error> {
+    assert_eq!(
+        glob_to_regex("simple", GlobMatchType::Whole)?.as_str(),
+        r"\Asimple\z"
+    );
+    assert_eq!(
+        glob_to_regex("simple*", GlobMatchType::Whole)?.as_str(),
+        r"\Asimple.{0,}\z"
+    );
+    assert_eq!(
+        glob_to_regex("simple?", GlobMatchType::Whole)?.as_str(),
+        r"\Asimple.{1}\z"
+    );
+    assert_eq!(
+        glob_to_regex("simple?*?*", GlobMatchType::Whole)?.as_str(),
+        r"\Asimple.{2,}\z"
+    );
+    assert_eq!(
+        glob_to_regex("simple???", GlobMatchType::Whole)?.as_str(),
+        r"\Asimple.{3}\z"
+    );
+
+    assert_eq!(
+        glob_to_regex("escape.", GlobMatchType::Whole)?.as_str(),
+        r"\Aescape\.\z"
+    );
+
+    assert!(glob_to_regex("simple", GlobMatchType::Whole)?.is_match("simple"));
+    assert!(!glob_to_regex("simple", GlobMatchType::Whole)?.is_match("simples"));
+    assert!(glob_to_regex("simple*", GlobMatchType::Whole)?.is_match("simples"));
+    assert!(glob_to_regex("simple?", GlobMatchType::Whole)?.is_match("simples"));
+    assert!(glob_to_regex("simple*", GlobMatchType::Whole)?.is_match("simple"));
+
+    assert!(glob_to_regex("simple", GlobMatchType::Word)?.is_match("some simple."));
+    assert!(glob_to_regex("simple", GlobMatchType::Word)?.is_match("simple"));
+    assert!(!glob_to_regex("simple", GlobMatchType::Word)?.is_match("simples"));
+
+    assert!(glob_to_regex("@user:foo", GlobMatchType::Word)?.is_match("Some @user:foo test"));
+    assert!(glob_to_regex("@user:foo", GlobMatchType::Word)?.is_match("@user:foo"));
+
+    Ok(())
+}
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index 93c4e69d42..fffb8419c6 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -1,4 +1,4 @@
-from typing import Any, Collection, Dict, Mapping, Sequence, Tuple, Union
+from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Set, Tuple, Union
 
 from synapse.types import JsonDict
 
@@ -35,3 +35,20 @@ class FilteredPushRules:
     def rules(self) -> Collection[Tuple[PushRule, bool]]: ...
 
 def get_base_rule_ids() -> Collection[str]: ...
+
+class PushRuleEvaluator:
+    def __init__(
+        self,
+        flattened_keys: Mapping[str, str],
+        room_member_count: int,
+        sender_power_level: Optional[int],
+        notification_power_levels: Mapping[str, int],
+        relations: Mapping[str, Set[Tuple[str, str]]],
+        relation_match_enabled: bool,
+    ): ...
+    def run(
+        self,
+        push_rules: FilteredPushRules,
+        user_id: Optional[str],
+        display_name: Optional[str],
+    ) -> Collection[dict]: ...
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 32313e3bcf..60f3129005 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -17,6 +17,7 @@ import itertools
 import logging
 from typing import (
     TYPE_CHECKING,
+    Any,
     Collection,
     Dict,
     Iterable,
@@ -37,13 +38,11 @@ from synapse.events.snapshot import EventContext
 from synapse.state import POWER_KEY
 from synapse.storage.databases.main.roommember import EventIdMembership
 from synapse.storage.state import StateFilter
-from synapse.synapse_rust.push import FilteredPushRules, PushRule
+from synapse.synapse_rust.push import FilteredPushRules, PushRule, PushRuleEvaluator
 from synapse.util.caches import register_cache
 from synapse.util.metrics import measure_func
 from synapse.visibility import filter_event_for_clients_with_state
 
-from .push_rule_evaluator import PushRuleEvaluatorForEvent
-
 if TYPE_CHECKING:
     from synapse.server import HomeServer
 
@@ -290,11 +289,11 @@ class BulkPushRuleEvaluator:
             if relation.rel_type == RelationTypes.THREAD:
                 thread_id = relation.parent_id
 
-        evaluator = PushRuleEvaluatorForEvent(
-            event,
+        evaluator = PushRuleEvaluator(
+            _flatten_dict(event),
             room_member_count,
             sender_power_level,
-            power_levels,
+            power_levels.get("notifications", {}),
             relations,
             self._relations_match_enabled,
         )
@@ -338,17 +337,10 @@ class BulkPushRuleEvaluator:
                 # current user, it'll be added to the dict later.
                 actions_by_user[uid] = []
 
-            for rule, enabled in rules.rules():
-                if not enabled:
-                    continue
-
-                matches = evaluator.check_conditions(rule.conditions, uid, display_name)
-                if matches:
-                    actions = [x for x in rule.actions if x != "dont_notify"]
-                    if actions and "notify" in actions:
-                        # Push rules say we should notify the user of this event
-                        actions_by_user[uid] = actions
-                    break
+            actions = evaluator.run(rules, uid, display_name)
+            if "notify" in actions:
+                # Push rules say we should notify the user of this event
+                actions_by_user[uid] = actions
 
         # Mark in the DB staging area the push actions for users who should be
         # notified for this event. (This will then get handled when we persist
@@ -365,3 +357,21 @@ MemberMap = Dict[str, Optional[EventIdMembership]]
 Rule = Dict[str, dict]
 RulesByUser = Dict[str, List[Rule]]
 StateGroup = Union[object, int]
+
+
+def _flatten_dict(
+    d: Union[EventBase, Mapping[str, Any]],
+    prefix: Optional[List[str]] = None,
+    result: Optional[Dict[str, str]] = None,
+) -> Dict[str, str]:
+    if prefix is None:
+        prefix = []
+    if result is None:
+        result = {}
+    for key, value in d.items():
+        if isinstance(value, str):
+            result[".".join(prefix + [key])] = value.lower()
+        elif isinstance(value, Mapping):
+            _flatten_dict(value, prefix=(prefix + [key]), result=result)
+
+    return result
diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py
index e96fb45e9f..b048b03a74 100644
--- a/synapse/push/httppusher.py
+++ b/synapse/push/httppusher.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 import logging
 import urllib.parse
-from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
 
 from prometheus_client import Counter
 
@@ -28,7 +28,7 @@ from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.push import Pusher, PusherConfig, PusherConfigException
 from synapse.storage.databases.main.event_push_actions import HttpPushAction
 
-from . import push_rule_evaluator, push_tools
+from . import push_tools
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -56,6 +56,39 @@ http_badges_failed_counter = Counter(
 )
 
 
+def tweaks_for_actions(actions: List[Union[str, Dict]]) -> Dict[str, Any]:
+    """
+    Converts a list of actions into a `tweaks` dict (which can then be passed to
+        the push gateway).
+
+    This function ignores all actions other than `set_tweak` actions, and treats
+    absent `value`s as `True`, which agrees with the only spec-defined treatment
+    of absent `value`s (namely, for `highlight` tweaks).
+
+    Args:
+        actions: list of actions
+            e.g. [
+                {"set_tweak": "a", "value": "AAA"},
+                {"set_tweak": "b", "value": "BBB"},
+                {"set_tweak": "highlight"},
+                "notify"
+            ]
+
+    Returns:
+        dictionary of tweaks for those actions
+            e.g. {"a": "AAA", "b": "BBB", "highlight": True}
+    """
+    tweaks = {}
+    for a in actions:
+        if not isinstance(a, dict):
+            continue
+        if "set_tweak" in a:
+            # value is allowed to be absent in which case the value assumed
+            # should be True.
+            tweaks[a["set_tweak"]] = a.get("value", True)
+    return tweaks
+
+
 class HttpPusher(Pusher):
     INITIAL_BACKOFF_SEC = 1  # in seconds because that's what Twisted takes
     MAX_BACKOFF_SEC = 60 * 60
@@ -281,7 +314,7 @@ class HttpPusher(Pusher):
         if "notify" not in push_action.actions:
             return True
 
-        tweaks = push_rule_evaluator.tweaks_for_actions(push_action.actions)
+        tweaks = tweaks_for_actions(push_action.actions)
         badge = await push_tools.get_badge_count(
             self.hs.get_datastores().main,
             self.user_id,
diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py
deleted file mode 100644
index f8176c5a42..0000000000
--- a/synapse/push/push_rule_evaluator.py
+++ /dev/null
@@ -1,361 +0,0 @@
-# Copyright 2015, 2016 OpenMarket Ltd
-# Copyright 2017 New Vector Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-import re
-from typing import (
-    Any,
-    Dict,
-    List,
-    Mapping,
-    Optional,
-    Pattern,
-    Sequence,
-    Set,
-    Tuple,
-    Union,
-)
-
-from matrix_common.regex import glob_to_regex, to_word_pattern
-
-from synapse.events import EventBase
-from synapse.types import UserID
-from synapse.util.caches.lrucache import LruCache
-
-logger = logging.getLogger(__name__)
-
-
-GLOB_REGEX = re.compile(r"\\\[(\\\!|)(.*)\\\]")
-IS_GLOB = re.compile(r"[\?\*\[\]]")
-INEQUALITY_EXPR = re.compile("^([=<>]*)([0-9]*)$")
-
-
-def _room_member_count(condition: Mapping[str, Any], room_member_count: int) -> bool:
-    return _test_ineq_condition(condition, room_member_count)
-
-
-def _sender_notification_permission(
-    condition: Mapping[str, Any],
-    sender_power_level: Optional[int],
-    power_levels: Dict[str, Union[int, Dict[str, int]]],
-) -> bool:
-    if sender_power_level is None:
-        return False
-
-    notif_level_key = condition.get("key")
-    if notif_level_key is None:
-        return False
-
-    notif_levels = power_levels.get("notifications", {})
-    assert isinstance(notif_levels, dict)
-    room_notif_level = notif_levels.get(notif_level_key, 50)
-
-    return sender_power_level >= room_notif_level
-
-
-def _test_ineq_condition(condition: Mapping[str, Any], number: int) -> bool:
-    if "is" not in condition:
-        return False
-    m = INEQUALITY_EXPR.match(condition["is"])
-    if not m:
-        return False
-    ineq = m.group(1)
-    rhs = m.group(2)
-    if not rhs.isdigit():
-        return False
-    rhs_int = int(rhs)
-
-    if ineq == "" or ineq == "==":
-        return number == rhs_int
-    elif ineq == "<":
-        return number < rhs_int
-    elif ineq == ">":
-        return number > rhs_int
-    elif ineq == ">=":
-        return number >= rhs_int
-    elif ineq == "<=":
-        return number <= rhs_int
-    else:
-        return False
-
-
-def tweaks_for_actions(actions: List[Union[str, Dict]]) -> Dict[str, Any]:
-    """
-    Converts a list of actions into a `tweaks` dict (which can then be passed to
-        the push gateway).
-
-    This function ignores all actions other than `set_tweak` actions, and treats
-    absent `value`s as `True`, which agrees with the only spec-defined treatment
-    of absent `value`s (namely, for `highlight` tweaks).
-
-    Args:
-        actions: list of actions
-            e.g. [
-                {"set_tweak": "a", "value": "AAA"},
-                {"set_tweak": "b", "value": "BBB"},
-                {"set_tweak": "highlight"},
-                "notify"
-            ]
-
-    Returns:
-        dictionary of tweaks for those actions
-            e.g. {"a": "AAA", "b": "BBB", "highlight": True}
-    """
-    tweaks = {}
-    for a in actions:
-        if not isinstance(a, dict):
-            continue
-        if "set_tweak" in a:
-            # value is allowed to be absent in which case the value assumed
-            # should be True.
-            tweaks[a["set_tweak"]] = a.get("value", True)
-    return tweaks
-
-
-class PushRuleEvaluatorForEvent:
-    def __init__(
-        self,
-        event: EventBase,
-        room_member_count: int,
-        sender_power_level: Optional[int],
-        power_levels: Dict[str, Union[int, Dict[str, int]]],
-        relations: Dict[str, Set[Tuple[str, str]]],
-        relations_match_enabled: bool,
-    ):
-        self._event = event
-        self._room_member_count = room_member_count
-        self._sender_power_level = sender_power_level
-        self._power_levels = power_levels
-        self._relations = relations
-        self._relations_match_enabled = relations_match_enabled
-
-        # Maps strings of e.g. 'content.body' -> event["content"]["body"]
-        self._value_cache = _flatten_dict(event)
-
-        # Maps cache keys to final values.
-        self._condition_cache: Dict[str, bool] = {}
-
-    def check_conditions(
-        self, conditions: Sequence[Mapping], uid: str, display_name: Optional[str]
-    ) -> bool:
-        """
-        Returns true if a user's conditions/user ID/display name match the event.
-
-        Args:
-            conditions: The user's conditions to match.
-            uid: The user's MXID.
-            display_name: The display name.
-
-        Returns:
-             True if all conditions match the event, False otherwise.
-        """
-        for cond in conditions:
-            _cache_key = cond.get("_cache_key", None)
-            if _cache_key:
-                res = self._condition_cache.get(_cache_key, None)
-                if res is False:
-                    return False
-                elif res is True:
-                    continue
-
-            res = self.matches(cond, uid, display_name)
-            if _cache_key:
-                self._condition_cache[_cache_key] = bool(res)
-
-            if not res:
-                return False
-
-        return True
-
-    def matches(
-        self, condition: Mapping[str, Any], user_id: str, display_name: Optional[str]
-    ) -> bool:
-        """
-        Returns true if a user's condition/user ID/display name match the event.
-
-        Args:
-            condition: The user's condition to match.
-            uid: The user's MXID.
-            display_name: The display name, or None if there is not one.
-
-        Returns:
-             True if the condition matches the event, False otherwise.
-        """
-        if condition["kind"] == "event_match":
-            return self._event_match(condition, user_id)
-        elif condition["kind"] == "contains_display_name":
-            return self._contains_display_name(display_name)
-        elif condition["kind"] == "room_member_count":
-            return _room_member_count(condition, self._room_member_count)
-        elif condition["kind"] == "sender_notification_permission":
-            return _sender_notification_permission(
-                condition, self._sender_power_level, self._power_levels
-            )
-        elif (
-            condition["kind"] == "org.matrix.msc3772.relation_match"
-            and self._relations_match_enabled
-        ):
-            return self._relation_match(condition, user_id)
-        else:
-            # XXX This looks incorrect -- we have reached an unknown condition
-            #     kind and are unconditionally returning that it matches. Note
-            #     that it seems possible to provide a condition to the /pushrules
-            #     endpoint with an unknown kind, see _rule_tuple_from_request_object.
-            return True
-
-    def _event_match(self, condition: Mapping, user_id: str) -> bool:
-        """
-        Check an "event_match" push rule condition.
-
-        Args:
-            condition: The "event_match" push rule condition to match.
-            user_id: The user's MXID.
-
-        Returns:
-             True if the condition matches the event, False otherwise.
-        """
-        pattern = condition.get("pattern", None)
-
-        if not pattern:
-            pattern_type = condition.get("pattern_type", None)
-            if pattern_type == "user_id":
-                pattern = user_id
-            elif pattern_type == "user_localpart":
-                pattern = UserID.from_string(user_id).localpart
-
-        if not pattern:
-            logger.warning("event_match condition with no pattern")
-            return False
-
-        # XXX: optimisation: cache our pattern regexps
-        if condition["key"] == "content.body":
-            body = self._event.content.get("body", None)
-            if not body or not isinstance(body, str):
-                return False
-
-            return _glob_matches(pattern, body, word_boundary=True)
-        else:
-            haystack = self._value_cache.get(condition["key"], None)
-            if haystack is None:
-                return False
-
-            return _glob_matches(pattern, haystack)
-
-    def _contains_display_name(self, display_name: Optional[str]) -> bool:
-        """
-        Check an "event_match" push rule condition.
-
-        Args:
-            display_name: The display name, or None if there is not one.
-
-        Returns:
-             True if the display name is found in the event body, False otherwise.
-        """
-        if not display_name:
-            return False
-
-        body = self._event.content.get("body", None)
-        if not body or not isinstance(body, str):
-            return False
-
-        # Similar to _glob_matches, but do not treat display_name as a glob.
-        r = regex_cache.get((display_name, False, True), None)
-        if not r:
-            r1 = re.escape(display_name)
-            r1 = to_word_pattern(r1)
-            r = re.compile(r1, flags=re.IGNORECASE)
-            regex_cache[(display_name, False, True)] = r
-
-        return bool(r.search(body))
-
-    def _relation_match(self, condition: Mapping, user_id: str) -> bool:
-        """
-        Check an "relation_match" push rule condition.
-
-        Args:
-            condition: The "event_match" push rule condition to match.
-            user_id: The user's MXID.
-
-        Returns:
-             True if the condition matches the event, False otherwise.
-        """
-        rel_type = condition.get("rel_type")
-        if not rel_type:
-            logger.warning("relation_match condition missing rel_type")
-            return False
-
-        sender_pattern = condition.get("sender")
-        if sender_pattern is None:
-            sender_type = condition.get("sender_type")
-            if sender_type == "user_id":
-                sender_pattern = user_id
-        type_pattern = condition.get("type")
-
-        # If any other relations matches, return True.
-        for sender, event_type in self._relations.get(rel_type, ()):
-            if sender_pattern and not _glob_matches(sender_pattern, sender):
-                continue
-            if type_pattern and not _glob_matches(type_pattern, event_type):
-                continue
-            # All values must have matched.
-            return True
-
-        # No relations matched.
-        return False
-
-
-# Caches (string, is_glob, word_boundary) -> regex for push. See _glob_matches
-regex_cache: LruCache[Tuple[str, bool, bool], Pattern] = LruCache(
-    50000, "regex_push_cache"
-)
-
-
-def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool:
-    """Tests if value matches glob.
-
-    Args:
-        glob
-        value: String to test against glob.
-        word_boundary: Whether to match against word boundaries or entire
-            string. Defaults to False.
-    """
-
-    try:
-        r = regex_cache.get((glob, True, word_boundary), None)
-        if not r:
-            r = glob_to_regex(glob, word_boundary=word_boundary)
-            regex_cache[(glob, True, word_boundary)] = r
-        return bool(r.search(value))
-    except re.error:
-        logger.warning("Failed to parse glob to regex: %r", glob)
-        return False
-
-
-def _flatten_dict(
-    d: Union[EventBase, Mapping[str, Any]],
-    prefix: Optional[List[str]] = None,
-    result: Optional[Dict[str, str]] = None,
-) -> Dict[str, str]:
-    if prefix is None:
-        prefix = []
-    if result is None:
-        result = {}
-    for key, value in d.items():
-        if isinstance(value, str):
-            result[".".join(prefix + [key])] = value.lower()
-        elif isinstance(value, Mapping):
-            _flatten_dict(value, prefix=(prefix + [key]), result=result)
-
-    return result
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index 718f489577..b8308cbc05 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -23,11 +23,12 @@ from synapse.api.constants import EventTypes, Membership
 from synapse.api.room_versions import RoomVersions
 from synapse.appservice import ApplicationService
 from synapse.events import FrozenEvent
-from synapse.push import push_rule_evaluator
-from synapse.push.push_rule_evaluator import PushRuleEvaluatorForEvent
+from synapse.push.bulk_push_rule_evaluator import _flatten_dict
+from synapse.push.httppusher import tweaks_for_actions
 from synapse.rest.client import login, register, room
 from synapse.server import HomeServer
 from synapse.storage.databases.main.appservice import _make_exclusive_regex
+from synapse.synapse_rust.push import PushRuleEvaluator
 from synapse.types import JsonDict
 from synapse.util import Clock
 
@@ -41,7 +42,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         content: JsonDict,
         relations: Optional[Dict[str, Set[Tuple[str, str]]]] = None,
         relations_match_enabled: bool = False,
-    ) -> PushRuleEvaluatorForEvent:
+    ) -> PushRuleEvaluator:
         event = FrozenEvent(
             {
                 "event_id": "$event_id",
@@ -56,12 +57,12 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         room_member_count = 0
         sender_power_level = 0
         power_levels: Dict[str, Union[int, Dict[str, int]]] = {}
-        return PushRuleEvaluatorForEvent(
-            event,
+        return PushRuleEvaluator(
+            _flatten_dict(event),
             room_member_count,
             sender_power_level,
-            power_levels,
-            relations or set(),
+            power_levels.get("notifications", {}),
+            relations or {},
             relations_match_enabled,
         )
 
@@ -293,7 +294,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         ]
 
         self.assertEqual(
-            push_rule_evaluator.tweaks_for_actions(actions),
+            tweaks_for_actions(actions),
             {"sound": "default", "highlight": True},
         )
 
@@ -304,9 +305,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         evaluator = self._get_evaluator(
             {}, {"m.annotation": {("@user:test", "m.reaction")}}
         )
-        condition = {"kind": "relation_match"}
-        # Oddly, an unknown condition always matches.
-        self.assertTrue(evaluator.matches(condition, "@user:test", "foo"))
 
         # A push rule evaluator with the experimental rule enabled.
         evaluator = self._get_evaluator(
-- 
cgit 1.5.1


From 15754d720feb3af88d97a2dafd0b05633abf42f5 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 29 Sep 2022 19:10:47 +0100
Subject: Update UPSERT comment now that native upserts are the default
 (#13924)

---
 changelog.d/13924.misc      |  1 +
 synapse/storage/database.py | 60 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 51 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/13924.misc

(limited to 'synapse')

diff --git a/changelog.d/13924.misc b/changelog.d/13924.misc
new file mode 100644
index 0000000000..7770b6f03f
--- /dev/null
+++ b/changelog.d/13924.misc
@@ -0,0 +1 @@
+Update an inaccurate comment in Synapse's upsert database helper.
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 6cc88aad32..bb28ded1b5 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -1141,17 +1141,57 @@ class DatabasePool:
         desc: str = "simple_upsert",
         lock: bool = True,
     ) -> bool:
-        """
+        """Insert a row with values + insertion_values; on conflict, update with values.
+
+        All of our supported databases accept the nonstandard "upsert" statement in
+        their dialect of SQL. We call this a "native upsert". The syntax looks roughly
+        like:
+
+            INSERT INTO table VALUES (values + insertion_values)
+            ON CONFLICT (keyvalues)
+            DO UPDATE SET (values); -- overwrite `values` columns only
+
+        If (values) is empty, the resulting query is slightly simpler:
+
+            INSERT INTO table VALUES (insertion_values)
+            ON CONFLICT (keyvalues)
+            DO NOTHING;             -- do not overwrite any columns
+
+        This function is a helper to build such queries.
+
+        In order for upserts to make sense, the database must be able to determine when
+        an upsert CONFLICTs with an existing row. Postgres and SQLite ensure this by
+        requiring that a unique index exist on the column names used to detect a
+        conflict (i.e. `keyvalues.keys()`).
+
+        If there is no such index, we can "emulate" an upsert with a SELECT followed
+        by either an INSERT or an UPDATE. This is unsafe: we cannot make the same
+        atomicity guarantees that a native upsert can and are very vulnerable to races
+        and crashes. Therefore if we wish to upsert without an appropriate unique index,
+        we must either:
+
+        1. Acquire a table-level lock before the emulated upsert (`lock=True`), or
+        2. VERY CAREFULLY ensure that we are the only thread and worker which will be
+           writing to this table, in which case we can proceed without a lock
+           (`lock=False`).
+
+        Generally speaking, you should use `lock=True`. If the table in question has a
+        unique index[*], this class will use a native upsert (which is atomic and so can
+        ignore the `lock` argument). Otherwise this class will use an emulated upsert,
+        in which case we want the safer option unless we have been VERY CAREFUL.
+
+        [*]: Some tables have unique indices added to them in the background. Those
+             tables `T` are keys in the dictionary UNIQUE_INDEX_BACKGROUND_UPDATES,
+             where `T` maps to the background update that adds a unique index to `T`.
+             This dictionary is maintained by hand.
+
+             At runtime, we constantly check to see if each of these background updates
+             has run. If so, we deem the corresponding table safe to upsert into,
+             because we can now use a native upsert to do so. If not, we deem the table
+             unsafe to upsert into and require an emulated upsert.
 
-        `lock` should generally be set to True (the default), but can be set
-        to False if either of the following are true:
-            1. there is a UNIQUE INDEX on the key columns. In this case a conflict
-            will cause an IntegrityError in which case this function will retry
-            the update.
-            2. we somehow know that we are the only thread which will be updating
-            this table.
-        As an additional note, this parameter only matters for old SQLite versions
-        because we will use native upserts otherwise.
+             Tables that do not appear in this dictionary are assumed to have an
+             appropriate unique index and therefore be safe to upsert into.
 
         Args:
             table: The table to upsert into
-- 
cgit 1.5.1


From 6f0c3e669da458e838e7b4b165a13e8a5312d6d0 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 29 Sep 2022 21:16:08 +0100
Subject: Don't require `setuptools_rust` at runtime (#13952)

---
 changelog.d/13952.bugfix              |  1 +
 synapse/util/check_dependencies.py    | 17 ++++++++++++++++-
 tests/util/test_check_dependencies.py | 20 ++++++++++++++++++--
 3 files changed, 35 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/13952.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13952.bugfix b/changelog.d/13952.bugfix
new file mode 100644
index 0000000000..a6af20f051
--- /dev/null
+++ b/changelog.d/13952.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in v1.68.0 where Synapse would require `setuptools_rust` at runtime, even though the package is only required at build time.
diff --git a/synapse/util/check_dependencies.py b/synapse/util/check_dependencies.py
index 66f1da7502..3b1e205700 100644
--- a/synapse/util/check_dependencies.py
+++ b/synapse/util/check_dependencies.py
@@ -66,6 +66,21 @@ def _is_dev_dependency(req: Requirement) -> bool:
     )
 
 
+def _should_ignore_runtime_requirement(req: Requirement) -> bool:
+    # This is a build-time dependency. Irritatingly, `poetry build` ignores the
+    # requirements listed in the [build-system] section of pyproject.toml, so in order
+    # to support `poetry install --no-dev` we have to mark it as a runtime dependency.
+    # See discussion on https://github.com/python-poetry/poetry/issues/6154 (it sounds
+    # like the poetry authors don't consider this a bug?)
+    #
+    # In any case, work around this by ignoring setuptools_rust here. (It might be
+    # slightly cleaner to put `setuptools_rust` in a `build` extra or similar, but for
+    # now let's do something quick and dirty.)
+    if req.name == "setuptools_rust":
+        return True
+    return False
+
+
 class Dependency(NamedTuple):
     requirement: Requirement
     must_be_installed: bool
@@ -77,7 +92,7 @@ def _generic_dependencies() -> Iterable[Dependency]:
     assert requirements is not None
     for raw_requirement in requirements:
         req = Requirement(raw_requirement)
-        if _is_dev_dependency(req):
+        if _is_dev_dependency(req) or _should_ignore_runtime_requirement(req):
             continue
 
         # https://packaging.pypa.io/en/latest/markers.html#usage notes that
diff --git a/tests/util/test_check_dependencies.py b/tests/util/test_check_dependencies.py
index 5d1aa025d1..6913de24b9 100644
--- a/tests/util/test_check_dependencies.py
+++ b/tests/util/test_check_dependencies.py
@@ -40,7 +40,10 @@ class TestDependencyChecker(TestCase):
     def mock_installed_package(
         self, distribution: Optional[DummyDistribution]
     ) -> Generator[None, None, None]:
-        """Pretend that looking up any distribution yields the given `distribution`."""
+        """Pretend that looking up any package yields the given `distribution`.
+
+        If `distribution = None`, we pretend that the package is not installed.
+        """
 
         def mock_distribution(name: str):
             if distribution is None:
@@ -81,7 +84,7 @@ class TestDependencyChecker(TestCase):
                 self.assertRaises(DependencyException, check_requirements)
 
     def test_checks_ignore_dev_dependencies(self) -> None:
-        """Bot generic and per-extra checks should ignore dev dependencies."""
+        """Both generic and per-extra checks should ignore dev dependencies."""
         with patch(
             "synapse.util.check_dependencies.metadata.requires",
             return_value=["dummypkg >= 1; extra == 'mypy'"],
@@ -142,3 +145,16 @@ class TestDependencyChecker(TestCase):
             with self.mock_installed_package(new_release_candidate):
                 # should not raise
                 check_requirements()
+
+    def test_setuptools_rust_ignored(self) -> None:
+        """Test a workaround for a `poetry build` problem. Reproduces #13926."""
+        with patch(
+            "synapse.util.check_dependencies.metadata.requires",
+            return_value=["setuptools_rust >= 1.3"],
+        ):
+            with self.mock_installed_package(None):
+                # should not raise, even if setuptools_rust is not installed
+                check_requirements()
+            with self.mock_installed_package(old):
+                # We also ignore old versions of setuptools_rust
+                check_requirements()
-- 
cgit 1.5.1


From 1cc2ca81badb9c5161d219dfc9a273a338adedd2 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Fri, 30 Sep 2022 11:27:21 +0100
Subject: Add missing version information in the ModuleApi (#13947)

---
 changelog.d/13947.feature      | 1 +
 synapse/module_api/__init__.py | 4 ++++
 2 files changed, 5 insertions(+)
 create mode 100644 changelog.d/13947.feature

(limited to 'synapse')

diff --git a/changelog.d/13947.feature b/changelog.d/13947.feature
new file mode 100644
index 0000000000..a0b3cfe18c
--- /dev/null
+++ b/changelog.d/13947.feature
@@ -0,0 +1 @@
+Add cache invalidation across workers to module API.
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 59755bff6d..b7b2d3b8c5 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -842,6 +842,8 @@ class ModuleApi:
         however invalidation that needs to go to other workers needs to call `invalidate_cache`
         on the module API instead.
 
+        Added in Synapse v1.69.0.
+
         Args:
             cached_function: The cached function that will be registered to receive invalidation
             locally and from other workers.
@@ -856,6 +858,8 @@ class ModuleApi:
         """Invalidate a cache entry of a cached function across workers. The cached function
         needs to be registered on all workers first with `register_cached_function`.
 
+        Added in Synapse v1.69.0.
+
         Args:
             cached_function: The cached function that needs an invalidation
             keys: keys of the entry to invalidate, usually matching the arguments of the
-- 
cgit 1.5.1


From e8f30a76caa4394ebb3e77c56df951e3626b3fdd Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 30 Sep 2022 11:54:53 +0100
Subject: Fix overflows in /messages backfill calculation (#13936)

* Reproduce bug
* Compute `least_function` first
* Substitute `least_function` with an f-string
* Bugfix: avoid overflow

Co-authored-by: Eric Eastwood <erice@element.io>
---
 changelog.d/13936.feature                          |  1 +
 synapse/storage/databases/main/event_federation.py | 82 ++++++++++++++--------
 tests/storage/test_event_federation.py             | 61 ++++++++++++----
 3 files changed, 103 insertions(+), 41 deletions(-)
 create mode 100644 changelog.d/13936.feature

(limited to 'synapse')

diff --git a/changelog.d/13936.feature b/changelog.d/13936.feature
new file mode 100644
index 0000000000..d86bf7ed80
--- /dev/null
+++ b/changelog.d/13936.feature
@@ -0,0 +1 @@
+Exponentially backoff from backfilling the same event over and over.
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 17f2fd4458..6b9a629edd 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -73,13 +73,30 @@ pdus_pruned_from_federation_queue = Counter(
 
 logger = logging.getLogger(__name__)
 
-BACKFILL_EVENT_BACKOFF_UPPER_BOUND_SECONDS: int = int(
-    datetime.timedelta(days=7).total_seconds()
-)
-BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_SECONDS: int = int(
-    datetime.timedelta(hours=1).total_seconds()
+# Parameters controlling exponential backoff between backfill failures.
+# After the first failure to backfill, we wait 2 hours before trying again. If the
+# second attempt fails, we wait 4 hours before trying again. If the third attempt fails,
+# we wait 8 hours before trying again, ... and so on.
+#
+# Each successive backoff period is twice as long as the last. However we cap this
+# period at a maximum of 2^8 = 256 hours: a little over 10 days. (This is the smallest
+# power of 2 which yields a maximum backoff period of at least 7 days---which was the
+# original maximum backoff period.) Even when we hit this cap, we will continue to
+# make backfill attempts once every 10 days.
+BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS = 8
+BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_MILLISECONDS = int(
+    datetime.timedelta(hours=1).total_seconds() * 1000
 )
 
+# We need a cap on the power of 2 or else the backoff period
+#   2^N * (milliseconds per hour)
+# will overflow when calculated within the database. We ensure overflow does not occur
+# by checking that the largest backoff period fits in a 32-bit signed integer.
+_LONGEST_BACKOFF_PERIOD_MILLISECONDS = (
+    2**BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS
+) * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_MILLISECONDS
+assert 0 < _LONGEST_BACKOFF_PERIOD_MILLISECONDS <= ((2**31) - 1)
+
 
 # All the info we need while iterating the DAG while backfilling
 @attr.s(frozen=True, slots=True, auto_attribs=True)
@@ -767,7 +784,15 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
             # persisted in our database yet (meaning we don't know their depth
             # specifically). So we need to look for the approximate depth from
             # the events connected to the current backwards extremeties.
-            sql = """
+
+            if isinstance(self.database_engine, PostgresEngine):
+                least_function = "LEAST"
+            elif isinstance(self.database_engine, Sqlite3Engine):
+                least_function = "MIN"
+            else:
+                raise RuntimeError("Unknown database engine")
+
+            sql = f"""
                 SELECT backward_extrem.event_id, event.depth FROM events AS event
                 /**
                  * Get the edge connections from the event_edges table
@@ -825,7 +850,10 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
                      */
                     AND (
                         failed_backfill_attempt_info.event_id IS NULL
-                        OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((1 << failed_backfill_attempt_info.num_attempts) * ? /* step */, ? /* upper bound */)
+                        OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + (
+                            (1 << {least_function}(failed_backfill_attempt_info.num_attempts, ? /* max doubling steps */))
+                            * ? /* step */
+                        )
                     )
                 /**
                  * Sort from highest (closest to the `current_depth`) to the lowest depth
@@ -837,22 +865,15 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
                 LIMIT ?
             """
 
-            if isinstance(self.database_engine, PostgresEngine):
-                least_function = "least"
-            elif isinstance(self.database_engine, Sqlite3Engine):
-                least_function = "min"
-            else:
-                raise RuntimeError("Unknown database engine")
-
             txn.execute(
-                sql % (least_function,),
+                sql,
                 (
                     room_id,
                     False,
                     current_depth,
                     self._clock.time_msec(),
-                    1000 * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_SECONDS,
-                    1000 * BACKFILL_EVENT_BACKOFF_UPPER_BOUND_SECONDS,
+                    BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS,
+                    BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_MILLISECONDS,
                     limit,
                 ),
             )
@@ -902,7 +923,14 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
         def get_insertion_event_backward_extremities_in_room_txn(
             txn: LoggingTransaction, room_id: str
         ) -> List[Tuple[str, int]]:
-            sql = """
+            if isinstance(self.database_engine, PostgresEngine):
+                least_function = "LEAST"
+            elif isinstance(self.database_engine, Sqlite3Engine):
+                least_function = "MIN"
+            else:
+                raise RuntimeError("Unknown database engine")
+
+            sql = f"""
                 SELECT
                     insertion_event_extremity.event_id, event.depth
                 /* We only want insertion events that are also marked as backwards extremities */
@@ -942,7 +970,10 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
                      */
                     AND (
                         failed_backfill_attempt_info.event_id IS NULL
-                        OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((1 << failed_backfill_attempt_info.num_attempts) * ? /* step */, ? /* upper bound */)
+                        OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + (
+                            (1 << {least_function}(failed_backfill_attempt_info.num_attempts, ? /* max doubling steps */))
+                            * ? /* step */
+                        )
                     )
                 /**
                  * Sort from highest (closest to the `current_depth`) to the lowest depth
@@ -954,21 +985,14 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
                 LIMIT ?
             """
 
-            if isinstance(self.database_engine, PostgresEngine):
-                least_function = "least"
-            elif isinstance(self.database_engine, Sqlite3Engine):
-                least_function = "min"
-            else:
-                raise RuntimeError("Unknown database engine")
-
             txn.execute(
-                sql % (least_function,),
+                sql,
                 (
                     room_id,
                     current_depth,
                     self._clock.time_msec(),
-                    1000 * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_SECONDS,
-                    1000 * BACKFILL_EVENT_BACKOFF_UPPER_BOUND_SECONDS,
+                    BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS,
+                    BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_MILLISECONDS,
                     limit,
                 ),
             )
diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index 398f338b66..59b8910907 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -766,9 +766,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
             self.store.get_backfill_points_in_room(room_id, depth_map["B"], limit=100)
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
-        self.assertListEqual(
-            backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2", "b1"]
-        )
+        self.assertEqual(backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2", "b1"])
 
         # Try at "A"
         backfill_points = self.get_success(
@@ -814,7 +812,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
         # Only the backfill points that we didn't record earlier exist here.
-        self.assertListEqual(backfill_event_ids, ["b6", "2", "b1"])
+        self.assertEqual(backfill_event_ids, ["b6", "2", "b1"])
 
     def test_get_backfill_points_in_room_attempted_event_retry_after_backoff_duration(
         self,
@@ -860,7 +858,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
             self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100)
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
-        self.assertListEqual(backfill_event_ids, ["b3", "b2"])
+        self.assertEqual(backfill_event_ids, ["b3", "b2"])
 
         # Now advance time by 20 hours (above 2^4 because we made 4 attemps) and
         # see if we can now backfill it
@@ -871,7 +869,48 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
             self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100)
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
-        self.assertListEqual(backfill_event_ids, ["b3", "b2", "b1"])
+        self.assertEqual(backfill_event_ids, ["b3", "b2", "b1"])
+
+    def test_get_backfill_points_in_room_works_after_many_failed_pull_attempts_that_could_naively_overflow(
+        self,
+    ) -> None:
+        """
+        A test that reproduces #13929 (Postgres only).
+
+        Test to make sure we can still get backfill points after many failed pull
+        attempts that cause us to backoff to the limit. Even if the backoff formula
+        would tell us to wait for more milliseconds than can be expressed in a 32 bit
+        signed int.
+        """
+        setup_info = self._setup_room_for_backfill_tests()
+        room_id = setup_info.room_id
+        depth_map = setup_info.depth_map
+
+        # Pretend that we have tried and failed 10 times to backfill event b1.
+        for _ in range(10):
+            self.get_success(
+                self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause")
+            )
+
+        # If the backoff periods grow without limit:
+        # After the first failed attempt, we would have backed off for 1 << 1 = 2 hours.
+        # After the second failed attempt we would have backed off for 1 << 2 = 4 hours,
+        # so after the 10th failed attempt we should backoff for 1 << 10 == 1024 hours.
+        # Wait 1100 hours just so we have a nice round number.
+        self.reactor.advance(datetime.timedelta(hours=1100).total_seconds())
+
+        # 1024 hours in milliseconds is 1024 * 3600000, which exceeds the largest 32 bit
+        # signed integer. The bug we're reproducing is that this overflow causes an
+        # error in postgres preventing us from fetching a set of backwards extremities
+        # to retry fetching.
+        backfill_points = self.get_success(
+            self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100)
+        )
+
+        # We should aim to fetch all backfill points: b1's latest backoff period has
+        # expired, and we haven't tried the rest.
+        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
+        self.assertEqual(backfill_event_ids, ["b3", "b2", "b1"])
 
     def _setup_room_for_insertion_backfill_tests(self) -> _BackfillSetupInfo:
         """
@@ -965,9 +1004,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
             )
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
-        self.assertListEqual(
-            backfill_event_ids, ["insertion_eventB", "insertion_eventA"]
-        )
+        self.assertEqual(backfill_event_ids, ["insertion_eventB", "insertion_eventA"])
 
         # Try at "insertion_eventA"
         backfill_points = self.get_success(
@@ -1011,7 +1048,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
         # Only the backfill points that we didn't record earlier exist here.
-        self.assertListEqual(backfill_event_ids, ["insertion_eventB"])
+        self.assertEqual(backfill_event_ids, ["insertion_eventB"])
 
     def test_get_insertion_event_backward_extremities_in_room_attempted_event_retry_after_backoff_duration(
         self,
@@ -1069,7 +1106,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
             )
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
-        self.assertListEqual(backfill_event_ids, [])
+        self.assertEqual(backfill_event_ids, [])
 
         # Now advance time by 20 hours (above 2^4 because we made 4 attemps) and
         # see if we can now backfill it
@@ -1083,7 +1120,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
             )
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
-        self.assertListEqual(backfill_event_ids, ["insertion_eventA"])
+        self.assertEqual(backfill_event_ids, ["insertion_eventA"])
 
 
 @attr.s
-- 
cgit 1.5.1


From 3dfc4a08dc2e77178f2c2af68dc14b32da2d8b8f Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 30 Sep 2022 13:15:32 +0100
Subject: Fix performance regression in `get_users_in_room` (#13972)

Fixes #13942. Introduced in #13575.

Basically, let's only get the ordered set of hosts out of the DB if we need an ordered set of hosts. Since we split the function up the caching won't be as good, but I think it will still be fine as e.g. multiple backfill requests for the same room will hit the cache.
---
 changelog.d/13972.bugfix                     |   1 +
 synapse/handlers/federation.py               |   4 +-
 synapse/handlers/room.py                     |   4 +-
 synapse/storage/controllers/state.py         |  30 ++++---
 synapse/storage/databases/main/roommember.py | 129 +++++++++++++++------------
 5 files changed, 98 insertions(+), 70 deletions(-)
 create mode 100644 changelog.d/13972.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13972.bugfix b/changelog.d/13972.bugfix
new file mode 100644
index 0000000000..4c1e19ef8c
--- /dev/null
+++ b/changelog.d/13972.bugfix
@@ -0,0 +1 @@
+Fix a performance regression in the `get_users_in_room` database query. Introduced in v1.67.0.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index b866258298..986ffed3d5 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -412,7 +412,9 @@ class FederationHandler:
         # First we try hosts that are already in the room.
         # TODO: HEURISTIC ALERT.
         likely_domains = (
-            await self._storage_controllers.state.get_current_hosts_in_room(room_id)
+            await self._storage_controllers.state.get_current_hosts_in_room_ordered(
+                room_id
+            )
         )
 
         async def try_backfill(domains: Collection[str]) -> bool:
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index b220238e55..57ab05ad25 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -1540,7 +1540,9 @@ class TimestampLookupHandler:
             )
 
             likely_domains = (
-                await self._storage_controllers.state.get_current_hosts_in_room(room_id)
+                await self._storage_controllers.state.get_current_hosts_in_room_ordered(
+                    room_id
+                )
             )
 
             # Loop through each homeserver candidate until we get a succesful response
diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py
index bb60130afe..2b31ce54bb 100644
--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@@ -23,7 +23,7 @@ from typing import (
     List,
     Mapping,
     Optional,
-    Sequence,
+    Set,
     Tuple,
 )
 
@@ -529,7 +529,18 @@ class StateStorageController:
         )
         return state_map.get(key)
 
-    async def get_current_hosts_in_room(self, room_id: str) -> List[str]:
+    async def get_current_hosts_in_room(self, room_id: str) -> Set[str]:
+        """Get current hosts in room based on current state.
+
+        Blocks until we have full state for the given room. This only happens for rooms
+        with partial state.
+        """
+
+        await self._partial_state_room_tracker.await_full_state(room_id)
+
+        return await self.stores.main.get_current_hosts_in_room(room_id)
+
+    async def get_current_hosts_in_room_ordered(self, room_id: str) -> List[str]:
         """Get current hosts in room based on current state.
 
         Blocks until we have full state for the given room. This only happens for rooms
@@ -542,11 +553,11 @@ class StateStorageController:
 
         await self._partial_state_room_tracker.await_full_state(room_id)
 
-        return await self.stores.main.get_current_hosts_in_room(room_id)
+        return await self.stores.main.get_current_hosts_in_room_ordered(room_id)
 
     async def get_current_hosts_in_room_or_partial_state_approximation(
         self, room_id: str
-    ) -> Sequence[str]:
+    ) -> Collection[str]:
         """Get approximation of current hosts in room based on current state.
 
         For rooms with full state, this is equivalent to `get_current_hosts_in_room`,
@@ -566,14 +577,9 @@ class StateStorageController:
         )
 
         hosts_from_state = await self.stores.main.get_current_hosts_in_room(room_id)
-        hosts_from_state_set = set(hosts_from_state)
-
-        # First take the list of hosts based on the current state.
-        # For rooms with partial state, this will be missing most hosts.
-        hosts = list(hosts_from_state)
-        # Then add in the list of hosts in the room at the time we joined.
-        # This will be an empty list for rooms with full state.
-        hosts.extend(host for host in hosts_at_join if host not in hosts_from_state_set)
+
+        hosts = set(hosts_at_join)
+        hosts.update(hosts_from_state)
 
         return hosts
 
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 982e1f08e3..2337289d88 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -146,42 +146,37 @@ class RoomMemberWorkerStore(EventsWorkerStore):
 
     @cached(max_entries=100000, iterable=True)
     async def get_users_in_room(self, room_id: str) -> List[str]:
-        """
-        Returns a list of users in the room sorted by longest in the room first
-        (aka. with the lowest depth). This is done to match the sort in
-        `get_current_hosts_in_room()` and so we can re-use the cache but it's
-        not horrible to have here either.
-
-        Uses `m.room.member`s in the room state at the current forward extremities to
-        determine which users are in the room.
+        """Returns a list of users in the room.
 
         Will return inaccurate results for rooms with partial state, since the state for
         the forward extremities of those rooms will exclude most members. We may also
         calculate room state incorrectly for such rooms and believe that a member is or
         is not in the room when the opposite is true.
         """
-        return await self.db_pool.runInteraction(
-            "get_users_in_room", self.get_users_in_room_txn, room_id
+        return await self.db_pool.simple_select_onecol(
+            table="current_state_events",
+            keyvalues={
+                "type": EventTypes.Member,
+                "room_id": room_id,
+                "membership": Membership.JOIN,
+            },
+            retcol="state_key",
+            desc="get_users_in_room",
         )
 
     def get_users_in_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[str]:
-        """
-        Returns a list of users in the room sorted by longest in the room first
-        (aka. with the lowest depth). This is done to match the sort in
-        `get_current_hosts_in_room()` and so we can re-use the cache but it's
-        not horrible to have here either.
-        """
-        sql = """
-            SELECT c.state_key FROM current_state_events as c
-            /* Get the depth of the event from the events table */
-            INNER JOIN events AS e USING (event_id)
-            WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ?
-            /* Sorted by lowest depth first */
-            ORDER BY e.depth ASC;
-        """
+        """Returns a list of users in the room."""
 
-        txn.execute(sql, (room_id, Membership.JOIN))
-        return [r[0] for r in txn]
+        return self.db_pool.simple_select_onecol_txn(
+            txn,
+            table="current_state_events",
+            keyvalues={
+                "type": EventTypes.Member,
+                "room_id": room_id,
+                "membership": Membership.JOIN,
+            },
+            retcol="state_key",
+        )
 
     @cached()
     def get_user_in_room_with_profile(
@@ -931,7 +926,44 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         return True
 
     @cached(iterable=True, max_entries=10000)
-    async def get_current_hosts_in_room(self, room_id: str) -> List[str]:
+    async def get_current_hosts_in_room(self, room_id: str) -> Set[str]:
+        """Get current hosts in room based on current state."""
+
+        # First we check if we already have `get_users_in_room` in the cache, as
+        # we can just calculate result from that
+        users = self.get_users_in_room.cache.get_immediate(
+            (room_id,), None, update_metrics=False
+        )
+        if users is not None:
+            return {get_domain_from_id(u) for u in users}
+
+        if isinstance(self.database_engine, Sqlite3Engine):
+            # If we're using SQLite then let's just always use
+            # `get_users_in_room` rather than funky SQL.
+            users = await self.get_users_in_room(room_id)
+            return {get_domain_from_id(u) for u in users}
+
+        # For PostgreSQL we can pull out the domains from the joined users in
+        # `current_state_events` via regex.
+
+        def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> Set[str]:
+            sql = """
+                SELECT DISTINCT substring(state_key FROM '@[^:]*:(.*)$')
+                FROM current_state_events
+                WHERE
+                    type = 'm.room.member'
+                    AND membership = 'join'
+                    AND room_id = ?
+            """
+            txn.execute(sql, (room_id,))
+            return {d for d, in txn}
+
+        return await self.db_pool.runInteraction(
+            "get_current_hosts_in_room", get_current_hosts_in_room_txn
+        )
+
+    @cached(iterable=True, max_entries=10000)
+    async def get_current_hosts_in_room_ordered(self, room_id: str) -> List[str]:
         """
         Get current hosts in room based on current state.
 
@@ -939,48 +971,33 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         longest is good because they're most likely to have anything we ask
         about.
 
-        Uses `m.room.member`s in the room state at the current forward extremities to
-        determine which hosts are in the room.
+        For SQLite the returned list is not ordered, as SQLite doesn't support
+        the appropriate SQL.
 
-        Will return inaccurate results for rooms with partial state, since the state for
-        the forward extremities of those rooms will exclude most members. We may also
-        calculate room state incorrectly for such rooms and believe that a host is or
-        is not in the room when the opposite is true.
+        Uses `m.room.member`s in the room state at the current forward
+        extremities to determine which hosts are in the room.
+
+        Will return inaccurate results for rooms with partial state, since the
+        state for the forward extremities of those rooms will exclude most
+        members. We may also calculate room state incorrectly for such rooms and
+        believe that a host is or is not in the room when the opposite is true.
 
         Returns:
             Returns a list of servers sorted by longest in the room first. (aka.
             sorted by join with the lowest depth first).
         """
 
-        # First we check if we already have `get_users_in_room` in the cache, as
-        # we can just calculate result from that
-        users = self.get_users_in_room.cache.get_immediate(
-            (room_id,), None, update_metrics=False
-        )
-        if users is None and isinstance(self.database_engine, Sqlite3Engine):
+        if isinstance(self.database_engine, Sqlite3Engine):
             # If we're using SQLite then let's just always use
             # `get_users_in_room` rather than funky SQL.
-            users = await self.get_users_in_room(room_id)
 
-        if users is not None:
-            # Because `users` is sorted from lowest -> highest depth, the list
-            # of domains will also be sorted that way.
-            domains: List[str] = []
-            # We use a `Set` just for fast lookups
-            domain_set: Set[str] = set()
-            for u in users:
-                if ":" not in u:
-                    continue
-                domain = get_domain_from_id(u)
-                if domain not in domain_set:
-                    domain_set.add(domain)
-                    domains.append(domain)
-            return domains
+            domains = await self.get_current_hosts_in_room(room_id)
+            return list(domains)
 
         # For PostgreSQL we can use a regex to pull out the domains from the
         # joined users in `current_state_events` via regex.
 
-        def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> List[str]:
+        def get_current_hosts_in_room_ordered_txn(txn: LoggingTransaction) -> List[str]:
             # Returns a list of servers currently joined in the room sorted by
             # longest in the room first (aka. with the lowest depth). The
             # heuristic of sorting by servers who have been in the room the
@@ -1008,7 +1025,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
             return [d for d, in txn if d is not None]
 
         return await self.db_pool.runInteraction(
-            "get_current_hosts_in_room", get_current_hosts_in_room_txn
+            "get_current_hosts_in_room_ordered", get_current_hosts_in_room_ordered_txn
         )
 
     async def get_joined_hosts(
-- 
cgit 1.5.1


From 5507bfa769e61f5ef507c6172b8e798a87ac84b1 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 30 Sep 2022 14:23:37 +0100
Subject: Discourage automatic replies to Synapse's emails (#13957)

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
---
 changelog.d/13957.feature      |  1 +
 synapse/handlers/send_email.py | 13 +++++++++++++
 2 files changed, 14 insertions(+)
 create mode 100644 changelog.d/13957.feature

(limited to 'synapse')

diff --git a/changelog.d/13957.feature b/changelog.d/13957.feature
new file mode 100644
index 0000000000..4080147357
--- /dev/null
+++ b/changelog.d/13957.feature
@@ -0,0 +1 @@
+Ask mail servers receiving emails from Synapse to not send automatic reply (e.g. out-of-office responses).
diff --git a/synapse/handlers/send_email.py b/synapse/handlers/send_email.py
index e2844799e8..804cc6e81e 100644
--- a/synapse/handlers/send_email.py
+++ b/synapse/handlers/send_email.py
@@ -187,6 +187,19 @@ class SendEmailHandler:
         multipart_msg["To"] = email_address
         multipart_msg["Date"] = email.utils.formatdate()
         multipart_msg["Message-ID"] = email.utils.make_msgid()
+        # Discourage automatic responses to Synapse's emails.
+        # Per RFC 3834, automatic responses should not be sent if the "Auto-Submitted"
+        # header is present with any value other than "no". See
+        #     https://www.rfc-editor.org/rfc/rfc3834.html#section-5.1
+        multipart_msg["Auto-Submitted"] = "auto-generated"
+        # Also include a Microsoft-Exchange specific header:
+        #    https://learn.microsoft.com/en-us/openspecs/exchange_server_protocols/ms-oxcmail/ced68690-498a-4567-9d14-5c01f974d8b1
+        # which suggests it can take the value "All" to "suppress all auto-replies",
+        # or a comma separated list of auto-reply classes to suppress.
+        # The following Stack Overflow answers have a little more context:
+        #    https://stackoverflow.com/a/25324691/5252017
+        #    https://stackoverflow.com/a/61646381/5252017
+        multipart_msg["X-Auto-Response-Suppress"] = "All"
         multipart_msg.attach(text_part)
         multipart_msg.attach(html_part)
 
-- 
cgit 1.5.1


From 285b9e9b6c3558718e7d4f513062e277948ac35d Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 30 Sep 2022 14:27:00 +0100
Subject: Speed up calculating push actions in large rooms (#13973)

We move the expensive check of visibility to after calculating push actions, avoiding the expensive check for users who won't get pushed anyway.

I think this should have a big impact on rooms with large numbers of local users that have push disabled.
---
 changelog.d/13973.misc                   |  1 +
 synapse/push/bulk_push_rule_evaluator.py | 25 ++++++----
 tests/push/test_push_rule_evaluator.py   | 82 +++++++++++++++++++++++++++++++-
 3 files changed, 96 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/13973.misc

(limited to 'synapse')

diff --git a/changelog.d/13973.misc b/changelog.d/13973.misc
new file mode 100644
index 0000000000..58150a2b35
--- /dev/null
+++ b/changelog.d/13973.misc
@@ -0,0 +1 @@
+Speed up calculating push actions in large rooms.
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 60f3129005..7bfe380543 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -303,20 +303,10 @@ class BulkPushRuleEvaluator:
             event.room_id, users
         )
 
-        # This is a check for the case where user joins a room without being
-        # allowed to see history, and then the server receives a delayed event
-        # from before the user joined, which they should not be pushed for
-        uids_with_visibility = await filter_event_for_clients_with_state(
-            self.store, users, event, context
-        )
-
         for uid, rules in rules_by_user.items():
             if event.sender == uid:
                 continue
 
-            if uid not in uids_with_visibility:
-                continue
-
             display_name = None
             profile = profiles.get(uid)
             if profile:
@@ -342,6 +332,21 @@ class BulkPushRuleEvaluator:
                 # Push rules say we should notify the user of this event
                 actions_by_user[uid] = actions
 
+        # This is a check for the case where user joins a room without being
+        # allowed to see history, and then the server receives a delayed event
+        # from before the user joined, which they should not be pushed for
+        #
+        # We do this *after* calculating the push actions as a) it's unlikely
+        # that we'll filter anyone out and b) for large rooms it's likely that
+        # most users will have push disabled and so the set of users to check is
+        # much smaller.
+        uids_with_visibility = await filter_event_for_clients_with_state(
+            self.store, actions_by_user.keys(), event, context
+        )
+
+        for user_id in set(actions_by_user).difference(uids_with_visibility):
+            actions_by_user.pop(user_id, None)
+
         # Mark in the DB staging area the push actions for users who should be
         # notified for this event. (This will then get handled when we persist
         # the event)
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index b8308cbc05..8804f0e0d3 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -19,17 +19,18 @@ import frozendict
 from twisted.test.proto_helpers import MemoryReactor
 
 import synapse.rest.admin
-from synapse.api.constants import EventTypes, Membership
+from synapse.api.constants import EventTypes, HistoryVisibility, Membership
 from synapse.api.room_versions import RoomVersions
 from synapse.appservice import ApplicationService
 from synapse.events import FrozenEvent
 from synapse.push.bulk_push_rule_evaluator import _flatten_dict
 from synapse.push.httppusher import tweaks_for_actions
+from synapse.rest import admin
 from synapse.rest.client import login, register, room
 from synapse.server import HomeServer
 from synapse.storage.databases.main.appservice import _make_exclusive_regex
 from synapse.synapse_rust.push import PushRuleEvaluator
-from synapse.types import JsonDict
+from synapse.types import JsonDict, UserID
 from synapse.util import Clock
 
 from tests import unittest
@@ -437,3 +438,80 @@ class TestBulkPushRuleEvaluator(unittest.HomeserverTestCase):
         )
 
         self.assertEqual(len(users_with_push_actions), 0)
+
+
+class BulkPushRuleEvaluatorTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        admin.register_servlets,
+        login.register_servlets,
+        room.register_servlets,
+    ]
+
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
+        self.main_store = homeserver.get_datastores().main
+
+        self.user_id1 = self.register_user("user1", "password")
+        self.tok1 = self.login(self.user_id1, "password")
+        self.user_id2 = self.register_user("user2", "password")
+        self.tok2 = self.login(self.user_id2, "password")
+
+        self.room_id = self.helper.create_room_as(tok=self.tok1)
+
+        # We want to test history visibility works correctly.
+        self.helper.send_state(
+            self.room_id,
+            EventTypes.RoomHistoryVisibility,
+            {"history_visibility": HistoryVisibility.JOINED},
+            tok=self.tok1,
+        )
+
+    def get_notif_count(self, user_id: str) -> int:
+        return self.get_success(
+            self.main_store.db_pool.simple_select_one_onecol(
+                table="event_push_actions",
+                keyvalues={"user_id": user_id},
+                retcol="COALESCE(SUM(notif), 0)",
+                desc="get_staging_notif_count",
+            )
+        )
+
+    def test_plain_message(self) -> None:
+        """Test that sending a normal message in a room will trigger a
+        notification
+        """
+
+        # Have user2 join the room
+        self.helper.join(self.room_id, self.user_id2, tok=self.tok2)
+
+        # They start off with no notifications, but get them when messages are
+        # sent.
+        self.assertEqual(self.get_notif_count(self.user_id2), 0)
+
+        user1 = UserID.from_string(self.user_id1)
+        self.create_and_send_event(self.room_id, user1)
+
+        self.assertEqual(self.get_notif_count(self.user_id2), 1)
+
+    def test_delayed_message(self) -> None:
+        """Test that a delayed message that was from before a user joined
+        doesn't cause a notification for the joined user.
+        """
+        user1 = UserID.from_string(self.user_id1)
+
+        # Send a message before user2 joins
+        event_id1 = self.create_and_send_event(self.room_id, user1)
+
+        # Have user2 join the room
+        self.helper.join(self.room_id, self.user_id2, tok=self.tok2)
+
+        # They start off with no notifications
+        self.assertEqual(self.get_notif_count(self.user_id2), 0)
+
+        # Send another message that references the event before the join to
+        # simulate a "delayed" event
+        self.create_and_send_event(self.room_id, user1, prev_event_ids=[event_id1])
+
+        # user2 should not be notified about it, because they can't see it.
+        self.assertEqual(self.get_notif_count(self.user_id2), 0)
-- 
cgit 1.5.1


From 6d543d6d9f56e39199b7e460d0081b02d61f12be Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 30 Sep 2022 16:34:47 +0100
Subject: Update mypy and mypy-zope (#13925)

* Update mypy and mypy-zope

* Unignore assigning to LogRecord attributes

Presumably https://github.com/python/typeshed/pull/8064 makes this ok

Cherry-picked from #13521

* Remove unused ignores due to mypy ParamSpec fixes

https://github.com/python/mypy/pull/12668

Cherry-picked from #13521

* Remove additional unused ignores

* Fix new mypy complaints related to `assertGreater`

Presumably due to https://github.com/python/typeshed/pull/8077

* Changelog

* Reword changelog

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
---
 changelog.d/13925.misc                     |  1 +
 poetry.lock                                | 59 +++++++++++++++---------------
 scripts-dev/check_pydantic_models.py       |  5 +--
 synapse/app/_base.py                       |  4 +-
 synapse/logging/context.py                 | 20 +++++-----
 synapse/logging/opentracing.py             |  4 +-
 synapse/storage/database.py                | 22 +++--------
 synapse/storage/databases/main/search.py   |  2 +-
 tests/storage/test_monthly_active_users.py |  6 +++
 tests/utils.py                             |  4 +-
 10 files changed, 60 insertions(+), 67 deletions(-)
 create mode 100644 changelog.d/13925.misc

(limited to 'synapse')

diff --git a/changelog.d/13925.misc b/changelog.d/13925.misc
new file mode 100644
index 0000000000..f490ab122e
--- /dev/null
+++ b/changelog.d/13925.misc
@@ -0,0 +1 @@
+Update mypy (0.950 -> 0.981) and mypy-zope (0.3.7 -> 0.3.11).
diff --git a/poetry.lock b/poetry.lock
index 0f6d1cfa69..63ef8573a0 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -573,11 +573,11 @@ python-versions = "*"
 
 [[package]]
 name = "mypy"
-version = "0.950"
+version = "0.981"
 description = "Optional static typing for Python"
 category = "dev"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 
 [package.dependencies]
 mypy-extensions = ">=0.4.3"
@@ -600,14 +600,14 @@ python-versions = "*"
 
 [[package]]
 name = "mypy-zope"
-version = "0.3.7"
+version = "0.3.11"
 description = "Plugin for mypy to support zope interfaces"
 category = "dev"
 optional = false
 python-versions = "*"
 
 [package.dependencies]
-mypy = "0.950"
+mypy = "0.981"
 "zope.interface" = "*"
 "zope.schema" = "*"
 
@@ -2162,37 +2162,38 @@ msgpack = [
     {file = "msgpack-1.0.3.tar.gz", hash = "sha256:51fdc7fb93615286428ee7758cecc2f374d5ff363bdd884c7ea622a7a327a81e"},
 ]
 mypy = [
-    {file = "mypy-0.950-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b"},
-    {file = "mypy-0.950-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0"},
-    {file = "mypy-0.950-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22"},
-    {file = "mypy-0.950-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:eaff8156016487c1af5ffa5304c3e3fd183edcb412f3e9c72db349faf3f6e0eb"},
-    {file = "mypy-0.950-cp310-cp310-win_amd64.whl", hash = "sha256:563514c7dc504698fb66bb1cf897657a173a496406f1866afae73ab5b3cdb334"},
-    {file = "mypy-0.950-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:dd4d670eee9610bf61c25c940e9ade2d0ed05eb44227275cce88701fee014b1f"},
-    {file = "mypy-0.950-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ca75ecf2783395ca3016a5e455cb322ba26b6d33b4b413fcdedfc632e67941dc"},
-    {file = "mypy-0.950-cp36-cp36m-win_amd64.whl", hash = "sha256:6003de687c13196e8a1243a5e4bcce617d79b88f83ee6625437e335d89dfebe2"},
-    {file = "mypy-0.950-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4c653e4846f287051599ed8f4b3c044b80e540e88feec76b11044ddc5612ffed"},
-    {file = "mypy-0.950-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e19736af56947addedce4674c0971e5dceef1b5ec7d667fe86bcd2b07f8f9075"},
-    {file = "mypy-0.950-cp37-cp37m-win_amd64.whl", hash = "sha256:ef7beb2a3582eb7a9f37beaf38a28acfd801988cde688760aea9e6cc4832b10b"},
-    {file = "mypy-0.950-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d"},
-    {file = "mypy-0.950-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ee0a36edd332ed2c5208565ae6e3a7afc0eabb53f5327e281f2ef03a6bc7687a"},
-    {file = "mypy-0.950-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:77423570c04aca807508a492037abbd72b12a1fb25a385847d191cd50b2c9605"},
-    {file = "mypy-0.950-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5ce6a09042b6da16d773d2110e44f169683d8cc8687e79ec6d1181a72cb028d2"},
-    {file = "mypy-0.950-cp38-cp38-win_amd64.whl", hash = "sha256:5b231afd6a6e951381b9ef09a1223b1feabe13625388db48a8690f8daa9b71ff"},
-    {file = "mypy-0.950-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0384d9f3af49837baa92f559d3fa673e6d2652a16550a9ee07fc08c736f5e6f8"},
-    {file = "mypy-0.950-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1fdeb0a0f64f2a874a4c1f5271f06e40e1e9779bf55f9567f149466fc7a55038"},
-    {file = "mypy-0.950-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:61504b9a5ae166ba5ecfed9e93357fd51aa693d3d434b582a925338a2ff57fd2"},
-    {file = "mypy-0.950-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a952b8bc0ae278fc6316e6384f67bb9a396eb30aced6ad034d3a76120ebcc519"},
-    {file = "mypy-0.950-cp39-cp39-win_amd64.whl", hash = "sha256:eaea21d150fb26d7b4856766e7addcf929119dd19fc832b22e71d942835201ef"},
-    {file = "mypy-0.950-py3-none-any.whl", hash = "sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb"},
-    {file = "mypy-0.950.tar.gz", hash = "sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de"},
+    {file = "mypy-0.981-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4bc460e43b7785f78862dab78674e62ec3cd523485baecfdf81a555ed29ecfa0"},
+    {file = "mypy-0.981-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:756fad8b263b3ba39e4e204ee53042671b660c36c9017412b43af210ddee7b08"},
+    {file = "mypy-0.981-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a16a0145d6d7d00fbede2da3a3096dcc9ecea091adfa8da48fa6a7b75d35562d"},
+    {file = "mypy-0.981-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce65f70b14a21fdac84c294cde75e6dbdabbcff22975335e20827b3b94bdbf49"},
+    {file = "mypy-0.981-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6e35d764784b42c3e256848fb8ed1d4292c9fc0098413adb28d84974c095b279"},
+    {file = "mypy-0.981-cp310-cp310-win_amd64.whl", hash = "sha256:e53773073c864d5f5cec7f3fc72fbbcef65410cde8cc18d4f7242dea60dac52e"},
+    {file = "mypy-0.981-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6ee196b1d10b8b215e835f438e06965d7a480f6fe016eddbc285f13955cca659"},
+    {file = "mypy-0.981-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ad21d4c9d3673726cf986ea1d0c9fb66905258709550ddf7944c8f885f208be"},
+    {file = "mypy-0.981-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d1debb09043e1f5ee845fa1e96d180e89115b30e47c5d3ce53bc967bab53f62d"},
+    {file = "mypy-0.981-cp37-cp37m-win_amd64.whl", hash = "sha256:9f362470a3480165c4c6151786b5379351b790d56952005be18bdbdd4c7ce0ae"},
+    {file = "mypy-0.981-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c9e0efb95ed6ca1654951bd5ec2f3fa91b295d78bf6527e026529d4aaa1e0c30"},
+    {file = "mypy-0.981-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e178eaffc3c5cd211a87965c8c0df6da91ed7d258b5fc72b8e047c3771317ddb"},
+    {file = "mypy-0.981-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:06e1eac8d99bd404ed8dd34ca29673c4346e76dd8e612ea507763dccd7e13c7a"},
+    {file = "mypy-0.981-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa38f82f53e1e7beb45557ff167c177802ba7b387ad017eab1663d567017c8ee"},
+    {file = "mypy-0.981-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:64e1f6af81c003f85f0dfed52db632817dabb51b65c0318ffbf5ff51995bbb08"},
+    {file = "mypy-0.981-cp38-cp38-win_amd64.whl", hash = "sha256:e1acf62a8c4f7c092462c738aa2c2489e275ed386320c10b2e9bff31f6f7e8d6"},
+    {file = "mypy-0.981-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b6ede64e52257931315826fdbfc6ea878d89a965580d1a65638ef77cb551f56d"},
+    {file = "mypy-0.981-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eb3978b191b9fa0488524bb4ffedf2c573340e8c2b4206fc191d44c7093abfb7"},
+    {file = "mypy-0.981-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:77f8fcf7b4b3cc0c74fb33ae54a4cd00bb854d65645c48beccf65fa10b17882c"},
+    {file = "mypy-0.981-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f64d2ce043a209a297df322eb4054dfbaa9de9e8738291706eaafda81ab2b362"},
+    {file = "mypy-0.981-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2ee3dbc53d4df7e6e3b1c68ac6a971d3a4fb2852bf10a05fda228721dd44fae1"},
+    {file = "mypy-0.981-cp39-cp39-win_amd64.whl", hash = "sha256:8e8e49aa9cc23aa4c926dc200ce32959d3501c4905147a66ce032f05cb5ecb92"},
+    {file = "mypy-0.981-py3-none-any.whl", hash = "sha256:794f385653e2b749387a42afb1e14c2135e18daeb027e0d97162e4b7031210f8"},
+    {file = "mypy-0.981.tar.gz", hash = "sha256:ad77c13037d3402fbeffda07d51e3f228ba078d1c7096a73759c9419ea031bf4"},
 ]
 mypy-extensions = [
     {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"},
     {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"},
 ]
 mypy-zope = [
-    {file = "mypy-zope-0.3.7.tar.gz", hash = "sha256:9da171e78e8ef7ac8922c86af1a62f1b7f3244f121020bd94a2246bc3f33c605"},
-    {file = "mypy_zope-0.3.7-py3-none-any.whl", hash = "sha256:9c7637d066e4d1bafa0651abc091c752009769098043b236446e6725be2bc9c2"},
+    {file = "mypy-zope-0.3.11.tar.gz", hash = "sha256:d4255f9f04d48c79083bbd4e2fea06513a6ac7b8de06f8c4ce563fd85142ca05"},
+    {file = "mypy_zope-0.3.11-py3-none-any.whl", hash = "sha256:ec080a6508d1f7805c8d2054f9fdd13c849742ce96803519e1fdfa3d3cab7140"},
 ]
 netaddr = [
     {file = "netaddr-0.8.0-py2.py3-none-any.whl", hash = "sha256:9666d0232c32d2656e5e5f8d735f58fd6c7457ce52fc21c98d45f2af78f990ac"},
diff --git a/scripts-dev/check_pydantic_models.py b/scripts-dev/check_pydantic_models.py
index d0fb811bdb..9f2b7ded5b 100755
--- a/scripts-dev/check_pydantic_models.py
+++ b/scripts-dev/check_pydantic_models.py
@@ -88,10 +88,9 @@ def make_wrapper(factory: Callable[P, R]) -> Callable[P, R]:
 
     @functools.wraps(factory)
     def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
-        # type-ignore: should be redundant once we can use https://github.com/python/mypy/pull/12668
-        if "strict" not in kwargs:  # type: ignore[attr-defined]
+        if "strict" not in kwargs:
             raise MissingStrictInConstrainedTypeException(factory.__name__)
-        if not kwargs["strict"]:  # type: ignore[index]
+        if not kwargs["strict"]:
             raise MissingStrictInConstrainedTypeException(factory.__name__)
         return factory(*args, **kwargs)
 
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 9a24bed0a0..000912e86e 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -98,9 +98,7 @@ def register_sighup(func: Callable[P, None], *args: P.args, **kwargs: P.kwargs)
         func: Function to be called when sent a SIGHUP signal.
         *args, **kwargs: args and kwargs to be passed to the target function.
     """
-    # This type-ignore should be redundant once we use a mypy release with
-    # https://github.com/python/mypy/pull/12668.
-    _sighup_callbacks.append((func, args, kwargs))  # type: ignore[arg-type]
+    _sighup_callbacks.append((func, args, kwargs))
 
 
 def start_worker_reactor(
diff --git a/synapse/logging/context.py b/synapse/logging/context.py
index fd9cb97920..6a08ffed64 100644
--- a/synapse/logging/context.py
+++ b/synapse/logging/context.py
@@ -586,7 +586,7 @@ class LoggingContextFilter(logging.Filter):
             True to include the record in the log output.
         """
         context = current_context()
-        record.request = self._default_request  # type: ignore
+        record.request = self._default_request
 
         # context should never be None, but if it somehow ends up being, then
         # we end up in a death spiral of infinite loops, so let's check, for
@@ -594,21 +594,21 @@ class LoggingContextFilter(logging.Filter):
         if context is not None:
             # Logging is interested in the request ID. Note that for backwards
             # compatibility this is stored as the "request" on the record.
-            record.request = str(context)  # type: ignore
+            record.request = str(context)
 
             # Add some data from the HTTP request.
             request = context.request
             if request is None:
                 return True
 
-            record.ip_address = request.ip_address  # type: ignore
-            record.site_tag = request.site_tag  # type: ignore
-            record.requester = request.requester  # type: ignore
-            record.authenticated_entity = request.authenticated_entity  # type: ignore
-            record.method = request.method  # type: ignore
-            record.url = request.url  # type: ignore
-            record.protocol = request.protocol  # type: ignore
-            record.user_agent = request.user_agent  # type: ignore
+            record.ip_address = request.ip_address
+            record.site_tag = request.site_tag
+            record.requester = request.requester
+            record.authenticated_entity = request.authenticated_entity
+            record.method = request.method
+            record.url = request.url
+            record.protocol = request.protocol
+            record.user_agent = request.user_agent
 
         return True
 
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index ca2735dd6d..8ce5a2a338 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -992,9 +992,9 @@ def tag_args(func: Callable[P, R]) -> Callable[P, R]:
         # FIXME: We could update this to handle any type of function by ignoring the
         #   first argument only if it's named `self` or `cls`. This isn't fool-proof
         #   but handles the idiomatic cases.
-        for i, arg in enumerate(args[1:], start=1):  # type: ignore[index]
+        for i, arg in enumerate(args[1:], start=1):
             set_tag(SynapseTags.FUNC_ARG_PREFIX + argspec.args[i], str(arg))
-        set_tag(SynapseTags.FUNC_ARGS, str(args[len(argspec.args) :]))  # type: ignore[index]
+        set_tag(SynapseTags.FUNC_ARGS, str(args[len(argspec.args) :]))
         set_tag(SynapseTags.FUNC_KWARGS, str(kwargs))
         yield
 
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index bb28ded1b5..a252f8eaa0 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -290,8 +290,7 @@ class LoggingTransaction:
         # LoggingTransaction isn't expecting there to be any callbacks; assert that
         # is not the case.
         assert self.after_callbacks is not None
-        # type-ignore: need mypy containing https://github.com/python/mypy/pull/12668
-        self.after_callbacks.append((callback, args, kwargs))  # type: ignore[arg-type]
+        self.after_callbacks.append((callback, args, kwargs))
 
     def async_call_after(
         self, callback: Callable[P, Awaitable], *args: P.args, **kwargs: P.kwargs
@@ -312,8 +311,7 @@ class LoggingTransaction:
         # LoggingTransaction isn't expecting there to be any callbacks; assert that
         # is not the case.
         assert self.async_after_callbacks is not None
-        # type-ignore: need mypy containing https://github.com/python/mypy/pull/12668
-        self.async_after_callbacks.append((callback, args, kwargs))  # type: ignore[arg-type]
+        self.async_after_callbacks.append((callback, args, kwargs))
 
     def call_on_exception(
         self, callback: Callable[P, object], *args: P.args, **kwargs: P.kwargs
@@ -331,8 +329,7 @@ class LoggingTransaction:
         # LoggingTransaction isn't expecting there to be any callbacks; assert that
         # is not the case.
         assert self.exception_callbacks is not None
-        # type-ignore: need mypy containing https://github.com/python/mypy/pull/12668
-        self.exception_callbacks.append((callback, args, kwargs))  # type: ignore[arg-type]
+        self.exception_callbacks.append((callback, args, kwargs))
 
     def fetchone(self) -> Optional[Tuple]:
         return self.txn.fetchone()
@@ -421,10 +418,7 @@ class LoggingTransaction:
         sql = self.database_engine.convert_param_style(sql)
         if args:
             try:
-                # The type-ignore should be redundant once mypy releases a version with
-                # https://github.com/python/mypy/pull/12668. (`args` might be empty,
-                # (but we'll catch the index error if so.)
-                sql_logger.debug("[SQL values] {%s} %r", self.name, args[0])  # type: ignore[index]
+                sql_logger.debug("[SQL values] {%s} %r", self.name, args[0])
             except Exception:
                 # Don't let logging failures stop SQL from working
                 pass
@@ -655,9 +649,7 @@ class DatabasePool:
         # For now, we just log an error, and hope that it works on the first attempt.
         # TODO: raise an exception.
 
-        # Type-ignore Mypy doesn't yet consider ParamSpec.args to be iterable; see
-        # https://github.com/python/mypy/pull/12668
-        for i, arg in enumerate(args):  # type: ignore[arg-type, var-annotated]
+        for i, arg in enumerate(args):
             if inspect.isgenerator(arg):
                 logger.error(
                     "Programming error: generator passed to new_transaction as "
@@ -665,9 +657,7 @@ class DatabasePool:
                     i,
                     func,
                 )
-        # Type-ignore Mypy doesn't yet consider ParamSpec.args to be a mapping; see
-        # https://github.com/python/mypy/pull/12668
-        for name, val in kwargs.items():  # type: ignore[attr-defined]
+        for name, val in kwargs.items():
             if inspect.isgenerator(val):
                 logger.error(
                     "Programming error: generator passed to new_transaction as "
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index f6e24b68d2..1b79acf955 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -641,7 +641,7 @@ class SearchStore(SearchBackgroundUpdateStore):
             raise Exception("Unrecognized database engine")
 
         # mypy expects to append only a `str`, not an `int`
-        args.append(limit)  # type: ignore[arg-type]
+        args.append(limit)
 
         results = await self.db_pool.execute(
             "search_rooms", self.db_pool.cursor_to_dict, sql, *args
diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py
index e8b4a5644b..3da8221109 100644
--- a/tests/storage/test_monthly_active_users.py
+++ b/tests/storage/test_monthly_active_users.py
@@ -96,8 +96,12 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
 
         # Test each of the registered users is marked as active
         timestamp = self.get_success(self.store.user_last_seen_monthly_active(user1))
+        # Mypy notes that one shouldn't compare Optional[int] to 0 with assertGreater.
+        # Check that timestamp really is an int.
+        assert timestamp is not None
         self.assertGreater(timestamp, 0)
         timestamp = self.get_success(self.store.user_last_seen_monthly_active(user2))
+        assert timestamp is not None
         self.assertGreater(timestamp, 0)
 
         # Test that users with reserved 3pids are not removed from the MAU table
@@ -166,9 +170,11 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         self.get_success(self.store.upsert_monthly_active_user(user_id2))
 
         result = self.get_success(self.store.user_last_seen_monthly_active(user_id1))
+        assert result is not None
         self.assertGreater(result, 0)
 
         result = self.get_success(self.store.user_last_seen_monthly_active(user_id3))
+        assert result is not None
         self.assertNotEqual(result, 0)
 
     @override_config({"max_mau_value": 5})
diff --git a/tests/utils.py b/tests/utils.py
index 65db437697..045a8b5fa7 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -270,9 +270,7 @@ class MockClock:
         *args: P.args,
         **kwargs: P.kwargs,
     ) -> None:
-        # This type-ignore should be redundant once we use a mypy release with
-        # https://github.com/python/mypy/pull/12668.
-        self.loopers.append(Looper(function, interval / 1000.0, self.now, args, kwargs))  # type: ignore[arg-type]
+        self.loopers.append(Looper(function, interval / 1000.0, self.now, args, kwargs))
 
     def cancel_call_later(self, timer: Timer, ignore_errs: bool = False) -> None:
         if timer.expired:
-- 
cgit 1.5.1


From 8e52cb0bce4c4e42a0f151f16e51529b7aba8f7d Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 30 Sep 2022 16:37:48 +0100
Subject: Revert "Update mypy and mypy-zope (#13925)"

This reverts commit 6d543d6d9f56e39199b7e460d0081b02d61f12be.
---
 changelog.d/13925.misc                     |  1 -
 poetry.lock                                | 59 +++++++++++++++---------------
 scripts-dev/check_pydantic_models.py       |  5 ++-
 synapse/app/_base.py                       |  4 +-
 synapse/logging/context.py                 | 20 +++++-----
 synapse/logging/opentracing.py             |  4 +-
 synapse/storage/database.py                | 22 ++++++++---
 synapse/storage/databases/main/search.py   |  2 +-
 tests/storage/test_monthly_active_users.py |  6 ---
 tests/utils.py                             |  4 +-
 10 files changed, 67 insertions(+), 60 deletions(-)
 delete mode 100644 changelog.d/13925.misc

(limited to 'synapse')

diff --git a/changelog.d/13925.misc b/changelog.d/13925.misc
deleted file mode 100644
index f490ab122e..0000000000
--- a/changelog.d/13925.misc
+++ /dev/null
@@ -1 +0,0 @@
-Update mypy (0.950 -> 0.981) and mypy-zope (0.3.7 -> 0.3.11).
diff --git a/poetry.lock b/poetry.lock
index 63ef8573a0..0f6d1cfa69 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -573,11 +573,11 @@ python-versions = "*"
 
 [[package]]
 name = "mypy"
-version = "0.981"
+version = "0.950"
 description = "Optional static typing for Python"
 category = "dev"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.6"
 
 [package.dependencies]
 mypy-extensions = ">=0.4.3"
@@ -600,14 +600,14 @@ python-versions = "*"
 
 [[package]]
 name = "mypy-zope"
-version = "0.3.11"
+version = "0.3.7"
 description = "Plugin for mypy to support zope interfaces"
 category = "dev"
 optional = false
 python-versions = "*"
 
 [package.dependencies]
-mypy = "0.981"
+mypy = "0.950"
 "zope.interface" = "*"
 "zope.schema" = "*"
 
@@ -2162,38 +2162,37 @@ msgpack = [
     {file = "msgpack-1.0.3.tar.gz", hash = "sha256:51fdc7fb93615286428ee7758cecc2f374d5ff363bdd884c7ea622a7a327a81e"},
 ]
 mypy = [
-    {file = "mypy-0.981-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4bc460e43b7785f78862dab78674e62ec3cd523485baecfdf81a555ed29ecfa0"},
-    {file = "mypy-0.981-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:756fad8b263b3ba39e4e204ee53042671b660c36c9017412b43af210ddee7b08"},
-    {file = "mypy-0.981-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a16a0145d6d7d00fbede2da3a3096dcc9ecea091adfa8da48fa6a7b75d35562d"},
-    {file = "mypy-0.981-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce65f70b14a21fdac84c294cde75e6dbdabbcff22975335e20827b3b94bdbf49"},
-    {file = "mypy-0.981-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6e35d764784b42c3e256848fb8ed1d4292c9fc0098413adb28d84974c095b279"},
-    {file = "mypy-0.981-cp310-cp310-win_amd64.whl", hash = "sha256:e53773073c864d5f5cec7f3fc72fbbcef65410cde8cc18d4f7242dea60dac52e"},
-    {file = "mypy-0.981-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6ee196b1d10b8b215e835f438e06965d7a480f6fe016eddbc285f13955cca659"},
-    {file = "mypy-0.981-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ad21d4c9d3673726cf986ea1d0c9fb66905258709550ddf7944c8f885f208be"},
-    {file = "mypy-0.981-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d1debb09043e1f5ee845fa1e96d180e89115b30e47c5d3ce53bc967bab53f62d"},
-    {file = "mypy-0.981-cp37-cp37m-win_amd64.whl", hash = "sha256:9f362470a3480165c4c6151786b5379351b790d56952005be18bdbdd4c7ce0ae"},
-    {file = "mypy-0.981-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c9e0efb95ed6ca1654951bd5ec2f3fa91b295d78bf6527e026529d4aaa1e0c30"},
-    {file = "mypy-0.981-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e178eaffc3c5cd211a87965c8c0df6da91ed7d258b5fc72b8e047c3771317ddb"},
-    {file = "mypy-0.981-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:06e1eac8d99bd404ed8dd34ca29673c4346e76dd8e612ea507763dccd7e13c7a"},
-    {file = "mypy-0.981-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa38f82f53e1e7beb45557ff167c177802ba7b387ad017eab1663d567017c8ee"},
-    {file = "mypy-0.981-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:64e1f6af81c003f85f0dfed52db632817dabb51b65c0318ffbf5ff51995bbb08"},
-    {file = "mypy-0.981-cp38-cp38-win_amd64.whl", hash = "sha256:e1acf62a8c4f7c092462c738aa2c2489e275ed386320c10b2e9bff31f6f7e8d6"},
-    {file = "mypy-0.981-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b6ede64e52257931315826fdbfc6ea878d89a965580d1a65638ef77cb551f56d"},
-    {file = "mypy-0.981-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eb3978b191b9fa0488524bb4ffedf2c573340e8c2b4206fc191d44c7093abfb7"},
-    {file = "mypy-0.981-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:77f8fcf7b4b3cc0c74fb33ae54a4cd00bb854d65645c48beccf65fa10b17882c"},
-    {file = "mypy-0.981-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f64d2ce043a209a297df322eb4054dfbaa9de9e8738291706eaafda81ab2b362"},
-    {file = "mypy-0.981-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2ee3dbc53d4df7e6e3b1c68ac6a971d3a4fb2852bf10a05fda228721dd44fae1"},
-    {file = "mypy-0.981-cp39-cp39-win_amd64.whl", hash = "sha256:8e8e49aa9cc23aa4c926dc200ce32959d3501c4905147a66ce032f05cb5ecb92"},
-    {file = "mypy-0.981-py3-none-any.whl", hash = "sha256:794f385653e2b749387a42afb1e14c2135e18daeb027e0d97162e4b7031210f8"},
-    {file = "mypy-0.981.tar.gz", hash = "sha256:ad77c13037d3402fbeffda07d51e3f228ba078d1c7096a73759c9419ea031bf4"},
+    {file = "mypy-0.950-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b"},
+    {file = "mypy-0.950-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0"},
+    {file = "mypy-0.950-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22"},
+    {file = "mypy-0.950-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:eaff8156016487c1af5ffa5304c3e3fd183edcb412f3e9c72db349faf3f6e0eb"},
+    {file = "mypy-0.950-cp310-cp310-win_amd64.whl", hash = "sha256:563514c7dc504698fb66bb1cf897657a173a496406f1866afae73ab5b3cdb334"},
+    {file = "mypy-0.950-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:dd4d670eee9610bf61c25c940e9ade2d0ed05eb44227275cce88701fee014b1f"},
+    {file = "mypy-0.950-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ca75ecf2783395ca3016a5e455cb322ba26b6d33b4b413fcdedfc632e67941dc"},
+    {file = "mypy-0.950-cp36-cp36m-win_amd64.whl", hash = "sha256:6003de687c13196e8a1243a5e4bcce617d79b88f83ee6625437e335d89dfebe2"},
+    {file = "mypy-0.950-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4c653e4846f287051599ed8f4b3c044b80e540e88feec76b11044ddc5612ffed"},
+    {file = "mypy-0.950-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e19736af56947addedce4674c0971e5dceef1b5ec7d667fe86bcd2b07f8f9075"},
+    {file = "mypy-0.950-cp37-cp37m-win_amd64.whl", hash = "sha256:ef7beb2a3582eb7a9f37beaf38a28acfd801988cde688760aea9e6cc4832b10b"},
+    {file = "mypy-0.950-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d"},
+    {file = "mypy-0.950-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ee0a36edd332ed2c5208565ae6e3a7afc0eabb53f5327e281f2ef03a6bc7687a"},
+    {file = "mypy-0.950-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:77423570c04aca807508a492037abbd72b12a1fb25a385847d191cd50b2c9605"},
+    {file = "mypy-0.950-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5ce6a09042b6da16d773d2110e44f169683d8cc8687e79ec6d1181a72cb028d2"},
+    {file = "mypy-0.950-cp38-cp38-win_amd64.whl", hash = "sha256:5b231afd6a6e951381b9ef09a1223b1feabe13625388db48a8690f8daa9b71ff"},
+    {file = "mypy-0.950-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0384d9f3af49837baa92f559d3fa673e6d2652a16550a9ee07fc08c736f5e6f8"},
+    {file = "mypy-0.950-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1fdeb0a0f64f2a874a4c1f5271f06e40e1e9779bf55f9567f149466fc7a55038"},
+    {file = "mypy-0.950-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:61504b9a5ae166ba5ecfed9e93357fd51aa693d3d434b582a925338a2ff57fd2"},
+    {file = "mypy-0.950-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a952b8bc0ae278fc6316e6384f67bb9a396eb30aced6ad034d3a76120ebcc519"},
+    {file = "mypy-0.950-cp39-cp39-win_amd64.whl", hash = "sha256:eaea21d150fb26d7b4856766e7addcf929119dd19fc832b22e71d942835201ef"},
+    {file = "mypy-0.950-py3-none-any.whl", hash = "sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb"},
+    {file = "mypy-0.950.tar.gz", hash = "sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de"},
 ]
 mypy-extensions = [
     {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"},
     {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"},
 ]
 mypy-zope = [
-    {file = "mypy-zope-0.3.11.tar.gz", hash = "sha256:d4255f9f04d48c79083bbd4e2fea06513a6ac7b8de06f8c4ce563fd85142ca05"},
-    {file = "mypy_zope-0.3.11-py3-none-any.whl", hash = "sha256:ec080a6508d1f7805c8d2054f9fdd13c849742ce96803519e1fdfa3d3cab7140"},
+    {file = "mypy-zope-0.3.7.tar.gz", hash = "sha256:9da171e78e8ef7ac8922c86af1a62f1b7f3244f121020bd94a2246bc3f33c605"},
+    {file = "mypy_zope-0.3.7-py3-none-any.whl", hash = "sha256:9c7637d066e4d1bafa0651abc091c752009769098043b236446e6725be2bc9c2"},
 ]
 netaddr = [
     {file = "netaddr-0.8.0-py2.py3-none-any.whl", hash = "sha256:9666d0232c32d2656e5e5f8d735f58fd6c7457ce52fc21c98d45f2af78f990ac"},
diff --git a/scripts-dev/check_pydantic_models.py b/scripts-dev/check_pydantic_models.py
index 9f2b7ded5b..d0fb811bdb 100755
--- a/scripts-dev/check_pydantic_models.py
+++ b/scripts-dev/check_pydantic_models.py
@@ -88,9 +88,10 @@ def make_wrapper(factory: Callable[P, R]) -> Callable[P, R]:
 
     @functools.wraps(factory)
     def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
-        if "strict" not in kwargs:
+        # type-ignore: should be redundant once we can use https://github.com/python/mypy/pull/12668
+        if "strict" not in kwargs:  # type: ignore[attr-defined]
             raise MissingStrictInConstrainedTypeException(factory.__name__)
-        if not kwargs["strict"]:
+        if not kwargs["strict"]:  # type: ignore[index]
             raise MissingStrictInConstrainedTypeException(factory.__name__)
         return factory(*args, **kwargs)
 
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 000912e86e..9a24bed0a0 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -98,7 +98,9 @@ def register_sighup(func: Callable[P, None], *args: P.args, **kwargs: P.kwargs)
         func: Function to be called when sent a SIGHUP signal.
         *args, **kwargs: args and kwargs to be passed to the target function.
     """
-    _sighup_callbacks.append((func, args, kwargs))
+    # This type-ignore should be redundant once we use a mypy release with
+    # https://github.com/python/mypy/pull/12668.
+    _sighup_callbacks.append((func, args, kwargs))  # type: ignore[arg-type]
 
 
 def start_worker_reactor(
diff --git a/synapse/logging/context.py b/synapse/logging/context.py
index 6a08ffed64..fd9cb97920 100644
--- a/synapse/logging/context.py
+++ b/synapse/logging/context.py
@@ -586,7 +586,7 @@ class LoggingContextFilter(logging.Filter):
             True to include the record in the log output.
         """
         context = current_context()
-        record.request = self._default_request
+        record.request = self._default_request  # type: ignore
 
         # context should never be None, but if it somehow ends up being, then
         # we end up in a death spiral of infinite loops, so let's check, for
@@ -594,21 +594,21 @@ class LoggingContextFilter(logging.Filter):
         if context is not None:
             # Logging is interested in the request ID. Note that for backwards
             # compatibility this is stored as the "request" on the record.
-            record.request = str(context)
+            record.request = str(context)  # type: ignore
 
             # Add some data from the HTTP request.
             request = context.request
             if request is None:
                 return True
 
-            record.ip_address = request.ip_address
-            record.site_tag = request.site_tag
-            record.requester = request.requester
-            record.authenticated_entity = request.authenticated_entity
-            record.method = request.method
-            record.url = request.url
-            record.protocol = request.protocol
-            record.user_agent = request.user_agent
+            record.ip_address = request.ip_address  # type: ignore
+            record.site_tag = request.site_tag  # type: ignore
+            record.requester = request.requester  # type: ignore
+            record.authenticated_entity = request.authenticated_entity  # type: ignore
+            record.method = request.method  # type: ignore
+            record.url = request.url  # type: ignore
+            record.protocol = request.protocol  # type: ignore
+            record.user_agent = request.user_agent  # type: ignore
 
         return True
 
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index 8ce5a2a338..ca2735dd6d 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -992,9 +992,9 @@ def tag_args(func: Callable[P, R]) -> Callable[P, R]:
         # FIXME: We could update this to handle any type of function by ignoring the
         #   first argument only if it's named `self` or `cls`. This isn't fool-proof
         #   but handles the idiomatic cases.
-        for i, arg in enumerate(args[1:], start=1):
+        for i, arg in enumerate(args[1:], start=1):  # type: ignore[index]
             set_tag(SynapseTags.FUNC_ARG_PREFIX + argspec.args[i], str(arg))
-        set_tag(SynapseTags.FUNC_ARGS, str(args[len(argspec.args) :]))
+        set_tag(SynapseTags.FUNC_ARGS, str(args[len(argspec.args) :]))  # type: ignore[index]
         set_tag(SynapseTags.FUNC_KWARGS, str(kwargs))
         yield
 
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index a252f8eaa0..bb28ded1b5 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -290,7 +290,8 @@ class LoggingTransaction:
         # LoggingTransaction isn't expecting there to be any callbacks; assert that
         # is not the case.
         assert self.after_callbacks is not None
-        self.after_callbacks.append((callback, args, kwargs))
+        # type-ignore: need mypy containing https://github.com/python/mypy/pull/12668
+        self.after_callbacks.append((callback, args, kwargs))  # type: ignore[arg-type]
 
     def async_call_after(
         self, callback: Callable[P, Awaitable], *args: P.args, **kwargs: P.kwargs
@@ -311,7 +312,8 @@ class LoggingTransaction:
         # LoggingTransaction isn't expecting there to be any callbacks; assert that
         # is not the case.
         assert self.async_after_callbacks is not None
-        self.async_after_callbacks.append((callback, args, kwargs))
+        # type-ignore: need mypy containing https://github.com/python/mypy/pull/12668
+        self.async_after_callbacks.append((callback, args, kwargs))  # type: ignore[arg-type]
 
     def call_on_exception(
         self, callback: Callable[P, object], *args: P.args, **kwargs: P.kwargs
@@ -329,7 +331,8 @@ class LoggingTransaction:
         # LoggingTransaction isn't expecting there to be any callbacks; assert that
         # is not the case.
         assert self.exception_callbacks is not None
-        self.exception_callbacks.append((callback, args, kwargs))
+        # type-ignore: need mypy containing https://github.com/python/mypy/pull/12668
+        self.exception_callbacks.append((callback, args, kwargs))  # type: ignore[arg-type]
 
     def fetchone(self) -> Optional[Tuple]:
         return self.txn.fetchone()
@@ -418,7 +421,10 @@ class LoggingTransaction:
         sql = self.database_engine.convert_param_style(sql)
         if args:
             try:
-                sql_logger.debug("[SQL values] {%s} %r", self.name, args[0])
+                # The type-ignore should be redundant once mypy releases a version with
+                # https://github.com/python/mypy/pull/12668. (`args` might be empty,
+                # (but we'll catch the index error if so.)
+                sql_logger.debug("[SQL values] {%s} %r", self.name, args[0])  # type: ignore[index]
             except Exception:
                 # Don't let logging failures stop SQL from working
                 pass
@@ -649,7 +655,9 @@ class DatabasePool:
         # For now, we just log an error, and hope that it works on the first attempt.
         # TODO: raise an exception.
 
-        for i, arg in enumerate(args):
+        # Type-ignore Mypy doesn't yet consider ParamSpec.args to be iterable; see
+        # https://github.com/python/mypy/pull/12668
+        for i, arg in enumerate(args):  # type: ignore[arg-type, var-annotated]
             if inspect.isgenerator(arg):
                 logger.error(
                     "Programming error: generator passed to new_transaction as "
@@ -657,7 +665,9 @@ class DatabasePool:
                     i,
                     func,
                 )
-        for name, val in kwargs.items():
+        # Type-ignore Mypy doesn't yet consider ParamSpec.args to be a mapping; see
+        # https://github.com/python/mypy/pull/12668
+        for name, val in kwargs.items():  # type: ignore[attr-defined]
             if inspect.isgenerator(val):
                 logger.error(
                     "Programming error: generator passed to new_transaction as "
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index 1b79acf955..f6e24b68d2 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -641,7 +641,7 @@ class SearchStore(SearchBackgroundUpdateStore):
             raise Exception("Unrecognized database engine")
 
         # mypy expects to append only a `str`, not an `int`
-        args.append(limit)
+        args.append(limit)  # type: ignore[arg-type]
 
         results = await self.db_pool.execute(
             "search_rooms", self.db_pool.cursor_to_dict, sql, *args
diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py
index 3da8221109..e8b4a5644b 100644
--- a/tests/storage/test_monthly_active_users.py
+++ b/tests/storage/test_monthly_active_users.py
@@ -96,12 +96,8 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
 
         # Test each of the registered users is marked as active
         timestamp = self.get_success(self.store.user_last_seen_monthly_active(user1))
-        # Mypy notes that one shouldn't compare Optional[int] to 0 with assertGreater.
-        # Check that timestamp really is an int.
-        assert timestamp is not None
         self.assertGreater(timestamp, 0)
         timestamp = self.get_success(self.store.user_last_seen_monthly_active(user2))
-        assert timestamp is not None
         self.assertGreater(timestamp, 0)
 
         # Test that users with reserved 3pids are not removed from the MAU table
@@ -170,11 +166,9 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         self.get_success(self.store.upsert_monthly_active_user(user_id2))
 
         result = self.get_success(self.store.user_last_seen_monthly_active(user_id1))
-        assert result is not None
         self.assertGreater(result, 0)
 
         result = self.get_success(self.store.user_last_seen_monthly_active(user_id3))
-        assert result is not None
         self.assertNotEqual(result, 0)
 
     @override_config({"max_mau_value": 5})
diff --git a/tests/utils.py b/tests/utils.py
index 045a8b5fa7..65db437697 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -270,7 +270,9 @@ class MockClock:
         *args: P.args,
         **kwargs: P.kwargs,
     ) -> None:
-        self.loopers.append(Looper(function, interval / 1000.0, self.now, args, kwargs))
+        # This type-ignore should be redundant once we use a mypy release with
+        # https://github.com/python/mypy/pull/12668.
+        self.loopers.append(Looper(function, interval / 1000.0, self.now, args, kwargs))  # type: ignore[arg-type]
 
     def cancel_call_later(self, timer: Timer, ignore_errs: bool = False) -> None:
         if timer.expired:
-- 
cgit 1.5.1


From 285d72556bb3c36f075b336b2bdd6acb08391ad5 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 30 Sep 2022 17:36:28 +0100
Subject: Update mypy and mypy-zope, attempt 3 (#13993)

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
---
 changelog.d/13925.misc                     |  1 +
 changelog.d/13993.misc                     |  1 +
 poetry.lock                                | 59 +++++++++++++++---------------
 scripts-dev/check_pydantic_models.py       |  5 +--
 synapse/app/_base.py                       |  4 +-
 synapse/logging/context.py                 | 20 +++++-----
 synapse/logging/opentracing.py             |  4 +-
 synapse/storage/database.py                | 22 +++--------
 synapse/storage/databases/main/search.py   |  2 +-
 tests/storage/test_monthly_active_users.py |  7 +++-
 tests/utils.py                             |  4 +-
 11 files changed, 61 insertions(+), 68 deletions(-)
 create mode 100644 changelog.d/13925.misc
 create mode 100644 changelog.d/13993.misc

(limited to 'synapse')

diff --git a/changelog.d/13925.misc b/changelog.d/13925.misc
new file mode 100644
index 0000000000..f490ab122e
--- /dev/null
+++ b/changelog.d/13925.misc
@@ -0,0 +1 @@
+Update mypy (0.950 -> 0.981) and mypy-zope (0.3.7 -> 0.3.11).
diff --git a/changelog.d/13993.misc b/changelog.d/13993.misc
new file mode 100644
index 0000000000..f490ab122e
--- /dev/null
+++ b/changelog.d/13993.misc
@@ -0,0 +1 @@
+Update mypy (0.950 -> 0.981) and mypy-zope (0.3.7 -> 0.3.11).
diff --git a/poetry.lock b/poetry.lock
index 0f6d1cfa69..63ef8573a0 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -573,11 +573,11 @@ python-versions = "*"
 
 [[package]]
 name = "mypy"
-version = "0.950"
+version = "0.981"
 description = "Optional static typing for Python"
 category = "dev"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 
 [package.dependencies]
 mypy-extensions = ">=0.4.3"
@@ -600,14 +600,14 @@ python-versions = "*"
 
 [[package]]
 name = "mypy-zope"
-version = "0.3.7"
+version = "0.3.11"
 description = "Plugin for mypy to support zope interfaces"
 category = "dev"
 optional = false
 python-versions = "*"
 
 [package.dependencies]
-mypy = "0.950"
+mypy = "0.981"
 "zope.interface" = "*"
 "zope.schema" = "*"
 
@@ -2162,37 +2162,38 @@ msgpack = [
     {file = "msgpack-1.0.3.tar.gz", hash = "sha256:51fdc7fb93615286428ee7758cecc2f374d5ff363bdd884c7ea622a7a327a81e"},
 ]
 mypy = [
-    {file = "mypy-0.950-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b"},
-    {file = "mypy-0.950-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0"},
-    {file = "mypy-0.950-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22"},
-    {file = "mypy-0.950-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:eaff8156016487c1af5ffa5304c3e3fd183edcb412f3e9c72db349faf3f6e0eb"},
-    {file = "mypy-0.950-cp310-cp310-win_amd64.whl", hash = "sha256:563514c7dc504698fb66bb1cf897657a173a496406f1866afae73ab5b3cdb334"},
-    {file = "mypy-0.950-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:dd4d670eee9610bf61c25c940e9ade2d0ed05eb44227275cce88701fee014b1f"},
-    {file = "mypy-0.950-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ca75ecf2783395ca3016a5e455cb322ba26b6d33b4b413fcdedfc632e67941dc"},
-    {file = "mypy-0.950-cp36-cp36m-win_amd64.whl", hash = "sha256:6003de687c13196e8a1243a5e4bcce617d79b88f83ee6625437e335d89dfebe2"},
-    {file = "mypy-0.950-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4c653e4846f287051599ed8f4b3c044b80e540e88feec76b11044ddc5612ffed"},
-    {file = "mypy-0.950-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e19736af56947addedce4674c0971e5dceef1b5ec7d667fe86bcd2b07f8f9075"},
-    {file = "mypy-0.950-cp37-cp37m-win_amd64.whl", hash = "sha256:ef7beb2a3582eb7a9f37beaf38a28acfd801988cde688760aea9e6cc4832b10b"},
-    {file = "mypy-0.950-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d"},
-    {file = "mypy-0.950-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ee0a36edd332ed2c5208565ae6e3a7afc0eabb53f5327e281f2ef03a6bc7687a"},
-    {file = "mypy-0.950-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:77423570c04aca807508a492037abbd72b12a1fb25a385847d191cd50b2c9605"},
-    {file = "mypy-0.950-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5ce6a09042b6da16d773d2110e44f169683d8cc8687e79ec6d1181a72cb028d2"},
-    {file = "mypy-0.950-cp38-cp38-win_amd64.whl", hash = "sha256:5b231afd6a6e951381b9ef09a1223b1feabe13625388db48a8690f8daa9b71ff"},
-    {file = "mypy-0.950-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0384d9f3af49837baa92f559d3fa673e6d2652a16550a9ee07fc08c736f5e6f8"},
-    {file = "mypy-0.950-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1fdeb0a0f64f2a874a4c1f5271f06e40e1e9779bf55f9567f149466fc7a55038"},
-    {file = "mypy-0.950-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:61504b9a5ae166ba5ecfed9e93357fd51aa693d3d434b582a925338a2ff57fd2"},
-    {file = "mypy-0.950-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a952b8bc0ae278fc6316e6384f67bb9a396eb30aced6ad034d3a76120ebcc519"},
-    {file = "mypy-0.950-cp39-cp39-win_amd64.whl", hash = "sha256:eaea21d150fb26d7b4856766e7addcf929119dd19fc832b22e71d942835201ef"},
-    {file = "mypy-0.950-py3-none-any.whl", hash = "sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb"},
-    {file = "mypy-0.950.tar.gz", hash = "sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de"},
+    {file = "mypy-0.981-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4bc460e43b7785f78862dab78674e62ec3cd523485baecfdf81a555ed29ecfa0"},
+    {file = "mypy-0.981-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:756fad8b263b3ba39e4e204ee53042671b660c36c9017412b43af210ddee7b08"},
+    {file = "mypy-0.981-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a16a0145d6d7d00fbede2da3a3096dcc9ecea091adfa8da48fa6a7b75d35562d"},
+    {file = "mypy-0.981-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce65f70b14a21fdac84c294cde75e6dbdabbcff22975335e20827b3b94bdbf49"},
+    {file = "mypy-0.981-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6e35d764784b42c3e256848fb8ed1d4292c9fc0098413adb28d84974c095b279"},
+    {file = "mypy-0.981-cp310-cp310-win_amd64.whl", hash = "sha256:e53773073c864d5f5cec7f3fc72fbbcef65410cde8cc18d4f7242dea60dac52e"},
+    {file = "mypy-0.981-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6ee196b1d10b8b215e835f438e06965d7a480f6fe016eddbc285f13955cca659"},
+    {file = "mypy-0.981-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ad21d4c9d3673726cf986ea1d0c9fb66905258709550ddf7944c8f885f208be"},
+    {file = "mypy-0.981-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d1debb09043e1f5ee845fa1e96d180e89115b30e47c5d3ce53bc967bab53f62d"},
+    {file = "mypy-0.981-cp37-cp37m-win_amd64.whl", hash = "sha256:9f362470a3480165c4c6151786b5379351b790d56952005be18bdbdd4c7ce0ae"},
+    {file = "mypy-0.981-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c9e0efb95ed6ca1654951bd5ec2f3fa91b295d78bf6527e026529d4aaa1e0c30"},
+    {file = "mypy-0.981-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e178eaffc3c5cd211a87965c8c0df6da91ed7d258b5fc72b8e047c3771317ddb"},
+    {file = "mypy-0.981-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:06e1eac8d99bd404ed8dd34ca29673c4346e76dd8e612ea507763dccd7e13c7a"},
+    {file = "mypy-0.981-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa38f82f53e1e7beb45557ff167c177802ba7b387ad017eab1663d567017c8ee"},
+    {file = "mypy-0.981-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:64e1f6af81c003f85f0dfed52db632817dabb51b65c0318ffbf5ff51995bbb08"},
+    {file = "mypy-0.981-cp38-cp38-win_amd64.whl", hash = "sha256:e1acf62a8c4f7c092462c738aa2c2489e275ed386320c10b2e9bff31f6f7e8d6"},
+    {file = "mypy-0.981-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b6ede64e52257931315826fdbfc6ea878d89a965580d1a65638ef77cb551f56d"},
+    {file = "mypy-0.981-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eb3978b191b9fa0488524bb4ffedf2c573340e8c2b4206fc191d44c7093abfb7"},
+    {file = "mypy-0.981-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:77f8fcf7b4b3cc0c74fb33ae54a4cd00bb854d65645c48beccf65fa10b17882c"},
+    {file = "mypy-0.981-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f64d2ce043a209a297df322eb4054dfbaa9de9e8738291706eaafda81ab2b362"},
+    {file = "mypy-0.981-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2ee3dbc53d4df7e6e3b1c68ac6a971d3a4fb2852bf10a05fda228721dd44fae1"},
+    {file = "mypy-0.981-cp39-cp39-win_amd64.whl", hash = "sha256:8e8e49aa9cc23aa4c926dc200ce32959d3501c4905147a66ce032f05cb5ecb92"},
+    {file = "mypy-0.981-py3-none-any.whl", hash = "sha256:794f385653e2b749387a42afb1e14c2135e18daeb027e0d97162e4b7031210f8"},
+    {file = "mypy-0.981.tar.gz", hash = "sha256:ad77c13037d3402fbeffda07d51e3f228ba078d1c7096a73759c9419ea031bf4"},
 ]
 mypy-extensions = [
     {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"},
     {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"},
 ]
 mypy-zope = [
-    {file = "mypy-zope-0.3.7.tar.gz", hash = "sha256:9da171e78e8ef7ac8922c86af1a62f1b7f3244f121020bd94a2246bc3f33c605"},
-    {file = "mypy_zope-0.3.7-py3-none-any.whl", hash = "sha256:9c7637d066e4d1bafa0651abc091c752009769098043b236446e6725be2bc9c2"},
+    {file = "mypy-zope-0.3.11.tar.gz", hash = "sha256:d4255f9f04d48c79083bbd4e2fea06513a6ac7b8de06f8c4ce563fd85142ca05"},
+    {file = "mypy_zope-0.3.11-py3-none-any.whl", hash = "sha256:ec080a6508d1f7805c8d2054f9fdd13c849742ce96803519e1fdfa3d3cab7140"},
 ]
 netaddr = [
     {file = "netaddr-0.8.0-py2.py3-none-any.whl", hash = "sha256:9666d0232c32d2656e5e5f8d735f58fd6c7457ce52fc21c98d45f2af78f990ac"},
diff --git a/scripts-dev/check_pydantic_models.py b/scripts-dev/check_pydantic_models.py
index d0fb811bdb..9f2b7ded5b 100755
--- a/scripts-dev/check_pydantic_models.py
+++ b/scripts-dev/check_pydantic_models.py
@@ -88,10 +88,9 @@ def make_wrapper(factory: Callable[P, R]) -> Callable[P, R]:
 
     @functools.wraps(factory)
     def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
-        # type-ignore: should be redundant once we can use https://github.com/python/mypy/pull/12668
-        if "strict" not in kwargs:  # type: ignore[attr-defined]
+        if "strict" not in kwargs:
             raise MissingStrictInConstrainedTypeException(factory.__name__)
-        if not kwargs["strict"]:  # type: ignore[index]
+        if not kwargs["strict"]:
             raise MissingStrictInConstrainedTypeException(factory.__name__)
         return factory(*args, **kwargs)
 
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 9a24bed0a0..000912e86e 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -98,9 +98,7 @@ def register_sighup(func: Callable[P, None], *args: P.args, **kwargs: P.kwargs)
         func: Function to be called when sent a SIGHUP signal.
         *args, **kwargs: args and kwargs to be passed to the target function.
     """
-    # This type-ignore should be redundant once we use a mypy release with
-    # https://github.com/python/mypy/pull/12668.
-    _sighup_callbacks.append((func, args, kwargs))  # type: ignore[arg-type]
+    _sighup_callbacks.append((func, args, kwargs))
 
 
 def start_worker_reactor(
diff --git a/synapse/logging/context.py b/synapse/logging/context.py
index fd9cb97920..6a08ffed64 100644
--- a/synapse/logging/context.py
+++ b/synapse/logging/context.py
@@ -586,7 +586,7 @@ class LoggingContextFilter(logging.Filter):
             True to include the record in the log output.
         """
         context = current_context()
-        record.request = self._default_request  # type: ignore
+        record.request = self._default_request
 
         # context should never be None, but if it somehow ends up being, then
         # we end up in a death spiral of infinite loops, so let's check, for
@@ -594,21 +594,21 @@ class LoggingContextFilter(logging.Filter):
         if context is not None:
             # Logging is interested in the request ID. Note that for backwards
             # compatibility this is stored as the "request" on the record.
-            record.request = str(context)  # type: ignore
+            record.request = str(context)
 
             # Add some data from the HTTP request.
             request = context.request
             if request is None:
                 return True
 
-            record.ip_address = request.ip_address  # type: ignore
-            record.site_tag = request.site_tag  # type: ignore
-            record.requester = request.requester  # type: ignore
-            record.authenticated_entity = request.authenticated_entity  # type: ignore
-            record.method = request.method  # type: ignore
-            record.url = request.url  # type: ignore
-            record.protocol = request.protocol  # type: ignore
-            record.user_agent = request.user_agent  # type: ignore
+            record.ip_address = request.ip_address
+            record.site_tag = request.site_tag
+            record.requester = request.requester
+            record.authenticated_entity = request.authenticated_entity
+            record.method = request.method
+            record.url = request.url
+            record.protocol = request.protocol
+            record.user_agent = request.user_agent
 
         return True
 
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index ca2735dd6d..8ce5a2a338 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -992,9 +992,9 @@ def tag_args(func: Callable[P, R]) -> Callable[P, R]:
         # FIXME: We could update this to handle any type of function by ignoring the
         #   first argument only if it's named `self` or `cls`. This isn't fool-proof
         #   but handles the idiomatic cases.
-        for i, arg in enumerate(args[1:], start=1):  # type: ignore[index]
+        for i, arg in enumerate(args[1:], start=1):
             set_tag(SynapseTags.FUNC_ARG_PREFIX + argspec.args[i], str(arg))
-        set_tag(SynapseTags.FUNC_ARGS, str(args[len(argspec.args) :]))  # type: ignore[index]
+        set_tag(SynapseTags.FUNC_ARGS, str(args[len(argspec.args) :]))
         set_tag(SynapseTags.FUNC_KWARGS, str(kwargs))
         yield
 
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index bb28ded1b5..a252f8eaa0 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -290,8 +290,7 @@ class LoggingTransaction:
         # LoggingTransaction isn't expecting there to be any callbacks; assert that
         # is not the case.
         assert self.after_callbacks is not None
-        # type-ignore: need mypy containing https://github.com/python/mypy/pull/12668
-        self.after_callbacks.append((callback, args, kwargs))  # type: ignore[arg-type]
+        self.after_callbacks.append((callback, args, kwargs))
 
     def async_call_after(
         self, callback: Callable[P, Awaitable], *args: P.args, **kwargs: P.kwargs
@@ -312,8 +311,7 @@ class LoggingTransaction:
         # LoggingTransaction isn't expecting there to be any callbacks; assert that
         # is not the case.
         assert self.async_after_callbacks is not None
-        # type-ignore: need mypy containing https://github.com/python/mypy/pull/12668
-        self.async_after_callbacks.append((callback, args, kwargs))  # type: ignore[arg-type]
+        self.async_after_callbacks.append((callback, args, kwargs))
 
     def call_on_exception(
         self, callback: Callable[P, object], *args: P.args, **kwargs: P.kwargs
@@ -331,8 +329,7 @@ class LoggingTransaction:
         # LoggingTransaction isn't expecting there to be any callbacks; assert that
         # is not the case.
         assert self.exception_callbacks is not None
-        # type-ignore: need mypy containing https://github.com/python/mypy/pull/12668
-        self.exception_callbacks.append((callback, args, kwargs))  # type: ignore[arg-type]
+        self.exception_callbacks.append((callback, args, kwargs))
 
     def fetchone(self) -> Optional[Tuple]:
         return self.txn.fetchone()
@@ -421,10 +418,7 @@ class LoggingTransaction:
         sql = self.database_engine.convert_param_style(sql)
         if args:
             try:
-                # The type-ignore should be redundant once mypy releases a version with
-                # https://github.com/python/mypy/pull/12668. (`args` might be empty,
-                # (but we'll catch the index error if so.)
-                sql_logger.debug("[SQL values] {%s} %r", self.name, args[0])  # type: ignore[index]
+                sql_logger.debug("[SQL values] {%s} %r", self.name, args[0])
             except Exception:
                 # Don't let logging failures stop SQL from working
                 pass
@@ -655,9 +649,7 @@ class DatabasePool:
         # For now, we just log an error, and hope that it works on the first attempt.
         # TODO: raise an exception.
 
-        # Type-ignore Mypy doesn't yet consider ParamSpec.args to be iterable; see
-        # https://github.com/python/mypy/pull/12668
-        for i, arg in enumerate(args):  # type: ignore[arg-type, var-annotated]
+        for i, arg in enumerate(args):
             if inspect.isgenerator(arg):
                 logger.error(
                     "Programming error: generator passed to new_transaction as "
@@ -665,9 +657,7 @@ class DatabasePool:
                     i,
                     func,
                 )
-        # Type-ignore Mypy doesn't yet consider ParamSpec.args to be a mapping; see
-        # https://github.com/python/mypy/pull/12668
-        for name, val in kwargs.items():  # type: ignore[attr-defined]
+        for name, val in kwargs.items():
             if inspect.isgenerator(val):
                 logger.error(
                     "Programming error: generator passed to new_transaction as "
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index f6e24b68d2..1b79acf955 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -641,7 +641,7 @@ class SearchStore(SearchBackgroundUpdateStore):
             raise Exception("Unrecognized database engine")
 
         # mypy expects to append only a `str`, not an `int`
-        args.append(limit)  # type: ignore[arg-type]
+        args.append(limit)
 
         results = await self.db_pool.execute(
             "search_rooms", self.db_pool.cursor_to_dict, sql, *args
diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py
index e8b4a5644b..c55c4db970 100644
--- a/tests/storage/test_monthly_active_users.py
+++ b/tests/storage/test_monthly_active_users.py
@@ -96,8 +96,12 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
 
         # Test each of the registered users is marked as active
         timestamp = self.get_success(self.store.user_last_seen_monthly_active(user1))
+        # Mypy notes that one shouldn't compare Optional[int] to 0 with assertGreater.
+        # Check that timestamp really is an int.
+        assert timestamp is not None
         self.assertGreater(timestamp, 0)
         timestamp = self.get_success(self.store.user_last_seen_monthly_active(user2))
+        assert timestamp is not None
         self.assertGreater(timestamp, 0)
 
         # Test that users with reserved 3pids are not removed from the MAU table
@@ -166,10 +170,11 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         self.get_success(self.store.upsert_monthly_active_user(user_id2))
 
         result = self.get_success(self.store.user_last_seen_monthly_active(user_id1))
+        assert result is not None
         self.assertGreater(result, 0)
 
         result = self.get_success(self.store.user_last_seen_monthly_active(user_id3))
-        self.assertNotEqual(result, 0)
+        self.assertIsNone(result)
 
     @override_config({"max_mau_value": 5})
     def test_reap_monthly_active_users(self):
diff --git a/tests/utils.py b/tests/utils.py
index 65db437697..045a8b5fa7 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -270,9 +270,7 @@ class MockClock:
         *args: P.args,
         **kwargs: P.kwargs,
     ) -> None:
-        # This type-ignore should be redundant once we use a mypy release with
-        # https://github.com/python/mypy/pull/12668.
-        self.loopers.append(Looper(function, interval / 1000.0, self.now, args, kwargs))  # type: ignore[arg-type]
+        self.loopers.append(Looper(function, interval / 1000.0, self.now, args, kwargs))
 
     def cancel_call_later(self, timer: Timer, ignore_errs: bool = False) -> None:
         if timer.expired:
-- 
cgit 1.5.1


From 535f8c8f7d64d4058500a5988278fd3026645164 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 30 Sep 2022 17:40:33 +0100
Subject: Skip filtering during push if there are no push actions (#13992)

---
 changelog.d/13992.misc                   | 1 +
 synapse/push/bulk_push_rule_evaluator.py | 5 +++++
 synapse/visibility.py                    | 4 ++++
 tests/rest/client/test_rooms.py          | 4 ++--
 4 files changed, 12 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/13992.misc

(limited to 'synapse')

diff --git a/changelog.d/13992.misc b/changelog.d/13992.misc
new file mode 100644
index 0000000000..58150a2b35
--- /dev/null
+++ b/changelog.d/13992.misc
@@ -0,0 +1 @@
+Speed up calculating push actions in large rooms.
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 7bfe380543..4270438918 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -332,6 +332,11 @@ class BulkPushRuleEvaluator:
                 # Push rules say we should notify the user of this event
                 actions_by_user[uid] = actions
 
+        # If there aren't any actions then we can skip the rest of the
+        # processing.
+        if not actions_by_user:
+            return
+
         # This is a check for the case where user joins a room without being
         # allowed to see history, and then the server receives a delayed event
         # from before the user joined, which they should not be pushed for
diff --git a/synapse/visibility.py b/synapse/visibility.py
index c810a05907..c4048d2477 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -162,6 +162,10 @@ async def filter_event_for_clients_with_state(
     if event.internal_metadata.is_soft_failed():
         return []
 
+    # Fast path if we don't have any user IDs to check.
+    if not user_ids:
+        return ()
+
     # Make a set for all user IDs that haven't been filtered out by a check.
     allowed_user_ids = set(user_ids)
 
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index e281aef779..7f8cf4fab0 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -710,7 +710,7 @@ class RoomsCreateTestCase(RoomBase):
         self.assertEqual(HTTPStatus.OK, channel.code, channel.result)
         self.assertTrue("room_id" in channel.json_body)
         assert channel.resource_usage is not None
-        self.assertEqual(35, channel.resource_usage.db_txn_count)
+        self.assertEqual(34, channel.resource_usage.db_txn_count)
 
     def test_post_room_initial_state(self) -> None:
         # POST with initial_state config key, expect new room id
@@ -723,7 +723,7 @@ class RoomsCreateTestCase(RoomBase):
         self.assertEqual(HTTPStatus.OK, channel.code, channel.result)
         self.assertTrue("room_id" in channel.json_body)
         assert channel.resource_usage is not None
-        self.assertEqual(38, channel.resource_usage.db_txn_count)
+        self.assertEqual(37, channel.resource_usage.db_txn_count)
 
     def test_post_room_visibility_key(self) -> None:
         # POST with visibility config key, expect new room id
-- 
cgit 1.5.1


From ad4c14e4b0c44d6a8ee42e760d7e1fe1755559a2 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Fri, 30 Sep 2022 14:40:18 -0500
Subject: Clarifications in user directory for users who share rooms tracking
 (#13966)

Spawned while working on [`get_users_in_room` mis-uses](https://github.com/matrix-org/synapse/pull/13958#discussion_r984074897) and thinking we could use `get_local_users_in_room` here but we can't.

At first glance, it seemed like this was only using local users because of all the `is_mine_id(user_id)` checks, but it does actually use remote users. This change makes it a little clearer what the code does and mentions remote users, so that will hopefully be more obvious in the future.
---
 changelog.d/13966.misc             |  1 +
 synapse/handlers/user_directory.py | 36 ++++++++++++++++++++++++------------
 2 files changed, 25 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/13966.misc

(limited to 'synapse')

diff --git a/changelog.d/13966.misc b/changelog.d/13966.misc
new file mode 100644
index 0000000000..b54ad5c776
--- /dev/null
+++ b/changelog.d/13966.misc
@@ -0,0 +1 @@
+Refactor language in user directory `_track_user_joined_room` code to make it more clear that we use both local and remote users.
diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index 8c3c52e1ca..3610b6bf78 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, Any, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple
 
 import synapse.metrics
 from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules, Membership
@@ -379,7 +379,7 @@ class UserDirectoryHandler(StateDeltasHandler):
             user_id, event.content.get("displayname"), event.content.get("avatar_url")
         )
 
-    async def _track_user_joined_room(self, room_id: str, user_id: str) -> None:
+    async def _track_user_joined_room(self, room_id: str, joining_user_id: str) -> None:
         """Someone's just joined a room. Update `users_in_public_rooms` or
         `users_who_share_private_rooms` as appropriate.
 
@@ -390,32 +390,44 @@ class UserDirectoryHandler(StateDeltasHandler):
             room_id
         )
         if is_public:
-            await self.store.add_users_in_public_rooms(room_id, (user_id,))
+            await self.store.add_users_in_public_rooms(room_id, (joining_user_id,))
         else:
             users_in_room = await self.store.get_users_in_room(room_id)
             other_users_in_room = [
                 other
                 for other in users_in_room
-                if other != user_id
+                if other != joining_user_id
                 and (
+                    # We can't apply any special rules to remote users so
+                    # they're always included
                     not self.is_mine_id(other)
+                    # Check the special rules whether the local user should be
+                    # included in the user directory
                     or await self.store.should_include_local_user_in_dir(other)
                 )
             ]
-            to_insert = set()
+            updates_to_users_who_share_rooms: Set[Tuple[str, str]] = set()
 
-            # First, if they're our user then we need to update for every user
-            if self.is_mine_id(user_id):
+            # First, if the joining user is our local user then we need an
+            # update for every other user in the room.
+            if self.is_mine_id(joining_user_id):
                 for other_user_id in other_users_in_room:
-                    to_insert.add((user_id, other_user_id))
+                    updates_to_users_who_share_rooms.add(
+                        (joining_user_id, other_user_id)
+                    )
 
-            # Next we need to update for every local user in the room
+            # Next, we need an update for every other local user in the room
+            # that they now share a room with the joining user.
             for other_user_id in other_users_in_room:
                 if self.is_mine_id(other_user_id):
-                    to_insert.add((other_user_id, user_id))
+                    updates_to_users_who_share_rooms.add(
+                        (other_user_id, joining_user_id)
+                    )
 
-            if to_insert:
-                await self.store.add_users_who_share_private_room(room_id, to_insert)
+            if updates_to_users_who_share_rooms:
+                await self.store.add_users_who_share_private_room(
+                    room_id, updates_to_users_who_share_rooms
+                )
 
     async def _handle_remove_user(self, room_id: str, user_id: str) -> None:
         """Called when when someone leaves a room. The user may be local or remote.
-- 
cgit 1.5.1


From a52c40e2a6d3a142c9cf768479ec963354c3e360 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Fri, 30 Sep 2022 20:10:50 -0500
Subject: Fix `get_users_in_room` mis-use in
 `transfer_room_state_on_room_upgrade` (#13960)

Spawning from looking into `get_users_in_room` while investigating https://github.com/matrix-org/synapse/issues/13942#issuecomment-1262787050.

See https://github.com/matrix-org/synapse/pull/13575#discussion_r953023755 for the original exploration around finding `get_users_in_room` mis-uses.

Related to the following PRs where we also cleaned up some `get_users_in_room` mis-uses:

 - https://github.com/matrix-org/synapse/pull/13605
 - https://github.com/matrix-org/synapse/pull/13608
 - https://github.com/matrix-org/synapse/pull/13606
 - https://github.com/matrix-org/synapse/pull/13958
---
 changelog.d/13960.misc          | 1 +
 synapse/handlers/room_member.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/13960.misc

(limited to 'synapse')

diff --git a/changelog.d/13960.misc b/changelog.d/13960.misc
new file mode 100644
index 0000000000..a7ba532bcb
--- /dev/null
+++ b/changelog.d/13960.misc
@@ -0,0 +1 @@
+Use dedicated `get_local_users_in_room(room_id)` function to find local users when calculating users to copy over during a room upgrade.
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 88158822e0..ee669eb30f 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -1150,8 +1150,8 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         logger.info("Transferring room state from %s to %s", old_room_id, room_id)
 
         # Find all local users that were in the old room and copy over each user's state
-        users = await self.store.get_users_in_room(old_room_id)
-        await self.copy_user_state_on_room_upgrade(old_room_id, room_id, users)
+        local_users = await self.store.get_local_users_in_room(old_room_id)
+        await self.copy_user_state_on_room_upgrade(old_room_id, room_id, local_users)
 
         # Add new room to the room directory if the old room was there
         # Remove old room from the room directory
-- 
cgit 1.5.1


From 2769ef4df125f91b59693457052930379582d614 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Mon, 3 Oct 2022 04:14:45 -0500
Subject: Revert the general exception recording introduced in #13814 (#13969)

* Maybe don't catch all errors, to avoid catching things like CancelledError

See https://github.com/matrix-org/synapse/pull/13815#discussion_r983384698

* Remove general exception tracking

* Add changelog
---
 changelog.d/13969.misc               |  1 +
 synapse/handlers/federation_event.py | 10 ----------
 2 files changed, 1 insertion(+), 10 deletions(-)
 create mode 100644 changelog.d/13969.misc

(limited to 'synapse')

diff --git a/changelog.d/13969.misc b/changelog.d/13969.misc
new file mode 100644
index 0000000000..5ede0069c8
--- /dev/null
+++ b/changelog.d/13969.misc
@@ -0,0 +1 @@
+Revert catch-all exceptions being recorded as event pull attempt failures (only handle what we know about).
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 3fac256881..778d8869b3 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -866,11 +866,6 @@ class FederationEventHandler:
                 event.room_id, event_id, str(err)
             )
             return
-        except Exception as exc:
-            await self._store.record_event_failed_pull_attempt(
-                event.room_id, event_id, str(exc)
-            )
-            raise exc
 
         try:
             try:
@@ -913,11 +908,6 @@ class FederationEventHandler:
                 logger.warning("Pulled event %s failed history check.", event_id)
             else:
                 raise
-        except Exception as exc:
-            await self._store.record_event_failed_pull_attempt(
-                event.room_id, event_id, str(exc)
-            )
-            raise exc
 
     @trace
     async def _compute_event_context_with_maybe_missing_prevs(
-- 
cgit 1.5.1


From d65862c41f2992a253778753d7f378d3ef1fb996 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Mon, 3 Oct 2022 13:46:36 +0100
Subject: Refactor `_get_e2e_device_keys_txn` to split large queries (#13956)

Instead of running a single large query, run a single query for
user-only lookups and additional queries for batches of user device
lookups.

Resolves #13580.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/13956.bugfix                          |  1 +
 synapse/storage/database.py                       | 60 ++++++++++++++++
 synapse/storage/databases/main/end_to_end_keys.py | 83 +++++++++++++++--------
 3 files changed, 115 insertions(+), 29 deletions(-)
 create mode 100644 changelog.d/13956.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13956.bugfix b/changelog.d/13956.bugfix
new file mode 100644
index 0000000000..5682c3e002
--- /dev/null
+++ b/changelog.d/13956.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where `POST /_matrix/client/v3/keys/query` requests could result in excessively large SQL queries.
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index a252f8eaa0..b4469eb964 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -2461,6 +2461,66 @@ def make_in_list_sql_clause(
         return "%s IN (%s)" % (column, ",".join("?" for _ in iterable)), list(iterable)
 
 
+# These overloads ensure that `columns` and `iterable` values have the same length.
+# Suppress "Single overload definition, multiple required" complaint.
+@overload  # type: ignore[misc]
+def make_tuple_in_list_sql_clause(
+    database_engine: BaseDatabaseEngine,
+    columns: Tuple[str, str],
+    iterable: Collection[Tuple[Any, Any]],
+) -> Tuple[str, list]:
+    ...
+
+
+def make_tuple_in_list_sql_clause(
+    database_engine: BaseDatabaseEngine,
+    columns: Tuple[str, ...],
+    iterable: Collection[Tuple[Any, ...]],
+) -> Tuple[str, list]:
+    """Returns an SQL clause that checks the given tuple of columns is in the iterable.
+
+    Args:
+        database_engine
+        columns: Names of the columns in the tuple.
+        iterable: The tuples to check the columns against.
+
+    Returns:
+        A tuple of SQL query and the args
+    """
+    if len(columns) == 0:
+        # Should be unreachable due to mypy, as long as the overloads are set up right.
+        if () in iterable:
+            return "TRUE", []
+        else:
+            return "FALSE", []
+
+    if len(columns) == 1:
+        # Use `= ANY(?)` on postgres.
+        return make_in_list_sql_clause(
+            database_engine, next(iter(columns)), [values[0] for values in iterable]
+        )
+
+    # There are multiple columns. Avoid using an `= ANY(?)` clause on postgres, as
+    # indices are not used when there are multiple columns. Instead, use an `IN`
+    # expression.
+    #
+    # `IN ((?, ...), ...)` with tuples is supported by postgres only, whereas
+    # `IN (VALUES (?, ...), ...)` is supported by both sqlite and postgres.
+    # Thus, the latter is chosen.
+
+    if len(iterable) == 0:
+        # A 0-length `VALUES` list is not allowed in sqlite or postgres.
+        # Also note that a 0-length `IN (...)` clause (not using `VALUES`) is not
+        # allowed in postgres.
+        return "FALSE", []
+
+    tuple_sql = "(%s)" % (",".join("?" for _ in columns),)
+    return "(%s) IN (VALUES %s)" % (
+        ",".join(column for column in columns),
+        ",".join(tuple_sql for _ in iterable),
+    ), [value for values in iterable for value in values]
+
+
 KV = TypeVar("KV")
 
 
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index 8e9e1b0b4b..8a10ae800c 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -43,6 +43,7 @@ from synapse.storage.database import (
     LoggingDatabaseConnection,
     LoggingTransaction,
     make_in_list_sql_clause,
+    make_tuple_in_list_sql_clause,
 )
 from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
 from synapse.storage.engines import PostgresEngine
@@ -278,7 +279,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
     def _get_e2e_device_keys_txn(
         self,
         txn: LoggingTransaction,
-        query_list: Collection[Tuple[str, str]],
+        query_list: Collection[Tuple[str, Optional[str]]],
         include_all_devices: bool = False,
         include_deleted_devices: bool = False,
     ) -> Dict[str, Dict[str, Optional[DeviceKeyLookupResult]]]:
@@ -288,8 +289,8 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
         cross-signing signatures which have been added subsequently (for which, see
         get_e2e_device_keys_and_signatures)
         """
-        query_clauses = []
-        query_params = []
+        query_clauses: List[str] = []
+        query_params_list: List[List[object]] = []
 
         if include_all_devices is False:
             include_deleted_devices = False
@@ -297,40 +298,64 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
         if include_deleted_devices:
             deleted_devices = set(query_list)
 
+        # Split the query list into queries for users and queries for particular
+        # devices.
+        user_list = []
+        user_device_list = []
         for (user_id, device_id) in query_list:
-            query_clause = "user_id = ?"
-            query_params.append(user_id)
-
-            if device_id is not None:
-                query_clause += " AND device_id = ?"
-                query_params.append(device_id)
-
-            query_clauses.append(query_clause)
-
-        sql = (
-            "SELECT user_id, device_id, "
-            "    d.display_name, "
-            "    k.key_json"
-            " FROM devices d"
-            "    %s JOIN e2e_device_keys_json k USING (user_id, device_id)"
-            " WHERE %s AND NOT d.hidden"
-        ) % (
-            "LEFT" if include_all_devices else "INNER",
-            " OR ".join("(" + q + ")" for q in query_clauses),
-        )
+            if device_id is None:
+                user_list.append(user_id)
+            else:
+                user_device_list.append((user_id, device_id))
 
-        txn.execute(sql, query_params)
+        if user_list:
+            user_id_in_list_clause, user_args = make_in_list_sql_clause(
+                txn.database_engine, "user_id", user_list
+            )
+            query_clauses.append(user_id_in_list_clause)
+            query_params_list.append(user_args)
+
+        if user_device_list:
+            # Divide the device queries into batches, to avoid excessively large
+            # queries.
+            for user_device_batch in batch_iter(user_device_list, 1024):
+                (
+                    user_device_id_in_list_clause,
+                    user_device_args,
+                ) = make_tuple_in_list_sql_clause(
+                    txn.database_engine, ("user_id", "device_id"), user_device_batch
+                )
+                query_clauses.append(user_device_id_in_list_clause)
+                query_params_list.append(user_device_args)
 
         result: Dict[str, Dict[str, Optional[DeviceKeyLookupResult]]] = {}
-        for (user_id, device_id, display_name, key_json) in txn:
-            if include_deleted_devices:
-                deleted_devices.remove((user_id, device_id))
-            result.setdefault(user_id, {})[device_id] = DeviceKeyLookupResult(
-                display_name, db_to_json(key_json) if key_json else None
+        for query_clause, query_params in zip(query_clauses, query_params_list):
+            sql = (
+                "SELECT user_id, device_id, "
+                "    d.display_name, "
+                "    k.key_json"
+                " FROM devices d"
+                "    %s JOIN e2e_device_keys_json k USING (user_id, device_id)"
+                " WHERE %s AND NOT d.hidden"
+            ) % (
+                "LEFT" if include_all_devices else "INNER",
+                query_clause,
             )
 
+            txn.execute(sql, query_params)
+
+            for (user_id, device_id, display_name, key_json) in txn:
+                assert device_id is not None
+                if include_deleted_devices:
+                    deleted_devices.remove((user_id, device_id))
+                result.setdefault(user_id, {})[device_id] = DeviceKeyLookupResult(
+                    display_name, db_to_json(key_json) if key_json else None
+                )
+
         if include_deleted_devices:
             for user_id, device_id in deleted_devices:
+                if device_id is None:
+                    continue
                 result.setdefault(user_id, {})[device_id] = None
 
         return result
-- 
cgit 1.5.1


From 606b2d9009f0a3e70056dec7e9cdccd0c0d7afed Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 3 Oct 2022 14:13:11 +0100
Subject: Add cache to `get_partial_state_servers_at_join` (#14013)

---
 changelog.d/14013.misc                 | 1 +
 synapse/storage/databases/main/room.py | 7 +++++++
 2 files changed, 8 insertions(+)
 create mode 100644 changelog.d/14013.misc

(limited to 'synapse')

diff --git a/changelog.d/14013.misc b/changelog.d/14013.misc
new file mode 100644
index 0000000000..499e488c35
--- /dev/null
+++ b/changelog.d/14013.misc
@@ -0,0 +1 @@
+Faster room joins: Send device list updates to most servers in rooms with partial state.
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 059eef5c22..7412bce255 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -1134,6 +1134,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
             get_rooms_for_retention_period_in_range_txn,
         )
 
+    @cached(iterable=True)
     async def get_partial_state_servers_at_join(self, room_id: str) -> Sequence[str]:
         """Gets the list of servers in a partial state room at the time we joined it.
 
@@ -1216,6 +1217,9 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
             keyvalues={"room_id": room_id},
         )
         self._invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,))
+        self._invalidate_cache_and_stream(
+            txn, self.get_partial_state_servers_at_join, (room_id,)
+        )
 
         # We now delete anything from `device_lists_remote_pending` with a
         # stream ID less than the minimum
@@ -1862,6 +1866,9 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
             values=((room_id, s) for s in servers),
         )
         self._invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,))
+        self._invalidate_cache_and_stream(
+            txn, self.get_partial_state_servers_at_join, (room_id,)
+        )
 
     async def write_partial_state_rooms_join_event_id(
         self,
-- 
cgit 1.5.1


From a423f452942c5b1597c29be50b235c8df4d6c93d Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 3 Oct 2022 14:26:49 +0100
Subject: Fix twisted trunk mypy errors (#14012)

---
 changelog.d/14012.misc               | 1 +
 synapse/handlers/cas.py              | 3 +++
 synapse/handlers/ui_auth/checkers.py | 3 +++
 3 files changed, 7 insertions(+)
 create mode 100644 changelog.d/14012.misc

(limited to 'synapse')

diff --git a/changelog.d/14012.misc b/changelog.d/14012.misc
new file mode 100644
index 0000000000..9888dc6cc1
--- /dev/null
+++ b/changelog.d/14012.misc
@@ -0,0 +1 @@
+Fix type annotations to be compatible with new annotations in development versions of twisted.
diff --git a/synapse/handlers/cas.py b/synapse/handlers/cas.py
index 7163af8004..fc467bc7c1 100644
--- a/synapse/handlers/cas.py
+++ b/synapse/handlers/cas.py
@@ -130,6 +130,9 @@ class CasHandler:
         except PartialDownloadError as pde:
             # Twisted raises this error if the connection is closed,
             # even if that's being used old-http style to signal end-of-data
+            # Assertion is for mypy's benefit. Error.response is Optional[bytes],
+            # but a PartialDownloadError should always have a non-None response.
+            assert pde.response is not None
             body = pde.response
         except HttpResponseException as e:
             description = (
diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py
index a744d68c64..332edcca24 100644
--- a/synapse/handlers/ui_auth/checkers.py
+++ b/synapse/handlers/ui_auth/checkers.py
@@ -119,6 +119,9 @@ class RecaptchaAuthChecker(UserInteractiveAuthChecker):
         except PartialDownloadError as pde:
             # Twisted is silly
             data = pde.response
+            # For mypy's benefit. A general Error.response is Optional[bytes], but
+            # a PartialDownloadError.response should be bytes AFAICS.
+            assert data is not None
             resp_body = json_decoder.decode(data.decode("utf-8"))
 
         if "success" in resp_body:
-- 
cgit 1.5.1


From 719488dda87b04e4650a32f0c2b0b71782e0d48b Mon Sep 17 00:00:00 2001
From: lukasdenk <63459921+lukasdenk@users.noreply.github.com>
Date: Mon, 3 Oct 2022 14:30:45 +0100
Subject: Add query parameter `ts` to allow appservices to set the
 `origin_server_ts` for state events. (#11866)

MSC3316 declares that both /rooms/{roomId}/send and /rooms/{roomId}/state
should accept a ts parameter for appservices. This change expands support
to /state and adds tests.
---
 changelog.d/11866.feature       |   1 +
 synapse/handlers/room_member.py |  13 +++++
 synapse/rest/client/room.py     |  34 +++++++-----
 tests/rest/client/test_rooms.py | 119 +++++++++++++++++++++++++++++++++++++++-
 4 files changed, 152 insertions(+), 15 deletions(-)
 create mode 100644 changelog.d/11866.feature

(limited to 'synapse')

diff --git a/changelog.d/11866.feature b/changelog.d/11866.feature
new file mode 100644
index 0000000000..0b52caf805
--- /dev/null
+++ b/changelog.d/11866.feature
@@ -0,0 +1 @@
+Allow application services to set the `origin_server_ts` of a state event by providing the query parameter `ts` in `PUT /_matrix/client/r0/rooms/{roomId}/state/{eventType}/{stateKey}`, per [MSC3316](https://github.com/matrix-org/matrix-doc/pull/3316). Contributed by @lukasdenk.
\ No newline at end of file
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index ee669eb30f..6ad2b38b8f 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -322,6 +322,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         require_consent: bool = True,
         outlier: bool = False,
         historical: bool = False,
+        origin_server_ts: Optional[int] = None,
     ) -> Tuple[str, int]:
         """
         Internal membership update function to get an existing event or create
@@ -361,6 +362,8 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             historical: Indicates whether the message is being inserted
                 back in time around some existing events. This is used to skip
                 a few checks and mark the event as backfilled.
+            origin_server_ts: The origin_server_ts to use if a new event is created. Uses
+                the current timestamp if set to None.
 
         Returns:
             Tuple of event ID and stream ordering position
@@ -399,6 +402,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                 "state_key": user_id,
                 # For backwards compatibility:
                 "membership": membership,
+                "origin_server_ts": origin_server_ts,
             },
             txn_id=txn_id,
             allow_no_prev_events=allow_no_prev_events,
@@ -504,6 +508,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         prev_event_ids: Optional[List[str]] = None,
         state_event_ids: Optional[List[str]] = None,
         depth: Optional[int] = None,
+        origin_server_ts: Optional[int] = None,
     ) -> Tuple[str, int]:
         """Update a user's membership in a room.
 
@@ -542,6 +547,8 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             depth: Override the depth used to order the event in the DAG.
                 Should normally be set to None, which will cause the depth to be calculated
                 based on the prev_events.
+            origin_server_ts: The origin_server_ts to use if a new event is created. Uses
+                the current timestamp if set to None.
 
         Returns:
             A tuple of the new event ID and stream ID.
@@ -583,6 +590,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                         prev_event_ids=prev_event_ids,
                         state_event_ids=state_event_ids,
                         depth=depth,
+                        origin_server_ts=origin_server_ts,
                     )
 
         return result
@@ -606,6 +614,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         prev_event_ids: Optional[List[str]] = None,
         state_event_ids: Optional[List[str]] = None,
         depth: Optional[int] = None,
+        origin_server_ts: Optional[int] = None,
     ) -> Tuple[str, int]:
         """Helper for update_membership.
 
@@ -646,6 +655,8 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             depth: Override the depth used to order the event in the DAG.
                 Should normally be set to None, which will cause the depth to be calculated
                 based on the prev_events.
+            origin_server_ts: The origin_server_ts to use if a new event is created. Uses
+                the current timestamp if set to None.
 
         Returns:
             A tuple of the new event ID and stream ID.
@@ -785,6 +796,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                 require_consent=require_consent,
                 outlier=outlier,
                 historical=historical,
+                origin_server_ts=origin_server_ts,
             )
 
         latest_event_ids = await self.store.get_prev_events_for_room(room_id)
@@ -1030,6 +1042,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             content=content,
             require_consent=require_consent,
             outlier=outlier,
+            origin_server_ts=origin_server_ts,
         )
 
     async def _should_perform_remote_join(
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index 0bca012535..b6dedbed04 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -268,15 +268,9 @@ class RoomStateEventRestServlet(TransactionRestServlet):
 
         content = parse_json_object_from_request(request)
 
-        event_dict = {
-            "type": event_type,
-            "content": content,
-            "room_id": room_id,
-            "sender": requester.user.to_string(),
-        }
-
-        if state_key is not None:
-            event_dict["state_key"] = state_key
+        origin_server_ts = None
+        if requester.app_service:
+            origin_server_ts = parse_integer(request, "ts")
 
         try:
             if event_type == EventTypes.Member:
@@ -287,8 +281,22 @@ class RoomStateEventRestServlet(TransactionRestServlet):
                     room_id=room_id,
                     action=membership,
                     content=content,
+                    origin_server_ts=origin_server_ts,
                 )
             else:
+                event_dict: JsonDict = {
+                    "type": event_type,
+                    "content": content,
+                    "room_id": room_id,
+                    "sender": requester.user.to_string(),
+                }
+
+                if state_key is not None:
+                    event_dict["state_key"] = state_key
+
+                if origin_server_ts is not None:
+                    event_dict["origin_server_ts"] = origin_server_ts
+
                 (
                     event,
                     _,
@@ -333,10 +341,10 @@ class RoomSendEventRestServlet(TransactionRestServlet):
             "sender": requester.user.to_string(),
         }
 
-        # Twisted will have processed the args by now.
-        assert request.args is not None
-        if b"ts" in request.args and requester.app_service:
-            event_dict["origin_server_ts"] = parse_integer(request, "ts", 0)
+        if requester.app_service:
+            origin_server_ts = parse_integer(request, "ts")
+            if origin_server_ts is not None:
+                event_dict["origin_server_ts"] = origin_server_ts
 
         try:
             (
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index 7f8cf4fab0..5e66b5b26c 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -20,7 +20,7 @@
 import json
 from http import HTTPStatus
 from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
-from unittest.mock import Mock, call
+from unittest.mock import Mock, call, patch
 from urllib import parse as urlparse
 
 from parameterized import param, parameterized
@@ -39,9 +39,10 @@ from synapse.api.constants import (
     RoomTypes,
 )
 from synapse.api.errors import Codes, HttpResponseException
+from synapse.appservice import ApplicationService
 from synapse.handlers.pagination import PurgeStatus
 from synapse.rest import admin
-from synapse.rest.client import account, directory, login, profile, room, sync
+from synapse.rest.client import account, directory, login, profile, register, room, sync
 from synapse.server import HomeServer
 from synapse.types import JsonDict, RoomAlias, UserID, create_requester
 from synapse.util import Clock
@@ -1252,6 +1253,120 @@ class RoomJoinTestCase(RoomBase):
         )
 
 
+class RoomAppserviceTsParamTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        room.register_servlets,
+        synapse.rest.admin.register_servlets,
+        register.register_servlets,
+    ]
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.appservice_user, _ = self.register_appservice_user(
+            "as_user_potato", self.appservice.token
+        )
+
+        # Create a room as the appservice user.
+        args = {
+            "access_token": self.appservice.token,
+            "user_id": self.appservice_user,
+        }
+        channel = self.make_request(
+            "POST",
+            f"/_matrix/client/r0/createRoom?{urlparse.urlencode(args)}",
+            content={"visibility": "public"},
+        )
+
+        assert channel.code == 200
+        self.room = channel.json_body["room_id"]
+
+        self.main_store = self.hs.get_datastores().main
+
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
+        config = self.default_config()
+
+        self.appservice = ApplicationService(
+            token="i_am_an_app_service",
+            id="1234",
+            namespaces={"users": [{"regex": r"@as_user.*", "exclusive": True}]},
+            # Note: this user does not have to match the regex above
+            sender="@as_main:test",
+        )
+
+        mock_load_appservices = Mock(return_value=[self.appservice])
+        with patch(
+            "synapse.storage.databases.main.appservice.load_appservices",
+            mock_load_appservices,
+        ):
+            hs = self.setup_test_homeserver(config=config)
+        return hs
+
+    def test_send_event_ts(self) -> None:
+        """Test sending a non-state event with a custom timestamp."""
+        ts = 1
+
+        url_params = {
+            "user_id": self.appservice_user,
+            "ts": ts,
+        }
+        channel = self.make_request(
+            "PUT",
+            path=f"/_matrix/client/r0/rooms/{self.room}/send/m.room.message/1234?"
+            + urlparse.urlencode(url_params),
+            content={"body": "test", "msgtype": "m.text"},
+            access_token=self.appservice.token,
+        )
+        self.assertEqual(channel.code, 200, channel.json_body)
+        event_id = channel.json_body["event_id"]
+
+        # Ensure the event was persisted with the correct timestamp.
+        res = self.get_success(self.main_store.get_event(event_id))
+        self.assertEquals(ts, res.origin_server_ts)
+
+    def test_send_state_event_ts(self) -> None:
+        """Test sending a state event with a custom timestamp."""
+        ts = 1
+
+        url_params = {
+            "user_id": self.appservice_user,
+            "ts": ts,
+        }
+        channel = self.make_request(
+            "PUT",
+            path=f"/_matrix/client/r0/rooms/{self.room}/state/m.room.name?"
+            + urlparse.urlencode(url_params),
+            content={"name": "test"},
+            access_token=self.appservice.token,
+        )
+        self.assertEqual(channel.code, 200, channel.json_body)
+        event_id = channel.json_body["event_id"]
+
+        # Ensure the event was persisted with the correct timestamp.
+        res = self.get_success(self.main_store.get_event(event_id))
+        self.assertEquals(ts, res.origin_server_ts)
+
+    def test_send_membership_event_ts(self) -> None:
+        """Test sending a membership event with a custom timestamp."""
+        ts = 1
+
+        url_params = {
+            "user_id": self.appservice_user,
+            "ts": ts,
+        }
+        channel = self.make_request(
+            "PUT",
+            path=f"/_matrix/client/r0/rooms/{self.room}/state/m.room.member/{self.appservice_user}?"
+            + urlparse.urlencode(url_params),
+            content={"membership": "join", "display_name": "test"},
+            access_token=self.appservice.token,
+        )
+        self.assertEqual(channel.code, 200, channel.json_body)
+        event_id = channel.json_body["event_id"]
+
+        # Ensure the event was persisted with the correct timestamp.
+        res = self.get_success(self.main_store.get_event(event_id))
+        self.assertEquals(ts, res.origin_server_ts)
+
+
 class RoomJoinRatelimitTestCase(RoomBase):
     user_id = "@sid1:red"
 
-- 
cgit 1.5.1


From 2c237debd3476bcc45a76e360b0cb33032b23045 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 3 Oct 2022 14:45:19 +0100
Subject: Fix bug where we didn't delete staging push actions (#14014)

Introduced in #13719
---
 changelog.d/14014.bugfix                 | 1 +
 synapse/storage/databases/main/events.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14014.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14014.bugfix b/changelog.d/14014.bugfix
new file mode 100644
index 0000000000..4318f4daff
--- /dev/null
+++ b/changelog.d/14014.bugfix
@@ -0,0 +1 @@
+Send invite push notifications for invite over federation.
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index bb489b8189..3e15827986 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -2174,7 +2174,7 @@ class PersistEventsStore:
             (
                 (event.event_id,)
                 for event, _ in all_events_and_contexts
-                if not event.internal_metadata.is_outlier()
+                if event.internal_metadata.is_notifiable()
             ),
         )
 
-- 
cgit 1.5.1


From b706111b7805dceb268e114b6c291c4318288cf0 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 3 Oct 2022 12:47:15 -0400
Subject: Do not return unspecced original_event field when using the stable
 /relations endpoint. (#14025)

Keep the old behavior (of including the original_event field) for any
requests to the /unstable version of the endpoint, but do not include
the field when the /v1 version is used.

This should prevent new clients from depending on this field, but will
not help with current dependencies.
---
 changelog.d/14025.bugfix            |  1 +
 synapse/handlers/relations.py       | 25 +++++++++++++------------
 synapse/rest/client/relations.py    |  6 ++++++
 tests/rest/client/test_relations.py | 13 ++++++++-----
 4 files changed, 28 insertions(+), 17 deletions(-)
 create mode 100644 changelog.d/14025.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14025.bugfix b/changelog.d/14025.bugfix
new file mode 100644
index 0000000000..391364f44d
--- /dev/null
+++ b/changelog.d/14025.bugfix
@@ -0,0 +1 @@
+Do not return an unspecified `original_event` field when using the stable `/relations` endpoint. Introduced in Synapse v1.57.0.
diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py
index 28d7093f08..63bc6a7aa5 100644
--- a/synapse/handlers/relations.py
+++ b/synapse/handlers/relations.py
@@ -78,6 +78,7 @@ class RelationsHandler:
         direction: str = "b",
         from_token: Optional[StreamToken] = None,
         to_token: Optional[StreamToken] = None,
+        include_original_event: bool = False,
     ) -> JsonDict:
         """Get related events of a event, ordered by topological ordering.
 
@@ -94,6 +95,7 @@ class RelationsHandler:
                 oldest first (`"f"`).
             from_token: Fetch rows from the given token, or from the start if None.
             to_token: Fetch rows up to the given token, or up to the end if None.
+            include_original_event: Whether to include the parent event.
 
         Returns:
             The pagination chunk.
@@ -138,25 +140,24 @@ class RelationsHandler:
             is_peeking=(member_event_id is None),
         )
 
-        now = self._clock.time_msec()
-        # Do not bundle aggregations when retrieving the original event because
-        # we want the content before relations are applied to it.
-        original_event = self._event_serializer.serialize_event(
-            event, now, bundle_aggregations=None
-        )
         # The relations returned for the requested event do include their
         # bundled aggregations.
         aggregations = await self.get_bundled_aggregations(
             events, requester.user.to_string()
         )
-        serialized_events = self._event_serializer.serialize_events(
-            events, now, bundle_aggregations=aggregations
-        )
 
-        return_value = {
-            "chunk": serialized_events,
-            "original_event": original_event,
+        now = self._clock.time_msec()
+        return_value: JsonDict = {
+            "chunk": self._event_serializer.serialize_events(
+                events, now, bundle_aggregations=aggregations
+            ),
         }
+        if include_original_event:
+            # Do not bundle aggregations when retrieving the original event because
+            # we want the content before relations are applied to it.
+            return_value["original_event"] = self._event_serializer.serialize_event(
+                event, now, bundle_aggregations=None
+            )
 
         if next_token:
             return_value["next_batch"] = await next_token.to_string(self._main_store)
diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py
index 205c556f64..7a25de5c85 100644
--- a/synapse/rest/client/relations.py
+++ b/synapse/rest/client/relations.py
@@ -82,6 +82,11 @@ class RelationPaginationServlet(RestServlet):
         if to_token_str:
             to_token = await StreamToken.from_string(self.store, to_token_str)
 
+        # The unstable version of this API returns an extra field for client
+        # compatibility, see https://github.com/matrix-org/synapse/issues/12930.
+        assert request.path is not None
+        include_original_event = request.path.startswith(b"/_matrix/client/unstable/")
+
         result = await self._relations_handler.get_relations(
             requester=requester,
             event_id=parent_id,
@@ -92,6 +97,7 @@ class RelationPaginationServlet(RestServlet):
             direction=direction,
             from_token=from_token,
             to_token=to_token,
+            include_original_event=include_original_event,
         )
 
         return 200, result
diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py
index fef3b72d76..988cdb746d 100644
--- a/tests/rest/client/test_relations.py
+++ b/tests/rest/client/test_relations.py
@@ -654,6 +654,14 @@ class RelationsTestCase(BaseRelationsTestCase):
         )
 
         # We also expect to get the original event (the id of which is self.parent_id)
+        # when requesting the unstable endpoint.
+        self.assertNotIn("original_event", channel.json_body)
+        channel = self.make_request(
+            "GET",
+            f"/_matrix/client/unstable/rooms/{self.room}/relations/{self.parent_id}?limit=1",
+            access_token=self.user_token,
+        )
+        self.assertEqual(200, channel.code, channel.json_body)
         self.assertEqual(
             channel.json_body["original_event"]["event_id"], self.parent_id
         )
@@ -755,11 +763,6 @@ class RelationPaginationTestCase(BaseRelationsTestCase):
             channel.json_body["chunk"][0],
         )
 
-        # We also expect to get the original event (the id of which is self.parent_id)
-        self.assertEqual(
-            channel.json_body["original_event"]["event_id"], self.parent_id
-        )
-
         # Make sure next_batch has something in it that looks like it could be a
         # valid token.
         self.assertIsInstance(
-- 
cgit 1.5.1


From b381701f8c07444fb86d80a79f561c8468a6c0b7 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Mon, 3 Oct 2022 17:16:15 +0000
Subject: Announce that legacy metric names are deprecated, will be turned off
 by default in Synapse v1.71.0 and removed altogether in Synapse v1.73.0.
 (#14024)

---
 changelog.d/14024.removal                        |  1 +
 docs/metrics-howto.md                            | 11 +++++++++-
 docs/upgrade.md                                  | 28 ++++++++++++++++++++++++
 docs/usage/configuration/config_documentation.md | 25 +++++++++++++++++++++
 synapse/config/metrics.py                        | 26 ----------------------
 5 files changed, 64 insertions(+), 27 deletions(-)
 create mode 100644 changelog.d/14024.removal

(limited to 'synapse')

diff --git a/changelog.d/14024.removal b/changelog.d/14024.removal
new file mode 100644
index 0000000000..9b83cb3927
--- /dev/null
+++ b/changelog.d/14024.removal
@@ -0,0 +1 @@
+Announce that legacy metric names are deprecated, will be turned off by default in Synapse v1.71.0 and removed altogether in Synapse v1.73.0. See the upgrade notes for more information.
\ No newline at end of file
diff --git a/docs/metrics-howto.md b/docs/metrics-howto.md
index 279303a798..d8416b5a5f 100644
--- a/docs/metrics-howto.md
+++ b/docs/metrics-howto.md
@@ -135,6 +135,8 @@ Synapse 1.2 updates the Prometheus metrics to match the naming
 convention of the upstream `prometheus_client`. The old names are
 considered deprecated and will be removed in a future version of
 Synapse.
+**The old names will be disabled by default in Synapse v1.71.0 and removed
+altogether in Synapse v1.73.0.**
 
 | New Name                                                                     | Old Name                                                               |
 | ---------------------------------------------------------------------------- | ---------------------------------------------------------------------- |
@@ -146,6 +148,13 @@ Synapse.
 | synapse_federation_client_events_processed_total                             | synapse_federation_client_events_processed                             |
 | synapse_event_processing_loop_count_total                                    | synapse_event_processing_loop_count                                    |
 | synapse_event_processing_loop_room_count_total                               | synapse_event_processing_loop_room_count                               |
+| synapse_util_caches_cache_hits                                               | synapse_util_caches_cache:hits                                         |
+| synapse_util_caches_cache_size                                               | synapse_util_caches_cache:size                                         |
+| synapse_util_caches_cache_evicted_size                                       | synapse_util_caches_cache:evicted_size                                 |
+| synapse_util_caches_cache                                                    | synapse_util_caches_cache:total                                        |
+| synapse_util_caches_response_cache_size                                      | synapse_util_caches_response_cache:size                                |
+| synapse_util_caches_response_cache_hits                                      | synapse_util_caches_response_cache:hits                                |
+| synapse_util_caches_response_cache_evicted_size                              | synapse_util_caches_response_cache:evicted_size                        |
 | synapse_util_metrics_block_count_total                                       | synapse_util_metrics_block_count                                       |
 | synapse_util_metrics_block_time_seconds_total                                | synapse_util_metrics_block_time_seconds                                |
 | synapse_util_metrics_block_ru_utime_seconds_total                            | synapse_util_metrics_block_ru_utime_seconds                            |
@@ -261,7 +270,7 @@ Standard Metric Names
 
 As of synapse version 0.18.2, the format of the process-wide metrics has
 been changed to fit prometheus standard naming conventions. Additionally
-the units have been changed to seconds, from miliseconds.
+the units have been changed to seconds, from milliseconds.
 
 | New name                                 | Old name                          |
 | ---------------------------------------- | --------------------------------- |
diff --git a/docs/upgrade.md b/docs/upgrade.md
index c4db19e23d..002ef70059 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -100,6 +100,34 @@ vice versa.
 Once all workers are upgraded to v1.69 (or downgraded to v1.68), receipts
 replication will resume as normal.
 
+
+## Deprecation of legacy Prometheus metric names
+
+In current versions of Synapse, some Prometheus metrics are emitted under two different names,
+with one of the names being older but non-compliant with OpenMetrics and Prometheus conventions
+and one of the names being newer but compliant.
+
+Synapse v1.71.0 will turn the old metric names off *by default*.
+For administrators that still rely on them and have not had a chance to update their
+uses of the metrics, it's possible to specify `enable_legacy_metrics: true` in
+the configuration to re-enable them temporarily.
+
+Synapse v1.73.0 will **remove legacy metric names altogether** and it will no longer
+be possible to re-enable them.
+
+The Grafana dashboard, Prometheus recording rules and Prometheus Consoles included
+in the `contrib` directory in the Synapse repository have been updated to no longer
+rely on the legacy names. These can be used on a current version of Synapse
+because current versions of Synapse emit both old and new names.
+
+You may need to update your alerting rules or any other rules that depend on
+the names of Prometheus metrics.
+If you want to test your changes before legacy names are disabled by default,
+you may specify `enable_legacy_metrics: false` in your homeserver configuration.
+
+A list of affected metrics is available on the [Metrics How-to page](https://matrix-org.github.io/synapse/v1.69/metrics-howto.html?highlight=metrics%20deprecated#renaming-of-metrics--deprecation-of-old-names-in-12).
+
+
 # Upgrading to v1.68.0
 
 Two changes announced in the upgrade notes for v1.67.0 have now landed in v1.68.0.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index f46b4932fd..5e40166ff5 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -2436,6 +2436,31 @@ Example configuration:
 enable_metrics: true
 ```
 ---
+### `enable_legacy_metrics`
+
+Set to `true` to publish both legacy and non-legacy Prometheus metric names,
+or to `false` to only publish non-legacy Prometheus metric names.
+Defaults to `true`. Has no effect if `enable_metrics` is `false`.
+**In Synapse v1.71.0, this will default to `false` before being removed in Synapse v1.73.0.**
+
+Legacy metric names include:
+- metrics containing colons in the name, such as `synapse_util_caches_response_cache:hits`, because colons are supposed to be reserved for user-defined recording rules;
+- counters that don't end with the `_total` suffix, such as `synapse_federation_client_sent_edus`, therefore not adhering to the OpenMetrics standard.
+
+These legacy metric names are unconventional and not compliant with OpenMetrics standards.
+They are included for backwards compatibility.
+
+Example configuration:
+```yaml
+enable_legacy_metrics: false
+```
+
+See https://github.com/matrix-org/synapse/issues/11106 for context.
+
+*Since v1.67.0.*
+
+**Will be removed in v1.73.0.**
+---
 ### `sentry`
 
 Use this option to enable sentry integration. Provide the DSN assigned to you by sentry
diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py
index f3134834e5..bb065f9f2f 100644
--- a/synapse/config/metrics.py
+++ b/synapse/config/metrics.py
@@ -43,32 +43,6 @@ class MetricsConfig(Config):
     def read_config(self, config: JsonDict, **kwargs: Any) -> None:
         self.enable_metrics = config.get("enable_metrics", False)
 
-        """
-        ### `enable_legacy_metrics` (experimental)
-
-        **Experimental: this option may be removed or have its behaviour
-        changed at any time, with no notice.**
-
-        Set to `true` to publish both legacy and non-legacy Prometheus metric names,
-        or to `false` to only publish non-legacy Prometheus metric names.
-        Defaults to `true`. Has no effect if `enable_metrics` is `false`.
-
-        Legacy metric names include:
-        - metrics containing colons in the name, such as `synapse_util_caches_response_cache:hits`, because colons are supposed to be reserved for user-defined recording rules;
-        - counters that don't end with the `_total` suffix, such as `synapse_federation_client_sent_edus`, therefore not adhering to the OpenMetrics standard.
-
-        These legacy metric names are unconventional and not compliant with OpenMetrics standards.
-        They are included for backwards compatibility.
-
-        Example configuration:
-        ```yaml
-        enable_legacy_metrics: false
-        ```
-
-        See https://github.com/matrix-org/synapse/issues/11106 for context.
-
-        *Since v1.67.0.*
-        """
         self.enable_legacy_metrics = config.get("enable_legacy_metrics", True)
 
         self.report_stats = config.get("report_stats", None)
-- 
cgit 1.5.1


From 5a6d02524685187b8ed212b8e8027e4d15575fd0 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 3 Oct 2022 18:44:44 +0100
Subject: Clear out old rows from `event_push_actions_staging` (#14020)

On matrix.org we have ~5 million stale rows in `event_push_actions_staging`, let's add a background job to make sure we clear them out.
---
 changelog.d/14020.misc                             |  1 +
 .../storage/databases/main/event_push_actions.py   | 58 +++++++++++++++++++++-
 synapse/storage/schema/__init__.py                 |  1 +
 .../main/delta/73/05old_push_actions.sql.postgres  | 22 ++++++++
 .../main/delta/73/05old_push_actions.sql.sqlite    | 24 +++++++++
 5 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14020.misc
 create mode 100644 synapse/storage/schema/main/delta/73/05old_push_actions.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/73/05old_push_actions.sql.sqlite

(limited to 'synapse')

diff --git a/changelog.d/14020.misc b/changelog.d/14020.misc
new file mode 100644
index 0000000000..85550b307d
--- /dev/null
+++ b/changelog.d/14020.misc
@@ -0,0 +1 @@
+Clear out stale entries in `event_push_actions_staging` table.
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 3fdf128d9e..cdc9ee5a37 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -205,6 +205,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
     ):
         super().__init__(database, db_conn, hs)
 
+        # Track when the process started.
+        self._started_ts = self._clock.time_msec()
+
         # These get correctly set by _find_stream_orderings_for_times_txn
         self.stream_ordering_month_ago: Optional[int] = None
         self.stream_ordering_day_ago: Optional[int] = None
@@ -224,6 +227,10 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 self._rotate_notifs, 30 * 1000
             )
 
+            self._clear_old_staging_loop = self._clock.looping_call(
+                self._clear_old_push_actions_staging, 30 * 60 * 1000
+            )
+
         self.db_pool.updates.register_background_index_update(
             "event_push_summary_unique_index",
             index_name="event_push_summary_unique_index",
@@ -791,7 +798,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         # can be used to insert into the `event_push_actions_staging` table.
         def _gen_entry(
             user_id: str, actions: Collection[Union[Mapping, str]]
-        ) -> Tuple[str, str, str, int, int, int, str]:
+        ) -> Tuple[str, str, str, int, int, int, str, int]:
             is_highlight = 1 if _action_has_highlight(actions) else 0
             notif = 1 if "notify" in actions else 0
             return (
@@ -802,6 +809,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 is_highlight,  # highlight column
                 int(count_as_unread),  # unread column
                 thread_id,  # thread_id column
+                self._clock.time_msec(),  # inserted_ts column
             )
 
         await self.db_pool.simple_insert_many(
@@ -814,6 +822,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 "highlight",
                 "unread",
                 "thread_id",
+                "inserted_ts",
             ),
             values=[
                 _gen_entry(user_id, actions)
@@ -1340,6 +1349,53 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             if done:
                 break
 
+    @wrap_as_background_process("_clear_old_push_actions_staging")
+    async def _clear_old_push_actions_staging(self) -> None:
+        """Clear out any old event push actions from the staging table for
+        events that we failed to persist.
+        """
+
+        # We delete anything more than an hour old, on the assumption that we'll
+        # never take more than an hour to persist an event.
+        delete_before_ts = self._clock.time_msec() - 60 * 60 * 1000
+
+        if self._started_ts > delete_before_ts:
+            # We need to wait for at least an hour before we start deleting,
+            # so that we know it's safe to delete rows with NULL `inserted_ts`.
+            return
+
+        # We don't have an index on `inserted_ts`, instead we assume that the
+        # number of "live" rows in `event_push_actions_staging` is small enough
+        # that an infrequent periodic scan won't cause a problem.
+        #
+        # Note: we also delete any rows with NULL `inserted_ts`, this is safe
+        # as we added a default value to new rows and so they must be at least
+        # an hour old.
+        limit = 1000
+        sql = """
+            DELETE FROM event_push_actions_staging WHERE event_id IN (
+                SELECT event_id FROM event_push_actions_staging WHERE
+                inserted_ts < ? OR inserted_ts IS NULL
+                LIMIT ?
+            )
+        """
+
+        def _clear_old_push_actions_staging_txn(txn: LoggingTransaction) -> bool:
+            txn.execute(sql, (delete_before_ts, limit))
+            return txn.rowcount >= limit
+
+        while True:
+            # Returns true if we have more stuff to delete from the table.
+            deleted = await self.db_pool.runInteraction(
+                "_clear_old_push_actions_staging", _clear_old_push_actions_staging_txn
+            )
+
+            if not deleted:
+                return
+
+            # We sleep to ensure that we don't overwhelm the DB.
+            await self._clock.sleep(1.0)
+
 
 class EventPushActionsStore(EventPushActionsWorkerStore):
     EPA_HIGHLIGHT_INDEX = "epa_highlight_index"
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index f29424d17a..4a5c947699 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -85,6 +85,7 @@ Changes in SCHEMA_VERSION = 73;
       events over federation.
     - Add indexes to various tables (`event_failed_pull_attempts`, `insertion_events`,
       `batch_events`) to make it easy to delete all associated rows when purging a room.
+    - `inserted_ts` column is added to `event_push_actions_staging` table.
 """
 
 
diff --git a/synapse/storage/schema/main/delta/73/05old_push_actions.sql.postgres b/synapse/storage/schema/main/delta/73/05old_push_actions.sql.postgres
new file mode 100644
index 0000000000..4af1a8470b
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/05old_push_actions.sql.postgres
@@ -0,0 +1,22 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Add a column so that we know when a push action was inserted, to make it
+-- easier to clear out old ones.
+ALTER TABLE event_push_actions_staging ADD COLUMN inserted_ts BIGINT;
+
+-- We now add a default for *new* rows. We don't do this above as we don't want
+-- to have to update every row with the new default.
+ALTER TABLE event_push_actions_staging ALTER COLUMN inserted_ts SET DEFAULT extract(epoch from now()) * 1000;
diff --git a/synapse/storage/schema/main/delta/73/05old_push_actions.sql.sqlite b/synapse/storage/schema/main/delta/73/05old_push_actions.sql.sqlite
new file mode 100644
index 0000000000..7482dabba2
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/05old_push_actions.sql.sqlite
@@ -0,0 +1,24 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- On SQLite we must be in monolith mode and updating the database from Synapse,
+-- so it's safe to assume that `event_push_actions_staging` should be empty (as
+-- over restart an event must either have been fully persisted or we'll
+-- recalculate the push actions)
+DELETE FROM event_push_actions_staging;
+
+-- Add a column so that we know when a push action was inserted, to make it
+-- easier to clear out old ones.
+ALTER TABLE event_push_actions_staging ADD COLUMN inserted_ts BIGINT;
-- 
cgit 1.5.1


From 70a4317692adcf7f1dacb201cda2188c8495bfa9 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Mon, 3 Oct 2022 14:53:29 -0500
Subject: Track when the pulled event signature fails (#13815)

Because we're doing the recording in `_check_sigs_and_hash_for_pulled_events_and_fetch` (previously named `_check_sigs_and_hash_and_fetch`), this means we will track signature failures for `backfill`, `get_room_state`, `get_event_auth`, and `get_missing_events` (all pulled event scenarios). And we also record signature failures from `get_pdu`.

Part of https://github.com/matrix-org/synapse/issues/13700

Part of https://github.com/matrix-org/synapse/issues/13676 and https://github.com/matrix-org/synapse/issues/13356

This PR will be especially important for https://github.com/matrix-org/synapse/pull/13816 so we can avoid the costly `_get_state_ids_after_missing_prev_event` down the line when `/messages` calls backfill.
---
 changelog.d/13815.feature                  |  1 +
 synapse/federation/federation_base.py      | 25 ++++++++--
 synapse/federation/federation_client.py    | 50 ++++++++++++++++----
 tests/federation/test_federation_client.py | 75 ++++++++++++++++++++++++++++++
 tests/test_federation.py                   |  4 +-
 5 files changed, 140 insertions(+), 15 deletions(-)
 create mode 100644 changelog.d/13815.feature

(limited to 'synapse')

diff --git a/changelog.d/13815.feature b/changelog.d/13815.feature
new file mode 100644
index 0000000000..ba411f5067
--- /dev/null
+++ b/changelog.d/13815.feature
@@ -0,0 +1 @@
+Keep track of when an event pulled over federation fails its signature check so we can intelligently back-off in the future.
diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py
index abe2c1971a..6bd4742140 100644
--- a/synapse/federation/federation_base.py
+++ b/synapse/federation/federation_base.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Awaitable, Callable, Optional
 
 from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership
 from synapse.api.errors import Codes, SynapseError
@@ -58,7 +58,12 @@ class FederationBase:
 
     @trace
     async def _check_sigs_and_hash(
-        self, room_version: RoomVersion, pdu: EventBase
+        self,
+        room_version: RoomVersion,
+        pdu: EventBase,
+        record_failure_callback: Optional[
+            Callable[[EventBase, str], Awaitable[None]]
+        ] = None,
     ) -> EventBase:
         """Checks that event is correctly signed by the sending server.
 
@@ -70,6 +75,11 @@ class FederationBase:
         Args:
             room_version: The room version of the PDU
             pdu: the event to be checked
+            record_failure_callback: A callback to run whenever the given event
+                fails signature or hash checks. This includes exceptions
+                that would normally be thrown/raised but also things like
+                checking for event tampering where we just return the redacted
+                event.
 
         Returns:
               * the original event if the checks pass
@@ -80,7 +90,12 @@ class FederationBase:
           InvalidEventSignatureError if the signature check failed. Nothing
              will be logged in this case.
         """
-        await _check_sigs_on_pdu(self.keyring, room_version, pdu)
+        try:
+            await _check_sigs_on_pdu(self.keyring, room_version, pdu)
+        except InvalidEventSignatureError as exc:
+            if record_failure_callback:
+                await record_failure_callback(pdu, str(exc))
+            raise exc
 
         if not check_event_content_hash(pdu):
             # let's try to distinguish between failures because the event was
@@ -116,6 +131,10 @@ class FederationBase:
                         "event_id": pdu.event_id,
                     }
                 )
+                if record_failure_callback:
+                    await record_failure_callback(
+                        pdu, "Event content has been tampered with"
+                    )
             return redacted_event
 
         spam_check = await self.spam_checker.check_event_for_spam(pdu)
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 464672a3da..4dca711cd2 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -278,7 +278,7 @@ class FederationClient(FederationBase):
         pdus = [event_from_pdu_json(p, room_version) for p in transaction_data_pdus]
 
         # Check signatures and hash of pdus, removing any from the list that fail checks
-        pdus[:] = await self._check_sigs_and_hash_and_fetch(
+        pdus[:] = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
             dest, pdus, room_version=room_version
         )
 
@@ -328,7 +328,17 @@ class FederationClient(FederationBase):
 
             # Check signatures are correct.
             try:
-                signed_pdu = await self._check_sigs_and_hash(room_version, pdu)
+
+                async def _record_failure_callback(
+                    event: EventBase, cause: str
+                ) -> None:
+                    await self.store.record_event_failed_pull_attempt(
+                        event.room_id, event.event_id, cause
+                    )
+
+                signed_pdu = await self._check_sigs_and_hash(
+                    room_version, pdu, _record_failure_callback
+                )
             except InvalidEventSignatureError as e:
                 errmsg = f"event id {pdu.event_id}: {e}"
                 logger.warning("%s", errmsg)
@@ -547,24 +557,28 @@ class FederationClient(FederationBase):
             len(auth_event_map),
         )
 
-        valid_auth_events = await self._check_sigs_and_hash_and_fetch(
+        valid_auth_events = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
             destination, auth_event_map.values(), room_version
         )
 
-        valid_state_events = await self._check_sigs_and_hash_and_fetch(
-            destination, state_event_map.values(), room_version
+        valid_state_events = (
+            await self._check_sigs_and_hash_for_pulled_events_and_fetch(
+                destination, state_event_map.values(), room_version
+            )
         )
 
         return valid_state_events, valid_auth_events
 
     @trace
-    async def _check_sigs_and_hash_and_fetch(
+    async def _check_sigs_and_hash_for_pulled_events_and_fetch(
         self,
         origin: str,
         pdus: Collection[EventBase],
         room_version: RoomVersion,
     ) -> List[EventBase]:
-        """Checks the signatures and hashes of a list of events.
+        """
+        Checks the signatures and hashes of a list of pulled events we got from
+        federation and records any signature failures as failed pull attempts.
 
         If a PDU fails its signature check then we check if we have it in
         the database, and if not then request it from the sender's server (if that
@@ -597,11 +611,17 @@ class FederationClient(FederationBase):
 
         valid_pdus: List[EventBase] = []
 
+        async def _record_failure_callback(event: EventBase, cause: str) -> None:
+            await self.store.record_event_failed_pull_attempt(
+                event.room_id, event.event_id, cause
+            )
+
         async def _execute(pdu: EventBase) -> None:
             valid_pdu = await self._check_sigs_and_hash_and_fetch_one(
                 pdu=pdu,
                 origin=origin,
                 room_version=room_version,
+                record_failure_callback=_record_failure_callback,
             )
 
             if valid_pdu:
@@ -618,6 +638,9 @@ class FederationClient(FederationBase):
         pdu: EventBase,
         origin: str,
         room_version: RoomVersion,
+        record_failure_callback: Optional[
+            Callable[[EventBase, str], Awaitable[None]]
+        ] = None,
     ) -> Optional[EventBase]:
         """Takes a PDU and checks its signatures and hashes.
 
@@ -634,6 +657,11 @@ class FederationClient(FederationBase):
             origin
             pdu
             room_version
+            record_failure_callback: A callback to run whenever the given event
+                fails signature or hash checks. This includes exceptions
+                that would normally be thrown/raised but also things like
+                checking for event tampering where we just return the redacted
+                event.
 
         Returns:
             The PDU (possibly redacted) if it has valid signatures and hashes.
@@ -641,7 +669,9 @@ class FederationClient(FederationBase):
         """
 
         try:
-            return await self._check_sigs_and_hash(room_version, pdu)
+            return await self._check_sigs_and_hash(
+                room_version, pdu, record_failure_callback
+            )
         except InvalidEventSignatureError as e:
             logger.warning(
                 "Signature on retrieved event %s was invalid (%s). "
@@ -694,7 +724,7 @@ class FederationClient(FederationBase):
 
         auth_chain = [event_from_pdu_json(p, room_version) for p in res["auth_chain"]]
 
-        signed_auth = await self._check_sigs_and_hash_and_fetch(
+        signed_auth = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
             destination, auth_chain, room_version=room_version
         )
 
@@ -1401,7 +1431,7 @@ class FederationClient(FederationBase):
                 event_from_pdu_json(e, room_version) for e in content.get("events", [])
             ]
 
-            signed_events = await self._check_sigs_and_hash_and_fetch(
+            signed_events = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
                 destination, events, room_version=room_version
             )
         except HttpResponseException as e:
diff --git a/tests/federation/test_federation_client.py b/tests/federation/test_federation_client.py
index 50e376f695..a538215931 100644
--- a/tests/federation/test_federation_client.py
+++ b/tests/federation/test_federation_client.py
@@ -23,14 +23,23 @@ from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.room_versions import RoomVersions
 from synapse.events import EventBase
+from synapse.rest import admin
+from synapse.rest.client import login, room
 from synapse.server import HomeServer
 from synapse.types import JsonDict
 from synapse.util import Clock
 
+from tests.test_utils import event_injection
 from tests.unittest import FederatingHomeserverTestCase
 
 
 class FederationClientTest(FederatingHomeserverTestCase):
+    servlets = [
+        admin.register_servlets,
+        room.register_servlets,
+        login.register_servlets,
+    ]
+
     def prepare(self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer):
         super().prepare(reactor, clock, homeserver)
 
@@ -231,6 +240,72 @@ class FederationClientTest(FederatingHomeserverTestCase):
 
         return remote_pdu
 
+    def test_backfill_invalid_signature_records_failed_pull_attempts(
+        self,
+    ) -> None:
+        """
+        Test to make sure that events from /backfill with invalid signatures get
+        recorded as failed pull attempts.
+        """
+        OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}"
+        main_store = self.hs.get_datastores().main
+
+        # Create the room
+        user_id = self.register_user("kermit", "test")
+        tok = self.login("kermit", "test")
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
+
+        # We purposely don't run `add_hashes_and_signatures_from_other_server`
+        # over this because we want the signature check to fail.
+        pulled_event, _ = self.get_success(
+            event_injection.create_event(
+                self.hs,
+                room_id=room_id,
+                sender=OTHER_USER,
+                type="test_event_type",
+                content={"body": "garply"},
+            )
+        )
+
+        # We expect an outbound request to /backfill, so stub that out
+        self._mock_agent.request.side_effect = lambda *args, **kwargs: defer.succeed(
+            _mock_response(
+                {
+                    "origin": "yet.another.server",
+                    "origin_server_ts": 900,
+                    # Mimic the other server returning our new `pulled_event`
+                    "pdus": [pulled_event.get_pdu_json()],
+                }
+            )
+        )
+
+        self.get_success(
+            self.hs.get_federation_client().backfill(
+                # We use "yet.another.server" instead of
+                # `self.OTHER_SERVER_NAME` because we want to see the behavior
+                # from `_check_sigs_and_hash_and_fetch_one` where it tries to
+                # fetch the PDU again from the origin server if the signature
+                # fails. Just want to make sure that the failure is counted from
+                # both code paths.
+                dest="yet.another.server",
+                room_id=room_id,
+                limit=1,
+                extremities=[pulled_event.event_id],
+            ),
+        )
+
+        # Make sure our failed pull attempt was recorded
+        backfill_num_attempts = self.get_success(
+            main_store.db_pool.simple_select_one_onecol(
+                table="event_failed_pull_attempts",
+                keyvalues={"event_id": pulled_event.event_id},
+                retcol="num_attempts",
+            )
+        )
+        # This is 2 because it failed once from `self.OTHER_SERVER_NAME` and the
+        # other from "yet.another.server"
+        self.assertEqual(backfill_num_attempts, 2)
+
 
 def _mock_response(resp: JsonDict):
     body = json.dumps(resp).encode("utf-8")
diff --git a/tests/test_federation.py b/tests/test_federation.py
index 779fad1f63..80e5c590d8 100644
--- a/tests/test_federation.py
+++ b/tests/test_federation.py
@@ -86,8 +86,8 @@ class MessageAcceptTests(unittest.HomeserverTestCase):
 
         federation_event_handler._check_event_auth = _check_event_auth
         self.client = self.homeserver.get_federation_client()
-        self.client._check_sigs_and_hash_and_fetch = lambda dest, pdus, **k: succeed(
-            pdus
+        self.client._check_sigs_and_hash_for_pulled_events_and_fetch = (
+            lambda dest, pdus, **k: succeed(pdus)
         )
 
         # Send the join, it should return None (which is not an error)
-- 
cgit 1.5.1


From 27fa0fa6987c691bf6a8528bb870503d2869a740 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 4 Oct 2022 07:06:41 -0400
Subject: Send the appservice access token as a header. (#13996)

Implements MSC2832 by sending application service access
tokens in the Authorization header.

The access token is also still sent as a query parameter until
the application service ecosystem has fully migrated to using
headers. In the future this could be made opt-in, or removed
completely.
---
 changelog.d/13996.feature    |  1 +
 synapse/appservice/api.py    | 23 +++++++++++++++++++----
 tests/appservice/test_api.py |  8 ++++++--
 3 files changed, 26 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/13996.feature

(limited to 'synapse')

diff --git a/changelog.d/13996.feature b/changelog.d/13996.feature
new file mode 100644
index 0000000000..771f1c97a3
--- /dev/null
+++ b/changelog.d/13996.feature
@@ -0,0 +1 @@
+Send application service access tokens as a header (and query parameter). Implement [MSC2832](https://github.com/matrix-org/matrix-spec-proposals/pull/2832).
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index 0963fb3bb4..fbac4375b0 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -120,7 +120,11 @@ class ApplicationServiceApi(SimpleHttpClient):
 
         uri = service.url + ("/users/%s" % urllib.parse.quote(user_id))
         try:
-            response = await self.get_json(uri, {"access_token": service.hs_token})
+            response = await self.get_json(
+                uri,
+                {"access_token": service.hs_token},
+                headers={"Authorization": f"Bearer {service.hs_token}"},
+            )
             if response is not None:  # just an empty json object
                 return True
         except CodeMessageException as e:
@@ -140,7 +144,11 @@ class ApplicationServiceApi(SimpleHttpClient):
 
         uri = service.url + ("/rooms/%s" % urllib.parse.quote(alias))
         try:
-            response = await self.get_json(uri, {"access_token": service.hs_token})
+            response = await self.get_json(
+                uri,
+                {"access_token": service.hs_token},
+                headers={"Authorization": f"Bearer {service.hs_token}"},
+            )
             if response is not None:  # just an empty json object
                 return True
         except CodeMessageException as e:
@@ -181,7 +189,9 @@ class ApplicationServiceApi(SimpleHttpClient):
                 **fields,
                 b"access_token": service.hs_token,
             }
-            response = await self.get_json(uri, args=args)
+            response = await self.get_json(
+                uri, args=args, headers={"Authorization": f"Bearer {service.hs_token}"}
+            )
             if not isinstance(response, list):
                 logger.warning(
                     "query_3pe to %s returned an invalid response %r", uri, response
@@ -217,7 +227,11 @@ class ApplicationServiceApi(SimpleHttpClient):
                 urllib.parse.quote(protocol),
             )
             try:
-                info = await self.get_json(uri, {"access_token": service.hs_token})
+                info = await self.get_json(
+                    uri,
+                    {"access_token": service.hs_token},
+                    headers={"Authorization": f"Bearer {service.hs_token}"},
+                )
 
                 if not _is_valid_3pe_metadata(info):
                     logger.warning(
@@ -313,6 +327,7 @@ class ApplicationServiceApi(SimpleHttpClient):
                 uri=uri,
                 json_body=body,
                 args={"access_token": service.hs_token},
+                headers={"Authorization": f"Bearer {service.hs_token}"},
             )
             if logger.isEnabledFor(logging.DEBUG):
                 logger.debug(
diff --git a/tests/appservice/test_api.py b/tests/appservice/test_api.py
index 532b676365..11008ac1fb 100644
--- a/tests/appservice/test_api.py
+++ b/tests/appservice/test_api.py
@@ -69,10 +69,14 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
 
         self.request_url = None
 
-        async def get_json(url: str, args: Mapping[Any, Any]) -> List[JsonDict]:
-            if not args.get(b"access_token"):
+        async def get_json(
+            url: str, args: Mapping[Any, Any], headers: Mapping[Any, Any]
+        ) -> List[JsonDict]:
+            # Ensure the access token is passed as both a header and query arg.
+            if not headers.get("Authorization") or not args.get(b"access_token"):
                 raise RuntimeError("Access token not provided")
 
+            self.assertEqual(headers.get("Authorization"), f"Bearer {TOKEN}")
             self.assertEqual(args.get(b"access_token"), TOKEN)
             self.request_url = url
             if url == URL_USER:
-- 
cgit 1.5.1


From e70c6b720ed537c0b7fc0cd4aa20eac195941d73 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 4 Oct 2022 07:08:27 -0400
Subject: Disable pushing for server ACL events (MSC3786). (#13997)

Switches to the stable identifier for MSC3786 and enables it
by default.

This disables pushes of m.room.server_acl events.
---
 changelog.d/13997.feature                   | 1 +
 rust/src/push/base_rules.rs                 | 2 +-
 rust/src/push/mod.rs                        | 9 ---------
 stubs/synapse/synapse_rust/push.pyi         | 6 +-----
 synapse/config/experimental.py              | 3 ---
 synapse/storage/databases/main/push_rule.py | 9 ++-------
 6 files changed, 5 insertions(+), 25 deletions(-)
 create mode 100644 changelog.d/13997.feature

(limited to 'synapse')

diff --git a/changelog.d/13997.feature b/changelog.d/13997.feature
new file mode 100644
index 0000000000..23f7ed106f
--- /dev/null
+++ b/changelog.d/13997.feature
@@ -0,0 +1 @@
+Ignore server ACL changes when generating pushes. Implement [MSC3786](https://github.com/matrix-org/matrix-spec-proposals/pull/3786).
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index bb59676bde..2a09cf99ae 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -173,7 +173,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
         default_enabled: true,
     },
     PushRule {
-        rule_id: Cow::Borrowed("global/override/.org.matrix.msc3786.rule.room.server_acl"),
+        rule_id: Cow::Borrowed("global/override/.m.rule.room.server_acl"),
         priority_class: 5,
         conditions: Cow::Borrowed(&[
             Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index 30fffc31ad..208b9c0d73 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -401,7 +401,6 @@ impl PushRules {
 pub struct FilteredPushRules {
     push_rules: PushRules,
     enabled_map: BTreeMap<String, bool>,
-    msc3786_enabled: bool,
     msc3772_enabled: bool,
 }
 
@@ -411,13 +410,11 @@ impl FilteredPushRules {
     pub fn py_new(
         push_rules: PushRules,
         enabled_map: BTreeMap<String, bool>,
-        msc3786_enabled: bool,
         msc3772_enabled: bool,
     ) -> Self {
         Self {
             push_rules,
             enabled_map,
-            msc3786_enabled,
             msc3772_enabled,
         }
     }
@@ -437,12 +434,6 @@ impl FilteredPushRules {
             .iter()
             .filter(|rule| {
                 // Ignore disabled experimental push rules
-                if !self.msc3786_enabled
-                    && rule.rule_id == "global/override/.org.matrix.msc3786.rule.room.server_acl"
-                {
-                    return false;
-                }
-
                 if !self.msc3772_enabled
                     && rule.rule_id == "global/underride/.org.matrix.msc3772.thread_reply"
                 {
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index fffb8419c6..5900e61450 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -26,11 +26,7 @@ class PushRules:
 
 class FilteredPushRules:
     def __init__(
-        self,
-        push_rules: PushRules,
-        enabled_map: Dict[str, bool],
-        msc3786_enabled: bool,
-        msc3772_enabled: bool,
+        self, push_rules: PushRules, enabled_map: Dict[str, bool], msc3772_enabled: bool
     ): ...
     def rules(self) -> Collection[Tuple[PushRule, bool]]: ...
 
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 31834fb27d..83695f24d9 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -95,9 +95,6 @@ class ExperimentalConfig(Config):
         # MSC2815 (allow room moderators to view redacted event content)
         self.msc2815_enabled: bool = experimental.get("msc2815_enabled", False)
 
-        # MSC3786 (Add a default push rule to ignore m.room.server_acl events)
-        self.msc3786_enabled: bool = experimental.get("msc3786_enabled", False)
-
         # MSC3771: Thread read receipts
         self.msc3771_enabled: bool = experimental.get("msc3771_enabled", False)
         # MSC3772: A push rule for mutual relations.
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index ed17b2e70c..8295322b0e 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -81,15 +81,10 @@ def _load_rules(
         for rawrule in rawrules
     ]
 
-    push_rules = PushRules(
-        ruleslist,
-    )
+    push_rules = PushRules(ruleslist)
 
     filtered_rules = FilteredPushRules(
-        push_rules,
-        enabled_map,
-        msc3786_enabled=experimental_config.msc3786_enabled,
-        msc3772_enabled=experimental_config.msc3772_enabled,
+        push_rules, enabled_map, msc3772_enabled=experimental_config.msc3772_enabled
     )
 
     return filtered_rules
-- 
cgit 1.5.1


From b4ec4f5e71a87d5bdc840a4220dfd9a34c54c847 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 4 Oct 2022 09:47:04 -0400
Subject: Track notification counts per thread (implement MSC3773). (#13776)

When retrieving counts of notifications, segment the results based on the
thread ID, but choose whether to return them as individual threads or as
a single summed field by letting the client opt in via a sync flag.

The summarization code is also updated to be per thread, instead of per
room.
---
 changelog.d/13776.feature                          |   1 +
 synapse/api/constants.py                           |   3 +
 synapse/api/filtering.py                           |  10 ++
 synapse/config/experimental.py                     |   2 +
 synapse/handlers/sync.py                           |  40 ++++-
 synapse/push/bulk_push_rule_evaluator.py           |   4 +-
 synapse/push/push_tools.py                         |   9 +-
 synapse/rest/client/sync.py                        |   4 +
 synapse/rest/client/versions.py                    |   3 +-
 synapse/storage/database.py                        |   2 +-
 .../storage/databases/main/event_push_actions.py   | 188 +++++++++++++--------
 synapse/storage/schema/__init__.py                 |   6 +-
 .../delta/73/06thread_notifications_backfill.sql   |  29 ++++
 .../07thread_notifications_not_null.sql.postgres   |  19 +++
 .../73/07thread_notifications_not_null.sql.sqlite  | 101 +++++++++++
 tests/replication/slave/storage/test_events.py     |  17 +-
 tests/storage/test_event_push_actions.py           | 169 +++++++++++++++++-
 17 files changed, 514 insertions(+), 93 deletions(-)
 create mode 100644 changelog.d/13776.feature
 create mode 100644 synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql
 create mode 100644 synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite

(limited to 'synapse')

diff --git a/changelog.d/13776.feature b/changelog.d/13776.feature
new file mode 100644
index 0000000000..22bce125ce
--- /dev/null
+++ b/changelog.d/13776.feature
@@ -0,0 +1 @@
+Experimental support for thread-specific notifications ([MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)).
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index c031903b1a..44c5ffc6a5 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -31,6 +31,9 @@ MAX_ALIAS_LENGTH = 255
 # the maximum length for a user id is 255 characters
 MAX_USERID_LENGTH = 255
 
+# Constant value used for the pseudo-thread which is the main timeline.
+MAIN_TIMELINE: Final = "main"
+
 
 class Membership:
 
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index f7f46f8d80..c6e44dcf82 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -84,6 +84,7 @@ ROOM_EVENT_FILTER_SCHEMA = {
         "contains_url": {"type": "boolean"},
         "lazy_load_members": {"type": "boolean"},
         "include_redundant_members": {"type": "boolean"},
+        "org.matrix.msc3773.unread_thread_notifications": {"type": "boolean"},
         # Include or exclude events with the provided labels.
         # cf https://github.com/matrix-org/matrix-doc/pull/2326
         "org.matrix.labels": {"type": "array", "items": {"type": "string"}},
@@ -240,6 +241,9 @@ class FilterCollection:
     def include_redundant_members(self) -> bool:
         return self._room_state_filter.include_redundant_members
 
+    def unread_thread_notifications(self) -> bool:
+        return self._room_timeline_filter.unread_thread_notifications
+
     async def filter_presence(
         self, events: Iterable[UserPresenceState]
     ) -> List[UserPresenceState]:
@@ -304,6 +308,12 @@ class Filter:
         self.include_redundant_members = filter_json.get(
             "include_redundant_members", False
         )
+        if hs.config.experimental.msc3773_enabled:
+            self.unread_thread_notifications: bool = filter_json.get(
+                "org.matrix.msc3773.unread_thread_notifications", False
+            )
+        else:
+            self.unread_thread_notifications = False
 
         self.types = filter_json.get("types", None)
         self.not_types = filter_json.get("not_types", [])
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 83695f24d9..6503ce6e34 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -99,6 +99,8 @@ class ExperimentalConfig(Config):
         self.msc3771_enabled: bool = experimental.get("msc3771_enabled", False)
         # MSC3772: A push rule for mutual relations.
         self.msc3772_enabled: bool = experimental.get("msc3772_enabled", False)
+        # MSC3773: Thread notifications
+        self.msc3773_enabled: bool = experimental.get("msc3773_enabled", False)
 
         # MSC3715: dir param on /relations.
         self.msc3715_enabled: bool = experimental.get("msc3715_enabled", False)
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 4abb9b6127..329e89c604 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -40,7 +40,7 @@ from synapse.handlers.relations import BundledAggregations
 from synapse.logging.context import current_context
 from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, start_active_span
 from synapse.push.clientformat import format_push_rules_for_user
-from synapse.storage.databases.main.event_push_actions import NotifCounts
+from synapse.storage.databases.main.event_push_actions import RoomNotifCounts
 from synapse.storage.roommember import MemberSummary
 from synapse.storage.state import StateFilter
 from synapse.types import (
@@ -128,6 +128,7 @@ class JoinedSyncResult:
     ephemeral: List[JsonDict]
     account_data: List[JsonDict]
     unread_notifications: JsonDict
+    unread_thread_notifications: JsonDict
     summary: Optional[JsonDict]
     unread_count: int
 
@@ -278,6 +279,8 @@ class SyncHandler:
 
         self.rooms_to_exclude = hs.config.server.rooms_to_exclude_from_sync
 
+        self._msc3773_enabled = hs.config.experimental.msc3773_enabled
+
     async def wait_for_sync_for_user(
         self,
         requester: Requester,
@@ -1288,7 +1291,7 @@ class SyncHandler:
 
     async def unread_notifs_for_room_id(
         self, room_id: str, sync_config: SyncConfig
-    ) -> NotifCounts:
+    ) -> RoomNotifCounts:
         with Measure(self.clock, "unread_notifs_for_room_id"):
 
             return await self.store.get_unread_event_push_actions_by_room_for_user(
@@ -2353,6 +2356,7 @@ class SyncHandler:
                     ephemeral=ephemeral,
                     account_data=account_data_events,
                     unread_notifications=unread_notifications,
+                    unread_thread_notifications={},
                     summary=summary,
                     unread_count=0,
                 )
@@ -2360,10 +2364,36 @@ class SyncHandler:
                 if room_sync or always_include:
                     notifs = await self.unread_notifs_for_room_id(room_id, sync_config)
 
-                    unread_notifications["notification_count"] = notifs.notify_count
-                    unread_notifications["highlight_count"] = notifs.highlight_count
+                    # Notifications for the main timeline.
+                    notify_count = notifs.main_timeline.notify_count
+                    highlight_count = notifs.main_timeline.highlight_count
+                    unread_count = notifs.main_timeline.unread_count
 
-                    room_sync.unread_count = notifs.unread_count
+                    # Check the sync configuration.
+                    if (
+                        self._msc3773_enabled
+                        and sync_config.filter_collection.unread_thread_notifications()
+                    ):
+                        # And add info for each thread.
+                        room_sync.unread_thread_notifications = {
+                            thread_id: {
+                                "notification_count": thread_notifs.notify_count,
+                                "highlight_count": thread_notifs.highlight_count,
+                            }
+                            for thread_id, thread_notifs in notifs.threads.items()
+                            if thread_id is not None
+                        }
+
+                    else:
+                        # Combine the unread counts for all threads and main timeline.
+                        for thread_notifs in notifs.threads.values():
+                            notify_count += thread_notifs.notify_count
+                            highlight_count += thread_notifs.highlight_count
+                            unread_count += thread_notifs.unread_count
+
+                    unread_notifications["notification_count"] = notify_count
+                    unread_notifications["highlight_count"] = highlight_count
+                    room_sync.unread_count = unread_count
 
                     sync_result_builder.joined.append(room_sync)
 
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 4270438918..61d952742d 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -31,7 +31,7 @@ from typing import (
 
 from prometheus_client import Counter
 
-from synapse.api.constants import EventTypes, Membership, RelationTypes
+from synapse.api.constants import MAIN_TIMELINE, EventTypes, Membership, RelationTypes
 from synapse.event_auth import auth_types_for_event, get_user_power_level
 from synapse.events import EventBase, relation_from_event
 from synapse.events.snapshot import EventContext
@@ -280,7 +280,7 @@ class BulkPushRuleEvaluator:
         # If the event does not have a relation, then cannot have any mutual
         # relations or thread ID.
         relations = {}
-        thread_id = "main"
+        thread_id = MAIN_TIMELINE
         if relation:
             relations = await self._get_mutual_relations(
                 relation.parent_id,
diff --git a/synapse/push/push_tools.py b/synapse/push/push_tools.py
index 658bf373b7..edeba27a45 100644
--- a/synapse/push/push_tools.py
+++ b/synapse/push/push_tools.py
@@ -39,7 +39,12 @@ async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) -
     await concurrently_execute(get_room_unread_count, joins, 10)
 
     for notifs in room_notifs:
-        if notifs.notify_count == 0:
+        # Combine the counts from all the threads.
+        notify_count = notifs.main_timeline.notify_count + sum(
+            n.notify_count for n in notifs.threads.values()
+        )
+
+        if notify_count == 0:
             continue
 
         if group_by_room:
@@ -47,7 +52,7 @@ async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) -
             badge += 1
         else:
             # increment the badge count by the number of unread messages in the room
-            badge += notifs.notify_count
+            badge += notify_count
     return badge
 
 
diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py
index c2989765ce..f1c23d68e5 100644
--- a/synapse/rest/client/sync.py
+++ b/synapse/rest/client/sync.py
@@ -509,6 +509,10 @@ class SyncRestServlet(RestServlet):
             ephemeral_events = room.ephemeral
             result["ephemeral"] = {"events": ephemeral_events}
             result["unread_notifications"] = room.unread_notifications
+            if room.unread_thread_notifications:
+                result[
+                    "org.matrix.msc3773.unread_thread_notifications"
+                ] = room.unread_thread_notifications
             result["summary"] = room.summary
             if self._msc2654_enabled:
                 result["org.matrix.msc2654.unread_count"] = room.unread_count
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index c95b0d6f19..280d306483 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -103,8 +103,9 @@ class VersionsRestServlet(RestServlet):
                     "org.matrix.msc3030": self.config.experimental.msc3030_enabled,
                     # Adds support for thread relations, per MSC3440.
                     "org.matrix.msc3440.stable": True,  # TODO: remove when "v1.3" is added above
-                    # Support for thread read receipts.
+                    # Support for thread read receipts & notification counts.
                     "org.matrix.msc3771": self.config.experimental.msc3771_enabled,
+                    "org.matrix.msc3773": self.config.experimental.msc3773_enabled,
                     # Allows moderators to fetch redacted event content as described in MSC2815
                     "fi.mau.msc2815": self.config.experimental.msc2815_enabled,
                     # Adds support for login token requests as per MSC3882
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index b4469eb964..7bb21f8f81 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -94,7 +94,7 @@ UNIQUE_INDEX_BACKGROUND_UPDATES = {
     "event_search": "event_search_event_id_idx",
     "local_media_repository_thumbnails": "local_media_repository_thumbnails_method_idx",
     "remote_media_cache_thumbnails": "remote_media_repository_thumbnails_method_idx",
-    "event_push_summary": "event_push_summary_unique_index",
+    "event_push_summary": "event_push_summary_unique_index2",
     "receipts_linearized": "receipts_linearized_unique_index",
     "receipts_graph": "receipts_graph_unique_index",
 }
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index cdc9ee5a37..3210e9cca1 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -88,7 +88,7 @@ from typing import (
 
 import attr
 
-from synapse.api.constants import ReceiptTypes
+from synapse.api.constants import MAIN_TIMELINE, ReceiptTypes
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
 from synapse.storage.database import (
@@ -157,7 +157,7 @@ class UserPushAction(EmailPushAction):
 @attr.s(slots=True, auto_attribs=True)
 class NotifCounts:
     """
-    The per-user, per-room count of notifications. Used by sync and push.
+    The per-user, per-room, per-thread count of notifications. Used by sync and push.
     """
 
     notify_count: int = 0
@@ -165,6 +165,21 @@ class NotifCounts:
     highlight_count: int = 0
 
 
+@attr.s(slots=True, auto_attribs=True)
+class RoomNotifCounts:
+    """
+    The per-user, per-room count of notifications. Used by sync and push.
+    """
+
+    main_timeline: NotifCounts
+    # Map of thread ID to the notification counts.
+    threads: Dict[str, NotifCounts]
+
+    def __len__(self) -> int:
+        # To properly account for the amount of space in any caches.
+        return len(self.threads) + 1
+
+
 def _serialize_action(
     actions: Collection[Union[Mapping, str]], is_highlight: bool
 ) -> str:
@@ -338,12 +353,12 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
         return result
 
-    @cached(tree=True, max_entries=5000)
+    @cached(tree=True, max_entries=5000, iterable=True)
     async def get_unread_event_push_actions_by_room_for_user(
         self,
         room_id: str,
         user_id: str,
-    ) -> NotifCounts:
+    ) -> RoomNotifCounts:
         """Get the notification count, the highlight count and the unread message count
         for a given user in a given room after their latest read receipt.
 
@@ -356,8 +371,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             user_id: The user to retrieve the counts for.
 
         Returns
-            A NotifCounts object containing the notification count, the highlight count
-            and the unread message count.
+            A RoomNotifCounts object containing the notification count, the
+            highlight count and the unread message count for both the main timeline
+            and threads.
         """
         return await self.db_pool.runInteraction(
             "get_unread_event_push_actions_by_room",
@@ -371,7 +387,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         txn: LoggingTransaction,
         room_id: str,
         user_id: str,
-    ) -> NotifCounts:
+    ) -> RoomNotifCounts:
         # Get the stream ordering of the user's latest receipt in the room.
         result = self.get_last_unthreaded_receipt_for_user_txn(
             txn,
@@ -406,7 +422,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         room_id: str,
         user_id: str,
         receipt_stream_ordering: int,
-    ) -> NotifCounts:
+    ) -> RoomNotifCounts:
         """Get the number of unread messages for a user/room that have happened
         since the given stream ordering.
 
@@ -418,12 +434,19 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 receipt in the room. If there are no receipts, the stream ordering
                 of the user's join event.
 
-        Returns
-            A NotifCounts object containing the notification count, the highlight count
-            and the unread message count.
+        Returns:
+            A RoomNotifCounts object containing the notification count, the
+            highlight count and the unread message count for both the main timeline
+            and threads.
         """
 
-        counts = NotifCounts()
+        main_counts = NotifCounts()
+        thread_counts: Dict[str, NotifCounts] = {}
+
+        def _get_thread(thread_id: str) -> NotifCounts:
+            if thread_id == MAIN_TIMELINE:
+                return main_counts
+            return thread_counts.setdefault(thread_id, NotifCounts())
 
         # First we pull the counts from the summary table.
         #
@@ -440,52 +463,61 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         # receipt).
         txn.execute(
             """
-                SELECT stream_ordering, notif_count, COALESCE(unread_count, 0)
+                SELECT stream_ordering, notif_count, COALESCE(unread_count, 0), thread_id
                 FROM event_push_summary
                 WHERE room_id = ? AND user_id = ?
                 AND (
                     (last_receipt_stream_ordering IS NULL AND stream_ordering > ?)
                     OR last_receipt_stream_ordering = ?
-                )
+                ) AND (notif_count != 0 OR COALESCE(unread_count, 0) != 0)
             """,
             (room_id, user_id, receipt_stream_ordering, receipt_stream_ordering),
         )
-        row = txn.fetchone()
-
-        summary_stream_ordering = 0
-        if row:
-            summary_stream_ordering = row[0]
-            counts.notify_count += row[1]
-            counts.unread_count += row[2]
+        max_summary_stream_ordering = 0
+        for summary_stream_ordering, notif_count, unread_count, thread_id in txn:
+            counts = _get_thread(thread_id)
+            counts.notify_count += notif_count
+            counts.unread_count += unread_count
+
+            # Summaries will only be used if they have not been invalidated by
+            # a recent receipt; track the latest stream ordering of a valid summary.
+            #
+            # Note that since there's only one read receipt in the room per user,
+            # valid summaries are contiguous.
+            max_summary_stream_ordering = max(
+                summary_stream_ordering, max_summary_stream_ordering
+            )
 
         # Next we need to count highlights, which aren't summarised
         sql = """
-            SELECT COUNT(*) FROM event_push_actions
+            SELECT COUNT(*), thread_id FROM event_push_actions
             WHERE user_id = ?
                 AND room_id = ?
                 AND stream_ordering > ?
                 AND highlight = 1
+            GROUP BY thread_id
         """
         txn.execute(sql, (user_id, room_id, receipt_stream_ordering))
-        row = txn.fetchone()
-        if row:
-            counts.highlight_count += row[0]
+        for highlight_count, thread_id in txn:
+            _get_thread(thread_id).highlight_count += highlight_count
 
         # Finally we need to count push actions that aren't included in the
         # summary returned above. This might be due to recent events that haven't
         # been summarised yet or the summary is out of date due to a recent read
         # receipt.
         start_unread_stream_ordering = max(
-            receipt_stream_ordering, summary_stream_ordering
+            receipt_stream_ordering, max_summary_stream_ordering
         )
-        notify_count, unread_count = self._get_notif_unread_count_for_user_room(
+        unread_counts = self._get_notif_unread_count_for_user_room(
             txn, room_id, user_id, start_unread_stream_ordering
         )
 
-        counts.notify_count += notify_count
-        counts.unread_count += unread_count
+        for notif_count, unread_count, thread_id in unread_counts:
+            counts = _get_thread(thread_id)
+            counts.notify_count += notif_count
+            counts.unread_count += unread_count
 
-        return counts
+        return RoomNotifCounts(main_counts, thread_counts)
 
     def _get_notif_unread_count_for_user_room(
         self,
@@ -494,7 +526,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         user_id: str,
         stream_ordering: int,
         max_stream_ordering: Optional[int] = None,
-    ) -> Tuple[int, int]:
+    ) -> List[Tuple[int, int, str]]:
         """Returns the notify and unread counts from `event_push_actions` for
         the given user/room in the given range.
 
@@ -510,13 +542,14 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 If this is not given, then no maximum is applied.
 
         Return:
-            A tuple of the notif count and unread count in the given range.
+            A list of (notif count, unread count, thread ID) tuples, one per
+            thread with push actions in the given range.
         """
 
         # If there have been no events in the room since the stream ordering,
         # there can't be any push actions either.
         if not self._events_stream_cache.has_entity_changed(room_id, stream_ordering):
-            return 0, 0
+            return []
 
         clause = ""
         args = [user_id, room_id, stream_ordering]
@@ -527,26 +560,23 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             # If the max stream ordering is less than the min stream ordering,
             # then obviously there are zero push actions in that range.
             if max_stream_ordering <= stream_ordering:
-                return 0, 0
+                return []
 
         sql = f"""
             SELECT
                COUNT(CASE WHEN notif = 1 THEN 1 END),
-               COUNT(CASE WHEN unread = 1 THEN 1 END)
-             FROM event_push_actions ea
-             WHERE user_id = ?
+               COUNT(CASE WHEN unread = 1 THEN 1 END),
+               thread_id
+            FROM event_push_actions ea
+            WHERE user_id = ?
                AND room_id = ?
                AND ea.stream_ordering > ?
                {clause}
+            GROUP BY thread_id
         """
 
         txn.execute(sql, args)
-        row = txn.fetchone()
-
-        if row:
-            return cast(Tuple[int, int], row)
-
-        return 0, 0
+        return cast(List[Tuple[int, int, str]], txn.fetchall())
 
     async def get_push_action_users_in_range(
         self, min_stream_ordering: int, max_stream_ordering: int
@@ -1099,26 +1129,34 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
             # Fetch the notification counts between the stream ordering of the
             # latest receipt and what was previously summarised.
-            notif_count, unread_count = self._get_notif_unread_count_for_user_room(
+            unread_counts = self._get_notif_unread_count_for_user_room(
                 txn, room_id, user_id, stream_ordering, old_rotate_stream_ordering
             )
 
-            # Replace the previous summary with the new counts.
-            #
-            # TODO(threads): Upsert per-thread instead of setting them all to main.
-            self.db_pool.simple_upsert_txn(
+            # First mark the summary for all threads in the room as cleared.
+            self.db_pool.simple_update_txn(
                 txn,
                 table="event_push_summary",
-                keyvalues={"room_id": room_id, "user_id": user_id},
-                values={
-                    "notif_count": notif_count,
-                    "unread_count": unread_count,
+                keyvalues={"user_id": user_id, "room_id": room_id},
+                updatevalues={
+                    "notif_count": 0,
+                    "unread_count": 0,
                     "stream_ordering": old_rotate_stream_ordering,
                     "last_receipt_stream_ordering": stream_ordering,
-                    "thread_id": "main",
                 },
             )
 
+            # Then any updated threads get their notification count and unread
+            # count updated.
+            self.db_pool.simple_update_many_txn(
+                txn,
+                table="event_push_summary",
+                key_names=("room_id", "user_id", "thread_id"),
+                key_values=[(room_id, user_id, row[2]) for row in unread_counts],
+                value_names=("notif_count", "unread_count"),
+                value_values=[(row[0], row[1]) for row in unread_counts],
+            )
+
         # We always update `event_push_summary_last_receipt_stream_id` to
         # ensure that we don't rescan the same receipts for remote users.
 
@@ -1204,23 +1242,23 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
         # Calculate the new counts that should be upserted into event_push_summary
         sql = """
-            SELECT user_id, room_id,
+            SELECT user_id, room_id, thread_id,
                 coalesce(old.%s, 0) + upd.cnt,
                 upd.stream_ordering
             FROM (
-                SELECT user_id, room_id, count(*) as cnt,
+                SELECT user_id, room_id, thread_id, count(*) as cnt,
                     max(ea.stream_ordering) as stream_ordering
                 FROM event_push_actions AS ea
-                LEFT JOIN event_push_summary AS old USING (user_id, room_id)
+                LEFT JOIN event_push_summary AS old USING (user_id, room_id, thread_id)
                 WHERE ? < ea.stream_ordering AND ea.stream_ordering <= ?
                     AND (
                         old.last_receipt_stream_ordering IS NULL
                         OR old.last_receipt_stream_ordering < ea.stream_ordering
                     )
                     AND %s = 1
-                GROUP BY user_id, room_id
+                GROUP BY user_id, room_id, thread_id
             ) AS upd
-            LEFT JOIN event_push_summary AS old USING (user_id, room_id)
+            LEFT JOIN event_push_summary AS old USING (user_id, room_id, thread_id)
         """
 
         # First get the count of unread messages.
@@ -1234,11 +1272,11 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         # object because we might not have the same amount of rows in each of them. To do
         # this, we use a dict indexed on the user ID and room ID to make it easier to
         # populate.
-        summaries: Dict[Tuple[str, str], _EventPushSummary] = {}
+        summaries: Dict[Tuple[str, str, str], _EventPushSummary] = {}
         for row in txn:
-            summaries[(row[0], row[1])] = _EventPushSummary(
-                unread_count=row[2],
-                stream_ordering=row[3],
+            summaries[(row[0], row[1], row[2])] = _EventPushSummary(
+                unread_count=row[3],
+                stream_ordering=row[4],
                 notif_count=0,
             )
 
@@ -1249,34 +1287,35 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         )
 
         for row in txn:
-            if (row[0], row[1]) in summaries:
-                summaries[(row[0], row[1])].notif_count = row[2]
+            if (row[0], row[1], row[2]) in summaries:
+                summaries[(row[0], row[1], row[2])].notif_count = row[3]
             else:
                 # Because the rules on notifying are different than the rules on marking
                 # a message unread, we might end up with messages that notify but aren't
                 # marked unread, so we might not have a summary for this (user, room)
                 # tuple to complete.
-                summaries[(row[0], row[1])] = _EventPushSummary(
+                summaries[(row[0], row[1], row[2])] = _EventPushSummary(
                     unread_count=0,
-                    stream_ordering=row[3],
-                    notif_count=row[2],
+                    stream_ordering=row[4],
+                    notif_count=row[3],
                 )
 
         logger.info("Rotating notifications, handling %d rows", len(summaries))
 
-        # TODO(threads): Update on a per-thread basis.
         self.db_pool.simple_upsert_many_txn(
             txn,
             table="event_push_summary",
-            key_names=("user_id", "room_id"),
-            key_values=[(user_id, room_id) for user_id, room_id in summaries],
-            value_names=("notif_count", "unread_count", "stream_ordering", "thread_id"),
+            key_names=("user_id", "room_id", "thread_id"),
+            key_values=[
+                (user_id, room_id, thread_id)
+                for user_id, room_id, thread_id in summaries
+            ],
+            value_names=("notif_count", "unread_count", "stream_ordering"),
             value_values=[
                 (
                     summary.notif_count,
                     summary.unread_count,
                     summary.stream_ordering,
-                    "main",
                 )
                 for summary in summaries.values()
             ],
@@ -1288,7 +1327,10 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         )
 
     async def _remove_old_push_actions_that_have_rotated(self) -> None:
-        """Clear out old push actions that have been summarised."""
+        """
+        Clear out old push actions that have been summarised (and are more than
+        1 day old).
+        """
 
         # We want to clear out anything that is older than a day that *has* already
         # been rotated.
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 4a5c947699..19dbf2da7f 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -90,9 +90,9 @@ Changes in SCHEMA_VERSION = 73;
 
 
 SCHEMA_COMPAT_VERSION = (
-    # The groups tables are no longer accessible, so synapses with SCHEMA_VERSION < 72
-    # could break.
-    72
+    # The thread_id column must exist for event_push_actions, event_push_summary,
+    # receipts_linearized, and receipts_graph.
+    73
 )
 """Limit on how far the synapse codebase can be rolled back without breaking db compat
 
diff --git a/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql b/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql
new file mode 100644
index 0000000000..0ffde9bbeb
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql
@@ -0,0 +1,29 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Forces the background updates from 06thread_notifications.sql to run in the
+-- foreground as code will now require those to be "done".
+
+DELETE FROM background_updates WHERE update_name = 'event_push_backfill_thread_id';
+
+-- Overwrite any null thread_id columns.
+UPDATE event_push_actions_staging SET thread_id = 'main' WHERE thread_id IS NULL;
+UPDATE event_push_actions SET thread_id = 'main' WHERE thread_id IS NULL;
+UPDATE event_push_summary SET thread_id = 'main' WHERE thread_id IS NULL;
+
+-- Do not run the event_push_summary_unique_index job if it is pending; the
+-- thread_id field will be made required.
+DELETE FROM background_updates WHERE update_name = 'event_push_summary_unique_index';
+DROP INDEX IF EXISTS event_push_summary_unique_index;
diff --git a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres
new file mode 100644
index 0000000000..33674f8c62
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres
@@ -0,0 +1,19 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- The columns can now be made non-nullable.
+ALTER TABLE event_push_actions_staging ALTER COLUMN thread_id SET NOT NULL;
+ALTER TABLE event_push_actions ALTER COLUMN thread_id SET NOT NULL;
+ALTER TABLE event_push_summary ALTER COLUMN thread_id SET NOT NULL;
diff --git a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite
new file mode 100644
index 0000000000..5322ad77a4
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite
@@ -0,0 +1,101 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- SQLite doesn't support modifying the columns of an existing table, so the
+-- tables must be recreated.
+
+-- Create the new tables.
+CREATE TABLE event_push_actions_staging_new (
+    event_id TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    actions TEXT NOT NULL,
+    notif SMALLINT NOT NULL,
+    highlight SMALLINT NOT NULL,
+    unread SMALLINT,
+    thread_id TEXT NOT NULL,
+    inserted_ts BIGINT
+);
+
+CREATE TABLE event_push_actions_new (
+    room_id TEXT NOT NULL,
+    event_id TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    profile_tag VARCHAR(32),
+    actions TEXT NOT NULL,
+    topological_ordering BIGINT,
+    stream_ordering BIGINT,
+    notif SMALLINT,
+    highlight SMALLINT,
+    unread SMALLINT,
+    thread_id TEXT NOT NULL,
+    CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag)
+);
+
+CREATE TABLE event_push_summary_new (
+    user_id TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+    notif_count BIGINT NOT NULL,
+    stream_ordering BIGINT NOT NULL,
+    unread_count BIGINT,
+    last_receipt_stream_ordering BIGINT,
+    thread_id TEXT NOT NULL
+);
+
+-- Swap the indexes.
+DROP INDEX IF EXISTS event_push_actions_staging_id;
+CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging_new(event_id);
+
+DROP INDEX IF EXISTS event_push_actions_room_id_user_id;
+DROP INDEX IF EXISTS event_push_actions_rm_tokens;
+DROP INDEX IF EXISTS event_push_actions_stream_ordering;
+DROP INDEX IF EXISTS event_push_actions_u_highlight;
+DROP INDEX IF EXISTS event_push_actions_highlights_index;
+CREATE INDEX event_push_actions_room_id_user_id on event_push_actions_new(room_id, user_id);
+CREATE INDEX event_push_actions_rm_tokens on event_push_actions_new( user_id, room_id, topological_ordering, stream_ordering );
+CREATE INDEX event_push_actions_stream_ordering on event_push_actions_new( stream_ordering, user_id );
+CREATE INDEX event_push_actions_u_highlight ON event_push_actions_new (user_id, stream_ordering);
+CREATE INDEX event_push_actions_highlights_index ON event_push_actions_new (user_id, room_id, topological_ordering, stream_ordering);
+
+-- Copy the data.
+INSERT INTO event_push_actions_staging_new (event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts)
+    SELECT event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts
+    FROM event_push_actions_staging;
+
+INSERT INTO event_push_actions_new (room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id)
+    SELECT room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id
+    FROM event_push_actions;
+
+INSERT INTO event_push_summary_new (user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id)
+    SELECT user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id
+    FROM event_push_summary;
+
+-- Drop the old tables.
+DROP TABLE event_push_actions_staging;
+DROP TABLE event_push_actions;
+DROP TABLE event_push_summary;
+
+-- Rename the tables.
+ALTER TABLE event_push_actions_staging_new RENAME TO event_push_actions_staging;
+ALTER TABLE event_push_actions_new RENAME TO event_push_actions;
+ALTER TABLE event_push_summary_new RENAME TO event_push_summary;
+
+-- Re-run background updates from 72/02event_push_actions_index.sql and
+-- 72/06thread_notifications.sql.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7307, 'event_push_summary_unique_index2', '{}')
+  ON CONFLICT (update_name) DO NOTHING;
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7307, 'event_push_actions_stream_highlight_index', '{}')
+  ON CONFLICT (update_name) DO NOTHING;
diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py
index efd92793c0..d42e36cdf1 100644
--- a/tests/replication/slave/storage/test_events.py
+++ b/tests/replication/slave/storage/test_events.py
@@ -22,7 +22,10 @@ from synapse.api.room_versions import RoomVersions
 from synapse.events import FrozenEvent, _EventInternalMetadata, make_event_from_dict
 from synapse.handlers.room import RoomEventSource
 from synapse.replication.slave.storage.events import SlavedEventStore
-from synapse.storage.databases.main.event_push_actions import NotifCounts
+from synapse.storage.databases.main.event_push_actions import (
+    NotifCounts,
+    RoomNotifCounts,
+)
 from synapse.storage.roommember import GetRoomsForUserWithStreamOrdering, RoomsForUser
 from synapse.types import PersistedEventPosition
 
@@ -178,7 +181,9 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase):
         self.check(
             "get_unread_event_push_actions_by_room_for_user",
             [ROOM_ID, USER_ID_2],
-            NotifCounts(highlight_count=0, unread_count=0, notify_count=0),
+            RoomNotifCounts(
+                NotifCounts(highlight_count=0, unread_count=0, notify_count=0), {}
+            ),
         )
 
         self.persist(
@@ -191,7 +196,9 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase):
         self.check(
             "get_unread_event_push_actions_by_room_for_user",
             [ROOM_ID, USER_ID_2],
-            NotifCounts(highlight_count=0, unread_count=0, notify_count=1),
+            RoomNotifCounts(
+                NotifCounts(highlight_count=0, unread_count=0, notify_count=1), {}
+            ),
         )
 
         self.persist(
@@ -206,7 +213,9 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase):
         self.check(
             "get_unread_event_push_actions_by_room_for_user",
             [ROOM_ID, USER_ID_2],
-            NotifCounts(highlight_count=1, unread_count=0, notify_count=2),
+            RoomNotifCounts(
+                NotifCounts(highlight_count=1, unread_count=0, notify_count=2), {}
+            ),
         )
 
     def test_get_rooms_for_user_with_stream_ordering(self):
diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py
index 473c965e19..89f986ac34 100644
--- a/tests/storage/test_event_push_actions.py
+++ b/tests/storage/test_event_push_actions.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Tuple
+from typing import Optional, Tuple
 
 from twisted.test.proto_helpers import MemoryReactor
 
@@ -20,6 +20,7 @@ from synapse.rest import admin
 from synapse.rest.client import login, room
 from synapse.server import HomeServer
 from synapse.storage.databases.main.event_push_actions import NotifCounts
+from synapse.types import JsonDict
 from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
@@ -133,13 +134,14 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
                 )
             )
             self.assertEqual(
-                counts,
+                counts.main_timeline,
                 NotifCounts(
                     notify_count=noitf_count,
                     unread_count=0,
                     highlight_count=highlight_count,
                 ),
             )
+            self.assertEqual(counts.threads, {})
 
         def _create_event(highlight: bool = False) -> str:
             result = self.helper.send_event(
@@ -186,6 +188,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         _assert_counts(0, 0)
 
         _create_event()
+        _assert_counts(1, 0)
         _rotate()
         _assert_counts(1, 0)
 
@@ -236,6 +239,168 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         _rotate()
         _assert_counts(0, 0)
 
+    def test_count_aggregation_threads(self) -> None:
+        """
+        This is essentially the same test as test_count_aggregation, but adds
+        events to the main timeline and to a thread.
+        """
+
+        user_id, token, _, other_token, room_id = self._create_users_and_room()
+        thread_id: str
+
+        last_event_id: str
+
+        def _assert_counts(
+            noitf_count: int,
+            highlight_count: int,
+            thread_notif_count: int,
+            thread_highlight_count: int,
+        ) -> None:
+            counts = self.get_success(
+                self.store.db_pool.runInteraction(
+                    "get-unread-counts",
+                    self.store._get_unread_counts_by_receipt_txn,
+                    room_id,
+                    user_id,
+                )
+            )
+            self.assertEqual(
+                counts.main_timeline,
+                NotifCounts(
+                    notify_count=noitf_count,
+                    unread_count=0,
+                    highlight_count=highlight_count,
+                ),
+            )
+            if thread_notif_count or thread_highlight_count:
+                self.assertEqual(
+                    counts.threads,
+                    {
+                        thread_id: NotifCounts(
+                            notify_count=thread_notif_count,
+                            unread_count=0,
+                            highlight_count=thread_highlight_count,
+                        ),
+                    },
+                )
+            else:
+                self.assertEqual(counts.threads, {})
+
+        def _create_event(
+            highlight: bool = False, thread_id: Optional[str] = None
+        ) -> str:
+            content: JsonDict = {
+                "msgtype": "m.text",
+                "body": user_id if highlight else "msg",
+            }
+            if thread_id:
+                content["m.relates_to"] = {
+                    "rel_type": "m.thread",
+                    "event_id": thread_id,
+                }
+
+            result = self.helper.send_event(
+                room_id,
+                type="m.room.message",
+                content=content,
+                tok=other_token,
+            )
+            nonlocal last_event_id
+            last_event_id = result["event_id"]
+            return last_event_id
+
+        def _rotate() -> None:
+            self.get_success(self.store._rotate_notifs())
+
+        def _mark_read(event_id: str, thread_id: Optional[str] = None) -> None:
+            self.get_success(
+                self.store.insert_receipt(
+                    room_id,
+                    "m.read",
+                    user_id=user_id,
+                    event_ids=[event_id],
+                    thread_id=thread_id,
+                    data={},
+                )
+            )
+
+        _assert_counts(0, 0, 0, 0)
+        thread_id = _create_event()
+        _assert_counts(1, 0, 0, 0)
+        _rotate()
+        _assert_counts(1, 0, 0, 0)
+
+        _create_event(thread_id=thread_id)
+        _assert_counts(1, 0, 1, 0)
+        _rotate()
+        _assert_counts(1, 0, 1, 0)
+
+        _create_event()
+        _assert_counts(2, 0, 1, 0)
+        _rotate()
+        _assert_counts(2, 0, 1, 0)
+
+        event_id = _create_event(thread_id=thread_id)
+        _assert_counts(2, 0, 2, 0)
+        _rotate()
+        _assert_counts(2, 0, 2, 0)
+
+        _create_event()
+        _create_event(thread_id=thread_id)
+        _mark_read(event_id)
+        _assert_counts(1, 0, 1, 0)
+
+        _mark_read(last_event_id)
+        _assert_counts(0, 0, 0, 0)
+
+        _create_event()
+        _create_event(thread_id=thread_id)
+        _assert_counts(1, 0, 1, 0)
+        _rotate()
+        _assert_counts(1, 0, 1, 0)
+
+        # Delete old event push actions, this should not affect the (summarised) count.
+        self.get_success(self.store._remove_old_push_actions_that_have_rotated())
+        _assert_counts(1, 0, 1, 0)
+
+        _mark_read(last_event_id)
+        _assert_counts(0, 0, 0, 0)
+
+        _create_event(True)
+        _assert_counts(1, 1, 0, 0)
+        _rotate()
+        _assert_counts(1, 1, 0, 0)
+
+        event_id = _create_event(True, thread_id)
+        _assert_counts(1, 1, 1, 1)
+        _rotate()
+        _assert_counts(1, 1, 1, 1)
+
+        # Check that adding another notification and rotating after highlight
+        # works.
+        _create_event()
+        _rotate()
+        _assert_counts(2, 1, 1, 1)
+
+        _create_event(thread_id=thread_id)
+        _rotate()
+        _assert_counts(2, 1, 2, 1)
+
+        # Check that sending read receipts at different points results in the
+        # right counts.
+        _mark_read(event_id)
+        _assert_counts(1, 0, 1, 0)
+        _mark_read(last_event_id)
+        _assert_counts(0, 0, 0, 0)
+
+        _create_event(True)
+        _create_event(True, thread_id)
+        _assert_counts(1, 1, 1, 1)
+        _mark_read(last_event_id)
+        _assert_counts(0, 0, 0, 0)
+        _rotate()
+        _assert_counts(0, 0, 0, 0)
+
     def test_find_first_stream_ordering_after_ts(self) -> None:
         def add_event(so: int, ts: int) -> None:
             self.get_success(
-- 
cgit 1.5.1


From d8663f5e6358f8eaeda9a3f923fae720a140ca4d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 4 Oct 2022 10:21:16 -0400
Subject: Advertise supporting version 1.3 of the Matrix spec. (#14032)

All features / changes in 1.3 are now supported in Synapse.
---
 changelog.d/14032.feature       | 1 +
 synapse/rest/client/versions.py | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 changelog.d/14032.feature

(limited to 'synapse')

diff --git a/changelog.d/14032.feature b/changelog.d/14032.feature
new file mode 100644
index 0000000000..bb221d3ca6
--- /dev/null
+++ b/changelog.d/14032.feature
@@ -0,0 +1 @@
+Advertise Matrix 1.3 support on `/_matrix/client/versions`.
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 280d306483..18ed313b5c 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -75,6 +75,7 @@ class VersionsRestServlet(RestServlet):
                     "r0.6.1",
                     "v1.1",
                     "v1.2",
+                    "v1.3",
                 ],
                 # as per MSC1497:
                 "unstable_features": {
-- 
cgit 1.5.1


From a7ba457b2b967ca098792d742bc304604b1824b7 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 4 Oct 2022 10:46:42 -0400
Subject: Mark events as read using threaded read receipts from MSC3771.
 (#13877)

Applies the proper logic for unthreaded and threaded receipts to either
apply to all events in the room or only events in the same thread, respectively.
---
 changelog.d/13877.feature                          |   1 +
 .../storage/databases/main/event_push_actions.py   | 277 ++++++++++++++++-----
 .../73/08thread_receipts_non_null.sql.postgres     |  23 ++
 .../delta/73/08thread_receipts_non_null.sql.sqlite |  76 ++++++
 tests/storage/test_event_push_actions.py           | 189 +++++++++++++-
 5 files changed, 504 insertions(+), 62 deletions(-)
 create mode 100644 changelog.d/13877.feature
 create mode 100644 synapse/storage/schema/main/delta/73/08thread_receipts_non_null.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/73/08thread_receipts_non_null.sql.sqlite

(limited to 'synapse')

diff --git a/changelog.d/13877.feature b/changelog.d/13877.feature
new file mode 100644
index 0000000000..d0cb902dff
--- /dev/null
+++ b/changelog.d/13877.feature
@@ -0,0 +1 @@
+Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)).
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 3210e9cca1..7469cd336c 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -421,7 +421,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         txn: LoggingTransaction,
         room_id: str,
         user_id: str,
-        receipt_stream_ordering: int,
+        unthreaded_receipt_stream_ordering: int,
     ) -> RoomNotifCounts:
         """Get the number of unread messages for a user/room that have happened
         since the given stream ordering.
@@ -430,9 +430,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             txn: The database transaction.
             room_id: The room ID to get unread counts for.
             user_id: The user ID to get unread counts for.
-            receipt_stream_ordering: The stream ordering of the user's latest
-                receipt in the room. If there are no receipts, the stream ordering
-                of the user's join event.
+            unthreaded_receipt_stream_ordering: The stream ordering of the user's latest
+                unthreaded receipt in the room. If there are no unthreaded receipts,
+                the stream ordering of the user's join event.
 
         Returns:
             A RoomNotifCounts object containing the notification count, the
@@ -448,71 +448,181 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 return main_counts
             return thread_counts.setdefault(thread_id, NotifCounts())
 
+        receipt_types_clause, receipts_args = make_in_list_sql_clause(
+            self.database_engine,
+            "receipt_type",
+            (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE),
+        )
+
         # First we pull the counts from the summary table.
         #
-        # We check that `last_receipt_stream_ordering` matches the stream
-        # ordering given. If it doesn't match then a new read receipt has arrived and
-        # we haven't yet updated the counts in `event_push_summary` to reflect
-        # that; in that case we simply ignore `event_push_summary` counts
-        # and do a manual count of all of the rows in the `event_push_actions` table
-        # for this user/room.
+        # We check that `last_receipt_stream_ordering` matches the stream ordering of the
+        # latest receipt for the thread (which may be either the unthreaded read receipt
+        # or the threaded read receipt).
         #
-        # If `last_receipt_stream_ordering` is null then that means it's up to
-        # date (as the row was written by an older version of Synapse that
+        # If it doesn't match then a new read receipt has arrived and we haven't yet
+        # updated the counts in `event_push_summary` to reflect that; in that case we
+        # simply ignore `event_push_summary` counts.
+        #
+        # We then do a manual count of all the rows in the `event_push_actions` table
+        # for any user/room/thread which did not have a valid summary found.
+        #
+        # If `last_receipt_stream_ordering` is null then that means it's up-to-date
+        # (as the row was written by an older version of Synapse that
         # updated `event_push_summary` synchronously when persisting a new read
         # receipt).
         txn.execute(
-            """
-                SELECT stream_ordering, notif_count, COALESCE(unread_count, 0), thread_id
+            f"""
+                SELECT notif_count, COALESCE(unread_count, 0), thread_id
                 FROM event_push_summary
+                LEFT JOIN (
+                    SELECT thread_id, MAX(stream_ordering) AS threaded_receipt_stream_ordering
+                    FROM receipts_linearized
+                    LEFT JOIN events USING (room_id, event_id)
+                    WHERE
+                        user_id = ?
+                        AND room_id = ?
+                        AND stream_ordering > ?
+                        AND {receipt_types_clause}
+                    GROUP BY thread_id
+                ) AS receipts USING (thread_id)
                 WHERE room_id = ? AND user_id = ?
                 AND (
-                    (last_receipt_stream_ordering IS NULL AND stream_ordering > ?)
-                    OR last_receipt_stream_ordering = ?
+                    (last_receipt_stream_ordering IS NULL AND stream_ordering > COALESCE(threaded_receipt_stream_ordering, ?))
+                    OR last_receipt_stream_ordering = COALESCE(threaded_receipt_stream_ordering, ?)
                 ) AND (notif_count != 0 OR COALESCE(unread_count, 0) != 0)
             """,
-            (room_id, user_id, receipt_stream_ordering, receipt_stream_ordering),
+            (
+                user_id,
+                room_id,
+                unthreaded_receipt_stream_ordering,
+                *receipts_args,
+                room_id,
+                user_id,
+                unthreaded_receipt_stream_ordering,
+                unthreaded_receipt_stream_ordering,
+            ),
         )
-        max_summary_stream_ordering = 0
-        for summary_stream_ordering, notif_count, unread_count, thread_id in txn:
+        summarised_threads = set()
+        for notif_count, unread_count, thread_id in txn:
+            summarised_threads.add(thread_id)
             counts = _get_thread(thread_id)
             counts.notify_count += notif_count
             counts.unread_count += unread_count
 
-            # Summaries will only be used if they have not been invalidated by
-            # a recent receipt; track the latest stream ordering or a valid summary.
-            #
-            # Note that since there's only one read receipt in the room per user,
-            # valid summaries are contiguous.
-            max_summary_stream_ordering = max(
-                summary_stream_ordering, max_summary_stream_ordering
-            )
-
         # Next we need to count highlights, which aren't summarised
-        sql = """
+        sql = f"""
             SELECT COUNT(*), thread_id FROM event_push_actions
+            LEFT JOIN (
+                SELECT thread_id, MAX(stream_ordering) AS threaded_receipt_stream_ordering
+                FROM receipts_linearized
+                LEFT JOIN events USING (room_id, event_id)
+                WHERE
+                    user_id = ?
+                    AND room_id = ?
+                    AND stream_ordering > ?
+                    AND {receipt_types_clause}
+                GROUP BY thread_id
+            ) AS receipts USING (thread_id)
             WHERE user_id = ?
                 AND room_id = ?
-                AND stream_ordering > ?
+                AND stream_ordering > COALESCE(threaded_receipt_stream_ordering, ?)
                 AND highlight = 1
             GROUP BY thread_id
         """
-        txn.execute(sql, (user_id, room_id, receipt_stream_ordering))
+        txn.execute(
+            sql,
+            (
+                user_id,
+                room_id,
+                unthreaded_receipt_stream_ordering,
+                *receipts_args,
+                user_id,
+                room_id,
+                unthreaded_receipt_stream_ordering,
+            ),
+        )
         for highlight_count, thread_id in txn:
             _get_thread(thread_id).highlight_count += highlight_count
 
+        # For threads which were summarised we need to count actions since the last
+        # rotation.
+        thread_id_clause, thread_id_args = make_in_list_sql_clause(
+            self.database_engine, "thread_id", summarised_threads
+        )
+
+        # The (inclusive) event stream ordering that was previously summarised.
+        rotated_upto_stream_ordering = self.db_pool.simple_select_one_onecol_txn(
+            txn,
+            table="event_push_summary_stream_ordering",
+            keyvalues={},
+            retcol="stream_ordering",
+        )
+
+        unread_counts = self._get_notif_unread_count_for_user_room(
+            txn, room_id, user_id, rotated_upto_stream_ordering
+        )
+        for notif_count, unread_count, thread_id in unread_counts:
+            if thread_id not in summarised_threads:
+                continue
+
+            if thread_id == MAIN_TIMELINE:
+                counts.notify_count += notif_count
+                counts.unread_count += unread_count
+            elif thread_id in thread_counts:
+                thread_counts[thread_id].notify_count += notif_count
+                thread_counts[thread_id].unread_count += unread_count
+            else:
+                # Previous thread summaries of 0 are discarded above.
+                #
+                # TODO If empty summaries are deleted this can be removed.
+                thread_counts[thread_id] = NotifCounts(
+                    notify_count=notif_count,
+                    unread_count=unread_count,
+                    highlight_count=0,
+                )
+
         # Finally we need to count push actions that aren't included in the
         # summary returned above. This might be due to recent events that haven't
         # been summarised yet or the summary is out of date due to a recent read
         # receipt.
-        start_unread_stream_ordering = max(
-            receipt_stream_ordering, max_summary_stream_ordering
-        )
-        unread_counts = self._get_notif_unread_count_for_user_room(
-            txn, room_id, user_id, start_unread_stream_ordering
+        sql = f"""
+            SELECT
+                COUNT(CASE WHEN notif = 1 THEN 1 END),
+                COUNT(CASE WHEN unread = 1 THEN 1 END),
+                thread_id
+            FROM event_push_actions
+            LEFT JOIN (
+                SELECT thread_id, MAX(stream_ordering) AS threaded_receipt_stream_ordering
+                FROM receipts_linearized
+                LEFT JOIN events USING (room_id, event_id)
+                WHERE
+                    user_id = ?
+                    AND room_id = ?
+                    AND stream_ordering > ?
+                    AND {receipt_types_clause}
+                GROUP BY thread_id
+            ) AS receipts USING (thread_id)
+            WHERE user_id = ?
+                AND room_id = ?
+                AND stream_ordering > COALESCE(threaded_receipt_stream_ordering, ?)
+                AND NOT {thread_id_clause}
+            GROUP BY thread_id
+        """
+        txn.execute(
+            sql,
+            (
+                user_id,
+                room_id,
+                unthreaded_receipt_stream_ordering,
+                *receipts_args,
+                user_id,
+                room_id,
+                unthreaded_receipt_stream_ordering,
+                *thread_id_args,
+            ),
         )
-
-        for notif_count, unread_count, thread_id in unread_counts:
+        for notif_count, unread_count, thread_id in txn:
             counts = _get_thread(thread_id)
             counts.notify_count += notif_count
             counts.unread_count += unread_count
@@ -526,6 +636,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         user_id: str,
         stream_ordering: int,
         max_stream_ordering: Optional[int] = None,
+        thread_id: Optional[str] = None,
     ) -> List[Tuple[int, int, str]]:
         """Returns the notify and unread counts from `event_push_actions` for
         the given user/room in the given range.
@@ -540,6 +651,11 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             stream_ordering: The (exclusive) minimum stream ordering to consider.
             max_stream_ordering: The (inclusive) maximum stream ordering to consider.
                 If this is not given, then no maximum is applied.
+            thread_id: The thread ID to fetch unread counts for. If this is not provided
+                then the results for *all* threads are returned.
+
+                Note that if this is provided the resulting list will only have 0 or
+                1 tuples in it.
 
         Return:
             A tuple of the notif count and unread count in the given range for
@@ -551,10 +667,10 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         if not self._events_stream_cache.has_entity_changed(room_id, stream_ordering):
             return []
 
-        clause = ""
+        stream_ordering_clause = ""
         args = [user_id, room_id, stream_ordering]
         if max_stream_ordering is not None:
-            clause = "AND ea.stream_ordering <= ?"
+            stream_ordering_clause = "AND ea.stream_ordering <= ?"
             args.append(max_stream_ordering)
 
             # If the max stream ordering is less than the min stream ordering,
@@ -562,6 +678,12 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             if max_stream_ordering <= stream_ordering:
                 return []
 
+        # Either limit the results to a specific thread or fetch all threads.
+        thread_id_clause = ""
+        if thread_id is not None:
+            thread_id_clause = "AND thread_id = ?"
+            args.append(thread_id)
+
         sql = f"""
             SELECT
                COUNT(CASE WHEN notif = 1 THEN 1 END),
@@ -571,7 +693,8 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             WHERE user_id = ?
                AND room_id = ?
                AND ea.stream_ordering > ?
-               {clause}
+               {stream_ordering_clause}
+               {thread_id_clause}
             GROUP BY thread_id
         """
 
@@ -1086,7 +1209,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         )
 
         sql = """
-            SELECT r.stream_id, r.room_id, r.user_id, e.stream_ordering
+            SELECT r.stream_id, r.room_id, r.user_id, r.thread_id, e.stream_ordering
             FROM receipts_linearized AS r
             INNER JOIN events AS e USING (event_id)
             WHERE ? < r.stream_id AND r.stream_id <= ? AND user_id LIKE ?
@@ -1107,45 +1230,69 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 limit,
             ),
         )
-        rows = cast(List[Tuple[int, str, str, int]], txn.fetchall())
+        rows = cast(List[Tuple[int, str, str, Optional[str], int]], txn.fetchall())
 
         # For each new read receipt we delete push actions from before it and
         # recalculate the summary.
-        for _, room_id, user_id, stream_ordering in rows:
+        #
+        # Care must be taken of whether it is a threaded or unthreaded receipt.
+        for _, room_id, user_id, thread_id, stream_ordering in rows:
             # Only handle our own read receipts.
             if not self.hs.is_mine_id(user_id):
                 continue
 
+            thread_clause = ""
+            thread_args: Tuple = ()
+            if thread_id is not None:
+                thread_clause = "AND thread_id = ?"
+                thread_args = (thread_id,)
+
+            # Delete the non-highlight push actions at or before the receipt; for a
+            # threaded receipt the deletion is limited to that thread.
             txn.execute(
-                """
+                f"""
                 DELETE FROM event_push_actions
                 WHERE room_id = ?
                     AND user_id = ?
                     AND stream_ordering <= ?
                     AND highlight = 0
+                    {thread_clause}
                 """,
-                (room_id, user_id, stream_ordering),
+                (room_id, user_id, stream_ordering, *thread_args),
             )
 
             # Fetch the notification counts between the stream ordering of the
             # latest receipt and what was previously summarised.
             unread_counts = self._get_notif_unread_count_for_user_room(
-                txn, room_id, user_id, stream_ordering, old_rotate_stream_ordering
-            )
-
-            # First mark the summary for all threads in the room as cleared.
-            self.db_pool.simple_update_txn(
                 txn,
-                table="event_push_summary",
-                keyvalues={"user_id": user_id, "room_id": room_id},
-                updatevalues={
-                    "notif_count": 0,
-                    "unread_count": 0,
-                    "stream_ordering": old_rotate_stream_ordering,
-                    "last_receipt_stream_ordering": stream_ordering,
-                },
+                room_id,
+                user_id,
+                stream_ordering,
+                old_rotate_stream_ordering,
+                thread_id,
             )
 
+            # For an unthreaded receipt, mark the summary for all threads in the room
+            # as cleared.
+            if thread_id is None:
+                self.db_pool.simple_update_txn(
+                    txn,
+                    table="event_push_summary",
+                    keyvalues={"user_id": user_id, "room_id": room_id},
+                    updatevalues={
+                        "notif_count": 0,
+                        "unread_count": 0,
+                        "stream_ordering": old_rotate_stream_ordering,
+                        "last_receipt_stream_ordering": stream_ordering,
+                    },
+                )
+
+            # For a threaded receipt, we *always* want to update that receipt,
+            # even if there are no new notifications in that thread. This ensures
+            # the stream_ordering & last_receipt_stream_ordering are updated.
+            elif not unread_counts:
+                unread_counts = [(0, 0, thread_id)]
+
             # Then any updated threads get their notification count and unread
             # count updated.
             self.db_pool.simple_update_many_txn(
@@ -1153,8 +1300,16 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 table="event_push_summary",
                 key_names=("room_id", "user_id", "thread_id"),
                 key_values=[(room_id, user_id, row[2]) for row in unread_counts],
-                value_names=("notif_count", "unread_count"),
-                value_values=[(row[0], row[1]) for row in unread_counts],
+                value_names=(
+                    "notif_count",
+                    "unread_count",
+                    "stream_ordering",
+                    "last_receipt_stream_ordering",
+                ),
+                value_values=[
+                    (row[0], row[1], old_rotate_stream_ordering, stream_ordering)
+                    for row in unread_counts
+                ],
             )
 
         # We always update `event_push_summary_last_receipt_stream_id` to
diff --git a/synapse/storage/schema/main/delta/73/08thread_receipts_non_null.sql.postgres b/synapse/storage/schema/main/delta/73/08thread_receipts_non_null.sql.postgres
new file mode 100644
index 0000000000..3e0bc9e5eb
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/08thread_receipts_non_null.sql.postgres
@@ -0,0 +1,23 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Drop constraint on (room_id, receipt_type, user_id).
+
+-- Only the old unique constraints are dropped here; nothing is rebuilt in this file.
+ALTER TABLE receipts_linearized
+    DROP CONSTRAINT receipts_linearized_uniqueness;
+
+ALTER TABLE receipts_graph
+    DROP CONSTRAINT receipts_graph_uniqueness;
diff --git a/synapse/storage/schema/main/delta/73/08thread_receipts_non_null.sql.sqlite b/synapse/storage/schema/main/delta/73/08thread_receipts_non_null.sql.sqlite
new file mode 100644
index 0000000000..e664889fbc
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/08thread_receipts_non_null.sql.sqlite
@@ -0,0 +1,76 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Drop constraint on (room_id, receipt_type, user_id).
+--
+-- SQLite doesn't support modifying constraints on an existing table, so it must
+-- be recreated.
+
+-- Create the new tables.
+CREATE TABLE receipts_linearized_new (
+    stream_id BIGINT NOT NULL,
+    room_id TEXT NOT NULL,
+    receipt_type TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    event_id TEXT NOT NULL,
+    thread_id TEXT,
+    event_stream_ordering BIGINT,
+    data TEXT NOT NULL,
+    CONSTRAINT receipts_linearized_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id)
+);
+
+CREATE TABLE receipts_graph_new (
+    room_id TEXT NOT NULL,
+    receipt_type TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    event_ids TEXT NOT NULL,
+    thread_id TEXT,
+    data TEXT NOT NULL,
+    CONSTRAINT receipts_graph_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id)
+);
+
+-- Drop the old indexes.
+DROP INDEX IF EXISTS receipts_linearized_id;
+DROP INDEX IF EXISTS receipts_linearized_room_stream;
+DROP INDEX IF EXISTS receipts_linearized_user;
+
+-- Copy the data.
+INSERT INTO receipts_linearized_new (stream_id, room_id, receipt_type, user_id, event_id, data)
+    SELECT stream_id, room_id, receipt_type, user_id, event_id, data
+    FROM receipts_linearized;
+INSERT INTO receipts_graph_new (room_id, receipt_type, user_id, event_ids, data)
+    SELECT room_id, receipt_type, user_id, event_ids, data
+    FROM receipts_graph;
+
+-- Drop the old tables.
+DROP TABLE receipts_linearized;
+DROP TABLE receipts_graph;
+
+-- Rename the tables.
+ALTER TABLE receipts_linearized_new RENAME TO receipts_linearized;
+ALTER TABLE receipts_graph_new RENAME TO receipts_graph;
+
+-- Create the indices.
+CREATE INDEX receipts_linearized_id ON receipts_linearized( stream_id );
+CREATE INDEX receipts_linearized_room_stream ON receipts_linearized( room_id, stream_id );
+CREATE INDEX receipts_linearized_user ON receipts_linearized( user_id );
+
+-- Re-run background updates from 72/08thread_receipts.sql.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7308, 'receipts_linearized_unique_index', '{}')
+  ON CONFLICT (update_name) DO NOTHING;
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7308, 'receipts_graph_unique_index', '{}')
+  ON CONFLICT (update_name) DO NOTHING;
diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py
index 89f986ac34..6fa0cafb75 100644
--- a/tests/storage/test_event_push_actions.py
+++ b/tests/storage/test_event_push_actions.py
@@ -16,6 +16,7 @@ from typing import Optional, Tuple
 
 from twisted.test.proto_helpers import MemoryReactor
 
+from synapse.api.constants import MAIN_TIMELINE
 from synapse.rest import admin
 from synapse.rest.client import login, room
 from synapse.server import HomeServer
@@ -312,7 +313,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         def _rotate() -> None:
             self.get_success(self.store._rotate_notifs())
 
-        def _mark_read(event_id: str, thread_id: Optional[str] = None) -> None:
+        def _mark_read(event_id: str, thread_id: str = MAIN_TIMELINE) -> None:
             self.get_success(
                 self.store.insert_receipt(
                     room_id,
@@ -348,9 +349,12 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         _create_event()
         _create_event(thread_id=thread_id)
         _mark_read(event_id)
+        _assert_counts(1, 0, 3, 0)
+        _mark_read(event_id, thread_id)
         _assert_counts(1, 0, 1, 0)
 
         _mark_read(last_event_id)
+        _mark_read(last_event_id, thread_id)
         _assert_counts(0, 0, 0, 0)
 
         _create_event()
@@ -364,6 +368,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         _assert_counts(1, 0, 1, 0)
 
         _mark_read(last_event_id)
+        _mark_read(last_event_id, thread_id)
         _assert_counts(0, 0, 0, 0)
 
         _create_event(True)
@@ -389,8 +394,190 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         # Check that sending read receipts at different points results in the
         # right counts.
         _mark_read(event_id)
+        _assert_counts(1, 0, 2, 1)
+        _mark_read(event_id, thread_id)
         _assert_counts(1, 0, 1, 0)
         _mark_read(last_event_id)
+        _assert_counts(0, 0, 1, 0)
+        _mark_read(last_event_id, thread_id)
+        _assert_counts(0, 0, 0, 0)
+
+        _create_event(True)
+        _create_event(True, thread_id)
+        _assert_counts(1, 1, 1, 1)
+        _mark_read(last_event_id)
+        _mark_read(last_event_id, thread_id)
+        _assert_counts(0, 0, 0, 0)
+        _rotate()
+        _assert_counts(0, 0, 0, 0)
+
+    def test_count_aggregation_mixed(self) -> None:
+        """
+        This is essentially the same test as test_count_aggregation_threads, but
+        sends both unthreaded and threaded receipts.
+        """
+
+        # Create a user to receive notifications and send receipts.
+        user_id = self.register_user("user1235", "pass")
+        token = self.login("user1235", "pass")
+
+        # And another user to send events.
+        other_id = self.register_user("other", "pass")
+        other_token = self.login("other", "pass")
+
+        # Create a room and put both users in it.
+        room_id = self.helper.create_room_as(user_id, tok=token)
+        self.helper.join(room_id, other_id, tok=other_token)
+        thread_id: str
+
+        last_event_id: str
+
+        def _assert_counts(
+            noitf_count: int,
+            highlight_count: int,
+            thread_notif_count: int,
+            thread_highlight_count: int,
+        ) -> None:
+            counts = self.get_success(
+                self.store.db_pool.runInteraction(
+                    "get-unread-counts",
+                    self.store._get_unread_counts_by_receipt_txn,
+                    room_id,
+                    user_id,
+                )
+            )
+            self.assertEqual(
+                counts.main_timeline,
+                NotifCounts(
+                    notify_count=noitf_count,
+                    unread_count=0,
+                    highlight_count=highlight_count,
+                ),
+            )
+            if thread_notif_count or thread_highlight_count:
+                self.assertEqual(
+                    counts.threads,
+                    {
+                        thread_id: NotifCounts(
+                            notify_count=thread_notif_count,
+                            unread_count=0,
+                            highlight_count=thread_highlight_count,
+                        ),
+                    },
+                )
+            else:
+                self.assertEqual(counts.threads, {})
+
+        def _create_event(
+            highlight: bool = False, thread_id: Optional[str] = None
+        ) -> str:
+            content: JsonDict = {
+                "msgtype": "m.text",
+                "body": user_id if highlight else "msg",
+            }
+            if thread_id:
+                content["m.relates_to"] = {
+                    "rel_type": "m.thread",
+                    "event_id": thread_id,
+                }
+
+            result = self.helper.send_event(
+                room_id,
+                type="m.room.message",
+                content=content,
+                tok=other_token,
+            )
+            nonlocal last_event_id
+            last_event_id = result["event_id"]
+            return last_event_id
+
+        def _rotate() -> None:
+            self.get_success(self.store._rotate_notifs())
+
+        def _mark_read(event_id: str, thread_id: Optional[str] = None) -> None:
+            self.get_success(
+                self.store.insert_receipt(
+                    room_id,
+                    "m.read",
+                    user_id=user_id,
+                    event_ids=[event_id],
+                    thread_id=thread_id,
+                    data={},
+                )
+            )
+
+        _assert_counts(0, 0, 0, 0)
+        thread_id = _create_event()
+        _assert_counts(1, 0, 0, 0)
+        _rotate()
+        _assert_counts(1, 0, 0, 0)
+
+        _create_event(thread_id=thread_id)
+        _assert_counts(1, 0, 1, 0)
+        _rotate()
+        _assert_counts(1, 0, 1, 0)
+
+        _create_event()
+        _assert_counts(2, 0, 1, 0)
+        _rotate()
+        _assert_counts(2, 0, 1, 0)
+
+        event_id = _create_event(thread_id=thread_id)
+        _assert_counts(2, 0, 2, 0)
+        _rotate()
+        _assert_counts(2, 0, 2, 0)
+
+        _create_event()
+        _create_event(thread_id=thread_id)
+        _mark_read(event_id)
+        _assert_counts(1, 0, 1, 0)
+
+        _mark_read(last_event_id, MAIN_TIMELINE)
+        _mark_read(last_event_id, thread_id)
+        _assert_counts(0, 0, 0, 0)
+
+        _create_event()
+        _create_event(thread_id=thread_id)
+        _assert_counts(1, 0, 1, 0)
+        _rotate()
+        _assert_counts(1, 0, 1, 0)
+
+        # Delete old event push actions, this should not affect the (summarised) count.
+        self.get_success(self.store._remove_old_push_actions_that_have_rotated())
+        _assert_counts(1, 0, 1, 0)
+
+        _mark_read(last_event_id)
+        _assert_counts(0, 0, 0, 0)
+
+        _create_event(True)
+        _assert_counts(1, 1, 0, 0)
+        _rotate()
+        _assert_counts(1, 1, 0, 0)
+
+        event_id = _create_event(True, thread_id)
+        _assert_counts(1, 1, 1, 1)
+        _rotate()
+        _assert_counts(1, 1, 1, 1)
+
+        # Check that adding another notification and rotating after highlight
+        # works.
+        _create_event()
+        _rotate()
+        _assert_counts(2, 1, 1, 1)
+
+        _create_event(thread_id=thread_id)
+        _rotate()
+        _assert_counts(2, 1, 2, 1)
+
+        # Check that sending read receipts at different points results in the
+        # right counts.
+        _mark_read(event_id)
+        _assert_counts(1, 0, 1, 0)
+        _mark_read(event_id, MAIN_TIMELINE)
+        _assert_counts(1, 0, 1, 0)
+        _mark_read(last_event_id, MAIN_TIMELINE)
+        _assert_counts(0, 0, 1, 0)
+        _mark_read(last_event_id, thread_id)
         _assert_counts(0, 0, 0, 0)
 
         _create_event(True)
-- 
cgit 1.5.1


From 2b6d41ebd685fb546e52acdbcb0024dfcf5a5db1 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 4 Oct 2022 11:36:16 -0400
Subject: Recursively fetch the thread for receipts & notifications. (#13824)

Consider an event to be part of a thread if you can follow a
chain of relations up to a thread root.

Part of MSC3773 & MSC3771.
---
 changelog.d/13824.feature                   |   1 +
 synapse/push/bulk_push_rule_evaluator.py    |   5 ++
 synapse/rest/client/receipts.py             |  22 +++++-
 synapse/storage/databases/main/relations.py |  36 ++++++++++
 tests/storage/test_event_push_actions.py    | 100 ++++++++++++++++++++++++++++
 5 files changed, 162 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/13824.feature

(limited to 'synapse')

diff --git a/changelog.d/13824.feature b/changelog.d/13824.feature
new file mode 100644
index 0000000000..d0cb902dff
--- /dev/null
+++ b/changelog.d/13824.feature
@@ -0,0 +1 @@
+Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)).
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 61d952742d..f8c4dd74f0 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -286,8 +286,13 @@ class BulkPushRuleEvaluator:
                 relation.parent_id,
                 itertools.chain(*(r.rules() for r in rules_by_user.values())),
             )
+            # Recursively attempt to find the thread this event relates to.
             if relation.rel_type == RelationTypes.THREAD:
                 thread_id = relation.parent_id
+            else:
+                # Since the event has not yet been persisted we check whether
+                # the parent is part of a thread.
+                thread_id = await self.store.get_thread_id(relation.parent_id) or "main"
 
         evaluator = PushRuleEvaluator(
             _flatten_dict(event),
diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py
index f3ff156abe..287dfdd69e 100644
--- a/synapse/rest/client/receipts.py
+++ b/synapse/rest/client/receipts.py
@@ -16,7 +16,7 @@ import logging
 from typing import TYPE_CHECKING, Tuple
 
 from synapse.api.constants import ReceiptTypes
-from synapse.api.errors import SynapseError
+from synapse.api.errors import Codes, SynapseError
 from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_json_object_from_request
 from synapse.http.site import SynapseRequest
@@ -43,6 +43,7 @@ class ReceiptRestServlet(RestServlet):
         self.receipts_handler = hs.get_receipts_handler()
         self.read_marker_handler = hs.get_read_marker_handler()
         self.presence_handler = hs.get_presence_handler()
+        self._main_store = hs.get_datastores().main
 
         self._known_receipt_types = {
             ReceiptTypes.READ,
@@ -71,7 +72,24 @@ class ReceiptRestServlet(RestServlet):
                 thread_id = body.get("thread_id")
                 if not thread_id or not isinstance(thread_id, str):
                     raise SynapseError(
-                        400, "thread_id field must be a non-empty string"
+                        400,
+                        "thread_id field must be a non-empty string",
+                        Codes.INVALID_PARAM,
+                    )
+
+                if receipt_type == ReceiptTypes.FULLY_READ:
+                    raise SynapseError(
+                        400,
+                        f"thread_id is not compatible with {ReceiptTypes.FULLY_READ} receipts.",
+                        Codes.INVALID_PARAM,
+                    )
+
+                # Ensure the event ID roughly correlates to the thread ID.
+                if thread_id != await self._main_store.get_thread_id(event_id):
+                    raise SynapseError(
+                        400,
+                        f"event_id {event_id} is not related to thread {thread_id}",
+                        Codes.INVALID_PARAM,
                     )
 
         await self.presence_handler.bump_presence_active_time(requester.user)
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index 898947af95..154385b1e8 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -832,6 +832,42 @@ class RelationsWorkerStore(SQLBaseStore):
             "get_event_relations", _get_event_relations
         )
 
+    @cached()
+    async def get_thread_id(self, event_id: str) -> Optional[str]:
+        """
+        Get the thread ID for an event. This considers multi-level relations,
+        e.g. an annotation to an event which is part of a thread.
+
+        Args:
+            event_id: The event ID to fetch the thread ID for.
+
+        Returns:
+            The event ID of the root event in the thread, if this event is part
+            of a thread. None, otherwise.
+        """
+        # Since event relations form a tree, we should only ever find 0 or 1
+        # results from the below query.
+        sql = """
+            WITH RECURSIVE related_events AS (
+                SELECT event_id, relates_to_id, relation_type
+                FROM event_relations
+                WHERE event_id = ?
+                UNION SELECT e.event_id, e.relates_to_id, e.relation_type
+                FROM event_relations e
+                INNER JOIN related_events r ON r.relates_to_id = e.event_id
+            ) SELECT relates_to_id FROM related_events WHERE relation_type = 'm.thread';
+        """
+
+        def _get_thread_id(txn: LoggingTransaction) -> Optional[str]:
+            txn.execute(sql, (event_id,))
+            # TODO Should we ensure there's only a single result here?
+            row = txn.fetchone()
+            if row:
+                return row[0]
+            return None
+
+        return await self.db_pool.runInteraction("get_thread_id", _get_thread_id)
+
 
 class RelationsStore(RelationsWorkerStore):
     pass
diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py
index 6fa0cafb75..886585e9f2 100644
--- a/tests/storage/test_event_push_actions.py
+++ b/tests/storage/test_event_push_actions.py
@@ -588,6 +588,106 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         _rotate()
         _assert_counts(0, 0, 0, 0)
 
+    def test_recursive_thread(self) -> None:
+        """
+        Events related to events in a thread should still be considered part of
+        that thread.
+        """
+
+        # Create a user to receive notifications and send receipts.
+        user_id = self.register_user("user1235", "pass")
+        token = self.login("user1235", "pass")
+
+        # And another user to send events.
+        other_id = self.register_user("other", "pass")
+        other_token = self.login("other", "pass")
+
+        # Create a room and put both users in it.
+        room_id = self.helper.create_room_as(user_id, tok=token)
+        self.helper.join(room_id, other_id, tok=other_token)
+
+        # Update the user's push rules to care about reaction events.
+        self.get_success(
+            self.store.add_push_rule(
+                user_id,
+                "related_events",
+                priority_class=5,
+                conditions=[
+                    {"kind": "event_match", "key": "type", "pattern": "m.reaction"}
+                ],
+                actions=["notify"],
+            )
+        )
+
+        def _create_event(type: str, content: JsonDict) -> str:
+            result = self.helper.send_event(
+                room_id, type=type, content=content, tok=other_token
+            )
+            return result["event_id"]
+
+        def _assert_counts(noitf_count: int, thread_notif_count: int) -> None:
+            counts = self.get_success(
+                self.store.db_pool.runInteraction(
+                    "get-unread-counts",
+                    self.store._get_unread_counts_by_receipt_txn,
+                    room_id,
+                    user_id,
+                )
+            )
+            self.assertEqual(
+                counts.main_timeline,
+                NotifCounts(
+                    notify_count=noitf_count, unread_count=0, highlight_count=0
+                ),
+            )
+            if thread_notif_count:
+                self.assertEqual(
+                    counts.threads,
+                    {
+                        thread_id: NotifCounts(
+                            notify_count=thread_notif_count,
+                            unread_count=0,
+                            highlight_count=0,
+                        ),
+                    },
+                )
+            else:
+                self.assertEqual(counts.threads, {})
+
+        # Create a root event.
+        thread_id = _create_event(
+            "m.room.message", {"msgtype": "m.text", "body": "msg"}
+        )
+        _assert_counts(1, 0)
+
+        # Reply, creating a thread.
+        reply_id = _create_event(
+            "m.room.message",
+            {
+                "msgtype": "m.text",
+                "body": "msg",
+                "m.relates_to": {
+                    "rel_type": "m.thread",
+                    "event_id": thread_id,
+                },
+            },
+        )
+        _assert_counts(1, 1)
+
+        # Create an event related to a thread event, this should still appear in
+        # the thread.
+        _create_event(
+            type="m.reaction",
+            content={
+                "m.relates_to": {
+                    "rel_type": "m.annotation",
+                    "event_id": reply_id,
+                    "key": "A",
+                }
+            },
+        )
+        _assert_counts(1, 2)
+
     def test_find_first_stream_ordering_after_ts(self) -> None:
         def add_event(so: int, ts: int) -> None:
             self.get_success(
-- 
cgit 1.5.1


From 0506bb100e0348ab6e6e213c6624677a83ef9303 Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Tue, 4 Oct 2022 16:42:59 +0100
Subject: Remove get rooms for user with stream ordering (#13991)

By getting the joined rooms before the current token we avoid reading any
history to confirm a user *was* in a room. We can then use any membership
change events, which we already fetch during sync, to determine the final
list of joined room IDs.
---
 changelog.d/13991.misc   |   1 +
 synapse/handlers/sync.py | 149 ++++++++++++++++++++++-------------------------
 2 files changed, 70 insertions(+), 80 deletions(-)
 create mode 100644 changelog.d/13991.misc

(limited to 'synapse')

diff --git a/changelog.d/13991.misc b/changelog.d/13991.misc
new file mode 100644
index 0000000000..f425fb17b2
--- /dev/null
+++ b/changelog.d/13991.misc
@@ -0,0 +1 @@
+Optimise queries used to get a user's rooms during sync. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 329e89c604..0f684857ca 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1317,6 +1317,19 @@ class SyncHandler:
         At the end, we transfer data from the `sync_result_builder` to a new `SyncResult`
         instance to signify that the sync calculation is complete.
         """
+
+        user_id = sync_config.user.to_string()
+        app_service = self.store.get_app_service_by_user_id(user_id)
+        if app_service:
+            # We no longer support AS users using /sync directly.
+            # See https://github.com/matrix-org/matrix-doc/issues/1144
+            raise NotImplementedError()
+
+        # Note: we get the user's room list *before* we get the current token, this
+        # avoids checking back in history if rooms are joined after the token is fetched.
+        token_before_rooms = self.event_sources.get_current_token()
+        mutable_joined_room_ids = set(await self.store.get_rooms_for_user(user_id))
+
         # NB: The now_token gets changed by some of the generate_sync_* methods,
         # this is due to some of the underlying streams not supporting the ability
         # to query up to a given point.
@@ -1324,6 +1337,57 @@ class SyncHandler:
         now_token = self.event_sources.get_current_token()
         log_kv({"now_token": now_token})
 
+        # Since we fetched the user's room list before the token, there's a small window
+        # during which membership events may have been persisted, so we fetch these now
+        # and modify the joined room list for any changes between the get_rooms_for_user
+        # call and the get_current_token call.
+        membership_change_events = []
+        if since_token:
+            membership_change_events = await self.store.get_membership_changes_for_user(
+                user_id, since_token.room_key, now_token.room_key, self.rooms_to_exclude
+            )
+
+            mem_last_change_by_room_id: Dict[str, EventBase] = {}
+            for event in membership_change_events:
+                mem_last_change_by_room_id[event.room_id] = event
+
+            # For the latest membership event in each room found, add/remove the room ID
+            # from the joined room list accordingly. In this case we only care if the
+            # latest change is JOIN.
+
+            for room_id, event in mem_last_change_by_room_id.items():
+                assert event.internal_metadata.stream_ordering
+                if (
+                    event.internal_metadata.stream_ordering
+                    < token_before_rooms.room_key.stream
+                ):
+                    continue
+
+                logger.info(
+                    "User membership change between getting rooms and current token: %s %s %s",
+                    user_id,
+                    event.membership,
+                    room_id,
+                )
+                # User joined a room - we have to then check the room state to ensure we
+                # respect any bans if there's a race between the join and ban events.
+                if event.membership == Membership.JOIN:
+                    user_ids_in_room = await self.store.get_users_in_room(room_id)
+                    if user_id in user_ids_in_room:
+                        mutable_joined_room_ids.add(room_id)
+                # The user left the room, or left and was re-invited but not joined yet
+                else:
+                    mutable_joined_room_ids.discard(room_id)
+
+        # Now we have our list of joined room IDs, exclude as configured and freeze
+        joined_room_ids = frozenset(
+            (
+                room_id
+                for room_id in mutable_joined_room_ids
+                if room_id not in self.rooms_to_exclude
+            )
+        )
+
         logger.debug(
             "Calculating sync response for %r between %s and %s",
             sync_config.user,
@@ -1331,22 +1395,13 @@ class SyncHandler:
             now_token,
         )
 
-        user_id = sync_config.user.to_string()
-        app_service = self.store.get_app_service_by_user_id(user_id)
-        if app_service:
-            # We no longer support AS users using /sync directly.
-            # See https://github.com/matrix-org/matrix-doc/issues/1144
-            raise NotImplementedError()
-        else:
-            joined_room_ids = await self.get_rooms_for_user_at(
-                user_id, now_token.room_key
-            )
         sync_result_builder = SyncResultBuilder(
             sync_config,
             full_state,
             since_token=since_token,
             now_token=now_token,
             joined_room_ids=joined_room_ids,
+            membership_change_events=membership_change_events,
         )
 
         logger.debug("Fetching account data")
@@ -1827,19 +1882,12 @@ class SyncHandler:
 
         Does not modify the `sync_result_builder`.
         """
-        user_id = sync_result_builder.sync_config.user.to_string()
         since_token = sync_result_builder.since_token
-        now_token = sync_result_builder.now_token
+        membership_change_events = sync_result_builder.membership_change_events
 
         assert since_token
 
-        # Get a list of membership change events that have happened to the user
-        # requesting the sync.
-        membership_changes = await self.store.get_membership_changes_for_user(
-            user_id, since_token.room_key, now_token.room_key
-        )
-
-        if membership_changes:
+        if membership_change_events:
             return True
 
         stream_id = since_token.room_key.stream
@@ -1878,16 +1926,10 @@ class SyncHandler:
         since_token = sync_result_builder.since_token
         now_token = sync_result_builder.now_token
         sync_config = sync_result_builder.sync_config
+        membership_change_events = sync_result_builder.membership_change_events
 
         assert since_token
 
-        # TODO: we've already called this function and ran this query in
-        #       _have_rooms_changed. We could keep the results in memory to avoid a
-        #       second query, at the cost of more complicated source code.
-        membership_change_events = await self.store.get_membership_changes_for_user(
-            user_id, since_token.room_key, now_token.room_key, self.rooms_to_exclude
-        )
-
         mem_change_events_by_room_id: Dict[str, List[EventBase]] = {}
         for event in membership_change_events:
             mem_change_events_by_room_id.setdefault(event.room_id, []).append(event)
@@ -2415,60 +2457,6 @@ class SyncHandler:
             else:
                 raise Exception("Unrecognized rtype: %r", room_builder.rtype)
 
-    async def get_rooms_for_user_at(
-        self,
-        user_id: str,
-        room_key: RoomStreamToken,
-    ) -> FrozenSet[str]:
-        """Get set of joined rooms for a user at the given stream ordering.
-
-        The stream ordering *must* be recent, otherwise this may throw an
-        exception if older than a month. (This function is called with the
-        current token, which should be perfectly fine).
-
-        Args:
-            user_id
-            stream_ordering
-
-        ReturnValue:
-            Set of room_ids the user is in at given stream_ordering.
-        """
-        joined_rooms = await self.store.get_rooms_for_user_with_stream_ordering(user_id)
-
-        joined_room_ids = set()
-
-        # We need to check that the stream ordering of the join for each room
-        # is before the stream_ordering asked for. This might not be the case
-        # if the user joins a room between us getting the current token and
-        # calling `get_rooms_for_user_with_stream_ordering`.
-        # If the membership's stream ordering is after the given stream
-        # ordering, we need to go and work out if the user was in the room
-        # before.
-        # We also need to check whether the room should be excluded from sync
-        # responses as per the homeserver config.
-        for joined_room in joined_rooms:
-            if joined_room.room_id in self.rooms_to_exclude:
-                continue
-
-            if not joined_room.event_pos.persisted_after(room_key):
-                joined_room_ids.add(joined_room.room_id)
-                continue
-
-            logger.info("User joined room after current token: %s", joined_room.room_id)
-
-            extrems = (
-                await self.store.get_forward_extremities_for_room_at_stream_ordering(
-                    joined_room.room_id, joined_room.event_pos.stream
-                )
-            )
-            user_ids_in_room = await self.state.get_current_user_ids_in_room(
-                joined_room.room_id, extrems
-            )
-            if user_id in user_ids_in_room:
-                joined_room_ids.add(joined_room.room_id)
-
-        return frozenset(joined_room_ids)
-
 
 def _action_has_highlight(actions: List[JsonDict]) -> bool:
     for action in actions:
@@ -2565,6 +2553,7 @@ class SyncResultBuilder:
     since_token: Optional[StreamToken]
     now_token: StreamToken
     joined_room_ids: FrozenSet[str]
+    membership_change_events: List[EventBase]
 
     presence: List[UserPresenceState] = attr.Factory(list)
     account_data: List[JsonDict] = attr.Factory(list)
-- 
cgit 1.5.1


From dcced5a8d76b94e372aefa7d1f05ec0dbc22ea0d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 4 Oct 2022 12:07:02 -0400
Subject: Use threaded receipts when fetching events for push. (#13878)

Update the HTTP and email pushers to consider threaded read receipts
when fetching unread events.
---
 changelog.d/13878.feature                          |  1 +
 .../storage/databases/main/event_push_actions.py   | 80 +++++++++++++++-------
 tests/storage/test_event_push_actions.py           | 57 ++++++++++-----
 3 files changed, 97 insertions(+), 41 deletions(-)
 create mode 100644 changelog.d/13878.feature

(limited to 'synapse')

diff --git a/changelog.d/13878.feature b/changelog.d/13878.feature
new file mode 100644
index 0000000000..d0cb902dff
--- /dev/null
+++ b/changelog.d/13878.feature
@@ -0,0 +1 @@
+Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)).
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 7469cd336c..332e13d1c9 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -119,6 +119,32 @@ DEFAULT_HIGHLIGHT_ACTION: List[Union[dict, str]] = [
 ]
 
 
+@attr.s(slots=True, auto_attribs=True)
+class _RoomReceipt:
+    """
+    _RoomReceipt instances track the stream orderings of a user's latest
+    unthreaded and per-thread receipts in a single room.
+    """
+
+    unthreaded_stream_ordering: int = 0
+    # threaded_stream_ordering includes the main pseudo-thread.
+    threaded_stream_ordering: Dict[str, int] = attr.Factory(dict)
+
+    def is_unread(self, thread_id: str, stream_ordering: int) -> bool:
+        """Returns True if the stream ordering is unread according to the receipt information."""
+
+        # Only include push actions with a stream ordering after both the unthreaded
+        # and threaded receipt. Properly handles a user without any receipts present.
+        return (
+            self.unthreaded_stream_ordering < stream_ordering
+            and self.threaded_stream_ordering.get(thread_id, 0) < stream_ordering
+        )
+
+
+# A _RoomReceipt with no receipts in it.
+MISSING_ROOM_RECEIPT = _RoomReceipt()
+
+
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class HttpPushAction:
     """
@@ -716,7 +742,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
     def _get_receipts_by_room_txn(
         self, txn: LoggingTransaction, user_id: str
-    ) -> Dict[str, int]:
+    ) -> Dict[str, _RoomReceipt]:
         """
         Generate a map of room ID to the latest stream ordering that has been
         read by the given user.
@@ -726,7 +752,8 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             user_id: The user to fetch receipts for.
 
         Returns:
-            A map of room ID to stream ordering for all rooms the user has a receipt in.
+            A map including all rooms the user is in with a receipt. It maps
+            room IDs to _RoomReceipt instances
         """
         receipt_types_clause, args = make_in_list_sql_clause(
             self.database_engine,
@@ -735,20 +762,26 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         )
 
         sql = f"""
-            SELECT room_id, MAX(stream_ordering)
+            SELECT room_id, thread_id, MAX(stream_ordering)
             FROM receipts_linearized
             INNER JOIN events USING (room_id, event_id)
             WHERE {receipt_types_clause}
             AND user_id = ?
-            GROUP BY room_id
+            GROUP BY room_id, thread_id
         """
 
         args.extend((user_id,))
         txn.execute(sql, args)
-        return {
-            room_id: latest_stream_ordering
-            for room_id, latest_stream_ordering in txn.fetchall()
-        }
+
+        result: Dict[str, _RoomReceipt] = {}
+        for room_id, thread_id, stream_ordering in txn:
+            room_receipt = result.setdefault(room_id, _RoomReceipt())
+            if thread_id is None:
+                room_receipt.unthreaded_stream_ordering = stream_ordering
+            else:
+                room_receipt.threaded_stream_ordering[thread_id] = stream_ordering
+
+        return result
 
     async def get_unread_push_actions_for_user_in_range_for_http(
         self,
@@ -781,9 +814,10 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
         def get_push_actions_txn(
             txn: LoggingTransaction,
-        ) -> List[Tuple[str, str, int, str, bool]]:
+        ) -> List[Tuple[str, str, str, int, str, bool]]:
             sql = """
-                SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, ep.highlight
+                SELECT ep.event_id, ep.room_id, ep.thread_id, ep.stream_ordering,
+                    ep.actions, ep.highlight
                 FROM event_push_actions AS ep
                 WHERE
                     ep.user_id = ?
@@ -793,7 +827,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 ORDER BY ep.stream_ordering ASC LIMIT ?
             """
             txn.execute(sql, (user_id, min_stream_ordering, max_stream_ordering, limit))
-            return cast(List[Tuple[str, str, int, str, bool]], txn.fetchall())
+            return cast(List[Tuple[str, str, str, int, str, bool]], txn.fetchall())
 
         push_actions = await self.db_pool.runInteraction(
             "get_unread_push_actions_for_user_in_range_http", get_push_actions_txn
@@ -806,10 +840,10 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 stream_ordering=stream_ordering,
                 actions=_deserialize_action(actions, highlight),
             )
-            for event_id, room_id, stream_ordering, actions, highlight in push_actions
-            # Only include push actions with a stream ordering after any receipt, or without any
-            # receipt present (invited to but never read rooms).
-            if stream_ordering > receipts_by_room.get(room_id, 0)
+            for event_id, room_id, thread_id, stream_ordering, actions, highlight in push_actions
+            if receipts_by_room.get(room_id, MISSING_ROOM_RECEIPT).is_unread(
+                thread_id, stream_ordering
+            )
         ]
 
         # Now sort it so it's ordered correctly, since currently it will
@@ -853,10 +887,10 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
         def get_push_actions_txn(
             txn: LoggingTransaction,
-        ) -> List[Tuple[str, str, int, str, bool, int]]:
+        ) -> List[Tuple[str, str, str, int, str, bool, int]]:
             sql = """
-                SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions,
-                    ep.highlight, e.received_ts
+                SELECT ep.event_id, ep.room_id, ep.thread_id, ep.stream_ordering,
+                    ep.actions, ep.highlight, e.received_ts
                 FROM event_push_actions AS ep
                 INNER JOIN events AS e USING (room_id, event_id)
                 WHERE
@@ -867,7 +901,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 ORDER BY ep.stream_ordering DESC LIMIT ?
             """
             txn.execute(sql, (user_id, min_stream_ordering, max_stream_ordering, limit))
-            return cast(List[Tuple[str, str, int, str, bool, int]], txn.fetchall())
+            return cast(List[Tuple[str, str, str, int, str, bool, int]], txn.fetchall())
 
         push_actions = await self.db_pool.runInteraction(
             "get_unread_push_actions_for_user_in_range_email", get_push_actions_txn
@@ -882,10 +916,10 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 actions=_deserialize_action(actions, highlight),
                 received_ts=received_ts,
             )
-            for event_id, room_id, stream_ordering, actions, highlight, received_ts in push_actions
-            # Only include push actions with a stream ordering after any receipt, or without any
-            # receipt present (invited to but never read rooms).
-            if stream_ordering > receipts_by_room.get(room_id, 0)
+            for event_id, room_id, thread_id, stream_ordering, actions, highlight, received_ts in push_actions
+            if receipts_by_room.get(room_id, MISSING_ROOM_RECEIPT).is_unread(
+                thread_id, stream_ordering
+            )
         ]
 
         # Now sort it so it's ordered correctly, since currently it will
diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py
index 886585e9f2..ee48920f84 100644
--- a/tests/storage/test_event_push_actions.py
+++ b/tests/storage/test_event_push_actions.py
@@ -16,7 +16,7 @@ from typing import Optional, Tuple
 
 from twisted.test.proto_helpers import MemoryReactor
 
-from synapse.api.constants import MAIN_TIMELINE
+from synapse.api.constants import MAIN_TIMELINE, RelationTypes
 from synapse.rest import admin
 from synapse.rest.client import login, room
 from synapse.server import HomeServer
@@ -66,16 +66,23 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         user_id, token, _, other_token, room_id = self._create_users_and_room()
 
         # Create two events, one of which is a highlight.
-        self.helper.send_event(
+        first_event_id = self.helper.send_event(
             room_id,
             type="m.room.message",
             content={"msgtype": "m.text", "body": "msg"},
             tok=other_token,
-        )
-        event_id = self.helper.send_event(
+        )["event_id"]
+        second_event_id = self.helper.send_event(
             room_id,
             type="m.room.message",
-            content={"msgtype": "m.text", "body": user_id},
+            content={
+                "msgtype": "m.text",
+                "body": user_id,
+                "m.relates_to": {
+                    "rel_type": RelationTypes.THREAD,
+                    "event_id": first_event_id,
+                },
+            },
             tok=other_token,
         )["event_id"]
 
@@ -95,13 +102,13 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         )
         self.assertEqual(2, len(email_actions))
 
-        # Send a receipt, which should clear any actions.
+        # Send a receipt, which should clear the first action.
         self.get_success(
             self.store.insert_receipt(
                 room_id,
                 "m.read",
                 user_id=user_id,
-                event_ids=[event_id],
+                event_ids=[first_event_id],
                 thread_id=None,
                 data={},
             )
@@ -111,6 +118,30 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
                 user_id, 0, 1000, 20
             )
         )
+        self.assertEqual(1, len(http_actions))
+        email_actions = self.get_success(
+            self.store.get_unread_push_actions_for_user_in_range_for_email(
+                user_id, 0, 1000, 20
+            )
+        )
+        self.assertEqual(1, len(email_actions))
+
+        # Send a thread receipt to clear the thread action.
+        self.get_success(
+            self.store.insert_receipt(
+                room_id,
+                "m.read",
+                user_id=user_id,
+                event_ids=[second_event_id],
+                thread_id=first_event_id,
+                data={},
+            )
+        )
+        http_actions = self.get_success(
+            self.store.get_unread_push_actions_for_user_in_range_for_http(
+                user_id, 0, 1000, 20
+            )
+        )
         self.assertEqual([], http_actions)
         email_actions = self.get_success(
             self.store.get_unread_push_actions_for_user_in_range_for_email(
@@ -417,17 +448,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         sends both unthreaded and threaded receipts.
         """
 
-        # Create a user to receive notifications and send receipts.
-        user_id = self.register_user("user1235", "pass")
-        token = self.login("user1235", "pass")
-
-        # And another users to send events.
-        other_id = self.register_user("other", "pass")
-        other_token = self.login("other", "pass")
-
-        # Create a room and put both users in it.
-        room_id = self.helper.create_room_as(user_id, tok=token)
-        self.helper.join(room_id, other_id, tok=other_token)
+        user_id, token, _, other_token, room_id = self._create_users_and_room()
         thread_id: str
 
         last_event_id: str
-- 
cgit 1.5.1


From e3d475545467fe587d906d755d8471acbad11266 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 5 Oct 2022 07:56:05 -0400
Subject: Fix backwards compatibility with upcoming threads schema changes.
 (#14045)

Ensure that the upsert will work properly by first updating any existing
rows (in the same way that the background update to backfill data works).
---
 changelog.d/14045.misc                             |  1 +
 .../storage/databases/main/event_push_actions.py   | 34 +++++++++++++++-------
 2 files changed, 24 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/14045.misc

(limited to 'synapse')

diff --git a/changelog.d/14045.misc b/changelog.d/14045.misc
new file mode 100644
index 0000000000..0b0dd8f47a
--- /dev/null
+++ b/changelog.d/14045.misc
@@ -0,0 +1 @@
+Ensure Synapse v1.69 works with upcoming database changes in v1.70.
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index cdc9ee5a37..c9724d7345 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -1103,19 +1103,26 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 txn, room_id, user_id, stream_ordering, old_rotate_stream_ordering
             )
 
+            # First ensure that the existing rows have an updated thread_id field.
+            self.db_pool.simple_update_txn(
+                txn,
+                table="event_push_summary",
+                keyvalues={"room_id": room_id, "user_id": user_id, "thread_id": None},
+                updatevalues={"thread_id": "main"},
+            )
+
             # Replace the previous summary with the new counts.
             #
             # TODO(threads): Upsert per-thread instead of setting them all to main.
             self.db_pool.simple_upsert_txn(
                 txn,
                 table="event_push_summary",
-                keyvalues={"room_id": room_id, "user_id": user_id},
+                keyvalues={"room_id": room_id, "user_id": user_id, "thread_id": "main"},
                 values={
                     "notif_count": notif_count,
                     "unread_count": unread_count,
                     "stream_ordering": old_rotate_stream_ordering,
                     "last_receipt_stream_ordering": stream_ordering,
-                    "thread_id": "main",
                 },
             )
 
@@ -1264,20 +1271,25 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
         logger.info("Rotating notifications, handling %d rows", len(summaries))
 
+        # Ensure that any updated threads have an updated thread_id.
+        self.db_pool.simple_update_many_txn(
+            txn,
+            table="event_push_summary",
+            key_names=("user_id", "room_id", "thread_id"),
+            key_values=[(user_id, room_id, None) for user_id, room_id in summaries],
+            value_names=("thread_id",),
+            value_values=[("main",) for _ in summaries],
+        )
+
         # TODO(threads): Update on a per-thread basis.
         self.db_pool.simple_upsert_many_txn(
             txn,
             table="event_push_summary",
-            key_names=("user_id", "room_id"),
-            key_values=[(user_id, room_id) for user_id, room_id in summaries],
-            value_names=("notif_count", "unread_count", "stream_ordering", "thread_id"),
+            key_names=("user_id", "room_id", "thread_id"),
+            key_values=[(user_id, room_id, "main") for user_id, room_id in summaries],
+            value_names=("notif_count", "unread_count", "stream_ordering"),
             value_values=[
-                (
-                    summary.notif_count,
-                    summary.unread_count,
-                    summary.stream_ordering,
-                    "main",
-                )
+                (summary.notif_count, summary.unread_count, summary.stream_ordering)
                 for summary in summaries.values()
             ],
         )
-- 
cgit 1.5.1


From 0b037d6c918cb04f86b1fccae9610552de9386d7 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 5 Oct 2022 08:49:52 -0400
Subject: Fix handling of public rooms filter with a network tuple. (#14053)

Fixes two related bugs:

* The handling of `[null]` for a `room_types` filter was incorrect.
* The ordering of arguments when providing both a network tuple
  and room type field was incorrect.
---
 changelog.d/14053.bugfix               |  1 +
 synapse/storage/databases/main/room.py | 43 ++++++++++++++++++++--------------
 tests/rest/client/test_rooms.py        | 41 ++++++++++++++++++++++++--------
 3 files changed, 58 insertions(+), 27 deletions(-)
 create mode 100644 changelog.d/14053.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14053.bugfix b/changelog.d/14053.bugfix
new file mode 100644
index 0000000000..07769f51d0
--- /dev/null
+++ b/changelog.d/14053.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.53.0 when querying `/publicRooms` with both a `room_type` filter and a `third_party_instance_id`.
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 7412bce255..e41c99027a 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -207,21 +207,30 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
 
     def _construct_room_type_where_clause(
         self, room_types: Union[List[Union[str, None]], None]
-    ) -> Tuple[Union[str, None], List[str]]:
+    ) -> Tuple[Union[str, None], list]:
         if not room_types:
             return None, []
-        else:
-            # We use None when we want get rooms without a type
-            is_null_clause = ""
-            if None in room_types:
-                is_null_clause = "OR room_type IS NULL"
-                room_types = [value for value in room_types if value is not None]
 
+        # Since None is used to represent a room without a type, care needs to
+        # be taken when constructing the where clause.
+        clauses = []
+        args: list = []
+
+        room_types_set = set(room_types)
+
+        # We use None to represent a room without a type.
+        if None in room_types_set:
+            clauses.append("room_type IS NULL")
+            room_types_set.remove(None)
+
+        # If there are other room types, generate the proper clause.
+        if room_types:
             list_clause, args = make_in_list_sql_clause(
-                self.database_engine, "room_type", room_types
+                self.database_engine, "room_type", room_types_set
             )
+            clauses.append(list_clause)
 
-            return f"({list_clause} {is_null_clause})", args
+        return f"({' OR '.join(clauses)})", args
 
     async def count_public_rooms(
         self,
@@ -241,14 +250,6 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
         def _count_public_rooms_txn(txn: LoggingTransaction) -> int:
             query_args = []
 
-            room_type_clause, args = self._construct_room_type_where_clause(
-                search_filter.get(PublicRoomsFilterFields.ROOM_TYPES, None)
-                if search_filter
-                else None
-            )
-            room_type_clause = f" AND {room_type_clause}" if room_type_clause else ""
-            query_args += args
-
             if network_tuple:
                 if network_tuple.appservice_id:
                     published_sql = """
@@ -268,6 +269,14 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
                     UNION SELECT room_id from appservice_room_list
             """
 
+            room_type_clause, args = self._construct_room_type_where_clause(
+                search_filter.get(PublicRoomsFilterFields.ROOM_TYPES, None)
+                if search_filter
+                else None
+            )
+            room_type_clause = f" AND {room_type_clause}" if room_type_clause else ""
+            query_args += args
+
             sql = f"""
                 SELECT
                     COUNT(*)
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index 5e66b5b26c..3612ebe7b9 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -2213,14 +2213,17 @@ class PublicRoomsRoomTypeFilterTestCase(unittest.HomeserverTestCase):
         )
 
     def make_public_rooms_request(
-        self, room_types: Union[List[Union[str, None]], None]
+        self,
+        room_types: Optional[List[Union[str, None]]],
+        instance_id: Optional[str] = None,
     ) -> Tuple[List[Dict[str, Any]], int]:
-        channel = self.make_request(
-            "POST",
-            self.url,
-            {"filter": {PublicRoomsFilterFields.ROOM_TYPES: room_types}},
-            self.token,
-        )
+        body: JsonDict = {"filter": {PublicRoomsFilterFields.ROOM_TYPES: room_types}}
+        if instance_id:
+            body["third_party_instance_id"] = "test|test"
+
+        channel = self.make_request("POST", self.url, body, self.token)
+        self.assertEqual(channel.code, 200)
+
         chunk = channel.json_body["chunk"]
         count = channel.json_body["total_room_count_estimate"]
 
@@ -2230,31 +2233,49 @@ class PublicRoomsRoomTypeFilterTestCase(unittest.HomeserverTestCase):
 
     def test_returns_both_rooms_and_spaces_if_no_filter(self) -> None:
         chunk, count = self.make_public_rooms_request(None)
-
         self.assertEqual(count, 2)
 
+        # Also check if there's no filter property at all in the body.
+        channel = self.make_request("POST", self.url, {}, self.token)
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(len(channel.json_body["chunk"]), 2)
+        self.assertEqual(channel.json_body["total_room_count_estimate"], 2)
+
+        chunk, count = self.make_public_rooms_request(None, "test|test")
+        self.assertEqual(count, 0)
+
     def test_returns_only_rooms_based_on_filter(self) -> None:
         chunk, count = self.make_public_rooms_request([None])
 
         self.assertEqual(count, 1)
         self.assertEqual(chunk[0].get("room_type", None), None)
 
+        chunk, count = self.make_public_rooms_request([None], "test|test")
+        self.assertEqual(count, 0)
+
     def test_returns_only_space_based_on_filter(self) -> None:
         chunk, count = self.make_public_rooms_request(["m.space"])
 
         self.assertEqual(count, 1)
         self.assertEqual(chunk[0].get("room_type", None), "m.space")
 
+        chunk, count = self.make_public_rooms_request(["m.space"], "test|test")
+        self.assertEqual(count, 0)
+
     def test_returns_both_rooms_and_space_based_on_filter(self) -> None:
         chunk, count = self.make_public_rooms_request(["m.space", None])
-
         self.assertEqual(count, 2)
 
+        chunk, count = self.make_public_rooms_request(["m.space", None], "test|test")
+        self.assertEqual(count, 0)
+
     def test_returns_both_rooms_and_spaces_if_array_is_empty(self) -> None:
         chunk, count = self.make_public_rooms_request([])
-
         self.assertEqual(count, 2)
 
+        chunk, count = self.make_public_rooms_request([], "test|test")
+        self.assertEqual(count, 0)
+
 
 class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase):
     """Test that we correctly fallback to local filtering if a remote server
-- 
cgit 1.5.1


From 7b7478e8b65cceb9e7362c6c1cb932b569a6f383 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Wed, 5 Oct 2022 10:12:48 -0700
Subject: Batch up notifications after event persistence  (#14033)

---
 changelog.d/14033.misc               |  1 +
 synapse/handlers/federation_event.py |  4 +-
 synapse/handlers/message.py          | 25 ++++++------
 synapse/notifier.py                  | 75 ++++++++++++++++++++----------------
 synapse/replication/tcp/client.py    | 19 ++++-----
 5 files changed, 66 insertions(+), 58 deletions(-)
 create mode 100644 changelog.d/14033.misc

(limited to 'synapse')

diff --git a/changelog.d/14033.misc b/changelog.d/14033.misc
new file mode 100644
index 0000000000..fe42852aa5
--- /dev/null
+++ b/changelog.d/14033.misc
@@ -0,0 +1 @@
+Don't repeatedly wake up the same users for batched events.
\ No newline at end of file
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 778d8869b3..da319943cc 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -2240,8 +2240,8 @@ class FederationEventHandler:
         event_pos = PersistedEventPosition(
             self._instance_name, event.internal_metadata.stream_ordering
         )
-        await self._notifier.on_new_room_event(
-            event, event_pos, max_stream_token, extra_users=extra_users
+        await self._notifier.on_new_room_events(
+            [(event, event_pos)], max_stream_token, extra_users=extra_users
         )
 
         if event.type == EventTypes.Member and event.membership == Membership.JOIN:
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 00e7645ba5..da1acea275 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -1872,6 +1872,7 @@ class EventCreationHandler:
             events_and_context, backfilled=backfilled
         )
 
+        events_and_pos = []
         for event in persisted_events:
             if self._ephemeral_events_enabled:
                 # If there's an expiry timestamp on the event, schedule its expiry.
@@ -1880,25 +1881,23 @@ class EventCreationHandler:
             stream_ordering = event.internal_metadata.stream_ordering
             assert stream_ordering is not None
             pos = PersistedEventPosition(self._instance_name, stream_ordering)
-
-            async def _notify() -> None:
-                try:
-                    await self.notifier.on_new_room_event(
-                        event, pos, max_stream_token, extra_users=extra_users
-                    )
-                except Exception:
-                    logger.exception(
-                        "Error notifying about new room event %s",
-                        event.event_id,
-                    )
-
-            run_in_background(_notify)
+            events_and_pos.append((event, pos))
 
             if event.type == EventTypes.Message:
                 # We don't want to block sending messages on any presence code. This
                 # matters as sometimes presence code can take a while.
                 run_in_background(self._bump_active_time, requester.user)
 
+        async def _notify() -> None:
+            try:
+                await self.notifier.on_new_room_events(
+                    events_and_pos, max_stream_token, extra_users=extra_users
+                )
+            except Exception:
+                logger.exception("Error notifying about new room events")
+
+        run_in_background(_notify)
+
         return persisted_events[-1]
 
     async def _maybe_kick_guest_users(
diff --git a/synapse/notifier.py b/synapse/notifier.py
index c42bb8266a..26b97cf766 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -294,35 +294,31 @@ class Notifier:
         """
         self._new_join_in_room_callbacks.append(cb)
 
-    async def on_new_room_event(
+    async def on_new_room_events(
         self,
-        event: EventBase,
-        event_pos: PersistedEventPosition,
+        events_and_pos: List[Tuple[EventBase, PersistedEventPosition]],
         max_room_stream_token: RoomStreamToken,
         extra_users: Optional[Collection[UserID]] = None,
     ) -> None:
-        """Unwraps event and calls `on_new_room_event_args`."""
-        await self.on_new_room_event_args(
-            event_pos=event_pos,
-            room_id=event.room_id,
-            event_id=event.event_id,
-            event_type=event.type,
-            state_key=event.get("state_key"),
-            membership=event.content.get("membership"),
-            max_room_stream_token=max_room_stream_token,
-            extra_users=extra_users or [],
-        )
+        """Creates a _PendingRoomEventEntry for each of the listed events and calls
+        notify_new_room_events with the results."""
+        event_entries = []
+        for event, pos in events_and_pos:
+            entry = self.create_pending_room_event_entry(
+                pos,
+                extra_users,
+                event.room_id,
+                event.type,
+                event.get("state_key"),
+                event.content.get("membership"),
+            )
+            event_entries.append((entry, event.event_id))
+        await self.notify_new_room_events(event_entries, max_room_stream_token)
 
-    async def on_new_room_event_args(
+    async def notify_new_room_events(
         self,
-        room_id: str,
-        event_id: str,
-        event_type: str,
-        state_key: Optional[str],
-        membership: Optional[str],
-        event_pos: PersistedEventPosition,
+        event_entries: List[Tuple[_PendingRoomEventEntry, str]],
         max_room_stream_token: RoomStreamToken,
-        extra_users: Optional[Collection[UserID]] = None,
     ) -> None:
         """Used by handlers to inform the notifier something has happened
         in the room, room event wise.
@@ -338,22 +334,33 @@ class Notifier:
         until all previous events have been persisted before notifying
         the client streams.
         """
-        self.pending_new_room_events.append(
-            _PendingRoomEventEntry(
-                event_pos=event_pos,
-                extra_users=extra_users or [],
-                room_id=room_id,
-                type=event_type,
-                state_key=state_key,
-                membership=membership,
-            )
-        )
-        self._notify_pending_new_room_events(max_room_stream_token)
+        for event_entry, event_id in event_entries:
+            self.pending_new_room_events.append(event_entry)
+            await self._third_party_rules.on_new_event(event_id)
 
-        await self._third_party_rules.on_new_event(event_id)
+        self._notify_pending_new_room_events(max_room_stream_token)
 
         self.notify_replication()
 
+    def create_pending_room_event_entry(
+        self,
+        event_pos: PersistedEventPosition,
+        extra_users: Optional[Collection[UserID]],
+        room_id: str,
+        event_type: str,
+        state_key: Optional[str],
+        membership: Optional[str],
+    ) -> _PendingRoomEventEntry:
+        """Creates and returns a _PendingRoomEventEntry"""
+        return _PendingRoomEventEntry(
+            event_pos=event_pos,
+            extra_users=extra_users or [],
+            room_id=room_id,
+            type=event_type,
+            state_key=state_key,
+            membership=membership,
+        )
+
     def _notify_pending_new_room_events(
         self, max_room_stream_token: RoomStreamToken
     ) -> None:
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index b2522f98ca..18252a2958 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -210,15 +210,16 @@ class ReplicationDataHandler:
 
                 max_token = self.store.get_room_max_token()
                 event_pos = PersistedEventPosition(instance_name, token)
-                await self.notifier.on_new_room_event_args(
-                    event_pos=event_pos,
-                    max_room_stream_token=max_token,
-                    extra_users=extra_users,
-                    room_id=row.data.room_id,
-                    event_id=row.data.event_id,
-                    event_type=row.data.type,
-                    state_key=row.data.state_key,
-                    membership=row.data.membership,
+                event_entry = self.notifier.create_pending_room_event_entry(
+                    event_pos,
+                    extra_users,
+                    row.data.room_id,
+                    row.data.type,
+                    row.data.state_key,
+                    row.data.membership,
+                )
+                await self.notifier.notify_new_room_events(
+                    [(event_entry, row.data.event_id)], max_token
                 )
 
                 # If this event is a join, make a note of it so we have an accurate
-- 
cgit 1.5.1


From 79c592cec68d66278e3233e2c9472f975942cfec Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Thu, 6 Oct 2022 12:22:36 +0200
Subject: Deprecate the `generate_short_term_login_token` method in favor of an
 async `create_login_token` method in the Module API. (#13842)

Signed-off-by: Quentin Gliech <quenting@element.io>
Co-authored-by: Brendan Abolivier <babolivier@matrix.org>
---
 changelog.d/13842.removal      |  1 +
 docs/upgrade.md                | 33 +++++++++++++++++++++++++++++++++
 synapse/module_api/__init__.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+)
 create mode 100644 changelog.d/13842.removal

(limited to 'synapse')

diff --git a/changelog.d/13842.removal b/changelog.d/13842.removal
new file mode 100644
index 0000000000..cbcff38e91
--- /dev/null
+++ b/changelog.d/13842.removal
@@ -0,0 +1 @@
+Deprecate the `generate_short_term_login_token` method in favor of an async `create_login_token` method in the Module API.
diff --git a/docs/upgrade.md b/docs/upgrade.md
index 002ef70059..b81385b191 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -128,6 +128,39 @@ you may specify `enable_legacy_metrics: false` in your homeserver configuration.
 A list of affected metrics is available on the [Metrics How-to page](https://matrix-org.github.io/synapse/v1.69/metrics-howto.html?highlight=metrics%20deprecated#renaming-of-metrics--deprecation-of-old-names-in-12).
 
 
+## Deprecation of the `generate_short_term_login_token` module API method
+
+The following method of the module API has been deprecated, and is scheduled to
+be removed in v1.71.0:
+
+```python
+def generate_short_term_login_token(
+    self,
+    user_id: str,
+    duration_in_ms: int = (2 * 60 * 1000),
+    auth_provider_id: str = "",
+    auth_provider_session_id: Optional[str] = None,
+) -> str:
+    ...
+```
+
+It has been replaced by an asynchronous equivalent:
+
+```python
+async def create_login_token(
+    self,
+    user_id: str,
+    duration_in_ms: int = (2 * 60 * 1000),
+    auth_provider_id: Optional[str] = None,
+    auth_provider_session_id: Optional[str] = None,
+) -> str:
+    ...
+```
+
+Synapse will log a warning when a module uses the deprecated method, to help
+administrators find modules using it.
+
+
 # Upgrading to v1.68.0
 
 Two changes announced in the upgrade notes for v1.67.0 have now landed in v1.68.0.
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index b7b2d3b8c5..6a6ae208d1 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -748,6 +748,40 @@ class ModuleApi:
             )
         )
 
+    async def create_login_token(
+        self,
+        user_id: str,
+        duration_in_ms: int = (2 * 60 * 1000),
+        auth_provider_id: Optional[str] = None,
+        auth_provider_session_id: Optional[str] = None,
+    ) -> str:
+        """Create a login token suitable for m.login.token authentication
+
+        Added in Synapse v1.69.0.
+
+        Args:
+            user_id: gives the ID of the user that the token is for
+
+            duration_in_ms: the time that the token will be valid for
+
+            auth_provider_id: the ID of the SSO IdP that the user used to authenticate
+                to get this token, if any. This is encoded in the token so that
+                /login can report stats on number of successful logins by IdP.
+
+            auth_provider_session_id: The session ID got during login from the SSO IdP,
+                if any.
+        """
+        # The deprecated `generate_short_term_login_token` method defaulted to an empty
+        # string for the `auth_provider_id` because of how the underlying macaroon was
+        # generated. This will change to a proper NULL-able field when the tokens get
+        # moved to the database.
+        return self._hs.get_macaroon_generator().generate_short_term_login_token(
+            user_id,
+            auth_provider_id or "",
+            auth_provider_session_id,
+            duration_in_ms,
+        )
+
     def generate_short_term_login_token(
         self,
         user_id: str,
@@ -759,6 +793,9 @@ class ModuleApi:
 
         Added in Synapse v1.9.0.
 
+        This was deprecated in Synapse v1.69.0 in favor of create_login_token, and will
+        be removed in Synapse 1.71.0.
+
         Args:
             user_id: gives the ID of the user that the token is for
 
@@ -768,6 +805,11 @@ class ModuleApi:
                to get this token, if any. This is encoded in the token so that
                /login can report stats on number of successful logins by IdP.
         """
+        logger.warn(
+            "A module configured on this server uses ModuleApi.generate_short_term_login_token(), "
+            "which is deprecated in favor of ModuleApi.create_login_token(), and will be removed in "
+            "Synapse 1.71.0",
+        )
         return self._hs.get_macaroon_generator().generate_short_term_login_token(
             user_id,
             auth_provider_id,
-- 
cgit 1.5.1


From e9a0419c8d28b8e153088073d6b76df6d7ed4ddf Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 6 Oct 2022 14:00:03 +0100
Subject: Fix sending events into rooms with non-integer power levels (#14073)

---
 changelog.d/14073.misc                      |  1 +
 mypy.ini                                    |  3 ++
 synapse/push/bulk_push_rule_evaluator.py    |  9 +++-
 tests/push/test_bulk_push_rule_evaluator.py | 74 +++++++++++++++++++++++++++++
 4 files changed, 86 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14073.misc
 create mode 100644 tests/push/test_bulk_push_rule_evaluator.py

(limited to 'synapse')

diff --git a/changelog.d/14073.misc b/changelog.d/14073.misc
new file mode 100644
index 0000000000..7775500194
--- /dev/null
+++ b/changelog.d/14073.misc
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.68.0 where messages could not be sent in rooms with non-integer `notifications` power level.
diff --git a/mypy.ini b/mypy.ini
index 64f9097206..34b4523e00 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -106,6 +106,9 @@ disallow_untyped_defs = False
 [mypy-tests.handlers.test_user_directory]
 disallow_untyped_defs = True
 
+[mypy-tests.push.test_bulk_push_rule_evaluator]
+disallow_untyped_defs = True
+
 [mypy-tests.test_server]
 disallow_untyped_defs = True
 
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 4270438918..998354648f 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -289,11 +289,18 @@ class BulkPushRuleEvaluator:
             if relation.rel_type == RelationTypes.THREAD:
                 thread_id = relation.parent_id
 
+        # It's possible that old room versions have non-integer power levels (floats or
+        # strings). Work around this by explicitly converting to int.
+        notification_levels = power_levels.get("notifications", {})
+        if not event.room_version.msc3667_int_only_power_levels:
+            for user_id, level in notification_levels.items():
+                notification_levels[user_id] = int(level)
+
         evaluator = PushRuleEvaluator(
             _flatten_dict(event),
             room_member_count,
             sender_power_level,
-            power_levels.get("notifications", {}),
+            notification_levels,
             relations,
             self._relations_match_enabled,
         )
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
new file mode 100644
index 0000000000..675d7df2ac
--- /dev/null
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -0,0 +1,74 @@
+from unittest.mock import patch
+
+from synapse.api.room_versions import RoomVersions
+from synapse.push.bulk_push_rule_evaluator import BulkPushRuleEvaluator
+from synapse.rest import admin
+from synapse.rest.client import login, register, room
+from synapse.types import create_requester
+
+from tests import unittest
+
+
+class TestBulkPushRuleEvaluator(unittest.HomeserverTestCase):
+
+    servlets = [
+        admin.register_servlets_for_client_rest_resource,
+        room.register_servlets,
+        login.register_servlets,
+        register.register_servlets,
+    ]
+
+    def test_action_for_event_by_user_handles_noninteger_power_levels(self) -> None:
+        """We should convert floats and strings to integers before passing to Rust.
+
+        Reproduces #14060.
+
+        A lack of validation: the gift that keeps on giving.
+        """
+        # Create a new user and room.
+        alice = self.register_user("alice", "pass")
+        token = self.login(alice, "pass")
+
+        room_id = self.helper.create_room_as(
+            alice, room_version=RoomVersions.V9.identifier, tok=token
+        )
+
+        # Alter the power levels in that room to include stringy and floaty levels.
+        # We need to suppress the validation logic or else it will reject these dodgy
+        # values. (Presumably this validation was not always present.)
+        event_creation_handler = self.hs.get_event_creation_handler()
+        requester = create_requester(alice)
+        with patch("synapse.events.validator.validate_canonicaljson"), patch(
+            "synapse.events.validator.jsonschema.validate"
+        ):
+            self.helper.send_state(
+                room_id,
+                "m.room.power_levels",
+                {
+                    "users": {alice: "100"},  # stringy
+                    "notifications": {"room": 100.0},  # float
+                },
+                token,
+                state_key="",
+            )
+
+        # Create a new message event, and try to evaluate it under the dodgy
+        # power level event.
+        event, context = self.get_success(
+            event_creation_handler.create_event(
+                requester,
+                {
+                    "type": "m.room.message",
+                    "room_id": room_id,
+                    "content": {
+                        "msgtype": "m.text",
+                        "body": "helo",
+                    },
+                    "sender": alice,
+                },
+            )
+        )
+
+        bulk_evaluator = BulkPushRuleEvaluator(self.hs)
+        # should not raise
+        self.get_success(bulk_evaluator.action_for_event_by_user(event, context))
-- 
cgit 1.5.1


From cb20b885cb4bd1648581dd043a184d86fc8c7a00 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 6 Oct 2022 19:17:50 +0100
Subject: Always close _all_ `ijson` coroutines, even if doing so raises
 Exceptions (#14065)

---
 changelog.d/14065.misc                    |  1 +
 synapse/federation/transport/client.py    | 29 ++++++++++++++++++++----
 synapse/util/__init__.py                  | 14 +++++++++++-
 tests/federation/transport/test_client.py | 37 +++++++++++++++++++++++++++++++
 4 files changed, 76 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/14065.misc

(limited to 'synapse')

diff --git a/changelog.d/14065.misc b/changelog.d/14065.misc
new file mode 100644
index 0000000000..98998b0015
--- /dev/null
+++ b/changelog.d/14065.misc
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.35.0 where errors parsing a `/send_join` or `/state` response would produce excessive, low-quality Sentry events.
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 32074b8ca6..cd39d4d111 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -45,6 +45,7 @@ from synapse.federation.units import Transaction
 from synapse.http.matrixfederationclient import ByteParser
 from synapse.http.types import QueryParams
 from synapse.types import JsonDict
+from synapse.util import ExceptionBundle
 
 logger = logging.getLogger(__name__)
 
@@ -926,8 +927,7 @@ class SendJoinParser(ByteParser[SendJoinResponse]):
         return len(data)
 
     def finish(self) -> SendJoinResponse:
-        for c in self._coros:
-            c.close()
+        _close_coros(self._coros)
 
         if self._response.event_dict:
             self._response.event = make_event_from_dict(
@@ -970,6 +970,27 @@ class _StateParser(ByteParser[StateRequestResponse]):
         return len(data)
 
     def finish(self) -> StateRequestResponse:
-        for c in self._coros:
-            c.close()
+        _close_coros(self._coros)
         return self._response
+
+
+def _close_coros(coros: Iterable[Generator[None, bytes, None]]) -> None:
+    """Close each of the given coroutines.
+
+    Always calls .close() on each coroutine, even if doing so raises an exception.
+    Any exceptions raised are aggregated into an ExceptionBundle.
+
+    :raises ExceptionBundle: if at least one coroutine fails to close.
+    """
+    exceptions = []
+    for c in coros:
+        try:
+            c.close()
+        except Exception as e:
+            exceptions.append(e)
+
+    if exceptions:
+        # raise from the first exception so that the traceback has slightly more context
+        raise ExceptionBundle(
+            f"There were {len(exceptions)} errors closing coroutines", exceptions
+        ) from exceptions[0]
diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py
index a90f08dd4c..7be9d5f113 100644
--- a/synapse/util/__init__.py
+++ b/synapse/util/__init__.py
@@ -15,7 +15,7 @@
 import json
 import logging
 import typing
-from typing import Any, Callable, Dict, Generator, Optional
+from typing import Any, Callable, Dict, Generator, Optional, Sequence
 
 import attr
 from frozendict import frozendict
@@ -193,3 +193,15 @@ def log_failure(
 # Version string with git info. Computed here once so that we don't invoke git multiple
 # times.
 SYNAPSE_VERSION = get_distribution_version_string("matrix-synapse", __file__)
+
+
+class ExceptionBundle(Exception):
+    # A poor stand-in for something like Python 3.11's ExceptionGroup.
+    # (A backport called `exceptiongroup` exists but seems overkill: we just want a
+    # container type here.)
+    def __init__(self, message: str, exceptions: Sequence[Exception]):
+        parts = [message]
+        for e in exceptions:
+            parts.append(str(e))
+        super().__init__("\n  - ".join(parts))
+        self.exceptions = exceptions
diff --git a/tests/federation/transport/test_client.py b/tests/federation/transport/test_client.py
index c2320ce133..0926e0583d 100644
--- a/tests/federation/transport/test_client.py
+++ b/tests/federation/transport/test_client.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import json
+from unittest.mock import Mock
 
 from synapse.api.room_versions import RoomVersions
 from synapse.federation.transport.client import SendJoinParser
@@ -94,3 +95,39 @@ class SendJoinParserTestCase(TestCase):
         # Retrieve and check the parsed SendJoinResponse
         parsed_response = parser.finish()
         self.assertEqual(parsed_response.servers_in_room, ["hs1", "hs2"])
+
+    def test_errors_closing_coroutines(self) -> None:
+        """Check we close all coroutines, even if closing the first raises an Exception.
+
+        We also check that an Exception of some kind is raised, but we don't make any
+        assertions about its attributes or type.
+        """
+        parser = SendJoinParser(RoomVersions.V1, False)
+        response = {"org.matrix.msc3706.servers_in_room": ["hs1", "hs2"]}
+        serialisation = json.dumps(response).encode()
+
+        # Mock the coroutines managed by this parser.
+        # The first one will error when we try to close it.
+        coro_1 = Mock()
+        coro_1.close = Mock(side_effect=RuntimeError("Couldn't close coro 1"))
+
+        coro_2 = Mock()
+
+        coro_3 = Mock()
+        coro_3.close = Mock(side_effect=RuntimeError("Couldn't close coro 3"))
+
+        parser._coros = [coro_1, coro_2, coro_3]
+
+        # Send half of the data to the parser
+        parser.write(serialisation[: len(serialisation) // 2])
+
+        # Close the parser. There should be _some_ kind of exception, but it need not
+        # be that RuntimeError directly. E.g. we might want to raise a wrapper
+        # encompassing multiple errors from multiple coroutines.
+        with self.assertRaises(Exception):
+            parser.finish()
+
+        # In any case, we should have tried to close all three coros.
+        coro_1.close.assert_called()
+        coro_2.close.assert_called()
+        coro_3.close.assert_called()
-- 
cgit 1.5.1


From 1fa2e58772620199075a36c237dd83cd989c0e91 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 7 Oct 2022 13:35:44 +0100
Subject: Catch BrokenPipeError from metrics server, and log as a warning
 (#14072)

---
 changelog.d/14072.misc                |  1 +
 synapse/metrics/_legacy_exposition.py | 18 ++++++++++++------
 2 files changed, 13 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/14072.misc

(limited to 'synapse')

diff --git a/changelog.d/14072.misc b/changelog.d/14072.misc
new file mode 100644
index 0000000000..3070c756d5
--- /dev/null
+++ b/changelog.d/14072.misc
@@ -0,0 +1 @@
+Don't create noisy Sentry events when a requester drops connection to the metrics server mid-request.
diff --git a/synapse/metrics/_legacy_exposition.py b/synapse/metrics/_legacy_exposition.py
index 563d8cc2c6..1459f9d224 100644
--- a/synapse/metrics/_legacy_exposition.py
+++ b/synapse/metrics/_legacy_exposition.py
@@ -20,7 +20,7 @@ Due to the renaming of metrics in prometheus_client 0.4.0, this customised
 vendoring of the code will emit both the old versions that Synapse dashboards
 expect, and the newer "best practice" version of the up-to-date official client.
 """
-
+import logging
 import math
 import threading
 from http.server import BaseHTTPRequestHandler, HTTPServer
@@ -34,6 +34,7 @@ from prometheus_client.core import Sample
 from twisted.web.resource import Resource
 from twisted.web.server import Request
 
+logger = logging.getLogger(__name__)
 CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8"
 
 
@@ -219,11 +220,16 @@ class MetricsHandler(BaseHTTPRequestHandler):
         except Exception:
             self.send_error(500, "error generating metric output")
             raise
-        self.send_response(200)
-        self.send_header("Content-Type", CONTENT_TYPE_LATEST)
-        self.send_header("Content-Length", str(len(output)))
-        self.end_headers()
-        self.wfile.write(output)
+        try:
+            self.send_response(200)
+            self.send_header("Content-Type", CONTENT_TYPE_LATEST)
+            self.send_header("Content-Length", str(len(output)))
+            self.end_headers()
+            self.wfile.write(output)
+        except BrokenPipeError as e:
+            logger.warning(
+                "BrokenPipeError when serving metrics (%s). Did Prometheus restart?", e
+            )
 
     def log_message(self, format: str, *args: Any) -> None:
         """Log nothing."""
-- 
cgit 1.5.1


From 2295095c97f3b4707f30ae8cb4562ebb799f7ac1 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 7 Oct 2022 13:54:07 +0100
Subject: Use Pydantic to validate /devices endpoints (#14054)

---
 changelog.d/14054.feature      |  1 +
 synapse/rest/client/devices.py | 98 ++++++++++++++++++++++--------------------
 2 files changed, 53 insertions(+), 46 deletions(-)
 create mode 100644 changelog.d/14054.feature

(limited to 'synapse')

diff --git a/changelog.d/14054.feature b/changelog.d/14054.feature
new file mode 100644
index 0000000000..9cf3f7a557
--- /dev/null
+++ b/changelog.d/14054.feature
@@ -0,0 +1 @@
+Improve validation of request bodies for the [Device Management](https://spec.matrix.org/v1.4/client-server-api/#device-management) and [MSC2697 Device Dehydration](https://github.com/matrix-org/matrix-spec-proposals/pull/2697) client-server API endpoints.
diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py
index ed6ce78d47..90828c95c4 100644
--- a/synapse/rest/client/devices.py
+++ b/synapse/rest/client/devices.py
@@ -14,18 +14,21 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, Tuple
+from typing import TYPE_CHECKING, List, Optional, Tuple
+
+from pydantic import Extra, StrictStr
 
 from synapse.api import errors
 from synapse.api.errors import NotFoundError
 from synapse.http.server import HttpServer
 from synapse.http.servlet import (
     RestServlet,
-    assert_params_in_dict,
-    parse_json_object_from_request,
+    parse_and_validate_json_object_from_request,
 )
 from synapse.http.site import SynapseRequest
 from synapse.rest.client._base import client_patterns, interactive_auth_handler
+from synapse.rest.client.models import AuthenticationData
+from synapse.rest.models import RequestBodyModel
 from synapse.types import JsonDict
 
 if TYPE_CHECKING:
@@ -80,27 +83,29 @@ class DeleteDevicesRestServlet(RestServlet):
         self.device_handler = hs.get_device_handler()
         self.auth_handler = hs.get_auth_handler()
 
+    class PostBody(RequestBodyModel):
+        auth: Optional[AuthenticationData]
+        devices: List[StrictStr]
+
     @interactive_auth_handler
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
 
         try:
-            body = parse_json_object_from_request(request)
+            body = parse_and_validate_json_object_from_request(request, self.PostBody)
         except errors.SynapseError as e:
             if e.errcode == errors.Codes.NOT_JSON:
-                # DELETE
+                # TODO: Can/should we remove this fallback now?
                 # deal with older clients which didn't pass a JSON dict
                 # the same as those that pass an empty dict
-                body = {}
+                body = self.PostBody.parse_obj({})
             else:
                 raise e
 
-        assert_params_in_dict(body, ["devices"])
-
         await self.auth_handler.validate_user_via_ui_auth(
             requester,
             request,
-            body,
+            body.dict(exclude_unset=True),
             "remove device(s) from your account",
             # Users might call this multiple times in a row while cleaning up
             # devices, allow a single UI auth session to be re-used.
@@ -108,7 +113,7 @@ class DeleteDevicesRestServlet(RestServlet):
         )
 
         await self.device_handler.delete_devices(
-            requester.user.to_string(), body["devices"]
+            requester.user.to_string(), body.devices
         )
         return 200, {}
 
@@ -147,6 +152,9 @@ class DeviceRestServlet(RestServlet):
 
         return 200, device
 
+    class DeleteBody(RequestBodyModel):
+        auth: Optional[AuthenticationData]
+
     @interactive_auth_handler
     async def on_DELETE(
         self, request: SynapseRequest, device_id: str
@@ -154,20 +162,21 @@ class DeviceRestServlet(RestServlet):
         requester = await self.auth.get_user_by_req(request)
 
         try:
-            body = parse_json_object_from_request(request)
+            body = parse_and_validate_json_object_from_request(request, self.DeleteBody)
 
         except errors.SynapseError as e:
             if e.errcode == errors.Codes.NOT_JSON:
+                # TODO: can/should we remove this fallback now?
                 # deal with older clients which didn't pass a JSON dict
                 # the same as those that pass an empty dict
-                body = {}
+                body = self.DeleteBody.parse_obj({})
             else:
                 raise
 
         await self.auth_handler.validate_user_via_ui_auth(
             requester,
             request,
-            body,
+            body.dict(exclude_unset=True),
             "remove a device from your account",
             # Users might call this multiple times in a row while cleaning up
             # devices, allow a single UI auth session to be re-used.
@@ -179,18 +188,33 @@ class DeviceRestServlet(RestServlet):
         )
         return 200, {}
 
+    class PutBody(RequestBodyModel):
+        display_name: Optional[StrictStr]
+
     async def on_PUT(
         self, request: SynapseRequest, device_id: str
     ) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request, allow_guest=True)
 
-        body = parse_json_object_from_request(request)
+        body = parse_and_validate_json_object_from_request(request, self.PutBody)
         await self.device_handler.update_device(
-            requester.user.to_string(), device_id, body
+            requester.user.to_string(), device_id, body.dict()
         )
         return 200, {}
 
 
+class DehydratedDeviceDataModel(RequestBodyModel):
+    """JSON blob describing a dehydrated device to be stored.
+
+    Expects other freeform fields. Use .dict() to access them.
+    """
+
+    class Config:
+        extra = Extra.allow
+
+    algorithm: StrictStr
+
+
 class DehydratedDeviceServlet(RestServlet):
     """Retrieve or store a dehydrated device.
 
@@ -246,27 +270,19 @@ class DehydratedDeviceServlet(RestServlet):
         else:
             raise errors.NotFoundError("No dehydrated device available")
 
+    class PutBody(RequestBodyModel):
+        device_id: StrictStr
+        device_data: DehydratedDeviceDataModel
+        initial_device_display_name: Optional[StrictStr]
+
     async def on_PUT(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
-        submission = parse_json_object_from_request(request)
+        submission = parse_and_validate_json_object_from_request(request, self.PutBody)
         requester = await self.auth.get_user_by_req(request)
 
-        if "device_data" not in submission:
-            raise errors.SynapseError(
-                400,
-                "device_data missing",
-                errcode=errors.Codes.MISSING_PARAM,
-            )
-        elif not isinstance(submission["device_data"], dict):
-            raise errors.SynapseError(
-                400,
-                "device_data must be an object",
-                errcode=errors.Codes.INVALID_PARAM,
-            )
-
         device_id = await self.device_handler.store_dehydrated_device(
             requester.user.to_string(),
-            submission["device_data"],
-            submission.get("initial_device_display_name", None),
+            submission.device_data,
+            submission.initial_device_display_name,
         )
         return 200, {"device_id": device_id}
 
@@ -300,28 +316,18 @@ class ClaimDehydratedDeviceServlet(RestServlet):
         self.auth = hs.get_auth()
         self.device_handler = hs.get_device_handler()
 
+    class PostBody(RequestBodyModel):
+        device_id: StrictStr
+
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
 
-        submission = parse_json_object_from_request(request)
-
-        if "device_id" not in submission:
-            raise errors.SynapseError(
-                400,
-                "device_id missing",
-                errcode=errors.Codes.MISSING_PARAM,
-            )
-        elif not isinstance(submission["device_id"], str):
-            raise errors.SynapseError(
-                400,
-                "device_id must be a string",
-                errcode=errors.Codes.INVALID_PARAM,
-            )
+        submission = parse_and_validate_json_object_from_request(request, self.PostBody)
 
         result = await self.device_handler.rehydrate_device(
             requester.user.to_string(),
             self.auth.get_access_token_from_request(request),
-            submission["device_id"],
+            submission.device_id,
         )
 
         return 200, result
-- 
cgit 1.5.1


From 66a785733458d0b5801097caff53624e202a91b4 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 7 Oct 2022 09:26:40 -0400
Subject: Use stable identifiers for MSC3771 & MSC3773. (#14050)

These are both part of Matrix 1.4 which has now been released.

For now, support both the unstable and stable identifiers.
---
 changelog.d/13776.feature       |  2 +-
 changelog.d/13824.feature       |  2 +-
 changelog.d/13877.feature       |  2 +-
 changelog.d/13878.feature       |  2 +-
 changelog.d/14050.feature       |  1 +
 synapse/api/filtering.py        | 13 +++++++----
 synapse/config/experimental.py  |  2 --
 synapse/handlers/receipts.py    | 11 ++++------
 synapse/handlers/sync.py        |  7 +-----
 synapse/rest/client/receipts.py | 48 ++++++++++++++++++++---------------------
 synapse/rest/client/sync.py     |  9 +++++---
 synapse/rest/client/versions.py |  2 +-
 12 files changed, 49 insertions(+), 52 deletions(-)
 create mode 100644 changelog.d/14050.feature

(limited to 'synapse')

diff --git a/changelog.d/13776.feature b/changelog.d/13776.feature
index 22bce125ce..5d0ae16e13 100644
--- a/changelog.d/13776.feature
+++ b/changelog.d/13776.feature
@@ -1 +1 @@
-Experimental support for thread-specific notifications ([MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)).
+Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)).
diff --git a/changelog.d/13824.feature b/changelog.d/13824.feature
index d0cb902dff..5d0ae16e13 100644
--- a/changelog.d/13824.feature
+++ b/changelog.d/13824.feature
@@ -1 +1 @@
-Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)).
+Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)).
diff --git a/changelog.d/13877.feature b/changelog.d/13877.feature
index d0cb902dff..5d0ae16e13 100644
--- a/changelog.d/13877.feature
+++ b/changelog.d/13877.feature
@@ -1 +1 @@
-Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)).
+Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)).
diff --git a/changelog.d/13878.feature b/changelog.d/13878.feature
index d0cb902dff..5d0ae16e13 100644
--- a/changelog.d/13878.feature
+++ b/changelog.d/13878.feature
@@ -1 +1 @@
-Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)).
+Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)).
diff --git a/changelog.d/14050.feature b/changelog.d/14050.feature
new file mode 100644
index 0000000000..5d0ae16e13
--- /dev/null
+++ b/changelog.d/14050.feature
@@ -0,0 +1 @@
+Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)).
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index c6e44dcf82..cc31cf8cc7 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -84,6 +84,7 @@ ROOM_EVENT_FILTER_SCHEMA = {
         "contains_url": {"type": "boolean"},
         "lazy_load_members": {"type": "boolean"},
         "include_redundant_members": {"type": "boolean"},
+        "unread_thread_notifications": {"type": "boolean"},
         "org.matrix.msc3773.unread_thread_notifications": {"type": "boolean"},
         # Include or exclude events with the provided labels.
         # cf https://github.com/matrix-org/matrix-doc/pull/2326
@@ -308,12 +309,16 @@ class Filter:
         self.include_redundant_members = filter_json.get(
             "include_redundant_members", False
         )
-        if hs.config.experimental.msc3773_enabled:
-            self.unread_thread_notifications: bool = filter_json.get(
+        self.unread_thread_notifications: bool = filter_json.get(
+            "unread_thread_notifications", False
+        )
+        if (
+            not self.unread_thread_notifications
+            and hs.config.experimental.msc3773_enabled
+        ):
+            self.unread_thread_notifications = filter_json.get(
                 "org.matrix.msc3773.unread_thread_notifications", False
             )
-        else:
-            self.unread_thread_notifications = False
 
         self.types = filter_json.get("types", None)
         self.not_types = filter_json.get("not_types", [])
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 6503ce6e34..c35301207a 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -95,8 +95,6 @@ class ExperimentalConfig(Config):
         # MSC2815 (allow room moderators to view redacted event content)
         self.msc2815_enabled: bool = experimental.get("msc2815_enabled", False)
 
-        # MSC3771: Thread read receipts
-        self.msc3771_enabled: bool = experimental.get("msc3771_enabled", False)
         # MSC3772: A push rule for mutual relations.
         self.msc3772_enabled: bool = experimental.get("msc3772_enabled", False)
         # MSC3773: Thread notifications
diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py
index 4768a34c07..4a7ec9e426 100644
--- a/synapse/handlers/receipts.py
+++ b/synapse/handlers/receipts.py
@@ -63,8 +63,6 @@ class ReceiptsHandler:
         self.clock = self.hs.get_clock()
         self.state = hs.get_state_handler()
 
-        self._msc3771_enabled = hs.config.experimental.msc3771_enabled
-
     async def _received_remote_receipt(self, origin: str, content: JsonDict) -> None:
         """Called when we receive an EDU of type m.receipt from a remote HS."""
         receipts = []
@@ -96,11 +94,10 @@ class ReceiptsHandler:
                     # Check if these receipts apply to a thread.
                     thread_id = None
                     data = user_values.get("data", {})
-                    if self._msc3771_enabled and isinstance(data, dict):
-                        thread_id = data.get("thread_id")
-                        # If the thread ID is invalid, consider it missing.
-                        if not isinstance(thread_id, str):
-                            thread_id = None
+                    thread_id = data.get("thread_id")
+                    # If the thread ID is invalid, consider it missing.
+                    if not isinstance(thread_id, str):
+                        thread_id = None
 
                     receipts.append(
                         ReadReceipt(
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 0f684857ca..1db5d68021 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -279,8 +279,6 @@ class SyncHandler:
 
         self.rooms_to_exclude = hs.config.server.rooms_to_exclude_from_sync
 
-        self._msc3773_enabled = hs.config.experimental.msc3773_enabled
-
     async def wait_for_sync_for_user(
         self,
         requester: Requester,
@@ -2412,10 +2410,7 @@ class SyncHandler:
                     unread_count = notifs.main_timeline.unread_count
 
                     # Check the sync configuration.
-                    if (
-                        self._msc3773_enabled
-                        and sync_config.filter_collection.unread_thread_notifications()
-                    ):
+                    if sync_config.filter_collection.unread_thread_notifications():
                         # And add info for each thread.
                         room_sync.unread_thread_notifications = {
                             thread_id: {
diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py
index 287dfdd69e..14dec7ac4e 100644
--- a/synapse/rest/client/receipts.py
+++ b/synapse/rest/client/receipts.py
@@ -50,7 +50,6 @@ class ReceiptRestServlet(RestServlet):
             ReceiptTypes.READ_PRIVATE,
             ReceiptTypes.FULLY_READ,
         }
-        self._msc3771_enabled = hs.config.experimental.msc3771_enabled
 
     async def on_POST(
         self, request: SynapseRequest, room_id: str, receipt_type: str, event_id: str
@@ -67,30 +66,29 @@ class ReceiptRestServlet(RestServlet):
 
         # Pull the thread ID, if one exists.
         thread_id = None
-        if self._msc3771_enabled:
-            if "thread_id" in body:
-                thread_id = body.get("thread_id")
-                if not thread_id or not isinstance(thread_id, str):
-                    raise SynapseError(
-                        400,
-                        "thread_id field must be a non-empty string",
-                        Codes.INVALID_PARAM,
-                    )
-
-                if receipt_type == ReceiptTypes.FULLY_READ:
-                    raise SynapseError(
-                        400,
-                        f"thread_id is not compatible with {ReceiptTypes.FULLY_READ} receipts.",
-                        Codes.INVALID_PARAM,
-                    )
-
-                # Ensure the event ID roughly correlates to the thread ID.
-                if thread_id != await self._main_store.get_thread_id(event_id):
-                    raise SynapseError(
-                        400,
-                        f"event_id {event_id} is not related to thread {thread_id}",
-                        Codes.INVALID_PARAM,
-                    )
+        if "thread_id" in body:
+            thread_id = body.get("thread_id")
+            if not thread_id or not isinstance(thread_id, str):
+                raise SynapseError(
+                    400,
+                    "thread_id field must be a non-empty string",
+                    Codes.INVALID_PARAM,
+                )
+
+            if receipt_type == ReceiptTypes.FULLY_READ:
+                raise SynapseError(
+                    400,
+                    f"thread_id is not compatible with {ReceiptTypes.FULLY_READ} receipts.",
+                    Codes.INVALID_PARAM,
+                )
+
+            # Ensure the event ID roughly correlates to the thread ID.
+            if thread_id != await self._main_store.get_thread_id(event_id):
+                raise SynapseError(
+                    400,
+                    f"event_id {event_id} is not related to thread {thread_id}",
+                    Codes.INVALID_PARAM,
+                )
 
         await self.presence_handler.bump_presence_active_time(requester.user)
 
diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py
index f1c23d68e5..8a16459105 100644
--- a/synapse/rest/client/sync.py
+++ b/synapse/rest/client/sync.py
@@ -100,6 +100,7 @@ class SyncRestServlet(RestServlet):
         self._server_notices_sender = hs.get_server_notices_sender()
         self._event_serializer = hs.get_event_client_serializer()
         self._msc2654_enabled = hs.config.experimental.msc2654_enabled
+        self._msc3773_enabled = hs.config.experimental.msc3773_enabled
 
     async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         # This will always be set by the time Twisted calls us.
@@ -510,9 +511,11 @@ class SyncRestServlet(RestServlet):
             result["ephemeral"] = {"events": ephemeral_events}
             result["unread_notifications"] = room.unread_notifications
             if room.unread_thread_notifications:
-                result[
-                    "org.matrix.msc3773.unread_thread_notifications"
-                ] = room.unread_thread_notifications
+                result["unread_thread_notifications"] = room.unread_thread_notifications
+                if self._msc3773_enabled:
+                    result[
+                        "org.matrix.msc3773.unread_thread_notifications"
+                    ] = room.unread_thread_notifications
             result["summary"] = room.summary
             if self._msc2654_enabled:
                 result["org.matrix.msc2654.unread_count"] = room.unread_count
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 18ed313b5c..d1d2e5f7e3 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -105,7 +105,7 @@ class VersionsRestServlet(RestServlet):
                     # Adds support for thread relations, per MSC3440.
                     "org.matrix.msc3440.stable": True,  # TODO: remove when "v1.3" is added above
                     # Support for thread read receipts & notification counts.
-                    "org.matrix.msc3771": self.config.experimental.msc3771_enabled,
+                    "org.matrix.msc3771": True,
                     "org.matrix.msc3773": self.config.experimental.msc3773_enabled,
                     # Allows moderators to fetch redacted event content as described in MSC2815
                     "fi.mau.msc2815": self.config.experimental.msc2815_enabled,
-- 
cgit 1.5.1


From 00c93d2e7ef5642c9cf900f3fdcfa229e70f843d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 7 Oct 2022 09:29:43 -0400
Subject: Be more lenient in the oEmbed response parsing. (#14089)

Attempt to parse any valid information from an oEmbed response
(instead of bailing at the first unexpected data). This should allow
for more partial oEmbed data to be returned, resulting in better /
more URL previews, even if those URL previews are only partial.
---
 changelog.d/14089.bugfix           |   1 +
 synapse/rest/media/v1/oembed.py    | 107 ++++++++++++++++++++-----------------
 tests/rest/media/v1/test_oembed.py | 103 ++++++++++++++++++++++++++++++++++-
 3 files changed, 160 insertions(+), 51 deletions(-)
 create mode 100644 changelog.d/14089.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14089.bugfix b/changelog.d/14089.bugfix
new file mode 100644
index 0000000000..4a398921bb
--- /dev/null
+++ b/changelog.d/14089.bugfix
@@ -0,0 +1 @@
+Fix a bug where invalid oEmbed fields would cause the entire response to be discarded. Introduced in Synapse 1.18.0.
diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py
index 2177b46c9e..827afd868d 100644
--- a/synapse/rest/media/v1/oembed.py
+++ b/synapse/rest/media/v1/oembed.py
@@ -139,65 +139,72 @@ class OEmbedProvider:
         try:
             # oEmbed responses *must* be UTF-8 according to the spec.
             oembed = json_decoder.decode(raw_body.decode("utf-8"))
+        except ValueError:
+            return OEmbedResult({}, None, None)
 
-            # The version is a required string field, but not always provided,
-            # or sometimes provided as a float. Be lenient.
-            oembed_version = oembed.get("version", "1.0")
-            if oembed_version != "1.0" and oembed_version != 1:
-                raise RuntimeError(f"Invalid oEmbed version: {oembed_version}")
+        # The version is a required string field, but not always provided,
+        # or sometimes provided as a float. Be lenient.
+        oembed_version = oembed.get("version", "1.0")
+        if oembed_version != "1.0" and oembed_version != 1:
+            return OEmbedResult({}, None, None)
 
-            # Ensure the cache age is None or an int.
-            cache_age = oembed.get("cache_age")
-            if cache_age:
-                cache_age = int(cache_age) * 1000
-
-            # The results.
-            open_graph_response = {
-                "og:url": url,
-            }
-
-            title = oembed.get("title")
-            if title:
-                open_graph_response["og:title"] = title
-
-            author_name = oembed.get("author_name")
+        # Attempt to parse the cache age, if possible.
+        try:
+            cache_age = int(oembed.get("cache_age")) * 1000
+        except (TypeError, ValueError):
+            # If the cache age cannot be parsed (e.g. wrong type or invalid
+            # string), ignore it.
+            cache_age = None
 
-            # Use the provider name and as the site.
-            provider_name = oembed.get("provider_name")
-            if provider_name:
-                open_graph_response["og:site_name"] = provider_name
+        # The oEmbed response converted to Open Graph.
+        open_graph_response: JsonDict = {"og:url": url}
 
-            # If a thumbnail exists, use it. Note that dimensions will be calculated later.
-            if "thumbnail_url" in oembed:
-                open_graph_response["og:image"] = oembed["thumbnail_url"]
+        title = oembed.get("title")
+        if title and isinstance(title, str):
+            open_graph_response["og:title"] = title
 
-            # Process each type separately.
-            oembed_type = oembed["type"]
-            if oembed_type == "rich":
-                calc_description_and_urls(open_graph_response, oembed["html"])
-
-            elif oembed_type == "photo":
-                # If this is a photo, use the full image, not the thumbnail.
-                open_graph_response["og:image"] = oembed["url"]
+        author_name = oembed.get("author_name")
+        if not isinstance(author_name, str):
+            author_name = None
 
-            elif oembed_type == "video":
-                open_graph_response["og:type"] = "video.other"
+        # Use the provider name and as the site.
+        provider_name = oembed.get("provider_name")
+        if provider_name and isinstance(provider_name, str):
+            open_graph_response["og:site_name"] = provider_name
+
+        # If a thumbnail exists, use it. Note that dimensions will be calculated later.
+        thumbnail_url = oembed.get("thumbnail_url")
+        if thumbnail_url and isinstance(thumbnail_url, str):
+            open_graph_response["og:image"] = thumbnail_url
+
+        # Process each type separately.
+        oembed_type = oembed.get("type")
+        if oembed_type == "rich":
+            html = oembed.get("html")
+            if isinstance(html, str):
+                calc_description_and_urls(open_graph_response, html)
+
+        elif oembed_type == "photo":
+            # If this is a photo, use the full image, not the thumbnail.
+            url = oembed.get("url")
+            if url and isinstance(url, str):
+                open_graph_response["og:image"] = url
+
+        elif oembed_type == "video":
+            open_graph_response["og:type"] = "video.other"
+            html = oembed.get("html")
+            if html and isinstance(html, str):
                 calc_description_and_urls(open_graph_response, oembed["html"])
-                open_graph_response["og:video:width"] = oembed["width"]
-                open_graph_response["og:video:height"] = oembed["height"]
-
-            elif oembed_type == "link":
-                open_graph_response["og:type"] = "website"
+            for size in ("width", "height"):
+                val = oembed.get(size)
+                if val is not None and isinstance(val, int):
+                    open_graph_response[f"og:video:{size}"] = val
 
-            else:
-                raise RuntimeError(f"Unknown oEmbed type: {oembed_type}")
+        elif oembed_type == "link":
+            open_graph_response["og:type"] = "website"
 
-        except Exception as e:
-            # Trap any exception and let the code follow as usual.
-            logger.warning("Error parsing oEmbed metadata from %s: %r", url, e)
-            open_graph_response = {}
-            author_name = None
-            cache_age = None
+        else:
+            logger.warning("Unknown oEmbed type: %s", oembed_type)
 
         return OEmbedResult(open_graph_response, author_name, cache_age)
 
diff --git a/tests/rest/media/v1/test_oembed.py b/tests/rest/media/v1/test_oembed.py
index f38d7225f8..319ae8b1cc 100644
--- a/tests/rest/media/v1/test_oembed.py
+++ b/tests/rest/media/v1/test_oembed.py
@@ -14,6 +14,8 @@
 
 import json
 
+from parameterized import parameterized
+
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.rest.media.v1.oembed import OEmbedProvider, OEmbedResult
@@ -23,8 +25,16 @@ from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
 
+try:
+    import lxml
+except ImportError:
+    lxml = None
+
 
 class OEmbedTests(HomeserverTestCase):
+    if not lxml:
+        skip = "url preview feature requires lxml"
+
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.oembed = OEmbedProvider(hs)
 
@@ -36,7 +46,7 @@ class OEmbedTests(HomeserverTestCase):
     def test_version(self) -> None:
         """Accept versions that are similar to 1.0 as a string or int (or missing)."""
         for version in ("1.0", 1.0, 1):
-            result = self.parse_response({"version": version, "type": "link"})
+            result = self.parse_response({"version": version})
             # An empty Open Graph response is an error, ensure the URL is included.
             self.assertIn("og:url", result.open_graph_result)
 
@@ -49,3 +59,94 @@ class OEmbedTests(HomeserverTestCase):
             result = self.parse_response({"version": version, "type": "link"})
             # An empty Open Graph response is an error, ensure the URL is included.
             self.assertEqual({}, result.open_graph_result)
+
+    def test_cache_age(self) -> None:
+        """Ensure a cache-age is parsed properly."""
+        # Correct-ish cache ages are allowed.
+        for cache_age in ("1", 1.0, 1):
+            result = self.parse_response({"cache_age": cache_age})
+            self.assertEqual(result.cache_age, 1000)
+
+        # Invalid cache ages are ignored.
+        for cache_age in ("invalid", {}):
+            result = self.parse_response({"cache_age": cache_age})
+            self.assertIsNone(result.cache_age)
+
+        # Cache age is optional.
+        result = self.parse_response({})
+        self.assertIsNone(result.cache_age)
+
+    @parameterized.expand(
+        [
+            ("title", "title"),
+            ("provider_name", "site_name"),
+            ("thumbnail_url", "image"),
+        ],
+        name_func=lambda func, num, p: f"{func.__name__}_{p.args[0]}",
+    )
+    def test_property(self, oembed_property: str, open_graph_property: str) -> None:
+        """Test properties which must be strings."""
+        result = self.parse_response({oembed_property: "test"})
+        self.assertIn(f"og:{open_graph_property}", result.open_graph_result)
+        self.assertEqual(result.open_graph_result[f"og:{open_graph_property}"], "test")
+
+        result = self.parse_response({oembed_property: 1})
+        self.assertNotIn(f"og:{open_graph_property}", result.open_graph_result)
+
+    def test_author_name(self) -> None:
+        """Test the author_name property."""
+        result = self.parse_response({"author_name": "test"})
+        self.assertEqual(result.author_name, "test")
+
+        result = self.parse_response({"author_name": 1})
+        self.assertIsNone(result.author_name)
+
+    def test_rich(self) -> None:
+        """Test a type of rich."""
+        result = self.parse_response({"html": "test<img src='foo'>", "type": "rich"})
+        self.assertIn("og:description", result.open_graph_result)
+        self.assertIn("og:image", result.open_graph_result)
+        self.assertEqual(result.open_graph_result["og:description"], "test")
+        self.assertEqual(result.open_graph_result["og:image"], "foo")
+
+        result = self.parse_response({"type": "rich"})
+        self.assertNotIn("og:description", result.open_graph_result)
+
+        result = self.parse_response({"html": 1, "type": "rich"})
+        self.assertNotIn("og:description", result.open_graph_result)
+
+    def test_photo(self) -> None:
+        """Test a type of photo."""
+        result = self.parse_response({"url": "test", "type": "photo"})
+        self.assertIn("og:image", result.open_graph_result)
+        self.assertEqual(result.open_graph_result["og:image"], "test")
+
+        result = self.parse_response({"type": "photo"})
+        self.assertNotIn("og:image", result.open_graph_result)
+
+        result = self.parse_response({"url": 1, "type": "photo"})
+        self.assertNotIn("og:image", result.open_graph_result)
+
+    def test_video(self) -> None:
+        """Test a type of video."""
+        result = self.parse_response({"html": "test", "type": "video"})
+        self.assertIn("og:type", result.open_graph_result)
+        self.assertEqual(result.open_graph_result["og:type"], "video.other")
+        self.assertIn("og:description", result.open_graph_result)
+        self.assertEqual(result.open_graph_result["og:description"], "test")
+
+        result = self.parse_response({"type": "video"})
+        self.assertIn("og:type", result.open_graph_result)
+        self.assertEqual(result.open_graph_result["og:type"], "video.other")
+        self.assertNotIn("og:description", result.open_graph_result)
+
+        result = self.parse_response({"url": 1, "type": "video"})
+        self.assertIn("og:type", result.open_graph_result)
+        self.assertEqual(result.open_graph_result["og:type"], "video.other")
+        self.assertNotIn("og:description", result.open_graph_result)
+
+    def test_link(self) -> None:
+        """Test type of link."""
+        result = self.parse_response({"type": "link"})
+        self.assertIn("og:type", result.open_graph_result)
+        self.assertEqual(result.open_graph_result["og:type"], "website")
-- 
cgit 1.5.1


From f1673866ed8a39d49e2caaa6f4255a3f696bc3b4 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 7 Oct 2022 15:15:10 +0100
Subject: Unpin build-system requirements, but impose an upper-bound (#14085)

* Revert to prior build-system requirements

This reverts #14080.

* Use normalised extra name, which poetry-core 1.3 will generate anyway

* Changelog

* Upper bound build-system requirements

* Remove upgrade note; expand changelog entry a little.

* Fix typo in build-system comment

Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>

Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
---
 changelog.d/14085.misc       |  1 +
 pyproject.toml               | 11 ++++++++---
 synapse/config/repository.py |  2 +-
 3 files changed, 10 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14085.misc

(limited to 'synapse')

diff --git a/changelog.d/14085.misc b/changelog.d/14085.misc
new file mode 100644
index 0000000000..2d2df70a64
--- /dev/null
+++ b/changelog.d/14085.misc
@@ -0,0 +1 @@
+Rename the `url_preview` extra to `url-preview`, for compatability with poetry-core 1.3.0 and [PEP 685](https://peps.python.org/pep-0685/). From-source installations using this extra will need to install using the new name.
diff --git a/pyproject.toml b/pyproject.toml
index 622d6a9e89..81b2659eb1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -219,7 +219,7 @@ oidc = ["authlib"]
 # `systemd.journal.JournalHandler`, as is documented in
 # `contrib/systemd/log_config.yaml`.
 systemd = ["systemd-python"]
-url_preview = ["lxml"]
+url-preview = ["lxml"]
 sentry = ["sentry-sdk"]
 opentracing = ["jaeger-client", "opentracing"]
 jwt = ["authlib"]
@@ -250,7 +250,7 @@ all = [
     "pysaml2",
     # oidc and jwt
     "authlib",
-    # url_preview
+    # url-preview
     "lxml",
     # sentry
     "sentry-sdk",
@@ -307,7 +307,12 @@ twine = "*"
 towncrier = ">=18.6.0rc1"
 
 [build-system]
-requires = ["poetry-core==1.2.0", "setuptools_rust==1.5.2"]
+# The upper bounds here are defensive, intended to prevent situations like
+# #13849 and #14079 where we see buildtime or runtime errors caused by build
+# system changes.
+# We are happy to raise these upper bounds upon request,
+# provided we check that it's safe to do so (i.e. that CI passes).
+requires = ["poetry-core>=1.0.0,<=1.3.1", "setuptools_rust>=1.3,<=1.5.2"]
 build-backend = "poetry.core.masonry.api"
 
 
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index 1033496bb4..e4759711ed 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -205,7 +205,7 @@ class ContentRepositoryConfig(Config):
         )
         self.url_preview_enabled = config.get("url_preview_enabled", False)
         if self.url_preview_enabled:
-            check_requirements("url_preview")
+            check_requirements("url-preview")
 
             proxy_env = getproxies_environment()
             if "url_preview_ip_range_blacklist" not in config:
-- 
cgit 1.5.1


From dc37b68a25754240243cdca6f521919abfe71db0 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Fri, 7 Oct 2022 16:19:59 +0200
Subject: Parse SYNAPSE_ASYNC_IO_REACTOR env variable & log the reactor on
 startup (#14092)

---
 changelog.d/14092.misc   |  1 +
 synapse/__init__.py      | 26 ++++++++++++--------------
 synapse/config/logger.py |  3 +++
 3 files changed, 16 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/14092.misc

(limited to 'synapse')

diff --git a/changelog.d/14092.misc b/changelog.d/14092.misc
new file mode 100644
index 0000000000..c48f40cd38
--- /dev/null
+++ b/changelog.d/14092.misc
@@ -0,0 +1 @@
+Run the integration test suites with the asyncio reactor enabled in CI.
diff --git a/synapse/__init__.py b/synapse/__init__.py
index 1bed6393bd..fbfd506a43 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -21,6 +21,7 @@ import os
 import sys
 
 from synapse.util.rust import check_rust_lib_up_to_date
+from synapse.util.stringutils import strtobool
 
 # Check that we're not running on an unsupported Python version.
 if sys.version_info < (3, 7):
@@ -28,25 +29,22 @@ if sys.version_info < (3, 7):
     sys.exit(1)
 
 # Allow using the asyncio reactor via env var.
-if bool(os.environ.get("SYNAPSE_ASYNC_IO_REACTOR", False)):
-    try:
-        from incremental import Version
+if strtobool(os.environ.get("SYNAPSE_ASYNC_IO_REACTOR", "0")):
+    from incremental import Version
 
-        import twisted
+    import twisted
 
-        # We need a bugfix that is included in Twisted 21.2.0:
-        # https://twistedmatrix.com/trac/ticket/9787
-        if twisted.version < Version("Twisted", 21, 2, 0):
-            print("Using asyncio reactor requires Twisted>=21.2.0")
-            sys.exit(1)
+    # We need a bugfix that is included in Twisted 21.2.0:
+    # https://twistedmatrix.com/trac/ticket/9787
+    if twisted.version < Version("Twisted", 21, 2, 0):
+        print("Using asyncio reactor requires Twisted>=21.2.0")
+        sys.exit(1)
 
-        import asyncio
+    import asyncio
 
-        from twisted.internet import asyncioreactor
+    from twisted.internet import asyncioreactor
 
-        asyncioreactor.install(asyncio.get_event_loop())
-    except ImportError:
-        pass
+    asyncioreactor.install(asyncio.get_event_loop())
 
 # Twisted and canonicaljson will fail to import when this file is executed to
 # get the __version__ during a fresh install. That's OK and subsequent calls to
diff --git a/synapse/config/logger.py b/synapse/config/logger.py
index 6c1f78f8df..b62b3b9205 100644
--- a/synapse/config/logger.py
+++ b/synapse/config/logger.py
@@ -326,6 +326,8 @@ def setup_logging(
         logBeginner: The Twisted logBeginner to use.
 
     """
+    from twisted.internet import reactor
+
     log_config_path = (
         config.worker.worker_log_config
         if use_worker_options
@@ -348,3 +350,4 @@ def setup_logging(
     )
     logging.info("Server hostname: %s", config.server.server_name)
     logging.info("Instance name: %s", hs.get_instance_name())
+    logging.info("Twisted reactor: %s", type(reactor).__name__)
-- 
cgit 1.5.1


From ab8047b4bf581d0c343c1e900e8740745668d941 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 7 Oct 2022 11:27:50 -0400
Subject: Apply & bundle edits for non-message events. (#14034)

Fixes two related bugs:

* No edit information was bundled for events which aren't `m.room.message`.
* `m.new_content` was not applied for those events.
---
 changelog.d/14034.bugfix                    |  1 +
 synapse/storage/databases/main/relations.py | 11 ++++-------
 2 files changed, 5 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/14034.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14034.bugfix b/changelog.d/14034.bugfix
new file mode 100644
index 0000000000..e437ef3a01
--- /dev/null
+++ b/changelog.d/14034.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where edits of non-`m.room.message` events would not be correctly bundled or have their new content applied.
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index 154385b1e8..116abef9de 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -384,12 +384,11 @@ class RelationsWorkerStore(SQLBaseStore):
             the event will map to None.
         """
 
-        # We only allow edits for `m.room.message` events that have the same sender
-        # and event type. We can't assert these things during regular event auth so
-        # we have to do the checks post hoc.
+        # We only allow edits for events that have the same sender and event type.
+        # We can't assert these things during regular event auth so we have to do
+        # the checks post hoc.
 
-        # Fetches latest edit that has the same type and sender as the
-        # original, and is an `m.room.message`.
+        # Fetches latest edit that has the same type and sender as the original.
         if isinstance(self.database_engine, PostgresEngine):
             # The `DISTINCT ON` clause will pick the *first* row it encounters,
             # so ordering by origin server ts + event ID desc will ensure we get
@@ -405,7 +404,6 @@ class RelationsWorkerStore(SQLBaseStore):
                 WHERE
                     %s
                     AND relation_type = ?
-                    AND edit.type = 'm.room.message'
                 ORDER by original.event_id DESC, edit.origin_server_ts DESC, edit.event_id DESC
             """
         else:
@@ -424,7 +422,6 @@ class RelationsWorkerStore(SQLBaseStore):
                 WHERE
                     %s
                     AND relation_type = ?
-                    AND edit.type = 'm.room.message'
                 ORDER by edit.origin_server_ts, edit.event_id
             """
 
-- 
cgit 1.5.1


From e03d7c5fd0577df5b62cd34559925c6cfe3e0360 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 7 Oct 2022 12:38:46 -0400
Subject: Remove support for the unstable dir flag on relations. (#14106)

From MSC3715, this was unused by clients (and there was no
way for clients to know it was supported).

Matrix 1.4 defines the stable field.
---
 changelog.d/14106.removal        |  1 +
 synapse/config/experimental.py   |  3 ---
 synapse/handlers/relations.py    | 33 ++++++++++++++---------------
 synapse/rest/client/relations.py | 45 +++++++++-------------------------------
 synapse/streams/config.py        |  6 ++++--
 5 files changed, 31 insertions(+), 57 deletions(-)
 create mode 100644 changelog.d/14106.removal

(limited to 'synapse')

diff --git a/changelog.d/14106.removal b/changelog.d/14106.removal
new file mode 100644
index 0000000000..08fa752897
--- /dev/null
+++ b/changelog.d/14106.removal
@@ -0,0 +1 @@
+Remove the unstable identifier for [MSC3715](https://github.com/matrix-org/matrix-doc/pull/3715).
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index c35301207a..e00cb7096c 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -100,9 +100,6 @@ class ExperimentalConfig(Config):
         # MSC3773: Thread notifications
         self.msc3773_enabled: bool = experimental.get("msc3773_enabled", False)
 
-        # MSC3715: dir param on /relations.
-        self.msc3715_enabled: bool = experimental.get("msc3715_enabled", False)
-
         # MSC3848: Introduce errcodes for specific event sending failures
         self.msc3848_enabled: bool = experimental.get("msc3848_enabled", False)
 
diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py
index 63bc6a7aa5..cc5e45c241 100644
--- a/synapse/handlers/relations.py
+++ b/synapse/handlers/relations.py
@@ -21,6 +21,7 @@ from synapse.api.errors import SynapseError
 from synapse.events import EventBase, relation_from_event
 from synapse.logging.opentracing import trace
 from synapse.storage.databases.main.relations import _RelatedEvent
+from synapse.streams.config import PaginationConfig
 from synapse.types import JsonDict, Requester, StreamToken, UserID
 from synapse.visibility import filter_events_for_client
 
@@ -72,13 +73,10 @@ class RelationsHandler:
         requester: Requester,
         event_id: str,
         room_id: str,
+        pagin_config: PaginationConfig,
+        include_original_event: bool,
         relation_type: Optional[str] = None,
         event_type: Optional[str] = None,
-        limit: int = 5,
-        direction: str = "b",
-        from_token: Optional[StreamToken] = None,
-        to_token: Optional[StreamToken] = None,
-        include_original_event: bool = False,
     ) -> JsonDict:
         """Get related events of a event, ordered by topological ordering.
 
@@ -88,14 +86,10 @@ class RelationsHandler:
             requester: The user requesting the relations.
             event_id: Fetch events that relate to this event ID.
             room_id: The room the event belongs to.
+            pagin_config: The pagination config rules to apply, if any.
+            include_original_event: Whether to include the parent event.
             relation_type: Only fetch events with this relation type, if given.
             event_type: Only fetch events with this event type, if given.
-            limit: Only fetch the most recent `limit` events.
-            direction: Whether to fetch the most recent first (`"b"`) or the
-                oldest first (`"f"`).
-            from_token: Fetch rows from the given token, or from the start if None.
-            to_token: Fetch rows up to the given token, or up to the end if None.
-            include_original_event: Whether to include the parent event.
 
         Returns:
             The pagination chunk.
@@ -114,6 +108,9 @@ class RelationsHandler:
         if event is None:
             raise SynapseError(404, "Unknown parent event.")
 
+        # TODO Update pagination config to not allow None limits.
+        assert pagin_config.limit is not None
+
         # Note that ignored users are not passed into get_relations_for_event
         # below. Ignored users are handled in filter_events_for_client (and by
         # not passing them in here we should get a better cache hit rate).
@@ -123,10 +120,10 @@ class RelationsHandler:
             room_id=room_id,
             relation_type=relation_type,
             event_type=event_type,
-            limit=limit,
-            direction=direction,
-            from_token=from_token,
-            to_token=to_token,
+            limit=pagin_config.limit,
+            direction=pagin_config.direction,
+            from_token=pagin_config.from_token,
+            to_token=pagin_config.to_token,
         )
 
         events = await self._main_store.get_events_as_list(
@@ -162,8 +159,10 @@ class RelationsHandler:
         if next_token:
             return_value["next_batch"] = await next_token.to_string(self._main_store)
 
-        if from_token:
-            return_value["prev_batch"] = await from_token.to_string(self._main_store)
+        if pagin_config.from_token:
+            return_value["prev_batch"] = await pagin_config.from_token.to_string(
+                self._main_store
+            )
 
         return return_value
 
diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py
index 7a25de5c85..b31ce5a0d3 100644
--- a/synapse/rest/client/relations.py
+++ b/synapse/rest/client/relations.py
@@ -16,10 +16,11 @@ import logging
 from typing import TYPE_CHECKING, Optional, Tuple
 
 from synapse.http.server import HttpServer
-from synapse.http.servlet import RestServlet, parse_integer, parse_string
+from synapse.http.servlet import RestServlet
 from synapse.http.site import SynapseRequest
 from synapse.rest.client._base import client_patterns
-from synapse.types import JsonDict, StreamToken
+from synapse.streams.config import PaginationConfig
+from synapse.types import JsonDict
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -41,9 +42,8 @@ class RelationPaginationServlet(RestServlet):
     def __init__(self, hs: "HomeServer"):
         super().__init__()
         self.auth = hs.get_auth()
-        self.store = hs.get_datastores().main
+        self._store = hs.get_datastores().main
         self._relations_handler = hs.get_relations_handler()
-        self._msc3715_enabled = hs.config.experimental.msc3715_enabled
 
     async def on_GET(
         self,
@@ -55,49 +55,24 @@ class RelationPaginationServlet(RestServlet):
     ) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request, allow_guest=True)
 
-        limit = parse_integer(request, "limit", default=5)
-        # Fetch the direction parameter, if provided.
-        #
-        # TODO Use PaginationConfig.from_request when the unstable parameter is
-        #      no longer needed.
-        direction = parse_string(request, "dir", allowed_values=["f", "b"])
-        if direction is None:
-            if self._msc3715_enabled:
-                direction = parse_string(
-                    request,
-                    "org.matrix.msc3715.dir",
-                    default="b",
-                    allowed_values=["f", "b"],
-                )
-            else:
-                direction = "b"
-        from_token_str = parse_string(request, "from")
-        to_token_str = parse_string(request, "to")
-
-        # Return the relations
-        from_token = None
-        if from_token_str:
-            from_token = await StreamToken.from_string(self.store, from_token_str)
-        to_token = None
-        if to_token_str:
-            to_token = await StreamToken.from_string(self.store, to_token_str)
+        pagination_config = await PaginationConfig.from_request(
+            self._store, request, default_limit=5, default_dir="b"
+        )
 
         # The unstable version of this API returns an extra field for client
         # compatibility, see https://github.com/matrix-org/synapse/issues/12930.
         assert request.path is not None
         include_original_event = request.path.startswith(b"/_matrix/client/unstable/")
 
+        # Return the relations
         result = await self._relations_handler.get_relations(
             requester=requester,
             event_id=parent_id,
             room_id=room_id,
+            pagin_config=pagination_config,
+            include_original_event=include_original_event,
             relation_type=relation_type,
             event_type=event_type,
-            limit=limit,
-            direction=direction,
-            from_token=from_token,
-            to_token=to_token,
-            include_original_event=include_original_event,
         )
 
         return 200, result
diff --git a/synapse/streams/config.py b/synapse/streams/config.py
index b52723e2b8..f6f7bf3d8b 100644
--- a/synapse/streams/config.py
+++ b/synapse/streams/config.py
@@ -42,10 +42,12 @@ class PaginationConfig:
         cls,
         store: "DataStore",
         request: SynapseRequest,
-        raise_invalid_params: bool = True,
         default_limit: Optional[int] = None,
+        default_dir: str = "f",
     ) -> "PaginationConfig":
-        direction = parse_string(request, "dir", default="f", allowed_values=["f", "b"])
+        direction = parse_string(
+            request, "dir", default=default_dir, allowed_values=["f", "b"]
+        )
 
         from_tok_str = parse_string(request, "from")
         to_tok_str = parse_string(request, "to")
-- 
cgit 1.5.1


From 1bf2832714abdfc5e10395e8e76aecc591ad265f Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Fri, 7 Oct 2022 11:39:45 -0500
Subject: Indicate what endpoint came back with a JSON response we were unable
 to parse (#14097)

**Before:**
```
WARNING - POST-11 - Unable to parse JSON: Expecting value: line 1 column 1 (char 0) (b'')
```

**After:**
```
WARNING - POST-11 - Unable to parse JSON from POST /_matrix/client/v3/join/%21ZlmJtelqFroDRJYZaq:hs1?server_name=hs1 response: Expecting value: line 1 column 1 (char 0) (b'')
```

---

Previously, it was possible to figure out which endpoint these warnings came from, but you had to follow the request ID `POST-11` to the log line that says `Completed request [...]`. Including this key information next to the JSON parsing error makes it much easier to reason about whether it matters or not.

```
2022-09-29T08:23:25.7875506Z synapse_main | 2022-09-29 08:21:10,336 - synapse.http.matrixfederationclient - 299 - INFO - POST-11 - {GET-O-13} [hs1] Completed request: 200 OK in 0.53 secs, got 450 bytes - GET matrix://hs1/_matrix/federation/v1/make_join/%21ohtKoQiXlPePSycXwp%3Ahs1/%40charlie%3Ahs2?ver=1&ver=2&ver=3&ver=4&ver=5&ver=6&ver=org.matrix.msc2176&ver=7&ver=8&ver=9&ver=org.matrix.msc3787&ver=10&ver=org.matrix.msc2716v4
```


---

As a note, having no `body` is normal for the `/join` endpoint and it can handle it.

https://github.com/matrix-org/synapse/blob/0c853e09709d52783efd37060ed9e8f55a4fc704/synapse/rest/client/room.py#L398-L403

Alternatively, we could remove these extra logs, but they are usually helpful for figuring out what went wrong.
---
 changelog.d/14097.misc     | 1 +
 synapse/http/servlet.py    | 9 ++++++++-
 tests/http/test_servlet.py | 4 +++-
 3 files changed, 12 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14097.misc

(limited to 'synapse')

diff --git a/changelog.d/14097.misc b/changelog.d/14097.misc
new file mode 100644
index 0000000000..8392448c4d
--- /dev/null
+++ b/changelog.d/14097.misc
@@ -0,0 +1 @@
+Indicate what endpoint came back with a JSON response we were unable to parse.
diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py
index 80acbdcf3c..dead02cd5c 100644
--- a/synapse/http/servlet.py
+++ b/synapse/http/servlet.py
@@ -35,6 +35,7 @@ from typing_extensions import Literal
 from twisted.web.server import Request
 
 from synapse.api.errors import Codes, SynapseError
+from synapse.http import redact_uri
 from synapse.http.server import HttpServer
 from synapse.types import JsonDict, RoomAlias, RoomID
 from synapse.util import json_decoder
@@ -664,7 +665,13 @@ def parse_json_value_from_request(
     try:
         content = json_decoder.decode(content_bytes.decode("utf-8"))
     except Exception as e:
-        logger.warning("Unable to parse JSON: %s (%s)", e, content_bytes)
+        logger.warning(
+            "Unable to parse JSON from %s %s response: %s (%s)",
+            request.method.decode("ascii", errors="replace"),
+            redact_uri(request.uri.decode("ascii", errors="replace")),
+            e,
+            content_bytes,
+        )
         raise SynapseError(
             HTTPStatus.BAD_REQUEST, "Content not JSON.", errcode=Codes.NOT_JSON
         )
diff --git a/tests/http/test_servlet.py b/tests/http/test_servlet.py
index 3cbca0f5a3..46166292fe 100644
--- a/tests/http/test_servlet.py
+++ b/tests/http/test_servlet.py
@@ -35,11 +35,13 @@ from tests.http.server._base import test_disconnect
 
 def make_request(content):
     """Make an object that acts enough like a request."""
-    request = Mock(spec=["content"])
+    request = Mock(spec=["method", "uri", "content"])
 
     if isinstance(content, dict):
         content = json.dumps(content).encode("utf8")
 
+    request.method = bytes("STUB_METHOD", "ascii")
+    request.uri = bytes("/test_stub_uri", "ascii")
     request.content = BytesIO(content)
     return request
 
-- 
cgit 1.5.1


From 422cff7df6df3ac3691829fbce3fbd486f399869 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Tue, 11 Oct 2022 14:41:06 +0200
Subject: Fallback if 'approved' isn't included in a registration replication
 request (#14135)

---
 changelog.d/14135.bugfix             |  1 +
 synapse/replication/http/register.py | 18 +++++++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14135.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14135.bugfix b/changelog.d/14135.bugfix
new file mode 100644
index 0000000000..6d1d7816e8
--- /dev/null
+++ b/changelog.d/14135.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.69.0rc1 which would cause registration replication requests to fail if the worker sending the request is not running Synapse 1.69.
diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py
index 61abb529c8..976c283360 100644
--- a/synapse/replication/http/register.py
+++ b/synapse/replication/http/register.py
@@ -39,6 +39,16 @@ class ReplicationRegisterServlet(ReplicationEndpoint):
         self.store = hs.get_datastores().main
         self.registration_handler = hs.get_registration_handler()
 
+        # Default value if the worker that sent the replication request did not include
+        # an 'approved' property.
+        if (
+            hs.config.experimental.msc3866.enabled
+            and hs.config.experimental.msc3866.require_approval_for_new_accounts
+        ):
+            self._approval_default = False
+        else:
+            self._approval_default = True
+
     @staticmethod
     async def _serialize_payload(  # type: ignore[override]
         user_id: str,
@@ -92,6 +102,12 @@ class ReplicationRegisterServlet(ReplicationEndpoint):
 
         await self.registration_handler.check_registration_ratelimit(content["address"])
 
+        # Always default admin users to approved (since it means they were created by
+        # an admin).
+        approved_default = self._approval_default
+        if content["admin"]:
+            approved_default = True
+
         await self.registration_handler.register_with_store(
             user_id=user_id,
             password_hash=content["password_hash"],
@@ -103,7 +119,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint):
             user_type=content["user_type"],
             address=content["address"],
             shadow_banned=content["shadow_banned"],
-            approved=content["approved"],
+            approved=content.get("approved", approved_default),
         )
 
         return 200, {}
-- 
cgit 1.5.1


From a9934d48c193bc963e3d232ed83c5cbfa3e5152d Mon Sep 17 00:00:00 2001
From: Abdullah Osama <abdullahosama15@gmail.com>
Date: Tue, 11 Oct 2022 14:42:11 +0200
Subject: Making parse_server_name more consistent (#14007)

Fixes #12122
---
 changelog.d/14007.misc      | 1 +
 synapse/util/stringutils.py | 4 ++--
 tests/http/test_endpoint.py | 3 +++
 3 files changed, 6 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14007.misc

(limited to 'synapse')

diff --git a/changelog.d/14007.misc b/changelog.d/14007.misc
new file mode 100644
index 0000000000..3f0f3afe1c
--- /dev/null
+++ b/changelog.d/14007.misc
@@ -0,0 +1 @@
+Make `parse_server_name` consistent in handling invalid server names.
\ No newline at end of file
diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py
index 27a363d7e5..4961fe9313 100644
--- a/synapse/util/stringutils.py
+++ b/synapse/util/stringutils.py
@@ -86,7 +86,7 @@ def parse_server_name(server_name: str) -> Tuple[str, Optional[int]]:
         ValueError if the server name could not be parsed.
     """
     try:
-        if server_name[-1] == "]":
+        if server_name and server_name[-1] == "]":
             # ipv6 literal, hopefully
             return server_name, None
 
@@ -123,7 +123,7 @@ def parse_and_validate_server_name(server_name: str) -> Tuple[str, Optional[int]
     # that nobody is sneaking IP literals in that look like hostnames, etc.
 
     # look for ipv6 literals
-    if host[0] == "[":
+    if host and host[0] == "[":
         if host[-1] != "]":
             raise ValueError("Mismatched [...] in server name '%s'" % (server_name,))
 
diff --git a/tests/http/test_endpoint.py b/tests/http/test_endpoint.py
index c8cc21cadd..a801f002a0 100644
--- a/tests/http/test_endpoint.py
+++ b/tests/http/test_endpoint.py
@@ -25,6 +25,8 @@ class ServerNameTestCase(unittest.TestCase):
             "[0abc:1def::1234]": ("[0abc:1def::1234]", None),
             "1.2.3.4:1": ("1.2.3.4", 1),
             "[0abc:1def::1234]:8080": ("[0abc:1def::1234]", 8080),
+            ":80": ("", 80),
+            "": ("", None),
         }
 
         for i, o in test_data.items():
@@ -42,6 +44,7 @@ class ServerNameTestCase(unittest.TestCase):
             "newline.com\n",
             ".empty-label.com",
             "1234:5678:80",  # too many colons
+            ":80",
         ]
         for i in test_data:
             try:
-- 
cgit 1.5.1


From 02086e1da0e3fa3d5002bf2eb7560c043ad47187 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 11 Oct 2022 16:13:32 +0100
Subject: Fix rotating existing notifications in push summary (#14138)

Broken by #14045. Fixes #14120.

Introduced in v1.69.0rc2.
---
 changelog.d/14138.bugfix                             |  1 +
 synapse/storage/databases/main/event_push_actions.py | 20 +++++++++++++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/14138.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14138.bugfix b/changelog.d/14138.bugfix
new file mode 100644
index 0000000000..e2a2f3509e
--- /dev/null
+++ b/changelog.d/14138.bugfix
@@ -0,0 +1 @@
+Fix error in background update when rotating existing notifications. Introduced in v1.69.0rc2.
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index c9724d7345..87d07f7d9b 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -1104,11 +1104,13 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             )
 
             # First ensure that the existing rows have an updated thread_id field.
-            self.db_pool.simple_update_txn(
-                txn,
-                table="event_push_summary",
-                keyvalues={"room_id": room_id, "user_id": user_id, "thread_id": None},
-                updatevalues={"thread_id": "main"},
+            txn.execute(
+                """
+                UPDATE event_push_summary
+                SET thread_id = ?
+                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+                """,
+                ("main", room_id, user_id),
             )
 
             # Replace the previous summary with the new counts.
@@ -1272,6 +1274,14 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         logger.info("Rotating notifications, handling %d rows", len(summaries))
 
         # Ensure that any updated threads have an updated thread_id.
+        txn.execute_batch(
+            """
+            UPDATE event_push_summary
+            SET thread_id = ?
+            WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+            """,
+            [("main", room_id, user_id) for user_id, room_id in summaries],
+        )
         self.db_pool.simple_update_many_txn(
             txn,
             table="event_push_summary",
-- 
cgit 1.5.1


From 6136768e766b4b545d1e0e8ee6e18862292509f3 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 11 Oct 2022 13:14:57 -0400
Subject: Remove the groups config code. (#14142)

This has been unused for a long time, but missed removal in #11584.
---
 changelog.d/14142.misc   |  1 +
 synapse/config/groups.py | 27 ---------------------------
 2 files changed, 1 insertion(+), 27 deletions(-)
 create mode 100644 changelog.d/14142.misc
 delete mode 100644 synapse/config/groups.py

(limited to 'synapse')

diff --git a/changelog.d/14142.misc b/changelog.d/14142.misc
new file mode 100644
index 0000000000..3649317013
--- /dev/null
+++ b/changelog.d/14142.misc
@@ -0,0 +1 @@
+Remove unused configuration code.
diff --git a/synapse/config/groups.py b/synapse/config/groups.py
deleted file mode 100644
index baa051fdd4..0000000000
--- a/synapse/config/groups.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright 2017 New Vector Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Any
-
-from synapse.types import JsonDict
-
-from ._base import Config
-
-
-class GroupsConfig(Config):
-    section = "groups"
-
-    def read_config(self, config: JsonDict, **kwargs: Any) -> None:
-        self.enable_group_creation = config.get("enable_group_creation", False)
-        self.group_creation_prefix = config.get("group_creation_prefix", "")
-- 
cgit 1.5.1


From a86b2f6837f0a067b0a014fbf5140e8773b8da2e Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Tue, 11 Oct 2022 11:18:45 -0700
Subject: Fix a bug where redactions were not being sent over federation if we
 did not have the original event. (#13813)

---
 changelog.d/13813.bugfix                        |  1 +
 synapse/federation/sender/__init__.py           | 29 +++++++++++++++++--------
 synapse/handlers/appservice.py                  |  9 +++++---
 synapse/storage/databases/main/events_worker.py | 15 +++++++++----
 synapse/storage/databases/main/stream.py        | 28 +++++++++++-------------
 tests/handlers/test_appservice.py               | 18 +++++++++------
 6 files changed, 62 insertions(+), 38 deletions(-)
 create mode 100644 changelog.d/13813.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13813.bugfix b/changelog.d/13813.bugfix
new file mode 100644
index 0000000000..23388788ff
--- /dev/null
+++ b/changelog.d/13813.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where redactions were not being sent over federation if we did not have the original event.
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index a6cb3ba58f..774ecd81b6 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -353,21 +353,25 @@ class FederationSender(AbstractFederationSender):
                 last_token = await self.store.get_federation_out_pos("events")
                 (
                     next_token,
-                    events,
                     event_to_received_ts,
-                ) = await self.store.get_all_new_events_stream(
+                ) = await self.store.get_all_new_event_ids_stream(
                     last_token, self._last_poked_id, limit=100
                 )
 
+                event_ids = event_to_received_ts.keys()
+                event_entries = await self.store.get_unredacted_events_from_cache_or_db(
+                    event_ids
+                )
+
                 logger.debug(
                     "Handling %i -> %i: %i events to send (current id %i)",
                     last_token,
                     next_token,
-                    len(events),
+                    len(event_entries),
                     self._last_poked_id,
                 )
 
-                if not events and next_token >= self._last_poked_id:
+                if not event_entries and next_token >= self._last_poked_id:
                     logger.debug("All events processed")
                     break
 
@@ -508,8 +512,14 @@ class FederationSender(AbstractFederationSender):
                             await handle_event(event)
 
                 events_by_room: Dict[str, List[EventBase]] = {}
-                for event in events:
-                    events_by_room.setdefault(event.room_id, []).append(event)
+
+                for event_id in event_ids:
+                    # `event_entries` is unsorted, so we have to iterate over `event_ids`
+                    # to ensure the events are in the right order
+                    event_cache = event_entries.get(event_id)
+                    if event_cache:
+                        event = event_cache.event
+                        events_by_room.setdefault(event.room_id, []).append(event)
 
                 await make_deferred_yieldable(
                     defer.gatherResults(
@@ -524,9 +534,10 @@ class FederationSender(AbstractFederationSender):
                 logger.debug("Successfully handled up to %i", next_token)
                 await self.store.update_federation_out_pos("events", next_token)
 
-                if events:
+                if event_entries:
                     now = self.clock.time_msec()
-                    ts = event_to_received_ts[events[-1].event_id]
+                    last_id = next(reversed(event_ids))
+                    ts = event_to_received_ts[last_id]
                     assert ts is not None
 
                     synapse.metrics.event_processing_lag.labels(
@@ -536,7 +547,7 @@ class FederationSender(AbstractFederationSender):
                         "federation_sender"
                     ).set(ts)
 
-                    events_processed_counter.inc(len(events))
+                    events_processed_counter.inc(len(event_entries))
 
                     event_processing_loop_room_count.labels("federation_sender").inc(
                         len(events_by_room)
diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py
index 203b62e015..66f5b8d108 100644
--- a/synapse/handlers/appservice.py
+++ b/synapse/handlers/appservice.py
@@ -109,10 +109,13 @@ class ApplicationServicesHandler:
                     last_token = await self.store.get_appservice_last_pos()
                     (
                         upper_bound,
-                        events,
                         event_to_received_ts,
-                    ) = await self.store.get_all_new_events_stream(
-                        last_token, self.current_max, limit=100, get_prev_content=True
+                    ) = await self.store.get_all_new_event_ids_stream(
+                        last_token, self.current_max, limit=100
+                    )
+
+                    events = await self.store.get_events_as_list(
+                        event_to_received_ts.keys(), get_prev_content=True
                     )
 
                     events_by_room: Dict[str, List[EventBase]] = {}
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 7cdc9fe98f..d4104462b5 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -474,7 +474,7 @@ class EventsWorkerStore(SQLBaseStore):
             return []
 
         # there may be duplicates so we cast the list to a set
-        event_entry_map = await self._get_events_from_cache_or_db(
+        event_entry_map = await self.get_unredacted_events_from_cache_or_db(
             set(event_ids), allow_rejected=allow_rejected
         )
 
@@ -509,7 +509,9 @@ class EventsWorkerStore(SQLBaseStore):
                     continue
 
                 redacted_event_id = entry.event.redacts
-                event_map = await self._get_events_from_cache_or_db([redacted_event_id])
+                event_map = await self.get_unredacted_events_from_cache_or_db(
+                    [redacted_event_id]
+                )
                 original_event_entry = event_map.get(redacted_event_id)
                 if not original_event_entry:
                     # we don't have the redacted event (or it was rejected).
@@ -588,11 +590,16 @@ class EventsWorkerStore(SQLBaseStore):
         return events
 
     @cancellable
-    async def _get_events_from_cache_or_db(
-        self, event_ids: Iterable[str], allow_rejected: bool = False
+    async def get_unredacted_events_from_cache_or_db(
+        self,
+        event_ids: Iterable[str],
+        allow_rejected: bool = False,
     ) -> Dict[str, EventCacheEntry]:
         """Fetch a bunch of events from the cache or the database.
 
+        Note that the events pulled by this function will not have any redactions
+        applied, and no guarantee is made about the ordering of the events returned.
+
         If events are pulled from the database, they will be cached for future lookups.
 
         Unknown events are omitted from the response.
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 530f04e149..ffeb2b3683 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -1024,28 +1024,31 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
             "after": {"event_ids": events_after, "token": end_token},
         }
 
-    async def get_all_new_events_stream(
-        self, from_id: int, current_id: int, limit: int, get_prev_content: bool = False
-    ) -> Tuple[int, List[EventBase], Dict[str, Optional[int]]]:
+    async def get_all_new_event_ids_stream(
+        self,
+        from_id: int,
+        current_id: int,
+        limit: int,
+    ) -> Tuple[int, Dict[str, Optional[int]]]:
         """Get all new events
 
-        Returns all events with from_id < stream_ordering <= current_id.
+        Returns all event ids with from_id < stream_ordering <= current_id.
 
         Args:
             from_id:  the stream_ordering of the last event we processed
             current_id:  the stream_ordering of the most recently processed event
             limit: the maximum number of events to return
-            get_prev_content: whether to fetch previous event content
 
         Returns:
-            A tuple of (next_id, events, event_to_received_ts), where `next_id`
+            A tuple of (next_id, event_to_received_ts), where `next_id`
             is the next value to pass as `from_id` (it will either be the
             stream_ordering of the last returned event, or, if fewer than `limit`
             events were found, the `current_id`). The `event_to_received_ts` is
-            a dictionary mapping event ID to the event `received_ts`.
+            a dictionary mapping event ID to the event `received_ts`, sorted by ascending
+            stream_ordering.
         """
 
-        def get_all_new_events_stream_txn(
+        def get_all_new_event_ids_stream_txn(
             txn: LoggingTransaction,
         ) -> Tuple[int, Dict[str, Optional[int]]]:
             sql = (
@@ -1070,15 +1073,10 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
             return upper_bound, event_to_received_ts
 
         upper_bound, event_to_received_ts = await self.db_pool.runInteraction(
-            "get_all_new_events_stream", get_all_new_events_stream_txn
-        )
-
-        events = await self.get_events_as_list(
-            event_to_received_ts.keys(),
-            get_prev_content=get_prev_content,
+            "get_all_new_event_ids_stream", get_all_new_event_ids_stream_txn
         )
 
-        return upper_bound, events, event_to_received_ts
+        return upper_bound, event_to_received_ts
 
     async def get_federation_out_pos(self, typ: str) -> int:
         if self._need_to_reset_federation_stream_positions:
diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py
index af24c4984d..7e4570f990 100644
--- a/tests/handlers/test_appservice.py
+++ b/tests/handlers/test_appservice.py
@@ -76,9 +76,13 @@ class AppServiceHandlerTestCase(unittest.TestCase):
         event = Mock(
             sender="@someone:anywhere", type="m.room.message", room_id="!foo:bar"
         )
-        self.mock_store.get_all_new_events_stream.side_effect = [
-            make_awaitable((0, [], {})),
-            make_awaitable((1, [event], {event.event_id: 0})),
+        self.mock_store.get_all_new_event_ids_stream.side_effect = [
+            make_awaitable((0, {})),
+            make_awaitable((1, {event.event_id: 0})),
+        ]
+        self.mock_store.get_events_as_list.side_effect = [
+            make_awaitable([]),
+            make_awaitable([event]),
         ]
         self.handler.notify_interested_services(RoomStreamToken(None, 1))
 
@@ -95,10 +99,10 @@ class AppServiceHandlerTestCase(unittest.TestCase):
 
         event = Mock(sender=user_id, type="m.room.message", room_id="!foo:bar")
         self.mock_as_api.query_user.return_value = make_awaitable(True)
-        self.mock_store.get_all_new_events_stream.side_effect = [
-            make_awaitable((0, [event], {event.event_id: 0})),
+        self.mock_store.get_all_new_event_ids_stream.side_effect = [
+            make_awaitable((0, {event.event_id: 0})),
         ]
-
+        self.mock_store.get_events_as_list.side_effect = [make_awaitable([event])]
         self.handler.notify_interested_services(RoomStreamToken(None, 0))
 
         self.mock_as_api.query_user.assert_called_once_with(services[0], user_id)
@@ -112,7 +116,7 @@ class AppServiceHandlerTestCase(unittest.TestCase):
 
         event = Mock(sender=user_id, type="m.room.message", room_id="!foo:bar")
         self.mock_as_api.query_user.return_value = make_awaitable(True)
-        self.mock_store.get_all_new_events_stream.side_effect = [
+        self.mock_store.get_all_new_event_ids_stream.side_effect = [
             make_awaitable((0, [event], {event.event_id: 0})),
         ]
 
-- 
cgit 1.5.1


From 09be8ab5f9d54fa1a577d8b0028abf8acc28f30d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 12 Oct 2022 06:26:39 -0400
Subject: Remove the experimental implementation of MSC3772. (#14094)

MSC3772 has been abandoned.
---
 changelog.d/14094.removal                   |   1 +
 rust/src/push/base_rules.rs                 |  13 ----
 rust/src/push/evaluator.rs                  | 105 +---------------------------
 rust/src/push/mod.rs                        |  44 +++---------
 stubs/synapse/synapse_rust/push.pyi         |   6 +-
 synapse/config/experimental.py              |   2 -
 synapse/push/bulk_push_rule_evaluator.py    |  64 +----------------
 synapse/storage/databases/main/cache.py     |   3 -
 synapse/storage/databases/main/events.py    |   5 --
 synapse/storage/databases/main/push_rule.py |  15 ++--
 synapse/storage/databases/main/relations.py |  53 --------------
 tests/push/test_push_rule_evaluator.py      |  76 +-------------------
 12 files changed, 22 insertions(+), 365 deletions(-)
 create mode 100644 changelog.d/14094.removal

(limited to 'synapse')

diff --git a/changelog.d/14094.removal b/changelog.d/14094.removal
new file mode 100644
index 0000000000..6ef03b1a0f
--- /dev/null
+++ b/changelog.d/14094.removal
@@ -0,0 +1 @@
+Remove the experimental implementation of [MSC3772](https://github.com/matrix-org/matrix-spec-proposals/pull/3772).
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index 2a09cf99ae..63240cacfc 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -257,19 +257,6 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[
         default: true,
         default_enabled: true,
     },
-    PushRule {
-        rule_id: Cow::Borrowed("global/underride/.org.matrix.msc3772.thread_reply"),
-        priority_class: 1,
-        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::RelationMatch {
-            rel_type: Cow::Borrowed("m.thread"),
-            event_type_pattern: None,
-            sender: None,
-            sender_type: Some(Cow::Borrowed("user_id")),
-        })]),
-        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]),
-        default: true,
-        default_enabled: true,
-    },
     PushRule {
         rule_id: Cow::Borrowed("global/underride/.m.rule.message"),
         priority_class: 1,
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index efe88ec76e..0365dd01dc 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -12,10 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::{
-    borrow::Cow,
-    collections::{BTreeMap, BTreeSet},
-};
+use std::collections::BTreeMap;
 
 use anyhow::{Context, Error};
 use lazy_static::lazy_static;
@@ -49,13 +46,6 @@ pub struct PushRuleEvaluator {
     /// The `notifications` section of the current power levels in the room.
     notification_power_levels: BTreeMap<String, i64>,
 
-    /// The relations related to the event as a mapping from relation type to
-    /// set of sender/event type 2-tuples.
-    relations: BTreeMap<String, BTreeSet<(String, String)>>,
-
-    /// Is running "relation" conditions enabled?
-    relation_match_enabled: bool,
-
     /// The power level of the sender of the event, or None if event is an
     /// outlier.
     sender_power_level: Option<i64>,
@@ -70,8 +60,6 @@ impl PushRuleEvaluator {
         room_member_count: u64,
         sender_power_level: Option<i64>,
         notification_power_levels: BTreeMap<String, i64>,
-        relations: BTreeMap<String, BTreeSet<(String, String)>>,
-        relation_match_enabled: bool,
     ) -> Result<Self, Error> {
         let body = flattened_keys
             .get("content.body")
@@ -83,8 +71,6 @@ impl PushRuleEvaluator {
             body,
             room_member_count,
             notification_power_levels,
-            relations,
-            relation_match_enabled,
             sender_power_level,
         })
     }
@@ -203,89 +189,11 @@ impl PushRuleEvaluator {
                     false
                 }
             }
-            KnownCondition::RelationMatch {
-                rel_type,
-                event_type_pattern,
-                sender,
-                sender_type,
-            } => {
-                self.match_relations(rel_type, sender, sender_type, user_id, event_type_pattern)?
-            }
         };
 
         Ok(result)
     }
 
-    /// Evaluates a relation condition.
-    fn match_relations(
-        &self,
-        rel_type: &str,
-        sender: &Option<Cow<str>>,
-        sender_type: &Option<Cow<str>>,
-        user_id: Option<&str>,
-        event_type_pattern: &Option<Cow<str>>,
-    ) -> Result<bool, Error> {
-        // First check if relation matching is enabled...
-        if !self.relation_match_enabled {
-            return Ok(false);
-        }
-
-        // ... and if there are any relations to match against.
-        let relations = if let Some(relations) = self.relations.get(rel_type) {
-            relations
-        } else {
-            return Ok(false);
-        };
-
-        // Extract the sender pattern from the condition
-        let sender_pattern = if let Some(sender) = sender {
-            Some(sender.as_ref())
-        } else if let Some(sender_type) = sender_type {
-            if sender_type == "user_id" {
-                if let Some(user_id) = user_id {
-                    Some(user_id)
-                } else {
-                    return Ok(false);
-                }
-            } else {
-                warn!("Unrecognized sender_type: {sender_type}");
-                return Ok(false);
-            }
-        } else {
-            None
-        };
-
-        let mut sender_compiled_pattern = if let Some(pattern) = sender_pattern {
-            Some(get_glob_matcher(pattern, GlobMatchType::Whole)?)
-        } else {
-            None
-        };
-
-        let mut type_compiled_pattern = if let Some(pattern) = event_type_pattern {
-            Some(get_glob_matcher(pattern, GlobMatchType::Whole)?)
-        } else {
-            None
-        };
-
-        for (relation_sender, event_type) in relations {
-            if let Some(pattern) = &mut sender_compiled_pattern {
-                if !pattern.is_match(relation_sender)? {
-                    continue;
-                }
-            }
-
-            if let Some(pattern) = &mut type_compiled_pattern {
-                if !pattern.is_match(event_type)? {
-                    continue;
-                }
-            }
-
-            return Ok(true);
-        }
-
-        Ok(false)
-    }
-
     /// Evaluates a `event_match` condition.
     fn match_event_match(
         &self,
@@ -359,15 +267,8 @@ impl PushRuleEvaluator {
 fn push_rule_evaluator() {
     let mut flattened_keys = BTreeMap::new();
     flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string());
-    let evaluator = PushRuleEvaluator::py_new(
-        flattened_keys,
-        10,
-        Some(0),
-        BTreeMap::new(),
-        BTreeMap::new(),
-        true,
-    )
-    .unwrap();
+    let evaluator =
+        PushRuleEvaluator::py_new(flattened_keys, 10, Some(0), BTreeMap::new()).unwrap();
 
     let result = evaluator.run(&FilteredPushRules::default(), None, Some("bob"));
     assert_eq!(result.len(), 3);
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index 208b9c0d73..0dabfab8b8 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -275,16 +275,6 @@ pub enum KnownCondition {
     SenderNotificationPermission {
         key: Cow<'static, str>,
     },
-    #[serde(rename = "org.matrix.msc3772.relation_match")]
-    RelationMatch {
-        rel_type: Cow<'static, str>,
-        #[serde(skip_serializing_if = "Option::is_none", rename = "type")]
-        event_type_pattern: Option<Cow<'static, str>>,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        sender: Option<Cow<'static, str>>,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        sender_type: Option<Cow<'static, str>>,
-    },
 }
 
 impl IntoPy<PyObject> for Condition {
@@ -401,21 +391,15 @@ impl PushRules {
 pub struct FilteredPushRules {
     push_rules: PushRules,
     enabled_map: BTreeMap<String, bool>,
-    msc3772_enabled: bool,
 }
 
 #[pymethods]
 impl FilteredPushRules {
     #[new]
-    pub fn py_new(
-        push_rules: PushRules,
-        enabled_map: BTreeMap<String, bool>,
-        msc3772_enabled: bool,
-    ) -> Self {
+    pub fn py_new(push_rules: PushRules, enabled_map: BTreeMap<String, bool>) -> Self {
         Self {
             push_rules,
             enabled_map,
-            msc3772_enabled,
         }
     }
 
@@ -430,25 +414,13 @@ impl FilteredPushRules {
     /// Iterates over all the rules and their enabled state, including base
     /// rules, in the order they should be executed in.
     fn iter(&self) -> impl Iterator<Item = (&PushRule, bool)> {
-        self.push_rules
-            .iter()
-            .filter(|rule| {
-                // Ignore disabled experimental push rules
-                if !self.msc3772_enabled
-                    && rule.rule_id == "global/underride/.org.matrix.msc3772.thread_reply"
-                {
-                    return false;
-                }
-
-                true
-            })
-            .map(|r| {
-                let enabled = *self
-                    .enabled_map
-                    .get(&*r.rule_id)
-                    .unwrap_or(&r.default_enabled);
-                (r, enabled)
-            })
+        self.push_rules.iter().map(|r| {
+            let enabled = *self
+                .enabled_map
+                .get(&*r.rule_id)
+                .unwrap_or(&r.default_enabled);
+            (r, enabled)
+        })
     }
 }
 
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index 5900e61450..f2a61df660 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -25,9 +25,7 @@ class PushRules:
     def rules(self) -> Collection[PushRule]: ...
 
 class FilteredPushRules:
-    def __init__(
-        self, push_rules: PushRules, enabled_map: Dict[str, bool], msc3772_enabled: bool
-    ): ...
+    def __init__(self, push_rules: PushRules, enabled_map: Dict[str, bool]): ...
     def rules(self) -> Collection[Tuple[PushRule, bool]]: ...
 
 def get_base_rule_ids() -> Collection[str]: ...
@@ -39,8 +37,6 @@ class PushRuleEvaluator:
         room_member_count: int,
         sender_power_level: Optional[int],
         notification_power_levels: Mapping[str, int],
-        relations: Mapping[str, Set[Tuple[str, str]]],
-        relation_match_enabled: bool,
     ): ...
     def run(
         self,
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index e00cb7096c..f44655516e 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -95,8 +95,6 @@ class ExperimentalConfig(Config):
         # MSC2815 (allow room moderators to view redacted event content)
         self.msc2815_enabled: bool = experimental.get("msc2815_enabled", False)
 
-        # MSC3772: A push rule for mutual relations.
-        self.msc3772_enabled: bool = experimental.get("msc3772_enabled", False)
         # MSC3773: Thread notifications
         self.msc3773_enabled: bool = experimental.get("msc3773_enabled", False)
 
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index eced182fd5..8d94aeaa32 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -13,18 +13,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import itertools
 import logging
 from typing import (
     TYPE_CHECKING,
     Any,
     Collection,
     Dict,
-    Iterable,
     List,
     Mapping,
     Optional,
-    Set,
     Tuple,
     Union,
 )
@@ -38,7 +35,7 @@ from synapse.events.snapshot import EventContext
 from synapse.state import POWER_KEY
 from synapse.storage.databases.main.roommember import EventIdMembership
 from synapse.storage.state import StateFilter
-from synapse.synapse_rust.push import FilteredPushRules, PushRule, PushRuleEvaluator
+from synapse.synapse_rust.push import FilteredPushRules, PushRuleEvaluator
 from synapse.util.caches import register_cache
 from synapse.util.metrics import measure_func
 from synapse.visibility import filter_event_for_clients_with_state
@@ -117,9 +114,6 @@ class BulkPushRuleEvaluator:
             resizable=False,
         )
 
-        # Whether to support MSC3772 is supported.
-        self._relations_match_enabled = self.hs.config.experimental.msc3772_enabled
-
     async def _get_rules_for_event(
         self,
         event: EventBase,
@@ -200,51 +194,6 @@ class BulkPushRuleEvaluator:
 
         return pl_event.content if pl_event else {}, sender_level
 
-    async def _get_mutual_relations(
-        self, parent_id: str, rules: Iterable[Tuple[PushRule, bool]]
-    ) -> Dict[str, Set[Tuple[str, str]]]:
-        """
-        Fetch event metadata for events which related to the same event as the given event.
-
-        If the given event has no relation information, returns an empty dictionary.
-
-        Args:
-            parent_id: The event ID which is targeted by relations.
-            rules: The push rules which will be processed for this event.
-
-        Returns:
-            A dictionary of relation type to:
-                A set of tuples of:
-                    The sender
-                    The event type
-        """
-
-        # If the experimental feature is not enabled, skip fetching relations.
-        if not self._relations_match_enabled:
-            return {}
-
-        # Pre-filter to figure out which relation types are interesting.
-        rel_types = set()
-        for rule, enabled in rules:
-            if not enabled:
-                continue
-
-            for condition in rule.conditions:
-                if condition["kind"] != "org.matrix.msc3772.relation_match":
-                    continue
-
-                # rel_type is required.
-                rel_type = condition.get("rel_type")
-                if rel_type:
-                    rel_types.add(rel_type)
-
-        # If no valid rules were found, no mutual relations.
-        if not rel_types:
-            return {}
-
-        # If any valid rules were found, fetch the mutual relations.
-        return await self.store.get_mutual_event_relations(parent_id, rel_types)
-
     @measure_func("action_for_event_by_user")
     async def action_for_event_by_user(
         self, event: EventBase, context: EventContext
@@ -276,16 +225,11 @@ class BulkPushRuleEvaluator:
             sender_power_level,
         ) = await self._get_power_levels_and_sender_level(event, context)
 
+        # Find the event's thread ID.
         relation = relation_from_event(event)
-        # If the event does not have a relation, then cannot have any mutual
-        # relations or thread ID.
-        relations = {}
+        # If the event does not have a relation, then it cannot have a thread ID.
         thread_id = MAIN_TIMELINE
         if relation:
-            relations = await self._get_mutual_relations(
-                relation.parent_id,
-                itertools.chain(*(r.rules() for r in rules_by_user.values())),
-            )
             # Recursively attempt to find the thread this event relates to.
             if relation.rel_type == RelationTypes.THREAD:
                 thread_id = relation.parent_id
@@ -306,8 +250,6 @@ class BulkPushRuleEvaluator:
             room_member_count,
             sender_power_level,
             notification_levels,
-            relations,
-            self._relations_match_enabled,
         )
 
         users = rules_by_user.keys()
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index 3b8ed1f7ee..a9f25a5904 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -259,9 +259,6 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
             self._attempt_to_invalidate_cache("get_applicable_edit", (relates_to,))
             self._attempt_to_invalidate_cache("get_thread_summary", (relates_to,))
             self._attempt_to_invalidate_cache("get_thread_participated", (relates_to,))
-            self._attempt_to_invalidate_cache(
-                "get_mutual_event_relations_for_rel_type", (relates_to,)
-            )
 
     async def invalidate_cache_and_stream(
         self, cache_name: str, keys: Tuple[Any, ...]
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 3e15827986..060fe71454 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -2024,11 +2024,6 @@ class PersistEventsStore:
             self.store._invalidate_cache_and_stream(
                 txn, self.store.get_thread_participated, (redacted_relates_to,)
             )
-            self.store._invalidate_cache_and_stream(
-                txn,
-                self.store.get_mutual_event_relations_for_rel_type,
-                (redacted_relates_to,),
-            )
 
         self.db_pool.simple_delete_txn(
             txn, table="event_relations", keyvalues={"event_id": redacted_event_id}
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index 8295322b0e..51416b2236 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -29,7 +29,6 @@ from typing import (
 )
 
 from synapse.api.errors import StoreError
-from synapse.config.homeserver import ExperimentalConfig
 from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
@@ -63,9 +62,7 @@ logger = logging.getLogger(__name__)
 
 
 def _load_rules(
-    rawrules: List[JsonDict],
-    enabled_map: Dict[str, bool],
-    experimental_config: ExperimentalConfig,
+    rawrules: List[JsonDict], enabled_map: Dict[str, bool]
 ) -> FilteredPushRules:
     """Take the DB rows returned from the DB and convert them into a full
     `FilteredPushRules` object.
@@ -83,9 +80,7 @@ def _load_rules(
 
     push_rules = PushRules(ruleslist)
 
-    filtered_rules = FilteredPushRules(
-        push_rules, enabled_map, msc3772_enabled=experimental_config.msc3772_enabled
-    )
+    filtered_rules = FilteredPushRules(push_rules, enabled_map)
 
     return filtered_rules
 
@@ -165,7 +160,7 @@ class PushRulesWorkerStore(
 
         enabled_map = await self.get_push_rules_enabled_for_user(user_id)
 
-        return _load_rules(rows, enabled_map, self.hs.config.experimental)
+        return _load_rules(rows, enabled_map)
 
     async def get_push_rules_enabled_for_user(self, user_id: str) -> Dict[str, bool]:
         results = await self.db_pool.simple_select_list(
@@ -224,9 +219,7 @@ class PushRulesWorkerStore(
         results: Dict[str, FilteredPushRules] = {}
 
         for user_id, rules in raw_rules.items():
-            results[user_id] = _load_rules(
-                rules, enabled_map_by_user.get(user_id, {}), self.hs.config.experimental
-            )
+            results[user_id] = _load_rules(rules, enabled_map_by_user.get(user_id, {}))
 
         return results
 
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index 116abef9de..6b7eec4bf2 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -776,59 +776,6 @@ class RelationsWorkerStore(SQLBaseStore):
             "get_if_user_has_annotated_event", _get_if_user_has_annotated_event
         )
 
-    @cached(iterable=True)
-    async def get_mutual_event_relations_for_rel_type(
-        self, event_id: str, relation_type: str
-    ) -> Set[Tuple[str, str]]:
-        raise NotImplementedError()
-
-    @cachedList(
-        cached_method_name="get_mutual_event_relations_for_rel_type",
-        list_name="relation_types",
-    )
-    async def get_mutual_event_relations(
-        self, event_id: str, relation_types: Collection[str]
-    ) -> Dict[str, Set[Tuple[str, str]]]:
-        """
-        Fetch event metadata for events which related to the same event as the given event.
-
-        If the given event has no relation information, returns an empty dictionary.
-
-        Args:
-            event_id: The event ID which is targeted by relations.
-            relation_types: The relation types to check for mutual relations.
-
-        Returns:
-            A dictionary of relation type to:
-                A set of tuples of:
-                    The sender
-                    The event type
-        """
-        rel_type_sql, rel_type_args = make_in_list_sql_clause(
-            self.database_engine, "relation_type", relation_types
-        )
-
-        sql = f"""
-            SELECT DISTINCT relation_type, sender, type FROM event_relations
-            INNER JOIN events USING (event_id)
-            WHERE relates_to_id = ? AND {rel_type_sql}
-        """
-
-        def _get_event_relations(
-            txn: LoggingTransaction,
-        ) -> Dict[str, Set[Tuple[str, str]]]:
-            txn.execute(sql, [event_id] + rel_type_args)
-            result: Dict[str, Set[Tuple[str, str]]] = {
-                rel_type: set() for rel_type in relation_types
-            }
-            for rel_type, sender, type in txn.fetchall():
-                result[rel_type].add((sender, type))
-            return result
-
-        return await self.db_pool.runInteraction(
-            "get_event_relations", _get_event_relations
-        )
-
     @cached()
     async def get_thread_id(self, event_id: str) -> Optional[str]:
         """
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index 8804f0e0d3..decf619466 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict, Optional, Set, Tuple, Union
+from typing import Dict, Optional, Union
 
 import frozendict
 
@@ -38,12 +38,7 @@ from tests.test_utils.event_injection import create_event, inject_member_event
 
 
 class PushRuleEvaluatorTestCase(unittest.TestCase):
-    def _get_evaluator(
-        self,
-        content: JsonDict,
-        relations: Optional[Dict[str, Set[Tuple[str, str]]]] = None,
-        relations_match_enabled: bool = False,
-    ) -> PushRuleEvaluator:
+    def _get_evaluator(self, content: JsonDict) -> PushRuleEvaluator:
         event = FrozenEvent(
             {
                 "event_id": "$event_id",
@@ -63,8 +58,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
             room_member_count,
             sender_power_level,
             power_levels.get("notifications", {}),
-            relations or {},
-            relations_match_enabled,
         )
 
     def test_display_name(self) -> None:
@@ -299,71 +292,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
             {"sound": "default", "highlight": True},
         )
 
-    def test_relation_match(self) -> None:
-        """Test the relation_match push rule kind."""
-
-        # Check if the experimental feature is disabled.
-        evaluator = self._get_evaluator(
-            {}, {"m.annotation": {("@user:test", "m.reaction")}}
-        )
-
-        # A push rule evaluator with the experimental rule enabled.
-        evaluator = self._get_evaluator(
-            {}, {"m.annotation": {("@user:test", "m.reaction")}}, True
-        )
-
-        # Check just relation type.
-        condition = {
-            "kind": "org.matrix.msc3772.relation_match",
-            "rel_type": "m.annotation",
-        }
-        self.assertTrue(evaluator.matches(condition, "@user:test", "foo"))
-
-        # Check relation type and sender.
-        condition = {
-            "kind": "org.matrix.msc3772.relation_match",
-            "rel_type": "m.annotation",
-            "sender": "@user:test",
-        }
-        self.assertTrue(evaluator.matches(condition, "@user:test", "foo"))
-        condition = {
-            "kind": "org.matrix.msc3772.relation_match",
-            "rel_type": "m.annotation",
-            "sender": "@other:test",
-        }
-        self.assertFalse(evaluator.matches(condition, "@user:test", "foo"))
-
-        # Check relation type and event type.
-        condition = {
-            "kind": "org.matrix.msc3772.relation_match",
-            "rel_type": "m.annotation",
-            "type": "m.reaction",
-        }
-        self.assertTrue(evaluator.matches(condition, "@user:test", "foo"))
-
-        # Check just sender, this fails since rel_type is required.
-        condition = {
-            "kind": "org.matrix.msc3772.relation_match",
-            "sender": "@user:test",
-        }
-        self.assertFalse(evaluator.matches(condition, "@user:test", "foo"))
-
-        # Check sender glob.
-        condition = {
-            "kind": "org.matrix.msc3772.relation_match",
-            "rel_type": "m.annotation",
-            "sender": "@*:test",
-        }
-        self.assertTrue(evaluator.matches(condition, "@user:test", "foo"))
-
-        # Check event type glob.
-        condition = {
-            "kind": "org.matrix.msc3772.relation_match",
-            "rel_type": "m.annotation",
-            "event_type": "*.reaction",
-        }
-        self.assertTrue(evaluator.matches(condition, "@user:test", "foo"))
-
 
 class TestBulkPushRuleEvaluator(unittest.HomeserverTestCase):
     """Tests for the bulk push rule evaluator"""
-- 
cgit 1.5.1


From f9bc5428c46e73ca471b6976865d5ba4168f938d Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Wed, 12 Oct 2022 11:36:22 +0100
Subject: Batch up calls to `get_rooms_for_users` (#14109)

---
 changelog.d/14109.misc                       |  1 +
 synapse/storage/databases/main/roommember.py | 17 ++++++++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14109.misc

(limited to 'synapse')

diff --git a/changelog.d/14109.misc b/changelog.d/14109.misc
new file mode 100644
index 0000000000..7987c2050f
--- /dev/null
+++ b/changelog.d/14109.misc
@@ -0,0 +1 @@
+Break up calls to fetch rooms for many users. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 2337289d88..2ed6ad754f 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -666,7 +666,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         cached_method_name="get_rooms_for_user",
         list_name="user_ids",
     )
-    async def get_rooms_for_users(
+    async def _get_rooms_for_users(
         self, user_ids: Collection[str]
     ) -> Dict[str, FrozenSet[str]]:
         """A batched version of `get_rooms_for_user`.
@@ -697,6 +697,21 @@ class RoomMemberWorkerStore(EventsWorkerStore):
 
         return {key: frozenset(rooms) for key, rooms in user_rooms.items()}
 
+    async def get_rooms_for_users(
+        self, user_ids: Collection[str]
+    ) -> Dict[str, FrozenSet[str]]:
+        """A batched wrapper around `_get_rooms_for_users`, to prevent locking
+        other calls to `get_rooms_for_user` for large user lists.
+        """
+        all_user_rooms: Dict[str, FrozenSet[str]] = {}
+
+        # 250 users is pretty arbitrary but the data can be quite large if users
+        # are in many rooms.
+        for user_ids in batch_iter(user_ids, 250):
+            all_user_rooms.update(await self._get_rooms_for_users(user_ids))
+
+        return all_user_rooms
+
     @cached(max_entries=10000)
     async def does_pair_of_users_share_a_room(
         self, user_id: str, other_user_id: str
-- 
cgit 1.5.1


From c604d2c218a80f169876cf3063817e038063f7b9 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 12 Oct 2022 06:46:13 -0400
Subject: Mark /relations endpoint as usable on workers. (#14028)

Co-authored-by: Eric Eastwood <erice@element.io>
---
 changelog.d/14028.feature                      |  1 +
 docker/complement/conf/start_for_complement.sh |  1 +
 docker/configure_workers_and_start.py          | 27 ++++++++++++++++++++++++++
 docs/workers.md                                |  1 +
 scripts-dev/complement.sh                      |  7 +++++--
 synapse/app/generic_worker.py                  |  2 ++
 6 files changed, 37 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14028.feature

(limited to 'synapse')

diff --git a/changelog.d/14028.feature b/changelog.d/14028.feature
new file mode 100644
index 0000000000..6f5663a0ef
--- /dev/null
+++ b/changelog.d/14028.feature
@@ -0,0 +1 @@
+The `/relations` endpoint can now be used on workers.
diff --git a/docker/complement/conf/start_for_complement.sh b/docker/complement/conf/start_for_complement.sh
index cc6482f763..bb85d9fed7 100755
--- a/docker/complement/conf/start_for_complement.sh
+++ b/docker/complement/conf/start_for_complement.sh
@@ -57,6 +57,7 @@ if [[ -n "$SYNAPSE_COMPLEMENT_USE_WORKERS" ]]; then
       federation_reader, \
       federation_sender, \
       synchrotron, \
+      client_reader, \
       appservice, \
       pusher"
 
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index 51583dc13d..8e7f605b24 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -107,6 +107,33 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
         "shared_extra_conf": {},
         "worker_extra_conf": "",
     },
+    "client_reader": {
+        "app": "synapse.app.generic_worker",
+        "listener_resources": ["client"],
+        "endpoint_patterns": [
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/publicRooms$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/joined_members$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/context/.*$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/members$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state$",
+            "^/_matrix/client/v1/rooms/.*/hierarchy$",
+            "^/_matrix/client/(v1|unstable)/rooms/.*/relations/",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/login$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/account/3pid$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/account/whoami$",
+            "^/_matrix/client/versions$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/voip/turnServer$",
+            "^/_matrix/client/(r0|v3|unstable)/register$",
+            "^/_matrix/client/(r0|v3|unstable)/auth/.*/fallback/web$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/messages$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/event",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/joined_rooms",
+            "^/_matrix/client/(api/v1|r0|v3|unstable/.*)/rooms/.*/aliases",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/search",
+        ],
+        "shared_extra_conf": {},
+        "worker_extra_conf": "",
+    },
     "federation_reader": {
         "app": "synapse.app.generic_worker",
         "listener_resources": ["federation"],
diff --git a/docs/workers.md b/docs/workers.md
index 27041ea57c..e8d6cbaf8b 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -203,6 +203,7 @@ information.
     ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/members$
     ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state$
     ^/_matrix/client/v1/rooms/.*/hierarchy$
+    ^/_matrix/client/(v1|unstable)/rooms/.*/relations/
     ^/_matrix/client/unstable/org.matrix.msc2716/rooms/.*/batch_send$
     ^/_matrix/client/unstable/im.nheko.summary/rooms/.*/summary$
     ^/_matrix/client/(r0|v3|unstable)/account/3pid$
diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh
index eab23f18f1..a7b1e1e3a8 100755
--- a/scripts-dev/complement.sh
+++ b/scripts-dev/complement.sh
@@ -126,7 +126,7 @@ export COMPLEMENT_BASE_IMAGE=complement-synapse
 
 extra_test_args=()
 
-test_tags="synapse_blacklist,msc2716,msc3030,msc3787"
+test_tags="synapse_blacklist,msc3787"
 
 # All environment variables starting with PASS_ will be shared.
 # (The prefix is stripped off before reaching the container.)
@@ -158,7 +158,10 @@ else
 
   # We only test faster room joins on monoliths, because they are purposefully
   # being developed without worker support to start with.
-  test_tags="$test_tags,faster_joins"
+  #
+  # The tests for importing historical messages (MSC2716) and jump to date (MSC3030)
+  # also only pass with monoliths, currently.
+  test_tags="$test_tags,faster_joins,msc2716,msc3030"
 fi
 
 
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 5e3825fca6..dc49840f73 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -65,6 +65,7 @@ from synapse.rest.client import (
     push_rule,
     read_marker,
     receipts,
+    relations,
     room,
     room_batch,
     room_keys,
@@ -308,6 +309,7 @@ class GenericWorkerServer(HomeServer):
                     sync.register_servlets(self, resource)
                     events.register_servlets(self, resource)
                     room.register_servlets(self, resource, is_worker=True)
+                    relations.register_servlets(self, resource)
                     room.register_deprecated_servlets(self, resource)
                     initial_sync.register_servlets(self, resource)
                     room_batch.register_servlets(self, resource)
-- 
cgit 1.5.1


From 9c23442ac909afe3d827534b00d52ee182d2f423 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Wed, 12 Oct 2022 14:37:20 +0100
Subject: Correct field name for stripped state events when knocking.
 `knock_state_events` -> `knock_room_state` (#14102)

---
 changelog.d/14102.bugfix                |  1 +
 synapse/federation/federation_client.py |  2 +-
 synapse/federation/federation_server.py |  9 ++++++++-
 synapse/handlers/federation.py          | 20 ++++++++++++++++----
 4 files changed, 26 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/14102.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14102.bugfix b/changelog.d/14102.bugfix
new file mode 100644
index 0000000000..d71e108f7c
--- /dev/null
+++ b/changelog.d/14102.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse v1.37.0 in which an incorrect key name was used for sending and receiving room metadata when knocking on a room.
\ No newline at end of file
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 4dca711cd2..b220ab43fc 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -1294,7 +1294,7 @@ class FederationClient(FederationBase):
         return resp[1]
 
     async def send_knock(self, destinations: List[str], pdu: EventBase) -> JsonDict:
-        """Attempts to send a knock event to given a list of servers. Iterates
+        """Attempts to send a knock event to a given list of servers. Iterates
         through the list until one attempt succeeds.
 
         Doing so will cause the remote server to add the event to the graph,
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 907940e19e..28097664b4 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -824,7 +824,14 @@ class FederationServer(FederationBase):
                 context, self._room_prejoin_state_types
             )
         )
-        return {"knock_state_events": stripped_room_state}
+        return {
+            "knock_room_state": stripped_room_state,
+            # Since v1.37, Synapse incorrectly used "knock_state_events" for this field.
+            # Thus, we also populate a 'knock_state_events' with the same content to
+            # support old instances.
+            # See https://github.com/matrix-org/synapse/issues/14088.
+            "knock_state_events": stripped_room_state,
+        }
 
     async def _on_send_membership_event(
         self, origin: str, content: JsonDict, membership_type: str, room_id: str
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 986ffed3d5..44e70c6c3c 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -781,15 +781,27 @@ class FederationHandler:
 
         # Send the signed event back to the room, and potentially receive some
         # further information about the room in the form of partial state events
-        stripped_room_state = await self.federation_client.send_knock(
-            target_hosts, event
-        )
+        knock_response = await self.federation_client.send_knock(target_hosts, event)
 
         # Store any stripped room state events in the "unsigned" key of the event.
         # This is a bit of a hack and is cribbing off of invites. Basically we
         # store the room state here and retrieve it again when this event appears
         # in the invitee's sync stream. It is stripped out for all other local users.
-        event.unsigned["knock_room_state"] = stripped_room_state["knock_state_events"]
+        stripped_room_state = (
+            knock_response.get("knock_room_state")
+            # Since v1.37, Synapse incorrectly used "knock_state_events" for this field.
+            # Thus, we also check for a 'knock_state_events' to support old instances.
+            # See https://github.com/matrix-org/synapse/issues/14088.
+            or knock_response.get("knock_state_events")
+        )
+
+        if stripped_room_state is None:
+            raise KeyError(
+                "Missing 'knock_room_state' (or legacy 'knock_state_events') field in "
+                "send_knock response"
+            )
+
+        event.unsigned["knock_room_state"] = stripped_room_state
 
         context = EventContext.for_outlier(self._storage_controllers)
         stream_id = await self._federation_event_handler.persist_events_and_notify(
-- 
cgit 1.5.1


From 87099b6ea5cb48b03d2007c46af80bc3f0767519 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 12 Oct 2022 12:15:52 -0400
Subject: Return the main timeline for events which are not part of a thread.
 (#14140)

Fixes a bug where threaded receipts could not be sent for the
main timeline.
---
 changelog.d/14140.feature                   |  1 +
 synapse/push/bulk_push_rule_evaluator.py    |  2 +-
 synapse/storage/databases/main/relations.py | 12 +++++++-----
 3 files changed, 9 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/14140.feature

(limited to 'synapse')

diff --git a/changelog.d/14140.feature b/changelog.d/14140.feature
new file mode 100644
index 0000000000..5d0ae16e13
--- /dev/null
+++ b/changelog.d/14140.feature
@@ -0,0 +1 @@
+Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)).
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 8d94aeaa32..a75386f6a0 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -236,7 +236,7 @@ class BulkPushRuleEvaluator:
             else:
                 # Since the event has not yet been persisted we check whether
                 # the parent is part of a thread.
-                thread_id = await self.store.get_thread_id(relation.parent_id) or "main"
+                thread_id = await self.store.get_thread_id(relation.parent_id)
 
         # It's possible that old room versions have non-integer power levels (floats or
         # strings). Workaround this by explicitly converting to int.
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index 6b7eec4bf2..e7fbf950e6 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -28,7 +28,7 @@ from typing import (
 
 import attr
 
-from synapse.api.constants import RelationTypes
+from synapse.api.constants import MAIN_TIMELINE, RelationTypes
 from synapse.events import EventBase
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import LoggingTransaction, make_in_list_sql_clause
@@ -777,7 +777,7 @@ class RelationsWorkerStore(SQLBaseStore):
         )
 
     @cached()
-    async def get_thread_id(self, event_id: str) -> Optional[str]:
+    async def get_thread_id(self, event_id: str) -> str:
         """
         Get the thread ID for an event. This considers multi-level relations,
         e.g. an annotation to an event which is part of a thread.
@@ -787,7 +787,7 @@ class RelationsWorkerStore(SQLBaseStore):
 
         Returns:
             The event ID of the root event in the thread, if this event is part
-            of a thread. None, otherwise.
+            of a thread. "main", otherwise.
         """
         # Since event relations form a tree, we should only ever find 0 or 1
         # results from the below query.
@@ -802,13 +802,15 @@ class RelationsWorkerStore(SQLBaseStore):
             ) SELECT relates_to_id FROM related_events WHERE relation_type = 'm.thread';
         """
 
-        def _get_thread_id(txn: LoggingTransaction) -> Optional[str]:
+        def _get_thread_id(txn: LoggingTransaction) -> str:
             txn.execute(sql, (event_id,))
             # TODO Should we ensure there's only a single result here?
             row = txn.fetchone()
             if row:
                 return row[0]
-            return None
+
+            # If no thread was found, it is part of the main timeline.
+            return MAIN_TIMELINE
 
         return await self.db_pool.runInteraction("get_thread_id", _get_thread_id)
 
-- 
cgit 1.5.1


From e6e876b9b158f47811b6dfedd8783f658ce960a4 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <patrickc@matrix.org>
Date: Wed, 12 Oct 2022 12:18:34 -0400
Subject: Return the thread ID properly down sync. (#14159)

A receipt's thread ID, if one exists, should be added to the
body of a receipt.
---
 changelog.d/14159.feature                  | 1 +
 synapse/storage/databases/main/receipts.py | 2 ++
 2 files changed, 3 insertions(+)
 create mode 100644 changelog.d/14159.feature

(limited to 'synapse')

diff --git a/changelog.d/14159.feature b/changelog.d/14159.feature
new file mode 100644
index 0000000000..5d0ae16e13
--- /dev/null
+++ b/changelog.d/14159.feature
@@ -0,0 +1 @@
+Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)).
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index 246f78ac1f..b04026c21b 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -416,6 +416,8 @@ class ReceiptsWorkerStore(SQLBaseStore):
             # {"$foo:bar": { "read": { "@user:host": <receipt> }, .. }, .. }
             event_entry = room_event["content"].setdefault(row["event_id"], {})
             receipt_type = event_entry.setdefault(row["receipt_type"], {})
+            if row["thread_id"]:
+                receipt_type[row["user_id"]]["thread_id"] = row["thread_id"]
 
             receipt_type[row["user_id"]] = db_to_json(row["data"])
 
-- 
cgit 1.5.1


From b6baa46db078c3ef9e6c5751bccb8d2e1c5c5402 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Wed, 12 Oct 2022 11:01:00 -0700
Subject: Fix a bug where the joined hosts for a given event were not being
 properly cached (#14125)

---
 changelog.d/14125.bugfix             |  1 +
 synapse/handlers/federation_event.py |  4 +-
 synapse/handlers/message.py          | 91 +++++++++++++++++++-----------------
 3 files changed, 51 insertions(+), 45 deletions(-)
 create mode 100644 changelog.d/14125.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14125.bugfix b/changelog.d/14125.bugfix
new file mode 100644
index 0000000000..852f00ebb9
--- /dev/null
+++ b/changelog.d/14125.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in v1.69.0rc1 where the joined hosts for a given event were not being properly cached.
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index da319943cc..f382961099 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -414,7 +414,9 @@ class FederationEventHandler:
 
         # First, precalculate the joined hosts so that the federation sender doesn't
         # need to.
-        await self._event_creation_handler.cache_joined_hosts_for_event(event, context)
+        await self._event_creation_handler.cache_joined_hosts_for_events(
+            [(event, context)]
+        )
 
         await self._check_for_soft_fail(event, context=context, origin=origin)
         await self._run_push_actions_and_persist_event(event, context)
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index da1acea275..4e55ebba0b 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -1390,7 +1390,7 @@ class EventCreationHandler:
                             extra_users=extra_users,
                         ),
                         run_in_background(
-                            self.cache_joined_hosts_for_event, event, context
+                            self.cache_joined_hosts_for_events, events_and_context
                         ).addErrback(
                             log_failure, "cache_joined_hosts_for_event failed"
                         ),
@@ -1491,62 +1491,65 @@ class EventCreationHandler:
                 await self.store.remove_push_actions_from_staging(event.event_id)
             raise
 
-    async def cache_joined_hosts_for_event(
-        self, event: EventBase, context: EventContext
+    async def cache_joined_hosts_for_events(
+        self, events_and_context: List[Tuple[EventBase, EventContext]]
     ) -> None:
-        """Precalculate the joined hosts at the event, when using Redis, so that
+        """Precalculate the joined hosts at each of the given events, when using Redis, so that
         external federation senders don't have to recalculate it themselves.
         """
 
-        if not self._external_cache.is_enabled():
-            return
-
-        # If external cache is enabled we should always have this.
-        assert self._external_cache_joined_hosts_updates is not None
+        for event, _ in events_and_context:
+            if not self._external_cache.is_enabled():
+                return
 
-        # We actually store two mappings, event ID -> prev state group,
-        # state group -> joined hosts, which is much more space efficient
-        # than event ID -> joined hosts.
-        #
-        # Note: We have to cache event ID -> prev state group, as we don't
-        # store that in the DB.
-        #
-        # Note: We set the state group -> joined hosts cache if it hasn't been
-        # set for a while, so that the expiry time is reset.
+            # If external cache is enabled we should always have this.
+            assert self._external_cache_joined_hosts_updates is not None
 
-        state_entry = await self.state.resolve_state_groups_for_events(
-            event.room_id, event_ids=event.prev_event_ids()
-        )
+            # We actually store two mappings, event ID -> prev state group,
+            # state group -> joined hosts, which is much more space efficient
+            # than event ID -> joined hosts.
+            #
+            # Note: We have to cache event ID -> prev state group, as we don't
+            # store that in the DB.
+            #
+            # Note: We set the state group -> joined hosts cache if it hasn't been
+            # set for a while, so that the expiry time is reset.
 
-        if state_entry.state_group:
-            await self._external_cache.set(
-                "event_to_prev_state_group",
-                event.event_id,
-                state_entry.state_group,
-                expiry_ms=60 * 60 * 1000,
+            state_entry = await self.state.resolve_state_groups_for_events(
+                event.room_id, event_ids=event.prev_event_ids()
             )
 
-            if state_entry.state_group in self._external_cache_joined_hosts_updates:
-                return
+            if state_entry.state_group:
+                await self._external_cache.set(
+                    "event_to_prev_state_group",
+                    event.event_id,
+                    state_entry.state_group,
+                    expiry_ms=60 * 60 * 1000,
+                )
 
-            state = await state_entry.get_state(
-                self._storage_controllers.state, StateFilter.all()
-            )
-            with opentracing.start_active_span("get_joined_hosts"):
-                joined_hosts = await self.store.get_joined_hosts(
-                    event.room_id, state, state_entry
+                if state_entry.state_group in self._external_cache_joined_hosts_updates:
+                    return
+
+                state = await state_entry.get_state(
+                    self._storage_controllers.state, StateFilter.all()
                 )
+                with opentracing.start_active_span("get_joined_hosts"):
+                    joined_hosts = await self.store.get_joined_hosts(
+                        event.room_id, state, state_entry
+                    )
 
-            # Note that the expiry times must be larger than the expiry time in
-            # _external_cache_joined_hosts_updates.
-            await self._external_cache.set(
-                "get_joined_hosts",
-                str(state_entry.state_group),
-                list(joined_hosts),
-                expiry_ms=60 * 60 * 1000,
-            )
+                # Note that the expiry times must be larger than the expiry time in
+                # _external_cache_joined_hosts_updates.
+                await self._external_cache.set(
+                    "get_joined_hosts",
+                    str(state_entry.state_group),
+                    list(joined_hosts),
+                    expiry_ms=60 * 60 * 1000,
+                )
 
-            self._external_cache_joined_hosts_updates[state_entry.state_group] = None
+                self._external_cache_joined_hosts_updates[
+                    state_entry.state_group
+                ] = None
 
     async def _validate_canonical_alias(
         self,
-- 
cgit 1.5.1


From 3bbe532abb7bfc41467597731ac1a18c0331f539 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 13 Oct 2022 08:02:11 -0400
Subject: Add an API for listing threads in a room. (#13394)

Implement the /threads endpoint from MSC3856.

This is currently unstable and behind an experimental configuration
flag.

It includes a background update to backfill data; results from
the /threads endpoint will be partial until that finishes.
---
 changelog.d/13394.feature                          |   1 +
 synapse/_scripts/synapse_port_db.py                |   2 +
 synapse/config/experimental.py                     |   3 +
 synapse/handlers/relations.py                      |  86 ++++++++++-
 synapse/rest/client/relations.py                   |  50 ++++++-
 synapse/storage/databases/main/cache.py            |   1 +
 synapse/storage/databases/main/events.py           |  38 ++++-
 synapse/storage/databases/main/relations.py        | 166 ++++++++++++++++++++-
 .../schema/main/delta/73/09threads_table.sql       |  30 ++++
 tests/rest/client/test_relations.py                | 151 +++++++++++++++++++
 10 files changed, 522 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/13394.feature
 create mode 100644 synapse/storage/schema/main/delta/73/09threads_table.sql

(limited to 'synapse')

diff --git a/changelog.d/13394.feature b/changelog.d/13394.feature
new file mode 100644
index 0000000000..68de079cf3
--- /dev/null
+++ b/changelog.d/13394.feature
@@ -0,0 +1 @@
+Experimental support for [MSC3856](https://github.com/matrix-org/matrix-spec-proposals/pull/3856): threads list API.
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 5fa599e70e..d850e54e17 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -72,6 +72,7 @@ from synapse.storage.databases.main.registration import (
     RegistrationBackgroundUpdateStore,
     find_max_generated_user_id_localpart,
 )
+from synapse.storage.databases.main.relations import RelationsWorkerStore
 from synapse.storage.databases.main.room import RoomBackgroundUpdateStore
 from synapse.storage.databases.main.roommember import RoomMemberBackgroundUpdateStore
 from synapse.storage.databases.main.search import SearchBackgroundUpdateStore
@@ -206,6 +207,7 @@ class Store(
     PusherWorkerStore,
     PresenceBackgroundUpdateStore,
     ReceiptsBackgroundUpdateStore,
+    RelationsWorkerStore,
 ):
     def execute(self, f: Callable[..., R], *args: Any, **kwargs: Any) -> Awaitable[R]:
         return self.db_pool.runInteraction(f.__name__, f, *args, **kwargs)
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index f44655516e..1860006536 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -101,6 +101,9 @@ class ExperimentalConfig(Config):
         # MSC3848: Introduce errcodes for specific event sending failures
         self.msc3848_enabled: bool = experimental.get("msc3848_enabled", False)
 
+        # MSC3856: Threads list API
+        self.msc3856_enabled: bool = experimental.get("msc3856_enabled", False)
+
         # MSC3852: Expose last seen user agent field on /_matrix/client/v3/devices.
         self.msc3852_enabled: bool = experimental.get("msc3852_enabled", False)
 
diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py
index cc5e45c241..1fdd7a10bc 100644
--- a/synapse/handlers/relations.py
+++ b/synapse/handlers/relations.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import enum
 import logging
 from typing import TYPE_CHECKING, Dict, FrozenSet, Iterable, List, Optional, Tuple
 
@@ -20,7 +21,7 @@ from synapse.api.constants import RelationTypes
 from synapse.api.errors import SynapseError
 from synapse.events import EventBase, relation_from_event
 from synapse.logging.opentracing import trace
-from synapse.storage.databases.main.relations import _RelatedEvent
+from synapse.storage.databases.main.relations import ThreadsNextBatch, _RelatedEvent
 from synapse.streams.config import PaginationConfig
 from synapse.types import JsonDict, Requester, StreamToken, UserID
 from synapse.visibility import filter_events_for_client
@@ -32,6 +33,13 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
+class ThreadsListInclude(str, enum.Enum):
+    """Valid values for the 'include' flag of /threads."""
+
+    all = "all"
+    participated = "participated"
+
+
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class _ThreadAggregation:
     # The latest event in the thread.
@@ -482,3 +490,79 @@ class RelationsHandler:
             results.setdefault(event_id, BundledAggregations()).replace = edit
 
         return results
+
+    async def get_threads(
+        self,
+        requester: Requester,
+        room_id: str,
+        include: ThreadsListInclude,
+        limit: int = 5,
+        from_token: Optional[ThreadsNextBatch] = None,
+    ) -> JsonDict:
+        """Get the threads in a room, ordered by the latest reply in each thread.
+
+        Args:
+            requester: The user requesting the threads.
+            room_id: The room to list threads for.
+            include: One of "all" or "participated" to indicate which threads should
+                be returned.
+            limit: Only fetch the most recent `limit` threads.
+            from_token: Fetch rows from the given token, or from the start if None.
+
+        Returns:
+            The pagination chunk.
+        """
+
+        user_id = requester.user.to_string()
+
+        # TODO Properly handle a user leaving a room.
+        (_, member_event_id) = await self._auth.check_user_in_room_or_world_readable(
+            room_id, requester, allow_departed_users=True
+        )
+
+        # Note that ignored users are not passed into get_threads
+        # below. Ignored users are handled in filter_events_for_client (and by
+        # not passing them in here we should get a better cache hit rate).
+        thread_roots, next_batch = await self._main_store.get_threads(
+            room_id=room_id, limit=limit, from_token=from_token
+        )
+
+        events = await self._main_store.get_events_as_list(thread_roots)
+
+        if include == ThreadsListInclude.participated:
+            # Pre-seed thread participation with whether the requester sent the event.
+            participated = {event.event_id: event.sender == user_id for event in events}
+            # For events the requester did not send, check the database for whether
+            # the requester sent a threaded reply.
+            participated.update(
+                await self._main_store.get_threads_participated(
+                    [eid for eid, p in participated.items() if not p],
+                    user_id,
+                )
+            )
+
+            # Limit the returned threads to those the user has participated in.
+            events = [event for event in events if participated[event.event_id]]
+
+        events = await filter_events_for_client(
+            self._storage_controllers,
+            user_id,
+            events,
+            is_peeking=(member_event_id is None),
+        )
+
+        aggregations = await self.get_bundled_aggregations(
+            events, requester.user.to_string()
+        )
+
+        now = self._clock.time_msec()
+        serialized_events = self._event_serializer.serialize_events(
+            events, now, bundle_aggregations=aggregations
+        )
+
+        return_value: JsonDict = {"chunk": serialized_events}
+
+        if next_batch:
+            return_value["next_batch"] = str(next_batch)
+
+        return return_value
diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py
index b31ce5a0d3..d1aa1947a5 100644
--- a/synapse/rest/client/relations.py
+++ b/synapse/rest/client/relations.py
@@ -13,12 +13,15 @@
 # limitations under the License.
 
 import logging
+import re
 from typing import TYPE_CHECKING, Optional, Tuple
 
+from synapse.handlers.relations import ThreadsListInclude
 from synapse.http.server import HttpServer
-from synapse.http.servlet import RestServlet
+from synapse.http.servlet import RestServlet, parse_integer, parse_string
 from synapse.http.site import SynapseRequest
 from synapse.rest.client._base import client_patterns
+from synapse.storage.databases.main.relations import ThreadsNextBatch
 from synapse.streams.config import PaginationConfig
 from synapse.types import JsonDict
 
@@ -78,5 +81,50 @@ class RelationPaginationServlet(RestServlet):
         return 200, result
 
 
+class ThreadsServlet(RestServlet):
+    PATTERNS = (
+        re.compile(
+            "^/_matrix/client/unstable/org.matrix.msc3856/rooms/(?P<room_id>[^/]*)/threads"
+        ),
+    )
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__()
+        self.auth = hs.get_auth()
+        self.store = hs.get_datastores().main
+        self._relations_handler = hs.get_relations_handler()
+
+    async def on_GET(
+        self, request: SynapseRequest, room_id: str
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request)
+
+        limit = parse_integer(request, "limit", default=5)
+        from_token_str = parse_string(request, "from")
+        include = parse_string(
+            request,
+            "include",
+            default=ThreadsListInclude.all.value,
+            allowed_values=[v.value for v in ThreadsListInclude],
+        )
+
+        # Parse the pagination token, if one was provided.
+        from_token = None
+        if from_token_str:
+            from_token = ThreadsNextBatch.from_string(from_token_str)
+
+        result = await self._relations_handler.get_threads(
+            requester=requester,
+            room_id=room_id,
+            include=ThreadsListInclude(include),
+            limit=limit,
+            from_token=from_token,
+        )
+
+        return 200, result
+
+
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     RelationPaginationServlet(hs).register(http_server)
+    if hs.config.experimental.msc3856_enabled:
+        ThreadsServlet(hs).register(http_server)
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index a9f25a5904..0ce3156c9c 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -259,6 +259,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
             self._attempt_to_invalidate_cache("get_applicable_edit", (relates_to,))
             self._attempt_to_invalidate_cache("get_thread_summary", (relates_to,))
             self._attempt_to_invalidate_cache("get_thread_participated", (relates_to,))
+            self._attempt_to_invalidate_cache("get_threads", (room_id,))
 
     async def invalidate_cache_and_stream(
         self, cache_name: str, keys: Tuple[Any, ...]
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 060fe71454..6698cbf664 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -35,7 +35,7 @@ import attr
 from prometheus_client import Counter
 
 import synapse.metrics
-from synapse.api.constants import EventContentFields, EventTypes
+from synapse.api.constants import EventContentFields, EventTypes, RelationTypes
 from synapse.api.errors import Codes, SynapseError
 from synapse.api.room_versions import RoomVersions
 from synapse.events import EventBase, relation_from_event
@@ -1616,7 +1616,7 @@ class PersistEventsStore:
                 )
 
                 # Remove from relations table.
-                self._handle_redact_relations(txn, event.redacts)
+                self._handle_redact_relations(txn, event.room_id, event.redacts)
 
         # Update the event_forward_extremities, event_backward_extremities and
         # event_edges tables.
@@ -1866,6 +1866,34 @@ class PersistEventsStore:
             },
         )
 
+        if relation.rel_type == RelationTypes.THREAD:
+            # Upsert into the threads table, but only overwrite the value if the
+            # new event is of a later topological order OR if the topological
+            # ordering is equal, but the stream ordering is later.
+            sql = """
+            INSERT INTO threads (room_id, thread_id, latest_event_id, topological_ordering, stream_ordering)
+            VALUES (?, ?, ?, ?, ?)
+            ON CONFLICT (room_id, thread_id)
+            DO UPDATE SET
+                latest_event_id = excluded.latest_event_id,
+                topological_ordering = excluded.topological_ordering,
+                stream_ordering = excluded.stream_ordering
+            WHERE
+                threads.topological_ordering <= excluded.topological_ordering AND
+                threads.stream_ordering < excluded.stream_ordering
+            """
+
+            txn.execute(
+                sql,
+                (
+                    event.room_id,
+                    relation.parent_id,
+                    event.event_id,
+                    event.depth,
+                    event.internal_metadata.stream_ordering,
+                ),
+            )
+
     def _handle_insertion_event(
         self, txn: LoggingTransaction, event: EventBase
     ) -> None:
@@ -1989,13 +2017,14 @@ class PersistEventsStore:
         txn.execute(sql, (batch_id,))
 
     def _handle_redact_relations(
-        self, txn: LoggingTransaction, redacted_event_id: str
+        self, txn: LoggingTransaction, room_id: str, redacted_event_id: str
     ) -> None:
         """Handles receiving a redaction and checking whether the redacted event
         has any relations which must be removed from the database.
 
         Args:
             txn
+            room_id: The room ID of the event that was redacted.
             redacted_event_id: The event that was redacted.
         """
 
@@ -2024,6 +2053,9 @@ class PersistEventsStore:
             self.store._invalidate_cache_and_stream(
                 txn, self.store.get_thread_participated, (redacted_relates_to,)
             )
+            self.store._invalidate_cache_and_stream(
+                txn, self.store.get_threads, (room_id,)
+            )
 
         self.db_pool.simple_delete_txn(
             txn, table="event_relations", keyvalues={"event_id": redacted_event_id}
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index e7fbf950e6..ac9b96ab44 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -14,6 +14,7 @@
 
 import logging
 from typing import (
+    TYPE_CHECKING,
     Collection,
     Dict,
     FrozenSet,
@@ -29,17 +30,46 @@ from typing import (
 import attr
 
 from synapse.api.constants import MAIN_TIMELINE, RelationTypes
+from synapse.api.errors import SynapseError
 from synapse.events import EventBase
 from synapse.storage._base import SQLBaseStore
-from synapse.storage.database import LoggingTransaction, make_in_list_sql_clause
+from synapse.storage.database import (
+    DatabasePool,
+    LoggingDatabaseConnection,
+    LoggingTransaction,
+    make_in_list_sql_clause,
+)
 from synapse.storage.databases.main.stream import generate_pagination_where_clause
 from synapse.storage.engines import PostgresEngine
 from synapse.types import JsonDict, RoomStreamToken, StreamKeyType, StreamToken
 from synapse.util.caches.descriptors import cached, cachedList
 
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
 logger = logging.getLogger(__name__)
 
 
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class ThreadsNextBatch:
+    topological_ordering: int
+    stream_ordering: int
+
+    def __str__(self) -> str:
+        return f"{self.topological_ordering}_{self.stream_ordering}"
+
+    @classmethod
+    def from_string(cls, string: str) -> "ThreadsNextBatch":
+        """
+        Creates a ThreadsNextBatch from its textual representation.
+        """
+        try:
+            keys = (int(s) for s in string.split("_"))
+            return cls(*keys)
+        except Exception:
+            raise SynapseError(400, "Invalid threads token")
+
+
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class _RelatedEvent:
     """
@@ -56,6 +86,76 @@ class _RelatedEvent:
 
 
 class RelationsWorkerStore(SQLBaseStore):
+    def __init__(
+        self,
+        database: DatabasePool,
+        db_conn: LoggingDatabaseConnection,
+        hs: "HomeServer",
+    ):
+        super().__init__(database, db_conn, hs)
+
+        self.db_pool.updates.register_background_update_handler(
+            "threads_backfill", self._backfill_threads
+        )
+
+    async def _backfill_threads(self, progress: JsonDict, batch_size: int) -> int:
+        """Backfill the threads table."""
+
+        def threads_backfill_txn(txn: LoggingTransaction) -> int:
+            last_thread_id = progress.get("last_thread_id", "")
+
+            # Get the latest event in each thread by topo ordering / stream ordering.
+            #
+            # Note that the MAX(event_id) is needed to abide by the rules of group by,
+            # but doesn't actually do anything since there should only be a single event
+            # ID per topo/stream ordering pair.
+            sql = f"""
+            SELECT room_id, relates_to_id, MAX(topological_ordering), MAX(stream_ordering), MAX(event_id)
+            FROM event_relations
+            INNER JOIN events USING (event_id)
+            WHERE
+                relates_to_id > ? AND
+                relation_type = '{RelationTypes.THREAD}'
+            GROUP BY room_id, relates_to_id
+            ORDER BY relates_to_id
+            LIMIT ?
+            """
+            txn.execute(sql, (last_thread_id, batch_size))
+
+            # No more rows to process.
+            rows = txn.fetchall()
+            if not rows:
+                return 0
+
+            # Insert the rows into the threads table. If a matching thread already exists,
+            # assume it is from a newer event.
+            sql = """
+            INSERT INTO threads (room_id, thread_id, topological_ordering, stream_ordering, latest_event_id)
+            VALUES %s
+            ON CONFLICT (room_id, thread_id)
+            DO NOTHING
+            """
+            if isinstance(txn.database_engine, PostgresEngine):
+                txn.execute_values(sql % ("?",), rows, fetch=False)
+            else:
+                txn.execute_batch(sql % ("?, ?, ?, ?, ?",), rows)
+
+            # Mark the progress.
+            self.db_pool.updates._background_update_progress_txn(
+                txn, "threads_backfill", {"last_thread_id": rows[-1][1]}
+            )
+
+            return txn.rowcount
+
+        result = await self.db_pool.runInteraction(
+            "threads_backfill", threads_backfill_txn
+        )
+
+        if not result:
+            await self.db_pool.updates._end_background_update("threads_backfill")
+
+        return result
+
     @cached(uncached_args=("event",), tree=True)
     async def get_relations_for_event(
         self,
@@ -776,6 +876,70 @@ class RelationsWorkerStore(SQLBaseStore):
             "get_if_user_has_annotated_event", _get_if_user_has_annotated_event
         )
 
+    @cached(tree=True)
+    async def get_threads(
+        self,
+        room_id: str,
+        limit: int = 5,
+        from_token: Optional[ThreadsNextBatch] = None,
+    ) -> Tuple[List[str], Optional[ThreadsNextBatch]]:
+        """Get a list of thread IDs, ordered by topological ordering of their
+        latest reply.
+
+        Args:
+            room_id: The room the event belongs to.
+            limit: Only fetch the most recent `limit` threads.
+            from_token: Fetch rows from a previous next_batch, or from the start if None.
+
+        Returns:
+            A tuple of:
+                A list of thread root event IDs.
+
+                The next_batch, if one exists.
+        """
+        # Generate the pagination clause, if necessary.
+        #
+        # Find any threads where the latest reply is equal / before the last
+        # thread's topo ordering and earlier in stream ordering.
+        pagination_clause = ""
+        pagination_args: tuple = ()
+        if from_token:
+            pagination_clause = "AND topological_ordering <= ? AND stream_ordering < ?"
+            pagination_args = (
+                from_token.topological_ordering,
+                from_token.stream_ordering,
+            )
+
+        sql = f"""
+            SELECT thread_id, topological_ordering, stream_ordering
+            FROM threads
+            WHERE
+                room_id = ?
+                {pagination_clause}
+            ORDER BY topological_ordering DESC, stream_ordering DESC
+            LIMIT ?
+        """
+
+        def _get_threads_txn(
+            txn: LoggingTransaction,
+        ) -> Tuple[List[str], Optional[ThreadsNextBatch]]:
+            txn.execute(sql, (room_id, *pagination_args, limit + 1))
+
+            rows = cast(List[Tuple[str, int, int]], txn.fetchall())
+            thread_ids = [r[0] for r in rows]
+
+            # If there are more events, generate the next pagination key from the
+            # last thread which will be returned.
+            next_token = None
+            if len(thread_ids) > limit:
+                last_topo_id = rows[-2][1]
+                last_stream_id = rows[-2][2]
+                next_token = ThreadsNextBatch(last_topo_id, last_stream_id)
+
+            return thread_ids[:limit], next_token
+
+        return await self.db_pool.runInteraction("get_threads", _get_threads_txn)
+
     @cached()
     async def get_thread_id(self, event_id: str) -> str:
         """
diff --git a/synapse/storage/schema/main/delta/73/09threads_table.sql b/synapse/storage/schema/main/delta/73/09threads_table.sql
new file mode 100644
index 0000000000..aa7c5e9a2e
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/09threads_table.sql
@@ -0,0 +1,30 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE threads (
+    room_id TEXT NOT NULL,
+    -- The event ID of the root event in the thread.
+    thread_id TEXT NOT NULL,
+    -- The latest event ID and corresponding topo / stream ordering.
+    latest_event_id TEXT NOT NULL,
+    topological_ordering BIGINT NOT NULL,
+    stream_ordering BIGINT NOT NULL,
+    CONSTRAINT threads_uniqueness UNIQUE (room_id, thread_id)
+);
+
+CREATE INDEX threads_ordering_idx ON threads(room_id, topological_ordering, stream_ordering);
+
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7309, 'threads_backfill', '{}');
diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py
index 988cdb746d..d595295e2c 100644
--- a/tests/rest/client/test_relations.py
+++ b/tests/rest/client/test_relations.py
@@ -1707,3 +1707,154 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
             relations[RelationTypes.THREAD]["latest_event"]["event_id"],
             related_event_id,
         )
+
+
+class ThreadsTestCase(BaseRelationsTestCase):
+    @unittest.override_config({"experimental_features": {"msc3856_enabled": True}})
+    def test_threads(self) -> None:
+        """Create threads and ensure the ordering is due to their latest event."""
+        # Create 2 threads.
+        thread_1 = self.parent_id
+        res = self.helper.send(self.room, body="Thread Root!", tok=self.user_token)
+        thread_2 = res["event_id"]
+
+        self._send_relation(RelationTypes.THREAD, "m.room.test")
+        self._send_relation(RelationTypes.THREAD, "m.room.test", parent_id=thread_2)
+
+        # Request the threads in the room.
+        channel = self.make_request(
+            "GET",
+            f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads",
+            access_token=self.user_token,
+        )
+        self.assertEquals(200, channel.code, channel.json_body)
+        thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]]
+        self.assertEqual(thread_roots, [thread_2, thread_1])
+
+        # Update the first thread, the ordering should swap.
+        self._send_relation(RelationTypes.THREAD, "m.room.test")
+
+        channel = self.make_request(
+            "GET",
+            f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads",
+            access_token=self.user_token,
+        )
+        self.assertEquals(200, channel.code, channel.json_body)
+        thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]]
+        self.assertEqual(thread_roots, [thread_1, thread_2])
+
+    @unittest.override_config({"experimental_features": {"msc3856_enabled": True}})
+    def test_pagination(self) -> None:
+        """Create threads and paginate through them."""
+        # Create 2 threads.
+        thread_1 = self.parent_id
+        res = self.helper.send(self.room, body="Thread Root!", tok=self.user_token)
+        thread_2 = res["event_id"]
+
+        self._send_relation(RelationTypes.THREAD, "m.room.test")
+        self._send_relation(RelationTypes.THREAD, "m.room.test", parent_id=thread_2)
+
+        # Request the threads in the room.
+        channel = self.make_request(
+            "GET",
+            f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads?limit=1",
+            access_token=self.user_token,
+        )
+        self.assertEquals(200, channel.code, channel.json_body)
+        thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]]
+        self.assertEqual(thread_roots, [thread_2])
+
+        # Make sure next_batch has something in it that looks like it could be a
+        # valid token.
+        next_batch = channel.json_body.get("next_batch")
+        self.assertIsInstance(next_batch, str, channel.json_body)
+
+        channel = self.make_request(
+            "GET",
+            f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads?limit=1&from={next_batch}",
+            access_token=self.user_token,
+        )
+        self.assertEquals(200, channel.code, channel.json_body)
+        thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]]
+        self.assertEqual(thread_roots, [thread_1], channel.json_body)
+
+        self.assertNotIn("next_batch", channel.json_body, channel.json_body)
+
+    @unittest.override_config({"experimental_features": {"msc3856_enabled": True}})
+    def test_include(self) -> None:
+        """Filtering threads to all or participated in should work."""
+        # Thread 1 has the user as the root event.
+        thread_1 = self.parent_id
+        self._send_relation(
+            RelationTypes.THREAD, "m.room.test", access_token=self.user2_token
+        )
+
+        # Thread 2 has the user replying.
+        res = self.helper.send(self.room, body="Thread Root!", tok=self.user2_token)
+        thread_2 = res["event_id"]
+        self._send_relation(RelationTypes.THREAD, "m.room.test", parent_id=thread_2)
+
+        # Thread 3 has the user not participating in.
+        res = self.helper.send(self.room, body="Another thread!", tok=self.user2_token)
+        thread_3 = res["event_id"]
+        self._send_relation(
+            RelationTypes.THREAD,
+            "m.room.test",
+            access_token=self.user2_token,
+            parent_id=thread_3,
+        )
+
+        # All threads in the room.
+        channel = self.make_request(
+            "GET",
+            f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads",
+            access_token=self.user_token,
+        )
+        self.assertEquals(200, channel.code, channel.json_body)
+        thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]]
+        self.assertEqual(
+            thread_roots, [thread_3, thread_2, thread_1], channel.json_body
+        )
+
+        # Only participated threads.
+        channel = self.make_request(
+            "GET",
+            f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads?include=participated",
+            access_token=self.user_token,
+        )
+        self.assertEquals(200, channel.code, channel.json_body)
+        thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]]
+        self.assertEqual(thread_roots, [thread_2, thread_1], channel.json_body)
+
+    @unittest.override_config({"experimental_features": {"msc3856_enabled": True}})
+    def test_ignored_user(self) -> None:
+        """Events from ignored users should be ignored."""
+        # Thread 1 has a reply from an ignored user.
+        thread_1 = self.parent_id
+        self._send_relation(
+            RelationTypes.THREAD, "m.room.test", access_token=self.user2_token
+        )
+
+        # Thread 2 is created by an ignored user.
+        res = self.helper.send(self.room, body="Thread Root!", tok=self.user2_token)
+        thread_2 = res["event_id"]
+        self._send_relation(RelationTypes.THREAD, "m.room.test", parent_id=thread_2)
+
+        # Ignore user2.
+        self.get_success(
+            self.store.add_account_data_for_user(
+                self.user_id,
+                AccountDataTypes.IGNORED_USER_LIST,
+                {"ignored_users": {self.user2_id: {}}},
+            )
+        )
+
+        # Only thread 1 is returned.
+        channel = self.make_request(
+            "GET",
+            f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads",
+            access_token=self.user_token,
+        )
+        self.assertEquals(200, channel.code, channel.json_body)
+        thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]]
+        self.assertEqual(thread_roots, [thread_1], channel.json_body)
-- 
cgit 1.5.1


From 7d59a515bb97dc4f8253aa9a5a560221a0ef4702 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <patrickc@matrix.org>
Date: Thu, 13 Oct 2022 12:15:41 -0400
Subject: Properly return the thread ID down sync. (#14159)

Fix a broken conflict resolution in e6e876b9b158f47811b6dfedd8783f658ce960a4,
which set the receipt's `thread_id` before the receipt entry itself was
assigned; set `thread_id` after the entry is populated instead.
---
 synapse/storage/databases/main/receipts.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'synapse')

diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index b04026c21b..dc6989527e 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -416,10 +416,10 @@ class ReceiptsWorkerStore(SQLBaseStore):
             # {"$foo:bar": { "read": { "@user:host": <receipt> }, .. }, .. }
             event_entry = room_event["content"].setdefault(row["event_id"], {})
             receipt_type = event_entry.setdefault(row["receipt_type"], {})
-            if row["thread_id"]:
-                receipt_type[row["user_id"]]["thread_id"] = row["thread_id"]
 
             receipt_type[row["user_id"]] = db_to_json(row["data"])
+            if row["thread_id"]:
+                receipt_type[row["user_id"]]["thread_id"] = row["thread_id"]
 
         results = {
             room_id: [results[room_id]] if room_id in results else []
-- 
cgit 1.5.1


From 2019b60f3bb5a505fc730f38a4b1accbabe444bf Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 13 Oct 2022 12:53:24 -0400
Subject: Fix sqlite syntax for upserts. (#14171)

---
 changelog.d/14171.feature                   | 1 +
 synapse/storage/databases/main/relations.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14171.feature

(limited to 'synapse')

diff --git a/changelog.d/14171.feature b/changelog.d/14171.feature
new file mode 100644
index 0000000000..68de079cf3
--- /dev/null
+++ b/changelog.d/14171.feature
@@ -0,0 +1 @@
+Experimental support for [MSC3856](https://github.com/matrix-org/matrix-spec-proposals/pull/3856): threads list API.
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index ac9b96ab44..7c54ce0b2e 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -138,7 +138,7 @@ class RelationsWorkerStore(SQLBaseStore):
             if isinstance(txn.database_engine, PostgresEngine):
                 txn.execute_values(sql % ("?",), rows, fetch=False)
             else:
-                txn.execute_batch(sql % ("?, ?, ?, ?, ?",), rows)
+                txn.execute_batch(sql % ("(?, ?, ?, ?, ?)",), rows)
 
             # Mark the progress.
             self.db_pool.updates._background_update_progress_txn(
-- 
cgit 1.5.1


From 16c5d95b594e4fe146947c4848057ebe0b9f900b Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 13 Oct 2022 18:32:16 +0100
Subject: Optimise the event_push_backfill_thread_id bg job (#14172)

Co-authored-by: Erik Johnston <erik@matrix.org>
---
 changelog.d/14172.bugfix                             |  1 +
 synapse/storage/databases/main/event_push_actions.py | 10 ++++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14172.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14172.bugfix b/changelog.d/14172.bugfix
new file mode 100644
index 0000000000..36521c670c
--- /dev/null
+++ b/changelog.d/14172.bugfix
@@ -0,0 +1 @@
+Fix poor performance of the `event_push_backfill_thread_id` background update, which was introduced in Synapse 1.68.0rc1.
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 87d07f7d9b..7f7bcb7094 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -297,9 +297,15 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             sql = f"""
             UPDATE {table_name}
             SET thread_id = 'main'
-            WHERE stream_ordering <= ? AND thread_id IS NULL
+            WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL
             """
-            txn.execute(sql, (max_stream_ordering,))
+            txn.execute(
+                sql,
+                (
+                    start_stream_ordering,
+                    max_stream_ordering,
+                ),
+            )
 
             # Update progress.
             processed_rows = txn.rowcount
-- 
cgit 1.5.1


From 9ff4155f6cc9fc0b7aff82da9f0a1cae677dbda5 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 14 Oct 2022 07:10:44 -0400
Subject: Properly invalidate get_thread_id cache. (#14163)

This was missed in 2b6d41ebd685fb546e52acdbcb0024dfcf5a5db1 (#13824).
---
 changelog.d/14163.feature               | 1 +
 synapse/storage/databases/main/cache.py | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 changelog.d/14163.feature

(limited to 'synapse')

diff --git a/changelog.d/14163.feature b/changelog.d/14163.feature
new file mode 100644
index 0000000000..5d0ae16e13
--- /dev/null
+++ b/changelog.d/14163.feature
@@ -0,0 +1 @@
+Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)).
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index 0ce3156c9c..b47fc606c7 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -244,6 +244,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
             # redacted.
             self._attempt_to_invalidate_cache("get_relations_for_event", (redacts,))
             self._attempt_to_invalidate_cache("get_applicable_edit", (redacts,))
+            self._attempt_to_invalidate_cache("get_thread_id", (redacts,))
 
         if etype == EventTypes.Member:
             self._membership_stream_cache.entity_has_changed(state_key, stream_ordering)
-- 
cgit 1.5.1


From c3e4edb4d6ba33383bc056e3ff22b2d034d3e248 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 14 Oct 2022 07:16:50 -0400
Subject: Stabilize the threads API. (#14175)

Stabilize the threads API (MSC3856) by supporting (only) the v1
path for the endpoint.

This also marks the API as safe for workers since it is a read-only
API.
---
 changelog.d/13394.feature             |  2 +-
 changelog.d/14175.feature             |  1 +
 docker/configure_workers_and_start.py |  1 +
 docs/workers.md                       |  1 +
 synapse/config/experimental.py        |  3 ---
 synapse/rest/client/relations.py      |  9 ++-----
 tests/rest/client/test_relations.py   | 47 +++++++++++++++++++++--------------
 7 files changed, 35 insertions(+), 29 deletions(-)
 create mode 100644 changelog.d/14175.feature

(limited to 'synapse')

diff --git a/changelog.d/13394.feature b/changelog.d/13394.feature
index 68de079cf3..df3ce45a76 100644
--- a/changelog.d/13394.feature
+++ b/changelog.d/13394.feature
@@ -1 +1 @@
-Experimental support for [MSC3856](https://github.com/matrix-org/matrix-spec-proposals/pull/3856): threads list API.
+Support for [MSC3856](https://github.com/matrix-org/matrix-spec-proposals/pull/3856): threads list API.
diff --git a/changelog.d/14175.feature b/changelog.d/14175.feature
new file mode 100644
index 0000000000..df3ce45a76
--- /dev/null
+++ b/changelog.d/14175.feature
@@ -0,0 +1 @@
+Support for [MSC3856](https://github.com/matrix-org/matrix-spec-proposals/pull/3856): threads list API.
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index 8e7f605b24..d708237f69 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -118,6 +118,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
             "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state$",
             "^/_matrix/client/v1/rooms/.*/hierarchy$",
             "^/_matrix/client/(v1|unstable)/rooms/.*/relations/",
+            "^/_matrix/client/v1/rooms/.*/threads$",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/login$",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/account/3pid$",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/account/whoami$",
diff --git a/docs/workers.md b/docs/workers.md
index e8d6cbaf8b..c27b3f8bd5 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -204,6 +204,7 @@ information.
     ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state$
     ^/_matrix/client/v1/rooms/.*/hierarchy$
     ^/_matrix/client/(v1|unstable)/rooms/.*/relations/
+    ^/_matrix/client/v1/rooms/.*/threads$
     ^/_matrix/client/unstable/org.matrix.msc2716/rooms/.*/batch_send$
     ^/_matrix/client/unstable/im.nheko.summary/rooms/.*/summary$
     ^/_matrix/client/(r0|v3|unstable)/account/3pid$
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 1860006536..f44655516e 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -101,9 +101,6 @@ class ExperimentalConfig(Config):
         # MSC3848: Introduce errcodes for specific event sending failures
         self.msc3848_enabled: bool = experimental.get("msc3848_enabled", False)
 
-        # MSC3856: Threads list API
-        self.msc3856_enabled: bool = experimental.get("msc3856_enabled", False)
-
         # MSC3852: Expose last seen user agent field on /_matrix/client/v3/devices.
         self.msc3852_enabled: bool = experimental.get("msc3852_enabled", False)
 
diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py
index d1aa1947a5..9dd59196d9 100644
--- a/synapse/rest/client/relations.py
+++ b/synapse/rest/client/relations.py
@@ -82,11 +82,7 @@ class RelationPaginationServlet(RestServlet):
 
 
 class ThreadsServlet(RestServlet):
-    PATTERNS = (
-        re.compile(
-            "^/_matrix/client/unstable/org.matrix.msc3856/rooms/(?P<room_id>[^/]*)/threads"
-        ),
-    )
+    PATTERNS = (re.compile("^/_matrix/client/v1/rooms/(?P<room_id>[^/]*)/threads"),)
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -126,5 +122,4 @@ class ThreadsServlet(RestServlet):
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     RelationPaginationServlet(hs).register(http_server)
-    if hs.config.experimental.msc3856_enabled:
-        ThreadsServlet(hs).register(http_server)
+    ThreadsServlet(hs).register(http_server)
diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py
index d595295e2c..f5c1070b2c 100644
--- a/tests/rest/client/test_relations.py
+++ b/tests/rest/client/test_relations.py
@@ -1710,7 +1710,15 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
 
 
 class ThreadsTestCase(BaseRelationsTestCase):
-    @unittest.override_config({"experimental_features": {"msc3856_enabled": True}})
+    def _get_threads(self, body: JsonDict) -> List[Tuple[str, str]]:
+        return [
+            (
+                ev["event_id"],
+                ev["unsigned"]["m.relations"]["m.thread"]["latest_event"]["event_id"],
+            )
+            for ev in body["chunk"]
+        ]
+
     def test_threads(self) -> None:
         """Create threads and ensure the ordering is due to their latest event."""
         # Create 2 threads.
@@ -1718,32 +1726,37 @@ class ThreadsTestCase(BaseRelationsTestCase):
         res = self.helper.send(self.room, body="Thread Root!", tok=self.user_token)
         thread_2 = res["event_id"]
 
-        self._send_relation(RelationTypes.THREAD, "m.room.test")
-        self._send_relation(RelationTypes.THREAD, "m.room.test", parent_id=thread_2)
+        channel = self._send_relation(RelationTypes.THREAD, "m.room.test")
+        reply_1 = channel.json_body["event_id"]
+        channel = self._send_relation(
+            RelationTypes.THREAD, "m.room.test", parent_id=thread_2
+        )
+        reply_2 = channel.json_body["event_id"]
 
         # Request the threads in the room.
         channel = self.make_request(
             "GET",
-            f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads",
+            f"/_matrix/client/v1/rooms/{self.room}/threads",
             access_token=self.user_token,
         )
         self.assertEquals(200, channel.code, channel.json_body)
-        thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]]
-        self.assertEqual(thread_roots, [thread_2, thread_1])
+        threads = self._get_threads(channel.json_body)
+        self.assertEqual(threads, [(thread_2, reply_2), (thread_1, reply_1)])
 
         # Update the first thread, the ordering should swap.
-        self._send_relation(RelationTypes.THREAD, "m.room.test")
+        channel = self._send_relation(RelationTypes.THREAD, "m.room.test")
+        reply_3 = channel.json_body["event_id"]
 
         channel = self.make_request(
             "GET",
-            f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads",
+            f"/_matrix/client/v1/rooms/{self.room}/threads",
             access_token=self.user_token,
         )
         self.assertEquals(200, channel.code, channel.json_body)
-        thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]]
-        self.assertEqual(thread_roots, [thread_1, thread_2])
+        # Tuple of (thread ID, latest event ID) for each thread.
+        threads = self._get_threads(channel.json_body)
+        self.assertEqual(threads, [(thread_1, reply_3), (thread_2, reply_2)])
 
-    @unittest.override_config({"experimental_features": {"msc3856_enabled": True}})
     def test_pagination(self) -> None:
         """Create threads and paginate through them."""
         # Create 2 threads.
@@ -1757,7 +1770,7 @@ class ThreadsTestCase(BaseRelationsTestCase):
         # Request the threads in the room.
         channel = self.make_request(
             "GET",
-            f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads?limit=1",
+            f"/_matrix/client/v1/rooms/{self.room}/threads?limit=1",
             access_token=self.user_token,
         )
         self.assertEquals(200, channel.code, channel.json_body)
@@ -1771,7 +1784,7 @@ class ThreadsTestCase(BaseRelationsTestCase):
 
         channel = self.make_request(
             "GET",
-            f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads?limit=1&from={next_batch}",
+            f"/_matrix/client/v1/rooms/{self.room}/threads?limit=1&from={next_batch}",
             access_token=self.user_token,
         )
         self.assertEquals(200, channel.code, channel.json_body)
@@ -1780,7 +1793,6 @@ class ThreadsTestCase(BaseRelationsTestCase):
 
         self.assertNotIn("next_batch", channel.json_body, channel.json_body)
 
-    @unittest.override_config({"experimental_features": {"msc3856_enabled": True}})
     def test_include(self) -> None:
         """Filtering threads to all or participated in should work."""
         # Thread 1 has the user as the root event.
@@ -1807,7 +1819,7 @@ class ThreadsTestCase(BaseRelationsTestCase):
         # All threads in the room.
         channel = self.make_request(
             "GET",
-            f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads",
+            f"/_matrix/client/v1/rooms/{self.room}/threads",
             access_token=self.user_token,
         )
         self.assertEquals(200, channel.code, channel.json_body)
@@ -1819,14 +1831,13 @@ class ThreadsTestCase(BaseRelationsTestCase):
         # Only participated threads.
         channel = self.make_request(
             "GET",
-            f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads?include=participated",
+            f"/_matrix/client/v1/rooms/{self.room}/threads?include=participated",
             access_token=self.user_token,
         )
         self.assertEquals(200, channel.code, channel.json_body)
         thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]]
         self.assertEqual(thread_roots, [thread_2, thread_1], channel.json_body)
 
-    @unittest.override_config({"experimental_features": {"msc3856_enabled": True}})
     def test_ignored_user(self) -> None:
         """Events from ignored users should be ignored."""
         # Thread 1 has a reply from an ignored user.
@@ -1852,7 +1863,7 @@ class ThreadsTestCase(BaseRelationsTestCase):
         # Only thread 1 is returned.
         channel = self.make_request(
             "GET",
-            f"/_matrix/client/unstable/org.matrix.msc3856/rooms/{self.room}/threads",
+            f"/_matrix/client/v1/rooms/{self.room}/threads",
             access_token=self.user_token,
         )
         self.assertEquals(200, channel.code, channel.json_body)
-- 
cgit 1.5.1


From 126a15794c95002560709283640ad412636b29b8 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 14 Oct 2022 08:30:05 -0400
Subject: Do not allow a None-limit on PaginationConfig. (#14146)

The callers either set a default limit or manually handle a None-limit
later on (by setting a default value).

Update the callers to always instantiate PaginationConfig with a default
limit and then assume the limit is non-None.
---
 changelog.d/14146.removal                |  1 +
 synapse/handlers/account_data.py         |  2 +-
 synapse/handlers/initial_sync.py         | 27 ++++-----------------------
 synapse/handlers/pagination.py           |  5 -----
 synapse/handlers/presence.py             |  4 +++-
 synapse/handlers/receipts.py             |  2 +-
 synapse/handlers/relations.py            |  3 ---
 synapse/handlers/room.py                 |  2 +-
 synapse/handlers/typing.py               |  2 +-
 synapse/rest/client/events.py            |  4 +++-
 synapse/rest/client/initial_sync.py      |  4 +++-
 synapse/rest/client/room.py              |  4 +++-
 synapse/storage/databases/main/stream.py |  2 --
 synapse/streams/__init__.py              |  2 +-
 synapse/streams/config.py                | 12 +++++-------
 tests/rest/client/test_typing.py         |  3 ++-
 16 files changed, 29 insertions(+), 50 deletions(-)
 create mode 100644 changelog.d/14146.removal

(limited to 'synapse')

diff --git a/changelog.d/14146.removal b/changelog.d/14146.removal
new file mode 100644
index 0000000000..08fa752897
--- /dev/null
+++ b/changelog.d/14146.removal
@@ -0,0 +1 @@
+Remove the unstable identifier for [MSC3715](https://github.com/matrix-org/matrix-doc/pull/3715).
diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py
index 0478448b47..fc21d58001 100644
--- a/synapse/handlers/account_data.py
+++ b/synapse/handlers/account_data.py
@@ -225,7 +225,7 @@ class AccountDataEventSource(EventSource[int, JsonDict]):
         self,
         user: UserID,
         from_key: int,
-        limit: Optional[int],
+        limit: int,
         room_ids: Collection[str],
         is_guest: bool,
         explicit_room_id: Optional[str] = None,
diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py
index 860c82c110..9c335e6863 100644
--- a/synapse/handlers/initial_sync.py
+++ b/synapse/handlers/initial_sync.py
@@ -57,13 +57,7 @@ class InitialSyncHandler:
         self.validator = EventValidator()
         self.snapshot_cache: ResponseCache[
             Tuple[
-                str,
-                Optional[StreamToken],
-                Optional[StreamToken],
-                str,
-                Optional[int],
-                bool,
-                bool,
+                str, Optional[StreamToken], Optional[StreamToken], str, int, bool, bool
             ]
         ] = ResponseCache(hs.get_clock(), "initial_sync_cache")
         self._event_serializer = hs.get_event_client_serializer()
@@ -154,11 +148,6 @@ class InitialSyncHandler:
 
         public_room_ids = await self.store.get_public_room_ids()
 
-        if pagin_config.limit is not None:
-            limit = pagin_config.limit
-        else:
-            limit = 10
-
         serializer_options = SerializeEventConfig(as_client_event=as_client_event)
 
         async def handle_room(event: RoomsForUser) -> None:
@@ -210,7 +199,7 @@ class InitialSyncHandler:
                             run_in_background(
                                 self.store.get_recent_events_for_room,
                                 event.room_id,
-                                limit=limit,
+                                limit=pagin_config.limit,
                                 end_token=room_end_token,
                             ),
                             deferred_room_state,
@@ -360,15 +349,11 @@ class InitialSyncHandler:
             member_event_id
         )
 
-        limit = pagin_config.limit if pagin_config else None
-        if limit is None:
-            limit = 10
-
         leave_position = await self.store.get_position_for_event(member_event_id)
         stream_token = leave_position.to_room_stream_token()
 
         messages, token = await self.store.get_recent_events_for_room(
-            room_id, limit=limit, end_token=stream_token
+            room_id, limit=pagin_config.limit, end_token=stream_token
         )
 
         messages = await filter_events_for_client(
@@ -420,10 +405,6 @@ class InitialSyncHandler:
 
         now_token = self.hs.get_event_sources().get_current_token()
 
-        limit = pagin_config.limit if pagin_config else None
-        if limit is None:
-            limit = 10
-
         room_members = [
             m
             for m in current_state.values()
@@ -467,7 +448,7 @@ class InitialSyncHandler:
                     run_in_background(
                         self.store.get_recent_events_for_room,
                         room_id,
-                        limit=limit,
+                        limit=pagin_config.limit,
                         end_token=now_token.room_key,
                     ),
                 ),
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index 1f83bab836..a4ca9cb8b4 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -458,11 +458,6 @@ class PaginationHandler:
             # `/messages` should still works with live tokens when manually provided.
             assert from_token.room_key.topological is not None
 
-        if pagin_config.limit is None:
-            # This shouldn't happen as we've set a default limit before this
-            # gets called.
-            raise Exception("limit not set")
-
         room_token = from_token.room_key
 
         async with self.pagination_lock.read(room_id):
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 4e575ffbaa..2670e561d7 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -1596,7 +1596,9 @@ class PresenceEventSource(EventSource[int, UserPresenceState]):
         self,
         user: UserID,
         from_key: Optional[int],
-        limit: Optional[int] = None,
+        # Having a default limit doesn't match the EventSource API, but some
+        # callers do not provide it. It is unused in this class.
+        limit: int = 0,
         room_ids: Optional[Collection[str]] = None,
         is_guest: bool = False,
         explicit_room_id: Optional[str] = None,
diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py
index 4a7ec9e426..ac01582442 100644
--- a/synapse/handlers/receipts.py
+++ b/synapse/handlers/receipts.py
@@ -257,7 +257,7 @@ class ReceiptEventSource(EventSource[int, JsonDict]):
         self,
         user: UserID,
         from_key: int,
-        limit: Optional[int],
+        limit: int,
         room_ids: Iterable[str],
         is_guest: bool,
         explicit_room_id: Optional[str] = None,
diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py
index 1fdd7a10bc..0a0c6d938e 100644
--- a/synapse/handlers/relations.py
+++ b/synapse/handlers/relations.py
@@ -116,9 +116,6 @@ class RelationsHandler:
         if event is None:
             raise SynapseError(404, "Unknown parent event.")
 
-        # TODO Update pagination config to not allow None limits.
-        assert pagin_config.limit is not None
-
         # Note that ignored users are not passed into get_relations_for_event
         # below. Ignored users are handled in filter_events_for_client (and by
         # not passing them in here we should get a better cache hit rate).
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 57ab05ad25..4e1aacb408 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -1646,7 +1646,7 @@ class RoomEventSource(EventSource[RoomStreamToken, EventBase]):
         self,
         user: UserID,
         from_key: RoomStreamToken,
-        limit: Optional[int],
+        limit: int,
         room_ids: Collection[str],
         is_guest: bool,
         explicit_room_id: Optional[str] = None,
diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py
index f953691669..a0ea719430 100644
--- a/synapse/handlers/typing.py
+++ b/synapse/handlers/typing.py
@@ -513,7 +513,7 @@ class TypingNotificationEventSource(EventSource[int, JsonDict]):
         self,
         user: UserID,
         from_key: int,
-        limit: Optional[int],
+        limit: int,
         room_ids: Iterable[str],
         is_guest: bool,
         explicit_room_id: Optional[str] = None,
diff --git a/synapse/rest/client/events.py b/synapse/rest/client/events.py
index 916f5230f1..782e7d14e8 100644
--- a/synapse/rest/client/events.py
+++ b/synapse/rest/client/events.py
@@ -50,7 +50,9 @@ class EventStreamRestServlet(RestServlet):
                 raise SynapseError(400, "Guest users must specify room_id param")
         room_id = parse_string(request, "room_id")
 
-        pagin_config = await PaginationConfig.from_request(self.store, request)
+        pagin_config = await PaginationConfig.from_request(
+            self.store, request, default_limit=10
+        )
         timeout = EventStreamRestServlet.DEFAULT_LONGPOLL_TIME_MS
         if b"timeout" in args:
             try:
diff --git a/synapse/rest/client/initial_sync.py b/synapse/rest/client/initial_sync.py
index cfadcb8e50..9b1bb8b521 100644
--- a/synapse/rest/client/initial_sync.py
+++ b/synapse/rest/client/initial_sync.py
@@ -39,7 +39,9 @@ class InitialSyncRestServlet(RestServlet):
         requester = await self.auth.get_user_by_req(request)
         args: Dict[bytes, List[bytes]] = request.args  # type: ignore
         as_client_event = b"raw" not in args
-        pagination_config = await PaginationConfig.from_request(self.store, request)
+        pagination_config = await PaginationConfig.from_request(
+            self.store, request, default_limit=10
+        )
         include_archived = parse_boolean(request, "archived", default=False)
         content = await self.initial_sync_handler.snapshot_all_rooms(
             user_id=requester.user.to_string(),
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index b6dedbed04..01e5079963 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -729,7 +729,9 @@ class RoomInitialSyncRestServlet(RestServlet):
         self, request: SynapseRequest, room_id: str
     ) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request, allow_guest=True)
-        pagination_config = await PaginationConfig.from_request(self.store, request)
+        pagination_config = await PaginationConfig.from_request(
+            self.store, request, default_limit=10
+        )
         content = await self.initial_sync_handler.room_initial_sync(
             room_id=room_id, requester=requester, pagin_config=pagination_config
         )
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index ffeb2b3683..5baffbfe55 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -1200,8 +1200,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
             `to_token`), or `limit` is zero.
         """
 
-        assert int(limit) >= 0
-
         # Tokens really represent positions between elements, but we use
         # the convention of pointing to the event before the gap. Hence
         # we have a bit of asymmetry when it comes to equalities.
diff --git a/synapse/streams/__init__.py b/synapse/streams/__init__.py
index 806b671305..2dcd43d0a2 100644
--- a/synapse/streams/__init__.py
+++ b/synapse/streams/__init__.py
@@ -27,7 +27,7 @@ class EventSource(Generic[K, R]):
         self,
         user: UserID,
         from_key: K,
-        limit: Optional[int],
+        limit: int,
         room_ids: Collection[str],
         is_guest: bool,
         explicit_room_id: Optional[str] = None,
diff --git a/synapse/streams/config.py b/synapse/streams/config.py
index f6f7bf3d8b..6df2de919c 100644
--- a/synapse/streams/config.py
+++ b/synapse/streams/config.py
@@ -35,14 +35,14 @@ class PaginationConfig:
     from_token: Optional[StreamToken]
     to_token: Optional[StreamToken]
     direction: str
-    limit: Optional[int]
+    limit: int
 
     @classmethod
     async def from_request(
         cls,
         store: "DataStore",
         request: SynapseRequest,
-        default_limit: Optional[int] = None,
+        default_limit: int,
         default_dir: str = "f",
     ) -> "PaginationConfig":
         direction = parse_string(
@@ -69,12 +69,10 @@ class PaginationConfig:
             raise SynapseError(400, "'to' parameter is invalid")
 
         limit = parse_integer(request, "limit", default=default_limit)
+        if limit < 0:
+            raise SynapseError(400, "Limit must be 0 or above")
 
-        if limit:
-            if limit < 0:
-                raise SynapseError(400, "Limit must be 0 or above")
-
-            limit = min(int(limit), MAX_LIMIT)
+        limit = min(limit, MAX_LIMIT)
 
         try:
             return PaginationConfig(from_tok, to_tok, direction, limit)
diff --git a/tests/rest/client/test_typing.py b/tests/rest/client/test_typing.py
index 61b66d7685..fdc433a8b5 100644
--- a/tests/rest/client/test_typing.py
+++ b/tests/rest/client/test_typing.py
@@ -59,7 +59,8 @@ class RoomTypingTestCase(unittest.HomeserverTestCase):
             self.event_source.get_new_events(
                 user=UserID.from_string(self.user_id),
                 from_key=0,
-                limit=None,
+                # Limit is unused.
+                limit=0,
                 room_ids=[self.room_id],
                 is_guest=False,
             )
-- 
cgit 1.5.1


From 97b3d037c043d5c91c2a36109cab0c668a6a13ed Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Fri, 14 Oct 2022 13:48:33 +0100
Subject: Don't require optional `invite_room_state` field on fed v2 invite
 (#14083)

---
 changelog.d/14083.bugfix                          | 1 +
 synapse/federation/transport/server/federation.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14083.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14083.bugfix b/changelog.d/14083.bugfix
new file mode 100644
index 0000000000..752982b1ca
--- /dev/null
+++ b/changelog.d/14083.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where Synapse would error on the optional 'invite_room_state' field not being provided to [`PUT /_matrix/federation/v2/invite/{roomId}/{eventId}`](https://spec.matrix.org/v1.4/server-server-api/#put_matrixfederationv2inviteroomideventid).
\ No newline at end of file
diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py
index 6bb4659c4c..6f11138b57 100644
--- a/synapse/federation/transport/server/federation.py
+++ b/synapse/federation/transport/server/federation.py
@@ -489,7 +489,7 @@ class FederationV2InviteServlet(BaseFederationServerServlet):
 
         room_version = content["room_version"]
         event = content["event"]
-        invite_room_state = content["invite_room_state"]
+        invite_room_state = content.get("invite_room_state", [])
 
         # Synapse expects invite_room_state to be in unsigned, as it is in v1
         # API
-- 
cgit 1.5.1


From 022f25b3090f7f3a494cecb398bfdbbc2488c2bf Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 14 Oct 2022 09:21:55 -0400
Subject: Advertise support for Matrix 1.4. (#14184)

All features / changes in Matrix 1.4 are now supported in
Synapse.
---
 changelog.d/14032.feature       | 2 +-
 changelog.d/14184.feature       | 1 +
 synapse/rest/client/versions.py | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14184.feature

(limited to 'synapse')

diff --git a/changelog.d/14032.feature b/changelog.d/14032.feature
index bb221d3ca6..016c704227 100644
--- a/changelog.d/14032.feature
+++ b/changelog.d/14032.feature
@@ -1 +1 @@
-Advertise Matrix 1.3 support on `/_matrix/client/versions`.
+Advertise support for Matrix 1.3 and 1.4 on `/_matrix/client/versions`.
diff --git a/changelog.d/14184.feature b/changelog.d/14184.feature
new file mode 100644
index 0000000000..016c704227
--- /dev/null
+++ b/changelog.d/14184.feature
@@ -0,0 +1 @@
+Advertise support for Matrix 1.3 and 1.4 on `/_matrix/client/versions`.
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index d1d2e5f7e3..4e1fd2bbe7 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -76,6 +76,7 @@ class VersionsRestServlet(RestServlet):
                     "v1.1",
                     "v1.2",
                     "v1.3",
+                    "v1.4",
                 ],
                 # as per MSC1497:
                 "unstable_features": {
-- 
cgit 1.5.1


From d241a1350d5b0e1cf8262114f0cb34325cb91a26 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 14 Oct 2022 14:46:23 +0100
Subject: Fix background update to use an index (#14181)

---
 changelog.d/14181.bugfix                           |  1 +
 .../storage/databases/main/event_push_actions.py   | 62 ++++++++++++++++++----
 2 files changed, 52 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/14181.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14181.bugfix b/changelog.d/14181.bugfix
new file mode 100644
index 0000000000..36521c670c
--- /dev/null
+++ b/changelog.d/14181.bugfix
@@ -0,0 +1 @@
+Fix poor performance of the `event_push_backfill_thread_id` background update, which was introduced in Synapse 1.68.0rc1.
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 7f7bcb7094..72cf91eb39 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -269,11 +269,11 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
         event_push_actions_done = progress.get("event_push_actions_done", False)
 
         def add_thread_id_txn(
-            txn: LoggingTransaction, table_name: str, start_stream_ordering: int
+            txn: LoggingTransaction, start_stream_ordering: int
         ) -> int:
-            sql = f"""
+            sql = """
             SELECT stream_ordering
-            FROM {table_name}
+            FROM event_push_actions
             WHERE
                 thread_id IS NULL
                 AND stream_ordering > ?
@@ -285,7 +285,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             # No more rows to process.
             rows = txn.fetchall()
             if not rows:
-                progress[f"{table_name}_done"] = True
+                progress["event_push_actions_done"] = True
                 self.db_pool.updates._background_update_progress_txn(
                     txn, "event_push_backfill_thread_id", progress
                 )
@@ -294,8 +294,8 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             # Update the thread ID for any of those rows.
             max_stream_ordering = rows[-1][0]
 
-            sql = f"""
-            UPDATE {table_name}
+            sql = """
+            UPDATE event_push_actions
             SET thread_id = 'main'
             WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL
             """
@@ -309,7 +309,50 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
             # Update progress.
             processed_rows = txn.rowcount
-            progress[f"max_{table_name}_stream_ordering"] = max_stream_ordering
+            progress["max_event_push_actions_stream_ordering"] = max_stream_ordering
+            self.db_pool.updates._background_update_progress_txn(
+                txn, "event_push_backfill_thread_id", progress
+            )
+
+            return processed_rows
+
+        def add_thread_id_summary_txn(txn: LoggingTransaction) -> int:
+            min_user_id = progress.get("max_summary_user_id", "")
+            min_room_id = progress.get("max_summary_room_id", "")
+
+            # Slightly overcomplicated query for getting the Nth user ID / room
+            # ID tuple, or the last if there are less than N remaining.
+            sql = """
+            SELECT user_id, room_id FROM (
+                SELECT user_id, room_id FROM event_push_summary
+                WHERE (user_id, room_id) > (?, ?)
+                    AND thread_id IS NULL
+                ORDER BY user_id, room_id
+                LIMIT ?
+            ) AS e
+            ORDER BY user_id DESC, room_id DESC
+            LIMIT 1
+            """
+
+            txn.execute(sql, (min_user_id, min_room_id, batch_size))
+            row = txn.fetchone()
+            if not row:
+                return 0
+
+            max_user_id, max_room_id = row
+
+            sql = """
+            UPDATE event_push_summary
+            SET thread_id = 'main'
+            WHERE
+                (?, ?) < (user_id, room_id) AND (user_id, room_id) <= (?, ?)
+                AND thread_id IS NULL
+            """
+            txn.execute(sql, (min_user_id, min_room_id, max_user_id, max_room_id))
+            processed_rows = txn.rowcount
+
+            progress["max_summary_user_id"] = max_user_id
+            progress["max_summary_room_id"] = max_room_id
             self.db_pool.updates._background_update_progress_txn(
                 txn, "event_push_backfill_thread_id", progress
             )
@@ -325,15 +368,12 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             result = await self.db_pool.runInteraction(
                 "event_push_backfill_thread_id",
                 add_thread_id_txn,
-                "event_push_actions",
                 progress.get("max_event_push_actions_stream_ordering", 0),
             )
         else:
             result = await self.db_pool.runInteraction(
                 "event_push_backfill_thread_id",
-                add_thread_id_txn,
-                "event_push_summary",
-                progress.get("max_event_push_summary_stream_ordering", 0),
+                add_thread_id_summary_txn,
             )
 
             # Only done after the event_push_summary table is done.
-- 
cgit 1.5.1


From d1bdeccb50550ef454067aa01dd9d004c4704633 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 14 Oct 2022 14:05:25 -0400
Subject: Accept threaded receipts for events related to the root event.
 (#14174)

The root node of a thread (and events related to it) are considered
"part of a thread" when validating receipts. This allows clients which
show the root node in both the main timeline and the threaded timeline
to easily send receipts in either.

Note that threaded notifications are not created for these events; these
events create notifications on the main timeline.
---
 changelog.d/14174.feature                   |   1 +
 synapse/rest/client/receipts.py             |  44 ++++++++++-
 synapse/storage/databases/main/cache.py     |   1 +
 synapse/storage/databases/main/relations.py |  98 ++++++++++++++++++++++--
 tests/storage/test_relations.py             | 111 ++++++++++++++++++++++++++++
 5 files changed, 247 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/14174.feature
 create mode 100644 tests/storage/test_relations.py

(limited to 'synapse')

diff --git a/changelog.d/14174.feature b/changelog.d/14174.feature
new file mode 100644
index 0000000000..5d0ae16e13
--- /dev/null
+++ b/changelog.d/14174.feature
@@ -0,0 +1 @@
+Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)).
diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py
index 14dec7ac4e..18a282b22c 100644
--- a/synapse/rest/client/receipts.py
+++ b/synapse/rest/client/receipts.py
@@ -15,7 +15,7 @@
 import logging
 from typing import TYPE_CHECKING, Tuple
 
-from synapse.api.constants import ReceiptTypes
+from synapse.api.constants import MAIN_TIMELINE, ReceiptTypes
 from synapse.api.errors import Codes, SynapseError
 from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_json_object_from_request
@@ -83,7 +83,7 @@ class ReceiptRestServlet(RestServlet):
                 )
 
             # Ensure the event ID roughly correlates to the thread ID.
-            if thread_id != await self._main_store.get_thread_id(event_id):
+            if not await self._is_event_in_thread(event_id, thread_id):
                 raise SynapseError(
                     400,
                     f"event_id {event_id} is not related to thread {thread_id}",
@@ -109,6 +109,46 @@ class ReceiptRestServlet(RestServlet):
 
         return 200, {}
 
+    async def _is_event_in_thread(self, event_id: str, thread_id: str) -> bool:
+        """
+        The event must be related to the thread ID (in a vague sense) to ensure
+        clients aren't sending bogus receipts.
+
+        A thread ID is considered valid for a given event E if:
+
+        1. E has a thread relation which matches the thread ID;
+        2. E has another event which has a thread relation to E matching the
+           thread ID; or
+        3. E is recursively related (via any rel_type) to an event which
+           satisfies 1 or 2.
+
+        Given the following DAG:
+
+            A <---[m.thread]-- B <--[m.annotation]-- C
+            ^
+            |--[m.reference]-- D <--[m.annotation]-- E
+
+        It is valid to send a receipt for thread A on A, B, C, D, or E.
+
+        It is valid to send a receipt for the main timeline on A, D, and E.
+
+        Args:
+            event_id: The event ID to check.
+            thread_id: The thread ID the event is potentially part of.
+
+        Returns:
+            True if the event belongs to the given thread, otherwise False.
+        """
+
+        # If the receipt is on the main timeline, it is enough to check whether
+        # the event is directly related to a thread.
+        if thread_id == MAIN_TIMELINE:
+            return MAIN_TIMELINE == await self._main_store.get_thread_id(event_id)
+
+        # Otherwise, check if the event is directly part of a thread, or is the
+        # root message (or related to the root message) of a thread.
+        return thread_id == await self._main_store.get_thread_id_for_receipts(event_id)
+
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     ReceiptRestServlet(hs).register(http_server)
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index b47fc606c7..ed0be4abe5 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -245,6 +245,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
             self._attempt_to_invalidate_cache("get_relations_for_event", (redacts,))
             self._attempt_to_invalidate_cache("get_applicable_edit", (redacts,))
             self._attempt_to_invalidate_cache("get_thread_id", (redacts,))
+            self._attempt_to_invalidate_cache("get_thread_id_for_receipts", (redacts,))
 
         if etype == EventTypes.Member:
             self._membership_stream_cache.entity_has_changed(state_key, stream_ordering)
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index 7c54ce0b2e..1de62ee9df 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -946,6 +946,20 @@ class RelationsWorkerStore(SQLBaseStore):
         Get the thread ID for an event. This considers multi-level relations,
         e.g. an annotation to an event which is part of a thread.
 
+        It only searches up the relations tree, i.e. it only searches for events
+        which the given event is related to (and which those events are related
+        to, etc.)
+
+        Given the following DAG:
+
+            A <---[m.thread]-- B <--[m.annotation]-- C
+            ^
+            |--[m.reference]-- D <--[m.annotation]-- E
+
+        get_thread_id(X) considers events B and C as part of thread A.
+
+        See also get_thread_id_for_receipts.
+
         Args:
             event_id: The event ID to fetch the thread ID for.
 
@@ -953,22 +967,32 @@ class RelationsWorkerStore(SQLBaseStore):
             The event ID of the root event in the thread, if this event is part
             of a thread. "main", otherwise.
         """
-        # Since event relations form a tree, we should only ever find 0 or 1
-        # results from the below query.
+
+        # Recurse event relations up to the *root* event, then search that chain
+        # of relations for a thread relation. If one is found, the root event is
+        # returned.
+        #
+        # Note that this should only ever find 0 or 1 entries since it is invalid
+        # for an event to have a thread relation to an event which also has a
+        # relation.
         sql = """
             WITH RECURSIVE related_events AS (
-                SELECT event_id, relates_to_id, relation_type
+                SELECT event_id, relates_to_id, relation_type, 0 depth
                 FROM event_relations
                 WHERE event_id = ?
-                UNION SELECT e.event_id, e.relates_to_id, e.relation_type
+                UNION SELECT e.event_id, e.relates_to_id, e.relation_type, depth + 1
                 FROM event_relations e
                 INNER JOIN related_events r ON r.relates_to_id = e.event_id
-            ) SELECT relates_to_id FROM related_events WHERE relation_type = 'm.thread';
+                WHERE depth <= 3
+            )
+            SELECT relates_to_id FROM related_events
+            WHERE relation_type = 'm.thread'
+            ORDER BY depth DESC
+            LIMIT 1;
         """
 
         def _get_thread_id(txn: LoggingTransaction) -> str:
             txn.execute(sql, (event_id,))
-            # TODO Should we ensure there's only a single result here?
             row = txn.fetchone()
             if row:
                 return row[0]
@@ -978,6 +1002,68 @@ class RelationsWorkerStore(SQLBaseStore):
 
         return await self.db_pool.runInteraction("get_thread_id", _get_thread_id)
 
+    @cached()
+    async def get_thread_id_for_receipts(self, event_id: str) -> str:
+        """
+        Get the thread ID for an event by traversing to the top-most related event
+        and confirming any children events form a thread.
+
+        Given the following DAG:
+
+            A <---[m.thread]-- B <--[m.annotation]-- C
+            ^
+            |--[m.reference]-- D <--[m.annotation]-- E
+
+        get_thread_id_for_receipts(X) considers events A, B, C, D, and E as part
+        of thread A.
+
+        See also get_thread_id.
+
+        Args:
+            event_id: The event ID to fetch the thread ID for.
+
+        Returns:
+            The event ID of the root event in the thread, if this event is part
+            of a thread. "main", otherwise.
+        """
+
+        # Recurse event relations up to the *root* event, then search for any events
+        # related to that root node for a thread relation. If one is found, the
+        # root event is returned.
+        #
+        # Note that there cannot be thread relations in the middle of the chain since
+        # it is invalid for an event to have a thread relation to an event which also
+        # has a relation.
+        sql = """
+        SELECT relates_to_id FROM event_relations WHERE relates_to_id = COALESCE((
+            WITH RECURSIVE related_events AS (
+                SELECT event_id, relates_to_id, relation_type, 0 depth
+                FROM event_relations
+                WHERE event_id = ?
+                UNION SELECT e.event_id, e.relates_to_id, e.relation_type, depth + 1
+                FROM event_relations e
+                INNER JOIN related_events r ON r.relates_to_id = e.event_id
+                WHERE depth <= 3
+            )
+            SELECT relates_to_id FROM related_events
+            ORDER BY depth DESC
+            LIMIT 1
+        ), ?) AND relation_type = 'm.thread' LIMIT 1;
+        """
+
+        def _get_related_thread_id(txn: LoggingTransaction) -> str:
+            txn.execute(sql, (event_id, event_id))
+            row = txn.fetchone()
+            if row:
+                return row[0]
+
+            # If no thread was found, it is part of the main timeline.
+            return MAIN_TIMELINE
+
+        return await self.db_pool.runInteraction(
+            "get_related_thread_id", _get_related_thread_id
+        )
+
 
 class RelationsStore(RelationsWorkerStore):
     pass
diff --git a/tests/storage/test_relations.py b/tests/storage/test_relations.py
new file mode 100644
index 0000000000..cd1d00208b
--- /dev/null
+++ b/tests/storage/test_relations.py
@@ -0,0 +1,111 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.api.constants import MAIN_TIMELINE
+from synapse.server import HomeServer
+from synapse.util import Clock
+
+from tests import unittest
+
+
+class RelationsStoreTestCase(unittest.HomeserverTestCase):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        """
+        Creates a DAG:
+
+            A <---[m.thread]-- B <--[m.annotation]-- C
+            ^
+            |--[m.reference]-- D <--[m.annotation]-- E
+
+            F <--[m.annotation]-- G
+
+        """
+        self._main_store = self.hs.get_datastores().main
+
+        self._create_relation("A", "B", "m.thread")
+        self._create_relation("B", "C", "m.annotation")
+        self._create_relation("A", "D", "m.reference")
+        self._create_relation("D", "E", "m.annotation")
+        self._create_relation("F", "G", "m.annotation")
+
+    def _create_relation(self, parent_id: str, event_id: str, rel_type: str) -> None:
+        self.get_success(
+            self._main_store.db_pool.simple_insert(
+                table="event_relations",
+                values={
+                    "event_id": event_id,
+                    "relates_to_id": parent_id,
+                    "relation_type": rel_type,
+                },
+            )
+        )
+
+    def test_get_thread_id(self) -> None:
+        """
+        Ensure that get_thread_id only searches up the tree for threads.
+        """
+        # The thread itself and children of it return the thread.
+        thread_id = self.get_success(self._main_store.get_thread_id("B"))
+        self.assertEqual("A", thread_id)
+
+        thread_id = self.get_success(self._main_store.get_thread_id("C"))
+        self.assertEqual("A", thread_id)
+
+        # But the root and events related to the root do not.
+        thread_id = self.get_success(self._main_store.get_thread_id("A"))
+        self.assertEqual(MAIN_TIMELINE, thread_id)
+
+        thread_id = self.get_success(self._main_store.get_thread_id("D"))
+        self.assertEqual(MAIN_TIMELINE, thread_id)
+
+        thread_id = self.get_success(self._main_store.get_thread_id("E"))
+        self.assertEqual(MAIN_TIMELINE, thread_id)
+
+        # Events which are not related to a thread at all should return the
+        # main timeline.
+        thread_id = self.get_success(self._main_store.get_thread_id("F"))
+        self.assertEqual(MAIN_TIMELINE, thread_id)
+
+        thread_id = self.get_success(self._main_store.get_thread_id("G"))
+        self.assertEqual(MAIN_TIMELINE, thread_id)
+
+    def test_get_thread_id_for_receipts(self) -> None:
+        """
+        Ensure that get_thread_id_for_receipts searches up and down the tree for a thread.
+        """
+        # All of the events are considered related to this thread.
+        thread_id = self.get_success(self._main_store.get_thread_id_for_receipts("A"))
+        self.assertEqual("A", thread_id)
+
+        thread_id = self.get_success(self._main_store.get_thread_id_for_receipts("B"))
+        self.assertEqual("A", thread_id)
+
+        thread_id = self.get_success(self._main_store.get_thread_id_for_receipts("C"))
+        self.assertEqual("A", thread_id)
+
+        thread_id = self.get_success(self._main_store.get_thread_id_for_receipts("D"))
+        self.assertEqual("A", thread_id)
+
+        thread_id = self.get_success(self._main_store.get_thread_id_for_receipts("E"))
+        self.assertEqual("A", thread_id)
+
+        # Events in a tree with no thread relation at all (F <- G) should return
+        # the main timeline from get_thread_id_for_receipts as well.
+        thread_id = self.get_success(self._main_store.get_thread_id_for_receipts("F"))
+        self.assertEqual(MAIN_TIMELINE, thread_id)
+
+        thread_id = self.get_success(self._main_store.get_thread_id_for_receipts("G"))
+        self.assertEqual(MAIN_TIMELINE, thread_id)
-- 
cgit 1.5.1


From 40bb37eb27e1841754a297ac1277748de7f6c1cb Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Sat, 15 Oct 2022 00:36:49 -0500
Subject: Stop getting missing `prev_events` after we already know their
 signature is invalid (#13816)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While https://github.com/matrix-org/synapse/pull/13635 stops us from doing the slow thing after we've already done it once, this PR stops us from doing one of the slow things in the first place.

Related to
 - https://github.com/matrix-org/synapse/issues/13622
    - https://github.com/matrix-org/synapse/pull/13635
 - https://github.com/matrix-org/synapse/issues/13676

Part of https://github.com/matrix-org/synapse/issues/13356

Follow-up to https://github.com/matrix-org/synapse/pull/13815 which tracks event signature failures.

With this PR, we avoid the call to the costly `_get_state_ids_after_missing_prev_event` because the signature failure now counts as a previous attempt, and we filter events based on the backoff before calling `_get_state_ids_after_missing_prev_event`.

For example, this will save us 156s out of the 185s total that this `matrix.org` `/messages` request takes. If you want to see the full Jaeger trace of this, you can drag and drop this `trace.json` into your own Jaeger, https://gist.github.com/MadLittleMods/4b12d0d0afe88c2f65ffcc907306b761

To explain this exact scenario around `/messages` -> backfill, we call `/backfill` and first check the signatures of the 100 events. We see bad signature for `$luA4l7QHhf_jadH3mI-AyFqho0U2Q-IXXUbGSMq6h6M` and `$zuOn2Rd2vsC7SUia3Hp3r6JSkSFKcc5j3QTTqW_0jDw` (both member events). Then we process the 98 events remaining that have valid signatures but one of the events references `$luA4l7QHhf_jadH3mI-AyFqho0U2Q-IXXUbGSMq6h6M` as a `prev_event`. So we have to do the whole `_get_state_ids_after_missing_prev_event` rigmarole which pulls in those same events which fail again because the signatures are still invalid.

 - `backfill`
    - `outgoing-federation-request` `/backfill`
    - `_check_sigs_and_hash_and_fetch`
       - `_check_sigs_and_hash_and_fetch_one` for each event received over backfill
          - ❗ `$luA4l7QHhf_jadH3mI-AyFqho0U2Q-IXXUbGSMq6h6M` fails with `Signature on retrieved event was invalid.`: `unable to verify signature for sender domain xxx: 401: Failed to find any key to satisfy: _FetchKeyRequest(...)`
          - ❗ `$zuOn2Rd2vsC7SUia3Hp3r6JSkSFKcc5j3QTTqW_0jDw` fails with `Signature on retrieved event was invalid.`: `unable to verify signature for sender domain xxx: 401: Failed to find any key to satisfy: _FetchKeyRequest(...)`
   - `_process_pulled_events`
      - `_process_pulled_event` for each validated event
         - ❗ Event `$Q0iMdqtz3IJYfZQU2Xk2WjB5NDF8Gg8cFSYYyKQgKJ0` references `$luA4l7QHhf_jadH3mI-AyFqho0U2Q-IXXUbGSMq6h6M` as a `prev_event` which is missing so we try to get it
            - `_get_state_ids_after_missing_prev_event`
               - `outgoing-federation-request` `/state_ids`
               - ❗ `get_pdu` for `$luA4l7QHhf_jadH3mI-AyFqho0U2Q-IXXUbGSMq6h6M` which fails the signature check again
               - ❗ `get_pdu` for `$zuOn2Rd2vsC7SUia3Hp3r6JSkSFKcc5j3QTTqW_0jDw` which fails the signature check
---
 changelog.d/13816.feature                          |   1 +
 synapse/api/errors.py                              |  21 +++
 synapse/handlers/federation.py                     |  16 ++
 synapse/handlers/federation_event.py               |  31 ++++
 synapse/storage/databases/main/event_federation.py |  54 ++++++
 tests/handlers/test_federation_event.py            | 201 ++++++++++++++++++++-
 tests/storage/test_event_federation.py             |  64 +++++++
 7 files changed, 386 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/13816.feature

(limited to 'synapse')

diff --git a/changelog.d/13816.feature b/changelog.d/13816.feature
new file mode 100644
index 0000000000..5eaa936b08
--- /dev/null
+++ b/changelog.d/13816.feature
@@ -0,0 +1 @@
+Stop fetching missing `prev_events` after we already know their signature is invalid.
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index c606207569..e0873b1913 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -640,6 +640,27 @@ class FederationError(RuntimeError):
         }
 
 
+class FederationPullAttemptBackoffError(RuntimeError):
+    """
+    Raised to indicate that we are deliberately not attempting to pull the given
+    event over federation because we've already done so recently and are backing off.
+
+    Attributes:
+        event_ids: The event_ids which we are refusing to pull
+        message: A custom error message that gives more context
+    """
+
+    def __init__(self, event_ids: List[str], message: Optional[str]):
+        self.event_ids = event_ids
+
+        if message:
+            error_message = message
+        else:
+            error_message = f"Not attempting to pull event_ids={self.event_ids} because we already tried to pull them recently (backing off)."
+
+        super().__init__(error_message)
+
+
 class HttpResponseException(CodeMessageException):
     """
     Represents an HTTP-level failure of an outbound request
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 44e70c6c3c..5f7e0a1f79 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -45,6 +45,7 @@ from synapse.api.errors import (
     Codes,
     FederationDeniedError,
     FederationError,
+    FederationPullAttemptBackoffError,
     HttpResponseException,
     LimitExceededError,
     NotFoundError,
@@ -1720,7 +1721,22 @@ class FederationHandler:
                             destination, event
                         )
                         break
+                    except FederationPullAttemptBackoffError as exc:
+                        # Log a warning about why we failed to process the event (the error message
+                        # for `FederationPullAttemptBackoffError` is pretty good)
+                        logger.warning("_sync_partial_state_room: %s", exc)
+                        # We do not record a failed pull attempt when we backoff fetching a missing
+                        # `prev_event` because not being able to fetch the `prev_events` just means
+                        # we won't be able to de-outlier the pulled event. But we can still use an
+                        # `outlier` in the state/auth chain for another event. So we shouldn't stop
+                        # a downstream event from trying to pull it.
+                        #
+                        # This avoids a cascade of backoff for all events in the DAG downstream from
+                        # one event backoff upstream.
                     except FederationError as e:
+                        # TODO: We should `record_event_failed_pull_attempt` here,
+                        #   see https://github.com/matrix-org/synapse/issues/13700
+
                         if attempt == len(destinations) - 1:
                             # We have tried every remote server for this event. Give up.
                             # TODO(faster_joins) giving up isn't the right thing to do
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index f382961099..4300e8dd40 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -44,6 +44,7 @@ from synapse.api.errors import (
     AuthError,
     Codes,
     FederationError,
+    FederationPullAttemptBackoffError,
     HttpResponseException,
     RequestSendFailed,
     SynapseError,
@@ -567,6 +568,9 @@ class FederationEventHandler:
             event: partial-state event to be de-partial-stated
 
         Raises:
+            FederationPullAttemptBackoffError if we are deliberately not attempting
+                to pull the given event over federation because we've already done so
+                recently and are backing off.
             FederationError if we fail to request state from the remote server.
         """
         logger.info("Updating state for %s", event.event_id)
@@ -901,6 +905,18 @@ class FederationEventHandler:
                     context,
                     backfilled=backfilled,
                 )
+        except FederationPullAttemptBackoffError as exc:
+            # Log a warning about why we failed to process the event (the error message
+            # for `FederationPullAttemptBackoffError` is pretty good)
+            logger.warning("_process_pulled_event: %s", exc)
+            # We do not record a failed pull attempt when we backoff fetching a missing
+            # `prev_event` because not being able to fetch the `prev_events` just means
+            # we won't be able to de-outlier the pulled event. But we can still use an
+            # `outlier` in the state/auth chain for another event. So we shouldn't stop
+            # a downstream event from trying to pull it.
+            #
+            # This avoids a cascade of backoff for all events in the DAG downstream from
+            # one event backoff upstream.
         except FederationError as e:
             await self._store.record_event_failed_pull_attempt(
                 event.room_id, event_id, str(e)
@@ -947,6 +963,9 @@ class FederationEventHandler:
             The event context.
 
         Raises:
+            FederationPullAttemptBackoffError if we are deliberately not attempting
+                to pull the given event over federation because we've already done so
+                recently and are backing off.
             FederationError if we fail to get the state from the remote server after any
                 missing `prev_event`s.
         """
@@ -957,6 +976,18 @@ class FederationEventHandler:
         seen = await self._store.have_events_in_timeline(prevs)
         missing_prevs = prevs - seen
 
+        # If we've already recently attempted to pull this missing event, don't
+        # try it again so soon. Since we have to fetch all of the prev_events, we can
+        # bail early here if we find any to ignore.
+        prevs_to_ignore = await self._store.get_event_ids_to_not_pull_from_backoff(
+            room_id, missing_prevs
+        )
+        if len(prevs_to_ignore) > 0:
+            raise FederationPullAttemptBackoffError(
+                event_ids=prevs_to_ignore,
+                message=f"While computing context for event={event_id}, not attempting to pull missing prev_event={prevs_to_ignore[0]} because we already tried to pull recently (backing off).",
+            )
+
         if not missing_prevs:
             return await self._state_handler.compute_event_context(event)
 
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 6b9a629edd..309a4ba664 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -1501,6 +1501,12 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
             event_id: The event that failed to be fetched or processed
             cause: The error message or reason that we failed to pull the event
         """
+        logger.debug(
+            "record_event_failed_pull_attempt room_id=%s, event_id=%s, cause=%s",
+            room_id,
+            event_id,
+            cause,
+        )
         await self.db_pool.runInteraction(
             "record_event_failed_pull_attempt",
             self._record_event_failed_pull_attempt_upsert_txn,
@@ -1530,6 +1536,54 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
 
         txn.execute(sql, (room_id, event_id, 1, self._clock.time_msec(), cause))
 
+    @trace
+    async def get_event_ids_to_not_pull_from_backoff(
+        self,
+        room_id: str,
+        event_ids: Collection[str],
+    ) -> List[str]:
+        """
+        Filter down the events to ones that we've failed to pull before recently. Uses
+        exponential backoff.
+
+        Args:
+            room_id: The room that the events belong to
+            event_ids: A list of events to filter down
+
+        Returns:
+            List of event_ids that should not be attempted to be pulled
+        """
+        event_failed_pull_attempts = await self.db_pool.simple_select_many_batch(
+            table="event_failed_pull_attempts",
+            column="event_id",
+            iterable=event_ids,
+            keyvalues={},
+            retcols=(
+                "event_id",
+                "last_attempt_ts",
+                "num_attempts",
+            ),
+            desc="get_event_ids_to_not_pull_from_backoff",
+        )
+
+        current_time = self._clock.time_msec()
+        return [
+            event_failed_pull_attempt["event_id"]
+            for event_failed_pull_attempt in event_failed_pull_attempts
+            # Exponential back-off (up to the upper bound) so we don't try to
+            # pull the same event over and over. ex. 2hr, 4hr, 8hr, 16hr, etc.
+            if current_time
+            < event_failed_pull_attempt["last_attempt_ts"]
+            + (
+                2
+                ** min(
+                    event_failed_pull_attempt["num_attempts"],
+                    BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS,
+                )
+            )
+            * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_MILLISECONDS
+        ]
+
     async def get_missing_events(
         self,
         room_id: str,
diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py
index 918010cddb..e448cb1901 100644
--- a/tests/handlers/test_federation_event.py
+++ b/tests/handlers/test_federation_event.py
@@ -14,7 +14,7 @@
 from typing import Optional
 from unittest import mock
 
-from synapse.api.errors import AuthError
+from synapse.api.errors import AuthError, StoreError
 from synapse.api.room_versions import RoomVersion
 from synapse.event_auth import (
     check_state_dependent_auth_rules,
@@ -43,7 +43,7 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase):
     def make_homeserver(self, reactor, clock):
         # mock out the federation transport client
         self.mock_federation_transport_client = mock.Mock(
-            spec=["get_room_state_ids", "get_room_state", "get_event"]
+            spec=["get_room_state_ids", "get_room_state", "get_event", "backfill"]
         )
         return super().setup_test_homeserver(
             federation_transport_client=self.mock_federation_transport_client
@@ -459,6 +459,203 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase):
         )
         self.assertIsNotNone(persisted, "pulled event was not persisted at all")
 
+    def test_backfill_signature_failure_does_not_fetch_same_prev_event_later(
+        self,
+    ) -> None:
+        """
+        Test to make sure we backoff and don't try to fetch a missing prev_event when we
+        already know it has an invalid signature from checking the signatures of all of
+        the events in the backfill response.
+        """
+        OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}"
+        main_store = self.hs.get_datastores().main
+
+        # Create the room
+        user_id = self.register_user("kermit", "test")
+        tok = self.login("kermit", "test")
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
+        room_version = self.get_success(main_store.get_room_version(room_id))
+
+        # Allow the remote user to send state events
+        self.helper.send_state(
+            room_id,
+            "m.room.power_levels",
+            {"events_default": 0, "state_default": 0},
+            tok=tok,
+        )
+
+        # Add the remote user to the room
+        member_event = self.get_success(
+            event_injection.inject_member_event(self.hs, room_id, OTHER_USER, "join")
+        )
+
+        initial_state_map = self.get_success(
+            main_store.get_partial_current_state_ids(room_id)
+        )
+
+        auth_event_ids = [
+            initial_state_map[("m.room.create", "")],
+            initial_state_map[("m.room.power_levels", "")],
+            member_event.event_id,
+        ]
+
+        # We purposely don't run `add_hashes_and_signatures_from_other_server`
+        # over this because we want the signature check to fail.
+        pulled_event_without_signatures = make_event_from_dict(
+            {
+                "type": "test_regular_type",
+                "room_id": room_id,
+                "sender": OTHER_USER,
+                "prev_events": [member_event.event_id],
+                "auth_events": auth_event_ids,
+                "origin_server_ts": 1,
+                "depth": 12,
+                "content": {"body": "pulled_event_without_signatures"},
+            },
+            room_version,
+        )
+
+        # Create a regular event that should pass except for the
+        # `pulled_event_without_signatures` in the `prev_event`.
+        pulled_event = make_event_from_dict(
+            self.add_hashes_and_signatures_from_other_server(
+                {
+                    "type": "test_regular_type",
+                    "room_id": room_id,
+                    "sender": OTHER_USER,
+                    "prev_events": [
+                        member_event.event_id,
+                        pulled_event_without_signatures.event_id,
+                    ],
+                    "auth_events": auth_event_ids,
+                    "origin_server_ts": 1,
+                    "depth": 12,
+                    "content": {"body": "pulled_event"},
+                }
+            ),
+            room_version,
+        )
+
+        # We expect an outbound request to /backfill, so stub that out
+        self.mock_federation_transport_client.backfill.return_value = make_awaitable(
+            {
+                "origin": self.OTHER_SERVER_NAME,
+                "origin_server_ts": 123,
+                "pdus": [
+                    # This is one of the important aspects of this test: we include
+                    # `pulled_event_without_signatures` so it fails the signature check
+                    # when we filter the backfill response down to events which
+                    # have valid signatures in
+                    # `_check_sigs_and_hash_for_pulled_events_and_fetch`
+                    pulled_event_without_signatures.get_pdu_json(),
+                    # Then later when we process this valid signature event, when we
+                    # fetch the missing `prev_event`s, we want to make sure that we
+                    # backoff and don't try and fetch `pulled_event_without_signatures`
+                    # again since we know it just had an invalid signature.
+                    pulled_event.get_pdu_json(),
+                ],
+            }
+        )
+
+        # Keep track of the count and make sure we don't make any of these requests
+        event_endpoint_requested_count = 0
+        room_state_ids_endpoint_requested_count = 0
+        room_state_endpoint_requested_count = 0
+
+        async def get_event(
+            destination: str, event_id: str, timeout: Optional[int] = None
+        ) -> None:
+            nonlocal event_endpoint_requested_count
+            event_endpoint_requested_count += 1
+
+        async def get_room_state_ids(
+            destination: str, room_id: str, event_id: str
+        ) -> None:
+            nonlocal room_state_ids_endpoint_requested_count
+            room_state_ids_endpoint_requested_count += 1
+
+        async def get_room_state(
+            room_version: RoomVersion, destination: str, room_id: str, event_id: str
+        ) -> None:
+            nonlocal room_state_endpoint_requested_count
+            room_state_endpoint_requested_count += 1
+
+        # We don't expect an outbound request to `/event`, `/state_ids`, or `/state` in
+        # the happy path but if the logic is sneaking around what we expect, stub that
+        # out so we can detect that failure
+        self.mock_federation_transport_client.get_event.side_effect = get_event
+        self.mock_federation_transport_client.get_room_state_ids.side_effect = (
+            get_room_state_ids
+        )
+        self.mock_federation_transport_client.get_room_state.side_effect = (
+            get_room_state
+        )
+
+        # The function under test: try to backfill and process the pulled event
+        with LoggingContext("test"):
+            self.get_success(
+                self.hs.get_federation_event_handler().backfill(
+                    self.OTHER_SERVER_NAME,
+                    room_id,
+                    limit=1,
+                    extremities=["$some_extremity"],
+                )
+            )
+
+        if event_endpoint_requested_count > 0:
+            self.fail(
+                "We don't expect an outbound request to /event in the happy path but if "
+                "the logic is sneaking around what we expect, make sure to fail the test. "
+                "We don't expect it because the signature failure should cause us to backoff "
+                "and not asking about pulled_event_without_signatures="
+                f"{pulled_event_without_signatures.event_id} again"
+            )
+
+        if room_state_ids_endpoint_requested_count > 0:
+            self.fail(
+                "We don't expect an outbound request to /state_ids in the happy path but if "
+                "the logic is sneaking around what we expect, make sure to fail the test. "
+                "We don't expect it because the signature failure should cause us to backoff "
+                "and not asking about pulled_event_without_signatures="
+                f"{pulled_event_without_signatures.event_id} again"
+            )
+
+        if room_state_endpoint_requested_count > 0:
+            self.fail(
+                "We don't expect an outbound request to /state in the happy path but if "
+                "the logic is sneaking around what we expect, make sure to fail the test. "
+                "We don't expect it because the signature failure should cause us to backoff "
+                "and not asking about pulled_event_without_signatures="
+                f"{pulled_event_without_signatures.event_id} again"
+            )
+
+        # Make sure we only recorded a single failure which corresponds to the signature
+        # failure initially in `_check_sigs_and_hash_for_pulled_events_and_fetch` before
+        # we process all of the pulled events.
+        backfill_num_attempts_for_event_without_signatures = self.get_success(
+            main_store.db_pool.simple_select_one_onecol(
+                table="event_failed_pull_attempts",
+                keyvalues={"event_id": pulled_event_without_signatures.event_id},
+                retcol="num_attempts",
+            )
+        )
+        self.assertEqual(backfill_num_attempts_for_event_without_signatures, 1)
+
+        # And make sure we didn't record a failure for the event that has the missing
+        # prev_event because we don't want to cause a cascade of failures. Not being
+        # able to fetch the `prev_events` just means we won't be able to de-outlier the
+        # pulled event. But we can still use an `outlier` in the state/auth chain for
+        # another event. So we shouldn't stop a downstream event from trying to pull it.
+        self.get_failure(
+            main_store.db_pool.simple_select_one_onecol(
+                table="event_failed_pull_attempts",
+                keyvalues={"event_id": pulled_event.event_id},
+                retcol="num_attempts",
+            ),
+            # StoreError: 404: No row found
+            StoreError,
+        )
+
     def test_process_pulled_event_with_rejected_missing_state(self) -> None:
         """Ensure that we correctly handle pulled events with missing state containing a
         rejected state event
diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index 59b8910907..853db930d6 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -27,6 +27,8 @@ from synapse.api.room_versions import (
     RoomVersion,
 )
 from synapse.events import _EventInternalMetadata
+from synapse.rest import admin
+from synapse.rest.client import login, room
 from synapse.server import HomeServer
 from synapse.storage.database import LoggingTransaction
 from synapse.types import JsonDict
@@ -43,6 +45,12 @@ class _BackfillSetupInfo:
 
 
 class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
+    servlets = [
+        admin.register_servlets,
+        room.register_servlets,
+        login.register_servlets,
+    ]
+
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
 
@@ -1122,6 +1130,62 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
         self.assertEqual(backfill_event_ids, ["insertion_eventA"])
 
+    def test_get_event_ids_to_not_pull_from_backoff(
+        self,
+    ):
+        """
+        Test to make sure only event IDs we should backoff from are returned.
+        """
+        # Create the room
+        user_id = self.register_user("alice", "test")
+        tok = self.login("alice", "test")
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
+
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(
+                room_id, "$failed_event_id", "fake cause"
+            )
+        )
+
+        event_ids_to_backoff = self.get_success(
+            self.store.get_event_ids_to_not_pull_from_backoff(
+                room_id=room_id, event_ids=["$failed_event_id", "$normal_event_id"]
+            )
+        )
+
+        self.assertEqual(event_ids_to_backoff, ["$failed_event_id"])
+
+    def test_get_event_ids_to_not_pull_from_backoff_retry_after_backoff_duration(
+        self,
+    ):
+        """
+        Test to make sure no event IDs are returned after the backoff duration has
+        elapsed.
+        """
+        # Create the room
+        user_id = self.register_user("alice", "test")
+        tok = self.login("alice", "test")
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
+
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(
+                room_id, "$failed_event_id", "fake cause"
+            )
+        )
+
+        # Now advance time by 2 hours so we wait long enough for the single failed
+        # attempt (2^1 hours).
+        self.reactor.advance(datetime.timedelta(hours=2).total_seconds())
+
+        event_ids_to_backoff = self.get_success(
+            self.store.get_event_ids_to_not_pull_from_backoff(
+                room_id=room_id, event_ids=["$failed_event_id", "$normal_event_id"]
+            )
+        )
+        # Since this function only returns events we should backoff from, time has
+        # elapsed past the backoff range so there are no events to backoff from.
+        self.assertEqual(event_ids_to_backoff, [])
+
 
 @attr.s
 class FakeEvent:
-- 
cgit 1.5.1


From 2c2c3f8b2c1e33d5aee6d480c60c75c1179e3dba Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Mon, 17 Oct 2022 13:27:51 +0100
Subject: Invalidate rooms for user caches when receiving membership events
 (#14155)

This should fix a race where the event notification comes in over
replication before the state replication, leaving a window during
which a sync may get an incorrect list of rooms for the user.
---
 changelog.d/14155.misc                  | 1 +
 synapse/storage/databases/main/cache.py | 4 ++++
 2 files changed, 5 insertions(+)
 create mode 100644 changelog.d/14155.misc

(limited to 'synapse')

diff --git a/changelog.d/14155.misc b/changelog.d/14155.misc
new file mode 100644
index 0000000000..79539cdc32
--- /dev/null
+++ b/changelog.d/14155.misc
@@ -0,0 +1 @@
+Invalidate rooms for user caches on replicated event, fix sync cache race in synapse workers. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index ed0be4abe5..ddb7397714 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -252,6 +252,10 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
             self._attempt_to_invalidate_cache(
                 "get_invited_rooms_for_local_user", (state_key,)
             )
+            self._attempt_to_invalidate_cache(
+                "get_rooms_for_user_with_stream_ordering", (state_key,)
+            )
+            self._attempt_to_invalidate_cache("get_rooms_for_user", (state_key,))
 
         if relates_to:
             self._attempt_to_invalidate_cache("get_relations_for_event", (relates_to,))
-- 
cgit 1.5.1


From ccce8cdfc5e567b5b905b58e82a1d725f2647524 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 17 Oct 2022 13:39:12 +0100
Subject: Use Pydantic when PUTting room aliases (#14179)

---
 changelog.d/14179.feature        |  1 +
 synapse/handlers/directory.py    | 19 +++++++------
 synapse/rest/client/directory.py | 58 ++++++++++++++++++++++++----------------
 3 files changed, 47 insertions(+), 31 deletions(-)
 create mode 100644 changelog.d/14179.feature

(limited to 'synapse')

diff --git a/changelog.d/14179.feature b/changelog.d/14179.feature
new file mode 100644
index 0000000000..48f2db91d3
--- /dev/null
+++ b/changelog.d/14179.feature
@@ -0,0 +1 @@
+Improve the validation of the following PUT endpoints: [`/directory/room/{roomAlias}`](https://spec.matrix.org/v1.4/client-server-api/#put_matrixclientv3directoryroomroomalias), [`/directory/list/room/{roomId}`](https://spec.matrix.org/v1.4/client-server-api/#put_matrixclientv3directorylistroomroomid) and [`/directory/list/appservice/{networkId}/{roomId}`](https://spec.matrix.org/v1.4/application-service-api/#put_matrixclientv3directorylistappservicenetworkidroomid).
diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py
index 7127d5aefc..d52ebada6b 100644
--- a/synapse/handlers/directory.py
+++ b/synapse/handlers/directory.py
@@ -16,6 +16,8 @@ import logging
 import string
 from typing import TYPE_CHECKING, Iterable, List, Optional
 
+from typing_extensions import Literal
+
 from synapse.api.constants import MAX_ALIAS_LENGTH, EventTypes
 from synapse.api.errors import (
     AuthError,
@@ -429,7 +431,10 @@ class DirectoryHandler:
         return await self.auth.check_can_change_room_list(room_id, requester)
 
     async def edit_published_room_list(
-        self, requester: Requester, room_id: str, visibility: str
+        self,
+        requester: Requester,
+        room_id: str,
+        visibility: Literal["public", "private"],
     ) -> None:
         """Edit the entry of the room in the published room list.
 
@@ -451,9 +456,6 @@ class DirectoryHandler:
         if requester.is_guest:
             raise AuthError(403, "Guests cannot edit the published room list")
 
-        if visibility not in ["public", "private"]:
-            raise SynapseError(400, "Invalid visibility setting")
-
         if visibility == "public" and not self.enable_room_list_search:
             # The room list has been disabled.
             raise AuthError(
@@ -505,7 +507,11 @@ class DirectoryHandler:
         await self.store.set_room_is_public(room_id, making_public)
 
     async def edit_published_appservice_room_list(
-        self, appservice_id: str, network_id: str, room_id: str, visibility: str
+        self,
+        appservice_id: str,
+        network_id: str,
+        room_id: str,
+        visibility: Literal["public", "private"],
     ) -> None:
         """Add or remove a room from the appservice/network specific public
         room list.
@@ -516,9 +522,6 @@ class DirectoryHandler:
             room_id
             visibility: either "public" or "private"
         """
-        if visibility not in ["public", "private"]:
-            raise SynapseError(400, "Invalid visibility setting")
-
         await self.store.set_room_is_public_appservice(
             room_id, appservice_id, network_id, visibility == "public"
         )
diff --git a/synapse/rest/client/directory.py b/synapse/rest/client/directory.py
index bc1b18c92d..f17b4c8d22 100644
--- a/synapse/rest/client/directory.py
+++ b/synapse/rest/client/directory.py
@@ -13,15 +13,22 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, Tuple
+from typing import TYPE_CHECKING, List, Optional, Tuple
+
+from pydantic import StrictStr
+from typing_extensions import Literal
 
 from twisted.web.server import Request
 
 from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError
 from synapse.http.server import HttpServer
-from synapse.http.servlet import RestServlet, parse_json_object_from_request
+from synapse.http.servlet import (
+    RestServlet,
+    parse_and_validate_json_object_from_request,
+)
 from synapse.http.site import SynapseRequest
 from synapse.rest.client._base import client_patterns
+from synapse.rest.models import RequestBodyModel
 from synapse.types import JsonDict, RoomAlias
 
 if TYPE_CHECKING:
@@ -54,6 +61,12 @@ class ClientDirectoryServer(RestServlet):
 
         return 200, res
 
+    class PutBody(RequestBodyModel):
+        # TODO: get Pydantic to validate that this is a valid room id?
+        room_id: StrictStr
+        # `servers` is unspecced
+        servers: Optional[List[StrictStr]] = None
+
     async def on_PUT(
         self, request: SynapseRequest, room_alias: str
     ) -> Tuple[int, JsonDict]:
@@ -61,31 +74,22 @@ class ClientDirectoryServer(RestServlet):
             raise SynapseError(400, "Room alias invalid", errcode=Codes.INVALID_PARAM)
         room_alias_obj = RoomAlias.from_string(room_alias)
 
-        content = parse_json_object_from_request(request)
-        if "room_id" not in content:
-            raise SynapseError(
-                400, 'Missing params: ["room_id"]', errcode=Codes.BAD_JSON
-            )
+        content = parse_and_validate_json_object_from_request(request, self.PutBody)
 
         logger.debug("Got content: %s", content)
         logger.debug("Got room name: %s", room_alias_obj.to_string())
 
-        room_id = content["room_id"]
-        servers = content["servers"] if "servers" in content else None
-
-        logger.debug("Got room_id: %s", room_id)
-        logger.debug("Got servers: %s", servers)
+        logger.debug("Got room_id: %s", content.room_id)
+        logger.debug("Got servers: %s", content.servers)
 
-        # TODO(erikj): Check types.
-
-        room = await self.store.get_room(room_id)
+        room = await self.store.get_room(content.room_id)
         if room is None:
             raise SynapseError(400, "Room does not exist")
 
         requester = await self.auth.get_user_by_req(request)
 
         await self.directory_handler.create_association(
-            requester, room_alias_obj, room_id, servers
+            requester, room_alias_obj, content.room_id, content.servers
         )
 
         return 200, {}
@@ -137,16 +141,18 @@ class ClientDirectoryListServer(RestServlet):
 
         return 200, {"visibility": "public" if room["is_public"] else "private"}
 
+    class PutBody(RequestBodyModel):
+        visibility: Literal["public", "private"] = "public"
+
     async def on_PUT(
         self, request: SynapseRequest, room_id: str
     ) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
 
-        content = parse_json_object_from_request(request)
-        visibility = content.get("visibility", "public")
+        content = parse_and_validate_json_object_from_request(request, self.PutBody)
 
         await self.directory_handler.edit_published_room_list(
-            requester, room_id, visibility
+            requester, room_id, content.visibility
         )
 
         return 200, {}
@@ -163,12 +169,14 @@ class ClientAppserviceDirectoryListServer(RestServlet):
         self.directory_handler = hs.get_directory_handler()
         self.auth = hs.get_auth()
 
+    class PutBody(RequestBodyModel):
+        visibility: Literal["public", "private"] = "public"
+
     async def on_PUT(
         self, request: SynapseRequest, network_id: str, room_id: str
     ) -> Tuple[int, JsonDict]:
-        content = parse_json_object_from_request(request)
-        visibility = content.get("visibility", "public")
-        return await self._edit(request, network_id, room_id, visibility)
+        content = parse_and_validate_json_object_from_request(request, self.PutBody)
+        return await self._edit(request, network_id, room_id, content.visibility)
 
     async def on_DELETE(
         self, request: SynapseRequest, network_id: str, room_id: str
@@ -176,7 +184,11 @@ class ClientAppserviceDirectoryListServer(RestServlet):
         return await self._edit(request, network_id, room_id, "private")
 
     async def _edit(
-        self, request: SynapseRequest, network_id: str, room_id: str, visibility: str
+        self,
+        request: SynapseRequest,
+        network_id: str,
+        room_id: str,
+        visibility: Literal["public", "private"],
     ) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
         if not requester.app_service:
-- 
cgit 1.5.1


From 4283bd1cf9c3da2157c3642a7c4f105e9fac2636 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 17 Oct 2022 11:32:11 -0400
Subject: Support filtering the /messages API by relation type (MSC3874).
 (#14148)

Gated behind an experimental configuration flag.
---
 changelog.d/14148.feature                |   1 +
 synapse/api/filtering.py                 |  27 +++++-
 synapse/config/experimental.py           |   3 +
 synapse/rest/client/versions.py          |   2 +
 synapse/storage/databases/main/stream.py |  29 ++++++-
 tests/api/test_filtering.py              |  63 +++++++++++++-
 tests/rest/client/test_relations.py      |   1 -
 tests/rest/client/test_rooms.py          | 145 ++-----------------------------
 tests/storage/test_stream.py             | 118 ++++++++++++++++++-------
 9 files changed, 212 insertions(+), 177 deletions(-)
 create mode 100644 changelog.d/14148.feature

(limited to 'synapse')

diff --git a/changelog.d/14148.feature b/changelog.d/14148.feature
new file mode 100644
index 0000000000..951d0cac80
--- /dev/null
+++ b/changelog.d/14148.feature
@@ -0,0 +1 @@
+Experimental support for [MSC3874](https://github.com/matrix-org/matrix-spec-proposals/pull/3874).
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index cc31cf8cc7..26be377d03 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -36,7 +36,7 @@ from jsonschema import FormatChecker
 from synapse.api.constants import EduTypes, EventContentFields
 from synapse.api.errors import SynapseError
 from synapse.api.presence import UserPresenceState
-from synapse.events import EventBase
+from synapse.events import EventBase, relation_from_event
 from synapse.types import JsonDict, RoomID, UserID
 
 if TYPE_CHECKING:
@@ -53,6 +53,12 @@ FILTER_SCHEMA = {
         # check types are valid event types
         "types": {"type": "array", "items": {"type": "string"}},
         "not_types": {"type": "array", "items": {"type": "string"}},
+        # MSC3874, filtering /messages.
+        "org.matrix.msc3874.rel_types": {"type": "array", "items": {"type": "string"}},
+        "org.matrix.msc3874.not_rel_types": {
+            "type": "array",
+            "items": {"type": "string"},
+        },
     },
 }
 
@@ -334,8 +340,15 @@ class Filter:
         self.labels = filter_json.get("org.matrix.labels", None)
         self.not_labels = filter_json.get("org.matrix.not_labels", [])
 
-        self.related_by_senders = self.filter_json.get("related_by_senders", None)
-        self.related_by_rel_types = self.filter_json.get("related_by_rel_types", None)
+        self.related_by_senders = filter_json.get("related_by_senders", None)
+        self.related_by_rel_types = filter_json.get("related_by_rel_types", None)
+
+        # For compatibility with _check_fields.
+        self.rel_types = None
+        self.not_rel_types = []
+        if hs.config.experimental.msc3874_enabled:
+            self.rel_types = filter_json.get("org.matrix.msc3874.rel_types", None)
+            self.not_rel_types = filter_json.get("org.matrix.msc3874.not_rel_types", [])
 
     def filters_all_types(self) -> bool:
         return "*" in self.not_types
@@ -386,11 +399,19 @@ class Filter:
             # check if there is a string url field in the content for filtering purposes
             labels = content.get(EventContentFields.LABELS, [])
 
+            # Check if the event has a relation.
+            rel_type = None
+            if isinstance(event, EventBase):
+                relation = relation_from_event(event)
+                if relation:
+                    rel_type = relation.rel_type
+
             field_matchers = {
                 "rooms": lambda v: room_id == v,
                 "senders": lambda v: sender == v,
                 "types": lambda v: _matches_wildcard(ev_type, v),
                 "labels": lambda v: v in labels,
+                "rel_types": lambda v: rel_type == v,
             }
 
             result = self._check_fields(field_matchers)
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index f44655516e..f9a49451d8 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -117,3 +117,6 @@ class ExperimentalConfig(Config):
         self.msc3882_token_timeout = self.parse_duration(
             experimental.get("msc3882_token_timeout", "5m")
         )
+
+        # MSC3874: Filtering /messages with rel_types / not_rel_types.
+        self.msc3874_enabled: bool = experimental.get("msc3874_enabled", False)
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 4e1fd2bbe7..4b87ee978a 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -114,6 +114,8 @@ class VersionsRestServlet(RestServlet):
                     "org.matrix.msc3882": self.config.experimental.msc3882_enabled,
                     # Adds support for remotely enabling/disabling pushers, as per MSC3881
                     "org.matrix.msc3881": self.config.experimental.msc3881_enabled,
+                    # Adds support for filtering /messages by event relation.
+                    "org.matrix.msc3874": self.config.experimental.msc3874_enabled,
                 },
             },
         )
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 5baffbfe55..09ce855aa8 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -357,6 +357,24 @@ def filter_to_clause(event_filter: Optional[Filter]) -> Tuple[str, List[str]]:
         )
         args.extend(event_filter.related_by_rel_types)
 
+    if event_filter.rel_types:
+        clauses.append(
+            "(%s)"
+            % " OR ".join(
+                "event_relation.relation_type = ?" for _ in event_filter.rel_types
+            )
+        )
+        args.extend(event_filter.rel_types)
+
+    if event_filter.not_rel_types:
+        clauses.append(
+            "((%s) OR event_relation.relation_type IS NULL)"
+            % " AND ".join(
+                "event_relation.relation_type != ?" for _ in event_filter.not_rel_types
+            )
+        )
+        args.extend(event_filter.not_rel_types)
+
     return " AND ".join(clauses), args
 
 
@@ -1278,8 +1296,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
                 # Multiple labels could cause the same event to appear multiple times.
                 needs_distinct = True
 
-        # If there is a filter on relation_senders and relation_types join to the
-        # relations table.
+        # If there is a related_by_senders or related_by_rel_types filter, join to
+        # the relations table to get events related to the current event.
         if event_filter and (
             event_filter.related_by_senders or event_filter.related_by_rel_types
         ):
@@ -1294,6 +1312,13 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
                     LEFT JOIN events AS related_event ON (relation.event_id = related_event.event_id)
                 """
 
+        # If there is a rel_types or not_rel_types filter, join to the relations
+        # table to get the event's relation information.
+        if event_filter and (event_filter.rel_types or event_filter.not_rel_types):
+            join_clause += """
+                LEFT JOIN event_relations AS event_relation USING (event_id)
+            """
+
         if needs_distinct:
             select_keywords += " DISTINCT"
 
diff --git a/tests/api/test_filtering.py b/tests/api/test_filtering.py
index a269c477fb..a82c4eed86 100644
--- a/tests/api/test_filtering.py
+++ b/tests/api/test_filtering.py
@@ -35,6 +35,8 @@ def MockEvent(**kwargs):
         kwargs["event_id"] = "fake_event_id"
     if "type" not in kwargs:
         kwargs["type"] = "fake_type"
+    if "content" not in kwargs:
+        kwargs["content"] = {}
     return make_event_from_dict(kwargs)
 
 
@@ -357,6 +359,66 @@ class FilteringTestCase(unittest.HomeserverTestCase):
 
         self.assertTrue(Filter(self.hs, definition)._check(event))
 
+    @unittest.override_config({"experimental_features": {"msc3874_enabled": True}})
+    def test_filter_rel_type(self):
+        definition = {"org.matrix.msc3874.rel_types": ["m.thread"]}
+        event = MockEvent(
+            sender="@foo:bar",
+            type="m.room.message",
+            room_id="!secretbase:unknown",
+            content={},
+        )
+
+        self.assertFalse(Filter(self.hs, definition)._check(event))
+
+        event = MockEvent(
+            sender="@foo:bar",
+            type="m.room.message",
+            room_id="!secretbase:unknown",
+            content={"m.relates_to": {"event_id": "$abc", "rel_type": "m.reference"}},
+        )
+
+        self.assertFalse(Filter(self.hs, definition)._check(event))
+
+        event = MockEvent(
+            sender="@foo:bar",
+            type="m.room.message",
+            room_id="!secretbase:unknown",
+            content={"m.relates_to": {"event_id": "$abc", "rel_type": "m.thread"}},
+        )
+
+        self.assertTrue(Filter(self.hs, definition)._check(event))
+
+    @unittest.override_config({"experimental_features": {"msc3874_enabled": True}})
+    def test_filter_not_rel_type(self):
+        definition = {"org.matrix.msc3874.not_rel_types": ["m.thread"]}
+        event = MockEvent(
+            sender="@foo:bar",
+            type="m.room.message",
+            room_id="!secretbase:unknown",
+            content={"m.relates_to": {"event_id": "$abc", "rel_type": "m.thread"}},
+        )
+
+        self.assertFalse(Filter(self.hs, definition)._check(event))
+
+        event = MockEvent(
+            sender="@foo:bar",
+            type="m.room.message",
+            room_id="!secretbase:unknown",
+            content={},
+        )
+
+        self.assertTrue(Filter(self.hs, definition)._check(event))
+
+        event = MockEvent(
+            sender="@foo:bar",
+            type="m.room.message",
+            room_id="!secretbase:unknown",
+            content={"m.relates_to": {"event_id": "$abc", "rel_type": "m.reference"}},
+        )
+
+        self.assertTrue(Filter(self.hs, definition)._check(event))
+
     def test_filter_presence_match(self):
         user_filter_json = {"presence": {"types": ["m.*"]}}
         filter_id = self.get_success(
@@ -456,7 +518,6 @@ class FilteringTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual(filtered_room_ids, ["!allowed:example.com"])
 
-    @unittest.override_config({"experimental_features": {"msc3440_enabled": True}})
     def test_filter_relations(self):
         events = [
             # An event without a relation.
diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py
index f5c1070b2c..ddf315b894 100644
--- a/tests/rest/client/test_relations.py
+++ b/tests/rest/client/test_relations.py
@@ -1677,7 +1677,6 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
             {"chunk": [{"type": "m.reaction", "key": "👍", "count": 1}]},
         )
 
-    @unittest.override_config({"experimental_features": {"msc3440_enabled": True}})
     def test_redact_parent_thread(self) -> None:
         """
         Test that thread replies are still available when the root event is redacted.
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index 3612ebe7b9..71b1637be8 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -35,7 +35,6 @@ from synapse.api.constants import (
     EventTypes,
     Membership,
     PublicRoomsFilterFields,
-    RelationTypes,
     RoomTypes,
 )
 from synapse.api.errors import Codes, HttpResponseException
@@ -50,6 +49,7 @@ from synapse.util.stringutils import random_string
 
 from tests import unittest
 from tests.http.server._base import make_request_with_cancellation_test
+from tests.storage.test_stream import PaginationTestCase
 from tests.test_utils import make_awaitable
 
 PATH_PREFIX = b"/_matrix/client/api/v1"
@@ -2915,149 +2915,20 @@ class LabelsTestCase(unittest.HomeserverTestCase):
         return event_id
 
 
-class RelationsTestCase(unittest.HomeserverTestCase):
-    servlets = [
-        synapse.rest.admin.register_servlets_for_client_rest_resource,
-        room.register_servlets,
-        login.register_servlets,
-    ]
-
-    def default_config(self) -> Dict[str, Any]:
-        config = super().default_config()
-        config["experimental_features"] = {"msc3440_enabled": True}
-        return config
-
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        self.user_id = self.register_user("test", "test")
-        self.tok = self.login("test", "test")
-        self.room_id = self.helper.create_room_as(self.user_id, tok=self.tok)
-
-        self.second_user_id = self.register_user("second", "test")
-        self.second_tok = self.login("second", "test")
-        self.helper.join(
-            room=self.room_id, user=self.second_user_id, tok=self.second_tok
-        )
-
-        self.third_user_id = self.register_user("third", "test")
-        self.third_tok = self.login("third", "test")
-        self.helper.join(room=self.room_id, user=self.third_user_id, tok=self.third_tok)
-
-        # An initial event with a relation from second user.
-        res = self.helper.send_event(
-            room_id=self.room_id,
-            type=EventTypes.Message,
-            content={"msgtype": "m.text", "body": "Message 1"},
-            tok=self.tok,
-        )
-        self.event_id_1 = res["event_id"]
-        self.helper.send_event(
-            room_id=self.room_id,
-            type="m.reaction",
-            content={
-                "m.relates_to": {
-                    "rel_type": RelationTypes.ANNOTATION,
-                    "event_id": self.event_id_1,
-                    "key": "👍",
-                }
-            },
-            tok=self.second_tok,
-        )
-
-        # Another event with a relation from third user.
-        res = self.helper.send_event(
-            room_id=self.room_id,
-            type=EventTypes.Message,
-            content={"msgtype": "m.text", "body": "Message 2"},
-            tok=self.tok,
-        )
-        self.event_id_2 = res["event_id"]
-        self.helper.send_event(
-            room_id=self.room_id,
-            type="m.reaction",
-            content={
-                "m.relates_to": {
-                    "rel_type": RelationTypes.REFERENCE,
-                    "event_id": self.event_id_2,
-                }
-            },
-            tok=self.third_tok,
-        )
-
-        # An event with no relations.
-        self.helper.send_event(
-            room_id=self.room_id,
-            type=EventTypes.Message,
-            content={"msgtype": "m.text", "body": "No relations"},
-            tok=self.tok,
-        )
-
-    def _filter_messages(self, filter: JsonDict) -> List[JsonDict]:
+class RelationsTestCase(PaginationTestCase):
+    def _filter_messages(self, filter: JsonDict) -> List[str]:
         """Make a request to /messages with a filter, returns the chunk of events."""
+        from_token = self.get_success(
+            self.from_token.to_string(self.hs.get_datastores().main)
+        )
         channel = self.make_request(
             "GET",
-            "/rooms/%s/messages?filter=%s&dir=b" % (self.room_id, json.dumps(filter)),
+            f"/rooms/{self.room_id}/messages?filter={json.dumps(filter)}&dir=f&from={from_token}",
             access_token=self.tok,
         )
         self.assertEqual(channel.code, HTTPStatus.OK, channel.result)
 
-        return channel.json_body["chunk"]
-
-    def test_filter_relation_senders(self) -> None:
-        # Messages which second user reacted to.
-        filter = {"related_by_senders": [self.second_user_id]}
-        chunk = self._filter_messages(filter)
-        self.assertEqual(len(chunk), 1, chunk)
-        self.assertEqual(chunk[0]["event_id"], self.event_id_1)
-
-        # Messages which third user reacted to.
-        filter = {"related_by_senders": [self.third_user_id]}
-        chunk = self._filter_messages(filter)
-        self.assertEqual(len(chunk), 1, chunk)
-        self.assertEqual(chunk[0]["event_id"], self.event_id_2)
-
-        # Messages which either user reacted to.
-        filter = {"related_by_senders": [self.second_user_id, self.third_user_id]}
-        chunk = self._filter_messages(filter)
-        self.assertEqual(len(chunk), 2, chunk)
-        self.assertCountEqual(
-            [c["event_id"] for c in chunk], [self.event_id_1, self.event_id_2]
-        )
-
-    def test_filter_relation_type(self) -> None:
-        # Messages which have annotations.
-        filter = {"related_by_rel_types": [RelationTypes.ANNOTATION]}
-        chunk = self._filter_messages(filter)
-        self.assertEqual(len(chunk), 1, chunk)
-        self.assertEqual(chunk[0]["event_id"], self.event_id_1)
-
-        # Messages which have references.
-        filter = {"related_by_rel_types": [RelationTypes.REFERENCE]}
-        chunk = self._filter_messages(filter)
-        self.assertEqual(len(chunk), 1, chunk)
-        self.assertEqual(chunk[0]["event_id"], self.event_id_2)
-
-        # Messages which have either annotations or references.
-        filter = {
-            "related_by_rel_types": [
-                RelationTypes.ANNOTATION,
-                RelationTypes.REFERENCE,
-            ]
-        }
-        chunk = self._filter_messages(filter)
-        self.assertEqual(len(chunk), 2, chunk)
-        self.assertCountEqual(
-            [c["event_id"] for c in chunk], [self.event_id_1, self.event_id_2]
-        )
-
-    def test_filter_relation_senders_and_type(self) -> None:
-        # Messages which second user reacted to.
-        filter = {
-            "related_by_senders": [self.second_user_id],
-            "related_by_rel_types": [RelationTypes.ANNOTATION],
-        }
-        chunk = self._filter_messages(filter)
-        self.assertEqual(len(chunk), 1, chunk)
-        self.assertEqual(chunk[0]["event_id"], self.event_id_1)
+        return [ev["event_id"] for ev in channel.json_body["chunk"]]
 
 
 class ContextTestCase(unittest.HomeserverTestCase):
diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py
index 78663a53fe..34fa810cf6 100644
--- a/tests/storage/test_stream.py
+++ b/tests/storage/test_stream.py
@@ -16,7 +16,6 @@ from typing import List
 
 from synapse.api.constants import EventTypes, RelationTypes
 from synapse.api.filtering import Filter
-from synapse.events import EventBase
 from synapse.rest import admin
 from synapse.rest.client import login, room
 from synapse.types import JsonDict
@@ -40,7 +39,7 @@ class PaginationTestCase(HomeserverTestCase):
 
     def default_config(self):
         config = super().default_config()
-        config["experimental_features"] = {"msc3440_enabled": True}
+        config["experimental_features"] = {"msc3874_enabled": True}
         return config
 
     def prepare(self, reactor, clock, homeserver):
@@ -58,6 +57,11 @@ class PaginationTestCase(HomeserverTestCase):
         self.third_tok = self.login("third", "test")
         self.helper.join(room=self.room_id, user=self.third_user_id, tok=self.third_tok)
 
+        # Store a token which is after all the room creation events.
+        self.from_token = self.get_success(
+            self.hs.get_event_sources().get_current_token_for_pagination(self.room_id)
+        )
+
         # An initial event with a relation from second user.
         res = self.helper.send_event(
             room_id=self.room_id,
@@ -66,7 +70,7 @@ class PaginationTestCase(HomeserverTestCase):
             tok=self.tok,
         )
         self.event_id_1 = res["event_id"]
-        self.helper.send_event(
+        res = self.helper.send_event(
             room_id=self.room_id,
             type="m.reaction",
             content={
@@ -78,6 +82,7 @@ class PaginationTestCase(HomeserverTestCase):
             },
             tok=self.second_tok,
         )
+        self.event_id_annotation = res["event_id"]
 
         # Another event with a relation from third user.
         res = self.helper.send_event(
@@ -87,7 +92,7 @@ class PaginationTestCase(HomeserverTestCase):
             tok=self.tok,
         )
         self.event_id_2 = res["event_id"]
-        self.helper.send_event(
+        res = self.helper.send_event(
             room_id=self.room_id,
             type="m.reaction",
             content={
@@ -98,68 +103,59 @@ class PaginationTestCase(HomeserverTestCase):
             },
             tok=self.third_tok,
         )
+        self.event_id_reference = res["event_id"]
 
         # An event with no relations.
-        self.helper.send_event(
+        res = self.helper.send_event(
             room_id=self.room_id,
             type=EventTypes.Message,
             content={"msgtype": "m.text", "body": "No relations"},
             tok=self.tok,
         )
+        self.event_id_none = res["event_id"]
 
-    def _filter_messages(self, filter: JsonDict) -> List[EventBase]:
+    def _filter_messages(self, filter: JsonDict) -> List[str]:
         """Make a request to /messages with a filter, returns the chunk of events."""
 
-        from_token = self.get_success(
-            self.hs.get_event_sources().get_current_token_for_pagination(self.room_id)
-        )
-
         events, next_key = self.get_success(
             self.hs.get_datastores().main.paginate_room_events(
                 room_id=self.room_id,
-                from_key=from_token.room_key,
+                from_key=self.from_token.room_key,
                 to_key=None,
-                direction="b",
+                direction="f",
                 limit=10,
                 event_filter=Filter(self.hs, filter),
             )
         )
 
-        return events
+        return [ev.event_id for ev in events]
 
     def test_filter_relation_senders(self):
         # Messages which second user reacted to.
         filter = {"related_by_senders": [self.second_user_id]}
         chunk = self._filter_messages(filter)
-        self.assertEqual(len(chunk), 1, chunk)
-        self.assertEqual(chunk[0].event_id, self.event_id_1)
+        self.assertEqual(chunk, [self.event_id_1])
 
         # Messages which third user reacted to.
         filter = {"related_by_senders": [self.third_user_id]}
         chunk = self._filter_messages(filter)
-        self.assertEqual(len(chunk), 1, chunk)
-        self.assertEqual(chunk[0].event_id, self.event_id_2)
+        self.assertEqual(chunk, [self.event_id_2])
 
         # Messages which either user reacted to.
         filter = {"related_by_senders": [self.second_user_id, self.third_user_id]}
         chunk = self._filter_messages(filter)
-        self.assertEqual(len(chunk), 2, chunk)
-        self.assertCountEqual(
-            [c.event_id for c in chunk], [self.event_id_1, self.event_id_2]
-        )
+        self.assertCountEqual(chunk, [self.event_id_1, self.event_id_2])
 
     def test_filter_relation_type(self):
         # Messages which have annotations.
         filter = {"related_by_rel_types": [RelationTypes.ANNOTATION]}
         chunk = self._filter_messages(filter)
-        self.assertEqual(len(chunk), 1, chunk)
-        self.assertEqual(chunk[0].event_id, self.event_id_1)
+        self.assertEqual(chunk, [self.event_id_1])
 
         # Messages which have references.
         filter = {"related_by_rel_types": [RelationTypes.REFERENCE]}
         chunk = self._filter_messages(filter)
-        self.assertEqual(len(chunk), 1, chunk)
-        self.assertEqual(chunk[0].event_id, self.event_id_2)
+        self.assertEqual(chunk, [self.event_id_2])
 
         # Messages which have either annotations or references.
         filter = {
@@ -169,10 +165,7 @@ class PaginationTestCase(HomeserverTestCase):
             ]
         }
         chunk = self._filter_messages(filter)
-        self.assertEqual(len(chunk), 2, chunk)
-        self.assertCountEqual(
-            [c.event_id for c in chunk], [self.event_id_1, self.event_id_2]
-        )
+        self.assertCountEqual(chunk, [self.event_id_1, self.event_id_2])
 
     def test_filter_relation_senders_and_type(self):
         # Messages which second user reacted to.
@@ -181,8 +174,7 @@ class PaginationTestCase(HomeserverTestCase):
             "related_by_rel_types": [RelationTypes.ANNOTATION],
         }
         chunk = self._filter_messages(filter)
-        self.assertEqual(len(chunk), 1, chunk)
-        self.assertEqual(chunk[0].event_id, self.event_id_1)
+        self.assertEqual(chunk, [self.event_id_1])
 
     def test_duplicate_relation(self):
         """An event should only be returned once if there are multiple relations to it."""
@@ -201,5 +193,65 @@ class PaginationTestCase(HomeserverTestCase):
 
         filter = {"related_by_senders": [self.second_user_id]}
         chunk = self._filter_messages(filter)
-        self.assertEqual(len(chunk), 1, chunk)
-        self.assertEqual(chunk[0].event_id, self.event_id_1)
+        self.assertEqual(chunk, [self.event_id_1])
+
+    def test_filter_rel_types(self) -> None:
+        # Messages which are annotations.
+        filter = {"org.matrix.msc3874.rel_types": [RelationTypes.ANNOTATION]}
+        chunk = self._filter_messages(filter)
+        self.assertEqual(chunk, [self.event_id_annotation])
+
+        # Messages which are references.
+        filter = {"org.matrix.msc3874.rel_types": [RelationTypes.REFERENCE]}
+        chunk = self._filter_messages(filter)
+        self.assertEqual(chunk, [self.event_id_reference])
+
+        # Messages which are either annotations or references.
+        filter = {
+            "org.matrix.msc3874.rel_types": [
+                RelationTypes.ANNOTATION,
+                RelationTypes.REFERENCE,
+            ]
+        }
+        chunk = self._filter_messages(filter)
+        self.assertCountEqual(
+            chunk,
+            [self.event_id_annotation, self.event_id_reference],
+        )
+
+    def test_filter_not_rel_types(self) -> None:
+        # Messages which are not annotations.
+        filter = {"org.matrix.msc3874.not_rel_types": [RelationTypes.ANNOTATION]}
+        chunk = self._filter_messages(filter)
+        self.assertEqual(
+            chunk,
+            [
+                self.event_id_1,
+                self.event_id_2,
+                self.event_id_reference,
+                self.event_id_none,
+            ],
+        )
+
+        # Messages which are not references.
+        filter = {"org.matrix.msc3874.not_rel_types": [RelationTypes.REFERENCE]}
+        chunk = self._filter_messages(filter)
+        self.assertEqual(
+            chunk,
+            [
+                self.event_id_1,
+                self.event_id_annotation,
+                self.event_id_2,
+                self.event_id_none,
+            ],
+        )
+
+        # Messages which are neither annotations nor references.
+        filter = {
+            "org.matrix.msc3874.not_rel_types": [
+                RelationTypes.ANNOTATION,
+                RelationTypes.REFERENCE,
+            ]
+        }
+        chunk = self._filter_messages(filter)
+        self.assertEqual(chunk, [self.event_id_1, self.event_id_2, self.event_id_none])
-- 
cgit 1.5.1


From 2c63cdcc3f1aa4625e947de3c23e0a8133c61286 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Mon, 17 Oct 2022 16:02:39 -0500
Subject: Add debug logs to figure out why an event was filtered (#14095)

Spawned while investigating https://github.com/matrix-org/synapse/issues/13944

This way we might get some more context whenever a `403 Forbidden - body: {"errcode":"M_FORBIDDEN","error":"You don't have permission to access that event."}` error is produced.

`log_config.yaml`
```yaml
loggers:
    synapse:
        level: INFO

    synapse.visibility:
        level: DEBUG
```
---
 changelog.d/14095.misc |  1 +
 synapse/visibility.py  | 32 +++++++++++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14095.misc

(limited to 'synapse')

diff --git a/changelog.d/14095.misc b/changelog.d/14095.misc
new file mode 100644
index 0000000000..3483201d5f
--- /dev/null
+++ b/changelog.d/14095.misc
@@ -0,0 +1 @@
+Add debug logs to figure out why an event was filtered out of the client response.
diff --git a/synapse/visibility.py b/synapse/visibility.py
index c4048d2477..40a9c5b53f 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -84,7 +84,15 @@ async def filter_events_for_client(
     """
     # Filter out events that have been soft failed so that we don't relay them
     # to clients.
+    events_before_filtering = events
     events = [e for e in events if not e.internal_metadata.is_soft_failed()]
+    if len(events_before_filtering) != len(events):
+        if logger.isEnabledFor(logging.DEBUG):
+            logger.debug(
+                "filter_events_for_client: Filtered out soft-failed events: Before=%s, After=%s",
+                [event.event_id for event in events_before_filtering],
+                [event.event_id for event in events],
+            )
 
     types = (_HISTORY_VIS_KEY, (EventTypes.Member, user_id))
 
@@ -301,6 +309,10 @@ def _check_client_allowed_to_see_event(
             _check_filter_send_to_client(event, clock, retention_policy, sender_ignored)
             == _CheckFilter.DENIED
         ):
+            logger.debug(
+                "_check_client_allowed_to_see_event(event=%s): Filtered out event because `_check_filter_send_to_client` returned `_CheckFilter.DENIED`",
+                event.event_id,
+            )
             return None
 
     if event.event_id in always_include_ids:
@@ -312,9 +324,17 @@ def _check_client_allowed_to_see_event(
         # for out-of-band membership events (eg, incoming invites, or rejections of
         # said invite) for the user themselves.
         if event.type == EventTypes.Member and event.state_key == user_id:
-            logger.debug("Returning out-of-band-membership event %s", event)
+            logger.debug(
+                "_check_client_allowed_to_see_event(event=%s): Returning out-of-band-membership event %s",
+                event.event_id,
+                event,
+            )
             return event
 
+        logger.debug(
+            "_check_client_allowed_to_see_event(event=%s): Filtered out event because it's an outlier",
+            event.event_id,
+        )
         return None
 
     if state is None:
@@ -337,11 +357,21 @@ def _check_client_allowed_to_see_event(
 
     membership_result = _check_membership(user_id, event, visibility, state, is_peeking)
     if not membership_result.allowed:
+        logger.debug(
+            "_check_client_allowed_to_see_event(event=%s): Filtered out event because the user can't see the event because of their membership, membership_result.allowed=%s membership_result.joined=%s",
+            event.event_id,
+            membership_result.allowed,
+            membership_result.joined,
+        )
         return None
 
     # If the sender has been erased and the user was not joined at the time, we
     # must only return the redacted form.
     if sender_erased and not membership_result.joined:
+        logger.debug(
+            "_check_client_allowed_to_see_event(event=%s): Returning pruned event because `sender_erased` and the user was not joined at the time",
+            event.event_id,
+        )
         event = prune_event(event)
 
     return event
-- 
cgit 1.5.1


From 828b5502cfdf4f1b20750941714ce95cdb242f0d Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Tue, 18 Oct 2022 10:33:21 +0100
Subject: Remove `_get_events_cache` check optimisation from
 `_have_seen_events_dict` (#14161)

---
 changelog.d/14161.bugfix                           |  1 +
 synapse/storage/databases/main/events_worker.py    | 31 +++++++++-------------
 tests/storage/databases/main/test_events_worker.py | 12 ---------
 3 files changed, 14 insertions(+), 30 deletions(-)
 create mode 100644 changelog.d/14161.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14161.bugfix b/changelog.d/14161.bugfix
new file mode 100644
index 0000000000..aed4d9e386
--- /dev/null
+++ b/changelog.d/14161.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in 1.30.0 where purging and rejoining a room without restarting in-between would result in a broken room.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index d4104462b5..cfd4780add 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -1502,21 +1502,15 @@ class EventsWorkerStore(SQLBaseStore):
         Returns:
              a dict {event_id -> bool}
         """
-        # if the event cache contains the event, obviously we've seen it.
-
-        cache_results = {
-            event_id
-            for event_id in event_ids
-            if await self._get_event_cache.contains((event_id,))
-        }
-        results = dict.fromkeys(cache_results, True)
-        remaining = [
-            event_id for event_id in event_ids if event_id not in cache_results
-        ]
-        if not remaining:
-            return results
+        # TODO: We used to query the _get_event_cache here as a fast-path before
+        #  hitting the database. For if an event were in the cache, we've presumably
+        #  seen it before.
+        #
+        #  But this is currently an invalid assumption due to the _get_event_cache
+        #  not being invalidated when purging events from a room. The optimisation can
+        #  be re-added after https://github.com/matrix-org/synapse/issues/13476
 
-        def have_seen_events_txn(txn: LoggingTransaction) -> None:
+        def have_seen_events_txn(txn: LoggingTransaction) -> Dict[str, bool]:
             # we deliberately do *not* query the database for room_id, to make the
             # query an index-only lookup on `events_event_id_key`.
             #
@@ -1524,16 +1518,17 @@ class EventsWorkerStore(SQLBaseStore):
 
             sql = "SELECT event_id FROM events AS e WHERE "
             clause, args = make_in_list_sql_clause(
-                txn.database_engine, "e.event_id", remaining
+                txn.database_engine, "e.event_id", event_ids
             )
             txn.execute(sql + clause, args)
             found_events = {eid for eid, in txn}
 
             # ... and then we can update the results for each key
-            results.update({eid: (eid in found_events) for eid in remaining})
+            return {eid: (eid in found_events) for eid in event_ids}
 
-        await self.db_pool.runInteraction("have_seen_events", have_seen_events_txn)
-        return results
+        return await self.db_pool.runInteraction(
+            "have_seen_events", have_seen_events_txn
+        )
 
     @cached(max_entries=100000, tree=True)
     async def have_seen_event(self, room_id: str, event_id: str) -> bool:
diff --git a/tests/storage/databases/main/test_events_worker.py b/tests/storage/databases/main/test_events_worker.py
index 32a798d74b..5773172ab8 100644
--- a/tests/storage/databases/main/test_events_worker.py
+++ b/tests/storage/databases/main/test_events_worker.py
@@ -90,18 +90,6 @@ class HaveSeenEventsTestCase(unittest.HomeserverTestCase):
             self.assertEqual(res, {self.event_ids[0]})
             self.assertEqual(ctx.get_resource_usage().db_txn_count, 0)
 
-    def test_query_via_event_cache(self):
-        # fetch an event into the event cache
-        self.get_success(self.store.get_event(self.event_ids[0]))
-
-        # looking it up should now cause no db hits
-        with LoggingContext(name="test") as ctx:
-            res = self.get_success(
-                self.store.have_seen_events(self.room_id, [self.event_ids[0]])
-            )
-            self.assertEqual(res, {self.event_ids[0]})
-            self.assertEqual(ctx.get_resource_usage().db_txn_count, 0)
-
     def test_persisting_event_invalidates_cache(self):
         """
         Test to make sure that the `have_seen_event` cache
-- 
cgit 1.5.1


From dc02d9f8c54576d4b41ce51a2704fdd43b582d66 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Tue, 18 Oct 2022 10:33:35 +0100
Subject: Avoid checking the event cache when backfilling events (#14164)

---
 changelog.d/14164.bugfix                        |   1 +
 synapse/handlers/federation_event.py            |  47 ++++++++---
 synapse/storage/databases/main/events_worker.py |   2 +-
 tests/handlers/test_federation.py               | 105 +++++++++++++++++++++++-
 4 files changed, 140 insertions(+), 15 deletions(-)
 create mode 100644 changelog.d/14164.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14164.bugfix b/changelog.d/14164.bugfix
new file mode 100644
index 0000000000..aed4d9e386
--- /dev/null
+++ b/changelog.d/14164.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in 1.30.0 where purging and rejoining a room without restarting in-between would result in a broken room.
\ No newline at end of file
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 4300e8dd40..06e41b5cc0 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -798,9 +798,42 @@ class FederationEventHandler:
             ],
         )
 
+        # Check if we already have any of these events.
+        # Note: we currently make a lookup in the database directly here rather than
+        # checking the event cache, due to:
+        # https://github.com/matrix-org/synapse/issues/13476
+        existing_events_map = await self._store._get_events_from_db(
+            [event.event_id for event in events]
+        )
+
+        new_events = []
+        for event in events:
+            event_id = event.event_id
+
+            # If we've already seen this event ID...
+            if event_id in existing_events_map:
+                existing_event = existing_events_map[event_id]
+
+                # ...and the event itself was not previously stored as an outlier...
+                if not existing_event.event.internal_metadata.is_outlier():
+                    # ...then there's no need to persist it. We have it already.
+                    logger.info(
+                        "_process_pulled_event: Ignoring received event %s which we "
+                        "have already seen",
+                        event.event_id,
+                    )
+                    continue
+
+                # While we have seen this event before, it was stored as an outlier.
+                # We'll now persist it as a non-outlier.
+                logger.info("De-outliering event %s", event_id)
+
+            # Continue on with the events that are new to us.
+            new_events.append(event)
+
         # We want to sort these by depth so we process them and
         # tell clients about them in order.
-        sorted_events = sorted(events, key=lambda x: x.depth)
+        sorted_events = sorted(new_events, key=lambda x: x.depth)
         for ev in sorted_events:
             with nested_logging_context(ev.event_id):
                 await self._process_pulled_event(origin, ev, backfilled=backfilled)
@@ -852,18 +885,6 @@ class FederationEventHandler:
 
         event_id = event.event_id
 
-        existing = await self._store.get_event(
-            event_id, allow_none=True, allow_rejected=True
-        )
-        if existing:
-            if not existing.internal_metadata.is_outlier():
-                logger.info(
-                    "_process_pulled_event: Ignoring received event %s which we have already seen",
-                    event_id,
-                )
-                return
-            logger.info("De-outliering event %s", event_id)
-
         try:
             self._sanity_check_event(event)
         except SynapseError as err:
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index cfd4780add..7bc7f2f33e 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -374,7 +374,7 @@ class EventsWorkerStore(SQLBaseStore):
                 If there is a mismatch, behave as per allow_none.
 
         Returns:
-            The event, or None if the event was not found.
+            The event, or None if the event was not found and allow_none is `True`.
         """
         if not isinstance(event_id, str):
             raise TypeError("Invalid event event_id %r" % (event_id,))
diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py
index 745750b1d7..d00c69c229 100644
--- a/tests/handlers/test_federation.py
+++ b/tests/handlers/test_federation.py
@@ -19,7 +19,13 @@ from unittest.mock import Mock, patch
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.constants import EventTypes
-from synapse.api.errors import AuthError, Codes, LimitExceededError, SynapseError
+from synapse.api.errors import (
+    AuthError,
+    Codes,
+    LimitExceededError,
+    NotFoundError,
+    SynapseError,
+)
 from synapse.api.room_versions import RoomVersions
 from synapse.events import EventBase, make_event_from_dict
 from synapse.federation.federation_base import event_from_pdu_json
@@ -28,6 +34,7 @@ from synapse.logging.context import LoggingContext, run_in_background
 from synapse.rest import admin
 from synapse.rest.client import login, room
 from synapse.server import HomeServer
+from synapse.storage.databases.main.events_worker import EventCacheEntry
 from synapse.util import Clock
 from synapse.util.stringutils import random_string
 
@@ -322,6 +329,102 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase):
             )
         self.get_success(d)
 
+    def test_backfill_ignores_known_events(self) -> None:
+        """
+        Tests that events that we already know about are ignored when backfilling.
+        """
+        # Set up users
+        user_id = self.register_user("kermit", "test")
+        tok = self.login("kermit", "test")
+
+        other_server = "otherserver"
+        other_user = "@otheruser:" + other_server
+
+        # Create a room to backfill events into
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
+        room_version = self.get_success(self.store.get_room_version(room_id))
+
+        # Build an event to backfill
+        event = event_from_pdu_json(
+            {
+                "type": EventTypes.Message,
+                "content": {"body": "hello world", "msgtype": "m.text"},
+                "room_id": room_id,
+                "sender": other_user,
+                "depth": 32,
+                "prev_events": [],
+                "auth_events": [],
+                "origin_server_ts": self.clock.time_msec(),
+            },
+            room_version,
+        )
+
+        # Ensure the event is not already in the DB
+        self.get_failure(
+            self.store.get_event(event.event_id),
+            NotFoundError,
+        )
+
+        # Backfill the event and check that it has entered the DB.
+
+        # We mock out the FederationClient.backfill method, to pretend that a remote
+        # server has returned our fake event.
+        federation_client_backfill_mock = Mock(return_value=make_awaitable([event]))
+        self.hs.get_federation_client().backfill = federation_client_backfill_mock
+
+        # We also mock the persist method with a side effect of itself. This allows us
+        # to track when it has been called while preserving its function.
+        persist_events_and_notify_mock = Mock(
+            side_effect=self.hs.get_federation_event_handler().persist_events_and_notify
+        )
+        self.hs.get_federation_event_handler().persist_events_and_notify = (
+            persist_events_and_notify_mock
+        )
+
+        # Small side-tangent. We populate the event cache with the event, even though
+        # it is not yet in the DB. This is an invalid scenario that can currently occur
+        # due to not properly invalidating the event cache.
+        # See https://github.com/matrix-org/synapse/issues/13476.
+        #
+        # As a result, backfill should not rely on the event cache to check whether
+        # we already have an event in the DB.
+        # TODO: Remove this bit when the event cache is properly invalidated.
+        cache_entry = EventCacheEntry(
+            event=event,
+            redacted_event=None,
+        )
+        self.store._get_event_cache.set_local((event.event_id,), cache_entry)
+
+        # We now call FederationEventHandler.backfill (a separate method) to trigger
+        # a backfill request. It should receive the fake event.
+        self.get_success(
+            self.hs.get_federation_event_handler().backfill(
+                other_user,
+                room_id,
+                limit=10,
+                extremities=[],
+            )
+        )
+
+        # Check that our fake event was persisted.
+        persist_events_and_notify_mock.assert_called_once()
+        persist_events_and_notify_mock.reset_mock()
+
+        # Now we repeat the backfill, having the homeserver receive the fake event
+        # again.
+        self.get_success(
+            self.hs.get_federation_event_handler().backfill(
+                other_user,
+                room_id,
+                limit=10,
+                extremities=[],
+            ),
+        )
+
+        # This time, we expect no event persistence to have occurred, as we already
+        # have this event.
+        persist_events_and_notify_mock.assert_not_called()
+
     @unittest.override_config(
         {"rc_invites": {"per_user": {"per_second": 0.5, "burst_count": 3}}}
     )
-- 
cgit 1.5.1


From c3a4780080a5bcb04132283c0f32f7452655792a Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 18 Oct 2022 12:33:18 +0100
Subject: When restarting a partial join resync, prioritise the server which
 actioned a partial join (#14126)

---
 changelog.d/14126.misc                             |  1 +
 synapse/handlers/device.py                         |  5 +-
 synapse/handlers/federation.py                     | 57 +++++++++++++---------
 synapse/storage/database.py                        |  2 +-
 synapse/storage/databases/main/room.py             | 43 +++++++++++++---
 .../delta/73/09partial_joined_via_destination.sql  | 18 +++++++
 6 files changed, 95 insertions(+), 31 deletions(-)
 create mode 100644 changelog.d/14126.misc
 create mode 100644 synapse/storage/schema/main/delta/73/09partial_joined_via_destination.sql

(limited to 'synapse')

diff --git a/changelog.d/14126.misc b/changelog.d/14126.misc
new file mode 100644
index 0000000000..30b3482fbd
--- /dev/null
+++ b/changelog.d/14126.misc
@@ -0,0 +1 @@
+Faster joins: prioritise the server we joined by when restarting a partial join resync.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index f9cc5bddbc..c597639a7f 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -937,7 +937,10 @@ class DeviceListUpdater:
         # Check if we are partially joining any rooms. If so we need to store
         # all device list updates so that we can handle them correctly once we
         # know who is in the room.
-        partial_rooms = await self.store.get_partial_state_rooms_and_servers()
+        # TODO(faster joins): this fetches and processes a bunch of data that we don't
+        # use. Could be replaced by a tighter query e.g.
+        #   SELECT EXISTS(SELECT 1 FROM partial_state_rooms)
+        partial_rooms = await self.store.get_partial_state_room_resync_info()
         if partial_rooms:
             await self.store.add_remote_device_list_to_pending(
                 user_id,
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 5f7e0a1f79..ccc045d36f 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -632,6 +632,7 @@ class FederationHandler:
                     room_id=room_id,
                     servers=ret.servers_in_room,
                     device_lists_stream_id=self.store.get_device_stream_token(),
+                    joined_via=origin,
                 )
 
             try:
@@ -1615,13 +1616,13 @@ class FederationHandler:
         """Resumes resyncing of all partial-state rooms after a restart."""
         assert not self.config.worker.worker_app
 
-        partial_state_rooms = await self.store.get_partial_state_rooms_and_servers()
-        for room_id, servers_in_room in partial_state_rooms.items():
+        partial_state_rooms = await self.store.get_partial_state_room_resync_info()
+        for room_id, resync_info in partial_state_rooms.items():
             run_as_background_process(
                 desc="sync_partial_state_room",
                 func=self._sync_partial_state_room,
-                initial_destination=None,
-                other_destinations=servers_in_room,
+                initial_destination=resync_info.joined_via,
+                other_destinations=resync_info.servers_in_room,
                 room_id=room_id,
             )
 
@@ -1650,28 +1651,12 @@ class FederationHandler:
         #   really leave, that might mean we have difficulty getting the room state over
         #   federation.
         #   https://github.com/matrix-org/synapse/issues/12802
-        #
-        # TODO(faster_joins): we need some way of prioritising which homeservers in
-        #   `other_destinations` to try first, otherwise we'll spend ages trying dead
-        #   homeservers for large rooms.
-        #   https://github.com/matrix-org/synapse/issues/12999
-
-        if initial_destination is None and len(other_destinations) == 0:
-            raise ValueError(
-                f"Cannot resync state of {room_id}: no destinations provided"
-            )
 
         # Make an infinite iterator of destinations to try. Once we find a working
         # destination, we'll stick with it until it flakes.
-        destinations: Collection[str]
-        if initial_destination is not None:
-            # Move `initial_destination` to the front of the list.
-            destinations = list(other_destinations)
-            if initial_destination in destinations:
-                destinations.remove(initial_destination)
-            destinations = [initial_destination] + destinations
-        else:
-            destinations = other_destinations
+        destinations = _prioritise_destinations_for_partial_state_resync(
+            initial_destination, other_destinations, room_id
+        )
         destination_iter = itertools.cycle(destinations)
 
         # `destination` is the current remote homeserver we're pulling from.
@@ -1769,3 +1754,29 @@ class FederationHandler:
                             room_id,
                             destination,
                         )
+
+
+def _prioritise_destinations_for_partial_state_resync(
+    initial_destination: Optional[str],
+    other_destinations: Collection[str],
+    room_id: str,
+) -> Collection[str]:
+    """Work out the order in which we should ask servers to resync events.
+
+    If an `initial_destination` is given, it takes top priority. Otherwise
+    all servers are treated equally.
+
+    :raises ValueError: if no destination is provided at all.
+    """
+    if initial_destination is None and len(other_destinations) == 0:
+        raise ValueError(f"Cannot resync state of {room_id}: no destinations provided")
+
+    if initial_destination is None:
+        return other_destinations
+
+    # Move `initial_destination` to the front of the list.
+    destinations = list(other_destinations)
+    if initial_destination in destinations:
+        destinations.remove(initial_destination)
+    destinations = [initial_destination] + destinations
+    return destinations
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 7bb21f8f81..4717c9728a 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -1658,7 +1658,7 @@ class DatabasePool:
             table: string giving the table name
             keyvalues: dict of column names and values to select the row with
             retcol: string giving the name of the column to return
-            allow_none: If true, return None instead of failing if the SELECT
+            allow_none: If true, return None instead of raising StoreError if the SELECT
                 statement returns no rows
             desc: description of the transaction, for logging and metrics
         """
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index e41c99027a..7d97f8f60e 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -97,6 +97,12 @@ class RoomSortOrder(Enum):
     STATE_EVENTS = "state_events"
 
 
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class PartialStateResyncInfo:
+    joined_via: Optional[str]
+    servers_in_room: List[str] = attr.ib(factory=list)
+
+
 class RoomWorkerStore(CacheInvalidationWorkerStore):
     def __init__(
         self,
@@ -1160,17 +1166,29 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
             desc="get_partial_state_servers_at_join",
         )
 
-    async def get_partial_state_rooms_and_servers(
+    async def get_partial_state_room_resync_info(
         self,
-    ) -> Mapping[str, Collection[str]]:
-        """Get all rooms containing events with partial state, and the servers known
-        to be in the room.
+    ) -> Mapping[str, PartialStateResyncInfo]:
+        """Get all rooms containing events with partial state, and the information
+        needed to restart a "resync" of those rooms.
 
         Returns:
             A dictionary of rooms with partial state, with room IDs as keys and
             lists of servers in rooms as values.
         """
-        room_servers: Dict[str, List[str]] = {}
+        room_servers: Dict[str, PartialStateResyncInfo] = {}
+
+        rows = await self.db_pool.simple_select_list(
+            table="partial_state_rooms",
+            keyvalues={},
+            retcols=("room_id", "joined_via"),
+            desc="get_server_which_served_partial_join",
+        )
+
+        for row in rows:
+            room_id = row["room_id"]
+            joined_via = row["joined_via"]
+            room_servers[room_id] = PartialStateResyncInfo(joined_via=joined_via)
 
         rows = await self.db_pool.simple_select_list(
             "partial_state_rooms_servers",
@@ -1182,7 +1200,15 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
         for row in rows:
             room_id = row["room_id"]
             server_name = row["server_name"]
-            room_servers.setdefault(room_id, []).append(server_name)
+            entry = room_servers.get(room_id)
+            if entry is None:
+                # There is a foreign key constraint which enforces that every room_id in
+                # partial_state_rooms_servers appears in partial_state_rooms. So we
+                # expect `entry` to be non-null. (This reasoning fails if we've
+                # partial-joined between the two SELECTs, but this is unlikely to happen
+                # in practice.)
+                continue
+            entry.servers_in_room.append(server_name)
 
         return room_servers
 
@@ -1827,6 +1853,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
         room_id: str,
         servers: Collection[str],
         device_lists_stream_id: int,
+        joined_via: str,
     ) -> None:
         """Mark the given room as containing events with partial state.
 
@@ -1842,6 +1869,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
             servers: other servers known to be in the room
             device_lists_stream_id: the device_lists stream ID at the time when we first
                 joined the room.
+            joined_via: the server name we requested a partial join from.
         """
         await self.db_pool.runInteraction(
             "store_partial_state_room",
@@ -1849,6 +1877,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
             room_id,
             servers,
             device_lists_stream_id,
+            joined_via,
         )
 
     def _store_partial_state_room_txn(
@@ -1857,6 +1886,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
         room_id: str,
         servers: Collection[str],
         device_lists_stream_id: int,
+        joined_via: str,
     ) -> None:
         DatabasePool.simple_insert_txn(
             txn,
@@ -1866,6 +1896,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
                 "device_lists_stream_id": device_lists_stream_id,
                 # To be updated later once the join event is persisted.
                 "join_event_id": None,
+                "joined_via": joined_via,
             },
         )
         DatabasePool.simple_insert_many_txn(
diff --git a/synapse/storage/schema/main/delta/73/09partial_joined_via_destination.sql b/synapse/storage/schema/main/delta/73/09partial_joined_via_destination.sql
new file mode 100644
index 0000000000..066d602b18
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/09partial_joined_via_destination.sql
@@ -0,0 +1,18 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- When we resync partial state, we prioritise doing so using the server we
+-- partial-joined from. To do this we need to record which server that was!
+ALTER TABLE partial_state_rooms ADD COLUMN joined_via TEXT;
-- 
cgit 1.5.1


From 8e50299d8b112364b011ca8f89bc19a97e9622ec Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 18 Oct 2022 13:59:04 +0100
Subject: Fix `track_memory_usage` on poetry-core 1.3.x installations (#14221)

* Fix `track_memory_usage` on poetry-core 1.3.x installations

The same kind of problem as discussed in #14085:

1. we defined an extra with an underscore
2. we look it up at runtime with an underscore
3. but poetry-core 1.3.x installs it with a dash, causing (2) to fail.

Fix by using a dash everywhere.

* Changelog
---
 changelog.d/14221.misc  | 1 +
 pyproject.toml          | 4 ++--
 synapse/config/cache.py | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/14221.misc

(limited to 'synapse')

diff --git a/changelog.d/14221.misc b/changelog.d/14221.misc
new file mode 100644
index 0000000000..fe7afac245
--- /dev/null
+++ b/changelog.d/14221.misc
@@ -0,0 +1 @@
+Rename the `cache_memory` extra to `cache-memory`, for compatibility with poetry-core 1.3.0 and [PEP 685](https://peps.python.org/pep-0685/). From-source installations using this extra will need to install using the new name.
diff --git a/pyproject.toml b/pyproject.toml
index 7fbbc08915..8bc24c556a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -227,7 +227,7 @@ jwt = ["authlib"]
 # (if it is not installed, we fall back to slow code.)
 redis = ["txredisapi", "hiredis"]
 # Required to use experimental `caches.track_memory_usage` config option.
-cache_memory = ["pympler"]
+cache-memory = ["pympler"]
 test = ["parameterized", "idna"]
 
 # The duplication here is awful. I hate hate hate hate hate it. However, for now I want
@@ -258,7 +258,7 @@ all = [
     "jaeger-client", "opentracing",
     # redis
     "txredisapi", "hiredis",
-    # cache_memory
+    # cache-memory
     "pympler",
     # omitted:
     #   - test: it's useful to have this separate from dev deps in the olddeps job
diff --git a/synapse/config/cache.py b/synapse/config/cache.py
index 2db8cfb005..eb4194a5a9 100644
--- a/synapse/config/cache.py
+++ b/synapse/config/cache.py
@@ -159,7 +159,7 @@ class CacheConfig(Config):
 
         self.track_memory_usage = cache_config.get("track_memory_usage", False)
         if self.track_memory_usage:
-            check_requirements("cache_memory")
+            check_requirements("cache-memory")
 
         expire_caches = cache_config.get("expire_caches", True)
         cache_entry_ttl = cache_config.get("cache_entry_ttl", "30m")
-- 
cgit 1.5.1


From dbf18f514ea5d2539ba3148049eae5a6793f1d60 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 18 Oct 2022 10:55:41 -0400
Subject: Update the thread_id right before use (in case the bg update hasn't
 finished) (#14222)

This avoids running a forced update of rows with a null thread_id.

An index is added (in the background) to hopefully make this
easier in the future.
---
 changelog.d/14222.feature                          |   1 +
 .../storage/databases/main/event_push_actions.py   | 103 +++++++++++++++++++++
 .../delta/73/06thread_notifications_backfill.sql   |  29 ------
 .../73/06thread_notifications_thread_id_idx.sql    |  23 +++++
 .../07thread_notifications_not_null.sql.postgres   |  19 ----
 .../73/07thread_notifications_not_null.sql.sqlite  | 101 --------------------
 6 files changed, 127 insertions(+), 149 deletions(-)
 create mode 100644 changelog.d/14222.feature
 delete mode 100644 synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql
 create mode 100644 synapse/storage/schema/main/delta/73/06thread_notifications_thread_id_idx.sql
 delete mode 100644 synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres
 delete mode 100644 synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite

(limited to 'synapse')

diff --git a/changelog.d/14222.feature b/changelog.d/14222.feature
new file mode 100644
index 0000000000..5d0ae16e13
--- /dev/null
+++ b/changelog.d/14222.feature
@@ -0,0 +1 @@
+Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)).
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index f070e6e88a..b283ab0f9c 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -294,6 +294,44 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             self._background_backfill_thread_id,
         )
 
+        # Indexes which will be used to quickly make the thread_id column non-null.
+        self.db_pool.updates.register_background_index_update(
+            "event_push_actions_thread_id_null",
+            index_name="event_push_actions_thread_id_null",
+            table="event_push_actions",
+            columns=["thread_id"],
+            where_clause="thread_id IS NULL",
+        )
+        self.db_pool.updates.register_background_index_update(
+            "event_push_summary_thread_id_null",
+            index_name="event_push_summary_thread_id_null",
+            table="event_push_summary",
+            columns=["thread_id"],
+            where_clause="thread_id IS NULL",
+        )
+
+        # Check ASAP (and then later, every 15s) to see if we have finished
+        # background updates for the event_push_actions and event_push_summary tables.
+        self._clock.call_later(0.0, self._check_event_push_backfill_thread_id)
+        self._event_push_backfill_thread_id_done = False
+
+    @wrap_as_background_process("check_event_push_backfill_thread_id")
+    async def _check_event_push_backfill_thread_id(self) -> None:
+        """
+        Has thread_id finished backfilling?
+
+        If not, we need to just-in-time update it so the queries work.
+        """
+        done = await self.db_pool.updates.has_completed_background_update(
+            "event_push_backfill_thread_id"
+        )
+
+        if done:
+            self._event_push_backfill_thread_id_done = True
+        else:
+            # Reschedule to run.
+            self._clock.call_later(15.0, self._check_event_push_backfill_thread_id)
+
     async def _background_backfill_thread_id(
         self, progress: JsonDict, batch_size: int
     ) -> int:
@@ -526,6 +564,25 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE),
         )
 
+        # First ensure that the existing rows have an updated thread_id field.
+        if not self._event_push_backfill_thread_id_done:
+            txn.execute(
+                """
+                UPDATE event_push_summary
+                SET thread_id = ?
+                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+                """,
+                (MAIN_TIMELINE, room_id, user_id),
+            )
+            txn.execute(
+                """
+                UPDATE event_push_actions
+                SET thread_id = ?
+                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+                """,
+                (MAIN_TIMELINE, room_id, user_id),
+            )
+
         # First we pull the counts from the summary table.
         #
         # We check that `last_receipt_stream_ordering` matches the stream ordering of the
@@ -1341,6 +1398,25 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 (room_id, user_id, stream_ordering, *thread_args),
             )
 
+            # First ensure that the existing rows have an updated thread_id field.
+            if not self._event_push_backfill_thread_id_done:
+                txn.execute(
+                    """
+                    UPDATE event_push_summary
+                    SET thread_id = ?
+                    WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+                    """,
+                    (MAIN_TIMELINE, room_id, user_id),
+                )
+                txn.execute(
+                    """
+                    UPDATE event_push_actions
+                    SET thread_id = ?
+                    WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+                    """,
+                    (MAIN_TIMELINE, room_id, user_id),
+                )
+
             # Fetch the notification counts between the stream ordering of the
             # latest receipt and what was previously summarised.
             unread_counts = self._get_notif_unread_count_for_user_room(
@@ -1475,6 +1551,19 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             rotate_to_stream_ordering: The new maximum event stream ordering to summarise.
         """
 
+        # Ensure that any new actions have an updated thread_id.
+        if not self._event_push_backfill_thread_id_done:
+            txn.execute(
+                """
+                UPDATE event_push_actions
+                SET thread_id = ?
+                WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL
+                """,
+                (MAIN_TIMELINE, old_rotate_stream_ordering, rotate_to_stream_ordering),
+            )
+
+        # XXX Do we need to update summaries here too?
+
         # Calculate the new counts that should be upserted into event_push_summary
         sql = """
             SELECT user_id, room_id, thread_id,
@@ -1537,6 +1626,20 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
         logger.info("Rotating notifications, handling %d rows", len(summaries))
 
+        # Ensure that any updated threads have the proper thread_id.
+        if not self._event_push_backfill_thread_id_done:
+            txn.execute_batch(
+                """
+                UPDATE event_push_summary
+                SET thread_id = ?
+                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+                """,
+                [
+                    (MAIN_TIMELINE, room_id, user_id)
+                    for user_id, room_id, _ in summaries
+                ],
+            )
+
         self.db_pool.simple_upsert_many_txn(
             txn,
             table="event_push_summary",
diff --git a/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql b/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql
deleted file mode 100644
index 0ffde9bbeb..0000000000
--- a/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Copyright 2022 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- Forces the background updates from 06thread_notifications.sql to run in the
--- foreground as code will now require those to be "done".
-
-DELETE FROM background_updates WHERE update_name = 'event_push_backfill_thread_id';
-
--- Overwrite any null thread_id columns.
-UPDATE event_push_actions_staging SET thread_id = 'main' WHERE thread_id IS NULL;
-UPDATE event_push_actions SET thread_id = 'main' WHERE thread_id IS NULL;
-UPDATE event_push_summary SET thread_id = 'main' WHERE thread_id IS NULL;
-
--- Do not run the event_push_summary_unique_index job if it is pending; the
--- thread_id field will be made required.
-DELETE FROM background_updates WHERE update_name = 'event_push_summary_unique_index';
-DROP INDEX IF EXISTS event_push_summary_unique_index;
diff --git a/synapse/storage/schema/main/delta/73/06thread_notifications_thread_id_idx.sql b/synapse/storage/schema/main/delta/73/06thread_notifications_thread_id_idx.sql
new file mode 100644
index 0000000000..8b3c636594
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/06thread_notifications_thread_id_idx.sql
@@ -0,0 +1,23 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Allow there to be multiple summaries per user/room.
+DROP INDEX IF EXISTS event_push_summary_unique_index;
+
+INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
+  (7306, 'event_push_actions_thread_id_null', '{}', 'event_push_backfill_thread_id');
+
+INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
+  (7306, 'event_push_summary_thread_id_null', '{}', 'event_push_backfill_thread_id');
diff --git a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres
deleted file mode 100644
index 33674f8c62..0000000000
--- a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres
+++ /dev/null
@@ -1,19 +0,0 @@
-/* Copyright 2022 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- The columns can now be made non-nullable.
-ALTER TABLE event_push_actions_staging ALTER COLUMN thread_id SET NOT NULL;
-ALTER TABLE event_push_actions ALTER COLUMN thread_id SET NOT NULL;
-ALTER TABLE event_push_summary ALTER COLUMN thread_id SET NOT NULL;
diff --git a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite
deleted file mode 100644
index 5322ad77a4..0000000000
--- a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite
+++ /dev/null
@@ -1,101 +0,0 @@
-/* Copyright 2022 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- SQLite doesn't support modifying columns to an existing table, so it must
--- be recreated.
-
--- Create the new tables.
-CREATE TABLE event_push_actions_staging_new (
-    event_id TEXT NOT NULL,
-    user_id TEXT NOT NULL,
-    actions TEXT NOT NULL,
-    notif SMALLINT NOT NULL,
-    highlight SMALLINT NOT NULL,
-    unread SMALLINT,
-    thread_id TEXT NOT NULL,
-    inserted_ts BIGINT
-);
-
-CREATE TABLE event_push_actions_new (
-    room_id TEXT NOT NULL,
-    event_id TEXT NOT NULL,
-    user_id TEXT NOT NULL,
-    profile_tag VARCHAR(32),
-    actions TEXT NOT NULL,
-    topological_ordering BIGINT,
-    stream_ordering BIGINT,
-    notif SMALLINT,
-    highlight SMALLINT,
-    unread SMALLINT,
-    thread_id TEXT NOT NULL,
-    CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag)
-);
-
-CREATE TABLE event_push_summary_new (
-    user_id TEXT NOT NULL,
-    room_id TEXT NOT NULL,
-    notif_count BIGINT NOT NULL,
-    stream_ordering BIGINT NOT NULL,
-    unread_count BIGINT,
-    last_receipt_stream_ordering BIGINT,
-    thread_id TEXT NOT NULL
-);
-
--- Swap the indexes.
-DROP INDEX IF EXISTS event_push_actions_staging_id;
-CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging_new(event_id);
-
-DROP INDEX IF EXISTS event_push_actions_room_id_user_id;
-DROP INDEX IF EXISTS event_push_actions_rm_tokens;
-DROP INDEX IF EXISTS event_push_actions_stream_ordering;
-DROP INDEX IF EXISTS event_push_actions_u_highlight;
-DROP INDEX IF EXISTS event_push_actions_highlights_index;
-CREATE INDEX event_push_actions_room_id_user_id on event_push_actions_new(room_id, user_id);
-CREATE INDEX event_push_actions_rm_tokens on event_push_actions_new( user_id, room_id, topological_ordering, stream_ordering );
-CREATE INDEX event_push_actions_stream_ordering on event_push_actions_new( stream_ordering, user_id );
-CREATE INDEX event_push_actions_u_highlight ON event_push_actions_new (user_id, stream_ordering);
-CREATE INDEX event_push_actions_highlights_index ON event_push_actions_new (user_id, room_id, topological_ordering, stream_ordering);
-
--- Copy the data.
-INSERT INTO event_push_actions_staging_new (event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts)
-    SELECT event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts
-    FROM event_push_actions_staging;
-
-INSERT INTO event_push_actions_new (room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id)
-    SELECT room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id
-    FROM event_push_actions;
-
-INSERT INTO event_push_summary_new (user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id)
-    SELECT user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id
-    FROM event_push_summary;
-
--- Drop the old tables.
-DROP TABLE event_push_actions_staging;
-DROP TABLE event_push_actions;
-DROP TABLE event_push_summary;
-
--- Rename the tables.
-ALTER TABLE event_push_actions_staging_new RENAME TO event_push_actions_staging;
-ALTER TABLE event_push_actions_new RENAME TO event_push_actions;
-ALTER TABLE event_push_summary_new RENAME TO event_push_summary;
-
--- Re-run background updates from 72/02event_push_actions_index.sql and
--- 72/06thread_notifications.sql.
-INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
-  (7307, 'event_push_summary_unique_index2', '{}')
-  ON CONFLICT (update_name) DO NOTHING;
-INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
-  (7307, 'event_push_actions_stream_highlight_index', '{}')
-  ON CONFLICT (update_name) DO NOTHING;
-- 
cgit 1.5.1


From 4eaf3eb840b8cfa78d970216c74fc128495f08a5 Mon Sep 17 00:00:00 2001
From: Hugh Nimmo-Smith <hughns@users.noreply.github.com>
Date: Tue, 18 Oct 2022 16:52:25 +0100
Subject: Implementation of HTTP 307 response for MSC3886 POST endpoint
 (#14018)

Co-authored-by: reivilibre <olivier@librepush.net>
Co-authored-by: Andrew Morgan <andrewm@element.io>
---
 changelog.d/14018.feature                       |  1 +
 synapse/config/experimental.py                  |  7 +-
 synapse/config/server.py                        |  4 ++
 synapse/handlers/sso.py                         |  2 +-
 synapse/http/server.py                          | 48 ++++++++++---
 synapse/http/site.py                            |  3 +
 synapse/rest/__init__.py                        |  2 +
 synapse/rest/client/rendezvous.py               | 74 +++++++++++++++++++
 synapse/rest/client/versions.py                 |  3 +
 synapse/rest/key/v2/local_key_resource.py       |  4 +-
 synapse/rest/synapse/client/new_user_consent.py |  3 +-
 synapse/rest/well_known.py                      |  3 +-
 tests/logging/test_terse_json.py                |  1 +
 tests/rest/client/test_rendezvous.py            | 45 ++++++++++++
 tests/server.py                                 |  8 ++-
 tests/test_server.py                            | 94 ++++++++++++++++++-------
 16 files changed, 257 insertions(+), 45 deletions(-)
 create mode 100644 changelog.d/14018.feature
 create mode 100644 synapse/rest/client/rendezvous.py
 create mode 100644 tests/rest/client/test_rendezvous.py

(limited to 'synapse')

diff --git a/changelog.d/14018.feature b/changelog.d/14018.feature
new file mode 100644
index 0000000000..c8454607eb
--- /dev/null
+++ b/changelog.d/14018.feature
@@ -0,0 +1 @@
+Support for redirecting to an implementation of a [MSC3886](https://github.com/matrix-org/matrix-spec-proposals/pull/3886) HTTP rendezvous service.
\ No newline at end of file
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index f9a49451d8..4009add01d 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any
+from typing import Any, Optional
 
 import attr
 
@@ -120,3 +120,8 @@ class ExperimentalConfig(Config):
 
         # MSC3874: Filtering /messages with rel_types / not_rel_types.
         self.msc3874_enabled: bool = experimental.get("msc3874_enabled", False)
+
+        # MSC3886: Simple client rendezvous capability
+        self.msc3886_endpoint: Optional[str] = experimental.get(
+            "msc3886_endpoint", None
+        )
diff --git a/synapse/config/server.py b/synapse/config/server.py
index f2353ce5fb..ec46ca63ad 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -207,6 +207,9 @@ class HttpListenerConfig:
     additional_resources: Dict[str, dict] = attr.Factory(dict)
     tag: Optional[str] = None
     request_id_header: Optional[str] = None
+    # If true, the listener will return CORS response headers compatible with MSC3886:
+    # https://github.com/matrix-org/matrix-spec-proposals/pull/3886
+    experimental_cors_msc3886: bool = False
 
 
 @attr.s(slots=True, frozen=True, auto_attribs=True)
@@ -935,6 +938,7 @@ def parse_listener_def(num: int, listener: Any) -> ListenerConfig:
             additional_resources=listener.get("additional_resources", {}),
             tag=listener.get("tag"),
             request_id_header=listener.get("request_id_header"),
+            experimental_cors_msc3886=listener.get("experimental_cors_msc3886", False),
         )
 
     return ListenerConfig(port, bind_addresses, listener_type, tls, http_config)
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index e035677b8a..5943f08e91 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -874,7 +874,7 @@ class SsoHandler:
         )
 
     async def handle_terms_accepted(
-        self, request: Request, session_id: str, terms_version: str
+        self, request: SynapseRequest, session_id: str, terms_version: str
     ) -> None:
         """Handle a request to the new-user 'consent' endpoint
 
diff --git a/synapse/http/server.py b/synapse/http/server.py
index bcbfac2c9f..b26e34bceb 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -19,6 +19,7 @@ import logging
 import types
 import urllib
 from http import HTTPStatus
+from http.client import FOUND
 from inspect import isawaitable
 from typing import (
     TYPE_CHECKING,
@@ -339,7 +340,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
 
             return callback_return
 
-        _unrecognised_request_handler(request)
+        return _unrecognised_request_handler(request)
 
     @abc.abstractmethod
     def _send_response(
@@ -598,7 +599,7 @@ class RootRedirect(resource.Resource):
 class OptionsResource(resource.Resource):
     """Responds to OPTION requests for itself and all children."""
 
-    def render_OPTIONS(self, request: Request) -> bytes:
+    def render_OPTIONS(self, request: SynapseRequest) -> bytes:
         request.setResponseCode(204)
         request.setHeader(b"Content-Length", b"0")
 
@@ -763,7 +764,7 @@ def respond_with_json(
 
 
 def respond_with_json_bytes(
-    request: Request,
+    request: SynapseRequest,
     code: int,
     json_bytes: bytes,
     send_cors: bool = False,
@@ -859,7 +860,7 @@ def _write_bytes_to_request(request: Request, bytes_to_write: bytes) -> None:
     _ByteProducer(request, bytes_generator)
 
 
-def set_cors_headers(request: Request) -> None:
+def set_cors_headers(request: SynapseRequest) -> None:
     """Set the CORS headers so that javascript running in a web browsers can
     use this API
 
@@ -870,10 +871,20 @@ def set_cors_headers(request: Request) -> None:
     request.setHeader(
         b"Access-Control-Allow-Methods", b"GET, HEAD, POST, PUT, DELETE, OPTIONS"
     )
-    request.setHeader(
-        b"Access-Control-Allow-Headers",
-        b"X-Requested-With, Content-Type, Authorization, Date",
-    )
+    if request.experimental_cors_msc3886:
+        request.setHeader(
+            b"Access-Control-Allow-Headers",
+            b"X-Requested-With, Content-Type, Authorization, Date, If-Match, If-None-Match",
+        )
+        request.setHeader(
+            b"Access-Control-Expose-Headers",
+            b"ETag, Location, X-Max-Bytes",
+        )
+    else:
+        request.setHeader(
+            b"Access-Control-Allow-Headers",
+            b"X-Requested-With, Content-Type, Authorization, Date",
+        )
 
 
 def set_corp_headers(request: Request) -> None:
@@ -942,10 +953,25 @@ def set_clickjacking_protection_headers(request: Request) -> None:
     request.setHeader(b"Content-Security-Policy", b"frame-ancestors 'none';")
 
 
-def respond_with_redirect(request: Request, url: bytes) -> None:
-    """Write a 302 response to the request, if it is still alive."""
+def respond_with_redirect(
+    request: SynapseRequest, url: bytes, statusCode: int = FOUND, cors: bool = False
+) -> None:
+    """
+    Write a 302 (or other specified status code) response to the request, if it is still alive.
+
+    Args:
+        request: The http request to respond to.
+        url: The URL to redirect to.
+        statusCode: The HTTP status code to use for the redirect (defaults to 302).
+        cors: Whether to set CORS headers on the response.
+    """
     logger.debug("Redirect to %s", url.decode("utf-8"))
-    request.redirect(url)
+
+    if cors:
+        set_cors_headers(request)
+
+    request.setResponseCode(statusCode)
+    request.setHeader(b"location", url)
     finish_request(request)
 
 
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 55a6afce35..3dbd541fed 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -82,6 +82,7 @@ class SynapseRequest(Request):
         self.reactor = site.reactor
         self._channel = channel  # this is used by the tests
         self.start_time = 0.0
+        self.experimental_cors_msc3886 = site.experimental_cors_msc3886
 
         # The requester, if authenticated. For federation requests this is the
         # server name, for client requests this is the Requester object.
@@ -622,6 +623,8 @@ class SynapseSite(Site):
 
         request_id_header = config.http_options.request_id_header
 
+        self.experimental_cors_msc3886 = config.http_options.experimental_cors_msc3886
+
         def request_factory(channel: HTTPChannel, queued: bool) -> Request:
             return request_class(
                 channel,
diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py
index 9a2ab99ede..28542cd774 100644
--- a/synapse/rest/__init__.py
+++ b/synapse/rest/__init__.py
@@ -44,6 +44,7 @@ from synapse.rest.client import (
     receipts,
     register,
     relations,
+    rendezvous,
     report_event,
     room,
     room_batch,
@@ -132,3 +133,4 @@ class ClientRestResource(JsonResource):
         # unstable
         mutual_rooms.register_servlets(hs, client_resource)
         login_token_request.register_servlets(hs, client_resource)
+        rendezvous.register_servlets(hs, client_resource)
diff --git a/synapse/rest/client/rendezvous.py b/synapse/rest/client/rendezvous.py
new file mode 100644
index 0000000000..89176b1ffa
--- /dev/null
+++ b/synapse/rest/client/rendezvous.py
@@ -0,0 +1,74 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from http.client import TEMPORARY_REDIRECT
+from typing import TYPE_CHECKING, Optional
+
+from synapse.http.server import HttpServer, respond_with_redirect
+from synapse.http.servlet import RestServlet
+from synapse.http.site import SynapseRequest
+from synapse.rest.client._base import client_patterns
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+class RendezvousServlet(RestServlet):
+    """
+    This is a placeholder implementation of [MSC3886](https://github.com/matrix-org/matrix-spec-proposals/pull/3886)
+    simple client rendezvous capability that is used by the "Sign in with QR" functionality.
+
+    This implementation only serves as a 307 redirect to a configured server rather than being a full implementation.
+
+    A module that implements the full functionality is available at: https://pypi.org/project/matrix-http-rendezvous-synapse/.
+
+    Request:
+
+    POST /rendezvous HTTP/1.1
+    Content-Type: ...
+
+    ...
+
+    Response:
+
+    HTTP/1.1 307
+    Location: <configured endpoint>
+    """
+
+    PATTERNS = client_patterns(
+        "/org.matrix.msc3886/rendezvous$", releases=[], v1=False, unstable=True
+    )
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__()
+        redirection_target: Optional[str] = hs.config.experimental.msc3886_endpoint
+        assert (
+            redirection_target is not None
+        ), "Servlet is only registered if there is a redirection target"
+        self.endpoint = redirection_target.encode("utf-8")
+
+    async def on_POST(self, request: SynapseRequest) -> None:
+        respond_with_redirect(
+            request, self.endpoint, statusCode=TEMPORARY_REDIRECT, cors=True
+        )
+
+    # PUT, GET and DELETE are not implemented as they should be fulfilled by the redirect target.
+
+
+def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
+    if hs.config.experimental.msc3886_endpoint is not None:
+        RendezvousServlet(hs).register(http_server)
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 4b87ee978a..9b1b72c68a 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -116,6 +116,9 @@ class VersionsRestServlet(RestServlet):
                     "org.matrix.msc3881": self.config.experimental.msc3881_enabled,
                     # Adds support for filtering /messages by event relation.
                     "org.matrix.msc3874": self.config.experimental.msc3874_enabled,
+                    # Adds support for simple HTTP rendezvous as per MSC3886
+                    "org.matrix.msc3886": self.config.experimental.msc3886_endpoint
+                    is not None,
                 },
             },
         )
diff --git a/synapse/rest/key/v2/local_key_resource.py b/synapse/rest/key/v2/local_key_resource.py
index 0c9f042c84..095993415c 100644
--- a/synapse/rest/key/v2/local_key_resource.py
+++ b/synapse/rest/key/v2/local_key_resource.py
@@ -20,9 +20,9 @@ from signedjson.sign import sign_json
 from unpaddedbase64 import encode_base64
 
 from twisted.web.resource import Resource
-from twisted.web.server import Request
 
 from synapse.http.server import respond_with_json_bytes
+from synapse.http.site import SynapseRequest
 from synapse.types import JsonDict
 
 if TYPE_CHECKING:
@@ -99,7 +99,7 @@ class LocalKey(Resource):
             json_object = sign_json(json_object, self.config.server.server_name, key)
         return json_object
 
-    def render_GET(self, request: Request) -> Optional[int]:
+    def render_GET(self, request: SynapseRequest) -> Optional[int]:
         time_now = self.clock.time_msec()
         # Update the expiry time if less than half the interval remains.
         if time_now + self.config.key.key_refresh_interval / 2 > self.valid_until_ts:
diff --git a/synapse/rest/synapse/client/new_user_consent.py b/synapse/rest/synapse/client/new_user_consent.py
index 1c1c7b3613..22784157e6 100644
--- a/synapse/rest/synapse/client/new_user_consent.py
+++ b/synapse/rest/synapse/client/new_user_consent.py
@@ -20,6 +20,7 @@ from synapse.api.errors import SynapseError
 from synapse.handlers.sso import get_username_mapping_session_cookie_from_request
 from synapse.http.server import DirectServeHtmlResource, respond_with_html
 from synapse.http.servlet import parse_string
+from synapse.http.site import SynapseRequest
 from synapse.types import UserID
 from synapse.util.templates import build_jinja_env
 
@@ -88,7 +89,7 @@ class NewUserConsentResource(DirectServeHtmlResource):
         html = template.render(template_params)
         respond_with_html(request, 200, html)
 
-    async def _async_render_POST(self, request: Request) -> None:
+    async def _async_render_POST(self, request: SynapseRequest) -> None:
         try:
             session_id = get_username_mapping_session_cookie_from_request(request)
         except SynapseError as e:
diff --git a/synapse/rest/well_known.py b/synapse/rest/well_known.py
index 6f7ac54c65..e2174fdfea 100644
--- a/synapse/rest/well_known.py
+++ b/synapse/rest/well_known.py
@@ -18,6 +18,7 @@ from twisted.web.resource import Resource
 from twisted.web.server import Request
 
 from synapse.http.server import set_cors_headers
+from synapse.http.site import SynapseRequest
 from synapse.types import JsonDict
 from synapse.util import json_encoder
 from synapse.util.stringutils import parse_server_name
@@ -63,7 +64,7 @@ class ClientWellKnownResource(Resource):
         Resource.__init__(self)
         self._well_known_builder = WellKnownBuilder(hs)
 
-    def render_GET(self, request: Request) -> bytes:
+    def render_GET(self, request: SynapseRequest) -> bytes:
         set_cors_headers(request)
         r = self._well_known_builder.get_well_known()
         if not r:
diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py
index 96f399b7ab..0b0d8737c1 100644
--- a/tests/logging/test_terse_json.py
+++ b/tests/logging/test_terse_json.py
@@ -153,6 +153,7 @@ class TerseJsonTestCase(LoggerCleanupMixin, TestCase):
         site.site_tag = "test-site"
         site.server_version_string = "Server v1"
         site.reactor = Mock()
+        site.experimental_cors_msc3886 = False
         request = SynapseRequest(FakeChannel(site, None), site)
         # Call requestReceived to finish instantiating the object.
         request.content = BytesIO()
diff --git a/tests/rest/client/test_rendezvous.py b/tests/rest/client/test_rendezvous.py
new file mode 100644
index 0000000000..ad00a476e1
--- /dev/null
+++ b/tests/rest/client/test_rendezvous.py
@@ -0,0 +1,45 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.rest.client import rendezvous
+from synapse.server import HomeServer
+from synapse.util import Clock
+
+from tests import unittest
+from tests.unittest import override_config
+
+endpoint = "/_matrix/client/unstable/org.matrix.msc3886/rendezvous"
+
+
+class RendezvousServletTestCase(unittest.HomeserverTestCase):
+
+    servlets = [
+        rendezvous.register_servlets,
+    ]
+
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
+        self.hs = self.setup_test_homeserver()
+        return self.hs
+
+    def test_disabled(self) -> None:
+        channel = self.make_request("POST", endpoint, {}, access_token=None)
+        self.assertEqual(channel.code, 400)
+
+    @override_config({"experimental_features": {"msc3886_endpoint": "/asd"}})
+    def test_redirect(self) -> None:
+        channel = self.make_request("POST", endpoint, {}, access_token=None)
+        self.assertEqual(channel.code, 307)
+        self.assertEqual(channel.headers.getRawHeaders("Location"), ["/asd"])
diff --git a/tests/server.py b/tests/server.py
index c447d5e4c4..8b1d186219 100644
--- a/tests/server.py
+++ b/tests/server.py
@@ -266,7 +266,12 @@ class FakeSite:
     site_tag = "test"
     access_logger = logging.getLogger("synapse.access.http.fake")
 
-    def __init__(self, resource: IResource, reactor: IReactorTime):
+    def __init__(
+        self,
+        resource: IResource,
+        reactor: IReactorTime,
+        experimental_cors_msc3886: bool = False,
+    ):
         """
 
         Args:
@@ -274,6 +279,7 @@ class FakeSite:
         """
         self._resource = resource
         self.reactor = reactor
+        self.experimental_cors_msc3886 = experimental_cors_msc3886
 
     def getResourceFor(self, request):
         return self._resource
diff --git a/tests/test_server.py b/tests/test_server.py
index 7c66448245..2d9a0257d4 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -222,13 +222,22 @@ class OptionsResourceTests(unittest.TestCase):
         self.resource = OptionsResource()
         self.resource.putChild(b"res", DummyResource())
 
-    def _make_request(self, method: bytes, path: bytes) -> FakeChannel:
+    def _make_request(
+        self, method: bytes, path: bytes, experimental_cors_msc3886: bool = False
+    ) -> FakeChannel:
         """Create a request from the method/path and return a channel with the response."""
         # Create a site and query for the resource.
         site = SynapseSite(
             "test",
             "site_tag",
-            parse_listener_def(0, {"type": "http", "port": 0}),
+            parse_listener_def(
+                0,
+                {
+                    "type": "http",
+                    "port": 0,
+                    "experimental_cors_msc3886": experimental_cors_msc3886,
+                },
+            ),
             self.resource,
             "1.0",
             max_request_body_size=4096,
@@ -239,25 +248,58 @@ class OptionsResourceTests(unittest.TestCase):
         channel = make_request(self.reactor, site, method, path, shorthand=False)
         return channel
 
+    def _check_cors_standard_headers(self, channel: FakeChannel) -> None:
+        # Ensure the correct CORS headers have been added
+        # as per https://spec.matrix.org/v1.4/client-server-api/#web-browser-clients
+        self.assertEqual(
+            channel.headers.getRawHeaders(b"Access-Control-Allow-Origin"),
+            [b"*"],
+            "has correct CORS Origin header",
+        )
+        self.assertEqual(
+            channel.headers.getRawHeaders(b"Access-Control-Allow-Methods"),
+            [b"GET, HEAD, POST, PUT, DELETE, OPTIONS"],  # HEAD isn't in the spec
+            "has correct CORS Methods header",
+        )
+        self.assertEqual(
+            channel.headers.getRawHeaders(b"Access-Control-Allow-Headers"),
+            [b"X-Requested-With, Content-Type, Authorization, Date"],
+            "has correct CORS Headers header",
+        )
+
+    def _check_cors_msc3886_headers(self, channel: FakeChannel) -> None:
+        # Ensure the correct CORS headers have been added
+        # as per https://github.com/matrix-org/matrix-spec-proposals/blob/hughns/simple-rendezvous-capability/proposals/3886-simple-rendezvous-capability.md#cors
+        self.assertEqual(
+            channel.headers.getRawHeaders(b"Access-Control-Allow-Origin"),
+            [b"*"],
+            "has correct CORS Origin header",
+        )
+        self.assertEqual(
+            channel.headers.getRawHeaders(b"Access-Control-Allow-Methods"),
+            [b"GET, HEAD, POST, PUT, DELETE, OPTIONS"],  # HEAD isn't in the spec
+            "has correct CORS Methods header",
+        )
+        self.assertEqual(
+            channel.headers.getRawHeaders(b"Access-Control-Allow-Headers"),
+            [
+                b"X-Requested-With, Content-Type, Authorization, Date, If-Match, If-None-Match"
+            ],
+            "has correct CORS Headers header",
+        )
+        self.assertEqual(
+            channel.headers.getRawHeaders(b"Access-Control-Expose-Headers"),
+            [b"ETag, Location, X-Max-Bytes"],
+            "has correct CORS Expose Headers header",
+        )
+
     def test_unknown_options_request(self) -> None:
         """An OPTIONS requests to an unknown URL still returns 204 No Content."""
         channel = self._make_request(b"OPTIONS", b"/foo/")
         self.assertEqual(channel.code, 204)
         self.assertNotIn("body", channel.result)
 
-        # Ensure the correct CORS headers have been added
-        self.assertTrue(
-            channel.headers.hasHeader(b"Access-Control-Allow-Origin"),
-            "has CORS Origin header",
-        )
-        self.assertTrue(
-            channel.headers.hasHeader(b"Access-Control-Allow-Methods"),
-            "has CORS Methods header",
-        )
-        self.assertTrue(
-            channel.headers.hasHeader(b"Access-Control-Allow-Headers"),
-            "has CORS Headers header",
-        )
+        self._check_cors_standard_headers(channel)
 
     def test_known_options_request(self) -> None:
         """An OPTIONS requests to an known URL still returns 204 No Content."""
@@ -265,19 +307,17 @@ class OptionsResourceTests(unittest.TestCase):
         self.assertEqual(channel.code, 204)
         self.assertNotIn("body", channel.result)
 
-        # Ensure the correct CORS headers have been added
-        self.assertTrue(
-            channel.headers.hasHeader(b"Access-Control-Allow-Origin"),
-            "has CORS Origin header",
-        )
-        self.assertTrue(
-            channel.headers.hasHeader(b"Access-Control-Allow-Methods"),
-            "has CORS Methods header",
-        )
-        self.assertTrue(
-            channel.headers.hasHeader(b"Access-Control-Allow-Headers"),
-            "has CORS Headers header",
+        self._check_cors_standard_headers(channel)
+
+    def test_known_options_request_msc3886(self) -> None:
+        """An OPTIONS requests to an known URL still returns 204 No Content."""
+        channel = self._make_request(
+            b"OPTIONS", b"/res/", experimental_cors_msc3886=True
         )
+        self.assertEqual(channel.code, 204)
+        self.assertNotIn("body", channel.result)
+
+        self._check_cors_msc3886_headers(channel)
 
     def test_unknown_request(self) -> None:
         """A non-OPTIONS request to an unknown URL should 404."""
-- 
cgit 1.5.1


From 847e2393f3198b88809c9b99de5c681efbf1c92e Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Tue, 18 Oct 2022 09:58:47 -0700
Subject: Preparatory work for adding power level event to batched events
 (#14214)

---
 changelog.d/14214.misc         |  1 +
 synapse/event_auth.py          | 19 ++++++++++++++++++-
 synapse/handlers/event_auth.py | 18 +++++++++++++-----
 synapse/handlers/federation.py | 12 +++++-------
 synapse/handlers/message.py    | 10 +++++++++-
 synapse/handlers/room.py       |  4 +---
 6 files changed, 47 insertions(+), 17 deletions(-)
 create mode 100644 changelog.d/14214.misc

(limited to 'synapse')

diff --git a/changelog.d/14214.misc b/changelog.d/14214.misc
new file mode 100644
index 0000000000..102928b575
--- /dev/null
+++ b/changelog.d/14214.misc
@@ -0,0 +1 @@
+When authenticating batched events, check for auth events in batch as well as DB.
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index c7d5ef92fc..bab31e33c5 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -15,7 +15,18 @@
 
 import logging
 import typing
-from typing import Any, Collection, Dict, Iterable, List, Optional, Set, Tuple, Union
+from typing import (
+    Any,
+    Collection,
+    Dict,
+    Iterable,
+    List,
+    Mapping,
+    Optional,
+    Set,
+    Tuple,
+    Union,
+)
 
 from canonicaljson import encode_canonical_json
 from signedjson.key import decode_verify_key_bytes
@@ -134,6 +145,7 @@ def validate_event_for_room_version(event: "EventBase") -> None:
 async def check_state_independent_auth_rules(
     store: _EventSourceStore,
     event: "EventBase",
+    batched_auth_events: Optional[Mapping[str, "EventBase"]] = None,
 ) -> None:
     """Check that an event complies with auth rules that are independent of room state
 
@@ -143,6 +155,8 @@ async def check_state_independent_auth_rules(
     Args:
         store: the datastore; used to fetch the auth events for validation
         event: the event being checked.
+        batched_auth_events: if the event being authed is part of a batch, any events
+            from the same batch that may be necessary to auth the current event
 
     Raises:
         AuthError if the checks fail
@@ -162,6 +176,9 @@ async def check_state_independent_auth_rules(
         redact_behaviour=EventRedactBehaviour.as_is,
         allow_rejected=True,
     )
+    if batched_auth_events:
+        auth_events.update(batched_auth_events)
+
     room_id = event.room_id
     auth_dict: MutableStateMap[str] = {}
     expected_auth_types = auth_types_for_event(event.room_version, event)
diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py
index 8249ca1ed2..3bbad0271b 100644
--- a/synapse/handlers/event_auth.py
+++ b/synapse/handlers/event_auth.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import TYPE_CHECKING, Collection, List, Optional, Union
+from typing import TYPE_CHECKING, Collection, List, Mapping, Optional, Union
 
 from synapse import event_auth
 from synapse.api.constants import (
@@ -29,7 +29,6 @@ from synapse.event_auth import (
 )
 from synapse.events import EventBase
 from synapse.events.builder import EventBuilder
-from synapse.events.snapshot import EventContext
 from synapse.types import StateMap, get_domain_from_id
 
 if TYPE_CHECKING:
@@ -51,12 +50,21 @@ class EventAuthHandler:
     async def check_auth_rules_from_context(
         self,
         event: EventBase,
-        context: EventContext,
+        batched_auth_events: Optional[Mapping[str, EventBase]] = None,
     ) -> None:
-        """Check an event passes the auth rules at its own auth events"""
-        await check_state_independent_auth_rules(self._store, event)
+        """Check an event passes the auth rules at its own auth events
+        Args:
+            event: event to be authed
+            batched_auth_events: if the event being authed is part of a batch, any events
+            from the same batch that may be necessary to auth the current event
+        """
+        await check_state_independent_auth_rules(
+            self._store, event, batched_auth_events
+        )
         auth_event_ids = event.auth_event_ids()
         auth_events_by_id = await self._store.get_events(auth_event_ids)
+        if batched_auth_events:
+            auth_events_by_id.update(batched_auth_events)
         check_state_dependent_auth_rules(event, auth_events_by_id.values())
 
     def compute_auth_events(
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index ccc045d36f..275a37a575 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -942,7 +942,7 @@ class FederationHandler:
 
         # The remote hasn't signed it yet, obviously. We'll do the full checks
         # when we get the event back in `on_send_join_request`
-        await self._event_auth_handler.check_auth_rules_from_context(event, context)
+        await self._event_auth_handler.check_auth_rules_from_context(event)
         return event
 
     async def on_invite_request(
@@ -1123,7 +1123,7 @@ class FederationHandler:
         try:
             # The remote hasn't signed it yet, obviously. We'll do the full checks
             # when we get the event back in `on_send_leave_request`
-            await self._event_auth_handler.check_auth_rules_from_context(event, context)
+            await self._event_auth_handler.check_auth_rules_from_context(event)
         except AuthError as e:
             logger.warning("Failed to create new leave %r because %s", event, e)
             raise e
@@ -1182,7 +1182,7 @@ class FederationHandler:
         try:
             # The remote hasn't signed it yet, obviously. We'll do the full checks
             # when we get the event back in `on_send_knock_request`
-            await self._event_auth_handler.check_auth_rules_from_context(event, context)
+            await self._event_auth_handler.check_auth_rules_from_context(event)
         except AuthError as e:
             logger.warning("Failed to create new knock %r because %s", event, e)
             raise e
@@ -1348,9 +1348,7 @@ class FederationHandler:
 
             try:
                 validate_event_for_room_version(event)
-                await self._event_auth_handler.check_auth_rules_from_context(
-                    event, context
-                )
+                await self._event_auth_handler.check_auth_rules_from_context(event)
             except AuthError as e:
                 logger.warning("Denying new third party invite %r because %s", event, e)
                 raise e
@@ -1400,7 +1398,7 @@ class FederationHandler:
 
         try:
             validate_event_for_room_version(event)
-            await self._event_auth_handler.check_auth_rules_from_context(event, context)
+            await self._event_auth_handler.check_auth_rules_from_context(event)
         except AuthError as e:
             logger.warning("Denying third party invite %r because %s", event, e)
             raise e
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 4e55ebba0b..15b828dd74 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -1360,8 +1360,16 @@ class EventCreationHandler:
             else:
                 try:
                     validate_event_for_room_version(event)
+                    # If we are persisting a batch of events the event(s) needed to auth the
+                    # current event may be part of the batch and will not be in the DB yet
+                    event_id_to_event = {e.event_id: e for e, _ in events_and_context}
+                    batched_auth_events = {}
+                    for event_id in event.auth_event_ids():
+                        auth_event = event_id_to_event.get(event_id)
+                        if auth_event:
+                            batched_auth_events[event_id] = auth_event
                     await self._event_auth_handler.check_auth_rules_from_context(
-                        event, context
+                        event, batched_auth_events
                     )
                 except AuthError as err:
                     logger.warning("Denying new event %r because %s", event, err)
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 4e1aacb408..638f54051a 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -229,9 +229,7 @@ class RoomCreationHandler:
             },
         )
         validate_event_for_room_version(tombstone_event)
-        await self._event_auth_handler.check_auth_rules_from_context(
-            tombstone_event, tombstone_context
-        )
+        await self._event_auth_handler.check_auth_rules_from_context(tombstone_event)
 
         # Upgrade the room
         #
-- 
cgit 1.5.1


From 1c777ef1e87d7be39a2b8f6fb119fa4b51e2be4c Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Tue, 18 Oct 2022 13:40:50 -0700
Subject: Fix docstring in EventContext (#14145)

---
 changelog.d/14145.doc      | 2 ++
 synapse/events/snapshot.py | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14145.doc

(limited to 'synapse')

diff --git a/changelog.d/14145.doc b/changelog.d/14145.doc
new file mode 100644
index 0000000000..8f876e08fc
--- /dev/null
+++ b/changelog.d/14145.doc
@@ -0,0 +1,2 @@
+Clarify comment on event contexts.
+
diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py
index d3c8083e4a..1c0e96bec7 100644
--- a/synapse/events/snapshot.py
+++ b/synapse/events/snapshot.py
@@ -65,7 +65,8 @@ class EventContext:
             None does not necessarily mean that ``state_group`` does not have
             a prev_group!
 
-            If the event is a state event, this is normally the same as ``prev_group``.
+            If the event is a state event, this is normally the same as
+            ``state_group_before_event``.
 
             If ``state_group`` is None (ie, the event is an outlier), ``prev_group``
             will always also be ``None``.
-- 
cgit 1.5.1


From 2a76a7369fc54477185f53f6e81897fa84e24de5 Mon Sep 17 00:00:00 2001
From: Aaron Raimist <aaron@raim.ist>
Date: Tue, 18 Oct 2022 14:54:27 -0600
Subject: Fix hiding device names over federation (#10015)

And don't include blank opentracing stuff in device list updates.

Signed-off-by: Aaron Raimist <aaron@raim.ist>
---
 changelog.d/10015.bugfix                  |  1 +
 synapse/storage/databases/main/devices.py | 10 ++++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/10015.bugfix

(limited to 'synapse')

diff --git a/changelog.d/10015.bugfix b/changelog.d/10015.bugfix
new file mode 100644
index 0000000000..cbebd97e58
--- /dev/null
+++ b/changelog.d/10015.bugfix
@@ -0,0 +1 @@
+Prevent device names from appearing in device list updates when `allow_device_name_lookup_over_federation` is `false`.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 18358eca46..830b076a32 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -539,9 +539,11 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
                     "device_id": device_id,
                     "prev_id": [prev_id] if prev_id else [],
                     "stream_id": stream_id,
-                    "org.matrix.opentracing_context": opentracing_context,
                 }
 
+                if opentracing_context != "{}":
+                    result["org.matrix.opentracing_context"] = opentracing_context
+
                 prev_id = stream_id
 
                 if device is not None:
@@ -549,7 +551,11 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
                     if keys:
                         result["keys"] = keys
 
-                    device_display_name = device.display_name
+                    device_display_name = None
+                    if (
+                        self.hs.config.federation.allow_device_name_lookup_over_federation
+                    ):
+                        device_display_name = device.display_name
                     if device_display_name:
                         result["device_display_name"] = device_display_name
                 else:
-- 
cgit 1.5.1


From fa8616e65c82367712a7b75c62682a89541b6330 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 18 Oct 2022 19:46:25 -0500
Subject: Fix MSC3030 `/timestamp_to_event` returning `outliers` that it has no
 idea whether are near a gap or not (#14215)

Fix MSC3030 `/timestamp_to_event` endpoint returning `outliers` when it has no way of knowing whether they are near a gap or not (and is therefore unable to determine whether the returned event is actually the closest one). The reason Synapse doesn't know whether an `outlier` is next to a gap is that our gap checks rely on entries in the `event_edges`, `event_forward_extremities`, and `event_backward_extremities` tables, which are [not filled in for `outliers`](https://github.com/matrix-org/synapse/blob/2c63cdcc3f1aa4625e947de3c23e0a8133c61286/docs/development/room-dag-concepts.md#outliers).

Also fixes the MSC3030 Complement `can_paginate_after_getting_remote_event_from_timestamp_to_event_endpoint` test flake. Although this showed up as a flaky test in Complement, if `sync_partial_state` raced and beat us before `/timestamp_to_event`, then even if we retried the failing `/context` request, it wouldn't work until we made this Synapse change. With this PR, Synapse will never return an `outlier` event so that test will always go and ask over federation.

Fix  https://github.com/matrix-org/synapse/issues/13944


### Why did this fail before? Why was it flaky?

Sleuthing the server logs on the [CI failure](https://github.com/matrix-org/synapse/actions/runs/3149623842/jobs/5121449357#step:5:5805), it looks like `hs2:/timestamp_to_event` found `$NP6-oU7mIFVyhtKfGvfrEQX949hQX-T-gvuauG6eurU` as an `outlier` event locally. Then when we went and asked for it via `/context`, since it's an `outlier`, it was filtered out of the results -> `You don't have permission to access that event.`

This is reproducible when `sync_partial_state` races and persists `$NP6-oU7mIFVyhtKfGvfrEQX949hQX-T-gvuauG6eurU` as an `outlier` before we evaluate `get_event_for_timestamp(...)`. To consistently reproduce locally, just add a delay at the [start of `get_event_for_timestamp(...)`](https://github.com/matrix-org/synapse/blob/cb20b885cb4bd1648581dd043a184d86fc8c7a00/synapse/handlers/room.py#L1470-L1496) so it always runs after `sync_partial_state` completes.

```py
from twisted.internet import task as twisted_task
d = twisted_task.deferLater(self.hs.get_reactor(), 3.5)
await d
```

In a run where it passes, on `hs2`, `get_event_for_timestamp(...)` finds a different event locally which is next to a gap, so we request a closer one from `hs1`, which gets backfilled. And since the backfilled event is not an `outlier`, it's returned as expected during `/context`.

With this PR, Synapse will never return an `outlier` event so that test will always go and ask over federation.
---
 changelog.d/14215.bugfix                        |  1 +
 synapse/storage/databases/main/events_worker.py | 59 ++++++++++++++--------
 tests/rest/client/test_rooms.py                 | 65 +++++++++++++++++++++++++
 3 files changed, 104 insertions(+), 21 deletions(-)
 create mode 100644 changelog.d/14215.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14215.bugfix b/changelog.d/14215.bugfix
new file mode 100644
index 0000000000..31c109f534
--- /dev/null
+++ b/changelog.d/14215.bugfix
@@ -0,0 +1 @@
+Fix [MSC3030](https://github.com/matrix-org/matrix-spec-proposals/pull/3030) `/timestamp_to_event` endpoint returning potentially inaccurate closest events with `outliers` present.
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 7bc7f2f33e..69fea452ad 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -1971,12 +1971,17 @@ class EventsWorkerStore(SQLBaseStore):
 
         Args:
             room_id: room where the event lives
-            event_id: event to check
+            event: event to check (can't be an `outlier`)
 
         Returns:
             Boolean indicating whether it's an extremity
         """
 
+        assert not event.internal_metadata.is_outlier(), (
+            "is_event_next_to_backward_gap(...) can't be used with `outlier` events. "
+            "This function relies on `event_backward_extremities` which won't be filled in for `outliers`."
+        )
+
         def is_event_next_to_backward_gap_txn(txn: LoggingTransaction) -> bool:
             # If the event in question has any of its prev_events listed as a
             # backward extremity, it's next to a gap.
@@ -2026,12 +2031,17 @@ class EventsWorkerStore(SQLBaseStore):
 
         Args:
             room_id: room where the event lives
-            event_id: event to check
+            event: event to check (can't be an `outlier`)
 
         Returns:
             Boolean indicating whether it's an extremity
         """
 
+        assert not event.internal_metadata.is_outlier(), (
+            "is_event_next_to_forward_gap(...) can't be used with `outlier` events. "
+            "This function relies on `event_edges` and `event_forward_extremities` which won't be filled in for `outliers`."
+        )
+
         def is_event_next_to_gap_txn(txn: LoggingTransaction) -> bool:
             # If the event in question is a forward extremity, we will just
             # consider any potential forward gap as not a gap since it's one of
@@ -2112,13 +2122,33 @@ class EventsWorkerStore(SQLBaseStore):
             The closest event_id otherwise None if we can't find any event in
             the given direction.
         """
+        if direction == "b":
+            # Find closest event *before* a given timestamp. We use descending
+            # (which gives values largest to smallest) because we want the
+            # largest possible timestamp *before* the given timestamp.
+            comparison_operator = "<="
+            order = "DESC"
+        else:
+            # Find closest event *after* a given timestamp. We use ascending
+            # (which gives values smallest to largest) because we want the
+            # closest possible timestamp *after* the given timestamp.
+            comparison_operator = ">="
+            order = "ASC"
 
-        sql_template = """
+        sql_template = f"""
             SELECT event_id FROM events
             LEFT JOIN rejections USING (event_id)
             WHERE
-                origin_server_ts %s ?
-                AND room_id = ?
+                room_id = ?
+                AND origin_server_ts {comparison_operator} ?
+                /**
+                 * Make sure the event isn't an `outlier` because we have no way
+                 * to later check whether it's next to a gap. `outliers` do not
+                 * have entries in the `event_edges`, `event_forward_extremeties`,
+                 * and `event_backward_extremities` tables to check against
+                 * (used by `is_event_next_to_backward_gap` and `is_event_next_to_forward_gap`).
+                 */
+                AND NOT outlier
                 /* Make sure event is not rejected */
                 AND rejections.event_id IS NULL
             /**
@@ -2128,27 +2158,14 @@ class EventsWorkerStore(SQLBaseStore):
              * Finally, we can tie-break based on when it was received on the server
              * (`stream_ordering`).
              */
-            ORDER BY origin_server_ts %s, depth %s, stream_ordering %s
+            ORDER BY origin_server_ts {order}, depth {order}, stream_ordering {order}
             LIMIT 1;
         """
 
         def get_event_id_for_timestamp_txn(txn: LoggingTransaction) -> Optional[str]:
-            if direction == "b":
-                # Find closest event *before* a given timestamp. We use descending
-                # (which gives values largest to smallest) because we want the
-                # largest possible timestamp *before* the given timestamp.
-                comparison_operator = "<="
-                order = "DESC"
-            else:
-                # Find closest event *after* a given timestamp. We use ascending
-                # (which gives values smallest to largest) because we want the
-                # closest possible timestamp *after* the given timestamp.
-                comparison_operator = ">="
-                order = "ASC"
-
             txn.execute(
-                sql_template % (comparison_operator, order, order, order),
-                (timestamp, room_id),
+                sql_template,
+                (room_id, timestamp),
             )
             row = txn.fetchone()
             if row:
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index 71b1637be8..716366eb90 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -39,6 +39,8 @@ from synapse.api.constants import (
 )
 from synapse.api.errors import Codes, HttpResponseException
 from synapse.appservice import ApplicationService
+from synapse.events import EventBase
+from synapse.events.snapshot import EventContext
 from synapse.handlers.pagination import PurgeStatus
 from synapse.rest import admin
 from synapse.rest.client import account, directory, login, profile, register, room, sync
@@ -51,6 +53,7 @@ from tests import unittest
 from tests.http.server._base import make_request_with_cancellation_test
 from tests.storage.test_stream import PaginationTestCase
 from tests.test_utils import make_awaitable
+from tests.test_utils.event_injection import create_event
 
 PATH_PREFIX = b"/_matrix/client/api/v1"
 
@@ -3486,3 +3489,65 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase):
         )
         self.assertEqual(channel.code, 400)
         self.assertEqual(channel.json_body["errcode"], "M_MISSING_PARAM")
+
+
+class TimestampLookupTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        admin.register_servlets,
+        room.register_servlets,
+        login.register_servlets,
+    ]
+
+    def default_config(self) -> JsonDict:
+        config = super().default_config()
+        config["experimental_features"] = {"msc3030_enabled": True}
+        return config
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self._storage_controllers = self.hs.get_storage_controllers()
+
+        self.room_owner = self.register_user("room_owner", "test")
+        self.room_owner_tok = self.login("room_owner", "test")
+
+    def _inject_outlier(self, room_id: str) -> EventBase:
+        event, _context = self.get_success(
+            create_event(
+                self.hs,
+                room_id=room_id,
+                type="m.test",
+                sender="@test_remote_user:remote",
+            )
+        )
+
+        event.internal_metadata.outlier = True
+        self.get_success(
+            self._storage_controllers.persistence.persist_event(
+                event, EventContext.for_outlier(self._storage_controllers)
+            )
+        )
+        return event
+
+    def test_no_outliers(self) -> None:
+        """
+        Test to make sure `/timestamp_to_event` does not return `outlier` events.
+        We're unable to determine whether an `outlier` is next to a gap so we
+        don't know whether it's actually the closest event. Instead, let's just
+        ignore `outliers` with this endpoint.
+
+        This test is really seeing that we choose the non-`outlier` event behind the
+        `outlier`. Since the gap checking logic considers the latest message in the room
+        as *not* next to a gap, asking over federation does not come into play here.
+        """
+        room_id = self.helper.create_room_as(self.room_owner, tok=self.room_owner_tok)
+
+        outlier_event = self._inject_outlier(room_id)
+
+        channel = self.make_request(
+            "GET",
+            f"/_matrix/client/unstable/org.matrix.msc3030/rooms/{room_id}/timestamp_to_event?dir=b&ts={outlier_event.origin_server_ts}",
+            access_token=self.room_owner_tok,
+        )
+        self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body)
+
+        # Make sure the outlier event is not returned
+        self.assertNotEqual(channel.json_body["event_id"], outlier_event.event_id)
-- 
cgit 1.5.1


From fe50738e597817735aa910e3cd1e13e4792f7d9f Mon Sep 17 00:00:00 2001
From: Finn <finn@finn.io>
Date: Wed, 19 Oct 2022 11:08:40 -0700
Subject: Let update_synapse_database run on multi-database configurations
 (#13422)

* Allow sharded database in db migrate script

Signed-off-by: Finn Herzfeld <finn@beeper.com>

* Update changelog.d/13422.bugfix

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

* Remove check entirely

* remove unused import

Signed-off-by: Finn Herzfeld <finn@beeper.com>
Co-authored-by: finn <finn@beeper.com>
Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
---
 changelog.d/13422.bugfix                    | 1 +
 synapse/_scripts/update_synapse_database.py | 8 --------
 2 files changed, 1 insertion(+), 8 deletions(-)
 create mode 100644 changelog.d/13422.bugfix
 mode change 100755 => 100644 synapse/_scripts/update_synapse_database.py

(limited to 'synapse')

diff --git a/changelog.d/13422.bugfix b/changelog.d/13422.bugfix
new file mode 100644
index 0000000000..3a099acbe6
--- /dev/null
+++ b/changelog.d/13422.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where the `update_synapse_database` script could not be run with multiple databases. Contributed by @thefinn93 @ Beeper.
\ No newline at end of file
diff --git a/synapse/_scripts/update_synapse_database.py b/synapse/_scripts/update_synapse_database.py
old mode 100755
new mode 100644
index fb1fb83f50..0adf94bba6
--- a/synapse/_scripts/update_synapse_database.py
+++ b/synapse/_scripts/update_synapse_database.py
@@ -15,7 +15,6 @@
 
 import argparse
 import logging
-import sys
 from typing import cast
 
 import yaml
@@ -100,13 +99,6 @@ def main() -> None:
     # Load, process and sanity-check the config.
     hs_config = yaml.safe_load(args.database_config)
 
-    if "database" not in hs_config and "databases" not in hs_config:
-        sys.stderr.write(
-            "The configuration file must have a 'database' or 'databases' section. "
-            "See https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#database"
-        )
-        sys.exit(4)
-
     config = HomeServerConfig()
     config.parse_config_dict(hs_config, "", "")
 
-- 
cgit 1.5.1


From 0b7830e457359ce651b293c8748bf636973404a9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 19 Oct 2022 19:38:24 +0000
Subject: Bump flake8-bugbear from 21.3.2 to 22.9.23 (#14042)

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Erik Johnston <erik@matrix.org>
Co-authored-by: David Robertson <davidr@element.io>
---
 .flake8                                      | 9 ++++++++-
 changelog.d/14042.misc                       | 1 +
 poetry.lock                                  | 8 ++++----
 synapse/storage/databases/main/roommember.py | 4 ++--
 synapse/util/caches/deferred_cache.py        | 4 ++--
 synapse/util/caches/descriptors.py           | 2 +-
 tests/federation/transport/test_client.py    | 7 +++----
 tests/util/caches/test_descriptors.py        | 2 +-
 8 files changed, 22 insertions(+), 15 deletions(-)
 create mode 100644 changelog.d/14042.misc

(limited to 'synapse')

diff --git a/.flake8 b/.flake8
index acb118c86e..4c6a4d5843 100644
--- a/.flake8
+++ b/.flake8
@@ -8,4 +8,11 @@
 #  E203: whitespace before ':' (which is contrary to pep8?)
 #  E731: do not assign a lambda expression, use a def
 #  E501: Line too long (black enforces this for us)
-ignore=W503,W504,E203,E731,E501
+#
+# flake8-bugbear runs extra checks. Its error codes are described at
+# https://github.com/PyCQA/flake8-bugbear#list-of-warnings
+#  B019: Use of functools.lru_cache or functools.cache on methods can lead to memory leaks
+#  B023: Functions defined inside a loop must not use variables redefined in the loop
+#  B024: Abstract base class with no abstract method.
+
+ignore=W503,W504,E203,E731,E501,B019,B023,B024
diff --git a/changelog.d/14042.misc b/changelog.d/14042.misc
new file mode 100644
index 0000000000..868d55e76a
--- /dev/null
+++ b/changelog.d/14042.misc
@@ -0,0 +1 @@
+Bump flake8-bugbear from 21.3.2 to 22.9.23.
diff --git a/poetry.lock b/poetry.lock
index ed0b59fbe5..0a2f9ab69e 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -260,7 +260,7 @@ pyflakes = ">=2.4.0,<2.5.0"
 
 [[package]]
 name = "flake8-bugbear"
-version = "21.3.2"
+version = "22.9.23"
 description = "A plugin for flake8 finding likely bugs and design problems in your program. Contains warnings that don't belong in pyflakes and pycodestyle."
 category = "dev"
 optional = false
@@ -271,7 +271,7 @@ attrs = ">=19.2.0"
 flake8 = ">=3.0.0"
 
 [package.extras]
-dev = ["black", "coverage", "hypothesis", "hypothesmith"]
+dev = ["coverage", "hypothesis", "hypothesmith (>=0.2)", "pre-commit"]
 
 [[package]]
 name = "flake8-comprehensions"
@@ -1826,8 +1826,8 @@ flake8 = [
     {file = "flake8-4.0.1.tar.gz", hash = "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"},
 ]
 flake8-bugbear = [
-    {file = "flake8-bugbear-21.3.2.tar.gz", hash = "sha256:cadce434ceef96463b45a7c3000f23527c04ea4b531d16c7ac8886051f516ca0"},
-    {file = "flake8_bugbear-21.3.2-py36.py37.py38-none-any.whl", hash = "sha256:5d6ccb0c0676c738a6e066b4d50589c408dcc1c5bf1d73b464b18b73cd6c05c2"},
+    {file = "flake8-bugbear-22.9.23.tar.gz", hash = "sha256:17b9623325e6e0dcdcc80ed9e4aa811287fcc81d7e03313b8736ea5733759937"},
+    {file = "flake8_bugbear-22.9.23-py3-none-any.whl", hash = "sha256:cd2779b2b7ada212d7a322814a1e5651f1868ab0d3f24cc9da66169ab8fda474"},
 ]
 flake8-comprehensions = [
     {file = "flake8-comprehensions-3.8.0.tar.gz", hash = "sha256:8e108707637b1d13734f38e03435984f6b7854fa6b5a4e34f93e69534be8e521"},
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 2ed6ad754f..32e1e983a5 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -707,8 +707,8 @@ class RoomMemberWorkerStore(EventsWorkerStore):
 
         # 250 users is pretty arbitrary but the data can be quite large if users
         # are in many rooms.
-        for user_ids in batch_iter(user_ids, 250):
-            all_user_rooms.update(await self._get_rooms_for_users(user_ids))
+        for batch_user_ids in batch_iter(user_ids, 250):
+            all_user_rooms.update(await self._get_rooms_for_users(batch_user_ids))
 
         return all_user_rooms
 
diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py
index 6425f851ea..bcb1cba362 100644
--- a/synapse/util/caches/deferred_cache.py
+++ b/synapse/util/caches/deferred_cache.py
@@ -395,8 +395,8 @@ class DeferredCache(Generic[KT, VT]):
             # _pending_deferred_cache.pop should either return a CacheEntry, or, in the
             # case of a TreeCache, a dict of keys to cache entries. Either way calling
             # iterate_tree_cache_entry on it will do the right thing.
-            for entry in iterate_tree_cache_entry(entry):
-                for cb in entry.get_invalidation_callbacks(key):
+            for iter_entry in iterate_tree_cache_entry(entry):
+                for cb in iter_entry.get_invalidation_callbacks(key):
                     cb()
 
     def invalidate_all(self) -> None:
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 0391966462..b3c748ef44 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -432,7 +432,7 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase):
         num_args = cached_method.num_args
 
         if num_args != self.num_args:
-            raise Exception(
+            raise TypeError(
                 "Number of args (%s) does not match underlying cache_method_name=%s (%s)."
                 % (self.num_args, self.cached_method_name, num_args)
             )
diff --git a/tests/federation/transport/test_client.py b/tests/federation/transport/test_client.py
index 0926e0583d..dd4d1b56de 100644
--- a/tests/federation/transport/test_client.py
+++ b/tests/federation/transport/test_client.py
@@ -17,6 +17,7 @@ from unittest.mock import Mock
 
 from synapse.api.room_versions import RoomVersions
 from synapse.federation.transport.client import SendJoinParser
+from synapse.util import ExceptionBundle
 
 from tests.unittest import TestCase
 
@@ -121,10 +122,8 @@ class SendJoinParserTestCase(TestCase):
         # Send half of the data to the parser
         parser.write(serialisation[: len(serialisation) // 2])
 
-        # Close the parser. There should be _some_ kind of exception, but it need not
-        # be that RuntimeError directly. E.g. we might want to raise a wrapper
-        # encompassing multiple errors from multiple coroutines.
-        with self.assertRaises(Exception):
+        # Close the parser. There should be _some_ kind of exception.
+        with self.assertRaises(ExceptionBundle):
             parser.finish()
 
         # In any case, we should have tried to close both coros.
diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py
index 90861fe522..78fd7b6961 100644
--- a/tests/util/caches/test_descriptors.py
+++ b/tests/util/caches/test_descriptors.py
@@ -1037,5 +1037,5 @@ class CachedListDescriptorTestCase(unittest.TestCase):
         obj = Cls()
 
         # Make sure this raises an error about the arg mismatch
-        with self.assertRaises(Exception):
+        with self.assertRaises(TypeError):
             obj.list_fn([("foo", "bar")])
-- 
cgit 1.5.1


From 70b33965065f0e93eaba68e371896149c9405f51 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Wed, 19 Oct 2022 15:39:43 -0500
Subject: Explain `SynapseError` and `FederationError` better (#14191)

Explain `SynapseError` and `FederationError` better

Spawning from https://github.com/matrix-org/synapse/pull/13816#discussion_r993262622
---
 changelog.d/14191.doc                   |  1 +
 synapse/api/errors.py                   | 24 +++++++++++++++++++++---
 synapse/federation/federation_server.py |  8 ++++++++
 3 files changed, 30 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/14191.doc

(limited to 'synapse')

diff --git a/changelog.d/14191.doc b/changelog.d/14191.doc
new file mode 100644
index 0000000000..6b0eeb1ae1
--- /dev/null
+++ b/changelog.d/14191.doc
@@ -0,0 +1 @@
+Update docstrings of `SynapseError` and `FederationError` to better describe what they are used for and what the effects of using them are.
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index e0873b1913..400dd12aba 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -155,7 +155,13 @@ class RedirectException(CodeMessageException):
 
 class SynapseError(CodeMessageException):
     """A base exception type for matrix errors which have an errcode and error
-    message (as well as an HTTP status code).
+    message (as well as an HTTP status code). These often bubble all the way up to the
+    client API response so the error code and status often reach the client directly as
+    defined here. If the error doesn't make sense to present to a client, then it
+    probably shouldn't be a `SynapseError`. For example, if we contact another
+    homeserver over federation, we shouldn't automatically ferry response errors back to
+    the client on our end (a 500 from a remote server does not make sense to a client
+    when our server did not experience a 500).
 
     Attributes:
         errcode: Matrix error code e.g 'M_FORBIDDEN'
@@ -600,8 +606,20 @@ def cs_error(msg: str, code: str = Codes.UNKNOWN, **kwargs: Any) -> "JsonDict":
 
 
 class FederationError(RuntimeError):
-    """This class is used to inform remote homeservers about erroneous
-    PDUs they sent us.
+    """
+    Raised when we process an erroneous PDU.
+
+    There are two kinds of scenarios where this exception can be raised:
+
+    1. We may pull an invalid PDU from a remote homeserver (e.g. during backfill). We
+       raise this exception to signal an error to the rest of the application.
+    2. We may be pushed an invalid PDU as part of a `/send` transaction from a remote
+       homeserver. We raise so that we can respond to the transaction and include the
+       error string in the "PDU Processing Result". The message will likely be
+       ignored by the remote homeserver and is not machine parse-able since it's just a
+       string.
+
+    TODO: In the future, we should split these usage scenarios into their own error types.
 
     FATAL: The remote server could not interpret the source event.
         (e.g., it was missing a required field)
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 28097664b4..59e351595b 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -481,6 +481,14 @@ class FederationServer(FederationBase):
                     pdu_results[pdu.event_id] = await process_pdu(pdu)
 
         async def process_pdu(pdu: EventBase) -> JsonDict:
+            """
+            Processes a pushed PDU sent to us via a `/send` transaction
+
+            Returns:
+                JsonDict representing a "PDU Processing Result" that will be bundled up
+                with the other processed PDU's in the `/send` transaction and sent back
+                to remote homeserver.
+            """
             event_id = pdu.event_id
             with nested_logging_context(event_id):
                 try:
-- 
cgit 1.5.1


From da2c93d4b69200c1ea9fb94ec3c951fd4b424864 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Thu, 20 Oct 2022 15:17:45 +0100
Subject: Stop returning `unsigned.invite_room_state` in `PUT
 /_matrix/federation/v2/invite/{roomId}/{eventId}` responses (#14064)

Co-authored-by: David Robertson <davidr@element.io>
---
 changelog.d/14064.bugfix                          | 1 +
 synapse/federation/transport/server/federation.py | 5 +++++
 2 files changed, 6 insertions(+)
 create mode 100644 changelog.d/14064.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14064.bugfix b/changelog.d/14064.bugfix
new file mode 100644
index 0000000000..cce6ef3b71
--- /dev/null
+++ b/changelog.d/14064.bugfix
@@ -0,0 +1 @@
+ Fix a long-standing bug where Synapse would accidentally include extra information in the response to [`PUT /_matrix/federation/v2/invite/{roomId}/{eventId}`](https://spec.matrix.org/v1.4/server-server-api/#put_matrixfederationv2inviteroomideventid).
\ No newline at end of file
diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py
index 6f11138b57..205fd16daa 100644
--- a/synapse/federation/transport/server/federation.py
+++ b/synapse/federation/transport/server/federation.py
@@ -499,6 +499,11 @@ class FederationV2InviteServlet(BaseFederationServerServlet):
         result = await self.handler.on_invite_request(
             origin, event, room_version_id=room_version
         )
+
+        # We only store invite_room_state for internal use, so remove it before
+        # returning the event to the remote homeserver.
+        result["event"].get("unsigned", {}).pop("invite_room_state", None)
+
         return 200, result
 
 
-- 
cgit 1.5.1


From 755bfeee3a1ac7077045ab9e5a994b6ca89afba3 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 20 Oct 2022 11:32:47 -0400
Subject: Use servlets for /key/ endpoints. (#14229)

To fix the response for unknown endpoints under that prefix.

See MSC3743.
---
 changelog.d/14229.misc                        |  1 +
 synapse/api/urls.py                           |  2 +-
 synapse/app/generic_worker.py                 | 20 +++-----
 synapse/app/homeserver.py                     | 26 ++++------
 synapse/rest/key/v2/__init__.py               | 19 ++++---
 synapse/rest/key/v2/local_key_resource.py     | 22 ++++----
 synapse/rest/key/v2/remote_key_resource.py    | 73 +++++++++++++++------------
 tests/app/test_openid_listener.py             |  2 +-
 tests/rest/key/v2/test_remote_key_resource.py |  4 +-
 9 files changed, 86 insertions(+), 83 deletions(-)
 create mode 100644 changelog.d/14229.misc

(limited to 'synapse')

diff --git a/changelog.d/14229.misc b/changelog.d/14229.misc
new file mode 100644
index 0000000000..b9cd9a34d5
--- /dev/null
+++ b/changelog.d/14229.misc
@@ -0,0 +1 @@
+Refactor `/key/` endpoints to use `RestServlet` classes.
diff --git a/synapse/api/urls.py b/synapse/api/urls.py
index bd49fa6a5f..a918579f50 100644
--- a/synapse/api/urls.py
+++ b/synapse/api/urls.py
@@ -28,7 +28,7 @@ FEDERATION_V1_PREFIX = FEDERATION_PREFIX + "/v1"
 FEDERATION_V2_PREFIX = FEDERATION_PREFIX + "/v2"
 FEDERATION_UNSTABLE_PREFIX = FEDERATION_PREFIX + "/unstable"
 STATIC_PREFIX = "/_matrix/static"
-SERVER_KEY_V2_PREFIX = "/_matrix/key/v2"
+SERVER_KEY_PREFIX = "/_matrix/key"
 MEDIA_R0_PREFIX = "/_matrix/media/r0"
 MEDIA_V3_PREFIX = "/_matrix/media/v3"
 LEGACY_MEDIA_PREFIX = "/_matrix/media/v1"
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index dc49840f73..2a9f039367 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -28,7 +28,7 @@ from synapse.api.urls import (
     LEGACY_MEDIA_PREFIX,
     MEDIA_R0_PREFIX,
     MEDIA_V3_PREFIX,
-    SERVER_KEY_V2_PREFIX,
+    SERVER_KEY_PREFIX,
 )
 from synapse.app import _base
 from synapse.app._base import (
@@ -89,7 +89,7 @@ from synapse.rest.client.register import (
     RegistrationTokenValidityRestServlet,
 )
 from synapse.rest.health import HealthResource
-from synapse.rest.key.v2 import KeyApiV2Resource
+from synapse.rest.key.v2 import KeyResource
 from synapse.rest.synapse.client import build_synapse_client_resource_tree
 from synapse.rest.well_known import well_known_resource
 from synapse.server import HomeServer
@@ -325,13 +325,13 @@ class GenericWorkerServer(HomeServer):
 
                     presence.register_servlets(self, resource)
 
-                    resources.update({CLIENT_API_PREFIX: resource})
+                    resources[CLIENT_API_PREFIX] = resource
 
                     resources.update(build_synapse_client_resource_tree(self))
-                    resources.update({"/.well-known": well_known_resource(self)})
+                    resources["/.well-known"] = well_known_resource(self)
 
                 elif name == "federation":
-                    resources.update({FEDERATION_PREFIX: TransportLayerServer(self)})
+                    resources[FEDERATION_PREFIX] = TransportLayerServer(self)
                 elif name == "media":
                     if self.config.media.can_load_media_repo:
                         media_repo = self.get_media_repository_resource()
@@ -359,16 +359,12 @@ class GenericWorkerServer(HomeServer):
                     # Only load the openid resource separately if federation resource
                     # is not specified since federation resource includes openid
                     # resource.
-                    resources.update(
-                        {
-                            FEDERATION_PREFIX: TransportLayerServer(
-                                self, servlet_groups=["openid"]
-                            )
-                        }
+                    resources[FEDERATION_PREFIX] = TransportLayerServer(
+                        self, servlet_groups=["openid"]
                     )
 
                 if name in ["keys", "federation"]:
-                    resources[SERVER_KEY_V2_PREFIX] = KeyApiV2Resource(self)
+                    resources[SERVER_KEY_PREFIX] = KeyResource(self)
 
                 if name == "replication":
                     resources[REPLICATION_PREFIX] = ReplicationRestResource(self)
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 883f2fd2ec..de3f08876f 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -31,7 +31,7 @@ from synapse.api.urls import (
     LEGACY_MEDIA_PREFIX,
     MEDIA_R0_PREFIX,
     MEDIA_V3_PREFIX,
-    SERVER_KEY_V2_PREFIX,
+    SERVER_KEY_PREFIX,
     STATIC_PREFIX,
 )
 from synapse.app import _base
@@ -60,7 +60,7 @@ from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource
 from synapse.rest import ClientRestResource
 from synapse.rest.admin import AdminRestResource
 from synapse.rest.health import HealthResource
-from synapse.rest.key.v2 import KeyApiV2Resource
+from synapse.rest.key.v2 import KeyResource
 from synapse.rest.synapse.client import build_synapse_client_resource_tree
 from synapse.rest.well_known import well_known_resource
 from synapse.server import HomeServer
@@ -215,30 +215,22 @@ class SynapseHomeServer(HomeServer):
             consent_resource: Resource = ConsentResource(self)
             if compress:
                 consent_resource = gz_wrap(consent_resource)
-            resources.update({"/_matrix/consent": consent_resource})
+            resources["/_matrix/consent"] = consent_resource
 
         if name == "federation":
             federation_resource: Resource = TransportLayerServer(self)
             if compress:
                 federation_resource = gz_wrap(federation_resource)
-            resources.update({FEDERATION_PREFIX: federation_resource})
+            resources[FEDERATION_PREFIX] = federation_resource
 
         if name == "openid":
-            resources.update(
-                {
-                    FEDERATION_PREFIX: TransportLayerServer(
-                        self, servlet_groups=["openid"]
-                    )
-                }
+            resources[FEDERATION_PREFIX] = TransportLayerServer(
+                self, servlet_groups=["openid"]
             )
 
         if name in ["static", "client"]:
-            resources.update(
-                {
-                    STATIC_PREFIX: StaticResource(
-                        os.path.join(os.path.dirname(synapse.__file__), "static")
-                    )
-                }
+            resources[STATIC_PREFIX] = StaticResource(
+                os.path.join(os.path.dirname(synapse.__file__), "static")
             )
 
         if name in ["media", "federation", "client"]:
@@ -257,7 +249,7 @@ class SynapseHomeServer(HomeServer):
                 )
 
         if name in ["keys", "federation"]:
-            resources[SERVER_KEY_V2_PREFIX] = KeyApiV2Resource(self)
+            resources[SERVER_KEY_PREFIX] = KeyResource(self)
 
         if name == "metrics" and self.config.metrics.enable_metrics:
             metrics_resource: Resource = MetricsResource(RegistryProxy)
diff --git a/synapse/rest/key/v2/__init__.py b/synapse/rest/key/v2/__init__.py
index 7f8c1de1ff..26403facb8 100644
--- a/synapse/rest/key/v2/__init__.py
+++ b/synapse/rest/key/v2/__init__.py
@@ -14,17 +14,20 @@
 
 from typing import TYPE_CHECKING
 
-from twisted.web.resource import Resource
-
-from .local_key_resource import LocalKey
-from .remote_key_resource import RemoteKey
+from synapse.http.server import HttpServer, JsonResource
+from synapse.rest.key.v2.local_key_resource import LocalKey
+from synapse.rest.key.v2.remote_key_resource import RemoteKey
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
 
 
-class KeyApiV2Resource(Resource):
+class KeyResource(JsonResource):
     def __init__(self, hs: "HomeServer"):
-        Resource.__init__(self)
-        self.putChild(b"server", LocalKey(hs))
-        self.putChild(b"query", RemoteKey(hs))
+        super().__init__(hs, canonical_json=True)
+        self.register_servlets(self, hs)
+
+    @staticmethod
+    def register_servlets(http_server: HttpServer, hs: "HomeServer") -> None:
+        LocalKey(hs).register(http_server)
+        RemoteKey(hs).register(http_server)
diff --git a/synapse/rest/key/v2/local_key_resource.py b/synapse/rest/key/v2/local_key_resource.py
index 095993415c..d03e728d42 100644
--- a/synapse/rest/key/v2/local_key_resource.py
+++ b/synapse/rest/key/v2/local_key_resource.py
@@ -13,16 +13,15 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, Optional
+import re
+from typing import TYPE_CHECKING, Optional, Tuple
 
-from canonicaljson import encode_canonical_json
 from signedjson.sign import sign_json
 from unpaddedbase64 import encode_base64
 
-from twisted.web.resource import Resource
+from twisted.web.server import Request
 
-from synapse.http.server import respond_with_json_bytes
-from synapse.http.site import SynapseRequest
+from synapse.http.servlet import RestServlet
 from synapse.types import JsonDict
 
 if TYPE_CHECKING:
@@ -31,7 +30,7 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
-class LocalKey(Resource):
+class LocalKey(RestServlet):
     """HTTP resource containing encoding the TLS X.509 certificate and NACL
     signature verification keys for this server::
 
@@ -61,18 +60,17 @@ class LocalKey(Resource):
         }
     """
 
-    isLeaf = True
+    PATTERNS = (re.compile("^/_matrix/key/v2/server(/(?P<key_id>[^/]*))?$"),)
 
     def __init__(self, hs: "HomeServer"):
         self.config = hs.config
         self.clock = hs.get_clock()
         self.update_response_body(self.clock.time_msec())
-        Resource.__init__(self)
 
     def update_response_body(self, time_now_msec: int) -> None:
         refresh_interval = self.config.key.key_refresh_interval
         self.valid_until_ts = int(time_now_msec + refresh_interval)
-        self.response_body = encode_canonical_json(self.response_json_object())
+        self.response_body = self.response_json_object()
 
     def response_json_object(self) -> JsonDict:
         verify_keys = {}
@@ -99,9 +97,11 @@ class LocalKey(Resource):
             json_object = sign_json(json_object, self.config.server.server_name, key)
         return json_object
 
-    def render_GET(self, request: SynapseRequest) -> Optional[int]:
+    def on_GET(
+        self, request: Request, key_id: Optional[str] = None
+    ) -> Tuple[int, JsonDict]:
         time_now = self.clock.time_msec()
         # Update the expiry time if less than half the interval remains.
         if time_now + self.config.key.key_refresh_interval / 2 > self.valid_until_ts:
             self.update_response_body(time_now)
-        return respond_with_json_bytes(request, 200, self.response_body)
+        return 200, self.response_body
diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py
index 7f8ad29566..19820886f5 100644
--- a/synapse/rest/key/v2/remote_key_resource.py
+++ b/synapse/rest/key/v2/remote_key_resource.py
@@ -13,15 +13,20 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, Dict, Set
+import re
+from typing import TYPE_CHECKING, Dict, Optional, Set, Tuple
 
 from signedjson.sign import sign_json
 
-from synapse.api.errors import Codes, SynapseError
+from twisted.web.server import Request
+
 from synapse.crypto.keyring import ServerKeyFetcher
-from synapse.http.server import DirectServeJsonResource, respond_with_json
-from synapse.http.servlet import parse_integer, parse_json_object_from_request
-from synapse.http.site import SynapseRequest
+from synapse.http.server import HttpServer
+from synapse.http.servlet import (
+    RestServlet,
+    parse_integer,
+    parse_json_object_from_request,
+)
 from synapse.types import JsonDict
 from synapse.util import json_decoder
 from synapse.util.async_helpers import yieldable_gather_results
@@ -32,7 +37,7 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
-class RemoteKey(DirectServeJsonResource):
+class RemoteKey(RestServlet):
     """HTTP resource for retrieving the TLS certificate and NACL signature
     verification keys for a collection of servers. Checks that the reported
     X.509 TLS certificate matches the one used in the HTTPS connection. Checks
@@ -88,11 +93,7 @@ class RemoteKey(DirectServeJsonResource):
     }
     """
 
-    isLeaf = True
-
     def __init__(self, hs: "HomeServer"):
-        super().__init__()
-
         self.fetcher = ServerKeyFetcher(hs)
         self.store = hs.get_datastores().main
         self.clock = hs.get_clock()
@@ -101,36 +102,48 @@ class RemoteKey(DirectServeJsonResource):
         )
         self.config = hs.config
 
-    async def _async_render_GET(self, request: SynapseRequest) -> None:
-        assert request.postpath is not None
-        if len(request.postpath) == 1:
-            (server,) = request.postpath
-            query: dict = {server.decode("ascii"): {}}
-        elif len(request.postpath) == 2:
-            server, key_id = request.postpath
+    def register(self, http_server: HttpServer) -> None:
+        http_server.register_paths(
+            "GET",
+            (
+                re.compile(
+                    "^/_matrix/key/v2/query/(?P<server>[^/]*)(/(?P<key_id>[^/]*))?$"
+                ),
+            ),
+            self.on_GET,
+            self.__class__.__name__,
+        )
+        http_server.register_paths(
+            "POST",
+            (re.compile("^/_matrix/key/v2/query$"),),
+            self.on_POST,
+            self.__class__.__name__,
+        )
+
+    async def on_GET(
+        self, request: Request, server: str, key_id: Optional[str] = None
+    ) -> Tuple[int, JsonDict]:
+        if server and key_id:
             minimum_valid_until_ts = parse_integer(request, "minimum_valid_until_ts")
             arguments = {}
             if minimum_valid_until_ts is not None:
                 arguments["minimum_valid_until_ts"] = minimum_valid_until_ts
-            query = {server.decode("ascii"): {key_id.decode("ascii"): arguments}}
+            query = {server: {key_id: arguments}}
         else:
-            raise SynapseError(404, "Not found %r" % request.postpath, Codes.NOT_FOUND)
+            query = {server: {}}
 
-        await self.query_keys(request, query, query_remote_on_cache_miss=True)
+        return 200, await self.query_keys(query, query_remote_on_cache_miss=True)
 
-    async def _async_render_POST(self, request: SynapseRequest) -> None:
+    async def on_POST(self, request: Request) -> Tuple[int, JsonDict]:
         content = parse_json_object_from_request(request)
 
         query = content["server_keys"]
 
-        await self.query_keys(request, query, query_remote_on_cache_miss=True)
+        return 200, await self.query_keys(query, query_remote_on_cache_miss=True)
 
     async def query_keys(
-        self,
-        request: SynapseRequest,
-        query: JsonDict,
-        query_remote_on_cache_miss: bool = False,
-    ) -> None:
+        self, query: JsonDict, query_remote_on_cache_miss: bool = False
+    ) -> JsonDict:
         logger.info("Handling query for keys %r", query)
 
         store_queries = []
@@ -232,7 +245,7 @@ class RemoteKey(DirectServeJsonResource):
                     for server_name, keys in cache_misses.items()
                 ),
             )
-            await self.query_keys(request, query, query_remote_on_cache_miss=False)
+            return await self.query_keys(query, query_remote_on_cache_miss=False)
         else:
             signed_keys = []
             for key_json_raw in json_results:
@@ -244,6 +257,4 @@ class RemoteKey(DirectServeJsonResource):
 
                 signed_keys.append(key_json)
 
-            response = {"server_keys": signed_keys}
-
-            respond_with_json(request, 200, response, canonical_json=True)
+            return {"server_keys": signed_keys}
diff --git a/tests/app/test_openid_listener.py b/tests/app/test_openid_listener.py
index c7dae58eb5..8d03da7f96 100644
--- a/tests/app/test_openid_listener.py
+++ b/tests/app/test_openid_listener.py
@@ -79,7 +79,7 @@ class FederationReaderOpenIDListenerTests(HomeserverTestCase):
         self.assertEqual(channel.code, 401)
 
 
-@patch("synapse.app.homeserver.KeyApiV2Resource", new=Mock())
+@patch("synapse.app.homeserver.KeyResource", new=Mock())
 class SynapseHomeserverOpenIDListenerTests(HomeserverTestCase):
     def make_homeserver(self, reactor, clock):
         hs = self.setup_test_homeserver(
diff --git a/tests/rest/key/v2/test_remote_key_resource.py b/tests/rest/key/v2/test_remote_key_resource.py
index ac0ac06b7e..7f1fba1086 100644
--- a/tests/rest/key/v2/test_remote_key_resource.py
+++ b/tests/rest/key/v2/test_remote_key_resource.py
@@ -26,7 +26,7 @@ from twisted.web.resource import NoResource, Resource
 
 from synapse.crypto.keyring import PerspectivesKeyFetcher
 from synapse.http.site import SynapseRequest
-from synapse.rest.key.v2 import KeyApiV2Resource
+from synapse.rest.key.v2 import KeyResource
 from synapse.server import HomeServer
 from synapse.storage.keys import FetchKeyResult
 from synapse.types import JsonDict
@@ -46,7 +46,7 @@ class BaseRemoteKeyResourceTestCase(unittest.HomeserverTestCase):
 
     def create_test_resource(self) -> Resource:
         return create_resource_tree(
-            {"/_matrix/key/v2": KeyApiV2Resource(self.hs)}, root_resource=NoResource()
+            {"/_matrix/key/v2": KeyResource(self.hs)}, root_resource=NoResource()
         )
 
     def expect_outgoing_key_request(
-- 
cgit 1.5.1


From fab495a9e1442d99e922367f65f41de5eaa488eb Mon Sep 17 00:00:00 2001
From: "DeepBlueV7.X" <nicolas.werner@hotmail.de>
Date: Fri, 21 Oct 2022 08:49:47 +0000
Subject: Fix event size checks (#13710)

---
 changelog.d/13710.bugfix |  1 +
 synapse/event_auth.py    | 10 +++++-----
 2 files changed, 6 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/13710.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13710.bugfix b/changelog.d/13710.bugfix
new file mode 100644
index 0000000000..4c318d15f5
--- /dev/null
+++ b/changelog.d/13710.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where Synapse would count codepoints instead of bytes when validating the size of some fields.
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index bab31e33c5..5036604036 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -342,15 +342,15 @@ def check_state_dependent_auth_rules(
 
 
 def _check_size_limits(event: "EventBase") -> None:
-    if len(event.user_id) > 255:
+    if len(event.user_id.encode("utf-8")) > 255:
         raise EventSizeError("'user_id' too large")
-    if len(event.room_id) > 255:
+    if len(event.room_id.encode("utf-8")) > 255:
         raise EventSizeError("'room_id' too large")
-    if event.is_state() and len(event.state_key) > 255:
+    if event.is_state() and len(event.state_key.encode("utf-8")) > 255:
         raise EventSizeError("'state_key' too large")
-    if len(event.type) > 255:
+    if len(event.type.encode("utf-8")) > 255:
         raise EventSizeError("'type' too large")
-    if len(event.event_id) > 255:
+    if len(event.event_id.encode("utf-8")) > 255:
         raise EventSizeError("'event_id' too large")
     if len(encode_canonical_json(event.get_pdu_json())) > MAX_PDU_SIZE:
         raise EventSizeError("event too large")
-- 
cgit 1.5.1


From 1433b5d5b64c3a6624e6e4ff4fef22127c49df86 Mon Sep 17 00:00:00 2001
From: Tadeusz Sośnierz <tadzik@tadzik.net>
Date: Fri, 21 Oct 2022 14:52:44 +0200
Subject: Show erasure status when listing users in the Admin API (#14205)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Show erasure status when listing users in the Admin API

* Use USING when joining erased_users

* Add changelog entry

* Revert "Use USING when joining erased_users"

This reverts commit 30bd2bf106415caadcfdbdd1b234ef2b106cc394.

* Make the erased check work on postgres

* Add a testcase for showing erased user status

* Appease the style linter

* Explicitly convert `erased` to bool to make SQLite consistent with Postgres

This also gives us an easy way to fix the other columns that are accidentally returned as integers.

* Move erasure status test to UsersListTestCase

* Include user erased status when fetching user info via the admin API

* Document the erase status in user_admin_api

* Appease the linter and mypy

* Signpost comments in tests

Co-authored-by: Tadeusz Sośnierz <tadeusz@sosnierz.com>
Co-authored-by: David Robertson <david.m.robertson1@gmail.com>
---
 changelog.d/14205.feature                  |  1 +
 docs/admin_api/user_admin_api.md           |  4 ++++
 synapse/handlers/admin.py                  |  1 +
 synapse/storage/databases/main/__init__.py | 13 +++++++++--
 tests/rest/admin/test_user.py              | 35 +++++++++++++++++++++++++++++-
 5 files changed, 51 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/14205.feature

(limited to 'synapse')

diff --git a/changelog.d/14205.feature b/changelog.d/14205.feature
new file mode 100644
index 0000000000..6692063352
--- /dev/null
+++ b/changelog.d/14205.feature
@@ -0,0 +1 @@
+Show erasure status when listing users in the Admin API.
diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md
index 3625c7b6c5..c95d6c9b05 100644
--- a/docs/admin_api/user_admin_api.md
+++ b/docs/admin_api/user_admin_api.md
@@ -37,6 +37,7 @@ It returns a JSON body like the following:
     "is_guest": 0,
     "admin": 0,
     "deactivated": 0,
+    "erased": false,
     "shadow_banned": 0,
     "creation_ts": 1560432506,
     "appservice_id": null,
@@ -167,6 +168,7 @@ A response body like the following is returned:
             "admin": 0,
             "user_type": null,
             "deactivated": 0,
+            "erased": false,
             "shadow_banned": 0,
             "displayname": "<User One>",
             "avatar_url": null,
@@ -177,6 +179,7 @@ A response body like the following is returned:
             "admin": 1,
             "user_type": null,
             "deactivated": 0,
+            "erased": false,
             "shadow_banned": 0,
             "displayname": "<User Two>",
             "avatar_url": "<avatar_url>",
@@ -247,6 +250,7 @@ The following fields are returned in the JSON response body:
   - `user_type` - string - Type of the user. Normal users are type `None`.
     This allows user type specific behaviour. There are also types `support` and `bot`. 
   - `deactivated` - bool - Status if that user has been marked as deactivated.
+  - `erased` - bool - Status if that user has been marked as erased.
   - `shadow_banned` - bool - Status if that user has been marked as shadow banned.
   - `displayname` - string - The user's display name if they have set one.
   - `avatar_url` - string -  The user's avatar URL if they have set one.
diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py
index f2989cc4a2..5bf8e86387 100644
--- a/synapse/handlers/admin.py
+++ b/synapse/handlers/admin.py
@@ -100,6 +100,7 @@ class AdminHandler:
         user_info_dict["avatar_url"] = profile.avatar_url
         user_info_dict["threepids"] = threepids
         user_info_dict["external_ids"] = external_ids
+        user_info_dict["erased"] = await self.store.is_user_erased(user.to_string())
 
         return user_info_dict
 
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index a62b4abd4e..cfaedf5e0c 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -201,7 +201,7 @@ class DataStore(
         name: Optional[str] = None,
         guests: bool = True,
         deactivated: bool = False,
-        order_by: str = UserSortOrder.USER_ID.value,
+        order_by: str = UserSortOrder.NAME.value,
         direction: str = "f",
         approved: bool = True,
     ) -> Tuple[List[JsonDict], int]:
@@ -261,6 +261,7 @@ class DataStore(
             sql_base = f"""
                 FROM users as u
                 LEFT JOIN profiles AS p ON u.name = '@' || p.user_id || ':' || ?
+                LEFT JOIN erased_users AS eu ON u.name = eu.user_id
                 {where_clause}
                 """
             sql = "SELECT COUNT(*) as total_users " + sql_base
@@ -269,7 +270,8 @@ class DataStore(
 
             sql = f"""
                 SELECT name, user_type, is_guest, admin, deactivated, shadow_banned,
-                displayname, avatar_url, creation_ts * 1000 as creation_ts, approved
+                displayname, avatar_url, creation_ts * 1000 as creation_ts, approved,
+                eu.user_id is not null as erased
                 {sql_base}
                 ORDER BY {order_by_column} {order}, u.name ASC
                 LIMIT ? OFFSET ?
@@ -277,6 +279,13 @@ class DataStore(
             args += [limit, start]
             txn.execute(sql, args)
             users = self.db_pool.cursor_to_dict(txn)
+
+            # some of those boolean values are returned as integers when we're on SQLite
+            columns_to_boolify = ["erased"]
+            for user in users:
+                for column in columns_to_boolify:
+                    user[column] = bool(user[column])
+
             return users, count
 
         return await self.db_pool.runInteraction(
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index 4c1ce33463..63410ffdf1 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -31,7 +31,7 @@ from synapse.api.room_versions import RoomVersions
 from synapse.rest.client import devices, login, logout, profile, register, room, sync
 from synapse.rest.media.v1.filepath import MediaFilePaths
 from synapse.server import HomeServer
-from synapse.types import JsonDict, UserID
+from synapse.types import JsonDict, UserID, create_requester
 from synapse.util import Clock
 
 from tests import unittest
@@ -924,6 +924,36 @@ class UsersListTestCase(unittest.HomeserverTestCase):
         self.assertEqual(1, len(non_admin_user_ids), non_admin_user_ids)
         self.assertEqual(not_approved_user, non_admin_user_ids[0])
 
+    def test_erasure_status(self) -> None:
+        # Create a new user.
+        user_id = self.register_user("eraseme", "eraseme")
+
+        # They should appear in the list users API, marked as not erased.
+        channel = self.make_request(
+            "GET",
+            self.url + "?deactivated=true",
+            access_token=self.admin_user_tok,
+        )
+        users = {user["name"]: user for user in channel.json_body["users"]}
+        self.assertIs(users[user_id]["erased"], False)
+
+        # Deactivate that user, requesting erasure.
+        deactivate_account_handler = self.hs.get_deactivate_account_handler()
+        self.get_success(
+            deactivate_account_handler.deactivate_account(
+                user_id, erase_data=True, requester=create_requester(user_id)
+            )
+        )
+
+        # Repeat the list users query. They should now be marked as erased.
+        channel = self.make_request(
+            "GET",
+            self.url + "?deactivated=true",
+            access_token=self.admin_user_tok,
+        )
+        users = {user["name"]: user for user in channel.json_body["users"]}
+        self.assertIs(users[user_id]["erased"], True)
+
     def _order_test(
         self,
         expected_user_list: List[str],
@@ -1195,6 +1225,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
         self.assertEqual("foo@bar.com", channel.json_body["threepids"][0]["address"])
         self.assertEqual("mxc://servername/mediaid", channel.json_body["avatar_url"])
         self.assertEqual("User1", channel.json_body["displayname"])
+        self.assertFalse(channel.json_body["erased"])
 
         # Deactivate and erase user
         channel = self.make_request(
@@ -1219,6 +1250,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
         self.assertEqual(0, len(channel.json_body["threepids"]))
         self.assertIsNone(channel.json_body["avatar_url"])
         self.assertIsNone(channel.json_body["displayname"])
+        self.assertTrue(channel.json_body["erased"])
 
         self._is_erased("@user:test", True)
 
@@ -2757,6 +2789,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         self.assertIn("avatar_url", content)
         self.assertIn("admin", content)
         self.assertIn("deactivated", content)
+        self.assertIn("erased", content)
         self.assertIn("shadow_banned", content)
         self.assertIn("creation_ts", content)
         self.assertIn("appservice_id", content)
-- 
cgit 1.5.1


From 4dd7aa371b6bc746fa4b0a9af220b2013b17a45d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 21 Oct 2022 09:11:19 -0400
Subject: Properly update the threads table when thread events are redacted.
 (#14248)

When the last event in a thread is redacted we need to update
the threads table:

* Find the new latest event in the thread and store it into the table; or
* Remove the thread from the table if it is no longer a thread (i.e. all
  events in the thread were redacted).
---
 changelog.d/14248.bugfix                 |   1 +
 synapse/storage/databases/main/events.py |  61 ++++++++++++++---
 tests/rest/client/test_relations.py      | 110 +++++++++++++++++++++----------
 3 files changed, 129 insertions(+), 43 deletions(-)
 create mode 100644 changelog.d/14248.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14248.bugfix b/changelog.d/14248.bugfix
new file mode 100644
index 0000000000..203c52c16b
--- /dev/null
+++ b/changelog.d/14248.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.70.0rc1 where the information returned from the `/threads` API could be stale when threaded events are redacted.
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 6698cbf664..00880bb37d 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -2028,25 +2028,37 @@ class PersistEventsStore:
             redacted_event_id: The event that was redacted.
         """
 
-        # Fetch the current relation of the event being redacted.
-        redacted_relates_to = self.db_pool.simple_select_one_onecol_txn(
+        # Fetch the relation of the event being redacted.
+        row = self.db_pool.simple_select_one_txn(
             txn,
             table="event_relations",
             keyvalues={"event_id": redacted_event_id},
-            retcol="relates_to_id",
+            retcols=("relates_to_id", "relation_type"),
             allow_none=True,
         )
+        # Nothing to do if no relation is found.
+        if row is None:
+            return
+
+        redacted_relates_to = row["relates_to_id"]
+        rel_type = row["relation_type"]
+        self.db_pool.simple_delete_txn(
+            txn, table="event_relations", keyvalues={"event_id": redacted_event_id}
+        )
+
         # Any relation information for the related event must be cleared.
-        if redacted_relates_to is not None:
-            self.store._invalidate_cache_and_stream(
-                txn, self.store.get_relations_for_event, (redacted_relates_to,)
-            )
+        self.store._invalidate_cache_and_stream(
+            txn, self.store.get_relations_for_event, (redacted_relates_to,)
+        )
+        if rel_type == RelationTypes.ANNOTATION:
             self.store._invalidate_cache_and_stream(
                 txn, self.store.get_aggregation_groups_for_event, (redacted_relates_to,)
             )
+        if rel_type == RelationTypes.REPLACE:
             self.store._invalidate_cache_and_stream(
                 txn, self.store.get_applicable_edit, (redacted_relates_to,)
             )
+        if rel_type == RelationTypes.THREAD:
             self.store._invalidate_cache_and_stream(
                 txn, self.store.get_thread_summary, (redacted_relates_to,)
             )
@@ -2057,9 +2069,38 @@ class PersistEventsStore:
                 txn, self.store.get_threads, (room_id,)
             )
 
-        self.db_pool.simple_delete_txn(
-            txn, table="event_relations", keyvalues={"event_id": redacted_event_id}
-        )
+            # Find the new latest event in the thread.
+            sql = """
+            SELECT event_id, topological_ordering, stream_ordering
+            FROM event_relations
+            INNER JOIN events USING (event_id)
+            WHERE relates_to_id = ? AND relation_type = ?
+            ORDER BY topological_ordering DESC, stream_ordering DESC
+            LIMIT 1
+            """
+            txn.execute(sql, (redacted_relates_to, RelationTypes.THREAD))
+
+            # If a latest event is found, update the threads table, this might
+            # be the same current latest event (if an earlier event in the thread
+            # was redacted).
+            latest_event_row = txn.fetchone()
+            if latest_event_row:
+                self.db_pool.simple_upsert_txn(
+                    txn,
+                    table="threads",
+                    keyvalues={"room_id": room_id, "thread_id": redacted_relates_to},
+                    values={
+                        "latest_event_id": latest_event_row[0],
+                        "topological_ordering": latest_event_row[1],
+                        "stream_ordering": latest_event_row[2],
+                    },
+                )
+
+            # Otherwise, delete the thread: it no longer exists.
+            else:
+                self.db_pool.simple_delete_one_txn(
+                    txn, table="threads", keyvalues={"thread_id": redacted_relates_to}
+                )
 
     def _store_room_topic_txn(self, txn: LoggingTransaction, event: EventBase) -> None:
         if isinstance(event.content.get("topic"), str):
diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py
index ddf315b894..e3d801f7a8 100644
--- a/tests/rest/client/test_relations.py
+++ b/tests/rest/client/test_relations.py
@@ -1523,6 +1523,26 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
         )
         self.assertEqual(200, channel.code, channel.json_body)
 
+    def _get_threads(self) -> List[Tuple[str, str]]:
+        """Request the threads in the room and returns a list of thread ID and latest event ID."""
+        # Request the threads in the room.
+        channel = self.make_request(
+            "GET",
+            f"/_matrix/client/v1/rooms/{self.room}/threads",
+            access_token=self.user_token,
+        )
+        self.assertEquals(200, channel.code, channel.json_body)
+        threads = channel.json_body["chunk"]
+        return [
+            (
+                t["event_id"],
+                t["unsigned"]["m.relations"][RelationTypes.THREAD]["latest_event"][
+                    "event_id"
+                ],
+            )
+            for t in threads
+        ]
+
     def test_redact_relation_annotation(self) -> None:
         """
         Test that annotations of an event are properly handled after the
@@ -1567,58 +1587,82 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
         The redacted event should not be included in bundled aggregations or
         the response to relations.
         """
-        channel = self._send_relation(
-            RelationTypes.THREAD,
-            EventTypes.Message,
-            content={"body": "reply 1", "msgtype": "m.text"},
-        )
-        unredacted_event_id = channel.json_body["event_id"]
+        # Create a thread with a few events in it.
+        thread_replies = []
+        for i in range(3):
+            channel = self._send_relation(
+                RelationTypes.THREAD,
+                EventTypes.Message,
+                content={"body": f"reply {i}", "msgtype": "m.text"},
+            )
+            thread_replies.append(channel.json_body["event_id"])
 
-        # Note that the *last* event in the thread is redacted, as that gets
-        # included in the bundled aggregation.
-        channel = self._send_relation(
-            RelationTypes.THREAD,
-            EventTypes.Message,
-            content={"body": "reply 2", "msgtype": "m.text"},
+        ##################################################
+        # Check the test data is configured as expected. #
+        ##################################################
+        self.assertEquals(self._get_related_events(), list(reversed(thread_replies)))
+        relations = self._get_bundled_aggregations()
+        self.assertDictContainsSubset(
+            {"count": 3, "current_user_participated": True},
+            relations[RelationTypes.THREAD],
+        )
+        # The latest event is the last sent event.
+        self.assertEqual(
+            relations[RelationTypes.THREAD]["latest_event"]["event_id"],
+            thread_replies[-1],
         )
-        to_redact_event_id = channel.json_body["event_id"]
 
-        # Both relations exist.
-        event_ids = self._get_related_events()
+        # There should be one thread, the latest event is the event that will be redacted.
+        self.assertEqual(self._get_threads(), [(self.parent_id, thread_replies[-1])])
+
+        ##########################
+        # Redact the last event. #
+        ##########################
+        self._redact(thread_replies.pop())
+
+        # The thread should still exist, but the latest event should be updated.
+        self.assertEquals(self._get_related_events(), list(reversed(thread_replies)))
         relations = self._get_bundled_aggregations()
-        self.assertEquals(event_ids, [to_redact_event_id, unredacted_event_id])
         self.assertDictContainsSubset(
-            {
-                "count": 2,
-                "current_user_participated": True,
-            },
+            {"count": 2, "current_user_participated": True},
             relations[RelationTypes.THREAD],
         )
-        # And the latest event returned is the event that will be redacted.
+        # And the latest event is the last unredacted event.
         self.assertEqual(
             relations[RelationTypes.THREAD]["latest_event"]["event_id"],
-            to_redact_event_id,
+            thread_replies[-1],
         )
+        self.assertEqual(self._get_threads(), [(self.parent_id, thread_replies[-1])])
 
-        # Redact one of the reactions.
-        self._redact(to_redact_event_id)
+        ###########################################
+        # Redact the *first* event in the thread. #
+        ###########################################
+        self._redact(thread_replies.pop(0))
 
-        # The unredacted relation should still exist.
-        event_ids = self._get_related_events()
+        # Nothing should have changed (except the thread count).
+        self.assertEquals(self._get_related_events(), thread_replies)
         relations = self._get_bundled_aggregations()
-        self.assertEquals(event_ids, [unredacted_event_id])
         self.assertDictContainsSubset(
-            {
-                "count": 1,
-                "current_user_participated": True,
-            },
+            {"count": 1, "current_user_participated": True},
             relations[RelationTypes.THREAD],
         )
-        # And the latest event is now the unredacted event.
+        # And the latest event is the last unredacted event.
         self.assertEqual(
             relations[RelationTypes.THREAD]["latest_event"]["event_id"],
-            unredacted_event_id,
+            thread_replies[-1],
         )
+        self.assertEqual(self._get_threads(), [(self.parent_id, thread_replies[-1])])
+
+        ####################################
+        # Redact the last remaining event. #
+        ####################################
+        self._redact(thread_replies.pop(0))
+        self.assertEquals(thread_replies, [])
+
+        # The event should no longer be considered a thread.
+        self.assertEquals(self._get_related_events(), [])
+        self.assertEquals(self._get_bundled_aggregations(), {})
+        self.assertEqual(self._get_threads(), [])
 
     def test_redact_parent_edit(self) -> None:
         """Test that edits of an event are redacted when the original event
-- 
cgit 1.5.1


From d24346f53055eae7fb8e9038ef35fa843790742b Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Fri, 21 Oct 2022 16:03:44 +0100
Subject: Fix logging error on SIGHUP (#14258)

---
 changelog.d/14258.bugfix | 2 ++
 synapse/app/_base.py     | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14258.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14258.bugfix b/changelog.d/14258.bugfix
new file mode 100644
index 0000000000..de97945844
--- /dev/null
+++ b/changelog.d/14258.bugfix
@@ -0,0 +1,2 @@
+Fix a bug introduced in Synapse 1.60.0 which caused an error to be logged when Synapse received a SIGHUP signal, and debug logging was enabled.
+
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 000912e86e..a683ebf4cb 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -558,7 +558,7 @@ def reload_cache_config(config: HomeServerConfig) -> None:
             logger.warning(f)
     else:
         logger.debug(
-            "New cache config. Was:\n %s\nNow:\n",
+            "New cache config. Was:\n %s\nNow:\n %s",
             previous_cache_config.__dict__,
             config.caches.__dict__,
         )
-- 
cgit 1.5.1


From 1d45ad8b2ab1c41dd489ccd581d027077bc917e5 Mon Sep 17 00:00:00 2001
From: Germain <germain@souquet.com>
Date: Fri, 21 Oct 2022 18:44:00 +0100
Subject: Improve aesthetics and reusability of HTML templates. (#13652)

Use a base template to create a cohesive feel across the HTML
templates provided by Synapse.

Add basic styling to the base template for a more user-friendly
look and feel.
---
 changelog.d/13652.feature                          |   1 +
 synapse/res/templates/_base.html                   |  29 ++
 .../res/templates/account_previously_renewed.html  |  18 +-
 synapse/res/templates/account_renewed.html         |  18 +-
 synapse/res/templates/add_threepid.html            |  22 +-
 synapse/res/templates/add_threepid_failure.html    |  20 +-
 synapse/res/templates/add_threepid_success.html    |  18 +-
 synapse/res/templates/auth_success.html            |  28 +-
 synapse/res/templates/invalid_token.html           |  17 +-
 synapse/res/templates/notice_expiry.html           |  93 +++---
 synapse/res/templates/notif_mail.html              | 116 ++++---
 synapse/res/templates/password_reset.html          |  19 +-
 .../res/templates/password_reset_confirmation.html |  14 +-
 synapse/res/templates/password_reset_failure.html  |  14 +-
 synapse/res/templates/password_reset_success.html  |  12 +-
 synapse/res/templates/recaptcha.html               |  19 +-
 synapse/res/templates/registration.html            |  21 +-
 synapse/res/templates/registration_failure.html    |  12 +-
 synapse/res/templates/registration_success.html    |  13 +-
 synapse/res/templates/registration_token.html      |  16 +-
 synapse/res/templates/sso_account_deactivated.html |  49 ++-
 .../res/templates/sso_auth_account_details.html    | 372 ++++++++++-----------
 synapse/res/templates/sso_auth_bad_user.html       |  52 ++-
 synapse/res/templates/sso_auth_confirm.html        |  56 ++--
 synapse/res/templates/sso_auth_success.html        |  54 ++-
 synapse/res/templates/sso_error.html               |  34 +-
 synapse/res/templates/sso_login_idp_picker.html    | 114 +++----
 synapse/res/templates/sso_new_user_consent.html    |  60 ++--
 synapse/res/templates/sso_redirect_confirm.html    |  75 ++---
 synapse/res/templates/style.css                    |  29 ++
 synapse/res/templates/terms.html                   |  16 +-
 31 files changed, 691 insertions(+), 740 deletions(-)
 create mode 100644 changelog.d/13652.feature
 create mode 100644 synapse/res/templates/_base.html
 create mode 100644 synapse/res/templates/style.css

(limited to 'synapse')

diff --git a/changelog.d/13652.feature b/changelog.d/13652.feature
new file mode 100644
index 0000000000..bc7f2926dc
--- /dev/null
+++ b/changelog.d/13652.feature
@@ -0,0 +1 @@
+Improve aesthetics of HTML templates. Note that these changes do not retroactively apply to templates which have been [customised](https://matrix-org.github.io/synapse/latest/templates.html#templates) by server admins.
\ No newline at end of file
diff --git a/synapse/res/templates/_base.html b/synapse/res/templates/_base.html
new file mode 100644
index 0000000000..46439fce6a
--- /dev/null
+++ b/synapse/res/templates/_base.html
@@ -0,0 +1,29 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>{% block title %}{% endblock %}</title>
+    <style type="text/css">
+      {%- include 'style.css' without context %}
+    </style>
+    {% block header %}{%  endblock %}
+</head>
+<body>
+<header class="mx_Header">
+    {% if app_name == "Riot" %}
+        <img src="http://riot.im/img/external/riot-logo-email.png" width="83" height="83" alt="[Riot]"/>
+    {% elif app_name == "Vector" %}
+        <img src="http://matrix.org/img/vector-logo-email.png" width="64" height="83" alt="[Vector]"/>
+    {% elif app_name == "Element" %}
+        <img src="https://static.element.io/images/email-logo.png" width="83" height="83" alt="[Element]"/>
+    {% else %}
+        <img src="http://matrix.org/img/matrix-120x51.png" width="120" height="51" alt="[matrix]"/>
+    {% endif %}
+</header>
+
+{% block body %}{% endblock %}
+
+</body>
+</html>
diff --git a/synapse/res/templates/account_previously_renewed.html b/synapse/res/templates/account_previously_renewed.html
index bd4f7cea97..91582a8af0 100644
--- a/synapse/res/templates/account_previously_renewed.html
+++ b/synapse/res/templates/account_previously_renewed.html
@@ -1,12 +1,6 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Your account is valid until {{ expiration_ts|format_ts("%d-%m-%Y") }}.</title>
-</head>
-<body>
-    Your account is valid until {{ expiration_ts|format_ts("%d-%m-%Y") }}.
-</body>
-</html>
\ No newline at end of file
+{% extends "_base.html" %}
+{% block title %}Your account is valid until {{ expiration_ts|format_ts("%d-%m-%Y") }}.{% endblock %}
+
+{% block body %}
+<p>Your account is valid until {{ expiration_ts|format_ts("%d-%m-%Y") }}.</p>
+{% endblock %}
diff --git a/synapse/res/templates/account_renewed.html b/synapse/res/templates/account_renewed.html
index 57b319f375..18a57833f1 100644
--- a/synapse/res/templates/account_renewed.html
+++ b/synapse/res/templates/account_renewed.html
@@ -1,12 +1,6 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Your account has been successfully renewed and is valid until {{ expiration_ts|format_ts("%d-%m-%Y") }}.</title>
-</head>
-<body>
-    Your account has been successfully renewed and is valid until {{ expiration_ts|format_ts("%d-%m-%Y") }}.
-</body>
-</html>
\ No newline at end of file
+{% extends "_base.html" %}
+{% block title %}Your account has been successfully renewed and is valid until {{ expiration_ts|format_ts("%d-%m-%Y") }}.{% endblock %}
+
+{% block body %}
+<p>Your account has been successfully renewed and is valid until {{ expiration_ts|format_ts("%d-%m-%Y") }}.</p>
+{% endblock %}
diff --git a/synapse/res/templates/add_threepid.html b/synapse/res/templates/add_threepid.html
index 71f2215b7a..33c883936a 100644
--- a/synapse/res/templates/add_threepid.html
+++ b/synapse/res/templates/add_threepid.html
@@ -1,14 +1,8 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Request to add an email address to your Matrix account</title>
-</head>
-<body>
-    <p>A request to add an email address to your Matrix account has been received. If this was you, please click the link below to confirm adding this email:</p>
-    <a href="{{ link }}">{{ link }}</a>
-    <p>If this was not you, you can safely ignore this email. Thank you.</p>
-</body>
-</html>
+{% extends "_base.html" %}
+{% block title %}Request to add an email address to your Matrix account{% endblock %}
+
+{% block body %}
+<p>A request to add an email address to your Matrix account has been received. If this was you, please click the link below to confirm adding this email:</p>
+<a href="{{ link }}">{{ link }}</a>
+<p>If this was not you, you can safely ignore this email. Thank you.</p>
+{% endblock %}
diff --git a/synapse/res/templates/add_threepid_failure.html b/synapse/res/templates/add_threepid_failure.html
index bd627ee9ce..f6d7e33825 100644
--- a/synapse/res/templates/add_threepid_failure.html
+++ b/synapse/res/templates/add_threepid_failure.html
@@ -1,13 +1,7 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Request failed</title>
-</head>
-<body>
-    <p>The request failed for the following reason: {{ failure_reason }}.</p>
-    <p>No changes have been made to your account.</p>
-</body>
-</html>
+{% extends "_base.html" %}
+{% block title %}Request failed{% endblock %}
+
+{% block body %}
+<p>The request failed for the following reason: {{ failure_reason }}.</p>
+<p>No changes have been made to your account.</p>
+{% endblock %}
diff --git a/synapse/res/templates/add_threepid_success.html b/synapse/res/templates/add_threepid_success.html
index 49170c138e..6d45111796 100644
--- a/synapse/res/templates/add_threepid_success.html
+++ b/synapse/res/templates/add_threepid_success.html
@@ -1,12 +1,6 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Your email has now been validated</title>
-</head>
-<body>
-    <p>Your email has now been validated, please return to your client. You may now close this window.</p>
-</body>
-</html>
\ No newline at end of file
+{% extends "_base.html" %}
+{% block title %}Your email has now been validated{% endblock %}
+
+{% block body %}
+<p>Your email has now been validated, please return to your client. You may now close this window.</p>
+{% endblock %}
diff --git a/synapse/res/templates/auth_success.html b/synapse/res/templates/auth_success.html
index 2d6ac44a0e..9178332f59 100644
--- a/synapse/res/templates/auth_success.html
+++ b/synapse/res/templates/auth_success.html
@@ -1,21 +1,21 @@
-<html>
-<head>
-<title>Success!</title>
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+{% extends "_base.html" %}
+{% block title %}Success!{% endblock %}
+
+{% block header %}
 <link rel="stylesheet" href="/_matrix/static/client/register/style.css">
 <script>
 if (window.onAuthDone) {
     window.onAuthDone();
 } else if (window.opener && window.opener.postMessage) {
-     window.opener.postMessage("authDone", "*");
+    window.opener.postMessage("authDone", "*");
 }
 </script>
-</head>
-<body>
-    <div>
-        <p>Thank you</p>
-        <p>You may now close this window and return to the application</p>
-    </div>
-</body>
-</html>
+{% endblock %}
+
+{% block body %}
+<div>
+    <p>Thank you</p>
+    <p>You may now close this window and return to the application</p>
+</div>
+
+{% endblock %}
diff --git a/synapse/res/templates/invalid_token.html b/synapse/res/templates/invalid_token.html
index 2c7c384fe3..d0b1dae669 100644
--- a/synapse/res/templates/invalid_token.html
+++ b/synapse/res/templates/invalid_token.html
@@ -1,12 +1,5 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Invalid renewal token.</title>
-</head>
-<body>
-    Invalid renewal token.
-</body>
-</html>
+{% block title %}Invalid renewal token.{% endblock %}
+
+{% block body %}
+<p>Invalid renewal token.</p>
+{% endblock %}
diff --git a/synapse/res/templates/notice_expiry.html b/synapse/res/templates/notice_expiry.html
index 865f9f7ada..406397aaca 100644
--- a/synapse/res/templates/notice_expiry.html
+++ b/synapse/res/templates/notice_expiry.html
@@ -1,47 +1,46 @@
-<!doctype html>
-<html lang="en">
-    <head>
-        <meta http-equiv="X-UA-Compatible" content="IE=edge">
-        <meta name="viewport" content="width=device-width, initial-scale=1.0">
-        <style type="text/css">
-            {% include 'mail.css' without context %}
-            {% include "mail-%s.css" % app_name ignore missing without context %}
-            {% include 'mail-expiry.css' without context %}
-        </style>
-    </head>
-    <body>
-        <table id="page">
-            <tr>
-                <td> </td>
-                <td id="inner">
-                    <table class="header">
-                        <tr>
-                            <td>
-                                <div class="salutation">Hi {{ display_name }},</div>
-                            </td>
-                            <td class="logo">
-                                {% if app_name == "Riot" %}
-                                    <img src="http://riot.im/img/external/riot-logo-email.png" width="83" height="83" alt="[Riot]"/>
-                                {% elif app_name == "Vector" %}
-                                    <img src="http://matrix.org/img/vector-logo-email.png" width="64" height="83" alt="[Vector]"/>
-                                {% elif app_name == "Element" %}
-                                    <img src="https://static.element.io/images/email-logo.png" width="83" height="83" alt="[Element]"/>
-                                {% else %}
-                                    <img src="http://matrix.org/img/matrix-120x51.png" width="120" height="51" alt="[matrix]"/>
-                                {% endif %}
-                            </td>
-                        </tr>
-                        <tr>
-                          <td colspan="2">
-                            <div class="noticetext">Your account will expire on {{ expiration_ts|format_ts("%d-%m-%Y") }}. This means that you will lose access to your account after this date.</div>
-                            <div class="noticetext">To extend the validity of your account, please click on the link below (or copy and paste it into a new browser tab):</div>
-                            <div class="noticetext"><a href="{{ url }}">{{ url }}</a></div>
-                          </td>
-                        </tr>
-                    </table>
-                </td>
-                <td> </td>
-            </tr>
-        </table>
-    </body>
-</html>
+{% extends "_base.html" %}
+{% block title %}Notice of expiry{% endblock %}
+
+{% block header %}
+<style type="text/css">
+    {% include 'mail.css' without context %}
+    {% include "mail-%s.css" % app_name ignore missing without context %}
+    {% include 'mail-expiry.css' without context %}
+</style>
+{% endblock %}
+
+{% block body %}
+<table id="page">
+    <tr>
+        <td> </td>
+        <td id="inner">
+            <table class="header">
+                <tr>
+                    <td>
+                        <div class="salutation">Hi {{ display_name }},</div>
+                    </td>
+                    <td class="logo">
+                        {% if app_name == "Riot" %}
+                            <img src="http://riot.im/img/external/riot-logo-email.png" width="83" height="83" alt="[Riot]"/>
+                        {% elif app_name == "Vector" %}
+                            <img src="http://matrix.org/img/vector-logo-email.png" width="64" height="83" alt="[Vector]"/>
+                        {% elif app_name == "Element" %}
+                            <img src="https://static.element.io/images/email-logo.png" width="83" height="83" alt="[Element]"/>
+                        {% else %}
+                            <img src="http://matrix.org/img/matrix-120x51.png" width="120" height="51" alt="[matrix]"/>
+                        {% endif %}
+                    </td>
+                </tr>
+                <tr>
+                    <td colspan="2">
+                    <div class="noticetext">Your account will expire on {{ expiration_ts|format_ts("%d-%m-%Y") }}. This means that you will lose access to your account after this date.</div>
+                    <div class="noticetext">To extend the validity of your account, please click on the link below (or copy and paste it into a new browser tab):</div>
+                    <div class="noticetext"><a href="{{ url }}">{{ url }}</a></div>
+                    </td>
+                </tr>
+            </table>
+        </td>
+        <td> </td>
+    </tr>
+</table>
+{% endblock %}
diff --git a/synapse/res/templates/notif_mail.html b/synapse/res/templates/notif_mail.html
index 9dba0c0253..939d40315f 100644
--- a/synapse/res/templates/notif_mail.html
+++ b/synapse/res/templates/notif_mail.html
@@ -1,59 +1,57 @@
-<!doctype html>
-<html lang="en">
-    <head>
-        <meta http-equiv="X-UA-Compatible" content="IE=edge">
-        <meta name="viewport" content="width=device-width, initial-scale=1.0">
-        <style type="text/css">
-            {%- include 'mail.css' without context %}
-            {%- include "mail-%s.css" % app_name ignore missing without context %}
-        </style>
-    </head>
-    <body>
-        <table id="page">
-            <tr>
-                <td> </td>
-                <td id="inner">
-                    <table class="header">
-                        <tr>
-                            <td>
-                                <div class="salutation">Hi {{ user_display_name }},</div>
-                                <div class="summarytext">{{ summary_text }}</div>
-                            </td>
-                            <td class="logo">
-                                {%- if app_name == "Riot" %}
-                                    <img src="http://riot.im/img/external/riot-logo-email.png" width="83" height="83" alt="[Riot]"/>
-                                {%- elif app_name == "Vector" %}
-                                    <img src="http://matrix.org/img/vector-logo-email.png" width="64" height="83" alt="[Vector]"/>
-                                {%- elif app_name == "Element" %}
-                                    <img src="https://static.element.io/images/email-logo.png" width="83" height="83" alt="[Element]"/>
-                                {%- else %}
-                                    <img src="http://matrix.org/img/matrix-120x51.png" width="120" height="51" alt="[matrix]"/>
-                                {%- endif %}
-                            </td>
-                        </tr>
-                    </table>
-                    {%- for room in rooms %}
-                        {%- include 'room.html' with context %}
-                    {%- endfor %}
-                    <div class="footer">
-                        <a href="{{ unsubscribe_link }}">Unsubscribe</a>
-                        <br/>
-                        <br/>
-                        <div class="debug">
-                            Sending email at {{ reason.now|format_ts("%c") }} due to activity in room {{ reason.room_name }} because
-                            an event was received at {{ reason.received_at|format_ts("%c") }}
-                            which is more than {{ "%.1f"|format(reason.delay_before_mail_ms / (60*1000)) }} ({{ reason.delay_before_mail_ms }}) mins ago,
-                            {%- if reason.last_sent_ts %}
-                                and the last time we sent a mail for this room was {{ reason.last_sent_ts|format_ts("%c") }},
-                                which is more than {{ "%.1f"|format(reason.throttle_ms / (60*1000)) }} (current throttle_ms) mins ago.
-                            {%- else %}
-                                and we don't have a last time we sent a mail for this room.
-                            {%- endif %}
-                        </div>
-                    </div>
-                </td>
-                <td> </td>
-            </tr>
-        </table>
-    </body>
-</html>
+{% block title %}New activity in room{% endblock %}
+
+{% block header %}
+<style type="text/css">
+    {%- include 'mail.css' without context %}
+    {%- include "mail-%s.css" % app_name ignore missing without context %}
+</style>
+{% endblock %}
+
+{% block body %}
+<table id="page">
+    <tr>
+        <td> </td>
+        <td id="inner">
+            <table class="header">
+                <tr>
+                    <td>
+                        <div class="salutation">Hi {{ user_display_name }},</div>
+                        <div class="summarytext">{{ summary_text }}</div>
+                    </td>
+                    <td class="logo">
+                        {%- if app_name == "Riot" %}
+                            <img src="http://riot.im/img/external/riot-logo-email.png" width="83" height="83" alt="[Riot]"/>
+                        {%- elif app_name == "Vector" %}
+                            <img src="http://matrix.org/img/vector-logo-email.png" width="64" height="83" alt="[Vector]"/>
+                        {%- elif app_name == "Element" %}
+                            <img src="https://static.element.io/images/email-logo.png" width="83" height="83" alt="[Element]"/>
+                        {%- else %}
+                            <img src="http://matrix.org/img/matrix-120x51.png" width="120" height="51" alt="[matrix]"/>
+                        {%- endif %}
+                    </td>
+                </tr>
+            </table>
+            {%- for room in rooms %}
+                {%- include 'room.html' with context %}
+            {%- endfor %}
+            <div class="footer">
+                <a href="{{ unsubscribe_link }}">Unsubscribe</a>
+                <br/>
+                <br/>
+                <div class="debug">
+                    Sending email at {{ reason.now|format_ts("%c") }} due to activity in room {{ reason.room_name }} because
+                    an event was received at {{ reason.received_at|format_ts("%c") }}
+                    which is more than {{ "%.1f"|format(reason.delay_before_mail_ms / (60*1000)) }} ({{ reason.delay_before_mail_ms }}) mins ago,
+                    {%- if reason.last_sent_ts %}
+                        and the last time we sent a mail for this room was {{ reason.last_sent_ts|format_ts("%c") }},
+                        which is more than {{ "%.1f"|format(reason.throttle_ms / (60*1000)) }} (current throttle_ms) mins ago.
+                    {%- else %}
+                        and we don't have a last time we sent a mail for this room.
+                    {%- endif %}
+                </div>
+            </div>
+        </td>
+        <td> </td>
+    </tr>
+</table>
+{% endblock %}
diff --git a/synapse/res/templates/password_reset.html b/synapse/res/templates/password_reset.html
index a8bdce357b..de5a9ec68f 100644
--- a/synapse/res/templates/password_reset.html
+++ b/synapse/res/templates/password_reset.html
@@ -1,14 +1,9 @@
-<html lang="en">
-    <head>
-        <title>Password reset</title>
-        <meta http-equiv="X-UA-Compatible" content="IE=edge">
-        <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    </head>
-<body>
-    <p>A password reset request has been received for your Matrix account. If this was you, please click the link below to confirm resetting your password:</p>
+{% block title %}Password reset{% endblock %}
 
-    <a href="{{ link }}">{{ link }}</a>
+{% block body %}
+<p>A password reset request has been received for your Matrix account. If this was you, please click the link below to confirm resetting your password:</p>
 
-    <p>If this was not you, <strong>do not</strong> click the link above and instead contact your server administrator. Thank you.</p>
-</body>
-</html>
+<a href="{{ link }}">{{ link }}</a>
+
+<p>If this was not you, <strong>do not</strong> click the link above and instead contact your server administrator. Thank you.</p>
+{% endblock %}
diff --git a/synapse/res/templates/password_reset_confirmation.html b/synapse/res/templates/password_reset_confirmation.html
index 2e3fd2ec1e..0eac64b6a8 100644
--- a/synapse/res/templates/password_reset_confirmation.html
+++ b/synapse/res/templates/password_reset_confirmation.html
@@ -1,10 +1,6 @@
-<html lang="en">
-<head>
-    <title>Password reset confirmation</title>
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-</head>
-<body>
+{% block title %}Password reset confirmation{% endblock %}
+
+{% block body %}
 <!--Use a hidden form to resubmit the information necessary to reset the password-->
 <form method="post">
     <input type="hidden" name="sid" value="{{ sid }}">
@@ -15,6 +11,4 @@
         If you did not mean to do this, please close this page and your password will not be changed.</p>
     <p><button type="submit">Confirm changing my password</button></p>
 </form>
-</body>
-</html>
-
+{% endblock %}
diff --git a/synapse/res/templates/password_reset_failure.html b/synapse/res/templates/password_reset_failure.html
index 2d59c463f0..977babdb40 100644
--- a/synapse/res/templates/password_reset_failure.html
+++ b/synapse/res/templates/password_reset_failure.html
@@ -1,12 +1,6 @@
-<html lang="en">
-<head>
-    <title>Password reset failure</title>
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-</head>
-<body>
-<p>The request failed for the following reason: {{ failure_reason }}.</p>
+{% block title %}Password reset failure{% endblock %}
 
+{% block body %}
+<p>The request failed for the following reason: {{ failure_reason }}.</p>
 <p>Your password has not been reset.</p>
-</body>
-</html>
+{% endblock %}
diff --git a/synapse/res/templates/password_reset_success.html b/synapse/res/templates/password_reset_success.html
index 5165bd1fa2..0e99fad7ff 100644
--- a/synapse/res/templates/password_reset_success.html
+++ b/synapse/res/templates/password_reset_success.html
@@ -1,9 +1,5 @@
-<html lang="en">
-<head>
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-</head>
-<body>
+{% block title %}Password reset success{% endblock %}
+
+{% block body %}
 <p>Your email has now been validated, please return to your client to reset your password. You may now close this window.</p>
-</body>
-</html>
+{% endblock %}
diff --git a/synapse/res/templates/recaptcha.html b/synapse/res/templates/recaptcha.html
index 615d3239c6..feaf3f6aed 100644
--- a/synapse/res/templates/recaptcha.html
+++ b/synapse/res/templates/recaptcha.html
@@ -1,10 +1,7 @@
-<html>
-<head>
-<title>Authentication</title>
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-<script src="https://www.recaptcha.net/recaptcha/api.js"
-    async defer></script>
+{% block title %}Authentication{% endblock %}
+
+{% block header %}
+<script src="https://www.recaptcha.net/recaptcha/api.js" async defer></script>
 <script src="//code.jquery.com/jquery-1.11.2.min.js"></script>
 <link rel="stylesheet" href="/_matrix/static/client/register/style.css">
 <script>
@@ -12,8 +9,9 @@ function captchaDone() {
     $('#registrationForm').submit();
 }
 </script>
-</head>
-<body>
+{% endblock %}
+
+{% block body %}
 <form id="registrationForm" method="post" action="{{ myurl }}">
     <div>
         {% if error is defined %}
@@ -37,5 +35,4 @@ function captchaDone() {
         </div>
     </div>
 </form>
-</body>
-</html>
+{% endblock %}
\ No newline at end of file
diff --git a/synapse/res/templates/registration.html b/synapse/res/templates/registration.html
index 20e831ff4a..189960a832 100644
--- a/synapse/res/templates/registration.html
+++ b/synapse/res/templates/registration.html
@@ -1,16 +1,11 @@
-<html lang="en">
-<head>
-    <title>Registration</title>
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-</head>
-<body>
-    <p>You have asked us to register this email with a new Matrix account. If this was you, please click the link below to confirm your email address:</p>
+{% block title %}Registration{% endblock %}
 
-    <a href="{{ link }}">Verify Your Email Address</a>
+{% block body %}
+<p>You have asked us to register this email with a new Matrix account. If this was you, please click the link below to confirm your email address:</p>
 
-    <p>If this was not you, you can safely disregard this email.</p>
+<a href="{{ link }}">Verify Your Email Address</a>
 
-    <p>Thank you.</p>
-</body>
-</html>
+<p>If this was not you, you can safely disregard this email.</p>
+
+<p>Thank you.</p>
+{% endblock %}
diff --git a/synapse/res/templates/registration_failure.html b/synapse/res/templates/registration_failure.html
index a6ed22bc90..3debe9301d 100644
--- a/synapse/res/templates/registration_failure.html
+++ b/synapse/res/templates/registration_failure.html
@@ -1,9 +1,5 @@
-<html lang="en">
-<head>
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-</head>
-<body>
+{% block title %}Registration failure{% endblock %}
+
+{% block body %}
 <p>Validation failed for the following reason: {{ failure_reason }}.</p>
-</body>
-</html>
+{% endblock %}
diff --git a/synapse/res/templates/registration_success.html b/synapse/res/templates/registration_success.html
index d51d5549d8..e2dd020a9e 100644
--- a/synapse/res/templates/registration_success.html
+++ b/synapse/res/templates/registration_success.html
@@ -1,10 +1,5 @@
-<html lang="en">
-<head>
-    <title>Your email has now been validated</title>
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-</head>
-<body>
+{% block title %}Your email has now been validated{% endblock %}
+
+{% block body %}
 <p>Your email has now been validated, please return to your client. You may now close this window.</p>
-</body>
-</html>
+{% endblock %}
diff --git a/synapse/res/templates/registration_token.html b/synapse/res/templates/registration_token.html
index 59a98f564c..2ee5866ba5 100644
--- a/synapse/res/templates/registration_token.html
+++ b/synapse/res/templates/registration_token.html
@@ -1,11 +1,10 @@
-<html lang="en">
-<head>
-<title>Authentication</title>
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+{% block title %}Authentication{% endblock %}
+
+{% block header %}
 <link rel="stylesheet" href="/_matrix/static/client/register/style.css">
-</head>
-<body>
+{% endblock %}
+
+{% block body %}
 <form id="registrationForm" method="post" action="{{ myurl }}">
     <div>
         {% if error is defined %}
@@ -19,5 +18,4 @@
         <input type="submit" value="Authenticate" />
     </div>
 </form>
-</body>
-</html>
+{% endblock %}
diff --git a/synapse/res/templates/sso_account_deactivated.html b/synapse/res/templates/sso_account_deactivated.html
index 075f801cec..c634229840 100644
--- a/synapse/res/templates/sso_account_deactivated.html
+++ b/synapse/res/templates/sso_account_deactivated.html
@@ -1,25 +1,24 @@
-<!DOCTYPE html>
-<html lang="en">
-    <head>
-        <meta charset="UTF-8">
-        <title>SSO account deactivated</title>
-        <meta http-equiv="X-UA-Compatible" content="IE=edge">
-        <meta name="viewport" content="width=device-width, initial-scale=1.0">        <style type="text/css">
-            {% include "sso.css" without context %}
-        </style>
-    </head>
-    <body class="error_page">
-        <header>
-            <h1>Your account has been deactivated</h1>
-            <p>
-                <strong>No account found</strong>
-            </p>
-            <p>
-                Your account might have been deactivated by the server administrator.
-                You can either try to create a new account or contact the server’s
-                administrator.
-            </p>
-        </header>
-        {% include "sso_footer.html" without context %}
-    </body>
-</html>
+{% block title %}SSO account deactivated{% endblock %}
+
+{% block header %}
+<style type="text/css">
+    {% include "sso.css" without context %}
+</style>
+{% endblock %}
+
+{% block body %}
+<div class="error_page">
+    <header>
+        <h1>Your account has been deactivated</h1>
+        <p>
+            <strong>No account found</strong>
+        </p>
+        <p>
+            Your account might have been deactivated by the server administrator.
+            You can either try to create a new account or contact the server’s
+            administrator.
+        </p>
+    </header>
+</div>
+{% include "sso_footer.html" without context %}
+{% endblock %}
diff --git a/synapse/res/templates/sso_auth_account_details.html b/synapse/res/templates/sso_auth_account_details.html
index 2d1db386e1..b516333373 100644
--- a/synapse/res/templates/sso_auth_account_details.html
+++ b/synapse/res/templates/sso_auth_account_details.html
@@ -1,189 +1,185 @@
-<!DOCTYPE html>
-<html lang="en">
-  <head>
-    <title>Create your account</title>
-    <meta charset="utf-8">
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <script type="text/javascript">
-      let wasKeyboard = false;
-      document.addEventListener("mousedown", function() { wasKeyboard = false; });
-      document.addEventListener("keydown", function() { wasKeyboard = true; });
-      document.addEventListener("focusin", function() {
-        if (wasKeyboard) {
-          document.body.classList.add("keyboard-focus");
-        } else {
-          document.body.classList.remove("keyboard-focus");
-        }
-      });
-    </script>
-    <style type="text/css">
-      {% include "sso.css" without context %}
-
-      body.keyboard-focus :focus, body.keyboard-focus .username_input:focus-within {
-        outline: 3px solid #17191C;
-        outline-offset: 4px;
-      }
-
-      .username_input {
-        display: flex;
-        border: 2px solid #418DED;
-        border-radius: 8px;
-        padding: 12px;
-        position: relative;
-        margin: 16px 0;
-        align-items: center;
-        font-size: 12px;
-      }
-
-      .username_input.invalid {
-        border-color: #FE2928;
-      }
-
-      .username_input.invalid input, .username_input.invalid label {
-        color: #FE2928;
-      }
-
-      .username_input div, .username_input input {
-        line-height: 18px;
-        font-size: 14px;
-      }
-
-      .username_input label {
-        position: absolute;
-        top: -5px;
-        left: 14px;
-        font-size: 10px;
-        line-height: 10px;
-        background: white;
-        padding: 0 2px;
-      }
-
-      .username_input input {
-        flex: 1;
-        display: block;
-        min-width: 0;
-        border: none;
-      }
-
-      /* only clear the outline if we know it will be shown on the parent div using :focus-within */
-      @supports selector(:focus-within) {
-        .username_input input {
-          outline: none !important;
-        }
-      }
-
-      .username_input div {
-        color: #8D99A5;
-      }
-
-      .idp-pick-details {
-        border: 1px solid #E9ECF1;
-        border-radius: 8px;
-        margin: 24px 0;
-      }
-
-      .idp-pick-details h2 {
-        margin: 0;
-        padding: 8px 12px;
-      }
-
-      .idp-pick-details .idp-detail {
-        border-top: 1px solid #E9ECF1;
-        padding: 12px;
-        display: block;
-      }
-      .idp-pick-details .check-row {
-        display: flex;
-        align-items: center;
-      }
-
-      .idp-pick-details .check-row .name {
-        flex: 1;
-      }
-
-      .idp-pick-details .use, .idp-pick-details .idp-value {
-        color: #737D8C;
-      }
-
-      .idp-pick-details .idp-value {
-        margin: 0;
-        margin-top: 8px;
-      }
-
-      .idp-pick-details .avatar {
-        width: 53px;
-        height: 53px;
-        border-radius: 100%;
-        display: block;
-        margin-top: 8px;
-      }
-
-      output {
-        padding: 0 14px;
-        display: block;
-      }
-
-      output.error {
-        color: #FE2928;
-      }
-    </style>
-  </head>
-  <body>
-    <header>
-      <h1>Create your account</h1>
-      <p>This is required. Continue to create your account on {{ server_name }}. You can't change this later.</p>
-    </header>
-    <main>
-      <form method="post" class="form__input" id="form">
-        <div class="username_input" id="username_input">
-          <label for="field-username">Username (required)</label>
-          <div class="prefix">@</div>
-          <input type="text" name="username" id="field-username" value="{{ user_attributes.localpart }}" autofocus autocorrect="off" autocapitalize="none">
-          <div class="postfix">:{{ server_name }}</div>
+{% block title %}Create your account{% endblock %}
+
+{% block header %}
+<script type="text/javascript">
+  let wasKeyboard = false;
+  document.addEventListener("mousedown", function() { wasKeyboard = false; });
+  document.addEventListener("keydown", function() { wasKeyboard = true; });
+  document.addEventListener("focusin", function() {
+    if (wasKeyboard) {
+      document.body.classList.add("keyboard-focus");
+    } else {
+      document.body.classList.remove("keyboard-focus");
+    }
+  });
+</script>
+<style type="text/css">
+  {% include "sso.css" without context %}
+
+  body.keyboard-focus :focus, body.keyboard-focus .username_input:focus-within {
+    outline: 3px solid #17191C;
+    outline-offset: 4px;
+  }
+
+  .username_input {
+    display: flex;
+    border: 2px solid #418DED;
+    border-radius: 8px;
+    padding: 12px;
+    position: relative;
+    margin: 16px 0;
+    align-items: center;
+    font-size: 12px;
+  }
+
+  .username_input.invalid {
+    border-color: #FE2928;
+  }
+
+  .username_input.invalid input, .username_input.invalid label {
+    color: #FE2928;
+  }
+
+  .username_input div, .username_input input {
+    line-height: 18px;
+    font-size: 14px;
+  }
+
+  .username_input label {
+    position: absolute;
+    top: -5px;
+    left: 14px;
+    font-size: 10px;
+    line-height: 10px;
+    background: white;
+    padding: 0 2px;
+  }
+
+  .username_input input {
+    flex: 1;
+    display: block;
+    min-width: 0;
+    border: none;
+  }
+
+  /* only clear the outline if we know it will be shown on the parent div using :focus-within */
+  @supports selector(:focus-within) {
+    .username_input input {
+      outline: none !important;
+    }
+  }
+
+  .username_input div {
+    color: #8D99A5;
+  }
+
+  .idp-pick-details {
+    border: 1px solid #E9ECF1;
+    border-radius: 8px;
+    margin: 24px 0;
+  }
+
+  .idp-pick-details h2 {
+    margin: 0;
+    padding: 8px 12px;
+  }
+
+  .idp-pick-details .idp-detail {
+    border-top: 1px solid #E9ECF1;
+    padding: 12px;
+    display: block;
+  }
+  .idp-pick-details .check-row {
+    display: flex;
+    align-items: center;
+  }
+
+  .idp-pick-details .check-row .name {
+    flex: 1;
+  }
+
+  .idp-pick-details .use, .idp-pick-details .idp-value {
+    color: #737D8C;
+  }
+
+  .idp-pick-details .idp-value {
+    margin: 0;
+    margin-top: 8px;
+  }
+
+  .idp-pick-details .avatar {
+    width: 53px;
+    height: 53px;
+    border-radius: 100%;
+    display: block;
+    margin-top: 8px;
+  }
+
+  output {
+    padding: 0 14px;
+    display: block;
+  }
+
+  output.error {
+    color: #FE2928;
+  }
+</style>
+{% endblock %}
+
+{% block body %}
+<header>
+  <h1>Create your account</h1>
+  <p>This is required. Continue to create your account on {{ server_name }}. You can't change this later.</p>
+</header>
+<main>
+  <form method="post" class="form__input" id="form">
+    <div class="username_input" id="username_input">
+      <label for="field-username">Username (required)</label>
+      <div class="prefix">@</div>
+      <input type="text" name="username" id="field-username" value="{{ user_attributes.localpart }}" autofocus autocorrect="off" autocapitalize="none">
+      <div class="postfix">:{{ server_name }}</div>
+    </div>
+    <output for="username_input" id="field-username-output"></output>
+    <input type="submit" value="Continue" class="primary-button">
+    {% if user_attributes.avatar_url or user_attributes.display_name or user_attributes.emails %}
+    <section class="idp-pick-details">
+      <h2>{% if idp.idp_icon %}<img src="{{ idp.idp_icon | mxc_to_http(24, 24) }}"/>{% endif %}Optional data from {{ idp.idp_name }}</h2>
+      {% if user_attributes.avatar_url %}
+      <label class="idp-detail idp-avatar" for="idp-avatar">
+        <div class="check-row">
+          <span class="name">Avatar</span>
+          <span class="use">Use</span>
+          <input type="checkbox" name="use_avatar" id="idp-avatar" value="true" checked>
         </div>
-        <output for="username_input" id="field-username-output"></output>
-        <input type="submit" value="Continue" class="primary-button">
-        {% if user_attributes.avatar_url or user_attributes.display_name or user_attributes.emails %}
-        <section class="idp-pick-details">
-          <h2>{% if idp.idp_icon %}<img src="{{ idp.idp_icon | mxc_to_http(24, 24) }}"/>{% endif %}Optional data from {{ idp.idp_name }}</h2>
-          {% if user_attributes.avatar_url %}
-          <label class="idp-detail idp-avatar" for="idp-avatar">
-            <div class="check-row">
-              <span class="name">Avatar</span>
-              <span class="use">Use</span>
-              <input type="checkbox" name="use_avatar" id="idp-avatar" value="true" checked>
-            </div>
-            <img src="{{ user_attributes.avatar_url }}" class="avatar" />
-          </label>
-          {% endif %}
-          {% if user_attributes.display_name %}
-          <label class="idp-detail" for="idp-displayname">
-            <div class="check-row">
-              <span class="name">Display name</span>
-              <span class="use">Use</span>
-              <input type="checkbox" name="use_display_name" id="idp-displayname" value="true" checked>
-            </div>
-            <p class="idp-value">{{ user_attributes.display_name }}</p>
-          </label>
-          {% endif %}
-          {% for email in user_attributes.emails %}
-          <label class="idp-detail" for="idp-email{{ loop.index }}">
-            <div class="check-row">
-              <span class="name">E-mail</span>
-              <span class="use">Use</span>
-              <input type="checkbox" name="use_email" id="idp-email{{ loop.index }}" value="{{ email }}" checked>
-            </div>
-            <p class="idp-value">{{ email }}</p>
-          </label>
-          {% endfor %}
-        </section>
-        {% endif %}
-      </form>
-    </main>
-    {% include "sso_footer.html" without context %}
-    <script type="text/javascript">
-      {% include "sso_auth_account_details.js" without context %}
-    </script>
-  </body>
-</html>
+        <img src="{{ user_attributes.avatar_url }}" class="avatar" />
+      </label>
+      {% endif %}
+      {% if user_attributes.display_name %}
+      <label class="idp-detail" for="idp-displayname">
+        <div class="check-row">
+          <span class="name">Display name</span>
+          <span class="use">Use</span>
+          <input type="checkbox" name="use_display_name" id="idp-displayname" value="true" checked>
+        </div>
+        <p class="idp-value">{{ user_attributes.display_name }}</p>
+      </label>
+      {% endif %}
+      {% for email in user_attributes.emails %}
+      <label class="idp-detail" for="idp-email{{ loop.index }}">
+        <div class="check-row">
+          <span class="name">E-mail</span>
+          <span class="use">Use</span>
+          <input type="checkbox" name="use_email" id="idp-email{{ loop.index }}" value="{{ email }}" checked>
+        </div>
+        <p class="idp-value">{{ email }}</p>
+      </label>
+      {% endfor %}
+    </section>
+    {% endif %}
+  </form>
+</main>
+{% include "sso_footer.html" without context %}
+<script type="text/javascript">
+  {% include "sso_auth_account_details.js" without context %}
+</script>
+{% endblock %}
diff --git a/synapse/res/templates/sso_auth_bad_user.html b/synapse/res/templates/sso_auth_bad_user.html
index 94403fc3ce..69fdcc9ef0 100644
--- a/synapse/res/templates/sso_auth_bad_user.html
+++ b/synapse/res/templates/sso_auth_bad_user.html
@@ -1,27 +1,25 @@
-<!DOCTYPE html>
-<html lang="en">
-    <head>
-        <meta charset="UTF-8">
-        <title>Authentication failed</title>
-        <meta http-equiv="X-UA-Compatible" content="IE=edge">
-        <meta name="viewport" content="width=device-width, initial-scale=1.0">
-        <style type="text/css">
-            {% include "sso.css" without context %}
-        </style>
-    </head>
-    <body class="error_page">
-        <header>
-            <h1>That doesn't look right</h1>
-            <p>
-                <strong>We were unable to validate your {{ server_name }} account</strong>
-                via single&nbsp;sign&#8209;on&nbsp;(SSO), because the SSO Identity
-                Provider returned different details than when you logged in.
-            </p>
-            <p>
-                Try the operation again, and ensure that you use the same details on
-                the Identity Provider as when you log into your account.
-            </p>
-        </header>
-        {% include "sso_footer.html" without context %}
-    </body>
-</html>
+{% block title %}Authentication failed{% endblock %}
+
+{% block header %}
+<style type="text/css">
+    {% include "sso.css" without context %}
+</style>
+{% endblock %}
+
+{% block body %}
+<div class="error_page">
+    <header>
+        <h1>That doesn't look right</h1>
+        <p>
+            <strong>We were unable to validate your {{ server_name }} account</strong>
+            via single&nbsp;sign&#8209;on&nbsp;(SSO), because the SSO Identity
+            Provider returned different details than when you logged in.
+        </p>
+        <p>
+            Try the operation again, and ensure that you use the same details on
+            the Identity Provider as when you log into your account.
+        </p>
+    </header>
+</div>
+{% include "sso_footer.html" without context %}
+{% endblock %}
diff --git a/synapse/res/templates/sso_auth_confirm.html b/synapse/res/templates/sso_auth_confirm.html
index aa1c974a6b..2d106e0ae4 100644
--- a/synapse/res/templates/sso_auth_confirm.html
+++ b/synapse/res/templates/sso_auth_confirm.html
@@ -1,30 +1,26 @@
-<!DOCTYPE html>
-<html lang="en">
-    <head>
-        <meta charset="UTF-8">
-        <title>Confirm it's you</title>
-        <meta http-equiv="X-UA-Compatible" content="IE=edge">
-        <meta name="viewport" content="width=device-width, initial-scale=1.0">
-        <style type="text/css">
-            {% include "sso.css" without context %}
-        </style>
-    </head>
-    <body>
-        <header>
-            <h1>Confirm it's you to continue</h1>
-            <p>
-                A client is trying to {{ description }}. To confirm this action
-                re-authorize your account with single sign-on.
-            </p>
-            <p><strong>
-                If you did not expect this, your account may be compromised.
-            </strong></p>
-        </header>
-        <main>
-            <a href="{{ redirect_url }}" class="primary-button">
-                Continue with {{ idp.idp_name }}
-            </a>
-        </main>
-        {% include "sso_footer.html" without context %}
-    </body>
-</html>
+{% block title %}Confirm it's you{% endblock %}
+
+{% block header %}
+<style type="text/css">
+    {% include "sso.css" without context %}
+</style>
+{% endblock %}
+
+{% block body %}
+<header>
+    <h1>Confirm it's you to continue</h1>
+    <p>
+        A client is trying to {{ description }}. To confirm this action
+        re-authorize your account with single sign-on.
+    </p>
+    <p><strong>
+        If you did not expect this, your account may be compromised.
+    </strong></p>
+</header>
+<main>
+    <a href="{{ redirect_url }}" class="primary-button">
+        Continue with {{ idp.idp_name }}
+    </a>
+</main>
+{% include "sso_footer.html" without context %}
+{% endblock %}
diff --git a/synapse/res/templates/sso_auth_success.html b/synapse/res/templates/sso_auth_success.html
index 4898af6011..56150eaefe 100644
--- a/synapse/res/templates/sso_auth_success.html
+++ b/synapse/res/templates/sso_auth_success.html
@@ -1,29 +1,25 @@
-<!DOCTYPE html>
-<html lang="en">
-    <head>
-        <meta charset="UTF-8">
-        <title>Authentication successful</title>
-        <meta http-equiv="X-UA-Compatible" content="IE=edge">
-        <meta name="viewport" content="width=device-width, initial-scale=1.0">
-        <style type="text/css">
-            {% include "sso.css" without context %}
-        </style>
-        <script>
-            if (window.onAuthDone) {
-                window.onAuthDone();
-            } else if (window.opener && window.opener.postMessage) {
-                window.opener.postMessage("authDone", "*");
-            }
-        </script>
-    </head>
-    <body>
-        <header>
-            <h1>Thank you</h1>
-            <p>
-                Now we know it’s you, you can close this window and return to the
-                application.
-            </p>
-        </header>
-        {% include "sso_footer.html" without context %}
-    </body>
-</html>
+{% block title %}Authentication successful{% endblock %}
+
+{% block header %}
+<style type="text/css">
+    {% include "sso.css" without context %}
+</style>
+<script>
+    if (window.onAuthDone) {
+        window.onAuthDone();
+    } else if (window.opener && window.opener.postMessage) {
+        window.opener.postMessage("authDone", "*");
+    }
+</script>
+{% endblock %}
+
+{% block body %}
+<header>
+    <h1>Thank you</h1>
+    <p>
+        Now we know it’s you, you can close this window and return to the
+        application.
+    </p>
+</header>
+{% include "sso_footer.html" without context %}
+{% endblock %}
diff --git a/synapse/res/templates/sso_error.html b/synapse/res/templates/sso_error.html
index 19992ff2ad..e394a92623 100644
--- a/synapse/res/templates/sso_error.html
+++ b/synapse/res/templates/sso_error.html
@@ -1,19 +1,19 @@
-<!DOCTYPE html>
-<html lang="en">
-    <head>
-        <meta charset="UTF-8">
-        <title>Authentication failed</title>
-        <meta http-equiv="X-UA-Compatible" content="IE=edge">
-        <meta name="viewport" content="width=device-width, initial-scale=1.0">
-        <style type="text/css">
-            {% include "sso.css" without context %}
+{% block title %}Authentication failed{% endblock %}
 
-            #error_code {
-                margin-top: 56px;
-            }
-        </style>
-    </head>
-    <body class="error_page">
+{% block header %}
+{% if error == "unauthorised" %}
+<style type="text/css">
+    {% include "sso.css" without context %}
+
+    #error_code {
+        margin-top: 56px;
+    }
+</style>
+{% endif %}
+{% endblock %}
+
+{% block body %}
+<div class="error_page">
 {# If an error of unauthorised is returned it means we have actively rejected their login #}
 {% if error == "unauthorised" %}
         <header>
@@ -66,5 +66,5 @@
             }
         </script>
 {% endif %}
-</body>
-</html>
+</div>
+{% endblock %}
diff --git a/synapse/res/templates/sso_login_idp_picker.html b/synapse/res/templates/sso_login_idp_picker.html
index 56fabfa3d2..a2772ca9ef 100644
--- a/synapse/res/templates/sso_login_idp_picker.html
+++ b/synapse/res/templates/sso_login_idp_picker.html
@@ -1,63 +1,59 @@
-<!DOCTYPE html>
-<html lang="en">
-    <head>
-        <meta http-equiv="X-UA-Compatible" content="IE=edge">
-        <meta name="viewport" content="width=device-width, initial-scale=1.0">
-        <meta charset="UTF-8">
-        <title>Choose identity provider</title>
-        <style type="text/css">
-          {% include "sso.css" without context %}
+{% block title %}Choose identity provider{% endblock %}
 
-          .providers {
-            list-style: none;
-            padding: 0;
-          }
+{% block header %}
+<style type="text/css">
+  {% include "sso.css" without context %}
 
-          .providers li {
-            margin: 12px;
-          }
+  .providers {
+    list-style: none;
+    padding: 0;
+  }
 
-          .providers a {
-            display: block;
-            border-radius: 4px;
-            border: 1px solid #17191C;
-            padding: 8px;
-            text-align: center;
-            text-decoration: none;
-            color: #17191C;
-            display: flex;
-            align-items: center;
-            font-weight: bold;
-          }
+  .providers li {
+    margin: 12px;
+  }
 
-          .providers a img {
-            width: 24px;
-            height: 24px;
-          }
-          .providers a span {
-            flex: 1;
-          }
-        </style>
-    </head>
-    <body>
-        <header>
-            <h1>Log in to {{ server_name }} </h1>
-            <p>Choose an identity provider to log in</p>
-        </header>
-        <main>
-            <ul class="providers">
-                {% for p in providers %}
-                <li>
-                    <a href="pick_idp?idp={{ p.idp_id }}&redirectUrl={{ redirect_url | urlencode }}">
-                        {% if p.idp_icon %}
-                        <img src="{{ p.idp_icon | mxc_to_http(32, 32) }}"/>
-                        {% endif %}
-                        <span>{{ p.idp_name }}</span>
-                    </a>
-                </li>
-                {% endfor %}
-            </ul>
-        </main>
-        {% include "sso_footer.html" without context %}
-    </body>
-</html>
+  .providers a {
+    display: block;
+    border-radius: 4px;
+    border: 1px solid #17191C;
+    padding: 8px;
+    text-align: center;
+    text-decoration: none;
+    color: #17191C;
+    display: flex;
+    align-items: center;
+    font-weight: bold;
+  }
+
+  .providers a img {
+    width: 24px;
+    height: 24px;
+  }
+  .providers a span {
+    flex: 1;
+  }
+</style>
+{% endblock %}
+
+{% block body %}
+<header>
+    <h1>Log in to {{ server_name }} </h1>
+    <p>Choose an identity provider to log in</p>
+</header>
+<main>
+    <ul class="providers">
+        {% for p in providers %}
+        <li>
+            <a href="pick_idp?idp={{ p.idp_id }}&redirectUrl={{ redirect_url | urlencode }}">
+                {% if p.idp_icon %}
+                <img src="{{ p.idp_icon | mxc_to_http(32, 32) }}"/>
+                {% endif %}
+                <span>{{ p.idp_name }}</span>
+            </a>
+        </li>
+        {% endfor %}
+    </ul>
+</main>
+{% include "sso_footer.html" without context %}
+{% endblock %}
diff --git a/synapse/res/templates/sso_new_user_consent.html b/synapse/res/templates/sso_new_user_consent.html
index 523f64c4fc..126887d26c 100644
--- a/synapse/res/templates/sso_new_user_consent.html
+++ b/synapse/res/templates/sso_new_user_consent.html
@@ -1,33 +1,29 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <title>Agree to terms and conditions</title>
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <style type="text/css">
-      {% include "sso.css" without context %}
+{% block title %}Agree to terms and conditions{% endblock %}
 
-      #consent_form {
-        margin-top: 56px;
-      }
-    </style>
-</head>
-    <body>
-        <header>
-            <h1>Your account is nearly ready</h1>
-            <p>Agree to the terms to create your account.</p>
-        </header>
-        <main>
-            {% include "sso_partial_profile.html" %}
-            <form method="post" action="{{my_url}}" id="consent_form">
-                <p>
-                    <input id="accepted_version" type="checkbox" name="accepted_version" value="{{ consent_version }}" required>
-                    <label for="accepted_version">I have read and agree to the <a href="{{ terms_url }}" target="_blank" rel="noopener">terms and conditions</a>.</label>
-                </p>
-                <input type="submit" class="primary-button" value="Continue"/>
-            </form>
-        </main>
-        {% include "sso_footer.html" without context %}
-    </body>
-</html>
+{% block header %}
+<style type="text/css">
+    {% include "sso.css" without context %}
+
+    #consent_form {
+      margin-top: 56px;
+    }
+</style>
+{% endblock %}
+
+{% block body %}
+<header>
+    <h1>Your account is nearly ready</h1>
+    <p>Agree to the terms to create your account.</p>
+</header>
+<main>
+    {% include "sso_partial_profile.html" %}
+    <form method="post" action="{{my_url}}" id="consent_form">
+        <p>
+            <input id="accepted_version" type="checkbox" name="accepted_version" value="{{ consent_version }}" required>
+            <label for="accepted_version">I have read and agree to the <a href="{{ terms_url }}" target="_blank" rel="noopener">terms and conditions</a>.</label>
+        </p>
+        <input type="submit" class="primary-button" value="Continue"/>
+    </form>
+</main>
+{% include "sso_footer.html" without context %}
+{% endblock %}
diff --git a/synapse/res/templates/sso_redirect_confirm.html b/synapse/res/templates/sso_redirect_confirm.html
index 1049a9bd92..887ee0d294 100644
--- a/synapse/res/templates/sso_redirect_confirm.html
+++ b/synapse/res/templates/sso_redirect_confirm.html
@@ -1,41 +1,38 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <title>Continue to your account</title>
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <style type="text/css">
-      {% include "sso.css" without context %}
+{% block title %}Continue to your account{% endblock %}
 
-      .confirm-trust {
-        margin: 34px 0;
-        color: #8D99A5;
-      }
-      .confirm-trust strong {
-        color: #17191C;
-      }
+{% block header %}
+<style type="text/css">
+  {% include "sso.css" without context %}
 
-      .confirm-trust::before {
-        content: "";
-        background-image: url('data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTgiIGhlaWdodD0iMTgiIHZpZXdCb3g9IjAgMCAxOCAxOCIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KPHBhdGggZmlsbC1ydWxlPSJldmVub2RkIiBjbGlwLXJ1bGU9ImV2ZW5vZGQiIGQ9Ik0xNi41IDlDMTYuNSAxMy4xNDIxIDEzLjE0MjEgMTYuNSA5IDE2LjVDNC44NTc4NiAxNi41IDEuNSAxMy4xNDIxIDEuNSA5QzEuNSA0Ljg1Nzg2IDQuODU3ODYgMS41IDkgMS41QzEzLjE0MjEgMS41IDE2LjUgNC44NTc4NiAxNi41IDlaTTcuMjUgOUM3LjI1IDkuNDY1OTYgNy41Njg2OSA5Ljg1NzQ4IDggOS45Njg1VjEyLjM3NUM4IDEyLjkyNzMgOC40NDc3MiAxMy4zNzUgOSAxMy4zNzVIMTAuMTI1QzEwLjY3NzMgMTMuMzc1IDExLjEyNSAxMi45MjczIDExLjEyNSAxMi4zNzVDMTEuMTI1IDExLjgyMjcgMTAuNjc3MyAxMS4zNzUgMTAuMTI1IDExLjM3NUgxMFY5QzEwIDguOTY1NDggOS45OTgyNSA4LjkzMTM3IDkuOTk0ODQgOC44OTc3NkM5Ljk0MzYzIDguMzkzNSA5LjUxNzc3IDggOSA4SDguMjVDNy42OTc3MiA4IDcuMjUgOC40NDc3MiA3LjI1IDlaTTkgNy41QzkuNjIxMzIgNy41IDEwLjEyNSA2Ljk5NjMyIDEwLjEyNSA2LjM3NUMxMC4xMjUgNS43NTM2OCA5LjYyMTMyIDUuMjUgOSA1LjI1QzguMzc4NjggNS4yNSA3Ljg3NSA1Ljc1MzY4IDcuODc1IDYuMzc1QzcuODc1IDYuOTk2MzIgOC4zNzg2OCA3LjUgOSA3LjVaIiBmaWxsPSIjQzFDNkNEIi8+Cjwvc3ZnPgoK');
-        background-repeat: no-repeat;
-        width: 24px;
-        height: 24px;
-        display: block;
-        float: left;
-      }
-    </style>
-</head>
-    <body>
-        <header>
-            <h1>Continue to your account</h1>
-        </header>
-        <main>
-            {% include "sso_partial_profile.html" %}
-            <p class="confirm-trust">Continuing will grant <strong>{{ display_url }}</strong> access to your account.</p>
-            <a href="{{ redirect_url }}" class="primary-button">Continue</a>
-        </main>
-        {% include "sso_footer.html" without context %}
-    </body>
-</html>
+  .confirm-trust {
+    margin: 34px 0;
+    color: #8D99A5;
+  }
+  .confirm-trust strong {
+    color: #17191C;
+  }
+
+  .confirm-trust::before {
+    content: "";
+    background-image: url('data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTgiIGhlaWdodD0iMTgiIHZpZXdCb3g9IjAgMCAxOCAxOCIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KPHBhdGggZmlsbC1ydWxlPSJldmVub2RkIiBjbGlwLXJ1bGU9ImV2ZW5vZGQiIGQ9Ik0xNi41IDlDMTYuNSAxMy4xNDIxIDEzLjE0MjEgMTYuNSA5IDE2LjVDNC44NTc4NiAxNi41IDEuNSAxMy4xNDIxIDEuNSA5QzEuNSA0Ljg1Nzg2IDQuODU3ODYgMS41IDkgMS41QzEzLjE0MjEgMS41IDE2LjUgNC44NTc4NiAxNi41IDlaTTcuMjUgOUM3LjI1IDkuNDY1OTYgNy41Njg2OSA5Ljg1NzQ4IDggOS45Njg1VjEyLjM3NUM4IDEyLjkyNzMgOC40NDc3MiAxMy4zNzUgOSAxMy4zNzVIMTAuMTI1QzEwLjY3NzMgMTMuMzc1IDExLjEyNSAxMi45MjczIDExLjEyNSAxMi4zNzVDMTEuMTI1IDExLjgyMjcgMTAuNjc3MyAxMS4zNzUgMTAuMTI1IDExLjM3NUgxMFY5QzEwIDguOTY1NDggOS45OTgyNSA4LjkzMTM3IDkuOTk0ODQgOC44OTc3NkM5Ljk0MzYzIDguMzkzNSA5LjUxNzc3IDggOSA4SDguMjVDNy42OTc3MiA4IDcuMjUgOC40NDc3MiA3LjI1IDlaTTkgNy41QzkuNjIxMzIgNy41IDEwLjEyNSA2Ljk5NjMyIDEwLjEyNSA2LjM3NUMxMC4xMjUgNS43NTM2OCA5LjYyMTMyIDUuMjUgOSA1LjI1QzguMzc4NjggNS4yNSA3Ljg3NSA1Ljc1MzY4IDcuODc1IDYuMzc1QzcuODc1IDYuOTk2MzIgOC4zNzg2OCA3LjUgOSA3LjVaIiBmaWxsPSIjQzFDNkNEIi8+Cjwvc3ZnPgoK');
+    background-repeat: no-repeat;
+    width: 24px;
+    height: 24px;
+    display: block;
+    float: left;
+  }
+</style>
+{% endblock %}
+
+{% block body %}
+<header>
+    <h1>Continue to your account</h1>
+</header>
+<main>
+    {% include "sso_partial_profile.html" %}
+    <p class="confirm-trust">Continuing will grant <strong>{{ display_url }}</strong> access to your account.</p>
+    <a href="{{ redirect_url }}" class="primary-button">Continue</a>
+</main>
+{% include "sso_footer.html" without context %}
+
+{% endblock %}
diff --git a/synapse/res/templates/style.css b/synapse/res/templates/style.css
new file mode 100644
index 0000000000..097b235ae5
--- /dev/null
+++ b/synapse/res/templates/style.css
@@ -0,0 +1,29 @@
+html {
+    height: 100%;
+}
+
+body {
+    background: #f9fafb;
+    max-width: 680px;
+    margin: auto;
+    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";
+}
+
+.mx_Header {
+    border-bottom: 3px solid #ddd;
+    margin-bottom: 1rem;
+    padding-top: 1rem;
+    padding-bottom: 1rem;
+    text-align: center;
+}
+
+@media screen and (max-width: 1120px) {
+    body {
+        font-size: 20px;
+    }
+
+    h1 { font-size: 1rem; }
+    h2 { font-size: .9rem; }
+    h3 { font-size: .85rem; }
+    h4 { font-size: .8rem; }
+}
diff --git a/synapse/res/templates/terms.html b/synapse/res/templates/terms.html
index 2081d990ab..977c3d0bc7 100644
--- a/synapse/res/templates/terms.html
+++ b/synapse/res/templates/terms.html
@@ -1,11 +1,10 @@
-<html>
-<head>
-<title>Authentication</title>
-<meta http-equiv="X-UA-Compatible" content="IE=edge">
-<meta name="viewport" content="width=device-width, initial-scale=1.0">
+{% block title %}Authentication{% endblock %}
+
+{% block header %}
 <link rel="stylesheet" href="/_matrix/static/client/register/style.css">
-</head>
-<body>
+{% endblock %}
+
+{% block body %}
 <form id="registrationForm" method="post" action="{{ myurl }}">
     <div>
         {% if error is defined %}
@@ -19,5 +18,4 @@
         <input type="submit" value="Agree" />
     </div>
 </form>
-</body>
-</html>
+{% endblock %}
-- 
cgit 1.5.1


From b7a7ff6ee39da4981dcfdce61bf8ac4735e3d047 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Fri, 21 Oct 2022 10:46:22 -0700
Subject: Add initial power level event to batch of bulk persisted events when
 creating a new room. (#14228)

---
 changelog.d/14228.misc                      |  1 +
 synapse/handlers/federation.py              |  4 +-
 synapse/handlers/federation_event.py        |  4 +-
 synapse/handlers/message.py                 | 14 ++----
 synapse/handlers/room.py                    | 39 ++++-----------
 synapse/push/bulk_push_rule_evaluator.py    | 74 ++++++++++++++++++++++++-----
 tests/push/test_bulk_push_rule_evaluator.py |  2 +-
 tests/replication/_base.py                  |  2 +-
 8 files changed, 82 insertions(+), 58 deletions(-)
 create mode 100644 changelog.d/14228.misc

(limited to 'synapse')

diff --git a/changelog.d/14228.misc b/changelog.d/14228.misc
new file mode 100644
index 0000000000..14fe31a8bc
--- /dev/null
+++ b/changelog.d/14228.misc
@@ -0,0 +1 @@
+Add initial power level event to batch of bulk persisted events when creating a new room.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 275a37a575..4fbc79a6cb 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -1017,7 +1017,9 @@ class FederationHandler:
 
         context = EventContext.for_outlier(self._storage_controllers)
 
-        await self._bulk_push_rule_evaluator.action_for_event_by_user(event, context)
+        await self._bulk_push_rule_evaluator.action_for_events_by_user(
+            [(event, context)]
+        )
         try:
             await self._federation_event_handler.persist_events_and_notify(
                 event.room_id, [(event, context)]
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 06e41b5cc0..7da6316a82 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -2171,8 +2171,8 @@ class FederationEventHandler:
                     min_depth,
                 )
             else:
-                await self._bulk_push_rule_evaluator.action_for_event_by_user(
-                    event, context
+                await self._bulk_push_rule_evaluator.action_for_events_by_user(
+                    [(event, context)]
                 )
 
         try:
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 15b828dd74..468900a07f 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -1433,17 +1433,9 @@ class EventCreationHandler:
             a room that has been un-partial stated.
         """
 
-        for event, context in events_and_context:
-            # Skip push notification actions for historical messages
-            # because we don't want to notify people about old history back in time.
-            # The historical messages also do not have the proper `context.current_state_ids`
-            # and `state_groups` because they have `prev_events` that aren't persisted yet
-            # (historical messages persisted in reverse-chronological order).
-            if not event.internal_metadata.is_historical():
-                with opentracing.start_active_span("calculate_push_actions"):
-                    await self._bulk_push_rule_evaluator.action_for_event_by_user(
-                        event, context
-                    )
+        await self._bulk_push_rule_evaluator.action_for_events_by_user(
+            events_and_context
+        )
 
         try:
             # If we're a worker we need to hit out to the master.
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 638f54051a..cc1e5c8f97 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -1055,9 +1055,6 @@ class RoomCreationHandler:
         event_keys = {"room_id": room_id, "sender": creator_id, "state_key": ""}
         depth = 1
 
-        # the last event sent/persisted to the db
-        last_sent_event_id: Optional[str] = None
-
         # the most recently created event
         prev_event: List[str] = []
         # a map of event types, state keys -> event_ids. We collect these mappings this as events are
@@ -1102,26 +1099,6 @@ class RoomCreationHandler:
 
             return new_event, new_context
 
-        async def send(
-            event: EventBase,
-            context: synapse.events.snapshot.EventContext,
-            creator: Requester,
-        ) -> int:
-            nonlocal last_sent_event_id
-
-            ev = await self.event_creation_handler.handle_new_client_event(
-                requester=creator,
-                events_and_context=[(event, context)],
-                ratelimit=False,
-                ignore_shadow_ban=True,
-            )
-
-            last_sent_event_id = ev.event_id
-
-            # we know it was persisted, so must have a stream ordering
-            assert ev.internal_metadata.stream_ordering
-            return ev.internal_metadata.stream_ordering
-
         try:
             config = self._presets_dict[preset_config]
         except KeyError:
@@ -1135,10 +1112,14 @@ class RoomCreationHandler:
         )
 
         logger.debug("Sending %s in new room", EventTypes.Member)
-        await send(creation_event, creation_context, creator)
+        ev = await self.event_creation_handler.handle_new_client_event(
+            requester=creator,
+            events_and_context=[(creation_event, creation_context)],
+            ratelimit=False,
+            ignore_shadow_ban=True,
+        )
+        last_sent_event_id = ev.event_id
 
-        # Room create event must exist at this point
-        assert last_sent_event_id is not None
         member_event_id, _ = await self.room_member_handler.update_membership(
             creator,
             creator.user,
@@ -1157,6 +1138,7 @@ class RoomCreationHandler:
         depth += 1
         state_map[(EventTypes.Member, creator.user.to_string())] = member_event_id
 
+        events_to_send = []
         # We treat the power levels override specially as this needs to be one
         # of the first events that get sent into a room.
         pl_content = initial_state.pop((EventTypes.PowerLevels, ""), None)
@@ -1165,7 +1147,7 @@ class RoomCreationHandler:
                 EventTypes.PowerLevels, pl_content, False
             )
             current_state_group = power_context._state_group
-            await send(power_event, power_context, creator)
+            events_to_send.append((power_event, power_context))
         else:
             power_level_content: JsonDict = {
                 "users": {creator_id: 100},
@@ -1214,9 +1196,8 @@ class RoomCreationHandler:
                 False,
             )
             current_state_group = pl_context._state_group
-            await send(pl_event, pl_context, creator)
+            events_to_send.append((pl_event, pl_context))
 
-        events_to_send = []
         if room_alias and (EventTypes.CanonicalAlias, "") not in initial_state:
             room_alias_event, room_alias_context = await create_event(
                 EventTypes.CanonicalAlias, {"alias": room_alias.to_string()}, True
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index a75386f6a0..d7795a9080 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -165,8 +165,21 @@ class BulkPushRuleEvaluator:
         return rules_by_user
 
     async def _get_power_levels_and_sender_level(
-        self, event: EventBase, context: EventContext
+        self,
+        event: EventBase,
+        context: EventContext,
+        event_id_to_event: Mapping[str, EventBase],
     ) -> Tuple[dict, Optional[int]]:
+        """
+        Given an event and an event context, get the power level event relevant to the event
+        and the power level of the sender of the event.
+        Args:
+            event: event to check
+            context: context of event to check
+            event_id_to_event: a mapping of event_id to event for a set of events
+                being batch persisted. This is needed as the sought-after power
+                level event may be in this batch rather than the DB.
+        """
         # There are no power levels and sender levels possible to get from outlier
         if event.internal_metadata.is_outlier():
             return {}, None
@@ -177,15 +190,26 @@ class BulkPushRuleEvaluator:
         )
         pl_event_id = prev_state_ids.get(POWER_KEY)
 
+        # fastpath: if there's a power level event, that's all we need, and
+        # not having a power level event is an extreme edge case
         if pl_event_id:
-            # fastpath: if there's a power level event, that's all we need, and
-            # not having a power level event is an extreme edge case
-            auth_events = {POWER_KEY: await self.store.get_event(pl_event_id)}
+            # Get the power level event from the batch, or fall back to the database.
+            pl_event = event_id_to_event.get(pl_event_id)
+            if pl_event:
+                auth_events = {POWER_KEY: pl_event}
+            else:
+                auth_events = {POWER_KEY: await self.store.get_event(pl_event_id)}
         else:
             auth_events_ids = self._event_auth_handler.compute_auth_events(
                 event, prev_state_ids, for_verification=False
             )
             auth_events_dict = await self.store.get_events(auth_events_ids)
+            # Some needed auth events might be in the batch, combine them with those
+            # fetched from the database.
+            for auth_event_id in auth_events_ids:
+                auth_event = event_id_to_event.get(auth_event_id)
+                if auth_event:
+                    auth_events_dict[auth_event_id] = auth_event
             auth_events = {(e.type, e.state_key): e for e in auth_events_dict.values()}
 
         sender_level = get_user_power_level(event.sender, auth_events)
@@ -194,16 +218,38 @@ class BulkPushRuleEvaluator:
 
         return pl_event.content if pl_event else {}, sender_level
 
-    @measure_func("action_for_event_by_user")
-    async def action_for_event_by_user(
-        self, event: EventBase, context: EventContext
+    async def action_for_events_by_user(
+        self, events_and_context: List[Tuple[EventBase, EventContext]]
     ) -> None:
-        """Given an event and context, evaluate the push rules, check if the message
-        should increment the unread count, and insert the results into the
-        event_push_actions_staging table.
+        """Given a list of events and their associated contexts, evaluate the push rules
+        for each event, check if the message should increment the unread count, and
+        insert the results into the event_push_actions_staging table.
         """
-        if not event.internal_metadata.is_notifiable():
-            # Push rules for events that aren't notifiable can't be processed by this
+        # For batched events the power level events may not have been persisted yet,
+        # so we pass in the batched events. Thus if the event cannot be found in the
+        # database we can check in the batch.
+        event_id_to_event = {e.event_id: e for e, _ in events_and_context}
+        for event, context in events_and_context:
+            await self._action_for_event_by_user(event, context, event_id_to_event)
+
+    @measure_func("action_for_event_by_user")
+    async def _action_for_event_by_user(
+        self,
+        event: EventBase,
+        context: EventContext,
+        event_id_to_event: Mapping[str, EventBase],
+    ) -> None:
+
+        if (
+            not event.internal_metadata.is_notifiable()
+            or event.internal_metadata.is_historical()
+        ):
+            # Push rules for events that aren't notifiable can't be processed by this and
+            # we want to skip push notification actions for historical messages
+            # because we don't want to notify people about old history back in time.
+            # The historical messages also do not have the proper `context.current_state_ids`
+            # and `state_groups` because they have `prev_events` that aren't persisted yet
+            # (historical messages persisted in reverse-chronological order).
             return
 
         # Disable counting as unread unless the experimental configuration is
@@ -223,7 +269,9 @@ class BulkPushRuleEvaluator:
         (
             power_levels,
             sender_power_level,
-        ) = await self._get_power_levels_and_sender_level(event, context)
+        ) = await self._get_power_levels_and_sender_level(
+            event, context, event_id_to_event
+        )
 
         # Find the event's thread ID.
         relation = relation_from_event(event)
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
index 675d7df2ac..594e7937a8 100644
--- a/tests/push/test_bulk_push_rule_evaluator.py
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -71,4 +71,4 @@ class TestBulkPushRuleEvaluator(unittest.HomeserverTestCase):
 
         bulk_evaluator = BulkPushRuleEvaluator(self.hs)
         # should not raise
-        self.get_success(bulk_evaluator.action_for_event_by_user(event, context))
+        self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)]))
diff --git a/tests/replication/_base.py b/tests/replication/_base.py
index ce53f808db..121f3d8d65 100644
--- a/tests/replication/_base.py
+++ b/tests/replication/_base.py
@@ -371,7 +371,7 @@ class BaseMultiWorkerStreamTestCase(unittest.HomeserverTestCase):
             config=worker_hs.config.server.listeners[0],
             resource=resource,
             server_version_string="1",
-            max_request_body_size=4096,
+            max_request_body_size=8192,
             reactor=self.reactor,
         )
 
-- 
cgit 1.5.1


From 1469fed0e39d31a063e8a54c2ea027774eec6acb Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Mon, 24 Oct 2022 10:45:10 +0100
Subject: Add debugging to help diagnose lost device-list-update (#14268)

---
 changelog.d/14268.misc                    |  1 +
 synapse/storage/databases/main/devices.py | 54 +++++++++++++++++++++----------
 2 files changed, 38 insertions(+), 17 deletions(-)
 create mode 100644 changelog.d/14268.misc

(limited to 'synapse')

diff --git a/changelog.d/14268.misc b/changelog.d/14268.misc
new file mode 100644
index 0000000000..894b1e1d4c
--- /dev/null
+++ b/changelog.d/14268.misc
@@ -0,0 +1 @@
+Add debugging to help diagnose lost device-list-update.
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 830b076a32..979dd4e17e 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -274,6 +274,13 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
             destination, int(from_stream_id)
         )
         if not has_changed:
+            # debugging for https://github.com/matrix-org/synapse/issues/14251
+            issue_8631_logger.debug(
+                "%s: no change between %i and %i",
+                destination,
+                from_stream_id,
+                now_stream_id,
+            )
             return now_stream_id, []
 
         updates = await self.db_pool.runInteraction(
@@ -1848,7 +1855,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         self,
         txn: LoggingTransaction,
         user_id: str,
-        device_ids: Iterable[str],
+        device_id: str,
         hosts: Collection[str],
         stream_ids: List[int],
         context: Optional[Dict[str, str]],
@@ -1864,6 +1871,21 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         stream_id_iterator = iter(stream_ids)
 
         encoded_context = json_encoder.encode(context)
+        mark_sent = not self.hs.is_mine_id(user_id)
+
+        values = [
+            (
+                destination,
+                next(stream_id_iterator),
+                user_id,
+                device_id,
+                mark_sent,
+                now,
+                encoded_context if whitelisted_homeserver(destination) else "{}",
+            )
+            for destination in hosts
+        ]
+
         self.db_pool.simple_insert_many_txn(
             txn,
             table="device_lists_outbound_pokes",
@@ -1876,23 +1898,21 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
                 "ts",
                 "opentracing_context",
             ),
-            values=[
-                (
-                    destination,
-                    next(stream_id_iterator),
-                    user_id,
-                    device_id,
-                    not self.hs.is_mine_id(
-                        user_id
-                    ),  # We only need to send out update for *our* users
-                    now,
-                    encoded_context if whitelisted_homeserver(destination) else "{}",
-                )
-                for destination in hosts
-                for device_id in device_ids
-            ],
+            values=values,
         )
 
+        # debugging for https://github.com/matrix-org/synapse/issues/14251
+        if issue_8631_logger.isEnabledFor(logging.DEBUG):
+            issue_8631_logger.debug(
+                "Recorded outbound pokes for %s:%s with device stream ids %s",
+                user_id,
+                device_id,
+                {
+                    stream_id: destination
+                    for (destination, stream_id, _, _, _, _, _) in values
+                },
+            )
+
     def _add_device_outbound_room_poke_txn(
         self,
         txn: LoggingTransaction,
@@ -1997,7 +2017,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
                 self._add_device_outbound_poke_to_stream_txn(
                     txn,
                     user_id=user_id,
-                    device_ids=[device_id],
+                    device_id=device_id,
                     hosts=hosts,
                     stream_ids=stream_ids,
                     context=context,
-- 
cgit 1.5.1


From 09b588854e3a6abc4ea2eaa68bb0345f23be5ce8 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 24 Oct 2022 13:05:14 +0100
Subject: Fix `TypeError: 'dict_keys' object is not reversible` (#14280)

---
 changelog.d/14280.bugfix              | 1 +
 synapse/federation/sender/__init__.py | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14280.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14280.bugfix b/changelog.d/14280.bugfix
new file mode 100644
index 0000000000..c546d2be48
--- /dev/null
+++ b/changelog.d/14280.bugfix
@@ -0,0 +1 @@
+Fix broken outbound federation when using Python 3.7. Broke in v1.70.0rc1.
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index 774ecd81b6..3ad483efe0 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -536,8 +536,7 @@ class FederationSender(AbstractFederationSender):
 
                 if event_entries:
                     now = self.clock.time_msec()
-                    last_id = next(reversed(event_ids))
-                    ts = event_to_received_ts[last_id]
+                    ts = max(t for t in event_to_received_ts.values() if t)
                     assert ts is not None
 
                     synapse.metrics.event_processing_lag.labels(
-- 
cgit 1.5.1


From 19c0e55ef7742d67cff1cb6fb7c3e862b86ea788 Mon Sep 17 00:00:00 2001
From: Ryan Miguel <1818590+renegaderyu@users.noreply.github.com>
Date: Mon, 24 Oct 2022 08:55:06 -0700
Subject: Return NOT_JSON if decode fails and defer set_timeline_upper_limit
 ca… (#14262)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Return NOT_JSON if decode fails and defer set_timeline_upper_limit call until after check_valid_filter. Fixes #13661. Signed-off-by: Ryan Miguel <miguel.ryanj@gmail.com>.

* Reword changelog
---
 changelog.d/14262.misc      | 1 +
 synapse/rest/client/sync.py | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14262.misc

(limited to 'synapse')

diff --git a/changelog.d/14262.misc b/changelog.d/14262.misc
new file mode 100644
index 0000000000..c1d23bc67d
--- /dev/null
+++ b/changelog.d/14262.misc
@@ -0,0 +1 @@
+Provide a specific error code when a `/sync` request provides a filter which doesn't represent a JSON object.
diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py
index 8a16459105..f2013faeb2 100644
--- a/synapse/rest/client/sync.py
+++ b/synapse/rest/client/sync.py
@@ -146,12 +146,12 @@ class SyncRestServlet(RestServlet):
         elif filter_id.startswith("{"):
             try:
                 filter_object = json_decoder.decode(filter_id)
-                set_timeline_upper_limit(
-                    filter_object, self.hs.config.server.filter_timeline_limit
-                )
             except Exception:
-                raise SynapseError(400, "Invalid filter JSON")
+                raise SynapseError(400, "Invalid filter JSON", errcode=Codes.NOT_JSON)
             self.filtering.check_valid_filter(filter_object)
+            set_timeline_upper_limit(
+                filter_object, self.hs.config.server.filter_timeline_limit
+            )
             filter_collection = FilterCollection(self.hs, filter_object)
         else:
             try:
-- 
cgit 1.5.1


From 581b37b5d6c1c9430108930a4fe409cf3f86332f Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 24 Oct 2022 12:07:16 -0400
Subject: Revert behavior change for bundling edits of non-message events
 (#14283)

---
 changelog.d/14283.bugfix                    |  1 +
 synapse/storage/databases/main/relations.py | 11 +++++++----
 2 files changed, 8 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14283.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14283.bugfix b/changelog.d/14283.bugfix
new file mode 100644
index 0000000000..a80a8c0361
--- /dev/null
+++ b/changelog.d/14283.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.70.0rc1 where edits to non-message events were aggregated by the homeserver.
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index 1de62ee9df..c022510e76 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -484,11 +484,12 @@ class RelationsWorkerStore(SQLBaseStore):
             the event will map to None.
         """
 
-        # We only allow edits for events that have the same sender and event type.
-        # We can't assert these things during regular event auth so we have to do
-        # the checks post hoc.
+        # We only allow edits for `m.room.message` events that have the same sender
+        # and event type. We can't assert these things during regular event auth so
+        # we have to do the checks post hoc.
 
-        # Fetches latest edit that has the same type and sender as the original.
+        # Fetches latest edit that has the same type and sender as the
+        # original, and is an `m.room.message`.
         if isinstance(self.database_engine, PostgresEngine):
             # The `DISTINCT ON` clause will pick the *first* row it encounters,
             # so ordering by origin server ts + event ID desc will ensure we get
@@ -504,6 +505,7 @@ class RelationsWorkerStore(SQLBaseStore):
                 WHERE
                     %s
                     AND relation_type = ?
+                    AND edit.type = 'm.room.message'
                 ORDER by original.event_id DESC, edit.origin_server_ts DESC, edit.event_id DESC
             """
         else:
@@ -522,6 +524,7 @@ class RelationsWorkerStore(SQLBaseStore):
                 WHERE
                     %s
                     AND relation_type = ?
+                    AND edit.type = 'm.room.message'
                 ORDER by edit.origin_server_ts, edit.event_id
             """
 
-- 
cgit 1.5.1


From 8c94dd3a277d4e11192f98a9ca32cb6638606b66 Mon Sep 17 00:00:00 2001
From: asymmetric <lorenzo@mailbox.org>
Date: Tue, 25 Oct 2022 11:22:55 +0200
Subject: Enable WAL for SQLite (#13897)

Signed-off-by: Lorenzo Manacorda <lorenzo@mailbox.org>
---
 changelog.d/13897.feature         | 1 +
 synapse/storage/engines/sqlite.py | 4 ++++
 2 files changed, 5 insertions(+)
 create mode 100644 changelog.d/13897.feature

(limited to 'synapse')

diff --git a/changelog.d/13897.feature b/changelog.d/13897.feature
new file mode 100644
index 0000000000..d46fdf9fa5
--- /dev/null
+++ b/changelog.d/13897.feature
@@ -0,0 +1 @@
+Enable Write-Ahead Logging for SQLite installs. Contributed by [asymmetric](https://github.com/asymmetric).
diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py
index faa574dbfd..14260442b6 100644
--- a/synapse/storage/engines/sqlite.py
+++ b/synapse/storage/engines/sqlite.py
@@ -88,6 +88,10 @@ class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection, sqlite3.Cursor]):
 
         db_conn.create_function("rank", 1, _rank)
         db_conn.execute("PRAGMA foreign_keys = ON;")
+
+        # Enable WAL.
+        # see https://www.sqlite.org/wal.html
+        db_conn.execute("PRAGMA journal_mode = WAL;")
         db_conn.commit()
 
     def is_deadlock(self, error: Exception) -> bool:
-- 
cgit 1.5.1


From c9dffd5b330553c5803784be5bc0e2479fab79b0 Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Tue, 25 Oct 2022 11:39:25 +0100
Subject: Remove unused `@lru_cache` decorator (#13595)

* Remove unused `@lru_cache` decorator

Spotted this working on something else.

Co-authored-by: David Robertson <davidr@element.io>
---
 changelog.d/13595.misc                |   1 +
 synapse/util/caches/descriptors.py    | 104 ----------------------------------
 tests/util/caches/test_descriptors.py |  40 ++-----------
 3 files changed, 5 insertions(+), 140 deletions(-)
 create mode 100644 changelog.d/13595.misc

(limited to 'synapse')

diff --git a/changelog.d/13595.misc b/changelog.d/13595.misc
new file mode 100644
index 0000000000..71959a6ee7
--- /dev/null
+++ b/changelog.d/13595.misc
@@ -0,0 +1 @@
+Remove unused `@lru_cache` decorator.
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index b3c748ef44..75428d19ba 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -12,7 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import enum
 import functools
 import inspect
 import logging
@@ -146,109 +145,6 @@ class _CacheDescriptorBase:
         )
 
 
-class _LruCachedFunction(Generic[F]):
-    cache: LruCache[CacheKey, Any]
-    __call__: F
-
-
-def lru_cache(
-    *, max_entries: int = 1000, cache_context: bool = False
-) -> Callable[[F], _LruCachedFunction[F]]:
-    """A method decorator that applies a memoizing cache around the function.
-
-    This is more-or-less a drop-in equivalent to functools.lru_cache, although note
-    that the signature is slightly different.
-
-    The main differences with functools.lru_cache are:
-        (a) the size of the cache can be controlled via the cache_factor mechanism
-        (b) the wrapped function can request a "cache_context" which provides a
-            callback mechanism to indicate that the result is no longer valid
-        (c) prometheus metrics are exposed automatically.
-
-    The function should take zero or more arguments, which are used as the key for the
-    cache. Single-argument functions use that argument as the cache key; otherwise the
-    arguments are built into a tuple.
-
-    Cached functions can be "chained" (i.e. a cached function can call other cached
-    functions and get appropriately invalidated when they called caches are
-    invalidated) by adding a special "cache_context" argument to the function
-    and passing that as a kwarg to all caches called. For example:
-
-        @lru_cache(cache_context=True)
-        def foo(self, key, cache_context):
-            r1 = self.bar1(key, on_invalidate=cache_context.invalidate)
-            r2 = self.bar2(key, on_invalidate=cache_context.invalidate)
-            return r1 + r2
-
-    The wrapped function also has a 'cache' property which offers direct access to the
-    underlying LruCache.
-    """
-
-    def func(orig: F) -> _LruCachedFunction[F]:
-        desc = LruCacheDescriptor(
-            orig,
-            max_entries=max_entries,
-            cache_context=cache_context,
-        )
-        return cast(_LruCachedFunction[F], desc)
-
-    return func
-
-
-class LruCacheDescriptor(_CacheDescriptorBase):
-    """Helper for @lru_cache"""
-
-    class _Sentinel(enum.Enum):
-        sentinel = object()
-
-    def __init__(
-        self,
-        orig: Callable[..., Any],
-        max_entries: int = 1000,
-        cache_context: bool = False,
-    ):
-        super().__init__(
-            orig, num_args=None, uncached_args=None, cache_context=cache_context
-        )
-        self.max_entries = max_entries
-
-    def __get__(self, obj: Optional[Any], owner: Optional[Type]) -> Callable[..., Any]:
-        cache: LruCache[CacheKey, Any] = LruCache(
-            cache_name=self.name,
-            max_size=self.max_entries,
-        )
-
-        get_cache_key = self.cache_key_builder
-        sentinel = LruCacheDescriptor._Sentinel.sentinel
-
-        @functools.wraps(self.orig)
-        def _wrapped(*args: Any, **kwargs: Any) -> Any:
-            invalidate_callback = kwargs.pop("on_invalidate", None)
-            callbacks = (invalidate_callback,) if invalidate_callback else ()
-
-            cache_key = get_cache_key(args, kwargs)
-
-            ret = cache.get(cache_key, default=sentinel, callbacks=callbacks)
-            if ret != sentinel:
-                return ret
-
-            # Add our own `cache_context` to argument list if the wrapped function
-            # has asked for one
-            if self.add_cache_context:
-                kwargs["cache_context"] = _CacheContext.get_instance(cache, cache_key)
-
-            ret2 = self.orig(obj, *args, **kwargs)
-            cache.set(cache_key, ret2, callbacks=callbacks)
-
-            return ret2
-
-        wrapped = cast(CachedFunction, _wrapped)
-        wrapped.cache = cache
-        obj.__dict__[self.name] = wrapped
-
-        return wrapped
-
-
 class DeferredCacheDescriptor(_CacheDescriptorBase):
     """A method decorator that applies a memoizing cache around the function.
 
diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py
index 78fd7b6961..43475a307f 100644
--- a/tests/util/caches/test_descriptors.py
+++ b/tests/util/caches/test_descriptors.py
@@ -28,7 +28,7 @@ from synapse.logging.context import (
     make_deferred_yieldable,
 )
 from synapse.util.caches import descriptors
-from synapse.util.caches.descriptors import cached, cachedList, lru_cache
+from synapse.util.caches.descriptors import cached, cachedList
 
 from tests import unittest
 from tests.test_utils import get_awaitable_result
@@ -36,38 +36,6 @@ from tests.test_utils import get_awaitable_result
 logger = logging.getLogger(__name__)
 
 
-class LruCacheDecoratorTestCase(unittest.TestCase):
-    def test_base(self):
-        class Cls:
-            def __init__(self):
-                self.mock = mock.Mock()
-
-            @lru_cache()
-            def fn(self, arg1, arg2):
-                return self.mock(arg1, arg2)
-
-        obj = Cls()
-        obj.mock.return_value = "fish"
-        r = obj.fn(1, 2)
-        self.assertEqual(r, "fish")
-        obj.mock.assert_called_once_with(1, 2)
-        obj.mock.reset_mock()
-
-        # a call with different params should call the mock again
-        obj.mock.return_value = "chips"
-        r = obj.fn(1, 3)
-        self.assertEqual(r, "chips")
-        obj.mock.assert_called_once_with(1, 3)
-        obj.mock.reset_mock()
-
-        # the two values should now be cached
-        r = obj.fn(1, 2)
-        self.assertEqual(r, "fish")
-        r = obj.fn(1, 3)
-        self.assertEqual(r, "chips")
-        obj.mock.assert_not_called()
-
-
 def run_on_reactor():
     d = defer.Deferred()
     reactor.callLater(0, d.callback, 0)
@@ -478,10 +446,10 @@ class DescriptorTestCase(unittest.TestCase):
 
             @cached(cache_context=True)
             async def func2(self, key, cache_context):
-                return self.func3(key, on_invalidate=cache_context.invalidate)
+                return await self.func3(key, on_invalidate=cache_context.invalidate)
 
-            @lru_cache(cache_context=True)
-            def func3(self, key, cache_context):
+            @cached(cache_context=True)
+            async def func3(self, key, cache_context):
                 self.invalidate = cache_context.invalidate
                 return 42
 
-- 
cgit 1.5.1


From 2d0ba3f89aaf9545d81c4027500e543ec70b68a6 Mon Sep 17 00:00:00 2001
From: "DeepBlueV7.X" <nicolas.werner@hotmail.de>
Date: Tue, 25 Oct 2022 13:38:01 +0000
Subject: Implementation for MSC3664: Pushrules for relations (#11804)

---
 changelog.d/11804.feature                   |   1 +
 rust/src/push/base_rules.rs                 |  17 +++
 rust/src/push/evaluator.rs                  |  99 ++++++++++++-
 rust/src/push/mod.rs                        |  61 ++++++--
 stubs/synapse/synapse_rust/push.pyi         |   6 +-
 synapse/config/experimental.py              |   3 +
 synapse/push/bulk_push_rule_evaluator.py    |  49 ++++++-
 synapse/rest/client/capabilities.py         |   5 +
 synapse/storage/databases/main/push_rule.py |  15 +-
 tests/push/test_push_rule_evaluator.py      | 215 +++++++++++++++++++++++++++-
 10 files changed, 454 insertions(+), 17 deletions(-)
 create mode 100644 changelog.d/11804.feature

(limited to 'synapse')

diff --git a/changelog.d/11804.feature b/changelog.d/11804.feature
new file mode 100644
index 0000000000..6420393541
--- /dev/null
+++ b/changelog.d/11804.feature
@@ -0,0 +1 @@
+Implement [MSC3664](https://github.com/matrix-org/matrix-doc/pull/3664). Contributed by Nico.
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index 63240cacfc..49802fa4eb 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -25,6 +25,7 @@ use crate::push::Action;
 use crate::push::Condition;
 use crate::push::EventMatchCondition;
 use crate::push::PushRule;
+use crate::push::RelatedEventMatchCondition;
 use crate::push::SetTweak;
 use crate::push::TweakValue;
 
@@ -114,6 +115,22 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
         default: true,
         default_enabled: true,
     },
+    PushRule {
+        rule_id: Cow::Borrowed("global/override/.im.nheko.msc3664.reply"),
+        priority_class: 5,
+        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::RelatedEventMatch(
+            RelatedEventMatchCondition {
+                key: Some(Cow::Borrowed("sender")),
+                pattern: None,
+                pattern_type: Some(Cow::Borrowed("user_id")),
+                rel_type: Cow::Borrowed("m.in_reply_to"),
+                include_fallbacks: None,
+            },
+        ))]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION, SOUND_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
     PushRule {
         rule_id: Cow::Borrowed("global/override/.m.rule.contains_display_name"),
         priority_class: 5,
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index 0365dd01dc..cedd42c54d 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -23,6 +23,7 @@ use regex::Regex;
 use super::{
     utils::{get_glob_matcher, get_localpart_from_id, GlobMatchType},
     Action, Condition, EventMatchCondition, FilteredPushRules, KnownCondition,
+    RelatedEventMatchCondition,
 };
 
 lazy_static! {
@@ -49,6 +50,13 @@ pub struct PushRuleEvaluator {
     /// The power level of the sender of the event, or None if event is an
     /// outlier.
     sender_power_level: Option<i64>,
+
+    /// The related events, indexed by relation type. Flattened in the same manner as
+    /// `flattened_keys`.
+    related_events_flattened: BTreeMap<String, BTreeMap<String, String>>,
+
+    /// Whether MSC3664 (push rules for related events) is enabled.
+    related_event_match_enabled: bool,
 }
 
 #[pymethods]
@@ -60,6 +68,8 @@ impl PushRuleEvaluator {
         room_member_count: u64,
         sender_power_level: Option<i64>,
         notification_power_levels: BTreeMap<String, i64>,
+        related_events_flattened: BTreeMap<String, BTreeMap<String, String>>,
+        related_event_match_enabled: bool,
     ) -> Result<Self, Error> {
         let body = flattened_keys
             .get("content.body")
@@ -72,6 +82,8 @@ impl PushRuleEvaluator {
             room_member_count,
             notification_power_levels,
             sender_power_level,
+            related_events_flattened,
+            related_event_match_enabled,
         })
     }
 
@@ -156,6 +168,9 @@ impl PushRuleEvaluator {
             KnownCondition::EventMatch(event_match) => {
                 self.match_event_match(event_match, user_id)?
             }
+            KnownCondition::RelatedEventMatch(event_match) => {
+                self.match_related_event_match(event_match, user_id)?
+            }
             KnownCondition::ContainsDisplayName => {
                 if let Some(dn) = display_name {
                     if !dn.is_empty() {
@@ -239,6 +254,79 @@ impl PushRuleEvaluator {
         compiled_pattern.is_match(haystack)
     }
 
+    /// Evaluates a `related_event_match` condition. (MSC3664)
+    fn match_related_event_match(
+        &self,
+        event_match: &RelatedEventMatchCondition,
+        user_id: Option<&str>,
+    ) -> Result<bool, Error> {
+        // First check if related event matching is enabled...
+        if !self.related_event_match_enabled {
+            return Ok(false);
+        }
+
+        // get the related event, fail if there is none.
+        let event = if let Some(event) = self.related_events_flattened.get(&*event_match.rel_type) {
+            event
+        } else {
+            return Ok(false);
+        };
+
+        // If we are not matching fallbacks, don't match if our special key indicating this is a
+        // fallback relation is present.
+        if !event_match.include_fallbacks.unwrap_or(false)
+            && event.contains_key("im.vector.is_falling_back")
+        {
+            return Ok(false);
+        }
+
+        // If we have no key, accept the related event as matching simply because it exists,
+        // without matching against any of its fields.
+        let key = if let Some(key) = &event_match.key {
+            key
+        } else {
+            return Ok(true);
+        };
+
+        let pattern = if let Some(pattern) = &event_match.pattern {
+            pattern
+        } else if let Some(pattern_type) = &event_match.pattern_type {
+            // The `pattern_type` can either be "user_id" or "user_localpart",
+            // either way if we don't have a `user_id` then the condition can't
+            // match.
+            let user_id = if let Some(user_id) = user_id {
+                user_id
+            } else {
+                return Ok(false);
+            };
+
+            match &**pattern_type {
+                "user_id" => user_id,
+                "user_localpart" => get_localpart_from_id(user_id)?,
+                _ => return Ok(false),
+            }
+        } else {
+            return Ok(false);
+        };
+
+        let haystack = if let Some(haystack) = event.get(&**key) {
+            haystack
+        } else {
+            return Ok(false);
+        };
+
+        // For the content.body we match against "words", but for everything
+        // else we match against the entire value.
+        let match_type = if key == "content.body" {
+            GlobMatchType::Word
+        } else {
+            GlobMatchType::Whole
+        };
+
+        let mut compiled_pattern = get_glob_matcher(pattern, match_type)?;
+        compiled_pattern.is_match(haystack)
+    }
+
     /// Match the member count against an 'is' condition
     /// The `is` condition can be things like '>2', '==3' or even just '4'.
     fn match_member_count(&self, is: &str) -> Result<bool, Error> {
@@ -267,8 +355,15 @@ impl PushRuleEvaluator {
 fn push_rule_evaluator() {
     let mut flattened_keys = BTreeMap::new();
     flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string());
-    let evaluator =
-        PushRuleEvaluator::py_new(flattened_keys, 10, Some(0), BTreeMap::new()).unwrap();
+    let evaluator = PushRuleEvaluator::py_new(
+        flattened_keys,
+        10,
+        Some(0),
+        BTreeMap::new(),
+        BTreeMap::new(),
+        true,
+    )
+    .unwrap();
 
     let result = evaluator.run(&FilteredPushRules::default(), None, Some("bob"));
     assert_eq!(result.len(), 3);
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index 0dabfab8b8..d57800aa4a 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -267,6 +267,8 @@ pub enum Condition {
 #[serde(tag = "kind")]
 pub enum KnownCondition {
     EventMatch(EventMatchCondition),
+    #[serde(rename = "im.nheko.msc3664.related_event_match")]
+    RelatedEventMatch(RelatedEventMatchCondition),
     ContainsDisplayName,
     RoomMemberCount {
         #[serde(skip_serializing_if = "Option::is_none")]
@@ -299,6 +301,20 @@ pub struct EventMatchCondition {
     pub pattern_type: Option<Cow<'static, str>>,
 }
 
+/// The body of a [`KnownCondition::RelatedEventMatch`]
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct RelatedEventMatchCondition {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub key: Option<Cow<'static, str>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub pattern: Option<Cow<'static, str>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub pattern_type: Option<Cow<'static, str>>,
+    pub rel_type: Cow<'static, str>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub include_fallbacks: Option<bool>,
+}
+
 /// The collection of push rules for a user.
 #[derive(Debug, Clone, Default)]
 #[pyclass(frozen)]
@@ -391,15 +407,21 @@ impl PushRules {
 pub struct FilteredPushRules {
     push_rules: PushRules,
     enabled_map: BTreeMap<String, bool>,
+    msc3664_enabled: bool,
 }
 
 #[pymethods]
 impl FilteredPushRules {
     #[new]
-    pub fn py_new(push_rules: PushRules, enabled_map: BTreeMap<String, bool>) -> Self {
+    pub fn py_new(
+        push_rules: PushRules,
+        enabled_map: BTreeMap<String, bool>,
+        msc3664_enabled: bool,
+    ) -> Self {
         Self {
             push_rules,
             enabled_map,
+            msc3664_enabled,
         }
     }
 
@@ -414,13 +436,25 @@ impl FilteredPushRules {
     /// Iterates over all the rules and their enabled state, including base
     /// rules, in the order they should be executed in.
     fn iter(&self) -> impl Iterator<Item = (&PushRule, bool)> {
-        self.push_rules.iter().map(|r| {
-            let enabled = *self
-                .enabled_map
-                .get(&*r.rule_id)
-                .unwrap_or(&r.default_enabled);
-            (r, enabled)
-        })
+        self.push_rules
+            .iter()
+            .filter(|rule| {
+                // Ignore disabled experimental push rules
+                if !self.msc3664_enabled
+                    && rule.rule_id == "global/override/.im.nheko.msc3664.reply"
+                {
+                    return false;
+                }
+
+                true
+            })
+            .map(|r| {
+                let enabled = *self
+                    .enabled_map
+                    .get(&*r.rule_id)
+                    .unwrap_or(&r.default_enabled);
+                (r, enabled)
+            })
     }
 }
 
@@ -446,6 +480,17 @@ fn test_deserialize_condition() {
     let _: Condition = serde_json::from_str(json).unwrap();
 }
 
+#[test]
+fn test_deserialize_unstable_msc3664_condition() {
+    let json = r#"{"kind":"im.nheko.msc3664.related_event_match","key":"content.body","pattern":"coffee","rel_type":"m.in_reply_to"}"#;
+
+    let condition: Condition = serde_json::from_str(json).unwrap();
+    assert!(matches!(
+        condition,
+        Condition::Known(KnownCondition::RelatedEventMatch(_))
+    ));
+}
+
 #[test]
 fn test_deserialize_custom_condition() {
     let json = r#"{"kind":"custom_tag"}"#;
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index f2a61df660..f3b6d6c933 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -25,7 +25,9 @@ class PushRules:
     def rules(self) -> Collection[PushRule]: ...
 
 class FilteredPushRules:
-    def __init__(self, push_rules: PushRules, enabled_map: Dict[str, bool]): ...
+    def __init__(
+        self, push_rules: PushRules, enabled_map: Dict[str, bool], msc3664_enabled: bool
+    ): ...
     def rules(self) -> Collection[Tuple[PushRule, bool]]: ...
 
 def get_base_rule_ids() -> Collection[str]: ...
@@ -37,6 +39,8 @@ class PushRuleEvaluator:
         room_member_count: int,
         sender_power_level: Optional[int],
         notification_power_levels: Mapping[str, int],
+        related_events_flattened: Mapping[str, Mapping[str, str]],
+        related_event_match_enabled: bool,
     ): ...
     def run(
         self,
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 4009add01d..d9bdd66d55 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -98,6 +98,9 @@ class ExperimentalConfig(Config):
         # MSC3773: Thread notifications
         self.msc3773_enabled: bool = experimental.get("msc3773_enabled", False)
 
+        # MSC3664: Pushrules to match on related events
+        self.msc3664_enabled: bool = experimental.get("msc3664_enabled", False)
+
         # MSC3848: Introduce errcodes for specific event sending failures
         self.msc3848_enabled: bool = experimental.get("msc3848_enabled", False)
 
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index d7795a9080..75b7e126ca 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -45,7 +45,6 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
-
 push_rules_invalidation_counter = Counter(
     "synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter", ""
 )
@@ -107,6 +106,8 @@ class BulkPushRuleEvaluator:
         self.clock = hs.get_clock()
         self._event_auth_handler = hs.get_event_auth_handler()
 
+        self._related_event_match_enabled = self.hs.config.experimental.msc3664_enabled
+
         self.room_push_rule_cache_metrics = register_cache(
             "cache",
             "room_push_rule_cache",
@@ -218,6 +219,48 @@ class BulkPushRuleEvaluator:
 
         return pl_event.content if pl_event else {}, sender_level
 
+    async def _related_events(self, event: EventBase) -> Dict[str, Dict[str, str]]:
+        """Fetches the related events for 'event'. Sets the im.vector.is_falling_back key if the event is from a fallback relation
+
+        Returns:
+            Mapping of relation type to flattened events.
+        """
+        related_events: Dict[str, Dict[str, str]] = {}
+        if self._related_event_match_enabled:
+            related_event_id = event.content.get("m.relates_to", {}).get("event_id")
+            relation_type = event.content.get("m.relates_to", {}).get("rel_type")
+            if related_event_id is not None and relation_type is not None:
+                related_event = await self.store.get_event(
+                    related_event_id, allow_none=True
+                )
+                if related_event is not None:
+                    related_events[relation_type] = _flatten_dict(related_event)
+
+            reply_event_id = (
+                event.content.get("m.relates_to", {})
+                .get("m.in_reply_to", {})
+                .get("event_id")
+            )
+
+            # convert replies to pseudo relations
+            if reply_event_id is not None:
+                related_event = await self.store.get_event(
+                    reply_event_id, allow_none=True
+                )
+
+                if related_event is not None:
+                    related_events["m.in_reply_to"] = _flatten_dict(related_event)
+
+                    # indicate that this is from a fallback relation.
+                    if relation_type == "m.thread" and event.content.get(
+                        "m.relates_to", {}
+                    ).get("is_falling_back", False):
+                        related_events["m.in_reply_to"][
+                            "im.vector.is_falling_back"
+                        ] = ""
+
+        return related_events
+
     async def action_for_events_by_user(
         self, events_and_context: List[Tuple[EventBase, EventContext]]
     ) -> None:
@@ -286,6 +329,8 @@ class BulkPushRuleEvaluator:
                 # the parent is part of a thread.
                 thread_id = await self.store.get_thread_id(relation.parent_id)
 
+        related_events = await self._related_events(event)
+
         # It's possible that old room versions have non-integer power levels (floats or
         # strings). Workaround this by explicitly converting to int.
         notification_levels = power_levels.get("notifications", {})
@@ -298,6 +343,8 @@ class BulkPushRuleEvaluator:
             room_member_count,
             sender_power_level,
             notification_levels,
+            related_events,
+            self._related_event_match_enabled,
         )
 
         users = rules_by_user.keys()
diff --git a/synapse/rest/client/capabilities.py b/synapse/rest/client/capabilities.py
index 4237071c61..e84dde31b1 100644
--- a/synapse/rest/client/capabilities.py
+++ b/synapse/rest/client/capabilities.py
@@ -77,6 +77,11 @@ class CapabilitiesRestServlet(RestServlet):
                 "enabled": True,
             }
 
+        if self.config.experimental.msc3664_enabled:
+            response["capabilities"]["im.nheko.msc3664.related_event_match"] = {
+                "enabled": self.config.experimental.msc3664_enabled,
+            }
+
         return HTTPStatus.OK, response
 
 
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index 51416b2236..b6c15f29f8 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -29,6 +29,7 @@ from typing import (
 )
 
 from synapse.api.errors import StoreError
+from synapse.config.homeserver import ExperimentalConfig
 from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
@@ -62,7 +63,9 @@ logger = logging.getLogger(__name__)
 
 
 def _load_rules(
-    rawrules: List[JsonDict], enabled_map: Dict[str, bool]
+    rawrules: List[JsonDict],
+    enabled_map: Dict[str, bool],
+    experimental_config: ExperimentalConfig,
 ) -> FilteredPushRules:
     """Take the DB rows returned from the DB and convert them into a full
     `FilteredPushRules` object.
@@ -80,7 +83,9 @@ def _load_rules(
 
     push_rules = PushRules(ruleslist)
 
-    filtered_rules = FilteredPushRules(push_rules, enabled_map)
+    filtered_rules = FilteredPushRules(
+        push_rules, enabled_map, msc3664_enabled=experimental_config.msc3664_enabled
+    )
 
     return filtered_rules
 
@@ -160,7 +165,7 @@ class PushRulesWorkerStore(
 
         enabled_map = await self.get_push_rules_enabled_for_user(user_id)
 
-        return _load_rules(rows, enabled_map)
+        return _load_rules(rows, enabled_map, self.hs.config.experimental)
 
     async def get_push_rules_enabled_for_user(self, user_id: str) -> Dict[str, bool]:
         results = await self.db_pool.simple_select_list(
@@ -219,7 +224,9 @@ class PushRulesWorkerStore(
         results: Dict[str, FilteredPushRules] = {}
 
         for user_id, rules in raw_rules.items():
-            results[user_id] = _load_rules(rules, enabled_map_by_user.get(user_id, {}))
+            results[user_id] = _load_rules(
+                rules, enabled_map_by_user.get(user_id, {}), self.hs.config.experimental
+            )
 
         return results
 
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index decf619466..fe7c145840 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -38,7 +38,9 @@ from tests.test_utils.event_injection import create_event, inject_member_event
 
 
 class PushRuleEvaluatorTestCase(unittest.TestCase):
-    def _get_evaluator(self, content: JsonDict) -> PushRuleEvaluator:
+    def _get_evaluator(
+        self, content: JsonDict, related_events=None
+    ) -> PushRuleEvaluator:
         event = FrozenEvent(
             {
                 "event_id": "$event_id",
@@ -58,6 +60,8 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
             room_member_count,
             sender_power_level,
             power_levels.get("notifications", {}),
+            {} if related_events is None else related_events,
+            True,
         )
 
     def test_display_name(self) -> None:
@@ -292,6 +296,215 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
             {"sound": "default", "highlight": True},
         )
 
+    def test_related_event_match(self):
+        evaluator = self._get_evaluator(
+            {
+                "m.relates_to": {
+                    "event_id": "$parent_event_id",
+                    "key": "😀",
+                    "rel_type": "m.annotation",
+                    "m.in_reply_to": {
+                        "event_id": "$parent_event_id",
+                    },
+                }
+            },
+            {
+                "m.in_reply_to": {
+                    "event_id": "$parent_event_id",
+                    "type": "m.room.message",
+                    "sender": "@other_user:test",
+                    "room_id": "!room:test",
+                    "content.msgtype": "m.text",
+                    "content.body": "Original message",
+                },
+                "m.annotation": {
+                    "event_id": "$parent_event_id",
+                    "type": "m.room.message",
+                    "sender": "@other_user:test",
+                    "room_id": "!room:test",
+                    "content.msgtype": "m.text",
+                    "content.body": "Original message",
+                },
+            },
+        )
+        self.assertTrue(
+            evaluator.matches(
+                {
+                    "kind": "im.nheko.msc3664.related_event_match",
+                    "key": "sender",
+                    "rel_type": "m.in_reply_to",
+                    "pattern": "@other_user:test",
+                },
+                "@user:test",
+                "display_name",
+            )
+        )
+        self.assertFalse(
+            evaluator.matches(
+                {
+                    "kind": "im.nheko.msc3664.related_event_match",
+                    "key": "sender",
+                    "rel_type": "m.in_reply_to",
+                    "pattern": "@user:test",
+                },
+                "@other_user:test",
+                "display_name",
+            )
+        )
+        self.assertTrue(
+            evaluator.matches(
+                {
+                    "kind": "im.nheko.msc3664.related_event_match",
+                    "key": "sender",
+                    "rel_type": "m.annotation",
+                    "pattern": "@other_user:test",
+                },
+                "@other_user:test",
+                "display_name",
+            )
+        )
+        self.assertFalse(
+            evaluator.matches(
+                {
+                    "kind": "im.nheko.msc3664.related_event_match",
+                    "key": "sender",
+                    "rel_type": "m.in_reply_to",
+                },
+                "@user:test",
+                "display_name",
+            )
+        )
+        self.assertTrue(
+            evaluator.matches(
+                {
+                    "kind": "im.nheko.msc3664.related_event_match",
+                    "rel_type": "m.in_reply_to",
+                },
+                "@user:test",
+                "display_name",
+            )
+        )
+        self.assertFalse(
+            evaluator.matches(
+                {
+                    "kind": "im.nheko.msc3664.related_event_match",
+                    "rel_type": "m.replace",
+                },
+                "@other_user:test",
+                "display_name",
+            )
+        )
+
+    def test_related_event_match_with_fallback(self):
+        evaluator = self._get_evaluator(
+            {
+                "m.relates_to": {
+                    "event_id": "$parent_event_id",
+                    "key": "😀",
+                    "rel_type": "m.thread",
+                    "is_falling_back": True,
+                    "m.in_reply_to": {
+                        "event_id": "$parent_event_id",
+                    },
+                }
+            },
+            {
+                "m.in_reply_to": {
+                    "event_id": "$parent_event_id",
+                    "type": "m.room.message",
+                    "sender": "@other_user:test",
+                    "room_id": "!room:test",
+                    "content.msgtype": "m.text",
+                    "content.body": "Original message",
+                    "im.vector.is_falling_back": "",
+                },
+                "m.thread": {
+                    "event_id": "$parent_event_id",
+                    "type": "m.room.message",
+                    "sender": "@other_user:test",
+                    "room_id": "!room:test",
+                    "content.msgtype": "m.text",
+                    "content.body": "Original message",
+                },
+            },
+        )
+        self.assertTrue(
+            evaluator.matches(
+                {
+                    "kind": "im.nheko.msc3664.related_event_match",
+                    "key": "sender",
+                    "rel_type": "m.in_reply_to",
+                    "pattern": "@other_user:test",
+                    "include_fallbacks": True,
+                },
+                "@user:test",
+                "display_name",
+            )
+        )
+        self.assertFalse(
+            evaluator.matches(
+                {
+                    "kind": "im.nheko.msc3664.related_event_match",
+                    "key": "sender",
+                    "rel_type": "m.in_reply_to",
+                    "pattern": "@other_user:test",
+                    "include_fallbacks": False,
+                },
+                "@user:test",
+                "display_name",
+            )
+        )
+        self.assertFalse(
+            evaluator.matches(
+                {
+                    "kind": "im.nheko.msc3664.related_event_match",
+                    "key": "sender",
+                    "rel_type": "m.in_reply_to",
+                    "pattern": "@other_user:test",
+                },
+                "@user:test",
+                "display_name",
+            )
+        )
+
+    def test_related_event_match_no_related_event(self):
+        evaluator = self._get_evaluator(
+            {"msgtype": "m.text", "body": "Message without related event"}
+        )
+        self.assertFalse(
+            evaluator.matches(
+                {
+                    "kind": "im.nheko.msc3664.related_event_match",
+                    "key": "sender",
+                    "rel_type": "m.in_reply_to",
+                    "pattern": "@other_user:test",
+                },
+                "@user:test",
+                "display_name",
+            )
+        )
+        self.assertFalse(
+            evaluator.matches(
+                {
+                    "kind": "im.nheko.msc3664.related_event_match",
+                    "key": "sender",
+                    "rel_type": "m.in_reply_to",
+                },
+                "@user:test",
+                "display_name",
+            )
+        )
+        self.assertFalse(
+            evaluator.matches(
+                {
+                    "kind": "im.nheko.msc3664.related_event_match",
+                    "rel_type": "m.in_reply_to",
+                },
+                "@user:test",
+                "display_name",
+            )
+        )
+
 
 class TestBulkPushRuleEvaluator(unittest.HomeserverTestCase):
     """Tests for the bulk push rule evaluator"""
-- 
cgit 1.5.1


From 9192d74b0bf2f87b00d3e106a18baa9ce27acda1 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Tue, 25 Oct 2022 16:25:02 +0200
Subject: Refactor OIDC tests to better mimic an actual OIDC provider. (#13910)

This implements a fake OIDC server, which intercepts calls to the HTTP client.
Improves accuracy of tests by covering more internal methods.

One particular example was the ID token validation, which was previously mocked.

This uncovered an incorrect dependency: Synapse actually requires at least
authlib 0.15.1, not 0.14.0.
---
 changelog.d/13910.misc                     |   1 +
 pyproject.toml                             |   2 +-
 synapse/handlers/oidc.py                   |  15 +-
 tests/federation/test_federation_client.py |  36 +-
 tests/handlers/test_oidc.py                | 580 +++++++++++++----------------
 tests/rest/client/test_auth.py             |  32 +-
 tests/rest/client/test_login.py            |  40 +-
 tests/rest/client/utils.py                 | 136 +++----
 tests/test_utils/__init__.py               |  40 +-
 tests/test_utils/oidc.py                   | 325 ++++++++++++++++
 10 files changed, 747 insertions(+), 460 deletions(-)
 create mode 100644 changelog.d/13910.misc
 create mode 100644 tests/test_utils/oidc.py

(limited to 'synapse')

diff --git a/changelog.d/13910.misc b/changelog.d/13910.misc
new file mode 100644
index 0000000000..e906952aab
--- /dev/null
+++ b/changelog.d/13910.misc
@@ -0,0 +1 @@
+Refactor OIDC tests to better mimic an actual OIDC provider.
diff --git a/pyproject.toml b/pyproject.toml
index 6ebac41ed1..7e0feb75aa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -192,7 +192,7 @@ psycopg2 = { version = ">=2.8", markers = "platform_python_implementation != 'Py
 psycopg2cffi = { version = ">=2.8", markers = "platform_python_implementation == 'PyPy'", optional = true }
 psycopg2cffi-compat = { version = "==1.1", markers = "platform_python_implementation == 'PyPy'", optional = true }
 pysaml2 = { version = ">=4.5.0", optional = true }
-authlib = { version = ">=0.14.0", optional = true }
+authlib = { version = ">=0.15.1", optional = true }
 # systemd-python is necessary for logging to the systemd journal via
 # `systemd.journal.JournalHandler`, as is documented in
 # `contrib/systemd/log_config.yaml`.
diff --git a/synapse/handlers/oidc.py b/synapse/handlers/oidc.py
index d7a8226900..9759daf043 100644
--- a/synapse/handlers/oidc.py
+++ b/synapse/handlers/oidc.py
@@ -275,6 +275,7 @@ class OidcProvider:
         provider: OidcProviderConfig,
     ):
         self._store = hs.get_datastores().main
+        self._clock = hs.get_clock()
 
         self._macaroon_generaton = macaroon_generator
 
@@ -673,6 +674,13 @@ class OidcProvider:
         Returns:
             The decoded claims in the ID token.
         """
+        id_token = token.get("id_token")
+        logger.debug("Attempting to decode JWT id_token %r", id_token)
+
+        # This has theoretically been checked by the caller, so even though
+        # assertions are not enabled in production, it is mainly here to appease mypy
+        assert id_token is not None
+
         metadata = await self.load_metadata()
         claims_params = {
             "nonce": nonce,
@@ -688,9 +696,6 @@ class OidcProvider:
 
         claim_options = {"iss": {"values": [metadata["issuer"]]}}
 
-        id_token = token["id_token"]
-        logger.debug("Attempting to decode JWT id_token %r", id_token)
-
         # Try to decode the keys in cache first, then retry by forcing the keys
         # to be reloaded
         jwk_set = await self.load_jwks()
@@ -715,7 +720,9 @@ class OidcProvider:
 
         logger.debug("Decoded id_token JWT %r; validating", claims)
 
-        claims.validate(leeway=120)  # allows 2 min of clock skew
+        claims.validate(
+            now=self._clock.time(), leeway=120
+        )  # allows 2 min of clock skew
 
         return claims
 
diff --git a/tests/federation/test_federation_client.py b/tests/federation/test_federation_client.py
index a538215931..51d3bb8fff 100644
--- a/tests/federation/test_federation_client.py
+++ b/tests/federation/test_federation_client.py
@@ -12,13 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import json
 from unittest import mock
 
 import twisted.web.client
 from twisted.internet import defer
-from twisted.internet.protocol import Protocol
-from twisted.python.failure import Failure
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.room_versions import RoomVersions
@@ -26,10 +23,9 @@ from synapse.events import EventBase
 from synapse.rest import admin
 from synapse.rest.client import login, room
 from synapse.server import HomeServer
-from synapse.types import JsonDict
 from synapse.util import Clock
 
-from tests.test_utils import event_injection
+from tests.test_utils import FakeResponse, event_injection
 from tests.unittest import FederatingHomeserverTestCase
 
 
@@ -98,8 +94,8 @@ class FederationClientTest(FederatingHomeserverTestCase):
 
         # mock up the response, and have the agent return it
         self._mock_agent.request.side_effect = lambda *args, **kwargs: defer.succeed(
-            _mock_response(
-                {
+            FakeResponse.json(
+                payload={
                     "pdus": [
                         create_event_dict,
                         member_event_dict,
@@ -208,8 +204,8 @@ class FederationClientTest(FederatingHomeserverTestCase):
 
         # mock up the response, and have the agent return it
         self._mock_agent.request.side_effect = lambda *args, **kwargs: defer.succeed(
-            _mock_response(
-                {
+            FakeResponse.json(
+                payload={
                     "origin": "yet.another.server",
                     "origin_server_ts": 900,
                     "pdus": [
@@ -269,8 +265,8 @@ class FederationClientTest(FederatingHomeserverTestCase):
 
         # We expect an outbound request to /backfill, so stub that out
         self._mock_agent.request.side_effect = lambda *args, **kwargs: defer.succeed(
-            _mock_response(
-                {
+            FakeResponse.json(
+                payload={
                     "origin": "yet.another.server",
                     "origin_server_ts": 900,
                     # Mimic the other server returning our new `pulled_event`
@@ -305,21 +301,3 @@ class FederationClientTest(FederatingHomeserverTestCase):
         # This is 2 because it failed once from `self.OTHER_SERVER_NAME` and the
         # other from "yet.another.server"
         self.assertEqual(backfill_num_attempts, 2)
-
-
-def _mock_response(resp: JsonDict):
-    body = json.dumps(resp).encode("utf-8")
-
-    def deliver_body(p: Protocol):
-        p.dataReceived(body)
-        p.connectionLost(Failure(twisted.web.client.ResponseDone()))
-
-    response = mock.Mock(
-        code=200,
-        phrase=b"OK",
-        headers=twisted.web.client.Headers({"content-Type": ["application/json"]}),
-        length=len(body),
-        deliverBody=deliver_body,
-    )
-    mock.seal(response)
-    return response
diff --git a/tests/handlers/test_oidc.py b/tests/handlers/test_oidc.py
index e6cd3af7b7..5955410524 100644
--- a/tests/handlers/test_oidc.py
+++ b/tests/handlers/test_oidc.py
@@ -11,9 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import json
 import os
-from typing import Any, Dict
+from typing import Any, Dict, Tuple
 from unittest.mock import ANY, Mock, patch
 from urllib.parse import parse_qs, urlparse
 
@@ -22,12 +21,15 @@ import pymacaroons
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.handlers.sso import MappingException
+from synapse.http.site import SynapseRequest
 from synapse.server import HomeServer
-from synapse.types import JsonDict, UserID
+from synapse.types import UserID
 from synapse.util import Clock
-from synapse.util.macaroons import OidcSessionData, get_value_from_macaroon
+from synapse.util.macaroons import get_value_from_macaroon
+from synapse.util.stringutils import random_string
 
 from tests.test_utils import FakeResponse, get_awaitable_result, simple_async_mock
+from tests.test_utils.oidc import FakeAuthorizationGrant, FakeOidcServer
 from tests.unittest import HomeserverTestCase, override_config
 
 try:
@@ -46,12 +48,6 @@ BASE_URL = "https://synapse/"
 CALLBACK_URL = BASE_URL + "_synapse/client/oidc/callback"
 SCOPES = ["openid"]
 
-AUTHORIZATION_ENDPOINT = ISSUER + "authorize"
-TOKEN_ENDPOINT = ISSUER + "token"
-USERINFO_ENDPOINT = ISSUER + "userinfo"
-WELL_KNOWN = ISSUER + ".well-known/openid-configuration"
-JWKS_URI = ISSUER + ".well-known/jwks.json"
-
 # config for common cases
 DEFAULT_CONFIG = {
     "enabled": True,
@@ -66,9 +62,9 @@ DEFAULT_CONFIG = {
 EXPLICIT_ENDPOINT_CONFIG = {
     **DEFAULT_CONFIG,
     "discover": False,
-    "authorization_endpoint": AUTHORIZATION_ENDPOINT,
-    "token_endpoint": TOKEN_ENDPOINT,
-    "jwks_uri": JWKS_URI,
+    "authorization_endpoint": ISSUER + "authorize",
+    "token_endpoint": ISSUER + "token",
+    "jwks_uri": ISSUER + "jwks",
 }
 
 
@@ -102,27 +98,6 @@ class TestMappingProviderFailures(TestMappingProvider):
         }
 
 
-async def get_json(url: str) -> JsonDict:
-    # Mock get_json calls to handle jwks & oidc discovery endpoints
-    if url == WELL_KNOWN:
-        # Minimal discovery document, as defined in OpenID.Discovery
-        # https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderMetadata
-        return {
-            "issuer": ISSUER,
-            "authorization_endpoint": AUTHORIZATION_ENDPOINT,
-            "token_endpoint": TOKEN_ENDPOINT,
-            "jwks_uri": JWKS_URI,
-            "userinfo_endpoint": USERINFO_ENDPOINT,
-            "response_types_supported": ["code"],
-            "subject_types_supported": ["public"],
-            "id_token_signing_alg_values_supported": ["RS256"],
-        }
-    elif url == JWKS_URI:
-        return {"keys": []}
-
-    return {}
-
-
 def _key_file_path() -> str:
     """path to a file containing the private half of a test key"""
 
@@ -159,11 +134,11 @@ class OidcHandlerTestCase(HomeserverTestCase):
         return config
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        self.http_client = Mock(spec=["get_json"])
-        self.http_client.get_json.side_effect = get_json
-        self.http_client.user_agent = b"Synapse Test"
+        self.fake_server = FakeOidcServer(clock=clock, issuer=ISSUER)
 
-        hs = self.setup_test_homeserver(proxied_http_client=self.http_client)
+        hs = self.setup_test_homeserver()
+        self.hs_patcher = self.fake_server.patch_homeserver(hs=hs)
+        self.hs_patcher.start()
 
         self.handler = hs.get_oidc_handler()
         self.provider = self.handler._providers["oidc"]
@@ -175,18 +150,51 @@ class OidcHandlerTestCase(HomeserverTestCase):
         # Reduce the number of attempts when generating MXIDs.
         sso_handler._MAP_USERNAME_RETRIES = 3
 
+        auth_handler = hs.get_auth_handler()
+        # Mock the complete SSO login method.
+        self.complete_sso_login = simple_async_mock()
+        auth_handler.complete_sso_login = self.complete_sso_login  # type: ignore[assignment]
+
         return hs
 
+    def tearDown(self) -> None:
+        self.hs_patcher.stop()
+        return super().tearDown()
+
+    def reset_mocks(self):
+        """Reset all the Mocks."""
+        self.fake_server.reset_mocks()
+        self.render_error.reset_mock()
+        self.complete_sso_login.reset_mock()
+
     def metadata_edit(self, values):
         """Modify the result that will be returned by the well-known query"""
 
-        async def patched_get_json(uri):
-            res = await get_json(uri)
-            if uri == WELL_KNOWN:
-                res.update(values)
-            return res
+        metadata = self.fake_server.get_metadata()
+        metadata.update(values)
+        return patch.object(self.fake_server, "get_metadata", return_value=metadata)
 
-        return patch.object(self.http_client, "get_json", patched_get_json)
+    def start_authorization(
+        self,
+        userinfo: dict,
+        client_redirect_url: str = "http://client/redirect",
+        scope: str = "openid",
+        with_sid: bool = False,
+    ) -> Tuple[SynapseRequest, FakeAuthorizationGrant]:
+        """Start an authorization request, and get the callback request back."""
+        nonce = random_string(10)
+        state = random_string(10)
+
+        code, grant = self.fake_server.start_authorization(
+            userinfo=userinfo,
+            scope=scope,
+            client_id=self.provider._client_auth.client_id,
+            redirect_uri=self.provider._callback_url,
+            nonce=nonce,
+            with_sid=with_sid,
+        )
+        session = self._generate_oidc_session_token(state, nonce, client_redirect_url)
+        return _build_callback_request(code, state, session), grant
 
     def assertRenderedError(self, error, error_description=None):
         self.render_error.assert_called_once()
@@ -210,52 +218,54 @@ class OidcHandlerTestCase(HomeserverTestCase):
         """The handler should discover the endpoints from OIDC discovery document."""
         # This would throw if some metadata were invalid
         metadata = self.get_success(self.provider.load_metadata())
-        self.http_client.get_json.assert_called_once_with(WELL_KNOWN)
+        self.fake_server.get_metadata_handler.assert_called_once()
 
-        self.assertEqual(metadata.issuer, ISSUER)
-        self.assertEqual(metadata.authorization_endpoint, AUTHORIZATION_ENDPOINT)
-        self.assertEqual(metadata.token_endpoint, TOKEN_ENDPOINT)
-        self.assertEqual(metadata.jwks_uri, JWKS_URI)
-        # FIXME: it seems like authlib does not have that defined in its metadata models
-        # self.assertEqual(metadata.userinfo_endpoint, USERINFO_ENDPOINT)
+        self.assertEqual(metadata.issuer, self.fake_server.issuer)
+        self.assertEqual(
+            metadata.authorization_endpoint,
+            self.fake_server.authorization_endpoint,
+        )
+        self.assertEqual(metadata.token_endpoint, self.fake_server.token_endpoint)
+        self.assertEqual(metadata.jwks_uri, self.fake_server.jwks_uri)
+        # It seems like authlib does not have that defined in its metadata models
+        self.assertEqual(
+            metadata.get("userinfo_endpoint"),
+            self.fake_server.userinfo_endpoint,
+        )
 
         # subsequent calls should be cached
-        self.http_client.reset_mock()
+        self.reset_mocks()
         self.get_success(self.provider.load_metadata())
-        self.http_client.get_json.assert_not_called()
+        self.fake_server.get_metadata_handler.assert_not_called()
 
     @override_config({"oidc_config": EXPLICIT_ENDPOINT_CONFIG})
     def test_no_discovery(self) -> None:
         """When discovery is disabled, it should not try to load from discovery document."""
         self.get_success(self.provider.load_metadata())
-        self.http_client.get_json.assert_not_called()
+        self.fake_server.get_metadata_handler.assert_not_called()
 
-    @override_config({"oidc_config": EXPLICIT_ENDPOINT_CONFIG})
+    @override_config({"oidc_config": DEFAULT_CONFIG})
     def test_load_jwks(self) -> None:
         """JWKS loading is done once (then cached) if used."""
         jwks = self.get_success(self.provider.load_jwks())
-        self.http_client.get_json.assert_called_once_with(JWKS_URI)
-        self.assertEqual(jwks, {"keys": []})
+        self.fake_server.get_jwks_handler.assert_called_once()
+        self.assertEqual(jwks, self.fake_server.get_jwks())
 
         # subsequent calls should be cached…
-        self.http_client.reset_mock()
+        self.reset_mocks()
         self.get_success(self.provider.load_jwks())
-        self.http_client.get_json.assert_not_called()
+        self.fake_server.get_jwks_handler.assert_not_called()
 
         # …unless forced
-        self.http_client.reset_mock()
+        self.reset_mocks()
         self.get_success(self.provider.load_jwks(force=True))
-        self.http_client.get_json.assert_called_once_with(JWKS_URI)
+        self.fake_server.get_jwks_handler.assert_called_once()
 
-        # Throw if the JWKS uri is missing
-        original = self.provider.load_metadata
-
-        async def patched_load_metadata():
-            m = (await original()).copy()
-            m.update({"jwks_uri": None})
-            return m
-
-        with patch.object(self.provider, "load_metadata", patched_load_metadata):
+        with self.metadata_edit({"jwks_uri": None}):
+            # If we don't do this, the load_metadata call will throw because of the
+            # missing jwks_uri
+            self.provider._user_profile_method = "userinfo_endpoint"
+            self.get_success(self.provider.load_metadata(force=True))
             self.get_failure(self.provider.load_jwks(force=True), RuntimeError)
 
     @override_config({"oidc_config": DEFAULT_CONFIG})
@@ -359,7 +369,7 @@ class OidcHandlerTestCase(HomeserverTestCase):
                 self.provider.handle_redirect_request(req, b"http://client/redirect")
             )
         )
-        auth_endpoint = urlparse(AUTHORIZATION_ENDPOINT)
+        auth_endpoint = urlparse(self.fake_server.authorization_endpoint)
 
         self.assertEqual(url.scheme, auth_endpoint.scheme)
         self.assertEqual(url.netloc, auth_endpoint.netloc)
@@ -424,48 +434,34 @@ class OidcHandlerTestCase(HomeserverTestCase):
         with self.assertRaises(AttributeError):
             _ = mapping_provider.get_extra_attributes
 
-        token = {
-            "type": "bearer",
-            "id_token": "id_token",
-            "access_token": "access_token",
-        }
         username = "bar"
         userinfo = {
             "sub": "foo",
             "username": username,
         }
         expected_user_id = "@%s:%s" % (username, self.hs.hostname)
-        self.provider._exchange_code = simple_async_mock(return_value=token)  # type: ignore[assignment]
-        self.provider._parse_id_token = simple_async_mock(return_value=userinfo)  # type: ignore[assignment]
-        self.provider._fetch_userinfo = simple_async_mock(return_value=userinfo)  # type: ignore[assignment]
-        auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = simple_async_mock()
 
-        code = "code"
-        state = "state"
-        nonce = "nonce"
         client_redirect_url = "http://client/redirect"
-        ip_address = "10.0.0.1"
-        session = self._generate_oidc_session_token(state, nonce, client_redirect_url)
-        request = _build_callback_request(code, state, session, ip_address=ip_address)
-
+        request, _ = self.start_authorization(
+            userinfo, client_redirect_url=client_redirect_url
+        )
         self.get_success(self.handler.handle_oidc_callback(request))
 
-        auth_handler.complete_sso_login.assert_called_once_with(
+        self.complete_sso_login.assert_called_once_with(
             expected_user_id,
-            "oidc",
+            self.provider.idp_id,
             request,
             client_redirect_url,
             None,
             new_user=True,
             auth_provider_session_id=None,
         )
-        self.provider._exchange_code.assert_called_once_with(code)
-        self.provider._parse_id_token.assert_called_once_with(token, nonce=nonce)
-        self.provider._fetch_userinfo.assert_not_called()
+        self.fake_server.post_token_handler.assert_called_once()
+        self.fake_server.get_userinfo_handler.assert_not_called()
         self.render_error.assert_not_called()
 
         # Handle mapping errors
+        request, _ = self.start_authorization(userinfo)
         with patch.object(
             self.provider,
             "_remote_id_from_userinfo",
@@ -475,81 +471,63 @@ class OidcHandlerTestCase(HomeserverTestCase):
             self.assertRenderedError("mapping_error")
 
         # Handle ID token errors
-        self.provider._parse_id_token = simple_async_mock(raises=Exception())  # type: ignore[assignment]
-        self.get_success(self.handler.handle_oidc_callback(request))
+        request, _ = self.start_authorization(userinfo)
+        with self.fake_server.id_token_override({"iss": "https://bad.issuer/"}):
+            self.get_success(self.handler.handle_oidc_callback(request))
         self.assertRenderedError("invalid_token")
 
-        auth_handler.complete_sso_login.reset_mock()
-        self.provider._exchange_code.reset_mock()
-        self.provider._parse_id_token.reset_mock()
-        self.provider._fetch_userinfo.reset_mock()
+        self.reset_mocks()
 
         # With userinfo fetching
         self.provider._user_profile_method = "userinfo_endpoint"
-        token = {
-            "type": "bearer",
-            "access_token": "access_token",
-        }
-        self.provider._exchange_code = simple_async_mock(return_value=token)  # type: ignore[assignment]
+        # Without the "openid" scope, the FakeProvider does not generate an id_token
+        request, _ = self.start_authorization(userinfo, scope="")
         self.get_success(self.handler.handle_oidc_callback(request))
 
-        auth_handler.complete_sso_login.assert_called_once_with(
+        self.complete_sso_login.assert_called_once_with(
             expected_user_id,
-            "oidc",
+            self.provider.idp_id,
             request,
-            client_redirect_url,
+            ANY,
             None,
             new_user=False,
             auth_provider_session_id=None,
         )
-        self.provider._exchange_code.assert_called_once_with(code)
-        self.provider._parse_id_token.assert_not_called()
-        self.provider._fetch_userinfo.assert_called_once_with(token)
+        self.fake_server.post_token_handler.assert_called_once()
+        self.fake_server.get_userinfo_handler.assert_called_once()
         self.render_error.assert_not_called()
 
+        self.reset_mocks()
+
         # With an ID token, userinfo fetching and sid in the ID token
         self.provider._user_profile_method = "userinfo_endpoint"
-        token = {
-            "type": "bearer",
-            "access_token": "access_token",
-            "id_token": "id_token",
-        }
-        id_token = {
-            "sid": "abcdefgh",
-        }
-        self.provider._parse_id_token = simple_async_mock(return_value=id_token)  # type: ignore[assignment]
-        self.provider._exchange_code = simple_async_mock(return_value=token)  # type: ignore[assignment]
-        auth_handler.complete_sso_login.reset_mock()
-        self.provider._fetch_userinfo.reset_mock()
+        request, grant = self.start_authorization(userinfo, with_sid=True)
+        self.assertIsNotNone(grant.sid)
         self.get_success(self.handler.handle_oidc_callback(request))
 
-        auth_handler.complete_sso_login.assert_called_once_with(
+        self.complete_sso_login.assert_called_once_with(
             expected_user_id,
-            "oidc",
+            self.provider.idp_id,
             request,
-            client_redirect_url,
+            ANY,
             None,
             new_user=False,
-            auth_provider_session_id=id_token["sid"],
+            auth_provider_session_id=grant.sid,
         )
-        self.provider._exchange_code.assert_called_once_with(code)
-        self.provider._parse_id_token.assert_called_once_with(token, nonce=nonce)
-        self.provider._fetch_userinfo.assert_called_once_with(token)
+        self.fake_server.post_token_handler.assert_called_once()
+        self.fake_server.get_userinfo_handler.assert_called_once()
         self.render_error.assert_not_called()
 
         # Handle userinfo fetching error
-        self.provider._fetch_userinfo = simple_async_mock(raises=Exception())  # type: ignore[assignment]
-        self.get_success(self.handler.handle_oidc_callback(request))
+        request, _ = self.start_authorization(userinfo)
+        with self.fake_server.buggy_endpoint(userinfo=True):
+            self.get_success(self.handler.handle_oidc_callback(request))
         self.assertRenderedError("fetch_error")
 
-        # Handle code exchange failure
-        from synapse.handlers.oidc import OidcError
-
-        self.provider._exchange_code = simple_async_mock(  # type: ignore[assignment]
-            raises=OidcError("invalid_request")
-        )
-        self.get_success(self.handler.handle_oidc_callback(request))
-        self.assertRenderedError("invalid_request")
+        request, _ = self.start_authorization(userinfo)
+        with self.fake_server.buggy_endpoint(token=True):
+            self.get_success(self.handler.handle_oidc_callback(request))
+        self.assertRenderedError("server_error")
 
     @override_config({"oidc_config": DEFAULT_CONFIG})
     def test_callback_session(self) -> None:
@@ -599,18 +577,22 @@ class OidcHandlerTestCase(HomeserverTestCase):
     )
     def test_exchange_code(self) -> None:
         """Code exchange behaves correctly and handles various error scenarios."""
-        token = {"type": "bearer"}
-        token_json = json.dumps(token).encode("utf-8")
-        self.http_client.request = simple_async_mock(
-            return_value=FakeResponse(code=200, phrase=b"OK", body=token_json)
+        token = {
+            "type": "Bearer",
+            "access_token": "aabbcc",
+        }
+
+        self.fake_server.post_token_handler.side_effect = None
+        self.fake_server.post_token_handler.return_value = FakeResponse.json(
+            payload=token
         )
         code = "code"
         ret = self.get_success(self.provider._exchange_code(code))
-        kwargs = self.http_client.request.call_args[1]
+        kwargs = self.fake_server.request.call_args[1]
 
         self.assertEqual(ret, token)
         self.assertEqual(kwargs["method"], "POST")
-        self.assertEqual(kwargs["uri"], TOKEN_ENDPOINT)
+        self.assertEqual(kwargs["uri"], self.fake_server.token_endpoint)
 
         args = parse_qs(kwargs["data"].decode("utf-8"))
         self.assertEqual(args["grant_type"], ["authorization_code"])
@@ -620,12 +602,8 @@ class OidcHandlerTestCase(HomeserverTestCase):
         self.assertEqual(args["redirect_uri"], [CALLBACK_URL])
 
         # Test error handling
-        self.http_client.request = simple_async_mock(
-            return_value=FakeResponse(
-                code=400,
-                phrase=b"Bad Request",
-                body=b'{"error": "foo", "error_description": "bar"}',
-            )
+        self.fake_server.post_token_handler.return_value = FakeResponse.json(
+            code=400, payload={"error": "foo", "error_description": "bar"}
         )
         from synapse.handlers.oidc import OidcError
 
@@ -634,46 +612,30 @@ class OidcHandlerTestCase(HomeserverTestCase):
         self.assertEqual(exc.value.error_description, "bar")
 
         # Internal server error with no JSON body
-        self.http_client.request = simple_async_mock(
-            return_value=FakeResponse(
-                code=500,
-                phrase=b"Internal Server Error",
-                body=b"Not JSON",
-            )
+        self.fake_server.post_token_handler.return_value = FakeResponse(
+            code=500, body=b"Not JSON"
         )
         exc = self.get_failure(self.provider._exchange_code(code), OidcError)
         self.assertEqual(exc.value.error, "server_error")
 
         # Internal server error with JSON body
-        self.http_client.request = simple_async_mock(
-            return_value=FakeResponse(
-                code=500,
-                phrase=b"Internal Server Error",
-                body=b'{"error": "internal_server_error"}',
-            )
+        self.fake_server.post_token_handler.return_value = FakeResponse.json(
+            code=500, payload={"error": "internal_server_error"}
         )
 
         exc = self.get_failure(self.provider._exchange_code(code), OidcError)
         self.assertEqual(exc.value.error, "internal_server_error")
 
         # 4xx error without "error" field
-        self.http_client.request = simple_async_mock(
-            return_value=FakeResponse(
-                code=400,
-                phrase=b"Bad request",
-                body=b"{}",
-            )
+        self.fake_server.post_token_handler.return_value = FakeResponse.json(
+            code=400, payload={}
         )
         exc = self.get_failure(self.provider._exchange_code(code), OidcError)
         self.assertEqual(exc.value.error, "server_error")
 
         # 2xx error with "error" field
-        self.http_client.request = simple_async_mock(
-            return_value=FakeResponse(
-                code=200,
-                phrase=b"OK",
-                body=b'{"error": "some_error"}',
-            )
+        self.fake_server.post_token_handler.return_value = FakeResponse.json(
+            code=200, payload={"error": "some_error"}
         )
         exc = self.get_failure(self.provider._exchange_code(code), OidcError)
         self.assertEqual(exc.value.error, "some_error")
@@ -697,11 +659,14 @@ class OidcHandlerTestCase(HomeserverTestCase):
         """Test that code exchange works with a JWK client secret."""
         from authlib.jose import jwt
 
-        token = {"type": "bearer"}
-        self.http_client.request = simple_async_mock(
-            return_value=FakeResponse(
-                code=200, phrase=b"OK", body=json.dumps(token).encode("utf-8")
-            )
+        token = {
+            "type": "Bearer",
+            "access_token": "aabbcc",
+        }
+
+        self.fake_server.post_token_handler.side_effect = None
+        self.fake_server.post_token_handler.return_value = FakeResponse.json(
+            payload=token
         )
         code = "code"
 
@@ -714,9 +679,9 @@ class OidcHandlerTestCase(HomeserverTestCase):
         self.assertEqual(ret, token)
 
         # the request should have hit the token endpoint
-        kwargs = self.http_client.request.call_args[1]
+        kwargs = self.fake_server.request.call_args[1]
         self.assertEqual(kwargs["method"], "POST")
-        self.assertEqual(kwargs["uri"], TOKEN_ENDPOINT)
+        self.assertEqual(kwargs["uri"], self.fake_server.token_endpoint)
 
         # the client secret provided to the should be a jwt which can be checked with
         # the public key
@@ -750,11 +715,14 @@ class OidcHandlerTestCase(HomeserverTestCase):
     )
     def test_exchange_code_no_auth(self) -> None:
         """Test that code exchange works with no client secret."""
-        token = {"type": "bearer"}
-        self.http_client.request = simple_async_mock(
-            return_value=FakeResponse(
-                code=200, phrase=b"OK", body=json.dumps(token).encode("utf-8")
-            )
+        token = {
+            "type": "Bearer",
+            "access_token": "aabbcc",
+        }
+
+        self.fake_server.post_token_handler.side_effect = None
+        self.fake_server.post_token_handler.return_value = FakeResponse.json(
+            payload=token
         )
         code = "code"
         ret = self.get_success(self.provider._exchange_code(code))
@@ -762,9 +730,9 @@ class OidcHandlerTestCase(HomeserverTestCase):
         self.assertEqual(ret, token)
 
         # the request should have hit the token endpoint
-        kwargs = self.http_client.request.call_args[1]
+        kwargs = self.fake_server.request.call_args[1]
         self.assertEqual(kwargs["method"], "POST")
-        self.assertEqual(kwargs["uri"], TOKEN_ENDPOINT)
+        self.assertEqual(kwargs["uri"], self.fake_server.token_endpoint)
 
         # check the POSTed data
         args = parse_qs(kwargs["data"].decode("utf-8"))
@@ -787,37 +755,19 @@ class OidcHandlerTestCase(HomeserverTestCase):
         """
         Login while using a mapping provider that implements get_extra_attributes.
         """
-        token = {
-            "type": "bearer",
-            "id_token": "id_token",
-            "access_token": "access_token",
-        }
         userinfo = {
             "sub": "foo",
             "username": "foo",
             "phone": "1234567",
         }
-        self.provider._exchange_code = simple_async_mock(return_value=token)  # type: ignore[assignment]
-        self.provider._parse_id_token = simple_async_mock(return_value=userinfo)  # type: ignore[assignment]
-        auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = simple_async_mock()
-
-        state = "state"
-        client_redirect_url = "http://client/redirect"
-        session = self._generate_oidc_session_token(
-            state=state,
-            nonce="nonce",
-            client_redirect_url=client_redirect_url,
-        )
-        request = _build_callback_request("code", state, session)
-
+        request, _ = self.start_authorization(userinfo)
         self.get_success(self.handler.handle_oidc_callback(request))
 
-        auth_handler.complete_sso_login.assert_called_once_with(
+        self.complete_sso_login.assert_called_once_with(
             "@foo:test",
-            "oidc",
+            self.provider.idp_id,
             request,
-            client_redirect_url,
+            ANY,
             {"phone": "1234567"},
             new_user=True,
             auth_provider_session_id=None,
@@ -826,41 +776,40 @@ class OidcHandlerTestCase(HomeserverTestCase):
     @override_config({"oidc_config": DEFAULT_CONFIG})
     def test_map_userinfo_to_user(self) -> None:
         """Ensure that mapping the userinfo returned from a provider to an MXID works properly."""
-        auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = simple_async_mock()
-
         userinfo: dict = {
             "sub": "test_user",
             "username": "test_user",
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
-        auth_handler.complete_sso_login.assert_called_once_with(
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_called_once_with(
             "@test_user:test",
-            "oidc",
-            ANY,
+            self.provider.idp_id,
+            request,
             ANY,
             None,
             new_user=True,
             auth_provider_session_id=None,
         )
-        auth_handler.complete_sso_login.reset_mock()
+        self.reset_mocks()
 
         # Some providers return an integer ID.
         userinfo = {
             "sub": 1234,
             "username": "test_user_2",
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
-        auth_handler.complete_sso_login.assert_called_once_with(
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_called_once_with(
             "@test_user_2:test",
-            "oidc",
-            ANY,
+            self.provider.idp_id,
+            request,
             ANY,
             None,
             new_user=True,
             auth_provider_session_id=None,
         )
-        auth_handler.complete_sso_login.reset_mock()
+        self.reset_mocks()
 
         # Test if the mxid is already taken
         store = self.hs.get_datastores().main
@@ -869,8 +818,9 @@ class OidcHandlerTestCase(HomeserverTestCase):
             store.register_user(user_id=user3.to_string(), password_hash=None)
         )
         userinfo = {"sub": "test3", "username": "test_user_3"}
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
-        auth_handler.complete_sso_login.assert_not_called()
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_not_called()
         self.assertRenderedError(
             "mapping_error",
             "Mapping provider does not support de-duplicating Matrix IDs",
@@ -885,38 +835,37 @@ class OidcHandlerTestCase(HomeserverTestCase):
             store.register_user(user_id=user.to_string(), password_hash=None)
         )
 
-        auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = simple_async_mock()
-
         # Map a user via SSO.
         userinfo = {
             "sub": "test",
             "username": "test_user",
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
-        auth_handler.complete_sso_login.assert_called_once_with(
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_called_once_with(
             user.to_string(),
-            "oidc",
-            ANY,
+            self.provider.idp_id,
+            request,
             ANY,
             None,
             new_user=False,
             auth_provider_session_id=None,
         )
-        auth_handler.complete_sso_login.reset_mock()
+        self.reset_mocks()
 
         # Subsequent calls should map to the same mxid.
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
-        auth_handler.complete_sso_login.assert_called_once_with(
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_called_once_with(
             user.to_string(),
-            "oidc",
-            ANY,
+            self.provider.idp_id,
+            request,
             ANY,
             None,
             new_user=False,
             auth_provider_session_id=None,
         )
-        auth_handler.complete_sso_login.reset_mock()
+        self.reset_mocks()
 
         # Note that a second SSO user can be mapped to the same Matrix ID. (This
         # requires a unique sub, but something that maps to the same matrix ID,
@@ -927,17 +876,18 @@ class OidcHandlerTestCase(HomeserverTestCase):
             "sub": "test1",
             "username": "test_user",
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
-        auth_handler.complete_sso_login.assert_called_once_with(
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_called_once_with(
             user.to_string(),
-            "oidc",
-            ANY,
+            self.provider.idp_id,
+            request,
             ANY,
             None,
             new_user=False,
             auth_provider_session_id=None,
         )
-        auth_handler.complete_sso_login.reset_mock()
+        self.reset_mocks()
 
         # Register some non-exact matching cases.
         user2 = UserID.from_string("@TEST_user_2:test")
@@ -954,8 +904,9 @@ class OidcHandlerTestCase(HomeserverTestCase):
             "sub": "test2",
             "username": "TEST_USER_2",
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
-        auth_handler.complete_sso_login.assert_not_called()
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_not_called()
         args = self.assertRenderedError("mapping_error")
         self.assertTrue(
             args[2].startswith(
@@ -969,11 +920,12 @@ class OidcHandlerTestCase(HomeserverTestCase):
             store.register_user(user_id=user2.to_string(), password_hash=None)
         )
 
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
-        auth_handler.complete_sso_login.assert_called_once_with(
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_called_once_with(
             "@TEST_USER_2:test",
-            "oidc",
-            ANY,
+            self.provider.idp_id,
+            request,
             ANY,
             None,
             new_user=False,
@@ -983,9 +935,9 @@ class OidcHandlerTestCase(HomeserverTestCase):
     @override_config({"oidc_config": DEFAULT_CONFIG})
     def test_map_userinfo_to_invalid_localpart(self) -> None:
         """If the mapping provider generates an invalid localpart it should be rejected."""
-        self.get_success(
-            _make_callback_with_userinfo(self.hs, {"sub": "test2", "username": "föö"})
-        )
+        userinfo = {"sub": "test2", "username": "föö"}
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
         self.assertRenderedError("mapping_error", "localpart is invalid: föö")
 
     @override_config(
@@ -1000,9 +952,6 @@ class OidcHandlerTestCase(HomeserverTestCase):
     )
     def test_map_userinfo_to_user_retries(self) -> None:
         """The mapping provider can retry generating an MXID if the MXID is already in use."""
-        auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = simple_async_mock()
-
         store = self.hs.get_datastores().main
         self.get_success(
             store.register_user(user_id="@test_user:test", password_hash=None)
@@ -1011,19 +960,20 @@ class OidcHandlerTestCase(HomeserverTestCase):
             "sub": "test",
             "username": "test_user",
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
 
         # test_user is already taken, so test_user1 gets registered instead.
-        auth_handler.complete_sso_login.assert_called_once_with(
+        self.complete_sso_login.assert_called_once_with(
             "@test_user1:test",
-            "oidc",
-            ANY,
+            self.provider.idp_id,
+            request,
             ANY,
             None,
             new_user=True,
             auth_provider_session_id=None,
         )
-        auth_handler.complete_sso_login.reset_mock()
+        self.reset_mocks()
 
         # Register all of the potential mxids for a particular OIDC username.
         self.get_success(
@@ -1039,8 +989,9 @@ class OidcHandlerTestCase(HomeserverTestCase):
             "sub": "tester",
             "username": "tester",
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
-        auth_handler.complete_sso_login.assert_not_called()
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_not_called()
         self.assertRenderedError(
             "mapping_error", "Unable to generate a Matrix ID from the SSO response"
         )
@@ -1052,7 +1003,8 @@ class OidcHandlerTestCase(HomeserverTestCase):
             "sub": "tester",
             "username": "",
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
         self.assertRenderedError("mapping_error", "localpart is invalid: ")
 
     @override_config(
@@ -1071,7 +1023,8 @@ class OidcHandlerTestCase(HomeserverTestCase):
             "sub": "tester",
             "username": None,
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
         self.assertRenderedError("mapping_error", "localpart is invalid: ")
 
     @override_config(
@@ -1084,16 +1037,14 @@ class OidcHandlerTestCase(HomeserverTestCase):
     )
     def test_attribute_requirements(self) -> None:
         """The required attributes must be met from the OIDC userinfo response."""
-        auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = simple_async_mock()
-
         # userinfo lacking "test": "foobar" attribute should fail.
         userinfo = {
             "sub": "tester",
             "username": "tester",
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
-        auth_handler.complete_sso_login.assert_not_called()
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_not_called()
 
         # userinfo with "test": "foobar" attribute should succeed.
         userinfo = {
@@ -1101,13 +1052,14 @@ class OidcHandlerTestCase(HomeserverTestCase):
             "username": "tester",
             "test": "foobar",
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
 
         # check that the auth handler got called as expected
-        auth_handler.complete_sso_login.assert_called_once_with(
+        self.complete_sso_login.assert_called_once_with(
             "@tester:test",
-            "oidc",
-            ANY,
+            self.provider.idp_id,
+            request,
             ANY,
             None,
             new_user=True,
@@ -1124,21 +1076,20 @@ class OidcHandlerTestCase(HomeserverTestCase):
     )
     def test_attribute_requirements_contains(self) -> None:
         """Test that auth succeeds if userinfo attribute CONTAINS required value"""
-        auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = simple_async_mock()
         # userinfo with "test": ["foobar", "foo", "bar"] attribute should succeed.
         userinfo = {
             "sub": "tester",
             "username": "tester",
             "test": ["foobar", "foo", "bar"],
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
 
         # check that the auth handler got called as expected
-        auth_handler.complete_sso_login.assert_called_once_with(
+        self.complete_sso_login.assert_called_once_with(
             "@tester:test",
-            "oidc",
-            ANY,
+            self.provider.idp_id,
+            request,
             ANY,
             None,
             new_user=True,
@@ -1158,16 +1109,15 @@ class OidcHandlerTestCase(HomeserverTestCase):
         Test that auth fails if attributes exist but don't match,
         or are non-string values.
         """
-        auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = simple_async_mock()
         # userinfo with "test": "not_foobar" attribute should fail
         userinfo: dict = {
             "sub": "tester",
             "username": "tester",
             "test": "not_foobar",
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
-        auth_handler.complete_sso_login.assert_not_called()
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_not_called()
 
         # userinfo with "test": ["foo", "bar"] attribute should fail
         userinfo = {
@@ -1175,8 +1125,9 @@ class OidcHandlerTestCase(HomeserverTestCase):
             "username": "tester",
             "test": ["foo", "bar"],
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
-        auth_handler.complete_sso_login.assert_not_called()
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_not_called()
 
         # userinfo with "test": False attribute should fail
         # this is largely just to ensure we don't crash here
@@ -1185,8 +1136,9 @@ class OidcHandlerTestCase(HomeserverTestCase):
             "username": "tester",
             "test": False,
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
-        auth_handler.complete_sso_login.assert_not_called()
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_not_called()
 
         # userinfo with "test": None attribute should fail
         # a value of None breaks the OIDC spec, but it's important to not crash here
@@ -1195,8 +1147,9 @@ class OidcHandlerTestCase(HomeserverTestCase):
             "username": "tester",
             "test": None,
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
-        auth_handler.complete_sso_login.assert_not_called()
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_not_called()
 
         # userinfo with "test": 1 attribute should fail
         # this is largely just to ensure we don't crash here
@@ -1205,8 +1158,9 @@ class OidcHandlerTestCase(HomeserverTestCase):
             "username": "tester",
             "test": 1,
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
-        auth_handler.complete_sso_login.assert_not_called()
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_not_called()
 
         # userinfo with "test": 3.14 attribute should fail
         # this is largely just to ensure we don't crash here
@@ -1215,8 +1169,9 @@ class OidcHandlerTestCase(HomeserverTestCase):
             "username": "tester",
             "test": 3.14,
         }
-        self.get_success(_make_callback_with_userinfo(self.hs, userinfo))
-        auth_handler.complete_sso_login.assert_not_called()
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_not_called()
 
     def _generate_oidc_session_token(
         self,
@@ -1230,7 +1185,7 @@ class OidcHandlerTestCase(HomeserverTestCase):
         return self.handler._macaroon_generator.generate_oidc_session_token(
             state=state,
             session_data=OidcSessionData(
-                idp_id="oidc",
+                idp_id=self.provider.idp_id,
                 nonce=nonce,
                 client_redirect_url=client_redirect_url,
                 ui_auth_session_id=ui_auth_session_id,
@@ -1238,41 +1193,6 @@ class OidcHandlerTestCase(HomeserverTestCase):
         )
 
 
-async def _make_callback_with_userinfo(
-    hs: HomeServer, userinfo: dict, client_redirect_url: str = "http://client/redirect"
-) -> None:
-    """Mock up an OIDC callback with the given userinfo dict
-
-    We'll pull out the OIDC handler from the homeserver, stub out a couple of methods,
-    and poke in the userinfo dict as if it were the response to an OIDC userinfo call.
-
-    Args:
-        hs: the HomeServer impl to send the callback to.
-        userinfo: the OIDC userinfo dict
-        client_redirect_url: the URL to redirect to on success.
-    """
-
-    handler = hs.get_oidc_handler()
-    provider = handler._providers["oidc"]
-    provider._exchange_code = simple_async_mock(return_value={"id_token": ""})  # type: ignore[assignment]
-    provider._parse_id_token = simple_async_mock(return_value=userinfo)  # type: ignore[assignment]
-    provider._fetch_userinfo = simple_async_mock(return_value=userinfo)  # type: ignore[assignment]
-
-    state = "state"
-    session = handler._macaroon_generator.generate_oidc_session_token(
-        state=state,
-        session_data=OidcSessionData(
-            idp_id="oidc",
-            nonce="nonce",
-            client_redirect_url=client_redirect_url,
-            ui_auth_session_id="",
-        ),
-    )
-    request = _build_callback_request("code", state, session)
-
-    await handler.handle_oidc_callback(request)
-
-
 def _build_callback_request(
     code: str,
     state: str,
diff --git a/tests/rest/client/test_auth.py b/tests/rest/client/test_auth.py
index 090cef5216..ebf653d018 100644
--- a/tests/rest/client/test_auth.py
+++ b/tests/rest/client/test_auth.py
@@ -465,9 +465,11 @@ class UIAuthTests(unittest.HomeserverTestCase):
           * checking that the original operation succeeds
         """
 
+        fake_oidc_server = self.helper.fake_oidc_server()
+
         # log the user in
         remote_user_id = UserID.from_string(self.user).localpart
-        login_resp = self.helper.login_via_oidc(remote_user_id)
+        login_resp, _ = self.helper.login_via_oidc(fake_oidc_server, remote_user_id)
         self.assertEqual(login_resp["user_id"], self.user)
 
         # initiate a UI Auth process by attempting to delete the device
@@ -481,8 +483,8 @@ class UIAuthTests(unittest.HomeserverTestCase):
 
         # run the UIA-via-SSO flow
         session_id = channel.json_body["session"]
-        channel = self.helper.auth_via_oidc(
-            {"sub": remote_user_id}, ui_auth_session_id=session_id
+        channel, _ = self.helper.auth_via_oidc(
+            fake_oidc_server, {"sub": remote_user_id}, ui_auth_session_id=session_id
         )
 
         # that should serve a confirmation page
@@ -499,7 +501,8 @@ class UIAuthTests(unittest.HomeserverTestCase):
     @skip_unless(HAS_OIDC, "requires OIDC")
     @override_config({"oidc_config": TEST_OIDC_CONFIG})
     def test_does_not_offer_password_for_sso_user(self) -> None:
-        login_resp = self.helper.login_via_oidc("username")
+        fake_oidc_server = self.helper.fake_oidc_server()
+        login_resp, _ = self.helper.login_via_oidc(fake_oidc_server, "username")
         user_tok = login_resp["access_token"]
         device_id = login_resp["device_id"]
 
@@ -522,7 +525,10 @@ class UIAuthTests(unittest.HomeserverTestCase):
     @override_config({"oidc_config": TEST_OIDC_CONFIG})
     def test_offers_both_flows_for_upgraded_user(self) -> None:
         """A user that had a password and then logged in with SSO should get both flows"""
-        login_resp = self.helper.login_via_oidc(UserID.from_string(self.user).localpart)
+        fake_oidc_server = self.helper.fake_oidc_server()
+        login_resp, _ = self.helper.login_via_oidc(
+            fake_oidc_server, UserID.from_string(self.user).localpart
+        )
         self.assertEqual(login_resp["user_id"], self.user)
 
         channel = self.delete_device(
@@ -539,8 +545,13 @@ class UIAuthTests(unittest.HomeserverTestCase):
     @override_config({"oidc_config": TEST_OIDC_CONFIG})
     def test_ui_auth_fails_for_incorrect_sso_user(self) -> None:
         """If the user tries to authenticate with the wrong SSO user, they get an error"""
+
+        fake_oidc_server = self.helper.fake_oidc_server()
+
         # log the user in
-        login_resp = self.helper.login_via_oidc(UserID.from_string(self.user).localpart)
+        login_resp, _ = self.helper.login_via_oidc(
+            fake_oidc_server, UserID.from_string(self.user).localpart
+        )
         self.assertEqual(login_resp["user_id"], self.user)
 
         # start a UI Auth flow by attempting to delete a device
@@ -553,8 +564,8 @@ class UIAuthTests(unittest.HomeserverTestCase):
         session_id = channel.json_body["session"]
 
         # do the OIDC auth, but auth as the wrong user
-        channel = self.helper.auth_via_oidc(
-            {"sub": "wrong_user"}, ui_auth_session_id=session_id
+        channel, _ = self.helper.auth_via_oidc(
+            fake_oidc_server, {"sub": "wrong_user"}, ui_auth_session_id=session_id
         )
 
         # that should return a failure message
@@ -584,7 +595,10 @@ class UIAuthTests(unittest.HomeserverTestCase):
         """Tests that if we register a user via SSO while requiring approval for new
         accounts, we still raise the correct error before logging the user in.
         """
-        login_resp = self.helper.login_via_oidc("username", expected_status=403)
+        fake_oidc_server = self.helper.fake_oidc_server()
+        login_resp, _ = self.helper.login_via_oidc(
+            fake_oidc_server, "username", expected_status=403
+        )
 
         self.assertEqual(login_resp["errcode"], Codes.USER_AWAITING_APPROVAL)
         self.assertEqual(
diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py
index e801ba8c8b..ff5baa9f0a 100644
--- a/tests/rest/client/test_login.py
+++ b/tests/rest/client/test_login.py
@@ -36,7 +36,7 @@ from synapse.util import Clock
 from tests import unittest
 from tests.handlers.test_oidc import HAS_OIDC
 from tests.handlers.test_saml import has_saml2
-from tests.rest.client.utils import TEST_OIDC_AUTH_ENDPOINT, TEST_OIDC_CONFIG
+from tests.rest.client.utils import TEST_OIDC_CONFIG
 from tests.server import FakeChannel
 from tests.test_utils.html_parsers import TestHtmlParser
 from tests.unittest import HomeserverTestCase, override_config, skip_unless
@@ -612,13 +612,16 @@ class MultiSSOTestCase(unittest.HomeserverTestCase):
     def test_login_via_oidc(self) -> None:
         """If OIDC is chosen, should redirect to the OIDC auth endpoint"""
 
-        # pick the default OIDC provider
-        channel = self.make_request(
-            "GET",
-            "/_synapse/client/pick_idp?redirectUrl="
-            + urllib.parse.quote_plus(TEST_CLIENT_REDIRECT_URL)
-            + "&idp=oidc",
-        )
+        fake_oidc_server = self.helper.fake_oidc_server()
+
+        with fake_oidc_server.patch_homeserver(hs=self.hs):
+            # pick the default OIDC provider
+            channel = self.make_request(
+                "GET",
+                "/_synapse/client/pick_idp?redirectUrl="
+                + urllib.parse.quote_plus(TEST_CLIENT_REDIRECT_URL)
+                + "&idp=oidc",
+            )
         self.assertEqual(channel.code, 302, channel.result)
         location_headers = channel.headers.getRawHeaders("Location")
         assert location_headers
@@ -626,7 +629,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase):
         oidc_uri_path, oidc_uri_query = oidc_uri.split("?", 1)
 
         # it should redirect us to the auth page of the OIDC server
-        self.assertEqual(oidc_uri_path, TEST_OIDC_AUTH_ENDPOINT)
+        self.assertEqual(oidc_uri_path, fake_oidc_server.authorization_endpoint)
 
         # ... and should have set a cookie including the redirect url
         cookie_headers = channel.headers.getRawHeaders("Set-Cookie")
@@ -643,7 +646,9 @@ class MultiSSOTestCase(unittest.HomeserverTestCase):
             TEST_CLIENT_REDIRECT_URL,
         )
 
-        channel = self.helper.complete_oidc_auth(oidc_uri, cookies, {"sub": "user1"})
+        channel, _ = self.helper.complete_oidc_auth(
+            fake_oidc_server, oidc_uri, cookies, {"sub": "user1"}
+        )
 
         # that should serve a confirmation page
         self.assertEqual(channel.code, 200, channel.result)
@@ -693,7 +698,10 @@ class MultiSSOTestCase(unittest.HomeserverTestCase):
 
     def test_client_idp_redirect_to_oidc(self) -> None:
         """If the client pick a known IdP, redirect to it"""
-        channel = self._make_sso_redirect_request("oidc")
+        fake_oidc_server = self.helper.fake_oidc_server()
+
+        with fake_oidc_server.patch_homeserver(hs=self.hs):
+            channel = self._make_sso_redirect_request("oidc")
         self.assertEqual(channel.code, 302, channel.result)
         location_headers = channel.headers.getRawHeaders("Location")
         assert location_headers
@@ -701,7 +709,7 @@ class MultiSSOTestCase(unittest.HomeserverTestCase):
         oidc_uri_path, oidc_uri_query = oidc_uri.split("?", 1)
 
         # it should redirect us to the auth page of the OIDC server
-        self.assertEqual(oidc_uri_path, TEST_OIDC_AUTH_ENDPOINT)
+        self.assertEqual(oidc_uri_path, fake_oidc_server.authorization_endpoint)
 
     def _make_sso_redirect_request(self, idp_prov: Optional[str] = None) -> FakeChannel:
         """Send a request to /_matrix/client/r0/login/sso/redirect
@@ -1280,9 +1288,13 @@ class UsernamePickerTestCase(HomeserverTestCase):
     def test_username_picker(self) -> None:
         """Test the happy path of a username picker flow."""
 
+        fake_oidc_server = self.helper.fake_oidc_server()
+
         # do the start of the login flow
-        channel = self.helper.auth_via_oidc(
-            {"sub": "tester", "displayname": "Jonny"}, TEST_CLIENT_REDIRECT_URL
+        channel, _ = self.helper.auth_via_oidc(
+            fake_oidc_server,
+            {"sub": "tester", "displayname": "Jonny"},
+            TEST_CLIENT_REDIRECT_URL,
         )
 
         # that should redirect to the username picker
diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py
index c249a42bb6..967d229223 100644
--- a/tests/rest/client/utils.py
+++ b/tests/rest/client/utils.py
@@ -31,7 +31,6 @@ from typing import (
     Tuple,
     overload,
 )
-from unittest.mock import patch
 from urllib.parse import urlencode
 
 import attr
@@ -46,8 +45,19 @@ from synapse.server import HomeServer
 from synapse.types import JsonDict
 
 from tests.server import FakeChannel, FakeSite, make_request
-from tests.test_utils import FakeResponse
 from tests.test_utils.html_parsers import TestHtmlParser
+from tests.test_utils.oidc import FakeAuthorizationGrant, FakeOidcServer
+
+# an 'oidc_config' suitable for login_via_oidc.
+TEST_OIDC_ISSUER = "https://issuer.test/"
+TEST_OIDC_CONFIG = {
+    "enabled": True,
+    "issuer": TEST_OIDC_ISSUER,
+    "client_id": "test-client-id",
+    "client_secret": "test-client-secret",
+    "scopes": ["openid"],
+    "user_mapping_provider": {"config": {"localpart_template": "{{ user.sub }}"}},
+}
 
 
 @attr.s(auto_attribs=True)
@@ -543,12 +553,28 @@ class RestHelper:
 
         return channel.json_body
 
+    def fake_oidc_server(self, issuer: str = TEST_OIDC_ISSUER) -> FakeOidcServer:
+        """Create a ``FakeOidcServer``.
+
+        This can be used in conjunction with ``login_via_oidc``::
+
+            fake_oidc_server = self.helper.fake_oidc_server()
+            login_data, _ = self.helper.login_via_oidc(fake_oidc_server, "user")
+        """
+
+        return FakeOidcServer(
+            clock=self.hs.get_clock(),
+            issuer=issuer,
+        )
+
     def login_via_oidc(
         self,
+        fake_server: FakeOidcServer,
         remote_user_id: str,
+        with_sid: bool = False,
         expected_status: int = 200,
-    ) -> JsonDict:
-        """Log in via OIDC
+    ) -> Tuple[JsonDict, FakeAuthorizationGrant]:
+        """Log in (as a new user) via OIDC
 
         Returns the result of the final token login.
 
@@ -560,7 +586,10 @@ class RestHelper:
         the normal places.
         """
         client_redirect_url = "https://x"
-        channel = self.auth_via_oidc({"sub": remote_user_id}, client_redirect_url)
+        userinfo = {"sub": remote_user_id}
+        channel, grant = self.auth_via_oidc(
+            fake_server, userinfo, client_redirect_url, with_sid=with_sid
+        )
 
         # expect a confirmation page
         assert channel.code == HTTPStatus.OK, channel.result
@@ -585,14 +614,16 @@ class RestHelper:
         assert (
             channel.code == expected_status
         ), f"unexpected status in response: {channel.code}"
-        return channel.json_body
+        return channel.json_body, grant
 
     def auth_via_oidc(
         self,
+        fake_server: FakeOidcServer,
         user_info_dict: JsonDict,
         client_redirect_url: Optional[str] = None,
         ui_auth_session_id: Optional[str] = None,
-    ) -> FakeChannel:
+        with_sid: bool = False,
+    ) -> Tuple[FakeChannel, FakeAuthorizationGrant]:
         """Perform an OIDC authentication flow via a mock OIDC provider.
 
         This can be used for either login or user-interactive auth.
@@ -616,6 +647,7 @@ class RestHelper:
                 the login redirect endpoint
             ui_auth_session_id: if set, we will perform a UI Auth flow. The session id
                 of the UI auth.
+            with_sid: if True, generates a random `sid` (OIDC session ID)
 
         Returns:
             A FakeChannel containing the result of calling the OIDC callback endpoint.
@@ -625,14 +657,15 @@ class RestHelper:
 
         cookies: Dict[str, str] = {}
 
-        # if we're doing a ui auth, hit the ui auth redirect endpoint
-        if ui_auth_session_id:
-            # can't set the client redirect url for UI Auth
-            assert client_redirect_url is None
-            oauth_uri = self.initiate_sso_ui_auth(ui_auth_session_id, cookies)
-        else:
-            # otherwise, hit the login redirect endpoint
-            oauth_uri = self.initiate_sso_login(client_redirect_url, cookies)
+        with fake_server.patch_homeserver(hs=self.hs):
+            # if we're doing a ui auth, hit the ui auth redirect endpoint
+            if ui_auth_session_id:
+                # can't set the client redirect url for UI Auth
+                assert client_redirect_url is None
+                oauth_uri = self.initiate_sso_ui_auth(ui_auth_session_id, cookies)
+            else:
+                # otherwise, hit the login redirect endpoint
+                oauth_uri = self.initiate_sso_login(client_redirect_url, cookies)
 
         # we now have a URI for the OIDC IdP, but we skip that and go straight
         # back to synapse's OIDC callback resource. However, we do need the "state"
@@ -640,17 +673,21 @@ class RestHelper:
         # that synapse passes to the client.
 
         oauth_uri_path, _ = oauth_uri.split("?", 1)
-        assert oauth_uri_path == TEST_OIDC_AUTH_ENDPOINT, (
+        assert oauth_uri_path == fake_server.authorization_endpoint, (
             "unexpected SSO URI " + oauth_uri_path
         )
-        return self.complete_oidc_auth(oauth_uri, cookies, user_info_dict)
+        return self.complete_oidc_auth(
+            fake_server, oauth_uri, cookies, user_info_dict, with_sid=with_sid
+        )
 
     def complete_oidc_auth(
         self,
+        fake_serer: FakeOidcServer,
         oauth_uri: str,
         cookies: Mapping[str, str],
         user_info_dict: JsonDict,
-    ) -> FakeChannel:
+        with_sid: bool = False,
+    ) -> Tuple[FakeChannel, FakeAuthorizationGrant]:
         """Mock out an OIDC authentication flow
 
         Assumes that an OIDC auth has been initiated by one of initiate_sso_login or
@@ -661,50 +698,37 @@ class RestHelper:
         Requires the OIDC callback resource to be mounted at the normal place.
 
         Args:
+            fake_server: the fake OIDC server with which the auth should be done
             oauth_uri: the OIDC URI returned by synapse's redirect endpoint (ie,
                from initiate_sso_login or initiate_sso_ui_auth).
             cookies: the cookies set by synapse's redirect endpoint, which will be
                sent back to the callback endpoint.
             user_info_dict: the remote userinfo that the OIDC provider should present.
                 Typically this should be '{"sub": "<remote user id>"}'.
+            with_sid: if True, generates a random `sid` (OIDC session ID)
 
         Returns:
             A FakeChannel containing the result of calling the OIDC callback endpoint.
         """
         _, oauth_uri_qs = oauth_uri.split("?", 1)
         params = urllib.parse.parse_qs(oauth_uri_qs)
+
+        code, grant = fake_serer.start_authorization(
+            scope=params["scope"][0],
+            userinfo=user_info_dict,
+            client_id=params["client_id"][0],
+            redirect_uri=params["redirect_uri"][0],
+            nonce=params["nonce"][0],
+            with_sid=with_sid,
+        )
+        state = params["state"][0]
+
         callback_uri = "%s?%s" % (
             urllib.parse.urlparse(params["redirect_uri"][0]).path,
-            urllib.parse.urlencode({"state": params["state"][0], "code": "TEST_CODE"}),
+            urllib.parse.urlencode({"state": state, "code": code}),
         )
 
-        # before we hit the callback uri, stub out some methods in the http client so
-        # that we don't have to handle full HTTPS requests.
-        # (expected url, json response) pairs, in the order we expect them.
-        expected_requests = [
-            # first we get a hit to the token endpoint, which we tell to return
-            # a dummy OIDC access token
-            (TEST_OIDC_TOKEN_ENDPOINT, {"access_token": "TEST"}),
-            # and then one to the user_info endpoint, which returns our remote user id.
-            (TEST_OIDC_USERINFO_ENDPOINT, user_info_dict),
-        ]
-
-        async def mock_req(
-            method: str,
-            uri: str,
-            data: Optional[dict] = None,
-            headers: Optional[Iterable[Tuple[AnyStr, AnyStr]]] = None,
-        ):
-            (expected_uri, resp_obj) = expected_requests.pop(0)
-            assert uri == expected_uri
-            resp = FakeResponse(
-                code=HTTPStatus.OK,
-                phrase=b"OK",
-                body=json.dumps(resp_obj).encode("utf-8"),
-            )
-            return resp
-
-        with patch.object(self.hs.get_proxied_http_client(), "request", mock_req):
+        with fake_serer.patch_homeserver(hs=self.hs):
             # now hit the callback URI with the right params and a made-up code
             channel = make_request(
                 self.hs.get_reactor(),
@@ -715,7 +739,7 @@ class RestHelper:
                     ("Cookie", "%s=%s" % (k, v)) for (k, v) in cookies.items()
                 ],
             )
-        return channel
+        return channel, grant
 
     def initiate_sso_login(
         self, client_redirect_url: Optional[str], cookies: MutableMapping[str, str]
@@ -806,21 +830,3 @@ class RestHelper:
         assert len(p.links) == 1, "not exactly one link in confirmation page"
         oauth_uri = p.links[0]
         return oauth_uri
-
-
-# an 'oidc_config' suitable for login_via_oidc.
-TEST_OIDC_AUTH_ENDPOINT = "https://issuer.test/auth"
-TEST_OIDC_TOKEN_ENDPOINT = "https://issuer.test/token"
-TEST_OIDC_USERINFO_ENDPOINT = "https://issuer.test/userinfo"
-TEST_OIDC_CONFIG = {
-    "enabled": True,
-    "discover": False,
-    "issuer": "https://issuer.test",
-    "client_id": "test-client-id",
-    "client_secret": "test-client-secret",
-    "scopes": ["profile"],
-    "authorization_endpoint": TEST_OIDC_AUTH_ENDPOINT,
-    "token_endpoint": TEST_OIDC_TOKEN_ENDPOINT,
-    "userinfo_endpoint": TEST_OIDC_USERINFO_ENDPOINT,
-    "user_mapping_provider": {"config": {"localpart_template": "{{ user.sub }}"}},
-}
diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py
index 0d0d6faf0d..e62ebcc6a5 100644
--- a/tests/test_utils/__init__.py
+++ b/tests/test_utils/__init__.py
@@ -15,17 +15,24 @@
 """
 Utilities for running the unit tests
 """
+import json
 import sys
 import warnings
 from asyncio import Future
 from binascii import unhexlify
-from typing import Awaitable, Callable, TypeVar
+from typing import Awaitable, Callable, Tuple, TypeVar
 from unittest.mock import Mock
 
 import attr
+import zope.interface
 
 from twisted.python.failure import Failure
 from twisted.web.client import ResponseDone
+from twisted.web.http import RESPONSES
+from twisted.web.http_headers import Headers
+from twisted.web.iweb import IResponse
+
+from synapse.types import JsonDict
 
 TV = TypeVar("TV")
 
@@ -97,27 +104,44 @@ def simple_async_mock(return_value=None, raises=None) -> Mock:
     return Mock(side_effect=cb)
 
 
-@attr.s
-class FakeResponse:
+# Type ignore: it does not fully implement IResponse, but is good enough for tests
+@zope.interface.implementer(IResponse)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class FakeResponse:  # type: ignore[misc]
     """A fake twisted.web.IResponse object
 
     there is a similar class at treq.test.test_response, but it lacks a `phrase`
     attribute, and didn't support deliverBody until recently.
     """
 
-    # HTTP response code
-    code = attr.ib(type=int)
+    version: Tuple[bytes, int, int] = (b"HTTP", 1, 1)
 
-    # HTTP response phrase (eg b'OK' for a 200)
-    phrase = attr.ib(type=bytes)
+    # HTTP response code
+    code: int = 200
 
     # body of the response
-    body = attr.ib(type=bytes)
+    body: bytes = b""
+
+    headers: Headers = attr.Factory(Headers)
+
+    @property
+    def phrase(self):
+        return RESPONSES.get(self.code, b"Unknown Status")
+
+    @property
+    def length(self):
+        return len(self.body)
 
     def deliverBody(self, protocol):
         protocol.dataReceived(self.body)
         protocol.connectionLost(Failure(ResponseDone()))
 
+    @classmethod
+    def json(cls, *, code: int = 200, payload: JsonDict) -> "FakeResponse":
+        headers = Headers({"Content-Type": ["application/json"]})
+        body = json.dumps(payload).encode("utf-8")
+        return cls(code=code, body=body, headers=headers)
+
 
 # A small image used in some tests.
 #
diff --git a/tests/test_utils/oidc.py b/tests/test_utils/oidc.py
new file mode 100644
index 0000000000..de134bbc89
--- /dev/null
+++ b/tests/test_utils/oidc.py
@@ -0,0 +1,325 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import json
+from typing import Any, Dict, List, Optional, Tuple
+from unittest.mock import Mock, patch
+from urllib.parse import parse_qs
+
+import attr
+
+from twisted.web.http_headers import Headers
+from twisted.web.iweb import IResponse
+
+from synapse.server import HomeServer
+from synapse.util import Clock
+from synapse.util.stringutils import random_string
+
+from tests.test_utils import FakeResponse
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class FakeAuthorizationGrant:
+    userinfo: dict
+    client_id: str
+    redirect_uri: str
+    scope: str
+    nonce: Optional[str]
+    sid: Optional[str]
+
+
+class FakeOidcServer:
+    """A fake OpenID Connect Provider."""
+
+    # All methods here are mocks, so we can track when they are called, and override
+    # their values
+    request: Mock
+    get_jwks_handler: Mock
+    get_metadata_handler: Mock
+    get_userinfo_handler: Mock
+    post_token_handler: Mock
+
+    def __init__(self, clock: Clock, issuer: str):
+        from authlib.jose import ECKey, KeySet
+
+        self._clock = clock
+        self.issuer = issuer
+
+        self.request = Mock(side_effect=self._request)
+        self.get_jwks_handler = Mock(side_effect=self._get_jwks_handler)
+        self.get_metadata_handler = Mock(side_effect=self._get_metadata_handler)
+        self.get_userinfo_handler = Mock(side_effect=self._get_userinfo_handler)
+        self.post_token_handler = Mock(side_effect=self._post_token_handler)
+
+        # A code -> grant mapping
+        self._authorization_grants: Dict[str, FakeAuthorizationGrant] = {}
+        # An access token -> grant mapping
+        self._sessions: Dict[str, FakeAuthorizationGrant] = {}
+
+        # We generate here an ECDSA key with the P-256 curve (ES256 algorithm) used for
+        # signing JWTs. ECDSA keys are really quick to generate compared to RSA.
+        self._key = ECKey.generate_key(crv="P-256", is_private=True)
+        self._jwks = KeySet([ECKey.import_key(self._key.as_pem(is_private=False))])
+
+        self._id_token_overrides: Dict[str, Any] = {}
+
+    def reset_mocks(self):
+        self.request.reset_mock()
+        self.get_jwks_handler.reset_mock()
+        self.get_metadata_handler.reset_mock()
+        self.get_userinfo_handler.reset_mock()
+        self.post_token_handler.reset_mock()
+
+    def patch_homeserver(self, hs: HomeServer):
+        """Patch the ``HomeServer`` HTTP client to handle requests through the ``FakeOidcServer``.
+
+        This patch should be used whenever the HS is expected to perform requests to the
+        OIDC provider, e.g.::
+
+            fake_oidc_server = self.helper.fake_oidc_server()
+            with fake_oidc_server.patch_homeserver(hs):
+                self.make_request("GET", "/_matrix/client/r0/login/sso/redirect")
+        """
+        return patch.object(hs.get_proxied_http_client(), "request", self.request)
+
+    @property
+    def authorization_endpoint(self) -> str:
+        return self.issuer + "authorize"
+
+    @property
+    def token_endpoint(self) -> str:
+        return self.issuer + "token"
+
+    @property
+    def userinfo_endpoint(self) -> str:
+        return self.issuer + "userinfo"
+
+    @property
+    def metadata_endpoint(self) -> str:
+        return self.issuer + ".well-known/openid-configuration"
+
+    @property
+    def jwks_uri(self) -> str:
+        return self.issuer + "jwks"
+
+    def get_metadata(self) -> dict:
+        return {
+            "issuer": self.issuer,
+            "authorization_endpoint": self.authorization_endpoint,
+            "token_endpoint": self.token_endpoint,
+            "jwks_uri": self.jwks_uri,
+            "userinfo_endpoint": self.userinfo_endpoint,
+            "response_types_supported": ["code"],
+            "subject_types_supported": ["public"],
+            "id_token_signing_alg_values_supported": ["ES256"],
+        }
+
+    def get_jwks(self) -> dict:
+        return self._jwks.as_dict()
+
+    def get_userinfo(self, access_token: str) -> Optional[dict]:
+        """Given an access token, get the userinfo of the associated session."""
+        session = self._sessions.get(access_token, None)
+        if session is None:
+            return None
+        return session.userinfo
+
+    def _sign(self, payload: dict) -> str:
+        from authlib.jose import JsonWebSignature
+
+        jws = JsonWebSignature()
+        kid = self.get_jwks()["keys"][0]["kid"]
+        protected = {"alg": "ES256", "kid": kid}
+        json_payload = json.dumps(payload)
+        return jws.serialize_compact(protected, json_payload, self._key).decode("utf-8")
+
+    def generate_id_token(self, grant: FakeAuthorizationGrant) -> str:
+        now = self._clock.time()
+        id_token = {
+            **grant.userinfo,
+            "iss": self.issuer,
+            "aud": grant.client_id,
+            "iat": now,
+            "nbf": now,
+            "exp": now + 600,
+        }
+
+        if grant.nonce is not None:
+            id_token["nonce"] = grant.nonce
+
+        if grant.sid is not None:
+            id_token["sid"] = grant.sid
+
+        id_token.update(self._id_token_overrides)
+
+        return self._sign(id_token)
+
+    def id_token_override(self, overrides: dict):
+        """Temporarily patch the ID token generated by the token endpoint."""
+        return patch.object(self, "_id_token_overrides", overrides)
+
+    def start_authorization(
+        self,
+        client_id: str,
+        scope: str,
+        redirect_uri: str,
+        userinfo: dict,
+        nonce: Optional[str] = None,
+        with_sid: bool = False,
+    ) -> Tuple[str, FakeAuthorizationGrant]:
+        """Start an authorization request, and get back the code to use on the authorization endpoint."""
+        code = random_string(10)
+        sid = None
+        if with_sid:
+            sid = random_string(10)
+
+        grant = FakeAuthorizationGrant(
+            userinfo=userinfo,
+            scope=scope,
+            redirect_uri=redirect_uri,
+            nonce=nonce,
+            client_id=client_id,
+            sid=sid,
+        )
+        self._authorization_grants[code] = grant
+
+        return code, grant
+
+    def exchange_code(self, code: str) -> Optional[Dict[str, Any]]:
+        grant = self._authorization_grants.pop(code, None)
+        if grant is None:
+            return None
+
+        access_token = random_string(10)
+        self._sessions[access_token] = grant
+
+        token = {
+            "token_type": "Bearer",
+            "access_token": access_token,
+            "expires_in": 3600,
+            "scope": grant.scope,
+        }
+
+        if "openid" in grant.scope:
+            token["id_token"] = self.generate_id_token(grant)
+
+        return dict(token)
+
+    def buggy_endpoint(
+        self,
+        *,
+        jwks: bool = False,
+        metadata: bool = False,
+        token: bool = False,
+        userinfo: bool = False,
+    ):
+        """A context which makes a set of endpoints return a 500 error.
+
+        Args:
+            jwks: If True, makes the JWKS endpoint return a 500 error.
+            metadata: If True, makes the OIDC Discovery endpoint return a 500 error.
+            token: If True, makes the token endpoint return a 500 error.
+            userinfo: If True, makes the userinfo endpoint return a 500 error.
+        """
+        buggy = FakeResponse(code=500, body=b"Internal server error")
+
+        patches = {}
+        if jwks:
+            patches["get_jwks_handler"] = Mock(return_value=buggy)
+        if metadata:
+            patches["get_metadata_handler"] = Mock(return_value=buggy)
+        if token:
+            patches["post_token_handler"] = Mock(return_value=buggy)
+        if userinfo:
+            patches["get_userinfo_handler"] = Mock(return_value=buggy)
+
+        return patch.multiple(self, **patches)
+
+    async def _request(
+        self,
+        method: str,
+        uri: str,
+        data: Optional[bytes] = None,
+        headers: Optional[Headers] = None,
+    ) -> IResponse:
+        """The override of the SimpleHttpClient#request() method"""
+        access_token: Optional[str] = None
+
+        if headers is None:
+            headers = Headers()
+
+        # Try to find the access token in the headers if any
+        auth_headers = headers.getRawHeaders(b"Authorization")
+        if auth_headers:
+            parts = auth_headers[0].split(b" ")
+            if parts[0] == b"Bearer" and len(parts) == 2:
+                access_token = parts[1].decode("ascii")
+
+        if method == "POST":
+            # If the method is POST, assume it has an url-encoded body
+            if data is None or headers.getRawHeaders(b"Content-Type") != [
+                b"application/x-www-form-urlencoded"
+            ]:
+                return FakeResponse.json(code=400, payload={"error": "invalid_request"})
+
+            params = parse_qs(data.decode("utf-8"))
+
+            if uri == self.token_endpoint:
+                # Even though this endpoint should be protected, this does not check
+                # for client authentication. We're not checking it for simplicity,
+                # and because client authentication is tested in other standalone tests.
+                return self.post_token_handler(params)
+
+        elif method == "GET":
+            if uri == self.jwks_uri:
+                return self.get_jwks_handler()
+            elif uri == self.metadata_endpoint:
+                return self.get_metadata_handler()
+            elif uri == self.userinfo_endpoint:
+                return self.get_userinfo_handler(access_token=access_token)
+
+        return FakeResponse(code=404, body=b"404 not found")
+
+    # Request handlers
+    def _get_jwks_handler(self) -> IResponse:
+        """Handles requests to the JWKS URI."""
+        return FakeResponse.json(payload=self.get_jwks())
+
+    def _get_metadata_handler(self) -> IResponse:
+        """Handles requests to the OIDC well-known document."""
+        return FakeResponse.json(payload=self.get_metadata())
+
+    def _get_userinfo_handler(self, access_token: Optional[str]) -> IResponse:
+        """Handles requests to the userinfo endpoint."""
+        if access_token is None:
+            return FakeResponse(code=401)
+        user_info = self.get_userinfo(access_token)
+        if user_info is None:
+            return FakeResponse(code=401)
+
+        return FakeResponse.json(payload=user_info)
+
+    def _post_token_handler(self, params: Dict[str, List[str]]) -> IResponse:
+        """Handles requests to the token endpoint."""
+        code = params.get("code", [])
+
+        if len(code) != 1:
+            return FakeResponse.json(code=400, payload={"error": "invalid_request"})
+
+        grant = self.exchange_code(code=code[0])
+        if grant is None:
+            return FakeResponse.json(code=400, payload={"error": "invalid_grant"})
+
+        return FakeResponse.json(payload=grant)
-- 
cgit 1.5.1


From d902181de98399d90c46c4e4e2cf631064757941 Mon Sep 17 00:00:00 2001
From: James Salter <iteration@gmail.com>
Date: Tue, 25 Oct 2022 19:05:22 +0100
Subject: Unified search query syntax using the full-text search capabilities
 of the underlying DB. (#11635)

Support a unified search query syntax which leverages more of the full-text
search of each database supported by Synapse.

Supports, with the same syntax across Postgresql 11+ and Sqlite:

- quoted "search terms"
- `AND`, `OR`, `-` (negation) operators
- Matching words based on their stem, e.g. searches for "dog" matches
  documents containing "dogs".

This is achieved by

- If on postgresql 11+, pass the user input to `websearch_to_tsquery`
- If on sqlite, manually parse the query and transform it into the sqlite-specific
  query syntax.

Note that postgresql 10, which is close to end-of-life, falls back to using
`phraseto_tsquery`, which only supports a subset of the features.

Multiple terms separated by a space are implicitly ANDed.

Note that:

1. There is no escaping of full-text syntax that might be supported by the database;
  e.g. `NOT`, `NEAR`, `*` in sqlite. This runs the risk that people might discover this
  as accidental functionality and depend on something we don't guarantee.
2. English text is assumed for stemming. To support other languages, either the target
  language needs to be known at the time of indexing the message (via room metadata,
  or otherwise), or a separate index for each language supported could be created.

Sqlite docs: https://www.sqlite.org/fts3.html#full_text_index_queries
Postgres docs: https://www.postgresql.org/docs/11/textsearch-controls.html
---
 changelog.d/11635.feature                          |   1 +
 synapse/storage/databases/main/search.py           | 197 +++++++++++++++----
 synapse/storage/engines/postgres.py                |  16 ++
 .../delta/73/10_update_sqlite_fts4_tokenizer.py    |  62 ++++++
 tests/storage/test_room_search.py                  | 213 +++++++++++++++++++++
 5 files changed, 454 insertions(+), 35 deletions(-)
 create mode 100644 changelog.d/11635.feature
 create mode 100644 synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py

(limited to 'synapse')

diff --git a/changelog.d/11635.feature b/changelog.d/11635.feature
new file mode 100644
index 0000000000..94c8a83212
--- /dev/null
+++ b/changelog.d/11635.feature
@@ -0,0 +1 @@
+Allow use of Postgres and SQLite full-text search operators in search queries.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index 1b79acf955..a89fc54c2c 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -11,10 +11,22 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import enum
 import logging
 import re
-from typing import TYPE_CHECKING, Any, Collection, Iterable, List, Optional, Set, Tuple
+from collections import deque
+from dataclasses import dataclass
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Collection,
+    Iterable,
+    List,
+    Optional,
+    Set,
+    Tuple,
+    Union,
+)
 
 import attr
 
@@ -27,7 +39,7 @@ from synapse.storage.database import (
     LoggingTransaction,
 )
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
-from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
+from synapse.storage.engines import PostgresEngine, Sqlite3Engine
 from synapse.types import JsonDict
 
 if TYPE_CHECKING:
@@ -421,8 +433,6 @@ class SearchStore(SearchBackgroundUpdateStore):
         """
         clauses = []
 
-        search_query = _parse_query(self.database_engine, search_term)
-
         args: List[Any] = []
 
         # Make sure we don't explode because the person is in too many rooms.
@@ -444,20 +454,24 @@ class SearchStore(SearchBackgroundUpdateStore):
         count_clauses = clauses
 
         if isinstance(self.database_engine, PostgresEngine):
+            search_query = search_term
+            tsquery_func = self.database_engine.tsquery_func
             sql = (
-                "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) AS rank,"
+                f"SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) AS rank,"
                 " room_id, event_id"
                 " FROM event_search"
-                " WHERE vector @@ to_tsquery('english', ?)"
+                f" WHERE vector @@  {tsquery_func}('english', ?)"
             )
             args = [search_query, search_query] + args
 
             count_sql = (
                 "SELECT room_id, count(*) as count FROM event_search"
-                " WHERE vector @@ to_tsquery('english', ?)"
+                f" WHERE vector @@ {tsquery_func}('english', ?)"
             )
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
+            search_query = _parse_query_for_sqlite(search_term)
+
             sql = (
                 "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id"
                 " FROM event_search"
@@ -469,7 +483,7 @@ class SearchStore(SearchBackgroundUpdateStore):
                 "SELECT room_id, count(*) as count FROM event_search"
                 " WHERE value MATCH ?"
             )
-            count_args = [search_term] + count_args
+            count_args = [search_query] + count_args
         else:
             # This should be unreachable.
             raise Exception("Unrecognized database engine")
@@ -501,7 +515,9 @@ class SearchStore(SearchBackgroundUpdateStore):
 
         highlights = None
         if isinstance(self.database_engine, PostgresEngine):
-            highlights = await self._find_highlights_in_postgres(search_query, events)
+            highlights = await self._find_highlights_in_postgres(
+                search_query, events, tsquery_func
+            )
 
         count_sql += " GROUP BY room_id"
 
@@ -510,7 +526,6 @@ class SearchStore(SearchBackgroundUpdateStore):
         )
 
         count = sum(row["count"] for row in count_results if row["room_id"] in room_ids)
-
         return {
             "results": [
                 {"event": event_map[r["event_id"]], "rank": r["rank"]}
@@ -542,9 +557,6 @@ class SearchStore(SearchBackgroundUpdateStore):
             Each match as a dictionary.
         """
         clauses = []
-
-        search_query = _parse_query(self.database_engine, search_term)
-
         args: List[Any] = []
 
         # Make sure we don't explode because the person is in too many rooms.
@@ -582,20 +594,23 @@ class SearchStore(SearchBackgroundUpdateStore):
             args.extend([origin_server_ts, origin_server_ts, stream])
 
         if isinstance(self.database_engine, PostgresEngine):
+            search_query = search_term
+            tsquery_func = self.database_engine.tsquery_func
             sql = (
-                "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) as rank,"
+                f"SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) as rank,"
                 " origin_server_ts, stream_ordering, room_id, event_id"
                 " FROM event_search"
-                " WHERE vector @@ to_tsquery('english', ?) AND "
+                f" WHERE vector @@ {tsquery_func}('english', ?) AND "
             )
             args = [search_query, search_query] + args
 
             count_sql = (
                 "SELECT room_id, count(*) as count FROM event_search"
-                " WHERE vector @@ to_tsquery('english', ?) AND "
+                f" WHERE vector @@ {tsquery_func}('english', ?) AND "
             )
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
+
             # We use CROSS JOIN here to ensure we use the right indexes.
             # https://sqlite.org/optoverview.html#crossjoin
             #
@@ -614,13 +629,14 @@ class SearchStore(SearchBackgroundUpdateStore):
                 " CROSS JOIN events USING (event_id)"
                 " WHERE "
             )
+            search_query = _parse_query_for_sqlite(search_term)
             args = [search_query] + args
 
             count_sql = (
                 "SELECT room_id, count(*) as count FROM event_search"
                 " WHERE value MATCH ? AND "
             )
-            count_args = [search_term] + count_args
+            count_args = [search_query] + count_args
         else:
             # This should be unreachable.
             raise Exception("Unrecognized database engine")
@@ -660,7 +676,9 @@ class SearchStore(SearchBackgroundUpdateStore):
 
         highlights = None
         if isinstance(self.database_engine, PostgresEngine):
-            highlights = await self._find_highlights_in_postgres(search_query, events)
+            highlights = await self._find_highlights_in_postgres(
+                search_query, events, tsquery_func
+            )
 
         count_sql += " GROUP BY room_id"
 
@@ -686,7 +704,7 @@ class SearchStore(SearchBackgroundUpdateStore):
         }
 
     async def _find_highlights_in_postgres(
-        self, search_query: str, events: List[EventBase]
+        self, search_query: str, events: List[EventBase], tsquery_func: str
     ) -> Set[str]:
         """Given a list of events and a search term, return a list of words
         that match from the content of the event.
@@ -697,6 +715,7 @@ class SearchStore(SearchBackgroundUpdateStore):
         Args:
             search_query
             events: A list of events
+            tsquery_func: The tsquery_* function to use when making queries
 
         Returns:
             A set of strings.
@@ -729,7 +748,7 @@ class SearchStore(SearchBackgroundUpdateStore):
                 while stop_sel in value:
                     stop_sel += ">"
 
-                query = "SELECT ts_headline(?, to_tsquery('english', ?), %s)" % (
+                query = f"SELECT ts_headline(?, {tsquery_func}('english', ?), %s)" % (
                     _to_postgres_options(
                         {
                             "StartSel": start_sel,
@@ -760,20 +779,128 @@ def _to_postgres_options(options_dict: JsonDict) -> str:
     return "'%s'" % (",".join("%s=%s" % (k, v) for k, v in options_dict.items()),)
 
 
-def _parse_query(database_engine: BaseDatabaseEngine, search_term: str) -> str:
-    """Takes a plain unicode string from the user and converts it into a form
-    that can be passed to database.
-    We use this so that we can add prefix matching, which isn't something
-    that is supported by default.
+@dataclass
+class Phrase:
+    phrase: List[str]
+
+
+class SearchToken(enum.Enum):
+    Not = enum.auto()
+    Or = enum.auto()
+    And = enum.auto()
+
+
+Token = Union[str, Phrase, SearchToken]
+TokenList = List[Token]
+
+
+def _is_stop_word(word: str) -> bool:
+    # TODO Pull these out of the dictionary:
+    #  https://github.com/postgres/postgres/blob/master/src/backend/snowball/stopwords/english.stop
+    return word in {"the", "a", "you", "me", "and", "but"}
+
+
+def _tokenize_query(query: str) -> TokenList:
+    """
+    Convert the user-supplied `query` into a TokenList, which can be translated into
+    some DB-specific syntax.
+
+    The following constructs are supported:
+
+    - phrase queries using "double quotes"
+    - case-insensitive `or` and `and` operators
+    - negation of a keyword via unary `-`
+    - unary hyphen to denote NOT e.g. 'include -exclude'
+
+    The following differs from websearch_to_tsquery:
+
+    - Stop words are not removed.
+    - Unclosed phrases are treated differently.
+
+    """
+    tokens: TokenList = []
+
+    # Find phrases.
+    in_phrase = False
+    parts = deque(query.split('"'))
+    for i, part in enumerate(parts):
+        # The contents inside double quotes are treated as a phrase; a trailing
+        # double quote is not implied.
+        in_phrase = bool(i % 2) and i != (len(parts) - 1)
+
+        # Pull out the individual words, discarding any non-word characters.
+        words = deque(re.findall(r"([\w\-]+)", part, re.UNICODE))
+
+        # Phrases have simplified handling of words.
+        if in_phrase:
+            # Skip stop words.
+            phrase = [word for word in words if not _is_stop_word(word)]
+
+            # Consecutive words are implicitly ANDed together.
+            if tokens and tokens[-1] not in (SearchToken.Not, SearchToken.Or):
+                tokens.append(SearchToken.And)
+
+            # Add the phrase.
+            tokens.append(Phrase(phrase))
+            continue
+
+        # Otherwise, not in a phrase.
+        while words:
+            word = words.popleft()
+
+            if word.startswith("-"):
+                tokens.append(SearchToken.Not)
+
+                # If anything remains of the word, put it back to be processed again.
+                word = word[1:]
+                if word:
+                    words.appendleft(word)
+            elif word.lower() == "or":
+                tokens.append(SearchToken.Or)
+            else:
+                # Skip stop words.
+                if _is_stop_word(word):
+                    continue
+
+                # Consecutive words are implicitly ANDed together.
+                if tokens and tokens[-1] not in (SearchToken.Not, SearchToken.Or):
+                    tokens.append(SearchToken.And)
+
+                # Add the search term.
+                tokens.append(word)
+
+    return tokens
+
+
+def _tokens_to_sqlite_match_query(tokens: TokenList) -> str:
+    """
+    Convert the list of tokens to a string suitable for passing to sqlite's MATCH.
+    Assume sqlite was compiled with enhanced query syntax.
+
+    Ref: https://www.sqlite.org/fts3.html#full_text_index_queries
     """
+    match_query = []
+    for token in tokens:
+        if isinstance(token, str):
+            match_query.append(token)
+        elif isinstance(token, Phrase):
+            match_query.append('"' + " ".join(token.phrase) + '"')
+        elif token == SearchToken.Not:
+            # TODO: SQLite treats NOT as a *binary* operator. Hopefully a search
+            # term has already been added before this.
+            match_query.append(" NOT ")
+        elif token == SearchToken.Or:
+            match_query.append(" OR ")
+        elif token == SearchToken.And:
+            match_query.append(" AND ")
+        else:
+            raise ValueError(f"unknown token {token}")
+
+    return "".join(match_query)
 
-    # Pull out the individual words, discarding any non-word characters.
-    results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
 
-    if isinstance(database_engine, PostgresEngine):
-        return " & ".join(result + ":*" for result in results)
-    elif isinstance(database_engine, Sqlite3Engine):
-        return " & ".join(result + "*" for result in results)
-    else:
-        # This should be unreachable.
-        raise Exception("Unrecognized database engine")
+def _parse_query_for_sqlite(search_term: str) -> str:
+    """Takes a plain unicode string from the user and converts it into a form
+    that can be passed to SQLite's MATCH operator.
+    """
+    return _tokens_to_sqlite_match_query(_tokenize_query(search_term))
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index d8c0f64d9a..9bf74bbf59 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -170,6 +170,22 @@ class PostgresEngine(
         """Do we support the `RETURNING` clause in insert/update/delete?"""
         return True
 
+    @property
+    def tsquery_func(self) -> str:
+        """
+        Selects a tsquery_* func to use.
+
+        Ref: https://www.postgresql.org/docs/current/textsearch-controls.html
+
+        Returns:
+            The function name.
+        """
+        # Postgres 11 added support for websearch_to_tsquery.
+        assert self._version is not None
+        if self._version >= 110000:
+            return "websearch_to_tsquery"
+        return "plainto_tsquery"
+
     def is_deadlock(self, error: Exception) -> bool:
         if isinstance(error, psycopg2.DatabaseError):
             # https://www.postgresql.org/docs/current/static/errcodes-appendix.html
diff --git a/synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py b/synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py
new file mode 100644
index 0000000000..3de0a709eb
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py
@@ -0,0 +1,62 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+
+from synapse.storage.engines import BaseDatabaseEngine, Sqlite3Engine
+from synapse.storage.types import Cursor
+
+
+def run_create(cur: Cursor, database_engine: BaseDatabaseEngine) -> None:
+    """
+    Upgrade the event_search table to use the porter tokenizer if it isn't already
+
+    Applies only for sqlite.
+    """
+    if not isinstance(database_engine, Sqlite3Engine):
+        return
+
+    # Rebuild the event_search table with tokenize=porter configured.
+    cur.execute("DROP TABLE event_search")
+    cur.execute(
+        """
+        CREATE VIRTUAL TABLE event_search
+        USING fts4 (tokenize=porter, event_id, room_id, sender, key, value )
+        """
+    )
+
+    # Re-run the background job to re-populate the event_search table.
+    cur.execute("SELECT MIN(stream_ordering) FROM events")
+    row = cur.fetchone()
+    min_stream_id = row[0]
+
+    # If there are not any events, nothing to do.
+    if min_stream_id is None:
+        return
+
+    cur.execute("SELECT MAX(stream_ordering) FROM events")
+    row = cur.fetchone()
+    max_stream_id = row[0]
+
+    progress = {
+        "target_min_stream_id_inclusive": min_stream_id,
+        "max_stream_id_exclusive": max_stream_id + 1,
+    }
+    progress_json = json.dumps(progress)
+
+    sql = """
+    INSERT into background_updates (ordering, update_name, progress_json)
+    VALUES (?, ?, ?)
+    """
+
+    cur.execute(sql, (7310, "event_search", progress_json))
diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py
index e747c6b50e..9ddc19900a 100644
--- a/tests/storage/test_room_search.py
+++ b/tests/storage/test_room_search.py
@@ -12,11 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import List, Tuple, Union
+from unittest.case import SkipTest
+from unittest.mock import PropertyMock, patch
+
+from twisted.test.proto_helpers import MemoryReactor
+
 import synapse.rest.admin
 from synapse.api.constants import EventTypes
 from synapse.api.errors import StoreError
 from synapse.rest.client import login, room
+from synapse.server import HomeServer
+from synapse.storage.databases.main import DataStore
+from synapse.storage.databases.main.search import Phrase, SearchToken, _tokenize_query
 from synapse.storage.engines import PostgresEngine
+from synapse.storage.engines.sqlite import Sqlite3Engine
+from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase, skip_unless
 from tests.utils import USE_POSTGRES_FOR_TESTS
@@ -187,3 +198,205 @@ class EventSearchInsertionTest(HomeserverTestCase):
             ),
         )
         self.assertCountEqual(values, ["hi", "2"])
+
+
+class MessageSearchTest(HomeserverTestCase):
+    """
+    Check message search.
+
+    A powerful way to check the behaviour is to run the following in Postgres >= 11:
+
+        # SELECT websearch_to_tsquery('english', <your string>);
+
+    The result can be compared to the tokenized version for SQLite and Postgres < 11.
+
+    """
+
+    servlets = [
+        synapse.rest.admin.register_servlets_for_client_rest_resource,
+        login.register_servlets,
+        room.register_servlets,
+    ]
+
+    PHRASE = "the quick brown fox jumps over the lazy dog"
+
+    # Each entry is a search query, followed by either a boolean of whether it is
+    # in the phrase OR a tuple of booleans: whether it matches using websearch
+    # and using plain search.
+    COMMON_CASES: List[Tuple[str, Union[bool, Tuple[bool, bool]]]] = [
+        ("nope", False),
+        ("brown", True),
+        ("quick brown", True),
+        ("brown quick", True),
+        ("quick \t brown", True),
+        ("jump", True),
+        ("brown nope", False),
+        ('"brown quick"', (False, True)),
+        ('"jumps over"', True),
+        ('"quick fox"', (False, True)),
+        ("nope OR doublenope", False),
+        ("furphy OR fox", (True, False)),
+        ("fox -nope", (True, False)),
+        ("fox -brown", (False, True)),
+        ('"fox" quick', True),
+        ('"fox quick', True),
+        ('"quick brown', True),
+        ('" quick "', True),
+        ('" nope"', False),
+    ]
+    # TODO Test non-ASCII cases.
+
+    # Cases that fail on SQLite.
+    POSTGRES_CASES: List[Tuple[str, Union[bool, Tuple[bool, bool]]]] = [
+        # SQLite treats NOT as a binary operator.
+        ("- fox", (False, True)),
+        ("- nope", (True, False)),
+        ('"-fox quick', (False, True)),
+        # PostgreSQL skips stop words.
+        ('"the quick brown"', True),
+        ('"over lazy"', True),
+    ]
+
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
+        # Register a user and create a room, create some messages
+        self.register_user("alice", "password")
+        self.access_token = self.login("alice", "password")
+        self.room_id = self.helper.create_room_as("alice", tok=self.access_token)
+
+        # Send the phrase as a message and check it was created
+        response = self.helper.send(self.room_id, self.PHRASE, tok=self.access_token)
+        self.assertIn("event_id", response)
+
+    def test_tokenize_query(self) -> None:
+        """Test the custom logic to tokenize a user's query."""
+        cases = (
+            ("brown", ["brown"]),
+            ("quick brown", ["quick", SearchToken.And, "brown"]),
+            ("quick \t brown", ["quick", SearchToken.And, "brown"]),
+            ('"brown quick"', [Phrase(["brown", "quick"])]),
+            ("furphy OR fox", ["furphy", SearchToken.Or, "fox"]),
+            ("fox -brown", ["fox", SearchToken.Not, "brown"]),
+            ("- fox", [SearchToken.Not, "fox"]),
+            ('"fox" quick', [Phrase(["fox"]), SearchToken.And, "quick"]),
+            # No trailing double quote.
+            ('"fox quick', ["fox", SearchToken.And, "quick"]),
+            ('"-fox quick', [SearchToken.Not, "fox", SearchToken.And, "quick"]),
+            ('" quick "', [Phrase(["quick"])]),
+            (
+                'q"uick brow"n',
+                [
+                    "q",
+                    SearchToken.And,
+                    Phrase(["uick", "brow"]),
+                    SearchToken.And,
+                    "n",
+                ],
+            ),
+            (
+                '-"quick brown"',
+                [SearchToken.Not, Phrase(["quick", "brown"])],
+            ),
+        )
+
+        for query, expected in cases:
+            tokenized = _tokenize_query(query)
+            self.assertEqual(
+                tokenized, expected, f"{tokenized} != {expected} for {query}"
+            )
+
+    def _check_test_cases(
+        self,
+        store: DataStore,
+        cases: List[Tuple[str, Union[bool, Tuple[bool, bool]]]],
+        index=0,
+    ) -> None:
+        # Run all the test cases versus search_msgs
+        for query, expect_to_contain in cases:
+            if isinstance(expect_to_contain, tuple):
+                expect_to_contain = expect_to_contain[index]
+
+            result = self.get_success(
+                store.search_msgs([self.room_id], query, ["content.body"])
+            )
+            self.assertEquals(
+                result["count"],
+                1 if expect_to_contain else 0,
+                f"expected '{query}' to match '{self.PHRASE}'"
+                if expect_to_contain
+                else f"'{query}' unexpectedly matched '{self.PHRASE}'",
+            )
+            self.assertEquals(
+                len(result["results"]),
+                1 if expect_to_contain else 0,
+                "results array length should match count",
+            )
+
+        # Run them again versus search_rooms
+        for query, expect_to_contain in cases:
+            if isinstance(expect_to_contain, tuple):
+                expect_to_contain = expect_to_contain[index]
+
+            result = self.get_success(
+                store.search_rooms([self.room_id], query, ["content.body"], 10)
+            )
+            self.assertEquals(
+                result["count"],
+                1 if expect_to_contain else 0,
+                f"expected '{query}' to match '{self.PHRASE}'"
+                if expect_to_contain
+                else f"'{query}' unexpectedly matched '{self.PHRASE}'",
+            )
+            self.assertEquals(
+                len(result["results"]),
+                1 if expect_to_contain else 0,
+                "results array length should match count",
+            )
+
+    def test_postgres_web_search_for_phrase(self):
+        """
+        Test searching for phrases using typical web search syntax, as per postgres' websearch_to_tsquery.
+        This test is skipped unless the postgres instance supports websearch_to_tsquery.
+        """
+
+        store = self.hs.get_datastores().main
+        if not isinstance(store.database_engine, PostgresEngine):
+            raise SkipTest("Test only applies when postgres is used as the database")
+
+        if store.database_engine.tsquery_func != "websearch_to_tsquery":
+            raise SkipTest(
+                "Test only applies when postgres supporting websearch_to_tsquery is used as the database"
+            )
+
+        self._check_test_cases(store, self.COMMON_CASES + self.POSTGRES_CASES, index=0)
+
+    def test_postgres_non_web_search_for_phrase(self):
+        """
+        Test postgres searching for phrases without using web search, which is used when websearch_to_tsquery isn't
+        supported by the current postgres version.
+        """
+
+        store = self.hs.get_datastores().main
+        if not isinstance(store.database_engine, PostgresEngine):
+            raise SkipTest("Test only applies when postgres is used as the database")
+
+        # Patch tsquery_func to always return "plainto_tsquery" to ensure we're testing the plainto_tsquery path.
+        with patch(
+            "synapse.storage.engines.postgres.PostgresEngine.tsquery_func",
+            new_callable=PropertyMock,
+        ) as supports_websearch_to_tsquery:
+            supports_websearch_to_tsquery.return_value = "plainto_tsquery"
+            self._check_test_cases(
+                store, self.COMMON_CASES + self.POSTGRES_CASES, index=1
+            )
+
+    def test_sqlite_search(self):
+        """
+        Test sqlite searching for phrases.
+        """
+        store = self.hs.get_datastores().main
+        if not isinstance(store.database_engine, Sqlite3Engine):
+            raise SkipTest("Test only applies when sqlite is used as the database")
+
+        self._check_test_cases(store, self.COMMON_CASES, index=0)
-- 
cgit 1.5.1


From 8756d5c87efc5637da55c9e21d2a4eb2369ba693 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Wed, 26 Oct 2022 12:45:41 +0200
Subject: Save login tokens in database (#13844)

* Save login tokens in database

Signed-off-by: Quentin Gliech <quenting@element.io>

* Add upgrade notes

* Track login token reuse in a Prometheus metric

Signed-off-by: Quentin Gliech <quenting@element.io>
---
 changelog.d/13844.misc                             |   1 +
 docs/upgrade.md                                    |   9 ++
 synapse/handlers/auth.py                           |  64 +++++++--
 synapse/module_api/__init__.py                     |  41 +-----
 synapse/rest/client/login.py                       |   3 +-
 synapse/rest/client/login_token_request.py         |   5 +-
 synapse/storage/databases/main/registration.py     | 156 ++++++++++++++++++++-
 .../schema/main/delta/73/10login_tokens.sql        |  35 +++++
 synapse/util/macaroons.py                          |  87 +-----------
 tests/handlers/test_auth.py                        | 135 ++++++++++--------
 tests/util/test_macaroons.py                       |  28 ----
 11 files changed, 337 insertions(+), 227 deletions(-)
 create mode 100644 changelog.d/13844.misc
 create mode 100644 synapse/storage/schema/main/delta/73/10login_tokens.sql

(limited to 'synapse')

diff --git a/changelog.d/13844.misc b/changelog.d/13844.misc
new file mode 100644
index 0000000000..66f4414df7
--- /dev/null
+++ b/changelog.d/13844.misc
@@ -0,0 +1 @@
+Save login tokens in database and prevent login token reuse.
diff --git a/docs/upgrade.md b/docs/upgrade.md
index b81385b191..78c34d0c15 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -88,6 +88,15 @@ process, for example:
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     ```
 
+# Upgrading to v1.71.0
+
+## Removal of the `generate_short_term_login_token` module API method
+
+As announced with the release of [Synapse 1.69.0](#deprecation-of-the-generate_short_term_login_token-module-api-method), the deprecated `generate_short_term_login_token` module method has been removed.
+
+Modules relying on it can instead use the `create_login_token` method.
+
+
 # Upgrading to v1.69.0
 
 ## Changes to the receipts replication streams
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index f5f0e0e7a7..8b9ef25d29 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -38,6 +38,7 @@ from typing import (
 import attr
 import bcrypt
 import unpaddedbase64
+from prometheus_client import Counter
 
 from twisted.internet.defer import CancelledError
 from twisted.web.server import Request
@@ -48,6 +49,7 @@ from synapse.api.errors import (
     Codes,
     InteractiveAuthIncompleteError,
     LoginError,
+    NotFoundError,
     StoreError,
     SynapseError,
     UserDeactivatedError,
@@ -63,10 +65,14 @@ from synapse.http.server import finish_request, respond_with_html
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import defer_to_thread
 from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.storage.databases.main.registration import (
+    LoginTokenExpired,
+    LoginTokenLookupResult,
+    LoginTokenReused,
+)
 from synapse.types import JsonDict, Requester, UserID
 from synapse.util import stringutils as stringutils
 from synapse.util.async_helpers import delay_cancellation, maybe_awaitable
-from synapse.util.macaroons import LoginTokenAttributes
 from synapse.util.msisdn import phone_number_to_msisdn
 from synapse.util.stringutils import base62_encode
 from synapse.util.threepids import canonicalise_email
@@ -80,6 +86,12 @@ logger = logging.getLogger(__name__)
 
 INVALID_USERNAME_OR_PASSWORD = "Invalid username or password"
 
+invalid_login_token_counter = Counter(
+    "synapse_user_login_invalid_login_tokens",
+    "Counts the number of rejected m.login.token on /login",
+    ["reason"],
+)
+
 
 def convert_client_dict_legacy_fields_to_identifier(
     submission: JsonDict,
@@ -883,6 +895,25 @@ class AuthHandler:
 
         return True
 
+    async def create_login_token_for_user_id(
+        self,
+        user_id: str,
+        duration_ms: int = (2 * 60 * 1000),
+        auth_provider_id: Optional[str] = None,
+        auth_provider_session_id: Optional[str] = None,
+    ) -> str:
+        login_token = self.generate_login_token()
+        now = self._clock.time_msec()
+        expiry_ts = now + duration_ms
+        await self.store.add_login_token_to_user(
+            user_id=user_id,
+            token=login_token,
+            expiry_ts=expiry_ts,
+            auth_provider_id=auth_provider_id,
+            auth_provider_session_id=auth_provider_session_id,
+        )
+        return login_token
+
     async def create_refresh_token_for_user_id(
         self,
         user_id: str,
@@ -1401,6 +1432,18 @@ class AuthHandler:
             return None
         return user_id
 
+    def generate_login_token(self) -> str:
+        """Generates an opaque string, for use as a short-term login token"""
+
+        # we use the following format for login tokens:
+        #    syl_<random string>_<base62 crc check>
+
+        random_string = stringutils.random_string(20)
+        base = f"syl_{random_string}"
+
+        crc = base62_encode(crc32(base.encode("ascii")), minwidth=6)
+        return f"{base}_{crc}"
+
     def generate_access_token(self, for_user: UserID) -> str:
         """Generates an opaque string, for use as an access token"""
 
@@ -1427,16 +1470,17 @@ class AuthHandler:
         crc = base62_encode(crc32(base.encode("ascii")), minwidth=6)
         return f"{base}_{crc}"
 
-    async def validate_short_term_login_token(
-        self, login_token: str
-    ) -> LoginTokenAttributes:
+    async def consume_login_token(self, login_token: str) -> LoginTokenLookupResult:
         try:
-            res = self.macaroon_gen.verify_short_term_login_token(login_token)
-        except Exception:
-            raise AuthError(403, "Invalid login token", errcode=Codes.FORBIDDEN)
+            return await self.store.consume_login_token(login_token)
+        except LoginTokenExpired:
+            invalid_login_token_counter.labels("expired").inc()
+        except LoginTokenReused:
+            invalid_login_token_counter.labels("reused").inc()
+        except NotFoundError:
+            invalid_login_token_counter.labels("not found").inc()
 
-        await self.auth_blocking.check_auth_blocking(res.user_id)
-        return res
+        raise AuthError(403, "Invalid login token", errcode=Codes.FORBIDDEN)
 
     async def delete_access_token(self, access_token: str) -> None:
         """Invalidate a single access token
@@ -1711,7 +1755,7 @@ class AuthHandler:
             )
 
         # Create a login token
-        login_token = self.macaroon_gen.generate_short_term_login_token(
+        login_token = await self.create_login_token_for_user_id(
             registered_user_id,
             auth_provider_id=auth_provider_id,
             auth_provider_session_id=auth_provider_session_id,
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 6a6ae208d1..30e689d00d 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -771,50 +771,11 @@ class ModuleApi:
             auth_provider_session_id: The session ID got during login from the SSO IdP,
                 if any.
         """
-        # The deprecated `generate_short_term_login_token` method defaulted to an empty
-        # string for the `auth_provider_id` because of how the underlying macaroon was
-        # generated. This will change to a proper NULL-able field when the tokens get
-        # moved to the database.
-        return self._hs.get_macaroon_generator().generate_short_term_login_token(
+        return await self._hs.get_auth_handler().create_login_token_for_user_id(
             user_id,
-            auth_provider_id or "",
-            auth_provider_session_id,
             duration_in_ms,
-        )
-
-    def generate_short_term_login_token(
-        self,
-        user_id: str,
-        duration_in_ms: int = (2 * 60 * 1000),
-        auth_provider_id: str = "",
-        auth_provider_session_id: Optional[str] = None,
-    ) -> str:
-        """Generate a login token suitable for m.login.token authentication
-
-        Added in Synapse v1.9.0.
-
-        This was deprecated in Synapse v1.69.0 in favor of create_login_token, and will
-        be removed in Synapse 1.71.0.
-
-        Args:
-            user_id: gives the ID of the user that the token is for
-
-            duration_in_ms: the time that the token will be valid for
-
-            auth_provider_id: the ID of the SSO IdP that the user used to authenticate
-               to get this token, if any. This is encoded in the token so that
-               /login can report stats on number of successful logins by IdP.
-        """
-        logger.warn(
-            "A module configured on this server uses ModuleApi.generate_short_term_login_token(), "
-            "which is deprecated in favor of ModuleApi.create_login_token(), and will be removed in "
-            "Synapse 1.71.0",
-        )
-        return self._hs.get_macaroon_generator().generate_short_term_login_token(
-            user_id,
             auth_provider_id,
             auth_provider_session_id,
-            duration_in_ms,
         )
 
     @defer.inlineCallbacks
diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py
index f554586ac3..7774f1967d 100644
--- a/synapse/rest/client/login.py
+++ b/synapse/rest/client/login.py
@@ -436,8 +436,7 @@ class LoginRestServlet(RestServlet):
             The body of the JSON response.
         """
         token = login_submission["token"]
-        auth_handler = self.auth_handler
-        res = await auth_handler.validate_short_term_login_token(token)
+        res = await self.auth_handler.consume_login_token(token)
 
         return await self._complete_login(
             res.user_id,
diff --git a/synapse/rest/client/login_token_request.py b/synapse/rest/client/login_token_request.py
index 277b20fb63..43ea21d5e6 100644
--- a/synapse/rest/client/login_token_request.py
+++ b/synapse/rest/client/login_token_request.py
@@ -57,7 +57,6 @@ class LoginTokenRequestServlet(RestServlet):
         self.store = hs.get_datastores().main
         self.clock = hs.get_clock()
         self.server_name = hs.config.server.server_name
-        self.macaroon_gen = hs.get_macaroon_generator()
         self.auth_handler = hs.get_auth_handler()
         self.token_timeout = hs.config.experimental.msc3882_token_timeout
         self.ui_auth = hs.config.experimental.msc3882_ui_auth
@@ -76,10 +75,10 @@ class LoginTokenRequestServlet(RestServlet):
                 can_skip_ui_auth=False,  # Don't allow skipping of UI auth
             )
 
-        login_token = self.macaroon_gen.generate_short_term_login_token(
+        login_token = await self.auth_handler.create_login_token_for_user_id(
             user_id=requester.user.to_string(),
             auth_provider_id="org.matrix.msc3882.login_token_request",
-            duration_in_ms=self.token_timeout,
+            duration_ms=self.token_timeout,
         )
 
         return (
diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py
index 2996d6bb4d..0255295317 100644
--- a/synapse/storage/databases/main/registration.py
+++ b/synapse/storage/databases/main/registration.py
@@ -21,7 +21,13 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast
 import attr
 
 from synapse.api.constants import UserTypes
-from synapse.api.errors import Codes, StoreError, SynapseError, ThreepidValidationError
+from synapse.api.errors import (
+    Codes,
+    NotFoundError,
+    StoreError,
+    SynapseError,
+    ThreepidValidationError,
+)
 from synapse.config.homeserver import HomeServerConfig
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.storage.database import (
@@ -50,6 +56,14 @@ class ExternalIDReuseException(Exception):
     because this external id is given to an other user."""
 
 
+class LoginTokenExpired(Exception):
+    """Exception if the login token sent expired"""
+
+
+class LoginTokenReused(Exception):
+    """Exception if the login token sent was already used"""
+
+
 @attr.s(frozen=True, slots=True, auto_attribs=True)
 class TokenLookupResult:
     """Result of looking up an access token.
@@ -115,6 +129,20 @@ class RefreshTokenLookupResult:
     If None, the session can be refreshed indefinitely."""
 
 
+@attr.s(auto_attribs=True, frozen=True, slots=True)
+class LoginTokenLookupResult:
+    """Result of looking up a login token."""
+
+    user_id: str
+    """The user this token belongs to."""
+
+    auth_provider_id: Optional[str]
+    """The SSO Identity Provider that the user authenticated with, to get this token."""
+
+    auth_provider_session_id: Optional[str]
+    """The session ID advertised by the SSO Identity Provider."""
+
+
 class RegistrationWorkerStore(CacheInvalidationWorkerStore):
     def __init__(
         self,
@@ -1789,6 +1817,109 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
             "replace_refresh_token", _replace_refresh_token_txn
         )
 
+    async def add_login_token_to_user(
+        self,
+        user_id: str,
+        token: str,
+        expiry_ts: int,
+        auth_provider_id: Optional[str],
+        auth_provider_session_id: Optional[str],
+    ) -> None:
+        """Adds a short-term login token for the given user.
+
+        Args:
+            user_id: The user ID.
+            token: The new login token to add.
+            expiry_ts (milliseconds since the epoch): Time after which the login token
+                cannot be used.
+            auth_provider_id: The SSO Identity Provider that the user authenticated with
+                to get this token, if any
+            auth_provider_session_id: The session ID advertised by the SSO Identity
+                Provider, if any.
+        """
+        await self.db_pool.simple_insert(
+            "login_tokens",
+            {
+                "token": token,
+                "user_id": user_id,
+                "expiry_ts": expiry_ts,
+                "auth_provider_id": auth_provider_id,
+                "auth_provider_session_id": auth_provider_session_id,
+            },
+            desc="add_login_token_to_user",
+        )
+
+    def _consume_login_token(
+        self,
+        txn: LoggingTransaction,
+        token: str,
+        ts: int,
+    ) -> LoginTokenLookupResult:
+        values = self.db_pool.simple_select_one_txn(
+            txn,
+            "login_tokens",
+            keyvalues={"token": token},
+            retcols=(
+                "user_id",
+                "expiry_ts",
+                "used_ts",
+                "auth_provider_id",
+                "auth_provider_session_id",
+            ),
+            allow_none=True,
+        )
+
+        if values is None:
+            raise NotFoundError()
+
+        self.db_pool.simple_update_one_txn(
+            txn,
+            "login_tokens",
+            keyvalues={"token": token},
+            updatevalues={"used_ts": ts},
+        )
+        user_id = values["user_id"]
+        expiry_ts = values["expiry_ts"]
+        used_ts = values["used_ts"]
+        auth_provider_id = values["auth_provider_id"]
+        auth_provider_session_id = values["auth_provider_session_id"]
+
+        # Token was already used
+        if used_ts is not None:
+            raise LoginTokenReused()
+
+        # Token expired
+        if ts > int(expiry_ts):
+            raise LoginTokenExpired()
+
+        return LoginTokenLookupResult(
+            user_id=user_id,
+            auth_provider_id=auth_provider_id,
+            auth_provider_session_id=auth_provider_session_id,
+        )
+
+    async def consume_login_token(self, token: str) -> LoginTokenLookupResult:
+        """Lookup a login token and consume it.
+
+        Args:
+            token: The login token.
+
+        Returns:
+            The data stored with that token, including the `user_id`. Never returns
+            `None`: a missing, expired or reused token raises one of the errors below.
+
+        Raises:
+            NotFoundError if the login token was not found in the database
+            LoginTokenExpired if the login token expired
+            LoginTokenReused if the login token was already used
+        """
+        return await self.db_pool.runInteraction(
+            "consume_login_token",
+            self._consume_login_token,
+            token,
+            self._clock.time_msec(),
+        )
+
     @cached()
     async def is_guest(self, user_id: str) -> bool:
         res = await self.db_pool.simple_select_one_onecol(
@@ -2019,6 +2150,12 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore):
             and hs.config.experimental.msc3866.require_approval_for_new_accounts
         )
 
+        # Create a background job for removing expired login tokens
+        if hs.config.worker.run_background_tasks:
+            self._clock.looping_call(
+                self._delete_expired_login_tokens, THIRTY_MINUTES_IN_MS
+            )
+
     async def add_access_token_to_user(
         self,
         user_id: str,
@@ -2617,6 +2754,23 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore):
             approved,
         )
 
+    @wrap_as_background_process("delete_expired_login_tokens")
+    async def _delete_expired_login_tokens(self) -> None:
+        """Remove login tokens with expiry dates that have passed."""
+
+        def _delete_expired_login_tokens_txn(txn: LoggingTransaction, ts: int) -> None:
+            sql = "DELETE FROM login_tokens WHERE expiry_ts <= ?"
+            txn.execute(sql, (ts,))
+
+        # We keep the expired tokens for an extra 5 minutes so we can measure how many
+        # times a token is being used after its expiry
+        now = self._clock.time_msec()
+        await self.db_pool.runInteraction(
+            "delete_expired_login_tokens",
+            _delete_expired_login_tokens_txn,
+            now - (5 * 60 * 1000),
+        )
+
 
 def find_max_generated_user_id_localpart(cur: Cursor) -> int:
     """
diff --git a/synapse/storage/schema/main/delta/73/10login_tokens.sql b/synapse/storage/schema/main/delta/73/10login_tokens.sql
new file mode 100644
index 0000000000..a39b7bcece
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/10login_tokens.sql
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2022 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Login tokens are short-lived tokens that are used for the m.login.token
+-- login method, mainly during SSO logins
+CREATE TABLE login_tokens (
+    token TEXT PRIMARY KEY,
+    user_id TEXT NOT NULL, 
+    expiry_ts BIGINT NOT NULL,
+    used_ts BIGINT,
+    auth_provider_id TEXT,
+    auth_provider_session_id TEXT
+);
+
+-- We're sometimes querying them by their session ID we got from their IDP
+CREATE INDEX login_tokens_auth_provider_idx 
+    ON login_tokens (auth_provider_id, auth_provider_session_id);
+
+-- We're deleting them by their expiration time
+CREATE INDEX login_tokens_expiry_time_idx 
+    ON login_tokens (expiry_ts);
+
diff --git a/synapse/util/macaroons.py b/synapse/util/macaroons.py
index df77edcce2..5df03d3ddc 100644
--- a/synapse/util/macaroons.py
+++ b/synapse/util/macaroons.py
@@ -24,7 +24,7 @@ from typing_extensions import Literal
 
 from synapse.util import Clock, stringutils
 
-MacaroonType = Literal["access", "delete_pusher", "session", "login"]
+MacaroonType = Literal["access", "delete_pusher", "session"]
 
 
 def get_value_from_macaroon(macaroon: pymacaroons.Macaroon, key: str) -> str:
@@ -111,19 +111,6 @@ class OidcSessionData:
     """The session ID of the ongoing UI Auth ("" if this is a login)"""
 
 
-@attr.s(slots=True, frozen=True, auto_attribs=True)
-class LoginTokenAttributes:
-    """Data we store in a short-term login token"""
-
-    user_id: str
-
-    auth_provider_id: str
-    """The SSO Identity Provider that the user authenticated with, to get this token."""
-
-    auth_provider_session_id: Optional[str]
-    """The session ID advertised by the SSO Identity Provider."""
-
-
 class MacaroonGenerator:
     def __init__(self, clock: Clock, location: str, secret_key: bytes):
         self._clock = clock
@@ -165,35 +152,6 @@ class MacaroonGenerator:
         macaroon.add_first_party_caveat(f"pushkey = {pushkey}")
         return macaroon.serialize()
 
-    def generate_short_term_login_token(
-        self,
-        user_id: str,
-        auth_provider_id: str,
-        auth_provider_session_id: Optional[str] = None,
-        duration_in_ms: int = (2 * 60 * 1000),
-    ) -> str:
-        """Generate a short-term login token used during SSO logins
-
-        Args:
-            user_id: The user for which the token is valid.
-            auth_provider_id: The SSO IdP the user used.
-            auth_provider_session_id: The session ID got during login from the SSO IdP.
-
-        Returns:
-            A signed token valid for using as a ``m.login.token`` token.
-        """
-        now = self._clock.time_msec()
-        expiry = now + duration_in_ms
-        macaroon = self._generate_base_macaroon("login")
-        macaroon.add_first_party_caveat(f"user_id = {user_id}")
-        macaroon.add_first_party_caveat(f"time < {expiry}")
-        macaroon.add_first_party_caveat(f"auth_provider_id = {auth_provider_id}")
-        if auth_provider_session_id is not None:
-            macaroon.add_first_party_caveat(
-                f"auth_provider_session_id = {auth_provider_session_id}"
-            )
-        return macaroon.serialize()
-
     def generate_oidc_session_token(
         self,
         state: str,
@@ -233,49 +191,6 @@ class MacaroonGenerator:
 
         return macaroon.serialize()
 
-    def verify_short_term_login_token(self, token: str) -> LoginTokenAttributes:
-        """Verify a short-term-login macaroon
-
-        Checks that the given token is a valid, unexpired short-term-login token
-        minted by this server.
-
-        Args:
-            token: The login token to verify.
-
-        Returns:
-            A set of attributes carried by this token, including the
-            ``user_id`` and informations about the SSO IDP used during that
-            login.
-
-        Raises:
-            MacaroonVerificationFailedException if the verification failed
-        """
-        macaroon = pymacaroons.Macaroon.deserialize(token)
-
-        v = self._base_verifier("login")
-        v.satisfy_general(lambda c: c.startswith("user_id = "))
-        v.satisfy_general(lambda c: c.startswith("auth_provider_id = "))
-        v.satisfy_general(lambda c: c.startswith("auth_provider_session_id = "))
-        satisfy_expiry(v, self._clock.time_msec)
-        v.verify(macaroon, self._secret_key)
-
-        user_id = get_value_from_macaroon(macaroon, "user_id")
-        auth_provider_id = get_value_from_macaroon(macaroon, "auth_provider_id")
-
-        auth_provider_session_id: Optional[str] = None
-        try:
-            auth_provider_session_id = get_value_from_macaroon(
-                macaroon, "auth_provider_session_id"
-            )
-        except MacaroonVerificationFailedException:
-            pass
-
-        return LoginTokenAttributes(
-            user_id=user_id,
-            auth_provider_id=auth_provider_id,
-            auth_provider_session_id=auth_provider_session_id,
-        )
-
     def verify_guest_token(self, token: str) -> str:
         """Verify a guest access token macaroon
 
diff --git a/tests/handlers/test_auth.py b/tests/handlers/test_auth.py
index 7106799d44..036dbbc45b 100644
--- a/tests/handlers/test_auth.py
+++ b/tests/handlers/test_auth.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Optional
 from unittest.mock import Mock
 
 import pymacaroons
@@ -19,6 +20,7 @@ from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.errors import AuthError, ResourceLimitError
 from synapse.rest import admin
+from synapse.rest.client import login
 from synapse.server import HomeServer
 from synapse.util import Clock
 
@@ -29,6 +31,7 @@ from tests.test_utils import make_awaitable
 class AuthTestCase(unittest.HomeserverTestCase):
     servlets = [
         admin.register_servlets,
+        login.register_servlets,
     ]
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
@@ -46,6 +49,23 @@ class AuthTestCase(unittest.HomeserverTestCase):
 
         self.user1 = self.register_user("a_user", "pass")
 
+    def token_login(self, token: str) -> Optional[str]:
+        body = {
+            "type": "m.login.token",
+            "token": token,
+        }
+
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/v3/login",
+            body,
+        )
+
+        if channel.code == 200:
+            return channel.json_body["user_id"]
+
+        return None
+
     def test_macaroon_caveats(self) -> None:
         token = self.macaroon_generator.generate_guest_access_token("a_user")
         macaroon = pymacaroons.Macaroon.deserialize(token)
@@ -73,49 +93,62 @@ class AuthTestCase(unittest.HomeserverTestCase):
         v.satisfy_general(verify_guest)
         v.verify(macaroon, self.hs.config.key.macaroon_secret_key)
 
-    def test_short_term_login_token_gives_user_id(self) -> None:
-        token = self.macaroon_generator.generate_short_term_login_token(
-            self.user1, "", duration_in_ms=5000
+    def test_login_token_gives_user_id(self) -> None:
+        token = self.get_success(
+            self.auth_handler.create_login_token_for_user_id(
+                self.user1,
+                duration_ms=(5 * 1000),
+            )
         )
-        res = self.get_success(self.auth_handler.validate_short_term_login_token(token))
+
+        res = self.get_success(self.auth_handler.consume_login_token(token))
         self.assertEqual(self.user1, res.user_id)
-        self.assertEqual("", res.auth_provider_id)
+        self.assertEqual(None, res.auth_provider_id)
 
-        # when we advance the clock, the token should be rejected
-        self.reactor.advance(6)
-        self.get_failure(
-            self.auth_handler.validate_short_term_login_token(token),
-            AuthError,
+    def test_login_token_reuse_fails(self) -> None:
+        token = self.get_success(
+            self.auth_handler.create_login_token_for_user_id(
+                self.user1,
+                duration_ms=(5 * 1000),
+            )
         )
 
-    def test_short_term_login_token_gives_auth_provider(self) -> None:
-        token = self.macaroon_generator.generate_short_term_login_token(
-            self.user1, auth_provider_id="my_idp"
-        )
-        res = self.get_success(self.auth_handler.validate_short_term_login_token(token))
-        self.assertEqual(self.user1, res.user_id)
-        self.assertEqual("my_idp", res.auth_provider_id)
+        self.get_success(self.auth_handler.consume_login_token(token))
 
-    def test_short_term_login_token_cannot_replace_user_id(self) -> None:
-        token = self.macaroon_generator.generate_short_term_login_token(
-            self.user1, "", duration_in_ms=5000
+        self.get_failure(
+            self.auth_handler.consume_login_token(token),
+            AuthError,
         )
-        macaroon = pymacaroons.Macaroon.deserialize(token)
 
-        res = self.get_success(
-            self.auth_handler.validate_short_term_login_token(macaroon.serialize())
+    def test_login_token_expires(self) -> None:
+        token = self.get_success(
+            self.auth_handler.create_login_token_for_user_id(
+                self.user1,
+                duration_ms=(5 * 1000),
+            )
         )
-        self.assertEqual(self.user1, res.user_id)
-
-        # add another "user_id" caveat, which might allow us to override the
-        # user_id.
-        macaroon.add_first_party_caveat("user_id = b_user")
 
+        # when we advance the clock, the token should be rejected
+        self.reactor.advance(6)
         self.get_failure(
-            self.auth_handler.validate_short_term_login_token(macaroon.serialize()),
+            self.auth_handler.consume_login_token(token),
             AuthError,
         )
 
+    def test_login_token_gives_auth_provider(self) -> None:
+        token = self.get_success(
+            self.auth_handler.create_login_token_for_user_id(
+                self.user1,
+                auth_provider_id="my_idp",
+                auth_provider_session_id="11-22-33-44",
+                duration_ms=(5 * 1000),
+            )
+        )
+        res = self.get_success(self.auth_handler.consume_login_token(token))
+        self.assertEqual(self.user1, res.user_id)
+        self.assertEqual("my_idp", res.auth_provider_id)
+        self.assertEqual("11-22-33-44", res.auth_provider_session_id)
+
     def test_mau_limits_disabled(self) -> None:
         self.auth_blocking._limit_usage_by_mau = False
         # Ensure does not throw exception
@@ -125,12 +158,12 @@ class AuthTestCase(unittest.HomeserverTestCase):
             )
         )
 
-        self.get_success(
-            self.auth_handler.validate_short_term_login_token(
-                self._get_macaroon().serialize()
-            )
+        token = self.get_success(
+            self.auth_handler.create_login_token_for_user_id(self.user1)
         )
 
+        self.assertIsNotNone(self.token_login(token))
+
     def test_mau_limits_exceeded_large(self) -> None:
         self.auth_blocking._limit_usage_by_mau = True
         self.hs.get_datastores().main.get_monthly_active_count = Mock(
@@ -147,12 +180,10 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.hs.get_datastores().main.get_monthly_active_count = Mock(
             return_value=make_awaitable(self.large_number_of_users)
         )
-        self.get_failure(
-            self.auth_handler.validate_short_term_login_token(
-                self._get_macaroon().serialize()
-            ),
-            ResourceLimitError,
+        token = self.get_success(
+            self.auth_handler.create_login_token_for_user_id(self.user1)
         )
+        self.assertIsNone(self.token_login(token))
 
     def test_mau_limits_parity(self) -> None:
         # Ensure we're not at the unix epoch.
@@ -171,12 +202,10 @@ class AuthTestCase(unittest.HomeserverTestCase):
             ),
             ResourceLimitError,
         )
-        self.get_failure(
-            self.auth_handler.validate_short_term_login_token(
-                self._get_macaroon().serialize()
-            ),
-            ResourceLimitError,
+        token = self.get_success(
+            self.auth_handler.create_login_token_for_user_id(self.user1)
         )
+        self.assertIsNone(self.token_login(token))
 
         # If in monthly active cohort
         self.hs.get_datastores().main.user_last_seen_monthly_active = Mock(
@@ -187,11 +216,10 @@ class AuthTestCase(unittest.HomeserverTestCase):
                 self.user1, device_id=None, valid_until_ms=None
             )
         )
-        self.get_success(
-            self.auth_handler.validate_short_term_login_token(
-                self._get_macaroon().serialize()
-            )
+        token = self.get_success(
+            self.auth_handler.create_login_token_for_user_id(self.user1)
         )
+        self.assertIsNotNone(self.token_login(token))
 
     def test_mau_limits_not_exceeded(self) -> None:
         self.auth_blocking._limit_usage_by_mau = True
@@ -209,14 +237,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.hs.get_datastores().main.get_monthly_active_count = Mock(
             return_value=make_awaitable(self.small_number_of_users)
         )
-        self.get_success(
-            self.auth_handler.validate_short_term_login_token(
-                self._get_macaroon().serialize()
-            )
-        )
-
-    def _get_macaroon(self) -> pymacaroons.Macaroon:
-        token = self.macaroon_generator.generate_short_term_login_token(
-            self.user1, "", duration_in_ms=5000
+        token = self.get_success(
+            self.auth_handler.create_login_token_for_user_id(self.user1)
         )
-        return pymacaroons.Macaroon.deserialize(token)
+        self.assertIsNotNone(self.token_login(token))
diff --git a/tests/util/test_macaroons.py b/tests/util/test_macaroons.py
index 32125f7bb7..40754a4711 100644
--- a/tests/util/test_macaroons.py
+++ b/tests/util/test_macaroons.py
@@ -84,34 +84,6 @@ class MacaroonGeneratorTestCase(TestCase):
         )
         self.assertEqual(user_id, "@user:tesths")
 
-    def test_short_term_login_token(self):
-        """Test the generation and verification of short-term login tokens"""
-        token = self.macaroon_generator.generate_short_term_login_token(
-            user_id="@user:tesths",
-            auth_provider_id="oidc",
-            auth_provider_session_id="sid",
-            duration_in_ms=2 * 60 * 1000,
-        )
-
-        info = self.macaroon_generator.verify_short_term_login_token(token)
-        self.assertEqual(info.user_id, "@user:tesths")
-        self.assertEqual(info.auth_provider_id, "oidc")
-        self.assertEqual(info.auth_provider_session_id, "sid")
-
-        # Raises with another secret key
-        with self.assertRaises(MacaroonVerificationFailedException):
-            self.other_macaroon_generator.verify_short_term_login_token(token)
-
-        # Wait a minute
-        self.reactor.pump([60])
-        # Shouldn't raise
-        self.macaroon_generator.verify_short_term_login_token(token)
-        # Wait another minute
-        self.reactor.pump([60])
-        # Should raise since it expired
-        with self.assertRaises(MacaroonVerificationFailedException):
-            self.macaroon_generator.verify_short_term_login_token(token)
-
     def test_oidc_session_token(self):
         """Test the generation and verification of OIDC session cookies"""
         state = "arandomstate"
-- 
cgit 1.5.1


From 04fd6221de026a74e8a3e896796d39dcf5ac6e3b Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 26 Oct 2022 14:00:01 +0100
Subject: Fix incorrectly sending authentication tokens to application service
 as headers (#14301)

---
 changelog.d/14301.bugfix     |  1 +
 synapse/appservice/api.py    | 12 +++++++-----
 tests/appservice/test_api.py |  8 +++++---
 3 files changed, 13 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/14301.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14301.bugfix b/changelog.d/14301.bugfix
new file mode 100644
index 0000000000..668c1f3b9c
--- /dev/null
+++ b/changelog.d/14301.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.70.0rc1 where access tokens would be incorrectly sent to application services as headers. Application services which were obtaining access tokens from query parameters were not affected.
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index fbac4375b0..60774b240d 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -123,7 +123,7 @@ class ApplicationServiceApi(SimpleHttpClient):
             response = await self.get_json(
                 uri,
                 {"access_token": service.hs_token},
-                headers={"Authorization": f"Bearer {service.hs_token}"},
+                headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
             if response is not None:  # just an empty json object
                 return True
@@ -147,7 +147,7 @@ class ApplicationServiceApi(SimpleHttpClient):
             response = await self.get_json(
                 uri,
                 {"access_token": service.hs_token},
-                headers={"Authorization": f"Bearer {service.hs_token}"},
+                headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
             if response is not None:  # just an empty json object
                 return True
@@ -190,7 +190,9 @@ class ApplicationServiceApi(SimpleHttpClient):
                 b"access_token": service.hs_token,
             }
             response = await self.get_json(
-                uri, args=args, headers={"Authorization": f"Bearer {service.hs_token}"}
+                uri,
+                args=args,
+                headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
             if not isinstance(response, list):
                 logger.warning(
@@ -230,7 +232,7 @@ class ApplicationServiceApi(SimpleHttpClient):
                 info = await self.get_json(
                     uri,
                     {"access_token": service.hs_token},
-                    headers={"Authorization": f"Bearer {service.hs_token}"},
+                    headers={"Authorization": [f"Bearer {service.hs_token}"]},
                 )
 
                 if not _is_valid_3pe_metadata(info):
@@ -327,7 +329,7 @@ class ApplicationServiceApi(SimpleHttpClient):
                 uri=uri,
                 json_body=body,
                 args={"access_token": service.hs_token},
-                headers={"Authorization": f"Bearer {service.hs_token}"},
+                headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
             if logger.isEnabledFor(logging.DEBUG):
                 logger.debug(
diff --git a/tests/appservice/test_api.py b/tests/appservice/test_api.py
index 11008ac1fb..89ee79396f 100644
--- a/tests/appservice/test_api.py
+++ b/tests/appservice/test_api.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, List, Mapping
+from typing import Any, List, Mapping, Sequence, Union
 from unittest.mock import Mock
 
 from twisted.test.proto_helpers import MemoryReactor
@@ -70,13 +70,15 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
         self.request_url = None
 
         async def get_json(
-            url: str, args: Mapping[Any, Any], headers: Mapping[Any, Any]
+            url: str,
+            args: Mapping[Any, Any],
+            headers: Mapping[Union[str, bytes], Sequence[Union[str, bytes]]],
         ) -> List[JsonDict]:
             # Ensure the access token is passed as both a header and query arg.
             if not headers.get("Authorization") or not args.get(b"access_token"):
                 raise RuntimeError("Access token not provided")
 
-            self.assertEqual(headers.get("Authorization"), f"Bearer {TOKEN}")
+            self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"])
             self.assertEqual(args.get(b"access_token"), TOKEN)
             self.request_url = url
             if url == URL_USER:
-- 
cgit 1.5.1


From 0cfbb3513152b8360155c2d75df50e06ea861fa4 Mon Sep 17 00:00:00 2001
From: Ashish Kumar <ashfame@users.noreply.github.com>
Date: Wed, 26 Oct 2022 18:51:23 +0400
Subject: fix broken avatar checks when server_name contains a port (#13927)

Fixes `check_avatar_size_and_mime_type()` so that avatars can be updated on homeservers whose `server_name` includes an explicit port. Previously, such a homeserver was mistakenly treated as a remote homeserver while validating the avatar's size and MIME type.

Signed-off-by: Ashish Kumar ashfame@users.noreply.github.com
---
 changelog.d/13927.bugfix       |  1 +
 synapse/handlers/profile.py    |  6 +++++-
 tests/handlers/test_profile.py | 49 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/13927.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13927.bugfix b/changelog.d/13927.bugfix
new file mode 100644
index 0000000000..119cd128e7
--- /dev/null
+++ b/changelog.d/13927.bugfix
@@ -0,0 +1 @@
+Fix a bug which prevented setting an avatar on homeservers which have an explicit port in their `server_name` and have `max_avatar_size` and/or `allowed_avatar_mimetypes` configuration. Contributed by @ashfame.
diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py
index d8ff5289b5..4bf9a047a3 100644
--- a/synapse/handlers/profile.py
+++ b/synapse/handlers/profile.py
@@ -307,7 +307,11 @@ class ProfileHandler:
         if not self.max_avatar_size and not self.allowed_avatar_mimetypes:
             return True
 
-        server_name, _, media_id = parse_and_validate_mxc_uri(mxc)
+        host, port, media_id = parse_and_validate_mxc_uri(mxc)
+        if port is not None:
+            server_name = host + ":" + str(port)
+        else:
+            server_name = host
 
         if server_name == self.server_name:
             media_info = await self.store.get_local_media(media_id)
diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py
index f88c725a42..675aa023ac 100644
--- a/tests/handlers/test_profile.py
+++ b/tests/handlers/test_profile.py
@@ -14,6 +14,8 @@
 from typing import Any, Awaitable, Callable, Dict
 from unittest.mock import Mock
 
+from parameterized import parameterized
+
 from twisted.test.proto_helpers import MemoryReactor
 
 import synapse.types
@@ -327,6 +329,53 @@ class ProfileTestCase(unittest.HomeserverTestCase):
         )
         self.assertFalse(res)
 
+    @unittest.override_config(
+        {"server_name": "test:8888", "allowed_avatar_mimetypes": ["image/png"]}
+    )
+    def test_avatar_constraint_on_local_server_with_port(self):
+        """Test that avatar metadata is correctly fetched when the media is on a local
+        server and the server has an explicit port.
+
+        (This was previously a bug)
+        """
+        local_server_name = self.hs.config.server.server_name
+        media_id = "local"
+        local_mxc = f"mxc://{local_server_name}/{media_id}"
+
+        # mock up the existence of the avatar file
+        self._setup_local_files({media_id: {"mimetype": "image/png"}})
+
+        # and now check that check_avatar_size_and_mime_type is happy
+        self.assertTrue(
+            self.get_success(self.handler.check_avatar_size_and_mime_type(local_mxc))
+        )
+
+    @parameterized.expand([("remote",), ("remote:1234",)])
+    @unittest.override_config({"allowed_avatar_mimetypes": ["image/png"]})
+    def test_check_avatar_on_remote_server(self, remote_server_name: str) -> None:
+        """Test that avatar metadata is correctly fetched from a remote server"""
+        media_id = "remote"
+        remote_mxc = f"mxc://{remote_server_name}/{media_id}"
+
+        # if the media is remote, check_avatar_size_and_mime_type just checks the
+        # media cache, so we don't need to instantiate a real remote server. It is
+        # sufficient to poke an entry into the db.
+        self.get_success(
+            self.hs.get_datastores().main.store_cached_remote_media(
+                media_id=media_id,
+                media_type="image/png",
+                media_length=50,
+                origin=remote_server_name,
+                time_now_ms=self.clock.time_msec(),
+                upload_name=None,
+                filesystem_id="xyz",
+            )
+        )
+
+        self.assertTrue(
+            self.get_success(self.handler.check_avatar_size_and_mime_type(remote_mxc))
+        )
+
     def _setup_local_files(self, names_and_props: Dict[str, Dict[str, Any]]):
         """Stores metadata about files in the database.
 
-- 
cgit 1.5.1


From 40fa8294e3096132819287dd0c6d6bd71a408902 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Wed, 26 Oct 2022 16:10:55 -0500
Subject: Refactor MSC3030 `/timestamp_to_event` to move away from our
 snowflake pull from `destination` pattern (#14096)

 1. `federation_client.timestamp_to_event(...)` now handles all `destination` looping and uses our generic `_try_destination_list(...)` helper.
 2. Consistently handling `NotRetryingDestination` and `FederationDeniedError` across `get_pdu`, backfill, and the generic `_try_destination_list` helper, which is used in the many places that follow this pattern.
 3. `get_pdu(...)` now returns `PulledPduInfo` so we know which `destination` we ended up pulling the PDU from
---
 changelog.d/14096.misc                     |   1 +
 synapse/federation/federation_client.py    | 130 ++++++++++++++++++++++++-----
 synapse/handlers/federation.py             |  15 ++--
 synapse/handlers/federation_event.py       |  31 ++++---
 synapse/handlers/room.py                   | 126 +++++++++++-----------------
 synapse/util/retryutils.py                 |   2 +-
 tests/federation/test_federation_client.py |  12 ++-
 7 files changed, 191 insertions(+), 126 deletions(-)
 create mode 100644 changelog.d/14096.misc

(limited to 'synapse')

diff --git a/changelog.d/14096.misc b/changelog.d/14096.misc
new file mode 100644
index 0000000000..2c07dc673b
--- /dev/null
+++ b/changelog.d/14096.misc
@@ -0,0 +1 @@
+Refactor [MSC3030](https://github.com/matrix-org/matrix-spec-proposals/pull/3030) `/timestamp_to_event` endpoint to loop over federation destinations with standard pattern and error handling.
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index b220ab43fc..fa225182be 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -80,6 +80,18 @@ PDU_RETRY_TIME_MS = 1 * 60 * 1000
 T = TypeVar("T")
 
 
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class PulledPduInfo:
+    """
+    A result object that stores the PDU and info about it like which homeserver we
+    pulled it from (`pull_origin`)
+    """
+
+    pdu: EventBase
+    # Which homeserver we pulled the PDU from
+    pull_origin: str
+
+
 class InvalidResponseError(RuntimeError):
     """Helper for _try_destination_list: indicates that the server returned a response
     we couldn't parse
@@ -114,7 +126,9 @@ class FederationClient(FederationBase):
         self.hostname = hs.hostname
         self.signing_key = hs.signing_key
 
-        self._get_pdu_cache: ExpiringCache[str, EventBase] = ExpiringCache(
+        # Cache mapping `event_id` to a tuple of the event itself and the `pull_origin`
+        # (which server we pulled the event from)
+        self._get_pdu_cache: ExpiringCache[str, Tuple[EventBase, str]] = ExpiringCache(
             cache_name="get_pdu_cache",
             clock=self._clock,
             max_len=1000,
@@ -352,11 +366,11 @@ class FederationClient(FederationBase):
     @tag_args
     async def get_pdu(
         self,
-        destinations: Iterable[str],
+        destinations: Collection[str],
         event_id: str,
         room_version: RoomVersion,
         timeout: Optional[int] = None,
-    ) -> Optional[EventBase]:
+    ) -> Optional[PulledPduInfo]:
         """Requests the PDU with given origin and ID from the remote home
         servers.
 
@@ -371,11 +385,11 @@ class FederationClient(FederationBase):
                 moving to the next destination. None indicates no timeout.
 
         Returns:
-            The requested PDU, or None if we were unable to find it.
+            The requested PDU wrapped in `PulledPduInfo`, or None if we were unable to find it.
         """
 
         logger.debug(
-            "get_pdu: event_id=%s from destinations=%s", event_id, destinations
+            "get_pdu(event_id=%s): from destinations=%s", event_id, destinations
         )
 
         # TODO: Rate limit the number of times we try and get the same event.
@@ -384,19 +398,25 @@ class FederationClient(FederationBase):
         # it gets persisted to the database), so we cache the results of the lookup.
         # Note that this is separate to the regular get_event cache which caches
         # events once they have been persisted.
-        event = self._get_pdu_cache.get(event_id)
+        get_pdu_cache_entry = self._get_pdu_cache.get(event_id)
 
+        event = None
+        pull_origin = None
+        if get_pdu_cache_entry:
+            event, pull_origin = get_pdu_cache_entry
         # If we don't see the event in the cache, go try to fetch it from the
         # provided remote federated destinations
-        if not event:
+        else:
             pdu_attempts = self.pdu_destination_tried.setdefault(event_id, {})
 
+            # TODO: We can probably refactor this to use `_try_destination_list`
             for destination in destinations:
                 now = self._clock.time_msec()
                 last_attempt = pdu_attempts.get(destination, 0)
                 if last_attempt + PDU_RETRY_TIME_MS > now:
                     logger.debug(
-                        "get_pdu: skipping destination=%s because we tried it recently last_attempt=%s and we only check every %s (now=%s)",
+                        "get_pdu(event_id=%s): skipping destination=%s because we tried it recently last_attempt=%s and we only check every %s (now=%s)",
+                        event_id,
                         destination,
                         last_attempt,
                         PDU_RETRY_TIME_MS,
@@ -411,43 +431,48 @@ class FederationClient(FederationBase):
                         room_version=room_version,
                         timeout=timeout,
                     )
+                    pull_origin = destination
 
                     pdu_attempts[destination] = now
 
                     if event:
                         # Prime the cache
-                        self._get_pdu_cache[event.event_id] = event
+                        self._get_pdu_cache[event.event_id] = (event, pull_origin)
 
                         # Now that we have an event, we can break out of this
                         # loop and stop asking other destinations.
                         break
 
+                except NotRetryingDestination as e:
+                    logger.info("get_pdu(event_id=%s): %s", event_id, e)
+                    continue
+                except FederationDeniedError:
+                    logger.info(
+                        "get_pdu(event_id=%s): Not attempting to fetch PDU from %s because the homeserver is not on our federation whitelist",
+                        event_id,
+                        destination,
+                    )
+                    continue
                 except SynapseError as e:
                     logger.info(
-                        "Failed to get PDU %s from %s because %s",
+                        "get_pdu(event_id=%s): Failed to get PDU from %s because %s",
                         event_id,
                         destination,
                         e,
                     )
                     continue
-                except NotRetryingDestination as e:
-                    logger.info(str(e))
-                    continue
-                except FederationDeniedError as e:
-                    logger.info(str(e))
-                    continue
                 except Exception as e:
                     pdu_attempts[destination] = now
 
                     logger.info(
-                        "Failed to get PDU %s from %s because %s",
+                        "get_pdu(event_id=%s): Failed to get PDU from %s because %s",
                         event_id,
                         destination,
                         e,
                     )
                     continue
 
-        if not event:
+        if not event or not pull_origin:
             return None
 
         # `event` now refers to an object stored in `get_pdu_cache`. Our
@@ -459,7 +484,7 @@ class FederationClient(FederationBase):
             event.room_version,
         )
 
-        return event_copy
+        return PulledPduInfo(event_copy, pull_origin)
 
     @trace
     @tag_args
@@ -699,12 +724,14 @@ class FederationClient(FederationBase):
         pdu_origin = get_domain_from_id(pdu.sender)
         if not res and pdu_origin != origin:
             try:
-                res = await self.get_pdu(
+                pulled_pdu_info = await self.get_pdu(
                     destinations=[pdu_origin],
                     event_id=pdu.event_id,
                     room_version=room_version,
                     timeout=10000,
                 )
+                if pulled_pdu_info is not None:
+                    res = pulled_pdu_info.pdu
             except SynapseError:
                 pass
 
@@ -806,6 +833,7 @@ class FederationClient(FederationBase):
             )
 
         for destination in destinations:
+            # We don't want to ask our own server for information we don't have
             if destination == self.server_name:
                 continue
 
@@ -814,9 +842,21 @@ class FederationClient(FederationBase):
             except (
                 RequestSendFailed,
                 InvalidResponseError,
-                NotRetryingDestination,
             ) as e:
                 logger.warning("Failed to %s via %s: %s", description, destination, e)
+                # Skip to the next homeserver in the list to try.
+                continue
+            except NotRetryingDestination as e:
+                logger.info("%s: %s", description, e)
+                continue
+            except FederationDeniedError:
+                logger.info(
+                    "%s: Not attempting to %s from %s because the homeserver is not on our federation whitelist",
+                    description,
+                    description,
+                    destination,
+                )
+                continue
             except UnsupportedRoomVersionError:
                 raise
             except HttpResponseException as e:
@@ -1609,6 +1649,54 @@ class FederationClient(FederationBase):
         return result
 
     async def timestamp_to_event(
+        self, *, destinations: List[str], room_id: str, timestamp: int, direction: str
+    ) -> Optional["TimestampToEventResponse"]:
+        """
+        Calls each remote federating server from `destinations` asking for their closest
+        event to the given timestamp in the given direction until we get a response.
+        Also validates the response to always return the expected keys or raises an
+        error.
+
+        Args:
+            destinations: The domains of homeservers to try fetching from
+            room_id: Room to fetch the event from
+            timestamp: The point in time (inclusive) we should navigate from in
+                the given direction to find the closest event.
+            direction: ["f"|"b"] to indicate whether we should navigate forward
+                or backward from the given timestamp to find the closest event.
+
+        Returns:
+            A parsed TimestampToEventResponse including the closest event_id
+            and origin_server_ts or None if no destination has a response.
+        """
+
+        async def _timestamp_to_event_from_destination(
+            destination: str,
+        ) -> TimestampToEventResponse:
+            return await self._timestamp_to_event_from_destination(
+                destination, room_id, timestamp, direction
+            )
+
+        try:
+            # Loop through each homeserver candidate until we get a successful response
+            timestamp_to_event_response = await self._try_destination_list(
+                "timestamp_to_event",
+                destinations,
+                # TODO: The requested timestamp may lie in a part of the
+                #   event graph that the remote server *also* didn't have,
+                #   in which case they will have returned another event
+                #   which may be nowhere near the requested timestamp. In
+                #   the future, we may need to reconcile that gap and ask
+                #   other homeservers, and/or extend `/timestamp_to_event`
+                #   to return events on *both* sides of the timestamp to
+                #   help reconcile the gap faster.
+                _timestamp_to_event_from_destination,
+            )
+            return timestamp_to_event_response
+        except SynapseError:
+            return None
+
+    async def _timestamp_to_event_from_destination(
         self, destination: str, room_id: str, timestamp: int, direction: str
     ) -> "TimestampToEventResponse":
         """
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 4fbc79a6cb..5fc3b8bc8c 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -442,6 +442,15 @@ class FederationHandler:
                     # appropriate stuff.
                     # TODO: We can probably do something more intelligent here.
                     return True
+                except NotRetryingDestination as e:
+                    logger.info("_maybe_backfill_inner: %s", e)
+                    continue
+                except FederationDeniedError:
+                    logger.info(
+                        "_maybe_backfill_inner: Not attempting to backfill from %s because the homeserver is not on our federation whitelist",
+                        dom,
+                    )
+                    continue
                 except (SynapseError, InvalidResponseError) as e:
                     logger.info("Failed to backfill from %s because %s", dom, e)
                     continue
@@ -477,15 +486,9 @@ class FederationHandler:
 
                     logger.info("Failed to backfill from %s because %s", dom, e)
                     continue
-                except NotRetryingDestination as e:
-                    logger.info(str(e))
-                    continue
                 except RequestSendFailed as e:
                     logger.info("Failed to get backfill from %s because %s", dom, e)
                     continue
-                except FederationDeniedError as e:
-                    logger.info(e)
-                    continue
                 except Exception as e:
                     logger.exception("Failed to backfill from %s because %s", dom, e)
                     continue
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 7da6316a82..9ca5df7c78 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -58,7 +58,7 @@ from synapse.event_auth import (
 )
 from synapse.events import EventBase
 from synapse.events.snapshot import EventContext
-from synapse.federation.federation_client import InvalidResponseError
+from synapse.federation.federation_client import InvalidResponseError, PulledPduInfo
 from synapse.logging.context import nested_logging_context
 from synapse.logging.opentracing import (
     SynapseTags,
@@ -1517,8 +1517,8 @@ class FederationEventHandler:
         )
 
     async def backfill_event_id(
-        self, destination: str, room_id: str, event_id: str
-    ) -> EventBase:
+        self, destinations: List[str], room_id: str, event_id: str
+    ) -> PulledPduInfo:
         """Backfill a single event and persist it as a non-outlier which means
         we also pull in all of the state and auth events necessary for it.
 
@@ -1530,24 +1530,21 @@ class FederationEventHandler:
         Raises:
             FederationError if we are unable to find the event from the destination
         """
-        logger.info(
-            "backfill_event_id: event_id=%s from destination=%s", event_id, destination
-        )
+        logger.info("backfill_event_id: event_id=%s", event_id)
 
         room_version = await self._store.get_room_version(room_id)
 
-        event_from_response = await self._federation_client.get_pdu(
-            [destination],
+        pulled_pdu_info = await self._federation_client.get_pdu(
+            destinations,
             event_id,
             room_version,
         )
 
-        if not event_from_response:
+        if not pulled_pdu_info:
             raise FederationError(
                 "ERROR",
                 404,
-                "Unable to find event_id=%s from destination=%s to backfill."
-                % (event_id, destination),
+                f"Unable to find event_id={event_id} from remote servers to backfill.",
                 affected=event_id,
             )
 
@@ -1555,13 +1552,13 @@ class FederationEventHandler:
         # and auth events to de-outlier it. This also sets up the necessary
         # `state_groups` for the event.
         await self._process_pulled_events(
-            destination,
-            [event_from_response],
+            pulled_pdu_info.pull_origin,
+            [pulled_pdu_info.pdu],
             # Prevent notifications going to clients
             backfilled=True,
         )
 
-        return event_from_response
+        return pulled_pdu_info
 
     @trace
     @tag_args
@@ -1584,19 +1581,19 @@ class FederationEventHandler:
         async def get_event(event_id: str) -> None:
             with nested_logging_context(event_id):
                 try:
-                    event = await self._federation_client.get_pdu(
+                    pulled_pdu_info = await self._federation_client.get_pdu(
                         [destination],
                         event_id,
                         room_version,
                     )
-                    if event is None:
+                    if pulled_pdu_info is None:
                         logger.warning(
                             "Server %s didn't return event %s",
                             destination,
                             event_id,
                         )
                         return
-                    events.append(event)
+                    events.append(pulled_pdu_info.pdu)
 
                 except Exception as e:
                     logger.warning(
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index cc1e5c8f97..de97886ea9 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -49,7 +49,6 @@ from synapse.api.constants import (
 from synapse.api.errors import (
     AuthError,
     Codes,
-    HttpResponseException,
     LimitExceededError,
     NotFoundError,
     StoreError,
@@ -60,7 +59,6 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion
 from synapse.event_auth import validate_event_for_room_version
 from synapse.events import EventBase
 from synapse.events.utils import copy_and_fixup_power_levels_contents
-from synapse.federation.federation_client import InvalidResponseError
 from synapse.handlers.relations import BundledAggregations
 from synapse.module_api import NOT_SPAM
 from synapse.rest.admin._base import assert_user_is_admin
@@ -1472,7 +1470,12 @@ class TimestampLookupHandler:
         Raises:
             SynapseError if unable to find any event locally in the given direction
         """
-
+        logger.debug(
+            "get_event_for_timestamp(room_id=%s, timestamp=%s, direction=%s) Finding closest event...",
+            room_id,
+            timestamp,
+            direction,
+        )
         local_event_id = await self.store.get_event_id_for_timestamp(
             room_id, timestamp, direction
         )
@@ -1524,85 +1527,54 @@ class TimestampLookupHandler:
                 )
             )
 
-            # Loop through each homeserver candidate until we get a succesful response
-            for domain in likely_domains:
-                # We don't want to ask our own server for information we don't have
-                if domain == self.server_name:
-                    continue
+            remote_response = await self.federation_client.timestamp_to_event(
+                destinations=likely_domains,
+                room_id=room_id,
+                timestamp=timestamp,
+                direction=direction,
+            )
+            if remote_response is not None:
+                logger.debug(
+                    "get_event_for_timestamp: remote_response=%s",
+                    remote_response,
+                )
 
-                try:
-                    remote_response = await self.federation_client.timestamp_to_event(
-                        domain, room_id, timestamp, direction
-                    )
-                    logger.debug(
-                        "get_event_for_timestamp: response from domain(%s)=%s",
-                        domain,
-                        remote_response,
-                    )
+                remote_event_id = remote_response.event_id
+                remote_origin_server_ts = remote_response.origin_server_ts
 
-                    remote_event_id = remote_response.event_id
-                    remote_origin_server_ts = remote_response.origin_server_ts
-
-                    # Backfill this event so we can get a pagination token for
-                    # it with `/context` and paginate `/messages` from this
-                    # point.
-                    #
-                    # TODO: The requested timestamp may lie in a part of the
-                    #   event graph that the remote server *also* didn't have,
-                    #   in which case they will have returned another event
-                    #   which may be nowhere near the requested timestamp. In
-                    #   the future, we may need to reconcile that gap and ask
-                    #   other homeservers, and/or extend `/timestamp_to_event`
-                    #   to return events on *both* sides of the timestamp to
-                    #   help reconcile the gap faster.
-                    remote_event = (
-                        await self.federation_event_handler.backfill_event_id(
-                            domain, room_id, remote_event_id
-                        )
-                    )
+                # Backfill this event so we can get a pagination token for
+                # it with `/context` and paginate `/messages` from this
+                # point.
+                pulled_pdu_info = await self.federation_event_handler.backfill_event_id(
+                    likely_domains, room_id, remote_event_id
+                )
+                remote_event = pulled_pdu_info.pdu
 
-                    # XXX: When we see that the remote server is not trustworthy,
-                    # maybe we should not ask them first in the future.
-                    if remote_origin_server_ts != remote_event.origin_server_ts:
-                        logger.info(
-                            "get_event_for_timestamp: Remote server (%s) claimed that remote_event_id=%s occured at remote_origin_server_ts=%s but that isn't true (actually occured at %s). Their claims are dubious and we should consider not trusting them.",
-                            domain,
-                            remote_event_id,
-                            remote_origin_server_ts,
-                            remote_event.origin_server_ts,
-                        )
-
-                    # Only return the remote event if it's closer than the local event
-                    if not local_event or (
-                        abs(remote_event.origin_server_ts - timestamp)
-                        < abs(local_event.origin_server_ts - timestamp)
-                    ):
-                        logger.info(
-                            "get_event_for_timestamp: returning remote_event_id=%s (%s) since it's closer to timestamp=%s than local_event=%s (%s)",
-                            remote_event_id,
-                            remote_event.origin_server_ts,
-                            timestamp,
-                            local_event.event_id if local_event else None,
-                            local_event.origin_server_ts if local_event else None,
-                        )
-                        return remote_event_id, remote_origin_server_ts
-                except (HttpResponseException, InvalidResponseError) as ex:
-                    # Let's not put a high priority on some other homeserver
-                    # failing to respond or giving a random response
-                    logger.debug(
-                        "get_event_for_timestamp: Failed to fetch /timestamp_to_event from %s because of exception(%s) %s args=%s",
-                        domain,
-                        type(ex).__name__,
-                        ex,
-                        ex.args,
+                # XXX: When we see that the remote server is not trustworthy,
+                # maybe we should not ask them first in the future.
+                if remote_origin_server_ts != remote_event.origin_server_ts:
+                    logger.info(
+                        "get_event_for_timestamp: Remote server (%s) claimed that remote_event_id=%s occured at remote_origin_server_ts=%s but that isn't true (actually occured at %s). Their claims are dubious and we should consider not trusting them.",
+                        pulled_pdu_info.pull_origin,
+                        remote_event_id,
+                        remote_origin_server_ts,
+                        remote_event.origin_server_ts,
                     )
-                except Exception:
-                    # But we do want to see some exceptions in our code
-                    logger.warning(
-                        "get_event_for_timestamp: Failed to fetch /timestamp_to_event from %s because of exception",
-                        domain,
-                        exc_info=True,
+
+                # Only return the remote event if it's closer than the local event
+                if not local_event or (
+                    abs(remote_event.origin_server_ts - timestamp)
+                    < abs(local_event.origin_server_ts - timestamp)
+                ):
+                    logger.info(
+                        "get_event_for_timestamp: returning remote_event_id=%s (%s) since it's closer to timestamp=%s than local_event=%s (%s)",
+                        remote_event_id,
+                        remote_event.origin_server_ts,
+                        timestamp,
+                        local_event.event_id if local_event else None,
+                        local_event.origin_server_ts if local_event else None,
                     )
+                    return remote_event_id, remote_origin_server_ts
 
         # To appease mypy, we have to add both of these conditions to check for
         # `None`. We only expect `local_event` to be `None` when
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index d0a69ff843..dcc037b982 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -51,7 +51,7 @@ class NotRetryingDestination(Exception):
             destination: the domain in question
         """
 
-        msg = "Not retrying server %s." % (destination,)
+        msg = f"Not retrying server {destination} because we tried it recently retry_last_ts={retry_last_ts} and we won't check for another retry_interval={retry_interval}ms."
         super().__init__(msg)
 
         self.retry_last_ts = retry_last_ts
diff --git a/tests/federation/test_federation_client.py b/tests/federation/test_federation_client.py
index 51d3bb8fff..e67f405826 100644
--- a/tests/federation/test_federation_client.py
+++ b/tests/federation/test_federation_client.py
@@ -142,14 +142,14 @@ class FederationClientTest(FederatingHomeserverTestCase):
 
     def test_get_pdu_returns_nothing_when_event_does_not_exist(self):
         """No event should be returned when the event does not exist"""
-        remote_pdu = self.get_success(
+        pulled_pdu_info = self.get_success(
             self.hs.get_federation_client().get_pdu(
                 ["yet.another.server"],
                 "event_should_not_exist",
                 RoomVersions.V9,
             )
         )
-        self.assertEqual(remote_pdu, None)
+        self.assertEqual(pulled_pdu_info, None)
 
     def test_get_pdu(self):
         """Test to make sure an event is returned by `get_pdu()`"""
@@ -169,13 +169,15 @@ class FederationClientTest(FederatingHomeserverTestCase):
         remote_pdu.internal_metadata.outlier = True
 
         # Get the event again. This time it should read it from cache.
-        remote_pdu2 = self.get_success(
+        pulled_pdu_info2 = self.get_success(
             self.hs.get_federation_client().get_pdu(
                 ["yet.another.server"],
                 remote_pdu.event_id,
                 RoomVersions.V9,
             )
         )
+        self.assertIsNotNone(pulled_pdu_info2)
+        remote_pdu2 = pulled_pdu_info2.pdu
 
         # Sanity check that we are working against the same event
         self.assertEqual(remote_pdu.event_id, remote_pdu2.event_id)
@@ -215,13 +217,15 @@ class FederationClientTest(FederatingHomeserverTestCase):
             )
         )
 
-        remote_pdu = self.get_success(
+        pulled_pdu_info = self.get_success(
             self.hs.get_federation_client().get_pdu(
                 ["yet.another.server"],
                 "event_id",
                 RoomVersions.V9,
             )
         )
+        self.assertIsNotNone(pulled_pdu_info)
+        remote_pdu = pulled_pdu_info.pdu
 
         # check the right call got made to the agent
         self._mock_agent.request.assert_called_once_with(
-- 
cgit 1.5.1


From cbe01ccc3f9c09a0a7233f90200fbcb8ae5245cf Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 27 Oct 2022 10:52:23 +0100
Subject: Reject history insertion during partial joins (#14291)

---
 changelog.d/14291.bugfix          | 1 +
 synapse/rest/client/room_batch.py | 7 +++++++
 2 files changed, 8 insertions(+)
 create mode 100644 changelog.d/14291.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14291.bugfix b/changelog.d/14291.bugfix
new file mode 100644
index 0000000000..bac5065e94
--- /dev/null
+++ b/changelog.d/14291.bugfix
@@ -0,0 +1 @@
+Prevent history insertion ([MSC2716](https://github.com/matrix-org/matrix-spec-proposals/pull/2716)) during a partial join ([MSC3706](https://github.com/matrix-org/matrix-spec-proposals/pull/3706)).
diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py
index dd91dabedd..10be4a781b 100644
--- a/synapse/rest/client/room_batch.py
+++ b/synapse/rest/client/room_batch.py
@@ -108,6 +108,13 @@ class RoomBatchSendEventRestServlet(RestServlet):
                 errcode=Codes.MISSING_PARAM,
             )
 
+        if await self.store.is_partial_state_room(room_id):
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST,
+                "Cannot insert history batches until we have fully joined the room",
+                errcode=Codes.UNABLE_DUE_TO_PARTIAL_STATE,
+            )
+
         # Verify the batch_id_from_query corresponds to an actual insertion event
         # and have the batch connected.
         if batch_id_from_query:
-- 
cgit 1.5.1


From 4dc05f30193935224103e8772b1bbc15293e5cb6 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Thu, 27 Oct 2022 14:16:00 +0200
Subject: Fix presence bug introduced in 1.64 by #13313 (#14243)

* Fix presence bug introduced in 1.64 by #13313

Signed-off-by: Mathieu Velten <mathieuv@matrix.org>

* Add changelog

* Add DISTINCT

* Apply suggestions from code review

Signed-off-by: Mathieu Velten <mathieuv@matrix.org>
---
 changelog.d/14243.bugfix                     | 1 +
 synapse/storage/databases/main/roommember.py | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14243.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14243.bugfix b/changelog.d/14243.bugfix
new file mode 100644
index 0000000000..ac0b21c2c5
--- /dev/null
+++ b/changelog.d/14243.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.64.0 where presence updates could be missing from `/sync` responses.
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 32e1e983a5..ab708b0ba5 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -742,7 +742,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
             # user and the set of other users, and then checking if there is any
             # overlap.
             sql = f"""
-                SELECT b.state_key
+                SELECT DISTINCT b.state_key
                 FROM (
                     SELECT room_id FROM current_state_events
                     WHERE type = 'm.room.member' AND membership = 'join' AND state_key = ?
@@ -751,7 +751,6 @@ class RoomMemberWorkerStore(EventsWorkerStore):
                     SELECT room_id, state_key FROM current_state_events
                     WHERE type = 'm.room.member' AND membership = 'join' AND {clause}
                 ) AS b using (room_id)
-                LIMIT 1
             """
 
             txn.execute(sql, (user_id, *args))
-- 
cgit 1.5.1


From 1357ae869f279a3f0855c1b1c2750eca2887928e Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Thu, 27 Oct 2022 15:39:47 +0200
Subject: Add workers settings to configuration manual (#14086)

* Add workers settings to configuration manual
* Update `pusher_instances`
* update url to python logger
* update headlines
* update links after headline change
* remove link from `daemon process`

There is no docs in Synapse for this

* extend example for `federation_sender_instances` and `pusher_instances`
* more infos about stream writers
* add link to DAG
* update `pusher_instances`
* update `worker_listeners`
* update `stream_writers`
* Update `worker_name`

Co-authored-by: David Robertson <davidr@element.io>
---
 changelog.d/14086.doc                            |   1 +
 docs/sample_log_config.yaml                      |   2 +-
 docs/usage/configuration/config_documentation.md | 268 +++++++++++++++++++----
 docs/workers.md                                  | 100 ++++++---
 synapse/config/logger.py                         |   2 +-
 5 files changed, 291 insertions(+), 82 deletions(-)
 create mode 100644 changelog.d/14086.doc

(limited to 'synapse')

diff --git a/changelog.d/14086.doc b/changelog.d/14086.doc
new file mode 100644
index 0000000000..5b4b938759
--- /dev/null
+++ b/changelog.d/14086.doc
@@ -0,0 +1 @@
+Add workers settings to [configuration manual](https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#individual-worker-configuration).
\ No newline at end of file
diff --git a/docs/sample_log_config.yaml b/docs/sample_log_config.yaml
index 3065a0e2d9..6339160d00 100644
--- a/docs/sample_log_config.yaml
+++ b/docs/sample_log_config.yaml
@@ -6,7 +6,7 @@
 # Synapse also supports structured logging for machine readable logs which can
 # be ingested by ELK stacks. See [2] for details.
 #
-# [1]: https://docs.python.org/3.7/library/logging.config.html#configuration-dictionary-schema
+# [1]: https://docs.python.org/3/library/logging.config.html#configuration-dictionary-schema
 # [2]: https://matrix-org.github.io/synapse/latest/structured_logging.html
 
 version: 1
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index d81eda52c1..fb5eb42c52 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -99,7 +99,7 @@ modules:
     config: {}
 ```
 ---
-## Server ##
+## Server
 
 Define your homeserver name and other base options.
 
@@ -159,7 +159,7 @@ including _matrix/...). This is the same URL a user might enter into the
 'Custom Homeserver URL' field on their client. If you use Synapse with a
 reverse proxy, this should be the URL to reach Synapse via the proxy.
 Otherwise, it should be the URL to reach Synapse's client HTTP listener (see
-'listeners' below).
+['listeners'](#listeners) below).
 
 Defaults to `https://<server_name>/`.
 
@@ -570,7 +570,7 @@ Example configuration:
 delete_stale_devices_after: 1y
 ```
 
-## Homeserver blocking ##
+## Homeserver blocking
 Useful options for Synapse admins.
 
 ---
@@ -922,7 +922,7 @@ retention:
       interval: 1d
 ```
 ---
-## TLS ##
+## TLS
 
 Options related to TLS.
 
@@ -1012,7 +1012,7 @@ federation_custom_ca_list:
   - myCA3.pem
 ```
 ---
-## Federation ##
+## Federation
 
 Options related to federation.
 
@@ -1071,7 +1071,7 @@ Example configuration:
 allow_device_name_lookup_over_federation: true
 ```
 ---
-## Caching ##
+## Caching
 
 Options related to caching.
 
@@ -1185,7 +1185,7 @@ file in Synapse's `contrib` directory, you can send a `SIGHUP` signal by using
 `systemctl reload matrix-synapse`.
 
 ---
-## Database ##
+## Database
 Config options related to database settings.
 
 ---
@@ -1332,20 +1332,21 @@ databases:
       cp_max: 10
 ```
 ---
-## Logging ##
+## Logging
 Config options related to logging.
 
 ---
 ### `log_config`
 
-This option specifies a yaml python logging config file as described [here](https://docs.python.org/3.7/library/logging.config.html#configuration-dictionary-schema).
+This option specifies a yaml python logging config file as described
+[here](https://docs.python.org/3/library/logging.config.html#configuration-dictionary-schema).
 
 Example configuration:
 ```yaml
 log_config: "CONFDIR/SERVERNAME.log.config"
 ```
 ---
-## Ratelimiting ##
+## Ratelimiting
 Options related to ratelimiting in Synapse.
 
 Each ratelimiting configuration is made of two parameters:
@@ -1576,7 +1577,7 @@ Example configuration:
 federation_rr_transactions_per_room_per_second: 40
 ```
 ---
-## Media Store ##
+## Media Store
 Config options related to Synapse's media store.
 
 ---
@@ -1766,7 +1767,7 @@ url_preview_ip_range_blacklist:
   - 'ff00::/8'
   - 'fec0::/10'
 ```
-----
+---
 ### `url_preview_ip_range_whitelist`
 
 This option sets a list of IP address CIDR ranges that the URL preview spider is allowed
@@ -1860,7 +1861,7 @@ Example configuration:
    - 'fr;q=0.8'
    - '*;q=0.7'
 ```
-----
+---
 ### `oembed`
 
 oEmbed allows for easier embedding content from a website. It can be
@@ -1877,7 +1878,7 @@ oembed:
     - oembed/my_providers.json
 ```
 ---
-## Captcha ##
+## Captcha
 
 See [here](../../CAPTCHA_SETUP.md) for full details on setting up captcha.
 
@@ -1926,7 +1927,7 @@ Example configuration:
 recaptcha_siteverify_api: "https://my.recaptcha.site"
 ```
 ---
-## TURN ##
+## TURN
 Options related to adding a TURN server to Synapse.
 
 ---
@@ -1947,7 +1948,7 @@ Example configuration:
 ```yaml
 turn_shared_secret: "YOUR_SHARED_SECRET"
 ```
-----
+---
 ### `turn_username` and `turn_password`
 
 The Username and password if the TURN server needs them and does not use a token.
@@ -2366,7 +2367,7 @@ Example configuration:
 ```yaml
 session_lifetime: 24h
 ```
-----
+---
 ### `refresh_access_token_lifetime`
 
 Time that an access token remains valid for, if the session is using refresh tokens.
@@ -2422,7 +2423,7 @@ nonrefreshable_access_token_lifetime: 24h
 ```
 
 ---
-## Metrics ###
+## Metrics
 Config options related to metrics.
 
 ---
@@ -2519,7 +2520,7 @@ Example configuration:
 report_stats_endpoint: https://example.com/report-usage-stats/push
 ```
 ---
-## API Configuration ##
+## API Configuration
 Config settings related to the client/server API
 
 ---
@@ -2619,7 +2620,7 @@ Example configuration:
 form_secret: <PRIVATE STRING>
 ```
 ---
-## Signing Keys ##
+## Signing Keys
 Config options relating to signing keys
 
 ---
@@ -2728,7 +2729,7 @@ Example configuration:
 key_server_signing_keys_path: "key_server_signing_keys.key"
 ```
 ---
-## Single sign-on integration ##
+## Single sign-on integration
 
 The following settings can be used to make Synapse use a single sign-on
 provider for authentication, instead of its internal password database.
@@ -3348,7 +3349,7 @@ email:
     email_validation: "[%(server_name)s] Validate your email"
 ```
 ---
-## Push ##
+## Push
 Configuration settings related to push notifications
 
 ---
@@ -3381,7 +3382,7 @@ push:
   group_unread_count_by_room: false
 ```
 ---
-## Rooms ##
+## Rooms
 Config options relating to rooms.
 
 ---
@@ -3627,7 +3628,7 @@ default_power_level_content_override:
 ```
 
 ---
-## Opentracing ##
+## Opentracing
 Configuration options related to Opentracing support.
 
 ---
@@ -3670,14 +3671,71 @@ opentracing:
         false
 ```
 ---
-## Workers ##
-Configuration options related to workers.
+## Coordinating workers
+Configuration options related to workers which belong in the main config file
+(usually called `homeserver.yaml`).
+A Synapse deployment can scale horizontally by running multiple Synapse processes
+called _workers_. Incoming requests are distributed between workers to handle higher
+loads. Some workers are privileged and can accept requests from other workers.
+
+As a result, the worker configuration is divided into two parts.
+
+1. The first part (in this section of the manual) defines which shardable tasks
+   are delegated to privileged workers. This allows unprivileged workers to make
+   a request to a privileged worker to act on their behalf.
+1. [The second part](#individual-worker-configuration)
+   controls the behaviour of individual workers in isolation.
+
+For guidance on setting up workers, see the [worker documentation](../../workers.md).
+
+---
+### `worker_replication_secret`
+
+A shared secret used by the replication APIs on the main process to authenticate
+HTTP requests from workers.
+
+By default, this value is omitted (equivalently `null`), which means that
+traffic between the workers and the main process is not authenticated.
+
+Example configuration:
+```yaml
+worker_replication_secret: "secret_secret"
+```
+---
+### `start_pushers`
+
+Controls sending of push notifications on the main process. Set to `false`
+if using a [pusher worker](../../workers.md#synapseapppusher). Defaults to `true`.
+
+Example configuration:
+```yaml
+start_pushers: false
+```
+---
+### `pusher_instances`
+
+It is possible to run multiple [pusher workers](../../workers.md#synapseapppusher),
+in which case the work is balanced across them. Use this setting to list the pushers by
+[`worker_name`](#worker_name). Ensure the main process and all pusher workers are
+restarted after changing this option.
 
+If no or only one pusher worker is configured, this setting is not necessary.
+The main process will send out push notifications by default if you do not disable
+it by setting [`start_pushers: false`](#start_pushers).
+
+Example configuration:
+```yaml
+start_pushers: false
+pusher_instances:
+  - pusher_worker1
+  - pusher_worker2
+```
 ---
 ### `send_federation`
 
 Controls sending of outbound federation transactions on the main process.
-Set to false if using a federation sender worker. Defaults to true.
+Set to `false` if using a [federation sender worker](../../workers.md#synapseappfederation_sender).
+Defaults to `true`.
 
 Example configuration:
 ```yaml
@@ -3686,8 +3744,9 @@ send_federation: false
 ---
 ### `federation_sender_instances`
 
-It is possible to run multiple federation sender workers, in which case the
-work is balanced across them. Use this setting to list the senders.
+It is possible to run multiple
+[federation sender workers](../../workers.md#synapseappfederation_sender), in which
+case the work is balanced across them. Use this setting to list the senders.
 
 This configuration setting must be shared between all federation sender workers, and if
 changed all federation sender workers must be stopped at the same time and then
@@ -3696,14 +3755,19 @@ events may be dropped).
 
 Example configuration:
 ```yaml
+send_federation: false
 federation_sender_instances:
   - federation_sender1
 ```
 ---
 ### `instance_map`
 
-When using workers this should be a map from worker name to the
+When using workers this should be a map from [`worker_name`](#worker_name) to the
 HTTP replication listener of the worker, if configured.
+Each worker declared under [`stream_writers`](../../workers.md#stream-writers) needs 
+a HTTP replication listener, and that listener should be included in the `instance_map`.
+(The main process also needs an HTTP replication listener, but it should not be 
+listed in the `instance_map`.)
 
 Example configuration:
 ```yaml
@@ -3716,8 +3780,11 @@ instance_map:
 ### `stream_writers`
 
 Experimental: When using workers you can define which workers should
-handle event persistence and typing notifications. Any worker
-specified here must also be in the `instance_map`.
+handle writing to streams such as event persistence and typing notifications.
+Any worker specified here must also be in the [`instance_map`](#instance_map).
+
+See the list of available streams in the
+[worker documentation](../../workers.md#stream-writers).
 
 Example configuration:
 ```yaml
@@ -3728,29 +3795,18 @@ stream_writers:
 ---
 ### `run_background_tasks_on`
 
-The worker that is used to run background tasks (e.g. cleaning up expired
-data). If not provided this defaults to the main process.
+The [worker](../../workers.md#background-tasks) that is used to run
+background tasks (e.g. cleaning up expired data). If not provided this
+defaults to the main process.
 
 Example configuration:
 ```yaml
 run_background_tasks_on: worker1
 ```
 ---
-### `worker_replication_secret`
-
-A shared secret used by the replication APIs to authenticate HTTP requests
-from workers.
-
-By default this is unused and traffic is not authenticated.
-
-Example configuration:
-```yaml
-worker_replication_secret: "secret_secret"
-```
 ### `redis`
 
-Configuration for Redis when using workers. This *must* be enabled when
-using workers (unless using old style direct TCP configuration).
+Configuration for Redis when using workers. This *must* be enabled when using workers.
 This setting has the following sub-options:
 * `enabled`: whether to use Redis support. Defaults to false.
 * `host` and `port`: Optional host and port to use to connect to redis. Defaults to
@@ -3765,7 +3821,123 @@ redis:
   port: 6379
   password: <secret_password>
 ```
-## Background Updates ##
+---
+## Individual worker configuration
+These options configure an individual worker, in its worker configuration file.
+They should not be provided when configuring the main process.
+
+Note also the configuration above for
+[coordinating a cluster of workers](#coordinating-workers).
+
+For guidance on setting up workers, see the [worker documentation](../../workers.md).
+
+---
+### `worker_app`
+
+The type of worker. The currently available worker applications are listed
+in [worker documentation](../../workers.md#available-worker-applications).
+
+The most common worker is the
+[`synapse.app.generic_worker`](../../workers.md#synapseappgeneric_worker).
+
+Example configuration:
+```yaml
+worker_app: synapse.app.generic_worker
+```
+---
+### `worker_name`
+
+A unique name for the worker. The worker needs a name to be addressed in
+further parameters and identification in log files. We strongly recommend
+giving each worker a unique `worker_name`.
+
+Example configuration:
+```yaml
+worker_name: generic_worker1
+```
+---
+### `worker_replication_host`
+
+The HTTP replication endpoint that it should talk to on the main Synapse process.
+The main Synapse process defines this with a `replication` resource in
+[`listeners` option](#listeners).
+
+Example configuration:
+```yaml
+worker_replication_host: 127.0.0.1
+```
+---
+### `worker_replication_http_port`
+
+The HTTP replication port that it should talk to on the main Synapse process.
+The main Synapse process defines this with a `replication` resource in
+[`listeners` option](#listeners).
+
+Example configuration:
+```yaml
+worker_replication_http_port: 9093
+```
+---
+### `worker_listeners`
+
+A worker can handle HTTP requests. To do so, a `worker_listeners` option 
+must be declared, in the same way as the [`listeners` option](#listeners) 
+in the shared config.
+
+Workers declared in [`stream_writers`](#stream_writers) will need to include a
+`replication` listener here, in order to accept internal HTTP requests from
+other workers.
+
+Example configuration:
+```yaml
+worker_listeners:
+  - type: http
+    port: 8083
+    resources:
+      - names: [client, federation]
+```
+---
+### `worker_daemonize`
+
+Specifies whether the worker should be started as a daemon process.
+If Synapse is being managed by [systemd](../../systemd-with-workers/README.md), this option 
+must be omitted or set to `false`.
+
+Defaults to `false`.
+
+Example configuration:
+```yaml
+worker_daemonize: true
+```
+---
+### `worker_pid_file`
+
+When running a worker as a daemon, we need a place to store the 
+[PID](https://en.wikipedia.org/wiki/Process_identifier) of the worker.
+This option defines the location of that "pid file".
+
+This option is required if `worker_daemonize` is `true` and ignored 
+otherwise. It has no default.
+
+See also the [`pid_file` option](#pid_file) for the main Synapse process.
+
+Example configuration:
+```yaml
+worker_pid_file: DATADIR/generic_worker1.pid
+```
+---
+### `worker_log_config`
+
+This option specifies a yaml python logging config file as described
+[here](https://docs.python.org/3/library/logging.config.html#configuration-dictionary-schema).
+See also the [`log_config` option](#log_config) for the main Synapse process.
+
+Example configuration:
+```yaml
+worker_log_config: /etc/matrix-synapse/generic-worker-log.yaml
+```
+---
+## Background Updates
 Configuration settings related to background updates.
 
 ---
diff --git a/docs/workers.md b/docs/workers.md
index c27b3f8bd5..5e1b9ba220 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -88,10 +88,12 @@ shared configuration file.
 ### Shared configuration
 
 Normally, only a couple of changes are needed to make an existing configuration
-file suitable for use with workers. First, you need to enable an "HTTP replication
-listener" for the main process; and secondly, you need to enable redis-based
-replication. Optionally, a shared secret can be used to authenticate HTTP
-traffic between workers. For example:
+file suitable for use with workers. First, you need to enable an
+["HTTP replication listener"](usage/configuration/config_documentation.md#listeners)
+for the main process; and secondly, you need to enable
+[redis-based replication](usage/configuration/config_documentation.md#redis).
+Optionally, a [shared secret](usage/configuration/config_documentation.md#worker_replication_secret)
+can be used to authenticate HTTP traffic between workers. For example:
 
 ```yaml
 # extend the existing `listeners` section. This defines the ports that the
@@ -111,25 +113,28 @@ redis:
     enabled: true
 ```
 
-See the [configuration manual](usage/configuration/config_documentation.html) for the full documentation of each option.
+See the [configuration manual](usage/configuration/config_documentation.md)
+for the full documentation of each option.
 
 Under **no circumstances** should the replication listener be exposed to the
 public internet; replication traffic is:
 
 * always unencrypted
-* unauthenticated, unless `worker_replication_secret` is configured
+* unauthenticated, unless [`worker_replication_secret`](usage/configuration/config_documentation.md#worker_replication_secret)
+  is configured
 
 
 ### Worker configuration
 
 In the config file for each worker, you must specify:
- * The type of worker (`worker_app`). The currently available worker applications are listed below.
- * A unique name for the worker (`worker_name`).
+ * The type of worker ([`worker_app`](usage/configuration/config_documentation.md#worker_app)).
+   The currently available worker applications are listed [below](#available-worker-applications).
+ * A unique name for the worker ([`worker_name`](usage/configuration/config_documentation.md#worker_name)).
  * The HTTP replication endpoint that it should talk to on the main synapse process
-   (`worker_replication_host` and `worker_replication_http_port`)
- * If handling HTTP requests, a `worker_listeners` option with an `http`
-   listener, in the same way as the [`listeners`](usage/configuration/config_documentation.md#listeners)
-   option in the shared config.
+   ([`worker_replication_host`](usage/configuration/config_documentation.md#worker_replication_host) and
+   [`worker_replication_http_port`](usage/configuration/config_documentation.md#worker_replication_http_port)).
+ * If handling HTTP requests, a [`worker_listeners`](usage/configuration/config_documentation.md#worker_listeners) option
+   with an `http` listener.
  * If handling the `^/_matrix/client/v3/keys/upload` endpoint, the HTTP URI for
    the main process (`worker_main_http_uri`).
 
@@ -146,7 +151,6 @@ plain HTTP endpoint on port 8083 separately serving various endpoints, e.g.
 Obviously you should configure your reverse-proxy to route the relevant
 endpoints to the worker (`localhost:8083` in the above example).
 
-
 ### Running Synapse with workers
 
 Finally, you need to start your worker processes. This can be done with either
@@ -288,7 +292,8 @@ For multiple workers not handling the SSO endpoints properly, see
 [#9427](https://github.com/matrix-org/synapse/issues/9427).
 
 Note that a [HTTP listener](usage/configuration/config_documentation.md#listeners)
-with `client` and `federation` `resources` must be configured in the `worker_listeners`
+with `client` and `federation` `resources` must be configured in the
+[`worker_listeners`](usage/configuration/config_documentation.md#worker_listeners)
 option in the worker config.
 
 #### Load balancing
@@ -331,9 +336,10 @@ of the main process to a particular worker.
 
 To enable this, the worker must have a
 [HTTP `replication` listener](usage/configuration/config_documentation.md#listeners) configured,
-have a `worker_name` and be listed in the `instance_map` config. The same worker
-can handle multiple streams, but unless otherwise documented, each stream can only
-have a single writer.
+have a [`worker_name`](usage/configuration/config_documentation.md#worker_name)
+and be listed in the [`instance_map`](usage/configuration/config_documentation.md#instance_map)
+config. The same worker can handle multiple streams, but unless otherwise documented,
+each stream can only have a single writer.
 
 For example, to move event persistence off to a dedicated worker, the shared
 configuration would include:
@@ -360,9 +366,26 @@ streams and the endpoints associated with them:
 
 ##### The `events` stream
 
-The `events` stream experimentally supports having multiple writers, where work
-is sharded between them by room ID. Note that you *must* restart all worker
-instances when adding or removing event persisters. An example `stream_writers`
+The `events` stream experimentally supports having multiple writer workers, where load
+is sharded between them by room ID. Each writer is called an _event persister_. They are
+responsible for
+- receiving new events,
+- linking them to those already in the room [DAG](development/room-dag-concepts.md),
+- persisting them to the DB, and finally
+- updating the events stream.
+
+Because load is sharded in this way, you *must* restart all worker instances when 
+adding or removing event persisters.
+
+An `event_persister` should not be mistaken for an `event_creator`.
+An `event_creator` listens for requests from clients to create new events and does
+so. It will then pass those events over HTTP replication to any configured event
+persisters (or the main process if none are configured).
+
+Note that `event_creator`s and `event_persister`s are implemented using the same
+[`synapse.app.generic_worker`](#synapseappgeneric_worker).
+
+An example [`stream_writers`](usage/configuration/config_documentation.md#stream_writers)
 configuration with multiple writers:
 
 ```yaml
@@ -416,16 +439,18 @@ worker. Background tasks are run periodically or started via replication. Exactl
 which tasks are configured to run depends on your Synapse configuration (e.g. if
 stats is enabled). This worker doesn't handle any REST endpoints itself.
 
-To enable this, the worker must have a `worker_name` and can be configured to run
-background tasks. For example, to move background tasks to a dedicated worker,
-the shared configuration would include:
+To enable this, the worker must have a unique
+[`worker_name`](usage/configuration/config_documentation.md#worker_name)
+and can be configured to run background tasks. For example, to move background tasks
+to a dedicated worker, the shared configuration would include:
 
 ```yaml
 run_background_tasks_on: background_worker
 ```
 
-You might also wish to investigate the `update_user_directory_from_worker` and
-`media_instance_running_background_jobs` settings.
+You might also wish to investigate the
+[`update_user_directory_from_worker`](#updating-the-user-directory) and
+[`media_instance_running_background_jobs`](#synapseappmedia_repository) settings.
 
 An example for a dedicated background worker instance:
 
@@ -478,13 +503,17 @@ worker application type.
 ### `synapse.app.pusher`
 
 Handles sending push notifications to sygnal and email. Doesn't handle any
-REST endpoints itself, but you should set `start_pushers: False` in the
+REST endpoints itself, but you should set
+[`start_pushers: false`](usage/configuration/config_documentation.md#start_pushers) in the
 shared configuration file to stop the main synapse sending push notifications.
 
-To run multiple instances at once the `pusher_instances` option should list all
-pusher instances by their worker name, e.g.:
+To run multiple instances at once the
+[`pusher_instances`](usage/configuration/config_documentation.md#pusher_instances)
+option should list all pusher instances by their
+[`worker_name`](usage/configuration/config_documentation.md#worker_name), e.g.:
 
 ```yaml
+start_pushers: false
 pusher_instances:
     - pusher_worker1
     - pusher_worker2
@@ -512,15 +541,20 @@ Note this worker cannot be load-balanced: only one instance should be active.
 ### `synapse.app.federation_sender`
 
 Handles sending federation traffic to other servers. Doesn't handle any
-REST endpoints itself, but you should set `send_federation: False` in the
-shared configuration file to stop the main synapse sending this traffic.
+REST endpoints itself, but you should set
+[`send_federation: false`](usage/configuration/config_documentation.md#send_federation)
+in the shared configuration file to stop the main synapse sending this traffic.
 
 If running multiple federation senders then you must list each
-instance in the `federation_sender_instances` option by their `worker_name`.
+instance in the
+[`federation_sender_instances`](usage/configuration/config_documentation.md#federation_sender_instances)
+option by their
+[`worker_name`](usage/configuration/config_documentation.md#worker_name).
 All instances must be stopped and started when adding or removing instances.
 For example:
 
 ```yaml
+send_federation: false
 federation_sender_instances:
     - federation_sender1
     - federation_sender2
@@ -547,7 +581,9 @@ Handles the media repository. It can handle all endpoints starting with:
     ^/_synapse/admin/v1/quarantine_media/.*$
     ^/_synapse/admin/v1/users/.*/media$
 
-You should also set `enable_media_repo: False` in the shared configuration
+You should also set
+[`enable_media_repo: False`](usage/configuration/config_documentation.md#enable_media_repo)
+in the shared configuration
 file to stop the main synapse running background jobs related to managing the
 media repository. Note that doing so will prevent the main process from being
 able to handle the above endpoints.
diff --git a/synapse/config/logger.py b/synapse/config/logger.py
index b62b3b9205..94d1150415 100644
--- a/synapse/config/logger.py
+++ b/synapse/config/logger.py
@@ -53,7 +53,7 @@ DEFAULT_LOG_CONFIG = Template(
 # Synapse also supports structured logging for machine readable logs which can
 # be ingested by ELK stacks. See [2] for details.
 #
-# [1]: https://docs.python.org/3.7/library/logging.config.html#configuration-dictionary-schema
+# [1]: https://docs.python.org/3/library/logging.config.html#configuration-dictionary-schema
 # [2]: https://matrix-org.github.io/synapse/latest/structured_logging.html
 
 version: 1
-- 
cgit 1.5.1


From 67583281e3f8ea923eedbc56a4c85c7ba75d1582 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 27 Oct 2022 09:58:12 -0400
Subject: Fix tests for change in PostgreSQL 14 behavior change. (#14310)

PostgreSQL 14 changed the behavior of `websearch_to_tsquery` to
improve some behaviour.

The tests were hitting those edge-cases about handling of hanging double
quotes. This fixes the tests to take into account the PostgreSQL version.
---
 changelog.d/14310.feature                |  1 +
 synapse/storage/databases/main/search.py |  5 ++---
 tests/storage/test_room_search.py        | 16 ++++++++++++----
 3 files changed, 15 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/14310.feature

(limited to 'synapse')

diff --git a/changelog.d/14310.feature b/changelog.d/14310.feature
new file mode 100644
index 0000000000..94c8a83212
--- /dev/null
+++ b/changelog.d/14310.feature
@@ -0,0 +1 @@
+Allow use of PostgreSQL and SQLite full-text search operators in search queries.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index a89fc54c2c..594b935614 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -824,9 +824,8 @@ def _tokenize_query(query: str) -> TokenList:
     in_phrase = False
     parts = deque(query.split('"'))
     for i, part in enumerate(parts):
-        # The contents inside double quotes is treated as a phrase, a trailing
-        # double quote is not implied.
-        in_phrase = bool(i % 2) and i != (len(parts) - 1)
+        # The contents inside double quotes is treated as a phrase.
+        in_phrase = bool(i % 2)
 
         # Pull out the individual words, discarding any non-word characters.
         words = deque(re.findall(r"([\w\-]+)", part, re.UNICODE))
diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py
index 9ddc19900a..868b5bee84 100644
--- a/tests/storage/test_room_search.py
+++ b/tests/storage/test_room_search.py
@@ -239,7 +239,6 @@ class MessageSearchTest(HomeserverTestCase):
         ("fox -nope", (True, False)),
         ("fox -brown", (False, True)),
         ('"fox" quick', True),
-        ('"fox quick', True),
         ('"quick brown', True),
         ('" quick "', True),
         ('" nope"', False),
@@ -269,6 +268,15 @@ class MessageSearchTest(HomeserverTestCase):
         response = self.helper.send(self.room_id, self.PHRASE, tok=self.access_token)
         self.assertIn("event_id", response)
 
+        # The behaviour of a missing trailing double quote changed in PostgreSQL 14
+        # from ignoring the initial double quote to treating it as a phrase.
+        main_store = homeserver.get_datastores().main
+        found = False
+        if isinstance(main_store.database_engine, PostgresEngine):
+            assert main_store.database_engine._version is not None
+            found = main_store.database_engine._version < 140000
+        self.COMMON_CASES.append(('"fox quick', (found, True)))
+
     def test_tokenize_query(self) -> None:
         """Test the custom logic to tokenize a user's query."""
         cases = (
@@ -280,9 +288,9 @@ class MessageSearchTest(HomeserverTestCase):
             ("fox -brown", ["fox", SearchToken.Not, "brown"]),
             ("- fox", [SearchToken.Not, "fox"]),
             ('"fox" quick', [Phrase(["fox"]), SearchToken.And, "quick"]),
-            # No trailing double quoe.
-            ('"fox quick', ["fox", SearchToken.And, "quick"]),
-            ('"-fox quick', [SearchToken.Not, "fox", SearchToken.And, "quick"]),
+            # No trailing double quote.
+            ('"fox quick', [Phrase(["fox", "quick"])]),
+            ('"-fox quick', [Phrase(["-fox", "quick"])]),
             ('" quick "', [Phrase(["quick"])]),
             (
                 'q"uick brow"n',
-- 
cgit 1.5.1


From aa70556699e649f46f51a198fb104eecdc0d311b Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Thu, 27 Oct 2022 13:29:23 -0500
Subject: Check appservice user interest against the local users instead of all
 users (`get_users_in_room` mis-use) (#13958)

---
 changelog.d/13958.bugfix                     |   1 +
 docs/upgrade.md                              |  19 ++++
 synapse/appservice/__init__.py               |  16 ++-
 synapse/storage/databases/main/appservice.py |  17 ++-
 synapse/storage/databases/main/roommember.py |   3 +
 tests/appservice/test_appservice.py          |  10 +-
 tests/handlers/test_appservice.py            | 162 ++++++++++++++++++++++++++-
 7 files changed, 214 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/13958.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13958.bugfix b/changelog.d/13958.bugfix
new file mode 100644
index 0000000000..f9f651bfdc
--- /dev/null
+++ b/changelog.d/13958.bugfix
@@ -0,0 +1 @@
+Check appservice user interest against the local users instead of all users in the room to align with [MSC3905](https://github.com/matrix-org/matrix-spec-proposals/pull/3905).
diff --git a/docs/upgrade.md b/docs/upgrade.md
index 78c34d0c15..f095bbc3a6 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -97,6 +97,25 @@ As announced with the release of [Synapse 1.69.0](#deprecation-of-the-generate_s
 Modules relying on it can instead use the `create_login_token` method.
 
 
+## Changes to the events received by application services (interest)
+
+To align with spec (changed in
+[MSC3905](https://github.com/matrix-org/matrix-spec-proposals/pull/3905)), Synapse now
+only considers local users to be interesting. In other words, the `users` namespace
+regex is only applied against local users of the homeserver.
+
+Please note, this probably doesn't affect the expected behavior of your application
+service, since an interesting local user in a room still means all messages in the room
+(from local or remote users) will still be considered interesting. And matching a room
+with the `rooms` or `aliases` namespace regex will still consider all events sent in the
+room to be interesting to the application service.
+
+If one of your application service's `users` regex was intending to match a remote user,
+this will no longer match as you expect. The behavioral mismatch between matching all
+local users and some remote users is why the spec was changed/clarified and this
+caveat is no longer supported.
+
+
 # Upgrading to v1.69.0
 
 ## Changes to the receipts replication streams
diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py
index 0dfa00df44..500bdde3a9 100644
--- a/synapse/appservice/__init__.py
+++ b/synapse/appservice/__init__.py
@@ -172,12 +172,24 @@ class ApplicationService:
         Returns:
             True if this service would like to know about this room.
         """
-        member_list = await store.get_users_in_room(
+        # We can use `get_local_users_in_room(...)` here because an application service
+        # can only be interested in local users of the server it's on (ignore any remote
+        # users that might match the user namespace regex).
+        #
+        # In the future, we can consider re-using
+        # `store.get_app_service_users_in_room` which is very similar to this
+        # function but has a slightly worse performance than this because we
+        # have an early escape-hatch if we find a single user that the
+        # appservice is interested in. The juice would be worth the squeeze if
+        # `store.get_app_service_users_in_room` was used in more places besides
+        # an experimental MSC. But for now we can avoid doing more work and
+        # barely using it later.
+        local_user_ids = await store.get_local_users_in_room(
             room_id, on_invalidate=cache_context.invalidate
         )
 
         # check joined member events
-        for user_id in member_list:
+        for user_id in local_user_ids:
             if self.is_interested_in_user(user_id):
                 return True
         return False
diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py
index 64b70a7b28..63046c0527 100644
--- a/synapse/storage/databases/main/appservice.py
+++ b/synapse/storage/databases/main/appservice.py
@@ -157,10 +157,23 @@ class ApplicationServiceWorkerStore(RoomMemberWorkerStore):
         app_service: "ApplicationService",
         cache_context: _CacheContext,
     ) -> List[str]:
-        users_in_room = await self.get_users_in_room(
+        """
+        Get all users in a room that the appservice controls.
+
+        Args:
+            room_id: The room to check in.
+            app_service: The application service to check interest/control against
+
+        Returns:
+            List of user IDs that the appservice controls.
+        """
+        # We can use `get_local_users_in_room(...)` here because an application service
+        # can only be interested in local users of the server it's on (ignore any remote
+        # users that might match the user namespace regex).
+        local_users_in_room = await self.get_local_users_in_room(
             room_id, on_invalidate=cache_context.invalidate
         )
-        return list(filter(app_service.is_interested_in_user, users_in_room))
+        return list(filter(app_service.is_interested_in_user, local_users_in_room))
 
 
 class ApplicationServiceStore(ApplicationServiceWorkerStore):
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index ab708b0ba5..e56a13f21e 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -152,6 +152,9 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         the forward extremities of those rooms will exclude most members. We may also
         calculate room state incorrectly for such rooms and believe that a member is or
         is not in the room when the opposite is true.
+
+        Note: If you only care about users in the room local to the homeserver, use
+        `get_local_users_in_room(...)` instead which will be more performant.
         """
         return await self.db_pool.simple_select_onecol(
             table="current_state_events",
diff --git a/tests/appservice/test_appservice.py b/tests/appservice/test_appservice.py
index 3018d3fc6f..d4dccfc2f0 100644
--- a/tests/appservice/test_appservice.py
+++ b/tests/appservice/test_appservice.py
@@ -43,7 +43,7 @@ class ApplicationServiceTestCase(unittest.TestCase):
 
         self.store = Mock()
         self.store.get_aliases_for_room = simple_async_mock([])
-        self.store.get_users_in_room = simple_async_mock([])
+        self.store.get_local_users_in_room = simple_async_mock([])
 
     @defer.inlineCallbacks
     def test_regex_user_id_prefix_match(self):
@@ -129,7 +129,7 @@ class ApplicationServiceTestCase(unittest.TestCase):
         self.store.get_aliases_for_room = simple_async_mock(
             ["#irc_foobar:matrix.org", "#athing:matrix.org"]
         )
-        self.store.get_users_in_room = simple_async_mock([])
+        self.store.get_local_users_in_room = simple_async_mock([])
         self.assertTrue(
             (
                 yield defer.ensureDeferred(
@@ -184,7 +184,7 @@ class ApplicationServiceTestCase(unittest.TestCase):
         self.store.get_aliases_for_room = simple_async_mock(
             ["#xmpp_foobar:matrix.org", "#athing:matrix.org"]
         )
-        self.store.get_users_in_room = simple_async_mock([])
+        self.store.get_local_users_in_room = simple_async_mock([])
         self.assertFalse(
             (
                 yield defer.ensureDeferred(
@@ -203,7 +203,7 @@ class ApplicationServiceTestCase(unittest.TestCase):
         self.service.namespaces[ApplicationService.NS_USERS].append(_regex("@irc_.*"))
         self.event.sender = "@irc_foobar:matrix.org"
         self.store.get_aliases_for_room = simple_async_mock(["#irc_barfoo:matrix.org"])
-        self.store.get_users_in_room = simple_async_mock([])
+        self.store.get_local_users_in_room = simple_async_mock([])
         self.assertTrue(
             (
                 yield defer.ensureDeferred(
@@ -236,7 +236,7 @@ class ApplicationServiceTestCase(unittest.TestCase):
     def test_member_list_match(self):
         self.service.namespaces[ApplicationService.NS_USERS].append(_regex("@irc_.*"))
         # Note that @irc_fo:here is the AS user.
-        self.store.get_users_in_room = simple_async_mock(
+        self.store.get_local_users_in_room = simple_async_mock(
             ["@alice:here", "@irc_fo:here", "@bob:here"]
         )
         self.store.get_aliases_for_room = simple_async_mock([])
diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py
index 7e4570f990..144e49d0fd 100644
--- a/tests/handlers/test_appservice.py
+++ b/tests/handlers/test_appservice.py
@@ -22,7 +22,7 @@ from twisted.test.proto_helpers import MemoryReactor
 
 import synapse.rest.admin
 import synapse.storage
-from synapse.api.constants import EduTypes
+from synapse.api.constants import EduTypes, EventTypes
 from synapse.appservice import (
     ApplicationService,
     TransactionOneTimeKeyCounts,
@@ -36,7 +36,7 @@ from synapse.util import Clock
 from synapse.util.stringutils import random_string
 
 from tests import unittest
-from tests.test_utils import make_awaitable, simple_async_mock
+from tests.test_utils import event_injection, make_awaitable, simple_async_mock
 from tests.unittest import override_config
 from tests.utils import MockClock
 
@@ -390,15 +390,16 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase):
         receipts.register_servlets,
     ]
 
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer):
+        self.hs = hs
         # Mock the ApplicationServiceScheduler's _TransactionController's send method so that
         # we can track any outgoing ephemeral events
         self.send_mock = simple_async_mock()
-        hs.get_application_service_handler().scheduler.txn_ctrl.send = self.send_mock
+        hs.get_application_service_handler().scheduler.txn_ctrl.send = self.send_mock  # type: ignore[assignment]
 
         # Mock out application services, and allow defining our own in tests
         self._services: List[ApplicationService] = []
-        self.hs.get_datastores().main.get_app_services = Mock(
+        self.hs.get_datastores().main.get_app_services = Mock(  # type: ignore[assignment]
             return_value=self._services
         )
 
@@ -416,6 +417,157 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase):
             "exclusive_as_user", "password", self.exclusive_as_user_device_id
         )
 
+    def _notify_interested_services(self):
+        # This is normally set in `notify_interested_services` but we need to call the
+        # internal async version so the reactor gets pushed to completion.
+        self.hs.get_application_service_handler().current_max += 1
+        self.get_success(
+            self.hs.get_application_service_handler()._notify_interested_services(
+                RoomStreamToken(
+                    None, self.hs.get_application_service_handler().current_max
+                )
+            )
+        )
+
+    @parameterized.expand(
+        [
+            ("@local_as_user:test", True),
+            # Defining remote users in an application service user namespace regex is a
+            # footgun since the appservice might assume that it'll receive all events
+            # sent by that remote user, but it will only receive events in rooms that
+            # are shared with a local user. So we just remove this footgun possibility
+            # entirely and we won't notify the application service based on remote
+            # users.
+            ("@remote_as_user:remote", False),
+        ]
+    )
+    def test_match_interesting_room_members(
+        self, interesting_user: str, should_notify: bool
+    ):
+        """
+        Test to make sure that an interesting user (local or remote) in the room is
+        notified as expected when someone else in the room sends a message.
+        """
+        # Register an application service that's interested in the `interesting_user`
+        interested_appservice = self._register_application_service(
+            namespaces={
+                ApplicationService.NS_USERS: [
+                    {
+                        "regex": interesting_user,
+                        "exclusive": False,
+                    },
+                ],
+            },
+        )
+
+        # Create a room
+        alice = self.register_user("alice", "pass")
+        alice_access_token = self.login("alice", "pass")
+        room_id = self.helper.create_room_as(room_creator=alice, tok=alice_access_token)
+
+        # Join the interesting user to the room
+        self.get_success(
+            event_injection.inject_member_event(
+                self.hs, room_id, interesting_user, "join"
+            )
+        )
+        # Kick the appservice into checking this membership event to get the event out
+        # of the way
+        self._notify_interested_services()
+        # We don't care about the interesting user join event (this test is making sure
+        # the next thing works)
+        self.send_mock.reset_mock()
+
+        # Send a message from an uninteresting user
+        self.helper.send_event(
+            room_id,
+            type=EventTypes.Message,
+            content={
+                "msgtype": "m.text",
+                "body": "message from uninteresting user",
+            },
+            tok=alice_access_token,
+        )
+        # Kick the appservice into checking this new event
+        self._notify_interested_services()
+
+        if should_notify:
+            self.send_mock.assert_called_once()
+            (
+                service,
+                events,
+                _ephemeral,
+                _to_device_messages,
+                _otks,
+                _fbks,
+                _device_list_summary,
+            ) = self.send_mock.call_args[0]
+
+            # Even though the message came from an uninteresting user, it should still
+            # notify us because the interesting user is joined to the room where the
+            # message was sent.
+            self.assertEqual(service, interested_appservice)
+            self.assertEqual(events[0]["type"], "m.room.message")
+            self.assertEqual(events[0]["sender"], alice)
+        else:
+            self.send_mock.assert_not_called()
+
+    def test_application_services_receive_events_sent_by_interesting_local_user(self):
+        """
+        Test to make sure that messages sent from a local user can be interesting and
+        picked up by the appservice.
+        """
+        # Register an application service that's interested in all local users
+        interested_appservice = self._register_application_service(
+            namespaces={
+                ApplicationService.NS_USERS: [
+                    {
+                        "regex": ".*",
+                        "exclusive": False,
+                    },
+                ],
+            },
+        )
+
+        # Create a room
+        alice = self.register_user("alice", "pass")
+        alice_access_token = self.login("alice", "pass")
+        room_id = self.helper.create_room_as(room_creator=alice, tok=alice_access_token)
+
+        # We don't care about interesting events before this (this test is making sure
+        # the next thing works)
+        self.send_mock.reset_mock()
+
+        # Send a message from the interesting local user
+        self.helper.send_event(
+            room_id,
+            type=EventTypes.Message,
+            content={
+                "msgtype": "m.text",
+                "body": "message from interesting local user",
+            },
+            tok=alice_access_token,
+        )
+        # Kick the appservice into checking this new event
+        self._notify_interested_services()
+
+        self.send_mock.assert_called_once()
+        (
+            service,
+            events,
+            _ephemeral,
+            _to_device_messages,
+            _otks,
+            _fbks,
+            _device_list_summary,
+        ) = self.send_mock.call_args[0]
+
+        # Events sent from an interesting local user should also be picked up as
+        # interesting to the appservice.
+        self.assertEqual(service, interested_appservice)
+        self.assertEqual(events[0]["type"], "m.room.message")
+        self.assertEqual(events[0]["sender"], alice)
+
     def test_sending_read_receipt_batches_to_application_services(self):
         """Tests that a large batch of read receipts are sent correctly to
         interested application services.
-- 
cgit 1.5.1


From 6a6e1e8c0711939338f25d8d41d1e4d33d984949 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Fri, 28 Oct 2022 10:53:34 +0000
Subject: Fix room creation being rate limited too aggressively since Synapse
 v1.69.0. (#14314)

* Introduce a test for the old behaviour which we want to restore

* Reintroduce the old behaviour in a simpler way

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

* Use 1 credit instead of 2 for creating a room: be more lenient than before

Notably, the UI in Element Web was still broken after restoring to prior behaviour.

After discussion, we agreed that it would be sensible to increase the limit.

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 changelog.d/14314.bugfix        |  1 +
 synapse/api/ratelimiting.py     |  8 +++++-
 synapse/handlers/room.py        | 16 ++++++++----
 tests/rest/client/test_rooms.py | 54 ++++++++++++++++++++++++++++++++++++++---
 4 files changed, 70 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/14314.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14314.bugfix b/changelog.d/14314.bugfix
new file mode 100644
index 0000000000..8be47ee083
--- /dev/null
+++ b/changelog.d/14314.bugfix
@@ -0,0 +1 @@
+Fix room creation being rate limited too aggressively since Synapse v1.69.0.
\ No newline at end of file
diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py
index 044c7d4926..511790c7c5 100644
--- a/synapse/api/ratelimiting.py
+++ b/synapse/api/ratelimiting.py
@@ -343,6 +343,7 @@ class RequestRatelimiter:
         requester: Requester,
         update: bool = True,
         is_admin_redaction: bool = False,
+        n_actions: int = 1,
     ) -> None:
         """Ratelimits requests.
 
@@ -355,6 +356,8 @@ class RequestRatelimiter:
             is_admin_redaction: Whether this is a room admin/moderator
                 redacting an event. If so then we may apply different
                 ratelimits depending on config.
+            n_actions: Multiplier for the number of actions to apply to the
+                rate limiter at once.
 
         Raises:
             LimitExceededError if the request should be ratelimited
@@ -383,7 +386,9 @@ class RequestRatelimiter:
         if is_admin_redaction and self.admin_redaction_ratelimiter:
             # If we have separate config for admin redactions, use a separate
             # ratelimiter as to not have user_ids clash
-            await self.admin_redaction_ratelimiter.ratelimit(requester, update=update)
+            await self.admin_redaction_ratelimiter.ratelimit(
+                requester, update=update, n_actions=n_actions
+            )
         else:
             # Override rate and burst count per-user
             await self.request_ratelimiter.ratelimit(
@@ -391,4 +396,5 @@ class RequestRatelimiter:
                 rate_hz=messages_per_second,
                 burst_count=burst_count,
                 update=update,
+                n_actions=n_actions,
             )
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 638f54051a..d74b675adc 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -559,7 +559,6 @@ class RoomCreationHandler:
             invite_list=[],
             initial_state=initial_state,
             creation_content=creation_content,
-            ratelimit=False,
         )
 
         # Transfer membership events
@@ -753,6 +752,10 @@ class RoomCreationHandler:
                 )
 
         if ratelimit:
+            # Rate limit once in advance, but don't rate limit the individual
+            # events in the room — room creation isn't atomic and it's very
+            # janky if half the events in the initial state don't make it because
+            # of rate limiting.
             await self.request_ratelimiter.ratelimit(requester)
 
         room_version_id = config.get(
@@ -913,7 +916,6 @@ class RoomCreationHandler:
             room_alias=room_alias,
             power_level_content_override=power_level_content_override,
             creator_join_profile=creator_join_profile,
-            ratelimit=ratelimit,
         )
 
         if "name" in config:
@@ -1037,7 +1039,6 @@ class RoomCreationHandler:
         room_alias: Optional[RoomAlias] = None,
         power_level_content_override: Optional[JsonDict] = None,
         creator_join_profile: Optional[JsonDict] = None,
-        ratelimit: bool = True,
     ) -> Tuple[int, str, int]:
         """Sends the initial events into a new room. Sends the room creation, membership,
         and power level events into the room sequentially, then creates and batches up the
@@ -1046,6 +1047,8 @@ class RoomCreationHandler:
         `power_level_content_override` doesn't apply when initial state has
         power level state event content.
 
+        Rate limiting should already have been applied by this point.
+
         Returns:
             A tuple containing the stream ID, event ID and depth of the last
             event sent to the room.
@@ -1144,7 +1147,7 @@ class RoomCreationHandler:
             creator.user,
             room_id,
             "join",
-            ratelimit=ratelimit,
+            ratelimit=False,
             content=creator_join_profile,
             new_room=True,
             prev_event_ids=[last_sent_event_id],
@@ -1269,7 +1272,10 @@ class RoomCreationHandler:
             events_to_send.append((encryption_event, encryption_context))
 
         last_event = await self.event_creation_handler.handle_new_client_event(
-            creator, events_to_send, ignore_shadow_ban=True
+            creator,
+            events_to_send,
+            ignore_shadow_ban=True,
+            ratelimit=False,
         )
         assert last_event.internal_metadata.stream_ordering is not None
         return last_event.internal_metadata.stream_ordering, last_event.event_id, depth
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index 716366eb90..1084d4ad9d 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -54,6 +54,7 @@ from tests.http.server._base import make_request_with_cancellation_test
 from tests.storage.test_stream import PaginationTestCase
 from tests.test_utils import make_awaitable
 from tests.test_utils.event_injection import create_event
+from tests.unittest import override_config
 
 PATH_PREFIX = b"/_matrix/client/api/v1"
 
@@ -871,6 +872,41 @@ class RoomsCreateTestCase(RoomBase):
         self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body)
         self.assertEqual(join_mock.call_count, 0)
 
+    def _create_basic_room(self) -> Tuple[int, object]:
+        """
+        Tries to create a basic room and returns the response code and JSON body.
+        """
+        channel = self.make_request(
+            "POST",
+            "/createRoom",
+            {},
+        )
+        return channel.code, channel.json_body
+
+    @override_config(
+        {
+            "rc_message": {"per_second": 0.2, "burst_count": 10},
+        }
+    )
+    def test_room_creation_ratelimiting(self) -> None:
+        """
+        Regression test for #14312, where ratelimiting was made too strict.
+        Clients should be able to create 10 rooms in a row
+        without hitting rate limits, using default rate limit config.
+        (We override rate limiting config back to its default value.)
+
+        To ensure we don't make ratelimiting too generous accidentally,
+        also check that we can't create an 11th room.
+        """
+
+        for _ in range(10):
+            code, json_body = self._create_basic_room()
+            self.assertEqual(code, HTTPStatus.OK, json_body)
+
+        # The 11th room hits the rate limit.
+        code, json_body = self._create_basic_room()
+        self.assertEqual(code, HTTPStatus.TOO_MANY_REQUESTS, json_body)
+
 
 class RoomTopicTestCase(RoomBase):
     """Tests /rooms/$room_id/topic REST events."""
@@ -1390,10 +1426,22 @@ class RoomJoinRatelimitTestCase(RoomBase):
     )
     def test_join_local_ratelimit(self) -> None:
         """Tests that local joins are actually rate-limited."""
-        for _ in range(3):
-            self.helper.create_room_as(self.user_id)
+        # Create 4 rooms
+        room_ids = [
+            self.helper.create_room_as(self.user_id, is_public=True) for _ in range(4)
+        ]
+
+        joiner_user_id = self.register_user("joiner", "secret")
+        # Now make a new user try to join some of them.
 
-        self.helper.create_room_as(self.user_id, expect_code=429)
+        # The user can join 3 rooms
+        for room_id in room_ids[0:3]:
+            self.helper.join(room_id, joiner_user_id)
+
+        # But the user cannot join a 4th room
+        self.helper.join(
+            room_ids[3], joiner_user_id, expect_code=HTTPStatus.TOO_MANY_REQUESTS
+        )
 
     @unittest.override_config(
         {"rc_joins": {"local": {"per_second": 0.5, "burst_count": 3}}}
-- 
cgit 1.5.1


From 81815e0561eea91dbf0c29731589fac2e6f98a40 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 28 Oct 2022 11:44:10 -0400
Subject: Switch search SQL to triple-quote strings. (#14311)

For ease of reading we switch from concatenated strings to
triple quote strings.
---
 changelog.d/14311.feature                |   1 +
 synapse/storage/databases/main/search.py | 188 ++++++++++++++++---------------
 2 files changed, 100 insertions(+), 89 deletions(-)
 create mode 100644 changelog.d/14311.feature

(limited to 'synapse')

diff --git a/changelog.d/14311.feature b/changelog.d/14311.feature
new file mode 100644
index 0000000000..94c8a83212
--- /dev/null
+++ b/changelog.d/14311.feature
@@ -0,0 +1 @@
+Allow use of Postgres and SQLite full-text search operators in search queries.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index 594b935614..e9588d1755 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -80,11 +80,11 @@ class SearchWorkerStore(SQLBaseStore):
         if not self.hs.config.server.enable_search:
             return
         if isinstance(self.database_engine, PostgresEngine):
-            sql = (
-                "INSERT INTO event_search"
-                " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)"
-                " VALUES (?,?,?,to_tsvector('english', ?),?,?)"
-            )
+            sql = """
+            INSERT INTO event_search
+            (event_id, room_id, key, vector, stream_ordering, origin_server_ts)
+            VALUES (?,?,?,to_tsvector('english', ?),?,?)
+            """
 
             args1 = (
                 (
@@ -101,20 +101,20 @@ class SearchWorkerStore(SQLBaseStore):
             txn.execute_batch(sql, args1)
 
         elif isinstance(self.database_engine, Sqlite3Engine):
-            sql = (
-                "INSERT INTO event_search (event_id, room_id, key, value)"
-                " VALUES (?,?,?,?)"
-            )
-            args2 = (
-                (
-                    entry.event_id,
-                    entry.room_id,
-                    entry.key,
-                    _clean_value_for_search(entry.value),
-                )
-                for entry in entries
+            self.db_pool.simple_insert_many_txn(
+                txn,
+                table="event_search",
+                keys=("event_id", "room_id", "key", "value"),
+                values=(
+                    (
+                        entry.event_id,
+                        entry.room_id,
+                        entry.key,
+                        _clean_value_for_search(entry.value),
+                    )
+                    for entry in entries
+                ),
             )
-            txn.execute_batch(sql, args2)
 
         else:
             # This should be unreachable.
@@ -162,15 +162,17 @@ class SearchBackgroundUpdateStore(SearchWorkerStore):
         TYPES = ["m.room.name", "m.room.message", "m.room.topic"]
 
         def reindex_search_txn(txn: LoggingTransaction) -> int:
-            sql = (
-                "SELECT stream_ordering, event_id, room_id, type, json, "
-                " origin_server_ts FROM events"
-                " JOIN event_json USING (room_id, event_id)"
-                " WHERE ? <= stream_ordering AND stream_ordering < ?"
-                " AND (%s)"
-                " ORDER BY stream_ordering DESC"
-                " LIMIT ?"
-            ) % (" OR ".join("type = '%s'" % (t,) for t in TYPES),)
+            sql = """
+            SELECT stream_ordering, event_id, room_id, type, json, origin_server_ts
+            FROM events
+            JOIN event_json USING (room_id, event_id)
+            WHERE ? <= stream_ordering AND stream_ordering < ?
+            AND (%s)
+            ORDER BY stream_ordering DESC
+            LIMIT ?
+            """ % (
+                " OR ".join("type = '%s'" % (t,) for t in TYPES),
+            )
 
             txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))
 
@@ -284,8 +286,10 @@ class SearchBackgroundUpdateStore(SearchWorkerStore):
 
                 try:
                     c.execute(
-                        "CREATE INDEX CONCURRENTLY event_search_fts_idx"
-                        " ON event_search USING GIN (vector)"
+                        """
+                        CREATE INDEX CONCURRENTLY event_search_fts_idx
+                        ON event_search USING GIN (vector)
+                        """
                     )
                 except psycopg2.ProgrammingError as e:
                     logger.warning(
@@ -323,12 +327,16 @@ class SearchBackgroundUpdateStore(SearchWorkerStore):
                 # We create with NULLS FIRST so that when we search *backwards*
                 # we get the ones with non null origin_server_ts *first*
                 c.execute(
-                    "CREATE INDEX CONCURRENTLY event_search_room_order ON event_search("
-                    "room_id, origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)"
+                    """
+                    CREATE INDEX CONCURRENTLY event_search_room_order
+                    ON event_search(room_id, origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)
+                    """
                 )
                 c.execute(
-                    "CREATE INDEX CONCURRENTLY event_search_order ON event_search("
-                    "origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)"
+                    """
+                    CREATE INDEX CONCURRENTLY event_search_order
+                    ON event_search(origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)
+                    """
                 )
                 conn.set_session(autocommit=False)
 
@@ -345,14 +353,14 @@ class SearchBackgroundUpdateStore(SearchWorkerStore):
             )
 
         def reindex_search_txn(txn: LoggingTransaction) -> Tuple[int, bool]:
-            sql = (
-                "UPDATE event_search AS es SET stream_ordering = e.stream_ordering,"
-                " origin_server_ts = e.origin_server_ts"
-                " FROM events AS e"
-                " WHERE e.event_id = es.event_id"
-                " AND ? <= e.stream_ordering AND e.stream_ordering < ?"
-                " RETURNING es.stream_ordering"
-            )
+            sql = """
+            UPDATE event_search AS es
+            SET stream_ordering = e.stream_ordering, origin_server_ts = e.origin_server_ts
+            FROM events AS e
+            WHERE e.event_id = es.event_id
+            AND ? <= e.stream_ordering AND e.stream_ordering < ?
+            RETURNING es.stream_ordering
+            """
 
             min_stream_id = max_stream_id - batch_size
             txn.execute(sql, (min_stream_id, max_stream_id))
@@ -456,33 +464,33 @@ class SearchStore(SearchBackgroundUpdateStore):
         if isinstance(self.database_engine, PostgresEngine):
             search_query = search_term
             tsquery_func = self.database_engine.tsquery_func
-            sql = (
-                f"SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) AS rank,"
-                " room_id, event_id"
-                " FROM event_search"
-                f" WHERE vector @@  {tsquery_func}('english', ?)"
-            )
+            sql = f"""
+            SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) AS rank,
+            room_id, event_id
+            FROM event_search
+            WHERE vector @@  {tsquery_func}('english', ?)
+            """
             args = [search_query, search_query] + args
 
-            count_sql = (
-                "SELECT room_id, count(*) as count FROM event_search"
-                f" WHERE vector @@ {tsquery_func}('english', ?)"
-            )
+            count_sql = f"""
+            SELECT room_id, count(*) as count FROM event_search
+            WHERE vector @@ {tsquery_func}('english', ?)
+            """
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
             search_query = _parse_query_for_sqlite(search_term)
 
-            sql = (
-                "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id"
-                " FROM event_search"
-                " WHERE value MATCH ?"
-            )
+            sql = """
+            SELECT rank(matchinfo(event_search)) as rank, room_id, event_id
+            FROM event_search
+            WHERE value MATCH ?
+            """
             args = [search_query] + args
 
-            count_sql = (
-                "SELECT room_id, count(*) as count FROM event_search"
-                " WHERE value MATCH ?"
-            )
+            count_sql = """
+            SELECT room_id, count(*) as count FROM event_search
+            WHERE value MATCH ?
+            """
             count_args = [search_query] + count_args
         else:
             # This should be unreachable.
@@ -588,26 +596,27 @@ class SearchStore(SearchBackgroundUpdateStore):
                 raise SynapseError(400, "Invalid pagination token")
 
             clauses.append(
-                "(origin_server_ts < ?"
-                " OR (origin_server_ts = ? AND stream_ordering < ?))"
+                """
+                (origin_server_ts < ? OR (origin_server_ts = ? AND stream_ordering < ?))
+                """
             )
             args.extend([origin_server_ts, origin_server_ts, stream])
 
         if isinstance(self.database_engine, PostgresEngine):
             search_query = search_term
             tsquery_func = self.database_engine.tsquery_func
-            sql = (
-                f"SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) as rank,"
-                " origin_server_ts, stream_ordering, room_id, event_id"
-                " FROM event_search"
-                f" WHERE vector @@ {tsquery_func}('english', ?) AND "
-            )
+            sql = f"""
+            SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) as rank,
+            origin_server_ts, stream_ordering, room_id, event_id
+            FROM event_search
+            WHERE vector @@ {tsquery_func}('english', ?) AND
+            """
             args = [search_query, search_query] + args
 
-            count_sql = (
-                "SELECT room_id, count(*) as count FROM event_search"
-                f" WHERE vector @@ {tsquery_func}('english', ?) AND "
-            )
+            count_sql = f"""
+            SELECT room_id, count(*) as count FROM event_search
+            WHERE vector @@ {tsquery_func}('english', ?) AND
+            """
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
 
@@ -619,23 +628,24 @@ class SearchStore(SearchBackgroundUpdateStore):
             # in the events table to get the topological ordering. We need
             # to use the indexes in this order because sqlite refuses to
             # MATCH unless it uses the full text search index
-            sql = (
-                "SELECT rank(matchinfo) as rank, room_id, event_id,"
-                " origin_server_ts, stream_ordering"
-                " FROM (SELECT key, event_id, matchinfo(event_search) as matchinfo"
-                " FROM event_search"
-                " WHERE value MATCH ?"
-                " )"
-                " CROSS JOIN events USING (event_id)"
-                " WHERE "
+            sql = """
+            SELECT
+                rank(matchinfo) as rank, room_id, event_id, origin_server_ts, stream_ordering
+            FROM (
+                SELECT key, event_id, matchinfo(event_search) as matchinfo
+                FROM event_search
+                WHERE value MATCH ?
             )
+            CROSS JOIN events USING (event_id)
+            WHERE
+            """
             search_query = _parse_query_for_sqlite(search_term)
             args = [search_query] + args
 
-            count_sql = (
-                "SELECT room_id, count(*) as count FROM event_search"
-                " WHERE value MATCH ? AND "
-            )
+            count_sql = """
+            SELECT room_id, count(*) as count FROM event_search
+            WHERE value MATCH ? AND
+            """
             count_args = [search_query] + count_args
         else:
             # This should be unreachable.
@@ -647,10 +657,10 @@ class SearchStore(SearchBackgroundUpdateStore):
         # We add an arbitrary limit here to ensure we don't try to pull the
         # entire table from the database.
         if isinstance(self.database_engine, PostgresEngine):
-            sql += (
-                " ORDER BY origin_server_ts DESC NULLS LAST,"
-                " stream_ordering DESC NULLS LAST LIMIT ?"
-            )
+            sql += """
+            ORDER BY origin_server_ts DESC NULLS LAST, stream_ordering DESC NULLS LAST
+            LIMIT ?
+            """
         elif isinstance(self.database_engine, Sqlite3Engine):
             sql += " ORDER BY origin_server_ts DESC, stream_ordering DESC LIMIT ?"
         else:
-- 
cgit 1.5.1


From 730b13dbc9e48181b1aaf38be870ec21364b1e9c Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 28 Oct 2022 17:04:02 +0100
Subject: Improve `RawHeaders` type hints (#14303)

---
 changelog.d/14303.misc        |  1 +
 synapse/app/generic_worker.py |  8 ++++----
 synapse/http/client.py        | 24 +++++++++++++++++++-----
 3 files changed, 24 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/14303.misc

(limited to 'synapse')

diff --git a/changelog.d/14303.misc b/changelog.d/14303.misc
new file mode 100644
index 0000000000..24ce238223
--- /dev/null
+++ b/changelog.d/14303.misc
@@ -0,0 +1 @@
+Improve type hinting of `RawHeaders`.
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 2a9f039367..cb5892f041 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -178,13 +178,13 @@ class KeyUploadServlet(RestServlet):
             # Proxy headers from the original request, such as the auth headers
             # (in case the access token is there) and the original IP /
             # User-Agent of the request.
-            headers = {
-                header: request.requestHeaders.getRawHeaders(header, [])
+            headers: Dict[bytes, List[bytes]] = {
+                header: list(request.requestHeaders.getRawHeaders(header, []))
                 for header in (b"Authorization", b"User-Agent")
             }
             # Add the previous hop to the X-Forwarded-For header.
-            x_forwarded_for = request.requestHeaders.getRawHeaders(
-                b"X-Forwarded-For", []
+            x_forwarded_for = list(
+                request.requestHeaders.getRawHeaders(b"X-Forwarded-For", [])
             )
             # we use request.client here, since we want the previous hop, not the
             # original client (as returned by request.getClientAddress()).
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 084d0a5b84..4eb740c040 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -25,7 +25,6 @@ from typing import (
     List,
     Mapping,
     Optional,
-    Sequence,
     Tuple,
     Union,
 )
@@ -90,14 +89,29 @@ incoming_responses_counter = Counter(
     "synapse_http_client_responses", "", ["method", "code"]
 )
 
-# the type of the headers list, to be passed to the t.w.h.Headers.
-# Actually we can mix str and bytes keys, but Mapping treats 'key' as invariant so
-# we simplify.
+# the type of the headers map, to be passed to the t.w.h.Headers.
+#
+# The actual type accepted by Twisted is
+#   Mapping[Union[str, bytes], Sequence[Union[str, bytes]]],
+# allowing us to mix and match str and bytes freely. However: any str is also a
+# Sequence[str]; passing a header string value which is a
+# standalone str is interpreted as a sequence of 1-codepoint strings. This is a disastrous footgun.
+# We use a narrower value type (RawHeaderValue) to avoid this footgun.
+#
+# We also simplify the keys to be either all str or all bytes. This helps because
+# Dict[K, V] is invariant in K (and indeed V).
 RawHeaders = Union[Mapping[str, "RawHeaderValue"], Mapping[bytes, "RawHeaderValue"]]
 
 # the value actually has to be a List, but List is invariant so we can't specify that
 # the entries can either be Lists or bytes.
-RawHeaderValue = Sequence[Union[str, bytes]]
+RawHeaderValue = Union[
+    List[str],
+    List[bytes],
+    List[Union[str, bytes]],
+    Tuple[str, ...],
+    Tuple[bytes, ...],
+    Tuple[Union[str, bytes], ...],
+]
 
 
 def check_against_blacklist(
-- 
cgit 1.5.1


From 7911e2835df7b4bf1dec98b09da89beda65e2ab2 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Fri, 28 Oct 2022 18:06:02 +0100
Subject: Prevent federation user keys query from returning device names if
 disallowed (#14304)

---
 changelog.d/14304.bugfix                          |  1 +
 synapse/handlers/e2e_keys.py                      | 37 ++++++++++++++++++++---
 synapse/storage/databases/main/end_to_end_keys.py | 17 ++++++++---
 3 files changed, 46 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/14304.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14304.bugfix b/changelog.d/14304.bugfix
new file mode 100644
index 0000000000..b8d4d91034
--- /dev/null
+++ b/changelog.d/14304.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in 1.34.0 where device names would be returned via a federation user key query request when `allow_device_name_lookup_over_federation` was set to `false`.
\ No newline at end of file
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 09a2492afc..a9912c467d 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -49,6 +49,7 @@ logger = logging.getLogger(__name__)
 
 class E2eKeysHandler:
     def __init__(self, hs: "HomeServer"):
+        self.config = hs.config
         self.store = hs.get_datastores().main
         self.federation = hs.get_federation_client()
         self.device_handler = hs.get_device_handler()
@@ -431,13 +432,17 @@ class E2eKeysHandler:
     @trace
     @cancellable
     async def query_local_devices(
-        self, query: Mapping[str, Optional[List[str]]]
+        self,
+        query: Mapping[str, Optional[List[str]]],
+        include_displaynames: bool = True,
     ) -> Dict[str, Dict[str, dict]]:
         """Get E2E device keys for local users
 
         Args:
             query: map from user_id to a list
                  of devices to query (None for all devices)
+            include_displaynames: Whether to include device displaynames in the returned
+                device details.
 
         Returns:
             A map from user_id -> device_id -> device details
@@ -469,7 +474,9 @@ class E2eKeysHandler:
             # make sure that each queried user appears in the result dict
             result_dict[user_id] = {}
 
-        results = await self.store.get_e2e_device_keys_for_cs_api(local_query)
+        results = await self.store.get_e2e_device_keys_for_cs_api(
+            local_query, include_displaynames
+        )
 
         # Build the result structure
         for user_id, device_keys in results.items():
@@ -482,11 +489,33 @@ class E2eKeysHandler:
     async def on_federation_query_client_keys(
         self, query_body: Dict[str, Dict[str, Optional[List[str]]]]
     ) -> JsonDict:
-        """Handle a device key query from a federated server"""
+        """Handle a device key query from a federated server:
+
+        Handles the path: POST /_matrix/federation/v1/user/keys/query
+
+        Args:
+            query_body: The body of the query request. Should contain a key
+                "device_keys" that maps to a dictionary of user IDs -> list of
+                device IDs. If the list of device IDs is empty, all devices of
+                that user will be queried.
+
+        Returns:
+            A json dictionary containing the following:
+                - device_keys: A dictionary containing the requested device information.
+                - master_keys: An optional dictionary of user ID -> master cross-signing
+                   key info.
+                - self_signing_key: An optional dictionary of user ID -> self-signing
+                    key info.
+        """
         device_keys_query: Dict[str, Optional[List[str]]] = query_body.get(
             "device_keys", {}
         )
-        res = await self.query_local_devices(device_keys_query)
+        res = await self.query_local_devices(
+            device_keys_query,
+            include_displaynames=(
+                self.config.federation.allow_device_name_lookup_over_federation
+            ),
+        )
         ret = {"device_keys": res}
 
         # add in the cross-signing keys
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index 8a10ae800c..2a4f58ed92 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -139,11 +139,15 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
     @trace
     @cancellable
     async def get_e2e_device_keys_for_cs_api(
-        self, query_list: List[Tuple[str, Optional[str]]]
+        self,
+        query_list: List[Tuple[str, Optional[str]]],
+        include_displaynames: bool = True,
     ) -> Dict[str, Dict[str, JsonDict]]:
         """Fetch a list of device keys, formatted suitably for the C/S API.
         Args:
-            query_list(list): List of pairs of user_ids and device_ids.
+            query_list: List of pairs of user_ids and device_ids.
+            include_displaynames: Whether to include the displayname of returned devices
+                (if one exists).
         Returns:
             Dict mapping from user-id to dict mapping from device_id to
             key data.  The key data will be a dict in the same format as the
@@ -166,9 +170,12 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
                     continue
 
                 r["unsigned"] = {}
-                display_name = device_info.display_name
-                if display_name is not None:
-                    r["unsigned"]["device_display_name"] = display_name
+                if include_displaynames:
+                    # Include the device's display name in the "unsigned" dictionary
+                    display_name = device_info.display_name
+                    if display_name is not None:
+                        r["unsigned"]["device_display_name"] = display_name
+
                 rv[user_id][device_id] = r
 
         return rv
-- 
cgit 1.5.1


From 2bb2c32e8ed5642a5bf3ba1e8c49e10cecc88905 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 31 Oct 2022 13:02:07 +0000
Subject: Avoid incrementing bg process utime/stime counters by negative
 durations (#14323)

---
 changelog.d/14323.bugfix                         |   1 +
 mypy.ini                                         |   4 +-
 synapse/metrics/background_process_metrics.py    |   6 +-
 tests/metrics/__init__.py                        |   0
 tests/metrics/test_background_process_metrics.py |  19 +++
 tests/metrics/test_metrics.py                    | 206 +++++++++++++++++++++++
 tests/test_metrics.py                            | 200 ----------------------
 7 files changed, 233 insertions(+), 203 deletions(-)
 create mode 100644 changelog.d/14323.bugfix
 create mode 100644 tests/metrics/__init__.py
 create mode 100644 tests/metrics/test_background_process_metrics.py
 create mode 100644 tests/metrics/test_metrics.py
 delete mode 100644 tests/test_metrics.py

(limited to 'synapse')

diff --git a/changelog.d/14323.bugfix b/changelog.d/14323.bugfix
new file mode 100644
index 0000000000..da39bc020c
--- /dev/null
+++ b/changelog.d/14323.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 0.34.0rc2 where logs could include error spam when background processes are measured as taking a negative amount of time.
diff --git a/mypy.ini b/mypy.ini
index 34b4523e00..8f1141a239 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -56,7 +56,6 @@ exclude = (?x)
    |tests/rest/media/v1/test_media_storage.py
    |tests/server.py
    |tests/server_notices/test_resource_limits_server_notices.py
-   |tests/test_metrics.py
    |tests/test_state.py
    |tests/test_terms_auth.py
    |tests/util/caches/test_cached_call.py
@@ -106,6 +105,9 @@ disallow_untyped_defs = False
 [mypy-tests.handlers.test_user_directory]
 disallow_untyped_defs = True
 
+[mypy-tests.metrics.test_background_process_metrics]
+disallow_untyped_defs = True
+
 [mypy-tests.push.test_bulk_push_rule_evaluator]
 disallow_untyped_defs = True
 
diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py
index 7a1516d3a8..9ea4e23b31 100644
--- a/synapse/metrics/background_process_metrics.py
+++ b/synapse/metrics/background_process_metrics.py
@@ -174,8 +174,10 @@ class _BackgroundProcess:
             diff = new_stats - self._reported_stats
         self._reported_stats = new_stats
 
-        _background_process_ru_utime.labels(self.desc).inc(diff.ru_utime)
-        _background_process_ru_stime.labels(self.desc).inc(diff.ru_stime)
+        # For unknown reasons, the difference in times can be negative. See comment in
+        # synapse.http.request_metrics.RequestMetrics.update_metrics.
+        _background_process_ru_utime.labels(self.desc).inc(max(diff.ru_utime, 0))
+        _background_process_ru_stime.labels(self.desc).inc(max(diff.ru_stime, 0))
         _background_process_db_txn_count.labels(self.desc).inc(diff.db_txn_count)
         _background_process_db_txn_duration.labels(self.desc).inc(
             diff.db_txn_duration_sec
diff --git a/tests/metrics/__init__.py b/tests/metrics/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/metrics/test_background_process_metrics.py b/tests/metrics/test_background_process_metrics.py
new file mode 100644
index 0000000000..f0f6cb2912
--- /dev/null
+++ b/tests/metrics/test_background_process_metrics.py
@@ -0,0 +1,19 @@
+from unittest import TestCase as StdlibTestCase
+from unittest.mock import Mock
+
+from synapse.logging.context import ContextResourceUsage, LoggingContext
+from synapse.metrics.background_process_metrics import _BackgroundProcess
+
+
+class TestBackgroundProcessMetrics(StdlibTestCase):
+    def test_update_metrics_with_negative_time_diff(self) -> None:
+        """We should ignore negative reported utime and stime differences"""
+        usage = ContextResourceUsage()
+        usage.ru_stime = usage.ru_utime = -1.0
+
+        mock_logging_context = Mock(spec=LoggingContext)
+        mock_logging_context.get_resource_usage.return_value = usage
+
+        process = _BackgroundProcess("test process", mock_logging_context)
+        # Should not raise
+        process.update_metrics()
diff --git a/tests/metrics/test_metrics.py b/tests/metrics/test_metrics.py
new file mode 100644
index 0000000000..bddc4228bc
--- /dev/null
+++ b/tests/metrics/test_metrics.py
@@ -0,0 +1,206 @@
+# Copyright 2018 New Vector Ltd
+# Copyright 2019 Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing_extensions import Protocol
+
+try:
+    from importlib import metadata
+except ImportError:
+    import importlib_metadata as metadata  # type: ignore[no-redef]
+
+from unittest.mock import patch
+
+from pkg_resources import parse_version
+
+from synapse.app._base import _set_prometheus_client_use_created_metrics
+from synapse.metrics import REGISTRY, InFlightGauge, generate_latest
+from synapse.util.caches.deferred_cache import DeferredCache
+
+from tests import unittest
+
+
+def get_sample_labels_value(sample):
+    """Extract the labels and values of a sample.
+
+    prometheus_client 0.5 changed the sample type to a named tuple with more
+    members than the plain tuple had in 0.4 and earlier. This function can
+    extract the labels and value from the sample for both sample types.
+
+    Args:
+        sample: The sample to get the labels and value from.
+    Returns:
+        A tuple of (labels, value) from the sample.
+    """
+
+    # If the sample has a labels and value attribute, use those.
+    if hasattr(sample, "labels") and hasattr(sample, "value"):
+        return sample.labels, sample.value
+    # Otherwise fall back to treating it as a plain 3 tuple.
+    else:
+        _, labels, value = sample
+        return labels, value
+
+
+class TestMauLimit(unittest.TestCase):
+    def test_basic(self):
+        class MetricEntry(Protocol):
+            foo: int
+            bar: int
+
+        gauge: InFlightGauge[MetricEntry] = InFlightGauge(
+            "test1", "", labels=["test_label"], sub_metrics=["foo", "bar"]
+        )
+
+        def handle1(metrics):
+            metrics.foo += 2
+            metrics.bar = max(metrics.bar, 5)
+
+        def handle2(metrics):
+            metrics.foo += 3
+            metrics.bar = max(metrics.bar, 7)
+
+        gauge.register(("key1",), handle1)
+
+        self.assert_dict(
+            {
+                "test1_total": {("key1",): 1},
+                "test1_foo": {("key1",): 2},
+                "test1_bar": {("key1",): 5},
+            },
+            self.get_metrics_from_gauge(gauge),
+        )
+
+        gauge.unregister(("key1",), handle1)
+
+        self.assert_dict(
+            {
+                "test1_total": {("key1",): 0},
+                "test1_foo": {("key1",): 0},
+                "test1_bar": {("key1",): 0},
+            },
+            self.get_metrics_from_gauge(gauge),
+        )
+
+        gauge.register(("key1",), handle1)
+        gauge.register(("key2",), handle2)
+
+        self.assert_dict(
+            {
+                "test1_total": {("key1",): 1, ("key2",): 1},
+                "test1_foo": {("key1",): 2, ("key2",): 3},
+                "test1_bar": {("key1",): 5, ("key2",): 7},
+            },
+            self.get_metrics_from_gauge(gauge),
+        )
+
+        gauge.unregister(("key2",), handle2)
+        gauge.register(("key1",), handle2)
+
+        self.assert_dict(
+            {
+                "test1_total": {("key1",): 2, ("key2",): 0},
+                "test1_foo": {("key1",): 5, ("key2",): 0},
+                "test1_bar": {("key1",): 7, ("key2",): 0},
+            },
+            self.get_metrics_from_gauge(gauge),
+        )
+
+    def get_metrics_from_gauge(self, gauge):
+        results = {}
+
+        for r in gauge.collect():
+            results[r.name] = {
+                tuple(labels[x] for x in gauge.labels): value
+                for labels, value in map(get_sample_labels_value, r.samples)
+            }
+
+        return results
+
+
+class BuildInfoTests(unittest.TestCase):
+    def test_get_build(self):
+        """
+        The synapse_build_info metric reports the OS version, Python version,
+        and Synapse version.
+        """
+        items = list(
+            filter(
+                lambda x: b"synapse_build_info{" in x,
+                generate_latest(REGISTRY).split(b"\n"),
+            )
+        )
+        self.assertEqual(len(items), 1)
+        self.assertTrue(b"osversion=" in items[0])
+        self.assertTrue(b"pythonversion=" in items[0])
+        self.assertTrue(b"version=" in items[0])
+
+
+class CacheMetricsTests(unittest.HomeserverTestCase):
+    def test_cache_metric(self):
+        """
+        Caches produce metrics reflecting their state when scraped.
+        """
+        CACHE_NAME = "cache_metrics_test_fgjkbdfg"
+        cache: DeferredCache[str, str] = DeferredCache(CACHE_NAME, max_entries=777)
+
+        items = {
+            x.split(b"{")[0].decode("ascii"): x.split(b" ")[1].decode("ascii")
+            for x in filter(
+                lambda x: b"cache_metrics_test_fgjkbdfg" in x,
+                generate_latest(REGISTRY).split(b"\n"),
+            )
+        }
+
+        self.assertEqual(items["synapse_util_caches_cache_size"], "0.0")
+        self.assertEqual(items["synapse_util_caches_cache_max_size"], "777.0")
+
+        cache.prefill("1", "hi")
+
+        items = {
+            x.split(b"{")[0].decode("ascii"): x.split(b" ")[1].decode("ascii")
+            for x in filter(
+                lambda x: b"cache_metrics_test_fgjkbdfg" in x,
+                generate_latest(REGISTRY).split(b"\n"),
+            )
+        }
+
+        self.assertEqual(items["synapse_util_caches_cache_size"], "1.0")
+        self.assertEqual(items["synapse_util_caches_cache_max_size"], "777.0")
+
+
+class PrometheusMetricsHackTestCase(unittest.HomeserverTestCase):
+    if parse_version(metadata.version("prometheus_client")) < parse_version("0.14.0"):
+        skip = "prometheus-client too old"
+
+    def test_created_metrics_disabled(self) -> None:
+        """
+        Tests that a brittle hack, to disable `_created` metrics, works.
+        This involves poking at the internals of prometheus-client.
+        It's not the end of the world if this doesn't work.
+
+        This test gives us a way to notice if prometheus-client changes
+        their internals.
+        """
+        import prometheus_client.metrics
+
+        PRIVATE_FLAG_NAME = "_use_created"
+
+        # By default, the pesky `_created` metrics are enabled.
+        # Check this assumption is still valid.
+        self.assertTrue(getattr(prometheus_client.metrics, PRIVATE_FLAG_NAME))
+
+        with patch("prometheus_client.metrics") as mock:
+            setattr(mock, PRIVATE_FLAG_NAME, True)
+            _set_prometheus_client_use_created_metrics(False)
+            self.assertFalse(getattr(mock, PRIVATE_FLAG_NAME, False))
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
deleted file mode 100644
index 1a70eddc9b..0000000000
--- a/tests/test_metrics.py
+++ /dev/null
@@ -1,200 +0,0 @@
-# Copyright 2018 New Vector Ltd
-# Copyright 2019 Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-try:
-    from importlib import metadata
-except ImportError:
-    import importlib_metadata as metadata  # type: ignore[no-redef]
-
-from unittest.mock import patch
-
-from pkg_resources import parse_version
-
-from synapse.app._base import _set_prometheus_client_use_created_metrics
-from synapse.metrics import REGISTRY, InFlightGauge, generate_latest
-from synapse.util.caches.deferred_cache import DeferredCache
-
-from tests import unittest
-
-
-def get_sample_labels_value(sample):
-    """Extract the labels and values of a sample.
-
-    prometheus_client 0.5 changed the sample type to a named tuple with more
-    members than the plain tuple had in 0.4 and earlier. This function can
-    extract the labels and value from the sample for both sample types.
-
-    Args:
-        sample: The sample to get the labels and value from.
-    Returns:
-        A tuple of (labels, value) from the sample.
-    """
-
-    # If the sample has a labels and value attribute, use those.
-    if hasattr(sample, "labels") and hasattr(sample, "value"):
-        return sample.labels, sample.value
-    # Otherwise fall back to treating it as a plain 3 tuple.
-    else:
-        _, labels, value = sample
-        return labels, value
-
-
-class TestMauLimit(unittest.TestCase):
-    def test_basic(self):
-        gauge = InFlightGauge(
-            "test1", "", labels=["test_label"], sub_metrics=["foo", "bar"]
-        )
-
-        def handle1(metrics):
-            metrics.foo += 2
-            metrics.bar = max(metrics.bar, 5)
-
-        def handle2(metrics):
-            metrics.foo += 3
-            metrics.bar = max(metrics.bar, 7)
-
-        gauge.register(("key1",), handle1)
-
-        self.assert_dict(
-            {
-                "test1_total": {("key1",): 1},
-                "test1_foo": {("key1",): 2},
-                "test1_bar": {("key1",): 5},
-            },
-            self.get_metrics_from_gauge(gauge),
-        )
-
-        gauge.unregister(("key1",), handle1)
-
-        self.assert_dict(
-            {
-                "test1_total": {("key1",): 0},
-                "test1_foo": {("key1",): 0},
-                "test1_bar": {("key1",): 0},
-            },
-            self.get_metrics_from_gauge(gauge),
-        )
-
-        gauge.register(("key1",), handle1)
-        gauge.register(("key2",), handle2)
-
-        self.assert_dict(
-            {
-                "test1_total": {("key1",): 1, ("key2",): 1},
-                "test1_foo": {("key1",): 2, ("key2",): 3},
-                "test1_bar": {("key1",): 5, ("key2",): 7},
-            },
-            self.get_metrics_from_gauge(gauge),
-        )
-
-        gauge.unregister(("key2",), handle2)
-        gauge.register(("key1",), handle2)
-
-        self.assert_dict(
-            {
-                "test1_total": {("key1",): 2, ("key2",): 0},
-                "test1_foo": {("key1",): 5, ("key2",): 0},
-                "test1_bar": {("key1",): 7, ("key2",): 0},
-            },
-            self.get_metrics_from_gauge(gauge),
-        )
-
-    def get_metrics_from_gauge(self, gauge):
-        results = {}
-
-        for r in gauge.collect():
-            results[r.name] = {
-                tuple(labels[x] for x in gauge.labels): value
-                for labels, value in map(get_sample_labels_value, r.samples)
-            }
-
-        return results
-
-
-class BuildInfoTests(unittest.TestCase):
-    def test_get_build(self):
-        """
-        The synapse_build_info metric reports the OS version, Python version,
-        and Synapse version.
-        """
-        items = list(
-            filter(
-                lambda x: b"synapse_build_info{" in x,
-                generate_latest(REGISTRY).split(b"\n"),
-            )
-        )
-        self.assertEqual(len(items), 1)
-        self.assertTrue(b"osversion=" in items[0])
-        self.assertTrue(b"pythonversion=" in items[0])
-        self.assertTrue(b"version=" in items[0])
-
-
-class CacheMetricsTests(unittest.HomeserverTestCase):
-    def test_cache_metric(self):
-        """
-        Caches produce metrics reflecting their state when scraped.
-        """
-        CACHE_NAME = "cache_metrics_test_fgjkbdfg"
-        cache = DeferredCache(CACHE_NAME, max_entries=777)
-
-        items = {
-            x.split(b"{")[0].decode("ascii"): x.split(b" ")[1].decode("ascii")
-            for x in filter(
-                lambda x: b"cache_metrics_test_fgjkbdfg" in x,
-                generate_latest(REGISTRY).split(b"\n"),
-            )
-        }
-
-        self.assertEqual(items["synapse_util_caches_cache_size"], "0.0")
-        self.assertEqual(items["synapse_util_caches_cache_max_size"], "777.0")
-
-        cache.prefill("1", "hi")
-
-        items = {
-            x.split(b"{")[0].decode("ascii"): x.split(b" ")[1].decode("ascii")
-            for x in filter(
-                lambda x: b"cache_metrics_test_fgjkbdfg" in x,
-                generate_latest(REGISTRY).split(b"\n"),
-            )
-        }
-
-        self.assertEqual(items["synapse_util_caches_cache_size"], "1.0")
-        self.assertEqual(items["synapse_util_caches_cache_max_size"], "777.0")
-
-
-class PrometheusMetricsHackTestCase(unittest.HomeserverTestCase):
-    if parse_version(metadata.version("prometheus_client")) < parse_version("0.14.0"):
-        skip = "prometheus-client too old"
-
-    def test_created_metrics_disabled(self) -> None:
-        """
-        Tests that a brittle hack, to disable `_created` metrics, works.
-        This involves poking at the internals of prometheus-client.
-        It's not the end of the world if this doesn't work.
-
-        This test gives us a way to notice if prometheus-client changes
-        their internals.
-        """
-        import prometheus_client.metrics
-
-        PRIVATE_FLAG_NAME = "_use_created"
-
-        # By default, the pesky `_created` metrics are enabled.
-        # Check this assumption is still valid.
-        self.assertTrue(getattr(prometheus_client.metrics, PRIVATE_FLAG_NAME))
-
-        with patch("prometheus_client.metrics") as mock:
-            setattr(mock, PRIVATE_FLAG_NAME, True)
-            _set_prometheus_client_use_created_metrics(False)
-            self.assertFalse(getattr(mock, PRIVATE_FLAG_NAME, False))
-- 
cgit 1.5.1


From cc3a52b33df72bb4230367536b924a6d1f510d36 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Mon, 31 Oct 2022 18:07:30 +0100
Subject: Support OIDC backchannel logouts (#11414)

If configured, an OIDC IdP can log a user's session out of
Synapse when they log out of the identity provider.

The IdP sends a request directly to Synapse (and must be
configured with an endpoint) when a user logs out.
---
 changelog.d/11414.feature                          |   1 +
 docs/openid.md                                     |  14 +
 docs/usage/configuration/config_documentation.md   |   9 +
 synapse/config/oidc.py                             |  12 +
 synapse/handlers/oidc.py                           | 381 ++++++++++++++++++--
 synapse/handlers/sso.py                            |  71 ++++
 synapse/rest/synapse/client/oidc/__init__.py       |   4 +
 .../client/oidc/backchannel_logout_resource.py     |  35 ++
 synapse/storage/databases/main/registration.py     |  21 ++
 tests/rest/client/test_auth.py                     | 390 +++++++++++++++++++--
 tests/rest/client/utils.py                         |  55 ++-
 tests/server.py                                    |   6 +
 tests/test_utils/oidc.py                           |  27 +-
 13 files changed, 960 insertions(+), 66 deletions(-)
 create mode 100644 changelog.d/11414.feature
 create mode 100644 synapse/rest/synapse/client/oidc/backchannel_logout_resource.py

(limited to 'synapse')

diff --git a/changelog.d/11414.feature b/changelog.d/11414.feature
new file mode 100644
index 0000000000..fc035e50a7
--- /dev/null
+++ b/changelog.d/11414.feature
@@ -0,0 +1 @@
+Support back-channel logouts from OpenID Connect providers.
diff --git a/docs/openid.md b/docs/openid.md
index 87ebea4c29..37c5eb244d 100644
--- a/docs/openid.md
+++ b/docs/openid.md
@@ -49,6 +49,13 @@ setting in your configuration file.
 See the [configuration manual](usage/configuration/config_documentation.md#oidc_providers) for some sample settings, as well as
 the text below for example configurations for specific providers.
 
+## OIDC Back-Channel Logout
+
+Synapse supports receiving [OpenID Connect Back-Channel Logout](https://openid.net/specs/openid-connect-backchannel-1_0.html) notifications.
+
+This lets the OpenID Connect Provider notify Synapse when a user logs out, so that Synapse can end that user session.
+This feature can be enabled by setting the `backchannel_logout_enabled` property to `true` in the provider configuration, and setting the following URL as destination for Back-Channel Logout notifications in your OpenID Connect Provider: `[synapse public baseurl]/_synapse/client/oidc/backchannel_logout`
+
 ## Sample configs
 
 Here are a few configs for providers that should work with Synapse.
@@ -123,6 +130,9 @@ oidc_providers:
 
 [Keycloak][keycloak-idp] is an opensource IdP maintained by Red Hat.
 
+Keycloak supports OIDC Back-Channel Logout, which sends logout notifications to Synapse, so that Synapse users get logged out when they log out from Keycloak.
+This can be optionally enabled by setting `backchannel_logout_enabled` to `true` in the Synapse configuration, and by setting the "Backchannel Logout URL" in Keycloak.
+
 Follow the [Getting Started Guide](https://www.keycloak.org/getting-started) to install Keycloak and set up a realm.
 
 1. Click `Clients` in the sidebar and click `Create`
@@ -144,6 +154,8 @@ Follow the [Getting Started Guide](https://www.keycloak.org/getting-started) to
 | Client Protocol | `openid-connect` |
 | Access Type | `confidential` |
 | Valid Redirect URIs | `[synapse public baseurl]/_synapse/client/oidc/callback` |
+| Backchannel Logout URL (optional) | `[synapse public baseurl]/_synapse/client/oidc/backchannel_logout` |
+| Backchannel Logout Session Required (optional) | `On` |
 
 5. Click `Save`
 6. On the Credentials tab, update the fields:
@@ -167,7 +179,9 @@ oidc_providers:
       config:
         localpart_template: "{{ user.preferred_username }}"
         display_name_template: "{{ user.name }}"
+    backchannel_logout_enabled: true # Optional
 ```
+
 ### Auth0
 
 [Auth0][auth0] is a hosted SaaS IdP solution.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 97fb505a5f..44358faf59 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3021,6 +3021,15 @@ Options for each entry include:
      which is set to the claims returned by the UserInfo Endpoint and/or
      in the ID Token.
 
+* `backchannel_logout_enabled`: set to `true` to process OIDC Back-Channel Logout notifications. 
+  Those notifications are expected to be received on `/_synapse/client/oidc/backchannel_logout`.
+  Defaults to `false`.
+
+* `backchannel_logout_ignore_sub`: by default, the OIDC Back-Channel Logout feature checks that the
+  `sub` claim matches the subject claim received during login. This check can be disabled by setting
+  this to `true`. Defaults to `false`.
+
+  You might want to disable this check (by setting this option to `true`) if the `subject_claim` returned by the mapping provider is not `sub`.
 
 It is possible to configure Synapse to only allow logins if certain attributes
 match particular values in the OIDC userinfo. The requirements can be listed under
diff --git a/synapse/config/oidc.py b/synapse/config/oidc.py
index 5418a332da..0bd83f4010 100644
--- a/synapse/config/oidc.py
+++ b/synapse/config/oidc.py
@@ -123,6 +123,8 @@ OIDC_PROVIDER_CONFIG_SCHEMA = {
         "userinfo_endpoint": {"type": "string"},
         "jwks_uri": {"type": "string"},
         "skip_verification": {"type": "boolean"},
+        "backchannel_logout_enabled": {"type": "boolean"},
+        "backchannel_logout_ignore_sub": {"type": "boolean"},
         "user_profile_method": {
             "type": "string",
             "enum": ["auto", "userinfo_endpoint"],
@@ -292,6 +294,10 @@ def _parse_oidc_config_dict(
         token_endpoint=oidc_config.get("token_endpoint"),
         userinfo_endpoint=oidc_config.get("userinfo_endpoint"),
         jwks_uri=oidc_config.get("jwks_uri"),
+        backchannel_logout_enabled=oidc_config.get("backchannel_logout_enabled", False),
+        backchannel_logout_ignore_sub=oidc_config.get(
+            "backchannel_logout_ignore_sub", False
+        ),
         skip_verification=oidc_config.get("skip_verification", False),
         user_profile_method=oidc_config.get("user_profile_method", "auto"),
         allow_existing_users=oidc_config.get("allow_existing_users", False),
@@ -368,6 +374,12 @@ class OidcProviderConfig:
     # "openid" scope is used.
     jwks_uri: Optional[str]
 
+    # Whether Synapse should react to backchannel logouts
+    backchannel_logout_enabled: bool
+
+    # Whether Synapse should ignore the `sub` claim in backchannel logouts or not.
+    backchannel_logout_ignore_sub: bool
+
     # Whether to skip metadata verification
     skip_verification: bool
 
diff --git a/synapse/handlers/oidc.py b/synapse/handlers/oidc.py
index 9759daf043..867973dcca 100644
--- a/synapse/handlers/oidc.py
+++ b/synapse/handlers/oidc.py
@@ -12,14 +12,28 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import binascii
 import inspect
+import json
 import logging
-from typing import TYPE_CHECKING, Any, Dict, Generic, List, Optional, TypeVar, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Generic,
+    List,
+    Optional,
+    Type,
+    TypeVar,
+    Union,
+)
 from urllib.parse import urlencode, urlparse
 
 import attr
+import unpaddedbase64
 from authlib.common.security import generate_token
-from authlib.jose import JsonWebToken, jwt
+from authlib.jose import JsonWebToken, JWTClaims
+from authlib.jose.errors import InvalidClaimError, JoseError, MissingClaimError
 from authlib.oauth2.auth import ClientAuth
 from authlib.oauth2.rfc6749.parameters import prepare_grant_uri
 from authlib.oidc.core import CodeIDToken, UserInfo
@@ -35,9 +49,12 @@ from typing_extensions import TypedDict
 from twisted.web.client import readBody
 from twisted.web.http_headers import Headers
 
+from synapse.api.errors import SynapseError
 from synapse.config import ConfigError
 from synapse.config.oidc import OidcProviderClientSecretJwtKey, OidcProviderConfig
 from synapse.handlers.sso import MappingException, UserAttributes
+from synapse.http.server import finish_request
+from synapse.http.servlet import parse_string
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import make_deferred_yieldable
 from synapse.types import JsonDict, UserID, map_username_to_mxid_localpart
@@ -88,6 +105,8 @@ class Token(TypedDict):
 #: there is no real point of doing this in our case.
 JWK = Dict[str, str]
 
+C = TypeVar("C")
+
 
 #: A JWK Set, as per RFC7517 sec 5.
 class JWKS(TypedDict):
@@ -247,6 +266,80 @@ class OidcHandler:
 
         await oidc_provider.handle_oidc_callback(request, session_data, code)
 
+    async def handle_backchannel_logout(self, request: SynapseRequest) -> None:
+        """Handle an incoming request to /_synapse/client/oidc/backchannel_logout
+
+        This extracts the logout_token from the request and tries to figure out
+        which OpenID Provider it is coming from. This works by matching the iss claim
+        with the issuer and the aud claim with the client_id.
+
+        Since at this point we don't know who signed the JWT, we can't just
+        decode it using authlib since it always verifies the signature. We
+        have to decode it manually without validating the signature. The actual JWT
+        verification is done in the `OidcProvider.handle_backchannel_logout` method,
+        once we have figured out which provider sent the request.
+
+        Args:
+            request: the incoming request from the OpenID Provider.
+        """
+        logout_token = parse_string(request, "logout_token")
+        if logout_token is None:
+            raise SynapseError(400, "Missing logout_token in request")
+
+        # A JWT looks like this:
+        #    header.payload.signature
+        # where all parts are encoded with urlsafe base64.
+        # The aud and iss claims we care about are in the payload part, which
+        # is a JSON object.
+        try:
+            # By destructuring the list after splitting, we ensure that we have
+            # exactly 3 segments
+            _, payload, _ = logout_token.split(".")
+        except ValueError:
+            raise SynapseError(400, "Invalid logout_token in request")
+
+        try:
+            payload_bytes = unpaddedbase64.decode_base64(payload)
+            claims = json_decoder.decode(payload_bytes.decode("utf-8"))
+        except (json.JSONDecodeError, binascii.Error, UnicodeError):
+            raise SynapseError(400, "Invalid logout_token payload in request")
+
+        try:
+            # Let's extract the iss and aud claims
+            iss = claims["iss"]
+            aud = claims["aud"]
+            # The aud claim can be either a string or a list of string. Here we
+            # normalize it as a list of strings.
+            if isinstance(aud, str):
+                aud = [aud]
+
+            # Check that we have the right types for the aud and the iss claims
+            if not isinstance(iss, str) or not isinstance(aud, list):
+                raise TypeError()
+            for a in aud:
+                if not isinstance(a, str):
+                    raise TypeError()
+
+            # At this point we properly checked both claims types
+            issuer: str = iss
+            audience: List[str] = aud
+        except (TypeError, KeyError):
+            raise SynapseError(400, "Invalid issuer/audience in logout_token")
+
+        # Now that we know the audience and the issuer, we can figure out from
+        # what provider it is coming from
+        oidc_provider: Optional[OidcProvider] = None
+        for provider in self._providers.values():
+            if provider.issuer == issuer and provider.client_id in audience:
+                oidc_provider = provider
+                break
+
+        if oidc_provider is None:
+            raise SynapseError(400, "Could not find the OP that issued this event")
+
+        # Ask the provider to handle the logout request.
+        await oidc_provider.handle_backchannel_logout(request, logout_token)
+
 
 class OidcError(Exception):
     """Used to catch errors when calling the token_endpoint"""
@@ -342,6 +435,7 @@ class OidcProvider:
         self.idp_brand = provider.idp_brand
 
         self._sso_handler = hs.get_sso_handler()
+        self._device_handler = hs.get_device_handler()
 
         self._sso_handler.register_identity_provider(self)
 
@@ -400,6 +494,41 @@ class OidcProvider:
             # If we're not using userinfo, we need a valid jwks to validate the ID token
             m.validate_jwks_uri()
 
+        if self._config.backchannel_logout_enabled:
+            if not m.get("backchannel_logout_supported", False):
+                logger.warning(
+                    "OIDC Back-Channel Logout is enabled for issuer %r "
+                    "but it does not advertise support for it",
+                    self.issuer,
+                )
+
+            elif not m.get("backchannel_logout_session_supported", False):
+                logger.warning(
+                    "OIDC Back-Channel Logout is enabled and supported "
+                    "by issuer %r but it might not send a session ID with "
+                    "logout tokens, which is required for the logouts to work",
+                    self.issuer,
+                )
+
+            if not self._config.backchannel_logout_ignore_sub:
+                # If OIDC backchannel logouts are enabled, the user mapping provider
+                # should use the `sub` claim. We verify that by mapping a dummy user and
+                # checking that we get back the `sub` claim.
+                user = UserInfo({"sub": "thisisasubject"})
+                try:
+                    subject = self._user_mapping_provider.get_remote_user_id(user)
+                    if subject != user["sub"]:
+                        raise ValueError("Unexpected subject")
+                except Exception:
+                    logger.warning(
+                        f"OIDC Back-Channel Logout is enabled for issuer {self.issuer!r} "
+                        "but it looks like the configured `user_mapping_provider` "
+                        "does not use the `sub` claim as subject. If it is the case, "
+                        "and you want Synapse to ignore the `sub` claim in OIDC "
+                        "Back-Channel Logouts, set `backchannel_logout_ignore_sub` "
+                        "to `true` in the issuer config."
+                    )
+
     @property
     def _uses_userinfo(self) -> bool:
         """Returns True if the ``userinfo_endpoint`` should be used.
@@ -415,6 +544,16 @@ class OidcProvider:
             or self._user_profile_method == "userinfo_endpoint"
         )
 
+    @property
+    def issuer(self) -> str:
+        """The issuer identifying this provider."""
+        return self._config.issuer
+
+    @property
+    def client_id(self) -> str:
+        """The client_id used when interacting with this provider."""
+        return self._config.client_id
+
     async def load_metadata(self, force: bool = False) -> OpenIDProviderMetadata:
         """Return the provider metadata.
 
@@ -662,6 +801,59 @@ class OidcProvider:
 
         return UserInfo(resp)
 
+    async def _verify_jwt(
+        self,
+        alg_values: List[str],
+        token: str,
+        claims_cls: Type[C],
+        claims_options: Optional[dict] = None,
+        claims_params: Optional[dict] = None,
+    ) -> C:
+        """Decode and validate a JWT, re-fetching the JWKS as needed.
+
+        Args:
+            alg_values: list of `alg` values allowed when verifying the JWT.
+            token: the JWT.
+            claims_cls: the JWTClaims class to use to validate the claims.
+            claims_options: dict of options passed to the `claims_cls` constructor.
+            claims_params: dict of params passed to the `claims_cls` constructor.
+
+        Returns:
+            The decoded claims in the JWT.
+        """
+        jwt = JsonWebToken(alg_values)
+
+        logger.debug("Attempting to decode JWT (%s) %r", claims_cls.__name__, token)
+
+        # Try to decode the keys in cache first, then retry by forcing the keys
+        # to be reloaded
+        jwk_set = await self.load_jwks()
+        try:
+            claims = jwt.decode(
+                token,
+                key=jwk_set,
+                claims_cls=claims_cls,
+                claims_options=claims_options,
+                claims_params=claims_params,
+            )
+        except ValueError:
+            logger.info("Reloading JWKS after decode error")
+            jwk_set = await self.load_jwks(force=True)  # try reloading the jwks
+            claims = jwt.decode(
+                token,
+                key=jwk_set,
+                claims_cls=claims_cls,
+                claims_options=claims_options,
+                claims_params=claims_params,
+            )
+
+        logger.debug("Decoded JWT (%s) %r; validating", claims_cls.__name__, claims)
+
+        claims.validate(
+            now=self._clock.time(), leeway=120
+        )  # allows 2 min of clock skew
+        return claims
+
     async def _parse_id_token(self, token: Token, nonce: str) -> CodeIDToken:
         """Return an instance of UserInfo from token's ``id_token``.
 
@@ -675,13 +867,13 @@ class OidcProvider:
             The decoded claims in the ID token.
         """
         id_token = token.get("id_token")
-        logger.debug("Attempting to decode JWT id_token %r", id_token)
 
         # That has been theoritically been checked by the caller, so even though
         # assertion are not enabled in production, it is mainly here to appease mypy
         assert id_token is not None
 
         metadata = await self.load_metadata()
+
         claims_params = {
             "nonce": nonce,
             "client_id": self._client_auth.client_id,
@@ -691,38 +883,17 @@ class OidcProvider:
             # in the `id_token` that we can check against.
             claims_params["access_token"] = token["access_token"]
 
-        alg_values = metadata.get("id_token_signing_alg_values_supported", ["RS256"])
-        jwt = JsonWebToken(alg_values)
-
-        claim_options = {"iss": {"values": [metadata["issuer"]]}}
+        claims_options = {"iss": {"values": [metadata["issuer"]]}}
 
-        # Try to decode the keys in cache first, then retry by forcing the keys
-        # to be reloaded
-        jwk_set = await self.load_jwks()
-        try:
-            claims = jwt.decode(
-                id_token,
-                key=jwk_set,
-                claims_cls=CodeIDToken,
-                claims_options=claim_options,
-                claims_params=claims_params,
-            )
-        except ValueError:
-            logger.info("Reloading JWKS after decode error")
-            jwk_set = await self.load_jwks(force=True)  # try reloading the jwks
-            claims = jwt.decode(
-                id_token,
-                key=jwk_set,
-                claims_cls=CodeIDToken,
-                claims_options=claim_options,
-                claims_params=claims_params,
-            )
-
-        logger.debug("Decoded id_token JWT %r; validating", claims)
+        alg_values = metadata.get("id_token_signing_alg_values_supported", ["RS256"])
 
-        claims.validate(
-            now=self._clock.time(), leeway=120
-        )  # allows 2 min of clock skew
+        claims = await self._verify_jwt(
+            alg_values=alg_values,
+            token=id_token,
+            claims_cls=CodeIDToken,
+            claims_options=claims_options,
+            claims_params=claims_params,
+        )
 
         return claims
 
@@ -1043,6 +1214,146 @@ class OidcProvider:
         # to be strings.
         return str(remote_user_id)
 
+    async def handle_backchannel_logout(
+        self, request: SynapseRequest, logout_token: str
+    ) -> None:
+        """Handle an incoming request to /_synapse/client/oidc/backchannel_logout
+
+        The OIDC Provider posts a logout token to this endpoint when a user
+        session ends. That token is a JWT signed with the same keys as
+        ID tokens. The OpenID Connect Back-Channel Logout draft explains how to
+        validate the JWT and figure out what session to end.
+
+        Args:
+            request: The request to respond to
+            logout_token: The logout token (a JWT) extracted from the request body
+        """
+        # Back-Channel Logout can be disabled in the config, hence this check.
+        # This is not that important for now since Synapse is registered
+        # manually to the OP, so not specifying the backchannel-logout URI is
+        # as effective as disabling it here. It might make more sense if we
+        # support dynamic registration in Synapse at some point.
+        if not self._config.backchannel_logout_enabled:
+            logger.warning(
+                f"Received an OIDC Back-Channel Logout request from issuer {self.issuer!r} but it is disabled in config"
+            )
+
+            # TODO: this responds with a 400 status code, which is what the OIDC
+            # Back-Channel Logout spec expects, but spec also suggests answering with
+            # a JSON object, with the `error` and `error_description` fields set, which
+            # we are not doing here.
+            # See https://openid.net/specs/openid-connect-backchannel-1_0.html#BCResponse
+            raise SynapseError(
+                400, "OpenID Connect Back-Channel Logout is disabled for this provider"
+            )
+
+        metadata = await self.load_metadata()
+
+        # As per OIDC Back-Channel Logout 1.0 sec. 2.4:
+        #   A Logout Token MUST be signed and MAY also be encrypted. The same
+        #   keys are used to sign and encrypt Logout Tokens as are used for ID
+        #   Tokens. If the Logout Token is encrypted, it SHOULD replicate the
+        #   iss (issuer) claim in the JWT Header Parameters, as specified in
+        #   Section 5.3 of [JWT].
+        alg_values = metadata.get("id_token_signing_alg_values_supported", ["RS256"])
+
+        # As per sec. 2.6:
+        #    3. Validate the iss, aud, and iat Claims in the same way they are
+        #       validated in ID Tokens.
+        # Which means the audience should contain Synapse's client_id and the
+        # issuer should be the IdP issuer
+        claims_options = {
+            "iss": {"values": [metadata["issuer"]]},
+            "aud": {"values": [self.client_id]},
+        }
+
+        try:
+            claims = await self._verify_jwt(
+                alg_values=alg_values,
+                token=logout_token,
+                claims_cls=LogoutToken,
+                claims_options=claims_options,
+            )
+        except JoseError:
+            logger.exception("Invalid logout_token")
+            raise SynapseError(400, "Invalid logout_token")
+
+        # As per sec. 2.6:
+        #    4. Verify that the Logout Token contains a sub Claim, a sid Claim,
+        #       or both.
+        #    5. Verify that the Logout Token contains an events Claim whose
+        #       value is JSON object containing the member name
+        #       http://schemas.openid.net/event/backchannel-logout.
+        #    6. Verify that the Logout Token does not contain a nonce Claim.
+        # This is all verified by the LogoutToken claims class, so at this
+        # point the `sid` claim exists and is a string.
+        sid: str = claims.get("sid")
+
+        # If the `sub` claim was included in the logout token, we check that it matches
+        # the right user. We can have cases where the `sub` claim is not
+        # the ID saved in database, so we let admins disable this check in config.
+        sub: Optional[str] = claims.get("sub")
+        expected_user_id: Optional[str] = None
+        if sub is not None and not self._config.backchannel_logout_ignore_sub:
+            expected_user_id = await self._store.get_user_by_external_id(
+                self.idp_id, sub
+            )
+
+        # Invalidate any running user-mapping sessions, in-flight login tokens and
+        # active devices
+        await self._sso_handler.revoke_sessions_for_provider_session_id(
+            auth_provider_id=self.idp_id,
+            auth_provider_session_id=sid,
+            expected_user_id=expected_user_id,
+        )
+
+        request.setResponseCode(200)
+        request.setHeader(b"Cache-Control", b"no-cache, no-store")
+        request.setHeader(b"Pragma", b"no-cache")
+        finish_request(request)
+
+
+class LogoutToken(JWTClaims):
+    """
+    Holds and verifies the claims of a logout token, as per
+    https://openid.net/specs/openid-connect-backchannel-1_0.html#LogoutToken
+    """
+
+    REGISTERED_CLAIMS = ["iss", "sub", "aud", "iat", "jti", "events", "sid"]
+
+    def validate(self, now: Optional[int] = None, leeway: int = 0) -> None:
+        """Validate everything in claims payload."""
+        super().validate(now, leeway)
+        self.validate_sid()
+        self.validate_events()
+        self.validate_nonce()
+
+    def validate_sid(self) -> None:
+        """Ensure the sid claim is present"""
+        sid = self.get("sid")
+        if not sid:
+            raise MissingClaimError("sid")
+
+        if not isinstance(sid, str):
+            raise InvalidClaimError("sid")
+
+    def validate_nonce(self) -> None:
+        """Ensure the nonce claim is absent"""
+        if "nonce" in self:
+            raise InvalidClaimError("nonce")
+
+    def validate_events(self) -> None:
+        """Ensure the events claim is present and with the right value"""
+        events = self.get("events")
+        if not events:
+            raise MissingClaimError("events")
+
+        if not isinstance(events, dict):
+            raise InvalidClaimError("events")
+
+        if "http://schemas.openid.net/event/backchannel-logout" not in events:
+            raise InvalidClaimError("events")
+
 
 # number of seconds a newly-generated client secret should be valid for
 CLIENT_SECRET_VALIDITY_SECONDS = 3600
@@ -1112,6 +1423,7 @@ class JwtClientSecret:
         logger.info(
             "Generating new JWT for %s: %s %s", self._oauth_issuer, header, payload
         )
+        jwt = JsonWebToken(header["alg"])
         self._cached_secret = jwt.encode(header, payload, self._key.key)
         self._cached_secret_replacement_time = (
             expires_at - CLIENT_SECRET_MIN_VALIDITY_SECONDS
@@ -1126,9 +1438,6 @@ class UserAttributeDict(TypedDict):
     emails: List[str]
 
 
-C = TypeVar("C")
-
-
 class OidcMappingProvider(Generic[C]):
     """A mapping provider maps a UserInfo object to user attributes.
 
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index 5943f08e91..749d7e93b0 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -191,6 +191,7 @@ class SsoHandler:
         self._server_name = hs.hostname
         self._registration_handler = hs.get_registration_handler()
         self._auth_handler = hs.get_auth_handler()
+        self._device_handler = hs.get_device_handler()
         self._error_template = hs.config.sso.sso_error_template
         self._bad_user_template = hs.config.sso.sso_auth_bad_user_template
         self._profile_handler = hs.get_profile_handler()
@@ -1026,6 +1027,76 @@ class SsoHandler:
 
         return True
 
+    async def revoke_sessions_for_provider_session_id(
+        self,
+        auth_provider_id: str,
+        auth_provider_session_id: str,
+        expected_user_id: Optional[str] = None,
+    ) -> None:
+        """Revoke any devices and in-flight logins tied to a provider session.
+
+        Args:
+            auth_provider_id: A unique identifier for this SSO provider, e.g.
+                "oidc" or "saml".
+            auth_provider_session_id: The session ID from the provider to logout
+            expected_user_id: The user we're expecting to logout. If set, it will ignore
+                sessions belonging to other users and log an error.
+        """
+        # Invalidate any running user-mapping sessions
+        to_delete = []
+        for session_id, session in self._username_mapping_sessions.items():
+            if (
+                session.auth_provider_id == auth_provider_id
+                and session.auth_provider_session_id == auth_provider_session_id
+            ):
+                to_delete.append(session_id)
+
+        for session_id in to_delete:
+            logger.info("Revoking mapping session %s", session_id)
+            del self._username_mapping_sessions[session_id]
+
+        # Invalidate any in-flight login tokens
+        await self._store.invalidate_login_tokens_by_session_id(
+            auth_provider_id=auth_provider_id,
+            auth_provider_session_id=auth_provider_session_id,
+        )
+
+        # Fetch any device(s) in the store associated with the session ID.
+        devices = await self._store.get_devices_by_auth_provider_session_id(
+            auth_provider_id=auth_provider_id,
+            auth_provider_session_id=auth_provider_session_id,
+        )
+
+        # We have no guarantee that all the devices of that session are for the same
+        # `user_id`. Hence, we have to iterate over the list of devices and log them out
+        # one by one.
+        for device in devices:
+            user_id = device["user_id"]
+            device_id = device["device_id"]
+
+            # If the user_id associated with that device/session is not the one we got
+            # out of the `sub` claim, skip that device and log an error.
+            if expected_user_id is not None and user_id != expected_user_id:
+                logger.error(
+                    "Received a logout notification from SSO provider "
+                    f"{auth_provider_id!r} for the user {expected_user_id!r}, but with "
+                    f"a session ID ({auth_provider_session_id!r}) which belongs to "
+                    f"{user_id!r}. This may happen when the SSO provider user mapper "
+                    "uses something else than the standard attribute as mapping ID. "
+                    "For OIDC providers, set `backchannel_logout_ignore_sub` to `true` "
+                    "in the provider config if that is the case."
+                )
+                continue
+
+            logger.info(
+                "Logging out %r (device %r) via SSO (%r) logout notification (session %r).",
+                user_id,
+                device_id,
+                auth_provider_id,
+                auth_provider_session_id,
+            )
+            await self._device_handler.delete_devices(user_id, [device_id])
+
 
 def get_username_mapping_session_cookie_from_request(request: IRequest) -> str:
     """Extract the session ID from the cookie
diff --git a/synapse/rest/synapse/client/oidc/__init__.py b/synapse/rest/synapse/client/oidc/__init__.py
index 81fec39659..e4b28ce3df 100644
--- a/synapse/rest/synapse/client/oidc/__init__.py
+++ b/synapse/rest/synapse/client/oidc/__init__.py
@@ -17,6 +17,9 @@ from typing import TYPE_CHECKING
 
 from twisted.web.resource import Resource
 
+from synapse.rest.synapse.client.oidc.backchannel_logout_resource import (
+    OIDCBackchannelLogoutResource,
+)
 from synapse.rest.synapse.client.oidc.callback_resource import OIDCCallbackResource
 
 if TYPE_CHECKING:
@@ -29,6 +32,7 @@ class OIDCResource(Resource):
     def __init__(self, hs: "HomeServer"):
         Resource.__init__(self)
         self.putChild(b"callback", OIDCCallbackResource(hs))
+        self.putChild(b"backchannel_logout", OIDCBackchannelLogoutResource(hs))
 
 
 __all__ = ["OIDCResource"]
diff --git a/synapse/rest/synapse/client/oidc/backchannel_logout_resource.py b/synapse/rest/synapse/client/oidc/backchannel_logout_resource.py
new file mode 100644
index 0000000000..e07e76855a
--- /dev/null
+++ b/synapse/rest/synapse/client/oidc/backchannel_logout_resource.py
@@ -0,0 +1,35 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import TYPE_CHECKING
+
+from synapse.http.server import DirectServeJsonResource
+from synapse.http.site import SynapseRequest
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+class OIDCBackchannelLogoutResource(DirectServeJsonResource):
+    isLeaf = 1
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__()
+        self._oidc_handler = hs.get_oidc_handler()
+
+    async def _async_render_POST(self, request: SynapseRequest) -> None:
+        await self._oidc_handler.handle_backchannel_logout(request)
diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py
index 0255295317..5167089e03 100644
--- a/synapse/storage/databases/main/registration.py
+++ b/synapse/storage/databases/main/registration.py
@@ -1920,6 +1920,27 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
             self._clock.time_msec(),
         )
 
+    async def invalidate_login_tokens_by_session_id(
+        self, auth_provider_id: str, auth_provider_session_id: str
+    ) -> None:
+        """Invalidate login tokens with the given IdP session ID.
+
+        Args:
+            auth_provider_id: The SSO Identity Provider that the user authenticated with
+                to get this token
+            auth_provider_session_id: The session ID advertised by the SSO Identity
+                Provider
+        """
+        await self.db_pool.simple_update(
+            table="login_tokens",
+            keyvalues={
+                "auth_provider_id": auth_provider_id,
+                "auth_provider_session_id": auth_provider_session_id,
+            },
+            updatevalues={"used_ts": self._clock.time_msec()},
+            desc="invalidate_login_tokens_by_session_id",
+        )
+
     @cached()
     async def is_guest(self, user_id: str) -> bool:
         res = await self.db_pool.simple_select_one_onecol(
diff --git a/tests/rest/client/test_auth.py b/tests/rest/client/test_auth.py
index ebf653d018..847294dc8e 100644
--- a/tests/rest/client/test_auth.py
+++ b/tests/rest/client/test_auth.py
@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import re
 from http import HTTPStatus
 from typing import Any, Dict, List, Optional, Tuple, Union
 
@@ -21,7 +22,7 @@ from twisted.web.resource import Resource
 
 import synapse.rest.admin
 from synapse.api.constants import ApprovalNoticeMedium, LoginType
-from synapse.api.errors import Codes
+from synapse.api.errors import Codes, SynapseError
 from synapse.handlers.ui_auth.checkers import UserInteractiveAuthChecker
 from synapse.rest.client import account, auth, devices, login, logout, register
 from synapse.rest.synapse.client import build_synapse_client_resource_tree
@@ -32,8 +33,8 @@ from synapse.util import Clock
 
 from tests import unittest
 from tests.handlers.test_oidc import HAS_OIDC
-from tests.rest.client.utils import TEST_OIDC_CONFIG
-from tests.server import FakeChannel
+from tests.rest.client.utils import TEST_OIDC_CONFIG, TEST_OIDC_ISSUER
+from tests.server import FakeChannel, make_request
 from tests.unittest import override_config, skip_unless
 
 
@@ -638,19 +639,6 @@ class RefreshAuthTests(unittest.HomeserverTestCase):
             {"refresh_token": refresh_token},
         )
 
-    def is_access_token_valid(self, access_token: str) -> bool:
-        """
-        Checks whether an access token is valid, returning whether it is or not.
-        """
-        code = self.make_request(
-            "GET", "/_matrix/client/v3/account/whoami", access_token=access_token
-        ).code
-
-        # Either 200 or 401 is what we get back; anything else is a bug.
-        assert code in {HTTPStatus.OK, HTTPStatus.UNAUTHORIZED}
-
-        return code == HTTPStatus.OK
-
     def test_login_issue_refresh_token(self) -> None:
         """
         A login response should include a refresh_token only if asked.
@@ -847,29 +835,37 @@ class RefreshAuthTests(unittest.HomeserverTestCase):
         self.reactor.advance(59.0)
 
         # Both tokens should still be valid.
-        self.assertTrue(self.is_access_token_valid(refreshable_access_token))
-        self.assertTrue(self.is_access_token_valid(nonrefreshable_access_token))
+        self.helper.whoami(refreshable_access_token, expect_code=HTTPStatus.OK)
+        self.helper.whoami(nonrefreshable_access_token, expect_code=HTTPStatus.OK)
 
         # Advance to 61 s (just past 1 minute, the time of expiry)
         self.reactor.advance(2.0)
 
         # Only the non-refreshable token is still valid.
-        self.assertFalse(self.is_access_token_valid(refreshable_access_token))
-        self.assertTrue(self.is_access_token_valid(nonrefreshable_access_token))
+        self.helper.whoami(
+            refreshable_access_token, expect_code=HTTPStatus.UNAUTHORIZED
+        )
+        self.helper.whoami(nonrefreshable_access_token, expect_code=HTTPStatus.OK)
 
         # Advance to 599 s (just shy of 10 minutes, the time of expiry)
         self.reactor.advance(599.0 - 61.0)
 
         # It's still the case that only the non-refreshable token is still valid.
-        self.assertFalse(self.is_access_token_valid(refreshable_access_token))
-        self.assertTrue(self.is_access_token_valid(nonrefreshable_access_token))
+        self.helper.whoami(
+            refreshable_access_token, expect_code=HTTPStatus.UNAUTHORIZED
+        )
+        self.helper.whoami(nonrefreshable_access_token, expect_code=HTTPStatus.OK)
 
         # Advance to 601 s (just past 10 minutes, the time of expiry)
         self.reactor.advance(2.0)
 
         # Now neither token is valid.
-        self.assertFalse(self.is_access_token_valid(refreshable_access_token))
-        self.assertFalse(self.is_access_token_valid(nonrefreshable_access_token))
+        self.helper.whoami(
+            refreshable_access_token, expect_code=HTTPStatus.UNAUTHORIZED
+        )
+        self.helper.whoami(
+            nonrefreshable_access_token, expect_code=HTTPStatus.UNAUTHORIZED
+        )
 
     @override_config(
         {"refreshable_access_token_lifetime": "1m", "refresh_token_lifetime": "2m"}
@@ -1165,3 +1161,349 @@ class RefreshAuthTests(unittest.HomeserverTestCase):
         # and no refresh token
         self.assertEqual(_table_length("access_tokens"), 0)
         self.assertEqual(_table_length("refresh_tokens"), 0)
+
+
+def oidc_config(
+    id: str, with_localpart_template: bool, **kwargs: Any
+) -> Dict[str, Any]:
+    """Sample OIDC provider config used in backchannel logout tests.
+
+    Args:
+        id: IDP ID for this provider
+        with_localpart_template: Set to `true` to have a default localpart_template in
+            the `user_mapping_provider` config and skip the user mapping session
+        **kwargs: rest of the config
+
+    Returns:
+        A dict suitable for the `oidc_config` or the `oidc_providers[]` parts of
+        the HS config
+    """
+    config: Dict[str, Any] = {
+        "idp_id": id,
+        "idp_name": id,
+        "issuer": TEST_OIDC_ISSUER,
+        "client_id": "test-client-id",
+        "client_secret": "test-client-secret",
+        "scopes": ["openid"],
+    }
+
+    if with_localpart_template:
+        config["user_mapping_provider"] = {
+            "config": {"localpart_template": "{{ user.sub }}"}
+        }
+    else:
+        config["user_mapping_provider"] = {"config": {}}
+
+    config.update(kwargs)
+
+    return config
+
+
+@skip_unless(HAS_OIDC, "Requires OIDC")
+class OidcBackchannelLogoutTests(unittest.HomeserverTestCase):
+    servlets = [
+        account.register_servlets,
+        login.register_servlets,
+    ]
+
+    def default_config(self) -> Dict[str, Any]:
+        config = super().default_config()
+
+        # public_baseurl uses an http:// scheme because FakeChannel.isSecure() returns
+        # False, so synapse will see the requested uri as http://..., so using http in
+        # the public_baseurl stops Synapse trying to redirect to https.
+        config["public_baseurl"] = "http://synapse.test"
+
+        return config
+
+    def create_resource_dict(self) -> Dict[str, Resource]:
+        resource_dict = super().create_resource_dict()
+        resource_dict.update(build_synapse_client_resource_tree(self.hs))
+        return resource_dict
+
+    def submit_logout_token(self, logout_token: str) -> FakeChannel:
+        return self.make_request(
+            "POST",
+            "/_synapse/client/oidc/backchannel_logout",
+            content=f"logout_token={logout_token}",
+            content_is_form=True,
+        )
+
+    @override_config(
+        {
+            "oidc_providers": [
+                oidc_config(
+                    id="oidc",
+                    with_localpart_template=True,
+                    backchannel_logout_enabled=True,
+                )
+            ]
+        }
+    )
+    def test_simple_logout(self) -> None:
+        """
+        Receiving a logout token should logout the user
+        """
+        fake_oidc_server = self.helper.fake_oidc_server()
+        user = "john"
+
+        login_resp, first_grant = self.helper.login_via_oidc(
+            fake_oidc_server, user, with_sid=True
+        )
+        first_access_token: str = login_resp["access_token"]
+        self.helper.whoami(first_access_token, expect_code=HTTPStatus.OK)
+
+        login_resp, second_grant = self.helper.login_via_oidc(
+            fake_oidc_server, user, with_sid=True
+        )
+        second_access_token: str = login_resp["access_token"]
+        self.helper.whoami(second_access_token, expect_code=HTTPStatus.OK)
+
+        self.assertNotEqual(first_grant.sid, second_grant.sid)
+        self.assertEqual(first_grant.userinfo["sub"], second_grant.userinfo["sub"])
+
+        # Logging out of the first session
+        logout_token = fake_oidc_server.generate_logout_token(first_grant)
+        channel = self.submit_logout_token(logout_token)
+        self.assertEqual(channel.code, 200)
+
+        self.helper.whoami(first_access_token, expect_code=HTTPStatus.UNAUTHORIZED)
+        self.helper.whoami(second_access_token, expect_code=HTTPStatus.OK)
+
+        # Logging out of the second session
+        logout_token = fake_oidc_server.generate_logout_token(second_grant)
+        channel = self.submit_logout_token(logout_token)
+        self.assertEqual(channel.code, 200)
+
+    @override_config(
+        {
+            "oidc_providers": [
+                oidc_config(
+                    id="oidc",
+                    with_localpart_template=True,
+                    backchannel_logout_enabled=True,
+                )
+            ]
+        }
+    )
+    def test_logout_during_login(self) -> None:
+        """
+        It should revoke login tokens when receiving a logout token
+        """
+        fake_oidc_server = self.helper.fake_oidc_server()
+        user = "john"
+
+        # Get an authentication, and log out before exchanging the login token
+        client_redirect_url = "https://x"
+        userinfo = {"sub": user}
+        channel, grant = self.helper.auth_via_oidc(
+            fake_oidc_server,
+            userinfo,
+            client_redirect_url,
+            with_sid=True,
+        )
+
+        # expect a confirmation page
+        self.assertEqual(channel.code, HTTPStatus.OK, channel.result)
+
+        # fish the matrix login token out of the body of the confirmation page
+        m = re.search(
+            'a href="%s.*loginToken=([^"]*)"' % (client_redirect_url,),
+            channel.text_body,
+        )
+        assert m, channel.text_body
+        login_token = m.group(1)
+
+        # Submit a logout
+        logout_token = fake_oidc_server.generate_logout_token(grant)
+        channel = self.submit_logout_token(logout_token)
+        self.assertEqual(channel.code, 200)
+
+        # Now try to exchange the login token
+        channel = make_request(
+            self.hs.get_reactor(),
+            self.site,
+            "POST",
+            "/login",
+            content={"type": "m.login.token", "token": login_token},
+        )
+        # It should have failed
+        self.assertEqual(channel.code, 403)
+
+    @override_config(
+        {
+            "oidc_providers": [
+                oidc_config(
+                    id="oidc",
+                    with_localpart_template=False,
+                    backchannel_logout_enabled=True,
+                )
+            ]
+        }
+    )
+    def test_logout_during_mapping(self) -> None:
+        """
+        It should stop ongoing user mapping session when receiving a logout token
+        """
+        fake_oidc_server = self.helper.fake_oidc_server()
+        user = "john"
+
+        # Get an authentication, and log out before completing the user mapping
+        client_redirect_url = "https://x"
+        userinfo = {"sub": user}
+        channel, grant = self.helper.auth_via_oidc(
+            fake_oidc_server,
+            userinfo,
+            client_redirect_url,
+            with_sid=True,
+        )
+
+        # Expect a user mapping page
+        self.assertEqual(channel.code, HTTPStatus.FOUND, channel.result)
+
+        # We should have a user_mapping_session cookie
+        cookie_headers = channel.headers.getRawHeaders("Set-Cookie")
+        assert cookie_headers
+        cookies: Dict[str, str] = {}
+        for h in cookie_headers:
+            key, value = h.split(";")[0].split("=", maxsplit=1)
+            cookies[key] = value
+
+        user_mapping_session_id = cookies["username_mapping_session"]
+
+        # Getting that session should not raise
+        session = self.hs.get_sso_handler().get_mapping_session(user_mapping_session_id)
+        self.assertIsNotNone(session)
+
+        # Submit a logout
+        logout_token = fake_oidc_server.generate_logout_token(grant)
+        channel = self.submit_logout_token(logout_token)
+        self.assertEqual(channel.code, 200)
+
+        # Now it should raise
+        with self.assertRaises(SynapseError):
+            self.hs.get_sso_handler().get_mapping_session(user_mapping_session_id)
+
+    @override_config(
+        {
+            "oidc_providers": [
+                oidc_config(
+                    id="oidc",
+                    with_localpart_template=True,
+                    backchannel_logout_enabled=False,
+                )
+            ]
+        }
+    )
+    def test_disabled(self) -> None:
+        """
+        Receiving a logout token should do nothing if it is disabled in the config
+        """
+        fake_oidc_server = self.helper.fake_oidc_server()
+        user = "john"
+
+        login_resp, grant = self.helper.login_via_oidc(
+            fake_oidc_server, user, with_sid=True
+        )
+        access_token: str = login_resp["access_token"]
+        self.helper.whoami(access_token, expect_code=HTTPStatus.OK)
+
+        # Logging out shouldn't work
+        logout_token = fake_oidc_server.generate_logout_token(grant)
+        channel = self.submit_logout_token(logout_token)
+        self.assertEqual(channel.code, 400)
+
+        # And the token should still be valid
+        self.helper.whoami(access_token, expect_code=HTTPStatus.OK)
+
+    @override_config(
+        {
+            "oidc_providers": [
+                oidc_config(
+                    id="oidc",
+                    with_localpart_template=True,
+                    backchannel_logout_enabled=True,
+                )
+            ]
+        }
+    )
+    def test_no_sid(self) -> None:
+        """
+        Receiving a logout token without `sid` during the login should do nothing
+        """
+        fake_oidc_server = self.helper.fake_oidc_server()
+        user = "john"
+
+        login_resp, grant = self.helper.login_via_oidc(
+            fake_oidc_server, user, with_sid=False
+        )
+        access_token: str = login_resp["access_token"]
+        self.helper.whoami(access_token, expect_code=HTTPStatus.OK)
+
+        # Logging out shouldn't work
+        logout_token = fake_oidc_server.generate_logout_token(grant)
+        channel = self.submit_logout_token(logout_token)
+        self.assertEqual(channel.code, 400)
+
+        # And the token should still be valid
+        self.helper.whoami(access_token, expect_code=HTTPStatus.OK)
+
+    @override_config(
+        {
+            "oidc_providers": [
+                oidc_config(
+                    "first",
+                    issuer="https://first-issuer.com/",
+                    with_localpart_template=True,
+                    backchannel_logout_enabled=True,
+                ),
+                oidc_config(
+                    "second",
+                    issuer="https://second-issuer.com/",
+                    with_localpart_template=True,
+                    backchannel_logout_enabled=True,
+                ),
+            ]
+        }
+    )
+    def test_multiple_providers(self) -> None:
+        """
+        It should be able to distinguish logout tokens from two different IdPs
+        """
+        first_server = self.helper.fake_oidc_server(issuer="https://first-issuer.com/")
+        second_server = self.helper.fake_oidc_server(
+            issuer="https://second-issuer.com/"
+        )
+        user = "john"
+
+        login_resp, first_grant = self.helper.login_via_oidc(
+            first_server, user, with_sid=True, idp_id="oidc-first"
+        )
+        first_access_token: str = login_resp["access_token"]
+        self.helper.whoami(first_access_token, expect_code=HTTPStatus.OK)
+
+        login_resp, second_grant = self.helper.login_via_oidc(
+            second_server, user, with_sid=True, idp_id="oidc-second"
+        )
+        second_access_token: str = login_resp["access_token"]
+        self.helper.whoami(second_access_token, expect_code=HTTPStatus.OK)
+
+        # `sid` in the fake providers are generated by a counter, so the first grant of
+        # each provider should give the same SID
+        self.assertEqual(first_grant.sid, second_grant.sid)
+        self.assertEqual(first_grant.userinfo["sub"], second_grant.userinfo["sub"])
+
+        # Logging out of the first session
+        logout_token = first_server.generate_logout_token(first_grant)
+        channel = self.submit_logout_token(logout_token)
+        self.assertEqual(channel.code, 200)
+
+        self.helper.whoami(first_access_token, expect_code=HTTPStatus.UNAUTHORIZED)
+        self.helper.whoami(second_access_token, expect_code=HTTPStatus.OK)
+
+        # Logging out of the second session
+        logout_token = second_server.generate_logout_token(second_grant)
+        channel = self.submit_logout_token(logout_token)
+        self.assertEqual(channel.code, 200)
+
+        self.helper.whoami(second_access_token, expect_code=HTTPStatus.UNAUTHORIZED)
diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py
index 967d229223..706399fae5 100644
--- a/tests/rest/client/utils.py
+++ b/tests/rest/client/utils.py
@@ -553,6 +553,34 @@ class RestHelper:
 
         return channel.json_body
 
+    def whoami(
+        self,
+        access_token: str,
+        expect_code: Literal[HTTPStatus.OK, HTTPStatus.UNAUTHORIZED] = HTTPStatus.OK,
+    ) -> JsonDict:
+        """Perform a 'whoami' request, which can be a quick way to check for access
+        token validity
+
+        Args:
+            access_token: The user token to use during the request
+            expect_code: The return code to expect from attempting the whoami request
+        """
+        channel = make_request(
+            self.hs.get_reactor(),
+            self.site,
+            "GET",
+            "account/whoami",
+            access_token=access_token,
+        )
+
+        assert channel.code == expect_code, "Exepcted: %d, got %d, resp: %r" % (
+            expect_code,
+            channel.code,
+            channel.result["body"],
+        )
+
+        return channel.json_body
+
     def fake_oidc_server(self, issuer: str = TEST_OIDC_ISSUER) -> FakeOidcServer:
         """Create a ``FakeOidcServer``.
 
@@ -572,6 +600,7 @@ class RestHelper:
         fake_server: FakeOidcServer,
         remote_user_id: str,
         with_sid: bool = False,
+        idp_id: Optional[str] = None,
         expected_status: int = 200,
     ) -> Tuple[JsonDict, FakeAuthorizationGrant]:
         """Log in (as a new user) via OIDC
@@ -588,7 +617,11 @@ class RestHelper:
         client_redirect_url = "https://x"
         userinfo = {"sub": remote_user_id}
         channel, grant = self.auth_via_oidc(
-            fake_server, userinfo, client_redirect_url, with_sid=with_sid
+            fake_server,
+            userinfo,
+            client_redirect_url,
+            with_sid=with_sid,
+            idp_id=idp_id,
         )
 
         # expect a confirmation page
@@ -623,6 +656,7 @@ class RestHelper:
         client_redirect_url: Optional[str] = None,
         ui_auth_session_id: Optional[str] = None,
         with_sid: bool = False,
+        idp_id: Optional[str] = None,
     ) -> Tuple[FakeChannel, FakeAuthorizationGrant]:
         """Perform an OIDC authentication flow via a mock OIDC provider.
 
@@ -648,6 +682,7 @@ class RestHelper:
             ui_auth_session_id: if set, we will perform a UI Auth flow. The session id
                 of the UI auth.
             with_sid: if True, generates a random `sid` (OIDC session ID)
+            idp_id: if set, explicitly chooses one specific IDP
 
         Returns:
             A FakeChannel containing the result of calling the OIDC callback endpoint.
@@ -665,7 +700,9 @@ class RestHelper:
                 oauth_uri = self.initiate_sso_ui_auth(ui_auth_session_id, cookies)
             else:
                 # otherwise, hit the login redirect endpoint
-                oauth_uri = self.initiate_sso_login(client_redirect_url, cookies)
+                oauth_uri = self.initiate_sso_login(
+                    client_redirect_url, cookies, idp_id=idp_id
+                )
 
         # we now have a URI for the OIDC IdP, but we skip that and go straight
         # back to synapse's OIDC callback resource. However, we do need the "state"
@@ -742,7 +779,10 @@ class RestHelper:
         return channel, grant
 
     def initiate_sso_login(
-        self, client_redirect_url: Optional[str], cookies: MutableMapping[str, str]
+        self,
+        client_redirect_url: Optional[str],
+        cookies: MutableMapping[str, str],
+        idp_id: Optional[str] = None,
     ) -> str:
         """Make a request to the login-via-sso redirect endpoint, and return the target
 
@@ -753,6 +793,7 @@ class RestHelper:
             client_redirect_url: the client redirect URL to pass to the login redirect
                 endpoint
             cookies: any cookies returned will be added to this dict
+            idp_id: if set, explicitly chooses one specific IDP
 
         Returns:
             the URI that the client gets redirected to (ie, the SSO server)
@@ -761,6 +802,12 @@ class RestHelper:
         if client_redirect_url:
             params["redirectUrl"] = client_redirect_url
 
+        uri = "/_matrix/client/r0/login/sso/redirect"
+        if idp_id is not None:
+            uri = f"{uri}/{idp_id}"
+
+        uri = f"{uri}?{urllib.parse.urlencode(params)}"
+
         # hit the redirect url (which should redirect back to the redirect url. This
         # is the easiest way of figuring out what the Host header ought to be set to
         # to keep Synapse happy.
@@ -768,7 +815,7 @@ class RestHelper:
             self.hs.get_reactor(),
             self.site,
             "GET",
-            "/_matrix/client/r0/login/sso/redirect?" + urllib.parse.urlencode(params),
+            uri,
         )
         assert channel.code == 302
 
diff --git a/tests/server.py b/tests/server.py
index 8b1d186219..b1730fcc8d 100644
--- a/tests/server.py
+++ b/tests/server.py
@@ -362,6 +362,12 @@ def make_request(
     # Twisted expects to be at the end of the content when parsing the request.
     req.content.seek(0, SEEK_END)
 
+    # Old versions of Twisted (<20.3.0) have issues with parsing x-www-form-urlencoded
+    # bodies if the Content-Length header is missing
+    req.requestHeaders.addRawHeader(
+        b"Content-Length", str(len(content)).encode("ascii")
+    )
+
     if access_token:
         req.requestHeaders.addRawHeader(
             b"Authorization", b"Bearer " + access_token.encode("ascii")
diff --git a/tests/test_utils/oidc.py b/tests/test_utils/oidc.py
index de134bbc89..1461d23ee8 100644
--- a/tests/test_utils/oidc.py
+++ b/tests/test_utils/oidc.py
@@ -51,6 +51,8 @@ class FakeOidcServer:
     get_userinfo_handler: Mock
     post_token_handler: Mock
 
+    sid_counter: int = 0
+
     def __init__(self, clock: Clock, issuer: str):
         from authlib.jose import ECKey, KeySet
 
@@ -146,7 +148,7 @@ class FakeOidcServer:
         return jws.serialize_compact(protected, json_payload, self._key).decode("utf-8")
 
     def generate_id_token(self, grant: FakeAuthorizationGrant) -> str:
-        now = self._clock.time()
+        now = int(self._clock.time())
         id_token = {
             **grant.userinfo,
             "iss": self.issuer,
@@ -166,6 +168,26 @@ class FakeOidcServer:
 
         return self._sign(id_token)
 
+    def generate_logout_token(self, grant: FakeAuthorizationGrant) -> str:
+        now = int(self._clock.time())
+        logout_token = {
+            "iss": self.issuer,
+            "aud": grant.client_id,
+            "iat": now,
+            "jti": random_string(10),
+            "events": {
+                "http://schemas.openid.net/event/backchannel-logout": {},
+            },
+        }
+
+        if grant.sid is not None:
+            logout_token["sid"] = grant.sid
+
+        if "sub" in grant.userinfo:
+            logout_token["sub"] = grant.userinfo["sub"]
+
+        return self._sign(logout_token)
+
     def id_token_override(self, overrides: dict):
         """Temporarily patch the ID token generated by the token endpoint."""
         return patch.object(self, "_id_token_overrides", overrides)
@@ -183,7 +205,8 @@ class FakeOidcServer:
         code = random_string(10)
         sid = None
         if with_sid:
-            sid = random_string(10)
+            sid = str(self.sid_counter)
+            self.sid_counter += 1
 
         grant = FakeAuthorizationGrant(
             userinfo=userinfo,
-- 
cgit 1.5.1


From dbfc9b803ee32f7b31c2b5ccbc53a1bfcaa95983 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 31 Oct 2022 20:31:43 +0000
Subject: Fix dehydrated device REST checks (#14336)

---
 changelog.d/14336.bugfix          |  1 +
 synapse/rest/client/devices.py    |  5 ++---
 tests/rest/client/test_devices.py | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/14336.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14336.bugfix b/changelog.d/14336.bugfix
new file mode 100644
index 0000000000..d44ff1bbc7
--- /dev/null
+++ b/changelog.d/14336.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.70 where clients were unable to PUT new [dehydrated devices](https://github.com/matrix-org/matrix-spec-proposals/pull/2697).
diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py
index 90828c95c4..8f3cbd4ea2 100644
--- a/synapse/rest/client/devices.py
+++ b/synapse/rest/client/devices.py
@@ -231,7 +231,7 @@ class DehydratedDeviceServlet(RestServlet):
       }
     }
 
-    PUT /org.matrix.msc2697/dehydrated_device
+    PUT /org.matrix.msc2697.v2/dehydrated_device
     Content-Type: application/json
 
     {
@@ -271,7 +271,6 @@ class DehydratedDeviceServlet(RestServlet):
             raise errors.NotFoundError("No dehydrated device available")
 
     class PutBody(RequestBodyModel):
-        device_id: StrictStr
         device_data: DehydratedDeviceDataModel
         initial_device_display_name: Optional[StrictStr]
 
@@ -281,7 +280,7 @@ class DehydratedDeviceServlet(RestServlet):
 
         device_id = await self.device_handler.store_dehydrated_device(
             requester.user.to_string(),
-            submission.device_data,
+            submission.device_data.dict(),
             submission.initial_device_display_name,
         )
         return 200, {"device_id": device_id}
diff --git a/tests/rest/client/test_devices.py b/tests/rest/client/test_devices.py
index aa98222434..d80eea17d3 100644
--- a/tests/rest/client/test_devices.py
+++ b/tests/rest/client/test_devices.py
@@ -200,3 +200,37 @@ class DevicesTestCase(unittest.HomeserverTestCase):
         self.reactor.advance(43200)
         self.get_success(self.handler.get_device(user_id, "abc"))
         self.get_failure(self.handler.get_device(user_id, "def"), NotFoundError)
+
+
+class DehydratedDeviceTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        admin.register_servlets_for_client_rest_resource,
+        login.register_servlets,
+        register.register_servlets,
+        devices.register_servlets,
+    ]
+
+    def test_PUT(self) -> None:
+        """Sanity-check that we can PUT a dehydrated device.
+
+        Detects https://github.com/matrix-org/synapse/issues/14334.
+        """
+        alice = self.register_user("alice", "correcthorse")
+        token = self.login(alice, "correcthorse")
+
+        # Have alice upload a dehydrated device
+        channel = self.make_request(
+            "PUT",
+            "_matrix/client/unstable/org.matrix.msc2697.v2/dehydrated_device",
+            {
+                "device_data": {
+                    "algorithm": "org.matrix.msc2697.v1.dehydration.v1.olm",
+                    "account": "dehydrated_device",
+                }
+            },
+            access_token=token,
+            shorthand=False,
+        )
+        self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body)
+        device_id = channel.json_body.get("device_id")
+        self.assertIsInstance(device_id, str)
-- 
cgit 1.5.1


From b922b54b6143f13c0786a18fcbb5f55724ea72fc Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Tue, 1 Nov 2022 10:30:43 +0000
Subject: Fix type annotation causing import time error in the Complement
 forking launcher. (#14084)

Co-authored-by: David Robertson <davidr@element.io>
---
 changelog.d/14084.misc                 | 1 +
 synapse/app/complement_fork_starter.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14084.misc

(limited to 'synapse')

diff --git a/changelog.d/14084.misc b/changelog.d/14084.misc
new file mode 100644
index 0000000000..988e55f437
--- /dev/null
+++ b/changelog.d/14084.misc
@@ -0,0 +1 @@
+Fix type annotation causing import time error in the Complement forking launcher.
\ No newline at end of file
diff --git a/synapse/app/complement_fork_starter.py b/synapse/app/complement_fork_starter.py
index b22f315453..8c0f4a57e7 100644
--- a/synapse/app/complement_fork_starter.py
+++ b/synapse/app/complement_fork_starter.py
@@ -55,13 +55,13 @@ import os
 import signal
 import sys
 from types import FrameType
-from typing import Any, Callable, List, Optional
+from typing import Any, Callable, Dict, List, Optional
 
 from twisted.internet.main import installReactor
 
 # a list of the original signal handlers, before we installed our custom ones.
 # We restore these in our child processes.
-_original_signal_handlers: dict[int, Any] = {}
+_original_signal_handlers: Dict[int, Any] = {}
 
 
 class ProxiedReactor:
-- 
cgit 1.5.1


From 9473ebb9e7db9e3f71b341f72ae004db3a0144b8 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 1 Nov 2022 11:47:09 +0000
Subject: Revert "Fix event size checks (#13710)"

This reverts commit fab495a9e1442d99e922367f65f41de5eaa488eb.

As noted in
https://github.com/matrix-org/synapse/pull/13710#issuecomment-1298396007:

> We want to see this change land for the protocol's sake (and plan to
  un-revert it) but want to give this a little more time before releasing
  this.
---
 changelog.d/13710.bugfix |  1 -
 synapse/event_auth.py    | 10 +++++-----
 2 files changed, 5 insertions(+), 6 deletions(-)
 delete mode 100644 changelog.d/13710.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13710.bugfix b/changelog.d/13710.bugfix
deleted file mode 100644
index 4c318d15f5..0000000000
--- a/changelog.d/13710.bugfix
+++ /dev/null
@@ -1 +0,0 @@
-Fix a long-standing bug where Synapse would count codepoints instead of bytes when validating the size of some fields.
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index 5036604036..bab31e33c5 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -342,15 +342,15 @@ def check_state_dependent_auth_rules(
 
 
 def _check_size_limits(event: "EventBase") -> None:
-    if len(event.user_id.encode("utf-8")) > 255:
+    if len(event.user_id) > 255:
         raise EventSizeError("'user_id' too large")
-    if len(event.room_id.encode("utf-8")) > 255:
+    if len(event.room_id) > 255:
         raise EventSizeError("'room_id' too large")
-    if event.is_state() and len(event.state_key.encode("utf-8")) > 255:
+    if event.is_state() and len(event.state_key) > 255:
         raise EventSizeError("'state_key' too large")
-    if len(event.type.encode("utf-8")) > 255:
+    if len(event.type) > 255:
         raise EventSizeError("'type' too large")
-    if len(event.event_id.encode("utf-8")) > 255:
+    if len(event.event_id) > 255:
         raise EventSizeError("'event_id' too large")
     if len(encode_canonical_json(event.get_pdu_json())) > MAX_PDU_SIZE:
         raise EventSizeError("event too large")
-- 
cgit 1.5.1


From 2bd7f3eeab1a4818359c9f585b660ff3f3d8bc6c Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 1 Nov 2022 15:02:39 +0000
Subject: Allow PUT/GET of aliases during faster join (#14292)

without blocking on full state.
---
 changelog.d/14292.bugfix      | 1 +
 synapse/handlers/directory.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14292.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14292.bugfix b/changelog.d/14292.bugfix
new file mode 100644
index 0000000000..4ed92f5cf2
--- /dev/null
+++ b/changelog.d/14292.bugfix
@@ -0,0 +1 @@
+Faster joins: do not block creation of or queries for room aliases during the resync.
diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py
index d52ebada6b..2ea52257cb 100644
--- a/synapse/handlers/directory.py
+++ b/synapse/handlers/directory.py
@@ -85,7 +85,7 @@ class DirectoryHandler:
         # TODO(erikj): Add transactions.
         # TODO(erikj): Check if there is a current association.
         if not servers:
-            servers = await self._storage_controllers.state.get_current_hosts_in_room(
+            servers = await self._storage_controllers.state.get_current_hosts_in_room_or_partial_state_approximation(
                 room_id
             )
 
@@ -290,7 +290,7 @@ class DirectoryHandler:
                 Codes.NOT_FOUND,
             )
 
-        extra_servers = await self._storage_controllers.state.get_current_hosts_in_room(
+        extra_servers = await self._storage_controllers.state.get_current_hosts_in_room_or_partial_state_approximation(
             room_id
         )
         servers_set = set(extra_servers) | set(servers)
-- 
cgit 1.5.1


From d4fac8a3e27ab3e133c5e5ac603c8d937a1fd86c Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 1 Nov 2022 19:20:35 +0000
Subject: Fix typo in #13320 which could cause log spam (#14347)

---
 changelog.d/14347.bugfix                | 1 +
 synapse/federation/federation_client.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14347.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14347.bugfix b/changelog.d/14347.bugfix
new file mode 100644
index 0000000000..91975757ae
--- /dev/null
+++ b/changelog.d/14347.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.64.0rc1 which could cause log spam when fetching events from other homeservers.
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index fa225182be..c4c0bc7315 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -465,7 +465,7 @@ class FederationClient(FederationBase):
                     pdu_attempts[destination] = now
 
                     logger.info(
-                        "get_pdu(event_id=): Failed to get PDU from %s because %s",
+                        "get_pdu(event_id=%s): Failed to get PDU from %s because %s",
                         event_id,
                         destination,
                         e,
-- 
cgit 1.5.1


From 6546308c1e7d3eff316631a5909151dc6c7a9e1e Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Wed, 2 Nov 2022 17:33:45 +0000
Subject: Disable legacy Prometheus metric names by default. They can still be
 re-enabled for now, but they will be removed altogether in Synapse 1.73.0.
 (#14353)

---
 CHANGES.md                                       |  9 +++++++++
 changelog.d/14353.removal                        |  1 +
 docs/upgrade.md                                  | 16 ++++++++++++++++
 docs/usage/configuration/config_documentation.md |  4 ++--
 synapse/config/metrics.py                        |  2 +-
 5 files changed, 29 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/14353.removal

(limited to 'synapse')

diff --git a/CHANGES.md b/CHANGES.md
index 113ad0d1ee..6bafdd3fad 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,12 @@
+Synapse (Next) (2022-11-01)
+=========================
+
+Please note that, as announced in the release notes for Synapse 1.69.0, legacy Prometheus metric names are now disabled by default.
+They will be removed altogether in Synapse 1.73.0.
+If not already done, server administrators should update their dashboards and alerting rules to avoid using the deprecated metric names.
+See the [upgrade notes](https://matrix-org.github.io/synapse/v1.71/upgrade.html#upgrading-to-v1710) for more details.
+
+
 Synapse 1.71.0rc1 (2022-11-01)
 ==============================
 
diff --git a/changelog.d/14353.removal b/changelog.d/14353.removal
new file mode 100644
index 0000000000..fc42aa9106
--- /dev/null
+++ b/changelog.d/14353.removal
@@ -0,0 +1 @@
+Disable legacy Prometheus metric names by default. They can still be re-enabled for now, but they will be removed altogether in Synapse 1.73.0.
\ No newline at end of file
diff --git a/docs/upgrade.md b/docs/upgrade.md
index f095bbc3a6..41b06cc253 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -116,6 +116,22 @@ local users and some remote users is why the spec was changed/clarified and this
 caveat is no longer supported.
 
 
+## Legacy Prometheus metric names are now disabled by default
+
+Synapse v1.71.0 disables legacy Prometheus metric names by default.
+For administrators that still rely on them and have not yet had a chance to update their
+uses of the metrics, it's still possible to specify `enable_legacy_metrics: true` in
+the configuration to re-enable them temporarily.
+
+Synapse v1.73.0 will **remove legacy metric names altogether** and at that point,
+it will no longer be possible to re-enable them.
+
+If you do not use metrics or you have already updated your Grafana dashboard(s),
+Prometheus console(s) and alerting rule(s), there is no action needed.
+
+See [v1.69.0: Deprecation of legacy Prometheus metric names](#deprecation-of-legacy-prometheus-metric-names).
+
+
 # Upgrading to v1.69.0
 
 ## Changes to the receipts replication streams
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 44358faf59..9a6bd08d01 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -2441,8 +2441,8 @@ enable_metrics: true
 
 Set to `true` to publish both legacy and non-legacy Prometheus metric names,
 or to `false` to only publish non-legacy Prometheus metric names.
-Defaults to `true`. Has no effect if `enable_metrics` is `false`.
-**In Synapse v1.71.0, this will default to `false` before being removed in Synapse v1.73.0.**
+Defaults to `false`. Has no effect if `enable_metrics` is `false`.
+**In Synapse v1.67.0 up to and including Synapse v1.70.1, this defaulted to `true`.**
 
 Legacy metric names include:
 - metrics containing colons in the name, such as `synapse_util_caches_response_cache:hits`, because colons are supposed to be reserved for user-defined recording rules;
diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py
index bb065f9f2f..6034a0346e 100644
--- a/synapse/config/metrics.py
+++ b/synapse/config/metrics.py
@@ -43,7 +43,7 @@ class MetricsConfig(Config):
     def read_config(self, config: JsonDict, **kwargs: Any) -> None:
         self.enable_metrics = config.get("enable_metrics", False)
 
-        self.enable_legacy_metrics = config.get("enable_legacy_metrics", True)
+        self.enable_legacy_metrics = config.get("enable_legacy_metrics", False)
 
         self.report_stats = config.get("report_stats", None)
         self.report_stats_endpoint = config.get(
-- 
cgit 1.5.1


From 86c5a710d8b4212f8a8a668d7d4a79c0bb371508 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Thu, 3 Nov 2022 16:21:31 +0000
Subject: Implement MSC3912: Relation-based redactions (#14260)

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
---
 changelog.d/14260.feature                   |   1 +
 synapse/api/constants.py                    |   2 +
 synapse/config/experimental.py              |   3 +
 synapse/handlers/message.py                 |  47 ++++-
 synapse/handlers/relations.py               |  56 +++++-
 synapse/rest/client/room.py                 |  57 ++++--
 synapse/rest/client/versions.py             |   2 +
 synapse/storage/databases/main/relations.py |  36 ++++
 tests/rest/client/test_redactions.py        | 273 +++++++++++++++++++++++++++-
 tests/rest/client/utils.py                  |  37 ++++
 10 files changed, 486 insertions(+), 28 deletions(-)
 create mode 100644 changelog.d/14260.feature

(limited to 'synapse')

diff --git a/changelog.d/14260.feature b/changelog.d/14260.feature
new file mode 100644
index 0000000000..102dc7b3e0
--- /dev/null
+++ b/changelog.d/14260.feature
@@ -0,0 +1 @@
+Add experimental support for [MSC3912](https://github.com/matrix-org/matrix-spec-proposals/pull/3912): Relation-based redactions.
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index 44c5ffc6a5..bc04a0755b 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -125,6 +125,8 @@ class EventTypes:
     MSC2716_BATCH: Final = "org.matrix.msc2716.batch"
     MSC2716_MARKER: Final = "org.matrix.msc2716.marker"
 
+    Reaction: Final = "m.reaction"
+
 
 class ToDeviceEventTypes:
     RoomKeyRequest: Final = "m.room_key_request"
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index d9bdd66d55..d4b71d1673 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -128,3 +128,6 @@ class ExperimentalConfig(Config):
         self.msc3886_endpoint: Optional[str] = experimental.get(
             "msc3886_endpoint", None
         )
+
+        # MSC3912: Relation-based redactions.
+        self.msc3912_enabled: bool = experimental.get("msc3912_enabled", False)
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 468900a07f..4cf593cfdc 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -877,6 +877,36 @@ class EventCreationHandler:
                 return prev_event
         return None
 
+    async def get_event_from_transaction(
+        self,
+        requester: Requester,
+        txn_id: str,
+        room_id: str,
+    ) -> Optional[EventBase]:
+        """For the given transaction ID and room ID, check if there is a matching event.
+        If so, fetch it and return it.
+
+        Args:
+            requester: The requester making the request in the context of which we want
+                to fetch the event.
+            txn_id: The transaction ID.
+            room_id: The room ID.
+
+        Returns:
+            An event if one could be found, None otherwise.
+        """
+        if requester.access_token_id:
+            existing_event_id = await self.store.get_event_id_from_transaction_id(
+                room_id,
+                requester.user.to_string(),
+                requester.access_token_id,
+                txn_id,
+            )
+            if existing_event_id:
+                return await self.store.get_event(existing_event_id)
+
+        return None
+
     async def create_and_send_nonmember_event(
         self,
         requester: Requester,
@@ -956,18 +986,17 @@ class EventCreationHandler:
         # extremities to pile up, which in turn leads to state resolution
         # taking longer.
         async with self.limiter.queue(event_dict["room_id"]):
-            if txn_id and requester.access_token_id:
-                existing_event_id = await self.store.get_event_id_from_transaction_id(
-                    event_dict["room_id"],
-                    requester.user.to_string(),
-                    requester.access_token_id,
-                    txn_id,
+            if txn_id:
+                event = await self.get_event_from_transaction(
+                    requester, txn_id, event_dict["room_id"]
                 )
-                if existing_event_id:
-                    event = await self.store.get_event(existing_event_id)
+                if event:
                     # we know it was persisted, so must have a stream ordering
                     assert event.internal_metadata.stream_ordering
-                    return event, event.internal_metadata.stream_ordering
+                    return (
+                        event,
+                        event.internal_metadata.stream_ordering,
+                    )
 
             event, context = await self.create_event(
                 requester,
diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py
index 0a0c6d938e..8e71dda970 100644
--- a/synapse/handlers/relations.py
+++ b/synapse/handlers/relations.py
@@ -17,7 +17,7 @@ from typing import TYPE_CHECKING, Dict, FrozenSet, Iterable, List, Optional, Tup
 
 import attr
 
-from synapse.api.constants import RelationTypes
+from synapse.api.constants import EventTypes, RelationTypes
 from synapse.api.errors import SynapseError
 from synapse.events import EventBase, relation_from_event
 from synapse.logging.opentracing import trace
@@ -75,6 +75,7 @@ class RelationsHandler:
         self._clock = hs.get_clock()
         self._event_handler = hs.get_event_handler()
         self._event_serializer = hs.get_event_client_serializer()
+        self._event_creation_handler = hs.get_event_creation_handler()
 
     async def get_relations(
         self,
@@ -205,6 +206,59 @@ class RelationsHandler:
 
         return related_events, next_token
 
+    async def redact_events_related_to(
+        self,
+        requester: Requester,
+        event_id: str,
+        initial_redaction_event: EventBase,
+        relation_types: List[str],
+    ) -> None:
+        """Redacts all events related to the given event ID with one of the given
+        relation types.
+
+        This method is expected to be called when redacting the event referred to by
+        the given event ID.
+
+        If an event cannot be redacted (e.g. because of insufficient permissions), log
+        the error and try to redact the next one.
+
+        Args:
+            requester: The requester to redact events on behalf of.
+            event_id: The ID of the event whose relations to look up and redact.
+            initial_redaction_event: The redaction for the event referred to by
+                event_id.
+            relation_types: The types of relations to look for.
+
+        Raises:
+            ShadowBanError if the requester is shadow-banned
+        """
+        related_event_ids = (
+            await self._main_store.get_all_relations_for_event_with_types(
+                event_id, relation_types
+            )
+        )
+
+        for related_event_id in related_event_ids:
+            try:
+                await self._event_creation_handler.create_and_send_nonmember_event(
+                    requester,
+                    {
+                        "type": EventTypes.Redaction,
+                        "content": initial_redaction_event.content,
+                        "room_id": initial_redaction_event.room_id,
+                        "sender": requester.user.to_string(),
+                        "redacts": related_event_id,
+                    },
+                    ratelimit=False,
+                )
+            except SynapseError as e:
+                logger.warning(
+                    "Failed to redact event %s (related to event %s): %s",
+                    related_event_id,
+                    event_id,
+                    e.msg,
+                )
+
     async def get_annotations_for_event(
         self,
         event_id: str,
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index 01e5079963..91cb791139 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -52,6 +52,7 @@ from synapse.http.servlet import (
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import make_deferred_yieldable, run_in_background
 from synapse.logging.opentracing import set_tag
+from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.rest.client._base import client_patterns
 from synapse.rest.client.transactions import HttpTransactionCache
 from synapse.storage.state import StateFilter
@@ -1029,6 +1030,8 @@ class RoomRedactEventRestServlet(TransactionRestServlet):
         super().__init__(hs)
         self.event_creation_handler = hs.get_event_creation_handler()
         self.auth = hs.get_auth()
+        self._relation_handler = hs.get_relations_handler()
+        self._msc3912_enabled = hs.config.experimental.msc3912_enabled
 
     def register(self, http_server: HttpServer) -> None:
         PATTERNS = "/rooms/(?P<room_id>[^/]*)/redact/(?P<event_id>[^/]*)"
@@ -1045,20 +1048,46 @@ class RoomRedactEventRestServlet(TransactionRestServlet):
         content = parse_json_object_from_request(request)
 
         try:
-            (
-                event,
-                _,
-            ) = await self.event_creation_handler.create_and_send_nonmember_event(
-                requester,
-                {
-                    "type": EventTypes.Redaction,
-                    "content": content,
-                    "room_id": room_id,
-                    "sender": requester.user.to_string(),
-                    "redacts": event_id,
-                },
-                txn_id=txn_id,
-            )
+            with_relations = None
+            if self._msc3912_enabled and "org.matrix.msc3912.with_relations" in content:
+                with_relations = content["org.matrix.msc3912.with_relations"]
+                del content["org.matrix.msc3912.with_relations"]
+
+            # Check if there's an existing event for this transaction now (even though
+            # create_and_send_nonmember_event also does it) because, if there's one,
+            # then we want to skip the call to redact_events_related_to.
+            event = None
+            if txn_id:
+                event = await self.event_creation_handler.get_event_from_transaction(
+                    requester, txn_id, room_id
+                )
+
+            if event is None:
+                (
+                    event,
+                    _,
+                ) = await self.event_creation_handler.create_and_send_nonmember_event(
+                    requester,
+                    {
+                        "type": EventTypes.Redaction,
+                        "content": content,
+                        "room_id": room_id,
+                        "sender": requester.user.to_string(),
+                        "redacts": event_id,
+                    },
+                    txn_id=txn_id,
+                )
+
+                if with_relations:
+                    run_as_background_process(
+                        "redact_related_events",
+                        self._relation_handler.redact_events_related_to,
+                        requester=requester,
+                        event_id=event_id,
+                        initial_redaction_event=event,
+                        relation_types=with_relations,
+                    )
+
             event_id = event.event_id
         except ShadowBanError:
             event_id = "$" + random_string(43)
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 9b1b72c68a..180a11ef88 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -119,6 +119,8 @@ class VersionsRestServlet(RestServlet):
                     # Adds support for simple HTTP rendezvous as per MSC3886
                     "org.matrix.msc3886": self.config.experimental.msc3886_endpoint
                     is not None,
+                    # Adds support for relation-based redactions as per MSC3912.
+                    "org.matrix.msc3912": self.config.experimental.msc3912_enabled,
                 },
             },
         )
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index c022510e76..ca431002c8 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -295,6 +295,42 @@ class RelationsWorkerStore(SQLBaseStore):
             "get_recent_references_for_event", _get_recent_references_for_event_txn
         )
 
+    async def get_all_relations_for_event_with_types(
+        self,
+        event_id: str,
+        relation_types: List[str],
+    ) -> List[str]:
+        """Get the event IDs of all events that have a relation to the given event with
+        one of the given relation types.
+
+        Args:
+            event_id: The event for which to look for related events.
+            relation_types: The types of relations to look for.
+
+        Returns:
+            A list of the IDs of the events that relate to the given event with one of
+            the given relation types.
+        """
+
+        def get_all_relation_ids_for_event_with_types_txn(
+            txn: LoggingTransaction,
+        ) -> List[str]:
+            rows = self.db_pool.simple_select_many_txn(
+                txn=txn,
+                table="event_relations",
+                column="relation_type",
+                iterable=relation_types,
+                keyvalues={"relates_to_id": event_id},
+                retcols=["event_id"],
+            )
+
+            return [row["event_id"] for row in rows]
+
+        return await self.db_pool.runInteraction(
+            desc="get_all_relation_ids_for_event_with_types",
+            func=get_all_relation_ids_for_event_with_types_txn,
+        )
+
     async def event_includes_relation(self, event_id: str) -> bool:
         """Check if the given event relates to another event.
 
diff --git a/tests/rest/client/test_redactions.py b/tests/rest/client/test_redactions.py
index be4c67d68e..5dfe44defb 100644
--- a/tests/rest/client/test_redactions.py
+++ b/tests/rest/client/test_redactions.py
@@ -11,17 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List
+from typing import List, Optional
 
 from twisted.test.proto_helpers import MemoryReactor
 
+from synapse.api.constants import EventTypes, RelationTypes
 from synapse.rest import admin
 from synapse.rest.client import login, room, sync
 from synapse.server import HomeServer
 from synapse.types import JsonDict
 from synapse.util import Clock
 
-from tests.unittest import HomeserverTestCase
+from tests.unittest import HomeserverTestCase, override_config
 
 
 class RedactionsTestCase(HomeserverTestCase):
@@ -67,7 +68,12 @@ class RedactionsTestCase(HomeserverTestCase):
         )
 
     def _redact_event(
-        self, access_token: str, room_id: str, event_id: str, expect_code: int = 200
+        self,
+        access_token: str,
+        room_id: str,
+        event_id: str,
+        expect_code: int = 200,
+        with_relations: Optional[List[str]] = None,
     ) -> JsonDict:
         """Helper function to send a redaction event.
 
@@ -75,7 +81,13 @@ class RedactionsTestCase(HomeserverTestCase):
         """
         path = "/_matrix/client/r0/rooms/%s/redact/%s" % (room_id, event_id)
 
-        channel = self.make_request("POST", path, content={}, access_token=access_token)
+        request_content = {}
+        if with_relations:
+            request_content["org.matrix.msc3912.with_relations"] = with_relations
+
+        channel = self.make_request(
+            "POST", path, request_content, access_token=access_token
+        )
         self.assertEqual(channel.code, expect_code)
         return channel.json_body
 
@@ -201,3 +213,256 @@ class RedactionsTestCase(HomeserverTestCase):
             # These should all succeed, even though this would be denied by
             # the standard message ratelimiter
             self._redact_event(self.mod_access_token, self.room_id, msg_id)
+
+    @override_config({"experimental_features": {"msc3912_enabled": True}})
+    def test_redact_relations(self) -> None:
+        """Tests that we can redact the relations of an event at the same time as the
+        event itself.
+        """
+        # Send a root event.
+        res = self.helper.send_event(
+            room_id=self.room_id,
+            type=EventTypes.Message,
+            content={"msgtype": "m.text", "body": "hello"},
+            tok=self.mod_access_token,
+        )
+        root_event_id = res["event_id"]
+
+        # Send an edit to this root event.
+        res = self.helper.send_event(
+            room_id=self.room_id,
+            type=EventTypes.Message,
+            content={
+                "body": " * hello world",
+                "m.new_content": {
+                    "body": "hello world",
+                    "msgtype": "m.text",
+                },
+                "m.relates_to": {
+                    "event_id": root_event_id,
+                    "rel_type": RelationTypes.REPLACE,
+                },
+                "msgtype": "m.text",
+            },
+            tok=self.mod_access_token,
+        )
+        edit_event_id = res["event_id"]
+
+        # Also send a threaded message whose root is the same as the edit's.
+        res = self.helper.send_event(
+            room_id=self.room_id,
+            type=EventTypes.Message,
+            content={
+                "msgtype": "m.text",
+                "body": "message 1",
+                "m.relates_to": {
+                    "event_id": root_event_id,
+                    "rel_type": RelationTypes.THREAD,
+                },
+            },
+            tok=self.mod_access_token,
+        )
+        threaded_event_id = res["event_id"]
+
+        # Also send a reaction, again with the same root.
+        res = self.helper.send_event(
+            room_id=self.room_id,
+            type=EventTypes.Reaction,
+            content={
+                "m.relates_to": {
+                    "rel_type": RelationTypes.ANNOTATION,
+                    "event_id": root_event_id,
+                    "key": "👍",
+                }
+            },
+            tok=self.mod_access_token,
+        )
+        reaction_event_id = res["event_id"]
+
+        # Redact the root event, specifying that we also want to delete events that
+        # relate to it with m.replace or m.thread.
+        self._redact_event(
+            self.mod_access_token,
+            self.room_id,
+            root_event_id,
+            with_relations=[
+                RelationTypes.REPLACE,
+                RelationTypes.THREAD,
+            ],
+        )
+
+        # Check that the root event got redacted.
+        event_dict = self.helper.get_event(
+            self.room_id, root_event_id, self.mod_access_token
+        )
+        self.assertIn("redacted_because", event_dict, event_dict)
+
+        # Check that the edit got redacted.
+        event_dict = self.helper.get_event(
+            self.room_id, edit_event_id, self.mod_access_token
+        )
+        self.assertIn("redacted_because", event_dict, event_dict)
+
+        # Check that the threaded message got redacted.
+        event_dict = self.helper.get_event(
+            self.room_id, threaded_event_id, self.mod_access_token
+        )
+        self.assertIn("redacted_because", event_dict, event_dict)
+
+        # Check that the reaction did not get redacted.
+        event_dict = self.helper.get_event(
+            self.room_id, reaction_event_id, self.mod_access_token
+        )
+        self.assertNotIn("redacted_because", event_dict, event_dict)
+
+    @override_config({"experimental_features": {"msc3912_enabled": True}})
+    def test_redact_relations_no_perms(self) -> None:
+        """Tests that, when redacting a message along with its relations, if not all
+        the related messages can be redacted because of insufficient permissions, the
+        server still redacts all the ones that can be.
+        """
+        # Send a root event.
+        res = self.helper.send_event(
+            room_id=self.room_id,
+            type=EventTypes.Message,
+            content={
+                "msgtype": "m.text",
+                "body": "root",
+            },
+            tok=self.other_access_token,
+        )
+        root_event_id = res["event_id"]
+
+        # Send a first threaded message, this one from the moderator. We do this for the
+        # first message with the m.thread relation (and not the last one) to ensure
+        # that, when the server fails to redact it, it doesn't stop there, and it
+        # instead goes on to redact the other one.
+        res = self.helper.send_event(
+            room_id=self.room_id,
+            type=EventTypes.Message,
+            content={
+                "msgtype": "m.text",
+                "body": "message 1",
+                "m.relates_to": {
+                    "event_id": root_event_id,
+                    "rel_type": RelationTypes.THREAD,
+                },
+            },
+            tok=self.mod_access_token,
+        )
+        first_threaded_event_id = res["event_id"]
+
+        # Send a second threaded message, this time from the user who'll perform the
+        # redaction.
+        res = self.helper.send_event(
+            room_id=self.room_id,
+            type=EventTypes.Message,
+            content={
+                "msgtype": "m.text",
+                "body": "message 2",
+                "m.relates_to": {
+                    "event_id": root_event_id,
+                    "rel_type": RelationTypes.THREAD,
+                },
+            },
+            tok=self.other_access_token,
+        )
+        second_threaded_event_id = res["event_id"]
+
+        # Redact the thread's root, and request that all threaded messages are also
+        # redacted. Send that request from the non-mod user, so that the first threaded
+        # event cannot be redacted.
+        self._redact_event(
+            self.other_access_token,
+            self.room_id,
+            root_event_id,
+            with_relations=[RelationTypes.THREAD],
+        )
+
+        # Check that the thread root got redacted.
+        event_dict = self.helper.get_event(
+            self.room_id, root_event_id, self.other_access_token
+        )
+        self.assertIn("redacted_because", event_dict, event_dict)
+
+        # Check that the last message in the thread got redacted, despite failing to
+        # redact the one before it.
+        event_dict = self.helper.get_event(
+            self.room_id, second_threaded_event_id, self.other_access_token
+        )
+        self.assertIn("redacted_because", event_dict, event_dict)
+
+        # Check that the message that was sent into the thread by the mod user is not
+        # redacted.
+        event_dict = self.helper.get_event(
+            self.room_id, first_threaded_event_id, self.other_access_token
+        )
+        self.assertIn("body", event_dict["content"], event_dict)
+        self.assertEqual("message 1", event_dict["content"]["body"])
+
+    @override_config({"experimental_features": {"msc3912_enabled": True}})
+    def test_redact_relations_txn_id_reuse(self) -> None:
+        """Tests that redacting a message using a transaction ID, then reusing the same
+        transaction ID but providing an additional list of relations to redact, is
+        effectively a no-op.
+        """
+        # Send a root event.
+        res = self.helper.send_event(
+            room_id=self.room_id,
+            type=EventTypes.Message,
+            content={
+                "msgtype": "m.text",
+                "body": "root",
+            },
+            tok=self.mod_access_token,
+        )
+        root_event_id = res["event_id"]
+
+        # Send a first threaded message.
+        res = self.helper.send_event(
+            room_id=self.room_id,
+            type=EventTypes.Message,
+            content={
+                "msgtype": "m.text",
+                "body": "I'm in a thread!",
+                "m.relates_to": {
+                    "event_id": root_event_id,
+                    "rel_type": RelationTypes.THREAD,
+                },
+            },
+            tok=self.mod_access_token,
+        )
+        threaded_event_id = res["event_id"]
+
+        # Send a first redaction request which redacts only the root event.
+        channel = self.make_request(
+            method="PUT",
+            path=f"/rooms/{self.room_id}/redact/{root_event_id}/foo",
+            content={},
+            access_token=self.mod_access_token,
+        )
+        self.assertEqual(channel.code, 200)
+
+        # Send a second redaction request which redacts the root event as well as
+        # threaded messages.
+        channel = self.make_request(
+            method="PUT",
+            path=f"/rooms/{self.room_id}/redact/{root_event_id}/foo",
+            content={"org.matrix.msc3912.with_relations": [RelationTypes.THREAD]},
+            access_token=self.mod_access_token,
+        )
+        self.assertEqual(channel.code, 200)
+
+        # Check that the root event got redacted.
+        event_dict = self.helper.get_event(
+            self.room_id, root_event_id, self.mod_access_token
+        )
+        self.assertIn("redacted_because", event_dict)
+
+        # Check that the threaded message didn't get redacted (since that wasn't part of
+        # the original redaction).
+        event_dict = self.helper.get_event(
+            self.room_id, threaded_event_id, self.mod_access_token
+        )
+        self.assertIn("body", event_dict["content"], event_dict)
+        self.assertEqual("I'm in a thread!", event_dict["content"]["body"])
diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py
index 706399fae5..8d6f2b6ff9 100644
--- a/tests/rest/client/utils.py
+++ b/tests/rest/client/utils.py
@@ -410,6 +410,43 @@ class RestHelper:
 
         return channel.json_body
 
+    def get_event(
+        self,
+        room_id: str,
+        event_id: str,
+        tok: Optional[str] = None,
+        expect_code: int = HTTPStatus.OK,
+    ) -> JsonDict:
+        """Request a specific event from the server.
+
+        Args:
+            room_id: the room in which the event was sent.
+            event_id: the event's ID.
+            tok: the token to request the event with.
+            expect_code: the expected HTTP status for the response.
+
+        Returns:
+            The event as a dict.
+        """
+        path = f"/_matrix/client/v3/rooms/{room_id}/event/{event_id}"
+        if tok:
+            path = path + f"?access_token={tok}"
+
+        channel = make_request(
+            self.hs.get_reactor(),
+            self.site,
+            "GET",
+            path,
+        )
+
+        assert channel.code == expect_code, "Expected: %d, got: %d, resp: %r" % (
+            expect_code,
+            channel.code,
+            channel.result["body"],
+        )
+
+        return channel.json_body
+
     def _read_write_state(
         self,
         room_id: str,
-- 
cgit 1.5.1


From a4b1f6456276e62b3f4d6b060c289b6413b8a5c2 Mon Sep 17 00:00:00 2001
From: Tulir Asokan <tulir@maunium.net>
Date: Fri, 4 Nov 2022 18:43:51 +0200
Subject: Fix /refresh endpoint version (#14364)

---
 changelog.d/14364.bugfix       |  1 +
 synapse/rest/client/login.py   |  2 +-
 tests/rest/client/test_auth.py | 16 ++++++++--------
 3 files changed, 10 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/14364.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14364.bugfix b/changelog.d/14364.bugfix
new file mode 100644
index 0000000000..514bf859bb
--- /dev/null
+++ b/changelog.d/14364.bugfix
@@ -0,0 +1 @@
+Fix refresh token endpoint to be under /r0 and /v3 instead of /v1. Contributed by Tulir @ Beeper.
diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py
index 7774f1967d..05706b598c 100644
--- a/synapse/rest/client/login.py
+++ b/synapse/rest/client/login.py
@@ -536,7 +536,7 @@ def _get_auth_flow_dict_for_idp(idp: SsoIdentityProvider) -> JsonDict:
 
 
 class RefreshTokenServlet(RestServlet):
-    PATTERNS = (re.compile("^/_matrix/client/v1/refresh$"),)
+    PATTERNS = client_patterns("/refresh$")
 
     def __init__(self, hs: "HomeServer"):
         self._auth_handler = hs.get_auth_handler()
diff --git a/tests/rest/client/test_auth.py b/tests/rest/client/test_auth.py
index 847294dc8e..208ec44829 100644
--- a/tests/rest/client/test_auth.py
+++ b/tests/rest/client/test_auth.py
@@ -635,7 +635,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase):
         """
         return self.make_request(
             "POST",
-            "/_matrix/client/v1/refresh",
+            "/_matrix/client/v3/refresh",
             {"refresh_token": refresh_token},
         )
 
@@ -724,7 +724,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase):
 
         refresh_response = self.make_request(
             "POST",
-            "/_matrix/client/v1/refresh",
+            "/_matrix/client/v3/refresh",
             {"refresh_token": login_response.json_body["refresh_token"]},
         )
         self.assertEqual(refresh_response.code, HTTPStatus.OK, refresh_response.result)
@@ -765,7 +765,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase):
 
         refresh_response = self.make_request(
             "POST",
-            "/_matrix/client/v1/refresh",
+            "/_matrix/client/v3/refresh",
             {"refresh_token": login_response.json_body["refresh_token"]},
         )
         self.assertEqual(refresh_response.code, HTTPStatus.OK, refresh_response.result)
@@ -1002,7 +1002,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase):
         # This first refresh should work properly
         first_refresh_response = self.make_request(
             "POST",
-            "/_matrix/client/v1/refresh",
+            "/_matrix/client/v3/refresh",
             {"refresh_token": login_response.json_body["refresh_token"]},
         )
         self.assertEqual(
@@ -1012,7 +1012,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase):
         # This one as well, since the token in the first one was never used
         second_refresh_response = self.make_request(
             "POST",
-            "/_matrix/client/v1/refresh",
+            "/_matrix/client/v3/refresh",
             {"refresh_token": login_response.json_body["refresh_token"]},
         )
         self.assertEqual(
@@ -1022,7 +1022,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase):
         # This one should not, since the token from the first refresh is not valid anymore
         third_refresh_response = self.make_request(
             "POST",
-            "/_matrix/client/v1/refresh",
+            "/_matrix/client/v3/refresh",
             {"refresh_token": first_refresh_response.json_body["refresh_token"]},
         )
         self.assertEqual(
@@ -1056,7 +1056,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase):
         # Now that the access token from the last valid refresh was used once, refreshing with the N-1 token should fail
         fourth_refresh_response = self.make_request(
             "POST",
-            "/_matrix/client/v1/refresh",
+            "/_matrix/client/v3/refresh",
             {"refresh_token": login_response.json_body["refresh_token"]},
         )
         self.assertEqual(
@@ -1068,7 +1068,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase):
         # But refreshing from the last valid refresh token still works
         fifth_refresh_response = self.make_request(
             "POST",
-            "/_matrix/client/v1/refresh",
+            "/_matrix/client/v3/refresh",
             {"refresh_token": second_refresh_response.json_body["refresh_token"]},
         )
         self.assertEqual(
-- 
cgit 1.5.1


From 8bcdd712b8ba471b3489d41e569276677cf6c2bd Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 4 Nov 2022 18:43:14 +0000
Subject: Bump flake8-bugbear from 22.9.23 to 22.10.27 (#14329)

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: GitHub Actions <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 changelog.d/14329.misc       |  1 +
 poetry.lock                  | 10 +++++-----
 synapse/handlers/presence.py |  6 ++++--
 synapse/server.py            |  2 +-
 synapse/storage/_base.py     |  2 +-
 5 files changed, 12 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/14329.misc

(limited to 'synapse')

diff --git a/changelog.d/14329.misc b/changelog.d/14329.misc
new file mode 100644
index 0000000000..2f6bbd3af7
--- /dev/null
+++ b/changelog.d/14329.misc
@@ -0,0 +1 @@
+Bump flake8-bugbear from 22.9.23 to 22.10.27.
diff --git a/poetry.lock b/poetry.lock
index b945463299..f6e462e6ae 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -260,18 +260,18 @@ pyflakes = ">=2.4.0,<2.5.0"
 
 [[package]]
 name = "flake8-bugbear"
-version = "22.9.23"
+version = "22.10.27"
 description = "A plugin for flake8 finding likely bugs and design problems in your program. Contains warnings that don't belong in pyflakes and pycodestyle."
 category = "dev"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 
 [package.dependencies]
 attrs = ">=19.2.0"
 flake8 = ">=3.0.0"
 
 [package.extras]
-dev = ["coverage", "hypothesis", "hypothesmith (>=0.2)", "pre-commit"]
+dev = ["coverage", "hypothesis", "hypothesmith (>=0.2)", "pre-commit", "tox"]
 
 [[package]]
 name = "flake8-comprehensions"
@@ -1829,8 +1829,8 @@ flake8 = [
     {file = "flake8-4.0.1.tar.gz", hash = "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"},
 ]
 flake8-bugbear = [
-    {file = "flake8-bugbear-22.9.23.tar.gz", hash = "sha256:17b9623325e6e0dcdcc80ed9e4aa811287fcc81d7e03313b8736ea5733759937"},
-    {file = "flake8_bugbear-22.9.23-py3-none-any.whl", hash = "sha256:cd2779b2b7ada212d7a322814a1e5651f1868ab0d3f24cc9da66169ab8fda474"},
+    {file = "flake8-bugbear-22.10.27.tar.gz", hash = "sha256:a6708608965c9e0de5fff13904fed82e0ba21ac929fe4896459226a797e11cd5"},
+    {file = "flake8_bugbear-22.10.27-py3-none-any.whl", hash = "sha256:6ad0ab754507319060695e2f2be80e6d8977cfcea082293089a9226276bd825d"},
 ]
 flake8-comprehensions = [
     {file = "flake8-comprehensions-3.8.0.tar.gz", hash = "sha256:8e108707637b1d13734f38e03435984f6b7854fa6b5a4e34f93e69534be8e521"},
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 2670e561d7..0066d63987 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -256,7 +256,7 @@ class BasePresenceHandler(abc.ABC):
         with the app.
         """
 
-    async def update_external_syncs_row(
+    async def update_external_syncs_row(  # noqa: B027 (no-op by design)
         self, process_id: str, user_id: str, is_syncing: bool, sync_time_msec: int
     ) -> None:
         """Update the syncing users for an external process as a delta.
@@ -272,7 +272,9 @@ class BasePresenceHandler(abc.ABC):
             sync_time_msec: Time in ms when the user was last syncing
         """
 
-    async def update_external_syncs_clear(self, process_id: str) -> None:
+    async def update_external_syncs_clear(  # noqa: B027 (no-op by design)
+        self, process_id: str
+    ) -> None:
         """Marks all users that had been marked as syncing by a given process
         as offline.
 
diff --git a/synapse/server.py b/synapse/server.py
index df3a1cb405..c4e025af22 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -315,7 +315,7 @@ class HomeServer(metaclass=abc.ABCMeta):
         if self.config.worker.run_background_tasks:
             self.setup_background_tasks()
 
-    def start_listening(self) -> None:
+    def start_listening(self) -> None:  # noqa: B027 (no-op by design)
         """Start the HTTP, manhole, metrics, etc listeners
 
         Does nothing in this base class; overridden in derived classes to start the
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index bf42aeb8d1..69abf6fa87 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -50,7 +50,7 @@ class SQLBaseStore(metaclass=ABCMeta):
 
         self.external_cached_functions: Dict[str, CachedFunction] = {}
 
-    def process_replication_rows(
+    def process_replication_rows(  # noqa: B027 (no-op by design)
         self,
         stream_name: str,
         instance_name: str,
-- 
cgit 1.5.1


From e980982b59dea38ec10a5c58993d09e02f845d28 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Mon, 7 Nov 2022 13:49:31 +0000
Subject: Do not reject `/sync` requests with unrecognised filter fields
 (#14369)

For forward compatibility, Synapse needs to ignore fields it does not
recognise instead of raising an error.

Fixes #14365.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14369.bugfix    |  1 +
 synapse/api/filtering.py    |  8 ++++----
 tests/api/test_filtering.py | 21 +++++++++++++++++++--
 3 files changed, 24 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/14369.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14369.bugfix b/changelog.d/14369.bugfix
new file mode 100644
index 0000000000..e6709f4eec
--- /dev/null
+++ b/changelog.d/14369.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where Synapse would raise an error when encountering an unrecognised field in a `/sync` filter, instead of ignoring it for forward compatibility.
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index 26be377d03..a9888381b4 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -43,7 +43,7 @@ if TYPE_CHECKING:
     from synapse.server import HomeServer
 
 FILTER_SCHEMA = {
-    "additionalProperties": False,
+    "additionalProperties": True,  # Allow new fields for forward compatibility
     "type": "object",
     "properties": {
         "limit": {"type": "number"},
@@ -63,7 +63,7 @@ FILTER_SCHEMA = {
 }
 
 ROOM_FILTER_SCHEMA = {
-    "additionalProperties": False,
+    "additionalProperties": True,  # Allow new fields for forward compatibility
     "type": "object",
     "properties": {
         "not_rooms": {"$ref": "#/definitions/room_id_array"},
@@ -77,7 +77,7 @@ ROOM_FILTER_SCHEMA = {
 }
 
 ROOM_EVENT_FILTER_SCHEMA = {
-    "additionalProperties": False,
+    "additionalProperties": True,  # Allow new fields for forward compatibility
     "type": "object",
     "properties": {
         "limit": {"type": "number"},
@@ -143,7 +143,7 @@ USER_FILTER_SCHEMA = {
             },
         },
     },
-    "additionalProperties": False,
+    "additionalProperties": True,  # Allow new fields for forward compatibility
 }
 
 
diff --git a/tests/api/test_filtering.py b/tests/api/test_filtering.py
index a82c4eed86..d5524d296e 100644
--- a/tests/api/test_filtering.py
+++ b/tests/api/test_filtering.py
@@ -46,19 +46,36 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         self.datastore = hs.get_datastores().main
 
     def test_errors_on_invalid_filters(self):
+        # See USER_FILTER_SCHEMA for the filter schema.
         invalid_filters = [
-            {"boom": {}},
+            # `account_data` must be a dictionary
             {"account_data": "Hello World"},
+            # `event_fields` entries must not contain backslashes
             {"event_fields": [r"\\foo"]},
-            {"room": {"timeline": {"limit": 0}, "state": {"not_bars": ["*"]}}},
+            # `event_format` must be "client" or "federation"
             {"event_format": "other"},
+            # `not_rooms` must contain valid room IDs
             {"room": {"not_rooms": ["#foo:pik-test"]}},
+            # `senders` must contain valid user IDs
             {"presence": {"senders": ["@bar;pik.test.com"]}},
         ]
         for filter in invalid_filters:
             with self.assertRaises(SynapseError):
                 self.filtering.check_valid_filter(filter)
 
+    def test_ignores_unknown_filter_fields(self):
+        # For forward compatibility, we must ignore unknown filter fields.
+        # See USER_FILTER_SCHEMA for the filter schema.
+        filters = [
+            {"org.matrix.msc9999.future_option": True},
+            {"presence": {"org.matrix.msc9999.future_option": True}},
+            {"room": {"org.matrix.msc9999.future_option": True}},
+            {"room": {"timeline": {"org.matrix.msc9999.future_option": True}}},
+        ]
+        for filter in filters:
+            self.filtering.check_valid_filter(filter)
+            # Must not raise.
+
     def test_valid_filters(self):
         valid_filters = [
             {
-- 
cgit 1.5.1


From 2193513346054769080dd8a07586bed652acae60 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Mon, 7 Nov 2022 14:28:00 +0000
Subject: Fix background update table-scanning `events` (#14374)

When this background update did its last batch, it would try to update all the
events that had been inserted since the bgupdate started, which could cause a
table-scan. Make sure we limit the update correctly.
---
 changelog.d/14374.bugfix                            |  1 +
 synapse/storage/databases/main/events_bg_updates.py | 16 ++++++++--------
 2 files changed, 9 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/14374.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14374.bugfix b/changelog.d/14374.bugfix
new file mode 100644
index 0000000000..8366cfbf8a
--- /dev/null
+++ b/changelog.d/14374.bugfix
@@ -0,0 +1 @@
+Fix a background database update, introduced in Synapse 1.64.0, which could cause poor database performance.
diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
index 6e8aeed7b4..9e31798ab1 100644
--- a/synapse/storage/databases/main/events_bg_updates.py
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -1435,16 +1435,16 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
                 ),
             )
 
-            endpoint = None
             row = txn.fetchone()
             if row:
                 endpoint = row[0]
+            else:
+                # if the query didn't return a row, we must be almost done. We just
+                # need to go up to the recorded max_stream_ordering.
+                endpoint = max_stream_ordering_inclusive
 
-            where_clause = "stream_ordering > ?"
-            args = [min_stream_ordering_exclusive]
-            if endpoint:
-                where_clause += " AND stream_ordering <= ?"
-                args.append(endpoint)
+            where_clause = "stream_ordering > ? AND stream_ordering <= ?"
+            args = [min_stream_ordering_exclusive, endpoint]
 
             # now do the updates.
             txn.execute(
@@ -1458,13 +1458,13 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
             )
 
             logger.info(
-                "populated new `events` columns up to %s/%i: updated %i rows",
+                "populated new `events` columns up to %i/%i: updated %i rows",
                 endpoint,
                 max_stream_ordering_inclusive,
                 txn.rowcount,
             )
 
-            if endpoint is None:
+            if endpoint >= max_stream_ordering_inclusive:
                 # we're done
                 return True
 
-- 
cgit 1.5.1


From 7894251bcea7714b47e3849e509ea717bb18e9f5 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Mon, 7 Nov 2022 13:38:50 -0800
Subject: Correctly create power level event during initial room creation
 (#14361)

---
 changelog.d/14361.bugfix        |  1 +
 synapse/handlers/room.py        | 25 +++++++++++++++++++++++--
 tests/rest/client/test_rooms.py |  4 ++--
 3 files changed, 26 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14361.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14361.bugfix b/changelog.d/14361.bugfix
new file mode 100644
index 0000000000..33ba1d92af
--- /dev/null
+++ b/changelog.d/14361.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in v1.71.0rc1 where the power level event was incorrectly created during initial room creation.
\ No newline at end of file
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index f10cfca073..66a50bca6e 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -1080,6 +1080,19 @@ class RoomCreationHandler:
             for_batch: bool,
             **kwargs: Any,
         ) -> Tuple[EventBase, synapse.events.snapshot.EventContext]:
+            """
+            Creates an event and associated event context.
+            Args:
+                etype: the type of event to be created
+                content: content of the event
+                for_batch: whether the event is being created for batch persisting. If
+                    True, the event is created using the prev_event_ids, and its event
+                    context is created using the parameters state_map and
+                    current_state_group, so these parameters must be provided whenever
+                    for_batch is True. The resulting event and context are suitable
+                    for being batched up and bulk persisted to the database with
+                    other similarly created events.
+            """
             nonlocal depth
             nonlocal prev_event
 
@@ -1139,13 +1152,21 @@ class RoomCreationHandler:
         depth += 1
         state_map[(EventTypes.Member, creator.user.to_string())] = member_event_id
 
+        # we need the state group of the membership event as it is the current state group
+        event_to_state = (
+            await self._storage_controllers.state.get_state_group_for_events(
+                [member_event_id]
+            )
+        )
+        current_state_group = event_to_state[member_event_id]
+
         events_to_send = []
         # We treat the power levels override specially as this needs to be one
         # of the first events that get sent into a room.
         pl_content = initial_state.pop((EventTypes.PowerLevels, ""), None)
         if pl_content is not None:
             power_event, power_context = await create_event(
-                EventTypes.PowerLevels, pl_content, False
+                EventTypes.PowerLevels, pl_content, True
             )
             current_state_group = power_context._state_group
             events_to_send.append((power_event, power_context))
@@ -1194,7 +1215,7 @@ class RoomCreationHandler:
             pl_event, pl_context = await create_event(
                 EventTypes.PowerLevels,
                 power_level_content,
-                False,
+                True,
             )
             current_state_group = pl_context._state_group
             events_to_send.append((pl_event, pl_context))
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index 1084d4ad9d..e919e089cb 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -715,7 +715,7 @@ class RoomsCreateTestCase(RoomBase):
         self.assertEqual(HTTPStatus.OK, channel.code, channel.result)
         self.assertTrue("room_id" in channel.json_body)
         assert channel.resource_usage is not None
-        self.assertEqual(34, channel.resource_usage.db_txn_count)
+        self.assertEqual(33, channel.resource_usage.db_txn_count)
 
     def test_post_room_initial_state(self) -> None:
         # POST with initial_state config key, expect new room id
@@ -728,7 +728,7 @@ class RoomsCreateTestCase(RoomBase):
         self.assertEqual(HTTPStatus.OK, channel.code, channel.result)
         self.assertTrue("room_id" in channel.json_body)
         assert channel.resource_usage is not None
-        self.assertEqual(37, channel.resource_usage.db_txn_count)
+        self.assertEqual(36, channel.resource_usage.db_txn_count)
 
     def test_post_room_visibility_key(self) -> None:
         # POST with visibility config key, expect new room id
-- 
cgit 1.5.1


From a5fcdea090c2396c30dd07c357ce4d9c90004c34 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Tue, 8 Nov 2022 17:17:13 +0000
Subject: Remove support for PostgreSQL 10 (#14392)

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 .ci/scripts/calculate_jobs.py       |  2 +-
 .github/workflows/tests.yml         |  2 +-
 changelog.d/14392.removal           |  1 +
 docs/upgrade.md                     | 10 ++++++++++
 synapse/storage/engines/postgres.py |  4 ++--
 5 files changed, 15 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14392.removal

(limited to 'synapse')

diff --git a/.ci/scripts/calculate_jobs.py b/.ci/scripts/calculate_jobs.py
index c53d4d5ff1..b48174bea2 100755
--- a/.ci/scripts/calculate_jobs.py
+++ b/.ci/scripts/calculate_jobs.py
@@ -54,7 +54,7 @@ trial_postgres_tests = [
     {
         "python-version": "3.7",
         "database": "postgres",
-        "postgres-version": "10",
+        "postgres-version": "11",
         "extras": "all",
     }
 ]
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index fea33abd12..2bc237a0ba 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -409,7 +409,7 @@ jobs:
       matrix:
         include:
           - python-version: "3.7"
-            postgres-version: "10"
+            postgres-version: "11"
 
           - python-version: "3.11"
             postgres-version: "14"
diff --git a/changelog.d/14392.removal b/changelog.d/14392.removal
new file mode 100644
index 0000000000..e96b3de2bd
--- /dev/null
+++ b/changelog.d/14392.removal
@@ -0,0 +1 @@
+Remove support for PostgreSQL 10.
diff --git a/docs/upgrade.md b/docs/upgrade.md
index 41b06cc253..2aa353e496 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -88,6 +88,16 @@ process, for example:
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     ```
 
+# Upgrading to v1.72.0
+
+## Dropping support for PostgreSQL 10
+
+In line with our [deprecation policy](deprecation_policy.md), we've dropped
+support for PostgreSQL 10, as it is no longer supported upstream.
+
+This release of Synapse requires PostgreSQL 11+.
+
+
 # Upgrading to v1.71.0
 
 ## Removal of the `generate_short_term_login_token` module API method
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index 9bf74bbf59..0c4fd88914 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -81,8 +81,8 @@ class PostgresEngine(
         allow_unsafe_locale = self.config.get("allow_unsafe_locale", False)
 
         # Are we on a supported PostgreSQL version?
-        if not allow_outdated_version and self._version < 100000:
-            raise RuntimeError("Synapse requires PostgreSQL 10 or above.")
+        if not allow_outdated_version and self._version < 110000:
+            raise RuntimeError("Synapse requires PostgreSQL 11 or above.")
 
         with db_conn.cursor() as txn:
             txn.execute("SHOW SERVER_ENCODING")
-- 
cgit 1.5.1


From e9a4343cb2daa55503bb2a2d1431d83bf9773e68 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 9 Nov 2022 09:55:34 -0500
Subject: Drop support for Postgres 10 in full text search code. (#14397)

---
 changelog.d/14397.removal                |  1 +
 synapse/storage/databases/main/search.py | 50 +++++++++++------------
 synapse/storage/engines/postgres.py      | 16 --------
 tests/storage/test_room_search.py        | 69 ++++++++------------------------
 4 files changed, 41 insertions(+), 95 deletions(-)
 create mode 100644 changelog.d/14397.removal

(limited to 'synapse')

diff --git a/changelog.d/14397.removal b/changelog.d/14397.removal
new file mode 100644
index 0000000000..e96b3de2bd
--- /dev/null
+++ b/changelog.d/14397.removal
@@ -0,0 +1 @@
+Remove support for PostgreSQL 10.
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index e9588d1755..3fe433f66c 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -463,18 +463,17 @@ class SearchStore(SearchBackgroundUpdateStore):
 
         if isinstance(self.database_engine, PostgresEngine):
             search_query = search_term
-            tsquery_func = self.database_engine.tsquery_func
-            sql = f"""
-            SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) AS rank,
+            sql = """
+            SELECT ts_rank_cd(vector, websearch_to_tsquery('english', ?)) AS rank,
             room_id, event_id
             FROM event_search
-            WHERE vector @@  {tsquery_func}('english', ?)
+            WHERE vector @@  websearch_to_tsquery('english', ?)
             """
             args = [search_query, search_query] + args
 
-            count_sql = f"""
+            count_sql = """
             SELECT room_id, count(*) as count FROM event_search
-            WHERE vector @@ {tsquery_func}('english', ?)
+            WHERE vector @@ websearch_to_tsquery('english', ?)
             """
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
@@ -523,9 +522,7 @@ class SearchStore(SearchBackgroundUpdateStore):
 
         highlights = None
         if isinstance(self.database_engine, PostgresEngine):
-            highlights = await self._find_highlights_in_postgres(
-                search_query, events, tsquery_func
-            )
+            highlights = await self._find_highlights_in_postgres(search_query, events)
 
         count_sql += " GROUP BY room_id"
 
@@ -604,18 +601,17 @@ class SearchStore(SearchBackgroundUpdateStore):
 
         if isinstance(self.database_engine, PostgresEngine):
             search_query = search_term
-            tsquery_func = self.database_engine.tsquery_func
-            sql = f"""
-            SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) as rank,
+            sql = """
+            SELECT ts_rank_cd(vector, websearch_to_tsquery('english', ?)) as rank,
             origin_server_ts, stream_ordering, room_id, event_id
             FROM event_search
-            WHERE vector @@ {tsquery_func}('english', ?) AND
+            WHERE vector @@ websearch_to_tsquery('english', ?) AND
             """
             args = [search_query, search_query] + args
 
-            count_sql = f"""
+            count_sql = """
             SELECT room_id, count(*) as count FROM event_search
-            WHERE vector @@ {tsquery_func}('english', ?) AND
+            WHERE vector @@ websearch_to_tsquery('english', ?) AND
             """
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
@@ -686,9 +682,7 @@ class SearchStore(SearchBackgroundUpdateStore):
 
         highlights = None
         if isinstance(self.database_engine, PostgresEngine):
-            highlights = await self._find_highlights_in_postgres(
-                search_query, events, tsquery_func
-            )
+            highlights = await self._find_highlights_in_postgres(search_query, events)
 
         count_sql += " GROUP BY room_id"
 
@@ -714,7 +708,7 @@ class SearchStore(SearchBackgroundUpdateStore):
         }
 
     async def _find_highlights_in_postgres(
-        self, search_query: str, events: List[EventBase], tsquery_func: str
+        self, search_query: str, events: List[EventBase]
     ) -> Set[str]:
         """Given a list of events and a search term, return a list of words
         that match from the content of the event.
@@ -725,7 +719,6 @@ class SearchStore(SearchBackgroundUpdateStore):
         Args:
             search_query
             events: A list of events
-            tsquery_func: The tsquery_* function to use when making queries
 
         Returns:
             A set of strings.
@@ -758,13 +751,16 @@ class SearchStore(SearchBackgroundUpdateStore):
                 while stop_sel in value:
                     stop_sel += ">"
 
-                query = f"SELECT ts_headline(?, {tsquery_func}('english', ?), %s)" % (
-                    _to_postgres_options(
-                        {
-                            "StartSel": start_sel,
-                            "StopSel": stop_sel,
-                            "MaxFragments": "50",
-                        }
+                query = (
+                    "SELECT ts_headline(?, websearch_to_tsquery('english', ?), %s)"
+                    % (
+                        _to_postgres_options(
+                            {
+                                "StartSel": start_sel,
+                                "StopSel": stop_sel,
+                                "MaxFragments": "50",
+                            }
+                        )
                     )
                 )
                 txn.execute(query, (value, search_query))
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index 0c4fd88914..719a517336 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -170,22 +170,6 @@ class PostgresEngine(
         """Do we support the `RETURNING` clause in insert/update/delete?"""
         return True
 
-    @property
-    def tsquery_func(self) -> str:
-        """
-        Selects a tsquery_* func to use.
-
-        Ref: https://www.postgresql.org/docs/current/textsearch-controls.html
-
-        Returns:
-            The function name.
-        """
-        # Postgres 11 added support for websearch_to_tsquery.
-        assert self._version is not None
-        if self._version >= 110000:
-            return "websearch_to_tsquery"
-        return "plainto_tsquery"
-
     def is_deadlock(self, error: Exception) -> bool:
         if isinstance(error, psycopg2.DatabaseError):
             # https://www.postgresql.org/docs/current/static/errcodes-appendix.html
diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py
index 868b5bee84..ef850daa73 100644
--- a/tests/storage/test_room_search.py
+++ b/tests/storage/test_room_search.py
@@ -12,9 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List, Tuple, Union
+from typing import List, Tuple
 from unittest.case import SkipTest
-from unittest.mock import PropertyMock, patch
 
 from twisted.test.proto_helpers import MemoryReactor
 
@@ -220,10 +219,8 @@ class MessageSearchTest(HomeserverTestCase):
 
     PHRASE = "the quick brown fox jumps over the lazy dog"
 
-    # Each entry is a search query, followed by either a boolean of whether it is
-    # in the phrase OR a tuple of booleans: whether it matches using websearch
-    # and using plain search.
-    COMMON_CASES: List[Tuple[str, Union[bool, Tuple[bool, bool]]]] = [
+    # Each entry is a search query, followed by a boolean of whether it is in the phrase.
+    COMMON_CASES = [
         ("nope", False),
         ("brown", True),
         ("quick brown", True),
@@ -231,13 +228,13 @@ class MessageSearchTest(HomeserverTestCase):
         ("quick \t brown", True),
         ("jump", True),
         ("brown nope", False),
-        ('"brown quick"', (False, True)),
+        ('"brown quick"', False),
         ('"jumps over"', True),
-        ('"quick fox"', (False, True)),
+        ('"quick fox"', False),
         ("nope OR doublenope", False),
-        ("furphy OR fox", (True, False)),
-        ("fox -nope", (True, False)),
-        ("fox -brown", (False, True)),
+        ("furphy OR fox", True),
+        ("fox -nope", True),
+        ("fox -brown", False),
         ('"fox" quick', True),
         ('"quick brown', True),
         ('" quick "', True),
@@ -246,11 +243,11 @@ class MessageSearchTest(HomeserverTestCase):
     # TODO Test non-ASCII cases.
 
     # Case that fail on SQLite.
-    POSTGRES_CASES: List[Tuple[str, Union[bool, Tuple[bool, bool]]]] = [
+    POSTGRES_CASES = [
         # SQLite treats NOT as a binary operator.
-        ("- fox", (False, True)),
-        ("- nope", (True, False)),
-        ('"-fox quick', (False, True)),
+        ("- fox", False),
+        ("- nope", True),
+        ('"-fox quick', False),
         # PostgreSQL skips stop words.
         ('"the quick brown"', True),
         ('"over lazy"', True),
@@ -275,7 +272,7 @@ class MessageSearchTest(HomeserverTestCase):
         if isinstance(main_store.database_engine, PostgresEngine):
             assert main_store.database_engine._version is not None
             found = main_store.database_engine._version < 140000
-        self.COMMON_CASES.append(('"fox quick', (found, True)))
+        self.COMMON_CASES.append(('"fox quick', found))
 
     def test_tokenize_query(self) -> None:
         """Test the custom logic to tokenize a user's query."""
@@ -315,16 +312,10 @@ class MessageSearchTest(HomeserverTestCase):
             )
 
     def _check_test_cases(
-        self,
-        store: DataStore,
-        cases: List[Tuple[str, Union[bool, Tuple[bool, bool]]]],
-        index=0,
+        self, store: DataStore, cases: List[Tuple[str, bool]]
     ) -> None:
         # Run all the test cases versus search_msgs
         for query, expect_to_contain in cases:
-            if isinstance(expect_to_contain, tuple):
-                expect_to_contain = expect_to_contain[index]
-
             result = self.get_success(
                 store.search_msgs([self.room_id], query, ["content.body"])
             )
@@ -343,9 +334,6 @@ class MessageSearchTest(HomeserverTestCase):
 
         # Run them again versus search_rooms
         for query, expect_to_contain in cases:
-            if isinstance(expect_to_contain, tuple):
-                expect_to_contain = expect_to_contain[index]
-
             result = self.get_success(
                 store.search_rooms([self.room_id], query, ["content.body"], 10)
             )
@@ -366,38 +354,15 @@ class MessageSearchTest(HomeserverTestCase):
         """
         Test searching for phrases using typical web search syntax, as per postgres' websearch_to_tsquery.
         This test is skipped unless the postgres instance supports websearch_to_tsquery.
-        """
-
-        store = self.hs.get_datastores().main
-        if not isinstance(store.database_engine, PostgresEngine):
-            raise SkipTest("Test only applies when postgres is used as the database")
-
-        if store.database_engine.tsquery_func != "websearch_to_tsquery":
-            raise SkipTest(
-                "Test only applies when postgres supporting websearch_to_tsquery is used as the database"
-            )
 
-        self._check_test_cases(store, self.COMMON_CASES + self.POSTGRES_CASES, index=0)
-
-    def test_postgres_non_web_search_for_phrase(self):
-        """
-        Test postgres searching for phrases without using web search, which is used when websearch_to_tsquery isn't
-        supported by the current postgres version.
+        See https://www.postgresql.org/docs/current/textsearch-controls.html
         """
 
         store = self.hs.get_datastores().main
         if not isinstance(store.database_engine, PostgresEngine):
             raise SkipTest("Test only applies when postgres is used as the database")
 
-        # Patch supports_websearch_to_tsquery to always return False to ensure we're testing the plainto_tsquery path.
-        with patch(
-            "synapse.storage.engines.postgres.PostgresEngine.tsquery_func",
-            new_callable=PropertyMock,
-        ) as supports_websearch_to_tsquery:
-            supports_websearch_to_tsquery.return_value = "plainto_tsquery"
-            self._check_test_cases(
-                store, self.COMMON_CASES + self.POSTGRES_CASES, index=1
-            )
+        self._check_test_cases(store, self.COMMON_CASES + self.POSTGRES_CASES)
 
     def test_sqlite_search(self):
         """
@@ -407,4 +372,4 @@ class MessageSearchTest(HomeserverTestCase):
         if not isinstance(store.database_engine, Sqlite3Engine):
             raise SkipTest("Test only applies when sqlite is used as the database")
 
-        self._check_test_cases(store, self.COMMON_CASES, index=0)
+        self._check_test_cases(store, self.COMMON_CASES)
-- 
cgit 1.5.1


From d10a85ec9eac6f31aa82a5f07d74e5914b18b320 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 10 Nov 2022 12:17:46 +0000
Subject: Quieter logging for stateres failure at missing prev events (#14346)

---
 changelog.d/14346.misc               | 1 +
 synapse/handlers/federation_event.py | 5 ++---
 2 files changed, 3 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/14346.misc

(limited to 'synapse')

diff --git a/changelog.d/14346.misc b/changelog.d/14346.misc
new file mode 100644
index 0000000000..9833b0733a
--- /dev/null
+++ b/changelog.d/14346.misc
@@ -0,0 +1 @@
+Concisely log a failure to resolve state due to missing `prev_events`.
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 9ca5df7c78..f7223b03c3 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -1065,10 +1065,9 @@ class FederationEventHandler:
                 state_res_store=StateResolutionStore(self._store),
             )
 
-        except Exception:
+        except Exception as e:
             logger.warning(
-                "Error attempting to resolve state at missing prev_events",
-                exc_info=True,
+                "Error attempting to resolve state at missing prev_events: %s", e
             )
             raise FederationError(
                 "ERROR",
-- 
cgit 1.5.1


From b2c2b030798d0e74d3bf1afb4726465b53620638 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Thu, 10 Nov 2022 19:02:27 +0000
Subject: Fix PostgreSQL sometimes using table scans for `event_search`
 (#14409)

PostgreSQL may underestimate the number of distinct `room_id`s in
`event_search`, which can cause it to use table scans for queries for
multiple rooms.

Fix this by setting `n_distinct` on the column.

Resolves #14402.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14409.bugfix                           |  1 +
 .../11event_search_room_id_n_distinct.sql.postgres | 33 ++++++++++++++++++++++
 2 files changed, 34 insertions(+)
 create mode 100644 changelog.d/14409.bugfix
 create mode 100644 synapse/storage/schema/main/delta/73/11event_search_room_id_n_distinct.sql.postgres

(limited to 'synapse')

diff --git a/changelog.d/14409.bugfix b/changelog.d/14409.bugfix
new file mode 100644
index 0000000000..f720700653
--- /dev/null
+++ b/changelog.d/14409.bugfix
@@ -0,0 +1 @@
+Fix PostgreSQL sometimes using table scans for queries against the `event_search` table, taking a long time and a large amount of IO.
diff --git a/synapse/storage/schema/main/delta/73/11event_search_room_id_n_distinct.sql.postgres b/synapse/storage/schema/main/delta/73/11event_search_room_id_n_distinct.sql.postgres
new file mode 100644
index 0000000000..93cdaefca1
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/11event_search_room_id_n_distinct.sql.postgres
@@ -0,0 +1,33 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- By default the postgres statistics collector massively underestimates the
+-- number of distinct rooms in `event_search`, which can cause postgres to use
+-- table scans for queries for multiple rooms.
+--
+-- To work around this we can manually tell postgres the number of distinct rooms
+-- by setting `n_distinct` (a negative value here is the negated ratio of distinct
+-- values to rows, so -0.01 means on average there are 100 rows per distinct
+-- value). We don't need a particularly accurate number here, as a) we just
+-- want it to always use index scans and b) our estimate is going to be better than the
+-- one made by the statistics collector.
+
+ALTER TABLE event_search ALTER COLUMN room_id SET (n_distinct = -0.01);
+
+-- Ideally we'd do an `ANALYZE event_search (room_id)` here so that
+-- the above gets picked up immediately, but that can take a bit of time so we
+-- rely on the autovacuum eventually getting run and doing that in the
+-- background for us.
-- 
cgit 1.5.1


From 13ca8bb2fc05d338ccf62e6f8d1cbf5021d935ba Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 10 Nov 2022 15:33:34 -0500
Subject: Remove duplicated code to evict entries. (#14410)

This code was factored out to a method, but also left in-place.

Calling this twice in a row makes no sense: the first call will reduce
the size appropriately, so the second loop will immediately exit since
the cache size was already reduced.
---
 changelog.d/14410.misc                     |  1 +
 synapse/util/caches/stream_change_cache.py | 11 ++---------
 2 files changed, 3 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/14410.misc

(limited to 'synapse')

diff --git a/changelog.d/14410.misc b/changelog.d/14410.misc
new file mode 100644
index 0000000000..f085a8bfb2
--- /dev/null
+++ b/changelog.d/14410.misc
@@ -0,0 +1 @@
+Remove unreachable code.
diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py
index 330709b8b7..666f4b6895 100644
--- a/synapse/util/caches/stream_change_cache.py
+++ b/synapse/util/caches/stream_change_cache.py
@@ -72,7 +72,7 @@ class StreamChangeCache:
         items from the cache.
 
         Returns:
-            bool: Whether the cache changed size or not.
+            Whether the cache changed size or not.
         """
         new_size = math.floor(self._original_max_size * factor)
         if new_size != self._max_size:
@@ -188,14 +188,8 @@ class StreamChangeCache:
         self._entity_to_key[entity] = stream_pos
         self._evict()
 
-        # if the cache is too big, remove entries
-        while len(self._cache) > self._max_size:
-            k, r = self._cache.popitem(0)
-            self._earliest_known_stream_pos = max(k, self._earliest_known_stream_pos)
-            for entity in r:
-                del self._entity_to_key[entity]
-
     def _evict(self) -> None:
+        # if the cache is too big, remove entries
         while len(self._cache) > self._max_size:
             k, r = self._cache.popitem(0)
             self._earliest_known_stream_pos = max(k, self._earliest_known_stream_pos)
@@ -203,7 +197,6 @@ class StreamChangeCache:
                 self._entity_to_key.pop(entity, None)
 
     def get_max_pos_of_last_change(self, entity: EntityType) -> int:
-
         """Returns an upper bound of the stream id of the last change to an
         entity.
         """
-- 
cgit 1.5.1


From 3a4f80f8c6f39c5549c56c044e10b35064d8d22f Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Fri, 11 Nov 2022 10:51:49 +0000
Subject: Merge/remove `Slaved*` stores into `WorkerStores` (#14375)

---
 changelog.d/14375.misc                          |  1 +
 synapse/app/admin_cmd.py                        | 36 ++++++++---
 synapse/app/generic_worker.py                   | 44 ++++++++++----
 synapse/replication/slave/storage/devices.py    | 79 ------------------------
 synapse/replication/slave/storage/events.py     | 79 ------------------------
 synapse/replication/slave/storage/filtering.py  | 35 -----------
 synapse/replication/slave/storage/keys.py       | 20 ------
 synapse/replication/slave/storage/push_rule.py  | 35 -----------
 synapse/replication/slave/storage/pushers.py    | 47 --------------
 synapse/storage/databases/main/__init__.py      | 35 -----------
 synapse/storage/databases/main/devices.py       | 81 ++++++++++++++++++++++---
 synapse/storage/databases/main/events_worker.py | 16 +++++
 synapse/storage/databases/main/filtering.py     |  4 +-
 synapse/storage/databases/main/push_rule.py     | 19 ++++--
 synapse/storage/databases/main/pusher.py        | 41 +++++++++++--
 synapse/storage/databases/main/stream.py        |  1 +
 tests/replication/slave/storage/test_events.py  |  6 +-
 17 files changed, 202 insertions(+), 377 deletions(-)
 create mode 100644 changelog.d/14375.misc
 delete mode 100644 synapse/replication/slave/storage/devices.py
 delete mode 100644 synapse/replication/slave/storage/events.py
 delete mode 100644 synapse/replication/slave/storage/filtering.py
 delete mode 100644 synapse/replication/slave/storage/keys.py
 delete mode 100644 synapse/replication/slave/storage/push_rule.py
 delete mode 100644 synapse/replication/slave/storage/pushers.py

(limited to 'synapse')

diff --git a/changelog.d/14375.misc b/changelog.d/14375.misc
new file mode 100644
index 0000000000..d0369b9b8c
--- /dev/null
+++ b/changelog.d/14375.misc
@@ -0,0 +1 @@
+Clean up old worker datastore classes. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py
index 3c8c00ea5b..165d1c5db0 100644
--- a/synapse/app/admin_cmd.py
+++ b/synapse/app/admin_cmd.py
@@ -28,10 +28,6 @@ from synapse.config.homeserver import HomeServerConfig
 from synapse.config.logger import setup_logging
 from synapse.events import EventBase
 from synapse.handlers.admin import ExfiltrationWriter
-from synapse.replication.slave.storage.devices import SlavedDeviceStore
-from synapse.replication.slave.storage.events import SlavedEventStore
-from synapse.replication.slave.storage.filtering import SlavedFilteringStore
-from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore
 from synapse.server import HomeServer
 from synapse.storage.database import DatabasePool, LoggingDatabaseConnection
 from synapse.storage.databases.main.account_data import AccountDataWorkerStore
@@ -40,10 +36,24 @@ from synapse.storage.databases.main.appservice import (
     ApplicationServiceWorkerStore,
 )
 from synapse.storage.databases.main.deviceinbox import DeviceInboxWorkerStore
+from synapse.storage.databases.main.devices import DeviceWorkerStore
+from synapse.storage.databases.main.event_federation import EventFederationWorkerStore
+from synapse.storage.databases.main.event_push_actions import (
+    EventPushActionsWorkerStore,
+)
+from synapse.storage.databases.main.events_worker import EventsWorkerStore
+from synapse.storage.databases.main.filtering import FilteringWorkerStore
+from synapse.storage.databases.main.push_rule import PushRulesWorkerStore
 from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
 from synapse.storage.databases.main.registration import RegistrationWorkerStore
+from synapse.storage.databases.main.relations import RelationsWorkerStore
 from synapse.storage.databases.main.room import RoomWorkerStore
+from synapse.storage.databases.main.roommember import RoomMemberWorkerStore
+from synapse.storage.databases.main.signatures import SignatureWorkerStore
+from synapse.storage.databases.main.state import StateGroupWorkerStore
+from synapse.storage.databases.main.stream import StreamWorkerStore
 from synapse.storage.databases.main.tags import TagsWorkerStore
+from synapse.storage.databases.main.user_erasure_store import UserErasureWorkerStore
 from synapse.types import StateMap
 from synapse.util import SYNAPSE_VERSION
 from synapse.util.logcontext import LoggingContext
@@ -52,17 +62,25 @@ logger = logging.getLogger("synapse.app.admin_cmd")
 
 
 class AdminCmdSlavedStore(
-    SlavedFilteringStore,
-    SlavedPushRuleStore,
-    SlavedEventStore,
-    SlavedDeviceStore,
+    FilteringWorkerStore,
+    DeviceWorkerStore,
     TagsWorkerStore,
     DeviceInboxWorkerStore,
     AccountDataWorkerStore,
+    PushRulesWorkerStore,
     ApplicationServiceTransactionWorkerStore,
     ApplicationServiceWorkerStore,
-    RegistrationWorkerStore,
+    RoomMemberWorkerStore,
+    RelationsWorkerStore,
+    EventFederationWorkerStore,
+    EventPushActionsWorkerStore,
+    StateGroupWorkerStore,
+    SignatureWorkerStore,
+    UserErasureWorkerStore,
     ReceiptsWorkerStore,
+    StreamWorkerStore,
+    EventsWorkerStore,
+    RegistrationWorkerStore,
     RoomWorkerStore,
 ):
     def __init__(
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index cb5892f041..51446b49cd 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -48,12 +48,6 @@ from synapse.http.site import SynapseRequest, SynapseSite
 from synapse.logging.context import LoggingContext
 from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource
-from synapse.replication.slave.storage.devices import SlavedDeviceStore
-from synapse.replication.slave.storage.events import SlavedEventStore
-from synapse.replication.slave.storage.filtering import SlavedFilteringStore
-from synapse.replication.slave.storage.keys import SlavedKeyStore
-from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore
-from synapse.replication.slave.storage.pushers import SlavedPusherStore
 from synapse.rest.admin import register_servlets_for_media_repo
 from synapse.rest.client import (
     account_data,
@@ -101,8 +95,16 @@ from synapse.storage.databases.main.appservice import (
 from synapse.storage.databases.main.censor_events import CensorEventsStore
 from synapse.storage.databases.main.client_ips import ClientIpWorkerStore
 from synapse.storage.databases.main.deviceinbox import DeviceInboxWorkerStore
+from synapse.storage.databases.main.devices import DeviceWorkerStore
 from synapse.storage.databases.main.directory import DirectoryWorkerStore
 from synapse.storage.databases.main.e2e_room_keys import EndToEndRoomKeyStore
+from synapse.storage.databases.main.event_federation import EventFederationWorkerStore
+from synapse.storage.databases.main.event_push_actions import (
+    EventPushActionsWorkerStore,
+)
+from synapse.storage.databases.main.events_worker import EventsWorkerStore
+from synapse.storage.databases.main.filtering import FilteringWorkerStore
+from synapse.storage.databases.main.keys import KeyStore
 from synapse.storage.databases.main.lock import LockStore
 from synapse.storage.databases.main.media_repository import MediaRepositoryStore
 from synapse.storage.databases.main.metrics import ServerMetricsStore
@@ -111,17 +113,25 @@ from synapse.storage.databases.main.monthly_active_users import (
 )
 from synapse.storage.databases.main.presence import PresenceStore
 from synapse.storage.databases.main.profile import ProfileWorkerStore
+from synapse.storage.databases.main.push_rule import PushRulesWorkerStore
+from synapse.storage.databases.main.pusher import PusherWorkerStore
 from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
 from synapse.storage.databases.main.registration import RegistrationWorkerStore
+from synapse.storage.databases.main.relations import RelationsWorkerStore
 from synapse.storage.databases.main.room import RoomWorkerStore
 from synapse.storage.databases.main.room_batch import RoomBatchStore
+from synapse.storage.databases.main.roommember import RoomMemberWorkerStore
 from synapse.storage.databases.main.search import SearchStore
 from synapse.storage.databases.main.session import SessionStore
+from synapse.storage.databases.main.signatures import SignatureWorkerStore
+from synapse.storage.databases.main.state import StateGroupWorkerStore
 from synapse.storage.databases.main.stats import StatsStore
+from synapse.storage.databases.main.stream import StreamWorkerStore
 from synapse.storage.databases.main.tags import TagsWorkerStore
 from synapse.storage.databases.main.transactions import TransactionWorkerStore
 from synapse.storage.databases.main.ui_auth import UIAuthWorkerStore
 from synapse.storage.databases.main.user_directory import UserDirectoryStore
+from synapse.storage.databases.main.user_erasure_store import UserErasureWorkerStore
 from synapse.types import JsonDict
 from synapse.util import SYNAPSE_VERSION
 from synapse.util.httpresourcetree import create_resource_tree
@@ -232,26 +242,36 @@ class GenericWorkerSlavedStore(
     EndToEndRoomKeyStore,
     PresenceStore,
     DeviceInboxWorkerStore,
-    SlavedDeviceStore,
-    SlavedPushRuleStore,
+    DeviceWorkerStore,
     TagsWorkerStore,
     AccountDataWorkerStore,
-    SlavedPusherStore,
     CensorEventsStore,
     ClientIpWorkerStore,
-    SlavedEventStore,
-    SlavedKeyStore,
+    # KeyStore isn't really safe to use from a worker, but for now we do so and hope that
+    # the races it creates aren't too bad.
+    KeyStore,
     RoomWorkerStore,
     RoomBatchStore,
     DirectoryWorkerStore,
+    PushRulesWorkerStore,
     ApplicationServiceTransactionWorkerStore,
     ApplicationServiceWorkerStore,
     ProfileWorkerStore,
-    SlavedFilteringStore,
+    FilteringWorkerStore,
     MonthlyActiveUsersWorkerStore,
     MediaRepositoryStore,
     ServerMetricsStore,
+    PusherWorkerStore,
+    RoomMemberWorkerStore,
+    RelationsWorkerStore,
+    EventFederationWorkerStore,
+    EventPushActionsWorkerStore,
+    StateGroupWorkerStore,
+    SignatureWorkerStore,
+    UserErasureWorkerStore,
     ReceiptsWorkerStore,
+    StreamWorkerStore,
+    EventsWorkerStore,
     RegistrationWorkerStore,
     SearchStore,
     TransactionWorkerStore,
diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py
deleted file mode 100644
index 6fcade510a..0000000000
--- a/synapse/replication/slave/storage/devices.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import TYPE_CHECKING, Any, Iterable
-
-from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
-from synapse.replication.tcp.streams._base import DeviceListsStream, UserSignatureStream
-from synapse.storage.database import DatabasePool, LoggingDatabaseConnection
-from synapse.storage.databases.main.devices import DeviceWorkerStore
-
-if TYPE_CHECKING:
-    from synapse.server import HomeServer
-
-
-class SlavedDeviceStore(DeviceWorkerStore):
-    def __init__(
-        self,
-        database: DatabasePool,
-        db_conn: LoggingDatabaseConnection,
-        hs: "HomeServer",
-    ):
-        self.hs = hs
-
-        self._device_list_id_gen = SlavedIdTracker(
-            db_conn,
-            "device_lists_stream",
-            "stream_id",
-            extra_tables=[
-                ("user_signature_stream", "stream_id"),
-                ("device_lists_outbound_pokes", "stream_id"),
-                ("device_lists_changes_in_room", "stream_id"),
-            ],
-        )
-
-        super().__init__(database, db_conn, hs)
-
-    def get_device_stream_token(self) -> int:
-        return self._device_list_id_gen.get_current_token()
-
-    def process_replication_rows(
-        self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any]
-    ) -> None:
-        if stream_name == DeviceListsStream.NAME:
-            self._device_list_id_gen.advance(instance_name, token)
-            self._invalidate_caches_for_devices(token, rows)
-        elif stream_name == UserSignatureStream.NAME:
-            self._device_list_id_gen.advance(instance_name, token)
-            for row in rows:
-                self._user_signature_stream_cache.entity_has_changed(row.user_id, token)
-        return super().process_replication_rows(stream_name, instance_name, token, rows)
-
-    def _invalidate_caches_for_devices(
-        self, token: int, rows: Iterable[DeviceListsStream.DeviceListsStreamRow]
-    ) -> None:
-        for row in rows:
-            # The entities are either user IDs (starting with '@') whose devices
-            # have changed, or remote servers that we need to tell about
-            # changes.
-            if row.entity.startswith("@"):
-                self._device_list_stream_cache.entity_has_changed(row.entity, token)
-                self.get_cached_devices_for_user.invalidate((row.entity,))
-                self._get_cached_user_device.invalidate((row.entity,))
-                self.get_device_list_last_stream_id_for_remote.invalidate((row.entity,))
-
-            else:
-                self._device_list_federation_stream_cache.entity_has_changed(
-                    row.entity, token
-                )
diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py
deleted file mode 100644
index fe47778cb1..0000000000
--- a/synapse/replication/slave/storage/events.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-# Copyright 2018 New Vector Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import logging
-from typing import TYPE_CHECKING
-
-from synapse.storage.database import DatabasePool, LoggingDatabaseConnection
-from synapse.storage.databases.main.event_federation import EventFederationWorkerStore
-from synapse.storage.databases.main.event_push_actions import (
-    EventPushActionsWorkerStore,
-)
-from synapse.storage.databases.main.events_worker import EventsWorkerStore
-from synapse.storage.databases.main.relations import RelationsWorkerStore
-from synapse.storage.databases.main.roommember import RoomMemberWorkerStore
-from synapse.storage.databases.main.signatures import SignatureWorkerStore
-from synapse.storage.databases.main.state import StateGroupWorkerStore
-from synapse.storage.databases.main.stream import StreamWorkerStore
-from synapse.storage.databases.main.user_erasure_store import UserErasureWorkerStore
-from synapse.util.caches.stream_change_cache import StreamChangeCache
-
-if TYPE_CHECKING:
-    from synapse.server import HomeServer
-
-logger = logging.getLogger(__name__)
-
-
-# So, um, we want to borrow a load of functions intended for reading from
-# a DataStore, but we don't want to take functions that either write to the
-# DataStore or are cached and don't have cache invalidation logic.
-#
-# Rather than write duplicate versions of those functions, or lift them to
-# a common base class, we going to grab the underlying __func__ object from
-# the method descriptor on the DataStore and chuck them into our class.
-
-
-class SlavedEventStore(
-    EventFederationWorkerStore,
-    RoomMemberWorkerStore,
-    EventPushActionsWorkerStore,
-    StreamWorkerStore,
-    StateGroupWorkerStore,
-    SignatureWorkerStore,
-    EventsWorkerStore,
-    UserErasureWorkerStore,
-    RelationsWorkerStore,
-):
-    def __init__(
-        self,
-        database: DatabasePool,
-        db_conn: LoggingDatabaseConnection,
-        hs: "HomeServer",
-    ):
-        super().__init__(database, db_conn, hs)
-
-        events_max = self._stream_id_gen.get_current_token()
-        curr_state_delta_prefill, min_curr_state_delta_id = self.db_pool.get_cache_dict(
-            db_conn,
-            "current_state_delta_stream",
-            entity_column="room_id",
-            stream_column="stream_id",
-            max_value=events_max,  # As we share the stream id with events token
-            limit=1000,
-        )
-        self._curr_state_delta_stream_cache = StreamChangeCache(
-            "_curr_state_delta_stream_cache",
-            min_curr_state_delta_id,
-            prefilled_cache=curr_state_delta_prefill,
-        )
diff --git a/synapse/replication/slave/storage/filtering.py b/synapse/replication/slave/storage/filtering.py
deleted file mode 100644
index c52679cd60..0000000000
--- a/synapse/replication/slave/storage/filtering.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright 2015, 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import TYPE_CHECKING
-
-from synapse.storage._base import SQLBaseStore
-from synapse.storage.database import DatabasePool, LoggingDatabaseConnection
-from synapse.storage.databases.main.filtering import FilteringStore
-
-if TYPE_CHECKING:
-    from synapse.server import HomeServer
-
-
-class SlavedFilteringStore(SQLBaseStore):
-    def __init__(
-        self,
-        database: DatabasePool,
-        db_conn: LoggingDatabaseConnection,
-        hs: "HomeServer",
-    ):
-        super().__init__(database, db_conn, hs)
-
-    # Filters are immutable so this cache doesn't need to be expired
-    get_user_filter = FilteringStore.__dict__["get_user_filter"]
diff --git a/synapse/replication/slave/storage/keys.py b/synapse/replication/slave/storage/keys.py
deleted file mode 100644
index a00b38c512..0000000000
--- a/synapse/replication/slave/storage/keys.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright 2015, 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from synapse.storage.databases.main.keys import KeyStore
-
-# KeyStore isn't really safe to use from a worker, but for now we do so and hope that
-# the races it creates aren't too bad.
-
-SlavedKeyStore = KeyStore
diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py
deleted file mode 100644
index 5e65eaf1e0..0000000000
--- a/synapse/replication/slave/storage/push_rule.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright 2015, 2016 OpenMarket Ltd
-# Copyright 2018 New Vector Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from typing import Any, Iterable
-
-from synapse.replication.tcp.streams import PushRulesStream
-from synapse.storage.databases.main.push_rule import PushRulesWorkerStore
-
-from .events import SlavedEventStore
-
-
-class SlavedPushRuleStore(SlavedEventStore, PushRulesWorkerStore):
-    def get_max_push_rules_stream_id(self) -> int:
-        return self._push_rules_stream_id_gen.get_current_token()
-
-    def process_replication_rows(
-        self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any]
-    ) -> None:
-        if stream_name == PushRulesStream.NAME:
-            self._push_rules_stream_id_gen.advance(instance_name, token)
-            for row in rows:
-                self.get_push_rules_for_user.invalidate((row.user_id,))
-                self.push_rules_stream_cache.entity_has_changed(row.user_id, token)
-        return super().process_replication_rows(stream_name, instance_name, token, rows)
diff --git a/synapse/replication/slave/storage/pushers.py b/synapse/replication/slave/storage/pushers.py
deleted file mode 100644
index 44ed20e424..0000000000
--- a/synapse/replication/slave/storage/pushers.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-# Copyright 2018 New Vector Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from typing import TYPE_CHECKING, Any, Iterable
-
-from synapse.replication.tcp.streams import PushersStream
-from synapse.storage.database import DatabasePool, LoggingDatabaseConnection
-from synapse.storage.databases.main.pusher import PusherWorkerStore
-
-from ._slaved_id_tracker import SlavedIdTracker
-
-if TYPE_CHECKING:
-    from synapse.server import HomeServer
-
-
-class SlavedPusherStore(PusherWorkerStore):
-    def __init__(
-        self,
-        database: DatabasePool,
-        db_conn: LoggingDatabaseConnection,
-        hs: "HomeServer",
-    ):
-        super().__init__(database, db_conn, hs)
-        self._pushers_id_gen = SlavedIdTracker(  # type: ignore
-            db_conn, "pushers", "id", extra_tables=[("deleted_pushers", "stream_id")]
-        )
-
-    def get_pushers_stream_token(self) -> int:
-        return self._pushers_id_gen.get_current_token()
-
-    def process_replication_rows(
-        self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any]
-    ) -> None:
-        if stream_name == PushersStream.NAME:
-            self._pushers_id_gen.advance(instance_name, token)
-        return super().process_replication_rows(stream_name, instance_name, token, rows)
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index cfaedf5e0c..0e47592be3 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -26,9 +26,7 @@ from synapse.storage.database import (
 from synapse.storage.databases.main.stats import UserSortOrder
 from synapse.storage.engines import BaseDatabaseEngine
 from synapse.storage.types import Cursor
-from synapse.storage.util.id_generators import StreamIdGenerator
 from synapse.types import JsonDict, get_domain_from_id
-from synapse.util.caches.stream_change_cache import StreamChangeCache
 
 from .account_data import AccountDataStore
 from .appservice import ApplicationServiceStore, ApplicationServiceTransactionStore
@@ -138,41 +136,8 @@ class DataStore(
         self._clock = hs.get_clock()
         self.database_engine = database.engine
 
-        self._device_list_id_gen = StreamIdGenerator(
-            db_conn,
-            "device_lists_stream",
-            "stream_id",
-            extra_tables=[
-                ("user_signature_stream", "stream_id"),
-                ("device_lists_outbound_pokes", "stream_id"),
-                ("device_lists_changes_in_room", "stream_id"),
-            ],
-        )
-
         super().__init__(database, db_conn, hs)
 
-        events_max = self._stream_id_gen.get_current_token()
-        curr_state_delta_prefill, min_curr_state_delta_id = self.db_pool.get_cache_dict(
-            db_conn,
-            "current_state_delta_stream",
-            entity_column="room_id",
-            stream_column="stream_id",
-            max_value=events_max,  # As we share the stream id with events token
-            limit=1000,
-        )
-        self._curr_state_delta_stream_cache = StreamChangeCache(
-            "_curr_state_delta_stream_cache",
-            min_curr_state_delta_id,
-            prefilled_cache=curr_state_delta_prefill,
-        )
-
-        self._stream_order_on_start = self.get_room_max_stream_ordering()
-        self._min_stream_order_on_start = self.get_room_min_stream_ordering()
-
-    def get_device_stream_token(self) -> int:
-        # TODO: shouldn't this be moved to `DeviceWorkerStore`?
-        return self._device_list_id_gen.get_current_token()
-
     async def get_users(self) -> List[JsonDict]:
         """Function to retrieve a list of users in users table.
 
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 979dd4e17e..aa58c2adc3 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -13,7 +13,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import abc
 import logging
 from typing import (
     TYPE_CHECKING,
@@ -39,6 +38,8 @@ from synapse.logging.opentracing import (
     whitelisted_homeserver,
 )
 from synapse.metrics.background_process_metrics import wrap_as_background_process
+from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
+from synapse.replication.tcp.streams._base import DeviceListsStream, UserSignatureStream
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
 from synapse.storage.database import (
     DatabasePool,
@@ -49,6 +50,11 @@ from synapse.storage.database import (
 from synapse.storage.databases.main.end_to_end_keys import EndToEndKeyWorkerStore
 from synapse.storage.databases.main.roommember import RoomMemberWorkerStore
 from synapse.storage.types import Cursor
+from synapse.storage.util.id_generators import (
+    AbstractStreamIdGenerator,
+    AbstractStreamIdTracker,
+    StreamIdGenerator,
+)
 from synapse.types import JsonDict, get_verify_key_from_cross_signing_key
 from synapse.util import json_decoder, json_encoder
 from synapse.util.caches.descriptors import cached, cachedList
@@ -80,9 +86,32 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
     ):
         super().__init__(database, db_conn, hs)
 
+        if hs.config.worker.worker_app is None:
+            self._device_list_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
+                db_conn,
+                "device_lists_stream",
+                "stream_id",
+                extra_tables=[
+                    ("user_signature_stream", "stream_id"),
+                    ("device_lists_outbound_pokes", "stream_id"),
+                    ("device_lists_changes_in_room", "stream_id"),
+                ],
+            )
+        else:
+            self._device_list_id_gen = SlavedIdTracker(
+                db_conn,
+                "device_lists_stream",
+                "stream_id",
+                extra_tables=[
+                    ("user_signature_stream", "stream_id"),
+                    ("device_lists_outbound_pokes", "stream_id"),
+                    ("device_lists_changes_in_room", "stream_id"),
+                ],
+            )
+
         # Type-ignore: _device_list_id_gen is mixed in from either DataStore (as a
         # StreamIdGenerator) or SlavedDataStore (as a SlavedIdTracker).
-        device_list_max = self._device_list_id_gen.get_current_token()  # type: ignore[attr-defined]
+        device_list_max = self._device_list_id_gen.get_current_token()
         device_list_prefill, min_device_list_id = self.db_pool.get_cache_dict(
             db_conn,
             "device_lists_stream",
@@ -136,6 +165,39 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
                 self._prune_old_outbound_device_pokes, 60 * 60 * 1000
             )
 
+    def process_replication_rows(
+        self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any]
+    ) -> None:
+        if stream_name == DeviceListsStream.NAME:
+            self._device_list_id_gen.advance(instance_name, token)
+            self._invalidate_caches_for_devices(token, rows)
+        elif stream_name == UserSignatureStream.NAME:
+            self._device_list_id_gen.advance(instance_name, token)
+            for row in rows:
+                self._user_signature_stream_cache.entity_has_changed(row.user_id, token)
+        return super().process_replication_rows(stream_name, instance_name, token, rows)
+
+    def _invalidate_caches_for_devices(
+        self, token: int, rows: Iterable[DeviceListsStream.DeviceListsStreamRow]
+    ) -> None:
+        for row in rows:
+            # The entities are either user IDs (starting with '@') whose devices
+            # have changed, or remote servers that we need to tell about
+            # changes.
+            if row.entity.startswith("@"):
+                self._device_list_stream_cache.entity_has_changed(row.entity, token)
+                self.get_cached_devices_for_user.invalidate((row.entity,))
+                self._get_cached_user_device.invalidate((row.entity,))
+                self.get_device_list_last_stream_id_for_remote.invalidate((row.entity,))
+
+            else:
+                self._device_list_federation_stream_cache.entity_has_changed(
+                    row.entity, token
+                )
+
+    def get_device_stream_token(self) -> int:
+        return self._device_list_id_gen.get_current_token()
+
     async def count_devices_by_users(self, user_ids: Optional[List[str]] = None) -> int:
         """Retrieve number of all devices of given users.
         Only returns number of devices that are not marked as hidden.
@@ -677,11 +739,6 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
             },
         )
 
-    @abc.abstractmethod
-    def get_device_stream_token(self) -> int:
-        """Get the current stream id from the _device_list_id_gen"""
-        ...
-
     @trace
     @cancellable
     async def get_user_devices_from_cache(
@@ -1481,6 +1538,10 @@ class DeviceBackgroundUpdateStore(SQLBaseStore):
 
 
 class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
+    # Because we have write access, this will be a StreamIdGenerator
+    # (see DeviceWorkerStore.__init__)
+    _device_list_id_gen: AbstractStreamIdGenerator
+
     def __init__(
         self,
         database: DatabasePool,
@@ -1805,7 +1866,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
                 context,
             )
 
-        async with self._device_list_id_gen.get_next_mult(  # type: ignore[attr-defined]
+        async with self._device_list_id_gen.get_next_mult(
             len(device_ids)
         ) as stream_ids:
             await self.db_pool.runInteraction(
@@ -2044,7 +2105,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
                 [],
             )
 
-        async with self._device_list_id_gen.get_next_mult(len(hosts)) as stream_ids:  # type: ignore[attr-defined]
+        async with self._device_list_id_gen.get_next_mult(len(hosts)) as stream_ids:
             return await self.db_pool.runInteraction(
                 "add_device_list_outbound_pokes",
                 add_device_list_outbound_pokes_txn,
@@ -2058,7 +2119,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         updates during partial joins.
         """
 
-        async with self._device_list_id_gen.get_next() as stream_id:  # type: ignore[attr-defined]
+        async with self._device_list_id_gen.get_next() as stream_id:
             await self.db_pool.simple_upsert(
                 table="device_lists_remote_pending",
                 keyvalues={
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 69fea452ad..a79091952a 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -81,6 +81,7 @@ from synapse.util import unwrapFirstError
 from synapse.util.async_helpers import ObservableDeferred, delay_cancellation
 from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.caches.lrucache import AsyncLruCache
+from synapse.util.caches.stream_change_cache import StreamChangeCache
 from synapse.util.cancellation import cancellable
 from synapse.util.iterutils import batch_iter
 from synapse.util.metrics import Measure
@@ -233,6 +234,21 @@ class EventsWorkerStore(SQLBaseStore):
                     db_conn, "events", "stream_ordering", step=-1
                 )
 
+        events_max = self._stream_id_gen.get_current_token()
+        curr_state_delta_prefill, min_curr_state_delta_id = self.db_pool.get_cache_dict(
+            db_conn,
+            "current_state_delta_stream",
+            entity_column="room_id",
+            stream_column="stream_id",
+            max_value=events_max,  # As we share the stream id with events token
+            limit=1000,
+        )
+        self._curr_state_delta_stream_cache: StreamChangeCache = StreamChangeCache(
+            "_curr_state_delta_stream_cache",
+            min_curr_state_delta_id,
+            prefilled_cache=curr_state_delta_prefill,
+        )
+
         if hs.config.worker.run_background_tasks:
             # We periodically clean out old transaction ID mappings
             self._clock.looping_call(
diff --git a/synapse/storage/databases/main/filtering.py b/synapse/storage/databases/main/filtering.py
index cb9ee08fa8..12f3b601f1 100644
--- a/synapse/storage/databases/main/filtering.py
+++ b/synapse/storage/databases/main/filtering.py
@@ -24,7 +24,7 @@ from synapse.types import JsonDict
 from synapse.util.caches.descriptors import cached
 
 
-class FilteringStore(SQLBaseStore):
+class FilteringWorkerStore(SQLBaseStore):
     @cached(num_args=2)
     async def get_user_filter(
         self, user_localpart: str, filter_id: Union[int, str]
@@ -46,6 +46,8 @@ class FilteringStore(SQLBaseStore):
 
         return db_to_json(def_json)
 
+
+class FilteringStore(FilteringWorkerStore):
     async def add_user_filter(self, user_localpart: str, user_filter: JsonDict) -> int:
         def_json = encode_canonical_json(user_filter)
 
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index b6c15f29f8..8ae10f6127 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -12,13 +12,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import abc
 import logging
 from typing import (
     TYPE_CHECKING,
     Any,
     Collection,
     Dict,
+    Iterable,
     List,
     Mapping,
     Optional,
@@ -31,6 +31,7 @@ from typing import (
 from synapse.api.errors import StoreError
 from synapse.config.homeserver import ExperimentalConfig
 from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
+from synapse.replication.tcp.streams import PushRulesStream
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
     DatabasePool,
@@ -90,8 +91,6 @@ def _load_rules(
     return filtered_rules
 
 
-# The ABCMeta metaclass ensures that it cannot be instantiated without
-# the abstract methods being implemented.
 class PushRulesWorkerStore(
     ApplicationServiceWorkerStore,
     PusherWorkerStore,
@@ -99,7 +98,6 @@ class PushRulesWorkerStore(
     ReceiptsWorkerStore,
     EventsWorkerStore,
     SQLBaseStore,
-    metaclass=abc.ABCMeta,
 ):
     """This is an abstract base class where subclasses must implement
     `get_max_push_rules_stream_id` which can be called in the initializer.
@@ -136,14 +134,23 @@ class PushRulesWorkerStore(
             prefilled_cache=push_rules_prefill,
         )
 
-    @abc.abstractmethod
     def get_max_push_rules_stream_id(self) -> int:
         """Get the position of the push rules stream.
 
         Returns:
             int
         """
-        raise NotImplementedError()
+        return self._push_rules_stream_id_gen.get_current_token()
+
+    def process_replication_rows(
+        self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any]
+    ) -> None:
+        if stream_name == PushRulesStream.NAME:
+            self._push_rules_stream_id_gen.advance(instance_name, token)
+            for row in rows:
+                self.get_push_rules_for_user.invalidate((row.user_id,))
+                self.push_rules_stream_cache.entity_has_changed(row.user_id, token)
+        return super().process_replication_rows(stream_name, instance_name, token, rows)
 
     @cached(max_entries=5000)
     async def get_push_rules_for_user(self, user_id: str) -> FilteredPushRules:
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
index 01206950a9..4a01562d45 100644
--- a/synapse/storage/databases/main/pusher.py
+++ b/synapse/storage/databases/main/pusher.py
@@ -27,13 +27,19 @@ from typing import (
 )
 
 from synapse.push import PusherConfig, ThrottleParams
+from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
+from synapse.replication.tcp.streams import PushersStream
 from synapse.storage._base import SQLBaseStore, db_to_json
 from synapse.storage.database import (
     DatabasePool,
     LoggingDatabaseConnection,
     LoggingTransaction,
 )
-from synapse.storage.util.id_generators import StreamIdGenerator
+from synapse.storage.util.id_generators import (
+    AbstractStreamIdGenerator,
+    AbstractStreamIdTracker,
+    StreamIdGenerator,
+)
 from synapse.types import JsonDict
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached
@@ -52,9 +58,21 @@ class PusherWorkerStore(SQLBaseStore):
         hs: "HomeServer",
     ):
         super().__init__(database, db_conn, hs)
-        self._pushers_id_gen = StreamIdGenerator(
-            db_conn, "pushers", "id", extra_tables=[("deleted_pushers", "stream_id")]
-        )
+
+        if hs.config.worker.worker_app is None:
+            self._pushers_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
+                db_conn,
+                "pushers",
+                "id",
+                extra_tables=[("deleted_pushers", "stream_id")],
+            )
+        else:
+            self._pushers_id_gen = SlavedIdTracker(
+                db_conn,
+                "pushers",
+                "id",
+                extra_tables=[("deleted_pushers", "stream_id")],
+            )
 
         self.db_pool.updates.register_background_update_handler(
             "remove_deactivated_pushers",
@@ -96,6 +114,16 @@ class PusherWorkerStore(SQLBaseStore):
 
             yield PusherConfig(**r)
 
+    def get_pushers_stream_token(self) -> int:
+        return self._pushers_id_gen.get_current_token()
+
+    def process_replication_rows(
+        self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any]
+    ) -> None:
+        if stream_name == PushersStream.NAME:
+            self._pushers_id_gen.advance(instance_name, token)
+        return super().process_replication_rows(stream_name, instance_name, token, rows)
+
     async def get_pushers_by_app_id_and_pushkey(
         self, app_id: str, pushkey: str
     ) -> Iterator[PusherConfig]:
@@ -545,8 +573,9 @@ class PusherBackgroundUpdatesStore(SQLBaseStore):
 
 
 class PusherStore(PusherWorkerStore, PusherBackgroundUpdatesStore):
-    def get_pushers_stream_token(self) -> int:
-        return self._pushers_id_gen.get_current_token()
+    # Because we have write access, this will be a StreamIdGenerator
+    # (see PusherWorkerStore.__init__)
+    _pushers_id_gen: AbstractStreamIdGenerator
 
     async def add_pusher(
         self,
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 09ce855aa8..cc27ec3804 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -415,6 +415,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
         )
 
         self._stream_order_on_start = self.get_room_max_stream_ordering()
+        self._min_stream_order_on_start = self.get_room_min_stream_ordering()
 
     def get_room_max_stream_ordering(self) -> int:
         """Get the stream_ordering of regular events that we have committed up to
diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py
index d42e36cdf1..96f3880923 100644
--- a/tests/replication/slave/storage/test_events.py
+++ b/tests/replication/slave/storage/test_events.py
@@ -21,11 +21,11 @@ from synapse.api.constants import ReceiptTypes
 from synapse.api.room_versions import RoomVersions
 from synapse.events import FrozenEvent, _EventInternalMetadata, make_event_from_dict
 from synapse.handlers.room import RoomEventSource
-from synapse.replication.slave.storage.events import SlavedEventStore
 from synapse.storage.databases.main.event_push_actions import (
     NotifCounts,
     RoomNotifCounts,
 )
+from synapse.storage.databases.main.events_worker import EventsWorkerStore
 from synapse.storage.roommember import GetRoomsForUserWithStreamOrdering, RoomsForUser
 from synapse.types import PersistedEventPosition
 
@@ -58,9 +58,9 @@ def patch__eq__(cls):
     return unpatch
 
 
-class SlavedEventStoreTestCase(BaseSlavedStoreTestCase):
+class EventsWorkerStoreTestCase(BaseSlavedStoreTestCase):
 
-    STORE_TYPE = SlavedEventStore
+    STORE_TYPE = EventsWorkerStore
 
     def setUp(self):
         # Patch up the equality operator for events so that we can check
-- 
cgit 1.5.1


From a3623af74e0af0d2f6cbd37b47dc54a1acd314d5 Mon Sep 17 00:00:00 2001
From: Ashish Kumar <ashfame@users.noreply.github.com>
Date: Fri, 11 Nov 2022 19:38:17 +0400
Subject: Add an Admin API endpoint for looking up users based on 3PID (#14405)

---
 changelog.d/14405.feature        |   1 +
 docs/admin_api/user_admin_api.md |  39 ++++++++++++++
 synapse/rest/admin/__init__.py   |   2 +
 synapse/rest/admin/users.py      |  25 +++++++++
 tests/rest/admin/test_user.py    | 107 ++++++++++++++++++++++++++++++++++-----
 5 files changed, 161 insertions(+), 13 deletions(-)
 create mode 100644 changelog.d/14405.feature

(limited to 'synapse')

diff --git a/changelog.d/14405.feature b/changelog.d/14405.feature
new file mode 100644
index 0000000000..d3ba89b597
--- /dev/null
+++ b/changelog.d/14405.feature
@@ -0,0 +1 @@
+Add an [Admin API](https://matrix-org.github.io/synapse/latest/usage/administration/admin_api/index.html) endpoint for user lookup based on third-party ID (3PID). Contributed by @ashfame.
diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md
index c95d6c9b05..880bef4194 100644
--- a/docs/admin_api/user_admin_api.md
+++ b/docs/admin_api/user_admin_api.md
@@ -1197,3 +1197,42 @@ Returns a `404` HTTP status code if no user was found, with a response body like
 ```
 
 _Added in Synapse 1.68.0._
+
+
+### Find a user based on their Third Party ID (ThreePID or 3PID)
+
+The API is:
+
+```
+GET /_synapse/admin/v1/threepid/$medium/users/$address
+```
+
+When a user matches the given address for the given medium, an HTTP `200` status code is returned with a response body like the following:
+
+```json
+{
+    "user_id": "@hello:example.org"
+}
+```
+
+**Parameters**
+
+The following parameters should be set in the URL:
+
+- `medium` - Kind of third-party ID, either `email` or `msisdn`.
+- `address` - Value of the third-party ID.
+
+The `address` may contain characters that are not URL-safe, so it is advised to URL-encode it.
+
+**Errors**
+
+Returns a `404` HTTP status code if no user was found, with a response body like this:
+
+```json
+{
+    "errcode":"M_NOT_FOUND",
+    "error":"User not found"
+}
+```
+
+_Added in Synapse 1.72.0._
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index 885669f9c7..c62ea22116 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -81,6 +81,7 @@ from synapse.rest.admin.users import (
     ShadowBanRestServlet,
     UserAdminServlet,
     UserByExternalId,
+    UserByThreePid,
     UserMembershipRestServlet,
     UserRegisterServlet,
     UserRestServletV2,
@@ -277,6 +278,7 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     RoomMessagesRestServlet(hs).register(http_server)
     RoomTimestampToEventRestServlet(hs).register(http_server)
     UserByExternalId(hs).register(http_server)
+    UserByThreePid(hs).register(http_server)
 
     # Some servlets only get registered for the main process.
     if hs.config.worker.worker_app is None:
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 15ac2059aa..1951b8a9f2 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -1224,3 +1224,28 @@ class UserByExternalId(RestServlet):
             raise NotFoundError("User not found")
 
         return HTTPStatus.OK, {"user_id": user_id}
+
+
+class UserByThreePid(RestServlet):
+    """Find a user based on 3PID of a particular medium"""
+
+    PATTERNS = admin_patterns("/threepid/(?P<medium>[^/]*)/users/(?P<address>[^/]*)")
+
+    def __init__(self, hs: "HomeServer"):
+        self._auth = hs.get_auth()
+        self._store = hs.get_datastores().main
+
+    async def on_GET(
+        self,
+        request: SynapseRequest,
+        medium: str,
+        address: str,
+    ) -> Tuple[int, JsonDict]:
+        await assert_requester_is_admin(self._auth, request)
+
+        user_id = await self._store.get_user_id_by_threepid(medium, address)
+
+        if user_id is None:
+            raise NotFoundError("User not found")
+
+        return HTTPStatus.OK, {"user_id": user_id}
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index 63410ffdf1..e8c9457794 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -41,14 +41,12 @@ from tests.unittest import override_config
 
 
 class UserRegisterTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets_for_client_rest_resource,
         profile.register_servlets,
     ]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-
         self.url = "/_synapse/admin/v1/register"
 
         self.registration_handler = Mock()
@@ -446,7 +444,6 @@ class UserRegisterTestCase(unittest.HomeserverTestCase):
 
 
 class UsersListTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
@@ -1108,7 +1105,6 @@ class UserDevicesTestCase(unittest.HomeserverTestCase):
 
 
 class DeactivateAccountTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
@@ -1382,7 +1378,6 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
 
 
 class UserRestTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
@@ -2803,7 +2798,6 @@ class UserRestTestCase(unittest.HomeserverTestCase):
 
 
 class UserMembershipRestTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
@@ -2960,7 +2954,6 @@ class UserMembershipRestTestCase(unittest.HomeserverTestCase):
 
 
 class PushersRestTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
@@ -3089,7 +3082,6 @@ class PushersRestTestCase(unittest.HomeserverTestCase):
 
 
 class UserMediaRestTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
@@ -3881,7 +3873,6 @@ class UserTokenRestTestCase(unittest.HomeserverTestCase):
     ],
 )
 class WhoisRestTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
@@ -3961,7 +3952,6 @@ class WhoisRestTestCase(unittest.HomeserverTestCase):
 
 
 class ShadowBanRestTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
@@ -4042,7 +4032,6 @@ class ShadowBanRestTestCase(unittest.HomeserverTestCase):
 
 
 class RateLimitTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
@@ -4268,7 +4257,6 @@ class RateLimitTestCase(unittest.HomeserverTestCase):
 
 
 class AccountDataTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
@@ -4358,7 +4346,6 @@ class AccountDataTestCase(unittest.HomeserverTestCase):
 
 
 class UsersByExternalIdTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
@@ -4442,3 +4429,97 @@ class UsersByExternalIdTestCase(unittest.HomeserverTestCase):
             {"user_id": self.other_user},
             channel.json_body,
         )
+
+
+class UsersByThreePidTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        synapse.rest.admin.register_servlets,
+        login.register_servlets,
+    ]
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.store = hs.get_datastores().main
+
+        self.admin_user = self.register_user("admin", "pass", admin=True)
+        self.admin_user_tok = self.login("admin", "pass")
+
+        self.other_user = self.register_user("user", "pass")
+        self.get_success(
+            self.store.user_add_threepid(
+                self.other_user, "email", "user@email.com", 1, 1
+            )
+        )
+        self.get_success(
+            self.store.user_add_threepid(self.other_user, "msidn", "+1-12345678", 1, 1)
+        )
+
+    def test_no_auth(self) -> None:
+        """Try to look up a user without authentication."""
+        url = "/_synapse/admin/v1/threepid/email/users/user%40email.com"
+
+        channel = self.make_request(
+            "GET",
+            url,
+        )
+
+        self.assertEqual(401, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"])
+
+    def test_medium_does_not_exist(self) -> None:
+        """Tests that both a lookup for a medium that does not exist and a user that
+        doesn't exist with that third party ID returns a 404"""
+        # test for unknown medium
+        url = "/_synapse/admin/v1/threepid/publickey/users/unknown-key"
+
+        channel = self.make_request(
+            "GET",
+            url,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(404, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"])
+
+        # test for unknown user with a known medium
+        url = "/_synapse/admin/v1/threepid/email/users/unknown"
+
+        channel = self.make_request(
+            "GET",
+            url,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(404, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"])
+
+    def test_success(self) -> None:
+        """Tests a successful medium + address lookup"""
+        # test for email medium with encoded value of user@email.com
+        url = "/_synapse/admin/v1/threepid/email/users/user%40email.com"
+
+        channel = self.make_request(
+            "GET",
+            url,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(
+            {"user_id": self.other_user},
+            channel.json_body,
+        )
+
+        # test for msidn medium with encoded value of +1-12345678
+        url = "/_synapse/admin/v1/threepid/msidn/users/%2B1-12345678"
+
+        channel = self.make_request(
+            "GET",
+            url,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(
+            {"user_id": self.other_user},
+            channel.json_body,
+        )
-- 
cgit 1.5.1


From fb66fae84b165e7bd132bc7cbc5732485ceee827 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 14 Nov 2022 08:13:11 -0500
Subject: Clean-up events persistence code (#14411)

By removing unused variables and making some arguments
required which are always provided.
---
 changelog.d/14411.misc                        | 1 +
 synapse/storage/controllers/persist_events.py | 2 --
 synapse/storage/databases/main/events.py      | 9 +++------
 3 files changed, 4 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/14411.misc

(limited to 'synapse')

diff --git a/changelog.d/14411.misc b/changelog.d/14411.misc
new file mode 100644
index 0000000000..f5cca5c833
--- /dev/null
+++ b/changelog.d/14411.misc
@@ -0,0 +1 @@
+Clean-up event persistence code.
diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py
index 06e71a8053..48976dc570 100644
--- a/synapse/storage/controllers/persist_events.py
+++ b/synapse/storage/controllers/persist_events.py
@@ -716,8 +716,6 @@ class EventsPersistenceStorageController:
                             )
                             if not is_still_joined:
                                 logger.info("Server no longer in room %s", room_id)
-                                latest_event_ids = set()
-                                current_state = {}
                                 delta.no_longer_in_room = True
 
                             state_delta_for_room[room_id] = delta
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 00880bb37d..c4acff5be6 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -355,9 +355,9 @@ class PersistEventsStore:
         txn: LoggingTransaction,
         *,
         events_and_contexts: List[Tuple[EventBase, EventContext]],
-        inhibit_local_membership_updates: bool = False,
-        state_delta_for_room: Optional[Dict[str, DeltaState]] = None,
-        new_forward_extremities: Optional[Dict[str, Set[str]]] = None,
+        inhibit_local_membership_updates: bool,
+        state_delta_for_room: Dict[str, DeltaState],
+        new_forward_extremities: Dict[str, Set[str]],
     ) -> None:
         """Insert some number of room events into the necessary database tables.
 
@@ -384,9 +384,6 @@ class PersistEventsStore:
             PartialStateConflictError: if attempting to persist a partial state event in
                 a room that has been un-partial stated.
         """
-        state_delta_for_room = state_delta_for_room or {}
-        new_forward_extremities = new_forward_extremities or {}
-
         all_events_and_contexts = events_and_contexts
 
         min_stream_order = events_and_contexts[0][0].internal_metadata.stream_ordering
-- 
cgit 1.5.1


From 2cc592584ae9f225216b7663e9144ac6f565b757 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 14 Nov 2022 13:46:29 +0000
Subject: Remove unused type-ignores (#14433)

* Remove unused type-ignores

Oversights in #14427 and #14429.

* Changelog
---
 changelog.d/14433.misc    | 1 +
 scripts-dev/release.py    | 4 +---
 synapse/streams/events.py | 9 ++++++---
 3 files changed, 8 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/14433.misc

(limited to 'synapse')

diff --git a/changelog.d/14433.misc b/changelog.d/14433.misc
new file mode 100644
index 0000000000..08a350b13b
--- /dev/null
+++ b/changelog.d/14433.misc
@@ -0,0 +1 @@
+Fix mypy errors introduced by bumping the locked version of `attrs` and `gitpython`.
diff --git a/scripts-dev/release.py b/scripts-dev/release.py
index c82c58c54b..bf47b6c713 100755
--- a/scripts-dev/release.py
+++ b/scripts-dev/release.py
@@ -219,9 +219,7 @@ def _prepare() -> None:
                 update_branch(repo)
 
             # Create the new release branch
-            # Type ignore will no longer be needed after GitPython 3.1.28.
-            # See https://github.com/gitpython-developers/GitPython/pull/1419
-            repo.create_head(release_branch_name, commit=base_branch)  # type: ignore[arg-type]
+            repo.create_head(release_branch_name, commit=base_branch)
 
         # Special-case SyTest: we don't actually prepare any files so we may
         # as well push it now (and only when we create a release branch;
diff --git a/synapse/streams/events.py b/synapse/streams/events.py
index bcd840bd88..f331e1af16 100644
--- a/synapse/streams/events.py
+++ b/synapse/streams/events.py
@@ -45,9 +45,12 @@ class _EventSourcesInner:
 class EventSources:
     def __init__(self, hs: "HomeServer"):
         self.sources = _EventSourcesInner(
-            # mypy thinks attribute.type is `Optional`, but we know it's never `None` here since
-            # all the attributes of `_EventSourcesInner` are annotated.
-            *(attribute.type(hs) for attribute in attr.fields(_EventSourcesInner))  # type: ignore[misc]
+            # mypy previously warned that attribute.type is `Optional`, but we know it's
+            # never `None` here since all the attributes of `_EventSourcesInner` are
+            # annotated.
+            # As of the stubs in attrs 22.1.0, `attr.fields()` now returns Any,
+            # so the call to `attribute.type` is not checked.
+            *(attribute.type(hs) for attribute in attr.fields(_EventSourcesInner))
         )
         self.store = hs.get_datastores().main
 
-- 
cgit 1.5.1


From 36097e88c4da51fce6556a58c49bd675f4cf20ab Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Mon, 14 Nov 2022 17:31:36 +0000
Subject: Remove slaved id tracker (#14376)

This matches the multi-instance writer ID generator class, which can
advance the current token both over replication and by calling the
database.
---
 changelog.d/14376.misc                             |  1 +
 synapse/replication/slave/__init__.py              | 13 ------
 synapse/replication/slave/storage/__init__.py      | 13 ------
 .../slave/storage/_slaved_id_tracker.py            | 50 ----------------------
 synapse/storage/databases/main/account_data.py     | 30 +++++--------
 synapse/storage/databases/main/devices.py          | 36 ++++++----------
 synapse/storage/databases/main/events_worker.py    | 35 ++++++---------
 synapse/storage/databases/main/push_rule.py        | 17 ++++----
 synapse/storage/databases/main/pusher.py           | 24 ++++-------
 synapse/storage/databases/main/receipts.py         | 18 ++++----
 synapse/storage/util/id_generators.py              | 13 ++++--
 11 files changed, 74 insertions(+), 176 deletions(-)
 create mode 100644 changelog.d/14376.misc
 delete mode 100644 synapse/replication/slave/__init__.py
 delete mode 100644 synapse/replication/slave/storage/__init__.py
 delete mode 100644 synapse/replication/slave/storage/_slaved_id_tracker.py

(limited to 'synapse')

diff --git a/changelog.d/14376.misc b/changelog.d/14376.misc
new file mode 100644
index 0000000000..2ca326fea6
--- /dev/null
+++ b/changelog.d/14376.misc
@@ -0,0 +1 @@
+Remove old stream ID tracking code. Contributed by Nick @Beeper (@fizzadar).
diff --git a/synapse/replication/slave/__init__.py b/synapse/replication/slave/__init__.py
deleted file mode 100644
index f43a360a80..0000000000
--- a/synapse/replication/slave/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/synapse/replication/slave/storage/__init__.py b/synapse/replication/slave/storage/__init__.py
deleted file mode 100644
index f43a360a80..0000000000
--- a/synapse/replication/slave/storage/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/synapse/replication/slave/storage/_slaved_id_tracker.py b/synapse/replication/slave/storage/_slaved_id_tracker.py
deleted file mode 100644
index 8f3f953ed4..0000000000
--- a/synapse/replication/slave/storage/_slaved_id_tracker.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from typing import List, Optional, Tuple
-
-from synapse.storage.database import LoggingDatabaseConnection
-from synapse.storage.util.id_generators import AbstractStreamIdTracker, _load_current_id
-
-
-class SlavedIdTracker(AbstractStreamIdTracker):
-    """Tracks the "current" stream ID of a stream with a single writer.
-
-    See `AbstractStreamIdTracker` for more details.
-
-    Note that this class does not work correctly when there are multiple
-    writers.
-    """
-
-    def __init__(
-        self,
-        db_conn: LoggingDatabaseConnection,
-        table: str,
-        column: str,
-        extra_tables: Optional[List[Tuple[str, str]]] = None,
-        step: int = 1,
-    ):
-        self.step = step
-        self._current = _load_current_id(db_conn, table, column, step)
-        if extra_tables:
-            for table, column in extra_tables:
-                self.advance(None, _load_current_id(db_conn, table, column))
-
-    def advance(self, instance_name: Optional[str], new_id: int) -> None:
-        self._current = (max if self.step > 0 else min)(self._current, new_id)
-
-    def get_current_token(self) -> int:
-        return self._current
-
-    def get_current_token_for_writer(self, instance_name: str) -> int:
-        return self.get_current_token()
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index c38b8a9e5a..282687ebce 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -27,7 +27,6 @@ from typing import (
 )
 
 from synapse.api.constants import AccountDataTypes
-from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams import AccountDataStream, TagAccountDataStream
 from synapse.storage._base import db_to_json
 from synapse.storage.database import (
@@ -68,12 +67,11 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
         # to write account data. A value of `True` implies that `_account_data_id_gen`
         # is an `AbstractStreamIdGenerator` and not just a tracker.
         self._account_data_id_gen: AbstractStreamIdTracker
+        self._can_write_to_account_data = (
+            self._instance_name in hs.config.worker.writers.account_data
+        )
 
         if isinstance(database.engine, PostgresEngine):
-            self._can_write_to_account_data = (
-                self._instance_name in hs.config.worker.writers.account_data
-            )
-
             self._account_data_id_gen = MultiWriterIdGenerator(
                 db_conn=db_conn,
                 db=database,
@@ -95,21 +93,13 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
             # updated over replication. (Multiple writers are not supported for
             # SQLite).
-            if self._instance_name in hs.config.worker.writers.account_data:
-                self._can_write_to_account_data = True
-                self._account_data_id_gen = StreamIdGenerator(
-                    db_conn,
-                    "room_account_data",
-                    "stream_id",
-                    extra_tables=[("room_tags_revisions", "stream_id")],
-                )
-            else:
-                self._account_data_id_gen = SlavedIdTracker(
-                    db_conn,
-                    "room_account_data",
-                    "stream_id",
-                    extra_tables=[("room_tags_revisions", "stream_id")],
-                )
+            self._account_data_id_gen = StreamIdGenerator(
+                db_conn,
+                "room_account_data",
+                "stream_id",
+                extra_tables=[("room_tags_revisions", "stream_id")],
+                is_writer=self._instance_name in hs.config.worker.writers.account_data,
+            )
 
         account_max = self.get_max_account_data_stream_id()
         self._account_data_stream_cache = StreamChangeCache(
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index aa58c2adc3..3e5c16b15b 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -38,7 +38,6 @@ from synapse.logging.opentracing import (
     whitelisted_homeserver,
 )
 from synapse.metrics.background_process_metrics import wrap_as_background_process
-from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams._base import DeviceListsStream, UserSignatureStream
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
 from synapse.storage.database import (
@@ -86,28 +85,19 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
     ):
         super().__init__(database, db_conn, hs)
 
-        if hs.config.worker.worker_app is None:
-            self._device_list_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
-                db_conn,
-                "device_lists_stream",
-                "stream_id",
-                extra_tables=[
-                    ("user_signature_stream", "stream_id"),
-                    ("device_lists_outbound_pokes", "stream_id"),
-                    ("device_lists_changes_in_room", "stream_id"),
-                ],
-            )
-        else:
-            self._device_list_id_gen = SlavedIdTracker(
-                db_conn,
-                "device_lists_stream",
-                "stream_id",
-                extra_tables=[
-                    ("user_signature_stream", "stream_id"),
-                    ("device_lists_outbound_pokes", "stream_id"),
-                    ("device_lists_changes_in_room", "stream_id"),
-                ],
-            )
+        # In the worker store this is an ID tracker which we overwrite in the non-worker
+        # class below that is used on the main process.
+        self._device_list_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
+            db_conn,
+            "device_lists_stream",
+            "stream_id",
+            extra_tables=[
+                ("user_signature_stream", "stream_id"),
+                ("device_lists_outbound_pokes", "stream_id"),
+                ("device_lists_changes_in_room", "stream_id"),
+            ],
+            is_writer=hs.config.worker.worker_app is None,
+        )
 
         # Type-ignore: _device_list_id_gen is mixed in from either DataStore (as a
         # StreamIdGenerator) or SlavedDataStore (as a SlavedIdTracker).
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index a79091952a..7a003ab88f 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -59,7 +59,6 @@ from synapse.metrics.background_process_metrics import (
     run_as_background_process,
     wrap_as_background_process,
 )
-from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams import BackfillStream
 from synapse.replication.tcp.streams.events import EventsStream
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
@@ -213,26 +212,20 @@ class EventsWorkerStore(SQLBaseStore):
             # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
             # updated over replication. (Multiple writers are not supported for
             # SQLite).
-            if hs.get_instance_name() in hs.config.worker.writers.events:
-                self._stream_id_gen = StreamIdGenerator(
-                    db_conn,
-                    "events",
-                    "stream_ordering",
-                )
-                self._backfill_id_gen = StreamIdGenerator(
-                    db_conn,
-                    "events",
-                    "stream_ordering",
-                    step=-1,
-                    extra_tables=[("ex_outlier_stream", "event_stream_ordering")],
-                )
-            else:
-                self._stream_id_gen = SlavedIdTracker(
-                    db_conn, "events", "stream_ordering"
-                )
-                self._backfill_id_gen = SlavedIdTracker(
-                    db_conn, "events", "stream_ordering", step=-1
-                )
+            self._stream_id_gen = StreamIdGenerator(
+                db_conn,
+                "events",
+                "stream_ordering",
+                is_writer=hs.get_instance_name() in hs.config.worker.writers.events,
+            )
+            self._backfill_id_gen = StreamIdGenerator(
+                db_conn,
+                "events",
+                "stream_ordering",
+                step=-1,
+                extra_tables=[("ex_outlier_stream", "event_stream_ordering")],
+                is_writer=hs.get_instance_name() in hs.config.worker.writers.events,
+            )
 
         events_max = self._stream_id_gen.get_current_token()
         curr_state_delta_prefill, min_curr_state_delta_id = self.db_pool.get_cache_dict(
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index 8ae10f6127..12ad44dbb3 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -30,7 +30,6 @@ from typing import (
 
 from synapse.api.errors import StoreError
 from synapse.config.homeserver import ExperimentalConfig
-from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams import PushRulesStream
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
@@ -111,14 +110,14 @@ class PushRulesWorkerStore(
     ):
         super().__init__(database, db_conn, hs)
 
-        if hs.config.worker.worker_app is None:
-            self._push_rules_stream_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
-                db_conn, "push_rules_stream", "stream_id"
-            )
-        else:
-            self._push_rules_stream_id_gen = SlavedIdTracker(
-                db_conn, "push_rules_stream", "stream_id"
-            )
+        # In the worker store this is an ID tracker which we overwrite in the non-worker
+        # class below that is used on the main process.
+        self._push_rules_stream_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
+            db_conn,
+            "push_rules_stream",
+            "stream_id",
+            is_writer=hs.config.worker.worker_app is None,
+        )
 
         push_rules_prefill, push_rules_id = self.db_pool.get_cache_dict(
             db_conn,
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
index 4a01562d45..fee37b9ce4 100644
--- a/synapse/storage/databases/main/pusher.py
+++ b/synapse/storage/databases/main/pusher.py
@@ -27,7 +27,6 @@ from typing import (
 )
 
 from synapse.push import PusherConfig, ThrottleParams
-from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams import PushersStream
 from synapse.storage._base import SQLBaseStore, db_to_json
 from synapse.storage.database import (
@@ -59,20 +58,15 @@ class PusherWorkerStore(SQLBaseStore):
     ):
         super().__init__(database, db_conn, hs)
 
-        if hs.config.worker.worker_app is None:
-            self._pushers_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
-                db_conn,
-                "pushers",
-                "id",
-                extra_tables=[("deleted_pushers", "stream_id")],
-            )
-        else:
-            self._pushers_id_gen = SlavedIdTracker(
-                db_conn,
-                "pushers",
-                "id",
-                extra_tables=[("deleted_pushers", "stream_id")],
-            )
+        # In the worker store this is an ID tracker which we overwrite in the non-worker
+        # class below that is used on the main process.
+        self._pushers_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
+            db_conn,
+            "pushers",
+            "id",
+            extra_tables=[("deleted_pushers", "stream_id")],
+            is_writer=hs.config.worker.worker_app is None,
+        )
 
         self.db_pool.updates.register_background_update_handler(
             "remove_deactivated_pushers",
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index dc6989527e..64519587f8 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -27,7 +27,6 @@ from typing import (
 )
 
 from synapse.api.constants import EduTypes
-from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams import ReceiptsStream
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
 from synapse.storage.database import (
@@ -61,6 +60,9 @@ class ReceiptsWorkerStore(SQLBaseStore):
         hs: "HomeServer",
     ):
         self._instance_name = hs.get_instance_name()
+
+        # In the worker store this is an ID tracker which we overwrite in the non-worker
+        # class below that is used on the main process.
         self._receipts_id_gen: AbstractStreamIdTracker
 
         if isinstance(database.engine, PostgresEngine):
@@ -87,14 +89,12 @@ class ReceiptsWorkerStore(SQLBaseStore):
             # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
             # updated over replication. (Multiple writers are not supported for
             # SQLite).
-            if hs.get_instance_name() in hs.config.worker.writers.receipts:
-                self._receipts_id_gen = StreamIdGenerator(
-                    db_conn, "receipts_linearized", "stream_id"
-                )
-            else:
-                self._receipts_id_gen = SlavedIdTracker(
-                    db_conn, "receipts_linearized", "stream_id"
-                )
+            self._receipts_id_gen = StreamIdGenerator(
+                db_conn,
+                "receipts_linearized",
+                "stream_id",
+                is_writer=hs.get_instance_name() in hs.config.worker.writers.receipts,
+            )
 
         super().__init__(database, db_conn, hs)
 
diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py
index 2dfe4c0b66..1af0af1266 100644
--- a/synapse/storage/util/id_generators.py
+++ b/synapse/storage/util/id_generators.py
@@ -186,11 +186,13 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
         column: str,
         extra_tables: Iterable[Tuple[str, str]] = (),
         step: int = 1,
+        is_writer: bool = True,
     ) -> None:
         assert step != 0
         self._lock = threading.Lock()
         self._step: int = step
         self._current: int = _load_current_id(db_conn, table, column, step)
+        self._is_writer = is_writer
         for table, column in extra_tables:
             self._current = (max if step > 0 else min)(
                 self._current, _load_current_id(db_conn, table, column, step)
@@ -204,9 +206,11 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
         self._unfinished_ids: OrderedDict[int, int] = OrderedDict()
 
     def advance(self, instance_name: str, new_id: int) -> None:
-        # `StreamIdGenerator` should only be used when there is a single writer,
-        # so replication should never happen.
-        raise Exception("Replication is not supported by StreamIdGenerator")
+        # Advance should never be called on a writer instance, only over replication
+        if self._is_writer:
+            raise Exception("Replication is not supported by writer StreamIdGenerator")
+
+        self._current = (max if self._step > 0 else min)(self._current, new_id)
 
     def get_next(self) -> AsyncContextManager[int]:
         with self._lock:
@@ -249,6 +253,9 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
         return _AsyncCtxManagerWrapper(manager())
 
     def get_current_token(self) -> int:
+        if self._is_writer:
+            return self._current
+
         with self._lock:
             if self._unfinished_ids:
                 return next(iter(self._unfinished_ids)) - self._step
-- 
cgit 1.5.1


From 634359b083eae319d7f065114851590431b7c7fb Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Tue, 15 Nov 2022 10:43:17 +0000
Subject: Update docstring to clarify that `get_partial_state_events_batch`
 does not just give you completely arbitrary partial-state events. (#14417)

---
 changelog.d/14417.misc                          |  1 +
 synapse/storage/databases/main/events_worker.py | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14417.misc

(limited to 'synapse')

diff --git a/changelog.d/14417.misc b/changelog.d/14417.misc
new file mode 100644
index 0000000000..7527fe97c2
--- /dev/null
+++ b/changelog.d/14417.misc
@@ -0,0 +1 @@
+Update docstring to clarify that `get_partial_state_events_batch` does not just give you completely arbitrary partial-state events.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 7a003ab88f..296e50d661 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -2228,7 +2228,15 @@ class EventsWorkerStore(SQLBaseStore):
         return result is not None
 
     async def get_partial_state_events_batch(self, room_id: str) -> List[str]:
-        """Get a list of events in the given room that have partial state"""
+        """
+        Get a list of events in the given room that:
+        - have partial state; and
+        - are ready to be resynced (because they have no prev_events that are
+          partial-stated)
+
+        See the docstring on `_get_partial_state_events_batch_txn` for more
+        information.
+        """
         return await self.db_pool.runInteraction(
             "get_partial_state_events_batch",
             self._get_partial_state_events_batch_txn,
-- 
cgit 1.5.1


From b5ab2c428a1c5edd634ff084019811e5f6b963d8 Mon Sep 17 00:00:00 2001
From: Tuomas Ojamies <tuomas.ojamies@gmail.com>
Date: Tue, 15 Nov 2022 13:55:00 +0100
Subject: Support using SSL on worker endpoints. (#14128)

* Fix missing SSL support in worker endpoints.

* Add changelog

* SSL for Replication endpoint

* Remove unit test change

* Refactor listener creation to reduce duplicated code

* Fix the logger message

* Update synapse/app/_base.py

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

* Update synapse/app/_base.py

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

* Update synapse/app/_base.py

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

* Add config documentation for new TLS option

Co-authored-by: Tuomas Ojamies <tojamies@palantir.com>
Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
Co-authored-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 changelog.d/14128.misc                           |  1 +
 docs/usage/configuration/config_documentation.md | 20 +++++++++
 synapse/app/_base.py                             | 53 +++++++++++++++++++++++-
 synapse/app/generic_worker.py                    | 28 ++++---------
 synapse/app/homeserver.py                        | 34 ++-------------
 synapse/config/workers.py                        |  7 ++++
 synapse/replication/http/_base.py                | 10 ++++-
 7 files changed, 100 insertions(+), 53 deletions(-)
 create mode 100644 changelog.d/14128.misc

(limited to 'synapse')

diff --git a/changelog.d/14128.misc b/changelog.d/14128.misc
new file mode 100644
index 0000000000..29168ef955
--- /dev/null
+++ b/changelog.d/14128.misc
@@ -0,0 +1 @@
+Add TLS support for generic worker endpoints.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 9a6bd08d01..f5937dd902 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3893,6 +3893,26 @@ Example configuration:
 worker_replication_http_port: 9093
 ```
 ---
+### `worker_replication_http_tls`
+
+Whether TLS should be used for talking to the HTTP replication port on the main
+Synapse process.
+The main Synapse process defines this with the `tls` option on its [listener](#listeners) that
+has the `replication` resource enabled.
+
+**Please note:** by default, it is not safe to expose replication ports to the
+public Internet, even with TLS enabled.
+See [`worker_replication_secret`](#worker_replication_secret).
+
+Defaults to `false`.
+
+*Added in Synapse 1.72.0.*
+
+Example configuration:
+```yaml
+worker_replication_http_tls: true
+```
+---
 ### `worker_listeners`
 
 A worker can handle HTTP requests. To do so, a `worker_listeners` option 
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index a683ebf4cb..8f5b1a20f5 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -47,6 +47,7 @@ from twisted.internet.tcp import Port
 from twisted.logger import LoggingFile, LogLevel
 from twisted.protocols.tls import TLSMemoryBIOFactory
 from twisted.python.threadpool import ThreadPool
+from twisted.web.resource import Resource
 
 import synapse.util.caches
 from synapse.api.constants import MAX_PDU_SIZE
@@ -55,12 +56,13 @@ from synapse.app.phone_stats_home import start_phone_stats_home
 from synapse.config import ConfigError
 from synapse.config._base import format_config_error
 from synapse.config.homeserver import HomeServerConfig
-from synapse.config.server import ManholeConfig
+from synapse.config.server import ListenerConfig, ManholeConfig
 from synapse.crypto import context_factory
 from synapse.events.presence_router import load_legacy_presence_router
 from synapse.events.spamcheck import load_legacy_spam_checkers
 from synapse.events.third_party_rules import load_legacy_third_party_event_rules
 from synapse.handlers.auth import load_legacy_password_auth_providers
+from synapse.http.site import SynapseSite
 from synapse.logging.context import PreserveLoggingContext
 from synapse.logging.opentracing import init_tracer
 from synapse.metrics import install_gc_manager, register_threadpool
@@ -357,6 +359,55 @@ def listen_tcp(
     return r  # type: ignore[return-value]
 
 
+def listen_http(
+    listener_config: ListenerConfig,
+    root_resource: Resource,
+    version_string: str,
+    max_request_body_size: int,
+    context_factory: IOpenSSLContextFactory,
+    reactor: IReactorSSL = reactor,
+) -> List[Port]:
+    port = listener_config.port
+    bind_addresses = listener_config.bind_addresses
+    tls = listener_config.tls
+
+    assert listener_config.http_options is not None
+
+    site_tag = listener_config.http_options.tag
+    if site_tag is None:
+        site_tag = str(port)
+
+    site = SynapseSite(
+        "synapse.access.%s.%s" % ("https" if tls else "http", site_tag),
+        site_tag,
+        listener_config,
+        root_resource,
+        version_string,
+        max_request_body_size=max_request_body_size,
+        reactor=reactor,
+    )
+    if tls:
+        # refresh_certificate should have been called before this.
+        assert context_factory is not None
+        ports = listen_ssl(
+            bind_addresses,
+            port,
+            site,
+            context_factory,
+            reactor=reactor,
+        )
+        logger.info("Synapse now listening on TCP port %d (TLS)", port)
+    else:
+        ports = listen_tcp(
+            bind_addresses,
+            port,
+            site,
+            reactor=reactor,
+        )
+        logger.info("Synapse now listening on TCP port %d", port)
+    return ports
+
+
 def listen_ssl(
     bind_addresses: Collection[str],
     port: int,
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 51446b49cd..1d9aef45c2 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -44,7 +44,7 @@ from synapse.config.server import ListenerConfig
 from synapse.federation.transport.server import TransportLayerServer
 from synapse.http.server import JsonResource, OptionsResource
 from synapse.http.servlet import RestServlet, parse_json_object_from_request
-from synapse.http.site import SynapseRequest, SynapseSite
+from synapse.http.site import SynapseRequest
 from synapse.logging.context import LoggingContext
 from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource
@@ -288,15 +288,9 @@ class GenericWorkerServer(HomeServer):
     DATASTORE_CLASS = GenericWorkerSlavedStore  # type: ignore
 
     def _listen_http(self, listener_config: ListenerConfig) -> None:
-        port = listener_config.port
-        bind_addresses = listener_config.bind_addresses
 
         assert listener_config.http_options is not None
 
-        site_tag = listener_config.http_options.tag
-        if site_tag is None:
-            site_tag = str(port)
-
         # We always include a health resource.
         resources: Dict[str, Resource] = {"/health": HealthResource()}
 
@@ -395,23 +389,15 @@ class GenericWorkerServer(HomeServer):
 
         root_resource = create_resource_tree(resources, OptionsResource())
 
-        _base.listen_tcp(
-            bind_addresses,
-            port,
-            SynapseSite(
-                "synapse.access.http.%s" % (site_tag,),
-                site_tag,
-                listener_config,
-                root_resource,
-                self.version_string,
-                max_request_body_size=max_request_body_size(self.config),
-                reactor=self.get_reactor(),
-            ),
+        _base.listen_http(
+            listener_config,
+            root_resource,
+            self.version_string,
+            max_request_body_size(self.config),
+            self.tls_server_context_factory,
             reactor=self.get_reactor(),
         )
 
-        logger.info("Synapse worker now listening on port %d", port)
-
     def start_listening(self) -> None:
         for listener in self.config.worker.worker_listeners:
             if listener.type == "http":
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index de3f08876f..4f4fee4782 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -37,8 +37,7 @@ from synapse.api.urls import (
 from synapse.app import _base
 from synapse.app._base import (
     handle_startup_exception,
-    listen_ssl,
-    listen_tcp,
+    listen_http,
     max_request_body_size,
     redirect_stdio_to_logs,
     register_start,
@@ -53,7 +52,6 @@ from synapse.http.server import (
     RootOptionsRedirectResource,
     StaticResource,
 )
-from synapse.http.site import SynapseSite
 from synapse.logging.context import LoggingContext
 from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource
@@ -83,8 +81,6 @@ class SynapseHomeServer(HomeServer):
         self, config: HomeServerConfig, listener_config: ListenerConfig
     ) -> Iterable[Port]:
         port = listener_config.port
-        bind_addresses = listener_config.bind_addresses
-        tls = listener_config.tls
         # Must exist since this is an HTTP listener.
         assert listener_config.http_options is not None
         site_tag = listener_config.http_options.tag
@@ -140,37 +136,15 @@ class SynapseHomeServer(HomeServer):
         else:
             root_resource = OptionsResource()
 
-        site = SynapseSite(
-            "synapse.access.%s.%s" % ("https" if tls else "http", site_tag),
-            site_tag,
+        ports = listen_http(
             listener_config,
             create_resource_tree(resources, root_resource),
             self.version_string,
-            max_request_body_size=max_request_body_size(self.config),
+            max_request_body_size(self.config),
+            self.tls_server_context_factory,
             reactor=self.get_reactor(),
         )
 
-        if tls:
-            # refresh_certificate should have been called before this.
-            assert self.tls_server_context_factory is not None
-            ports = listen_ssl(
-                bind_addresses,
-                port,
-                site,
-                self.tls_server_context_factory,
-                reactor=self.get_reactor(),
-            )
-            logger.info("Synapse now listening on TCP port %d (TLS)", port)
-
-        else:
-            ports = listen_tcp(
-                bind_addresses,
-                port,
-                site,
-                reactor=self.get_reactor(),
-            )
-            logger.info("Synapse now listening on TCP port %d", port)
-
         return ports
 
     def _configure_named_resource(
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 0fb725dd8f..88b3168cbc 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -67,6 +67,7 @@ class InstanceLocationConfig:
 
     host: str
     port: int
+    tls: bool = False
 
 
 @attr.s
@@ -149,6 +150,12 @@ class WorkerConfig(Config):
         # The port on the main synapse for HTTP replication endpoint
         self.worker_replication_http_port = config.get("worker_replication_http_port")
 
+        # The tls mode on the main synapse for HTTP replication endpoint.
+        # For backward compatibility this defaults to False.
+        self.worker_replication_http_tls = config.get(
+            "worker_replication_http_tls", False
+        )
+
         # The shared secret used for authentication when connecting to the main synapse.
         self.worker_replication_secret = config.get("worker_replication_secret", None)
 
diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py
index acb0bd18f7..5e661f8c73 100644
--- a/synapse/replication/http/_base.py
+++ b/synapse/replication/http/_base.py
@@ -184,8 +184,10 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
         client = hs.get_simple_http_client()
         local_instance_name = hs.get_instance_name()
 
+        # The values of these options should match the replication listener settings
         master_host = hs.config.worker.worker_replication_host
         master_port = hs.config.worker.worker_replication_http_port
+        master_tls = hs.config.worker.worker_replication_http_tls
 
         instance_map = hs.config.worker.instance_map
 
@@ -205,9 +207,11 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
                 if instance_name == "master":
                     host = master_host
                     port = master_port
+                    tls = master_tls
                 elif instance_name in instance_map:
                     host = instance_map[instance_name].host
                     port = instance_map[instance_name].port
+                    tls = instance_map[instance_name].tls
                 else:
                     raise Exception(
                         "Instance %r not in 'instance_map' config" % (instance_name,)
@@ -238,7 +242,11 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
                         "Unknown METHOD on %s replication endpoint" % (cls.NAME,)
                     )
 
-                uri = "http://%s:%s/_synapse/replication/%s/%s" % (
+                # The scheme defaults to http; https is used only when the target
+                # instance's replication listener is configured with TLS enabled.
+                scheme = "https" if tls else "http"
+                uri = "%s://%s:%s/_synapse/replication/%s/%s" % (
+                    scheme,
                     host,
                     port,
                     cls.NAME,
-- 
cgit 1.5.1


From 63cc56affa3872443fffcac655413a8d9ffabfe4 Mon Sep 17 00:00:00 2001
From: "DeepBlueV7.X" <nicolas.werner@hotmail.de>
Date: Tue, 15 Nov 2022 16:29:30 +0100
Subject: Send content rules with pattern_type to clients (#14356)

---
 changelog.d/14356.bugfix     |  1 +
 synapse/push/clientformat.py | 16 +++++++++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/14356.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14356.bugfix b/changelog.d/14356.bugfix
new file mode 100644
index 0000000000..288d58a540
--- /dev/null
+++ b/changelog.d/14356.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in 1.66 which would not send certain pushrules to clients. Contributed by Nico.
diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py
index 7095ae83f9..622a1e35c5 100644
--- a/synapse/push/clientformat.py
+++ b/synapse/push/clientformat.py
@@ -44,6 +44,12 @@ def format_push_rules_for_user(
 
         rulearray.append(template_rule)
 
+        pattern_type = template_rule.pop("pattern_type", None)
+        if pattern_type == "user_id":
+            template_rule["pattern"] = user.to_string()
+        elif pattern_type == "user_localpart":
+            template_rule["pattern"] = user.localpart
+
         template_rule["enabled"] = enabled
 
         if "conditions" not in template_rule:
@@ -93,10 +99,14 @@ def _rule_to_template(rule: PushRule) -> Optional[Dict[str, Any]]:
         if len(rule.conditions) != 1:
             return None
         thecond = rule.conditions[0]
-        if "pattern" not in thecond:
-            return None
+
         templaterule = {"actions": rule.actions}
-        templaterule["pattern"] = thecond["pattern"]
+        if "pattern" in thecond:
+            templaterule["pattern"] = thecond["pattern"]
+        elif "pattern_type" in thecond:
+            templaterule["pattern_type"] = thecond["pattern_type"]
+        else:
+            return None
     else:
         # This should not be reached unless this function is not kept in sync
         # with PRIORITY_CLASS_INVERSE_MAP.
-- 
cgit 1.5.1


From 258b5285b6b486526dffef9431c2ab063913f42b Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 15 Nov 2022 16:36:43 +0000
Subject: Fix typechecking errors introduced in #14128 (#14455)

* Fix typechecking errors introduced in #14128

* Changelog

* Correct annotations

so that context_factory works if you don't use TLS
---
 changelog.d/14455.misc | 1 +
 synapse/app/_base.py   | 4 ++--
 synapse/server.py      | 5 +++--
 3 files changed, 6 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14455.misc

(limited to 'synapse')

diff --git a/changelog.d/14455.misc b/changelog.d/14455.misc
new file mode 100644
index 0000000000..29168ef955
--- /dev/null
+++ b/changelog.d/14455.misc
@@ -0,0 +1 @@
+Add TLS support for generic worker endpoints.
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 8f5b1a20f5..41d2732ef9 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -364,8 +364,8 @@ def listen_http(
     root_resource: Resource,
     version_string: str,
     max_request_body_size: int,
-    context_factory: IOpenSSLContextFactory,
-    reactor: IReactorSSL = reactor,
+    context_factory: Optional[IOpenSSLContextFactory],
+    reactor: ISynapseReactor = reactor,
 ) -> List[Port]:
     port = listener_config.port
     bind_addresses = listener_config.bind_addresses
diff --git a/synapse/server.py b/synapse/server.py
index c4e025af22..f0a60d0056 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -221,8 +221,6 @@ class HomeServer(metaclass=abc.ABCMeta):
     # instantiated during setup() for future return by get_datastores()
     DATASTORE_CLASS = abc.abstractproperty()
 
-    tls_server_context_factory: Optional[IOpenSSLContextFactory]
-
     def __init__(
         self,
         hostname: str,
@@ -258,6 +256,9 @@ class HomeServer(metaclass=abc.ABCMeta):
         self._module_web_resources: Dict[str, Resource] = {}
         self._module_web_resources_consumed = False
 
+        # This attribute is set by the free function `refresh_certificate`.
+        self.tls_server_context_factory: Optional[IOpenSSLContextFactory] = None
+
     def register_module_web_resource(self, path: str, resource: Resource) -> None:
         """Allows a module to register a web resource to be served at the given path.
 
-- 
cgit 1.5.1


From 1eed795fc56d95df3968e37f3a4db92f24513e15 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 15 Nov 2022 17:35:19 +0000
Subject: Include heroes in partial join responses' state (#14442)

* Pull out hero selection logic

* Include heroes in partial join response's state

* Changelog

* Fixup trial test

* Remove TODO
---
 changelog.d/14442.feature                    |  1 +
 synapse/federation/federation_server.py      | 23 +++++++++++++++++----
 synapse/handlers/sync.py                     | 20 +++----------------
 synapse/storage/databases/main/roommember.py | 30 ++++++++++++++++++++++++++++
 tests/federation/test_federation_server.py   | 11 ++++++----
 5 files changed, 60 insertions(+), 25 deletions(-)
 create mode 100644 changelog.d/14442.feature

(limited to 'synapse')

diff --git a/changelog.d/14442.feature b/changelog.d/14442.feature
new file mode 100644
index 0000000000..917e7edfb3
--- /dev/null
+++ b/changelog.d/14442.feature
@@ -0,0 +1 @@
+Faster joins: include heroes' membership events in the partial join response, for rooms without a name or canonical alias.
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 59e351595b..bb20af6e91 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -74,6 +74,8 @@ from synapse.replication.http.federation import (
 )
 from synapse.storage.databases.main.events import PartialStateConflictError
 from synapse.storage.databases.main.lock import Lock
+from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary
+from synapse.storage.roommember import MemberSummary
 from synapse.types import JsonDict, StateMap, get_domain_from_id
 from synapse.util import json_decoder, unwrapFirstError
 from synapse.util.async_helpers import Linearizer, concurrently_execute, gather_results
@@ -691,8 +693,9 @@ class FederationServer(FederationBase):
         state_event_ids: Collection[str]
         servers_in_room: Optional[Collection[str]]
         if caller_supports_partial_state:
+            summary = await self.store.get_room_summary(room_id)
             state_event_ids = _get_event_ids_for_partial_state_join(
-                event, prev_state_ids
+                event, prev_state_ids, summary
             )
             servers_in_room = await self.state.get_hosts_in_room_at_events(
                 room_id, event_ids=event.prev_event_ids()
@@ -1495,6 +1498,7 @@ class FederationHandlerRegistry:
 def _get_event_ids_for_partial_state_join(
     join_event: EventBase,
     prev_state_ids: StateMap[str],
+    summary: Dict[str, MemberSummary],
 ) -> Collection[str]:
     """Calculate state to be retuned in a partial_state send_join
 
@@ -1521,8 +1525,19 @@ def _get_event_ids_for_partial_state_join(
     if current_membership_event_id is not None:
         state_event_ids.add(current_membership_event_id)
 
-    # TODO: return a few more members:
-    #   - those with invites
-    #   - those that are kicked? / banned
+    name_id = prev_state_ids.get((EventTypes.Name, ""))
+    canonical_alias_id = prev_state_ids.get((EventTypes.CanonicalAlias, ""))
+    if not name_id and not canonical_alias_id:
+        # Also include the hero members of the room (for DM rooms without a title).
+        # To do this properly, we should select the correct subset of membership events
+        # from `prev_state_ids`. Instead, we are lazier and use the (cached)
+        # `get_room_summary` function, which is based on the current state of the room.
+        # This introduces races; we choose to ignore them because a) they should be rare
+        # and b) even if it's wrong, joining servers will get the full state eventually.
+        heroes = extract_heroes_from_room_summary(summary, join_event.state_key)
+        for hero in heroes:
+            membership_event_id = prev_state_ids.get((EventTypes.Member, hero))
+            if membership_event_id:
+                state_event_ids.add(membership_event_id)
 
     return state_event_ids
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 1db5d68021..259456b55d 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -41,6 +41,7 @@ from synapse.logging.context import current_context
 from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, start_active_span
 from synapse.push.clientformat import format_push_rules_for_user
 from synapse.storage.databases.main.event_push_actions import RoomNotifCounts
+from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary
 from synapse.storage.roommember import MemberSummary
 from synapse.storage.state import StateFilter
 from synapse.types import (
@@ -805,18 +806,6 @@ class SyncHandler:
             if canonical_alias and canonical_alias.content.get("alias"):
                 return summary
 
-        me = sync_config.user.to_string()
-
-        joined_user_ids = [
-            r[0] for r in details.get(Membership.JOIN, empty_ms).members if r[0] != me
-        ]
-        invited_user_ids = [
-            r[0] for r in details.get(Membership.INVITE, empty_ms).members if r[0] != me
-        ]
-        gone_user_ids = [
-            r[0] for r in details.get(Membership.LEAVE, empty_ms).members if r[0] != me
-        ] + [r[0] for r in details.get(Membership.BAN, empty_ms).members if r[0] != me]
-
         # FIXME: only build up a member_ids list for our heroes
         member_ids = {}
         for membership in (
@@ -828,11 +817,8 @@ class SyncHandler:
             for user_id, event_id in details.get(membership, empty_ms).members:
                 member_ids[user_id] = event_id
 
-        # FIXME: order by stream ordering rather than as returned by SQL
-        if joined_user_ids or invited_user_ids:
-            summary["m.heroes"] = sorted(joined_user_ids + invited_user_ids)[0:5]
-        else:
-            summary["m.heroes"] = sorted(gone_user_ids)[0:5]
+        me = sync_config.user.to_string()
+        summary["m.heroes"] = extract_heroes_from_room_summary(details, me)
 
         if not sync_config.filter_collection.lazy_load_members():
             return summary
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index e56a13f21e..f02c1d7ea7 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -1517,6 +1517,36 @@ class RoomMemberStore(
         await self.db_pool.runInteraction("forget_membership", f)
 
 
+def extract_heroes_from_room_summary(
+    details: Mapping[str, MemberSummary], me: str
+) -> List[str]:
+    """Determine the users that represent a room, from the perspective of the `me` user.
+
+    The rules which say which users we select are specified in the "Room Summary"
+    section of
+    https://spec.matrix.org/v1.4/client-server-api/#get_matrixclientv3sync
+
+    Returns a list (possibly empty) of heroes' mxids.
+    """
+    empty_ms = MemberSummary([], 0)
+
+    joined_user_ids = [
+        r[0] for r in details.get(Membership.JOIN, empty_ms).members if r[0] != me
+    ]
+    invited_user_ids = [
+        r[0] for r in details.get(Membership.INVITE, empty_ms).members if r[0] != me
+    ]
+    gone_user_ids = [
+        r[0] for r in details.get(Membership.LEAVE, empty_ms).members if r[0] != me
+    ] + [r[0] for r in details.get(Membership.BAN, empty_ms).members if r[0] != me]
+
+    # FIXME: order by stream ordering rather than as returned by SQL
+    if joined_user_ids or invited_user_ids:
+        return sorted(joined_user_ids + invited_user_ids)[0:5]
+    else:
+        return sorted(gone_user_ids)[0:5]
+
+
 @attr.s(slots=True, auto_attribs=True)
 class _JoinedHostsCache:
     """The cached data used by the `_get_joined_hosts_cache`."""
diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py
index 3a6ef221ae..177e5b5afc 100644
--- a/tests/federation/test_federation_server.py
+++ b/tests/federation/test_federation_server.py
@@ -212,7 +212,7 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase):
         self.assertEqual(r[("m.room.member", joining_user)].membership, "join")
 
     @override_config({"experimental_features": {"msc3706_enabled": True}})
-    def test_send_join_partial_state(self):
+    def test_send_join_partial_state(self) -> None:
         """When MSC3706 support is enabled, /send_join should return partial state"""
         joining_user = "@misspiggy:" + self.OTHER_SERVER_NAME
         join_result = self._make_join(joining_user)
@@ -240,6 +240,9 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase):
                 ("m.room.power_levels", ""),
                 ("m.room.join_rules", ""),
                 ("m.room.history_visibility", ""),
+                # Users included here because they're heroes.
+                ("m.room.member", "@kermit:test"),
+                ("m.room.member", "@fozzie:test"),
             ],
         )
 
@@ -249,9 +252,9 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase):
         ]
         self.assertCountEqual(
             returned_auth_chain_events,
-            [
-                ("m.room.member", "@kermit:test"),
-            ],
+            # TODO: change the test so that we get at least one event in the auth chain
+            #   here.
+            [],
         )
 
         # the room should show that the new user is a member
-- 
cgit 1.5.1


From 5cb6ad3b87caaadaedc3cc57e5513feb459b519d Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 16 Nov 2022 11:14:38 +0000
Subject: Fix HTML templates missing correct HTML tags (#14448)

---
 changelog.d/14448.bugfix                               | 1 +
 synapse/res/templates/invalid_token.html               | 1 +
 synapse/res/templates/notif_mail.html                  | 2 ++
 synapse/res/templates/password_reset.html              | 1 +
 synapse/res/templates/password_reset_confirmation.html | 1 +
 synapse/res/templates/password_reset_failure.html      | 1 +
 synapse/res/templates/password_reset_success.html      | 1 +
 synapse/res/templates/recaptcha.html                   | 1 +
 synapse/res/templates/registration.html                | 1 +
 synapse/res/templates/registration_failure.html        | 1 +
 synapse/res/templates/registration_success.html        | 1 +
 synapse/res/templates/registration_token.html          | 1 +
 synapse/res/templates/sso_account_deactivated.html     | 1 +
 synapse/res/templates/sso_auth_account_details.html    | 1 +
 synapse/res/templates/sso_auth_bad_user.html           | 1 +
 synapse/res/templates/sso_auth_confirm.html            | 1 +
 synapse/res/templates/sso_auth_success.html            | 1 +
 synapse/res/templates/sso_error.html                   | 1 +
 synapse/res/templates/sso_login_idp_picker.html        | 1 +
 synapse/res/templates/sso_new_user_consent.html        | 1 +
 synapse/res/templates/sso_redirect_confirm.html        | 1 +
 synapse/res/templates/terms.html                       | 1 +
 22 files changed, 23 insertions(+)
 create mode 100644 changelog.d/14448.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14448.bugfix b/changelog.d/14448.bugfix
new file mode 100644
index 0000000000..4bf1c183f6
--- /dev/null
+++ b/changelog.d/14448.bugfix
@@ -0,0 +1 @@
+Fix rendering of some HTML templates (including emails). Introduced in v1.71.0.
diff --git a/synapse/res/templates/invalid_token.html b/synapse/res/templates/invalid_token.html
index d0b1dae669..b19e3023a1 100644
--- a/synapse/res/templates/invalid_token.html
+++ b/synapse/res/templates/invalid_token.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Invalid renewal token.{% endblock %}
 
 {% block body %}
diff --git a/synapse/res/templates/notif_mail.html b/synapse/res/templates/notif_mail.html
index 939d40315f..2add9dd859 100644
--- a/synapse/res/templates/notif_mail.html
+++ b/synapse/res/templates/notif_mail.html
@@ -1,3 +1,5 @@
+{% extends "_base.html" %}
+
 {% block title %}New activity in room{% endblock %}
 
 {% block header %}
diff --git a/synapse/res/templates/password_reset.html b/synapse/res/templates/password_reset.html
index de5a9ec68f..1f267946c8 100644
--- a/synapse/res/templates/password_reset.html
+++ b/synapse/res/templates/password_reset.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Password reset{% endblock %}
 
 {% block body %}
diff --git a/synapse/res/templates/password_reset_confirmation.html b/synapse/res/templates/password_reset_confirmation.html
index 0eac64b6a8..fabb9a6ed5 100644
--- a/synapse/res/templates/password_reset_confirmation.html
+++ b/synapse/res/templates/password_reset_confirmation.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Password reset confirmation{% endblock %}
 
 {% block body %}
diff --git a/synapse/res/templates/password_reset_failure.html b/synapse/res/templates/password_reset_failure.html
index 977babdb40..9990e860f9 100644
--- a/synapse/res/templates/password_reset_failure.html
+++ b/synapse/res/templates/password_reset_failure.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Password reset failure{% endblock %}
 
 {% block body %}
diff --git a/synapse/res/templates/password_reset_success.html b/synapse/res/templates/password_reset_success.html
index 0e99fad7ff..edada513ab 100644
--- a/synapse/res/templates/password_reset_success.html
+++ b/synapse/res/templates/password_reset_success.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Password reset success{% endblock %}
 
 {% block body %}
diff --git a/synapse/res/templates/recaptcha.html b/synapse/res/templates/recaptcha.html
index feaf3f6aed..8204928cdf 100644
--- a/synapse/res/templates/recaptcha.html
+++ b/synapse/res/templates/recaptcha.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Authentication{% endblock %}
 
 {% block header %}
diff --git a/synapse/res/templates/registration.html b/synapse/res/templates/registration.html
index 189960a832..cdb815665e 100644
--- a/synapse/res/templates/registration.html
+++ b/synapse/res/templates/registration.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Registration{% endblock %}
 
 {% block body %}
diff --git a/synapse/res/templates/registration_failure.html b/synapse/res/templates/registration_failure.html
index 3debe9301d..ae2a9cae2c 100644
--- a/synapse/res/templates/registration_failure.html
+++ b/synapse/res/templates/registration_failure.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Registration failure{% endblock %}
 
 {% block body %}
diff --git a/synapse/res/templates/registration_success.html b/synapse/res/templates/registration_success.html
index e2dd020a9e..6d45111796 100644
--- a/synapse/res/templates/registration_success.html
+++ b/synapse/res/templates/registration_success.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Your email has now been validated{% endblock %}
 
 {% block body %}
diff --git a/synapse/res/templates/registration_token.html b/synapse/res/templates/registration_token.html
index 2ee5866ba5..ee4e5295e7 100644
--- a/synapse/res/templates/registration_token.html
+++ b/synapse/res/templates/registration_token.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Authentication{% endblock %}
 
 {% block header %}
diff --git a/synapse/res/templates/sso_account_deactivated.html b/synapse/res/templates/sso_account_deactivated.html
index c634229840..b85d96cc74 100644
--- a/synapse/res/templates/sso_account_deactivated.html
+++ b/synapse/res/templates/sso_account_deactivated.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}SSO account deactivated{% endblock %}
 
 {% block header %}
diff --git a/synapse/res/templates/sso_auth_account_details.html b/synapse/res/templates/sso_auth_account_details.html
index b516333373..11636d7f5d 100644
--- a/synapse/res/templates/sso_auth_account_details.html
+++ b/synapse/res/templates/sso_auth_account_details.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Create your account{% endblock %}
 
 {% block header %}
diff --git a/synapse/res/templates/sso_auth_bad_user.html b/synapse/res/templates/sso_auth_bad_user.html
index 69fdcc9ef0..819d79a461 100644
--- a/synapse/res/templates/sso_auth_bad_user.html
+++ b/synapse/res/templates/sso_auth_bad_user.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Authentication failed{% endblock %}
 
 {% block header %}
diff --git a/synapse/res/templates/sso_auth_confirm.html b/synapse/res/templates/sso_auth_confirm.html
index 2d106e0ae4..3927d6eda3 100644
--- a/synapse/res/templates/sso_auth_confirm.html
+++ b/synapse/res/templates/sso_auth_confirm.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Confirm it's you{% endblock %}
 
 {% block header %}
diff --git a/synapse/res/templates/sso_auth_success.html b/synapse/res/templates/sso_auth_success.html
index 56150eaefe..afeffb7191 100644
--- a/synapse/res/templates/sso_auth_success.html
+++ b/synapse/res/templates/sso_auth_success.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Authentication successful{% endblock %}
 
 {% block header %}
diff --git a/synapse/res/templates/sso_error.html b/synapse/res/templates/sso_error.html
index e394a92623..6fa36c11c9 100644
--- a/synapse/res/templates/sso_error.html
+++ b/synapse/res/templates/sso_error.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Authentication failed{% endblock %}
 
 {% block header %}
diff --git a/synapse/res/templates/sso_login_idp_picker.html b/synapse/res/templates/sso_login_idp_picker.html
index a2772ca9ef..58b0b3121c 100644
--- a/synapse/res/templates/sso_login_idp_picker.html
+++ b/synapse/res/templates/sso_login_idp_picker.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Choose identity provider{% endblock %}
 
 {% block header %}
diff --git a/synapse/res/templates/sso_new_user_consent.html b/synapse/res/templates/sso_new_user_consent.html
index 126887d26c..fda29928d1 100644
--- a/synapse/res/templates/sso_new_user_consent.html
+++ b/synapse/res/templates/sso_new_user_consent.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Agree to terms and conditions{% endblock %}
 
 {% block header %}
diff --git a/synapse/res/templates/sso_redirect_confirm.html b/synapse/res/templates/sso_redirect_confirm.html
index 887ee0d294..cc2e7b3a5b 100644
--- a/synapse/res/templates/sso_redirect_confirm.html
+++ b/synapse/res/templates/sso_redirect_confirm.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Continue to your account{% endblock %}
 
 {% block header %}
diff --git a/synapse/res/templates/terms.html b/synapse/res/templates/terms.html
index 977c3d0bc7..ffabebdd8b 100644
--- a/synapse/res/templates/terms.html
+++ b/synapse/res/templates/terms.html
@@ -1,3 +1,4 @@
+{% extends "_base.html" %}
 {% block title %}Authentication{% endblock %}
 
 {% block header %}
-- 
cgit 1.5.1


From 945a0928c793c0bd8573e179583d983187e5f392 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 16 Nov 2022 12:09:33 +0000
Subject: Don't filter state in /context response (#14461)

We don't filter state usually, so doing so here is a waste of time. This is not much of an issue for clients that enable lazy loading of members, since there will be fewer state events.
---
 changelog.d/14461.misc   | 1 +
 synapse/handlers/room.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14461.misc

(limited to 'synapse')

diff --git a/changelog.d/14461.misc b/changelog.d/14461.misc
new file mode 100644
index 0000000000..cdfa577a4c
--- /dev/null
+++ b/changelog.d/14461.misc
@@ -0,0 +1 @@
+Improve performance of `/context` in large rooms.
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 66a50bca6e..6dcfd86fdf 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -1451,7 +1451,7 @@ class RoomContextHandler:
             events_before=events_before,
             event=event,
             events_after=events_after,
-            state=await filter_evts(state_events),
+            state=state_events,
             aggregations=aggregations,
             start=await token.copy_and_replace(
                 StreamKeyType.ROOM, results.start
-- 
cgit 1.5.1


From d63814fd736fed5d3d45ff3af5e6d3bfae50c439 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 16 Nov 2022 13:50:07 +0000
Subject: Revert "Remove slaved id tracker (#14376)" (#14463)

This reverts commit 36097e88c4da51fce6556a58c49bd675f4cf20ab.
---
 changelog.d/14376.misc                             |  1 -
 synapse/replication/slave/__init__.py              | 13 ++++++
 synapse/replication/slave/storage/__init__.py      | 13 ++++++
 .../slave/storage/_slaved_id_tracker.py            | 50 ++++++++++++++++++++++
 synapse/storage/databases/main/account_data.py     | 30 ++++++++-----
 synapse/storage/databases/main/devices.py          | 36 ++++++++++------
 synapse/storage/databases/main/events_worker.py    | 35 +++++++++------
 synapse/storage/databases/main/push_rule.py        | 17 ++++----
 synapse/storage/databases/main/pusher.py           | 24 +++++++----
 synapse/storage/databases/main/receipts.py         | 18 ++++----
 synapse/storage/util/id_generators.py              | 13 ++----
 11 files changed, 176 insertions(+), 74 deletions(-)
 delete mode 100644 changelog.d/14376.misc
 create mode 100644 synapse/replication/slave/__init__.py
 create mode 100644 synapse/replication/slave/storage/__init__.py
 create mode 100644 synapse/replication/slave/storage/_slaved_id_tracker.py

(limited to 'synapse')

diff --git a/changelog.d/14376.misc b/changelog.d/14376.misc
deleted file mode 100644
index 2ca326fea6..0000000000
--- a/changelog.d/14376.misc
+++ /dev/null
@@ -1 +0,0 @@
-Remove old stream ID tracking code. Contributed by Nick @Beeper (@fizzadar).
diff --git a/synapse/replication/slave/__init__.py b/synapse/replication/slave/__init__.py
new file mode 100644
index 0000000000..f43a360a80
--- /dev/null
+++ b/synapse/replication/slave/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/synapse/replication/slave/storage/__init__.py b/synapse/replication/slave/storage/__init__.py
new file mode 100644
index 0000000000..f43a360a80
--- /dev/null
+++ b/synapse/replication/slave/storage/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/synapse/replication/slave/storage/_slaved_id_tracker.py b/synapse/replication/slave/storage/_slaved_id_tracker.py
new file mode 100644
index 0000000000..8f3f953ed4
--- /dev/null
+++ b/synapse/replication/slave/storage/_slaved_id_tracker.py
@@ -0,0 +1,50 @@
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import List, Optional, Tuple
+
+from synapse.storage.database import LoggingDatabaseConnection
+from synapse.storage.util.id_generators import AbstractStreamIdTracker, _load_current_id
+
+
+class SlavedIdTracker(AbstractStreamIdTracker):
+    """Tracks the "current" stream ID of a stream with a single writer.
+
+    See `AbstractStreamIdTracker` for more details.
+
+    Note that this class does not work correctly when there are multiple
+    writers.
+    """
+
+    def __init__(
+        self,
+        db_conn: LoggingDatabaseConnection,
+        table: str,
+        column: str,
+        extra_tables: Optional[List[Tuple[str, str]]] = None,
+        step: int = 1,
+    ):
+        self.step = step
+        self._current = _load_current_id(db_conn, table, column, step)
+        if extra_tables:
+            for table, column in extra_tables:
+                self.advance(None, _load_current_id(db_conn, table, column))
+
+    def advance(self, instance_name: Optional[str], new_id: int) -> None:
+        self._current = (max if self.step > 0 else min)(self._current, new_id)
+
+    def get_current_token(self) -> int:
+        return self._current
+
+    def get_current_token_for_writer(self, instance_name: str) -> int:
+        return self.get_current_token()
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 282687ebce..c38b8a9e5a 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -27,6 +27,7 @@ from typing import (
 )
 
 from synapse.api.constants import AccountDataTypes
+from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams import AccountDataStream, TagAccountDataStream
 from synapse.storage._base import db_to_json
 from synapse.storage.database import (
@@ -67,11 +68,12 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
         # to write account data. A value of `True` implies that `_account_data_id_gen`
         # is an `AbstractStreamIdGenerator` and not just a tracker.
         self._account_data_id_gen: AbstractStreamIdTracker
-        self._can_write_to_account_data = (
-            self._instance_name in hs.config.worker.writers.account_data
-        )
 
         if isinstance(database.engine, PostgresEngine):
+            self._can_write_to_account_data = (
+                self._instance_name in hs.config.worker.writers.account_data
+            )
+
             self._account_data_id_gen = MultiWriterIdGenerator(
                 db_conn=db_conn,
                 db=database,
@@ -93,13 +95,21 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
             # updated over replication. (Multiple writers are not supported for
             # SQLite).
-            self._account_data_id_gen = StreamIdGenerator(
-                db_conn,
-                "room_account_data",
-                "stream_id",
-                extra_tables=[("room_tags_revisions", "stream_id")],
-                is_writer=self._instance_name in hs.config.worker.writers.account_data,
-            )
+            if self._instance_name in hs.config.worker.writers.account_data:
+                self._can_write_to_account_data = True
+                self._account_data_id_gen = StreamIdGenerator(
+                    db_conn,
+                    "room_account_data",
+                    "stream_id",
+                    extra_tables=[("room_tags_revisions", "stream_id")],
+                )
+            else:
+                self._account_data_id_gen = SlavedIdTracker(
+                    db_conn,
+                    "room_account_data",
+                    "stream_id",
+                    extra_tables=[("room_tags_revisions", "stream_id")],
+                )
 
         account_max = self.get_max_account_data_stream_id()
         self._account_data_stream_cache = StreamChangeCache(
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 3e5c16b15b..aa58c2adc3 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -38,6 +38,7 @@ from synapse.logging.opentracing import (
     whitelisted_homeserver,
 )
 from synapse.metrics.background_process_metrics import wrap_as_background_process
+from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams._base import DeviceListsStream, UserSignatureStream
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
 from synapse.storage.database import (
@@ -85,19 +86,28 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
     ):
         super().__init__(database, db_conn, hs)
 
-        # In the worker store this is an ID tracker which we overwrite in the non-worker
-        # class below that is used on the main process.
-        self._device_list_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
-            db_conn,
-            "device_lists_stream",
-            "stream_id",
-            extra_tables=[
-                ("user_signature_stream", "stream_id"),
-                ("device_lists_outbound_pokes", "stream_id"),
-                ("device_lists_changes_in_room", "stream_id"),
-            ],
-            is_writer=hs.config.worker.worker_app is None,
-        )
+        if hs.config.worker.worker_app is None:
+            self._device_list_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
+                db_conn,
+                "device_lists_stream",
+                "stream_id",
+                extra_tables=[
+                    ("user_signature_stream", "stream_id"),
+                    ("device_lists_outbound_pokes", "stream_id"),
+                    ("device_lists_changes_in_room", "stream_id"),
+                ],
+            )
+        else:
+            self._device_list_id_gen = SlavedIdTracker(
+                db_conn,
+                "device_lists_stream",
+                "stream_id",
+                extra_tables=[
+                    ("user_signature_stream", "stream_id"),
+                    ("device_lists_outbound_pokes", "stream_id"),
+                    ("device_lists_changes_in_room", "stream_id"),
+                ],
+            )
 
         # Type-ignore: _device_list_id_gen is mixed in from either DataStore (as a
         # StreamIdGenerator) or SlavedDataStore (as a SlavedIdTracker).
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 296e50d661..467d20253d 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -59,6 +59,7 @@ from synapse.metrics.background_process_metrics import (
     run_as_background_process,
     wrap_as_background_process,
 )
+from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams import BackfillStream
 from synapse.replication.tcp.streams.events import EventsStream
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
@@ -212,20 +213,26 @@ class EventsWorkerStore(SQLBaseStore):
             # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
             # updated over replication. (Multiple writers are not supported for
             # SQLite).
-            self._stream_id_gen = StreamIdGenerator(
-                db_conn,
-                "events",
-                "stream_ordering",
-                is_writer=hs.get_instance_name() in hs.config.worker.writers.events,
-            )
-            self._backfill_id_gen = StreamIdGenerator(
-                db_conn,
-                "events",
-                "stream_ordering",
-                step=-1,
-                extra_tables=[("ex_outlier_stream", "event_stream_ordering")],
-                is_writer=hs.get_instance_name() in hs.config.worker.writers.events,
-            )
+            if hs.get_instance_name() in hs.config.worker.writers.events:
+                self._stream_id_gen = StreamIdGenerator(
+                    db_conn,
+                    "events",
+                    "stream_ordering",
+                )
+                self._backfill_id_gen = StreamIdGenerator(
+                    db_conn,
+                    "events",
+                    "stream_ordering",
+                    step=-1,
+                    extra_tables=[("ex_outlier_stream", "event_stream_ordering")],
+                )
+            else:
+                self._stream_id_gen = SlavedIdTracker(
+                    db_conn, "events", "stream_ordering"
+                )
+                self._backfill_id_gen = SlavedIdTracker(
+                    db_conn, "events", "stream_ordering", step=-1
+                )
 
         events_max = self._stream_id_gen.get_current_token()
         curr_state_delta_prefill, min_curr_state_delta_id = self.db_pool.get_cache_dict(
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index 12ad44dbb3..8ae10f6127 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -30,6 +30,7 @@ from typing import (
 
 from synapse.api.errors import StoreError
 from synapse.config.homeserver import ExperimentalConfig
+from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams import PushRulesStream
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
@@ -110,14 +111,14 @@ class PushRulesWorkerStore(
     ):
         super().__init__(database, db_conn, hs)
 
-        # In the worker store this is an ID tracker which we overwrite in the non-worker
-        # class below that is used on the main process.
-        self._push_rules_stream_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
-            db_conn,
-            "push_rules_stream",
-            "stream_id",
-            is_writer=hs.config.worker.worker_app is None,
-        )
+        if hs.config.worker.worker_app is None:
+            self._push_rules_stream_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
+                db_conn, "push_rules_stream", "stream_id"
+            )
+        else:
+            self._push_rules_stream_id_gen = SlavedIdTracker(
+                db_conn, "push_rules_stream", "stream_id"
+            )
 
         push_rules_prefill, push_rules_id = self.db_pool.get_cache_dict(
             db_conn,
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
index fee37b9ce4..4a01562d45 100644
--- a/synapse/storage/databases/main/pusher.py
+++ b/synapse/storage/databases/main/pusher.py
@@ -27,6 +27,7 @@ from typing import (
 )
 
 from synapse.push import PusherConfig, ThrottleParams
+from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams import PushersStream
 from synapse.storage._base import SQLBaseStore, db_to_json
 from synapse.storage.database import (
@@ -58,15 +59,20 @@ class PusherWorkerStore(SQLBaseStore):
     ):
         super().__init__(database, db_conn, hs)
 
-        # In the worker store this is an ID tracker which we overwrite in the non-worker
-        # class below that is used on the main process.
-        self._pushers_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
-            db_conn,
-            "pushers",
-            "id",
-            extra_tables=[("deleted_pushers", "stream_id")],
-            is_writer=hs.config.worker.worker_app is None,
-        )
+        if hs.config.worker.worker_app is None:
+            self._pushers_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
+                db_conn,
+                "pushers",
+                "id",
+                extra_tables=[("deleted_pushers", "stream_id")],
+            )
+        else:
+            self._pushers_id_gen = SlavedIdTracker(
+                db_conn,
+                "pushers",
+                "id",
+                extra_tables=[("deleted_pushers", "stream_id")],
+            )
 
         self.db_pool.updates.register_background_update_handler(
             "remove_deactivated_pushers",
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index 64519587f8..dc6989527e 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -27,6 +27,7 @@ from typing import (
 )
 
 from synapse.api.constants import EduTypes
+from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams import ReceiptsStream
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
 from synapse.storage.database import (
@@ -60,9 +61,6 @@ class ReceiptsWorkerStore(SQLBaseStore):
         hs: "HomeServer",
     ):
         self._instance_name = hs.get_instance_name()
-
-        # In the worker store this is an ID tracker which we overwrite in the non-worker
-        # class below that is used on the main process.
         self._receipts_id_gen: AbstractStreamIdTracker
 
         if isinstance(database.engine, PostgresEngine):
@@ -89,12 +87,14 @@ class ReceiptsWorkerStore(SQLBaseStore):
             # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
             # updated over replication. (Multiple writers are not supported for
             # SQLite).
-            self._receipts_id_gen = StreamIdGenerator(
-                db_conn,
-                "receipts_linearized",
-                "stream_id",
-                is_writer=hs.get_instance_name() in hs.config.worker.writers.receipts,
-            )
+            if hs.get_instance_name() in hs.config.worker.writers.receipts:
+                self._receipts_id_gen = StreamIdGenerator(
+                    db_conn, "receipts_linearized", "stream_id"
+                )
+            else:
+                self._receipts_id_gen = SlavedIdTracker(
+                    db_conn, "receipts_linearized", "stream_id"
+                )
 
         super().__init__(database, db_conn, hs)
 
diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py
index 1af0af1266..2dfe4c0b66 100644
--- a/synapse/storage/util/id_generators.py
+++ b/synapse/storage/util/id_generators.py
@@ -186,13 +186,11 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
         column: str,
         extra_tables: Iterable[Tuple[str, str]] = (),
         step: int = 1,
-        is_writer: bool = True,
     ) -> None:
         assert step != 0
         self._lock = threading.Lock()
         self._step: int = step
         self._current: int = _load_current_id(db_conn, table, column, step)
-        self._is_writer = is_writer
         for table, column in extra_tables:
             self._current = (max if step > 0 else min)(
                 self._current, _load_current_id(db_conn, table, column, step)
@@ -206,11 +204,9 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
         self._unfinished_ids: OrderedDict[int, int] = OrderedDict()
 
     def advance(self, instance_name: str, new_id: int) -> None:
-        # Advance should never be called on a writer instance, only over replication
-        if self._is_writer:
-            raise Exception("Replication is not supported by writer StreamIdGenerator")
-
-        self._current = (max if self._step > 0 else min)(self._current, new_id)
+        # `StreamIdGenerator` should only be used when there is a single writer,
+        # so replication should never happen.
+        raise Exception("Replication is not supported by StreamIdGenerator")
 
     def get_next(self) -> AsyncContextManager[int]:
         with self._lock:
@@ -253,9 +249,6 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
         return _AsyncCtxManagerWrapper(manager())
 
     def get_current_token(self) -> int:
-        if self._is_writer:
-            return self._current
-
         with self._lock:
             if self._unfinished_ids:
                 return next(iter(self._unfinished_ids)) - self._step
-- 
cgit 1.5.1


From 882277008c7b43ab26e3445ab94a38aa25ad0965 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Wed, 16 Nov 2022 15:01:22 +0000
Subject: Fix background updates failing to add unique indexes on receipts
 (#14453)

As part of the database migration to support threaded receipts, there is
a possible window in between
`73/08thread_receipts_non_null.sql.postgres` removing the original
unique constraints on `receipts_linearized` and `receipts_graph` and the
`receipts_linearized_unique_index` and `receipts_graph_unique_index`
background updates from `72/08thread_receipts.sql` completing where
the unique constraints on `receipts_linearized` and `receipts_graph` are
missing. Any emulated upserts on these tables must therefore be
performed with a lock held, otherwise duplicate rows can end up in the
tables when there are concurrent emulated upserts. Fix the missing lock.

Note that emulated upserts no longer happen by default on sqlite, since
the minimum supported version of sqlite supports native upserts by
default now.

Finally, clean up any duplicate receipts that may have crept in before
trying to create the `receipts_graph_unique_index` and
`receipts_linearized_unique_index` unique indexes.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14453.bugfix                      |   1 +
 synapse/storage/databases/main/receipts.py    | 171 ++++++++++++++++++---
 tests/storage/databases/main/test_receipts.py | 209 ++++++++++++++++++++++++++
 3 files changed, 357 insertions(+), 24 deletions(-)
 create mode 100644 changelog.d/14453.bugfix
 create mode 100644 tests/storage/databases/main/test_receipts.py

(limited to 'synapse')

diff --git a/changelog.d/14453.bugfix b/changelog.d/14453.bugfix
new file mode 100644
index 0000000000..4969e5450c
--- /dev/null
+++ b/changelog.d/14453.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.70.0 where the background updates to add non-thread unique indexes on receipts could fail when upgrading from 1.67.0 or earlier.
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index dc6989527e..fbf27497ec 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -113,24 +113,6 @@ class ReceiptsWorkerStore(SQLBaseStore):
             prefilled_cache=receipts_stream_prefill,
         )
 
-        self.db_pool.updates.register_background_index_update(
-            "receipts_linearized_unique_index",
-            index_name="receipts_linearized_unique_index",
-            table="receipts_linearized",
-            columns=["room_id", "receipt_type", "user_id"],
-            where_clause="thread_id IS NULL",
-            unique=True,
-        )
-
-        self.db_pool.updates.register_background_index_update(
-            "receipts_graph_unique_index",
-            index_name="receipts_graph_unique_index",
-            table="receipts_graph",
-            columns=["room_id", "receipt_type", "user_id"],
-            where_clause="thread_id IS NULL",
-            unique=True,
-        )
-
     def get_max_receipt_stream_id(self) -> int:
         """Get the current max stream ID for receipts stream"""
         return self._receipts_id_gen.get_current_token()
@@ -702,9 +684,6 @@ class ReceiptsWorkerStore(SQLBaseStore):
                 "data": json_encoder.encode(data),
             },
             where_clause=where_clause,
-            # receipts_linearized has a unique constraint on
-            # (user_id, room_id, receipt_type), so no need to lock
-            lock=False,
         )
 
         return rx_ts
@@ -862,14 +841,13 @@ class ReceiptsWorkerStore(SQLBaseStore):
                 "data": json_encoder.encode(data),
             },
             where_clause=where_clause,
-            # receipts_graph has a unique constraint on
-            # (user_id, room_id, receipt_type), so no need to lock
-            lock=False,
         )
 
 
 class ReceiptsBackgroundUpdateStore(SQLBaseStore):
     POPULATE_RECEIPT_EVENT_STREAM_ORDERING = "populate_event_stream_ordering"
+    RECEIPTS_LINEARIZED_UNIQUE_INDEX_UPDATE_NAME = "receipts_linearized_unique_index"
+    RECEIPTS_GRAPH_UNIQUE_INDEX_UPDATE_NAME = "receipts_graph_unique_index"
 
     def __init__(
         self,
@@ -883,6 +861,14 @@ class ReceiptsBackgroundUpdateStore(SQLBaseStore):
             self.POPULATE_RECEIPT_EVENT_STREAM_ORDERING,
             self._populate_receipt_event_stream_ordering,
         )
+        self.db_pool.updates.register_background_update_handler(
+            self.RECEIPTS_LINEARIZED_UNIQUE_INDEX_UPDATE_NAME,
+            self._background_receipts_linearized_unique_index,
+        )
+        self.db_pool.updates.register_background_update_handler(
+            self.RECEIPTS_GRAPH_UNIQUE_INDEX_UPDATE_NAME,
+            self._background_receipts_graph_unique_index,
+        )
 
     async def _populate_receipt_event_stream_ordering(
         self, progress: JsonDict, batch_size: int
@@ -938,6 +924,143 @@ class ReceiptsBackgroundUpdateStore(SQLBaseStore):
 
         return batch_size
 
+    async def _create_receipts_index(self, index_name: str, table: str) -> None:
+        """Adds a unique index on `(room_id, receipt_type, user_id)` to the given
+        receipts table, for non-thread receipts."""
+
+        def _create_index(conn: LoggingDatabaseConnection) -> None:
+            conn.rollback()
+
+            # we have to set autocommit, because postgres refuses to
+            # CREATE INDEX CONCURRENTLY without it.
+            if isinstance(self.database_engine, PostgresEngine):
+                conn.set_session(autocommit=True)
+
+            try:
+                c = conn.cursor()
+
+                # Now that the duplicates are gone, we can create the index.
+                concurrently = (
+                    "CONCURRENTLY"
+                    if isinstance(self.database_engine, PostgresEngine)
+                    else ""
+                )
+                sql = f"""
+                    CREATE UNIQUE INDEX {concurrently} {index_name}
+                    ON {table}(room_id, receipt_type, user_id)
+                    WHERE thread_id IS NULL
+                """
+                c.execute(sql)
+            finally:
+                if isinstance(self.database_engine, PostgresEngine):
+                    conn.set_session(autocommit=False)
+
+        await self.db_pool.runWithConnection(_create_index)
+
+    async def _background_receipts_linearized_unique_index(
+        self, progress: dict, batch_size: int
+    ) -> int:
+        """Removes duplicate receipts and adds a unique index on
+        `(room_id, receipt_type, user_id)` to `receipts_linearized`, for non-thread
+        receipts."""
+
+        def _remote_duplicate_receipts_txn(txn: LoggingTransaction) -> None:
+            # Identify any duplicate receipts arising from
+            # https://github.com/matrix-org/synapse/issues/14406.
+            # We expect the following query to use the per-thread receipt index and take
+            # less than a minute.
+            sql = """
+                SELECT MAX(stream_id), room_id, receipt_type, user_id
+                FROM receipts_linearized
+                WHERE thread_id IS NULL
+                GROUP BY room_id, receipt_type, user_id
+                HAVING COUNT(*) > 1
+            """
+            txn.execute(sql)
+            duplicate_keys = cast(List[Tuple[int, str, str, str]], list(txn))
+
+            # Then remove duplicate receipts, keeping the one with the highest
+            # `stream_id`. There should only be a single receipt with any given
+            # `stream_id`.
+            for max_stream_id, room_id, receipt_type, user_id in duplicate_keys:
+                sql = """
+                    DELETE FROM receipts_linearized
+                    WHERE
+                        room_id = ? AND
+                        receipt_type = ? AND
+                        user_id = ? AND
+                        thread_id IS NULL AND
+                        stream_id < ?
+                """
+                txn.execute(sql, (room_id, receipt_type, user_id, max_stream_id))
+
+        await self.db_pool.runInteraction(
+            self.RECEIPTS_LINEARIZED_UNIQUE_INDEX_UPDATE_NAME,
+            _remote_duplicate_receipts_txn,
+        )
+
+        await self._create_receipts_index(
+            "receipts_linearized_unique_index",
+            "receipts_linearized",
+        )
+
+        await self.db_pool.updates._end_background_update(
+            self.RECEIPTS_LINEARIZED_UNIQUE_INDEX_UPDATE_NAME
+        )
+
+        return 1
+
+    async def _background_receipts_graph_unique_index(
+        self, progress: dict, batch_size: int
+    ) -> int:
+        """Removes duplicate receipts and adds a unique index on
+        `(room_id, receipt_type, user_id)` to `receipts_graph`, for non-thread
+        receipts."""
+
+        def _remote_duplicate_receipts_txn(txn: LoggingTransaction) -> None:
+            # Identify any duplicate receipts arising from
+            # https://github.com/matrix-org/synapse/issues/14406.
+            # We expect the following query to use the per-thread receipt index and take
+            # less than a minute.
+            sql = """
+                SELECT room_id, receipt_type, user_id FROM receipts_graph
+                WHERE thread_id IS NULL
+                GROUP BY room_id, receipt_type, user_id
+                HAVING COUNT(*) > 1
+            """
+            txn.execute(sql)
+            duplicate_keys = cast(List[Tuple[str, str, str]], list(txn))
+
+            # Then remove all duplicate receipts.
+            # We could be clever and try to keep the latest receipt out of every set of
+            # duplicates, but it's far simpler to remove them all.
+            for room_id, receipt_type, user_id in duplicate_keys:
+                sql = """
+                    DELETE FROM receipts_graph
+                    WHERE
+                        room_id = ? AND
+                        receipt_type = ? AND
+                        user_id = ? AND
+                        thread_id IS NULL
+                """
+                txn.execute(sql, (room_id, receipt_type, user_id))
+
+        await self.db_pool.runInteraction(
+            self.RECEIPTS_GRAPH_UNIQUE_INDEX_UPDATE_NAME,
+            _remote_duplicate_receipts_txn,
+        )
+
+        await self._create_receipts_index(
+            "receipts_graph_unique_index",
+            "receipts_graph",
+        )
+
+        await self.db_pool.updates._end_background_update(
+            self.RECEIPTS_GRAPH_UNIQUE_INDEX_UPDATE_NAME
+        )
+
+        return 1
+
 
 class ReceiptsStore(ReceiptsWorkerStore, ReceiptsBackgroundUpdateStore):
     pass
diff --git a/tests/storage/databases/main/test_receipts.py b/tests/storage/databases/main/test_receipts.py
new file mode 100644
index 0000000000..c4f12d81d7
--- /dev/null
+++ b/tests/storage/databases/main/test_receipts.py
@@ -0,0 +1,209 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, Optional, Sequence, Tuple
+
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.rest import admin
+from synapse.rest.client import login, room
+from synapse.server import HomeServer
+from synapse.storage.database import LoggingTransaction
+from synapse.util import Clock
+
+from tests.unittest import HomeserverTestCase
+
+
+class ReceiptsBackgroundUpdateStoreTestCase(HomeserverTestCase):
+
+    servlets = [
+        admin.register_servlets,
+        room.register_servlets,
+        login.register_servlets,
+    ]
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer):
+        self.store = hs.get_datastores().main
+        self.user_id = self.register_user("foo", "pass")
+        self.token = self.login("foo", "pass")
+        self.room_id = self.helper.create_room_as(self.user_id, tok=self.token)
+        self.other_room_id = self.helper.create_room_as(self.user_id, tok=self.token)
+
+    def _test_background_receipts_unique_index(
+        self,
+        update_name: str,
+        index_name: str,
+        table: str,
+        receipts: Dict[Tuple[str, str, str], Sequence[Dict[str, Any]]],
+        expected_unique_receipts: Dict[Tuple[str, str, str], Optional[Dict[str, Any]]],
+    ):
+        """Test that the background update to uniqueify non-thread receipts in
+        the given receipts table works properly.
+
+        Args:
+            update_name: The name of the background update to test.
+            index_name: The name of the index that the background update creates.
+            table: The table of receipts that the background update fixes.
+            receipts: The test data containing duplicate receipts.
+                A list of receipt rows to insert, grouped by
+                `(room_id, receipt_type, user_id)`.
+            expected_unique_receipts: A dictionary of `(room_id, receipt_type, user_id)`
+                keys and expected receipt key-values after duplicate receipts have been
+                removed.
+        """
+        # First, undo the background update.
+        def drop_receipts_unique_index(txn: LoggingTransaction) -> None:
+            txn.execute(f"DROP INDEX IF EXISTS {index_name}")
+
+        self.get_success(
+            self.store.db_pool.runInteraction(
+                "drop_receipts_unique_index",
+                drop_receipts_unique_index,
+            )
+        )
+
+        # Populate the receipts table, including duplicates.
+        for (room_id, receipt_type, user_id), rows in receipts.items():
+            for row in rows:
+                self.get_success(
+                    self.store.db_pool.simple_insert(
+                        table,
+                        {
+                            "room_id": room_id,
+                            "receipt_type": receipt_type,
+                            "user_id": user_id,
+                            "thread_id": None,
+                            "data": "{}",
+                            **row,
+                        },
+                    )
+                )
+
+        # Insert and run the background update.
+        self.get_success(
+            self.store.db_pool.simple_insert(
+                "background_updates",
+                {
+                    "update_name": update_name,
+                    "progress_json": "{}",
+                },
+            )
+        )
+
+        self.store.db_pool.updates._all_done = False
+
+        self.wait_for_background_updates()
+
+        # Check that the remaining receipts match expectations.
+        for (
+            room_id,
+            receipt_type,
+            user_id,
+        ), expected_row in expected_unique_receipts.items():
+            # Include the receipt key in the returned columns, for more informative
+            # assertion messages.
+            columns = ["room_id", "receipt_type", "user_id"]
+            if expected_row is not None:
+                columns += expected_row.keys()
+
+            rows = self.get_success(
+                self.store.db_pool.simple_select_list(
+                    table=table,
+                    keyvalues={
+                        "room_id": room_id,
+                        "receipt_type": receipt_type,
+                        "user_id": user_id,
+                        # `simple_select_list` does not support NULL filters,
+                        # so skip the filter on `thread_id`.
+                    },
+                    retcols=columns,
+                    desc="get_receipt",
+                )
+            )
+
+            if expected_row is not None:
+                self.assertEqual(
+                    len(rows),
+                    1,
+                    f"Background update did not leave behind latest receipt in {table}",
+                )
+                self.assertEqual(
+                    rows[0],
+                    {
+                        "room_id": room_id,
+                        "receipt_type": receipt_type,
+                        "user_id": user_id,
+                        **expected_row,
+                    },
+                )
+            else:
+                self.assertEqual(
+                    len(rows),
+                    0,
+                    f"Background update did not remove all duplicate receipts from {table}",
+                )
+
+    def test_background_receipts_linearized_unique_index(self):
+        """Test that the background update to uniqueify non-thread receipts in
+        `receipts_linearized` works properly.
+        """
+        self._test_background_receipts_unique_index(
+            "receipts_linearized_unique_index",
+            "receipts_linearized_unique_index",
+            "receipts_linearized",
+            receipts={
+                (self.room_id, "m.read", self.user_id): [
+                    {"stream_id": 5, "event_id": "$some_event"},
+                    {"stream_id": 6, "event_id": "$some_event"},
+                ],
+                (self.other_room_id, "m.read", self.user_id): [
+                    {"stream_id": 7, "event_id": "$some_event"}
+                ],
+            },
+            expected_unique_receipts={
+                (self.room_id, "m.read", self.user_id): {"stream_id": 6},
+                (self.other_room_id, "m.read", self.user_id): {"stream_id": 7},
+            },
+        )
+
+    def test_background_receipts_graph_unique_index(self):
+        """Test that the background update to uniqueify non-thread receipts in
+        `receipts_graph` works properly.
+        """
+        self._test_background_receipts_unique_index(
+            "receipts_graph_unique_index",
+            "receipts_graph_unique_index",
+            "receipts_graph",
+            receipts={
+                (self.room_id, "m.read", self.user_id): [
+                    {
+                        "event_ids": '["$some_event"]',
+                    },
+                    {
+                        "event_ids": '["$some_event"]',
+                    },
+                ],
+                (self.other_room_id, "m.read", self.user_id): [
+                    {
+                        "event_ids": '["$some_event"]',
+                    }
+                ],
+            },
+            expected_unique_receipts={
+                (self.room_id, "m.read", self.user_id): None,
+                (self.other_room_id, "m.read", self.user_id): {
+                    "event_ids": '["$some_event"]'
+                },
+            },
+        )
-- 
cgit 1.5.1


From d8cc86eff484b6f570f55a5badb337080c6e4dcd Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 16 Nov 2022 10:25:24 -0500
Subject: Remove redundant types from comments. (#14412)

Remove type hints from comments which have been added
as Python type hints. This helps avoid drift between comments
and reality, as well as removing redundant information.

Also adds some missing type hints which were simple to fill in.
---
 changelog.d/14412.misc                             |  1 +
 synapse/api/errors.py                              |  2 +-
 synapse/config/logger.py                           |  5 ++-
 synapse/crypto/keyring.py                          |  9 +++--
 synapse/events/__init__.py                         |  3 +-
 synapse/federation/transport/client.py             | 11 +++---
 synapse/federation/transport/server/_base.py       |  4 +--
 synapse/handlers/e2e_keys.py                       |  2 +-
 synapse/handlers/e2e_room_keys.py                  |  5 +--
 synapse/handlers/federation.py                     |  4 +--
 synapse/handlers/identity.py                       |  2 +-
 synapse/handlers/oidc.py                           |  2 +-
 synapse/handlers/presence.py                       |  2 +-
 synapse/handlers/saml.py                           |  4 +--
 synapse/http/additional_resource.py                |  3 +-
 synapse/http/federation/matrix_federation_agent.py |  9 +++--
 synapse/http/matrixfederationclient.py             |  3 +-
 synapse/http/proxyagent.py                         | 20 +++++------
 synapse/http/server.py                             |  2 +-
 synapse/http/site.py                               |  2 +-
 synapse/logging/context.py                         | 39 +++++++++++-----------
 synapse/logging/opentracing.py                     |  4 +--
 synapse/module_api/__init__.py                     |  7 ++--
 synapse/replication/http/_base.py                  |  2 +-
 synapse/rest/admin/users.py                        |  5 +--
 synapse/rest/client/login.py                       |  2 +-
 synapse/rest/media/v1/media_repository.py          |  4 +--
 synapse/rest/media/v1/thumbnailer.py               |  4 +--
 synapse/server_notices/consent_server_notices.py   |  5 ++-
 .../resource_limits_server_notices.py              | 12 ++++---
 synapse/storage/controllers/persist_events.py      |  5 ++-
 synapse/storage/databases/main/devices.py          |  2 +-
 synapse/storage/databases/main/e2e_room_keys.py    |  8 ++---
 synapse/storage/databases/main/end_to_end_keys.py  |  7 ++--
 synapse/storage/databases/main/events.py           | 22 ++++++------
 synapse/storage/databases/main/events_worker.py    |  2 +-
 .../storage/databases/main/monthly_active_users.py |  8 ++---
 synapse/storage/databases/main/registration.py     |  6 ++--
 synapse/storage/databases/main/room.py             |  8 +++--
 synapse/storage/databases/main/user_directory.py   |  9 +++--
 synapse/types.py                                   |  4 +--
 synapse/util/async_helpers.py                      |  3 +-
 synapse/util/caches/__init__.py                    |  2 +-
 synapse/util/caches/deferred_cache.py              |  2 +-
 synapse/util/caches/dictionary_cache.py            |  9 ++---
 synapse/util/caches/expiringcache.py               |  2 +-
 synapse/util/caches/lrucache.py                    |  8 ++---
 synapse/util/ratelimitutils.py                     |  2 +-
 synapse/util/threepids.py                          |  2 +-
 synapse/util/wheel_timer.py                        |  4 +--
 tests/http/__init__.py                             |  7 ++--
 tests/replication/slave/storage/test_events.py     |  7 ++--
 tests/replication/test_multi_media_repo.py         | 14 ++++----
 .../test_resource_limits_server_notices.py         | 10 +++---
 tests/unittest.py                                  | 18 +++++-----
 55 files changed, 174 insertions(+), 176 deletions(-)
 create mode 100644 changelog.d/14412.misc

(limited to 'synapse')

diff --git a/changelog.d/14412.misc b/changelog.d/14412.misc
new file mode 100644
index 0000000000..4da061d461
--- /dev/null
+++ b/changelog.d/14412.misc
@@ -0,0 +1 @@
+Remove type information from comments where it duplicates Python type hints.
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index 400dd12aba..e2cfcea0f2 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -713,7 +713,7 @@ class HttpResponseException(CodeMessageException):
         set to the reason code from the HTTP response.
 
         Returns:
-            SynapseError:
+            The error converted to a SynapseError.
         """
         # try to parse the body as json, to get better errcode/msg, but
         # default to M_UNKNOWN with the HTTP status as the error text
diff --git a/synapse/config/logger.py b/synapse/config/logger.py
index 94d1150415..5468b963a2 100644
--- a/synapse/config/logger.py
+++ b/synapse/config/logger.py
@@ -317,10 +317,9 @@ def setup_logging(
     Set up the logging subsystem.
 
     Args:
-        config (LoggingConfig | synapse.config.worker.WorkerConfig):
-            configuration data
+        config: configuration data
 
-        use_worker_options (bool): True to use the 'worker_log_config' option
+        use_worker_options: True to use the 'worker_log_config' option
             instead of 'log_config'.
 
         logBeginner: The Twisted logBeginner to use.
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index c88afb2986..dd9b8089ec 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -213,7 +213,7 @@ class Keyring:
 
     def verify_json_objects_for_server(
         self, server_and_json: Iterable[Tuple[str, dict, int]]
-    ) -> List[defer.Deferred]:
+    ) -> List["defer.Deferred[None]"]:
         """Bulk verifies signatures of json objects, bulk fetching keys as
         necessary.
 
@@ -226,10 +226,9 @@ class Keyring:
                 valid.
 
         Returns:
-            List<Deferred[None]>: for each input triplet, a deferred indicating success
-                or failure to verify each json object's signature for the given
-                server_name. The deferreds run their callbacks in the sentinel
-                logcontext.
+            For each input triplet, a deferred indicating success or failure to
+            verify each json object's signature for the given server_name. The
+            deferreds run their callbacks in the sentinel logcontext.
         """
         return [
             run_in_background(
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 030c3ca408..8aca9a3ab9 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -597,8 +597,7 @@ def _event_type_from_format_version(
         format_version: The event format version
 
     Returns:
-        type: A type that can be initialized as per the initializer of
-        `FrozenEvent`
+        A type that can be initialized as per the initializer of `FrozenEvent`
     """
 
     if format_version == EventFormatVersions.ROOM_V1_V2:
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index cd39d4d111..a3cfc701cd 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -280,12 +280,11 @@ class TransportLayerClient:
         Note that this does not append any events to any graphs.
 
         Args:
-            destination (str): address of remote homeserver
-            room_id (str): room to join/leave
-            user_id (str): user to be joined/left
-            membership (str): one of join/leave
-            params (dict[str, str|Iterable[str]]): Query parameters to include in the
-                request.
+            destination: address of remote homeserver
+            room_id: room to join/leave
+            user_id: user to be joined/left
+            membership: one of join/leave
+            params: Query parameters to include in the request.
 
         Returns:
             Succeeds when we get a 2xx HTTP response. The result
diff --git a/synapse/federation/transport/server/_base.py b/synapse/federation/transport/server/_base.py
index 1db8009d6c..cdaf0d5de7 100644
--- a/synapse/federation/transport/server/_base.py
+++ b/synapse/federation/transport/server/_base.py
@@ -224,10 +224,10 @@ class BaseFederationServlet:
 
         With arguments:
 
-            origin (unicode|None): The authenticated server_name of the calling server,
+            origin (str|None): The authenticated server_name of the calling server,
                 unless REQUIRE_AUTH is set to False and authentication failed.
 
-            content (unicode|None): decoded json body of the request. None if the
+            content (str|None): decoded json body of the request. None if the
                 request was a GET.
 
             query (dict[bytes, list[bytes]]): Query params from the request. url-decoded
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index a9912c467d..bf1221f523 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -870,7 +870,7 @@ class E2eKeysHandler:
         - signatures of the user's master key by the user's devices.
 
         Args:
-            user_id (string): the user uploading the keys
+            user_id: the user uploading the keys
             signatures (dict[string, dict]): map of devices to signed keys
 
         Returns:
diff --git a/synapse/handlers/e2e_room_keys.py b/synapse/handlers/e2e_room_keys.py
index 28dc08c22a..83f53ceb88 100644
--- a/synapse/handlers/e2e_room_keys.py
+++ b/synapse/handlers/e2e_room_keys.py
@@ -377,8 +377,9 @@ class E2eRoomKeysHandler:
         """Deletes a given version of the user's e2e_room_keys backup
 
         Args:
-            user_id(str): the user whose current backup version we're deleting
-            version(str): the version id of the backup being deleted
+            user_id: the user whose current backup version we're deleting
+            version: Optional. the version ID of the backup version we're deleting
+                If missing, we delete the current backup version info.
         Raises:
             NotFoundError: if this backup version doesn't exist
         """
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 5fc3b8bc8c..188f0956ef 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -1596,8 +1596,8 @@ class FederationHandler:
         Fetch the complexity of a remote room over federation.
 
         Args:
-            remote_room_hosts (list[str]): The remote servers to ask.
-            room_id (str): The room ID to ask about.
+            remote_room_hosts: The remote servers to ask.
+            room_id: The room ID to ask about.
 
         Returns:
             Dict contains the complexity
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index 93d09e9939..848e46eb9b 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -711,7 +711,7 @@ class IdentityHandler:
             inviter_display_name: The current display name of the
                 inviter.
             inviter_avatar_url: The URL of the inviter's avatar.
-            id_access_token (str): The access token to authenticate to the identity
+            id_access_token: The access token to authenticate to the identity
                 server with
 
         Returns:
diff --git a/synapse/handlers/oidc.py b/synapse/handlers/oidc.py
index 867973dcca..41c675f408 100644
--- a/synapse/handlers/oidc.py
+++ b/synapse/handlers/oidc.py
@@ -787,7 +787,7 @@ class OidcProvider:
                 Must include an ``access_token`` field.
 
         Returns:
-            UserInfo: an object representing the user.
+            an object representing the user.
         """
         logger.debug("Using the OAuth2 access_token to request userinfo")
         metadata = await self.load_metadata()
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 0066d63987..b7bc787636 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -201,7 +201,7 @@ class BasePresenceHandler(abc.ABC):
         """Get the current presence state for multiple users.
 
         Returns:
-            dict: `user_id` -> `UserPresenceState`
+            A mapping of `user_id` -> `UserPresenceState`
         """
         states = {}
         missing = []
diff --git a/synapse/handlers/saml.py b/synapse/handlers/saml.py
index 9602f0d0bb..874860d461 100644
--- a/synapse/handlers/saml.py
+++ b/synapse/handlers/saml.py
@@ -441,7 +441,7 @@ class DefaultSamlMappingProvider:
             client_redirect_url: where the client wants to redirect to
 
         Returns:
-            dict: A dict containing new user attributes. Possible keys:
+            A dict containing new user attributes. Possible keys:
                 * mxid_localpart (str): Required. The localpart of the user's mxid
                 * displayname (str): The displayname of the user
                 * emails (list[str]): Any emails for the user
@@ -483,7 +483,7 @@ class DefaultSamlMappingProvider:
         Args:
             config: A dictionary containing configuration options for this provider
         Returns:
-            SamlConfig: A custom config object for this module
+            A custom config object for this module
         """
         # Parse config options and use defaults where necessary
         mxid_source_attribute = config.get("mxid_source_attribute", "uid")
diff --git a/synapse/http/additional_resource.py b/synapse/http/additional_resource.py
index 6a9f6635d2..8729630581 100644
--- a/synapse/http/additional_resource.py
+++ b/synapse/http/additional_resource.py
@@ -45,8 +45,7 @@ class AdditionalResource(DirectServeJsonResource):
 
         Args:
             hs: homeserver
-            handler ((twisted.web.server.Request) -> twisted.internet.defer.Deferred):
-                function to be called to handle the request.
+            handler: function to be called to handle the request.
         """
         super().__init__()
         self._handler = handler
diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py
index 2f0177f1e2..0359231e7d 100644
--- a/synapse/http/federation/matrix_federation_agent.py
+++ b/synapse/http/federation/matrix_federation_agent.py
@@ -155,11 +155,10 @@ class MatrixFederationAgent:
                 a file for a file upload).  Or None if the request is to have
                 no body.
         Returns:
-            Deferred[twisted.web.iweb.IResponse]:
-                fires when the header of the response has been received (regardless of the
-                response status code). Fails if there is any problem which prevents that
-                response from being received (including problems that prevent the request
-                from being sent).
+            A deferred which fires when the header of the response has been received
+            (regardless of the response status code). Fails if there is any problem
+            which prevents that response from being received (including problems that
+            prevent the request from being sent).
         """
         # We use urlparse as that will set `port` to None if there is no
         # explicit port.
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index 3c35b1d2c7..b92f1d3d1a 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -951,8 +951,7 @@ class MatrixFederationHttpClient:
 
             args: query params
         Returns:
-            dict|list: Succeeds when we get a 2xx HTTP response. The
-            result will be the decoded JSON body.
+            Succeeds when we get a 2xx HTTP response. The result will be the decoded JSON body.
 
         Raises:
             HttpResponseException: If we get an HTTP response code >= 300
diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py
index 1f8227896f..18899bc6d1 100644
--- a/synapse/http/proxyagent.py
+++ b/synapse/http/proxyagent.py
@@ -34,7 +34,7 @@ from twisted.web.client import (
 )
 from twisted.web.error import SchemeNotSupported
 from twisted.web.http_headers import Headers
-from twisted.web.iweb import IAgent, IBodyProducer, IPolicyForHTTPS
+from twisted.web.iweb import IAgent, IBodyProducer, IPolicyForHTTPS, IResponse
 
 from synapse.http import redact_uri
 from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint, ProxyCredentials
@@ -134,7 +134,7 @@ class ProxyAgent(_AgentBase):
         uri: bytes,
         headers: Optional[Headers] = None,
         bodyProducer: Optional[IBodyProducer] = None,
-    ) -> defer.Deferred:
+    ) -> "defer.Deferred[IResponse]":
         """
         Issue a request to the server indicated by the given uri.
 
@@ -157,17 +157,17 @@ class ProxyAgent(_AgentBase):
                 a file upload). Or, None if the request is to have no body.
 
         Returns:
-            Deferred[IResponse]: completes when the header of the response has
-                 been received (regardless of the response status code).
+            A deferred which completes when the header of the response has
+            been received (regardless of the response status code).
 
-                 Can fail with:
-                    SchemeNotSupported: if the uri is not http or https
+            Can fail with:
+                SchemeNotSupported: if the uri is not http or https
 
-                    twisted.internet.error.TimeoutError if the server we are connecting
-                        to (proxy or destination) does not accept a connection before
-                        connectTimeout.
+                twisted.internet.error.TimeoutError if the server we are connecting
+                    to (proxy or destination) does not accept a connection before
+                    connectTimeout.
 
-                    ... other things too.
+                ... other things too.
         """
         uri = uri.strip()
         if not _VALID_URI.match(uri):
diff --git a/synapse/http/server.py b/synapse/http/server.py
index b26e34bceb..051a1899a0 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -267,7 +267,7 @@ class HttpServer(Protocol):
                 request. The first argument will be the request object and
                 subsequent arguments will be any matched groups from the regex.
                 This should return either tuple of (code, response), or None.
-            servlet_classname (str): The name of the handler to be used in prometheus
+            servlet_classname: The name of the handler to be used in prometheus
                 and opentracing logs.
         """
 
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 3dbd541fed..6a1dbf7f33 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -400,7 +400,7 @@ class SynapseRequest(Request):
         be sure to call finished_processing.
 
         Args:
-            servlet_name (str): the name of the servlet which will be
+            servlet_name: the name of the servlet which will be
                 processing this request. This is used in the metrics.
 
                 It is possible to update this afterwards by updating
diff --git a/synapse/logging/context.py b/synapse/logging/context.py
index 6a08ffed64..f62bea968f 100644
--- a/synapse/logging/context.py
+++ b/synapse/logging/context.py
@@ -117,8 +117,7 @@ class ContextResourceUsage:
         """Create a new ContextResourceUsage
 
         Args:
-            copy_from (ContextResourceUsage|None): if not None, an object to
-                copy stats from
+            copy_from: if not None, an object to copy stats from
         """
         if copy_from is None:
             self.reset()
@@ -162,7 +161,7 @@ class ContextResourceUsage:
         """Add another ContextResourceUsage's stats to this one's.
 
         Args:
-            other (ContextResourceUsage): the other resource usage object
+            other: the other resource usage object
         """
         self.ru_utime += other.ru_utime
         self.ru_stime += other.ru_stime
@@ -342,7 +341,7 @@ class LoggingContext:
         called directly.
 
         Returns:
-            LoggingContext: the current logging context
+            The current logging context
         """
         warnings.warn(
             "synapse.logging.context.LoggingContext.current_context() is deprecated "
@@ -362,7 +361,8 @@ class LoggingContext:
         called directly.
 
         Args:
-            context(LoggingContext): The context to activate.
+            context: The context to activate.
+
         Returns:
             The context that was previously active
         """
@@ -474,8 +474,7 @@ class LoggingContext:
         """Get resources used by this logcontext so far.
 
         Returns:
-            ContextResourceUsage: a *copy* of the object tracking resource
-                usage so far
+            A *copy* of the object tracking resource usage so far
         """
         # we always return a copy, for consistency
         res = self._resource_usage.copy()
@@ -663,7 +662,8 @@ def current_context() -> LoggingContextOrSentinel:
 def set_current_context(context: LoggingContextOrSentinel) -> LoggingContextOrSentinel:
     """Set the current logging context in thread local storage
     Args:
-        context(LoggingContext): The context to activate.
+        context: The context to activate.
+
     Returns:
         The context that was previously active
     """
@@ -700,7 +700,7 @@ def nested_logging_context(suffix: str) -> LoggingContext:
         suffix: suffix to add to the parent context's 'name'.
 
     Returns:
-        LoggingContext: new logging context.
+        A new logging context.
     """
     curr_context = current_context()
     if not curr_context:
@@ -898,20 +898,19 @@ def defer_to_thread(
     on it.
 
     Args:
-        reactor (twisted.internet.base.ReactorBase): The reactor in whose main thread
-            the Deferred will be invoked, and whose threadpool we should use for the
-            function.
+        reactor: The reactor in whose main thread the Deferred will be invoked,
+            and whose threadpool we should use for the function.
 
             Normally this will be hs.get_reactor().
 
-        f (callable): The function to call.
+        f: The function to call.
 
         args: positional arguments to pass to f.
 
         kwargs: keyword arguments to pass to f.
 
     Returns:
-        Deferred: A Deferred which fires a callback with the result of `f`, or an
+        A Deferred which fires a callback with the result of `f`, or an
             errback if `f` throws an exception.
     """
     return defer_to_threadpool(reactor, reactor.getThreadPool(), f, *args, **kwargs)
@@ -939,20 +938,20 @@ def defer_to_threadpool(
     on it.
 
     Args:
-        reactor (twisted.internet.base.ReactorBase): The reactor in whose main thread
-            the Deferred will be invoked. Normally this will be hs.get_reactor().
+        reactor: The reactor in whose main thread the Deferred will be invoked.
+            Normally this will be hs.get_reactor().
 
-        threadpool (twisted.python.threadpool.ThreadPool): The threadpool to use for
-            running `f`. Normally this will be hs.get_reactor().getThreadPool().
+        threadpool: The threadpool to use for running `f`. Normally this will be
+            hs.get_reactor().getThreadPool().
 
-        f (callable): The function to call.
+        f: The function to call.
 
         args: positional arguments to pass to f.
 
         kwargs: keyword arguments to pass to f.
 
     Returns:
-        Deferred: A Deferred which fires a callback with the result of `f`, or an
+        A Deferred which fires a callback with the result of `f`, or an
             errback if `f` throws an exception.
     """
     curr_context = current_context()
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index 8ce5a2a338..b69060854f 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -721,7 +721,7 @@ def inject_header_dict(
         destination: address of entity receiving the span context. Must be given unless
             check_destination is False. The context will only be injected if the
             destination matches the opentracing whitelist
-        check_destination (bool): If false, destination will be ignored and the context
+        check_destination: If false, destination will be ignored and the context
             will always be injected.
 
     Note:
@@ -780,7 +780,7 @@ def get_active_span_text_map(destination: Optional[str] = None) -> Dict[str, str
         destination: the name of the remote server.
 
     Returns:
-        dict: the active span's context if opentracing is enabled, otherwise empty.
+        the active span's context if opentracing is enabled, otherwise empty.
     """
 
     if destination and not whitelisted_homeserver(destination):
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 30e689d00d..1adc1fd64f 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -787,7 +787,7 @@ class ModuleApi:
         Added in Synapse v0.25.0.
 
         Args:
-            access_token(str): access token
+            access_token: access token
 
         Returns:
             twisted.internet.defer.Deferred - resolves once the access token
@@ -832,7 +832,7 @@ class ModuleApi:
             **kwargs: named args to be passed to func
 
         Returns:
-            Deferred[object]: result of func
+            Result of func
         """
         # type-ignore: See https://github.com/python/mypy/issues/8862
         return defer.ensureDeferred(
@@ -924,8 +924,7 @@ class ModuleApi:
                 to represent 'any') of the room state to acquire.
 
         Returns:
-            twisted.internet.defer.Deferred[list(synapse.events.FrozenEvent)]:
-                The filtered state events in the room.
+            The filtered state events in the room.
         """
         state_ids = yield defer.ensureDeferred(
             self._storage_controllers.state.get_current_state_ids(
diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py
index 5e661f8c73..3f4d3fc51a 100644
--- a/synapse/replication/http/_base.py
+++ b/synapse/replication/http/_base.py
@@ -153,7 +153,7 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
         argument list.
 
         Returns:
-            dict: If POST/PUT request then dictionary must be JSON serialisable,
+            If POST/PUT request then dictionary must be JSON serialisable,
             otherwise must be appropriate for adding as query args.
         """
         return {}
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 1951b8a9f2..6e0c44be2a 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -903,8 +903,9 @@ class PushersRestServlet(RestServlet):
         @user:server/pushers
 
     Returns:
-        pushers: Dictionary containing pushers information.
-        total: Number of pushers in dictionary `pushers`.
+        A dictionary with keys:
+            pushers: Dictionary containing pushers information.
+            total: Number of pushers in dictionary `pushers`.
     """
 
     PATTERNS = admin_patterns("/users/(?P<user_id>[^/]*)/pushers$")
diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py
index 05706b598c..8adced41e5 100644
--- a/synapse/rest/client/login.py
+++ b/synapse/rest/client/login.py
@@ -350,7 +350,7 @@ class LoginRestServlet(RestServlet):
             auth_provider_session_id: The session ID got during login from the SSO IdP.
 
         Returns:
-            result: Dictionary of account information after successful login.
+            Dictionary of account information after successful login.
         """
 
         # Before we actually log them in we check if they've already logged in
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index 328c0c5477..40b0d39eb2 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -344,8 +344,8 @@ class MediaRepository:
         download from remote server.
 
         Args:
-            server_name (str): Remote server_name where the media originated.
-            media_id (str): The media ID of the content (as defined by the
+            server_name: Remote server_name where the media originated.
+            media_id: The media ID of the content (as defined by the
                 remote server).
 
         Returns:
diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py
index 9b93b9b4f6..a48a4de92a 100644
--- a/synapse/rest/media/v1/thumbnailer.py
+++ b/synapse/rest/media/v1/thumbnailer.py
@@ -138,7 +138,7 @@ class Thumbnailer:
         """Rescales the image to the given dimensions.
 
         Returns:
-            BytesIO: the bytes of the encoded image ready to be written to disk
+            The bytes of the encoded image ready to be written to disk
         """
         with self._resize(width, height) as scaled:
             return self._encode_image(scaled, output_type)
@@ -155,7 +155,7 @@ class Thumbnailer:
             max_height: The largest possible height.
 
         Returns:
-            BytesIO: the bytes of the encoded image ready to be written to disk
+            The bytes of the encoded image ready to be written to disk
         """
         if width * self.height > height * self.width:
             scaled_width = width
diff --git a/synapse/server_notices/consent_server_notices.py b/synapse/server_notices/consent_server_notices.py
index 698ca742ed..94025ba41f 100644
--- a/synapse/server_notices/consent_server_notices.py
+++ b/synapse/server_notices/consent_server_notices.py
@@ -113,9 +113,8 @@ def copy_with_str_subst(x: Any, substitutions: Any) -> Any:
     """Deep-copy a structure, carrying out string substitutions on any strings
 
     Args:
-        x (object): structure to be copied
-        substitutions (object): substitutions to be made - passed into the
-            string '%' operator
+        x: structure to be copied
+        substitutions: substitutions to be made - passed into the string '%' operator
 
     Returns:
         copy of x
diff --git a/synapse/server_notices/resource_limits_server_notices.py b/synapse/server_notices/resource_limits_server_notices.py
index 3134cd2d3d..a31a2c99a7 100644
--- a/synapse/server_notices/resource_limits_server_notices.py
+++ b/synapse/server_notices/resource_limits_server_notices.py
@@ -170,11 +170,13 @@ class ResourceLimitsServerNotices:
             room_id: The room id of the server notices room
 
         Returns:
-            bool: Is the room currently blocked
-            list: The list of pinned event IDs that are unrelated to limit blocking
-            This list can be used as a convenience in the case where the block
-            is to be lifted and the remaining pinned event references need to be
-            preserved
+            Tuple of:
+                Is the room currently blocked
+
+                The list of pinned event IDs that are unrelated to limit blocking
+                This list can be used as a convenience in the case where the block
+                is to be lifted and the remaining pinned event references need to be
+                preserved
         """
         currently_blocked = False
         pinned_state_event = None
diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py
index 48976dc570..33ffef521b 100644
--- a/synapse/storage/controllers/persist_events.py
+++ b/synapse/storage/controllers/persist_events.py
@@ -204,9 +204,8 @@ class _EventPeristenceQueue(Generic[_PersistResult]):
         process to to so, calling the per_item_callback for each item.
 
         Args:
-            room_id (str):
-            task (_EventPersistQueueTask): A _PersistEventsTask or
-                _UpdateCurrentStateTask to process.
+            room_id:
+            task: A _PersistEventsTask or _UpdateCurrentStateTask to process.
 
         Returns:
             the result returned by the `_per_item_callback` passed to
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index aa58c2adc3..e114c733d1 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -535,7 +535,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
             limit: Maximum number of device updates to return
 
         Returns:
-            List: List of device update tuples:
+            List of device update tuples:
                 - user_id
                 - device_id
                 - stream_id
diff --git a/synapse/storage/databases/main/e2e_room_keys.py b/synapse/storage/databases/main/e2e_room_keys.py
index af59be6b48..6240f9a75e 100644
--- a/synapse/storage/databases/main/e2e_room_keys.py
+++ b/synapse/storage/databases/main/e2e_room_keys.py
@@ -391,10 +391,10 @@ class EndToEndRoomKeyStore(SQLBaseStore):
         Returns:
             A dict giving the info metadata for this backup version, with
             fields including:
-                version(str)
-                algorithm(str)
-                auth_data(object): opaque dict supplied by the client
-                etag(int): tag of the keys in the backup
+                version (str)
+                algorithm (str)
+                auth_data (object): opaque dict supplied by the client
+                etag (int): tag of the keys in the backup
         """
 
         def _get_e2e_room_keys_version_info_txn(txn: LoggingTransaction) -> JsonDict:
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index 2a4f58ed92..cf33e73e2b 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -412,10 +412,9 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
         """Retrieve a number of one-time keys for a user
 
         Args:
-            user_id(str): id of user to get keys for
-            device_id(str): id of device to get keys for
-            key_ids(list[str]): list of key ids (excluding algorithm) to
-                retrieve
+            user_id: id of user to get keys for
+            device_id: id of device to get keys for
+            key_ids: list of key ids (excluding algorithm) to retrieve
 
         Returns:
             A map from (algorithm, key_id) to json string for key
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index c4acff5be6..d68f127f9b 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1279,9 +1279,10 @@ class PersistEventsStore:
         Pick the earliest non-outlier if there is one, else the earliest one.
 
         Args:
-            events_and_contexts (list[(EventBase, EventContext)]):
+            events_and_contexts:
+
         Returns:
-            list[(EventBase, EventContext)]: filtered list
+            filtered list
         """
         new_events_and_contexts: OrderedDict[
             str, Tuple[EventBase, EventContext]
@@ -1307,9 +1308,8 @@ class PersistEventsStore:
         """Update min_depth for each room
 
         Args:
-            txn (twisted.enterprise.adbapi.Connection): db connection
-            events_and_contexts (list[(EventBase, EventContext)]): events
-                we are persisting
+            txn: db connection
+            events_and_contexts: events we are persisting
         """
         depth_updates: Dict[str, int] = {}
         for event, context in events_and_contexts:
@@ -1580,13 +1580,11 @@ class PersistEventsStore:
         """Update all the miscellaneous tables for new events
 
         Args:
-            txn (twisted.enterprise.adbapi.Connection): db connection
-            events_and_contexts (list[(EventBase, EventContext)]): events
-                we are persisting
-            all_events_and_contexts (list[(EventBase, EventContext)]): all
-                events that we were going to persist. This includes events
-                we've already persisted, etc, that wouldn't appear in
-                events_and_context.
+            txn: db connection
+            events_and_contexts: events we are persisting
+            all_events_and_contexts: all events that we were going to persist.
+                This includes events we've already persisted, etc, that wouldn't
+                appear in events_and_context.
             inhibit_local_membership_updates: Stop the local_current_membership
                 from being updated by these events. This should be set to True
                 for backfilled events because backfilled events in the past do
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 467d20253d..8a104f7e93 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -1589,7 +1589,7 @@ class EventsWorkerStore(SQLBaseStore):
             room_id: The room ID to query.
 
         Returns:
-            dict[str:float] of complexity version to complexity.
+            Map of complexity version to complexity.
         """
         state_events = await self.get_current_state_event_counts(room_id)
 
diff --git a/synapse/storage/databases/main/monthly_active_users.py b/synapse/storage/databases/main/monthly_active_users.py
index efd136a864..db9a24db5e 100644
--- a/synapse/storage/databases/main/monthly_active_users.py
+++ b/synapse/storage/databases/main/monthly_active_users.py
@@ -217,7 +217,7 @@ class MonthlyActiveUsersWorkerStore(RegistrationWorkerStore):
         def _reap_users(txn: LoggingTransaction, reserved_users: List[str]) -> None:
             """
             Args:
-                reserved_users (tuple): reserved users to preserve
+                reserved_users: reserved users to preserve
             """
 
             thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
@@ -370,8 +370,8 @@ class MonthlyActiveUsersWorkerStore(RegistrationWorkerStore):
         should not appear in the MAU stats).
 
         Args:
-            txn (cursor):
-            user_id (str): user to add/update
+            txn:
+            user_id: user to add/update
         """
         assert (
             self._update_on_this_worker
@@ -401,7 +401,7 @@ class MonthlyActiveUsersWorkerStore(RegistrationWorkerStore):
         add the user to the monthly active tables
 
         Args:
-            user_id(str): the user_id to query
+            user_id: the user_id to query
         """
         assert (
             self._update_on_this_worker
diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py
index 5167089e03..31f0f2bd3d 100644
--- a/synapse/storage/databases/main/registration.py
+++ b/synapse/storage/databases/main/registration.py
@@ -953,7 +953,7 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
         """Returns user id from threepid
 
         Args:
-            txn (cursor):
+            txn:
             medium: threepid medium e.g. email
             address: threepid address e.g. me@example.com
 
@@ -1283,8 +1283,8 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
         """Sets an expiration date to the account with the given user ID.
 
         Args:
-             user_id (str): User ID to set an expiration date for.
-             use_delta (bool): If set to False, the expiration date for the user will be
+             user_id: User ID to set an expiration date for.
+             use_delta: If set to False, the expiration date for the user will be
                 now + validity period. If set to True, this expiration date will be a
                 random value in the [now + period - d ; now + period] range, d being a
                 delta equal to 10% of the validity period.
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 7d97f8f60e..4fbaefad73 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -2057,7 +2057,8 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
         Args:
             report_id: ID of reported event in database
         Returns:
-            event_report: json list of information from event report
+            JSON dict of information from an event report or None if the
+            report does not exist.
         """
 
         def _get_event_report_txn(
@@ -2130,8 +2131,9 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
             user_id: search for user_id. Ignored if user_id is None
             room_id: search for room_id. Ignored if room_id is None
         Returns:
-            event_reports: json list of event reports
-            count: total number of event reports matching the filter criteria
+            Tuple of:
+                json list of event reports
+                total number of event reports matching the filter criteria
         """
 
         def _get_event_reports_paginate_txn(
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index ddb25b5cea..698d6f7515 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -185,9 +185,8 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
         - who should be in the user_directory.
 
         Args:
-            progress (dict)
-            batch_size (int): Maximum number of state events to process
-                per cycle.
+            progress
+            batch_size: Maximum number of state events to process per cycle.
 
         Returns:
             number of events processed.
@@ -708,10 +707,10 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
         Returns the rooms that a user is in.
 
         Args:
-            user_id(str): Must be a local user
+            user_id: Must be a local user
 
         Returns:
-            list: user_id
+            List of room IDs
         """
         rows = await self.db_pool.simple_select_onecol(
             table="users_who_share_private_rooms",
diff --git a/synapse/types.py b/synapse/types.py
index 773f0438d5..f2d436ddc3 100644
--- a/synapse/types.py
+++ b/synapse/types.py
@@ -143,8 +143,8 @@ class Requester:
         Requester.
 
         Args:
-            store (DataStore): Used to convert AS ID to AS object
-            input (dict): A dict produced by `serialize`
+            store: Used to convert AS ID to AS object
+            input: A dict produced by `serialize`
 
         Returns:
             Requester
diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py
index 7f1d41eb3c..d24c4f68c4 100644
--- a/synapse/util/async_helpers.py
+++ b/synapse/util/async_helpers.py
@@ -217,7 +217,8 @@ async def concurrently_execute(
         limit: Maximum number of conccurent executions.
 
     Returns:
-        Deferred: Resolved when all function invocations have finished.
+        None, when all function invocations have finished. The return values
+        from those functions are discarded.
     """
     it = iter(args)
 
diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py
index f7c3a6794e..9387632d0d 100644
--- a/synapse/util/caches/__init__.py
+++ b/synapse/util/caches/__init__.py
@@ -197,7 +197,7 @@ def register_cache(
         resize_callback: A function which can be called to resize the cache.
 
     Returns:
-        CacheMetric: an object which provides inc_{hits,misses,evictions} methods
+        an object which provides inc_{hits,misses,evictions} methods
     """
     if resizable:
         if not resize_callback:
diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py
index bcb1cba362..bf7bd351e0 100644
--- a/synapse/util/caches/deferred_cache.py
+++ b/synapse/util/caches/deferred_cache.py
@@ -153,7 +153,7 @@ class DeferredCache(Generic[KT, VT]):
         Args:
             key:
             callback: Gets called when the entry in the cache is invalidated
-            update_metrics (bool): whether to update the cache hit rate metrics
+            update_metrics: whether to update the cache hit rate metrics
 
         Returns:
             A Deferred which completes with the result. Note that this may later fail
diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py
index fa91479c97..5eaf70c7ab 100644
--- a/synapse/util/caches/dictionary_cache.py
+++ b/synapse/util/caches/dictionary_cache.py
@@ -169,10 +169,11 @@ class DictionaryCache(Generic[KT, DKT, DV]):
                 if it is in the cache.
 
         Returns:
-            DictionaryEntry: If `dict_keys` is not None then `DictionaryEntry`
-            will contain include the keys that are in the cache. If None then
-            will either return the full dict if in the cache, or the empty
-            dict (with `full` set to False) if it isn't.
+            If `dict_keys` is not None then `DictionaryEntry` will include the keys
+            that are in the cache.
+
+            If None then will either return the full dict if in the cache, or the
+            empty dict (with `full` set to False) if it isn't.
         """
         if dict_keys is None:
             # The caller wants the full set of dictionary keys for this cache key
diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py
index c6a5d0dfc0..01ad02af67 100644
--- a/synapse/util/caches/expiringcache.py
+++ b/synapse/util/caches/expiringcache.py
@@ -207,7 +207,7 @@ class ExpiringCache(Generic[KT, VT]):
         items from the cache.
 
         Returns:
-            bool: Whether the cache changed size or not.
+            Whether the cache changed size or not.
         """
         new_size = int(self._original_max_size * factor)
         if new_size != self._max_size:
diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py
index aa93109d13..dcf0eac3bf 100644
--- a/synapse/util/caches/lrucache.py
+++ b/synapse/util/caches/lrucache.py
@@ -389,11 +389,11 @@ class LruCache(Generic[KT, VT]):
             cache_name: The name of this cache, for the prometheus metrics. If unset,
                 no metrics will be reported on this cache.
 
-            cache_type (type):
+            cache_type:
                 type of underlying cache to be used. Typically one of dict
                 or TreeCache.
 
-            size_callback (func(V) -> int | None):
+            size_callback:
 
             metrics_collection_callback:
                 metrics collection callback. This is called early in the metrics
@@ -403,7 +403,7 @@ class LruCache(Generic[KT, VT]):
 
                 Ignored if cache_name is None.
 
-            apply_cache_factor_from_config (bool): If true, `max_size` will be
+            apply_cache_factor_from_config: If true, `max_size` will be
                 multiplied by a cache factor derived from the homeserver config
 
             clock:
@@ -796,7 +796,7 @@ class LruCache(Generic[KT, VT]):
         items from the cache.
 
         Returns:
-            bool: Whether the cache changed size or not.
+            Whether the cache changed size or not.
         """
         if not self.apply_cache_factor_from_config:
             return False
diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py
index 9f64fed0d7..2aceb1a47f 100644
--- a/synapse/util/ratelimitutils.py
+++ b/synapse/util/ratelimitutils.py
@@ -183,7 +183,7 @@ class FederationRateLimiter:
                 # Handle request ...
 
         Args:
-            host (str): Origin of incoming request.
+            host: Origin of incoming request.
 
         Returns:
             context manager which returns a deferred.
diff --git a/synapse/util/threepids.py b/synapse/util/threepids.py
index 1e9c2faa64..54bc7589fd 100644
--- a/synapse/util/threepids.py
+++ b/synapse/util/threepids.py
@@ -48,7 +48,7 @@ async def check_3pid_allowed(
         registration: whether we want to bind the 3PID as part of registering a new user.
 
     Returns:
-        bool: whether the 3PID medium/address is allowed to be added to this HS
+        whether the 3PID medium/address is allowed to be added to this HS
     """
     if not await hs.get_password_auth_provider().is_3pid_allowed(
         medium, address, registration
diff --git a/synapse/util/wheel_timer.py b/synapse/util/wheel_timer.py
index 177e198e7e..b1ec7f4bd8 100644
--- a/synapse/util/wheel_timer.py
+++ b/synapse/util/wheel_timer.py
@@ -90,10 +90,10 @@ class WheelTimer(Generic[T]):
         """Fetch any objects that have timed out
 
         Args:
-            now (ms): Current time in msec
+            now: Current time in msec
 
         Returns:
-            list: List of objects that have timed out
+            List of objects that have timed out
         """
         now_key = int(now / self.bucket_size)
 
diff --git a/tests/http/__init__.py b/tests/http/__init__.py
index e74f7f5b48..093537adef 100644
--- a/tests/http/__init__.py
+++ b/tests/http/__init__.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 import os.path
 import subprocess
+from typing import List
 
 from zope.interface import implementer
 
@@ -70,14 +71,14 @@ subjectAltName = %(sanentries)s
 """
 
 
-def create_test_cert_file(sanlist):
+def create_test_cert_file(sanlist: List[bytes]) -> str:
     """build an x509 certificate file
 
     Args:
-        sanlist: list[bytes]: a list of subjectAltName values for the cert
+        sanlist: a list of subjectAltName values for the cert
 
     Returns:
-        str: the path to the file
+        The path to the file
     """
     global cert_file_count
     csr_filename = "server.csr"
diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py
index 96f3880923..dce71f7334 100644
--- a/tests/replication/slave/storage/test_events.py
+++ b/tests/replication/slave/storage/test_events.py
@@ -143,6 +143,7 @@ class EventsWorkerStoreTestCase(BaseSlavedStoreTestCase):
         self.persist(type="m.room.create", key="", creator=USER_ID)
         self.check("get_invited_rooms_for_local_user", [USER_ID_2], [])
         event = self.persist(type="m.room.member", key=USER_ID_2, membership="invite")
+        assert event.internal_metadata.stream_ordering is not None
 
         self.replicate()
 
@@ -230,6 +231,7 @@ class EventsWorkerStoreTestCase(BaseSlavedStoreTestCase):
         j2 = self.persist(
             type="m.room.member", sender=USER_ID_2, key=USER_ID_2, membership="join"
         )
+        assert j2.internal_metadata.stream_ordering is not None
         self.replicate()
 
         expected_pos = PersistedEventPosition(
@@ -287,6 +289,7 @@ class EventsWorkerStoreTestCase(BaseSlavedStoreTestCase):
             )
         )
         self.replicate()
+        assert j2.internal_metadata.stream_ordering is not None
 
         event_source = RoomEventSource(self.hs)
         event_source.store = self.slaved_store
@@ -336,10 +339,10 @@ class EventsWorkerStoreTestCase(BaseSlavedStoreTestCase):
 
     event_id = 0
 
-    def persist(self, backfill=False, **kwargs):
+    def persist(self, backfill=False, **kwargs) -> FrozenEvent:
         """
         Returns:
-            synapse.events.FrozenEvent: The event that was persisted.
+            The event that was persisted.
         """
         event, context = self.build_event(**kwargs)
 
diff --git a/tests/replication/test_multi_media_repo.py b/tests/replication/test_multi_media_repo.py
index 13aa5eb51a..96cdf2c45b 100644
--- a/tests/replication/test_multi_media_repo.py
+++ b/tests/replication/test_multi_media_repo.py
@@ -15,8 +15,9 @@ import logging
 import os
 from typing import Optional, Tuple
 
+from twisted.internet.interfaces import IOpenSSLServerConnectionCreator
 from twisted.internet.protocol import Factory
-from twisted.protocols.tls import TLSMemoryBIOFactory
+from twisted.protocols.tls import TLSMemoryBIOFactory, TLSMemoryBIOProtocol
 from twisted.web.http import HTTPChannel
 from twisted.web.server import Request
 
@@ -102,7 +103,7 @@ class MediaRepoShardTestCase(BaseMultiWorkerStreamTestCase):
         )
 
         # fish the test server back out of the server-side TLS protocol.
-        http_server = server_tls_protocol.wrappedProtocol
+        http_server: HTTPChannel = server_tls_protocol.wrappedProtocol  # type: ignore[assignment]
 
         # give the reactor a pump to get the TLS juices flowing.
         self.reactor.pump((0.1,))
@@ -238,16 +239,15 @@ def get_connection_factory():
     return test_server_connection_factory
 
 
-def _build_test_server(connection_creator):
+def _build_test_server(
+    connection_creator: IOpenSSLServerConnectionCreator,
+) -> TLSMemoryBIOProtocol:
     """Construct a test server
 
     This builds an HTTP channel, wrapped with a TLSMemoryBIOProtocol
 
     Args:
-        connection_creator (IOpenSSLServerConnectionCreator): thing to build
-            SSL connections
-        sanlist (list[bytes]): list of the SAN entries for the cert returned
-            by the server
+        connection_creator: thing to build SSL connections
 
     Returns:
         TLSMemoryBIOProtocol
diff --git a/tests/server_notices/test_resource_limits_server_notices.py b/tests/server_notices/test_resource_limits_server_notices.py
index bf403045e9..7cbc40736c 100644
--- a/tests/server_notices/test_resource_limits_server_notices.py
+++ b/tests/server_notices/test_resource_limits_server_notices.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Tuple
 from unittest.mock import Mock
 
 from twisted.test.proto_helpers import MemoryReactor
@@ -350,14 +351,15 @@ class TestResourceLimitsServerNoticesWithRealRooms(unittest.HomeserverTestCase):
 
         self.assertTrue(notice_in_room, "No server notice in room")
 
-    def _trigger_notice_and_join(self):
+    def _trigger_notice_and_join(self) -> Tuple[str, str, str]:
         """Creates enough active users to hit the MAU limit and trigger a system notice
         about it, then joins the system notices room with one of the users created.
 
         Returns:
-            user_id (str): The ID of the user that joined the room.
-            tok (str): The access token of the user that joined the room.
-            room_id (str): The ID of the room that's been joined.
+            A tuple of:
+                user_id: The ID of the user that joined the room.
+                tok: The access token of the user that joined the room.
+                room_id: The ID of the room that's been joined.
         """
         user_id = None
         tok = None
diff --git a/tests/unittest.py b/tests/unittest.py
index 5116be338e..a120c2976c 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -360,13 +360,13 @@ class HomeserverTestCase(TestCase):
                 store.db_pool.updates.do_next_background_update(False), by=0.1
             )
 
-    def make_homeserver(self, reactor, clock):
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock):
         """
         Make and return a homeserver.
 
         Args:
             reactor: A Twisted Reactor, or something that pretends to be one.
-            clock (synapse.util.Clock): The Clock, associated with the reactor.
+            clock: The Clock, associated with the reactor.
 
         Returns:
             A homeserver suitable for testing.
@@ -426,9 +426,8 @@ class HomeserverTestCase(TestCase):
 
         Args:
             reactor: A Twisted Reactor, or something that pretends to be one.
-            clock (synapse.util.Clock): The Clock, associated with the reactor.
-            homeserver (synapse.server.HomeServer): The HomeServer to test
-            against.
+            clock: The Clock, associated with the reactor.
+            homeserver: The HomeServer to test against.
 
         Function to optionally be overridden in subclasses.
         """
@@ -452,11 +451,10 @@ class HomeserverTestCase(TestCase):
         given content.
 
         Args:
-            method (bytes/unicode): The HTTP request method ("verb").
-            path (bytes/unicode): The HTTP path, suitably URL encoded (e.g.
-            escaped UTF-8 & spaces and such).
-            content (bytes or dict): The body of the request. JSON-encoded, if
-            a dict.
+            method: The HTTP request method ("verb").
+            path: The HTTP path, suitably URL encoded (e.g. escaped UTF-8 & spaces
+                and such).
+            content: The body of the request. JSON-encoded, if a dict.
             shorthand: Whether to try and be helpful and prefix the given URL
             with the usual REST API path, if it doesn't contain it.
             federation_auth_origin: if set to not-None, we will add a fake
-- 
cgit 1.5.1


From 618e4ab81b70e37bdb8e9224bd84fcfe4b15bdea Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Wed, 16 Nov 2022 15:25:35 +0000
Subject: Fix an invalid comparison of `UserPresenceState` to `str` (#14393)

---
 changelog.d/14393.bugfix        |  1 +
 synapse/handlers/presence.py    |  2 +-
 tests/handlers/test_presence.py | 41 +++++++++++++++++++++++++++++++++++------
 tests/module_api/test_api.py    |  3 +++
 tests/replication/_base.py      |  7 ++++++-
 5 files changed, 46 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/14393.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14393.bugfix b/changelog.d/14393.bugfix
new file mode 100644
index 0000000000..97177bc62f
--- /dev/null
+++ b/changelog.d/14393.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in 1.58.0 where a user with presence state 'org.matrix.msc3026.busy' would mistakenly be set to 'online' when calling `/sync` or `/events` on a worker process.
\ No newline at end of file
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index b7bc787636..cf08737d11 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -478,7 +478,7 @@ class WorkerPresenceHandler(BasePresenceHandler):
             return _NullContextManager()
 
         prev_state = await self.current_state_for_user(user_id)
-        if prev_state != PresenceState.BUSY:
+        if prev_state.state != PresenceState.BUSY:
             # We set state here but pass ignore_status_msg = True as we don't want to
             # cause the status message to be cleared.
             # Note that this causes last_active_ts to be incremented which is not
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index c96dc6caf2..c5981ff965 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -15,6 +15,7 @@
 from typing import Optional
 from unittest.mock import Mock, call
 
+from parameterized import parameterized
 from signedjson.key import generate_signing_key
 
 from synapse.api.constants import EventTypes, Membership, PresenceState
@@ -37,6 +38,7 @@ from synapse.rest.client import room
 from synapse.types import UserID, get_domain_from_id
 
 from tests import unittest
+from tests.replication._base import BaseMultiWorkerStreamTestCase
 
 
 class PresenceUpdateTestCase(unittest.HomeserverTestCase):
@@ -505,7 +507,7 @@ class PresenceTimeoutTestCase(unittest.TestCase):
         self.assertEqual(state, new_state)
 
 
-class PresenceHandlerTestCase(unittest.HomeserverTestCase):
+class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
     def prepare(self, reactor, clock, hs):
         self.presence_handler = hs.get_presence_handler()
         self.clock = hs.get_clock()
@@ -716,20 +718,47 @@ class PresenceHandlerTestCase(unittest.HomeserverTestCase):
         # our status message should be the same as it was before
         self.assertEqual(state.status_msg, status_msg)
 
-    def test_set_presence_from_syncing_keeps_busy(self):
-        """Test that presence set by syncing doesn't affect busy status"""
-        # while this isn't the default
-        self.presence_handler._busy_presence_enabled = True
+    @parameterized.expand([(False,), (True,)])
+    @unittest.override_config(
+        {
+            "experimental_features": {
+                "msc3026_enabled": True,
+            },
+        }
+    )
+    def test_set_presence_from_syncing_keeps_busy(self, test_with_workers: bool):
+        """Test that presence set by syncing doesn't affect busy status
 
+        Args:
+            test_with_workers: If True, check the presence state of the user by calling
+                /sync against a worker, rather than the main process.
+        """
         user_id = "@test:server"
         status_msg = "I'm busy!"
 
+        # By default, we call /sync against the main process.
+        worker_to_sync_against = self.hs
+        if test_with_workers:
+            # Create a worker and use it to handle /sync traffic instead.
+            # This is used to test that presence changes get replicated from workers
+            # to the main process correctly.
+            worker_to_sync_against = self.make_worker_hs(
+                "synapse.app.generic_worker", {"worker_name": "presence_writer"}
+            )
+
+        # Set presence to BUSY
         self._set_presencestate_with_status_msg(user_id, PresenceState.BUSY, status_msg)
 
+        # Perform a sync with a presence state other than busy. This should NOT change
+        # our presence status; we only change from busy if we explicitly set it via
+        # /presence/*.
         self.get_success(
-            self.presence_handler.user_syncing(user_id, True, PresenceState.ONLINE)
+            worker_to_sync_against.get_presence_handler().user_syncing(
+                user_id, True, PresenceState.ONLINE
+            )
         )
 
+        # Check against the main process that the user's presence did not change.
         state = self.get_success(
             self.presence_handler.get_state(UserID.from_string(user_id))
         )
diff --git a/tests/module_api/test_api.py b/tests/module_api/test_api.py
index 02cef6f876..058ca57e55 100644
--- a/tests/module_api/test_api.py
+++ b/tests/module_api/test_api.py
@@ -778,8 +778,11 @@ def _test_sending_local_online_presence_to_local_user(
             worker process. The test users will still sync with the main process. The purpose of testing
             with a worker is to check whether a Synapse module running on a worker can inform other workers/
             the main process that they should include additional presence when a user next syncs.
+            If this argument is True, `test_case` MUST be an instance of BaseMultiWorkerStreamTestCase.
     """
     if test_with_workers:
+        assert isinstance(test_case, BaseMultiWorkerStreamTestCase)
+
         # Create a worker process to make module_api calls against
         worker_hs = test_case.make_worker_hs(
             "synapse.app.generic_worker", {"worker_name": "presence_writer"}
diff --git a/tests/replication/_base.py b/tests/replication/_base.py
index 121f3d8d65..3029a16dda 100644
--- a/tests/replication/_base.py
+++ b/tests/replication/_base.py
@@ -542,8 +542,13 @@ class FakeRedisPubSubProtocol(Protocol):
             self.send("OK")
         elif command == b"GET":
             self.send(None)
+
+        # Connection keep-alives.
+        elif command == b"PING":
+            self.send("PONG")
+
         else:
-            raise Exception("Unknown command")
+            raise Exception(f"Unknown command: {command}")
 
     def send(self, msg):
         """Send a message back to the client."""
-- 
cgit 1.5.1


From c15e9a0edb696990365ac5a4e5be847b5ae23921 Mon Sep 17 00:00:00 2001
From: realtyem <realtyem@gmail.com>
Date: Wed, 16 Nov 2022 16:16:25 -0600
Subject: Remove need for `worker_main_http_uri` setting to use /keys/upload.
 (#14400)

---
 changelog.d/14400.misc                |   1 +
 docker/configure_workers_and_start.py |   5 +-
 docs/workers.md                       |   7 +--
 synapse/app/generic_worker.py         | 103 +---------------------------------
 synapse/config/workers.py             |   6 ++
 synapse/replication/http/devices.py   |  67 ++++++++++++++++++++++
 synapse/rest/client/keys.py           |  68 ++++++++++++++++------
 7 files changed, 130 insertions(+), 127 deletions(-)
 create mode 100644 changelog.d/14400.misc

(limited to 'synapse')

diff --git a/changelog.d/14400.misc b/changelog.d/14400.misc
new file mode 100644
index 0000000000..6e025329c4
--- /dev/null
+++ b/changelog.d/14400.misc
@@ -0,0 +1 @@
+Remove the `worker_main_http_uri` configuration setting. This is now handled via internal replication.
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index 62b1bab297..c1e1544536 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -213,10 +213,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
         "listener_resources": ["client", "replication"],
         "endpoint_patterns": ["^/_matrix/client/(api/v1|r0|v3|unstable)/keys/upload"],
         "shared_extra_conf": {},
-        "worker_extra_conf": (
-            "worker_main_http_uri: http://127.0.0.1:%d"
-            % (MAIN_PROCESS_HTTP_LISTENER_PORT,)
-        ),
+        "worker_extra_conf": "",
     },
     "account_data": {
         "app": "synapse.app.generic_worker",
diff --git a/docs/workers.md b/docs/workers.md
index 7ee8801161..4604650803 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -135,8 +135,8 @@ In the config file for each worker, you must specify:
    [`worker_replication_http_port`](usage/configuration/config_documentation.md#worker_replication_http_port)).
  * If handling HTTP requests, a [`worker_listeners`](usage/configuration/config_documentation.md#worker_listeners) option
    with an `http` listener.
- * If handling the `^/_matrix/client/v3/keys/upload` endpoint, the HTTP URI for
-   the main process (`worker_main_http_uri`).
+ * **Synapse 1.71 and older:** if handling the `^/_matrix/client/v3/keys/upload` endpoint, the HTTP URI for
+   the main process (`worker_main_http_uri`). This config option is no longer required and is ignored when running Synapse 1.72 and newer.
 
 For example:
 
@@ -221,7 +221,6 @@ information.
     ^/_matrix/client/(api/v1|r0|v3|unstable)/search$
 
     # Encryption requests
-    # Note that ^/_matrix/client/(r0|v3|unstable)/keys/upload/ requires `worker_main_http_uri`
     ^/_matrix/client/(r0|v3|unstable)/keys/query$
     ^/_matrix/client/(r0|v3|unstable)/keys/changes$
     ^/_matrix/client/(r0|v3|unstable)/keys/claim$
@@ -376,7 +375,7 @@ responsible for
 - persisting them to the DB, and finally
 - updating the events stream.
 
-Because load is sharded in this way, you *must* restart all worker instances when 
+Because load is sharded in this way, you *must* restart all worker instances when
 adding or removing event persisters.
 
 An `event_persister` should not be mistaken for an `event_creator`.
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 1d9aef45c2..74909b7d4a 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -14,14 +14,12 @@
 # limitations under the License.
 import logging
 import sys
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List
 
-from twisted.internet import address
 from twisted.web.resource import Resource
 
 import synapse
 import synapse.events
-from synapse.api.errors import HttpResponseException, RequestSendFailed, SynapseError
 from synapse.api.urls import (
     CLIENT_API_PREFIX,
     FEDERATION_PREFIX,
@@ -43,8 +41,6 @@ from synapse.config.logger import setup_logging
 from synapse.config.server import ListenerConfig
 from synapse.federation.transport.server import TransportLayerServer
 from synapse.http.server import JsonResource, OptionsResource
-from synapse.http.servlet import RestServlet, parse_json_object_from_request
-from synapse.http.site import SynapseRequest
 from synapse.logging.context import LoggingContext
 from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource
@@ -70,12 +66,12 @@ from synapse.rest.client import (
     versions,
     voip,
 )
-from synapse.rest.client._base import client_patterns
 from synapse.rest.client.account import ThreepidRestServlet, WhoamiRestServlet
 from synapse.rest.client.devices import DevicesRestServlet
 from synapse.rest.client.keys import (
     KeyChangesServlet,
     KeyQueryServlet,
+    KeyUploadServlet,
     OneTimeKeyServlet,
 )
 from synapse.rest.client.register import (
@@ -132,107 +128,12 @@ from synapse.storage.databases.main.transactions import TransactionWorkerStore
 from synapse.storage.databases.main.ui_auth import UIAuthWorkerStore
 from synapse.storage.databases.main.user_directory import UserDirectoryStore
 from synapse.storage.databases.main.user_erasure_store import UserErasureWorkerStore
-from synapse.types import JsonDict
 from synapse.util import SYNAPSE_VERSION
 from synapse.util.httpresourcetree import create_resource_tree
 
 logger = logging.getLogger("synapse.app.generic_worker")
 
 
-class KeyUploadServlet(RestServlet):
-    """An implementation of the `KeyUploadServlet` that responds to read only
-    requests, but otherwise proxies through to the master instance.
-    """
-
-    PATTERNS = client_patterns("/keys/upload(/(?P<device_id>[^/]+))?$")
-
-    def __init__(self, hs: HomeServer):
-        """
-        Args:
-            hs: server
-        """
-        super().__init__()
-        self.auth = hs.get_auth()
-        self.store = hs.get_datastores().main
-        self.http_client = hs.get_simple_http_client()
-        self.main_uri = hs.config.worker.worker_main_http_uri
-
-    async def on_POST(
-        self, request: SynapseRequest, device_id: Optional[str]
-    ) -> Tuple[int, JsonDict]:
-        requester = await self.auth.get_user_by_req(request, allow_guest=True)
-        user_id = requester.user.to_string()
-        body = parse_json_object_from_request(request)
-
-        if device_id is not None:
-            # passing the device_id here is deprecated; however, we allow it
-            # for now for compatibility with older clients.
-            if requester.device_id is not None and device_id != requester.device_id:
-                logger.warning(
-                    "Client uploading keys for a different device "
-                    "(logged in as %s, uploading for %s)",
-                    requester.device_id,
-                    device_id,
-                )
-        else:
-            device_id = requester.device_id
-
-        if device_id is None:
-            raise SynapseError(
-                400, "To upload keys, you must pass device_id when authenticating"
-            )
-
-        if body:
-            # They're actually trying to upload something, proxy to main synapse.
-
-            # Proxy headers from the original request, such as the auth headers
-            # (in case the access token is there) and the original IP /
-            # User-Agent of the request.
-            headers: Dict[bytes, List[bytes]] = {
-                header: list(request.requestHeaders.getRawHeaders(header, []))
-                for header in (b"Authorization", b"User-Agent")
-            }
-            # Add the previous hop to the X-Forwarded-For header.
-            x_forwarded_for = list(
-                request.requestHeaders.getRawHeaders(b"X-Forwarded-For", [])
-            )
-            # we use request.client here, since we want the previous hop, not the
-            # original client (as returned by request.getClientAddress()).
-            if isinstance(request.client, (address.IPv4Address, address.IPv6Address)):
-                previous_host = request.client.host.encode("ascii")
-                # If the header exists, add to the comma-separated list of the first
-                # instance of the header. Otherwise, generate a new header.
-                if x_forwarded_for:
-                    x_forwarded_for = [x_forwarded_for[0] + b", " + previous_host]
-                    x_forwarded_for.extend(x_forwarded_for[1:])
-                else:
-                    x_forwarded_for = [previous_host]
-            headers[b"X-Forwarded-For"] = x_forwarded_for
-
-            # Replicate the original X-Forwarded-Proto header. Note that
-            # XForwardedForRequest overrides isSecure() to give us the original protocol
-            # used by the client, as opposed to the protocol used by our upstream proxy
-            # - which is what we want here.
-            headers[b"X-Forwarded-Proto"] = [
-                b"https" if request.isSecure() else b"http"
-            ]
-
-            try:
-                result = await self.http_client.post_json_get_json(
-                    self.main_uri + request.uri.decode("ascii"), body, headers=headers
-                )
-            except HttpResponseException as e:
-                raise e.to_synapse_error() from e
-            except RequestSendFailed as e:
-                raise SynapseError(502, "Failed to talk to master") from e
-
-            return 200, result
-        else:
-            # Just interested in counts.
-            result = await self.store.count_e2e_one_time_keys(user_id, device_id)
-            return 200, {"one_time_key_counts": result}
-
-
 class GenericWorkerSlavedStore(
     # FIXME(#3714): We need to add UserDirectoryStore as we write directly
     # rather than going via the correct worker.
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 88b3168cbc..c4e2273a95 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -162,7 +162,13 @@ class WorkerConfig(Config):
         self.worker_name = config.get("worker_name", self.worker_app)
         self.instance_name = self.worker_name or "master"
 
+        # FIXME: Remove this check after a suitable amount of time.
         self.worker_main_http_uri = config.get("worker_main_http_uri", None)
+        if self.worker_main_http_uri is not None:
+            logger.warning(
+                "The config option worker_main_http_uri is unused since Synapse 1.72. "
+                "It can be safely removed from your configuration."
+            )
 
         # This option is really only here to support `--manhole` command line
         # argument.
diff --git a/synapse/replication/http/devices.py b/synapse/replication/http/devices.py
index 3d63645726..c21629def8 100644
--- a/synapse/replication/http/devices.py
+++ b/synapse/replication/http/devices.py
@@ -18,6 +18,7 @@ from typing import TYPE_CHECKING, Tuple
 from twisted.web.server import Request
 
 from synapse.http.server import HttpServer
+from synapse.http.servlet import parse_json_object_from_request
 from synapse.replication.http._base import ReplicationEndpoint
 from synapse.types import JsonDict
 
@@ -78,5 +79,71 @@ class ReplicationUserDevicesResyncRestServlet(ReplicationEndpoint):
         return 200, user_devices
 
 
+class ReplicationUploadKeysForUserRestServlet(ReplicationEndpoint):
+    """Ask master to upload keys for the user and send them out over federation to
+    update other servers.
+
+    For now, only the master is permitted to handle key upload requests;
+    any worker can handle key query requests (since they're read-only).
+
+    Calls e2e_keys_handler.upload_keys_for_user(user_id, device_id, keys) on
+    the main process to accomplish this.
+
+    Defined in https://spec.matrix.org/v1.4/client-server-api/#post_matrixclientv3keysupload
+    Request format (borrowed and expanded from KeyUploadServlet):
+
+        POST /_synapse/replication/upload_keys_for_user
+
+    {
+        "user_id": "<user_id>",
+        "device_id": "<device_id>",
+        "keys": {
+            ....this part can be found in KeyUploadServlet in rest/client/keys.py....
+        }
+    }
+
+    Response is equivalent to `/_matrix/client/v3/keys/upload` found in KeyUploadServlet
+
+    """
+
+    NAME = "upload_keys_for_user"
+    PATH_ARGS = ()
+    CACHE = False
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__(hs)
+
+        self.e2e_keys_handler = hs.get_e2e_keys_handler()
+        self.store = hs.get_datastores().main
+        self.clock = hs.get_clock()
+
+    @staticmethod
+    async def _serialize_payload(  # type: ignore[override]
+        user_id: str, device_id: str, keys: JsonDict
+    ) -> JsonDict:
+
+        return {
+            "user_id": user_id,
+            "device_id": device_id,
+            "keys": keys,
+        }
+
+    async def _handle_request(  # type: ignore[override]
+        self, request: Request
+    ) -> Tuple[int, JsonDict]:
+        content = parse_json_object_from_request(request)
+
+        user_id = content["user_id"]
+        device_id = content["device_id"]
+        keys = content["keys"]
+
+        results = await self.e2e_keys_handler.upload_keys_for_user(
+            user_id, device_id, keys
+        )
+
+        return 200, results
+
+
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     ReplicationUserDevicesResyncRestServlet(hs).register(http_server)
+    ReplicationUploadKeysForUserRestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py
index f653d2a3e1..ee038c7192 100644
--- a/synapse/rest/client/keys.py
+++ b/synapse/rest/client/keys.py
@@ -27,6 +27,7 @@ from synapse.http.servlet import (
 )
 from synapse.http.site import SynapseRequest
 from synapse.logging.opentracing import log_kv, set_tag
+from synapse.replication.http.devices import ReplicationUploadKeysForUserRestServlet
 from synapse.rest.client._base import client_patterns, interactive_auth_handler
 from synapse.types import JsonDict, StreamToken
 from synapse.util.cancellation import cancellable
@@ -43,24 +44,48 @@ class KeyUploadServlet(RestServlet):
     Content-Type: application/json
 
     {
-      "device_keys": {
-        "user_id": "<user_id>",
-        "device_id": "<device_id>",
-        "valid_until_ts": <millisecond_timestamp>,
-        "algorithms": [
-          "m.olm.curve25519-aes-sha2",
-        ]
-        "keys": {
-          "<algorithm>:<device_id>": "<key_base64>",
+        "device_keys": {
+            "user_id": "<user_id>",
+            "device_id": "<device_id>",
+            "valid_until_ts": <millisecond_timestamp>,
+            "algorithms": [
+                "m.olm.curve25519-aes-sha2",
+            ],
+            "keys": {
+                "<algorithm>:<device_id>": "<key_base64>",
+            },
+            "signatures": {
+                "<user_id>": {
+                    "<algorithm>:<device_id>": "<signature_base64>"
+                }
+            }
+        },
+        "fallback_keys": {
+            "<algorithm>:<device_id>": "<key_base64>",
+            "signed_<algorithm>:<device_id>": {
+                "fallback": true,
+                "key": "<key_base64>",
+                "signatures": {
+                    "<user_id>": {
+                        "<algorithm>:<device_id>": "<key_base64>"
+                    }
+                }
+            }
+        },
+        "one_time_keys": {
+            "<algorithm>:<key_id>": "<key_base64>"
         },
-        "signatures:" {
-          "<user_id>" {
-            "<algorithm>:<device_id>": "<signature_base64>"
-      } } },
-      "one_time_keys": {
-        "<algorithm>:<key_id>": "<key_base64>"
-      },
     }
+
+    response, e.g.:
+
+    {
+        "one_time_key_counts": {
+            "curve25519": 10,
+            "signed_curve25519": 20
+        }
+    }
+
     """
 
     PATTERNS = client_patterns("/keys/upload(/(?P<device_id>[^/]+))?$")
@@ -71,6 +96,13 @@ class KeyUploadServlet(RestServlet):
         self.e2e_keys_handler = hs.get_e2e_keys_handler()
         self.device_handler = hs.get_device_handler()
 
+        if hs.config.worker.worker_app is None:
+            # if main process
+            self.key_uploader = self.e2e_keys_handler.upload_keys_for_user
+        else:
+            # then a worker
+            self.key_uploader = ReplicationUploadKeysForUserRestServlet.make_client(hs)
+
     async def on_POST(
         self, request: SynapseRequest, device_id: Optional[str]
     ) -> Tuple[int, JsonDict]:
@@ -109,8 +141,8 @@ class KeyUploadServlet(RestServlet):
                 400, "To upload keys, you must pass device_id when authenticating"
             )
 
-        result = await self.e2e_keys_handler.upload_keys_for_user(
-            user_id, device_id, body
+        result = await self.key_uploader(
+            user_id=user_id, device_id=device_id, keys=body
         )
         return 200, result
 
-- 
cgit 1.5.1


From 115f0eb2334b13665e5c112bd87f95ea393c9047 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 16 Nov 2022 22:16:46 +0000
Subject: Reintroduce #14376, with bugfix for monoliths (#14468)

* Add tests for StreamIdGenerator

* Drive-by: annotate all defs

* Revert "Revert "Remove slaved id tracker (#14376)" (#14463)"

This reverts commit d63814fd736fed5d3d45ff3af5e6d3bfae50c439, which in
turn reverted 36097e88c4da51fce6556a58c49bd675f4cf20ab. This restores
the latter.

* Fix StreamIdGenerator not handling unpersisted IDs

Spotted by @erikjohnston.

Closes #14456.

* Changelog

Co-authored-by: Nick Mills-Barrett <nick@fizzadar.com>
Co-authored-by: Erik Johnston <erik@matrix.org>
---
 changelog.d/14376.misc                             |   1 +
 changelog.d/14468.misc                             |   1 +
 mypy.ini                                           |   3 +
 synapse/replication/slave/__init__.py              |  13 --
 synapse/replication/slave/storage/__init__.py      |  13 --
 .../slave/storage/_slaved_id_tracker.py            |  50 -------
 synapse/storage/databases/main/account_data.py     |  30 ++--
 synapse/storage/databases/main/devices.py          |  36 ++---
 synapse/storage/databases/main/events_worker.py    |  35 ++---
 synapse/storage/databases/main/push_rule.py        |  17 +--
 synapse/storage/databases/main/pusher.py           |  24 ++-
 synapse/storage/databases/main/receipts.py         |  18 +--
 synapse/storage/util/id_generators.py              |  13 +-
 tests/storage/test_id_generators.py                | 162 +++++++++++++++++++--
 14 files changed, 230 insertions(+), 186 deletions(-)
 create mode 100644 changelog.d/14376.misc
 create mode 100644 changelog.d/14468.misc
 delete mode 100644 synapse/replication/slave/__init__.py
 delete mode 100644 synapse/replication/slave/storage/__init__.py
 delete mode 100644 synapse/replication/slave/storage/_slaved_id_tracker.py

(limited to 'synapse')

diff --git a/changelog.d/14376.misc b/changelog.d/14376.misc
new file mode 100644
index 0000000000..2ca326fea6
--- /dev/null
+++ b/changelog.d/14376.misc
@@ -0,0 +1 @@
+Remove old stream ID tracking code. Contributed by Nick @Beeper (@fizzadar).
diff --git a/changelog.d/14468.misc b/changelog.d/14468.misc
new file mode 100644
index 0000000000..2ca326fea6
--- /dev/null
+++ b/changelog.d/14468.misc
@@ -0,0 +1 @@
+Remove old stream ID tracking code. Contributed by Nick @Beeper (@fizzadar).
diff --git a/mypy.ini b/mypy.ini
index 8f1141a239..53512b2584 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -117,6 +117,9 @@ disallow_untyped_defs = True
 [mypy-tests.state.test_profile]
 disallow_untyped_defs = True
 
+[mypy-tests.storage.test_id_generators]
+disallow_untyped_defs = True
+
 [mypy-tests.storage.test_profile]
 disallow_untyped_defs = True
 
diff --git a/synapse/replication/slave/__init__.py b/synapse/replication/slave/__init__.py
deleted file mode 100644
index f43a360a80..0000000000
--- a/synapse/replication/slave/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/synapse/replication/slave/storage/__init__.py b/synapse/replication/slave/storage/__init__.py
deleted file mode 100644
index f43a360a80..0000000000
--- a/synapse/replication/slave/storage/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/synapse/replication/slave/storage/_slaved_id_tracker.py b/synapse/replication/slave/storage/_slaved_id_tracker.py
deleted file mode 100644
index 8f3f953ed4..0000000000
--- a/synapse/replication/slave/storage/_slaved_id_tracker.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from typing import List, Optional, Tuple
-
-from synapse.storage.database import LoggingDatabaseConnection
-from synapse.storage.util.id_generators import AbstractStreamIdTracker, _load_current_id
-
-
-class SlavedIdTracker(AbstractStreamIdTracker):
-    """Tracks the "current" stream ID of a stream with a single writer.
-
-    See `AbstractStreamIdTracker` for more details.
-
-    Note that this class does not work correctly when there are multiple
-    writers.
-    """
-
-    def __init__(
-        self,
-        db_conn: LoggingDatabaseConnection,
-        table: str,
-        column: str,
-        extra_tables: Optional[List[Tuple[str, str]]] = None,
-        step: int = 1,
-    ):
-        self.step = step
-        self._current = _load_current_id(db_conn, table, column, step)
-        if extra_tables:
-            for table, column in extra_tables:
-                self.advance(None, _load_current_id(db_conn, table, column))
-
-    def advance(self, instance_name: Optional[str], new_id: int) -> None:
-        self._current = (max if self.step > 0 else min)(self._current, new_id)
-
-    def get_current_token(self) -> int:
-        return self._current
-
-    def get_current_token_for_writer(self, instance_name: str) -> int:
-        return self.get_current_token()
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index c38b8a9e5a..282687ebce 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -27,7 +27,6 @@ from typing import (
 )
 
 from synapse.api.constants import AccountDataTypes
-from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams import AccountDataStream, TagAccountDataStream
 from synapse.storage._base import db_to_json
 from synapse.storage.database import (
@@ -68,12 +67,11 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
         # to write account data. A value of `True` implies that `_account_data_id_gen`
         # is an `AbstractStreamIdGenerator` and not just a tracker.
         self._account_data_id_gen: AbstractStreamIdTracker
+        self._can_write_to_account_data = (
+            self._instance_name in hs.config.worker.writers.account_data
+        )
 
         if isinstance(database.engine, PostgresEngine):
-            self._can_write_to_account_data = (
-                self._instance_name in hs.config.worker.writers.account_data
-            )
-
             self._account_data_id_gen = MultiWriterIdGenerator(
                 db_conn=db_conn,
                 db=database,
@@ -95,21 +93,13 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
             # updated over replication. (Multiple writers are not supported for
             # SQLite).
-            if self._instance_name in hs.config.worker.writers.account_data:
-                self._can_write_to_account_data = True
-                self._account_data_id_gen = StreamIdGenerator(
-                    db_conn,
-                    "room_account_data",
-                    "stream_id",
-                    extra_tables=[("room_tags_revisions", "stream_id")],
-                )
-            else:
-                self._account_data_id_gen = SlavedIdTracker(
-                    db_conn,
-                    "room_account_data",
-                    "stream_id",
-                    extra_tables=[("room_tags_revisions", "stream_id")],
-                )
+            self._account_data_id_gen = StreamIdGenerator(
+                db_conn,
+                "room_account_data",
+                "stream_id",
+                extra_tables=[("room_tags_revisions", "stream_id")],
+                is_writer=self._instance_name in hs.config.worker.writers.account_data,
+            )
 
         account_max = self.get_max_account_data_stream_id()
         self._account_data_stream_cache = StreamChangeCache(
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index e114c733d1..57230df5ae 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -38,7 +38,6 @@ from synapse.logging.opentracing import (
     whitelisted_homeserver,
 )
 from synapse.metrics.background_process_metrics import wrap_as_background_process
-from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams._base import DeviceListsStream, UserSignatureStream
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
 from synapse.storage.database import (
@@ -86,28 +85,19 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
     ):
         super().__init__(database, db_conn, hs)
 
-        if hs.config.worker.worker_app is None:
-            self._device_list_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
-                db_conn,
-                "device_lists_stream",
-                "stream_id",
-                extra_tables=[
-                    ("user_signature_stream", "stream_id"),
-                    ("device_lists_outbound_pokes", "stream_id"),
-                    ("device_lists_changes_in_room", "stream_id"),
-                ],
-            )
-        else:
-            self._device_list_id_gen = SlavedIdTracker(
-                db_conn,
-                "device_lists_stream",
-                "stream_id",
-                extra_tables=[
-                    ("user_signature_stream", "stream_id"),
-                    ("device_lists_outbound_pokes", "stream_id"),
-                    ("device_lists_changes_in_room", "stream_id"),
-                ],
-            )
+        # In the worker store this is an ID tracker which we overwrite in the non-worker
+        # class below that is used on the main process.
+        self._device_list_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
+            db_conn,
+            "device_lists_stream",
+            "stream_id",
+            extra_tables=[
+                ("user_signature_stream", "stream_id"),
+                ("device_lists_outbound_pokes", "stream_id"),
+                ("device_lists_changes_in_room", "stream_id"),
+            ],
+            is_writer=hs.config.worker.worker_app is None,
+        )
 
         # Type-ignore: _device_list_id_gen is mixed in from either DataStore (as a
         # StreamIdGenerator) or SlavedDataStore (as a SlavedIdTracker).
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 8a104f7e93..01e935edef 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -59,7 +59,6 @@ from synapse.metrics.background_process_metrics import (
     run_as_background_process,
     wrap_as_background_process,
 )
-from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams import BackfillStream
 from synapse.replication.tcp.streams.events import EventsStream
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
@@ -213,26 +212,20 @@ class EventsWorkerStore(SQLBaseStore):
             # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
             # updated over replication. (Multiple writers are not supported for
             # SQLite).
-            if hs.get_instance_name() in hs.config.worker.writers.events:
-                self._stream_id_gen = StreamIdGenerator(
-                    db_conn,
-                    "events",
-                    "stream_ordering",
-                )
-                self._backfill_id_gen = StreamIdGenerator(
-                    db_conn,
-                    "events",
-                    "stream_ordering",
-                    step=-1,
-                    extra_tables=[("ex_outlier_stream", "event_stream_ordering")],
-                )
-            else:
-                self._stream_id_gen = SlavedIdTracker(
-                    db_conn, "events", "stream_ordering"
-                )
-                self._backfill_id_gen = SlavedIdTracker(
-                    db_conn, "events", "stream_ordering", step=-1
-                )
+            self._stream_id_gen = StreamIdGenerator(
+                db_conn,
+                "events",
+                "stream_ordering",
+                is_writer=hs.get_instance_name() in hs.config.worker.writers.events,
+            )
+            self._backfill_id_gen = StreamIdGenerator(
+                db_conn,
+                "events",
+                "stream_ordering",
+                step=-1,
+                extra_tables=[("ex_outlier_stream", "event_stream_ordering")],
+                is_writer=hs.get_instance_name() in hs.config.worker.writers.events,
+            )
 
         events_max = self._stream_id_gen.get_current_token()
         curr_state_delta_prefill, min_curr_state_delta_id = self.db_pool.get_cache_dict(
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index 8ae10f6127..12ad44dbb3 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -30,7 +30,6 @@ from typing import (
 
 from synapse.api.errors import StoreError
 from synapse.config.homeserver import ExperimentalConfig
-from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams import PushRulesStream
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
@@ -111,14 +110,14 @@ class PushRulesWorkerStore(
     ):
         super().__init__(database, db_conn, hs)
 
-        if hs.config.worker.worker_app is None:
-            self._push_rules_stream_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
-                db_conn, "push_rules_stream", "stream_id"
-            )
-        else:
-            self._push_rules_stream_id_gen = SlavedIdTracker(
-                db_conn, "push_rules_stream", "stream_id"
-            )
+        # In the worker store this is an ID tracker which we overwrite in the non-worker
+        # class below that is used on the main process.
+        self._push_rules_stream_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
+            db_conn,
+            "push_rules_stream",
+            "stream_id",
+            is_writer=hs.config.worker.worker_app is None,
+        )
 
         push_rules_prefill, push_rules_id = self.db_pool.get_cache_dict(
             db_conn,
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
index 4a01562d45..fee37b9ce4 100644
--- a/synapse/storage/databases/main/pusher.py
+++ b/synapse/storage/databases/main/pusher.py
@@ -27,7 +27,6 @@ from typing import (
 )
 
 from synapse.push import PusherConfig, ThrottleParams
-from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams import PushersStream
 from synapse.storage._base import SQLBaseStore, db_to_json
 from synapse.storage.database import (
@@ -59,20 +58,15 @@ class PusherWorkerStore(SQLBaseStore):
     ):
         super().__init__(database, db_conn, hs)
 
-        if hs.config.worker.worker_app is None:
-            self._pushers_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
-                db_conn,
-                "pushers",
-                "id",
-                extra_tables=[("deleted_pushers", "stream_id")],
-            )
-        else:
-            self._pushers_id_gen = SlavedIdTracker(
-                db_conn,
-                "pushers",
-                "id",
-                extra_tables=[("deleted_pushers", "stream_id")],
-            )
+        # In the worker store this is an ID tracker which we overwrite in the non-worker
+        # class below that is used on the main process.
+        self._pushers_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
+            db_conn,
+            "pushers",
+            "id",
+            extra_tables=[("deleted_pushers", "stream_id")],
+            is_writer=hs.config.worker.worker_app is None,
+        )
 
         self.db_pool.updates.register_background_update_handler(
             "remove_deactivated_pushers",
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index fbf27497ec..a580e4bdda 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -27,7 +27,6 @@ from typing import (
 )
 
 from synapse.api.constants import EduTypes
-from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
 from synapse.replication.tcp.streams import ReceiptsStream
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
 from synapse.storage.database import (
@@ -61,6 +60,9 @@ class ReceiptsWorkerStore(SQLBaseStore):
         hs: "HomeServer",
     ):
         self._instance_name = hs.get_instance_name()
+
+        # In the worker store this is an ID tracker which we overwrite in the non-worker
+        # class below that is used on the main process.
         self._receipts_id_gen: AbstractStreamIdTracker
 
         if isinstance(database.engine, PostgresEngine):
@@ -87,14 +89,12 @@ class ReceiptsWorkerStore(SQLBaseStore):
             # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
             # updated over replication. (Multiple writers are not supported for
             # SQLite).
-            if hs.get_instance_name() in hs.config.worker.writers.receipts:
-                self._receipts_id_gen = StreamIdGenerator(
-                    db_conn, "receipts_linearized", "stream_id"
-                )
-            else:
-                self._receipts_id_gen = SlavedIdTracker(
-                    db_conn, "receipts_linearized", "stream_id"
-                )
+            self._receipts_id_gen = StreamIdGenerator(
+                db_conn,
+                "receipts_linearized",
+                "stream_id",
+                is_writer=hs.get_instance_name() in hs.config.worker.writers.receipts,
+            )
 
         super().__init__(database, db_conn, hs)
 
diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py
index 2dfe4c0b66..0d7108f01b 100644
--- a/synapse/storage/util/id_generators.py
+++ b/synapse/storage/util/id_generators.py
@@ -186,11 +186,13 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
         column: str,
         extra_tables: Iterable[Tuple[str, str]] = (),
         step: int = 1,
+        is_writer: bool = True,
     ) -> None:
         assert step != 0
         self._lock = threading.Lock()
         self._step: int = step
         self._current: int = _load_current_id(db_conn, table, column, step)
+        self._is_writer = is_writer
         for table, column in extra_tables:
             self._current = (max if step > 0 else min)(
                 self._current, _load_current_id(db_conn, table, column, step)
@@ -204,9 +206,11 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
         self._unfinished_ids: OrderedDict[int, int] = OrderedDict()
 
     def advance(self, instance_name: str, new_id: int) -> None:
-        # `StreamIdGenerator` should only be used when there is a single writer,
-        # so replication should never happen.
-        raise Exception("Replication is not supported by StreamIdGenerator")
+        # Advance should never be called on a writer instance, only over replication
+        if self._is_writer:
+            raise Exception("Replication is not supported by writer StreamIdGenerator")
+
+        self._current = (max if self._step > 0 else min)(self._current, new_id)
 
     def get_next(self) -> AsyncContextManager[int]:
         with self._lock:
@@ -249,6 +253,9 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
         return _AsyncCtxManagerWrapper(manager())
 
     def get_current_token(self) -> int:
+        if not self._is_writer:
+            return self._current
+
         with self._lock:
             if self._unfinished_ids:
                 return next(iter(self._unfinished_ids)) - self._step
diff --git a/tests/storage/test_id_generators.py b/tests/storage/test_id_generators.py
index 2d8d1f860f..d6a2b8d274 100644
--- a/tests/storage/test_id_generators.py
+++ b/tests/storage/test_id_generators.py
@@ -16,15 +16,157 @@ from typing import List, Optional
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.server import HomeServer
-from synapse.storage.database import DatabasePool, LoggingTransaction
+from synapse.storage.database import (
+    DatabasePool,
+    LoggingDatabaseConnection,
+    LoggingTransaction,
+)
 from synapse.storage.engines import IncorrectDatabaseSetup
-from synapse.storage.util.id_generators import MultiWriterIdGenerator
+from synapse.storage.types import Cursor
+from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator
 from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
 from tests.utils import USE_POSTGRES_FOR_TESTS
 
 
+class StreamIdGeneratorTestCase(HomeserverTestCase):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.store = hs.get_datastores().main
+        self.db_pool: DatabasePool = self.store.db_pool
+
+        self.get_success(self.db_pool.runInteraction("_setup_db", self._setup_db))
+
+    def _setup_db(self, txn: LoggingTransaction) -> None:
+        txn.execute(
+            """
+            CREATE TABLE foobar (
+                stream_id BIGINT NOT NULL,
+                data TEXT
+            );
+            """
+        )
+        txn.execute("INSERT INTO foobar VALUES (123, 'hello world');")
+
+    def _create_id_generator(self) -> StreamIdGenerator:
+        def _create(conn: LoggingDatabaseConnection) -> StreamIdGenerator:
+            return StreamIdGenerator(
+                db_conn=conn,
+                table="foobar",
+                column="stream_id",
+            )
+
+        return self.get_success_or_raise(self.db_pool.runWithConnection(_create))
+
+    def test_initial_value(self) -> None:
+        """Check that we read the current token from the DB."""
+        id_gen = self._create_id_generator()
+        self.assertEqual(id_gen.get_current_token(), 123)
+
+    def test_single_gen_next(self) -> None:
+        """Check that we correctly increment the current token from the DB."""
+        id_gen = self._create_id_generator()
+
+        async def test_gen_next() -> None:
+            async with id_gen.get_next() as next_id:
+                # We haven't persisted `next_id` yet; current token is still 123
+                self.assertEqual(id_gen.get_current_token(), 123)
+                # But we did learn what the next value is
+                self.assertEqual(next_id, 124)
+
+            # Once the context manager closes we assume that the `next_id` has been
+            # written to the DB.
+            self.assertEqual(id_gen.get_current_token(), 124)
+
+        self.get_success(test_gen_next())
+
+    def test_multiple_gen_nexts(self) -> None:
+        """Check that we handle overlapping calls to gen_next sensibly."""
+        id_gen = self._create_id_generator()
+
+        async def test_gen_next() -> None:
+            ctx1 = id_gen.get_next()
+            ctx2 = id_gen.get_next()
+            ctx3 = id_gen.get_next()
+
+            # Request three new stream IDs.
+            self.assertEqual(await ctx1.__aenter__(), 124)
+            self.assertEqual(await ctx2.__aenter__(), 125)
+            self.assertEqual(await ctx3.__aenter__(), 126)
+
+            # None are persisted: current token unchanged.
+            self.assertEqual(id_gen.get_current_token(), 123)
+
+            # Persist each in turn.
+            await ctx1.__aexit__(None, None, None)
+            self.assertEqual(id_gen.get_current_token(), 124)
+            await ctx2.__aexit__(None, None, None)
+            self.assertEqual(id_gen.get_current_token(), 125)
+            await ctx3.__aexit__(None, None, None)
+            self.assertEqual(id_gen.get_current_token(), 126)
+
+        self.get_success(test_gen_next())
+
+    def test_multiple_gen_nexts_closed_in_different_order(self) -> None:
+        """Check that we handle overlapping calls to gen_next, even when their IDs
+        are created and persisted in different orders."""
+        id_gen = self._create_id_generator()
+
+        async def test_gen_next() -> None:
+            ctx1 = id_gen.get_next()
+            ctx2 = id_gen.get_next()
+            ctx3 = id_gen.get_next()
+
+            # Request three new stream IDs.
+            self.assertEqual(await ctx1.__aenter__(), 124)
+            self.assertEqual(await ctx2.__aenter__(), 125)
+            self.assertEqual(await ctx3.__aenter__(), 126)
+
+            # None are persisted: current token unchanged.
+            self.assertEqual(id_gen.get_current_token(), 123)
+
+            # Persist them in a different order, starting with 126 from ctx3.
+            await ctx3.__aexit__(None, None, None)
+            # We haven't persisted 124 from ctx1 yet---current token is still 123.
+            self.assertEqual(id_gen.get_current_token(), 123)
+
+            # Now persist 124 from ctx1.
+            await ctx1.__aexit__(None, None, None)
+            # Current token is then 124, waiting for 125 to be persisted.
+            self.assertEqual(id_gen.get_current_token(), 124)
+
+            # Finally persist 125 from ctx2.
+            await ctx2.__aexit__(None, None, None)
+            # Current token is then 126 (skipping over 125).
+            self.assertEqual(id_gen.get_current_token(), 126)
+
+        self.get_success(test_gen_next())
+
+    def test_gen_next_while_still_waiting_for_persistence(self) -> None:
+        """Check that we handle overlapping calls to gen_next."""
+        id_gen = self._create_id_generator()
+
+        async def test_gen_next() -> None:
+            ctx1 = id_gen.get_next()
+            ctx2 = id_gen.get_next()
+            ctx3 = id_gen.get_next()
+
+            # Request two new stream IDs.
+            self.assertEqual(await ctx1.__aenter__(), 124)
+            self.assertEqual(await ctx2.__aenter__(), 125)
+
+            # Persist ctx2 first.
+            await ctx2.__aexit__(None, None, None)
+            # Still waiting on ctx1's ID to be persisted.
+            self.assertEqual(id_gen.get_current_token(), 123)
+
+            # Now request a third stream ID. It should be 126 (the smallest ID that
+            # we've not yet handed out.)
+            self.assertEqual(await ctx3.__aenter__(), 126)
+
+        self.get_success(test_gen_next())
+
+
 class MultiWriterIdGeneratorTestCase(HomeserverTestCase):
     if not USE_POSTGRES_FOR_TESTS:
         skip = "Requires Postgres"
@@ -48,9 +190,9 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase):
         )
 
     def _create_id_generator(
-        self, instance_name="master", writers: Optional[List[str]] = None
+        self, instance_name: str = "master", writers: Optional[List[str]] = None
     ) -> MultiWriterIdGenerator:
-        def _create(conn):
+        def _create(conn: LoggingDatabaseConnection) -> MultiWriterIdGenerator:
             return MultiWriterIdGenerator(
                 conn,
                 self.db_pool,
@@ -446,7 +588,7 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase):
         self._insert_row_with_id("master", 3)
 
         # Now we add a row *without* updating the stream ID
-        def _insert(txn):
+        def _insert(txn: Cursor) -> None:
             txn.execute("INSERT INTO foobar VALUES (26, 'master')")
 
         self.get_success(self.db_pool.runInteraction("_insert", _insert))
@@ -481,9 +623,9 @@ class BackwardsMultiWriterIdGeneratorTestCase(HomeserverTestCase):
         )
 
     def _create_id_generator(
-        self, instance_name="master", writers: Optional[List[str]] = None
+        self, instance_name: str = "master", writers: Optional[List[str]] = None
     ) -> MultiWriterIdGenerator:
-        def _create(conn):
+        def _create(conn: LoggingDatabaseConnection) -> MultiWriterIdGenerator:
             return MultiWriterIdGenerator(
                 conn,
                 self.db_pool,
@@ -617,9 +759,9 @@ class MultiTableMultiWriterIdGeneratorTestCase(HomeserverTestCase):
         )
 
     def _create_id_generator(
-        self, instance_name="master", writers: Optional[List[str]] = None
+        self, instance_name: str = "master", writers: Optional[List[str]] = None
     ) -> MultiWriterIdGenerator:
-        def _create(conn):
+        def _create(conn: LoggingDatabaseConnection) -> MultiWriterIdGenerator:
             return MultiWriterIdGenerator(
                 conn,
                 self.db_pool,
@@ -641,7 +783,7 @@ class MultiTableMultiWriterIdGeneratorTestCase(HomeserverTestCase):
         instance_name: str,
         number: int,
         update_stream_table: bool = True,
-    ):
+    ) -> None:
         """Insert N rows as the given instance, inserting with stream IDs pulled
         from the postgres sequence.
         """
-- 
cgit 1.5.1


From 75888c2b1f5ec1c865c4690627bf101f7e0dffb9 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Thu, 17 Nov 2022 17:01:14 +0100
Subject: Faster joins: do not wait for full state when creating events to send
 (#14403)

Signed-off-by: Mathieu Velten <mathieuv@matrix.org>
---
 changelog.d/14403.misc    | 1 +
 synapse/events/builder.py | 1 +
 synapse/state/__init__.py | 8 +++++++-
 3 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14403.misc

(limited to 'synapse')

diff --git a/changelog.d/14403.misc b/changelog.d/14403.misc
new file mode 100644
index 0000000000..ff28a2712a
--- /dev/null
+++ b/changelog.d/14403.misc
@@ -0,0 +1 @@
+Faster joins: do not wait for full state when creating events to send.
diff --git a/synapse/events/builder.py b/synapse/events/builder.py
index e2ee10dd3d..d62906043f 100644
--- a/synapse/events/builder.py
+++ b/synapse/events/builder.py
@@ -128,6 +128,7 @@ class EventBuilder:
                 state_filter=StateFilter.from_types(
                     auth_types_for_event(self.room_version, self)
                 ),
+                await_full_state=False,
             )
             auth_event_ids = self._event_auth_handler.compute_auth_events(
                 self, state_ids
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index 6f3dd0463e..833ffec3de 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -190,6 +190,7 @@ class StateHandler:
         room_id: str,
         event_ids: Collection[str],
         state_filter: Optional[StateFilter] = None,
+        await_full_state: bool = True,
     ) -> StateMap[str]:
         """Fetch the state after each of the given event IDs. Resolve them and return.
 
@@ -200,13 +201,18 @@ class StateHandler:
         Args:
             room_id: the room_id containing the given events.
             event_ids: the events whose state should be fetched and resolved.
+            await_full_state: if `True`, will block if we do not yet have complete state
+                at the given `event_id`s, regardless of whether `state_filter` is
+                satisfied by partial state.
 
         Returns:
             the state dict (a mapping from (event_type, state_key) -> event_id) which
             holds the resolution of the states after the given event IDs.
         """
         logger.debug("calling resolve_state_groups from compute_state_after_events")
-        ret = await self.resolve_state_groups_for_events(room_id, event_ids)
+        ret = await self.resolve_state_groups_for_events(
+            room_id, event_ids, await_full_state
+        )
         return await ret.get_state(self._state_storage_controller, state_filter)
 
     async def get_current_user_ids_in_room(
-- 
cgit 1.5.1


From e7132c3f81acbc50c1923cad7eeab96d3b2e05fd Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Thu, 17 Nov 2022 16:09:56 +0000
Subject: Fix check to ignore blank lines in incoming TCP replication (#14449)

---
 changelog.d/14449.misc              | 1 +
 synapse/replication/tcp/protocol.py | 2 +-
 synapse/storage/database.py         | 6 +++---
 3 files changed, 5 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14449.misc

(limited to 'synapse')

diff --git a/changelog.d/14449.misc b/changelog.d/14449.misc
new file mode 100644
index 0000000000..320c0b6fae
--- /dev/null
+++ b/changelog.d/14449.misc
@@ -0,0 +1 @@
+Fix type logic in TCP replication code that prevented correctly ignoring blank commands.
\ No newline at end of file
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index 7763ffb2d0..56a5c21910 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -245,7 +245,7 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
             self._parse_and_dispatch_line(line)
 
     def _parse_and_dispatch_line(self, line: bytes) -> None:
-        if line.strip() == "":
+        if line.strip() == b"":
             # Ignore blank lines
             return
 
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 4717c9728a..0dc44b246c 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -569,15 +569,15 @@ class DatabasePool:
             retcols=["update_name"],
             desc="check_background_updates",
         )
-        updates = [x["update_name"] for x in updates]
+        background_update_names = [x["update_name"] for x in updates]
 
         for table, update_name in UNIQUE_INDEX_BACKGROUND_UPDATES.items():
-            if update_name not in updates:
+            if update_name not in background_update_names:
                 logger.debug("Now safe to upsert in %s", table)
                 self._unsafe_to_upsert_tables.discard(table)
 
         # If there's any updates still running, reschedule to run.
-        if updates:
+        if background_update_names:
             self._clock.call_later(
                 15.0,
                 run_as_background_process,
-- 
cgit 1.5.1


From 01a052789266179c70c10ea6a6253c64fd9990d2 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 17 Nov 2022 16:11:08 +0000
Subject: Fix version that `worker_main_http_uri` is redundant from (#14476)

* Fix version that `worker_main_http_uri` is redundant from

* Changelog
---
 changelog.d/14476.misc    | 1 +
 docs/workers.md           | 4 ++--
 synapse/config/workers.py | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/14476.misc

(limited to 'synapse')

diff --git a/changelog.d/14476.misc b/changelog.d/14476.misc
new file mode 100644
index 0000000000..6e025329c4
--- /dev/null
+++ b/changelog.d/14476.misc
@@ -0,0 +1 @@
+Remove the `worker_main_http_uri` configuration setting. This is now handled via internal replication.
diff --git a/docs/workers.md b/docs/workers.md
index 4604650803..27e54c5846 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -135,8 +135,8 @@ In the config file for each worker, you must specify:
    [`worker_replication_http_port`](usage/configuration/config_documentation.md#worker_replication_http_port)).
  * If handling HTTP requests, a [`worker_listeners`](usage/configuration/config_documentation.md#worker_listeners) option
    with an `http` listener.
- * **Synapse 1.71 and older:** if handling the `^/_matrix/client/v3/keys/upload` endpoint, the HTTP URI for
-   the main process (`worker_main_http_uri`). This config option is no longer required and is ignored when running Synapse 1.72 and newer.
+ * **Synapse 1.72 and older:** if handling the `^/_matrix/client/v3/keys/upload` endpoint, the HTTP URI for
+   the main process (`worker_main_http_uri`). This config option is no longer required and is ignored when running Synapse 1.73 and newer.
 
 For example:
 
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index c4e2273a95..913b83e174 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -166,7 +166,7 @@ class WorkerConfig(Config):
         self.worker_main_http_uri = config.get("worker_main_http_uri", None)
         if self.worker_main_http_uri is not None:
             logger.warning(
-                "The config option worker_main_http_uri is unused since Synapse 1.72. "
+                "The config option worker_main_http_uri is unused since Synapse 1.73. "
                 "It can be safely removed from your configuration."
             )
 
-- 
cgit 1.5.1


From 78e23eea056cbf75b9478140f17699195dd490f2 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Fri, 18 Nov 2022 18:10:01 +0000
Subject: Reduce default third party invite rate limit to 216 invites per day
 (#14487)

The previous default was the same as the `rc_message` rate limit, which
defaults to 17,280 per day.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14487.misc         | 1 +
 synapse/config/ratelimiting.py | 5 +----
 2 files changed, 2 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14487.misc

(limited to 'synapse')

diff --git a/changelog.d/14487.misc b/changelog.d/14487.misc
new file mode 100644
index 0000000000..f6b47a1d8e
--- /dev/null
+++ b/changelog.d/14487.misc
@@ -0,0 +1 @@
+Reduce default third party invite rate limit to 216 invites per day.
diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py
index 1ed001e105..5c13fe428a 100644
--- a/synapse/config/ratelimiting.py
+++ b/synapse/config/ratelimiting.py
@@ -150,8 +150,5 @@ class RatelimitConfig(Config):
 
         self.rc_third_party_invite = RatelimitSettings(
             config.get("rc_third_party_invite", {}),
-            defaults={
-                "per_second": self.rc_message.per_second,
-                "burst_count": self.rc_message.burst_count,
-            },
+            defaults={"per_second": 0.0025, "burst_count": 5},
         )
-- 
cgit 1.5.1


From e1b15f25f3ad4b45b381544ca6b3cd2caf43d25d Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 18 Nov 2022 19:56:42 +0000
Subject: Fix /key/v2/server calls with URL-unsafe key IDs (#14490)

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
---
 changelog.d/14490.misc       |  1 +
 synapse/crypto/keyring.py    |  2 +-
 tests/crypto/test_keyring.py | 12 ++++++++++++
 3 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14490.misc

(limited to 'synapse')

diff --git a/changelog.d/14490.misc b/changelog.d/14490.misc
new file mode 100644
index 0000000000..c0a4daa885
--- /dev/null
+++ b/changelog.d/14490.misc
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 0.9 where it would fail to fetch server keys whose IDs contain a forward slash.
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index dd9b8089ec..ed15f88350 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -857,7 +857,7 @@ class ServerKeyFetcher(BaseV2KeyFetcher):
                 response = await self.client.get_json(
                     destination=server_name,
                     path="/_matrix/key/v2/server/"
-                    + urllib.parse.quote(requested_key_id),
+                    + urllib.parse.quote(requested_key_id, safe=""),
                     ignore_backoff=True,
                     # we only give the remote server 10s to respond. It should be an
                     # easy request to handle, so if it doesn't reply within 10s, it's
diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py
index 820a1a54e2..63628aa6b0 100644
--- a/tests/crypto/test_keyring.py
+++ b/tests/crypto/test_keyring.py
@@ -469,6 +469,18 @@ class ServerKeyFetcherTestCase(unittest.HomeserverTestCase):
         keys = self.get_success(fetcher.get_keys(SERVER_NAME, ["key1"], 0))
         self.assertEqual(keys, {})
 
+    def test_keyid_containing_forward_slash(self) -> None:
+        """We should url-encode any url unsafe chars in key ids.
+
+        Detects https://github.com/matrix-org/synapse/issues/14488.
+        """
+        fetcher = ServerKeyFetcher(self.hs)
+        self.get_success(fetcher.get_keys("example.com", ["key/potato"], 0))
+
+        self.http_client.get_json.assert_called_once()
+        args, kwargs = self.http_client.get_json.call_args
+        self.assertEqual(kwargs["path"], "/_matrix/key/v2/server/key%2Fpotato")
+
 
 class PerspectivesKeyFetcherTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor, clock):
-- 
cgit 1.5.1


From 1526ff389f02d14d0df729bd6ea35836e758c449 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Mon, 21 Nov 2022 16:46:14 +0100
Subject: Faster joins: filter out non local events when a room doesn't have
 its full state (#14404)

Signed-off-by: Mathieu Velten <mathieuv@matrix.org>
---
 changelog.d/14404.misc                             |  1 +
 synapse/federation/sender/per_destination_queue.py |  1 +
 synapse/handlers/federation.py                     | 15 +++++++----
 synapse/visibility.py                              | 29 +++++++++++++++++++---
 tests/test_visibility.py                           | 10 ++++----
 5 files changed, 43 insertions(+), 13 deletions(-)
 create mode 100644 changelog.d/14404.misc

(limited to 'synapse')

diff --git a/changelog.d/14404.misc b/changelog.d/14404.misc
new file mode 100644
index 0000000000..b9ab525f2b
--- /dev/null
+++ b/changelog.d/14404.misc
@@ -0,0 +1 @@
+Faster joins: filter out non local events when a room doesn't have its full state.
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index 084c45a95c..3ae5e8634c 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -505,6 +505,7 @@ class PerDestinationQueue:
                     new_pdus = await filter_events_for_server(
                         self._storage_controllers,
                         self._destination,
+                        self._server_name,
                         new_pdus,
                         redact=False,
                     )
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 188f0956ef..d92582fd5c 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -379,6 +379,7 @@ class FederationHandler:
             filtered_extremities = await filter_events_for_server(
                 self._storage_controllers,
                 self.server_name,
+                self.server_name,
                 events_to_check,
                 redact=False,
                 check_history_visibility_only=True,
@@ -1231,7 +1232,9 @@ class FederationHandler:
     async def on_backfill_request(
         self, origin: str, room_id: str, pdu_list: List[str], limit: int
     ) -> List[EventBase]:
-        await self._event_auth_handler.assert_host_in_room(room_id, origin)
+        # We allow partially joined rooms since in this case we are filtering out
+        # non-local events in `filter_events_for_server`.
+        await self._event_auth_handler.assert_host_in_room(room_id, origin, True)
 
         # Synapse asks for 100 events per backfill request. Do not allow more.
         limit = min(limit, 100)
@@ -1252,7 +1255,7 @@ class FederationHandler:
         )
 
         events = await filter_events_for_server(
-            self._storage_controllers, origin, events
+            self._storage_controllers, origin, self.server_name, events
         )
 
         return events
@@ -1283,7 +1286,7 @@ class FederationHandler:
         await self._event_auth_handler.assert_host_in_room(event.room_id, origin)
 
         events = await filter_events_for_server(
-            self._storage_controllers, origin, [event]
+            self._storage_controllers, origin, self.server_name, [event]
         )
         event = events[0]
         return event
@@ -1296,7 +1299,9 @@ class FederationHandler:
         latest_events: List[str],
         limit: int,
     ) -> List[EventBase]:
-        await self._event_auth_handler.assert_host_in_room(room_id, origin)
+        # We allow partially joined rooms since in this case we are filtering out
+        # non-local events in `filter_events_for_server`.
+        await self._event_auth_handler.assert_host_in_room(room_id, origin, True)
 
         # Only allow up to 20 events to be retrieved per request.
         limit = min(limit, 20)
@@ -1309,7 +1314,7 @@ class FederationHandler:
         )
 
         missing_events = await filter_events_for_server(
-            self._storage_controllers, origin, missing_events
+            self._storage_controllers, origin, self.server_name, missing_events
         )
 
         return missing_events
diff --git a/synapse/visibility.py b/synapse/visibility.py
index 40a9c5b53f..b443857571 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -563,7 +563,8 @@ def get_effective_room_visibility_from_state(state: StateMap[EventBase]) -> str:
 
 async def filter_events_for_server(
     storage: StorageControllers,
-    server_name: str,
+    target_server_name: str,
+    local_server_name: str,
     events: List[EventBase],
     redact: bool = True,
     check_history_visibility_only: bool = False,
@@ -603,7 +604,7 @@ async def filter_events_for_server(
         # if the server is either in the room or has been invited
         # into the room.
         for ev in memberships.values():
-            assert get_domain_from_id(ev.state_key) == server_name
+            assert get_domain_from_id(ev.state_key) == target_server_name
 
             memtype = ev.membership
             if memtype == Membership.JOIN:
@@ -622,6 +623,24 @@ async def filter_events_for_server(
         # to no users having been erased.
         erased_senders = {}
 
+    # Filter out non-local events when we are in the middle of a partial join, since our servers
+    # list can be out of date and we could leak events to servers not in the room anymore.
+    # This can also be true for local events but we consider it to be an acceptable risk.
+
+    # We do this check as a first step and before retrieving membership events because
+    # otherwise a room could be fully joined after we retrieve those, which would then bypass
+    # this check but would base the filtering on an outdated view of the membership events.
+
+    partial_state_invisible_events = set()
+    if not check_history_visibility_only:
+        for e in events:
+            sender_domain = get_domain_from_id(e.sender)
+            if (
+                sender_domain != local_server_name
+                and await storage.main.is_partial_state_room(e.room_id)
+            ):
+                partial_state_invisible_events.add(e)
+
     # Let's check to see if all the events have a history visibility
     # of "shared" or "world_readable". If that's the case then we don't
     # need to check membership (as we know the server is in the room).
@@ -636,7 +655,7 @@ async def filter_events_for_server(
             if event_to_history_vis[e.event_id]
             not in (HistoryVisibility.SHARED, HistoryVisibility.WORLD_READABLE)
         ],
-        server_name,
+        target_server_name,
     )
 
     to_return = []
@@ -645,6 +664,10 @@ async def filter_events_for_server(
         visible = check_event_is_visible(
             event_to_history_vis[e.event_id], event_to_memberships.get(e.event_id, {})
         )
+
+        if e in partial_state_invisible_events:
+            visible = False
+
         if visible and not erased:
             to_return.append(e)
         elif redact:
diff --git a/tests/test_visibility.py b/tests/test_visibility.py
index c385b2f8d4..d0b9ad5454 100644
--- a/tests/test_visibility.py
+++ b/tests/test_visibility.py
@@ -61,7 +61,7 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
 
         filtered = self.get_success(
             filter_events_for_server(
-                self._storage_controllers, "test_server", events_to_filter
+                self._storage_controllers, "test_server", "hs", events_to_filter
             )
         )
 
@@ -83,7 +83,7 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
         self.assertEqual(
             self.get_success(
                 filter_events_for_server(
-                    self._storage_controllers, "remote_hs", [outlier]
+                    self._storage_controllers, "remote_hs", "hs", [outlier]
                 )
             ),
             [outlier],
@@ -94,7 +94,7 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
 
         filtered = self.get_success(
             filter_events_for_server(
-                self._storage_controllers, "remote_hs", [outlier, evt]
+                self._storage_controllers, "remote_hs", "local_hs", [outlier, evt]
             )
         )
         self.assertEqual(len(filtered), 2, f"expected 2 results, got: {filtered}")
@@ -106,7 +106,7 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
         # be redacted)
         filtered = self.get_success(
             filter_events_for_server(
-                self._storage_controllers, "other_server", [outlier, evt]
+                self._storage_controllers, "other_server", "local_hs", [outlier, evt]
             )
         )
         self.assertEqual(filtered[0], outlier)
@@ -141,7 +141,7 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
         # ... and the filtering happens.
         filtered = self.get_success(
             filter_events_for_server(
-                self._storage_controllers, "test_server", events_to_filter
+                self._storage_controllers, "test_server", "local_hs", events_to_filter
             )
         )
 
-- 
cgit 1.5.1


From 1799a54a545618782840a60950ef4b64da9ee24d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 22 Nov 2022 07:26:11 -0500
Subject: Batch fetch bundled annotations (#14491)

Avoid an n+1 query problem and fetch the bundled aggregations for
m.annotation relations in a single query instead of a query per event.

This applies similar logic as was previously done for edits in
8b309adb436c162510ed1402f33b8741d71fc058 (#11660) and threads
in b65acead428653b988351ae8d7b22127a22039cd (#11752).
---
 changelog.d/14491.feature                   |   1 +
 synapse/handlers/relations.py               | 197 ++++++++++++++++------------
 synapse/storage/databases/main/relations.py | 139 ++++++++++++--------
 synapse/util/caches/descriptors.py          |   2 +-
 tests/rest/client/test_relations.py         |   4 +-
 5 files changed, 202 insertions(+), 141 deletions(-)
 create mode 100644 changelog.d/14491.feature

(limited to 'synapse')

diff --git a/changelog.d/14491.feature b/changelog.d/14491.feature
new file mode 100644
index 0000000000..4fca7282f7
--- /dev/null
+++ b/changelog.d/14491.feature
@@ -0,0 +1 @@
+Reduce database load of [Client-Server endpoints](https://spec.matrix.org/v1.4/client-server-api/#aggregations) which return bundled aggregations.
diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py
index 8e71dda970..ca94239f61 100644
--- a/synapse/handlers/relations.py
+++ b/synapse/handlers/relations.py
@@ -13,7 +13,16 @@
 # limitations under the License.
 import enum
 import logging
-from typing import TYPE_CHECKING, Dict, FrozenSet, Iterable, List, Optional, Tuple
+from typing import (
+    TYPE_CHECKING,
+    Collection,
+    Dict,
+    FrozenSet,
+    Iterable,
+    List,
+    Optional,
+    Tuple,
+)
 
 import attr
 
@@ -259,48 +268,64 @@ class RelationsHandler:
                     e.msg,
                 )
 
-    async def get_annotations_for_event(
-        self,
-        event_id: str,
-        room_id: str,
-        limit: int = 5,
-        ignored_users: FrozenSet[str] = frozenset(),
-    ) -> List[JsonDict]:
-        """Get a list of annotations on the event, grouped by event type and
+    async def get_annotations_for_events(
+        self, event_ids: Collection[str], ignored_users: FrozenSet[str] = frozenset()
+    ) -> Dict[str, List[JsonDict]]:
+        """Get a list of annotations to the given events, grouped by event type and
         aggregation key, sorted by count.
 
-        This is used e.g. to get the what and how many reactions have happend
+        This is used e.g. to get the what and how many reactions have happened
         on an event.
 
         Args:
-            event_id: Fetch events that relate to this event ID.
-            room_id: The room the event belongs to.
-            limit: Only fetch the `limit` groups.
+            event_ids: Fetch events that relate to these event IDs.
             ignored_users: The users ignored by the requesting user.
 
         Returns:
-            List of groups of annotations that match. Each row is a dict with
-            `type`, `key` and `count` fields.
+            A map of event IDs to a list of groups of annotations that match.
+            Each entry is a dict with `type`, `key` and `count` fields.
         """
         # Get the base results for all users.
-        full_results = await self._main_store.get_aggregation_groups_for_event(
-            event_id, room_id, limit
+        full_results = await self._main_store.get_aggregation_groups_for_events(
+            event_ids
         )
 
+        # Avoid additional logic if there are no ignored users.
+        if not ignored_users:
+            return {
+                event_id: results
+                for event_id, results in full_results.items()
+                if results
+            }
+
         # Then subtract off the results for any ignored users.
         ignored_results = await self._main_store.get_aggregation_groups_for_users(
-            event_id, room_id, limit, ignored_users
+            [event_id for event_id, results in full_results.items() if results],
+            ignored_users,
         )
 
-        filtered_results = []
-        for result in full_results:
-            key = (result["type"], result["key"])
-            if key in ignored_results:
-                result = result.copy()
-                result["count"] -= ignored_results[key]
-                if result["count"] <= 0:
-                    continue
-            filtered_results.append(result)
+        filtered_results = {}
+        for event_id, results in full_results.items():
+            # If no annotations, skip.
+            if not results:
+                continue
+
+            # If there are not ignored results for this event, copy verbatim.
+            if event_id not in ignored_results:
+                filtered_results[event_id] = results
+                continue
+
+            # Otherwise, subtract out the ignored results.
+            event_ignored_results = ignored_results[event_id]
+            for result in results:
+                key = (result["type"], result["key"])
+                if key in event_ignored_results:
+                    # Ensure to not modify the cache.
+                    result = result.copy()
+                    result["count"] -= event_ignored_results[key]
+                    if result["count"] <= 0:
+                        continue
+                filtered_results.setdefault(event_id, []).append(result)
 
         return filtered_results
 
@@ -366,59 +391,62 @@ class RelationsHandler:
         results = {}
 
         for event_id, summary in summaries.items():
-            if summary:
-                thread_count, latest_thread_event = summary
-
-                # Subtract off the count of any ignored users.
-                for ignored_user in ignored_users:
-                    thread_count -= ignored_results.get((event_id, ignored_user), 0)
-
-                # This is gnarly, but if the latest event is from an ignored user,
-                # attempt to find one that isn't from an ignored user.
-                if latest_thread_event.sender in ignored_users:
-                    room_id = latest_thread_event.room_id
-
-                    # If the root event is not found, something went wrong, do
-                    # not include a summary of the thread.
-                    event = await self._event_handler.get_event(user, room_id, event_id)
-                    if event is None:
-                        continue
+            # If no thread, skip.
+            if not summary:
+                continue
 
-                    potential_events, _ = await self.get_relations_for_event(
-                        event_id,
-                        event,
-                        room_id,
-                        RelationTypes.THREAD,
-                        ignored_users,
-                    )
+            thread_count, latest_thread_event = summary
 
-                    # If all found events are from ignored users, do not include
-                    # a summary of the thread.
-                    if not potential_events:
-                        continue
+            # Subtract off the count of any ignored users.
+            for ignored_user in ignored_users:
+                thread_count -= ignored_results.get((event_id, ignored_user), 0)
 
-                    # The *last* event returned is the one that is cared about.
-                    event = await self._event_handler.get_event(
-                        user, room_id, potential_events[-1].event_id
-                    )
-                    # It is unexpected that the event will not exist.
-                    if event is None:
-                        logger.warning(
-                            "Unable to fetch latest event in a thread with event ID: %s",
-                            potential_events[-1].event_id,
-                        )
-                        continue
-                    latest_thread_event = event
-
-                results[event_id] = _ThreadAggregation(
-                    latest_event=latest_thread_event,
-                    count=thread_count,
-                    # If there's a thread summary it must also exist in the
-                    # participated dictionary.
-                    current_user_participated=events_by_id[event_id].sender == user_id
-                    or participated[event_id],
+            # This is gnarly, but if the latest event is from an ignored user,
+            # attempt to find one that isn't from an ignored user.
+            if latest_thread_event.sender in ignored_users:
+                room_id = latest_thread_event.room_id
+
+                # If the root event is not found, something went wrong, do
+                # not include a summary of the thread.
+                event = await self._event_handler.get_event(user, room_id, event_id)
+                if event is None:
+                    continue
+
+                potential_events, _ = await self.get_relations_for_event(
+                    event_id,
+                    event,
+                    room_id,
+                    RelationTypes.THREAD,
+                    ignored_users,
                 )
 
+                # If all found events are from ignored users, do not include
+                # a summary of the thread.
+                if not potential_events:
+                    continue
+
+                # The *last* event returned is the one that is cared about.
+                event = await self._event_handler.get_event(
+                    user, room_id, potential_events[-1].event_id
+                )
+                # It is unexpected that the event will not exist.
+                if event is None:
+                    logger.warning(
+                        "Unable to fetch latest event in a thread with event ID: %s",
+                        potential_events[-1].event_id,
+                    )
+                    continue
+                latest_thread_event = event
+
+            results[event_id] = _ThreadAggregation(
+                latest_event=latest_thread_event,
+                count=thread_count,
+                # If there's a thread summary it must also exist in the
+                # participated dictionary.
+                current_user_participated=events_by_id[event_id].sender == user_id
+                or participated[event_id],
+            )
+
         return results
 
     @trace
@@ -496,17 +524,18 @@ class RelationsHandler:
                 # (as that is what makes it part of the thread).
                 relations_by_id[latest_thread_event.event_id] = RelationTypes.THREAD
 
-        # Fetch other relations per event.
-        for event in events_by_id.values():
-            # Fetch any annotations (ie, reactions) to bundle with this event.
-            annotations = await self.get_annotations_for_event(
-                event.event_id, event.room_id, ignored_users=ignored_users
-            )
+        # Fetch any annotations (ie, reactions) to bundle with this event.
+        annotations_by_event_id = await self.get_annotations_for_events(
+            events_by_id.keys(), ignored_users=ignored_users
+        )
+        for event_id, annotations in annotations_by_event_id.items():
             if annotations:
-                results.setdefault(
-                    event.event_id, BundledAggregations()
-                ).annotations = {"chunk": annotations}
+                results.setdefault(event_id, BundledAggregations()).annotations = {
+                    "chunk": annotations
+                }
 
+        # Fetch other relations per event.
+        for event in events_by_id.values():
             # Fetch any references to bundle with this event.
             references, next_token = await self.get_relations_for_event(
                 event.event_id,
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index ca431002c8..f96a16956a 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -20,6 +20,7 @@ from typing import (
     FrozenSet,
     Iterable,
     List,
+    Mapping,
     Optional,
     Set,
     Tuple,
@@ -394,106 +395,136 @@ class RelationsWorkerStore(SQLBaseStore):
         )
         return result is not None
 
-    @cached(tree=True)
-    async def get_aggregation_groups_for_event(
-        self, event_id: str, room_id: str, limit: int = 5
-    ) -> List[JsonDict]:
-        """Get a list of annotations on the event, grouped by event type and
+    @cached()
+    async def get_aggregation_groups_for_event(self, event_id: str) -> List[JsonDict]:
+        raise NotImplementedError()
+
+    @cachedList(
+        cached_method_name="get_aggregation_groups_for_event", list_name="event_ids"
+    )
+    async def get_aggregation_groups_for_events(
+        self, event_ids: Collection[str]
+    ) -> Mapping[str, Optional[List[JsonDict]]]:
+        """Get a list of annotations on the given events, grouped by event type and
         aggregation key, sorted by count.
 
         This is used e.g. to get the what and how many reactions have happend
         on an event.
 
         Args:
-            event_id: Fetch events that relate to this event ID.
-            room_id: The room the event belongs to.
-            limit: Only fetch the `limit` groups.
+            event_ids: Fetch events that relate to these event IDs.
 
         Returns:
-            List of groups of annotations that match. Each row is a dict with
-            `type`, `key` and `count` fields.
+            A map of event IDs to a list of groups of annotations that match.
+            Each entry is a dict with `type`, `key` and `count` fields.
         """
+        # The number of entries to return per event ID.
+        limit = 5
 
-        args = [
-            event_id,
-            room_id,
-            RelationTypes.ANNOTATION,
-            limit,
-        ]
+        clause, args = make_in_list_sql_clause(
+            self.database_engine, "relates_to_id", event_ids
+        )
+        args.append(RelationTypes.ANNOTATION)
 
-        sql = """
-            SELECT type, aggregation_key, COUNT(DISTINCT sender)
-            FROM event_relations
-            INNER JOIN events USING (event_id)
-            WHERE relates_to_id = ? AND room_id = ? AND relation_type = ?
-            GROUP BY relation_type, type, aggregation_key
-            ORDER BY COUNT(*) DESC
-            LIMIT ?
+        sql = f"""
+            SELECT
+                relates_to_id,
+                annotation.type,
+                aggregation_key,
+                COUNT(DISTINCT annotation.sender)
+            FROM events AS annotation
+            INNER JOIN event_relations USING (event_id)
+            INNER JOIN events AS parent ON
+                parent.event_id = relates_to_id
+                AND parent.room_id = annotation.room_id
+            WHERE
+                {clause}
+                AND relation_type = ?
+            GROUP BY relates_to_id, annotation.type, aggregation_key
+            ORDER BY relates_to_id, COUNT(*) DESC
         """
 
-        def _get_aggregation_groups_for_event_txn(
+        def _get_aggregation_groups_for_events_txn(
             txn: LoggingTransaction,
-        ) -> List[JsonDict]:
+        ) -> Mapping[str, List[JsonDict]]:
             txn.execute(sql, args)
 
-            return [{"type": row[0], "key": row[1], "count": row[2]} for row in txn]
+            result: Dict[str, List[JsonDict]] = {}
+            for event_id, type, key, count in cast(
+                List[Tuple[str, str, str, int]], txn
+            ):
+                event_results = result.setdefault(event_id, [])
+
+                # Limit the number of results per event ID.
+                if len(event_results) == limit:
+                    continue
+
+                event_results.append({"type": type, "key": key, "count": count})
+
+            return result
 
         return await self.db_pool.runInteraction(
-            "get_aggregation_groups_for_event", _get_aggregation_groups_for_event_txn
+            "get_aggregation_groups_for_events", _get_aggregation_groups_for_events_txn
         )
 
     async def get_aggregation_groups_for_users(
-        self,
-        event_id: str,
-        room_id: str,
-        limit: int,
-        users: FrozenSet[str] = frozenset(),
-    ) -> Dict[Tuple[str, str], int]:
+        self, event_ids: Collection[str], users: FrozenSet[str]
+    ) -> Dict[str, Dict[Tuple[str, str], int]]:
         """Fetch the partial aggregations for an event for specific users.
 
         This is used, in conjunction with get_aggregation_groups_for_event, to
         remove information from the results for ignored users.
 
         Args:
-            event_id: Fetch events that relate to this event ID.
-            room_id: The room the event belongs to.
-            limit: Only fetch the `limit` groups.
+            event_ids: Fetch events that relate to these event IDs.
             users: The users to fetch information for.
 
         Returns:
-            A map of (event type, aggregation key) to a count of users.
+            A map of event ID to a map of (event type, aggregation key) to a
+            count of users.
         """
 
         if not users:
             return {}
 
-        args: List[Union[str, int]] = [
-            event_id,
-            room_id,
-            RelationTypes.ANNOTATION,
-        ]
+        events_sql, args = make_in_list_sql_clause(
+            self.database_engine, "relates_to_id", event_ids
+        )
 
         users_sql, users_args = make_in_list_sql_clause(
-            self.database_engine, "sender", users
+            self.database_engine, "annotation.sender", users
         )
         args.extend(users_args)
+        args.append(RelationTypes.ANNOTATION)
 
         sql = f"""
-            SELECT type, aggregation_key, COUNT(DISTINCT sender)
-            FROM event_relations
-            INNER JOIN events USING (event_id)
-            WHERE relates_to_id = ? AND room_id = ? AND relation_type = ? AND {users_sql}
-            GROUP BY relation_type, type, aggregation_key
-            ORDER BY COUNT(*) DESC
-            LIMIT ?
+            SELECT
+                relates_to_id,
+                annotation.type,
+                aggregation_key,
+                COUNT(DISTINCT annotation.sender)
+            FROM events AS annotation
+            INNER JOIN event_relations USING (event_id)
+            INNER JOIN events AS parent ON
+                parent.event_id = relates_to_id
+                AND parent.room_id = annotation.room_id
+            WHERE {events_sql} AND {users_sql} AND relation_type = ?
+            GROUP BY relates_to_id, annotation.type, aggregation_key
+            ORDER BY relates_to_id, COUNT(*) DESC
         """
 
         def _get_aggregation_groups_for_users_txn(
             txn: LoggingTransaction,
-        ) -> Dict[Tuple[str, str], int]:
-            txn.execute(sql, args + [limit])
+        ) -> Dict[str, Dict[Tuple[str, str], int]]:
+            txn.execute(sql, args)
 
-            return {(row[0], row[1]): row[2] for row in txn}
+            result: Dict[str, Dict[Tuple[str, str], int]] = {}
+            for event_id, type, key, count in cast(
+                List[Tuple[str, str, str, int]], txn
+            ):
+                result.setdefault(event_id, {})[(type, key)] = count
+
+            return result
 
         return await self.db_pool.runInteraction(
             "get_aggregation_groups_for_users", _get_aggregation_groups_for_users_txn
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 75428d19ba..72227359b9 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -503,7 +503,7 @@ def cachedList(
     is specified as a list that is iterated through to lookup keys in the
     original cache. A new tuple consisting of the (deduplicated) keys that weren't in
     the cache gets passed to the original function, which is expected to results
-    in a map of key to value for each passed value. THe new results are stored in the
+    in a map of key to value for each passed value. The new results are stored in the
     original cache. Note that any missing values are cached as None.
 
     Args:
diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py
index e3d801f7a8..2d2b683548 100644
--- a/tests/rest/client/test_relations.py
+++ b/tests/rest/client/test_relations.py
@@ -1108,7 +1108,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase):
 
         # The "user" sent the root event and is making queries for the bundled
         # aggregations: they have participated.
-        self._test_bundled_aggregations(RelationTypes.THREAD, _gen_assert(True), 9)
+        self._test_bundled_aggregations(RelationTypes.THREAD, _gen_assert(True), 8)
         # The "user2" sent replies in the thread and is making queries for the
         # bundled aggregations: they have participated.
         #
@@ -1170,7 +1170,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase):
                 bundled_aggregations["latest_event"].get("unsigned"),
             )
 
-        self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 9)
+        self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 8)
 
     def test_nested_thread(self) -> None:
         """
-- 
cgit 1.5.1


From 6d7523ef1484ec56f4a6dffdd2ea3d8736b4cc98 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 22 Nov 2022 09:41:09 -0500
Subject: Batch fetch bundled references (#14508)

Avoid an n+1 query problem and fetch the bundled aggregations for
m.reference relations in a single query instead of a query per event.

This applies similar logic as was previously done for edits in
8b309adb436c162510ed1402f33b8741d71fc058 (#11660); threads
in b65acead428653b988351ae8d7b22127a22039cd (#11752); and
annotations in 1799a54a545618782840a60950ef4b64da9ee24d (#14491).
---
 changelog.d/14508.feature                   |   1 +
 synapse/handlers/relations.py               | 128 +++++++++++++---------------
 synapse/storage/databases/main/cache.py     |   1 +
 synapse/storage/databases/main/events.py    |   4 +
 synapse/storage/databases/main/relations.py |  74 ++++++++++++++--
 tests/rest/client/test_relations.py         |   4 +-
 6 files changed, 133 insertions(+), 79 deletions(-)
 create mode 100644 changelog.d/14508.feature

(limited to 'synapse')

diff --git a/changelog.d/14508.feature b/changelog.d/14508.feature
new file mode 100644
index 0000000000..4fca7282f7
--- /dev/null
+++ b/changelog.d/14508.feature
@@ -0,0 +1 @@
+Reduce database load of [Client-Server endpoints](https://spec.matrix.org/v1.4/client-server-api/#aggregations) which return bundled aggregations.
diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py
index ca94239f61..8414be5879 100644
--- a/synapse/handlers/relations.py
+++ b/synapse/handlers/relations.py
@@ -13,16 +13,7 @@
 # limitations under the License.
 import enum
 import logging
-from typing import (
-    TYPE_CHECKING,
-    Collection,
-    Dict,
-    FrozenSet,
-    Iterable,
-    List,
-    Optional,
-    Tuple,
-)
+from typing import TYPE_CHECKING, Collection, Dict, FrozenSet, Iterable, List, Optional
 
 import attr
 
@@ -32,7 +23,7 @@ from synapse.events import EventBase, relation_from_event
 from synapse.logging.opentracing import trace
 from synapse.storage.databases.main.relations import ThreadsNextBatch, _RelatedEvent
 from synapse.streams.config import PaginationConfig
-from synapse.types import JsonDict, Requester, StreamToken, UserID
+from synapse.types import JsonDict, Requester, UserID
 from synapse.visibility import filter_events_for_client
 
 if TYPE_CHECKING:
@@ -181,40 +172,6 @@ class RelationsHandler:
 
         return return_value
 
-    async def get_relations_for_event(
-        self,
-        event_id: str,
-        event: EventBase,
-        room_id: str,
-        relation_type: str,
-        ignored_users: FrozenSet[str] = frozenset(),
-    ) -> Tuple[List[_RelatedEvent], Optional[StreamToken]]:
-        """Get a list of events which relate to an event, ordered by topological ordering.
-
-        Args:
-            event_id: Fetch events that relate to this event ID.
-            event: The matching EventBase to event_id.
-            room_id: The room the event belongs to.
-            relation_type: The type of relation.
-            ignored_users: The users ignored by the requesting user.
-
-        Returns:
-            List of event IDs that match relations requested. The rows are of
-            the form `{"event_id": "..."}`.
-        """
-
-        # Call the underlying storage method, which is cached.
-        related_events, next_token = await self._main_store.get_relations_for_event(
-            event_id, event, room_id, relation_type, direction="f"
-        )
-
-        # Filter out ignored users and convert to the expected format.
-        related_events = [
-            event for event in related_events if event.sender not in ignored_users
-        ]
-
-        return related_events, next_token
-
     async def redact_events_related_to(
         self,
         requester: Requester,
@@ -329,6 +286,46 @@ class RelationsHandler:
 
         return filtered_results
 
+    async def get_references_for_events(
+        self, event_ids: Collection[str], ignored_users: FrozenSet[str] = frozenset()
+    ) -> Dict[str, List[_RelatedEvent]]:
+        """Get a list of references to the given events.
+
+        Args:
+            event_ids: Fetch events that relate to this event ID.
+            ignored_users: The users ignored by the requesting user.
+
+        Returns:
+            A map of event IDs to a list related events.
+        """
+
+        related_events = await self._main_store.get_references_for_events(event_ids)
+
+        # Avoid additional logic if there are no ignored users.
+        if not ignored_users:
+            return {
+                event_id: results
+                for event_id, results in related_events.items()
+                if results
+            }
+
+        # Filter out ignored users.
+        results = {}
+        for event_id, events in related_events.items():
+            # If no references, skip.
+            if not events:
+                continue
+
+            # Filter ignored users out.
+            events = [event for event in events if event.sender not in ignored_users]
+            # If there are no events left, skip this event.
+            if not events:
+                continue
+
+            results[event_id] = events
+
+        return results
+
     async def _get_threads_for_events(
         self,
         events_by_id: Dict[str, EventBase],
@@ -412,14 +409,18 @@ class RelationsHandler:
                 if event is None:
                     continue
 
-                potential_events, _ = await self.get_relations_for_event(
-                    event_id,
-                    event,
-                    room_id,
-                    RelationTypes.THREAD,
-                    ignored_users,
+                # Attempt to find another event to use as the latest event.
+                potential_events, _ = await self._main_store.get_relations_for_event(
+                    event_id, event, room_id, RelationTypes.THREAD, direction="f"
                 )
 
+                # Filter out ignored users.
+                potential_events = [
+                    event
+                    for event in potential_events
+                    if event.sender not in ignored_users
+                ]
+
                 # If all found events are from ignored users, do not include
                 # a summary of the thread.
                 if not potential_events:
@@ -534,27 +535,16 @@ class RelationsHandler:
                     "chunk": annotations
                 }
 
-        # Fetch other relations per event.
-        for event in events_by_id.values():
-            # Fetch any references to bundle with this event.
-            references, next_token = await self.get_relations_for_event(
-                event.event_id,
-                event,
-                event.room_id,
-                RelationTypes.REFERENCE,
-                ignored_users=ignored_users,
-            )
+        # Fetch any references to bundle with this event.
+        references_by_event_id = await self.get_references_for_events(
+            events_by_id.keys(), ignored_users=ignored_users
+        )
+        for event_id, references in references_by_event_id.items():
             if references:
-                aggregations = results.setdefault(event.event_id, BundledAggregations())
-                aggregations.references = {
+                results.setdefault(event_id, BundledAggregations()).references = {
                     "chunk": [{"event_id": ev.event_id} for ev in references]
                 }
 
-                if next_token:
-                    aggregations.references["next_batch"] = await next_token.to_string(
-                        self._main_store
-                    )
-
         # Fetch any edits (but not for redacted events).
         #
         # Note that there is no use in limiting edits by ignored users since the
@@ -600,7 +590,7 @@ class RelationsHandler:
             room_id, requester, allow_departed_users=True
         )
 
-        # Note that ignored users are not passed into get_relations_for_event
+        # Note that ignored users are not passed into get_threads
         # below. Ignored users are handled in filter_events_for_client (and by
         # not passing them in here we should get a better cache hit rate).
         thread_roots, next_batch = await self._main_store.get_threads(
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index ddb7397714..a58668a380 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -259,6 +259,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
 
         if relates_to:
             self._attempt_to_invalidate_cache("get_relations_for_event", (relates_to,))
+            self._attempt_to_invalidate_cache("get_references_for_event", (relates_to,))
             self._attempt_to_invalidate_cache(
                 "get_aggregation_groups_for_event", (relates_to,)
             )
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index d68f127f9b..0f097a2927 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -2049,6 +2049,10 @@ class PersistEventsStore:
             self.store._invalidate_cache_and_stream(
                 txn, self.store.get_aggregation_groups_for_event, (redacted_relates_to,)
             )
+        if rel_type == RelationTypes.REFERENCE:
+            self.store._invalidate_cache_and_stream(
+                txn, self.store.get_references_for_event, (redacted_relates_to,)
+            )
         if rel_type == RelationTypes.REPLACE:
             self.store._invalidate_cache_and_stream(
                 txn, self.store.get_applicable_edit, (redacted_relates_to,)
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index f96a16956a..aea96e9d24 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -82,8 +82,6 @@ class _RelatedEvent:
     event_id: str
     # The sender of the related event.
     sender: str
-    topological_ordering: Optional[int]
-    stream_ordering: int
 
 
 class RelationsWorkerStore(SQLBaseStore):
@@ -246,13 +244,17 @@ class RelationsWorkerStore(SQLBaseStore):
             txn.execute(sql, where_args + [limit + 1])
 
             events = []
-            for event_id, relation_type, sender, topo_ordering, stream_ordering in txn:
+            topo_orderings: List[int] = []
+            stream_orderings: List[int] = []
+            for event_id, relation_type, sender, topo_ordering, stream_ordering in cast(
+                List[Tuple[str, str, str, int, int]], txn
+            ):
                 # Do not include edits for redacted events as they leak event
                 # content.
                 if not is_redacted or relation_type != RelationTypes.REPLACE:
-                    events.append(
-                        _RelatedEvent(event_id, sender, topo_ordering, stream_ordering)
-                    )
+                    events.append(_RelatedEvent(event_id, sender))
+                    topo_orderings.append(topo_ordering)
+                    stream_orderings.append(stream_ordering)
 
             # If there are more events, generate the next pagination key from the
             # last event returned.
@@ -261,9 +263,11 @@ class RelationsWorkerStore(SQLBaseStore):
                 # Instead of using the last row (which tells us there is more
                 # data), use the last row to be returned.
                 events = events[:limit]
+                topo_orderings = topo_orderings[:limit]
+                stream_orderings = stream_orderings[:limit]
 
-                topo = events[-1].topological_ordering
-                token = events[-1].stream_ordering
+                topo = topo_orderings[-1]
+                token = stream_orderings[-1]
                 if direction == "b":
                     # Tokens are positions between events.
                     # This token points *after* the last event in the chunk.
@@ -530,6 +534,60 @@ class RelationsWorkerStore(SQLBaseStore):
             "get_aggregation_groups_for_users", _get_aggregation_groups_for_users_txn
         )
 
+    @cached()
+    async def get_references_for_event(self, event_id: str) -> List[JsonDict]:
+        raise NotImplementedError()
+
+    @cachedList(cached_method_name="get_references_for_event", list_name="event_ids")
+    async def get_references_for_events(
+        self, event_ids: Collection[str]
+    ) -> Mapping[str, Optional[List[_RelatedEvent]]]:
+        """Get a list of references to the given events.
+
+        Args:
+            event_ids: Fetch events that relate to these event IDs.
+
+        Returns:
+            A map of event IDs to a list of related event IDs (and their senders).
+        """
+
+        clause, args = make_in_list_sql_clause(
+            self.database_engine, "relates_to_id", event_ids
+        )
+        args.append(RelationTypes.REFERENCE)
+
+        sql = f"""
+            SELECT relates_to_id, ref.event_id, ref.sender
+            FROM events AS ref
+            INNER JOIN event_relations USING (event_id)
+            INNER JOIN events AS parent ON
+                parent.event_id = relates_to_id
+                AND parent.room_id = ref.room_id
+            WHERE
+                {clause}
+                AND relation_type = ?
+            ORDER BY ref.topological_ordering, ref.stream_ordering
+        """
+
+        def _get_references_for_events_txn(
+            txn: LoggingTransaction,
+        ) -> Mapping[str, List[_RelatedEvent]]:
+            txn.execute(sql, args)
+
+            result: Dict[str, List[_RelatedEvent]] = {}
+            for relates_to_id, event_id, sender in cast(
+                List[Tuple[str, str, str]], txn
+            ):
+                result.setdefault(relates_to_id, []).append(
+                    _RelatedEvent(event_id, sender)
+                )
+
+            return result
+
+        return await self.db_pool.runInteraction(
+            "_get_references_for_events_txn", _get_references_for_events_txn
+        )
+
     @cached()
     def get_applicable_edit(self, event_id: str) -> Optional[EventBase]:
         raise NotImplementedError()
diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py
index 2d2b683548..b86f341ff5 100644
--- a/tests/rest/client/test_relations.py
+++ b/tests/rest/client/test_relations.py
@@ -1108,7 +1108,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase):
 
         # The "user" sent the root event and is making queries for the bundled
         # aggregations: they have participated.
-        self._test_bundled_aggregations(RelationTypes.THREAD, _gen_assert(True), 8)
+        self._test_bundled_aggregations(RelationTypes.THREAD, _gen_assert(True), 7)
         # The "user2" sent replies in the thread and is making queries for the
         # bundled aggregations: they have participated.
         #
@@ -1170,7 +1170,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase):
                 bundled_aggregations["latest_event"].get("unsigned"),
             )
 
-        self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 8)
+        self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 7)
 
     def test_nested_thread(self) -> None:
         """
-- 
cgit 1.5.1


From 7eb74600423e00c6982493eed18551d7f294140d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 22 Nov 2022 09:47:32 -0500
Subject: Parallelize calls to fetch bundled aggregations. (#14510)

The bundled aggregations for annotations, references, and edits
can be parallelized.
---
 changelog.d/14510.feature     |  1 +
 synapse/handlers/relations.py | 83 ++++++++++++++++++++++++++-----------------
 2 files changed, 52 insertions(+), 32 deletions(-)
 create mode 100644 changelog.d/14510.feature

(limited to 'synapse')

diff --git a/changelog.d/14510.feature b/changelog.d/14510.feature
new file mode 100644
index 0000000000..4fca7282f7
--- /dev/null
+++ b/changelog.d/14510.feature
@@ -0,0 +1 @@
+Reduce database load of [Client-Server endpoints](https://spec.matrix.org/v1.4/client-server-api/#aggregations) which return bundled aggregations.
diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py
index 8414be5879..e96f9999a8 100644
--- a/synapse/handlers/relations.py
+++ b/synapse/handlers/relations.py
@@ -20,10 +20,12 @@ import attr
 from synapse.api.constants import EventTypes, RelationTypes
 from synapse.api.errors import SynapseError
 from synapse.events import EventBase, relation_from_event
+from synapse.logging.context import make_deferred_yieldable, run_in_background
 from synapse.logging.opentracing import trace
 from synapse.storage.databases.main.relations import ThreadsNextBatch, _RelatedEvent
 from synapse.streams.config import PaginationConfig
 from synapse.types import JsonDict, Requester, UserID
+from synapse.util.async_helpers import gather_results
 from synapse.visibility import filter_events_for_client
 
 if TYPE_CHECKING:
@@ -525,39 +527,56 @@ class RelationsHandler:
                 # (as that is what makes it part of the thread).
                 relations_by_id[latest_thread_event.event_id] = RelationTypes.THREAD
 
-        # Fetch any annotations (ie, reactions) to bundle with this event.
-        annotations_by_event_id = await self.get_annotations_for_events(
-            events_by_id.keys(), ignored_users=ignored_users
-        )
-        for event_id, annotations in annotations_by_event_id.items():
-            if annotations:
-                results.setdefault(event_id, BundledAggregations()).annotations = {
-                    "chunk": annotations
-                }
-
-        # Fetch any references to bundle with this event.
-        references_by_event_id = await self.get_references_for_events(
-            events_by_id.keys(), ignored_users=ignored_users
-        )
-        for event_id, references in references_by_event_id.items():
-            if references:
-                results.setdefault(event_id, BundledAggregations()).references = {
-                    "chunk": [{"event_id": ev.event_id} for ev in references]
-                }
-
-        # Fetch any edits (but not for redacted events).
-        #
-        # Note that there is no use in limiting edits by ignored users since the
-        # parent event should be ignored in the first place if the user is ignored.
-        edits = await self._main_store.get_applicable_edits(
-            [
-                event_id
-                for event_id, event in events_by_id.items()
-                if not event.internal_metadata.is_redacted()
-            ]
+        async def _fetch_annotations() -> None:
+            """Fetch any annotations (ie, reactions) to bundle with this event."""
+            annotations_by_event_id = await self.get_annotations_for_events(
+                events_by_id.keys(), ignored_users=ignored_users
+            )
+            for event_id, annotations in annotations_by_event_id.items():
+                if annotations:
+                    results.setdefault(event_id, BundledAggregations()).annotations = {
+                        "chunk": annotations
+                    }
+
+        async def _fetch_references() -> None:
+            """Fetch any references to bundle with this event."""
+            references_by_event_id = await self.get_references_for_events(
+                events_by_id.keys(), ignored_users=ignored_users
+            )
+            for event_id, references in references_by_event_id.items():
+                if references:
+                    results.setdefault(event_id, BundledAggregations()).references = {
+                        "chunk": [{"event_id": ev.event_id} for ev in references]
+                    }
+
+        async def _fetch_edits() -> None:
+            """
+            Fetch any edits (but not for redacted events).
+
+            Note that there is no use in limiting edits by ignored users since the
+            parent event should be ignored in the first place if the user is ignored.
+            """
+            edits = await self._main_store.get_applicable_edits(
+                [
+                    event_id
+                    for event_id, event in events_by_id.items()
+                    if not event.internal_metadata.is_redacted()
+                ]
+            )
+            for event_id, edit in edits.items():
+                results.setdefault(event_id, BundledAggregations()).replace = edit
+
+        # Parallelize the calls for annotations, references, and edits since they
+        # are unrelated.
+        await make_deferred_yieldable(
+            gather_results(
+                (
+                    run_in_background(_fetch_annotations),
+                    run_in_background(_fetch_references),
+                    run_in_background(_fetch_edits),
+                )
+            )
         )
-        for event_id, edit in edits.items():
-            results.setdefault(event_id, BundledAggregations()).replace = edit
 
         return results
 
-- 
cgit 1.5.1


From 9cae44f49e6bf4f6b8a20ab11a65da417bb1565f Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Tue, 22 Nov 2022 16:46:52 +0000
Subject: Track unconverted device list outbound pokes using a position instead
 (#14516)

When a local device list change is added to
`device_lists_changes_in_room`, the `converted_to_destinations` flag is
set to `FALSE` and the `_handle_new_device_update_async` background
process is started. This background process looks for unconverted rows
in `device_lists_changes_in_room`, copies them to
`device_lists_outbound_pokes` and updates the flag.

To update the `converted_to_destinations` flag, the database performs a
`DELETE` and `INSERT` internally, which fragments the table. To avoid
this, track unconverted rows using a `(stream ID, room ID)` position
instead of the flag.

From now on, the `converted_to_destinations` column indicates rows that
need converting to outbound pokes, but does not indicate whether the
conversion has already taken place.

Closes #14037.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14516.misc                             |   1 +
 synapse/handlers/device.py                         |  30 +++++-
 synapse/storage/database.py                        |  13 +--
 synapse/storage/databases/main/devices.py          | 107 +++++++++++++--------
 .../73/12refactor_device_list_outbound_pokes.sql   |  53 ++++++++++
 tests/storage/test_devices.py                      |   3 +-
 6 files changed, 158 insertions(+), 49 deletions(-)
 create mode 100644 changelog.d/14516.misc
 create mode 100644 synapse/storage/schema/main/delta/73/12refactor_device_list_outbound_pokes.sql

(limited to 'synapse')

diff --git a/changelog.d/14516.misc b/changelog.d/14516.misc
new file mode 100644
index 0000000000..51666c6ffc
--- /dev/null
+++ b/changelog.d/14516.misc
@@ -0,0 +1 @@
+Refactor conversion of device list changes in room to outbound pokes to track unconverted rows using a `(stream ID, room ID)` position instead of updating the `converted_to_destinations` flag on every row.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index c597639a7f..da3ddafeae 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -682,13 +682,33 @@ class DeviceHandler(DeviceWorkerHandler):
         hosts_already_sent_to: Set[str] = set()
 
         try:
+            stream_id, room_id = await self.store.get_device_change_last_converted_pos()
+
             while True:
                 self._handle_new_device_update_new_data = False
-                rows = await self.store.get_uncoverted_outbound_room_pokes()
+                max_stream_id = self.store.get_device_stream_token()
+                rows = await self.store.get_uncoverted_outbound_room_pokes(
+                    stream_id, room_id
+                )
                 if not rows:
                     # If the DB returned nothing then there is nothing left to
                     # do, *unless* a new device list update happened during the
                     # DB query.
+
+                    # Advance `(stream_id, room_id)`.
+                    # `max_stream_id` comes from *before* the query for unconverted
+                    # rows, which means that any unconverted rows must have a larger
+                    # stream ID.
+                    if max_stream_id > stream_id:
+                        stream_id, room_id = max_stream_id, ""
+                        await self.store.set_device_change_last_converted_pos(
+                            stream_id, room_id
+                        )
+                    else:
+                        assert max_stream_id == stream_id
+                        # Avoid moving `room_id` backwards.
+                        pass
+
                     if self._handle_new_device_update_new_data:
                         continue
                     else:
@@ -718,7 +738,6 @@ class DeviceHandler(DeviceWorkerHandler):
                         user_id=user_id,
                         device_id=device_id,
                         room_id=room_id,
-                        stream_id=stream_id,
                         hosts=hosts,
                         context=opentracing_context,
                     )
@@ -752,6 +771,12 @@ class DeviceHandler(DeviceWorkerHandler):
                     hosts_already_sent_to.update(hosts)
                     current_stream_id = stream_id
 
+                # Advance `(stream_id, room_id)`.
+                _, _, room_id, stream_id, _ = rows[-1]
+                await self.store.set_device_change_last_converted_pos(
+                    stream_id, room_id
+                )
+
         finally:
             self._handle_new_device_update_is_processing = False
 
@@ -834,7 +859,6 @@ class DeviceHandler(DeviceWorkerHandler):
                 user_id=user_id,
                 device_id=device_id,
                 room_id=room_id,
-                stream_id=None,
                 hosts=potentially_changed_hosts,
                 context=None,
             )
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 0dc44b246c..a14b13aec8 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -2075,13 +2075,14 @@ class DatabasePool:
         retcols: Collection[str],
         allow_none: bool = False,
     ) -> Optional[Dict[str, Any]]:
-        select_sql = "SELECT %s FROM %s WHERE %s" % (
-            ", ".join(retcols),
-            table,
-            " AND ".join("%s = ?" % (k,) for k in keyvalues),
-        )
+        select_sql = "SELECT %s FROM %s" % (", ".join(retcols), table)
+
+        if keyvalues:
+            select_sql += " WHERE %s" % (" AND ".join("%s = ?" % k for k in keyvalues),)
+            txn.execute(select_sql, list(keyvalues.values()))
+        else:
+            txn.execute(select_sql)
 
-        txn.execute(select_sql, list(keyvalues.values()))
         row = txn.fetchone()
 
         if not row:
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 57230df5ae..37629115ab 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -2008,27 +2008,48 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         )
 
     async def get_uncoverted_outbound_room_pokes(
-        self, limit: int = 10
+        self, start_stream_id: int, start_room_id: str, limit: int = 10
     ) -> List[Tuple[str, str, str, int, Optional[Dict[str, str]]]]:
         """Get device list changes by room that have not yet been handled and
         written to `device_lists_outbound_pokes`.
 
+        Args:
+            start_stream_id: Together with `start_room_id`, indicates the position after
+                which to return device list changes.
+            start_room_id: Together with `start_stream_id`, indicates the position after
+                which to return device list changes.
+            limit: The maximum number of device list changes to return.
+
         Returns:
-            A list of user ID, device ID, room ID, stream ID and optional opentracing context.
+            A list of user ID, device ID, room ID, stream ID and optional opentracing
+            context, in order of ascending (stream ID, room ID).
         """
 
         sql = """
             SELECT user_id, device_id, room_id, stream_id, opentracing_context
             FROM device_lists_changes_in_room
-            WHERE NOT converted_to_destinations
-            ORDER BY stream_id
+            WHERE
+                (stream_id, room_id) > (?, ?) AND
+                stream_id <= ? AND
+                NOT converted_to_destinations
+            ORDER BY stream_id ASC, room_id ASC
             LIMIT ?
         """
 
         def get_uncoverted_outbound_room_pokes_txn(
             txn: LoggingTransaction,
         ) -> List[Tuple[str, str, str, int, Optional[Dict[str, str]]]]:
-            txn.execute(sql, (limit,))
+            txn.execute(
+                sql,
+                (
+                    start_stream_id,
+                    start_room_id,
+                    # Avoid returning rows if there may be uncommitted device list
+                    # changes with smaller stream IDs.
+                    self._device_list_id_gen.get_current_token(),
+                    limit,
+                ),
+            )
 
             return [
                 (
@@ -2050,49 +2071,25 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         user_id: str,
         device_id: str,
         room_id: str,
-        stream_id: Optional[int],
         hosts: Collection[str],
         context: Optional[Dict[str, str]],
     ) -> None:
         """Queue the device update to be sent to the given set of hosts,
         calculated from the room ID.
-
-        Marks the associated row in `device_lists_changes_in_room` as handled,
-        if `stream_id` is provided.
         """
+        if not hosts:
+            return
 
         def add_device_list_outbound_pokes_txn(
             txn: LoggingTransaction, stream_ids: List[int]
         ) -> None:
-            if hosts:
-                self._add_device_outbound_poke_to_stream_txn(
-                    txn,
-                    user_id=user_id,
-                    device_id=device_id,
-                    hosts=hosts,
-                    stream_ids=stream_ids,
-                    context=context,
-                )
-
-            if stream_id:
-                self.db_pool.simple_update_txn(
-                    txn,
-                    table="device_lists_changes_in_room",
-                    keyvalues={
-                        "user_id": user_id,
-                        "device_id": device_id,
-                        "stream_id": stream_id,
-                        "room_id": room_id,
-                    },
-                    updatevalues={"converted_to_destinations": True},
-                )
-
-        if not hosts:
-            # If there are no hosts then we don't try and generate stream IDs.
-            return await self.db_pool.runInteraction(
-                "add_device_list_outbound_pokes",
-                add_device_list_outbound_pokes_txn,
-                [],
+            self._add_device_outbound_poke_to_stream_txn(
+                txn,
+                user_id=user_id,
+                device_id=device_id,
+                hosts=hosts,
+                stream_ids=stream_ids,
+                context=context,
             )
 
         async with self._device_list_id_gen.get_next_mult(len(hosts)) as stream_ids:
@@ -2156,3 +2153,37 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
             "get_pending_remote_device_list_updates_for_room",
             get_pending_remote_device_list_updates_for_room_txn,
         )
+
+    async def get_device_change_last_converted_pos(self) -> Tuple[int, str]:
+        """
+        Get the position of the last row in `device_lists_changes_in_room` that has
+        been converted to `device_lists_outbound_pokes`.
+
+        Rows with a strictly greater position where `converted_to_destinations` is
+        `FALSE` have not been converted.
+        """
+
+        row = await self.db_pool.simple_select_one(
+            table="device_lists_changes_converted_stream_position",
+            keyvalues={},
+            retcols=["stream_id", "room_id"],
+            desc="get_device_change_last_converted_pos",
+        )
+        return row["stream_id"], row["room_id"]
+
+    async def set_device_change_last_converted_pos(
+        self,
+        stream_id: int,
+        room_id: str,
+    ) -> None:
+        """
+        Set the position of the last row in `device_lists_changes_in_room` that has
+        been converted to `device_lists_outbound_pokes`.
+        """
+
+        await self.db_pool.simple_update_one(
+            table="device_lists_changes_converted_stream_position",
+            keyvalues={},
+            updatevalues={"stream_id": stream_id, "room_id": room_id},
+            desc="set_device_change_last_converted_pos",
+        )
diff --git a/synapse/storage/schema/main/delta/73/12refactor_device_list_outbound_pokes.sql b/synapse/storage/schema/main/delta/73/12refactor_device_list_outbound_pokes.sql
new file mode 100644
index 0000000000..93d7fcb79b
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/12refactor_device_list_outbound_pokes.sql
@@ -0,0 +1,53 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Prior to this schema delta, we tracked the set of unconverted rows in
+-- `device_lists_changes_in_room` using the `converted_to_destinations` flag. When rows
+-- were converted to `device_lists_outbound_pokes`, the `converted_to_destinations` flag
+-- would be set.
+--
+-- After this schema delta, the `converted_to_destinations` flag is still populated
+-- like before, but the set of unconverted rows is determined by the `stream_id` in the
+-- new `device_lists_changes_converted_stream_position` table.
+--
+-- If rolled back, Synapse will re-send all device list changes that happened since the
+-- schema delta.
+
+CREATE TABLE IF NOT EXISTS device_lists_changes_converted_stream_position(
+    Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE,  -- Makes sure this table only has one row.
+    -- The (stream id, room id) of the last row in `device_lists_changes_in_room` that
+    -- has been converted to `device_lists_outbound_pokes`. Rows with a strictly larger
+    -- (stream id, room id) where `converted_to_destinations` is `FALSE` have not been
+    -- converted.
+    stream_id BIGINT NOT NULL,
+    -- `room_id` may be an empty string, which compares less than all valid room IDs.
+    room_id TEXT NOT NULL,
+    CHECK (Lock='X')
+);
+
+INSERT INTO device_lists_changes_converted_stream_position (stream_id, room_id) VALUES (
+    (
+        SELECT COALESCE(
+            -- The last converted stream id is the smallest unconverted stream id minus
+            -- one.
+            MIN(stream_id) - 1,
+            -- If there is no unconverted stream id, the last converted stream id is the
+            -- largest stream id.
+            -- If there are no stream ids at all, pick 1, since stream ids start at 2.
+            (SELECT COALESCE(MAX(stream_id), 1) FROM device_lists_changes_in_room)
+        ) FROM device_lists_changes_in_room WHERE NOT converted_to_destinations
+    ),
+    ''
+);
diff --git a/tests/storage/test_devices.py b/tests/storage/test_devices.py
index f37505b6cf..8e7db2c4ec 100644
--- a/tests/storage/test_devices.py
+++ b/tests/storage/test_devices.py
@@ -28,7 +28,7 @@ class DeviceStoreTestCase(HomeserverTestCase):
         """
 
         for device_id in device_ids:
-            stream_id = self.get_success(
+            self.get_success(
                 self.store.add_device_change_to_streams(
                     user_id, [device_id], ["!some:room"]
                 )
@@ -39,7 +39,6 @@ class DeviceStoreTestCase(HomeserverTestCase):
                     user_id=user_id,
                     device_id=device_id,
                     room_id="!some:room",
-                    stream_id=stream_id,
                     hosts=[host],
                     context={},
                 )
-- 
cgit 1.5.1


From 6d47b7e32589e816eb766446cc1ff19ea73fc7c1 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 22 Nov 2022 14:08:04 -0500
Subject: Add a type hint for `get_device_handler()` and fix incorrect types.
 (#14055)

This was the last untyped handler from the HomeServer object. Since
it was being treated as Any (and thus unchecked) it was being used
incorrectly in a few places.
---
 changelog.d/14055.misc                 |  1 +
 synapse/handlers/deactivate_account.py |  4 +++
 synapse/handlers/device.py             | 65 ++++++++++++++++++++++++++--------
 synapse/handlers/e2e_keys.py           | 61 ++++++++++++++++---------------
 synapse/handlers/register.py           |  4 +++
 synapse/handlers/set_password.py       |  6 +++-
 synapse/handlers/sso.py                |  9 +++++
 synapse/module_api/__init__.py         | 10 +++++-
 synapse/replication/http/devices.py    | 11 ++++--
 synapse/rest/admin/__init__.py         | 26 ++++++++------
 synapse/rest/admin/devices.py          | 13 +++++--
 synapse/rest/client/devices.py         | 17 ++++++---
 synapse/rest/client/logout.py          |  9 +++--
 synapse/server.py                      |  2 +-
 tests/handlers/test_device.py          | 19 ++++++----
 tests/rest/admin/test_device.py        |  5 ++-
 16 files changed, 185 insertions(+), 77 deletions(-)
 create mode 100644 changelog.d/14055.misc

(limited to 'synapse')

diff --git a/changelog.d/14055.misc b/changelog.d/14055.misc
new file mode 100644
index 0000000000..02980bc528
--- /dev/null
+++ b/changelog.d/14055.misc
@@ -0,0 +1 @@
+Add missing type hints to `HomeServer`.
diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index 816e1a6d79..d74d135c0c 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -16,6 +16,7 @@ import logging
 from typing import TYPE_CHECKING, Optional
 
 from synapse.api.errors import SynapseError
+from synapse.handlers.device import DeviceHandler
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.types import Codes, Requester, UserID, create_requester
 
@@ -76,6 +77,9 @@ class DeactivateAccountHandler:
             True if identity server supports removing threepids, otherwise False.
         """
 
+        # This can only be called on the main process.
+        assert isinstance(self._device_handler, DeviceHandler)
+
         # Check if this user can be deactivated
         if not await self._third_party_rules.check_can_deactivate_user(
             user_id, by_admin
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index da3ddafeae..b1e55e1b9e 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -65,6 +65,8 @@ DELETE_STALE_DEVICES_INTERVAL_MS = 24 * 60 * 60 * 1000
 
 
 class DeviceWorkerHandler:
+    device_list_updater: "DeviceListWorkerUpdater"
+
     def __init__(self, hs: "HomeServer"):
         self.clock = hs.get_clock()
         self.hs = hs
@@ -76,6 +78,8 @@ class DeviceWorkerHandler:
         self.server_name = hs.hostname
         self._msc3852_enabled = hs.config.experimental.msc3852_enabled
 
+        self.device_list_updater = DeviceListWorkerUpdater(hs)
+
     @trace
     async def get_devices_by_user(self, user_id: str) -> List[JsonDict]:
         """
@@ -99,6 +103,19 @@ class DeviceWorkerHandler:
         log_kv(device_map)
         return devices
 
+    async def get_dehydrated_device(
+        self, user_id: str
+    ) -> Optional[Tuple[str, JsonDict]]:
+        """Retrieve the information for a dehydrated device.
+
+        Args:
+            user_id: the user whose dehydrated device we are looking for
+        Returns:
+            a tuple whose first item is the device ID, and the second item is
+            the dehydrated device information
+        """
+        return await self.store.get_dehydrated_device(user_id)
+
     @trace
     async def get_device(self, user_id: str, device_id: str) -> JsonDict:
         """Retrieve the given device
@@ -127,7 +144,7 @@ class DeviceWorkerHandler:
     @cancellable
     async def get_device_changes_in_shared_rooms(
         self, user_id: str, room_ids: Collection[str], from_token: StreamToken
-    ) -> Collection[str]:
+    ) -> Set[str]:
         """Get the set of users whose devices have changed who share a room with
         the given user.
         """
@@ -320,6 +337,8 @@ class DeviceWorkerHandler:
 
 
 class DeviceHandler(DeviceWorkerHandler):
+    device_list_updater: "DeviceListUpdater"
+
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
 
@@ -606,19 +625,6 @@ class DeviceHandler(DeviceWorkerHandler):
             await self.delete_devices(user_id, [old_device_id])
         return device_id
 
-    async def get_dehydrated_device(
-        self, user_id: str
-    ) -> Optional[Tuple[str, JsonDict]]:
-        """Retrieve the information for a dehydrated device.
-
-        Args:
-            user_id: the user whose dehydrated device we are looking for
-        Returns:
-            a tuple whose first item is the device ID, and the second item is
-            the dehydrated device information
-        """
-        return await self.store.get_dehydrated_device(user_id)
-
     async def rehydrate_device(
         self, user_id: str, access_token: str, device_id: str
     ) -> dict:
@@ -882,7 +888,36 @@ def _update_device_from_client_ips(
     )
 
 
-class DeviceListUpdater:
+class DeviceListWorkerUpdater:
+    "Handles incoming device list updates from federation and contacts the main process over replication"
+
+    def __init__(self, hs: "HomeServer"):
+        from synapse.replication.http.devices import (
+            ReplicationUserDevicesResyncRestServlet,
+        )
+
+        self._user_device_resync_client = (
+            ReplicationUserDevicesResyncRestServlet.make_client(hs)
+        )
+
+    async def user_device_resync(
+        self, user_id: str, mark_failed_as_stale: bool = True
+    ) -> Optional[JsonDict]:
+        """Fetches all devices for a user and updates the device cache with them.
+
+        Args:
+            user_id: The user's id whose device_list will be updated.
+            mark_failed_as_stale: Whether to mark the user's device list as stale
+                if the attempt to resync failed.
+        Returns:
+            A dict with device info as under the "devices" in the result of this
+            request:
+            https://matrix.org/docs/spec/server_server/r0.1.2#get-matrix-federation-v1-user-devices-userid
+        """
+        return await self._user_device_resync_client(user_id=user_id)
+
+
+class DeviceListUpdater(DeviceListWorkerUpdater):
     "Handles incoming device list updates from federation and updates the DB"
 
     def __init__(self, hs: "HomeServer", device_handler: DeviceHandler):
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index bf1221f523..5fe102e2f2 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -27,9 +27,9 @@ from twisted.internet import defer
 
 from synapse.api.constants import EduTypes
 from synapse.api.errors import CodeMessageException, Codes, NotFoundError, SynapseError
+from synapse.handlers.device import DeviceHandler
 from synapse.logging.context import make_deferred_yieldable, run_in_background
 from synapse.logging.opentracing import log_kv, set_tag, tag_args, trace
-from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet
 from synapse.types import (
     JsonDict,
     UserID,
@@ -56,27 +56,23 @@ class E2eKeysHandler:
         self.is_mine = hs.is_mine
         self.clock = hs.get_clock()
 
-        self._edu_updater = SigningKeyEduUpdater(hs, self)
-
         federation_registry = hs.get_federation_registry()
 
-        self._is_master = hs.config.worker.worker_app is None
-        if not self._is_master:
-            self._user_device_resync_client = (
-                ReplicationUserDevicesResyncRestServlet.make_client(hs)
-            )
-        else:
+        is_master = hs.config.worker.worker_app is None
+        if is_master:
+            edu_updater = SigningKeyEduUpdater(hs)
+
             # Only register this edu handler on master as it requires writing
             # device updates to the db
             federation_registry.register_edu_handler(
                 EduTypes.SIGNING_KEY_UPDATE,
-                self._edu_updater.incoming_signing_key_update,
+                edu_updater.incoming_signing_key_update,
             )
             # also handle the unstable version
             # FIXME: remove this when enough servers have upgraded
             federation_registry.register_edu_handler(
                 EduTypes.UNSTABLE_SIGNING_KEY_UPDATE,
-                self._edu_updater.incoming_signing_key_update,
+                edu_updater.incoming_signing_key_update,
             )
 
         # doesn't really work as part of the generic query API, because the
@@ -319,14 +315,13 @@ class E2eKeysHandler:
             # probably be tracking their device lists. However, we haven't
             # done an initial sync on the device list so we do it now.
             try:
-                if self._is_master:
-                    resync_results = await self.device_handler.device_list_updater.user_device_resync(
+                resync_results = (
+                    await self.device_handler.device_list_updater.user_device_resync(
                         user_id
                     )
-                else:
-                    resync_results = await self._user_device_resync_client(
-                        user_id=user_id
-                    )
+                )
+                if resync_results is None:
+                    raise ValueError("Device resync failed")
 
                 # Add the device keys to the results.
                 user_devices = resync_results["devices"]
@@ -605,6 +600,8 @@ class E2eKeysHandler:
     async def upload_keys_for_user(
         self, user_id: str, device_id: str, keys: JsonDict
     ) -> JsonDict:
+        # This can only be called from the main process.
+        assert isinstance(self.device_handler, DeviceHandler)
 
         time_now = self.clock.time_msec()
 
@@ -732,6 +729,8 @@ class E2eKeysHandler:
             user_id: the user uploading the keys
             keys: the signing keys
         """
+        # This can only be called from the main process.
+        assert isinstance(self.device_handler, DeviceHandler)
 
         # if a master key is uploaded, then check it.  Otherwise, load the
         # stored master key, to check signatures on other keys
@@ -823,6 +822,9 @@ class E2eKeysHandler:
         Raises:
             SynapseError: if the signatures dict is not valid.
         """
+        # This can only be called from the main process.
+        assert isinstance(self.device_handler, DeviceHandler)
+
         failures = {}
 
         # signatures to be stored.  Each item will be a SignatureListItem
@@ -1200,6 +1202,9 @@ class E2eKeysHandler:
             A tuple of the retrieved key content, the key's ID and the matching VerifyKey.
             If the key cannot be retrieved, all values in the tuple will instead be None.
         """
+        # This can only be called from the main process.
+        assert isinstance(self.device_handler, DeviceHandler)
+
         try:
             remote_result = await self.federation.query_user_devices(
                 user.domain, user.to_string()
@@ -1396,11 +1401,14 @@ class SignatureListItem:
 class SigningKeyEduUpdater:
     """Handles incoming signing key updates from federation and updates the DB"""
 
-    def __init__(self, hs: "HomeServer", e2e_keys_handler: E2eKeysHandler):
+    def __init__(self, hs: "HomeServer"):
         self.store = hs.get_datastores().main
         self.federation = hs.get_federation_client()
         self.clock = hs.get_clock()
-        self.e2e_keys_handler = e2e_keys_handler
+
+        device_handler = hs.get_device_handler()
+        assert isinstance(device_handler, DeviceHandler)
+        self._device_handler = device_handler
 
         self._remote_edu_linearizer = Linearizer(name="remote_signing_key")
 
@@ -1445,9 +1453,6 @@ class SigningKeyEduUpdater:
             user_id: the user whose updates we are processing
         """
 
-        device_handler = self.e2e_keys_handler.device_handler
-        device_list_updater = device_handler.device_list_updater
-
         async with self._remote_edu_linearizer.queue(user_id):
             pending_updates = self._pending_updates.pop(user_id, [])
             if not pending_updates:
@@ -1459,13 +1464,11 @@ class SigningKeyEduUpdater:
             logger.info("pending updates: %r", pending_updates)
 
             for master_key, self_signing_key in pending_updates:
-                new_device_ids = (
-                    await device_list_updater.process_cross_signing_key_update(
-                        user_id,
-                        master_key,
-                        self_signing_key,
-                    )
+                new_device_ids = await self._device_handler.device_list_updater.process_cross_signing_key_update(
+                    user_id,
+                    master_key,
+                    self_signing_key,
                 )
                 device_ids = device_ids + new_device_ids
 
-            await device_handler.notify_device_update(user_id, device_ids)
+            await self._device_handler.notify_device_update(user_id, device_ids)
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index ca1c7a1866..6307fa9c5d 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -38,6 +38,7 @@ from synapse.api.errors import (
 )
 from synapse.appservice import ApplicationService
 from synapse.config.server import is_threepid_reserved
+from synapse.handlers.device import DeviceHandler
 from synapse.http.servlet import assert_params_in_dict
 from synapse.replication.http.login import RegisterDeviceReplicationServlet
 from synapse.replication.http.register import (
@@ -841,6 +842,9 @@ class RegistrationHandler:
         refresh_token = None
         refresh_token_id = None
 
+        # This can only run on the main process.
+        assert isinstance(self.device_handler, DeviceHandler)
+
         registered_device_id = await self.device_handler.check_device_registered(
             user_id,
             device_id,
diff --git a/synapse/handlers/set_password.py b/synapse/handlers/set_password.py
index 73861bbd40..bd9d0bb34b 100644
--- a/synapse/handlers/set_password.py
+++ b/synapse/handlers/set_password.py
@@ -15,6 +15,7 @@ import logging
 from typing import TYPE_CHECKING, Optional
 
 from synapse.api.errors import Codes, StoreError, SynapseError
+from synapse.handlers.device import DeviceHandler
 from synapse.types import Requester
 
 if TYPE_CHECKING:
@@ -29,7 +30,10 @@ class SetPasswordHandler:
     def __init__(self, hs: "HomeServer"):
         self.store = hs.get_datastores().main
         self._auth_handler = hs.get_auth_handler()
-        self._device_handler = hs.get_device_handler()
+        # This can only be instantiated on the main process.
+        device_handler = hs.get_device_handler()
+        assert isinstance(device_handler, DeviceHandler)
+        self._device_handler = device_handler
 
     async def set_password(
         self,
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index 749d7e93b0..e1c0bff1b2 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -37,6 +37,7 @@ from twisted.web.server import Request
 from synapse.api.constants import LoginType
 from synapse.api.errors import Codes, NotFoundError, RedirectException, SynapseError
 from synapse.config.sso import SsoAttributeRequirement
+from synapse.handlers.device import DeviceHandler
 from synapse.handlers.register import init_counters_for_auth_provider
 from synapse.handlers.ui_auth import UIAuthSessionDataConstants
 from synapse.http import get_request_user_agent
@@ -1035,6 +1036,8 @@ class SsoHandler:
     ) -> None:
         """Revoke any devices and in-flight logins tied to a provider session.
 
+        Can only be called from the main process.
+
         Args:
             auth_provider_id: A unique identifier for this SSO provider, e.g.
                 "oidc" or "saml".
@@ -1042,6 +1045,12 @@ class SsoHandler:
             expected_user_id: The user we're expecting to logout. If set, it will ignore
                 sessions belonging to other users and log an error.
         """
+
+        # It is expected that this is the main process.
+        assert isinstance(
+            self._device_handler, DeviceHandler
+        ), "revoking SSO sessions can only be called on the main process"
+
         # Invalidate any running user-mapping sessions
         to_delete = []
         for session_id, session in self._username_mapping_sessions.items():
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 1adc1fd64f..96a661177a 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -86,6 +86,7 @@ from synapse.handlers.auth import (
     ON_LOGGED_OUT_CALLBACK,
     AuthHandler,
 )
+from synapse.handlers.device import DeviceHandler
 from synapse.handlers.push_rules import RuleSpec, check_actions
 from synapse.http.client import SimpleHttpClient
 from synapse.http.server import (
@@ -207,6 +208,7 @@ class ModuleApi:
         self._registration_handler = hs.get_registration_handler()
         self._send_email_handler = hs.get_send_email_handler()
         self._push_rules_handler = hs.get_push_rules_handler()
+        self._device_handler = hs.get_device_handler()
         self.custom_template_dir = hs.config.server.custom_template_directory
 
         try:
@@ -784,6 +786,8 @@ class ModuleApi:
     ) -> Generator["defer.Deferred[Any]", Any, None]:
         """Invalidate an access token for a user
 
+        Can only be called from the main process.
+
         Added in Synapse v0.25.0.
 
         Args:
@@ -796,6 +800,10 @@ class ModuleApi:
         Raises:
             synapse.api.errors.AuthError: the access token is invalid
         """
+        assert isinstance(
+            self._device_handler, DeviceHandler
+        ), "invalidate_access_token can only be called on the main process"
+
         # see if the access token corresponds to a device
         user_info = yield defer.ensureDeferred(
             self._auth.get_user_by_access_token(access_token)
@@ -805,7 +813,7 @@ class ModuleApi:
         if device_id:
             # delete the device, which will also delete its access tokens
             yield defer.ensureDeferred(
-                self._hs.get_device_handler().delete_devices(user_id, [device_id])
+                self._device_handler.delete_devices(user_id, [device_id])
             )
         else:
             # no associated device. Just delete the access token.
diff --git a/synapse/replication/http/devices.py b/synapse/replication/http/devices.py
index c21629def8..7c4941c3d3 100644
--- a/synapse/replication/http/devices.py
+++ b/synapse/replication/http/devices.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, Tuple
+from typing import TYPE_CHECKING, Optional, Tuple
 
 from twisted.web.server import Request
 
@@ -63,7 +63,12 @@ class ReplicationUserDevicesResyncRestServlet(ReplicationEndpoint):
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
 
-        self.device_list_updater = hs.get_device_handler().device_list_updater
+        from synapse.handlers.device import DeviceHandler
+
+        handler = hs.get_device_handler()
+        assert isinstance(handler, DeviceHandler)
+        self.device_list_updater = handler.device_list_updater
+
         self.store = hs.get_datastores().main
         self.clock = hs.get_clock()
 
@@ -73,7 +78,7 @@ class ReplicationUserDevicesResyncRestServlet(ReplicationEndpoint):
 
     async def _handle_request(  # type: ignore[override]
         self, request: Request, user_id: str
-    ) -> Tuple[int, JsonDict]:
+    ) -> Tuple[int, Optional[JsonDict]]:
         user_devices = await self.device_list_updater.user_device_resync(user_id)
 
         return 200, user_devices
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index c62ea22116..fb73886df0 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -238,6 +238,10 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     """
     Register all the admin servlets.
     """
+    # Admin servlets aren't registered on workers.
+    if hs.config.worker.worker_app is not None:
+        return
+
     register_servlets_for_client_rest_resource(hs, http_server)
     BlockRoomRestServlet(hs).register(http_server)
     ListRoomRestServlet(hs).register(http_server)
@@ -254,9 +258,6 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     UserTokenRestServlet(hs).register(http_server)
     UserRestServletV2(hs).register(http_server)
     UsersRestServletV2(hs).register(http_server)
-    DeviceRestServlet(hs).register(http_server)
-    DevicesRestServlet(hs).register(http_server)
-    DeleteDevicesRestServlet(hs).register(http_server)
     UserMediaStatisticsRestServlet(hs).register(http_server)
     EventReportDetailRestServlet(hs).register(http_server)
     EventReportsRestServlet(hs).register(http_server)
@@ -280,12 +281,13 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     UserByExternalId(hs).register(http_server)
     UserByThreePid(hs).register(http_server)
 
-    # Some servlets only get registered for the main process.
-    if hs.config.worker.worker_app is None:
-        SendServerNoticeServlet(hs).register(http_server)
-        BackgroundUpdateEnabledRestServlet(hs).register(http_server)
-        BackgroundUpdateRestServlet(hs).register(http_server)
-        BackgroundUpdateStartJobRestServlet(hs).register(http_server)
+    DeviceRestServlet(hs).register(http_server)
+    DevicesRestServlet(hs).register(http_server)
+    DeleteDevicesRestServlet(hs).register(http_server)
+    SendServerNoticeServlet(hs).register(http_server)
+    BackgroundUpdateEnabledRestServlet(hs).register(http_server)
+    BackgroundUpdateRestServlet(hs).register(http_server)
+    BackgroundUpdateStartJobRestServlet(hs).register(http_server)
 
 
 def register_servlets_for_client_rest_resource(
@@ -294,9 +296,11 @@ def register_servlets_for_client_rest_resource(
     """Register only the servlets which need to be exposed on /_matrix/client/xxx"""
     WhoisRestServlet(hs).register(http_server)
     PurgeHistoryStatusRestServlet(hs).register(http_server)
-    DeactivateAccountRestServlet(hs).register(http_server)
     PurgeHistoryRestServlet(hs).register(http_server)
-    ResetPasswordRestServlet(hs).register(http_server)
+    # The following resources can only be run on the main process.
+    if hs.config.worker.worker_app is None:
+        DeactivateAccountRestServlet(hs).register(http_server)
+        ResetPasswordRestServlet(hs).register(http_server)
     SearchUsersRestServlet(hs).register(http_server)
     UserRegisterServlet(hs).register(http_server)
     AccountValidityRenewServlet(hs).register(http_server)
diff --git a/synapse/rest/admin/devices.py b/synapse/rest/admin/devices.py
index d934880102..3b2f2d9abb 100644
--- a/synapse/rest/admin/devices.py
+++ b/synapse/rest/admin/devices.py
@@ -16,6 +16,7 @@ from http import HTTPStatus
 from typing import TYPE_CHECKING, Tuple
 
 from synapse.api.errors import NotFoundError, SynapseError
+from synapse.handlers.device import DeviceHandler
 from synapse.http.servlet import (
     RestServlet,
     assert_params_in_dict,
@@ -43,7 +44,9 @@ class DeviceRestServlet(RestServlet):
     def __init__(self, hs: "HomeServer"):
         super().__init__()
         self.auth = hs.get_auth()
-        self.device_handler = hs.get_device_handler()
+        handler = hs.get_device_handler()
+        assert isinstance(handler, DeviceHandler)
+        self.device_handler = handler
         self.store = hs.get_datastores().main
         self.is_mine = hs.is_mine
 
@@ -112,7 +115,9 @@ class DevicesRestServlet(RestServlet):
 
     def __init__(self, hs: "HomeServer"):
         self.auth = hs.get_auth()
-        self.device_handler = hs.get_device_handler()
+        handler = hs.get_device_handler()
+        assert isinstance(handler, DeviceHandler)
+        self.device_handler = handler
         self.store = hs.get_datastores().main
         self.is_mine = hs.is_mine
 
@@ -143,7 +148,9 @@ class DeleteDevicesRestServlet(RestServlet):
 
     def __init__(self, hs: "HomeServer"):
         self.auth = hs.get_auth()
-        self.device_handler = hs.get_device_handler()
+        handler = hs.get_device_handler()
+        assert isinstance(handler, DeviceHandler)
+        self.device_handler = handler
         self.store = hs.get_datastores().main
         self.is_mine = hs.is_mine
 
diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py
index 8f3cbd4ea2..69b803f9f8 100644
--- a/synapse/rest/client/devices.py
+++ b/synapse/rest/client/devices.py
@@ -20,6 +20,7 @@ from pydantic import Extra, StrictStr
 
 from synapse.api import errors
 from synapse.api.errors import NotFoundError
+from synapse.handlers.device import DeviceHandler
 from synapse.http.server import HttpServer
 from synapse.http.servlet import (
     RestServlet,
@@ -80,7 +81,9 @@ class DeleteDevicesRestServlet(RestServlet):
         super().__init__()
         self.hs = hs
         self.auth = hs.get_auth()
-        self.device_handler = hs.get_device_handler()
+        handler = hs.get_device_handler()
+        assert isinstance(handler, DeviceHandler)
+        self.device_handler = handler
         self.auth_handler = hs.get_auth_handler()
 
     class PostBody(RequestBodyModel):
@@ -125,7 +128,9 @@ class DeviceRestServlet(RestServlet):
         super().__init__()
         self.hs = hs
         self.auth = hs.get_auth()
-        self.device_handler = hs.get_device_handler()
+        handler = hs.get_device_handler()
+        assert isinstance(handler, DeviceHandler)
+        self.device_handler = handler
         self.auth_handler = hs.get_auth_handler()
         self._msc3852_enabled = hs.config.experimental.msc3852_enabled
 
@@ -256,7 +261,9 @@ class DehydratedDeviceServlet(RestServlet):
         super().__init__()
         self.hs = hs
         self.auth = hs.get_auth()
-        self.device_handler = hs.get_device_handler()
+        handler = hs.get_device_handler()
+        assert isinstance(handler, DeviceHandler)
+        self.device_handler = handler
 
     async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
@@ -313,7 +320,9 @@ class ClaimDehydratedDeviceServlet(RestServlet):
         super().__init__()
         self.hs = hs
         self.auth = hs.get_auth()
-        self.device_handler = hs.get_device_handler()
+        handler = hs.get_device_handler()
+        assert isinstance(handler, DeviceHandler)
+        self.device_handler = handler
 
     class PostBody(RequestBodyModel):
         device_id: StrictStr
diff --git a/synapse/rest/client/logout.py b/synapse/rest/client/logout.py
index 23dfa4518f..6d34625ad5 100644
--- a/synapse/rest/client/logout.py
+++ b/synapse/rest/client/logout.py
@@ -15,6 +15,7 @@
 import logging
 from typing import TYPE_CHECKING, Tuple
 
+from synapse.handlers.device import DeviceHandler
 from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet
 from synapse.http.site import SynapseRequest
@@ -34,7 +35,9 @@ class LogoutRestServlet(RestServlet):
         super().__init__()
         self.auth = hs.get_auth()
         self._auth_handler = hs.get_auth_handler()
-        self._device_handler = hs.get_device_handler()
+        handler = hs.get_device_handler()
+        assert isinstance(handler, DeviceHandler)
+        self._device_handler = handler
 
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request, allow_expired=True)
@@ -59,7 +62,9 @@ class LogoutAllRestServlet(RestServlet):
         super().__init__()
         self.auth = hs.get_auth()
         self._auth_handler = hs.get_auth_handler()
-        self._device_handler = hs.get_device_handler()
+        handler = hs.get_device_handler()
+        assert isinstance(handler, DeviceHandler)
+        self._device_handler = handler
 
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request, allow_expired=True)
diff --git a/synapse/server.py b/synapse/server.py
index f0a60d0056..5baae2325e 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -510,7 +510,7 @@ class HomeServer(metaclass=abc.ABCMeta):
         )
 
     @cache_in_self
-    def get_device_handler(self):
+    def get_device_handler(self) -> DeviceWorkerHandler:
         if self.config.worker.worker_app:
             return DeviceWorkerHandler(self)
         else:
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index b8b465d35b..ce7525e29c 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -19,7 +19,7 @@ from typing import Optional
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.errors import NotFoundError, SynapseError
-from synapse.handlers.device import MAX_DEVICE_DISPLAY_NAME_LEN
+from synapse.handlers.device import MAX_DEVICE_DISPLAY_NAME_LEN, DeviceHandler
 from synapse.server import HomeServer
 from synapse.util import Clock
 
@@ -32,7 +32,9 @@ user2 = "@theresa:bbb"
 class DeviceTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         hs = self.setup_test_homeserver("server", federation_http_client=None)
-        self.handler = hs.get_device_handler()
+        handler = hs.get_device_handler()
+        assert isinstance(handler, DeviceHandler)
+        self.handler = handler
         self.store = hs.get_datastores().main
         return hs
 
@@ -61,6 +63,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
         self.assertEqual(res, "fco")
 
         dev = self.get_success(self.handler.store.get_device("@boris:foo", "fco"))
+        assert dev is not None
         self.assertEqual(dev["display_name"], "display name")
 
     def test_device_is_preserved_if_exists(self) -> None:
@@ -83,6 +86,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
         self.assertEqual(res2, "fco")
 
         dev = self.get_success(self.handler.store.get_device("@boris:foo", "fco"))
+        assert dev is not None
         self.assertEqual(dev["display_name"], "display name")
 
     def test_device_id_is_made_up_if_unspecified(self) -> None:
@@ -95,6 +99,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
         )
 
         dev = self.get_success(self.handler.store.get_device("@theresa:foo", device_id))
+        assert dev is not None
         self.assertEqual(dev["display_name"], "display")
 
     def test_get_devices_by_user(self) -> None:
@@ -264,7 +269,9 @@ class DeviceTestCase(unittest.HomeserverTestCase):
 class DehydrationTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         hs = self.setup_test_homeserver("server", federation_http_client=None)
-        self.handler = hs.get_device_handler()
+        handler = hs.get_device_handler()
+        assert isinstance(handler, DeviceHandler)
+        self.handler = handler
         self.registration = hs.get_registration_handler()
         self.auth = hs.get_auth()
         self.store = hs.get_datastores().main
@@ -284,9 +291,9 @@ class DehydrationTestCase(unittest.HomeserverTestCase):
             )
         )
 
-        retrieved_device_id, device_data = self.get_success(
-            self.handler.get_dehydrated_device(user_id=user_id)
-        )
+        result = self.get_success(self.handler.get_dehydrated_device(user_id=user_id))
+        assert result is not None
+        retrieved_device_id, device_data = result
 
         self.assertEqual(retrieved_device_id, stored_dehydrated_device_id)
         self.assertEqual(device_data, {"device_data": {"foo": "bar"}})
diff --git a/tests/rest/admin/test_device.py b/tests/rest/admin/test_device.py
index d52aee8f92..03f2112b07 100644
--- a/tests/rest/admin/test_device.py
+++ b/tests/rest/admin/test_device.py
@@ -19,6 +19,7 @@ from twisted.test.proto_helpers import MemoryReactor
 
 import synapse.rest.admin
 from synapse.api.errors import Codes
+from synapse.handlers.device import DeviceHandler
 from synapse.rest.client import login
 from synapse.server import HomeServer
 from synapse.util import Clock
@@ -34,7 +35,9 @@ class DeviceRestTestCase(unittest.HomeserverTestCase):
     ]
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        self.handler = hs.get_device_handler()
+        handler = hs.get_device_handler()
+        assert isinstance(handler, DeviceHandler)
+        self.handler = handler
 
         self.admin_user = self.register_user("admin", "pass", admin=True)
         self.admin_user_tok = self.login("admin", "pass")
-- 
cgit 1.5.1


From df390a8e676f514f3deecdcc2d12a6cc6b9e8e1d Mon Sep 17 00:00:00 2001
From: realtyem <realtyem@gmail.com>
Date: Tue, 22 Nov 2022 15:33:58 -0600
Subject: Refactor `federation_sender` and `pusher` configuration loading.
 (#14496)

To avoid duplicating the same logic for handling legacy configuration
settings.

This should help in applying similar logic to other worker types.
---
 changelog.d/14496.misc    |   1 +
 synapse/config/workers.py | 139 +++++++++++++++++++++++-----------------------
 2 files changed, 71 insertions(+), 69 deletions(-)
 create mode 100644 changelog.d/14496.misc

(limited to 'synapse')

diff --git a/changelog.d/14496.misc b/changelog.d/14496.misc
new file mode 100644
index 0000000000..57fc6cf452
--- /dev/null
+++ b/changelog.d/14496.misc
@@ -0,0 +1 @@
+Refactor `federation_sender` and `pusher` configuration loading.
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 913b83e174..2580660b6c 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -29,20 +29,6 @@ from ._base import (
 )
 from .server import DIRECT_TCP_ERROR, ListenerConfig, parse_listener_def
 
-_FEDERATION_SENDER_WITH_SEND_FEDERATION_ENABLED_ERROR = """
-The send_federation config option must be disabled in the main
-synapse process before they can be run in a separate worker.
-
-Please add ``send_federation: false`` to the main config
-"""
-
-_PUSHER_WITH_START_PUSHERS_ENABLED_ERROR = """
-The start_pushers config option must be disabled in the main
-synapse process before they can be run in a separate worker.
-
-Please add ``start_pushers: false`` to the main config
-"""
-
 _DEPRECATED_WORKER_DUTY_OPTION_USED = """
 The '%s' configuration option is deprecated and will be removed in a future
 Synapse version. Please use ``%s: name_of_worker`` instead.
@@ -182,40 +168,12 @@ class WorkerConfig(Config):
                 )
             )
 
-        # Handle federation sender configuration.
-        #
-        # There are two ways of configuring which instances handle federation
-        # sending:
-        #   1. The old way where "send_federation" is set to false and running a
-        #      `synapse.app.federation_sender` worker app.
-        #   2. Specifying the workers sending federation in
-        #      `federation_sender_instances`.
-        #
-
-        send_federation = config.get("send_federation", True)
-
-        federation_sender_instances = config.get("federation_sender_instances")
-        if federation_sender_instances is None:
-            # Default to an empty list, which means "another, unknown, worker is
-            # responsible for it".
-            federation_sender_instances = []
-
-            # If no federation sender instances are set we check if
-            # `send_federation` is set, which means use master
-            if send_federation:
-                federation_sender_instances = ["master"]
-
-            if self.worker_app == "synapse.app.federation_sender":
-                if send_federation:
-                    # If we're running federation senders, and not using
-                    # `federation_sender_instances`, then we should have
-                    # explicitly set `send_federation` to false.
-                    raise ConfigError(
-                        _FEDERATION_SENDER_WITH_SEND_FEDERATION_ENABLED_ERROR
-                    )
-
-                federation_sender_instances = [self.worker_name]
-
+        federation_sender_instances = self._worker_names_performing_this_duty(
+            config,
+            "send_federation",
+            "synapse.app.federation_sender",
+            "federation_sender_instances",
+        )
         self.send_federation = self.instance_name in federation_sender_instances
         self.federation_shard_config = ShardedWorkerHandlingConfig(
             federation_sender_instances
@@ -282,27 +240,12 @@ class WorkerConfig(Config):
         )
 
         # Handle sharded push
-        start_pushers = config.get("start_pushers", True)
-        pusher_instances = config.get("pusher_instances")
-        if pusher_instances is None:
-            # Default to an empty list, which means "another, unknown, worker is
-            # responsible for it".
-            pusher_instances = []
-
-            # If no pushers instances are set we check if `start_pushers` is
-            # set, which means use master
-            if start_pushers:
-                pusher_instances = ["master"]
-
-            if self.worker_app == "synapse.app.pusher":
-                if start_pushers:
-                    # If we're running pushers, and not using
-                    # `pusher_instances`, then we should have explicitly set
-                    # `start_pushers` to false.
-                    raise ConfigError(_PUSHER_WITH_START_PUSHERS_ENABLED_ERROR)
-
-                pusher_instances = [self.instance_name]
-
+        pusher_instances = self._worker_names_performing_this_duty(
+            config,
+            "start_pushers",
+            "synapse.app.pusher",
+            "pusher_instances",
+        )
         self.start_pushers = self.instance_name in pusher_instances
         self.pusher_shard_config = ShardedWorkerHandlingConfig(pusher_instances)
 
@@ -425,6 +368,64 @@ class WorkerConfig(Config):
         # (By this point, these are either the same value or only one is not None.)
         return bool(new_option_should_run_here or legacy_option_should_run_here)
 
+    def _worker_names_performing_this_duty(
+        self,
+        config: Dict[str, Any],
+        legacy_option_name: str,
+        legacy_app_name: str,
+        modern_instance_list_name: str,
+    ) -> List[str]:
+        """
+        Retrieves the names of the workers handling a given duty, by either legacy
+        option or instance list.
+
+        There are two ways of configuring which instances handle a given duty, e.g.
+        for configuring pushers:
+
+        1. The old way where "start_pushers" is set to false and running a
+          `synapse.app.pusher` worker app.
+        2. Specifying the workers running pushers in `pusher_instances`.
+
+        Args:
+            config: settings read from yaml.
+            legacy_option_name: the old way of enabling options. e.g. 'start_pushers'
+            legacy_app_name: The historical app name. e.g. 'synapse.app.pusher'
+            modern_instance_list_name: the string name of the new instance_list. e.g.
+            'pusher_instances'
+
+        Returns:
+            A list of worker instance names handling the given duty.
+        """
+
+        legacy_option = config.get(legacy_option_name, True)
+
+        worker_instances = config.get(modern_instance_list_name)
+        if worker_instances is None:
+            # Default to an empty list, which means "another, unknown, worker is
+            # responsible for it".
+            worker_instances = []
+
+            # If no worker instances are set we check if the legacy option
+            # is set, which means use the main process.
+            if legacy_option:
+                worker_instances = ["master"]
+
+            if self.worker_app == legacy_app_name:
+                if legacy_option:
+                    # If we're using `legacy_app_name`, and not using
+                    # `modern_instance_list_name`, then we should have
+                    # explicitly set `legacy_option_name` to false.
+                    raise ConfigError(
+                        f"The '{legacy_option_name}' config option must be disabled in "
+                        "the main synapse process before they can be run in a separate "
+                        "worker.\n"
+                        f"Please add `{legacy_option_name}: false` to the main config.\n",
+                    )
+
+                worker_instances = [self.worker_name]
+
+        return worker_instances
+
     def read_arguments(self, args: argparse.Namespace) -> None:
         # We support a bunch of command line arguments that override options in
         # the config. A lot of these options have a worker_* prefix when running
-- 
cgit 1.5.1


From 7f78b383ca666c7f49a99b6c5095becb4ed7f1f4 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 22 Nov 2022 15:56:28 -0600
Subject: Optimize `filter_events_for_client` for faster `/messages` - v2
 (#14527)

Fix #14108
---
 changelog.d/14527.misc                        |  1 +
 synapse/storage/databases/state/bg_updates.py | 99 +++++++++++++++++++++------
 2 files changed, 80 insertions(+), 20 deletions(-)
 create mode 100644 changelog.d/14527.misc

(limited to 'synapse')

diff --git a/changelog.d/14527.misc b/changelog.d/14527.misc
new file mode 100644
index 0000000000..3c4c7bf07d
--- /dev/null
+++ b/changelog.d/14527.misc
@@ -0,0 +1 @@
+Speed-up `/messages` with `filter_events_for_client` optimizations.
diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py
index a7fcc564a9..4a4ad0f492 100644
--- a/synapse/storage/databases/state/bg_updates.py
+++ b/synapse/storage/databases/state/bg_updates.py
@@ -93,13 +93,6 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore):
 
         results: Dict[int, MutableStateMap[str]] = {group: {} for group in groups}
 
-        where_clause, where_args = state_filter.make_sql_filter_clause()
-
-        # Unless the filter clause is empty, we're going to append it after an
-        # existing where clause
-        if where_clause:
-            where_clause = " AND (%s)" % (where_clause,)
-
         if isinstance(self.database_engine, PostgresEngine):
             # Temporarily disable sequential scans in this transaction. This is
             # a temporary hack until we can add the right indices in
@@ -110,31 +103,91 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore):
             # against `state_groups_state` to fetch the latest state.
             # It assumes that previous state groups are always numerically
             # lesser.
-            # The PARTITION is used to get the event_id in the greatest state
-            # group for the given type, state_key.
             # This may return multiple rows per (type, state_key), but last_value
             # should be the same.
             sql = """
-                WITH RECURSIVE state(state_group) AS (
+                WITH RECURSIVE sgs(state_group) AS (
                     VALUES(?::bigint)
                     UNION ALL
-                    SELECT prev_state_group FROM state_group_edges e, state s
+                    SELECT prev_state_group FROM state_group_edges e, sgs s
                     WHERE s.state_group = e.state_group
                 )
-                SELECT DISTINCT ON (type, state_key)
-                    type, state_key, event_id
-                FROM state_groups_state
-                WHERE state_group IN (
-                    SELECT state_group FROM state
-                ) %s
-                ORDER BY type, state_key, state_group DESC
+                %s
             """
 
+            overall_select_query_args: List[Union[int, str]] = []
+
+            # This is an optimization to create a select clause per-condition. This
+            # makes the query planner a lot smarter about which rows to pull out in the
+            # first place and we end up with something that takes 10x less time to get a
+            # result.
+            use_condition_optimization = (
+                not state_filter.include_others and not state_filter.is_full()
+            )
+            state_filter_condition_combos: List[Tuple[str, Optional[str]]] = []
+            # We don't need to calculate this list if we're not using the condition
+            # optimization
+            if use_condition_optimization:
+                for etype, state_keys in state_filter.types.items():
+                    if state_keys is None:
+                        state_filter_condition_combos.append((etype, None))
+                    else:
+                        for state_key in state_keys:
+                            state_filter_condition_combos.append((etype, state_key))
+            # And here is the optimization itself. We don't want to do the optimization
+            # if there are too many individual conditions. 10 is an arbitrary number
+            # with no testing behind it but we do know that we specifically made this
+            # optimization for when we grab the necessary state out for
+            # `filter_events_for_client` which just uses 2 conditions
+            # (`EventTypes.RoomHistoryVisibility` and `EventTypes.Member`).
+            if use_condition_optimization and len(state_filter_condition_combos) < 10:
+                select_clause_list: List[str] = []
+                for etype, skey in state_filter_condition_combos:
+                    if skey is None:
+                        where_clause = "(type = ?)"
+                        overall_select_query_args.extend([etype])
+                    else:
+                        where_clause = "(type = ? AND state_key = ?)"
+                        overall_select_query_args.extend([etype, skey])
+
+                    select_clause_list.append(
+                        f"""
+                        (
+                            SELECT DISTINCT ON (type, state_key)
+                                type, state_key, event_id
+                            FROM state_groups_state
+                            INNER JOIN sgs USING (state_group)
+                            WHERE {where_clause}
+                            ORDER BY type, state_key, state_group DESC
+                        )
+                        """
+                    )
+
+                overall_select_clause = " UNION ".join(select_clause_list)
+            else:
+                where_clause, where_args = state_filter.make_sql_filter_clause()
+                # Unless the filter clause is empty, we're going to append it after an
+                # existing where clause
+                if where_clause:
+                    where_clause = " AND (%s)" % (where_clause,)
+
+                overall_select_query_args.extend(where_args)
+
+                overall_select_clause = f"""
+                    SELECT DISTINCT ON (type, state_key)
+                        type, state_key, event_id
+                    FROM state_groups_state
+                    WHERE state_group IN (
+                        SELECT state_group FROM sgs
+                    ) {where_clause}
+                    ORDER BY type, state_key, state_group DESC
+                """
+
             for group in groups:
                 args: List[Union[int, str]] = [group]
-                args.extend(where_args)
+                args.extend(overall_select_query_args)
 
-                txn.execute(sql % (where_clause,), args)
+                txn.execute(sql % (overall_select_clause,), args)
                 for row in txn:
                     typ, state_key, event_id = row
                     key = (intern_string(typ), intern_string(state_key))
@@ -142,6 +195,12 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore):
         else:
             max_entries_returned = state_filter.max_entries_returned()
 
+            where_clause, where_args = state_filter.make_sql_filter_clause()
+            # Unless the filter clause is empty, we're going to append it after an
+            # existing where clause
+            if where_clause:
+                where_clause = " AND (%s)" % (where_clause,)
+
             # We don't use WITH RECURSIVE on sqlite3 as there are distributions
             # that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
             for group in groups:
-- 
cgit 1.5.1


From f38d7d79c8ec5c389c51327737bd517a27826bd6 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 23 Nov 2022 14:09:00 +0000
Subject: Add another index to `device_lists_changes_in_room` (#14534)

This helps avoid reading unnecessarily large amounts of data from the
table when querying with a set of room IDs.
---
 changelog.d/14534.misc                               |  1 +
 synapse/storage/databases/main/devices.py            |  7 +++++++
 .../main/delta/73/13add_device_lists_index.sql       | 20 ++++++++++++++++++++
 3 files changed, 28 insertions(+)
 create mode 100644 changelog.d/14534.misc
 create mode 100644 synapse/storage/schema/main/delta/73/13add_device_lists_index.sql

(limited to 'synapse')

diff --git a/changelog.d/14534.misc b/changelog.d/14534.misc
new file mode 100644
index 0000000000..5fe79042e5
--- /dev/null
+++ b/changelog.d/14534.misc
@@ -0,0 +1 @@
+Improve DB performance by reducing amount of data that gets read in `device_lists_changes_in_room`.
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 37629115ab..05a193f889 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1441,6 +1441,13 @@ class DeviceBackgroundUpdateStore(SQLBaseStore):
             self._remove_duplicate_outbound_pokes,
         )
 
+        self.db_pool.updates.register_background_index_update(
+            "device_lists_changes_in_room_by_room_index",
+            index_name="device_lists_changes_in_room_by_room_idx",
+            table="device_lists_changes_in_room",
+            columns=["room_id", "stream_id"],
+        )
+
     async def _drop_device_list_streams_non_unique_indexes(
         self, progress: JsonDict, batch_size: int
     ) -> int:
diff --git a/synapse/storage/schema/main/delta/73/13add_device_lists_index.sql b/synapse/storage/schema/main/delta/73/13add_device_lists_index.sql
new file mode 100644
index 0000000000..3725022a13
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/13add_device_lists_index.sql
@@ -0,0 +1,20 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- Adds an index on `device_lists_changes_in_room (room_id, stream_id)`, which
+-- speeds up `/sync` queries.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7313, 'device_lists_changes_in_room_by_room_index', '{}');
-- 
cgit 1.5.1


From 3b4e1508689cc09eba30509249459a64431558fc Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Thu, 24 Nov 2022 09:10:47 +0100
Subject: Faster joins: use servers list approximation in `assert_host_in_room`
 (#14515)

Signed-off-by: Mathieu Velten <mathieuv@matrix.org>
---
 changelog.d/14515.misc         |  1 +
 synapse/handlers/event_auth.py | 28 +++++++++++++++++-----------
 2 files changed, 18 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/14515.misc

(limited to 'synapse')

diff --git a/changelog.d/14515.misc b/changelog.d/14515.misc
new file mode 100644
index 0000000000..a0effb4dbe
--- /dev/null
+++ b/changelog.d/14515.misc
@@ -0,0 +1 @@
+Faster joins: use servers list approximation received during `send_join` (potentially updated with received membership events) in `assert_host_in_room`.
\ No newline at end of file
diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py
index 3bbad0271b..f91dbbecb7 100644
--- a/synapse/handlers/event_auth.py
+++ b/synapse/handlers/event_auth.py
@@ -45,6 +45,7 @@ class EventAuthHandler:
     def __init__(self, hs: "HomeServer"):
         self._clock = hs.get_clock()
         self._store = hs.get_datastores().main
+        self._state_storage_controller = hs.get_storage_controllers().state
         self._server_name = hs.hostname
 
     async def check_auth_rules_from_context(
@@ -179,17 +180,22 @@ class EventAuthHandler:
         this function may return an incorrect result as we are not able to fully
         track server membership in a room without full state.
         """
-        if not allow_partial_state_rooms and await self._store.is_partial_state_room(
-            room_id
-        ):
-            raise AuthError(
-                403,
-                "Unable to authorise you right now; room is partial-stated here.",
-                errcode=Codes.UNABLE_DUE_TO_PARTIAL_STATE,
-            )
-
-        if not await self.is_host_in_room(room_id, host):
-            raise AuthError(403, "Host not in room.")
+        if await self._store.is_partial_state_room(room_id):
+            if allow_partial_state_rooms:
+                current_hosts = await self._state_storage_controller.get_current_hosts_in_room_or_partial_state_approximation(
+                    room_id
+                )
+                if host not in current_hosts:
+                    raise AuthError(403, "Host not in room (partial-state approx).")
+            else:
+                raise AuthError(
+                    403,
+                    "Unable to authorise you right now; room is partial-stated here.",
+                    errcode=Codes.UNABLE_DUE_TO_PARTIAL_STATE,
+                )
+        else:
+            if not await self.is_host_in_room(room_id, host):
+                raise AuthError(403, "Host not in room.")
 
     async def check_restricted_join_rules(
         self,
-- 
cgit 1.5.1


From 9af2be192a759c22d189b72cc0a7580cd9de8a37 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Thu, 24 Nov 2022 09:09:17 +0000
Subject: Remove legacy Prometheus metrics names. They were deprecated in
 Synapse v1.69.0 and disabled by default in Synapse v1.71.0. (#14538)

---
 changelog.d/14538.removal                        |   1 +
 docs/upgrade.md                                  |  22 ++
 docs/usage/configuration/config_documentation.md |  25 --
 synapse/app/_base.py                             |  16 +-
 synapse/app/generic_worker.py                    |   1 -
 synapse/app/homeserver.py                        |   1 -
 synapse/config/metrics.py                        |   2 -
 synapse/metrics/__init__.py                      |   7 +-
 synapse/metrics/_legacy_exposition.py            | 288 -----------------------
 synapse/metrics/_twisted_exposition.py           |  38 +++
 tests/storage/test_event_metrics.py              |   7 +-
 11 files changed, 70 insertions(+), 338 deletions(-)
 create mode 100644 changelog.d/14538.removal
 delete mode 100644 synapse/metrics/_legacy_exposition.py
 create mode 100644 synapse/metrics/_twisted_exposition.py

(limited to 'synapse')

diff --git a/changelog.d/14538.removal b/changelog.d/14538.removal
new file mode 100644
index 0000000000..d2035ce82a
--- /dev/null
+++ b/changelog.d/14538.removal
@@ -0,0 +1 @@
+Remove legacy Prometheus metrics names. They were deprecated in Synapse v1.69.0 and disabled by default in Synapse v1.71.0.
\ No newline at end of file
diff --git a/docs/upgrade.md b/docs/upgrade.md
index 2aa353e496..4fe9e4f02e 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -88,6 +88,28 @@ process, for example:
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     ```
 
+# Upgrading to v1.73.0
+
+## Legacy Prometheus metric names have now been removed
+
+Synapse v1.69.0 included the deprecation of legacy Prometheus metric names
+and offered an option to disable them.
+Synapse v1.71.0 disabled legacy Prometheus metric names by default.
+
+This version, v1.73.0, removes those legacy Prometheus metric names entirely.
+This also means that the `enable_legacy_metrics` configuration option has been
+removed; it will no longer be possible to re-enable the legacy metric names.
+
+If you use metrics and have not yet updated your Grafana dashboard(s),
+Prometheus console(s) or alerting rule(s), please consider doing so when upgrading
+to this version.
+Note that the included Grafana dashboard was updated in v1.72.0 to correct some
+metric names which were missed when legacy metrics were disabled by default.
+
+See [v1.69.0: Deprecation of legacy Prometheus metric names](#deprecation-of-legacy-prometheus-metric-names)
+for more context.
+
+
 # Upgrading to v1.72.0
 
 ## Dropping support for PostgreSQL 10
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index f5937dd902..fae2771fad 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -2437,31 +2437,6 @@ Example configuration:
 enable_metrics: true
 ```
 ---
-### `enable_legacy_metrics`
-
-Set to `true` to publish both legacy and non-legacy Prometheus metric names,
-or to `false` to only publish non-legacy Prometheus metric names.
-Defaults to `false`. Has no effect if `enable_metrics` is `false`.
-**In Synapse v1.67.0 up to and including Synapse v1.70.1, this defaulted to `true`.**
-
-Legacy metric names include:
-- metrics containing colons in the name, such as `synapse_util_caches_response_cache:hits`, because colons are supposed to be reserved for user-defined recording rules;
-- counters that don't end with the `_total` suffix, such as `synapse_federation_client_sent_edus`, therefore not adhering to the OpenMetrics standard.
-
-These legacy metric names are unconventional and not compliant with OpenMetrics standards.
-They are included for backwards compatibility.
-
-Example configuration:
-```yaml
-enable_legacy_metrics: false
-```
-
-See https://github.com/matrix-org/synapse/issues/11106 for context.
-
-*Since v1.67.0.*
-
-**Will be removed in v1.73.0.**
----
 ### `sentry`
 
 Use this option to enable sentry integration. Provide the DSN assigned to you by sentry
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 41d2732ef9..a5aa2185a2 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -266,26 +266,18 @@ def register_start(
     reactor.callWhenRunning(lambda: defer.ensureDeferred(wrapper()))
 
 
-def listen_metrics(
-    bind_addresses: Iterable[str], port: int, enable_legacy_metric_names: bool
-) -> None:
+def listen_metrics(bind_addresses: Iterable[str], port: int) -> None:
     """
     Start Prometheus metrics server.
     """
     from prometheus_client import start_http_server as start_http_server_prometheus
 
-    from synapse.metrics import (
-        RegistryProxy,
-        start_http_server as start_http_server_legacy,
-    )
+    from synapse.metrics import RegistryProxy
 
     for host in bind_addresses:
         logger.info("Starting metrics listener on %s:%d", host, port)
-        if enable_legacy_metric_names:
-            start_http_server_legacy(port, addr=host, registry=RegistryProxy)
-        else:
-            _set_prometheus_client_use_created_metrics(False)
-            start_http_server_prometheus(port, addr=host, registry=RegistryProxy)
+        _set_prometheus_client_use_created_metrics(False)
+        start_http_server_prometheus(port, addr=host, registry=RegistryProxy)
 
 
 def _set_prometheus_client_use_created_metrics(new_value: bool) -> None:
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 74909b7d4a..46dc731696 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -320,7 +320,6 @@ class GenericWorkerServer(HomeServer):
                     _base.listen_metrics(
                         listener.bind_addresses,
                         listener.port,
-                        enable_legacy_metric_names=self.config.metrics.enable_legacy_metrics,
                     )
             else:
                 logger.warning("Unsupported listener type: %s", listener.type)
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 4f4fee4782..b9be558c7e 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -265,7 +265,6 @@ class SynapseHomeServer(HomeServer):
                     _base.listen_metrics(
                         listener.bind_addresses,
                         listener.port,
-                        enable_legacy_metric_names=self.config.metrics.enable_legacy_metrics,
                     )
             else:
                 # this shouldn't happen, as the listener type should have been checked
diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py
index 6034a0346e..8c1c9bd12d 100644
--- a/synapse/config/metrics.py
+++ b/synapse/config/metrics.py
@@ -43,8 +43,6 @@ class MetricsConfig(Config):
     def read_config(self, config: JsonDict, **kwargs: Any) -> None:
         self.enable_metrics = config.get("enable_metrics", False)
 
-        self.enable_legacy_metrics = config.get("enable_legacy_metrics", False)
-
         self.report_stats = config.get("report_stats", None)
         self.report_stats_endpoint = config.get(
             "report_stats_endpoint", "https://matrix.org/report-usage-stats/push"
diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index c3d3daf877..b01372565d 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -47,11 +47,7 @@ from twisted.python.threadpool import ThreadPool
 # This module is imported for its side effects; flake8 needn't warn that it's unused.
 import synapse.metrics._reactor_metrics  # noqa: F401
 from synapse.metrics._gc import MIN_TIME_BETWEEN_GCS, install_gc_manager
-from synapse.metrics._legacy_exposition import (
-    MetricsResource,
-    generate_latest,
-    start_http_server,
-)
+from synapse.metrics._twisted_exposition import MetricsResource, generate_latest
 from synapse.metrics._types import Collector
 from synapse.util import SYNAPSE_VERSION
 
@@ -474,7 +470,6 @@ __all__ = [
     "Collector",
     "MetricsResource",
     "generate_latest",
-    "start_http_server",
     "LaterGauge",
     "InFlightGauge",
     "GaugeBucketCollector",
diff --git a/synapse/metrics/_legacy_exposition.py b/synapse/metrics/_legacy_exposition.py
deleted file mode 100644
index 1459f9d224..0000000000
--- a/synapse/metrics/_legacy_exposition.py
+++ /dev/null
@@ -1,288 +0,0 @@
-# Copyright 2015-2019 Prometheus Python Client Developers
-# Copyright 2019 Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-This code is based off `prometheus_client/exposition.py` from version 0.7.1.
-
-Due to the renaming of metrics in prometheus_client 0.4.0, this customised
-vendoring of the code will emit both the old versions that Synapse dashboards
-expect, and the newer "best practice" version of the up-to-date official client.
-"""
-import logging
-import math
-import threading
-from http.server import BaseHTTPRequestHandler, HTTPServer
-from socketserver import ThreadingMixIn
-from typing import Any, Dict, List, Type, Union
-from urllib.parse import parse_qs, urlparse
-
-from prometheus_client import REGISTRY, CollectorRegistry
-from prometheus_client.core import Sample
-
-from twisted.web.resource import Resource
-from twisted.web.server import Request
-
-logger = logging.getLogger(__name__)
-CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8"
-
-
-def floatToGoString(d: Union[int, float]) -> str:
-    d = float(d)
-    if d == math.inf:
-        return "+Inf"
-    elif d == -math.inf:
-        return "-Inf"
-    elif math.isnan(d):
-        return "NaN"
-    else:
-        s = repr(d)
-        dot = s.find(".")
-        # Go switches to exponents sooner than Python.
-        # We only need to care about positive values for le/quantile.
-        if d > 0 and dot > 6:
-            mantissa = f"{s[0]}.{s[1:dot]}{s[dot + 1 :]}".rstrip("0.")
-            return f"{mantissa}e+0{dot - 1}"
-        return s
-
-
-def sample_line(line: Sample, name: str) -> str:
-    if line.labels:
-        labelstr = "{{{0}}}".format(
-            ",".join(
-                [
-                    '{}="{}"'.format(
-                        k,
-                        v.replace("\\", r"\\").replace("\n", r"\n").replace('"', r"\""),
-                    )
-                    for k, v in sorted(line.labels.items())
-                ]
-            )
-        )
-    else:
-        labelstr = ""
-    timestamp = ""
-    if line.timestamp is not None:
-        # Convert to milliseconds.
-        timestamp = f" {int(float(line.timestamp) * 1000):d}"
-    return "{}{} {}{}\n".format(name, labelstr, floatToGoString(line.value), timestamp)
-
-
-# Mapping from new metric names to legacy metric names.
-# We translate these back to their old names when exposing them through our
-# legacy vendored exporter.
-# Only this legacy exposition module applies these name changes.
-LEGACY_METRIC_NAMES = {
-    "synapse_util_caches_cache_hits": "synapse_util_caches_cache:hits",
-    "synapse_util_caches_cache_size": "synapse_util_caches_cache:size",
-    "synapse_util_caches_cache_evicted_size": "synapse_util_caches_cache:evicted_size",
-    "synapse_util_caches_cache": "synapse_util_caches_cache:total",
-    "synapse_util_caches_response_cache_size": "synapse_util_caches_response_cache:size",
-    "synapse_util_caches_response_cache_hits": "synapse_util_caches_response_cache:hits",
-    "synapse_util_caches_response_cache_evicted_size": "synapse_util_caches_response_cache:evicted_size",
-    "synapse_util_caches_response_cache": "synapse_util_caches_response_cache:total",
-    "synapse_federation_client_sent_pdu_destinations": "synapse_federation_client_sent_pdu_destinations:total",
-    "synapse_federation_client_sent_pdu_destinations_count": "synapse_federation_client_sent_pdu_destinations:count",
-    "synapse_admin_mau_current": "synapse_admin_mau:current",
-    "synapse_admin_mau_max": "synapse_admin_mau:max",
-    "synapse_admin_mau_registered_reserved_users": "synapse_admin_mau:registered_reserved_users",
-}
-
-
-def generate_latest(registry: CollectorRegistry, emit_help: bool = False) -> bytes:
-    """
-    Generate metrics in legacy format. Modern metrics are generated directly
-    by prometheus-client.
-    """
-
-    output = []
-
-    for metric in registry.collect():
-        if not metric.samples:
-            # No samples, don't bother.
-            continue
-
-        # Translate to legacy metric name if it has one.
-        mname = LEGACY_METRIC_NAMES.get(metric.name, metric.name)
-        mnewname = metric.name
-        mtype = metric.type
-
-        # OpenMetrics -> Prometheus
-        if mtype == "counter":
-            mnewname = mnewname + "_total"
-        elif mtype == "info":
-            mtype = "gauge"
-            mnewname = mnewname + "_info"
-        elif mtype == "stateset":
-            mtype = "gauge"
-        elif mtype == "gaugehistogram":
-            mtype = "histogram"
-        elif mtype == "unknown":
-            mtype = "untyped"
-
-        # Output in the old format for compatibility.
-        if emit_help:
-            output.append(
-                "# HELP {} {}\n".format(
-                    mname,
-                    metric.documentation.replace("\\", r"\\").replace("\n", r"\n"),
-                )
-            )
-        output.append(f"# TYPE {mname} {mtype}\n")
-
-        om_samples: Dict[str, List[str]] = {}
-        for s in metric.samples:
-            for suffix in ["_created", "_gsum", "_gcount"]:
-                if s.name == mname + suffix:
-                    # OpenMetrics specific sample, put in a gauge at the end.
-                    # (these come from gaugehistograms which don't get renamed,
-                    # so no need to faff with mnewname)
-                    om_samples.setdefault(suffix, []).append(sample_line(s, s.name))
-                    break
-            else:
-                newname = s.name.replace(mnewname, mname)
-                if ":" in newname and newname.endswith("_total"):
-                    newname = newname[: -len("_total")]
-                output.append(sample_line(s, newname))
-
-        for suffix, lines in sorted(om_samples.items()):
-            if emit_help:
-                output.append(
-                    "# HELP {}{} {}\n".format(
-                        mname,
-                        suffix,
-                        metric.documentation.replace("\\", r"\\").replace("\n", r"\n"),
-                    )
-                )
-            output.append(f"# TYPE {mname}{suffix} gauge\n")
-            output.extend(lines)
-
-        # Get rid of the weird colon things while we're at it
-        if mtype == "counter":
-            mnewname = mnewname.replace(":total", "")
-        mnewname = mnewname.replace(":", "_")
-
-        if mname == mnewname:
-            continue
-
-        # Also output in the new format, if it's different.
-        if emit_help:
-            output.append(
-                "# HELP {} {}\n".format(
-                    mnewname,
-                    metric.documentation.replace("\\", r"\\").replace("\n", r"\n"),
-                )
-            )
-        output.append(f"# TYPE {mnewname} {mtype}\n")
-
-        for s in metric.samples:
-            # Get rid of the OpenMetrics specific samples (we should already have
-            # dealt with them above anyway.)
-            for suffix in ["_created", "_gsum", "_gcount"]:
-                if s.name == mname + suffix:
-                    break
-            else:
-                sample_name = LEGACY_METRIC_NAMES.get(s.name, s.name)
-                output.append(
-                    sample_line(s, sample_name.replace(":total", "").replace(":", "_"))
-                )
-
-    return "".join(output).encode("utf-8")
-
-
-class MetricsHandler(BaseHTTPRequestHandler):
-    """HTTP handler that gives metrics from ``REGISTRY``."""
-
-    registry = REGISTRY
-
-    def do_GET(self) -> None:
-        registry = self.registry
-        params = parse_qs(urlparse(self.path).query)
-
-        if "help" in params:
-            emit_help = True
-        else:
-            emit_help = False
-
-        try:
-            output = generate_latest(registry, emit_help=emit_help)
-        except Exception:
-            self.send_error(500, "error generating metric output")
-            raise
-        try:
-            self.send_response(200)
-            self.send_header("Content-Type", CONTENT_TYPE_LATEST)
-            self.send_header("Content-Length", str(len(output)))
-            self.end_headers()
-            self.wfile.write(output)
-        except BrokenPipeError as e:
-            logger.warning(
-                "BrokenPipeError when serving metrics (%s). Did Prometheus restart?", e
-            )
-
-    def log_message(self, format: str, *args: Any) -> None:
-        """Log nothing."""
-
-    @classmethod
-    def factory(cls, registry: CollectorRegistry) -> Type:
-        """Returns a dynamic MetricsHandler class tied
-        to the passed registry.
-        """
-        # This implementation relies on MetricsHandler.registry
-        #  (defined above and defaulted to REGISTRY).
-
-        # As we have unicode_literals, we need to create a str()
-        #  object for type().
-        cls_name = str(cls.__name__)
-        MyMetricsHandler = type(cls_name, (cls, object), {"registry": registry})
-        return MyMetricsHandler
-
-
-class _ThreadingSimpleServer(ThreadingMixIn, HTTPServer):
-    """Thread per request HTTP server."""
-
-    # Make worker threads "fire and forget". Beginning with Python 3.7 this
-    # prevents a memory leak because ``ThreadingMixIn`` starts to gather all
-    # non-daemon threads in a list in order to join on them at server close.
-    # Enabling daemon threads virtually makes ``_ThreadingSimpleServer`` the
-    # same as Python 3.7's ``ThreadingHTTPServer``.
-    daemon_threads = True
-
-
-def start_http_server(
-    port: int, addr: str = "", registry: CollectorRegistry = REGISTRY
-) -> None:
-    """Starts an HTTP server for prometheus metrics as a daemon thread"""
-    CustomMetricsHandler = MetricsHandler.factory(registry)
-    httpd = _ThreadingSimpleServer((addr, port), CustomMetricsHandler)
-    t = threading.Thread(target=httpd.serve_forever)
-    t.daemon = True
-    t.start()
-
-
-class MetricsResource(Resource):
-    """
-    Twisted ``Resource`` that serves prometheus metrics.
-    """
-
-    isLeaf = True
-
-    def __init__(self, registry: CollectorRegistry = REGISTRY):
-        self.registry = registry
-
-    def render_GET(self, request: Request) -> bytes:
-        request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii"))
-        response = generate_latest(self.registry)
-        request.setHeader(b"Content-Length", str(len(response)))
-        return response
diff --git a/synapse/metrics/_twisted_exposition.py b/synapse/metrics/_twisted_exposition.py
new file mode 100644
index 0000000000..0abcd14953
--- /dev/null
+++ b/synapse/metrics/_twisted_exposition.py
@@ -0,0 +1,38 @@
+# Copyright 2015-2019 Prometheus Python Client Developers
+# Copyright 2019 Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from prometheus_client import REGISTRY, CollectorRegistry, generate_latest
+
+from twisted.web.resource import Resource
+from twisted.web.server import Request
+
+CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8"
+
+
+class MetricsResource(Resource):
+    """
+    Twisted ``Resource`` that serves prometheus metrics.
+    """
+
+    isLeaf = True
+
+    def __init__(self, registry: CollectorRegistry = REGISTRY):
+        self.registry = registry
+
+    def render_GET(self, request: Request) -> bytes:
+        request.setHeader(b"Content-Type", CONTENT_TYPE_LATEST.encode("ascii"))
+        response = generate_latest(self.registry)
+        request.setHeader(b"Content-Length", str(len(response)))
+        return response
diff --git a/tests/storage/test_event_metrics.py b/tests/storage/test_event_metrics.py
index 088fbb247b..6f1135eef4 100644
--- a/tests/storage/test_event_metrics.py
+++ b/tests/storage/test_event_metrics.py
@@ -11,8 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from prometheus_client import generate_latest
 
-from synapse.metrics import REGISTRY, generate_latest
+from synapse.metrics import REGISTRY
 from synapse.types import UserID, create_requester
 
 from tests.unittest import HomeserverTestCase
@@ -53,8 +54,8 @@ class ExtremStatisticsTestCase(HomeserverTestCase):
 
         items = list(
             filter(
-                lambda x: b"synapse_forward_extremities_" in x,
-                generate_latest(REGISTRY, emit_help=False).split(b"\n"),
+                lambda x: b"synapse_forward_extremities_" in x and b"# HELP" not in x,
+                generate_latest(REGISTRY).split(b"\n"),
             )
         )
 
-- 
cgit 1.5.1


From f6c74d1cb2ed966802b01a2b037f09ce7a842c18 Mon Sep 17 00:00:00 2001
From: Benjamin Kampmann <ben.kampmann@gmail.com>
Date: Thu, 24 Nov 2022 09:10:51 +0000
Subject: Implement message forward pagination from start when no from is
 given, fixes #12383 (#14149)

Fixes https://github.com/matrix-org/synapse/issues/12383
---
 changelog.d/14149.bugfix       |  1 +
 synapse/handlers/pagination.py |  6 ++++++
 synapse/streams/events.py      | 13 +++++++++++++
 tests/rest/admin/test_room.py  | 40 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 60 insertions(+)
 create mode 100644 changelog.d/14149.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14149.bugfix b/changelog.d/14149.bugfix
new file mode 100644
index 0000000000..b31c658266
--- /dev/null
+++ b/changelog.d/14149.bugfix
@@ -0,0 +1 @@
+Fix #12383: paginate room messages forwards from the start of the room if no `from` token is given. Contributed by @gnunicorn.
\ No newline at end of file
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index a4ca9cb8b4..c572508a02 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -448,6 +448,12 @@ class PaginationHandler:
 
         if pagin_config.from_token:
             from_token = pagin_config.from_token
+        elif pagin_config.direction == "f":
+            from_token = (
+                await self.hs.get_event_sources().get_start_token_for_pagination(
+                    room_id
+                )
+            )
         else:
             from_token = (
                 await self.hs.get_event_sources().get_current_token_for_pagination(
diff --git a/synapse/streams/events.py b/synapse/streams/events.py
index f331e1af16..619eb7f601 100644
--- a/synapse/streams/events.py
+++ b/synapse/streams/events.py
@@ -73,6 +73,19 @@ class EventSources:
         )
         return token
 
+    @trace
+    async def get_start_token_for_pagination(self, room_id: str) -> StreamToken:
+        """Get the start token for a given room to be used to paginate
+        events.
+
+        The returned token does not have the current values for fields other
+        than `room`, since they are not used during pagination.
+
+        Returns:
+            The start token for pagination.
+        """
+        return StreamToken.START
+
     @trace
     async def get_current_token_for_pagination(self, room_id: str) -> StreamToken:
         """Get the current token for a given room to be used to paginate
diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py
index d156be82b0..e0f5d54aba 100644
--- a/tests/rest/admin/test_room.py
+++ b/tests/rest/admin/test_room.py
@@ -1857,6 +1857,46 @@ class RoomMessagesTestCase(unittest.HomeserverTestCase):
         self.assertIn("chunk", channel.json_body)
         self.assertIn("end", channel.json_body)
 
+    def test_room_messages_backward(self) -> None:
+        """Test room messages can be retrieved by an admin that isn't in the room."""
+        latest_event_id = self.helper.send(
+            self.room_id, body="message 1", tok=self.user_tok
+        )["event_id"]
+
+        # Check that we get the first and second message when querying /messages.
+        channel = self.make_request(
+            "GET",
+            "/_synapse/admin/v1/rooms/%s/messages?dir=b" % (self.room_id,),
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(channel.code, 200, channel.json_body)
+
+        chunk = channel.json_body["chunk"]
+        self.assertEqual(len(chunk), 6, [event["content"] for event in chunk])
+
+        # in backwards, this is the first event
+        self.assertEqual(chunk[0]["event_id"], latest_event_id)
+
+    def test_room_messages_forward(self) -> None:
+        """Test room messages can be retrieved by an admin that isn't in the room."""
+        latest_event_id = self.helper.send(
+            self.room_id, body="message 1", tok=self.user_tok
+        )["event_id"]
+
+        # Check that we get the first and second message when querying /messages.
+        channel = self.make_request(
+            "GET",
+            "/_synapse/admin/v1/rooms/%s/messages?dir=f" % (self.room_id,),
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(channel.code, 200, channel.json_body)
+
+        chunk = channel.json_body["chunk"]
+        self.assertEqual(len(chunk), 6, [event["content"] for event in chunk])
+
+        # in forward, this is the last event
+        self.assertEqual(chunk[5]["event_id"], latest_event_id)
+
     def test_room_messages_purge(self) -> None:
         """Test room messages can be retrieved by an admin that isn't in the room."""
         store = self.hs.get_datastores().main
-- 
cgit 1.5.1


From c2e06c36d4ac2aef9de1a192cdcf9964415d09d2 Mon Sep 17 00:00:00 2001
From: schmop <lars.richard@rocketmail.com>
Date: Thu, 24 Nov 2022 11:49:04 +0100
Subject: Fix crash admin media list api when info is None (#14537)

Fixes https://github.com/matrix-org/synapse/issues/14536
---
 changelog.d/14537.bugfix               | 1 +
 synapse/storage/databases/main/room.py | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14537.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14537.bugfix b/changelog.d/14537.bugfix
new file mode 100644
index 0000000000..d7ce78d032
--- /dev/null
+++ b/changelog.d/14537.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where the [List media admin API](https://matrix-org.github.io/synapse/latest/admin_api/media_admin_api.html#list-all-media-in-a-room) would fail when processing an image with broken thumbnail information.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 4fbaefad73..52ad947c6c 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -912,7 +912,11 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
                 event_json = db_to_json(content_json)
                 content = event_json["content"]
                 content_url = content.get("url")
-                thumbnail_url = content.get("info", {}).get("thumbnail_url")
+                info = content.get("info")
+                if isinstance(info, dict):
+                    thumbnail_url = info.get("thumbnail_url")
+                else:
+                    thumbnail_url = None
 
                 for url in (content_url, thumbnail_url):
                     if not url:
-- 
cgit 1.5.1


From 39cde585bf1e6cf3d32af9302437b37bae7a64b8 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Thu, 24 Nov 2022 18:09:47 +0100
Subject: Faster joins: use initial list of servers if we don't have the full
 state yet (#14408)

Signed-off-by: Mathieu Velten <mathieuv@matrix.org>
Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
---
 changelog.d/14408.misc                |  1 +
 synapse/federation/sender/__init__.py | 18 +++++++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14408.misc

(limited to 'synapse')

diff --git a/changelog.d/14408.misc b/changelog.d/14408.misc
new file mode 100644
index 0000000000..2c77d97591
--- /dev/null
+++ b/changelog.d/14408.misc
@@ -0,0 +1 @@
+Faster joins: send events to initial list of servers if we don't have the full state yet.
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index 3ad483efe0..fc1d8c88a7 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -434,7 +434,23 @@ class FederationSender(AbstractFederationSender):
                         # If there are no prev event IDs then the state is empty
                         # and so no remote servers in the room
                         destinations = set()
-                    else:
+
+                    if destinations is None:
+                        # During partial join we use the set of servers that we got
+                        # when beginning the join. It's still possible that we send
+                        # events to servers that left the room in the meantime, but
+                        # we consider that an acceptable risk since it is only our own
+                        # events that we leak and not other server's ones.
+                        partial_state_destinations = (
+                            await self.store.get_partial_state_servers_at_join(
+                                event.room_id
+                            )
+                        )
+
+                        if len(partial_state_destinations) > 0:
+                            destinations = partial_state_destinations
+
+                    if destinations is None:
                         # We check the external cache for the destinations, which is
                         # stored per state group.
 
-- 
cgit 1.5.1


From 09de2aecb05cb46e0513396e2675b24c8beedb68 Mon Sep 17 00:00:00 2001
From: Ashish Kumar <ashfame@users.noreply.github.com>
Date: Fri, 25 Nov 2022 19:16:50 +0400
Subject: Add support for handling avatar with SSO login (#13917)

This commit adds support for handling a provided avatar picture URL
when logging in via SSO.

Signed-off-by: Ashish Kumar <ashfame@users.noreply.github.com>

Fixes #9357.
---
 changelog.d/13917.feature                        |   1 +
 docs/usage/configuration/config_documentation.md |   9 +-
 mypy.ini                                         |   4 +-
 synapse/handlers/oidc.py                         |   7 ++
 synapse/handlers/sso.py                          | 111 +++++++++++++++++
 tests/handlers/test_sso.py                       | 145 +++++++++++++++++++++++
 6 files changed, 275 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/13917.feature
 create mode 100644 tests/handlers/test_sso.py

(limited to 'synapse')

diff --git a/changelog.d/13917.feature b/changelog.d/13917.feature
new file mode 100644
index 0000000000..4eb942ab38
--- /dev/null
+++ b/changelog.d/13917.feature
@@ -0,0 +1 @@
+Adds support for handling avatar in SSO login. Contributed by @ashfame.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index fae2771fad..749af12aac 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -2968,10 +2968,17 @@ Options for each entry include:
 
         For the default provider, the following settings are available:
 
-       * subject_claim: name of the claim containing a unique identifier
+       * `subject_claim`: name of the claim containing a unique identifier
          for the user. Defaults to 'sub', which OpenID Connect
          compliant providers should provide.
 
+       * `picture_claim`: name of the claim containing a URL for the user's profile picture.
+         Defaults to 'picture', which OpenID Connect compliant providers should provide.
+         The URL must point directly to an image file, such as a PNG, JPEG, or GIF.
+         
+         Currently only supported in monolithic (single-process) server configurations
+         where the media repository runs within the Synapse process.
+
        * `localpart_template`: Jinja2 template for the localpart of the MXID.
           If this is not set, the user will be prompted to choose their
           own username (see the documentation for the `sso_auth_account_details.html`
diff --git a/mypy.ini b/mypy.ini
index 25b3c93748..0b6e7df267 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -119,6 +119,9 @@ disallow_untyped_defs = True
 [mypy-tests.storage.test_profile]
 disallow_untyped_defs = True
 
+[mypy-tests.handlers.test_sso]
+disallow_untyped_defs = True
+
 [mypy-tests.storage.test_user_directory]
 disallow_untyped_defs = True
 
@@ -137,7 +140,6 @@ disallow_untyped_defs = False
 [mypy-tests.utils]
 disallow_untyped_defs = True
 
-
 ;; Dependencies without annotations
 ;; Before ignoring a module, check to see if type stubs are available.
 ;; The `typeshed` project maintains stubs here:
diff --git a/synapse/handlers/oidc.py b/synapse/handlers/oidc.py
index 41c675f408..03de6a4ba6 100644
--- a/synapse/handlers/oidc.py
+++ b/synapse/handlers/oidc.py
@@ -1435,6 +1435,7 @@ class UserAttributeDict(TypedDict):
     localpart: Optional[str]
     confirm_localpart: bool
     display_name: Optional[str]
+    picture: Optional[str]  # may be omitted by older `OidcMappingProviders`
     emails: List[str]
 
 
@@ -1520,6 +1521,7 @@ env.filters.update(
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class JinjaOidcMappingConfig:
     subject_claim: str
+    picture_claim: str
     localpart_template: Optional[Template]
     display_name_template: Optional[Template]
     email_template: Optional[Template]
@@ -1539,6 +1541,7 @@ class JinjaOidcMappingProvider(OidcMappingProvider[JinjaOidcMappingConfig]):
     @staticmethod
     def parse_config(config: dict) -> JinjaOidcMappingConfig:
         subject_claim = config.get("subject_claim", "sub")
+        picture_claim = config.get("picture_claim", "picture")
 
         def parse_template_config(option_name: str) -> Optional[Template]:
             if option_name not in config:
@@ -1572,6 +1575,7 @@ class JinjaOidcMappingProvider(OidcMappingProvider[JinjaOidcMappingConfig]):
 
         return JinjaOidcMappingConfig(
             subject_claim=subject_claim,
+            picture_claim=picture_claim,
             localpart_template=localpart_template,
             display_name_template=display_name_template,
             email_template=email_template,
@@ -1611,10 +1615,13 @@ class JinjaOidcMappingProvider(OidcMappingProvider[JinjaOidcMappingConfig]):
         if email:
             emails.append(email)
 
+        picture = userinfo.get("picture")
+
         return UserAttributeDict(
             localpart=localpart,
             display_name=display_name,
             emails=emails,
+            picture=picture,
             confirm_localpart=self._config.confirm_localpart,
         )
 
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index e1c0bff1b2..44e70fc4b8 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import abc
+import hashlib
+import io
 import logging
 from typing import (
     TYPE_CHECKING,
@@ -138,6 +140,7 @@ class UserAttributes:
     localpart: Optional[str]
     confirm_localpart: bool = False
     display_name: Optional[str] = None
+    picture: Optional[str] = None
     emails: Collection[str] = attr.Factory(list)
 
 
@@ -196,6 +199,10 @@ class SsoHandler:
         self._error_template = hs.config.sso.sso_error_template
         self._bad_user_template = hs.config.sso.sso_auth_bad_user_template
         self._profile_handler = hs.get_profile_handler()
+        self._media_repo = (
+            hs.get_media_repository() if hs.config.media.can_load_media_repo else None
+        )
+        self._http_client = hs.get_proxied_blacklisted_http_client()
 
         # The following template is shown after a successful user interactive
         # authentication session. It tells the user they can close the window.
@@ -495,6 +502,8 @@ class SsoHandler:
                         await self._profile_handler.set_displayname(
                             user_id_obj, requester, attributes.display_name, True
                         )
+                if attributes.picture:
+                    await self.set_avatar(user_id, attributes.picture)
 
         await self._auth_handler.complete_sso_login(
             user_id,
@@ -703,8 +712,110 @@ class SsoHandler:
         await self._store.record_user_external_id(
             auth_provider_id, remote_user_id, registered_user_id
         )
+
+        # Set avatar, if available
+        if attributes.picture:
+            await self.set_avatar(registered_user_id, attributes.picture)
+
         return registered_user_id
 
+    async def set_avatar(self, user_id: str, picture_https_url: str) -> bool:
+        """Set avatar of the user.
+
+        This downloads the image file from the URL provided, stores that in
+        the media repository and then sets the avatar on the user's profile.
+
+        It can detect if the same image is being saved again and bails early by storing
+        the hash of the file in the `upload_name` of the avatar image.
+
+        Currently, it only supports server configurations which run the media repository
+        within the same process.
+
+        It silently fails and logs a warning by raising an exception and catching it
+        internally if:
+         * it is unable to fetch the image itself (non 200 status code) or
+         * the image supplied is bigger than max allowed size or
+         * the image type is not one of the allowed image types.
+
+        Args:
+            user_id: matrix user ID in the form @localpart:domain as a string.
+
+            picture_https_url: HTTPS url for the picture image file.
+
+        Returns: `True` if the user's avatar has been successfully set to the image at
+            `picture_https_url`.
+        """
+        if self._media_repo is None:
+            logger.info(
+                "failed to set user avatar because out-of-process media repositories "
+                "are not supported yet "
+            )
+            return False
+
+        try:
+            uid = UserID.from_string(user_id)
+
+            def is_allowed_mime_type(content_type: str) -> bool:
+                if (
+                    self._profile_handler.allowed_avatar_mimetypes
+                    and content_type
+                    not in self._profile_handler.allowed_avatar_mimetypes
+                ):
+                    return False
+                return True
+
+            # download picture, enforcing size limit & mime type check
+            picture = io.BytesIO()
+
+            content_length, headers, uri, code = await self._http_client.get_file(
+                url=picture_https_url,
+                output_stream=picture,
+                max_size=self._profile_handler.max_avatar_size,
+                is_allowed_content_type=is_allowed_mime_type,
+            )
+
+            if code != 200:
+                raise Exception(
+                    "GET request to download sso avatar image returned {}".format(code)
+                )
+
+            # upload name includes hash of the image file's content so that we can
+            # easily check if it requires an update or not, the next time user logs in
+            upload_name = "sso_avatar_" + hashlib.sha256(picture.read()).hexdigest()
+
+            # bail if user already has the same avatar
+            profile = await self._profile_handler.get_profile(user_id)
+            if profile["avatar_url"] is not None:
+                server_name = profile["avatar_url"].split("/")[-2]
+                media_id = profile["avatar_url"].split("/")[-1]
+                if server_name == self._server_name:
+                    media = await self._media_repo.store.get_local_media(media_id)
+                    if media is not None and upload_name == media["upload_name"]:
+                        logger.info("skipping saving the user avatar")
+                        return True
+
+            # store it in media repository
+            avatar_mxc_url = await self._media_repo.create_content(
+                media_type=headers[b"Content-Type"][0].decode("utf-8"),
+                upload_name=upload_name,
+                content=picture,
+                content_length=content_length,
+                auth_user=uid,
+            )
+
+            # save it as user avatar
+            await self._profile_handler.set_avatar_url(
+                uid,
+                create_requester(uid),
+                str(avatar_mxc_url),
+            )
+
+            logger.info("successfully saved the user avatar")
+            return True
+        except Exception:
+            logger.warning("failed to save the user avatar")
+            return False
+
     async def complete_sso_ui_auth_request(
         self,
         auth_provider_id: str,
diff --git a/tests/handlers/test_sso.py b/tests/handlers/test_sso.py
new file mode 100644
index 0000000000..137deab138
--- /dev/null
+++ b/tests/handlers/test_sso.py
@@ -0,0 +1,145 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from http import HTTPStatus
+from typing import BinaryIO, Callable, Dict, List, Optional, Tuple
+from unittest.mock import Mock
+
+from twisted.test.proto_helpers import MemoryReactor
+from twisted.web.http_headers import Headers
+
+from synapse.api.errors import Codes, SynapseError
+from synapse.http.client import RawHeaders
+from synapse.server import HomeServer
+from synapse.util import Clock
+
+from tests import unittest
+from tests.test_utils import SMALL_PNG, FakeResponse
+
+
+class TestSSOHandler(unittest.HomeserverTestCase):
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
+        self.http_client = Mock(spec=["get_file"])
+        self.http_client.get_file.side_effect = mock_get_file
+        self.http_client.user_agent = b"Synapse Test"
+        hs = self.setup_test_homeserver(
+            proxied_blacklisted_http_client=self.http_client
+        )
+        return hs
+
+    async def test_set_avatar(self) -> None:
+        """Tests successfully setting the avatar of a newly created user"""
+        handler = self.hs.get_sso_handler()
+
+        # Create a new user to set avatar for
+        reg_handler = self.hs.get_registration_handler()
+        user_id = self.get_success(reg_handler.register_user(approved=True))
+
+        self.assertTrue(
+            self.get_success(handler.set_avatar(user_id, "http://my.server/me.png"))
+        )
+
+        # Ensure avatar is set on this newly created user,
+        # so no need to compare for the exact image
+        profile_handler = self.hs.get_profile_handler()
+        profile = self.get_success(profile_handler.get_profile(user_id))
+        self.assertIsNot(profile["avatar_url"], None)
+
+    @unittest.override_config({"max_avatar_size": 1})
+    async def test_set_avatar_too_big_image(self) -> None:
+        """Tests that saving an avatar fails when it is too big"""
+        handler = self.hs.get_sso_handler()
+
+        # any random user works since image check is supposed to fail
+        user_id = "@sso-user:test"
+
+        self.assertFalse(
+            self.get_success(handler.set_avatar(user_id, "http://my.server/me.png"))
+        )
+
+    @unittest.override_config({"allowed_avatar_mimetypes": ["image/jpeg"]})
+    async def test_set_avatar_incorrect_mime_type(self) -> None:
+        """Tests that saving an avatar fails when its mime type is not allowed"""
+        handler = self.hs.get_sso_handler()
+
+        # any random user works since image check is supposed to fail
+        user_id = "@sso-user:test"
+
+        self.assertFalse(
+            self.get_success(handler.set_avatar(user_id, "http://my.server/me.png"))
+        )
+
+    async def test_skip_saving_avatar_when_not_changed(self) -> None:
+        """Tests whether saving of avatar correctly skips if the avatar hasn't
+        changed"""
+        handler = self.hs.get_sso_handler()
+
+        # Create a new user to set avatar for
+        reg_handler = self.hs.get_registration_handler()
+        user_id = self.get_success(reg_handler.register_user(approved=True))
+
+        # set avatar for the first time, should be a success
+        self.assertTrue(
+            self.get_success(handler.set_avatar(user_id, "http://my.server/me.png"))
+        )
+
+        # get avatar picture for comparison after another attempt
+        profile_handler = self.hs.get_profile_handler()
+        profile = self.get_success(profile_handler.get_profile(user_id))
+        url_to_match = profile["avatar_url"]
+
+        # set same avatar for the second time, should be a success
+        self.assertTrue(
+            self.get_success(handler.set_avatar(user_id, "http://my.server/me.png"))
+        )
+
+        # compare avatar picture's url from previous step
+        profile = self.get_success(profile_handler.get_profile(user_id))
+        self.assertEqual(profile["avatar_url"], url_to_match)
+
+
+async def mock_get_file(
+    url: str,
+    output_stream: BinaryIO,
+    max_size: Optional[int] = None,
+    headers: Optional[RawHeaders] = None,
+    is_allowed_content_type: Optional[Callable[[str], bool]] = None,
+) -> Tuple[int, Dict[bytes, List[bytes]], str, int]:
+
+    fake_response = FakeResponse(code=404)
+    if url == "http://my.server/me.png":
+        fake_response = FakeResponse(
+            code=200,
+            headers=Headers(
+                {"Content-Type": ["image/png"], "Content-Length": [str(len(SMALL_PNG))]}
+            ),
+            body=SMALL_PNG,
+        )
+
+    if max_size is not None and max_size < len(SMALL_PNG):
+        raise SynapseError(
+            HTTPStatus.BAD_GATEWAY,
+            "Requested file is too large > %r bytes" % (max_size,),
+            Codes.TOO_LARGE,
+        )
+
+    if is_allowed_content_type and not is_allowed_content_type("image/png"):
+        raise SynapseError(
+            HTTPStatus.BAD_GATEWAY,
+            (
+                "Requested file's content type not allowed for this operation: %s"
+                % "image/png"
+            ),
+        )
+
+    output_stream.write(fake_response.body)
+
+    return len(SMALL_PNG), {b"Content-Type": [b"image/png"]}, "", 200
-- 
cgit 1.5.1


From f792dd74e1e6f64cb15d920d87818f47f17e7848 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Mon, 28 Nov 2022 13:42:06 +0000
Subject: Remove option to skip locking of tables during emulated upserts
 (#14469)

To perform an emulated upsert into a table safely, we must either:
 * lock the table,
 * be the only writer upserting into the table,
 * or rely on another unique index being present.

When the 2nd or 3rd cases were applicable, we previously avoided locking
the table as an optimization. However, as seen in #14406, it is easy to
slip up when adding new schema deltas and corrupt the database.

The only time we lock when performing emulated upserts is while waiting
for background updates on postgres. On sqlite, we do no locking at all.

Let's remove the option to skip locking tables, so that we don't shoot
ourselves in the foot again.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14469.misc                             |  1 +
 synapse/storage/database.py                        | 56 +++++++---------------
 synapse/storage/databases/main/account_data.py     |  8 ----
 synapse/storage/databases/main/appservice.py       |  2 -
 synapse/storage/databases/main/devices.py          |  9 ----
 synapse/storage/databases/main/event_federation.py |  1 -
 synapse/storage/databases/main/pusher.py           |  6 ---
 synapse/storage/databases/main/room.py             |  6 ---
 synapse/storage/databases/main/room_batch.py       |  2 -
 synapse/storage/databases/main/user_directory.py   |  2 -
 10 files changed, 19 insertions(+), 74 deletions(-)
 create mode 100644 changelog.d/14469.misc

(limited to 'synapse')

diff --git a/changelog.d/14469.misc b/changelog.d/14469.misc
new file mode 100644
index 0000000000..a12a21e9ae
--- /dev/null
+++ b/changelog.d/14469.misc
@@ -0,0 +1 @@
+Remove option to skip locking of tables when performing emulated upserts, to avoid a class of bugs in future.
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index a14b13aec8..55bcb90001 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -1129,7 +1129,6 @@ class DatabasePool:
         values: Dict[str, Any],
         insertion_values: Optional[Dict[str, Any]] = None,
         desc: str = "simple_upsert",
-        lock: bool = True,
     ) -> bool:
         """Insert a row with values + insertion_values; on conflict, update with values.
 
@@ -1154,21 +1153,12 @@ class DatabasePool:
         requiring that a unique index exist on the column names used to detect a
         conflict (i.e. `keyvalues.keys()`).
 
-        If there is no such index, we can "emulate" an upsert with a SELECT followed
-        by either an INSERT or an UPDATE. This is unsafe: we cannot make the same
-        atomicity guarantees that a native upsert can and are very vulnerable to races
-        and crashes. Therefore if we wish to upsert without an appropriate unique index,
-        we must either:
-
-        1. Acquire a table-level lock before the emulated upsert (`lock=True`), or
-        2. VERY CAREFULLY ensure that we are the only thread and worker which will be
-           writing to this table, in which case we can proceed without a lock
-           (`lock=False`).
-
-        Generally speaking, you should use `lock=True`. If the table in question has a
-        unique index[*], this class will use a native upsert (which is atomic and so can
-        ignore the `lock` argument). Otherwise this class will use an emulated upsert,
-        in which case we want the safer option unless we been VERY CAREFUL.
+        If there is no such index yet[*], we can "emulate" an upsert with a SELECT
+        followed by either an INSERT or an UPDATE. This is unsafe unless *all* upserters
+        run at the SERIALIZABLE isolation level: we cannot make the same atomicity
+        guarantees that a native upsert can and are very vulnerable to races and
+        crashes. Therefore to upsert without an appropriate unique index, we acquire a
+        table-level lock before the emulated upsert.
 
         [*]: Some tables have unique indices added to them in the background. Those
              tables `T` are keys in the dictionary UNIQUE_INDEX_BACKGROUND_UPDATES,
@@ -1189,7 +1179,6 @@ class DatabasePool:
             values: The nonunique columns and their new values
             insertion_values: additional key/values to use only when inserting
             desc: description of the transaction, for logging and metrics
-            lock: True to lock the table when doing the upsert.
         Returns:
             Returns True if a row was inserted or updated (i.e. if `values` is
             not empty then this always returns True)
@@ -1209,7 +1198,6 @@ class DatabasePool:
                     keyvalues,
                     values,
                     insertion_values,
-                    lock=lock,
                     db_autocommit=autocommit,
                 )
             except self.engine.module.IntegrityError as e:
@@ -1232,7 +1220,6 @@ class DatabasePool:
         values: Dict[str, Any],
         insertion_values: Optional[Dict[str, Any]] = None,
         where_clause: Optional[str] = None,
-        lock: bool = True,
     ) -> bool:
         """
         Pick the UPSERT method which works best on the platform. Either the
@@ -1245,8 +1232,6 @@ class DatabasePool:
             values: The nonunique columns and their new values
             insertion_values: additional key/values to use only when inserting
             where_clause: An index predicate to apply to the upsert.
-            lock: True to lock the table when doing the upsert. Unused when performing
-                a native upsert.
         Returns:
             Returns True if a row was inserted or updated (i.e. if `values` is
             not empty then this always returns True)
@@ -1270,7 +1255,6 @@ class DatabasePool:
                 values,
                 insertion_values=insertion_values,
                 where_clause=where_clause,
-                lock=lock,
             )
 
     def simple_upsert_txn_emulated(
@@ -1291,14 +1275,15 @@ class DatabasePool:
             insertion_values: additional key/values to use only when inserting
             where_clause: An index predicate to apply to the upsert.
             lock: True to lock the table when doing the upsert.
+                Must not be False unless the table has already been locked.
         Returns:
             Returns True if a row was inserted or updated (i.e. if `values` is
             not empty then this always returns True)
         """
         insertion_values = insertion_values or {}
 
-        # We need to lock the table :(, unless we're *really* careful
         if lock:
+            # We need to lock the table :(
             self.engine.lock_table(txn, table)
 
         def _getwhere(key: str) -> str:
@@ -1406,7 +1391,6 @@ class DatabasePool:
         value_names: Collection[str],
         value_values: Collection[Collection[Any]],
         desc: str,
-        lock: bool = True,
     ) -> None:
         """
         Upsert, many times.
@@ -1418,8 +1402,6 @@ class DatabasePool:
             value_names: The value column names
             value_values: A list of each row's value column values.
                 Ignored if value_names is empty.
-            lock: True to lock the table when doing the upsert. Unused when performing
-                a native upsert.
         """
 
         # We can autocommit if it safe to upsert
@@ -1433,7 +1415,6 @@ class DatabasePool:
             key_values,
             value_names,
             value_values,
-            lock=lock,
             db_autocommit=autocommit,
         )
 
@@ -1445,7 +1426,6 @@ class DatabasePool:
         key_values: Collection[Iterable[Any]],
         value_names: Collection[str],
         value_values: Iterable[Iterable[Any]],
-        lock: bool = True,
     ) -> None:
         """
         Upsert, many times.
@@ -1457,8 +1437,6 @@ class DatabasePool:
             value_names: The value column names
             value_values: A list of each row's value column values.
                 Ignored if value_names is empty.
-            lock: True to lock the table when doing the upsert. Unused when performing
-                a native upsert.
         """
         if table not in self._unsafe_to_upsert_tables:
             return self.simple_upsert_many_txn_native_upsert(
@@ -1466,7 +1444,12 @@ class DatabasePool:
             )
         else:
             return self.simple_upsert_many_txn_emulated(
-                txn, table, key_names, key_values, value_names, value_values, lock=lock
+                txn,
+                table,
+                key_names,
+                key_values,
+                value_names,
+                value_values,
             )
 
     def simple_upsert_many_txn_emulated(
@@ -1477,7 +1460,6 @@ class DatabasePool:
         key_values: Collection[Iterable[Any]],
         value_names: Collection[str],
         value_values: Iterable[Iterable[Any]],
-        lock: bool = True,
     ) -> None:
         """
         Upsert, many times, but without native UPSERT support or batching.
@@ -1489,18 +1471,16 @@ class DatabasePool:
             value_names: The value column names
             value_values: A list of each row's value column values.
                 Ignored if value_names is empty.
-            lock: True to lock the table when doing the upsert.
         """
         # No value columns, therefore make a blank list so that the following
         # zip() works correctly.
         if not value_names:
             value_values = [() for x in range(len(key_values))]
 
-        if lock:
-            # Lock the table just once, to prevent it being done once per row.
-            # Note that, according to Postgres' documentation, once obtained,
-            # the lock is held for the remainder of the current transaction.
-            self.engine.lock_table(txn, "user_ips")
+        # Lock the table just once, to prevent it being done once per row.
+        # Note that, according to Postgres' documentation, once obtained,
+        # the lock is held for the remainder of the current transaction.
+        self.engine.lock_table(txn, "user_ips")
 
         for keyv, valv in zip(key_values, value_values):
             _keys = {x: y for x, y in zip(key_names, keyv)}
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 282687ebce..07908c41d9 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -449,9 +449,6 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
         content_json = json_encoder.encode(content)
 
         async with self._account_data_id_gen.get_next() as next_id:
-            # no need to lock here as room_account_data has a unique constraint
-            # on (user_id, room_id, account_data_type) so simple_upsert will
-            # retry if there is a conflict.
             await self.db_pool.simple_upsert(
                 desc="add_room_account_data",
                 table="room_account_data",
@@ -461,7 +458,6 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
                     "account_data_type": account_data_type,
                 },
                 values={"stream_id": next_id, "content": content_json},
-                lock=False,
             )
 
             self._account_data_stream_cache.entity_has_changed(user_id, next_id)
@@ -517,15 +513,11 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
     ) -> None:
         content_json = json_encoder.encode(content)
 
-        # no need to lock here as account_data has a unique constraint on
-        # (user_id, account_data_type) so simple_upsert will retry if
-        # there is a conflict.
         self.db_pool.simple_upsert_txn(
             txn,
             table="account_data",
             keyvalues={"user_id": user_id, "account_data_type": account_data_type},
             values={"stream_id": next_id, "content": content_json},
-            lock=False,
         )
 
         # Ignored users get denormalized into a separate table as an optimisation.
diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py
index 63046c0527..25da0c56c5 100644
--- a/synapse/storage/databases/main/appservice.py
+++ b/synapse/storage/databases/main/appservice.py
@@ -451,8 +451,6 @@ class ApplicationServiceTransactionWorkerStore(
             table="application_services_state",
             keyvalues={"as_id": service.id},
             values={f"{stream_type}_stream_id": pos},
-            # no need to lock when emulating upsert: as_id is a unique key
-            lock=False,
             desc="set_appservice_stream_type_pos",
         )
 
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 05a193f889..534f7fc04a 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1744,9 +1744,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
                 table="device_lists_remote_cache",
                 keyvalues={"user_id": user_id, "device_id": device_id},
                 values={"content": json_encoder.encode(content)},
-                # we don't need to lock, because we assume we are the only thread
-                # updating this user's devices.
-                lock=False,
             )
 
         txn.call_after(self._get_cached_user_device.invalidate, (user_id, device_id))
@@ -1760,9 +1757,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
             table="device_lists_remote_extremeties",
             keyvalues={"user_id": user_id},
             values={"stream_id": stream_id},
-            # again, we can assume we are the only thread updating this user's
-            # extremity.
-            lock=False,
         )
 
     async def update_remote_device_list_cache(
@@ -1815,9 +1809,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
             table="device_lists_remote_extremeties",
             keyvalues={"user_id": user_id},
             values={"stream_id": stream_id},
-            # we don't need to lock, because we can assume we are the only thread
-            # updating this user's extremity.
-            lock=False,
         )
 
     async def add_device_change_to_streams(
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 309a4ba664..bbee02ab18 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -1686,7 +1686,6 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
             },
             insertion_values={},
             desc="insert_insertion_extremity",
-            lock=False,
         )
 
     async def insert_received_event_to_staging(
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
index fee37b9ce4..40fd781a6a 100644
--- a/synapse/storage/databases/main/pusher.py
+++ b/synapse/storage/databases/main/pusher.py
@@ -325,14 +325,11 @@ class PusherWorkerStore(SQLBaseStore):
     async def set_throttle_params(
         self, pusher_id: str, room_id: str, params: ThrottleParams
     ) -> None:
-        # no need to lock because `pusher_throttle` has a primary key on
-        # (pusher, room_id) so simple_upsert will retry
         await self.db_pool.simple_upsert(
             "pusher_throttle",
             {"pusher": pusher_id, "room_id": room_id},
             {"last_sent_ts": params.last_sent_ts, "throttle_ms": params.throttle_ms},
             desc="set_throttle_params",
-            lock=False,
         )
 
     async def _remove_deactivated_pushers(self, progress: dict, batch_size: int) -> int:
@@ -589,8 +586,6 @@ class PusherStore(PusherWorkerStore, PusherBackgroundUpdatesStore):
         device_id: Optional[str] = None,
     ) -> None:
         async with self._pushers_id_gen.get_next() as stream_id:
-            # no need to lock because `pushers` has a unique key on
-            # (app_id, pushkey, user_name) so simple_upsert will retry
             await self.db_pool.simple_upsert(
                 table="pushers",
                 keyvalues={"app_id": app_id, "pushkey": pushkey, "user_name": user_id},
@@ -609,7 +604,6 @@ class PusherStore(PusherWorkerStore, PusherBackgroundUpdatesStore):
                     "device_id": device_id,
                 },
                 desc="add_pusher",
-                lock=False,
             )
 
             user_has_pusher = self.get_if_user_has_pusher.cache.get_immediate(
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 52ad947c6c..1309bfd374 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -1847,9 +1847,6 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
                 "creator": room_creator,
                 "has_auth_chain_index": has_auth_chain_index,
             },
-            # rooms has a unique constraint on room_id, so no need to lock when doing an
-            # emulated upsert.
-            lock=False,
         )
 
     async def store_partial_state_room(
@@ -1970,9 +1967,6 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
                 "creator": "",
                 "has_auth_chain_index": has_auth_chain_index,
             },
-            # rooms has a unique constraint on room_id, so no need to lock when doing an
-            # emulated upsert.
-            lock=False,
         )
 
     async def set_room_is_public(self, room_id: str, is_public: bool) -> None:
diff --git a/synapse/storage/databases/main/room_batch.py b/synapse/storage/databases/main/room_batch.py
index 39e80f6f5b..131f357d04 100644
--- a/synapse/storage/databases/main/room_batch.py
+++ b/synapse/storage/databases/main/room_batch.py
@@ -44,6 +44,4 @@ class RoomBatchStore(SQLBaseStore):
             table="event_to_state_groups",
             keyvalues={"event_id": event_id},
             values={"state_group": state_group_id, "event_id": event_id},
-            # Unique constraint on event_id so we don't have to lock
-            lock=False,
         )
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index 698d6f7515..044435deab 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -481,7 +481,6 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                 table="user_directory",
                 keyvalues={"user_id": user_id},
                 values={"display_name": display_name, "avatar_url": avatar_url},
-                lock=False,  # We're only inserter
             )
 
             if isinstance(self.database_engine, PostgresEngine):
@@ -511,7 +510,6 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                     table="user_directory_search",
                     keyvalues={"user_id": user_id},
                     values={"value": value},
-                    lock=False,  # We're only inserter
                 )
             else:
                 # This should be unreachable.
-- 
cgit 1.5.1


From d748bbc8f8268d2e8457374d529adafb20b9f5f4 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 28 Nov 2022 09:40:17 -0500
Subject: Include thread information when sending receipts over federation.
 (#14466)

Include the thread_id field when sending read receipts over
federation. This might result in the same user having multiple
read receipts per-room, meaning multiple EDUs must be sent
to encapsulate those receipts.

This restructures the PerDestinationQueue APIs to support
multiple receipt EDUs. queue_read_receipt now becomes linear
time in the number of queued threaded receipts in the room for
the given user; this is expected to be a small number since receipt
EDUs are sent as filler in transactions.
---
 changelog.d/14466.bugfix                           |   1 +
 synapse/federation/sender/per_destination_queue.py | 183 ++++++++++++++-------
 synapse/handlers/receipts.py                       |   1 -
 tests/federation/test_federation_sender.py         |  77 +++++++++
 4 files changed, 198 insertions(+), 64 deletions(-)
 create mode 100644 changelog.d/14466.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14466.bugfix b/changelog.d/14466.bugfix
new file mode 100644
index 0000000000..82f6e6b68e
--- /dev/null
+++ b/changelog.d/14466.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.70.0 where a receipt's thread ID was not sent over federation.
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index 3ae5e8634c..5af2784f1e 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -35,7 +35,7 @@ from synapse.logging import issue9533_logger
 from synapse.logging.opentracing import SynapseTags, set_tag
 from synapse.metrics import sent_transactions_counter
 from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.types import ReadReceipt
+from synapse.types import JsonDict, ReadReceipt
 from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter
 from synapse.visibility import filter_events_for_server
 
@@ -136,8 +136,11 @@ class PerDestinationQueue:
         # destination
         self._pending_presence: Dict[str, UserPresenceState] = {}
 
-        # room_id -> receipt_type -> user_id -> receipt_dict
-        self._pending_rrs: Dict[str, Dict[str, Dict[str, dict]]] = {}
+        # List of room_id -> receipt_type -> user_id -> receipt_dict,
+        #
+        # Each EDU can only hold a single receipt per (room ID, receipt type,
+        # user ID) tuple; receipts for other threads go into additional EDUs.
+        self._pending_receipt_edus: List[Dict[str, Dict[str, Dict[str, dict]]]] = []
         self._rrs_pending_flush = False
 
         # stream_id of last successfully sent to-device message.
@@ -202,17 +205,53 @@ class PerDestinationQueue:
         Args:
             receipt: receipt to be queued
         """
-        self._pending_rrs.setdefault(receipt.room_id, {}).setdefault(
-            receipt.receipt_type, {}
-        )[receipt.user_id] = {"event_ids": receipt.event_ids, "data": receipt.data}
+        serialized_receipt: JsonDict = {
+            "event_ids": receipt.event_ids,
+            "data": receipt.data,
+        }
+        if receipt.thread_id is not None:
+            serialized_receipt["data"]["thread_id"] = receipt.thread_id
+
+        # Find which EDU to add this receipt to. There are three situations depending
+        # on the (room ID, receipt type, user, thread ID) tuple:
+        #
+        # 1. If it fully matches, clobber the information.
+        # 2. If it is missing, add the information.
+        # 3. If the subset tuple of (room ID, receipt type, user) matches, check
+        #    the next EDU (or add a new EDU).
+        for edu in self._pending_receipt_edus:
+            receipt_content = edu.setdefault(receipt.room_id, {}).setdefault(
+                receipt.receipt_type, {}
+            )
+            # If this room ID, receipt type, user ID is not in this EDU, OR if
+            # the full tuple matches, use the current EDU.
+            if (
+                receipt.user_id not in receipt_content
+                or receipt_content[receipt.user_id].get("thread_id")
+                == receipt.thread_id
+            ):
+                receipt_content[receipt.user_id] = serialized_receipt
+                break
+
+        # If no matching EDU was found, create a new one.
+        else:
+            self._pending_receipt_edus.append(
+                {
+                    receipt.room_id: {
+                        receipt.receipt_type: {receipt.user_id: serialized_receipt}
+                    }
+                }
+            )
 
     def flush_read_receipts_for_room(self, room_id: str) -> None:
-        # if we don't have any read-receipts for this room, it may be that we've already
-        # sent them out, so we don't need to flush.
-        if room_id not in self._pending_rrs:
-            return
-        self._rrs_pending_flush = True
-        self.attempt_new_transaction()
+        # If there are any pending receipts for this room then force-flush them
+        # in a new transaction.
+        for edu in self._pending_receipt_edus:
+            if room_id in edu:
+                self._rrs_pending_flush = True
+                self.attempt_new_transaction()
+                # No use in checking remaining EDUs if the room was found.
+                break
 
     def send_keyed_edu(self, edu: Edu, key: Hashable) -> None:
         self._pending_edus_keyed[(edu.edu_type, key)] = edu
@@ -351,7 +390,7 @@ class PerDestinationQueue:
                 self._pending_edus = []
                 self._pending_edus_keyed = {}
                 self._pending_presence = {}
-                self._pending_rrs = {}
+                self._pending_receipt_edus = []
 
                 self._start_catching_up()
         except FederationDeniedError as e:
@@ -543,22 +582,27 @@ class PerDestinationQueue:
                     self._destination, last_successful_stream_ordering
                 )
 
-    def _get_rr_edus(self, force_flush: bool) -> Iterable[Edu]:
-        if not self._pending_rrs:
+    def _get_receipt_edus(self, force_flush: bool, limit: int) -> Iterable[Edu]:
+        if not self._pending_receipt_edus:
             return
         if not force_flush and not self._rrs_pending_flush:
             # not yet time for this lot
             return
 
-        edu = Edu(
-            origin=self._server_name,
-            destination=self._destination,
-            edu_type=EduTypes.RECEIPT,
-            content=self._pending_rrs,
-        )
-        self._pending_rrs = {}
-        self._rrs_pending_flush = False
-        yield edu
+        # Send at most limit EDUs for receipts.
+        for content in self._pending_receipt_edus[:limit]:
+            yield Edu(
+                origin=self._server_name,
+                destination=self._destination,
+                edu_type=EduTypes.RECEIPT,
+                content=content,
+            )
+        self._pending_receipt_edus = self._pending_receipt_edus[limit:]
+
+        # If there are still pending read-receipts, don't reset the pending flush
+        # flag.
+        if not self._pending_receipt_edus:
+            self._rrs_pending_flush = False
 
     def _pop_pending_edus(self, limit: int) -> List[Edu]:
         pending_edus = self._pending_edus
@@ -645,27 +689,61 @@ class _TransactionQueueManager:
     async def __aenter__(self) -> Tuple[List[EventBase], List[Edu]]:
         # First we calculate the EDUs we want to send, if any.
 
-        # We start by fetching device related EDUs, i.e device updates and to
-        # device messages. We have to keep 2 free slots for presence and rr_edus.
-        device_edu_limit = MAX_EDUS_PER_TRANSACTION - 2
+        # There's a maximum number of EDUs that can be sent with a transaction;
+        # generally device updates and to-device messages get priority, but we
+        # want to ensure that there's room for some other EDUs as well.
+        #
+        # This is done by:
+        #
+        # * Add a presence EDU, if one exists.
+        # * Add up to a small limit of read receipt EDUs.
+        # * Add to-device EDUs, but leave some space for device list updates.
+        # * Add device list updates EDUs.
+        # * If there's any remaining room, add other EDUs.
+        pending_edus = []
+
+        # Add presence EDU.
+        if self.queue._pending_presence:
+            pending_edus.append(
+                Edu(
+                    origin=self.queue._server_name,
+                    destination=self.queue._destination,
+                    edu_type=EduTypes.PRESENCE,
+                    content={
+                        "push": [
+                            format_user_presence_state(
+                                presence, self.queue._clock.time_msec()
+                            )
+                            for presence in self.queue._pending_presence.values()
+                        ]
+                    },
+                )
+            )
+            self.queue._pending_presence = {}
 
-        # We prioritize to-device messages so that existing encryption channels
+        # Add read receipt EDUs.
+        pending_edus.extend(self.queue._get_receipt_edus(force_flush=False, limit=5))
+        edu_limit = MAX_EDUS_PER_TRANSACTION - len(pending_edus)
+
+        # Next, prioritize to-device messages so that existing encryption channels
         # work. We also keep a few slots spare (by reducing the limit) so that
         # we can still trickle out some device list updates.
         (
             to_device_edus,
             device_stream_id,
-        ) = await self.queue._get_to_device_message_edus(device_edu_limit - 10)
+        ) = await self.queue._get_to_device_message_edus(edu_limit - 10)
 
         if to_device_edus:
             self._device_stream_id = device_stream_id
         else:
             self.queue._last_device_stream_id = device_stream_id
 
-        device_edu_limit -= len(to_device_edus)
+        pending_edus.extend(to_device_edus)
+        edu_limit -= len(to_device_edus)
 
+        # Add device list update EDUs.
         device_update_edus, dev_list_id = await self.queue._get_device_update_edus(
-            device_edu_limit
+            edu_limit
         )
 
         if device_update_edus:
@@ -673,40 +751,17 @@ class _TransactionQueueManager:
         else:
             self.queue._last_device_list_stream_id = dev_list_id
 
-        pending_edus = device_update_edus + to_device_edus
-
-        # Now add the read receipt EDU.
-        pending_edus.extend(self.queue._get_rr_edus(force_flush=False))
-
-        # And presence EDU.
-        if self.queue._pending_presence:
-            pending_edus.append(
-                Edu(
-                    origin=self.queue._server_name,
-                    destination=self.queue._destination,
-                    edu_type=EduTypes.PRESENCE,
-                    content={
-                        "push": [
-                            format_user_presence_state(
-                                presence, self.queue._clock.time_msec()
-                            )
-                            for presence in self.queue._pending_presence.values()
-                        ]
-                    },
-                )
-            )
-            self.queue._pending_presence = {}
+        pending_edus.extend(device_update_edus)
+        edu_limit -= len(device_update_edus)
 
         # Finally add any other types of EDUs if there is room.
-        pending_edus.extend(
-            self.queue._pop_pending_edus(MAX_EDUS_PER_TRANSACTION - len(pending_edus))
-        )
-        while (
-            len(pending_edus) < MAX_EDUS_PER_TRANSACTION
-            and self.queue._pending_edus_keyed
-        ):
+        other_edus = self.queue._pop_pending_edus(edu_limit)
+        pending_edus.extend(other_edus)
+        edu_limit -= len(other_edus)
+        while edu_limit > 0 and self.queue._pending_edus_keyed:
             _, val = self.queue._pending_edus_keyed.popitem()
             pending_edus.append(val)
+            edu_limit -= 1
 
         # Now we look for any PDUs to send, by getting up to 50 PDUs from the
         # queue
@@ -717,8 +772,10 @@ class _TransactionQueueManager:
 
         # if we've decided to send a transaction anyway, and we have room, we
         # may as well send any pending RRs
-        if len(pending_edus) < MAX_EDUS_PER_TRANSACTION:
-            pending_edus.extend(self.queue._get_rr_edus(force_flush=True))
+        if edu_limit:
+            pending_edus.extend(
+                self.queue._get_receipt_edus(force_flush=True, limit=edu_limit)
+            )
 
         if self._pdus:
             self._last_stream_ordering = self._pdus[
diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py
index ac01582442..6a4fed1156 100644
--- a/synapse/handlers/receipts.py
+++ b/synapse/handlers/receipts.py
@@ -92,7 +92,6 @@ class ReceiptsHandler:
                         continue
 
                     # Check if these receipts apply to a thread.
-                    thread_id = None
                     data = user_values.get("data", {})
                     thread_id = data.get("thread_id")
                     # If the thread ID is invalid, consider it missing.
diff --git a/tests/federation/test_federation_sender.py b/tests/federation/test_federation_sender.py
index f1e357764f..01f147418b 100644
--- a/tests/federation/test_federation_sender.py
+++ b/tests/federation/test_federation_sender.py
@@ -83,6 +83,83 @@ class FederationSenderReceiptsTestCases(HomeserverTestCase):
             ],
         )
 
+    @override_config({"send_federation": True})
+    def test_send_receipts_thread(self):
+        mock_send_transaction = (
+            self.hs.get_federation_transport_client().send_transaction
+        )
+        mock_send_transaction.return_value = make_awaitable({})
+
+        # Create receipts for:
+        #
+        # * The same room / user on multiple threads.
+        # * A different user in the same room.
+        sender = self.hs.get_federation_sender()
+        for user, thread in (
+            ("alice", None),
+            ("alice", "thread"),
+            ("bob", None),
+            ("bob", "diff-thread"),
+        ):
+            receipt = ReadReceipt(
+                "room_id",
+                "m.read",
+                user,
+                ["event_id"],
+                thread_id=thread,
+                data={"ts": 1234},
+            )
+            self.successResultOf(
+                defer.ensureDeferred(sender.send_read_receipt(receipt))
+            )
+
+        self.pump()
+
+        # Expect one send_transaction call with two EDUs: threaded and unthreaded.
+        mock_send_transaction.assert_called_once()
+        json_cb = mock_send_transaction.call_args[0][1]
+        data = json_cb()
+        # Note that the ordering of the EDUs doesn't matter.
+        self.assertCountEqual(
+            data["edus"],
+            [
+                {
+                    "edu_type": EduTypes.RECEIPT,
+                    "content": {
+                        "room_id": {
+                            "m.read": {
+                                "alice": {
+                                    "event_ids": ["event_id"],
+                                    "data": {"ts": 1234, "thread_id": "thread"},
+                                },
+                                "bob": {
+                                    "event_ids": ["event_id"],
+                                    "data": {"ts": 1234, "thread_id": "diff-thread"},
+                                },
+                            }
+                        }
+                    },
+                },
+                {
+                    "edu_type": EduTypes.RECEIPT,
+                    "content": {
+                        "room_id": {
+                            "m.read": {
+                                "alice": {
+                                    "event_ids": ["event_id"],
+                                    "data": {"ts": 1234},
+                                },
+                                "bob": {
+                                    "event_ids": ["event_id"],
+                                    "data": {"ts": 1234},
+                                },
+                            }
+                        }
+                    },
+                },
+            ],
+        )
+
     @override_config({"send_federation": True})
     def test_send_receipts_with_backoff(self):
         """Send two receipts in quick succession; the second should be flushed, but
-- 
cgit 1.5.1


From d56f48038a07fd76d2ce08220a4061f85006bf3b Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Mon, 28 Nov 2022 15:25:18 +0000
Subject: Fix logging context warnings due to common usage metrics setup
 (#14574)

`setup()` is run under the sentinel context manager, so we wrap the
initial update in a background process. Before this change, Synapse
would log two warnings on startup:
    Starting db txn 'count_daily_users' from sentinel context
    Starting db connection from sentinel context: metrics will be lost

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14574.bugfix                | 1 +
 synapse/metrics/common_usage_metrics.py | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14574.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14574.bugfix b/changelog.d/14574.bugfix
new file mode 100644
index 0000000000..fac85ec9b0
--- /dev/null
+++ b/changelog.d/14574.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.67.0 where two logging context warnings would be logged on startup.
diff --git a/synapse/metrics/common_usage_metrics.py b/synapse/metrics/common_usage_metrics.py
index 0a22ea3d92..6e05b043d3 100644
--- a/synapse/metrics/common_usage_metrics.py
+++ b/synapse/metrics/common_usage_metrics.py
@@ -54,7 +54,9 @@ class CommonUsageMetricsManager:
 
     async def setup(self) -> None:
         """Keep the gauges for common usage metrics up to date."""
-        await self._update_gauges()
+        run_as_background_process(
+            desc="common_usage_metrics_update_gauges", func=self._update_gauges
+        )
         self._clock.looping_call(
             run_as_background_process,
             5 * 60 * 1000,
-- 
cgit 1.5.1


From 1183c372fa9da01b2667f1b83dab958dad432c68 Mon Sep 17 00:00:00 2001
From: Andrew Ferrazzutti <andrewf@element.io>
Date: Mon, 28 Nov 2022 11:17:29 -0500
Subject: Use `device_one_time_keys_count` to match MSC3202 (#14565)

* Use `device_one_time_keys_count` to match MSC3202

Rename the `device_one_time_key_counts` key in responses to
`device_one_time_keys_count` to match the name specified by MSC3202.

Also change related variable/class names for consistency.

Signed-off-by: Andrew Ferrazzutti <andrewf@element.io>

* Update changelog.d/14565.misc

* Revert name change for `one_time_key_counts` key

as this is a different key altogether from `device_one_time_keys_count`,
which is used for `/sync` instead of appservice transactions.

Signed-off-by: Andrew Ferrazzutti <andrewf@element.io>
---
 changelog.d/14565.misc                            |  1 +
 synapse/appservice/__init__.py                    | 10 +++++-----
 synapse/appservice/api.py                         | 11 +++++++----
 synapse/appservice/scheduler.py                   | 16 ++++++++--------
 synapse/handlers/sync.py                          |  6 +++---
 synapse/storage/databases/main/appservice.py      | 10 +++++-----
 synapse/storage/databases/main/end_to_end_keys.py |  8 ++++----
 tests/appservice/test_scheduler.py                |  6 +++---
 tests/handlers/test_appservice.py                 |  4 ++--
 9 files changed, 38 insertions(+), 34 deletions(-)
 create mode 100644 changelog.d/14565.misc

(limited to 'synapse')

diff --git a/changelog.d/14565.misc b/changelog.d/14565.misc
new file mode 100644
index 0000000000..19a62b036c
--- /dev/null
+++ b/changelog.d/14565.misc
@@ -0,0 +1 @@
+In application service transactions that include the experimental `org.matrix.msc3202.device_one_time_key_counts` key, include a duplicate key of `org.matrix.msc3202.device_one_time_keys_count` to match the name proposed by [MSC3202](https://github.com/matrix-org/matrix-spec-proposals/blob/travis/msc/otk-dl-appservice/proposals/3202-encrypted-appservices.md).
diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py
index 500bdde3a9..bf4e6c629b 100644
--- a/synapse/appservice/__init__.py
+++ b/synapse/appservice/__init__.py
@@ -32,9 +32,9 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
-# Type for the `device_one_time_key_counts` field in an appservice transaction
+# Type for the `device_one_time_keys_count` field in an appservice transaction
 #   user ID -> {device ID -> {algorithm -> count}}
-TransactionOneTimeKeyCounts = Dict[str, Dict[str, Dict[str, int]]]
+TransactionOneTimeKeysCount = Dict[str, Dict[str, Dict[str, int]]]
 
 # Type for the `device_unused_fallback_key_types` field in an appservice transaction
 #   user ID -> {device ID -> [algorithm]}
@@ -376,7 +376,7 @@ class AppServiceTransaction:
         events: List[EventBase],
         ephemeral: List[JsonDict],
         to_device_messages: List[JsonDict],
-        one_time_key_counts: TransactionOneTimeKeyCounts,
+        one_time_keys_count: TransactionOneTimeKeysCount,
         unused_fallback_keys: TransactionUnusedFallbackKeys,
         device_list_summary: DeviceListUpdates,
     ):
@@ -385,7 +385,7 @@ class AppServiceTransaction:
         self.events = events
         self.ephemeral = ephemeral
         self.to_device_messages = to_device_messages
-        self.one_time_key_counts = one_time_key_counts
+        self.one_time_keys_count = one_time_keys_count
         self.unused_fallback_keys = unused_fallback_keys
         self.device_list_summary = device_list_summary
 
@@ -402,7 +402,7 @@ class AppServiceTransaction:
             events=self.events,
             ephemeral=self.ephemeral,
             to_device_messages=self.to_device_messages,
-            one_time_key_counts=self.one_time_key_counts,
+            one_time_keys_count=self.one_time_keys_count,
             unused_fallback_keys=self.unused_fallback_keys,
             device_list_summary=self.device_list_summary,
             txn_id=self.id,
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index 60774b240d..edafd433cd 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -23,7 +23,7 @@ from synapse.api.constants import EventTypes, Membership, ThirdPartyEntityKind
 from synapse.api.errors import CodeMessageException
 from synapse.appservice import (
     ApplicationService,
-    TransactionOneTimeKeyCounts,
+    TransactionOneTimeKeysCount,
     TransactionUnusedFallbackKeys,
 )
 from synapse.events import EventBase
@@ -262,7 +262,7 @@ class ApplicationServiceApi(SimpleHttpClient):
         events: List[EventBase],
         ephemeral: List[JsonDict],
         to_device_messages: List[JsonDict],
-        one_time_key_counts: TransactionOneTimeKeyCounts,
+        one_time_keys_count: TransactionOneTimeKeysCount,
         unused_fallback_keys: TransactionUnusedFallbackKeys,
         device_list_summary: DeviceListUpdates,
         txn_id: Optional[int] = None,
@@ -310,10 +310,13 @@ class ApplicationServiceApi(SimpleHttpClient):
 
         # TODO: Update to stable prefixes once MSC3202 completes FCP merge
         if service.msc3202_transaction_extensions:
-            if one_time_key_counts:
+            if one_time_keys_count:
                 body[
                     "org.matrix.msc3202.device_one_time_key_counts"
-                ] = one_time_key_counts
+                ] = one_time_keys_count
+                body[
+                    "org.matrix.msc3202.device_one_time_keys_count"
+                ] = one_time_keys_count
             if unused_fallback_keys:
                 body[
                     "org.matrix.msc3202.device_unused_fallback_key_types"
diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py
index 430ffbcd1f..7b562795a3 100644
--- a/synapse/appservice/scheduler.py
+++ b/synapse/appservice/scheduler.py
@@ -64,7 +64,7 @@ from typing import (
 from synapse.appservice import (
     ApplicationService,
     ApplicationServiceState,
-    TransactionOneTimeKeyCounts,
+    TransactionOneTimeKeysCount,
     TransactionUnusedFallbackKeys,
 )
 from synapse.appservice.api import ApplicationServiceApi
@@ -258,7 +258,7 @@ class _ServiceQueuer:
                 ):
                     return
 
-                one_time_key_counts: Optional[TransactionOneTimeKeyCounts] = None
+                one_time_keys_count: Optional[TransactionOneTimeKeysCount] = None
                 unused_fallback_keys: Optional[TransactionUnusedFallbackKeys] = None
 
                 if (
@@ -269,7 +269,7 @@ class _ServiceQueuer:
                     # for the users which are mentioned in this transaction,
                     # as well as the appservice's sender.
                     (
-                        one_time_key_counts,
+                        one_time_keys_count,
                         unused_fallback_keys,
                     ) = await self._compute_msc3202_otk_counts_and_fallback_keys(
                         service, events, ephemeral, to_device_messages_to_send
@@ -281,7 +281,7 @@ class _ServiceQueuer:
                         events,
                         ephemeral,
                         to_device_messages_to_send,
-                        one_time_key_counts,
+                        one_time_keys_count,
                         unused_fallback_keys,
                         device_list_summary,
                     )
@@ -296,7 +296,7 @@ class _ServiceQueuer:
         events: Iterable[EventBase],
         ephemerals: Iterable[JsonDict],
         to_device_messages: Iterable[JsonDict],
-    ) -> Tuple[TransactionOneTimeKeyCounts, TransactionUnusedFallbackKeys]:
+    ) -> Tuple[TransactionOneTimeKeysCount, TransactionUnusedFallbackKeys]:
         """
         Given a list of the events, ephemeral messages and to-device messages,
         - first computes a list of application services users that may have
@@ -367,7 +367,7 @@ class _TransactionController:
         events: List[EventBase],
         ephemeral: Optional[List[JsonDict]] = None,
         to_device_messages: Optional[List[JsonDict]] = None,
-        one_time_key_counts: Optional[TransactionOneTimeKeyCounts] = None,
+        one_time_keys_count: Optional[TransactionOneTimeKeysCount] = None,
         unused_fallback_keys: Optional[TransactionUnusedFallbackKeys] = None,
         device_list_summary: Optional[DeviceListUpdates] = None,
     ) -> None:
@@ -380,7 +380,7 @@ class _TransactionController:
             events: The persistent events to include in the transaction.
             ephemeral: The ephemeral events to include in the transaction.
             to_device_messages: The to-device messages to include in the transaction.
-            one_time_key_counts: Counts of remaining one-time keys for relevant
+            one_time_keys_count: Counts of remaining one-time keys for relevant
                 appservice devices in the transaction.
             unused_fallback_keys: Lists of unused fallback keys for relevant
                 appservice devices in the transaction.
@@ -397,7 +397,7 @@ class _TransactionController:
                 events=events,
                 ephemeral=ephemeral or [],
                 to_device_messages=to_device_messages or [],
-                one_time_key_counts=one_time_key_counts or {},
+                one_time_keys_count=one_time_keys_count or {},
                 unused_fallback_keys=unused_fallback_keys or {},
                 device_list_summary=device_list_summary or DeviceListUpdates(),
             )
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 259456b55d..c8858b22dd 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1426,14 +1426,14 @@ class SyncHandler:
 
         logger.debug("Fetching OTK data")
         device_id = sync_config.device_id
-        one_time_key_counts: JsonDict = {}
+        one_time_keys_count: JsonDict = {}
         unused_fallback_key_types: List[str] = []
         if device_id:
             # TODO: We should have a way to let clients differentiate between the states of:
             #   * no change in OTK count since the provided since token
             #   * the server has zero OTKs left for this device
             #  Spec issue: https://github.com/matrix-org/matrix-doc/issues/3298
-            one_time_key_counts = await self.store.count_e2e_one_time_keys(
+            one_time_keys_count = await self.store.count_e2e_one_time_keys(
                 user_id, device_id
             )
             unused_fallback_key_types = (
@@ -1463,7 +1463,7 @@ class SyncHandler:
             archived=sync_result_builder.archived,
             to_device=sync_result_builder.to_device,
             device_lists=device_lists,
-            device_one_time_keys_count=one_time_key_counts,
+            device_one_time_keys_count=one_time_keys_count,
             device_unused_fallback_key_types=unused_fallback_key_types,
             next_batch=sync_result_builder.now_token,
         )
diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py
index 25da0c56c5..c2c8018ee2 100644
--- a/synapse/storage/databases/main/appservice.py
+++ b/synapse/storage/databases/main/appservice.py
@@ -20,7 +20,7 @@ from synapse.appservice import (
     ApplicationService,
     ApplicationServiceState,
     AppServiceTransaction,
-    TransactionOneTimeKeyCounts,
+    TransactionOneTimeKeysCount,
     TransactionUnusedFallbackKeys,
 )
 from synapse.config.appservice import load_appservices
@@ -260,7 +260,7 @@ class ApplicationServiceTransactionWorkerStore(
         events: List[EventBase],
         ephemeral: List[JsonDict],
         to_device_messages: List[JsonDict],
-        one_time_key_counts: TransactionOneTimeKeyCounts,
+        one_time_keys_count: TransactionOneTimeKeysCount,
         unused_fallback_keys: TransactionUnusedFallbackKeys,
         device_list_summary: DeviceListUpdates,
     ) -> AppServiceTransaction:
@@ -273,7 +273,7 @@ class ApplicationServiceTransactionWorkerStore(
             events: A list of persistent events to put in the transaction.
             ephemeral: A list of ephemeral events to put in the transaction.
             to_device_messages: A list of to-device messages to put in the transaction.
-            one_time_key_counts: Counts of remaining one-time keys for relevant
+            one_time_keys_count: Counts of remaining one-time keys for relevant
                 appservice devices in the transaction.
             unused_fallback_keys: Lists of unused fallback keys for relevant
                 appservice devices in the transaction.
@@ -299,7 +299,7 @@ class ApplicationServiceTransactionWorkerStore(
                 events=events,
                 ephemeral=ephemeral,
                 to_device_messages=to_device_messages,
-                one_time_key_counts=one_time_key_counts,
+                one_time_keys_count=one_time_keys_count,
                 unused_fallback_keys=unused_fallback_keys,
                 device_list_summary=device_list_summary,
             )
@@ -379,7 +379,7 @@ class ApplicationServiceTransactionWorkerStore(
             events=events,
             ephemeral=[],
             to_device_messages=[],
-            one_time_key_counts={},
+            one_time_keys_count={},
             unused_fallback_keys={},
             device_list_summary=DeviceListUpdates(),
         )
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index cf33e73e2b..643c47d608 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -33,7 +33,7 @@ from typing_extensions import Literal
 
 from synapse.api.constants import DeviceKeyAlgorithms
 from synapse.appservice import (
-    TransactionOneTimeKeyCounts,
+    TransactionOneTimeKeysCount,
     TransactionUnusedFallbackKeys,
 )
 from synapse.logging.opentracing import log_kv, set_tag, trace
@@ -514,7 +514,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
 
     async def count_bulk_e2e_one_time_keys_for_as(
         self, user_ids: Collection[str]
-    ) -> TransactionOneTimeKeyCounts:
+    ) -> TransactionOneTimeKeysCount:
         """
         Counts, in bulk, the one-time keys for all the users specified.
         Intended to be used by application services for populating OTK counts in
@@ -528,7 +528,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
 
         def _count_bulk_e2e_one_time_keys_txn(
             txn: LoggingTransaction,
-        ) -> TransactionOneTimeKeyCounts:
+        ) -> TransactionOneTimeKeysCount:
             user_in_where_clause, user_parameters = make_in_list_sql_clause(
                 self.database_engine, "user_id", user_ids
             )
@@ -541,7 +541,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
             """
             txn.execute(sql, user_parameters)
 
-            result: TransactionOneTimeKeyCounts = {}
+            result: TransactionOneTimeKeysCount = {}
 
             for user_id, device_id, algorithm, count in txn:
                 # We deliberately construct empty dictionaries for
diff --git a/tests/appservice/test_scheduler.py b/tests/appservice/test_scheduler.py
index 0b22afdc75..0a1ae83a2b 100644
--- a/tests/appservice/test_scheduler.py
+++ b/tests/appservice/test_scheduler.py
@@ -69,7 +69,7 @@ class ApplicationServiceSchedulerTransactionCtrlTestCase(unittest.TestCase):
             events=events,
             ephemeral=[],
             to_device_messages=[],  # txn made and saved
-            one_time_key_counts={},
+            one_time_keys_count={},
             unused_fallback_keys={},
             device_list_summary=DeviceListUpdates(),
         )
@@ -96,7 +96,7 @@ class ApplicationServiceSchedulerTransactionCtrlTestCase(unittest.TestCase):
             events=events,
             ephemeral=[],
             to_device_messages=[],  # txn made and saved
-            one_time_key_counts={},
+            one_time_keys_count={},
             unused_fallback_keys={},
             device_list_summary=DeviceListUpdates(),
         )
@@ -125,7 +125,7 @@ class ApplicationServiceSchedulerTransactionCtrlTestCase(unittest.TestCase):
             events=events,
             ephemeral=[],
             to_device_messages=[],
-            one_time_key_counts={},
+            one_time_keys_count={},
             unused_fallback_keys={},
             device_list_summary=DeviceListUpdates(),
         )
diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py
index 144e49d0fd..9ed26d87a7 100644
--- a/tests/handlers/test_appservice.py
+++ b/tests/handlers/test_appservice.py
@@ -25,7 +25,7 @@ import synapse.storage
 from synapse.api.constants import EduTypes, EventTypes
 from synapse.appservice import (
     ApplicationService,
-    TransactionOneTimeKeyCounts,
+    TransactionOneTimeKeysCount,
     TransactionUnusedFallbackKeys,
 )
 from synapse.handlers.appservice import ApplicationServicesHandler
@@ -1123,7 +1123,7 @@ class ApplicationServicesHandlerOtkCountsTestCase(unittest.HomeserverTestCase):
         # Capture what was sent as an AS transaction.
         self.send_mock.assert_called()
         last_args, _last_kwargs = self.send_mock.call_args
-        otks: Optional[TransactionOneTimeKeyCounts] = last_args[self.ARG_OTK_COUNTS]
+        otks: Optional[TransactionOneTimeKeysCount] = last_args[self.ARG_OTK_COUNTS]
         unused_fallbacks: Optional[TransactionUnusedFallbackKeys] = last_args[
             self.ARG_FALLBACK_KEYS
         ]
-- 
cgit 1.5.1


From 8f10c8b054fc970838be9ae6f1f5aea95f166c98 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Mon, 28 Nov 2022 15:54:18 -0600
Subject: Move MSC3030 `/timestamp_to_event` endpoint to stable v1 location
 (#14471)

Fix https://github.com/matrix-org/synapse/issues/14390

 - Client API: `/_matrix/client/unstable/org.matrix.msc3030/rooms/<roomID>/timestamp_to_event?ts=<timestamp>&dir=<direction>` -> `/_matrix/client/v1/rooms/<roomID>/timestamp_to_event?ts=<timestamp>&dir=<direction>`
 - Federation API: `/_matrix/federation/unstable/org.matrix.msc3030/timestamp_to_event/<roomID>?ts=<timestamp>&dir=<direction>` -> `/_matrix/federation/v1/timestamp_to_event/<roomID>?ts=<timestamp>&dir=<direction>`

Complement test changes: https://github.com/matrix-org/complement/pull/559
---
 changelog.d/14471.feature                           |  1 +
 docker/complement/conf/workers-shared-extra.yaml.j2 |  2 --
 docker/configure_workers_and_start.py               |  2 ++
 docs/workers.md                                     |  2 ++
 scripts-dev/complement.sh                           |  6 +++---
 synapse/config/experimental.py                      |  3 ---
 synapse/federation/federation_client.py             | 12 +++++++++++-
 synapse/federation/transport/client.py              |  5 ++---
 synapse/federation/transport/server/__init__.py     |  8 --------
 synapse/federation/transport/server/federation.py   |  3 +--
 synapse/rest/client/room.py                         | 10 +++-------
 synapse/rest/client/versions.py                     |  2 --
 tests/rest/client/test_rooms.py                     |  7 +------
 13 files changed, 26 insertions(+), 37 deletions(-)
 create mode 100644 changelog.d/14471.feature

(limited to 'synapse')

diff --git a/changelog.d/14471.feature b/changelog.d/14471.feature
new file mode 100644
index 0000000000..a0e0c74f1a
--- /dev/null
+++ b/changelog.d/14471.feature
@@ -0,0 +1 @@
+Move MSC3030 `/timestamp_to_event` endpoints to stable `v1` location (`/_matrix/client/v1/rooms/<roomID>/timestamp_to_event?ts=<timestamp>&dir=<direction>`, `/_matrix/federation/v1/timestamp_to_event/<roomID>?ts=<timestamp>&dir=<direction>`).
diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2
index 883a87159c..ca640c343b 100644
--- a/docker/complement/conf/workers-shared-extra.yaml.j2
+++ b/docker/complement/conf/workers-shared-extra.yaml.j2
@@ -100,8 +100,6 @@ experimental_features:
   # client-side support for partial state in /send_join responses
   faster_joins: true
   {% endif %}
-  # Enable jump to date endpoint
-  msc3030_enabled: true
   # Filtering /messages by relation type.
   msc3874_enabled: true
 
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index c1e1544536..58c62f2231 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -140,6 +140,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
             "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/event",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/joined_rooms",
             "^/_matrix/client/(api/v1|r0|v3|unstable/.*)/rooms/.*/aliases",
+            "^/_matrix/client/v1/rooms/.*/timestamp_to_event$",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/search",
         ],
         "shared_extra_conf": {},
@@ -163,6 +164,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
             "^/_matrix/federation/(v1|v2)/invite/",
             "^/_matrix/federation/(v1|v2)/query_auth/",
             "^/_matrix/federation/(v1|v2)/event_auth/",
+            "^/_matrix/federation/v1/timestamp_to_event/",
             "^/_matrix/federation/(v1|v2)/exchange_third_party_invite/",
             "^/_matrix/federation/(v1|v2)/user/devices/",
             "^/_matrix/federation/(v1|v2)/get_groups_publicised$",
diff --git a/docs/workers.md b/docs/workers.md
index 27e54c5846..2b65acb5ed 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -191,6 +191,7 @@ information.
     ^/_matrix/federation/(v1|v2)/send_leave/
     ^/_matrix/federation/(v1|v2)/invite/
     ^/_matrix/federation/v1/event_auth/
+    ^/_matrix/federation/v1/timestamp_to_event/
     ^/_matrix/federation/v1/exchange_third_party_invite/
     ^/_matrix/federation/v1/user/devices/
     ^/_matrix/key/v2/query
@@ -218,6 +219,7 @@ information.
     ^/_matrix/client/(api/v1|r0|v3|unstable)/voip/turnServer$
     ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/event/
     ^/_matrix/client/(api/v1|r0|v3|unstable)/joined_rooms$
+    ^/_matrix/client/v1/rooms/.*/timestamp_to_event$
     ^/_matrix/client/(api/v1|r0|v3|unstable)/search$
 
     # Encryption requests
diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh
index 803c6ce92d..7744b47097 100755
--- a/scripts-dev/complement.sh
+++ b/scripts-dev/complement.sh
@@ -162,9 +162,9 @@ else
   # We only test faster room joins on monoliths, because they are purposefully
   # being developed without worker support to start with.
   #
-  # The tests for importing historical messages (MSC2716) and jump to date (MSC3030)
-  # also only pass with monoliths, currently.
-  test_tags="$test_tags,faster_joins,msc2716,msc3030"
+  # The tests for importing historical messages (MSC2716) also only pass with monoliths,
+  # currently.
+  test_tags="$test_tags,faster_joins,msc2716"
 fi
 
 
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index d4b71d1673..a503abf364 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -53,9 +53,6 @@ class ExperimentalConfig(Config):
         # MSC3266 (room summary api)
         self.msc3266_enabled: bool = experimental.get("msc3266_enabled", False)
 
-        # MSC3030 (Jump to date API endpoint)
-        self.msc3030_enabled: bool = experimental.get("msc3030_enabled", False)
-
         # MSC2409 (this setting only relates to optionally sending to-device messages).
         # Presence, typing and read receipt EDUs are already sent to application services that
         # have opted in to receive them. If enabled, this adds to-device messages to that list.
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index c4c0bc7315..8bccc9c60d 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -1691,9 +1691,19 @@ class FederationClient(FederationBase):
                 #   to return events on *both* sides of the timestamp to
                 #   help reconcile the gap faster.
                 _timestamp_to_event_from_destination,
+                # Since this endpoint is new, we should try other servers before giving up.
+                # We can safely remove this in a year (remove after 2023-11-16).
+                failover_on_unknown_endpoint=True,
             )
             return timestamp_to_event_response
-        except SynapseError:
+        except SynapseError as e:
+            logger.warn(
+                "timestamp_to_event(room_id=%s, timestamp=%s, direction=%s): encountered error when trying to fetch from destinations: %s",
+                room_id,
+                timestamp,
+                direction,
+                e,
+            )
             return None
 
     async def _timestamp_to_event_from_destination(
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index a3cfc701cd..77f1f39cac 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -185,9 +185,8 @@ class TransportLayerClient:
         Raises:
             Various exceptions when the request fails
         """
-        path = _create_path(
-            FEDERATION_UNSTABLE_PREFIX,
-            "/org.matrix.msc3030/timestamp_to_event/%s",
+        path = _create_v1_path(
+            "/timestamp_to_event/%s",
             room_id,
         )
 
diff --git a/synapse/federation/transport/server/__init__.py b/synapse/federation/transport/server/__init__.py
index 50623cd385..2725f53cf6 100644
--- a/synapse/federation/transport/server/__init__.py
+++ b/synapse/federation/transport/server/__init__.py
@@ -25,7 +25,6 @@ from synapse.federation.transport.server._base import (
 from synapse.federation.transport.server.federation import (
     FEDERATION_SERVLET_CLASSES,
     FederationAccountStatusServlet,
-    FederationTimestampLookupServlet,
 )
 from synapse.http.server import HttpServer, JsonResource
 from synapse.http.servlet import (
@@ -291,13 +290,6 @@ def register_servlets(
             )
 
         for servletclass in SERVLET_GROUPS[servlet_group]:
-            # Only allow the `/timestamp_to_event` servlet if msc3030 is enabled
-            if (
-                servletclass == FederationTimestampLookupServlet
-                and not hs.config.experimental.msc3030_enabled
-            ):
-                continue
-
             # Only allow the `/account_status` servlet if msc3720 is enabled
             if (
                 servletclass == FederationAccountStatusServlet
diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py
index 205fd16daa..53e77b4bb6 100644
--- a/synapse/federation/transport/server/federation.py
+++ b/synapse/federation/transport/server/federation.py
@@ -218,14 +218,13 @@ class FederationTimestampLookupServlet(BaseFederationServerServlet):
     `dir` can be `f` or `b` to indicate forwards and backwards in time from the
     given timestamp.
 
-    GET /_matrix/federation/unstable/org.matrix.msc3030/timestamp_to_event/<roomID>?ts=<timestamp>&dir=<direction>
+    GET /_matrix/federation/v1/timestamp_to_event/<roomID>?ts=<timestamp>&dir=<direction>
     {
         "event_id": ...
     }
     """
 
     PATH = "/timestamp_to_event/(?P<room_id>[^/]*)/?"
-    PREFIX = FEDERATION_UNSTABLE_PREFIX + "/org.matrix.msc3030"
 
     async def on_GET(
         self,
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index 91cb791139..636cc62877 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -1284,17 +1284,14 @@ class TimestampLookupRestServlet(RestServlet):
     `dir` can be `f` or `b` to indicate forwards and backwards in time from the
     given timestamp.
 
-    GET /_matrix/client/unstable/org.matrix.msc3030/rooms/<roomID>/timestamp_to_event?ts=<timestamp>&dir=<direction>
+    GET /_matrix/client/v1/rooms/<roomID>/timestamp_to_event?ts=<timestamp>&dir=<direction>
     {
         "event_id": ...
     }
     """
 
     PATTERNS = (
-        re.compile(
-            "^/_matrix/client/unstable/org.matrix.msc3030"
-            "/rooms/(?P<room_id>[^/]*)/timestamp_to_event$"
-        ),
+        re.compile("^/_matrix/client/v1/rooms/(?P<room_id>[^/]*)/timestamp_to_event$"),
     )
 
     def __init__(self, hs: "HomeServer"):
@@ -1421,8 +1418,7 @@ def register_servlets(
     RoomAliasListServlet(hs).register(http_server)
     SearchRestServlet(hs).register(http_server)
     RoomCreateRestServlet(hs).register(http_server)
-    if hs.config.experimental.msc3030_enabled:
-        TimestampLookupRestServlet(hs).register(http_server)
+    TimestampLookupRestServlet(hs).register(http_server)
 
     # Some servlets only get registered for the main process.
     if not is_worker:
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 180a11ef88..3c0a90010b 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -101,8 +101,6 @@ class VersionsRestServlet(RestServlet):
                     "org.matrix.msc3827.stable": True,
                     # Adds support for importing historical messages as per MSC2716
                     "org.matrix.msc2716": self.config.experimental.msc2716_enabled,
-                    # Adds support for jump to date endpoints (/timestamp_to_event) as per MSC3030
-                    "org.matrix.msc3030": self.config.experimental.msc3030_enabled,
                     # Adds support for thread relations, per MSC3440.
                     "org.matrix.msc3440.stable": True,  # TODO: remove when "v1.3" is added above
                     # Support for thread read receipts & notification counts.
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index e919e089cb..b4daace556 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -3546,11 +3546,6 @@ class TimestampLookupTestCase(unittest.HomeserverTestCase):
         login.register_servlets,
     ]
 
-    def default_config(self) -> JsonDict:
-        config = super().default_config()
-        config["experimental_features"] = {"msc3030_enabled": True}
-        return config
-
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self._storage_controllers = self.hs.get_storage_controllers()
 
@@ -3592,7 +3587,7 @@ class TimestampLookupTestCase(unittest.HomeserverTestCase):
 
         channel = self.make_request(
             "GET",
-            f"/_matrix/client/unstable/org.matrix.msc3030/rooms/{room_id}/timestamp_to_event?dir=b&ts={outlier_event.origin_server_ts}",
+            f"/_matrix/client/v1/rooms/{room_id}/timestamp_to_event?dir=b&ts={outlier_event.origin_server_ts}",
             access_token=self.room_owner_tok,
         )
         self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body)
-- 
cgit 1.5.1


From 3da645032722fbf09c1e5efbc51d8c5c78d8a2cd Mon Sep 17 00:00:00 2001
From: Travis Ralston <travisr@matrix.org>
Date: Mon, 28 Nov 2022 16:29:53 -0700
Subject: Initial support for MSC3931: Room version push rule feature flags
 (#14520)

* Add support for MSC3931: Room Version Supports push rule condition

* Create experimental flag for future work, and use it to gate MSC3931

* Changelog entry
---
 changelog.d/14520.feature                |  1 +
 rust/src/push/evaluator.rs               | 26 ++++++++++++++++++++++++++
 rust/src/push/mod.rs                     | 16 ++++++++++++++++
 stubs/synapse/synapse_rust/push.pyi      |  2 ++
 synapse/api/room_versions.py             | 21 ++++++++++++++++++++-
 synapse/config/experimental.py           |  3 +++
 synapse/push/bulk_push_rule_evaluator.py |  6 ++++++
 tests/push/test_push_rule_evaluator.py   |  2 ++
 8 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14520.feature

(limited to 'synapse')

diff --git a/changelog.d/14520.feature b/changelog.d/14520.feature
new file mode 100644
index 0000000000..210acaa8ee
--- /dev/null
+++ b/changelog.d/14520.feature
@@ -0,0 +1 @@
+Add unstable support for an Extensible Events room version (`org.matrix.msc1767.10`) via [MSC1767](https://github.com/matrix-org/matrix-spec-proposals/pull/1767), [MSC3931](https://github.com/matrix-org/matrix-spec-proposals/pull/3931), [MSC3932](https://github.com/matrix-org/matrix-spec-proposals/pull/3932), and [MSC3933](https://github.com/matrix-org/matrix-spec-proposals/pull/3933).
\ No newline at end of file
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index cedd42c54d..e8e3d604ee 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -29,6 +29,10 @@ use super::{
 lazy_static! {
     /// Used to parse the `is` clause in the room member count condition.
     static ref INEQUALITY_EXPR: Regex = Regex::new(r"^([=<>]*)([0-9]+)$").expect("valid regex");
+
+    /// Used to determine which MSC3931 room version feature flags are actually known to
+    /// the push evaluator.
+    static ref KNOWN_RVER_FLAGS: Vec<String> = vec![];
 }
 
 /// Allows running a set of push rules against a particular event.
@@ -57,6 +61,13 @@ pub struct PushRuleEvaluator {
 
     /// If msc3664, push rules for related events, is enabled.
     related_event_match_enabled: bool,
+
+    /// If MSC3931 is applicable, the feature flags for the room version.
+    room_version_feature_flags: Vec<String>,
+
+    /// If MSC3931 (room version feature flags) is enabled. Usually controlled by the same
+    /// flag as MSC1767 (extensible events core).
+    msc3931_enabled: bool,
 }
 
 #[pymethods]
@@ -70,6 +81,8 @@ impl PushRuleEvaluator {
         notification_power_levels: BTreeMap<String, i64>,
         related_events_flattened: BTreeMap<String, BTreeMap<String, String>>,
         related_event_match_enabled: bool,
+        room_version_feature_flags: Vec<String>,
+        msc3931_enabled: bool,
     ) -> Result<Self, Error> {
         let body = flattened_keys
             .get("content.body")
@@ -84,6 +97,8 @@ impl PushRuleEvaluator {
             sender_power_level,
             related_events_flattened,
             related_event_match_enabled,
+            room_version_feature_flags,
+            msc3931_enabled,
         })
     }
 
@@ -204,6 +219,15 @@ impl PushRuleEvaluator {
                     false
                 }
             }
+            KnownCondition::RoomVersionSupports { feature } => {
+                if !self.msc3931_enabled {
+                    false
+                } else {
+                    let flag = feature.to_string();
+                    KNOWN_RVER_FLAGS.contains(&flag)
+                        && self.room_version_feature_flags.contains(&flag)
+                }
+            }
         };
 
         Ok(result)
@@ -362,6 +386,8 @@ fn push_rule_evaluator() {
         BTreeMap::new(),
         BTreeMap::new(),
         true,
+        vec![],
+        true,
     )
     .unwrap();
 
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index d57800aa4a..eef39f6472 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -277,6 +277,10 @@ pub enum KnownCondition {
     SenderNotificationPermission {
         key: Cow<'static, str>,
     },
+    #[serde(rename = "org.matrix.msc3931.room_version_supports")]
+    RoomVersionSupports {
+        feature: Cow<'static, str>,
+    },
 }
 
 impl IntoPy<PyObject> for Condition {
@@ -491,6 +495,18 @@ fn test_deserialize_unstable_msc3664_condition() {
     ));
 }
 
+#[test]
+fn test_deserialize_unstable_msc3931_condition() {
+    let json =
+        r#"{"kind":"org.matrix.msc3931.room_version_supports","feature":"org.example.feature"}"#;
+
+    let condition: Condition = serde_json::from_str(json).unwrap();
+    assert!(matches!(
+        condition,
+        Condition::Known(KnownCondition::RoomVersionSupports { feature: _ })
+    ));
+}
+
 #[test]
 fn test_deserialize_custom_condition() {
     let json = r#"{"kind":"custom_tag"}"#;
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index ceade65ef9..cbeb49663c 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -41,6 +41,8 @@ class PushRuleEvaluator:
         notification_power_levels: Mapping[str, int],
         related_events_flattened: Mapping[str, Mapping[str, str]],
         related_event_match_enabled: bool,
+        room_version_feature_flags: list[str],
+        msc3931_enabled: bool,
     ): ...
     def run(
         self,
diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py
index e37acb0f1e..1bd1ef3e2b 100644
--- a/synapse/api/room_versions.py
+++ b/synapse/api/room_versions.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Callable, Dict, Optional
+from typing import Callable, Dict, List, Optional
 
 import attr
 
@@ -91,6 +91,12 @@ class RoomVersion:
     msc3787_knock_restricted_join_rule: bool
     # MSC3667: Enforce integer power levels
     msc3667_int_only_power_levels: bool
+    # MSC3931: Adds a push rule condition for "room version feature flags", making
+    # some push rules room version dependent. Note that adding a flag to this list
+    # is not enough to mark it "supported": the push rule evaluator also needs to
+    # support the flag. Unknown flags are ignored by the evaluator, making conditions
+    # fail if used.
+    msc3931_push_features: List[str]
 
 
 class RoomVersions:
@@ -111,6 +117,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3931_push_features=[],
     )
     V2 = RoomVersion(
         "2",
@@ -129,6 +136,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3931_push_features=[],
     )
     V3 = RoomVersion(
         "3",
@@ -147,6 +155,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3931_push_features=[],
     )
     V4 = RoomVersion(
         "4",
@@ -165,6 +174,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3931_push_features=[],
     )
     V5 = RoomVersion(
         "5",
@@ -183,6 +193,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3931_push_features=[],
     )
     V6 = RoomVersion(
         "6",
@@ -201,6 +212,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3931_push_features=[],
     )
     MSC2176 = RoomVersion(
         "org.matrix.msc2176",
@@ -219,6 +231,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3931_push_features=[],
     )
     V7 = RoomVersion(
         "7",
@@ -237,6 +250,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3931_push_features=[],
     )
     V8 = RoomVersion(
         "8",
@@ -255,6 +269,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3931_push_features=[],
     )
     V9 = RoomVersion(
         "9",
@@ -273,6 +288,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3931_push_features=[],
     )
     MSC3787 = RoomVersion(
         "org.matrix.msc3787",
@@ -291,6 +307,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=False,
+        msc3931_push_features=[],
     )
     V10 = RoomVersion(
         "10",
@@ -309,6 +326,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=True,
+        msc3931_push_features=[],
     )
     MSC2716v4 = RoomVersion(
         "org.matrix.msc2716v4",
@@ -327,6 +345,7 @@ class RoomVersions:
         msc2716_redactions=True,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3931_push_features=[],
     )
 
 
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index a503abf364..b3f51fc57d 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -128,3 +128,6 @@ class ExperimentalConfig(Config):
 
         # MSC3912: Relation-based redactions.
         self.msc3912_enabled: bool = experimental.get("msc3912_enabled", False)
+
+        # MSC1767 and friends: Extensible Events
+        self.msc1767_enabled: bool = experimental.get("msc1767_enabled", False)
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 75b7e126ca..9cc3da6d91 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -338,6 +338,10 @@ class BulkPushRuleEvaluator:
             for user_id, level in notification_levels.items():
                 notification_levels[user_id] = int(level)
 
+        room_version_features = event.room_version.msc3931_push_features
+        if not room_version_features:
+            room_version_features = []
+
         evaluator = PushRuleEvaluator(
             _flatten_dict(event),
             room_member_count,
@@ -345,6 +349,8 @@ class BulkPushRuleEvaluator:
             notification_levels,
             related_events,
             self._related_event_match_enabled,
+            room_version_features,
+            self.hs.config.experimental.msc1767_enabled,  # MSC3931 flag
         )
 
         users = rules_by_user.keys()
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index fe7c145840..5ababe6a39 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -62,6 +62,8 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
             power_levels.get("notifications", {}),
             {} if related_events is None else related_events,
             True,
+            event.room_version.msc3931_push_features,
+            True,
         )
 
     def test_display_name(self) -> None:
-- 
cgit 1.5.1


From dd518281208d2fc446f9995ad78949e807d8f5b8 Mon Sep 17 00:00:00 2001
From: Travis Ralston <travisr@matrix.org>
Date: Mon, 28 Nov 2022 17:22:34 -0700
Subject: Create MSC1767 (extensible events) room version; Implement MSC3932
 (#14521)

* Add MSC1767's dedicated room version, based on v10

* Only enable MSC1767 room version if the config flag is on

Using a similar technique to knocking:
https://github.com/matrix-org/synapse/pull/6739/files#diff-3af529eedb0e00279bafb7369370c9654b37792af8eafa0925400e9281d57f0a

* Support MSC3932: Extensible events room version feature flag

* Changelog entry
---
 changelog.d/14521.feature      |  1 +
 rust/src/push/evaluator.rs     | 97 +++++++++++++++++++++++++++++++++++++++++-
 synapse/api/room_versions.py   | 29 ++++++++++++-
 synapse/config/experimental.py |  5 +++
 4 files changed, 130 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14521.feature

(limited to 'synapse')

diff --git a/changelog.d/14521.feature b/changelog.d/14521.feature
new file mode 100644
index 0000000000..210acaa8ee
--- /dev/null
+++ b/changelog.d/14521.feature
@@ -0,0 +1 @@
+Add unstable support for an Extensible Events room version (`org.matrix.msc1767.10`) via [MSC1767](https://github.com/matrix-org/matrix-spec-proposals/pull/1767), [MSC3931](https://github.com/matrix-org/matrix-spec-proposals/pull/3931), [MSC3932](https://github.com/matrix-org/matrix-spec-proposals/pull/3932), and [MSC3933](https://github.com/matrix-org/matrix-spec-proposals/pull/3933).
\ No newline at end of file
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index e8e3d604ee..b4c3039aba 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -12,8 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::borrow::Cow;
 use std::collections::BTreeMap;
 
+use crate::push::{PushRule, PushRules};
 use anyhow::{Context, Error};
 use lazy_static::lazy_static;
 use log::warn;
@@ -32,7 +34,30 @@ lazy_static! {
 
     /// Used to determine which MSC3931 room version feature flags are actually known to
     /// the push evaluator.
-    static ref KNOWN_RVER_FLAGS: Vec<String> = vec![];
+    static ref KNOWN_RVER_FLAGS: Vec<String> = vec![
+        RoomVersionFeatures::ExtensibleEvents.as_str().to_string(),
+    ];
+
+    /// The "safe" rule IDs which are not affected by MSC3932's behaviour (room versions which
+    /// declare Extensible Events support ultimately *disable* push rules which do not declare
+    /// *any* MSC3931 room_version_supports condition).
+    static ref SAFE_EXTENSIBLE_EVENTS_RULE_IDS: Vec<String> = vec![
+        "global/override/.m.rule.master".to_string(),
+        "global/override/.m.rule.roomnotif".to_string(),
+        "global/content/.m.rule.contains_user_name".to_string(),
+    ];
+}
+
+enum RoomVersionFeatures {
+    ExtensibleEvents,
+}
+
+impl RoomVersionFeatures {
+    fn as_str(&self) -> &'static str {
+        match self {
+            RoomVersionFeatures::ExtensibleEvents => "org.matrix.msc3932.extensible_events",
+        }
+    }
 }
 
 /// Allows running a set of push rules against a particular event.
@@ -121,7 +146,22 @@ impl PushRuleEvaluator {
                 continue;
             }
 
+            let rule_id = &push_rule.rule_id().to_string();
+            let extev_flag = &RoomVersionFeatures::ExtensibleEvents.as_str().to_string();
+            let supports_extensible_events = self.room_version_feature_flags.contains(extev_flag);
+            let safe_from_rver_condition = SAFE_EXTENSIBLE_EVENTS_RULE_IDS.contains(rule_id);
+            let mut has_rver_condition = false;
+
             for condition in push_rule.conditions.iter() {
+                has_rver_condition = has_rver_condition
+                    || match condition {
+                        Condition::Known(known) => match known {
+                            // per MSC3932, we just need *any* room version condition to match
+                            KnownCondition::RoomVersionSupports { feature: _ } => true,
+                            _ => false,
+                        },
+                        _ => false,
+                    };
                 match self.match_condition(condition, user_id, display_name) {
                     Ok(true) => {}
                     Ok(false) => continue 'outer,
@@ -132,6 +172,13 @@ impl PushRuleEvaluator {
                 }
             }
 
+            // MSC3932: Disable push rules in extensible event-supporting room versions if they
+            // don't describe *any* MSC3931 room version condition, unless the rule is on the
+            // safe list.
+            if !has_rver_condition && !safe_from_rver_condition && supports_extensible_events {
+                continue;
+            }
+
             let actions = push_rule
                 .actions
                 .iter()
@@ -394,3 +441,51 @@ fn push_rule_evaluator() {
     let result = evaluator.run(&FilteredPushRules::default(), None, Some("bob"));
     assert_eq!(result.len(), 3);
 }
+
+#[test]
+fn test_requires_room_version_supports_condition() {
+    let mut flattened_keys = BTreeMap::new();
+    flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string());
+    let flags = vec![RoomVersionFeatures::ExtensibleEvents.as_str().to_string()];
+    let evaluator = PushRuleEvaluator::py_new(
+        flattened_keys,
+        10,
+        Some(0),
+        BTreeMap::new(),
+        BTreeMap::new(),
+        false,
+        flags,
+        true,
+    )
+    .unwrap();
+
+    // first test: are the master and contains_user_name rules excluded from the "requires room
+    // version condition" check?
+    let mut result = evaluator.run(
+        &FilteredPushRules::default(),
+        Some("@bob:example.org"),
+        None,
+    );
+    assert_eq!(result.len(), 3);
+
+    // second test: if an appropriate push rule is in play, does it get handled?
+    let custom_rule = PushRule {
+        rule_id: Cow::from("global/underride/.org.example.extensible"),
+        priority_class: 1, // underride
+        conditions: Cow::from(vec![Condition::Known(
+            KnownCondition::RoomVersionSupports {
+                feature: Cow::from(RoomVersionFeatures::ExtensibleEvents.as_str().to_string()),
+            },
+        )]),
+        actions: Cow::from(vec![Action::Notify]),
+        default: false,
+        default_enabled: true,
+    };
+    let rules = PushRules::new(vec![custom_rule]);
+    result = evaluator.run(
+        &FilteredPushRules::py_new(rules, BTreeMap::new(), true),
+        None,
+        None,
+    );
+    assert_eq!(result.len(), 1);
+}
diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py
index 1bd1ef3e2b..ac62011c9f 100644
--- a/synapse/api/room_versions.py
+++ b/synapse/api/room_versions.py
@@ -51,6 +51,13 @@ class RoomDisposition:
     UNSTABLE = "unstable"
 
 
+class PushRuleRoomFlag:
+    """Enum for listing possible MSC3931 room version feature flags, for push rules"""
+
+    # MSC3932: Room version supports MSC1767 Extensible Events.
+    EXTENSIBLE_EVENTS = "org.matrix.msc3932.extensible_events"
+
+
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class RoomVersion:
     """An object which describes the unique attributes of a room version."""
@@ -96,7 +103,7 @@ class RoomVersion:
     # is not enough to mark it "supported": the push rule evaluator also needs to
     # support the flag. Unknown flags are ignored by the evaluator, making conditions
     # fail if used.
-    msc3931_push_features: List[str]
+    msc3931_push_features: List[str]  # values from PushRuleRoomFlag
 
 
 class RoomVersions:
@@ -347,6 +354,26 @@ class RoomVersions:
         msc3667_int_only_power_levels=False,
         msc3931_push_features=[],
     )
+    MSC1767v10 = RoomVersion(
+        # MSC1767 (Extensible Events) based on room version "10"
+        "org.matrix.msc1767.10",
+        RoomDisposition.UNSTABLE,
+        EventFormatVersions.ROOM_V4_PLUS,
+        StateResolutionVersions.V2,
+        enforce_key_validity=True,
+        special_case_aliases_auth=False,
+        strict_canonicaljson=True,
+        limit_notifications_power_levels=True,
+        msc2176_redaction_rules=False,
+        msc3083_join_rules=True,
+        msc3375_redaction_rules=True,
+        msc2403_knocking=True,
+        msc2716_historical=False,
+        msc2716_redactions=False,
+        msc3787_knock_restricted_join_rule=True,
+        msc3667_int_only_power_levels=True,
+        msc3931_push_features=[PushRuleRoomFlag.EXTENSIBLE_EVENTS],
+    )
 
 
 KNOWN_ROOM_VERSIONS: Dict[str, RoomVersion] = {
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index b3f51fc57d..573fa0386f 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -16,6 +16,7 @@ from typing import Any, Optional
 
 import attr
 
+from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersions
 from synapse.config._base import Config
 from synapse.types import JsonDict
 
@@ -131,3 +132,7 @@ class ExperimentalConfig(Config):
 
         # MSC1767 and friends: Extensible Events
         self.msc1767_enabled: bool = experimental.get("msc1767_enabled", False)
+        if self.msc1767_enabled:
+            # Enable room version (and thus applicable push rules from MSC3931/3932)
+            version_id = RoomVersions.MSC1767v10.identifier
+            KNOWN_ROOM_VERSIONS[version_id] = RoomVersions.MSC1767v10
-- 
cgit 1.5.1


From 9ccc09fe9e332a71b8cf5bf42b16f6acf5a6887d Mon Sep 17 00:00:00 2001
From: Travis Ralston <travisr@matrix.org>
Date: Mon, 28 Nov 2022 18:02:41 -0700
Subject: Support MSC1767's `content.body` behaviour; Add base rules from
 MSC3933 (#14524)

* Support MSC1767's `content.body` behaviour in push rules

* Add the base rules from MSC3933

* Changelog entry

* Flip condition around for finding `m.markup`

* Remove forgotten import
---
 changelog.d/14524.feature                   |   1 +
 rust/src/push/base_rules.rs                 | 270 ++++++++++++++++++++++++++++
 rust/src/push/evaluator.rs                  |   2 +-
 rust/src/push/mod.rs                        |   7 +
 stubs/synapse/synapse_rust/push.pyi         |   6 +-
 synapse/push/bulk_push_rule_evaluator.py    |  29 ++-
 synapse/storage/databases/main/push_rule.py |   5 +-
 7 files changed, 316 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14524.feature

(limited to 'synapse')

diff --git a/changelog.d/14524.feature b/changelog.d/14524.feature
new file mode 100644
index 0000000000..210acaa8ee
--- /dev/null
+++ b/changelog.d/14524.feature
@@ -0,0 +1 @@
+Add unstable support for an Extensible Events room version (`org.matrix.msc1767.10`) via [MSC1767](https://github.com/matrix-org/matrix-spec-proposals/pull/1767), [MSC3931](https://github.com/matrix-org/matrix-spec-proposals/pull/3931), [MSC3932](https://github.com/matrix-org/matrix-spec-proposals/pull/3932), and [MSC3933](https://github.com/matrix-org/matrix-spec-proposals/pull/3933).
\ No newline at end of file
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index 49802fa4eb..35129691ca 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -274,6 +274,156 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[
         default: true,
         default_enabled: true,
     },
+    PushRule {
+        rule_id: Cow::Borrowed(
+            "global/underride/.org.matrix.msc3933.rule.extensible.encrypted_room_one_to_one",
+        ),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                // MSC3933: Type changed from template rule - see MSC.
+                pattern: Some(Cow::Borrowed("org.matrix.msc1767.encrypted")),
+                pattern_type: None,
+            })),
+            Condition::Known(KnownCondition::RoomMemberCount {
+                is: Some(Cow::Borrowed("2")),
+            }),
+            // MSC3933: Add condition on top of template rule - see MSC.
+            Condition::Known(KnownCondition::RoomVersionSupports {
+                // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally
+                feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"),
+            }),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed(
+            "global/underride/.org.matrix.msc3933.rule.extensible.message.room_one_to_one",
+        ),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                // MSC3933: Type changed from template rule - see MSC.
+                pattern: Some(Cow::Borrowed("org.matrix.msc1767.message")),
+                pattern_type: None,
+            })),
+            Condition::Known(KnownCondition::RoomMemberCount {
+                is: Some(Cow::Borrowed("2")),
+            }),
+            // MSC3933: Add condition on top of template rule - see MSC.
+            Condition::Known(KnownCondition::RoomVersionSupports {
+                // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally
+                feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"),
+            }),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed(
+            "global/underride/.org.matrix.msc3933.rule.extensible.file.room_one_to_one",
+        ),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                // MSC3933: Type changed from template rule - see MSC.
+                pattern: Some(Cow::Borrowed("org.matrix.msc1767.file")),
+                pattern_type: None,
+            })),
+            Condition::Known(KnownCondition::RoomMemberCount {
+                is: Some(Cow::Borrowed("2")),
+            }),
+            // MSC3933: Add condition on top of template rule - see MSC.
+            Condition::Known(KnownCondition::RoomVersionSupports {
+                // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally
+                feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"),
+            }),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed(
+            "global/underride/.org.matrix.msc3933.rule.extensible.image.room_one_to_one",
+        ),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                // MSC3933: Type changed from template rule - see MSC.
+                pattern: Some(Cow::Borrowed("org.matrix.msc1767.image")),
+                pattern_type: None,
+            })),
+            Condition::Known(KnownCondition::RoomMemberCount {
+                is: Some(Cow::Borrowed("2")),
+            }),
+            // MSC3933: Add condition on top of template rule - see MSC.
+            Condition::Known(KnownCondition::RoomVersionSupports {
+                // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally
+                feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"),
+            }),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed(
+            "global/underride/.org.matrix.msc3933.rule.extensible.video.room_one_to_one",
+        ),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                // MSC3933: Type changed from template rule - see MSC.
+                pattern: Some(Cow::Borrowed("org.matrix.msc1767.video")),
+                pattern_type: None,
+            })),
+            Condition::Known(KnownCondition::RoomMemberCount {
+                is: Some(Cow::Borrowed("2")),
+            }),
+            // MSC3933: Add condition on top of template rule - see MSC.
+            Condition::Known(KnownCondition::RoomVersionSupports {
+                // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally
+                feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"),
+            }),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed(
+            "global/underride/.org.matrix.msc3933.rule.extensible.audio.room_one_to_one",
+        ),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                // MSC3933: Type changed from template rule - see MSC.
+                pattern: Some(Cow::Borrowed("org.matrix.msc1767.audio")),
+                pattern_type: None,
+            })),
+            Condition::Known(KnownCondition::RoomMemberCount {
+                is: Some(Cow::Borrowed("2")),
+            }),
+            // MSC3933: Add condition on top of template rule - see MSC.
+            Condition::Known(KnownCondition::RoomVersionSupports {
+                // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally
+                feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"),
+            }),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
     PushRule {
         rule_id: Cow::Borrowed("global/underride/.m.rule.message"),
         priority_class: 1,
@@ -302,6 +452,126 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[
         default: true,
         default_enabled: true,
     },
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.org.matrix.msc1767.rule.extensible.encrypted"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                // MSC3933: Type changed from template rule - see MSC.
+                pattern: Some(Cow::Borrowed("m.encrypted")),
+                pattern_type: None,
+            })),
+            // MSC3933: Add condition on top of template rule - see MSC.
+            Condition::Known(KnownCondition::RoomVersionSupports {
+                // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally
+                feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"),
+            }),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.org.matrix.msc1767.rule.extensible.message"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                // MSC3933: Type changed from template rule - see MSC.
+                pattern: Some(Cow::Borrowed("m.message")),
+                pattern_type: None,
+            })),
+            // MSC3933: Add condition on top of template rule - see MSC.
+            Condition::Known(KnownCondition::RoomVersionSupports {
+                // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally
+                feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"),
+            }),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.org.matrix.msc1767.rule.extensible.file"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                // MSC3933: Type changed from template rule - see MSC.
+                pattern: Some(Cow::Borrowed("m.file")),
+                pattern_type: None,
+            })),
+            // MSC3933: Add condition on top of template rule - see MSC.
+            Condition::Known(KnownCondition::RoomVersionSupports {
+                // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally
+                feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"),
+            }),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.org.matrix.msc1767.rule.extensible.image"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                // MSC3933: Type changed from template rule - see MSC.
+                pattern: Some(Cow::Borrowed("m.image")),
+                pattern_type: None,
+            })),
+            // MSC3933: Add condition on top of template rule - see MSC.
+            Condition::Known(KnownCondition::RoomVersionSupports {
+                // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally
+                feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"),
+            }),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.org.matrix.msc1767.rule.extensible.video"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                // MSC3933: Type changed from template rule - see MSC.
+                pattern: Some(Cow::Borrowed("m.video")),
+                pattern_type: None,
+            })),
+            // MSC3933: Add condition on top of template rule - see MSC.
+            Condition::Known(KnownCondition::RoomVersionSupports {
+                // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally
+                feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"),
+            }),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.org.matrix.msc1767.rule.extensible.audio"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                // MSC3933: Type changed from template rule - see MSC.
+                pattern: Some(Cow::Borrowed("m.audio")),
+                pattern_type: None,
+            })),
+            // MSC3933: Add condition on top of template rule - see MSC.
+            Condition::Known(KnownCondition::RoomVersionSupports {
+                // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally
+                feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"),
+            }),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
     PushRule {
         rule_id: Cow::Borrowed("global/underride/.im.vector.jitsi"),
         priority_class: 1,
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index b4c3039aba..1cd54f7e2c 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -483,7 +483,7 @@ fn test_requires_room_version_supports_condition() {
     };
     let rules = PushRules::new(vec![custom_rule]);
     result = evaluator.run(
-        &FilteredPushRules::py_new(rules, BTreeMap::new(), true),
+        &FilteredPushRules::py_new(rules, BTreeMap::new(), true, true),
         None,
         None,
     );
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index eef39f6472..2e9d3e38a1 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -412,6 +412,7 @@ pub struct FilteredPushRules {
     push_rules: PushRules,
     enabled_map: BTreeMap<String, bool>,
     msc3664_enabled: bool,
+    msc1767_enabled: bool,
 }
 
 #[pymethods]
@@ -421,11 +422,13 @@ impl FilteredPushRules {
         push_rules: PushRules,
         enabled_map: BTreeMap<String, bool>,
         msc3664_enabled: bool,
+        msc1767_enabled: bool,
     ) -> Self {
         Self {
             push_rules,
             enabled_map,
             msc3664_enabled,
+            msc1767_enabled,
         }
     }
 
@@ -450,6 +453,10 @@ impl FilteredPushRules {
                     return false;
                 }
 
+                if !self.msc1767_enabled && rule.rule_id.contains("org.matrix.msc1767") {
+                    return false;
+                }
+
                 true
             })
             .map(|r| {
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index cbeb49663c..a6a586a0b5 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -26,7 +26,11 @@ class PushRules:
 
 class FilteredPushRules:
     def __init__(
-        self, push_rules: PushRules, enabled_map: Dict[str, bool], msc3664_enabled: bool
+        self,
+        push_rules: PushRules,
+        enabled_map: Dict[str, bool],
+        msc3664_enabled: bool,
+        msc1767_enabled: bool,
     ): ...
     def rules(self) -> Collection[Tuple[PushRule, bool]]: ...
 
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 9cc3da6d91..d6b377860f 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -29,6 +29,7 @@ from typing import (
 from prometheus_client import Counter
 
 from synapse.api.constants import MAIN_TIMELINE, EventTypes, Membership, RelationTypes
+from synapse.api.room_versions import PushRuleRoomFlag, RoomVersion
 from synapse.event_auth import auth_types_for_event, get_user_power_level
 from synapse.events import EventBase, relation_from_event
 from synapse.events.snapshot import EventContext
@@ -343,7 +344,7 @@ class BulkPushRuleEvaluator:
             room_version_features = []
 
         evaluator = PushRuleEvaluator(
-            _flatten_dict(event),
+            _flatten_dict(event, room_version=event.room_version),
             room_member_count,
             sender_power_level,
             notification_levels,
@@ -426,6 +427,7 @@ StateGroup = Union[object, int]
 
 def _flatten_dict(
     d: Union[EventBase, Mapping[str, Any]],
+    room_version: Optional[RoomVersion] = None,
     prefix: Optional[List[str]] = None,
     result: Optional[Dict[str, str]] = None,
 ) -> Dict[str, str]:
@@ -437,6 +439,31 @@ def _flatten_dict(
         if isinstance(value, str):
             result[".".join(prefix + [key])] = value.lower()
         elif isinstance(value, Mapping):
+            # do not set `room_version` due to recursion considerations below
             _flatten_dict(value, prefix=(prefix + [key]), result=result)
 
+    # `room_version` should only ever be set when looking at the top level of an event
+    if (
+        room_version is not None
+        and PushRuleRoomFlag.EXTENSIBLE_EVENTS in room_version.msc3931_push_features
+        and isinstance(d, EventBase)
+    ):
+        # Room supports extensible events: replace `content.body` with the plain text
+        # representation from `m.markup`, as per MSC1767.
+        markup = d.get("content").get("m.markup")
+        if room_version.identifier.startswith("org.matrix.msc1767."):
+            markup = d.get("content").get("org.matrix.msc1767.markup")
+        if markup is not None and isinstance(markup, list):
+            text = ""
+            for rep in markup:
+                if not isinstance(rep, dict):
+                    # invalid markup - skip all processing
+                    break
+                if rep.get("mimetype", "text/plain") == "text/plain":
+                    rep_text = rep.get("body")
+                    if rep_text is not None and isinstance(rep_text, str):
+                        text = rep_text.lower()
+                        break
+            result["content.body"] = text
+
     return result
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index 12ad44dbb3..d4c64c46ad 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -84,7 +84,10 @@ def _load_rules(
     push_rules = PushRules(ruleslist)
 
     filtered_rules = FilteredPushRules(
-        push_rules, enabled_map, msc3664_enabled=experimental_config.msc3664_enabled
+        push_rules,
+        enabled_map,
+        msc3664_enabled=experimental_config.msc3664_enabled,
+        msc1767_enabled=experimental_config.msc1767_enabled,
     )
 
     return filtered_rules
-- 
cgit 1.5.1


From 72f3e381375ba10d576a23025ca312397114de6b Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Mon, 28 Nov 2022 19:18:12 -0800
Subject: Fix possible variable shadow in `create_new_client_event` (#14575)

---
 changelog.d/14575.misc      | 1 +
 synapse/handlers/message.py | 6 ++++--
 2 files changed, 5 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14575.misc

(limited to 'synapse')

diff --git a/changelog.d/14575.misc b/changelog.d/14575.misc
new file mode 100644
index 0000000000..f6fa54eaa2
--- /dev/null
+++ b/changelog.d/14575.misc
@@ -0,0 +1 @@
+Fix a possible variable shadow in `create_new_client_event`.
\ No newline at end of file
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 4cf593cfdc..5cbe89f4fd 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -1135,11 +1135,13 @@ class EventCreationHandler:
             )
             state_events = await self.store.get_events_as_list(state_event_ids)
             # Create a StateMap[str]
-            state_map = {(e.type, e.state_key): e.event_id for e in state_events}
+            current_state_ids = {
+                (e.type, e.state_key): e.event_id for e in state_events
+            }
             # Actually strip down and only use the necessary auth events
             auth_event_ids = self._event_auth_handler.compute_auth_events(
                 event=temp_event,
-                current_state_ids=state_map,
+                current_state_ids=current_state_ids,
                 for_verification=False,
             )
 
-- 
cgit 1.5.1


From c7e29ca277cf60bfdc488b93f4321b046fa6b46f Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 29 Nov 2022 10:36:41 +0000
Subject: POC delete stale non-e2e devices for users (#14038)

This should help reduce the number of devices racked up by e.g. simple bots that repeatedly log in.

We only delete non-e2e devices as they should be safe to delete, whereas if we delete e2e devices for a user we may accidentally break their ability to receive e2e keys for a message.

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
---
 changelog.d/14038.misc                    |  1 +
 synapse/handlers/device.py                | 13 +++++-
 synapse/storage/databases/main/devices.py | 67 ++++++++++++++++++++++++++++++-
 tests/handlers/test_device.py             |  2 +-
 tests/storage/test_client_ips.py          |  4 +-
 5 files changed, 83 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14038.misc

(limited to 'synapse')

diff --git a/changelog.d/14038.misc b/changelog.d/14038.misc
new file mode 100644
index 0000000000..f9bfc581ad
--- /dev/null
+++ b/changelog.d/14038.misc
@@ -0,0 +1 @@
+Prune user's old devices on login if they have too many.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index b1e55e1b9e..7c4dd8cf5a 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -421,6 +421,9 @@ class DeviceHandler(DeviceWorkerHandler):
 
         self._check_device_name_length(initial_device_display_name)
 
+        # Prune the user's device list if they already have a lot of devices.
+        await self._prune_too_many_devices(user_id)
+
         if device_id is not None:
             new_device = await self.store.store_device(
                 user_id=user_id,
@@ -452,6 +455,14 @@ class DeviceHandler(DeviceWorkerHandler):
 
         raise errors.StoreError(500, "Couldn't generate a device ID.")
 
+    async def _prune_too_many_devices(self, user_id: str) -> None:
+        """Delete any excess old devices this user may have."""
+        device_ids = await self.store.check_too_many_devices_for_user(user_id)
+        if not device_ids:
+            return
+
+        await self.delete_devices(user_id, device_ids)
+
     async def _delete_stale_devices(self) -> None:
         """Background task that deletes devices which haven't been accessed for more than
         a configured time period.
@@ -481,7 +492,7 @@ class DeviceHandler(DeviceWorkerHandler):
             device_ids = [d for d in device_ids if d != except_device_id]
         await self.delete_devices(user_id, device_ids)
 
-    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
+    async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None:
         """Delete several devices
 
         Args:
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 534f7fc04a..1e83c62753 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1533,6 +1533,70 @@ class DeviceBackgroundUpdateStore(SQLBaseStore):
 
         return rows
 
+    async def check_too_many_devices_for_user(self, user_id: str) -> Collection[str]:
+        """Check if the user has a lot of devices, and if so return the set of
+        devices we can prune.
+
+        This does *not* return hidden devices or devices with E2E keys.
+        """
+
+        num_devices = await self.db_pool.simple_select_one_onecol(
+            table="devices",
+            keyvalues={"user_id": user_id, "hidden": False},
+            retcol="COALESCE(COUNT(*), 0)",
+            desc="count_devices",
+        )
+
+        # We let users have up to ten devices without pruning.
+        if num_devices <= 10:
+            return ()
+
+        # We prune everything older than N days.
+        max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000
+
+        if num_devices > 50:
+            # If the user has more than 50 devices, then we chose a last seen
+            # that ensures we keep at most 50 devices.
+            sql = """
+                SELECT last_seen FROM devices
+                WHERE
+                    user_id = ?
+                    AND NOT hidden
+                    AND last_seen IS NOT NULL
+                    AND key_json IS NULL
+                ORDER BY last_seen DESC
+                LIMIT 1
+                OFFSET 50
+            """
+
+            rows = await self.db_pool.execute(
+                "check_too_many_devices_for_user_last_seen", None, sql, (user_id,)
+            )
+            if rows:
+                max_last_seen = max(rows[0][0], max_last_seen)
+
+        # Now fetch the devices to delete.
+        sql = """
+            SELECT DISTINCT device_id FROM devices
+            LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
+            WHERE
+                user_id = ?
+                AND NOT hidden
+                AND last_seen < ?
+                AND key_json IS NULL
+        """
+
+        def check_too_many_devices_for_user_txn(
+            txn: LoggingTransaction,
+        ) -> Collection[str]:
+            txn.execute(sql, (user_id, max_last_seen))
+            return {device_id for device_id, in txn}
+
+        return await self.db_pool.runInteraction(
+            "check_too_many_devices_for_user",
+            check_too_many_devices_for_user_txn,
+        )
+
 
 class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
     # Because we have write access, this will be a StreamIdGenerator
@@ -1591,6 +1655,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
                 values={},
                 insertion_values={
                     "display_name": initial_device_display_name,
+                    "last_seen": self._clock.time_msec(),
                     "hidden": False,
                 },
                 desc="store_device",
@@ -1636,7 +1701,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
             )
             raise StoreError(500, "Problem storing device.")
 
-    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
+    async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None:
         """Deletes several devices.
 
         Args:
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index ce7525e29c..a456bffd63 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -115,7 +115,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
                 "device_id": "xyz",
                 "display_name": "display 0",
                 "last_seen_ip": None,
-                "last_seen_ts": None,
+                "last_seen_ts": 1000000,
             },
             device_map["xyz"],
         )
diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py
index 49ad3c1324..a9af1babed 100644
--- a/tests/storage/test_client_ips.py
+++ b/tests/storage/test_client_ips.py
@@ -169,6 +169,8 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
             )
         )
 
+        last_seen = self.clock.time_msec()
+
         if after_persisting:
             # Trigger the storage loop
             self.reactor.advance(10)
@@ -189,7 +191,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
                         "device_id": device_id,
                         "ip": None,
                         "user_agent": None,
-                        "last_seen": None,
+                        "last_seen": last_seen,
                     },
                 ],
             )
-- 
cgit 1.5.1


From e860316818da4bd643d567708adb8d104f4a3351 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 29 Nov 2022 13:05:07 +0000
Subject: Fix `UndefinedColumn: column "key_json" does not exist` errors when
 handling users with more than 50 non-E2E devices (#14580)

---
 synapse/storage/databases/main/devices.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'synapse')

diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 1e83c62753..0378035cff 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1559,6 +1559,7 @@ class DeviceBackgroundUpdateStore(SQLBaseStore):
             # that ensures we keep at most 50 devices.
             sql = """
                 SELECT last_seen FROM devices
+                LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
                 WHERE
                     user_id = ?
                     AND NOT hidden
-- 
cgit 1.5.1


From 13aa29db1ddc925beb35f5f1da8fd1a1bcc91373 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 29 Nov 2022 10:49:23 -0500
Subject: Advertise support for Matrix v1.5. (#14576)

All features of Matrix v1.5 were already supported: this was
mostly a maintenance release.
---
 changelog.d/14576.feature       | 1 +
 synapse/rest/client/versions.py | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 changelog.d/14576.feature

(limited to 'synapse')

diff --git a/changelog.d/14576.feature b/changelog.d/14576.feature
new file mode 100644
index 0000000000..4fe8cb2667
--- /dev/null
+++ b/changelog.d/14576.feature
@@ -0,0 +1 @@
+Advertise support for Matrix 1.5 on `/_matrix/client/versions`.
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 3c0a90010b..e19c0946c0 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -77,6 +77,7 @@ class VersionsRestServlet(RestServlet):
                     "v1.2",
                     "v1.3",
                     "v1.4",
+                    "v1.5",
                 ],
                 # as per MSC1497:
                 "unstable_features": {
-- 
cgit 1.5.1


From c29e2c630624beb0b5557aa0f7ccdcedbe62def1 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 29 Nov 2022 17:48:48 +0000
Subject: Revert "POC delete stale non-e2e devices for users (#14038)" (#14582)

---
 changelog.d/14582.bugfix                  |  1 +
 synapse/handlers/device.py                | 13 +-----
 synapse/storage/databases/main/devices.py | 68 +------------------------------
 tests/handlers/test_device.py             |  2 +-
 tests/storage/test_client_ips.py          |  4 +-
 5 files changed, 5 insertions(+), 83 deletions(-)
 create mode 100644 changelog.d/14582.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14582.bugfix b/changelog.d/14582.bugfix
new file mode 100644
index 0000000000..caad468e70
--- /dev/null
+++ b/changelog.d/14582.bugfix
@@ -0,0 +1 @@
+Fix a regression in Synapse 1.73.0rc1 where Synapse's main process would stop responding to HTTP requests when a user with a large number of devices logs in.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 7c4dd8cf5a..b1e55e1b9e 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -421,9 +421,6 @@ class DeviceHandler(DeviceWorkerHandler):
 
         self._check_device_name_length(initial_device_display_name)
 
-        # Prune the user's device list if they already have a lot of devices.
-        await self._prune_too_many_devices(user_id)
-
         if device_id is not None:
             new_device = await self.store.store_device(
                 user_id=user_id,
@@ -455,14 +452,6 @@ class DeviceHandler(DeviceWorkerHandler):
 
         raise errors.StoreError(500, "Couldn't generate a device ID.")
 
-    async def _prune_too_many_devices(self, user_id: str) -> None:
-        """Delete any excess old devices this user may have."""
-        device_ids = await self.store.check_too_many_devices_for_user(user_id)
-        if not device_ids:
-            return
-
-        await self.delete_devices(user_id, device_ids)
-
     async def _delete_stale_devices(self) -> None:
         """Background task that deletes devices which haven't been accessed for more than
         a configured time period.
@@ -492,7 +481,7 @@ class DeviceHandler(DeviceWorkerHandler):
             device_ids = [d for d in device_ids if d != except_device_id]
         await self.delete_devices(user_id, device_ids)
 
-    async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None:
+    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
         """Delete several devices
 
         Args:
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 0378035cff..534f7fc04a 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1533,71 +1533,6 @@ class DeviceBackgroundUpdateStore(SQLBaseStore):
 
         return rows
 
-    async def check_too_many_devices_for_user(self, user_id: str) -> Collection[str]:
-        """Check if the user has a lot of devices, and if so return the set of
-        devices we can prune.
-
-        This does *not* return hidden devices or devices with E2E keys.
-        """
-
-        num_devices = await self.db_pool.simple_select_one_onecol(
-            table="devices",
-            keyvalues={"user_id": user_id, "hidden": False},
-            retcol="COALESCE(COUNT(*), 0)",
-            desc="count_devices",
-        )
-
-        # We let users have up to ten devices without pruning.
-        if num_devices <= 10:
-            return ()
-
-        # We prune everything older than N days.
-        max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000
-
-        if num_devices > 50:
-            # If the user has more than 50 devices, then we chose a last seen
-            # that ensures we keep at most 50 devices.
-            sql = """
-                SELECT last_seen FROM devices
-                LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
-                WHERE
-                    user_id = ?
-                    AND NOT hidden
-                    AND last_seen IS NOT NULL
-                    AND key_json IS NULL
-                ORDER BY last_seen DESC
-                LIMIT 1
-                OFFSET 50
-            """
-
-            rows = await self.db_pool.execute(
-                "check_too_many_devices_for_user_last_seen", None, sql, (user_id,)
-            )
-            if rows:
-                max_last_seen = max(rows[0][0], max_last_seen)
-
-        # Now fetch the devices to delete.
-        sql = """
-            SELECT DISTINCT device_id FROM devices
-            LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
-            WHERE
-                user_id = ?
-                AND NOT hidden
-                AND last_seen < ?
-                AND key_json IS NULL
-        """
-
-        def check_too_many_devices_for_user_txn(
-            txn: LoggingTransaction,
-        ) -> Collection[str]:
-            txn.execute(sql, (user_id, max_last_seen))
-            return {device_id for device_id, in txn}
-
-        return await self.db_pool.runInteraction(
-            "check_too_many_devices_for_user",
-            check_too_many_devices_for_user_txn,
-        )
-
 
 class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
     # Because we have write access, this will be a StreamIdGenerator
@@ -1656,7 +1591,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
                 values={},
                 insertion_values={
                     "display_name": initial_device_display_name,
-                    "last_seen": self._clock.time_msec(),
                     "hidden": False,
                 },
                 desc="store_device",
@@ -1702,7 +1636,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
             )
             raise StoreError(500, "Problem storing device.")
 
-    async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None:
+    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
         """Deletes several devices.
 
         Args:
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index a456bffd63..ce7525e29c 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -115,7 +115,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
                 "device_id": "xyz",
                 "display_name": "display 0",
                 "last_seen_ip": None,
-                "last_seen_ts": 1000000,
+                "last_seen_ts": None,
             },
             device_map["xyz"],
         )
diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py
index a9af1babed..49ad3c1324 100644
--- a/tests/storage/test_client_ips.py
+++ b/tests/storage/test_client_ips.py
@@ -169,8 +169,6 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
             )
         )
 
-        last_seen = self.clock.time_msec()
-
         if after_persisting:
             # Trigger the storage loop
             self.reactor.advance(10)
@@ -191,7 +189,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
                         "device_id": device_id,
                         "ip": None,
                         "user_agent": None,
-                        "last_seen": last_seen,
+                        "last_seen": None,
                     },
                 ],
             )
-- 
cgit 1.5.1


From ecb6fe9d9cf8375b760eb727be0e1dec3612e026 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Wed, 30 Nov 2022 11:59:57 +0000
Subject: Stop using deprecated `keyIds` param on /key/v2/server (#14525)

Fixes #14523.
---
 changelog.d/14490.feature                     |   1 +
 changelog.d/14490.misc                        |   1 -
 changelog.d/14525.feature                     |   1 +
 synapse/crypto/keyring.py                     | 107 +++++++++++---------------
 tests/crypto/test_keyring.py                  |  14 +---
 tests/rest/key/v2/test_remote_key_resource.py |   5 +-
 6 files changed, 47 insertions(+), 82 deletions(-)
 create mode 100644 changelog.d/14490.feature
 delete mode 100644 changelog.d/14490.misc
 create mode 100644 changelog.d/14525.feature

(limited to 'synapse')

diff --git a/changelog.d/14490.feature b/changelog.d/14490.feature
new file mode 100644
index 0000000000..c7cb571294
--- /dev/null
+++ b/changelog.d/14490.feature
@@ -0,0 +1 @@
+Stop using deprecated `keyIds` parameter when calling `/_matrix/key/v2/server`.
diff --git a/changelog.d/14490.misc b/changelog.d/14490.misc
deleted file mode 100644
index c0a4daa885..0000000000
--- a/changelog.d/14490.misc
+++ /dev/null
@@ -1 +0,0 @@
-Fix a bug introduced in Synapse 0.9 where it would fail to fetch server keys whose IDs contain a forward slash.
diff --git a/changelog.d/14525.feature b/changelog.d/14525.feature
new file mode 100644
index 0000000000..c7cb571294
--- /dev/null
+++ b/changelog.d/14525.feature
@@ -0,0 +1 @@
+Stop using deprecated `keyIds` parameter when calling `/_matrix/key/v2/server`.
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index ed15f88350..69310d9035 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -14,7 +14,6 @@
 
 import abc
 import logging
-import urllib
 from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Optional, Tuple
 
 import attr
@@ -813,31 +812,27 @@ class ServerKeyFetcher(BaseV2KeyFetcher):
 
         results = {}
 
-        async def get_key(key_to_fetch_item: _FetchKeyRequest) -> None:
+        async def get_keys(key_to_fetch_item: _FetchKeyRequest) -> None:
             server_name = key_to_fetch_item.server_name
-            key_ids = key_to_fetch_item.key_ids
 
             try:
-                keys = await self.get_server_verify_key_v2_direct(server_name, key_ids)
+                keys = await self.get_server_verify_keys_v2_direct(server_name)
                 results[server_name] = keys
             except KeyLookupError as e:
-                logger.warning(
-                    "Error looking up keys %s from %s: %s", key_ids, server_name, e
-                )
+                logger.warning("Error looking up keys from %s: %s", server_name, e)
             except Exception:
-                logger.exception("Error getting keys %s from %s", key_ids, server_name)
+                logger.exception("Error getting keys from %s", server_name)
 
-        await yieldable_gather_results(get_key, keys_to_fetch)
+        await yieldable_gather_results(get_keys, keys_to_fetch)
         return results
 
-    async def get_server_verify_key_v2_direct(
-        self, server_name: str, key_ids: Iterable[str]
+    async def get_server_verify_keys_v2_direct(
+        self, server_name: str
     ) -> Dict[str, FetchKeyResult]:
         """
 
         Args:
-            server_name:
-            key_ids:
+            server_name: Server to request keys from
 
         Returns:
             Map from key ID to lookup result
@@ -845,57 +840,41 @@ class ServerKeyFetcher(BaseV2KeyFetcher):
         Raises:
             KeyLookupError if there was a problem making the lookup
         """
-        keys: Dict[str, FetchKeyResult] = {}
-
-        for requested_key_id in key_ids:
-            # we may have found this key as a side-effect of asking for another.
-            if requested_key_id in keys:
-                continue
-
-            time_now_ms = self.clock.time_msec()
-            try:
-                response = await self.client.get_json(
-                    destination=server_name,
-                    path="/_matrix/key/v2/server/"
-                    + urllib.parse.quote(requested_key_id, safe=""),
-                    ignore_backoff=True,
-                    # we only give the remote server 10s to respond. It should be an
-                    # easy request to handle, so if it doesn't reply within 10s, it's
-                    # probably not going to.
-                    #
-                    # Furthermore, when we are acting as a notary server, we cannot
-                    # wait all day for all of the origin servers, as the requesting
-                    # server will otherwise time out before we can respond.
-                    #
-                    # (Note that get_json may make 4 attempts, so this can still take
-                    # almost 45 seconds to fetch the headers, plus up to another 60s to
-                    # read the response).
-                    timeout=10000,
-                )
-            except (NotRetryingDestination, RequestSendFailed) as e:
-                # these both have str() representations which we can't really improve
-                # upon
-                raise KeyLookupError(str(e))
-            except HttpResponseException as e:
-                raise KeyLookupError("Remote server returned an error: %s" % (e,))
-
-            assert isinstance(response, dict)
-            if response["server_name"] != server_name:
-                raise KeyLookupError(
-                    "Expected a response for server %r not %r"
-                    % (server_name, response["server_name"])
-                )
-
-            response_keys = await self.process_v2_response(
-                from_server=server_name,
-                response_json=response,
-                time_added_ms=time_now_ms,
+        time_now_ms = self.clock.time_msec()
+        try:
+            response = await self.client.get_json(
+                destination=server_name,
+                path="/_matrix/key/v2/server",
+                ignore_backoff=True,
+                # we only give the remote server 10s to respond. It should be an
+                # easy request to handle, so if it doesn't reply within 10s, it's
+                # probably not going to.
+                #
+                # Furthermore, when we are acting as a notary server, we cannot
+                # wait all day for all of the origin servers, as the requesting
+                # server will otherwise time out before we can respond.
+                #
+                # (Note that get_json may make 4 attempts, so this can still take
+                # almost 45 seconds to fetch the headers, plus up to another 60s to
+                # read the response).
+                timeout=10000,
             )
-            await self.store.store_server_verify_keys(
-                server_name,
-                time_now_ms,
-                ((server_name, key_id, key) for key_id, key in response_keys.items()),
+        except (NotRetryingDestination, RequestSendFailed) as e:
+            # these both have str() representations which we can't really improve
+            # upon
+            raise KeyLookupError(str(e))
+        except HttpResponseException as e:
+            raise KeyLookupError("Remote server returned an error: %s" % (e,))
+
+        assert isinstance(response, dict)
+        if response["server_name"] != server_name:
+            raise KeyLookupError(
+                "Expected a response for server %r not %r"
+                % (server_name, response["server_name"])
             )
-            keys.update(response_keys)
 
-        return keys
+        return await self.process_v2_response(
+            from_server=server_name,
+            response_json=response,
+            time_added_ms=time_now_ms,
+        )
diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py
index 63628aa6b0..f7c309cad0 100644
--- a/tests/crypto/test_keyring.py
+++ b/tests/crypto/test_keyring.py
@@ -433,7 +433,7 @@ class ServerKeyFetcherTestCase(unittest.HomeserverTestCase):
 
         async def get_json(destination, path, **kwargs):
             self.assertEqual(destination, SERVER_NAME)
-            self.assertEqual(path, "/_matrix/key/v2/server/key1")
+            self.assertEqual(path, "/_matrix/key/v2/server")
             return response
 
         self.http_client.get_json.side_effect = get_json
@@ -469,18 +469,6 @@ class ServerKeyFetcherTestCase(unittest.HomeserverTestCase):
         keys = self.get_success(fetcher.get_keys(SERVER_NAME, ["key1"], 0))
         self.assertEqual(keys, {})
 
-    def test_keyid_containing_forward_slash(self) -> None:
-        """We should url-encode any url unsafe chars in key ids.
-
-        Detects https://github.com/matrix-org/synapse/issues/14488.
-        """
-        fetcher = ServerKeyFetcher(self.hs)
-        self.get_success(fetcher.get_keys("example.com", ["key/potato"], 0))
-
-        self.http_client.get_json.assert_called_once()
-        args, kwargs = self.http_client.get_json.call_args
-        self.assertEqual(kwargs["path"], "/_matrix/key/v2/server/key%2Fpotato")
-
 
 class PerspectivesKeyFetcherTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor, clock):
diff --git a/tests/rest/key/v2/test_remote_key_resource.py b/tests/rest/key/v2/test_remote_key_resource.py
index 7f1fba1086..2bb6e27d94 100644
--- a/tests/rest/key/v2/test_remote_key_resource.py
+++ b/tests/rest/key/v2/test_remote_key_resource.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import urllib.parse
 from io import BytesIO, StringIO
 from typing import Any, Dict, Optional, Union
 from unittest.mock import Mock
@@ -65,9 +64,7 @@ class BaseRemoteKeyResourceTestCase(unittest.HomeserverTestCase):
             self.assertTrue(ignore_backoff)
             self.assertEqual(destination, server_name)
             key_id = "%s:%s" % (signing_key.alg, signing_key.version)
-            self.assertEqual(
-                path, "/_matrix/key/v2/server/%s" % (urllib.parse.quote(key_id),)
-            )
+            self.assertEqual(path, "/_matrix/key/v2/server")
 
             response = {
                 "server_name": server_name,
-- 
cgit 1.5.1


From 4569eda94423a10abb69e0f4d5f37eb723ed764b Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Wed, 30 Nov 2022 13:39:47 +0100
Subject: Use servers list approx to send read receipts when in partial state
 (#14549)

Signed-off-by: Mathieu Velten <mathieuv@matrix.org>
---
 changelog.d/14549.misc                     | 1 +
 synapse/federation/sender/__init__.py      | 2 +-
 tests/federation/test_federation_sender.py | 4 ++++
 3 files changed, 6 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14549.misc

(limited to 'synapse')

diff --git a/changelog.d/14549.misc b/changelog.d/14549.misc
new file mode 100644
index 0000000000..d9d863dd20
--- /dev/null
+++ b/changelog.d/14549.misc
@@ -0,0 +1 @@
+Faster joins: use servers list approximation to send read receipts when in partial state instead of waiting for the full state of the room.
\ No newline at end of file
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index fc1d8c88a7..30ebd62883 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -647,7 +647,7 @@ class FederationSender(AbstractFederationSender):
         room_id = receipt.room_id
 
         # Work out which remote servers should be poked and poke them.
-        domains_set = await self._storage_controllers.state.get_current_hosts_in_room(
+        domains_set = await self._storage_controllers.state.get_current_hosts_in_room_or_partial_state_approximation(
             room_id
         )
         domains = [
diff --git a/tests/federation/test_federation_sender.py b/tests/federation/test_federation_sender.py
index 01f147418b..cbc99d30b9 100644
--- a/tests/federation/test_federation_sender.py
+++ b/tests/federation/test_federation_sender.py
@@ -38,6 +38,10 @@ class FederationSenderReceiptsTestCases(HomeserverTestCase):
             return_value=make_awaitable({"test", "host2"})
         )
 
+        hs.get_storage_controllers().state.get_current_hosts_in_room_or_partial_state_approximation = (
+            hs.get_storage_controllers().state.get_current_hosts_in_room
+        )
+
         return hs
 
     @override_config({"send_federation": True})
-- 
cgit 1.5.1


From e8bce8999f21d30affc459755e304a1f4732165c Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Wed, 30 Nov 2022 13:45:06 +0000
Subject: Aggregate unread notif count query for badge count calculation
 (#14255)

Fetch the unread notification counts used by the badge counts
in push notifications for all rooms at once (instead of fetching
them per room).
---
 changelog.d/14255.misc                             |   1 +
 synapse/push/push_tools.py                         |  28 ++--
 .../storage/databases/main/event_push_actions.py   | 149 +++++++++++++++++++++
 tests/storage/test_event_push_actions.py           |  47 +++++--
 4 files changed, 198 insertions(+), 27 deletions(-)
 create mode 100644 changelog.d/14255.misc

(limited to 'synapse')

diff --git a/changelog.d/14255.misc b/changelog.d/14255.misc
new file mode 100644
index 0000000000..39924659c7
--- /dev/null
+++ b/changelog.d/14255.misc
@@ -0,0 +1 @@
+Optimise push badge count calculations. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/push/push_tools.py b/synapse/push/push_tools.py
index edeba27a45..7ee07e4bee 100644
--- a/synapse/push/push_tools.py
+++ b/synapse/push/push_tools.py
@@ -17,7 +17,6 @@ from synapse.events import EventBase
 from synapse.push.presentable_names import calculate_room_name, name_from_member_event
 from synapse.storage.controllers import StorageControllers
 from synapse.storage.databases.main import DataStore
-from synapse.util.async_helpers import concurrently_execute
 
 
 async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) -> int:
@@ -26,23 +25,12 @@ async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) -
 
     badge = len(invites)
 
-    room_notifs = []
-
-    async def get_room_unread_count(room_id: str) -> None:
-        room_notifs.append(
-            await store.get_unread_event_push_actions_by_room_for_user(
-                room_id,
-                user_id,
-            )
-        )
-
-    await concurrently_execute(get_room_unread_count, joins, 10)
-
-    for notifs in room_notifs:
-        # Combine the counts from all the threads.
-        notify_count = notifs.main_timeline.notify_count + sum(
-            n.notify_count for n in notifs.threads.values()
-        )
+    room_to_count = await store.get_unread_counts_by_room_for_user(user_id)
+    for room_id, notify_count in room_to_count.items():
+        # room_to_count may include rooms which the user has left,
+        # ignore those.
+        if room_id not in joins:
+            continue
 
         if notify_count == 0:
             continue
@@ -51,8 +39,10 @@ async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) -
             # return one badge count per conversation
             badge += 1
         else:
-            # increment the badge count by the number of unread messages in the room
+            # Increase badge by number of notifications in room
+            # NOTE: this includes threaded and unthreaded notifications.
             badge += notify_count
+
     return badge
 
 
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index b283ab0f9c..7ebe34f773 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -74,6 +74,7 @@ receipt.
 """
 
 import logging
+from collections import defaultdict
 from typing import (
     TYPE_CHECKING,
     Collection,
@@ -95,6 +96,7 @@ from synapse.storage.database import (
     DatabasePool,
     LoggingDatabaseConnection,
     LoggingTransaction,
+    PostgresEngine,
 )
 from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
 from synapse.storage.databases.main.stream import StreamWorkerStore
@@ -463,6 +465,153 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
         return result
 
+    async def get_unread_counts_by_room_for_user(self, user_id: str) -> Dict[str, int]:
+        """Get the notification count by room for a user. Only considers notifications,
+        not highlight or unread counts, and threads are currently aggregated under their room.
+
+        This function is intentionally not cached because it is called to calculate the
+        unread badge for push notifications and thus the result is expected to change.
+
+        Note that this function assumes the user is a member of the room. Because
+        summary rows are not removed when a user leaves a room, the caller must
+        filter out those results from the result.
+
+        Returns:
+            A map of room ID to notification counts for the given user.
+        """
+        return await self.db_pool.runInteraction(
+            "get_unread_counts_by_room_for_user",
+            self._get_unread_counts_by_room_for_user_txn,
+            user_id,
+        )
+
+    def _get_unread_counts_by_room_for_user_txn(
+        self, txn: LoggingTransaction, user_id: str
+    ) -> Dict[str, int]:
+        receipt_types_clause, args = make_in_list_sql_clause(
+            self.database_engine,
+            "receipt_type",
+            (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE),
+        )
+        args.extend([user_id, user_id])
+
+        receipts_cte = f"""
+            WITH all_receipts AS (
+                SELECT room_id, thread_id, MAX(event_stream_ordering) AS max_receipt_stream_ordering
+                FROM receipts_linearized
+                LEFT JOIN events USING (room_id, event_id)
+                WHERE
+                    {receipt_types_clause}
+                    AND user_id = ?
+                GROUP BY room_id, thread_id
+            )
+        """
+
+        receipts_joins = """
+            LEFT JOIN (
+                SELECT room_id, thread_id,
+                max_receipt_stream_ordering AS threaded_receipt_stream_ordering
+                FROM all_receipts
+                WHERE thread_id IS NOT NULL
+            ) AS threaded_receipts USING (room_id, thread_id)
+            LEFT JOIN (
+                SELECT room_id, thread_id,
+                max_receipt_stream_ordering AS unthreaded_receipt_stream_ordering
+                FROM all_receipts
+                WHERE thread_id IS NULL
+            ) AS unthreaded_receipts USING (room_id)
+        """
+
+        # First get summary counts by room / thread for the user. We use the max receipt
+        # stream ordering of both threaded & unthreaded receipts to compare against the
+        # summary table.
+        #
+        # PostgreSQL and SQLite differ in comparing scalar numerics.
+        if isinstance(self.database_engine, PostgresEngine):
+            # GREATEST ignores NULLs.
+            max_clause = """GREATEST(
+                threaded_receipt_stream_ordering,
+                unthreaded_receipt_stream_ordering
+            )"""
+        else:
+            # MAX returns NULL if any are NULL, so COALESCE to 0 first.
+            max_clause = """MAX(
+                COALESCE(threaded_receipt_stream_ordering, 0),
+                COALESCE(unthreaded_receipt_stream_ordering, 0)
+            )"""
+
+        sql = f"""
+            {receipts_cte}
+            SELECT eps.room_id, eps.thread_id, notif_count
+            FROM event_push_summary AS eps
+            {receipts_joins}
+            WHERE user_id = ?
+                AND notif_count != 0
+                AND (
+                    (last_receipt_stream_ordering IS NULL AND stream_ordering > {max_clause})
+                    OR last_receipt_stream_ordering = {max_clause}
+                )
+        """
+        txn.execute(sql, args)
+
+        seen_thread_ids = set()
+        room_to_count: Dict[str, int] = defaultdict(int)
+
+        for room_id, thread_id, notif_count in txn:
+            room_to_count[room_id] += notif_count
+            seen_thread_ids.add(thread_id)
+
+        # Now get any event push actions that haven't been rotated using the same OR
+        # join and filter by receipt and event push summary rotated up to stream ordering.
+        sql = f"""
+            {receipts_cte}
+            SELECT epa.room_id, epa.thread_id, COUNT(CASE WHEN epa.notif = 1 THEN 1 END) AS notif_count
+            FROM event_push_actions AS epa
+            {receipts_joins}
+            WHERE user_id = ?
+                AND epa.notif = 1
+                AND stream_ordering > (SELECT stream_ordering FROM event_push_summary_stream_ordering)
+                AND (threaded_receipt_stream_ordering IS NULL OR stream_ordering > threaded_receipt_stream_ordering)
+                AND (unthreaded_receipt_stream_ordering IS NULL OR stream_ordering > unthreaded_receipt_stream_ordering)
+            GROUP BY epa.room_id, epa.thread_id
+        """
+        txn.execute(sql, args)
+
+        for room_id, thread_id, notif_count in txn:
+            # Note: only count push actions we have valid summaries for with up to date receipt.
+            if thread_id not in seen_thread_ids:
+                continue
+            room_to_count[room_id] += notif_count
+
+        thread_id_clause, thread_ids_args = make_in_list_sql_clause(
+            self.database_engine, "epa.thread_id", seen_thread_ids
+        )
+
+        # Finally re-check event_push_actions for any rooms not in the summary, ignoring
+        # the rotated up-to position. This handles the case where a read receipt has arrived
+        # but not been rotated meaning the summary table is out of date, so we go back to
+        # the push actions table.
+        sql = f"""
+            {receipts_cte}
+            SELECT epa.room_id, COUNT(CASE WHEN epa.notif = 1 THEN 1 END) AS notif_count
+            FROM event_push_actions AS epa
+            {receipts_joins}
+            WHERE user_id = ?
+            AND NOT {thread_id_clause}
+            AND epa.notif = 1
+            AND (threaded_receipt_stream_ordering IS NULL OR stream_ordering > threaded_receipt_stream_ordering)
+            AND (unthreaded_receipt_stream_ordering IS NULL OR stream_ordering > unthreaded_receipt_stream_ordering)
+            GROUP BY epa.room_id
+        """
+
+        args.extend(thread_ids_args)
+        txn.execute(sql, args)
+
+        for room_id, notif_count in txn:
+            room_to_count[room_id] += notif_count
+
+        return room_to_count
+
     @cached(tree=True, max_entries=5000, iterable=True)
     async def get_unread_event_push_actions_by_room_for_user(
         self,
diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py
index ee48920f84..5fa8bd2d98 100644
--- a/tests/storage/test_event_push_actions.py
+++ b/tests/storage/test_event_push_actions.py
@@ -156,7 +156,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
 
         last_event_id: str
 
-        def _assert_counts(noitf_count: int, highlight_count: int) -> None:
+        def _assert_counts(notif_count: int, highlight_count: int) -> None:
             counts = self.get_success(
                 self.store.db_pool.runInteraction(
                     "get-unread-counts",
@@ -168,13 +168,22 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
             self.assertEqual(
                 counts.main_timeline,
                 NotifCounts(
-                    notify_count=noitf_count,
+                    notify_count=notif_count,
                     unread_count=0,
                     highlight_count=highlight_count,
                 ),
             )
             self.assertEqual(counts.threads, {})
 
+            aggregate_counts = self.get_success(
+                self.store.db_pool.runInteraction(
+                    "get-aggregate-unread-counts",
+                    self.store._get_unread_counts_by_room_for_user_txn,
+                    user_id,
+                )
+            )
+            self.assertEqual(aggregate_counts[room_id], notif_count)
+
         def _create_event(highlight: bool = False) -> str:
             result = self.helper.send_event(
                 room_id,
@@ -283,7 +292,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         last_event_id: str
 
         def _assert_counts(
-            noitf_count: int,
+            notif_count: int,
             highlight_count: int,
             thread_notif_count: int,
             thread_highlight_count: int,
@@ -299,7 +308,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
             self.assertEqual(
                 counts.main_timeline,
                 NotifCounts(
-                    notify_count=noitf_count,
+                    notify_count=notif_count,
                     unread_count=0,
                     highlight_count=highlight_count,
                 ),
@@ -318,6 +327,17 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
             else:
                 self.assertEqual(counts.threads, {})
 
+            aggregate_counts = self.get_success(
+                self.store.db_pool.runInteraction(
+                    "get-aggregate-unread-counts",
+                    self.store._get_unread_counts_by_room_for_user_txn,
+                    user_id,
+                )
+            )
+            self.assertEqual(
+                aggregate_counts[room_id], notif_count + thread_notif_count
+            )
+
         def _create_event(
             highlight: bool = False, thread_id: Optional[str] = None
         ) -> str:
@@ -454,7 +474,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         last_event_id: str
 
         def _assert_counts(
-            noitf_count: int,
+            notif_count: int,
             highlight_count: int,
             thread_notif_count: int,
             thread_highlight_count: int,
@@ -470,7 +490,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
             self.assertEqual(
                 counts.main_timeline,
                 NotifCounts(
-                    notify_count=noitf_count,
+                    notify_count=notif_count,
                     unread_count=0,
                     highlight_count=highlight_count,
                 ),
@@ -489,6 +509,17 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
             else:
                 self.assertEqual(counts.threads, {})
 
+            aggregate_counts = self.get_success(
+                self.store.db_pool.runInteraction(
+                    "get-aggregate-unread-counts",
+                    self.store._get_unread_counts_by_room_for_user_txn,
+                    user_id,
+                )
+            )
+            self.assertEqual(
+                aggregate_counts[room_id], notif_count + thread_notif_count
+            )
+
         def _create_event(
             highlight: bool = False, thread_id: Optional[str] = None
         ) -> str:
@@ -646,7 +677,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
             )
             return result["event_id"]
 
-        def _assert_counts(noitf_count: int, thread_notif_count: int) -> None:
+        def _assert_counts(notif_count: int, thread_notif_count: int) -> None:
             counts = self.get_success(
                 self.store.db_pool.runInteraction(
                     "get-unread-counts",
@@ -658,7 +689,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
             self.assertEqual(
                 counts.main_timeline,
                 NotifCounts(
-                    notify_count=noitf_count, unread_count=0, highlight_count=0
+                    notify_count=notif_count, unread_count=0, highlight_count=0
                 ),
             )
             if thread_notif_count:
-- 
cgit 1.5.1


From 71f3e53ad010ba8c219f1076d40915b985760ed9 Mon Sep 17 00:00:00 2001
From: Will Hunt <will@half-shot.uk>
Date: Thu, 1 Dec 2022 13:46:24 +0000
Subject: Add `push.enabled` option to disable push notification calculation
 (#14551)

* Add initial option

* changelog

* Some more linting
---
 changelog.d/14551.feature                        |  1 +
 docs/usage/configuration/config_documentation.md |  5 +++
 synapse/config/push.py                           |  1 +
 synapse/push/bulk_push_rule_evaluator.py         |  3 ++
 tests/push/test_bulk_push_rule_evaluator.py      | 45 ++++++++++++++++++++++--
 5 files changed, 53 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14551.feature

(limited to 'synapse')

diff --git a/changelog.d/14551.feature b/changelog.d/14551.feature
new file mode 100644
index 0000000000..43b91d2e57
--- /dev/null
+++ b/changelog.d/14551.feature
@@ -0,0 +1 @@
+Add new `push.enabled` config option to allow opting out of push notification calculation.
\ No newline at end of file
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 749af12aac..b9bde8f47e 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3355,6 +3355,10 @@ Configuration settings related to push notifications
 This setting defines options for push notifications.
 
 This option has a number of sub-options. They are as follows:
+* `enabled`: Enables or disables push notification calculation. Note, disabling this will also
+   stop unread counts being calculated for rooms. This mode of operation is intended
+   for homeservers which may only have bots or appservice users connected, or are otherwise
+   not interested in push/unread counters. This is enabled by default.
 * `include_content`: Clients requesting push notifications can either have the body of
    the message sent in the notification poke along with other details
    like the sender, or just the event ID and room ID (`event_id_only`).
@@ -3375,6 +3379,7 @@ This option has a number of sub-options. They are as follows:
 Example configuration:
 ```yaml
 push:
+  enabled: true
   include_content: false
   group_unread_count_by_room: false
 ```
diff --git a/synapse/config/push.py b/synapse/config/push.py
index 979b128eae..3b5378e6ea 100644
--- a/synapse/config/push.py
+++ b/synapse/config/push.py
@@ -26,6 +26,7 @@ class PushConfig(Config):
     def read_config(self, config: JsonDict, **kwargs: Any) -> None:
         push_config = config.get("push") or {}
         self.push_include_content = push_config.get("include_content", True)
+        self.enable_push = push_config.get("enabled", True)
         self.push_group_unread_count_by_room = push_config.get(
             "group_unread_count_by_room", True
         )
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index d6b377860f..9ed35d8461 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -106,6 +106,7 @@ class BulkPushRuleEvaluator:
         self.store = hs.get_datastores().main
         self.clock = hs.get_clock()
         self._event_auth_handler = hs.get_event_auth_handler()
+        self.should_calculate_push_rules = self.hs.config.push.enable_push
 
         self._related_event_match_enabled = self.hs.config.experimental.msc3664_enabled
 
@@ -269,6 +270,8 @@ class BulkPushRuleEvaluator:
         for each event, check if the message should increment the unread count, and
         insert the results into the event_push_actions_staging table.
         """
+        if not self.should_calculate_push_rules:
+            return
         # For batched events the power level events may not have been persisted yet,
         # so we pass in the batched events. Thus if the event cannot be found in the
         # database we can check in the batch.
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
index 594e7937a8..1cd453248e 100644
--- a/tests/push/test_bulk_push_rule_evaluator.py
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -6,10 +6,11 @@ from synapse.rest import admin
 from synapse.rest.client import login, register, room
 from synapse.types import create_requester
 
-from tests import unittest
+from tests.test_utils import simple_async_mock
+from tests.unittest import HomeserverTestCase, override_config
 
 
-class TestBulkPushRuleEvaluator(unittest.HomeserverTestCase):
+class TestBulkPushRuleEvaluator(HomeserverTestCase):
 
     servlets = [
         admin.register_servlets_for_client_rest_resource,
@@ -72,3 +73,43 @@ class TestBulkPushRuleEvaluator(unittest.HomeserverTestCase):
         bulk_evaluator = BulkPushRuleEvaluator(self.hs)
         # should not raise
         self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)]))
+
+    @override_config({"push": {"enabled": False}})
+    def test_action_for_event_by_user_disabled_by_config(self) -> None:
+        """Ensure that push rules are not calculated when disabled in the config"""
+        # Create a new user and room.
+        alice = self.register_user("alice", "pass")
+        token = self.login(alice, "pass")
+
+        room_id = self.helper.create_room_as(
+            alice, room_version=RoomVersions.V9.identifier, tok=token
+        )
+
+        # No changes to the room's power levels are needed for this test: we only
+        # need an event and its context to hand to the evaluator, which should
+        # return early without evaluating any push rules.
+        event_creation_handler = self.hs.get_event_creation_handler()
+        requester = create_requester(alice)
+
+        # Create a new message event; with push disabled it should never be
+        # evaluated against any user's push rules.
+        event, context = self.get_success(
+            event_creation_handler.create_event(
+                requester,
+                {
+                    "type": "m.room.message",
+                    "room_id": room_id,
+                    "content": {
+                        "msgtype": "m.text",
+                        "body": "helo",
+                    },
+                    "sender": alice,
+                },
+            )
+        )
+
+        bulk_evaluator = BulkPushRuleEvaluator(self.hs)
+        bulk_evaluator._action_for_event_by_user = simple_async_mock()  # type: ignore[assignment]
+        # should not raise
+        self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)]))
+        bulk_evaluator._action_for_event_by_user.assert_not_called()
-- 
cgit 1.5.1


From fac8a38525387e344e3595a092578e0ffedd49ae Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 2 Dec 2022 10:28:41 -0500
Subject: Properly handle unknown results for the stream change cache. (#14592)

StreamChangeCache.get_all_entities_changed can return None to signify
it does not have information at the given stream position. Two callers (related
to device lists and presence) were treating this response the same as an empty
list (i.e. there being no updates).
---
 changelog.d/14592.bugfix                  |  1 +
 synapse/handlers/presence.py              |  4 ++--
 synapse/storage/databases/main/devices.py | 33 ++++++++++++++++++-------------
 3 files changed, 22 insertions(+), 16 deletions(-)
 create mode 100644 changelog.d/14592.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14592.bugfix b/changelog.d/14592.bugfix
new file mode 100644
index 0000000000..149ee99dd7
--- /dev/null
+++ b/changelog.d/14592.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where a device list update might not be sent to clients in certain circumstances.
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index cf08737d11..1799174c2f 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -1764,14 +1764,14 @@ class PresenceEventSource(EventSource[int, UserPresenceState]):
         Returns:
             A list of presence states for the given user to receive.
         """
+        updated_users = None
         if from_key:
             # Only return updates since the last sync
             updated_users = self.store.presence_stream_cache.get_all_entities_changed(
                 from_key
             )
-            if not updated_users:
-                updated_users = []
 
+        if updated_users is not None:
             # Get the actual presence update for each change
             users_to_state = await self.get_presence_handler().current_state_for_users(
                 updated_users
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 534f7fc04a..8ba995df3b 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -842,12 +842,11 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
                 user_ids, from_key
             )
 
-        if not user_ids_to_check:
+        # If an empty set was returned, there's nothing to do.
+        if user_ids_to_check is not None and not user_ids_to_check:
             return set()
 
         def _get_users_whose_devices_changed_txn(txn: LoggingTransaction) -> Set[str]:
-            changes: Set[str] = set()
-
             stream_id_where_clause = "stream_id > ?"
             sql_args = [from_key]
 
@@ -858,19 +857,25 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
             sql = f"""
                 SELECT DISTINCT user_id FROM device_lists_stream
                 WHERE {stream_id_where_clause}
-                AND
             """
 
-            # Query device changes with a batch of users at a time
-            # Assertion for mypy's benefit; see also
-            # https://mypy.readthedocs.io/en/stable/common_issues.html#narrowing-and-inner-functions
-            assert user_ids_to_check is not None
-            for chunk in batch_iter(user_ids_to_check, 100):
-                clause, args = make_in_list_sql_clause(
-                    txn.database_engine, "user_id", chunk
-                )
-                txn.execute(sql + clause, sql_args + args)
-                changes.update(user_id for user_id, in txn)
+            # If the stream change cache gave us no information, fetch *all*
+            # users between the stream IDs.
+            if user_ids_to_check is None:
+                txn.execute(sql, sql_args)
+                return {user_id for user_id, in txn}
+
+            # Otherwise, fetch changes for the given users.
+            else:
+                changes: Set[str] = set()
+
+                # Query device changes with a batch of users at a time
+                for chunk in batch_iter(user_ids_to_check, 100):
+                    clause, args = make_in_list_sql_clause(
+                        txn.database_engine, "user_id", chunk
+                    )
+                    txn.execute(sql + " AND " + clause, sql_args + args)
+                    changes.update(user_id for user_id, in txn)
 
             return changes
 
-- 
cgit 1.5.1


From f685318c2aa5d4a54239f7fc444bdaca6ba975bd Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 2 Dec 2022 13:10:05 -0500
Subject: Use ClientRestResource on both the main process and workers. (#14528)

Add logic to ClientRestResource to decide whether to mount servlets
or not based on whether the current process is a worker.

This is clearer to see what a worker runs than the completely separate /
copy & pasted list of servlets being mounted for workers.
---
 changelog.d/14528.misc          |  1 +
 synapse/app/generic_worker.py   | 74 ++---------------------------------------
 synapse/rest/__init__.py        | 59 ++++++++++++++++++++------------
 synapse/rest/client/account.py  | 26 ++++++++-------
 synapse/rest/client/devices.py  | 10 +++---
 synapse/rest/client/keys.py     |  5 +--
 synapse/rest/client/register.py |  9 ++---
 synapse/rest/client/room.py     |  6 ++--
 8 files changed, 71 insertions(+), 119 deletions(-)
 create mode 100644 changelog.d/14528.misc

(limited to 'synapse')

diff --git a/changelog.d/14528.misc b/changelog.d/14528.misc
new file mode 100644
index 0000000000..4f233feab6
--- /dev/null
+++ b/changelog.d/14528.misc
@@ -0,0 +1 @@
+Share the `ClientRestResource` for both workers and the main process.
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 46dc731696..bcc8abe20c 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -44,40 +44,8 @@ from synapse.http.server import JsonResource, OptionsResource
 from synapse.logging.context import LoggingContext
 from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource
+from synapse.rest import ClientRestResource
 from synapse.rest.admin import register_servlets_for_media_repo
-from synapse.rest.client import (
-    account_data,
-    events,
-    initial_sync,
-    login,
-    presence,
-    profile,
-    push_rule,
-    read_marker,
-    receipts,
-    relations,
-    room,
-    room_batch,
-    room_keys,
-    sendtodevice,
-    sync,
-    tags,
-    user_directory,
-    versions,
-    voip,
-)
-from synapse.rest.client.account import ThreepidRestServlet, WhoamiRestServlet
-from synapse.rest.client.devices import DevicesRestServlet
-from synapse.rest.client.keys import (
-    KeyChangesServlet,
-    KeyQueryServlet,
-    KeyUploadServlet,
-    OneTimeKeyServlet,
-)
-from synapse.rest.client.register import (
-    RegisterRestServlet,
-    RegistrationTokenValidityRestServlet,
-)
 from synapse.rest.health import HealthResource
 from synapse.rest.key.v2 import KeyResource
 from synapse.rest.synapse.client import build_synapse_client_resource_tree
@@ -200,45 +168,7 @@ class GenericWorkerServer(HomeServer):
                 if name == "metrics":
                     resources[METRICS_PREFIX] = MetricsResource(RegistryProxy)
                 elif name == "client":
-                    resource = JsonResource(self, canonical_json=False)
-
-                    RegisterRestServlet(self).register(resource)
-                    RegistrationTokenValidityRestServlet(self).register(resource)
-                    login.register_servlets(self, resource)
-                    ThreepidRestServlet(self).register(resource)
-                    WhoamiRestServlet(self).register(resource)
-                    DevicesRestServlet(self).register(resource)
-
-                    # Read-only
-                    KeyUploadServlet(self).register(resource)
-                    KeyQueryServlet(self).register(resource)
-                    KeyChangesServlet(self).register(resource)
-                    OneTimeKeyServlet(self).register(resource)
-
-                    voip.register_servlets(self, resource)
-                    push_rule.register_servlets(self, resource)
-                    versions.register_servlets(self, resource)
-
-                    profile.register_servlets(self, resource)
-
-                    sync.register_servlets(self, resource)
-                    events.register_servlets(self, resource)
-                    room.register_servlets(self, resource, is_worker=True)
-                    relations.register_servlets(self, resource)
-                    room.register_deprecated_servlets(self, resource)
-                    initial_sync.register_servlets(self, resource)
-                    room_batch.register_servlets(self, resource)
-                    room_keys.register_servlets(self, resource)
-                    tags.register_servlets(self, resource)
-                    account_data.register_servlets(self, resource)
-                    receipts.register_servlets(self, resource)
-                    read_marker.register_servlets(self, resource)
-
-                    sendtodevice.register_servlets(self, resource)
-
-                    user_directory.register_servlets(self, resource)
-
-                    presence.register_servlets(self, resource)
+                    resource: Resource = ClientRestResource(self)
 
                     resources[CLIENT_API_PREFIX] = resource
 
diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py
index 28542cd774..14c4e6ebbb 100644
--- a/synapse/rest/__init__.py
+++ b/synapse/rest/__init__.py
@@ -29,7 +29,7 @@ from synapse.rest.client import (
     initial_sync,
     keys,
     knock,
-    login as v1_login,
+    login,
     login_token_request,
     logout,
     mutual_rooms,
@@ -82,6 +82,10 @@ class ClientRestResource(JsonResource):
 
     @staticmethod
     def register_servlets(client_resource: HttpServer, hs: "HomeServer") -> None:
+        # Some servlets are only registered on the main process (and not worker
+        # processes).
+        is_main_process = hs.config.worker.worker_app is None
+
         versions.register_servlets(hs, client_resource)
 
         # Deprecated in r0
@@ -92,45 +96,58 @@ class ClientRestResource(JsonResource):
         events.register_servlets(hs, client_resource)
 
         room.register_servlets(hs, client_resource)
-        v1_login.register_servlets(hs, client_resource)
+        login.register_servlets(hs, client_resource)
         profile.register_servlets(hs, client_resource)
         presence.register_servlets(hs, client_resource)
-        directory.register_servlets(hs, client_resource)
+        if is_main_process:
+            directory.register_servlets(hs, client_resource)
         voip.register_servlets(hs, client_resource)
-        pusher.register_servlets(hs, client_resource)
+        if is_main_process:
+            pusher.register_servlets(hs, client_resource)
         push_rule.register_servlets(hs, client_resource)
-        logout.register_servlets(hs, client_resource)
+        if is_main_process:
+            logout.register_servlets(hs, client_resource)
         sync.register_servlets(hs, client_resource)
-        filter.register_servlets(hs, client_resource)
+        if is_main_process:
+            filter.register_servlets(hs, client_resource)
         account.register_servlets(hs, client_resource)
         register.register_servlets(hs, client_resource)
-        auth.register_servlets(hs, client_resource)
+        if is_main_process:
+            auth.register_servlets(hs, client_resource)
         receipts.register_servlets(hs, client_resource)
         read_marker.register_servlets(hs, client_resource)
         room_keys.register_servlets(hs, client_resource)
         keys.register_servlets(hs, client_resource)
-        tokenrefresh.register_servlets(hs, client_resource)
+        if is_main_process:
+            tokenrefresh.register_servlets(hs, client_resource)
         tags.register_servlets(hs, client_resource)
         account_data.register_servlets(hs, client_resource)
-        report_event.register_servlets(hs, client_resource)
-        openid.register_servlets(hs, client_resource)
-        notifications.register_servlets(hs, client_resource)
+        if is_main_process:
+            report_event.register_servlets(hs, client_resource)
+            openid.register_servlets(hs, client_resource)
+            notifications.register_servlets(hs, client_resource)
         devices.register_servlets(hs, client_resource)
-        thirdparty.register_servlets(hs, client_resource)
+        if is_main_process:
+            thirdparty.register_servlets(hs, client_resource)
         sendtodevice.register_servlets(hs, client_resource)
         user_directory.register_servlets(hs, client_resource)
-        room_upgrade_rest_servlet.register_servlets(hs, client_resource)
+        if is_main_process:
+            room_upgrade_rest_servlet.register_servlets(hs, client_resource)
         room_batch.register_servlets(hs, client_resource)
-        capabilities.register_servlets(hs, client_resource)
-        account_validity.register_servlets(hs, client_resource)
+        if is_main_process:
+            capabilities.register_servlets(hs, client_resource)
+            account_validity.register_servlets(hs, client_resource)
         relations.register_servlets(hs, client_resource)
-        password_policy.register_servlets(hs, client_resource)
-        knock.register_servlets(hs, client_resource)
+        if is_main_process:
+            password_policy.register_servlets(hs, client_resource)
+            knock.register_servlets(hs, client_resource)
 
         # moving to /_synapse/admin
-        admin.register_servlets_for_client_rest_resource(hs, client_resource)
+        if is_main_process:
+            admin.register_servlets_for_client_rest_resource(hs, client_resource)
 
         # unstable
-        mutual_rooms.register_servlets(hs, client_resource)
-        login_token_request.register_servlets(hs, client_resource)
-        rendezvous.register_servlets(hs, client_resource)
+        if is_main_process:
+            mutual_rooms.register_servlets(hs, client_resource)
+            login_token_request.register_servlets(hs, client_resource)
+            rendezvous.register_servlets(hs, client_resource)
diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py
index 44f622bcce..b4b92f0c99 100644
--- a/synapse/rest/client/account.py
+++ b/synapse/rest/client/account.py
@@ -875,19 +875,21 @@ class AccountStatusRestServlet(RestServlet):
 
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
-    EmailPasswordRequestTokenRestServlet(hs).register(http_server)
-    PasswordRestServlet(hs).register(http_server)
-    DeactivateAccountRestServlet(hs).register(http_server)
-    EmailThreepidRequestTokenRestServlet(hs).register(http_server)
-    MsisdnThreepidRequestTokenRestServlet(hs).register(http_server)
-    AddThreepidEmailSubmitTokenServlet(hs).register(http_server)
-    AddThreepidMsisdnSubmitTokenServlet(hs).register(http_server)
+    if hs.config.worker.worker_app is None:
+        EmailPasswordRequestTokenRestServlet(hs).register(http_server)
+        PasswordRestServlet(hs).register(http_server)
+        DeactivateAccountRestServlet(hs).register(http_server)
+        EmailThreepidRequestTokenRestServlet(hs).register(http_server)
+        MsisdnThreepidRequestTokenRestServlet(hs).register(http_server)
+        AddThreepidEmailSubmitTokenServlet(hs).register(http_server)
+        AddThreepidMsisdnSubmitTokenServlet(hs).register(http_server)
     ThreepidRestServlet(hs).register(http_server)
-    ThreepidAddRestServlet(hs).register(http_server)
-    ThreepidBindRestServlet(hs).register(http_server)
-    ThreepidUnbindRestServlet(hs).register(http_server)
-    ThreepidDeleteRestServlet(hs).register(http_server)
+    if hs.config.worker.worker_app is None:
+        ThreepidAddRestServlet(hs).register(http_server)
+        ThreepidBindRestServlet(hs).register(http_server)
+        ThreepidUnbindRestServlet(hs).register(http_server)
+        ThreepidDeleteRestServlet(hs).register(http_server)
     WhoamiRestServlet(hs).register(http_server)
 
-    if hs.config.experimental.msc3720_enabled:
+    if hs.config.worker.worker_app is None and hs.config.experimental.msc3720_enabled:
         AccountStatusRestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py
index 69b803f9f8..486c6dbbc5 100644
--- a/synapse/rest/client/devices.py
+++ b/synapse/rest/client/devices.py
@@ -342,8 +342,10 @@ class ClaimDehydratedDeviceServlet(RestServlet):
 
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
-    DeleteDevicesRestServlet(hs).register(http_server)
+    if hs.config.worker.worker_app is None:
+        DeleteDevicesRestServlet(hs).register(http_server)
     DevicesRestServlet(hs).register(http_server)
-    DeviceRestServlet(hs).register(http_server)
-    DehydratedDeviceServlet(hs).register(http_server)
-    ClaimDehydratedDeviceServlet(hs).register(http_server)
+    if hs.config.worker.worker_app is None:
+        DeviceRestServlet(hs).register(http_server)
+        DehydratedDeviceServlet(hs).register(http_server)
+        ClaimDehydratedDeviceServlet(hs).register(http_server)
diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py
index ee038c7192..7873b363c0 100644
--- a/synapse/rest/client/keys.py
+++ b/synapse/rest/client/keys.py
@@ -376,5 +376,6 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     KeyQueryServlet(hs).register(http_server)
     KeyChangesServlet(hs).register(http_server)
     OneTimeKeyServlet(hs).register(http_server)
-    SigningKeyUploadServlet(hs).register(http_server)
-    SignaturesUploadServlet(hs).register(http_server)
+    if hs.config.worker.worker_app is None:
+        SigningKeyUploadServlet(hs).register(http_server)
+        SignaturesUploadServlet(hs).register(http_server)
diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py
index de810ae3ec..3cb1e7e375 100644
--- a/synapse/rest/client/register.py
+++ b/synapse/rest/client/register.py
@@ -949,9 +949,10 @@ def _calculate_registration_flows(
 
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
-    EmailRegisterRequestTokenRestServlet(hs).register(http_server)
-    MsisdnRegisterRequestTokenRestServlet(hs).register(http_server)
-    UsernameAvailabilityRestServlet(hs).register(http_server)
-    RegistrationSubmitTokenServlet(hs).register(http_server)
+    if hs.config.worker.worker_app is None:
+        EmailRegisterRequestTokenRestServlet(hs).register(http_server)
+        MsisdnRegisterRequestTokenRestServlet(hs).register(http_server)
+        UsernameAvailabilityRestServlet(hs).register(http_server)
+        RegistrationSubmitTokenServlet(hs).register(http_server)
     RegistrationTokenValidityRestServlet(hs).register(http_server)
     RegisterRestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index 636cc62877..e70aa381f3 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -1395,9 +1395,7 @@ class RoomSummaryRestServlet(ResolveRoomIdMixin, RestServlet):
         )
 
 
-def register_servlets(
-    hs: "HomeServer", http_server: HttpServer, is_worker: bool = False
-) -> None:
+def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     RoomStateEventRestServlet(hs).register(http_server)
     RoomMemberListRestServlet(hs).register(http_server)
     JoinedRoomMemberListRestServlet(hs).register(http_server)
@@ -1421,7 +1419,7 @@ def register_servlets(
     TimestampLookupRestServlet(hs).register(http_server)
 
     # Some servlets only get registered for the main process.
-    if not is_worker:
+    if hs.config.worker.worker_app is None:
         RoomForgetRestServlet(hs).register(http_server)
 
 
-- 
cgit 1.5.1


From 93ac3c197ebcb56f4e68a93da5bd63b4a96b18f1 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 5 Dec 2022 11:30:41 +0000
Subject: Suppress empty body warnings in room servlets (#14600)

* Suppress empty body warnings in room servlets

We've already decided to allow empty bodies for backwards compat. The
change here stops us from emitting a misleading warning; see also
https://github.com/matrix-org/synapse/issues/14478#issuecomment-1319157105

* Changelog
---
 changelog.d/14600.bugfix    |  1 +
 synapse/rest/client/room.py | 14 ++------------
 2 files changed, 3 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/14600.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14600.bugfix b/changelog.d/14600.bugfix
new file mode 100644
index 0000000000..c4bf405684
--- /dev/null
+++ b/changelog.d/14600.bugfix
@@ -0,0 +1 @@
+Suppress a spurious warning when `POST /rooms/<room_id>/<membership>/`, `POST /join/<room_id_or_alias>`, or the unspecced `PUT /join/<room_id_or_alias>/<txn_id>` receive an empty HTTP request body.
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index e70aa381f3..514eb6afc8 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -396,12 +396,7 @@ class JoinRoomAliasServlet(ResolveRoomIdMixin, TransactionRestServlet):
     ) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request, allow_guest=True)
 
-        try:
-            content = parse_json_object_from_request(request)
-        except Exception:
-            # Turns out we used to ignore the body entirely, and some clients
-            # cheekily send invalid bodies.
-            content = {}
+        content = parse_json_object_from_request(request, allow_empty_body=True)
 
         # twisted.web.server.Request.args is incorrectly defined as Optional[Any]
         args: Dict[bytes, List[bytes]] = request.args  # type: ignore
@@ -952,12 +947,7 @@ class RoomMembershipRestServlet(TransactionRestServlet):
         }:
             raise AuthError(403, "Guest access not allowed")
 
-        try:
-            content = parse_json_object_from_request(request)
-        except Exception:
-            # Turns out we used to ignore the body entirely, and some clients
-            # cheekily send invalid bodies.
-            content = {}
+        content = parse_json_object_from_request(request, allow_empty_body=True)
 
         if membership_action == "invite" and all(
             key in content for key in ("medium", "address")
-- 
cgit 1.5.1


From 501f62d1a62296f79e46e1bd60dc5d1a8b28847d Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Mon, 5 Dec 2022 13:07:55 +0000
Subject: Faster remote room joins: stream the un-partial-stating of rooms over
 replication. [rei:frrj/streams/unpsr] (#14473)

---
 changelog.d/14473.misc                             |   1 +
 synapse/handlers/device.py                         |   2 +-
 synapse/handlers/federation.py                     |   4 +
 synapse/replication/tcp/streams/__init__.py        |   3 +
 synapse/replication/tcp/streams/partial_state.py   |  48 +++++
 synapse/storage/databases/main/room.py             | 237 +++++++++++++++------
 .../delta/73/20_un_partial_stated_room_stream.sql  |  32 +++
 ..._un_partial_stated_room_stream_seq.sql.postgres |  20 ++
 8 files changed, 280 insertions(+), 67 deletions(-)
 create mode 100644 changelog.d/14473.misc
 create mode 100644 synapse/replication/tcp/streams/partial_state.py
 create mode 100644 synapse/storage/schema/main/delta/73/20_un_partial_stated_room_stream.sql
 create mode 100644 synapse/storage/schema/main/delta/73/21_un_partial_stated_room_stream_seq.sql.postgres

(limited to 'synapse')

diff --git a/changelog.d/14473.misc b/changelog.d/14473.misc
new file mode 100644
index 0000000000..deccd4e91a
--- /dev/null
+++ b/changelog.d/14473.misc
@@ -0,0 +1 @@
+Faster remote room joins: stream the un-partial-stating of rooms over replication.
\ No newline at end of file
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index b1e55e1b9e..d4750a32e6 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -996,7 +996,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
         # Check if we are partially joining any rooms. If so we need to store
         # all device list updates so that we can handle them correctly once we
         # know who is in the room.
-        # TODO(faster joins): this fetches and processes a bunch of data that we don't
+        # TODO(faster_joins): this fetches and processes a bunch of data that we don't
         # use. Could be replaced by a tighter query e.g.
         #   SELECT EXISTS(SELECT 1 FROM partial_state_rooms)
         partial_rooms = await self.store.get_partial_state_room_resync_info()
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index d92582fd5c..3398fcaf7d 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -152,6 +152,7 @@ class FederationHandler:
         self._federation_event_handler = hs.get_federation_event_handler()
         self._device_handler = hs.get_device_handler()
         self._bulk_push_rule_evaluator = hs.get_bulk_push_rule_evaluator()
+        self._notifier = hs.get_notifier()
 
         self._clean_room_for_join_client = ReplicationCleanRoomRestServlet.make_client(
             hs
@@ -1692,6 +1693,9 @@ class FederationHandler:
                     self._storage_controllers.state.notify_room_un_partial_stated(
                         room_id
                     )
+                    # Poke the notifier so that other workers see the write to
+                    # the un-partial-stated rooms stream.
+                    self._notifier.notify_replication()
 
                     # TODO(faster_joins) update room stats and user directory?
                     #   https://github.com/matrix-org/synapse/issues/12814
diff --git a/synapse/replication/tcp/streams/__init__.py b/synapse/replication/tcp/streams/__init__.py
index b1cd55bf6f..8575666d9c 100644
--- a/synapse/replication/tcp/streams/__init__.py
+++ b/synapse/replication/tcp/streams/__init__.py
@@ -42,6 +42,7 @@ from synapse.replication.tcp.streams._base import (
 )
 from synapse.replication.tcp.streams.events import EventsStream
 from synapse.replication.tcp.streams.federation import FederationStream
+from synapse.replication.tcp.streams.partial_state import UnPartialStatedRoomStream
 
 STREAMS_MAP = {
     stream.NAME: stream
@@ -61,6 +62,7 @@ STREAMS_MAP = {
         TagAccountDataStream,
         AccountDataStream,
         UserSignatureStream,
+        UnPartialStatedRoomStream,
     )
 }
 
@@ -80,4 +82,5 @@ __all__ = [
     "TagAccountDataStream",
     "AccountDataStream",
     "UserSignatureStream",
+    "UnPartialStatedRoomStream",
 ]
diff --git a/synapse/replication/tcp/streams/partial_state.py b/synapse/replication/tcp/streams/partial_state.py
new file mode 100644
index 0000000000..18f087ffa2
--- /dev/null
+++ b/synapse/replication/tcp/streams/partial_state.py
@@ -0,0 +1,48 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import TYPE_CHECKING
+
+import attr
+
+from synapse.replication.tcp.streams import Stream
+from synapse.replication.tcp.streams._base import current_token_without_instance
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class UnPartialStatedRoomStreamRow:
+    # ID of the room that has been un-partial-stated.
+    room_id: str
+
+
+class UnPartialStatedRoomStream(Stream):
+    """
+    Stream to notify about rooms becoming un-partial-stated;
+    that is, when the background sync finishes such that we now have full state for
+    the room.
+    """
+
+    NAME = "un_partial_stated_room"
+    ROW_TYPE = UnPartialStatedRoomStreamRow
+
+    def __init__(self, hs: "HomeServer"):
+        store = hs.get_datastores().main
+        super().__init__(
+            hs.get_instance_name(),
+            # TODO(faster_joins, multiple writers): we need to account for instance names
+            current_token_without_instance(store.get_un_partial_stated_rooms_token),
+            store.get_un_partial_stated_rooms_from_stream,
+        )
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 1309bfd374..78906a5e1d 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -1,5 +1,5 @@
 # Copyright 2014-2016 OpenMarket Ltd
-# Copyright 2019 The Matrix.org Foundation C.I.C.
+# Copyright 2019, 2022 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -50,8 +50,14 @@ from synapse.storage.database import (
     LoggingTransaction,
 )
 from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
+from synapse.storage.engines import PostgresEngine
 from synapse.storage.types import Cursor
-from synapse.storage.util.id_generators import IdGenerator
+from synapse.storage.util.id_generators import (
+    AbstractStreamIdGenerator,
+    IdGenerator,
+    MultiWriterIdGenerator,
+    StreamIdGenerator,
+)
 from synapse.types import JsonDict, RetentionPolicy, ThirdPartyInstanceID
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached
@@ -114,6 +120,26 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
 
         self.config: HomeServerConfig = hs.config
 
+        self._un_partial_stated_rooms_stream_id_gen: AbstractStreamIdGenerator
+
+        if isinstance(database.engine, PostgresEngine):
+            self._un_partial_stated_rooms_stream_id_gen = MultiWriterIdGenerator(
+                db_conn=db_conn,
+                db=database,
+                stream_name="un_partial_stated_room_stream",
+                instance_name=self._instance_name,
+                tables=[
+                    ("un_partial_stated_room_stream", "instance_name", "stream_id")
+                ],
+                sequence_name="un_partial_stated_room_stream_sequence",
+                # TODO(faster_joins, multiple writers) Support multiple writers.
+                writers=["master"],
+            )
+        else:
+            self._un_partial_stated_rooms_stream_id_gen = StreamIdGenerator(
+                db_conn, "un_partial_stated_room_stream", "stream_id"
+            )
+
     async def store_room(
         self,
         room_id: str,
@@ -1216,70 +1242,6 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
 
         return room_servers
 
-    async def clear_partial_state_room(self, room_id: str) -> bool:
-        """Clears the partial state flag for a room.
-
-        Args:
-            room_id: The room whose partial state flag is to be cleared.
-
-        Returns:
-            `True` if the partial state flag has been cleared successfully.
-
-            `False` if the partial state flag could not be cleared because the room
-            still contains events with partial state.
-        """
-        try:
-            await self.db_pool.runInteraction(
-                "clear_partial_state_room", self._clear_partial_state_room_txn, room_id
-            )
-            return True
-        except self.db_pool.engine.module.IntegrityError as e:
-            # Assume that any `IntegrityError`s are due to partial state events.
-            logger.info(
-                "Exception while clearing lazy partial-state-room %s, retrying: %s",
-                room_id,
-                e,
-            )
-            return False
-
-    def _clear_partial_state_room_txn(
-        self, txn: LoggingTransaction, room_id: str
-    ) -> None:
-        DatabasePool.simple_delete_txn(
-            txn,
-            table="partial_state_rooms_servers",
-            keyvalues={"room_id": room_id},
-        )
-        DatabasePool.simple_delete_one_txn(
-            txn,
-            table="partial_state_rooms",
-            keyvalues={"room_id": room_id},
-        )
-        self._invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,))
-        self._invalidate_cache_and_stream(
-            txn, self.get_partial_state_servers_at_join, (room_id,)
-        )
-
-        # We now delete anything from `device_lists_remote_pending` with a
-        # stream ID less than the minimum
-        # `partial_state_rooms.device_lists_stream_id`, as we no longer need them.
-        device_lists_stream_id = DatabasePool.simple_select_one_onecol_txn(
-            txn,
-            table="partial_state_rooms",
-            keyvalues={},
-            retcol="MIN(device_lists_stream_id)",
-            allow_none=True,
-        )
-        if device_lists_stream_id is None:
-            # There are no rooms being currently partially joined, so we delete everything.
-            txn.execute("DELETE FROM device_lists_remote_pending")
-        else:
-            sql = """
-                DELETE FROM device_lists_remote_pending
-                WHERE stream_id <= ?
-            """
-            txn.execute(sql, (device_lists_stream_id,))
-
     @cached()
     async def is_partial_state_room(self, room_id: str) -> bool:
         """Checks if this room has partial state.
@@ -1315,6 +1277,66 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
         )
         return result["join_event_id"], result["device_lists_stream_id"]
 
+    def get_un_partial_stated_rooms_token(self) -> int:
+        # TODO(faster_joins, multiple writers): This is inappropriate if there
+        #     are multiple writers because workers that don't write often will
+        #     hold all readers up.
+        #     (See `MultiWriterIdGenerator.get_persisted_upto_position` for an
+        #      explanation.)
+        return self._un_partial_stated_rooms_stream_id_gen.get_current_token()
+
+    async def get_un_partial_stated_rooms_from_stream(
+        self, instance_name: str, last_id: int, current_id: int, limit: int
+    ) -> Tuple[List[Tuple[int, Tuple[str]]], int, bool]:
+        """Get updates for the un-partial-stated rooms replication stream.
+
+        Args:
+            instance_name: The writer we want to fetch updates from. Only rows
+                written by this instance are returned.
+            last_id: The token to fetch updates from. Exclusive.
+            current_id: The token to fetch updates up to. Inclusive.
+            limit: The requested limit for the number of rows to return. The
+                function may return more or fewer rows.
+
+        Returns:
+            A tuple consisting of: the updates, a token to use to fetch
+            subsequent updates, and whether we returned fewer rows than exists
+            between the requested tokens due to the limit.
+
+            The token returned can be used in a subsequent call to this
+            function to get further updates.
+
+            The updates are a list of 2-tuples of stream ID and the row data
+        """
+
+        if last_id == current_id:
+            return [], current_id, False
+
+        def get_un_partial_stated_rooms_from_stream_txn(
+            txn: LoggingTransaction,
+        ) -> Tuple[List[Tuple[int, Tuple[str]]], int, bool]:
+            sql = """
+                SELECT stream_id, room_id
+                FROM un_partial_stated_room_stream
+                WHERE ? < stream_id AND stream_id <= ? AND instance_name = ?
+                ORDER BY stream_id ASC
+                LIMIT ?
+            """
+            txn.execute(sql, (last_id, current_id, instance_name, limit))
+            updates = [(row[0], (row[1],)) for row in txn]
+            limited = False
+            upto_token = current_id
+            if len(updates) >= limit:
+                upto_token = updates[-1][0]
+                limited = True
+
+            return updates, upto_token, limited
+
+        return await self.db_pool.runInteraction(
+            "get_un_partial_stated_rooms_from_stream",
+            get_un_partial_stated_rooms_from_stream_txn,
+        )
+
 
 class _BackgroundUpdates:
     REMOVE_TOMESTONED_ROOMS_BG_UPDATE = "remove_tombstoned_rooms_from_directory"
@@ -1806,6 +1828,8 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
 
         self._event_reports_id_gen = IdGenerator(db_conn, "event_reports", "id")
 
+        self._instance_name = hs.get_instance_name()
+
     async def upsert_room_on_join(
         self, room_id: str, room_version: RoomVersion, state_events: List[EventBase]
     ) -> None:
@@ -2270,3 +2294,84 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
             self.is_room_blocked,
             (room_id,),
         )
+
+    async def clear_partial_state_room(self, room_id: str) -> bool:
+        """Clears the partial state flag for a room.
+
+        Args:
+            room_id: The room whose partial state flag is to be cleared.
+
+        Returns:
+            `True` if the partial state flag has been cleared successfully.
+
+            `False` if the partial state flag could not be cleared because the room
+            still contains events with partial state.
+        """
+        try:
+            async with self._un_partial_stated_rooms_stream_id_gen.get_next() as un_partial_state_room_stream_id:
+                await self.db_pool.runInteraction(
+                    "clear_partial_state_room",
+                    self._clear_partial_state_room_txn,
+                    room_id,
+                    un_partial_state_room_stream_id,
+                )
+                return True
+        except self.db_pool.engine.module.IntegrityError as e:
+            # Assume that any `IntegrityError`s are due to partial state events.
+            logger.info(
+                "Exception while clearing lazy partial-state-room %s, retrying: %s",
+                room_id,
+                e,
+            )
+            return False
+
+    def _clear_partial_state_room_txn(
+        self,
+        txn: LoggingTransaction,
+        room_id: str,
+        un_partial_state_room_stream_id: int,
+    ) -> None:
+        DatabasePool.simple_delete_txn(
+            txn,
+            table="partial_state_rooms_servers",
+            keyvalues={"room_id": room_id},
+        )
+        DatabasePool.simple_delete_one_txn(
+            txn,
+            table="partial_state_rooms",
+            keyvalues={"room_id": room_id},
+        )
+        self._invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,))
+        self._invalidate_cache_and_stream(
+            txn, self.get_partial_state_servers_at_join, (room_id,)
+        )
+
+        DatabasePool.simple_insert_txn(
+            txn,
+            "un_partial_stated_room_stream",
+            {
+                "stream_id": un_partial_state_room_stream_id,
+                "instance_name": self._instance_name,
+                "room_id": room_id,
+            },
+        )
+
+        # We now delete anything from `device_lists_remote_pending` with a
+        # stream ID less than the minimum
+        # `partial_state_rooms.device_lists_stream_id`, as we no longer need them.
+        device_lists_stream_id = DatabasePool.simple_select_one_onecol_txn(
+            txn,
+            table="partial_state_rooms",
+            keyvalues={},
+            retcol="MIN(device_lists_stream_id)",
+            allow_none=True,
+        )
+        if device_lists_stream_id is None:
+            # There are no rooms being currently partially joined, so we delete everything.
+            txn.execute("DELETE FROM device_lists_remote_pending")
+        else:
+            sql = """
+                DELETE FROM device_lists_remote_pending
+                WHERE stream_id <= ?
+            """
+            txn.execute(sql, (device_lists_stream_id,))
diff --git a/synapse/storage/schema/main/delta/73/20_un_partial_stated_room_stream.sql b/synapse/storage/schema/main/delta/73/20_un_partial_stated_room_stream.sql
new file mode 100644
index 0000000000..743196cfe3
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/20_un_partial_stated_room_stream.sql
@@ -0,0 +1,32 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Stream for notifying that a room has become un-partial-stated.
+CREATE TABLE un_partial_stated_room_stream(
+    -- Position in the stream
+    stream_id BIGINT PRIMARY KEY NOT NULL,
+
+    -- Which instance wrote this entry.
+    instance_name TEXT NOT NULL,
+
+    -- Which room has been un-partial-stated.
+    room_id TEXT NOT NULL REFERENCES rooms(room_id) ON DELETE CASCADE
+);
+
+-- We want an index here because of the foreign key constraint:
+-- upon deleting a room, the database needs to be able to check here.
+-- This index is not unique because we can join a room multiple times in a server's lifetime,
+-- so the same room could be un-partial-stated multiple times!
+CREATE INDEX un_partial_stated_room_stream_room_id ON un_partial_stated_room_stream (room_id);
diff --git a/synapse/storage/schema/main/delta/73/21_un_partial_stated_room_stream_seq.sql.postgres b/synapse/storage/schema/main/delta/73/21_un_partial_stated_room_stream_seq.sql.postgres
new file mode 100644
index 0000000000..c1aac0b385
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/21_un_partial_stated_room_stream_seq.sql.postgres
@@ -0,0 +1,20 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE SEQUENCE IF NOT EXISTS un_partial_stated_room_stream_sequence;
+
+SELECT setval('un_partial_stated_room_stream_sequence', (
+    SELECT COALESCE(MAX(stream_id), 1) FROM un_partial_stated_room_stream
+));
-- 
cgit 1.5.1


From 6a8310f3dfe77acf59df2fe3e88a71b85b9b3ecc Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 5 Dec 2022 09:00:59 -0500
Subject: Compare to the earliest known stream pos in the stream change cache.
 (#14435)

The internal methods of the StreamChangeCache were inconsistently
treating the earliest known stream position as valid. It is now treated as
invalid, meaning the cache cannot determine if an entity at the earliest
known stream position has changed or not.
---
 changelog.d/14435.bugfix                   |   1 +
 poetry.lock                                |   2 +-
 pyproject.toml                             |   3 +-
 synapse/util/caches/stream_change_cache.py | 142 +++++++++++++++++++++++------
 tests/util/test_stream_change_cache.py     |  38 +++-----
 5 files changed, 133 insertions(+), 53 deletions(-)
 create mode 100644 changelog.d/14435.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14435.bugfix b/changelog.d/14435.bugfix
new file mode 100644
index 0000000000..149ee99dd7
--- /dev/null
+++ b/changelog.d/14435.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where a device list update might not be sent to clients in certain circumstances.
diff --git a/poetry.lock b/poetry.lock
index 8c63134578..90b363a548 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1639,7 +1639,7 @@ url-preview = ["lxml"]
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.7.1"
-content-hash = "27811bd21d56ceeb0f68ded5a00375efcd1a004928f0736f5b02927ce8594cb0"
+content-hash = "8c44ceeb9df5c3ab43040400e0a6b895de49417e61293a1ba027640b34f03263"
 
 [metadata.files]
 attrs = [
diff --git a/pyproject.toml b/pyproject.toml
index af5ce2aa03..1368e4e688 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -141,7 +141,8 @@ pyasn1 = ">=0.1.9"
 pyasn1-modules = ">=0.0.7"
 bcrypt = ">=3.1.7"
 Pillow = ">=5.4.0"
-sortedcontainers = ">=1.4.4"
+# We use SortedDict.peekitem(), which was added in sortedcontainers 1.5.2.
+sortedcontainers = ">=1.5.2"
 pymacaroons = ">=0.13.0"
 msgpack = ">=0.5.2"
 phonenumbers = ">=8.2.0"
diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py
index 666f4b6895..042de8d7c8 100644
--- a/synapse/util/caches/stream_change_cache.py
+++ b/synapse/util/caches/stream_change_cache.py
@@ -27,13 +27,17 @@ EntityType = str
 
 
 class StreamChangeCache:
-    """Keeps track of the stream positions of the latest change in a set of entities.
+    """
+    Keeps track of the stream positions of the latest change in a set of entities.
+
+    The entity is typically a room ID or user ID, but can be any string.
 
-    Typically the entity will be a room or user id.
+    Can be queried for whether a specific entity has changed after a stream position
+    or for a list of changed entities after a stream position. See the individual
+    methods for more information.
 
-    Given a list of entities and a stream position, it will give a subset of
-    entities that may have changed since that position. If position key is too
-    old then the cache will simply return all given entities.
+    Only tracks changes up to a maximum cache size; any position at or earlier than
+    the earliest known stream position must be treated as unknown.
     """
 
     def __init__(
@@ -45,16 +49,20 @@ class StreamChangeCache:
     ) -> None:
         self._original_max_size: int = max_size
         self._max_size = math.floor(max_size)
-        self._entity_to_key: Dict[EntityType, int] = {}
 
-        # map from stream id to the a set of entities which changed at that stream id.
+        # map from stream id to the set of entities which changed at that stream id.
         self._cache: SortedDict[int, Set[EntityType]] = SortedDict()
+        # map from entity to the stream ID of the latest change for that entity.
+        #
+        # Must be kept in sync with _cache.
+        self._entity_to_key: Dict[EntityType, int] = {}
 
         # the earliest stream_pos for which we can reliably answer
         # get_all_entities_changed. In other words, one less than the earliest
         # stream_pos for which we know _cache is valid.
         #
         self._earliest_known_stream_pos = current_stream_pos
+
         self.name = name
         self.metrics = caches.register_cache(
             "cache", self.name, self._cache, resize_callback=self.set_cache_factor
@@ -82,22 +90,46 @@ class StreamChangeCache:
         return False
 
     def has_entity_changed(self, entity: EntityType, stream_pos: int) -> bool:
-        """Returns True if the entity may have been updated since stream_pos"""
+        """
+        Returns True if the entity may have been updated after stream_pos.
+
+        Args:
+            entity: The entity to check for changes.
+            stream_pos: The stream position to check for changes after.
+
+        Return:
+            True if the entity may have been updated, this happens if:
+                * The given stream position is at or earlier than the earliest
+                  known stream position.
+                * The given stream position is earlier than the latest change for
+                  the entity.
+
+            False otherwise:
+                * The entity is unknown.
+                * The given stream position is at or later than the latest change
+                  for the entity.
+        """
         assert isinstance(stream_pos, int)
 
-        if stream_pos < self._earliest_known_stream_pos:
+        # _cache is not valid at or before the earliest known stream position, so
+        # return that the entity has changed.
+        if stream_pos <= self._earliest_known_stream_pos:
             self.metrics.inc_misses()
             return True
 
+        # If the entity is unknown, it hasn't changed.
         latest_entity_change_pos = self._entity_to_key.get(entity, None)
         if latest_entity_change_pos is None:
             self.metrics.inc_hits()
             return False
 
+        # This is a known entity, return true if the stream position is earlier
+        # than the last change.
         if stream_pos < latest_entity_change_pos:
             self.metrics.inc_misses()
             return True
 
+        # Otherwise, the stream position is after the latest change: return false.
         self.metrics.inc_hits()
         return False
 
@@ -105,15 +137,27 @@ class StreamChangeCache:
         self, entities: Collection[EntityType], stream_pos: int
     ) -> Union[Set[EntityType], FrozenSet[EntityType]]:
         """
-        Returns subset of entities that have had new things since the given
-        position.  Entities unknown to the cache will be returned.  If the
-        position is too old it will just return the given list.
+        Returns the subset of the given entities that have had changes after the given position.
+
+        Entities unknown to the cache will be returned.
+
+        If the position is too old it will just return the given list.
+
+        Args:
+            entities: Entities to check for changes.
+            stream_pos: The stream position to check for changes after.
+
+        Return:
+            A subset of entities which have changed after the given stream position.
+
+            This will be all entities if the given stream position is at or earlier
+            than the earliest known stream position.
         """
         changed_entities = self.get_all_entities_changed(stream_pos)
         if changed_entities is not None:
             # We now do an intersection, trying to do so in the most efficient
             # way possible (some of these sets are *large*). First check in the
-            # given iterable is already set that we can reuse, otherwise we
+            # given iterable is already a set that we can reuse, otherwise we
             # create a set of the *smallest* of the two iterables and call
             # `intersection(..)` on it (this can be twice as fast as the reverse).
             if isinstance(entities, (set, frozenset)):
@@ -130,29 +174,57 @@ class StreamChangeCache:
         return result
 
     def has_any_entity_changed(self, stream_pos: int) -> bool:
-        """Returns if any entity has changed"""
-        assert type(stream_pos) is int
+        """
+        Returns true if any entity has changed after the given stream position.
+
+        Args:
+            stream_pos: The stream position to check for changes after.
+
+        Return:
+            True if any entity has changed after the given stream position or
+            if the given stream position is at or earlier than the earliest
+            known stream position.
+
+            False otherwise.
+        """
+        assert isinstance(stream_pos, int)
 
         if not self._cache:
             # If the cache is empty, nothing can have changed.
             return False
 
-        if stream_pos >= self._earliest_known_stream_pos:
-            self.metrics.inc_hits()
-            return self._cache.bisect_right(stream_pos) < len(self._cache)
-        else:
+        # _cache is not valid at or before the earliest known stream position, so
+        # return that an entity has changed.
+        if stream_pos <= self._earliest_known_stream_pos:
             self.metrics.inc_misses()
             return True
 
+        self.metrics.inc_hits()
+        return stream_pos < self._cache.peekitem()[0]
+
     def get_all_entities_changed(self, stream_pos: int) -> Optional[List[EntityType]]:
-        """Returns all entities that have had new things since the given
-        position. If the position is too old it will return None.
+        """
+        Returns all entities that have had changes after the given position.
+
+        If the stream change cache does not go far enough back, i.e. the position
+        is too old, it will return None.
 
         Returns the entities in the order that they were changed.
+
+        Args:
+            stream_pos: The stream position to check for changes after.
+
+        Return:
+            Entities which have changed after the given stream position.
+
+            None if the given stream position is at or earlier than the earliest
+            known stream position.
         """
-        assert type(stream_pos) is int
+        assert isinstance(stream_pos, int)
 
-        if stream_pos < self._earliest_known_stream_pos:
+        # _cache is not valid at or before the earliest known stream position, so
+        # return None to mark that it is unknown if an entity has changed.
+        if stream_pos <= self._earliest_known_stream_pos:
             return None
 
         changed_entities: List[EntityType] = []
@@ -162,11 +234,17 @@ class StreamChangeCache:
         return changed_entities
 
     def entity_has_changed(self, entity: EntityType, stream_pos: int) -> None:
-        """Informs the cache that the entity has been changed at the given
-        position.
         """
-        assert type(stream_pos) is int
+        Informs the cache that the entity has been changed at the given position.
+
+        Args:
+            entity: The entity to mark as changed.
+            stream_pos: The stream position to update the entity to.
+        """
+        assert isinstance(stream_pos, int)
 
+        # For a change before _cache is valid (e.g. at or before the earliest known
+        # stream position) there's nothing to do.
         if stream_pos <= self._earliest_known_stream_pos:
             return
 
@@ -189,6 +267,11 @@ class StreamChangeCache:
         self._evict()
 
     def _evict(self) -> None:
+        """
+        Ensure the cache has not exceeded the maximum size.
+
+        Evicts entries until it is at the maximum size.
+        """
         # if the cache is too big, remove entries
         while len(self._cache) > self._max_size:
             k, r = self._cache.popitem(0)
@@ -199,5 +282,12 @@ class StreamChangeCache:
     def get_max_pos_of_last_change(self, entity: EntityType) -> int:
         """Returns an upper bound of the stream id of the last change to an
         entity.
+
+        Args:
+            entity: The entity to check.
+
+        Return:
+            The stream position of the latest change for the given entity or
+            the earliest known stream position if the entitiy is unknown.
         """
         return self._entity_to_key.get(entity, self._earliest_known_stream_pos)
diff --git a/tests/util/test_stream_change_cache.py b/tests/util/test_stream_change_cache.py
index 1b0fa52ad1..a29cc872f9 100644
--- a/tests/util/test_stream_change_cache.py
+++ b/tests/util/test_stream_change_cache.py
@@ -51,6 +51,8 @@ class StreamChangeCacheTests(unittest.HomeserverTestCase):
         # return True, whether it's a known entity or not.
         self.assertTrue(cache.has_entity_changed("user@foo.com", 0))
         self.assertTrue(cache.has_entity_changed("not@here.website", 0))
+        self.assertTrue(cache.has_entity_changed("user@foo.com", 3))
+        self.assertTrue(cache.has_entity_changed("not@here.website", 3))
 
     def test_entity_has_changed_pops_off_start(self) -> None:
         """
@@ -65,15 +67,14 @@ class StreamChangeCacheTests(unittest.HomeserverTestCase):
 
         # The cache is at the max size, 2
         self.assertEqual(len(cache._cache), 2)
+        # The cache's earliest known position is 2.
+        self.assertEqual(cache._earliest_known_stream_pos, 2)
 
         # The oldest item has been popped off
         self.assertTrue("user@foo.com" not in cache._entity_to_key)
 
-        self.assertEqual(
-            cache.get_all_entities_changed(2),
-            ["bar@baz.net", "user@elsewhere.org"],
-        )
-        self.assertIsNone(cache.get_all_entities_changed(1))
+        self.assertEqual(cache.get_all_entities_changed(3), ["user@elsewhere.org"])
+        self.assertIsNone(cache.get_all_entities_changed(2))
 
         # If we update an existing entity, it keeps the two existing entities
         cache.entity_has_changed("bar@baz.net", 5)
@@ -81,10 +82,10 @@ class StreamChangeCacheTests(unittest.HomeserverTestCase):
             {"bar@baz.net", "user@elsewhere.org"}, set(cache._entity_to_key)
         )
         self.assertEqual(
-            cache.get_all_entities_changed(2),
+            cache.get_all_entities_changed(3),
             ["user@elsewhere.org", "bar@baz.net"],
         )
-        self.assertIsNone(cache.get_all_entities_changed(1))
+        self.assertIsNone(cache.get_all_entities_changed(2))
 
     def test_get_all_entities_changed(self) -> None:
         """
@@ -99,28 +100,15 @@ class StreamChangeCacheTests(unittest.HomeserverTestCase):
         cache.entity_has_changed("anotheruser@foo.com", 3)
         cache.entity_has_changed("user@elsewhere.org", 4)
 
-        r = cache.get_all_entities_changed(1)
+        r = cache.get_all_entities_changed(2)
 
-        # either of these are valid
-        ok1 = [
-            "user@foo.com",
-            "bar@baz.net",
-            "anotheruser@foo.com",
-            "user@elsewhere.org",
-        ]
-        ok2 = [
-            "user@foo.com",
-            "anotheruser@foo.com",
-            "bar@baz.net",
-            "user@elsewhere.org",
-        ]
+        # Results are ordered so either of these are valid.
+        ok1 = ["bar@baz.net", "anotheruser@foo.com", "user@elsewhere.org"]
+        ok2 = ["anotheruser@foo.com", "bar@baz.net", "user@elsewhere.org"]
         self.assertTrue(r == ok1 or r == ok2)
 
-        r = cache.get_all_entities_changed(2)
-        self.assertTrue(r == ok1[1:] or r == ok2[1:])
-
         self.assertEqual(cache.get_all_entities_changed(3), ["user@elsewhere.org"])
-        self.assertEqual(cache.get_all_entities_changed(0), None)
+        self.assertEqual(cache.get_all_entities_changed(1), None)
 
         # ... later, things gest more updates
         cache.entity_has_changed("user@foo.com", 5)
-- 
cgit 1.5.1


From cee9445884eb62c070fb0b03a112a862e8dea7c4 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 5 Dec 2022 20:19:14 +0000
Subject: Better return type for `get_all_entities_changed` (#14604)

Prevent callers from using the return value incorrectly by ensuring
that they explicitly check whether there was a cache hit.
---
 changelog.d/14604.bugfix                   |   1 +
 synapse/handlers/appservice.py             |   4 +-
 synapse/handlers/presence.py               |  12 ++--
 synapse/handlers/sync.py                   |   6 +-
 synapse/handlers/typing.py                 |   8 +--
 synapse/storage/databases/main/devices.py  | 111 ++++++++++++++++++-----------
 synapse/util/caches/stream_change_cache.py |  52 ++++++++++----
 tests/util/test_stream_change_cache.py     |  20 +++---
 8 files changed, 138 insertions(+), 76 deletions(-)
 create mode 100644 changelog.d/14604.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14604.bugfix b/changelog.d/14604.bugfix
new file mode 100644
index 0000000000..149ee99dd7
--- /dev/null
+++ b/changelog.d/14604.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where a device list update might not be sent to clients in certain circumstances.
diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py
index 66f5b8d108..f68027aaed 100644
--- a/synapse/handlers/appservice.py
+++ b/synapse/handlers/appservice.py
@@ -615,8 +615,8 @@ class ApplicationServicesHandler:
         )
 
         # Fetch the users who have modified their device list since then.
-        users_with_changed_device_lists = (
-            await self.store.get_users_whose_devices_changed(from_key, to_key=new_key)
+        users_with_changed_device_lists = await self.store.get_all_devices_changed(
+            from_key, to_key=new_key
         )
 
         # Filter out any users the application service is not interested in
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 1799174c2f..2af90b25a3 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -1692,10 +1692,12 @@ class PresenceEventSource(EventSource[int, UserPresenceState]):
 
             if from_key is not None:
                 # First get all users that have had a presence update
-                updated_users = stream_change_cache.get_all_entities_changed(from_key)
+                result = stream_change_cache.get_all_entities_changed(from_key)
 
                 # Cross-reference users we're interested in with those that have had updates.
-                if updated_users is not None:
+                if result.hit:
+                    updated_users = result.entities
+
                     # If we have the full list of changes for presence we can
                     # simply check which ones share a room with the user.
                     get_updates_counter.labels("stream").inc()
@@ -1767,9 +1769,9 @@ class PresenceEventSource(EventSource[int, UserPresenceState]):
         updated_users = None
         if from_key:
             # Only return updates since the last sync
-            updated_users = self.store.presence_stream_cache.get_all_entities_changed(
-                from_key
-            )
+            result = self.store.presence_stream_cache.get_all_entities_changed(from_key)
+            if result.hit:
+                updated_users = result.entities
 
         if updated_users is not None:
             # Get the actual presence update for each change
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index c8858b22dd..0b395a104d 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1528,10 +1528,12 @@ class SyncHandler:
             #
             # If we don't have that info cached then we get all the users that
             # share a room with our user and check if those users have changed.
-            changed_users = self.store.get_cached_device_list_changes(
+            cache_result = self.store.get_cached_device_list_changes(
                 since_token.device_list_key
             )
-            if changed_users is not None:
+            if cache_result.hit:
+                changed_users = cache_result.entities
+
                 result = await self.store.get_rooms_for_users(changed_users)
 
                 for changed_user_id, entries in result.items():
diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py
index a0ea719430..3f656ea4f5 100644
--- a/synapse/handlers/typing.py
+++ b/synapse/handlers/typing.py
@@ -420,11 +420,11 @@ class TypingWriterHandler(FollowerTypingHandler):
         if last_id == current_id:
             return [], current_id, False
 
-        changed_rooms: Optional[
-            Iterable[str]
-        ] = self._typing_stream_change_cache.get_all_entities_changed(last_id)
+        result = self._typing_stream_change_cache.get_all_entities_changed(last_id)
 
-        if changed_rooms is None:
+        if result.hit:
+            changed_rooms: Iterable[str] = result.entities
+        else:
             changed_rooms = self._room_serials
 
         rows = []
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 8ba995df3b..a5bb4d404e 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -58,7 +58,10 @@ from synapse.types import JsonDict, get_verify_key_from_cross_signing_key
 from synapse.util import json_decoder, json_encoder
 from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.caches.lrucache import LruCache
-from synapse.util.caches.stream_change_cache import StreamChangeCache
+from synapse.util.caches.stream_change_cache import (
+    AllEntitiesChangedResult,
+    StreamChangeCache,
+)
 from synapse.util.cancellation import cancellable
 from synapse.util.iterutils import batch_iter
 from synapse.util.stringutils import shortstr
@@ -799,18 +802,66 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
     def get_cached_device_list_changes(
         self,
         from_key: int,
-    ) -> Optional[List[str]]:
+    ) -> AllEntitiesChangedResult:
         """Get set of users whose devices have changed since `from_key`, or None
         if that information is not in our cache.
         """
 
         return self._device_list_stream_cache.get_all_entities_changed(from_key)
 
+    @cancellable
+    async def get_all_devices_changed(
+        self,
+        from_key: int,
+        to_key: int,
+    ) -> Set[str]:
+        """Get all users whose devices have changed in the given range.
+
+        Args:
+            from_key: The minimum device lists stream token to query device list
+                changes for, exclusive.
+            to_key: The maximum device lists stream token to query device list
+                changes for, inclusive.
+
+        Returns:
+            The set of user_ids whose devices have changed since `from_key`
+            (exclusive) until `to_key` (inclusive).
+        """
+
+        result = self._device_list_stream_cache.get_all_entities_changed(from_key)
+
+        if result.hit:
+            # We know which users might have changed devices.
+            if not result.entities:
+                # If no users then we can return early.
+                return set()
+
+            # Otherwise we need to filter down the list
+            return await self.get_users_whose_devices_changed(
+                from_key, result.entities, to_key
+            )
+
+        # If the cache didn't tell us anything, we just need to query the full
+        # range.
+        sql = """
+            SELECT DISTINCT user_id FROM device_lists_stream
+            WHERE ? < stream_id AND stream_id <= ?
+        """
+
+        rows = await self.db_pool.execute(
+            "get_all_devices_changed",
+            None,
+            sql,
+            from_key,
+            to_key,
+        )
+        return {u for u, in rows}
+
     @cancellable
     async def get_users_whose_devices_changed(
         self,
         from_key: int,
-        user_ids: Optional[Collection[str]] = None,
+        user_ids: Collection[str],
         to_key: Optional[int] = None,
     ) -> Set[str]:
         """Get set of users whose devices have changed since `from_key` that
@@ -830,52 +881,32 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
         """
         # Get set of users who *may* have changed. Users not in the returned
         # list have definitely not changed.
-        user_ids_to_check: Optional[Collection[str]]
-        if user_ids is None:
-            # Get set of all users that have had device list changes since 'from_key'
-            user_ids_to_check = self._device_list_stream_cache.get_all_entities_changed(
-                from_key
-            )
-        else:
-            # The same as above, but filter results to only those users in 'user_ids'
-            user_ids_to_check = self._device_list_stream_cache.get_entities_changed(
-                user_ids, from_key
-            )
+        user_ids_to_check = self._device_list_stream_cache.get_entities_changed(
+            user_ids, from_key
+        )
 
         # If an empty set was returned, there's nothing to do.
-        if user_ids_to_check is not None and not user_ids_to_check:
+        if not user_ids_to_check:
             return set()
 
-        def _get_users_whose_devices_changed_txn(txn: LoggingTransaction) -> Set[str]:
-            stream_id_where_clause = "stream_id > ?"
-            sql_args = [from_key]
-
-            if to_key:
-                stream_id_where_clause += " AND stream_id <= ?"
-                sql_args.append(to_key)
+        if to_key is None:
+            to_key = self._device_list_id_gen.get_current_token()
 
-            sql = f"""
+        def _get_users_whose_devices_changed_txn(txn: LoggingTransaction) -> Set[str]:
+            sql = """
                 SELECT DISTINCT user_id FROM device_lists_stream
-                WHERE {stream_id_where_clause}
+                WHERE  ? < stream_id AND stream_id <= ? AND %s
             """
 
-            # If the stream change cache gave us no information, fetch *all*
-            # users between the stream IDs.
-            if user_ids_to_check is None:
-                txn.execute(sql, sql_args)
-                return {user_id for user_id, in txn}
+            changes: Set[str] = set()
 
-            # Otherwise, fetch changes for the given users.
-            else:
-                changes: Set[str] = set()
-
-                # Query device changes with a batch of users at a time
-                for chunk in batch_iter(user_ids_to_check, 100):
-                    clause, args = make_in_list_sql_clause(
-                        txn.database_engine, "user_id", chunk
-                    )
-                    txn.execute(sql + " AND " + clause, sql_args + args)
-                    changes.update(user_id for user_id, in txn)
+            # Query device changes with a batch of users at a time
+            for chunk in batch_iter(user_ids_to_check, 100):
+                clause, args = make_in_list_sql_clause(
+                    txn.database_engine, "user_id", chunk
+                )
+                txn.execute(sql % (clause,), [from_key, to_key] + args)
+                changes.update(user_id for user_id, in txn)
 
             return changes
 
diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py
index 042de8d7c8..c8b17acb59 100644
--- a/synapse/util/caches/stream_change_cache.py
+++ b/synapse/util/caches/stream_change_cache.py
@@ -16,6 +16,7 @@ import logging
 import math
 from typing import Collection, Dict, FrozenSet, List, Mapping, Optional, Set, Union
 
+import attr
 from sortedcontainers import SortedDict
 
 from synapse.util import caches
@@ -26,6 +27,29 @@ logger = logging.getLogger(__name__)
 EntityType = str
 
 
+@attr.s(auto_attribs=True, frozen=True, slots=True)
+class AllEntitiesChangedResult:
+    """Return type of `get_all_entities_changed`.
+
+    Callers must check that there was a cache hit, via `result.hit`, before
+    using the entities in `result.entities`.
+
+    This specifically does *not* implement helpers such as `__bool__` to ensure
+    that callers do the correct checks.
+    """
+
+    _entities: Optional[List[EntityType]]
+
+    @property
+    def hit(self) -> bool:
+        return self._entities is not None
+
+    @property
+    def entities(self) -> List[EntityType]:
+        assert self._entities is not None
+        return self._entities
+
+
 class StreamChangeCache:
     """
     Keeps track of the stream positions of the latest change in a set of entities.
@@ -153,19 +177,19 @@ class StreamChangeCache:
             This will be all entities if the given stream position is at or earlier
             than the earliest known stream position.
         """
-        changed_entities = self.get_all_entities_changed(stream_pos)
-        if changed_entities is not None:
+        cache_result = self.get_all_entities_changed(stream_pos)
+        if cache_result.hit:
             # We now do an intersection, trying to do so in the most efficient
             # way possible (some of these sets are *large*). First check in the
             # given iterable is already a set that we can reuse, otherwise we
             # create a set of the *smallest* of the two iterables and call
             # `intersection(..)` on it (this can be twice as fast as the reverse).
             if isinstance(entities, (set, frozenset)):
-                result = entities.intersection(changed_entities)
-            elif len(changed_entities) < len(entities):
-                result = set(changed_entities).intersection(entities)
+                result = entities.intersection(cache_result.entities)
+            elif len(cache_result.entities) < len(entities):
+                result = set(cache_result.entities).intersection(entities)
             else:
-                result = set(entities).intersection(changed_entities)
+                result = set(entities).intersection(cache_result.entities)
             self.metrics.inc_hits()
         else:
             result = set(entities)
@@ -202,12 +226,12 @@ class StreamChangeCache:
         self.metrics.inc_hits()
         return stream_pos < self._cache.peekitem()[0]
 
-    def get_all_entities_changed(self, stream_pos: int) -> Optional[List[EntityType]]:
+    def get_all_entities_changed(self, stream_pos: int) -> AllEntitiesChangedResult:
         """
         Returns all entities that have had changes after the given position.
 
-        If the stream change cache does not go far enough back, i.e. the position
-        is too old, it will return None.
+        If the stream change cache does not go far enough back, i.e. the
+        position is too old, it will return None.
 
         Returns the entities in the order that they were changed.
 
@@ -215,23 +239,21 @@ class StreamChangeCache:
             stream_pos: The stream position to check for changes after.
 
         Return:
-            Entities which have changed after the given stream position.
-
-            None if the given stream position is at or earlier than the earliest
-            known stream position.
+            A class indicating if we have the requested data cached, and if so
+            includes the entities in the order they were changed.
         """
         assert isinstance(stream_pos, int)
 
         # _cache is not valid at or before the earliest known stream position, so
         # return None to mark that it is unknown if an entity has changed.
         if stream_pos <= self._earliest_known_stream_pos:
-            return None
+            return AllEntitiesChangedResult(None)
 
         changed_entities: List[EntityType] = []
 
         for k in self._cache.islice(start=self._cache.bisect_right(stream_pos)):
             changed_entities.extend(self._cache[k])
-        return changed_entities
+        return AllEntitiesChangedResult(changed_entities)
 
     def entity_has_changed(self, entity: EntityType, stream_pos: int) -> None:
         """
diff --git a/tests/util/test_stream_change_cache.py b/tests/util/test_stream_change_cache.py
index a29cc872f9..0305741c99 100644
--- a/tests/util/test_stream_change_cache.py
+++ b/tests/util/test_stream_change_cache.py
@@ -73,8 +73,10 @@ class StreamChangeCacheTests(unittest.HomeserverTestCase):
         # The oldest item has been popped off
         self.assertTrue("user@foo.com" not in cache._entity_to_key)
 
-        self.assertEqual(cache.get_all_entities_changed(3), ["user@elsewhere.org"])
-        self.assertIsNone(cache.get_all_entities_changed(2))
+        self.assertEqual(
+            cache.get_all_entities_changed(3).entities, ["user@elsewhere.org"]
+        )
+        self.assertFalse(cache.get_all_entities_changed(2).hit)
 
         # If we update an existing entity, it keeps the two existing entities
         cache.entity_has_changed("bar@baz.net", 5)
@@ -82,10 +84,10 @@ class StreamChangeCacheTests(unittest.HomeserverTestCase):
             {"bar@baz.net", "user@elsewhere.org"}, set(cache._entity_to_key)
         )
         self.assertEqual(
-            cache.get_all_entities_changed(3),
+            cache.get_all_entities_changed(3).entities,
             ["user@elsewhere.org", "bar@baz.net"],
         )
-        self.assertIsNone(cache.get_all_entities_changed(2))
+        self.assertFalse(cache.get_all_entities_changed(2).hit)
 
     def test_get_all_entities_changed(self) -> None:
         """
@@ -105,10 +107,12 @@ class StreamChangeCacheTests(unittest.HomeserverTestCase):
         # Results are ordered so either of these are valid.
         ok1 = ["bar@baz.net", "anotheruser@foo.com", "user@elsewhere.org"]
         ok2 = ["anotheruser@foo.com", "bar@baz.net", "user@elsewhere.org"]
-        self.assertTrue(r == ok1 or r == ok2)
+        self.assertTrue(r.entities == ok1 or r.entities == ok2)
 
-        self.assertEqual(cache.get_all_entities_changed(3), ["user@elsewhere.org"])
-        self.assertEqual(cache.get_all_entities_changed(1), None)
+        self.assertEqual(
+            cache.get_all_entities_changed(3).entities, ["user@elsewhere.org"]
+        )
+        self.assertFalse(cache.get_all_entities_changed(1).hit)
 
         # ... later, things gest more updates
         cache.entity_has_changed("user@foo.com", 5)
@@ -128,7 +132,7 @@ class StreamChangeCacheTests(unittest.HomeserverTestCase):
             "anotheruser@foo.com",
         ]
         r = cache.get_all_entities_changed(3)
-        self.assertTrue(r == ok1 or r == ok2)
+        self.assertTrue(r.entities == ok1 or r.entities == ok2)
 
     def test_has_any_entity_changed(self) -> None:
         """
-- 
cgit 1.5.1


From cb59e080627745d089d073d9dac276362d9abaf6 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Tue, 6 Dec 2022 09:52:55 +0000
Subject: Improve logging and opentracing for to-device message handling
 (#14598)

A batch of changes intended to make it easier to trace to-device messages through the system.

The intention here is that a client can set a property org.matrix.msgid in any to-device message it sends. That ID is then included in any tracing or logging related to the message. (Suggestions as to where this field should be documented welcome. I'm not enthusiastic about speccing it - it's very much an optional extra to help with debugging.)

I've also generally improved the data we send to opentracing for these messages.
---
 changelog.d/14598.feature                          |  1 +
 synapse/api/constants.py                           |  3 +
 synapse/federation/sender/per_destination_queue.py |  2 +-
 synapse/handlers/appservice.py                     |  3 -
 synapse/handlers/devicemessage.py                  | 36 +++++----
 synapse/handlers/sync.py                           | 26 ++++--
 synapse/logging/opentracing.py                     | 11 ++-
 synapse/rest/client/sendtodevice.py                |  1 -
 synapse/storage/databases/main/deviceinbox.py      | 92 ++++++++++++++++++----
 tests/handlers/test_appservice.py                  |  7 +-
 10 files changed, 136 insertions(+), 46 deletions(-)
 create mode 100644 changelog.d/14598.feature

(limited to 'synapse')

diff --git a/changelog.d/14598.feature b/changelog.d/14598.feature
new file mode 100644
index 0000000000..88d561e286
--- /dev/null
+++ b/changelog.d/14598.feature
@@ -0,0 +1 @@
+Improve opentracing and logging for to-device message handling.
\ No newline at end of file
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index bc04a0755b..89723d24fa 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -230,6 +230,9 @@ class EventContentFields:
     # The authorising user for joining a restricted room.
     AUTHORISING_USER: Final = "join_authorised_via_users_server"
 
+    # an unspecced field added to to-device messages to identify them uniquely-ish
+    TO_DEVICE_MSGID: Final = "org.matrix.msgid"
+
 
 class RoomTypes:
     """Understood values of the room_type field of m.room.create events."""
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index 5af2784f1e..ffc9d95ee7 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -641,7 +641,7 @@ class PerDestinationQueue:
             if not message_id:
                 continue
 
-            set_tag(SynapseTags.TO_DEVICE_MESSAGE_ID, message_id)
+            set_tag(SynapseTags.TO_DEVICE_EDU_ID, message_id)
 
         edus = [
             Edu(
diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py
index f68027aaed..5d1d21cdc8 100644
--- a/synapse/handlers/appservice.py
+++ b/synapse/handlers/appservice.py
@@ -578,9 +578,6 @@ class ApplicationServicesHandler:
             device_id,
         ), messages in recipient_device_to_messages.items():
             for message_json in messages:
-                # Remove 'message_id' from the to-device message, as it's an internal ID
-                message_json.pop("message_id", None)
-
                 message_payload.append(
                     {
                         "to_user_id": user_id,
diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py
index 444c08bc2e..75e89850f5 100644
--- a/synapse/handlers/devicemessage.py
+++ b/synapse/handlers/devicemessage.py
@@ -15,7 +15,7 @@
 import logging
 from typing import TYPE_CHECKING, Any, Dict
 
-from synapse.api.constants import EduTypes, ToDeviceEventTypes
+from synapse.api.constants import EduTypes, EventContentFields, ToDeviceEventTypes
 from synapse.api.errors import SynapseError
 from synapse.api.ratelimiting import Ratelimiter
 from synapse.logging.context import run_in_background
@@ -216,14 +216,24 @@ class DeviceMessageHandler:
         """
         sender_user_id = requester.user.to_string()
 
-        message_id = random_string(16)
-        set_tag(SynapseTags.TO_DEVICE_MESSAGE_ID, message_id)
-
-        log_kv({"number_of_to_device_messages": len(messages)})
-        set_tag("sender", sender_user_id)
+        set_tag(SynapseTags.TO_DEVICE_TYPE, message_type)
+        set_tag(SynapseTags.TO_DEVICE_SENDER, sender_user_id)
         local_messages = {}
         remote_messages: Dict[str, Dict[str, Dict[str, JsonDict]]] = {}
         for user_id, by_device in messages.items():
+            # add an opentracing log entry for each message
+            for device_id, message_content in by_device.items():
+                log_kv(
+                    {
+                        "event": "send_to_device_message",
+                        "user_id": user_id,
+                        "device_id": device_id,
+                        EventContentFields.TO_DEVICE_MSGID: message_content.get(
+                            EventContentFields.TO_DEVICE_MSGID
+                        ),
+                    }
+                )
+
             # Ratelimit local cross-user key requests by the sending device.
             if (
                 message_type == ToDeviceEventTypes.RoomKeyRequest
@@ -233,6 +243,7 @@ class DeviceMessageHandler:
                     requester, (sender_user_id, requester.device_id)
                 )
                 if not allowed:
+                    log_kv({"message": f"dropping key requests to {user_id}"})
                     logger.info(
                         "Dropping room_key_request from %s to %s due to rate limit",
                         sender_user_id,
@@ -247,18 +258,11 @@ class DeviceMessageHandler:
                         "content": message_content,
                         "type": message_type,
                         "sender": sender_user_id,
-                        "message_id": message_id,
                     }
                     for device_id, message_content in by_device.items()
                 }
                 if messages_by_device:
                     local_messages[user_id] = messages_by_device
-                    log_kv(
-                        {
-                            "user_id": user_id,
-                            "device_id": list(messages_by_device),
-                        }
-                    )
             else:
                 destination = get_domain_from_id(user_id)
                 remote_messages.setdefault(destination, {})[user_id] = by_device
@@ -267,7 +271,11 @@ class DeviceMessageHandler:
 
         remote_edu_contents = {}
         for destination, messages in remote_messages.items():
-            log_kv({"destination": destination})
+            # The EDU contains a "message_id" property which is used for
+            # idempotence. Make up a random one.
+            message_id = random_string(16)
+            log_kv({"destination": destination, "message_id": message_id})
+
             remote_edu_contents[destination] = {
                 "messages": messages,
                 "sender": sender_user_id,
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 0b395a104d..dace9b606f 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -31,14 +31,20 @@ from typing import (
 import attr
 from prometheus_client import Counter
 
-from synapse.api.constants import EventTypes, Membership
+from synapse.api.constants import EventContentFields, EventTypes, Membership
 from synapse.api.filtering import FilterCollection
 from synapse.api.presence import UserPresenceState
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.events import EventBase
 from synapse.handlers.relations import BundledAggregations
 from synapse.logging.context import current_context
-from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, start_active_span
+from synapse.logging.opentracing import (
+    SynapseTags,
+    log_kv,
+    set_tag,
+    start_active_span,
+    trace,
+)
 from synapse.push.clientformat import format_push_rules_for_user
 from synapse.storage.databases.main.event_push_actions import RoomNotifCounts
 from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary
@@ -1586,6 +1592,7 @@ class SyncHandler:
         else:
             return DeviceListUpdates()
 
+    @trace
     async def _generate_sync_entry_for_to_device(
         self, sync_result_builder: "SyncResultBuilder"
     ) -> None:
@@ -1605,11 +1612,16 @@ class SyncHandler:
             )
 
             for message in messages:
-                # We pop here as we shouldn't be sending the message ID down
-                # `/sync`
-                message_id = message.pop("message_id", None)
-                if message_id:
-                    set_tag(SynapseTags.TO_DEVICE_MESSAGE_ID, message_id)
+                log_kv(
+                    {
+                        "event": "to_device_message",
+                        "sender": message["sender"],
+                        "type": message["type"],
+                        EventContentFields.TO_DEVICE_MSGID: message["content"].get(
+                            EventContentFields.TO_DEVICE_MSGID
+                        ),
+                    }
+                )
 
             logger.debug(
                 "Returning %d to-device messages between %d and %d (current token: %d)",
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index b69060854f..a705af8356 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -292,8 +292,15 @@ logger = logging.getLogger(__name__)
 
 
 class SynapseTags:
-    # The message ID of any to_device message processed
-    TO_DEVICE_MESSAGE_ID = "to_device.message_id"
+    # The message ID of any to_device EDU processed
+    TO_DEVICE_EDU_ID = "to_device.edu_id"
+
+    # Details about to-device messages
+    TO_DEVICE_TYPE = "to_device.type"
+    TO_DEVICE_SENDER = "to_device.sender"
+    TO_DEVICE_RECIPIENT = "to_device.recipient"
+    TO_DEVICE_RECIPIENT_DEVICE = "to_device.recipient_device"
+    TO_DEVICE_MSGID = "to_device.msgid"  # client-generated ID
 
     # Whether the sync response has new data to be returned to the client.
     SYNC_RESULT = "sync.new_data"
diff --git a/synapse/rest/client/sendtodevice.py b/synapse/rest/client/sendtodevice.py
index 46a8b03829..55d52f0b28 100644
--- a/synapse/rest/client/sendtodevice.py
+++ b/synapse/rest/client/sendtodevice.py
@@ -46,7 +46,6 @@ class SendToDeviceRestServlet(servlet.RestServlet):
     def on_PUT(
         self, request: SynapseRequest, message_type: str, txn_id: str
     ) -> Awaitable[Tuple[int, JsonDict]]:
-        set_tag("message_type", message_type)
         set_tag("txn_id", txn_id)
         return self.txns.fetch_or_execute_request(
             request, self._put, request, message_type, txn_id
diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py
index 73c95ffb6f..48a54d9cb8 100644
--- a/synapse/storage/databases/main/deviceinbox.py
+++ b/synapse/storage/databases/main/deviceinbox.py
@@ -26,8 +26,15 @@ from typing import (
     cast,
 )
 
+from synapse.api.constants import EventContentFields
 from synapse.logging import issue9533_logger
-from synapse.logging.opentracing import log_kv, set_tag, trace
+from synapse.logging.opentracing import (
+    SynapseTags,
+    log_kv,
+    set_tag,
+    start_active_span,
+    trace,
+)
 from synapse.replication.tcp.streams import ToDeviceStream
 from synapse.storage._base import SQLBaseStore, db_to_json
 from synapse.storage.database import (
@@ -397,6 +404,17 @@ class DeviceInboxWorkerStore(SQLBaseStore):
                     (recipient_user_id, recipient_device_id), []
                 ).append(message_dict)
 
+                # start a new span for each message, so that we can tag each separately
+                with start_active_span("get_to_device_message"):
+                    set_tag(SynapseTags.TO_DEVICE_TYPE, message_dict["type"])
+                    set_tag(SynapseTags.TO_DEVICE_SENDER, message_dict["sender"])
+                    set_tag(SynapseTags.TO_DEVICE_RECIPIENT, recipient_user_id)
+                    set_tag(SynapseTags.TO_DEVICE_RECIPIENT_DEVICE, recipient_device_id)
+                    set_tag(
+                        SynapseTags.TO_DEVICE_MSGID,
+                        message_dict["content"].get(EventContentFields.TO_DEVICE_MSGID),
+                    )
+
             if limit is not None and rowcount == limit:
                 # We ended up bumping up against the message limit. There may be more messages
                 # to retrieve. Return what we have, as well as the last stream position that
@@ -678,12 +696,35 @@ class DeviceInboxWorkerStore(SQLBaseStore):
                 ],
             )
 
-            if remote_messages_by_destination:
-                issue9533_logger.debug(
-                    "Queued outgoing to-device messages with stream_id %i for %s",
-                    stream_id,
-                    list(remote_messages_by_destination.keys()),
-                )
+            for destination, edu in remote_messages_by_destination.items():
+                if issue9533_logger.isEnabledFor(logging.DEBUG):
+                    issue9533_logger.debug(
+                        "Queued outgoing to-device messages with "
+                        "stream_id %i, EDU message_id %s, type %s for %s: %s",
+                        stream_id,
+                        edu["message_id"],
+                        edu["type"],
+                        destination,
+                        [
+                            f"{user_id}/{device_id} (msgid "
+                            f"{msg.get(EventContentFields.TO_DEVICE_MSGID)})"
+                            for (user_id, messages_by_device) in edu["messages"].items()
+                            for (device_id, msg) in messages_by_device.items()
+                        ],
+                    )
+
+                for (user_id, messages_by_device) in edu["messages"].items():
+                    for (device_id, msg) in messages_by_device.items():
+                        with start_active_span("store_outgoing_to_device_message"):
+                            set_tag(SynapseTags.TO_DEVICE_EDU_ID, edu["sender"])
+                            set_tag(SynapseTags.TO_DEVICE_EDU_ID, edu["message_id"])
+                            set_tag(SynapseTags.TO_DEVICE_TYPE, edu["type"])
+                            set_tag(SynapseTags.TO_DEVICE_RECIPIENT, user_id)
+                            set_tag(SynapseTags.TO_DEVICE_RECIPIENT_DEVICE, device_id)
+                            set_tag(
+                                SynapseTags.TO_DEVICE_MSGID,
+                                msg.get(EventContentFields.TO_DEVICE_MSGID),
+                            )
 
         async with self._device_inbox_id_gen.get_next() as stream_id:
             now_ms = self._clock.time_msec()
@@ -801,7 +842,19 @@ class DeviceInboxWorkerStore(SQLBaseStore):
                     # Only insert into the local inbox if the device exists on
                     # this server
                     device_id = row["device_id"]
-                    message_json = json_encoder.encode(messages_by_device[device_id])
+
+                    with start_active_span("serialise_to_device_message"):
+                        msg = messages_by_device[device_id]
+                        set_tag(SynapseTags.TO_DEVICE_TYPE, msg["type"])
+                        set_tag(SynapseTags.TO_DEVICE_SENDER, msg["sender"])
+                        set_tag(SynapseTags.TO_DEVICE_RECIPIENT, user_id)
+                        set_tag(SynapseTags.TO_DEVICE_RECIPIENT_DEVICE, device_id)
+                        set_tag(
+                            SynapseTags.TO_DEVICE_MSGID,
+                            msg["content"].get(EventContentFields.TO_DEVICE_MSGID),
+                        )
+                        message_json = json_encoder.encode(msg)
+
                     messages_json_for_user[device_id] = message_json
 
             if messages_json_for_user:
@@ -821,15 +874,20 @@ class DeviceInboxWorkerStore(SQLBaseStore):
             ],
         )
 
-        issue9533_logger.debug(
-            "Stored to-device messages with stream_id %i for %s",
-            stream_id,
-            [
-                (user_id, device_id)
-                for (user_id, messages_by_device) in local_by_user_then_device.items()
-                for device_id in messages_by_device.keys()
-            ],
-        )
+        if issue9533_logger.isEnabledFor(logging.DEBUG):
+            issue9533_logger.debug(
+                "Stored to-device messages with stream_id %i: %s",
+                stream_id,
+                [
+                    f"{user_id}/{device_id} (msgid "
+                    f"{msg['content'].get(EventContentFields.TO_DEVICE_MSGID)})"
+                    for (
+                        user_id,
+                        messages_by_device,
+                    ) in messages_by_user_then_device.items()
+                    for (device_id, msg) in messages_by_device.items()
+                ],
+            )
 
 
 class DeviceInboxBackgroundUpdateStore(SQLBaseStore):
diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py
index 9ed26d87a7..57bfbd7734 100644
--- a/tests/handlers/test_appservice.py
+++ b/tests/handlers/test_appservice.py
@@ -765,7 +765,12 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase):
         fake_device_ids = [f"device_{num}" for num in range(number_of_messages - 1)]
         messages = {
             self.exclusive_as_user: {
-                device_id: to_device_message_content for device_id in fake_device_ids
+                device_id: {
+                    "type": "test_to_device_message",
+                    "sender": "@some:sender",
+                    "content": to_device_message_content,
+                }
+                for device_id in fake_device_ids
             }
         }
 
-- 
cgit 1.5.1


From 9b6224577e7a387bf94f2332301f21e9514286ff Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 6 Dec 2022 07:23:03 -0500
Subject: Failover on proper error responses. (#14620)

When querying a remote server, handle a 404/405 with an
errcode of M_UNRECOGNIZED as an unimplemented endpoint.
---
 changelog.d/14620.bugfix                |  1 +
 synapse/federation/federation_client.py | 29 ++++++++++++++++++++---------
 2 files changed, 21 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/14620.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14620.bugfix b/changelog.d/14620.bugfix
new file mode 100644
index 0000000000..cb95a87d92
--- /dev/null
+++ b/changelog.d/14620.bugfix
@@ -0,0 +1 @@
+Return spec-compliant JSON errors when unknown endpoints are requested.
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 8bccc9c60d..137cfb3346 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -771,17 +771,28 @@ class FederationClient(FederationBase):
         """
         if synapse_error is None:
             synapse_error = e.to_synapse_error()
-        # There is no good way to detect an "unknown" endpoint.
+        # MSC3743 specifies that servers should return a 404 or 405 with an errcode
+        # of M_UNRECOGNIZED when they receive a request to an unknown endpoint or
+        # to an unknown method, respectively.
         #
-        # Dendrite returns a 404 (with a body of "404 page not found");
-        # Conduit returns a 404 (with no body); and Synapse returns a 400
-        # with M_UNRECOGNIZED.
-        #
-        # This needs to be rather specific as some endpoints truly do return 404
-        # errors.
+        # Older versions of servers don't properly handle this. This needs to be
+        # rather specific as some endpoints truly do return 404 errors.
         return (
-            e.code == 404 and (not e.response or e.response == b"404 page not found")
-        ) or (e.code == 400 and synapse_error.errcode == Codes.UNRECOGNIZED)
+            # 404 is an unknown endpoint, 405 is a known endpoint, but unknown method.
+            (e.code == 404 or e.code == 405)
+            and (
+                # Older Dendrites returned a text or empty body.
+                # Older Conduit returned an empty body.
+                not e.response
+                or e.response == b"404 page not found"
+                # The proper response JSON with M_UNRECOGNIZED errcode.
+                or synapse_error.errcode == Codes.UNRECOGNIZED
+            )
+        ) or (
+            # Older Synapses returned a 400 error.
+            e.code == 400
+            and synapse_error.errcode == Codes.UNRECOGNIZED
+        )
 
     async def _try_destination_list(
         self,
-- 
cgit 1.5.1


From 9e82caac45cd8eccd7b22c60c2cdbeec9aab3a2d Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Tue, 6 Dec 2022 15:48:42 +0000
Subject: Faster remote room joins: unblock tasks waiting for full room state
 when the un-partial-stating of that room is received over the replication
 stream. [rei:frrj/streams/unpsr] (#14474)

---
 changelog.d/14474.misc                             |  1 +
 synapse/replication/tcp/client.py                  | 11 ++++
 .../replication/tcp/streams/test_partial_state.py  | 65 ++++++++++++++++++++++
 3 files changed, 77 insertions(+)
 create mode 100644 changelog.d/14474.misc
 create mode 100644 tests/replication/tcp/streams/test_partial_state.py

(limited to 'synapse')

diff --git a/changelog.d/14474.misc b/changelog.d/14474.misc
new file mode 100644
index 0000000000..deccd4e91a
--- /dev/null
+++ b/changelog.d/14474.misc
@@ -0,0 +1 @@
+Faster remote room joins: stream the un-partial-stating of rooms over replication.
\ No newline at end of file
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 18252a2958..b4dad47b45 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -36,12 +36,14 @@ from synapse.replication.tcp.streams import (
     TagAccountDataStream,
     ToDeviceStream,
     TypingStream,
+    UnPartialStatedRoomStream,
 )
 from synapse.replication.tcp.streams.events import (
     EventsStream,
     EventsStreamEventRow,
     EventsStreamRow,
 )
+from synapse.replication.tcp.streams.partial_state import UnPartialStatedRoomStreamRow
 from synapse.types import PersistedEventPosition, ReadReceipt, StreamKeyType, UserID
 from synapse.util.async_helpers import Linearizer, timeout_deferred
 from synapse.util.metrics import Measure
@@ -117,6 +119,7 @@ class ReplicationDataHandler:
         self._streams = hs.get_replication_streams()
         self._instance_name = hs.get_instance_name()
         self._typing_handler = hs.get_typing_handler()
+        self._state_storage_controller = hs.get_storage_controllers().state
 
         self._notify_pushers = hs.config.worker.start_pushers
         self._pusher_pool = hs.get_pusherpool()
@@ -236,6 +239,14 @@ class ReplicationDataHandler:
                     self.notifier.notify_user_joined_room(
                         row.data.event_id, row.data.room_id
                     )
+        elif stream_name == UnPartialStatedRoomStream.NAME:
+            for row in rows:
+                assert isinstance(row, UnPartialStatedRoomStreamRow)
+
+                # Wake up any tasks waiting for the room to be un-partial-stated.
+                self._state_storage_controller.notify_room_un_partial_stated(
+                    row.room_id
+                )
 
         await self._presence_handler.process_replication_rows(
             stream_name, instance_name, token, rows
diff --git a/tests/replication/tcp/streams/test_partial_state.py b/tests/replication/tcp/streams/test_partial_state.py
new file mode 100644
index 0000000000..2c10eab4db
--- /dev/null
+++ b/tests/replication/tcp/streams/test_partial_state.py
@@ -0,0 +1,65 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from twisted.internet.defer import ensureDeferred
+
+from synapse.rest.client import room
+
+from tests.replication._base import BaseMultiWorkerStreamTestCase
+
+
+class PartialStateStreamsTestCase(BaseMultiWorkerStreamTestCase):
+    servlets = [room.register_servlets]
+    hijack_auth = True
+    user_id = "@bob:test"
+
+    def setUp(self):
+        super().setUp()
+        self.store = self.hs.get_datastores().main
+
+    def test_un_partial_stated_room_unblocks_over_replication(self) -> None:
+        """
+        Tests that, when a room is un-partial-stated on another worker,
+        pending calls to `await_full_state` get unblocked.
+        """
+
+        # Make a room.
+        room_id = self.helper.create_room_as("@bob:test")
+        # Mark the room as partial-stated.
+        self.get_success(
+            self.store.store_partial_state_room(room_id, ["serv1", "serv2"], 0, "serv1")
+        )
+
+        worker = self.make_worker_hs("synapse.app.generic_worker")
+
+        # On the worker, attempt to get the current hosts in the room
+        d = ensureDeferred(
+            worker.get_storage_controllers().state.get_current_hosts_in_room(room_id)
+        )
+
+        self.reactor.advance(0.1)
+
+        # This should block
+        self.assertFalse(
+            d.called, "get_current_hosts_in_room/await_full_state did not block"
+        )
+
+        # On the master, clear the partial state flag.
+        self.get_success(self.store.clear_partial_state_room(room_id))
+
+        self.reactor.advance(0.1)
+
+        # The worker should have unblocked
+        self.assertTrue(
+            d.called, "get_current_hosts_in_room/await_full_state did not unblock"
+        )
-- 
cgit 1.5.1


From cf1059d045640485a5a0b1e3d945b796b0e6f228 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Wed, 7 Dec 2022 11:19:43 +0000
Subject: Fix a long-standing bug where the user directory would return 1 more
 row than requested. (#14631)

---
 changelog.d/14631.bugfix                         | 1 +
 synapse/rest/client/user_directory.py            | 4 ++--
 synapse/storage/databases/main/user_directory.py | 2 +-
 tests/storage/test_user_directory.py             | 6 ++++++
 4 files changed, 10 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/14631.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14631.bugfix b/changelog.d/14631.bugfix
new file mode 100644
index 0000000000..c5376bab9f
--- /dev/null
+++ b/changelog.d/14631.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where the user directory would return 1 more row than requested.
\ No newline at end of file
diff --git a/synapse/rest/client/user_directory.py b/synapse/rest/client/user_directory.py
index 116c982ce6..4670fad608 100644
--- a/synapse/rest/client/user_directory.py
+++ b/synapse/rest/client/user_directory.py
@@ -63,8 +63,8 @@ class UserDirectorySearchRestServlet(RestServlet):
 
         body = parse_json_object_from_request(request)
 
-        limit = body.get("limit", 10)
-        limit = min(limit, 50)
+        limit = int(body.get("limit", 10))
+        limit = max(min(limit, 50), 0)
 
         try:
             search_term = body["search_term"]
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index 044435deab..af9952f513 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -886,7 +886,7 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
 
         limited = len(results) > limit
 
-        return {"limited": limited, "results": results}
+        return {"limited": limited, "results": results[0:limit]}
 
 
 def _parse_query_sqlite(search_term: str) -> str:
diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py
index 5b60cf5285..88c7d5fec0 100644
--- a/tests/storage/test_user_directory.py
+++ b/tests/storage/test_user_directory.py
@@ -448,6 +448,12 @@ class UserDirectoryStoreTestCase(HomeserverTestCase):
             {"user_id": BOBBY, "display_name": "bobby", "avatar_url": None},
         )
 
+    @override_config({"user_directory": {"search_all_users": True}})
+    def test_search_user_limit_correct(self) -> None:
+        r = self.get_success(self.store.search_user_dir(ALICE, "bob", 1))
+        self.assertTrue(r["limited"])
+        self.assertEqual(1, len(r["results"]))
+
     @override_config({"user_directory": {"search_all_users": True}})
     def test_search_user_dir_stop_words(self) -> None:
         """Tests that a user can look up another user by searching for the start if its
-- 
cgit 1.5.1


From 96251af50d621ef1250dc22e447669c69f89b3bb Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Wed, 7 Dec 2022 13:39:27 +0000
Subject: Fix a bug introduced in v1.67.0 where not specifying a config file or
 a server URL would lead to the `register_new_matrix_user` script failing.
 (#14637)

---
 changelog.d/14637.bugfix                     | 1 +
 synapse/_scripts/register_new_matrix_user.py | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14637.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14637.bugfix b/changelog.d/14637.bugfix
new file mode 100644
index 0000000000..ab6db383c6
--- /dev/null
+++ b/changelog.d/14637.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in v1.67.0 where not specifying a config file or a server URL would lead to the `register_new_matrix_user` script failing.
\ No newline at end of file
diff --git a/synapse/_scripts/register_new_matrix_user.py b/synapse/_scripts/register_new_matrix_user.py
index 0c4504d5d8..2b74a40166 100644
--- a/synapse/_scripts/register_new_matrix_user.py
+++ b/synapse/_scripts/register_new_matrix_user.py
@@ -222,6 +222,7 @@ def main() -> None:
 
     args = parser.parse_args()
 
+    config: Optional[Dict[str, Any]] = None
     if "config" in args and args.config:
         config = yaml.safe_load(args.config)
 
@@ -229,7 +230,7 @@ def main() -> None:
         secret = args.shared_secret
     else:
         # argparse should check that we have either config or shared secret
-        assert config
+        assert config is not None
 
         secret = config.get("registration_shared_secret")
         secret_file = config.get("registration_shared_secret_path")
@@ -244,7 +245,7 @@ def main() -> None:
 
     if args.server_url:
         server_url = args.server_url
-    elif config:
+    elif config is not None:
         server_url = _find_client_listener(config)
         if not server_url:
             server_url = _DEFAULT_SERVER_URL
-- 
cgit 1.5.1


From 60c3fea3271468dd1f9e9c5fae2d22dd9778293b Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Wed, 7 Dec 2022 17:35:41 +0000
Subject: Reject receipt requests with invalid room or event IDs. (#14632)

If the room or event IDs are empty or of an invalid form they
should be rejected.
---
 changelog.d/14632.bugfix           |  1 +
 synapse/rest/client/receipts.py    |  5 ++-
 tests/rest/client/test_receipts.py | 76 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14632.bugfix
 create mode 100644 tests/rest/client/test_receipts.py

(limited to 'synapse')

diff --git a/changelog.d/14632.bugfix b/changelog.d/14632.bugfix
new file mode 100644
index 0000000000..323d10f1b0
--- /dev/null
+++ b/changelog.d/14632.bugfix
@@ -0,0 +1 @@
+Reject invalid read receipt requests with empty room or event IDs. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py
index 18a282b22c..28b7d30ea8 100644
--- a/synapse/rest/client/receipts.py
+++ b/synapse/rest/client/receipts.py
@@ -20,7 +20,7 @@ from synapse.api.errors import Codes, SynapseError
 from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_json_object_from_request
 from synapse.http.site import SynapseRequest
-from synapse.types import JsonDict
+from synapse.types import EventID, JsonDict, RoomID
 
 from ._base import client_patterns
 
@@ -56,6 +56,9 @@ class ReceiptRestServlet(RestServlet):
     ) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
 
+        if not RoomID.is_valid(room_id) or not event_id.startswith(EventID.SIGIL):
+            raise SynapseError(400, "A valid room ID and event ID must be specified")
+
         if receipt_type not in self._known_receipt_types:
             raise SynapseError(
                 400,
diff --git a/tests/rest/client/test_receipts.py b/tests/rest/client/test_receipts.py
new file mode 100644
index 0000000000..2a7fcea386
--- /dev/null
+++ b/tests/rest/client/test_receipts.py
@@ -0,0 +1,76 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from twisted.test.proto_helpers import MemoryReactor
+
+import synapse.rest.admin
+from synapse.rest.client import login, receipts, register
+from synapse.server import HomeServer
+from synapse.util import Clock
+
+from tests import unittest
+
+
+class ReceiptsTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        login.register_servlets,
+        register.register_servlets,
+        receipts.register_servlets,
+        synapse.rest.admin.register_servlets,
+    ]
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.owner = self.register_user("owner", "pass")
+        self.owner_tok = self.login("owner", "pass")
+
+    def test_send_receipt(self) -> None:
+        channel = self.make_request(
+            "POST",
+            "/rooms/!abc:beep/receipt/m.read/$def",
+            content={},
+            access_token=self.owner_tok,
+        )
+        self.assertEqual(channel.code, 200, channel.result)
+
+    def test_send_receipt_invalid_room_id(self) -> None:
+        channel = self.make_request(
+            "POST",
+            "/rooms/not-a-room-id/receipt/m.read/$def",
+            content={},
+            access_token=self.owner_tok,
+        )
+        self.assertEqual(channel.code, 400, channel.result)
+        self.assertEqual(
+            channel.json_body["error"], "A valid room ID and event ID must be specified"
+        )
+
+    def test_send_receipt_invalid_event_id(self) -> None:
+        channel = self.make_request(
+            "POST",
+            "/rooms/!abc:beep/receipt/m.read/not-an-event-id",
+            content={},
+            access_token=self.owner_tok,
+        )
+        self.assertEqual(channel.code, 400, channel.result)
+        self.assertEqual(
+            channel.json_body["error"], "A valid room ID and event ID must be specified"
+        )
+
+    def test_send_receipt_invalid_receipt_type(self) -> None:
+        channel = self.make_request(
+            "POST",
+            "/rooms/!abc:beep/receipt/invalid-receipt-type/$def",
+            content={},
+            access_token=self.owner_tok,
+        )
+        self.assertEqual(channel.code, 400, channel.result)
-- 
cgit 1.5.1


From da777207528513c858395758bf4c023da2c2c1a3 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 8 Dec 2022 11:35:49 -0500
Subject: Check the stream position before checking if the cache is empty.
 (#14639)

An empty cache does not mean the entity has not changed: if the
stream position is at or before the earliest known stream position,
return that the entity *has* changed, since the cache cannot
accurately answer that query.
---
 changelog.d/14639.bugfix                   | 1 +
 synapse/util/caches/stream_change_cache.py | 9 +++++----
 tests/util/test_stream_change_cache.py     | 7 ++++---
 3 files changed, 10 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/14639.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14639.bugfix b/changelog.d/14639.bugfix
new file mode 100644
index 0000000000..8730b10afe
--- /dev/null
+++ b/changelog.d/14639.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where the user directory and room/user stats might be out of sync.
diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py
index c8b17acb59..1657459549 100644
--- a/synapse/util/caches/stream_change_cache.py
+++ b/synapse/util/caches/stream_change_cache.py
@@ -213,16 +213,17 @@ class StreamChangeCache:
         """
         assert isinstance(stream_pos, int)
 
-        if not self._cache:
-            # If the cache is empty, nothing can have changed.
-            return False
-
         # _cache is not valid at or before the earliest known stream position, so
         # return that an entity has changed.
         if stream_pos <= self._earliest_known_stream_pos:
             self.metrics.inc_misses()
             return True
 
+        # If the cache is empty, nothing can have changed.
+        if not self._cache:
+            self.metrics.inc_misses()
+            return False
+
         self.metrics.inc_hits()
         return stream_pos < self._cache.peekitem()[0]
 
diff --git a/tests/util/test_stream_change_cache.py b/tests/util/test_stream_change_cache.py
index 0305741c99..3df053493b 100644
--- a/tests/util/test_stream_change_cache.py
+++ b/tests/util/test_stream_change_cache.py
@@ -144,9 +144,10 @@ class StreamChangeCacheTests(unittest.HomeserverTestCase):
         """
         cache = StreamChangeCache("#test", 1)
 
-        # With no entities, it returns False for the past, present, and future.
-        self.assertFalse(cache.has_any_entity_changed(0))
-        self.assertFalse(cache.has_any_entity_changed(1))
+        # With no entities, it returns True for the past, present, and False for
+        # the future.
+        self.assertTrue(cache.has_any_entity_changed(0))
+        self.assertTrue(cache.has_any_entity_changed(1))
         self.assertFalse(cache.has_any_entity_changed(2))
 
         # We add an entity
-- 
cgit 1.5.1


From 9d8a3234ba1d3ff831a7647f45c67946773d88a7 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 8 Dec 2022 11:37:05 -0500
Subject: Respond with proper error responses on unknown paths. (#14621)

Returns a proper 404 with an errcode of M_UNRECOGNIZED for
unknown endpoints per MSC3743.
---
 changelog.d/14621.bugfix                      |  1 +
 synapse/api/errors.py                         |  6 ++----
 synapse/http/server.py                        | 19 ++++++++++++++++++-
 synapse/rest/media/v1/media_repository.py     |  4 ++--
 synapse/util/httpresourcetree.py              |  6 ++++--
 tests/rest/admin/test_user.py                 |  2 +-
 tests/rest/client/test_login_token_request.py |  4 ++--
 tests/rest/client/test_rendezvous.py          |  2 +-
 tests/test_server.py                          |  2 +-
 9 files changed, 32 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/14621.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14621.bugfix b/changelog.d/14621.bugfix
new file mode 100644
index 0000000000..cb95a87d92
--- /dev/null
+++ b/changelog.d/14621.bugfix
@@ -0,0 +1 @@
+Return spec-compliant JSON errors when unknown endpoints are requested.
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index e2cfcea0f2..76ef12ed3a 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -300,10 +300,8 @@ class InteractiveAuthIncompleteError(Exception):
 class UnrecognizedRequestError(SynapseError):
     """An error indicating we don't understand the request you're trying to make"""
 
-    def __init__(
-        self, msg: str = "Unrecognized request", errcode: str = Codes.UNRECOGNIZED
-    ):
-        super().__init__(400, msg, errcode)
+    def __init__(self, msg: str = "Unrecognized request", code: int = 400):
+        super().__init__(code, msg, Codes.UNRECOGNIZED)
 
 
 class NotFoundError(SynapseError):
diff --git a/synapse/http/server.py b/synapse/http/server.py
index 051a1899a0..2563858f3c 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -577,7 +577,24 @@ def _unrecognised_request_handler(request: Request) -> NoReturn:
     Args:
         request: Unused, but passed in to match the signature of ServletCallback.
     """
-    raise UnrecognizedRequestError()
+    raise UnrecognizedRequestError(code=404)
+
+
+class UnrecognizedRequestResource(resource.Resource):
+    """
+    Similar to twisted.web.resource.NoResource, but returns a JSON 404 with an
+    errcode of M_UNRECOGNIZED.
+    """
+
+    def render(self, request: SynapseRequest) -> int:
+        f = failure.Failure(UnrecognizedRequestError(code=404))
+        return_json_error(f, request, None)
+        # A response has already been sent but Twisted requires either NOT_DONE_YET
+        # or the response bytes as a return value.
+        return NOT_DONE_YET
+
+    def getChild(self, name: str, request: Request) -> resource.Resource:
+        return self
 
 
 class RootRedirect(resource.Resource):
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index 40b0d39eb2..c70e1837af 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -24,7 +24,6 @@ from matrix_common.types.mxc_uri import MXCUri
 import twisted.internet.error
 import twisted.web.http
 from twisted.internet.defer import Deferred
-from twisted.web.resource import Resource
 
 from synapse.api.errors import (
     FederationDeniedError,
@@ -35,6 +34,7 @@ from synapse.api.errors import (
 )
 from synapse.config._base import ConfigError
 from synapse.config.repository import ThumbnailRequirement
+from synapse.http.server import UnrecognizedRequestResource
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import defer_to_thread
 from synapse.metrics.background_process_metrics import run_as_background_process
@@ -1046,7 +1046,7 @@ class MediaRepository:
         return removed_media, len(removed_media)
 
 
-class MediaRepositoryResource(Resource):
+class MediaRepositoryResource(UnrecognizedRequestResource):
     """File uploading and downloading.
 
     Uploads are POSTed to a resource which returns a token which is used to GET
diff --git a/synapse/util/httpresourcetree.py b/synapse/util/httpresourcetree.py
index a0606851f7..39fab4fe06 100644
--- a/synapse/util/httpresourcetree.py
+++ b/synapse/util/httpresourcetree.py
@@ -15,7 +15,9 @@
 import logging
 from typing import Dict
 
-from twisted.web.resource import NoResource, Resource
+from twisted.web.resource import Resource
+
+from synapse.http.server import UnrecognizedRequestResource
 
 logger = logging.getLogger(__name__)
 
@@ -49,7 +51,7 @@ def create_resource_tree(
         for path_seg in full_path.split(b"/")[1:-1]:
             if path_seg not in last_resource.listNames():
                 # resource doesn't exist, so make a "dummy resource"
-                child_resource: Resource = NoResource()
+                child_resource: Resource = UnrecognizedRequestResource()
                 last_resource.putChild(path_seg, child_resource)
                 res_id = _resource_id(last_resource, path_seg)
                 resource_mappings[res_id] = child_resource
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index e8c9457794..5c1ced355f 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -3994,7 +3994,7 @@ class ShadowBanRestTestCase(unittest.HomeserverTestCase):
         """
         Tests that shadow-banning for a user that is not a local returns a 400
         """
-        url = "/_synapse/admin/v1/whois/@unknown_person:unknown_domain"
+        url = "/_synapse/admin/v1/users/@unknown_person:unknown_domain/shadow_ban"
 
         channel = self.make_request(method, url, access_token=self.admin_user_tok)
         self.assertEqual(400, channel.code, msg=channel.json_body)
diff --git a/tests/rest/client/test_login_token_request.py b/tests/rest/client/test_login_token_request.py
index c2e1e08811..6aedc1a11c 100644
--- a/tests/rest/client/test_login_token_request.py
+++ b/tests/rest/client/test_login_token_request.py
@@ -48,13 +48,13 @@ class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase):
 
     def test_disabled(self) -> None:
         channel = self.make_request("POST", endpoint, {}, access_token=None)
-        self.assertEqual(channel.code, 400)
+        self.assertEqual(channel.code, 404)
 
         self.register_user(self.user, self.password)
         token = self.login(self.user, self.password)
 
         channel = self.make_request("POST", endpoint, {}, access_token=token)
-        self.assertEqual(channel.code, 400)
+        self.assertEqual(channel.code, 404)
 
     @override_config({"experimental_features": {"msc3882_enabled": True}})
     def test_require_auth(self) -> None:
diff --git a/tests/rest/client/test_rendezvous.py b/tests/rest/client/test_rendezvous.py
index ad00a476e1..c0eb5d01a6 100644
--- a/tests/rest/client/test_rendezvous.py
+++ b/tests/rest/client/test_rendezvous.py
@@ -36,7 +36,7 @@ class RendezvousServletTestCase(unittest.HomeserverTestCase):
 
     def test_disabled(self) -> None:
         channel = self.make_request("POST", endpoint, {}, access_token=None)
-        self.assertEqual(channel.code, 400)
+        self.assertEqual(channel.code, 404)
 
     @override_config({"experimental_features": {"msc3886_endpoint": "/asd"}})
     def test_redirect(self) -> None:
diff --git a/tests/test_server.py b/tests/test_server.py
index 2d9a0257d4..d67d7722a4 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -174,7 +174,7 @@ class JsonResourceTests(unittest.TestCase):
             self.reactor, FakeSite(res, self.reactor), b"GET", b"/_matrix/foobar"
         )
 
-        self.assertEqual(channel.code, 400)
+        self.assertEqual(channel.code, 404)
         self.assertEqual(channel.json_body["error"], "Unrecognized request")
         self.assertEqual(channel.json_body["errcode"], "M_UNRECOGNIZED")
 
-- 
cgit 1.5.1


From c369e956918333c19cfb21def41c8a54f9d09c90 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 8 Dec 2022 11:40:20 -0500
Subject: Rebuild the user directory and stats tables. (#14643)

Due to the various fixes to the StreamChangeCache it is not
safe to trust the information in the user directory or room/user
stats tables. Rebuild them as background jobs.

In particular see da777207528513c858395758bf4c023da2c2c1a3 (#14639),
and 6a8310f3dfe77acf59df2fe3e88a71b85b9b3ecc (#14435).

May also be related to fac8a38525387e344e3595a092578e0ffedd49ae
(#14592).
---
 changelog.d/14643.bugfix                           |  1 +
 .../main/delta/73/22_rebuild_user_dir_stats.sql    | 29 ++++++++++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100644 changelog.d/14643.bugfix
 create mode 100644 synapse/storage/schema/main/delta/73/22_rebuild_user_dir_stats.sql

(limited to 'synapse')

diff --git a/changelog.d/14643.bugfix b/changelog.d/14643.bugfix
new file mode 100644
index 0000000000..8730b10afe
--- /dev/null
+++ b/changelog.d/14643.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where the user directory and room/user stats might be out of sync.
diff --git a/synapse/storage/schema/main/delta/73/22_rebuild_user_dir_stats.sql b/synapse/storage/schema/main/delta/73/22_rebuild_user_dir_stats.sql
new file mode 100644
index 0000000000..afab1e4bb7
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/22_rebuild_user_dir_stats.sql
@@ -0,0 +1,29 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
+  -- Set up user directory staging tables.
+  (7322, 'populate_user_directory_createtables', '{}', NULL),
+  -- Run through each room and update the user directory according to who is in it.
+  (7322, 'populate_user_directory_process_rooms', '{}', 'populate_user_directory_createtables'),
+  -- Insert all users into the user directory, if search_all_users is on.
+  (7322, 'populate_user_directory_process_users', '{}', 'populate_user_directory_process_rooms'),
+  -- Clean up user directory staging tables.
+  (7322, 'populate_user_directory_cleanup', '{}', 'populate_user_directory_process_users'),
+  -- Rebuild the room_stats_current and room_stats_state tables.
+  (7322, 'populate_stats_process_rooms', '{}', NULL),
+  -- Update the user_stats_current table.
+  (7322, 'populate_stats_process_users', '{}', NULL)
+ON CONFLICT (update_name) DO NOTHING;
-- 
cgit 1.5.1


From a58b550eac9606bf6bba030abe9d1020c893ca02 Mon Sep 17 00:00:00 2001
From: Ashish Kumar <ashfame@users.noreply.github.com>
Date: Thu, 8 Dec 2022 21:28:02 +0400
Subject: Fix html templates to load images only on HTTPS (#14625)

This PR changes http-based image URLs to be https in html templates.
This impacts the Synapse SSO error page, where browsers report mixed
media content warnings.

Also, https://matrix.org/img/vector-logo-email.png is currently broken
but the URL has been updated to be https anyway.

Signed-off-by: Ashish Kumar <ashfame@users.noreply.github.com>
---
 changelog.d/14625.bugfix                 | 1 +
 synapse/res/templates/_base.html         | 6 +++---
 synapse/res/templates/notice_expiry.html | 6 +++---
 synapse/res/templates/notif_mail.html    | 6 +++---
 4 files changed, 10 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/14625.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14625.bugfix b/changelog.d/14625.bugfix
new file mode 100644
index 0000000000..a4d1216690
--- /dev/null
+++ b/changelog.d/14625.bugfix
@@ -0,0 +1 @@
+Fix html templates to load images only on HTTPS. Contributed by @ashfame.
diff --git a/synapse/res/templates/_base.html b/synapse/res/templates/_base.html
index 46439fce6a..4b5cc7bcb6 100644
--- a/synapse/res/templates/_base.html
+++ b/synapse/res/templates/_base.html
@@ -13,13 +13,13 @@
 <body>
 <header class="mx_Header">
     {% if app_name == "Riot" %}
-        <img src="http://riot.im/img/external/riot-logo-email.png" width="83" height="83" alt="[Riot]"/>
+        <img src="https://riot.im/img/external/riot-logo-email.png" width="83" height="83" alt="[Riot]"/>
     {% elif app_name == "Vector" %}
-        <img src="http://matrix.org/img/vector-logo-email.png" width="64" height="83" alt="[Vector]"/>
+        <img src="https://matrix.org/img/vector-logo-email.png" width="64" height="83" alt="[Vector]"/>
     {% elif app_name == "Element" %}
         <img src="https://static.element.io/images/email-logo.png" width="83" height="83" alt="[Element]"/>
     {% else %}
-        <img src="http://matrix.org/img/matrix-120x51.png" width="120" height="51" alt="[matrix]"/>
+        <img src="https://matrix.org/img/matrix-120x51.png" width="120" height="51" alt="[matrix]"/>
     {% endif %}
 </header>
 
diff --git a/synapse/res/templates/notice_expiry.html b/synapse/res/templates/notice_expiry.html
index 406397aaca..f62038e111 100644
--- a/synapse/res/templates/notice_expiry.html
+++ b/synapse/res/templates/notice_expiry.html
@@ -21,13 +21,13 @@
                     </td>
                     <td class="logo">
                         {% if app_name == "Riot" %}
-                            <img src="http://riot.im/img/external/riot-logo-email.png" width="83" height="83" alt="[Riot]"/>
+                            <img src="https://riot.im/img/external/riot-logo-email.png" width="83" height="83" alt="[Riot]"/>
                         {% elif app_name == "Vector" %}
-                            <img src="http://matrix.org/img/vector-logo-email.png" width="64" height="83" alt="[Vector]"/>
+                            <img src="https://matrix.org/img/vector-logo-email.png" width="64" height="83" alt="[Vector]"/>
                         {% elif app_name == "Element" %}
                             <img src="https://static.element.io/images/email-logo.png" width="83" height="83" alt="[Element]"/>
                         {% else %}
-                            <img src="http://matrix.org/img/matrix-120x51.png" width="120" height="51" alt="[matrix]"/>
+                            <img src="https://matrix.org/img/matrix-120x51.png" width="120" height="51" alt="[matrix]"/>
                         {% endif %}
                     </td>
                 </tr>
diff --git a/synapse/res/templates/notif_mail.html b/synapse/res/templates/notif_mail.html
index 2add9dd859..7da0fff5e9 100644
--- a/synapse/res/templates/notif_mail.html
+++ b/synapse/res/templates/notif_mail.html
@@ -22,13 +22,13 @@
                     </td>
                     <td class="logo">
                         {%- if app_name == "Riot" %}
-                            <img src="http://riot.im/img/external/riot-logo-email.png" width="83" height="83" alt="[Riot]"/>
+                            <img src="https://riot.im/img/external/riot-logo-email.png" width="83" height="83" alt="[Riot]"/>
                         {%- elif app_name == "Vector" %}
-                            <img src="http://matrix.org/img/vector-logo-email.png" width="64" height="83" alt="[Vector]"/>
+                            <img src="https://matrix.org/img/vector-logo-email.png" width="64" height="83" alt="[Vector]"/>
                         {%- elif app_name == "Element" %}
                             <img src="https://static.element.io/images/email-logo.png" width="83" height="83" alt="[Element]"/>
                         {%- else %}
-                            <img src="http://matrix.org/img/matrix-120x51.png" width="120" height="51" alt="[matrix]"/>
+                            <img src="https://matrix.org/img/matrix-120x51.png" width="120" height="51" alt="[matrix]"/>
                         {%- endif %}
                     </td>
                 </tr>
-- 
cgit 1.5.1


From c2de2ca63060324cf2f80ddf3289b0fd7a4d861b Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 9 Dec 2022 09:37:07 +0000
Subject: Delete stale non-e2e devices for users, take 2 (#14595)

This should help reduce the number of devices racked up by e.g. simple bots that repeatedly log in.

We only delete non-e2e devices as they should be safe to delete, whereas if we delete e2e devices for a user we may accidentally break their ability to receive e2e keys for a message.
---
 changelog.d/14595.misc                    |  1 +
 synapse/handlers/device.py                | 31 +++++++++++-
 synapse/storage/databases/main/devices.py | 79 ++++++++++++++++++++++++++++++-
 tests/handlers/test_device.py             |  2 +-
 tests/storage/test_client_ips.py          |  4 +-
 5 files changed, 113 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14595.misc

(limited to 'synapse')

diff --git a/changelog.d/14595.misc b/changelog.d/14595.misc
new file mode 100644
index 0000000000..f9bfc581ad
--- /dev/null
+++ b/changelog.d/14595.misc
@@ -0,0 +1 @@
+Prune user's old devices on login if they have too many.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index d4750a32e6..7674c187ef 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -52,6 +52,7 @@ from synapse.util import stringutils
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches.expiringcache import ExpiringCache
 from synapse.util.cancellation import cancellable
+from synapse.util.iterutils import batch_iter
 from synapse.util.metrics import measure_func
 from synapse.util.retryutils import NotRetryingDestination
 
@@ -421,6 +422,9 @@ class DeviceHandler(DeviceWorkerHandler):
 
         self._check_device_name_length(initial_device_display_name)
 
+        # Prune the user's device list if they already have a lot of devices.
+        await self._prune_too_many_devices(user_id)
+
         if device_id is not None:
             new_device = await self.store.store_device(
                 user_id=user_id,
@@ -452,6 +456,31 @@ class DeviceHandler(DeviceWorkerHandler):
 
         raise errors.StoreError(500, "Couldn't generate a device ID.")
 
+    async def _prune_too_many_devices(self, user_id: str) -> None:
+        """Delete any excess old devices this user may have."""
+        device_ids = await self.store.check_too_many_devices_for_user(user_id)
+        if not device_ids:
+            return
+
+        # We don't want to block and try and delete tonnes of devices at once,
+        # so we cap the number of devices we delete synchronously.
+        first_batch, remaining_device_ids = device_ids[:10], device_ids[10:]
+        await self.delete_devices(user_id, first_batch)
+
+        if not remaining_device_ids:
+            return
+
+        # Now spawn a background loop that deletes the rest.
+        async def _prune_too_many_devices_loop() -> None:
+            for batch in batch_iter(remaining_device_ids, 10):
+                await self.delete_devices(user_id, batch)
+
+                await self.clock.sleep(1)
+
+        run_as_background_process(
+            "_prune_too_many_devices_loop", _prune_too_many_devices_loop
+        )
+
     async def _delete_stale_devices(self) -> None:
         """Background task that deletes devices which haven't been accessed for more than
         a configured time period.
@@ -481,7 +510,7 @@ class DeviceHandler(DeviceWorkerHandler):
             device_ids = [d for d in device_ids if d != except_device_id]
         await self.delete_devices(user_id, device_ids)
 
-    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
+    async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None:
         """Delete several devices
 
         Args:
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index a5bb4d404e..08ccd46a2b 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1569,6 +1569,72 @@ class DeviceBackgroundUpdateStore(SQLBaseStore):
 
         return rows
 
+    async def check_too_many_devices_for_user(self, user_id: str) -> List[str]:
+        """Check if the user has a lot of devices, and if so return the set of
+        devices we can prune.
+
+        This does *not* return hidden devices or devices with E2E keys.
+        """
+
+        num_devices = await self.db_pool.simple_select_one_onecol(
+            table="devices",
+            keyvalues={"user_id": user_id, "hidden": False},
+            retcol="COALESCE(COUNT(*), 0)",
+            desc="count_devices",
+        )
+
+        # We let users have up to ten devices without pruning.
+        if num_devices <= 10:
+            return []
+
+        # We prune everything older than N days.
+        max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000
+
+        if num_devices > 50:
+            # If the user has more than 50 devices, then we chose a last seen
+            # that ensures we keep at most 50 devices.
+            sql = """
+                SELECT last_seen FROM devices
+                LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
+                WHERE
+                    user_id = ?
+                    AND NOT hidden
+                    AND last_seen IS NOT NULL
+                    AND key_json IS NULL
+                ORDER BY last_seen DESC
+                LIMIT 1
+                OFFSET 50
+            """
+
+            rows = await self.db_pool.execute(
+                "check_too_many_devices_for_user_last_seen", None, sql, (user_id,)
+            )
+            if rows:
+                max_last_seen = max(rows[0][0], max_last_seen)
+
+        # Now fetch the devices to delete.
+        sql = """
+            SELECT DISTINCT device_id FROM devices
+            LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
+            WHERE
+                user_id = ?
+                AND NOT hidden
+                AND last_seen < ?
+                AND key_json IS NULL
+            ORDER BY last_seen
+        """
+
+        def check_too_many_devices_for_user_txn(
+            txn: LoggingTransaction,
+        ) -> List[str]:
+            txn.execute(sql, (user_id, max_last_seen))
+            return [device_id for device_id, in txn]
+
+        return await self.db_pool.runInteraction(
+            "check_too_many_devices_for_user",
+            check_too_many_devices_for_user_txn,
+        )
+
 
 class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
     # Because we have write access, this will be a StreamIdGenerator
@@ -1627,6 +1693,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
                 values={},
                 insertion_values={
                     "display_name": initial_device_display_name,
+                    "last_seen": self._clock.time_msec(),
                     "hidden": False,
                 },
                 desc="store_device",
@@ -1672,7 +1739,15 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
             )
             raise StoreError(500, "Problem storing device.")
 
-    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
+    @cached(max_entries=0)
+    async def delete_device(self, user_id: str, device_id: str) -> None:
+        raise NotImplementedError()
+
+    # Note: sometimes deleting rows out of `device_inbox` can take a long time,
+    # so we use a cache so that we deduplicate in flight requests to delete
+    # devices.
+    @cachedList(cached_method_name="delete_device", list_name="device_ids")
+    async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> dict:
         """Deletes several devices.
 
         Args:
@@ -1709,6 +1784,8 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         for device_id in device_ids:
             self.device_id_exists_cache.invalidate((user_id, device_id))
 
+        return {}
+
     async def update_device(
         self, user_id: str, device_id: str, new_display_name: Optional[str] = None
     ) -> None:
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index ce7525e29c..a456bffd63 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -115,7 +115,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
                 "device_id": "xyz",
                 "display_name": "display 0",
                 "last_seen_ip": None,
-                "last_seen_ts": None,
+                "last_seen_ts": 1000000,
             },
             device_map["xyz"],
         )
diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py
index 49ad3c1324..a9af1babed 100644
--- a/tests/storage/test_client_ips.py
+++ b/tests/storage/test_client_ips.py
@@ -169,6 +169,8 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
             )
         )
 
+        last_seen = self.clock.time_msec()
+
         if after_persisting:
             # Trigger the storage loop
             self.reactor.advance(10)
@@ -189,7 +191,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
                         "device_id": device_id,
                         "ip": None,
                         "user_agent": None,
-                        "last_seen": None,
+                        "last_seen": last_seen,
                     },
                 ],
             )
-- 
cgit 1.5.1


From 94bc21e69f89ad873ad7a0deb6d9c4ff3cb480ef Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 9 Dec 2022 13:31:32 +0000
Subject: Limit the number of devices we delete at once (#14649)

---
 changelog.d/14649.misc                    |  1 +
 synapse/handlers/device.py                |  4 +++-
 synapse/storage/databases/main/devices.py | 11 ++++++++---
 tests/handlers/test_device.py             | 31 +++++++++++++++++++++++++++++++
 4 files changed, 43 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14649.misc

(limited to 'synapse')

diff --git a/changelog.d/14649.misc b/changelog.d/14649.misc
new file mode 100644
index 0000000000..f9bfc581ad
--- /dev/null
+++ b/changelog.d/14649.misc
@@ -0,0 +1 @@
+Prune user's old devices on login if they have too many.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 7674c187ef..c935c7be90 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -458,10 +458,12 @@ class DeviceHandler(DeviceWorkerHandler):
 
     async def _prune_too_many_devices(self, user_id: str) -> None:
         """Delete any excess old devices this user may have."""
-        device_ids = await self.store.check_too_many_devices_for_user(user_id)
+        device_ids = await self.store.check_too_many_devices_for_user(user_id, 100)
         if not device_ids:
             return
 
+        logger.info("Pruning %d old devices for user %s", len(device_ids), user_id)
+
         # We don't want to block and try and delete tonnes of devices at once,
         # so we cap the number of devices we delete synchronously.
         first_batch, remaining_device_ids = device_ids[:10], device_ids[10:]
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 08ccd46a2b..95d4c0622d 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1569,11 +1569,15 @@ class DeviceBackgroundUpdateStore(SQLBaseStore):
 
         return rows
 
-    async def check_too_many_devices_for_user(self, user_id: str) -> List[str]:
+    async def check_too_many_devices_for_user(
+        self, user_id: str, limit: int
+    ) -> List[str]:
         """Check if the user has a lot of devices, and if so return the set of
         devices we can prune.
 
         This does *not* return hidden devices or devices with E2E keys.
+
+        Returns at most `limit` number of devices, ordered by last seen.
         """
 
         num_devices = await self.db_pool.simple_select_one_onecol(
@@ -1614,7 +1618,7 @@ class DeviceBackgroundUpdateStore(SQLBaseStore):
 
         # Now fetch the devices to delete.
         sql = """
-            SELECT DISTINCT device_id FROM devices
+            SELECT device_id FROM devices
             LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
             WHERE
                 user_id = ?
@@ -1622,12 +1626,13 @@ class DeviceBackgroundUpdateStore(SQLBaseStore):
                 AND last_seen < ?
                 AND key_json IS NULL
             ORDER BY last_seen
+            LIMIT ?
         """
 
         def check_too_many_devices_for_user_txn(
             txn: LoggingTransaction,
         ) -> List[str]:
-            txn.execute(sql, (user_id, max_last_seen))
+            txn.execute(sql, (user_id, max_last_seen, limit))
             return [device_id for device_id, in txn]
 
         return await self.db_pool.runInteraction(
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index a456bffd63..e51cac9b33 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -20,6 +20,8 @@ from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.errors import NotFoundError, SynapseError
 from synapse.handlers.device import MAX_DEVICE_DISPLAY_NAME_LEN, DeviceHandler
+from synapse.rest import admin
+from synapse.rest.client import account, login
 from synapse.server import HomeServer
 from synapse.util import Clock
 
@@ -30,6 +32,12 @@ user2 = "@theresa:bbb"
 
 
 class DeviceTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        login.register_servlets,
+        admin.register_servlets,
+        account.register_servlets,
+    ]
+
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         hs = self.setup_test_homeserver("server", federation_http_client=None)
         handler = hs.get_device_handler()
@@ -229,6 +237,29 @@ class DeviceTestCase(unittest.HomeserverTestCase):
             NotFoundError,
         )
 
+    def test_login_delete_old_devices(self) -> None:
+        """Delete old devices if the user already has too many."""
+
+        user_id = self.register_user("user", "pass")
+
+        # Create a bunch of devices
+        for _ in range(50):
+            self.login("user", "pass")
+            self.reactor.advance(1)
+
+        # Advance the clock for ages (as we only delete old devices)
+        self.reactor.advance(60 * 60 * 24 * 300)
+
+        # Log in again to start the pruning
+        self.login("user", "pass")
+
+        # Give the background job time to do its thing
+        self.reactor.pump([1.0] * 100)
+
+        # We should now only have the most recent device.
+        devices = self.get_success(self.handler.get_devices_by_user(user_id))
+        self.assertEqual(len(devices), 1)
+
     def _record_users(self) -> None:
         # check this works for both devices which have a recorded client_ip,
         # and those which don't.
-- 
cgit 1.5.1


From 3ac412b4e2f8c5ba11dc962b8a9d871c1efdce9b Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 9 Dec 2022 12:36:32 -0500
Subject: Require types in tests.storage. (#14646)

Adds missing type hints to the `tests.storage` package
and configures mypy to disallow untyped definitions there.
---
 changelog.d/14646.misc                             |   1 +
 mypy.ini                                           |  14 +--
 synapse/storage/databases/main/end_to_end_keys.py  |   2 +-
 tests/storage/databases/main/test_deviceinbox.py   |  10 +-
 tests/storage/databases/main/test_events_worker.py |  27 ++---
 tests/storage/databases/main/test_lock.py          |  18 +--
 tests/storage/databases/main/test_receipts.py      |   8 +-
 tests/storage/databases/main/test_room.py          |  10 +-
 tests/storage/test__base.py                        |   2 +-
 tests/storage/test_account_data.py                 |  12 +-
 tests/storage/test_appservice.py                   |  22 ++--
 tests/storage/test_base.py                         |  30 ++---
 tests/storage/test_cleanup_extrems.py              |  37 +++---
 tests/storage/test_client_ips.py                   |  58 +++++-----
 tests/storage/test_database.py                     |   2 +-
 tests/storage/test_devices.py                      |  35 ++++--
 tests/storage/test_directory.py                    |  12 +-
 tests/storage/test_e2e_room_keys.py                |   8 +-
 tests/storage/test_end_to_end_keys.py              |  15 ++-
 tests/storage/test_event_chain.py                  |  29 +++--
 tests/storage/test_event_federation.py             |  71 ++++++------
 tests/storage/test_event_metrics.py                |   2 +-
 tests/storage/test_events.py                       |  39 ++++---
 tests/storage/test_keys.py                         |   9 +-
 tests/storage/test_monthly_active_users.py         |  30 ++---
 tests/storage/test_purge.py                        |  15 ++-
 tests/storage/test_receipts.py                     |  12 +-
 tests/storage/test_redaction.py                    | 125 ++++++++++++---------
 tests/storage/test_rollback_worker.py              |  15 ++-
 tests/storage/test_room.py                         |  24 ++--
 tests/storage/test_room_search.py                  |  10 +-
 tests/storage/test_state.py                        |  46 +++++---
 tests/storage/test_stream.py                       |  18 ++-
 tests/storage/test_transactions.py                 |  18 ++-
 tests/storage/test_txn_limit.py                    |  14 ++-
 .../util/test_partial_state_events_tracker.py      |  30 ++---
 36 files changed, 489 insertions(+), 341 deletions(-)
 create mode 100644 changelog.d/14646.misc

(limited to 'synapse')

diff --git a/changelog.d/14646.misc b/changelog.d/14646.misc
new file mode 100644
index 0000000000..d44571b731
--- /dev/null
+++ b/changelog.d/14646.misc
@@ -0,0 +1 @@
+Add missing type hints.
diff --git a/mypy.ini b/mypy.ini
index c3fbd1a955..a4a1e4511a 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -88,6 +88,9 @@ disallow_untyped_defs = False
 [mypy-tests.*]
 disallow_untyped_defs = False
 
+[mypy-tests.handlers.test_sso]
+disallow_untyped_defs = True
+
 [mypy-tests.handlers.test_user_directory]
 disallow_untyped_defs = True
 
@@ -103,16 +106,7 @@ disallow_untyped_defs = True
 [mypy-tests.state.test_profile]
 disallow_untyped_defs = True
 
-[mypy-tests.storage.test_id_generators]
-disallow_untyped_defs = True
-
-[mypy-tests.storage.test_profile]
-disallow_untyped_defs = True
-
-[mypy-tests.handlers.test_sso]
-disallow_untyped_defs = True
-
-[mypy-tests.storage.test_user_directory]
+[mypy-tests.storage.*]
 disallow_untyped_defs = True
 
 [mypy-tests.rest.*]
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index 643c47d608..4c691642e2 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -140,7 +140,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
     @cancellable
     async def get_e2e_device_keys_for_cs_api(
         self,
-        query_list: List[Tuple[str, Optional[str]]],
+        query_list: Collection[Tuple[str, Optional[str]]],
         include_displaynames: bool = True,
     ) -> Dict[str, Dict[str, JsonDict]]:
         """Fetch a list of device keys, formatted suitably for the C/S API.
diff --git a/tests/storage/databases/main/test_deviceinbox.py b/tests/storage/databases/main/test_deviceinbox.py
index 50c20c5b92..373707b275 100644
--- a/tests/storage/databases/main/test_deviceinbox.py
+++ b/tests/storage/databases/main/test_deviceinbox.py
@@ -12,8 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.rest import admin
 from synapse.rest.client import devices
+from synapse.server import HomeServer
+from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
 
@@ -25,11 +29,11 @@ class DeviceInboxBackgroundUpdateStoreTestCase(HomeserverTestCase):
         devices.register_servlets,
     ]
 
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
         self.user_id = self.register_user("foo", "pass")
 
-    def test_background_remove_deleted_devices_from_device_inbox(self):
+    def test_background_remove_deleted_devices_from_device_inbox(self) -> None:
         """Test that the background task to delete old device_inboxes works properly."""
 
         # create a valid device
@@ -89,7 +93,7 @@ class DeviceInboxBackgroundUpdateStoreTestCase(HomeserverTestCase):
         self.assertEqual(1, len(res))
         self.assertEqual(res[0], "cur_device")
 
-    def test_background_remove_hidden_devices_from_device_inbox(self):
+    def test_background_remove_hidden_devices_from_device_inbox(self) -> None:
         """Test that the background task to delete hidden devices
         from device_inboxes works properly."""
 
diff --git a/tests/storage/databases/main/test_events_worker.py b/tests/storage/databases/main/test_events_worker.py
index 5773172ab8..9f33afcca0 100644
--- a/tests/storage/databases/main/test_events_worker.py
+++ b/tests/storage/databases/main/test_events_worker.py
@@ -45,7 +45,7 @@ class HaveSeenEventsTestCase(unittest.HomeserverTestCase):
         login.register_servlets,
     ]
 
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.hs = hs
         self.store: EventsWorkerStore = hs.get_datastores().main
 
@@ -68,7 +68,7 @@ class HaveSeenEventsTestCase(unittest.HomeserverTestCase):
 
             self.event_ids.append(event.event_id)
 
-    def test_simple(self):
+    def test_simple(self) -> None:
         with LoggingContext(name="test") as ctx:
             res = self.get_success(
                 self.store.have_seen_events(
@@ -90,7 +90,7 @@ class HaveSeenEventsTestCase(unittest.HomeserverTestCase):
             self.assertEqual(res, {self.event_ids[0]})
             self.assertEqual(ctx.get_resource_usage().db_txn_count, 0)
 
-    def test_persisting_event_invalidates_cache(self):
+    def test_persisting_event_invalidates_cache(self) -> None:
         """
         Test to make sure that the `have_seen_event` cache
         is invalidated after we persist an event and returns
@@ -138,7 +138,7 @@ class HaveSeenEventsTestCase(unittest.HomeserverTestCase):
             # That should result in a single db query to lookup
             self.assertEqual(ctx.get_resource_usage().db_txn_count, 1)
 
-    def test_invalidate_cache_by_room_id(self):
+    def test_invalidate_cache_by_room_id(self) -> None:
         """
         Test to make sure that all events associated with the given `(room_id,)`
         are invalidated in the `have_seen_event` cache.
@@ -175,7 +175,7 @@ class EventCacheTestCase(unittest.HomeserverTestCase):
         login.register_servlets,
     ]
 
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store: EventsWorkerStore = hs.get_datastores().main
 
         self.user = self.register_user("user", "pass")
@@ -189,7 +189,7 @@ class EventCacheTestCase(unittest.HomeserverTestCase):
         # Reset the event cache so the tests start with it empty
         self.get_success(self.store._get_event_cache.clear())
 
-    def test_simple(self):
+    def test_simple(self) -> None:
         """Test that we cache events that we pull from the DB."""
 
         with LoggingContext("test") as ctx:
@@ -198,7 +198,7 @@ class EventCacheTestCase(unittest.HomeserverTestCase):
             # We should have fetched the event from the DB
             self.assertEqual(ctx.get_resource_usage().evt_db_fetch_count, 1)
 
-    def test_event_ref(self):
+    def test_event_ref(self) -> None:
         """Test that we reuse events that are still in memory but have fallen
         out of the cache, rather than requesting them from the DB.
         """
@@ -223,7 +223,7 @@ class EventCacheTestCase(unittest.HomeserverTestCase):
             # from the DB
             self.assertEqual(ctx.get_resource_usage().evt_db_fetch_count, 0)
 
-    def test_dedupe(self):
+    def test_dedupe(self) -> None:
         """Test that if we request the same event multiple times we only pull it
         out once.
         """
@@ -241,7 +241,7 @@ class EventCacheTestCase(unittest.HomeserverTestCase):
 class DatabaseOutageTestCase(unittest.HomeserverTestCase):
     """Test event fetching during a database outage."""
 
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store: EventsWorkerStore = hs.get_datastores().main
 
         self.room_id = f"!room:{hs.hostname}"
@@ -377,7 +377,7 @@ class GetEventCancellationTestCase(unittest.HomeserverTestCase):
         login.register_servlets,
     ]
 
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store: EventsWorkerStore = hs.get_datastores().main
 
         self.user = self.register_user("user", "pass")
@@ -412,7 +412,8 @@ class GetEventCancellationTestCase(unittest.HomeserverTestCase):
         unblock: "Deferred[None]" = Deferred()
         original_runWithConnection = self.store.db_pool.runWithConnection
 
-        async def runWithConnection(*args, **kwargs):
+        # Don't bother with the types here, we just pass into the original function.
+        async def runWithConnection(*args, **kwargs):  # type: ignore[no-untyped-def]
             await unblock
             return await original_runWithConnection(*args, **kwargs)
 
@@ -441,7 +442,7 @@ class GetEventCancellationTestCase(unittest.HomeserverTestCase):
         self.assertEqual(ctx1.get_resource_usage().evt_db_fetch_count, 1)
         self.assertEqual(ctx2.get_resource_usage().evt_db_fetch_count, 0)
 
-    def test_first_get_event_cancelled(self):
+    def test_first_get_event_cancelled(self) -> None:
         """Test cancellation of the first `get_event` call sharing a database fetch.
 
         The first `get_event` call is the one which initiates the fetch. We expect the
@@ -467,7 +468,7 @@ class GetEventCancellationTestCase(unittest.HomeserverTestCase):
             # The second `get_event` call should complete successfully.
             self.get_success(get_event2)
 
-    def test_second_get_event_cancelled(self):
+    def test_second_get_event_cancelled(self) -> None:
         """Test cancellation of the second `get_event` call sharing a database fetch."""
         with self.blocking_get_event_calls() as (unblock, get_event1, get_event2):
             # Cancel the second `get_event` call.
diff --git a/tests/storage/databases/main/test_lock.py b/tests/storage/databases/main/test_lock.py
index 3cc2a58d8d..56cb49d9b5 100644
--- a/tests/storage/databases/main/test_lock.py
+++ b/tests/storage/databases/main/test_lock.py
@@ -15,18 +15,20 @@
 from twisted.internet import defer, reactor
 from twisted.internet.base import ReactorBase
 from twisted.internet.defer import Deferred
+from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.server import HomeServer
 from synapse.storage.databases.main.lock import _LOCK_TIMEOUT_MS
+from synapse.util import Clock
 
 from tests import unittest
 
 
 class LockTestCase(unittest.HomeserverTestCase):
-    def prepare(self, reactor, clock, hs: HomeServer):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
 
-    def test_acquire_contention(self):
+    def test_acquire_contention(self) -> None:
         # Track the number of tasks holding the lock.
         # Should be at most 1.
         in_lock = 0
@@ -34,7 +36,7 @@ class LockTestCase(unittest.HomeserverTestCase):
 
         release_lock: "Deferred[None]" = Deferred()
 
-        async def task():
+        async def task() -> None:
             nonlocal in_lock
             nonlocal max_in_lock
 
@@ -76,7 +78,7 @@ class LockTestCase(unittest.HomeserverTestCase):
         # At most one task should have held the lock at a time.
         self.assertEqual(max_in_lock, 1)
 
-    def test_simple_lock(self):
+    def test_simple_lock(self) -> None:
         """Test that we can take out a lock and that while we hold it nobody
         else can take it out.
         """
@@ -103,7 +105,7 @@ class LockTestCase(unittest.HomeserverTestCase):
         self.get_success(lock3.__aenter__())
         self.get_success(lock3.__aexit__(None, None, None))
 
-    def test_maintain_lock(self):
+    def test_maintain_lock(self) -> None:
         """Test that we don't time out locks while they're still active"""
 
         lock = self.get_success(self.store.try_acquire_lock("name", "key"))
@@ -119,7 +121,7 @@ class LockTestCase(unittest.HomeserverTestCase):
 
         self.get_success(lock.__aexit__(None, None, None))
 
-    def test_timeout_lock(self):
+    def test_timeout_lock(self) -> None:
         """Test that we time out locks if they're not updated for ages"""
 
         lock = self.get_success(self.store.try_acquire_lock("name", "key"))
@@ -139,7 +141,7 @@ class LockTestCase(unittest.HomeserverTestCase):
 
         self.assertFalse(self.get_success(lock.is_still_valid()))
 
-    def test_drop(self):
+    def test_drop(self) -> None:
         """Test that dropping the context manager means we stop renewing the lock"""
 
         lock = self.get_success(self.store.try_acquire_lock("name", "key"))
@@ -153,7 +155,7 @@ class LockTestCase(unittest.HomeserverTestCase):
         lock2 = self.get_success(self.store.try_acquire_lock("name", "key"))
         self.assertIsNotNone(lock2)
 
-    def test_shutdown(self):
+    def test_shutdown(self) -> None:
         """Test that shutting down Synapse releases the locks"""
         # Acquire two locks
         lock = self.get_success(self.store.try_acquire_lock("name", "key1"))
diff --git a/tests/storage/databases/main/test_receipts.py b/tests/storage/databases/main/test_receipts.py
index c4f12d81d7..68026e2830 100644
--- a/tests/storage/databases/main/test_receipts.py
+++ b/tests/storage/databases/main/test_receipts.py
@@ -33,7 +33,7 @@ class ReceiptsBackgroundUpdateStoreTestCase(HomeserverTestCase):
         login.register_servlets,
     ]
 
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
         self.user_id = self.register_user("foo", "pass")
         self.token = self.login("foo", "pass")
@@ -47,7 +47,7 @@ class ReceiptsBackgroundUpdateStoreTestCase(HomeserverTestCase):
         table: str,
         receipts: Dict[Tuple[str, str, str], Sequence[Dict[str, Any]]],
         expected_unique_receipts: Dict[Tuple[str, str, str], Optional[Dict[str, Any]]],
-    ):
+    ) -> None:
         """Test that the background update to uniqueify non-thread receipts in
         the given receipts table works properly.
 
@@ -154,7 +154,7 @@ class ReceiptsBackgroundUpdateStoreTestCase(HomeserverTestCase):
                     f"Background update did not remove all duplicate receipts from {table}",
                 )
 
-    def test_background_receipts_linearized_unique_index(self):
+    def test_background_receipts_linearized_unique_index(self) -> None:
         """Test that the background update to uniqueify non-thread receipts in
         `receipts_linearized` works properly.
         """
@@ -177,7 +177,7 @@ class ReceiptsBackgroundUpdateStoreTestCase(HomeserverTestCase):
             },
         )
 
-    def test_background_receipts_graph_unique_index(self):
+    def test_background_receipts_graph_unique_index(self) -> None:
         """Test that the background update to uniqueify non-thread receipts in
         `receipts_graph` works properly.
         """
diff --git a/tests/storage/databases/main/test_room.py b/tests/storage/databases/main/test_room.py
index 1edb619630..7d961fac64 100644
--- a/tests/storage/databases/main/test_room.py
+++ b/tests/storage/databases/main/test_room.py
@@ -14,10 +14,14 @@
 
 import json
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.constants import RoomTypes
 from synapse.rest import admin
 from synapse.rest.client import login, room
+from synapse.server import HomeServer
 from synapse.storage.databases.main.room import _BackgroundUpdates
+from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
 
@@ -30,7 +34,7 @@ class RoomBackgroundUpdateStoreTestCase(HomeserverTestCase):
         login.register_servlets,
     ]
 
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
         self.user_id = self.register_user("foo", "pass")
         self.token = self.login("foo", "pass")
@@ -40,7 +44,7 @@ class RoomBackgroundUpdateStoreTestCase(HomeserverTestCase):
 
         return room_id
 
-    def test_background_populate_rooms_creator_column(self):
+    def test_background_populate_rooms_creator_column(self) -> None:
         """Test that the background update to populate the rooms creator column
         works properly.
         """
@@ -95,7 +99,7 @@ class RoomBackgroundUpdateStoreTestCase(HomeserverTestCase):
         )
         self.assertEqual(room_creator_after, self.user_id)
 
-    def test_background_add_room_type_column(self):
+    def test_background_add_room_type_column(self) -> None:
         """Test that the background update to populate the `room_type` column in
         `room_stats_state` works properly.
         """
diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py
index 09cb06d614..8bbf936ae9 100644
--- a/tests/storage/test__base.py
+++ b/tests/storage/test__base.py
@@ -106,7 +106,7 @@ class UpdateUpsertManyTests(unittest.HomeserverTestCase):
             {(1, "user1", "hello"), (2, "user2", "bleb")},
         )
 
-    def test_simple_update_many(self):
+    def test_simple_update_many(self) -> None:
         """
         simple_update_many performs many updates at once.
         """
diff --git a/tests/storage/test_account_data.py b/tests/storage/test_account_data.py
index 72bf5b3d31..1bfd11ceae 100644
--- a/tests/storage/test_account_data.py
+++ b/tests/storage/test_account_data.py
@@ -14,13 +14,17 @@
 
 from typing import Iterable, Optional, Set
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.constants import AccountDataTypes
+from synapse.server import HomeServer
+from synapse.util import Clock
 
 from tests import unittest
 
 
 class IgnoredUsersTestCase(unittest.HomeserverTestCase):
-    def prepare(self, hs, reactor, clock):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = self.hs.get_datastores().main
         self.user = "@user:test"
 
@@ -55,7 +59,7 @@ class IgnoredUsersTestCase(unittest.HomeserverTestCase):
             expected_ignored_user_ids,
         )
 
-    def test_ignoring_users(self):
+    def test_ignoring_users(self) -> None:
         """Basic adding/removing of users from the ignore list."""
         self._update_ignore_list("@other:test", "@another:remote")
         self.assert_ignored(self.user, {"@other:test", "@another:remote"})
@@ -82,7 +86,7 @@ class IgnoredUsersTestCase(unittest.HomeserverTestCase):
         # Check the removed user.
         self.assert_ignorers("@another:remote", {self.user})
 
-    def test_caching(self):
+    def test_caching(self) -> None:
         """Ensure that caching works properly between different users."""
         # The first user ignores a user.
         self._update_ignore_list("@other:test")
@@ -99,7 +103,7 @@ class IgnoredUsersTestCase(unittest.HomeserverTestCase):
         self.assert_ignored(self.user, set())
         self.assert_ignorers("@other:test", {"@second:test"})
 
-    def test_invalid_data(self):
+    def test_invalid_data(self) -> None:
         """Invalid data ends up clearing out the ignored users list."""
         # Add some data and ensure it is there.
         self._update_ignore_list("@other:test")
diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py
index 1047ed09c8..5e1324a169 100644
--- a/tests/storage/test_appservice.py
+++ b/tests/storage/test_appservice.py
@@ -26,7 +26,7 @@ from synapse.appservice import ApplicationService, ApplicationServiceState
 from synapse.config._base import ConfigError
 from synapse.events import EventBase
 from synapse.server import HomeServer
-from synapse.storage.database import DatabasePool, make_conn
+from synapse.storage.database import DatabasePool, LoggingDatabaseConnection, make_conn
 from synapse.storage.databases.main.appservice import (
     ApplicationServiceStore,
     ApplicationServiceTransactionStore,
@@ -39,7 +39,7 @@ from tests.test_utils import make_awaitable
 
 
 class ApplicationServiceStoreTestCase(unittest.HomeserverTestCase):
-    def setUp(self):
+    def setUp(self) -> None:
         super(ApplicationServiceStoreTestCase, self).setUp()
 
         self.as_yaml_files: List[str] = []
@@ -73,7 +73,9 @@ class ApplicationServiceStoreTestCase(unittest.HomeserverTestCase):
 
         super(ApplicationServiceStoreTestCase, self).tearDown()
 
-    def _add_appservice(self, as_token, id, url, hs_token, sender) -> None:
+    def _add_appservice(
+        self, as_token: str, id: str, url: str, hs_token: str, sender: str
+    ) -> None:
         as_yaml = {
             "url": url,
             "as_token": as_token,
@@ -135,7 +137,7 @@ class ApplicationServiceTransactionStoreTestCase(unittest.HomeserverTestCase):
             database, make_conn(db_config, self.engine, "test"), self.hs
         )
 
-    def _add_service(self, url, as_token, id) -> None:
+    def _add_service(self, url: str, as_token: str, id: str) -> None:
         as_yaml = {
             "url": url,
             "as_token": as_token,
@@ -149,7 +151,7 @@ class ApplicationServiceTransactionStoreTestCase(unittest.HomeserverTestCase):
             outfile.write(yaml.dump(as_yaml))
             self.as_yaml_files.append(as_token)
 
-    def _set_state(self, id: str, state: ApplicationServiceState):
+    def _set_state(self, id: str, state: ApplicationServiceState) -> defer.Deferred:
         return self.db_pool.runOperation(
             self.engine.convert_param_style(
                 "INSERT INTO application_services_state(as_id, state) VALUES(?,?)"
@@ -157,7 +159,9 @@ class ApplicationServiceTransactionStoreTestCase(unittest.HomeserverTestCase):
             (id, state.value),
         )
 
-    def _insert_txn(self, as_id, txn_id, events):
+    def _insert_txn(
+        self, as_id: str, txn_id: int, events: List[Mock]
+    ) -> "defer.Deferred[None]":
         return self.db_pool.runOperation(
             self.engine.convert_param_style(
                 "INSERT INTO application_services_txns(as_id, txn_id, event_ids) "
@@ -448,12 +452,14 @@ class ApplicationServiceStoreTypeStreamIds(unittest.HomeserverTestCase):
 
 # required for ApplicationServiceTransactionStoreTestCase tests
 class TestTransactionStore(ApplicationServiceTransactionStore, ApplicationServiceStore):
-    def __init__(self, database: DatabasePool, db_conn, hs) -> None:
+    def __init__(
+        self, database: DatabasePool, db_conn: LoggingDatabaseConnection, hs: HomeServer
+    ) -> None:
         super().__init__(database, db_conn, hs)
 
 
 class ApplicationServiceStoreConfigTestCase(unittest.HomeserverTestCase):
-    def _write_config(self, suffix, **kwargs) -> str:
+    def _write_config(self, suffix: str, **kwargs: str) -> str:
         vals = {
             "id": "id" + suffix,
             "url": "url" + suffix,
diff --git a/tests/storage/test_base.py b/tests/storage/test_base.py
index 40e58f8199..256d28e4c9 100644
--- a/tests/storage/test_base.py
+++ b/tests/storage/test_base.py
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 from collections import OrderedDict
+from typing import Generator
 from unittest.mock import Mock
 
 from twisted.internet import defer
@@ -30,7 +30,7 @@ from tests.utils import default_config
 class SQLBaseStoreTestCase(unittest.TestCase):
     """Test the "simple" SQL generating methods in SQLBaseStore."""
 
-    def setUp(self):
+    def setUp(self) -> None:
         self.db_pool = Mock(spec=["runInteraction"])
         self.mock_txn = Mock()
         self.mock_conn = Mock(spec_set=["cursor", "rollback", "commit"])
@@ -38,12 +38,12 @@ class SQLBaseStoreTestCase(unittest.TestCase):
         self.mock_conn.rollback.return_value = None
         # Our fake runInteraction just runs synchronously inline
 
-        def runInteraction(func, *args, **kwargs):
+        def runInteraction(func, *args, **kwargs) -> defer.Deferred:  # type: ignore[no-untyped-def]
             return defer.succeed(func(self.mock_txn, *args, **kwargs))
 
         self.db_pool.runInteraction = runInteraction
 
-        def runWithConnection(func, *args, **kwargs):
+        def runWithConnection(func, *args, **kwargs):  # type: ignore[no-untyped-def]
             return defer.succeed(func(self.mock_conn, *args, **kwargs))
 
         self.db_pool.runWithConnection = runWithConnection
@@ -62,7 +62,7 @@ class SQLBaseStoreTestCase(unittest.TestCase):
         self.datastore = SQLBaseStore(db, None, hs)  # type: ignore[arg-type]
 
     @defer.inlineCallbacks
-    def test_insert_1col(self):
+    def test_insert_1col(self) -> Generator["defer.Deferred[object]", object, None]:
         self.mock_txn.rowcount = 1
 
         yield defer.ensureDeferred(
@@ -76,7 +76,7 @@ class SQLBaseStoreTestCase(unittest.TestCase):
         )
 
     @defer.inlineCallbacks
-    def test_insert_3cols(self):
+    def test_insert_3cols(self) -> Generator["defer.Deferred[object]", object, None]:
         self.mock_txn.rowcount = 1
 
         yield defer.ensureDeferred(
@@ -92,7 +92,7 @@ class SQLBaseStoreTestCase(unittest.TestCase):
         )
 
     @defer.inlineCallbacks
-    def test_select_one_1col(self):
+    def test_select_one_1col(self) -> Generator["defer.Deferred[object]", object, None]:
         self.mock_txn.rowcount = 1
         self.mock_txn.__iter__ = Mock(return_value=iter([("Value",)]))
 
@@ -108,7 +108,7 @@ class SQLBaseStoreTestCase(unittest.TestCase):
         )
 
     @defer.inlineCallbacks
-    def test_select_one_3col(self):
+    def test_select_one_3col(self) -> Generator["defer.Deferred[object]", object, None]:
         self.mock_txn.rowcount = 1
         self.mock_txn.fetchone.return_value = (1, 2, 3)
 
@@ -126,7 +126,9 @@ class SQLBaseStoreTestCase(unittest.TestCase):
         )
 
     @defer.inlineCallbacks
-    def test_select_one_missing(self):
+    def test_select_one_missing(
+        self,
+    ) -> Generator["defer.Deferred[object]", object, None]:
         self.mock_txn.rowcount = 0
         self.mock_txn.fetchone.return_value = None
 
@@ -142,7 +144,7 @@ class SQLBaseStoreTestCase(unittest.TestCase):
         self.assertFalse(ret)
 
     @defer.inlineCallbacks
-    def test_select_list(self):
+    def test_select_list(self) -> Generator["defer.Deferred[object]", object, None]:
         self.mock_txn.rowcount = 3
         self.mock_txn.__iter__ = Mock(return_value=iter([(1,), (2,), (3,)]))
         self.mock_txn.description = (("colA", None, None, None, None, None, None),)
@@ -159,7 +161,7 @@ class SQLBaseStoreTestCase(unittest.TestCase):
         )
 
     @defer.inlineCallbacks
-    def test_update_one_1col(self):
+    def test_update_one_1col(self) -> Generator["defer.Deferred[object]", object, None]:
         self.mock_txn.rowcount = 1
 
         yield defer.ensureDeferred(
@@ -176,7 +178,9 @@ class SQLBaseStoreTestCase(unittest.TestCase):
         )
 
     @defer.inlineCallbacks
-    def test_update_one_4cols(self):
+    def test_update_one_4cols(
+        self,
+    ) -> Generator["defer.Deferred[object]", object, None]:
         self.mock_txn.rowcount = 1
 
         yield defer.ensureDeferred(
@@ -193,7 +197,7 @@ class SQLBaseStoreTestCase(unittest.TestCase):
         )
 
     @defer.inlineCallbacks
-    def test_delete_one(self):
+    def test_delete_one(self) -> Generator["defer.Deferred[object]", object, None]:
         self.mock_txn.rowcount = 1
 
         yield defer.ensureDeferred(
diff --git a/tests/storage/test_cleanup_extrems.py b/tests/storage/test_cleanup_extrems.py
index b998ad42d9..d570684c99 100644
--- a/tests/storage/test_cleanup_extrems.py
+++ b/tests/storage/test_cleanup_extrems.py
@@ -15,11 +15,16 @@
 import os.path
 from unittest.mock import Mock, patch
 
+from twisted.test.proto_helpers import MemoryReactor
+
 import synapse.rest.admin
 from synapse.api.constants import EventTypes
 from synapse.rest.client import login, room
+from synapse.server import HomeServer
 from synapse.storage import prepare_database
+from synapse.storage.types import Cursor
 from synapse.types import UserID, create_requester
+from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
 
@@ -29,7 +34,9 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
     Test the background update to clean forward extremities table.
     """
 
-    def prepare(self, reactor, clock, homeserver):
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
         self.store = homeserver.get_datastores().main
         self.room_creator = homeserver.get_room_creation_handler()
 
@@ -39,7 +46,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
         info, _ = self.get_success(self.room_creator.create_room(self.requester, {}))
         self.room_id = info["room_id"]
 
-    def run_background_update(self):
+    def run_background_update(self) -> None:
         """Re run the background update to clean up the extremities."""
         # Make sure we don't clash with in progress updates.
         self.assertTrue(
@@ -54,7 +61,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
             "delete_forward_extremities.sql",
         )
 
-        def run_delta_file(txn):
+        def run_delta_file(txn: Cursor) -> None:
             prepare_database.executescript(txn, schema_path)
 
         self.get_success(
@@ -84,7 +91,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
             (room_id,)
         )
 
-    def test_soft_failed_extremities_handled_correctly(self):
+    def test_soft_failed_extremities_handled_correctly(self) -> None:
         """Test that extremities are correctly calculated in the presence of
         soft failed events.
 
@@ -114,7 +121,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
 
         self.assertEqual(latest_event_ids, [event_id_4])
 
-    def test_basic_cleanup(self):
+    def test_basic_cleanup(self) -> None:
         """Test that extremities are correctly calculated in the presence of
         soft failed events.
 
@@ -149,7 +156,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
         )
         self.assertEqual(latest_event_ids, [event_id_b])
 
-    def test_chain_of_fail_cleanup(self):
+    def test_chain_of_fail_cleanup(self) -> None:
         """Test that extremities are correctly calculated in the presence of
         soft failed events.
 
@@ -187,7 +194,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
         )
         self.assertEqual(latest_event_ids, [event_id_b])
 
-    def test_forked_graph_cleanup(self):
+    def test_forked_graph_cleanup(self) -> None:
         r"""Test that extremities are correctly calculated in the presence of
         soft failed events.
 
@@ -252,12 +259,14 @@ class CleanupExtremDummyEventsTestCase(HomeserverTestCase):
         room.register_servlets,
     ]
 
-    def make_homeserver(self, reactor, clock):
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         config = self.default_config()
         config["cleanup_extremities_with_dummy_events"] = True
         return self.setup_test_homeserver(config=config)
 
-    def prepare(self, reactor, clock, homeserver):
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
         self.store = homeserver.get_datastores().main
         self.room_creator = homeserver.get_room_creation_handler()
         self.event_creator_handler = homeserver.get_event_creation_handler()
@@ -273,7 +282,7 @@ class CleanupExtremDummyEventsTestCase(HomeserverTestCase):
         self.event_creator = homeserver.get_event_creation_handler()
         homeserver.config.consent.user_consent_version = self.CONSENT_VERSION
 
-    def test_send_dummy_event(self):
+    def test_send_dummy_event(self) -> None:
         self._create_extremity_rich_graph()
 
         # Pump the reactor repeatedly so that the background updates have a
@@ -286,7 +295,7 @@ class CleanupExtremDummyEventsTestCase(HomeserverTestCase):
         self.assertTrue(len(latest_event_ids) < 10, len(latest_event_ids))
 
     @patch("synapse.handlers.message._DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY", new=0)
-    def test_send_dummy_events_when_insufficient_power(self):
+    def test_send_dummy_events_when_insufficient_power(self) -> None:
         self._create_extremity_rich_graph()
         # Criple power levels
         self.helper.send_state(
@@ -317,7 +326,7 @@ class CleanupExtremDummyEventsTestCase(HomeserverTestCase):
         self.assertTrue(len(latest_event_ids) < 10, len(latest_event_ids))
 
     @patch("synapse.handlers.message._DUMMY_EVENT_ROOM_EXCLUSION_EXPIRY", new=250)
-    def test_expiry_logic(self):
+    def test_expiry_logic(self) -> None:
         """Simple test to ensure that _expire_rooms_to_exclude_from_dummy_event_insertion()
         expires old entries correctly.
         """
@@ -357,7 +366,7 @@ class CleanupExtremDummyEventsTestCase(HomeserverTestCase):
             0,
         )
 
-    def _create_extremity_rich_graph(self):
+    def _create_extremity_rich_graph(self) -> None:
         """Helper method to create bushy graph on demand"""
 
         event_id_start = self.create_and_send_event(self.room_id, self.user)
@@ -372,7 +381,7 @@ class CleanupExtremDummyEventsTestCase(HomeserverTestCase):
         )
         self.assertEqual(len(latest_event_ids), 50)
 
-    def _enable_consent_checking(self):
+    def _enable_consent_checking(self) -> None:
         """Helper method to enable consent checking"""
         self.event_creator._block_events_without_consent_error = "No consent from user"
         consent_uri_builder = Mock()
diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py
index a9af1babed..81e4e596e4 100644
--- a/tests/storage/test_client_ips.py
+++ b/tests/storage/test_client_ips.py
@@ -13,15 +13,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Any, Dict
 from unittest.mock import Mock
 
 from parameterized import parameterized
 
+from twisted.test.proto_helpers import MemoryReactor
+
 import synapse.rest.admin
 from synapse.http.site import XForwardedForRequest
 from synapse.rest.client import login
+from synapse.server import HomeServer
 from synapse.storage.databases.main.client_ips import LAST_SEEN_GRANULARITY
 from synapse.types import UserID
+from synapse.util import Clock
 
 from tests import unittest
 from tests.server import make_request
@@ -30,14 +35,10 @@ from tests.unittest import override_config
 
 
 class ClientIpStoreTestCase(unittest.HomeserverTestCase):
-    def make_homeserver(self, reactor, clock):
-        hs = self.setup_test_homeserver()
-        return hs
-
-    def prepare(self, hs, reactor, clock):
-        self.store = self.hs.get_datastores().main
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.store = hs.get_datastores().main
 
-    def test_insert_new_client_ip(self):
+    def test_insert_new_client_ip(self) -> None:
         self.reactor.advance(12345678)
 
         user_id = "@user:id"
@@ -76,7 +77,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
             r,
         )
 
-    def test_insert_new_client_ip_none_device_id(self):
+    def test_insert_new_client_ip_none_device_id(self) -> None:
         """
         An insert with a device ID of NULL will not create a new entry, but
         update an existing entry in the user_ips table.
@@ -148,7 +149,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
         )
 
     @parameterized.expand([(False,), (True,)])
-    def test_get_last_client_ip_by_device(self, after_persisting: bool):
+    def test_get_last_client_ip_by_device(self, after_persisting: bool) -> None:
         """Test `get_last_client_ip_by_device` for persisted and unpersisted data"""
         self.reactor.advance(12345678)
 
@@ -213,7 +214,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
             },
         )
 
-    def test_get_last_client_ip_by_device_combined_data(self):
+    def test_get_last_client_ip_by_device_combined_data(self) -> None:
         """Test that `get_last_client_ip_by_device` combines persisted and unpersisted
         data together correctly
         """
@@ -312,7 +313,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
         )
 
     @parameterized.expand([(False,), (True,)])
-    def test_get_user_ip_and_agents(self, after_persisting: bool):
+    def test_get_user_ip_and_agents(self, after_persisting: bool) -> None:
         """Test `get_user_ip_and_agents` for persisted and unpersisted data"""
         self.reactor.advance(12345678)
 
@@ -352,7 +353,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
             ],
         )
 
-    def test_get_user_ip_and_agents_combined_data(self):
+    def test_get_user_ip_and_agents_combined_data(self) -> None:
         """Test that `get_user_ip_and_agents` combines persisted and unpersisted data
         together correctly
         """
@@ -429,7 +430,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
         )
 
     @override_config({"limit_usage_by_mau": False, "max_mau_value": 50})
-    def test_disabled_monthly_active_user(self):
+    def test_disabled_monthly_active_user(self) -> None:
         user_id = "@user:server"
         self.get_success(
             self.store.insert_client_ip(
@@ -440,7 +441,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
         self.assertFalse(active)
 
     @override_config({"limit_usage_by_mau": True, "max_mau_value": 50})
-    def test_adding_monthly_active_user_when_full(self):
+    def test_adding_monthly_active_user_when_full(self) -> None:
         lots_of_users = 100
         user_id = "@user:server"
 
@@ -456,7 +457,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
         self.assertFalse(active)
 
     @override_config({"limit_usage_by_mau": True, "max_mau_value": 50})
-    def test_adding_monthly_active_user_when_space(self):
+    def test_adding_monthly_active_user_when_space(self) -> None:
         user_id = "@user:server"
         active = self.get_success(self.store.user_last_seen_monthly_active(user_id))
         self.assertFalse(active)
@@ -473,7 +474,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
         self.assertTrue(active)
 
     @override_config({"limit_usage_by_mau": True, "max_mau_value": 50})
-    def test_updating_monthly_active_user_when_space(self):
+    def test_updating_monthly_active_user_when_space(self) -> None:
         user_id = "@user:server"
         self.get_success(self.store.register_user(user_id=user_id, password_hash=None))
 
@@ -491,7 +492,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
         active = self.get_success(self.store.user_last_seen_monthly_active(user_id))
         self.assertTrue(active)
 
-    def test_devices_last_seen_bg_update(self):
+    def test_devices_last_seen_bg_update(self) -> None:
         # First make sure we have completed all updates.
         self.wait_for_background_updates()
 
@@ -576,7 +577,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
             r,
         )
 
-    def test_old_user_ips_pruned(self):
+    def test_old_user_ips_pruned(self) -> None:
         # First make sure we have completed all updates.
         self.wait_for_background_updates()
 
@@ -639,11 +640,11 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
         self.assertEqual(result, [])
 
         # But we should still get the correct values for the device
-        result = self.get_success(
+        result2 = self.get_success(
             self.store.get_last_client_ip_by_device(user_id, device_id)
         )
 
-        r = result[(user_id, device_id)]
+        r = result2[(user_id, device_id)]
         self.assertDictContainsSubset(
             {
                 "user_id": user_id,
@@ -663,15 +664,11 @@ class ClientIpAuthTestCase(unittest.HomeserverTestCase):
         login.register_servlets,
     ]
 
-    def make_homeserver(self, reactor, clock):
-        hs = self.setup_test_homeserver()
-        return hs
-
-    def prepare(self, hs, reactor, clock):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = self.hs.get_datastores().main
         self.user_id = self.register_user("bob", "abc123", True)
 
-    def test_request_with_xforwarded(self):
+    def test_request_with_xforwarded(self) -> None:
         """
         The IP in X-Forwarded-For is entered into the client IPs table.
         """
@@ -681,14 +678,19 @@ class ClientIpAuthTestCase(unittest.HomeserverTestCase):
             {"request": XForwardedForRequest},
         )
 
-    def test_request_from_getPeer(self):
+    def test_request_from_getPeer(self) -> None:
         """
         The IP returned by getPeer is entered into the client IPs table, if
         there's no X-Forwarded-For header.
         """
         self._runtest({}, "127.0.0.1", {})
 
-    def _runtest(self, headers, expected_ip, make_request_args):
+    def _runtest(
+        self,
+        headers: Dict[bytes, bytes],
+        expected_ip: str,
+        make_request_args: Dict[str, Any],
+    ) -> None:
         device_id = "bleb"
 
         access_token = self.login("bob", "abc123", device_id=device_id)
diff --git a/tests/storage/test_database.py b/tests/storage/test_database.py
index a40fc20ef9..543cce6b3e 100644
--- a/tests/storage/test_database.py
+++ b/tests/storage/test_database.py
@@ -31,7 +31,7 @@ from tests import unittest
 
 
 class TupleComparisonClauseTestCase(unittest.TestCase):
-    def test_native_tuple_comparison(self):
+    def test_native_tuple_comparison(self) -> None:
         clause, args = make_tuple_comparison_clause([("a", 1), ("b", 2)])
         self.assertEqual(clause, "(a,b) > (?,?)")
         self.assertEqual(args, [1, 2])
diff --git a/tests/storage/test_devices.py b/tests/storage/test_devices.py
index 8e7db2c4ec..f03807c8f9 100644
--- a/tests/storage/test_devices.py
+++ b/tests/storage/test_devices.py
@@ -12,17 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Collection, List, Tuple
+
+from twisted.test.proto_helpers import MemoryReactor
+
 import synapse.api.errors
 from synapse.api.constants import EduTypes
+from synapse.server import HomeServer
+from synapse.types import JsonDict
+from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
 
 
 class DeviceStoreTestCase(HomeserverTestCase):
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
 
-    def add_device_change(self, user_id, device_ids, host):
+    def add_device_change(self, user_id: str, device_ids: List[str], host: str) -> None:
         """Add a device list change for the given device to
         `device_lists_outbound_pokes` table.
         """
@@ -44,12 +51,13 @@ class DeviceStoreTestCase(HomeserverTestCase):
                 )
             )
 
-    def test_store_new_device(self):
+    def test_store_new_device(self) -> None:
         self.get_success(
             self.store.store_device("user_id", "device_id", "display_name")
         )
 
         res = self.get_success(self.store.get_device("user_id", "device_id"))
+        assert res is not None
         self.assertDictContainsSubset(
             {
                 "user_id": "user_id",
@@ -59,7 +67,7 @@ class DeviceStoreTestCase(HomeserverTestCase):
             res,
         )
 
-    def test_get_devices_by_user(self):
+    def test_get_devices_by_user(self) -> None:
         self.get_success(
             self.store.store_device("user_id", "device1", "display_name 1")
         )
@@ -89,7 +97,7 @@ class DeviceStoreTestCase(HomeserverTestCase):
             res["device2"],
         )
 
-    def test_count_devices_by_users(self):
+    def test_count_devices_by_users(self) -> None:
         self.get_success(
             self.store.store_device("user_id", "device1", "display_name 1")
         )
@@ -114,7 +122,7 @@ class DeviceStoreTestCase(HomeserverTestCase):
         )
         self.assertEqual(3, res)
 
-    def test_get_device_updates_by_remote(self):
+    def test_get_device_updates_by_remote(self) -> None:
         device_ids = ["device_id1", "device_id2"]
 
         # Add two device updates with sequential `stream_id`s
@@ -128,7 +136,7 @@ class DeviceStoreTestCase(HomeserverTestCase):
         # Check original device_ids are contained within these updates
         self._check_devices_in_updates(device_ids, device_updates)
 
-    def test_get_device_updates_by_remote_can_limit_properly(self):
+    def test_get_device_updates_by_remote_can_limit_properly(self) -> None:
         """
         Tests that `get_device_updates_by_remote` returns an appropriate
         stream_id to resume fetching from (without skipping any results).
@@ -280,7 +288,11 @@ class DeviceStoreTestCase(HomeserverTestCase):
         )
         self.assertEqual(device_updates, [])
 
-    def _check_devices_in_updates(self, expected_device_ids, device_updates):
+    def _check_devices_in_updates(
+        self,
+        expected_device_ids: Collection[str],
+        device_updates: List[Tuple[str, JsonDict]],
+    ) -> None:
         """Check that an specific device ids exist in a list of device update EDUs"""
         self.assertEqual(len(device_updates), len(expected_device_ids))
 
@@ -289,17 +301,19 @@ class DeviceStoreTestCase(HomeserverTestCase):
         }
         self.assertEqual(received_device_ids, set(expected_device_ids))
 
-    def test_update_device(self):
+    def test_update_device(self) -> None:
         self.get_success(
             self.store.store_device("user_id", "device_id", "display_name 1")
         )
 
         res = self.get_success(self.store.get_device("user_id", "device_id"))
+        assert res is not None
         self.assertEqual("display_name 1", res["display_name"])
 
         # do a no-op first
         self.get_success(self.store.update_device("user_id", "device_id"))
         res = self.get_success(self.store.get_device("user_id", "device_id"))
+        assert res is not None
         self.assertEqual("display_name 1", res["display_name"])
 
         # do the update
@@ -311,9 +325,10 @@ class DeviceStoreTestCase(HomeserverTestCase):
 
         # check it worked
         res = self.get_success(self.store.get_device("user_id", "device_id"))
+        assert res is not None
         self.assertEqual("display_name 2", res["display_name"])
 
-    def test_update_unknown_device(self):
+    def test_update_unknown_device(self) -> None:
         exc = self.get_failure(
             self.store.update_device(
                 "user_id", "unknown_device_id", new_display_name="display_name 2"
diff --git a/tests/storage/test_directory.py b/tests/storage/test_directory.py
index 20bf3ca17b..8bedc6bdf3 100644
--- a/tests/storage/test_directory.py
+++ b/tests/storage/test_directory.py
@@ -12,19 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.server import HomeServer
 from synapse.types import RoomAlias, RoomID
+from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
 
 
 class DirectoryStoreTestCase(HomeserverTestCase):
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
 
         self.room = RoomID.from_string("!abcde:test")
         self.alias = RoomAlias.from_string("#my-room:test")
 
-    def test_room_to_alias(self):
+    def test_room_to_alias(self) -> None:
         self.get_success(
             self.store.create_room_alias_association(
                 room_alias=self.alias, room_id=self.room.to_string(), servers=["test"]
@@ -36,7 +40,7 @@ class DirectoryStoreTestCase(HomeserverTestCase):
             (self.get_success(self.store.get_aliases_for_room(self.room.to_string()))),
         )
 
-    def test_alias_to_room(self):
+    def test_alias_to_room(self) -> None:
         self.get_success(
             self.store.create_room_alias_association(
                 room_alias=self.alias, room_id=self.room.to_string(), servers=["test"]
@@ -48,7 +52,7 @@ class DirectoryStoreTestCase(HomeserverTestCase):
             (self.get_success(self.store.get_association_from_room_alias(self.alias))),
         )
 
-    def test_delete_alias(self):
+    def test_delete_alias(self) -> None:
         self.get_success(
             self.store.create_room_alias_association(
                 room_alias=self.alias, room_id=self.room.to_string(), servers=["test"]
diff --git a/tests/storage/test_e2e_room_keys.py b/tests/storage/test_e2e_room_keys.py
index fb96ab3a2f..9cb326d90a 100644
--- a/tests/storage/test_e2e_room_keys.py
+++ b/tests/storage/test_e2e_room_keys.py
@@ -12,7 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.server import HomeServer
 from synapse.storage.databases.main.e2e_room_keys import RoomKey
+from synapse.util import Clock
 
 from tests import unittest
 
@@ -26,12 +30,12 @@ room_key: RoomKey = {
 
 
 class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
-    def make_homeserver(self, reactor, clock):
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         hs = self.setup_test_homeserver("server", federation_http_client=None)
         self.store = hs.get_datastores().main
         return hs
 
-    def test_room_keys_version_delete(self):
+    def test_room_keys_version_delete(self) -> None:
         # test that deleting a room key backup deletes the keys
         version1 = self.get_success(
             self.store.create_e2e_room_keys_version(
diff --git a/tests/storage/test_end_to_end_keys.py b/tests/storage/test_end_to_end_keys.py
index 0f04493ad0..5fde3b9c78 100644
--- a/tests/storage/test_end_to_end_keys.py
+++ b/tests/storage/test_end_to_end_keys.py
@@ -12,14 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.server import HomeServer
+from synapse.util import Clock
+
 from tests.unittest import HomeserverTestCase
 
 
 class EndToEndKeyStoreTestCase(HomeserverTestCase):
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
 
-    def test_key_without_device_name(self):
+    def test_key_without_device_name(self) -> None:
         now = 1470174257070
         json = {"key": "value"}
 
@@ -35,7 +40,7 @@ class EndToEndKeyStoreTestCase(HomeserverTestCase):
         dev = res["user"]["device"]
         self.assertDictContainsSubset(json, dev)
 
-    def test_reupload_key(self):
+    def test_reupload_key(self) -> None:
         now = 1470174257070
         json = {"key": "value"}
 
@@ -53,7 +58,7 @@ class EndToEndKeyStoreTestCase(HomeserverTestCase):
         )
         self.assertFalse(changed)
 
-    def test_get_key_with_device_name(self):
+    def test_get_key_with_device_name(self) -> None:
         now = 1470174257070
         json = {"key": "value"}
 
@@ -70,7 +75,7 @@ class EndToEndKeyStoreTestCase(HomeserverTestCase):
             {"key": "value", "unsigned": {"device_display_name": "display_name"}}, dev
         )
 
-    def test_multiple_devices(self):
+    def test_multiple_devices(self) -> None:
         now = 1470174257070
 
         self.get_success(self.store.store_device("user1", "device1", None))
diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py
index de9f4af2de..c070278db8 100644
--- a/tests/storage/test_event_chain.py
+++ b/tests/storage/test_event_chain.py
@@ -14,6 +14,7 @@
 
 from typing import Dict, List, Set, Tuple
 
+from twisted.test.proto_helpers import MemoryReactor
 from twisted.trial import unittest
 
 from synapse.api.constants import EventTypes
@@ -22,18 +23,22 @@ from synapse.events import EventBase
 from synapse.events.snapshot import EventContext
 from synapse.rest import admin
 from synapse.rest.client import login, room
+from synapse.server import HomeServer
+from synapse.storage.database import LoggingTransaction
 from synapse.storage.databases.main.events import _LinkMap
+from synapse.storage.types import Cursor
 from synapse.types import create_requester
+from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
 
 
 class EventChainStoreTestCase(HomeserverTestCase):
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
         self._next_stream_ordering = 1
 
-    def test_simple(self):
+    def test_simple(self) -> None:
         """Test that the example in `docs/auth_chain_difference_algorithm.md`
         works.
         """
@@ -232,7 +237,7 @@ class EventChainStoreTestCase(HomeserverTestCase):
                 ),
             )
 
-    def test_out_of_order_events(self):
+    def test_out_of_order_events(self) -> None:
         """Test that we handle persisting events that we don't have the full
         auth chain for yet (which should only happen for out of band memberships).
         """
@@ -378,7 +383,7 @@ class EventChainStoreTestCase(HomeserverTestCase):
     def persist(
         self,
         events: List[EventBase],
-    ):
+    ) -> None:
         """Persist the given events and check that the links generated match
         those given.
         """
@@ -389,7 +394,7 @@ class EventChainStoreTestCase(HomeserverTestCase):
             e.internal_metadata.stream_ordering = self._next_stream_ordering
             self._next_stream_ordering += 1
 
-        def _persist(txn):
+        def _persist(txn: LoggingTransaction) -> None:
             # We need to persist the events to the events and state_events
             # tables.
             persist_events_store._store_event_txn(
@@ -456,7 +461,7 @@ class EventChainStoreTestCase(HomeserverTestCase):
 
 
 class LinkMapTestCase(unittest.TestCase):
-    def test_simple(self):
+    def test_simple(self) -> None:
         """Basic tests for the LinkMap."""
         link_map = _LinkMap()
 
@@ -492,7 +497,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
         login.register_servlets,
     ]
 
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
         self.user_id = self.register_user("foo", "pass")
         self.token = self.login("foo", "pass")
@@ -559,7 +564,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
 
         # Delete the chain cover info.
 
-        def _delete_tables(txn):
+        def _delete_tables(txn: Cursor) -> None:
             txn.execute("DELETE FROM event_auth_chains")
             txn.execute("DELETE FROM event_auth_chain_links")
 
@@ -567,7 +572,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
 
         return room_id, [state1, state2]
 
-    def test_background_update_single_room(self):
+    def test_background_update_single_room(self) -> None:
         """Test that the background update to calculate auth chains for historic
         rooms works correctly.
         """
@@ -602,7 +607,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
             )
         )
 
-    def test_background_update_multiple_rooms(self):
+    def test_background_update_multiple_rooms(self) -> None:
         """Test that the background update to calculate auth chains for historic
         rooms works correctly.
         """
@@ -640,7 +645,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
             )
         )
 
-    def test_background_update_single_large_room(self):
+    def test_background_update_single_large_room(self) -> None:
         """Test that the background update to calculate auth chains for historic
         rooms works correctly.
         """
@@ -693,7 +698,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
             )
         )
 
-    def test_background_update_multiple_large_room(self):
+    def test_background_update_multiple_large_room(self) -> None:
         """Test that the background update to calculate auth chains for historic
         rooms works correctly.
         """
diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index 853db930d6..7fd3e01364 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import datetime
-from typing import Dict, List, Tuple, Union
+from typing import Dict, List, Tuple, Union, cast
 
 import attr
 from parameterized import parameterized
@@ -26,11 +26,12 @@ from synapse.api.room_versions import (
     EventFormatVersions,
     RoomVersion,
 )
-from synapse.events import _EventInternalMetadata
+from synapse.events import EventBase, _EventInternalMetadata
 from synapse.rest import admin
 from synapse.rest.client import login, room
 from synapse.server import HomeServer
 from synapse.storage.database import LoggingTransaction
+from synapse.storage.types import Cursor
 from synapse.types import JsonDict
 from synapse.util import Clock, json_encoder
 
@@ -54,11 +55,11 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
 
-    def test_get_prev_events_for_room(self):
+    def test_get_prev_events_for_room(self) -> None:
         room_id = "@ROOM:local"
 
         # add a bunch of events and hashes to act as forward extremities
-        def insert_event(txn, i):
+        def insert_event(txn: Cursor, i: int) -> None:
             event_id = "$event_%i:local" % i
 
             txn.execute(
@@ -90,12 +91,12 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         for i in range(0, 10):
             self.assertEqual("$event_%i:local" % (19 - i), r[i])
 
-    def test_get_rooms_with_many_extremities(self):
+    def test_get_rooms_with_many_extremities(self) -> None:
         room1 = "#room1"
         room2 = "#room2"
         room3 = "#room3"
 
-        def insert_event(txn, i, room_id):
+        def insert_event(txn: Cursor, i: int, room_id: str) -> None:
             event_id = "$event_%i:local" % i
             txn.execute(
                 (
@@ -155,7 +156,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         #     |   |
         #     K   J
 
-        auth_graph = {
+        auth_graph: Dict[str, List[str]] = {
             "a": ["e"],
             "b": ["e"],
             "c": ["g", "i"],
@@ -185,7 +186,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
 
         # Mark the room as maybe having a cover index.
 
-        def store_room(txn):
+        def store_room(txn: LoggingTransaction) -> None:
             self.store.db_pool.simple_insert_txn(
                 txn,
                 "rooms",
@@ -203,7 +204,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         # We rudely fiddle with the appropriate tables directly, as that's much
         # easier than constructing events properly.
 
-        def insert_event(txn):
+        def insert_event(txn: LoggingTransaction) -> None:
             stream_ordering = 0
 
             for event_id in auth_graph:
@@ -228,7 +229,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
             self.hs.datastores.persist_events._persist_event_auth_chain_txn(
                 txn,
                 [
-                    FakeEvent(event_id, room_id, auth_graph[event_id])
+                    cast(EventBase, FakeEvent(event_id, room_id, auth_graph[event_id]))
                     for event_id in auth_graph
                 ],
             )
@@ -243,7 +244,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         return room_id
 
     @parameterized.expand([(True,), (False,)])
-    def test_auth_chain_ids(self, use_chain_cover_index: bool):
+    def test_auth_chain_ids(self, use_chain_cover_index: bool) -> None:
         room_id = self._setup_auth_chain(use_chain_cover_index)
 
         # a and b have the same auth chain.
@@ -308,7 +309,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         self.assertCountEqual(auth_chain_ids, ["i", "j"])
 
     @parameterized.expand([(True,), (False,)])
-    def test_auth_difference(self, use_chain_cover_index: bool):
+    def test_auth_difference(self, use_chain_cover_index: bool) -> None:
         room_id = self._setup_auth_chain(use_chain_cover_index)
 
         # Now actually test that various combinations give the right result:
@@ -353,7 +354,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         )
         self.assertSetEqual(difference, set())
 
-    def test_auth_difference_partial_cover(self):
+    def test_auth_difference_partial_cover(self) -> None:
         """Test that we correctly handle rooms where not all events have a chain
         cover calculated. This can happen in some obscure edge cases, including
         during the background update that calculates the chain cover for old
@@ -377,7 +378,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         #     |   |
         #     K   J
 
-        auth_graph = {
+        auth_graph: Dict[str, List[str]] = {
             "a": ["e"],
             "b": ["e"],
             "c": ["g", "i"],
@@ -408,7 +409,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         # We rudely fiddle with the appropriate tables directly, as that's much
         # easier than constructing events properly.
 
-        def insert_event(txn):
+        def insert_event(txn: LoggingTransaction) -> None:
             # First insert the room and mark it as having a chain cover.
             self.store.db_pool.simple_insert_txn(
                 txn,
@@ -447,7 +448,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
             self.hs.datastores.persist_events._persist_event_auth_chain_txn(
                 txn,
                 [
-                    FakeEvent(event_id, room_id, auth_graph[event_id])
+                    cast(EventBase, FakeEvent(event_id, room_id, auth_graph[event_id]))
                     for event_id in auth_graph
                     if event_id != "b"
                 ],
@@ -465,7 +466,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
 
             self.hs.datastores.persist_events._persist_event_auth_chain_txn(
                 txn,
-                [FakeEvent("b", room_id, auth_graph["b"])],
+                [cast(EventBase, FakeEvent("b", room_id, auth_graph["b"]))],
             )
 
             self.store.db_pool.simple_update_txn(
@@ -527,7 +528,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
     @parameterized.expand(
         [(room_version,) for room_version in KNOWN_ROOM_VERSIONS.values()]
     )
-    def test_prune_inbound_federation_queue(self, room_version: RoomVersion):
+    def test_prune_inbound_federation_queue(self, room_version: RoomVersion) -> None:
         """Test that pruning of inbound federation queues work"""
 
         room_id = "some_room_id"
@@ -686,7 +687,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
 
             stream_ordering += 1
 
-        def populate_db(txn: LoggingTransaction):
+        def populate_db(txn: LoggingTransaction) -> None:
             # Insert the room to satisfy the foreign key constraint of
             # `event_failed_pull_attempts`
             self.store.db_pool.simple_insert_txn(
@@ -760,7 +761,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
 
         return _BackfillSetupInfo(room_id=room_id, depth_map=depth_map)
 
-    def test_get_backfill_points_in_room(self):
+    def test_get_backfill_points_in_room(self) -> None:
         """
         Test to make sure only backfill points that are older and come before
         the `current_depth` are returned.
@@ -787,7 +788,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
 
     def test_get_backfill_points_in_room_excludes_events_we_have_attempted(
         self,
-    ):
+    ) -> None:
         """
         Test to make sure that events we have attempted to backfill (and within
         backoff timeout duration) do not show up as an event to backfill again.
@@ -824,7 +825,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
 
     def test_get_backfill_points_in_room_attempted_event_retry_after_backoff_duration(
         self,
-    ):
+    ) -> None:
         """
         Test to make sure after we fake attempt to backfill event "b3" many times,
         we can see retry and see the "b3" again after the backoff timeout duration
@@ -941,7 +942,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
             "5": 7,
         }
 
-        def populate_db(txn: LoggingTransaction):
+        def populate_db(txn: LoggingTransaction) -> None:
             # Insert the room to satisfy the foreign key constraint of
             # `event_failed_pull_attempts`
             self.store.db_pool.simple_insert_txn(
@@ -996,7 +997,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
 
         return _BackfillSetupInfo(room_id=room_id, depth_map=depth_map)
 
-    def test_get_insertion_event_backward_extremities_in_room(self):
+    def test_get_insertion_event_backward_extremities_in_room(self) -> None:
         """
         Test to make sure only insertion event backward extremities that are
         older and come before the `current_depth` are returned.
@@ -1027,7 +1028,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
 
     def test_get_insertion_event_backward_extremities_in_room_excludes_events_we_have_attempted(
         self,
-    ):
+    ) -> None:
         """
         Test to make sure that insertion events we have attempted to backfill
         (and within backoff timeout duration) do not show up as an event to
@@ -1060,7 +1061,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
 
     def test_get_insertion_event_backward_extremities_in_room_attempted_event_retry_after_backoff_duration(
         self,
-    ):
+    ) -> None:
         """
         Test to make sure after we fake attempt to backfill event
         "insertion_eventA" many times, we can see retry and see the
@@ -1130,9 +1131,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
         self.assertEqual(backfill_event_ids, ["insertion_eventA"])
 
-    def test_get_event_ids_to_not_pull_from_backoff(
-        self,
-    ):
+    def test_get_event_ids_to_not_pull_from_backoff(self) -> None:
         """
         Test to make sure only event IDs we should backoff from are returned.
         """
@@ -1157,7 +1156,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
 
     def test_get_event_ids_to_not_pull_from_backoff_retry_after_backoff_duration(
         self,
-    ):
+    ) -> None:
         """
         Test to make sure no event IDs are returned after the backoff duration has
         elapsed.
@@ -1187,19 +1186,19 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         self.assertEqual(event_ids_to_backoff, [])
 
 
-@attr.s
+@attr.s(auto_attribs=True)
 class FakeEvent:
-    event_id = attr.ib()
-    room_id = attr.ib()
-    auth_events = attr.ib()
+    event_id: str
+    room_id: str
+    auth_events: List[str]
 
     type = "foo"
     state_key = "foo"
 
     internal_metadata = _EventInternalMetadata({})
 
-    def auth_event_ids(self):
+    def auth_event_ids(self) -> List[str]:
         return self.auth_events
 
-    def is_state(self):
+    def is_state(self) -> bool:
         return True
diff --git a/tests/storage/test_event_metrics.py b/tests/storage/test_event_metrics.py
index 6f1135eef4..a91411168c 100644
--- a/tests/storage/test_event_metrics.py
+++ b/tests/storage/test_event_metrics.py
@@ -20,7 +20,7 @@ from tests.unittest import HomeserverTestCase
 
 
 class ExtremStatisticsTestCase(HomeserverTestCase):
-    def test_exposed_to_prometheus(self):
+    def test_exposed_to_prometheus(self) -> None:
         """
         Forward extremity counts are exposed via Prometheus.
         """
diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py
index 3ce4f35cb7..05661a537d 100644
--- a/tests/storage/test_events.py
+++ b/tests/storage/test_events.py
@@ -12,12 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import List, Optional
+
+from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.constants import EventTypes, Membership
 from synapse.api.room_versions import RoomVersions
+from synapse.events import EventBase
 from synapse.federation.federation_base import event_from_pdu_json
 from synapse.rest import admin
 from synapse.rest.client import login, room
+from synapse.server import HomeServer
+from synapse.types import StateMap
+from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
 
@@ -29,7 +36,9 @@ class ExtremPruneTestCase(HomeserverTestCase):
         login.register_servlets,
     ]
 
-    def prepare(self, reactor, clock, homeserver):
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
         self.state = self.hs.get_state_handler()
         self._persistence = self.hs.get_storage_controllers().persistence
         self._state_storage_controller = self.hs.get_storage_controllers().state
@@ -67,7 +76,9 @@ class ExtremPruneTestCase(HomeserverTestCase):
         # Check that the current extremities is the remote event.
         self.assert_extremities([self.remote_event_1.event_id])
 
-    def persist_event(self, event, state=None):
+    def persist_event(
+        self, event: EventBase, state: Optional[StateMap[str]] = None
+    ) -> None:
         """Persist the event, with optional state"""
         context = self.get_success(
             self.state.compute_event_context(
@@ -78,14 +89,14 @@ class ExtremPruneTestCase(HomeserverTestCase):
         )
         self.get_success(self._persistence.persist_event(event, context))
 
-    def assert_extremities(self, expected_extremities):
+    def assert_extremities(self, expected_extremities: List[str]) -> None:
         """Assert the current extremities for the room"""
         extremities = self.get_success(
             self.store.get_prev_events_for_room(self.room_id)
         )
         self.assertCountEqual(extremities, expected_extremities)
 
-    def test_prune_gap(self):
+    def test_prune_gap(self) -> None:
         """Test that we drop extremities after a gap when we see an event from
         the same domain.
         """
@@ -117,7 +128,7 @@ class ExtremPruneTestCase(HomeserverTestCase):
         # Check the new extremity is just the new remote event.
         self.assert_extremities([remote_event_2.event_id])
 
-    def test_do_not_prune_gap_if_state_different(self):
+    def test_do_not_prune_gap_if_state_different(self) -> None:
         """Test that we don't prune extremities after a gap if the resolved
         state is different.
         """
@@ -161,7 +172,7 @@ class ExtremPruneTestCase(HomeserverTestCase):
         # Check that we haven't dropped the old extremity.
         self.assert_extremities([self.remote_event_1.event_id, remote_event_2.event_id])
 
-    def test_prune_gap_if_old(self):
+    def test_prune_gap_if_old(self) -> None:
         """Test that we drop extremities after a gap when the previous extremity
         is "old"
         """
@@ -197,7 +208,7 @@ class ExtremPruneTestCase(HomeserverTestCase):
         # Check the new extremity is just the new remote event.
         self.assert_extremities([remote_event_2.event_id])
 
-    def test_do_not_prune_gap_if_other_server(self):
+    def test_do_not_prune_gap_if_other_server(self) -> None:
         """Test that we do not drop extremities after a gap when we see an event
         from a different domain.
         """
@@ -229,7 +240,7 @@ class ExtremPruneTestCase(HomeserverTestCase):
         # Check the new extremity is just the new remote event.
         self.assert_extremities([self.remote_event_1.event_id, remote_event_2.event_id])
 
-    def test_prune_gap_if_dummy_remote(self):
+    def test_prune_gap_if_dummy_remote(self) -> None:
         """Test that we drop extremities after a gap when the previous extremity
         is a local dummy event and only points to remote events.
         """
@@ -271,7 +282,7 @@ class ExtremPruneTestCase(HomeserverTestCase):
         # Check the new extremity is just the new remote event.
         self.assert_extremities([remote_event_2.event_id])
 
-    def test_prune_gap_if_dummy_local(self):
+    def test_prune_gap_if_dummy_local(self) -> None:
         """Test that we don't drop extremities after a gap when the previous
         extremity is a local dummy event and points to local events.
         """
@@ -315,7 +326,7 @@ class ExtremPruneTestCase(HomeserverTestCase):
         # Check the new extremity is just the new remote event.
         self.assert_extremities([remote_event_2.event_id, local_message_event_id])
 
-    def test_do_not_prune_gap_if_not_dummy(self):
+    def test_do_not_prune_gap_if_not_dummy(self) -> None:
         """Test that we do not drop extremities after a gap when the previous extremity
         is not a dummy event.
         """
@@ -359,12 +370,14 @@ class InvalideUsersInRoomCacheTestCase(HomeserverTestCase):
         login.register_servlets,
     ]
 
-    def prepare(self, reactor, clock, homeserver):
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
         self.state = self.hs.get_state_handler()
         self._persistence = self.hs.get_storage_controllers().persistence
         self.store = self.hs.get_datastores().main
 
-    def test_remote_user_rooms_cache_invalidated(self):
+    def test_remote_user_rooms_cache_invalidated(self) -> None:
         """Test that if the server leaves a room the `get_rooms_for_user` cache
         is invalidated for remote users.
         """
@@ -411,7 +424,7 @@ class InvalideUsersInRoomCacheTestCase(HomeserverTestCase):
         rooms = self.get_success(self.store.get_rooms_for_user(remote_user))
         self.assertEqual(set(rooms), set())
 
-    def test_room_remote_user_cache_invalidated(self):
+    def test_room_remote_user_cache_invalidated(self) -> None:
         """Test that if the server leaves a room the `get_users_in_room` cache
         is invalidated for remote users.
         """
diff --git a/tests/storage/test_keys.py b/tests/storage/test_keys.py
index 9059095525..aa4b5bd3b1 100644
--- a/tests/storage/test_keys.py
+++ b/tests/storage/test_keys.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import signedjson.key
+import signedjson.types
 import unpaddedbase64
 
 from twisted.internet.defer import Deferred
@@ -22,7 +23,9 @@ from synapse.storage.keys import FetchKeyResult
 import tests.unittest
 
 
-def decode_verify_key_base64(key_id: str, key_base64: str):
+def decode_verify_key_base64(
+    key_id: str, key_base64: str
+) -> signedjson.types.VerifyKey:
     key_bytes = unpaddedbase64.decode_base64(key_base64)
     return signedjson.key.decode_verify_key_bytes(key_id, key_bytes)
 
@@ -36,7 +39,7 @@ KEY_2 = decode_verify_key_base64(
 
 
 class KeyStoreTestCase(tests.unittest.HomeserverTestCase):
-    def test_get_server_verify_keys(self):
+    def test_get_server_verify_keys(self) -> None:
         store = self.hs.get_datastores().main
 
         key_id_1 = "ed25519:key1"
@@ -71,7 +74,7 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase):
         # non-existent result gives None
         self.assertIsNone(res[("server1", "ed25519:key3")])
 
-    def test_cache(self):
+    def test_cache(self) -> None:
         """Check that updates correctly invalidate the cache."""
 
         store = self.hs.get_datastores().main
diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py
index c55c4db970..2827738379 100644
--- a/tests/storage/test_monthly_active_users.py
+++ b/tests/storage/test_monthly_active_users.py
@@ -53,7 +53,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         self.reactor.advance(FORTY_DAYS)
 
     @override_config({"max_mau_value": 3, "mau_limit_reserved_threepids": gen_3pids(3)})
-    def test_initialise_reserved_users(self):
+    def test_initialise_reserved_users(self) -> None:
         threepids = self.hs.config.server.mau_limits_reserved_threepids
 
         # register three users, of which two have reserved 3pids, and a third
@@ -133,7 +133,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         active_count = self.get_success(self.store.get_monthly_active_count())
         self.assertEqual(active_count, 3)
 
-    def test_can_insert_and_count_mau(self):
+    def test_can_insert_and_count_mau(self) -> None:
         count = self.get_success(self.store.get_monthly_active_count())
         self.assertEqual(count, 0)
 
@@ -143,7 +143,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         count = self.get_success(self.store.get_monthly_active_count())
         self.assertEqual(count, 1)
 
-    def test_appservice_user_not_counted_in_mau(self):
+    def test_appservice_user_not_counted_in_mau(self) -> None:
         self.get_success(
             self.store.register_user(
                 user_id="@appservice_user:server", appservice_id="wibble"
@@ -158,7 +158,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         count = self.get_success(self.store.get_monthly_active_count())
         self.assertEqual(count, 0)
 
-    def test_user_last_seen_monthly_active(self):
+    def test_user_last_seen_monthly_active(self) -> None:
         user_id1 = "@user1:server"
         user_id2 = "@user2:server"
         user_id3 = "@user3:server"
@@ -177,7 +177,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         self.assertIsNone(result)
 
     @override_config({"max_mau_value": 5})
-    def test_reap_monthly_active_users(self):
+    def test_reap_monthly_active_users(self) -> None:
         initial_users = 10
         for i in range(initial_users):
             self.get_success(
@@ -204,7 +204,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
     # Note that below says mau_limit (no s), this is the name of the config
     # value, although it gets stored on the config object as mau_limits.
     @override_config({"max_mau_value": 5, "mau_limit_reserved_threepids": gen_3pids(5)})
-    def test_reap_monthly_active_users_reserved_users(self):
+    def test_reap_monthly_active_users_reserved_users(self) -> None:
         """Tests that reaping correctly handles reaping where reserved users are
         present"""
         threepids = self.hs.config.server.mau_limits_reserved_threepids
@@ -244,7 +244,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         count = self.get_success(self.store.get_monthly_active_count())
         self.assertEqual(count, self.hs.config.server.max_mau_value)
 
-    def test_populate_monthly_users_is_guest(self):
+    def test_populate_monthly_users_is_guest(self) -> None:
         # Test that guest users are not added to mau list
         user_id = "@user_id:host"
 
@@ -260,7 +260,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
 
         self.store.upsert_monthly_active_user.assert_not_called()
 
-    def test_populate_monthly_users_should_update(self):
+    def test_populate_monthly_users_should_update(self) -> None:
         self.store.upsert_monthly_active_user = Mock(return_value=make_awaitable(None))  # type: ignore[assignment]
 
         self.store.is_trial_user = Mock(return_value=make_awaitable(False))  # type: ignore[assignment]
@@ -273,7 +273,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
 
         self.store.upsert_monthly_active_user.assert_called_once()
 
-    def test_populate_monthly_users_should_not_update(self):
+    def test_populate_monthly_users_should_not_update(self) -> None:
         self.store.upsert_monthly_active_user = Mock(return_value=make_awaitable(None))  # type: ignore[assignment]
 
         self.store.is_trial_user = Mock(return_value=make_awaitable(False))  # type: ignore[assignment]
@@ -286,7 +286,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
 
         self.store.upsert_monthly_active_user.assert_not_called()
 
-    def test_get_reserved_real_user_account(self):
+    def test_get_reserved_real_user_account(self) -> None:
         # Test no reserved users, or reserved threepids
         users = self.get_success(self.store.get_registered_reserved_users())
         self.assertEqual(len(users), 0)
@@ -326,7 +326,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         users = self.get_success(self.store.get_registered_reserved_users())
         self.assertEqual(len(users), len(threepids))
 
-    def test_support_user_not_add_to_mau_limits(self):
+    def test_support_user_not_add_to_mau_limits(self) -> None:
         support_user_id = "@support:test"
 
         count = self.get_success(self.store.get_monthly_active_count())
@@ -347,7 +347,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
     @override_config(
         {"limit_usage_by_mau": False, "mau_stats_only": True, "max_mau_value": 1}
     )
-    def test_track_monthly_users_without_cap(self):
+    def test_track_monthly_users_without_cap(self) -> None:
         count = self.get_success(self.store.get_monthly_active_count())
         self.assertEqual(0, count)
 
@@ -358,14 +358,14 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         self.assertEqual(2, count)
 
     @override_config({"limit_usage_by_mau": False, "mau_stats_only": False})
-    def test_no_users_when_not_tracking(self):
+    def test_no_users_when_not_tracking(self) -> None:
         self.store.upsert_monthly_active_user = Mock(return_value=make_awaitable(None))  # type: ignore[assignment]
 
         self.get_success(self.store.populate_monthly_active_users("@user:sever"))
 
         self.store.upsert_monthly_active_user.assert_not_called()
 
-    def test_get_monthly_active_count_by_service(self):
+    def test_get_monthly_active_count_by_service(self) -> None:
         appservice1_user1 = "@appservice1_user1:example.com"
         appservice1_user2 = "@appservice1_user2:example.com"
 
@@ -413,7 +413,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         self.assertEqual(result[service2], 1)
         self.assertEqual(result[native], 1)
 
-    def test_get_monthly_active_users_by_service(self):
+    def test_get_monthly_active_users_by_service(self) -> None:
         # (No users, no filtering) -> empty result
         result = self.get_success(self.store.get_monthly_active_users_by_service())
 
diff --git a/tests/storage/test_purge.py b/tests/storage/test_purge.py
index 9c1182ed16..010cc74c31 100644
--- a/tests/storage/test_purge.py
+++ b/tests/storage/test_purge.py
@@ -12,8 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.errors import NotFoundError, SynapseError
 from synapse.rest.client import room
+from synapse.server import HomeServer
+from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
 
@@ -23,17 +27,17 @@ class PurgeTests(HomeserverTestCase):
     user_id = "@red:server"
     servlets = [room.register_servlets]
 
-    def make_homeserver(self, reactor, clock):
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         hs = self.setup_test_homeserver("server", federation_http_client=None)
         return hs
 
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.room_id = self.helper.create_room_as(self.user_id)
 
         self.store = hs.get_datastores().main
         self._storage_controllers = self.hs.get_storage_controllers()
 
-    def test_purge_history(self):
+    def test_purge_history(self) -> None:
         """
         Purging a room history will delete everything before the topological point.
         """
@@ -63,7 +67,7 @@ class PurgeTests(HomeserverTestCase):
         self.get_failure(self.store.get_event(third["event_id"]), NotFoundError)
         self.get_success(self.store.get_event(last["event_id"]))
 
-    def test_purge_history_wont_delete_extrems(self):
+    def test_purge_history_wont_delete_extrems(self) -> None:
         """
         Purging a room history will delete everything before the topological point.
         """
@@ -77,6 +81,7 @@ class PurgeTests(HomeserverTestCase):
         token = self.get_success(
             self.store.get_topological_token_for_event(last["event_id"])
         )
+        assert token.topological is not None
         event = f"t{token.topological + 1}-{token.stream + 1}"
 
         # Purge everything before this topological token
@@ -94,7 +99,7 @@ class PurgeTests(HomeserverTestCase):
         self.get_success(self.store.get_event(third["event_id"]))
         self.get_success(self.store.get_event(last["event_id"]))
 
-    def test_purge_room(self):
+    def test_purge_room(self) -> None:
         """
         Purging a room will delete everything about it.
         """
diff --git a/tests/storage/test_receipts.py b/tests/storage/test_receipts.py
index 81253d0361..d8d84152dc 100644
--- a/tests/storage/test_receipts.py
+++ b/tests/storage/test_receipts.py
@@ -14,8 +14,12 @@
 
 from typing import Collection, Optional
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.constants import ReceiptTypes
+from synapse.server import HomeServer
 from synapse.types import UserID, create_requester
+from synapse.util import Clock
 
 from tests.test_utils.event_injection import create_event
 from tests.unittest import HomeserverTestCase
@@ -25,7 +29,9 @@ OUR_USER_ID = "@our:test"
 
 
 class ReceiptTestCase(HomeserverTestCase):
-    def prepare(self, reactor, clock, homeserver) -> None:
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
         super().prepare(reactor, clock, homeserver)
 
         self.store = homeserver.get_datastores().main
@@ -135,11 +141,11 @@ class ReceiptTestCase(HomeserverTestCase):
         )
         self.assertEqual(res, {})
 
-        res = self.get_last_unthreaded_receipt(
+        res2 = self.get_last_unthreaded_receipt(
             [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE]
         )
 
-        self.assertEqual(res, None)
+        self.assertIsNone(res2)
 
     def test_get_receipts_for_user(self) -> None:
         # Send some events into the first room
diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py
index 6c4e63b77c..df4740f9d9 100644
--- a/tests/storage/test_redaction.py
+++ b/tests/storage/test_redaction.py
@@ -11,27 +11,35 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List, Optional
+from typing import List, Optional, cast
 
 from canonicaljson import json
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.constants import EventTypes, Membership
 from synapse.api.room_versions import RoomVersions
-from synapse.types import RoomID, UserID
+from synapse.events import EventBase, _EventInternalMetadata
+from synapse.events.builder import EventBuilder
+from synapse.server import HomeServer
+from synapse.types import JsonDict, RoomID, UserID
+from synapse.util import Clock
 
 from tests import unittest
 from tests.utils import create_room
 
 
 class RedactionTestCase(unittest.HomeserverTestCase):
-    def default_config(self):
+    def default_config(self) -> JsonDict:
         config = super().default_config()
         config["redaction_retention_period"] = "30d"
         return config
 
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
-        self._storage = hs.get_storage_controllers()
+        storage = hs.get_storage_controllers()
+        assert storage.persistence is not None
+        self._persistence = storage.persistence
         self.event_builder_factory = hs.get_event_builder_factory()
         self.event_creation_handler = hs.get_event_creation_handler()
 
@@ -46,14 +54,13 @@ class RedactionTestCase(unittest.HomeserverTestCase):
 
         self.depth = 1
 
-    def inject_room_member(
+    def inject_room_member(  # type: ignore[override]
         self,
-        room,
-        user,
-        membership,
-        replaces_state=None,
-        extra_content: Optional[dict] = None,
-    ):
+        room: RoomID,
+        user: UserID,
+        membership: str,
+        extra_content: Optional[JsonDict] = None,
+    ) -> EventBase:
         content = {"membership": membership}
         content.update(extra_content or {})
         builder = self.event_builder_factory.for_room_version(
@@ -71,11 +78,11 @@ class RedactionTestCase(unittest.HomeserverTestCase):
             self.event_creation_handler.create_new_client_event(builder)
         )
 
-        self.get_success(self._storage.persistence.persist_event(event, context))
+        self.get_success(self._persistence.persist_event(event, context))
 
         return event
 
-    def inject_message(self, room, user, body):
+    def inject_message(self, room: RoomID, user: UserID, body: str) -> EventBase:
         self.depth += 1
 
         builder = self.event_builder_factory.for_room_version(
@@ -93,11 +100,13 @@ class RedactionTestCase(unittest.HomeserverTestCase):
             self.event_creation_handler.create_new_client_event(builder)
         )
 
-        self.get_success(self._storage.persistence.persist_event(event, context))
+        self.get_success(self._persistence.persist_event(event, context))
 
         return event
 
-    def inject_redaction(self, room, event_id, user, reason):
+    def inject_redaction(
+        self, room: RoomID, event_id: str, user: UserID, reason: str
+    ) -> EventBase:
         builder = self.event_builder_factory.for_room_version(
             RoomVersions.V1,
             {
@@ -114,11 +123,11 @@ class RedactionTestCase(unittest.HomeserverTestCase):
             self.event_creation_handler.create_new_client_event(builder)
         )
 
-        self.get_success(self._storage.persistence.persist_event(event, context))
+        self.get_success(self._persistence.persist_event(event, context))
 
         return event
 
-    def test_redact(self):
+    def test_redact(self) -> None:
         self.inject_room_member(self.room1, self.u_alice, Membership.JOIN)
 
         msg_event = self.inject_message(self.room1, self.u_alice, "t")
@@ -165,7 +174,7 @@ class RedactionTestCase(unittest.HomeserverTestCase):
             event.unsigned["redacted_because"],
         )
 
-    def test_redact_join(self):
+    def test_redact_join(self) -> None:
         self.inject_room_member(self.room1, self.u_alice, Membership.JOIN)
 
         msg_event = self.inject_room_member(
@@ -213,12 +222,12 @@ class RedactionTestCase(unittest.HomeserverTestCase):
             event.unsigned["redacted_because"],
         )
 
-    def test_circular_redaction(self):
+    def test_circular_redaction(self) -> None:
         redaction_event_id1 = "$redaction1_id:test"
         redaction_event_id2 = "$redaction2_id:test"
 
         class EventIdManglingBuilder:
-            def __init__(self, base_builder, event_id):
+            def __init__(self, base_builder: EventBuilder, event_id: str):
                 self._base_builder = base_builder
                 self._event_id = event_id
 
@@ -227,67 +236,73 @@ class RedactionTestCase(unittest.HomeserverTestCase):
                 prev_event_ids: List[str],
                 auth_event_ids: Optional[List[str]],
                 depth: Optional[int] = None,
-            ):
+            ) -> EventBase:
                 built_event = await self._base_builder.build(
                     prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids
                 )
 
-                built_event._event_id = self._event_id
+                built_event._event_id = self._event_id  # type: ignore[attr-defined]
                 built_event._dict["event_id"] = self._event_id
                 assert built_event.event_id == self._event_id
 
                 return built_event
 
             @property
-            def room_id(self):
+            def room_id(self) -> str:
                 return self._base_builder.room_id
 
             @property
-            def type(self):
+            def type(self) -> str:
                 return self._base_builder.type
 
             @property
-            def internal_metadata(self):
+            def internal_metadata(self) -> _EventInternalMetadata:
                 return self._base_builder.internal_metadata
 
         event_1, context_1 = self.get_success(
             self.event_creation_handler.create_new_client_event(
-                EventIdManglingBuilder(
-                    self.event_builder_factory.for_room_version(
-                        RoomVersions.V1,
-                        {
-                            "type": EventTypes.Redaction,
-                            "sender": self.u_alice.to_string(),
-                            "room_id": self.room1.to_string(),
-                            "content": {"reason": "test"},
-                            "redacts": redaction_event_id2,
-                        },
+                cast(
+                    EventBuilder,
+                    EventIdManglingBuilder(
+                        self.event_builder_factory.for_room_version(
+                            RoomVersions.V1,
+                            {
+                                "type": EventTypes.Redaction,
+                                "sender": self.u_alice.to_string(),
+                                "room_id": self.room1.to_string(),
+                                "content": {"reason": "test"},
+                                "redacts": redaction_event_id2,
+                            },
+                        ),
+                        redaction_event_id1,
                     ),
-                    redaction_event_id1,
                 )
             )
         )
 
-        self.get_success(self._storage.persistence.persist_event(event_1, context_1))
+        self.get_success(self._persistence.persist_event(event_1, context_1))
 
         event_2, context_2 = self.get_success(
             self.event_creation_handler.create_new_client_event(
-                EventIdManglingBuilder(
-                    self.event_builder_factory.for_room_version(
-                        RoomVersions.V1,
-                        {
-                            "type": EventTypes.Redaction,
-                            "sender": self.u_alice.to_string(),
-                            "room_id": self.room1.to_string(),
-                            "content": {"reason": "test"},
-                            "redacts": redaction_event_id1,
-                        },
+                cast(
+                    EventBuilder,
+                    EventIdManglingBuilder(
+                        self.event_builder_factory.for_room_version(
+                            RoomVersions.V1,
+                            {
+                                "type": EventTypes.Redaction,
+                                "sender": self.u_alice.to_string(),
+                                "room_id": self.room1.to_string(),
+                                "content": {"reason": "test"},
+                                "redacts": redaction_event_id1,
+                            },
+                        ),
+                        redaction_event_id2,
                     ),
-                    redaction_event_id2,
                 )
             )
         )
-        self.get_success(self._storage.persistence.persist_event(event_2, context_2))
+        self.get_success(self._persistence.persist_event(event_2, context_2))
 
         # fetch one of the redactions
         fetched = self.get_success(self.store.get_event(redaction_event_id1))
@@ -298,7 +313,7 @@ class RedactionTestCase(unittest.HomeserverTestCase):
             fetched.unsigned["redacted_because"].event_id, redaction_event_id2
         )
 
-    def test_redact_censor(self):
+    def test_redact_censor(self) -> None:
         """Test that a redacted event gets censored in the DB after a month"""
 
         self.inject_room_member(self.room1, self.u_alice, Membership.JOIN)
@@ -364,7 +379,7 @@ class RedactionTestCase(unittest.HomeserverTestCase):
 
         self.assert_dict({"content": {}}, json.loads(event_json))
 
-    def test_redact_redaction(self):
+    def test_redact_redaction(self) -> None:
         """Tests that we can redact a redaction and can fetch it again."""
 
         self.inject_room_member(self.room1, self.u_alice, Membership.JOIN)
@@ -391,7 +406,7 @@ class RedactionTestCase(unittest.HomeserverTestCase):
             self.store.get_event(first_redact_event.event_id, allow_none=True)
         )
 
-    def test_store_redacted_redaction(self):
+    def test_store_redacted_redaction(self) -> None:
         """Tests that we can store a redacted redaction."""
 
         self.inject_room_member(self.room1, self.u_alice, Membership.JOIN)
@@ -410,9 +425,7 @@ class RedactionTestCase(unittest.HomeserverTestCase):
             self.event_creation_handler.create_new_client_event(builder)
         )
 
-        self.get_success(
-            self._storage.persistence.persist_event(redaction_event, context)
-        )
+        self.get_success(self._persistence.persist_event(redaction_event, context))
 
         # Now lets jump to the future where we have censored the redaction event
         # in the DB.
diff --git a/tests/storage/test_rollback_worker.py b/tests/storage/test_rollback_worker.py
index 0baa54312e..966aafea6f 100644
--- a/tests/storage/test_rollback_worker.py
+++ b/tests/storage/test_rollback_worker.py
@@ -14,10 +14,15 @@
 from typing import List
 from unittest import mock
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.app.generic_worker import GenericWorkerServer
+from synapse.server import HomeServer
 from synapse.storage.database import LoggingDatabaseConnection
 from synapse.storage.prepare_database import PrepareDatabaseException, prepare_database
 from synapse.storage.schema import SCHEMA_VERSION
+from synapse.types import JsonDict
+from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
 
@@ -39,13 +44,13 @@ def fake_listdir(filepath: str) -> List[str]:
 
 
 class WorkerSchemaTests(HomeserverTestCase):
-    def make_homeserver(self, reactor, clock):
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         hs = self.setup_test_homeserver(
             federation_http_client=None, homeserver_to_use=GenericWorkerServer
         )
         return hs
 
-    def default_config(self):
+    def default_config(self) -> JsonDict:
         conf = super().default_config()
 
         # Mark this as a worker app.
@@ -53,7 +58,7 @@ class WorkerSchemaTests(HomeserverTestCase):
 
         return conf
 
-    def test_rolling_back(self):
+    def test_rolling_back(self) -> None:
         """Test that workers can start if the DB is a newer schema version"""
 
         db_pool = self.hs.get_datastores().main.db_pool
@@ -70,7 +75,7 @@ class WorkerSchemaTests(HomeserverTestCase):
 
         prepare_database(db_conn, db_pool.engine, self.hs.config)
 
-    def test_not_upgraded_old_schema_version(self):
+    def test_not_upgraded_old_schema_version(self) -> None:
         """Test that workers don't start if the DB has an older schema version"""
         db_pool = self.hs.get_datastores().main.db_pool
         db_conn = LoggingDatabaseConnection(
@@ -87,7 +92,7 @@ class WorkerSchemaTests(HomeserverTestCase):
         with self.assertRaises(PrepareDatabaseException):
             prepare_database(db_conn, db_pool.engine, self.hs.config)
 
-    def test_not_upgraded_current_schema_version_with_outstanding_deltas(self):
+    def test_not_upgraded_current_schema_version_with_outstanding_deltas(self) -> None:
         """
         Test that workers don't start if the DB is on the current schema version,
         but there are still outstanding delta migrations to run.
diff --git a/tests/storage/test_room.py b/tests/storage/test_room.py
index 3405efb6a8..71ec74eadc 100644
--- a/tests/storage/test_room.py
+++ b/tests/storage/test_room.py
@@ -12,14 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.room_versions import RoomVersions
+from synapse.server import HomeServer
 from synapse.types import RoomAlias, RoomID, UserID
+from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
 
 
 class RoomStoreTestCase(HomeserverTestCase):
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         # We can't test RoomStore on its own without the DirectoryStore, for
         # management of the 'room_aliases' table
         self.store = hs.get_datastores().main
@@ -37,30 +41,34 @@ class RoomStoreTestCase(HomeserverTestCase):
             )
         )
 
-    def test_get_room(self):
+    def test_get_room(self) -> None:
+        res = self.get_success(self.store.get_room(self.room.to_string()))
+        assert res is not None
         self.assertDictContainsSubset(
             {
                 "room_id": self.room.to_string(),
                 "creator": self.u_creator.to_string(),
                 "is_public": True,
             },
-            (self.get_success(self.store.get_room(self.room.to_string()))),
+            res,
         )
 
-    def test_get_room_unknown_room(self):
+    def test_get_room_unknown_room(self) -> None:
         self.assertIsNone(self.get_success(self.store.get_room("!uknown:test")))
 
-    def test_get_room_with_stats(self):
+    def test_get_room_with_stats(self) -> None:
+        res = self.get_success(self.store.get_room_with_stats(self.room.to_string()))
+        assert res is not None
         self.assertDictContainsSubset(
             {
                 "room_id": self.room.to_string(),
                 "creator": self.u_creator.to_string(),
                 "public": True,
             },
-            (self.get_success(self.store.get_room_with_stats(self.room.to_string()))),
+            res,
         )
 
-    def test_get_room_with_stats_unknown_room(self):
+    def test_get_room_with_stats_unknown_room(self) -> None:
         self.assertIsNone(
-            (self.get_success(self.store.get_room_with_stats("!uknown:test"))),
+            self.get_success(self.store.get_room_with_stats("!uknown:test"))
         )
diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py
index ef850daa73..14d872514d 100644
--- a/tests/storage/test_room_search.py
+++ b/tests/storage/test_room_search.py
@@ -39,7 +39,7 @@ class EventSearchInsertionTest(HomeserverTestCase):
         room.register_servlets,
     ]
 
-    def test_null_byte(self):
+    def test_null_byte(self) -> None:
         """
         Postgres/SQLite don't like null bytes going into the search tables. Internally
         we replace those with a space.
@@ -86,7 +86,7 @@ class EventSearchInsertionTest(HomeserverTestCase):
         if isinstance(store.database_engine, PostgresEngine):
             self.assertIn("alice", result.get("highlights"))
 
-    def test_non_string(self):
+    def test_non_string(self) -> None:
         """Test that non-string `value`s are not inserted into `event_search`.
 
         This is particularly important when using sqlite, since a sqlite column can hold
@@ -157,7 +157,7 @@ class EventSearchInsertionTest(HomeserverTestCase):
         self.assertEqual(f.value.code, 404)
 
     @skip_unless(not USE_POSTGRES_FOR_TESTS, "requires sqlite")
-    def test_sqlite_non_string_deletion_background_update(self):
+    def test_sqlite_non_string_deletion_background_update(self) -> None:
         """Test the background update to delete bad rows from `event_search`."""
         store = self.hs.get_datastores().main
 
@@ -350,7 +350,7 @@ class MessageSearchTest(HomeserverTestCase):
                 "results array length should match count",
             )
 
-    def test_postgres_web_search_for_phrase(self):
+    def test_postgres_web_search_for_phrase(self) -> None:
         """
         Test searching for phrases using typical web search syntax, as per postgres' websearch_to_tsquery.
         This test is skipped unless the postgres instance supports websearch_to_tsquery.
@@ -364,7 +364,7 @@ class MessageSearchTest(HomeserverTestCase):
 
         self._check_test_cases(store, self.COMMON_CASES + self.POSTGRES_CASES)
 
-    def test_sqlite_search(self):
+    def test_sqlite_search(self) -> None:
         """
         Test sqlite searching for phrases.
         """
diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py
index 5564161750..d4e6d4236c 100644
--- a/tests/storage/test_state.py
+++ b/tests/storage/test_state.py
@@ -16,10 +16,15 @@ import logging
 
 from frozendict import frozendict
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.constants import EventTypes, Membership
 from synapse.api.room_versions import RoomVersions
+from synapse.events import EventBase
+from synapse.server import HomeServer
 from synapse.storage.state import StateFilter
-from synapse.types import RoomID, UserID
+from synapse.types import JsonDict, RoomID, StateMap, UserID
+from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase, TestCase
 
@@ -27,7 +32,7 @@ logger = logging.getLogger(__name__)
 
 
 class StateStoreTestCase(HomeserverTestCase):
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
         self.storage = hs.get_storage_controllers()
         self.state_datastore = self.storage.state.stores.state
@@ -48,7 +53,9 @@ class StateStoreTestCase(HomeserverTestCase):
             )
         )
 
-    def inject_state_event(self, room, sender, typ, state_key, content):
+    def inject_state_event(
+        self, room: RoomID, sender: UserID, typ: str, state_key: str, content: JsonDict
+    ) -> EventBase:
         builder = self.event_builder_factory.for_room_version(
             RoomVersions.V1,
             {
@@ -64,24 +71,29 @@ class StateStoreTestCase(HomeserverTestCase):
             self.event_creation_handler.create_new_client_event(builder)
         )
 
+        assert self.storage.persistence is not None
         self.get_success(self.storage.persistence.persist_event(event, context))
 
         return event
 
-    def assertStateMapEqual(self, s1, s2):
+    def assertStateMapEqual(
+        self, s1: StateMap[EventBase], s2: StateMap[EventBase]
+    ) -> None:
         for t in s1:
             # just compare event IDs for simplicity
             self.assertEqual(s1[t].event_id, s2[t].event_id)
         self.assertEqual(len(s1), len(s2))
 
-    def test_get_state_groups_ids(self):
+    def test_get_state_groups_ids(self) -> None:
         e1 = self.inject_state_event(self.room, self.u_alice, EventTypes.Create, "", {})
         e2 = self.inject_state_event(
             self.room, self.u_alice, EventTypes.Name, "", {"name": "test room"}
         )
 
         state_group_map = self.get_success(
-            self.storage.state.get_state_groups_ids(self.room, [e2.event_id])
+            self.storage.state.get_state_groups_ids(
+                self.room.to_string(), [e2.event_id]
+            )
         )
         self.assertEqual(len(state_group_map), 1)
         state_map = list(state_group_map.values())[0]
@@ -90,21 +102,21 @@ class StateStoreTestCase(HomeserverTestCase):
             {(EventTypes.Create, ""): e1.event_id, (EventTypes.Name, ""): e2.event_id},
         )
 
-    def test_get_state_groups(self):
+    def test_get_state_groups(self) -> None:
         e1 = self.inject_state_event(self.room, self.u_alice, EventTypes.Create, "", {})
         e2 = self.inject_state_event(
             self.room, self.u_alice, EventTypes.Name, "", {"name": "test room"}
         )
 
         state_group_map = self.get_success(
-            self.storage.state.get_state_groups(self.room, [e2.event_id])
+            self.storage.state.get_state_groups(self.room.to_string(), [e2.event_id])
         )
         self.assertEqual(len(state_group_map), 1)
         state_list = list(state_group_map.values())[0]
 
         self.assertEqual({ev.event_id for ev in state_list}, {e1.event_id, e2.event_id})
 
-    def test_get_state_for_event(self):
+    def test_get_state_for_event(self) -> None:
         # this defaults to a linear DAG as each new injection defaults to whatever
         # forward extremities are currently in the DB for this room.
         e1 = self.inject_state_event(self.room, self.u_alice, EventTypes.Create, "", {})
@@ -487,14 +499,16 @@ class StateStoreTestCase(HomeserverTestCase):
 class StateFilterDifferenceTestCase(TestCase):
     def assert_difference(
         self, minuend: StateFilter, subtrahend: StateFilter, expected: StateFilter
-    ):
+    ) -> None:
         self.assertEqual(
             minuend.approx_difference(subtrahend),
             expected,
             f"StateFilter difference not correct:\n\n\t{minuend!r}\nminus\n\t{subtrahend!r}\nwas\n\t{minuend.approx_difference(subtrahend)}\nexpected\n\t{expected}",
         )
 
-    def test_state_filter_difference_no_include_other_minus_no_include_other(self):
+    def test_state_filter_difference_no_include_other_minus_no_include_other(
+        self,
+    ) -> None:
         """
         Tests the StateFilter.approx_difference method
         where, in a.approx_difference(b), both a and b do not have the
@@ -610,7 +624,7 @@ class StateFilterDifferenceTestCase(TestCase):
             ),
         )
 
-    def test_state_filter_difference_include_other_minus_no_include_other(self):
+    def test_state_filter_difference_include_other_minus_no_include_other(self) -> None:
         """
         Tests the StateFilter.approx_difference method
         where, in a.approx_difference(b), only a has the include_others flag set.
@@ -739,7 +753,7 @@ class StateFilterDifferenceTestCase(TestCase):
             ),
         )
 
-    def test_state_filter_difference_include_other_minus_include_other(self):
+    def test_state_filter_difference_include_other_minus_include_other(self) -> None:
         """
         Tests the StateFilter.approx_difference method
         where, in a.approx_difference(b), both a and b have the include_others
@@ -864,7 +878,7 @@ class StateFilterDifferenceTestCase(TestCase):
             ),
         )
 
-    def test_state_filter_difference_no_include_other_minus_include_other(self):
+    def test_state_filter_difference_no_include_other_minus_include_other(self) -> None:
         """
         Tests the StateFilter.approx_difference method
         where, in a.approx_difference(b), only b has the include_others flag set.
@@ -979,7 +993,7 @@ class StateFilterDifferenceTestCase(TestCase):
             ),
         )
 
-    def test_state_filter_difference_simple_cases(self):
+    def test_state_filter_difference_simple_cases(self) -> None:
         """
         Tests some very simple cases of the StateFilter approx_difference,
         that are not explicitly tested by the more in-depth tests.
@@ -995,7 +1009,7 @@ class StateFilterDifferenceTestCase(TestCase):
 
 
 class StateFilterTestCase(TestCase):
-    def test_return_expanded(self):
+    def test_return_expanded(self) -> None:
         """
         Tests the behaviour of the return_expanded() function that expands
         StateFilters to include more state types (for the sake of cache hit rate).
diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py
index 34fa810cf6..bc090ebce0 100644
--- a/tests/storage/test_stream.py
+++ b/tests/storage/test_stream.py
@@ -14,11 +14,15 @@
 
 from typing import List
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.constants import EventTypes, RelationTypes
 from synapse.api.filtering import Filter
 from synapse.rest import admin
 from synapse.rest.client import login, room
+from synapse.server import HomeServer
 from synapse.types import JsonDict
+from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
 
@@ -37,12 +41,14 @@ class PaginationTestCase(HomeserverTestCase):
         login.register_servlets,
     ]
 
-    def default_config(self):
+    def default_config(self) -> JsonDict:
         config = super().default_config()
         config["experimental_features"] = {"msc3874_enabled": True}
         return config
 
-    def prepare(self, reactor, clock, homeserver):
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
         self.user_id = self.register_user("test", "test")
         self.tok = self.login("test", "test")
         self.room_id = self.helper.create_room_as(self.user_id, tok=self.tok)
@@ -130,7 +136,7 @@ class PaginationTestCase(HomeserverTestCase):
 
         return [ev.event_id for ev in events]
 
-    def test_filter_relation_senders(self):
+    def test_filter_relation_senders(self) -> None:
         # Messages which second user reacted to.
         filter = {"related_by_senders": [self.second_user_id]}
         chunk = self._filter_messages(filter)
@@ -146,7 +152,7 @@ class PaginationTestCase(HomeserverTestCase):
         chunk = self._filter_messages(filter)
         self.assertCountEqual(chunk, [self.event_id_1, self.event_id_2])
 
-    def test_filter_relation_type(self):
+    def test_filter_relation_type(self) -> None:
         # Messages which have annotations.
         filter = {"related_by_rel_types": [RelationTypes.ANNOTATION]}
         chunk = self._filter_messages(filter)
@@ -167,7 +173,7 @@ class PaginationTestCase(HomeserverTestCase):
         chunk = self._filter_messages(filter)
         self.assertCountEqual(chunk, [self.event_id_1, self.event_id_2])
 
-    def test_filter_relation_senders_and_type(self):
+    def test_filter_relation_senders_and_type(self) -> None:
         # Messages which second user reacted to.
         filter = {
             "related_by_senders": [self.second_user_id],
@@ -176,7 +182,7 @@ class PaginationTestCase(HomeserverTestCase):
         chunk = self._filter_messages(filter)
         self.assertEqual(chunk, [self.event_id_1])
 
-    def test_duplicate_relation(self):
+    def test_duplicate_relation(self) -> None:
         """An event should only be returned once if there are multiple relations to it."""
         self.helper.send_event(
             room_id=self.room_id,
diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py
index e05daa285e..db9ee9955e 100644
--- a/tests/storage/test_transactions.py
+++ b/tests/storage/test_transactions.py
@@ -12,17 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.server import HomeServer
 from synapse.storage.databases.main.transactions import DestinationRetryTimings
+from synapse.util import Clock
 from synapse.util.retryutils import MAX_RETRY_INTERVAL
 
 from tests.unittest import HomeserverTestCase
 
 
 class TransactionStoreTestCase(HomeserverTestCase):
-    def prepare(self, reactor, clock, homeserver):
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
         self.store = homeserver.get_datastores().main
 
-    def test_get_set_transactions(self):
+    def test_get_set_transactions(self) -> None:
         """Tests that we can successfully get a non-existent entry for
         destination retries, as well as testing tht we can set and get
         correctly.
@@ -44,18 +50,18 @@ class TransactionStoreTestCase(HomeserverTestCase):
             r,
         )
 
-    def test_initial_set_transactions(self):
+    def test_initial_set_transactions(self) -> None:
         """Tests that we can successfully set the destination retries (there
         was a bug around invalidating the cache that broke this)
         """
         d = self.store.set_destination_retry_timings("example.com", 1000, 50, 100)
         self.get_success(d)
 
-    def test_large_destination_retry(self):
+    def test_large_destination_retry(self) -> None:
         d = self.store.set_destination_retry_timings(
             "example.com", MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL
         )
         self.get_success(d)
 
-        d = self.store.get_destination_retry_timings("example.com")
-        self.get_success(d)
+        d2 = self.store.get_destination_retry_timings("example.com")
+        self.get_success(d2)
diff --git a/tests/storage/test_txn_limit.py b/tests/storage/test_txn_limit.py
index ace82cbf42..15ea4770bd 100644
--- a/tests/storage/test_txn_limit.py
+++ b/tests/storage/test_txn_limit.py
@@ -12,21 +12,27 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.server import HomeServer
+from synapse.storage.types import Cursor
+from synapse.util import Clock
+
 from tests import unittest
 
 
 class SQLTransactionLimitTestCase(unittest.HomeserverTestCase):
     """Test SQL transaction limit doesn't break transactions."""
 
-    def make_homeserver(self, reactor, clock):
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         return self.setup_test_homeserver(db_txn_limit=1000)
 
-    def test_config(self):
+    def test_config(self) -> None:
         db_config = self.hs.config.database.get_single_database()
         self.assertEqual(db_config.config["txn_limit"], 1000)
 
-    def test_select(self):
-        def do_select(txn):
+    def test_select(self) -> None:
+        def do_select(txn: Cursor) -> None:
             txn.execute("SELECT 1")
 
         db_pool = self.hs.get_datastores().databases[0]
diff --git a/tests/storage/util/test_partial_state_events_tracker.py b/tests/storage/util/test_partial_state_events_tracker.py
index cae14151c0..0e3fc2a77f 100644
--- a/tests/storage/util/test_partial_state_events_tracker.py
+++ b/tests/storage/util/test_partial_state_events_tracker.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict
+from typing import Collection, Dict
 from unittest import mock
 
 from twisted.internet.defer import CancelledError, ensureDeferred
@@ -31,7 +31,7 @@ class PartialStateEventsTrackerTestCase(TestCase):
         # the results to be returned by the mocked get_partial_state_events
         self._events_dict: Dict[str, bool] = {}
 
-        async def get_partial_state_events(events):
+        async def get_partial_state_events(events: Collection[str]) -> Dict[str, bool]:
             return {e: self._events_dict[e] for e in events}
 
         self.mock_store = mock.Mock(spec_set=["get_partial_state_events"])
@@ -39,7 +39,7 @@ class PartialStateEventsTrackerTestCase(TestCase):
 
         self.tracker = PartialStateEventsTracker(self.mock_store)
 
-    def test_does_not_block_for_full_state_events(self):
+    def test_does_not_block_for_full_state_events(self) -> None:
         self._events_dict = {"event1": False, "event2": False}
 
         self.successResultOf(
@@ -50,7 +50,7 @@ class PartialStateEventsTrackerTestCase(TestCase):
             ["event1", "event2"]
         )
 
-    def test_blocks_for_partial_state_events(self):
+    def test_blocks_for_partial_state_events(self) -> None:
         self._events_dict = {"event1": True, "event2": False}
 
         d = ensureDeferred(self.tracker.await_full_state(["event1", "event2"]))
@@ -62,12 +62,12 @@ class PartialStateEventsTrackerTestCase(TestCase):
         self.tracker.notify_un_partial_stated("event1")
         self.successResultOf(d)
 
-    def test_un_partial_state_race(self):
+    def test_un_partial_state_race(self) -> None:
         # if the event is un-partial-stated between the initial check and the
         # registration of the listener, it should not block.
         self._events_dict = {"event1": True, "event2": False}
 
-        async def get_partial_state_events(events):
+        async def get_partial_state_events(events: Collection[str]) -> Dict[str, bool]:
             res = {e: self._events_dict[e] for e in events}
             # change the result for next time
             self._events_dict = {"event1": False, "event2": False}
@@ -79,19 +79,19 @@ class PartialStateEventsTrackerTestCase(TestCase):
             ensureDeferred(self.tracker.await_full_state(["event1", "event2"]))
         )
 
-    def test_un_partial_state_during_get_partial_state_events(self):
+    def test_un_partial_state_during_get_partial_state_events(self) -> None:
         # we should correctly handle a call to notify_un_partial_stated during the
         # second call to get_partial_state_events.
 
         self._events_dict = {"event1": True, "event2": False}
 
-        async def get_partial_state_events1(events):
+        async def get_partial_state_events1(events: Collection[str]) -> Dict[str, bool]:
             self.mock_store.get_partial_state_events.side_effect = (
                 get_partial_state_events2
             )
             return {e: self._events_dict[e] for e in events}
 
-        async def get_partial_state_events2(events):
+        async def get_partial_state_events2(events: Collection[str]) -> Dict[str, bool]:
             self.tracker.notify_un_partial_stated("event1")
             self._events_dict["event1"] = False
             return {e: self._events_dict[e] for e in events}
@@ -102,7 +102,7 @@ class PartialStateEventsTrackerTestCase(TestCase):
             ensureDeferred(self.tracker.await_full_state(["event1", "event2"]))
         )
 
-    def test_cancellation(self):
+    def test_cancellation(self) -> None:
         self._events_dict = {"event1": True, "event2": False}
 
         d1 = ensureDeferred(self.tracker.await_full_state(["event1", "event2"]))
@@ -127,12 +127,12 @@ class PartialCurrentStateTrackerTestCase(TestCase):
 
         self.tracker = PartialCurrentStateTracker(self.mock_store)
 
-    def test_does_not_block_for_full_state_rooms(self):
+    def test_does_not_block_for_full_state_rooms(self) -> None:
         self.mock_store.is_partial_state_room.return_value = make_awaitable(False)
 
         self.successResultOf(ensureDeferred(self.tracker.await_full_state("room_id")))
 
-    def test_blocks_for_partial_room_state(self):
+    def test_blocks_for_partial_room_state(self) -> None:
         self.mock_store.is_partial_state_room.return_value = make_awaitable(True)
 
         d = ensureDeferred(self.tracker.await_full_state("room_id"))
@@ -144,10 +144,10 @@ class PartialCurrentStateTrackerTestCase(TestCase):
         self.tracker.notify_un_partial_stated("room_id")
         self.successResultOf(d)
 
-    def test_un_partial_state_race(self):
+    def test_un_partial_state_race(self) -> None:
         # We should correctly handle race between awaiting the state and us
         # un-partialling the state
-        async def is_partial_state_room(events):
+        async def is_partial_state_room(room_id: str) -> bool:
             self.tracker.notify_un_partial_stated("room_id")
             return True
 
@@ -155,7 +155,7 @@ class PartialCurrentStateTrackerTestCase(TestCase):
 
         self.successResultOf(ensureDeferred(self.tracker.await_full_state("room_id")))
 
-    def test_cancellation(self):
+    def test_cancellation(self) -> None:
         self.mock_store.is_partial_state_room.return_value = make_awaitable(True)
 
         d1 = ensureDeferred(self.tracker.await_full_state("room_id"))
-- 
cgit 1.5.1


From 373c485d8c7f39206bac60c6ef313b4a1978bbc0 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Fri, 9 Dec 2022 23:02:11 +0000
Subject: Handle half-created indices in receipts index background update
 (#14650)

When Synapse is terminated while running the background update to create
the `receipts_graph` or `receipts_linearized` indexes, the indexes may
be successfully created (or marked as invalid on postgres) while the
background update remains unfinished. When Synapse next starts up, the
background update will fail because the index already exists, or exists
but is invalid on postgres.

Use the existing code to create indices in background updates, since it
handles these edge cases.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14650.bugfix                   |  2 ++
 synapse/storage/background_updates.py      | 55 +++++++++++++++++++++++++-----
 synapse/storage/databases/main/receipts.py | 51 +++++++--------------------
 3 files changed, 60 insertions(+), 48 deletions(-)
 create mode 100644 changelog.d/14650.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14650.bugfix b/changelog.d/14650.bugfix
new file mode 100644
index 0000000000..5e18641bf7
--- /dev/null
+++ b/changelog.d/14650.bugfix
@@ -0,0 +1,2 @@
+Fix a bug introduced in Synapse 1.72.0 where the background updates to add non-thread unique indexes on receipts would fail if they were previously interrupted.
+
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index 2056ecb2c3..a99aea8926 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -544,6 +544,48 @@ class BackgroundUpdater:
                 The named index will be dropped upon completion of the new index.
         """
 
+        async def updater(progress: JsonDict, batch_size: int) -> int:
+            await self.create_index_in_background(
+                index_name=index_name,
+                table=table,
+                columns=columns,
+                where_clause=where_clause,
+                unique=unique,
+                psql_only=psql_only,
+                replaces_index=replaces_index,
+            )
+            await self._end_background_update(update_name)
+            return 1
+
+        self._background_update_handlers[update_name] = _BackgroundUpdateHandler(
+            updater, oneshot=True
+        )
+
+    async def create_index_in_background(
+        self,
+        index_name: str,
+        table: str,
+        columns: Iterable[str],
+        where_clause: Optional[str] = None,
+        unique: bool = False,
+        psql_only: bool = False,
+        replaces_index: Optional[str] = None,
+    ) -> None:
+        """Add an index in the background.
+
+        Args:
+            index_name: name of index to add
+            table: table to add index to
+            columns: columns/expressions to include in index, e.g.
+                `["room_id", "receipt_type", "user_id"]`
+            where_clause: A WHERE clause to specify a partial unique index.
+            unique: true to make a UNIQUE index
+            psql_only: true to only create this index on psql databases (useful
+                for virtual sqlite tables)
+            replaces_index: The name of an index that this index replaces.
+                The named index will be dropped upon completion of the new index.
+        """
+
         def create_index_psql(conn: Connection) -> None:
             conn.rollback()
             # postgres insists on autocommit for the index
@@ -618,16 +660,11 @@ class BackgroundUpdater:
         else:
             runner = create_index_sqlite
 
-        async def updater(progress: JsonDict, batch_size: int) -> int:
-            if runner is not None:
-                logger.info("Adding index %s to %s", index_name, table)
-                await self.db_pool.runWithConnection(runner)
-            await self._end_background_update(update_name)
-            return 1
+        if runner is None:
+            return
 
-        self._background_update_handlers[update_name] = _BackgroundUpdateHandler(
-            updater, oneshot=True
-        )
+        logger.info("Adding index %s to %s", index_name, table)
+        await self.db_pool.runWithConnection(runner)
 
     async def _end_background_update(self, update_name: str) -> None:
         """Removes a completed background update task from the queue.
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index a580e4bdda..e06725f69c 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -924,39 +924,6 @@ class ReceiptsBackgroundUpdateStore(SQLBaseStore):
 
         return batch_size
 
-    async def _create_receipts_index(self, index_name: str, table: str) -> None:
-        """Adds a unique index on `(room_id, receipt_type, user_id)` to the given
-        receipts table, for non-thread receipts."""
-
-        def _create_index(conn: LoggingDatabaseConnection) -> None:
-            conn.rollback()
-
-            # we have to set autocommit, because postgres refuses to
-            # CREATE INDEX CONCURRENTLY without it.
-            if isinstance(self.database_engine, PostgresEngine):
-                conn.set_session(autocommit=True)
-
-            try:
-                c = conn.cursor()
-
-                # Now that the duplicates are gone, we can create the index.
-                concurrently = (
-                    "CONCURRENTLY"
-                    if isinstance(self.database_engine, PostgresEngine)
-                    else ""
-                )
-                sql = f"""
-                    CREATE UNIQUE INDEX {concurrently} {index_name}
-                    ON {table}(room_id, receipt_type, user_id)
-                    WHERE thread_id IS NULL
-                """
-                c.execute(sql)
-            finally:
-                if isinstance(self.database_engine, PostgresEngine):
-                    conn.set_session(autocommit=False)
-
-        await self.db_pool.runWithConnection(_create_index)
-
     async def _background_receipts_linearized_unique_index(
         self, progress: dict, batch_size: int
     ) -> int:
@@ -999,9 +966,12 @@ class ReceiptsBackgroundUpdateStore(SQLBaseStore):
             _remote_duplicate_receipts_txn,
         )
 
-        await self._create_receipts_index(
-            "receipts_linearized_unique_index",
-            "receipts_linearized",
+        await self.db_pool.updates.create_index_in_background(
+            index_name="receipts_linearized_unique_index",
+            table="receipts_linearized",
+            columns=["room_id", "receipt_type", "user_id"],
+            where_clause="thread_id IS NULL",
+            unique=True,
         )
 
         await self.db_pool.updates._end_background_update(
@@ -1050,9 +1020,12 @@ class ReceiptsBackgroundUpdateStore(SQLBaseStore):
             _remote_duplicate_receipts_txn,
         )
 
-        await self._create_receipts_index(
-            "receipts_graph_unique_index",
-            "receipts_graph",
+        await self.db_pool.updates.create_index_in_background(
+            index_name="receipts_graph_unique_index",
+            table="receipts_graph",
+            columns=["room_id", "receipt_type", "user_id"],
+            where_clause="thread_id IS NULL",
+            unique=True,
         )
 
         await self.db_pool.updates._end_background_update(
-- 
cgit 1.5.1


From 2a3cd59dd06411a79fb7500970db1b98f0d87695 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Mon, 12 Dec 2022 13:21:17 +0100
Subject: Add optional ICU support for user search (#14464)

Fixes #13655

This change uses ICU (International Components for Unicode) to improve boundary detection in user search.

This change also adds a new dependency on libicu-dev and pkg-config for the Debian packages, which are available in all supported distros.
---
 changelog.d/14464.feature                        |  1 +
 debian/changelog                                 |  7 +++
 debian/control                                   |  2 +
 docker/Dockerfile                                |  2 +
 docker/Dockerfile-dhvirtualenv                   |  2 +
 poetry.lock                                      | 16 +++++-
 pyproject.toml                                   |  7 +++
 stubs/icu.pyi                                    | 25 +++++++++
 synapse/storage/databases/main/user_directory.py | 67 ++++++++++++++++++++++--
 tests/storage/test_user_directory.py             | 43 +++++++++++++++
 10 files changed, 166 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/14464.feature
 create mode 100644 stubs/icu.pyi

(limited to 'synapse')

diff --git a/changelog.d/14464.feature b/changelog.d/14464.feature
new file mode 100644
index 0000000000..688ea32117
--- /dev/null
+++ b/changelog.d/14464.feature
@@ -0,0 +1 @@
+Improve user search for international display names.
diff --git a/debian/changelog b/debian/changelog
index 163b7210bf..5d3c4f7d6b 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+matrix-synapse-py3 (1.74.0~rc1) UNRELEASED; urgency=medium
+
+  * New dependency on libicu-dev to provide improved results for user
+    search.
+
+ -- Synapse Packaging team <packages@matrix.org>  Tue, 06 Dec 2022 15:28:10 +0000
+
 matrix-synapse-py3 (1.73.0) stable; urgency=medium
 
   * New Synapse release 1.73.0.
diff --git a/debian/control b/debian/control
index 86f5a66d02..bc628cec08 100644
--- a/debian/control
+++ b/debian/control
@@ -8,6 +8,8 @@ Build-Depends:
  dh-virtualenv (>= 1.1),
  libsystemd-dev,
  libpq-dev,
+ libicu-dev,
+ pkg-config,
  lsb-release,
  python3-dev,
  python3,
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 185d5bc3d4..7e5123210a 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -97,6 +97,8 @@ RUN \
     zlib1g-dev \
     git \
     curl \
+    libicu-dev \
+    pkg-config \
     && rm -rf /var/lib/apt/lists/*
 
 
diff --git a/docker/Dockerfile-dhvirtualenv b/docker/Dockerfile-dhvirtualenv
index 73165f6f85..f3b5b00ce6 100644
--- a/docker/Dockerfile-dhvirtualenv
+++ b/docker/Dockerfile-dhvirtualenv
@@ -84,6 +84,8 @@ RUN apt-get update -qq -o Acquire::Languages=none \
         python3-venv \
         sqlite3 \
         libpq-dev \
+        libicu-dev \
+        pkg-config \
         xmlsec1
 
 # Install rust and ensure it's in the PATH
diff --git a/poetry.lock b/poetry.lock
index cac22e2ef0..ccda8a23fb 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -837,6 +837,14 @@ category = "dev"
 optional = false
 python-versions = ">=3.5"
 
+[[package]]
+name = "pyicu"
+version = "2.10.2"
+description = "Python extension wrapping the ICU C++ API"
+category = "main"
+optional = true
+python-versions = "*"
+
 [[package]]
 name = "pyjwt"
 version = "2.4.0"
@@ -1622,7 +1630,7 @@ docs = ["Sphinx", "repoze.sphinx.autointerface"]
 test = ["zope.i18nmessageid", "zope.testing", "zope.testrunner"]
 
 [extras]
-all = ["matrix-synapse-ldap3", "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", "pysaml2", "authlib", "lxml", "sentry-sdk", "jaeger-client", "opentracing", "txredisapi", "hiredis", "Pympler"]
+all = ["matrix-synapse-ldap3", "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", "pysaml2", "authlib", "lxml", "sentry-sdk", "jaeger-client", "opentracing", "txredisapi", "hiredis", "Pympler", "pyicu"]
 cache-memory = ["Pympler"]
 jwt = ["authlib"]
 matrix-synapse-ldap3 = ["matrix-synapse-ldap3"]
@@ -1635,11 +1643,12 @@ sentry = ["sentry-sdk"]
 systemd = ["systemd-python"]
 test = ["parameterized", "idna"]
 url-preview = ["lxml"]
+user-search = ["pyicu"]
 
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.7.1"
-content-hash = "8c44ceeb9df5c3ab43040400e0a6b895de49417e61293a1ba027640b34f03263"
+content-hash = "f20007013f33bc35a01e412c48adc62a936030f3074e06286674c5ad7f44d300"
 
 [metadata.files]
 attrs = [
@@ -2427,6 +2436,9 @@ pygments = [
     {file = "Pygments-2.11.2-py3-none-any.whl", hash = "sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65"},
     {file = "Pygments-2.11.2.tar.gz", hash = "sha256:4e426f72023d88d03b2fa258de560726ce890ff3b630f88c21cbb8b2503b8c6a"},
 ]
+pyicu = [
+    {file = "PyICU-2.10.2.tar.gz", hash = "sha256:0c3309eea7fab6857507ace62403515b60fe096cbfb4f90d14f55ff75c5441c1"},
+]
 pyjwt = [
     {file = "PyJWT-2.4.0-py3-none-any.whl", hash = "sha256:72d1d253f32dbd4f5c88eaf1fdc62f3a19f676ccbadb9dbc5d07e951b2b26daf"},
     {file = "PyJWT-2.4.0.tar.gz", hash = "sha256:d42908208c699b3b973cbeb01a969ba6a96c821eefb1c5bfe4c390c01d67abba"},
diff --git a/pyproject.toml b/pyproject.toml
index df59fa0562..bb383683cc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -208,6 +208,7 @@ hiredis = { version = "*", optional = true }
 Pympler = { version = "*", optional = true }
 parameterized = { version = ">=0.7.4", optional = true }
 idna = { version = ">=2.5", optional = true }
+pyicu = { version = ">=2.10.2", optional = true }
 
 [tool.poetry.extras]
 # NB: Packages that should be part of `pip install matrix-synapse[all]` need to be specified
@@ -230,6 +231,10 @@ redis = ["txredisapi", "hiredis"]
 # Required to use experimental `caches.track_memory_usage` config option.
 cache-memory = ["pympler"]
 test = ["parameterized", "idna"]
+# Allows for better search for international characters in the user directory. This
+# requires libicu's development headers to be installed on the system (e.g. libicu-dev
+# on Debian-based distributions).
+user-search = ["pyicu"]
 
 # The duplication here is awful. I hate hate hate hate hate it. However, for now I want
 # to ensure you can still `pip install matrix-synapse[all]` like today. Two motivations:
@@ -261,6 +266,8 @@ all = [
     "txredisapi", "hiredis",
     # cache-memory
     "pympler",
+    # improved user search
+    "pyicu",
     # omitted:
     #   - test: it's useful to have this separate from dev deps in the olddeps job
     #   - systemd: this is a system-based requirement
diff --git a/stubs/icu.pyi b/stubs/icu.pyi
new file mode 100644
index 0000000000..efeda7938a
--- /dev/null
+++ b/stubs/icu.pyi
@@ -0,0 +1,25 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stub for PyICU.
+
+class Locale:
+    @staticmethod
+    def getDefault() -> Locale: ...
+
+class BreakIterator:
+    @staticmethod
+    def createWordInstance(locale: Locale) -> BreakIterator: ...
+    def setText(self, text: str) -> None: ...
+    def nextBoundary(self) -> int: ...
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index af9952f513..14ef5b040d 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -26,6 +26,14 @@ from typing import (
     cast,
 )
 
+try:
+    # Figure out if ICU support is available for searching users.
+    import icu
+
+    USE_ICU = True
+except ModuleNotFoundError:
+    USE_ICU = False
+
 from typing_extensions import TypedDict
 
 from synapse.api.errors import StoreError
@@ -900,7 +908,7 @@ def _parse_query_sqlite(search_term: str) -> str:
     """
 
     # Pull out the individual words, discarding any non-word characters.
-    results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
+    results = _parse_words(search_term)
     return " & ".join("(%s* OR %s)" % (result, result) for result in results)
 
 
@@ -910,12 +918,63 @@ def _parse_query_postgres(search_term: str) -> Tuple[str, str, str]:
     We use this so that we can add prefix matching, which isn't something
     that is supported by default.
     """
-
-    # Pull out the individual words, discarding any non-word characters.
-    results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
+    results = _parse_words(search_term)
 
     both = " & ".join("(%s:* | %s)" % (result, result) for result in results)
     exact = " & ".join("%s" % (result,) for result in results)
     prefix = " & ".join("%s:*" % (result,) for result in results)
 
     return both, exact, prefix
+
+
+def _parse_words(search_term: str) -> List[str]:
+    """Split the provided search string into a list of its words.
+
+    If support for ICU (International Components for Unicode) is available, use it.
+    Otherwise, fall back to using a regex to detect word boundaries. This latter
+    solution works well enough for most Latin-based languages, but doesn't work as well
+    with other languages.
+
+    Args:
+        search_term: The search string.
+
+    Returns:
+        A list of the words in the search string.
+    """
+    if USE_ICU:
+        return _parse_words_with_icu(search_term)
+
+    return re.findall(r"([\w\-]+)", search_term, re.UNICODE)
+
+
+def _parse_words_with_icu(search_term: str) -> List[str]:
+    """Break down the provided search string into its individual words using ICU
+    (International Components for Unicode).
+
+    Args:
+        search_term: The search string.
+
+    Returns:
+        A list of the words in the search string.
+    """
+    results = []
+    breaker = icu.BreakIterator.createWordInstance(icu.Locale.getDefault())
+    breaker.setText(search_term)
+    i = 0
+    while True:
+        j = breaker.nextBoundary()
+        if j < 0:
+            break
+
+        result = search_term[i:j]
+
+        # libicu considers spaces and punctuation between words as words, but we don't
+        # want to include those in results as they would result in syntax errors in SQL
+        # queries (e.g. "foo bar" would result in the search query including "foo &  &
+        # bar").
+        if len(re.findall(r"([\w\-]+)", result, re.UNICODE)):
+            results.append(result)
+
+        i = j
+
+    return results
diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py
index 88c7d5fec0..3ba896ecf3 100644
--- a/tests/storage/test_user_directory.py
+++ b/tests/storage/test_user_directory.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import re
 from typing import Any, Dict, Set, Tuple
 from unittest import mock
 from unittest.mock import Mock, patch
@@ -30,6 +31,12 @@ from synapse.util import Clock
 from tests.test_utils.event_injection import inject_member_event
 from tests.unittest import HomeserverTestCase, override_config
 
+try:
+    import icu
+except ImportError:
+    icu = None  # type: ignore
+
+
 ALICE = "@alice:a"
 BOB = "@bob:b"
 BOBBY = "@bobby:a"
@@ -467,3 +474,39 @@ class UserDirectoryStoreTestCase(HomeserverTestCase):
             r["results"][0],
             {"user_id": BELA, "display_name": "Bela", "avatar_url": None},
         )
+
+
+class UserDirectoryICUTestCase(HomeserverTestCase):
+    if not icu:
+        skip = "Requires PyICU"
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.store = hs.get_datastores().main
+        self.user_dir_helper = GetUserDirectoryTables(self.store)
+
+    def test_icu_word_boundary(self) -> None:
+        """Tests that we correctly detect word boundaries when ICU (International
+        Components for Unicode) support is available.
+        """
+
+        display_name = "Gáo"
+
+        # This word is not broken down correctly by Python's regular expressions,
+        # likely because á is actually a lowercase a followed by a U+0301 combining
+        # acute accent. This is specifically something that ICU support fixes.
+        matches = re.findall(r"([\w\-]+)", display_name, re.UNICODE)
+        self.assertEqual(len(matches), 2)
+
+        self.get_success(
+            self.store.update_profile_in_user_dir(ALICE, display_name, None)
+        )
+        self.get_success(self.store.add_users_in_public_rooms("!room:id", (ALICE,)))
+
+        # Check that searching for this user yields the correct result.
+        r = self.get_success(self.store.search_user_dir(BOB, display_name, 10))
+        self.assertFalse(r["limited"])
+        self.assertEqual(len(r["results"]), 1)
+        self.assertDictEqual(
+            r["results"][0],
+            {"user_id": ALICE, "display_name": display_name, "avatar_url": None},
+        )
-- 
cgit 1.5.1


From 74b89c27613a34ec9b291ad3066db7ce0adff1db Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Mon, 12 Dec 2022 13:55:23 +0000
Subject: Revert the deletion of stale devices due to performance issues.
 (#14662)

---
 changelog.d/14595.misc                    |  1 -
 changelog.d/14649.misc                    |  1 -
 changelog.d/14662.removal                 |  1 +
 synapse/handlers/device.py                | 33 +-----------
 synapse/storage/databases/main/devices.py | 84 +------------------------------
 tests/handlers/test_device.py             | 33 +-----------
 tests/storage/test_client_ips.py          |  4 +-
 7 files changed, 5 insertions(+), 152 deletions(-)
 delete mode 100644 changelog.d/14595.misc
 delete mode 100644 changelog.d/14649.misc
 create mode 100644 changelog.d/14662.removal

(limited to 'synapse')

diff --git a/changelog.d/14595.misc b/changelog.d/14595.misc
deleted file mode 100644
index f9bfc581ad..0000000000
--- a/changelog.d/14595.misc
+++ /dev/null
@@ -1 +0,0 @@
-Prune user's old devices on login if they have too many.
diff --git a/changelog.d/14649.misc b/changelog.d/14649.misc
deleted file mode 100644
index f9bfc581ad..0000000000
--- a/changelog.d/14649.misc
+++ /dev/null
@@ -1 +0,0 @@
-Prune user's old devices on login if they have too many.
diff --git a/changelog.d/14662.removal b/changelog.d/14662.removal
new file mode 100644
index 0000000000..19a387bbb4
--- /dev/null
+++ b/changelog.d/14662.removal
@@ -0,0 +1 @@
+(remove from changelog: unreleased) Revert the deletion of stale devices due to performance issues.
\ No newline at end of file
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index c935c7be90..d4750a32e6 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -52,7 +52,6 @@ from synapse.util import stringutils
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches.expiringcache import ExpiringCache
 from synapse.util.cancellation import cancellable
-from synapse.util.iterutils import batch_iter
 from synapse.util.metrics import measure_func
 from synapse.util.retryutils import NotRetryingDestination
 
@@ -422,9 +421,6 @@ class DeviceHandler(DeviceWorkerHandler):
 
         self._check_device_name_length(initial_device_display_name)
 
-        # Prune the user's device list if they already have a lot of devices.
-        await self._prune_too_many_devices(user_id)
-
         if device_id is not None:
             new_device = await self.store.store_device(
                 user_id=user_id,
@@ -456,33 +452,6 @@ class DeviceHandler(DeviceWorkerHandler):
 
         raise errors.StoreError(500, "Couldn't generate a device ID.")
 
-    async def _prune_too_many_devices(self, user_id: str) -> None:
-        """Delete any excess old devices this user may have."""
-        device_ids = await self.store.check_too_many_devices_for_user(user_id, 100)
-        if not device_ids:
-            return
-
-        logger.info("Pruning %d old devices for user %s", len(device_ids), user_id)
-
-        # We don't want to block and try and delete tonnes of devices at once,
-        # so we cap the number of devices we delete synchronously.
-        first_batch, remaining_device_ids = device_ids[:10], device_ids[10:]
-        await self.delete_devices(user_id, first_batch)
-
-        if not remaining_device_ids:
-            return
-
-        # Now spawn a background loop that deletes the rest.
-        async def _prune_too_many_devices_loop() -> None:
-            for batch in batch_iter(remaining_device_ids, 10):
-                await self.delete_devices(user_id, batch)
-
-                await self.clock.sleep(1)
-
-        run_as_background_process(
-            "_prune_too_many_devices_loop", _prune_too_many_devices_loop
-        )
-
     async def _delete_stale_devices(self) -> None:
         """Background task that deletes devices which haven't been accessed for more than
         a configured time period.
@@ -512,7 +481,7 @@ class DeviceHandler(DeviceWorkerHandler):
             device_ids = [d for d in device_ids if d != except_device_id]
         await self.delete_devices(user_id, device_ids)
 
-    async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None:
+    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
         """Delete several devices
 
         Args:
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 95d4c0622d..a5bb4d404e 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1569,77 +1569,6 @@ class DeviceBackgroundUpdateStore(SQLBaseStore):
 
         return rows
 
-    async def check_too_many_devices_for_user(
-        self, user_id: str, limit: int
-    ) -> List[str]:
-        """Check if the user has a lot of devices, and if so return the set of
-        devices we can prune.
-
-        This does *not* return hidden devices or devices with E2E keys.
-
-        Returns at most `limit` number of devices, ordered by last seen.
-        """
-
-        num_devices = await self.db_pool.simple_select_one_onecol(
-            table="devices",
-            keyvalues={"user_id": user_id, "hidden": False},
-            retcol="COALESCE(COUNT(*), 0)",
-            desc="count_devices",
-        )
-
-        # We let users have up to ten devices without pruning.
-        if num_devices <= 10:
-            return []
-
-        # We prune everything older than N days.
-        max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000
-
-        if num_devices > 50:
-            # If the user has more than 50 devices, then we chose a last seen
-            # that ensures we keep at most 50 devices.
-            sql = """
-                SELECT last_seen FROM devices
-                LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
-                WHERE
-                    user_id = ?
-                    AND NOT hidden
-                    AND last_seen IS NOT NULL
-                    AND key_json IS NULL
-                ORDER BY last_seen DESC
-                LIMIT 1
-                OFFSET 50
-            """
-
-            rows = await self.db_pool.execute(
-                "check_too_many_devices_for_user_last_seen", None, sql, (user_id,)
-            )
-            if rows:
-                max_last_seen = max(rows[0][0], max_last_seen)
-
-        # Now fetch the devices to delete.
-        sql = """
-            SELECT device_id FROM devices
-            LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
-            WHERE
-                user_id = ?
-                AND NOT hidden
-                AND last_seen < ?
-                AND key_json IS NULL
-            ORDER BY last_seen
-            LIMIT ?
-        """
-
-        def check_too_many_devices_for_user_txn(
-            txn: LoggingTransaction,
-        ) -> List[str]:
-            txn.execute(sql, (user_id, max_last_seen, limit))
-            return [device_id for device_id, in txn]
-
-        return await self.db_pool.runInteraction(
-            "check_too_many_devices_for_user",
-            check_too_many_devices_for_user_txn,
-        )
-
 
 class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
     # Because we have write access, this will be a StreamIdGenerator
@@ -1698,7 +1627,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
                 values={},
                 insertion_values={
                     "display_name": initial_device_display_name,
-                    "last_seen": self._clock.time_msec(),
                     "hidden": False,
                 },
                 desc="store_device",
@@ -1744,15 +1672,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
             )
             raise StoreError(500, "Problem storing device.")
 
-    @cached(max_entries=0)
-    async def delete_device(self, user_id: str, device_id: str) -> None:
-        raise NotImplementedError()
-
-    # Note: sometimes deleting rows out of `device_inbox` can take a long time,
-    # so we use a cache so that we deduplicate in flight requests to delete
-    # devices.
-    @cachedList(cached_method_name="delete_device", list_name="device_ids")
-    async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> dict:
+    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
         """Deletes several devices.
 
         Args:
@@ -1789,8 +1709,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         for device_id in device_ids:
             self.device_id_exists_cache.invalidate((user_id, device_id))
 
-        return {}
-
     async def update_device(
         self, user_id: str, device_id: str, new_display_name: Optional[str] = None
     ) -> None:
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index e51cac9b33..ce7525e29c 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -20,8 +20,6 @@ from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.errors import NotFoundError, SynapseError
 from synapse.handlers.device import MAX_DEVICE_DISPLAY_NAME_LEN, DeviceHandler
-from synapse.rest import admin
-from synapse.rest.client import account, login
 from synapse.server import HomeServer
 from synapse.util import Clock
 
@@ -32,12 +30,6 @@ user2 = "@theresa:bbb"
 
 
 class DeviceTestCase(unittest.HomeserverTestCase):
-    servlets = [
-        login.register_servlets,
-        admin.register_servlets,
-        account.register_servlets,
-    ]
-
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         hs = self.setup_test_homeserver("server", federation_http_client=None)
         handler = hs.get_device_handler()
@@ -123,7 +115,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
                 "device_id": "xyz",
                 "display_name": "display 0",
                 "last_seen_ip": None,
-                "last_seen_ts": 1000000,
+                "last_seen_ts": None,
             },
             device_map["xyz"],
         )
@@ -237,29 +229,6 @@ class DeviceTestCase(unittest.HomeserverTestCase):
             NotFoundError,
         )
 
-    def test_login_delete_old_devices(self) -> None:
-        """Delete old devices if the user already has too many."""
-
-        user_id = self.register_user("user", "pass")
-
-        # Create a bunch of devices
-        for _ in range(50):
-            self.login("user", "pass")
-            self.reactor.advance(1)
-
-        # Advance the clock for ages (as we only delete old devices)
-        self.reactor.advance(60 * 60 * 24 * 300)
-
-        # Log in again to start the pruning
-        self.login("user", "pass")
-
-        # Give the background job time to do its thing
-        self.reactor.pump([1.0] * 100)
-
-        # We should now only have the most recent device.
-        devices = self.get_success(self.handler.get_devices_by_user(user_id))
-        self.assertEqual(len(devices), 1)
-
     def _record_users(self) -> None:
         # check this works for both devices which have a recorded client_ip,
         # and those which don't.
diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py
index 81e4e596e4..7f7f4ef892 100644
--- a/tests/storage/test_client_ips.py
+++ b/tests/storage/test_client_ips.py
@@ -170,8 +170,6 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
             )
         )
 
-        last_seen = self.clock.time_msec()
-
         if after_persisting:
             # Trigger the storage loop
             self.reactor.advance(10)
@@ -192,7 +190,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
                         "device_id": device_id,
                         "ip": None,
                         "user_agent": None,
-                        "last_seen": last_seen,
+                        "last_seen": None,
                     },
                 ],
             )
-- 
cgit 1.5.1


From b5b5f6608462a988b05502a3b70b6a57ca3846d2 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 12 Dec 2022 16:19:30 +0000
Subject: Move `StateFilter` to `synapse.types` (#14668)

* Move `StateFilter` to `synapse.types`

* Changelog
---
 changelog.d/14668.misc                        |   1 +
 synapse/events/builder.py                     |   2 +-
 synapse/events/snapshot.py                    |   2 +-
 synapse/handlers/federation.py                |   2 +-
 synapse/handlers/federation_event.py          |   2 +-
 synapse/handlers/message.py                   |   2 +-
 synapse/handlers/pagination.py                |   2 +-
 synapse/handlers/register.py                  |   2 +-
 synapse/handlers/room.py                      |   2 +-
 synapse/handlers/room_member.py               |   2 +-
 synapse/handlers/search.py                    |   2 +-
 synapse/handlers/sync.py                      |   2 +-
 synapse/module_api/__init__.py                |   2 +-
 synapse/push/bulk_push_rule_evaluator.py      |   2 +-
 synapse/push/mailer.py                        |   2 +-
 synapse/rest/admin/rooms.py                   |   2 +-
 synapse/rest/client/room.py                   |   2 +-
 synapse/state/__init__.py                     |   2 +-
 synapse/storage/controllers/persist_events.py |   2 +-
 synapse/storage/controllers/state.py          |   2 +-
 synapse/storage/databases/main/state.py       |   2 +-
 synapse/storage/databases/state/bg_updates.py |   2 +-
 synapse/storage/databases/state/store.py      |   2 +-
 synapse/storage/state.py                      | 567 ----------------
 synapse/types.py                              | 928 --------------------------
 synapse/types/__init__.py                     | 928 ++++++++++++++++++++++++++
 synapse/types/state.py                        | 567 ++++++++++++++++
 synapse/visibility.py                         |   2 +-
 tests/storage/test_state.py                   |   2 +-
 29 files changed, 1520 insertions(+), 1519 deletions(-)
 create mode 100644 changelog.d/14668.misc
 delete mode 100644 synapse/storage/state.py
 delete mode 100644 synapse/types.py
 create mode 100644 synapse/types/__init__.py
 create mode 100644 synapse/types/state.py

(limited to 'synapse')

diff --git a/changelog.d/14668.misc b/changelog.d/14668.misc
new file mode 100644
index 0000000000..5269d8a97d
--- /dev/null
+++ b/changelog.d/14668.misc
@@ -0,0 +1 @@
+Move `StateFilter` to `synapse.types`.
diff --git a/synapse/events/builder.py b/synapse/events/builder.py
index d62906043f..94dd1298e1 100644
--- a/synapse/events/builder.py
+++ b/synapse/events/builder.py
@@ -28,8 +28,8 @@ from synapse.event_auth import auth_types_for_event
 from synapse.events import EventBase, _EventInternalMetadata, make_event_from_dict
 from synapse.state import StateHandler
 from synapse.storage.databases.main import DataStore
-from synapse.storage.state import StateFilter
 from synapse.types import EventID, JsonDict
+from synapse.types.state import StateFilter
 from synapse.util import Clock
 from synapse.util.stringutils import random_string
 
diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py
index 1c0e96bec7..6eaef8b57a 100644
--- a/synapse/events/snapshot.py
+++ b/synapse/events/snapshot.py
@@ -23,7 +23,7 @@ from synapse.types import JsonDict, StateMap
 if TYPE_CHECKING:
     from synapse.storage.controllers import StorageControllers
     from synapse.storage.databases.main import DataStore
-    from synapse.storage.state import StateFilter
+    from synapse.types.state import StateFilter
 
 
 @attr.s(slots=True, auto_attribs=True)
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 3398fcaf7d..b2784d7333 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -70,8 +70,8 @@ from synapse.replication.http.federation import (
 )
 from synapse.storage.databases.main.events import PartialStateConflictError
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
-from synapse.storage.state import StateFilter
 from synapse.types import JsonDict, get_domain_from_id
+from synapse.types.state import StateFilter
 from synapse.util.async_helpers import Linearizer
 from synapse.util.retryutils import NotRetryingDestination
 from synapse.visibility import filter_events_for_server
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index f7223b03c3..d2facdab60 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -75,7 +75,6 @@ from synapse.replication.http.federation import (
 from synapse.state import StateResolutionStore
 from synapse.storage.databases.main.events import PartialStateConflictError
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
-from synapse.storage.state import StateFilter
 from synapse.types import (
     PersistedEventPosition,
     RoomStreamToken,
@@ -83,6 +82,7 @@ from synapse.types import (
     UserID,
     get_domain_from_id,
 )
+from synapse.types.state import StateFilter
 from synapse.util.async_helpers import Linearizer, concurrently_execute
 from synapse.util.iterutils import batch_iter
 from synapse.util.retryutils import NotRetryingDestination
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 5cbe89f4fd..d6e90ef259 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -59,7 +59,6 @@ from synapse.replication.http.send_event import ReplicationSendEventRestServlet
 from synapse.replication.http.send_events import ReplicationSendEventsRestServlet
 from synapse.storage.databases.main.events import PartialStateConflictError
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
-from synapse.storage.state import StateFilter
 from synapse.types import (
     MutableStateMap,
     PersistedEventPosition,
@@ -70,6 +69,7 @@ from synapse.types import (
     UserID,
     create_requester,
 )
+from synapse.types.state import StateFilter
 from synapse.util import json_decoder, json_encoder, log_failure, unwrapFirstError
 from synapse.util.async_helpers import Linearizer, gather_results
 from synapse.util.caches.expiringcache import ExpiringCache
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index c572508a02..8c8ff18a1a 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -27,9 +27,9 @@ from synapse.handlers.room import ShutdownRoomResponse
 from synapse.logging.opentracing import trace
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.rest.admin._base import assert_user_is_admin
-from synapse.storage.state import StateFilter
 from synapse.streams.config import PaginationConfig
 from synapse.types import JsonDict, Requester, StreamKeyType
+from synapse.types.state import StateFilter
 from synapse.util.async_helpers import ReadWriteLock
 from synapse.util.stringutils import random_string
 from synapse.visibility import filter_events_for_client
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index 6307fa9c5d..c611efb760 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -46,8 +46,8 @@ from synapse.replication.http.register import (
     ReplicationRegisterServlet,
 )
 from synapse.spam_checker_api import RegistrationBehaviour
-from synapse.storage.state import StateFilter
 from synapse.types import RoomAlias, UserID, create_requester
+from synapse.types.state import StateFilter
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 6dcfd86fdf..f81241c2b3 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -62,7 +62,6 @@ from synapse.events.utils import copy_and_fixup_power_levels_contents
 from synapse.handlers.relations import BundledAggregations
 from synapse.module_api import NOT_SPAM
 from synapse.rest.admin._base import assert_user_is_admin
-from synapse.storage.state import StateFilter
 from synapse.streams import EventSource
 from synapse.types import (
     JsonDict,
@@ -77,6 +76,7 @@ from synapse.types import (
     UserID,
     create_requester,
 )
+from synapse.types.state import StateFilter
 from synapse.util import stringutils
 from synapse.util.caches.response_cache import ResponseCache
 from synapse.util.stringutils import parse_and_validate_server_name
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 6ad2b38b8f..0c39e852a1 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -34,7 +34,6 @@ from synapse.events.snapshot import EventContext
 from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN
 from synapse.logging import opentracing
 from synapse.module_api import NOT_SPAM
-from synapse.storage.state import StateFilter
 from synapse.types import (
     JsonDict,
     Requester,
@@ -45,6 +44,7 @@ from synapse.types import (
     create_requester,
     get_domain_from_id,
 )
+from synapse.types.state import StateFilter
 from synapse.util.async_helpers import Linearizer
 from synapse.util.distributor import user_left_room
 
diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py
index bcab98c6d5..33115ce488 100644
--- a/synapse/handlers/search.py
+++ b/synapse/handlers/search.py
@@ -23,8 +23,8 @@ from synapse.api.constants import EventTypes, Membership
 from synapse.api.errors import NotFoundError, SynapseError
 from synapse.api.filtering import Filter
 from synapse.events import EventBase
-from synapse.storage.state import StateFilter
 from synapse.types import JsonDict, StreamKeyType, UserID
+from synapse.types.state import StateFilter
 from synapse.visibility import filter_events_for_client
 
 if TYPE_CHECKING:
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index dace9b606f..7d6a653747 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -49,7 +49,6 @@ from synapse.push.clientformat import format_push_rules_for_user
 from synapse.storage.databases.main.event_push_actions import RoomNotifCounts
 from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary
 from synapse.storage.roommember import MemberSummary
-from synapse.storage.state import StateFilter
 from synapse.types import (
     DeviceListUpdates,
     JsonDict,
@@ -61,6 +60,7 @@ from synapse.types import (
     StreamToken,
     UserID,
 )
+from synapse.types.state import StateFilter
 from synapse.util.async_helpers import concurrently_execute
 from synapse.util.caches.expiringcache import ExpiringCache
 from synapse.util.caches.lrucache import LruCache
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 96a661177a..0092a03c59 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -111,7 +111,6 @@ from synapse.storage.background_updates import (
 )
 from synapse.storage.database import DatabasePool, LoggingTransaction
 from synapse.storage.databases.main.roommember import ProfileInfo
-from synapse.storage.state import StateFilter
 from synapse.types import (
     DomainSpecificString,
     JsonDict,
@@ -124,6 +123,7 @@ from synapse.types import (
     UserProfile,
     create_requester,
 )
+from synapse.types.state import StateFilter
 from synapse.util import Clock
 from synapse.util.async_helpers import maybe_awaitable
 from synapse.util.caches.descriptors import CachedFunction, cached
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 9ed35d8461..36e5b327ef 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -35,8 +35,8 @@ from synapse.events import EventBase, relation_from_event
 from synapse.events.snapshot import EventContext
 from synapse.state import POWER_KEY
 from synapse.storage.databases.main.roommember import EventIdMembership
-from synapse.storage.state import StateFilter
 from synapse.synapse_rust.push import FilteredPushRules, PushRuleEvaluator
+from synapse.types.state import StateFilter
 from synapse.util.caches import register_cache
 from synapse.util.metrics import measure_func
 from synapse.visibility import filter_event_for_clients_with_state
diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py
index c2575ba3d9..93b255ced5 100644
--- a/synapse/push/mailer.py
+++ b/synapse/push/mailer.py
@@ -37,8 +37,8 @@ from synapse.push.push_types import (
     TemplateVars,
 )
 from synapse.storage.databases.main.event_push_actions import EmailPushAction
-from synapse.storage.state import StateFilter
 from synapse.types import StateMap, UserID
+from synapse.types.state import StateFilter
 from synapse.util.async_helpers import concurrently_execute
 from synapse.visibility import filter_events_for_client
 
diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py
index 747e6fda83..e957aa28ca 100644
--- a/synapse/rest/admin/rooms.py
+++ b/synapse/rest/admin/rooms.py
@@ -34,9 +34,9 @@ from synapse.rest.admin._base import (
     assert_user_is_admin,
 )
 from synapse.storage.databases.main.room import RoomSortOrder
-from synapse.storage.state import StateFilter
 from synapse.streams.config import PaginationConfig
 from synapse.types import JsonDict, RoomID, UserID, create_requester
+from synapse.types.state import StateFilter
 from synapse.util import json_decoder
 
 if TYPE_CHECKING:
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index 514eb6afc8..790614d721 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -55,9 +55,9 @@ from synapse.logging.opentracing import set_tag
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.rest.client._base import client_patterns
 from synapse.rest.client.transactions import HttpTransactionCache
-from synapse.storage.state import StateFilter
 from synapse.streams.config import PaginationConfig
 from synapse.types import JsonDict, StreamToken, ThirdPartyInstanceID, UserID
+from synapse.types.state import StateFilter
 from synapse.util import json_decoder
 from synapse.util.cancellation import cancellable
 from synapse.util.stringutils import parse_and_validate_server_name, random_string
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index 833ffec3de..ee5469d5a8 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -44,8 +44,8 @@ from synapse.logging.context import ContextResourceUsage
 from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet
 from synapse.state import v1, v2
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
-from synapse.storage.state import StateFilter
 from synapse.types import StateMap
+from synapse.types.state import StateFilter
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches.expiringcache import ExpiringCache
 from synapse.util.metrics import Measure, measure_func
diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py
index 33ffef521b..f1d2c71c91 100644
--- a/synapse/storage/controllers/persist_events.py
+++ b/synapse/storage/controllers/persist_events.py
@@ -58,13 +58,13 @@ from synapse.storage.controllers.state import StateStorageController
 from synapse.storage.databases import Databases
 from synapse.storage.databases.main.events import DeltaState
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
-from synapse.storage.state import StateFilter
 from synapse.types import (
     PersistedEventPosition,
     RoomStreamToken,
     StateMap,
     get_domain_from_id,
 )
+from synapse.types.state import StateFilter
 from synapse.util.async_helpers import ObservableDeferred, yieldable_gather_results
 from synapse.util.metrics import Measure
 
diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py
index 2b31ce54bb..26d79c6e62 100644
--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@@ -31,12 +31,12 @@ from synapse.api.constants import EventTypes
 from synapse.events import EventBase
 from synapse.logging.opentracing import tag_args, trace
 from synapse.storage.roommember import ProfileInfo
-from synapse.storage.state import StateFilter
 from synapse.storage.util.partial_state_events_tracker import (
     PartialCurrentStateTracker,
     PartialStateEventsTracker,
 )
 from synapse.types import MutableStateMap, StateMap
+from synapse.types.state import StateFilter
 from synapse.util.cancellation import cancellable
 
 if TYPE_CHECKING:
diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py
index af7bebee80..c801a93b5b 100644
--- a/synapse/storage/databases/main/state.py
+++ b/synapse/storage/databases/main/state.py
@@ -33,8 +33,8 @@ from synapse.storage.database import (
 )
 from synapse.storage.databases.main.events_worker import EventsWorkerStore
 from synapse.storage.databases.main.roommember import RoomMemberWorkerStore
-from synapse.storage.state import StateFilter
 from synapse.types import JsonDict, JsonMapping, StateMap
+from synapse.types.state import StateFilter
 from synapse.util.caches import intern_string
 from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.cancellation import cancellable
diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py
index 4a4ad0f492..d743282f13 100644
--- a/synapse/storage/databases/state/bg_updates.py
+++ b/synapse/storage/databases/state/bg_updates.py
@@ -22,8 +22,8 @@ from synapse.storage.database import (
     LoggingTransaction,
 )
 from synapse.storage.engines import PostgresEngine
-from synapse.storage.state import StateFilter
 from synapse.types import MutableStateMap, StateMap
+from synapse.types.state import StateFilter
 from synapse.util.caches import intern_string
 
 if TYPE_CHECKING:
diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py
index f8cfcaca83..1a7232b276 100644
--- a/synapse/storage/databases/state/store.py
+++ b/synapse/storage/databases/state/store.py
@@ -25,10 +25,10 @@ from synapse.storage.database import (
     LoggingTransaction,
 )
 from synapse.storage.databases.state.bg_updates import StateBackgroundUpdateStore
-from synapse.storage.state import StateFilter
 from synapse.storage.types import Cursor
 from synapse.storage.util.sequence import build_sequence_generator
 from synapse.types import MutableStateMap, StateKey, StateMap
+from synapse.types.state import StateFilter
 from synapse.util.caches.descriptors import cached
 from synapse.util.caches.dictionary_cache import DictionaryCache
 from synapse.util.cancellation import cancellable
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
deleted file mode 100644
index 0004d955b4..0000000000
--- a/synapse/storage/state.py
+++ /dev/null
@@ -1,567 +0,0 @@
-# Copyright 2014-2016 OpenMarket Ltd
-# Copyright 2022 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import logging
-from typing import (
-    TYPE_CHECKING,
-    Callable,
-    Collection,
-    Dict,
-    Iterable,
-    List,
-    Mapping,
-    Optional,
-    Set,
-    Tuple,
-    TypeVar,
-)
-
-import attr
-from frozendict import frozendict
-
-from synapse.api.constants import EventTypes
-from synapse.types import MutableStateMap, StateKey, StateMap
-
-if TYPE_CHECKING:
-    from typing import FrozenSet  # noqa: used within quoted type hint; flake8 sad
-
-
-logger = logging.getLogger(__name__)
-
-# Used for generic functions below
-T = TypeVar("T")
-
-
-@attr.s(slots=True, frozen=True, auto_attribs=True)
-class StateFilter:
-    """A filter used when querying for state.
-
-    Attributes:
-        types: Map from type to set of state keys (or None). This specifies
-            which state_keys for the given type to fetch from the DB. If None
-            then all events with that type are fetched. If the set is empty
-            then no events with that type are fetched.
-        include_others: Whether to fetch events with types that do not
-            appear in `types`.
-    """
-
-    types: "frozendict[str, Optional[FrozenSet[str]]]"
-    include_others: bool = False
-
-    def __attrs_post_init__(self) -> None:
-        # If `include_others` is set we canonicalise the filter by removing
-        # wildcards from the types dictionary
-        if self.include_others:
-            # this is needed to work around the fact that StateFilter is frozen
-            object.__setattr__(
-                self,
-                "types",
-                frozendict({k: v for k, v in self.types.items() if v is not None}),
-            )
-
-    @staticmethod
-    def all() -> "StateFilter":
-        """Returns a filter that fetches everything.
-
-        Returns:
-            The state filter.
-        """
-        return _ALL_STATE_FILTER
-
-    @staticmethod
-    def none() -> "StateFilter":
-        """Returns a filter that fetches nothing.
-
-        Returns:
-            The new state filter.
-        """
-        return _NONE_STATE_FILTER
-
-    @staticmethod
-    def from_types(types: Iterable[Tuple[str, Optional[str]]]) -> "StateFilter":
-        """Creates a filter that only fetches the given types
-
-        Args:
-            types: A list of type and state keys to fetch. A state_key of None
-                fetches everything for that type
-
-        Returns:
-            The new state filter.
-        """
-        type_dict: Dict[str, Optional[Set[str]]] = {}
-        for typ, s in types:
-            if typ in type_dict:
-                if type_dict[typ] is None:
-                    continue
-
-            if s is None:
-                type_dict[typ] = None
-                continue
-
-            type_dict.setdefault(typ, set()).add(s)  # type: ignore
-
-        return StateFilter(
-            types=frozendict(
-                (k, frozenset(v) if v is not None else None)
-                for k, v in type_dict.items()
-            )
-        )
-
-    @staticmethod
-    def from_lazy_load_member_list(members: Iterable[str]) -> "StateFilter":
-        """Creates a filter that returns all non-member events, plus the member
-        events for the given users
-
-        Args:
-            members: Set of user IDs
-
-        Returns:
-            The new state filter
-        """
-        return StateFilter(
-            types=frozendict({EventTypes.Member: frozenset(members)}),
-            include_others=True,
-        )
-
-    @staticmethod
-    def freeze(
-        types: Mapping[str, Optional[Collection[str]]], include_others: bool
-    ) -> "StateFilter":
-        """
-        Returns a (frozen) StateFilter with the same contents as the parameters
-        specified here, which can be made of mutable types.
-        """
-        types_with_frozen_values: Dict[str, Optional[FrozenSet[str]]] = {}
-        for state_types, state_keys in types.items():
-            if state_keys is not None:
-                types_with_frozen_values[state_types] = frozenset(state_keys)
-            else:
-                types_with_frozen_values[state_types] = None
-
-        return StateFilter(
-            frozendict(types_with_frozen_values), include_others=include_others
-        )
-
-    def return_expanded(self) -> "StateFilter":
-        """Creates a new StateFilter where type wild cards have been removed
-        (except for memberships). The returned filter is a superset of the
-        current one, i.e. anything that passes the current filter will pass
-        the returned filter.
-
-        This helps the caching as the DictionaryCache knows if it has *all* the
-        state, but does not know if it has all of the keys of a particular type,
-        which makes wildcard lookups expensive unless we have a complete cache.
-        Hence, if we are doing a wildcard lookup, populate the cache fully so
-        that we can do an efficient lookup next time.
-
-        Note that since we have two caches, one for membership events and one for
-        other events, we can be a bit more clever than simply returning
-        `StateFilter.all()` if `has_wildcards()` is True.
-
-        We return a StateFilter where:
-            1. the list of membership events to return is the same
-            2. if there is a wildcard that matches non-member events we
-               return all non-member events
-
-        Returns:
-            The new state filter.
-        """
-
-        if self.is_full():
-            # If we're going to return everything then there's nothing to do
-            return self
-
-        if not self.has_wildcards():
-            # If there are no wild cards, there's nothing to do
-            return self
-
-        if EventTypes.Member in self.types:
-            get_all_members = self.types[EventTypes.Member] is None
-        else:
-            get_all_members = self.include_others
-
-        has_non_member_wildcard = self.include_others or any(
-            state_keys is None
-            for t, state_keys in self.types.items()
-            if t != EventTypes.Member
-        )
-
-        if not has_non_member_wildcard:
-            # If there are no non-member wild cards we can just return ourselves
-            return self
-
-        if get_all_members:
-            # We want to return everything.
-            return StateFilter.all()
-        elif EventTypes.Member in self.types:
-            # We want to return all non-members, but only particular
-            # memberships
-            return StateFilter(
-                types=frozendict({EventTypes.Member: self.types[EventTypes.Member]}),
-                include_others=True,
-            )
-        else:
-            # We want to return all non-members
-            return _ALL_NON_MEMBER_STATE_FILTER
-
-    def make_sql_filter_clause(self) -> Tuple[str, List[str]]:
-        """Converts the filter to an SQL clause.
-
-        For example:
-
-            f = StateFilter.from_types([("m.room.create", "")])
-            clause, args = f.make_sql_filter_clause()
-            clause == "(type = ? AND state_key = ?)"
-            args == ['m.room.create', '']
-
-
-        Returns:
-            The SQL string (may be empty) and arguments. An empty SQL string is
-            returned when the filter matches everything (i.e. is "full").
-        """
-
-        where_clause = ""
-        where_args: List[str] = []
-
-        if self.is_full():
-            return where_clause, where_args
-
-        if not self.include_others and not self.types:
-            # i.e. this is an empty filter, so we need to return a clause that
-            # will match nothing
-            return "1 = 2", []
-
-        # First we build up a lost of clauses for each type/state_key combo
-        clauses = []
-        for etype, state_keys in self.types.items():
-            if state_keys is None:
-                clauses.append("(type = ?)")
-                where_args.append(etype)
-                continue
-
-            for state_key in state_keys:
-                clauses.append("(type = ? AND state_key = ?)")
-                where_args.extend((etype, state_key))
-
-        # This will match anything that appears in `self.types`
-        where_clause = " OR ".join(clauses)
-
-        # If we want to include stuff that's not in the types dict then we add
-        # a `OR type NOT IN (...)` clause to the end.
-        if self.include_others:
-            if where_clause:
-                where_clause += " OR "
-
-            where_clause += "type NOT IN (%s)" % (",".join(["?"] * len(self.types)),)
-            where_args.extend(self.types)
-
-        return where_clause, where_args
-
-    def max_entries_returned(self) -> Optional[int]:
-        """Returns the maximum number of entries this filter will return if
-        known, otherwise returns None.
-
-        For example a simple state filter asking for `("m.room.create", "")`
-        will return 1, whereas the default state filter will return None.
-
-        This is used to bail out early if the right number of entries have been
-        fetched.
-        """
-        if self.has_wildcards():
-            return None
-
-        return len(self.concrete_types())
-
-    def filter_state(self, state_dict: StateMap[T]) -> MutableStateMap[T]:
-        """Returns the state filtered with by this StateFilter.
-
-        Args:
-            state: The state map to filter
-
-        Returns:
-            The filtered state map.
-            This is a copy, so it's safe to mutate.
-        """
-        if self.is_full():
-            return dict(state_dict)
-
-        filtered_state = {}
-        for k, v in state_dict.items():
-            typ, state_key = k
-            if typ in self.types:
-                state_keys = self.types[typ]
-                if state_keys is None or state_key in state_keys:
-                    filtered_state[k] = v
-            elif self.include_others:
-                filtered_state[k] = v
-
-        return filtered_state
-
-    def is_full(self) -> bool:
-        """Whether this filter fetches everything or not
-
-        Returns:
-            True if the filter fetches everything.
-        """
-        return self.include_others and not self.types
-
-    def has_wildcards(self) -> bool:
-        """Whether the filter includes wildcards or is attempting to fetch
-        specific state.
-
-        Returns:
-            True if the filter includes wildcards.
-        """
-
-        return self.include_others or any(
-            state_keys is None for state_keys in self.types.values()
-        )
-
-    def concrete_types(self) -> List[Tuple[str, str]]:
-        """Returns a list of concrete type/state_keys (i.e. not None) that
-        will be fetched. This will be a complete list if `has_wildcards`
-        returns False, but otherwise will be a subset (or even empty).
-
-        Returns:
-            A list of type/state_keys tuples.
-        """
-        return [
-            (t, s)
-            for t, state_keys in self.types.items()
-            if state_keys is not None
-            for s in state_keys
-        ]
-
-    def get_member_split(self) -> Tuple["StateFilter", "StateFilter"]:
-        """Return the filter split into two: one which assumes it's exclusively
-        matching against member state, and one which assumes it's matching
-        against non member state.
-
-        This is useful due to the returned filters giving correct results for
-        `is_full()`, `has_wildcards()`, etc, when operating against maps that
-        either exclusively contain member events or only contain non-member
-        events. (Which is the case when dealing with the member vs non-member
-        state caches).
-
-        Returns:
-            The member and non member filters
-        """
-
-        if EventTypes.Member in self.types:
-            state_keys = self.types[EventTypes.Member]
-            if state_keys is None:
-                member_filter = StateFilter.all()
-            else:
-                member_filter = StateFilter(frozendict({EventTypes.Member: state_keys}))
-        elif self.include_others:
-            member_filter = StateFilter.all()
-        else:
-            member_filter = StateFilter.none()
-
-        non_member_filter = StateFilter(
-            types=frozendict(
-                {k: v for k, v in self.types.items() if k != EventTypes.Member}
-            ),
-            include_others=self.include_others,
-        )
-
-        return member_filter, non_member_filter
-
-    def _decompose_into_four_parts(
-        self,
-    ) -> Tuple[Tuple[bool, Set[str]], Tuple[Set[str], Set[StateKey]]]:
-        """
-        Decomposes this state filter into 4 constituent parts, which can be
-        thought of as this:
-            all? - minus_wildcards + plus_wildcards + plus_state_keys
-
-        where
-        * all represents ALL state
-        * minus_wildcards represents entire state types to remove
-        * plus_wildcards represents entire state types to add
-        * plus_state_keys represents individual state keys to add
-
-        See `recompose_from_four_parts` for the other direction of this
-        correspondence.
-        """
-        is_all = self.include_others
-        excluded_types: Set[str] = {t for t in self.types if is_all}
-        wildcard_types: Set[str] = {t for t, s in self.types.items() if s is None}
-        concrete_keys: Set[StateKey] = set(self.concrete_types())
-
-        return (is_all, excluded_types), (wildcard_types, concrete_keys)
-
-    @staticmethod
-    def _recompose_from_four_parts(
-        all_part: bool,
-        minus_wildcards: Set[str],
-        plus_wildcards: Set[str],
-        plus_state_keys: Set[StateKey],
-    ) -> "StateFilter":
-        """
-        Recomposes a state filter from 4 parts.
-
-        See `decompose_into_four_parts` (the other direction of this
-        correspondence) for descriptions on each of the parts.
-        """
-
-        # {state type -> set of state keys OR None for wildcard}
-        # (The same structure as that of a StateFilter.)
-        new_types: Dict[str, Optional[Set[str]]] = {}
-
-        # if we start with all, insert the excluded statetypes as empty sets
-        # to prevent them from being included
-        if all_part:
-            new_types.update({state_type: set() for state_type in minus_wildcards})
-
-        # insert the plus wildcards
-        new_types.update({state_type: None for state_type in plus_wildcards})
-
-        # insert the specific state keys
-        for state_type, state_key in plus_state_keys:
-            if state_type in new_types:
-                entry = new_types[state_type]
-                if entry is not None:
-                    entry.add(state_key)
-            elif not all_part:
-                # don't insert if the entire type is already included by
-                # include_others as this would actually shrink the state allowed
-                # by this filter.
-                new_types[state_type] = {state_key}
-
-        return StateFilter.freeze(new_types, include_others=all_part)
-
-    def approx_difference(self, other: "StateFilter") -> "StateFilter":
-        """
-        Returns a state filter which represents `self - other`.
-
-        This is useful for determining what state remains to be pulled out of the
-        database if we want the state included by `self` but already have the state
-        included by `other`.
-
-        The returned state filter
-        - MUST include all state events that are included by this filter (`self`)
-          unless they are included by `other`;
-        - MUST NOT include state events not included by this filter (`self`); and
-        - MAY be an over-approximation: the returned state filter
-          MAY additionally include some state events from `other`.
-
-        This implementation attempts to return the narrowest such state filter.
-        In the case that `self` contains wildcards for state types where
-        `other` contains specific state keys, an approximation must be made:
-        the returned state filter keeps the wildcard, as state filters are not
-        able to express 'all state keys except some given examples'.
-        e.g.
-            StateFilter(m.room.member -> None (wildcard))
-                minus
-            StateFilter(m.room.member -> {'@wombat:example.org'})
-                is approximated as
-            StateFilter(m.room.member -> None (wildcard))
-        """
-
-        # We first transform self and other into an alternative representation:
-        #   - whether or not they include all events to begin with ('all')
-        #   - if so, which event types are excluded? ('excludes')
-        #   - which entire event types to include ('wildcards')
-        #   - which concrete state keys to include ('concrete state keys')
-        (self_all, self_excludes), (
-            self_wildcards,
-            self_concrete_keys,
-        ) = self._decompose_into_four_parts()
-        (other_all, other_excludes), (
-            other_wildcards,
-            other_concrete_keys,
-        ) = other._decompose_into_four_parts()
-
-        # Start with an estimate of the difference based on self
-        new_all = self_all
-        # Wildcards from the other can be added to the exclusion filter
-        new_excludes = self_excludes | other_wildcards
-        # We remove wildcards that appeared as wildcards in the other
-        new_wildcards = self_wildcards - other_wildcards
-        # We filter out the concrete state keys that appear in the other
-        # as wildcards or concrete state keys.
-        new_concrete_keys = {
-            (state_type, state_key)
-            for (state_type, state_key) in self_concrete_keys
-            if state_type not in other_wildcards
-        } - other_concrete_keys
-
-        if other_all:
-            if self_all:
-                # If self starts with all, then we add as wildcards any
-                # types which appear in the other's exclusion filter (but
-                # aren't in the self exclusion filter). This is as the other
-                # filter will return everything BUT the types in its exclusion, so
-                # we need to add those excluded types that also match the self
-                # filter as wildcard types in the new filter.
-                new_wildcards |= other_excludes.difference(self_excludes)
-
-            # If other is an `include_others` then the difference isn't.
-            new_all = False
-            # (We have no need for excludes when we don't start with all, as there
-            #  is nothing to exclude.)
-            new_excludes = set()
-
-            # We also filter out all state types that aren't in the exclusion
-            # list of the other.
-            new_wildcards &= other_excludes
-            new_concrete_keys = {
-                (state_type, state_key)
-                for (state_type, state_key) in new_concrete_keys
-                if state_type in other_excludes
-            }
-
-        # Transform our newly-constructed state filter from the alternative
-        # representation back into the normal StateFilter representation.
-        return StateFilter._recompose_from_four_parts(
-            new_all, new_excludes, new_wildcards, new_concrete_keys
-        )
-
-    def must_await_full_state(self, is_mine_id: Callable[[str], bool]) -> bool:
-        """Check if we need to wait for full state to complete to calculate this state
-
-        If we have a state filter which is completely satisfied even with partial
-        state, then we don't need to await_full_state before we can return it.
-
-        Args:
-            is_mine_id: a callable which confirms if a given state_key matches a mxid
-               of a local user
-        """
-        # if we haven't requested membership events, then it depends on the value of
-        # 'include_others'
-        if EventTypes.Member not in self.types:
-            return self.include_others
-
-        # if we're looking for *all* membership events, then we have to wait
-        member_state_keys = self.types[EventTypes.Member]
-        if member_state_keys is None:
-            return True
-
-        # otherwise, consider whose membership we are looking for. If it's entirely
-        # local users, then we don't need to wait.
-        for state_key in member_state_keys:
-            if not is_mine_id(state_key):
-                # remote user
-                return True
-
-        # local users only
-        return False
-
-
-_ALL_STATE_FILTER = StateFilter(types=frozendict(), include_others=True)
-_ALL_NON_MEMBER_STATE_FILTER = StateFilter(
-    types=frozendict({EventTypes.Member: frozenset()}), include_others=True
-)
-_NONE_STATE_FILTER = StateFilter(types=frozendict(), include_others=False)
diff --git a/synapse/types.py b/synapse/types.py
deleted file mode 100644
index f2d436ddc3..0000000000
--- a/synapse/types.py
+++ /dev/null
@@ -1,928 +0,0 @@
-# Copyright 2014-2016 OpenMarket Ltd
-# Copyright 2019 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import abc
-import re
-import string
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    ClassVar,
-    Dict,
-    List,
-    Mapping,
-    Match,
-    MutableMapping,
-    NoReturn,
-    Optional,
-    Set,
-    Tuple,
-    Type,
-    TypeVar,
-    Union,
-)
-
-import attr
-from frozendict import frozendict
-from signedjson.key import decode_verify_key_bytes
-from signedjson.types import VerifyKey
-from typing_extensions import Final, TypedDict
-from unpaddedbase64 import decode_base64
-from zope.interface import Interface
-
-from twisted.internet.defer import CancelledError
-from twisted.internet.interfaces import (
-    IReactorCore,
-    IReactorPluggableNameResolver,
-    IReactorSSL,
-    IReactorTCP,
-    IReactorThreads,
-    IReactorTime,
-)
-
-from synapse.api.errors import Codes, SynapseError
-from synapse.util.cancellation import cancellable
-from synapse.util.stringutils import parse_and_validate_server_name
-
-if TYPE_CHECKING:
-    from synapse.appservice.api import ApplicationService
-    from synapse.storage.databases.main import DataStore, PurgeEventsStore
-    from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore
-
-# Define a state map type from type/state_key to T (usually an event ID or
-# event)
-T = TypeVar("T")
-StateKey = Tuple[str, str]
-StateMap = Mapping[StateKey, T]
-MutableStateMap = MutableMapping[StateKey, T]
-
-# JSON types. These could be made stronger, but will do for now.
-# A JSON-serialisable dict.
-JsonDict = Dict[str, Any]
-# A JSON-serialisable mapping; roughly speaking an immutable JSONDict.
-# Useful when you have a TypedDict which isn't going to be mutated and you don't want
-# to cast to JsonDict everywhere.
-JsonMapping = Mapping[str, Any]
-# A JSON-serialisable object.
-JsonSerializable = object
-
-
-# Note that this seems to require inheriting *directly* from Interface in order
-# for mypy-zope to realize it is an interface.
-class ISynapseReactor(
-    IReactorTCP,
-    IReactorSSL,
-    IReactorPluggableNameResolver,
-    IReactorTime,
-    IReactorCore,
-    IReactorThreads,
-    Interface,
-):
-    """The interfaces necessary for Synapse to function."""
-
-
-@attr.s(frozen=True, slots=True, auto_attribs=True)
-class Requester:
-    """
-    Represents the user making a request
-
-    Attributes:
-        user:  id of the user making the request
-        access_token_id:  *ID* of the access token used for this
-            request, or None if it came via the appservice API or similar
-        is_guest:  True if the user making this request is a guest user
-        shadow_banned:  True if the user making this request has been shadow-banned.
-        device_id:  device_id which was set at authentication time
-        app_service:  the AS requesting on behalf of the user
-        authenticated_entity: The entity that authenticated when making the request.
-            This is different to the user_id when an admin user or the server is
-            "puppeting" the user.
-    """
-
-    user: "UserID"
-    access_token_id: Optional[int]
-    is_guest: bool
-    shadow_banned: bool
-    device_id: Optional[str]
-    app_service: Optional["ApplicationService"]
-    authenticated_entity: str
-
-    def serialize(self) -> Dict[str, Any]:
-        """Converts self to a type that can be serialized as JSON, and then
-        deserialized by `deserialize`
-
-        Returns:
-            dict
-        """
-        return {
-            "user_id": self.user.to_string(),
-            "access_token_id": self.access_token_id,
-            "is_guest": self.is_guest,
-            "shadow_banned": self.shadow_banned,
-            "device_id": self.device_id,
-            "app_server_id": self.app_service.id if self.app_service else None,
-            "authenticated_entity": self.authenticated_entity,
-        }
-
-    @staticmethod
-    def deserialize(
-        store: "ApplicationServiceWorkerStore", input: Dict[str, Any]
-    ) -> "Requester":
-        """Converts a dict that was produced by `serialize` back into a
-        Requester.
-
-        Args:
-            store: Used to convert AS ID to AS object
-            input: A dict produced by `serialize`
-
-        Returns:
-            Requester
-        """
-        appservice = None
-        if input["app_server_id"]:
-            appservice = store.get_app_service_by_id(input["app_server_id"])
-
-        return Requester(
-            user=UserID.from_string(input["user_id"]),
-            access_token_id=input["access_token_id"],
-            is_guest=input["is_guest"],
-            shadow_banned=input["shadow_banned"],
-            device_id=input["device_id"],
-            app_service=appservice,
-            authenticated_entity=input["authenticated_entity"],
-        )
-
-
-def create_requester(
-    user_id: Union[str, "UserID"],
-    access_token_id: Optional[int] = None,
-    is_guest: bool = False,
-    shadow_banned: bool = False,
-    device_id: Optional[str] = None,
-    app_service: Optional["ApplicationService"] = None,
-    authenticated_entity: Optional[str] = None,
-) -> Requester:
-    """
-    Create a new ``Requester`` object
-
-    Args:
-        user_id:  id of the user making the request
-        access_token_id:  *ID* of the access token used for this
-            request, or None if it came via the appservice API or similar
-        is_guest:  True if the user making this request is a guest user
-        shadow_banned:  True if the user making this request is shadow-banned.
-        device_id:  device_id which was set at authentication time
-        app_service:  the AS requesting on behalf of the user
-        authenticated_entity: The entity that authenticated when making the request.
-            This is different to the user_id when an admin user or the server is
-            "puppeting" the user.
-
-    Returns:
-        Requester
-    """
-    if not isinstance(user_id, UserID):
-        user_id = UserID.from_string(user_id)
-
-    if authenticated_entity is None:
-        authenticated_entity = user_id.to_string()
-
-    return Requester(
-        user_id,
-        access_token_id,
-        is_guest,
-        shadow_banned,
-        device_id,
-        app_service,
-        authenticated_entity,
-    )
-
-
-def get_domain_from_id(string: str) -> str:
-    idx = string.find(":")
-    if idx == -1:
-        raise SynapseError(400, "Invalid ID: %r" % (string,))
-    return string[idx + 1 :]
-
-
-def get_localpart_from_id(string: str) -> str:
-    idx = string.find(":")
-    if idx == -1:
-        raise SynapseError(400, "Invalid ID: %r" % (string,))
-    return string[1:idx]
-
-
-DS = TypeVar("DS", bound="DomainSpecificString")
-
-
-@attr.s(slots=True, frozen=True, repr=False, auto_attribs=True)
-class DomainSpecificString(metaclass=abc.ABCMeta):
-    """Common base class among ID/name strings that have a local part and a
-    domain name, prefixed with a sigil.
-
-    Has the fields:
-
-        'localpart' : The local part of the name (without the leading sigil)
-        'domain' : The domain part of the name
-    """
-
-    SIGIL: ClassVar[str] = abc.abstractproperty()  # type: ignore
-
-    localpart: str
-    domain: str
-
-    # Because this is a frozen class, it is deeply immutable.
-    def __copy__(self: DS) -> DS:
-        return self
-
-    def __deepcopy__(self: DS, memo: Dict[str, object]) -> DS:
-        return self
-
-    @classmethod
-    def from_string(cls: Type[DS], s: str) -> DS:
-        """Parse the string given by 's' into a structure object."""
-        if len(s) < 1 or s[0:1] != cls.SIGIL:
-            raise SynapseError(
-                400,
-                "Expected %s string to start with '%s'" % (cls.__name__, cls.SIGIL),
-                Codes.INVALID_PARAM,
-            )
-
-        parts = s[1:].split(":", 1)
-        if len(parts) != 2:
-            raise SynapseError(
-                400,
-                "Expected %s of the form '%slocalname:domain'"
-                % (cls.__name__, cls.SIGIL),
-                Codes.INVALID_PARAM,
-            )
-
-        domain = parts[1]
-        # This code will need changing if we want to support multiple domain
-        # names on one HS
-        return cls(localpart=parts[0], domain=domain)
-
-    def to_string(self) -> str:
-        """Return a string encoding the fields of the structure object."""
-        return "%s%s:%s" % (self.SIGIL, self.localpart, self.domain)
-
-    @classmethod
-    def is_valid(cls: Type[DS], s: str) -> bool:
-        """Parses the input string and attempts to ensure it is valid."""
-        # TODO: this does not reject an empty localpart or an overly-long string.
-        # See https://spec.matrix.org/v1.2/appendices/#identifier-grammar
-        try:
-            obj = cls.from_string(s)
-            # Apply additional validation to the domain. This is only done
-            # during  is_valid (and not part of from_string) since it is
-            # possible for invalid data to exist in room-state, etc.
-            parse_and_validate_server_name(obj.domain)
-            return True
-        except Exception:
-            return False
-
-    __repr__ = to_string
-
-
-@attr.s(slots=True, frozen=True, repr=False)
-class UserID(DomainSpecificString):
-    """Structure representing a user ID."""
-
-    SIGIL = "@"
-
-
-@attr.s(slots=True, frozen=True, repr=False)
-class RoomAlias(DomainSpecificString):
-    """Structure representing a room name."""
-
-    SIGIL = "#"
-
-
-@attr.s(slots=True, frozen=True, repr=False)
-class RoomID(DomainSpecificString):
-    """Structure representing a room id."""
-
-    SIGIL = "!"
-
-
-@attr.s(slots=True, frozen=True, repr=False)
-class EventID(DomainSpecificString):
-    """Structure representing an event id."""
-
-    SIGIL = "$"
-
-
-mxid_localpart_allowed_characters = set(
-    "_-./=" + string.ascii_lowercase + string.digits
-)
-
-
-def contains_invalid_mxid_characters(localpart: str) -> bool:
-    """Check for characters not allowed in an mxid or groupid localpart
-
-    Args:
-        localpart: the localpart to be checked
-
-    Returns:
-        True if there are any naughty characters
-    """
-    return any(c not in mxid_localpart_allowed_characters for c in localpart)
-
-
-UPPER_CASE_PATTERN = re.compile(b"[A-Z_]")
-
-# the following is a pattern which matches '=', and bytes which are not allowed in a mxid
-# localpart.
-#
-# It works by:
-#  * building a string containing the allowed characters (excluding '=')
-#  * escaping every special character with a backslash (to stop '-' being interpreted as a
-#    range operator)
-#  * wrapping it in a '[^...]' regex
-#  * converting the whole lot to a 'bytes' sequence, so that we can use it to match
-#    bytes rather than strings
-#
-NON_MXID_CHARACTER_PATTERN = re.compile(
-    ("[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters - {"="})),)).encode(
-        "ascii"
-    )
-)
-
-
-def map_username_to_mxid_localpart(
-    username: Union[str, bytes], case_sensitive: bool = False
-) -> str:
-    """Map a username onto a string suitable for a MXID
-
-    This follows the algorithm laid out at
-    https://matrix.org/docs/spec/appendices.html#mapping-from-other-character-sets.
-
-    Args:
-        username: username to be mapped
-        case_sensitive: true if TEST and test should be mapped
-            onto different mxids
-
-    Returns:
-        string suitable for a mxid localpart
-    """
-    if not isinstance(username, bytes):
-        username = username.encode("utf-8")
-
-    # first we sort out upper-case characters
-    if case_sensitive:
-
-        def f1(m: Match[bytes]) -> bytes:
-            return b"_" + m.group().lower()
-
-        username = UPPER_CASE_PATTERN.sub(f1, username)
-    else:
-        username = username.lower()
-
-    # then we sort out non-ascii characters by converting to the hex equivalent.
-    def f2(m: Match[bytes]) -> bytes:
-        return b"=%02x" % (m.group()[0],)
-
-    username = NON_MXID_CHARACTER_PATTERN.sub(f2, username)
-
-    # we also do the =-escaping to mxids starting with an underscore.
-    username = re.sub(b"^_", b"=5f", username)
-
-    # we should now only have ascii bytes left, so can decode back to a string.
-    return username.decode("ascii")
-
-
-@attr.s(frozen=True, slots=True, order=False)
-class RoomStreamToken:
-    """Tokens are positions between events. The token "s1" comes after event 1.
-
-            s0    s1
-            |     |
-        [0] ▼ [1] ▼ [2]
-
-    Tokens can either be a point in the live event stream or a cursor going
-    through historic events.
-
-    When traversing the live event stream, events are ordered by
-    `stream_ordering` (when they arrived at the homeserver).
-
-    When traversing historic events, events are first ordered by their `depth`
-    (`topological_ordering` in the event graph) and tie-broken by
-    `stream_ordering` (when the event arrived at the homeserver).
-
-    If you're looking for more info about what a token with all of the
-    underscores means, ex.
-    `s2633508_17_338_6732159_1082514_541479_274711_265584_1`, see the docstring
-    for `StreamToken` below.
-
-    ---
-
-    Live tokens start with an "s" followed by the `stream_ordering` of the event
-    that comes before the position of the token. Said another way:
-    `stream_ordering` uniquely identifies a persisted event. The live token
-    means "the position just after the event identified by `stream_ordering`".
-    An example token is:
-
-        s2633508
-
-    ---
-
-    Historic tokens start with a "t" followed by the `depth`
-    (`topological_ordering` in the event graph) of the event that comes before
-    the position of the token, followed by "-", followed by the
-    `stream_ordering` of the event that comes before the position of the token.
-    An example token is:
-
-        t426-2633508
-
-    ---
-
-    There is also a third mode for live tokens where the token starts with "m",
-    which is sometimes used when using sharded event persisters. In this case
-    the events stream is considered to be a set of streams (one for each writer)
-    and the token encodes the vector clock of positions of each writer in their
-    respective streams.
-
-    The format of the token in such case is an initial integer min position,
-    followed by the mapping of instance ID to position separated by '.' and '~':
-
-        m{min_pos}~{writer1}.{pos1}~{writer2}.{pos2}. ...
-
-    The `min_pos` corresponds to the minimum position all writers have persisted
-    up to, and then only writers that are ahead of that position need to be
-    encoded. An example token is:
-
-        m56~2.58~3.59
-
-    Which corresponds to a set of three (or more writers) where instances 2 and
-    3 (these are instance IDs that can be looked up in the DB to fetch the more
-    commonly used instance names) are at positions 58 and 59 respectively, and
-    all other instances are at position 56.
-
-    Note: The `RoomStreamToken` cannot have both a topological part and an
-    instance map.
-
-    ---
-
-    For caching purposes, `RoomStreamToken`s and by extension, all their
-    attributes, must be hashable.
-    """
-
-    topological: Optional[int] = attr.ib(
-        validator=attr.validators.optional(attr.validators.instance_of(int)),
-    )
-    stream: int = attr.ib(validator=attr.validators.instance_of(int))
-
-    instance_map: "frozendict[str, int]" = attr.ib(
-        factory=frozendict,
-        validator=attr.validators.deep_mapping(
-            key_validator=attr.validators.instance_of(str),
-            value_validator=attr.validators.instance_of(int),
-            mapping_validator=attr.validators.instance_of(frozendict),
-        ),
-    )
-
-    def __attrs_post_init__(self) -> None:
-        """Validates that both `topological` and `instance_map` aren't set."""
-
-        if self.instance_map and self.topological:
-            raise ValueError(
-                "Cannot set both 'topological' and 'instance_map' on 'RoomStreamToken'."
-            )
-
-    @classmethod
-    async def parse(cls, store: "PurgeEventsStore", string: str) -> "RoomStreamToken":
-        try:
-            if string[0] == "s":
-                return cls(topological=None, stream=int(string[1:]))
-            if string[0] == "t":
-                parts = string[1:].split("-", 1)
-                return cls(topological=int(parts[0]), stream=int(parts[1]))
-            if string[0] == "m":
-                parts = string[1:].split("~")
-                stream = int(parts[0])
-
-                instance_map = {}
-                for part in parts[1:]:
-                    key, value = part.split(".")
-                    instance_id = int(key)
-                    pos = int(value)
-
-                    instance_name = await store.get_name_from_instance_id(instance_id)  # type: ignore[attr-defined]
-                    instance_map[instance_name] = pos
-
-                return cls(
-                    topological=None,
-                    stream=stream,
-                    instance_map=frozendict(instance_map),
-                )
-        except CancelledError:
-            raise
-        except Exception:
-            pass
-        raise SynapseError(400, "Invalid room stream token %r" % (string,))
-
-    @classmethod
-    def parse_stream_token(cls, string: str) -> "RoomStreamToken":
-        try:
-            if string[0] == "s":
-                return cls(topological=None, stream=int(string[1:]))
-        except Exception:
-            pass
-        raise SynapseError(400, "Invalid room stream token %r" % (string,))
-
-    def copy_and_advance(self, other: "RoomStreamToken") -> "RoomStreamToken":
-        """Return a new token such that if an event is after both this token and
-        the other token, then its after the returned token too.
-        """
-
-        if self.topological or other.topological:
-            raise Exception("Can't advance topological tokens")
-
-        max_stream = max(self.stream, other.stream)
-
-        instance_map = {
-            instance: max(
-                self.instance_map.get(instance, self.stream),
-                other.instance_map.get(instance, other.stream),
-            )
-            for instance in set(self.instance_map).union(other.instance_map)
-        }
-
-        return RoomStreamToken(None, max_stream, frozendict(instance_map))
-
-    def as_historical_tuple(self) -> Tuple[int, int]:
-        """Returns a tuple of `(topological, stream)` for historical tokens.
-
-        Raises if not an historical token (i.e. doesn't have a topological part).
-        """
-        if self.topological is None:
-            raise Exception(
-                "Cannot call `RoomStreamToken.as_historical_tuple` on live token"
-            )
-
-        return self.topological, self.stream
-
-    def get_stream_pos_for_instance(self, instance_name: str) -> int:
-        """Get the stream position that the given writer was at at this token.
-
-        This only makes sense for "live" tokens that may have a vector clock
-        component, and so asserts that this is a "live" token.
-        """
-        assert self.topological is None
-
-        # If we don't have an entry for the instance we can assume that it was
-        # at `self.stream`.
-        return self.instance_map.get(instance_name, self.stream)
-
-    def get_max_stream_pos(self) -> int:
-        """Get the maximum stream position referenced in this token.
-
-        The corresponding "min" position is, by definition just `self.stream`.
-
-        This is used to handle tokens that have non-empty `instance_map`, and so
-        reference stream positions after the `self.stream` position.
-        """
-        return max(self.instance_map.values(), default=self.stream)
-
-    async def to_string(self, store: "DataStore") -> str:
-        if self.topological is not None:
-            return "t%d-%d" % (self.topological, self.stream)
-        elif self.instance_map:
-            entries = []
-            for name, pos in self.instance_map.items():
-                instance_id = await store.get_id_for_instance(name)
-                entries.append(f"{instance_id}.{pos}")
-
-            encoded_map = "~".join(entries)
-            return f"m{self.stream}~{encoded_map}"
-        else:
-            return "s%d" % (self.stream,)
-
-
-class StreamKeyType:
-    """Known stream types.
-
-    A stream is a list of entities ordered by an incrementing "stream token".
-    """
-
-    ROOM: Final = "room_key"
-    PRESENCE: Final = "presence_key"
-    TYPING: Final = "typing_key"
-    RECEIPT: Final = "receipt_key"
-    ACCOUNT_DATA: Final = "account_data_key"
-    PUSH_RULES: Final = "push_rules_key"
-    TO_DEVICE: Final = "to_device_key"
-    DEVICE_LIST: Final = "device_list_key"
-
-
-@attr.s(slots=True, frozen=True, auto_attribs=True)
-class StreamToken:
-    """A collection of keys joined together by underscores in the following
-    order and which represent the position in their respective streams.
-
-    ex. `s2633508_17_338_6732159_1082514_541479_274711_265584_1`
-        1. `room_key`: `s2633508` which is a `RoomStreamToken`
-           - `RoomStreamToken`'s can also look like `t426-2633508` or `m56~2.58~3.59`
-           - See the docstring for `RoomStreamToken` for more details.
-        2. `presence_key`: `17`
-        3. `typing_key`: `338`
-        4. `receipt_key`: `6732159`
-        5. `account_data_key`: `1082514`
-        6. `push_rules_key`: `541479`
-        7. `to_device_key`: `274711`
-        8. `device_list_key`: `265584`
-        9. `groups_key`: `1` (note that this key is now unused)
-
-    You can see how many of these keys correspond to the various
-    fields in a "/sync" response:
-    ```json
-    {
-        "next_batch": "s12_4_0_1_1_1_1_4_1",
-        "presence": {
-            "events": []
-        },
-        "device_lists": {
-            "changed": []
-        },
-        "rooms": {
-            "join": {
-                "!QrZlfIDQLNLdZHqTnt:hs1": {
-                    "timeline": {
-                        "events": [],
-                        "prev_batch": "s10_4_0_1_1_1_1_4_1",
-                        "limited": false
-                    },
-                    "state": {
-                        "events": []
-                    },
-                    "account_data": {
-                        "events": []
-                    },
-                    "ephemeral": {
-                        "events": []
-                    }
-                }
-            }
-        }
-    }
-    ```
-
-    ---
-
-    For caching purposes, `StreamToken`s and by extension, all their attributes,
-    must be hashable.
-    """
-
-    room_key: RoomStreamToken = attr.ib(
-        validator=attr.validators.instance_of(RoomStreamToken)
-    )
-    presence_key: int
-    typing_key: int
-    receipt_key: int
-    account_data_key: int
-    push_rules_key: int
-    to_device_key: int
-    device_list_key: int
-    # Note that the groups key is no longer used and may have bogus values.
-    groups_key: int
-
-    _SEPARATOR = "_"
-    START: ClassVar["StreamToken"]
-
-    @classmethod
-    @cancellable
-    async def from_string(cls, store: "DataStore", string: str) -> "StreamToken":
-        """
-        Creates a RoomStreamToken from its textual representation.
-        """
-        try:
-            keys = string.split(cls._SEPARATOR)
-            while len(keys) < len(attr.fields(cls)):
-                # i.e. old token from before receipt_key
-                keys.append("0")
-            return cls(
-                await RoomStreamToken.parse(store, keys[0]), *(int(k) for k in keys[1:])
-            )
-        except CancelledError:
-            raise
-        except Exception:
-            raise SynapseError(400, "Invalid stream token")
-
-    async def to_string(self, store: "DataStore") -> str:
-        return self._SEPARATOR.join(
-            [
-                await self.room_key.to_string(store),
-                str(self.presence_key),
-                str(self.typing_key),
-                str(self.receipt_key),
-                str(self.account_data_key),
-                str(self.push_rules_key),
-                str(self.to_device_key),
-                str(self.device_list_key),
-                # Note that the groups key is no longer used, but it is still
-                # serialized so that there will not be confusion in the future
-                # if additional tokens are added.
-                str(self.groups_key),
-            ]
-        )
-
-    @property
-    def room_stream_id(self) -> int:
-        return self.room_key.stream
-
-    def copy_and_advance(self, key: str, new_value: Any) -> "StreamToken":
-        """Advance the given key in the token to a new value if and only if the
-        new value is after the old value.
-
-        :raises TypeError: if `key` is not the one of the keys tracked by a StreamToken.
-        """
-        if key == StreamKeyType.ROOM:
-            new_token = self.copy_and_replace(
-                StreamKeyType.ROOM, self.room_key.copy_and_advance(new_value)
-            )
-            return new_token
-
-        new_token = self.copy_and_replace(key, new_value)
-        new_id = int(getattr(new_token, key))
-        old_id = int(getattr(self, key))
-
-        if old_id < new_id:
-            return new_token
-        else:
-            return self
-
-    def copy_and_replace(self, key: str, new_value: Any) -> "StreamToken":
-        return attr.evolve(self, **{key: new_value})
-
-
-StreamToken.START = StreamToken(RoomStreamToken(None, 0), 0, 0, 0, 0, 0, 0, 0, 0)
-
-
-@attr.s(slots=True, frozen=True, auto_attribs=True)
-class PersistedEventPosition:
-    """Position of a newly persisted event with instance that persisted it.
-
-    This can be used to test whether the event is persisted before or after a
-    RoomStreamToken.
-    """
-
-    instance_name: str
-    stream: int
-
-    def persisted_after(self, token: RoomStreamToken) -> bool:
-        return token.get_stream_pos_for_instance(self.instance_name) < self.stream
-
-    def to_room_stream_token(self) -> RoomStreamToken:
-        """Converts the position to a room stream token such that events
-        persisted in the same room after this position will be after the
-        returned `RoomStreamToken`.
-
-        Note: no guarantees are made about ordering w.r.t. events in other
-        rooms.
-        """
-        # Doing the naive thing satisfies the desired properties described in
-        # the docstring.
-        return RoomStreamToken(None, self.stream)
-
-
-@attr.s(slots=True, frozen=True, auto_attribs=True)
-class ThirdPartyInstanceID:
-    appservice_id: Optional[str]
-    network_id: Optional[str]
-
-    # Deny iteration because it will bite you if you try to create a singleton
-    # set by:
-    #    users = set(user)
-    def __iter__(self) -> NoReturn:
-        raise ValueError("Attempted to iterate a %s" % (type(self).__name__,))
-
-    # Because this class is a frozen class, it is deeply immutable.
-    def __copy__(self) -> "ThirdPartyInstanceID":
-        return self
-
-    def __deepcopy__(self, memo: Dict[str, object]) -> "ThirdPartyInstanceID":
-        return self
-
-    @classmethod
-    def from_string(cls, s: str) -> "ThirdPartyInstanceID":
-        bits = s.split("|", 2)
-        if len(bits) != 2:
-            raise SynapseError(400, "Invalid ID %r" % (s,))
-
-        return cls(appservice_id=bits[0], network_id=bits[1])
-
-    def to_string(self) -> str:
-        return "%s|%s" % (self.appservice_id, self.network_id)
-
-    __str__ = to_string
-
-
-@attr.s(slots=True, frozen=True, auto_attribs=True)
-class ReadReceipt:
-    """Information about a read-receipt"""
-
-    room_id: str
-    receipt_type: str
-    user_id: str
-    event_ids: List[str]
-    thread_id: Optional[str]
-    data: JsonDict
-
-
-@attr.s(slots=True, frozen=True, auto_attribs=True)
-class DeviceListUpdates:
-    """
-    An object containing a diff of information regarding other users' device lists, intended for
-    a recipient to carry out device list tracking.
-
-    Attributes:
-        changed: A set of users whose device lists have changed recently.
-        left: A set of users who the recipient no longer needs to track the device lists of.
-            Typically when those users no longer share any end-to-end encryption enabled rooms.
-    """
-
-    # We need to use a factory here, otherwise `set` is not evaluated at
-    # object instantiation, but instead at class definition instantiation.
-    # The latter happening only once, thus always giving you the same sets
-    # across multiple DeviceListUpdates instances.
-    # Also see: don't define mutable default arguments.
-    changed: Set[str] = attr.ib(factory=set)
-    left: Set[str] = attr.ib(factory=set)
-
-    def __bool__(self) -> bool:
-        return bool(self.changed or self.left)
-
-
-def get_verify_key_from_cross_signing_key(
-    key_info: Mapping[str, Any]
-) -> Tuple[str, VerifyKey]:
-    """Get the key ID and signedjson verify key from a cross-signing key dict
-
-    Args:
-        key_info: a cross-signing key dict, which must have a "keys"
-            property that has exactly one item in it
-
-    Returns:
-        the key ID and verify key for the cross-signing key
-    """
-    # make sure that a `keys` field is provided
-    if "keys" not in key_info:
-        raise ValueError("Invalid key")
-    keys = key_info["keys"]
-    # and that it contains exactly one key
-    if len(keys) == 1:
-        key_id, key_data = next(iter(keys.items()))
-        return key_id, decode_verify_key_bytes(key_id, decode_base64(key_data))
-    else:
-        raise ValueError("Invalid key")
-
-
-@attr.s(auto_attribs=True, frozen=True, slots=True)
-class UserInfo:
-    """Holds information about a user. Result of get_userinfo_by_id.
-
-    Attributes:
-        user_id:  ID of the user.
-        appservice_id:  Application service ID that created this user.
-        consent_server_notice_sent:  Version of policy documents the user has been sent.
-        consent_version:  Version of policy documents the user has consented to.
-        creation_ts:  Creation timestamp of the user.
-        is_admin:  True if the user is an admin.
-        is_deactivated:  True if the user has been deactivated.
-        is_guest:  True if the user is a guest user.
-        is_shadow_banned:  True if the user has been shadow-banned.
-        user_type:  User type (None for normal user, 'support' and 'bot' other options).
-    """
-
-    user_id: UserID
-    appservice_id: Optional[int]
-    consent_server_notice_sent: Optional[str]
-    consent_version: Optional[str]
-    user_type: Optional[str]
-    creation_ts: int
-    is_admin: bool
-    is_deactivated: bool
-    is_guest: bool
-    is_shadow_banned: bool
-
-
-class UserProfile(TypedDict):
-    user_id: str
-    display_name: Optional[str]
-    avatar_url: Optional[str]
-
-
-@attr.s(auto_attribs=True, frozen=True, slots=True)
-class RetentionPolicy:
-    min_lifetime: Optional[int] = None
-    max_lifetime: Optional[int] = None
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
new file mode 100644
index 0000000000..f2d436ddc3
--- /dev/null
+++ b/synapse/types/__init__.py
@@ -0,0 +1,928 @@
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import abc
+import re
+import string
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    ClassVar,
+    Dict,
+    List,
+    Mapping,
+    Match,
+    MutableMapping,
+    NoReturn,
+    Optional,
+    Set,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+)
+
+import attr
+from frozendict import frozendict
+from signedjson.key import decode_verify_key_bytes
+from signedjson.types import VerifyKey
+from typing_extensions import Final, TypedDict
+from unpaddedbase64 import decode_base64
+from zope.interface import Interface
+
+from twisted.internet.defer import CancelledError
+from twisted.internet.interfaces import (
+    IReactorCore,
+    IReactorPluggableNameResolver,
+    IReactorSSL,
+    IReactorTCP,
+    IReactorThreads,
+    IReactorTime,
+)
+
+from synapse.api.errors import Codes, SynapseError
+from synapse.util.cancellation import cancellable
+from synapse.util.stringutils import parse_and_validate_server_name
+
+if TYPE_CHECKING:
+    from synapse.appservice.api import ApplicationService
+    from synapse.storage.databases.main import DataStore, PurgeEventsStore
+    from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore
+
+# Define a state map type from type/state_key to T (usually an event ID or
+# event)
+T = TypeVar("T")
+StateKey = Tuple[str, str]
+StateMap = Mapping[StateKey, T]
+MutableStateMap = MutableMapping[StateKey, T]
+
+# JSON types. These could be made stronger, but will do for now.
+# A JSON-serialisable dict.
+JsonDict = Dict[str, Any]
+# A JSON-serialisable mapping; roughly speaking an immutable JSONDict.
+# Useful when you have a TypedDict which isn't going to be mutated and you don't want
+# to cast to JsonDict everywhere.
+JsonMapping = Mapping[str, Any]
+# A JSON-serialisable object.
+JsonSerializable = object
+
+
+# Note that this seems to require inheriting *directly* from Interface in order
+# for mypy-zope to realize it is an interface.
+class ISynapseReactor(
+    IReactorTCP,
+    IReactorSSL,
+    IReactorPluggableNameResolver,
+    IReactorTime,
+    IReactorCore,
+    IReactorThreads,
+    Interface,
+):
+    """The interfaces necessary for Synapse to function."""
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class Requester:
+    """
+    Represents the user making a request
+
+    Attributes:
+        user:  id of the user making the request
+        access_token_id:  *ID* of the access token used for this
+            request, or None if it came via the appservice API or similar
+        is_guest:  True if the user making this request is a guest user
+        shadow_banned:  True if the user making this request has been shadow-banned.
+        device_id:  device_id which was set at authentication time
+        app_service:  the AS requesting on behalf of the user
+        authenticated_entity: The entity that authenticated when making the request.
+            This is different to the user_id when an admin user or the server is
+            "puppeting" the user.
+    """
+
+    user: "UserID"
+    access_token_id: Optional[int]
+    is_guest: bool
+    shadow_banned: bool
+    device_id: Optional[str]
+    app_service: Optional["ApplicationService"]
+    authenticated_entity: str
+
+    def serialize(self) -> Dict[str, Any]:
+        """Converts self to a type that can be serialized as JSON, and then
+        deserialized by `deserialize`
+
+        Returns:
+            dict
+        """
+        return {
+            "user_id": self.user.to_string(),
+            "access_token_id": self.access_token_id,
+            "is_guest": self.is_guest,
+            "shadow_banned": self.shadow_banned,
+            "device_id": self.device_id,
+            "app_server_id": self.app_service.id if self.app_service else None,
+            "authenticated_entity": self.authenticated_entity,
+        }
+
+    @staticmethod
+    def deserialize(
+        store: "ApplicationServiceWorkerStore", input: Dict[str, Any]
+    ) -> "Requester":
+        """Converts a dict that was produced by `serialize` back into a
+        Requester.
+
+        Args:
+            store: Used to convert AS ID to AS object
+            input: A dict produced by `serialize`
+
+        Returns:
+            Requester
+        """
+        appservice = None
+        if input["app_server_id"]:
+            appservice = store.get_app_service_by_id(input["app_server_id"])
+
+        return Requester(
+            user=UserID.from_string(input["user_id"]),
+            access_token_id=input["access_token_id"],
+            is_guest=input["is_guest"],
+            shadow_banned=input["shadow_banned"],
+            device_id=input["device_id"],
+            app_service=appservice,
+            authenticated_entity=input["authenticated_entity"],
+        )
+
+
+def create_requester(
+    user_id: Union[str, "UserID"],
+    access_token_id: Optional[int] = None,
+    is_guest: bool = False,
+    shadow_banned: bool = False,
+    device_id: Optional[str] = None,
+    app_service: Optional["ApplicationService"] = None,
+    authenticated_entity: Optional[str] = None,
+) -> Requester:
+    """
+    Create a new ``Requester`` object
+
+    Args:
+        user_id:  id of the user making the request
+        access_token_id:  *ID* of the access token used for this
+            request, or None if it came via the appservice API or similar
+        is_guest:  True if the user making this request is a guest user
+        shadow_banned:  True if the user making this request is shadow-banned.
+        device_id:  device_id which was set at authentication time
+        app_service:  the AS requesting on behalf of the user
+        authenticated_entity: The entity that authenticated when making the request.
+            This is different to the user_id when an admin user or the server is
+            "puppeting" the user.
+
+    Returns:
+        Requester
+    """
+    if not isinstance(user_id, UserID):
+        user_id = UserID.from_string(user_id)
+
+    if authenticated_entity is None:
+        authenticated_entity = user_id.to_string()
+
+    return Requester(
+        user_id,
+        access_token_id,
+        is_guest,
+        shadow_banned,
+        device_id,
+        app_service,
+        authenticated_entity,
+    )
+
+
+def get_domain_from_id(string: str) -> str:
+    idx = string.find(":")
+    if idx == -1:
+        raise SynapseError(400, "Invalid ID: %r" % (string,))
+    return string[idx + 1 :]
+
+
+def get_localpart_from_id(string: str) -> str:
+    idx = string.find(":")
+    if idx == -1:
+        raise SynapseError(400, "Invalid ID: %r" % (string,))
+    return string[1:idx]
+
+
+DS = TypeVar("DS", bound="DomainSpecificString")
+
+
+@attr.s(slots=True, frozen=True, repr=False, auto_attribs=True)
+class DomainSpecificString(metaclass=abc.ABCMeta):
+    """Common base class among ID/name strings that have a local part and a
+    domain name, prefixed with a sigil.
+
+    Has the fields:
+
+        'localpart' : The local part of the name (without the leading sigil)
+        'domain' : The domain part of the name
+    """
+
+    SIGIL: ClassVar[str] = abc.abstractproperty()  # type: ignore
+
+    localpart: str
+    domain: str
+
+    # Because this is a frozen class, it is deeply immutable.
+    def __copy__(self: DS) -> DS:
+        return self
+
+    def __deepcopy__(self: DS, memo: Dict[str, object]) -> DS:
+        return self
+
+    @classmethod
+    def from_string(cls: Type[DS], s: str) -> DS:
+        """Parse the string given by 's' into a structure object."""
+        if len(s) < 1 or s[0:1] != cls.SIGIL:
+            raise SynapseError(
+                400,
+                "Expected %s string to start with '%s'" % (cls.__name__, cls.SIGIL),
+                Codes.INVALID_PARAM,
+            )
+
+        parts = s[1:].split(":", 1)
+        if len(parts) != 2:
+            raise SynapseError(
+                400,
+                "Expected %s of the form '%slocalname:domain'"
+                % (cls.__name__, cls.SIGIL),
+                Codes.INVALID_PARAM,
+            )
+
+        domain = parts[1]
+        # This code will need changing if we want to support multiple domain
+        # names on one HS
+        return cls(localpart=parts[0], domain=domain)
+
+    def to_string(self) -> str:
+        """Return a string encoding the fields of the structure object."""
+        return "%s%s:%s" % (self.SIGIL, self.localpart, self.domain)
+
+    @classmethod
+    def is_valid(cls: Type[DS], s: str) -> bool:
+        """Parses the input string and attempts to ensure it is valid."""
+        # TODO: this does not reject an empty localpart or an overly-long string.
+        # See https://spec.matrix.org/v1.2/appendices/#identifier-grammar
+        try:
+            obj = cls.from_string(s)
+            # Apply additional validation to the domain. This is only done
+            # during is_valid (and not part of from_string) since it is
+            # possible for invalid data to exist in room-state, etc.
+            parse_and_validate_server_name(obj.domain)
+            return True
+        except Exception:
+            return False
+
+    __repr__ = to_string
+
+
+@attr.s(slots=True, frozen=True, repr=False)
+class UserID(DomainSpecificString):
+    """Structure representing a user ID."""
+
+    SIGIL = "@"
+
+
+@attr.s(slots=True, frozen=True, repr=False)
+class RoomAlias(DomainSpecificString):
+    """Structure representing a room alias."""
+
+    SIGIL = "#"
+
+
+@attr.s(slots=True, frozen=True, repr=False)
+class RoomID(DomainSpecificString):
+    """Structure representing a room id."""
+
+    SIGIL = "!"
+
+
+@attr.s(slots=True, frozen=True, repr=False)
+class EventID(DomainSpecificString):
+    """Structure representing an event id."""
+
+    SIGIL = "$"
+
+
+mxid_localpart_allowed_characters = set(
+    "_-./=" + string.ascii_lowercase + string.digits
+)
+
+
+def contains_invalid_mxid_characters(localpart: str) -> bool:
+    """Check for characters not allowed in an mxid or groupid localpart
+
+    Args:
+        localpart: the localpart to be checked
+
+    Returns:
+        True if there are any naughty characters
+    """
+    return any(c not in mxid_localpart_allowed_characters for c in localpart)
+
+
+UPPER_CASE_PATTERN = re.compile(b"[A-Z_]")
+
+# the following is a pattern which matches '=', and bytes which are not allowed in a mxid
+# localpart.
+#
+# It works by:
+#  * building a string containing the allowed characters (excluding '=')
+#  * escaping every special character with a backslash (to stop '-' being interpreted as a
+#    range operator)
+#  * wrapping it in a '[^...]' regex
+#  * converting the whole lot to a 'bytes' sequence, so that we can use it to match
+#    bytes rather than strings
+#
+NON_MXID_CHARACTER_PATTERN = re.compile(
+    ("[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters - {"="})),)).encode(
+        "ascii"
+    )
+)
+
+
+def map_username_to_mxid_localpart(
+    username: Union[str, bytes], case_sensitive: bool = False
+) -> str:
+    """Map a username onto a string suitable for a MXID
+
+    This follows the algorithm laid out at
+    https://matrix.org/docs/spec/appendices.html#mapping-from-other-character-sets.
+
+    Args:
+        username: username to be mapped
+        case_sensitive: true if TEST and test should be mapped
+            onto different mxids
+
+    Returns:
+        string suitable for a mxid localpart
+    """
+    if not isinstance(username, bytes):
+        username = username.encode("utf-8")
+
+    # first we sort out upper-case characters
+    if case_sensitive:
+
+        def f1(m: Match[bytes]) -> bytes:
+            return b"_" + m.group().lower()
+
+        username = UPPER_CASE_PATTERN.sub(f1, username)
+    else:
+        username = username.lower()
+
+    # then we sort out non-ascii characters by converting to the hex equivalent.
+    def f2(m: Match[bytes]) -> bytes:
+        return b"=%02x" % (m.group()[0],)
+
+    username = NON_MXID_CHARACTER_PATTERN.sub(f2, username)
+
+    # we also do the =-escaping to mxids starting with an underscore.
+    username = re.sub(b"^_", b"=5f", username)
+
+    # we should now only have ascii bytes left, so can decode back to a string.
+    return username.decode("ascii")
+
+
+@attr.s(frozen=True, slots=True, order=False)
+class RoomStreamToken:
+    """Tokens are positions between events. The token "s1" comes after event 1.
+
+            s0    s1
+            |     |
+        [0] ▼ [1] ▼ [2]
+
+    Tokens can either be a point in the live event stream or a cursor going
+    through historic events.
+
+    When traversing the live event stream, events are ordered by
+    `stream_ordering` (when they arrived at the homeserver).
+
+    When traversing historic events, events are first ordered by their `depth`
+    (`topological_ordering` in the event graph) and tie-broken by
+    `stream_ordering` (when the event arrived at the homeserver).
+
+    If you're looking for more info about what a token with all of the
+    underscores means, ex.
+    `s2633508_17_338_6732159_1082514_541479_274711_265584_1`, see the docstring
+    for `StreamToken` below.
+
+    ---
+
+    Live tokens start with an "s" followed by the `stream_ordering` of the event
+    that comes before the position of the token. Said another way:
+    `stream_ordering` uniquely identifies a persisted event. The live token
+    means "the position just after the event identified by `stream_ordering`".
+    An example token is:
+
+        s2633508
+
+    ---
+
+    Historic tokens start with a "t" followed by the `depth`
+    (`topological_ordering` in the event graph) of the event that comes before
+    the position of the token, followed by "-", followed by the
+    `stream_ordering` of the event that comes before the position of the token.
+    An example token is:
+
+        t426-2633508
+
+    ---
+
+    There is also a third mode for live tokens where the token starts with "m",
+    which is sometimes used when using sharded event persisters. In this case
+    the events stream is considered to be a set of streams (one for each writer)
+    and the token encodes the vector clock of positions of each writer in their
+    respective streams.
+
+    The format of the token in such case is an initial integer min position,
+    followed by the mapping of instance ID to position separated by '.' and '~':
+
+        m{min_pos}~{writer1}.{pos1}~{writer2}.{pos2}. ...
+
+    The `min_pos` corresponds to the minimum position all writers have persisted
+    up to, and then only writers that are ahead of that position need to be
+    encoded. An example token is:
+
+        m56~2.58~3.59
+
+    Which corresponds to a set of three (or more) writers where instances 2 and
+    3 (these are instance IDs that can be looked up in the DB to fetch the more
+    commonly used instance names) are at positions 58 and 59 respectively, and
+    all other instances are at position 56.
+
+    Note: The `RoomStreamToken` cannot have both a topological part and an
+    instance map.
+
+    ---
+
+    For caching purposes, `RoomStreamToken`s and by extension, all their
+    attributes, must be hashable.
+    """
+
+    topological: Optional[int] = attr.ib(
+        validator=attr.validators.optional(attr.validators.instance_of(int)),
+    )
+    stream: int = attr.ib(validator=attr.validators.instance_of(int))
+
+    instance_map: "frozendict[str, int]" = attr.ib(
+        factory=frozendict,
+        validator=attr.validators.deep_mapping(
+            key_validator=attr.validators.instance_of(str),
+            value_validator=attr.validators.instance_of(int),
+            mapping_validator=attr.validators.instance_of(frozendict),
+        ),
+    )
+
+    def __attrs_post_init__(self) -> None:
+        """Validates that both `topological` and `instance_map` aren't set."""
+
+        if self.instance_map and self.topological:
+            raise ValueError(
+                "Cannot set both 'topological' and 'instance_map' on 'RoomStreamToken'."
+            )
+
+    @classmethod
+    async def parse(cls, store: "PurgeEventsStore", string: str) -> "RoomStreamToken":
+        try:
+            if string[0] == "s":
+                return cls(topological=None, stream=int(string[1:]))
+            if string[0] == "t":
+                parts = string[1:].split("-", 1)
+                return cls(topological=int(parts[0]), stream=int(parts[1]))
+            if string[0] == "m":
+                parts = string[1:].split("~")
+                stream = int(parts[0])
+
+                instance_map = {}
+                for part in parts[1:]:
+                    key, value = part.split(".")
+                    instance_id = int(key)
+                    pos = int(value)
+
+                    instance_name = await store.get_name_from_instance_id(instance_id)  # type: ignore[attr-defined]
+                    instance_map[instance_name] = pos
+
+                return cls(
+                    topological=None,
+                    stream=stream,
+                    instance_map=frozendict(instance_map),
+                )
+        except CancelledError:
+            raise
+        except Exception:
+            pass
+        raise SynapseError(400, "Invalid room stream token %r" % (string,))
+
+    @classmethod
+    def parse_stream_token(cls, string: str) -> "RoomStreamToken":
+        try:
+            if string[0] == "s":
+                return cls(topological=None, stream=int(string[1:]))
+        except Exception:
+            pass
+        raise SynapseError(400, "Invalid room stream token %r" % (string,))
+
+    def copy_and_advance(self, other: "RoomStreamToken") -> "RoomStreamToken":
+        """Return a new token such that if an event is after both this token and
+        the other token, then it's after the returned token too.
+        """
+
+        if self.topological or other.topological:
+            raise Exception("Can't advance topological tokens")
+
+        max_stream = max(self.stream, other.stream)
+
+        instance_map = {
+            instance: max(
+                self.instance_map.get(instance, self.stream),
+                other.instance_map.get(instance, other.stream),
+            )
+            for instance in set(self.instance_map).union(other.instance_map)
+        }
+
+        return RoomStreamToken(None, max_stream, frozendict(instance_map))
+
+    def as_historical_tuple(self) -> Tuple[int, int]:
+        """Returns a tuple of `(topological, stream)` for historical tokens.
+
+        Raises if not an historical token (i.e. doesn't have a topological part).
+        """
+        if self.topological is None:
+            raise Exception(
+                "Cannot call `RoomStreamToken.as_historical_tuple` on live token"
+            )
+
+        return self.topological, self.stream
+
+    def get_stream_pos_for_instance(self, instance_name: str) -> int:
+        """Get the stream position that the given writer had reached at this token.
+
+        This only makes sense for "live" tokens that may have a vector clock
+        component, and so asserts that this is a "live" token.
+        """
+        assert self.topological is None
+
+        # If we don't have an entry for the instance we can assume that it was
+        # at `self.stream`.
+        return self.instance_map.get(instance_name, self.stream)
+
+    def get_max_stream_pos(self) -> int:
+        """Get the maximum stream position referenced in this token.
+
+        The corresponding "min" position is, by definition just `self.stream`.
+
+        This is used to handle tokens that have non-empty `instance_map`, and so
+        reference stream positions after the `self.stream` position.
+        """
+        return max(self.instance_map.values(), default=self.stream)
+
+    async def to_string(self, store: "DataStore") -> str:
+        if self.topological is not None:
+            return "t%d-%d" % (self.topological, self.stream)
+        elif self.instance_map:
+            entries = []
+            for name, pos in self.instance_map.items():
+                instance_id = await store.get_id_for_instance(name)
+                entries.append(f"{instance_id}.{pos}")
+
+            encoded_map = "~".join(entries)
+            return f"m{self.stream}~{encoded_map}"
+        else:
+            return "s%d" % (self.stream,)
+
+
+class StreamKeyType:
+    """Known stream types.
+
+    A stream is a list of entities ordered by an incrementing "stream token".
+    """
+
+    ROOM: Final = "room_key"
+    PRESENCE: Final = "presence_key"
+    TYPING: Final = "typing_key"
+    RECEIPT: Final = "receipt_key"
+    ACCOUNT_DATA: Final = "account_data_key"
+    PUSH_RULES: Final = "push_rules_key"
+    TO_DEVICE: Final = "to_device_key"
+    DEVICE_LIST: Final = "device_list_key"
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class StreamToken:
+    """A collection of keys joined together by underscores in the following
+    order and which represent the position in their respective streams.
+
+    ex. `s2633508_17_338_6732159_1082514_541479_274711_265584_1`
+        1. `room_key`: `s2633508` which is a `RoomStreamToken`
+           - `RoomStreamToken`s can also look like `t426-2633508` or `m56~2.58~3.59`
+           - See the docstring for `RoomStreamToken` for more details.
+        2. `presence_key`: `17`
+        3. `typing_key`: `338`
+        4. `receipt_key`: `6732159`
+        5. `account_data_key`: `1082514`
+        6. `push_rules_key`: `541479`
+        7. `to_device_key`: `274711`
+        8. `device_list_key`: `265584`
+        9. `groups_key`: `1` (note that this key is now unused)
+
+    You can see how many of these keys correspond to the various
+    fields in a "/sync" response:
+    ```json
+    {
+        "next_batch": "s12_4_0_1_1_1_1_4_1",
+        "presence": {
+            "events": []
+        },
+        "device_lists": {
+            "changed": []
+        },
+        "rooms": {
+            "join": {
+                "!QrZlfIDQLNLdZHqTnt:hs1": {
+                    "timeline": {
+                        "events": [],
+                        "prev_batch": "s10_4_0_1_1_1_1_4_1",
+                        "limited": false
+                    },
+                    "state": {
+                        "events": []
+                    },
+                    "account_data": {
+                        "events": []
+                    },
+                    "ephemeral": {
+                        "events": []
+                    }
+                }
+            }
+        }
+    }
+    ```
+
+    ---
+
+    For caching purposes, `StreamToken`s and by extension, all their attributes,
+    must be hashable.
+    """
+
+    room_key: RoomStreamToken = attr.ib(
+        validator=attr.validators.instance_of(RoomStreamToken)
+    )
+    presence_key: int
+    typing_key: int
+    receipt_key: int
+    account_data_key: int
+    push_rules_key: int
+    to_device_key: int
+    device_list_key: int
+    # Note that the groups key is no longer used and may have bogus values.
+    groups_key: int
+
+    _SEPARATOR = "_"
+    START: ClassVar["StreamToken"]
+
+    @classmethod
+    @cancellable
+    async def from_string(cls, store: "DataStore", string: str) -> "StreamToken":
+        """
+        Creates a StreamToken from its textual representation.
+        """
+        try:
+            keys = string.split(cls._SEPARATOR)
+            while len(keys) < len(attr.fields(cls)):
+                # i.e. old token from before receipt_key
+                keys.append("0")
+            return cls(
+                await RoomStreamToken.parse(store, keys[0]), *(int(k) for k in keys[1:])
+            )
+        except CancelledError:
+            raise
+        except Exception:
+            raise SynapseError(400, "Invalid stream token")
+
+    async def to_string(self, store: "DataStore") -> str:
+        return self._SEPARATOR.join(
+            [
+                await self.room_key.to_string(store),
+                str(self.presence_key),
+                str(self.typing_key),
+                str(self.receipt_key),
+                str(self.account_data_key),
+                str(self.push_rules_key),
+                str(self.to_device_key),
+                str(self.device_list_key),
+                # Note that the groups key is no longer used, but it is still
+                # serialized so that there will not be confusion in the future
+                # if additional tokens are added.
+                str(self.groups_key),
+            ]
+        )
+
+    @property
+    def room_stream_id(self) -> int:
+        return self.room_key.stream
+
+    def copy_and_advance(self, key: str, new_value: Any) -> "StreamToken":
+        """Advance the given key in the token to a new value if and only if the
+        new value is after the old value.
+
+        :raises TypeError: if `key` is not one of the keys tracked by a StreamToken.
+        """
+        if key == StreamKeyType.ROOM:
+            new_token = self.copy_and_replace(
+                StreamKeyType.ROOM, self.room_key.copy_and_advance(new_value)
+            )
+            return new_token
+
+        new_token = self.copy_and_replace(key, new_value)
+        new_id = int(getattr(new_token, key))
+        old_id = int(getattr(self, key))
+
+        if old_id < new_id:
+            return new_token
+        else:
+            return self
+
+    def copy_and_replace(self, key: str, new_value: Any) -> "StreamToken":
+        return attr.evolve(self, **{key: new_value})
+
+
+StreamToken.START = StreamToken(RoomStreamToken(None, 0), 0, 0, 0, 0, 0, 0, 0, 0)
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class PersistedEventPosition:
+    """Position of a newly persisted event with instance that persisted it.
+
+    This can be used to test whether the event is persisted before or after a
+    RoomStreamToken.
+    """
+
+    instance_name: str
+    stream: int
+
+    def persisted_after(self, token: RoomStreamToken) -> bool:
+        return token.get_stream_pos_for_instance(self.instance_name) < self.stream
+
+    def to_room_stream_token(self) -> RoomStreamToken:
+        """Converts the position to a room stream token such that events
+        persisted in the same room after this position will be after the
+        returned `RoomStreamToken`.
+
+        Note: no guarantees are made about ordering w.r.t. events in other
+        rooms.
+        """
+        # Doing the naive thing satisfies the desired properties described in
+        # the docstring.
+        return RoomStreamToken(None, self.stream)
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class ThirdPartyInstanceID:
+    appservice_id: Optional[str]
+    network_id: Optional[str]
+
+    # Deny iteration because it will bite you if you try to create a singleton
+    # set by:
+    #    users = set(user)
+    def __iter__(self) -> NoReturn:
+        raise ValueError("Attempted to iterate a %s" % (type(self).__name__,))
+
+    # Because this class is a frozen class, it is deeply immutable.
+    def __copy__(self) -> "ThirdPartyInstanceID":
+        return self
+
+    def __deepcopy__(self, memo: Dict[str, object]) -> "ThirdPartyInstanceID":
+        return self
+
+    @classmethod
+    def from_string(cls, s: str) -> "ThirdPartyInstanceID":
+        bits = s.split("|", 2)
+        if len(bits) != 2:
+            raise SynapseError(400, "Invalid ID %r" % (s,))
+
+        return cls(appservice_id=bits[0], network_id=bits[1])
+
+    def to_string(self) -> str:
+        return "%s|%s" % (self.appservice_id, self.network_id)
+
+    __str__ = to_string
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class ReadReceipt:
+    """Information about a read-receipt"""
+
+    room_id: str
+    receipt_type: str
+    user_id: str
+    event_ids: List[str]
+    thread_id: Optional[str]
+    data: JsonDict
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class DeviceListUpdates:
+    """
+    An object containing a diff of information regarding other users' device lists, intended for
+    a recipient to carry out device list tracking.
+
+    Attributes:
+        changed: A set of users whose device lists have changed recently.
+        left: A set of users who the recipient no longer needs to track the device lists of.
+            Typically when those users no longer share any end-to-end encryption enabled rooms.
+    """
+
+    # We need to use a factory here, otherwise `set` is not evaluated at
+    # object instantiation, but instead at class definition instantiation.
+    # The latter happening only once, thus always giving you the same sets
+    # across multiple DeviceListUpdates instances.
+    # Also see: don't define mutable default arguments.
+    changed: Set[str] = attr.ib(factory=set)
+    left: Set[str] = attr.ib(factory=set)
+
+    def __bool__(self) -> bool:
+        return bool(self.changed or self.left)
+
+
+def get_verify_key_from_cross_signing_key(
+    key_info: Mapping[str, Any]
+) -> Tuple[str, VerifyKey]:
+    """Get the key ID and signedjson verify key from a cross-signing key dict
+
+    Args:
+        key_info: a cross-signing key dict, which must have a "keys"
+            property that has exactly one item in it
+
+    Returns:
+        the key ID and verify key for the cross-signing key
+    """
+    # make sure that a `keys` field is provided
+    if "keys" not in key_info:
+        raise ValueError("Invalid key")
+    keys = key_info["keys"]
+    # and that it contains exactly one key
+    if len(keys) == 1:
+        key_id, key_data = next(iter(keys.items()))
+        return key_id, decode_verify_key_bytes(key_id, decode_base64(key_data))
+    else:
+        raise ValueError("Invalid key")
+
+
+@attr.s(auto_attribs=True, frozen=True, slots=True)
+class UserInfo:
+    """Holds information about a user. Result of get_userinfo_by_id.
+
+    Attributes:
+        user_id:  ID of the user.
+        appservice_id:  Application service ID that created this user.
+        consent_server_notice_sent:  Version of policy documents the user has been sent.
+        consent_version:  Version of policy documents the user has consented to.
+        creation_ts:  Creation timestamp of the user.
+        is_admin:  True if the user is an admin.
+        is_deactivated:  True if the user has been deactivated.
+        is_guest:  True if the user is a guest user.
+        is_shadow_banned:  True if the user has been shadow-banned.
+        user_type:  User type (None for normal user, 'support' and 'bot' other options).
+    """
+
+    user_id: UserID
+    appservice_id: Optional[int]
+    consent_server_notice_sent: Optional[str]
+    consent_version: Optional[str]
+    user_type: Optional[str]
+    creation_ts: int
+    is_admin: bool
+    is_deactivated: bool
+    is_guest: bool
+    is_shadow_banned: bool
+
+
+class UserProfile(TypedDict):
+    user_id: str
+    display_name: Optional[str]
+    avatar_url: Optional[str]
+
+
+@attr.s(auto_attribs=True, frozen=True, slots=True)
+class RetentionPolicy:
+    min_lifetime: Optional[int] = None
+    max_lifetime: Optional[int] = None
diff --git a/synapse/types/state.py b/synapse/types/state.py
new file mode 100644
index 0000000000..0004d955b4
--- /dev/null
+++ b/synapse/types/state.py
@@ -0,0 +1,567 @@
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+    Collection,
+    Dict,
+    Iterable,
+    List,
+    Mapping,
+    Optional,
+    Set,
+    Tuple,
+    TypeVar,
+)
+
+import attr
+from frozendict import frozendict
+
+from synapse.api.constants import EventTypes
+from synapse.types import MutableStateMap, StateKey, StateMap
+
+if TYPE_CHECKING:
+    from typing import FrozenSet  # noqa: used within quoted type hint; flake8 sad
+
+
+logger = logging.getLogger(__name__)
+
+# Used for generic functions below
+T = TypeVar("T")
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class StateFilter:
+    """A filter used when querying for state.
+
+    Attributes:
+        types: Map from type to set of state keys (or None). This specifies
+            which state_keys for the given type to fetch from the DB. If None
+            then all events with that type are fetched. If the set is empty
+            then no events with that type are fetched.
+        include_others: Whether to fetch events with types that do not
+            appear in `types`.
+    """
+
+    types: "frozendict[str, Optional[FrozenSet[str]]]"
+    include_others: bool = False
+
+    def __attrs_post_init__(self) -> None:
+        # If `include_others` is set we canonicalise the filter by removing
+        # wildcards from the types dictionary
+        if self.include_others:
+            # this is needed to work around the fact that StateFilter is frozen
+            object.__setattr__(
+                self,
+                "types",
+                frozendict({k: v for k, v in self.types.items() if v is not None}),
+            )
+
+    @staticmethod
+    def all() -> "StateFilter":
+        """Returns a filter that fetches everything.
+
+        Returns:
+            The state filter.
+        """
+        return _ALL_STATE_FILTER
+
+    @staticmethod
+    def none() -> "StateFilter":
+        """Returns a filter that fetches nothing.
+
+        Returns:
+            The new state filter.
+        """
+        return _NONE_STATE_FILTER
+
+    @staticmethod
+    def from_types(types: Iterable[Tuple[str, Optional[str]]]) -> "StateFilter":
+        """Creates a filter that only fetches the given types
+
+        Args:
+            types: A list of type and state keys to fetch. A state_key of None
+                fetches everything for that type
+
+        Returns:
+            The new state filter.
+        """
+        type_dict: Dict[str, Optional[Set[str]]] = {}
+        for typ, s in types:
+            if typ in type_dict:
+                if type_dict[typ] is None:
+                    continue
+
+            if s is None:
+                type_dict[typ] = None
+                continue
+
+            type_dict.setdefault(typ, set()).add(s)  # type: ignore
+
+        return StateFilter(
+            types=frozendict(
+                (k, frozenset(v) if v is not None else None)
+                for k, v in type_dict.items()
+            )
+        )
+
+    @staticmethod
+    def from_lazy_load_member_list(members: Iterable[str]) -> "StateFilter":
+        """Creates a filter that returns all non-member events, plus the member
+        events for the given users
+
+        Args:
+            members: Set of user IDs
+
+        Returns:
+            The new state filter
+        """
+        return StateFilter(
+            types=frozendict({EventTypes.Member: frozenset(members)}),
+            include_others=True,
+        )
+
+    @staticmethod
+    def freeze(
+        types: Mapping[str, Optional[Collection[str]]], include_others: bool
+    ) -> "StateFilter":
+        """
+        Returns a (frozen) StateFilter with the same contents as the parameters
+        specified here, which can be made of mutable types.
+        """
+        types_with_frozen_values: Dict[str, Optional[FrozenSet[str]]] = {}
+        for state_types, state_keys in types.items():
+            if state_keys is not None:
+                types_with_frozen_values[state_types] = frozenset(state_keys)
+            else:
+                types_with_frozen_values[state_types] = None
+
+        return StateFilter(
+            frozendict(types_with_frozen_values), include_others=include_others
+        )
+
+    def return_expanded(self) -> "StateFilter":
+        """Creates a new StateFilter where type wild cards have been removed
+        (except for memberships). The returned filter is a superset of the
+        current one, i.e. anything that passes the current filter will pass
+        the returned filter.
+
+        This helps the caching as the DictionaryCache knows if it has *all* the
+        state, but does not know if it has all of the keys of a particular type,
+        which makes wildcard lookups expensive unless we have a complete cache.
+        Hence, if we are doing a wildcard lookup, populate the cache fully so
+        that we can do an efficient lookup next time.
+
+        Note that since we have two caches, one for membership events and one for
+        other events, we can be a bit more clever than simply returning
+        `StateFilter.all()` if `has_wildcards()` is True.
+
+        We return a StateFilter where:
+            1. the list of membership events to return is the same
+            2. if there is a wildcard that matches non-member events we
+               return all non-member events
+
+        Returns:
+            The new state filter.
+        """
+
+        if self.is_full():
+            # If we're going to return everything then there's nothing to do
+            return self
+
+        if not self.has_wildcards():
+            # If there are no wild cards, there's nothing to do
+            return self
+
+        if EventTypes.Member in self.types:
+            get_all_members = self.types[EventTypes.Member] is None
+        else:
+            get_all_members = self.include_others
+
+        has_non_member_wildcard = self.include_others or any(
+            state_keys is None
+            for t, state_keys in self.types.items()
+            if t != EventTypes.Member
+        )
+
+        if not has_non_member_wildcard:
+            # If there are no non-member wild cards we can just return ourselves
+            return self
+
+        if get_all_members:
+            # We want to return everything.
+            return StateFilter.all()
+        elif EventTypes.Member in self.types:
+            # We want to return all non-members, but only particular
+            # memberships
+            return StateFilter(
+                types=frozendict({EventTypes.Member: self.types[EventTypes.Member]}),
+                include_others=True,
+            )
+        else:
+            # We want to return all non-members
+            return _ALL_NON_MEMBER_STATE_FILTER
+
+    def make_sql_filter_clause(self) -> Tuple[str, List[str]]:
+        """Converts the filter to an SQL clause.
+
+        For example:
+
+            f = StateFilter.from_types([("m.room.create", "")])
+            clause, args = f.make_sql_filter_clause()
+            clause == "(type = ? AND state_key = ?)"
+            args == ['m.room.create', '']
+
+
+        Returns:
+            The SQL string (may be empty) and arguments. An empty SQL string is
+            returned when the filter matches everything (i.e. is "full").
+        """
+
+        where_clause = ""
+        where_args: List[str] = []
+
+        if self.is_full():
+            return where_clause, where_args
+
+        if not self.include_others and not self.types:
+            # i.e. this is an empty filter, so we need to return a clause that
+            # will match nothing
+            return "1 = 2", []
+
+        # First we build up a list of clauses for each type/state_key combo
+        clauses = []
+        for etype, state_keys in self.types.items():
+            if state_keys is None:
+                clauses.append("(type = ?)")
+                where_args.append(etype)
+                continue
+
+            for state_key in state_keys:
+                clauses.append("(type = ? AND state_key = ?)")
+                where_args.extend((etype, state_key))
+
+        # This will match anything that appears in `self.types`
+        where_clause = " OR ".join(clauses)
+
+        # If we want to include stuff that's not in the types dict then we add
+        # a `OR type NOT IN (...)` clause to the end.
+        if self.include_others:
+            if where_clause:
+                where_clause += " OR "
+
+            where_clause += "type NOT IN (%s)" % (",".join(["?"] * len(self.types)),)
+            where_args.extend(self.types)
+
+        return where_clause, where_args
+
+    def max_entries_returned(self) -> Optional[int]:
+        """Returns the maximum number of entries this filter will return if
+        known, otherwise returns None.
+
+        For example a simple state filter asking for `("m.room.create", "")`
+        will return 1, whereas the default state filter will return None.
+
+        This is used to bail out early if the right number of entries have been
+        fetched.
+        """
+        if self.has_wildcards():
+            return None
+
+        return len(self.concrete_types())
+
+    def filter_state(self, state_dict: StateMap[T]) -> MutableStateMap[T]:
+        """Returns the state filtered by this StateFilter.
+
+        Args:
+            state_dict: The state map to filter
+
+        Returns:
+            The filtered state map.
+            This is a copy, so it's safe to mutate.
+        """
+        if self.is_full():
+            return dict(state_dict)
+
+        filtered_state = {}
+        for k, v in state_dict.items():
+            typ, state_key = k
+            if typ in self.types:
+                state_keys = self.types[typ]
+                if state_keys is None or state_key in state_keys:
+                    filtered_state[k] = v
+            elif self.include_others:
+                filtered_state[k] = v
+
+        return filtered_state
+
+    def is_full(self) -> bool:
+        """Whether this filter fetches everything or not
+
+        Returns:
+            True if the filter fetches everything.
+        """
+        return self.include_others and not self.types
+
+    def has_wildcards(self) -> bool:
+        """Whether the filter includes wildcards or is attempting to fetch
+        specific state.
+
+        Returns:
+            True if the filter includes wildcards.
+        """
+
+        return self.include_others or any(
+            state_keys is None for state_keys in self.types.values()
+        )
+
+    def concrete_types(self) -> List[Tuple[str, str]]:
+        """Returns a list of concrete type/state_keys (i.e. not None) that
+        will be fetched. This will be a complete list if `has_wildcards`
+        returns False, but otherwise will be a subset (or even empty).
+
+        Returns:
+            A list of type/state_keys tuples.
+        """
+        return [
+            (t, s)
+            for t, state_keys in self.types.items()
+            if state_keys is not None
+            for s in state_keys
+        ]
+
+    def get_member_split(self) -> Tuple["StateFilter", "StateFilter"]:
+        """Return the filter split into two: one which assumes it's exclusively
+        matching against member state, and one which assumes it's matching
+        against non member state.
+
+        This is useful due to the returned filters giving correct results for
+        `is_full()`, `has_wildcards()`, etc, when operating against maps that
+        either exclusively contain member events or only contain non-member
+        events. (Which is the case when dealing with the member vs non-member
+        state caches).
+
+        Returns:
+            The member and non member filters
+        """
+
+        if EventTypes.Member in self.types:
+            state_keys = self.types[EventTypes.Member]
+            if state_keys is None:
+                member_filter = StateFilter.all()
+            else:
+                member_filter = StateFilter(frozendict({EventTypes.Member: state_keys}))
+        elif self.include_others:
+            member_filter = StateFilter.all()
+        else:
+            member_filter = StateFilter.none()
+
+        non_member_filter = StateFilter(
+            types=frozendict(
+                {k: v for k, v in self.types.items() if k != EventTypes.Member}
+            ),
+            include_others=self.include_others,
+        )
+
+        return member_filter, non_member_filter
+
+    def _decompose_into_four_parts(
+        self,
+    ) -> Tuple[Tuple[bool, Set[str]], Tuple[Set[str], Set[StateKey]]]:
+        """
+        Decomposes this state filter into 4 constituent parts, which can be
+        thought of as this:
+            all? - minus_wildcards + plus_wildcards + plus_state_keys
+
+        where
+        * all represents ALL state
+        * minus_wildcards represents entire state types to remove
+        * plus_wildcards represents entire state types to add
+        * plus_state_keys represents individual state keys to add
+
+        See `_recompose_from_four_parts` for the other direction of this
+        correspondence.
+        """
+        is_all = self.include_others
+        excluded_types: Set[str] = {t for t in self.types if is_all}
+        wildcard_types: Set[str] = {t for t, s in self.types.items() if s is None}
+        concrete_keys: Set[StateKey] = set(self.concrete_types())
+
+        return (is_all, excluded_types), (wildcard_types, concrete_keys)
+
+    @staticmethod
+    def _recompose_from_four_parts(
+        all_part: bool,
+        minus_wildcards: Set[str],
+        plus_wildcards: Set[str],
+        plus_state_keys: Set[StateKey],
+    ) -> "StateFilter":
+        """
+        Recomposes a state filter from 4 parts.
+
+        See `_decompose_into_four_parts` (the other direction of this
+        correspondence) for descriptions on each of the parts.
+        """
+
+        # {state type -> set of state keys OR None for wildcard}
+        # (The same structure as that of a StateFilter.)
+        new_types: Dict[str, Optional[Set[str]]] = {}
+
+        # if we start with all, insert the excluded statetypes as empty sets
+        # to prevent them from being included
+        if all_part:
+            new_types.update({state_type: set() for state_type in minus_wildcards})
+
+        # insert the plus wildcards
+        new_types.update({state_type: None for state_type in plus_wildcards})
+
+        # insert the specific state keys
+        for state_type, state_key in plus_state_keys:
+            if state_type in new_types:
+                entry = new_types[state_type]
+                if entry is not None:
+                    entry.add(state_key)
+            elif not all_part:
+                # don't insert if the entire type is already included by
+                # include_others as this would actually shrink the state allowed
+                # by this filter.
+                new_types[state_type] = {state_key}
+
+        return StateFilter.freeze(new_types, include_others=all_part)
+
+    def approx_difference(self, other: "StateFilter") -> "StateFilter":
+        """
+        Returns a state filter which represents `self - other`.
+
+        This is useful for determining what state remains to be pulled out of the
+        database if we want the state included by `self` but already have the state
+        included by `other`.
+
+        The returned state filter
+        - MUST include all state events that are included by this filter (`self`)
+          unless they are included by `other`;
+        - MUST NOT include state events not included by this filter (`self`); and
+        - MAY be an over-approximation: the returned state filter
+          MAY additionally include some state events from `other`.
+
+        This implementation attempts to return the narrowest such state filter.
+        In the case that `self` contains wildcards for state types where
+        `other` contains specific state keys, an approximation must be made:
+        the returned state filter keeps the wildcard, as state filters are not
+        able to express 'all state keys except some given examples'.
+        e.g.
+            StateFilter(m.room.member -> None (wildcard))
+                minus
+            StateFilter(m.room.member -> {'@wombat:example.org'})
+                is approximated as
+            StateFilter(m.room.member -> None (wildcard))
+        """
+
+        # We first transform self and other into an alternative representation:
+        #   - whether or not they include all events to begin with ('all')
+        #   - if so, which event types are excluded? ('excludes')
+        #   - which entire event types to include ('wildcards')
+        #   - which concrete state keys to include ('concrete state keys')
+        (self_all, self_excludes), (
+            self_wildcards,
+            self_concrete_keys,
+        ) = self._decompose_into_four_parts()
+        (other_all, other_excludes), (
+            other_wildcards,
+            other_concrete_keys,
+        ) = other._decompose_into_four_parts()
+
+        # Start with an estimate of the difference based on self
+        new_all = self_all
+        # Wildcards from the other can be added to the exclusion filter
+        new_excludes = self_excludes | other_wildcards
+        # We remove wildcards that appeared as wildcards in the other
+        new_wildcards = self_wildcards - other_wildcards
+        # We filter out the concrete state keys that appear in the other
+        # as wildcards or concrete state keys.
+        new_concrete_keys = {
+            (state_type, state_key)
+            for (state_type, state_key) in self_concrete_keys
+            if state_type not in other_wildcards
+        } - other_concrete_keys
+
+        if other_all:
+            if self_all:
+                # If self starts with all, then we add as wildcards any
+                # types which appear in the other's exclusion filter (but
+                # aren't in the self exclusion filter). This is as the other
+                # filter will return everything BUT the types in its exclusion, so
+                # we need to add those excluded types that also match the self
+                # filter as wildcard types in the new filter.
+                new_wildcards |= other_excludes.difference(self_excludes)
+
+            # If other is an `include_others` then the difference isn't.
+            new_all = False
+            # (We have no need for excludes when we don't start with all, as there
+            #  is nothing to exclude.)
+            new_excludes = set()
+
+            # We also filter out all state types that aren't in the exclusion
+            # list of the other.
+            new_wildcards &= other_excludes
+            new_concrete_keys = {
+                (state_type, state_key)
+                for (state_type, state_key) in new_concrete_keys
+                if state_type in other_excludes
+            }
+
+        # Transform our newly-constructed state filter from the alternative
+        # representation back into the normal StateFilter representation.
+        return StateFilter._recompose_from_four_parts(
+            new_all, new_excludes, new_wildcards, new_concrete_keys
+        )
+
+    def must_await_full_state(self, is_mine_id: Callable[[str], bool]) -> bool:
+        """Check if we need to wait for full state to complete to calculate this state
+
+        If we have a state filter which is completely satisfied even with partial
+        state, then we don't need to await_full_state before we can return it.
+
+        Args:
+            is_mine_id: a callable which confirms if a given state_key matches a mxid
+               of a local user
+        """
+        # if we haven't requested membership events, then it depends on the value of
+        # 'include_others'
+        if EventTypes.Member not in self.types:
+            return self.include_others
+
+        # if we're looking for *all* membership events, then we have to wait
+        member_state_keys = self.types[EventTypes.Member]
+        if member_state_keys is None:
+            return True
+
+        # otherwise, consider whose membership we are looking for. If it's entirely
+        # local users, then we don't need to wait.
+        for state_key in member_state_keys:
+            if not is_mine_id(state_key):
+                # remote user
+                return True
+
+        # local users only
+        return False
+
+
+_ALL_STATE_FILTER = StateFilter(types=frozendict(), include_others=True)
+_ALL_NON_MEMBER_STATE_FILTER = StateFilter(
+    types=frozendict({EventTypes.Member: frozenset()}), include_others=True
+)
+_NONE_STATE_FILTER = StateFilter(types=frozendict(), include_others=False)
diff --git a/synapse/visibility.py b/synapse/visibility.py
index b443857571..e442de3173 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -26,8 +26,8 @@ from synapse.events.utils import prune_event
 from synapse.logging.opentracing import trace
 from synapse.storage.controllers import StorageControllers
 from synapse.storage.databases.main import DataStore
-from synapse.storage.state import StateFilter
 from synapse.types import RetentionPolicy, StateMap, get_domain_from_id
+from synapse.types.state import StateFilter
 from synapse.util import Clock
 
 logger = logging.getLogger(__name__)
diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py
index d4e6d4236c..a433e70870 100644
--- a/tests/storage/test_state.py
+++ b/tests/storage/test_state.py
@@ -22,8 +22,8 @@ from synapse.api.constants import EventTypes, Membership
 from synapse.api.room_versions import RoomVersions
 from synapse.events import EventBase
 from synapse.server import HomeServer
-from synapse.storage.state import StateFilter
 from synapse.types import JsonDict, RoomID, StateMap, UserID
+from synapse.types.state import StateFilter
 from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase, TestCase
-- 
cgit 1.5.1


From 7982891794e26cabe18448f4e0ec0d301f13d186 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Mon, 12 Dec 2022 18:13:43 +0000
Subject: Fix missing cache invalidation in application service code (#14670)

#11915 introduced the `@cached` `is_interested_in_room` method in
Synapse 1.55.0, which depends upon `get_aliases_for_room`. Add a missing
cache invalidation callback so that the `is_interested_in_room` cache is
invalidated when `get_aliases_for_room` is invalidated.

#13787 made `get_rooms_for_user` `@cached`. Add a missing cache
invalidation callback so that the `is_interested_in_presence` cache is
invalidated when `get_rooms_for_user` is invalidated.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14670.bugfix       | 1 +
 synapse/appservice/__init__.py | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14670.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14670.bugfix b/changelog.d/14670.bugfix
new file mode 100644
index 0000000000..98398d76cc
--- /dev/null
+++ b/changelog.d/14670.bugfix
@@ -0,0 +1 @@
+Fix bugs introduced in 1.55.0 and 1.69.0 where application services would not be notified of events in the correct rooms, due to stale caches.
diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py
index bf4e6c629b..65615f50b8 100644
--- a/synapse/appservice/__init__.py
+++ b/synapse/appservice/__init__.py
@@ -245,7 +245,9 @@ class ApplicationService:
             return True
 
         # likewise with the room's aliases (if it has any)
-        alias_list = await store.get_aliases_for_room(room_id)
+        alias_list = await store.get_aliases_for_room(
+            room_id, on_invalidate=cache_context.invalidate
+        )
         for alias in alias_list:
             if self.is_room_alias_in_namespace(alias):
                 return True
@@ -311,7 +313,9 @@ class ApplicationService:
         # Find all the rooms the sender is in
         if self.is_interested_in_user(user_id.to_string()):
             return True
-        room_ids = await store.get_rooms_for_user(user_id.to_string())
+        room_ids = await store.get_rooms_for_user(
+            user_id.to_string(), on_invalidate=cache_context.invalidate
+        )
 
         # Then find out if the appservice is interested in any of those rooms
         for room_id in room_ids:
-- 
cgit 1.5.1


From 3d87847ecc943c689c4587c5327d744e4a8f92c2 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 12 Dec 2022 21:25:07 +0000
Subject: Enable `--warn-redundant-casts` option in mypy (#14671)

* Enable `--warn-redundant-casts` option in mypy

Doesn't do much but helps me sleep better at night.

* Changelog

* Fix name of the ignore

* Fix one more missed cast

Not sure why I didn't see this one locally, maybe I needed a poetry update

* Remove old comment

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
---
 changelog.d/14671.misc              | 1 +
 mypy.ini                            | 1 +
 scripts-dev/release.py              | 6 ++----
 synapse/storage/database.py         | 3 ++-
 synapse/storage/engines/postgres.py | 2 +-
 5 files changed, 7 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/14671.misc

(limited to 'synapse')

diff --git a/changelog.d/14671.misc b/changelog.d/14671.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/14671.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/mypy.ini b/mypy.ini
index a4a1e4511a..727536df50 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -12,6 +12,7 @@ local_partial_types = True
 no_implicit_optional = True
 disallow_untyped_defs = True
 strict_equality = True
+warn_redundant_casts = True
 
 files =
   docker/,
diff --git a/scripts-dev/release.py b/scripts-dev/release.py
index bf47b6c713..6974fd7895 100755
--- a/scripts-dev/release.py
+++ b/scripts-dev/release.py
@@ -27,7 +27,7 @@ import time
 import urllib.request
 from os import path
 from tempfile import TemporaryDirectory
-from typing import Any, List, Optional, cast
+from typing import Any, List, Optional
 
 import attr
 import click
@@ -174,9 +174,7 @@ def _prepare() -> None:
         click.get_current_context().abort()
 
     # Switch to the release branch.
-    # Cast safety: parse() won't return a version.LegacyVersion from our
-    # version string format.
-    parsed_new_version = cast(version.Version, version.parse(new_version))
+    parsed_new_version = version.parse(new_version)
 
     # We assume for debian changelogs that we only do RCs or full releases.
     assert not parsed_new_version.is_devrelease
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 55bcb90001..0b29e67b94 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -667,7 +667,8 @@ class DatabasePool:
                 )
         # also check variables referenced in func's closure
         if inspect.isfunction(func):
-            f = cast(types.FunctionType, func)
+            # Keep the cast for now---it helps PyCharm to understand what `func` is.
+            f = cast(types.FunctionType, func)  # type: ignore[redundant-cast]
             if f.__closure__:
                 for i, cell in enumerate(f.__closure__):
                     if inspect.isgenerator(cell.cell_contents):
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index 719a517336..f9f562ea45 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -77,7 +77,7 @@ class PostgresEngine(
         # docs: The number is formed by converting the major, minor, and
         # revision numbers into two-decimal-digit numbers and appending them
         # together. For example, version 8.1.5 will be returned as 80105
-        self._version = cast(int, db_conn.server_version)
+        self._version = db_conn.server_version
         allow_unsafe_locale = self.config.get("allow_unsafe_locale", False)
 
         # Are we on a supported PostgreSQL version?
-- 
cgit 1.5.1


From e2a1adbf5d11288f2134ced1f84c6ffdd91a9357 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 13 Dec 2022 00:54:46 +0000
Subject: Allow selecting "prejoin" events by state keys (#14642)

* Declare new config

* Parse new config

* Read new config

* Don't use trial/our TestCase where it's not needed

Before:

```
$ time trial tests/events/test_utils.py > /dev/null

real	0m2.277s
user	0m2.186s
sys	0m0.083s
```

After:
```
$ time trial tests/events/test_utils.py > /dev/null

real	0m0.566s
user	0m0.508s
sys	0m0.056s
```

* Helper to upsert to event fields

without exceeding size limits.

* Use helper when adding invite/knock state

Now that we allow admins to include events in prejoin room state with
arbitrary state keys, be a good Matrix citizen and ensure they don't
accidentally create an oversized event.

* Changelog

* Move StateFilter tests

should have done this in #14668

* Add extra methods to StateFilter

* Use StateFilter

* Ensure test file enforces typed defs; alphabetise

* Workaround surprising get_current_state_ids

* Whoops, fix mypy
---
 changelog.d/14642.feature                        |   1 +
 docs/usage/configuration/config_documentation.md |  57 ++-
 mypy.ini                                         |  12 +-
 synapse/config/_util.py                          |   3 +
 synapse/config/api.py                            |  63 ++-
 synapse/events/utils.py                          |  32 +-
 synapse/handlers/message.py                      |  29 +-
 synapse/storage/databases/main/events_worker.py  |  33 +-
 synapse/types/state.py                           |  18 +
 tests/config/test_api.py                         | 145 ++++++
 tests/events/test_utils.py                       |  35 +-
 tests/storage/test_state.py                      | 623 +---------------------
 tests/types/__init__.py                          |   0
 tests/types/test_state.py                        | 627 +++++++++++++++++++++++
 14 files changed, 983 insertions(+), 695 deletions(-)
 create mode 100644 changelog.d/14642.feature
 create mode 100644 tests/config/test_api.py
 create mode 100644 tests/types/__init__.py
 create mode 100644 tests/types/test_state.py

(limited to 'synapse')

diff --git a/changelog.d/14642.feature b/changelog.d/14642.feature
new file mode 100644
index 0000000000..cbc9db10c3
--- /dev/null
+++ b/changelog.d/14642.feature
@@ -0,0 +1 @@
+Allow selecting "prejoin" events by state keys in addition to event types.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index dc5e5ac597..4d32902fea 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -2501,32 +2501,53 @@ Config settings related to the client/server API
 ---
 ### `room_prejoin_state`
 
-Controls for the state that is shared with users who receive an invite
-to a room. By default, the following state event types are shared with users who
-receive invites to the room:
-- m.room.join_rules
-- m.room.canonical_alias
-- m.room.avatar
-- m.room.encryption
-- m.room.name
-- m.room.create
-- m.room.topic
+This setting controls the state that is shared with users upon receiving an
+invite to a room, or in reply to a knock on a room. By default, the following
+state events are shared with users:
+
+- `m.room.join_rules`
+- `m.room.canonical_alias`
+- `m.room.avatar`
+- `m.room.encryption`
+- `m.room.name`
+- `m.room.create`
+- `m.room.topic`
 
 To change the default behavior, use the following sub-options:
-* `disable_default_event_types`: set to true to disable the above defaults. If this
-   is enabled, only the event types listed in `additional_event_types` are shared.
-   Defaults to false.
-* `additional_event_types`: Additional state event types to share with users when they are invited
-   to a room. By default, this list is empty (so only the default event types are shared).
+* `disable_default_event_types`: boolean. Set to `true` to disable the above 
+  defaults. If this is enabled, only the event types listed in
+  `additional_event_types` are shared. Defaults to `false`.
+* `additional_event_types`: A list of additional state events to include in the 
+  events to be shared. By default, this list is empty (so only the default event 
+  types are shared).
+
+  Each entry in this list should be either a single string or a list of two
+  strings. 
+  * A standalone string `t` represents all events with type `t` (i.e.
+    with no restrictions on state keys).
+  * A pair of strings `[t, s]` represents a single event with type `t` and 
+    state key `s`. The same type can appear in two entries with different state
+    keys: in this situation, both state keys are included in prejoin state.
 
 Example configuration:
 ```yaml
 room_prejoin_state:
-   disable_default_event_types: true
+   disable_default_event_types: false
    additional_event_types:
-     - org.example.custom.event.type
-     - m.room.join_rules
+     # Share all events of type `org.example.custom.event.typeA`
+     - org.example.custom.event.typeA
+     # Share only events of type `org.example.custom.event.typeB` whose
+     # state_key is "foo"
+     - ["org.example.custom.event.typeB", "foo"]
+     # Share only events of type `org.example.custom.event.typeC` whose
+     # state_key is "bar" or "baz"
+     - ["org.example.custom.event.typeC", "bar"]
+     - ["org.example.custom.event.typeC", "baz"]
 ```
+
+*Changed in Synapse 1.74:* admins can filter the events in prejoin state based
+on their state key.
+
 ---
 ### `track_puppeted_user_ips`
 
diff --git a/mypy.ini b/mypy.ini
index 727536df50..37acf589c9 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -89,6 +89,12 @@ disallow_untyped_defs = False
 [mypy-tests.*]
 disallow_untyped_defs = False
 
+[mypy-tests.config.test_api]
+disallow_untyped_defs = True
+
+[mypy-tests.federation.transport.test_client]
+disallow_untyped_defs = True
+
 [mypy-tests.handlers.test_sso]
 disallow_untyped_defs = True
 
@@ -101,7 +107,7 @@ disallow_untyped_defs = True
 [mypy-tests.push.test_bulk_push_rule_evaluator]
 disallow_untyped_defs = True
 
-[mypy-tests.test_server]
+[mypy-tests.rest.*]
 disallow_untyped_defs = True
 
 [mypy-tests.state.test_profile]
@@ -110,10 +116,10 @@ disallow_untyped_defs = True
 [mypy-tests.storage.*]
 disallow_untyped_defs = True
 
-[mypy-tests.rest.*]
+[mypy-tests.test_server]
 disallow_untyped_defs = True
 
-[mypy-tests.federation.transport.test_client]
+[mypy-tests.types.*]
 disallow_untyped_defs = True
 
 [mypy-tests.util.caches.*]
diff --git a/synapse/config/_util.py b/synapse/config/_util.py
index 3edb4b7106..d3a4b484ab 100644
--- a/synapse/config/_util.py
+++ b/synapse/config/_util.py
@@ -33,6 +33,9 @@ def validate_config(
         config: the configuration value to be validated
         config_path: the path within the config file. This will be used as a basis
            for the error message.
+
+    Raises:
+        ConfigError, if validation fails.
     """
     try:
         jsonschema.validate(config, json_schema)
diff --git a/synapse/config/api.py b/synapse/config/api.py
index e46728e73f..27d50d118f 100644
--- a/synapse/config/api.py
+++ b/synapse/config/api.py
@@ -13,12 +13,13 @@
 # limitations under the License.
 
 import logging
-from typing import Any, Iterable
+from typing import Any, Iterable, Optional, Tuple
 
 from synapse.api.constants import EventTypes
 from synapse.config._base import Config, ConfigError
 from synapse.config._util import validate_config
 from synapse.types import JsonDict
+from synapse.types.state import StateFilter
 
 logger = logging.getLogger(__name__)
 
@@ -26,16 +27,20 @@ logger = logging.getLogger(__name__)
 class ApiConfig(Config):
     section = "api"
 
+    room_prejoin_state: StateFilter
+    track_puppetted_users_ips: bool
+
     def read_config(self, config: JsonDict, **kwargs: Any) -> None:
         validate_config(_MAIN_SCHEMA, config, ())
-        self.room_prejoin_state = list(self._get_prejoin_state_types(config))
+        self.room_prejoin_state = StateFilter.from_types(
+            self._get_prejoin_state_entries(config)
+        )
         self.track_puppeted_user_ips = config.get("track_puppeted_user_ips", False)
 
-    def _get_prejoin_state_types(self, config: JsonDict) -> Iterable[str]:
-        """Get the event types to include in the prejoin state
-
-        Parses the config and returns an iterable of the event types to be included.
-        """
+    def _get_prejoin_state_entries(
+        self, config: JsonDict
+    ) -> Iterable[Tuple[str, Optional[str]]]:
+        """Get the event types and state keys to include in the prejoin state."""
         room_prejoin_state_config = config.get("room_prejoin_state") or {}
 
         # backwards-compatibility support for room_invite_state_types
@@ -50,33 +55,39 @@ class ApiConfig(Config):
 
             logger.warning(_ROOM_INVITE_STATE_TYPES_WARNING)
 
-            yield from config["room_invite_state_types"]
+            for event_type in config["room_invite_state_types"]:
+                yield event_type, None
             return
 
         if not room_prejoin_state_config.get("disable_default_event_types"):
-            yield from _DEFAULT_PREJOIN_STATE_TYPES
+            yield from _DEFAULT_PREJOIN_STATE_TYPES_AND_STATE_KEYS
 
-        yield from room_prejoin_state_config.get("additional_event_types", [])
+        for entry in room_prejoin_state_config.get("additional_event_types", []):
+            if isinstance(entry, str):
+                yield entry, None
+            else:
+                yield entry
 
 
 _ROOM_INVITE_STATE_TYPES_WARNING = """\
 WARNING: The 'room_invite_state_types' configuration setting is now deprecated,
 and replaced with 'room_prejoin_state'. New features may not work correctly
-unless 'room_invite_state_types' is removed. See the sample configuration file for
-details of 'room_prejoin_state'.
+unless 'room_invite_state_types' is removed. See the config documentation at
+    https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#room_prejoin_state
+for details of 'room_prejoin_state'.
 --------------------------------------------------------------------------------
 """
 
-_DEFAULT_PREJOIN_STATE_TYPES = [
-    EventTypes.JoinRules,
-    EventTypes.CanonicalAlias,
-    EventTypes.RoomAvatar,
-    EventTypes.RoomEncryption,
-    EventTypes.Name,
+_DEFAULT_PREJOIN_STATE_TYPES_AND_STATE_KEYS = [
+    (EventTypes.JoinRules, ""),
+    (EventTypes.CanonicalAlias, ""),
+    (EventTypes.RoomAvatar, ""),
+    (EventTypes.RoomEncryption, ""),
+    (EventTypes.Name, ""),
     # Per MSC1772.
-    EventTypes.Create,
+    (EventTypes.Create, ""),
     # Per MSC3173.
-    EventTypes.Topic,
+    (EventTypes.Topic, ""),
 ]
 
 
@@ -90,7 +101,17 @@ _ROOM_PREJOIN_STATE_CONFIG_SCHEMA = {
                 "disable_default_event_types": {"type": "boolean"},
                 "additional_event_types": {
                     "type": "array",
-                    "items": {"type": "string"},
+                    "items": {
+                        "oneOf": [
+                            {"type": "string"},
+                            {
+                                "type": "array",
+                                "items": {"type": "string"},
+                                "minItems": 2,
+                                "maxItems": 2,
+                            },
+                        ],
+                    },
                 },
             },
         },
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index 71853caad8..13fa93afb8 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -28,8 +28,14 @@ from typing import (
 )
 
 import attr
+from canonicaljson import encode_canonical_json
 
-from synapse.api.constants import EventContentFields, EventTypes, RelationTypes
+from synapse.api.constants import (
+    MAX_PDU_SIZE,
+    EventContentFields,
+    EventTypes,
+    RelationTypes,
+)
 from synapse.api.errors import Codes, SynapseError
 from synapse.api.room_versions import RoomVersion
 from synapse.types import JsonDict
@@ -674,3 +680,27 @@ def validate_canonicaljson(value: Any) -> None:
     elif not isinstance(value, (bool, str)) and value is not None:
         # Other potential JSON values (bool, None, str) are safe.
         raise SynapseError(400, "Unknown JSON value", Codes.BAD_JSON)
+
+
+def maybe_upsert_event_field(
+    event: EventBase, container: JsonDict, key: str, value: object
+) -> bool:
+    """Upsert an event field, but only if this doesn't make the event too large.
+
+    Returns true iff the upsert took place.
+    """
+    if key in container:
+        old_value: object = container[key]
+        container[key] = value
+        # NB: here and below, we assume that passing a non-None `time_now` argument to
+        # get_pdu_json doesn't increase the size of the encoded result.
+        upsert_okay = len(encode_canonical_json(event.get_pdu_json())) <= MAX_PDU_SIZE
+        if not upsert_okay:
+            container[key] = old_value
+    else:
+        container[key] = value
+        upsert_okay = len(encode_canonical_json(event.get_pdu_json())) <= MAX_PDU_SIZE
+        if not upsert_okay:
+            del container[key]
+
+    return upsert_okay
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index d6e90ef259..845f683358 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -50,6 +50,7 @@ from synapse.event_auth import validate_event_for_room_version
 from synapse.events import EventBase, relation_from_event
 from synapse.events.builder import EventBuilder
 from synapse.events.snapshot import EventContext
+from synapse.events.utils import maybe_upsert_event_field
 from synapse.events.validator import EventValidator
 from synapse.handlers.directory import DirectoryHandler
 from synapse.logging import opentracing
@@ -1739,12 +1740,15 @@ class EventCreationHandler:
 
             if event.type == EventTypes.Member:
                 if event.content["membership"] == Membership.INVITE:
-                    event.unsigned[
-                        "invite_room_state"
-                    ] = await self.store.get_stripped_room_state_from_event_context(
-                        context,
-                        self.room_prejoin_state_types,
-                        membership_user_id=event.sender,
+                    maybe_upsert_event_field(
+                        event,
+                        event.unsigned,
+                        "invite_room_state",
+                        await self.store.get_stripped_room_state_from_event_context(
+                            context,
+                            self.room_prejoin_state_types,
+                            membership_user_id=event.sender,
+                        ),
                     )
 
                     invitee = UserID.from_string(event.state_key)
@@ -1762,11 +1766,14 @@ class EventCreationHandler:
                         event.signatures.update(returned_invite.signatures)
 
                 if event.content["membership"] == Membership.KNOCK:
-                    event.unsigned[
-                        "knock_room_state"
-                    ] = await self.store.get_stripped_room_state_from_event_context(
-                        context,
-                        self.room_prejoin_state_types,
+                    maybe_upsert_event_field(
+                        event,
+                        event.unsigned,
+                        "knock_room_state",
+                        await self.store.get_stripped_room_state_from_event_context(
+                            context,
+                            self.room_prejoin_state_types,
+                        ),
                     )
 
             if event.type == EventTypes.Redaction:
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 01e935edef..318fd7dc71 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -16,11 +16,11 @@ import logging
 import threading
 import weakref
 from enum import Enum, auto
+from itertools import chain
 from typing import (
     TYPE_CHECKING,
     Any,
     Collection,
-    Container,
     Dict,
     Iterable,
     List,
@@ -76,6 +76,7 @@ from synapse.storage.util.id_generators import (
 )
 from synapse.storage.util.sequence import build_sequence_generator
 from synapse.types import JsonDict, get_domain_from_id
+from synapse.types.state import StateFilter
 from synapse.util import unwrapFirstError
 from synapse.util.async_helpers import ObservableDeferred, delay_cancellation
 from synapse.util.caches.descriptors import cached, cachedList
@@ -879,7 +880,7 @@ class EventsWorkerStore(SQLBaseStore):
     async def get_stripped_room_state_from_event_context(
         self,
         context: EventContext,
-        state_types_to_include: Container[str],
+        state_keys_to_include: StateFilter,
         membership_user_id: Optional[str] = None,
     ) -> List[JsonDict]:
         """
@@ -892,7 +893,7 @@ class EventsWorkerStore(SQLBaseStore):
 
         Args:
             context: The event context to retrieve state of the room from.
-            state_types_to_include: The type of state events to include.
+            state_keys_to_include: The state events to include, for each event type.
             membership_user_id: An optional user ID to include the stripped membership state
                 events of. This is useful when generating the stripped state of a room for
                 invites. We want to send membership events of the inviter, so that the
@@ -901,21 +902,25 @@ class EventsWorkerStore(SQLBaseStore):
         Returns:
             A list of dictionaries, each representing a stripped state event from the room.
         """
-        current_state_ids = await context.get_current_state_ids()
+        if membership_user_id:
+            types = chain(
+                state_keys_to_include.to_types(),
+                [(EventTypes.Member, membership_user_id)],
+            )
+            filter = StateFilter.from_types(types)
+        else:
+            filter = state_keys_to_include
+        selected_state_ids = await context.get_current_state_ids(filter)
 
         # We know this event is not an outlier, so this must be
         # non-None.
-        assert current_state_ids is not None
-
-        # The state to include
-        state_to_include_ids = [
-            e_id
-            for k, e_id in current_state_ids.items()
-            if k[0] in state_types_to_include
-            or (membership_user_id and k == (EventTypes.Member, membership_user_id))
-        ]
+        assert selected_state_ids is not None
+
+        # Confusingly, get_current_state_ids may return events that are discarded by
+        # the filter, if they're in context._state_delta_due_to_event. Strip these away.
+        selected_state_ids = filter.filter_state(selected_state_ids)
 
-        state_to_include = await self.get_events(state_to_include_ids)
+        state_to_include = await self.get_events(selected_state_ids.values())
 
         return [
             {
diff --git a/synapse/types/state.py b/synapse/types/state.py
index 0004d955b4..743a4f9217 100644
--- a/synapse/types/state.py
+++ b/synapse/types/state.py
@@ -118,6 +118,15 @@ class StateFilter:
             )
         )
 
+    def to_types(self) -> Iterable[Tuple[str, Optional[str]]]:
+        """The inverse to `from_types`."""
+        for (event_type, state_keys) in self.types.items():
+            if state_keys is None:
+                yield event_type, None
+            else:
+                for state_key in state_keys:
+                    yield event_type, state_key
+
     @staticmethod
     def from_lazy_load_member_list(members: Iterable[str]) -> "StateFilter":
         """Creates a filter that returns all non-member events, plus the member
@@ -343,6 +352,15 @@ class StateFilter:
             for s in state_keys
         ]
 
+    def wildcard_types(self) -> List[str]:
+        """Returns a list of event types which require us to fetch all state keys.
+        This will be empty unless `has_wildcards` returns True.
+
+        Returns:
+            A list of event types.
+        """
+        return [t for t, state_keys in self.types.items() if state_keys is None]
+
     def get_member_split(self) -> Tuple["StateFilter", "StateFilter"]:
         """Return the filter split into two: one which assumes it's exclusively
         matching against member state, and one which assumes it's matching
diff --git a/tests/config/test_api.py b/tests/config/test_api.py
new file mode 100644
index 0000000000..6773c9a277
--- /dev/null
+++ b/tests/config/test_api.py
@@ -0,0 +1,145 @@
+from unittest import TestCase as StdlibTestCase
+
+import yaml
+
+from synapse.config import ConfigError
+from synapse.config.api import ApiConfig
+from synapse.types.state import StateFilter
+
+DEFAULT_PREJOIN_STATE_PAIRS = {
+    ("m.room.join_rules", ""),
+    ("m.room.canonical_alias", ""),
+    ("m.room.avatar", ""),
+    ("m.room.encryption", ""),
+    ("m.room.name", ""),
+    ("m.room.create", ""),
+    ("m.room.topic", ""),
+}
+
+
+class TestRoomPrejoinState(StdlibTestCase):
+    def read_config(self, source: str) -> ApiConfig:
+        config = ApiConfig()
+        config.read_config(yaml.safe_load(source))
+        return config
+
+    def test_no_prejoin_state(self) -> None:
+        config = self.read_config("foo: bar")
+        self.assertFalse(config.room_prejoin_state.has_wildcards())
+        self.assertEqual(
+            set(config.room_prejoin_state.concrete_types()), DEFAULT_PREJOIN_STATE_PAIRS
+        )
+
+    def test_disable_default_event_types(self) -> None:
+        config = self.read_config(
+            """
+room_prejoin_state:
+    disable_default_event_types: true
+        """
+        )
+        self.assertEqual(config.room_prejoin_state, StateFilter.none())
+
+    def test_event_without_state_key(self) -> None:
+        config = self.read_config(
+            """
+room_prejoin_state:
+    disable_default_event_types: true
+    additional_event_types:
+        - foo
+        """
+        )
+        self.assertEqual(config.room_prejoin_state.wildcard_types(), ["foo"])
+        self.assertEqual(config.room_prejoin_state.concrete_types(), [])
+
+    def test_event_with_specific_state_key(self) -> None:
+        config = self.read_config(
+            """
+room_prejoin_state:
+    disable_default_event_types: true
+    additional_event_types:
+        - [foo, bar]
+        """
+        )
+        self.assertFalse(config.room_prejoin_state.has_wildcards())
+        self.assertEqual(
+            set(config.room_prejoin_state.concrete_types()),
+            {("foo", "bar")},
+        )
+
+    def test_repeated_event_with_specific_state_key(self) -> None:
+        config = self.read_config(
+            """
+room_prejoin_state:
+    disable_default_event_types: true
+    additional_event_types:
+        - [foo, bar]
+        - [foo, baz]
+        """
+        )
+        self.assertFalse(config.room_prejoin_state.has_wildcards())
+        self.assertEqual(
+            set(config.room_prejoin_state.concrete_types()),
+            {("foo", "bar"), ("foo", "baz")},
+        )
+
+    def test_no_specific_state_key_overrides_specific_state_key(self) -> None:
+        config = self.read_config(
+            """
+room_prejoin_state:
+    disable_default_event_types: true
+    additional_event_types:
+        - [foo, bar]
+        - foo
+        """
+        )
+        self.assertEqual(config.room_prejoin_state.wildcard_types(), ["foo"])
+        self.assertEqual(config.room_prejoin_state.concrete_types(), [])
+
+        config = self.read_config(
+            """
+room_prejoin_state:
+    disable_default_event_types: true
+    additional_event_types:
+        - foo
+        - [foo, bar]
+        """
+        )
+        self.assertEqual(config.room_prejoin_state.wildcard_types(), ["foo"])
+        self.assertEqual(config.room_prejoin_state.concrete_types(), [])
+
+    def test_bad_event_type_entry_raises(self) -> None:
+        with self.assertRaises(ConfigError):
+            self.read_config(
+                """
+room_prejoin_state:
+    additional_event_types:
+        - []
+            """
+            )
+
+        with self.assertRaises(ConfigError):
+            self.read_config(
+                """
+room_prejoin_state:
+    additional_event_types:
+        - [a]
+            """
+            )
+
+        with self.assertRaises(ConfigError):
+            self.read_config(
+                """
+room_prejoin_state:
+    additional_event_types:
+        - [a, b, c]
+            """
+            )
+
+        with self.assertRaises(ConfigError):
+            self.read_config(
+                """
+room_prejoin_state:
+    additional_event_types:
+        - [true, 1.23]
+            """
+            )
diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py
index b1c47efac7..a79256846f 100644
--- a/tests/events/test_utils.py
+++ b/tests/events/test_utils.py
@@ -12,19 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import unittest as stdlib_unittest
+
 from synapse.api.constants import EventContentFields
 from synapse.api.room_versions import RoomVersions
 from synapse.events import make_event_from_dict
 from synapse.events.utils import (
     SerializeEventConfig,
     copy_and_fixup_power_levels_contents,
+    maybe_upsert_event_field,
     prune_event,
     serialize_event,
 )
 from synapse.util.frozenutils import freeze
 
-from tests import unittest
-
 
 def MockEvent(**kwargs):
     if "event_id" not in kwargs:
@@ -34,7 +35,31 @@ def MockEvent(**kwargs):
     return make_event_from_dict(kwargs)
 
 
-class PruneEventTestCase(unittest.TestCase):
+class TestMaybeUpsertEventField(stdlib_unittest.TestCase):
+    def test_update_okay(self) -> None:
+        event = make_event_from_dict({"event_id": "$1234"})
+        success = maybe_upsert_event_field(event, event.unsigned, "key", "value")
+        self.assertTrue(success)
+        self.assertEqual(event.unsigned["key"], "value")
+
+    def test_update_not_okay(self) -> None:
+        event = make_event_from_dict({"event_id": "$1234"})
+        LARGE_STRING = "a" * 100_000
+        success = maybe_upsert_event_field(event, event.unsigned, "key", LARGE_STRING)
+        self.assertFalse(success)
+        self.assertNotIn("key", event.unsigned)
+
+    def test_update_not_okay_leaves_original_value(self) -> None:
+        event = make_event_from_dict(
+            {"event_id": "$1234", "unsigned": {"key": "value"}}
+        )
+        LARGE_STRING = "a" * 100_000
+        success = maybe_upsert_event_field(event, event.unsigned, "key", LARGE_STRING)
+        self.assertFalse(success)
+        self.assertEqual(event.unsigned["key"], "value")
+
+
+class PruneEventTestCase(stdlib_unittest.TestCase):
     def run_test(self, evdict, matchdict, **kwargs):
         """
         Asserts that a new event constructed with `evdict` will look like
@@ -391,7 +416,7 @@ class PruneEventTestCase(unittest.TestCase):
         )
 
 
-class SerializeEventTestCase(unittest.TestCase):
+class SerializeEventTestCase(stdlib_unittest.TestCase):
     def serialize(self, ev, fields):
         return serialize_event(
             ev, 1479807801915, config=SerializeEventConfig(only_event_fields=fields)
@@ -513,7 +538,7 @@ class SerializeEventTestCase(unittest.TestCase):
             )
 
 
-class CopyPowerLevelsContentTestCase(unittest.TestCase):
+class CopyPowerLevelsContentTestCase(stdlib_unittest.TestCase):
     def setUp(self) -> None:
         self.test_content = {
             "ban": 50,
diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py
index a433e70870..bad7f0bc60 100644
--- a/tests/storage/test_state.py
+++ b/tests/storage/test_state.py
@@ -26,7 +26,7 @@ from synapse.types import JsonDict, RoomID, StateMap, UserID
 from synapse.types.state import StateFilter
 from synapse.util import Clock
 
-from tests.unittest import HomeserverTestCase, TestCase
+from tests.unittest import HomeserverTestCase
 
 logger = logging.getLogger(__name__)
 
@@ -494,624 +494,3 @@ class StateStoreTestCase(HomeserverTestCase):
 
         self.assertEqual(is_all, True)
         self.assertDictEqual({(e5.type, e5.state_key): e5.event_id}, state_dict)
-
-
-class StateFilterDifferenceTestCase(TestCase):
-    def assert_difference(
-        self, minuend: StateFilter, subtrahend: StateFilter, expected: StateFilter
-    ) -> None:
-        self.assertEqual(
-            minuend.approx_difference(subtrahend),
-            expected,
-            f"StateFilter difference not correct:\n\n\t{minuend!r}\nminus\n\t{subtrahend!r}\nwas\n\t{minuend.approx_difference(subtrahend)}\nexpected\n\t{expected}",
-        )
-
-    def test_state_filter_difference_no_include_other_minus_no_include_other(
-        self,
-    ) -> None:
-        """
-        Tests the StateFilter.approx_difference method
-        where, in a.approx_difference(b), both a and b do not have the
-        include_others flag set.
-        """
-        # (wildcard on state keys) - (wildcard on state keys):
-        self.assert_difference(
-            StateFilter.freeze(
-                {EventTypes.Member: None, EventTypes.Create: None},
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {EventTypes.Member: None, EventTypes.CanonicalAlias: None},
-                include_others=False,
-            ),
-            StateFilter.freeze({EventTypes.Create: None}, include_others=False),
-        )
-
-        # (wildcard on state keys) - (specific state keys)
-        # This one is an over-approximation because we can't represent
-        # 'all state keys except a few named examples'
-        self.assert_difference(
-            StateFilter.freeze({EventTypes.Member: None}, include_others=False),
-            StateFilter.freeze(
-                {EventTypes.Member: {"@wombat:spqr"}},
-                include_others=False,
-            ),
-            StateFilter.freeze({EventTypes.Member: None}, include_others=False),
-        )
-
-        # (wildcard on state keys) - (no state keys)
-        self.assert_difference(
-            StateFilter.freeze(
-                {EventTypes.Member: None},
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: set(),
-                },
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {EventTypes.Member: None},
-                include_others=False,
-            ),
-        )
-
-        # (specific state keys) - (wildcard on state keys):
-        self.assert_difference(
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                },
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {EventTypes.Member: None},
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {EventTypes.CanonicalAlias: {""}},
-                include_others=False,
-            ),
-        )
-
-        # (specific state keys) - (specific state keys)
-        self.assert_difference(
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                },
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr"},
-                },
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@spqr:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                },
-                include_others=False,
-            ),
-        )
-
-        # (specific state keys) - (no state keys)
-        self.assert_difference(
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                },
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: set(),
-                },
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                },
-                include_others=False,
-            ),
-        )
-
-    def test_state_filter_difference_include_other_minus_no_include_other(self) -> None:
-        """
-        Tests the StateFilter.approx_difference method
-        where, in a.approx_difference(b), only a has the include_others flag set.
-        """
-        # (wildcard on state keys) - (wildcard on state keys):
-        self.assert_difference(
-            StateFilter.freeze(
-                {EventTypes.Member: None, EventTypes.Create: None},
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {EventTypes.Member: None, EventTypes.CanonicalAlias: None},
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Create: None,
-                    EventTypes.Member: set(),
-                    EventTypes.CanonicalAlias: set(),
-                },
-                include_others=True,
-            ),
-        )
-
-        # (wildcard on state keys) - (specific state keys)
-        # This one is an over-approximation because we can't represent
-        # 'all state keys except a few named examples'
-        # This also shows that the resultant state filter is normalised.
-        self.assert_difference(
-            StateFilter.freeze({EventTypes.Member: None}, include_others=True),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr"},
-                    EventTypes.Create: {""},
-                },
-                include_others=False,
-            ),
-            StateFilter(types=frozendict(), include_others=True),
-        )
-
-        # (wildcard on state keys) - (no state keys)
-        self.assert_difference(
-            StateFilter.freeze(
-                {EventTypes.Member: None},
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: set(),
-                },
-                include_others=False,
-            ),
-            StateFilter(
-                types=frozendict(),
-                include_others=True,
-            ),
-        )
-
-        # (specific state keys) - (wildcard on state keys):
-        self.assert_difference(
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                },
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {EventTypes.Member: None},
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.CanonicalAlias: {""},
-                    EventTypes.Member: set(),
-                },
-                include_others=True,
-            ),
-        )
-
-        # (specific state keys) - (specific state keys)
-        self.assert_difference(
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                },
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr"},
-                },
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@spqr:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                },
-                include_others=True,
-            ),
-        )
-
-        # (specific state keys) - (no state keys)
-        self.assert_difference(
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                },
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: set(),
-                },
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                },
-                include_others=True,
-            ),
-        )
-
-    def test_state_filter_difference_include_other_minus_include_other(self) -> None:
-        """
-        Tests the StateFilter.approx_difference method
-        where, in a.approx_difference(b), both a and b have the include_others
-        flag set.
-        """
-        # (wildcard on state keys) - (wildcard on state keys):
-        self.assert_difference(
-            StateFilter.freeze(
-                {EventTypes.Member: None, EventTypes.Create: None},
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {EventTypes.Member: None, EventTypes.CanonicalAlias: None},
-                include_others=True,
-            ),
-            StateFilter(types=frozendict(), include_others=False),
-        )
-
-        # (wildcard on state keys) - (specific state keys)
-        # This one is an over-approximation because we can't represent
-        # 'all state keys except a few named examples'
-        self.assert_difference(
-            StateFilter.freeze({EventTypes.Member: None}, include_others=True),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                },
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {EventTypes.Member: None, EventTypes.CanonicalAlias: None},
-                include_others=False,
-            ),
-        )
-
-        # (wildcard on state keys) - (no state keys)
-        self.assert_difference(
-            StateFilter.freeze(
-                {EventTypes.Member: None},
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: set(),
-                },
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {EventTypes.Member: None},
-                include_others=False,
-            ),
-        )
-
-        # (specific state keys) - (wildcard on state keys):
-        self.assert_difference(
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                },
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {EventTypes.Member: None},
-                include_others=True,
-            ),
-            StateFilter(
-                types=frozendict(),
-                include_others=False,
-            ),
-        )
-
-        # (specific state keys) - (specific state keys)
-        # This one is an over-approximation because we can't represent
-        # 'all state keys except a few named examples'
-        self.assert_difference(
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                    EventTypes.Create: {""},
-                },
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr"},
-                    EventTypes.Create: set(),
-                },
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@spqr:spqr"},
-                    EventTypes.Create: {""},
-                },
-                include_others=False,
-            ),
-        )
-
-        # (specific state keys) - (no state keys)
-        self.assert_difference(
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                },
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: set(),
-                },
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
-                },
-                include_others=False,
-            ),
-        )
-
-    def test_state_filter_difference_no_include_other_minus_include_other(self) -> None:
-        """
-        Tests the StateFilter.approx_difference method
-        where, in a.approx_difference(b), only b has the include_others flag set.
-        """
-        # (wildcard on state keys) - (wildcard on state keys):
-        self.assert_difference(
-            StateFilter.freeze(
-                {EventTypes.Member: None, EventTypes.Create: None},
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {EventTypes.Member: None, EventTypes.CanonicalAlias: None},
-                include_others=True,
-            ),
-            StateFilter(types=frozendict(), include_others=False),
-        )
-
-        # (wildcard on state keys) - (specific state keys)
-        # This one is an over-approximation because we can't represent
-        # 'all state keys except a few named examples'
-        self.assert_difference(
-            StateFilter.freeze({EventTypes.Member: None}, include_others=False),
-            StateFilter.freeze(
-                {EventTypes.Member: {"@wombat:spqr"}},
-                include_others=True,
-            ),
-            StateFilter.freeze({EventTypes.Member: None}, include_others=False),
-        )
-
-        # (wildcard on state keys) - (no state keys)
-        self.assert_difference(
-            StateFilter.freeze(
-                {EventTypes.Member: None},
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: set(),
-                },
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {EventTypes.Member: None},
-                include_others=False,
-            ),
-        )
-
-        # (specific state keys) - (wildcard on state keys):
-        self.assert_difference(
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                },
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {EventTypes.Member: None},
-                include_others=True,
-            ),
-            StateFilter(
-                types=frozendict(),
-                include_others=False,
-            ),
-        )
-
-        # (specific state keys) - (specific state keys)
-        # This one is an over-approximation because we can't represent
-        # 'all state keys except a few named examples'
-        self.assert_difference(
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                },
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr"},
-                },
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@spqr:spqr"},
-                },
-                include_others=False,
-            ),
-        )
-
-        # (specific state keys) - (no state keys)
-        self.assert_difference(
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
-                    EventTypes.CanonicalAlias: {""},
-                },
-                include_others=False,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: set(),
-                },
-                include_others=True,
-            ),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
-                },
-                include_others=False,
-            ),
-        )
-
-    def test_state_filter_difference_simple_cases(self) -> None:
-        """
-        Tests some very simple cases of the StateFilter approx_difference,
-        that are not explicitly tested by the more in-depth tests.
-        """
-
-        self.assert_difference(StateFilter.all(), StateFilter.all(), StateFilter.none())
-
-        self.assert_difference(
-            StateFilter.all(),
-            StateFilter.none(),
-            StateFilter.all(),
-        )
-
-
-class StateFilterTestCase(TestCase):
-    def test_return_expanded(self) -> None:
-        """
-        Tests the behaviour of the return_expanded() function that expands
-        StateFilters to include more state types (for the sake of cache hit rate).
-        """
-
-        self.assertEqual(StateFilter.all().return_expanded(), StateFilter.all())
-
-        self.assertEqual(StateFilter.none().return_expanded(), StateFilter.none())
-
-        # Concrete-only state filters stay the same
-        # (Case: mixed filter)
-        self.assertEqual(
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:test", "@alicia:test"},
-                    "some.other.state.type": {""},
-                },
-                include_others=False,
-            ).return_expanded(),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:test", "@alicia:test"},
-                    "some.other.state.type": {""},
-                },
-                include_others=False,
-            ),
-        )
-
-        # Concrete-only state filters stay the same
-        # (Case: non-member-only filter)
-        self.assertEqual(
-            StateFilter.freeze(
-                {"some.other.state.type": {""}}, include_others=False
-            ).return_expanded(),
-            StateFilter.freeze({"some.other.state.type": {""}}, include_others=False),
-        )
-
-        # Concrete-only state filters stay the same
-        # (Case: member-only filter)
-        self.assertEqual(
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:test", "@alicia:test"},
-                },
-                include_others=False,
-            ).return_expanded(),
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:test", "@alicia:test"},
-                },
-                include_others=False,
-            ),
-        )
-
-        # Wildcard member-only state filters stay the same
-        self.assertEqual(
-            StateFilter.freeze(
-                {EventTypes.Member: None},
-                include_others=False,
-            ).return_expanded(),
-            StateFilter.freeze(
-                {EventTypes.Member: None},
-                include_others=False,
-            ),
-        )
-
-        # If there is a wildcard in the non-member portion of the filter,
-        # it's expanded to include ALL non-member events.
-        # (Case: mixed filter)
-        self.assertEqual(
-            StateFilter.freeze(
-                {
-                    EventTypes.Member: {"@wombat:test", "@alicia:test"},
-                    "some.other.state.type": None,
-                },
-                include_others=False,
-            ).return_expanded(),
-            StateFilter.freeze(
-                {EventTypes.Member: {"@wombat:test", "@alicia:test"}},
-                include_others=True,
-            ),
-        )
-
-        # If there is a wildcard in the non-member portion of the filter,
-        # it's expanded to include ALL non-member events.
-        # (Case: non-member-only filter)
-        self.assertEqual(
-            StateFilter.freeze(
-                {
-                    "some.other.state.type": None,
-                },
-                include_others=False,
-            ).return_expanded(),
-            StateFilter.freeze({EventTypes.Member: set()}, include_others=True),
-        )
-        self.assertEqual(
-            StateFilter.freeze(
-                {
-                    "some.other.state.type": None,
-                    "yet.another.state.type": {"wombat"},
-                },
-                include_others=False,
-            ).return_expanded(),
-            StateFilter.freeze({EventTypes.Member: set()}, include_others=True),
-        )
diff --git a/tests/types/__init__.py b/tests/types/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/types/test_state.py b/tests/types/test_state.py
new file mode 100644
index 0000000000..eb809f9fb7
--- /dev/null
+++ b/tests/types/test_state.py
@@ -0,0 +1,627 @@
+from frozendict import frozendict
+
+from synapse.api.constants import EventTypes
+from synapse.types.state import StateFilter
+
+from tests.unittest import TestCase
+
+
+class StateFilterDifferenceTestCase(TestCase):
+    def assert_difference(
+        self, minuend: StateFilter, subtrahend: StateFilter, expected: StateFilter
+    ) -> None:
+        self.assertEqual(
+            minuend.approx_difference(subtrahend),
+            expected,
+            f"StateFilter difference not correct:\n\n\t{minuend!r}\nminus\n\t{subtrahend!r}\nwas\n\t{minuend.approx_difference(subtrahend)}\nexpected\n\t{expected}",
+        )
+
+    def test_state_filter_difference_no_include_other_minus_no_include_other(
+        self,
+    ) -> None:
+        """
+        Tests the StateFilter.approx_difference method
+        where, in a.approx_difference(b), both a and b do not have the
+        include_others flag set.
+        """
+        # (wildcard on state keys) - (wildcard on state keys):
+        self.assert_difference(
+            StateFilter.freeze(
+                {EventTypes.Member: None, EventTypes.Create: None},
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {EventTypes.Member: None, EventTypes.CanonicalAlias: None},
+                include_others=False,
+            ),
+            StateFilter.freeze({EventTypes.Create: None}, include_others=False),
+        )
+
+        # (wildcard on state keys) - (specific state keys)
+        # This one is an over-approximation because we can't represent
+        # 'all state keys except a few named examples'
+        self.assert_difference(
+            StateFilter.freeze({EventTypes.Member: None}, include_others=False),
+            StateFilter.freeze(
+                {EventTypes.Member: {"@wombat:spqr"}},
+                include_others=False,
+            ),
+            StateFilter.freeze({EventTypes.Member: None}, include_others=False),
+        )
+
+        # (wildcard on state keys) - (no state keys)
+        self.assert_difference(
+            StateFilter.freeze(
+                {EventTypes.Member: None},
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: set(),
+                },
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {EventTypes.Member: None},
+                include_others=False,
+            ),
+        )
+
+        # (specific state keys) - (wildcard on state keys):
+        self.assert_difference(
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                },
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {EventTypes.Member: None},
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {EventTypes.CanonicalAlias: {""}},
+                include_others=False,
+            ),
+        )
+
+        # (specific state keys) - (specific state keys)
+        self.assert_difference(
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                },
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr"},
+                },
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@spqr:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                },
+                include_others=False,
+            ),
+        )
+
+        # (specific state keys) - (no state keys)
+        self.assert_difference(
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                },
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: set(),
+                },
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                },
+                include_others=False,
+            ),
+        )
+
+    def test_state_filter_difference_include_other_minus_no_include_other(self) -> None:
+        """
+        Tests the StateFilter.approx_difference method
+        where, in a.approx_difference(b), only a has the include_others flag set.
+        """
+        # (wildcard on state keys) - (wildcard on state keys):
+        self.assert_difference(
+            StateFilter.freeze(
+                {EventTypes.Member: None, EventTypes.Create: None},
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {EventTypes.Member: None, EventTypes.CanonicalAlias: None},
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Create: None,
+                    EventTypes.Member: set(),
+                    EventTypes.CanonicalAlias: set(),
+                },
+                include_others=True,
+            ),
+        )
+
+        # (wildcard on state keys) - (specific state keys)
+        # This one is an over-approximation because we can't represent
+        # 'all state keys except a few named examples'
+        # This also shows that the resultant state filter is normalised.
+        self.assert_difference(
+            StateFilter.freeze({EventTypes.Member: None}, include_others=True),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr"},
+                    EventTypes.Create: {""},
+                },
+                include_others=False,
+            ),
+            StateFilter(types=frozendict(), include_others=True),
+        )
+
+        # (wildcard on state keys) - (no state keys)
+        self.assert_difference(
+            StateFilter.freeze(
+                {EventTypes.Member: None},
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: set(),
+                },
+                include_others=False,
+            ),
+            StateFilter(
+                types=frozendict(),
+                include_others=True,
+            ),
+        )
+
+        # (specific state keys) - (wildcard on state keys):
+        self.assert_difference(
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                },
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {EventTypes.Member: None},
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.CanonicalAlias: {""},
+                    EventTypes.Member: set(),
+                },
+                include_others=True,
+            ),
+        )
+
+        # (specific state keys) - (specific state keys)
+        self.assert_difference(
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                },
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr"},
+                },
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@spqr:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                },
+                include_others=True,
+            ),
+        )
+
+        # (specific state keys) - (no state keys)
+        self.assert_difference(
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                },
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: set(),
+                },
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                },
+                include_others=True,
+            ),
+        )
+
+    def test_state_filter_difference_include_other_minus_include_other(self) -> None:
+        """
+        Tests the StateFilter.approx_difference method
+        where, in a.approx_difference(b), both a and b have the include_others
+        flag set.
+        """
+        # (wildcard on state keys) - (wildcard on state keys):
+        self.assert_difference(
+            StateFilter.freeze(
+                {EventTypes.Member: None, EventTypes.Create: None},
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {EventTypes.Member: None, EventTypes.CanonicalAlias: None},
+                include_others=True,
+            ),
+            StateFilter(types=frozendict(), include_others=False),
+        )
+
+        # (wildcard on state keys) - (specific state keys)
+        # This one is an over-approximation because we can't represent
+        # 'all state keys except a few named examples'
+        self.assert_difference(
+            StateFilter.freeze({EventTypes.Member: None}, include_others=True),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                },
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {EventTypes.Member: None, EventTypes.CanonicalAlias: None},
+                include_others=False,
+            ),
+        )
+
+        # (wildcard on state keys) - (no state keys)
+        self.assert_difference(
+            StateFilter.freeze(
+                {EventTypes.Member: None},
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: set(),
+                },
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {EventTypes.Member: None},
+                include_others=False,
+            ),
+        )
+
+        # (specific state keys) - (wildcard on state keys):
+        self.assert_difference(
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                },
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {EventTypes.Member: None},
+                include_others=True,
+            ),
+            StateFilter(
+                types=frozendict(),
+                include_others=False,
+            ),
+        )
+
+        # (specific state keys) - (specific state keys)
+        # This one is an over-approximation because we can't represent
+        # 'all state keys except a few named examples'
+        self.assert_difference(
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                    EventTypes.Create: {""},
+                },
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr"},
+                    EventTypes.Create: set(),
+                },
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@spqr:spqr"},
+                    EventTypes.Create: {""},
+                },
+                include_others=False,
+            ),
+        )
+
+        # (specific state keys) - (no state keys)
+        self.assert_difference(
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                },
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: set(),
+                },
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
+                },
+                include_others=False,
+            ),
+        )
+
+    def test_state_filter_difference_no_include_other_minus_include_other(self) -> None:
+        """
+        Tests the StateFilter.approx_difference method
+        where, in a.approx_difference(b), only b has the include_others flag set.
+        """
+        # (wildcard on state keys) - (wildcard on state keys):
+        self.assert_difference(
+            StateFilter.freeze(
+                {EventTypes.Member: None, EventTypes.Create: None},
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {EventTypes.Member: None, EventTypes.CanonicalAlias: None},
+                include_others=True,
+            ),
+            StateFilter(types=frozendict(), include_others=False),
+        )
+
+        # (wildcard on state keys) - (specific state keys)
+        # This one is an over-approximation because we can't represent
+        # 'all state keys except a few named examples'
+        self.assert_difference(
+            StateFilter.freeze({EventTypes.Member: None}, include_others=False),
+            StateFilter.freeze(
+                {EventTypes.Member: {"@wombat:spqr"}},
+                include_others=True,
+            ),
+            StateFilter.freeze({EventTypes.Member: None}, include_others=False),
+        )
+
+        # (wildcard on state keys) - (no state keys)
+        self.assert_difference(
+            StateFilter.freeze(
+                {EventTypes.Member: None},
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: set(),
+                },
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {EventTypes.Member: None},
+                include_others=False,
+            ),
+        )
+
+        # (specific state keys) - (wildcard on state keys):
+        self.assert_difference(
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                },
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {EventTypes.Member: None},
+                include_others=True,
+            ),
+            StateFilter(
+                types=frozendict(),
+                include_others=False,
+            ),
+        )
+
+        # (specific state keys) - (specific state keys)
+        # This one is an over-approximation because we can't represent
+        # 'all state keys except a few named examples'
+        self.assert_difference(
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                },
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr"},
+                },
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@spqr:spqr"},
+                },
+                include_others=False,
+            ),
+        )
+
+        # (specific state keys) - (no state keys)
+        self.assert_difference(
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
+                    EventTypes.CanonicalAlias: {""},
+                },
+                include_others=False,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: set(),
+                },
+                include_others=True,
+            ),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:spqr", "@spqr:spqr"},
+                },
+                include_others=False,
+            ),
+        )
+
+    def test_state_filter_difference_simple_cases(self) -> None:
+        """
+        Tests some very simple cases of the StateFilter approx_difference,
+        that are not explicitly tested by the more in-depth tests.
+        """
+
+        self.assert_difference(StateFilter.all(), StateFilter.all(), StateFilter.none())
+
+        self.assert_difference(
+            StateFilter.all(),
+            StateFilter.none(),
+            StateFilter.all(),
+        )
+
+
+class StateFilterTestCase(TestCase):
+    def test_return_expanded(self) -> None:
+        """
+        Tests the behaviour of the return_expanded() function that expands
+        StateFilters to include more state types (for the sake of cache hit rate).
+        """
+
+        self.assertEqual(StateFilter.all().return_expanded(), StateFilter.all())
+
+        self.assertEqual(StateFilter.none().return_expanded(), StateFilter.none())
+
+        # Concrete-only state filters stay the same
+        # (Case: mixed filter)
+        self.assertEqual(
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:test", "@alicia:test"},
+                    "some.other.state.type": {""},
+                },
+                include_others=False,
+            ).return_expanded(),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:test", "@alicia:test"},
+                    "some.other.state.type": {""},
+                },
+                include_others=False,
+            ),
+        )
+
+        # Concrete-only state filters stay the same
+        # (Case: non-member-only filter)
+        self.assertEqual(
+            StateFilter.freeze(
+                {"some.other.state.type": {""}}, include_others=False
+            ).return_expanded(),
+            StateFilter.freeze({"some.other.state.type": {""}}, include_others=False),
+        )
+
+        # Concrete-only state filters stay the same
+        # (Case: member-only filter)
+        self.assertEqual(
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:test", "@alicia:test"},
+                },
+                include_others=False,
+            ).return_expanded(),
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:test", "@alicia:test"},
+                },
+                include_others=False,
+            ),
+        )
+
+        # Wildcard member-only state filters stay the same
+        self.assertEqual(
+            StateFilter.freeze(
+                {EventTypes.Member: None},
+                include_others=False,
+            ).return_expanded(),
+            StateFilter.freeze(
+                {EventTypes.Member: None},
+                include_others=False,
+            ),
+        )
+
+        # If there is a wildcard in the non-member portion of the filter,
+        # it's expanded to include ALL non-member events.
+        # (Case: mixed filter)
+        self.assertEqual(
+            StateFilter.freeze(
+                {
+                    EventTypes.Member: {"@wombat:test", "@alicia:test"},
+                    "some.other.state.type": None,
+                },
+                include_others=False,
+            ).return_expanded(),
+            StateFilter.freeze(
+                {EventTypes.Member: {"@wombat:test", "@alicia:test"}},
+                include_others=True,
+            ),
+        )
+
+        # If there is a wildcard in the non-member portion of the filter,
+        # it's expanded to include ALL non-member events.
+        # (Case: non-member-only filter)
+        self.assertEqual(
+            StateFilter.freeze(
+                {
+                    "some.other.state.type": None,
+                },
+                include_others=False,
+            ).return_expanded(),
+            StateFilter.freeze({EventTypes.Member: set()}, include_others=True),
+        )
+        self.assertEqual(
+            StateFilter.freeze(
+                {
+                    "some.other.state.type": None,
+                    "yet.another.state.type": {"wombat"},
+                },
+                include_others=False,
+            ).return_expanded(),
+            StateFilter.freeze({EventTypes.Member: set()}, include_others=True),
+        )
-- 
cgit 1.5.1


From 62ed877433e23ba055cbc69a089c09d03c67681d Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Tue, 13 Dec 2022 13:19:19 +0000
Subject: Improve validation of field size limits in events. (#14664)

---
 changelog.d/14664.bugfix                 |  1 +
 stubs/synapse/synapse_rust/push.pyi      |  2 +-
 synapse/api/constants.py                 |  1 +
 synapse/api/errors.py                    | 11 ++++-
 synapse/api/room_versions.py             | 32 +++++++-------
 synapse/event_auth.py                    | 76 +++++++++++++++++++++++++++++---
 synapse/handlers/federation_event.py     | 20 +++++++++
 synapse/push/bulk_push_rule_evaluator.py |  6 +--
 8 files changed, 119 insertions(+), 30 deletions(-)
 create mode 100644 changelog.d/14664.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14664.bugfix b/changelog.d/14664.bugfix
new file mode 100644
index 0000000000..a15df9a89d
--- /dev/null
+++ b/changelog.d/14664.bugfix
@@ -0,0 +1 @@
+Improve validation of field size limits in events.
\ No newline at end of file
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index a6a586a0b5..dab5d4aff7 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -45,7 +45,7 @@ class PushRuleEvaluator:
         notification_power_levels: Mapping[str, int],
         related_events_flattened: Mapping[str, Mapping[str, str]],
         related_event_match_enabled: bool,
-        room_version_feature_flags: list[str],
+        room_version_feature_flags: Tuple[str, ...],
         msc3931_enabled: bool,
     ): ...
     def run(
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index 89723d24fa..6a5e7171da 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -152,6 +152,7 @@ class EduTypes:
 
 class RejectedReason:
     AUTH_ERROR: Final = "auth_error"
+    OVERSIZED_EVENT: Final = "oversized_event"
 
 
 class RoomCreationPreset:
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index 76ef12ed3a..c2c177fd71 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -424,8 +424,17 @@ class ResourceLimitError(SynapseError):
 class EventSizeError(SynapseError):
     """An error raised when an event is too big."""
 
-    def __init__(self, msg: str):
+    def __init__(self, msg: str, unpersistable: bool):
+        """
+        unpersistable:
+            if True, the PDU must not be persisted, not even as a rejected PDU
+            when received over federation.
+            This is notably true when the entire PDU exceeds the size limit for a PDU,
+            (as opposed to an individual key's size limit being exceeded).
+        """
+
         super().__init__(413, msg, Codes.TOO_LARGE)
+        self.unpersistable = unpersistable
 
 
 class LoginError(SynapseError):
diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py
index ac62011c9f..c397920fe5 100644
--- a/synapse/api/room_versions.py
+++ b/synapse/api/room_versions.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Callable, Dict, List, Optional
+from typing import Callable, Dict, Optional, Tuple
 
 import attr
 
@@ -103,7 +103,7 @@ class RoomVersion:
     # is not enough to mark it "supported": the push rule evaluator also needs to
     # support the flag. Unknown flags are ignored by the evaluator, making conditions
     # fail if used.
-    msc3931_push_features: List[str]  # values from PushRuleRoomFlag
+    msc3931_push_features: Tuple[str, ...]  # values from PushRuleRoomFlag
 
 
 class RoomVersions:
@@ -124,7 +124,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
-        msc3931_push_features=[],
+        msc3931_push_features=(),
     )
     V2 = RoomVersion(
         "2",
@@ -143,7 +143,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
-        msc3931_push_features=[],
+        msc3931_push_features=(),
     )
     V3 = RoomVersion(
         "3",
@@ -162,7 +162,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
-        msc3931_push_features=[],
+        msc3931_push_features=(),
     )
     V4 = RoomVersion(
         "4",
@@ -181,7 +181,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
-        msc3931_push_features=[],
+        msc3931_push_features=(),
     )
     V5 = RoomVersion(
         "5",
@@ -200,7 +200,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
-        msc3931_push_features=[],
+        msc3931_push_features=(),
     )
     V6 = RoomVersion(
         "6",
@@ -219,7 +219,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
-        msc3931_push_features=[],
+        msc3931_push_features=(),
     )
     MSC2176 = RoomVersion(
         "org.matrix.msc2176",
@@ -238,7 +238,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
-        msc3931_push_features=[],
+        msc3931_push_features=(),
     )
     V7 = RoomVersion(
         "7",
@@ -257,7 +257,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
-        msc3931_push_features=[],
+        msc3931_push_features=(),
     )
     V8 = RoomVersion(
         "8",
@@ -276,7 +276,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
-        msc3931_push_features=[],
+        msc3931_push_features=(),
     )
     V9 = RoomVersion(
         "9",
@@ -295,7 +295,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
-        msc3931_push_features=[],
+        msc3931_push_features=(),
     )
     MSC3787 = RoomVersion(
         "org.matrix.msc3787",
@@ -314,7 +314,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=False,
-        msc3931_push_features=[],
+        msc3931_push_features=(),
     )
     V10 = RoomVersion(
         "10",
@@ -333,7 +333,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=True,
-        msc3931_push_features=[],
+        msc3931_push_features=(),
     )
     MSC2716v4 = RoomVersion(
         "org.matrix.msc2716v4",
@@ -352,7 +352,7 @@ class RoomVersions:
         msc2716_redactions=True,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
-        msc3931_push_features=[],
+        msc3931_push_features=(),
     )
     MSC1767v10 = RoomVersion(
         # MSC1767 (Extensible Events) based on room version "10"
@@ -372,7 +372,7 @@ class RoomVersions:
         msc2716_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=True,
-        msc3931_push_features=[PushRuleRoomFlag.EXTENSIBLE_EVENTS],
+        msc3931_push_features=(PushRuleRoomFlag.EXTENSIBLE_EVENTS,),
     )
 
 
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index bab31e33c5..d437b7e5d1 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -52,6 +52,7 @@ from synapse.api.room_versions import (
     KNOWN_ROOM_VERSIONS,
     EventFormatVersions,
     RoomVersion,
+    RoomVersions,
 )
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
 from synapse.types import MutableStateMap, StateMap, UserID, get_domain_from_id
@@ -341,19 +342,80 @@ def check_state_dependent_auth_rules(
     logger.debug("Allowing! %s", event)
 
 
+# Set of room versions where Synapse did not apply event key size limits
+# in bytes, but rather in codepoints.
+# In these room versions, we are more lenient with event size validation.
+LENIENT_EVENT_BYTE_LIMITS_ROOM_VERSIONS = {
+    RoomVersions.V1,
+    RoomVersions.V2,
+    RoomVersions.V3,
+    RoomVersions.V4,
+    RoomVersions.V5,
+    RoomVersions.V6,
+    RoomVersions.MSC2176,
+    RoomVersions.V7,
+    RoomVersions.V8,
+    RoomVersions.V9,
+    RoomVersions.MSC3787,
+    RoomVersions.V10,
+    RoomVersions.MSC2716v4,
+    RoomVersions.MSC1767v10,
+}
+
+
 def _check_size_limits(event: "EventBase") -> None:
+    """
+    Checks the size limits in a PDU.
+
+    The entire size limit of the PDU is checked first.
+    Then the size of fields is checked, first in codepoints and then in bytes.
+
+    The codepoint size limits are only for Synapse compatibility.
+
+    Raises:
+        EventSizeError:
+            when a size limit has been violated.
+
+            unpersistable=True if Synapse never would have accepted the event and
+                the PDU must NOT be persisted.
+
+            unpersistable=False if a prior version of Synapse would have accepted the
+                event and so the PDU must be persisted as rejected to avoid
+                breaking the room.
+    """
+
+    # Whole PDU check
+    if len(encode_canonical_json(event.get_pdu_json())) > MAX_PDU_SIZE:
+        raise EventSizeError("event too large", unpersistable=True)
+
+    # Codepoint size check: Synapse always enforced these limits, so apply
+    # them strictly.
     if len(event.user_id) > 255:
-        raise EventSizeError("'user_id' too large")
+        raise EventSizeError("'user_id' too large", unpersistable=True)
     if len(event.room_id) > 255:
-        raise EventSizeError("'room_id' too large")
+        raise EventSizeError("'room_id' too large", unpersistable=True)
     if event.is_state() and len(event.state_key) > 255:
-        raise EventSizeError("'state_key' too large")
+        raise EventSizeError("'state_key' too large", unpersistable=True)
     if len(event.type) > 255:
-        raise EventSizeError("'type' too large")
+        raise EventSizeError("'type' too large", unpersistable=True)
     if len(event.event_id) > 255:
-        raise EventSizeError("'event_id' too large")
-    if len(encode_canonical_json(event.get_pdu_json())) > MAX_PDU_SIZE:
-        raise EventSizeError("event too large")
+        raise EventSizeError("'event_id' too large", unpersistable=True)
+
+    strict_byte_limits = (
+        event.room_version not in LENIENT_EVENT_BYTE_LIMITS_ROOM_VERSIONS
+    )
+
+    # Byte size check: if these fail, then be lenient to avoid breaking rooms.
+    if len(event.user_id.encode("utf-8")) > 255:
+        raise EventSizeError("'user_id' too large", unpersistable=strict_byte_limits)
+    if len(event.room_id.encode("utf-8")) > 255:
+        raise EventSizeError("'room_id' too large", unpersistable=strict_byte_limits)
+    if event.is_state() and len(event.state_key.encode("utf-8")) > 255:
+        raise EventSizeError("'state_key' too large", unpersistable=strict_byte_limits)
+    if len(event.type.encode("utf-8")) > 255:
+        raise EventSizeError("'type' too large", unpersistable=strict_byte_limits)
+    if len(event.event_id.encode("utf-8")) > 255:
+        raise EventSizeError("'event_id' too large", unpersistable=strict_byte_limits)
 
 
 def _check_create(event: "EventBase") -> None:
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index d2facdab60..66aca2f864 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -43,6 +43,7 @@ from synapse.api.constants import (
 from synapse.api.errors import (
     AuthError,
     Codes,
+    EventSizeError,
     FederationError,
     FederationPullAttemptBackoffError,
     HttpResponseException,
@@ -1736,6 +1737,15 @@ class FederationEventHandler:
                 except AuthError as e:
                     logger.warning("Rejecting %r because %s", event, e)
                     context.rejected = RejectedReason.AUTH_ERROR
+                except EventSizeError as e:
+                    if e.unpersistable:
+                        # This event is completely unpersistable.
+                        raise e
+                    # Otherwise, we are somewhat lenient and just persist the event
+                    # as rejected, for moderate compatibility with older Synapse
+                    # versions.
+                    logger.warning("While validating received event %r: %s", event, e)
+                    context.rejected = RejectedReason.OVERSIZED_EVENT
 
             events_and_contexts_to_persist.append((event, context))
 
@@ -1781,6 +1791,16 @@ class FederationEventHandler:
             # TODO: use a different rejected reason here?
             context.rejected = RejectedReason.AUTH_ERROR
             return
+        except EventSizeError as e:
+            if e.unpersistable:
+                # This event is completely unpersistable.
+                raise e
+            # Otherwise, we are somewhat lenient and just persist the event
+            # as rejected, for moderate compatibility with older Synapse
+            # versions.
+            logger.warning("While validating received event %r: %s", event, e)
+            context.rejected = RejectedReason.OVERSIZED_EVENT
+            return
 
         # next, check that we have all of the event's auth events.
         #
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 36e5b327ef..f27ba64d53 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -342,10 +342,6 @@ class BulkPushRuleEvaluator:
             for user_id, level in notification_levels.items():
                 notification_levels[user_id] = int(level)
 
-        room_version_features = event.room_version.msc3931_push_features
-        if not room_version_features:
-            room_version_features = []
-
         evaluator = PushRuleEvaluator(
             _flatten_dict(event, room_version=event.room_version),
             room_member_count,
@@ -353,7 +349,7 @@ class BulkPushRuleEvaluator:
             notification_levels,
             related_events,
             self._related_event_match_enabled,
-            room_version_features,
+            event.room_version.msc3931_push_features,
             self.hs.config.experimental.msc1767_enabled,  # MSC3931 flag
         )
 
-- 
cgit 1.5.1


From 2920e540bfd263e33fa25a6f6d642a9f2b965c2f Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 13 Dec 2022 08:43:53 -0500
Subject: Use the room type from stats in hierarchy response. (#14263)

This avoids pulling additional state information (and events) from
the database for each item returned in the hierarchy response.

The room type might be out of date until a background update finishes
running; the worst impact of this would be spaces being treated as rooms
in the hierarchy response. This should self-heal once the background
update finishes.
---
 changelog.d/14263.misc           |  1 +
 synapse/handlers/room_summary.py | 14 +++++---------
 2 files changed, 6 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/14263.misc

(limited to 'synapse')

diff --git a/changelog.d/14263.misc b/changelog.d/14263.misc
new file mode 100644
index 0000000000..11d9446a4b
--- /dev/null
+++ b/changelog.d/14263.misc
@@ -0,0 +1 @@
+Improve performance of the `/hierarchy` endpoint.
diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py
index 8d08625237..c6b869c6f4 100644
--- a/synapse/handlers/room_summary.py
+++ b/synapse/handlers/room_summary.py
@@ -20,7 +20,6 @@ from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Sequence, Set,
 import attr
 
 from synapse.api.constants import (
-    EventContentFields,
     EventTypes,
     HistoryVisibility,
     JoinRules,
@@ -701,13 +700,6 @@ class RoomSummaryHandler:
         # there should always be an entry
         assert stats is not None, "unable to retrieve stats for %s" % (room_id,)
 
-        current_state_ids = await self._storage_controllers.state.get_current_state_ids(
-            room_id
-        )
-        create_event = await self._store.get_event(
-            current_state_ids[(EventTypes.Create, "")]
-        )
-
         entry = {
             "room_id": stats["room_id"],
             "name": stats["name"],
@@ -720,7 +712,7 @@ class RoomSummaryHandler:
                 stats["history_visibility"] == HistoryVisibility.WORLD_READABLE
             ),
             "guest_can_join": stats["guest_access"] == "can_join",
-            "room_type": create_event.content.get(EventContentFields.ROOM_TYPE),
+            "room_type": stats["room_type"],
         }
 
         if self._msc3266_enabled:
@@ -730,7 +722,11 @@ class RoomSummaryHandler:
         # Federation requests need to provide additional information so the
         # requested server is able to filter the response appropriately.
         if for_federation:
+            current_state_ids = (
+                await self._storage_controllers.state.get_current_state_ids(room_id)
+            )
             room_version = await self._store.get_room_version(room_id)
+
             if await self._event_auth_handler.has_restricted_join_rules(
                 current_state_ids, room_version
             ):
-- 
cgit 1.5.1


From e512b25cd1618941d165b37f0518ec5765a3b23d Mon Sep 17 00:00:00 2001
From: Jeyachandran Rathnam <jai.rathnem@gmail.com>
Date: Wed, 14 Dec 2022 07:02:28 -0500
Subject: Fix #11308 : Remove dependency on jquery on reCAPTCHA page (#14672)

---
 changelog.d/14672.misc               | 1 +
 synapse/res/templates/recaptcha.html | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14672.misc

(limited to 'synapse')

diff --git a/changelog.d/14672.misc b/changelog.d/14672.misc
new file mode 100644
index 0000000000..b94ebed971
--- /dev/null
+++ b/changelog.d/14672.misc
@@ -0,0 +1 @@
+Remove dependency on jQuery on reCAPTCHA page.
diff --git a/synapse/res/templates/recaptcha.html b/synapse/res/templates/recaptcha.html
index 8204928cdf..f00992a24b 100644
--- a/synapse/res/templates/recaptcha.html
+++ b/synapse/res/templates/recaptcha.html
@@ -3,11 +3,10 @@
 
 {% block header %}
 <script src="https://www.recaptcha.net/recaptcha/api.js" async defer></script>
-<script src="//code.jquery.com/jquery-1.11.2.min.js"></script>
 <link rel="stylesheet" href="/_matrix/static/client/register/style.css">
 <script>
 function captchaDone() {
-    $('#registrationForm').submit();
+    document.getElementById('registrationForm').submit(); 
 }
 </script>
 {% endblock %}
-- 
cgit 1.5.1


From 24a97b3e7144720545df69c321e320c9d35166a6 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 14 Dec 2022 09:25:33 -0500
Subject: Delete event_push_summary_unique_index again. (#14669)

If a Synapse deployment upgraded (from < 1.62.0 to >= 1.70.0) then it
is possible for schema deltas to run before background updates causing
drift in the database schema due to:

1. A delta registered a background update to create an index.
2. A delta dropped the above index if it exists (but it won't exist yet since
  the background job hasn't run).
3. The code assumed the index was dropped.

To fix this we:

1. Cancel the background update which could create the index.
2. Drop the index again.
3. Drop a related index which is dropped by the background update.
---
 changelog.d/14669.bugfix                           |  1 +
 .../storage/databases/main/event_push_actions.py   |  9 ------
 .../schema/main/delta/73/23_fix_thread_index.sql   | 33 ++++++++++++++++++++++
 3 files changed, 34 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/14669.bugfix
 create mode 100644 synapse/storage/schema/main/delta/73/23_fix_thread_index.sql

(limited to 'synapse')

diff --git a/changelog.d/14669.bugfix b/changelog.d/14669.bugfix
new file mode 100644
index 0000000000..bea316b065
--- /dev/null
+++ b/changelog.d/14669.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.70.0 which could cause spurious `UNIQUE constraint failed` errors in the `rotate_notifs` background job.
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 7ebe34f773..3a0c370fde 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -274,15 +274,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 self._clear_old_push_actions_staging, 30 * 60 * 1000
             )
 
-        self.db_pool.updates.register_background_index_update(
-            "event_push_summary_unique_index",
-            index_name="event_push_summary_unique_index",
-            table="event_push_summary",
-            columns=["user_id", "room_id"],
-            unique=True,
-            replaces_index="event_push_summary_user_rm",
-        )
-
         self.db_pool.updates.register_background_index_update(
             "event_push_summary_unique_index2",
             index_name="event_push_summary_unique_index2",
diff --git a/synapse/storage/schema/main/delta/73/23_fix_thread_index.sql b/synapse/storage/schema/main/delta/73/23_fix_thread_index.sql
new file mode 100644
index 0000000000..ec519ceebf
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/23_fix_thread_index.sql
@@ -0,0 +1,33 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- If a Synapse deployment made a large jump in versions (from < 1.62.0 to >= 1.70.0)
+-- in a single upgrade then it might be possible for the event_push_summary_unique_index
+-- to be created in the background from delta 71/02event_push_summary_unique.sql after
+-- delta 73/06thread_notifications_thread_id_idx.sql is executed, causing it to
+-- not drop the event_push_summary_unique_index index.
+--
+-- See https://github.com/matrix-org/synapse/issues/14641
+
+-- Stop the index from being scheduled for creation in the background.
+DELETE FROM background_updates WHERE update_name = 'event_push_summary_unique_index';
+
+-- The above background job also replaces another index, so ensure that side-effect
+-- is applied.
+DROP INDEX IF EXISTS event_push_summary_user_rm;
+
+-- Fix deployments which ran the 73/06thread_notifications_thread_id_idx.sql delta
+-- before the event_push_summary_unique_index background job was run.
+DROP INDEX IF EXISTS event_push_summary_unique_index;
-- 
cgit 1.5.1


From fb60cb16fe3cf26fbd947eec926cb4b24b8e9fc7 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Wed, 14 Dec 2022 14:47:11 +0000
Subject: Faster remote room joins: stream the un-partial-stating of events
 over replication. [rei:frrj/streams/unpsr] (#14545)

---
 changelog.d/14545.misc                             |  1 +
 synapse/handlers/federation_event.py               |  2 +
 synapse/replication/tcp/streams/__init__.py        |  7 +-
 synapse/replication/tcp/streams/partial_state.py   | 28 +++++++
 synapse/storage/databases/main/events_worker.py    | 88 ++++++++++++++++++++++
 synapse/storage/databases/main/state.py            | 34 ++++++---
 .../delta/73/22_un_partial_stated_event_stream.sql | 34 +++++++++
 ..._un_partial_stated_room_stream_seq.sql.postgres | 20 +++++
 8 files changed, 204 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/14545.misc
 create mode 100644 synapse/storage/schema/main/delta/73/22_un_partial_stated_event_stream.sql
 create mode 100644 synapse/storage/schema/main/delta/73/23_un_partial_stated_room_stream_seq.sql.postgres

(limited to 'synapse')

diff --git a/changelog.d/14545.misc b/changelog.d/14545.misc
new file mode 100644
index 0000000000..60b6761a51
--- /dev/null
+++ b/changelog.d/14545.misc
@@ -0,0 +1 @@
+Faster remote room joins: stream the un-partial-stating of events over replication.
\ No newline at end of file
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 66aca2f864..31df7f55cc 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -610,6 +610,8 @@ class FederationEventHandler:
             self._state_storage_controller.notify_event_un_partial_stated(
                 event.event_id
             )
+            # Notify that there's a new row in the un_partial_stated_events stream.
+            self._notifier.notify_replication()
 
     @trace
     async def backfill(
diff --git a/synapse/replication/tcp/streams/__init__.py b/synapse/replication/tcp/streams/__init__.py
index 8575666d9c..110f10aab9 100644
--- a/synapse/replication/tcp/streams/__init__.py
+++ b/synapse/replication/tcp/streams/__init__.py
@@ -42,7 +42,10 @@ from synapse.replication.tcp.streams._base import (
 )
 from synapse.replication.tcp.streams.events import EventsStream
 from synapse.replication.tcp.streams.federation import FederationStream
-from synapse.replication.tcp.streams.partial_state import UnPartialStatedRoomStream
+from synapse.replication.tcp.streams.partial_state import (
+    UnPartialStatedEventStream,
+    UnPartialStatedRoomStream,
+)
 
 STREAMS_MAP = {
     stream.NAME: stream
@@ -63,6 +66,7 @@ STREAMS_MAP = {
         AccountDataStream,
         UserSignatureStream,
         UnPartialStatedRoomStream,
+        UnPartialStatedEventStream,
     )
 }
 
@@ -83,4 +87,5 @@ __all__ = [
     "AccountDataStream",
     "UserSignatureStream",
     "UnPartialStatedRoomStream",
+    "UnPartialStatedEventStream",
 ]
diff --git a/synapse/replication/tcp/streams/partial_state.py b/synapse/replication/tcp/streams/partial_state.py
index 18f087ffa2..b5a2ae74b6 100644
--- a/synapse/replication/tcp/streams/partial_state.py
+++ b/synapse/replication/tcp/streams/partial_state.py
@@ -46,3 +46,31 @@ class UnPartialStatedRoomStream(Stream):
             current_token_without_instance(store.get_un_partial_stated_rooms_token),
             store.get_un_partial_stated_rooms_from_stream,
         )
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class UnPartialStatedEventStreamRow:
+    # ID of the event that has been un-partial-stated.
+    event_id: str
+
+    # True iff the rejection status of the event changed as a result of being
+    # un-partial-stated.
+    rejection_status_changed: bool
+
+
+class UnPartialStatedEventStream(Stream):
+    """
+    Stream to notify about events becoming un-partial-stated.
+    """
+
+    NAME = "un_partial_stated_event"
+    ROW_TYPE = UnPartialStatedEventStreamRow
+
+    def __init__(self, hs: "HomeServer"):
+        store = hs.get_datastores().main
+        super().__init__(
+            hs.get_instance_name(),
+            # TODO(faster_joins, multiple writers): we need to account for instance names
+            current_token_without_instance(store.get_un_partial_stated_events_token),
+            store.get_un_partial_stated_events_from_stream,
+        )
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 318fd7dc71..e19b16064b 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -70,6 +70,7 @@ from synapse.storage.database import (
 from synapse.storage.engines import PostgresEngine
 from synapse.storage.types import Cursor
 from synapse.storage.util.id_generators import (
+    AbstractStreamIdGenerator,
     AbstractStreamIdTracker,
     MultiWriterIdGenerator,
     StreamIdGenerator,
@@ -292,6 +293,93 @@ class EventsWorkerStore(SQLBaseStore):
             id_column="chain_id",
         )
 
+        self._un_partial_stated_events_stream_id_gen: AbstractStreamIdGenerator
+
+        if isinstance(database.engine, PostgresEngine):
+            self._un_partial_stated_events_stream_id_gen = MultiWriterIdGenerator(
+                db_conn=db_conn,
+                db=database,
+                stream_name="un_partial_stated_event_stream",
+                instance_name=hs.get_instance_name(),
+                tables=[
+                    ("un_partial_stated_event_stream", "instance_name", "stream_id")
+                ],
+                sequence_name="un_partial_stated_event_stream_sequence",
+                # TODO(faster_joins, multiple writers) Support multiple writers.
+                writers=["master"],
+            )
+        else:
+            self._un_partial_stated_events_stream_id_gen = StreamIdGenerator(
+                db_conn, "un_partial_stated_event_stream", "stream_id"
+            )
+
+    def get_un_partial_stated_events_token(self) -> int:
+        # TODO(faster_joins, multiple writers): This is inappropriate if there are multiple
+        #     writers because workers that don't write often will hold all
+        #     readers up.
+        return self._un_partial_stated_events_stream_id_gen.get_current_token()
+
+    async def get_un_partial_stated_events_from_stream(
+        self, instance_name: str, last_id: int, current_id: int, limit: int
+    ) -> Tuple[List[Tuple[int, Tuple[str, bool]]], int, bool]:
+        """Get updates for the un-partial-stated events replication stream.
+
+        Args:
+            instance_name: The writer we want to fetch updates from. Unused
+                here since there is only ever one writer.
+            last_id: The token to fetch updates from. Exclusive.
+            current_id: The token to fetch updates up to. Inclusive.
+            limit: The requested limit for the number of rows to return. The
+                function may return more or fewer rows.
+
+        Returns:
+            A tuple consisting of: the updates, a token to use to fetch
+            subsequent updates, and whether we returned fewer rows than exists
+            between the requested tokens due to the limit.
+
+            The token returned can be used in a subsequent call to this
+            function to get further updates.
+
+            The updates are a list of 2-tuples of stream ID and the row data
+        """
+
+        if last_id == current_id:
+            return [], current_id, False
+
+        def get_un_partial_stated_events_from_stream_txn(
+            txn: LoggingTransaction,
+        ) -> Tuple[List[Tuple[int, Tuple[str, bool]]], int, bool]:
+            sql = """
+                SELECT stream_id, event_id, rejection_status_changed
+                FROM un_partial_stated_event_stream
+                WHERE ? < stream_id AND stream_id <= ? AND instance_name = ?
+                ORDER BY stream_id ASC
+                LIMIT ?
+            """
+            txn.execute(sql, (last_id, current_id, instance_name, limit))
+            updates = [
+                (
+                    row[0],
+                    (
+                        row[1],
+                        bool(row[2]),
+                    ),
+                )
+                for row in txn
+            ]
+            limited = False
+            upto_token = current_id
+            if len(updates) >= limit:
+                upto_token = updates[-1][0]
+                limited = True
+
+            return updates, upto_token, limited
+
+        return await self.db_pool.runInteraction(
+            "get_un_partial_stated_events_from_stream",
+            get_un_partial_stated_events_from_stream_txn,
+        )
+
     def process_replication_rows(
         self,
         stream_name: str,
diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py
index c801a93b5b..f855903c39 100644
--- a/synapse/storage/databases/main/state.py
+++ b/synapse/storage/databases/main/state.py
@@ -80,6 +80,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
         hs: "HomeServer",
     ):
         super().__init__(database, db_conn, hs)
+        self._instance_name: str = hs.get_instance_name()
 
     async def get_room_version(self, room_id: str) -> RoomVersion:
         """Get the room_version of a given room
@@ -404,18 +405,21 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
         context: EventContext,
     ) -> None:
         """Update the state group for a partial state event"""
-        await self.db_pool.runInteraction(
-            "update_state_for_partial_state_event",
-            self._update_state_for_partial_state_event_txn,
-            event,
-            context,
-        )
+        async with self._un_partial_stated_events_stream_id_gen.get_next() as un_partial_state_event_stream_id:
+            await self.db_pool.runInteraction(
+                "update_state_for_partial_state_event",
+                self._update_state_for_partial_state_event_txn,
+                event,
+                context,
+                un_partial_state_event_stream_id,
+            )
 
     def _update_state_for_partial_state_event_txn(
         self,
         txn: LoggingTransaction,
         event: EventBase,
         context: EventContext,
+        un_partial_state_event_stream_id: int,
     ) -> None:
         # we shouldn't have any outliers here
         assert not event.internal_metadata.is_outlier()
@@ -436,7 +440,10 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
 
         # the event may now be rejected where it was not before, or vice versa,
         # in which case we need to update the rejected flags.
-        if bool(context.rejected) != (event.rejected_reason is not None):
+        rejection_status_changed = bool(context.rejected) != (
+            event.rejected_reason is not None
+        )
+        if rejection_status_changed:
             self.mark_event_rejected_txn(txn, event.event_id, context.rejected)
 
         self.db_pool.simple_delete_one_txn(
@@ -445,8 +452,6 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
             keyvalues={"event_id": event.event_id},
         )
 
-        # TODO(faster_joins): need to do something about workers here
-        #   https://github.com/matrix-org/synapse/issues/12994
         txn.call_after(self.is_partial_state_event.invalidate, (event.event_id,))
         txn.call_after(
             self._get_state_group_for_event.prefill,
@@ -454,6 +459,17 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
             state_group,
         )
 
+        self.db_pool.simple_insert_txn(
+            txn,
+            "un_partial_stated_event_stream",
+            {
+                "stream_id": un_partial_state_event_stream_id,
+                "instance_name": self._instance_name,
+                "event_id": event.event_id,
+                "rejection_status_changed": rejection_status_changed,
+            },
+        )
+
 
 class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
 
diff --git a/synapse/storage/schema/main/delta/73/22_un_partial_stated_event_stream.sql b/synapse/storage/schema/main/delta/73/22_un_partial_stated_event_stream.sql
new file mode 100644
index 0000000000..0e571f78c3
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/22_un_partial_stated_event_stream.sql
@@ -0,0 +1,34 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Stream for notifying that an event has become un-partial-stated.
+CREATE TABLE un_partial_stated_event_stream(
+    -- Position in the stream
+    stream_id BIGINT PRIMARY KEY NOT NULL,
+
+    -- Which instance wrote this entry.
+    instance_name TEXT NOT NULL,
+
+    -- Which event has been un-partial-stated.
+    event_id TEXT NOT NULL REFERENCES events(event_id) ON DELETE CASCADE,
+
+    -- true iff the `rejected` status of the event changed when it became
+    -- un-partial-stated.
+    rejection_status_changed BOOLEAN NOT NULL
+);
+
+-- We want an index here because of the foreign key constraint:
+-- upon deleting an event, the database needs to be able to check here.
+CREATE UNIQUE INDEX un_partial_stated_event_stream_room_id ON un_partial_stated_event_stream (event_id);
diff --git a/synapse/storage/schema/main/delta/73/23_un_partial_stated_room_stream_seq.sql.postgres b/synapse/storage/schema/main/delta/73/23_un_partial_stated_room_stream_seq.sql.postgres
new file mode 100644
index 0000000000..1ec24702f3
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/23_un_partial_stated_room_stream_seq.sql.postgres
@@ -0,0 +1,20 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE SEQUENCE IF NOT EXISTS un_partial_stated_event_stream_sequence;
+
+SELECT setval('un_partial_stated_event_stream_sequence', (
+    SELECT COALESCE(MAX(stream_id), 1) FROM un_partial_stated_event_stream
+));
-- 
cgit 1.5.1


From 4f4d69042345134c040de137a8e1aa108ff71acb Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 14 Dec 2022 14:52:35 +0000
Subject: Allow `compute_state_after_events` to use partial state (#14676)

* Allow `compute_state_after_events` to use partial state

if fetching a subset of state that is trusted during a partial join.

* Changelog
---
 changelog.d/14676.misc    |  1 +
 synapse/state/__init__.py | 10 ++++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14676.misc

(limited to 'synapse')

diff --git a/changelog.d/14676.misc b/changelog.d/14676.misc
new file mode 100644
index 0000000000..8a41df9c64
--- /dev/null
+++ b/changelog.d/14676.misc
@@ -0,0 +1 @@
+Faster joins: make `compute_state_after_events` consistent with other state-fetching functions that take a `StateFilter`.
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index ee5469d5a8..fdfb46ab82 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -202,14 +202,20 @@ class StateHandler:
             room_id: the room_id containing the given events.
             event_ids: the events whose state should be fetched and resolved.
             await_full_state: if `True`, will block if we do not yet have complete state
-                at the given `event_id`s, regardless of whether `state_filter` is
-                satisfied by partial state.
+                at these events and `state_filter` is not satisfied by partial state.
+                Defaults to `True`.
 
         Returns:
             the state dict (a mapping from (event_type, state_key) -> event_id) which
             holds the resolution of the states after the given event IDs.
         """
         logger.debug("calling resolve_state_groups from compute_state_after_events")
+        if (
+            await_full_state
+            and state_filter
+            and not state_filter.must_await_full_state(self.hs.is_mine_id)
+        ):
+            await_full_state = False
         ret = await self.resolve_state_groups_for_events(
             room_id, event_ids, await_full_state
         )
-- 
cgit 1.5.1


From 54c012c5a8722725cf104fa6205f253b5b9b0192 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Thu, 15 Dec 2022 17:04:23 +0100
Subject: Make `handle_new_client_event` throw `PartialStateConflictError`
 (#14665)

Then adapts calling code to retry when needed so it doesn't 500
to clients.

Signed-off-by: Mathieu Velten <mathieuv@matrix.org>
Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
---
 changelog.d/14665.misc                |   1 +
 synapse/handlers/federation.py        | 117 +++++++++++++-------
 synapse/handlers/message.py           | 202 ++++++++++++++++++----------------
 synapse/handlers/room.py              |  95 +++++++++-------
 synapse/handlers/room_batch.py        |   2 +
 synapse/handlers/room_member.py       | 168 +++++++++++++++++-----------
 synapse/util/caches/response_cache.py |  14 ++-
 7 files changed, 360 insertions(+), 239 deletions(-)
 create mode 100644 changelog.d/14665.misc

(limited to 'synapse')

diff --git a/changelog.d/14665.misc b/changelog.d/14665.misc
new file mode 100644
index 0000000000..2b7c96143d
--- /dev/null
+++ b/changelog.d/14665.misc
@@ -0,0 +1 @@
+Change `handle_new_client_event` signature so that a 429 does not reach clients on `PartialStateConflictError`, and internally retry when needed instead.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index b2784d7333..eca75f1108 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -1343,32 +1343,53 @@ class FederationHandler:
             )
 
             EventValidator().validate_builder(builder)
-            event, context = await self.event_creation_handler.create_new_client_event(
-                builder=builder
-            )
 
-            event, context = await self.add_display_name_to_third_party_invite(
-                room_version_obj, event_dict, event, context
-            )
+            # Try several times, it could fail with PartialStateConflictError
+            # in send_membership_event, cf comment in except block.
+            max_retries = 5
+            for i in range(max_retries):
+                try:
+                    (
+                        event,
+                        context,
+                    ) = await self.event_creation_handler.create_new_client_event(
+                        builder=builder
+                    )
 
-            EventValidator().validate_new(event, self.config)
+                    event, context = await self.add_display_name_to_third_party_invite(
+                        room_version_obj, event_dict, event, context
+                    )
 
-            # We need to tell the transaction queue to send this out, even
-            # though the sender isn't a local user.
-            event.internal_metadata.send_on_behalf_of = self.hs.hostname
+                    EventValidator().validate_new(event, self.config)
 
-            try:
-                validate_event_for_room_version(event)
-                await self._event_auth_handler.check_auth_rules_from_context(event)
-            except AuthError as e:
-                logger.warning("Denying new third party invite %r because %s", event, e)
-                raise e
+                    # We need to tell the transaction queue to send this out, even
+                    # though the sender isn't a local user.
+                    event.internal_metadata.send_on_behalf_of = self.hs.hostname
 
-            await self._check_signature(event, context)
+                    try:
+                        validate_event_for_room_version(event)
+                        await self._event_auth_handler.check_auth_rules_from_context(
+                            event
+                        )
+                    except AuthError as e:
+                        logger.warning(
+                            "Denying new third party invite %r because %s", event, e
+                        )
+                        raise e
 
-            # We retrieve the room member handler here as to not cause a cyclic dependency
-            member_handler = self.hs.get_room_member_handler()
-            await member_handler.send_membership_event(None, event, context)
+                    await self._check_signature(event, context)
+
+                    # We retrieve the room member handler here as to not cause a cyclic dependency
+                    member_handler = self.hs.get_room_member_handler()
+                    await member_handler.send_membership_event(None, event, context)
+
+                    break
+                except PartialStateConflictError as e:
+                    # Persisting couldn't happen because the room got un-partial stated
+                    # in the meantime and context needs to be recomputed, so let's do so.
+                    if i == max_retries - 1:
+                        raise e
+                    pass
         else:
             destinations = {x.split(":", 1)[-1] for x in (sender_user_id, room_id)}
 
@@ -1400,28 +1421,46 @@ class FederationHandler:
             room_version_obj, event_dict
         )
 
-        event, context = await self.event_creation_handler.create_new_client_event(
-            builder=builder
-        )
-        event, context = await self.add_display_name_to_third_party_invite(
-            room_version_obj, event_dict, event, context
-        )
+        # Try several times, it could fail with PartialStateConflictError
+        # in send_membership_event, cf comment in except block.
+        max_retries = 5
+        for i in range(max_retries):
+            try:
+                (
+                    event,
+                    context,
+                ) = await self.event_creation_handler.create_new_client_event(
+                    builder=builder
+                )
+                event, context = await self.add_display_name_to_third_party_invite(
+                    room_version_obj, event_dict, event, context
+                )
 
-        try:
-            validate_event_for_room_version(event)
-            await self._event_auth_handler.check_auth_rules_from_context(event)
-        except AuthError as e:
-            logger.warning("Denying third party invite %r because %s", event, e)
-            raise e
-        await self._check_signature(event, context)
+                try:
+                    validate_event_for_room_version(event)
+                    await self._event_auth_handler.check_auth_rules_from_context(event)
+                except AuthError as e:
+                    logger.warning("Denying third party invite %r because %s", event, e)
+                    raise e
+                await self._check_signature(event, context)
+
+                # We need to tell the transaction queue to send this out, even
+                # though the sender isn't a local user.
+                event.internal_metadata.send_on_behalf_of = get_domain_from_id(
+                    event.sender
+                )
 
-        # We need to tell the transaction queue to send this out, even
-        # though the sender isn't a local user.
-        event.internal_metadata.send_on_behalf_of = get_domain_from_id(event.sender)
+                # We retrieve the room member handler here as to not cause a cyclic dependency
+                member_handler = self.hs.get_room_member_handler()
+                await member_handler.send_membership_event(None, event, context)
 
-        # We retrieve the room member handler here as to not cause a cyclic dependency
-        member_handler = self.hs.get_room_member_handler()
-        await member_handler.send_membership_event(None, event, context)
+                break
+            except PartialStateConflictError as e:
+                # Persisting couldn't happen because the room got un-partial stated
+                # in the meantime and context needs to be recomputed, so let's do so.
+                if i == max_retries - 1:
+                    raise e
+                pass
 
     async def add_display_name_to_third_party_invite(
         self,
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 845f683358..88fc51a4c9 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -37,7 +37,6 @@ from synapse.api.errors import (
     AuthError,
     Codes,
     ConsentNotGivenError,
-    LimitExceededError,
     NotFoundError,
     ShadowBanError,
     SynapseError,
@@ -999,60 +998,73 @@ class EventCreationHandler:
                         event.internal_metadata.stream_ordering,
                     )
 
-            event, context = await self.create_event(
-                requester,
-                event_dict,
-                txn_id=txn_id,
-                allow_no_prev_events=allow_no_prev_events,
-                prev_event_ids=prev_event_ids,
-                state_event_ids=state_event_ids,
-                outlier=outlier,
-                historical=historical,
-                depth=depth,
-            )
+        # Try several times, it could fail with PartialStateConflictError
+        # in handle_new_client_event, cf comment in except block.
+        max_retries = 5
+        for i in range(max_retries):
+            try:
+                event, context = await self.create_event(
+                    requester,
+                    event_dict,
+                    txn_id=txn_id,
+                    allow_no_prev_events=allow_no_prev_events,
+                    prev_event_ids=prev_event_ids,
+                    state_event_ids=state_event_ids,
+                    outlier=outlier,
+                    historical=historical,
+                    depth=depth,
+                )
 
-            assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % (
-                event.sender,
-            )
+                assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % (
+                    event.sender,
+                )
 
-            spam_check_result = await self.spam_checker.check_event_for_spam(event)
-            if spam_check_result != self.spam_checker.NOT_SPAM:
-                if isinstance(spam_check_result, tuple):
-                    try:
-                        [code, dict] = spam_check_result
-                        raise SynapseError(
-                            403,
-                            "This message had been rejected as probable spam",
-                            code,
-                            dict,
-                        )
-                    except ValueError:
-                        logger.error(
-                            "Spam-check module returned invalid error value. Expecting [code, dict], got %s",
-                            spam_check_result,
-                        )
+                spam_check_result = await self.spam_checker.check_event_for_spam(event)
+                if spam_check_result != self.spam_checker.NOT_SPAM:
+                    if isinstance(spam_check_result, tuple):
+                        try:
+                            [code, dict] = spam_check_result
+                            raise SynapseError(
+                                403,
+                                "This message had been rejected as probable spam",
+                                code,
+                                dict,
+                            )
+                        except ValueError:
+                            logger.error(
+                                "Spam-check module returned invalid error value. Expecting [code, dict], got %s",
+                                spam_check_result,
+                            )
 
-                        raise SynapseError(
-                            403,
-                            "This message has been rejected as probable spam",
-                            Codes.FORBIDDEN,
-                        )
+                            raise SynapseError(
+                                403,
+                                "This message has been rejected as probable spam",
+                                Codes.FORBIDDEN,
+                            )
 
-                # Backwards compatibility: if the return value is not an error code, it
-                # means the module returned an error message to be included in the
-                # SynapseError (which is now deprecated).
-                raise SynapseError(
-                    403,
-                    spam_check_result,
-                    Codes.FORBIDDEN,
+                    # Backwards compatibility: if the return value is not an error code, it
+                    # means the module returned an error message to be included in the
+                    # SynapseError (which is now deprecated).
+                    raise SynapseError(
+                        403,
+                        spam_check_result,
+                        Codes.FORBIDDEN,
+                    )
+
+                ev = await self.handle_new_client_event(
+                    requester=requester,
+                    events_and_context=[(event, context)],
+                    ratelimit=ratelimit,
+                    ignore_shadow_ban=ignore_shadow_ban,
                 )
 
-            ev = await self.handle_new_client_event(
-                requester=requester,
-                events_and_context=[(event, context)],
-                ratelimit=ratelimit,
-                ignore_shadow_ban=ignore_shadow_ban,
-            )
+                break
+            except PartialStateConflictError as e:
+                # Persisting couldn't happen because the room got un-partial stated
+                # in the meantime and context needs to be recomputed, so let's do so.
+                if i == max_retries - 1:
+                    raise e
+                pass
 
         # we know it was persisted, so must have a stream ordering
         assert ev.internal_metadata.stream_ordering
@@ -1356,7 +1368,7 @@ class EventCreationHandler:
 
         Raises:
             ShadowBanError if the requester has been shadow-banned.
-            SynapseError(503) if attempting to persist a partial state event in
+            PartialStateConflictError if attempting to persist a partial state event in
                 a room that has been un-partial stated.
         """
         extra_users = extra_users or []
@@ -1418,34 +1430,23 @@ class EventCreationHandler:
         # We now persist the event (and update the cache in parallel, since we
         # don't want to block on it).
         event, context = events_and_context[0]
-        try:
-            result, _ = await make_deferred_yieldable(
-                gather_results(
-                    (
-                        run_in_background(
-                            self._persist_events,
-                            requester=requester,
-                            events_and_context=events_and_context,
-                            ratelimit=ratelimit,
-                            extra_users=extra_users,
-                        ),
-                        run_in_background(
-                            self.cache_joined_hosts_for_events, events_and_context
-                        ).addErrback(
-                            log_failure, "cache_joined_hosts_for_event failed"
-                        ),
+        result, _ = await make_deferred_yieldable(
+            gather_results(
+                (
+                    run_in_background(
+                        self._persist_events,
+                        requester=requester,
+                        events_and_context=events_and_context,
+                        ratelimit=ratelimit,
+                        extra_users=extra_users,
                     ),
-                    consumeErrors=True,
-                )
-            ).addErrback(unwrapFirstError)
-        except PartialStateConflictError as e:
-            # The event context needs to be recomputed.
-            # Turn the error into a 429, as a hint to the client to try again.
-            logger.info(
-                "Room %s was un-partial stated while persisting client event.",
-                event.room_id,
+                    run_in_background(
+                        self.cache_joined_hosts_for_events, events_and_context
+                    ).addErrback(log_failure, "cache_joined_hosts_for_event failed"),
+                ),
+                consumeErrors=True,
             )
-            raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0)
+        ).addErrback(unwrapFirstError)
 
         return result
 
@@ -2012,26 +2013,39 @@ class EventCreationHandler:
         for user_id in members:
             requester = create_requester(user_id, authenticated_entity=self.server_name)
             try:
-                event, context = await self.create_event(
-                    requester,
-                    {
-                        "type": EventTypes.Dummy,
-                        "content": {},
-                        "room_id": room_id,
-                        "sender": user_id,
-                    },
-                )
+                # Try several times, it could fail with PartialStateConflictError
+                # in handle_new_client_event, cf comment in except block.
+                max_retries = 5
+                for i in range(max_retries):
+                    try:
+                        event, context = await self.create_event(
+                            requester,
+                            {
+                                "type": EventTypes.Dummy,
+                                "content": {},
+                                "room_id": room_id,
+                                "sender": user_id,
+                            },
+                        )
 
-                event.internal_metadata.proactively_send = False
+                        event.internal_metadata.proactively_send = False
 
-                # Since this is a dummy-event it is OK if it is sent by a
-                # shadow-banned user.
-                await self.handle_new_client_event(
-                    requester,
-                    events_and_context=[(event, context)],
-                    ratelimit=False,
-                    ignore_shadow_ban=True,
-                )
+                        # Since this is a dummy-event it is OK if it is sent by a
+                        # shadow-banned user.
+                        await self.handle_new_client_event(
+                            requester,
+                            events_and_context=[(event, context)],
+                            ratelimit=False,
+                            ignore_shadow_ban=True,
+                        )
+
+                        break
+                    except PartialStateConflictError as e:
+                        # Persisting couldn't happen because the room got un-partial stated
+                        # in the meantime and context needs to be recomputed, so let's do so.
+                        if i == max_retries - 1:
+                            raise e
+                        pass
                 return True
             except AuthError:
                 logger.info(
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index f81241c2b3..572c7b4db3 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -62,6 +62,7 @@ from synapse.events.utils import copy_and_fixup_power_levels_contents
 from synapse.handlers.relations import BundledAggregations
 from synapse.module_api import NOT_SPAM
 from synapse.rest.admin._base import assert_user_is_admin
+from synapse.storage.databases.main.events import PartialStateConflictError
 from synapse.streams import EventSource
 from synapse.types import (
     JsonDict,
@@ -207,46 +208,64 @@ class RoomCreationHandler:
 
         new_room_id = self._generate_room_id()
 
-        # Check whether the user has the power level to carry out the upgrade.
-        # `check_auth_rules_from_context` will check that they are in the room and have
-        # the required power level to send the tombstone event.
-        (
-            tombstone_event,
-            tombstone_context,
-        ) = await self.event_creation_handler.create_event(
-            requester,
-            {
-                "type": EventTypes.Tombstone,
-                "state_key": "",
-                "room_id": old_room_id,
-                "sender": user_id,
-                "content": {
-                    "body": "This room has been replaced",
-                    "replacement_room": new_room_id,
-                },
-            },
-        )
-        validate_event_for_room_version(tombstone_event)
-        await self._event_auth_handler.check_auth_rules_from_context(tombstone_event)
+        # Try several times, it could fail with PartialStateConflictError
+        # in _upgrade_room, cf comment in except block.
+        max_retries = 5
+        for i in range(max_retries):
+            try:
+                # Check whether the user has the power level to carry out the upgrade.
+                # `check_auth_rules_from_context` will check that they are in the room and have
+                # the required power level to send the tombstone event.
+                (
+                    tombstone_event,
+                    tombstone_context,
+                ) = await self.event_creation_handler.create_event(
+                    requester,
+                    {
+                        "type": EventTypes.Tombstone,
+                        "state_key": "",
+                        "room_id": old_room_id,
+                        "sender": user_id,
+                        "content": {
+                            "body": "This room has been replaced",
+                            "replacement_room": new_room_id,
+                        },
+                    },
+                )
+                validate_event_for_room_version(tombstone_event)
+                await self._event_auth_handler.check_auth_rules_from_context(
+                    tombstone_event
+                )
 
-        # Upgrade the room
-        #
-        # If this user has sent multiple upgrade requests for the same room
-        # and one of them is not complete yet, cache the response and
-        # return it to all subsequent requests
-        ret = await self._upgrade_response_cache.wrap(
-            (old_room_id, user_id),
-            self._upgrade_room,
-            requester,
-            old_room_id,
-            old_room,  # args for _upgrade_room
-            new_room_id,
-            new_version,
-            tombstone_event,
-            tombstone_context,
-        )
+                # Upgrade the room
+                #
+                # If this user has sent multiple upgrade requests for the same room
+                # and one of them is not complete yet, cache the response and
+                # return it to all subsequent requests
+                ret = await self._upgrade_response_cache.wrap(
+                    (old_room_id, user_id),
+                    self._upgrade_room,
+                    requester,
+                    old_room_id,
+                    old_room,  # args for _upgrade_room
+                    new_room_id,
+                    new_version,
+                    tombstone_event,
+                    tombstone_context,
+                )
 
-        return ret
+                return ret
+            except PartialStateConflictError as e:
+                # Clean up the cache so we can retry properly
+                self._upgrade_response_cache.unset((old_room_id, user_id))
+                # Persisting couldn't happen because the room got un-partial stated
+                # in the meantime and context needs to be recomputed, so let's do so.
+                if i == max_retries - 1:
+                    raise e
+                pass
+
+        # This is to satisfy mypy and should never happen
+        raise PartialStateConflictError()
 
     async def _upgrade_room(
         self,
diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py
index 411a6fb22f..c73d2adaad 100644
--- a/synapse/handlers/room_batch.py
+++ b/synapse/handlers/room_batch.py
@@ -375,6 +375,8 @@ class RoomBatchHandler:
         # Events are sorted by (topological_ordering, stream_ordering)
         # where topological_ordering is just depth.
         for (event, context) in reversed(events_to_persist):
+            # This call can't raise `PartialStateConflictError` since we forbid
+            # use of the historical batch API during partial state
             await self.event_creation_handler.handle_new_client_event(
                 await self.create_requester_for_user_id_from_app_service(
                     event.sender, app_service_requester.app_service
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 0c39e852a1..d236cc09b5 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -34,6 +34,7 @@ from synapse.events.snapshot import EventContext
 from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN
 from synapse.logging import opentracing
 from synapse.module_api import NOT_SPAM
+from synapse.storage.databases.main.events import PartialStateConflictError
 from synapse.types import (
     JsonDict,
     Requester,
@@ -392,60 +393,81 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                 event_pos = await self.store.get_position_for_event(existing_event_id)
                 return existing_event_id, event_pos.stream
 
-        event, context = await self.event_creation_handler.create_event(
-            requester,
-            {
-                "type": EventTypes.Member,
-                "content": content,
-                "room_id": room_id,
-                "sender": requester.user.to_string(),
-                "state_key": user_id,
-                # For backwards compatibility:
-                "membership": membership,
-                "origin_server_ts": origin_server_ts,
-            },
-            txn_id=txn_id,
-            allow_no_prev_events=allow_no_prev_events,
-            prev_event_ids=prev_event_ids,
-            state_event_ids=state_event_ids,
-            depth=depth,
-            require_consent=require_consent,
-            outlier=outlier,
-            historical=historical,
-        )
-
-        prev_state_ids = await context.get_prev_state_ids(
-            StateFilter.from_types([(EventTypes.Member, None)])
-        )
+        # Try several times, it could fail with PartialStateConflictError
+        # in handle_new_client_event, cf comment in except block.
+        max_retries = 5
+        for i in range(max_retries):
+            try:
+                event, context = await self.event_creation_handler.create_event(
+                    requester,
+                    {
+                        "type": EventTypes.Member,
+                        "content": content,
+                        "room_id": room_id,
+                        "sender": requester.user.to_string(),
+                        "state_key": user_id,
+                        # For backwards compatibility:
+                        "membership": membership,
+                        "origin_server_ts": origin_server_ts,
+                    },
+                    txn_id=txn_id,
+                    allow_no_prev_events=allow_no_prev_events,
+                    prev_event_ids=prev_event_ids,
+                    state_event_ids=state_event_ids,
+                    depth=depth,
+                    require_consent=require_consent,
+                    outlier=outlier,
+                    historical=historical,
+                )
 
-        prev_member_event_id = prev_state_ids.get((EventTypes.Member, user_id), None)
+                prev_state_ids = await context.get_prev_state_ids(
+                    StateFilter.from_types([(EventTypes.Member, None)])
+                )
 
-        if event.membership == Membership.JOIN:
-            newly_joined = True
-            if prev_member_event_id:
-                prev_member_event = await self.store.get_event(prev_member_event_id)
-                newly_joined = prev_member_event.membership != Membership.JOIN
-
-            # Only rate-limit if the user actually joined the room, otherwise we'll end
-            # up blocking profile updates.
-            if newly_joined and ratelimit:
-                await self._join_rate_limiter_local.ratelimit(requester)
-                await self._join_rate_per_room_limiter.ratelimit(
-                    requester, key=room_id, update=False
+                prev_member_event_id = prev_state_ids.get(
+                    (EventTypes.Member, user_id), None
                 )
-        with opentracing.start_active_span("handle_new_client_event"):
-            result_event = await self.event_creation_handler.handle_new_client_event(
-                requester,
-                events_and_context=[(event, context)],
-                extra_users=[target],
-                ratelimit=ratelimit,
-            )
 
-        if event.membership == Membership.LEAVE:
-            if prev_member_event_id:
-                prev_member_event = await self.store.get_event(prev_member_event_id)
-                if prev_member_event.membership == Membership.JOIN:
-                    await self._user_left_room(target, room_id)
+                if event.membership == Membership.JOIN:
+                    newly_joined = True
+                    if prev_member_event_id:
+                        prev_member_event = await self.store.get_event(
+                            prev_member_event_id
+                        )
+                        newly_joined = prev_member_event.membership != Membership.JOIN
+
+                    # Only rate-limit if the user actually joined the room, otherwise we'll end
+                    # up blocking profile updates.
+                    if newly_joined and ratelimit:
+                        await self._join_rate_limiter_local.ratelimit(requester)
+                        await self._join_rate_per_room_limiter.ratelimit(
+                            requester, key=room_id, update=False
+                        )
+                with opentracing.start_active_span("handle_new_client_event"):
+                    result_event = (
+                        await self.event_creation_handler.handle_new_client_event(
+                            requester,
+                            events_and_context=[(event, context)],
+                            extra_users=[target],
+                            ratelimit=ratelimit,
+                        )
+                    )
+
+                if event.membership == Membership.LEAVE:
+                    if prev_member_event_id:
+                        prev_member_event = await self.store.get_event(
+                            prev_member_event_id
+                        )
+                        if prev_member_event.membership == Membership.JOIN:
+                            await self._user_left_room(target, room_id)
+
+                break
+            except PartialStateConflictError as e:
+                # Persisting couldn't happen because the room got un-partial stated
+                # in the meantime and context needs to be recomputed, so let's do so.
+                if i == max_retries - 1:
+                    raise e
+                pass
 
         # we know it was persisted, so should have a stream ordering
         assert result_event.internal_metadata.stream_ordering
@@ -1234,6 +1256,8 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             ratelimit: Whether to rate limit this request.
         Raises:
             SynapseError if there was a problem changing the membership.
+            PartialStateConflictError: if attempting to persist a partial state event in
+                a room that has been un-partial stated.
         """
         target_user = UserID.from_string(event.state_key)
         room_id = event.room_id
@@ -1863,21 +1887,37 @@ class RoomMemberMasterHandler(RoomMemberHandler):
             list(previous_membership_event.auth_event_ids()) + prev_event_ids
         )
 
-        event, context = await self.event_creation_handler.create_event(
-            requester,
-            event_dict,
-            txn_id=txn_id,
-            prev_event_ids=prev_event_ids,
-            auth_event_ids=auth_event_ids,
-            outlier=True,
-        )
-        event.internal_metadata.out_of_band_membership = True
+        # Try several times, it could fail with PartialStateConflictError
+        # in handle_new_client_event, cf comment in except block.
+        max_retries = 5
+        for i in range(max_retries):
+            try:
+                event, context = await self.event_creation_handler.create_event(
+                    requester,
+                    event_dict,
+                    txn_id=txn_id,
+                    prev_event_ids=prev_event_ids,
+                    auth_event_ids=auth_event_ids,
+                    outlier=True,
+                )
+                event.internal_metadata.out_of_band_membership = True
+
+                result_event = (
+                    await self.event_creation_handler.handle_new_client_event(
+                        requester,
+                        events_and_context=[(event, context)],
+                        extra_users=[UserID.from_string(target_user)],
+                    )
+                )
+
+                break
+            except PartialStateConflictError as e:
+                # Persisting couldn't happen because the room got un-partial stated
+                # in the meantime and context needs to be recomputed, so let's do so.
+                if i == max_retries - 1:
+                    raise e
+                pass
 
-        result_event = await self.event_creation_handler.handle_new_client_event(
-            requester,
-            events_and_context=[(event, context)],
-            extra_users=[UserID.from_string(target_user)],
-        )
         # we know it was persisted, so must have a stream ordering
         assert result_event.internal_metadata.stream_ordering
 
diff --git a/synapse/util/caches/response_cache.py b/synapse/util/caches/response_cache.py
index a3eb5f741b..340e5e9145 100644
--- a/synapse/util/caches/response_cache.py
+++ b/synapse/util/caches/response_cache.py
@@ -167,12 +167,10 @@ class ResponseCache(Generic[KV]):
             # the should_cache bit, we leave it in the cache for now and schedule
             # its removal later.
             if self.timeout_sec and context.should_cache:
-                self.clock.call_later(
-                    self.timeout_sec, self._result_cache.pop, key, None
-                )
+                self.clock.call_later(self.timeout_sec, self.unset, key)
             else:
                 # otherwise, remove the result immediately.
-                self._result_cache.pop(key, None)
+                self.unset(key)
             return r
 
         # make sure we do this *after* adding the entry to result_cache,
@@ -181,6 +179,14 @@ class ResponseCache(Generic[KV]):
         result.addBoth(on_complete)
         return entry
 
+    def unset(self, key: KV) -> None:
+        """Remove the cached value for this key from the cache, if any.
+
+        Args:
+            key: key used to remove the cached value
+        """
+        self._result_cache.pop(key, None)
+
     async def wrap(
         self,
         key: KV,
-- 
cgit 1.5.1


From 652d1669c5a103b1c20478770c4aaf18849c09a3 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 16 Dec 2022 06:53:01 -0500
Subject: Add missing type hints to tests.handlers. (#14680)

And do not allow untyped defs in tests.handlers.
---
 changelog.d/14680.misc                    |   1 +
 mypy.ini                                  |   5 +-
 synapse/handlers/auth.py                  |   2 +-
 tests/handlers/test_appservice.py         |  54 +++++-----
 tests/handlers/test_cas.py                |   2 +-
 tests/handlers/test_directory.py          |  27 ++---
 tests/handlers/test_e2e_room_keys.py      |  76 ++++++++------
 tests/handlers/test_federation.py         |   2 +-
 tests/handlers/test_federation_event.py   |  10 +-
 tests/handlers/test_message.py            |  26 +++--
 tests/handlers/test_oidc.py               |  48 ++++++---
 tests/handlers/test_password_providers.py | 144 ++++++++++++-------------
 tests/handlers/test_presence.py           | 100 ++++++++++--------
 tests/handlers/test_profile.py            |   4 +-
 tests/handlers/test_receipts.py           |   6 +-
 tests/handlers/test_register.py           | 169 +++++++++++++++++-------------
 tests/handlers/test_room.py               |   6 +-
 tests/handlers/test_room_summary.py       |  76 ++++++++------
 tests/handlers/test_saml.py               |  33 ++++--
 tests/handlers/test_send_email.py         |  29 +++--
 tests/handlers/test_stats.py              |  74 +++++++++----
 tests/handlers/test_sync.py               |  11 +-
 22 files changed, 527 insertions(+), 378 deletions(-)
 create mode 100644 changelog.d/14680.misc

(limited to 'synapse')

diff --git a/changelog.d/14680.misc b/changelog.d/14680.misc
new file mode 100644
index 0000000000..d44571b731
--- /dev/null
+++ b/changelog.d/14680.misc
@@ -0,0 +1 @@
+Add missing type hints.
diff --git a/mypy.ini b/mypy.ini
index 37acf589c9..1a37414e58 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -95,10 +95,7 @@ disallow_untyped_defs = True
 [mypy-tests.federation.transport.test_client]
 disallow_untyped_defs = True
 
-[mypy-tests.handlers.test_sso]
-disallow_untyped_defs = True
-
-[mypy-tests.handlers.test_user_directory]
+[mypy-tests.handlers.*]
 disallow_untyped_defs = True
 
 [mypy-tests.metrics.test_background_process_metrics]
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 8b9ef25d29..30f2d46c3c 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -2031,7 +2031,7 @@ class PasswordAuthProvider:
         self.is_3pid_allowed_callbacks: List[IS_3PID_ALLOWED_CALLBACK] = []
 
         # Mapping from login type to login parameters
-        self._supported_login_types: Dict[str, Iterable[str]] = {}
+        self._supported_login_types: Dict[str, Tuple[str, ...]] = {}
 
         # Mapping from login type to auth checker callbacks
         self.auth_checker_callbacks: Dict[str, List[CHECK_AUTH_CALLBACK]] = {}
diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py
index 57bfbd7734..a7495ab21a 100644
--- a/tests/handlers/test_appservice.py
+++ b/tests/handlers/test_appservice.py
@@ -31,7 +31,7 @@ from synapse.appservice import (
 from synapse.handlers.appservice import ApplicationServicesHandler
 from synapse.rest.client import login, receipts, register, room, sendtodevice
 from synapse.server import HomeServer
-from synapse.types import RoomStreamToken
+from synapse.types import JsonDict, RoomStreamToken
 from synapse.util import Clock
 from synapse.util.stringutils import random_string
 
@@ -44,7 +44,7 @@ from tests.utils import MockClock
 class AppServiceHandlerTestCase(unittest.TestCase):
     """Tests the ApplicationServicesHandler."""
 
-    def setUp(self):
+    def setUp(self) -> None:
         self.mock_store = Mock()
         self.mock_as_api = Mock()
         self.mock_scheduler = Mock()
@@ -61,7 +61,7 @@ class AppServiceHandlerTestCase(unittest.TestCase):
         self.handler = ApplicationServicesHandler(hs)
         self.event_source = hs.get_event_sources()
 
-    def test_notify_interested_services(self):
+    def test_notify_interested_services(self) -> None:
         interested_service = self._mkservice(is_interested_in_event=True)
         services = [
             self._mkservice(is_interested_in_event=False),
@@ -90,7 +90,7 @@ class AppServiceHandlerTestCase(unittest.TestCase):
             interested_service, events=[event]
         )
 
-    def test_query_user_exists_unknown_user(self):
+    def test_query_user_exists_unknown_user(self) -> None:
         user_id = "@someone:anywhere"
         services = [self._mkservice(is_interested_in_event=True)]
         services[0].is_interested_in_user.return_value = True
@@ -107,7 +107,7 @@ class AppServiceHandlerTestCase(unittest.TestCase):
 
         self.mock_as_api.query_user.assert_called_once_with(services[0], user_id)
 
-    def test_query_user_exists_known_user(self):
+    def test_query_user_exists_known_user(self) -> None:
         user_id = "@someone:anywhere"
         services = [self._mkservice(is_interested_in_event=True)]
         services[0].is_interested_in_user.return_value = True
@@ -127,7 +127,7 @@ class AppServiceHandlerTestCase(unittest.TestCase):
             "query_user called when it shouldn't have been.",
         )
 
-    def test_query_room_alias_exists(self):
+    def test_query_room_alias_exists(self) -> None:
         room_alias_str = "#foo:bar"
         room_alias = Mock()
         room_alias.to_string.return_value = room_alias_str
@@ -157,7 +157,7 @@ class AppServiceHandlerTestCase(unittest.TestCase):
         self.assertEqual(result.room_id, room_id)
         self.assertEqual(result.servers, servers)
 
-    def test_get_3pe_protocols_no_appservices(self):
+    def test_get_3pe_protocols_no_appservices(self) -> None:
         self.mock_store.get_app_services.return_value = []
         response = self.successResultOf(
             defer.ensureDeferred(self.handler.get_3pe_protocols("my-protocol"))
@@ -165,7 +165,7 @@ class AppServiceHandlerTestCase(unittest.TestCase):
         self.mock_as_api.get_3pe_protocol.assert_not_called()
         self.assertEqual(response, {})
 
-    def test_get_3pe_protocols_no_protocols(self):
+    def test_get_3pe_protocols_no_protocols(self) -> None:
         service = self._mkservice(False, [])
         self.mock_store.get_app_services.return_value = [service]
         response = self.successResultOf(
@@ -174,7 +174,7 @@ class AppServiceHandlerTestCase(unittest.TestCase):
         self.mock_as_api.get_3pe_protocol.assert_not_called()
         self.assertEqual(response, {})
 
-    def test_get_3pe_protocols_protocol_no_response(self):
+    def test_get_3pe_protocols_protocol_no_response(self) -> None:
         service = self._mkservice(False, ["my-protocol"])
         self.mock_store.get_app_services.return_value = [service]
         self.mock_as_api.get_3pe_protocol.return_value = make_awaitable(None)
@@ -186,7 +186,7 @@ class AppServiceHandlerTestCase(unittest.TestCase):
         )
         self.assertEqual(response, {})
 
-    def test_get_3pe_protocols_select_one_protocol(self):
+    def test_get_3pe_protocols_select_one_protocol(self) -> None:
         service = self._mkservice(False, ["my-protocol"])
         self.mock_store.get_app_services.return_value = [service]
         self.mock_as_api.get_3pe_protocol.return_value = make_awaitable(
@@ -202,7 +202,7 @@ class AppServiceHandlerTestCase(unittest.TestCase):
             response, {"my-protocol": {"x-protocol-data": 42, "instances": []}}
         )
 
-    def test_get_3pe_protocols_one_protocol(self):
+    def test_get_3pe_protocols_one_protocol(self) -> None:
         service = self._mkservice(False, ["my-protocol"])
         self.mock_store.get_app_services.return_value = [service]
         self.mock_as_api.get_3pe_protocol.return_value = make_awaitable(
@@ -218,7 +218,7 @@ class AppServiceHandlerTestCase(unittest.TestCase):
             response, {"my-protocol": {"x-protocol-data": 42, "instances": []}}
         )
 
-    def test_get_3pe_protocols_multiple_protocol(self):
+    def test_get_3pe_protocols_multiple_protocol(self) -> None:
         service_one = self._mkservice(False, ["my-protocol"])
         service_two = self._mkservice(False, ["other-protocol"])
         self.mock_store.get_app_services.return_value = [service_one, service_two]
@@ -237,11 +237,13 @@ class AppServiceHandlerTestCase(unittest.TestCase):
             },
         )
 
-    def test_get_3pe_protocols_multiple_info(self):
+    def test_get_3pe_protocols_multiple_info(self) -> None:
         service_one = self._mkservice(False, ["my-protocol"])
         service_two = self._mkservice(False, ["my-protocol"])
 
-        async def get_3pe_protocol(service, unusedProtocol):
+        async def get_3pe_protocol(
+            service: ApplicationService, protocol: str
+        ) -> Optional[JsonDict]:
             if service == service_one:
                 return {
                     "x-protocol-data": 42,
@@ -276,7 +278,7 @@ class AppServiceHandlerTestCase(unittest.TestCase):
             },
         )
 
-    def test_notify_interested_services_ephemeral(self):
+    def test_notify_interested_services_ephemeral(self) -> None:
         """
         Test sending ephemeral events to the appservice handler are scheduled
         to be pushed out to interested appservices, and that the stream ID is
@@ -306,7 +308,7 @@ class AppServiceHandlerTestCase(unittest.TestCase):
             580,
         )
 
-    def test_notify_interested_services_ephemeral_out_of_order(self):
+    def test_notify_interested_services_ephemeral_out_of_order(self) -> None:
         """
         Test sending out of order ephemeral events to the appservice handler
         are ignored.
@@ -390,7 +392,7 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase):
         receipts.register_servlets,
     ]
 
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.hs = hs
         # Mock the ApplicationServiceScheduler's _TransactionController's send method so that
         # we can track any outgoing ephemeral events
@@ -417,7 +419,7 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase):
             "exclusive_as_user", "password", self.exclusive_as_user_device_id
         )
 
-    def _notify_interested_services(self):
+    def _notify_interested_services(self) -> None:
         # This is normally set in `notify_interested_services` but we need to call the
         # internal async version so the reactor gets pushed to completion.
         self.hs.get_application_service_handler().current_max += 1
@@ -443,7 +445,7 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase):
     )
     def test_match_interesting_room_members(
         self, interesting_user: str, should_notify: bool
-    ):
+    ) -> None:
         """
         Test to make sure that a interesting user (local or remote) in the room is
         notified as expected when someone else in the room sends a message.
@@ -512,7 +514,9 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase):
         else:
             self.send_mock.assert_not_called()
 
-    def test_application_services_receive_events_sent_by_interesting_local_user(self):
+    def test_application_services_receive_events_sent_by_interesting_local_user(
+        self,
+    ) -> None:
         """
         Test to make sure that a messages sent from a local user can be interesting and
         picked up by the appservice.
@@ -568,7 +572,7 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase):
         self.assertEqual(events[0]["type"], "m.room.message")
         self.assertEqual(events[0]["sender"], alice)
 
-    def test_sending_read_receipt_batches_to_application_services(self):
+    def test_sending_read_receipt_batches_to_application_services(self) -> None:
         """Tests that a large batch of read receipts are sent correctly to
         interested application services.
         """
@@ -644,7 +648,7 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase):
     @unittest.override_config(
         {"experimental_features": {"msc2409_to_device_messages_enabled": True}}
     )
-    def test_application_services_receive_local_to_device(self):
+    def test_application_services_receive_local_to_device(self) -> None:
         """
         Test that when a user sends a to-device message to another user
         that is an application service's user namespace, the
@@ -722,7 +726,7 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase):
     @unittest.override_config(
         {"experimental_features": {"msc2409_to_device_messages_enabled": True}}
     )
-    def test_application_services_receive_bursts_of_to_device(self):
+    def test_application_services_receive_bursts_of_to_device(self) -> None:
         """
         Test that when a user sends >100 to-device messages at once, any
         interested AS's will receive them in separate transactions.
@@ -913,7 +917,7 @@ class ApplicationServicesHandlerDeviceListsTestCase(unittest.HomeserverTestCase)
         experimental_feature_enabled: bool,
         as_supports_txn_extensions: bool,
         as_should_receive_device_list_updates: bool,
-    ):
+    ) -> None:
         """
         Tests that an application service receives notice of changed device
         lists for a user, when a user changes their device lists.
@@ -1070,7 +1074,7 @@ class ApplicationServicesHandlerOtkCountsTestCase(unittest.HomeserverTestCase):
         and a room for the users to talk in.
         """
 
-        async def preparation():
+        async def preparation() -> None:
             await self._add_otks_for_device(self._sender_user, self._sender_device, 42)
             await self._add_fallback_key_for_device(
                 self._sender_user, self._sender_device, used=True
diff --git a/tests/handlers/test_cas.py b/tests/handlers/test_cas.py
index 2b21547d0f..2733719d82 100644
--- a/tests/handlers/test_cas.py
+++ b/tests/handlers/test_cas.py
@@ -199,7 +199,7 @@ class CasHandlerTestCase(HomeserverTestCase):
         )
 
 
-def _mock_request():
+def _mock_request() -> Mock:
     """Returns a mock which will stand in as a SynapseRequest"""
     mock = Mock(
         spec=[
diff --git a/tests/handlers/test_directory.py b/tests/handlers/test_directory.py
index 3b72c4c9d0..90aec484c4 100644
--- a/tests/handlers/test_directory.py
+++ b/tests/handlers/test_directory.py
@@ -20,6 +20,7 @@ from twisted.test.proto_helpers import MemoryReactor
 import synapse.api.errors
 import synapse.rest.admin
 from synapse.api.constants import EventTypes
+from synapse.events import EventBase
 from synapse.rest.client import directory, login, room
 from synapse.server import HomeServer
 from synapse.types import JsonDict, RoomAlias, create_requester
@@ -201,7 +202,7 @@ class TestDeleteAlias(unittest.HomeserverTestCase):
         self.test_user_tok = self.login("user", "pass")
         self.helper.join(room=self.room_id, user=self.test_user, tok=self.test_user_tok)
 
-    def _create_alias(self, user) -> None:
+    def _create_alias(self, user: str) -> None:
         # Create a new alias to this room.
         self.get_success(
             self.store.create_room_alias_association(
@@ -324,7 +325,7 @@ class CanonicalAliasTestCase(unittest.HomeserverTestCase):
         )
         return room_alias
 
-    def _set_canonical_alias(self, content) -> None:
+    def _set_canonical_alias(self, content: JsonDict) -> None:
         """Configure the canonical alias state on the room."""
         self.helper.send_state(
             self.room_id,
@@ -333,13 +334,15 @@ class CanonicalAliasTestCase(unittest.HomeserverTestCase):
             tok=self.admin_user_tok,
         )
 
-    def _get_canonical_alias(self):
+    def _get_canonical_alias(self) -> EventBase:
         """Get the canonical alias state of the room."""
-        return self.get_success(
+        result = self.get_success(
             self._storage_controllers.state.get_current_state_event(
                 self.room_id, EventTypes.CanonicalAlias, ""
             )
         )
+        assert result is not None
+        return result
 
     def test_remove_alias(self) -> None:
         """Removing an alias that is the canonical alias should remove it there too."""
@@ -349,8 +352,8 @@ class CanonicalAliasTestCase(unittest.HomeserverTestCase):
         )
 
         data = self._get_canonical_alias()
-        self.assertEqual(data["content"]["alias"], self.test_alias)
-        self.assertEqual(data["content"]["alt_aliases"], [self.test_alias])
+        self.assertEqual(data.content["alias"], self.test_alias)
+        self.assertEqual(data.content["alt_aliases"], [self.test_alias])
 
         # Finally, delete the alias.
         self.get_success(
@@ -360,8 +363,8 @@ class CanonicalAliasTestCase(unittest.HomeserverTestCase):
         )
 
         data = self._get_canonical_alias()
-        self.assertNotIn("alias", data["content"])
-        self.assertNotIn("alt_aliases", data["content"])
+        self.assertNotIn("alias", data.content)
+        self.assertNotIn("alt_aliases", data.content)
 
     def test_remove_other_alias(self) -> None:
         """Removing an alias listed as in alt_aliases should remove it there too."""
@@ -378,9 +381,9 @@ class CanonicalAliasTestCase(unittest.HomeserverTestCase):
         )
 
         data = self._get_canonical_alias()
-        self.assertEqual(data["content"]["alias"], self.test_alias)
+        self.assertEqual(data.content["alias"], self.test_alias)
         self.assertEqual(
-            data["content"]["alt_aliases"], [self.test_alias, other_test_alias]
+            data.content["alt_aliases"], [self.test_alias, other_test_alias]
         )
 
         # Delete the second alias.
@@ -391,8 +394,8 @@ class CanonicalAliasTestCase(unittest.HomeserverTestCase):
         )
 
         data = self._get_canonical_alias()
-        self.assertEqual(data["content"]["alias"], self.test_alias)
-        self.assertEqual(data["content"]["alt_aliases"], [self.test_alias])
+        self.assertEqual(data.content["alias"], self.test_alias)
+        self.assertEqual(data.content["alt_aliases"], [self.test_alias])
 
 
 class TestCreateAliasACL(unittest.HomeserverTestCase):
diff --git a/tests/handlers/test_e2e_room_keys.py b/tests/handlers/test_e2e_room_keys.py
index 9b7e7a8e9a..6c0b30de9e 100644
--- a/tests/handlers/test_e2e_room_keys.py
+++ b/tests/handlers/test_e2e_room_keys.py
@@ -17,7 +17,11 @@
 import copy
 from unittest import mock
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.errors import SynapseError
+from synapse.server import HomeServer
+from synapse.util import Clock
 
 from tests import unittest
 
@@ -39,14 +43,14 @@ room_keys = {
 
 
 class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
-    def make_homeserver(self, reactor, clock):
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         return self.setup_test_homeserver(replication_layer=mock.Mock())
 
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.handler = hs.get_e2e_room_keys_handler()
         self.local_user = "@boris:" + hs.hostname
 
-    def test_get_missing_current_version_info(self):
+    def test_get_missing_current_version_info(self) -> None:
         """Check that we get a 404 if we ask for info about the current version
         if there is no version.
         """
@@ -56,7 +60,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
         res = e.value.code
         self.assertEqual(res, 404)
 
-    def test_get_missing_version_info(self):
+    def test_get_missing_version_info(self) -> None:
         """Check that we get a 404 if we ask for info about a specific version
         if it doesn't exist.
         """
@@ -67,9 +71,9 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
         res = e.value.code
         self.assertEqual(res, 404)
 
-    def test_create_version(self):
+    def test_create_version(self) -> None:
         """Check that we can create and then retrieve versions."""
-        res = self.get_success(
+        version = self.get_success(
             self.handler.create_version(
                 self.local_user,
                 {
@@ -78,7 +82,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
                 },
             )
         )
-        self.assertEqual(res, "1")
+        self.assertEqual(version, "1")
 
         # check we can retrieve it as the current version
         res = self.get_success(self.handler.get_version_info(self.local_user))
@@ -110,7 +114,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
         )
 
         # upload a new one...
-        res = self.get_success(
+        version = self.get_success(
             self.handler.create_version(
                 self.local_user,
                 {
@@ -119,7 +123,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
                 },
             )
         )
-        self.assertEqual(res, "2")
+        self.assertEqual(version, "2")
 
         # check we can retrieve it as the current version
         res = self.get_success(self.handler.get_version_info(self.local_user))
@@ -134,7 +138,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
             },
         )
 
-    def test_update_version(self):
+    def test_update_version(self) -> None:
         """Check that we can update versions."""
         version = self.get_success(
             self.handler.create_version(
@@ -173,7 +177,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
             },
         )
 
-    def test_update_missing_version(self):
+    def test_update_missing_version(self) -> None:
         """Check that we get a 404 on updating nonexistent versions"""
         e = self.get_failure(
             self.handler.update_version(
@@ -190,7 +194,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
         res = e.value.code
         self.assertEqual(res, 404)
 
-    def test_update_omitted_version(self):
+    def test_update_omitted_version(self) -> None:
         """Check that the update succeeds if the version is missing from the body"""
         version = self.get_success(
             self.handler.create_version(
@@ -227,7 +231,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
             },
         )
 
-    def test_update_bad_version(self):
+    def test_update_bad_version(self) -> None:
         """Check that we get a 400 if the version in the body doesn't match"""
         version = self.get_success(
             self.handler.create_version(
@@ -255,7 +259,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
         res = e.value.code
         self.assertEqual(res, 400)
 
-    def test_delete_missing_version(self):
+    def test_delete_missing_version(self) -> None:
         """Check that we get a 404 on deleting nonexistent versions"""
         e = self.get_failure(
             self.handler.delete_version(self.local_user, "1"), SynapseError
@@ -263,15 +267,15 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
         res = e.value.code
         self.assertEqual(res, 404)
 
-    def test_delete_missing_current_version(self):
+    def test_delete_missing_current_version(self) -> None:
         """Check that we get a 404 on deleting nonexistent current version"""
         e = self.get_failure(self.handler.delete_version(self.local_user), SynapseError)
         res = e.value.code
         self.assertEqual(res, 404)
 
-    def test_delete_version(self):
+    def test_delete_version(self) -> None:
         """Check that we can create and then delete versions."""
-        res = self.get_success(
+        version = self.get_success(
             self.handler.create_version(
                 self.local_user,
                 {
@@ -280,7 +284,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
                 },
             )
         )
-        self.assertEqual(res, "1")
+        self.assertEqual(version, "1")
 
         # check we can delete it
         self.get_success(self.handler.delete_version(self.local_user, "1"))
@@ -292,7 +296,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
         res = e.value.code
         self.assertEqual(res, 404)
 
-    def test_get_missing_backup(self):
+    def test_get_missing_backup(self) -> None:
         """Check that we get a 404 on querying missing backup"""
         e = self.get_failure(
             self.handler.get_room_keys(self.local_user, "bogus_version"), SynapseError
@@ -300,7 +304,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
         res = e.value.code
         self.assertEqual(res, 404)
 
-    def test_get_missing_room_keys(self):
+    def test_get_missing_room_keys(self) -> None:
         """Check we get an empty response from an empty backup"""
         version = self.get_success(
             self.handler.create_version(
@@ -319,7 +323,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
     # TODO: test the locking semantics when uploading room_keys,
     # although this is probably best done in sytest
 
-    def test_upload_room_keys_no_versions(self):
+    def test_upload_room_keys_no_versions(self) -> None:
         """Check that we get a 404 on uploading keys when no versions are defined"""
         e = self.get_failure(
             self.handler.upload_room_keys(self.local_user, "no_version", room_keys),
@@ -328,7 +332,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
         res = e.value.code
         self.assertEqual(res, 404)
 
-    def test_upload_room_keys_bogus_version(self):
+    def test_upload_room_keys_bogus_version(self) -> None:
         """Check that we get a 404 on uploading keys when an nonexistent version
         is specified
         """
@@ -350,7 +354,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
         res = e.value.code
         self.assertEqual(res, 404)
 
-    def test_upload_room_keys_wrong_version(self):
+    def test_upload_room_keys_wrong_version(self) -> None:
         """Check that we get a 403 on uploading keys for an old version"""
         version = self.get_success(
             self.handler.create_version(
@@ -380,7 +384,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
         res = e.value.code
         self.assertEqual(res, 403)
 
-    def test_upload_room_keys_insert(self):
+    def test_upload_room_keys_insert(self) -> None:
         """Check that we can insert and retrieve keys for a session"""
         version = self.get_success(
             self.handler.create_version(
@@ -416,7 +420,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
         )
         self.assertDictEqual(res, room_keys)
 
-    def test_upload_room_keys_merge(self):
+    def test_upload_room_keys_merge(self) -> None:
         """Check that we can upload a new room_key for an existing session and
         have it correctly merged"""
         version = self.get_success(
@@ -449,9 +453,11 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
             self.handler.upload_room_keys(self.local_user, version, new_room_keys)
         )
 
-        res = self.get_success(self.handler.get_room_keys(self.local_user, version))
+        res_keys = self.get_success(
+            self.handler.get_room_keys(self.local_user, version)
+        )
         self.assertEqual(
-            res["rooms"]["!abc:matrix.org"]["sessions"]["c0ff33"]["session_data"],
+            res_keys["rooms"]["!abc:matrix.org"]["sessions"]["c0ff33"]["session_data"],
             "SSBBTSBBIEZJU0gK",
         )
 
@@ -465,9 +471,12 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
             self.handler.upload_room_keys(self.local_user, version, new_room_keys)
         )
 
-        res = self.get_success(self.handler.get_room_keys(self.local_user, version))
+        res_keys = self.get_success(
+            self.handler.get_room_keys(self.local_user, version)
+        )
         self.assertEqual(
-            res["rooms"]["!abc:matrix.org"]["sessions"]["c0ff33"]["session_data"], "new"
+            res_keys["rooms"]["!abc:matrix.org"]["sessions"]["c0ff33"]["session_data"],
+            "new",
         )
 
         # the etag should NOT be equal now, since the key changed
@@ -483,9 +492,12 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
             self.handler.upload_room_keys(self.local_user, version, new_room_keys)
         )
 
-        res = self.get_success(self.handler.get_room_keys(self.local_user, version))
+        res_keys = self.get_success(
+            self.handler.get_room_keys(self.local_user, version)
+        )
         self.assertEqual(
-            res["rooms"]["!abc:matrix.org"]["sessions"]["c0ff33"]["session_data"], "new"
+            res_keys["rooms"]["!abc:matrix.org"]["sessions"]["c0ff33"]["session_data"],
+            "new",
         )
 
         # the etag should be the same since the session did not change
@@ -494,7 +506,7 @@ class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
 
         # TODO: check edge cases as well as the common variations here
 
-    def test_delete_room_keys(self):
+    def test_delete_room_keys(self) -> None:
         """Check that we can insert and delete keys for a session"""
         version = self.get_success(
             self.handler.create_version(
diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py
index d00c69c229..cedbb9fafc 100644
--- a/tests/handlers/test_federation.py
+++ b/tests/handlers/test_federation.py
@@ -439,7 +439,7 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase):
         user_id = self.register_user("kermit", "test")
         tok = self.login("kermit", "test")
 
-        def create_invite():
+        def create_invite() -> EventBase:
             room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
             room_version = self.get_success(self.store.get_room_version(room_id))
             return event_from_pdu_json(
diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py
index e448cb1901..70ea4d15d4 100644
--- a/tests/handlers/test_federation_event.py
+++ b/tests/handlers/test_federation_event.py
@@ -14,6 +14,8 @@
 from typing import Optional
 from unittest import mock
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.errors import AuthError, StoreError
 from synapse.api.room_versions import RoomVersion
 from synapse.event_auth import (
@@ -26,8 +28,10 @@ from synapse.federation.transport.client import StateRequestResponse
 from synapse.logging.context import LoggingContext
 from synapse.rest import admin
 from synapse.rest.client import login, room
+from synapse.server import HomeServer
 from synapse.state.v2 import _mainline_sort, _reverse_topological_power_sort
 from synapse.types import JsonDict
+from synapse.util import Clock
 
 from tests import unittest
 from tests.test_utils import event_injection, make_awaitable
@@ -40,7 +44,7 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase):
         room.register_servlets,
     ]
 
-    def make_homeserver(self, reactor, clock):
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         # mock out the federation transport client
         self.mock_federation_transport_client = mock.Mock(
             spec=["get_room_state_ids", "get_room_state", "get_event", "backfill"]
@@ -165,7 +169,9 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase):
             )
         else:
 
-            async def get_event(destination: str, event_id: str, timeout=None):
+            async def get_event(
+                destination: str, event_id: str, timeout: Optional[int] = None
+            ) -> JsonDict:
                 self.assertEqual(destination, self.OTHER_SERVER_NAME)
                 self.assertEqual(event_id, prev_event.event_id)
                 return {"pdus": [prev_event.get_pdu_json()]}
diff --git a/tests/handlers/test_message.py b/tests/handlers/test_message.py
index 99384837d0..c4727ab917 100644
--- a/tests/handlers/test_message.py
+++ b/tests/handlers/test_message.py
@@ -14,12 +14,16 @@
 import logging
 from typing import Tuple
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.constants import EventTypes
 from synapse.events import EventBase
 from synapse.events.snapshot import EventContext
 from synapse.rest import admin
 from synapse.rest.client import login, room
+from synapse.server import HomeServer
 from synapse.types import create_requester
+from synapse.util import Clock
 from synapse.util.stringutils import random_string
 
 from tests import unittest
@@ -35,7 +39,7 @@ class EventCreationTestCase(unittest.HomeserverTestCase):
         room.register_servlets,
     ]
 
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.handler = self.hs.get_event_creation_handler()
         self._persist_event_storage_controller = (
             self.hs.get_storage_controllers().persistence
@@ -94,7 +98,7 @@ class EventCreationTestCase(unittest.HomeserverTestCase):
             )
         )
 
-    def test_duplicated_txn_id(self):
+    def test_duplicated_txn_id(self) -> None:
         """Test that attempting to handle/persist an event with a transaction ID
         that has already been persisted correctly returns the old event and does
         *not* produce duplicate messages.
@@ -161,7 +165,7 @@ class EventCreationTestCase(unittest.HomeserverTestCase):
         # rather than the new one.
         self.assertEqual(ret_event1.event_id, ret_event4.event_id)
 
-    def test_duplicated_txn_id_one_call(self):
+    def test_duplicated_txn_id_one_call(self) -> None:
         """Test that we correctly handle duplicates that we try and persist at
         the same time.
         """
@@ -185,7 +189,9 @@ class EventCreationTestCase(unittest.HomeserverTestCase):
         self.assertEqual(len(events), 2)
         self.assertEqual(events[0].event_id, events[1].event_id)
 
-    def test_when_empty_prev_events_allowed_create_event_with_empty_prev_events(self):
+    def test_when_empty_prev_events_allowed_create_event_with_empty_prev_events(
+        self,
+    ) -> None:
         """When we set allow_no_prev_events=True, should be able to create a
         event without any prev_events (only auth_events).
         """
@@ -214,7 +220,7 @@ class EventCreationTestCase(unittest.HomeserverTestCase):
 
     def test_when_empty_prev_events_not_allowed_reject_event_with_empty_prev_events(
         self,
-    ):
+    ) -> None:
         """When we set allow_no_prev_events=False, shouldn't be able to create a
         event without any prev_events even if it has auth_events. Expect an
         exception to be raised.
@@ -245,7 +251,7 @@ class EventCreationTestCase(unittest.HomeserverTestCase):
 
     def test_when_empty_prev_events_allowed_reject_event_with_empty_prev_events_and_auth_events(
         self,
-    ):
+    ) -> None:
         """When we set allow_no_prev_events=True, should be able to create a
         event without any prev_events or auth_events. Expect an exception to be
         raised.
@@ -277,12 +283,12 @@ class ServerAclValidationTestCase(unittest.HomeserverTestCase):
         room.register_servlets,
     ]
 
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.user_id = self.register_user("tester", "foobar")
         self.access_token = self.login("tester", "foobar")
         self.room_id = self.helper.create_room_as(self.user_id, tok=self.access_token)
 
-    def test_allow_server_acl(self):
+    def test_allow_server_acl(self) -> None:
         """Test that sending an ACL that blocks everyone but ourselves works."""
 
         self.helper.send_state(
@@ -293,7 +299,7 @@ class ServerAclValidationTestCase(unittest.HomeserverTestCase):
             expect_code=200,
         )
 
-    def test_deny_server_acl_block_outselves(self):
+    def test_deny_server_acl_block_outselves(self) -> None:
         """Test that sending an ACL that blocks ourselves does not work."""
         self.helper.send_state(
             self.room_id,
@@ -303,7 +309,7 @@ class ServerAclValidationTestCase(unittest.HomeserverTestCase):
             expect_code=400,
         )
 
-    def test_deny_redact_server_acl(self):
+    def test_deny_redact_server_acl(self) -> None:
         """Test that attempting to redact an ACL is blocked."""
 
         body = self.helper.send_state(
diff --git a/tests/handlers/test_oidc.py b/tests/handlers/test_oidc.py
index 5955410524..49a1842b5c 100644
--- a/tests/handlers/test_oidc.py
+++ b/tests/handlers/test_oidc.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
-from typing import Any, Dict, Tuple
+from typing import Any, Awaitable, ContextManager, Dict, Optional, Tuple
 from unittest.mock import ANY, Mock, patch
 from urllib.parse import parse_qs, urlparse
 
@@ -23,7 +23,7 @@ from twisted.test.proto_helpers import MemoryReactor
 from synapse.handlers.sso import MappingException
 from synapse.http.site import SynapseRequest
 from synapse.server import HomeServer
-from synapse.types import UserID
+from synapse.types import JsonDict, UserID
 from synapse.util import Clock
 from synapse.util.macaroons import get_value_from_macaroon
 from synapse.util.stringutils import random_string
@@ -34,6 +34,10 @@ from tests.unittest import HomeserverTestCase, override_config
 
 try:
     import authlib  # noqa: F401
+    from authlib.oidc.core import UserInfo
+    from authlib.oidc.discovery import OpenIDProviderMetadata
+
+    from synapse.handlers.oidc import Token, UserAttributeDict
 
     HAS_OIDC = True
 except ImportError:
@@ -70,29 +74,37 @@ EXPLICIT_ENDPOINT_CONFIG = {
 
 class TestMappingProvider:
     @staticmethod
-    def parse_config(config):
-        return
+    def parse_config(config: JsonDict) -> None:
+        return None
 
-    def __init__(self, config):
+    def __init__(self, config: None):
         pass
 
-    def get_remote_user_id(self, userinfo):
+    def get_remote_user_id(self, userinfo: "UserInfo") -> str:
         return userinfo["sub"]
 
-    async def map_user_attributes(self, userinfo, token):
-        return {"localpart": userinfo["username"], "display_name": None}
+    async def map_user_attributes(
+        self, userinfo: "UserInfo", token: "Token"
+    ) -> "UserAttributeDict":
+        # This tests a provider that does not return the full attribute map.
+        return {"localpart": userinfo["username"], "display_name": None}  # type: ignore[typeddict-item]
 
     # Do not include get_extra_attributes to test backwards compatibility paths.
 
 
 class TestMappingProviderExtra(TestMappingProvider):
-    async def get_extra_attributes(self, userinfo, token):
+    async def get_extra_attributes(
+        self, userinfo: "UserInfo", token: "Token"
+    ) -> JsonDict:
         return {"phone": userinfo["phone"]}
 
 
 class TestMappingProviderFailures(TestMappingProvider):
-    async def map_user_attributes(self, userinfo, token, failures):
-        return {
+    # The superclass tests the legacy map_user_attributes interface (no `failures`).
+    async def map_user_attributes(  # type: ignore[override]
+        self, userinfo: "UserInfo", token: "Token", failures: int
+    ) -> "UserAttributeDict":
+        return {  # type: ignore[typeddict-item]
             "localpart": userinfo["username"] + (str(failures) if failures else ""),
             "display_name": None,
         }
@@ -161,13 +173,13 @@ class OidcHandlerTestCase(HomeserverTestCase):
         self.hs_patcher.stop()
         return super().tearDown()
 
-    def reset_mocks(self):
+    def reset_mocks(self) -> None:
         """Reset all the Mocks."""
         self.fake_server.reset_mocks()
         self.render_error.reset_mock()
         self.complete_sso_login.reset_mock()
 
-    def metadata_edit(self, values):
+    def metadata_edit(self, values: dict) -> ContextManager[Mock]:
         """Modify the result that will be returned by the well-known query"""
 
         metadata = self.fake_server.get_metadata()
@@ -196,7 +208,9 @@ class OidcHandlerTestCase(HomeserverTestCase):
         session = self._generate_oidc_session_token(state, nonce, client_redirect_url)
         return _build_callback_request(code, state, session), grant
 
-    def assertRenderedError(self, error, error_description=None):
+    def assertRenderedError(
+        self, error: str, error_description: Optional[str] = None
+    ) -> Tuple[Any, ...]:
         self.render_error.assert_called_once()
         args = self.render_error.call_args[0]
         self.assertEqual(args[1], error)
@@ -273,8 +287,8 @@ class OidcHandlerTestCase(HomeserverTestCase):
         """Provider metadatas are extensively validated."""
         h = self.provider
 
-        def force_load_metadata():
-            async def force_load():
+        def force_load_metadata() -> Awaitable[None]:
+            async def force_load() -> "OpenIDProviderMetadata":
                 return await h.load_metadata(force=True)
 
             return get_awaitable_result(force_load())
@@ -1198,7 +1212,7 @@ def _build_callback_request(
     state: str,
     session: str,
     ip_address: str = "10.0.0.1",
-):
+) -> Mock:
     """Builds a fake SynapseRequest to mock the browser callback
 
     Returns a Mock object which looks like the SynapseRequest we get from a browser
diff --git a/tests/handlers/test_password_providers.py b/tests/handlers/test_password_providers.py
index 75934b1707..0916de64f5 100644
--- a/tests/handlers/test_password_providers.py
+++ b/tests/handlers/test_password_providers.py
@@ -15,12 +15,13 @@
 """Tests for the password_auth_provider interface"""
 
 from http import HTTPStatus
-from typing import Any, Type, Union
+from typing import Any, Dict, List, Optional, Type, Union
 from unittest.mock import Mock
 
 import synapse
 from synapse.api.constants import LoginType
 from synapse.api.errors import Codes
+from synapse.handlers.account import AccountHandler
 from synapse.module_api import ModuleApi
 from synapse.rest.client import account, devices, login, logout, register
 from synapse.types import JsonDict, UserID
@@ -44,13 +45,13 @@ class LegacyPasswordOnlyAuthProvider:
     """A legacy password_provider which only implements `check_password`."""
 
     @staticmethod
-    def parse_config(self):
+    def parse_config(config: JsonDict) -> None:
         pass
 
-    def __init__(self, config, account_handler):
+    def __init__(self, config: None, account_handler: AccountHandler):
         pass
 
-    def check_password(self, *args):
+    def check_password(self, *args: str) -> Mock:
         return mock_password_provider.check_password(*args)
 
 
@@ -58,16 +59,16 @@ class LegacyCustomAuthProvider:
     """A legacy password_provider which implements a custom login type."""
 
     @staticmethod
-    def parse_config(self):
+    def parse_config(config: JsonDict) -> None:
         pass
 
-    def __init__(self, config, account_handler):
+    def __init__(self, config: None, account_handler: AccountHandler):
         pass
 
-    def get_supported_login_types(self):
+    def get_supported_login_types(self) -> Dict[str, List[str]]:
         return {"test.login_type": ["test_field"]}
 
-    def check_auth(self, *args):
+    def check_auth(self, *args: str) -> Mock:
         return mock_password_provider.check_auth(*args)
 
 
@@ -75,15 +76,15 @@ class CustomAuthProvider:
     """A module which registers password_auth_provider callbacks for a custom login type."""
 
     @staticmethod
-    def parse_config(self):
+    def parse_config(config: JsonDict) -> None:
         pass
 
-    def __init__(self, config, api: ModuleApi):
+    def __init__(self, config: None, api: ModuleApi):
         api.register_password_auth_provider_callbacks(
             auth_checkers={("test.login_type", ("test_field",)): self.check_auth}
         )
 
-    def check_auth(self, *args):
+    def check_auth(self, *args: Any) -> Mock:
         return mock_password_provider.check_auth(*args)
 
 
@@ -92,16 +93,16 @@ class LegacyPasswordCustomAuthProvider:
     as a custom type."""
 
     @staticmethod
-    def parse_config(self):
+    def parse_config(config: JsonDict) -> None:
         pass
 
-    def __init__(self, config, account_handler):
+    def __init__(self, config: None, account_handler: AccountHandler):
         pass
 
-    def get_supported_login_types(self):
+    def get_supported_login_types(self) -> Dict[str, List[str]]:
         return {"m.login.password": ["password"], "test.login_type": ["test_field"]}
 
-    def check_auth(self, *args):
+    def check_auth(self, *args: str) -> Mock:
         return mock_password_provider.check_auth(*args)
 
 
@@ -110,10 +111,10 @@ class PasswordCustomAuthProvider:
     as well as a password login"""
 
     @staticmethod
-    def parse_config(self):
+    def parse_config(config: JsonDict) -> None:
         pass
 
-    def __init__(self, config, api: ModuleApi):
+    def __init__(self, config: None, api: ModuleApi):
         api.register_password_auth_provider_callbacks(
             auth_checkers={
                 ("test.login_type", ("test_field",)): self.check_auth,
@@ -121,10 +122,10 @@ class PasswordCustomAuthProvider:
             }
         )
 
-    def check_auth(self, *args):
+    def check_auth(self, *args: Any) -> Mock:
         return mock_password_provider.check_auth(*args)
 
-    def check_pass(self, *args):
+    def check_pass(self, *args: str) -> Mock:
         return mock_password_provider.check_password(*args)
 
 
@@ -161,16 +162,16 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
     CALLBACK_USERNAME = "get_username_for_registration"
     CALLBACK_DISPLAYNAME = "get_displayname_for_registration"
 
-    def setUp(self):
+    def setUp(self) -> None:
         # we use a global mock device, so make sure we are starting with a clean slate
         mock_password_provider.reset_mock()
         super().setUp()
 
     @override_config(legacy_providers_config(LegacyPasswordOnlyAuthProvider))
-    def test_password_only_auth_progiver_login_legacy(self):
+    def test_password_only_auth_progiver_login_legacy(self) -> None:
         self.password_only_auth_provider_login_test_body()
 
-    def password_only_auth_provider_login_test_body(self):
+    def password_only_auth_provider_login_test_body(self) -> None:
         # login flows should only have m.login.password
         flows = self._get_login_flows()
         self.assertEqual(flows, [{"type": "m.login.password"}] + ADDITIONAL_LOGIN_FLOWS)
@@ -201,10 +202,10 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
         )
 
     @override_config(legacy_providers_config(LegacyPasswordOnlyAuthProvider))
-    def test_password_only_auth_provider_ui_auth_legacy(self):
+    def test_password_only_auth_provider_ui_auth_legacy(self) -> None:
         self.password_only_auth_provider_ui_auth_test_body()
 
-    def password_only_auth_provider_ui_auth_test_body(self):
+    def password_only_auth_provider_ui_auth_test_body(self) -> None:
         """UI Auth should delegate correctly to the password provider"""
 
         # create the user, otherwise access doesn't work
@@ -238,10 +239,10 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
         mock_password_provider.check_password.assert_called_once_with("@u:test", "p")
 
     @override_config(legacy_providers_config(LegacyPasswordOnlyAuthProvider))
-    def test_local_user_fallback_login_legacy(self):
+    def test_local_user_fallback_login_legacy(self) -> None:
         self.local_user_fallback_login_test_body()
 
-    def local_user_fallback_login_test_body(self):
+    def local_user_fallback_login_test_body(self) -> None:
         """rejected login should fall back to local db"""
         self.register_user("localuser", "localpass")
 
@@ -255,10 +256,10 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
         self.assertEqual("@localuser:test", channel.json_body["user_id"])
 
     @override_config(legacy_providers_config(LegacyPasswordOnlyAuthProvider))
-    def test_local_user_fallback_ui_auth_legacy(self):
+    def test_local_user_fallback_ui_auth_legacy(self) -> None:
         self.local_user_fallback_ui_auth_test_body()
 
-    def local_user_fallback_ui_auth_test_body(self):
+    def local_user_fallback_ui_auth_test_body(self) -> None:
         """rejected login should fall back to local db"""
         self.register_user("localuser", "localpass")
 
@@ -298,10 +299,10 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
             "password_config": {"localdb_enabled": False},
         }
     )
-    def test_no_local_user_fallback_login_legacy(self):
+    def test_no_local_user_fallback_login_legacy(self) -> None:
         self.no_local_user_fallback_login_test_body()
 
-    def no_local_user_fallback_login_test_body(self):
+    def no_local_user_fallback_login_test_body(self) -> None:
         """localdb_enabled can block login with the local password"""
         self.register_user("localuser", "localpass")
 
@@ -320,10 +321,10 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
             "password_config": {"localdb_enabled": False},
         }
     )
-    def test_no_local_user_fallback_ui_auth_legacy(self):
+    def test_no_local_user_fallback_ui_auth_legacy(self) -> None:
         self.no_local_user_fallback_ui_auth_test_body()
 
-    def no_local_user_fallback_ui_auth_test_body(self):
+    def no_local_user_fallback_ui_auth_test_body(self) -> None:
         """localdb_enabled can block ui auth with the local password"""
         self.register_user("localuser", "localpass")
 
@@ -361,10 +362,10 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
             "password_config": {"enabled": False},
         }
     )
-    def test_password_auth_disabled_legacy(self):
+    def test_password_auth_disabled_legacy(self) -> None:
         self.password_auth_disabled_test_body()
 
-    def password_auth_disabled_test_body(self):
+    def password_auth_disabled_test_body(self) -> None:
         """password auth doesn't work if it's disabled across the board"""
         # login flows should be empty
         flows = self._get_login_flows()
@@ -376,14 +377,14 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
         mock_password_provider.check_password.assert_not_called()
 
     @override_config(legacy_providers_config(LegacyCustomAuthProvider))
-    def test_custom_auth_provider_login_legacy(self):
+    def test_custom_auth_provider_login_legacy(self) -> None:
         self.custom_auth_provider_login_test_body()
 
     @override_config(providers_config(CustomAuthProvider))
-    def test_custom_auth_provider_login(self):
+    def test_custom_auth_provider_login(self) -> None:
         self.custom_auth_provider_login_test_body()
 
-    def custom_auth_provider_login_test_body(self):
+    def custom_auth_provider_login_test_body(self) -> None:
         # login flows should have the custom flow and m.login.password, since we
         # haven't disabled local password lookup.
         # (password must come first, because reasons)
@@ -424,14 +425,14 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
         )
 
     @override_config(legacy_providers_config(LegacyCustomAuthProvider))
-    def test_custom_auth_provider_ui_auth_legacy(self):
+    def test_custom_auth_provider_ui_auth_legacy(self) -> None:
         self.custom_auth_provider_ui_auth_test_body()
 
     @override_config(providers_config(CustomAuthProvider))
-    def test_custom_auth_provider_ui_auth(self):
+    def test_custom_auth_provider_ui_auth(self) -> None:
         self.custom_auth_provider_ui_auth_test_body()
 
-    def custom_auth_provider_ui_auth_test_body(self):
+    def custom_auth_provider_ui_auth_test_body(self) -> None:
         # register the user and log in twice, to get two devices
         self.register_user("localuser", "localpass")
         tok1 = self.login("localuser", "localpass")
@@ -486,14 +487,14 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
         )
 
     @override_config(legacy_providers_config(LegacyCustomAuthProvider))
-    def test_custom_auth_provider_callback_legacy(self):
+    def test_custom_auth_provider_callback_legacy(self) -> None:
         self.custom_auth_provider_callback_test_body()
 
     @override_config(providers_config(CustomAuthProvider))
-    def test_custom_auth_provider_callback(self):
+    def test_custom_auth_provider_callback(self) -> None:
         self.custom_auth_provider_callback_test_body()
 
-    def custom_auth_provider_callback_test_body(self):
+    def custom_auth_provider_callback_test_body(self) -> None:
         callback = Mock(return_value=make_awaitable(None))
 
         mock_password_provider.check_auth.return_value = make_awaitable(
@@ -521,16 +522,16 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
             "password_config": {"enabled": False},
         }
     )
-    def test_custom_auth_password_disabled_legacy(self):
+    def test_custom_auth_password_disabled_legacy(self) -> None:
         self.custom_auth_password_disabled_test_body()
 
     @override_config(
         {**providers_config(CustomAuthProvider), "password_config": {"enabled": False}}
     )
-    def test_custom_auth_password_disabled(self):
+    def test_custom_auth_password_disabled(self) -> None:
         self.custom_auth_password_disabled_test_body()
 
-    def custom_auth_password_disabled_test_body(self):
+    def custom_auth_password_disabled_test_body(self) -> None:
         """Test login with a custom auth provider where password login is disabled"""
         self.register_user("localuser", "localpass")
 
@@ -548,7 +549,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
             "password_config": {"enabled": False, "localdb_enabled": False},
         }
     )
-    def test_custom_auth_password_disabled_localdb_enabled_legacy(self):
+    def test_custom_auth_password_disabled_localdb_enabled_legacy(self) -> None:
         self.custom_auth_password_disabled_localdb_enabled_test_body()
 
     @override_config(
@@ -557,10 +558,10 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
             "password_config": {"enabled": False, "localdb_enabled": False},
         }
     )
-    def test_custom_auth_password_disabled_localdb_enabled(self):
+    def test_custom_auth_password_disabled_localdb_enabled(self) -> None:
         self.custom_auth_password_disabled_localdb_enabled_test_body()
 
-    def custom_auth_password_disabled_localdb_enabled_test_body(self):
+    def custom_auth_password_disabled_localdb_enabled_test_body(self) -> None:
         """Check the localdb_enabled == enabled == False
 
         Regression test for https://github.com/matrix-org/synapse/issues/8914: check
@@ -583,7 +584,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
             "password_config": {"enabled": False},
         }
     )
-    def test_password_custom_auth_password_disabled_login_legacy(self):
+    def test_password_custom_auth_password_disabled_login_legacy(self) -> None:
         self.password_custom_auth_password_disabled_login_test_body()
 
     @override_config(
@@ -592,10 +593,10 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
             "password_config": {"enabled": False},
         }
     )
-    def test_password_custom_auth_password_disabled_login(self):
+    def test_password_custom_auth_password_disabled_login(self) -> None:
         self.password_custom_auth_password_disabled_login_test_body()
 
-    def password_custom_auth_password_disabled_login_test_body(self):
+    def password_custom_auth_password_disabled_login_test_body(self) -> None:
         """log in with a custom auth provider which implements password, but password
         login is disabled"""
         self.register_user("localuser", "localpass")
@@ -615,7 +616,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
             "password_config": {"enabled": False},
         }
     )
-    def test_password_custom_auth_password_disabled_ui_auth_legacy(self):
+    def test_password_custom_auth_password_disabled_ui_auth_legacy(self) -> None:
         self.password_custom_auth_password_disabled_ui_auth_test_body()
 
     @override_config(
@@ -624,10 +625,10 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
             "password_config": {"enabled": False},
         }
     )
-    def test_password_custom_auth_password_disabled_ui_auth(self):
+    def test_password_custom_auth_password_disabled_ui_auth(self) -> None:
         self.password_custom_auth_password_disabled_ui_auth_test_body()
 
-    def password_custom_auth_password_disabled_ui_auth_test_body(self):
+    def password_custom_auth_password_disabled_ui_auth_test_body(self) -> None:
         """UI Auth with a custom auth provider which implements password, but password
         login is disabled"""
         # register the user and log in twice via the test login type to get two devices,
@@ -689,7 +690,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
             "password_config": {"localdb_enabled": False},
         }
     )
-    def test_custom_auth_no_local_user_fallback_legacy(self):
+    def test_custom_auth_no_local_user_fallback_legacy(self) -> None:
         self.custom_auth_no_local_user_fallback_test_body()
 
     @override_config(
@@ -698,10 +699,10 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
             "password_config": {"localdb_enabled": False},
         }
     )
-    def test_custom_auth_no_local_user_fallback(self):
+    def test_custom_auth_no_local_user_fallback(self) -> None:
         self.custom_auth_no_local_user_fallback_test_body()
 
-    def custom_auth_no_local_user_fallback_test_body(self):
+    def custom_auth_no_local_user_fallback_test_body(self) -> None:
         """Test login with a custom auth provider where the local db is disabled"""
         self.register_user("localuser", "localpass")
 
@@ -713,14 +714,16 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
         channel = self._send_password_login("localuser", "localpass")
         self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result)
 
-    def test_on_logged_out(self):
+    def test_on_logged_out(self) -> None:
         """Tests that the on_logged_out callback is called when the user logs out."""
         self.register_user("rin", "password")
         tok = self.login("rin", "password")
 
         self.called = False
 
-        async def on_logged_out(user_id, device_id, access_token):
+        async def on_logged_out(
+            user_id: str, device_id: Optional[str], access_token: str
+        ) -> None:
             self.called = True
 
         on_logged_out = Mock(side_effect=on_logged_out)
@@ -738,7 +741,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
         on_logged_out.assert_called_once()
         self.assertTrue(self.called)
 
-    def test_username(self):
+    def test_username(self) -> None:
         """Tests that the get_username_for_registration callback can define the username
         of a user when registering.
         """
@@ -763,7 +766,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
         mxid = channel.json_body["user_id"]
         self.assertEqual(UserID.from_string(mxid).localpart, username + "-foo")
 
-    def test_username_uia(self):
+    def test_username_uia(self) -> None:
         """Tests that the get_username_for_registration callback is only called at the
         end of the UIA flow.
         """
@@ -782,7 +785,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
 
     # Set some email configuration so the test doesn't fail because of its absence.
     @override_config({"email": {"notif_from": "noreply@test"}})
-    def test_3pid_allowed(self):
+    def test_3pid_allowed(self) -> None:
         """Tests that an is_3pid_allowed_callbacks forbidding a 3PID makes Synapse refuse
         to bind the new 3PID, and that one allowing a 3PID makes Synapse accept to bind
         the 3PID. Also checks that the module is passed a boolean indicating whether the
@@ -791,7 +794,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
         self._test_3pid_allowed("rin", False)
         self._test_3pid_allowed("kitay", True)
 
-    def test_displayname(self):
+    def test_displayname(self) -> None:
         """Tests that the get_displayname_for_registration callback can define the
         display name of a user when registering.
         """
@@ -820,7 +823,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
 
         self.assertEqual(display_name, username + "-foo")
 
-    def test_displayname_uia(self):
+    def test_displayname_uia(self) -> None:
         """Tests that the get_displayname_for_registration callback is only called at the
         end of the UIA flow.
         """
@@ -841,7 +844,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
         # Check that the callback has been called.
         m.assert_called_once()
 
-    def _test_3pid_allowed(self, username: str, registration: bool):
+    def _test_3pid_allowed(self, username: str, registration: bool) -> None:
         """Tests that the "is_3pid_allowed" module callback is called correctly, using
         either /register or /account URLs depending on the arguments.
 
@@ -907,7 +910,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
         client is trying to register.
         """
 
-        async def callback(uia_results, params):
+        async def callback(uia_results: JsonDict, params: JsonDict) -> str:
             self.assertIn(LoginType.DUMMY, uia_results)
             username = params["username"]
             return username + "-foo"
@@ -950,12 +953,13 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
     def _send_password_login(self, user: str, password: str) -> FakeChannel:
         return self._send_login(type="m.login.password", user=user, password=password)
 
-    def _send_login(self, type, user, **params) -> FakeChannel:
-        params.update({"identifier": {"type": "m.id.user", "user": user}, "type": type})
+    def _send_login(self, type: str, user: str, **extra_params: str) -> FakeChannel:
+        params = {"identifier": {"type": "m.id.user", "user": user}, "type": type}
+        params.update(extra_params)
         channel = self.make_request("POST", "/_matrix/client/r0/login", params)
         return channel
 
-    def _start_delete_device_session(self, access_token, device_id) -> str:
+    def _start_delete_device_session(self, access_token: str, device_id: str) -> str:
         """Make an initial delete device request, and return the UI Auth session ID"""
         channel = self._delete_device(access_token, device_id)
         self.assertEqual(channel.code, 401)
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index 584e7b8971..19f5322317 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -12,12 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional
+from typing import Optional, cast
 from unittest.mock import Mock, call
 
 from parameterized import parameterized
 from signedjson.key import generate_signing_key
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.constants import EventTypes, Membership, PresenceState
 from synapse.api.presence import UserPresenceState
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
@@ -35,7 +37,9 @@ from synapse.handlers.presence import (
 )
 from synapse.rest import admin
 from synapse.rest.client import room
-from synapse.types import UserID, get_domain_from_id
+from synapse.server import HomeServer
+from synapse.types import JsonDict, UserID, get_domain_from_id
+from synapse.util import Clock
 
 from tests import unittest
 from tests.replication._base import BaseMultiWorkerStreamTestCase
@@ -44,10 +48,12 @@ from tests.replication._base import BaseMultiWorkerStreamTestCase
 class PresenceUpdateTestCase(unittest.HomeserverTestCase):
     servlets = [admin.register_servlets]
 
-    def prepare(self, reactor, clock, homeserver):
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
         self.store = homeserver.get_datastores().main
 
-    def test_offline_to_online(self):
+    def test_offline_to_online(self) -> None:
         wheel_timer = Mock()
         user_id = "@foo:bar"
         now = 5000000
@@ -85,7 +91,7 @@ class PresenceUpdateTestCase(unittest.HomeserverTestCase):
             any_order=True,
         )
 
-    def test_online_to_online(self):
+    def test_online_to_online(self) -> None:
         wheel_timer = Mock()
         user_id = "@foo:bar"
         now = 5000000
@@ -128,7 +134,7 @@ class PresenceUpdateTestCase(unittest.HomeserverTestCase):
             any_order=True,
         )
 
-    def test_online_to_online_last_active_noop(self):
+    def test_online_to_online_last_active_noop(self) -> None:
         wheel_timer = Mock()
         user_id = "@foo:bar"
         now = 5000000
@@ -173,7 +179,7 @@ class PresenceUpdateTestCase(unittest.HomeserverTestCase):
             any_order=True,
         )
 
-    def test_online_to_online_last_active(self):
+    def test_online_to_online_last_active(self) -> None:
         wheel_timer = Mock()
         user_id = "@foo:bar"
         now = 5000000
@@ -210,7 +216,7 @@ class PresenceUpdateTestCase(unittest.HomeserverTestCase):
             any_order=True,
         )
 
-    def test_remote_ping_timer(self):
+    def test_remote_ping_timer(self) -> None:
         wheel_timer = Mock()
         user_id = "@foo:bar"
         now = 5000000
@@ -244,7 +250,7 @@ class PresenceUpdateTestCase(unittest.HomeserverTestCase):
             any_order=True,
         )
 
-    def test_online_to_offline(self):
+    def test_online_to_offline(self) -> None:
         wheel_timer = Mock()
         user_id = "@foo:bar"
         now = 5000000
@@ -266,7 +272,7 @@ class PresenceUpdateTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual(wheel_timer.insert.call_count, 0)
 
-    def test_online_to_idle(self):
+    def test_online_to_idle(self) -> None:
         wheel_timer = Mock()
         user_id = "@foo:bar"
         now = 5000000
@@ -300,7 +306,7 @@ class PresenceUpdateTestCase(unittest.HomeserverTestCase):
             any_order=True,
         )
 
-    def test_persisting_presence_updates(self):
+    def test_persisting_presence_updates(self) -> None:
         """Tests that the latest presence state for each user is persisted correctly"""
         # Create some test users and presence states for them
         presence_states = []
@@ -322,7 +328,7 @@ class PresenceUpdateTestCase(unittest.HomeserverTestCase):
         self.get_success(self.store.update_presence(presence_states))
 
         # Check that each update is present in the database
-        db_presence_states = self.get_success(
+        db_presence_states_raw = self.get_success(
             self.store.get_all_presence_updates(
                 instance_name="master",
                 last_id=0,
@@ -332,7 +338,7 @@ class PresenceUpdateTestCase(unittest.HomeserverTestCase):
         )
 
         # Extract presence update user ID and state information into lists of tuples
-        db_presence_states = [(ps[0], ps[1]) for _, ps in db_presence_states[0]]
+        db_presence_states = [(ps[0], ps[1]) for _, ps in db_presence_states_raw[0]]
         presence_states_compare = [(ps.user_id, ps.state) for ps in presence_states]
 
         # Compare what we put into the storage with what we got out.
@@ -343,7 +349,7 @@ class PresenceUpdateTestCase(unittest.HomeserverTestCase):
 class PresenceTimeoutTestCase(unittest.TestCase):
     """Tests different timers and that the timer does not change `status_msg` of user."""
 
-    def test_idle_timer(self):
+    def test_idle_timer(self) -> None:
         user_id = "@foo:bar"
         status_msg = "I'm here!"
         now = 5000000
@@ -363,7 +369,7 @@ class PresenceTimeoutTestCase(unittest.TestCase):
         self.assertEqual(new_state.state, PresenceState.UNAVAILABLE)
         self.assertEqual(new_state.status_msg, status_msg)
 
-    def test_busy_no_idle(self):
+    def test_busy_no_idle(self) -> None:
         """
         Tests that a user setting their presence to busy but idling doesn't turn their
         presence state into unavailable.
@@ -387,7 +393,7 @@ class PresenceTimeoutTestCase(unittest.TestCase):
         self.assertEqual(new_state.state, PresenceState.BUSY)
         self.assertEqual(new_state.status_msg, status_msg)
 
-    def test_sync_timeout(self):
+    def test_sync_timeout(self) -> None:
         user_id = "@foo:bar"
         status_msg = "I'm here!"
         now = 5000000
@@ -407,7 +413,7 @@ class PresenceTimeoutTestCase(unittest.TestCase):
         self.assertEqual(new_state.state, PresenceState.OFFLINE)
         self.assertEqual(new_state.status_msg, status_msg)
 
-    def test_sync_online(self):
+    def test_sync_online(self) -> None:
         user_id = "@foo:bar"
         status_msg = "I'm here!"
         now = 5000000
@@ -429,7 +435,7 @@ class PresenceTimeoutTestCase(unittest.TestCase):
         self.assertEqual(new_state.state, PresenceState.ONLINE)
         self.assertEqual(new_state.status_msg, status_msg)
 
-    def test_federation_ping(self):
+    def test_federation_ping(self) -> None:
         user_id = "@foo:bar"
         status_msg = "I'm here!"
         now = 5000000
@@ -448,7 +454,7 @@ class PresenceTimeoutTestCase(unittest.TestCase):
         self.assertIsNotNone(new_state)
         self.assertEqual(state, new_state)
 
-    def test_no_timeout(self):
+    def test_no_timeout(self) -> None:
         user_id = "@foo:bar"
         now = 5000000
 
@@ -464,7 +470,7 @@ class PresenceTimeoutTestCase(unittest.TestCase):
 
         self.assertIsNone(new_state)
 
-    def test_federation_timeout(self):
+    def test_federation_timeout(self) -> None:
         user_id = "@foo:bar"
         status_msg = "I'm here!"
         now = 5000000
@@ -487,7 +493,7 @@ class PresenceTimeoutTestCase(unittest.TestCase):
         self.assertEqual(new_state.state, PresenceState.OFFLINE)
         self.assertEqual(new_state.status_msg, status_msg)
 
-    def test_last_active(self):
+    def test_last_active(self) -> None:
         user_id = "@foo:bar"
         status_msg = "I'm here!"
         now = 5000000
@@ -508,15 +514,15 @@ class PresenceTimeoutTestCase(unittest.TestCase):
 
 
 class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.presence_handler = hs.get_presence_handler()
         self.clock = hs.get_clock()
 
-    def test_external_process_timeout(self):
+    def test_external_process_timeout(self) -> None:
         """Test that if an external process doesn't update the records for a while
         we time out their syncing users presence.
         """
-        process_id = 1
+        process_id = "1"
         user_id = "@test:server"
 
         # Notify handler that a user is now syncing.
@@ -544,7 +550,7 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         )
         self.assertEqual(state.state, PresenceState.OFFLINE)
 
-    def test_user_goes_offline_by_timeout_status_msg_remain(self):
+    def test_user_goes_offline_by_timeout_status_msg_remain(self) -> None:
         """Test that if a user doesn't update the records for a while
         users presence goes `OFFLINE` because of timeout and `status_msg` remains.
         """
@@ -576,7 +582,7 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         self.assertEqual(state.state, PresenceState.OFFLINE)
         self.assertEqual(state.status_msg, status_msg)
 
-    def test_user_goes_offline_manually_with_no_status_msg(self):
+    def test_user_goes_offline_manually_with_no_status_msg(self) -> None:
         """Test that if a user change presence manually to `OFFLINE`
         and no status is set, that `status_msg` is `None`.
         """
@@ -601,7 +607,7 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         self.assertEqual(state.state, PresenceState.OFFLINE)
         self.assertEqual(state.status_msg, None)
 
-    def test_user_goes_offline_manually_with_status_msg(self):
+    def test_user_goes_offline_manually_with_status_msg(self) -> None:
         """Test that if a user change presence manually to `OFFLINE`
         and a status is set, that `status_msg` appears.
         """
@@ -618,7 +624,7 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
             user_id, PresenceState.OFFLINE, "And now here."
         )
 
-    def test_user_reset_online_with_no_status(self):
+    def test_user_reset_online_with_no_status(self) -> None:
         """Test that if a user set again the presence manually
         and no status is set, that `status_msg` is `None`.
         """
@@ -644,7 +650,7 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         self.assertEqual(state.state, PresenceState.ONLINE)
         self.assertEqual(state.status_msg, None)
 
-    def test_set_presence_with_status_msg_none(self):
+    def test_set_presence_with_status_msg_none(self) -> None:
         """Test that if a user set again the presence manually
         and status is `None`, that `status_msg` is `None`.
         """
@@ -659,7 +665,7 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         # Mark user as online and `status_msg = None`
         self._set_presencestate_with_status_msg(user_id, PresenceState.ONLINE, None)
 
-    def test_set_presence_from_syncing_not_set(self):
+    def test_set_presence_from_syncing_not_set(self) -> None:
         """Test that presence is not set by syncing if affect_presence is false"""
         user_id = "@test:server"
         status_msg = "I'm here!"
@@ -680,7 +686,7 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         # and status message should still be the same
         self.assertEqual(state.status_msg, status_msg)
 
-    def test_set_presence_from_syncing_is_set(self):
+    def test_set_presence_from_syncing_is_set(self) -> None:
         """Test that presence is set by syncing if affect_presence is true"""
         user_id = "@test:server"
         status_msg = "I'm here!"
@@ -699,7 +705,7 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         # we should now be online
         self.assertEqual(state.state, PresenceState.ONLINE)
 
-    def test_set_presence_from_syncing_keeps_status(self):
+    def test_set_presence_from_syncing_keeps_status(self) -> None:
         """Test that presence set by syncing retains status message"""
         user_id = "@test:server"
         status_msg = "I'm here!"
@@ -726,7 +732,9 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
             },
         }
     )
-    def test_set_presence_from_syncing_keeps_busy(self, test_with_workers: bool):
+    def test_set_presence_from_syncing_keeps_busy(
+        self, test_with_workers: bool
+    ) -> None:
         """Test that presence set by syncing doesn't affect busy status
 
         Args:
@@ -767,7 +775,7 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
 
     def _set_presencestate_with_status_msg(
         self, user_id: str, state: str, status_msg: Optional[str]
-    ):
+    ) -> None:
         """Set a PresenceState and status_msg and check the result.
 
         Args:
@@ -790,14 +798,14 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
 
 
 class PresenceFederationQueueTestCase(unittest.HomeserverTestCase):
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.presence_handler = hs.get_presence_handler()
         self.clock = hs.get_clock()
         self.instance_name = hs.get_instance_name()
 
         self.queue = self.presence_handler.get_federation_queue()
 
-    def test_send_and_get(self):
+    def test_send_and_get(self) -> None:
         state1 = UserPresenceState.default("@user1:test")
         state2 = UserPresenceState.default("@user2:test")
         state3 = UserPresenceState.default("@user3:test")
@@ -834,7 +842,7 @@ class PresenceFederationQueueTestCase(unittest.HomeserverTestCase):
         self.assertFalse(limited)
         self.assertCountEqual(rows, [])
 
-    def test_send_and_get_split(self):
+    def test_send_and_get_split(self) -> None:
         state1 = UserPresenceState.default("@user1:test")
         state2 = UserPresenceState.default("@user2:test")
         state3 = UserPresenceState.default("@user3:test")
@@ -877,7 +885,7 @@ class PresenceFederationQueueTestCase(unittest.HomeserverTestCase):
 
         self.assertCountEqual(rows, expected_rows)
 
-    def test_clear_queue_all(self):
+    def test_clear_queue_all(self) -> None:
         state1 = UserPresenceState.default("@user1:test")
         state2 = UserPresenceState.default("@user2:test")
         state3 = UserPresenceState.default("@user3:test")
@@ -921,7 +929,7 @@ class PresenceFederationQueueTestCase(unittest.HomeserverTestCase):
 
         self.assertCountEqual(rows, expected_rows)
 
-    def test_partially_clear_queue(self):
+    def test_partially_clear_queue(self) -> None:
         state1 = UserPresenceState.default("@user1:test")
         state2 = UserPresenceState.default("@user2:test")
         state3 = UserPresenceState.default("@user3:test")
@@ -982,7 +990,7 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
 
     servlets = [room.register_servlets]
 
-    def make_homeserver(self, reactor, clock):
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         hs = self.setup_test_homeserver(
             "server",
             federation_http_client=None,
@@ -990,14 +998,14 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
         )
         return hs
 
-    def default_config(self):
+    def default_config(self) -> JsonDict:
         config = super().default_config()
         # Enable federation sending on the main process.
         config["federation_sender_instances"] = None
         return config
 
-    def prepare(self, reactor, clock, hs):
-        self.federation_sender = hs.get_federation_sender()
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.federation_sender = cast(Mock, hs.get_federation_sender())
         self.event_builder_factory = hs.get_event_builder_factory()
         self.federation_event_handler = hs.get_federation_event_handler()
         self.presence_handler = hs.get_presence_handler()
@@ -1013,7 +1021,7 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
         # random key to use.
         self.random_signing_key = generate_signing_key("ver")
 
-    def test_remote_joins(self):
+    def test_remote_joins(self) -> None:
         # We advance time to something that isn't 0, as we use 0 as a special
         # value.
         self.reactor.advance(1000000000000)
@@ -1061,7 +1069,7 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
             destinations={"server3"}, states=[expected_state]
         )
 
-    def test_remote_gets_presence_when_local_user_joins(self):
+    def test_remote_gets_presence_when_local_user_joins(self) -> None:
         # We advance time to something that isn't 0, as we use 0 as a special
         # value.
         self.reactor.advance(1000000000000)
@@ -1110,7 +1118,7 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
             destinations={"server2", "server3"}, states=[expected_state]
         )
 
-    def _add_new_user(self, room_id, user_id):
+    def _add_new_user(self, room_id: str, user_id: str) -> None:
         """Add new user to the room by creating an event and poking the federation API."""
 
         hostname = get_domain_from_id(user_id)
diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py
index 675aa023ac..7c174782da 100644
--- a/tests/handlers/test_profile.py
+++ b/tests/handlers/test_profile.py
@@ -332,7 +332,7 @@ class ProfileTestCase(unittest.HomeserverTestCase):
     @unittest.override_config(
         {"server_name": "test:8888", "allowed_avatar_mimetypes": ["image/png"]}
     )
-    def test_avatar_constraint_on_local_server_with_port(self):
+    def test_avatar_constraint_on_local_server_with_port(self) -> None:
         """Test that avatar metadata is correctly fetched when the media is on a local
         server and the server has an explicit port.
 
@@ -376,7 +376,7 @@ class ProfileTestCase(unittest.HomeserverTestCase):
             self.get_success(self.handler.check_avatar_size_and_mime_type(remote_mxc))
         )
 
-    def _setup_local_files(self, names_and_props: Dict[str, Dict[str, Any]]):
+    def _setup_local_files(self, names_and_props: Dict[str, Dict[str, Any]]) -> None:
         """Stores metadata about files in the database.
 
         Args:
diff --git a/tests/handlers/test_receipts.py b/tests/handlers/test_receipts.py
index b55238650c..f60400ff8d 100644
--- a/tests/handlers/test_receipts.py
+++ b/tests/handlers/test_receipts.py
@@ -15,14 +15,18 @@
 from copy import deepcopy
 from typing import List
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.constants import EduTypes, ReceiptTypes
+from synapse.server import HomeServer
 from synapse.types import JsonDict
+from synapse.util import Clock
 
 from tests import unittest
 
 
 class ReceiptsTestCase(unittest.HomeserverTestCase):
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.event_source = hs.get_event_sources().sources.receipt
 
     def test_filters_out_private_receipt(self) -> None:
diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py
index 765df75d91..b9332d97dc 100644
--- a/tests/handlers/test_register.py
+++ b/tests/handlers/test_register.py
@@ -12,8 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Any, Collection, List, Optional, Tuple
 from unittest.mock import Mock
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.auth import Auth
 from synapse.api.constants import UserTypes
 from synapse.api.errors import (
@@ -22,8 +25,18 @@ from synapse.api.errors import (
     ResourceLimitError,
     SynapseError,
 )
+from synapse.module_api import ModuleApi
+from synapse.server import HomeServer
 from synapse.spam_checker_api import RegistrationBehaviour
-from synapse.types import RoomAlias, RoomID, UserID, create_requester
+from synapse.types import (
+    JsonDict,
+    Requester,
+    RoomAlias,
+    RoomID,
+    UserID,
+    create_requester,
+)
+from synapse.util import Clock
 
 from tests.test_utils import make_awaitable
 from tests.unittest import override_config
@@ -33,94 +46,98 @@ from .. import unittest
 
 
 class TestSpamChecker:
-    def __init__(self, config, api):
+    def __init__(self, config: None, api: ModuleApi):
         api.register_spam_checker_callbacks(
             check_registration_for_spam=self.check_registration_for_spam,
         )
 
     @staticmethod
-    def parse_config(config):
-        return config
+    def parse_config(config: JsonDict) -> None:
+        return None
 
     async def check_registration_for_spam(
         self,
-        email_threepid,
-        username,
-        request_info,
-        auth_provider_id,
-    ):
+        email_threepid: Optional[dict],
+        username: Optional[str],
+        request_info: Collection[Tuple[str, str]],
+        auth_provider_id: Optional[str],
+    ) -> RegistrationBehaviour:
         pass
 
 
 class DenyAll(TestSpamChecker):
     async def check_registration_for_spam(
         self,
-        email_threepid,
-        username,
-        request_info,
-        auth_provider_id,
-    ):
+        email_threepid: Optional[dict],
+        username: Optional[str],
+        request_info: Collection[Tuple[str, str]],
+        auth_provider_id: Optional[str],
+    ) -> RegistrationBehaviour:
         return RegistrationBehaviour.DENY
 
 
 class BanAll(TestSpamChecker):
     async def check_registration_for_spam(
         self,
-        email_threepid,
-        username,
-        request_info,
-        auth_provider_id,
-    ):
+        email_threepid: Optional[dict],
+        username: Optional[str],
+        request_info: Collection[Tuple[str, str]],
+        auth_provider_id: Optional[str],
+    ) -> RegistrationBehaviour:
         return RegistrationBehaviour.SHADOW_BAN
 
 
 class BanBadIdPUser(TestSpamChecker):
     async def check_registration_for_spam(
-        self, email_threepid, username, request_info, auth_provider_id=None
-    ):
+        self,
+        email_threepid: Optional[dict],
+        username: Optional[str],
+        request_info: Collection[Tuple[str, str]],
+        auth_provider_id: Optional[str] = None,
+    ) -> RegistrationBehaviour:
         # Reject any user coming from CAS and whose username contains profanity
-        if auth_provider_id == "cas" and "flimflob" in username:
+        if auth_provider_id == "cas" and username and "flimflob" in username:
             return RegistrationBehaviour.DENY
         return RegistrationBehaviour.ALLOW
 
 
 class TestLegacyRegistrationSpamChecker:
-    def __init__(self, config, api):
+    def __init__(self, config: None, api: ModuleApi):
         pass
 
     async def check_registration_for_spam(
         self,
-        email_threepid,
-        username,
-        request_info,
-    ):
+        email_threepid: Optional[dict],
+        username: Optional[str],
+        request_info: Collection[Tuple[str, str]],
+    ) -> RegistrationBehaviour:
         pass
 
 
 class LegacyAllowAll(TestLegacyRegistrationSpamChecker):
     async def check_registration_for_spam(
         self,
-        email_threepid,
-        username,
-        request_info,
-    ):
+        email_threepid: Optional[dict],
+        username: Optional[str],
+        request_info: Collection[Tuple[str, str]],
+    ) -> RegistrationBehaviour:
         return RegistrationBehaviour.ALLOW
 
 
 class LegacyDenyAll(TestLegacyRegistrationSpamChecker):
     async def check_registration_for_spam(
         self,
-        email_threepid,
-        username,
-        request_info,
-    ):
+        email_threepid: Optional[dict],
+        username: Optional[str],
+        request_info: Collection[Tuple[str, str]],
+    ) -> RegistrationBehaviour:
         return RegistrationBehaviour.DENY
 
 
 class RegistrationTestCase(unittest.HomeserverTestCase):
     """Tests the RegistrationHandler."""
 
-    def make_homeserver(self, reactor, clock):
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         hs_config = self.default_config()
 
         # some of the tests rely on us having a user consent version
@@ -145,7 +162,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
 
         return hs
 
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.handler = self.hs.get_registration_handler()
         self.store = self.hs.get_datastores().main
         self.lots_of_users = 100
@@ -153,7 +170,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
 
         self.requester = create_requester("@requester:test")
 
-    def test_user_is_created_and_logged_in_if_doesnt_exist(self):
+    def test_user_is_created_and_logged_in_if_doesnt_exist(self) -> None:
         frank = UserID.from_string("@frank:test")
         user_id = frank.to_string()
         requester = create_requester(user_id)
@@ -164,7 +181,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         self.assertIsInstance(result_token, str)
         self.assertGreater(len(result_token), 20)
 
-    def test_if_user_exists(self):
+    def test_if_user_exists(self) -> None:
         store = self.hs.get_datastores().main
         frank = UserID.from_string("@frank:test")
         self.get_success(
@@ -180,12 +197,12 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         self.assertTrue(result_token is not None)
 
     @override_config({"limit_usage_by_mau": False})
-    def test_mau_limits_when_disabled(self):
+    def test_mau_limits_when_disabled(self) -> None:
         # Ensure does not throw exception
         self.get_success(self.get_or_create_user(self.requester, "a", "display_name"))
 
     @override_config({"limit_usage_by_mau": True})
-    def test_get_or_create_user_mau_not_blocked(self):
+    def test_get_or_create_user_mau_not_blocked(self) -> None:
         self.store.count_monthly_users = Mock(
             return_value=make_awaitable(self.hs.config.server.max_mau_value - 1)
         )
@@ -193,7 +210,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         self.get_success(self.get_or_create_user(self.requester, "c", "User"))
 
     @override_config({"limit_usage_by_mau": True})
-    def test_get_or_create_user_mau_blocked(self):
+    def test_get_or_create_user_mau_blocked(self) -> None:
         self.store.get_monthly_active_count = Mock(
             return_value=make_awaitable(self.lots_of_users)
         )
@@ -211,7 +228,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         )
 
     @override_config({"limit_usage_by_mau": True})
-    def test_register_mau_blocked(self):
+    def test_register_mau_blocked(self) -> None:
         self.store.get_monthly_active_count = Mock(
             return_value=make_awaitable(self.lots_of_users)
         )
@@ -229,7 +246,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
     @override_config(
         {"auto_join_rooms": ["#room:test"], "auto_join_rooms_for_guests": False}
     )
-    def test_auto_join_rooms_for_guests(self):
+    def test_auto_join_rooms_for_guests(self) -> None:
         user_id = self.get_success(
             self.handler.register_user(localpart="jeff", make_guest=True),
         )
@@ -237,7 +254,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         self.assertEqual(len(rooms), 0)
 
     @override_config({"auto_join_rooms": ["#room:test"]})
-    def test_auto_create_auto_join_rooms(self):
+    def test_auto_create_auto_join_rooms(self) -> None:
         room_alias_str = "#room:test"
         user_id = self.get_success(self.handler.register_user(localpart="jeff"))
         rooms = self.get_success(self.store.get_rooms_for_user(user_id))
@@ -249,7 +266,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         self.assertEqual(len(rooms), 1)
 
     @override_config({"auto_join_rooms": []})
-    def test_auto_create_auto_join_rooms_with_no_rooms(self):
+    def test_auto_create_auto_join_rooms_with_no_rooms(self) -> None:
         frank = UserID.from_string("@frank:test")
         user_id = self.get_success(self.handler.register_user(frank.localpart))
         self.assertEqual(user_id, frank.to_string())
@@ -257,7 +274,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         self.assertEqual(len(rooms), 0)
 
     @override_config({"auto_join_rooms": ["#room:another"]})
-    def test_auto_create_auto_join_where_room_is_another_domain(self):
+    def test_auto_create_auto_join_where_room_is_another_domain(self) -> None:
         frank = UserID.from_string("@frank:test")
         user_id = self.get_success(self.handler.register_user(frank.localpart))
         self.assertEqual(user_id, frank.to_string())
@@ -267,13 +284,13 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
     @override_config(
         {"auto_join_rooms": ["#room:test"], "autocreate_auto_join_rooms": False}
     )
-    def test_auto_create_auto_join_where_auto_create_is_false(self):
+    def test_auto_create_auto_join_where_auto_create_is_false(self) -> None:
         user_id = self.get_success(self.handler.register_user(localpart="jeff"))
         rooms = self.get_success(self.store.get_rooms_for_user(user_id))
         self.assertEqual(len(rooms), 0)
 
     @override_config({"auto_join_rooms": ["#room:test"]})
-    def test_auto_create_auto_join_rooms_when_user_is_not_a_real_user(self):
+    def test_auto_create_auto_join_rooms_when_user_is_not_a_real_user(self) -> None:
         room_alias_str = "#room:test"
         self.store.is_real_user = Mock(return_value=make_awaitable(False))
         user_id = self.get_success(self.handler.register_user(localpart="support"))
@@ -284,7 +301,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         self.get_failure(directory_handler.get_association(room_alias), SynapseError)
 
     @override_config({"auto_join_rooms": ["#room:test"]})
-    def test_auto_create_auto_join_rooms_when_user_is_the_first_real_user(self):
+    def test_auto_create_auto_join_rooms_when_user_is_the_first_real_user(self) -> None:
         room_alias_str = "#room:test"
 
         self.store.count_real_users = Mock(return_value=make_awaitable(1))
@@ -299,7 +316,9 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         self.assertEqual(len(rooms), 1)
 
     @override_config({"auto_join_rooms": ["#room:test"]})
-    def test_auto_create_auto_join_rooms_when_user_is_not_the_first_real_user(self):
+    def test_auto_create_auto_join_rooms_when_user_is_not_the_first_real_user(
+        self,
+    ) -> None:
         self.store.count_real_users = Mock(return_value=make_awaitable(2))
         self.store.is_real_user = Mock(return_value=make_awaitable(True))
         user_id = self.get_success(self.handler.register_user(localpart="real"))
@@ -312,7 +331,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
             "autocreate_auto_join_rooms_federated": False,
         }
     )
-    def test_auto_create_auto_join_rooms_federated(self):
+    def test_auto_create_auto_join_rooms_federated(self) -> None:
         """
         Auto-created rooms that are private require an invite to go to the user
         (instead of directly joining it).
@@ -339,7 +358,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
     @override_config(
         {"auto_join_rooms": ["#room:test"], "auto_join_mxid_localpart": "support"}
     )
-    def test_auto_join_mxid_localpart(self):
+    def test_auto_join_mxid_localpart(self) -> None:
         """
         Ensure the user still needs up in the room created by a different user.
         """
@@ -376,7 +395,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
             "auto_join_mxid_localpart": "support",
         }
     )
-    def test_auto_create_auto_join_room_preset(self):
+    def test_auto_create_auto_join_room_preset(self) -> None:
         """
         Auto-created rooms that are private require an invite to go to the user
         (instead of directly joining it).
@@ -416,7 +435,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
             "auto_join_mxid_localpart": "support",
         }
     )
-    def test_auto_create_auto_join_room_preset_guest(self):
+    def test_auto_create_auto_join_room_preset_guest(self) -> None:
         """
         Auto-created rooms that are private require an invite to go to the user
         (instead of directly joining it).
@@ -454,7 +473,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
             "auto_join_mxid_localpart": "support",
         }
     )
-    def test_auto_create_auto_join_room_preset_invalid_permissions(self):
+    def test_auto_create_auto_join_room_preset_invalid_permissions(self) -> None:
         """
         Auto-created rooms that are private require an invite, check that
         registration doesn't completely break if the inviter doesn't have proper
@@ -525,7 +544,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
             "auto_join_rooms": ["#room:test"],
         },
     )
-    def test_auto_create_auto_join_where_no_consent(self):
+    def test_auto_create_auto_join_where_no_consent(self) -> None:
         """Test to ensure that the first user is not auto-joined to a room if
         they have not given general consent.
         """
@@ -550,19 +569,19 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         rooms = self.get_success(self.store.get_rooms_for_user(user_id))
         self.assertEqual(len(rooms), 1)
 
-    def test_register_support_user(self):
+    def test_register_support_user(self) -> None:
         user_id = self.get_success(
             self.handler.register_user(localpart="user", user_type=UserTypes.SUPPORT)
         )
         d = self.store.is_support_user(user_id)
         self.assertTrue(self.get_success(d))
 
-    def test_register_not_support_user(self):
+    def test_register_not_support_user(self) -> None:
         user_id = self.get_success(self.handler.register_user(localpart="user"))
         d = self.store.is_support_user(user_id)
         self.assertFalse(self.get_success(d))
 
-    def test_invalid_user_id_length(self):
+    def test_invalid_user_id_length(self) -> None:
         invalid_user_id = "x" * 256
         self.get_failure(
             self.handler.register_user(localpart=invalid_user_id), SynapseError
@@ -577,7 +596,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
             ]
         }
     )
-    def test_spam_checker_deny(self):
+    def test_spam_checker_deny(self) -> None:
         """A spam checker can deny registration, which results in an error."""
         self.get_failure(self.handler.register_user(localpart="user"), SynapseError)
 
@@ -590,7 +609,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
             ]
         }
     )
-    def test_spam_checker_legacy_allow(self):
+    def test_spam_checker_legacy_allow(self) -> None:
         """Tests that a legacy spam checker implementing the legacy 3-arg version of the
         check_registration_for_spam callback is correctly called.
 
@@ -610,7 +629,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
             ]
         }
     )
-    def test_spam_checker_legacy_deny(self):
+    def test_spam_checker_legacy_deny(self) -> None:
         """Tests that a legacy spam checker implementing the legacy 3-arg version of the
         check_registration_for_spam callback is correctly called.
 
@@ -630,7 +649,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
             ]
         }
     )
-    def test_spam_checker_shadow_ban(self):
+    def test_spam_checker_shadow_ban(self) -> None:
         """A spam checker can choose to shadow-ban a user, which allows registration to succeed."""
         user_id = self.get_success(self.handler.register_user(localpart="user"))
 
@@ -660,7 +679,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
             ]
         }
     )
-    def test_spam_checker_receives_sso_type(self):
+    def test_spam_checker_receives_sso_type(self) -> None:
         """Test rejecting registration based on SSO type"""
         f = self.get_failure(
             self.handler.register_user(localpart="bobflimflob", auth_provider_id="cas"),
@@ -678,8 +697,12 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         )
 
     async def get_or_create_user(
-        self, requester, localpart, displayname, password_hash=None
-    ):
+        self,
+        requester: Requester,
+        localpart: str,
+        displayname: Optional[str],
+        password_hash: Optional[str] = None,
+    ) -> Tuple[str, str]:
         """Creates a new user if the user does not exist,
         else revokes all previous access tokens and generates a new one.
 
@@ -734,13 +757,15 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
 class RemoteAutoJoinTestCase(unittest.HomeserverTestCase):
     """Tests auto-join on remote rooms."""
 
-    def make_homeserver(self, reactor, clock):
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         self.room_id = "!roomid:remotetest"
 
-        async def update_membership(*args, **kwargs):
+        async def update_membership(*args: Any, **kwargs: Any) -> None:
             pass
 
-        async def lookup_room_alias(*args, **kwargs):
+        async def lookup_room_alias(
+            *args: Any, **kwargs: Any
+        ) -> Tuple[RoomID, List[str]]:
             return RoomID.from_string(self.room_id), ["remotetest"]
 
         self.room_member_handler = Mock(spec=["update_membership", "lookup_room_alias"])
@@ -750,12 +775,12 @@ class RemoteAutoJoinTestCase(unittest.HomeserverTestCase):
         hs = self.setup_test_homeserver(room_member_handler=self.room_member_handler)
         return hs
 
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.handler = self.hs.get_registration_handler()
         self.store = self.hs.get_datastores().main
 
     @override_config({"auto_join_rooms": ["#room:remotetest"]})
-    def test_auto_create_auto_join_remote_room(self):
+    def test_auto_create_auto_join_remote_room(self) -> None:
         """Tests that we don't attempt to create remote rooms, and that we don't attempt
         to invite ourselves to rooms we're not in."""
 
diff --git a/tests/handlers/test_room.py b/tests/handlers/test_room.py
index fcde5dab72..df95490d3b 100644
--- a/tests/handlers/test_room.py
+++ b/tests/handlers/test_room.py
@@ -14,7 +14,7 @@ class EncryptedByDefaultTestCase(unittest.HomeserverTestCase):
     ]
 
     @override_config({"encryption_enabled_by_default_for_room_type": "all"})
-    def test_encrypted_by_default_config_option_all(self):
+    def test_encrypted_by_default_config_option_all(self) -> None:
         """Tests that invite-only and non-invite-only rooms have encryption enabled by
         default when the config option encryption_enabled_by_default_for_room_type is "all".
         """
@@ -45,7 +45,7 @@ class EncryptedByDefaultTestCase(unittest.HomeserverTestCase):
         self.assertEqual(event_content, {"algorithm": RoomEncryptionAlgorithms.DEFAULT})
 
     @override_config({"encryption_enabled_by_default_for_room_type": "invite"})
-    def test_encrypted_by_default_config_option_invite(self):
+    def test_encrypted_by_default_config_option_invite(self) -> None:
         """Tests that only new, invite-only rooms have encryption enabled by default when
         the config option encryption_enabled_by_default_for_room_type is "invite".
         """
@@ -76,7 +76,7 @@ class EncryptedByDefaultTestCase(unittest.HomeserverTestCase):
         )
 
     @override_config({"encryption_enabled_by_default_for_room_type": "off"})
-    def test_encrypted_by_default_config_option_off(self):
+    def test_encrypted_by_default_config_option_off(self) -> None:
         """Tests that neither new invite-only nor non-invite-only rooms have encryption
         enabled by default when the config option
         encryption_enabled_by_default_for_room_type is "off".
diff --git a/tests/handlers/test_room_summary.py b/tests/handlers/test_room_summary.py
index aa650756e4..d907fcaf04 100644
--- a/tests/handlers/test_room_summary.py
+++ b/tests/handlers/test_room_summary.py
@@ -11,10 +11,11 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-from typing import Any, Iterable, List, Optional, Tuple
+from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
 from unittest import mock
 
 from twisted.internet.defer import ensureDeferred
+from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.constants import (
     EventContentFields,
@@ -34,11 +35,14 @@ from synapse.rest import admin
 from synapse.rest.client import login, room
 from synapse.server import HomeServer
 from synapse.types import JsonDict, UserID, create_requester
+from synapse.util import Clock
 
 from tests import unittest
 
 
-def _create_event(room_id: str, order: Optional[Any] = None, origin_server_ts: int = 0):
+def _create_event(
+    room_id: str, order: Optional[Any] = None, origin_server_ts: int = 0
+) -> mock.Mock:
     result = mock.Mock(name=room_id)
     result.room_id = room_id
     result.content = {}
@@ -48,40 +52,40 @@ def _create_event(room_id: str, order: Optional[Any] = None, origin_server_ts: i
     return result
 
 
-def _order(*events):
+def _order(*events: mock.Mock) -> List[mock.Mock]:
     return sorted(events, key=_child_events_comparison_key)
 
 
 class TestSpaceSummarySort(unittest.TestCase):
-    def test_no_order_last(self):
+    def test_no_order_last(self) -> None:
         """An event with no ordering is placed behind those with an ordering."""
         ev1 = _create_event("!abc:test")
         ev2 = _create_event("!xyz:test", "xyz")
 
         self.assertEqual([ev2, ev1], _order(ev1, ev2))
 
-    def test_order(self):
+    def test_order(self) -> None:
         """The ordering should be used."""
         ev1 = _create_event("!abc:test", "xyz")
         ev2 = _create_event("!xyz:test", "abc")
 
         self.assertEqual([ev2, ev1], _order(ev1, ev2))
 
-    def test_order_origin_server_ts(self):
+    def test_order_origin_server_ts(self) -> None:
         """Origin server  is a tie-breaker for ordering."""
         ev1 = _create_event("!abc:test", origin_server_ts=10)
         ev2 = _create_event("!xyz:test", origin_server_ts=30)
 
         self.assertEqual([ev1, ev2], _order(ev1, ev2))
 
-    def test_order_room_id(self):
+    def test_order_room_id(self) -> None:
         """Room ID is a final tie-breaker for ordering."""
         ev1 = _create_event("!abc:test")
         ev2 = _create_event("!xyz:test")
 
         self.assertEqual([ev1, ev2], _order(ev1, ev2))
 
-    def test_invalid_ordering_type(self):
+    def test_invalid_ordering_type(self) -> None:
         """Invalid orderings are considered the same as missing."""
         ev1 = _create_event("!abc:test", 1)
         ev2 = _create_event("!xyz:test", "xyz")
@@ -97,7 +101,7 @@ class TestSpaceSummarySort(unittest.TestCase):
         ev1 = _create_event("!abc:test", True)
         self.assertEqual([ev2, ev1], _order(ev1, ev2))
 
-    def test_invalid_ordering_value(self):
+    def test_invalid_ordering_value(self) -> None:
         """Invalid orderings are considered the same as missing."""
         ev1 = _create_event("!abc:test", "foo\n")
         ev2 = _create_event("!xyz:test", "xyz")
@@ -115,7 +119,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
         login.register_servlets,
     ]
 
-    def prepare(self, reactor, clock, hs: HomeServer):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.hs = hs
         self.handler = self.hs.get_room_summary_handler()
 
@@ -223,7 +227,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
             fed_handler.on_invite_request(fed_hostname, event, RoomVersions.V6)
         )
 
-    def test_simple_space(self):
+    def test_simple_space(self) -> None:
         """Test a simple space with a single room."""
         # The result should have the space and the room in it, along with a link
         # from space -> room.
@@ -234,7 +238,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
         )
         self._assert_hierarchy(result, expected)
 
-    def test_large_space(self):
+    def test_large_space(self) -> None:
         """Test a space with a large number of rooms."""
         rooms = [self.room]
         # Make at least 51 rooms that are part of the space.
@@ -260,7 +264,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
         result["rooms"] += result2["rooms"]
         self._assert_hierarchy(result, expected)
 
-    def test_visibility(self):
+    def test_visibility(self) -> None:
         """A user not in a space cannot inspect it."""
         user2 = self.register_user("user2", "pass")
         token2 = self.login("user2", "pass")
@@ -380,7 +384,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
         self._assert_hierarchy(result2, [(self.space, [self.room])])
 
     def _create_room_with_join_rule(
-        self, join_rule: str, room_version: Optional[str] = None, **extra_content
+        self, join_rule: str, room_version: Optional[str] = None, **extra_content: Any
     ) -> str:
         """Create a room with the given join rule and add it to the space."""
         room_id = self.helper.create_room_as(
@@ -403,7 +407,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
         self._add_child(self.space, room_id, self.token)
         return room_id
 
-    def test_filtering(self):
+    def test_filtering(self) -> None:
         """
         Rooms should be properly filtered to only include rooms the user has access to.
         """
@@ -476,7 +480,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
         )
         self._assert_hierarchy(result, expected)
 
-    def test_complex_space(self):
+    def test_complex_space(self) -> None:
         """
         Create a "complex" space to see how it handles things like loops and subspaces.
         """
@@ -516,7 +520,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
         )
         self._assert_hierarchy(result, expected)
 
-    def test_pagination(self):
+    def test_pagination(self) -> None:
         """Test simple pagination works."""
         room_ids = []
         for i in range(1, 10):
@@ -553,7 +557,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
         self._assert_hierarchy(result, expected)
         self.assertNotIn("next_batch", result)
 
-    def test_invalid_pagination_token(self):
+    def test_invalid_pagination_token(self) -> None:
         """An invalid pagination token, or changing other parameters, shoudl be rejected."""
         room_ids = []
         for i in range(1, 10):
@@ -604,7 +608,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
             SynapseError,
         )
 
-    def test_max_depth(self):
+    def test_max_depth(self) -> None:
         """Create a deep tree to test the max depth against."""
         spaces = [self.space]
         rooms = [self.room]
@@ -659,7 +663,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
         ]
         self._assert_hierarchy(result, expected)
 
-    def test_unknown_room_version(self):
+    def test_unknown_room_version(self) -> None:
         """
         If a room with an unknown room version is encountered it should not cause
         the entire summary to skip.
@@ -685,7 +689,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
         )
         self._assert_hierarchy(result, expected)
 
-    def test_fed_complex(self):
+    def test_fed_complex(self) -> None:
         """
         Return data over federation and ensure that it is handled properly.
         """
@@ -722,7 +726,9 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
             "world_readable": True,
         }
 
-        async def summarize_remote_room_hierarchy(_self, room, suggested_only):
+        async def summarize_remote_room_hierarchy(
+            _self: Any, room: Any, suggested_only: bool
+        ) -> Tuple[Optional[_RoomEntry], Dict[str, JsonDict], Set[str]]:
             return requested_room_entry, {subroom: child_room}, set()
 
         # Add a room to the space which is on another server.
@@ -744,7 +750,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
             )
         self._assert_hierarchy(result, expected)
 
-    def test_fed_filtering(self):
+    def test_fed_filtering(self) -> None:
         """
         Rooms returned over federation should be properly filtered to only include
         rooms the user has access to.
@@ -853,7 +859,9 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
             ],
         )
 
-        async def summarize_remote_room_hierarchy(_self, room, suggested_only):
+        async def summarize_remote_room_hierarchy(
+            _self: Any, room: Any, suggested_only: bool
+        ) -> Tuple[Optional[_RoomEntry], Dict[str, JsonDict], Set[str]]:
             return subspace_room_entry, dict(children_rooms), set()
 
         # Add a room to the space which is on another server.
@@ -892,7 +900,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
             )
         self._assert_hierarchy(result, expected)
 
-    def test_fed_invited(self):
+    def test_fed_invited(self) -> None:
         """
         A room which the user was invited to should be included in the response.
 
@@ -915,7 +923,9 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
             },
         )
 
-        async def summarize_remote_room_hierarchy(_self, room, suggested_only):
+        async def summarize_remote_room_hierarchy(
+            _self: Any, room: Any, suggested_only: bool
+        ) -> Tuple[Optional[_RoomEntry], Dict[str, JsonDict], Set[str]]:
             return fed_room_entry, {}, set()
 
         # Add a room to the space which is on another server.
@@ -936,7 +946,7 @@ class SpaceSummaryTestCase(unittest.HomeserverTestCase):
             )
         self._assert_hierarchy(result, expected)
 
-    def test_fed_caching(self):
+    def test_fed_caching(self) -> None:
         """
         Federation `/hierarchy` responses should be cached.
         """
@@ -1023,7 +1033,7 @@ class RoomSummaryTestCase(unittest.HomeserverTestCase):
         login.register_servlets,
     ]
 
-    def prepare(self, reactor, clock, hs: HomeServer):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.hs = hs
         self.handler = self.hs.get_room_summary_handler()
 
@@ -1040,12 +1050,12 @@ class RoomSummaryTestCase(unittest.HomeserverTestCase):
             tok=self.token,
         )
 
-    def test_own_room(self):
+    def test_own_room(self) -> None:
         """Test a simple room created by the requester."""
         result = self.get_success(self.handler.get_room_summary(self.user, self.room))
         self.assertEqual(result.get("room_id"), self.room)
 
-    def test_visibility(self):
+    def test_visibility(self) -> None:
         """A user not in a private room cannot get its summary."""
         user2 = self.register_user("user2", "pass")
         token2 = self.login("user2", "pass")
@@ -1093,7 +1103,7 @@ class RoomSummaryTestCase(unittest.HomeserverTestCase):
         result = self.get_success(self.handler.get_room_summary(user2, self.room))
         self.assertEqual(result.get("room_id"), self.room)
 
-    def test_fed(self):
+    def test_fed(self) -> None:
         """
         Return data over federation and ensure that it is handled properly.
         """
@@ -1105,7 +1115,9 @@ class RoomSummaryTestCase(unittest.HomeserverTestCase):
             {"room_id": fed_room, "world_readable": True},
         )
 
-        async def summarize_remote_room_hierarchy(_self, room, suggested_only):
+        async def summarize_remote_room_hierarchy(
+            _self: Any, room: Any, suggested_only: bool
+        ) -> Tuple[Optional[_RoomEntry], Dict[str, JsonDict], Set[str]]:
             return requested_room_entry, {}, set()
 
         with mock.patch(
diff --git a/tests/handlers/test_saml.py b/tests/handlers/test_saml.py
index a0f84e2940..9b1b8b9f13 100644
--- a/tests/handlers/test_saml.py
+++ b/tests/handlers/test_saml.py
@@ -12,7 +12,7 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Set, Tuple
 from unittest.mock import Mock
 
 import attr
@@ -20,7 +20,9 @@ import attr
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.errors import RedirectException
+from synapse.module_api import ModuleApi
 from synapse.server import HomeServer
+from synapse.types import JsonDict
 from synapse.util import Clock
 
 from tests.test_utils import simple_async_mock
@@ -29,6 +31,7 @@ from tests.unittest import HomeserverTestCase, override_config
 # Check if we have the dependencies to run the tests.
 try:
     import saml2.config
+    import saml2.response
     from saml2.sigver import SigverError
 
     has_saml2 = True
@@ -56,31 +59,39 @@ class FakeAuthnResponse:
 
 
 class TestMappingProvider:
-    def __init__(self, config, module):
+    def __init__(self, config: None, module: ModuleApi):
         pass
 
     @staticmethod
-    def parse_config(config):
-        return
+    def parse_config(config: JsonDict) -> None:
+        return None
 
     @staticmethod
-    def get_saml_attributes(config):
+    def get_saml_attributes(config: None) -> Tuple[Set[str], Set[str]]:
         return {"uid"}, {"displayName"}
 
-    def get_remote_user_id(self, saml_response, client_redirect_url):
+    def get_remote_user_id(
+        self, saml_response: "saml2.response.AuthnResponse", client_redirect_url: str
+    ) -> str:
         return saml_response.ava["uid"]
 
     def saml_response_to_user_attributes(
-        self, saml_response, failures, client_redirect_url
-    ):
+        self,
+        saml_response: "saml2.response.AuthnResponse",
+        failures: int,
+        client_redirect_url: str,
+    ) -> dict:
         localpart = saml_response.ava["username"] + (str(failures) if failures else "")
         return {"mxid_localpart": localpart, "displayname": None}
 
 
 class TestRedirectMappingProvider(TestMappingProvider):
     def saml_response_to_user_attributes(
-        self, saml_response, failures, client_redirect_url
-    ):
+        self,
+        saml_response: "saml2.response.AuthnResponse",
+        failures: int,
+        client_redirect_url: str,
+    ) -> dict:
         raise RedirectException(b"https://custom-saml-redirect/")
 
 
@@ -347,7 +358,7 @@ class SamlHandlerTestCase(HomeserverTestCase):
         )
 
 
-def _mock_request():
+def _mock_request() -> Mock:
     """Returns a mock which will stand in as a SynapseRequest"""
     mock = Mock(
         spec=[
diff --git a/tests/handlers/test_send_email.py b/tests/handlers/test_send_email.py
index da4bf8b582..8b6e4a40b6 100644
--- a/tests/handlers/test_send_email.py
+++ b/tests/handlers/test_send_email.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 
-from typing import List, Tuple
+from typing import Callable, List, Tuple
 
 from zope.interface import implementer
 
@@ -28,20 +28,27 @@ from tests.unittest import HomeserverTestCase, override_config
 
 @implementer(interfaces.IMessageDelivery)
 class _DummyMessageDelivery:
-    def __init__(self):
+    def __init__(self) -> None:
         # (recipient, message) tuples
         self.messages: List[Tuple[smtp.Address, bytes]] = []
 
-    def receivedHeader(self, helo, origin, recipients):
+    def receivedHeader(
+        self,
+        helo: Tuple[bytes, bytes],
+        origin: smtp.Address,
+        recipients: List[smtp.User],
+    ) -> None:
         return None
 
-    def validateFrom(self, helo, origin):
+    def validateFrom(
+        self, helo: Tuple[bytes, bytes], origin: smtp.Address
+    ) -> smtp.Address:
         return origin
 
-    def record_message(self, recipient: smtp.Address, message: bytes):
+    def record_message(self, recipient: smtp.Address, message: bytes) -> None:
         self.messages.append((recipient, message))
 
-    def validateTo(self, user: smtp.User):
+    def validateTo(self, user: smtp.User) -> Callable[[], interfaces.IMessageSMTP]:
         return lambda: _DummyMessage(self, user)
 
 
@@ -56,20 +63,20 @@ class _DummyMessage:
         self._user = user
         self._buffer: List[bytes] = []
 
-    def lineReceived(self, line):
+    def lineReceived(self, line: bytes) -> None:
         self._buffer.append(line)
 
-    def eomReceived(self):
+    def eomReceived(self) -> "defer.Deferred[bytes]":
         message = b"\n".join(self._buffer) + b"\n"
         self._delivery.record_message(self._user.dest, message)
         return defer.succeed(b"saved")
 
-    def connectionLost(self):
+    def connectionLost(self) -> None:
         pass
 
 
 class SendEmailHandlerTestCase(HomeserverTestCase):
-    def test_send_email(self):
+    def test_send_email(self) -> None:
         """Happy-path test that we can send email to a non-TLS server."""
         h = self.hs.get_send_email_handler()
         d = ensureDeferred(
@@ -119,7 +126,7 @@ class SendEmailHandlerTestCase(HomeserverTestCase):
             },
         }
     )
-    def test_send_email_force_tls(self):
+    def test_send_email_force_tls(self) -> None:
         """Happy-path test that we can send email to an Implicit TLS server."""
         h = self.hs.get_send_email_handler()
         d = ensureDeferred(
diff --git a/tests/handlers/test_stats.py b/tests/handlers/test_stats.py
index 05f9ec3c51..f1a50c5bcb 100644
--- a/tests/handlers/test_stats.py
+++ b/tests/handlers/test_stats.py
@@ -12,9 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Any, Dict, List, Optional
+
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.rest import admin
 from synapse.rest.client import login, room
+from synapse.server import HomeServer
 from synapse.storage.databases.main import stats
+from synapse.util import Clock
 
 from tests import unittest
 
@@ -32,11 +38,11 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         login.register_servlets,
     ]
 
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
         self.handler = self.hs.get_stats_handler()
 
-    def _add_background_updates(self):
+    def _add_background_updates(self) -> None:
         """
         Add the background updates we need to run.
         """
@@ -63,12 +69,14 @@ class StatsRoomTests(unittest.HomeserverTestCase):
             )
         )
 
-    async def get_all_room_state(self):
+    async def get_all_room_state(self) -> List[Dict[str, Any]]:
         return await self.store.db_pool.simple_select_list(
             "room_stats_state", None, retcols=("name", "topic", "canonical_alias")
         )
 
-    def _get_current_stats(self, stats_type, stat_id):
+    def _get_current_stats(
+        self, stats_type: str, stat_id: str
+    ) -> Optional[Dict[str, Any]]:
         table, id_col = stats.TYPE_TO_TABLE[stats_type]
 
         cols = list(stats.ABSOLUTE_STATS_FIELDS[stats_type])
@@ -82,13 +90,13 @@ class StatsRoomTests(unittest.HomeserverTestCase):
             )
         )
 
-    def _perform_background_initial_update(self):
+    def _perform_background_initial_update(self) -> None:
         # Do the initial population of the stats via the background update
         self._add_background_updates()
 
         self.wait_for_background_updates()
 
-    def test_initial_room(self):
+    def test_initial_room(self) -> None:
         """
         The background updates will build the table from scratch.
         """
@@ -125,7 +133,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         self.assertEqual(len(r), 1)
         self.assertEqual(r[0]["topic"], "foo")
 
-    def test_create_user(self):
+    def test_create_user(self) -> None:
         """
         When we create a user, it should have statistics already ready.
         """
@@ -134,12 +142,12 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
         u1stats = self._get_current_stats("user", u1)
 
-        self.assertIsNotNone(u1stats)
+        assert u1stats is not None
 
         # not in any rooms by default
         self.assertEqual(u1stats["joined_rooms"], 0)
 
-    def test_create_room(self):
+    def test_create_room(self) -> None:
         """
         When we create a room, it should have statistics already ready.
         """
@@ -153,8 +161,8 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         r2 = self.helper.create_room_as(u1, tok=u1token, is_public=False)
         r2stats = self._get_current_stats("room", r2)
 
-        self.assertIsNotNone(r1stats)
-        self.assertIsNotNone(r2stats)
+        assert r1stats is not None
+        assert r2stats is not None
 
         self.assertEqual(
             r1stats["current_state_events"], EXPT_NUM_STATE_EVTS_IN_FRESH_PUBLIC_ROOM
@@ -171,7 +179,9 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         self.assertEqual(r2stats["invited_members"], 0)
         self.assertEqual(r2stats["banned_members"], 0)
 
-    def test_updating_profile_information_does_not_increase_joined_members_count(self):
+    def test_updating_profile_information_does_not_increase_joined_members_count(
+        self,
+    ) -> None:
         """
         Check that the joined_members count does not increase when a user changes their
         profile information (which is done by sending another join membership event into
@@ -186,6 +196,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
         # Get the current room stats
         r1stats_ante = self._get_current_stats("room", r1)
+        assert r1stats_ante is not None
 
         # Send a profile update into the room
         new_profile = {"displayname": "bob"}
@@ -195,6 +206,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
         # Get the new room stats
         r1stats_post = self._get_current_stats("room", r1)
+        assert r1stats_post is not None
 
         # Ensure that the user count did not changed
         self.assertEqual(r1stats_post["joined_members"], r1stats_ante["joined_members"])
@@ -202,7 +214,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
             r1stats_post["local_users_in_room"], r1stats_ante["local_users_in_room"]
         )
 
-    def test_send_state_event_nonoverwriting(self):
+    def test_send_state_event_nonoverwriting(self) -> None:
         """
         When we send a non-overwriting state event, it increments current_state_events
         """
@@ -218,19 +230,21 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         )
 
         r1stats_ante = self._get_current_stats("room", r1)
+        assert r1stats_ante is not None
 
         self.helper.send_state(
             r1, "cat.hissing", {"value": False}, tok=u1token, state_key="moggy"
         )
 
         r1stats_post = self._get_current_stats("room", r1)
+        assert r1stats_post is not None
 
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
             1,
         )
 
-    def test_join_first_time(self):
+    def test_join_first_time(self) -> None:
         """
         When a user joins a room for the first time, current_state_events and
         joined_members should increase by exactly 1.
@@ -246,10 +260,12 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         u2token = self.login("u2", "pass")
 
         r1stats_ante = self._get_current_stats("room", r1)
+        assert r1stats_ante is not None
 
         self.helper.join(r1, u2, tok=u2token)
 
         r1stats_post = self._get_current_stats("room", r1)
+        assert r1stats_post is not None
 
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
@@ -259,7 +275,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
             r1stats_post["joined_members"] - r1stats_ante["joined_members"], 1
         )
 
-    def test_join_after_leave(self):
+    def test_join_after_leave(self) -> None:
         """
         When a user joins a room after being previously left,
         joined_members should increase by exactly 1.
@@ -280,10 +296,12 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         self.helper.leave(r1, u2, tok=u2token)
 
         r1stats_ante = self._get_current_stats("room", r1)
+        assert r1stats_ante is not None
 
         self.helper.join(r1, u2, tok=u2token)
 
         r1stats_post = self._get_current_stats("room", r1)
+        assert r1stats_post is not None
 
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
@@ -296,7 +314,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
             r1stats_post["left_members"] - r1stats_ante["left_members"], -1
         )
 
-    def test_invited(self):
+    def test_invited(self) -> None:
         """
         When a user invites another user, current_state_events and
         invited_members should increase by exactly 1.
@@ -311,10 +329,12 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         u2 = self.register_user("u2", "pass")
 
         r1stats_ante = self._get_current_stats("room", r1)
+        assert r1stats_ante is not None
 
         self.helper.invite(r1, u1, u2, tok=u1token)
 
         r1stats_post = self._get_current_stats("room", r1)
+        assert r1stats_post is not None
 
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
@@ -324,7 +344,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
             r1stats_post["invited_members"] - r1stats_ante["invited_members"], +1
         )
 
-    def test_join_after_invite(self):
+    def test_join_after_invite(self) -> None:
         """
         When a user joins a room after being invited and
         joined_members should increase by exactly 1.
@@ -344,10 +364,12 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         self.helper.invite(r1, u1, u2, tok=u1token)
 
         r1stats_ante = self._get_current_stats("room", r1)
+        assert r1stats_ante is not None
 
         self.helper.join(r1, u2, tok=u2token)
 
         r1stats_post = self._get_current_stats("room", r1)
+        assert r1stats_post is not None
 
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
@@ -360,7 +382,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
             r1stats_post["invited_members"] - r1stats_ante["invited_members"], -1
         )
 
-    def test_left(self):
+    def test_left(self) -> None:
         """
         When a user leaves a room after joining and
         left_members should increase by exactly 1.
@@ -380,10 +402,12 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         self.helper.join(r1, u2, tok=u2token)
 
         r1stats_ante = self._get_current_stats("room", r1)
+        assert r1stats_ante is not None
 
         self.helper.leave(r1, u2, tok=u2token)
 
         r1stats_post = self._get_current_stats("room", r1)
+        assert r1stats_post is not None
 
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
@@ -396,7 +420,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
             r1stats_post["joined_members"] - r1stats_ante["joined_members"], -1
         )
 
-    def test_banned(self):
+    def test_banned(self) -> None:
         """
         When a user is banned from a room after joining and
         left_members should increase by exactly 1.
@@ -416,10 +440,12 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         self.helper.join(r1, u2, tok=u2token)
 
         r1stats_ante = self._get_current_stats("room", r1)
+        assert r1stats_ante is not None
 
         self.helper.change_membership(r1, u1, u2, "ban", tok=u1token)
 
         r1stats_post = self._get_current_stats("room", r1)
+        assert r1stats_post is not None
 
         self.assertEqual(
             r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
@@ -432,7 +458,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
             r1stats_post["joined_members"] - r1stats_ante["joined_members"], -1
         )
 
-    def test_initial_background_update(self):
+    def test_initial_background_update(self) -> None:
         """
         Test that statistics can be generated by the initial background update
         handler.
@@ -462,6 +488,9 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         r1stats = self._get_current_stats("room", r1)
         u1stats = self._get_current_stats("user", u1)
 
+        assert r1stats is not None
+        assert u1stats is not None
+
         self.assertEqual(r1stats["joined_members"], 1)
         self.assertEqual(
             r1stats["current_state_events"], EXPT_NUM_STATE_EVTS_IN_FRESH_PUBLIC_ROOM
@@ -469,7 +498,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
 
         self.assertEqual(u1stats["joined_rooms"], 1)
 
-    def test_incomplete_stats(self):
+    def test_incomplete_stats(self) -> None:
         """
         This tests that we track incomplete statistics.
 
@@ -533,8 +562,11 @@ class StatsRoomTests(unittest.HomeserverTestCase):
         self.wait_for_background_updates()
 
         r1stats_complete = self._get_current_stats("room", r1)
+        assert r1stats_complete is not None
         u1stats_complete = self._get_current_stats("user", u1)
+        assert u1stats_complete is not None
         u2stats_complete = self._get_current_stats("user", u2)
+        assert u2stats_complete is not None
 
         # now we make our assertions
 
diff --git a/tests/handlers/test_sync.py b/tests/handlers/test_sync.py
index ab5c101eb7..0d9a3de92a 100644
--- a/tests/handlers/test_sync.py
+++ b/tests/handlers/test_sync.py
@@ -14,6 +14,8 @@
 from typing import Optional
 from unittest.mock import MagicMock, Mock, patch
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.constants import EventTypes, JoinRules
 from synapse.api.errors import Codes, ResourceLimitError
 from synapse.api.filtering import Filtering
@@ -23,6 +25,7 @@ from synapse.rest import admin
 from synapse.rest.client import knock, login, room
 from synapse.server import HomeServer
 from synapse.types import UserID, create_requester
+from synapse.util import Clock
 
 import tests.unittest
 import tests.utils
@@ -39,7 +42,7 @@ class SyncTestCase(tests.unittest.HomeserverTestCase):
         room.register_servlets,
     ]
 
-    def prepare(self, reactor, clock, hs: HomeServer):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.sync_handler = self.hs.get_sync_handler()
         self.store = self.hs.get_datastores().main
 
@@ -47,7 +50,7 @@ class SyncTestCase(tests.unittest.HomeserverTestCase):
         # modify its config instead of the hs'
         self.auth_blocking = self.hs.get_auth_blocking()
 
-    def test_wait_for_sync_for_user_auth_blocking(self):
+    def test_wait_for_sync_for_user_auth_blocking(self) -> None:
         user_id1 = "@user1:test"
         user_id2 = "@user2:test"
         sync_config = generate_sync_config(user_id1)
@@ -82,7 +85,7 @@ class SyncTestCase(tests.unittest.HomeserverTestCase):
         )
         self.assertEqual(e.value.errcode, Codes.RESOURCE_LIMIT_EXCEEDED)
 
-    def test_unknown_room_version(self):
+    def test_unknown_room_version(self) -> None:
         """
         A room with an unknown room version should not break sync (and should be excluded).
         """
@@ -186,7 +189,7 @@ class SyncTestCase(tests.unittest.HomeserverTestCase):
         self.assertNotIn(invite_room, [r.room_id for r in result.invited])
         self.assertNotIn(knock_room, [r.room_id for r in result.knocked])
 
-    def test_ban_wins_race_with_join(self):
+    def test_ban_wins_race_with_join(self) -> None:
         """Rooms shouldn't appear under "joined" if a join loses a race to a ban.
 
         A complicated edge case. Imagine the following scenario:
-- 
cgit 1.5.1


From 864c3f85b0c420f755a064a3c50a45716db3f8af Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Fri, 16 Dec 2022 13:04:54 +0000
Subject: Improve type annotations for the helper methods on a
 `CachedFunction`. (#14685)

---
 changelog.d/14685.misc             | 1 +
 synapse/util/caches/descriptors.py | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/14685.misc

(limited to 'synapse')

diff --git a/changelog.d/14685.misc b/changelog.d/14685.misc
new file mode 100644
index 0000000000..3ba2270100
--- /dev/null
+++ b/changelog.d/14685.misc
@@ -0,0 +1 @@
+Improve type annotations for the helper methods on a `CachedFunction`.
\ No newline at end of file
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 72227359b9..81df71a0c5 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -53,9 +53,9 @@ F = TypeVar("F", bound=Callable[..., Any])
 
 
 class CachedFunction(Generic[F]):
-    invalidate: Any = None
-    invalidate_all: Any = None
-    prefill: Any = None
+    invalidate: Callable[[Tuple[Any, ...]], None]
+    invalidate_all: Callable[[], None]
+    prefill: Callable[[Tuple[Any, ...], Any], None]
     cache: Any = None
     num_args: Any = None
 
-- 
cgit 1.5.1


From 3aeca2588b79111a48a6083c88efc4d68a2cea19 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 16 Dec 2022 08:53:28 -0500
Subject: Add missing type hints to tests.config. (#14681)

---
 changelog.d/14681.misc                   |  1 +
 mypy.ini                                 |  4 +--
 synapse/config/cache.py                  |  4 +--
 synapse/util/caches/lrucache.py          |  9 ++---
 tests/config/test___main__.py            |  6 ++--
 tests/config/test_background_update.py   |  4 +--
 tests/config/test_base.py                | 10 +++---
 tests/config/test_cache.py               | 57 ++++++++++++++++----------------
 tests/config/test_database.py            |  2 +-
 tests/config/test_generate.py            |  8 ++---
 tests/config/test_load.py                | 12 +++----
 tests/config/test_ratelimiting.py        |  2 +-
 tests/config/test_registration_config.py |  4 +--
 tests/config/test_room_directory.py      |  4 +--
 tests/config/test_server.py              | 18 +++++-----
 tests/config/test_tls.py                 | 53 +++++++++++++++++------------
 tests/config/test_util.py                |  2 +-
 tests/config/utils.py                    | 11 +++---
 18 files changed, 108 insertions(+), 103 deletions(-)
 create mode 100644 changelog.d/14681.misc

(limited to 'synapse')

diff --git a/changelog.d/14681.misc b/changelog.d/14681.misc
new file mode 100644
index 0000000000..d44571b731
--- /dev/null
+++ b/changelog.d/14681.misc
@@ -0,0 +1 @@
+Add missing type hints.
diff --git a/mypy.ini b/mypy.ini
index 1a37414e58..80fbcdfeab 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -36,8 +36,6 @@ exclude = (?x)
    |tests/api/test_ratelimiting.py
    |tests/app/test_openid_listener.py
    |tests/appservice/test_scheduler.py
-   |tests/config/test_cache.py
-   |tests/config/test_tls.py
    |tests/crypto/test_keyring.py
    |tests/events/test_presence_router.py
    |tests/events/test_utils.py
@@ -89,7 +87,7 @@ disallow_untyped_defs = False
 [mypy-tests.*]
 disallow_untyped_defs = False
 
-[mypy-tests.config.test_api]
+[mypy-tests.config.*]
 disallow_untyped_defs = True
 
 [mypy-tests.federation.transport.test_client]
diff --git a/synapse/config/cache.py b/synapse/config/cache.py
index eb4194a5a9..015b2a138e 100644
--- a/synapse/config/cache.py
+++ b/synapse/config/cache.py
@@ -16,7 +16,7 @@ import logging
 import os
 import re
 import threading
-from typing import Any, Callable, Dict, Optional
+from typing import Any, Callable, Dict, Mapping, Optional
 
 import attr
 
@@ -94,7 +94,7 @@ def add_resizable_cache(
 
 class CacheConfig(Config):
     section = "caches"
-    _environ = os.environ
+    _environ: Mapping[str, str] = os.environ
 
     event_cache_size: int
     cache_factors: Dict[str, float]
diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py
index dcf0eac3bf..452d5d04c1 100644
--- a/synapse/util/caches/lrucache.py
+++ b/synapse/util/caches/lrucache.py
@@ -788,26 +788,21 @@ class LruCache(Generic[KT, VT]):
     def __contains__(self, key: KT) -> bool:
         return self.contains(key)
 
-    def set_cache_factor(self, factor: float) -> bool:
+    def set_cache_factor(self, factor: float) -> None:
         """
         Set the cache factor for this individual cache.
 
         This will trigger a resize if it changes, which may require evicting
         items from the cache.
-
-        Returns:
-            Whether the cache changed size or not.
         """
         if not self.apply_cache_factor_from_config:
-            return False
+            return
 
         new_size = int(self._original_max_size * factor)
         if new_size != self.max_size:
             self.max_size = new_size
             if self._on_resize:
                 self._on_resize()
-            return True
-        return False
 
     def __del__(self) -> None:
         # We're about to be deleted, so we make sure to clear up all the nodes
diff --git a/tests/config/test___main__.py b/tests/config/test___main__.py
index b1c73d3612..cb5d4b05c3 100644
--- a/tests/config/test___main__.py
+++ b/tests/config/test___main__.py
@@ -17,15 +17,15 @@ from tests.config.utils import ConfigFileTestCase
 
 
 class ConfigMainFileTestCase(ConfigFileTestCase):
-    def test_executes_without_an_action(self):
+    def test_executes_without_an_action(self) -> None:
         self.generate_config()
         main(["", "-c", self.config_file])
 
-    def test_read__error_if_key_not_found(self):
+    def test_read__error_if_key_not_found(self) -> None:
         self.generate_config()
         with self.assertRaises(SystemExit):
             main(["", "read", "foo.bar.hello", "-c", self.config_file])
 
-    def test_read__passes_if_key_found(self):
+    def test_read__passes_if_key_found(self) -> None:
         self.generate_config()
         main(["", "read", "server.server_name", "-c", self.config_file])
diff --git a/tests/config/test_background_update.py b/tests/config/test_background_update.py
index 0c32c1ca29..e4bad2ba6e 100644
--- a/tests/config/test_background_update.py
+++ b/tests/config/test_background_update.py
@@ -22,7 +22,7 @@ class BackgroundUpdateConfigTestCase(HomeserverTestCase):
     # Tests that the default values in the config are correctly loaded. Note that the default
     # values are loaded when the corresponding config options are commented out, which is why there isn't
     # a config specified here.
-    def test_default_configuration(self):
+    def test_default_configuration(self) -> None:
         background_updater = BackgroundUpdater(
             self.hs, self.hs.get_datastores().main.db_pool
         )
@@ -46,7 +46,7 @@ class BackgroundUpdateConfigTestCase(HomeserverTestCase):
             """
         )
     )
-    def test_custom_configuration(self):
+    def test_custom_configuration(self) -> None:
         background_updater = BackgroundUpdater(
             self.hs, self.hs.get_datastores().main.db_pool
         )
diff --git a/tests/config/test_base.py b/tests/config/test_base.py
index 6a52f862f4..3fbfe6c1da 100644
--- a/tests/config/test_base.py
+++ b/tests/config/test_base.py
@@ -24,13 +24,13 @@ from tests import unittest
 
 
 class BaseConfigTestCase(unittest.TestCase):
-    def setUp(self):
+    def setUp(self) -> None:
         # The root object needs a server property with a public_baseurl.
         root = Mock()
         root.server.public_baseurl = "http://test"
         self.config = Config(root)
 
-    def test_loading_missing_templates(self):
+    def test_loading_missing_templates(self) -> None:
         # Use a temporary directory that exists on the system, but that isn't likely to
         # contain template files
         with tempfile.TemporaryDirectory() as tmp_dir:
@@ -50,7 +50,7 @@ class BaseConfigTestCase(unittest.TestCase):
             "Template file did not contain our test string",
         )
 
-    def test_loading_custom_templates(self):
+    def test_loading_custom_templates(self) -> None:
         # Use a temporary directory that exists on the system
         with tempfile.TemporaryDirectory() as tmp_dir:
             # Create a temporary bogus template file
@@ -79,7 +79,7 @@ class BaseConfigTestCase(unittest.TestCase):
             "Template file did not contain our test string",
         )
 
-    def test_multiple_custom_template_directories(self):
+    def test_multiple_custom_template_directories(self) -> None:
         """Tests that directories are searched in the right order if multiple custom
         template directories are provided.
         """
@@ -137,7 +137,7 @@ class BaseConfigTestCase(unittest.TestCase):
         for td in tempdirs:
             td.cleanup()
 
-    def test_loading_template_from_nonexistent_custom_directory(self):
+    def test_loading_template_from_nonexistent_custom_directory(self) -> None:
         with self.assertRaises(ConfigError):
             self.config.read_templates(
                 ["some_filename.html"], ("a_nonexistent_directory",)
diff --git a/tests/config/test_cache.py b/tests/config/test_cache.py
index d2b3c299e3..96f66af328 100644
--- a/tests/config/test_cache.py
+++ b/tests/config/test_cache.py
@@ -13,26 +13,27 @@
 # limitations under the License.
 
 from synapse.config.cache import CacheConfig, add_resizable_cache
+from synapse.types import JsonDict
 from synapse.util.caches.lrucache import LruCache
 
 from tests.unittest import TestCase
 
 
 class CacheConfigTests(TestCase):
-    def setUp(self):
+    def setUp(self) -> None:
         # Reset caches before each test since there's global state involved.
         self.config = CacheConfig()
         self.config.reset()
 
-    def tearDown(self):
+    def tearDown(self) -> None:
         # Also reset the caches after each test to leave state pristine.
         self.config.reset()
 
-    def test_individual_caches_from_environ(self):
+    def test_individual_caches_from_environ(self) -> None:
         """
         Individual cache factors will be loaded from the environment.
         """
-        config = {}
+        config: JsonDict = {}
         self.config._environ = {
             "SYNAPSE_CACHE_FACTOR_SOMETHING_OR_OTHER": "2",
             "SYNAPSE_NOT_CACHE": "BLAH",
@@ -42,15 +43,15 @@ class CacheConfigTests(TestCase):
 
         self.assertEqual(dict(self.config.cache_factors), {"something_or_other": 2.0})
 
-    def test_config_overrides_environ(self):
+    def test_config_overrides_environ(self) -> None:
         """
         Individual cache factors defined in the environment will take precedence
         over those in the config.
         """
-        config = {"caches": {"per_cache_factors": {"foo": 2, "bar": 3}}}
+        config: JsonDict = {"caches": {"per_cache_factors": {"foo": 2, "bar": 3}}}
         self.config._environ = {
             "SYNAPSE_CACHE_FACTOR_SOMETHING_OR_OTHER": "2",
-            "SYNAPSE_CACHE_FACTOR_FOO": 1,
+            "SYNAPSE_CACHE_FACTOR_FOO": "1",
         }
         self.config.read_config(config, config_dir_path="", data_dir_path="")
         self.config.resize_all_caches()
@@ -60,104 +61,104 @@ class CacheConfigTests(TestCase):
             {"foo": 1.0, "bar": 3.0, "something_or_other": 2.0},
         )
 
-    def test_individual_instantiated_before_config_load(self):
+    def test_individual_instantiated_before_config_load(self) -> None:
         """
         If a cache is instantiated before the config is read, it will be given
         the default cache size in the interim, and then resized once the config
         is loaded.
         """
-        cache = LruCache(100)
+        cache: LruCache = LruCache(100)
 
         add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
         self.assertEqual(cache.max_size, 50)
 
-        config = {"caches": {"per_cache_factors": {"foo": 3}}}
+        config: JsonDict = {"caches": {"per_cache_factors": {"foo": 3}}}
         self.config.read_config(config)
         self.config.resize_all_caches()
 
         self.assertEqual(cache.max_size, 300)
 
-    def test_individual_instantiated_after_config_load(self):
+    def test_individual_instantiated_after_config_load(self) -> None:
         """
         If a cache is instantiated after the config is read, it will be
         immediately resized to the correct size given the per_cache_factor if
         there is one.
         """
-        config = {"caches": {"per_cache_factors": {"foo": 2}}}
+        config: JsonDict = {"caches": {"per_cache_factors": {"foo": 2}}}
         self.config.read_config(config, config_dir_path="", data_dir_path="")
         self.config.resize_all_caches()
 
-        cache = LruCache(100)
+        cache: LruCache = LruCache(100)
         add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
         self.assertEqual(cache.max_size, 200)
 
-    def test_global_instantiated_before_config_load(self):
+    def test_global_instantiated_before_config_load(self) -> None:
         """
         If a cache is instantiated before the config is read, it will be given
         the default cache size in the interim, and then resized to the new
         default cache size once the config is loaded.
         """
-        cache = LruCache(100)
+        cache: LruCache = LruCache(100)
         add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
         self.assertEqual(cache.max_size, 50)
 
-        config = {"caches": {"global_factor": 4}}
+        config: JsonDict = {"caches": {"global_factor": 4}}
         self.config.read_config(config, config_dir_path="", data_dir_path="")
         self.config.resize_all_caches()
 
         self.assertEqual(cache.max_size, 400)
 
-    def test_global_instantiated_after_config_load(self):
+    def test_global_instantiated_after_config_load(self) -> None:
         """
         If a cache is instantiated after the config is read, it will be
         immediately resized to the correct size given the global factor if there
         is no per-cache factor.
         """
-        config = {"caches": {"global_factor": 1.5}}
+        config: JsonDict = {"caches": {"global_factor": 1.5}}
         self.config.read_config(config, config_dir_path="", data_dir_path="")
         self.config.resize_all_caches()
 
-        cache = LruCache(100)
+        cache: LruCache = LruCache(100)
         add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
         self.assertEqual(cache.max_size, 150)
 
-    def test_cache_with_asterisk_in_name(self):
+    def test_cache_with_asterisk_in_name(self) -> None:
         """Some caches have asterisks in their name, test that they are set correctly."""
 
-        config = {
+        config: JsonDict = {
             "caches": {
                 "per_cache_factors": {"*cache_a*": 5, "cache_b": 6, "cache_c": 2}
             }
         }
         self.config._environ = {
             "SYNAPSE_CACHE_FACTOR_CACHE_A": "2",
-            "SYNAPSE_CACHE_FACTOR_CACHE_B": 3,
+            "SYNAPSE_CACHE_FACTOR_CACHE_B": "3",
         }
         self.config.read_config(config, config_dir_path="", data_dir_path="")
         self.config.resize_all_caches()
 
-        cache_a = LruCache(100)
+        cache_a: LruCache = LruCache(100)
         add_resizable_cache("*cache_a*", cache_resize_callback=cache_a.set_cache_factor)
         self.assertEqual(cache_a.max_size, 200)
 
-        cache_b = LruCache(100)
+        cache_b: LruCache = LruCache(100)
         add_resizable_cache("*Cache_b*", cache_resize_callback=cache_b.set_cache_factor)
         self.assertEqual(cache_b.max_size, 300)
 
-        cache_c = LruCache(100)
+        cache_c: LruCache = LruCache(100)
         add_resizable_cache("*cache_c*", cache_resize_callback=cache_c.set_cache_factor)
         self.assertEqual(cache_c.max_size, 200)
 
-    def test_apply_cache_factor_from_config(self):
+    def test_apply_cache_factor_from_config(self) -> None:
         """Caches can disable applying cache factor updates, mainly used by
         event cache size.
         """
 
-        config = {"caches": {"event_cache_size": "10k"}}
+        config: JsonDict = {"caches": {"event_cache_size": "10k"}}
         self.config.read_config(config, config_dir_path="", data_dir_path="")
         self.config.resize_all_caches()
 
-        cache = LruCache(
+        cache: LruCache = LruCache(
             max_size=self.config.event_cache_size,
             apply_cache_factor_from_config=False,
         )
diff --git a/tests/config/test_database.py b/tests/config/test_database.py
index 9eca10bbe9..240277bcc6 100644
--- a/tests/config/test_database.py
+++ b/tests/config/test_database.py
@@ -20,7 +20,7 @@ from tests import unittest
 
 
 class DatabaseConfigTestCase(unittest.TestCase):
-    def test_database_configured_correctly(self):
+    def test_database_configured_correctly(self) -> None:
         conf = yaml.safe_load(
             DatabaseConfig().generate_config_section(data_dir_path="/data_dir_path")
         )
diff --git a/tests/config/test_generate.py b/tests/config/test_generate.py
index fdfbb0e38e..3a02366932 100644
--- a/tests/config/test_generate.py
+++ b/tests/config/test_generate.py
@@ -25,14 +25,14 @@ from tests import unittest
 
 
 class ConfigGenerationTestCase(unittest.TestCase):
-    def setUp(self):
+    def setUp(self) -> None:
         self.dir = tempfile.mkdtemp()
         self.file = os.path.join(self.dir, "homeserver.yaml")
 
-    def tearDown(self):
+    def tearDown(self) -> None:
         shutil.rmtree(self.dir)
 
-    def test_generate_config_generates_files(self):
+    def test_generate_config_generates_files(self) -> None:
         with redirect_stdout(StringIO()):
             HomeServerConfig.load_or_generate_config(
                 "",
@@ -56,7 +56,7 @@ class ConfigGenerationTestCase(unittest.TestCase):
             os.path.join(os.getcwd(), "homeserver.log"),
         )
 
-    def assert_log_filename_is(self, log_config_file, expected):
+    def assert_log_filename_is(self, log_config_file: str, expected: str) -> None:
         with open(log_config_file) as f:
             config = f.read()
             # find the 'filename' line
diff --git a/tests/config/test_load.py b/tests/config/test_load.py
index 69a4e9413b..fcbe79cc7a 100644
--- a/tests/config/test_load.py
+++ b/tests/config/test_load.py
@@ -21,14 +21,14 @@ from tests.config.utils import ConfigFileTestCase
 
 
 class ConfigLoadingFileTestCase(ConfigFileTestCase):
-    def test_load_fails_if_server_name_missing(self):
+    def test_load_fails_if_server_name_missing(self) -> None:
         self.generate_config_and_remove_lines_containing("server_name")
         with self.assertRaises(ConfigError):
             HomeServerConfig.load_config("", ["-c", self.config_file])
         with self.assertRaises(ConfigError):
             HomeServerConfig.load_or_generate_config("", ["-c", self.config_file])
 
-    def test_generates_and_loads_macaroon_secret_key(self):
+    def test_generates_and_loads_macaroon_secret_key(self) -> None:
         self.generate_config()
 
         with open(self.config_file) as f:
@@ -58,7 +58,7 @@ class ConfigLoadingFileTestCase(ConfigFileTestCase):
                 "was: %r" % (config2.key.macaroon_secret_key,)
             )
 
-    def test_load_succeeds_if_macaroon_secret_key_missing(self):
+    def test_load_succeeds_if_macaroon_secret_key_missing(self) -> None:
         self.generate_config_and_remove_lines_containing("macaroon")
         config1 = HomeServerConfig.load_config("", ["-c", self.config_file])
         config2 = HomeServerConfig.load_config("", ["-c", self.config_file])
@@ -73,7 +73,7 @@ class ConfigLoadingFileTestCase(ConfigFileTestCase):
             config1.key.macaroon_secret_key, config3.key.macaroon_secret_key
         )
 
-    def test_disable_registration(self):
+    def test_disable_registration(self) -> None:
         self.generate_config()
         self.add_lines_to_config(
             ["enable_registration: true", "disable_registration: true"]
@@ -93,7 +93,7 @@ class ConfigLoadingFileTestCase(ConfigFileTestCase):
         assert config3 is not None
         self.assertTrue(config3.registration.enable_registration)
 
-    def test_stats_enabled(self):
+    def test_stats_enabled(self) -> None:
         self.generate_config_and_remove_lines_containing("enable_metrics")
         self.add_lines_to_config(["enable_metrics: true"])
 
@@ -101,7 +101,7 @@ class ConfigLoadingFileTestCase(ConfigFileTestCase):
         config = HomeServerConfig.load_config("", ["-c", self.config_file])
         self.assertFalse(config.metrics.metrics_flags.known_servers)
 
-    def test_depreciated_identity_server_flag_throws_error(self):
+    def test_depreciated_identity_server_flag_throws_error(self) -> None:
         self.generate_config()
         # Needed to ensure that actual key/value pair added below don't end up on a line with a comment
         self.add_lines_to_config([" "])
diff --git a/tests/config/test_ratelimiting.py b/tests/config/test_ratelimiting.py
index 1b63e1adfd..f12147eaa0 100644
--- a/tests/config/test_ratelimiting.py
+++ b/tests/config/test_ratelimiting.py
@@ -18,7 +18,7 @@ from tests.utils import default_config
 
 
 class RatelimitConfigTestCase(TestCase):
-    def test_parse_rc_federation(self):
+    def test_parse_rc_federation(self) -> None:
         config_dict = default_config("test")
         config_dict["rc_federation"] = {
             "window_size": 20000,
diff --git a/tests/config/test_registration_config.py b/tests/config/test_registration_config.py
index 33d7b70e32..f6869d7f06 100644
--- a/tests/config/test_registration_config.py
+++ b/tests/config/test_registration_config.py
@@ -21,7 +21,7 @@ from tests.utils import default_config
 
 
 class RegistrationConfigTestCase(ConfigFileTestCase):
-    def test_session_lifetime_must_not_be_exceeded_by_smaller_lifetimes(self):
+    def test_session_lifetime_must_not_be_exceeded_by_smaller_lifetimes(self) -> None:
         """
         session_lifetime should logically be larger than, or at least as large as,
         all the different token lifetimes.
@@ -91,7 +91,7 @@ class RegistrationConfigTestCase(ConfigFileTestCase):
             "",
         )
 
-    def test_refuse_to_start_if_open_registration_and_no_verification(self):
+    def test_refuse_to_start_if_open_registration_and_no_verification(self) -> None:
         self.generate_config()
         self.add_lines_to_config(
             [
diff --git a/tests/config/test_room_directory.py b/tests/config/test_room_directory.py
index db745815ef..297ab37792 100644
--- a/tests/config/test_room_directory.py
+++ b/tests/config/test_room_directory.py
@@ -20,7 +20,7 @@ from tests import unittest
 
 
 class RoomDirectoryConfigTestCase(unittest.TestCase):
-    def test_alias_creation_acl(self):
+    def test_alias_creation_acl(self) -> None:
         config = yaml.safe_load(
             """
         alias_creation_rules:
@@ -78,7 +78,7 @@ class RoomDirectoryConfigTestCase(unittest.TestCase):
             )
         )
 
-    def test_room_publish_acl(self):
+    def test_room_publish_acl(self) -> None:
         config = yaml.safe_load(
             """
         alias_creation_rules: []
diff --git a/tests/config/test_server.py b/tests/config/test_server.py
index 1f27a54701..41a3fb0b6d 100644
--- a/tests/config/test_server.py
+++ b/tests/config/test_server.py
@@ -21,7 +21,7 @@ from tests import unittest
 
 
 class ServerConfigTestCase(unittest.TestCase):
-    def test_is_threepid_reserved(self):
+    def test_is_threepid_reserved(self) -> None:
         user1 = {"medium": "email", "address": "user1@example.com"}
         user2 = {"medium": "email", "address": "user2@example.com"}
         user3 = {"medium": "email", "address": "user3@example.com"}
@@ -32,7 +32,7 @@ class ServerConfigTestCase(unittest.TestCase):
         self.assertFalse(is_threepid_reserved(config, user3))
         self.assertFalse(is_threepid_reserved(config, user1_msisdn))
 
-    def test_unsecure_listener_no_listeners_open_private_ports_false(self):
+    def test_unsecure_listener_no_listeners_open_private_ports_false(self) -> None:
         conf = yaml.safe_load(
             ServerConfig().generate_config_section(
                 "CONFDIR", "/data_dir_path", "che.org", False, None
@@ -52,7 +52,7 @@ class ServerConfigTestCase(unittest.TestCase):
 
         self.assertEqual(conf["listeners"], expected_listeners)
 
-    def test_unsecure_listener_no_listeners_open_private_ports_true(self):
+    def test_unsecure_listener_no_listeners_open_private_ports_true(self) -> None:
         conf = yaml.safe_load(
             ServerConfig().generate_config_section(
                 "CONFDIR", "/data_dir_path", "che.org", True, None
@@ -71,7 +71,7 @@ class ServerConfigTestCase(unittest.TestCase):
 
         self.assertEqual(conf["listeners"], expected_listeners)
 
-    def test_listeners_set_correctly_open_private_ports_false(self):
+    def test_listeners_set_correctly_open_private_ports_false(self) -> None:
         listeners = [
             {
                 "port": 8448,
@@ -95,7 +95,7 @@ class ServerConfigTestCase(unittest.TestCase):
 
         self.assertEqual(conf["listeners"], listeners)
 
-    def test_listeners_set_correctly_open_private_ports_true(self):
+    def test_listeners_set_correctly_open_private_ports_true(self) -> None:
         listeners = [
             {
                 "port": 8448,
@@ -131,14 +131,14 @@ class ServerConfigTestCase(unittest.TestCase):
 
 
 class GenerateIpSetTestCase(unittest.TestCase):
-    def test_empty(self):
+    def test_empty(self) -> None:
         ip_set = generate_ip_set(())
         self.assertFalse(ip_set)
 
         ip_set = generate_ip_set((), ())
         self.assertFalse(ip_set)
 
-    def test_generate(self):
+    def test_generate(self) -> None:
         """Check adding IPv4 and IPv6 addresses."""
         # IPv4 address
         ip_set = generate_ip_set(("1.2.3.4",))
@@ -160,7 +160,7 @@ class GenerateIpSetTestCase(unittest.TestCase):
         ip_set = generate_ip_set(("1.2.3.4", "::1.2.3.4"))
         self.assertEqual(len(ip_set.iter_cidrs()), 4)
 
-    def test_extra(self):
+    def test_extra(self) -> None:
         """Extra IP addresses are treated the same."""
         ip_set = generate_ip_set((), ("1.2.3.4",))
         self.assertEqual(len(ip_set.iter_cidrs()), 4)
@@ -172,7 +172,7 @@ class GenerateIpSetTestCase(unittest.TestCase):
         ip_set = generate_ip_set(("1.2.3.4",), ("1.2.3.4",))
         self.assertEqual(len(ip_set.iter_cidrs()), 4)
 
-    def test_bad_value(self):
+    def test_bad_value(self) -> None:
         """An error should be raised if a bad value is passed in."""
         with self.assertRaises(ConfigError):
             generate_ip_set(("not-an-ip",))
diff --git a/tests/config/test_tls.py b/tests/config/test_tls.py
index 9ba5781573..7510fc4643 100644
--- a/tests/config/test_tls.py
+++ b/tests/config/test_tls.py
@@ -13,13 +13,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import cast
+
 import idna
 
 from OpenSSL import SSL
 
 from synapse.config._base import Config, RootConfig
+from synapse.config.homeserver import HomeServerConfig
 from synapse.config.tls import ConfigError, TlsConfig
-from synapse.crypto.context_factory import FederationPolicyForHTTPS
+from synapse.crypto.context_factory import (
+    FederationPolicyForHTTPS,
+    SSLClientConnectionCreator,
+)
+from synapse.types import JsonDict
 
 from tests.unittest import TestCase
 
@@ -27,7 +34,7 @@ from tests.unittest import TestCase
 class FakeServer(Config):
     section = "server"
 
-    def has_tls_listener(self):
+    def has_tls_listener(self) -> bool:
         return False
 
 
@@ -36,21 +43,21 @@ class TestConfig(RootConfig):
 
 
 class TLSConfigTests(TestCase):
-    def test_tls_client_minimum_default(self):
+    def test_tls_client_minimum_default(self) -> None:
         """
         The default client TLS version is 1.0.
         """
-        config = {}
+        config: JsonDict = {}
         t = TestConfig()
         t.tls.read_config(config, config_dir_path="", data_dir_path="")
 
         self.assertEqual(t.tls.federation_client_minimum_tls_version, "1")
 
-    def test_tls_client_minimum_set(self):
+    def test_tls_client_minimum_set(self) -> None:
         """
         The default client TLS version can be set to 1.0, 1.1, and 1.2.
         """
-        config = {"federation_client_minimum_tls_version": 1}
+        config: JsonDict = {"federation_client_minimum_tls_version": 1}
         t = TestConfig()
         t.tls.read_config(config, config_dir_path="", data_dir_path="")
         self.assertEqual(t.tls.federation_client_minimum_tls_version, "1")
@@ -76,7 +83,7 @@ class TLSConfigTests(TestCase):
         t.tls.read_config(config, config_dir_path="", data_dir_path="")
         self.assertEqual(t.tls.federation_client_minimum_tls_version, "1.2")
 
-    def test_tls_client_minimum_1_point_3_missing(self):
+    def test_tls_client_minimum_1_point_3_missing(self) -> None:
         """
         If TLS 1.3 support is missing and it's configured, it will raise a
         ConfigError.
@@ -88,7 +95,7 @@ class TLSConfigTests(TestCase):
             self.addCleanup(setattr, SSL, "SSL.OP_NO_TLSv1_3", OP_NO_TLSv1_3)
             assert not hasattr(SSL, "OP_NO_TLSv1_3")
 
-        config = {"federation_client_minimum_tls_version": 1.3}
+        config: JsonDict = {"federation_client_minimum_tls_version": 1.3}
         t = TestConfig()
         with self.assertRaises(ConfigError) as e:
             t.tls.read_config(config, config_dir_path="", data_dir_path="")
@@ -100,7 +107,7 @@ class TLSConfigTests(TestCase):
             ),
         )
 
-    def test_tls_client_minimum_1_point_3_exists(self):
+    def test_tls_client_minimum_1_point_3_exists(self) -> None:
         """
         If TLS 1.3 support exists and it's configured, it will be settable.
         """
@@ -110,20 +117,20 @@ class TLSConfigTests(TestCase):
             self.addCleanup(lambda: delattr(SSL, "OP_NO_TLSv1_3"))
             assert hasattr(SSL, "OP_NO_TLSv1_3")
 
-        config = {"federation_client_minimum_tls_version": 1.3}
+        config: JsonDict = {"federation_client_minimum_tls_version": 1.3}
         t = TestConfig()
         t.tls.read_config(config, config_dir_path="", data_dir_path="")
         self.assertEqual(t.tls.federation_client_minimum_tls_version, "1.3")
 
-    def test_tls_client_minimum_set_passed_through_1_2(self):
+    def test_tls_client_minimum_set_passed_through_1_2(self) -> None:
         """
         The configured TLS version is correctly configured by the ContextFactory.
         """
-        config = {"federation_client_minimum_tls_version": 1.2}
+        config: JsonDict = {"federation_client_minimum_tls_version": 1.2}
         t = TestConfig()
         t.tls.read_config(config, config_dir_path="", data_dir_path="")
 
-        cf = FederationPolicyForHTTPS(t)
+        cf = FederationPolicyForHTTPS(cast(HomeServerConfig, t))
         options = _get_ssl_context_options(cf._verify_ssl_context)
 
         # The context has had NO_TLSv1_1 and NO_TLSv1_0 set, but not NO_TLSv1_2
@@ -131,15 +138,15 @@ class TLSConfigTests(TestCase):
         self.assertNotEqual(options & SSL.OP_NO_TLSv1_1, 0)
         self.assertEqual(options & SSL.OP_NO_TLSv1_2, 0)
 
-    def test_tls_client_minimum_set_passed_through_1_0(self):
+    def test_tls_client_minimum_set_passed_through_1_0(self) -> None:
         """
         The configured TLS version is correctly configured by the ContextFactory.
         """
-        config = {"federation_client_minimum_tls_version": 1}
+        config: JsonDict = {"federation_client_minimum_tls_version": 1}
         t = TestConfig()
         t.tls.read_config(config, config_dir_path="", data_dir_path="")
 
-        cf = FederationPolicyForHTTPS(t)
+        cf = FederationPolicyForHTTPS(cast(HomeServerConfig, t))
         options = _get_ssl_context_options(cf._verify_ssl_context)
 
         # The context has not had any of the NO_TLS set.
@@ -147,11 +154,11 @@ class TLSConfigTests(TestCase):
         self.assertEqual(options & SSL.OP_NO_TLSv1_1, 0)
         self.assertEqual(options & SSL.OP_NO_TLSv1_2, 0)
 
-    def test_whitelist_idna_failure(self):
+    def test_whitelist_idna_failure(self) -> None:
         """
         The federation certificate whitelist will not allow IDNA domain names.
         """
-        config = {
+        config: JsonDict = {
             "federation_certificate_verification_whitelist": [
                 "example.com",
                 "*.ドメイン.テスト",
@@ -163,11 +170,11 @@ class TLSConfigTests(TestCase):
         )
         self.assertIn("IDNA domain names", str(e))
 
-    def test_whitelist_idna_result(self):
+    def test_whitelist_idna_result(self) -> None:
         """
         The federation certificate whitelist will match on IDNA encoded names.
         """
-        config = {
+        config: JsonDict = {
             "federation_certificate_verification_whitelist": [
                 "example.com",
                 "*.xn--eckwd4c7c.xn--zckzah",
@@ -176,14 +183,16 @@ class TLSConfigTests(TestCase):
         t = TestConfig()
         t.tls.read_config(config, config_dir_path="", data_dir_path="")
 
-        cf = FederationPolicyForHTTPS(t)
+        cf = FederationPolicyForHTTPS(cast(HomeServerConfig, t))
 
         # Not in the whitelist
         opts = cf.get_options(b"notexample.com")
+        assert isinstance(opts, SSLClientConnectionCreator)
         self.assertTrue(opts._verifier._verify_certs)
 
         # Caught by the wildcard
         opts = cf.get_options(idna.encode("テスト.ドメイン.テスト"))
+        assert isinstance(opts, SSLClientConnectionCreator)
         self.assertFalse(opts._verifier._verify_certs)
 
 
@@ -191,4 +200,4 @@ def _get_ssl_context_options(ssl_context: SSL.Context) -> int:
     """get the options bits from an openssl context object"""
     # the OpenSSL.SSL.Context wrapper doesn't expose get_options, so we have to
     # use the low-level interface
-    return SSL._lib.SSL_CTX_get_options(ssl_context._context)
+    return SSL._lib.SSL_CTX_get_options(ssl_context._context)  # type: ignore[attr-defined]
diff --git a/tests/config/test_util.py b/tests/config/test_util.py
index 3d4929daac..7073654832 100644
--- a/tests/config/test_util.py
+++ b/tests/config/test_util.py
@@ -21,7 +21,7 @@ from tests.unittest import TestCase
 class ValidateConfigTestCase(TestCase):
     """Test cases for synapse.config._util.validate_config"""
 
-    def test_bad_object_in_array(self):
+    def test_bad_object_in_array(self) -> None:
         """malformed objects within an array should be validated correctly"""
 
         # consider a structure:
diff --git a/tests/config/utils.py b/tests/config/utils.py
index 94c18a052b..4c0e8a064a 100644
--- a/tests/config/utils.py
+++ b/tests/config/utils.py
@@ -17,19 +17,20 @@ import tempfile
 import unittest
 from contextlib import redirect_stdout
 from io import StringIO
+from typing import List
 
 from synapse.config.homeserver import HomeServerConfig
 
 
 class ConfigFileTestCase(unittest.TestCase):
-    def setUp(self):
+    def setUp(self) -> None:
         self.dir = tempfile.mkdtemp()
         self.config_file = os.path.join(self.dir, "homeserver.yaml")
 
-    def tearDown(self):
+    def tearDown(self) -> None:
         shutil.rmtree(self.dir)
 
-    def generate_config(self):
+    def generate_config(self) -> None:
         with redirect_stdout(StringIO()):
             HomeServerConfig.load_or_generate_config(
                 "",
@@ -43,7 +44,7 @@ class ConfigFileTestCase(unittest.TestCase):
                 ],
             )
 
-    def generate_config_and_remove_lines_containing(self, needle):
+    def generate_config_and_remove_lines_containing(self, needle: str) -> None:
         self.generate_config()
 
         with open(self.config_file) as f:
@@ -52,7 +53,7 @@ class ConfigFileTestCase(unittest.TestCase):
         with open(self.config_file, "w") as f:
             f.write("".join(contents))
 
-    def add_lines_to_config(self, lines):
+    def add_lines_to_config(self, lines: List[str]) -> None:
         with open(self.config_file, "a") as f:
             for line in lines:
                 f.write(line + "\n")
-- 
cgit 1.5.1


From 2888d7ec83b33b3ce848d9219c921ffe0b88ffbf Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Mon, 19 Dec 2022 14:57:51 +0000
Subject: Faster remote room joins: invalidate caches and unblock requests when
 receiving un-partial-stated event notifications over replication.
 [rei:frrj/streams/unpsr] (#14546)

---
 changelog.d/14546.misc                          |  1 +
 synapse/replication/tcp/client.py               | 14 ++++++++++++-
 synapse/storage/databases/main/events_worker.py | 27 ++++++++++++++-----------
 synapse/storage/databases/main/state.py         | 18 ++++++++++++++++-
 4 files changed, 46 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/14546.misc

(limited to 'synapse')

diff --git a/changelog.d/14546.misc b/changelog.d/14546.misc
new file mode 100644
index 0000000000..60b6761a51
--- /dev/null
+++ b/changelog.d/14546.misc
@@ -0,0 +1 @@
+Faster remote room joins: stream the un-partial-stating of events over replication.
\ No newline at end of file
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index b4dad47b45..658d89210d 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -36,6 +36,7 @@ from synapse.replication.tcp.streams import (
     TagAccountDataStream,
     ToDeviceStream,
     TypingStream,
+    UnPartialStatedEventStream,
     UnPartialStatedRoomStream,
 )
 from synapse.replication.tcp.streams.events import (
@@ -43,7 +44,10 @@ from synapse.replication.tcp.streams.events import (
     EventsStreamEventRow,
     EventsStreamRow,
 )
-from synapse.replication.tcp.streams.partial_state import UnPartialStatedRoomStreamRow
+from synapse.replication.tcp.streams.partial_state import (
+    UnPartialStatedEventStreamRow,
+    UnPartialStatedRoomStreamRow,
+)
 from synapse.types import PersistedEventPosition, ReadReceipt, StreamKeyType, UserID
 from synapse.util.async_helpers import Linearizer, timeout_deferred
 from synapse.util.metrics import Measure
@@ -247,6 +251,14 @@ class ReplicationDataHandler:
                 self._state_storage_controller.notify_room_un_partial_stated(
                     row.room_id
                 )
+        elif stream_name == UnPartialStatedEventStream.NAME:
+            for row in rows:
+                assert isinstance(row, UnPartialStatedEventStreamRow)
+
+                # Wake up any tasks waiting for the event to be un-partial-stated.
+                self._state_storage_controller.notify_event_un_partial_stated(
+                    row.event_id
+                )
 
         await self._presence_handler.process_replication_rows(
             stream_name, instance_name, token, rows
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index e19b16064b..761b15a815 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -59,8 +59,9 @@ from synapse.metrics.background_process_metrics import (
     run_as_background_process,
     wrap_as_background_process,
 )
-from synapse.replication.tcp.streams import BackfillStream
+from synapse.replication.tcp.streams import BackfillStream, UnPartialStatedEventStream
 from synapse.replication.tcp.streams.events import EventsStream
+from synapse.replication.tcp.streams.partial_state import UnPartialStatedEventStreamRow
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
 from synapse.storage.database import (
     DatabasePool,
@@ -391,6 +392,16 @@ class EventsWorkerStore(SQLBaseStore):
             self._stream_id_gen.advance(instance_name, token)
         elif stream_name == BackfillStream.NAME:
             self._backfill_id_gen.advance(instance_name, -token)
+        elif stream_name == UnPartialStatedEventStream.NAME:
+            for row in rows:
+                assert isinstance(row, UnPartialStatedEventStreamRow)
+
+                self.is_partial_state_event.invalidate((row.event_id,))
+
+                if row.rejection_status_changed:
+                    # If the partial-stated event became rejected or unrejected
+                    # when it wasn't before, we need to invalidate this cache.
+                    self._invalidate_local_get_event_cache(row.event_id)
 
         super().process_replication_rows(stream_name, instance_name, token, rows)
 
@@ -2380,6 +2391,9 @@ class EventsWorkerStore(SQLBaseStore):
 
         This can happen, for example, when resyncing state during a faster join.
 
+        It is the caller's responsibility to ensure that other workers are
+        sent a notification so that they call `_invalidate_local_get_event_cache()`.
+
         Args:
             txn:
             event_id: ID of event to update
@@ -2418,14 +2432,3 @@ class EventsWorkerStore(SQLBaseStore):
         )
 
         self.invalidate_get_event_cache_after_txn(txn, event_id)
-
-        # TODO(faster_joins): invalidate the cache on workers. Ideally we'd just
-        #   call '_send_invalidation_to_replication', but we actually need the other
-        #   end to call _invalidate_local_get_event_cache() rather than (just)
-        #   _get_event_cache.invalidate().
-        #
-        #   One solution might be to (somehow) get the workers to call
-        #   _invalidate_caches_for_event() (though that will invalidate more than
-        #   strictly necessary).
-        #
-        #   https://github.com/matrix-org/synapse/issues/12994
diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py
index f855903c39..f32cbb2dec 100644
--- a/synapse/storage/databases/main/state.py
+++ b/synapse/storage/databases/main/state.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 import collections.abc
 import logging
-from typing import TYPE_CHECKING, Collection, Dict, Iterable, Optional, Set, Tuple
+from typing import TYPE_CHECKING, Any, Collection, Dict, Iterable, Optional, Set, Tuple
 
 import attr
 
@@ -24,6 +24,8 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion
 from synapse.events import EventBase
 from synapse.events.snapshot import EventContext
 from synapse.logging.opentracing import trace
+from synapse.replication.tcp.streams import UnPartialStatedEventStream
+from synapse.replication.tcp.streams.partial_state import UnPartialStatedEventStreamRow
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
     DatabasePool,
@@ -82,6 +84,20 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
         super().__init__(database, db_conn, hs)
         self._instance_name: str = hs.get_instance_name()
 
+    def process_replication_rows(
+        self,
+        stream_name: str,
+        instance_name: str,
+        token: int,
+        rows: Iterable[Any],
+    ) -> None:
+        if stream_name == UnPartialStatedEventStream.NAME:
+            for row in rows:
+                assert isinstance(row, UnPartialStatedEventStreamRow)
+                self._get_state_group_for_event.invalidate((row.event_id,))
+
+        super().process_replication_rows(stream_name, instance_name, token, rows)
+
     async def get_room_version(self, room_id: str) -> RoomVersion:
         """Get the room_version of a given room
         Raises:
-- 
cgit 1.5.1


From 7010a3d0151b88b3a9a7451201eaf9c5bbe48d64 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 21 Dec 2022 13:05:21 -0500
Subject: Switch to ruff instead of flake8. (#14633)

ruff is a flake8-compatible Python linter written in Rust.
It supports the flake8 plugins that we use and is significantly
faster in testing.
---
 .flake8                               |  18 -----
 .github/workflows/tests.yml           |   2 +-
 changelog.d/14633.misc                |   1 +
 poetry.lock                           | 119 +++++++++-------------------------
 pyproject.toml                        |  46 +++++++++++--
 scripts-dev/lint.sh                   |   5 +-
 stubs/frozendict.pyi                  |   2 +
 stubs/icu.pyi                         |   2 +
 stubs/sortedcontainers/sorteddict.pyi |   2 +
 stubs/sortedcontainers/sortedlist.pyi |   2 +
 stubs/sortedcontainers/sortedset.pyi  |   2 +
 synapse/config/_base.pyi              |   2 +
 12 files changed, 87 insertions(+), 116 deletions(-)
 delete mode 100644 .flake8
 create mode 100644 changelog.d/14633.misc

(limited to 'synapse')

diff --git a/.flake8 b/.flake8
deleted file mode 100644
index 4c6a4d5843..0000000000
--- a/.flake8
+++ /dev/null
@@ -1,18 +0,0 @@
-# TODO: incorporate this into pyproject.toml if flake8 supports it in the future.
-# See https://github.com/PyCQA/flake8/issues/234
-[flake8]
-# see https://pycodestyle.readthedocs.io/en/latest/intro.html#error-codes
-# for error codes. The ones we ignore are:
-#  W503: line break before binary operator
-#  W504: line break after binary operator
-#  E203: whitespace before ':' (which is contrary to pep8?)
-#  E731: do not assign a lambda expression, use a def
-#  E501: Line too long (black enforces this for us)
-#
-# flake8-bugbear runs extra checks. Its error codes are described at
-# https://github.com/PyCQA/flake8-bugbear#list-of-warnings
-#  B019: Use of functools.lru_cache or functools.cache on methods can lead to memory leaks
-#  B023: Functions defined inside a loop must not use variables redefined in the loop
-#  B024: Abstract base class with no abstract method.
-
-ignore=W503,W504,E203,E731,E501,B019,B023,B024
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index f07655d982..5a0c0a0d65 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -53,7 +53,7 @@ jobs:
       - run: scripts-dev/check_schema_delta.py --force-colors
 
   lint:
-    uses: "matrix-org/backend-meta/.github/workflows/python-poetry-ci.yml@v1"
+    uses: "matrix-org/backend-meta/.github/workflows/python-poetry-ci.yml@v2"
     with:
       typechecking-extras: "all"
 
diff --git a/changelog.d/14633.misc b/changelog.d/14633.misc
new file mode 100644
index 0000000000..def187b12b
--- /dev/null
+++ b/changelog.d/14633.misc
@@ -0,0 +1 @@
+Use [ruff](https://github.com/charliermarsh/ruff/) instead of flake8.
diff --git a/poetry.lock b/poetry.lock
index 9a9a141a14..c83cad3e1a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -244,47 +244,6 @@ python-versions = ">=3.7"
 [package.extras]
 dev = ["Sphinx", "coverage", "flake8", "lxml", "memory-profiler", "mypy (==0.910)", "tox", "xmlschema (>=1.8.0)"]
 
-[[package]]
-name = "flake8"
-version = "5.0.4"
-description = "the modular source code checker: pep8 pyflakes and co"
-category = "dev"
-optional = false
-python-versions = ">=3.6.1"
-
-[package.dependencies]
-importlib-metadata = {version = ">=1.1.0,<4.3", markers = "python_version < \"3.8\""}
-mccabe = ">=0.7.0,<0.8.0"
-pycodestyle = ">=2.9.0,<2.10.0"
-pyflakes = ">=2.5.0,<2.6.0"
-
-[[package]]
-name = "flake8-bugbear"
-version = "22.12.6"
-description = "A plugin for flake8 finding likely bugs and design problems in your program. Contains warnings that don't belong in pyflakes and pycodestyle."
-category = "dev"
-optional = false
-python-versions = ">=3.7"
-
-[package.dependencies]
-attrs = ">=19.2.0"
-flake8 = ">=3.0.0"
-
-[package.extras]
-dev = ["coverage", "hypothesis", "hypothesmith (>=0.2)", "pre-commit", "tox"]
-
-[[package]]
-name = "flake8-comprehensions"
-version = "3.10.1"
-description = "A flake8 plugin to help you write better list/set/dict comprehensions."
-category = "dev"
-optional = false
-python-versions = ">=3.7"
-
-[package.dependencies]
-flake8 = ">=3.0,<3.2.0 || >3.2.0"
-importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
-
 [[package]]
 name = "frozendict"
 version = "2.3.4"
@@ -553,14 +512,6 @@ Twisted = ">=15.1.0"
 [package.extras]
 dev = ["black (==22.3.0)", "flake8 (==4.0.1)", "isort (==5.9.3)", "ldaptor", "matrix-synapse", "mypy (==0.910)", "tox", "types-setuptools"]
 
-[[package]]
-name = "mccabe"
-version = "0.7.0"
-description = "McCabe checker, plugin for flake8"
-category = "dev"
-optional = false
-python-versions = ">=3.6"
-
 [[package]]
 name = "msgpack"
 version = "1.0.4"
@@ -770,14 +721,6 @@ python-versions = "*"
 [package.dependencies]
 pyasn1 = ">=0.4.6,<0.5.0"
 
-[[package]]
-name = "pycodestyle"
-version = "2.9.1"
-description = "Python style guide checker"
-category = "dev"
-optional = false
-python-versions = ">=3.6"
-
 [[package]]
 name = "pycparser"
 version = "2.21"
@@ -801,14 +744,6 @@ typing-extensions = ">=4.1.0"
 dotenv = ["python-dotenv (>=0.10.4)"]
 email = ["email-validator (>=1.0.3)"]
 
-[[package]]
-name = "pyflakes"
-version = "2.5.0"
-description = "passive checker of Python programs"
-category = "dev"
-optional = false
-python-versions = ">=3.6"
-
 [[package]]
 name = "pygithub"
 version = "1.57"
@@ -1044,6 +979,14 @@ typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9
 [package.extras]
 jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"]
 
+[[package]]
+name = "ruff"
+version = "0.0.189"
+description = "An extremely fast Python linter, written in Rust."
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
 [[package]]
 name = "secretstorage"
 version = "3.3.1"
@@ -1635,7 +1578,7 @@ user-search = ["pyicu"]
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.7.1"
-content-hash = "f20007013f33bc35a01e412c48adc62a936030f3074e06286674c5ad7f44d300"
+content-hash = "d20b6aea682a74e6a161080bb459e73160b8eb79526f5d17a525639ac3fe3e9e"
 
 [metadata.files]
 attrs = [
@@ -1827,18 +1770,6 @@ elementpath = [
     {file = "elementpath-2.5.0-py3-none-any.whl", hash = "sha256:2a432775e37a19e4362443078130a7dbfc457d7d093cd421c03958d9034cc08b"},
     {file = "elementpath-2.5.0.tar.gz", hash = "sha256:3a27aaf3399929fccda013899cb76d3ff111734abf4281e5f9d3721ba0b9ffa3"},
 ]
-flake8 = [
-    {file = "flake8-5.0.4-py2.py3-none-any.whl", hash = "sha256:7a1cf6b73744f5806ab95e526f6f0d8c01c66d7bbe349562d22dfca20610b248"},
-    {file = "flake8-5.0.4.tar.gz", hash = "sha256:6fbe320aad8d6b95cec8b8e47bc933004678dc63095be98528b7bdd2a9f510db"},
-]
-flake8-bugbear = [
-    {file = "flake8-bugbear-22.12.6.tar.gz", hash = "sha256:4cdb2c06e229971104443ae293e75e64c6107798229202fbe4f4091427a30ac0"},
-    {file = "flake8_bugbear-22.12.6-py3-none-any.whl", hash = "sha256:b69a510634f8a9c298dfda2b18a8036455e6b19ecac4fe582e4d7a0abfa50a30"},
-]
-flake8-comprehensions = [
-    {file = "flake8-comprehensions-3.10.1.tar.gz", hash = "sha256:412052ac4a947f36b891143430fef4859705af11b2572fbb689f90d372cf26ab"},
-    {file = "flake8_comprehensions-3.10.1-py3-none-any.whl", hash = "sha256:d763de3c74bc18a79c039a7ec732e0a1985b0c79309ceb51e56401ad0a2cd44e"},
-]
 frozendict = [
     {file = "frozendict-2.3.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4a3b32d47282ae0098b9239a6d53ec539da720258bd762d62191b46f2f87c5fc"},
     {file = "frozendict-2.3.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84c9887179a245a66a50f52afa08d4d92ae0f269839fab82285c70a0fa0dd782"},
@@ -2046,6 +1977,7 @@ lxml = [
     {file = "lxml-4.9.2-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ca989b91cf3a3ba28930a9fc1e9aeafc2a395448641df1f387a2d394638943b0"},
     {file = "lxml-4.9.2-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:822068f85e12a6e292803e112ab876bc03ed1f03dddb80154c395f891ca6b31e"},
     {file = "lxml-4.9.2-cp35-cp35m-win32.whl", hash = "sha256:be7292c55101e22f2a3d4d8913944cbea71eea90792bf914add27454a13905df"},
+    {file = "lxml-4.9.2-cp35-cp35m-win_amd64.whl", hash = "sha256:998c7c41910666d2976928c38ea96a70d1aa43be6fe502f21a651e17483a43c5"},
     {file = "lxml-4.9.2-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:b26a29f0b7fc6f0897f043ca366142d2b609dc60756ee6e4e90b5f762c6adc53"},
     {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:ab323679b8b3030000f2be63e22cdeea5b47ee0abd2d6a1dc0c8103ddaa56cd7"},
     {file = "lxml-4.9.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:689bb688a1db722485e4610a503e3e9210dcc20c520b45ac8f7533c837be76fe"},
@@ -2055,6 +1987,7 @@ lxml = [
     {file = "lxml-4.9.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:58bfa3aa19ca4c0f28c5dde0ff56c520fbac6f0daf4fac66ed4c8d2fb7f22e74"},
     {file = "lxml-4.9.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc718cd47b765e790eecb74d044cc8d37d58562f6c314ee9484df26276d36a38"},
     {file = "lxml-4.9.2-cp36-cp36m-win32.whl", hash = "sha256:d5bf6545cd27aaa8a13033ce56354ed9e25ab0e4ac3b5392b763d8d04b08e0c5"},
+    {file = "lxml-4.9.2-cp36-cp36m-win_amd64.whl", hash = "sha256:3ab9fa9d6dc2a7f29d7affdf3edebf6ece6fb28a6d80b14c3b2fb9d39b9322c3"},
     {file = "lxml-4.9.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:05ca3f6abf5cf78fe053da9b1166e062ade3fa5d4f92b4ed688127ea7d7b1d03"},
     {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:a5da296eb617d18e497bcf0a5c528f5d3b18dadb3619fbdadf4ed2356ef8d941"},
     {file = "lxml-4.9.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:04876580c050a8c5341d706dd464ff04fd597095cc8c023252566a8826505726"},
@@ -2147,10 +2080,6 @@ matrix-synapse-ldap3 = [
     {file = "matrix-synapse-ldap3-0.2.2.tar.gz", hash = "sha256:b388d95693486eef69adaefd0fd9e84463d52fe17b0214a00efcaa669b73cb74"},
     {file = "matrix_synapse_ldap3-0.2.2-py3-none-any.whl", hash = "sha256:66ee4c85d7952c6c27fd04c09cdfdf4847b8e8b7d6a7ada6ba1100013bda060f"},
 ]
-mccabe = [
-    {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"},
-    {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
-]
 msgpack = [
     {file = "msgpack-1.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4ab251d229d10498e9a2f3b1e68ef64cb393394ec477e3370c457f9430ce9250"},
     {file = "msgpack-1.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:112b0f93202d7c0fef0b7810d465fde23c746a2d482e1e2de2aafd2ce1492c88"},
@@ -2370,10 +2299,6 @@ pyasn1-modules = [
     {file = "pyasn1-modules-0.2.8.tar.gz", hash = "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e"},
     {file = "pyasn1_modules-0.2.8-py2.py3-none-any.whl", hash = "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74"},
 ]
-pycodestyle = [
-    {file = "pycodestyle-2.9.1-py2.py3-none-any.whl", hash = "sha256:d1735fc58b418fd7c5f658d28d943854f8a849b01a5d0a1e6f3f3fdd0166804b"},
-    {file = "pycodestyle-2.9.1.tar.gz", hash = "sha256:2c9607871d58c76354b697b42f5d57e1ada7d261c261efac224b664affdc5785"},
-]
 pycparser = [
     {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"},
     {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
@@ -2416,10 +2341,6 @@ pydantic = [
     {file = "pydantic-1.10.2-py3-none-any.whl", hash = "sha256:1b6ee725bd6e83ec78b1aa32c5b1fa67a3a65badddde3976bca5fe4568f27709"},
     {file = "pydantic-1.10.2.tar.gz", hash = "sha256:91b8e218852ef6007c2b98cd861601c6a09f1aa32bbbb74fab5b1c33d4a1e410"},
 ]
-pyflakes = [
-    {file = "pyflakes-2.5.0-py2.py3-none-any.whl", hash = "sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2"},
-    {file = "pyflakes-2.5.0.tar.gz", hash = "sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"},
-]
 pygithub = [
     {file = "PyGithub-1.57-py3-none-any.whl", hash = "sha256:5822febeac2391f1306c55a99af2bc8f86c8bf82ded000030cd02c18f31b731f"},
     {file = "PyGithub-1.57.tar.gz", hash = "sha256:c273f252b278fb81f1769505cc6921bdb6791e1cebd6ac850cc97dad13c31ff3"},
@@ -2560,6 +2481,24 @@ rich = [
     {file = "rich-12.6.0-py3-none-any.whl", hash = "sha256:a4eb26484f2c82589bd9a17c73d32a010b1e29d89f1604cd9bf3a2097b81bb5e"},
     {file = "rich-12.6.0.tar.gz", hash = "sha256:ba3a3775974105c221d31141f2c116f4fd65c5ceb0698657a11e9f295ec93fd0"},
 ]
+ruff = [
+    {file = "ruff-0.0.189-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:07c947b42d3c5efc6761214acdb6b71a49b833ad9fb9b320454244a6fe01f212"},
+    {file = "ruff-0.0.189-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:76e6161d021bde5738bf9d123ae445cb3a22fa60f14958ce64961d8af16141a0"},
+    {file = "ruff-0.0.189-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c27f51e5b48cd483459cdd1c95a6bd989adcf7653ccc440ca437f4993fe4b812"},
+    {file = "ruff-0.0.189-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e89f488a16ce2b21d940fc6271ed161affec788955f7b41761a9693a92e994bb"},
+    {file = "ruff-0.0.189-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fee593d8d470811c316ff2eb0124ac74668a3d637ab3fb237aa3fa8561fb89aa"},
+    {file = "ruff-0.0.189-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:bc3a73683a5b3b4b7bf951bbd4aa7d79b993c8c2e608a68de120c342ebe510f2"},
+    {file = "ruff-0.0.189-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e5d73877558651f48c86d958afe0f662b6c3639990c230a6b9d82ac6093484db"},
+    {file = "ruff-0.0.189-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d1e6e9813f59ba54e7cb6f28c1f2a9a756197f6e321bd68519afe57f8522fce"},
+    {file = "ruff-0.0.189-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d177090cf03004b14814b0aad530758f5186d391250afb737570edd55beabc6"},
+    {file = "ruff-0.0.189-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:48de3253856a0a85f9b53a0ca1982946c7fd343c796cdc76ece0ae359d5b71b5"},
+    {file = "ruff-0.0.189-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e935bb5a213030de312ad00df477f38c78ac97af58b0e6a4ae5762705a5113da"},
+    {file = "ruff-0.0.189-py3-none-musllinux_1_2_i686.whl", hash = "sha256:bdb8173d6efff96e0cc5fe38f5fc4daa0d28fb11553482b9989d372fdafc7708"},
+    {file = "ruff-0.0.189-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:14486fd8632bc4c7f926137a9c6a8c45993ff6667ddb7a88192c369c3afd86e9"},
+    {file = "ruff-0.0.189-py3-none-win32.whl", hash = "sha256:e281080e2ed04f01275b3df5baa0afe2802ab145349298e24700cdd09c0afddc"},
+    {file = "ruff-0.0.189-py3-none-win_amd64.whl", hash = "sha256:c552ff0b0587a5e13f935131d2a19782c0baf8b59175cf3160a76545fbdbdd76"},
+    {file = "ruff-0.0.189.tar.gz", hash = "sha256:90a3031461ed83686ff78f96e58d28cdee835110c51bdfa0968a2d5892610c71"},
+]
 secretstorage = [
     {file = "SecretStorage-3.3.1-py3-none-any.whl", hash = "sha256:422d82c36172d88d6a0ed5afdec956514b189ddbfb72fefab0c8a1cee4eaf71f"},
     {file = "SecretStorage-3.3.1.tar.gz", hash = "sha256:fd666c51a6bf200643495a04abb261f83229dcb6fd8472ec393df7ffc8b6f195"},
diff --git a/pyproject.toml b/pyproject.toml
index 3281441534..37b9ab3a77 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,6 +40,46 @@ target-version = ['py37', 'py38', 'py39', 'py310']
 # https://black.readthedocs.io/en/stable/usage_and_configuration/file_collection_and_discovery.html#gitignore
 # Use `extend-exclude` if you want to exclude something in addition to this.
 
+[tool.ruff]
+line-length = 88
+
+# See https://github.com/charliermarsh/ruff/#pycodestyle
+# for error codes. The ones we ignore are:
+#  E731: do not assign a lambda expression, use a def
+#  E501: Line too long (black enforces this for us)
+#
+# See https://github.com/charliermarsh/ruff/#pyflakes
+#  F401: unused import
+#  F811: Redefinition of unused name
+#  F821: Undefined name
+#
+# flake8-bugbear compatible checks. Its error codes are described at
+# https://github.com/charliermarsh/ruff/#flake8-bugbear
+#  B019: Use of functools.lru_cache or functools.cache on methods can lead to memory leaks
+#  B023: Functions defined inside a loop must not use variables redefined in the loop
+#  B024: Abstract base class with no abstract method.
+ignore = [
+    "B019",
+    "B023",
+    "B024",
+    "E501",
+    "E731",
+    "F401",
+    "F811",
+    "F821",
+]
+select = [
+    # pycodestyle checks.
+    "E",
+    "W",
+    # pyflakes checks.
+    "F",
+    # flake8-bugbear checks.
+    "B0",
+    # flake8-comprehensions checks.
+    "C4",
+]
+
 [tool.isort]
 line_length = 88
 sections = ["FUTURE", "STDLIB", "THIRDPARTY", "TWISTED", "FIRSTPARTY", "TESTS", "LOCALFOLDER"]
@@ -274,12 +314,10 @@ all = [
 ]
 
 [tool.poetry.dev-dependencies]
-## We pin black so that our tests don't start failing on new releases.
+# We pin black so that our tests don't start failing on new releases.
 isort = ">=5.10.1"
 black = ">=22.3.0"
-flake8-comprehensions = "*"
-flake8-bugbear = ">=21.3.2"
-flake8 = "*"
+ruff = "0.0.189"
 
 # Typechecking
 mypy = "*"
diff --git a/scripts-dev/lint.sh b/scripts-dev/lint.sh
index bf900645b1..f6b81013c3 100755
--- a/scripts-dev/lint.sh
+++ b/scripts-dev/lint.sh
@@ -1,9 +1,8 @@
 #!/usr/bin/env bash
 #
 # Runs linting scripts over the local Synapse checkout
-# isort - sorts import statements
 # black - opinionated code formatter
-# flake8 - lints and finds mistakes
+# ruff - lints and finds mistakes
 
 set -e
 
@@ -105,6 +104,6 @@ set -x
 isort "${files[@]}"
 python3 -m black "${files[@]}"
 ./scripts-dev/config-lint.sh
-flake8 "${files[@]}"
+ruff "${files[@]}"
 ./scripts-dev/check_pydantic_models.py lint
 mypy
diff --git a/stubs/frozendict.pyi b/stubs/frozendict.pyi
index 24c6f3af77..196dee4461 100644
--- a/stubs/frozendict.pyi
+++ b/stubs/frozendict.pyi
@@ -14,6 +14,8 @@
 
 # Stub for frozendict.
 
+from __future__ import annotations
+
 from typing import Any, Hashable, Iterable, Iterator, Mapping, Tuple, TypeVar, overload
 
 _KT = TypeVar("_KT", bound=Hashable)  # Key type.
diff --git a/stubs/icu.pyi b/stubs/icu.pyi
index efeda7938a..7736df8a92 100644
--- a/stubs/icu.pyi
+++ b/stubs/icu.pyi
@@ -14,6 +14,8 @@
 
 # Stub for PyICU.
 
+from __future__ import annotations
+
 class Locale:
     @staticmethod
     def getDefault() -> Locale: ...
diff --git a/stubs/sortedcontainers/sorteddict.pyi b/stubs/sortedcontainers/sorteddict.pyi
index 7c399ab38d..81f581b034 100644
--- a/stubs/sortedcontainers/sorteddict.pyi
+++ b/stubs/sortedcontainers/sorteddict.pyi
@@ -2,6 +2,8 @@
 # https://github.com/grantjenks/python-sortedcontainers/blob/eea42df1f7bad2792e8da77335ff888f04b9e5ae/sortedcontainers/sorteddict.pyi
 # (from https://github.com/grantjenks/python-sortedcontainers/pull/107)
 
+from __future__ import annotations
+
 from typing import (
     Any,
     Callable,
diff --git a/stubs/sortedcontainers/sortedlist.pyi b/stubs/sortedcontainers/sortedlist.pyi
index 403897e391..cd4c969849 100644
--- a/stubs/sortedcontainers/sortedlist.pyi
+++ b/stubs/sortedcontainers/sortedlist.pyi
@@ -2,6 +2,8 @@
 # https://github.com/grantjenks/python-sortedcontainers/blob/a419ffbd2b1c935b09f11f0971696e537fd0c510/sortedcontainers/sortedlist.pyi
 # (from https://github.com/grantjenks/python-sortedcontainers/pull/107)
 
+from __future__ import annotations
+
 from typing import (
     Any,
     Callable,
diff --git a/stubs/sortedcontainers/sortedset.pyi b/stubs/sortedcontainers/sortedset.pyi
index 43c860f422..d761c438f7 100644
--- a/stubs/sortedcontainers/sortedset.pyi
+++ b/stubs/sortedcontainers/sortedset.pyi
@@ -2,6 +2,8 @@
 # https://github.com/grantjenks/python-sortedcontainers/blob/d0a225d7fd0fb4c54532b8798af3cbeebf97e2d5/sortedcontainers/sortedset.pyi
 # (from https://github.com/grantjenks/python-sortedcontainers/pull/107)
 
+from __future__ import annotations
+
 from typing import (
     AbstractSet,
     Any,
diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi
index 01ea2b4dab..bd265de536 100644
--- a/synapse/config/_base.pyi
+++ b/synapse/config/_base.pyi
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import argparse
 from typing import (
     Any,
-- 
cgit 1.5.1


From 5c9be9c76021ac54f425f10e8f935532d3197de5 Mon Sep 17 00:00:00 2001
From: Jeyachandran Rathnam <jai.rathnem@gmail.com>
Date: Thu, 22 Dec 2022 13:26:37 -0500
Subject: Check sqlite database file exists before porting. (#14692)

To avoid creating an empty SQLite file if the given path
is incorrect.
---
 changelog.d/14692.misc              | 1 +
 synapse/_scripts/synapse_port_db.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14692.misc

(limited to 'synapse')

diff --git a/changelog.d/14692.misc b/changelog.d/14692.misc
new file mode 100644
index 0000000000..0edac253b7
--- /dev/null
+++ b/changelog.d/14692.misc
@@ -0,0 +1 @@
+Check that the SQLite database file exists before porting to PostgreSQL.
\ No newline at end of file
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index d850e54e17..c463b60b26 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -1307,7 +1307,7 @@ def main() -> None:
     sqlite_config = {
         "name": "sqlite3",
         "args": {
-            "database": args.sqlite_database,
+            "database": "file:{}?mode=rw".format(args.sqlite_database),
             "cp_min": 1,
             "cp_max": 1,
             "check_same_thread": False,
-- 
cgit 1.5.1


From a52822d39c866b4d5e6d2a0176f29ae49bf3f8e9 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Fri, 23 Dec 2022 14:04:50 +0000
Subject: Log to-device msgids when we return them over /sync (#14724)

---
 changelog.d/14724.misc   |  1 +
 synapse/handlers/sync.py | 20 +++++++++++++-------
 2 files changed, 14 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/14724.misc

(limited to 'synapse')

diff --git a/changelog.d/14724.misc b/changelog.d/14724.misc
new file mode 100644
index 0000000000..270e5ed188
--- /dev/null
+++ b/changelog.d/14724.misc
@@ -0,0 +1 @@
+If debug logging is enabled, log the `msgid`s of any to-device messages that are returned over `/sync`.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 7d6a653747..4fa480262b 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -37,6 +37,7 @@ from synapse.api.presence import UserPresenceState
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.events import EventBase
 from synapse.handlers.relations import BundledAggregations
+from synapse.logging import issue9533_logger
 from synapse.logging.context import current_context
 from synapse.logging.opentracing import (
     SynapseTags,
@@ -1623,13 +1624,18 @@ class SyncHandler:
                     }
                 )
 
-            logger.debug(
-                "Returning %d to-device messages between %d and %d (current token: %d)",
-                len(messages),
-                since_stream_id,
-                stream_id,
-                now_token.to_device_key,
-            )
+            if messages and issue9533_logger.isEnabledFor(logging.DEBUG):
+                issue9533_logger.debug(
+                    "Returning to-device messages with stream_ids (%d, %d]; now: %d;"
+                    " msgids: %s",
+                    since_stream_id,
+                    stream_id,
+                    now_token.to_device_key,
+                    [
+                        message["content"].get(EventContentFields.TO_DEVICE_MSGID)
+                        for message in messages
+                    ],
+                )
             sync_result_builder.now_token = now_token.copy_and_replace(
                 StreamKeyType.TO_DEVICE, stream_id
             )
-- 
cgit 1.5.1


From 3854d0f94947ddd5a9ee98198af8d7ae839962c9 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier <babolivier@matrix.org>
Date: Wed, 28 Dec 2022 14:48:21 +0100
Subject: Add a `cached` helper to the module API (#14663)

---
 changelog.d/14663.feature      |  1 +
 synapse/module_api/__init__.py | 40 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14663.feature

(limited to 'synapse')

diff --git a/changelog.d/14663.feature b/changelog.d/14663.feature
new file mode 100644
index 0000000000..b03f3ee54e
--- /dev/null
+++ b/changelog.d/14663.feature
@@ -0,0 +1 @@
+Add a `cached` function to `synapse.module_api` that returns a decorator to cache return values of functions.
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 0092a03c59..6f4a934b05 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -18,6 +18,7 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Callable,
+    Collection,
     Dict,
     Generator,
     Iterable,
@@ -126,7 +127,7 @@ from synapse.types import (
 from synapse.types.state import StateFilter
 from synapse.util import Clock
 from synapse.util.async_helpers import maybe_awaitable
-from synapse.util.caches.descriptors import CachedFunction, cached
+from synapse.util.caches.descriptors import CachedFunction, cached as _cached
 from synapse.util.frozenutils import freeze
 
 if TYPE_CHECKING:
@@ -136,6 +137,7 @@ if TYPE_CHECKING:
 
 T = TypeVar("T")
 P = ParamSpec("P")
+F = TypeVar("F", bound=Callable[..., Any])
 
 """
 This package defines the 'stable' API which can be used by extension modules which
@@ -185,6 +187,42 @@ class UserIpAndAgent:
     last_seen: int
 
 
+def cached(
+    *,
+    max_entries: int = 1000,
+    num_args: Optional[int] = None,
+    uncached_args: Optional[Collection[str]] = None,
+) -> Callable[[F], CachedFunction[F]]:
+    """Returns a decorator that applies a memoizing cache around the function. This
+    decorator behaves similarly to functools.lru_cache.
+
+    Example:
+
+        @cached()
+        def foo(a, b):
+            ...
+
+    Added in Synapse v1.74.0.
+
+    Args:
+        max_entries: The maximum number of entries in the cache. If the cache is full
+            and a new entry is added, the least recently accessed entry will be evicted
+            from the cache.
+        num_args: The number of positional arguments (excluding `self`) to use as cache
+            keys. Defaults to all named args of the function.
+        uncached_args: A list of argument names to not use as the cache key. (`self` is
+            always ignored.) Cannot be used with num_args.
+
+    Returns:
+        A decorator that applies a memoizing cache around the function.
+    """
+    return _cached(
+        max_entries=max_entries,
+        num_args=num_args,
+        uncached_args=uncached_args,
+    )
+
+
 class ModuleApi:
     """A proxy object that gets passed to various plugin modules so they
     can register new users etc if necessary.
-- 
cgit 1.5.1


From 044fa1a1de3c954f247a98c0ce8f734c675a5efb Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 29 Dec 2022 12:18:06 -0500
Subject: Actually use the picture_claim as configured in OIDC config. (#14751)

Previously it was only using the default value ("picture") when
fetching the picture from the user info.
---
 changelog.d/14751.bugfix | 1 +
 synapse/handlers/oidc.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14751.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14751.bugfix b/changelog.d/14751.bugfix
new file mode 100644
index 0000000000..56ef852288
--- /dev/null
+++ b/changelog.d/14751.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.73.0 where the `picture_claim` configured under `oidc_providers` was unused (the default value of `"picture"` was used instead).
diff --git a/synapse/handlers/oidc.py b/synapse/handlers/oidc.py
index 03de6a4ba6..23fb00c9c9 100644
--- a/synapse/handlers/oidc.py
+++ b/synapse/handlers/oidc.py
@@ -1615,7 +1615,7 @@ class JinjaOidcMappingProvider(OidcMappingProvider[JinjaOidcMappingConfig]):
         if email:
             emails.append(email)
 
-        picture = userinfo.get("picture")
+        picture = userinfo.get(self._config.picture_claim)
 
         return UserAttributeDict(
             localpart=localpart,
-- 
cgit 1.5.1


From c4456114e1a5471bb61cb45605e782263dc8233c Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Sun, 1 Jan 2023 03:40:46 +0000
Subject: Add experimental support for MSC3391: deleting account data (#14714)

---
 changelog.d/14714.feature                          |   1 +
 .../complement/conf/workers-shared-extra.yaml.j2   |   2 +
 scripts-dev/complement.sh                          |   2 +-
 synapse/config/experimental.py                     |   3 +
 synapse/handlers/account_data.py                   | 111 ++++++++++-
 synapse/replication/http/account_data.py           |  92 ++++++++-
 synapse/rest/client/account_data.py                | 115 +++++++++++
 synapse/storage/database.py                        |  33 +++-
 synapse/storage/databases/main/account_data.py     | 219 +++++++++++++++++++--
 9 files changed, 547 insertions(+), 31 deletions(-)
 create mode 100644 changelog.d/14714.feature

(limited to 'synapse')

diff --git a/changelog.d/14714.feature b/changelog.d/14714.feature
new file mode 100644
index 0000000000..5f3a20b7a7
--- /dev/null
+++ b/changelog.d/14714.feature
@@ -0,0 +1 @@
+Add experimental support for [MSC3391](https://github.com/matrix-org/matrix-spec-proposals/pull/3391) (removing account data).
\ No newline at end of file
diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2
index ca640c343b..cb839fed07 100644
--- a/docker/complement/conf/workers-shared-extra.yaml.j2
+++ b/docker/complement/conf/workers-shared-extra.yaml.j2
@@ -102,6 +102,8 @@ experimental_features:
   {% endif %}
   # Filtering /messages by relation type.
   msc3874_enabled: true
+  # Enable removing account data support
+  msc3391_enabled: true
 
 server_notices:
   system_mxid_localpart: _server
diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh
index 8741ba3e34..51d1bac618 100755
--- a/scripts-dev/complement.sh
+++ b/scripts-dev/complement.sh
@@ -190,7 +190,7 @@ fi
 
 extra_test_args=()
 
-test_tags="synapse_blacklist,msc3787,msc3874"
+test_tags="synapse_blacklist,msc3787,msc3874,msc3391"
 
 # All environment variables starting with PASS_ will be shared.
 # (The prefix is stripped off before reaching the container.)
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 573fa0386f..0f3870bfe1 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -136,3 +136,6 @@ class ExperimentalConfig(Config):
             # Enable room version (and thus applicable push rules from MSC3931/3932)
             version_id = RoomVersions.MSC1767v10.identifier
             KNOWN_ROOM_VERSIONS[version_id] = RoomVersions.MSC1767v10
+
+        # MSC3391: Removing account data.
+        self.msc3391_enabled = experimental.get("msc3391_enabled", False)
diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py
index fc21d58001..aba7315cf7 100644
--- a/synapse/handlers/account_data.py
+++ b/synapse/handlers/account_data.py
@@ -17,10 +17,12 @@ import random
 from typing import TYPE_CHECKING, Awaitable, Callable, Collection, List, Optional, Tuple
 
 from synapse.replication.http.account_data import (
+    ReplicationAddRoomAccountDataRestServlet,
     ReplicationAddTagRestServlet,
+    ReplicationAddUserAccountDataRestServlet,
+    ReplicationRemoveRoomAccountDataRestServlet,
     ReplicationRemoveTagRestServlet,
-    ReplicationRoomAccountDataRestServlet,
-    ReplicationUserAccountDataRestServlet,
+    ReplicationRemoveUserAccountDataRestServlet,
 )
 from synapse.streams import EventSource
 from synapse.types import JsonDict, StreamKeyType, UserID
@@ -41,8 +43,18 @@ class AccountDataHandler:
         self._instance_name = hs.get_instance_name()
         self._notifier = hs.get_notifier()
 
-        self._user_data_client = ReplicationUserAccountDataRestServlet.make_client(hs)
-        self._room_data_client = ReplicationRoomAccountDataRestServlet.make_client(hs)
+        self._add_user_data_client = (
+            ReplicationAddUserAccountDataRestServlet.make_client(hs)
+        )
+        self._remove_user_data_client = (
+            ReplicationRemoveUserAccountDataRestServlet.make_client(hs)
+        )
+        self._add_room_data_client = (
+            ReplicationAddRoomAccountDataRestServlet.make_client(hs)
+        )
+        self._remove_room_data_client = (
+            ReplicationRemoveRoomAccountDataRestServlet.make_client(hs)
+        )
         self._add_tag_client = ReplicationAddTagRestServlet.make_client(hs)
         self._remove_tag_client = ReplicationRemoveTagRestServlet.make_client(hs)
         self._account_data_writers = hs.config.worker.writers.account_data
@@ -112,7 +124,7 @@ class AccountDataHandler:
 
             return max_stream_id
         else:
-            response = await self._room_data_client(
+            response = await self._add_room_data_client(
                 instance_name=random.choice(self._account_data_writers),
                 user_id=user_id,
                 room_id=room_id,
@@ -121,15 +133,59 @@ class AccountDataHandler:
             )
             return response["max_stream_id"]
 
+    async def remove_account_data_for_room(
+        self, user_id: str, room_id: str, account_data_type: str
+    ) -> Optional[int]:
+        """
+        Deletes the room account data for the given user and account data type.
+
+        "Deleting" account data merely means setting the content of the account data
+        to an empty JSON object: {}.
+
+        Args:
+            user_id: The user ID to remove room account data for.
+            room_id: The room ID to target.
+            account_data_type: The account data type to remove.
+
+        Returns:
+            The maximum stream ID, or None if the room account data item did not exist.
+        """
+        if self._instance_name in self._account_data_writers:
+            max_stream_id = await self._store.remove_account_data_for_room(
+                user_id, room_id, account_data_type
+            )
+            if max_stream_id is None:
+                # The referenced account data did not exist, so no delete occurred.
+                return None
+
+            self._notifier.on_new_event(
+                StreamKeyType.ACCOUNT_DATA, max_stream_id, users=[user_id]
+            )
+
+            # Notify Synapse modules that the content of the type has changed to an
+            # empty dictionary.
+            await self._notify_modules(user_id, room_id, account_data_type, {})
+
+            return max_stream_id
+        else:
+            response = await self._remove_room_data_client(
+                instance_name=random.choice(self._account_data_writers),
+                user_id=user_id,
+                room_id=room_id,
+                account_data_type=account_data_type,
+                content={},
+            )
+            return response["max_stream_id"]
+
     async def add_account_data_for_user(
         self, user_id: str, account_data_type: str, content: JsonDict
     ) -> int:
         """Add some global account_data for a user.
 
         Args:
-            user_id: The user to add a tag for.
+            user_id: The user to add some account data for.
             account_data_type: The type of account_data to add.
-            content: A json object to associate with the tag.
+            content: The content json dictionary.
 
         Returns:
             The maximum stream ID.
@@ -148,7 +204,7 @@ class AccountDataHandler:
 
             return max_stream_id
         else:
-            response = await self._user_data_client(
+            response = await self._add_user_data_client(
                 instance_name=random.choice(self._account_data_writers),
                 user_id=user_id,
                 account_data_type=account_data_type,
@@ -156,6 +212,45 @@ class AccountDataHandler:
             )
             return response["max_stream_id"]
 
+    async def remove_account_data_for_user(
+        self, user_id: str, account_data_type: str
+    ) -> Optional[int]:
+        """Removes a piece of global account_data for a user.
+
+        Args:
+            user_id: The user to remove account data for.
+            account_data_type: The type of account_data to remove.
+
+        Returns:
+            The maximum stream ID, or None if the account data item did not exist.
+        """
+
+        if self._instance_name in self._account_data_writers:
+            max_stream_id = await self._store.remove_account_data_for_user(
+                user_id, account_data_type
+            )
+            if max_stream_id is None:
+                # The referenced account data did not exist, so no delete occurred.
+                return None
+
+            self._notifier.on_new_event(
+                StreamKeyType.ACCOUNT_DATA, max_stream_id, users=[user_id]
+            )
+
+            # Notify Synapse modules that the content of the type has changed to an
+            # empty dictionary.
+            await self._notify_modules(user_id, None, account_data_type, {})
+
+            return max_stream_id
+        else:
+            response = await self._remove_user_data_client(
+                instance_name=random.choice(self._account_data_writers),
+                user_id=user_id,
+                account_data_type=account_data_type,
+                content={},
+            )
+            return response["max_stream_id"]
+
     async def add_tag_to_room(
         self, user_id: str, room_id: str, tag: str, content: JsonDict
     ) -> int:
diff --git a/synapse/replication/http/account_data.py b/synapse/replication/http/account_data.py
index 310f609153..0edc95977b 100644
--- a/synapse/replication/http/account_data.py
+++ b/synapse/replication/http/account_data.py
@@ -28,7 +28,7 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
-class ReplicationUserAccountDataRestServlet(ReplicationEndpoint):
+class ReplicationAddUserAccountDataRestServlet(ReplicationEndpoint):
     """Add user account data on the appropriate account data worker.
 
     Request format:
@@ -49,7 +49,6 @@ class ReplicationUserAccountDataRestServlet(ReplicationEndpoint):
         super().__init__(hs)
 
         self.handler = hs.get_account_data_handler()
-        self.clock = hs.get_clock()
 
     @staticmethod
     async def _serialize_payload(  # type: ignore[override]
@@ -73,7 +72,45 @@ class ReplicationUserAccountDataRestServlet(ReplicationEndpoint):
         return 200, {"max_stream_id": max_stream_id}
 
 
-class ReplicationRoomAccountDataRestServlet(ReplicationEndpoint):
+class ReplicationRemoveUserAccountDataRestServlet(ReplicationEndpoint):
+    """Remove user account data on the appropriate account data worker.
+
+    Request format:
+
+        POST /_synapse/replication/remove_user_account_data/:user_id/:type
+
+        {}
+
+    The request body is empty: the path alone identifies what to remove.
+
+    """
+
+    NAME = "remove_user_account_data"
+    PATH_ARGS = ("user_id", "account_data_type")
+    CACHE = False
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__(hs)
+
+        self.handler = hs.get_account_data_handler()
+
+    @staticmethod
+    async def _serialize_payload(  # type: ignore[override]
+        user_id: str, account_data_type: str
+    ) -> JsonDict:
+        return {}
+
+    async def _handle_request(  # type: ignore[override]
+        self, request: Request, user_id: str, account_data_type: str
+    ) -> Tuple[int, JsonDict]:
+        max_stream_id = await self.handler.remove_account_data_for_user(
+            user_id, account_data_type
+        )
+
+        return 200, {"max_stream_id": max_stream_id}
+
+
+class ReplicationAddRoomAccountDataRestServlet(ReplicationEndpoint):
     """Add room account data on the appropriate account data worker.
 
     Request format:
@@ -94,7 +131,6 @@ class ReplicationRoomAccountDataRestServlet(ReplicationEndpoint):
         super().__init__(hs)
 
         self.handler = hs.get_account_data_handler()
-        self.clock = hs.get_clock()
 
     @staticmethod
     async def _serialize_payload(  # type: ignore[override]
@@ -118,6 +154,44 @@ class ReplicationRoomAccountDataRestServlet(ReplicationEndpoint):
         return 200, {"max_stream_id": max_stream_id}
 
 
+class ReplicationRemoveRoomAccountDataRestServlet(ReplicationEndpoint):
+    """Remove room account data on the appropriate account data worker.
+
+    Request format:
+
+        POST /_synapse/replication/remove_room_account_data/:user_id/:room_id/:account_data_type
+
+        {}
+
+    The request body is empty: the path alone identifies what to remove.
+
+    """
+
+    NAME = "remove_room_account_data"
+    PATH_ARGS = ("user_id", "room_id", "account_data_type")
+    CACHE = False
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__(hs)
+
+        self.handler = hs.get_account_data_handler()
+
+    @staticmethod
+    async def _serialize_payload(  # type: ignore[override]
+        user_id: str, room_id: str, account_data_type: str, content: JsonDict
+    ) -> JsonDict:
+        return {}
+
+    async def _handle_request(  # type: ignore[override]
+        self, request: Request, user_id: str, room_id: str, account_data_type: str
+    ) -> Tuple[int, JsonDict]:
+        max_stream_id = await self.handler.remove_account_data_for_room(
+            user_id, room_id, account_data_type
+        )
+
+        return 200, {"max_stream_id": max_stream_id}
+
+
 class ReplicationAddTagRestServlet(ReplicationEndpoint):
     """Add tag on the appropriate account data worker.
 
@@ -139,7 +213,6 @@ class ReplicationAddTagRestServlet(ReplicationEndpoint):
         super().__init__(hs)
 
         self.handler = hs.get_account_data_handler()
-        self.clock = hs.get_clock()
 
     @staticmethod
     async def _serialize_payload(  # type: ignore[override]
@@ -186,7 +259,6 @@ class ReplicationRemoveTagRestServlet(ReplicationEndpoint):
         super().__init__(hs)
 
         self.handler = hs.get_account_data_handler()
-        self.clock = hs.get_clock()
 
     @staticmethod
     async def _serialize_payload(user_id: str, room_id: str, tag: str) -> JsonDict:  # type: ignore[override]
@@ -206,7 +278,11 @@ class ReplicationRemoveTagRestServlet(ReplicationEndpoint):
 
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
-    ReplicationUserAccountDataRestServlet(hs).register(http_server)
-    ReplicationRoomAccountDataRestServlet(hs).register(http_server)
+    ReplicationAddUserAccountDataRestServlet(hs).register(http_server)
+    ReplicationAddRoomAccountDataRestServlet(hs).register(http_server)
     ReplicationAddTagRestServlet(hs).register(http_server)
     ReplicationRemoveTagRestServlet(hs).register(http_server)
+
+    if hs.config.experimental.msc3391_enabled:
+        ReplicationRemoveUserAccountDataRestServlet(hs).register(http_server)
+        ReplicationRemoveRoomAccountDataRestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/account_data.py b/synapse/rest/client/account_data.py
index f13970b898..e805196fec 100644
--- a/synapse/rest/client/account_data.py
+++ b/synapse/rest/client/account_data.py
@@ -41,6 +41,7 @@ class AccountDataServlet(RestServlet):
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
+        self._hs = hs
         self.auth = hs.get_auth()
         self.store = hs.get_datastores().main
         self.handler = hs.get_account_data_handler()
@@ -54,6 +55,16 @@ class AccountDataServlet(RestServlet):
 
         body = parse_json_object_from_request(request)
 
+        # If experimental support for MSC3391 is enabled, then providing an empty dict
+        # as the value for an account data type should be functionally equivalent to
+        # calling the DELETE method on the same type.
+        if self._hs.config.experimental.msc3391_enabled:
+            if body == {}:
+                await self.handler.remove_account_data_for_user(
+                    user_id, account_data_type
+                )
+                return 200, {}
+
         await self.handler.add_account_data_for_user(user_id, account_data_type, body)
 
         return 200, {}
@@ -72,9 +83,48 @@ class AccountDataServlet(RestServlet):
         if event is None:
             raise NotFoundError("Account data not found")
 
+        # If experimental support for MSC3391 is enabled, then this endpoint should
+        # return a 404 if the content for an account data type is an empty dict.
+        if self._hs.config.experimental.msc3391_enabled and event == {}:
+            raise NotFoundError("Account data not found")
+
         return 200, event
 
 
+class UnstableAccountDataServlet(RestServlet):
+    """
+    Contains an unstable endpoint for removing user account data, as specified by
+    MSC3391. If that MSC is accepted, this code should have unstable prefixes removed
+    and become incorporated into AccountDataServlet above.
+    """
+
+    PATTERNS = client_patterns(
+        "/org.matrix.msc3391/user/(?P<user_id>[^/]*)"
+        "/account_data/(?P<account_data_type>[^/]*)",
+        unstable=True,
+        releases=(),
+    )
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__()
+        self.auth = hs.get_auth()
+        self.handler = hs.get_account_data_handler()
+
+    async def on_DELETE(
+        self,
+        request: SynapseRequest,
+        user_id: str,
+        account_data_type: str,
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request)
+        if user_id != requester.user.to_string():
+            raise AuthError(403, "Cannot delete account data for other users.")
+
+        await self.handler.remove_account_data_for_user(user_id, account_data_type)
+
+        return 200, {}
+
+
 class RoomAccountDataServlet(RestServlet):
     """
     PUT /user/{user_id}/rooms/{room_id}/account_data/{account_dataType} HTTP/1.1
@@ -89,6 +139,7 @@ class RoomAccountDataServlet(RestServlet):
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
+        self._hs = hs
         self.auth = hs.get_auth()
         self.store = hs.get_datastores().main
         self.handler = hs.get_account_data_handler()
@@ -121,6 +172,16 @@ class RoomAccountDataServlet(RestServlet):
                 Codes.BAD_JSON,
             )
 
+        # If experimental support for MSC3391 is enabled, then providing an empty dict
+        # as the value for an account data type should be functionally equivalent to
+        # calling the DELETE method on the same type.
+        if self._hs.config.experimental.msc3391_enabled:
+            if body == {}:
+                await self.handler.remove_account_data_for_room(
+                    user_id, room_id, account_data_type
+                )
+                return 200, {}
+
         await self.handler.add_account_data_to_room(
             user_id, room_id, account_data_type, body
         )
@@ -152,9 +213,63 @@ class RoomAccountDataServlet(RestServlet):
         if event is None:
             raise NotFoundError("Room account data not found")
 
+        # If experimental support for MSC3391 is enabled, then this endpoint should
+        # return a 404 if the content for an account data type is an empty dict.
+        if self._hs.config.experimental.msc3391_enabled and event == {}:
+            raise NotFoundError("Room account data not found")
+
         return 200, event
 
 
+class UnstableRoomAccountDataServlet(RestServlet):
+    """
+    Contains an unstable endpoint for removing room account data, as specified by
+    MSC3391. If that MSC is accepted, this code should have unstable prefixes removed
+    and become incorporated into RoomAccountDataServlet above.
+    """
+
+    PATTERNS = client_patterns(
+        "/org.matrix.msc3391/user/(?P<user_id>[^/]*)"
+        "/rooms/(?P<room_id>[^/]*)"
+        "/account_data/(?P<account_data_type>[^/]*)",
+        unstable=True,
+        releases=(),
+    )
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__()
+        self.auth = hs.get_auth()
+        self.handler = hs.get_account_data_handler()
+
+    async def on_DELETE(
+        self,
+        request: SynapseRequest,
+        user_id: str,
+        room_id: str,
+        account_data_type: str,
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request)
+        if user_id != requester.user.to_string():
+            raise AuthError(403, "Cannot delete account data for other users.")
+
+        if not RoomID.is_valid(room_id):
+            raise SynapseError(
+                400,
+                f"{room_id} is not a valid room ID",
+                Codes.INVALID_PARAM,
+            )
+
+        await self.handler.remove_account_data_for_room(
+            user_id, room_id, account_data_type
+        )
+
+        return 200, {}
+
+
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     AccountDataServlet(hs).register(http_server)
     RoomAccountDataServlet(hs).register(http_server)
+
+    if hs.config.experimental.msc3391_enabled:
+        UnstableAccountDataServlet(hs).register(http_server)
+        UnstableRoomAccountDataServlet(hs).register(http_server)
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 0b29e67b94..88479a16db 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -1762,7 +1762,8 @@ class DatabasePool:
             desc: description of the transaction, for logging and metrics
 
         Returns:
-            A list of dictionaries.
+            A list of dictionaries, one per result row, each a mapping between the
+            column names from `retcols` and that column's value for the row.
         """
         return await self.runInteraction(
             desc,
@@ -1791,6 +1792,10 @@ class DatabasePool:
                 column names and values to select the rows with, or None to not
                 apply a WHERE clause.
             retcols: the names of the columns to return
+
+        Returns:
+            A list of dictionaries, one per result row, each a mapping between the
+            column names from `retcols` and that column's value for the row.
         """
         if keyvalues:
             sql = "SELECT %s FROM %s WHERE %s" % (
@@ -1898,6 +1903,19 @@ class DatabasePool:
         updatevalues: Dict[str, Any],
         desc: str,
     ) -> int:
+        """
+        Update rows in the given database table.
+        If the given keyvalues don't match anything, nothing will be updated.
+
+        Args:
+            table: The database table to update.
+            keyvalues: A mapping of column name to value to match rows on.
+            updatevalues: A mapping of column name to value to replace in any matched rows.
+            desc: description of the transaction, for logging and metrics.
+
+        Returns:
+            The number of rows that were updated. Will be 0 if no matching rows were found.
+        """
         return await self.runInteraction(
             desc, self.simple_update_txn, table, keyvalues, updatevalues
         )
@@ -1909,6 +1927,19 @@ class DatabasePool:
         keyvalues: Dict[str, Any],
         updatevalues: Dict[str, Any],
     ) -> int:
+        """
+        Update rows in the given database table.
+        If the given keyvalues don't match anything, nothing will be updated.
+
+        Args:
+            txn: The database transaction object.
+            table: The database table to update.
+            keyvalues: A mapping of column name to value to match rows on.
+            updatevalues: A mapping of column name to value to replace in any matched rows.
+
+        Returns:
+            The number of rows that were updated. Will be 0 if no matching rows were found.
+        """
         if keyvalues:
             where = "WHERE %s" % " AND ".join("%s = ?" % k for k in keyvalues.keys())
         else:
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 07908c41d9..e59776f434 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -123,7 +123,11 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
     async def get_account_data_for_user(
         self, user_id: str
     ) -> Tuple[Dict[str, JsonDict], Dict[str, Dict[str, JsonDict]]]:
-        """Get all the client account_data for a user.
+        """
+        Get all the client account_data for a user.
+
+        If experimental MSC3391 support is enabled, any entries with an empty
+        content body are excluded, as this means they have been deleted.
 
         Args:
             user_id: The user to get the account_data for.
@@ -135,27 +139,48 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
         def get_account_data_for_user_txn(
             txn: LoggingTransaction,
         ) -> Tuple[Dict[str, JsonDict], Dict[str, Dict[str, JsonDict]]]:
-            rows = self.db_pool.simple_select_list_txn(
-                txn,
-                "account_data",
-                {"user_id": user_id},
-                ["account_data_type", "content"],
-            )
+            # The `content != '{}'` condition below prevents us from using
+            # `simple_select_list_txn` here, as it doesn't support conditions
+            # other than 'equals'.
+            sql = """
+                SELECT account_data_type, content FROM account_data
+                WHERE user_id = ?
+            """
+
+            # If experimental MSC3391 support is enabled, then account data entries
+            # with an empty content are considered "deleted". So skip adding them to
+            # the results.
+            if self.hs.config.experimental.msc3391_enabled:
+                sql += " AND content != '{}'"
+
+            txn.execute(sql, (user_id,))
+            rows = self.db_pool.cursor_to_dict(txn)
 
             global_account_data = {
                 row["account_data_type"]: db_to_json(row["content"]) for row in rows
             }
 
-            rows = self.db_pool.simple_select_list_txn(
-                txn,
-                "room_account_data",
-                {"user_id": user_id},
-                ["room_id", "account_data_type", "content"],
-            )
+            # The `content != '{}'` condition below prevents us from using
+            # `simple_select_list_txn` here, as it doesn't support conditions
+            # other than 'equals'.
+            sql = """
+                SELECT room_id, account_data_type, content FROM room_account_data
+                WHERE user_id = ?
+            """
+
+            # If experimental MSC3391 support is enabled, then account data entries
+            # with an empty content are considered "deleted". So skip adding them to
+            # the results.
+            if self.hs.config.experimental.msc3391_enabled:
+                sql += " AND content != '{}'"
+
+            txn.execute(sql, (user_id,))
+            rows = self.db_pool.cursor_to_dict(txn)
 
             by_room: Dict[str, Dict[str, JsonDict]] = {}
             for row in rows:
                 room_data = by_room.setdefault(row["room_id"], {})
+
                 room_data[row["account_data_type"]] = db_to_json(row["content"])
 
             return global_account_data, by_room
@@ -469,6 +494,72 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
 
         return self._account_data_id_gen.get_current_token()
 
+    async def remove_account_data_for_room(
+        self, user_id: str, room_id: str, account_data_type: str
+    ) -> Optional[int]:
+        """Delete the room account data for the user of a given type.
+
+        Args:
+            user_id: The user to remove account_data for.
+            room_id: The room ID to scope the request to.
+            account_data_type: The account data type to delete.
+
+        Returns:
+            The maximum stream position, or None if there was no matching room account
+            data to delete.
+        """
+        assert self._can_write_to_account_data
+        assert isinstance(self._account_data_id_gen, AbstractStreamIdGenerator)
+
+        def _remove_account_data_for_room_txn(
+            txn: LoggingTransaction, next_id: int
+        ) -> bool:
+            """
+            Args:
+                txn: The transaction object.
+                next_id: The stream_id to update any existing rows to.
+
+            Returns:
+                True if an entry in room_account_data had its content set to '{}',
+                otherwise False. This informs callers of whether there actually was an
+                existing room account data entry to delete, or if the call was a no-op.
+            """
+            # We can't use `simple_update` as it doesn't have the ability to specify
+            # where clauses other than '=', which we need for `content != '{}'` below.
+            sql = """
+                UPDATE room_account_data
+                    SET stream_id = ?, content = '{}'
+                WHERE user_id = ?
+                    AND room_id = ?
+                    AND account_data_type = ?
+                    AND content != '{}'
+            """
+            txn.execute(
+                sql,
+                (next_id, user_id, room_id, account_data_type),
+            )
+            # Return true if any rows were updated.
+            return txn.rowcount != 0
+
+        async with self._account_data_id_gen.get_next() as next_id:
+            row_updated = await self.db_pool.runInteraction(
+                "remove_account_data_for_room",
+                _remove_account_data_for_room_txn,
+                next_id,
+            )
+
+            if not row_updated:
+                return None
+
+            self._account_data_stream_cache.entity_has_changed(user_id, next_id)
+            self.get_account_data_for_user.invalidate((user_id,))
+            self.get_account_data_for_room.invalidate((user_id, room_id))
+            self.get_account_data_for_room_and_type.prefill(
+                (user_id, room_id, account_data_type), {}
+            )
+
+        return self._account_data_id_gen.get_current_token()
+
     async def add_account_data_for_user(
         self, user_id: str, account_data_type: str, content: JsonDict
     ) -> int:
@@ -569,6 +660,108 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             self._invalidate_cache_and_stream(txn, self.ignored_by, (ignored_user_id,))
         self._invalidate_cache_and_stream(txn, self.ignored_users, (user_id,))
 
+    async def remove_account_data_for_user(
+        self,
+        user_id: str,
+        account_data_type: str,
+    ) -> Optional[int]:
+        """
+        Delete a single piece of user account data by type.
+
+        A "delete" is performed by updating a potentially existing row in the
+        "account_data" database table for (user_id, account_data_type) and
+        setting its content to "{}".
+
+        Args:
+            user_id: The user ID to modify the account data of.
+            account_data_type: The type to remove.
+
+        Returns:
+            The maximum stream position, or None if there was no matching account data
+            to delete.
+        """
+        assert self._can_write_to_account_data
+        assert isinstance(self._account_data_id_gen, AbstractStreamIdGenerator)
+
+        def _remove_account_data_for_user_txn(
+            txn: LoggingTransaction, next_id: int
+        ) -> bool:
+            """
+            Args:
+                txn: The transaction object.
+                next_id: The stream_id to update any existing rows to.
+
+            Returns:
+                True if an entry in account_data had its content set to '{}', otherwise
+                False. This informs callers of whether there actually was an existing
+                account data entry to delete, or if the call was a no-op.
+            """
+            # We can't use `simple_update` as it doesn't have the ability to specify
+            # where clauses other than '=', which we need for `content != '{}'` below.
+            sql = """
+                UPDATE account_data
+                    SET stream_id = ?, content = '{}'
+                WHERE user_id = ?
+                    AND account_data_type = ?
+                    AND content != '{}'
+            """
+            txn.execute(sql, (next_id, user_id, account_data_type))
+            if txn.rowcount == 0:
+                # We didn't update any rows. This means that there was no matching
+                # account data entry to delete in the first place.
+                return False
+
+            # Ignored users get denormalized into a separate table as an optimisation.
+            if account_data_type == AccountDataTypes.IGNORED_USER_LIST:
+                # If this method was called with the ignored users account data type, we
+                # simply delete all ignored users.
+
+                # First pull all the users that this user ignores.
+                previously_ignored_users = set(
+                    self.db_pool.simple_select_onecol_txn(
+                        txn,
+                        table="ignored_users",
+                        keyvalues={"ignorer_user_id": user_id},
+                        retcol="ignored_user_id",
+                    )
+                )
+
+                # Then delete them from the database.
+                self.db_pool.simple_delete_txn(
+                    txn,
+                    table="ignored_users",
+                    keyvalues={"ignorer_user_id": user_id},
+                )
+
+                # Invalidate the cache for ignored users which were removed.
+                for ignored_user_id in previously_ignored_users:
+                    self._invalidate_cache_and_stream(
+                        txn, self.ignored_by, (ignored_user_id,)
+                    )
+
+                # Invalidate for this user the cache tracking ignored users.
+                self._invalidate_cache_and_stream(txn, self.ignored_users, (user_id,))
+
+            return True
+
+        async with self._account_data_id_gen.get_next() as next_id:
+            row_updated = await self.db_pool.runInteraction(
+                "remove_account_data_for_user",
+                _remove_account_data_for_user_txn,
+                next_id,
+            )
+
+            if not row_updated:
+                return None
+
+            self._account_data_stream_cache.entity_has_changed(user_id, next_id)
+            self.get_account_data_for_user.invalidate((user_id,))
+            self.get_global_account_data_by_type_for_user.prefill(
+                (user_id, account_data_type), {}
+            )
+
+        return self._account_data_id_gen.get_current_token()
+
     async def purge_account_data_for_user(self, user_id: str) -> None:
         """
         Removes ALL the account data for a user.
-- 
cgit 1.5.1


From db1cfe9c80a707995fcad8f3faa839acb247068a Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Wed, 4 Jan 2023 11:49:26 +0000
Subject: Update all stream IDs after processing replication rows (#14723)

This creates a new store method, `process_replication_position`, that
is called after `process_replication_rows`. Moving stream ID advances
there guarantees that any relevant cache invalidations will have been
applied before the stream is advanced.

This avoids race conditions in which Python switches between threads
midway through the `process_replication_rows` method: due to class
resolution ordering, stream IDs may be advanced before caches are
invalidated.

See this comment/issue for further discussion:
	https://github.com/matrix-org/synapse/issues/14158#issuecomment-1344048703
---
 changelog.d/14723.bugfix                        |  1 +
 synapse/replication/tcp/client.py               |  3 +++
 synapse/storage/_base.py                        | 17 ++++++++++++++++-
 synapse/storage/databases/main/account_data.py  | 14 ++++++++++----
 synapse/storage/databases/main/cache.py         | 11 ++++++++---
 synapse/storage/databases/main/deviceinbox.py   |  7 +++++++
 synapse/storage/databases/main/devices.py       | 11 +++++++++--
 synapse/storage/databases/main/events_worker.py | 15 ++++++++++-----
 synapse/storage/databases/main/presence.py      |  8 +++++++-
 synapse/storage/databases/main/push_rule.py     |  7 +++++++
 synapse/storage/databases/main/pusher.py        |  6 +++---
 synapse/storage/databases/main/receipts.py      |  7 +++++++
 synapse/storage/databases/main/tags.py          |  8 +++++++-
 13 files changed, 95 insertions(+), 20 deletions(-)
 create mode 100644 changelog.d/14723.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14723.bugfix b/changelog.d/14723.bugfix
new file mode 100644
index 0000000000..e1f89cee35
--- /dev/null
+++ b/changelog.d/14723.bugfix
@@ -0,0 +1 @@
+Ensure stream IDs are always updated after caches get invalidated with workers. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 658d89210d..b5e40da533 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -152,6 +152,9 @@ class ReplicationDataHandler:
             rows: a list of Stream.ROW_TYPE objects as returned by Stream.parse_row.
         """
         self.store.process_replication_rows(stream_name, instance_name, token, rows)
+        # NOTE: this must be called after process_replication_rows to ensure any
+        # cache invalidations are first handled before any stream ID advances.
+        self.store.process_replication_position(stream_name, instance_name, token)
 
         if self.send_handler:
             await self.send_handler.process_replication_rows(stream_name, token, rows)
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 69abf6fa87..41d9111019 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -57,7 +57,22 @@ class SQLBaseStore(metaclass=ABCMeta):
         token: int,
         rows: Iterable[Any],
     ) -> None:
-        pass
+        """
+        Used by storage classes to invalidate caches based on incoming replication data. These
+        must not update any ID generators; use `process_replication_position` for that.
+        """
+
+    def process_replication_position(  # noqa: B027 (no-op by design)
+        self,
+        stream_name: str,
+        instance_name: str,
+        token: int,
+    ) -> None:
+        """
+        Used by storage classes to advance ID generators based on incoming replication data. This
+        is called after process_replication_rows such that caches are invalidated before any token
+        positions advance.
+        """
 
     def _invalidate_state_caches(
         self, room_id: str, members_changed: Collection[str]
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index e59776f434..86032897f5 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -436,10 +436,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
         token: int,
         rows: Iterable[Any],
     ) -> None:
-        if stream_name == TagAccountDataStream.NAME:
-            self._account_data_id_gen.advance(instance_name, token)
-        elif stream_name == AccountDataStream.NAME:
-            self._account_data_id_gen.advance(instance_name, token)
+        if stream_name == AccountDataStream.NAME:
             for row in rows:
                 if not row.room_id:
                     self.get_global_account_data_by_type_for_user.invalidate(
@@ -454,6 +451,15 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
 
         super().process_replication_rows(stream_name, instance_name, token, rows)
 
+    def process_replication_position(
+        self, stream_name: str, instance_name: str, token: int
+    ) -> None:
+        if stream_name == TagAccountDataStream.NAME:
+            self._account_data_id_gen.advance(instance_name, token)
+        elif stream_name == AccountDataStream.NAME:
+            self._account_data_id_gen.advance(instance_name, token)
+        super().process_replication_position(stream_name, instance_name, token)
+
     async def add_account_data_to_room(
         self, user_id: str, room_id: str, account_data_type: str, content: JsonDict
     ) -> int:
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index a58668a380..2179a8bf59 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -164,9 +164,6 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
                     backfilled=True,
                 )
         elif stream_name == CachesStream.NAME:
-            if self._cache_id_gen:
-                self._cache_id_gen.advance(instance_name, token)
-
             for row in rows:
                 if row.cache_func == CURRENT_STATE_CACHE_NAME:
                     if row.keys is None:
@@ -182,6 +179,14 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
 
         super().process_replication_rows(stream_name, instance_name, token, rows)
 
+    def process_replication_position(
+        self, stream_name: str, instance_name: str, token: int
+    ) -> None:
+        if stream_name == CachesStream.NAME:
+            if self._cache_id_gen:
+                self._cache_id_gen.advance(instance_name, token)
+        super().process_replication_position(stream_name, instance_name, token)
+
     def _process_event_stream_row(self, token: int, row: EventsStreamRow) -> None:
         data = row.data
 
diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py
index 48a54d9cb8..713be91c5d 100644
--- a/synapse/storage/databases/main/deviceinbox.py
+++ b/synapse/storage/databases/main/deviceinbox.py
@@ -157,6 +157,13 @@ class DeviceInboxWorkerStore(SQLBaseStore):
                     )
         return super().process_replication_rows(stream_name, instance_name, token, rows)
 
+    def process_replication_position(
+        self, stream_name: str, instance_name: str, token: int
+    ) -> None:
+        if stream_name == ToDeviceStream.NAME:
+            self._device_inbox_id_gen.advance(instance_name, token)
+        super().process_replication_position(stream_name, instance_name, token)
+
     def get_to_device_stream_token(self) -> int:
         return self._device_inbox_id_gen.get_current_token()
 
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index a5bb4d404e..db877e3f13 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -162,14 +162,21 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
         self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any]
     ) -> None:
         if stream_name == DeviceListsStream.NAME:
-            self._device_list_id_gen.advance(instance_name, token)
             self._invalidate_caches_for_devices(token, rows)
         elif stream_name == UserSignatureStream.NAME:
-            self._device_list_id_gen.advance(instance_name, token)
             for row in rows:
                 self._user_signature_stream_cache.entity_has_changed(row.user_id, token)
         return super().process_replication_rows(stream_name, instance_name, token, rows)
 
+    def process_replication_position(
+        self, stream_name: str, instance_name: str, token: int
+    ) -> None:
+        if stream_name == DeviceListsStream.NAME:
+            self._device_list_id_gen.advance(instance_name, token)
+        elif stream_name == UserSignatureStream.NAME:
+            self._device_list_id_gen.advance(instance_name, token)
+        super().process_replication_position(stream_name, instance_name, token)
+
     def _invalidate_caches_for_devices(
         self, token: int, rows: Iterable[DeviceListsStream.DeviceListsStreamRow]
     ) -> None:
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 761b15a815..d150fa8a94 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -388,11 +388,7 @@ class EventsWorkerStore(SQLBaseStore):
         token: int,
         rows: Iterable[Any],
     ) -> None:
-        if stream_name == EventsStream.NAME:
-            self._stream_id_gen.advance(instance_name, token)
-        elif stream_name == BackfillStream.NAME:
-            self._backfill_id_gen.advance(instance_name, -token)
-        elif stream_name == UnPartialStatedEventStream.NAME:
+        if stream_name == UnPartialStatedEventStream.NAME:
             for row in rows:
                 assert isinstance(row, UnPartialStatedEventStreamRow)
 
@@ -405,6 +401,15 @@ class EventsWorkerStore(SQLBaseStore):
 
         super().process_replication_rows(stream_name, instance_name, token, rows)
 
+    def process_replication_position(
+        self, stream_name: str, instance_name: str, token: int
+    ) -> None:
+        if stream_name == EventsStream.NAME:
+            self._stream_id_gen.advance(instance_name, token)
+        elif stream_name == BackfillStream.NAME:
+            self._backfill_id_gen.advance(instance_name, -token)
+        super().process_replication_position(stream_name, instance_name, token)
+
     async def have_censored_event(self, event_id: str) -> bool:
         """Check if an event has been censored, i.e. if the content of the event has been erased
         from the database due to a redaction.
diff --git a/synapse/storage/databases/main/presence.py b/synapse/storage/databases/main/presence.py
index 9769a18a9d..7b60815043 100644
--- a/synapse/storage/databases/main/presence.py
+++ b/synapse/storage/databases/main/presence.py
@@ -439,8 +439,14 @@ class PresenceStore(PresenceBackgroundUpdateStore, CacheInvalidationWorkerStore)
         rows: Iterable[Any],
     ) -> None:
         if stream_name == PresenceStream.NAME:
-            self._presence_id_gen.advance(instance_name, token)
             for row in rows:
                 self.presence_stream_cache.entity_has_changed(row.user_id, token)
                 self._get_presence_for_user.invalidate((row.user_id,))
         return super().process_replication_rows(stream_name, instance_name, token, rows)
+
+    def process_replication_position(
+        self, stream_name: str, instance_name: str, token: int
+    ) -> None:
+        if stream_name == PresenceStream.NAME:
+            self._presence_id_gen.advance(instance_name, token)
+        super().process_replication_position(stream_name, instance_name, token)
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index d4c64c46ad..d4e4b777da 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -154,6 +154,13 @@ class PushRulesWorkerStore(
                 self.push_rules_stream_cache.entity_has_changed(row.user_id, token)
         return super().process_replication_rows(stream_name, instance_name, token, rows)
 
+    def process_replication_position(
+        self, stream_name: str, instance_name: str, token: int
+    ) -> None:
+        if stream_name == PushRulesStream.NAME:
+            self._push_rules_stream_id_gen.advance(instance_name, token)
+        super().process_replication_position(stream_name, instance_name, token)
+
     @cached(max_entries=5000)
     async def get_push_rules_for_user(self, user_id: str) -> FilteredPushRules:
         rows = await self.db_pool.simple_select_list(
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
index 40fd781a6a..7f24a3b6ec 100644
--- a/synapse/storage/databases/main/pusher.py
+++ b/synapse/storage/databases/main/pusher.py
@@ -111,12 +111,12 @@ class PusherWorkerStore(SQLBaseStore):
     def get_pushers_stream_token(self) -> int:
         return self._pushers_id_gen.get_current_token()
 
-    def process_replication_rows(
-        self, stream_name: str, instance_name: str, token: int, rows: Iterable[Any]
+    def process_replication_position(
+        self, stream_name: str, instance_name: str, token: int
     ) -> None:
         if stream_name == PushersStream.NAME:
             self._pushers_id_gen.advance(instance_name, token)
-        return super().process_replication_rows(stream_name, instance_name, token, rows)
+        super().process_replication_position(stream_name, instance_name, token)
 
     async def get_pushers_by_app_id_and_pushkey(
         self, app_id: str, pushkey: str
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index e06725f69c..86f5bce5f0 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -588,6 +588,13 @@ class ReceiptsWorkerStore(SQLBaseStore):
 
         return super().process_replication_rows(stream_name, instance_name, token, rows)
 
+    def process_replication_position(
+        self, stream_name: str, instance_name: str, token: int
+    ) -> None:
+        if stream_name == ReceiptsStream.NAME:
+            self._receipts_id_gen.advance(instance_name, token)
+        super().process_replication_position(stream_name, instance_name, token)
+
     def _insert_linearized_receipt_txn(
         self,
         txn: LoggingTransaction,
diff --git a/synapse/storage/databases/main/tags.py b/synapse/storage/databases/main/tags.py
index b0f5de67a3..e23c927e02 100644
--- a/synapse/storage/databases/main/tags.py
+++ b/synapse/storage/databases/main/tags.py
@@ -300,13 +300,19 @@ class TagsWorkerStore(AccountDataWorkerStore):
         rows: Iterable[Any],
     ) -> None:
         if stream_name == TagAccountDataStream.NAME:
-            self._account_data_id_gen.advance(instance_name, token)
             for row in rows:
                 self.get_tags_for_user.invalidate((row.user_id,))
                 self._account_data_stream_cache.entity_has_changed(row.user_id, token)
 
         super().process_replication_rows(stream_name, instance_name, token, rows)
 
+    def process_replication_position(
+        self, stream_name: str, instance_name: str, token: int
+    ) -> None:
+        if stream_name == TagAccountDataStream.NAME:
+            self._account_data_id_gen.advance(instance_name, token)
+        super().process_replication_position(stream_name, instance_name, token)
+
 
 class TagsStore(TagsWorkerStore):
     pass
-- 
cgit 1.5.1


From 906dfaa2cf5a79ed9c18529b1a370ffd49c0204e Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 4 Jan 2023 08:26:10 -0500
Subject: Support non-OpenID compliant user info endpoints (#14753)

OpenID specifies the format of the user info endpoint and some
OAuth 2.0 IdPs do not follow it, e.g. NextCloud and Twitter.

This adds subject_template and picture_template options to the
default mapping provider for more flexibility in matching those user
info responses.
---
 changelog.d/14753.feature                        |  1 +
 docs/usage/configuration/config_documentation.md | 18 ++++++++++++++
 synapse/handlers/oidc.py                         | 31 ++++++++++++++++++------
 3 files changed, 42 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/14753.feature

(limited to 'synapse')

diff --git a/changelog.d/14753.feature b/changelog.d/14753.feature
new file mode 100644
index 0000000000..38b4d6af4b
--- /dev/null
+++ b/changelog.d/14753.feature
@@ -0,0 +1 @@
+Support non-OpenID compliant userinfo claims for subject and picture.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 67e0acc910..23f9dcbea2 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3098,10 +3098,26 @@ Options for each entry include:
 
         For the default provider, the following settings are available:
 
+       * `subject_template`: Jinja2 template for a unique identifier for the user.
+         Defaults to `{{ user.sub }}`, which OpenID Connect compliant providers should provide.
+
+         This replaces and overrides `subject_claim`.
+
        * `subject_claim`: name of the claim containing a unique identifier
          for the user. Defaults to 'sub', which OpenID Connect
          compliant providers should provide.
 
+         *Deprecated in Synapse v1.75.0.*
+
+       * `picture_template`: Jinja2 template for an url for the user's profile picture.
+         Defaults to `{{ user.picture }}`, which OpenID Connect compliant providers should
+         provide and has to refer to a direct image file such as PNG, JPEG, or GIF image file.
+
+         This replaces and overrides `picture_claim`.
+
+         Currently only supported in monolithic (single-process) server configurations
+         where the media repository runs within the Synapse process.
+
        * `picture_claim`: name of the claim containing an url for the user's profile picture.
          Defaults to 'picture', which OpenID Connect compliant providers should provide
          and has to refer to a direct image file such as PNG, JPEG, or GIF image file.
@@ -3109,6 +3125,8 @@ Options for each entry include:
          Currently only supported in monolithic (single-process) server configurations
          where the media repository runs within the Synapse process.
 
+         *Deprecated in Synapse v1.75.0.*
+
        * `localpart_template`: Jinja2 template for the localpart of the MXID.
           If this is not set, the user will be prompted to choose their
           own username (see the documentation for the `sso_auth_account_details.html`
diff --git a/synapse/handlers/oidc.py b/synapse/handlers/oidc.py
index 23fb00c9c9..24e1cec5b6 100644
--- a/synapse/handlers/oidc.py
+++ b/synapse/handlers/oidc.py
@@ -1520,8 +1520,8 @@ env.filters.update(
 
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class JinjaOidcMappingConfig:
-    subject_claim: str
-    picture_claim: str
+    subject_template: Template
+    picture_template: Template
     localpart_template: Optional[Template]
     display_name_template: Optional[Template]
     email_template: Optional[Template]
@@ -1540,8 +1540,23 @@ class JinjaOidcMappingProvider(OidcMappingProvider[JinjaOidcMappingConfig]):
 
     @staticmethod
     def parse_config(config: dict) -> JinjaOidcMappingConfig:
-        subject_claim = config.get("subject_claim", "sub")
-        picture_claim = config.get("picture_claim", "picture")
+        def parse_template_config_with_claim(
+            option_name: str, default_claim: str
+        ) -> Template:
+            template_name = f"{option_name}_template"
+            template = config.get(template_name)
+            if not template:
+                # Convert the legacy subject_claim into a template.
+                claim = config.get(f"{option_name}_claim", default_claim)
+                template = "{{ user.%s }}" % (claim,)
+
+            try:
+                return env.from_string(template)
+            except Exception as e:
+                raise ConfigError("invalid jinja template", path=[template_name]) from e
+
+        subject_template = parse_template_config_with_claim("subject", "sub")
+        picture_template = parse_template_config_with_claim("picture", "picture")
 
         def parse_template_config(option_name: str) -> Optional[Template]:
             if option_name not in config:
@@ -1574,8 +1589,8 @@ class JinjaOidcMappingProvider(OidcMappingProvider[JinjaOidcMappingConfig]):
             raise ConfigError("must be a bool", path=["confirm_localpart"])
 
         return JinjaOidcMappingConfig(
-            subject_claim=subject_claim,
-            picture_claim=picture_claim,
+            subject_template=subject_template,
+            picture_template=picture_template,
             localpart_template=localpart_template,
             display_name_template=display_name_template,
             email_template=email_template,
@@ -1584,7 +1599,7 @@ class JinjaOidcMappingProvider(OidcMappingProvider[JinjaOidcMappingConfig]):
         )
 
     def get_remote_user_id(self, userinfo: UserInfo) -> str:
-        return userinfo[self._config.subject_claim]
+        return self._config.subject_template.render(user=userinfo).strip()
 
     async def map_user_attributes(
         self, userinfo: UserInfo, token: Token, failures: int
@@ -1615,7 +1630,7 @@ class JinjaOidcMappingProvider(OidcMappingProvider[JinjaOidcMappingConfig]):
         if email:
             emails.append(email)
 
-        picture = userinfo.get(self._config.picture_claim)
+        picture = self._config.picture_template.render(user=userinfo).strip()
 
         return UserAttributeDict(
             localpart=localpart,
-- 
cgit 1.5.1


From 630d0aeaf607b4016e67895d81b0402a5dfcc769 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 4 Jan 2023 14:58:08 -0500
Subject: Support RFC7636 PKCE in the OAuth 2.0 flow. (#14750)

PKCE can protect against certain attacks and is enabled by default. Support
can be controlled manually by setting the pkce_method of each oidc_providers
entry to 'auto' (default), 'always', or 'never'.

This is required by Twitter OAuth 2.0 support.
---
 changelog.d/14750.feature                        |   1 +
 docs/usage/configuration/config_documentation.md |   7 +-
 synapse/config/oidc.py                           |   6 +
 synapse/handlers/oidc.py                         |  54 ++++++--
 synapse/util/macaroons.py                        |   7 ++
 tests/handlers/test_oidc.py                      | 152 +++++++++++++++++++++--
 tests/util/test_macaroons.py                     |   1 +
 7 files changed, 212 insertions(+), 16 deletions(-)
 create mode 100644 changelog.d/14750.feature

(limited to 'synapse')

diff --git a/changelog.d/14750.feature b/changelog.d/14750.feature
new file mode 100644
index 0000000000..cfed64ee80
--- /dev/null
+++ b/changelog.d/14750.feature
@@ -0,0 +1 @@
+Support [RFC7636](https://datatracker.ietf.org/doc/html/rfc7636) Proof Key for Code Exchange for OAuth single sign-on.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 23f9dcbea2..ec8403c7e9 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3053,8 +3053,13 @@ Options for each entry include:
    values are `client_secret_basic` (default), `client_secret_post` and
    `none`.
 
+* `pkce_method`: Whether to use proof key for code exchange when requesting
+   and exchanging the token. Valid values are: `auto`, `always`, or `never`. Defaults
+   to `auto`, which uses PKCE if supported during metadata discovery. Set to `always`
+   to force enable PKCE or `never` to force disable PKCE.
+
 * `scopes`: list of scopes to request. This should normally include the "openid"
-   scope. Defaults to ["openid"].
+   scope. Defaults to `["openid"]`.
 
 * `authorization_endpoint`: the oauth2 authorization endpoint. Required if
    provider discovery is disabled.
diff --git a/synapse/config/oidc.py b/synapse/config/oidc.py
index 0bd83f4010..df8c422043 100644
--- a/synapse/config/oidc.py
+++ b/synapse/config/oidc.py
@@ -117,6 +117,7 @@ OIDC_PROVIDER_CONFIG_SCHEMA = {
             # to avoid importing authlib here.
             "enum": ["client_secret_basic", "client_secret_post", "none"],
         },
+        "pkce_method": {"type": "string", "enum": ["auto", "always", "never"]},
         "scopes": {"type": "array", "items": {"type": "string"}},
         "authorization_endpoint": {"type": "string"},
         "token_endpoint": {"type": "string"},
@@ -289,6 +290,7 @@ def _parse_oidc_config_dict(
         client_secret=oidc_config.get("client_secret"),
         client_secret_jwt_key=client_secret_jwt_key,
         client_auth_method=oidc_config.get("client_auth_method", "client_secret_basic"),
+        pkce_method=oidc_config.get("pkce_method", "auto"),
         scopes=oidc_config.get("scopes", ["openid"]),
         authorization_endpoint=oidc_config.get("authorization_endpoint"),
         token_endpoint=oidc_config.get("token_endpoint"),
@@ -357,6 +359,10 @@ class OidcProviderConfig:
     # 'none'.
     client_auth_method: str
 
+    # Whether to enable PKCE when exchanging the authorization & token.
+    # Valid values are 'auto', 'always', and 'never'.
+    pkce_method: str
+
     # list of scopes to request
     scopes: Collection[str]
 
diff --git a/synapse/handlers/oidc.py b/synapse/handlers/oidc.py
index 24e1cec5b6..0fc829acf7 100644
--- a/synapse/handlers/oidc.py
+++ b/synapse/handlers/oidc.py
@@ -36,6 +36,7 @@ from authlib.jose import JsonWebToken, JWTClaims
 from authlib.jose.errors import InvalidClaimError, JoseError, MissingClaimError
 from authlib.oauth2.auth import ClientAuth
 from authlib.oauth2.rfc6749.parameters import prepare_grant_uri
+from authlib.oauth2.rfc7636.challenge import create_s256_code_challenge
 from authlib.oidc.core import CodeIDToken, UserInfo
 from authlib.oidc.discovery import OpenIDProviderMetadata, get_well_known_url
 from jinja2 import Environment, Template
@@ -475,6 +476,16 @@ class OidcProvider:
                     )
                 )
 
+        # If PKCE support is advertised ensure the wanted method is available.
+        if m.get("code_challenge_methods_supported") is not None:
+            m.validate_code_challenge_methods_supported()
+            if "S256" not in m["code_challenge_methods_supported"]:
+                raise ValueError(
+                    '"S256" not in "code_challenge_methods_supported" ({supported!r})'.format(
+                        supported=m["code_challenge_methods_supported"],
+                    )
+                )
+
         if m.get("response_types_supported") is not None:
             m.validate_response_types_supported()
 
@@ -602,6 +613,11 @@ class OidcProvider:
         if self._config.jwks_uri:
             metadata["jwks_uri"] = self._config.jwks_uri
 
+        if self._config.pkce_method == "always":
+            metadata["code_challenge_methods_supported"] = ["S256"]
+        elif self._config.pkce_method == "never":
+            metadata.pop("code_challenge_methods_supported", None)
+
         self._validate_metadata(metadata)
 
         return metadata
@@ -653,7 +669,7 @@ class OidcProvider:
 
         return jwk_set
 
-    async def _exchange_code(self, code: str) -> Token:
+    async def _exchange_code(self, code: str, code_verifier: str) -> Token:
         """Exchange an authorization code for a token.
 
         This calls the ``token_endpoint`` with the authorization code we
@@ -666,6 +682,7 @@ class OidcProvider:
 
         Args:
             code: The authorization code we got from the callback.
+            code_verifier: The PKCE code verifier to send, blank if unused.
 
         Returns:
             A dict containing various tokens.
@@ -696,6 +713,8 @@ class OidcProvider:
             "code": code,
             "redirect_uri": self._callback_url,
         }
+        if code_verifier:
+            args["code_verifier"] = code_verifier
         body = urlencode(args, True)
 
         # Fill the body/headers with credentials
@@ -914,11 +933,14 @@ class OidcProvider:
           - ``scope``: the list of scopes set in ``oidc_config.scopes``
           - ``state``: a random string
           - ``nonce``: a random string
+          - ``code_challenge``: a RFC7636 code challenge (if PKCE is supported)
 
-        In addition generating a redirect URL, we are setting a cookie with
-        a signed macaroon token containing the state, the nonce and the
-        client_redirect_url params. Those are then checked when the client
-        comes back from the provider.
+        In addition to generating a redirect URL, we are setting a cookie with
+        a signed macaroon token containing the state, the nonce, the
+        client_redirect_url, and (optionally) the code_verifier params. The state,
+        nonce, and client_redirect_url are then checked when the client comes back
+        from the provider. The code_verifier is passed back to the server during
+        the token exchange and compared to the code_challenge sent in this request.
 
         Args:
             request: the incoming request from the browser.
@@ -935,10 +957,25 @@ class OidcProvider:
 
         state = generate_token()
         nonce = generate_token()
+        code_verifier = ""
 
         if not client_redirect_url:
             client_redirect_url = b""
 
+        metadata = await self.load_metadata()
+
+        # Automatically enable PKCE if it is supported.
+        extra_grant_values = {}
+        if metadata.get("code_challenge_methods_supported"):
+            code_verifier = generate_token(48)
+
+            # Note that we verified the server supports S256 earlier (in
+            # OidcProvider._validate_metadata).
+            extra_grant_values = {
+                "code_challenge_method": "S256",
+                "code_challenge": create_s256_code_challenge(code_verifier),
+            }
+
         cookie = self._macaroon_generaton.generate_oidc_session_token(
             state=state,
             session_data=OidcSessionData(
@@ -946,6 +983,7 @@ class OidcProvider:
                 nonce=nonce,
                 client_redirect_url=client_redirect_url.decode(),
                 ui_auth_session_id=ui_auth_session_id or "",
+                code_verifier=code_verifier,
             ),
         )
 
@@ -966,7 +1004,6 @@ class OidcProvider:
                 )
             )
 
-        metadata = await self.load_metadata()
         authorization_endpoint = metadata.get("authorization_endpoint")
         return prepare_grant_uri(
             authorization_endpoint,
@@ -976,6 +1013,7 @@ class OidcProvider:
             scope=self._scopes,
             state=state,
             nonce=nonce,
+            **extra_grant_values,
         )
 
     async def handle_oidc_callback(
@@ -1003,7 +1041,9 @@ class OidcProvider:
         # Exchange the code with the provider
         try:
             logger.debug("Exchanging OAuth2 code for a token")
-            token = await self._exchange_code(code)
+            token = await self._exchange_code(
+                code, code_verifier=session_data.code_verifier
+            )
         except OidcError as e:
             logger.warning("Could not exchange OAuth2 code: %s", e)
             self._sso_handler.render_error(request, e.error, e.error_description)
diff --git a/synapse/util/macaroons.py b/synapse/util/macaroons.py
index 5df03d3ddc..644c341e8c 100644
--- a/synapse/util/macaroons.py
+++ b/synapse/util/macaroons.py
@@ -110,6 +110,9 @@ class OidcSessionData:
     ui_auth_session_id: str
     """The session ID of the ongoing UI Auth ("" if this is a login)"""
 
+    code_verifier: str
+    """The random string used in the RFC7636 code challenge ("" if PKCE is not being used)."""
+
 
 class MacaroonGenerator:
     def __init__(self, clock: Clock, location: str, secret_key: bytes):
@@ -187,6 +190,7 @@ class MacaroonGenerator:
         macaroon.add_first_party_caveat(
             f"ui_auth_session_id = {session_data.ui_auth_session_id}"
         )
+        macaroon.add_first_party_caveat(f"code_verifier = {session_data.code_verifier}")
         macaroon.add_first_party_caveat(f"time < {expiry}")
 
         return macaroon.serialize()
@@ -278,6 +282,7 @@ class MacaroonGenerator:
         v.satisfy_general(lambda c: c.startswith("idp_id = "))
         v.satisfy_general(lambda c: c.startswith("client_redirect_url = "))
         v.satisfy_general(lambda c: c.startswith("ui_auth_session_id = "))
+        v.satisfy_general(lambda c: c.startswith("code_verifier = "))
         satisfy_expiry(v, self._clock.time_msec)
 
         v.verify(macaroon, self._secret_key)
@@ -287,11 +292,13 @@ class MacaroonGenerator:
         idp_id = get_value_from_macaroon(macaroon, "idp_id")
         client_redirect_url = get_value_from_macaroon(macaroon, "client_redirect_url")
         ui_auth_session_id = get_value_from_macaroon(macaroon, "ui_auth_session_id")
+        code_verifier = get_value_from_macaroon(macaroon, "code_verifier")
         return OidcSessionData(
             nonce=nonce,
             idp_id=idp_id,
             client_redirect_url=client_redirect_url,
             ui_auth_session_id=ui_auth_session_id,
+            code_verifier=code_verifier,
         )
 
     def _generate_base_macaroon(self, type: MacaroonType) -> pymacaroons.Macaroon:
diff --git a/tests/handlers/test_oidc.py b/tests/handlers/test_oidc.py
index 49a1842b5c..adddbd002f 100644
--- a/tests/handlers/test_oidc.py
+++ b/tests/handlers/test_oidc.py
@@ -396,6 +396,7 @@ class OidcHandlerTestCase(HomeserverTestCase):
         self.assertEqual(params["client_id"], [CLIENT_ID])
         self.assertEqual(len(params["state"]), 1)
         self.assertEqual(len(params["nonce"]), 1)
+        self.assertNotIn("code_challenge", params)
 
         # Check what is in the cookies
         self.assertEqual(len(req.cookies), 2)  # two cookies
@@ -411,12 +412,117 @@ class OidcHandlerTestCase(HomeserverTestCase):
         macaroon = pymacaroons.Macaroon.deserialize(cookie)
         state = get_value_from_macaroon(macaroon, "state")
         nonce = get_value_from_macaroon(macaroon, "nonce")
+        code_verifier = get_value_from_macaroon(macaroon, "code_verifier")
         redirect = get_value_from_macaroon(macaroon, "client_redirect_url")
 
         self.assertEqual(params["state"], [state])
         self.assertEqual(params["nonce"], [nonce])
+        self.assertEqual(code_verifier, "")
         self.assertEqual(redirect, "http://client/redirect")
 
+    @override_config({"oidc_config": DEFAULT_CONFIG})
+    def test_redirect_request_with_code_challenge(self) -> None:
+        """The redirect request has the right arguments & generates a valid session cookie."""
+        req = Mock(spec=["cookies"])
+        req.cookies = []
+
+        with self.metadata_edit({"code_challenge_methods_supported": ["S256"]}):
+            url = urlparse(
+                self.get_success(
+                    self.provider.handle_redirect_request(
+                        req, b"http://client/redirect"
+                    )
+                )
+            )
+
+        # Ensure the code_challenge param is added to the redirect.
+        params = parse_qs(url.query)
+        self.assertEqual(len(params["code_challenge"]), 1)
+
+        # Check what is in the cookies
+        self.assertEqual(len(req.cookies), 2)  # two cookies
+        cookie_header = req.cookies[0]
+
+        # The cookie name and path don't really matter, just that it has to be coherent
+        # between the callback & redirect handlers.
+        parts = [p.strip() for p in cookie_header.split(b";")]
+        self.assertIn(b"Path=/_synapse/client/oidc", parts)
+        name, cookie = parts[0].split(b"=")
+        self.assertEqual(name, b"oidc_session")
+
+        # Ensure the code_verifier is set in the cookie.
+        macaroon = pymacaroons.Macaroon.deserialize(cookie)
+        code_verifier = get_value_from_macaroon(macaroon, "code_verifier")
+        self.assertNotEqual(code_verifier, "")
+
+    @override_config({"oidc_config": {**DEFAULT_CONFIG, "pkce_method": "always"}})
+    def test_redirect_request_with_forced_code_challenge(self) -> None:
+        """The redirect request has the right arguments & generates a valid session cookie."""
+        req = Mock(spec=["cookies"])
+        req.cookies = []
+
+        url = urlparse(
+            self.get_success(
+                self.provider.handle_redirect_request(req, b"http://client/redirect")
+            )
+        )
+
+        # Ensure the code_challenge param is added to the redirect.
+        params = parse_qs(url.query)
+        self.assertEqual(len(params["code_challenge"]), 1)
+
+        # Check what is in the cookies
+        self.assertEqual(len(req.cookies), 2)  # two cookies
+        cookie_header = req.cookies[0]
+
+        # The cookie name and path don't really matter, just that it has to be coherent
+        # between the callback & redirect handlers.
+        parts = [p.strip() for p in cookie_header.split(b";")]
+        self.assertIn(b"Path=/_synapse/client/oidc", parts)
+        name, cookie = parts[0].split(b"=")
+        self.assertEqual(name, b"oidc_session")
+
+        # Ensure the code_verifier is set in the cookie.
+        macaroon = pymacaroons.Macaroon.deserialize(cookie)
+        code_verifier = get_value_from_macaroon(macaroon, "code_verifier")
+        self.assertNotEqual(code_verifier, "")
+
+    @override_config({"oidc_config": {**DEFAULT_CONFIG, "pkce_method": "never"}})
+    def test_redirect_request_with_disabled_code_challenge(self) -> None:
+        """The redirect request has the right arguments & generates a valid session cookie."""
+        req = Mock(spec=["cookies"])
+        req.cookies = []
+
+        # The metadata should state that PKCE is enabled.
+        with self.metadata_edit({"code_challenge_methods_supported": ["S256"]}):
+            url = urlparse(
+                self.get_success(
+                    self.provider.handle_redirect_request(
+                        req, b"http://client/redirect"
+                    )
+                )
+            )
+
+        # Ensure the code_challenge param is not added to the redirect.
+        params = parse_qs(url.query)
+        self.assertNotIn("code_challenge", params)
+
+        # Check what is in the cookies
+        self.assertEqual(len(req.cookies), 2)  # two cookies
+        cookie_header = req.cookies[0]
+
+        # The cookie name and path don't really matter, just that it has to be coherent
+        # between the callback & redirect handlers.
+        parts = [p.strip() for p in cookie_header.split(b";")]
+        self.assertIn(b"Path=/_synapse/client/oidc", parts)
+        name, cookie = parts[0].split(b"=")
+        self.assertEqual(name, b"oidc_session")
+
+        # Ensure the code_verifier is blank in the cookie.
+        macaroon = pymacaroons.Macaroon.deserialize(cookie)
+        code_verifier = get_value_from_macaroon(macaroon, "code_verifier")
+        self.assertEqual(code_verifier, "")
+
     @override_config({"oidc_config": DEFAULT_CONFIG})
     def test_callback_error(self) -> None:
         """Errors from the provider returned in the callback are displayed."""
@@ -601,7 +707,7 @@ class OidcHandlerTestCase(HomeserverTestCase):
             payload=token
         )
         code = "code"
-        ret = self.get_success(self.provider._exchange_code(code))
+        ret = self.get_success(self.provider._exchange_code(code, code_verifier=""))
         kwargs = self.fake_server.request.call_args[1]
 
         self.assertEqual(ret, token)
@@ -615,13 +721,34 @@ class OidcHandlerTestCase(HomeserverTestCase):
         self.assertEqual(args["client_secret"], [CLIENT_SECRET])
         self.assertEqual(args["redirect_uri"], [CALLBACK_URL])
 
+        # Test providing a code verifier.
+        code_verifier = "code_verifier"
+        ret = self.get_success(
+            self.provider._exchange_code(code, code_verifier=code_verifier)
+        )
+        kwargs = self.fake_server.request.call_args[1]
+
+        self.assertEqual(ret, token)
+        self.assertEqual(kwargs["method"], "POST")
+        self.assertEqual(kwargs["uri"], self.fake_server.token_endpoint)
+
+        args = parse_qs(kwargs["data"].decode("utf-8"))
+        self.assertEqual(args["grant_type"], ["authorization_code"])
+        self.assertEqual(args["code"], [code])
+        self.assertEqual(args["client_id"], [CLIENT_ID])
+        self.assertEqual(args["client_secret"], [CLIENT_SECRET])
+        self.assertEqual(args["redirect_uri"], [CALLBACK_URL])
+        self.assertEqual(args["code_verifier"], [code_verifier])
+
         # Test error handling
         self.fake_server.post_token_handler.return_value = FakeResponse.json(
             code=400, payload={"error": "foo", "error_description": "bar"}
         )
         from synapse.handlers.oidc import OidcError
 
-        exc = self.get_failure(self.provider._exchange_code(code), OidcError)
+        exc = self.get_failure(
+            self.provider._exchange_code(code, code_verifier=""), OidcError
+        )
         self.assertEqual(exc.value.error, "foo")
         self.assertEqual(exc.value.error_description, "bar")
 
@@ -629,7 +756,9 @@ class OidcHandlerTestCase(HomeserverTestCase):
         self.fake_server.post_token_handler.return_value = FakeResponse(
             code=500, body=b"Not JSON"
         )
-        exc = self.get_failure(self.provider._exchange_code(code), OidcError)
+        exc = self.get_failure(
+            self.provider._exchange_code(code, code_verifier=""), OidcError
+        )
         self.assertEqual(exc.value.error, "server_error")
 
         # Internal server error with JSON body
@@ -637,21 +766,27 @@ class OidcHandlerTestCase(HomeserverTestCase):
             code=500, payload={"error": "internal_server_error"}
         )
 
-        exc = self.get_failure(self.provider._exchange_code(code), OidcError)
+        exc = self.get_failure(
+            self.provider._exchange_code(code, code_verifier=""), OidcError
+        )
         self.assertEqual(exc.value.error, "internal_server_error")
 
         # 4xx error without "error" field
         self.fake_server.post_token_handler.return_value = FakeResponse.json(
             code=400, payload={}
         )
-        exc = self.get_failure(self.provider._exchange_code(code), OidcError)
+        exc = self.get_failure(
+            self.provider._exchange_code(code, code_verifier=""), OidcError
+        )
         self.assertEqual(exc.value.error, "server_error")
 
         # 2xx error with "error" field
         self.fake_server.post_token_handler.return_value = FakeResponse.json(
             code=200, payload={"error": "some_error"}
         )
-        exc = self.get_failure(self.provider._exchange_code(code), OidcError)
+        exc = self.get_failure(
+            self.provider._exchange_code(code, code_verifier=""), OidcError
+        )
         self.assertEqual(exc.value.error, "some_error")
 
     @override_config(
@@ -688,7 +823,7 @@ class OidcHandlerTestCase(HomeserverTestCase):
         # timestamps.
         self.reactor.advance(1000)
         start_time = self.reactor.seconds()
-        ret = self.get_success(self.provider._exchange_code(code))
+        ret = self.get_success(self.provider._exchange_code(code, code_verifier=""))
 
         self.assertEqual(ret, token)
 
@@ -739,7 +874,7 @@ class OidcHandlerTestCase(HomeserverTestCase):
             payload=token
         )
         code = "code"
-        ret = self.get_success(self.provider._exchange_code(code))
+        ret = self.get_success(self.provider._exchange_code(code, code_verifier=""))
 
         self.assertEqual(ret, token)
 
@@ -1203,6 +1338,7 @@ class OidcHandlerTestCase(HomeserverTestCase):
                 nonce=nonce,
                 client_redirect_url=client_redirect_url,
                 ui_auth_session_id=ui_auth_session_id,
+                code_verifier="",
             ),
         )
 
diff --git a/tests/util/test_macaroons.py b/tests/util/test_macaroons.py
index f68377a05a..e56ec2c860 100644
--- a/tests/util/test_macaroons.py
+++ b/tests/util/test_macaroons.py
@@ -92,6 +92,7 @@ class MacaroonGeneratorTestCase(TestCase):
             nonce="nonce",
             client_redirect_url="https://example.com/",
             ui_auth_session_id="",
+            code_verifier="",
         )
         token = self.macaroon_generator.generate_oidc_session_token(
             state, session_data, duration_in_ms=2 * 60 * 1000
-- 
cgit 1.5.1


From 5e0888076fea8c70ab84114e1c261dd46330c1d6 Mon Sep 17 00:00:00 2001
From: Jeyachandran Rathnam <jai.rathnem@gmail.com>
Date: Mon, 9 Jan 2023 06:12:03 -0500
Subject: Disable sending confirmation email when 3pid is disabled #14682
 (#14725)

* Fixes #12277: Disable sending confirmation email when 3pid is disabled

* Fix test_add_email_if_disabled test case to reflect changes to enable_3pid_changes flag

* Add changelog file

* Rename newsfragment.

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
---
 changelog.d/14725.misc            |  1 +
 synapse/rest/client/account.py    |  5 +++++
 tests/rest/client/test_account.py | 30 +++++-------------------------
 3 files changed, 11 insertions(+), 25 deletions(-)
 create mode 100644 changelog.d/14725.misc

(limited to 'synapse')

diff --git a/changelog.d/14725.misc b/changelog.d/14725.misc
new file mode 100644
index 0000000000..a86c4f8c05
--- /dev/null
+++ b/changelog.d/14725.misc
@@ -0,0 +1 @@
+Disable sending confirmation email when 3pid is disabled.
diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py
index b4b92f0c99..4373c73662 100644
--- a/synapse/rest/client/account.py
+++ b/synapse/rest/client/account.py
@@ -338,6 +338,11 @@ class EmailThreepidRequestTokenRestServlet(RestServlet):
             )
 
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
+        if not self.hs.config.registration.enable_3pid_changes:
+            raise SynapseError(
+                400, "3PID changes are disabled on this server", Codes.FORBIDDEN
+            )
+
         if not self.config.email.can_verify_email:
             logger.warning(
                 "Adding emails have been disabled due to lack of an email config"
diff --git a/tests/rest/client/test_account.py b/tests/rest/client/test_account.py
index c1a7fb2f8a..88f255c9ee 100644
--- a/tests/rest/client/test_account.py
+++ b/tests/rest/client/test_account.py
@@ -690,41 +690,21 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase):
         self.hs.config.registration.enable_3pid_changes = False
 
         client_secret = "foobar"
-        session_id = self._request_token(self.email, client_secret)
-
-        self.assertEqual(len(self.email_attempts), 1)
-        link = self._get_link_from_email()
-
-        self._validate_token(link)
-
         channel = self.make_request(
             "POST",
-            b"/_matrix/client/unstable/account/3pid/add",
+            b"/_matrix/client/unstable/account/3pid/email/requestToken",
             {
                 "client_secret": client_secret,
-                "sid": session_id,
-                "auth": {
-                    "type": "m.login.password",
-                    "user": self.user_id,
-                    "password": "test",
-                },
+                "email": "test@example.com",
+                "send_attempt": 1,
             },
-            access_token=self.user_id_tok,
         )
+
         self.assertEqual(
             HTTPStatus.BAD_REQUEST, channel.code, msg=channel.result["body"]
         )
-        self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"])
 
-        # Get user
-        channel = self.make_request(
-            "GET",
-            self.url_3pid,
-            access_token=self.user_id_tok,
-        )
-
-        self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.result["body"])
-        self.assertFalse(channel.json_body["threepids"])
+        self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"])
 
     def test_delete_email(self) -> None:
         """Test deleting an email from profile"""
-- 
cgit 1.5.1


From 7e582a25f8f350df29d7d83ca902bdb522d1bbaf Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 9 Jan 2023 08:43:50 -0500
Subject: Improve /sync performance when passing filters with empty arrays.
 (#14786)

This has two related changes:

* It enables fast-path processing for an empty filter (`[]`) which was
  previously only used for wildcard not-filters (`["*"]`).
* It special cases a `/sync` filter with no-rooms to skip all room
  processing, previously we would partially skip processing, but would
  generally still calculate intermediate values for each room which were
  then unused.

Future changes might consider further optimizations:

* Skip calculating per-room account data when all rooms are filtered (currently
  this is thrown away).
* Make similar improvements to other endpoints which support filters.
---
 changelog.d/14786.feature  |  1 +
 synapse/api/filtering.py   | 13 ++++++++-----
 synapse/handlers/search.py |  2 +-
 synapse/handlers/sync.py   | 14 +++++++++++---
 4 files changed, 21 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/14786.feature

(limited to 'synapse')

diff --git a/changelog.d/14786.feature b/changelog.d/14786.feature
new file mode 100644
index 0000000000..008d61ab03
--- /dev/null
+++ b/changelog.d/14786.feature
@@ -0,0 +1 @@
+Improve performance of `/sync` when filtering all rooms, message types, or senders.
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index a9888381b4..2b5af264b4 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -283,6 +283,9 @@ class FilterCollection:
             await self._room_filter.filter(events)
         )
 
+    def blocks_all_rooms(self) -> bool:
+        return self._room_filter.filters_all_rooms()
+
     def blocks_all_presence(self) -> bool:
         return (
             self._presence_filter.filters_all_types()
@@ -351,13 +354,13 @@ class Filter:
             self.not_rel_types = filter_json.get("org.matrix.msc3874.not_rel_types", [])
 
     def filters_all_types(self) -> bool:
-        return "*" in self.not_types
+        return self.types == [] or "*" in self.not_types
 
     def filters_all_senders(self) -> bool:
-        return "*" in self.not_senders
+        return self.senders == [] or "*" in self.not_senders
 
     def filters_all_rooms(self) -> bool:
-        return "*" in self.not_rooms
+        return self.rooms == [] or "*" in self.not_rooms
 
     def _check(self, event: FilterEvent) -> bool:
         """Checks whether the filter matches the given event.
@@ -450,8 +453,8 @@ class Filter:
             if any(map(match_func, disallowed_values)):
                 return False
 
-            # Other the event does not match at least one of the allowed values,
-            # reject it.
+            # Otherwise if the event does not match at least one of the allowed
+            # values, reject it.
             allowed_values = getattr(self, name)
             if allowed_values is not None:
                 if not any(map(match_func, allowed_values)):
diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py
index 33115ce488..40f4635c4e 100644
--- a/synapse/handlers/search.py
+++ b/synapse/handlers/search.py
@@ -275,7 +275,7 @@ class SearchHandler:
         )
         room_ids = {r.room_id for r in rooms}
 
-        # If doing a subset of all rooms seearch, check if any of the rooms
+        # If doing a subset of all rooms search, check if any of the rooms
         # are from an upgraded room, and search their contents as well
         if search_filter.rooms:
             historical_room_ids: List[str] = []
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 4fa480262b..6942e06c77 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1403,11 +1403,14 @@ class SyncHandler:
 
         logger.debug("Fetching room data")
 
-        res = await self._generate_sync_entry_for_rooms(
+        (
+            newly_joined_rooms,
+            newly_joined_or_invited_or_knocked_users,
+            newly_left_rooms,
+            newly_left_users,
+        ) = await self._generate_sync_entry_for_rooms(
             sync_result_builder, account_data_by_room
         )
-        newly_joined_rooms, newly_joined_or_invited_or_knocked_users, _, _ = res
-        _, _, newly_left_rooms, newly_left_users = res
 
         block_all_presence_data = (
             since_token is None and sync_config.filter_collection.blocks_all_presence()
@@ -1789,6 +1792,11 @@ class SyncHandler:
             - newly_left_rooms
             - newly_left_users
         """
+
+        # If the request doesn't care about rooms then nothing to do!
+        if sync_result_builder.sync_config.filter_collection.blocks_all_rooms():
+            return set(), set(), set(), set()
+
         since_token = sync_result_builder.since_token
 
         # 1. Start by fetching all ephemeral events in rooms we've joined (if required).
-- 
cgit 1.5.1


From babeeb4e7a6f5b5c643b837bf724d674805546f6 Mon Sep 17 00:00:00 2001
From: Jeyachandran Rathnam <jai.rathnem@gmail.com>
Date: Mon, 9 Jan 2023 09:22:02 -0500
Subject: Unescape HTML entities in oEmbed titles. (#14781)

It doesn't seem valid that HTML entities should appear in
the title field of oEmbed responses, but a popular WordPress
plug-in seems to do it.

There should not be harm in unescaping these.
---
 changelog.d/14781.misc             |  1 +
 synapse/rest/media/v1/oembed.py    | 15 +++++++++------
 tests/rest/media/v1/test_oembed.py | 10 ++++++++++
 3 files changed, 20 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/14781.misc

(limited to 'synapse')

diff --git a/changelog.d/14781.misc b/changelog.d/14781.misc
new file mode 100644
index 0000000000..04f565b410
--- /dev/null
+++ b/changelog.d/14781.misc
@@ -0,0 +1 @@
+Unescape HTML entities in URL preview titles making use of oEmbed responses.
diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py
index 827afd868d..a3738a6250 100644
--- a/synapse/rest/media/v1/oembed.py
+++ b/synapse/rest/media/v1/oembed.py
@@ -11,6 +11,7 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
+import html
 import logging
 import urllib.parse
 from typing import TYPE_CHECKING, List, Optional
@@ -161,7 +162,9 @@ class OEmbedProvider:
 
         title = oembed.get("title")
         if title and isinstance(title, str):
-            open_graph_response["og:title"] = title
+            # A common WordPress plug-in seems to incorrectly escape entities
+            # in the oEmbed response.
+            open_graph_response["og:title"] = html.unescape(title)
 
         author_name = oembed.get("author_name")
         if not isinstance(author_name, str):
@@ -180,9 +183,9 @@ class OEmbedProvider:
         # Process each type separately.
         oembed_type = oembed.get("type")
         if oembed_type == "rich":
-            html = oembed.get("html")
-            if isinstance(html, str):
-                calc_description_and_urls(open_graph_response, html)
+            html_str = oembed.get("html")
+            if isinstance(html_str, str):
+                calc_description_and_urls(open_graph_response, html_str)
 
         elif oembed_type == "photo":
             # If this is a photo, use the full image, not the thumbnail.
@@ -192,8 +195,8 @@ class OEmbedProvider:
 
         elif oembed_type == "video":
             open_graph_response["og:type"] = "video.other"
-            html = oembed.get("html")
-            if html and isinstance(html, str):
+            html_str = oembed.get("html")
+            if html_str and isinstance(html_str, str):
                 calc_description_and_urls(open_graph_response, oembed["html"])
             for size in ("width", "height"):
                 val = oembed.get(size)
diff --git a/tests/rest/media/v1/test_oembed.py b/tests/rest/media/v1/test_oembed.py
index 319ae8b1cc..3f7f1dbab9 100644
--- a/tests/rest/media/v1/test_oembed.py
+++ b/tests/rest/media/v1/test_oembed.py
@@ -150,3 +150,13 @@ class OEmbedTests(HomeserverTestCase):
         result = self.parse_response({"type": "link"})
         self.assertIn("og:type", result.open_graph_result)
         self.assertEqual(result.open_graph_result["og:type"], "website")
+
+    def test_title_html_entities(self) -> None:
+        """Test HTML entities in title"""
+        result = self.parse_response(
+            {"title": "Why JSON isn&#8217;t a Good Configuration Language"}
+        )
+        self.assertEqual(
+            result.open_graph_result["og:title"],
+            "Why JSON isn’t a Good Configuration Language",
+        )
-- 
cgit 1.5.1


From 58d2adc3da6a988452dbb9c6c4202a5ea19c4ca9 Mon Sep 17 00:00:00 2001
From: Jeyachandran Rathnam <jai.rathnem@gmail.com>
Date: Mon, 9 Jan 2023 12:17:24 -0500
Subject: Remove undocumented device from pushrules (#14727)

* Remove undocumented device from pushrules

* Add changelog

* Update changelog.d/14727.misc

* Rename 14727.misc to 14727.bugfix

Co-authored-by: David Robertson <davidr@element.io>
---
 changelog.d/14727.bugfix     | 1 +
 synapse/push/clientformat.py | 5 +----
 2 files changed, 2 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14727.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14727.bugfix b/changelog.d/14727.bugfix
new file mode 100644
index 0000000000..25079496e4
--- /dev/null
+++ b/changelog.d/14727.bugfix
@@ -0,0 +1 @@
+Remove the unspecced `device` field from `/pushrules` responses.
diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py
index 622a1e35c5..bb76c169c6 100644
--- a/synapse/push/clientformat.py
+++ b/synapse/push/clientformat.py
@@ -26,10 +26,7 @@ def format_push_rules_for_user(
     """Converts a list of rawrules and a enabled map into nested dictionaries
     to match the Matrix client-server format for push rules"""
 
-    rules: Dict[str, Dict[str, List[Dict[str, Any]]]] = {
-        "global": {},
-        "device": {},
-    }
+    rules: Dict[str, Dict[str, List[Dict[str, Any]]]] = {"global": {}}
 
     rules["global"] = _add_empty_priority_class_arrays(rules["global"])
 
-- 
cgit 1.5.1


From ba4ea7d13ffae53644b206222af95a5171faa27c Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Tue, 10 Jan 2023 11:17:59 +0000
Subject: Batch up replication requests to request the resyncing of remote
 users' devices. (#14716)

---
 changelog.d/14716.misc                    |   1 +
 synapse/handlers/device.py                | 124 +++++++++++++++++++++++-------
 synapse/handlers/devicemessage.py         |   2 +-
 synapse/handlers/e2e_keys.py              |  93 +++++++++++++---------
 synapse/handlers/federation_event.py      |   2 +-
 synapse/replication/http/devices.py       |  74 +++++++++++++++++-
 synapse/storage/databases/main/devices.py |  30 ++++++--
 synapse/types/__init__.py                 |   4 +
 synapse/util/async_helpers.py             |  55 ++++++++++++-
 9 files changed, 306 insertions(+), 79 deletions(-)
 create mode 100644 changelog.d/14716.misc

(limited to 'synapse')

diff --git a/changelog.d/14716.misc b/changelog.d/14716.misc
new file mode 100644
index 0000000000..ef9522e01d
--- /dev/null
+++ b/changelog.d/14716.misc
@@ -0,0 +1 @@
+Batch up replication requests to request the resyncing of remote users's devices.
\ No newline at end of file
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index d4750a32e6..89864e1119 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+from http import HTTPStatus
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -33,6 +34,7 @@ from synapse.api.errors import (
     Codes,
     FederationDeniedError,
     HttpResponseException,
+    InvalidAPICallError,
     RequestSendFailed,
     SynapseError,
 )
@@ -45,6 +47,7 @@ from synapse.types import (
     JsonDict,
     StreamKeyType,
     StreamToken,
+    UserID,
     get_domain_from_id,
     get_verify_key_from_cross_signing_key,
 )
@@ -893,12 +896,47 @@ class DeviceListWorkerUpdater:
 
     def __init__(self, hs: "HomeServer"):
         from synapse.replication.http.devices import (
+            ReplicationMultiUserDevicesResyncRestServlet,
             ReplicationUserDevicesResyncRestServlet,
         )
 
         self._user_device_resync_client = (
             ReplicationUserDevicesResyncRestServlet.make_client(hs)
         )
+        self._multi_user_device_resync_client = (
+            ReplicationMultiUserDevicesResyncRestServlet.make_client(hs)
+        )
+
+    async def multi_user_device_resync(
+        self, user_ids: List[str], mark_failed_as_stale: bool = True
+    ) -> Dict[str, Optional[JsonDict]]:
+        """
+        Like `user_device_resync` but operates on multiple users **from the same origin**
+        at once.
+
+        Returns:
+            Dict from User ID to the same Dict as `user_device_resync`.
+        """
+        # mark_failed_as_stale is not sent. Ensure this doesn't break expectations.
+        assert mark_failed_as_stale
+
+        if not user_ids:
+            # Shortcut empty requests
+            return {}
+
+        try:
+            return await self._multi_user_device_resync_client(user_ids=user_ids)
+        except SynapseError as err:
+            if not (
+                err.code == HTTPStatus.NOT_FOUND and err.errcode == Codes.UNRECOGNIZED
+            ):
+                raise
+
+            # Fall back to single requests
+            result: Dict[str, Optional[JsonDict]] = {}
+            for user_id in user_ids:
+                result[user_id] = await self._user_device_resync_client(user_id=user_id)
+            return result
 
     async def user_device_resync(
         self, user_id: str, mark_failed_as_stale: bool = True
@@ -913,8 +951,10 @@ class DeviceListWorkerUpdater:
             A dict with device info as under the "devices" in the result of this
             request:
             https://matrix.org/docs/spec/server_server/r0.1.2#get-matrix-federation-v1-user-devices-userid
+            None when we weren't able to fetch the device info for some reason,
+            e.g. due to a connection problem.
         """
-        return await self._user_device_resync_client(user_id=user_id)
+        return (await self.multi_user_device_resync([user_id]))[user_id]
 
 
 class DeviceListUpdater(DeviceListWorkerUpdater):
@@ -1160,19 +1200,66 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
             # Allow future calls to retry resyncinc out of sync device lists.
             self._resync_retry_in_progress = False
 
+    async def multi_user_device_resync(
+        self, user_ids: List[str], mark_failed_as_stale: bool = True
+    ) -> Dict[str, Optional[JsonDict]]:
+        """
+        Like `user_device_resync` but operates on multiple users **from the same origin**
+        at once.
+
+        Returns:
+            Dict from User ID to the same Dict as `user_device_resync`.
+        """
+        if not user_ids:
+            return {}
+
+        origins = {UserID.from_string(user_id).domain for user_id in user_ids}
+
+        if len(origins) != 1:
+            raise InvalidAPICallError(f"Only one origin permitted, got {origins!r}")
+
+        result = {}
+        failed = set()
+        # TODO(Perf): Actually batch these up
+        for user_id in user_ids:
+            user_result, user_failed = await self._user_device_resync_returning_failed(
+                user_id
+            )
+            result[user_id] = user_result
+            if user_failed:
+                failed.add(user_id)
+
+        if mark_failed_as_stale:
+            await self.store.mark_remote_users_device_caches_as_stale(failed)
+
+        return result
+
     async def user_device_resync(
         self, user_id: str, mark_failed_as_stale: bool = True
     ) -> Optional[JsonDict]:
+        result, failed = await self._user_device_resync_returning_failed(user_id)
+
+        if failed and mark_failed_as_stale:
+            # Mark the remote user's device list as stale so we know we need to retry
+            # it later.
+            await self.store.mark_remote_users_device_caches_as_stale((user_id,))
+
+        return result
+
+    async def _user_device_resync_returning_failed(
+        self, user_id: str
+    ) -> Tuple[Optional[JsonDict], bool]:
         """Fetches all devices for a user and updates the device cache with them.
 
         Args:
             user_id: The user's id whose device_list will be updated.
-            mark_failed_as_stale: Whether to mark the user's device list as stale
-                if the attempt to resync failed.
         Returns:
-            A dict with device info as under the "devices" in the result of this
-            request:
-            https://matrix.org/docs/spec/server_server/r0.1.2#get-matrix-federation-v1-user-devices-userid
+            - A dict with device info as under the "devices" in the result of this
+              request:
+              https://matrix.org/docs/spec/server_server/r0.1.2#get-matrix-federation-v1-user-devices-userid
+              None when we weren't able to fetch the device info for some reason,
+              e.g. due to a connection problem.
+            - True iff the resync failed and the device list should be marked as stale.
         """
         logger.debug("Attempting to resync the device list for %s", user_id)
         log_kv({"message": "Doing resync to update device list."})
@@ -1181,12 +1268,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
         try:
             result = await self.federation.query_user_devices(origin, user_id)
         except NotRetryingDestination:
-            if mark_failed_as_stale:
-                # Mark the remote user's device list as stale so we know we need to retry
-                # it later.
-                await self.store.mark_remote_user_device_cache_as_stale(user_id)
-
-            return None
+            return None, True
         except (RequestSendFailed, HttpResponseException) as e:
             logger.warning(
                 "Failed to handle device list update for %s: %s",
@@ -1194,23 +1276,18 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
                 e,
             )
 
-            if mark_failed_as_stale:
-                # Mark the remote user's device list as stale so we know we need to retry
-                # it later.
-                await self.store.mark_remote_user_device_cache_as_stale(user_id)
-
             # We abort on exceptions rather than accepting the update
             # as otherwise synapse will 'forget' that its device list
             # is out of date. If we bail then we will retry the resync
             # next time we get a device list update for this user_id.
             # This makes it more likely that the device lists will
             # eventually become consistent.
-            return None
+            return None, True
         except FederationDeniedError as e:
             set_tag("error", True)
             log_kv({"reason": "FederationDeniedError"})
             logger.info(e)
-            return None
+            return None, False
         except Exception as e:
             set_tag("error", True)
             log_kv(
@@ -1218,12 +1295,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
             )
             logger.exception("Failed to handle device list update for %s", user_id)
 
-            if mark_failed_as_stale:
-                # Mark the remote user's device list as stale so we know we need to retry
-                # it later.
-                await self.store.mark_remote_user_device_cache_as_stale(user_id)
-
-            return None
+            return None, True
         log_kv({"result": result})
         stream_id = result["stream_id"]
         devices = result["devices"]
@@ -1305,7 +1377,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
         # point.
         self._seen_updates[user_id] = {stream_id}
 
-        return result
+        return result, False
 
     async def process_cross_signing_key_update(
         self,
diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py
index 75e89850f5..00c403db49 100644
--- a/synapse/handlers/devicemessage.py
+++ b/synapse/handlers/devicemessage.py
@@ -195,7 +195,7 @@ class DeviceMessageHandler:
                 sender_user_id,
                 unknown_devices,
             )
-            await self.store.mark_remote_user_device_cache_as_stale(sender_user_id)
+            await self.store.mark_remote_users_device_caches_as_stale((sender_user_id,))
 
             # Immediately attempt a resync in the background
             run_in_background(self._user_device_resync, user_id=sender_user_id)
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 5fe102e2f2..d2188ca08f 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -36,8 +36,8 @@ from synapse.types import (
     get_domain_from_id,
     get_verify_key_from_cross_signing_key,
 )
-from synapse.util import json_decoder, unwrapFirstError
-from synapse.util.async_helpers import Linearizer, delay_cancellation
+from synapse.util import json_decoder
+from synapse.util.async_helpers import Linearizer, concurrently_execute
 from synapse.util.cancellation import cancellable
 from synapse.util.retryutils import NotRetryingDestination
 
@@ -238,24 +238,28 @@ class E2eKeysHandler:
             # Now fetch any devices that we don't have in our cache
             # TODO It might make sense to propagate cancellations into the
             #      deferreds which are querying remote homeservers.
-            await make_deferred_yieldable(
-                delay_cancellation(
-                    defer.gatherResults(
-                        [
-                            run_in_background(
-                                self._query_devices_for_destination,
-                                results,
-                                cross_signing_keys,
-                                failures,
-                                destination,
-                                queries,
-                                timeout,
-                            )
-                            for destination, queries in remote_queries_not_in_cache.items()
-                        ],
-                        consumeErrors=True,
-                    ).addErrback(unwrapFirstError)
+            logger.debug(
+                "%d destinations to query devices for", len(remote_queries_not_in_cache)
+            )
+
+            async def _query(
+                destination_queries: Tuple[str, Dict[str, Iterable[str]]]
+            ) -> None:
+                destination, queries = destination_queries
+                return await self._query_devices_for_destination(
+                    results,
+                    cross_signing_keys,
+                    failures,
+                    destination,
+                    queries,
+                    timeout,
                 )
+
+            await concurrently_execute(
+                _query,
+                remote_queries_not_in_cache.items(),
+                10,
+                delay_cancellation=True,
             )
 
             ret = {"device_keys": results, "failures": failures}
@@ -300,28 +304,41 @@ class E2eKeysHandler:
         # queries. We use the more efficient batched query_client_keys for all
         # remaining users
         user_ids_updated = []
-        for (user_id, device_list) in destination_query.items():
-            if user_id in user_ids_updated:
-                continue
 
-            if device_list:
-                continue
+        # Perform a user device resync for each user only once and only as long as:
+        # - they have an empty device_list
+        # - they are in some rooms that this server can see
+        users_to_resync_devices = {
+            user_id
+            for (user_id, device_list) in destination_query.items()
+            if (not device_list) and (await self.store.get_rooms_for_user(user_id))
+        }
 
-            room_ids = await self.store.get_rooms_for_user(user_id)
-            if not room_ids:
-                continue
+        logger.debug(
+            "%d users to resync devices for from destination %s",
+            len(users_to_resync_devices),
+            destination,
+        )
 
-            # We've decided we're sharing a room with this user and should
-            # probably be tracking their device lists. However, we haven't
-            # done an initial sync on the device list so we do it now.
-            try:
-                resync_results = (
-                    await self.device_handler.device_list_updater.user_device_resync(
-                        user_id
-                    )
+        try:
+            user_resync_results = (
+                await self.device_handler.device_list_updater.multi_user_device_resync(
+                    list(users_to_resync_devices)
                 )
+            )
+            for user_id in users_to_resync_devices:
+                resync_results = user_resync_results[user_id]
+
                 if resync_results is None:
-                    raise ValueError("Device resync failed")
+                    # TODO: It's weird that we'll store a failure against a
+                    #       destination, yet continue processing users from that
+                    #       destination.
+                    #       We might want to consider changing this, but for now
+                    #       I'm leaving it as I found it.
+                    failures[destination] = _exception_to_failure(
+                        ValueError(f"Device resync failed for {user_id!r}")
+                    )
+                    continue
 
                 # Add the device keys to the results.
                 user_devices = resync_results["devices"]
@@ -339,8 +356,8 @@ class E2eKeysHandler:
 
                 if self_signing_key:
                     cross_signing_keys["self_signing_keys"][user_id] = self_signing_key
-            except Exception as e:
-                failures[destination] = _exception_to_failure(e)
+        except Exception as e:
+            failures[destination] = _exception_to_failure(e)
 
         if len(destination_query) == len(user_ids_updated):
             # We've updated all the users in the query and we do not need to
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 31df7f55cc..6df000faaf 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -1423,7 +1423,7 @@ class FederationEventHandler:
         """
 
         try:
-            await self._store.mark_remote_user_device_cache_as_stale(sender)
+            await self._store.mark_remote_users_device_caches_as_stale((sender,))
 
             # Immediately attempt a resync in the background
             if self._config.worker.worker_app:
diff --git a/synapse/replication/http/devices.py b/synapse/replication/http/devices.py
index 7c4941c3d3..ea5c08e6cf 100644
--- a/synapse/replication/http/devices.py
+++ b/synapse/replication/http/devices.py
@@ -13,12 +13,13 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, Optional, Tuple
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
 
 from twisted.web.server import Request
 
 from synapse.http.server import HttpServer
 from synapse.http.servlet import parse_json_object_from_request
+from synapse.logging.opentracing import active_span
 from synapse.replication.http._base import ReplicationEndpoint
 from synapse.types import JsonDict
 
@@ -84,6 +85,76 @@ class ReplicationUserDevicesResyncRestServlet(ReplicationEndpoint):
         return 200, user_devices
 
 
+class ReplicationMultiUserDevicesResyncRestServlet(ReplicationEndpoint):
+    """Ask master to resync the device list for multiple users from the same
+    remote server by contacting their server.
+
+    This must happen on master so that the results can be correctly cached in
+    the database and streamed to workers.
+
+    Request format:
+
+        POST /_synapse/replication/multi_user_device_resync
+
+        {
+            "user_ids": ["@alice:example.org", "@bob:example.org", ...]
+        }
+
+    Response is roughly equivalent to ` /_matrix/federation/v1/user/devices/:user_id`
+    response, but there is a map from user ID to response, e.g.:
+
+        {
+            "@alice:example.org": {
+                "devices": [
+                    {
+                        "device_id": "JLAFKJWSCS",
+                        "keys": { ... },
+                        "device_display_name": "Alice's Mobile Phone"
+                    }
+                ]
+            },
+            ...
+        }
+    """
+
+    NAME = "multi_user_device_resync"
+    PATH_ARGS = ()
+    CACHE = False
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__(hs)
+
+        from synapse.handlers.device import DeviceHandler
+
+        handler = hs.get_device_handler()
+        assert isinstance(handler, DeviceHandler)
+        self.device_list_updater = handler.device_list_updater
+
+        self.store = hs.get_datastores().main
+        self.clock = hs.get_clock()
+
+    @staticmethod
+    async def _serialize_payload(user_ids: List[str]) -> JsonDict:  # type: ignore[override]
+        return {"user_ids": user_ids}
+
+    async def _handle_request(  # type: ignore[override]
+        self, request: Request
+    ) -> Tuple[int, Dict[str, Optional[JsonDict]]]:
+        content = parse_json_object_from_request(request)
+        user_ids: List[str] = content["user_ids"]
+
+        logger.info("Resync for %r", user_ids)
+        span = active_span()
+        if span:
+            span.set_tag("user_ids", f"{user_ids!r}")
+
+        multi_user_devices = await self.device_list_updater.multi_user_device_resync(
+            user_ids
+        )
+
+        return 200, multi_user_devices
+
+
 class ReplicationUploadKeysForUserRestServlet(ReplicationEndpoint):
     """Ask master to upload keys for the user and send them out over federation to
     update other servers.
@@ -151,4 +222,5 @@ class ReplicationUploadKeysForUserRestServlet(ReplicationEndpoint):
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     ReplicationUserDevicesResyncRestServlet(hs).register(http_server)
+    ReplicationMultiUserDevicesResyncRestServlet(hs).register(http_server)
     ReplicationUploadKeysForUserRestServlet(hs).register(http_server)
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index db877e3f13..b067664473 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -54,7 +54,7 @@ from synapse.storage.util.id_generators import (
     AbstractStreamIdTracker,
     StreamIdGenerator,
 )
-from synapse.types import JsonDict, get_verify_key_from_cross_signing_key
+from synapse.types import JsonDict, StrCollection, get_verify_key_from_cross_signing_key
 from synapse.util import json_decoder, json_encoder
 from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.caches.lrucache import LruCache
@@ -1069,16 +1069,30 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
 
         return {row["user_id"] for row in rows}
 
-    async def mark_remote_user_device_cache_as_stale(self, user_id: str) -> None:
+    async def mark_remote_users_device_caches_as_stale(
+        self, user_ids: StrCollection
+    ) -> None:
         """Records that the server has reason to believe the cache of the devices
         for the remote users is out of date.
         """
-        await self.db_pool.simple_upsert(
-            table="device_lists_remote_resync",
-            keyvalues={"user_id": user_id},
-            values={},
-            insertion_values={"added_ts": self._clock.time_msec()},
-            desc="mark_remote_user_device_cache_as_stale",
+
+        def _mark_remote_users_device_caches_as_stale_txn(
+            txn: LoggingTransaction,
+        ) -> None:
+            # TODO add insertion_values support to simple_upsert_many and use
+            #      that!
+            for user_id in user_ids:
+                self.db_pool.simple_upsert_txn(
+                    txn,
+                    table="device_lists_remote_resync",
+                    keyvalues={"user_id": user_id},
+                    values={},
+                    insertion_values={"added_ts": self._clock.time_msec()},
+                )
+
+        await self.db_pool.runInteraction(
+            "mark_remote_users_device_caches_as_stale",
+            _mark_remote_users_device_caches_as_stale_txn,
         )
 
     async def mark_remote_user_device_cache_as_valid(self, user_id: str) -> None:
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index f2d436ddc3..0c725eb967 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -77,6 +77,10 @@ JsonMapping = Mapping[str, Any]
 # A JSON-serialisable object.
 JsonSerializable = object
 
+# Collection[str] that does not include str itself; str being a Sequence[str]
+# is very misleading and results in bugs.
+StrCollection = Union[Tuple[str, ...], List[str], Set[str]]
+
 
 # Note that this seems to require inheriting *directly* from Interface in order
 # for mypy-zope to realize it is an interface.
diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py
index d24c4f68c4..01e3cd46f6 100644
--- a/synapse/util/async_helpers.py
+++ b/synapse/util/async_helpers.py
@@ -205,7 +205,10 @@ T = TypeVar("T")
 
 
 async def concurrently_execute(
-    func: Callable[[T], Any], args: Iterable[T], limit: int
+    func: Callable[[T], Any],
+    args: Iterable[T],
+    limit: int,
+    delay_cancellation: bool = False,
 ) -> None:
     """Executes the function with each argument concurrently while limiting
     the number of concurrent executions.
@@ -215,6 +218,8 @@ async def concurrently_execute(
         args: List of arguments to pass to func, each invocation of func
             gets a single argument.
         limit: Maximum number of conccurent executions.
+        delay_cancellation: Whether to delay cancellation until after the invocations
+            have finished.
 
     Returns:
         None, when all function invocations have finished. The return values
@@ -233,9 +238,16 @@ async def concurrently_execute(
     # We use `itertools.islice` to handle the case where the number of args is
     # less than the limit, avoiding needlessly spawning unnecessary background
     # tasks.
-    await yieldable_gather_results(
-        _concurrently_execute_inner, (value for value in itertools.islice(it, limit))
-    )
+    if delay_cancellation:
+        await yieldable_gather_results_delaying_cancellation(
+            _concurrently_execute_inner,
+            (value for value in itertools.islice(it, limit)),
+        )
+    else:
+        await yieldable_gather_results(
+            _concurrently_execute_inner,
+            (value for value in itertools.islice(it, limit)),
+        )
 
 
 P = ParamSpec("P")
@@ -292,6 +304,41 @@ async def yieldable_gather_results(
         raise dfe.subFailure.value from None
 
 
+async def yieldable_gather_results_delaying_cancellation(
+    func: Callable[Concatenate[T, P], Awaitable[R]],
+    iter: Iterable[T],
+    *args: P.args,
+    **kwargs: P.kwargs,
+) -> List[R]:
+    """Executes the function with each argument concurrently.
+    Cancellation is delayed until after all the results have been gathered.
+
+    See `yieldable_gather_results`.
+
+    Args:
+        func: Function to execute that returns a Deferred
+        iter: An iterable that yields items that get passed as the first
+            argument to the function
+        *args: Arguments to be passed to each call to func
+        **kwargs: Keyword arguments to be passed to each call to func
+
+    Returns:
+        A list containing the results of the function
+    """
+    try:
+        return await make_deferred_yieldable(
+            delay_cancellation(
+                defer.gatherResults(
+                    [run_in_background(func, item, *args, **kwargs) for item in iter],  # type: ignore[arg-type]
+                    consumeErrors=True,
+                )
+            )
+        )
+    except defer.FirstError as dfe:
+        assert isinstance(dfe.subFailure.value, BaseException)
+        raise dfe.subFailure.value from None
+
+
 T1 = TypeVar("T1")
 T2 = TypeVar("T2")
 T3 = TypeVar("T3")
-- 
cgit 1.5.1


From 06ab64f201dffcb93b826546e20be53cc712c8b8 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Tue, 10 Jan 2023 16:31:28 +0000
Subject: Implement MSC3925: changes to bundling of edits (#14811)

Two parts to this:

 * Bundle the whole of the replacement with any edited events. This is backwards-compatible so I haven't put it behind a flag.
 * Optionally, inhibit server-side replacement of edited events. This has scope to break things, so it is currently disabled by default.
---
 changelog.d/14811.feature           |   1 +
 synapse/config/experimental.py      |   3 +
 synapse/events/utils.py             |  31 ++++--
 synapse/server.py                   |   2 +-
 tests/rest/client/test_relations.py | 185 +++++++++++++++++++++++++-----------
 5 files changed, 159 insertions(+), 63 deletions(-)
 create mode 100644 changelog.d/14811.feature

(limited to 'synapse')

diff --git a/changelog.d/14811.feature b/changelog.d/14811.feature
new file mode 100644
index 0000000000..87542835c3
--- /dev/null
+++ b/changelog.d/14811.feature
@@ -0,0 +1 @@
+Per [MSC3925](https://github.com/matrix-org/matrix-spec-proposals/pull/3925), bundle the whole of the replacement with any edited events, and optionally inhibit server-side replacement.
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 0f3870bfe1..a8b2db372d 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -139,3 +139,6 @@ class ExperimentalConfig(Config):
 
         # MSC3391: Removing account data.
         self.msc3391_enabled = experimental.get("msc3391_enabled", False)
+
+        # MSC3925: do not replace events with their edits
+        self.msc3925_inhibit_edit = experimental.get("msc3925_inhibit_edit", False)
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index 13fa93afb8..ae57a4df5e 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -403,6 +403,14 @@ class EventClientSerializer:
     clients.
     """
 
+    def __init__(self, inhibit_replacement_via_edits: bool = False):
+        """
+        Args:
+            inhibit_replacement_via_edits: If this is set to True, then events are
+               never replaced by their edits.
+        """
+        self._inhibit_replacement_via_edits = inhibit_replacement_via_edits
+
     def serialize_event(
         self,
         event: Union[JsonDict, EventBase],
@@ -422,6 +430,8 @@ class EventClientSerializer:
                into the event.
             apply_edits: Whether the content of the event should be modified to reflect
                any replacement in `bundle_aggregations[<event_id>].replace`.
+               See also the `inhibit_replacement_via_edits` constructor arg: if that is
+               set to True, then this argument is ignored.
         Returns:
             The serialized event
         """
@@ -495,7 +505,8 @@ class EventClientSerializer:
                 again for additional events in a recursive manner.
             serialized_event: The serialized event which may be modified.
             apply_edits: Whether the content of the event should be modified to reflect
-               any replacement in `aggregations.replace`.
+               any replacement in `aggregations.replace` (subject to the
+               `inhibit_replacement_via_edits` constructor arg).
         """
 
         # We have already checked that aggregations exist for this event.
@@ -518,15 +529,21 @@ class EventClientSerializer:
         if event_aggregations.replace:
             # If there is an edit, optionally apply it to the event.
             edit = event_aggregations.replace
-            if apply_edits:
+            if apply_edits and not self._inhibit_replacement_via_edits:
                 self._apply_edit(event, serialized_event, edit)
 
             # Include information about it in the relations dict.
-            serialized_aggregations[RelationTypes.REPLACE] = {
-                "event_id": edit.event_id,
-                "origin_server_ts": edit.origin_server_ts,
-                "sender": edit.sender,
-            }
+            #
+            # Matrix spec v1.5 (https://spec.matrix.org/v1.5/client-server-api/#server-side-aggregation-of-mreplace-relationships)
+            # said that we should only include the `event_id`, `origin_server_ts` and
+            # `sender` of the edit; however MSC3925 proposes extending it to the whole
+            # of the edit, which is what we do here.
+            serialized_aggregations[RelationTypes.REPLACE] = self.serialize_event(
+                edit,
+                time_now,
+                config=config,
+                apply_edits=False,
+            )
 
         # Include any threaded replies to this event.
         if event_aggregations.thread:
diff --git a/synapse/server.py b/synapse/server.py
index 5baae2325e..f4ab94c4f3 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -743,7 +743,7 @@ class HomeServer(metaclass=abc.ABCMeta):
 
     @cache_in_self
     def get_event_client_serializer(self) -> EventClientSerializer:
-        return EventClientSerializer()
+        return EventClientSerializer(self.config.experimental.msc3925_inhibit_edit)
 
     @cache_in_self
     def get_password_policy_handler(self) -> PasswordPolicyHandler:
diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py
index b86f341ff5..c8a6911d5e 100644
--- a/tests/rest/client/test_relations.py
+++ b/tests/rest/client/test_relations.py
@@ -30,6 +30,7 @@ from tests import unittest
 from tests.server import FakeChannel
 from tests.test_utils import make_awaitable
 from tests.test_utils.event_injection import inject_event
+from tests.unittest import override_config
 
 
 class BaseRelationsTestCase(unittest.HomeserverTestCase):
@@ -355,30 +356,67 @@ class RelationsTestCase(BaseRelationsTestCase):
         self.assertEqual(200, channel.code, channel.json_body)
         self.assertNotIn("m.relations", channel.json_body["unsigned"])
 
+    def _assert_edit_bundle(
+        self, event_json: JsonDict, edit_event_id: str, edit_event_content: JsonDict
+    ) -> None:
+        """
+        Assert that the given event has a correctly-serialised edit event in its
+        bundled aggregations
+
+        Args:
+            event_json: the serialised event to be checked
+            edit_event_id: the ID of the edit event that we expect to be bundled
+            edit_event_content: the content of that event, excluding the `m.relates_to`
+               property
+        """
+        relations_dict = event_json["unsigned"].get("m.relations")
+        self.assertIn(RelationTypes.REPLACE, relations_dict)
+
+        m_replace_dict = relations_dict[RelationTypes.REPLACE]
+        for key in [
+            "event_id",
+            "sender",
+            "origin_server_ts",
+            "content",
+            "type",
+            "unsigned",
+        ]:
+            self.assertIn(key, m_replace_dict)
+
+        expected_edit_content = {
+            "m.relates_to": {
+                "event_id": event_json["event_id"],
+                "rel_type": "m.replace",
+            }
+        }
+        expected_edit_content.update(edit_event_content)
+
+        self.assert_dict(
+            {
+                "event_id": edit_event_id,
+                "sender": self.user_id,
+                "content": expected_edit_content,
+                "type": "m.room.message",
+            },
+            m_replace_dict,
+        )
+
     def test_edit(self) -> None:
         """Test that a simple edit works."""
 
         new_body = {"msgtype": "m.text", "body": "I've been edited!"}
+        edit_event_content = {
+            "msgtype": "m.text",
+            "body": "foo",
+            "m.new_content": new_body,
+        }
         channel = self._send_relation(
             RelationTypes.REPLACE,
             "m.room.message",
-            content={"msgtype": "m.text", "body": "foo", "m.new_content": new_body},
+            content=edit_event_content,
         )
         edit_event_id = channel.json_body["event_id"]
 
-        def assert_bundle(event_json: JsonDict) -> None:
-            """Assert the expected values of the bundled aggregations."""
-            relations_dict = event_json["unsigned"].get("m.relations")
-            self.assertIn(RelationTypes.REPLACE, relations_dict)
-
-            m_replace_dict = relations_dict[RelationTypes.REPLACE]
-            for key in ["event_id", "sender", "origin_server_ts"]:
-                self.assertIn(key, m_replace_dict)
-
-            self.assert_dict(
-                {"event_id": edit_event_id, "sender": self.user_id}, m_replace_dict
-            )
-
         # /event should return the *original* event
         channel = self.make_request(
             "GET",
@@ -389,7 +427,7 @@ class RelationsTestCase(BaseRelationsTestCase):
         self.assertEqual(
             channel.json_body["content"], {"body": "Hi!", "msgtype": "m.text"}
         )
-        assert_bundle(channel.json_body)
+        self._assert_edit_bundle(channel.json_body, edit_event_id, edit_event_content)
 
         # Request the room messages.
         channel = self.make_request(
@@ -398,7 +436,11 @@ class RelationsTestCase(BaseRelationsTestCase):
             access_token=self.user_token,
         )
         self.assertEqual(200, channel.code, channel.json_body)
-        assert_bundle(self._find_event_in_chunk(channel.json_body["chunk"]))
+        self._assert_edit_bundle(
+            self._find_event_in_chunk(channel.json_body["chunk"]),
+            edit_event_id,
+            edit_event_content,
+        )
 
         # Request the room context.
         # /context should return the edited event.
@@ -408,7 +450,9 @@ class RelationsTestCase(BaseRelationsTestCase):
             access_token=self.user_token,
         )
         self.assertEqual(200, channel.code, channel.json_body)
-        assert_bundle(channel.json_body["event"])
+        self._assert_edit_bundle(
+            channel.json_body["event"], edit_event_id, edit_event_content
+        )
         self.assertEqual(channel.json_body["event"]["content"], new_body)
 
         # Request sync, but limit the timeline so it becomes limited (and includes
@@ -420,7 +464,11 @@ class RelationsTestCase(BaseRelationsTestCase):
         self.assertEqual(200, channel.code, channel.json_body)
         room_timeline = channel.json_body["rooms"]["join"][self.room]["timeline"]
         self.assertTrue(room_timeline["limited"])
-        assert_bundle(self._find_event_in_chunk(room_timeline["events"]))
+        self._assert_edit_bundle(
+            self._find_event_in_chunk(room_timeline["events"]),
+            edit_event_id,
+            edit_event_content,
+        )
 
         # Request search.
         channel = self.make_request(
@@ -437,7 +485,45 @@ class RelationsTestCase(BaseRelationsTestCase):
                 "results"
             ]
         ]
-        assert_bundle(self._find_event_in_chunk(chunk))
+        self._assert_edit_bundle(
+            self._find_event_in_chunk(chunk),
+            edit_event_id,
+            edit_event_content,
+        )
+
+    @override_config({"experimental_features": {"msc3925_inhibit_edit": True}})
+    def test_edit_inhibit_replace(self) -> None:
+        """
+        If msc3925_inhibit_edit is enabled, then the original event should not be
+        replaced.
+        """
+
+        new_body = {"msgtype": "m.text", "body": "I've been edited!"}
+        edit_event_content = {
+            "msgtype": "m.text",
+            "body": "foo",
+            "m.new_content": new_body,
+        }
+        channel = self._send_relation(
+            RelationTypes.REPLACE,
+            "m.room.message",
+            content=edit_event_content,
+        )
+        edit_event_id = channel.json_body["event_id"]
+
+        # /context should return the *original* event.
+        channel = self.make_request(
+            "GET",
+            f"/rooms/{self.room}/context/{self.parent_id}",
+            access_token=self.user_token,
+        )
+        self.assertEqual(200, channel.code, channel.json_body)
+        self.assertEqual(
+            channel.json_body["event"]["content"], {"body": "Hi!", "msgtype": "m.text"}
+        )
+        self._assert_edit_bundle(
+            channel.json_body["event"], edit_event_id, edit_event_content
+        )
 
     def test_multi_edit(self) -> None:
         """Test that multiple edits, including attempts by people who
@@ -455,10 +541,15 @@ class RelationsTestCase(BaseRelationsTestCase):
         )
 
         new_body = {"msgtype": "m.text", "body": "I've been edited!"}
+        edit_event_content = {
+            "msgtype": "m.text",
+            "body": "foo",
+            "m.new_content": new_body,
+        }
         channel = self._send_relation(
             RelationTypes.REPLACE,
             "m.room.message",
-            content={"msgtype": "m.text", "body": "foo", "m.new_content": new_body},
+            content=edit_event_content,
         )
         edit_event_id = channel.json_body["event_id"]
 
@@ -480,16 +571,8 @@ class RelationsTestCase(BaseRelationsTestCase):
         self.assertEqual(200, channel.code, channel.json_body)
 
         self.assertEqual(channel.json_body["event"]["content"], new_body)
-
-        relations_dict = channel.json_body["event"]["unsigned"].get("m.relations")
-        self.assertIn(RelationTypes.REPLACE, relations_dict)
-
-        m_replace_dict = relations_dict[RelationTypes.REPLACE]
-        for key in ["event_id", "sender", "origin_server_ts"]:
-            self.assertIn(key, m_replace_dict)
-
-        self.assert_dict(
-            {"event_id": edit_event_id, "sender": self.user_id}, m_replace_dict
+        self._assert_edit_bundle(
+            channel.json_body["event"], edit_event_id, edit_event_content
         )
 
     def test_edit_reply(self) -> None:
@@ -502,11 +585,15 @@ class RelationsTestCase(BaseRelationsTestCase):
         )
         reply = channel.json_body["event_id"]
 
-        new_body = {"msgtype": "m.text", "body": "I've been edited!"}
+        edit_event_content = {
+            "msgtype": "m.text",
+            "body": "foo",
+            "m.new_content": {"msgtype": "m.text", "body": "I've been edited!"},
+        }
         channel = self._send_relation(
             RelationTypes.REPLACE,
             "m.room.message",
-            content={"msgtype": "m.text", "body": "foo", "m.new_content": new_body},
+            content=edit_event_content,
             parent_id=reply,
         )
         edit_event_id = channel.json_body["event_id"]
@@ -549,28 +636,22 @@ class RelationsTestCase(BaseRelationsTestCase):
 
             # We expect that the edit relation appears in the unsigned relations
             # section.
-            relations_dict = result_event_dict["unsigned"].get("m.relations")
-            self.assertIn(RelationTypes.REPLACE, relations_dict, desc)
-
-            m_replace_dict = relations_dict[RelationTypes.REPLACE]
-            for key in ["event_id", "sender", "origin_server_ts"]:
-                self.assertIn(key, m_replace_dict, desc)
-
-            self.assert_dict(
-                {"event_id": edit_event_id, "sender": self.user_id}, m_replace_dict
+            self._assert_edit_bundle(
+                result_event_dict, edit_event_id, edit_event_content
             )
 
     def test_edit_edit(self) -> None:
         """Test that an edit cannot be edited."""
         new_body = {"msgtype": "m.text", "body": "Initial edit"}
+        edit_event_content = {
+            "msgtype": "m.text",
+            "body": "Wibble",
+            "m.new_content": new_body,
+        }
         channel = self._send_relation(
             RelationTypes.REPLACE,
             "m.room.message",
-            content={
-                "msgtype": "m.text",
-                "body": "Wibble",
-                "m.new_content": new_body,
-            },
+            content=edit_event_content,
         )
         edit_event_id = channel.json_body["event_id"]
 
@@ -599,8 +680,7 @@ class RelationsTestCase(BaseRelationsTestCase):
         )
 
         # The relations information should not include the edit to the edit.
-        relations_dict = channel.json_body["unsigned"].get("m.relations")
-        self.assertIn(RelationTypes.REPLACE, relations_dict)
+        self._assert_edit_bundle(channel.json_body, edit_event_id, edit_event_content)
 
         # /context should return the event updated for the *first* edit
         # (The edit to the edit should be ignored.)
@@ -611,13 +691,8 @@ class RelationsTestCase(BaseRelationsTestCase):
         )
         self.assertEqual(200, channel.code, channel.json_body)
         self.assertEqual(channel.json_body["event"]["content"], new_body)
-
-        m_replace_dict = relations_dict[RelationTypes.REPLACE]
-        for key in ["event_id", "sender", "origin_server_ts"]:
-            self.assertIn(key, m_replace_dict)
-
-        self.assert_dict(
-            {"event_id": edit_event_id, "sender": self.user_id}, m_replace_dict
+        self._assert_edit_bundle(
+            channel.json_body["event"], edit_event_id, edit_event_content
         )
 
         # Directly requesting the edit should not have the edit to the edit applied.
-- 
cgit 1.5.1


From 73f097888eedaad05eda6b2453b6558158c0b032 Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Wed, 11 Jan 2023 13:00:38 +0100
Subject: Add listener `health` (#14747)

Fixes: #8780
---
 changelog.d/14747.feature                        | 1 +
 docs/usage/configuration/config_documentation.md | 6 ++++++
 synapse/app/generic_worker.py                    | 3 +++
 synapse/app/homeserver.py                        | 3 +++
 4 files changed, 13 insertions(+)
 create mode 100644 changelog.d/14747.feature

(limited to 'synapse')

diff --git a/changelog.d/14747.feature b/changelog.d/14747.feature
new file mode 100644
index 0000000000..0b8066159c
--- /dev/null
+++ b/changelog.d/14747.feature
@@ -0,0 +1 @@
+Add a dedicated listener configuration for the `health` endpoint.
\ No newline at end of file
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index a355eef529..294dd6eddd 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -480,6 +480,12 @@ Valid resource names are:
 
 * `static`: static resources under synapse/static (/_matrix/static). (Mostly useful for 'fallback authentication'.)
 
+* `health`: the [health check endpoint](../../reverse_proxy.md#health-check-endpoint). This endpoint
+  is active by default for all other resources and does not have to be activated separately.
+  This resource is only useful if you want to use the health endpoint explicitly on a dedicated port, or
+  for [workers](../../workers.md) and containers without a listener, e.g.
+  [application services](../../workers.md#notifying-application-services).
+
 Example configuration #1:
 ```yaml
 listeners:
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index bcc8abe20c..8108b1e98f 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -199,6 +199,9 @@ class GenericWorkerServer(HomeServer):
                             "A 'media' listener is configured but the media"
                             " repository is disabled. Ignoring."
                         )
+                elif name == "health":
+                    # Skip loading, health resource is always included
+                    continue
 
                 if name == "openid" and "federation" not in res.names:
                     # Only load the openid resource separately if federation resource
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index b9be558c7e..6176a70eb2 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -96,6 +96,9 @@ class SynapseHomeServer(HomeServer):
                     # Skip loading openid resource if federation is defined
                     # since federation resource will include openid
                     continue
+                if name == "health":
+                    # Skip loading, health resource is always included
+                    continue
                 resources.update(self._configure_named_resource(name, res.compress))
 
         additional_resources = listener_config.http_options.additional_resources
-- 
cgit 1.5.1


From 3952297f6f39906a65e70bce7becc1acd300a287 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 11 Jan 2023 07:16:41 -0500
Subject: Calculate rooms changed for device lists to work. (#14810)

Back-out some changes from 7e582a25f8f350df29d7d83ca902bdb522d1bbaf
(#14786) which skipped necessary logic to calculate device lists properly.
---
 changelog.d/14810.bugfix | 1 +
 synapse/api/filtering.py | 3 ---
 synapse/handlers/sync.py | 4 ----
 3 files changed, 1 insertion(+), 7 deletions(-)
 create mode 100644 changelog.d/14810.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14810.bugfix b/changelog.d/14810.bugfix
new file mode 100644
index 0000000000..379bfccffa
--- /dev/null
+++ b/changelog.d/14810.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.75.0rc1 where device lists could be miscalculated with some sync filters.
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index 2b5af264b4..4cf8f0cc8e 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -283,9 +283,6 @@ class FilterCollection:
             await self._room_filter.filter(events)
         )
 
-    def blocks_all_rooms(self) -> bool:
-        return self._room_filter.filters_all_rooms()
-
     def blocks_all_presence(self) -> bool:
         return (
             self._presence_filter.filters_all_types()
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 6942e06c77..20ee2f203a 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1793,10 +1793,6 @@ class SyncHandler:
             - newly_left_users
         """
 
-        # If the request doesn't care about rooms then nothing to do!
-        if sync_result_builder.sync_config.filter_collection.blocks_all_rooms():
-            return set(), set(), set(), set()
-
         since_token = sync_result_builder.since_token
 
         # 1. Start by fetching all ephemeral events in rooms we've joined (if required).
-- 
cgit 1.5.1


From d6bda5adddd863409961dbafcd018356c213610e Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Wed, 11 Jan 2023 12:29:13 +0000
Subject: Add index to improve performance of the `/timestamp_to_event`
 endpoint used for jumping to a specific date in the timeline of a room.
 (#14799)

---
 changelog.d/14799.bugfix                                |  1 +
 synapse/storage/databases/main/events_bg_updates.py     | 12 ++++++++++++
 .../main/delta/73/24_events_jump_to_date_index.sql      | 17 +++++++++++++++++
 3 files changed, 30 insertions(+)
 create mode 100644 changelog.d/14799.bugfix
 create mode 100644 synapse/storage/schema/main/delta/73/24_events_jump_to_date_index.sql

(limited to 'synapse')

diff --git a/changelog.d/14799.bugfix b/changelog.d/14799.bugfix
new file mode 100644
index 0000000000..dc867bd93a
--- /dev/null
+++ b/changelog.d/14799.bugfix
@@ -0,0 +1 @@
+Add index to improve performance of the `/timestamp_to_event` endpoint used for jumping to a specific date in the timeline of a room.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
index 9e31798ab1..b9d3c36d60 100644
--- a/synapse/storage/databases/main/events_bg_updates.py
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -69,6 +69,8 @@ class _BackgroundUpdates:
 
     EVENTS_POPULATE_STATE_KEY_REJECTIONS = "events_populate_state_key_rejections"
 
+    EVENTS_JUMP_TO_DATE_INDEX = "events_jump_to_date_index"
+
 
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class _CalculateChainCover:
@@ -260,6 +262,16 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
             self._background_events_populate_state_key_rejections,
         )
 
+        # Add an index that would be useful for jumping to date using
+        # get_event_id_for_timestamp.
+        self.db_pool.updates.register_background_index_update(
+            _BackgroundUpdates.EVENTS_JUMP_TO_DATE_INDEX,
+            index_name="events_jump_to_date_idx",
+            table="events",
+            columns=["room_id", "origin_server_ts"],
+            where_clause="NOT outlier",
+        )
+
     async def _background_reindex_fields_sender(
         self, progress: JsonDict, batch_size: int
     ) -> int:
diff --git a/synapse/storage/schema/main/delta/73/24_events_jump_to_date_index.sql b/synapse/storage/schema/main/delta/73/24_events_jump_to_date_index.sql
new file mode 100644
index 0000000000..67059909a1
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/24_events_jump_to_date_index.sql
@@ -0,0 +1,17 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7324, 'events_jump_to_date_index', '{}');
-- 
cgit 1.5.1


From 5172c8c403d94ea5f184abc8b3c37dbd19a849bc Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Wed, 11 Jan 2023 13:21:53 +0000
Subject: Faster remote room joins (worker mode): do not populate external
 hosts-in-room cache when sending events as this requires blocking for full
 state. (#14749)

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
Co-authored-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14749.misc      |  1 +
 synapse/handlers/message.py | 21 ++++++++++++++++-----
 2 files changed, 17 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/14749.misc

(limited to 'synapse')

diff --git a/changelog.d/14749.misc b/changelog.d/14749.misc
new file mode 100644
index 0000000000..ff81325225
--- /dev/null
+++ b/changelog.d/14749.misc
@@ -0,0 +1 @@
+Faster remote room joins (worker mode): do not populate external hosts-in-room cache when sending events as this requires blocking for full state.
\ No newline at end of file
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 88fc51a4c9..3278a695ed 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -1531,12 +1531,23 @@ class EventCreationHandler:
         external federation senders don't have to recalculate it themselves.
         """
 
-        for event, _ in events_and_context:
-            if not self._external_cache.is_enabled():
-                return
+        if not self._external_cache.is_enabled():
+            return
 
-            # If external cache is enabled we should always have this.
-            assert self._external_cache_joined_hosts_updates is not None
+        # If external cache is enabled we should always have this.
+        assert self._external_cache_joined_hosts_updates is not None
+
+        for event, event_context in events_and_context:
+            if event_context.partial_state:
+                # To populate the cache for a partial-state event, we either have to
+                # block until full state, which the code below does, or change the
+                # meaning of cache values to be the list of hosts to which we plan to
+                # send events and calculate that instead.
+                #
+                # The federation senders don't use the external cache when sending
+                # events in partial-state rooms anyway, so let's not bother populating
+                # the cache.
+                continue
 
             # We actually store two mappings, event ID -> prev state group,
             # state group -> joined hosts, which is much more space efficient
-- 
cgit 1.5.1


From dd9e71dc7fa91b81adfaaf8669aaf7ee976ffcd7 Mon Sep 17 00:00:00 2001
From: Emelie Graven <e.graven@famedly.com>
Date: Wed, 11 Jan 2023 19:41:52 +0100
Subject: Add `set_displayname` to the module API (#14629)

---
 changelog.d/14629.feature      |  1 +
 synapse/module_api/__init__.py | 27 +++++++++++++++++++++++++++
 tests/module_api/test_api.py   | 18 ++++++++++++++++++
 3 files changed, 46 insertions(+)
 create mode 100644 changelog.d/14629.feature

(limited to 'synapse')

diff --git a/changelog.d/14629.feature b/changelog.d/14629.feature
new file mode 100644
index 0000000000..78f5fc2403
--- /dev/null
+++ b/changelog.d/14629.feature
@@ -0,0 +1 @@
+Adds a `set_displayname()` method to the module API for setting a user's display name.
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 6f4a934b05..6153a48257 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -1585,6 +1585,33 @@ class ModuleApi:
 
         return room_id_and_alias["room_id"], room_id_and_alias.get("room_alias", None)
 
+    async def set_displayname(
+        self,
+        user_id: UserID,
+        new_displayname: str,
+        deactivation: bool = False,
+    ) -> None:
+        """Sets a user's display name.
+
+        Added in Synapse v1.76.0.
+
+        Args:
+            user_id:
+                The user whose display name is to be changed.
+            new_displayname:
+                The new display name to give the user.
+            deactivation:
+                Whether this change was made while deactivating the user.
+        """
+        requester = create_requester(user_id)
+        await self._hs.get_profile_handler().set_displayname(
+            target_user=user_id,
+            requester=requester,
+            new_displayname=new_displayname,
+            by_admin=True,
+            deactivation=deactivation,
+        )
+
 
 class PublicRoomListManager:
     """Contains methods for adding to, removing from and querying whether a room
diff --git a/tests/module_api/test_api.py b/tests/module_api/test_api.py
index b0f3f4374d..9919938e80 100644
--- a/tests/module_api/test_api.py
+++ b/tests/module_api/test_api.py
@@ -110,6 +110,24 @@ class ModuleApiTestCase(HomeserverTestCase):
         self.assertEqual(found_user.user_id.to_string(), user_id)
         self.assertIdentical(found_user.is_admin, True)
 
+    def test_can_set_displayname(self):
+        localpart = "alice_wants_a_new_displayname"
+        user_id = self.register_user(
+            localpart, "1234", displayname="Alice", admin=False
+        )
+        found_userinfo = self.get_success(self.module_api.get_userinfo_by_id(user_id))
+
+        self.get_success(
+            self.module_api.set_displayname(
+                found_userinfo.user_id, "Bob", deactivation=False
+            )
+        )
+        found_profile = self.get_success(
+            self.module_api.get_profile_for_user(localpart)
+        )
+
+        self.assertEqual(found_profile.display_name, "Bob")
+
     def test_get_userinfo_by_id(self):
         user_id = self.register_user("alice", "1234")
         found_user = self.get_success(self.module_api.get_userinfo_by_id(user_id))
-- 
cgit 1.5.1


From 84ce93c12f921063bb6c59400fcf95649a1b7f45 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 12 Jan 2023 10:29:09 +0000
Subject: Fix race calling `/members?at=` (#14817)

Fixes #14814
---
 changelog.d/14817.bugfix                 |  1 +
 synapse/storage/databases/main/stream.py | 65 +++++++++++++++++++++++++++++---
 2 files changed, 60 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/14817.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14817.bugfix b/changelog.d/14817.bugfix
new file mode 100644
index 0000000000..bb5da79268
--- /dev/null
+++ b/changelog.d/14817.bugfix
@@ -0,0 +1 @@
+Fix race where calling `/members` or `/state` with an `at` parameter could fail for newly created rooms, when using multiple workers.
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index cc27ec3804..63d8350530 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -801,13 +801,66 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
             before this stream ordering.
         """
 
-        last_row = await self.get_room_event_before_stream_ordering(
-            room_id=room_id,
-            stream_ordering=end_token.stream,
+        def get_last_event_in_room_before_stream_ordering_txn(
+            txn: LoggingTransaction,
+        ) -> Optional[str]:
+            # We need to handle the fact that the stream tokens can be vector
+            # clocks. We do this by getting all rows between the minimum and
+            # maximum stream ordering in the token, plus the latest row at or
+            # below the minimum stream ordering. We then filter the results
+            # against the token and return the first row that matches.
+
+            sql = """
+                SELECT * FROM (
+                    SELECT instance_name, stream_ordering, topological_ordering, event_id
+                    FROM events
+                    LEFT JOIN rejections USING (event_id)
+                    WHERE room_id = ?
+                        AND ? < stream_ordering AND stream_ordering <= ?
+                        AND NOT outlier
+                        AND rejections.event_id IS NULL
+                    ORDER BY stream_ordering DESC
+                ) AS a
+                UNION
+                SELECT * FROM (
+                    SELECT instance_name, stream_ordering, topological_ordering, event_id
+                    FROM events
+                    LEFT JOIN rejections USING (event_id)
+                    WHERE room_id = ?
+                        AND stream_ordering <= ?
+                        AND NOT outlier
+                        AND rejections.event_id IS NULL
+                    ORDER BY stream_ordering DESC
+                    LIMIT 1
+                ) AS b
+            """
+            txn.execute(
+                sql,
+                (
+                    room_id,
+                    end_token.stream,
+                    end_token.get_max_stream_pos(),
+                    room_id,
+                    end_token.stream,
+                ),
+            )
+
+            for instance_name, stream_ordering, topological_ordering, event_id in txn:
+                if _filter_results(
+                    lower_token=None,
+                    upper_token=end_token,
+                    instance_name=instance_name,
+                    topological_ordering=topological_ordering,
+                    stream_ordering=stream_ordering,
+                ):
+                    return event_id
+
+            return None
+
+        return await self.db_pool.runInteraction(
+            "get_last_event_in_room_before_stream_ordering",
+            get_last_event_in_room_before_stream_ordering_txn,
         )
-        if last_row:
-            return last_row[2]
-        return None
 
     async def get_current_room_stream_token_for_room_id(
         self, room_id: str
-- 
cgit 1.5.1


From b50c008453001aee8dd7dbd6f36ec32039e6ce76 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 12 Jan 2023 10:52:07 +0000
Subject: Re-enable some linting (#14821)

* Re-enable some linting

* Newsfile

* Remove comment
---
 changelog.d/14821.misc                   |  1 +
 pyproject.toml                           |  8 --------
 stubs/sortedcontainers/sortedlist.pyi    |  1 -
 stubs/sortedcontainers/sortedset.pyi     |  2 --
 stubs/synapse/synapse_rust/push.pyi      |  2 +-
 synapse/config/_base.pyi                 | 10 ++++------
 tests/storage/test_event_push_actions.py |  6 +++---
 7 files changed, 9 insertions(+), 21 deletions(-)
 create mode 100644 changelog.d/14821.misc

(limited to 'synapse')

diff --git a/changelog.d/14821.misc b/changelog.d/14821.misc
new file mode 100644
index 0000000000..99e4e5e8a1
--- /dev/null
+++ b/changelog.d/14821.misc
@@ -0,0 +1 @@
+Re-enable some linting that was disabled when we switched to ruff.
diff --git a/pyproject.toml b/pyproject.toml
index 740d33066e..10d50ddb45 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,11 +48,6 @@ line-length = 88
 #  E731: do not assign a lambda expression, use a def
 #  E501: Line too long (black enforces this for us)
 #
-# See https://github.com/charliermarsh/ruff/#pyflakes
-#  F401: unused import
-#  F811: Redefinition of unused
-#  F821: Undefined name
-#
 # flake8-bugbear compatible checks. Its error codes are described at
 # https://github.com/charliermarsh/ruff/#flake8-bugbear
 #  B019: Use of functools.lru_cache or functools.cache on methods can lead to memory leaks
@@ -64,9 +59,6 @@ ignore = [
     "B024",
     "E501",
     "E731",
-    "F401",
-    "F811",
-    "F821",
 ]
 select = [
     # pycodestyle checks.
diff --git a/stubs/sortedcontainers/sortedlist.pyi b/stubs/sortedcontainers/sortedlist.pyi
index cd4c969849..1fe1a136f1 100644
--- a/stubs/sortedcontainers/sortedlist.pyi
+++ b/stubs/sortedcontainers/sortedlist.pyi
@@ -7,7 +7,6 @@ from __future__ import annotations
 from typing import (
     Any,
     Callable,
-    Generic,
     Iterable,
     Iterator,
     List,
diff --git a/stubs/sortedcontainers/sortedset.pyi b/stubs/sortedcontainers/sortedset.pyi
index d761c438f7..6db11eacbe 100644
--- a/stubs/sortedcontainers/sortedset.pyi
+++ b/stubs/sortedcontainers/sortedset.pyi
@@ -5,10 +5,8 @@
 from __future__ import annotations
 
 from typing import (
-    AbstractSet,
     Any,
     Callable,
-    Generic,
     Hashable,
     Iterable,
     Iterator,
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index b91f2edd7b..373b40740b 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Set, Tuple, Union
+from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Tuple, Union
 
 from synapse.types import JsonDict
 
diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi
index bd265de536..b5cec132b4 100644
--- a/synapse/config/_base.pyi
+++ b/synapse/config/_base.pyi
@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 import argparse
 from typing import (
     Any,
@@ -20,7 +18,7 @@ from typing import (
 
 import jinja2
 
-from synapse.config import (
+from synapse.config import (  # noqa: F401
     account_validity,
     api,
     appservice,
@@ -169,7 +167,7 @@ class RootConfig:
         self, section_name: Literal["caches"]
     ) -> cache.CacheConfig: ...
     @overload
-    def reload_config_section(self, section_name: str) -> Config: ...
+    def reload_config_section(self, section_name: str) -> "Config": ...
 
 class Config:
     root: RootConfig
@@ -202,9 +200,9 @@ def find_config_files(search_paths: List[str]) -> List[str]: ...
 class ShardedWorkerHandlingConfig:
     instances: List[str]
     def __init__(self, instances: List[str]) -> None: ...
-    def should_handle(self, instance_name: str, key: str) -> bool: ...
+    def should_handle(self, instance_name: str, key: str) -> bool: ...  # noqa: F811
 
 class RoutableShardedWorkerHandlingConfig(ShardedWorkerHandlingConfig):
-    def get_instance(self, key: str) -> str: ...
+    def get_instance(self, key: str) -> str: ...  # noqa: F811
 
 def read_file(file_path: Any, config_path: Iterable[str]) -> str: ...
diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py
index 5fa8bd2d98..76c06a9d1e 100644
--- a/tests/storage/test_event_push_actions.py
+++ b/tests/storage/test_event_push_actions.py
@@ -154,7 +154,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         # Create a user to receive notifications and send receipts.
         user_id, token, _, other_token, room_id = self._create_users_and_room()
 
-        last_event_id: str
+        last_event_id = ""
 
         def _assert_counts(notif_count: int, highlight_count: int) -> None:
             counts = self.get_success(
@@ -289,7 +289,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         user_id, token, _, other_token, room_id = self._create_users_and_room()
         thread_id: str
 
-        last_event_id: str
+        last_event_id = ""
 
         def _assert_counts(
             notif_count: int,
@@ -471,7 +471,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         user_id, token, _, other_token, room_id = self._create_users_and_room()
         thread_id: str
 
-        last_event_id: str
+        last_event_id = ""
 
         def _assert_counts(
             notif_count: int,
-- 
cgit 1.5.1


From 772e8c23856e27960caba4dd87af42401b6c0cac Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Fri, 13 Jan 2023 00:16:21 +0000
Subject: Fix stack overflow in `_PerHostRatelimiter` due to synchronous
 requests (#14812)

When there are many synchronous requests waiting on a
`_PerHostRatelimiter`, each request will be started recursively just
after the previous request has completed. Under the right conditions,
this leads to stack exhaustion.

A common way for requests to become synchronous is when the remote
client disconnects early, because the homeserver is overloaded and slow
to respond.

Avoid stack exhaustion under these conditions by deferring subsequent
requests until the next reactor tick.

Fixes #14480.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14812.bugfix          |  1 +
 synapse/rest/client/register.py   |  1 +
 synapse/server.py                 |  1 +
 synapse/util/ratelimitutils.py    | 34 +++++++++++++++++++++--------
 tests/util/test_ratelimitutils.py | 45 ++++++++++++++++++++++++++++++++++++---
 5 files changed, 70 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/14812.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14812.bugfix b/changelog.d/14812.bugfix
new file mode 100644
index 0000000000..94e0d70cbc
--- /dev/null
+++ b/changelog.d/14812.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where Synapse would exhaust the stack when processing many federation requests where the remote homeserver has disconnected early.
diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py
index 3cb1e7e375..be696c304b 100644
--- a/synapse/rest/client/register.py
+++ b/synapse/rest/client/register.py
@@ -310,6 +310,7 @@ class UsernameAvailabilityRestServlet(RestServlet):
         self.hs = hs
         self.registration_handler = hs.get_registration_handler()
         self.ratelimiter = FederationRateLimiter(
+            hs.get_reactor(),
             hs.get_clock(),
             FederationRatelimitSettings(
                 # Time window of 2s
diff --git a/synapse/server.py b/synapse/server.py
index f4ab94c4f3..c8752baa5a 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -768,6 +768,7 @@ class HomeServer(metaclass=abc.ABCMeta):
     @cache_in_self
     def get_federation_ratelimiter(self) -> FederationRateLimiter:
         return FederationRateLimiter(
+            self.get_reactor(),
             self.get_clock(),
             config=self.config.ratelimiting.rc_federation,
             metrics_name="federation_servlets",
diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py
index 2aceb1a47f..bd72947bfe 100644
--- a/synapse/util/ratelimitutils.py
+++ b/synapse/util/ratelimitutils.py
@@ -34,6 +34,7 @@ from prometheus_client.core import Counter
 from typing_extensions import ContextManager
 
 from twisted.internet import defer
+from twisted.internet.interfaces import IReactorTime
 
 from synapse.api.errors import LimitExceededError
 from synapse.config.ratelimiting import FederationRatelimitSettings
@@ -146,12 +147,14 @@ class FederationRateLimiter:
 
     def __init__(
         self,
+        reactor: IReactorTime,
         clock: Clock,
         config: FederationRatelimitSettings,
         metrics_name: Optional[str] = None,
     ):
         """
         Args:
+            reactor
             clock
             config
             metrics_name: The name of the rate limiter so we can differentiate it
@@ -163,7 +166,7 @@ class FederationRateLimiter:
 
         def new_limiter() -> "_PerHostRatelimiter":
             return _PerHostRatelimiter(
-                clock=clock, config=config, metrics_name=metrics_name
+                reactor=reactor, clock=clock, config=config, metrics_name=metrics_name
             )
 
         self.ratelimiters: DefaultDict[
@@ -194,12 +197,14 @@ class FederationRateLimiter:
 class _PerHostRatelimiter:
     def __init__(
         self,
+        reactor: IReactorTime,
         clock: Clock,
         config: FederationRatelimitSettings,
         metrics_name: Optional[str] = None,
     ):
         """
         Args:
+            reactor
             clock
             config
             metrics_name: The name of the rate limiter so we can differentiate it
@@ -207,6 +212,7 @@ class _PerHostRatelimiter:
                 for this rate limiter.
                 from the rest in the metrics
         """
+        self.reactor = reactor
         self.clock = clock
         self.metrics_name = metrics_name
 
@@ -364,12 +370,22 @@ class _PerHostRatelimiter:
 
     def _on_exit(self, request_id: object) -> None:
         logger.debug("Ratelimit(%s) [%s]: Processed req", self.host, id(request_id))
-        self.current_processing.discard(request_id)
-        try:
-            # start processing the next item on the queue.
-            _, deferred = self.ready_request_queue.popitem(last=False)
 
-            with PreserveLoggingContext():
-                deferred.callback(None)
-        except KeyError:
-            pass
+        # When requests complete synchronously, we will recursively start the next
+        # request in the queue. To avoid stack exhaustion, we defer starting the next
+        # request until the next reactor tick.
+
+        def start_next_request() -> None:
+            # We only remove the completed request from the list when we're about to
+            # start the next one, otherwise we can allow extra requests through.
+            self.current_processing.discard(request_id)
+            try:
+                # start processing the next item on the queue.
+                _, deferred = self.ready_request_queue.popitem(last=False)
+
+                with PreserveLoggingContext():
+                    deferred.callback(None)
+            except KeyError:
+                pass
+
+        self.reactor.callLater(0.0, start_next_request)
diff --git a/tests/util/test_ratelimitutils.py b/tests/util/test_ratelimitutils.py
index 5b327b390e..2f3ea15b96 100644
--- a/tests/util/test_ratelimitutils.py
+++ b/tests/util/test_ratelimitutils.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 from typing import Optional
 
+from twisted.internet import defer
 from twisted.internet.defer import Deferred
 
 from synapse.config.homeserver import HomeServerConfig
@@ -29,7 +30,7 @@ class FederationRateLimiterTestCase(TestCase):
         """A simple test with the default values"""
         reactor, clock = get_clock()
         rc_config = build_rc_config()
-        ratelimiter = FederationRateLimiter(clock, rc_config)
+        ratelimiter = FederationRateLimiter(reactor, clock, rc_config)
 
         with ratelimiter.ratelimit("testhost") as d1:
             # shouldn't block
@@ -39,7 +40,7 @@ class FederationRateLimiterTestCase(TestCase):
         """Test what happens when we hit the concurrent limit"""
         reactor, clock = get_clock()
         rc_config = build_rc_config({"rc_federation": {"concurrent": 2}})
-        ratelimiter = FederationRateLimiter(clock, rc_config)
+        ratelimiter = FederationRateLimiter(reactor, clock, rc_config)
 
         with ratelimiter.ratelimit("testhost") as d1:
             # shouldn't block
@@ -57,6 +58,7 @@ class FederationRateLimiterTestCase(TestCase):
 
             # ... until we complete an earlier request
             cm2.__exit__(None, None, None)
+            reactor.advance(0.0)
             self.successResultOf(d3)
 
     def test_sleep_limit(self) -> None:
@@ -65,7 +67,7 @@ class FederationRateLimiterTestCase(TestCase):
         rc_config = build_rc_config(
             {"rc_federation": {"sleep_limit": 2, "sleep_delay": 500}}
         )
-        ratelimiter = FederationRateLimiter(clock, rc_config)
+        ratelimiter = FederationRateLimiter(reactor, clock, rc_config)
 
         with ratelimiter.ratelimit("testhost") as d1:
             # shouldn't block
@@ -81,6 +83,43 @@ class FederationRateLimiterTestCase(TestCase):
             sleep_time = _await_resolution(reactor, d3)
             self.assertAlmostEqual(sleep_time, 500, places=3)
 
+    def test_lots_of_queued_things(self) -> None:
+        """Tests lots of synchronous things queued up behind a slow thing.
+
+        The stack should *not* explode when the slow thing completes.
+        """
+        reactor, clock = get_clock()
+        rc_config = build_rc_config(
+            {
+                "rc_federation": {
+                    "sleep_limit": 1000000000,  # never sleep
+                    "reject_limit": 1000000000,  # never reject requests
+                    "concurrent": 1,
+                }
+            }
+        )
+        ratelimiter = FederationRateLimiter(reactor, clock, rc_config)
+
+        with ratelimiter.ratelimit("testhost") as d:
+            # shouldn't block
+            self.successResultOf(d)
+
+            async def task() -> None:
+                with ratelimiter.ratelimit("testhost") as d:
+                    await d
+
+            for _ in range(1, 100):
+                defer.ensureDeferred(task())
+
+            last_task = defer.ensureDeferred(task())
+
+            # Upon exiting the context manager, all the synchronous things will resume.
+            # If a stack overflow occurs, the final task will not complete.
+
+        # Wait for all the things to complete.
+        reactor.advance(0.0)
+        self.successResultOf(last_task)
+
 
 def _await_resolution(reactor: ThreadedMemoryReactorClock, d: Deferred) -> float:
     """advance the clock until the deferred completes.
-- 
cgit 1.5.1


From 3a125625e70634075cc4d965a01309af56748eb2 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Fri, 13 Jan 2023 12:37:28 +0000
Subject: Add some clarifying comments and refactor a portion of the `Keyring`
 class for readability (#14804)

---
 changelog.d/14804.misc    |  1 +
 synapse/crypto/keyring.py | 61 +++++++++++++++++++++++++++++++++--------------
 2 files changed, 44 insertions(+), 18 deletions(-)
 create mode 100644 changelog.d/14804.misc

(limited to 'synapse')

diff --git a/changelog.d/14804.misc b/changelog.d/14804.misc
new file mode 100644
index 0000000000..24302332bd
--- /dev/null
+++ b/changelog.d/14804.misc
@@ -0,0 +1 @@
+Add some clarifying comments and refactor a portion of the `Keyring` class for readability.
\ No newline at end of file
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index 69310d9035..86cd4af9bd 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -154,17 +154,21 @@ class Keyring:
 
         if key_fetchers is None:
             key_fetchers = (
+                # Fetch keys from the database.
                 StoreKeyFetcher(hs),
+                # Fetch keys from a configured Perspectives server.
                 PerspectivesKeyFetcher(hs),
+                # Fetch keys from the origin server directly.
                 ServerKeyFetcher(hs),
             )
         self._key_fetchers = key_fetchers
 
-        self._server_queue: BatchingQueue[
+        self._fetch_keys_queue: BatchingQueue[
             _FetchKeyRequest, Dict[str, Dict[str, FetchKeyResult]]
         ] = BatchingQueue(
             "keyring_server",
             clock=hs.get_clock(),
+            # The method called to fetch each key
             process_batch_callback=self._inner_fetch_key_requests,
         )
 
@@ -287,7 +291,7 @@ class Keyring:
                 minimum_valid_until_ts=verify_request.minimum_valid_until_ts,
                 key_ids=list(key_ids_to_find),
             )
-            found_keys_by_server = await self._server_queue.add_to_queue(
+            found_keys_by_server = await self._fetch_keys_queue.add_to_queue(
                 key_request, key=verify_request.server_name
             )
 
@@ -352,7 +356,17 @@ class Keyring:
     async def _inner_fetch_key_requests(
         self, requests: List[_FetchKeyRequest]
     ) -> Dict[str, Dict[str, FetchKeyResult]]:
-        """Processing function for the queue of `_FetchKeyRequest`."""
+        """Processing function for the queue of `_FetchKeyRequest`.
+
+        Takes a list of key fetch requests, de-duplicates them and then carries out
+        each request by invoking self._inner_fetch_key_request.
+
+        Args:
+            requests: A list of requests for homeserver verify keys.
+
+        Returns:
+            {server name: {key id: fetch key result}}
+        """
 
         logger.debug("Starting fetch for %s", requests)
 
@@ -397,8 +411,23 @@ class Keyring:
     async def _inner_fetch_key_request(
         self, verify_request: _FetchKeyRequest
     ) -> Dict[str, FetchKeyResult]:
-        """Attempt to fetch the given key by calling each key fetcher one by
-        one.
+        """Attempt to fetch the given key by calling each key fetcher one by one.
+
+        If a key is found, check whether its `valid_until_ts` attribute satisfies the
+        `minimum_valid_until_ts` attribute of the `verify_request`. If it does, we
+        refrain from asking subsequent fetchers for that key.
+
+        Even if the above check fails, we still return the found key - the caller may
+        still find the invalid key result useful. In this case, we continue to ask
+        subsequent fetchers for the invalid key, in case they return a valid result
+        for it. This can happen when fetching a stale key result from the database,
+        before querying the origin server for an up-to-date result.
+
+        Args:
+            verify_request: The request for a verify key. Can include multiple key IDs.
+
+        Returns:
+            A map of {key_id: the key fetch result}.
         """
         logger.debug("Starting fetch for %s", verify_request)
 
@@ -420,26 +449,22 @@ class Keyring:
                 if not key:
                     continue
 
-                # If we already have a result for the given key ID we keep the
+                # If we already have a result for the given key ID, we keep the
                 # one with the highest `valid_until_ts`.
                 existing_key = found_keys.get(key_id)
-                if existing_key:
-                    if key.valid_until_ts <= existing_key.valid_until_ts:
-                        continue
+                if existing_key and existing_key.valid_until_ts > key.valid_until_ts:
+                    continue
+
+                # Check if this key's expiry timestamp is valid for the verify request.
+                if key.valid_until_ts >= verify_request.minimum_valid_until_ts:
+                    # Stop looking for this key from subsequent fetchers.
+                    missing_key_ids.discard(key_id)
 
-                # We always store the returned key even if it doesn't the
+                # We always store the returned key even if it doesn't meet the
                 # `minimum_valid_until_ts` requirement, as some verification
                 # requests may still be able to be satisfied by it.
-                #
-                # We still keep looking for the key from other fetchers in that
-                # case though.
                 found_keys[key_id] = key
 
-                if key.valid_until_ts < verify_request.minimum_valid_until_ts:
-                    continue
-
-                missing_key_ids.discard(key_id)
-
         return found_keys
 
 
-- 
cgit 1.5.1


From 8d5325ec0c04c3b0f08e0c5b4a26c5939d9db7f1 Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Fri, 13 Jan 2023 15:17:03 +0100
Subject: Drop unused table `presence` (#14825)

---
 changelog.d/14825.misc                                  |  1 +
 scripts-dev/database-save.sh                            |  1 -
 .../storage/schema/main/delta/73/25drop_presence.sql    | 17 +++++++++++++++++
 3 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14825.misc
 create mode 100644 synapse/storage/schema/main/delta/73/25drop_presence.sql

(limited to 'synapse')

diff --git a/changelog.d/14825.misc b/changelog.d/14825.misc
new file mode 100644
index 0000000000..64312ac09e
--- /dev/null
+++ b/changelog.d/14825.misc
@@ -0,0 +1 @@
+Drop unused table `presence`.
\ No newline at end of file
diff --git a/scripts-dev/database-save.sh b/scripts-dev/database-save.sh
index 040c8a4943..91674027ae 100755
--- a/scripts-dev/database-save.sh
+++ b/scripts-dev/database-save.sh
@@ -11,6 +11,5 @@
 sqlite3 "$1" <<'EOF' >table-save.sql
 .dump users
 .dump access_tokens
-.dump presence
 .dump profiles
 EOF
diff --git a/synapse/storage/schema/main/delta/73/25drop_presence.sql b/synapse/storage/schema/main/delta/73/25drop_presence.sql
new file mode 100644
index 0000000000..9f6ffa20b6
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/25drop_presence.sql
@@ -0,0 +1,17 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- this table is unused
+DROP TABLE presence;
-- 
cgit 1.5.1


From 73ff493dfba63541a09eaf08587eb8bbd3330967 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 13 Jan 2023 14:57:43 +0000
Subject: Merge account data streams (#14826)

---
 changelog.d/14826.misc                         |  1 +
 docs/upgrade.md                                | 12 ++++++
 synapse/api/constants.py                       |  1 +
 synapse/handlers/account_data.py               |  7 +++-
 synapse/handlers/initial_sync.py               |  8 ++--
 synapse/handlers/sync.py                       | 11 +++++-
 synapse/replication/tcp/client.py              |  3 +-
 synapse/replication/tcp/handler.py             |  3 +-
 synapse/replication/tcp/streams/__init__.py    |  3 --
 synapse/replication/tcp/streams/_base.py       | 49 ++++++++++++-----------
 synapse/storage/databases/main/account_data.py |  6 +--
 synapse/storage/databases/main/tags.py         | 54 +++++++-------------------
 12 files changed, 75 insertions(+), 83 deletions(-)
 create mode 100644 changelog.d/14826.misc

(limited to 'synapse')

diff --git a/changelog.d/14826.misc b/changelog.d/14826.misc
new file mode 100644
index 0000000000..9ebedcf51e
--- /dev/null
+++ b/changelog.d/14826.misc
@@ -0,0 +1 @@
+Merge tag and normal account data replication streams.
diff --git a/docs/upgrade.md b/docs/upgrade.md
index c4bc5889a9..8a76172e43 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -88,6 +88,18 @@ process, for example:
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     ```
 
+# Upgrading to v1.76.0
+
+## Changes to the account data replication streams
+
+Synapse has changed the format of the account data replication streams (between
+workers). This is a forwards- and backwards-incompatible change: v1.75 workers
+cannot process account data replicated by v1.76 workers, and vice versa.
+
+Once all workers are upgraded to v1.76 (or downgraded to v1.75), account data
+replication will resume as normal.
+
+
 # Upgrading to v1.74.0
 
 ## Unicode support in user search
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index 6a5e7171da..6432d32d83 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -249,6 +249,7 @@ class RoomEncryptionAlgorithms:
 class AccountDataTypes:
     DIRECT: Final = "m.direct"
     IGNORED_USER_LIST: Final = "m.ignored_user_list"
+    TAG: Final = "m.tag"
 
 
 class HistoryVisibility:
diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py
index aba7315cf7..834006356a 100644
--- a/synapse/handlers/account_data.py
+++ b/synapse/handlers/account_data.py
@@ -16,6 +16,7 @@ import logging
 import random
 from typing import TYPE_CHECKING, Awaitable, Callable, Collection, List, Optional, Tuple
 
+from synapse.api.constants import AccountDataTypes
 from synapse.replication.http.account_data import (
     ReplicationAddRoomAccountDataRestServlet,
     ReplicationAddTagRestServlet,
@@ -335,7 +336,11 @@ class AccountDataEventSource(EventSource[int, JsonDict]):
 
         for room_id, room_tags in tags.items():
             results.append(
-                {"type": "m.tag", "content": {"tags": room_tags}, "room_id": room_id}
+                {
+                    "type": AccountDataTypes.TAG,
+                    "content": {"tags": room_tags},
+                    "room_id": room_id,
+                }
             )
 
         (
diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py
index 9c335e6863..8c2260ad7d 100644
--- a/synapse/handlers/initial_sync.py
+++ b/synapse/handlers/initial_sync.py
@@ -15,7 +15,7 @@
 import logging
 from typing import TYPE_CHECKING, List, Optional, Tuple, cast
 
-from synapse.api.constants import EduTypes, EventTypes, Membership
+from synapse.api.constants import AccountDataTypes, EduTypes, EventTypes, Membership
 from synapse.api.errors import SynapseError
 from synapse.events import EventBase
 from synapse.events.utils import SerializeEventConfig
@@ -239,7 +239,7 @@ class InitialSyncHandler:
                 tags = tags_by_room.get(event.room_id)
                 if tags:
                     account_data_events.append(
-                        {"type": "m.tag", "content": {"tags": tags}}
+                        {"type": AccountDataTypes.TAG, "content": {"tags": tags}}
                     )
 
                 account_data = account_data_by_room.get(event.room_id, {})
@@ -326,7 +326,9 @@ class InitialSyncHandler:
         account_data_events = []
         tags = await self.store.get_tags_for_room(user_id, room_id)
         if tags:
-            account_data_events.append({"type": "m.tag", "content": {"tags": tags}})
+            account_data_events.append(
+                {"type": AccountDataTypes.TAG, "content": {"tags": tags}}
+            )
 
         account_data = await self.store.get_account_data_for_room(user_id, room_id)
         for account_data_type, content in account_data.items():
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 20ee2f203a..78d488f2b1 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -31,7 +31,12 @@ from typing import (
 import attr
 from prometheus_client import Counter
 
-from synapse.api.constants import EventContentFields, EventTypes, Membership
+from synapse.api.constants import (
+    AccountDataTypes,
+    EventContentFields,
+    EventTypes,
+    Membership,
+)
 from synapse.api.filtering import FilterCollection
 from synapse.api.presence import UserPresenceState
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
@@ -2331,7 +2336,9 @@ class SyncHandler:
 
             account_data_events = []
             if tags is not None:
-                account_data_events.append({"type": "m.tag", "content": {"tags": tags}})
+                account_data_events.append(
+                    {"type": AccountDataTypes.TAG, "content": {"tags": tags}}
+                )
 
             for account_data_type, content in account_data.items():
                 account_data_events.append(
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index b5e40da533..7263bb2796 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -33,7 +33,6 @@ from synapse.replication.tcp.streams import (
     PushersStream,
     PushRulesStream,
     ReceiptsStream,
-    TagAccountDataStream,
     ToDeviceStream,
     TypingStream,
     UnPartialStatedEventStream,
@@ -168,7 +167,7 @@ class ReplicationDataHandler:
             self.notifier.on_new_event(
                 StreamKeyType.PUSH_RULES, token, users=[row.user_id for row in rows]
             )
-        elif stream_name in (AccountDataStream.NAME, TagAccountDataStream.NAME):
+        elif stream_name in AccountDataStream.NAME:
             self.notifier.on_new_event(
                 StreamKeyType.ACCOUNT_DATA, token, users=[row.user_id for row in rows]
             )
diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py
index 0f166d16aa..d03a53d764 100644
--- a/synapse/replication/tcp/handler.py
+++ b/synapse/replication/tcp/handler.py
@@ -58,7 +58,6 @@ from synapse.replication.tcp.streams import (
     PresenceStream,
     ReceiptsStream,
     Stream,
-    TagAccountDataStream,
     ToDeviceStream,
     TypingStream,
 )
@@ -145,7 +144,7 @@ class ReplicationCommandHandler:
 
                 continue
 
-            if isinstance(stream, (AccountDataStream, TagAccountDataStream)):
+            if isinstance(stream, AccountDataStream):
                 # Only add AccountDataStream and TagAccountDataStream as a source on the
                 # instance in charge of account_data persistence.
                 if hs.get_instance_name() in hs.config.worker.writers.account_data:
diff --git a/synapse/replication/tcp/streams/__init__.py b/synapse/replication/tcp/streams/__init__.py
index 110f10aab9..a7eadfa3c9 100644
--- a/synapse/replication/tcp/streams/__init__.py
+++ b/synapse/replication/tcp/streams/__init__.py
@@ -35,7 +35,6 @@ from synapse.replication.tcp.streams._base import (
     PushRulesStream,
     ReceiptsStream,
     Stream,
-    TagAccountDataStream,
     ToDeviceStream,
     TypingStream,
     UserSignatureStream,
@@ -62,7 +61,6 @@ STREAMS_MAP = {
         DeviceListsStream,
         ToDeviceStream,
         FederationStream,
-        TagAccountDataStream,
         AccountDataStream,
         UserSignatureStream,
         UnPartialStatedRoomStream,
@@ -83,7 +81,6 @@ __all__ = [
     "CachesStream",
     "DeviceListsStream",
     "ToDeviceStream",
-    "TagAccountDataStream",
     "AccountDataStream",
     "UserSignatureStream",
     "UnPartialStatedRoomStream",
diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py
index e01155ad59..fbf78da9c2 100644
--- a/synapse/replication/tcp/streams/_base.py
+++ b/synapse/replication/tcp/streams/_base.py
@@ -28,8 +28,8 @@ from typing import (
 
 import attr
 
+from synapse.api.constants import AccountDataTypes
 from synapse.replication.http.streams import ReplicationGetStreamUpdates
-from synapse.types import JsonDict
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -495,27 +495,6 @@ class ToDeviceStream(Stream):
         )
 
 
-class TagAccountDataStream(Stream):
-    """Someone added/removed a tag for a room"""
-
-    @attr.s(slots=True, frozen=True, auto_attribs=True)
-    class TagAccountDataStreamRow:
-        user_id: str
-        room_id: str
-        data: JsonDict
-
-    NAME = "tag_account_data"
-    ROW_TYPE = TagAccountDataStreamRow
-
-    def __init__(self, hs: "HomeServer"):
-        store = hs.get_datastores().main
-        super().__init__(
-            hs.get_instance_name(),
-            current_token_without_instance(store.get_max_account_data_stream_id),
-            store.get_all_updated_tags,
-        )
-
-
 class AccountDataStream(Stream):
     """Global or per room account data was changed"""
 
@@ -560,6 +539,19 @@ class AccountDataStream(Stream):
             to_token = room_results[-1][0]
             limited = True
 
+        tags, tag_to_token, tags_limited = await self.store.get_all_updated_tags(
+            instance_name,
+            from_token,
+            to_token,
+            limit,
+        )
+
+        # again, if the tag results hit the limit, limit the global and room
+        # results to the same stream token.
+        if tags_limited:
+            to_token = tag_to_token
+            limited = True
+
         # convert the global results to the right format, and limit them to the to_token
         # at the same time
         global_rows = (
@@ -568,11 +560,16 @@ class AccountDataStream(Stream):
             if stream_id <= to_token
         )
 
-        # we know that the room_results are already limited to `to_token` so no need
-        # for a check on `stream_id` here.
         room_rows = (
             (stream_id, (user_id, room_id, account_data_type))
             for stream_id, user_id, room_id, account_data_type in room_results
+            if stream_id <= to_token
+        )
+
+        tag_rows = (
+            (stream_id, (user_id, room_id, AccountDataTypes.TAG))
+            for stream_id, user_id, room_id in tags
+            if stream_id <= to_token
         )
 
         # We need to return a sorted list, so merge them together.
@@ -582,7 +579,9 @@ class AccountDataStream(Stream):
         # leading to a comparison between the data tuples. The comparison could
         # fail due to attempting to compare the `room_id` which results in a
         # `TypeError` from comparing a `str` vs `None`.
-        updates = list(heapq.merge(room_rows, global_rows, key=lambda row: row[0]))
+        updates = list(
+            heapq.merge(room_rows, global_rows, tag_rows, key=lambda row: row[0])
+        )
         return updates, to_token, limited
 
 
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 86032897f5..881d7089db 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -27,7 +27,7 @@ from typing import (
 )
 
 from synapse.api.constants import AccountDataTypes
-from synapse.replication.tcp.streams import AccountDataStream, TagAccountDataStream
+from synapse.replication.tcp.streams import AccountDataStream
 from synapse.storage._base import db_to_json
 from synapse.storage.database import (
     DatabasePool,
@@ -454,9 +454,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
     def process_replication_position(
         self, stream_name: str, instance_name: str, token: int
     ) -> None:
-        if stream_name == TagAccountDataStream.NAME:
-            self._account_data_id_gen.advance(instance_name, token)
-        elif stream_name == AccountDataStream.NAME:
+        if stream_name == AccountDataStream.NAME:
             self._account_data_id_gen.advance(instance_name, token)
         super().process_replication_position(stream_name, instance_name, token)
 
diff --git a/synapse/storage/databases/main/tags.py b/synapse/storage/databases/main/tags.py
index e23c927e02..d5500cdd47 100644
--- a/synapse/storage/databases/main/tags.py
+++ b/synapse/storage/databases/main/tags.py
@@ -17,7 +17,8 @@
 import logging
 from typing import Any, Dict, Iterable, List, Tuple, cast
 
-from synapse.replication.tcp.streams import TagAccountDataStream
+from synapse.api.constants import AccountDataTypes
+from synapse.replication.tcp.streams import AccountDataStream
 from synapse.storage._base import db_to_json
 from synapse.storage.database import LoggingTransaction
 from synapse.storage.databases.main.account_data import AccountDataWorkerStore
@@ -54,7 +55,7 @@ class TagsWorkerStore(AccountDataWorkerStore):
 
     async def get_all_updated_tags(
         self, instance_name: str, last_id: int, current_id: int, limit: int
-    ) -> Tuple[List[Tuple[int, Tuple[str, str, str]]], int, bool]:
+    ) -> Tuple[List[Tuple[int, str, str]], int, bool]:
         """Get updates for tags replication stream.
 
         Args:
@@ -73,7 +74,7 @@ class TagsWorkerStore(AccountDataWorkerStore):
             The token returned can be used in a subsequent call to this
             function to get further updatees.
 
-            The updates are a list of 2-tuples of stream ID and the row data
+            The updates are a list of tuples of stream ID, user ID and room ID
         """
 
         if last_id == current_id:
@@ -96,38 +97,13 @@ class TagsWorkerStore(AccountDataWorkerStore):
             "get_all_updated_tags", get_all_updated_tags_txn
         )
 
-        def get_tag_content(
-            txn: LoggingTransaction, tag_ids: List[Tuple[int, str, str]]
-        ) -> List[Tuple[int, Tuple[str, str, str]]]:
-            sql = "SELECT tag, content FROM room_tags WHERE user_id=? AND room_id=?"
-            results = []
-            for stream_id, user_id, room_id in tag_ids:
-                txn.execute(sql, (user_id, room_id))
-                tags = []
-                for tag, content in txn:
-                    tags.append(json_encoder.encode(tag) + ":" + content)
-                tag_json = "{" + ",".join(tags) + "}"
-                results.append((stream_id, (user_id, room_id, tag_json)))
-
-            return results
-
-        batch_size = 50
-        results = []
-        for i in range(0, len(tag_ids), batch_size):
-            tags = await self.db_pool.runInteraction(
-                "get_all_updated_tag_content",
-                get_tag_content,
-                tag_ids[i : i + batch_size],
-            )
-            results.extend(tags)
-
         limited = False
         upto_token = current_id
-        if len(results) >= limit:
-            upto_token = results[-1][0]
+        if len(tag_ids) >= limit:
+            upto_token = tag_ids[-1][0]
             limited = True
 
-        return results, upto_token, limited
+        return tag_ids, upto_token, limited
 
     async def get_updated_tags(
         self, user_id: str, stream_id: int
@@ -299,20 +275,16 @@ class TagsWorkerStore(AccountDataWorkerStore):
         token: int,
         rows: Iterable[Any],
     ) -> None:
-        if stream_name == TagAccountDataStream.NAME:
+        if stream_name == AccountDataStream.NAME:
             for row in rows:
-                self.get_tags_for_user.invalidate((row.user_id,))
-                self._account_data_stream_cache.entity_has_changed(row.user_id, token)
+                if row.data_type == AccountDataTypes.TAG:
+                    self.get_tags_for_user.invalidate((row.user_id,))
+                    self._account_data_stream_cache.entity_has_changed(
+                        row.user_id, token
+                    )
 
         super().process_replication_rows(stream_name, instance_name, token, rows)
 
-    def process_replication_position(
-        self, stream_name: str, instance_name: str, token: int
-    ) -> None:
-        if stream_name == TagAccountDataStream.NAME:
-            self._account_data_id_gen.advance(instance_name, token)
-        super().process_replication_position(stream_name, instance_name, token)
-
 
 class TagsStore(TagsWorkerStore):
     pass
-- 
cgit 1.5.1


From 52ae80dd1afd9bb5b4cf2bb79297e1590f92cacb Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 13 Jan 2023 17:58:53 +0000
Subject: Use stable identifiers for faster joins (#14832)

* Use new query param when requesting a partial join

* Read new query param when serving partial join

* Provide new field names when serving partial joins

* Read new field names from partial join response

* Changelog
---
 changelog.d/14832.misc                            |  1 +
 synapse/federation/federation_server.py           |  2 +
 synapse/federation/transport/client.py            | 18 ++++++
 synapse/federation/transport/server/federation.py | 13 +++-
 tests/federation/test_federation_server.py        |  2 +-
 tests/federation/transport/test_client.py         | 77 +++++++++++++++++------
 6 files changed, 89 insertions(+), 24 deletions(-)
 create mode 100644 changelog.d/14832.misc

(limited to 'synapse')

diff --git a/changelog.d/14832.misc b/changelog.d/14832.misc
new file mode 100644
index 0000000000..61e7401e43
--- /dev/null
+++ b/changelog.d/14832.misc
@@ -0,0 +1 @@
+Faster joins: use stable identifiers from [MSC3706](https://github.com/matrix-org/matrix-spec-proposals/pull/3706).
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index bb20af6e91..c65dbf87fb 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -725,10 +725,12 @@ class FederationServer(FederationBase):
             "state": [p.get_pdu_json(time_now) for p in state_events],
             "auth_chain": [p.get_pdu_json(time_now) for p in auth_chain_events],
             "org.matrix.msc3706.partial_state": caller_supports_partial_state,
+            "members_omitted": caller_supports_partial_state,
         }
 
         if servers_in_room is not None:
             resp["org.matrix.msc3706.servers_in_room"] = list(servers_in_room)
+            resp["servers_in_room"] = list(servers_in_room)
 
         return resp
 
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 77f1f39cac..c8471d4cf7 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -357,6 +357,7 @@ class TransportLayerClient:
         if self._faster_joins_enabled:
             # lazy-load state on join
             query_params["org.matrix.msc3706.partial_state"] = "true"
+            query_params["omit_members"] = "true"
 
         return await self.client.put_json(
             destination=destination,
@@ -909,6 +910,14 @@ class SendJoinParser(ByteParser[SendJoinResponse]):
                     use_float="True",
                 )
             )
+            # The stable field name comes last, so it "wins" if the fields disagree
+            self._coros.append(
+                ijson.items_coro(
+                    _partial_state_parser(self._response),
+                    "members_omitted",
+                    use_float="True",
+                )
+            )
 
             self._coros.append(
                 ijson.items_coro(
@@ -918,6 +927,15 @@ class SendJoinParser(ByteParser[SendJoinResponse]):
                 )
             )
 
+            # Again, stable field name comes last
+            self._coros.append(
+                ijson.items_coro(
+                    _servers_in_room_parser(self._response),
+                    "servers_in_room",
+                    use_float="True",
+                )
+            )
+
     def write(self, data: bytes) -> int:
         for c in self._coros:
             c.send(data)
diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py
index 53e77b4bb6..c0a700905b 100644
--- a/synapse/federation/transport/server/federation.py
+++ b/synapse/federation/transport/server/federation.py
@@ -437,9 +437,16 @@ class FederationV2SendJoinServlet(BaseFederationServerServlet):
 
         partial_state = False
         if self._msc3706_enabled:
-            partial_state = parse_boolean_from_args(
-                query, "org.matrix.msc3706.partial_state", default=False
-            )
+            # The stable query parameter wins, if it disagrees with the unstable
+            # parameter for some reason.
+            stable_param = parse_boolean_from_args(query, "omit_members", default=None)
+            if stable_param is not None:
+                partial_state = stable_param
+            else:
+                partial_state = parse_boolean_from_args(
+                    query, "org.matrix.msc3706.partial_state", default=False
+                )
+
         result = await self.handler.on_send_join_request(
             origin, content, room_id, caller_supports_partial_state=partial_state
         )
diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py
index 177e5b5afc..27770304be 100644
--- a/tests/federation/test_federation_server.py
+++ b/tests/federation/test_federation_server.py
@@ -224,7 +224,7 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase):
         )
         channel = self.make_signed_federation_request(
             "PUT",
-            f"/_matrix/federation/v2/send_join/{self._room_id}/x?org.matrix.msc3706.partial_state=true",
+            f"/_matrix/federation/v2/send_join/{self._room_id}/x?omit_members=true",
             content=join_event_dict,
         )
         self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body)
diff --git a/tests/federation/transport/test_client.py b/tests/federation/transport/test_client.py
index b84c74fc0e..c90635e0a0 100644
--- a/tests/federation/transport/test_client.py
+++ b/tests/federation/transport/test_client.py
@@ -13,12 +13,14 @@
 # limitations under the License.
 
 import json
+from typing import List, Optional
 from unittest.mock import Mock
 
 import ijson.common
 
 from synapse.api.room_versions import RoomVersions
 from synapse.federation.transport.client import SendJoinParser
+from synapse.types import JsonDict
 from synapse.util import ExceptionBundle
 
 from tests.unittest import TestCase
@@ -71,33 +73,68 @@ class SendJoinParserTestCase(TestCase):
 
     def test_partial_state(self) -> None:
         """Check that the partial_state flag is correctly parsed"""
-        parser = SendJoinParser(RoomVersions.V1, False)
-        response = {
-            "org.matrix.msc3706.partial_state": True,
-        }
 
-        serialised_response = json.dumps(response).encode()
+        def parse(response: JsonDict) -> bool:
+            parser = SendJoinParser(RoomVersions.V1, False)
+            serialised_response = json.dumps(response).encode()
 
-        # Send data to the parser
-        parser.write(serialised_response)
+            # Send data to the parser
+            parser.write(serialised_response)
 
-        # Retrieve and check the parsed SendJoinResponse
-        parsed_response = parser.finish()
-        self.assertTrue(parsed_response.partial_state)
+            # Retrieve the parsed SendJoinResponse; the caller asserts on it
+            parsed_response = parser.finish()
+            return parsed_response.partial_state
 
-    def test_servers_in_room(self) -> None:
-        """Check that the servers_in_room field is correctly parsed"""
-        parser = SendJoinParser(RoomVersions.V1, False)
-        response = {"org.matrix.msc3706.servers_in_room": ["hs1", "hs2"]}
+        self.assertTrue(parse({"members_omitted": True}))
+        self.assertTrue(parse({"org.matrix.msc3706.partial_state": True}))
 
-        serialised_response = json.dumps(response).encode()
+        self.assertFalse(parse({"members_omitted": False}))
+        self.assertFalse(parse({"org.matrix.msc3706.partial_state": False}))
 
-        # Send data to the parser
-        parser.write(serialised_response)
+        # If there's a conflict, the stable field wins.
+        self.assertTrue(
+            parse({"members_omitted": True, "org.matrix.msc3706.partial_state": False})
+        )
+        self.assertFalse(
+            parse({"members_omitted": False, "org.matrix.msc3706.partial_state": True})
+        )
 
-        # Retrieve and check the parsed SendJoinResponse
-        parsed_response = parser.finish()
-        self.assertEqual(parsed_response.servers_in_room, ["hs1", "hs2"])
+    def test_servers_in_room(self) -> None:
+        """Check that the servers_in_room field is correctly parsed"""
+
+        def parse(response: JsonDict) -> Optional[List[str]]:
+            parser = SendJoinParser(RoomVersions.V1, False)
+            serialised_response = json.dumps(response).encode()
+
+            # Send data to the parser
+            parser.write(serialised_response)
+
+            # Retrieve the parsed SendJoinResponse; the caller asserts on it
+            parsed_response = parser.finish()
+            return parsed_response.servers_in_room
+
+        self.assertEqual(
+            parse({"org.matrix.msc3706.servers_in_room": ["hs1", "hs2"]}),
+            ["hs1", "hs2"],
+        )
+        self.assertEqual(parse({"servers_in_room": ["example.com"]}), ["example.com"])
+
+        # If both are provided, the stable identifier should win
+        self.assertEqual(
+            parse(
+                {
+                    "org.matrix.msc3706.servers_in_room": ["old"],
+                    "servers_in_room": ["new"],
+                }
+            ),
+            ["new"],
+        )
+
+        # And lastly, we should be able to tell if neither field was present.
+        self.assertEqual(
+            parse({}),
+            None,
+        )
 
     def test_errors_closing_coroutines(self) -> None:
         """Check we close all coroutines, even if closing the first raises an Exception.
-- 
cgit 1.5.1


From 54cd90ea60610a6dc24a291dd0cad4ce9bea8728 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Fri, 13 Jan 2023 19:32:10 +0000
Subject: Implement MSC3890: Remotely silence local notifications (#14775)

---
 changelog.d/14775.feature                           |  1 +
 docker/complement/conf/workers-shared-extra.yaml.j2 |  2 ++
 scripts-dev/complement.sh                           |  2 +-
 synapse/config/experimental.py                      | 15 +++++++++++++++
 synapse/handlers/device.py                          | 11 ++++++++++-
 5 files changed, 29 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14775.feature

(limited to 'synapse')

diff --git a/changelog.d/14775.feature b/changelog.d/14775.feature
new file mode 100644
index 0000000000..7b7ee42cac
--- /dev/null
+++ b/changelog.d/14775.feature
@@ -0,0 +1 @@
+Implement support for MSC3890: Remotely silence local notifications.
\ No newline at end of file
diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2
index cb839fed07..1170694df5 100644
--- a/docker/complement/conf/workers-shared-extra.yaml.j2
+++ b/docker/complement/conf/workers-shared-extra.yaml.j2
@@ -102,6 +102,8 @@ experimental_features:
   {% endif %}
   # Filtering /messages by relation type.
   msc3874_enabled: true
+  # Enable deleting device-specific notification settings stored in account data
+  msc3890_enabled: true
   # Enable removing account data support
   msc3391_enabled: true
 
diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh
index 51d1bac618..7c48d8bccb 100755
--- a/scripts-dev/complement.sh
+++ b/scripts-dev/complement.sh
@@ -190,7 +190,7 @@ fi
 
 extra_test_args=()
 
-test_tags="synapse_blacklist,msc3787,msc3874,msc3391"
+test_tags="synapse_blacklist,msc3787,msc3874,msc3890,msc3391"
 
 # All environment variables starting with PASS_ will be shared.
 # (The prefix is stripped off before reaching the container.)
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index a8b2db372d..72a17e0616 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -17,6 +17,7 @@ from typing import Any, Optional
 import attr
 
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersions
+from synapse.config import ConfigError
 from synapse.config._base import Config
 from synapse.types import JsonDict
 
@@ -93,6 +94,9 @@ class ExperimentalConfig(Config):
         # MSC2815 (allow room moderators to view redacted event content)
         self.msc2815_enabled: bool = experimental.get("msc2815_enabled", False)
 
+        # MSC3391: Removing account data.
+        self.msc3391_enabled = experimental.get("msc3391_enabled", False)
+
         # MSC3773: Thread notifications
         self.msc3773_enabled: bool = experimental.get("msc3773_enabled", False)
 
@@ -127,6 +131,17 @@ class ExperimentalConfig(Config):
             "msc3886_endpoint", None
         )
 
+        # MSC3890: Remotely silence local notifications
+        # Note: This option requires "experimental_features.msc3391_enabled" to be
+        # set to "true", in order to communicate account data deletions to clients.
+        self.msc3890_enabled: bool = experimental.get("msc3890_enabled", False)
+        if self.msc3890_enabled and not self.msc3391_enabled:
+            raise ConfigError(
+                "Option 'experimental_features.msc3391' must be set to 'true' to "
+                "enable 'experimental_features.msc3890'. MSC3391 functionality is "
+                "required to communicate account data deletions to clients."
+            )
+
         # MSC3912: Relation-based redactions.
         self.msc3912_enabled: bool = experimental.get("msc3912_enabled", False)
 
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 89864e1119..0640ea79a0 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -346,6 +346,7 @@ class DeviceHandler(DeviceWorkerHandler):
         super().__init__(hs)
 
         self.federation_sender = hs.get_federation_sender()
+        self._account_data_handler = hs.get_account_data_handler()
         self._storage_controllers = hs.get_storage_controllers()
 
         self.device_list_updater = DeviceListUpdater(hs, self)
@@ -502,7 +503,7 @@ class DeviceHandler(DeviceWorkerHandler):
             else:
                 raise
 
-        # Delete access tokens and e2e keys for each device. Not optimised as it is not
+        # Delete data specific to each device. Not optimised as it is not
         # considered as part of a critical path.
         for device_id in device_ids:
             await self._auth_handler.delete_access_tokens_for_user(
@@ -512,6 +513,14 @@ class DeviceHandler(DeviceWorkerHandler):
                 user_id=user_id, device_id=device_id
             )
 
+            if self.hs.config.experimental.msc3890_enabled:
+                # Remove any local notification settings for this device in accordance
+                # with MSC3890.
+                await self._account_data_handler.remove_account_data_for_user(
+                    user_id,
+                    f"org.matrix.msc3890.local_notification_settings.{device_id}",
+                )
+
         await self.notify_device_update(user_id, device_ids)
 
     async def update_device(self, user_id: str, device_id: str, content: dict) -> None:
-- 
cgit 1.5.1


From 85a7a201fa460c227562111fba4d3d6aef681e23 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 16 Jan 2023 12:40:25 +0000
Subject: Also use stable name in SendJoinResponse struct (#14841)

* Also use stable name in SendJoinResponse struct

follow-up to #14832

* Changelog

* Fix a rename I missed

* Run black

* Update synapse/federation/federation_client.py

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
---
 changelog.d/14841.misc                    |  1 +
 synapse/federation/federation_client.py   |  6 +++---
 synapse/federation/federation_server.py   |  2 +-
 synapse/federation/transport/client.py    | 16 +++++++++-------
 tests/federation/transport/test_client.py |  6 +++---
 5 files changed, 17 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/14841.misc

(limited to 'synapse')

diff --git a/changelog.d/14841.misc b/changelog.d/14841.misc
new file mode 100644
index 0000000000..61e7401e43
--- /dev/null
+++ b/changelog.d/14841.misc
@@ -0,0 +1 @@
+Faster joins: use stable identifiers from [MSC3706](https://github.com/matrix-org/matrix-spec-proposals/pull/3706).
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 137cfb3346..b7002e8a6c 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -1142,9 +1142,9 @@ class FederationClient(FederationBase):
                     % (auth_chain_create_events,)
                 )
 
-            if response.partial_state and not response.servers_in_room:
+            if response.members_omitted and not response.servers_in_room:
                 raise InvalidResponseError(
-                    "partial_state was set, but no servers were listed in the room"
+                    "members_omitted was set, but no servers were listed in the room"
                 )
 
             return SendJoinResult(
@@ -1152,7 +1152,7 @@ class FederationClient(FederationBase):
                 state=signed_state,
                 auth_chain=signed_auth,
                 origin=destination,
-                partial_state=response.partial_state,
+                partial_state=response.members_omitted,
                 servers_in_room=response.servers_in_room or [],
             )
 
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index c65dbf87fb..3197939a36 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -1502,7 +1502,7 @@ def _get_event_ids_for_partial_state_join(
     prev_state_ids: StateMap[str],
     summary: Dict[str, MemberSummary],
 ) -> Collection[str]:
-    """Calculate state to be retuned in a partial_state send_join
+    """Calculate state to be returned in a partial_state send_join
 
     Args:
         join_event: the join event being send_joined
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index c8471d4cf7..5ec651400a 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -795,7 +795,7 @@ class SendJoinResponse:
     event: Optional[EventBase] = None
 
     # The room state is incomplete
-    partial_state: bool = False
+    members_omitted: bool = False
 
     # List of servers in the room
     servers_in_room: Optional[List[str]] = None
@@ -835,16 +835,18 @@ def _event_list_parser(
 
 
 @ijson.coroutine
-def _partial_state_parser(response: SendJoinResponse) -> Generator[None, Any, None]:
+def _members_omitted_parser(response: SendJoinResponse) -> Generator[None, Any, None]:
     """Helper function for use with `ijson.items_coro`
 
-    Parses the partial_state field in send_join responses
+    Parses the members_omitted field in send_join responses
     """
     while True:
         val = yield
         if not isinstance(val, bool):
-            raise TypeError("partial_state must be a boolean")
-        response.partial_state = val
+            raise TypeError(
+                "members_omitted (formerly org.matrix.msc370c.partial_state) must be a boolean"
+            )
+        response.members_omitted = val
 
 
 @ijson.coroutine
@@ -905,7 +907,7 @@ class SendJoinParser(ByteParser[SendJoinResponse]):
         if not v1_api:
             self._coros.append(
                 ijson.items_coro(
-                    _partial_state_parser(self._response),
+                    _members_omitted_parser(self._response),
                     "org.matrix.msc3706.partial_state",
                     use_float="True",
                 )
@@ -913,7 +915,7 @@ class SendJoinParser(ByteParser[SendJoinResponse]):
             # The stable field name comes last, so it "wins" if the fields disagree
             self._coros.append(
                 ijson.items_coro(
-                    _partial_state_parser(self._response),
+                    _members_omitted_parser(self._response),
                     "members_omitted",
                     use_float="True",
                 )
diff --git a/tests/federation/transport/test_client.py b/tests/federation/transport/test_client.py
index c90635e0a0..3d61b1e8a9 100644
--- a/tests/federation/transport/test_client.py
+++ b/tests/federation/transport/test_client.py
@@ -68,11 +68,11 @@ class SendJoinParserTestCase(TestCase):
         self.assertEqual(len(parsed_response.state), 1, parsed_response)
         self.assertEqual(parsed_response.event_dict, {}, parsed_response)
         self.assertIsNone(parsed_response.event, parsed_response)
-        self.assertFalse(parsed_response.partial_state, parsed_response)
+        self.assertFalse(parsed_response.members_omitted, parsed_response)
         self.assertEqual(parsed_response.servers_in_room, None, parsed_response)
 
     def test_partial_state(self) -> None:
-        """Check that the partial_state flag is correctly parsed"""
+        """Check that the members_omitted flag is correctly parsed"""
 
         def parse(response: JsonDict) -> bool:
             parser = SendJoinParser(RoomVersions.V1, False)
@@ -83,7 +83,7 @@ class SendJoinParserTestCase(TestCase):
 
             # Retrieve and check the parsed SendJoinResponse
             parsed_response = parser.finish()
-            return parsed_response.partial_state
+            return parsed_response.members_omitted
 
         self.assertTrue(parse({"members_omitted": True}))
         self.assertTrue(parse({"org.matrix.msc3706.partial_state": True}))
-- 
cgit 1.5.1


From a302d3ecf75493f84fc5be616fee7d199ed12394 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Mon, 16 Jan 2023 13:16:19 +0000
Subject: Remove unnecessary reactor reference from `_PerHostRatelimiter`
 (#14842)

Fix up #14812 to avoid introducing a reference to the reactor.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14842.bugfix          |  1 +
 synapse/rest/client/register.py   |  1 -
 synapse/server.py                 |  1 -
 synapse/util/ratelimitutils.py    | 10 ++--------
 tests/util/test_ratelimitutils.py |  8 ++++----
 5 files changed, 7 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/14842.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14842.bugfix b/changelog.d/14842.bugfix
new file mode 100644
index 0000000000..94e0d70cbc
--- /dev/null
+++ b/changelog.d/14842.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where Synapse would exhaust the stack when processing many federation requests where the remote homeserver has disconnected early.
diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py
index be696c304b..3cb1e7e375 100644
--- a/synapse/rest/client/register.py
+++ b/synapse/rest/client/register.py
@@ -310,7 +310,6 @@ class UsernameAvailabilityRestServlet(RestServlet):
         self.hs = hs
         self.registration_handler = hs.get_registration_handler()
         self.ratelimiter = FederationRateLimiter(
-            hs.get_reactor(),
             hs.get_clock(),
             FederationRatelimitSettings(
                 # Time window of 2s
diff --git a/synapse/server.py b/synapse/server.py
index c8752baa5a..f4ab94c4f3 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -768,7 +768,6 @@ class HomeServer(metaclass=abc.ABCMeta):
     @cache_in_self
     def get_federation_ratelimiter(self) -> FederationRateLimiter:
         return FederationRateLimiter(
-            self.get_reactor(),
             self.get_clock(),
             config=self.config.ratelimiting.rc_federation,
             metrics_name="federation_servlets",
diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py
index bd72947bfe..f262bf95a0 100644
--- a/synapse/util/ratelimitutils.py
+++ b/synapse/util/ratelimitutils.py
@@ -34,7 +34,6 @@ from prometheus_client.core import Counter
 from typing_extensions import ContextManager
 
 from twisted.internet import defer
-from twisted.internet.interfaces import IReactorTime
 
 from synapse.api.errors import LimitExceededError
 from synapse.config.ratelimiting import FederationRatelimitSettings
@@ -147,14 +146,12 @@ class FederationRateLimiter:
 
     def __init__(
         self,
-        reactor: IReactorTime,
         clock: Clock,
         config: FederationRatelimitSettings,
         metrics_name: Optional[str] = None,
     ):
         """
         Args:
-            reactor
             clock
             config
             metrics_name: The name of the rate limiter so we can differentiate it
@@ -166,7 +163,7 @@ class FederationRateLimiter:
 
         def new_limiter() -> "_PerHostRatelimiter":
             return _PerHostRatelimiter(
-                reactor=reactor, clock=clock, config=config, metrics_name=metrics_name
+                clock=clock, config=config, metrics_name=metrics_name
             )
 
         self.ratelimiters: DefaultDict[
@@ -197,14 +194,12 @@ class FederationRateLimiter:
 class _PerHostRatelimiter:
     def __init__(
         self,
-        reactor: IReactorTime,
         clock: Clock,
         config: FederationRatelimitSettings,
         metrics_name: Optional[str] = None,
     ):
         """
         Args:
-            reactor
             clock
             config
             metrics_name: The name of the rate limiter so we can differentiate it
@@ -212,7 +207,6 @@ class _PerHostRatelimiter:
                 for this rate limiter.
                 from the rest in the metrics
         """
-        self.reactor = reactor
         self.clock = clock
         self.metrics_name = metrics_name
 
@@ -388,4 +382,4 @@ class _PerHostRatelimiter:
             except KeyError:
                 pass
 
-        self.reactor.callLater(0.0, start_next_request)
+        self.clock.call_later(0.0, start_next_request)
diff --git a/tests/util/test_ratelimitutils.py b/tests/util/test_ratelimitutils.py
index 2f3ea15b96..fe4961dcf3 100644
--- a/tests/util/test_ratelimitutils.py
+++ b/tests/util/test_ratelimitutils.py
@@ -30,7 +30,7 @@ class FederationRateLimiterTestCase(TestCase):
         """A simple test with the default values"""
         reactor, clock = get_clock()
         rc_config = build_rc_config()
-        ratelimiter = FederationRateLimiter(reactor, clock, rc_config)
+        ratelimiter = FederationRateLimiter(clock, rc_config)
 
         with ratelimiter.ratelimit("testhost") as d1:
             # shouldn't block
@@ -40,7 +40,7 @@ class FederationRateLimiterTestCase(TestCase):
         """Test what happens when we hit the concurrent limit"""
         reactor, clock = get_clock()
         rc_config = build_rc_config({"rc_federation": {"concurrent": 2}})
-        ratelimiter = FederationRateLimiter(reactor, clock, rc_config)
+        ratelimiter = FederationRateLimiter(clock, rc_config)
 
         with ratelimiter.ratelimit("testhost") as d1:
             # shouldn't block
@@ -67,7 +67,7 @@ class FederationRateLimiterTestCase(TestCase):
         rc_config = build_rc_config(
             {"rc_federation": {"sleep_limit": 2, "sleep_delay": 500}}
         )
-        ratelimiter = FederationRateLimiter(reactor, clock, rc_config)
+        ratelimiter = FederationRateLimiter(clock, rc_config)
 
         with ratelimiter.ratelimit("testhost") as d1:
             # shouldn't block
@@ -98,7 +98,7 @@ class FederationRateLimiterTestCase(TestCase):
                 }
             }
         )
-        ratelimiter = FederationRateLimiter(reactor, clock, rc_config)
+        ratelimiter = FederationRateLimiter(clock, rc_config)
 
         with ratelimiter.ratelimit("testhost") as d:
             # shouldn't block
-- 
cgit 1.5.1


From 4db3331bb95a655bb56ab8333be49ee183f71715 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 16 Jan 2023 14:20:12 +0000
Subject: Add an early return when handling no-op presence updates. (#14855)

This stops us from incrementing the presence stream position for no-op updates.
---
 changelog.d/14855.misc       | 1 +
 synapse/handlers/presence.py | 5 +++++
 2 files changed, 6 insertions(+)
 create mode 100644 changelog.d/14855.misc

(limited to 'synapse')

diff --git a/changelog.d/14855.misc b/changelog.d/14855.misc
new file mode 100644
index 0000000000..f0e292f287
--- /dev/null
+++ b/changelog.d/14855.misc
@@ -0,0 +1 @@
+Add an early return when handling no-op presence updates.
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 2af90b25a3..43e4e7b1b4 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -2155,6 +2155,11 @@ class PresenceFederationQueue:
         # This should only be called on a presence writer.
         assert self._presence_writer
 
+        if not states or not destinations:
+            # Ignore calls which either don't have any new states or don't need
+            # to be sent anywhere.
+            return
+
         if self._federation:
             self._federation.send_presence_to_destinations(
                 states=states,
-- 
cgit 1.5.1


From db5145a31d8ed76ac637f933f4facc195d557f75 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Mon, 16 Jan 2023 23:15:17 +0000
Subject: Add parameter to control whether we do a partial state join (#14843)

When the local homeserver is already joined to a room and wants to
perform another remote join, we may find it useful to do a non-partial
state join if we already have the full state for the room.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14843.misc                  |  1 +
 synapse/federation/federation_client.py | 21 ++++++++++++++++++---
 synapse/federation/transport/client.py  |  7 +++++--
 3 files changed, 24 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/14843.misc

(limited to 'synapse')

diff --git a/changelog.d/14843.misc b/changelog.d/14843.misc
new file mode 100644
index 0000000000..bec3c216bc
--- /dev/null
+++ b/changelog.d/14843.misc
@@ -0,0 +1 @@
+Add a parameter to control whether the federation client performs a partial state join.
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index b7002e8a6c..15a9a88302 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -1014,7 +1014,11 @@ class FederationClient(FederationBase):
         )
 
     async def send_join(
-        self, destinations: Iterable[str], pdu: EventBase, room_version: RoomVersion
+        self,
+        destinations: Iterable[str],
+        pdu: EventBase,
+        room_version: RoomVersion,
+        partial_state: bool = True,
     ) -> SendJoinResult:
         """Sends a join event to one of a list of homeservers.
 
@@ -1027,6 +1031,10 @@ class FederationClient(FederationBase):
             pdu: event to be sent
             room_version: the version of the room (according to the server that
                 did the make_join)
+            partial_state: whether to ask the remote server to omit membership state
+                events from the response. If the remote server complies,
+                `partial_state` in the send join result will be set. Defaults to
+                `True`.
 
         Returns:
             The result of the send join request.
@@ -1037,7 +1045,9 @@ class FederationClient(FederationBase):
         """
 
         async def send_request(destination: str) -> SendJoinResult:
-            response = await self._do_send_join(room_version, destination, pdu)
+            response = await self._do_send_join(
+                room_version, destination, pdu, omit_members=partial_state
+            )
 
             # If an event was returned (and expected to be returned):
             #
@@ -1177,7 +1187,11 @@ class FederationClient(FederationBase):
         )
 
     async def _do_send_join(
-        self, room_version: RoomVersion, destination: str, pdu: EventBase
+        self,
+        room_version: RoomVersion,
+        destination: str,
+        pdu: EventBase,
+        omit_members: bool,
     ) -> SendJoinResponse:
         time_now = self._clock.time_msec()
 
@@ -1188,6 +1202,7 @@ class FederationClient(FederationBase):
                 room_id=pdu.room_id,
                 event_id=pdu.event_id,
                 content=pdu.get_pdu_json(time_now),
+                omit_members=omit_members,
             )
         except HttpResponseException as e:
             # If an error is received that is due to an unrecognised endpoint,
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 5ec651400a..556883f079 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -351,13 +351,16 @@ class TransportLayerClient:
         room_id: str,
         event_id: str,
         content: JsonDict,
+        omit_members: bool,
     ) -> "SendJoinResponse":
         path = _create_v2_path("/send_join/%s/%s", room_id, event_id)
         query_params: Dict[str, str] = {}
         if self._faster_joins_enabled:
             # lazy-load state on join
-            query_params["org.matrix.msc3706.partial_state"] = "true"
-            query_params["omit_members"] = "true"
+            query_params["org.matrix.msc3706.partial_state"] = (
+                "true" if omit_members else "false"
+            )
+            query_params["omit_members"] = "true" if omit_members else "false"
 
         return await self.client.put_json(
             destination=destination,
-- 
cgit 1.5.1


From 2b084c5b710d9630178484e6ade597ca7fa814b6 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 17 Jan 2023 09:29:58 +0000
Subject: Merge device list replication streams (#14833)

---
 changelog.d/14826.misc                      |  2 +-
 changelog.d/14833.misc                      |  1 +
 docs/upgrade.md                             |  9 ++--
 synapse/replication/tcp/client.py           |  8 +++-
 synapse/replication/tcp/streams/__init__.py |  3 --
 synapse/replication/tcp/streams/_base.py    | 74 ++++++++++++++++++++---------
 synapse/storage/databases/main/devices.py   | 13 ++---
 7 files changed, 72 insertions(+), 38 deletions(-)
 create mode 100644 changelog.d/14833.misc

(limited to 'synapse')

diff --git a/changelog.d/14826.misc b/changelog.d/14826.misc
index 9ebedcf51e..e80673a721 100644
--- a/changelog.d/14826.misc
+++ b/changelog.d/14826.misc
@@ -1 +1 @@
-Merge tag and normal account data replication streams.
+Merge the two account data and the two device list replication streams.
diff --git a/changelog.d/14833.misc b/changelog.d/14833.misc
new file mode 100644
index 0000000000..e80673a721
--- /dev/null
+++ b/changelog.d/14833.misc
@@ -0,0 +1 @@
+Merge the two account data and the two device list replication streams.
diff --git a/docs/upgrade.md b/docs/upgrade.md
index 8a76172e43..270c33b656 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -92,12 +92,13 @@ process, for example:
 
 ## Changes to the account data replication streams
 
-Synapse has changed the format of the account data replication streams (between
-workers). This is a forwards- and backwards-incompatible change: v1.75 workers
-cannot process account data replicated by v1.76 workers, and vice versa.
+Synapse has changed the format of the account data and devices replication
+streams (between workers). This is a forwards- and backwards-incompatible
+change: v1.75 workers cannot process account data replicated by v1.76 workers,
+and vice versa.
 
 Once all workers are upgraded to v1.76 (or downgraded to v1.75), account data
-replication will resume as normal.
+and device replication will resume as normal.
 
 
 # Upgrading to v1.74.0
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 7263bb2796..31022ce5fb 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -187,7 +187,7 @@ class ReplicationDataHandler:
         elif stream_name == DeviceListsStream.NAME:
             all_room_ids: Set[str] = set()
             for row in rows:
-                if row.entity.startswith("@"):
+                if row.entity.startswith("@") and not row.is_signature:
                     room_ids = await self.store.get_rooms_for_user(row.entity)
                     all_room_ids.update(room_ids)
             self.notifier.on_new_event(
@@ -422,7 +422,11 @@ class FederationSenderHandler:
             # The entities are either user IDs (starting with '@') whose devices
             # have changed, or remote servers that we need to tell about
             # changes.
-            hosts = {row.entity for row in rows if not row.entity.startswith("@")}
+            hosts = {
+                row.entity
+                for row in rows
+                if not row.entity.startswith("@") and not row.is_signature
+            }
             for host in hosts:
                 self.federation_sender.send_device_messages(host, immediate=False)
 
diff --git a/synapse/replication/tcp/streams/__init__.py b/synapse/replication/tcp/streams/__init__.py
index a7eadfa3c9..9c67f661a3 100644
--- a/synapse/replication/tcp/streams/__init__.py
+++ b/synapse/replication/tcp/streams/__init__.py
@@ -37,7 +37,6 @@ from synapse.replication.tcp.streams._base import (
     Stream,
     ToDeviceStream,
     TypingStream,
-    UserSignatureStream,
 )
 from synapse.replication.tcp.streams.events import EventsStream
 from synapse.replication.tcp.streams.federation import FederationStream
@@ -62,7 +61,6 @@ STREAMS_MAP = {
         ToDeviceStream,
         FederationStream,
         AccountDataStream,
-        UserSignatureStream,
         UnPartialStatedRoomStream,
         UnPartialStatedEventStream,
     )
@@ -82,7 +80,6 @@ __all__ = [
     "DeviceListsStream",
     "ToDeviceStream",
     "AccountDataStream",
-    "UserSignatureStream",
     "UnPartialStatedRoomStream",
     "UnPartialStatedEventStream",
 ]
diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py
index fbf78da9c2..a4bdb48c0c 100644
--- a/synapse/replication/tcp/streams/_base.py
+++ b/synapse/replication/tcp/streams/_base.py
@@ -463,18 +463,67 @@ class DeviceListsStream(Stream):
     @attr.s(slots=True, frozen=True, auto_attribs=True)
     class DeviceListsStreamRow:
         entity: str
+        # Indicates that a user has signed their own device with their user-signing key
+        is_signature: bool
 
     NAME = "device_lists"
     ROW_TYPE = DeviceListsStreamRow
 
     def __init__(self, hs: "HomeServer"):
-        store = hs.get_datastores().main
+        self.store = hs.get_datastores().main
         super().__init__(
             hs.get_instance_name(),
-            current_token_without_instance(store.get_device_stream_token),
-            store.get_all_device_list_changes_for_remotes,
+            current_token_without_instance(self.store.get_device_stream_token),
+            self._update_function,
+        )
+
+    async def _update_function(
+        self,
+        instance_name: str,
+        from_token: Token,
+        current_token: Token,
+        target_row_count: int,
+    ) -> StreamUpdateResult:
+        (
+            device_updates,
+            devices_to_token,
+            devices_limited,
+        ) = await self.store.get_all_device_list_changes_for_remotes(
+            instance_name, from_token, current_token, target_row_count
         )
 
+        (
+            signatures_updates,
+            signatures_to_token,
+            signatures_limited,
+        ) = await self.store.get_all_user_signature_changes_for_remotes(
+            instance_name, from_token, current_token, target_row_count
+        )
+
+        upper_limit_token = current_token
+        if devices_limited:
+            upper_limit_token = min(upper_limit_token, devices_to_token)
+        if signatures_limited:
+            upper_limit_token = min(upper_limit_token, signatures_to_token)
+
+        device_updates = [
+            (stream_id, (entity, False))
+            for stream_id, (entity,) in device_updates
+            if stream_id <= upper_limit_token
+        ]
+
+        signatures_updates = [
+            (stream_id, (entity, True))
+            for stream_id, (entity,) in signatures_updates
+            if stream_id <= upper_limit_token
+        ]
+
+        updates = list(
+            heapq.merge(device_updates, signatures_updates, key=lambda row: row[0])
+        )
+
+        return updates, upper_limit_token, devices_limited or signatures_limited
+
 
 class ToDeviceStream(Stream):
     """New to_device messages for a client"""
@@ -583,22 +632,3 @@ class AccountDataStream(Stream):
             heapq.merge(room_rows, global_rows, tag_rows, key=lambda row: row[0])
         )
         return updates, to_token, limited
-
-
-class UserSignatureStream(Stream):
-    """A user has signed their own device with their user-signing key"""
-
-    @attr.s(slots=True, frozen=True, auto_attribs=True)
-    class UserSignatureStreamRow:
-        user_id: str
-
-    NAME = "user_signature"
-    ROW_TYPE = UserSignatureStreamRow
-
-    def __init__(self, hs: "HomeServer"):
-        store = hs.get_datastores().main
-        super().__init__(
-            hs.get_instance_name(),
-            current_token_without_instance(store.get_device_stream_token),
-            store.get_all_user_signature_changes_for_remotes,
-        )
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index b067664473..cd186c8472 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -38,7 +38,7 @@ from synapse.logging.opentracing import (
     whitelisted_homeserver,
 )
 from synapse.metrics.background_process_metrics import wrap_as_background_process
-from synapse.replication.tcp.streams._base import DeviceListsStream, UserSignatureStream
+from synapse.replication.tcp.streams._base import DeviceListsStream
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
 from synapse.storage.database import (
     DatabasePool,
@@ -163,9 +163,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
     ) -> None:
         if stream_name == DeviceListsStream.NAME:
             self._invalidate_caches_for_devices(token, rows)
-        elif stream_name == UserSignatureStream.NAME:
-            for row in rows:
-                self._user_signature_stream_cache.entity_has_changed(row.user_id, token)
+
         return super().process_replication_rows(stream_name, instance_name, token, rows)
 
     def process_replication_position(
@@ -173,14 +171,17 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
     ) -> None:
         if stream_name == DeviceListsStream.NAME:
             self._device_list_id_gen.advance(instance_name, token)
-        elif stream_name == UserSignatureStream.NAME:
-            self._device_list_id_gen.advance(instance_name, token)
+
         super().process_replication_position(stream_name, instance_name, token)
 
     def _invalidate_caches_for_devices(
         self, token: int, rows: Iterable[DeviceListsStream.DeviceListsStreamRow]
     ) -> None:
         for row in rows:
+            if row.is_signature:
+                self._user_signature_stream_cache.entity_has_changed(row.entity, token)
+                continue
+
             # The entities are either user IDs (starting with '@') whose devices
             # have changed, or remote servers that we need to tell about
             # changes.
-- 
cgit 1.5.1


From 316590d1ea273115a9e7925236e02d577a231de4 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 17 Jan 2023 09:58:22 +0000
Subject: Fix bug in `wait_for_stream_position`  (#14856)

We were incorrectly checking if the *local* token had been advanced, rather than the token for the remote instance.

In practice, I don't think this has caused any bugs due to where we use `wait_for_stream_position`, as critically we don't use it on instances that also write to the given streams (and so the local token will lag behind all remote tokens).
---
 changelog.d/14856.misc                |  1 +
 synapse/replication/tcp/client.py     |  2 +-
 tests/replication/tcp/test_handler.py | 78 +++++++++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14856.misc

(limited to 'synapse')

diff --git a/changelog.d/14856.misc b/changelog.d/14856.misc
new file mode 100644
index 0000000000..3731d6cbf1
--- /dev/null
+++ b/changelog.d/14856.misc
@@ -0,0 +1 @@
+Fix `wait_for_stream_position` to correctly wait for the right instance to advance its token.
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 31022ce5fb..322d695bc7 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -325,7 +325,7 @@ class ReplicationDataHandler:
             # anyway in that case we don't need to wait.
             return
 
-        current_position = self._streams[stream_name].current_token(self._instance_name)
+        current_position = self._streams[stream_name].current_token(instance_name)
         if position <= current_position:
             # We're already past the position
             return
diff --git a/tests/replication/tcp/test_handler.py b/tests/replication/tcp/test_handler.py
index 1e299d2d67..555922409d 100644
--- a/tests/replication/tcp/test_handler.py
+++ b/tests/replication/tcp/test_handler.py
@@ -12,6 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from twisted.internet import defer
+
+from synapse.replication.tcp.commands import PositionCommand, RdataCommand
+
 from tests.replication._base import BaseMultiWorkerStreamTestCase
 
 
@@ -71,3 +75,77 @@ class ChannelsTestCase(BaseMultiWorkerStreamTestCase):
         self.assertEqual(
             len(self._redis_server._subscribers_by_channel[b"test/USER_IP"]), 1
         )
+
+    def test_wait_for_stream_position(self) -> None:
+        """Check that wait for stream position correctly waits for an update from the
+        correct instance.
+        """
+        store = self.hs.get_datastores().main
+        cmd_handler = self.hs.get_replication_command_handler()
+        data_handler = self.hs.get_replication_data_handler()
+
+        worker1 = self.make_worker_hs(
+            "synapse.app.generic_worker",
+            extra_config={
+                "worker_name": "worker1",
+                "run_background_tasks_on": "worker1",
+                "redis": {"enabled": True},
+            },
+        )
+
+        cache_id_gen = worker1.get_datastores().main._cache_id_gen
+        assert cache_id_gen is not None
+
+        self.replicate()
+
+        # First, make sure the master knows that `worker1` exists.
+        initial_token = cache_id_gen.get_current_token()
+        cmd_handler.send_command(
+            PositionCommand("caches", "worker1", initial_token, initial_token)
+        )
+        self.replicate()
+
+        # Next send out a normal RDATA, and check that waiting for that stream
+        # ID returns immediately.
+        ctx = cache_id_gen.get_next()
+        next_token = self.get_success(ctx.__aenter__())
+        self.get_success(ctx.__aexit__(None, None, None))
+
+        cmd_handler.send_command(
+            RdataCommand("caches", "worker1", next_token, ("func_name", [], 0))
+        )
+        self.replicate()
+
+        self.get_success(
+            data_handler.wait_for_stream_position("worker1", "caches", next_token)
+        )
+
+        # `wait_for_stream_position` should only return once master receives an
+        # RDATA from the worker
+        ctx = cache_id_gen.get_next()
+        next_token = self.get_success(ctx.__aenter__())
+        self.get_success(ctx.__aexit__(None, None, None))
+
+        d = defer.ensureDeferred(
+            data_handler.wait_for_stream_position("worker1", "caches", next_token)
+        )
+        self.assertFalse(d.called)
+
+        # ... updating the cache ID gen on the master still shouldn't cause the
+        # deferred to wake up.
+        ctx = store._cache_id_gen.get_next()
+        self.get_success(ctx.__aenter__())
+        self.get_success(ctx.__aexit__(None, None, None))
+
+        d = defer.ensureDeferred(
+            data_handler.wait_for_stream_position("worker1", "caches", next_token)
+        )
+        self.assertFalse(d.called)
+
+        # ... but receiving the RDATA should
+        cmd_handler.send_command(
+            RdataCommand("caches", "worker1", next_token, ("func_name", [], 0))
+        )
+        self.replicate()
+
+        self.assertTrue(d.called)
-- 
cgit 1.5.1


From 5b3af1c7d0c5a8901fada7648136f186726fd135 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 17 Jan 2023 12:44:15 +0000
Subject: Stabilise serving partial join responses (#14839)

Serving partial join responses is no longer experimental. They are always served to requests using the stable identifier; they will only be served under the unstable identifier if the undocumented config flag experimental.msc3706_enabled is set to true.

Synapse continues to request a partial join only if the undocumented config flag experimental.faster_joins is set to true; this setting remains present and unaffected.
---
 changelog.d/14839.feature                           |  1 +
 docker/complement/conf/workers-shared-extra.yaml.j2 |  2 --
 synapse/config/experimental.py                      |  6 +++++-
 synapse/federation/transport/server/federation.py   | 21 ++++++++++-----------
 tests/federation/test_federation_server.py          |  3 +--
 5 files changed, 17 insertions(+), 16 deletions(-)
 create mode 100644 changelog.d/14839.feature

(limited to 'synapse')

diff --git a/changelog.d/14839.feature b/changelog.d/14839.feature
new file mode 100644
index 0000000000..a4206be007
--- /dev/null
+++ b/changelog.d/14839.feature
@@ -0,0 +1 @@
+Faster joins: always serve a partial join response to servers that request it with the stable query param.
diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2
index 1170694df5..7e9ec23808 100644
--- a/docker/complement/conf/workers-shared-extra.yaml.j2
+++ b/docker/complement/conf/workers-shared-extra.yaml.j2
@@ -94,8 +94,6 @@ allow_device_name_lookup_over_federation: true
 experimental_features:
   # Enable history backfilling support
   msc2716_enabled: true
-  # server-side support for partial state in /send_join responses
-  msc3706_enabled: true
   {% if not workers_in_use %}
   # client-side support for partial state in /send_join responses
   faster_joins: true
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 72a17e0616..0444ef8244 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -75,11 +75,15 @@ class ExperimentalConfig(Config):
         )
 
         # MSC3706 (server-side support for partial state in /send_join responses)
+        # Synapse will always serve partial state responses to requests using the stable
+        # query parameter `omit_members`. If this flag is set, Synapse will also serve
+        # partial state responses to requests using the unstable query parameter
+        # `org.matrix.msc3706.partial_state`.
         self.msc3706_enabled: bool = experimental.get("msc3706_enabled", False)
 
         # experimental support for faster joins over federation
         # (MSC2775, MSC3706, MSC3895)
-        # requires a target server with msc3706_enabled enabled.
+        # requires a target server that can provide a partial join response (MSC3706)
         self.faster_joins_enabled: bool = experimental.get("faster_joins", False)
 
         # MSC3720 (Account status endpoint)
diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py
index c0a700905b..17c427387e 100644
--- a/synapse/federation/transport/server/federation.py
+++ b/synapse/federation/transport/server/federation.py
@@ -422,7 +422,7 @@ class FederationV2SendJoinServlet(BaseFederationServerServlet):
         server_name: str,
     ):
         super().__init__(hs, authenticator, ratelimiter, server_name)
-        self._msc3706_enabled = hs.config.experimental.msc3706_enabled
+        self._read_msc3706_query_param = hs.config.experimental.msc3706_enabled
 
     async def on_PUT(
         self,
@@ -436,16 +436,15 @@ class FederationV2SendJoinServlet(BaseFederationServerServlet):
         #   match those given in content
 
         partial_state = False
-        if self._msc3706_enabled:
-            # The stable query parameter wins, if it disagrees with the unstable
-            # parameter for some reason.
-            stable_param = parse_boolean_from_args(query, "omit_members", default=None)
-            if stable_param is not None:
-                partial_state = stable_param
-            else:
-                partial_state = parse_boolean_from_args(
-                    query, "org.matrix.msc3706.partial_state", default=False
-                )
+        # The stable query parameter wins, if it disagrees with the unstable
+        # parameter for some reason.
+        stable_param = parse_boolean_from_args(query, "omit_members", default=None)
+        if stable_param is not None:
+            partial_state = stable_param
+        elif self._read_msc3706_query_param:
+            partial_state = parse_boolean_from_args(
+                query, "org.matrix.msc3706.partial_state", default=False
+            )
 
         result = await self.handler.on_send_join_request(
             origin, content, room_id, caller_supports_partial_state=partial_state
diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py
index 27770304be..be719e49c0 100644
--- a/tests/federation/test_federation_server.py
+++ b/tests/federation/test_federation_server.py
@@ -211,9 +211,8 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase):
         )
         self.assertEqual(r[("m.room.member", joining_user)].membership, "join")
 
-    @override_config({"experimental_features": {"msc3706_enabled": True}})
     def test_send_join_partial_state(self) -> None:
-        """When MSC3706 support is enabled, /send_join should return partial state"""
+        """/send_join should return partial state, if requested"""
         joining_user = "@misspiggy:" + self.OTHER_SERVER_NAME
         join_result = self._make_join(joining_user)
 
-- 
cgit 1.5.1


From 4d6b1d3c47387466d34abb98613ca0d240057e24 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 18 Jan 2023 09:27:57 -0500
Subject: Properly check for frozendicts in event auth code. (#14864)

Check for an instance of a mapping instead of a dict.

This only affects room version 10 when frozen events are enabled.
---
 changelog.d/14864.bugfix | 1 +
 synapse/event_auth.py    | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14864.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14864.bugfix b/changelog.d/14864.bugfix
new file mode 100644
index 0000000000..12c0c74ab3
--- /dev/null
+++ b/changelog.d/14864.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.64.0 when using room version 10 with frozen events enabled.
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index d437b7e5d1..c4a7b16413 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import collections.abc
 import logging
 import typing
 from typing import (
@@ -877,7 +878,7 @@ def _check_power_levels(
                 if not isinstance(v, int):
                     raise SynapseError(400, f"{v!r} must be an integer.")
             if k in {"events", "notifications", "users"}:
-                if not isinstance(v, dict) or not all(
+                if not isinstance(v, collections.abc.Mapping) or not all(
                     isinstance(v, int) for v in v.values()
                 ):
                     raise SynapseError(
-- 
cgit 1.5.1


From e8f2bf5c40c27e68e5983ebbd1fc0281bc45bf5f Mon Sep 17 00:00:00 2001
From: Catalan Lover <48515417+FSG-Cat@users.noreply.github.com>
Date: Wed, 18 Jan 2023 19:59:48 +0100
Subject: Change default room version to 10. Implements MSC3904 (#14111)

* Change Documentation to have v10 as default room version

* Change Default Room version to 10

* Add changelog entry for default room version swap

* Add changelog entry for v10 default room version in docs

* Clarify doc changelog entry

Co-authored-by: David Robertson <david.m.robertson1@gmail.com>

* Improve Documentation changes.

Co-authored-by: David Robertson <david.m.robertson1@gmail.com>

* Update Changelog entry to have correct format

Co-authored-by: David Robertson <david.m.robertson1@gmail.com>

* Update Spec Version to 1.5

* Only need 1 changelog.

* Fix test.

* Update "Changed in" line

Co-authored-by: David Robertson <david.m.robertson1@gmail.com>
Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
Co-authored-by: Patrick Cloke <patrickc@matrix.org>
---
 changelog.d/14111.feature                        |  1 +
 docs/usage/configuration/config_documentation.md |  4 +++-
 synapse/config/server.py                         |  2 +-
 tests/rest/client/test_upgrade_room.py           | 12 +++++++++---
 4 files changed, 14 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/14111.feature

(limited to 'synapse')

diff --git a/changelog.d/14111.feature b/changelog.d/14111.feature
new file mode 100644
index 0000000000..0a794701a7
--- /dev/null
+++ b/changelog.d/14111.feature
@@ -0,0 +1 @@
+Update the default room version to [v10](https://spec.matrix.org/v1.5/rooms/v10/) ([MSC 3904](https://github.com/matrix-org/matrix-spec-proposals/pull/3904)). Contributed by @FSG-Cat.
\ No newline at end of file
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 3481e866f7..2883f76a26 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -295,7 +295,9 @@ Known room versions are listed [here](https://spec.matrix.org/latest/rooms/#comp
 For example, for room version 1, `default_room_version` should be set
 to "1".
 
-Currently defaults to "9".
+Currently defaults to ["10"](https://spec.matrix.org/v1.5/rooms/v10/).
+
+_Changed in Synapse 1.76:_ the default room version was increased from [9](https://spec.matrix.org/v1.5/rooms/v9/) to [10](https://spec.matrix.org/v1.5/rooms/v10/).
 
 Example configuration:
 ```yaml
diff --git a/synapse/config/server.py b/synapse/config/server.py
index ec46ca63ad..80bcfa4080 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -151,7 +151,7 @@ DEFAULT_IP_RANGE_BLACKLIST = [
     "fec0::/10",
 ]
 
-DEFAULT_ROOM_VERSION = "9"
+DEFAULT_ROOM_VERSION = "10"
 
 ROOM_COMPLEXITY_TOO_GREAT = (
     "Your homeserver is unable to join rooms this large or complex. "
diff --git a/tests/rest/client/test_upgrade_room.py b/tests/rest/client/test_upgrade_room.py
index 5e7bf97482..5ec343dd7f 100644
--- a/tests/rest/client/test_upgrade_room.py
+++ b/tests/rest/client/test_upgrade_room.py
@@ -199,9 +199,15 @@ class UpgradeRoomTest(unittest.HomeserverTestCase):
 
     def test_stringy_power_levels(self) -> None:
         """The room upgrade converts stringy power levels to proper integers."""
+        # Create a room on room version < 10.
+        room_id = self.helper.create_room_as(
+            self.creator, tok=self.creator_token, room_version="9"
+        )
+        self.helper.join(room_id, self.other, tok=self.other_token)
+
         # Retrieve the room's current power levels.
         power_levels = self.helper.get_state(
-            self.room_id,
+            room_id,
             "m.room.power_levels",
             tok=self.creator_token,
         )
@@ -217,14 +223,14 @@ class UpgradeRoomTest(unittest.HomeserverTestCase):
             # conscience, we ought to ensure it's upgrading from a sufficiently old
             # version of room.
             self.helper.send_state(
-                self.room_id,
+                room_id,
                 "m.room.power_levels",
                 body=power_levels,
                 tok=self.creator_token,
             )
 
         # Upgrade the room. Check the homeserver reports success.
-        channel = self._upgrade_room()
+        channel = self._upgrade_room(room_id=room_id)
         self.assertEqual(200, channel.code, channel.result)
 
         # Extract the new room ID.
-- 
cgit 1.5.1


From 9187fd940e2b2bbfd4df7204053cc26b2707aad4 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 18 Jan 2023 19:35:29 +0000
Subject: Wait for streams to catch up when processing HTTP replication.
 (#14820)

This should hopefully mitigate a class of races where data gets out of
sync due to an HTTP replication request racing with the replication streams.
---
 changelog.d/14820.bugfix                 |  1 +
 synapse/handlers/federation_event.py     |  4 ++
 synapse/replication/http/_base.py        | 97 +++++++++++++++++++++++++++++---
 synapse/replication/http/account_data.py | 29 +++++-----
 synapse/replication/http/devices.py      | 10 +---
 synapse/replication/http/federation.py   | 28 +++------
 synapse/replication/http/login.py        |  5 +-
 synapse/replication/http/membership.py   | 22 ++++----
 synapse/replication/http/presence.py     |  7 +--
 synapse/replication/http/push.py         |  5 +-
 synapse/replication/http/register.py     |  9 +--
 synapse/replication/http/send_event.py   |  5 +-
 synapse/replication/http/send_events.py  |  4 +-
 synapse/replication/http/state.py        |  2 +-
 synapse/replication/http/streams.py      |  6 +-
 synapse/replication/tcp/client.py        | 25 +++++++-
 synapse/replication/tcp/resource.py      | 43 +++++++-------
 synapse/storage/util/id_generators.py    | 34 ++++++-----
 synapse/types/__init__.py                |  6 ++
 tests/replication/http/test__base.py     |  9 +--
 tests/storage/test_id_generators.py      | 20 +++----
 21 files changed, 226 insertions(+), 145 deletions(-)
 create mode 100644 changelog.d/14820.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14820.bugfix b/changelog.d/14820.bugfix
new file mode 100644
index 0000000000..36e94f2b9b
--- /dev/null
+++ b/changelog.d/14820.bugfix
@@ -0,0 +1 @@
+Fix rare races when using workers.
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 6df000faaf..904a721483 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -2259,6 +2259,10 @@ class FederationEventHandler:
                 event_and_contexts, backfilled=backfilled
             )
 
+            # After persistence we always need to notify replication there may
+            # be new data.
+            self._notifier.notify_replication()
+
             if self._ephemeral_messages_enabled:
                 for event in events:
                     # If there's an expiry timestamp on the event, schedule its expiry.
diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py
index 3f4d3fc51a..709327b97f 100644
--- a/synapse/replication/http/_base.py
+++ b/synapse/replication/http/_base.py
@@ -17,7 +17,7 @@ import logging
 import re
 import urllib.parse
 from inspect import signature
-from typing import TYPE_CHECKING, Any, Awaitable, Callable, Dict, List, Tuple
+from typing import TYPE_CHECKING, Any, Awaitable, Callable, ClassVar, Dict, List, Tuple
 
 from prometheus_client import Counter, Gauge
 
@@ -27,6 +27,7 @@ from twisted.web.server import Request
 from synapse.api.errors import HttpResponseException, SynapseError
 from synapse.http import RequestTimedOutError
 from synapse.http.server import HttpServer
+from synapse.http.servlet import parse_json_object_from_request
 from synapse.http.site import SynapseRequest
 from synapse.logging import opentracing
 from synapse.logging.opentracing import trace_with_opname
@@ -53,6 +54,9 @@ _outgoing_request_counter = Counter(
 )
 
 
+_STREAM_POSITION_KEY = "_INT_STREAM_POS"
+
+
 class ReplicationEndpoint(metaclass=abc.ABCMeta):
     """Helper base class for defining new replication HTTP endpoints.
 
@@ -94,6 +98,9 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
             a connection error is received.
         RETRY_ON_CONNECT_ERROR_ATTEMPTS (int): Number of attempts to retry when
             receiving connection errors, each will backoff exponentially longer.
+        WAIT_FOR_STREAMS (bool): Whether to wait for replication streams to
+            catch up before processing the request and/or response. Defaults to
+            True.
     """
 
     NAME: str = abc.abstractproperty()  # type: ignore
@@ -104,6 +111,8 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
     RETRY_ON_CONNECT_ERROR = True
     RETRY_ON_CONNECT_ERROR_ATTEMPTS = 5  # =63s (2^6-1)
 
+    WAIT_FOR_STREAMS: ClassVar[bool] = True
+
     def __init__(self, hs: "HomeServer"):
         if self.CACHE:
             self.response_cache: ResponseCache[str] = ResponseCache(
@@ -126,6 +135,10 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
         if hs.config.worker.worker_replication_secret:
             self._replication_secret = hs.config.worker.worker_replication_secret
 
+        self._streams = hs.get_replication_command_handler().get_streams_to_replicate()
+        self._replication = hs.get_replication_data_handler()
+        self._instance_name = hs.get_instance_name()
+
     def _check_auth(self, request: Request) -> None:
         # Get the authorization header.
         auth_headers = request.requestHeaders.getRawHeaders(b"Authorization")
@@ -160,7 +173,7 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
 
     @abc.abstractmethod
     async def _handle_request(
-        self, request: Request, **kwargs: Any
+        self, request: Request, content: JsonDict, **kwargs: Any
     ) -> Tuple[int, JsonDict]:
         """Handle incoming request.
 
@@ -201,6 +214,10 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
 
         @trace_with_opname("outgoing_replication_request")
         async def send_request(*, instance_name: str = "master", **kwargs: Any) -> Any:
+            # We have to pull these out here to avoid circular dependencies...
+            streams = hs.get_replication_command_handler().get_streams_to_replicate()
+            replication = hs.get_replication_data_handler()
+
             with outgoing_gauge.track_inprogress():
                 if instance_name == local_instance_name:
                     raise Exception("Trying to send HTTP request to self")
@@ -219,6 +236,24 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
 
                 data = await cls._serialize_payload(**kwargs)
 
+                if cls.METHOD != "GET" and cls.WAIT_FOR_STREAMS:
+                    # Include the current stream positions that we write to. We
+                    # don't do this for GETs as they don't have a body, and we
+                    # generally assume that a GET won't rely on data we have
+                    # written.
+                    if _STREAM_POSITION_KEY in data:
+                        raise Exception(
+                            "data to send contains %r key", _STREAM_POSITION_KEY
+                        )
+
+                    data[_STREAM_POSITION_KEY] = {
+                        "streams": {
+                            stream.NAME: stream.current_token(local_instance_name)
+                            for stream in streams
+                        },
+                        "instance_name": local_instance_name,
+                    }
+
                 url_args = [
                     urllib.parse.quote(kwargs[name], safe="") for name in cls.PATH_ARGS
                 ]
@@ -308,6 +343,18 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
                     ) from e
 
                 _outgoing_request_counter.labels(cls.NAME, 200).inc()
+
+                # Wait on any streams that the remote may have written to.
+                for stream_name, position in result.get(
+                    _STREAM_POSITION_KEY, {}
+                ).items():
+                    await replication.wait_for_stream_position(
+                        instance_name=instance_name,
+                        stream_name=stream_name,
+                        position=position,
+                        raise_on_timeout=False,
+                    )
+
                 return result
 
         return send_request
@@ -353,6 +400,23 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
         if self._replication_secret:
             self._check_auth(request)
 
+        if self.METHOD == "GET":
+            # GET APIs always have an empty body.
+            content = {}
+        else:
+            content = parse_json_object_from_request(request)
+
+        # Wait on any streams that the remote may have written to.
+        for stream_name, position in content.get(_STREAM_POSITION_KEY, {"streams": {}})[
+            "streams"
+        ].items():
+            await self._replication.wait_for_stream_position(
+                instance_name=content[_STREAM_POSITION_KEY]["instance_name"],
+                stream_name=stream_name,
+                position=position,
+                raise_on_timeout=False,
+            )
+
         if self.CACHE:
             txn_id = kwargs.pop("txn_id")
 
@@ -361,13 +425,28 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
             # correctly yet. In particular, there may be issues to do with logging
             # context lifetimes.
 
-            return await self.response_cache.wrap(
-                txn_id, self._handle_request, request, **kwargs
+            code, response = await self.response_cache.wrap(
+                txn_id, self._handle_request, request, content, **kwargs
             )
+        else:
+            # The `@cancellable` decorator may be applied to `_handle_request`. But we
+            # told `HttpServer.register_paths` that our handler is `_check_auth_and_handle`,
+            # so we have to set up the cancellable flag ourselves.
+            request.is_render_cancellable = is_function_cancellable(
+                self._handle_request
+            )
+
+            code, response = await self._handle_request(request, content, **kwargs)
+
+        # Return streams we may have written to in the course of processing this
+        # request.
+        if _STREAM_POSITION_KEY in response:
+            raise Exception("data to send contains %r key", _STREAM_POSITION_KEY)
 
-        # The `@cancellable` decorator may be applied to `_handle_request`. But we
-        # told `HttpServer.register_paths` that our handler is `_check_auth_and_handle`,
-        # so we have to set up the cancellable flag ourselves.
-        request.is_render_cancellable = is_function_cancellable(self._handle_request)
+        if self.WAIT_FOR_STREAMS:
+            response[_STREAM_POSITION_KEY] = {
+                stream.NAME: stream.current_token(self._instance_name)
+                for stream in self._streams
+            }
 
-        return await self._handle_request(request, **kwargs)
+        return code, response
diff --git a/synapse/replication/http/account_data.py b/synapse/replication/http/account_data.py
index 0edc95977b..2374f810c9 100644
--- a/synapse/replication/http/account_data.py
+++ b/synapse/replication/http/account_data.py
@@ -18,7 +18,6 @@ from typing import TYPE_CHECKING, Tuple
 from twisted.web.server import Request
 
 from synapse.http.server import HttpServer
-from synapse.http.servlet import parse_json_object_from_request
 from synapse.replication.http._base import ReplicationEndpoint
 from synapse.types import JsonDict
 
@@ -61,10 +60,8 @@ class ReplicationAddUserAccountDataRestServlet(ReplicationEndpoint):
         return payload
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, user_id: str, account_data_type: str
+        self, request: Request, content: JsonDict, user_id: str, account_data_type: str
     ) -> Tuple[int, JsonDict]:
-        content = parse_json_object_from_request(request)
-
         max_stream_id = await self.handler.add_account_data_for_user(
             user_id, account_data_type, content["content"]
         )
@@ -101,7 +98,7 @@ class ReplicationRemoveUserAccountDataRestServlet(ReplicationEndpoint):
         return {}
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, user_id: str, account_data_type: str
+        self, request: Request, content: JsonDict, user_id: str, account_data_type: str
     ) -> Tuple[int, JsonDict]:
         max_stream_id = await self.handler.remove_account_data_for_user(
             user_id, account_data_type
@@ -143,10 +140,13 @@ class ReplicationAddRoomAccountDataRestServlet(ReplicationEndpoint):
         return payload
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, user_id: str, room_id: str, account_data_type: str
+        self,
+        request: Request,
+        content: JsonDict,
+        user_id: str,
+        room_id: str,
+        account_data_type: str,
     ) -> Tuple[int, JsonDict]:
-        content = parse_json_object_from_request(request)
-
         max_stream_id = await self.handler.add_account_data_to_room(
             user_id, room_id, account_data_type, content["content"]
         )
@@ -183,7 +183,12 @@ class ReplicationRemoveRoomAccountDataRestServlet(ReplicationEndpoint):
         return {}
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, user_id: str, room_id: str, account_data_type: str
+        self,
+        request: Request,
+        content: JsonDict,
+        user_id: str,
+        room_id: str,
+        account_data_type: str,
     ) -> Tuple[int, JsonDict]:
         max_stream_id = await self.handler.remove_account_data_for_room(
             user_id, room_id, account_data_type
@@ -225,10 +230,8 @@ class ReplicationAddTagRestServlet(ReplicationEndpoint):
         return payload
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, user_id: str, room_id: str, tag: str
+        self, request: Request, content: JsonDict, user_id: str, room_id: str, tag: str
     ) -> Tuple[int, JsonDict]:
-        content = parse_json_object_from_request(request)
-
         max_stream_id = await self.handler.add_tag_to_room(
             user_id, room_id, tag, content["content"]
         )
@@ -266,7 +269,7 @@ class ReplicationRemoveTagRestServlet(ReplicationEndpoint):
         return {}
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, user_id: str, room_id: str, tag: str
+        self, request: Request, content: JsonDict, user_id: str, room_id: str, tag: str
     ) -> Tuple[int, JsonDict]:
         max_stream_id = await self.handler.remove_tag_from_room(
             user_id,
diff --git a/synapse/replication/http/devices.py b/synapse/replication/http/devices.py
index ea5c08e6cf..ecea6fc915 100644
--- a/synapse/replication/http/devices.py
+++ b/synapse/replication/http/devices.py
@@ -18,7 +18,6 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
 from twisted.web.server import Request
 
 from synapse.http.server import HttpServer
-from synapse.http.servlet import parse_json_object_from_request
 from synapse.logging.opentracing import active_span
 from synapse.replication.http._base import ReplicationEndpoint
 from synapse.types import JsonDict
@@ -78,7 +77,7 @@ class ReplicationUserDevicesResyncRestServlet(ReplicationEndpoint):
         return {}
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, user_id: str
+        self, request: Request, content: JsonDict, user_id: str
     ) -> Tuple[int, Optional[JsonDict]]:
         user_devices = await self.device_list_updater.user_device_resync(user_id)
 
@@ -138,9 +137,8 @@ class ReplicationMultiUserDevicesResyncRestServlet(ReplicationEndpoint):
         return {"user_ids": user_ids}
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request
+        self, request: Request, content: JsonDict
     ) -> Tuple[int, Dict[str, Optional[JsonDict]]]:
-        content = parse_json_object_from_request(request)
         user_ids: List[str] = content["user_ids"]
 
         logger.info("Resync for %r", user_ids)
@@ -205,10 +203,8 @@ class ReplicationUploadKeysForUserRestServlet(ReplicationEndpoint):
         }
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request
+        self, request: Request, content: JsonDict
     ) -> Tuple[int, JsonDict]:
-        content = parse_json_object_from_request(request)
-
         user_id = content["user_id"]
         device_id = content["device_id"]
         keys = content["keys"]
diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py
index d3abafed28..53ad327030 100644
--- a/synapse/replication/http/federation.py
+++ b/synapse/replication/http/federation.py
@@ -21,7 +21,6 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion
 from synapse.events import EventBase, make_event_from_dict
 from synapse.events.snapshot import EventContext
 from synapse.http.server import HttpServer
-from synapse.http.servlet import parse_json_object_from_request
 from synapse.replication.http._base import ReplicationEndpoint
 from synapse.types import JsonDict
 from synapse.util.metrics import Measure
@@ -114,10 +113,8 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint):
 
         return payload
 
-    async def _handle_request(self, request: Request) -> Tuple[int, JsonDict]:  # type: ignore[override]
+    async def _handle_request(self, request: Request, content: JsonDict) -> Tuple[int, JsonDict]:  # type: ignore[override]
         with Measure(self.clock, "repl_fed_send_events_parse"):
-            content = parse_json_object_from_request(request)
-
             room_id = content["room_id"]
             backfilled = content["backfilled"]
 
@@ -181,13 +178,10 @@ class ReplicationFederationSendEduRestServlet(ReplicationEndpoint):
         return {"origin": origin, "content": content}
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, edu_type: str
+        self, request: Request, content: JsonDict, edu_type: str
     ) -> Tuple[int, JsonDict]:
-        with Measure(self.clock, "repl_fed_send_edu_parse"):
-            content = parse_json_object_from_request(request)
-
-            origin = content["origin"]
-            edu_content = content["content"]
+        origin = content["origin"]
+        edu_content = content["content"]
 
         logger.info("Got %r edu from %s", edu_type, origin)
 
@@ -231,13 +225,10 @@ class ReplicationGetQueryRestServlet(ReplicationEndpoint):
         return {"args": args}
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, query_type: str
+        self, request: Request, content: JsonDict, query_type: str
     ) -> Tuple[int, JsonDict]:
-        with Measure(self.clock, "repl_fed_query_parse"):
-            content = parse_json_object_from_request(request)
-
-            args = content["args"]
-            args["origin"] = content["origin"]
+        args = content["args"]
+        args["origin"] = content["origin"]
 
         logger.info("Got %r query from %s", query_type, args["origin"])
 
@@ -274,7 +265,7 @@ class ReplicationCleanRoomRestServlet(ReplicationEndpoint):
         return {}
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, room_id: str
+        self, request: Request, content: JsonDict, room_id: str
     ) -> Tuple[int, JsonDict]:
         await self.store.clean_room_for_join(room_id)
 
@@ -307,9 +298,8 @@ class ReplicationStoreRoomOnOutlierMembershipRestServlet(ReplicationEndpoint):
         return {"room_version": room_version.identifier}
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, room_id: str
+        self, request: Request, content: JsonDict, room_id: str
     ) -> Tuple[int, JsonDict]:
-        content = parse_json_object_from_request(request)
         room_version = KNOWN_ROOM_VERSIONS[content["room_version"]]
         await self.store.maybe_store_room_on_outlier_membership(room_id, room_version)
         return 200, {}
diff --git a/synapse/replication/http/login.py b/synapse/replication/http/login.py
index c68e18da12..6ad6cb1bfe 100644
--- a/synapse/replication/http/login.py
+++ b/synapse/replication/http/login.py
@@ -18,7 +18,6 @@ from typing import TYPE_CHECKING, Optional, Tuple, cast
 from twisted.web.server import Request
 
 from synapse.http.server import HttpServer
-from synapse.http.servlet import parse_json_object_from_request
 from synapse.replication.http._base import ReplicationEndpoint
 from synapse.types import JsonDict
 
@@ -73,10 +72,8 @@ class RegisterDeviceReplicationServlet(ReplicationEndpoint):
         }
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, user_id: str
+        self, request: Request, content: JsonDict, user_id: str
     ) -> Tuple[int, JsonDict]:
-        content = parse_json_object_from_request(request)
-
         device_id = content["device_id"]
         initial_display_name = content["initial_display_name"]
         is_guest = content["is_guest"]
diff --git a/synapse/replication/http/membership.py b/synapse/replication/http/membership.py
index 663bff5738..9fa1060d48 100644
--- a/synapse/replication/http/membership.py
+++ b/synapse/replication/http/membership.py
@@ -17,7 +17,6 @@ from typing import TYPE_CHECKING, List, Optional, Tuple
 from twisted.web.server import Request
 
 from synapse.http.server import HttpServer
-from synapse.http.servlet import parse_json_object_from_request
 from synapse.http.site import SynapseRequest
 from synapse.replication.http._base import ReplicationEndpoint
 from synapse.types import JsonDict, Requester, UserID
@@ -79,10 +78,8 @@ class ReplicationRemoteJoinRestServlet(ReplicationEndpoint):
         }
 
     async def _handle_request(  # type: ignore[override]
-        self, request: SynapseRequest, room_id: str, user_id: str
+        self, request: SynapseRequest, content: JsonDict, room_id: str, user_id: str
     ) -> Tuple[int, JsonDict]:
-        content = parse_json_object_from_request(request)
-
         remote_room_hosts = content["remote_room_hosts"]
         event_content = content["content"]
 
@@ -147,11 +144,10 @@ class ReplicationRemoteKnockRestServlet(ReplicationEndpoint):
     async def _handle_request(  # type: ignore[override]
         self,
         request: SynapseRequest,
+        content: JsonDict,
         room_id: str,
         user_id: str,
     ) -> Tuple[int, JsonDict]:
-        content = parse_json_object_from_request(request)
-
         remote_room_hosts = content["remote_room_hosts"]
         event_content = content["content"]
 
@@ -217,10 +213,8 @@ class ReplicationRemoteRejectInviteRestServlet(ReplicationEndpoint):
         }
 
     async def _handle_request(  # type: ignore[override]
-        self, request: SynapseRequest, invite_event_id: str
+        self, request: SynapseRequest, content: JsonDict, invite_event_id: str
     ) -> Tuple[int, JsonDict]:
-        content = parse_json_object_from_request(request)
-
         txn_id = content["txn_id"]
         event_content = content["content"]
 
@@ -285,10 +279,9 @@ class ReplicationRemoteRescindKnockRestServlet(ReplicationEndpoint):
     async def _handle_request(  # type: ignore[override]
         self,
         request: SynapseRequest,
+        content: JsonDict,
         knock_event_id: str,
     ) -> Tuple[int, JsonDict]:
-        content = parse_json_object_from_request(request)
-
         txn_id = content["txn_id"]
         event_content = content["content"]
 
@@ -347,7 +340,12 @@ class ReplicationUserJoinedLeftRoomRestServlet(ReplicationEndpoint):
         return {}
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, room_id: str, user_id: str, change: str
+        self,
+        request: Request,
+        content: JsonDict,
+        room_id: str,
+        user_id: str,
+        change: str,
     ) -> Tuple[int, JsonDict]:
         logger.info("user membership change: %s in %s", user_id, room_id)
 
diff --git a/synapse/replication/http/presence.py b/synapse/replication/http/presence.py
index 4a5b08f56f..db16aac9c2 100644
--- a/synapse/replication/http/presence.py
+++ b/synapse/replication/http/presence.py
@@ -18,7 +18,6 @@ from typing import TYPE_CHECKING, Tuple
 from twisted.web.server import Request
 
 from synapse.http.server import HttpServer
-from synapse.http.servlet import parse_json_object_from_request
 from synapse.replication.http._base import ReplicationEndpoint
 from synapse.types import JsonDict, UserID
 
@@ -56,7 +55,7 @@ class ReplicationBumpPresenceActiveTime(ReplicationEndpoint):
         return {}
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, user_id: str
+        self, request: Request, content: JsonDict, user_id: str
     ) -> Tuple[int, JsonDict]:
         await self._presence_handler.bump_presence_active_time(
             UserID.from_string(user_id)
@@ -107,10 +106,8 @@ class ReplicationPresenceSetState(ReplicationEndpoint):
         }
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, user_id: str
+        self, request: Request, content: JsonDict, user_id: str
     ) -> Tuple[int, JsonDict]:
-        content = parse_json_object_from_request(request)
-
         await self._presence_handler.set_state(
             UserID.from_string(user_id),
             content["state"],
diff --git a/synapse/replication/http/push.py b/synapse/replication/http/push.py
index af5c2f66a7..297e8ad564 100644
--- a/synapse/replication/http/push.py
+++ b/synapse/replication/http/push.py
@@ -18,7 +18,6 @@ from typing import TYPE_CHECKING, Tuple
 from twisted.web.server import Request
 
 from synapse.http.server import HttpServer
-from synapse.http.servlet import parse_json_object_from_request
 from synapse.replication.http._base import ReplicationEndpoint
 from synapse.types import JsonDict
 
@@ -61,10 +60,8 @@ class ReplicationRemovePusherRestServlet(ReplicationEndpoint):
         return payload
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, user_id: str
+        self, request: Request, content: JsonDict, user_id: str
     ) -> Tuple[int, JsonDict]:
-        content = parse_json_object_from_request(request)
-
         app_id = content["app_id"]
         pushkey = content["pushkey"]
 
diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py
index 976c283360..265e601b96 100644
--- a/synapse/replication/http/register.py
+++ b/synapse/replication/http/register.py
@@ -18,7 +18,6 @@ from typing import TYPE_CHECKING, Optional, Tuple
 from twisted.web.server import Request
 
 from synapse.http.server import HttpServer
-from synapse.http.servlet import parse_json_object_from_request
 from synapse.replication.http._base import ReplicationEndpoint
 from synapse.types import JsonDict
 
@@ -96,10 +95,8 @@ class ReplicationRegisterServlet(ReplicationEndpoint):
         }
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, user_id: str
+        self, request: Request, content: JsonDict, user_id: str
     ) -> Tuple[int, JsonDict]:
-        content = parse_json_object_from_request(request)
-
         await self.registration_handler.check_registration_ratelimit(content["address"])
 
         # Always default admin users to approved (since it means they were created by
@@ -150,10 +147,8 @@ class ReplicationPostRegisterActionsServlet(ReplicationEndpoint):
         return {"auth_result": auth_result, "access_token": access_token}
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, user_id: str
+        self, request: Request, content: JsonDict, user_id: str
     ) -> Tuple[int, JsonDict]:
-        content = parse_json_object_from_request(request)
-
         auth_result = content["auth_result"]
         access_token = content["access_token"]
 
diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py
index 4215a1c1bc..27ad914075 100644
--- a/synapse/replication/http/send_event.py
+++ b/synapse/replication/http/send_event.py
@@ -21,7 +21,6 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.events import EventBase, make_event_from_dict
 from synapse.events.snapshot import EventContext
 from synapse.http.server import HttpServer
-from synapse.http.servlet import parse_json_object_from_request
 from synapse.replication.http._base import ReplicationEndpoint
 from synapse.types import JsonDict, Requester, UserID
 from synapse.util.metrics import Measure
@@ -114,11 +113,9 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint):
         return payload
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, event_id: str
+        self, request: Request, content: JsonDict, event_id: str
     ) -> Tuple[int, JsonDict]:
         with Measure(self.clock, "repl_send_event_parse"):
-            content = parse_json_object_from_request(request)
-
             event_dict = content["event"]
             room_ver = KNOWN_ROOM_VERSIONS[content["room_version"]]
             internal_metadata = content["internal_metadata"]
diff --git a/synapse/replication/http/send_events.py b/synapse/replication/http/send_events.py
index 8889bbb644..4f82c9f96d 100644
--- a/synapse/replication/http/send_events.py
+++ b/synapse/replication/http/send_events.py
@@ -21,7 +21,6 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.events import EventBase, make_event_from_dict
 from synapse.events.snapshot import EventContext
 from synapse.http.server import HttpServer
-from synapse.http.servlet import parse_json_object_from_request
 from synapse.replication.http._base import ReplicationEndpoint
 from synapse.types import JsonDict, Requester, UserID
 from synapse.util.metrics import Measure
@@ -114,10 +113,9 @@ class ReplicationSendEventsRestServlet(ReplicationEndpoint):
         return payload
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request
+        self, request: Request, payload: JsonDict
     ) -> Tuple[int, JsonDict]:
         with Measure(self.clock, "repl_send_events_parse"):
-            payload = parse_json_object_from_request(request)
             events_and_context = []
             events = payload["events"]
 
diff --git a/synapse/replication/http/state.py b/synapse/replication/http/state.py
index 838b7584e5..0c524e7de3 100644
--- a/synapse/replication/http/state.py
+++ b/synapse/replication/http/state.py
@@ -57,7 +57,7 @@ class ReplicationUpdateCurrentStateRestServlet(ReplicationEndpoint):
         return {}
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, room_id: str
+        self, request: Request, content: JsonDict, room_id: str
     ) -> Tuple[int, JsonDict]:
         writer_instance = self._events_shard_config.get_instance(room_id)
         if writer_instance != self._instance_name:
diff --git a/synapse/replication/http/streams.py b/synapse/replication/http/streams.py
index c065225362..3c7b5b18ea 100644
--- a/synapse/replication/http/streams.py
+++ b/synapse/replication/http/streams.py
@@ -54,6 +54,10 @@ class ReplicationGetStreamUpdates(ReplicationEndpoint):
     PATH_ARGS = ("stream_name",)
     METHOD = "GET"
 
+    # We don't want to wait for replication streams to catch up, as this gets
+    # called in the process of catching replication streams up.
+    WAIT_FOR_STREAMS = False
+
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
 
@@ -67,7 +71,7 @@ class ReplicationGetStreamUpdates(ReplicationEndpoint):
         return {"from_token": from_token, "upto_token": upto_token}
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request, stream_name: str
+        self, request: Request, content: JsonDict, stream_name: str
     ) -> Tuple[int, JsonDict]:
         stream = self.streams.get(stream_name)
         if stream is None:
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 322d695bc7..5c2482e40c 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -16,6 +16,7 @@
 import logging
 from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Tuple
 
+from twisted.internet import defer
 from twisted.internet.defer import Deferred
 from twisted.internet.interfaces import IAddress, IConnector
 from twisted.internet.protocol import ReconnectingClientFactory
@@ -314,10 +315,21 @@ class ReplicationDataHandler:
             self.send_handler.wake_destination(server)
 
     async def wait_for_stream_position(
-        self, instance_name: str, stream_name: str, position: int
+        self,
+        instance_name: str,
+        stream_name: str,
+        position: int,
+        raise_on_timeout: bool = True,
     ) -> None:
         """Wait until this instance has received updates up to and including
         the given stream position.
+
+        Args:
+            instance_name
+            stream_name
+            position
+            raise_on_timeout: Whether to raise an exception if we time out
+                waiting for the updates, or just log an error and return.
         """
 
         if instance_name == self._instance_name:
@@ -345,7 +357,16 @@ class ReplicationDataHandler:
         # We measure here to get in flight counts and average waiting time.
         with Measure(self._clock, "repl.wait_for_stream_position"):
             logger.info("Waiting for repl stream %r to reach %s", stream_name, position)
-            await make_deferred_yieldable(deferred)
+            try:
+                await make_deferred_yieldable(deferred)
+            except defer.TimeoutError:
+                logger.error("Timed out waiting for stream %s", stream_name)
+
+                if raise_on_timeout:
+                    raise
+
+                return
+
             logger.info(
                 "Finished waiting for repl stream %r to reach %s", stream_name, position
             )
diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py
index 99f09669f0..9d17eff714 100644
--- a/synapse/replication/tcp/resource.py
+++ b/synapse/replication/tcp/resource.py
@@ -199,33 +199,28 @@ class ReplicationStreamer:
                             # The token has advanced but there is no data to
                             # send, so we send a `POSITION` to inform other
                             # workers of the updated position.
-                            if stream.NAME == EventsStream.NAME:
-                                # XXX: We only do this for the EventStream as it
-                                # turns out that e.g. account data streams share
-                                # their "current token" with each other, meaning
-                                # that it is *not* safe to send a POSITION.
-
-                                # Note: `last_token` may not *actually* be the
-                                # last token we sent out in a RDATA or POSITION.
-                                # This can happen if we sent out an RDATA for
-                                # position X when our current token was say X+1.
-                                # Other workers will see RDATA for X and then a
-                                # POSITION with last token of X+1, which will
-                                # cause them to check if there were any missing
-                                # updates between X and X+1.
-                                logger.info(
-                                    "Sending position: %s -> %s",
+
+                            # Note: `last_token` may not *actually* be the
+                            # last token we sent out in a RDATA or POSITION.
+                            # This can happen if we sent out an RDATA for
+                            # position X when our current token was say X+1.
+                            # Other workers will see RDATA for X and then a
+                            # POSITION with last token of X+1, which will
+                            # cause them to check if there were any missing
+                            # updates between X and X+1.
+                            logger.info(
+                                "Sending position: %s -> %s",
+                                stream.NAME,
+                                current_token,
+                            )
+                            self.command_handler.send_command(
+                                PositionCommand(
                                     stream.NAME,
+                                    self._instance_name,
+                                    last_token,
                                     current_token,
                                 )
-                                self.command_handler.send_command(
-                                    PositionCommand(
-                                        stream.NAME,
-                                        self._instance_name,
-                                        last_token,
-                                        current_token,
-                                    )
-                                )
+                            )
                             continue
 
                         # Some streams return multiple rows with the same stream IDs,
diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py
index 0d7108f01b..8670ffbfa3 100644
--- a/synapse/storage/util/id_generators.py
+++ b/synapse/storage/util/id_generators.py
@@ -378,6 +378,12 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator):
             self._current_positions.values(), default=1
         )
 
+        if not writers:
+            # If there have been no explicit writers given then any instance can
+            # write to the stream. In which case, let's pre-seed our own
+            # position with the current minimum.
+            self._current_positions[self._instance_name] = self._persisted_upto_position
+
     def _load_current_ids(
         self,
         db_conn: LoggingDatabaseConnection,
@@ -695,24 +701,22 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator):
 
         heapq.heappush(self._known_persisted_positions, new_id)
 
-        # If we're a writer and we don't have any active writes we update our
-        # current position to the latest position seen. This allows the instance
-        # to report a recent position when asked, rather than a potentially old
-        # one (if this instance hasn't written anything for a while).
-        our_current_position = self._current_positions.get(self._instance_name)
-        if (
-            our_current_position
-            and not self._unfinished_ids
-            and not self._in_flight_fetches
-        ):
-            self._current_positions[self._instance_name] = max(
-                our_current_position, new_id
-            )
-
         # We move the current min position up if the minimum current positions
         # of all instances is higher (since by definition all positions less
         # that that have been persisted).
-        min_curr = min(self._current_positions.values(), default=0)
+        our_current_position = self._current_positions.get(self._instance_name, 0)
+        min_curr = min(
+            (
+                token
+                for name, token in self._current_positions.items()
+                if name != self._instance_name
+            ),
+            default=our_current_position,
+        )
+
+        if our_current_position and (self._unfinished_ids or self._in_flight_fetches):
+            min_curr = min(min_curr, our_current_position)
+
         self._persisted_upto_position = max(min_curr, self._persisted_upto_position)
 
         # We now iterate through the seen positions, discarding those that are
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index 0c725eb967..c59eca2430 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -604,6 +604,12 @@ class RoomStreamToken:
         elif self.instance_map:
             entries = []
             for name, pos in self.instance_map.items():
+                if pos <= self.stream:
+                    # Ignore instances that are at or below the minimum stream
+                    # position (we might know they've advanced without seeing a
+                    # recent write from them).
+                    continue
+
                 instance_id = await store.get_id_for_instance(name)
                 entries.append(f"{instance_id}.{pos}")
 
diff --git a/tests/replication/http/test__base.py b/tests/replication/http/test__base.py
index 936ab4504a..e03d9b4cc0 100644
--- a/tests/replication/http/test__base.py
+++ b/tests/replication/http/test__base.py
@@ -44,7 +44,7 @@ class CancellableReplicationEndpoint(ReplicationEndpoint):
 
     @cancellable
     async def _handle_request(  # type: ignore[override]
-        self, request: Request
+        self, request: Request, content: JsonDict
     ) -> Tuple[int, JsonDict]:
         await self.clock.sleep(1.0)
         return HTTPStatus.OK, {"result": True}
@@ -54,6 +54,7 @@ class UncancellableReplicationEndpoint(ReplicationEndpoint):
     NAME = "uncancellable_sleep"
     PATH_ARGS = ()
     CACHE = False
+    WAIT_FOR_STREAMS = False
 
     def __init__(self, hs: HomeServer):
         super().__init__(hs)
@@ -64,7 +65,7 @@ class UncancellableReplicationEndpoint(ReplicationEndpoint):
         return {}
 
     async def _handle_request(  # type: ignore[override]
-        self, request: Request
+        self, request: Request, content: JsonDict
     ) -> Tuple[int, JsonDict]:
         await self.clock.sleep(1.0)
         return HTTPStatus.OK, {"result": True}
@@ -85,7 +86,7 @@ class ReplicationEndpointCancellationTestCase(unittest.HomeserverTestCase):
     def test_cancellable_disconnect(self) -> None:
         """Test that handlers with the `@cancellable` flag can be cancelled."""
         path = f"{REPLICATION_PREFIX}/{CancellableReplicationEndpoint.NAME}/"
-        channel = self.make_request("POST", path, await_result=False)
+        channel = self.make_request("POST", path, await_result=False, content={})
         test_disconnect(
             self.reactor,
             channel,
@@ -96,7 +97,7 @@ class ReplicationEndpointCancellationTestCase(unittest.HomeserverTestCase):
     def test_uncancellable_disconnect(self) -> None:
         """Test that handlers without the `@cancellable` flag cannot be cancelled."""
         path = f"{REPLICATION_PREFIX}/{UncancellableReplicationEndpoint.NAME}/"
-        channel = self.make_request("POST", path, await_result=False)
+        channel = self.make_request("POST", path, await_result=False, content={})
         test_disconnect(
             self.reactor,
             channel,
diff --git a/tests/storage/test_id_generators.py b/tests/storage/test_id_generators.py
index d6a2b8d274..ff9691c518 100644
--- a/tests/storage/test_id_generators.py
+++ b/tests/storage/test_id_generators.py
@@ -349,8 +349,8 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase):
 
         # The first ID gen will notice that it can advance its token to 7 as it
         # has no in progress writes...
-        self.assertEqual(first_id_gen.get_positions(), {"first": 7, "second": 7})
-        self.assertEqual(first_id_gen.get_current_token_for_writer("first"), 7)
+        self.assertEqual(first_id_gen.get_positions(), {"first": 3, "second": 7})
+        self.assertEqual(first_id_gen.get_current_token_for_writer("first"), 3)
         self.assertEqual(first_id_gen.get_current_token_for_writer("second"), 7)
 
         # ... but the second ID gen doesn't know that.
@@ -366,8 +366,9 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase):
                 self.assertEqual(stream_id, 8)
 
                 self.assertEqual(
-                    first_id_gen.get_positions(), {"first": 7, "second": 7}
+                    first_id_gen.get_positions(), {"first": 3, "second": 7}
                 )
+                self.assertEqual(first_id_gen.get_persisted_upto_position(), 7)
 
         self.get_success(_get_next_async())
 
@@ -473,7 +474,7 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase):
 
         id_gen = self._create_id_generator("first", writers=["first", "second"])
 
-        self.assertEqual(id_gen.get_positions(), {"first": 5, "second": 5})
+        self.assertEqual(id_gen.get_positions(), {"first": 3, "second": 5})
 
         self.assertEqual(id_gen.get_persisted_upto_position(), 5)
 
@@ -720,7 +721,7 @@ class BackwardsMultiWriterIdGeneratorTestCase(HomeserverTestCase):
 
         self.get_success(_get_next_async2())
 
-        self.assertEqual(id_gen_1.get_positions(), {"first": -2, "second": -2})
+        self.assertEqual(id_gen_1.get_positions(), {"first": -1, "second": -2})
         self.assertEqual(id_gen_2.get_positions(), {"first": -1, "second": -2})
         self.assertEqual(id_gen_1.get_persisted_upto_position(), -2)
         self.assertEqual(id_gen_2.get_persisted_upto_position(), -2)
@@ -816,15 +817,12 @@ class MultiTableMultiWriterIdGeneratorTestCase(HomeserverTestCase):
         first_id_gen = self._create_id_generator("first", writers=["first", "second"])
         second_id_gen = self._create_id_generator("second", writers=["first", "second"])
 
-        # The first ID gen will notice that it can advance its token to 7 as it
-        # has no in progress writes...
-        self.assertEqual(first_id_gen.get_positions(), {"first": 7, "second": 6})
-        self.assertEqual(first_id_gen.get_current_token_for_writer("first"), 7)
+        self.assertEqual(first_id_gen.get_positions(), {"first": 3, "second": 6})
+        self.assertEqual(first_id_gen.get_current_token_for_writer("first"), 3)
         self.assertEqual(first_id_gen.get_current_token_for_writer("second"), 6)
         self.assertEqual(first_id_gen.get_persisted_upto_position(), 7)
 
-        # ... but the second ID gen doesn't know that.
         self.assertEqual(second_id_gen.get_positions(), {"first": 3, "second": 7})
         self.assertEqual(second_id_gen.get_current_token_for_writer("first"), 3)
         self.assertEqual(second_id_gen.get_current_token_for_writer("second"), 7)
-        self.assertEqual(first_id_gen.get_persisted_upto_position(), 7)
+        self.assertEqual(second_id_gen.get_persisted_upto_position(), 7)
-- 
cgit 1.5.1


From a7b54ca8d84e9371244d792c30fc9084579470e1 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Thu, 19 Jan 2023 12:47:10 +0000
Subject: Implement MSC3930: polls push rules (#14787)

---
 changelog.d/14787.feature                          |  1 +
 .../complement/conf/workers-shared-extra.yaml.j2   |  6 +-
 rust/benches/evaluator.rs                          |  9 ++-
 rust/src/push/base_rules.rs                        | 78 +++++++++++++++++++++-
 rust/src/push/evaluator.rs                         |  2 +-
 rust/src/push/mod.rs                               | 16 +++--
 scripts-dev/complement.sh                          |  2 +-
 stubs/synapse/synapse_rust/push.pyi                |  3 +-
 synapse/config/experimental.py                     |  7 ++
 synapse/storage/databases/main/push_rule.py        |  3 +-
 10 files changed, 114 insertions(+), 13 deletions(-)
 create mode 100644 changelog.d/14787.feature

(limited to 'synapse')

diff --git a/changelog.d/14787.feature b/changelog.d/14787.feature
new file mode 100644
index 0000000000..6a34035047
--- /dev/null
+++ b/changelog.d/14787.feature
@@ -0,0 +1 @@
+Implement experimental support for MSC3930: Push rules for (MSC3381) Polls.
\ No newline at end of file
diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2
index 7e9ec23808..281157846a 100644
--- a/docker/complement/conf/workers-shared-extra.yaml.j2
+++ b/docker/complement/conf/workers-shared-extra.yaml.j2
@@ -98,12 +98,14 @@ experimental_features:
   # client-side support for partial state in /send_join responses
   faster_joins: true
   {% endif %}
-  # Filtering /messages by relation type.
-  msc3874_enabled: true
+  # Enable support for polls
+  msc3381_polls_enabled: true
   # Enable deleting device-specific notification settings stored in account data
   msc3890_enabled: true
   # Enable removing account data support
   msc3391_enabled: true
+  # Filtering /messages by relation type.
+  msc3874_enabled: true
 
 server_notices:
   system_mxid_localpart: _server
diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs
index 442a79348f..8c28bb0af3 100644
--- a/rust/benches/evaluator.rs
+++ b/rust/benches/evaluator.rs
@@ -150,8 +150,13 @@ fn bench_eval_message(b: &mut Bencher) {
     )
     .unwrap();
 
-    let rules =
-        FilteredPushRules::py_new(PushRules::new(Vec::new()), Default::default(), false, false);
+    let rules = FilteredPushRules::py_new(
+        PushRules::new(Vec::new()),
+        Default::default(),
+        false,
+        false,
+        false,
+    );
 
     b.iter(|| eval.run(&rules, Some("bob"), Some("person")));
 }
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index 35129691ca..9140a69bb6 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -1,4 +1,4 @@
-// Copyright 2022 The Matrix.org Foundation C.I.C.
+// Copyright 2022, 2023 The Matrix.org Foundation C.I.C.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -208,6 +208,20 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
         default: true,
         default_enabled: true,
     },
+    PushRule {
+        rule_id: Cow::Borrowed("global/override/.org.matrix.msc3930.rule.poll_response"),
+        priority_class: 5,
+        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch(
+            EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Some(Cow::Borrowed("org.matrix.msc3381.poll.response")),
+                pattern_type: None,
+            },
+        ))]),
+        actions: Cow::Borrowed(&[]),
+        default: true,
+        default_enabled: true,
+    },
 ];
 
 pub const BASE_APPEND_CONTENT_RULES: &[PushRule] = &[PushRule {
@@ -596,6 +610,68 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[
         default: true,
         default_enabled: true,
     },
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.org.matrix.msc3930.rule.poll_start_one_to_one"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::RoomMemberCount {
+                is: Some(Cow::Borrowed("2")),
+            }),
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Some(Cow::Borrowed("org.matrix.msc3381.poll.start")),
+                pattern_type: None,
+            })),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.org.matrix.msc3930.rule.poll_start"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch(
+            EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Some(Cow::Borrowed("org.matrix.msc3381.poll.start")),
+                pattern_type: None,
+            },
+        ))]),
+        actions: Cow::Borrowed(&[Action::Notify]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.org.matrix.msc3930.rule.poll_end_one_to_one"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::RoomMemberCount {
+                is: Some(Cow::Borrowed("2")),
+            }),
+            Condition::Known(KnownCondition::EventMatch(EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Some(Cow::Borrowed("org.matrix.msc3381.poll.end")),
+                pattern_type: None,
+            })),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
+    PushRule {
+        rule_id: Cow::Borrowed("global/underride/.org.matrix.msc3930.rule.poll_end"),
+        priority_class: 1,
+        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch(
+            EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Some(Cow::Borrowed("org.matrix.msc3381.poll.end")),
+                pattern_type: None,
+            },
+        ))]),
+        actions: Cow::Borrowed(&[Action::Notify]),
+        default: true,
+        default_enabled: true,
+    },
 ];
 
 lazy_static! {
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index c901c0fbcc..0242ee1c5f 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -483,7 +483,7 @@ fn test_requires_room_version_supports_condition() {
     };
     let rules = PushRules::new(vec![custom_rule]);
     result = evaluator.run(
-        &FilteredPushRules::py_new(rules, BTreeMap::new(), true, true),
+        &FilteredPushRules::py_new(rules, BTreeMap::new(), true, false, true),
         None,
         None,
     );
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index 2e9d3e38a1..842b13c88b 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -411,8 +411,9 @@ impl PushRules {
 pub struct FilteredPushRules {
     push_rules: PushRules,
     enabled_map: BTreeMap<String, bool>,
-    msc3664_enabled: bool,
     msc1767_enabled: bool,
+    msc3381_polls_enabled: bool,
+    msc3664_enabled: bool,
 }
 
 #[pymethods]
@@ -421,14 +422,16 @@ impl FilteredPushRules {
     pub fn py_new(
         push_rules: PushRules,
         enabled_map: BTreeMap<String, bool>,
-        msc3664_enabled: bool,
         msc1767_enabled: bool,
+        msc3381_polls_enabled: bool,
+        msc3664_enabled: bool,
     ) -> Self {
         Self {
             push_rules,
             enabled_map,
-            msc3664_enabled,
             msc1767_enabled,
+            msc3381_polls_enabled,
+            msc3664_enabled,
         }
     }
 
@@ -447,13 +450,18 @@ impl FilteredPushRules {
             .iter()
             .filter(|rule| {
                 // Ignore disabled experimental push rules
+
+                if !self.msc1767_enabled && rule.rule_id.contains("org.matrix.msc1767") {
+                    return false;
+                }
+
                 if !self.msc3664_enabled
                     && rule.rule_id == "global/override/.im.nheko.msc3664.reply"
                 {
                     return false;
                 }
 
-                if !self.msc1767_enabled && rule.rule_id.contains("org.matrix.msc1767") {
+                if !self.msc3381_polls_enabled && rule.rule_id.contains("org.matrix.msc3930") {
                     return false;
                 }
 
diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh
index 7c48d8bccb..a183653d52 100755
--- a/scripts-dev/complement.sh
+++ b/scripts-dev/complement.sh
@@ -190,7 +190,7 @@ fi
 
 extra_test_args=()
 
-test_tags="synapse_blacklist,msc3787,msc3874,msc3890,msc3391"
+test_tags="synapse_blacklist,msc3787,msc3874,msc3890,msc3391,msc3930"
 
 # All environment variables starting with PASS_ will be shared.
 # (The prefix is stripped off before reaching the container.)
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index 373b40740b..304ed7111c 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -43,8 +43,9 @@ class FilteredPushRules:
         self,
         push_rules: PushRules,
         enabled_map: Dict[str, bool],
-        msc3664_enabled: bool,
         msc1767_enabled: bool,
+        msc3381_polls_enabled: bool,
+        msc3664_enabled: bool,
     ): ...
     def rules(self) -> Collection[Tuple[PushRule, bool]]: ...
 
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 0444ef8244..89586db763 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -146,6 +146,13 @@ class ExperimentalConfig(Config):
                 "required to communicate account data deletions to clients."
             )
 
+        # MSC3381: Polls.
+        # In practice, supporting polls in Synapse only requires an implementation of
+        # MSC3930: Push rules for MSC3381 polls; which is what this option enables.
+        self.msc3381_polls_enabled: bool = experimental.get(
+            "msc3381_polls_enabled", False
+        )
+
         # MSC3912: Relation-based redactions.
         self.msc3912_enabled: bool = experimental.get("msc3912_enabled", False)
 
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index d4e4b777da..03182887d1 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -86,8 +86,9 @@ def _load_rules(
     filtered_rules = FilteredPushRules(
         push_rules,
         enabled_map,
-        msc3664_enabled=experimental_config.msc3664_enabled,
         msc1767_enabled=experimental_config.msc1767_enabled,
+        msc3664_enabled=experimental_config.msc3664_enabled,
+        msc3381_polls_enabled=experimental_config.msc3381_polls_enabled,
     )
 
     return filtered_rules
-- 
cgit 1.5.1


From cdf2707678dc9f08e965eb0f0c1f39e71552fe3e Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 19 Jan 2023 22:19:56 +0000
Subject: Fix bug in wait for stream position (#14872)

This caused some requests to fail.

This really only started causing issues due to #14856
---
 changelog.d/14872.misc            |  1 +
 synapse/replication/tcp/client.py | 29 +++++++++++++++++++----------
 2 files changed, 20 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/14872.misc

(limited to 'synapse')

diff --git a/changelog.d/14872.misc b/changelog.d/14872.misc
new file mode 100644
index 0000000000..3731d6cbf1
--- /dev/null
+++ b/changelog.d/14872.misc
@@ -0,0 +1 @@
+Fix `wait_for_stream_position` to correctly wait for the right instance to advance its token.
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 5c2482e40c..6e242c5749 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -133,9 +133,9 @@ class ReplicationDataHandler:
         if hs.should_send_federation():
             self.send_handler = FederationSenderHandler(hs)
 
-        # Map from stream to list of deferreds waiting for the stream to
+        # Map from stream and instance to list of deferreds waiting for the stream to
         # arrive at a particular position. The lists are sorted by stream position.
-        self._streams_to_waiters: Dict[str, List[Tuple[int, Deferred]]] = {}
+        self._streams_to_waiters: Dict[Tuple[str, str], List[Tuple[int, Deferred]]] = {}
 
     async def on_rdata(
         self, stream_name: str, instance_name: str, token: int, rows: list
@@ -270,7 +270,7 @@ class ReplicationDataHandler:
         # Notify any waiting deferreds. The list is ordered by position so we
         # just iterate through the list until we reach a position that is
         # greater than the received row position.
-        waiting_list = self._streams_to_waiters.get(stream_name, [])
+        waiting_list = self._streams_to_waiters.get((stream_name, instance_name), [])
 
         # Index of first item with a position after the current token, i.e we
         # have called all deferreds before this index. If not overwritten by
@@ -279,14 +279,13 @@ class ReplicationDataHandler:
         # `len(list)` works for both cases.
         index_of_first_deferred_not_called = len(waiting_list)
 
+        # We don't fire the deferreds until after we finish iterating over the
+        # list, to avoid the list changing when we fire the deferreds.
+        deferreds_to_callback = []
+
         for idx, (position, deferred) in enumerate(waiting_list):
             if position <= token:
-                try:
-                    with PreserveLoggingContext():
-                        deferred.callback(None)
-                except Exception:
-                    # The deferred has been cancelled or timed out.
-                    pass
+                deferreds_to_callback.append(deferred)
             else:
                 # The list is sorted by position so we don't need to continue
                 # checking any further entries in the list.
@@ -297,6 +296,14 @@ class ReplicationDataHandler:
         # loop. (This maintains the order so no need to resort)
         waiting_list[:] = waiting_list[index_of_first_deferred_not_called:]
 
+        for deferred in deferreds_to_callback:
+            try:
+                with PreserveLoggingContext():
+                    deferred.callback(None)
+            except Exception:
+                # The deferred has been cancelled or timed out.
+                pass
+
     async def on_position(
         self, stream_name: str, instance_name: str, token: int
     ) -> None:
@@ -349,7 +356,9 @@ class ReplicationDataHandler:
             deferred, _WAIT_FOR_REPLICATION_TIMEOUT_SECONDS, self._reactor
         )
 
-        waiting_list = self._streams_to_waiters.setdefault(stream_name, [])
+        waiting_list = self._streams_to_waiters.setdefault(
+            (stream_name, instance_name), []
+        )
 
         waiting_list.append((position, deferred))
         waiting_list.sort(key=lambda t: t[0])
-- 
cgit 1.5.1


From cdea7c11d082e73606bea5d0462f7971e90d836c Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Fri, 20 Jan 2023 12:06:19 +0000
Subject: Faster joins: Avoid starting duplicate partial state syncs (#14844)

Currently, we will try to start a new partial state sync every time we
perform a remote join, which is undesirable if there is already one
running for a given room.

We intend to perform remote joins whenever additional local users wish
to join a partial state room, so let's ensure that we do not start more
than one concurrent partial state sync for any given room.

------------------------------------------------------------------------

There is a race condition where the homeserver leaves a room and later
rejoins while the partial state sync from the previous membership is
still running. There is no guarantee that the previous partial state
sync will process the latest join, so we restart it if needed.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14844.misc            |   1 +
 synapse/handlers/federation.py    | 106 +++++++++++++++++++++++++++++++++---
 tests/handlers/test_federation.py | 112 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 210 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/14844.misc

(limited to 'synapse')

diff --git a/changelog.d/14844.misc b/changelog.d/14844.misc
new file mode 100644
index 0000000000..30ce866304
--- /dev/null
+++ b/changelog.d/14844.misc
@@ -0,0 +1 @@
+Add check to avoid starting duplicate partial state syncs.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index eca75f1108..e386f77de6 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -27,6 +27,7 @@ from typing import (
     Iterable,
     List,
     Optional,
+    Set,
     Tuple,
     Union,
 )
@@ -171,12 +172,23 @@ class FederationHandler:
 
         self.third_party_event_rules = hs.get_third_party_event_rules()
 
+        # Tracks running partial state syncs by room ID.
+        # Partial state syncs currently only run on the main process, so it's okay to
+        # track them in-memory for now.
+        self._active_partial_state_syncs: Set[str] = set()
+        # Tracks partial state syncs we may want to restart.
+        # A dictionary mapping room IDs to (initial destination, other destinations)
+        # tuples.
+        self._partial_state_syncs_maybe_needing_restart: Dict[
+            str, Tuple[Optional[str], Collection[str]]
+        ] = {}
+
         # if this is the main process, fire off a background process to resume
         # any partial-state-resync operations which were in flight when we
         # were shut down.
         if not hs.config.worker.worker_app:
             run_as_background_process(
-                "resume_sync_partial_state_room", self._resume_sync_partial_state_room
+                "resume_sync_partial_state_room", self._resume_partial_state_room_sync
             )
 
     @trace
@@ -679,9 +691,7 @@ class FederationHandler:
                 if ret.partial_state:
                     # Kick off the process of asynchronously fetching the state for this
                     # room.
-                    run_as_background_process(
-                        desc="sync_partial_state_room",
-                        func=self._sync_partial_state_room,
+                    self._start_partial_state_room_sync(
                         initial_destination=origin,
                         other_destinations=ret.servers_in_room,
                         room_id=room_id,
@@ -1660,20 +1670,100 @@ class FederationHandler:
         # well.
         return None
 
-    async def _resume_sync_partial_state_room(self) -> None:
+    async def _resume_partial_state_room_sync(self) -> None:
         """Resumes resyncing of all partial-state rooms after a restart."""
         assert not self.config.worker.worker_app
 
         partial_state_rooms = await self.store.get_partial_state_room_resync_info()
         for room_id, resync_info in partial_state_rooms.items():
-            run_as_background_process(
-                desc="sync_partial_state_room",
-                func=self._sync_partial_state_room,
+            self._start_partial_state_room_sync(
                 initial_destination=resync_info.joined_via,
                 other_destinations=resync_info.servers_in_room,
                 room_id=room_id,
             )
 
+    def _start_partial_state_room_sync(
+        self,
+        initial_destination: Optional[str],
+        other_destinations: Collection[str],
+        room_id: str,
+    ) -> None:
+        """Starts the background process to resync the state of a partial state room,
+        if it is not already running.
+
+        Args:
+            initial_destination: the initial homeserver to pull the state from
+            other_destinations: other homeservers to try to pull the state from, if
+                `initial_destination` is unavailable
+            room_id: room to be resynced
+        """
+
+        async def _sync_partial_state_room_wrapper() -> None:
+            if room_id in self._active_partial_state_syncs:
+                # Another local user has joined the room while there is already a
+                # partial state sync running. This implies that there is a new join
+                # event to un-partial state. We might find ourselves in one of a few
+                # scenarios:
+                #  1. There is an existing partial state sync. The partial state sync
+                #     un-partial states the new join event before completing and all is
+                #     well.
+                #  2. Before the latest join, the homeserver was no longer in the room
+                #     and there is an existing partial state sync from our previous
+                #     membership of the room. The partial state sync may have:
+                #      a) succeeded, but not yet terminated. The room will not be
+                #         un-partial stated again unless we restart the partial state
+                #         sync.
+                #      b) failed, because we were no longer in the room and remote
+                #         homeservers were refusing our requests, but not yet
+                #         terminated. After the latest join, remote homeservers may
+                #         start answering our requests again, so we should restart the
+                #         partial state sync.
+                # In the cases where we would want to restart the partial state sync,
+                # the room would have the partial state flag when the partial state sync
+                # terminates.
+                self._partial_state_syncs_maybe_needing_restart[room_id] = (
+                    initial_destination,
+                    other_destinations,
+                )
+                return
+
+            self._active_partial_state_syncs.add(room_id)
+
+            try:
+                await self._sync_partial_state_room(
+                    initial_destination=initial_destination,
+                    other_destinations=other_destinations,
+                    room_id=room_id,
+                )
+            finally:
+                # Read the room's partial state flag while we still hold the claim to
+                # being the active partial state sync (so that another partial state
+                # sync can't come along and mess with it under us).
+                # Normally, the partial state flag will be gone. If it isn't, then we
+                # may find ourselves in scenario 2a or 2b as described in the comment
+                # above, where we want to restart the partial state sync.
+                is_still_partial_state_room = await self.store.is_partial_state_room(
+                    room_id
+                )
+                self._active_partial_state_syncs.remove(room_id)
+
+                if room_id in self._partial_state_syncs_maybe_needing_restart:
+                    (
+                        restart_initial_destination,
+                        restart_other_destinations,
+                    ) = self._partial_state_syncs_maybe_needing_restart.pop(room_id)
+
+                    if is_still_partial_state_room:
+                        self._start_partial_state_room_sync(
+                            initial_destination=restart_initial_destination,
+                            other_destinations=restart_other_destinations,
+                            room_id=room_id,
+                        )
+
+        run_as_background_process(
+            desc="sync_partial_state_room", func=_sync_partial_state_room_wrapper
+        )
+
     async def _sync_partial_state_room(
         self,
         initial_destination: Optional[str],
diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py
index cedbb9fafc..c1558c40c3 100644
--- a/tests/handlers/test_federation.py
+++ b/tests/handlers/test_federation.py
@@ -12,10 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import cast
+from typing import Collection, Optional, cast
 from unittest import TestCase
 from unittest.mock import Mock, patch
 
+from twisted.internet.defer import Deferred
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.constants import EventTypes
@@ -679,3 +680,112 @@ class PartialJoinTestCase(unittest.FederatingHomeserverTestCase):
             f"Stale partial-stated room flag left over for {room_id} after a"
             f" failed do_invite_join!",
         )
+
+    def test_duplicate_partial_state_room_syncs(self) -> None:
+        """
+        Tests that concurrent partial state syncs are not started for the same room.
+        """
+        is_partial_state = True
+        end_sync: "Deferred[None]" = Deferred()
+
+        async def is_partial_state_room(room_id: str) -> bool:
+            return is_partial_state
+
+        async def sync_partial_state_room(
+            initial_destination: Optional[str],
+            other_destinations: Collection[str],
+            room_id: str,
+        ) -> None:
+            nonlocal end_sync
+            try:
+                await end_sync
+            finally:
+                end_sync = Deferred()
+
+        mock_is_partial_state_room = Mock(side_effect=is_partial_state_room)
+        mock_sync_partial_state_room = Mock(side_effect=sync_partial_state_room)
+
+        fed_handler = self.hs.get_federation_handler()
+        store = self.hs.get_datastores().main
+
+        with patch.object(
+            fed_handler, "_sync_partial_state_room", mock_sync_partial_state_room
+        ), patch.object(store, "is_partial_state_room", mock_is_partial_state_room):
+            # Start the partial state sync.
+            fed_handler._start_partial_state_room_sync("hs1", ["hs2"], "room_id")
+            self.assertEqual(mock_sync_partial_state_room.call_count, 1)
+
+            # Try to start another partial state sync.
+            # Nothing should happen.
+            fed_handler._start_partial_state_room_sync("hs3", ["hs2"], "room_id")
+            self.assertEqual(mock_sync_partial_state_room.call_count, 1)
+
+            # End the partial state sync
+            is_partial_state = False
+            end_sync.callback(None)
+
+            # The partial state sync should not be restarted.
+            self.assertEqual(mock_sync_partial_state_room.call_count, 1)
+
+            # The next attempt to start the partial state sync should work.
+            is_partial_state = True
+            fed_handler._start_partial_state_room_sync("hs3", ["hs2"], "room_id")
+            self.assertEqual(mock_sync_partial_state_room.call_count, 2)
+
+    def test_partial_state_room_sync_restart(self) -> None:
+        """
+        Tests that partial state syncs are restarted when a second partial state sync
+        was deduplicated and the first partial state sync fails.
+        """
+        is_partial_state = True
+        end_sync: "Deferred[None]" = Deferred()
+
+        async def is_partial_state_room(room_id: str) -> bool:
+            return is_partial_state
+
+        async def sync_partial_state_room(
+            initial_destination: Optional[str],
+            other_destinations: Collection[str],
+            room_id: str,
+        ) -> None:
+            nonlocal end_sync
+            try:
+                await end_sync
+            finally:
+                end_sync = Deferred()
+
+        mock_is_partial_state_room = Mock(side_effect=is_partial_state_room)
+        mock_sync_partial_state_room = Mock(side_effect=sync_partial_state_room)
+
+        fed_handler = self.hs.get_federation_handler()
+        store = self.hs.get_datastores().main
+
+        with patch.object(
+            fed_handler, "_sync_partial_state_room", mock_sync_partial_state_room
+        ), patch.object(store, "is_partial_state_room", mock_is_partial_state_room):
+            # Start the partial state sync.
+            fed_handler._start_partial_state_room_sync("hs1", ["hs2"], "room_id")
+            self.assertEqual(mock_sync_partial_state_room.call_count, 1)
+
+            # Fail the partial state sync.
+            # The partial state sync should not be restarted.
+            end_sync.errback(Exception("Failed to request /state_ids"))
+            self.assertEqual(mock_sync_partial_state_room.call_count, 1)
+
+            # Start the partial state sync again.
+            fed_handler._start_partial_state_room_sync("hs1", ["hs2"], "room_id")
+            self.assertEqual(mock_sync_partial_state_room.call_count, 2)
+
+            # Deduplicate another partial state sync.
+            fed_handler._start_partial_state_room_sync("hs3", ["hs2"], "room_id")
+            self.assertEqual(mock_sync_partial_state_room.call_count, 2)
+
+            # Fail the partial state sync.
+            # It should restart with the latest parameters.
+            end_sync.errback(Exception("Failed to request /state_ids"))
+            self.assertEqual(mock_sync_partial_state_room.call_count, 3)
+            mock_sync_partial_state_room.assert_called_with(
+                initial_destination="hs3",
+                other_destinations=["hs2"],
+                room_id="room_id",
+            )
-- 
cgit 1.5.1


From 65d03866936adb144631d263a8539a2cb060fd43 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 20 Jan 2023 18:02:18 +0000
Subject: Always notify replication when a stream advances (#14877)

This ensures that all other workers are told about stream updates in a timely manner, without having to remember to manually poke replication.
---
 changelog.d/14877.misc                            |  1 +
 synapse/_scripts/synapse_port_db.py               |  4 +++
 synapse/notifier.py                               | 31 +++++++++++++++++++----
 synapse/server.py                                 |  6 ++++-
 synapse/storage/databases/main/account_data.py    |  2 ++
 synapse/storage/databases/main/cache.py           |  1 +
 synapse/storage/databases/main/deviceinbox.py     |  3 ++-
 synapse/storage/databases/main/devices.py         |  1 +
 synapse/storage/databases/main/end_to_end_keys.py |  5 +++-
 synapse/storage/databases/main/events_worker.py   | 10 +++++++-
 synapse/storage/databases/main/presence.py        |  3 ++-
 synapse/storage/databases/main/push_rule.py       |  1 +
 synapse/storage/databases/main/pusher.py          |  1 +
 synapse/storage/databases/main/receipts.py        |  2 ++
 synapse/storage/databases/main/room.py            |  6 ++++-
 synapse/storage/util/id_generators.py             | 26 +++++++++++++++++--
 tests/module_api/test_api.py                      |  3 +++
 tests/replication/tcp/test_handler.py             | 23 +++++------------
 tests/storage/test_id_generators.py               |  4 +++
 19 files changed, 104 insertions(+), 29 deletions(-)
 create mode 100644 changelog.d/14877.misc

(limited to 'synapse')

diff --git a/changelog.d/14877.misc b/changelog.d/14877.misc
new file mode 100644
index 0000000000..4e9c3fa33f
--- /dev/null
+++ b/changelog.d/14877.misc
@@ -0,0 +1 @@
+Always automatically notify replication when a stream advances.
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index c463b60b26..5e137dbbf7 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -51,6 +51,7 @@ from synapse.logging.context import (
     make_deferred_yieldable,
     run_in_background,
 )
+from synapse.notifier import ReplicationNotifier
 from synapse.storage.database import DatabasePool, LoggingTransaction, make_conn
 from synapse.storage.databases.main import PushRuleStore
 from synapse.storage.databases.main.account_data import AccountDataWorkerStore
@@ -260,6 +261,9 @@ class MockHomeserver:
     def should_send_federation(self) -> bool:
         return False
 
+    def get_replication_notifier(self) -> ReplicationNotifier:
+        return ReplicationNotifier()
+
 
 class Porter:
     def __init__(
diff --git a/synapse/notifier.py b/synapse/notifier.py
index 26b97cf766..28f0d4a25a 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -226,8 +226,7 @@ class Notifier:
         self.store = hs.get_datastores().main
         self.pending_new_room_events: List[_PendingRoomEventEntry] = []
 
-        # Called when there are new things to stream over replication
-        self.replication_callbacks: List[Callable[[], None]] = []
+        self._replication_notifier = hs.get_replication_notifier()
         self._new_join_in_room_callbacks: List[Callable[[str, str], None]] = []
 
         self._federation_client = hs.get_federation_http_client()
@@ -279,7 +278,7 @@ class Notifier:
         it needs to do any asynchronous work, a background thread should be started and
         wrapped with run_as_background_process.
         """
-        self.replication_callbacks.append(cb)
+        self._replication_notifier.add_replication_callback(cb)
 
     def add_new_join_in_room_callback(self, cb: Callable[[str, str], None]) -> None:
         """Add a callback that will be called when a user joins a room.
@@ -741,8 +740,7 @@ class Notifier:
 
     def notify_replication(self) -> None:
         """Notify the any replication listeners that there's a new event"""
-        for cb in self.replication_callbacks:
-            cb()
+        self._replication_notifier.notify_replication()
 
     def notify_user_joined_room(self, event_id: str, room_id: str) -> None:
         for cb in self._new_join_in_room_callbacks:
@@ -759,3 +757,26 @@ class Notifier:
         # Tell the federation client about the fact the server is back up, so
         # that any in flight requests can be immediately retried.
         self._federation_client.wake_destination(server)
+
+
+@attr.s(auto_attribs=True)
+class ReplicationNotifier:
+    """Tracks callbacks for things that need to know about stream changes.
+
+    This is separate from the notifier to avoid circular dependencies.
+    """
+
+    _replication_callbacks: List[Callable[[], None]] = attr.Factory(list)
+
+    def add_replication_callback(self, cb: Callable[[], None]) -> None:
+        """Add a callback that will be called when some new data is available.
+        Callback is not given any arguments. It should *not* return a Deferred - if
+        it needs to do any asynchronous work, a background thread should be started and
+        wrapped with run_as_background_process.
+        """
+        self._replication_callbacks.append(cb)
+
+    def notify_replication(self) -> None:
+        """Notify any replication listeners that there's a new event"""
+        for cb in self._replication_callbacks:
+            cb()
diff --git a/synapse/server.py b/synapse/server.py
index f4ab94c4f3..9d6d268f49 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -107,7 +107,7 @@ from synapse.http.client import InsecureInterceptableContextFactory, SimpleHttpC
 from synapse.http.matrixfederationclient import MatrixFederationHttpClient
 from synapse.metrics.common_usage_metrics import CommonUsageMetricsManager
 from synapse.module_api import ModuleApi
-from synapse.notifier import Notifier
+from synapse.notifier import Notifier, ReplicationNotifier
 from synapse.push.bulk_push_rule_evaluator import BulkPushRuleEvaluator
 from synapse.push.pusherpool import PusherPool
 from synapse.replication.tcp.client import ReplicationDataHandler
@@ -389,6 +389,10 @@ class HomeServer(metaclass=abc.ABCMeta):
     def get_notifier(self) -> Notifier:
         return Notifier(self)
 
+    @cache_in_self
+    def get_replication_notifier(self) -> ReplicationNotifier:
+        return ReplicationNotifier()
+
     @cache_in_self
     def get_auth(self) -> Auth:
         return Auth(self)
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 881d7089db..8a359d7eb8 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -75,6 +75,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             self._account_data_id_gen = MultiWriterIdGenerator(
                 db_conn=db_conn,
                 db=database,
+                notifier=hs.get_replication_notifier(),
                 stream_name="account_data",
                 instance_name=self._instance_name,
                 tables=[
@@ -95,6 +96,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             # SQLite).
             self._account_data_id_gen = StreamIdGenerator(
                 db_conn,
+                hs.get_replication_notifier(),
                 "room_account_data",
                 "stream_id",
                 extra_tables=[("room_tags_revisions", "stream_id")],
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index 2179a8bf59..5b66431691 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -75,6 +75,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
             self._cache_id_gen = MultiWriterIdGenerator(
                 db_conn,
                 database,
+                notifier=hs.get_replication_notifier(),
                 stream_name="caches",
                 instance_name=hs.get_instance_name(),
                 tables=[
diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py
index 713be91c5d..8e61aba454 100644
--- a/synapse/storage/databases/main/deviceinbox.py
+++ b/synapse/storage/databases/main/deviceinbox.py
@@ -91,6 +91,7 @@ class DeviceInboxWorkerStore(SQLBaseStore):
                 MultiWriterIdGenerator(
                     db_conn=db_conn,
                     db=database,
+                    notifier=hs.get_replication_notifier(),
                     stream_name="to_device",
                     instance_name=self._instance_name,
                     tables=[("device_inbox", "instance_name", "stream_id")],
@@ -101,7 +102,7 @@ class DeviceInboxWorkerStore(SQLBaseStore):
         else:
             self._can_write_to_device = True
             self._device_inbox_id_gen = StreamIdGenerator(
-                db_conn, "device_inbox", "stream_id"
+                db_conn, hs.get_replication_notifier(), "device_inbox", "stream_id"
             )
 
         max_device_inbox_id = self._device_inbox_id_gen.get_current_token()
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index cd186c8472..903606fb46 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -92,6 +92,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
         # class below that is used on the main process.
         self._device_list_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
             db_conn,
+            hs.get_replication_notifier(),
             "device_lists_stream",
             "stream_id",
             extra_tables=[
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index 4c691642e2..c4ac6c33ba 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -1181,7 +1181,10 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore):
         super().__init__(database, db_conn, hs)
 
         self._cross_signing_id_gen = StreamIdGenerator(
-            db_conn, "e2e_cross_signing_keys", "stream_id"
+            db_conn,
+            hs.get_replication_notifier(),
+            "e2e_cross_signing_keys",
+            "stream_id",
         )
 
     async def set_e2e_device_keys(
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index d150fa8a94..d8a8bcafb6 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -191,6 +191,7 @@ class EventsWorkerStore(SQLBaseStore):
             self._stream_id_gen = MultiWriterIdGenerator(
                 db_conn=db_conn,
                 db=database,
+                notifier=hs.get_replication_notifier(),
                 stream_name="events",
                 instance_name=hs.get_instance_name(),
                 tables=[("events", "instance_name", "stream_ordering")],
@@ -200,6 +201,7 @@ class EventsWorkerStore(SQLBaseStore):
             self._backfill_id_gen = MultiWriterIdGenerator(
                 db_conn=db_conn,
                 db=database,
+                notifier=hs.get_replication_notifier(),
                 stream_name="backfill",
                 instance_name=hs.get_instance_name(),
                 tables=[("events", "instance_name", "stream_ordering")],
@@ -217,12 +219,14 @@ class EventsWorkerStore(SQLBaseStore):
             # SQLite).
             self._stream_id_gen = StreamIdGenerator(
                 db_conn,
+                hs.get_replication_notifier(),
                 "events",
                 "stream_ordering",
                 is_writer=hs.get_instance_name() in hs.config.worker.writers.events,
             )
             self._backfill_id_gen = StreamIdGenerator(
                 db_conn,
+                hs.get_replication_notifier(),
                 "events",
                 "stream_ordering",
                 step=-1,
@@ -300,6 +304,7 @@ class EventsWorkerStore(SQLBaseStore):
             self._un_partial_stated_events_stream_id_gen = MultiWriterIdGenerator(
                 db_conn=db_conn,
                 db=database,
+                notifier=hs.get_replication_notifier(),
                 stream_name="un_partial_stated_event_stream",
                 instance_name=hs.get_instance_name(),
                 tables=[
@@ -311,7 +316,10 @@ class EventsWorkerStore(SQLBaseStore):
             )
         else:
             self._un_partial_stated_events_stream_id_gen = StreamIdGenerator(
-                db_conn, "un_partial_stated_event_stream", "stream_id"
+                db_conn,
+                hs.get_replication_notifier(),
+                "un_partial_stated_event_stream",
+                "stream_id",
             )
 
     def get_un_partial_stated_events_token(self) -> int:
diff --git a/synapse/storage/databases/main/presence.py b/synapse/storage/databases/main/presence.py
index 7b60815043..beb210f8ee 100644
--- a/synapse/storage/databases/main/presence.py
+++ b/synapse/storage/databases/main/presence.py
@@ -77,6 +77,7 @@ class PresenceStore(PresenceBackgroundUpdateStore, CacheInvalidationWorkerStore)
             self._presence_id_gen = MultiWriterIdGenerator(
                 db_conn=db_conn,
                 db=database,
+                notifier=hs.get_replication_notifier(),
                 stream_name="presence_stream",
                 instance_name=self._instance_name,
                 tables=[("presence_stream", "instance_name", "stream_id")],
@@ -85,7 +86,7 @@ class PresenceStore(PresenceBackgroundUpdateStore, CacheInvalidationWorkerStore)
             )
         else:
             self._presence_id_gen = StreamIdGenerator(
-                db_conn, "presence_stream", "stream_id"
+                db_conn, hs.get_replication_notifier(), "presence_stream", "stream_id"
             )
 
         self.hs = hs
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index 03182887d1..14ca167b34 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -118,6 +118,7 @@ class PushRulesWorkerStore(
         # class below that is used on the main process.
         self._push_rules_stream_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
             db_conn,
+            hs.get_replication_notifier(),
             "push_rules_stream",
             "stream_id",
             is_writer=hs.config.worker.worker_app is None,
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
index 7f24a3b6ec..df53e726e6 100644
--- a/synapse/storage/databases/main/pusher.py
+++ b/synapse/storage/databases/main/pusher.py
@@ -62,6 +62,7 @@ class PusherWorkerStore(SQLBaseStore):
         # class below that is used on the main process.
         self._pushers_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
             db_conn,
+            hs.get_replication_notifier(),
             "pushers",
             "id",
             extra_tables=[("deleted_pushers", "stream_id")],
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index 86f5bce5f0..3468f354e6 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -73,6 +73,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
             self._receipts_id_gen = MultiWriterIdGenerator(
                 db_conn=db_conn,
                 db=database,
+                notifier=hs.get_replication_notifier(),
                 stream_name="receipts",
                 instance_name=self._instance_name,
                 tables=[("receipts_linearized", "instance_name", "stream_id")],
@@ -91,6 +92,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
             # SQLite).
             self._receipts_id_gen = StreamIdGenerator(
                 db_conn,
+                hs.get_replication_notifier(),
                 "receipts_linearized",
                 "stream_id",
                 is_writer=hs.get_instance_name() in hs.config.worker.writers.receipts,
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 78906a5e1d..7264a33cd4 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -126,6 +126,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
             self._un_partial_stated_rooms_stream_id_gen = MultiWriterIdGenerator(
                 db_conn=db_conn,
                 db=database,
+                notifier=hs.get_replication_notifier(),
                 stream_name="un_partial_stated_room_stream",
                 instance_name=self._instance_name,
                 tables=[
@@ -137,7 +138,10 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
             )
         else:
             self._un_partial_stated_rooms_stream_id_gen = StreamIdGenerator(
-                db_conn, "un_partial_stated_room_stream", "stream_id"
+                db_conn,
+                hs.get_replication_notifier(),
+                "un_partial_stated_room_stream",
+                "stream_id",
             )
 
     async def store_room(
diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py
index 8670ffbfa3..9adff3f4f5 100644
--- a/synapse/storage/util/id_generators.py
+++ b/synapse/storage/util/id_generators.py
@@ -20,6 +20,7 @@ from collections import OrderedDict
 from contextlib import contextmanager
 from types import TracebackType
 from typing import (
+    TYPE_CHECKING,
     AsyncContextManager,
     ContextManager,
     Dict,
@@ -49,6 +50,9 @@ from synapse.storage.database import (
 from synapse.storage.types import Cursor
 from synapse.storage.util.sequence import PostgresSequenceGenerator
 
+if TYPE_CHECKING:
+    from synapse.notifier import ReplicationNotifier
+
 logger = logging.getLogger(__name__)
 
 
@@ -182,6 +186,7 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
     def __init__(
         self,
         db_conn: LoggingDatabaseConnection,
+        notifier: "ReplicationNotifier",
         table: str,
         column: str,
         extra_tables: Iterable[Tuple[str, str]] = (),
@@ -205,6 +210,8 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
         # The key and values are the same, but we never look at the values.
         self._unfinished_ids: OrderedDict[int, int] = OrderedDict()
 
+        self._notifier = notifier
+
     def advance(self, instance_name: str, new_id: int) -> None:
         # Advance should never be called on a writer instance, only over replication
         if self._is_writer:
@@ -227,6 +234,8 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
                 with self._lock:
                     self._unfinished_ids.pop(next_id)
 
+                self._notifier.notify_replication()
+
         return _AsyncCtxManagerWrapper(manager())
 
     def get_next_mult(self, n: int) -> AsyncContextManager[Sequence[int]]:
@@ -250,6 +259,8 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
                     for next_id in next_ids:
                         self._unfinished_ids.pop(next_id)
 
+                self._notifier.notify_replication()
+
         return _AsyncCtxManagerWrapper(manager())
 
     def get_current_token(self) -> int:
@@ -296,6 +307,7 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator):
         self,
         db_conn: LoggingDatabaseConnection,
         db: DatabasePool,
+        notifier: "ReplicationNotifier",
         stream_name: str,
         instance_name: str,
         tables: List[Tuple[str, str, str]],
@@ -304,6 +316,7 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator):
         positive: bool = True,
     ) -> None:
         self._db = db
+        self._notifier = notifier
         self._stream_name = stream_name
         self._instance_name = instance_name
         self._positive = positive
@@ -535,7 +548,9 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator):
         # Cast safety: the second argument to _MultiWriterCtxManager, multiple_ids,
         # controls the return type. If `None` or omitted, the context manager yields
         # a single integer stream_id; otherwise it yields a list of stream_ids.
-        return cast(AsyncContextManager[int], _MultiWriterCtxManager(self))
+        return cast(
+            AsyncContextManager[int], _MultiWriterCtxManager(self, self._notifier)
+        )
 
     def get_next_mult(self, n: int) -> AsyncContextManager[List[int]]:
         # If we have a list of instances that are allowed to write to this
@@ -544,7 +559,10 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator):
             raise Exception("Tried to allocate stream ID on non-writer")
 
         # Cast safety: see get_next.
-        return cast(AsyncContextManager[List[int]], _MultiWriterCtxManager(self, n))
+        return cast(
+            AsyncContextManager[List[int]],
+            _MultiWriterCtxManager(self, self._notifier, n),
+        )
 
     def get_next_txn(self, txn: LoggingTransaction) -> int:
         """
@@ -563,6 +581,7 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator):
 
         txn.call_after(self._mark_id_as_finished, next_id)
         txn.call_on_exception(self._mark_id_as_finished, next_id)
+        txn.call_after(self._notifier.notify_replication)
 
         # Update the `stream_positions` table with newly updated stream
         # ID (unless self._writers is not set in which case we don't
@@ -787,6 +806,7 @@ class _MultiWriterCtxManager:
     """Async context manager returned by MultiWriterIdGenerator"""
 
     id_gen: MultiWriterIdGenerator
+    notifier: "ReplicationNotifier"
     multiple_ids: Optional[int] = None
     stream_ids: List[int] = attr.Factory(list)
 
@@ -814,6 +834,8 @@ class _MultiWriterCtxManager:
         for i in self.stream_ids:
             self.id_gen._mark_id_as_finished(i)
 
+        self.notifier.notify_replication()
+
         if exc_type is not None:
             return False
 
diff --git a/tests/module_api/test_api.py b/tests/module_api/test_api.py
index 9919938e80..8f88c0117d 100644
--- a/tests/module_api/test_api.py
+++ b/tests/module_api/test_api.py
@@ -404,6 +404,9 @@ class ModuleApiTestCase(HomeserverTestCase):
             self.module_api.send_local_online_presence_to([remote_user_id])
         )
 
+        # We don't always send out federation immediately, so we advance the clock.
+        self.reactor.advance(1000)
+
         # Check that a presence update was sent as part of a federation transaction
         found_update = False
         calls = (
diff --git a/tests/replication/tcp/test_handler.py b/tests/replication/tcp/test_handler.py
index 555922409d..6e4055cc21 100644
--- a/tests/replication/tcp/test_handler.py
+++ b/tests/replication/tcp/test_handler.py
@@ -14,7 +14,7 @@
 
 from twisted.internet import defer
 
-from synapse.replication.tcp.commands import PositionCommand, RdataCommand
+from synapse.replication.tcp.commands import PositionCommand
 
 from tests.replication._base import BaseMultiWorkerStreamTestCase
 
@@ -111,20 +111,14 @@ class ChannelsTestCase(BaseMultiWorkerStreamTestCase):
         next_token = self.get_success(ctx.__aenter__())
         self.get_success(ctx.__aexit__(None, None, None))
 
-        cmd_handler.send_command(
-            RdataCommand("caches", "worker1", next_token, ("func_name", [], 0))
-        )
-        self.replicate()
-
         self.get_success(
             data_handler.wait_for_stream_position("worker1", "caches", next_token)
         )
 
-        # `wait_for_stream_position` should only return once master receives an
-        # RDATA from the worker
-        ctx = cache_id_gen.get_next()
-        next_token = self.get_success(ctx.__aenter__())
-        self.get_success(ctx.__aexit__(None, None, None))
+        # `wait_for_stream_position` should only return once master receives a
+        # notification that `next_token` has persisted.
+        ctx_worker1 = cache_id_gen.get_next()
+        next_token = self.get_success(ctx_worker1.__aenter__())
 
         d = defer.ensureDeferred(
             data_handler.wait_for_stream_position("worker1", "caches", next_token)
@@ -142,10 +136,7 @@ class ChannelsTestCase(BaseMultiWorkerStreamTestCase):
         )
         self.assertFalse(d.called)
 
-        # ... but receiving the RDATA should
-        cmd_handler.send_command(
-            RdataCommand("caches", "worker1", next_token, ("func_name", [], 0))
-        )
-        self.replicate()
+        # ... but worker1 finishing (and so sending an update) should.
+        self.get_success(ctx_worker1.__aexit__(None, None, None))
 
         self.assertTrue(d.called)
diff --git a/tests/storage/test_id_generators.py b/tests/storage/test_id_generators.py
index ff9691c518..9174fb0964 100644
--- a/tests/storage/test_id_generators.py
+++ b/tests/storage/test_id_generators.py
@@ -52,6 +52,7 @@ class StreamIdGeneratorTestCase(HomeserverTestCase):
         def _create(conn: LoggingDatabaseConnection) -> StreamIdGenerator:
             return StreamIdGenerator(
                 db_conn=conn,
+                notifier=self.hs.get_replication_notifier(),
                 table="foobar",
                 column="stream_id",
             )
@@ -196,6 +197,7 @@ class MultiWriterIdGeneratorTestCase(HomeserverTestCase):
             return MultiWriterIdGenerator(
                 conn,
                 self.db_pool,
+                notifier=self.hs.get_replication_notifier(),
                 stream_name="test_stream",
                 instance_name=instance_name,
                 tables=[("foobar", "instance_name", "stream_id")],
@@ -630,6 +632,7 @@ class BackwardsMultiWriterIdGeneratorTestCase(HomeserverTestCase):
             return MultiWriterIdGenerator(
                 conn,
                 self.db_pool,
+                notifier=self.hs.get_replication_notifier(),
                 stream_name="test_stream",
                 instance_name=instance_name,
                 tables=[("foobar", "instance_name", "stream_id")],
@@ -766,6 +769,7 @@ class MultiTableMultiWriterIdGeneratorTestCase(HomeserverTestCase):
             return MultiWriterIdGenerator(
                 conn,
                 self.db_pool,
+                notifier=self.hs.get_replication_notifier(),
                 stream_name="test_stream",
                 instance_name=instance_name,
                 tables=[
-- 
cgit 1.5.1


From 0ec12a37538d0df07d96cfc9cf5f5208f7453607 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 20 Jan 2023 21:04:33 +0000
Subject: Reduce max time we wait for stream positions (#14881)

Now that we wait for stream positions whenever we do an HTTP replication
hit, we need to be less brutal in the case where we do time out (as we
have bugs around this).
---
 changelog.d/14881.misc            |  1 +
 synapse/replication/http/_base.py |  2 --
 synapse/replication/tcp/client.py | 21 +++++++++++----------
 3 files changed, 12 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/14881.misc

(limited to 'synapse')

diff --git a/changelog.d/14881.misc b/changelog.d/14881.misc
new file mode 100644
index 0000000000..be89d092b6
--- /dev/null
+++ b/changelog.d/14881.misc
@@ -0,0 +1 @@
+Reduce max time we wait for stream positions.
diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py
index 709327b97f..908f3f1db7 100644
--- a/synapse/replication/http/_base.py
+++ b/synapse/replication/http/_base.py
@@ -352,7 +352,6 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
                         instance_name=instance_name,
                         stream_name=stream_name,
                         position=position,
-                        raise_on_timeout=False,
                     )
 
                 return result
@@ -414,7 +413,6 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
                 instance_name=content[_STREAM_POSITION_KEY]["instance_name"],
                 stream_name=stream_name,
                 position=position,
-                raise_on_timeout=False,
             )
 
         if self.CACHE:
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 6e242c5749..493f616679 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -59,7 +59,7 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 # How long we allow callers to wait for replication updates before timing out.
-_WAIT_FOR_REPLICATION_TIMEOUT_SECONDS = 30
+_WAIT_FOR_REPLICATION_TIMEOUT_SECONDS = 5
 
 
 class DirectTcpReplicationClientFactory(ReconnectingClientFactory):
@@ -326,7 +326,6 @@ class ReplicationDataHandler:
         instance_name: str,
         stream_name: str,
         position: int,
-        raise_on_timeout: bool = True,
     ) -> None:
         """Wait until this instance has received updates up to and including
         the given stream position.
@@ -335,8 +334,6 @@ class ReplicationDataHandler:
             instance_name
             stream_name
             position
-            raise_on_timeout: Whether to raise an exception if we time out
-                waiting for the updates, or if we log an error and return.
         """
 
         if instance_name == self._instance_name:
@@ -365,19 +362,23 @@ class ReplicationDataHandler:
 
         # We measure here to get in flight counts and average waiting time.
         with Measure(self._clock, "repl.wait_for_stream_position"):
-            logger.info("Waiting for repl stream %r to reach %s", stream_name, position)
+            logger.info(
+                "Waiting for repl stream %r to reach %s (%s)",
+                stream_name,
+                position,
+                instance_name,
+            )
             try:
                 await make_deferred_yieldable(deferred)
             except defer.TimeoutError:
                 logger.error("Timed out waiting for stream %s", stream_name)
-
-                if raise_on_timeout:
-                    raise
-
                 return
 
             logger.info(
-                "Finished waiting for repl stream %r to reach %s", stream_name, position
+                "Finished waiting for repl stream %r to reach %s (%s)",
+                stream_name,
+                position,
+                instance_name,
             )
 
     def stop_pusher(self, user_id: str, app_id: str, pushkey: str) -> None:
-- 
cgit 1.5.1


From d329a566df6ff2b635a375bf1b2c8ed3b2c9815d Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Sun, 22 Jan 2023 19:19:31 +0000
Subject: Faster joins: Fix incompatibility with restricted joins (#14882)

* Avoid clearing out forward extremities when doing a second remote join

When joining a restricted room where the local homeserver does not have
a user able to issue invites, we perform a second remote join. We want
to avoid clearing out forward extremities in this case because the
forward extremities we have are up to date and clearing out forward
extremities creates a window in which the room can get bricked if
Synapse crashes.

Signed-off-by: Sean Quah <seanq@matrix.org>

* Do a full join when doing a second remote join into a full state room

We cannot persist a partial state join event into a joined full state
room, so we perform a full state join for such rooms instead. As a
future optimization, we could always perform a partial state join and
compute or retrieve the full state ourselves if necessary.

Signed-off-by: Sean Quah <seanq@matrix.org>

* Add lock around partial state flag for rooms

Signed-off-by: Sean Quah <seanq@matrix.org>

* Preserve partial state info when doing a second partial state join

Signed-off-by: Sean Quah <seanq@matrix.org>

* Add newsfile

* Add a TODO(faster_joins) marker

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14882.bugfix                |   1 +
 synapse/federation/federation_client.py |   5 +
 synapse/handlers/federation.py          | 215 ++++++++++++++++++++------------
 3 files changed, 140 insertions(+), 81 deletions(-)
 create mode 100644 changelog.d/14882.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14882.bugfix b/changelog.d/14882.bugfix
new file mode 100644
index 0000000000..1fda344361
--- /dev/null
+++ b/changelog.d/14882.bugfix
@@ -0,0 +1 @@
+Faster joins: Fix incompatibility with joins into restricted rooms where no local users have the ability to invite.
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 15a9a88302..f185b6c1f9 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -1157,6 +1157,11 @@ class FederationClient(FederationBase):
                     "members_omitted was set, but no servers were listed in the room"
                 )
 
+            if response.members_omitted and not partial_state:
+                raise InvalidResponseError(
+                    "members_omitted was set, but we asked for full state"
+                )
+
             return SendJoinResult(
                 event=event,
                 state=signed_state,
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index e386f77de6..2123ace8a6 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -48,7 +48,6 @@ from synapse.api.errors import (
     FederationError,
     FederationPullAttemptBackoffError,
     HttpResponseException,
-    LimitExceededError,
     NotFoundError,
     RequestSendFailed,
     SynapseError,
@@ -182,6 +181,12 @@ class FederationHandler:
         self._partial_state_syncs_maybe_needing_restart: Dict[
             str, Tuple[Optional[str], Collection[str]]
         ] = {}
+        # A lock guarding the partial state flag for rooms.
+        # When the lock is held for a given room, no other concurrent code may
+        # partial state or un-partial state the room.
+        self._is_partial_state_room_linearizer = Linearizer(
+            name="_is_partial_state_room_linearizer"
+        )
 
         # if this is the main process, fire off a background process to resume
         # any partial-state-resync operations which were in flight when we
@@ -599,7 +604,23 @@ class FederationHandler:
 
         self._federation_event_handler.room_queues[room_id] = []
 
-        await self._clean_room_for_join(room_id)
+        is_host_joined = await self.store.is_host_joined(room_id, self.server_name)
+
+        if not is_host_joined:
+            # We may have old forward extremities lying around if the homeserver left
+            # the room completely in the past. Clear them out.
+            #
+            # Note that this check-then-clear is subject to races where
+            #  * the homeserver is in the room and stops being in the room just after
+            #    the check. We won't reset the forward extremities, but that's okay,
+            #    since they will be almost up to date.
+            #  * the homeserver is not in the room and starts being in the room just
+            #    after the check. This can't happen, since `RoomMemberHandler` has a
+            #    linearizer lock which prevents concurrent remote joins into the same
+            #    room.
+            # In short, the races either have an acceptable outcome or should be
+            # impossible.
+            await self._clean_room_for_join(room_id)
 
         try:
             # Try the host we successfully got a response to /make_join/
@@ -611,91 +632,115 @@ class FederationHandler:
             except ValueError:
                 pass
 
-            ret = await self.federation_client.send_join(
-                host_list, event, room_version_obj
-            )
-
-            event = ret.event
-            origin = ret.origin
-            state = ret.state
-            auth_chain = ret.auth_chain
-            auth_chain.sort(key=lambda e: e.depth)
-
-            logger.debug("do_invite_join auth_chain: %s", auth_chain)
-            logger.debug("do_invite_join state: %s", state)
-
-            logger.debug("do_invite_join event: %s", event)
+            async with self._is_partial_state_room_linearizer.queue(room_id):
+                already_partial_state_room = await self.store.is_partial_state_room(
+                    room_id
+                )
 
-            # if this is the first time we've joined this room, it's time to add
-            # a row to `rooms` with the correct room version. If there's already a
-            # row there, we should override it, since it may have been populated
-            # based on an invite request which lied about the room version.
-            #
-            # federation_client.send_join has already checked that the room
-            # version in the received create event is the same as room_version_obj,
-            # so we can rely on it now.
-            #
-            await self.store.upsert_room_on_join(
-                room_id=room_id,
-                room_version=room_version_obj,
-                state_events=state,
-            )
+                ret = await self.federation_client.send_join(
+                    host_list,
+                    event,
+                    room_version_obj,
+                    # Perform a full join when we are already in the room and it is a
+                    # full state room, since we are not allowed to persist a partial
+                    # state join event in a full state room. In the future, we could
+                    # optimize this by always performing a partial state join and
+                    # computing the state ourselves or retrieving it from the remote
+                    # homeserver if necessary.
+                    #
+                    # There's a race where we leave the room, then perform a full join
+                    # anyway. This should end up being fast anyway, since we would
+                    # already have the full room state and auth chain persisted.
+                    partial_state=not is_host_joined or already_partial_state_room,
+                )
 
-            if ret.partial_state:
-                # Mark the room as having partial state.
-                # The background process is responsible for unmarking this flag,
-                # even if the join fails.
-                await self.store.store_partial_state_room(
+                event = ret.event
+                origin = ret.origin
+                state = ret.state
+                auth_chain = ret.auth_chain
+                auth_chain.sort(key=lambda e: e.depth)
+
+                logger.debug("do_invite_join auth_chain: %s", auth_chain)
+                logger.debug("do_invite_join state: %s", state)
+
+                logger.debug("do_invite_join event: %s", event)
+
+                # if this is the first time we've joined this room, it's time to add
+                # a row to `rooms` with the correct room version. If there's already a
+                # row there, we should override it, since it may have been populated
+                # based on an invite request which lied about the room version.
+                #
+                # federation_client.send_join has already checked that the room
+                # version in the received create event is the same as room_version_obj,
+                # so we can rely on it now.
+                #
+                await self.store.upsert_room_on_join(
                     room_id=room_id,
-                    servers=ret.servers_in_room,
-                    device_lists_stream_id=self.store.get_device_stream_token(),
-                    joined_via=origin,
+                    room_version=room_version_obj,
+                    state_events=state,
                 )
 
-            try:
-                max_stream_id = (
-                    await self._federation_event_handler.process_remote_join(
-                        origin,
-                        room_id,
-                        auth_chain,
-                        state,
-                        event,
-                        room_version_obj,
-                        partial_state=ret.partial_state,
+                if ret.partial_state and not already_partial_state_room:
+                    # Mark the room as having partial state.
+                    # The background process is responsible for unmarking this flag,
+                    # even if the join fails.
+                    # TODO(faster_joins):
+                    #     We may want to reset the partial state info if it's from an
+                    #     old, failed partial state join.
+                    #     https://github.com/matrix-org/synapse/issues/13000
+                    await self.store.store_partial_state_room(
+                        room_id=room_id,
+                        servers=ret.servers_in_room,
+                        device_lists_stream_id=self.store.get_device_stream_token(),
+                        joined_via=origin,
                     )
-                )
-            except PartialStateConflictError as e:
-                # The homeserver was already in the room and it is no longer partial
-                # stated. We ought to be doing a local join instead. Turn the error into
-                # a 429, as a hint to the client to try again.
-                # TODO(faster_joins): `_should_perform_remote_join` suggests that we may
-                #   do a remote join for restricted rooms even if we have full state.
-                logger.error(
-                    "Room %s was un-partial stated while processing remote join.",
-                    room_id,
-                )
-                raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0)
-            else:
-                # Record the join event id for future use (when we finish the full
-                # join). We have to do this after persisting the event to keep foreign
-                # key constraints intact.
-                if ret.partial_state:
-                    await self.store.write_partial_state_rooms_join_event_id(
-                        room_id, event.event_id
+
+                try:
+                    max_stream_id = (
+                        await self._federation_event_handler.process_remote_join(
+                            origin,
+                            room_id,
+                            auth_chain,
+                            state,
+                            event,
+                            room_version_obj,
+                            partial_state=ret.partial_state,
+                        )
                     )
-            finally:
-                # Always kick off the background process that asynchronously fetches
-                # state for the room.
-                # If the join failed, the background process is responsible for
-                # cleaning up — including unmarking the room as a partial state room.
-                if ret.partial_state:
-                    # Kick off the process of asynchronously fetching the state for this
-                    # room.
-                    self._start_partial_state_room_sync(
-                        initial_destination=origin,
-                        other_destinations=ret.servers_in_room,
-                        room_id=room_id,
+                except PartialStateConflictError:
+                    # This should be impossible, since we hold the lock on the room's
+                    # partial statedness.
+                    logger.error(
+                        "Room %s was un-partial stated while processing remote join.",
+                        room_id,
                     )
+                    raise
+                else:
+                    # Record the join event id for future use (when we finish the full
+                    # join). We have to do this after persisting the event to keep
+                    # foreign key constraints intact.
+                    if ret.partial_state and not already_partial_state_room:
+                        # TODO(faster_joins):
+                        #     We may want to reset the partial state info if it's from
+                        #     an old, failed partial state join.
+                        #     https://github.com/matrix-org/synapse/issues/13000
+                        await self.store.write_partial_state_rooms_join_event_id(
+                            room_id, event.event_id
+                        )
+                finally:
+                    # Always kick off the background process that asynchronously fetches
+                    # state for the room.
+                    # If the join failed, the background process is responsible for
+                    # cleaning up — including unmarking the room as a partial state
+                    # room.
+                    if ret.partial_state:
+                        # Kick off the process of asynchronously fetching the state for
+                        # this room.
+                        self._start_partial_state_room_sync(
+                            initial_destination=origin,
+                            other_destinations=ret.servers_in_room,
+                            room_id=room_id,
+                        )
 
             # We wait here until this instance has seen the events come down
             # replication (if we're using replication) as the below uses caches.
@@ -1778,6 +1823,12 @@ class FederationHandler:
                 `initial_destination` is unavailable
             room_id: room to be resynced
         """
+        # Assume that we run on the main process for now.
+        # TODO(faster_joins,multiple workers)
+        # When moving the sync to workers, we need to ensure that
+        #  * `_start_partial_state_room_sync` still prevents duplicate resyncs
+        #  * `_is_partial_state_room_linearizer` correctly guards partial state flags
+        #    for rooms between the workers doing remote joins and resync.
         assert not self.config.worker.worker_app
 
         # TODO(faster_joins): do we need to lock to avoid races? What happens if other
@@ -1815,8 +1866,10 @@ class FederationHandler:
                 logger.info("Handling any pending device list updates")
                 await self._device_handler.handle_room_un_partial_stated(room_id)
 
-                logger.info("Clearing partial-state flag for %s", room_id)
-                success = await self.store.clear_partial_state_room(room_id)
+                async with self._is_partial_state_room_linearizer.queue(room_id):
+                    logger.info("Clearing partial-state flag for %s", room_id)
+                    success = await self.store.clear_partial_state_room(room_id)
+
                 if success:
                     logger.info("State resync complete for %s", room_id)
                     self._storage_controllers.state.notify_room_un_partial_stated(
-- 
cgit 1.5.1


From 22cc93afe38d34c859d8863a99996e7e72ca1733 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Sun, 22 Jan 2023 21:10:11 +0000
Subject: Enable Faster Remote Room Joins against worker-mode Synapse. (#14752)

* Enable Complement tests for Faster Remote Room Joins on worker-mode

* (dangerous) Add an override to allow Complement to use FRRJ under workers

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

* Fix race where we didn't send out replication notification

* MORE HACKS

* Fix get_un_partial_stated_rooms_token to take instance_name

* Fix bad merge

* Remove warning

* Correctly advance un_partial_stated_room_stream

* Fix merge

* Add another notify_replication

* Fixups

* Create a separate ReplicationNotifier

* Fix test

* Fix portdb

* Create a separate ReplicationNotifier

* Fix test

* Fix portdb

* Fix presence test

* Newsfile

* Apply suggestions from code review

* Update changelog.d/14752.misc

Co-authored-by: Erik Johnston <erik@matrix.org>

* lint

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
Co-authored-by: Erik Johnston <erik@matrix.org>
---
 changelog.d/14752.misc                              |  1 +
 docker/complement/conf/workers-shared-extra.yaml.j2 |  2 --
 scripts-dev/complement.sh                           | 11 ++++-------
 synapse/app/generic_worker.py                       |  7 -------
 synapse/handlers/device.py                          |  2 ++
 synapse/handlers/federation.py                      |  7 ++++---
 synapse/replication/tcp/streams/partial_state.py    |  7 ++-----
 synapse/storage/databases/main/events_worker.py     | 13 ++++++++-----
 synapse/storage/databases/main/room.py              | 19 ++++++++++++-------
 synapse/storage/databases/main/state.py             |  2 ++
 10 files changed, 35 insertions(+), 36 deletions(-)
 create mode 100644 changelog.d/14752.misc

(limited to 'synapse')

diff --git a/changelog.d/14752.misc b/changelog.d/14752.misc
new file mode 100644
index 0000000000..1f9675c53b
--- /dev/null
+++ b/changelog.d/14752.misc
@@ -0,0 +1 @@
+Enable Complement tests for Faster Remote Room Joins against worker-mode Synapse.
\ No newline at end of file
diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2
index 281157846a..63acf86a46 100644
--- a/docker/complement/conf/workers-shared-extra.yaml.j2
+++ b/docker/complement/conf/workers-shared-extra.yaml.j2
@@ -94,10 +94,8 @@ allow_device_name_lookup_over_federation: true
 experimental_features:
   # Enable history backfilling support
   msc2716_enabled: true
-  {% if not workers_in_use %}
   # client-side support for partial state in /send_join responses
   faster_joins: true
-  {% endif %}
   # Enable support for polls
   msc3381_polls_enabled: true
   # Enable deleting device-specific notification settings stored in account data
diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh
index a183653d52..e72d96fd16 100755
--- a/scripts-dev/complement.sh
+++ b/scripts-dev/complement.sh
@@ -190,7 +190,7 @@ fi
 
 extra_test_args=()
 
-test_tags="synapse_blacklist,msc3787,msc3874,msc3890,msc3391,msc3930"
+test_tags="synapse_blacklist,msc3787,msc3874,msc3890,msc3391,msc3930,faster_joins"
 
 # All environment variables starting with PASS_ will be shared.
 # (The prefix is stripped off before reaching the container.)
@@ -223,12 +223,9 @@ else
     export PASS_SYNAPSE_COMPLEMENT_DATABASE=sqlite
   fi
 
-  # We only test faster room joins on monoliths, because they are purposefully
-  # being developed without worker support to start with.
-  #
-  # The tests for importing historical messages (MSC2716) also only pass with monoliths,
-  # currently.
-  test_tags="$test_tags,faster_joins,msc2716"
+  # The tests for importing historical messages (MSC2716)
+  # only pass with monoliths, currently.
+  test_tags="$test_tags,msc2716"
 fi
 
 
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 8108b1e98f..946f3a3807 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -282,13 +282,6 @@ def start(config_options: List[str]) -> None:
         "synapse.app.user_dir",
     )
 
-    if config.experimental.faster_joins_enabled:
-        raise ConfigError(
-            "You have enabled the experimental `faster_joins` config option, but it is "
-            "not compatible with worker deployments yet. Please disable `faster_joins` "
-            "or run Synapse as a single process deployment instead."
-        )
-
     synapse.events.USE_FROZEN_DICTS = config.server.use_frozen_dicts
     synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage
 
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 0640ea79a0..58180ae2fa 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -974,6 +974,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
         self.federation = hs.get_federation_client()
         self.clock = hs.get_clock()
         self.device_handler = device_handler
+        self._notifier = hs.get_notifier()
 
         self._remote_edu_linearizer = Linearizer(name="remote_device_list")
 
@@ -1054,6 +1055,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
                 user_id,
                 device_id,
             )
+            self._notifier.notify_replication()
 
         room_ids = await self.store.get_rooms_for_user(user_id)
         if not room_ids:
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 2123ace8a6..7620245e26 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -1870,14 +1870,15 @@ class FederationHandler:
                     logger.info("Clearing partial-state flag for %s", room_id)
                     success = await self.store.clear_partial_state_room(room_id)
 
+                    # Poke the notifier so that other workers see the write to
+                    # the un-partial-stated rooms stream.
+                    self._notifier.notify_replication()
+
                 if success:
                     logger.info("State resync complete for %s", room_id)
                     self._storage_controllers.state.notify_room_un_partial_stated(
                         room_id
                     )
-                    # Poke the notifier so that other workers see the write to
-                    # the un-partial-stated rooms stream.
-                    self._notifier.notify_replication()
 
                     # TODO(faster_joins) update room stats and user directory?
                     #   https://github.com/matrix-org/synapse/issues/12814
diff --git a/synapse/replication/tcp/streams/partial_state.py b/synapse/replication/tcp/streams/partial_state.py
index b5a2ae74b6..a8ce5ffd72 100644
--- a/synapse/replication/tcp/streams/partial_state.py
+++ b/synapse/replication/tcp/streams/partial_state.py
@@ -16,7 +16,6 @@ from typing import TYPE_CHECKING
 import attr
 
 from synapse.replication.tcp.streams import Stream
-from synapse.replication.tcp.streams._base import current_token_without_instance
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -42,8 +41,7 @@ class UnPartialStatedRoomStream(Stream):
         store = hs.get_datastores().main
         super().__init__(
             hs.get_instance_name(),
-            # TODO(faster_joins, multiple writers): we need to account for instance names
-            current_token_without_instance(store.get_un_partial_stated_rooms_token),
+            store.get_un_partial_stated_rooms_token,
             store.get_un_partial_stated_rooms_from_stream,
         )
 
@@ -70,7 +68,6 @@ class UnPartialStatedEventStream(Stream):
         store = hs.get_datastores().main
         super().__init__(
             hs.get_instance_name(),
-            # TODO(faster_joins, multiple writers): we need to account for instance names
-            current_token_without_instance(store.get_un_partial_stated_events_token),
+            store.get_un_partial_stated_events_token,
             store.get_un_partial_stated_events_from_stream,
         )
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index d8a8bcafb6..24127d0364 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -322,11 +322,12 @@ class EventsWorkerStore(SQLBaseStore):
                 "stream_id",
             )
 
-    def get_un_partial_stated_events_token(self) -> int:
-        # TODO(faster_joins, multiple writers): This is inappropriate if there are multiple
-        #     writers because workers that don't write often will hold all
-        #     readers up.
-        return self._un_partial_stated_events_stream_id_gen.get_current_token()
+    def get_un_partial_stated_events_token(self, instance_name: str) -> int:
+        return (
+            self._un_partial_stated_events_stream_id_gen.get_current_token_for_writer(
+                instance_name
+            )
+        )
 
     async def get_un_partial_stated_events_from_stream(
         self, instance_name: str, last_id: int, current_id: int, limit: int
@@ -416,6 +417,8 @@ class EventsWorkerStore(SQLBaseStore):
             self._stream_id_gen.advance(instance_name, token)
         elif stream_name == BackfillStream.NAME:
             self._backfill_id_gen.advance(instance_name, -token)
+        elif stream_name == UnPartialStatedEventStream.NAME:
+            self._un_partial_stated_events_stream_id_gen.advance(instance_name, token)
         super().process_replication_position(stream_name, instance_name, token)
 
     async def have_censored_event(self, event_id: str) -> bool:
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 7264a33cd4..6a65b2a89b 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -43,6 +43,7 @@ from synapse.api.errors import StoreError
 from synapse.api.room_versions import RoomVersion, RoomVersions
 from synapse.config.homeserver import HomeServerConfig
 from synapse.events import EventBase
+from synapse.replication.tcp.streams.partial_state import UnPartialStatedRoomStream
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
 from synapse.storage.database import (
     DatabasePool,
@@ -144,6 +145,13 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
                 "stream_id",
             )
 
+    def process_replication_position(
+        self, stream_name: str, instance_name: str, token: int
+    ) -> None:
+        if stream_name == UnPartialStatedRoomStream.NAME:
+            self._un_partial_stated_rooms_stream_id_gen.advance(instance_name, token)
+        return super().process_replication_position(stream_name, instance_name, token)
+
     async def store_room(
         self,
         room_id: str,
@@ -1281,13 +1289,10 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
         )
         return result["join_event_id"], result["device_lists_stream_id"]
 
-    def get_un_partial_stated_rooms_token(self) -> int:
-        # TODO(faster_joins, multiple writers): This is inappropriate if there
-        #     are multiple writers because workers that don't write often will
-        #     hold all readers up.
-        #     (See `MultiWriterIdGenerator.get_persisted_upto_position` for an
-        #      explanation.)
-        return self._un_partial_stated_rooms_stream_id_gen.get_current_token()
+    def get_un_partial_stated_rooms_token(self, instance_name: str) -> int:
+        return self._un_partial_stated_rooms_stream_id_gen.get_current_token_for_writer(
+            instance_name
+        )
 
     async def get_un_partial_stated_rooms_from_stream(
         self, instance_name: str, last_id: int, current_id: int, limit: int
diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py
index f32cbb2dec..ba325d390b 100644
--- a/synapse/storage/databases/main/state.py
+++ b/synapse/storage/databases/main/state.py
@@ -95,6 +95,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
             for row in rows:
                 assert isinstance(row, UnPartialStatedEventStreamRow)
                 self._get_state_group_for_event.invalidate((row.event_id,))
+                self.is_partial_state_event.invalidate((row.event_id,))
 
         super().process_replication_rows(stream_name, instance_name, token, rows)
 
@@ -485,6 +486,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
                 "rejection_status_changed": rejection_status_changed,
             },
         )
+        txn.call_after(self.hs.get_notifier().on_new_replication_data)
 
 
 class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
-- 
cgit 1.5.1


From 2ec9c58496e2138cbc4364aba238997c393d5308 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Mon, 23 Jan 2023 10:31:36 +0000
Subject: Faster joins: Update room stats and the user directory on workers
 when finishing join (#14874)

* Faster joins: Update room stats and user directory on workers when done

When finishing a partial state join to a room, we update the current
state of the room without persisting additional events. Workers receive
notice of the current state update over replication, but neglect to wake
the room stats and user directory updaters, which then get incidentally
triggered the next time an event is persisted or an unrelated event
persister sends out a stream position update.

We wake the room stats and user directory updaters at the appropriate
time in this commit.

Part of #12814 and #12815.

Signed-off-by: Sean Quah <seanq@matrix.org>

* fixup comment

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14874.bugfix             | 1 +
 synapse/handlers/federation.py       | 7 ++++---
 synapse/replication/tcp/client.py    | 6 ++++++
 synapse/storage/controllers/state.py | 2 --
 4 files changed, 11 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/14874.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14874.bugfix b/changelog.d/14874.bugfix
new file mode 100644
index 0000000000..91ae2ea9bd
--- /dev/null
+++ b/changelog.d/14874.bugfix
@@ -0,0 +1 @@
+Faster joins: Fix a bug in worker deployments where the room stats and user directory would not get updated when finishing a fast join until another event is sent or received.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 7620245e26..3217127865 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -1880,9 +1880,10 @@ class FederationHandler:
                         room_id
                     )
 
-                    # TODO(faster_joins) update room stats and user directory?
-                    #   https://github.com/matrix-org/synapse/issues/12814
-                    #   https://github.com/matrix-org/synapse/issues/12815
+                    # Poke the notifier so that other workers see the write to
+                    # the un-partial-stated rooms stream.
+                    self._notifier.notify_replication()
+
                     return
 
                 # we raced against more events arriving with partial state. Go round
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 493f616679..2a9cb499a4 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -207,6 +207,12 @@ class ReplicationDataHandler:
             # we don't need to optimise this for multiple rows.
             for row in rows:
                 if row.type != EventsStreamEventRow.TypeId:
+                    # The row's data is an `EventsStreamCurrentStateRow`.
+                    # When we recompute the current state of a room based on forward
+                    # extremities (see `update_current_state`), no new events are
+                    # persisted, so we must poke the replication callbacks ourselves.
+                    # This functionality is used when finishing up a partial state join.
+                    self.notifier.notify_replication()
                     continue
                 assert isinstance(row, EventsStreamRow)
                 assert isinstance(row.data, EventsStreamEventRow)
diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py
index 26d79c6e62..2045169b9a 100644
--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@@ -493,8 +493,6 @@ class StateStorageController:
                  up to date.
         """
         # FIXME(faster_joins): what do we do here?
-        #   https://github.com/matrix-org/synapse/issues/12814
-        #   https://github.com/matrix-org/synapse/issues/12815
         #   https://github.com/matrix-org/synapse/issues/13008
 
         return await self.stores.main.get_partial_current_state_deltas(
-- 
cgit 1.5.1


From 82d3efa3124f771579ba07553904f88625c443b0 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 23 Jan 2023 06:36:20 -0500
Subject: Skip processing stats for broken rooms. (#14873)

* Skip processing stats for broken rooms.

* Newsfragment

* Use a custom exception.
---
 changelog.d/14873.bugfix                        |  1 +
 synapse/storage/databases/main/events_worker.py |  6 +-
 synapse/storage/databases/main/stats.py         | 13 +++-
 tests/storage/databases/main/test_room.py       | 88 +++++++++++++++----------
 4 files changed, 72 insertions(+), 36 deletions(-)
 create mode 100644 changelog.d/14873.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14873.bugfix b/changelog.d/14873.bugfix
new file mode 100644
index 0000000000..9b058576cd
--- /dev/null
+++ b/changelog.d/14873.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where the `populate_room_stats` background job could fail on broken rooms.
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 24127d0364..f42af34a2f 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -110,6 +110,10 @@ event_fetch_ongoing_gauge = Gauge(
 )
 
 
+class InvalidEventError(Exception):
+    """The event retrieved from the database is invalid and cannot be used."""
+
+
 @attr.s(slots=True, auto_attribs=True)
 class EventCacheEntry:
     event: EventBase
@@ -1310,7 +1314,7 @@ class EventsWorkerStore(SQLBaseStore):
                 # invites, so just accept it for all membership events.
                 #
                 if d["type"] != EventTypes.Member:
-                    raise Exception(
+                    raise InvalidEventError(
                         "Room %s for event %s is unknown" % (d["room_id"], event_id)
                     )
 
diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py
index 356d4ca788..0c1cbd540d 100644
--- a/synapse/storage/databases/main/stats.py
+++ b/synapse/storage/databases/main/stats.py
@@ -29,6 +29,7 @@ from synapse.storage.database import (
     LoggingDatabaseConnection,
     LoggingTransaction,
 )
+from synapse.storage.databases.main.events_worker import InvalidEventError
 from synapse.storage.databases.main.state_deltas import StateDeltasStore
 from synapse.types import JsonDict
 from synapse.util.caches.descriptors import cached
@@ -554,7 +555,17 @@ class StatsStore(StateDeltasStore):
             "get_initial_state_for_room", _fetch_current_state_stats
         )
 
-        state_event_map = await self.get_events(event_ids, get_prev_content=False)  # type: ignore[attr-defined]
+        try:
+            state_event_map = await self.get_events(event_ids, get_prev_content=False)  # type: ignore[attr-defined]
+        except InvalidEventError as e:
+            # If an exception occurs fetching events then the room is broken;
+            # skip processing it to avoid being stuck on a room.
+            logger.warning(
+                "Failed to fetch events for room %s, skipping stats calculation: %r.",
+                room_id,
+                e,
+            )
+            return
 
         room_state: Dict[str, Union[None, bool, str]] = {
             "join_rules": None,
diff --git a/tests/storage/databases/main/test_room.py b/tests/storage/databases/main/test_room.py
index 7d961fac64..3108ca3444 100644
--- a/tests/storage/databases/main/test_room.py
+++ b/tests/storage/databases/main/test_room.py
@@ -40,9 +40,23 @@ class RoomBackgroundUpdateStoreTestCase(HomeserverTestCase):
         self.token = self.login("foo", "pass")
 
     def _generate_room(self) -> str:
-        room_id = self.helper.create_room_as(self.user_id, tok=self.token)
+        """Create a room and return the room ID."""
+        return self.helper.create_room_as(self.user_id, tok=self.token)
 
-        return room_id
+    def run_background_updates(self, update_name: str) -> None:
+        """Insert and run the background update."""
+        self.get_success(
+            self.store.db_pool.simple_insert(
+                "background_updates",
+                {"update_name": update_name, "progress_json": "{}"},
+            )
+        )
+
+        # ... and tell the DataStore that it hasn't finished all updates yet
+        self.store.db_pool.updates._all_done = False
+
+        # Now let's actually drive the updates to completion
+        self.wait_for_background_updates()
 
     def test_background_populate_rooms_creator_column(self) -> None:
         """Test that the background update to populate the rooms creator column
@@ -71,22 +85,7 @@ class RoomBackgroundUpdateStoreTestCase(HomeserverTestCase):
         )
         self.assertEqual(room_creator_before, None)
 
-        # Insert and run the background update.
-        self.get_success(
-            self.store.db_pool.simple_insert(
-                "background_updates",
-                {
-                    "update_name": _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN,
-                    "progress_json": "{}",
-                },
-            )
-        )
-
-        # ... and tell the DataStore that it hasn't finished all updates yet
-        self.store.db_pool.updates._all_done = False
-
-        # Now let's actually drive the updates to completion
-        self.wait_for_background_updates()
+        self.run_background_updates(_BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN)
 
         # Make sure the background update filled in the room creator
         room_creator_after = self.get_success(
@@ -137,22 +136,7 @@ class RoomBackgroundUpdateStoreTestCase(HomeserverTestCase):
             )
         )
 
-        # Insert and run the background update
-        self.get_success(
-            self.store.db_pool.simple_insert(
-                "background_updates",
-                {
-                    "update_name": _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN,
-                    "progress_json": "{}",
-                },
-            )
-        )
-
-        # ... and tell the DataStore that it hasn't finished all updates yet
-        self.store.db_pool.updates._all_done = False
-
-        # Now let's actually drive the updates to completion
-        self.wait_for_background_updates()
+        self.run_background_updates(_BackgroundUpdates.ADD_ROOM_TYPE_COLUMN)
 
         # Make sure the background update filled in the room type
         room_type_after = self.get_success(
@@ -164,3 +148,39 @@ class RoomBackgroundUpdateStoreTestCase(HomeserverTestCase):
             )
         )
         self.assertEqual(room_type_after, RoomTypes.SPACE)
+
+    def test_populate_stats_broken_rooms(self) -> None:
+        """Ensure that re-populating room stats skips broken rooms."""
+
+        # Create a good room.
+        good_room_id = self._generate_room()
+
+        # Create a room and then break it by having no room version.
+        room_id = self._generate_room()
+        self.get_success(
+            self.store.db_pool.simple_update(
+                table="rooms",
+                keyvalues={"room_id": room_id},
+                updatevalues={"room_version": None},
+                desc="test",
+            )
+        )
+
+        # Nuke any current stats in the database.
+        self.get_success(
+            self.store.db_pool.simple_delete(
+                table="room_stats_state", keyvalues={"1": 1}, desc="test"
+            )
+        )
+
+        self.run_background_updates("populate_stats_process_rooms")
+
+        # Only the good room appears in the stats tables.
+        results = self.get_success(
+            self.store.db_pool.simple_select_onecol(
+                table="room_stats_state",
+                keyvalues={},
+                retcol="room_id",
+            )
+        )
+        self.assertEqual(results, [good_room_id])
-- 
cgit 1.5.1


From 80d44060c99e87c84da72fdfcaa9a508d38a26b4 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 23 Jan 2023 15:44:39 +0000
Subject: Faster joins: omit partial rooms from eager syncs until the resync
 completes (#14870)

* Allow `AbstractSet` in `StrCollection`

Or else frozensets are excluded. This will be useful in an upcoming
commit where I plan to change a function that accepts `List[str]` to
accept `StrCollection` instead.

* `rooms_to_exclude` -> `rooms_to_exclude_globally`

I am about to make use of this exclusion mechanism to exclude rooms for
a specific user and a specific sync. This rename helps to clarify the
distinction between the global config and the rooms to exclude for a
specific sync.

* Better function names for internal sync methods

* Track a list of excluded rooms on SyncResultBuilder

I plan to feed a list of partially stated rooms for this sync to ignore

* Exclude partial state rooms during eager sync

using the mechanism established in the previous commit

* Track un-partial-state stream in sync tokens

So that we can work out which rooms have become fully-stated during a
given sync period.

* Fix mutation of `@cached` return value

This was fouling up a complement test added alongside this PR.
Excluding a room would mean the set of forgotten rooms in the cache
would be extended. This means that room could be erroneously considered
forgotten in the future.

Introduced in #12310, Synapse 1.57.0. I don't think this had any
user-visible side effects (until now).

* SyncResultBuilder: track rooms to force as newly joined

Similar plan as before. We've omitted rooms from certain sync responses;
now we establish the mechanism to reintroduce them into future syncs.

* Read new field, to present rooms as newly joined

* Force un-partial-stated rooms to be newly-joined

for eager incremental syncs only, provided they're still fully stated

* Notify user stream listeners to wake up long polling syncs

* Changelog

* Typo fix

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

* Unnecessary list cast

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

* Rephrase comment

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

* Another comment

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

* Fixup merge(?)

* Poke notifier when receiving un-partial-stated msg over replication

* Fixup merge whoops

Thanks MV :)

Co-authored-by: Mathieu Velten <mathieuv@matrix.org>

Co-authored-by: Mathieu Velten <mathieuv@matrix.org>
Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
---
 changelog.d/14870.feature                    |  1 +
 synapse/handlers/federation.py               | 15 +++----
 synapse/handlers/sync.py                     | 65 +++++++++++++++++++++++-----
 synapse/notifier.py                          | 26 +++++++++++
 synapse/replication/tcp/client.py            |  1 +
 synapse/storage/databases/main/relations.py  |  1 +
 synapse/storage/databases/main/room.py       | 47 +++++++++++++++++---
 synapse/storage/databases/main/roommember.py | 19 +++++---
 synapse/streams/events.py                    |  6 +++
 synapse/types/__init__.py                    | 15 ++++---
 tests/rest/admin/test_room.py                |  4 +-
 tests/rest/client/test_rooms.py              | 10 ++---
 tests/rest/client/test_sync.py               |  4 +-
 13 files changed, 170 insertions(+), 44 deletions(-)
 create mode 100644 changelog.d/14870.feature

(limited to 'synapse')

diff --git a/changelog.d/14870.feature b/changelog.d/14870.feature
new file mode 100644
index 0000000000..44f701d1c9
--- /dev/null
+++ b/changelog.d/14870.feature
@@ -0,0 +1 @@
+Faster joins: allow non-lazy-loading ("eager") syncs to complete after a partial join by omitting partial state rooms until they become fully stated.
\ No newline at end of file
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 3217127865..233f8c113d 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -1868,22 +1868,17 @@ class FederationHandler:
 
                 async with self._is_partial_state_room_linearizer.queue(room_id):
                     logger.info("Clearing partial-state flag for %s", room_id)
-                    success = await self.store.clear_partial_state_room(room_id)
+                    new_stream_id = await self.store.clear_partial_state_room(room_id)
 
-                    # Poke the notifier so that other workers see the write to
-                    # the un-partial-stated rooms stream.
-                    self._notifier.notify_replication()
-
-                if success:
+                if new_stream_id is not None:
                     logger.info("State resync complete for %s", room_id)
                     self._storage_controllers.state.notify_room_un_partial_stated(
                         room_id
                     )
 
-                    # Poke the notifier so that other workers see the write to
-                    # the un-partial-stated rooms stream.
-                    self._notifier.notify_replication()
-
+                    await self._notifier.on_un_partial_stated_room(
+                        room_id, new_stream_id
+                    )
                     return
 
                 # we raced against more events arriving with partial state. Go round
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 78d488f2b1..ee11764567 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -290,7 +290,7 @@ class SyncHandler:
             expiry_ms=LAZY_LOADED_MEMBERS_CACHE_MAX_AGE,
         )
 
-        self.rooms_to_exclude = hs.config.server.rooms_to_exclude_from_sync
+        self.rooms_to_exclude_globally = hs.config.server.rooms_to_exclude_from_sync
 
     async def wait_for_sync_for_user(
         self,
@@ -1340,7 +1340,10 @@ class SyncHandler:
         membership_change_events = []
         if since_token:
             membership_change_events = await self.store.get_membership_changes_for_user(
-                user_id, since_token.room_key, now_token.room_key, self.rooms_to_exclude
+                user_id,
+                since_token.room_key,
+                now_token.room_key,
+                self.rooms_to_exclude_globally,
             )
 
             mem_last_change_by_room_id: Dict[str, EventBase] = {}
@@ -1375,12 +1378,39 @@ class SyncHandler:
                 else:
                     mutable_joined_room_ids.discard(room_id)
 
+        # Tweak the set of rooms to return to the client for eager (non-lazy) syncs.
+        mutable_rooms_to_exclude = set(self.rooms_to_exclude_globally)
+        if not sync_config.filter_collection.lazy_load_members():
+            # Non-lazy syncs should never include partially stated rooms.
+            # Exclude all partially stated rooms from this sync.
+            for room_id in mutable_joined_room_ids:
+                if await self.store.is_partial_state_room(room_id):
+                    mutable_rooms_to_exclude.add(room_id)
+
+        # Incremental eager syncs should additionally include rooms that
+        # - we are joined to
+        # - are fully-stated
+        # - became fully-stated at some point during the sync period
+        #   (These rooms will have been omitted during a previous eager sync.)
+        forced_newly_joined_room_ids = set()
+        if since_token and not sync_config.filter_collection.lazy_load_members():
+            un_partial_stated_rooms = (
+                await self.store.get_un_partial_stated_rooms_between(
+                    since_token.un_partial_stated_rooms_key,
+                    now_token.un_partial_stated_rooms_key,
+                    mutable_joined_room_ids,
+                )
+            )
+            for room_id in un_partial_stated_rooms:
+                if not await self.store.is_partial_state_room(room_id):
+                    forced_newly_joined_room_ids.add(room_id)
+
         # Now we have our list of joined room IDs, exclude as configured and freeze
         joined_room_ids = frozenset(
             (
                 room_id
                 for room_id in mutable_joined_room_ids
-                if room_id not in self.rooms_to_exclude
+                if room_id not in mutable_rooms_to_exclude
             )
         )
 
@@ -1397,6 +1427,8 @@ class SyncHandler:
             since_token=since_token,
             now_token=now_token,
             joined_room_ids=joined_room_ids,
+            excluded_room_ids=frozenset(mutable_rooms_to_exclude),
+            forced_newly_joined_room_ids=frozenset(forced_newly_joined_room_ids),
             membership_change_events=membership_change_events,
         )
 
@@ -1834,14 +1866,16 @@ class SyncHandler:
         # 3. Work out which rooms need reporting in the sync response.
         ignored_users = await self.store.ignored_users(user_id)
         if since_token:
-            room_changes = await self._get_rooms_changed(
+            room_changes = await self._get_room_changes_for_incremental_sync(
                 sync_result_builder, ignored_users
             )
             tags_by_room = await self.store.get_updated_tags(
                 user_id, since_token.account_data_key
             )
         else:
-            room_changes = await self._get_all_rooms(sync_result_builder, ignored_users)
+            room_changes = await self._get_room_changes_for_initial_sync(
+                sync_result_builder, ignored_users
+            )
             tags_by_room = await self.store.get_tags_for_user(user_id)
 
         log_kv({"rooms_changed": len(room_changes.room_entries)})
@@ -1900,7 +1934,7 @@ class SyncHandler:
 
         assert since_token
 
-        if membership_change_events:
+        if membership_change_events or sync_result_builder.forced_newly_joined_room_ids:
             return True
 
         stream_id = since_token.room_key.stream
@@ -1909,7 +1943,7 @@ class SyncHandler:
                 return True
         return False
 
-    async def _get_rooms_changed(
+    async def _get_room_changes_for_incremental_sync(
         self,
         sync_result_builder: "SyncResultBuilder",
         ignored_users: FrozenSet[str],
@@ -1947,7 +1981,9 @@ class SyncHandler:
         for event in membership_change_events:
             mem_change_events_by_room_id.setdefault(event.room_id, []).append(event)
 
-        newly_joined_rooms: List[str] = []
+        newly_joined_rooms: List[str] = list(
+            sync_result_builder.forced_newly_joined_room_ids
+        )
         newly_left_rooms: List[str] = []
         room_entries: List[RoomSyncResultBuilder] = []
         invited: List[InvitedSyncResult] = []
@@ -2153,7 +2189,7 @@ class SyncHandler:
             newly_left_rooms,
         )
 
-    async def _get_all_rooms(
+    async def _get_room_changes_for_initial_sync(
         self,
         sync_result_builder: "SyncResultBuilder",
         ignored_users: FrozenSet[str],
@@ -2178,7 +2214,7 @@ class SyncHandler:
         room_list = await self.store.get_rooms_for_local_user_where_membership_is(
             user_id=user_id,
             membership_list=Membership.LIST,
-            excluded_rooms=self.rooms_to_exclude,
+            excluded_rooms=sync_result_builder.excluded_room_ids,
         )
 
         room_entries = []
@@ -2549,6 +2585,13 @@ class SyncResultBuilder:
         since_token: The token supplied by user, or None.
         now_token: The token to sync up to.
         joined_room_ids: List of rooms the user is joined to
+        excluded_room_ids: Set of room ids we should omit from the /sync response.
+        forced_newly_joined_room_ids:
+            Rooms that should be presented in the /sync response as if they were
+            newly joined during the sync period, even if that's not the case.
+            (This is useful if the room was previously excluded from a /sync response,
+            and now the client should be made aware of it.)
+            Only used by incremental syncs.
 
         # The following mirror the fields in a sync response
         presence
@@ -2565,6 +2608,8 @@ class SyncResultBuilder:
     since_token: Optional[StreamToken]
     now_token: StreamToken
     joined_room_ids: FrozenSet[str]
+    excluded_room_ids: FrozenSet[str]
+    forced_newly_joined_room_ids: FrozenSet[str]
     membership_change_events: List[EventBase]
 
     presence: List[UserPresenceState] = attr.Factory(list)
diff --git a/synapse/notifier.py b/synapse/notifier.py
index 28f0d4a25a..2b0e52f23c 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -314,6 +314,32 @@ class Notifier:
             event_entries.append((entry, event.event_id))
         await self.notify_new_room_events(event_entries, max_room_stream_token)
 
+    async def on_un_partial_stated_room(
+        self,
+        room_id: str,
+        new_token: int,
+    ) -> None:
+        """Used by the resync background processes to wake up all listeners
+        of this room when it is un-partial-stated.
+
+        It will also notify replication listeners of the change in stream.
+        """
+
+        # Wake up all related user stream notifiers
+        user_streams = self.room_to_user_streams.get(room_id, set())
+        time_now_ms = self.clock.time_msec()
+        for user_stream in user_streams:
+            try:
+                user_stream.notify(
+                    StreamKeyType.UN_PARTIAL_STATED_ROOMS, new_token, time_now_ms
+                )
+            except Exception:
+                logger.exception("Failed to notify listener")
+
+        # Poke the replication so that other workers also see the write to
+        # the un-partial-stated rooms stream.
+        self.notify_replication()
+
     async def notify_new_room_events(
         self,
         event_entries: List[Tuple[_PendingRoomEventEntry, str]],
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 2a9cb499a4..cc0528bd8e 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -260,6 +260,7 @@ class ReplicationDataHandler:
                 self._state_storage_controller.notify_room_un_partial_stated(
                     row.room_id
                 )
+                await self.notifier.on_un_partial_stated_room(row.room_id, token)
         elif stream_name == UnPartialStatedEventStream.NAME:
             for row in rows:
                 assert isinstance(row, UnPartialStatedEventStreamRow)
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index aea96e9d24..84f844b79e 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -292,6 +292,7 @@ class RelationsWorkerStore(SQLBaseStore):
                         to_device_key=0,
                         device_list_key=0,
                         groups_key=0,
+                        un_partial_stated_rooms_key=0,
                     )
 
             return events[:limit], next_token
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 6a65b2a89b..3aa7b94560 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -26,6 +26,7 @@ from typing import (
     Mapping,
     Optional,
     Sequence,
+    Set,
     Tuple,
     Union,
     cast,
@@ -1294,10 +1295,44 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
             instance_name
         )
 
+    async def get_un_partial_stated_rooms_between(
+        self, last_id: int, current_id: int, room_ids: Collection[str]
+    ) -> Set[str]:
+        """Get all rooms that got un partial stated between `last_id` exclusive and
+        `current_id` inclusive, restricted to the rooms in `room_ids`.
+
+        Returns:
+            The set of room ids.
+        """
+
+        if last_id == current_id:
+            return set()
+
+        def _get_un_partial_stated_rooms_between_txn(
+            txn: LoggingTransaction,
+        ) -> Set[str]:
+            sql = """
+                SELECT DISTINCT room_id FROM un_partial_stated_room_stream
+                WHERE ? < stream_id AND stream_id <= ? AND
+            """
+
+            clause, args = make_in_list_sql_clause(
+                self.database_engine, "room_id", room_ids
+            )
+
+            txn.execute(sql + clause, [last_id, current_id] + args)
+
+            return {r[0] for r in txn}
+
+        return await self.db_pool.runInteraction(
+            "get_un_partial_stated_rooms_between",
+            _get_un_partial_stated_rooms_between_txn,
+        )
+
     async def get_un_partial_stated_rooms_from_stream(
         self, instance_name: str, last_id: int, current_id: int, limit: int
     ) -> Tuple[List[Tuple[int, Tuple[str]]], int, bool]:
-        """Get updates for caches replication stream.
+        """Get updates for un partial stated rooms replication stream.
 
         Args:
             instance_name: The writer we want to fetch updates from. Unused
@@ -2304,16 +2339,16 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
             (room_id,),
         )
 
-    async def clear_partial_state_room(self, room_id: str) -> bool:
+    async def clear_partial_state_room(self, room_id: str) -> Optional[int]:
         """Clears the partial state flag for a room.
 
         Args:
             room_id: The room whose partial state flag is to be cleared.
 
         Returns:
-            `True` if the partial state flag has been cleared successfully.
+            The corresponding stream id for the un-partial-stated rooms stream.
 
-            `False` if the partial state flag could not be cleared because the room
+            `None` if the partial state flag could not be cleared because the room
             still contains events with partial state.
         """
         try:
@@ -2324,7 +2359,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
                     room_id,
                     un_partial_state_room_stream_id,
                 )
-                return True
+                return un_partial_state_room_stream_id
         except self.db_pool.engine.module.IntegrityError as e:
             # Assume that any `IntegrityError`s are due to partial state events.
             logger.info(
@@ -2332,7 +2367,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
                 room_id,
                 e,
             )
-            return False
+            return None
 
     def _clear_partial_state_room_txn(
         self,
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index f02c1d7ea7..8e2ba7b7b4 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -15,6 +15,7 @@
 import logging
 from typing import (
     TYPE_CHECKING,
+    AbstractSet,
     Collection,
     Dict,
     FrozenSet,
@@ -47,7 +48,13 @@ from synapse.storage.roommember import (
     ProfileInfo,
     RoomsForUser,
 )
-from synapse.types import JsonDict, PersistedEventPosition, StateMap, get_domain_from_id
+from synapse.types import (
+    JsonDict,
+    PersistedEventPosition,
+    StateMap,
+    StrCollection,
+    get_domain_from_id,
+)
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches import intern_string
 from synapse.util.caches.descriptors import _CacheContext, cached, cachedList
@@ -385,7 +392,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         self,
         user_id: str,
         membership_list: Collection[str],
-        excluded_rooms: Optional[List[str]] = None,
+        excluded_rooms: StrCollection = (),
     ) -> List[RoomsForUser]:
         """Get all the rooms for this *local* user where the membership for this user
         matches one in the membership list.
@@ -412,10 +419,12 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         )
 
         # Now we filter out forgotten and excluded rooms
-        rooms_to_exclude: Set[str] = await self.get_forgotten_rooms_for_user(user_id)
+        rooms_to_exclude = await self.get_forgotten_rooms_for_user(user_id)
 
         if excluded_rooms is not None:
-            rooms_to_exclude.update(set(excluded_rooms))
+            # Take a copy to avoid mutating the in-cache set
+            rooms_to_exclude = set(rooms_to_exclude)
+            rooms_to_exclude.update(excluded_rooms)
 
         return [room for room in rooms if room.room_id not in rooms_to_exclude]
 
@@ -1169,7 +1178,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         return count == 0
 
     @cached()
-    async def get_forgotten_rooms_for_user(self, user_id: str) -> Set[str]:
+    async def get_forgotten_rooms_for_user(self, user_id: str) -> AbstractSet[str]:
         """Gets all rooms the user has forgotten.
 
         Args:
diff --git a/synapse/streams/events.py b/synapse/streams/events.py
index 619eb7f601..d7084d2358 100644
--- a/synapse/streams/events.py
+++ b/synapse/streams/events.py
@@ -53,11 +53,15 @@ class EventSources:
             *(attribute.type(hs) for attribute in attr.fields(_EventSourcesInner))
         )
         self.store = hs.get_datastores().main
+        self._instance_name = hs.get_instance_name()
 
     def get_current_token(self) -> StreamToken:
         push_rules_key = self.store.get_max_push_rules_stream_id()
         to_device_key = self.store.get_to_device_stream_token()
         device_list_key = self.store.get_device_stream_token()
+        un_partial_stated_rooms_key = self.store.get_un_partial_stated_rooms_token(
+            self._instance_name
+        )
 
         token = StreamToken(
             room_key=self.sources.room.get_current_key(),
@@ -70,6 +74,7 @@ class EventSources:
             device_list_key=device_list_key,
             # Groups key is unused.
             groups_key=0,
+            un_partial_stated_rooms_key=un_partial_stated_rooms_key,
         )
         return token
 
@@ -107,5 +112,6 @@ class EventSources:
             to_device_key=0,
             device_list_key=0,
             groups_key=0,
+            un_partial_stated_rooms_key=0,
         )
         return token
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index c59eca2430..f82d1cfc29 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -17,6 +17,7 @@ import re
 import string
 from typing import (
     TYPE_CHECKING,
+    AbstractSet,
     Any,
     ClassVar,
     Dict,
@@ -79,7 +80,7 @@ JsonSerializable = object
 
 # Collection[str] that does not include str itself; str being a Sequence[str]
 # is very misleading and results in bugs.
-StrCollection = Union[Tuple[str, ...], List[str], Set[str]]
+StrCollection = Union[Tuple[str, ...], List[str], AbstractSet[str]]
 
 
 # Note that this seems to require inheriting *directly* from Interface in order
@@ -633,6 +634,7 @@ class StreamKeyType:
     PUSH_RULES: Final = "push_rules_key"
     TO_DEVICE: Final = "to_device_key"
     DEVICE_LIST: Final = "device_list_key"
+    UN_PARTIAL_STATED_ROOMS = "un_partial_stated_rooms_key"
 
 
 @attr.s(slots=True, frozen=True, auto_attribs=True)
@@ -640,7 +642,7 @@ class StreamToken:
     """A collection of keys joined together by underscores in the following
     order and which represent the position in their respective streams.
 
-    ex. `s2633508_17_338_6732159_1082514_541479_274711_265584_1`
+    ex. `s2633508_17_338_6732159_1082514_541479_274711_265584_1_379`
         1. `room_key`: `s2633508` which is a `RoomStreamToken`
            - `RoomStreamToken`'s can also look like `t426-2633508` or `m56~2.58~3.59`
            - See the docstring for `RoomStreamToken` for more details.
@@ -652,12 +654,13 @@ class StreamToken:
         7. `to_device_key`: `274711`
         8. `device_list_key`: `265584`
         9. `groups_key`: `1` (note that this key is now unused)
+        10. `un_partial_stated_rooms_key`: `379`
 
     You can see how many of these keys correspond to the various
     fields in a "/sync" response:
     ```json
     {
-        "next_batch": "s12_4_0_1_1_1_1_4_1",
+        "next_batch": "s12_4_0_1_1_1_1_4_1_1",
         "presence": {
             "events": []
         },
@@ -669,7 +672,7 @@ class StreamToken:
                 "!QrZlfIDQLNLdZHqTnt:hs1": {
                     "timeline": {
                         "events": [],
-                        "prev_batch": "s10_4_0_1_1_1_1_4_1",
+                        "prev_batch": "s10_4_0_1_1_1_1_4_1_1",
                         "limited": false
                     },
                     "state": {
@@ -705,6 +708,7 @@ class StreamToken:
     device_list_key: int
     # Note that the groups key is no longer used and may have bogus values.
     groups_key: int
+    un_partial_stated_rooms_key: int
 
     _SEPARATOR = "_"
     START: ClassVar["StreamToken"]
@@ -743,6 +747,7 @@ class StreamToken:
                 # serialized so that there will not be confusion in the future
                 # if additional tokens are added.
                 str(self.groups_key),
+                str(self.un_partial_stated_rooms_key),
             ]
         )
 
@@ -775,7 +780,7 @@ class StreamToken:
         return attr.evolve(self, **{key: new_value})
 
 
-StreamToken.START = StreamToken(RoomStreamToken(None, 0), 0, 0, 0, 0, 0, 0, 0, 0)
+StreamToken.START = StreamToken(RoomStreamToken(None, 0), 0, 0, 0, 0, 0, 0, 0, 0, 0)
 
 
 @attr.s(slots=True, frozen=True, auto_attribs=True)
diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py
index e0f5d54aba..453a6e979c 100644
--- a/tests/rest/admin/test_room.py
+++ b/tests/rest/admin/test_room.py
@@ -1831,7 +1831,7 @@ class RoomMessagesTestCase(unittest.HomeserverTestCase):
 
     def test_topo_token_is_accepted(self) -> None:
         """Test Topo Token is accepted."""
-        token = "t1-0_0_0_0_0_0_0_0_0"
+        token = "t1-0_0_0_0_0_0_0_0_0_0"
         channel = self.make_request(
             "GET",
             "/_synapse/admin/v1/rooms/%s/messages?from=%s" % (self.room_id, token),
@@ -1845,7 +1845,7 @@ class RoomMessagesTestCase(unittest.HomeserverTestCase):
 
     def test_stream_token_is_accepted_for_fwd_pagianation(self) -> None:
         """Test that stream token is accepted for forward pagination."""
-        token = "s0_0_0_0_0_0_0_0_0"
+        token = "s0_0_0_0_0_0_0_0_0_0"
         channel = self.make_request(
             "GET",
             "/_synapse/admin/v1/rooms/%s/messages?from=%s" % (self.room_id, token),
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index b4daace556..9222cab198 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -1987,7 +1987,7 @@ class RoomMessageListTestCase(RoomBase):
         self.room_id = self.helper.create_room_as(self.user_id)
 
     def test_topo_token_is_accepted(self) -> None:
-        token = "t1-0_0_0_0_0_0_0_0_0"
+        token = "t1-0_0_0_0_0_0_0_0_0_0"
         channel = self.make_request(
             "GET", "/rooms/%s/messages?access_token=x&from=%s" % (self.room_id, token)
         )
@@ -1998,7 +1998,7 @@ class RoomMessageListTestCase(RoomBase):
         self.assertTrue("end" in channel.json_body)
 
     def test_stream_token_is_accepted_for_fwd_pagianation(self) -> None:
-        token = "s0_0_0_0_0_0_0_0_0"
+        token = "s0_0_0_0_0_0_0_0_0_0"
         channel = self.make_request(
             "GET", "/rooms/%s/messages?access_token=x&from=%s" % (self.room_id, token)
         )
@@ -2728,7 +2728,7 @@ class LabelsTestCase(unittest.HomeserverTestCase):
         """Test that we can filter by a label on a /messages request."""
         self._send_labelled_messages_in_room()
 
-        token = "s0_0_0_0_0_0_0_0_0"
+        token = "s0_0_0_0_0_0_0_0_0_0"
         channel = self.make_request(
             "GET",
             "/rooms/%s/messages?access_token=%s&from=%s&filter=%s"
@@ -2745,7 +2745,7 @@ class LabelsTestCase(unittest.HomeserverTestCase):
         """Test that we can filter by the absence of a label on a /messages request."""
         self._send_labelled_messages_in_room()
 
-        token = "s0_0_0_0_0_0_0_0_0"
+        token = "s0_0_0_0_0_0_0_0_0_0"
         channel = self.make_request(
             "GET",
             "/rooms/%s/messages?access_token=%s&from=%s&filter=%s"
@@ -2768,7 +2768,7 @@ class LabelsTestCase(unittest.HomeserverTestCase):
         """
         self._send_labelled_messages_in_room()
 
-        token = "s0_0_0_0_0_0_0_0_0"
+        token = "s0_0_0_0_0_0_0_0_0_0"
         channel = self.make_request(
             "GET",
             "/rooms/%s/messages?access_token=%s&from=%s&filter=%s"
diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py
index 0af643ecd9..c9afa0f3dd 100644
--- a/tests/rest/client/test_sync.py
+++ b/tests/rest/client/test_sync.py
@@ -913,7 +913,9 @@ class ExcludeRoomTestCase(unittest.HomeserverTestCase):
 
         # We need to manually append the room ID, because we can't know the ID before
         # creating the room, and we can't set the config after starting the homeserver.
-        self.hs.get_sync_handler().rooms_to_exclude.append(self.excluded_room_id)
+        self.hs.get_sync_handler().rooms_to_exclude_globally.append(
+            self.excluded_room_id
+        )
 
     def test_join_leave(self) -> None:
         """Tests that rooms are correctly excluded from the 'join' and 'leave' sections of
-- 
cgit 1.5.1


From 4607be0b7b2165710dc2e5e68ec4281b593ca8c5 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 24 Jan 2023 15:28:20 +0000
Subject: Request partial joins by default (#14905)

* Request partial joins by default

This is a little sloppy, but we are trying to gain confidence in faster
joins in the upcoming RC.

Admins can still opt out by adding the following to their Synapse
config:

```yaml
experimental:
    faster_joins: false
```

We may revert this change before the release proper, depending on how
testing in the wild goes.

* Changelog

* Try to fix the backfill test failures

* Upgrade notes

* Postgres compat?
---
 changelog.d/14905.feature                |  1 +
 docs/upgrade.md                          | 13 +++++++++++
 synapse/config/experimental.py           |  2 +-
 synapse/storage/databases/main/stream.py | 40 +++++++++++++++++++++++++++-----
 4 files changed, 49 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/14905.feature

(limited to 'synapse')

diff --git a/changelog.d/14905.feature b/changelog.d/14905.feature
new file mode 100644
index 0000000000..f13a4af981
--- /dev/null
+++ b/changelog.d/14905.feature
@@ -0,0 +1 @@
+Faster joins: request partial joins by default. Admins can opt out of this for the time being---see the upgrade notes.
diff --git a/docs/upgrade.md b/docs/upgrade.md
index 0d486a3c82..6316db563b 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -90,6 +90,19 @@ process, for example:
 
 # Upgrading to v1.76.0
 
+## Faster joins are enabled by default
+
+When joining a room for the first time, Synapse 1.76.0rc1 will request a partial join from the other server by default. Previously, server admins had to opt in to this using an experimental config flag.
+
+Server admins can opt out of this feature for the time being by setting
+
+```yaml
+experimental:
+    faster_joins: false
+```
+
+in their server config.
+
 ## Changes to the account data replication streams
 
 Synapse has changed the format of the account data and devices replication
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 89586db763..2590c88cde 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -84,7 +84,7 @@ class ExperimentalConfig(Config):
         # experimental support for faster joins over federation
         # (MSC2775, MSC3706, MSC3895)
         # requires a target server that can provide a partial join response (MSC3706)
-        self.faster_joins_enabled: bool = experimental.get("faster_joins", False)
+        self.faster_joins_enabled: bool = experimental.get("faster_joins", True)
 
         # MSC3720 (Account status endpoint)
         self.msc3720_enabled: bool = experimental.get("msc3720_enabled", False)
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 63d8350530..d28fc65df9 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -67,7 +67,7 @@ from synapse.storage.database import (
     make_in_list_sql_clause,
 )
 from synapse.storage.databases.main.events_worker import EventsWorkerStore
-from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
 from synapse.storage.util.id_generators import MultiWriterIdGenerator
 from synapse.types import PersistedEventPosition, RoomStreamToken
 from synapse.util.caches.descriptors import cached
@@ -944,12 +944,40 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
             room_id
             stream_key
         """
-        sql = (
-            "SELECT coalesce(MIN(topological_ordering), 0) FROM events"
-            " WHERE room_id = ? AND stream_ordering >= ?"
-        )
+        if isinstance(self.database_engine, PostgresEngine):
+            min_function = "LEAST"
+        elif isinstance(self.database_engine, Sqlite3Engine):
+            min_function = "MIN"
+        else:
+            raise RuntimeError(f"Unknown database engine {self.database_engine}")
+
+        # This query used to be
+        #    SELECT COALESCE(MIN(topological_ordering), 0) FROM events
+        #    WHERE room_id = ? and events.stream_ordering >= {stream_key}
+        # which returns 0 if the stream_key is newer than any event in
+        # the room. That's not wrong, but it seems to interact oddly with backfill,
+        # requiring a second call to /messages to actually backfill from a remote
+        # homeserver.
+        #
+        # Instead, roll back the stream ordering to that after the most recent event in
+        # this room.
+        sql = f"""
+            WITH fallback(max_stream_ordering) AS (
+                SELECT MAX(stream_ordering)
+                FROM events
+                WHERE room_id = ?
+            )
+            SELECT COALESCE(MIN(topological_ordering), 0) FROM events
+            WHERE
+                room_id = ?
+                AND events.stream_ordering >= {min_function}(
+                    ?,
+                    (SELECT max_stream_ordering FROM fallback)
+                )
+        """
+
         row = await self.db_pool.execute(
-            "get_current_topological_token", None, sql, room_id, stream_key
+            "get_current_topological_token", None, sql, room_id, room_id, stream_key
         )
         return row[0][0] if row else 0
 
-- 
cgit 1.5.1


From a63d4cc9e96c1f5bb9c5bb9fc9119fb137de3b1b Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Wed, 25 Jan 2023 13:38:53 +0000
Subject: Make sqlite database migrations transactional again (#14910)

#13873 introduced a regression which causes sqlite database migrations
to no longer run inside a transaction. Wrap them in a transaction again,
to avoid database corruption when migrations are interrupted.

Fixes #14909.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14910.bugfix          | 1 +
 synapse/storage/engines/_base.py  | 3 +++
 synapse/storage/engines/sqlite.py | 5 +++--
 3 files changed, 7 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14910.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14910.bugfix b/changelog.d/14910.bugfix
new file mode 100644
index 0000000000..f1f34cd6ba
--- /dev/null
+++ b/changelog.d/14910.bugfix
@@ -0,0 +1 @@
+Fix a regression introduced in Synapse 1.69.0 which can result in database corruption when database migrations are interrupted on sqlite.
diff --git a/synapse/storage/engines/_base.py b/synapse/storage/engines/_base.py
index 70e594a68f..bc9ca3a53c 100644
--- a/synapse/storage/engines/_base.py
+++ b/synapse/storage/engines/_base.py
@@ -132,6 +132,9 @@ class BaseDatabaseEngine(Generic[ConnectionType, CursorType], metaclass=abc.ABCM
         """Execute a chunk of SQL containing multiple semicolon-delimited statements.
 
         This is not provided by DBAPI2, and so needs engine-specific support.
+
+        Some database engines may automatically COMMIT the ongoing transaction both
+        before and after executing the script.
         """
         ...
 
diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py
index 14260442b6..2f7df85ce4 100644
--- a/synapse/storage/engines/sqlite.py
+++ b/synapse/storage/engines/sqlite.py
@@ -135,13 +135,14 @@ class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection, sqlite3.Cursor]):
         > than one statement with it, it will raise a Warning. Use executescript() if
         > you want to execute multiple SQL statements with one call.
 
-        Though the docs for `executescript` warn:
+        The script is wrapped in transaction control statements, since the docs for
+        `executescript` warn:
 
         > If there is a pending transaction, an implicit COMMIT statement is executed
         > first. No other implicit transaction control is performed; any transaction
         > control must be added to sql_script.
         """
-        cursor.executescript(script)
+        cursor.executescript(f"BEGIN TRANSACTION;\n{script}\nCOMMIT;")
 
 
 # Following functions taken from: https://github.com/coleifer/peewee
-- 
cgit 1.5.1


From 8e37ece015c8afd97572bdc742981792b02c6700 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 25 Jan 2023 16:11:06 +0000
Subject: Bump the client-side timeout for /state (#14912)

* Bump the client-side timeout for /state

to allow faster joins resyncs the chance to complete for large rooms.
We have seen this fare poorly (~90s for Matrix HQ's /state) in testing,
causing the resync to advance to another HS who hasn't seen our join yet.

* Changelog

* Milliseconds!!!!
---
 changelog.d/14912.misc                 | 1 +
 synapse/federation/transport/client.py | 4 ++++
 2 files changed, 5 insertions(+)
 create mode 100644 changelog.d/14912.misc

(limited to 'synapse')

diff --git a/changelog.d/14912.misc b/changelog.d/14912.misc
new file mode 100644
index 0000000000..9dbc6b3424
--- /dev/null
+++ b/changelog.d/14912.misc
@@ -0,0 +1 @@
+Faster joins: allow the resync process more time to fetch `/state` ids.
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 556883f079..682666ab36 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -102,6 +102,10 @@ class TransportLayerClient:
             destination,
             path=path,
             args={"event_id": event_id},
+            # This can take a looooooong time for large rooms. Give this a generous
+            # timeout of 10 minutes to avoid the partial state resync timing out early
+            # and trying a bunch of servers who haven't seen our join yet.
+            timeout=600_000,
             parser=_StateParser(room_version),
         )
 
-- 
cgit 1.5.1


From 3c3ba31507cbff27064ea3c6cf1e7add9583556a Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 25 Jan 2023 15:14:03 -0500
Subject: Add missing type hints for tests.events. (#14904)

---
 changelog.d/14904.misc               |  1 +
 mypy.ini                             |  5 ++-
 synapse/events/utils.py              |  3 +-
 tests/events/test_presence_router.py | 58 +++++++++++++++++------------
 tests/events/test_snapshot.py        | 17 ++++++---
 tests/events/test_utils.py           | 71 +++++++++++++++++++-----------------
 6 files changed, 91 insertions(+), 64 deletions(-)
 create mode 100644 changelog.d/14904.misc

(limited to 'synapse')

diff --git a/changelog.d/14904.misc b/changelog.d/14904.misc
new file mode 100644
index 0000000000..d44571b731
--- /dev/null
+++ b/changelog.d/14904.misc
@@ -0,0 +1 @@
+Add missing type hints.
diff --git a/mypy.ini b/mypy.ini
index 248402532e..13890ce124 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -35,8 +35,6 @@ exclude = (?x)
    |tests/api/test_auth.py
    |tests/app/test_openid_listener.py
    |tests/appservice/test_scheduler.py
-   |tests/events/test_presence_router.py
-   |tests/events/test_utils.py
    |tests/federation/test_federation_catch_up.py
    |tests/federation/test_federation_sender.py
    |tests/handlers/test_typing.py
@@ -86,6 +84,9 @@ disallow_untyped_defs = True
 [mypy-tests.crypto.*]
 disallow_untyped_defs = True
 
+[mypy-tests.events.*]
+disallow_untyped_defs = True
+
 [mypy-tests.federation.transport.test_client]
 disallow_untyped_defs = True
 
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index ae57a4df5e..52e4b467e8 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -605,10 +605,11 @@ class EventClientSerializer:
 
 
 _PowerLevel = Union[str, int]
+PowerLevelsContent = Mapping[str, Union[_PowerLevel, Mapping[str, _PowerLevel]]]
 
 
 def copy_and_fixup_power_levels_contents(
-    old_power_levels: Mapping[str, Union[_PowerLevel, Mapping[str, _PowerLevel]]]
+    old_power_levels: PowerLevelsContent,
 ) -> Dict[str, Union[int, Dict[str, int]]]:
     """Copy the content of a power_levels event, unfreezing frozendicts along the way.
 
diff --git a/tests/events/test_presence_router.py b/tests/events/test_presence_router.py
index b703e4472e..a9893def74 100644
--- a/tests/events/test_presence_router.py
+++ b/tests/events/test_presence_router.py
@@ -16,6 +16,8 @@ from unittest.mock import Mock
 
 import attr
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.constants import EduTypes
 from synapse.events.presence_router import PresenceRouter, load_legacy_presence_router
 from synapse.federation.units import Transaction
@@ -23,11 +25,13 @@ from synapse.handlers.presence import UserPresenceState
 from synapse.module_api import ModuleApi
 from synapse.rest import admin
 from synapse.rest.client import login, presence, room
+from synapse.server import HomeServer
 from synapse.types import JsonDict, StreamToken, create_requester
+from synapse.util import Clock
 
 from tests.handlers.test_sync import generate_sync_config
 from tests.test_utils import simple_async_mock
-from tests.unittest import FederatingHomeserverTestCase, TestCase, override_config
+from tests.unittest import FederatingHomeserverTestCase, override_config
 
 
 @attr.s
@@ -49,9 +53,7 @@ class LegacyPresenceRouterTestModule:
         }
         return users_to_state
 
-    async def get_interested_users(
-        self, user_id: str
-    ) -> Union[Set[str], PresenceRouter.ALL_USERS]:
+    async def get_interested_users(self, user_id: str) -> Union[Set[str], str]:
         if user_id in self._config.users_who_should_receive_all_presence:
             return PresenceRouter.ALL_USERS
 
@@ -71,9 +73,14 @@ class LegacyPresenceRouterTestModule:
         # Initialise a typed config object
         config = PresenceRouterTestConfig()
 
-        config.users_who_should_receive_all_presence = config_dict.get(
+        users_who_should_receive_all_presence = config_dict.get(
             "users_who_should_receive_all_presence"
         )
+        assert isinstance(users_who_should_receive_all_presence, list)
+
+        config.users_who_should_receive_all_presence = (
+            users_who_should_receive_all_presence
+        )
 
         return config
 
@@ -96,9 +103,7 @@ class PresenceRouterTestModule:
         }
         return users_to_state
 
-    async def get_interested_users(
-        self, user_id: str
-    ) -> Union[Set[str], PresenceRouter.ALL_USERS]:
+    async def get_interested_users(self, user_id: str) -> Union[Set[str], str]:
         if user_id in self._config.users_who_should_receive_all_presence:
             return PresenceRouter.ALL_USERS
 
@@ -118,9 +123,14 @@ class PresenceRouterTestModule:
         # Initialise a typed config object
         config = PresenceRouterTestConfig()
 
-        config.users_who_should_receive_all_presence = config_dict.get(
+        users_who_should_receive_all_presence = config_dict.get(
             "users_who_should_receive_all_presence"
         )
+        assert isinstance(users_who_should_receive_all_presence, list)
+
+        config.users_who_should_receive_all_presence = (
+            users_who_should_receive_all_presence
+        )
 
         return config
 
@@ -140,7 +150,7 @@ class PresenceRouterTestCase(FederatingHomeserverTestCase):
         presence.register_servlets,
     ]
 
-    def make_homeserver(self, reactor, clock):
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         # Mock out the calls over federation.
         fed_transport_client = Mock(spec=["send_transaction"])
         fed_transport_client.send_transaction = simple_async_mock({})
@@ -153,7 +163,9 @@ class PresenceRouterTestCase(FederatingHomeserverTestCase):
 
         return hs
 
-    def prepare(self, reactor, clock, homeserver):
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
         self.sync_handler = self.hs.get_sync_handler()
         self.module_api = homeserver.get_module_api()
 
@@ -176,7 +188,7 @@ class PresenceRouterTestCase(FederatingHomeserverTestCase):
             },
         }
     )
-    def test_receiving_all_presence_legacy(self):
+    def test_receiving_all_presence_legacy(self) -> None:
         self.receiving_all_presence_test_body()
 
     @override_config(
@@ -193,10 +205,10 @@ class PresenceRouterTestCase(FederatingHomeserverTestCase):
             ],
         }
     )
-    def test_receiving_all_presence(self):
+    def test_receiving_all_presence(self) -> None:
         self.receiving_all_presence_test_body()
 
-    def receiving_all_presence_test_body(self):
+    def receiving_all_presence_test_body(self) -> None:
         """Test that a user that does not share a room with another other can receive
         presence for them, due to presence routing.
         """
@@ -302,7 +314,7 @@ class PresenceRouterTestCase(FederatingHomeserverTestCase):
             },
         }
     )
-    def test_send_local_online_presence_to_with_module_legacy(self):
+    def test_send_local_online_presence_to_with_module_legacy(self) -> None:
         self.send_local_online_presence_to_with_module_test_body()
 
     @override_config(
@@ -321,10 +333,10 @@ class PresenceRouterTestCase(FederatingHomeserverTestCase):
             ],
         }
     )
-    def test_send_local_online_presence_to_with_module(self):
+    def test_send_local_online_presence_to_with_module(self) -> None:
         self.send_local_online_presence_to_with_module_test_body()
 
-    def send_local_online_presence_to_with_module_test_body(self):
+    def send_local_online_presence_to_with_module_test_body(self) -> None:
         """Tests that send_local_presence_to_users sends local online presence to a set
         of specified local and remote users, with a custom PresenceRouter module enabled.
         """
@@ -447,18 +459,18 @@ class PresenceRouterTestCase(FederatingHomeserverTestCase):
                     continue
 
                 # EDUs can contain multiple presence updates
-                for presence_update in edu["content"]["push"]:
+                for presence_edu in edu["content"]["push"]:
                     # Check for presence updates that contain the user IDs we're after
-                    found_users.add(presence_update["user_id"])
+                    found_users.add(presence_edu["user_id"])
 
                     # Ensure that no offline states are being sent out
-                    self.assertNotEqual(presence_update["presence"], "offline")
+                    self.assertNotEqual(presence_edu["presence"], "offline")
 
         self.assertEqual(found_users, expected_users)
 
 
 def send_presence_update(
-    testcase: TestCase,
+    testcase: FederatingHomeserverTestCase,
     user_id: str,
     access_token: str,
     presence_state: str,
@@ -479,7 +491,7 @@ def send_presence_update(
 
 
 def sync_presence(
-    testcase: TestCase,
+    testcase: FederatingHomeserverTestCase,
     user_id: str,
     since_token: Optional[StreamToken] = None,
 ) -> Tuple[List[UserPresenceState], StreamToken]:
@@ -500,7 +512,7 @@ def sync_presence(
     requester = create_requester(user_id)
     sync_config = generate_sync_config(requester.user.to_string())
     sync_result = testcase.get_success(
-        testcase.sync_handler.wait_for_sync_for_user(
+        testcase.hs.get_sync_handler().wait_for_sync_for_user(
             requester, sync_config, since_token
         )
     )
diff --git a/tests/events/test_snapshot.py b/tests/events/test_snapshot.py
index 8ddce83b83..6687c28e8f 100644
--- a/tests/events/test_snapshot.py
+++ b/tests/events/test_snapshot.py
@@ -12,9 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.events import EventBase
 from synapse.events.snapshot import EventContext
 from synapse.rest import admin
 from synapse.rest.client import login, room
+from synapse.server import HomeServer
+from synapse.util import Clock
 
 from tests import unittest
 from tests.test_utils.event_injection import create_event
@@ -27,7 +32,7 @@ class TestEventContext(unittest.HomeserverTestCase):
         room.register_servlets,
     ]
 
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
         self._storage_controllers = hs.get_storage_controllers()
 
@@ -35,7 +40,7 @@ class TestEventContext(unittest.HomeserverTestCase):
         self.user_tok = self.login("u1", "pass")
         self.room_id = self.helper.create_room_as(tok=self.user_tok)
 
-    def test_serialize_deserialize_msg(self):
+    def test_serialize_deserialize_msg(self) -> None:
         """Test that an EventContext for a message event is the same after
         serialize/deserialize.
         """
@@ -51,7 +56,7 @@ class TestEventContext(unittest.HomeserverTestCase):
 
         self._check_serialize_deserialize(event, context)
 
-    def test_serialize_deserialize_state_no_prev(self):
+    def test_serialize_deserialize_state_no_prev(self) -> None:
         """Test that an EventContext for a state event (with not previous entry)
         is the same after serialize/deserialize.
         """
@@ -67,7 +72,7 @@ class TestEventContext(unittest.HomeserverTestCase):
 
         self._check_serialize_deserialize(event, context)
 
-    def test_serialize_deserialize_state_prev(self):
+    def test_serialize_deserialize_state_prev(self) -> None:
         """Test that an EventContext for a state event (which replaces a
         previous entry) is the same after serialize/deserialize.
         """
@@ -84,7 +89,9 @@ class TestEventContext(unittest.HomeserverTestCase):
 
         self._check_serialize_deserialize(event, context)
 
-    def _check_serialize_deserialize(self, event, context):
+    def _check_serialize_deserialize(
+        self, event: EventBase, context: EventContext
+    ) -> None:
         serialized = self.get_success(context.serialize(event, self.store))
 
         d_context = EventContext.deserialize(self._storage_controllers, serialized)
diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py
index a79256846f..ff7b349d75 100644
--- a/tests/events/test_utils.py
+++ b/tests/events/test_utils.py
@@ -13,21 +13,24 @@
 # limitations under the License.
 
 import unittest as stdlib_unittest
+from typing import Any, List, Mapping, Optional
 
 from synapse.api.constants import EventContentFields
 from synapse.api.room_versions import RoomVersions
-from synapse.events import make_event_from_dict
+from synapse.events import EventBase, make_event_from_dict
 from synapse.events.utils import (
+    PowerLevelsContent,
     SerializeEventConfig,
     copy_and_fixup_power_levels_contents,
     maybe_upsert_event_field,
     prune_event,
     serialize_event,
 )
+from synapse.types import JsonDict
 from synapse.util.frozenutils import freeze
 
 
-def MockEvent(**kwargs):
+def MockEvent(**kwargs: Any) -> EventBase:
     if "event_id" not in kwargs:
         kwargs["event_id"] = "fake_event_id"
     if "type" not in kwargs:
@@ -60,7 +63,7 @@ class TestMaybeUpsertEventField(stdlib_unittest.TestCase):
 
 
 class PruneEventTestCase(stdlib_unittest.TestCase):
-    def run_test(self, evdict, matchdict, **kwargs):
+    def run_test(self, evdict: JsonDict, matchdict: JsonDict, **kwargs: Any) -> None:
         """
         Asserts that a new event constructed with `evdict` will look like
         `matchdict` when it is redacted.
@@ -74,7 +77,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
             prune_event(make_event_from_dict(evdict, **kwargs)).get_dict(), matchdict
         )
 
-    def test_minimal(self):
+    def test_minimal(self) -> None:
         self.run_test(
             {"type": "A", "event_id": "$test:domain"},
             {
@@ -86,7 +89,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
             },
         )
 
-    def test_basic_keys(self):
+    def test_basic_keys(self) -> None:
         """Ensure that the keys that should be untouched are kept."""
         # Note that some of the values below don't really make sense, but the
         # pruning of events doesn't worry about the values of any fields (with
@@ -138,7 +141,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
             room_version=RoomVersions.MSC2176,
         )
 
-    def test_unsigned(self):
+    def test_unsigned(self) -> None:
         """Ensure that unsigned properties get stripped (except age_ts and replaces_state)."""
         self.run_test(
             {
@@ -159,7 +162,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
             },
         )
 
-    def test_content(self):
+    def test_content(self) -> None:
         """The content dictionary should be stripped in most cases."""
         self.run_test(
             {"type": "C", "event_id": "$test:domain", "content": {"things": "here"}},
@@ -194,7 +197,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
                 },
             )
 
-    def test_create(self):
+    def test_create(self) -> None:
         """Create events are partially redacted until MSC2176."""
         self.run_test(
             {
@@ -223,7 +226,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
             room_version=RoomVersions.MSC2176,
         )
 
-    def test_power_levels(self):
+    def test_power_levels(self) -> None:
         """Power level events keep a variety of content keys."""
         self.run_test(
             {
@@ -273,7 +276,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
             room_version=RoomVersions.MSC2176,
         )
 
-    def test_alias_event(self):
+    def test_alias_event(self) -> None:
         """Alias events have special behavior up through room version 6."""
         self.run_test(
             {
@@ -302,7 +305,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
             room_version=RoomVersions.V6,
         )
 
-    def test_redacts(self):
+    def test_redacts(self) -> None:
         """Redaction events have no special behaviour until MSC2174/MSC2176."""
 
         self.run_test(
@@ -328,7 +331,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
             room_version=RoomVersions.MSC2176,
         )
 
-    def test_join_rules(self):
+    def test_join_rules(self) -> None:
         """Join rules events have changed behavior starting with MSC3083."""
         self.run_test(
             {
@@ -371,7 +374,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
             room_version=RoomVersions.V8,
         )
 
-    def test_member(self):
+    def test_member(self) -> None:
         """Member events have changed behavior starting with MSC3375."""
         self.run_test(
             {
@@ -417,12 +420,12 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
 
 
 class SerializeEventTestCase(stdlib_unittest.TestCase):
-    def serialize(self, ev, fields):
+    def serialize(self, ev: EventBase, fields: Optional[List[str]]) -> JsonDict:
         return serialize_event(
             ev, 1479807801915, config=SerializeEventConfig(only_event_fields=fields)
         )
 
-    def test_event_fields_works_with_keys(self):
+    def test_event_fields_works_with_keys(self) -> None:
         self.assertEqual(
             self.serialize(
                 MockEvent(sender="@alice:localhost", room_id="!foo:bar"), ["room_id"]
@@ -430,7 +433,7 @@ class SerializeEventTestCase(stdlib_unittest.TestCase):
             {"room_id": "!foo:bar"},
         )
 
-    def test_event_fields_works_with_nested_keys(self):
+    def test_event_fields_works_with_nested_keys(self) -> None:
         self.assertEqual(
             self.serialize(
                 MockEvent(
@@ -443,7 +446,7 @@ class SerializeEventTestCase(stdlib_unittest.TestCase):
             {"content": {"body": "A message"}},
         )
 
-    def test_event_fields_works_with_dot_keys(self):
+    def test_event_fields_works_with_dot_keys(self) -> None:
         self.assertEqual(
             self.serialize(
                 MockEvent(
@@ -456,7 +459,7 @@ class SerializeEventTestCase(stdlib_unittest.TestCase):
             {"content": {"key.with.dots": {}}},
         )
 
-    def test_event_fields_works_with_nested_dot_keys(self):
+    def test_event_fields_works_with_nested_dot_keys(self) -> None:
         self.assertEqual(
             self.serialize(
                 MockEvent(
@@ -472,7 +475,7 @@ class SerializeEventTestCase(stdlib_unittest.TestCase):
             {"content": {"nested.dot.key": {"leaf.key": 42}}},
         )
 
-    def test_event_fields_nops_with_unknown_keys(self):
+    def test_event_fields_nops_with_unknown_keys(self) -> None:
         self.assertEqual(
             self.serialize(
                 MockEvent(
@@ -485,7 +488,7 @@ class SerializeEventTestCase(stdlib_unittest.TestCase):
             {"content": {"foo": "bar"}},
         )
 
-    def test_event_fields_nops_with_non_dict_keys(self):
+    def test_event_fields_nops_with_non_dict_keys(self) -> None:
         self.assertEqual(
             self.serialize(
                 MockEvent(
@@ -498,7 +501,7 @@ class SerializeEventTestCase(stdlib_unittest.TestCase):
             {},
         )
 
-    def test_event_fields_nops_with_array_keys(self):
+    def test_event_fields_nops_with_array_keys(self) -> None:
         self.assertEqual(
             self.serialize(
                 MockEvent(
@@ -511,7 +514,7 @@ class SerializeEventTestCase(stdlib_unittest.TestCase):
             {},
         )
 
-    def test_event_fields_all_fields_if_empty(self):
+    def test_event_fields_all_fields_if_empty(self) -> None:
         self.assertEqual(
             self.serialize(
                 MockEvent(
@@ -531,16 +534,16 @@ class SerializeEventTestCase(stdlib_unittest.TestCase):
             },
         )
 
-    def test_event_fields_fail_if_fields_not_str(self):
+    def test_event_fields_fail_if_fields_not_str(self) -> None:
         with self.assertRaises(TypeError):
             self.serialize(
-                MockEvent(room_id="!foo:bar", content={"foo": "bar"}), ["room_id", 4]
+                MockEvent(room_id="!foo:bar", content={"foo": "bar"}), ["room_id", 4]  # type: ignore[list-item]
             )
 
 
 class CopyPowerLevelsContentTestCase(stdlib_unittest.TestCase):
     def setUp(self) -> None:
-        self.test_content = {
+        self.test_content: PowerLevelsContent = {
             "ban": 50,
             "events": {"m.room.name": 100, "m.room.power_levels": 100},
             "events_default": 0,
@@ -553,10 +556,11 @@ class CopyPowerLevelsContentTestCase(stdlib_unittest.TestCase):
             "users_default": 0,
         }
 
-    def _test(self, input):
+    def _test(self, input: PowerLevelsContent) -> None:
         a = copy_and_fixup_power_levels_contents(input)
 
         self.assertEqual(a["ban"], 50)
+        assert isinstance(a["events"], Mapping)
         self.assertEqual(a["events"]["m.room.name"], 100)
 
         # make sure that changing the copy changes the copy and not the orig
@@ -564,18 +568,19 @@ class CopyPowerLevelsContentTestCase(stdlib_unittest.TestCase):
         a["events"]["m.room.power_levels"] = 20
 
         self.assertEqual(input["ban"], 50)
+        assert isinstance(input["events"], Mapping)
         self.assertEqual(input["events"]["m.room.power_levels"], 100)
 
-    def test_unfrozen(self):
+    def test_unfrozen(self) -> None:
         self._test(self.test_content)
 
-    def test_frozen(self):
+    def test_frozen(self) -> None:
         input = freeze(self.test_content)
         self._test(input)
 
-    def test_stringy_integers(self):
+    def test_stringy_integers(self) -> None:
         """String representations of decimal integers are converted to integers."""
-        input = {
+        input: PowerLevelsContent = {
             "a": "100",
             "b": {
                 "foo": 99,
@@ -603,9 +608,9 @@ class CopyPowerLevelsContentTestCase(stdlib_unittest.TestCase):
 
     def test_invalid_types_raise_type_error(self) -> None:
         with self.assertRaises(TypeError):
-            copy_and_fixup_power_levels_contents({"a": ["hello", "grandma"]})  # type: ignore[arg-type]
-            copy_and_fixup_power_levels_contents({"a": None})  # type: ignore[arg-type]
+            copy_and_fixup_power_levels_contents({"a": ["hello", "grandma"]})  # type: ignore[dict-item]
+            copy_and_fixup_power_levels_contents({"a": None})  # type: ignore[dict-item]
 
     def test_invalid_nesting_raises_type_error(self) -> None:
         with self.assertRaises(TypeError):
-            copy_and_fixup_power_levels_contents({"a": {"b": {"c": 1}}})
+            copy_and_fixup_power_levels_contents({"a": {"b": {"c": 1}}})  # type: ignore[dict-item]
-- 
cgit 1.5.1


From 7e8d455280b58dbda3ff24b19dbffad2d6c6c253 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 25 Jan 2023 16:34:37 -0500
Subject: Fix a bug in the send_local_online_presence_to module API (#14880)

Destination was being used incorrectly (a single destination instead
of a list of destinations was being passed).

This also updates some of the types in the area to not use Collection[str],
which is a footgun.
---
 changelog.d/14880.bugfix       |  1 +
 synapse/handlers/presence.py   | 18 ++++++++++++------
 synapse/module_api/__init__.py |  2 +-
 synapse/notifier.py            |  3 ++-
 synapse/streams/__init__.py    |  6 +++---
 5 files changed, 19 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/14880.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14880.bugfix b/changelog.d/14880.bugfix
new file mode 100644
index 0000000000..e56c567082
--- /dev/null
+++ b/changelog.d/14880.bugfix
@@ -0,0 +1 @@
+Fix a bug when using the `send_local_online_presence_to` module API.
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 43e4e7b1b4..87af31aa27 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -64,7 +64,13 @@ from synapse.replication.tcp.commands import ClearUserSyncsCommand
 from synapse.replication.tcp.streams import PresenceFederationStream, PresenceStream
 from synapse.storage.databases.main import DataStore
 from synapse.streams import EventSource
-from synapse.types import JsonDict, StreamKeyType, UserID, get_domain_from_id
+from synapse.types import (
+    JsonDict,
+    StrCollection,
+    StreamKeyType,
+    UserID,
+    get_domain_from_id,
+)
 from synapse.util.async_helpers import Linearizer
 from synapse.util.metrics import Measure
 from synapse.util.wheel_timer import WheelTimer
@@ -320,7 +326,7 @@ class BasePresenceHandler(abc.ABC):
         for destination, host_states in hosts_to_states.items():
             self._federation.send_presence_to_destinations(host_states, [destination])
 
-    async def send_full_presence_to_users(self, user_ids: Collection[str]) -> None:
+    async def send_full_presence_to_users(self, user_ids: StrCollection) -> None:
         """
         Adds to the list of users who should receive a full snapshot of presence
         upon their next sync. Note that this only works for local users.
@@ -1601,7 +1607,7 @@ class PresenceEventSource(EventSource[int, UserPresenceState]):
         # Having a default limit doesn't match the EventSource API, but some
         # callers do not provide it. It is unused in this class.
         limit: int = 0,
-        room_ids: Optional[Collection[str]] = None,
+        room_ids: Optional[StrCollection] = None,
         is_guest: bool = False,
         explicit_room_id: Optional[str] = None,
         include_offline: bool = True,
@@ -1688,7 +1694,7 @@ class PresenceEventSource(EventSource[int, UserPresenceState]):
 
             # The set of users that we're interested in and that have had a presence update.
             # We'll actually pull the presence updates for these users at the end.
-            interested_and_updated_users: Collection[str]
+            interested_and_updated_users: StrCollection
 
             if from_key is not None:
                 # First get all users that have had a presence update
@@ -2120,7 +2126,7 @@ class PresenceFederationQueue:
         # stream_id, destinations, user_ids)`. We don't store the full states
         # for efficiency, and remote workers will already have the full states
         # cached.
-        self._queue: List[Tuple[int, int, Collection[str], Set[str]]] = []
+        self._queue: List[Tuple[int, int, StrCollection, Set[str]]] = []
 
         self._next_id = 1
 
@@ -2142,7 +2148,7 @@ class PresenceFederationQueue:
         self._queue = self._queue[index:]
 
     def send_presence_to_destinations(
-        self, states: Collection[UserPresenceState], destinations: Collection[str]
+        self, states: Collection[UserPresenceState], destinations: StrCollection
     ) -> None:
         """Send the presence states to the given destinations.
 
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 6153a48257..d22dd19d38 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -1158,7 +1158,7 @@ class ModuleApi:
             # Send to remote destinations.
             destination = UserID.from_string(user).domain
             presence_handler.get_federation_queue().send_presence_to_destinations(
-                presence_events, destination
+                presence_events, [destination]
             )
 
     def looping_background_call(
diff --git a/synapse/notifier.py b/synapse/notifier.py
index 2b0e52f23c..a8832a3f8e 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -46,6 +46,7 @@ from synapse.types import (
     JsonDict,
     PersistedEventPosition,
     RoomStreamToken,
+    StrCollection,
     StreamKeyType,
     StreamToken,
     UserID,
@@ -716,7 +717,7 @@ class Notifier:
 
     async def _get_room_ids(
         self, user: UserID, explicit_room_id: Optional[str]
-    ) -> Tuple[Collection[str], bool]:
+    ) -> Tuple[StrCollection, bool]:
         joined_room_ids = await self.store.get_rooms_for_user(user.to_string())
         if explicit_room_id:
             if explicit_room_id in joined_room_ids:
diff --git a/synapse/streams/__init__.py b/synapse/streams/__init__.py
index 2dcd43d0a2..c6c8a0315c 100644
--- a/synapse/streams/__init__.py
+++ b/synapse/streams/__init__.py
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Collection, Generic, List, Optional, Tuple, TypeVar
+from typing import Generic, List, Optional, Tuple, TypeVar
 
-from synapse.types import UserID
+from synapse.types import StrCollection, UserID
 
 # The key, this is either a stream token or int.
 K = TypeVar("K")
@@ -28,7 +28,7 @@ class EventSource(Generic[K, R]):
         user: UserID,
         from_key: K,
         limit: int,
-        room_ids: Collection[str],
+        room_ids: StrCollection,
         is_guest: bool,
         explicit_room_id: Optional[str] = None,
     ) -> Tuple[List[R], K]:
-- 
cgit 1.5.1


From cf66d712c615b96bce19e44118cce1ebda41d0b8 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Thu, 26 Jan 2023 10:38:49 +0000
Subject: Fix initialization of `_device_list_id_gen` (#14914)

On startup, the `_device_list_id_gen` stream id generator is initialized
using the maximum stream id seen in a list of tables. When we started
populating the `device_lists_remote_pending` table in #13913, we forgot
to add it to the aforementioned list of tables, so the stream id
generator can hand out old stream ids after a restart. The end result is
that Synapse can fail to handle device list update EDUs after a restart
when a partial state join is in progress.

Add the `device_lists_remote_pending` table to the list of tables to
consider when initializing the `_device_list_id_gen` stream id generator.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14914.bugfix                  | 1 +
 synapse/storage/databases/main/devices.py | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 changelog.d/14914.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14914.bugfix b/changelog.d/14914.bugfix
new file mode 100644
index 0000000000..af73cca70f
--- /dev/null
+++ b/changelog.d/14914.bugfix
@@ -0,0 +1 @@
+Faster joins: Fix a bug introduced in Synapse 1.69 where device list EDUs could fail to be handled after a restart when a faster join sync is in progress.
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 903606fb46..e8b6cc6b80 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -99,6 +99,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
                 ("user_signature_stream", "stream_id"),
                 ("device_lists_outbound_pokes", "stream_id"),
                 ("device_lists_changes_in_room", "stream_id"),
+                ("device_lists_remote_pending", "stream_id"),
             ],
             is_writer=hs.config.worker.worker_app is None,
         )
-- 
cgit 1.5.1


From 8a05d5de21888cdd0b53870fead3a1eae64f0b17 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 26 Jan 2023 12:15:36 -0500
Subject: Batch look-ups to see if rooms are partial stated. (#14917)

* Batch look-ups to see if rooms are partial stated.

* Fix issues found in linting.

* Fix typo.

* Apply suggestions from code review

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

* Clarify comments.

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

* Also improve the cache size while we're at it

* is_partial_state_rooms -> is_partial_state_room_batched

* Run `black`

* Improve annotation for `simple_select_many_batch`

* Fix is_partial_state_room_batched impl

* Okay, _actually_ fix impl

* Update description.

* Update synapse/storage/databases/main/room.py

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

* Run black.

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
Co-authored-by: David Robertson <davidr@element.io>
---
 changelog.d/14917.misc                 |  1 +
 synapse/handlers/sync.py               | 24 +++++++++++++++++-------
 synapse/storage/database.py            |  2 +-
 synapse/storage/databases/main/room.py | 27 ++++++++++++++++++++++++---
 4 files changed, 43 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/14917.misc

(limited to 'synapse')

diff --git a/changelog.d/14917.misc b/changelog.d/14917.misc
new file mode 100644
index 0000000000..4d1dd2639a
--- /dev/null
+++ b/changelog.d/14917.misc
@@ -0,0 +1 @@
+Faster joins: Improve performance of looking up partial-state status of rooms.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index ee11764567..5ebd3ea855 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1383,16 +1383,21 @@ class SyncHandler:
         if not sync_config.filter_collection.lazy_load_members():
             # Non-lazy syncs should never include partially stated rooms.
             # Exclude all partially stated rooms from this sync.
-            for room_id in mutable_joined_room_ids:
-                if await self.store.is_partial_state_room(room_id):
-                    mutable_rooms_to_exclude.add(room_id)
+            results = await self.store.is_partial_state_room_batched(
+                mutable_joined_room_ids
+            )
+            mutable_rooms_to_exclude.update(
+                room_id
+                for room_id, is_partial_state in results.items()
+                if is_partial_state
+            )
 
         # Incremental eager syncs should additionally include rooms that
         # - we are joined to
         # - are full-stated
         # - became fully-stated at some point during the sync period
         #   (These rooms will have been omitted during a previous eager sync.)
-        forced_newly_joined_room_ids = set()
+        forced_newly_joined_room_ids: Set[str] = set()
         if since_token and not sync_config.filter_collection.lazy_load_members():
             un_partial_stated_rooms = (
                 await self.store.get_un_partial_stated_rooms_between(
@@ -1401,9 +1406,14 @@ class SyncHandler:
                     mutable_joined_room_ids,
                 )
             )
-            for room_id in un_partial_stated_rooms:
-                if not await self.store.is_partial_state_room(room_id):
-                    forced_newly_joined_room_ids.add(room_id)
+            results = await self.store.is_partial_state_room_batched(
+                un_partial_stated_rooms
+            )
+            forced_newly_joined_room_ids.update(
+                room_id
+                for room_id, is_partial_state in results.items()
+                if not is_partial_state
+            )
 
         # Now we have our list of joined room IDs, exclude as configured and freeze
         joined_room_ids = frozenset(
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 88479a16db..e20c5c5302 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -1819,7 +1819,7 @@ class DatabasePool:
         keyvalues: Optional[Dict[str, Any]] = None,
         desc: str = "simple_select_many_batch",
         batch_size: int = 100,
-    ) -> List[Any]:
+    ) -> List[Dict[str, Any]]:
         """Executes a SELECT query on the named table, which may return zero or
         more rows, returning the result as a list of dicts.
 
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 3aa7b94560..fbbc018887 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -60,9 +60,9 @@ from synapse.storage.util.id_generators import (
     MultiWriterIdGenerator,
     StreamIdGenerator,
 )
-from synapse.types import JsonDict, RetentionPolicy, ThirdPartyInstanceID
+from synapse.types import JsonDict, RetentionPolicy, StrCollection, ThirdPartyInstanceID
 from synapse.util import json_encoder
-from synapse.util.caches.descriptors import cached
+from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.stringutils import MXC_REGEX
 
 if TYPE_CHECKING:
@@ -1255,7 +1255,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
 
         return room_servers
 
-    @cached()
+    @cached(max_entries=10000)
     async def is_partial_state_room(self, room_id: str) -> bool:
         """Checks if this room has partial state.
 
@@ -1274,6 +1274,27 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
 
         return entry is not None
 
+    @cachedList(cached_method_name="is_partial_state_room", list_name="room_ids")
+    async def is_partial_state_room_batched(
+        self, room_ids: StrCollection
+    ) -> Mapping[str, bool]:
+        """Checks if the given rooms have partial state.
+
+        Returns true for "partial-state" rooms, which means that the state
+        at events in the room, and `current_state_events`, may not yet be
+        complete.
+        """
+
+        rows: List[Dict[str, str]] = await self.db_pool.simple_select_many_batch(
+            table="partial_state_rooms",
+            column="room_id",
+            iterable=room_ids,
+            retcols=("room_id",),
+            desc="is_partial_state_room_batched",
+        )
+        partial_state_rooms = {row_dict["room_id"] for row_dict in rows}
+        return {room_id: room_id in partial_state_rooms for room_id in room_ids}
+
     async def get_join_event_id_and_device_lists_stream_id_for_partial_state(
         self, room_id: str
     ) -> Tuple[str, int]:
-- 
cgit 1.5.1


From ba79fb4a61784f4b5613da795a61f430af053ca6 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 26 Jan 2023 12:31:58 -0500
Subject: Use StrCollection in place of Collection[str] in (most) handlers
 code. (#14922)

Due to the increased safety of StrCollection over Collection[str]
and Sequence[str].
---
 changelog.d/14922.misc               |  1 +
 synapse/handlers/account_data.py     |  6 +++---
 synapse/handlers/device.py           |  6 +++---
 synapse/handlers/event_auth.py       |  8 ++++----
 synapse/handlers/federation.py       | 26 ++++++++------------------
 synapse/handlers/federation_event.py |  5 +++--
 synapse/handlers/pagination.py       |  6 +++---
 synapse/handlers/room.py             | 14 +++-----------
 synapse/handlers/room_summary.py     |  4 ++--
 synapse/handlers/search.py           |  8 ++++----
 synapse/handlers/sso.py              |  9 +++++----
 synapse/handlers/sync.py             |  4 ++--
 synapse/rest/client/push_rule.py     |  4 ++--
 13 files changed, 43 insertions(+), 58 deletions(-)
 create mode 100644 changelog.d/14922.misc

(limited to 'synapse')

diff --git a/changelog.d/14922.misc b/changelog.d/14922.misc
new file mode 100644
index 0000000000..2cc3614dfd
--- /dev/null
+++ b/changelog.d/14922.misc
@@ -0,0 +1 @@
+Use `StrCollection` to avoid potential bugs with `Collection[str]`.
diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py
index 834006356a..d500b21809 100644
--- a/synapse/handlers/account_data.py
+++ b/synapse/handlers/account_data.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 import logging
 import random
-from typing import TYPE_CHECKING, Awaitable, Callable, Collection, List, Optional, Tuple
+from typing import TYPE_CHECKING, Awaitable, Callable, List, Optional, Tuple
 
 from synapse.api.constants import AccountDataTypes
 from synapse.replication.http.account_data import (
@@ -26,7 +26,7 @@ from synapse.replication.http.account_data import (
     ReplicationRemoveUserAccountDataRestServlet,
 )
 from synapse.streams import EventSource
-from synapse.types import JsonDict, StreamKeyType, UserID
+from synapse.types import JsonDict, StrCollection, StreamKeyType, UserID
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -322,7 +322,7 @@ class AccountDataEventSource(EventSource[int, JsonDict]):
         user: UserID,
         from_key: int,
         limit: int,
-        room_ids: Collection[str],
+        room_ids: StrCollection,
         is_guest: bool,
         explicit_room_id: Optional[str] = None,
     ) -> Tuple[List[JsonDict], int]:
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 58180ae2fa..5c06073901 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -18,7 +18,6 @@ from http import HTTPStatus
 from typing import (
     TYPE_CHECKING,
     Any,
-    Collection,
     Dict,
     Iterable,
     List,
@@ -45,6 +44,7 @@ from synapse.metrics.background_process_metrics import (
 )
 from synapse.types import (
     JsonDict,
+    StrCollection,
     StreamKeyType,
     StreamToken,
     UserID,
@@ -146,7 +146,7 @@ class DeviceWorkerHandler:
 
     @cancellable
     async def get_device_changes_in_shared_rooms(
-        self, user_id: str, room_ids: Collection[str], from_token: StreamToken
+        self, user_id: str, room_ids: StrCollection, from_token: StreamToken
     ) -> Set[str]:
         """Get the set of users whose devices have changed who share a room with
         the given user.
@@ -551,7 +551,7 @@ class DeviceHandler(DeviceWorkerHandler):
     @trace
     @measure_func("notify_device_update")
     async def notify_device_update(
-        self, user_id: str, device_ids: Collection[str]
+        self, user_id: str, device_ids: StrCollection
     ) -> None:
         """Notify that a user's device(s) has changed. Pokes the notifier, and
         remote servers if the user is local.
diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py
index f91dbbecb7..a23a8ce2a1 100644
--- a/synapse/handlers/event_auth.py
+++ b/synapse/handlers/event_auth.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import TYPE_CHECKING, Collection, List, Mapping, Optional, Union
+from typing import TYPE_CHECKING, List, Mapping, Optional, Union
 
 from synapse import event_auth
 from synapse.api.constants import (
@@ -29,7 +29,7 @@ from synapse.event_auth import (
 )
 from synapse.events import EventBase
 from synapse.events.builder import EventBuilder
-from synapse.types import StateMap, get_domain_from_id
+from synapse.types import StateMap, StrCollection, get_domain_from_id
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -290,7 +290,7 @@ class EventAuthHandler:
 
     async def get_rooms_that_allow_join(
         self, state_ids: StateMap[str]
-    ) -> Collection[str]:
+    ) -> StrCollection:
         """
         Generate a list of rooms in which membership allows access to a room.
 
@@ -331,7 +331,7 @@ class EventAuthHandler:
 
         return result
 
-    async def is_user_in_rooms(self, room_ids: Collection[str], user_id: str) -> bool:
+    async def is_user_in_rooms(self, room_ids: StrCollection, user_id: str) -> bool:
         """
         Check whether a user is a member of any of the provided rooms.
 
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 233f8c113d..dc1cbf5c3d 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -20,17 +20,7 @@ import itertools
 import logging
 from enum import Enum
 from http import HTTPStatus
-from typing import (
-    TYPE_CHECKING,
-    Collection,
-    Dict,
-    Iterable,
-    List,
-    Optional,
-    Set,
-    Tuple,
-    Union,
-)
+from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Tuple, Union
 
 import attr
 from prometheus_client import Histogram
@@ -70,7 +60,7 @@ from synapse.replication.http.federation import (
 )
 from synapse.storage.databases.main.events import PartialStateConflictError
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
-from synapse.types import JsonDict, get_domain_from_id
+from synapse.types import JsonDict, StrCollection, get_domain_from_id
 from synapse.types.state import StateFilter
 from synapse.util.async_helpers import Linearizer
 from synapse.util.retryutils import NotRetryingDestination
@@ -179,7 +169,7 @@ class FederationHandler:
         # A dictionary mapping room IDs to (initial destination, other destinations)
         # tuples.
         self._partial_state_syncs_maybe_needing_restart: Dict[
-            str, Tuple[Optional[str], Collection[str]]
+            str, Tuple[Optional[str], StrCollection]
         ] = {}
         # A lock guarding the partial state flag for rooms.
         # When the lock is held for a given room, no other concurrent code may
@@ -437,7 +427,7 @@ class FederationHandler:
             )
         )
 
-        async def try_backfill(domains: Collection[str]) -> bool:
+        async def try_backfill(domains: StrCollection) -> bool:
             # TODO: Should we try multiple of these at a time?
 
             # Number of contacted remote homeservers that have denied our backfill
@@ -1730,7 +1720,7 @@ class FederationHandler:
     def _start_partial_state_room_sync(
         self,
         initial_destination: Optional[str],
-        other_destinations: Collection[str],
+        other_destinations: StrCollection,
         room_id: str,
     ) -> None:
         """Starts the background process to resync the state of a partial state room,
@@ -1812,7 +1802,7 @@ class FederationHandler:
     async def _sync_partial_state_room(
         self,
         initial_destination: Optional[str],
-        other_destinations: Collection[str],
+        other_destinations: StrCollection,
         room_id: str,
     ) -> None:
         """Background process to resync the state of a partial-state room
@@ -1949,9 +1939,9 @@ class FederationHandler:
 
 def _prioritise_destinations_for_partial_state_resync(
     initial_destination: Optional[str],
-    other_destinations: Collection[str],
+    other_destinations: StrCollection,
     room_id: str,
-) -> Collection[str]:
+) -> StrCollection:
     """Work out the order in which we should ask servers to resync events.
 
     If an `initial_destination` is given, it takes top priority. Otherwise
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 904a721483..e037acbca2 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -80,6 +80,7 @@ from synapse.types import (
     PersistedEventPosition,
     RoomStreamToken,
     StateMap,
+    StrCollection,
     UserID,
     get_domain_from_id,
 )
@@ -615,7 +616,7 @@ class FederationEventHandler:
 
     @trace
     async def backfill(
-        self, dest: str, room_id: str, limit: int, extremities: Collection[str]
+        self, dest: str, room_id: str, limit: int, extremities: StrCollection
     ) -> None:
         """Trigger a backfill request to `dest` for the given `room_id`
 
@@ -1565,7 +1566,7 @@ class FederationEventHandler:
     @trace
     @tag_args
     async def _get_events_and_persist(
-        self, destination: str, room_id: str, event_ids: Collection[str]
+        self, destination: str, room_id: str, event_ids: StrCollection
     ) -> None:
         """Fetch the given events from a server, and persist them as outliers.
 
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index 8c8ff18a1a..1fe6567185 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import TYPE_CHECKING, Collection, Dict, List, Optional, Set
+from typing import TYPE_CHECKING, Dict, List, Optional, Set
 
 import attr
 
@@ -28,7 +28,7 @@ from synapse.logging.opentracing import trace
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.rest.admin._base import assert_user_is_admin
 from synapse.streams.config import PaginationConfig
-from synapse.types import JsonDict, Requester, StreamKeyType
+from synapse.types import JsonDict, Requester, StrCollection, StreamKeyType
 from synapse.types.state import StateFilter
 from synapse.util.async_helpers import ReadWriteLock
 from synapse.util.stringutils import random_string
@@ -391,7 +391,7 @@ class PaginationHandler:
         """
         return self._delete_by_id.get(delete_id)
 
-    def get_delete_ids_by_room(self, room_id: str) -> Optional[Collection[str]]:
+    def get_delete_ids_by_room(self, room_id: str) -> Optional[StrCollection]:
         """Get all active delete ids by room
 
         Args:
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 572c7b4db3..60a6d9cf3c 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -20,16 +20,7 @@ import random
 import string
 from collections import OrderedDict
 from http import HTTPStatus
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Awaitable,
-    Collection,
-    Dict,
-    List,
-    Optional,
-    Tuple,
-)
+from typing import TYPE_CHECKING, Any, Awaitable, Dict, List, Optional, Tuple
 
 import attr
 from typing_extensions import TypedDict
@@ -72,6 +63,7 @@ from synapse.types import (
     RoomID,
     RoomStreamToken,
     StateMap,
+    StrCollection,
     StreamKeyType,
     StreamToken,
     UserID,
@@ -1644,7 +1636,7 @@ class RoomEventSource(EventSource[RoomStreamToken, EventBase]):
         user: UserID,
         from_key: RoomStreamToken,
         limit: int,
-        room_ids: Collection[str],
+        room_ids: StrCollection,
         is_guest: bool,
         explicit_room_id: Optional[str] = None,
     ) -> Tuple[List[EventBase], RoomStreamToken]:
diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py
index c6b869c6f4..4472019fbc 100644
--- a/synapse/handlers/room_summary.py
+++ b/synapse/handlers/room_summary.py
@@ -36,7 +36,7 @@ from synapse.api.errors import (
 )
 from synapse.api.ratelimiting import Ratelimiter
 from synapse.events import EventBase
-from synapse.types import JsonDict, Requester
+from synapse.types import JsonDict, Requester, StrCollection
 from synapse.util.caches.response_cache import ResponseCache
 
 if TYPE_CHECKING:
@@ -870,7 +870,7 @@ class _RoomQueueEntry:
     # The room ID of this entry.
     room_id: str
     # The server to query if the room is not known locally.
-    via: Sequence[str]
+    via: StrCollection
     # The minimum number of hops necessary to get to this room (compared to the
     # originally requested room).
     depth: int = 0
diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py
index 40f4635c4e..9bbf83047d 100644
--- a/synapse/handlers/search.py
+++ b/synapse/handlers/search.py
@@ -14,7 +14,7 @@
 
 import itertools
 import logging
-from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Set, Tuple
+from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Tuple
 
 import attr
 from unpaddedbase64 import decode_base64, encode_base64
@@ -23,7 +23,7 @@ from synapse.api.constants import EventTypes, Membership
 from synapse.api.errors import NotFoundError, SynapseError
 from synapse.api.filtering import Filter
 from synapse.events import EventBase
-from synapse.types import JsonDict, StreamKeyType, UserID
+from synapse.types import JsonDict, StrCollection, StreamKeyType, UserID
 from synapse.types.state import StateFilter
 from synapse.visibility import filter_events_for_client
 
@@ -418,7 +418,7 @@ class SearchHandler:
     async def _search_by_rank(
         self,
         user: UserID,
-        room_ids: Collection[str],
+        room_ids: StrCollection,
         search_term: str,
         keys: Iterable[str],
         search_filter: Filter,
@@ -491,7 +491,7 @@ class SearchHandler:
     async def _search_by_recent(
         self,
         user: UserID,
-        room_ids: Collection[str],
+        room_ids: StrCollection,
         search_term: str,
         keys: Iterable[str],
         search_filter: Filter,
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index 44e70fc4b8..4a27c0f051 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -20,7 +20,6 @@ from typing import (
     Any,
     Awaitable,
     Callable,
-    Collection,
     Dict,
     Iterable,
     List,
@@ -47,6 +46,7 @@ from synapse.http.server import respond_with_html, respond_with_redirect
 from synapse.http.site import SynapseRequest
 from synapse.types import (
     JsonDict,
+    StrCollection,
     UserID,
     contains_invalid_mxid_characters,
     create_requester,
@@ -141,7 +141,8 @@ class UserAttributes:
     confirm_localpart: bool = False
     display_name: Optional[str] = None
     picture: Optional[str] = None
-    emails: Collection[str] = attr.Factory(list)
+    # mypy thinks these are incompatible for some reason.
+    emails: StrCollection = attr.Factory(list)  # type: ignore[assignment]
 
 
 @attr.s(slots=True, auto_attribs=True)
@@ -159,7 +160,7 @@ class UsernameMappingSession:
 
     # attributes returned by the ID mapper
     display_name: Optional[str]
-    emails: Collection[str]
+    emails: StrCollection
 
     # An optional dictionary of extra attributes to be provided to the client in the
     # login response.
@@ -174,7 +175,7 @@ class UsernameMappingSession:
     # choices made by the user
     chosen_localpart: Optional[str] = None
     use_display_name: bool = True
-    emails_to_use: Collection[str] = ()
+    emails_to_use: StrCollection = ()
     terms_accepted_version: Optional[str] = None
 
 
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index ee11764567..9e9601d423 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -17,7 +17,6 @@ from typing import (
     TYPE_CHECKING,
     AbstractSet,
     Any,
-    Collection,
     Dict,
     FrozenSet,
     List,
@@ -62,6 +61,7 @@ from synapse.types import (
     Requester,
     RoomStreamToken,
     StateMap,
+    StrCollection,
     StreamKeyType,
     StreamToken,
     UserID,
@@ -1179,7 +1179,7 @@ class SyncHandler:
     async def _find_missing_partial_state_memberships(
         self,
         room_id: str,
-        members_to_fetch: Collection[str],
+        members_to_fetch: StrCollection,
         events_with_membership_auth: Mapping[str, EventBase],
         found_state_ids: StateMap[str],
     ) -> StateMap[str]:
diff --git a/synapse/rest/client/push_rule.py b/synapse/rest/client/push_rule.py
index 8191b4e32c..ad5c10c99d 100644
--- a/synapse/rest/client/push_rule.py
+++ b/synapse/rest/client/push_rule.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import TYPE_CHECKING, List, Sequence, Tuple, Union
+from typing import TYPE_CHECKING, List, Tuple, Union
 
 from synapse.api.errors import (
     NotFoundError,
@@ -169,7 +169,7 @@ class PushRuleRestServlet(RestServlet):
             raise UnrecognizedRequestError()
 
 
-def _rule_spec_from_path(path: Sequence[str]) -> RuleSpec:
+def _rule_spec_from_path(path: List[str]) -> RuleSpec:
     """Turn a sequence of path components into a rule spec
 
     Args:
-- 
cgit 1.5.1


From 345576bc349f2c96b273bea246a5bb44c705c6ec Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 26 Jan 2023 13:24:15 -0500
Subject: Fix paginating /relations with a live token (#14866)

The `/relations` endpoint did not properly handle "live tokens"
(i.e. sync tokens). To do this properly, we abstract the code that
`/messages` has and re-use it.
---
 changelog.d/14866.bugfix                    |   1 +
 synapse/storage/databases/main/relations.py |  38 +++----
 synapse/storage/databases/main/stream.py    | 154 +++++++++++++++++++---------
 3 files changed, 123 insertions(+), 70 deletions(-)
 create mode 100644 changelog.d/14866.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14866.bugfix b/changelog.d/14866.bugfix
new file mode 100644
index 0000000000..540f918cbd
--- /dev/null
+++ b/changelog.d/14866.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.53.0 where `next_batch` tokens from `/sync` could not be used with the `/relations` endpoint.
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index 84f844b79e..be2242b6ac 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -40,9 +40,13 @@ from synapse.storage.database import (
     LoggingTransaction,
     make_in_list_sql_clause,
 )
-from synapse.storage.databases.main.stream import generate_pagination_where_clause
+from synapse.storage.databases.main.stream import (
+    generate_next_token,
+    generate_pagination_bounds,
+    generate_pagination_where_clause,
+)
 from synapse.storage.engines import PostgresEngine
-from synapse.types import JsonDict, RoomStreamToken, StreamKeyType, StreamToken
+from synapse.types import JsonDict, StreamKeyType, StreamToken
 from synapse.util.caches.descriptors import cached, cachedList
 
 if TYPE_CHECKING:
@@ -207,24 +211,23 @@ class RelationsWorkerStore(SQLBaseStore):
             where_clause.append("type = ?")
             where_args.append(event_type)
 
+        order, from_bound, to_bound = generate_pagination_bounds(
+            direction,
+            from_token.room_key if from_token else None,
+            to_token.room_key if to_token else None,
+        )
+
         pagination_clause = generate_pagination_where_clause(
             direction=direction,
             column_names=("topological_ordering", "stream_ordering"),
-            from_token=from_token.room_key.as_historical_tuple()
-            if from_token
-            else None,
-            to_token=to_token.room_key.as_historical_tuple() if to_token else None,
+            from_token=from_bound,
+            to_token=to_bound,
             engine=self.database_engine,
         )
 
         if pagination_clause:
             where_clause.append(pagination_clause)
 
-        if direction == "b":
-            order = "DESC"
-        else:
-            order = "ASC"
-
         sql = """
             SELECT event_id, relation_type, sender, topological_ordering, stream_ordering
             FROM event_relations
@@ -266,16 +269,9 @@ class RelationsWorkerStore(SQLBaseStore):
                 topo_orderings = topo_orderings[:limit]
                 stream_orderings = stream_orderings[:limit]
 
-                topo = topo_orderings[-1]
-                token = stream_orderings[-1]
-                if direction == "b":
-                    # Tokens are positions between events.
-                    # This token points *after* the last event in the chunk.
-                    # We need it to point to the event before it in the chunk
-                    # when we are going backwards so we subtract one from the
-                    # stream part.
-                    token -= 1
-                next_key = RoomStreamToken(topo, token)
+                next_key = generate_next_token(
+                    direction, topo_orderings[-1], stream_orderings[-1]
+                )
 
                 if from_token:
                     next_token = from_token.copy_and_replace(
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index d28fc65df9..8977bf33e7 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -170,6 +170,104 @@ def generate_pagination_where_clause(
     return " AND ".join(where_clause)
 
 
+def generate_pagination_bounds(
+    direction: str,
+    from_token: Optional[RoomStreamToken],
+    to_token: Optional[RoomStreamToken],
+) -> Tuple[
+    str, Optional[Tuple[Optional[int], int]], Optional[Tuple[Optional[int], int]]
+]:
+    """
+    Generate a start and end point for this page of events.
+
+    Args:
+        direction: Whether pagination is going forwards or backwards. One of "f" or "b".
+        from_token: The token to start pagination at, or None to start at the first value.
+        to_token: The token to end pagination at, or None to not limit the end point.
+
+    Returns:
+        A three tuple of:
+
+            ASC or DESC for sorting of the query.
+
+            The starting position as a tuple of ints representing
+            (topological position, stream position) or None if no from_token was
+            provided. The topological position may be None for live tokens.
+
+            The end position in the same format as the starting position, or None
+            if no to_token was provided.
+    """
+
+    # Tokens really represent positions between elements, but we use
+    # the convention of pointing to the event before the gap. Hence
+    # we have a bit of asymmetry when it comes to equalities.
+    if direction == "b":
+        order = "DESC"
+    else:
+        order = "ASC"
+
+    # The bounds for the stream tokens are complicated by the fact
+    # that we need to handle the instance_map part of the tokens. We do this
+    # by fetching all events between the min stream token and the maximum
+    # stream token (as returned by `RoomStreamToken.get_max_stream_pos`) and
+    # then filtering the results.
+    from_bound: Optional[Tuple[Optional[int], int]] = None
+    if from_token:
+        if from_token.topological is not None:
+            from_bound = from_token.as_historical_tuple()
+        elif direction == "b":
+            from_bound = (
+                None,
+                from_token.get_max_stream_pos(),
+            )
+        else:
+            from_bound = (
+                None,
+                from_token.stream,
+            )
+
+    to_bound: Optional[Tuple[Optional[int], int]] = None
+    if to_token:
+        if to_token.topological is not None:
+            to_bound = to_token.as_historical_tuple()
+        elif direction == "b":
+            to_bound = (
+                None,
+                to_token.stream,
+            )
+        else:
+            to_bound = (
+                None,
+                to_token.get_max_stream_pos(),
+            )
+
+    return order, from_bound, to_bound
+
+
+def generate_next_token(
+    direction: str, last_topo_ordering: int, last_stream_ordering: int
+) -> RoomStreamToken:
+    """
+    Generate the next room stream token based on the currently returned data.
+
+    Args:
+        direction: Whether pagination is going forwards or backwards. One of "f" or "b".
+        last_topo_ordering: The last topological ordering being returned.
+        last_stream_ordering: The last stream ordering being returned.
+
+    Returns:
+        A new RoomStreamToken to return to the client.
+    """
+    if direction == "b":
+        # Tokens are positions between events.
+        # This token points *after* the last event in the chunk.
+        # We need it to point to the event before it in the chunk
+        # when we are going backwards so we subtract one from the
+        # stream part.
+        last_stream_ordering -= 1
+    return RoomStreamToken(last_topo_ordering, last_stream_ordering)
+
+
 def _make_generic_sql_bound(
     bound: str,
     column_names: Tuple[str, str],
@@ -1300,47 +1398,11 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
             `to_token`), or `limit` is zero.
         """
 
-        # Tokens really represent positions between elements, but we use
-        # the convention of pointing to the event before the gap. Hence
-        # we have a bit of asymmetry when it comes to equalities.
         args = [False, room_id]
-        if direction == "b":
-            order = "DESC"
-        else:
-            order = "ASC"
-
-        # The bounds for the stream tokens are complicated by the fact
-        # that we need to handle the instance_map part of the tokens. We do this
-        # by fetching all events between the min stream token and the maximum
-        # stream token (as returned by `RoomStreamToken.get_max_stream_pos`) and
-        # then filtering the results.
-        if from_token.topological is not None:
-            from_bound: Tuple[Optional[int], int] = from_token.as_historical_tuple()
-        elif direction == "b":
-            from_bound = (
-                None,
-                from_token.get_max_stream_pos(),
-            )
-        else:
-            from_bound = (
-                None,
-                from_token.stream,
-            )
 
-        to_bound: Optional[Tuple[Optional[int], int]] = None
-        if to_token:
-            if to_token.topological is not None:
-                to_bound = to_token.as_historical_tuple()
-            elif direction == "b":
-                to_bound = (
-                    None,
-                    to_token.stream,
-                )
-            else:
-                to_bound = (
-                    None,
-                    to_token.get_max_stream_pos(),
-                )
+        order, from_bound, to_bound = generate_pagination_bounds(
+            direction, from_token, to_token
+        )
 
         bounds = generate_pagination_where_clause(
             direction=direction,
@@ -1436,16 +1498,10 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
         ][:limit]
 
         if rows:
-            topo = rows[-1].topological_ordering
-            token = rows[-1].stream_ordering
-            if direction == "b":
-                # Tokens are positions between events.
-                # This token points *after* the last event in the chunk.
-                # We need it to point to the event before it in the chunk
-                # when we are going backwards so we subtract one from the
-                # stream part.
-                token -= 1
-            next_token = RoomStreamToken(topo, token)
+            assert rows[-1].topological_ordering is not None
+            next_token = generate_next_token(
+                direction, rows[-1].topological_ordering, rows[-1].stream_ordering
+            )
         else:
             # TODO (erikj): We should work out what to do here instead.
             next_token = to_token if to_token else from_token
-- 
cgit 1.5.1


From fc35e0673f5b46ea0f5e53ef15626b14a452ca82 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 26 Jan 2023 14:45:24 -0500
Subject: Add missing type hints in tests (#14879)

* Fix up type hints in tests.logging.
* Add missing type hints to test_transactions.
---
 changelog.d/14879.misc                 |  1 +
 mypy.ini                               |  6 ++---
 synapse/rest/client/transactions.py    |  3 ++-
 tests/logging/__init__.py              |  6 +++--
 tests/logging/test_opentracing.py      |  4 ++--
 tests/logging/test_remote_handler.py   | 25 +++++++++++++-------
 tests/logging/test_terse_json.py       | 30 ++++++++++++++----------
 tests/rest/client/test_transactions.py | 42 ++++++++++++++++++++++------------
 8 files changed, 75 insertions(+), 42 deletions(-)
 create mode 100644 changelog.d/14879.misc

(limited to 'synapse')

diff --git a/changelog.d/14879.misc b/changelog.d/14879.misc
new file mode 100644
index 0000000000..d44571b731
--- /dev/null
+++ b/changelog.d/14879.misc
@@ -0,0 +1 @@
+Add missing type hints.
diff --git a/mypy.ini b/mypy.ini
index e57bc64261..978d92940b 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -40,10 +40,7 @@ exclude = (?x)
    |tests/http/federation/test_matrix_federation_agent.py
    |tests/http/federation/test_srv_resolver.py
    |tests/http/test_proxyagent.py
-   |tests/logging/__init__.py
-   |tests/logging/test_terse_json.py
    |tests/module_api/test_api.py
-   |tests/rest/client/test_transactions.py
    |tests/rest/media/v1/test_media_storage.py
    |tests/server.py
    |tests/test_state.py
@@ -92,6 +89,9 @@ disallow_untyped_defs = True
 [mypy-tests.handlers.*]
 disallow_untyped_defs = True
 
+[mypy-tests.logging.*]
+disallow_untyped_defs = True
+
 [mypy-tests.metrics.*]
 disallow_untyped_defs = True
 
diff --git a/synapse/rest/client/transactions.py b/synapse/rest/client/transactions.py
index 61375651bc..3f40f1874a 100644
--- a/synapse/rest/client/transactions.py
+++ b/synapse/rest/client/transactions.py
@@ -19,6 +19,7 @@ from typing import TYPE_CHECKING, Awaitable, Callable, Dict, Tuple
 
 from typing_extensions import ParamSpec
 
+from twisted.internet.defer import Deferred
 from twisted.python.failure import Failure
 from twisted.web.server import Request
 
@@ -90,7 +91,7 @@ class HttpTransactionCache:
         fn: Callable[P, Awaitable[Tuple[int, JsonDict]]],
         *args: P.args,
         **kwargs: P.kwargs,
-    ) -> Awaitable[Tuple[int, JsonDict]]:
+    ) -> "Deferred[Tuple[int, JsonDict]]":
         """Fetches the response for this transaction, or executes the given function
         to produce a response for this transaction.
 
diff --git a/tests/logging/__init__.py b/tests/logging/__init__.py
index 1acf5666a8..1c5de95a80 100644
--- a/tests/logging/__init__.py
+++ b/tests/logging/__init__.py
@@ -13,9 +13,11 @@
 # limitations under the License.
 import logging
 
+from tests.unittest import TestCase
 
-class LoggerCleanupMixin:
-    def get_logger(self, handler):
+
+class LoggerCleanupMixin(TestCase):
+    def get_logger(self, handler: logging.Handler) -> logging.Logger:
         """
         Attach a handler to a logger and add clean-ups to remove revert this.
         """
diff --git a/tests/logging/test_opentracing.py b/tests/logging/test_opentracing.py
index 0917e478a5..e28ba84cc2 100644
--- a/tests/logging/test_opentracing.py
+++ b/tests/logging/test_opentracing.py
@@ -153,7 +153,7 @@ class LogContextScopeManagerTestCase(TestCase):
 
         scopes = []
 
-        async def task(i: int):
+        async def task(i: int) -> None:
             scope = start_active_span(
                 f"task{i}",
                 tracer=self._tracer,
@@ -165,7 +165,7 @@ class LogContextScopeManagerTestCase(TestCase):
             self.assertEqual(self._tracer.active_span, scope.span)
             scope.close()
 
-        async def root():
+        async def root() -> None:
             with start_active_span("root span", tracer=self._tracer) as root_scope:
                 self.assertEqual(self._tracer.active_span, root_scope.span)
                 scopes.append(root_scope)
diff --git a/tests/logging/test_remote_handler.py b/tests/logging/test_remote_handler.py
index b0d046fe00..c08954d887 100644
--- a/tests/logging/test_remote_handler.py
+++ b/tests/logging/test_remote_handler.py
@@ -11,7 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from twisted.test.proto_helpers import AccumulatingProtocol
+from typing import Tuple
+
+from twisted.internet.protocol import Protocol
+from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactorClock
 
 from synapse.logging import RemoteHandler
 
@@ -20,7 +23,9 @@ from tests.server import FakeTransport, get_clock
 from tests.unittest import TestCase
 
 
-def connect_logging_client(reactor, client_id):
+def connect_logging_client(
+    reactor: MemoryReactorClock, client_id: int
+) -> Tuple[Protocol, AccumulatingProtocol]:
     # This is essentially tests.server.connect_client, but disabling autoflush on
     # the client transport. This is necessary to avoid an infinite loop due to
     # sending of data via the logging transport causing additional logs to be
@@ -35,10 +40,10 @@ def connect_logging_client(reactor, client_id):
 
 
 class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
-    def setUp(self):
+    def setUp(self) -> None:
         self.reactor, _ = get_clock()
 
-    def test_log_output(self):
+    def test_log_output(self) -> None:
         """
         The remote handler delivers logs over TCP.
         """
@@ -51,6 +56,7 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
         client, server = connect_logging_client(self.reactor, 0)
 
         # Trigger data being sent
+        assert isinstance(client.transport, FakeTransport)
         client.transport.flush()
 
         # One log message, with a single trailing newline
@@ -61,7 +67,7 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
         # Ensure the data passed through properly.
         self.assertEqual(logs[0], "Hello there, wally!")
 
-    def test_log_backpressure_debug(self):
+    def test_log_backpressure_debug(self) -> None:
         """
         When backpressure is hit, DEBUG logs will be shed.
         """
@@ -83,6 +89,7 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
 
         # Allow the reconnection
         client, server = connect_logging_client(self.reactor, 0)
+        assert isinstance(client.transport, FakeTransport)
         client.transport.flush()
 
         # Only the 7 infos made it through, the debugs were elided
@@ -90,7 +97,7 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
         self.assertEqual(len(logs), 7)
         self.assertNotIn(b"debug", server.data)
 
-    def test_log_backpressure_info(self):
+    def test_log_backpressure_info(self) -> None:
         """
         When backpressure is hit, DEBUG and INFO logs will be shed.
         """
@@ -116,6 +123,7 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
 
         # Allow the reconnection
         client, server = connect_logging_client(self.reactor, 0)
+        assert isinstance(client.transport, FakeTransport)
         client.transport.flush()
 
         # The 10 warnings made it through, the debugs and infos were elided
@@ -124,7 +132,7 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
         self.assertNotIn(b"debug", server.data)
         self.assertNotIn(b"info", server.data)
 
-    def test_log_backpressure_cut_middle(self):
+    def test_log_backpressure_cut_middle(self) -> None:
         """
         When backpressure is hit, and no more DEBUG and INFOs cannot be culled,
         it will cut the middle messages out.
@@ -140,6 +148,7 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
 
         # Allow the reconnection
         client, server = connect_logging_client(self.reactor, 0)
+        assert isinstance(client.transport, FakeTransport)
         client.transport.flush()
 
         # The first five and last five warnings made it through, the debugs and
@@ -151,7 +160,7 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
             logs,
         )
 
-    def test_cancel_connection(self):
+    def test_cancel_connection(self) -> None:
         """
         Gracefully handle the connection being cancelled.
         """
diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py
index 0b0d8737c1..fa27f1279a 100644
--- a/tests/logging/test_terse_json.py
+++ b/tests/logging/test_terse_json.py
@@ -14,24 +14,28 @@
 import json
 import logging
 from io import BytesIO, StringIO
+from typing import cast
 from unittest.mock import Mock, patch
 
+from twisted.web.http import HTTPChannel
 from twisted.web.server import Request
 
 from synapse.http.site import SynapseRequest
 from synapse.logging._terse_json import JsonFormatter, TerseJsonFormatter
 from synapse.logging.context import LoggingContext, LoggingContextFilter
+from synapse.types import JsonDict
 
 from tests.logging import LoggerCleanupMixin
-from tests.server import FakeChannel
+from tests.server import FakeChannel, get_clock
 from tests.unittest import TestCase
 
 
 class TerseJsonTestCase(LoggerCleanupMixin, TestCase):
-    def setUp(self):
+    def setUp(self) -> None:
         self.output = StringIO()
+        self.reactor, _ = get_clock()
 
-    def get_log_line(self):
+    def get_log_line(self) -> JsonDict:
         # One log message, with a single trailing newline.
         data = self.output.getvalue()
         logs = data.splitlines()
@@ -39,7 +43,7 @@ class TerseJsonTestCase(LoggerCleanupMixin, TestCase):
         self.assertEqual(data.count("\n"), 1)
         return json.loads(logs[0])
 
-    def test_terse_json_output(self):
+    def test_terse_json_output(self) -> None:
         """
         The Terse JSON formatter converts log messages to JSON.
         """
@@ -61,7 +65,7 @@ class TerseJsonTestCase(LoggerCleanupMixin, TestCase):
         self.assertCountEqual(log.keys(), expected_log_keys)
         self.assertEqual(log["log"], "Hello there, wally!")
 
-    def test_extra_data(self):
+    def test_extra_data(self) -> None:
         """
         Additional information can be included in the structured logging.
         """
@@ -93,7 +97,7 @@ class TerseJsonTestCase(LoggerCleanupMixin, TestCase):
         self.assertEqual(log["int"], 3)
         self.assertIs(log["bool"], True)
 
-    def test_json_output(self):
+    def test_json_output(self) -> None:
         """
         The Terse JSON formatter converts log messages to JSON.
         """
@@ -114,7 +118,7 @@ class TerseJsonTestCase(LoggerCleanupMixin, TestCase):
         self.assertCountEqual(log.keys(), expected_log_keys)
         self.assertEqual(log["log"], "Hello there, wally!")
 
-    def test_with_context(self):
+    def test_with_context(self) -> None:
         """
         The logging context should be added to the JSON response.
         """
@@ -139,7 +143,7 @@ class TerseJsonTestCase(LoggerCleanupMixin, TestCase):
         self.assertEqual(log["log"], "Hello there, wally!")
         self.assertEqual(log["request"], "name")
 
-    def test_with_request_context(self):
+    def test_with_request_context(self) -> None:
         """
         Information from the logging context request should be added to the JSON response.
         """
@@ -154,11 +158,13 @@ class TerseJsonTestCase(LoggerCleanupMixin, TestCase):
         site.server_version_string = "Server v1"
         site.reactor = Mock()
         site.experimental_cors_msc3886 = False
-        request = SynapseRequest(FakeChannel(site, None), site)
+        request = SynapseRequest(
+            cast(HTTPChannel, FakeChannel(site, self.reactor)), site
+        )
         # Call requestReceived to finish instantiating the object.
         request.content = BytesIO()
-        # Partially skip some of the internal processing of SynapseRequest.
-        request._started_processing = Mock()
+        # Partially skip some internal processing of SynapseRequest.
+        request._started_processing = Mock()  # type: ignore[assignment]
         request.request_metrics = Mock(spec=["name"])
         with patch.object(Request, "render"):
             request.requestReceived(b"POST", b"/_matrix/client/versions", b"1.1")
@@ -200,7 +206,7 @@ class TerseJsonTestCase(LoggerCleanupMixin, TestCase):
         self.assertEqual(log["protocol"], "1.1")
         self.assertEqual(log["user_agent"], "")
 
-    def test_with_exception(self):
+    def test_with_exception(self) -> None:
         """
         The logging exception type & value should be added to the JSON response.
         """
diff --git a/tests/rest/client/test_transactions.py b/tests/rest/client/test_transactions.py
index 21a1ca2a68..3086e1b565 100644
--- a/tests/rest/client/test_transactions.py
+++ b/tests/rest/client/test_transactions.py
@@ -13,18 +13,22 @@
 # limitations under the License.
 
 from http import HTTPStatus
+from typing import Any, Generator, Tuple, cast
 from unittest.mock import Mock, call
 
-from twisted.internet import defer, reactor
+from twisted.internet import defer, reactor as _reactor
 
 from synapse.logging.context import SENTINEL_CONTEXT, LoggingContext, current_context
 from synapse.rest.client.transactions import CLEANUP_PERIOD_MS, HttpTransactionCache
+from synapse.types import ISynapseReactor, JsonDict
 from synapse.util import Clock
 
 from tests import unittest
 from tests.test_utils import make_awaitable
 from tests.utils import MockClock
 
+reactor = cast(ISynapseReactor, _reactor)
+
 
 class HttpTransactionCacheTestCase(unittest.TestCase):
     def setUp(self) -> None:
@@ -34,11 +38,13 @@ class HttpTransactionCacheTestCase(unittest.TestCase):
         self.hs.get_auth = Mock()
         self.cache = HttpTransactionCache(self.hs)
 
-        self.mock_http_response = (HTTPStatus.OK, "GOOD JOB!")
+        self.mock_http_response = (HTTPStatus.OK, {"result": "GOOD JOB!"})
         self.mock_key = "foo"
 
     @defer.inlineCallbacks
-    def test_executes_given_function(self):
+    def test_executes_given_function(
+        self,
+    ) -> Generator["defer.Deferred[Any]", object, None]:
         cb = Mock(return_value=make_awaitable(self.mock_http_response))
         res = yield self.cache.fetch_or_execute(
             self.mock_key, cb, "some_arg", keyword="arg"
@@ -47,7 +53,9 @@ class HttpTransactionCacheTestCase(unittest.TestCase):
         self.assertEqual(res, self.mock_http_response)
 
     @defer.inlineCallbacks
-    def test_deduplicates_based_on_key(self):
+    def test_deduplicates_based_on_key(
+        self,
+    ) -> Generator["defer.Deferred[Any]", object, None]:
         cb = Mock(return_value=make_awaitable(self.mock_http_response))
         for i in range(3):  # invoke multiple times
             res = yield self.cache.fetch_or_execute(
@@ -58,18 +66,20 @@ class HttpTransactionCacheTestCase(unittest.TestCase):
         cb.assert_called_once_with("some_arg", keyword="arg", changing_args=0)
 
     @defer.inlineCallbacks
-    def test_logcontexts_with_async_result(self):
+    def test_logcontexts_with_async_result(
+        self,
+    ) -> Generator["defer.Deferred[Any]", object, None]:
         @defer.inlineCallbacks
-        def cb():
+        def cb() -> Generator["defer.Deferred[object]", object, Tuple[int, JsonDict]]:
             yield Clock(reactor).sleep(0)
-            return "yay"
+            return 1, {}
 
         @defer.inlineCallbacks
-        def test():
+        def test() -> Generator["defer.Deferred[Any]", object, None]:
             with LoggingContext("c") as c1:
                 res = yield self.cache.fetch_or_execute(self.mock_key, cb)
                 self.assertIs(current_context(), c1)
-                self.assertEqual(res, "yay")
+                self.assertEqual(res, (1, {}))
 
         # run the test twice in parallel
         d = defer.gatherResults([test(), test()])
@@ -78,13 +88,15 @@ class HttpTransactionCacheTestCase(unittest.TestCase):
         self.assertIs(current_context(), SENTINEL_CONTEXT)
 
     @defer.inlineCallbacks
-    def test_does_not_cache_exceptions(self):
+    def test_does_not_cache_exceptions(
+        self,
+    ) -> Generator["defer.Deferred[Any]", object, None]:
         """Checks that, if the callback throws an exception, it is called again
         for the next request.
         """
         called = [False]
 
-        def cb():
+        def cb() -> "defer.Deferred[Tuple[int, JsonDict]]":
             if called[0]:
                 # return a valid result the second time
                 return defer.succeed(self.mock_http_response)
@@ -104,13 +116,15 @@ class HttpTransactionCacheTestCase(unittest.TestCase):
             self.assertIs(current_context(), test_context)
 
     @defer.inlineCallbacks
-    def test_does_not_cache_failures(self):
+    def test_does_not_cache_failures(
+        self,
+    ) -> Generator["defer.Deferred[Any]", object, None]:
         """Checks that, if the callback returns a failure, it is called again
         for the next request.
         """
         called = [False]
 
-        def cb():
+        def cb() -> "defer.Deferred[Tuple[int, JsonDict]]":
             if called[0]:
                 # return a valid result the second time
                 return defer.succeed(self.mock_http_response)
@@ -130,7 +144,7 @@ class HttpTransactionCacheTestCase(unittest.TestCase):
             self.assertIs(current_context(), test_context)
 
     @defer.inlineCallbacks
-    def test_cleans_up(self):
+    def test_cleans_up(self) -> Generator["defer.Deferred[Any]", object, None]:
         cb = Mock(return_value=make_awaitable(self.mock_http_response))
         yield self.cache.fetch_or_execute(self.mock_key, cb, "an arg")
         # should NOT have cleaned up yet
-- 
cgit 1.5.1


From 265735db9d7b0698a511fc9389db4d6f104f1aa8 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 27 Jan 2023 07:27:55 -0500
Subject: Use an enum for direction. (#14927)

For better type safety we use an enum instead of strings to
configure direction (backwards or forwards).
---
 changelog.d/14927.misc                      |  1 +
 synapse/api/constants.py                    |  7 ++++
 synapse/handlers/admin.py                   |  4 +-
 synapse/handlers/initial_sync.py            | 16 +++++++-
 synapse/handlers/pagination.py              |  6 +--
 synapse/handlers/relations.py               |  8 +++-
 synapse/storage/databases/main/relations.py |  8 ++--
 synapse/storage/databases/main/stream.py    | 59 +++++++++++++++--------------
 synapse/streams/config.py                   | 11 ++++--
 9 files changed, 76 insertions(+), 44 deletions(-)
 create mode 100644 changelog.d/14927.misc

(limited to 'synapse')

diff --git a/changelog.d/14927.misc b/changelog.d/14927.misc
new file mode 100644
index 0000000000..9f5384e60e
--- /dev/null
+++ b/changelog.d/14927.misc
@@ -0,0 +1 @@
+Add missing type hints.
\ No newline at end of file
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index 6432d32d83..6f9239d21c 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -17,6 +17,8 @@
 
 """Contains constants from the specification."""
 
+import enum
+
 from typing_extensions import Final
 
 # the max size of a (canonical-json-encoded) event
@@ -290,3 +292,8 @@ class ApprovalNoticeMedium:
 
     NONE = "org.matrix.msc3866.none"
     EMAIL = "org.matrix.msc3866.email"
+
+
+class Direction(enum.Enum):
+    BACKWARDS = "b"
+    FORWARDS = "f"
diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py
index 5bf8e86387..c81ea34758 100644
--- a/synapse/handlers/admin.py
+++ b/synapse/handlers/admin.py
@@ -16,7 +16,7 @@ import abc
 import logging
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set
 
-from synapse.api.constants import Membership
+from synapse.api.constants import Direction, Membership
 from synapse.events import EventBase
 from synapse.types import JsonDict, RoomStreamToken, StateMap, UserID
 from synapse.visibility import filter_events_for_client
@@ -197,7 +197,7 @@ class AdminHandler:
             # efficient method perhaps but it does guarantee we get everything.
             while True:
                 events, _ = await self.store.paginate_room_events(
-                    room_id, from_key, to_key, limit=100, direction="f"
+                    room_id, from_key, to_key, limit=100, direction=Direction.FORWARDS
                 )
                 if not events:
                     break
diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py
index 8c2260ad7d..191529bd8e 100644
--- a/synapse/handlers/initial_sync.py
+++ b/synapse/handlers/initial_sync.py
@@ -15,7 +15,13 @@
 import logging
 from typing import TYPE_CHECKING, List, Optional, Tuple, cast
 
-from synapse.api.constants import AccountDataTypes, EduTypes, EventTypes, Membership
+from synapse.api.constants import (
+    AccountDataTypes,
+    Direction,
+    EduTypes,
+    EventTypes,
+    Membership,
+)
 from synapse.api.errors import SynapseError
 from synapse.events import EventBase
 from synapse.events.utils import SerializeEventConfig
@@ -57,7 +63,13 @@ class InitialSyncHandler:
         self.validator = EventValidator()
         self.snapshot_cache: ResponseCache[
             Tuple[
-                str, Optional[StreamToken], Optional[StreamToken], str, int, bool, bool
+                str,
+                Optional[StreamToken],
+                Optional[StreamToken],
+                Direction,
+                int,
+                bool,
+                bool,
             ]
         ] = ResponseCache(hs.get_clock(), "initial_sync_cache")
         self._event_serializer = hs.get_event_client_serializer()
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index 1fe6567185..ceefa16b49 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -19,7 +19,7 @@ import attr
 
 from twisted.python.failure import Failure
 
-from synapse.api.constants import EventTypes, Membership
+from synapse.api.constants import Direction, EventTypes, Membership
 from synapse.api.errors import SynapseError
 from synapse.api.filtering import Filter
 from synapse.events.utils import SerializeEventConfig
@@ -448,7 +448,7 @@ class PaginationHandler:
 
         if pagin_config.from_token:
             from_token = pagin_config.from_token
-        elif pagin_config.direction == "f":
+        elif pagin_config.direction == Direction.FORWARDS:
             from_token = (
                 await self.hs.get_event_sources().get_start_token_for_pagination(
                     room_id
@@ -476,7 +476,7 @@ class PaginationHandler:
                     room_id, requester, allow_departed_users=True
                 )
 
-            if pagin_config.direction == "b":
+            if pagin_config.direction == Direction.BACKWARDS:
                 # if we're going backwards, we might need to backfill. This
                 # requires that we have a topo token.
                 if room_token.topological:
diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py
index e96f9999a8..0fb15391e0 100644
--- a/synapse/handlers/relations.py
+++ b/synapse/handlers/relations.py
@@ -17,7 +17,7 @@ from typing import TYPE_CHECKING, Collection, Dict, FrozenSet, Iterable, List, O
 
 import attr
 
-from synapse.api.constants import EventTypes, RelationTypes
+from synapse.api.constants import Direction, EventTypes, RelationTypes
 from synapse.api.errors import SynapseError
 from synapse.events import EventBase, relation_from_event
 from synapse.logging.context import make_deferred_yieldable, run_in_background
@@ -413,7 +413,11 @@ class RelationsHandler:
 
                 # Attempt to find another event to use as the latest event.
                 potential_events, _ = await self._main_store.get_relations_for_event(
-                    event_id, event, room_id, RelationTypes.THREAD, direction="f"
+                    event_id,
+                    event,
+                    room_id,
+                    RelationTypes.THREAD,
+                    direction=Direction.FORWARDS,
                 )
 
                 # Filter out ignored users.
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index be2242b6ac..0018d6f7ab 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -30,7 +30,7 @@ from typing import (
 
 import attr
 
-from synapse.api.constants import MAIN_TIMELINE, RelationTypes
+from synapse.api.constants import MAIN_TIMELINE, Direction, RelationTypes
 from synapse.api.errors import SynapseError
 from synapse.events import EventBase
 from synapse.storage._base import SQLBaseStore
@@ -168,7 +168,7 @@ class RelationsWorkerStore(SQLBaseStore):
         relation_type: Optional[str] = None,
         event_type: Optional[str] = None,
         limit: int = 5,
-        direction: str = "b",
+        direction: Direction = Direction.BACKWARDS,
         from_token: Optional[StreamToken] = None,
         to_token: Optional[StreamToken] = None,
     ) -> Tuple[List[_RelatedEvent], Optional[StreamToken]]:
@@ -181,8 +181,8 @@ class RelationsWorkerStore(SQLBaseStore):
             relation_type: Only fetch events with this relation type, if given.
             event_type: Only fetch events with this event type, if given.
             limit: Only fetch the most recent `limit` events.
-            direction: Whether to fetch the most recent first (`"b"`) or the
-                oldest first (`"f"`).
+            direction: Whether to fetch the most recent first (backwards) or the
+                oldest first (forwards).
             from_token: Fetch rows from the given token, or from the start if None.
             to_token: Fetch rows up to the given token, or up to the end if None.
 
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 8977bf33e7..818c46182e 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -55,6 +55,7 @@ from typing_extensions import Literal
 
 from twisted.internet import defer
 
+from synapse.api.constants import Direction
 from synapse.api.filtering import Filter
 from synapse.events import EventBase
 from synapse.logging.context import make_deferred_yieldable, run_in_background
@@ -86,7 +87,6 @@ MAX_STREAM_SIZE = 1000
 _STREAM_TOKEN = "stream"
 _TOPOLOGICAL_TOKEN = "topological"
 
-
 # Used as return values for pagination APIs
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class _EventDictReturn:
@@ -104,7 +104,7 @@ class _EventsAround:
 
 
 def generate_pagination_where_clause(
-    direction: str,
+    direction: Direction,
     column_names: Tuple[str, str],
     from_token: Optional[Tuple[Optional[int], int]],
     to_token: Optional[Tuple[Optional[int], int]],
@@ -130,27 +130,26 @@ def generate_pagination_where_clause(
           token, but include those that match the to token.
 
     Args:
-        direction: Whether we're paginating backwards("b") or forwards ("f").
+        direction: Whether we're paginating backwards or forwards.
         column_names: The column names to bound. Must *not* be user defined as
             these get inserted directly into the SQL statement without escapes.
         from_token: The start point for the pagination. This is an exclusive
-            minimum bound if direction is "f", and an inclusive maximum bound if
-            direction is "b".
+            minimum bound if direction is forwards, and an inclusive maximum bound if
+            direction is backwards.
         to_token: The endpoint point for the pagination. This is an inclusive
-            maximum bound if direction is "f", and an exclusive minimum bound if
-            direction is "b".
+            maximum bound if direction is forwards, and an exclusive minimum bound if
+            direction is backwards.
         engine: The database engine to generate the clauses for
 
     Returns:
         The sql expression
     """
-    assert direction in ("b", "f")
 
     where_clause = []
     if from_token:
         where_clause.append(
             _make_generic_sql_bound(
-                bound=">=" if direction == "b" else "<",
+                bound=">=" if direction == Direction.BACKWARDS else "<",
                 column_names=column_names,
                 values=from_token,
                 engine=engine,
@@ -160,7 +159,7 @@ def generate_pagination_where_clause(
     if to_token:
         where_clause.append(
             _make_generic_sql_bound(
-                bound="<" if direction == "b" else ">=",
+                bound="<" if direction == Direction.BACKWARDS else ">=",
                 column_names=column_names,
                 values=to_token,
                 engine=engine,
@@ -171,7 +170,7 @@ def generate_pagination_where_clause(
 
 
 def generate_pagination_bounds(
-    direction: str,
+    direction: Direction,
     from_token: Optional[RoomStreamToken],
     to_token: Optional[RoomStreamToken],
 ) -> Tuple[
@@ -181,7 +180,7 @@ def generate_pagination_bounds(
     Generate a start and end point for this page of events.
 
     Args:
-        direction: Whether pagination is going forwards or backwards. One of "f" or "b".
+        direction: Whether pagination is going forwards or backwards.
         from_token: The token to start pagination at, or None to start at the first value.
         to_token: The token to end pagination at, or None to not limit the end point.
 
@@ -201,7 +200,7 @@ def generate_pagination_bounds(
     # Tokens really represent positions between elements, but we use
     # the convention of pointing to the event before the gap. Hence
     # we have a bit of asymmetry when it comes to equalities.
-    if direction == "b":
+    if direction == Direction.BACKWARDS:
         order = "DESC"
     else:
         order = "ASC"
@@ -215,7 +214,7 @@ def generate_pagination_bounds(
     if from_token:
         if from_token.topological is not None:
             from_bound = from_token.as_historical_tuple()
-        elif direction == "b":
+        elif direction == Direction.BACKWARDS:
             from_bound = (
                 None,
                 from_token.get_max_stream_pos(),
@@ -230,7 +229,7 @@ def generate_pagination_bounds(
     if to_token:
         if to_token.topological is not None:
             to_bound = to_token.as_historical_tuple()
-        elif direction == "b":
+        elif direction == Direction.BACKWARDS:
             to_bound = (
                 None,
                 to_token.stream,
@@ -245,20 +244,20 @@ def generate_pagination_bounds(
 
 
 def generate_next_token(
-    direction: str, last_topo_ordering: int, last_stream_ordering: int
+    direction: Direction, last_topo_ordering: int, last_stream_ordering: int
 ) -> RoomStreamToken:
     """
     Generate the next room stream token based on the currently returned data.
 
     Args:
-        direction: Whether pagination is going forwards or backwards. One of "f" or "b".
+        direction: Whether pagination is going forwards or backwards.
         last_topo_ordering: The last topological ordering being returned.
         last_stream_ordering: The last stream ordering being returned.
 
     Returns:
         A new RoomStreamToken to return to the client.
     """
-    if direction == "b":
+    if direction == Direction.BACKWARDS:
         # Tokens are positions between events.
         # This token points *after* the last event in the chunk.
         # We need it to point to the event before it in the chunk
@@ -1201,7 +1200,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
             txn,
             room_id,
             before_token,
-            direction="b",
+            direction=Direction.BACKWARDS,
             limit=before_limit,
             event_filter=event_filter,
         )
@@ -1211,7 +1210,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
             txn,
             room_id,
             after_token,
-            direction="f",
+            direction=Direction.FORWARDS,
             limit=after_limit,
             event_filter=event_filter,
         )
@@ -1374,7 +1373,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
         room_id: str,
         from_token: RoomStreamToken,
         to_token: Optional[RoomStreamToken] = None,
-        direction: str = "b",
+        direction: Direction = Direction.BACKWARDS,
         limit: int = -1,
         event_filter: Optional[Filter] = None,
     ) -> Tuple[List[_EventDictReturn], RoomStreamToken]:
@@ -1385,8 +1384,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
             room_id
             from_token: The token used to stream from
             to_token: A token which if given limits the results to only those before
-            direction: Either 'b' or 'f' to indicate whether we are paginating
-                forwards or backwards from `from_key`.
+            direction: Indicates whether we are paginating forwards or backwards
+                from `from_key`.
             limit: The maximum number of events to return.
             event_filter: If provided filters the events to
                 those that match the filter.
@@ -1489,8 +1488,12 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
             _EventDictReturn(event_id, topological_ordering, stream_ordering)
             for event_id, instance_name, topological_ordering, stream_ordering in txn
             if _filter_results(
-                lower_token=to_token if direction == "b" else from_token,
-                upper_token=from_token if direction == "b" else to_token,
+                lower_token=to_token
+                if direction == Direction.BACKWARDS
+                else from_token,
+                upper_token=from_token
+                if direction == Direction.BACKWARDS
+                else to_token,
                 instance_name=instance_name,
                 topological_ordering=topological_ordering,
                 stream_ordering=stream_ordering,
@@ -1514,7 +1517,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
         room_id: str,
         from_key: RoomStreamToken,
         to_key: Optional[RoomStreamToken] = None,
-        direction: str = "b",
+        direction: Direction = Direction.BACKWARDS,
         limit: int = -1,
         event_filter: Optional[Filter] = None,
     ) -> Tuple[List[EventBase], RoomStreamToken]:
@@ -1524,8 +1527,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
             room_id
             from_key: The token used to stream from
             to_key: A token which if given limits the results to only those before
-            direction: Either 'b' or 'f' to indicate whether we are paginating
-                forwards or backwards from `from_key`.
+            direction: Indicates whether we are paginating forwards or backwards
+                from `from_key`.
             limit: The maximum number of events to return.
             event_filter: If provided filters the events to those that match the filter.
 
diff --git a/synapse/streams/config.py b/synapse/streams/config.py
index 6df2de919c..5cb7875181 100644
--- a/synapse/streams/config.py
+++ b/synapse/streams/config.py
@@ -16,6 +16,7 @@ from typing import Optional
 
 import attr
 
+from synapse.api.constants import Direction
 from synapse.api.errors import SynapseError
 from synapse.http.servlet import parse_integer, parse_string
 from synapse.http.site import SynapseRequest
@@ -34,7 +35,7 @@ class PaginationConfig:
 
     from_token: Optional[StreamToken]
     to_token: Optional[StreamToken]
-    direction: str
+    direction: Direction
     limit: int
 
     @classmethod
@@ -45,9 +46,13 @@ class PaginationConfig:
         default_limit: int,
         default_dir: str = "f",
     ) -> "PaginationConfig":
-        direction = parse_string(
-            request, "dir", default=default_dir, allowed_values=["f", "b"]
+        direction_str = parse_string(
+            request,
+            "dir",
+            default=default_dir,
+            allowed_values=[Direction.FORWARDS.value, Direction.BACKWARDS.value],
         )
+        direction = Direction(direction_str)
 
         from_tok_str = parse_string(request, "from")
         to_tok_str = parse_string(request, "to")
-- 
cgit 1.5.1


From 2a51f3ec36abeb1f5c1db795541988d1d9698e41 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 27 Jan 2023 10:16:21 -0500
Subject: Implement MSC3952: Intentional mentions (#14823)

MSC3952 defines push rules which search for mentions in a list of
Matrix IDs in the event body, instead of searching the entire event
body for display name / local part.

This is implemented behind an experimental configuration flag and
does not yet implement the backwards compatibility pieces of the MSC.
---
 changelog.d/14823.feature                   |  1 +
 rust/src/push/base_rules.rs                 | 21 +++++++
 rust/src/push/evaluator.rs                  | 25 +++++++-
 rust/src/push/mod.rs                        | 34 +++++++++++
 stubs/synapse/synapse_rust/push.pyi         |  5 +-
 synapse/api/constants.py                    |  3 +
 synapse/config/experimental.py              |  5 ++
 synapse/push/bulk_push_rule_evaluator.py    | 25 +++++++-
 synapse/storage/databases/main/push_rule.py |  1 +
 tests/push/test_bulk_push_rule_evaluator.py | 88 +++++++++++++++++++++++++++++
 tests/push/test_push_rule_evaluator.py      | 66 +++++++++++++++++++---
 11 files changed, 263 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/14823.feature

(limited to 'synapse')

diff --git a/changelog.d/14823.feature b/changelog.d/14823.feature
new file mode 100644
index 0000000000..8293e99eff
--- /dev/null
+++ b/changelog.d/14823.feature
@@ -0,0 +1 @@
+Experimental support for [MSC3952](https://github.com/matrix-org/matrix-spec-proposals/pull/3952): intentional mentions.
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index 9140a69bb6..880eed0ef4 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -131,6 +131,14 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
         default: true,
         default_enabled: true,
     },
+    PushRule {
+        rule_id: Cow::Borrowed(".org.matrix.msc3952.is_user_mentioned"),
+        priority_class: 5,
+        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::IsUserMention)]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION, SOUND_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
     PushRule {
         rule_id: Cow::Borrowed("global/override/.m.rule.contains_display_name"),
         priority_class: 5,
@@ -139,6 +147,19 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
         default: true,
         default_enabled: true,
     },
+    PushRule {
+        rule_id: Cow::Borrowed(".org.matrix.msc3952.is_room_mentioned"),
+        priority_class: 5,
+        conditions: Cow::Borrowed(&[
+            Condition::Known(KnownCondition::IsRoomMention),
+            Condition::Known(KnownCondition::SenderNotificationPermission {
+                key: Cow::Borrowed("room"),
+            }),
+        ]),
+        actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION, SOUND_ACTION]),
+        default: true,
+        default_enabled: true,
+    },
     PushRule {
         rule_id: Cow::Borrowed("global/override/.m.rule.roomnotif"),
         priority_class: 5,
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index 0242ee1c5f..aa71202e43 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, BTreeSet};
 
 use anyhow::{Context, Error};
 use lazy_static::lazy_static;
@@ -68,6 +68,11 @@ pub struct PushRuleEvaluator {
     /// The "content.body", if any.
     body: String,
 
+    /// The user mentions that were part of the message.
+    user_mentions: BTreeSet<String>,
+    /// True if the message contains a room mention.
+    room_mention: bool,
+
     /// The number of users in the room.
     room_member_count: u64,
 
@@ -100,6 +105,8 @@ impl PushRuleEvaluator {
     #[new]
     pub fn py_new(
         flattened_keys: BTreeMap<String, String>,
+        user_mentions: BTreeSet<String>,
+        room_mention: bool,
         room_member_count: u64,
         sender_power_level: Option<i64>,
         notification_power_levels: BTreeMap<String, i64>,
@@ -116,6 +123,8 @@ impl PushRuleEvaluator {
         Ok(PushRuleEvaluator {
             flattened_keys,
             body,
+            user_mentions,
+            room_mention,
             room_member_count,
             notification_power_levels,
             sender_power_level,
@@ -229,6 +238,14 @@ impl PushRuleEvaluator {
             KnownCondition::RelatedEventMatch(event_match) => {
                 self.match_related_event_match(event_match, user_id)?
             }
+            KnownCondition::IsUserMention => {
+                if let Some(uid) = user_id {
+                    self.user_mentions.contains(uid)
+                } else {
+                    false
+                }
+            }
+            KnownCondition::IsRoomMention => self.room_mention,
             KnownCondition::ContainsDisplayName => {
                 if let Some(dn) = display_name {
                     if !dn.is_empty() {
@@ -424,6 +441,8 @@ fn push_rule_evaluator() {
     flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string());
     let evaluator = PushRuleEvaluator::py_new(
         flattened_keys,
+        BTreeSet::new(),
+        false,
         10,
         Some(0),
         BTreeMap::new(),
@@ -449,6 +468,8 @@ fn test_requires_room_version_supports_condition() {
     let flags = vec![RoomVersionFeatures::ExtensibleEvents.as_str().to_string()];
     let evaluator = PushRuleEvaluator::py_new(
         flattened_keys,
+        BTreeSet::new(),
+        false,
         10,
         Some(0),
         BTreeMap::new(),
@@ -483,7 +504,7 @@ fn test_requires_room_version_supports_condition() {
     };
     let rules = PushRules::new(vec![custom_rule]);
     result = evaluator.run(
-        &FilteredPushRules::py_new(rules, BTreeMap::new(), true, false, true),
+        &FilteredPushRules::py_new(rules, BTreeMap::new(), true, false, true, false),
         None,
         None,
     );
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index 842b13c88b..7e449f2433 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -269,6 +269,10 @@ pub enum KnownCondition {
     EventMatch(EventMatchCondition),
     #[serde(rename = "im.nheko.msc3664.related_event_match")]
     RelatedEventMatch(RelatedEventMatchCondition),
+    #[serde(rename = "org.matrix.msc3952.is_user_mention")]
+    IsUserMention,
+    #[serde(rename = "org.matrix.msc3952.is_room_mention")]
+    IsRoomMention,
     ContainsDisplayName,
     RoomMemberCount {
         #[serde(skip_serializing_if = "Option::is_none")]
@@ -414,6 +418,7 @@ pub struct FilteredPushRules {
     msc1767_enabled: bool,
     msc3381_polls_enabled: bool,
     msc3664_enabled: bool,
+    msc3952_intentional_mentions: bool,
 }
 
 #[pymethods]
@@ -425,6 +430,7 @@ impl FilteredPushRules {
         msc1767_enabled: bool,
         msc3381_polls_enabled: bool,
         msc3664_enabled: bool,
+        msc3952_intentional_mentions: bool,
     ) -> Self {
         Self {
             push_rules,
@@ -432,6 +438,7 @@ impl FilteredPushRules {
             msc1767_enabled,
             msc3381_polls_enabled,
             msc3664_enabled,
+            msc3952_intentional_mentions,
         }
     }
 
@@ -465,6 +472,11 @@ impl FilteredPushRules {
                     return false;
                 }
 
+                if !self.msc3952_intentional_mentions && rule.rule_id.contains("org.matrix.msc3952")
+                {
+                    return false;
+                }
+
                 true
             })
             .map(|r| {
@@ -522,6 +534,28 @@ fn test_deserialize_unstable_msc3931_condition() {
     ));
 }
 
+#[test]
+fn test_deserialize_unstable_msc3952_user_condition() {
+    let json = r#"{"kind":"org.matrix.msc3952.is_user_mention"}"#;
+
+    let condition: Condition = serde_json::from_str(json).unwrap();
+    assert!(matches!(
+        condition,
+        Condition::Known(KnownCondition::IsUserMention)
+    ));
+}
+
+#[test]
+fn test_deserialize_unstable_msc3952_room_condition() {
+    let json = r#"{"kind":"org.matrix.msc3952.is_room_mention"}"#;
+
+    let condition: Condition = serde_json::from_str(json).unwrap();
+    assert!(matches!(
+        condition,
+        Condition::Known(KnownCondition::IsRoomMention)
+    ));
+}
+
 #[test]
 fn test_deserialize_custom_condition() {
     let json = r#"{"kind":"custom_tag"}"#;
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index 304ed7111c..588d90c25a 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Tuple, Union
+from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Set, Tuple, Union
 
 from synapse.types import JsonDict
 
@@ -46,6 +46,7 @@ class FilteredPushRules:
         msc1767_enabled: bool,
         msc3381_polls_enabled: bool,
         msc3664_enabled: bool,
+        msc3952_intentional_mentions: bool,
     ): ...
     def rules(self) -> Collection[Tuple[PushRule, bool]]: ...
 
@@ -55,6 +56,8 @@ class PushRuleEvaluator:
     def __init__(
         self,
         flattened_keys: Mapping[str, str],
+        user_mentions: Set[str],
+        room_mention: bool,
         room_member_count: int,
         sender_power_level: Optional[int],
         notification_power_levels: Mapping[str, int],
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index 6f9239d21c..0f224b34cd 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -233,6 +233,9 @@ class EventContentFields:
     # The authorising user for joining a restricted room.
     AUTHORISING_USER: Final = "join_authorised_via_users_server"
 
+    # Used for mentioning users.
+    MSC3952_MENTIONS: Final = "org.matrix.msc3952.mentions"
+
     # an unspecced field added to to-device messages to identify them uniquely-ish
     TO_DEVICE_MSGID: Final = "org.matrix.msgid"
 
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 2590c88cde..d2d0270ddd 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -168,3 +168,8 @@ class ExperimentalConfig(Config):
 
         # MSC3925: do not replace events with their edits
         self.msc3925_inhibit_edit = experimental.get("msc3925_inhibit_edit", False)
+
+        # MSC3952: Intentional mentions
+        self.msc3952_intentional_mentions = experimental.get(
+            "msc3952_intentional_mentions", False
+        )
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index f27ba64d53..deaec19564 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -22,13 +22,20 @@ from typing import (
     List,
     Mapping,
     Optional,
+    Set,
     Tuple,
     Union,
 )
 
 from prometheus_client import Counter
 
-from synapse.api.constants import MAIN_TIMELINE, EventTypes, Membership, RelationTypes
+from synapse.api.constants import (
+    MAIN_TIMELINE,
+    EventContentFields,
+    EventTypes,
+    Membership,
+    RelationTypes,
+)
 from synapse.api.room_versions import PushRuleRoomFlag, RoomVersion
 from synapse.event_auth import auth_types_for_event, get_user_power_level
 from synapse.events import EventBase, relation_from_event
@@ -342,8 +349,24 @@ class BulkPushRuleEvaluator:
             for user_id, level in notification_levels.items():
                 notification_levels[user_id] = int(level)
 
+        # Pull out any user and room mentions.
+        mentions = event.content.get(EventContentFields.MSC3952_MENTIONS)
+        user_mentions: Set[str] = set()
+        room_mention = False
+        if isinstance(mentions, dict):
+            # Filter out any non-string items and convert to a set.
+            user_mentions_raw = mentions.get("user_ids")
+            if isinstance(user_mentions_raw, list):
+                user_mentions = set(
+                    filter(lambda item: isinstance(item, str), user_mentions_raw)
+                )
+            # Room mention is only true if the value is exactly true.
+            room_mention = mentions.get("room") is True
+
         evaluator = PushRuleEvaluator(
             _flatten_dict(event, room_version=event.room_version),
+            user_mentions,
+            room_mention,
             room_member_count,
             sender_power_level,
             notification_levels,
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index 14ca167b34..466a1145b7 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -89,6 +89,7 @@ def _load_rules(
         msc1767_enabled=experimental_config.msc1767_enabled,
         msc3664_enabled=experimental_config.msc3664_enabled,
         msc3381_polls_enabled=experimental_config.msc3381_polls_enabled,
+        msc3952_intentional_mentions=experimental_config.msc3952_intentional_mentions,
     )
 
     return filtered_rules
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
index 9c17a42b65..aba62b5dc8 100644
--- a/tests/push/test_bulk_push_rule_evaluator.py
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -12,10 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Any
 from unittest.mock import patch
 
 from twisted.test.proto_helpers import MemoryReactor
 
+from synapse.api.constants import EventContentFields
 from synapse.api.room_versions import RoomVersions
 from synapse.push.bulk_push_rule_evaluator import BulkPushRuleEvaluator
 from synapse.rest import admin
@@ -126,3 +128,89 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         # Ensure no actions are generated!
         self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)]))
         bulk_evaluator._action_for_event_by_user.assert_not_called()
+
+    @override_config({"experimental_features": {"msc3952_intentional_mentions": True}})
+    def test_mentions(self) -> None:
+        """Test the behavior of an event which includes valid and invalid mentions."""
+        bulk_evaluator = BulkPushRuleEvaluator(self.hs)
+
+        sentinel = object()
+
+        def create_and_process(mentions: Any = sentinel) -> bool:
+            """Returns true iff the `mentions` trigger an event push action."""
+            content = {}
+            if mentions is not sentinel:
+                content[EventContentFields.MSC3952_MENTIONS] = mentions
+
+            # Create a new message event which should cause a notification.
+            event, context = self.get_success(
+                self.event_creation_handler.create_event(
+                    self.requester,
+                    {
+                        "type": "test",
+                        "room_id": self.room_id,
+                        "content": content,
+                        "sender": f"@bob:{self.hs.hostname}",
+                    },
+                )
+            )
+
+            # Run the event through the bulk push rule evaluator.
+            self.get_success(
+                bulk_evaluator.action_for_events_by_user([(event, context)])
+            )
+
+            # If any actions are generated for this event, return true.
+            result = self.get_success(
+                self.hs.get_datastores().main.db_pool.simple_select_list(
+                    table="event_push_actions_staging",
+                    keyvalues={"event_id": event.event_id},
+                    retcols=("*",),
+                    desc="get_event_push_actions_staging",
+                )
+            )
+            return len(result) > 0
+
+        # Not including the mentions field should not notify.
+        self.assertFalse(create_and_process())
+        # An empty mentions field should not notify.
+        self.assertFalse(create_and_process({}))
+
+        # Non-dict mentions should be ignored.
+        mentions: Any
+        for mentions in (None, True, False, 1, "foo", []):
+            self.assertFalse(create_and_process(mentions))
+
+        # A non-list should be ignored.
+        for mentions in (None, True, False, 1, "foo", {}):
+            self.assertFalse(create_and_process({"user_ids": mentions}))
+
+        # The Matrix ID appearing anywhere in the list should notify.
+        self.assertTrue(create_and_process({"user_ids": [self.alice]}))
+        self.assertTrue(create_and_process({"user_ids": ["@another:test", self.alice]}))
+
+        # Duplicate user IDs should notify.
+        self.assertTrue(create_and_process({"user_ids": [self.alice, self.alice]}))
+
+        # Invalid entries in the list are ignored.
+        self.assertFalse(create_and_process({"user_ids": [None, True, False, {}, []]}))
+        self.assertTrue(
+            create_and_process({"user_ids": [None, True, False, {}, [], self.alice]})
+        )
+
+        # Room mentions from those without power should not notify.
+        self.assertFalse(create_and_process({"room": True}))
+
+        # Room mentions from those with power should notify.
+        self.helper.send_state(
+            self.room_id,
+            "m.room.power_levels",
+            {"notifications": {"room": 0}},
+            self.token,
+            state_key="",
+        )
+        self.assertTrue(create_and_process({"room": True}))
+
+        # Invalid data should not notify.
+        for mentions in (None, False, 1, "foo", [], {}):
+            self.assertFalse(create_and_process({"room": mentions}))
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index 1b87756b75..9d01c989d4 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict, List, Optional, Union, cast
+from typing import Dict, List, Optional, Set, Union, cast
 
 import frozendict
 
@@ -39,7 +39,12 @@ from tests.test_utils.event_injection import create_event, inject_member_event
 
 class PushRuleEvaluatorTestCase(unittest.TestCase):
     def _get_evaluator(
-        self, content: JsonMapping, related_events: Optional[JsonDict] = None
+        self,
+        content: JsonMapping,
+        *,
+        user_mentions: Optional[Set[str]] = None,
+        room_mention: bool = False,
+        related_events: Optional[JsonDict] = None,
     ) -> PushRuleEvaluator:
         event = FrozenEvent(
             {
@@ -57,13 +62,15 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         power_levels: Dict[str, Union[int, Dict[str, int]]] = {}
         return PushRuleEvaluator(
             _flatten_dict(event),
+            user_mentions or set(),
+            room_mention,
             room_member_count,
             sender_power_level,
             cast(Dict[str, int], power_levels.get("notifications", {})),
             {} if related_events is None else related_events,
-            True,
-            event.room_version.msc3931_push_features,
-            True,
+            related_event_match_enabled=True,
+            room_version_feature_flags=event.room_version.msc3931_push_features,
+            msc3931_enabled=True,
         )
 
     def test_display_name(self) -> None:
@@ -90,6 +97,51 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         # A display name with spaces should work fine.
         self.assertTrue(evaluator.matches(condition, "@user:test", "foo bar"))
 
+    def test_user_mentions(self) -> None:
+        """Check for user mentions."""
+        condition = {"kind": "org.matrix.msc3952.is_user_mention"}
+
+        # No mentions shouldn't match.
+        evaluator = self._get_evaluator({})
+        self.assertFalse(evaluator.matches(condition, "@user:test", None))
+
+        # An empty set shouldn't match
+        evaluator = self._get_evaluator({}, user_mentions=set())
+        self.assertFalse(evaluator.matches(condition, "@user:test", None))
+
+        # The Matrix ID appearing anywhere in the mentions list should match
+        evaluator = self._get_evaluator({}, user_mentions={"@user:test"})
+        self.assertTrue(evaluator.matches(condition, "@user:test", None))
+
+        evaluator = self._get_evaluator(
+            {}, user_mentions={"@another:test", "@user:test"}
+        )
+        self.assertTrue(evaluator.matches(condition, "@user:test", None))
+
+        # Note that invalid data is tested at tests.push.test_bulk_push_rule_evaluator.TestBulkPushRuleEvaluator.test_mentions
+        # since the BulkPushRuleEvaluator is what handles data sanitisation.
+
+    def test_room_mentions(self) -> None:
+        """Check for room mentions."""
+        condition = {"kind": "org.matrix.msc3952.is_room_mention"}
+
+        # No room mention shouldn't match.
+        evaluator = self._get_evaluator({})
+        self.assertFalse(evaluator.matches(condition, None, None))
+
+        # Room mention should match.
+        evaluator = self._get_evaluator({}, room_mention=True)
+        self.assertTrue(evaluator.matches(condition, None, None))
+
+        # A room mention and user mention is valid.
+        evaluator = self._get_evaluator(
+            {}, user_mentions={"@another:test"}, room_mention=True
+        )
+        self.assertTrue(evaluator.matches(condition, None, None))
+
+        # Note that invalid data is tested at tests.push.test_bulk_push_rule_evaluator.TestBulkPushRuleEvaluator.test_mentions
+        # since the BulkPushRuleEvaluator is what handles data sanitisation.
+
     def _assert_matches(
         self, condition: JsonDict, content: JsonMapping, msg: Optional[str] = None
     ) -> None:
@@ -308,7 +360,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
                     },
                 }
             },
-            {
+            related_events={
                 "m.in_reply_to": {
                     "event_id": "$parent_event_id",
                     "type": "m.room.message",
@@ -408,7 +460,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
                     },
                 }
             },
-            {
+            related_events={
                 "m.in_reply_to": {
                     "event_id": "$parent_event_id",
                     "type": "m.room.message",
-- 
cgit 1.5.1


From 510d4b06e7d346b4f94cb5598da90c9f668b62bb Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 30 Jan 2023 21:29:30 +0000
Subject: Handle malformed values of `notification.room` in power level events
 (#14942)

* Better test for bad values in power levels events

The previous test only checked that Synapse didn't raise an exception,
but didn't check that we had correctly interpreted the value of the
dodgy power level.

It also conflated two things: bad room notification levels, and bad user
levels. There _is_ logic for converting the latter to integers, but we
should test it separately.

* Check we ignore types that don't convert to int

* Handle `None` values in `notifications.room`

* Changelog

* Also test that bad values are rejected by event auth

* Docstring

* linter scripttttttttt
---
 changelog.d/14942.bugfix                    |  1 +
 synapse/push/bulk_push_rule_evaluator.py    | 19 +++++-
 tests/push/test_bulk_push_rule_evaluator.py | 93 +++++++++++++++++++++++++----
 tests/test_event_auth.py                    | 32 +++++++++-
 4 files changed, 128 insertions(+), 17 deletions(-)
 create mode 100644 changelog.d/14942.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14942.bugfix b/changelog.d/14942.bugfix
new file mode 100644
index 0000000000..a3ca3eb7e9
--- /dev/null
+++ b/changelog.d/14942.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.68.0 where we were unable to service remote joins in rooms with `@room` notification levels set to `null` in their (malformed) power levels.
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index deaec19564..88cfc05d05 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -69,6 +69,9 @@ STATE_EVENT_TYPES_TO_MARK_UNREAD = {
 }
 
 
+SENTINEL = object()
+
+
 def _should_count_as_unread(event: EventBase, context: EventContext) -> bool:
     # Exclude rejected and soft-failed events.
     if context.rejected or event.internal_metadata.is_soft_failed():
@@ -343,11 +346,21 @@ class BulkPushRuleEvaluator:
         related_events = await self._related_events(event)
 
         # It's possible that old room versions have non-integer power levels (floats or
-        # strings). Workaround this by explicitly converting to int.
+        # strings; even the occasional `null`). For old rooms, we interpret these as if
+        # they were integers. Do this here for the `@room` power level threshold.
+        # Note that this is done automatically for the sender's power level by
+        # _get_power_levels_and_sender_level in its call to get_user_power_level
+        # (even for room V10.)
         notification_levels = power_levels.get("notifications", {})
         if not event.room_version.msc3667_int_only_power_levels:
-            for user_id, level in notification_levels.items():
-                notification_levels[user_id] = int(level)
+            keys = list(notification_levels.keys())
+            for key in keys:
+                level = notification_levels.get(key, SENTINEL)
+                if level is not SENTINEL and type(level) is not int:
+                    try:
+                        notification_levels[key] = int(level)
+                    except (TypeError, ValueError):
+                        del notification_levels[key]
 
         # Pull out any user and room mentions.
         mentions = event.content.get(EventContentFields.MSC3952_MENTIONS)
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
index aba62b5dc8..fda48d9f61 100644
--- a/tests/push/test_bulk_push_rule_evaluator.py
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -15,6 +15,8 @@
 from typing import Any
 from unittest.mock import patch
 
+from parameterized import parameterized
+
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.constants import EventContentFields
@@ -48,35 +50,84 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         self.requester = create_requester(self.alice)
 
         self.room_id = self.helper.create_room_as(
-            self.alice, room_version=RoomVersions.V9.identifier, tok=self.token
+            # This is deliberately set to V9, because we want to test the logic which
+            # handles stringy power levels. Stringy power levels were outlawed in V10.
+            self.alice,
+            room_version=RoomVersions.V9.identifier,
+            tok=self.token,
         )
 
         self.event_creation_handler = self.hs.get_event_creation_handler()
 
-    def test_action_for_event_by_user_handles_noninteger_power_levels(self) -> None:
-        """We should convert floats and strings to integers before passing to Rust.
+    @parameterized.expand(
+        [
+            # The historically-permitted bad values. Alice's notification should be
+            # allowed if this threshold is at or below her power level (60)
+            ("100", False),
+            ("0", True),
+            (12.34, True),
+            (60.0, True),
+            (67.89, False),
+            # Values that int(...) would not successfully cast should be ignored.
+            # The room notification level should then default to 50, per the spec, so
+            # Alice's notification is allowed.
+            (None, True),
+            # We haven't seen `"room": []` or `"room": {}` in the wild (yet), but
+            # let's check them for paranoia's sake.
+            ([], True),
+            ({}, True),
+        ]
+    )
+    def test_action_for_event_by_user_handles_noninteger_room_power_levels(
+        self, bad_room_level: object, should_permit: bool
+    ) -> None:
+        """We should convert strings in `room` to integers before passing to Rust.
+
+        Test this as follows:
+        - Create a room as Alice and invite two other users Bob and Charlie.
+        - Set PLs so that Alice has PL 60 and `notifications.room` is set to a bad value.
+        - Have Alice create a message notifying @room.
+        - Evaluate notification actions for that message. This should not raise.
+        - Look in the DB to see if that message triggered a highlight for Bob.
+
+        The test is parameterised with two arguments:
+        - the bad power level value for "room", before JSON serialisation
+        - whether Bob should expect the message to be highlighted
 
         Reproduces #14060.
 
         A lack of validation: the gift that keeps on giving.
         """
-
-        # Alter the power levels in that room to include stringy and floaty levels.
-        # We need to suppress the validation logic or else it will reject these dodgy
-        # values. (Presumably this validation was not always present.)
+        # Join another user to the room, so that there is someone to see Alice's
+        # @room notification.
+        bob = self.register_user("bob", "pass")
+        bob_token = self.login(bob, "pass")
+        self.helper.join(self.room_id, bob, tok=bob_token)
+
+        # Alter the power levels in that room to include the bad @room notification
+        # level. We need to suppress
+        #
+        # - canonicaljson validation, because canonicaljson forbids floats;
+        # - the event jsonschema validation, because it will forbid bad values; and
+        # - the auth rules checks, because they stop us from creating power levels
+        #   with `"room": null`. (We want to test this case, because we have seen it
+        #   in the wild.)
+        #
+        # We have seen stringy and null values for "room" in the wild, so presumably
+        # some of this validation was missing in the past.
         with patch("synapse.events.validator.validate_canonicaljson"), patch(
             "synapse.events.validator.jsonschema.validate"
-        ):
-            self.helper.send_state(
+        ), patch("synapse.handlers.event_auth.check_state_dependent_auth_rules"):
+            pl_event_id = self.helper.send_state(
                 self.room_id,
                 "m.room.power_levels",
                 {
-                    "users": {self.alice: "100"},  # stringy
-                    "notifications": {"room": 100.0},  # float
+                    "users": {self.alice: 60},
+                    "notifications": {"room": bad_room_level},
                 },
                 self.token,
                 state_key="",
-            )
+            )["event_id"]
 
         # Create a new message event, and try to evaluate it under the dodgy
         # power level event.
@@ -88,10 +139,11 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
                     "room_id": self.room_id,
                     "content": {
                         "msgtype": "m.text",
-                        "body": "helo",
+                        "body": "helo @room",
                     },
                     "sender": self.alice,
                 },
+                prev_event_ids=[pl_event_id],
             )
         )
 
@@ -99,6 +151,21 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         # should not raise
         self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)]))
 
+        # Did Bob see Alice's @room notification?
+        highlighted_actions = self.get_success(
+            self.hs.get_datastores().main.db_pool.simple_select_list(
+                table="event_push_actions_staging",
+                keyvalues={
+                    "event_id": event.event_id,
+                    "user_id": bob,
+                    "highlight": 1,
+                },
+                retcols=("*",),
+                desc="get_event_push_actions_staging",
+            )
+        )
+        self.assertEqual(len(highlighted_actions), int(should_permit))
+
     @override_config({"push": {"enabled": False}})
     def test_action_for_event_by_user_disabled_by_config(self) -> None:
         """Ensure that push rules are not calculated when disabled in the config"""
diff --git a/tests/test_event_auth.py b/tests/test_event_auth.py
index f4d9fba0a1..0a7937f1cc 100644
--- a/tests/test_event_auth.py
+++ b/tests/test_event_auth.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import unittest
-from typing import Collection, Dict, Iterable, List, Optional
+from typing import Any, Collection, Dict, Iterable, List, Optional
 
 from parameterized import parameterized
 
@@ -728,6 +728,36 @@ class EventAuthTestCase(unittest.TestCase):
                 pl_event.room_version, pl_event2, {("fake_type", "fake_key"): pl_event}
             )
 
+    def test_room_v10_rejects_other_non_integer_power_levels(self) -> None:
+        """We should reject PLs that are non-integer, non-string JSON values.
+
+        test_room_v10_rejects_string_power_levels above handles the string case.
+        """
+
+        def create_event(pl_event_content: Dict[str, Any]) -> EventBase:
+            return make_event_from_dict(
+                {
+                    "room_id": TEST_ROOM_ID,
+                    **_maybe_get_event_id_dict_for_room_version(RoomVersions.V10),
+                    "type": "m.room.power_levels",
+                    "sender": "@test:test.com",
+                    "state_key": "",
+                    "content": pl_event_content,
+                    "signatures": {"test.com": {"ed25519:0": "some9signature"}},
+                },
+                room_version=RoomVersions.V10,
+            )
+
+        contents: Iterable[Dict[str, Any]] = [
+            {"notifications": {"room": None}},
+            {"users": {"@alice:wonderland": []}},
+            {"users_default": {}},
+        ]
+        for content in contents:
+            event = create_event(content)
+            with self.assertRaises(SynapseError):
+                event_auth._check_power_levels(event.room_version, event, {})
+
 
 # helpers for making events
 TEST_DOMAIN = "example.com"
-- 
cgit 1.5.1


From 796a4b74823b721c72de07e45718f05e78e1565d Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 31 Jan 2023 10:33:07 +0000
Subject: Prefer `type(x) is int` to `isinstance(x, int)` (#14945)

* Prefer `type(x) is int` to `isinstance(x, int)`

This covered all additional instances I could see where `x` was
user-controlled.
The remaining cases are

```
$ rg -s 'isinstance.*[^_]int'
tests/replication/_base.py
576:        if isinstance(obj, int):

synapse/util/caches/stream_change_cache.py
136:        assert isinstance(stream_pos, int)
214:        assert isinstance(stream_pos, int)
246:        assert isinstance(stream_pos, int)
267:        assert isinstance(stream_pos, int)

synapse/replication/tcp/external_cache.py
133:        if isinstance(result, int):

synapse/metrics/__init__.py
100:        if isinstance(calls, (int, float)):

synapse/handlers/appservice.py
262:        assert isinstance(new_token, int)

synapse/config/_util.py
62:        if isinstance(p, int):
```

which cover metrics, logic related to `jsonschema`, and replication and
data streams. AFAICS these are all internal to Synapse

* Changelog
---
 changelog.d/14945.misc                    |  1 +
 synapse/config/_base.py                   | 72 +++++++++++++++++++++----------
 synapse/config/cache.py                   |  4 +-
 synapse/config/server.py                  |  2 +-
 synapse/events/validator.py               |  4 +-
 synapse/federation/federation_client.py   |  2 +-
 synapse/handlers/message.py               |  2 +-
 synapse/rest/admin/__init__.py            |  2 +-
 synapse/rest/admin/registration_tokens.py | 15 +++----
 synapse/rest/admin/users.py               |  6 +--
 synapse/rest/client/report_event.py       |  2 +-
 synapse/rest/media/v1/oembed.py           |  2 +-
 synapse/rest/media/v1/thumbnailer.py      |  2 +-
 synapse/storage/databases/main/events.py  |  6 +--
 14 files changed, 75 insertions(+), 47 deletions(-)
 create mode 100644 changelog.d/14945.misc

(limited to 'synapse')

diff --git a/changelog.d/14945.misc b/changelog.d/14945.misc
new file mode 100644
index 0000000000..654174f9a8
--- /dev/null
+++ b/changelog.d/14945.misc
@@ -0,0 +1 @@
+Fix various long-standing bugs in Synapse's config, event and request handling where booleans were unintentionally accepted where an integer was expected.
diff --git a/synapse/config/_base.py b/synapse/config/_base.py
index 1f6362aedd..2ce60610ca 100644
--- a/synapse/config/_base.py
+++ b/synapse/config/_base.py
@@ -174,15 +174,29 @@ class Config:
 
     @staticmethod
     def parse_size(value: Union[str, int]) -> int:
-        if isinstance(value, int):
+        """Interpret `value` as a number of bytes.
+
+        If an integer is provided it is treated as bytes and is unchanged.
+
+        String byte sizes can have a suffix of 'K' or `M`, representing kibibytes and
+        mebibytes respectively. No suffix is understood as a plain byte count.
+
+        Raises:
+            TypeError: if given something other than an integer or a string
+            ValueError: if given a string not of the form described above.
+        """
+        if type(value) is int:
             return value
-        sizes = {"K": 1024, "M": 1024 * 1024}
-        size = 1
-        suffix = value[-1]
-        if suffix in sizes:
-            value = value[:-1]
-            size = sizes[suffix]
-        return int(value) * size
+        elif type(value) is str:
+            sizes = {"K": 1024, "M": 1024 * 1024}
+            size = 1
+            suffix = value[-1]
+            if suffix in sizes:
+                value = value[:-1]
+                size = sizes[suffix]
+            return int(value) * size
+        else:
+            raise TypeError(f"Bad byte size {value!r}")
 
     @staticmethod
     def parse_duration(value: Union[str, int]) -> int:
@@ -198,22 +212,36 @@ class Config:
 
         Returns:
             The number of milliseconds in the duration.
+
+        Raises:
+            TypeError: if given something other than an integer or a string
+            ValueError: if given a string not of the form described above.
         """
-        if isinstance(value, int):
+        if type(value) is int:
             return value
-        second = 1000
-        minute = 60 * second
-        hour = 60 * minute
-        day = 24 * hour
-        week = 7 * day
-        year = 365 * day
-        sizes = {"s": second, "m": minute, "h": hour, "d": day, "w": week, "y": year}
-        size = 1
-        suffix = value[-1]
-        if suffix in sizes:
-            value = value[:-1]
-            size = sizes[suffix]
-        return int(value) * size
+        elif type(value) is str:
+            second = 1000
+            minute = 60 * second
+            hour = 60 * minute
+            day = 24 * hour
+            week = 7 * day
+            year = 365 * day
+            sizes = {
+                "s": second,
+                "m": minute,
+                "h": hour,
+                "d": day,
+                "w": week,
+                "y": year,
+            }
+            size = 1
+            suffix = value[-1]
+            if suffix in sizes:
+                value = value[:-1]
+                size = sizes[suffix]
+            return int(value) * size
+        else:
+            raise TypeError(f"Bad duration {value!r}")
 
     @staticmethod
     def abspath(file_path: str) -> str:
diff --git a/synapse/config/cache.py b/synapse/config/cache.py
index 015b2a138e..05f69cb1ba 100644
--- a/synapse/config/cache.py
+++ b/synapse/config/cache.py
@@ -126,7 +126,7 @@ class CacheConfig(Config):
 
         cache_config = config.get("caches") or {}
         self.global_factor = cache_config.get("global_factor", _DEFAULT_FACTOR_SIZE)
-        if not isinstance(self.global_factor, (int, float)):
+        if type(self.global_factor) not in (int, float):
             raise ConfigError("caches.global_factor must be a number.")
 
         # Load cache factors from the config
@@ -151,7 +151,7 @@ class CacheConfig(Config):
         )
 
         for cache, factor in individual_factors.items():
-            if not isinstance(factor, (int, float)):
+            if type(factor) not in (int, float):
                 raise ConfigError(
                     "caches.per_cache_factors.%s must be a number" % (cache,)
                 )
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 80bcfa4080..ecdaa2d9dd 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -904,7 +904,7 @@ def parse_listener_def(num: int, listener: Any) -> ListenerConfig:
         raise ConfigError(DIRECT_TCP_ERROR, ("listeners", str(num), "type"))
 
     port = listener.get("port")
-    if not isinstance(port, int):
+    if type(port) is not int:
         raise ConfigError("Listener configuration is lacking a valid 'port' option")
 
     tls = listener.get("tls", False)
diff --git a/synapse/events/validator.py b/synapse/events/validator.py
index a6f0104396..fb1737b910 100644
--- a/synapse/events/validator.py
+++ b/synapse/events/validator.py
@@ -139,7 +139,7 @@ class EventValidator:
         max_lifetime = event.content.get("max_lifetime")
 
         if min_lifetime is not None:
-            if not isinstance(min_lifetime, int):
+            if type(min_lifetime) is not int:
                 raise SynapseError(
                     code=400,
                     msg="'min_lifetime' must be an integer",
@@ -147,7 +147,7 @@ class EventValidator:
                 )
 
         if max_lifetime is not None:
-            if not isinstance(max_lifetime, int):
+            if type(max_lifetime) is not int:
                 raise SynapseError(
                     code=400,
                     msg="'max_lifetime' must be an integer",
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index f185b6c1f9..feb32e40e5 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -1864,7 +1864,7 @@ class TimestampToEventResponse:
             )
 
         origin_server_ts = d.get("origin_server_ts")
-        if not isinstance(origin_server_ts, int):
+        if type(origin_server_ts) is not int:
             raise ValueError(
                 "Invalid response: 'origin_server_ts' must be a int but received %r"
                 % origin_server_ts
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 3278a695ed..6290f7f523 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -377,7 +377,7 @@ class MessageHandler:
         """
 
         expiry_ts = event.content.get(EventContentFields.SELF_DESTRUCT_AFTER)
-        if not isinstance(expiry_ts, int) or event.is_state():
+        if type(expiry_ts) is not int or event.is_state():
             return
 
         # _schedule_expiry_for_event won't actually schedule anything if there's already
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index fb73886df0..79f22a59f1 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -152,7 +152,7 @@ class PurgeHistoryRestServlet(RestServlet):
             logger.info("[purge] purging up to token %s (event_id %s)", token, event_id)
         elif "purge_up_to_ts" in body:
             ts = body["purge_up_to_ts"]
-            if not isinstance(ts, int):
+            if type(ts) is not int:
                 raise SynapseError(
                     HTTPStatus.BAD_REQUEST,
                     "purge_up_to_ts must be an int",
diff --git a/synapse/rest/admin/registration_tokens.py b/synapse/rest/admin/registration_tokens.py
index af606e9252..95e751288b 100644
--- a/synapse/rest/admin/registration_tokens.py
+++ b/synapse/rest/admin/registration_tokens.py
@@ -143,7 +143,7 @@ class NewRegistrationTokenRestServlet(RestServlet):
         else:
             # Get length of token to generate (default is 16)
             length = body.get("length", 16)
-            if not isinstance(length, int):
+            if type(length) is not int:
                 raise SynapseError(
                     HTTPStatus.BAD_REQUEST,
                     "length must be an integer",
@@ -163,8 +163,7 @@ class NewRegistrationTokenRestServlet(RestServlet):
 
         uses_allowed = body.get("uses_allowed", None)
         if not (
-            uses_allowed is None
-            or (isinstance(uses_allowed, int) and uses_allowed >= 0)
+            uses_allowed is None or (type(uses_allowed) is int and uses_allowed >= 0)
         ):
             raise SynapseError(
                 HTTPStatus.BAD_REQUEST,
@@ -173,13 +172,13 @@ class NewRegistrationTokenRestServlet(RestServlet):
             )
 
         expiry_time = body.get("expiry_time", None)
-        if not isinstance(expiry_time, (int, type(None))):
+        if type(expiry_time) not in (int, type(None)):
             raise SynapseError(
                 HTTPStatus.BAD_REQUEST,
                 "expiry_time must be an integer or null",
                 Codes.INVALID_PARAM,
             )
-        if isinstance(expiry_time, int) and expiry_time < self.clock.time_msec():
+        if type(expiry_time) is int and expiry_time < self.clock.time_msec():
             raise SynapseError(
                 HTTPStatus.BAD_REQUEST,
                 "expiry_time must not be in the past",
@@ -284,7 +283,7 @@ class RegistrationTokenRestServlet(RestServlet):
             uses_allowed = body["uses_allowed"]
             if not (
                 uses_allowed is None
-                or (isinstance(uses_allowed, int) and uses_allowed >= 0)
+                or (type(uses_allowed) is int and uses_allowed >= 0)
             ):
                 raise SynapseError(
                     HTTPStatus.BAD_REQUEST,
@@ -295,13 +294,13 @@ class RegistrationTokenRestServlet(RestServlet):
 
         if "expiry_time" in body:
             expiry_time = body["expiry_time"]
-            if not isinstance(expiry_time, (int, type(None))):
+            if type(expiry_time) not in (int, type(None)):
                 raise SynapseError(
                     HTTPStatus.BAD_REQUEST,
                     "expiry_time must be an integer or null",
                     Codes.INVALID_PARAM,
                 )
-            if isinstance(expiry_time, int) and expiry_time < self.clock.time_msec():
+            if type(expiry_time) is int and expiry_time < self.clock.time_msec():
                 raise SynapseError(
                     HTTPStatus.BAD_REQUEST,
                     "expiry_time must not be in the past",
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 6e0c44be2a..0841b89c1a 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -973,7 +973,7 @@ class UserTokenRestServlet(RestServlet):
         body = parse_json_object_from_request(request, allow_empty_body=True)
 
         valid_until_ms = body.get("valid_until_ms")
-        if valid_until_ms and not isinstance(valid_until_ms, int):
+        if type(valid_until_ms) not in (int, type(None)):
             raise SynapseError(
                 HTTPStatus.BAD_REQUEST, "'valid_until_ms' parameter must be an int"
             )
@@ -1125,14 +1125,14 @@ class RateLimitRestServlet(RestServlet):
         messages_per_second = body.get("messages_per_second", 0)
         burst_count = body.get("burst_count", 0)
 
-        if not isinstance(messages_per_second, int) or messages_per_second < 0:
+        if type(messages_per_second) is not int or messages_per_second < 0:
             raise SynapseError(
                 HTTPStatus.BAD_REQUEST,
                 "%r parameter must be a positive int" % (messages_per_second,),
                 errcode=Codes.INVALID_PARAM,
             )
 
-        if not isinstance(burst_count, int) or burst_count < 0:
+        if type(burst_count) is not int or burst_count < 0:
             raise SynapseError(
                 HTTPStatus.BAD_REQUEST,
                 "%r parameter must be a positive int" % (burst_count,),
diff --git a/synapse/rest/client/report_event.py b/synapse/rest/client/report_event.py
index 6e962a4532..e2b410cf32 100644
--- a/synapse/rest/client/report_event.py
+++ b/synapse/rest/client/report_event.py
@@ -54,7 +54,7 @@ class ReportEventRestServlet(RestServlet):
                 "Param 'reason' must be a string",
                 Codes.BAD_JSON,
             )
-        if not isinstance(body.get("score", 0), int):
+        if type(body.get("score", 0)) is not int:
             raise SynapseError(
                 HTTPStatus.BAD_REQUEST,
                 "Param 'score' must be an integer",
diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py
index a3738a6250..7592aa5d47 100644
--- a/synapse/rest/media/v1/oembed.py
+++ b/synapse/rest/media/v1/oembed.py
@@ -200,7 +200,7 @@ class OEmbedProvider:
                 calc_description_and_urls(open_graph_response, oembed["html"])
             for size in ("width", "height"):
                 val = oembed.get(size)
-                if val is not None and isinstance(val, int):
+                if type(val) is int:
                     open_graph_response[f"og:video:{size}"] = val
 
         elif oembed_type == "link":
diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py
index a48a4de92a..9480cc5763 100644
--- a/synapse/rest/media/v1/thumbnailer.py
+++ b/synapse/rest/media/v1/thumbnailer.py
@@ -77,7 +77,7 @@ class Thumbnailer:
             image_exif = self.image._getexif()  # type: ignore
             if image_exif is not None:
                 image_orientation = image_exif.get(EXIF_ORIENTATION_TAG)
-                assert isinstance(image_orientation, int)
+                assert type(image_orientation) is int
                 self.transpose_method = EXIF_TRANSPOSE_MAPPINGS.get(image_orientation)
         except Exception as e:
             # A lot of parsing errors can happen when parsing EXIF
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 0f097a2927..1536937b67 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1651,7 +1651,7 @@ class PersistEventsStore:
             if self._ephemeral_messages_enabled:
                 # If there's an expiry timestamp on the event, store it.
                 expiry_ts = event.content.get(EventContentFields.SELF_DESTRUCT_AFTER)
-                if isinstance(expiry_ts, int) and not event.is_state():
+                if type(expiry_ts) is int and not event.is_state():
                     self._insert_event_expiry_txn(txn, event.event_id, expiry_ts)
 
         # Insert into the room_memberships table.
@@ -2133,10 +2133,10 @@ class PersistEventsStore:
         ):
             if (
                 "min_lifetime" in event.content
-                and not isinstance(event.content.get("min_lifetime"), int)
+                and type(event.content["min_lifetime"]) is not int
             ) or (
                 "max_lifetime" in event.content
-                and not isinstance(event.content.get("max_lifetime"), int)
+                and type(event.content["max_lifetime"]) is not int
             ):
                 # Ignore the event if one of the value isn't an integer.
                 return
-- 
cgit 1.5.1


From a134e626e43e9c31a4618d4164ba7d6242c0f803 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 31 Jan 2023 10:57:02 +0000
Subject: Reject boolean power levels (#14944)

* Better test for bad values in power levels events

The previous test only checked that Synapse didn't raise an exception,
but didn't check that we had correctly interpreted the value of the
dodgy power level.

It also conflated two things: bad room notification levels, and bad user
levels. There _is_ logic for converting the latter to integers, but we
should test it separately.

* Check we ignore types that don't convert to int

* Handle `None` values in `notifications.room`

* Changelog

* Also test that bad values are rejected by event auth

* Docstring

* linter scripttttttttt

* Test boolean values in PL content

* Reject boolean power levels

* Changelog
---
 changelog.d/14944.bugfix              | 1 +
 synapse/event_auth.py                 | 4 ++--
 synapse/events/utils.py               | 6 +++---
 synapse/federation/federation_base.py | 2 +-
 4 files changed, 7 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/14944.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14944.bugfix b/changelog.d/14944.bugfix
new file mode 100644
index 0000000000..5fe1fb322b
--- /dev/null
+++ b/changelog.d/14944.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse v1.64 where boolean power levels were erroneously permitted in [v10 rooms](https://spec.matrix.org/v1.5/rooms/v10/).
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index c4a7b16413..e0be9f88cc 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -875,11 +875,11 @@ def _check_power_levels(
                 "kick",
                 "invite",
             }:
-                if not isinstance(v, int):
+                if type(v) is not int:
                     raise SynapseError(400, f"{v!r} must be an integer.")
             if k in {"events", "notifications", "users"}:
                 if not isinstance(v, collections.abc.Mapping) or not all(
-                    isinstance(v, int) for v in v.values()
+                    type(v) is int for v in v.values()
                 ):
                     raise SynapseError(
                         400,
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index 52e4b467e8..ebf8c7ed83 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -648,10 +648,10 @@ def _copy_power_level_value_as_integer(
 ) -> None:
     """Set `power_levels[key]` to the integer represented by `old_value`.
 
-    :raises TypeError: if `old_value` is not an integer, nor a base-10 string
+    :raises TypeError: if `old_value` is neither an integer nor a base-10 string
         representation of an integer.
     """
-    if isinstance(old_value, int):
+    if type(old_value) is int:
         power_levels[key] = old_value
         return
 
@@ -679,7 +679,7 @@ def validate_canonicaljson(value: Any) -> None:
     * Floats
     * NaN, Infinity, -Infinity
     """
-    if isinstance(value, int):
+    if type(value) is int:
         if value < CANONICALJSON_MIN_INT or CANONICALJSON_MAX_INT < value:
             raise SynapseError(400, "JSON integer out of range", Codes.BAD_JSON)
 
diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py
index 6bd4742140..29fae716f5 100644
--- a/synapse/federation/federation_base.py
+++ b/synapse/federation/federation_base.py
@@ -280,7 +280,7 @@ def event_from_pdu_json(pdu_json: JsonDict, room_version: RoomVersion) -> EventB
         _strip_unsigned_values(pdu_json)
 
     depth = pdu_json["depth"]
-    if not isinstance(depth, int):
+    if type(depth) is not int:
         raise SynapseError(400, "Depth %r not an intger" % (depth,), Codes.BAD_JSON)
 
     if depth < 0:
-- 
cgit 1.5.1


From 6d14fdc2710688014a7a66cc48485462c6e86a1e Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Tue, 31 Jan 2023 11:03:55 +0000
Subject: Make sqlite database migrations transactional again, part two
 (#14926)

#14910 fixed the regression introduced by #13873 where sqlite database
migrations would no longer run inside a transaction. However, it
committed the transaction before Synapse updated its bookkeeping of
which migrations have been run, which means that migrations may be run
again after they have completed successfully.

Leave the transaction open at the end of `executescript`, to restore the
old, correct behaviour. Also make the PostgreSQL behaviour consistent
with SQLite.

Fixes #14909.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14926.bugfix            |  1 +
 synapse/storage/engines/_base.py    |  5 +-
 synapse/storage/engines/postgres.py |  6 ++-
 synapse/storage/engines/sqlite.py   |  6 ++-
 tests/storage/test_database.py      | 96 +++++++++++++++++++++++++++++++++++++
 5 files changed, 109 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/14926.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14926.bugfix b/changelog.d/14926.bugfix
new file mode 100644
index 0000000000..f1f34cd6ba
--- /dev/null
+++ b/changelog.d/14926.bugfix
@@ -0,0 +1 @@
+Fix a regression introduced in Synapse 1.69.0 which can result in database corruption when database migrations are interrupted on sqlite.
diff --git a/synapse/storage/engines/_base.py b/synapse/storage/engines/_base.py
index bc9ca3a53c..0363cdc038 100644
--- a/synapse/storage/engines/_base.py
+++ b/synapse/storage/engines/_base.py
@@ -133,8 +133,9 @@ class BaseDatabaseEngine(Generic[ConnectionType, CursorType], metaclass=abc.ABCM
 
         This is not provided by DBAPI2, and so needs engine-specific support.
 
-        Some database engines may automatically COMMIT the ongoing transaction both
-        before and after executing the script.
+        Any ongoing transaction is committed before executing the script in its own
+        transaction. The script transaction is left open and it is the responsibility of
+        the caller to commit it.
         """
         ...
 
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index f9f562ea45..b350f57ccb 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -220,5 +220,9 @@ class PostgresEngine(
         """Execute a chunk of SQL containing multiple semicolon-delimited statements.
 
         Psycopg2 seems happy to do this in DBAPI2's `execute()` function.
+
+        For consistency with SQLite, any ongoing transaction is committed before
+        executing the script in its own transaction. The script transaction is
+        left open and it is the responsibility of the caller to commit it.
         """
-        cursor.execute(script)
+        cursor.execute(f"COMMIT; BEGIN TRANSACTION; {script}")
diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py
index 2f7df85ce4..28751e89a5 100644
--- a/synapse/storage/engines/sqlite.py
+++ b/synapse/storage/engines/sqlite.py
@@ -135,14 +135,16 @@ class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection, sqlite3.Cursor]):
         > than one statement with it, it will raise a Warning. Use executescript() if
         > you want to execute multiple SQL statements with one call.
 
-        The script is wrapped in transaction control statemnets, since the docs for
+        The script is prefixed with a `BEGIN TRANSACTION`, since the docs for
         `executescript` warn:
 
         > If there is a pending transaction, an implicit COMMIT statement is executed
         > first. No other implicit transaction control is performed; any transaction
         > control must be added to sql_script.
         """
-        cursor.executescript(f"BEGIN TRANSACTION;\n{script}\nCOMMIT;")
+        # The implementation of `executescript` can be found at
+        # https://github.com/python/cpython/blob/3.11/Modules/_sqlite/cursor.c#L1035.
+        cursor.executescript(f"BEGIN TRANSACTION; {script}")
 
 
 # Following functions taken from: https://github.com/coleifer/peewee
diff --git a/tests/storage/test_database.py b/tests/storage/test_database.py
index 543cce6b3e..8cd7c89ca2 100644
--- a/tests/storage/test_database.py
+++ b/tests/storage/test_database.py
@@ -22,6 +22,7 @@ from twisted.test.proto_helpers import MemoryReactor
 from synapse.server import HomeServer
 from synapse.storage.database import (
     DatabasePool,
+    LoggingDatabaseConnection,
     LoggingTransaction,
     make_tuple_comparison_clause,
 )
@@ -37,6 +38,101 @@ class TupleComparisonClauseTestCase(unittest.TestCase):
         self.assertEqual(args, [1, 2])
 
 
+class ExecuteScriptTestCase(unittest.HomeserverTestCase):
+    """Tests for `BaseDatabaseEngine.executescript` implementations."""
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.store = hs.get_datastores().main
+        self.db_pool: DatabasePool = self.store.db_pool
+        self.get_success(
+            self.db_pool.runInteraction(
+                "create",
+                lambda txn: txn.execute("CREATE TABLE foo (name TEXT PRIMARY KEY)"),
+            )
+        )
+
+    def test_transaction(self) -> None:
+        """Test that all statements are run in a single transaction."""
+
+        def run(conn: LoggingDatabaseConnection) -> None:
+            cur = conn.cursor(txn_name="test_transaction")
+            self.db_pool.engine.executescript(
+                cur,
+                ";".join(
+                    [
+                        "INSERT INTO foo (name) VALUES ('transaction test')",
+                        # This next statement will fail. When `executescript` is not
+                        # transactional, the previous row will be observed later.
+                        "INSERT INTO foo (name) VALUES ('transaction test')",
+                    ]
+                ),
+            )
+
+        self.get_failure(
+            self.db_pool.runWithConnection(run),
+            self.db_pool.engine.module.IntegrityError,
+        )
+
+        self.assertIsNone(
+            self.get_success(
+                self.db_pool.simple_select_one_onecol(
+                    "foo",
+                    keyvalues={"name": "transaction test"},
+                    retcol="name",
+                    allow_none=True,
+                )
+            ),
+            "executescript is not running statements inside a transaction",
+        )
+
+    def test_commit(self) -> None:
+        """Test that the script transaction remains open and can be committed."""
+
+        def run(conn: LoggingDatabaseConnection) -> None:
+            cur = conn.cursor(txn_name="test_commit")
+            self.db_pool.engine.executescript(
+                cur, "INSERT INTO foo (name) VALUES ('commit test')"
+            )
+            cur.execute("COMMIT")
+
+        self.get_success(self.db_pool.runWithConnection(run))
+
+        self.assertIsNotNone(
+            self.get_success(
+                self.db_pool.simple_select_one_onecol(
+                    "foo",
+                    keyvalues={"name": "commit test"},
+                    retcol="name",
+                    allow_none=True,
+                )
+            ),
+        )
+
+    def test_rollback(self) -> None:
+        """Test that the script transaction remains open and can be rolled back."""
+
+        def run(conn: LoggingDatabaseConnection) -> None:
+            cur = conn.cursor(txn_name="test_rollback")
+            self.db_pool.engine.executescript(
+                cur, "INSERT INTO foo (name) VALUES ('rollback test')"
+            )
+            cur.execute("ROLLBACK")
+
+        self.get_success(self.db_pool.runWithConnection(run))
+
+        self.assertIsNone(
+            self.get_success(
+                self.db_pool.simple_select_one_onecol(
+                    "foo",
+                    keyvalues={"name": "rollback test"},
+                    retcol="name",
+                    allow_none=True,
+                )
+            ),
+            "executescript is not leaving the script transaction open",
+        )
+
+
 class CallbacksTestCase(unittest.HomeserverTestCase):
     """Tests for transaction callbacks."""
 
-- 
cgit 1.5.1


From 805b641fb6b31e677278eaf6e27875eba5c2a3d3 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Tue, 31 Jan 2023 11:31:52 +0000
Subject: Fix "Re-starting finished log context" spam when creating events
 (#14947)

`run_in_background` calls re-use the current logging context. When they
are not awaited, they can complete after the current logging context has
been marked as finished, which leads to log spam. Use
`run_as_background_process` instead.

Fixes one of the instances of #13090.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14947.bugfix    | 1 +
 synapse/handlers/message.py | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14947.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14947.bugfix b/changelog.d/14947.bugfix
new file mode 100644
index 0000000000..b9e768c44c
--- /dev/null
+++ b/changelog.d/14947.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where sending messages on servers with presence enabled would spam "Re-starting finished log context" log lines.
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 6290f7f523..e688e00575 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -1939,7 +1939,9 @@ class EventCreationHandler:
             if event.type == EventTypes.Message:
                 # We don't want to block sending messages on any presence code. This
                 # matters as sometimes presence code can take a while.
-                run_in_background(self._bump_active_time, requester.user)
+                run_as_background_process(
+                    "bump_presence_active_time", self._bump_active_time, requester.user
+                )
 
         async def _notify() -> None:
             try:
-- 
cgit 1.5.1


From 3b8574b4f250bac1e4d4cfbf6b1ceec83bc0bac2 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 31 Jan 2023 12:43:20 +0000
Subject: Tag /send_join responses to detect faster joins (#14950)

* Tag /send_join responses to detect faster joins

* Changelog

* Define a proper SynapseTag

* isort
---
 changelog.d/14950.misc                  | 1 +
 synapse/federation/federation_server.py | 6 ++++++
 synapse/logging/opentracing.py          | 5 +++++
 3 files changed, 12 insertions(+)
 create mode 100644 changelog.d/14950.misc

(limited to 'synapse')

diff --git a/changelog.d/14950.misc b/changelog.d/14950.misc
new file mode 100644
index 0000000000..6602776b3f
--- /dev/null
+++ b/changelog.d/14950.misc
@@ -0,0 +1 @@
+Faster joins: tag `v2/send_join/` requests to indicate if they served a partial join response.
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 3197939a36..c9a6dfd1a4 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -62,7 +62,9 @@ from synapse.logging.context import (
     run_in_background,
 )
 from synapse.logging.opentracing import (
+    SynapseTags,
     log_kv,
+    set_tag,
     start_active_span_from_edu,
     tag_args,
     trace,
@@ -678,6 +680,10 @@ class FederationServer(FederationBase):
         room_id: str,
         caller_supports_partial_state: bool = False,
     ) -> Dict[str, Any]:
+        set_tag(
+            SynapseTags.SEND_JOIN_RESPONSE_IS_PARTIAL_STATE,
+            caller_supports_partial_state,
+        )
         await self._room_member_handler._join_rate_per_room_limiter.ratelimit(  # type: ignore[has-type]
             requester=None,
             key=room_id,
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index a705af8356..8ef9a0dda8 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -322,6 +322,11 @@ class SynapseTags:
     # The name of the external cache
     CACHE_NAME = "cache.name"
 
+    # Boolean. Present on /v2/send_join requests, omitted from all others.
+    # True iff partial state was requested and we provided (or intended to provide)
+    # partial state in the response.
+    SEND_JOIN_RESPONSE_IS_PARTIAL_STATE = "send_join.partial_state_response"
+
     # Used to tag function arguments
     #
     # Tag a named arg. The name of the argument should be appended to this prefix.
-- 
cgit 1.5.1


From bf82b56babc9e2cacba34f8878da3b3834914b3a Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Wed, 1 Feb 2023 16:45:19 +0100
Subject: Add more user information to export-data command. (#14894)

* The user's profile information.
* The user's devices.
* The user's connections / IP address information.
---
 .ci/scripts/test_export_data_command.sh | 10 +++--
 changelog.d/14894.feature               |  1 +
 docs/usage/administration/admin_faq.md  | 80 ++++++++++++++++++++++++++-------
 synapse/app/admin_cmd.py                | 32 ++++++++++++-
 synapse/handlers/admin.py               | 43 ++++++++++++++++++
 tests/handlers/test_admin.py            | 60 +++++++++++++++++++++++++
 6 files changed, 206 insertions(+), 20 deletions(-)
 create mode 100644 changelog.d/14894.feature

(limited to 'synapse')

diff --git a/.ci/scripts/test_export_data_command.sh b/.ci/scripts/test_export_data_command.sh
index 9f6c49acff..36f836345c 100755
--- a/.ci/scripts/test_export_data_command.sh
+++ b/.ci/scripts/test_export_data_command.sh
@@ -23,8 +23,9 @@ poetry run python -m synapse.app.admin_cmd -c .ci/sqlite-config.yaml  export-dat
 --output-directory /tmp/export_data
 
 # Test that the output directory exists and contains the rooms directory
-dir="/tmp/export_data/rooms"
-if [ -d "$dir" ]; then
+dir_r="/tmp/export_data/rooms"
+dir_u="/tmp/export_data/user_data"
+if [ -d "$dir_r" ] && [ -d "$dir_u" ]; then
   echo "Command successful, this test passes"
 else
   echo "No output directories found, the command fails against a sqlite database."
@@ -43,8 +44,9 @@ poetry run python -m synapse.app.admin_cmd -c .ci/postgres-config.yaml  export-d
 --output-directory /tmp/export_data2
 
 # Test that the output directory exists and contains the rooms directory
-dir2="/tmp/export_data2/rooms"
-if [ -d "$dir2" ]; then
+dir_r2="/tmp/export_data2/rooms"
+dir_u2="/tmp/export_data2/user_data"
+if [ -d "$dir_r2" ] && [ -d "$dir_u2" ]; then
   echo "Command successful, this test passes"
 else
   echo "No output directories found, the command fails against a postgres database."
diff --git a/changelog.d/14894.feature b/changelog.d/14894.feature
new file mode 100644
index 0000000000..d22741d079
--- /dev/null
+++ b/changelog.d/14894.feature
@@ -0,0 +1 @@
+Adds profile information, devices and connections to the user data export via command line.
\ No newline at end of file
diff --git a/docs/usage/administration/admin_faq.md b/docs/usage/administration/admin_faq.md
index 18ce6171db..7a27741199 100644
--- a/docs/usage/administration/admin_faq.md
+++ b/docs/usage/administration/admin_faq.md
@@ -2,13 +2,19 @@
 
 How do I become a server admin?
 ---
-If your server already has an admin account you should use the [User Admin API](../../admin_api/user_admin_api.md#change-whether-a-user-is-a-server-administrator-or-not) to promote other accounts to become admins.
+If your server already has an admin account you should use the
+[User Admin API](../../admin_api/user_admin_api.md#change-whether-a-user-is-a-server-administrator-or-not)
+to promote other accounts to become admins.
 
-If you don't have any admin accounts yet you won't be able to use the admin API, so you'll have to edit the database manually. Manually editing the database is generally not recommended so once you have an admin account: use the admin APIs to make further changes.
+If you don't have any admin accounts yet you won't be able to use the admin API,
+so you'll have to edit the database manually. Manually editing the database is
+generally not recommended so once you have an admin account: use the admin APIs
+to make further changes.
 
 ```sql
 UPDATE users SET admin = 1 WHERE name = '@foo:bar.com';
 ```
+
 What servers are my server talking to?
 ---
 Run this sql query on your db:
@@ -36,8 +42,38 @@ How can I export user data?
 ---
 Synapse includes a Python command to export data for a specific user. It takes the homeserver
 configuration file and the full Matrix ID of the user to export:
+
 ```console
-python -m synapse.app.admin_cmd -c <config_file> export-data <user_id>
+python -m synapse.app.admin_cmd -c <config_file> export-data <user_id> --output-directory <directory_path>
+```
+
+If you use [Poetry](../../development/dependencies.md#managing-dependencies-with-poetry)
+to run Synapse:
+
+```console
+poetry run python -m synapse.app.admin_cmd -c <config_file> export-data <user_id> --output-directory <directory_path>
+```
+
+The directory to store the export data in can be customised with the
+`--output-directory` parameter; ensure that the provided directory is
+empty. If this parameter is not provided, Synapse defaults to creating
+a temporary directory (which starts with "synapse-exfiltrate") in `/tmp`,
+`/var/tmp`, or `/usr/tmp`, in that order.
+
+The exported data has the following layout:
+
+```
+output-directory
+├───rooms
+│   └───<room_id>
+│       ├───events
+│       ├───state
+│       ├───invite_state
+│       └───knock_state
+└───user_data
+    ├───connections
+    ├───devices
+    └───profile
 ```
 
 Manually resetting passwords
@@ -50,21 +86,29 @@ I have a problem with my server. Can I just delete my database and start again?
 ---
 Deleting your database is unlikely to make anything better. 
 
-It's easy to make the mistake of thinking that you can start again from a clean slate by dropping your database, but things don't work like that in a federated network: lots of other servers have information about your server.
+It's easy to make the mistake of thinking that you can start again from a clean
+slate by dropping your database, but things don't work like that in a federated
+network: lots of other servers have information about your server.
 
-For example: other servers might think that you are in a room, your server will think that you are not, and you'll probably be unable to interact with that room in a sensible way ever again.
+For example: other servers might think that you are in a room, your server will
+think that you are not, and you'll probably be unable to interact with that room
+in a sensible way ever again.
 
-In general, there are better solutions to any problem than dropping the database. Come and seek help in https://matrix.to/#/#synapse:matrix.org.
+In general, there are better solutions to any problem than dropping the database.
+Come and seek help in https://matrix.to/#/#synapse:matrix.org.
 
 There are two exceptions when it might be sensible to delete your database and start again:
-* You have *never* joined any rooms which are federated with other servers. For instance, a local deployment which the outside world can't talk to. 
-* You are changing the `server_name` in the homeserver configuration. In effect this makes your server a completely new one from the point of view of the network, so in this case it makes sense to start with a clean database.
+* You have *never* joined any rooms which are federated with other servers. For
+instance, a local deployment which the outside world can't talk to. 
+* You are changing the `server_name` in the homeserver configuration. In effect
+this makes your server a completely new one from the point of view of the network,
+so in this case it makes sense to start with a clean database.
 (In both cases you probably also want to clear out the media_store.)
 
 I've stuffed up access to my room, how can I delete it to free up the alias?
 ---
 Using the following curl command:
-```
+```console
 curl -H 'Authorization: Bearer <access-token>' -X DELETE https://matrix.org/_matrix/client/r0/directory/room/<room-alias>
 ```
 `<access-token>` - can be obtained in riot by looking in the riot settings, down the bottom is:
@@ -75,19 +119,25 @@ Access Token:\<click to reveal\>
 How can I find the lines corresponding to a given HTTP request in my homeserver log?
 ---
 
-Synapse tags each log line according to the HTTP request it is processing. When it finishes processing each request, it logs a line containing the words `Processed request: `. For example:
+Synapse tags each log line according to the HTTP request it is processing. When
+it finishes processing each request, it logs a line containing the words
+`Processed request: `. For example:
 
 ```
 2019-02-14 22:35:08,196 - synapse.access.http.8008 - 302 - INFO - GET-37 - ::1 - 8008 - {@richvdh:localhost} Processed request: 0.173sec/0.001sec (0.002sec, 0.000sec) (0.027sec/0.026sec/2) 687B 200 "GET /_matrix/client/r0/sync HTTP/1.1" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36" [0 dbevts]"
 ```
 
-Here we can see that the request has been tagged with `GET-37`. (The tag depends on the method of the HTTP request, so might start with `GET-`, `PUT-`, `POST-`, `OPTIONS-` or `DELETE-`.) So to find all lines corresponding to this request, we can do:
+Here we can see that the request has been tagged with `GET-37`. (The tag depends
+on the method of the HTTP request, so might start with `GET-`, `PUT-`, `POST-`,
+`OPTIONS-` or `DELETE-`.) So to find all lines corresponding to this request, we can do:
 
-```
+```console
 grep 'GET-37' homeserver.log
 ```
 
-If you want to paste that output into a github issue or matrix room, please remember to surround it with triple-backticks (```) to make it legible (see [quoting code](https://help.github.com/en/articles/basic-writing-and-formatting-syntax#quoting-code)).
+If you want to paste that output into a github issue or matrix room, please
+remember to surround it with triple-backticks (```) to make it legible
+(see [quoting code](https://help.github.com/en/articles/basic-writing-and-formatting-syntax#quoting-code)).
 
 
 What do all those fields in the 'Processed' line mean?
@@ -127,7 +177,7 @@ This is normally caused by a misconfiguration in your reverse-proxy. See [the re
 
 
 Help!! Synapse is slow and eats all my RAM/CPU!
------------------------------------------------
+---
 
 First, ensure you are running the latest version of Synapse, using Python 3
 with a [PostgreSQL database](../../postgres.md).
@@ -169,7 +219,7 @@ in the Synapse config file: [see here](../configuration/config_documentation.md#
 
 
 Running out of File Handles
----------------------------
+---
 
 If Synapse runs out of file handles, it typically fails badly - live-locking
 at 100% CPU, and/or failing to accept new TCP connections (blocking the
diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py
index 165d1c5db0..fe7afb9475 100644
--- a/synapse/app/admin_cmd.py
+++ b/synapse/app/admin_cmd.py
@@ -35,6 +35,7 @@ from synapse.storage.databases.main.appservice import (
     ApplicationServiceTransactionWorkerStore,
     ApplicationServiceWorkerStore,
 )
+from synapse.storage.databases.main.client_ips import ClientIpWorkerStore
 from synapse.storage.databases.main.deviceinbox import DeviceInboxWorkerStore
 from synapse.storage.databases.main.devices import DeviceWorkerStore
 from synapse.storage.databases.main.event_federation import EventFederationWorkerStore
@@ -43,6 +44,7 @@ from synapse.storage.databases.main.event_push_actions import (
 )
 from synapse.storage.databases.main.events_worker import EventsWorkerStore
 from synapse.storage.databases.main.filtering import FilteringWorkerStore
+from synapse.storage.databases.main.profile import ProfileWorkerStore
 from synapse.storage.databases.main.push_rule import PushRulesWorkerStore
 from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
 from synapse.storage.databases.main.registration import RegistrationWorkerStore
@@ -54,7 +56,7 @@ from synapse.storage.databases.main.state import StateGroupWorkerStore
 from synapse.storage.databases.main.stream import StreamWorkerStore
 from synapse.storage.databases.main.tags import TagsWorkerStore
 from synapse.storage.databases.main.user_erasure_store import UserErasureWorkerStore
-from synapse.types import StateMap
+from synapse.types import JsonDict, StateMap
 from synapse.util import SYNAPSE_VERSION
 from synapse.util.logcontext import LoggingContext
 
@@ -63,6 +65,7 @@ logger = logging.getLogger("synapse.app.admin_cmd")
 
 class AdminCmdSlavedStore(
     FilteringWorkerStore,
+    ClientIpWorkerStore,
     DeviceWorkerStore,
     TagsWorkerStore,
     DeviceInboxWorkerStore,
@@ -82,6 +85,7 @@ class AdminCmdSlavedStore(
     EventsWorkerStore,
     RegistrationWorkerStore,
     RoomWorkerStore,
+    ProfileWorkerStore,
 ):
     def __init__(
         self,
@@ -192,6 +196,32 @@ class FileExfiltrationWriter(ExfiltrationWriter):
             for event in state.values():
                 print(json.dumps(event), file=f)
 
+    def write_profile(self, profile: JsonDict) -> None:
+        user_directory = os.path.join(self.base_directory, "user_data")
+        os.makedirs(user_directory, exist_ok=True)
+        profile_file = os.path.join(user_directory, "profile")
+
+        with open(profile_file, "a") as f:
+            print(json.dumps(profile), file=f)
+
+    def write_devices(self, devices: List[JsonDict]) -> None:
+        user_directory = os.path.join(self.base_directory, "user_data")
+        os.makedirs(user_directory, exist_ok=True)
+        device_file = os.path.join(user_directory, "devices")
+
+        for device in devices:
+            with open(device_file, "a") as f:
+                print(json.dumps(device), file=f)
+
+    def write_connections(self, connections: List[JsonDict]) -> None:
+        user_directory = os.path.join(self.base_directory, "user_data")
+        os.makedirs(user_directory, exist_ok=True)
+        connection_file = os.path.join(user_directory, "connections")
+
+        for connection in connections:
+            with open(connection_file, "a") as f:
+                print(json.dumps(connection), file=f)
+
     def finished(self) -> str:
         return self.base_directory
 
diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py
index c81ea34758..b03c214b14 100644
--- a/synapse/handlers/admin.py
+++ b/synapse/handlers/admin.py
@@ -30,6 +30,7 @@ logger = logging.getLogger(__name__)
 class AdminHandler:
     def __init__(self, hs: "HomeServer"):
         self.store = hs.get_datastores().main
+        self._device_handler = hs.get_device_handler()
         self._storage_controllers = hs.get_storage_controllers()
         self._state_storage_controller = self._storage_controllers.state
         self._msc3866_enabled = hs.config.experimental.msc3866.enabled
@@ -247,6 +248,21 @@ class AdminHandler:
                 )
                 writer.write_state(room_id, event_id, state)
 
+        # Get the user profile
+        profile = await self.get_user(UserID.from_string(user_id))
+        if profile is not None:
+            writer.write_profile(profile)
+
+        # Get all devices the user has
+        devices = await self._device_handler.get_devices_by_user(user_id)
+        writer.write_devices(devices)
+
+        # Get all connections the user has
+        connections = await self.get_whois(UserID.from_string(user_id))
+        writer.write_connections(
+            connections["devices"][""]["sessions"][0]["connections"]
+        )
+
         return writer.finished()
 
 
@@ -297,6 +313,33 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta):
         """
         raise NotImplementedError()
 
+    @abc.abstractmethod
+    def write_profile(self, profile: JsonDict) -> None:
+        """Write the profile of a user.
+
+        Args:
+            profile: The user profile.
+        """
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def write_devices(self, devices: List[JsonDict]) -> None:
+        """Write the devices of a user.
+
+        Args:
+            devices: The list of devices.
+        """
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def write_connections(self, connections: List[JsonDict]) -> None:
+        """Write the connections of a user.
+
+        Args:
+            connections: The list of connections / sessions.
+        """
+        raise NotImplementedError()
+
     @abc.abstractmethod
     def finished(self) -> Any:
         """Called when all data has successfully been exported and written.
diff --git a/tests/handlers/test_admin.py b/tests/handlers/test_admin.py
index c1579dac61..6f300b8e11 100644
--- a/tests/handlers/test_admin.py
+++ b/tests/handlers/test_admin.py
@@ -38,6 +38,7 @@ class ExfiltrateData(unittest.HomeserverTestCase):
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.admin_handler = hs.get_admin_handler()
+        self._store = hs.get_datastores().main
 
         self.user1 = self.register_user("user1", "password")
         self.token1 = self.login("user1", "password")
@@ -236,3 +237,62 @@ class ExfiltrateData(unittest.HomeserverTestCase):
         self.assertEqual(args[0], room_id)
         self.assertEqual(args[1].content["membership"], "knock")
         self.assertTrue(args[2])  # Assert there is at least one bit of state
+
+    def test_profile(self) -> None:
+        """Tests that the user profile gets exported."""
+        writer = Mock()
+
+        self.get_success(self.admin_handler.export_user_data(self.user2, writer))
+
+        writer.write_events.assert_not_called()
+        writer.write_profile.assert_called_once()
+
+        # check only a few values, not all available
+        args = writer.write_profile.call_args[0]
+        self.assertEqual(args[0]["name"], self.user2)
+        self.assertIn("displayname", args[0])
+        self.assertIn("avatar_url", args[0])
+        self.assertIn("threepids", args[0])
+        self.assertIn("external_ids", args[0])
+        self.assertIn("creation_ts", args[0])
+
+    def test_devices(self) -> None:
+        """Tests that user devices get exported."""
+        writer = Mock()
+
+        self.get_success(self.admin_handler.export_user_data(self.user2, writer))
+
+        writer.write_events.assert_not_called()
+        writer.write_devices.assert_called_once()
+
+        args = writer.write_devices.call_args[0]
+        self.assertEqual(len(args[0]), 1)
+        self.assertEqual(args[0][0]["user_id"], self.user2)
+        self.assertIn("device_id", args[0][0])
+        self.assertIsNone(args[0][0]["display_name"])
+        self.assertIsNone(args[0][0]["last_seen_user_agent"])
+        self.assertIsNone(args[0][0]["last_seen_ts"])
+        self.assertIsNone(args[0][0]["last_seen_ip"])
+
+    def test_connections(self) -> None:
+        """Tests that user sessions / connections get exported."""
+        # Insert a user IP
+        self.get_success(
+            self._store.insert_client_ip(
+                self.user2, "access_token", "ip", "user_agent", "MY_DEVICE"
+            )
+        )
+
+        writer = Mock()
+
+        self.get_success(self.admin_handler.export_user_data(self.user2, writer))
+
+        writer.write_events.assert_not_called()
+        writer.write_connections.assert_called_once()
+
+        args = writer.write_connections.call_args[0]
+        self.assertEqual(len(args[0]), 1)
+        self.assertEqual(args[0][0]["ip"], "ip")
+        self.assertEqual(args[0][0]["user_agent"], "user_agent")
+        self.assertGreater(args[0][0]["last_seen"], 0)
+        self.assertNotIn("access_token", args[0][0])
-- 
cgit 1.5.1


From 230a831c734246aa4db7bd842947c7ea277ca126 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 1 Feb 2023 15:45:10 -0500
Subject: Attempt to delete more duplicate rows in receipts_linearized table.
 (#14915)

The previous assumption was that the stream_id column was unique
(for a room ID, receipt type, user ID tuple), but this turned out to be
incorrect.

Now find the max stream ID, then map this back to a database-specific
row identifier and delete other rows which match the (room ID, receipt type,
user ID) tuple, but *not* the row ID.
---
 changelog.d/14915.bugfix                      |  1 +
 synapse/storage/databases/main/receipts.py    | 34 ++++++++++++++++++++-------
 tests/storage/databases/main/test_receipts.py |  4 +++-
 3 files changed, 30 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/14915.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14915.bugfix b/changelog.d/14915.bugfix
new file mode 100644
index 0000000000..4969e5450c
--- /dev/null
+++ b/changelog.d/14915.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.70.0 where the background updates to add non-thread unique indexes on receipts could fail when upgrading from 1.67.0 or earlier.
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index 3468f354e6..29972d5204 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -941,10 +941,14 @@ class ReceiptsBackgroundUpdateStore(SQLBaseStore):
         receipts."""
 
         def _remote_duplicate_receipts_txn(txn: LoggingTransaction) -> None:
+            if isinstance(self.database_engine, PostgresEngine):
+                ROW_ID_NAME = "ctid"
+            else:
+                ROW_ID_NAME = "rowid"
+
             # Identify any duplicate receipts arising from
             # https://github.com/matrix-org/synapse/issues/14406.
-            # We expect the following query to use the per-thread receipt index and take
-            # less than a minute.
+            # The following query takes less than a minute on matrix.org.
             sql = """
                 SELECT MAX(stream_id), room_id, receipt_type, user_id
                 FROM receipts_linearized
@@ -956,19 +960,33 @@ class ReceiptsBackgroundUpdateStore(SQLBaseStore):
             duplicate_keys = cast(List[Tuple[int, str, str, str]], list(txn))
 
             # Then remove duplicate receipts, keeping the one with the highest
-            # `stream_id`. There should only be a single receipt with any given
-            # `stream_id`.
-            for max_stream_id, room_id, receipt_type, user_id in duplicate_keys:
-                sql = """
+            # `stream_id`. Since there might be duplicate rows with the same
+            # `stream_id`, we delete by the database row ID (ctid / rowid) instead.
+            for stream_id, room_id, receipt_type, user_id in duplicate_keys:
+                sql = f"""
+                SELECT {ROW_ID_NAME}
+                FROM receipts_linearized
+                WHERE
+                    room_id = ? AND
+                    receipt_type = ? AND
+                    user_id = ? AND
+                    thread_id IS NULL AND
+                    stream_id = ?
+                LIMIT 1
+                """
+                txn.execute(sql, (room_id, receipt_type, user_id, stream_id))
+                row_id = cast(Tuple[str], txn.fetchone())[0]
+
+                sql = f"""
                     DELETE FROM receipts_linearized
                     WHERE
                         room_id = ? AND
                         receipt_type = ? AND
                         user_id = ? AND
                         thread_id IS NULL AND
-                        stream_id < ?
+                        {ROW_ID_NAME} != ?
                 """
-                txn.execute(sql, (room_id, receipt_type, user_id, max_stream_id))
+                txn.execute(sql, (room_id, receipt_type, user_id, row_id))
 
         await self.db_pool.runInteraction(
             self.RECEIPTS_LINEARIZED_UNIQUE_INDEX_UPDATE_NAME,
diff --git a/tests/storage/databases/main/test_receipts.py b/tests/storage/databases/main/test_receipts.py
index 68026e2830..ac77aec003 100644
--- a/tests/storage/databases/main/test_receipts.py
+++ b/tests/storage/databases/main/test_receipts.py
@@ -168,7 +168,9 @@ class ReceiptsBackgroundUpdateStoreTestCase(HomeserverTestCase):
                     {"stream_id": 6, "event_id": "$some_event"},
                 ],
                 (self.other_room_id, "m.read", self.user_id): [
-                    {"stream_id": 7, "event_id": "$some_event"}
+                    # It is possible for stream IDs to be duplicated.
+                    {"stream_id": 7, "event_id": "$some_event"},
+                    {"stream_id": 7, "event_id": "$some_event"},
                 ],
             },
             expected_unique_receipts={
-- 
cgit 1.5.1


From 1182ae50635db94d3c9c47990a0befcbf6306b62 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 1 Feb 2023 16:35:24 -0500
Subject: Add helper to parse an enum from query args & use it. (#14956)

The `parse_enum` helper pulls an enum value from the query string
(by delegating down to the parse_string helper with values generated
from the enum).

This is used to pull out "f" and "b" in most places and then we thread
the resulting Direction enum throughout more code.
---
 changelog.d/14956.misc                             |  1 +
 synapse/federation/federation_client.py            | 15 +++--
 synapse/federation/federation_server.py            | 12 +++-
 synapse/federation/transport/client.py             |  8 +--
 synapse/federation/transport/server/federation.py  |  7 ++-
 synapse/handlers/account_data.py                   |  2 +-
 synapse/handlers/receipts.py                       |  2 +-
 synapse/handlers/room.py                           |  9 +--
 synapse/http/servlet.py                            | 70 ++++++++++++++++++++++
 synapse/rest/admin/event_reports.py                | 12 +---
 synapse/rest/admin/federation.py                   |  7 ++-
 synapse/rest/admin/media.py                        | 21 ++++---
 synapse/rest/admin/rooms.py                        | 16 ++---
 synapse/rest/admin/statistics.py                   | 11 +---
 synapse/rest/admin/users.py                        |  5 +-
 synapse/rest/client/relations.py                   |  3 +-
 synapse/rest/client/room.py                        |  5 +-
 synapse/storage/databases/main/__init__.py         |  5 +-
 synapse/storage/databases/main/events_worker.py    | 11 ++--
 synapse/storage/databases/main/media_repository.py |  5 +-
 synapse/storage/databases/main/room.py             |  9 +--
 synapse/storage/databases/main/stats.py            |  6 +-
 synapse/storage/databases/main/transactions.py     | 13 ++--
 synapse/streams/config.py                          | 12 +---
 tests/rest/admin/test_event_reports.py             |  5 +-
 25 files changed, 176 insertions(+), 96 deletions(-)
 create mode 100644 changelog.d/14956.misc

(limited to 'synapse')

diff --git a/changelog.d/14956.misc b/changelog.d/14956.misc
new file mode 100644
index 0000000000..9f5384e60e
--- /dev/null
+++ b/changelog.d/14956.misc
@@ -0,0 +1 @@
+Add missing type hints.
\ No newline at end of file
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index feb32e40e5..8493ffc2e5 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -37,7 +37,7 @@ from typing import (
 import attr
 from prometheus_client import Counter
 
-from synapse.api.constants import EventContentFields, EventTypes, Membership
+from synapse.api.constants import Direction, EventContentFields, EventTypes, Membership
 from synapse.api.errors import (
     CodeMessageException,
     Codes,
@@ -1680,7 +1680,12 @@ class FederationClient(FederationBase):
         return result
 
     async def timestamp_to_event(
-        self, *, destinations: List[str], room_id: str, timestamp: int, direction: str
+        self,
+        *,
+        destinations: List[str],
+        room_id: str,
+        timestamp: int,
+        direction: Direction,
     ) -> Optional["TimestampToEventResponse"]:
         """
         Calls each remote federating server from `destinations` asking for their closest
@@ -1693,7 +1698,7 @@ class FederationClient(FederationBase):
             room_id: Room to fetch the event from
             timestamp: The point in time (inclusive) we should navigate from in
                 the given direction to find the closest event.
-            direction: ["f"|"b"] to indicate whether we should navigate forward
+            direction: indicates whether we should navigate forward
                 or backward from the given timestamp to find the closest event.
 
         Returns:
@@ -1738,7 +1743,7 @@ class FederationClient(FederationBase):
             return None
 
     async def _timestamp_to_event_from_destination(
-        self, destination: str, room_id: str, timestamp: int, direction: str
+        self, destination: str, room_id: str, timestamp: int, direction: Direction
     ) -> "TimestampToEventResponse":
         """
         Calls a remote federating server at `destination` asking for their
@@ -1751,7 +1756,7 @@ class FederationClient(FederationBase):
             room_id: Room to fetch the event from
             timestamp: The point in time (inclusive) we should navigate from in
                 the given direction to find the closest event.
-            direction: ["f"|"b"] to indicate whether we should navigate forward
+            direction: indicates whether we should navigate forward
                 or backward from the given timestamp to find the closest event.
 
         Returns:
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index c9a6dfd1a4..8d36172484 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -34,7 +34,13 @@ from prometheus_client import Counter, Gauge, Histogram
 from twisted.internet.abstract import isIPAddress
 from twisted.python import failure
 
-from synapse.api.constants import EduTypes, EventContentFields, EventTypes, Membership
+from synapse.api.constants import (
+    Direction,
+    EduTypes,
+    EventContentFields,
+    EventTypes,
+    Membership,
+)
 from synapse.api.errors import (
     AuthError,
     Codes,
@@ -218,7 +224,7 @@ class FederationServer(FederationBase):
         return 200, res
 
     async def on_timestamp_to_event_request(
-        self, origin: str, room_id: str, timestamp: int, direction: str
+        self, origin: str, room_id: str, timestamp: int, direction: Direction
     ) -> Tuple[int, Dict[str, Any]]:
         """When we receive a federated `/timestamp_to_event` request,
         handle all of the logic for validating and fetching the event.
@@ -228,7 +234,7 @@ class FederationServer(FederationBase):
             room_id: Room to fetch the event from
             timestamp: The point in time (inclusive) we should navigate from in
                 the given direction to find the closest event.
-            direction: ["f"|"b"] to indicate whether we should navigate forward
+            direction: indicates whether we should navigate forward
                 or backward from the given timestamp to find the closest event.
 
         Returns:
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 682666ab36..c05d598b70 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -32,7 +32,7 @@ from typing import (
 import attr
 import ijson
 
-from synapse.api.constants import Membership
+from synapse.api.constants import Direction, Membership
 from synapse.api.errors import Codes, HttpResponseException, SynapseError
 from synapse.api.room_versions import RoomVersion
 from synapse.api.urls import (
@@ -169,7 +169,7 @@ class TransportLayerClient:
         )
 
     async def timestamp_to_event(
-        self, destination: str, room_id: str, timestamp: int, direction: str
+        self, destination: str, room_id: str, timestamp: int, direction: Direction
     ) -> Union[JsonDict, List]:
         """
         Calls a remote federating server at `destination` asking for their
@@ -180,7 +180,7 @@ class TransportLayerClient:
             room_id: Room to fetch the event from
             timestamp: The point in time (inclusive) we should navigate from in
                 the given direction to find the closest event.
-            direction: ["f"|"b"] to indicate whether we should navigate forward
+            direction: indicates whether we should navigate forward
                 or backward from the given timestamp to find the closest event.
 
         Returns:
@@ -194,7 +194,7 @@ class TransportLayerClient:
             room_id,
         )
 
-        args = {"ts": [str(timestamp)], "dir": [direction]}
+        args = {"ts": [str(timestamp)], "dir": [direction.value]}
 
         remote_response = await self.client.get_json(
             destination, path=path, args=args, try_trailing_slash_on_400=True
diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py
index 17c427387e..f7ca87adc4 100644
--- a/synapse/federation/transport/server/federation.py
+++ b/synapse/federation/transport/server/federation.py
@@ -26,7 +26,7 @@ from typing import (
 
 from typing_extensions import Literal
 
-from synapse.api.constants import EduTypes
+from synapse.api.constants import Direction, EduTypes
 from synapse.api.errors import Codes, SynapseError
 from synapse.api.room_versions import RoomVersions
 from synapse.api.urls import FEDERATION_UNSTABLE_PREFIX, FEDERATION_V2_PREFIX
@@ -234,9 +234,10 @@ class FederationTimestampLookupServlet(BaseFederationServerServlet):
         room_id: str,
     ) -> Tuple[int, JsonDict]:
         timestamp = parse_integer_from_args(query, "ts", required=True)
-        direction = parse_string_from_args(
-            query, "dir", default="f", allowed_values=["f", "b"], required=True
+        direction_str = parse_string_from_args(
+            query, "dir", allowed_values=["f", "b"], required=True
         )
+        direction = Direction(direction_str)
 
         return await self.handler.on_timestamp_to_event_request(
             origin, room_id, timestamp, direction
diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py
index d500b21809..67e789eef7 100644
--- a/synapse/handlers/account_data.py
+++ b/synapse/handlers/account_data.py
@@ -314,7 +314,7 @@ class AccountDataEventSource(EventSource[int, JsonDict]):
     def __init__(self, hs: "HomeServer"):
         self.store = hs.get_datastores().main
 
-    def get_current_key(self, direction: str = "f") -> int:
+    def get_current_key(self) -> int:
         return self.store.get_max_account_data_stream_id()
 
     async def get_new_events(
diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py
index 6a4fed1156..04c61ae3dd 100644
--- a/synapse/handlers/receipts.py
+++ b/synapse/handlers/receipts.py
@@ -315,5 +315,5 @@ class ReceiptEventSource(EventSource[int, JsonDict]):
 
         return events, to_key
 
-    def get_current_key(self, direction: str = "f") -> int:
+    def get_current_key(self) -> int:
         return self.store.get_max_receipt_stream_id()
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 60a6d9cf3c..7ba7c4ff07 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -27,6 +27,7 @@ from typing_extensions import TypedDict
 
 import synapse.events.snapshot
 from synapse.api.constants import (
+    Direction,
     EventContentFields,
     EventTypes,
     GuestAccess,
@@ -1487,7 +1488,7 @@ class TimestampLookupHandler:
         requester: Requester,
         room_id: str,
         timestamp: int,
-        direction: str,
+        direction: Direction,
     ) -> Tuple[str, int]:
         """Find the closest event to the given timestamp in the given direction.
         If we can't find an event locally or the event we have locally is next to a gap,
@@ -1498,7 +1499,7 @@ class TimestampLookupHandler:
             room_id: Room to fetch the event from
             timestamp: The point in time (inclusive) we should navigate from in
                 the given direction to find the closest event.
-            direction: ["f"|"b"] to indicate whether we should navigate forward
+            direction: indicates whether we should navigate forward
                 or backward from the given timestamp to find the closest event.
 
         Returns:
@@ -1533,13 +1534,13 @@ class TimestampLookupHandler:
                 local_event_id, allow_none=False, allow_rejected=False
             )
 
-            if direction == "f":
+            if direction == Direction.FORWARDS:
                 # We only need to check for a backward gap if we're looking forwards
                 # to ensure there is nothing in between.
                 is_event_next_to_backward_gap = (
                     await self.store.is_event_next_to_backward_gap(local_event)
                 )
-            elif direction == "b":
+            elif direction == Direction.BACKWARDS:
                 # We only need to check for a forward gap if we're looking backwards
                 # to ensure there is nothing in between
                 is_event_next_to_forward_gap = (
diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py
index dead02cd5c..0070bd2940 100644
--- a/synapse/http/servlet.py
+++ b/synapse/http/servlet.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 """ This module contains base REST classes for constructing REST servlets. """
+import enum
 import logging
 from http import HTTPStatus
 from typing import (
@@ -362,6 +363,7 @@ def parse_string(
     request: Request,
     name: str,
     *,
+    default: Optional[str] = None,
     required: bool = False,
     allowed_values: Optional[Iterable[str]] = None,
     encoding: str = "ascii",
@@ -413,6 +415,74 @@ def parse_string(
     )
 
 
+EnumT = TypeVar("EnumT", bound=enum.Enum)
+
+
+@overload
+def parse_enum(
+    request: Request,
+    name: str,
+    E: Type[EnumT],
+    default: EnumT,
+) -> EnumT:
+    ...
+
+
+@overload
+def parse_enum(
+    request: Request,
+    name: str,
+    E: Type[EnumT],
+    *,
+    required: Literal[True],
+) -> EnumT:
+    ...
+
+
+def parse_enum(
+    request: Request,
+    name: str,
+    E: Type[EnumT],
+    default: Optional[EnumT] = None,
+    required: bool = False,
+) -> Optional[EnumT]:
+    """
+    Parse an enum parameter from the request query string.
+
+    Note that the enum *must only have string values*.
+
+    Args:
+        request: the twisted HTTP request.
+        name: the name of the query parameter.
+        E: the enum which represents valid values
+        default: enum value to use if the parameter is absent, defaults to None.
+        required: whether to raise a 400 SynapseError if the
+            parameter is absent, defaults to False.
+
+    Returns:
+        An enum value.
+
+    Raises:
+        SynapseError if the parameter is absent and required, or if the
+            parameter is present but its value is not one of the values
+            of the given enum.
+    """
+    # Assert the enum values are strings.
+    assert all(
+        isinstance(e.value, str) for e in E
+    ), "parse_enum only works with string values"
+    str_value = parse_string(
+        request,
+        name,
+        default=default.value if default is not None else None,
+        required=required,
+        allowed_values=[e.value for e in E],
+    )
+    if str_value is None:
+        return None
+    return E(str_value)
+
+
 def _parse_string_value(
     value: bytes,
     allowed_values: Optional[Iterable[str]],
diff --git a/synapse/rest/admin/event_reports.py b/synapse/rest/admin/event_reports.py
index 6d634eef70..a3beb74e2c 100644
--- a/synapse/rest/admin/event_reports.py
+++ b/synapse/rest/admin/event_reports.py
@@ -16,8 +16,9 @@ import logging
 from http import HTTPStatus
 from typing import TYPE_CHECKING, Tuple
 
+from synapse.api.constants import Direction
 from synapse.api.errors import Codes, NotFoundError, SynapseError
-from synapse.http.servlet import RestServlet, parse_integer, parse_string
+from synapse.http.servlet import RestServlet, parse_enum, parse_integer, parse_string
 from synapse.http.site import SynapseRequest
 from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin
 from synapse.types import JsonDict
@@ -60,7 +61,7 @@ class EventReportsRestServlet(RestServlet):
 
         start = parse_integer(request, "from", default=0)
         limit = parse_integer(request, "limit", default=100)
-        direction = parse_string(request, "dir", default="b")
+        direction = parse_enum(request, "dir", Direction, Direction.BACKWARDS)
         user_id = parse_string(request, "user_id")
         room_id = parse_string(request, "room_id")
 
@@ -78,13 +79,6 @@ class EventReportsRestServlet(RestServlet):
                 errcode=Codes.INVALID_PARAM,
             )
 
-        if direction not in ("f", "b"):
-            raise SynapseError(
-                HTTPStatus.BAD_REQUEST,
-                "Unknown direction: %s" % (direction,),
-                errcode=Codes.INVALID_PARAM,
-            )
-
         event_reports, total = await self.store.get_event_reports_paginate(
             start, limit, direction, user_id, room_id
         )
diff --git a/synapse/rest/admin/federation.py b/synapse/rest/admin/federation.py
index 023ed92144..e0ee55bd0e 100644
--- a/synapse/rest/admin/federation.py
+++ b/synapse/rest/admin/federation.py
@@ -15,9 +15,10 @@ import logging
 from http import HTTPStatus
 from typing import TYPE_CHECKING, Tuple
 
+from synapse.api.constants import Direction
 from synapse.api.errors import Codes, NotFoundError, SynapseError
 from synapse.federation.transport.server import Authenticator
-from synapse.http.servlet import RestServlet, parse_integer, parse_string
+from synapse.http.servlet import RestServlet, parse_enum, parse_integer, parse_string
 from synapse.http.site import SynapseRequest
 from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin
 from synapse.storage.databases.main.transactions import DestinationSortOrder
@@ -79,7 +80,7 @@ class ListDestinationsRestServlet(RestServlet):
             allowed_values=[dest.value for dest in DestinationSortOrder],
         )
 
-        direction = parse_string(request, "dir", default="f", allowed_values=("f", "b"))
+        direction = parse_enum(request, "dir", Direction, default=Direction.FORWARDS)
 
         destinations, total = await self._store.get_destinations_paginate(
             start, limit, destination, order_by, direction
@@ -192,7 +193,7 @@ class DestinationMembershipRestServlet(RestServlet):
                 errcode=Codes.INVALID_PARAM,
             )
 
-        direction = parse_string(request, "dir", default="f", allowed_values=("f", "b"))
+        direction = parse_enum(request, "dir", Direction, default=Direction.FORWARDS)
 
         rooms, total = await self._store.get_destination_rooms_paginate(
             destination, start, limit, direction
diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py
index 73470f09ae..0d072c42a7 100644
--- a/synapse/rest/admin/media.py
+++ b/synapse/rest/admin/media.py
@@ -17,9 +17,16 @@ import logging
 from http import HTTPStatus
 from typing import TYPE_CHECKING, Tuple
 
+from synapse.api.constants import Direction
 from synapse.api.errors import Codes, NotFoundError, SynapseError
 from synapse.http.server import HttpServer
-from synapse.http.servlet import RestServlet, parse_boolean, parse_integer, parse_string
+from synapse.http.servlet import (
+    RestServlet,
+    parse_boolean,
+    parse_enum,
+    parse_integer,
+    parse_string,
+)
 from synapse.http.site import SynapseRequest
 from synapse.rest.admin._base import (
     admin_patterns,
@@ -389,7 +396,7 @@ class UserMediaRestServlet(RestServlet):
         # to newest media is on top for backward compatibility.
         if b"order_by" not in request.args and b"dir" not in request.args:
             order_by = MediaSortOrder.CREATED_TS.value
-            direction = "b"
+            direction = Direction.BACKWARDS
         else:
             order_by = parse_string(
                 request,
@@ -397,8 +404,8 @@ class UserMediaRestServlet(RestServlet):
                 default=MediaSortOrder.CREATED_TS.value,
                 allowed_values=[sort_order.value for sort_order in MediaSortOrder],
             )
-            direction = parse_string(
-                request, "dir", default="f", allowed_values=("f", "b")
+            direction = parse_enum(
+                request, "dir", Direction, default=Direction.FORWARDS
             )
 
         media, total = await self.store.get_local_media_by_user_paginate(
@@ -447,7 +454,7 @@ class UserMediaRestServlet(RestServlet):
         # to newest media is on top for backward compatibility.
         if b"order_by" not in request.args and b"dir" not in request.args:
             order_by = MediaSortOrder.CREATED_TS.value
-            direction = "b"
+            direction = Direction.BACKWARDS
         else:
             order_by = parse_string(
                 request,
@@ -455,8 +462,8 @@ class UserMediaRestServlet(RestServlet):
                 default=MediaSortOrder.CREATED_TS.value,
                 allowed_values=[sort_order.value for sort_order in MediaSortOrder],
             )
-            direction = parse_string(
-                request, "dir", default="f", allowed_values=("f", "b")
+            direction = parse_enum(
+                request, "dir", Direction, default=Direction.FORWARDS
             )
 
         media, _ = await self.store.get_local_media_by_user_paginate(
diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py
index e957aa28ca..1d6e4982d7 100644
--- a/synapse/rest/admin/rooms.py
+++ b/synapse/rest/admin/rooms.py
@@ -16,13 +16,14 @@ from http import HTTPStatus
 from typing import TYPE_CHECKING, List, Optional, Tuple, cast
 from urllib import parse as urlparse
 
-from synapse.api.constants import EventTypes, JoinRules, Membership
+from synapse.api.constants import Direction, EventTypes, JoinRules, Membership
 from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError
 from synapse.api.filtering import Filter
 from synapse.http.servlet import (
     ResolveRoomIdMixin,
     RestServlet,
     assert_params_in_dict,
+    parse_enum,
     parse_integer,
     parse_json_object_from_request,
     parse_string,
@@ -224,15 +225,8 @@ class ListRoomRestServlet(RestServlet):
                 errcode=Codes.INVALID_PARAM,
             )
 
-        direction = parse_string(request, "dir", default="f")
-        if direction not in ("f", "b"):
-            raise SynapseError(
-                HTTPStatus.BAD_REQUEST,
-                "Unknown direction: %s" % (direction,),
-                errcode=Codes.INVALID_PARAM,
-            )
-
-        reverse_order = True if direction == "b" else False
+        direction = parse_enum(request, "dir", Direction, default=Direction.FORWARDS)
+        reverse_order = True if direction == Direction.BACKWARDS else False
 
         # Return list of rooms according to parameters
         rooms, total_rooms = await self.store.get_rooms_paginate(
@@ -949,7 +943,7 @@ class RoomTimestampToEventRestServlet(RestServlet):
         await assert_user_is_admin(self._auth, requester)
 
         timestamp = parse_integer(request, "ts", required=True)
-        direction = parse_string(request, "dir", default="f", allowed_values=["f", "b"])
+        direction = parse_enum(request, "dir", Direction, default=Direction.FORWARDS)
 
         (
             event_id,
diff --git a/synapse/rest/admin/statistics.py b/synapse/rest/admin/statistics.py
index 3b142b8402..9c45f4650d 100644
--- a/synapse/rest/admin/statistics.py
+++ b/synapse/rest/admin/statistics.py
@@ -16,8 +16,9 @@ import logging
 from http import HTTPStatus
 from typing import TYPE_CHECKING, Tuple
 
+from synapse.api.constants import Direction
 from synapse.api.errors import Codes, SynapseError
-from synapse.http.servlet import RestServlet, parse_integer, parse_string
+from synapse.http.servlet import RestServlet, parse_enum, parse_integer, parse_string
 from synapse.http.site import SynapseRequest
 from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin
 from synapse.storage.databases.main.stats import UserSortOrder
@@ -102,13 +103,7 @@ class UserMediaStatisticsRestServlet(RestServlet):
                 errcode=Codes.INVALID_PARAM,
             )
 
-        direction = parse_string(request, "dir", default="f")
-        if direction not in ("f", "b"):
-            raise SynapseError(
-                HTTPStatus.BAD_REQUEST,
-                "Unknown direction: %s" % (direction,),
-                errcode=Codes.INVALID_PARAM,
-            )
+        direction = parse_enum(request, "dir", Direction, default=Direction.FORWARDS)
 
         users_media, total = await self.store.get_users_media_usage_paginate(
             start, limit, from_ts, until_ts, order_by, direction, search_term
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 0841b89c1a..b9dca8ef3a 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -18,12 +18,13 @@ import secrets
 from http import HTTPStatus
 from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
 
-from synapse.api.constants import UserTypes
+from synapse.api.constants import Direction, UserTypes
 from synapse.api.errors import Codes, NotFoundError, SynapseError
 from synapse.http.servlet import (
     RestServlet,
     assert_params_in_dict,
     parse_boolean,
+    parse_enum,
     parse_integer,
     parse_json_object_from_request,
     parse_string,
@@ -120,7 +121,7 @@ class UsersRestServletV2(RestServlet):
             ),
         )
 
-        direction = parse_string(request, "dir", default="f", allowed_values=("f", "b"))
+        direction = parse_enum(request, "dir", Direction, default=Direction.FORWARDS)
 
         users, total = await self.store.get_users_paginate(
             start,
diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py
index 9dd59196d9..7456d6f507 100644
--- a/synapse/rest/client/relations.py
+++ b/synapse/rest/client/relations.py
@@ -16,6 +16,7 @@ import logging
 import re
 from typing import TYPE_CHECKING, Optional, Tuple
 
+from synapse.api.constants import Direction
 from synapse.handlers.relations import ThreadsListInclude
 from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_integer, parse_string
@@ -59,7 +60,7 @@ class RelationPaginationServlet(RestServlet):
         requester = await self.auth.get_user_by_req(request, allow_guest=True)
 
         pagination_config = await PaginationConfig.from_request(
-            self._store, request, default_limit=5, default_dir="b"
+            self._store, request, default_limit=5, default_dir=Direction.BACKWARDS
         )
 
         # The unstable version of this API returns an extra field for client
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index 790614d721..d0db85cca7 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -26,7 +26,7 @@ from prometheus_client.core import Histogram
 from twisted.web.server import Request
 
 from synapse import event_auth
-from synapse.api.constants import EventTypes, Membership
+from synapse.api.constants import Direction, EventTypes, Membership
 from synapse.api.errors import (
     AuthError,
     Codes,
@@ -44,6 +44,7 @@ from synapse.http.servlet import (
     RestServlet,
     assert_params_in_dict,
     parse_boolean,
+    parse_enum,
     parse_integer,
     parse_json_object_from_request,
     parse_string,
@@ -1297,7 +1298,7 @@ class TimestampLookupRestServlet(RestServlet):
         await self._auth.check_user_in_room_or_world_readable(room_id, requester)
 
         timestamp = parse_integer(request, "ts", required=True)
-        direction = parse_string(request, "dir", default="f", allowed_values=["f", "b"])
+        direction = parse_enum(request, "dir", Direction, default=Direction.FORWARDS)
 
         (
             event_id,
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index 0e47592be3..837dc7646e 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -17,6 +17,7 @@
 import logging
 from typing import TYPE_CHECKING, List, Optional, Tuple, cast
 
+from synapse.api.constants import Direction
 from synapse.config.homeserver import HomeServerConfig
 from synapse.storage.database import (
     DatabasePool,
@@ -167,7 +168,7 @@ class DataStore(
         guests: bool = True,
         deactivated: bool = False,
         order_by: str = UserSortOrder.NAME.value,
-        direction: str = "f",
+        direction: Direction = Direction.FORWARDS,
         approved: bool = True,
     ) -> Tuple[List[JsonDict], int]:
         """Function to retrieve a paginated list of users from
@@ -197,7 +198,7 @@ class DataStore(
             # Set ordering
             order_by_column = UserSortOrder(order_by).value
 
-            if direction == "b":
+            if direction == Direction.BACKWARDS:
                 order = "DESC"
             else:
                 order = "ASC"
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index f42af34a2f..d7d08369ca 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -38,7 +38,7 @@ from typing_extensions import Literal
 
 from twisted.internet import defer
 
-from synapse.api.constants import EventTypes
+from synapse.api.constants import Direction, EventTypes
 from synapse.api.errors import NotFoundError, SynapseError
 from synapse.api.room_versions import (
     KNOWN_ROOM_VERSIONS,
@@ -2240,7 +2240,7 @@ class EventsWorkerStore(SQLBaseStore):
         )
 
     async def get_event_id_for_timestamp(
-        self, room_id: str, timestamp: int, direction: str
+        self, room_id: str, timestamp: int, direction: Direction
     ) -> Optional[str]:
         """Find the closest event to the given timestamp in the given direction.
 
@@ -2248,14 +2248,14 @@ class EventsWorkerStore(SQLBaseStore):
             room_id: Room to fetch the event from
             timestamp: The point in time (inclusive) we should navigate from in
                 the given direction to find the closest event.
-            direction: ["f"|"b"] to indicate whether we should navigate forward
+            direction: indicates whether we should navigate forward
                 or backward from the given timestamp to find the closest event.
 
         Returns:
             The closest event_id otherwise None if we can't find any event in
             the given direction.
         """
-        if direction == "b":
+        if direction == Direction.BACKWARDS:
             # Find closest event *before* a given timestamp. We use descending
             # (which gives values largest to smallest) because we want the
             # largest possible timestamp *before* the given timestamp.
@@ -2307,9 +2307,6 @@ class EventsWorkerStore(SQLBaseStore):
 
             return None
 
-        if direction not in ("f", "b"):
-            raise ValueError("Unknown direction: %s" % (direction,))
-
         return await self.db_pool.runInteraction(
             "get_event_id_for_timestamp_txn",
             get_event_id_for_timestamp_txn,
diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py
index 9b172a64d8..b202c5eb87 100644
--- a/synapse/storage/databases/main/media_repository.py
+++ b/synapse/storage/databases/main/media_repository.py
@@ -26,6 +26,7 @@ from typing import (
     cast,
 )
 
+from synapse.api.constants import Direction
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
     DatabasePool,
@@ -176,7 +177,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
         limit: int,
         user_id: str,
         order_by: str = MediaSortOrder.CREATED_TS.value,
-        direction: str = "f",
+        direction: Direction = Direction.FORWARDS,
     ) -> Tuple[List[Dict[str, Any]], int]:
         """Get a paginated list of metadata for a local piece of media
         which an user_id has uploaded
@@ -199,7 +200,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
             # Set ordering
             order_by_column = MediaSortOrder(order_by).value
 
-            if direction == "b":
+            if direction == Direction.BACKWARDS:
                 order = "DESC"
             else:
                 order = "ASC"
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index fbbc018887..4ddb27f686 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -35,6 +35,7 @@ from typing import (
 import attr
 
 from synapse.api.constants import (
+    Direction,
     EventContentFields,
     EventTypes,
     JoinRules,
@@ -2204,7 +2205,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
         self,
         start: int,
         limit: int,
-        direction: str = "b",
+        direction: Direction = Direction.BACKWARDS,
         user_id: Optional[str] = None,
         room_id: Optional[str] = None,
     ) -> Tuple[List[Dict[str, Any]], int]:
@@ -2213,8 +2214,8 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
         Args:
             start: event offset to begin the query from
             limit: number of rows to retrieve
-            direction: Whether to fetch the most recent first (`"b"`) or the
-                oldest first (`"f"`)
+            direction: Whether to fetch the most recent first (backwards) or the
+                oldest first (forwards)
             user_id: search for user_id. Ignored if user_id is None
             room_id: search for room_id. Ignored if room_id is None
         Returns:
@@ -2236,7 +2237,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
                 filters.append("er.room_id LIKE ?")
                 args.extend(["%" + room_id + "%"])
 
-            if direction == "b":
+            if direction == Direction.BACKWARDS:
                 order = "DESC"
             else:
                 order = "ASC"
diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py
index 0c1cbd540d..d7b7d0c3c9 100644
--- a/synapse/storage/databases/main/stats.py
+++ b/synapse/storage/databases/main/stats.py
@@ -22,7 +22,7 @@ from typing_extensions import Counter
 
 from twisted.internet.defer import DeferredLock
 
-from synapse.api.constants import EventContentFields, EventTypes, Membership
+from synapse.api.constants import Direction, EventContentFields, EventTypes, Membership
 from synapse.api.errors import StoreError
 from synapse.storage.database import (
     DatabasePool,
@@ -663,7 +663,7 @@ class StatsStore(StateDeltasStore):
         from_ts: Optional[int] = None,
         until_ts: Optional[int] = None,
         order_by: Optional[str] = UserSortOrder.USER_ID.value,
-        direction: Optional[str] = "f",
+        direction: Direction = Direction.FORWARDS,
         search_term: Optional[str] = None,
     ) -> Tuple[List[JsonDict], int]:
         """Function to retrieve a paginated list of users and their uploaded local media
@@ -714,7 +714,7 @@ class StatsStore(StateDeltasStore):
                     500, "Incorrect value for order_by provided: %s" % order_by
                 )
 
-            if direction == "b":
+            if direction == Direction.BACKWARDS:
                 order = "DESC"
             else:
                 order = "ASC"
diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py
index f8c6877ee8..6b33d809b6 100644
--- a/synapse/storage/databases/main/transactions.py
+++ b/synapse/storage/databases/main/transactions.py
@@ -19,6 +19,7 @@ from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple, cast
 import attr
 from canonicaljson import encode_canonical_json
 
+from synapse.api.constants import Direction
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.storage._base import db_to_json
 from synapse.storage.database import (
@@ -496,7 +497,7 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
         limit: int,
         destination: Optional[str] = None,
         order_by: str = DestinationSortOrder.DESTINATION.value,
-        direction: str = "f",
+        direction: Direction = Direction.FORWARDS,
     ) -> Tuple[List[JsonDict], int]:
         """Function to retrieve a paginated list of destinations.
         This will return a json list of destinations and the
@@ -518,7 +519,7 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
         ) -> Tuple[List[JsonDict], int]:
             order_by_column = DestinationSortOrder(order_by).value
 
-            if direction == "b":
+            if direction == Direction.BACKWARDS:
                 order = "DESC"
             else:
                 order = "ASC"
@@ -550,7 +551,11 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
         )
 
     async def get_destination_rooms_paginate(
-        self, destination: str, start: int, limit: int, direction: str = "f"
+        self,
+        destination: str,
+        start: int,
+        limit: int,
+        direction: Direction = Direction.FORWARDS,
     ) -> Tuple[List[JsonDict], int]:
         """Function to retrieve a paginated list of destination's rooms.
         This will return a json list of rooms and the
@@ -569,7 +574,7 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
             txn: LoggingTransaction,
         ) -> Tuple[List[JsonDict], int]:
 
-            if direction == "b":
+            if direction == Direction.BACKWARDS:
                 order = "DESC"
             else:
                 order = "ASC"
diff --git a/synapse/streams/config.py b/synapse/streams/config.py
index 5cb7875181..a044280410 100644
--- a/synapse/streams/config.py
+++ b/synapse/streams/config.py
@@ -18,7 +18,7 @@ import attr
 
 from synapse.api.constants import Direction
 from synapse.api.errors import SynapseError
-from synapse.http.servlet import parse_integer, parse_string
+from synapse.http.servlet import parse_enum, parse_integer, parse_string
 from synapse.http.site import SynapseRequest
 from synapse.storage.databases.main import DataStore
 from synapse.types import StreamToken
@@ -44,15 +44,9 @@ class PaginationConfig:
         store: "DataStore",
         request: SynapseRequest,
         default_limit: int,
-        default_dir: str = "f",
+        default_dir: Direction = Direction.FORWARDS,
     ) -> "PaginationConfig":
-        direction_str = parse_string(
-            request,
-            "dir",
-            default=default_dir,
-            allowed_values=[Direction.FORWARDS.value, Direction.BACKWARDS.value],
-        )
-        direction = Direction(direction_str)
+        direction = parse_enum(request, "dir", Direction, default=default_dir)
 
         from_tok_str = parse_string(request, "from")
         to_tok_str = parse_string(request, "to")
diff --git a/tests/rest/admin/test_event_reports.py b/tests/rest/admin/test_event_reports.py
index 8a4e5c3f77..233eba3516 100644
--- a/tests/rest/admin/test_event_reports.py
+++ b/tests/rest/admin/test_event_reports.py
@@ -280,7 +280,10 @@ class EventReportsTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual(400, channel.code, msg=channel.json_body)
         self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"])
-        self.assertEqual("Unknown direction: bar", channel.json_body["error"])
+        self.assertEqual(
+            "Query parameter 'dir' must be one of ['b', 'f']",
+            channel.json_body["error"],
+        )
 
     def test_limit_is_negative(self) -> None:
         """
-- 
cgit 1.5.1


From 58214dbb9b8a85c0dafc65162e9c20ee1885ce4e Mon Sep 17 00:00:00 2001
From: realtyem <realtyem@gmail.com>
Date: Wed, 1 Feb 2023 17:42:45 -0600
Subject: Allow enabling the asyncio reactor in complement (#14858)

Signed-off-by: Jason Little realtyem@gmail.com
---
 .github/workflows/tests.yml                    |  5 ++++-
 changelog.d/14858.misc                         |  1 +
 docker/complement/conf/start_for_complement.sh | 13 ++++++++++++-
 docs/development/contributing_guide.md         |  1 +
 scripts-dev/complement.sh                      |  5 +++++
 synapse/app/complement_fork_starter.py         | 21 +++++++++++++++++++--
 6 files changed, 42 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14858.misc

(limited to 'synapse')

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index f184727ced..6561b490bc 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -541,8 +541,11 @@ jobs:
 
       - run: |
           set -o pipefail
-          POSTGRES=${{ (matrix.database == 'Postgres') && 1 || '' }} WORKERS=${{ (matrix.arrangement == 'workers') && 1 || '' }} COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | synapse/.ci/scripts/gotestfmt
+          COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | synapse/.ci/scripts/gotestfmt
         shell: bash
+        env:
+          POSTGRES: ${{ (matrix.database == 'Postgres') && 1 || '' }}
+          WORKERS: ${{ (matrix.arrangement == 'workers') && 1 || '' }}
         name: Run Complement Tests
 
   cargo-test:
diff --git a/changelog.d/14858.misc b/changelog.d/14858.misc
new file mode 100644
index 0000000000..c48f40cd38
--- /dev/null
+++ b/changelog.d/14858.misc
@@ -0,0 +1 @@
+Run the integration test suites with the asyncio reactor enabled in CI.
diff --git a/docker/complement/conf/start_for_complement.sh b/docker/complement/conf/start_for_complement.sh
index 49d79745b0..af13209c54 100755
--- a/docker/complement/conf/start_for_complement.sh
+++ b/docker/complement/conf/start_for_complement.sh
@@ -6,7 +6,7 @@ set -e
 
 echo "Complement Synapse launcher"
 echo "  Args: $@"
-echo "  Env: SYNAPSE_COMPLEMENT_DATABASE=$SYNAPSE_COMPLEMENT_DATABASE SYNAPSE_COMPLEMENT_USE_WORKERS=$SYNAPSE_COMPLEMENT_USE_WORKERS"
+echo "  Env: SYNAPSE_COMPLEMENT_DATABASE=$SYNAPSE_COMPLEMENT_DATABASE SYNAPSE_COMPLEMENT_USE_WORKERS=$SYNAPSE_COMPLEMENT_USE_WORKERS SYNAPSE_COMPLEMENT_USE_ASYNCIO_REACTOR=$SYNAPSE_COMPLEMENT_USE_ASYNCIO_REACTOR"
 
 function log {
     d=$(date +"%Y-%m-%d %H:%M:%S,%3N")
@@ -76,6 +76,17 @@ else
 fi
 
 
+if [[ -n "$SYNAPSE_COMPLEMENT_USE_ASYNCIO_REACTOR" ]]; then
+  if [[ -n "$SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER" ]]; then
+    export SYNAPSE_COMPLEMENT_FORKING_LAUNCHER_ASYNC_IO_REACTOR="1"
+  else
+    export SYNAPSE_ASYNC_IO_REACTOR="1"
+  fi
+else
+  export SYNAPSE_ASYNC_IO_REACTOR="0"
+fi
+
+
 # Add Complement's appservice registration directory, if there is one
 # (It can be absent when there are no application services in this test!)
 if [ -d /complement/appservice ]; then
diff --git a/docs/development/contributing_guide.md b/docs/development/contributing_guide.md
index 3cbfe96987..36bc884684 100644
--- a/docs/development/contributing_guide.md
+++ b/docs/development/contributing_guide.md
@@ -332,6 +332,7 @@ The above will run a monolithic (single-process) Synapse with SQLite as the data
     [here](https://github.com/matrix-org/synapse/blob/develop/docker/configure_workers_and_start.py#L54).
     A safe example would be `WORKER_TYPES="federation_inbound, federation_sender, synchrotron"`.
     See the [worker documentation](../workers.md) for additional information on workers.
+- Passing `ASYNCIO_REACTOR=1` as an environment variable to use the Twisted asyncio reactor instead of the default one.
 
 To increase the log level for the tests, set `SYNAPSE_TEST_LOG_LEVEL`, e.g:
 ```sh
diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh
index e72d96fd16..66aaa3d848 100755
--- a/scripts-dev/complement.sh
+++ b/scripts-dev/complement.sh
@@ -228,6 +228,11 @@ else
   test_tags="$test_tags,msc2716"
 fi
 
+if [[ -n "$ASYNCIO_REACTOR" ]]; then
+  # Enable the Twisted asyncio reactor
+  export PASS_SYNAPSE_COMPLEMENT_USE_ASYNCIO_REACTOR=true
+fi
+
 
 if [[ -n "$SYNAPSE_TEST_LOG_LEVEL" ]]; then
   # Set the log level to what is desired
diff --git a/synapse/app/complement_fork_starter.py b/synapse/app/complement_fork_starter.py
index 8c0f4a57e7..920538f44d 100644
--- a/synapse/app/complement_fork_starter.py
+++ b/synapse/app/complement_fork_starter.py
@@ -110,6 +110,8 @@ def _worker_entrypoint(
     and then kick off the worker's main() function.
     """
 
+    from synapse.util.stringutils import strtobool
+
     sys.argv = args
 
     # reset the custom signal handlers that we installed, so that the children start
@@ -117,9 +119,24 @@ def _worker_entrypoint(
     for sig, handler in _original_signal_handlers.items():
         signal.signal(sig, handler)
 
-    from twisted.internet.epollreactor import EPollReactor
+    # Install the asyncio reactor if the
+    # SYNAPSE_COMPLEMENT_FORKING_LAUNCHER_ASYNC_IO_REACTOR is set to 1. The
+    # SYNAPSE_ASYNC_IO_REACTOR variable would be used, but then causes
+    # synapse/__init__.py to also try to install an asyncio reactor.
+    if strtobool(
+        os.environ.get("SYNAPSE_COMPLEMENT_FORKING_LAUNCHER_ASYNC_IO_REACTOR", "0")
+    ):
+        import asyncio
+
+        from twisted.internet.asyncioreactor import AsyncioSelectorReactor
+
+        reactor = AsyncioSelectorReactor(asyncio.get_event_loop())
+        proxy_reactor._install_real_reactor(reactor)
+    else:
+        from twisted.internet.epollreactor import EPollReactor
+
+        proxy_reactor._install_real_reactor(EPollReactor())
 
-    proxy_reactor._install_real_reactor(EPollReactor())
     func()
 
 
-- 
cgit 1.5.1


From 2186ebed6c9dfe15cfa8a8a7c97c2a89a907f9a8 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 2 Feb 2023 16:49:14 +0000
Subject: Fetch fewer events when getting hosts in room (#14962)

---
 changelog.d/14962.feature                    |  1 +
 synapse/storage/databases/main/roommember.py | 46 ++++++++++++++++++++++++++--
 2 files changed, 45 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14962.feature

(limited to 'synapse')

diff --git a/changelog.d/14962.feature b/changelog.d/14962.feature
new file mode 100644
index 0000000000..38f26012f2
--- /dev/null
+++ b/changelog.d/14962.feature
@@ -0,0 +1 @@
+Improve performance when joining or sending an event in large rooms.
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 8e2ba7b7b4..ea6a5e2f34 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+from itertools import chain
 from typing import (
     TYPE_CHECKING,
     AbstractSet,
@@ -1131,12 +1132,33 @@ class RoomMemberWorkerStore(EventsWorkerStore):
             else:
                 # The cache doesn't match the state group or prev state group,
                 # so we calculate the result from first principles.
+                #
+                # We need to fetch all hosts joined to the room according to `state` by
+                # inspecting all join memberships in `state`. However, if the `state` is
+                # relatively recent then many of its events are likely to be held in
+                # the current state of the room, which is easily available and likely
+                # cached.
+                #
+                # We therefore compute the set of `state` events not in the
+                # current state and only fetch those.
+                current_memberships = (
+                    await self._get_approximate_current_memberships_in_room(room_id)
+                )
+                unknown_state_events = {}
+                joined_users_in_current_state = []
+
+                for (type, state_key), event_id in state.items():
+                    if event_id not in current_memberships:
+                        unknown_state_events[type, state_key] = event_id
+                    elif current_memberships[event_id] == Membership.JOIN:
+                        joined_users_in_current_state.append(state_key)
+
                 joined_user_ids = await self.get_joined_user_ids_from_state(
-                    room_id, state
+                    room_id, unknown_state_events
                 )
 
                 cache.hosts_to_joined_users = {}
-                for user_id in joined_user_ids:
+                for user_id in chain(joined_user_ids, joined_users_in_current_state):
                     host = intern_string(get_domain_from_id(user_id))
                     cache.hosts_to_joined_users.setdefault(host, set()).add(user_id)
 
@@ -1147,6 +1169,26 @@ class RoomMemberWorkerStore(EventsWorkerStore):
 
         return frozenset(cache.hosts_to_joined_users)
 
+    async def _get_approximate_current_memberships_in_room(
+        self, room_id: str
+    ) -> Mapping[str, Optional[str]]:
+        """Build a map from event id to membership, for all events in the current state.
+
+        The event ids of non-membership events (e.g. `m.room.power_levels`) are present
+        in the result, mapped to values of `None`.
+
+        The result is approximate for partially-joined rooms. It is fully accurate
+        for fully-joined rooms.
+        """
+
+        rows = await self.db_pool.simple_select_list(
+            "current_state_events",
+            keyvalues={"room_id": room_id},
+            retcols=("event_id", "membership"),
+            desc="_get_approximate_current_memberships_in_room",
+        )
+        return {row["event_id"]: row["membership"] for row in rows}
+
     @cached(max_entries=10000)
     def _get_joined_hosts_cache(self, room_id: str) -> "_JoinedHostsCache":
         return _JoinedHostsCache()
-- 
cgit 1.5.1


From f36da501be4287e723a0a53ac4568d836676a15d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 2 Feb 2023 11:58:20 -0500
Subject: Do not calculate presence or ephemeral events when they are filtered
 out (#14970)

This expands the previous optimisation from being only for initial
sync to being for all sync requests.

It also inverts some of the logic to be inclusive instead of exclusive.
---
 changelog.d/14970.misc   |  1 +
 synapse/handlers/sync.py | 19 +++++++++----------
 2 files changed, 10 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/14970.misc

(limited to 'synapse')

diff --git a/changelog.d/14970.misc b/changelog.d/14970.misc
new file mode 100644
index 0000000000..3657623602
--- /dev/null
+++ b/changelog.d/14970.misc
@@ -0,0 +1 @@
+Improve performance of `/sync` in a few situations.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 5235e29460..0cb8d5ef4b 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1459,10 +1459,12 @@ class SyncHandler:
             sync_result_builder, account_data_by_room
         )
 
-        block_all_presence_data = (
-            since_token is None and sync_config.filter_collection.blocks_all_presence()
+        # Presence data is included if the server has it enabled and not filtered out.
+        include_presence_data = (
+            self.hs_config.server.use_presence
+            and not sync_config.filter_collection.blocks_all_presence()
         )
-        if self.hs_config.server.use_presence and not block_all_presence_data:
+        if include_presence_data:
             logger.debug("Fetching presence data")
             await self._generate_sync_entry_for_presence(
                 sync_result_builder,
@@ -1841,15 +1843,12 @@ class SyncHandler:
         """
 
         since_token = sync_result_builder.since_token
-
-        # 1. Start by fetching all ephemeral events in rooms we've joined (if required).
         user_id = sync_result_builder.sync_config.user.to_string()
-        block_all_room_ephemeral = (
-            since_token is None
-            and sync_result_builder.sync_config.filter_collection.blocks_all_room_ephemeral()
-        )
 
-        if block_all_room_ephemeral:
+        # 1. Start by fetching all ephemeral events in rooms we've joined (if required).
+        if (
+            sync_result_builder.sync_config.filter_collection.blocks_all_room_ephemeral()
+        ):
             ephemeral_by_room: Dict[str, List[JsonDict]] = {}
         else:
             now_token, ephemeral_by_room = await self.ephemeral_by_room(
-- 
cgit 1.5.1


From da05b70af5bf84825332b2ac0d63c6deda4b376f Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 2 Feb 2023 13:45:12 -0500
Subject: Skip unused calculations in sync handler. (#14908)

If a sync request does not need to calculate per-room entries &
is not generating presence & is not generating device list data
(e.g. during initial sync) avoid the expensive calculation of room
specific data.

This is a micro-optimisation for clients syncing simply to receive
to-device information.
---
 changelog.d/14908.misc   |   1 +
 synapse/api/filtering.py |   3 +
 synapse/handlers/sync.py | 258 ++++++++++++++++++++++++-----------------------
 3 files changed, 137 insertions(+), 125 deletions(-)
 create mode 100644 changelog.d/14908.misc

(limited to 'synapse')

diff --git a/changelog.d/14908.misc b/changelog.d/14908.misc
new file mode 100644
index 0000000000..3657623602
--- /dev/null
+++ b/changelog.d/14908.misc
@@ -0,0 +1 @@
+Improve performance of `/sync` in a few situations.
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index 4cf8f0cc8e..2b5af264b4 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -283,6 +283,9 @@ class FilterCollection:
             await self._room_filter.filter(events)
         )
 
+    def blocks_all_rooms(self) -> bool:
+        return self._room_filter.filters_all_rooms()
+
     def blocks_all_presence(self) -> bool:
         return (
             self._presence_filter.filters_all_types()
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 0cb8d5ef4b..3566537894 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1448,41 +1448,67 @@ class SyncHandler:
             sync_result_builder
         )
 
-        logger.debug("Fetching room data")
-
-        (
-            newly_joined_rooms,
-            newly_joined_or_invited_or_knocked_users,
-            newly_left_rooms,
-            newly_left_users,
-        ) = await self._generate_sync_entry_for_rooms(
-            sync_result_builder, account_data_by_room
-        )
-
         # Presence data is included if the server has it enabled and not filtered out.
-        include_presence_data = (
+        include_presence_data = bool(
             self.hs_config.server.use_presence
             and not sync_config.filter_collection.blocks_all_presence()
         )
-        if include_presence_data:
-            logger.debug("Fetching presence data")
-            await self._generate_sync_entry_for_presence(
-                sync_result_builder,
+        # Device list updates are sent if a since token is provided.
+        include_device_list_updates = bool(since_token and since_token.device_list_key)
+
+        # If we do not care about the rooms or things which depend on the room
+        # data (namely presence and device list updates), then we can skip
+        # this process completely.
+        device_lists = DeviceListUpdates()
+        if (
+            not sync_result_builder.sync_config.filter_collection.blocks_all_rooms()
+            or include_presence_data
+            or include_device_list_updates
+        ):
+            logger.debug("Fetching room data")
+
+            # Note that _generate_sync_entry_for_rooms sets sync_result_builder.joined, which
+            # is used in calculate_user_changes below.
+            (
                 newly_joined_rooms,
-                newly_joined_or_invited_or_knocked_users,
+                newly_left_rooms,
+            ) = await self._generate_sync_entry_for_rooms(
+                sync_result_builder, account_data_by_room
             )
 
+            # Work out which users have joined or left rooms we're in. We use this
+            # to build the presence and device_list parts of the sync response in
+            # `_generate_sync_entry_for_presence` and
+            # `_generate_sync_entry_for_device_list` respectively.
+            if include_presence_data or include_device_list_updates:
+                # This uses the sync_result_builder.joined which is set in
+                # `_generate_sync_entry_for_rooms`; if that didn't find any joined
+                # rooms for some reason, it is a no-op.
+                (
+                    newly_joined_or_invited_or_knocked_users,
+                    newly_left_users,
+                ) = sync_result_builder.calculate_user_changes()
+
+                if include_presence_data:
+                    logger.debug("Fetching presence data")
+                    await self._generate_sync_entry_for_presence(
+                        sync_result_builder,
+                        newly_joined_rooms,
+                        newly_joined_or_invited_or_knocked_users,
+                    )
+
+                if include_device_list_updates:
+                    device_lists = await self._generate_sync_entry_for_device_list(
+                        sync_result_builder,
+                        newly_joined_rooms=newly_joined_rooms,
+                        newly_joined_or_invited_or_knocked_users=newly_joined_or_invited_or_knocked_users,
+                        newly_left_rooms=newly_left_rooms,
+                        newly_left_users=newly_left_users,
+                    )
+
         logger.debug("Fetching to-device data")
         await self._generate_sync_entry_for_to_device(sync_result_builder)
 
-        device_lists = await self._generate_sync_entry_for_device_list(
-            sync_result_builder,
-            newly_joined_rooms=newly_joined_rooms,
-            newly_joined_or_invited_or_knocked_users=newly_joined_or_invited_or_knocked_users,
-            newly_left_rooms=newly_left_rooms,
-            newly_left_users=newly_left_users,
-        )
-
         logger.debug("Fetching OTK data")
         device_id = sync_config.device_id
         one_time_keys_count: JsonDict = {}
@@ -1551,6 +1577,7 @@ class SyncHandler:
 
         user_id = sync_result_builder.sync_config.user.to_string()
         since_token = sync_result_builder.since_token
+        assert since_token is not None
 
         # Take a copy since these fields will be mutated later.
         newly_joined_or_invited_or_knocked_users = set(
@@ -1558,92 +1585,85 @@ class SyncHandler:
         )
         newly_left_users = set(newly_left_users)
 
-        if since_token and since_token.device_list_key:
-            # We want to figure out what user IDs the client should refetch
-            # device keys for, and which users we aren't going to track changes
-            # for anymore.
-            #
-            # For the first step we check:
-            #   a. if any users we share a room with have updated their devices,
-            #      and
-            #   b. we also check if we've joined any new rooms, or if a user has
-            #      joined a room we're in.
-            #
-            # For the second step we just find any users we no longer share a
-            # room with by looking at all users that have left a room plus users
-            # that were in a room we've left.
+        # We want to figure out what user IDs the client should refetch
+        # device keys for, and which users we aren't going to track changes
+        # for anymore.
+        #
+        # For the first step we check:
+        #   a. if any users we share a room with have updated their devices,
+        #      and
+        #   b. we also check if we've joined any new rooms, or if a user has
+        #      joined a room we're in.
+        #
+        # For the second step we just find any users we no longer share a
+        # room with by looking at all users that have left a room plus users
+        # that were in a room we've left.
 
-            users_that_have_changed = set()
+        users_that_have_changed = set()
 
-            joined_rooms = sync_result_builder.joined_room_ids
+        joined_rooms = sync_result_builder.joined_room_ids
 
-            # Step 1a, check for changes in devices of users we share a room
-            # with
-            #
-            # We do this in two different ways depending on what we have cached.
-            # If we already have a list of all the user that have changed since
-            # the last sync then it's likely more efficient to compare the rooms
-            # they're in with the rooms the syncing user is in.
-            #
-            # If we don't have that info cached then we get all the users that
-            # share a room with our user and check if those users have changed.
-            cache_result = self.store.get_cached_device_list_changes(
-                since_token.device_list_key
-            )
-            if cache_result.hit:
-                changed_users = cache_result.entities
-
-                result = await self.store.get_rooms_for_users(changed_users)
-
-                for changed_user_id, entries in result.items():
-                    # Check if the changed user shares any rooms with the user,
-                    # or if the changed user is the syncing user (as we always
-                    # want to include device list updates of their own devices).
-                    if user_id == changed_user_id or any(
-                        rid in joined_rooms for rid in entries
-                    ):
-                        users_that_have_changed.add(changed_user_id)
-            else:
-                users_that_have_changed = (
-                    await self._device_handler.get_device_changes_in_shared_rooms(
-                        user_id,
-                        sync_result_builder.joined_room_ids,
-                        from_token=since_token,
-                    )
-                )
-
-            # Step 1b, check for newly joined rooms
-            for room_id in newly_joined_rooms:
-                joined_users = await self.store.get_users_in_room(room_id)
-                newly_joined_or_invited_or_knocked_users.update(joined_users)
+        # Step 1a, check for changes in devices of users we share a room
+        # with
+        #
+        # We do this in two different ways depending on what we have cached.
+        # If we already have a list of all the users that have changed since
+        # the last sync then it's likely more efficient to compare the rooms
+        # they're in with the rooms the syncing user is in.
+        #
+        # If we don't have that info cached then we get all the users that
+        # share a room with our user and check if those users have changed.
+        cache_result = self.store.get_cached_device_list_changes(
+            since_token.device_list_key
+        )
+        if cache_result.hit:
+            changed_users = cache_result.entities
 
-            # TODO: Check that these users are actually new, i.e. either they
-            # weren't in the previous sync *or* they left and rejoined.
-            users_that_have_changed.update(newly_joined_or_invited_or_knocked_users)
+            result = await self.store.get_rooms_for_users(changed_users)
 
-            user_signatures_changed = (
-                await self.store.get_users_whose_signatures_changed(
-                    user_id, since_token.device_list_key
+            for changed_user_id, entries in result.items():
+                # Check if the changed user shares any rooms with the user,
+                # or if the changed user is the syncing user (as we always
+                # want to include device list updates of their own devices).
+                if user_id == changed_user_id or any(
+                    rid in joined_rooms for rid in entries
+                ):
+                    users_that_have_changed.add(changed_user_id)
+        else:
+            users_that_have_changed = (
+                await self._device_handler.get_device_changes_in_shared_rooms(
+                    user_id,
+                    sync_result_builder.joined_room_ids,
+                    from_token=since_token,
                 )
             )
-            users_that_have_changed.update(user_signatures_changed)
 
-            # Now find users that we no longer track
-            for room_id in newly_left_rooms:
-                left_users = await self.store.get_users_in_room(room_id)
-                newly_left_users.update(left_users)
+        # Step 1b, check for newly joined rooms
+        for room_id in newly_joined_rooms:
+            joined_users = await self.store.get_users_in_room(room_id)
+            newly_joined_or_invited_or_knocked_users.update(joined_users)
 
-            # Remove any users that we still share a room with.
-            left_users_rooms = await self.store.get_rooms_for_users(newly_left_users)
-            for user_id, entries in left_users_rooms.items():
-                if any(rid in joined_rooms for rid in entries):
-                    newly_left_users.discard(user_id)
+        # TODO: Check that these users are actually new, i.e. either they
+        # weren't in the previous sync *or* they left and rejoined.
+        users_that_have_changed.update(newly_joined_or_invited_or_knocked_users)
 
-            return DeviceListUpdates(
-                changed=users_that_have_changed, left=newly_left_users
-            )
-        else:
-            return DeviceListUpdates()
+        user_signatures_changed = await self.store.get_users_whose_signatures_changed(
+            user_id, since_token.device_list_key
+        )
+        users_that_have_changed.update(user_signatures_changed)
+
+        # Now find users that we no longer track
+        for room_id in newly_left_rooms:
+            left_users = await self.store.get_users_in_room(room_id)
+            newly_left_users.update(left_users)
+
+        # Remove any users that we still share a room with.
+        left_users_rooms = await self.store.get_rooms_for_users(newly_left_users)
+        for user_id, entries in left_users_rooms.items():
+            if any(rid in joined_rooms for rid in entries):
+                newly_left_users.discard(user_id)
+
+        return DeviceListUpdates(changed=users_that_have_changed, left=newly_left_users)
 
     @trace
     async def _generate_sync_entry_for_to_device(
@@ -1720,6 +1740,7 @@ class SyncHandler:
         since_token = sync_result_builder.since_token
 
         if since_token and not sync_result_builder.full_state:
+            # TODO Do not fetch room account data if it will be unused.
             (
                 global_account_data,
                 account_data_by_room,
@@ -1736,6 +1757,7 @@ class SyncHandler:
                     sync_config.user
                 )
         else:
+            # TODO Do not fetch room account data if it will be unused.
             (
                 global_account_data,
                 account_data_by_room,
@@ -1818,7 +1840,7 @@ class SyncHandler:
         self,
         sync_result_builder: "SyncResultBuilder",
         account_data_by_room: Dict[str, Dict[str, JsonDict]],
-    ) -> Tuple[AbstractSet[str], AbstractSet[str], AbstractSet[str], AbstractSet[str]]:
+    ) -> Tuple[AbstractSet[str], AbstractSet[str]]:
         """Generates the rooms portion of the sync response. Populates the
         `sync_result_builder` with the result.
 
@@ -1831,24 +1853,22 @@ class SyncHandler:
             account_data_by_room: Dictionary of per room account data
 
         Returns:
-            Returns a 4-tuple describing rooms the user has joined or left, and users who've
-            joined or left rooms any rooms the user is in. This gets used later in
-            `_generate_sync_entry_for_device_list`.
+            Returns a 2-tuple describing rooms the user has joined or left.
 
             Its entries are:
             - newly_joined_rooms
-            - newly_joined_or_invited_or_knocked_users
             - newly_left_rooms
-            - newly_left_users
         """
 
         since_token = sync_result_builder.since_token
         user_id = sync_result_builder.sync_config.user.to_string()
 
         # 1. Start by fetching all ephemeral events in rooms we've joined (if required).
-        if (
-            sync_result_builder.sync_config.filter_collection.blocks_all_room_ephemeral()
-        ):
+        block_all_room_ephemeral = (
+            sync_result_builder.sync_config.filter_collection.blocks_all_rooms()
+            or sync_result_builder.sync_config.filter_collection.blocks_all_room_ephemeral()
+        )
+        if block_all_room_ephemeral:
             ephemeral_by_room: Dict[str, List[JsonDict]] = {}
         else:
             now_token, ephemeral_by_room = await self.ephemeral_by_room(
@@ -1870,7 +1890,7 @@ class SyncHandler:
                     )
                     if not tags_by_room:
                         logger.debug("no-oping sync")
-                        return set(), set(), set(), set()
+                        return set(), set()
 
         # 3. Work out which rooms need reporting in the sync response.
         ignored_users = await self.store.ignored_users(user_id)
@@ -1899,6 +1919,7 @@ class SyncHandler:
         # joined or archived).
         async def handle_room_entries(room_entry: "RoomSyncResultBuilder") -> None:
             logger.debug("Generating room entry for %s", room_entry.room_id)
+            # Note that this mutates sync_result_builder.{joined,archived}.
             await self._generate_room_entry(
                 sync_result_builder,
                 room_entry,
@@ -1915,20 +1936,7 @@ class SyncHandler:
         sync_result_builder.invited.extend(invited)
         sync_result_builder.knocked.extend(knocked)
 
-        # 5. Work out which users have joined or left rooms we're in. We use this
-        # to build the device_list part of the sync response in
-        # `_generate_sync_entry_for_device_list`.
-        (
-            newly_joined_or_invited_or_knocked_users,
-            newly_left_users,
-        ) = sync_result_builder.calculate_user_changes()
-
-        return (
-            set(newly_joined_rooms),
-            newly_joined_or_invited_or_knocked_users,
-            set(newly_left_rooms),
-            newly_left_users,
-        )
+        return set(newly_joined_rooms), set(newly_left_rooms)
 
     async def _have_rooms_changed(
         self, sync_result_builder: "SyncResultBuilder"
-- 
cgit 1.5.1


From 8e9fc28c6aff6bb1aa960dfde4f9736fee1ae4fb Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 3 Feb 2023 08:27:31 -0500
Subject: Reload the pyo3-log config when the Python logging config changes.
 (#14976)

Since pyo3-log is initialized very early in the Python start-up
it caches the state of the loggers before they're fully initialized
(and thus are essentially disabled). Whenever we reload the
logging configuration we now also tell pyo3-log to discard
any cached logging configuration it has; it will refetch the
current logging configuration from Python at the next point
it logs.

This fixes Rust log lines not appearing in the homeserver logs.
---
 changelog.d/14976.bugfix                |  1 +
 rust/src/lib.rs                         | 17 +++++++++++--
 stubs/synapse/synapse_rust/__init__.pyi |  1 +
 synapse/config/logger.py                | 42 +++++++++++++++++++--------------
 tests/test_utils/logging_setup.py       |  3 +++
 5 files changed, 44 insertions(+), 20 deletions(-)
 create mode 100644 changelog.d/14976.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14976.bugfix b/changelog.d/14976.bugfix
new file mode 100644
index 0000000000..0cde046c0e
--- /dev/null
+++ b/changelog.d/14976.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.68.0 where logging from the Rust module was not properly logged.
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index c7b60e58a7..ce67f58611 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -1,7 +1,13 @@
+use lazy_static::lazy_static;
 use pyo3::prelude::*;
+use pyo3_log::ResetHandle;
 
 pub mod push;
 
+lazy_static! {
+    static ref LOGGING_HANDLE: ResetHandle = pyo3_log::init();
+}
+
 /// Returns the hash of all the rust source files at the time it was compiled.
 ///
 /// Used by python to detect if the rust library is outdated.
@@ -17,13 +23,20 @@ fn sum_as_string(a: usize, b: usize) -> PyResult<String> {
     Ok((a + b).to_string())
 }
 
+/// Reset the cached logging configuration of pyo3-log to pick up any changes
+/// in the Python logging configuration.
+///
+#[pyfunction]
+fn reset_logging_config() {
+    LOGGING_HANDLE.reset();
+}
+
 /// The entry point for defining the Python module.
 #[pymodule]
 fn synapse_rust(py: Python<'_>, m: &PyModule) -> PyResult<()> {
-    pyo3_log::init();
-
     m.add_function(wrap_pyfunction!(sum_as_string, m)?)?;
     m.add_function(wrap_pyfunction!(get_rust_file_digest, m)?)?;
+    m.add_function(wrap_pyfunction!(reset_logging_config, m)?)?;
 
     push::register_module(py, m)?;
 
diff --git a/stubs/synapse/synapse_rust/__init__.pyi b/stubs/synapse/synapse_rust/__init__.pyi
index 8658d3138f..d25c609106 100644
--- a/stubs/synapse/synapse_rust/__init__.pyi
+++ b/stubs/synapse/synapse_rust/__init__.pyi
@@ -1,2 +1,3 @@
 def sum_as_string(a: int, b: int) -> str: ...
 def get_rust_file_digest() -> str: ...
+def reset_logging_config() -> None: ...
diff --git a/synapse/config/logger.py b/synapse/config/logger.py
index 5468b963a2..56db875b25 100644
--- a/synapse/config/logger.py
+++ b/synapse/config/logger.py
@@ -34,6 +34,7 @@ from twisted.logger import (
 
 from synapse.logging.context import LoggingContextFilter
 from synapse.logging.filter import MetadataFilter
+from synapse.synapse_rust import reset_logging_config
 from synapse.types import JsonDict
 
 from ..util import SYNAPSE_VERSION
@@ -200,24 +201,6 @@ def _setup_stdlib_logging(
     """
     Set up Python standard library logging.
     """
-    if log_config_path is None:
-        log_format = (
-            "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s"
-            " - %(message)s"
-        )
-
-        logger = logging.getLogger("")
-        logger.setLevel(logging.INFO)
-        logging.getLogger("synapse.storage.SQL").setLevel(logging.INFO)
-
-        formatter = logging.Formatter(log_format)
-
-        handler = logging.StreamHandler()
-        handler.setFormatter(formatter)
-        logger.addHandler(handler)
-    else:
-        # Load the logging configuration.
-        _load_logging_config(log_config_path)
 
     # We add a log record factory that runs all messages through the
     # LoggingContextFilter so that we get the context *at the time we log*
@@ -237,6 +220,26 @@ def _setup_stdlib_logging(
 
     logging.setLogRecordFactory(factory)
 
+    # Configure the logger with the initial configuration.
+    if log_config_path is None:
+        log_format = (
+            "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s"
+            " - %(message)s"
+        )
+
+        logger = logging.getLogger("")
+        logger.setLevel(logging.INFO)
+        logging.getLogger("synapse.storage.SQL").setLevel(logging.INFO)
+
+        formatter = logging.Formatter(log_format)
+
+        handler = logging.StreamHandler()
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+    else:
+        # Load the logging configuration.
+        _load_logging_config(log_config_path)
+
     # Route Twisted's native logging through to the standard library logging
     # system.
     observer = STDLibLogObserver()
@@ -294,6 +297,9 @@ def _load_logging_config(log_config_path: str) -> None:
 
     logging.config.dictConfig(log_config)
 
+    # Blow away the pyo3-log cache so that it reloads the configuration.
+    reset_logging_config()
+
 
 def _reload_logging_config(log_config_path: Optional[str]) -> None:
     """
diff --git a/tests/test_utils/logging_setup.py b/tests/test_utils/logging_setup.py
index 9228454c9e..304c7b98c5 100644
--- a/tests/test_utils/logging_setup.py
+++ b/tests/test_utils/logging_setup.py
@@ -17,6 +17,7 @@ import os
 import twisted.logger
 
 from synapse.logging.context import LoggingContextFilter
+from synapse.synapse_rust import reset_logging_config
 
 
 class ToTwistedHandler(logging.Handler):
@@ -52,3 +53,5 @@ def setup_logging():
 
     log_level = os.environ.get("SYNAPSE_TEST_LOG_LEVEL", "ERROR")
     root_logger.setLevel(log_level)
+
+    reset_logging_config()
-- 
cgit 1.5.1


From 0a686d1d13c497af84f62ca192a401fdc18387ab Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Fri, 3 Feb 2023 15:39:59 +0000
Subject: Faster joins: Refactor handling of servers in room (#14954)

Ensure that the list of servers in a partial state room always contains
the server we joined off.

Also refactor `get_partial_state_servers_at_join` to return `None` when
the given room is no longer partial stated, to explicitly indicate when
the room has partial state. Otherwise it's not clear whether an empty
list means that the room has full state, or the room is partial stated,
but the server we joined off told us that there are no servers in the
room.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/14954.misc                  |  1 +
 synapse/federation/federation_client.py | 33 ++++++++++++++--------
 synapse/federation/sender/__init__.py   |  2 +-
 synapse/handlers/device.py              |  1 +
 synapse/handlers/federation.py          | 20 +++++++++----
 synapse/storage/controllers/state.py    |  3 +-
 synapse/storage/databases/main/room.py  | 50 ++++++++++++++++++++++-----------
 tests/handlers/test_federation.py       |  2 +-
 tests/handlers/test_room_member.py      |  2 +-
 9 files changed, 77 insertions(+), 37 deletions(-)
 create mode 100644 changelog.d/14954.misc

(limited to 'synapse')

diff --git a/changelog.d/14954.misc b/changelog.d/14954.misc
new file mode 100644
index 0000000000..b86b6bf01e
--- /dev/null
+++ b/changelog.d/14954.misc
@@ -0,0 +1 @@
+Faster room joins: Refactor internal handling of servers in room to never store an empty list.
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 8493ffc2e5..0ac85a3be7 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -19,6 +19,7 @@ import itertools
 import logging
 from typing import (
     TYPE_CHECKING,
+    AbstractSet,
     Awaitable,
     Callable,
     Collection,
@@ -110,8 +111,9 @@ class SendJoinResult:
     # True if 'state' elides non-critical membership events
     partial_state: bool
 
-    # if 'partial_state' is set, a list of the servers in the room (otherwise empty)
-    servers_in_room: List[str]
+    # If 'partial_state' is set, a set of the servers in the room (otherwise empty).
+    # Always contains the server we joined off.
+    servers_in_room: AbstractSet[str]
 
 
 class FederationClient(FederationBase):
@@ -1152,15 +1154,24 @@ class FederationClient(FederationBase):
                     % (auth_chain_create_events,)
                 )
 
-            if response.members_omitted and not response.servers_in_room:
-                raise InvalidResponseError(
-                    "members_omitted was set, but no servers were listed in the room"
-                )
+            servers_in_room = None
+            if response.servers_in_room is not None:
+                servers_in_room = set(response.servers_in_room)
 
-            if response.members_omitted and not partial_state:
-                raise InvalidResponseError(
-                    "members_omitted was set, but we asked for full state"
-                )
+            if response.members_omitted:
+                if not servers_in_room:
+                    raise InvalidResponseError(
+                        "members_omitted was set, but no servers were listed in the room"
+                    )
+
+                if not partial_state:
+                    raise InvalidResponseError(
+                        "members_omitted was set, but we asked for full state"
+                    )
+
+                # `servers_in_room` is supposed to be a complete list.
+                # Fix things up in case the remote homeserver is badly behaved.
+                servers_in_room.add(destination)
 
             return SendJoinResult(
                 event=event,
@@ -1168,7 +1179,7 @@ class FederationClient(FederationBase):
                 auth_chain=signed_auth,
                 origin=destination,
                 partial_state=response.members_omitted,
-                servers_in_room=response.servers_in_room or [],
+                servers_in_room=servers_in_room or frozenset(),
             )
 
         # MSC3083 defines additional error codes for room joins.
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index 30ebd62883..43421a9c72 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -447,7 +447,7 @@ class FederationSender(AbstractFederationSender):
                             )
                         )
 
-                        if len(partial_state_destinations) > 0:
+                        if partial_state_destinations is not None:
                             destinations = partial_state_destinations
 
                     if destinations is None:
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 5c06073901..6f7963df43 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -859,6 +859,7 @@ class DeviceHandler(DeviceWorkerHandler):
         known_hosts_at_join = await self.store.get_partial_state_servers_at_join(
             room_id
         )
+        assert known_hosts_at_join is not None
         potentially_changed_hosts.difference_update(known_hosts_at_join)
 
         potentially_changed_hosts.discard(self.server_name)
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index dc1cbf5c3d..7f64130e0a 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -20,7 +20,17 @@ import itertools
 import logging
 from enum import Enum
 from http import HTTPStatus
-from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Tuple, Union
+from typing import (
+    TYPE_CHECKING,
+    AbstractSet,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Set,
+    Tuple,
+    Union,
+)
 
 import attr
 from prometheus_client import Histogram
@@ -169,7 +179,7 @@ class FederationHandler:
         # A dictionary mapping room IDs to (initial destination, other destinations)
         # tuples.
         self._partial_state_syncs_maybe_needing_restart: Dict[
-            str, Tuple[Optional[str], StrCollection]
+            str, Tuple[Optional[str], AbstractSet[str]]
         ] = {}
         # A lock guarding the partial state flag for rooms.
         # When the lock is held for a given room, no other concurrent code may
@@ -1720,7 +1730,7 @@ class FederationHandler:
     def _start_partial_state_room_sync(
         self,
         initial_destination: Optional[str],
-        other_destinations: StrCollection,
+        other_destinations: AbstractSet[str],
         room_id: str,
     ) -> None:
         """Starts the background process to resync the state of a partial state room,
@@ -1802,7 +1812,7 @@ class FederationHandler:
     async def _sync_partial_state_room(
         self,
         initial_destination: Optional[str],
-        other_destinations: StrCollection,
+        other_destinations: AbstractSet[str],
         room_id: str,
     ) -> None:
         """Background process to resync the state of a partial-state room
@@ -1939,7 +1949,7 @@ class FederationHandler:
 
 def _prioritise_destinations_for_partial_state_resync(
     initial_destination: Optional[str],
-    other_destinations: StrCollection,
+    other_destinations: AbstractSet[str],
     room_id: str,
 ) -> StrCollection:
     """Work out the order in which we should ask servers to resync events.
diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py
index 2045169b9a..52efd4a171 100644
--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@@ -569,10 +569,11 @@ class StateStorageController:
         is arbitrary for rooms with partial state.
         """
         # We have to read this list first to mitigate races with un-partial stating.
-        # This will be empty for rooms with full state.
         hosts_at_join = await self.stores.main.get_partial_state_servers_at_join(
             room_id
         )
+        if hosts_at_join is None:
+            hosts_at_join = frozenset()
 
         hosts_from_state = await self.stores.main.get_current_hosts_in_room(room_id)
 
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 4ddb27f686..644bbb8878 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -18,6 +18,7 @@ from abc import abstractmethod
 from enum import Enum
 from typing import (
     TYPE_CHECKING,
+    AbstractSet,
     Any,
     Awaitable,
     Collection,
@@ -25,7 +26,6 @@ from typing import (
     List,
     Mapping,
     Optional,
-    Sequence,
     Set,
     Tuple,
     Union,
@@ -109,7 +109,7 @@ class RoomSortOrder(Enum):
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class PartialStateResyncInfo:
     joined_via: Optional[str]
-    servers_in_room: List[str] = attr.ib(factory=list)
+    servers_in_room: Set[str] = attr.ib(factory=set)
 
 
 class RoomWorkerStore(CacheInvalidationWorkerStore):
@@ -1193,21 +1193,35 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
             get_rooms_for_retention_period_in_range_txn,
         )
 
-    @cached(iterable=True)
-    async def get_partial_state_servers_at_join(self, room_id: str) -> Sequence[str]:
-        """Gets the list of servers in a partial state room at the time we joined it.
+    async def get_partial_state_servers_at_join(
+        self, room_id: str
+    ) -> Optional[AbstractSet[str]]:
+        """Gets the set of servers in a partial state room at the time we joined it.
 
         Returns:
             The `servers_in_room` list from the `/send_join` response for partial state
             rooms. May not be accurate or complete, as it comes from a remote
             homeserver.
-            An empty list for full state rooms.
+            `None` for full state rooms.
         """
-        return await self.db_pool.simple_select_onecol(
-            "partial_state_rooms_servers",
-            keyvalues={"room_id": room_id},
-            retcol="server_name",
-            desc="get_partial_state_servers_at_join",
+        servers_in_room = await self._get_partial_state_servers_at_join(room_id)
+
+        if len(servers_in_room) == 0:
+            return None
+
+        return servers_in_room
+
+    @cached(iterable=True)
+    async def _get_partial_state_servers_at_join(
+        self, room_id: str
+    ) -> AbstractSet[str]:
+        return frozenset(
+            await self.db_pool.simple_select_onecol(
+                "partial_state_rooms_servers",
+                keyvalues={"room_id": room_id},
+                retcol="server_name",
+                desc="get_partial_state_servers_at_join",
+            )
         )
 
     async def get_partial_state_room_resync_info(
@@ -1252,7 +1266,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
                 # partial-joined between the two SELECTs, but this is unlikely to happen
                 # in practice.)
                 continue
-            entry.servers_in_room.append(server_name)
+            entry.servers_in_room.add(server_name)
 
         return room_servers
 
@@ -1942,7 +1956,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
     async def store_partial_state_room(
         self,
         room_id: str,
-        servers: Collection[str],
+        servers: AbstractSet[str],
         device_lists_stream_id: int,
         joined_via: str,
     ) -> None:
@@ -1957,11 +1971,13 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
 
         Args:
             room_id: the ID of the room
-            servers: other servers known to be in the room
+            servers: other servers known to be in the room. Must include `joined_via`.
             device_lists_stream_id: the device_lists stream ID at the time when we first
                 joined the room.
             joined_via: the server name we requested a partial join from.
         """
+        assert joined_via in servers
+
         await self.db_pool.runInteraction(
             "store_partial_state_room",
             self._store_partial_state_room_txn,
@@ -1975,7 +1991,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
         self,
         txn: LoggingTransaction,
         room_id: str,
-        servers: Collection[str],
+        servers: AbstractSet[str],
         device_lists_stream_id: int,
         joined_via: str,
     ) -> None:
@@ -1998,7 +2014,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
         )
         self._invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,))
         self._invalidate_cache_and_stream(
-            txn, self.get_partial_state_servers_at_join, (room_id,)
+            txn, self._get_partial_state_servers_at_join, (room_id,)
         )
 
     async def write_partial_state_rooms_join_event_id(
@@ -2409,7 +2425,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
         )
         self._invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,))
         self._invalidate_cache_and_stream(
-            txn, self.get_partial_state_servers_at_join, (room_id,)
+            txn, self._get_partial_state_servers_at_join, (room_id,)
         )
 
         DatabasePool.simple_insert_txn(
diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py
index c1558c40c3..57675fa407 100644
--- a/tests/handlers/test_federation.py
+++ b/tests/handlers/test_federation.py
@@ -656,7 +656,7 @@ class PartialJoinTestCase(unittest.FederatingHomeserverTestCase):
                         EVENT_INVITATION_MEMBERSHIP,
                     ],
                     partial_state=True,
-                    servers_in_room=["example.com"],
+                    servers_in_room={"example.com"},
                 )
             )
         )
diff --git a/tests/handlers/test_room_member.py b/tests/handlers/test_room_member.py
index 6bbfd5dc84..6a38893b68 100644
--- a/tests/handlers/test_room_member.py
+++ b/tests/handlers/test_room_member.py
@@ -171,7 +171,7 @@ class TestJoinsLimitedByPerRoomRateLimiter(FederatingHomeserverTestCase):
                     state=[create_event],
                     auth_chain=[create_event],
                     partial_state=False,
-                    servers_in_room=[],
+                    servers_in_room=frozenset(),
                 )
             )
         )
-- 
cgit 1.5.1


From 52700a0bcf2caaa792b94e2a8c12f29d1c61b91e Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 3 Feb 2023 11:28:20 -0500
Subject: Support the backwards compatibility features in MSC3952. (#14958)

If the feature is enabled and the event has a `m.mentions` property,
skip processing of the legacy mentions rules.
---
 changelog.d/14958.feature                   |   1 +
 rust/benches/evaluator.rs                   |   4 +
 rust/src/push/evaluator.rs                  |  19 +++
 stubs/synapse/synapse_rust/push.pyi         |   1 +
 synapse/push/bulk_push_rule_evaluator.py    |   9 +-
 tests/push/test_bulk_push_rule_evaluator.py | 191 ++++++++++++++++++++--------
 tests/push/test_push_rule_evaluator.py      |  18 ++-
 7 files changed, 184 insertions(+), 59 deletions(-)
 create mode 100644 changelog.d/14958.feature

(limited to 'synapse')

diff --git a/changelog.d/14958.feature b/changelog.d/14958.feature
new file mode 100644
index 0000000000..8293e99eff
--- /dev/null
+++ b/changelog.d/14958.feature
@@ -0,0 +1 @@
+Experimental support for [MSC3952](https://github.com/matrix-org/matrix-spec-proposals/pull/3952): intentional mentions.
diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs
index 6b16a3f75b..859d54961c 100644
--- a/rust/benches/evaluator.rs
+++ b/rust/benches/evaluator.rs
@@ -33,6 +33,7 @@ fn bench_match_exact(b: &mut Bencher) {
 
     let eval = PushRuleEvaluator::py_new(
         flattened_keys,
+        false,
         BTreeSet::new(),
         false,
         10,
@@ -71,6 +72,7 @@ fn bench_match_word(b: &mut Bencher) {
 
     let eval = PushRuleEvaluator::py_new(
         flattened_keys,
+        false,
         BTreeSet::new(),
         false,
         10,
@@ -109,6 +111,7 @@ fn bench_match_word_miss(b: &mut Bencher) {
 
     let eval = PushRuleEvaluator::py_new(
         flattened_keys,
+        false,
         BTreeSet::new(),
         false,
         10,
@@ -147,6 +150,7 @@ fn bench_eval_message(b: &mut Bencher) {
 
     let eval = PushRuleEvaluator::py_new(
         flattened_keys,
+        false,
         BTreeSet::new(),
         false,
         10,
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index aa71202e43..da6f704c0e 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -68,6 +68,8 @@ pub struct PushRuleEvaluator {
     /// The "content.body", if any.
     body: String,
 
+    /// True if the event has a mentions property and MSC3952 support is enabled.
+    has_mentions: bool,
     /// The user mentions that were part of the message.
     user_mentions: BTreeSet<String>,
     /// True if the message is a room message.
@@ -105,6 +107,7 @@ impl PushRuleEvaluator {
     #[new]
     pub fn py_new(
         flattened_keys: BTreeMap<String, String>,
+        has_mentions: bool,
         user_mentions: BTreeSet<String>,
         room_mention: bool,
         room_member_count: u64,
@@ -123,6 +126,7 @@ impl PushRuleEvaluator {
         Ok(PushRuleEvaluator {
             flattened_keys,
             body,
+            has_mentions,
             user_mentions,
             room_mention,
             room_member_count,
@@ -155,6 +159,19 @@ impl PushRuleEvaluator {
             }
 
             let rule_id = &push_rule.rule_id().to_string();
+
+            // For backwards-compatibility the legacy mention rules are disabled
+            // if the event contains the 'm.mentions' property (and if the
+            // experimental feature is enabled, both of these are represented
+            // by the has_mentions flag).
+            if self.has_mentions
+                && (rule_id == "global/override/.m.rule.contains_display_name"
+                    || rule_id == "global/content/.m.rule.contains_user_name"
+                    || rule_id == "global/override/.m.rule.roomnotif")
+            {
+                continue;
+            }
+
             let extev_flag = &RoomVersionFeatures::ExtensibleEvents.as_str().to_string();
             let supports_extensible_events = self.room_version_feature_flags.contains(extev_flag);
             let safe_from_rver_condition = SAFE_EXTENSIBLE_EVENTS_RULE_IDS.contains(rule_id);
@@ -441,6 +458,7 @@ fn push_rule_evaluator() {
     flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string());
     let evaluator = PushRuleEvaluator::py_new(
         flattened_keys,
+        false,
         BTreeSet::new(),
         false,
         10,
@@ -468,6 +486,7 @@ fn test_requires_room_version_supports_condition() {
     let flags = vec![RoomVersionFeatures::ExtensibleEvents.as_str().to_string()];
     let evaluator = PushRuleEvaluator::py_new(
         flattened_keys,
+        false,
         BTreeSet::new(),
         false,
         10,
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index 588d90c25a..c0af2af3df 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -56,6 +56,7 @@ class PushRuleEvaluator:
     def __init__(
         self,
         flattened_keys: Mapping[str, str],
+        has_mentions: bool,
         user_mentions: Set[str],
         room_mention: bool,
         room_member_count: int,
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 88cfc05d05..9bf92b9765 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -119,6 +119,9 @@ class BulkPushRuleEvaluator:
         self.should_calculate_push_rules = self.hs.config.push.enable_push
 
         self._related_event_match_enabled = self.hs.config.experimental.msc3664_enabled
+        self._intentional_mentions_enabled = (
+            self.hs.config.experimental.msc3952_intentional_mentions
+        )
 
         self.room_push_rule_cache_metrics = register_cache(
             "cache",
@@ -364,9 +367,12 @@ class BulkPushRuleEvaluator:
 
         # Pull out any user and room mentions.
         mentions = event.content.get(EventContentFields.MSC3952_MENTIONS)
+        has_mentions = self._intentional_mentions_enabled and isinstance(mentions, dict)
         user_mentions: Set[str] = set()
         room_mention = False
-        if isinstance(mentions, dict):
+        if has_mentions:
+            # mypy seems to have lost the type even though it must be a dict here.
+            assert isinstance(mentions, dict)
             # Remove out any non-string items and convert to a set.
             user_mentions_raw = mentions.get("user_ids")
             if isinstance(user_mentions_raw, list):
@@ -378,6 +384,7 @@ class BulkPushRuleEvaluator:
 
         evaluator = PushRuleEvaluator(
             _flatten_dict(event, room_version=event.room_version),
+            has_mentions,
             user_mentions,
             room_mention,
             room_member_count,
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
index fda48d9f61..3b2d082dcb 100644
--- a/tests/push/test_bulk_push_rule_evaluator.py
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any
+from typing import Any, Optional
 from unittest.mock import patch
 
 from parameterized import parameterized
@@ -25,7 +25,7 @@ from synapse.push.bulk_push_rule_evaluator import BulkPushRuleEvaluator
 from synapse.rest import admin
 from synapse.rest.client import login, register, room
 from synapse.server import HomeServer
-from synapse.types import create_requester
+from synapse.types import JsonDict, create_requester
 from synapse.util import Clock
 
 from tests.test_utils import simple_async_mock
@@ -196,77 +196,144 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)]))
         bulk_evaluator._action_for_event_by_user.assert_not_called()
 
-    @override_config({"experimental_features": {"msc3952_intentional_mentions": True}})
-    def test_mentions(self) -> None:
-        """Test the behavior of an event which includes invalid mentions."""
-        bulk_evaluator = BulkPushRuleEvaluator(self.hs)
-
-        sentinel = object()
-
-        def create_and_process(mentions: Any = sentinel) -> bool:
-            """Returns true iff the `mentions` trigger an event push action."""
-            content = {}
-            if mentions is not sentinel:
-                content[EventContentFields.MSC3952_MENTIONS] = mentions
-
-            # Create a new message event which should cause a notification.
-            event, context = self.get_success(
-                self.event_creation_handler.create_event(
-                    self.requester,
-                    {
-                        "type": "test",
-                        "room_id": self.room_id,
-                        "content": content,
-                        "sender": f"@bob:{self.hs.hostname}",
-                    },
-                )
+    def _create_and_process(
+        self, bulk_evaluator: BulkPushRuleEvaluator, content: Optional[JsonDict] = None
+    ) -> bool:
+        """Returns true iff an event with `content` triggers a push action."""
+        # Create a new message event which should cause a notification.
+        event, context = self.get_success(
+            self.event_creation_handler.create_event(
+                self.requester,
+                {
+                    "type": "test",
+                    "room_id": self.room_id,
+                    "content": content or {},
+                    "sender": f"@bob:{self.hs.hostname}",
+                },
             )
+        )
 
-            # Ensure no actions are generated!
-            self.get_success(
-                bulk_evaluator.action_for_events_by_user([(event, context)])
-            )
+        # Execute the push rule machinery.
+        self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)]))
 
-            # If any actions are generated for this event, return true.
-            result = self.get_success(
-                self.hs.get_datastores().main.db_pool.simple_select_list(
-                    table="event_push_actions_staging",
-                    keyvalues={"event_id": event.event_id},
-                    retcols=("*",),
-                    desc="get_event_push_actions_staging",
-                )
+        # If any actions are generated for this event, return true.
+        result = self.get_success(
+            self.hs.get_datastores().main.db_pool.simple_select_list(
+                table="event_push_actions_staging",
+                keyvalues={"event_id": event.event_id},
+                retcols=("*",),
+                desc="get_event_push_actions_staging",
             )
-            return len(result) > 0
+        )
+        return len(result) > 0
+
+    @override_config({"experimental_features": {"msc3952_intentional_mentions": True}})
+    def test_user_mentions(self) -> None:
+        """Test the behavior of an event which includes invalid user mentions."""
+        bulk_evaluator = BulkPushRuleEvaluator(self.hs)
 
         # Not including the mentions field should not notify.
-        self.assertFalse(create_and_process())
+        self.assertFalse(self._create_and_process(bulk_evaluator))
         # An empty mentions field should not notify.
-        self.assertFalse(create_and_process({}))
+        self.assertFalse(
+            self._create_and_process(
+                bulk_evaluator, {EventContentFields.MSC3952_MENTIONS: {}}
+            )
+        )
 
         # Non-dict mentions should be ignored.
         mentions: Any
         for mentions in (None, True, False, 1, "foo", []):
-            self.assertFalse(create_and_process(mentions))
+            self.assertFalse(
+                self._create_and_process(
+                    bulk_evaluator, {EventContentFields.MSC3952_MENTIONS: mentions}
+                )
+            )
 
         # A non-list should be ignored.
         for mentions in (None, True, False, 1, "foo", {}):
-            self.assertFalse(create_and_process({"user_ids": mentions}))
+            self.assertFalse(
+                self._create_and_process(
+                    bulk_evaluator,
+                    {EventContentFields.MSC3952_MENTIONS: {"user_ids": mentions}},
+                )
+            )
 
         # The Matrix ID appearing anywhere in the list should notify.
-        self.assertTrue(create_and_process({"user_ids": [self.alice]}))
-        self.assertTrue(create_and_process({"user_ids": ["@another:test", self.alice]}))
+        self.assertTrue(
+            self._create_and_process(
+                bulk_evaluator,
+                {EventContentFields.MSC3952_MENTIONS: {"user_ids": [self.alice]}},
+            )
+        )
+        self.assertTrue(
+            self._create_and_process(
+                bulk_evaluator,
+                {
+                    EventContentFields.MSC3952_MENTIONS: {
+                        "user_ids": ["@another:test", self.alice]
+                    }
+                },
+            )
+        )
 
         # Duplicate user IDs should notify.
-        self.assertTrue(create_and_process({"user_ids": [self.alice, self.alice]}))
+        self.assertTrue(
+            self._create_and_process(
+                bulk_evaluator,
+                {
+                    EventContentFields.MSC3952_MENTIONS: {
+                        "user_ids": [self.alice, self.alice]
+                    }
+                },
+            )
+        )
 
         # Invalid entries in the list are ignored.
-        self.assertFalse(create_and_process({"user_ids": [None, True, False, {}, []]}))
+        self.assertFalse(
+            self._create_and_process(
+                bulk_evaluator,
+                {
+                    EventContentFields.MSC3952_MENTIONS: {
+                        "user_ids": [None, True, False, {}, []]
+                    }
+                },
+            )
+        )
         self.assertTrue(
-            create_and_process({"user_ids": [None, True, False, {}, [], self.alice]})
+            self._create_and_process(
+                bulk_evaluator,
+                {
+                    EventContentFields.MSC3952_MENTIONS: {
+                        "user_ids": [None, True, False, {}, [], self.alice]
+                    }
+                },
+            )
         )
 
+        # The legacy push rule should not notify if the mentions field exists.
+        self.assertFalse(
+            self._create_and_process(
+                bulk_evaluator,
+                {
+                    "body": self.alice,
+                    "msgtype": "m.text",
+                    EventContentFields.MSC3952_MENTIONS: {},
+                },
+            )
+        )
+
+    @override_config({"experimental_features": {"msc3952_intentional_mentions": True}})
+    def test_room_mentions(self) -> None:
+        """Test the behavior of an event which includes invalid room mentions."""
+        bulk_evaluator = BulkPushRuleEvaluator(self.hs)
+
         # Room mentions from those without power should not notify.
-        self.assertFalse(create_and_process({"room": True}))
+        self.assertFalse(
+            self._create_and_process(
+                bulk_evaluator, {EventContentFields.MSC3952_MENTIONS: {"room": True}}
+            )
+        )
 
         # Room mentions from those with power should notify.
         self.helper.send_state(
@@ -276,8 +343,30 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
             self.token,
             state_key="",
         )
-        self.assertTrue(create_and_process({"room": True}))
+        self.assertTrue(
+            self._create_and_process(
+                bulk_evaluator, {EventContentFields.MSC3952_MENTIONS: {"room": True}}
+            )
+        )
 
         # Invalid data should not notify.
+        mentions: Any
         for mentions in (None, False, 1, "foo", [], {}):
-            self.assertFalse(create_and_process({"room": mentions}))
+            self.assertFalse(
+                self._create_and_process(
+                    bulk_evaluator,
+                    {EventContentFields.MSC3952_MENTIONS: {"room": mentions}},
+                )
+            )
+
+        # The legacy push rule should not notify if the mentions field exists.
+        self.assertFalse(
+            self._create_and_process(
+                bulk_evaluator,
+                {
+                    "body": "@room",
+                    "msgtype": "m.text",
+                    EventContentFields.MSC3952_MENTIONS: {},
+                },
+            )
+        )
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index 9d01c989d4..81661e181b 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -42,6 +42,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         self,
         content: JsonMapping,
         *,
+        has_mentions: bool = False,
         user_mentions: Optional[Set[str]] = None,
         room_mention: bool = False,
         related_events: Optional[JsonDict] = None,
@@ -62,6 +63,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         power_levels: Dict[str, Union[int, Dict[str, int]]] = {}
         return PushRuleEvaluator(
             _flatten_dict(event),
+            has_mentions,
             user_mentions or set(),
             room_mention,
             room_member_count,
@@ -102,19 +104,21 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         condition = {"kind": "org.matrix.msc3952.is_user_mention"}
 
         # No mentions shouldn't match.
-        evaluator = self._get_evaluator({})
+        evaluator = self._get_evaluator({}, has_mentions=True)
         self.assertFalse(evaluator.matches(condition, "@user:test", None))
 
         # An empty set shouldn't match
-        evaluator = self._get_evaluator({}, user_mentions=set())
+        evaluator = self._get_evaluator({}, has_mentions=True, user_mentions=set())
         self.assertFalse(evaluator.matches(condition, "@user:test", None))
 
         # The Matrix ID appearing anywhere in the mentions list should match
-        evaluator = self._get_evaluator({}, user_mentions={"@user:test"})
+        evaluator = self._get_evaluator(
+            {}, has_mentions=True, user_mentions={"@user:test"}
+        )
         self.assertTrue(evaluator.matches(condition, "@user:test", None))
 
         evaluator = self._get_evaluator(
-            {}, user_mentions={"@another:test", "@user:test"}
+            {}, has_mentions=True, user_mentions={"@another:test", "@user:test"}
         )
         self.assertTrue(evaluator.matches(condition, "@user:test", None))
 
@@ -126,16 +130,16 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         condition = {"kind": "org.matrix.msc3952.is_room_mention"}
 
         # No room mention shouldn't match.
-        evaluator = self._get_evaluator({})
+        evaluator = self._get_evaluator({}, has_mentions=True)
         self.assertFalse(evaluator.matches(condition, None, None))
 
         # Room mention should match.
-        evaluator = self._get_evaluator({}, room_mention=True)
+        evaluator = self._get_evaluator({}, has_mentions=True, room_mention=True)
         self.assertTrue(evaluator.matches(condition, None, None))
 
         # A room mention and user mention is valid.
         evaluator = self._get_evaluator(
-            {}, user_mentions={"@another:test"}, room_mention=True
+            {}, has_mentions=True, user_mentions={"@another:test"}, room_mention=True
         )
         self.assertTrue(evaluator.matches(condition, None, None))
 
-- 
cgit 1.5.1


From f0cae26d58f6f907236112be5f4eaecc376b1304 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 3 Feb 2023 11:48:13 -0500
Subject: Add a docstring & tests for _flatten_dict. (#14981)

---
 changelog.d/14981.misc                   |  1 +
 synapse/push/bulk_push_rule_evaluator.py | 23 +++++++++++++++++++++++
 tests/push/test_push_rule_evaluator.py   | 26 +++++++++++++++++++++++++-
 3 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/14981.misc

(limited to 'synapse')

diff --git a/changelog.d/14981.misc b/changelog.d/14981.misc
new file mode 100644
index 0000000000..68ac8335fc
--- /dev/null
+++ b/changelog.d/14981.misc
@@ -0,0 +1 @@
+Add tests for `_flatten_dict`.
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 9bf92b9765..20369f3dfe 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -473,6 +473,29 @@ def _flatten_dict(
     prefix: Optional[List[str]] = None,
     result: Optional[Dict[str, str]] = None,
 ) -> Dict[str, str]:
+    """
+    Given a JSON dictionary (or event) which might contain sub dictionaries,
+    flatten it into a single layer dictionary by combining the keys & sub-keys.
+
+    Any (non-dictionary), non-string value is dropped.
+
+    Transforms:
+
+        {"foo": {"bar": "test"}}
+
+    To:
+
+        {"foo.bar": "test"}
+
+    Args:
+        d: The event or content to continue flattening.
+        room_version: The room version object.
+        prefix: The key prefix (from outer dictionaries).
+        result: The result to mutate.
+
+    Returns:
+        The resulting dictionary.
+    """
     if prefix is None:
         prefix = []
     if result is None:
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index 81661e181b..7c430c4ecb 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict, List, Optional, Set, Union, cast
+from typing import Any, Dict, List, Optional, Set, Union, cast
 
 import frozendict
 
@@ -37,6 +37,30 @@ from tests import unittest
 from tests.test_utils.event_injection import create_event, inject_member_event
 
 
+class FlattenDictTestCase(unittest.TestCase):
+    def test_simple(self) -> None:
+        """Test a dictionary that isn't modified."""
+        input = {"foo": "abc"}
+        self.assertEqual(input, _flatten_dict(input))
+
+    def test_nested(self) -> None:
+        """Nested dictionaries become dotted paths."""
+        input = {"foo": {"bar": "abc"}}
+        self.assertEqual({"foo.bar": "abc"}, _flatten_dict(input))
+
+    def test_non_string(self) -> None:
+        """Non-string items are dropped."""
+        input: Dict[str, Any] = {
+            "woo": "woo",
+            "foo": True,
+            "bar": 1,
+            "baz": None,
+            "fuzz": [],
+            "boo": {},
+        }
+        self.assertEqual({"woo": "woo"}, _flatten_dict(input))
+
+
 class PushRuleEvaluatorTestCase(unittest.TestCase):
     def _get_evaluator(
         self,
-- 
cgit 1.5.1


From b2d97bac0910c4730ea83fbee50abbdce2ba23be Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 3 Feb 2023 14:31:14 -0500
Subject: Implement MSC3958: suppress notifications from edits (#14960)

Co-authored-by: Brad Murray <brad@beeper.com>
Co-authored-by: Nick Barrett <nick@beeper.com>

Copy the suppress_edits push rule from Beeper to implement MSC3958.

https://github.com/beeper/synapse/blame/9415a1284b1bfb558bd66f28c24ca1611e6c6fa2/rust/src/push/base_rules.rs#L98-L114
---
 changelog.d/14960.feature                   |  1 +
 rust/benches/evaluator.rs                   |  1 +
 rust/src/push/base_rules.rs                 | 17 ++++++++++++
 rust/src/push/evaluator.rs                  |  2 +-
 rust/src/push/mod.rs                        |  8 ++++++
 stubs/synapse/synapse_rust/push.pyi         |  1 +
 synapse/config/experimental.py              |  5 ++++
 synapse/storage/databases/main/push_rule.py |  1 +
 tests/push/test_bulk_push_rule_evaluator.py | 42 ++++++++++++++++++++++++++++-
 9 files changed, 76 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14960.feature

(limited to 'synapse')

diff --git a/changelog.d/14960.feature b/changelog.d/14960.feature
new file mode 100644
index 0000000000..b9bb331273
--- /dev/null
+++ b/changelog.d/14960.feature
@@ -0,0 +1 @@
+Experimental support to suppress notifications from message edits ([MSC3958](https://github.com/matrix-org/matrix-spec-proposals/pull/3958)).
diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs
index 859d54961c..35f7a50bce 100644
--- a/rust/benches/evaluator.rs
+++ b/rust/benches/evaluator.rs
@@ -170,6 +170,7 @@ fn bench_eval_message(b: &mut Bencher) {
         false,
         false,
         false,
+        false,
     );
 
     b.iter(|| eval.run(&rules, Some("bob"), Some("person")));
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index 49add4e951..e9af26dd4f 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -63,6 +63,23 @@ pub const BASE_PREPEND_OVERRIDE_RULES: &[PushRule] = &[PushRule {
 }];
 
 pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
+    // We don't want to notify on edits. Not only can this be confusing in real
+    // time (2 notifications, one message) but it's especially confusing
+    // if a bridge needs to edit a previously backfilled message.
+    PushRule {
+        rule_id: Cow::Borrowed("global/override/.com.beeper.suppress_edits"),
+        priority_class: 5,
+        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch(
+            EventMatchCondition {
+                key: Cow::Borrowed("content.m.relates_to.rel_type"),
+                pattern: Some(Cow::Borrowed("m.replace")),
+                pattern_type: None,
+            },
+        ))]),
+        actions: Cow::Borrowed(&[Action::DontNotify]),
+        default: true,
+        default_enabled: true,
+    },
     PushRule {
         rule_id: Cow::Borrowed("global/override/.m.rule.suppress_notices"),
         priority_class: 5,
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index da6f704c0e..ec7a8c4453 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -523,7 +523,7 @@ fn test_requires_room_version_supports_condition() {
     };
     let rules = PushRules::new(vec![custom_rule]);
     result = evaluator.run(
-        &FilteredPushRules::py_new(rules, BTreeMap::new(), true, false, true, false),
+        &FilteredPushRules::py_new(rules, BTreeMap::new(), true, false, true, false, false),
         None,
         None,
     );
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index 7e449f2433..3c4f876cab 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -419,6 +419,7 @@ pub struct FilteredPushRules {
     msc3381_polls_enabled: bool,
     msc3664_enabled: bool,
     msc3952_intentional_mentions: bool,
+    msc3958_suppress_edits_enabled: bool,
 }
 
 #[pymethods]
@@ -431,6 +432,7 @@ impl FilteredPushRules {
         msc3381_polls_enabled: bool,
         msc3664_enabled: bool,
         msc3952_intentional_mentions: bool,
+        msc3958_suppress_edits_enabled: bool,
     ) -> Self {
         Self {
             push_rules,
@@ -439,6 +441,7 @@ impl FilteredPushRules {
             msc3381_polls_enabled,
             msc3664_enabled,
             msc3952_intentional_mentions,
+            msc3958_suppress_edits_enabled,
         }
     }
 
@@ -476,6 +479,11 @@ impl FilteredPushRules {
                 {
                     return false;
                 }
+                if !self.msc3958_suppress_edits_enabled
+                    && rule.rule_id == "global/override/.com.beeper.suppress_edits"
+                {
+                    return false;
+                }
 
                 true
             })
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index c0af2af3df..754acab2f9 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -47,6 +47,7 @@ class FilteredPushRules:
         msc3381_polls_enabled: bool,
         msc3664_enabled: bool,
         msc3952_intentional_mentions: bool,
+        msc3958_suppress_edits_enabled: bool,
     ): ...
     def rules(self) -> Collection[Tuple[PushRule, bool]]: ...
 
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index d2d0270ddd..53c0682dfd 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -173,3 +173,8 @@ class ExperimentalConfig(Config):
         self.msc3952_intentional_mentions = experimental.get(
             "msc3952_intentional_mentions", False
         )
+
+        # MSC3958: Do not generate notifications for edits.
+        self.msc3958_supress_edit_notifs = experimental.get(
+            "msc3958_supress_edit_notifs", False
+        )
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index 466a1145b7..9b2bbe060d 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -90,6 +90,7 @@ def _load_rules(
         msc3664_enabled=experimental_config.msc3664_enabled,
         msc3381_polls_enabled=experimental_config.msc3381_polls_enabled,
         msc3952_intentional_mentions=experimental_config.msc3952_intentional_mentions,
+        msc3958_suppress_edits_enabled=experimental_config.msc3958_supress_edit_notifs,
     )
 
     return filtered_rules
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
index 3b2d082dcb..7567756135 100644
--- a/tests/push/test_bulk_push_rule_evaluator.py
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -19,7 +19,7 @@ from parameterized import parameterized
 
 from twisted.test.proto_helpers import MemoryReactor
 
-from synapse.api.constants import EventContentFields
+from synapse.api.constants import EventContentFields, RelationTypes
 from synapse.api.room_versions import RoomVersions
 from synapse.push.bulk_push_rule_evaluator import BulkPushRuleEvaluator
 from synapse.rest import admin
@@ -370,3 +370,43 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
                 },
             )
         )
+
+    @override_config({"experimental_features": {"msc3958_supress_edit_notifs": True}})
+    def test_suppress_edits(self) -> None:
+        """Under the default push rules, event edits should not generate notifications."""
+        bulk_evaluator = BulkPushRuleEvaluator(self.hs)
+
+        # Create & persist an event to use as the parent of the relation.
+        event, context = self.get_success(
+            self.event_creation_handler.create_event(
+                self.requester,
+                {
+                    "type": "m.room.message",
+                    "room_id": self.room_id,
+                    "content": {
+                        "msgtype": "m.text",
+                        "body": "helo",
+                    },
+                    "sender": self.alice,
+                },
+            )
+        )
+        self.get_success(
+            self.event_creation_handler.handle_new_client_event(
+                self.requester, events_and_context=[(event, context)]
+            )
+        )
+
+        # An edit (an m.replace relation to the event above) should not notify.
+        self.assertFalse(
+            self._create_and_process(
+                bulk_evaluator,
+                {
+                    "body": self.alice,
+                    "m.relates_to": {
+                        "rel_type": RelationTypes.REPLACE,
+                        "event_id": event.event_id,
+                    },
+                },
+            )
+        )
-- 
cgit 1.5.1


From 6e6edea6c15dc1a15f44d9e92d334e3ce0f827dd Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 3 Feb 2023 20:03:23 +0000
Subject: Properly typecheck tests.api (#14983)

---
 changelog.d/14983.misc         |   1 +
 mypy.ini                       |   4 +-
 synapse/api/filtering.py       |   4 +-
 tests/api/test_auth.py         |  64 +++++++++--------
 tests/api/test_filtering.py    | 157 +++++++++++++++++++++++------------------
 tests/api/test_ratelimiting.py |  18 ++---
 tests/events/test_utils.py     |   2 +
 7 files changed, 140 insertions(+), 110 deletions(-)
 create mode 100644 changelog.d/14983.misc

(limited to 'synapse')

diff --git a/changelog.d/14983.misc b/changelog.d/14983.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/14983.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/mypy.ini b/mypy.ini
index 57f43395bb..a6e37bc377 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -32,7 +32,6 @@ exclude = (?x)
    |synapse/storage/databases/main/cache.py
    |synapse/storage/schema/
 
-   |tests/api/test_auth.py
    |tests/appservice/test_scheduler.py
    |tests/federation/test_federation_catch_up.py
    |tests/federation/test_federation_sender.py
@@ -73,6 +72,9 @@ disallow_untyped_defs = False
 [mypy-tests.*]
 disallow_untyped_defs = False
 
+[mypy-tests.api.*]
+disallow_untyped_defs = True
+
 [mypy-tests.app.*]
 disallow_untyped_defs = True
 
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index 2b5af264b4..83c42fc25a 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -252,9 +252,9 @@ class FilterCollection:
         return self._room_timeline_filter.unread_thread_notifications
 
     async def filter_presence(
-        self, events: Iterable[UserPresenceState]
+        self, presence_states: Iterable[UserPresenceState]
     ) -> List[UserPresenceState]:
-        return await self._presence_filter.filter(events)
+        return await self._presence_filter.filter(presence_states)
 
     async def filter_account_data(self, events: Iterable[JsonDict]) -> List[JsonDict]:
         return await self._account_data.filter(events)
diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py
index e0f363555b..6e36e73f0d 100644
--- a/tests/api/test_auth.py
+++ b/tests/api/test_auth.py
@@ -31,7 +31,7 @@ from synapse.api.errors import (
 from synapse.appservice import ApplicationService
 from synapse.server import HomeServer
 from synapse.storage.databases.main.registration import TokenLookupResult
-from synapse.types import Requester
+from synapse.types import Requester, UserID
 from synapse.util import Clock
 
 from tests import unittest
@@ -41,10 +41,12 @@ from tests.utils import mock_getRawHeaders
 
 
 class AuthTestCase(unittest.HomeserverTestCase):
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = Mock()
 
-        hs.datastores.main = self.store
+        # type-ignore: datastores is None until hs.setup() is called---but it'll
+        # have been called by the HomeserverTestCase machinery.
+        hs.datastores.main = self.store  # type: ignore[union-attr]
         hs.get_auth_handler().store = self.store
         self.auth = Auth(hs)
 
@@ -61,7 +63,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.store.insert_client_ip = simple_async_mock(None)
         self.store.is_support_user = simple_async_mock(False)
 
-    def test_get_user_by_req_user_valid_token(self):
+    def test_get_user_by_req_user_valid_token(self) -> None:
         user_info = TokenLookupResult(
             user_id=self.test_user, token_id=5, device_id="device"
         )
@@ -74,7 +76,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         requester = self.get_success(self.auth.get_user_by_req(request))
         self.assertEqual(requester.user.to_string(), self.test_user)
 
-    def test_get_user_by_req_user_bad_token(self):
+    def test_get_user_by_req_user_bad_token(self) -> None:
         self.store.get_user_by_access_token = simple_async_mock(None)
 
         request = Mock(args={})
@@ -86,7 +88,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.assertEqual(f.code, 401)
         self.assertEqual(f.errcode, "M_UNKNOWN_TOKEN")
 
-    def test_get_user_by_req_user_missing_token(self):
+    def test_get_user_by_req_user_missing_token(self) -> None:
         user_info = TokenLookupResult(user_id=self.test_user, token_id=5)
         self.store.get_user_by_access_token = simple_async_mock(user_info)
 
@@ -98,7 +100,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.assertEqual(f.code, 401)
         self.assertEqual(f.errcode, "M_MISSING_TOKEN")
 
-    def test_get_user_by_req_appservice_valid_token(self):
+    def test_get_user_by_req_appservice_valid_token(self) -> None:
         app_service = Mock(
             token="foobar", url="a_url", sender=self.test_user, ip_range_whitelist=None
         )
@@ -112,7 +114,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         requester = self.get_success(self.auth.get_user_by_req(request))
         self.assertEqual(requester.user.to_string(), self.test_user)
 
-    def test_get_user_by_req_appservice_valid_token_good_ip(self):
+    def test_get_user_by_req_appservice_valid_token_good_ip(self) -> None:
         from netaddr import IPSet
 
         app_service = Mock(
@@ -131,7 +133,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         requester = self.get_success(self.auth.get_user_by_req(request))
         self.assertEqual(requester.user.to_string(), self.test_user)
 
-    def test_get_user_by_req_appservice_valid_token_bad_ip(self):
+    def test_get_user_by_req_appservice_valid_token_bad_ip(self) -> None:
         from netaddr import IPSet
 
         app_service = Mock(
@@ -153,7 +155,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.assertEqual(f.code, 401)
         self.assertEqual(f.errcode, "M_UNKNOWN_TOKEN")
 
-    def test_get_user_by_req_appservice_bad_token(self):
+    def test_get_user_by_req_appservice_bad_token(self) -> None:
         self.store.get_app_service_by_token = Mock(return_value=None)
         self.store.get_user_by_access_token = simple_async_mock(None)
 
@@ -166,7 +168,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.assertEqual(f.code, 401)
         self.assertEqual(f.errcode, "M_UNKNOWN_TOKEN")
 
-    def test_get_user_by_req_appservice_missing_token(self):
+    def test_get_user_by_req_appservice_missing_token(self) -> None:
         app_service = Mock(token="foobar", url="a_url", sender=self.test_user)
         self.store.get_app_service_by_token = Mock(return_value=app_service)
         self.store.get_user_by_access_token = simple_async_mock(None)
@@ -179,7 +181,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.assertEqual(f.code, 401)
         self.assertEqual(f.errcode, "M_MISSING_TOKEN")
 
-    def test_get_user_by_req_appservice_valid_token_valid_user_id(self):
+    def test_get_user_by_req_appservice_valid_token_valid_user_id(self) -> None:
         masquerading_user_id = b"@doppelganger:matrix.org"
         app_service = Mock(
             token="foobar", url="a_url", sender=self.test_user, ip_range_whitelist=None
@@ -200,7 +202,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
             requester.user.to_string(), masquerading_user_id.decode("utf8")
         )
 
-    def test_get_user_by_req_appservice_valid_token_bad_user_id(self):
+    def test_get_user_by_req_appservice_valid_token_bad_user_id(self) -> None:
         masquerading_user_id = b"@doppelganger:matrix.org"
         app_service = Mock(
             token="foobar", url="a_url", sender=self.test_user, ip_range_whitelist=None
@@ -217,7 +219,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.get_failure(self.auth.get_user_by_req(request), AuthError)
 
     @override_config({"experimental_features": {"msc3202_device_masquerading": True}})
-    def test_get_user_by_req_appservice_valid_token_valid_device_id(self):
+    def test_get_user_by_req_appservice_valid_token_valid_device_id(self) -> None:
         """
         Tests that when an application service passes the device_id URL parameter
         with the ID of a valid device for the user in question,
@@ -249,7 +251,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.assertEqual(requester.device_id, masquerading_device_id.decode("utf8"))
 
     @override_config({"experimental_features": {"msc3202_device_masquerading": True}})
-    def test_get_user_by_req_appservice_valid_token_invalid_device_id(self):
+    def test_get_user_by_req_appservice_valid_token_invalid_device_id(self) -> None:
         """
         Tests that when an application service passes the device_id URL parameter
         with an ID that is not a valid device ID for the user in question,
@@ -279,7 +281,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.assertEqual(failure.value.code, 400)
         self.assertEqual(failure.value.errcode, Codes.EXCLUSIVE)
 
-    def test_get_user_by_req__puppeted_token__not_tracking_puppeted_mau(self):
+    def test_get_user_by_req__puppeted_token__not_tracking_puppeted_mau(self) -> None:
         self.store.get_user_by_access_token = simple_async_mock(
             TokenLookupResult(
                 user_id="@baldrick:matrix.org",
@@ -298,7 +300,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.get_success(self.auth.get_user_by_req(request))
         self.store.insert_client_ip.assert_called_once()
 
-    def test_get_user_by_req__puppeted_token__tracking_puppeted_mau(self):
+    def test_get_user_by_req__puppeted_token__tracking_puppeted_mau(self) -> None:
         self.auth._track_puppeted_user_ips = True
         self.store.get_user_by_access_token = simple_async_mock(
             TokenLookupResult(
@@ -318,7 +320,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.get_success(self.auth.get_user_by_req(request))
         self.assertEqual(self.store.insert_client_ip.call_count, 2)
 
-    def test_get_user_from_macaroon(self):
+    def test_get_user_from_macaroon(self) -> None:
         self.store.get_user_by_access_token = simple_async_mock(None)
 
         user_id = "@baldrick:matrix.org"
@@ -336,7 +338,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
             self.auth.get_user_by_access_token(serialized), InvalidClientTokenError
         )
 
-    def test_get_guest_user_from_macaroon(self):
+    def test_get_guest_user_from_macaroon(self) -> None:
         self.store.get_user_by_id = simple_async_mock({"is_guest": True})
         self.store.get_user_by_access_token = simple_async_mock(None)
 
@@ -357,7 +359,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.assertTrue(user_info.is_guest)
         self.store.get_user_by_id.assert_called_with(user_id)
 
-    def test_blocking_mau(self):
+    def test_blocking_mau(self) -> None:
         self.auth_blocking._limit_usage_by_mau = False
         self.auth_blocking._max_mau_value = 50
         lots_of_users = 100
@@ -381,7 +383,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.store.get_monthly_active_count = simple_async_mock(small_number_of_users)
         self.get_success(self.auth_blocking.check_auth_blocking())
 
-    def test_blocking_mau__depending_on_user_type(self):
+    def test_blocking_mau__depending_on_user_type(self) -> None:
         self.auth_blocking._max_mau_value = 50
         self.auth_blocking._limit_usage_by_mau = True
 
@@ -400,7 +402,9 @@ class AuthTestCase(unittest.HomeserverTestCase):
         # Real users not allowed
         self.get_failure(self.auth_blocking.check_auth_blocking(), ResourceLimitError)
 
-    def test_blocking_mau__appservice_requester_allowed_when_not_tracking_ips(self):
+    def test_blocking_mau__appservice_requester_allowed_when_not_tracking_ips(
+        self,
+    ) -> None:
         self.auth_blocking._max_mau_value = 50
         self.auth_blocking._limit_usage_by_mau = True
         self.auth_blocking._track_appservice_user_ips = False
@@ -418,7 +422,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
             sender="@appservice:sender",
         )
         requester = Requester(
-            user="@appservice:server",
+            user=UserID.from_string("@appservice:server"),
             access_token_id=None,
             device_id="FOOBAR",
             is_guest=False,
@@ -428,7 +432,9 @@ class AuthTestCase(unittest.HomeserverTestCase):
         )
         self.get_success(self.auth_blocking.check_auth_blocking(requester=requester))
 
-    def test_blocking_mau__appservice_requester_disallowed_when_tracking_ips(self):
+    def test_blocking_mau__appservice_requester_disallowed_when_tracking_ips(
+        self,
+    ) -> None:
         self.auth_blocking._max_mau_value = 50
         self.auth_blocking._limit_usage_by_mau = True
         self.auth_blocking._track_appservice_user_ips = True
@@ -446,7 +452,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
             sender="@appservice:sender",
         )
         requester = Requester(
-            user="@appservice:server",
+            user=UserID.from_string("@appservice:server"),
             access_token_id=None,
             device_id="FOOBAR",
             is_guest=False,
@@ -459,7 +465,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
             ResourceLimitError,
         )
 
-    def test_reserved_threepid(self):
+    def test_reserved_threepid(self) -> None:
         self.auth_blocking._limit_usage_by_mau = True
         self.auth_blocking._max_mau_value = 1
         self.store.get_monthly_active_count = simple_async_mock(2)
@@ -476,7 +482,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
 
         self.get_success(self.auth_blocking.check_auth_blocking(threepid=threepid))
 
-    def test_hs_disabled(self):
+    def test_hs_disabled(self) -> None:
         self.auth_blocking._hs_disabled = True
         self.auth_blocking._hs_disabled_message = "Reason for being disabled"
         e = self.get_failure(
@@ -486,7 +492,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.assertEqual(e.value.errcode, Codes.RESOURCE_LIMIT_EXCEEDED)
         self.assertEqual(e.value.code, 403)
 
-    def test_hs_disabled_no_server_notices_user(self):
+    def test_hs_disabled_no_server_notices_user(self) -> None:
         """Check that 'hs_disabled_message' works correctly when there is no
         server_notices user.
         """
@@ -503,7 +509,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         self.assertEqual(e.value.errcode, Codes.RESOURCE_LIMIT_EXCEEDED)
         self.assertEqual(e.value.code, 403)
 
-    def test_server_notices_mxid_special_cased(self):
+    def test_server_notices_mxid_special_cased(self) -> None:
         self.auth_blocking._hs_disabled = True
         user = "@user:server"
         self.auth_blocking._server_notices_mxid = user
diff --git a/tests/api/test_filtering.py b/tests/api/test_filtering.py
index d5524d296e..0f45615160 100644
--- a/tests/api/test_filtering.py
+++ b/tests/api/test_filtering.py
@@ -14,40 +14,36 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+from typing import List
 from unittest.mock import patch
 
 import jsonschema
 from frozendict import frozendict
 
+from twisted.test.proto_helpers import MemoryReactor
+
 from synapse.api.constants import EduTypes, EventContentFields
 from synapse.api.errors import SynapseError
 from synapse.api.filtering import Filter
-from synapse.events import make_event_from_dict
+from synapse.api.presence import UserPresenceState
+from synapse.server import HomeServer
+from synapse.types import JsonDict
+from synapse.util import Clock
 
 from tests import unittest
+from tests.events.test_utils import MockEvent
 
 user_localpart = "test_user"
 
 
-def MockEvent(**kwargs):
-    if "event_id" not in kwargs:
-        kwargs["event_id"] = "fake_event_id"
-    if "type" not in kwargs:
-        kwargs["type"] = "fake_type"
-    if "content" not in kwargs:
-        kwargs["content"] = {}
-    return make_event_from_dict(kwargs)
-
-
 class FilteringTestCase(unittest.HomeserverTestCase):
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.filtering = hs.get_filtering()
         self.datastore = hs.get_datastores().main
 
-    def test_errors_on_invalid_filters(self):
+    def test_errors_on_invalid_filters(self) -> None:
         # See USER_FILTER_SCHEMA for the filter schema.
-        invalid_filters = [
+        invalid_filters: List[JsonDict] = [
             # `account_data` must be a dictionary
             {"account_data": "Hello World"},
             # `event_fields` entries must not contain backslashes
@@ -63,10 +59,10 @@ class FilteringTestCase(unittest.HomeserverTestCase):
             with self.assertRaises(SynapseError):
                 self.filtering.check_valid_filter(filter)
 
-    def test_ignores_unknown_filter_fields(self):
+    def test_ignores_unknown_filter_fields(self) -> None:
         # For forward compatibility, we must ignore unknown filter fields.
         # See USER_FILTER_SCHEMA for the filter schema.
-        filters = [
+        filters: List[JsonDict] = [
             {"org.matrix.msc9999.future_option": True},
             {"presence": {"org.matrix.msc9999.future_option": True}},
             {"room": {"org.matrix.msc9999.future_option": True}},
@@ -76,8 +72,8 @@ class FilteringTestCase(unittest.HomeserverTestCase):
             self.filtering.check_valid_filter(filter)
             # Must not raise.
 
-    def test_valid_filters(self):
-        valid_filters = [
+    def test_valid_filters(self) -> None:
+        valid_filters: List[JsonDict] = [
             {
                 "room": {
                     "timeline": {"limit": 20},
@@ -132,22 +128,22 @@ class FilteringTestCase(unittest.HomeserverTestCase):
             except jsonschema.ValidationError as e:
                 self.fail(e)
 
-    def test_limits_are_applied(self):
+    def test_limits_are_applied(self) -> None:
         # TODO
         pass
 
-    def test_definition_types_works_with_literals(self):
+    def test_definition_types_works_with_literals(self) -> None:
         definition = {"types": ["m.room.message", "org.matrix.foo.bar"]}
         event = MockEvent(sender="@foo:bar", type="m.room.message", room_id="!foo:bar")
 
         self.assertTrue(Filter(self.hs, definition)._check(event))
 
-    def test_definition_types_works_with_wildcards(self):
+    def test_definition_types_works_with_wildcards(self) -> None:
         definition = {"types": ["m.*", "org.matrix.foo.bar"]}
         event = MockEvent(sender="@foo:bar", type="m.room.message", room_id="!foo:bar")
         self.assertTrue(Filter(self.hs, definition)._check(event))
 
-    def test_definition_types_works_with_unknowns(self):
+    def test_definition_types_works_with_unknowns(self) -> None:
         definition = {"types": ["m.room.message", "org.matrix.foo.bar"]}
         event = MockEvent(
             sender="@foo:bar",
@@ -156,24 +152,24 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         )
         self.assertFalse(Filter(self.hs, definition)._check(event))
 
-    def test_definition_not_types_works_with_literals(self):
+    def test_definition_not_types_works_with_literals(self) -> None:
         definition = {"not_types": ["m.room.message", "org.matrix.foo.bar"]}
         event = MockEvent(sender="@foo:bar", type="m.room.message", room_id="!foo:bar")
         self.assertFalse(Filter(self.hs, definition)._check(event))
 
-    def test_definition_not_types_works_with_wildcards(self):
+    def test_definition_not_types_works_with_wildcards(self) -> None:
         definition = {"not_types": ["m.room.message", "org.matrix.*"]}
         event = MockEvent(
             sender="@foo:bar", type="org.matrix.custom.event", room_id="!foo:bar"
         )
         self.assertFalse(Filter(self.hs, definition)._check(event))
 
-    def test_definition_not_types_works_with_unknowns(self):
+    def test_definition_not_types_works_with_unknowns(self) -> None:
         definition = {"not_types": ["m.*", "org.*"]}
         event = MockEvent(sender="@foo:bar", type="com.nom.nom.nom", room_id="!foo:bar")
         self.assertTrue(Filter(self.hs, definition)._check(event))
 
-    def test_definition_not_types_takes_priority_over_types(self):
+    def test_definition_not_types_takes_priority_over_types(self) -> None:
         definition = {
             "not_types": ["m.*", "org.*"],
             "types": ["m.room.message", "m.room.topic"],
@@ -181,35 +177,35 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         event = MockEvent(sender="@foo:bar", type="m.room.topic", room_id="!foo:bar")
         self.assertFalse(Filter(self.hs, definition)._check(event))
 
-    def test_definition_senders_works_with_literals(self):
+    def test_definition_senders_works_with_literals(self) -> None:
         definition = {"senders": ["@flibble:wibble"]}
         event = MockEvent(
             sender="@flibble:wibble", type="com.nom.nom.nom", room_id="!foo:bar"
         )
         self.assertTrue(Filter(self.hs, definition)._check(event))
 
-    def test_definition_senders_works_with_unknowns(self):
+    def test_definition_senders_works_with_unknowns(self) -> None:
         definition = {"senders": ["@flibble:wibble"]}
         event = MockEvent(
             sender="@challenger:appears", type="com.nom.nom.nom", room_id="!foo:bar"
         )
         self.assertFalse(Filter(self.hs, definition)._check(event))
 
-    def test_definition_not_senders_works_with_literals(self):
+    def test_definition_not_senders_works_with_literals(self) -> None:
         definition = {"not_senders": ["@flibble:wibble"]}
         event = MockEvent(
             sender="@flibble:wibble", type="com.nom.nom.nom", room_id="!foo:bar"
         )
         self.assertFalse(Filter(self.hs, definition)._check(event))
 
-    def test_definition_not_senders_works_with_unknowns(self):
+    def test_definition_not_senders_works_with_unknowns(self) -> None:
         definition = {"not_senders": ["@flibble:wibble"]}
         event = MockEvent(
             sender="@challenger:appears", type="com.nom.nom.nom", room_id="!foo:bar"
         )
         self.assertTrue(Filter(self.hs, definition)._check(event))
 
-    def test_definition_not_senders_takes_priority_over_senders(self):
+    def test_definition_not_senders_takes_priority_over_senders(self) -> None:
         definition = {
             "not_senders": ["@misspiggy:muppets"],
             "senders": ["@kermit:muppets", "@misspiggy:muppets"],
@@ -219,14 +215,14 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         )
         self.assertFalse(Filter(self.hs, definition)._check(event))
 
-    def test_definition_rooms_works_with_literals(self):
+    def test_definition_rooms_works_with_literals(self) -> None:
         definition = {"rooms": ["!secretbase:unknown"]}
         event = MockEvent(
             sender="@foo:bar", type="m.room.message", room_id="!secretbase:unknown"
         )
         self.assertTrue(Filter(self.hs, definition)._check(event))
 
-    def test_definition_rooms_works_with_unknowns(self):
+    def test_definition_rooms_works_with_unknowns(self) -> None:
         definition = {"rooms": ["!secretbase:unknown"]}
         event = MockEvent(
             sender="@foo:bar",
@@ -235,7 +231,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         )
         self.assertFalse(Filter(self.hs, definition)._check(event))
 
-    def test_definition_not_rooms_works_with_literals(self):
+    def test_definition_not_rooms_works_with_literals(self) -> None:
         definition = {"not_rooms": ["!anothersecretbase:unknown"]}
         event = MockEvent(
             sender="@foo:bar",
@@ -244,7 +240,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         )
         self.assertFalse(Filter(self.hs, definition)._check(event))
 
-    def test_definition_not_rooms_works_with_unknowns(self):
+    def test_definition_not_rooms_works_with_unknowns(self) -> None:
         definition = {"not_rooms": ["!secretbase:unknown"]}
         event = MockEvent(
             sender="@foo:bar",
@@ -253,7 +249,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         )
         self.assertTrue(Filter(self.hs, definition)._check(event))
 
-    def test_definition_not_rooms_takes_priority_over_rooms(self):
+    def test_definition_not_rooms_takes_priority_over_rooms(self) -> None:
         definition = {
             "not_rooms": ["!secretbase:unknown"],
             "rooms": ["!secretbase:unknown"],
@@ -263,7 +259,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         )
         self.assertFalse(Filter(self.hs, definition)._check(event))
 
-    def test_definition_combined_event(self):
+    def test_definition_combined_event(self) -> None:
         definition = {
             "not_senders": ["@misspiggy:muppets"],
             "senders": ["@kermit:muppets"],
@@ -279,7 +275,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         )
         self.assertTrue(Filter(self.hs, definition)._check(event))
 
-    def test_definition_combined_event_bad_sender(self):
+    def test_definition_combined_event_bad_sender(self) -> None:
         definition = {
             "not_senders": ["@misspiggy:muppets"],
             "senders": ["@kermit:muppets"],
@@ -295,7 +291,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         )
         self.assertFalse(Filter(self.hs, definition)._check(event))
 
-    def test_definition_combined_event_bad_room(self):
+    def test_definition_combined_event_bad_room(self) -> None:
         definition = {
             "not_senders": ["@misspiggy:muppets"],
             "senders": ["@kermit:muppets"],
@@ -311,7 +307,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         )
         self.assertFalse(Filter(self.hs, definition)._check(event))
 
-    def test_definition_combined_event_bad_type(self):
+    def test_definition_combined_event_bad_type(self) -> None:
         definition = {
             "not_senders": ["@misspiggy:muppets"],
             "senders": ["@kermit:muppets"],
@@ -327,7 +323,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         )
         self.assertFalse(Filter(self.hs, definition)._check(event))
 
-    def test_filter_labels(self):
+    def test_filter_labels(self) -> None:
         definition = {"org.matrix.labels": ["#fun"]}
         event = MockEvent(
             sender="@foo:bar",
@@ -356,7 +352,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         )
         self.assertTrue(Filter(self.hs, definition)._check(event))
 
-    def test_filter_not_labels(self):
+    def test_filter_not_labels(self) -> None:
         definition = {"org.matrix.not_labels": ["#fun"]}
         event = MockEvent(
             sender="@foo:bar",
@@ -377,7 +373,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         self.assertTrue(Filter(self.hs, definition)._check(event))
 
     @unittest.override_config({"experimental_features": {"msc3874_enabled": True}})
-    def test_filter_rel_type(self):
+    def test_filter_rel_type(self) -> None:
         definition = {"org.matrix.msc3874.rel_types": ["m.thread"]}
         event = MockEvent(
             sender="@foo:bar",
@@ -407,7 +403,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         self.assertTrue(Filter(self.hs, definition)._check(event))
 
     @unittest.override_config({"experimental_features": {"msc3874_enabled": True}})
-    def test_filter_not_rel_type(self):
+    def test_filter_not_rel_type(self) -> None:
         definition = {"org.matrix.msc3874.not_rel_types": ["m.thread"]}
         event = MockEvent(
             sender="@foo:bar",
@@ -436,15 +432,25 @@ class FilteringTestCase(unittest.HomeserverTestCase):
 
         self.assertTrue(Filter(self.hs, definition)._check(event))
 
-    def test_filter_presence_match(self):
-        user_filter_json = {"presence": {"types": ["m.*"]}}
+    def test_filter_presence_match(self) -> None:
+        """Check that filter_presence returns events which match the filter."""
+        user_filter_json = {"presence": {"senders": ["@foo:bar"]}}
         filter_id = self.get_success(
             self.datastore.add_user_filter(
                 user_localpart=user_localpart, user_filter=user_filter_json
             )
         )
-        event = MockEvent(sender="@foo:bar", type="m.profile")
-        events = [event]
+        presence_states = [
+            UserPresenceState(
+                user_id="@foo:bar",
+                state="unavailable",
+                last_active_ts=0,
+                last_federation_update_ts=0,
+                last_user_sync_ts=0,
+                status_msg=None,
+                currently_active=False,
+            ),
+        ]
 
         user_filter = self.get_success(
             self.filtering.get_user_filter(
@@ -452,23 +458,29 @@ class FilteringTestCase(unittest.HomeserverTestCase):
             )
         )
 
-        results = self.get_success(user_filter.filter_presence(events=events))
-        self.assertEqual(events, results)
+        results = self.get_success(user_filter.filter_presence(presence_states))
+        self.assertEqual(presence_states, results)
 
-    def test_filter_presence_no_match(self):
-        user_filter_json = {"presence": {"types": ["m.*"]}}
+    def test_filter_presence_no_match(self) -> None:
+        """Check that filter_presence does not return events rejected by the filter."""
+        user_filter_json = {"presence": {"not_senders": ["@foo:bar"]}}
 
         filter_id = self.get_success(
             self.datastore.add_user_filter(
                 user_localpart=user_localpart + "2", user_filter=user_filter_json
             )
         )
-        event = MockEvent(
-            event_id="$asdasd:localhost",
-            sender="@foo:bar",
-            type="custom.avatar.3d.crazy",
-        )
-        events = [event]
+        presence_states = [
+            UserPresenceState(
+                user_id="@foo:bar",
+                state="unavailable",
+                last_active_ts=0,
+                last_federation_update_ts=0,
+                last_user_sync_ts=0,
+                status_msg=None,
+                currently_active=False,
+            ),
+        ]
 
         user_filter = self.get_success(
             self.filtering.get_user_filter(
@@ -476,10 +488,10 @@ class FilteringTestCase(unittest.HomeserverTestCase):
             )
         )
 
-        results = self.get_success(user_filter.filter_presence(events=events))
+        results = self.get_success(user_filter.filter_presence(presence_states))
         self.assertEqual([], results)
 
-    def test_filter_room_state_match(self):
+    def test_filter_room_state_match(self) -> None:
         user_filter_json = {"room": {"state": {"types": ["m.*"]}}}
         filter_id = self.get_success(
             self.datastore.add_user_filter(
@@ -498,7 +510,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         results = self.get_success(user_filter.filter_room_state(events=events))
         self.assertEqual(events, results)
 
-    def test_filter_room_state_no_match(self):
+    def test_filter_room_state_no_match(self) -> None:
         user_filter_json = {"room": {"state": {"types": ["m.*"]}}}
         filter_id = self.get_success(
             self.datastore.add_user_filter(
@@ -519,7 +531,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         results = self.get_success(user_filter.filter_room_state(events))
         self.assertEqual([], results)
 
-    def test_filter_rooms(self):
+    def test_filter_rooms(self) -> None:
         definition = {
             "rooms": ["!allowed:example.com", "!excluded:example.com"],
             "not_rooms": ["!excluded:example.com"],
@@ -535,7 +547,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual(filtered_room_ids, ["!allowed:example.com"])
 
-    def test_filter_relations(self):
+    def test_filter_relations(self) -> None:
         events = [
             # An event without a relation.
             MockEvent(
@@ -551,9 +563,8 @@ class FilteringTestCase(unittest.HomeserverTestCase):
                 type="org.matrix.custom.event",
                 room_id="!foo:bar",
             ),
-            # Non-EventBase objects get passed through.
-            {},
         ]
+        jsondicts: List[JsonDict] = [{}]
 
         # For the following tests we patch the datastore method (intead of injecting
         # events). This is a bit cheeky, but tests the logic of _check_event_relations.
@@ -561,7 +572,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         # Filter for a particular sender.
         definition = {"related_by_senders": ["@foo:bar"]}
 
-        async def events_have_relations(*args, **kwargs):
+        async def events_have_relations(*args: object, **kwargs: object) -> List[str]:
             return ["$with_relation"]
 
         with patch.object(
@@ -572,9 +583,17 @@ class FilteringTestCase(unittest.HomeserverTestCase):
                     Filter(self.hs, definition)._check_event_relations(events)
                 )
             )
+            # Non-EventBase objects get passed through.
+            filtered_jsondicts = list(
+                self.get_success(
+                    Filter(self.hs, definition)._check_event_relations(jsondicts)
+                )
+            )
+
         self.assertEqual(filtered_events, events[1:])
+        self.assertEqual(filtered_jsondicts, [{}])
 
-    def test_add_filter(self):
+    def test_add_filter(self) -> None:
         user_filter_json = {"room": {"state": {"types": ["m.*"]}}}
 
         filter_id = self.get_success(
@@ -595,7 +614,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
             ),
         )
 
-    def test_get_filter(self):
+    def test_get_filter(self) -> None:
         user_filter_json = {"room": {"state": {"types": ["m.*"]}}}
 
         filter_id = self.get_success(
diff --git a/tests/api/test_ratelimiting.py b/tests/api/test_ratelimiting.py
index b5fd08d437..fa6c1c02ce 100644
--- a/tests/api/test_ratelimiting.py
+++ b/tests/api/test_ratelimiting.py
@@ -6,7 +6,7 @@ from tests import unittest
 
 
 class TestRatelimiter(unittest.HomeserverTestCase):
-    def test_allowed_via_can_do_action(self):
+    def test_allowed_via_can_do_action(self) -> None:
         limiter = Ratelimiter(
             store=self.hs.get_datastores().main,
             clock=self.clock,
@@ -31,7 +31,7 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         self.assertTrue(allowed)
         self.assertEqual(20.0, time_allowed)
 
-    def test_allowed_appservice_ratelimited_via_can_requester_do_action(self):
+    def test_allowed_appservice_ratelimited_via_can_requester_do_action(self) -> None:
         appservice = ApplicationService(
             token="fake_token",
             id="foo",
@@ -64,7 +64,7 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         self.assertTrue(allowed)
         self.assertEqual(20.0, time_allowed)
 
-    def test_allowed_appservice_via_can_requester_do_action(self):
+    def test_allowed_appservice_via_can_requester_do_action(self) -> None:
         appservice = ApplicationService(
             token="fake_token",
             id="foo",
@@ -97,7 +97,7 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         self.assertTrue(allowed)
         self.assertEqual(-1, time_allowed)
 
-    def test_allowed_via_ratelimit(self):
+    def test_allowed_via_ratelimit(self) -> None:
         limiter = Ratelimiter(
             store=self.hs.get_datastores().main,
             clock=self.clock,
@@ -120,7 +120,7 @@ class TestRatelimiter(unittest.HomeserverTestCase):
             limiter.ratelimit(None, key="test_id", _time_now_s=10)
         )
 
-    def test_allowed_via_can_do_action_and_overriding_parameters(self):
+    def test_allowed_via_can_do_action_and_overriding_parameters(self) -> None:
         """Test that we can override options of can_do_action that would otherwise fail
         an action
         """
@@ -169,7 +169,7 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         self.assertTrue(allowed)
         self.assertEqual(1.0, time_allowed)
 
-    def test_allowed_via_ratelimit_and_overriding_parameters(self):
+    def test_allowed_via_ratelimit_and_overriding_parameters(self) -> None:
         """Test that we can override options of the ratelimit method that would otherwise
         fail an action
         """
@@ -204,7 +204,7 @@ class TestRatelimiter(unittest.HomeserverTestCase):
             limiter.ratelimit(None, key=("test_id",), _time_now_s=1, burst_count=10)
         )
 
-    def test_pruning(self):
+    def test_pruning(self) -> None:
         limiter = Ratelimiter(
             store=self.hs.get_datastores().main,
             clock=self.clock,
@@ -223,7 +223,7 @@ class TestRatelimiter(unittest.HomeserverTestCase):
 
         self.assertNotIn("test_id_1", limiter.actions)
 
-    def test_db_user_override(self):
+    def test_db_user_override(self) -> None:
         """Test that users that have ratelimiting disabled in the DB aren't
         ratelimited.
         """
@@ -250,7 +250,7 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         for _ in range(20):
             self.get_success_or_raise(limiter.ratelimit(requester, _time_now_s=0))
 
-    def test_multiple_actions(self):
+    def test_multiple_actions(self) -> None:
         limiter = Ratelimiter(
             store=self.hs.get_datastores().main,
             clock=self.clock,
diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py
index ff7b349d75..4174a237ec 100644
--- a/tests/events/test_utils.py
+++ b/tests/events/test_utils.py
@@ -35,6 +35,8 @@ def MockEvent(**kwargs: Any) -> EventBase:
         kwargs["event_id"] = "fake_event_id"
     if "type" not in kwargs:
         kwargs["type"] = "fake_type"
+    if "content" not in kwargs:
+        kwargs["content"] = {}
     return make_event_from_dict(kwargs)
 
 
-- 
cgit 1.5.1


From b3bf58a8a5f56674cb0ea0ab6c29aba5775dec52 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 6 Feb 2023 11:29:51 +0000
Subject: Only notify the target of a membership event (#14971)

* Only notify the target of a membership event

Naughty, but should be a big speedup in large rooms
---
 changelog.d/14971.misc                   |  1 +
 synapse/push/bulk_push_rule_evaluator.py | 38 +++++++++++++++++++++++++-------
 2 files changed, 31 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/14971.misc

(limited to 'synapse')

diff --git a/changelog.d/14971.misc b/changelog.d/14971.misc
new file mode 100644
index 0000000000..130045a123
--- /dev/null
+++ b/changelog.d/14971.misc
@@ -0,0 +1 @@
+Improve performance of joining and leaving large rooms with many local users.
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 20369f3dfe..f73dceb128 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -142,15 +142,34 @@ class BulkPushRuleEvaluator:
         Returns:
             Mapping of user ID to their push rules.
         """
-        # We get the users who may need to be notified by first fetching the
-        # local users currently in the room, finding those that have push rules,
-        # and *then* checking which users are actually allowed to see the event.
-        #
-        # The alternative is to first fetch all users that were joined at the
-        # event, but that requires fetching the full state at the event, which
-        # may be expensive for large rooms with few local users.
+        # If this is a membership event, only calculate push rules for the target.
+        # While it's possible for users to configure push rules to respond to such an
+        # event, in practice nobody does this. At the cost of violating the spec a
+        # little, we can skip fetching a huge number of push rules in large rooms.
+        # This helps make joins and leaves faster.
+        if event.type == EventTypes.Member:
+            local_users = []
+            # We never notify a user about their own actions. This is enforced in
+            # `_action_for_event_by_user` in the loop over `rules_by_user`, but we
+            # do the same check here to avoid unnecessary DB queries.
+            if event.sender != event.state_key and self.hs.is_mine_id(event.state_key):
+                # Check the target is in the room, to avoid notifying them of
+                # e.g. a pre-emptive ban.
+                target_already_in_room = await self.store.check_local_user_in_room(
+                    event.state_key, event.room_id
+                )
+                if target_already_in_room:
+                    local_users = [event.state_key]
+        else:
+            # We get the users who may need to be notified by first fetching the
+            # local users currently in the room, finding those that have push rules,
+            # and *then* checking which users are actually allowed to see the event.
+            #
+            # The alternative is to first fetch all users that were joined at the
+            # event, but that requires fetching the full state at the event, which
+            # may be expensive for large rooms with few local users.
 
-        local_users = await self.store.get_local_users_in_room(event.room_id)
+            local_users = await self.store.get_local_users_in_room(event.room_id)
 
         # Filter out appservice users.
         local_users = [
@@ -167,6 +186,9 @@ class BulkPushRuleEvaluator:
                 local_users = list(local_users)
                 local_users.append(invited)
 
+        if not local_users:
+            return {}
+
         rules_by_user = await self.store.bulk_get_push_rules(local_users)
 
         logger.debug("Users in room: %s", local_users)
-- 
cgit 1.5.1


From e8269ed391a199bbe0e43efc28c68c98b949b323 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 6 Feb 2023 12:49:06 +0000
Subject: Type hints for tests.appservice (#14990)

* Accept a Sequence of events in synapse.appservice

This avoids some casts/ignores in the tests I'm about to fix up. It seems
that `List[Mock]` is not a subtype of `List[EventBase]`, but
`Sequence[Mock]` is a subtype of `Sequence[EventBase]`. So presumably
`Mock` is considered a subtype of anything, much like `Any`.

* make tests.appservice.test_scheduler pass mypy

* Extra hints in tests.appservice.test_scheduler

* Extra hints in tests.appservice.test_api

* Extra hints in tests.appservice.test_appservice

* Disallow untyped defs

* Changelog
---
 changelog.d/14990.misc                       |  1 +
 mypy.ini                                     |  4 +-
 synapse/appservice/__init__.py               |  4 +-
 synapse/appservice/api.py                    | 14 ++++-
 synapse/appservice/scheduler.py              |  3 +-
 synapse/storage/databases/main/appservice.py | 14 ++++-
 tests/appservice/test_api.py                 |  4 +-
 tests/appservice/test_appservice.py          | 55 ++++++++++++-----
 tests/appservice/test_scheduler.py           | 92 ++++++++++++++++++----------
 9 files changed, 132 insertions(+), 59 deletions(-)
 create mode 100644 changelog.d/14990.misc

(limited to 'synapse')

diff --git a/changelog.d/14990.misc b/changelog.d/14990.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/14990.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/mypy.ini b/mypy.ini
index a6e37bc377..351b8ccade 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -32,7 +32,6 @@ exclude = (?x)
    |synapse/storage/databases/main/cache.py
    |synapse/storage/schema/
 
-   |tests/appservice/test_scheduler.py
    |tests/federation/test_federation_catch_up.py
    |tests/federation/test_federation_sender.py
    |tests/http/federation/test_matrix_federation_agent.py
@@ -78,6 +77,9 @@ disallow_untyped_defs = True
 [mypy-tests.app.*]
 disallow_untyped_defs = True
 
+[mypy-tests.appservice.*]
+disallow_untyped_defs = True
+
 [mypy-tests.config.*]
 disallow_untyped_defs = True
 
diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py
index 65615f50b8..35c330a3c4 100644
--- a/synapse/appservice/__init__.py
+++ b/synapse/appservice/__init__.py
@@ -16,7 +16,7 @@
 import logging
 import re
 from enum import Enum
-from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Pattern
+from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Pattern, Sequence
 
 import attr
 from netaddr import IPSet
@@ -377,7 +377,7 @@ class AppServiceTransaction:
         self,
         service: ApplicationService,
         id: int,
-        events: List[EventBase],
+        events: Sequence[EventBase],
         ephemeral: List[JsonDict],
         to_device_messages: List[JsonDict],
         one_time_keys_count: TransactionOneTimeKeysCount,
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index edafd433cd..1a6f69e7d3 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -14,7 +14,17 @@
 # limitations under the License.
 import logging
 import urllib.parse
-from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Optional, Tuple
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Iterable,
+    List,
+    Mapping,
+    Optional,
+    Sequence,
+    Tuple,
+)
 
 from prometheus_client import Counter
 from typing_extensions import TypeGuard
@@ -259,7 +269,7 @@ class ApplicationServiceApi(SimpleHttpClient):
     async def push_bulk(
         self,
         service: "ApplicationService",
-        events: List[EventBase],
+        events: Sequence[EventBase],
         ephemeral: List[JsonDict],
         to_device_messages: List[JsonDict],
         one_time_keys_count: TransactionOneTimeKeysCount,
diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py
index 7b562795a3..3a319b0d42 100644
--- a/synapse/appservice/scheduler.py
+++ b/synapse/appservice/scheduler.py
@@ -57,6 +57,7 @@ from typing import (
     Iterable,
     List,
     Optional,
+    Sequence,
     Set,
     Tuple,
 )
@@ -364,7 +365,7 @@ class _TransactionController:
     async def send(
         self,
         service: ApplicationService,
-        events: List[EventBase],
+        events: Sequence[EventBase],
         ephemeral: Optional[List[JsonDict]] = None,
         to_device_messages: Optional[List[JsonDict]] = None,
         one_time_keys_count: Optional[TransactionOneTimeKeysCount] = None,
diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py
index c2c8018ee2..5fb152c4ff 100644
--- a/synapse/storage/databases/main/appservice.py
+++ b/synapse/storage/databases/main/appservice.py
@@ -14,7 +14,17 @@
 # limitations under the License.
 import logging
 import re
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Pattern, Tuple, cast
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    List,
+    Optional,
+    Pattern,
+    Sequence,
+    Tuple,
+    cast,
+)
 
 from synapse.appservice import (
     ApplicationService,
@@ -257,7 +267,7 @@ class ApplicationServiceTransactionWorkerStore(
     async def create_appservice_txn(
         self,
         service: ApplicationService,
-        events: List[EventBase],
+        events: Sequence[EventBase],
         ephemeral: List[JsonDict],
         to_device_messages: List[JsonDict],
         one_time_keys_count: TransactionOneTimeKeysCount,
diff --git a/tests/appservice/test_api.py b/tests/appservice/test_api.py
index 89ee79396f..9d183b733e 100644
--- a/tests/appservice/test_api.py
+++ b/tests/appservice/test_api.py
@@ -29,7 +29,7 @@ URL = "http://mytestservice"
 
 
 class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.api = hs.get_application_service_api()
         self.service = ApplicationService(
             id="unique_identifier",
@@ -39,7 +39,7 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
             hs_token=TOKEN,
         )
 
-    def test_query_3pe_authenticates_token(self):
+    def test_query_3pe_authenticates_token(self) -> None:
         """
         Tests that 3pe queries to the appservice are authenticated
         with the appservice's token.
diff --git a/tests/appservice/test_appservice.py b/tests/appservice/test_appservice.py
index d4dccfc2f0..dee976356f 100644
--- a/tests/appservice/test_appservice.py
+++ b/tests/appservice/test_appservice.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import re
+from typing import Generator
 from unittest.mock import Mock
 
 from twisted.internet import defer
@@ -27,7 +28,7 @@ def _regex(regex: str, exclusive: bool = True) -> Namespace:
 
 
 class ApplicationServiceTestCase(unittest.TestCase):
-    def setUp(self):
+    def setUp(self) -> None:
         self.service = ApplicationService(
             id="unique_identifier",
             sender="@as:test",
@@ -46,7 +47,9 @@ class ApplicationServiceTestCase(unittest.TestCase):
         self.store.get_local_users_in_room = simple_async_mock([])
 
     @defer.inlineCallbacks
-    def test_regex_user_id_prefix_match(self):
+    def test_regex_user_id_prefix_match(
+        self,
+    ) -> Generator["defer.Deferred[object]", object, None]:
         self.service.namespaces[ApplicationService.NS_USERS].append(_regex("@irc_.*"))
         self.event.sender = "@irc_foobar:matrix.org"
         self.assertTrue(
@@ -60,7 +63,9 @@ class ApplicationServiceTestCase(unittest.TestCase):
         )
 
     @defer.inlineCallbacks
-    def test_regex_user_id_prefix_no_match(self):
+    def test_regex_user_id_prefix_no_match(
+        self,
+    ) -> Generator["defer.Deferred[object]", object, None]:
         self.service.namespaces[ApplicationService.NS_USERS].append(_regex("@irc_.*"))
         self.event.sender = "@someone_else:matrix.org"
         self.assertFalse(
@@ -74,7 +79,9 @@ class ApplicationServiceTestCase(unittest.TestCase):
         )
 
     @defer.inlineCallbacks
-    def test_regex_room_member_is_checked(self):
+    def test_regex_room_member_is_checked(
+        self,
+    ) -> Generator["defer.Deferred[object]", object, None]:
         self.service.namespaces[ApplicationService.NS_USERS].append(_regex("@irc_.*"))
         self.event.sender = "@someone_else:matrix.org"
         self.event.type = "m.room.member"
@@ -90,7 +97,9 @@ class ApplicationServiceTestCase(unittest.TestCase):
         )
 
     @defer.inlineCallbacks
-    def test_regex_room_id_match(self):
+    def test_regex_room_id_match(
+        self,
+    ) -> Generator["defer.Deferred[object]", object, None]:
         self.service.namespaces[ApplicationService.NS_ROOMS].append(
             _regex("!some_prefix.*some_suffix:matrix.org")
         )
@@ -106,7 +115,9 @@ class ApplicationServiceTestCase(unittest.TestCase):
         )
 
     @defer.inlineCallbacks
-    def test_regex_room_id_no_match(self):
+    def test_regex_room_id_no_match(
+        self,
+    ) -> Generator["defer.Deferred[object]", object, None]:
         self.service.namespaces[ApplicationService.NS_ROOMS].append(
             _regex("!some_prefix.*some_suffix:matrix.org")
         )
@@ -122,7 +133,9 @@ class ApplicationServiceTestCase(unittest.TestCase):
         )
 
     @defer.inlineCallbacks
-    def test_regex_alias_match(self):
+    def test_regex_alias_match(
+        self,
+    ) -> Generator["defer.Deferred[object]", object, None]:
         self.service.namespaces[ApplicationService.NS_ALIASES].append(
             _regex("#irc_.*:matrix.org")
         )
@@ -140,44 +153,46 @@ class ApplicationServiceTestCase(unittest.TestCase):
             )
         )
 
-    def test_non_exclusive_alias(self):
+    def test_non_exclusive_alias(self) -> None:
         self.service.namespaces[ApplicationService.NS_ALIASES].append(
             _regex("#irc_.*:matrix.org", exclusive=False)
         )
         self.assertFalse(self.service.is_exclusive_alias("#irc_foobar:matrix.org"))
 
-    def test_non_exclusive_room(self):
+    def test_non_exclusive_room(self) -> None:
         self.service.namespaces[ApplicationService.NS_ROOMS].append(
             _regex("!irc_.*:matrix.org", exclusive=False)
         )
         self.assertFalse(self.service.is_exclusive_room("!irc_foobar:matrix.org"))
 
-    def test_non_exclusive_user(self):
+    def test_non_exclusive_user(self) -> None:
         self.service.namespaces[ApplicationService.NS_USERS].append(
             _regex("@irc_.*:matrix.org", exclusive=False)
         )
         self.assertFalse(self.service.is_exclusive_user("@irc_foobar:matrix.org"))
 
-    def test_exclusive_alias(self):
+    def test_exclusive_alias(self) -> None:
         self.service.namespaces[ApplicationService.NS_ALIASES].append(
             _regex("#irc_.*:matrix.org", exclusive=True)
         )
         self.assertTrue(self.service.is_exclusive_alias("#irc_foobar:matrix.org"))
 
-    def test_exclusive_user(self):
+    def test_exclusive_user(self) -> None:
         self.service.namespaces[ApplicationService.NS_USERS].append(
             _regex("@irc_.*:matrix.org", exclusive=True)
         )
         self.assertTrue(self.service.is_exclusive_user("@irc_foobar:matrix.org"))
 
-    def test_exclusive_room(self):
+    def test_exclusive_room(self) -> None:
         self.service.namespaces[ApplicationService.NS_ROOMS].append(
             _regex("!irc_.*:matrix.org", exclusive=True)
         )
         self.assertTrue(self.service.is_exclusive_room("!irc_foobar:matrix.org"))
 
     @defer.inlineCallbacks
-    def test_regex_alias_no_match(self):
+    def test_regex_alias_no_match(
+        self,
+    ) -> Generator["defer.Deferred[object]", object, None]:
         self.service.namespaces[ApplicationService.NS_ALIASES].append(
             _regex("#irc_.*:matrix.org")
         )
@@ -196,7 +211,9 @@ class ApplicationServiceTestCase(unittest.TestCase):
         )
 
     @defer.inlineCallbacks
-    def test_regex_multiple_matches(self):
+    def test_regex_multiple_matches(
+        self,
+    ) -> Generator["defer.Deferred[object]", object, None]:
         self.service.namespaces[ApplicationService.NS_ALIASES].append(
             _regex("#irc_.*:matrix.org")
         )
@@ -215,7 +232,9 @@ class ApplicationServiceTestCase(unittest.TestCase):
         )
 
     @defer.inlineCallbacks
-    def test_interested_in_self(self):
+    def test_interested_in_self(
+        self,
+    ) -> Generator["defer.Deferred[object]", object, None]:
         # make sure invites get through
         self.service.sender = "@appservice:name"
         self.service.namespaces[ApplicationService.NS_USERS].append(_regex("@irc_.*"))
@@ -233,7 +252,9 @@ class ApplicationServiceTestCase(unittest.TestCase):
         )
 
     @defer.inlineCallbacks
-    def test_member_list_match(self):
+    def test_member_list_match(
+        self,
+    ) -> Generator["defer.Deferred[object]", object, None]:
         self.service.namespaces[ApplicationService.NS_USERS].append(_regex("@irc_.*"))
         # Note that @irc_fo:here is the AS user.
         self.store.get_local_users_in_room = simple_async_mock(
diff --git a/tests/appservice/test_scheduler.py b/tests/appservice/test_scheduler.py
index 0a1ae83a2b..febcc1499d 100644
--- a/tests/appservice/test_scheduler.py
+++ b/tests/appservice/test_scheduler.py
@@ -11,20 +11,28 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, List, Optional, Sequence, Tuple, cast
 from unittest.mock import Mock
 
+from typing_extensions import TypeAlias
+
 from twisted.internet import defer
 
-from synapse.appservice import ApplicationServiceState
+from synapse.appservice import (
+    ApplicationService,
+    ApplicationServiceState,
+    TransactionOneTimeKeysCount,
+    TransactionUnusedFallbackKeys,
+)
 from synapse.appservice.scheduler import (
     ApplicationServiceScheduler,
     _Recoverer,
     _TransactionController,
 )
+from synapse.events import EventBase
 from synapse.logging.context import make_deferred_yieldable
 from synapse.server import HomeServer
-from synapse.types import DeviceListUpdates
+from synapse.types import DeviceListUpdates, JsonDict
 from synapse.util import Clock
 
 from tests import unittest
@@ -37,18 +45,18 @@ if TYPE_CHECKING:
 
 
 class ApplicationServiceSchedulerTransactionCtrlTestCase(unittest.TestCase):
-    def setUp(self):
+    def setUp(self) -> None:
         self.clock = MockClock()
         self.store = Mock()
         self.as_api = Mock()
         self.recoverer = Mock()
         self.recoverer_fn = Mock(return_value=self.recoverer)
         self.txnctrl = _TransactionController(
-            clock=self.clock, store=self.store, as_api=self.as_api
+            clock=cast(Clock, self.clock), store=self.store, as_api=self.as_api
         )
         self.txnctrl.RECOVERER_CLASS = self.recoverer_fn
 
-    def test_single_service_up_txn_sent(self):
+    def test_single_service_up_txn_sent(self) -> None:
         # Test: The AS is up and the txn is successfully sent.
         service = Mock()
         events = [Mock(), Mock()]
@@ -76,7 +84,7 @@ class ApplicationServiceSchedulerTransactionCtrlTestCase(unittest.TestCase):
         self.assertEqual(0, len(self.txnctrl.recoverers))  # no recoverer made
         txn.complete.assert_called_once_with(self.store)  # txn completed
 
-    def test_single_service_down(self):
+    def test_single_service_down(self) -> None:
         # Test: The AS is down so it shouldn't push; Recoverers will do it.
         # It should still make a transaction though.
         service = Mock()
@@ -103,7 +111,7 @@ class ApplicationServiceSchedulerTransactionCtrlTestCase(unittest.TestCase):
         self.assertEqual(0, txn.send.call_count)  # txn not sent though
         self.assertEqual(0, txn.complete.call_count)  # or completed
 
-    def test_single_service_up_txn_not_sent(self):
+    def test_single_service_up_txn_not_sent(self) -> None:
         # Test: The AS is up and the txn is not sent. A Recoverer is made and
         # started.
         service = Mock()
@@ -139,26 +147,28 @@ class ApplicationServiceSchedulerTransactionCtrlTestCase(unittest.TestCase):
 
 
 class ApplicationServiceSchedulerRecovererTestCase(unittest.TestCase):
-    def setUp(self):
+    def setUp(self) -> None:
         self.clock = MockClock()
         self.as_api = Mock()
         self.store = Mock()
         self.service = Mock()
         self.callback = simple_async_mock()
         self.recoverer = _Recoverer(
-            clock=self.clock,
+            clock=cast(Clock, self.clock),
             as_api=self.as_api,
             store=self.store,
             service=self.service,
             callback=self.callback,
         )
 
-    def test_recover_single_txn(self):
+    def test_recover_single_txn(self) -> None:
         txn = Mock()
         # return one txn to send, then no more old txns
         txns = [txn, None]
 
-        def take_txn(*args, **kwargs):
+        def take_txn(
+            *args: object, **kwargs: object
+        ) -> "defer.Deferred[Optional[Mock]]":
             return defer.succeed(txns.pop(0))
 
         self.store.get_oldest_unsent_txn = Mock(side_effect=take_txn)
@@ -177,12 +187,14 @@ class ApplicationServiceSchedulerRecovererTestCase(unittest.TestCase):
         self.callback.assert_called_once_with(self.recoverer)
         self.assertEqual(self.recoverer.service, self.service)
 
-    def test_recover_retry_txn(self):
+    def test_recover_retry_txn(self) -> None:
         txn = Mock()
         txns = [txn, None]
         pop_txn = False
 
-        def take_txn(*args, **kwargs):
+        def take_txn(
+            *args: object, **kwargs: object
+        ) -> "defer.Deferred[Optional[Mock]]":
             if pop_txn:
                 return defer.succeed(txns.pop(0))
             else:
@@ -214,8 +226,24 @@ class ApplicationServiceSchedulerRecovererTestCase(unittest.TestCase):
         self.callback.assert_called_once_with(self.recoverer)
 
 
+# Corresponds to synapse.appservice.scheduler._TransactionController.send
+TxnCtrlArgs: TypeAlias = """
+defer.Deferred[
+    Tuple[
+        ApplicationService,
+        Sequence[EventBase],
+        Optional[List[JsonDict]],
+        Optional[List[JsonDict]],
+        Optional[TransactionOneTimeKeysCount],
+        Optional[TransactionUnusedFallbackKeys],
+        Optional[DeviceListUpdates],
+    ]
+]
+"""
+
+
 class ApplicationServiceSchedulerQueuerTestCase(unittest.HomeserverTestCase):
-    def prepare(self, reactor: "MemoryReactor", clock: Clock, hs: HomeServer):
+    def prepare(self, reactor: "MemoryReactor", clock: Clock, hs: HomeServer) -> None:
         self.scheduler = ApplicationServiceScheduler(hs)
         self.txn_ctrl = Mock()
         self.txn_ctrl.send = simple_async_mock()
@@ -224,7 +252,7 @@ class ApplicationServiceSchedulerQueuerTestCase(unittest.HomeserverTestCase):
         self.scheduler.txn_ctrl = self.txn_ctrl
         self.scheduler.queuer.txn_ctrl = self.txn_ctrl
 
-    def test_send_single_event_no_queue(self):
+    def test_send_single_event_no_queue(self) -> None:
         # Expect the event to be sent immediately.
         service = Mock(id=4)
         event = Mock()
@@ -233,8 +261,8 @@ class ApplicationServiceSchedulerQueuerTestCase(unittest.HomeserverTestCase):
             service, [event], [], [], None, None, DeviceListUpdates()
         )
 
-    def test_send_single_event_with_queue(self):
-        d = defer.Deferred()
+    def test_send_single_event_with_queue(self) -> None:
+        d: TxnCtrlArgs = defer.Deferred()
         self.txn_ctrl.send = Mock(return_value=make_deferred_yieldable(d))
         service = Mock(id=4)
         event = Mock(event_id="first")
@@ -257,22 +285,22 @@ class ApplicationServiceSchedulerQueuerTestCase(unittest.HomeserverTestCase):
         )
         self.assertEqual(2, self.txn_ctrl.send.call_count)
 
-    def test_multiple_service_queues(self):
+    def test_multiple_service_queues(self) -> None:
         # Tests that each service has its own queue, and that they don't block
         # on each other.
         srv1 = Mock(id=4)
-        srv_1_defer = defer.Deferred()
+        srv_1_defer: "defer.Deferred[EventBase]" = defer.Deferred()
         srv_1_event = Mock(event_id="srv1a")
         srv_1_event2 = Mock(event_id="srv1b")
 
         srv2 = Mock(id=6)
-        srv_2_defer = defer.Deferred()
+        srv_2_defer: "defer.Deferred[EventBase]" = defer.Deferred()
         srv_2_event = Mock(event_id="srv2a")
         srv_2_event2 = Mock(event_id="srv2b")
 
         send_return_list = [srv_1_defer, srv_2_defer]
 
-        def do_send(*args, **kwargs):
+        def do_send(*args: object, **kwargs: object) -> "defer.Deferred[EventBase]":
             return make_deferred_yieldable(send_return_list.pop(0))
 
         self.txn_ctrl.send = Mock(side_effect=do_send)
@@ -297,12 +325,12 @@ class ApplicationServiceSchedulerQueuerTestCase(unittest.HomeserverTestCase):
         )
         self.assertEqual(3, self.txn_ctrl.send.call_count)
 
-    def test_send_large_txns(self):
-        srv_1_defer = defer.Deferred()
-        srv_2_defer = defer.Deferred()
+    def test_send_large_txns(self) -> None:
+        srv_1_defer: "defer.Deferred[EventBase]" = defer.Deferred()
+        srv_2_defer: "defer.Deferred[EventBase]" = defer.Deferred()
         send_return_list = [srv_1_defer, srv_2_defer]
 
-        def do_send(*args, **kwargs):
+        def do_send(*args: object, **kwargs: object) -> "defer.Deferred[EventBase]":
             return make_deferred_yieldable(send_return_list.pop(0))
 
         self.txn_ctrl.send = Mock(side_effect=do_send)
@@ -328,7 +356,7 @@ class ApplicationServiceSchedulerQueuerTestCase(unittest.HomeserverTestCase):
         )
         self.assertEqual(3, self.txn_ctrl.send.call_count)
 
-    def test_send_single_ephemeral_no_queue(self):
+    def test_send_single_ephemeral_no_queue(self) -> None:
         # Expect the event to be sent immediately.
         service = Mock(id=4, name="service")
         event_list = [Mock(name="event")]
@@ -337,7 +365,7 @@ class ApplicationServiceSchedulerQueuerTestCase(unittest.HomeserverTestCase):
             service, [], event_list, [], None, None, DeviceListUpdates()
         )
 
-    def test_send_multiple_ephemeral_no_queue(self):
+    def test_send_multiple_ephemeral_no_queue(self) -> None:
         # Expect the event to be sent immediately.
         service = Mock(id=4, name="service")
         event_list = [Mock(name="event1"), Mock(name="event2"), Mock(name="event3")]
@@ -346,8 +374,8 @@ class ApplicationServiceSchedulerQueuerTestCase(unittest.HomeserverTestCase):
             service, [], event_list, [], None, None, DeviceListUpdates()
         )
 
-    def test_send_single_ephemeral_with_queue(self):
-        d = defer.Deferred()
+    def test_send_single_ephemeral_with_queue(self) -> None:
+        d: TxnCtrlArgs = defer.Deferred()
         self.txn_ctrl.send = Mock(return_value=make_deferred_yieldable(d))
         service = Mock(id=4)
         event_list_1 = [Mock(event_id="event1"), Mock(event_id="event2")]
@@ -377,8 +405,8 @@ class ApplicationServiceSchedulerQueuerTestCase(unittest.HomeserverTestCase):
         )
         self.assertEqual(2, self.txn_ctrl.send.call_count)
 
-    def test_send_large_txns_ephemeral(self):
-        d = defer.Deferred()
+    def test_send_large_txns_ephemeral(self) -> None:
+        d: TxnCtrlArgs = defer.Deferred()
         self.txn_ctrl.send = Mock(return_value=make_deferred_yieldable(d))
         # Expect the event to be sent immediately.
         service = Mock(id=4, name="service")
-- 
cgit 1.5.1


From 5fdc12f482c68e2cdbb78d7db5de2cfe621720d4 Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Tue, 7 Feb 2023 01:10:54 +0100
Subject: Add `event_stream_ordering` column to membership state tables
 (#14979)

This adds an `event_stream_ordering` column to `current_state_events`,
`local_current_membership` and `room_memberships`. Each of these tables
is regularly joined with the `events` table to get the stream ordering
and denormalising this into each table will yield significant query
performance improvements once used. Includes a background job to
populate these values from the `events` table.

Same idea as https://github.com/matrix-org/synapse/pull/13703.

Signed off by Nick @ Beeper (@fizzadar).
---
 changelog.d/14979.misc                             |   1 +
 synapse/storage/databases/main/events.py           |  23 +++--
 .../storage/databases/main/events_bg_updates.py    | 104 ++++++++++++++++++++-
 synapse/storage/databases/main/events_worker.py    |   8 +-
 .../26membership_tables_event_stream_ordering.sql  |  21 +++++
 5 files changed, 146 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/14979.misc
 create mode 100644 synapse/storage/schema/main/delta/73/26membership_tables_event_stream_ordering.sql

(limited to 'synapse')

diff --git a/changelog.d/14979.misc b/changelog.d/14979.misc
new file mode 100644
index 0000000000..c09911e48d
--- /dev/null
+++ b/changelog.d/14979.misc
@@ -0,0 +1 @@
+Add denormalised event stream ordering column to membership state tables for future use. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 1536937b67..b6cce0a7cc 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1147,11 +1147,15 @@ class PersistEventsStore:
                 # been inserted into room_memberships.
                 txn.execute_batch(
                     """INSERT INTO current_state_events
-                        (room_id, type, state_key, event_id, membership)
-                    VALUES (?, ?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?))
+                        (room_id, type, state_key, event_id, membership, event_stream_ordering)
+                    VALUES (
+                        ?, ?, ?, ?,
+                        (SELECT membership FROM room_memberships WHERE event_id = ?),
+                        (SELECT stream_ordering FROM events WHERE event_id = ?)
+                    )
                     """,
                     [
-                        (room_id, key[0], key[1], ev_id, ev_id)
+                        (room_id, key[0], key[1], ev_id, ev_id, ev_id)
                         for key, ev_id in to_insert.items()
                     ],
                 )
@@ -1178,11 +1182,15 @@ class PersistEventsStore:
             if to_insert:
                 txn.execute_batch(
                     """INSERT INTO local_current_membership
-                        (room_id, user_id, event_id, membership)
-                    VALUES (?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?))
+                        (room_id, user_id, event_id, membership, event_stream_ordering)
+                    VALUES (
+                        ?, ?, ?,
+                        (SELECT membership FROM room_memberships WHERE event_id = ?),
+                        (SELECT stream_ordering FROM events WHERE event_id = ?)
+                    )
                     """,
                     [
-                        (room_id, key[1], ev_id, ev_id)
+                        (room_id, key[1], ev_id, ev_id, ev_id)
                         for key, ev_id in to_insert.items()
                         if key[0] == EventTypes.Member and self.is_mine_id(key[1])
                     ],
@@ -1790,6 +1798,7 @@ class PersistEventsStore:
             table="room_memberships",
             keys=(
                 "event_id",
+                "event_stream_ordering",
                 "user_id",
                 "sender",
                 "room_id",
@@ -1800,6 +1809,7 @@ class PersistEventsStore:
             values=[
                 (
                     event.event_id,
+                    event.internal_metadata.stream_ordering,
                     event.state_key,
                     event.user_id,
                     event.room_id,
@@ -1832,6 +1842,7 @@ class PersistEventsStore:
                     keyvalues={"room_id": event.room_id, "user_id": event.state_key},
                     values={
                         "event_id": event.event_id,
+                        "event_stream_ordering": event.internal_metadata.stream_ordering,
                         "membership": event.membership,
                     },
                 )
diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
index b9d3c36d60..0e81d38cca 100644
--- a/synapse/storage/databases/main/events_bg_updates.py
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -17,7 +17,7 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Set, Tuple, ca
 
 import attr
 
-from synapse.api.constants import EventContentFields, RelationTypes
+from synapse.api.constants import EventContentFields, EventTypes, RelationTypes
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.events import make_event_from_dict
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
@@ -71,6 +71,10 @@ class _BackgroundUpdates:
 
     EVENTS_JUMP_TO_DATE_INDEX = "events_jump_to_date_index"
 
+    POPULATE_MEMBERSHIP_EVENT_STREAM_ORDERING = (
+        "populate_membership_event_stream_ordering"
+    )
+
 
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class _CalculateChainCover:
@@ -99,6 +103,10 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
     ):
         super().__init__(database, db_conn, hs)
 
+        self.db_pool.updates.register_background_update_handler(
+            _BackgroundUpdates.POPULATE_MEMBERSHIP_EVENT_STREAM_ORDERING,
+            self._populate_membership_event_stream_ordering,
+        )
         self.db_pool.updates.register_background_update_handler(
             _BackgroundUpdates.EVENT_ORIGIN_SERVER_TS_NAME,
             self._background_reindex_origin_server_ts,
@@ -1498,3 +1506,97 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
             )
 
         return batch_size
+
+    async def _populate_membership_event_stream_ordering(
+        self, progress: JsonDict, batch_size: int
+    ) -> int:
+        def _populate_membership_event_stream_ordering(
+            txn: LoggingTransaction,
+        ) -> bool:
+
+            if "max_stream_ordering" in progress:
+                max_stream_ordering = progress["max_stream_ordering"]
+            else:
+                txn.execute("SELECT max(stream_ordering) FROM events")
+                res = txn.fetchone()
+                if res is None or res[0] is None:
+                    return True
+                else:
+                    max_stream_ordering = res[0]
+
+            start = progress.get("stream_ordering", 0)
+            stop = start + batch_size
+
+            sql = f"""
+                SELECT room_id, event_id, stream_ordering
+                FROM events
+                WHERE
+                    type = '{EventTypes.Member}'
+                    AND stream_ordering >= ?
+                    AND stream_ordering < ?
+            """
+            txn.execute(sql, (start, stop))
+
+            rows: List[Tuple[str, str, int]] = cast(
+                List[Tuple[str, str, int]], txn.fetchall()
+            )
+
+            event_ids: List[Tuple[str]] = []
+            event_stream_orderings: List[Tuple[int]] = []
+
+            for _, event_id, event_stream_ordering in rows:
+                event_ids.append((event_id,))
+                event_stream_orderings.append((event_stream_ordering,))
+
+            self.db_pool.simple_update_many_txn(
+                txn,
+                table="current_state_events",
+                key_names=("event_id",),
+                key_values=event_ids,
+                value_names=("event_stream_ordering",),
+                value_values=event_stream_orderings,
+            )
+
+            self.db_pool.simple_update_many_txn(
+                txn,
+                table="room_memberships",
+                key_names=("event_id",),
+                key_values=event_ids,
+                value_names=("event_stream_ordering",),
+                value_values=event_stream_orderings,
+            )
+
+            # NOTE: local_current_membership has no index on event_id, so only
+            # the room ID here will reduce the query rows read.
+            for room_id, event_id, event_stream_ordering in rows:
+                txn.execute(
+                    """
+                        UPDATE local_current_membership
+                        SET event_stream_ordering = ?
+                        WHERE room_id = ? AND event_id = ?
+                    """,
+                    (event_stream_ordering, room_id, event_id),
+                )
+
+            self.db_pool.updates._background_update_progress_txn(
+                txn,
+                _BackgroundUpdates.POPULATE_MEMBERSHIP_EVENT_STREAM_ORDERING,
+                {
+                    "stream_ordering": stop,
+                    "max_stream_ordering": max_stream_ordering,
+                },
+            )
+
+            return stop > max_stream_ordering
+
+        finished = await self.db_pool.runInteraction(
+            "_populate_membership_event_stream_ordering",
+            _populate_membership_event_stream_ordering,
+        )
+
+        if finished:
+            await self.db_pool.updates._end_background_update(
+                _BackgroundUpdates.POPULATE_MEMBERSHIP_EVENT_STREAM_ORDERING
+            )
+
+        return batch_size
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index d7d08369ca..6d0ef10258 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -1779,7 +1779,7 @@ class EventsWorkerStore(SQLBaseStore):
             txn: LoggingTransaction,
         ) -> List[Tuple[int, str, str, str, str, str, str, str, bool, bool]]:
             sql = (
-                "SELECT event_stream_ordering, e.event_id, e.room_id, e.type,"
+                "SELECT out.event_stream_ordering, e.event_id, e.room_id, e.type,"
                 " se.state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL,"
                 " e.outlier"
                 " FROM events AS e"
@@ -1791,10 +1791,10 @@ class EventsWorkerStore(SQLBaseStore):
                 " LEFT JOIN event_relations USING (event_id)"
                 " LEFT JOIN room_memberships USING (event_id)"
                 " LEFT JOIN rejections USING (event_id)"
-                " WHERE ? < event_stream_ordering"
-                " AND event_stream_ordering <= ?"
+                " WHERE ? < out.event_stream_ordering"
+                " AND out.event_stream_ordering <= ?"
                 " AND out.instance_name = ?"
-                " ORDER BY event_stream_ordering ASC"
+                " ORDER BY out.event_stream_ordering ASC"
             )
 
             txn.execute(sql, (last_id, current_id, instance_name))
diff --git a/synapse/storage/schema/main/delta/73/26membership_tables_event_stream_ordering.sql b/synapse/storage/schema/main/delta/73/26membership_tables_event_stream_ordering.sql
new file mode 100644
index 0000000000..7c30a67fc4
--- /dev/null
+++ b/synapse/storage/schema/main/delta/73/26membership_tables_event_stream_ordering.sql
@@ -0,0 +1,21 @@
+/* Copyright 2022 Beeper
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE current_state_events ADD COLUMN event_stream_ordering BIGINT;
+ALTER TABLE local_current_membership ADD COLUMN event_stream_ordering BIGINT;
+ALTER TABLE room_memberships ADD COLUMN event_stream_ordering BIGINT;
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+  ('populate_membership_event_stream_ordering', '{}');
-- 
cgit 1.5.1


From d0fed7a37b8b6ce166cae856fe243757aa7c7294 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 7 Feb 2023 00:20:04 +0000
Subject: Properly typecheck types.http (#14988)

* Tweak http types in Synapse

AFAICS these are correct, and they make mypy happier on tests.http.

* Type hints for test_proxyagent

* type hints for test_srv_resolver

* test_matrix_federation_agent

* tests.http.server._base

* tests.http.__init__

* tests.http.test_additional_resource

* tests.http.test_client

* tests.http.test_endpoint

* tests.http.test_matrixfederationclient

* tests.http.test_servlet

* tests.http.test_simple_client

* tests.http.test_site

* One fixup in tests.server

* Untyped defs

* Changelog

* Fixup syntax for Python 3.7

* Fix olddeps syntax

* Use a twisted IPv4 addr for dummy_address

* Fix typo, thanks Sean

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

* Remove redundant `Optional`

---------

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
---
 changelog.d/14988.misc                             |   1 +
 mypy.ini                                           |   6 +-
 synapse/http/client.py                             |   5 +-
 synapse/http/proxyagent.py                         |   3 +-
 tests/http/__init__.py                             |  19 ++-
 .../federation/test_matrix_federation_agent.py     | 142 +++++++++++++--------
 tests/http/federation/test_srv_resolver.py         |  60 +++++----
 tests/http/server/_base.py                         |   2 +-
 tests/http/test_additional_resource.py             |  18 ++-
 tests/http/test_client.py                          |  37 ++++--
 tests/http/test_endpoint.py                        |   4 +-
 tests/http/test_matrixfederationclient.py          |  53 ++++----
 tests/http/test_proxyagent.py                      | 103 +++++++++------
 tests/http/test_servlet.py                         |   8 +-
 tests/http/test_simple_client.py                   |  14 +-
 tests/http/test_site.py                            |   8 +-
 tests/server.py                                    |   6 +-
 17 files changed, 298 insertions(+), 191 deletions(-)
 create mode 100644 changelog.d/14988.misc

(limited to 'synapse')

diff --git a/changelog.d/14988.misc b/changelog.d/14988.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/14988.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/mypy.ini b/mypy.ini
index 93de1c97ea..11e683b704 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -32,9 +32,6 @@ exclude = (?x)
    |synapse/storage/databases/main/cache.py
    |synapse/storage/schema/
 
-   |tests/http/federation/test_matrix_federation_agent.py
-   |tests/http/federation/test_srv_resolver.py
-   |tests/http/test_proxyagent.py
    |tests/module_api/test_api.py
    |tests/rest/media/v1/test_media_storage.py
    |tests/server.py
@@ -92,6 +89,9 @@ disallow_untyped_defs = True
 [mypy-tests.handlers.*]
 disallow_untyped_defs = True
 
+[mypy-tests.http.*]
+disallow_untyped_defs = True
+
 [mypy-tests.logging.*]
 disallow_untyped_defs = True
 
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 4eb740c040..a05f297933 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -44,6 +44,7 @@ from twisted.internet.interfaces import (
     IAddress,
     IDelayedCall,
     IHostResolution,
+    IReactorCore,
     IReactorPluggableNameResolver,
     IReactorTime,
     IResolutionReceiver,
@@ -226,7 +227,9 @@ class _IPBlacklistingResolver:
         return recv
 
 
-@implementer(ISynapseReactor)
+# ISynapseReactor implies IReactorCore, but explicitly marking this as an implementer
+# of IReactorCore seems to keep mypy-zope happier.
+@implementer(IReactorCore, ISynapseReactor)
 class BlacklistingReactorWrapper:
     """
     A Reactor wrapper which will prevent DNS resolution to blacklisted IP
diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py
index 18899bc6d1..94ef737b9e 100644
--- a/synapse/http/proxyagent.py
+++ b/synapse/http/proxyagent.py
@@ -38,7 +38,6 @@ from twisted.web.iweb import IAgent, IBodyProducer, IPolicyForHTTPS, IResponse
 
 from synapse.http import redact_uri
 from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint, ProxyCredentials
-from synapse.types import ISynapseReactor
 
 logger = logging.getLogger(__name__)
 
@@ -84,7 +83,7 @@ class ProxyAgent(_AgentBase):
     def __init__(
         self,
         reactor: IReactorCore,
-        proxy_reactor: Optional[ISynapseReactor] = None,
+        proxy_reactor: Optional[IReactorCore] = None,
         contextFactory: Optional[IPolicyForHTTPS] = None,
         connectTimeout: Optional[float] = None,
         bindAddress: Optional[bytes] = None,
diff --git a/tests/http/__init__.py b/tests/http/__init__.py
index 093537adef..528cdee34b 100644
--- a/tests/http/__init__.py
+++ b/tests/http/__init__.py
@@ -19,13 +19,15 @@ from zope.interface import implementer
 
 from OpenSSL import SSL
 from OpenSSL.SSL import Connection
+from twisted.internet.address import IPv4Address
 from twisted.internet.interfaces import IOpenSSLServerConnectionCreator
 from twisted.internet.ssl import Certificate, trustRootFromCertificates
+from twisted.protocols.tls import TLSMemoryBIOProtocol
 from twisted.web.client import BrowserLikePolicyForHTTPS  # noqa: F401
 from twisted.web.iweb import IPolicyForHTTPS  # noqa: F401
 
 
-def get_test_https_policy():
+def get_test_https_policy() -> BrowserLikePolicyForHTTPS:
     """Get a test IPolicyForHTTPS which trusts the test CA cert
 
     Returns:
@@ -39,7 +41,7 @@ def get_test_https_policy():
     return BrowserLikePolicyForHTTPS(trustRoot=trust_root)
 
 
-def get_test_ca_cert_file():
+def get_test_ca_cert_file() -> str:
     """Get the path to the test CA cert
 
     The keypair is generated with:
@@ -51,7 +53,7 @@ def get_test_ca_cert_file():
     return os.path.join(os.path.dirname(__file__), "ca.crt")
 
 
-def get_test_key_file():
+def get_test_key_file() -> str:
     """get the path to the test key
 
     The key file is made with:
@@ -137,15 +139,20 @@ class TestServerTLSConnectionFactory:
     """An SSL connection creator which returns connections which present a certificate
     signed by our test CA."""
 
-    def __init__(self, sanlist):
+    def __init__(self, sanlist: List[bytes]):
         """
         Args:
-            sanlist: list[bytes]: a list of subjectAltName values for the cert
+            sanlist: a list of subjectAltName values for the cert
         """
         self._cert_file = create_test_cert_file(sanlist)
 
-    def serverConnectionForTLS(self, tlsProtocol):
+    def serverConnectionForTLS(self, tlsProtocol: TLSMemoryBIOProtocol) -> Connection:
         ctx = SSL.Context(SSL.SSLv23_METHOD)
         ctx.use_certificate_file(self._cert_file)
         ctx.use_privatekey_file(get_test_key_file())
         return Connection(ctx, None)
+
+
+# A dummy address, useful for tests that use FakeTransport and don't care about where
+# packets are going to/coming from.
+dummy_address = IPv4Address("TCP", "127.0.0.1", 80)
diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py
index 992d8f94fd..acfdcd3bca 100644
--- a/tests/http/federation/test_matrix_federation_agent.py
+++ b/tests/http/federation/test_matrix_federation_agent.py
@@ -14,7 +14,7 @@
 import base64
 import logging
 import os
-from typing import Iterable, Optional
+from typing import Any, Awaitable, Callable, Generator, List, Optional, cast
 from unittest.mock import Mock, patch
 
 import treq
@@ -24,14 +24,19 @@ from zope.interface import implementer
 
 from twisted.internet import defer
 from twisted.internet._sslverify import ClientTLSOptions, OpenSSLCertificateOptions
-from twisted.internet.interfaces import IProtocolFactory
+from twisted.internet.defer import Deferred
+from twisted.internet.endpoints import _WrappingProtocol
+from twisted.internet.interfaces import (
+    IOpenSSLClientConnectionCreator,
+    IProtocolFactory,
+)
 from twisted.internet.protocol import Factory
 from twisted.protocols.tls import TLSMemoryBIOFactory, TLSMemoryBIOProtocol
 from twisted.web._newclient import ResponseNeverReceived
 from twisted.web.client import Agent
 from twisted.web.http import HTTPChannel, Request
 from twisted.web.http_headers import Headers
-from twisted.web.iweb import IPolicyForHTTPS
+from twisted.web.iweb import IPolicyForHTTPS, IResponse
 
 from synapse.config.homeserver import HomeServerConfig
 from synapse.crypto.context_factory import FederationPolicyForHTTPS
@@ -42,11 +47,21 @@ from synapse.http.federation.well_known_resolver import (
     WellKnownResolver,
     _cache_period_from_headers,
 )
-from synapse.logging.context import SENTINEL_CONTEXT, LoggingContext, current_context
+from synapse.logging.context import (
+    SENTINEL_CONTEXT,
+    LoggingContext,
+    LoggingContextOrSentinel,
+    current_context,
+)
+from synapse.types import ISynapseReactor
 from synapse.util.caches.ttlcache import TTLCache
 
 from tests import unittest
-from tests.http import TestServerTLSConnectionFactory, get_test_ca_cert_file
+from tests.http import (
+    TestServerTLSConnectionFactory,
+    dummy_address,
+    get_test_ca_cert_file,
+)
 from tests.server import FakeTransport, ThreadedMemoryReactorClock
 from tests.utils import default_config
 
@@ -54,15 +69,17 @@ logger = logging.getLogger(__name__)
 
 
 # Once Async Mocks or lambdas are supported this can go away.
-def generate_resolve_service(result):
-    async def resolve_service(_):
+def generate_resolve_service(
+    result: List[Server],
+) -> Callable[[Any], Awaitable[List[Server]]]:
+    async def resolve_service(_: Any) -> List[Server]:
         return result
 
     return resolve_service
 
 
 class MatrixFederationAgentTests(unittest.TestCase):
-    def setUp(self):
+    def setUp(self) -> None:
         self.reactor = ThreadedMemoryReactorClock()
 
         self.mock_resolver = Mock()
@@ -75,8 +92,12 @@ class MatrixFederationAgentTests(unittest.TestCase):
 
         self.tls_factory = FederationPolicyForHTTPS(config)
 
-        self.well_known_cache = TTLCache("test_cache", timer=self.reactor.seconds)
-        self.had_well_known_cache = TTLCache("test_cache", timer=self.reactor.seconds)
+        self.well_known_cache: TTLCache[bytes, Optional[bytes]] = TTLCache(
+            "test_cache", timer=self.reactor.seconds
+        )
+        self.had_well_known_cache: TTLCache[bytes, bool] = TTLCache(
+            "test_cache", timer=self.reactor.seconds
+        )
         self.well_known_resolver = WellKnownResolver(
             self.reactor,
             Agent(self.reactor, contextFactory=self.tls_factory),
@@ -89,8 +110,8 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self,
         client_factory: IProtocolFactory,
         ssl: bool = True,
-        expected_sni: bytes = None,
-        tls_sanlist: Optional[Iterable[bytes]] = None,
+        expected_sni: Optional[bytes] = None,
+        tls_sanlist: Optional[List[bytes]] = None,
     ) -> HTTPChannel:
         """Builds a test server, and completes the outgoing client connection
         Args:
@@ -116,8 +137,8 @@ class MatrixFederationAgentTests(unittest.TestCase):
         if ssl:
             server_factory = _wrap_server_factory_for_tls(server_factory, tls_sanlist)
 
-        server_protocol = server_factory.buildProtocol(None)
-
+        server_protocol = server_factory.buildProtocol(dummy_address)
+        assert server_protocol is not None
         # now, tell the client protocol factory to build the client protocol (it will be a
         # _WrappingProtocol, around a TLSMemoryBIOProtocol, around an
         # HTTP11ClientProtocol) and wire the output of said protocol up to the server via
@@ -125,7 +146,8 @@ class MatrixFederationAgentTests(unittest.TestCase):
         #
         # Normally this would be done by the TCP socket code in Twisted, but we are
         # stubbing that out here.
-        client_protocol = client_factory.buildProtocol(None)
+        client_protocol = client_factory.buildProtocol(dummy_address)
+        assert isinstance(client_protocol, _WrappingProtocol)
         client_protocol.makeConnection(
             FakeTransport(server_protocol, self.reactor, client_protocol)
         )
@@ -136,6 +158,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         )
 
         if ssl:
+            assert isinstance(server_protocol, TLSMemoryBIOProtocol)
             # fish the test server back out of the server-side TLS protocol.
             http_protocol = server_protocol.wrappedProtocol
             # grab a hold of the TLS connection, in case it gets torn down
@@ -144,6 +167,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
             http_protocol = server_protocol
             tls_connection = None
 
+        assert isinstance(http_protocol, HTTPChannel)
         # give the reactor a pump to get the TLS juices flowing (if needed)
         self.reactor.advance(0)
 
@@ -159,12 +183,14 @@ class MatrixFederationAgentTests(unittest.TestCase):
         return http_protocol
 
     @defer.inlineCallbacks
-    def _make_get_request(self, uri: bytes):
+    def _make_get_request(
+        self, uri: bytes
+    ) -> Generator["Deferred[object]", object, IResponse]:
         """
         Sends a simple GET request via the agent, and checks its logcontext management
         """
         with LoggingContext("one") as context:
-            fetch_d = self.agent.request(b"GET", uri)
+            fetch_d: Deferred[IResponse] = self.agent.request(b"GET", uri)
 
             # Nothing happened yet
             self.assertNoResult(fetch_d)
@@ -172,8 +198,9 @@ class MatrixFederationAgentTests(unittest.TestCase):
             # should have reset logcontext to the sentinel
             _check_logcontext(SENTINEL_CONTEXT)
 
+            fetch_res: IResponse
             try:
-                fetch_res = yield fetch_d
+                fetch_res = yield fetch_d  # type: ignore[misc, assignment]
                 return fetch_res
             except Exception as e:
                 logger.info("Fetch of %s failed: %s", uri.decode("ascii"), e)
@@ -216,7 +243,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         request: Request,
         content: bytes,
         headers: Optional[dict] = None,
-    ):
+    ) -> None:
         """Check that an incoming request looks like a valid .well-known request, and
         send back the response.
         """
@@ -237,16 +264,16 @@ class MatrixFederationAgentTests(unittest.TestCase):
         because it is created too early during setUp
         """
         return MatrixFederationAgent(
-            reactor=self.reactor,
+            reactor=cast(ISynapseReactor, self.reactor),
             tls_client_options_factory=self.tls_factory,
-            user_agent="test-agent",  # Note that this is unused since _well_known_resolver is provided.
+            user_agent=b"test-agent",  # Note that this is unused since _well_known_resolver is provided.
             ip_whitelist=IPSet(),
             ip_blacklist=IPSet(),
             _srv_resolver=self.mock_resolver,
             _well_known_resolver=self.well_known_resolver,
         )
 
-    def test_get(self):
+    def test_get(self) -> None:
         """happy-path test of a GET request with an explicit port"""
         self._do_get()
 
@@ -254,11 +281,11 @@ class MatrixFederationAgentTests(unittest.TestCase):
         os.environ,
         {"https_proxy": "proxy.com", "no_proxy": "testserv"},
     )
-    def test_get_bypass_proxy(self):
+    def test_get_bypass_proxy(self) -> None:
         """test of a GET request with an explicit port and bypass proxy"""
         self._do_get()
 
-    def _do_get(self):
+    def _do_get(self) -> None:
         """test of a GET request with an explicit port"""
         self.agent = self._make_agent()
 
@@ -318,7 +345,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
     @patch.dict(
         os.environ, {"https_proxy": "http://proxy.com", "no_proxy": "unused.com"}
     )
-    def test_get_via_http_proxy(self):
+    def test_get_via_http_proxy(self) -> None:
         """test for federation request through a http proxy"""
         self._do_get_via_proxy(expect_proxy_ssl=False, expected_auth_credentials=None)
 
@@ -326,7 +353,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         os.environ,
         {"https_proxy": "http://user:pass@proxy.com", "no_proxy": "unused.com"},
     )
-    def test_get_via_http_proxy_with_auth(self):
+    def test_get_via_http_proxy_with_auth(self) -> None:
         """test for federation request through a http proxy with authentication"""
         self._do_get_via_proxy(
             expect_proxy_ssl=False, expected_auth_credentials=b"user:pass"
@@ -335,7 +362,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
     @patch.dict(
         os.environ, {"https_proxy": "https://proxy.com", "no_proxy": "unused.com"}
     )
-    def test_get_via_https_proxy(self):
+    def test_get_via_https_proxy(self) -> None:
         """test for federation request through a https proxy"""
         self._do_get_via_proxy(expect_proxy_ssl=True, expected_auth_credentials=None)
 
@@ -343,7 +370,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         os.environ,
         {"https_proxy": "https://user:pass@proxy.com", "no_proxy": "unused.com"},
     )
-    def test_get_via_https_proxy_with_auth(self):
+    def test_get_via_https_proxy_with_auth(self) -> None:
         """test for federation request through a https proxy with authentication"""
         self._do_get_via_proxy(
             expect_proxy_ssl=True, expected_auth_credentials=b"user:pass"
@@ -353,7 +380,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self,
         expect_proxy_ssl: bool = False,
         expected_auth_credentials: Optional[bytes] = None,
-    ):
+    ) -> None:
         """Send a https federation request via an agent and check that it is correctly
             received at the proxy and client. The proxy can use either http or https.
         Args:
@@ -418,10 +445,12 @@ class MatrixFederationAgentTests(unittest.TestCase):
         # now we make another test server to act as the upstream HTTP server.
         server_ssl_protocol = _wrap_server_factory_for_tls(
             _get_test_protocol_factory()
-        ).buildProtocol(None)
+        ).buildProtocol(dummy_address)
+        assert isinstance(server_ssl_protocol, TLSMemoryBIOProtocol)
 
         # Tell the HTTP server to send outgoing traffic back via the proxy's transport.
         proxy_server_transport = proxy_server.transport
+        assert proxy_server_transport is not None
         server_ssl_protocol.makeConnection(proxy_server_transport)
 
         # ... and replace the protocol on the proxy's transport with the
@@ -451,6 +480,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
 
         # now there should be a pending request
         http_server = server_ssl_protocol.wrappedProtocol
+        assert isinstance(http_server, HTTPChannel)
         self.assertEqual(len(http_server.requests), 1)
 
         request = http_server.requests[0]
@@ -491,7 +521,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         json = self.successResultOf(treq.json_content(response))
         self.assertEqual(json, {"a": 1})
 
-    def test_get_ip_address(self):
+    def test_get_ip_address(self) -> None:
         """
         Test the behaviour when the server name contains an explicit IP (with no port)
         """
@@ -526,7 +556,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.reactor.pump((0.1,))
         self.successResultOf(test_d)
 
-    def test_get_ipv6_address(self):
+    def test_get_ipv6_address(self) -> None:
         """
         Test the behaviour when the server name contains an explicit IPv6 address
         (with no port)
@@ -562,7 +592,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.reactor.pump((0.1,))
         self.successResultOf(test_d)
 
-    def test_get_ipv6_address_with_port(self):
+    def test_get_ipv6_address_with_port(self) -> None:
         """
         Test the behaviour when the server name contains an explicit IPv6 address
         (with explicit port)
@@ -598,7 +628,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.reactor.pump((0.1,))
         self.successResultOf(test_d)
 
-    def test_get_hostname_bad_cert(self):
+    def test_get_hostname_bad_cert(self) -> None:
         """
         Test the behaviour when the certificate on the server doesn't match the hostname
         """
@@ -651,7 +681,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         failure_reason = e.value.reasons[0]
         self.assertIsInstance(failure_reason.value, VerificationError)
 
-    def test_get_ip_address_bad_cert(self):
+    def test_get_ip_address_bad_cert(self) -> None:
         """
         Test the behaviour when the server name contains an explicit IP, but
         the server cert doesn't cover it
@@ -684,7 +714,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         failure_reason = e.value.reasons[0]
         self.assertIsInstance(failure_reason.value, VerificationError)
 
-    def test_get_no_srv_no_well_known(self):
+    def test_get_no_srv_no_well_known(self) -> None:
         """
         Test the behaviour when the server name has no port, no SRV, and no well-known
         """
@@ -740,7 +770,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.reactor.pump((0.1,))
         self.successResultOf(test_d)
 
-    def test_get_well_known(self):
+    def test_get_well_known(self) -> None:
         """Test the behaviour when the .well-known delegates elsewhere"""
         self.agent = self._make_agent()
 
@@ -802,7 +832,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.well_known_cache.expire()
         self.assertNotIn(b"testserv", self.well_known_cache)
 
-    def test_get_well_known_redirect(self):
+    def test_get_well_known_redirect(self) -> None:
         """Test the behaviour when the server name has no port and no SRV record, but
         the .well-known has a 300 redirect
         """
@@ -892,7 +922,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.well_known_cache.expire()
         self.assertNotIn(b"testserv", self.well_known_cache)
 
-    def test_get_invalid_well_known(self):
+    def test_get_invalid_well_known(self) -> None:
         """
         Test the behaviour when the server name has an *invalid* well-known (and no SRV)
         """
@@ -945,7 +975,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.reactor.pump((0.1,))
         self.successResultOf(test_d)
 
-    def test_get_well_known_unsigned_cert(self):
+    def test_get_well_known_unsigned_cert(self) -> None:
         """Test the behaviour when the .well-known server presents a cert
         not signed by a CA
         """
@@ -969,7 +999,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
             ip_blacklist=IPSet(),
             _srv_resolver=self.mock_resolver,
             _well_known_resolver=WellKnownResolver(
-                self.reactor,
+                cast(ISynapseReactor, self.reactor),
                 Agent(self.reactor, contextFactory=tls_factory),
                 b"test-agent",
                 well_known_cache=self.well_known_cache,
@@ -999,7 +1029,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
             b"_matrix._tcp.testserv"
         )
 
-    def test_get_hostname_srv(self):
+    def test_get_hostname_srv(self) -> None:
         """
         Test the behaviour when there is a single SRV record
         """
@@ -1041,7 +1071,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.reactor.pump((0.1,))
         self.successResultOf(test_d)
 
-    def test_get_well_known_srv(self):
+    def test_get_well_known_srv(self) -> None:
         """Test the behaviour when the .well-known redirects to a place where there
         is a SRV.
         """
@@ -1101,7 +1131,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.reactor.pump((0.1,))
         self.successResultOf(test_d)
 
-    def test_idna_servername(self):
+    def test_idna_servername(self) -> None:
         """test the behaviour when the server name has idna chars in"""
         self.agent = self._make_agent()
 
@@ -1163,7 +1193,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.reactor.pump((0.1,))
         self.successResultOf(test_d)
 
-    def test_idna_srv_target(self):
+    def test_idna_srv_target(self) -> None:
         """test the behaviour when the target of a SRV record has idna chars"""
         self.agent = self._make_agent()
 
@@ -1206,7 +1236,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.reactor.pump((0.1,))
         self.successResultOf(test_d)
 
-    def test_well_known_cache(self):
+    def test_well_known_cache(self) -> None:
         self.reactor.lookups["testserv"] = "1.2.3.4"
 
         fetch_d = defer.ensureDeferred(
@@ -1262,7 +1292,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         r = self.successResultOf(fetch_d)
         self.assertEqual(r.delegated_server, b"other-server")
 
-    def test_well_known_cache_with_temp_failure(self):
+    def test_well_known_cache_with_temp_failure(self) -> None:
         """Test that we refetch well-known before the cache expires, and that
         it ignores transient errors.
         """
@@ -1341,7 +1371,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         r = self.successResultOf(fetch_d)
         self.assertEqual(r.delegated_server, None)
 
-    def test_well_known_too_large(self):
+    def test_well_known_too_large(self) -> None:
         """A well-known query that returns a result which is too large should be rejected."""
         self.reactor.lookups["testserv"] = "1.2.3.4"
 
@@ -1367,7 +1397,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         r = self.successResultOf(fetch_d)
         self.assertIsNone(r.delegated_server)
 
-    def test_srv_fallbacks(self):
+    def test_srv_fallbacks(self) -> None:
         """Test that other SRV results are tried if the first one fails."""
         self.agent = self._make_agent()
 
@@ -1427,7 +1457,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
 
 
 class TestCachePeriodFromHeaders(unittest.TestCase):
-    def test_cache_control(self):
+    def test_cache_control(self) -> None:
         # uppercase
         self.assertEqual(
             _cache_period_from_headers(
@@ -1464,7 +1494,7 @@ class TestCachePeriodFromHeaders(unittest.TestCase):
             0,
         )
 
-    def test_expires(self):
+    def test_expires(self) -> None:
         self.assertEqual(
             _cache_period_from_headers(
                 Headers({b"Expires": [b"Wed, 30 Jan 2019 07:35:33 GMT"]}),
@@ -1491,14 +1521,14 @@ class TestCachePeriodFromHeaders(unittest.TestCase):
         self.assertEqual(_cache_period_from_headers(Headers({b"Expires": [b"0"]})), 0)
 
 
-def _check_logcontext(context):
+def _check_logcontext(context: LoggingContextOrSentinel) -> None:
     current = current_context()
     if current is not context:
         raise AssertionError("Expected logcontext %s but was %s" % (context, current))
 
 
 def _wrap_server_factory_for_tls(
-    factory: IProtocolFactory, sanlist: Iterable[bytes] = None
+    factory: IProtocolFactory, sanlist: Optional[List[bytes]] = None
 ) -> IProtocolFactory:
     """Wrap an existing Protocol Factory with a test TLSMemoryBIOFactory
     The resultant factory will create a TLS server which presents a certificate
@@ -1537,7 +1567,7 @@ def _get_test_protocol_factory() -> IProtocolFactory:
     return server_factory
 
 
-def _log_request(request: str):
+def _log_request(request: str) -> None:
     """Implements Factory.log, which is expected by Request.finish"""
     logger.info(f"Completed request {request}")
 
@@ -1547,6 +1577,8 @@ class TrustingTLSPolicyForHTTPS:
     """An IPolicyForHTTPS which checks that the certificate belongs to the
     right server, but doesn't check the certificate chain."""
 
-    def creatorForNetloc(self, hostname, port):
+    def creatorForNetloc(
+        self, hostname: bytes, port: int
+    ) -> IOpenSSLClientConnectionCreator:
         certificateOptions = OpenSSLCertificateOptions()
         return ClientTLSOptions(hostname, certificateOptions.getContext())
diff --git a/tests/http/federation/test_srv_resolver.py b/tests/http/federation/test_srv_resolver.py
index 77ce8432ac..7748f56ee6 100644
--- a/tests/http/federation/test_srv_resolver.py
+++ b/tests/http/federation/test_srv_resolver.py
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+from typing import Dict, Generator, List, Tuple, cast
 from unittest.mock import Mock
 
 from twisted.internet import defer
@@ -20,7 +20,7 @@ from twisted.internet.defer import Deferred
 from twisted.internet.error import ConnectError
 from twisted.names import dns, error
 
-from synapse.http.federation.srv_resolver import SrvResolver
+from synapse.http.federation.srv_resolver import Server, SrvResolver
 from synapse.logging.context import LoggingContext, current_context
 
 from tests import unittest
@@ -28,7 +28,7 @@ from tests.utils import MockClock
 
 
 class SrvResolverTestCase(unittest.TestCase):
-    def test_resolve(self):
+    def test_resolve(self) -> None:
         dns_client_mock = Mock()
 
         service_name = b"test_service.example.com"
@@ -38,18 +38,19 @@ class SrvResolverTestCase(unittest.TestCase):
             type=dns.SRV, payload=dns.Record_SRV(target=host_name)
         )
 
-        result_deferred = Deferred()
+        result_deferred: "Deferred[Tuple[List[dns.RRHeader], None, None]]" = Deferred()
         dns_client_mock.lookupService.return_value = result_deferred
 
-        cache = {}
+        cache: Dict[bytes, List[Server]] = {}
         resolver = SrvResolver(dns_client=dns_client_mock, cache=cache)
 
         @defer.inlineCallbacks
-        def do_lookup():
+        def do_lookup() -> Generator["Deferred[object]", object, List[Server]]:
 
             with LoggingContext("one") as ctx:
                 resolve_d = resolver.resolve_service(service_name)
-                result = yield defer.ensureDeferred(resolve_d)
+                result: List[Server]
+                result = yield defer.ensureDeferred(resolve_d)  # type: ignore[assignment]
 
                 # should have restored our context
                 self.assertIs(current_context(), ctx)
@@ -70,7 +71,9 @@ class SrvResolverTestCase(unittest.TestCase):
         self.assertEqual(servers[0].host, host_name)
 
     @defer.inlineCallbacks
-    def test_from_cache_expired_and_dns_fail(self):
+    def test_from_cache_expired_and_dns_fail(
+        self,
+    ) -> Generator["Deferred[object]", object, None]:
         dns_client_mock = Mock()
         dns_client_mock.lookupService.return_value = defer.fail(error.DNSServerError())
 
@@ -81,10 +84,13 @@ class SrvResolverTestCase(unittest.TestCase):
         entry.priority = 0
         entry.weight = 0
 
-        cache = {service_name: [entry]}
+        cache = {service_name: [cast(Server, entry)]}
         resolver = SrvResolver(dns_client=dns_client_mock, cache=cache)
 
-        servers = yield defer.ensureDeferred(resolver.resolve_service(service_name))
+        servers: List[Server]
+        servers = yield defer.ensureDeferred(
+            resolver.resolve_service(service_name)
+        )  # type: ignore[assignment]
 
         dns_client_mock.lookupService.assert_called_once_with(service_name)
 
@@ -92,7 +98,7 @@ class SrvResolverTestCase(unittest.TestCase):
         self.assertEqual(servers, cache[service_name])
 
     @defer.inlineCallbacks
-    def test_from_cache(self):
+    def test_from_cache(self) -> Generator["Deferred[object]", object, None]:
         clock = MockClock()
 
         dns_client_mock = Mock(spec_set=["lookupService"])
@@ -105,12 +111,15 @@ class SrvResolverTestCase(unittest.TestCase):
         entry.priority = 0
         entry.weight = 0
 
-        cache = {service_name: [entry]}
+        cache = {service_name: [cast(Server, entry)]}
         resolver = SrvResolver(
             dns_client=dns_client_mock, cache=cache, get_time=clock.time
         )
 
-        servers = yield defer.ensureDeferred(resolver.resolve_service(service_name))
+        servers: List[Server]
+        servers = yield defer.ensureDeferred(
+            resolver.resolve_service(service_name)
+        )  # type: ignore[assignment]
 
         self.assertFalse(dns_client_mock.lookupService.called)
 
@@ -118,45 +127,48 @@ class SrvResolverTestCase(unittest.TestCase):
         self.assertEqual(servers, cache[service_name])
 
     @defer.inlineCallbacks
-    def test_empty_cache(self):
+    def test_empty_cache(self) -> Generator["Deferred[object]", object, None]:
         dns_client_mock = Mock()
 
         dns_client_mock.lookupService.return_value = defer.fail(error.DNSServerError())
 
         service_name = b"test_service.example.com"
 
-        cache = {}
+        cache: Dict[bytes, List[Server]] = {}
         resolver = SrvResolver(dns_client=dns_client_mock, cache=cache)
 
         with self.assertRaises(error.DNSServerError):
             yield defer.ensureDeferred(resolver.resolve_service(service_name))
 
     @defer.inlineCallbacks
-    def test_name_error(self):
+    def test_name_error(self) -> Generator["Deferred[object]", object, None]:
         dns_client_mock = Mock()
 
         dns_client_mock.lookupService.return_value = defer.fail(error.DNSNameError())
 
         service_name = b"test_service.example.com"
 
-        cache = {}
+        cache: Dict[bytes, List[Server]] = {}
         resolver = SrvResolver(dns_client=dns_client_mock, cache=cache)
 
-        servers = yield defer.ensureDeferred(resolver.resolve_service(service_name))
+        servers: List[Server]
+        servers = yield defer.ensureDeferred(
+            resolver.resolve_service(service_name)
+        )  # type: ignore[assignment]
 
         self.assertEqual(len(servers), 0)
         self.assertEqual(len(cache), 0)
 
-    def test_disabled_service(self):
+    def test_disabled_service(self) -> None:
         """
         test the behaviour when there is a single record which is ".".
         """
         service_name = b"test_service.example.com"
 
-        lookup_deferred = Deferred()
+        lookup_deferred: "Deferred[Tuple[List[dns.RRHeader], None, None]]" = Deferred()
         dns_client_mock = Mock()
         dns_client_mock.lookupService.return_value = lookup_deferred
-        cache = {}
+        cache: Dict[bytes, List[Server]] = {}
         resolver = SrvResolver(dns_client=dns_client_mock, cache=cache)
 
         # Old versions of Twisted don't have an ensureDeferred in failureResultOf.
@@ -173,16 +185,16 @@ class SrvResolverTestCase(unittest.TestCase):
 
         self.failureResultOf(resolve_d, ConnectError)
 
-    def test_non_srv_answer(self):
+    def test_non_srv_answer(self) -> None:
         """
         test the behaviour when the dns server gives us a spurious non-SRV response
         """
         service_name = b"test_service.example.com"
 
-        lookup_deferred = Deferred()
+        lookup_deferred: "Deferred[Tuple[List[dns.RRHeader], None, None]]" = Deferred()
         dns_client_mock = Mock()
         dns_client_mock.lookupService.return_value = lookup_deferred
-        cache = {}
+        cache: Dict[bytes, List[Server]] = {}
         resolver = SrvResolver(dns_client=dns_client_mock, cache=cache)
 
         # Old versions of Twisted don't have an ensureDeferred in successResultOf.
diff --git a/tests/http/server/_base.py b/tests/http/server/_base.py
index 5071f83574..36472e57a8 100644
--- a/tests/http/server/_base.py
+++ b/tests/http/server/_base.py
@@ -556,6 +556,6 @@ def _get_stack_frame_method_name(frame_info: inspect.FrameInfo) -> str:
     return method_name
 
 
-def _hash_stack(stack: List[inspect.FrameInfo]):
+def _hash_stack(stack: List[inspect.FrameInfo]) -> Tuple[str, ...]:
     """Turns a stack into a hashable value that can be put into a set."""
     return tuple(_format_stack_frame(frame) for frame in stack)
diff --git a/tests/http/test_additional_resource.py b/tests/http/test_additional_resource.py
index 391196425c..ec6aacf235 100644
--- a/tests/http/test_additional_resource.py
+++ b/tests/http/test_additional_resource.py
@@ -11,28 +11,34 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Any
 
+from twisted.web.server import Request
 
 from synapse.http.additional_resource import AdditionalResource
 from synapse.http.server import respond_with_json
+from synapse.http.site import SynapseRequest
+from synapse.types import JsonDict
 
 from tests.server import FakeSite, make_request
 from tests.unittest import HomeserverTestCase
 
 
 class _AsyncTestCustomEndpoint:
-    def __init__(self, config, module_api):
+    def __init__(self, config: JsonDict, module_api: Any) -> None:
         pass
 
-    async def handle_request(self, request):
+    async def handle_request(self, request: Request) -> None:
+        assert isinstance(request, SynapseRequest)
         respond_with_json(request, 200, {"some_key": "some_value_async"})
 
 
 class _SyncTestCustomEndpoint:
-    def __init__(self, config, module_api):
+    def __init__(self, config: JsonDict, module_api: Any) -> None:
         pass
 
-    async def handle_request(self, request):
+    async def handle_request(self, request: Request) -> None:
+        assert isinstance(request, SynapseRequest)
         respond_with_json(request, 200, {"some_key": "some_value_sync"})
 
 
@@ -41,7 +47,7 @@ class AdditionalResourceTests(HomeserverTestCase):
     and async handlers.
     """
 
-    def test_async(self):
+    def test_async(self) -> None:
         handler = _AsyncTestCustomEndpoint({}, None).handle_request
         resource = AdditionalResource(self.hs, handler)
 
@@ -52,7 +58,7 @@ class AdditionalResourceTests(HomeserverTestCase):
         self.assertEqual(channel.code, 200)
         self.assertEqual(channel.json_body, {"some_key": "some_value_async"})
 
-    def test_sync(self):
+    def test_sync(self) -> None:
         handler = _SyncTestCustomEndpoint({}, None).handle_request
         resource = AdditionalResource(self.hs, handler)
 
diff --git a/tests/http/test_client.py b/tests/http/test_client.py
index 7e2f2a01cc..9cfe1ad0de 100644
--- a/tests/http/test_client.py
+++ b/tests/http/test_client.py
@@ -13,10 +13,12 @@
 #  limitations under the License.
 
 from io import BytesIO
+from typing import Tuple, Union
 from unittest.mock import Mock
 
 from netaddr import IPSet
 
+from twisted.internet.defer import Deferred
 from twisted.internet.error import DNSLookupError
 from twisted.python.failure import Failure
 from twisted.test.proto_helpers import AccumulatingProtocol
@@ -28,6 +30,7 @@ from synapse.http.client import (
     BlacklistingAgentWrapper,
     BlacklistingReactorWrapper,
     BodyExceededMaxSize,
+    _DiscardBodyWithMaxSizeProtocol,
     read_body_with_max_size,
 )
 
@@ -36,7 +39,9 @@ from tests.unittest import TestCase
 
 
 class ReadBodyWithMaxSizeTests(TestCase):
-    def _build_response(self, length=UNKNOWN_LENGTH):
+    def _build_response(
+        self, length: Union[int, str] = UNKNOWN_LENGTH
+    ) -> Tuple[BytesIO, "Deferred[int]", _DiscardBodyWithMaxSizeProtocol]:
         """Start reading the body, returns the response, result and proto"""
         response = Mock(length=length)
         result = BytesIO()
@@ -48,23 +53,27 @@ class ReadBodyWithMaxSizeTests(TestCase):
 
         return result, deferred, protocol
 
-    def _assert_error(self, deferred, protocol):
+    def _assert_error(
+        self, deferred: "Deferred[int]", protocol: _DiscardBodyWithMaxSizeProtocol
+    ) -> None:
         """Ensure that the expected error is received."""
-        self.assertIsInstance(deferred.result, Failure)
+        assert isinstance(deferred.result, Failure)
         self.assertIsInstance(deferred.result.value, BodyExceededMaxSize)
-        protocol.transport.abortConnection.assert_called_once()
+        assert protocol.transport is not None
+        # type-ignore: presumably abortConnection has been replaced with a Mock.
+        protocol.transport.abortConnection.assert_called_once()  # type: ignore[attr-defined]
 
-    def _cleanup_error(self, deferred):
+    def _cleanup_error(self, deferred: "Deferred[int]") -> None:
         """Ensure that the error in the Deferred is handled gracefully."""
         called = [False]
 
-        def errback(f):
+        def errback(f: Failure) -> None:
             called[0] = True
 
         deferred.addErrback(errback)
         self.assertTrue(called[0])
 
-    def test_no_error(self):
+    def test_no_error(self) -> None:
         """A response that is NOT too large."""
         result, deferred, protocol = self._build_response()
 
@@ -76,7 +85,7 @@ class ReadBodyWithMaxSizeTests(TestCase):
         self.assertEqual(result.getvalue(), b"12345")
         self.assertEqual(deferred.result, 5)
 
-    def test_too_large(self):
+    def test_too_large(self) -> None:
         """A response which is too large raises an exception."""
         result, deferred, protocol = self._build_response()
 
@@ -87,7 +96,7 @@ class ReadBodyWithMaxSizeTests(TestCase):
         self._assert_error(deferred, protocol)
         self._cleanup_error(deferred)
 
-    def test_multiple_packets(self):
+    def test_multiple_packets(self) -> None:
         """Data should be accumulated through mutliple packets."""
         result, deferred, protocol = self._build_response()
 
@@ -100,7 +109,7 @@ class ReadBodyWithMaxSizeTests(TestCase):
         self.assertEqual(result.getvalue(), b"1234")
         self.assertEqual(deferred.result, 4)
 
-    def test_additional_data(self):
+    def test_additional_data(self) -> None:
         """A connection can receive data after being closed."""
         result, deferred, protocol = self._build_response()
 
@@ -115,7 +124,7 @@ class ReadBodyWithMaxSizeTests(TestCase):
         self._assert_error(deferred, protocol)
         self._cleanup_error(deferred)
 
-    def test_content_length(self):
+    def test_content_length(self) -> None:
         """The body shouldn't be read (at all) if the Content-Length header is too large."""
         result, deferred, protocol = self._build_response(length=10)
 
@@ -132,7 +141,7 @@ class ReadBodyWithMaxSizeTests(TestCase):
 
 
 class BlacklistingAgentTest(TestCase):
-    def setUp(self):
+    def setUp(self) -> None:
         self.reactor, self.clock = get_clock()
 
         self.safe_domain, self.safe_ip = b"safe.test", b"1.2.3.4"
@@ -151,7 +160,7 @@ class BlacklistingAgentTest(TestCase):
         self.ip_whitelist = IPSet([self.allowed_ip.decode()])
         self.ip_blacklist = IPSet(["5.0.0.0/8"])
 
-    def test_reactor(self):
+    def test_reactor(self) -> None:
         """Apply the blacklisting reactor and ensure it properly blocks connections to particular domains and IPs."""
         agent = Agent(
             BlacklistingReactorWrapper(
@@ -197,7 +206,7 @@ class BlacklistingAgentTest(TestCase):
             response = self.successResultOf(d)
             self.assertEqual(response.code, 200)
 
-    def test_agent(self):
+    def test_agent(self) -> None:
         """Apply the blacklisting agent and ensure it properly blocks connections to particular IPs."""
         agent = BlacklistingAgentWrapper(
             Agent(self.reactor),
diff --git a/tests/http/test_endpoint.py b/tests/http/test_endpoint.py
index a801f002a0..8c18e56881 100644
--- a/tests/http/test_endpoint.py
+++ b/tests/http/test_endpoint.py
@@ -17,7 +17,7 @@ from tests import unittest
 
 
 class ServerNameTestCase(unittest.TestCase):
-    def test_parse_server_name(self):
+    def test_parse_server_name(self) -> None:
         test_data = {
             "localhost": ("localhost", None),
             "my-example.com:1234": ("my-example.com", 1234),
@@ -32,7 +32,7 @@ class ServerNameTestCase(unittest.TestCase):
         for i, o in test_data.items():
             self.assertEqual(parse_server_name(i), o)
 
-    def test_validate_bad_server_names(self):
+    def test_validate_bad_server_names(self) -> None:
         test_data = [
             "",  # empty
             "localhost:http",  # non-numeric port
diff --git a/tests/http/test_matrixfederationclient.py b/tests/http/test_matrixfederationclient.py
index be9eaf34e8..fdd22a8e94 100644
--- a/tests/http/test_matrixfederationclient.py
+++ b/tests/http/test_matrixfederationclient.py
@@ -11,16 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+from typing import Generator
 from unittest.mock import Mock
 
 from netaddr import IPSet
 from parameterized import parameterized
 
 from twisted.internet import defer
-from twisted.internet.defer import TimeoutError
+from twisted.internet.defer import Deferred, TimeoutError
 from twisted.internet.error import ConnectingCancelledError, DNSLookupError
-from twisted.test.proto_helpers import StringTransport
+from twisted.test.proto_helpers import MemoryReactor, StringTransport
 from twisted.web.client import ResponseNeverReceived
 from twisted.web.http import HTTPChannel
 
@@ -30,34 +30,43 @@ from synapse.http.matrixfederationclient import (
     MatrixFederationHttpClient,
     MatrixFederationRequest,
 )
-from synapse.logging.context import SENTINEL_CONTEXT, LoggingContext, current_context
+from synapse.logging.context import (
+    SENTINEL_CONTEXT,
+    LoggingContext,
+    LoggingContextOrSentinel,
+    current_context,
+)
+from synapse.server import HomeServer
+from synapse.util import Clock
 
 from tests.server import FakeTransport
 from tests.unittest import HomeserverTestCase
 
 
-def check_logcontext(context):
+def check_logcontext(context: LoggingContextOrSentinel) -> None:
     current = current_context()
     if current is not context:
         raise AssertionError("Expected logcontext %s but was %s" % (context, current))
 
 
 class FederationClientTests(HomeserverTestCase):
-    def make_homeserver(self, reactor, clock):
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         hs = self.setup_test_homeserver(reactor=reactor, clock=clock)
         return hs
 
-    def prepare(self, reactor, clock, homeserver):
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
         self.cl = MatrixFederationHttpClient(self.hs, None)
         self.reactor.lookups["testserv"] = "1.2.3.4"
 
-    def test_client_get(self):
+    def test_client_get(self) -> None:
         """
         happy-path test of a GET request
         """
 
         @defer.inlineCallbacks
-        def do_request():
+        def do_request() -> Generator["Deferred[object]", object, object]:
             with LoggingContext("one") as context:
                 fetch_d = defer.ensureDeferred(
                     self.cl.get_json("testserv:8008", "foo/bar")
@@ -119,7 +128,7 @@ class FederationClientTests(HomeserverTestCase):
         # check the response is as expected
         self.assertEqual(res, {"a": 1})
 
-    def test_dns_error(self):
+    def test_dns_error(self) -> None:
         """
         If the DNS lookup returns an error, it will bubble up.
         """
@@ -132,7 +141,7 @@ class FederationClientTests(HomeserverTestCase):
         self.assertIsInstance(f.value, RequestSendFailed)
         self.assertIsInstance(f.value.inner_exception, DNSLookupError)
 
-    def test_client_connection_refused(self):
+    def test_client_connection_refused(self) -> None:
         d = defer.ensureDeferred(
             self.cl.get_json("testserv:8008", "foo/bar", timeout=10000)
         )
@@ -156,7 +165,7 @@ class FederationClientTests(HomeserverTestCase):
         self.assertIsInstance(f.value, RequestSendFailed)
         self.assertIs(f.value.inner_exception, e)
 
-    def test_client_never_connect(self):
+    def test_client_never_connect(self) -> None:
         """
         If the HTTP request is not connected and is timed out, it'll give a
         ConnectingCancelledError or TimeoutError.
@@ -188,7 +197,7 @@ class FederationClientTests(HomeserverTestCase):
             f.value.inner_exception, (ConnectingCancelledError, TimeoutError)
         )
 
-    def test_client_connect_no_response(self):
+    def test_client_connect_no_response(self) -> None:
         """
         If the HTTP request is connected, but gets no response before being
         timed out, it'll give a ResponseNeverReceived.
@@ -222,7 +231,7 @@ class FederationClientTests(HomeserverTestCase):
         self.assertIsInstance(f.value, RequestSendFailed)
         self.assertIsInstance(f.value.inner_exception, ResponseNeverReceived)
 
-    def test_client_ip_range_blacklist(self):
+    def test_client_ip_range_blacklist(self) -> None:
         """Ensure that Synapse does not try to connect to blacklisted IPs"""
 
         # Set up the ip_range blacklist
@@ -292,7 +301,7 @@ class FederationClientTests(HomeserverTestCase):
         f = self.failureResultOf(d, RequestSendFailed)
         self.assertIsInstance(f.value.inner_exception, ConnectingCancelledError)
 
-    def test_client_gets_headers(self):
+    def test_client_gets_headers(self) -> None:
         """
         Once the client gets the headers, _request returns successfully.
         """
@@ -319,7 +328,7 @@ class FederationClientTests(HomeserverTestCase):
         self.assertEqual(r.code, 200)
 
     @parameterized.expand(["get_json", "post_json", "delete_json", "put_json"])
-    def test_timeout_reading_body(self, method_name: str):
+    def test_timeout_reading_body(self, method_name: str) -> None:
         """
         If the HTTP request is connected, but gets no response before being
         timed out, it'll give a RequestSendFailed with can_retry.
@@ -351,7 +360,7 @@ class FederationClientTests(HomeserverTestCase):
         self.assertTrue(f.value.can_retry)
         self.assertIsInstance(f.value.inner_exception, defer.TimeoutError)
 
-    def test_client_requires_trailing_slashes(self):
+    def test_client_requires_trailing_slashes(self) -> None:
         """
         If a connection is made to a client but the client rejects it due to
         requiring a trailing slash. We need to retry the request with a
@@ -405,7 +414,7 @@ class FederationClientTests(HomeserverTestCase):
         r = self.successResultOf(d)
         self.assertEqual(r, {})
 
-    def test_client_does_not_retry_on_400_plus(self):
+    def test_client_does_not_retry_on_400_plus(self) -> None:
         """
         Another test for trailing slashes but now test that we don't retry on
         trailing slashes on a non-400/M_UNRECOGNIZED response.
@@ -450,7 +459,7 @@ class FederationClientTests(HomeserverTestCase):
         # We should get a 404 failure response
         self.failureResultOf(d)
 
-    def test_client_sends_body(self):
+    def test_client_sends_body(self) -> None:
         defer.ensureDeferred(
             self.cl.post_json(
                 "testserv:8008", "foo/bar", timeout=10000, data={"a": "b"}
@@ -474,7 +483,7 @@ class FederationClientTests(HomeserverTestCase):
         content = request.content.read()
         self.assertEqual(content, b'{"a":"b"}')
 
-    def test_closes_connection(self):
+    def test_closes_connection(self) -> None:
         """Check that the client closes unused HTTP connections"""
         d = defer.ensureDeferred(self.cl.get_json("testserv:8008", "foo/bar"))
 
@@ -514,7 +523,7 @@ class FederationClientTests(HomeserverTestCase):
         self.assertTrue(conn.disconnecting)
 
     @parameterized.expand([(b"",), (b"foo",), (b'{"a": Infinity}',)])
-    def test_json_error(self, return_value):
+    def test_json_error(self, return_value: bytes) -> None:
         """
         Test what happens if invalid JSON is returned from the remote endpoint.
         """
@@ -560,7 +569,7 @@ class FederationClientTests(HomeserverTestCase):
         f = self.failureResultOf(test_d)
         self.assertIsInstance(f.value, RequestSendFailed)
 
-    def test_too_big(self):
+    def test_too_big(self) -> None:
         """
         Test what happens if a huge response is returned from the remote endpoint.
         """
diff --git a/tests/http/test_proxyagent.py b/tests/http/test_proxyagent.py
index 2db77c6a73..a817940730 100644
--- a/tests/http/test_proxyagent.py
+++ b/tests/http/test_proxyagent.py
@@ -14,7 +14,7 @@
 import base64
 import logging
 import os
-from typing import Iterable, Optional
+from typing import List, Optional
 from unittest.mock import patch
 
 import treq
@@ -22,7 +22,11 @@ from netaddr import IPSet
 from parameterized import parameterized
 
 from twisted.internet import interfaces  # noqa: F401
-from twisted.internet.endpoints import HostnameEndpoint, _WrapperEndpoint
+from twisted.internet.endpoints import (
+    HostnameEndpoint,
+    _WrapperEndpoint,
+    _WrappingProtocol,
+)
 from twisted.internet.interfaces import IProtocol, IProtocolFactory
 from twisted.internet.protocol import Factory
 from twisted.protocols.tls import TLSMemoryBIOFactory, TLSMemoryBIOProtocol
@@ -32,7 +36,11 @@ from synapse.http.client import BlacklistingReactorWrapper
 from synapse.http.connectproxyclient import ProxyCredentials
 from synapse.http.proxyagent import ProxyAgent, parse_proxy
 
-from tests.http import TestServerTLSConnectionFactory, get_test_https_policy
+from tests.http import (
+    TestServerTLSConnectionFactory,
+    dummy_address,
+    get_test_https_policy,
+)
 from tests.server import FakeTransport, ThreadedMemoryReactorClock
 from tests.unittest import TestCase
 
@@ -183,7 +191,7 @@ class ProxyParserTests(TestCase):
         expected_hostname: bytes,
         expected_port: int,
         expected_credentials: Optional[bytes],
-    ):
+    ) -> None:
         """
         Tests that a given proxy URL will be broken into the components.
         Args:
@@ -209,7 +217,7 @@ class ProxyParserTests(TestCase):
 
 
 class MatrixFederationAgentTests(TestCase):
-    def setUp(self):
+    def setUp(self) -> None:
         self.reactor = ThreadedMemoryReactorClock()
 
     def _make_connection(
@@ -218,7 +226,7 @@ class MatrixFederationAgentTests(TestCase):
         server_factory: IProtocolFactory,
         ssl: bool = False,
         expected_sni: Optional[bytes] = None,
-        tls_sanlist: Optional[Iterable[bytes]] = None,
+        tls_sanlist: Optional[List[bytes]] = None,
     ) -> IProtocol:
         """Builds a test server, and completes the outgoing client connection
 
@@ -244,7 +252,8 @@ class MatrixFederationAgentTests(TestCase):
         if ssl:
             server_factory = _wrap_server_factory_for_tls(server_factory, tls_sanlist)
 
-        server_protocol = server_factory.buildProtocol(None)
+        server_protocol = server_factory.buildProtocol(dummy_address)
+        assert server_protocol is not None
 
         # now, tell the client protocol factory to build the client protocol,
         # and wire the output of said protocol up to the server via
@@ -252,7 +261,8 @@ class MatrixFederationAgentTests(TestCase):
         #
         # Normally this would be done by the TCP socket code in Twisted, but we are
         # stubbing that out here.
-        client_protocol = client_factory.buildProtocol(None)
+        client_protocol = client_factory.buildProtocol(dummy_address)
+        assert client_protocol is not None
         client_protocol.makeConnection(
             FakeTransport(server_protocol, self.reactor, client_protocol)
         )
@@ -263,6 +273,7 @@ class MatrixFederationAgentTests(TestCase):
         )
 
         if ssl:
+            assert isinstance(server_protocol, TLSMemoryBIOProtocol)
             http_protocol = server_protocol.wrappedProtocol
             tls_connection = server_protocol._tlsConnection
         else:
@@ -288,7 +299,7 @@ class MatrixFederationAgentTests(TestCase):
         scheme: bytes,
         hostname: bytes,
         path: bytes,
-    ):
+    ) -> None:
         """Runs a test case for a direct connection not going through a proxy.
 
         Args:
@@ -319,6 +330,7 @@ class MatrixFederationAgentTests(TestCase):
             ssl=is_https,
             expected_sni=hostname if is_https else None,
         )
+        assert isinstance(http_server, HTTPChannel)
 
         # the FakeTransport is async, so we need to pump the reactor
         self.reactor.advance(0)
@@ -339,34 +351,34 @@ class MatrixFederationAgentTests(TestCase):
         body = self.successResultOf(treq.content(resp))
         self.assertEqual(body, b"result")
 
-    def test_http_request(self):
+    def test_http_request(self) -> None:
         agent = ProxyAgent(self.reactor)
         self._test_request_direct_connection(agent, b"http", b"test.com", b"")
 
-    def test_https_request(self):
+    def test_https_request(self) -> None:
         agent = ProxyAgent(self.reactor, contextFactory=get_test_https_policy())
         self._test_request_direct_connection(agent, b"https", b"test.com", b"abc")
 
-    def test_http_request_use_proxy_empty_environment(self):
+    def test_http_request_use_proxy_empty_environment(self) -> None:
         agent = ProxyAgent(self.reactor, use_proxy=True)
         self._test_request_direct_connection(agent, b"http", b"test.com", b"")
 
     @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "NO_PROXY": "test.com"})
-    def test_http_request_via_uppercase_no_proxy(self):
+    def test_http_request_via_uppercase_no_proxy(self) -> None:
         agent = ProxyAgent(self.reactor, use_proxy=True)
         self._test_request_direct_connection(agent, b"http", b"test.com", b"")
 
     @patch.dict(
         os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "test.com,unused.com"}
     )
-    def test_http_request_via_no_proxy(self):
+    def test_http_request_via_no_proxy(self) -> None:
         agent = ProxyAgent(self.reactor, use_proxy=True)
         self._test_request_direct_connection(agent, b"http", b"test.com", b"")
 
     @patch.dict(
         os.environ, {"https_proxy": "proxy.com", "no_proxy": "test.com,unused.com"}
     )
-    def test_https_request_via_no_proxy(self):
+    def test_https_request_via_no_proxy(self) -> None:
         agent = ProxyAgent(
             self.reactor,
             contextFactory=get_test_https_policy(),
@@ -375,12 +387,12 @@ class MatrixFederationAgentTests(TestCase):
         self._test_request_direct_connection(agent, b"https", b"test.com", b"abc")
 
     @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "*"})
-    def test_http_request_via_no_proxy_star(self):
+    def test_http_request_via_no_proxy_star(self) -> None:
         agent = ProxyAgent(self.reactor, use_proxy=True)
         self._test_request_direct_connection(agent, b"http", b"test.com", b"")
 
     @patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "*"})
-    def test_https_request_via_no_proxy_star(self):
+    def test_https_request_via_no_proxy_star(self) -> None:
         agent = ProxyAgent(
             self.reactor,
             contextFactory=get_test_https_policy(),
@@ -389,7 +401,7 @@ class MatrixFederationAgentTests(TestCase):
         self._test_request_direct_connection(agent, b"https", b"test.com", b"abc")
 
     @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "unused.com"})
-    def test_http_request_via_proxy(self):
+    def test_http_request_via_proxy(self) -> None:
         """
         Tests that requests can be made through a proxy.
         """
@@ -401,7 +413,7 @@ class MatrixFederationAgentTests(TestCase):
         os.environ,
         {"http_proxy": "bob:pinkponies@proxy.com:8888", "no_proxy": "unused.com"},
     )
-    def test_http_request_via_proxy_with_auth(self):
+    def test_http_request_via_proxy_with_auth(self) -> None:
         """
         Tests that authenticated requests can be made through a proxy.
         """
@@ -412,7 +424,7 @@ class MatrixFederationAgentTests(TestCase):
     @patch.dict(
         os.environ, {"http_proxy": "https://proxy.com:8888", "no_proxy": "unused.com"}
     )
-    def test_http_request_via_https_proxy(self):
+    def test_http_request_via_https_proxy(self) -> None:
         self._do_http_request_via_proxy(
             expect_proxy_ssl=True, expected_auth_credentials=None
         )
@@ -424,13 +436,13 @@ class MatrixFederationAgentTests(TestCase):
             "no_proxy": "unused.com",
         },
     )
-    def test_http_request_via_https_proxy_with_auth(self):
+    def test_http_request_via_https_proxy_with_auth(self) -> None:
         self._do_http_request_via_proxy(
             expect_proxy_ssl=True, expected_auth_credentials=b"bob:pinkponies"
         )
 
     @patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "unused.com"})
-    def test_https_request_via_proxy(self):
+    def test_https_request_via_proxy(self) -> None:
         """Tests that TLS-encrypted requests can be made through a proxy"""
         self._do_https_request_via_proxy(
             expect_proxy_ssl=False, expected_auth_credentials=None
@@ -440,7 +452,7 @@ class MatrixFederationAgentTests(TestCase):
         os.environ,
         {"https_proxy": "bob:pinkponies@proxy.com", "no_proxy": "unused.com"},
     )
-    def test_https_request_via_proxy_with_auth(self):
+    def test_https_request_via_proxy_with_auth(self) -> None:
         """Tests that authenticated, TLS-encrypted requests can be made through a proxy"""
         self._do_https_request_via_proxy(
             expect_proxy_ssl=False, expected_auth_credentials=b"bob:pinkponies"
@@ -449,7 +461,7 @@ class MatrixFederationAgentTests(TestCase):
     @patch.dict(
         os.environ, {"https_proxy": "https://proxy.com", "no_proxy": "unused.com"}
     )
-    def test_https_request_via_https_proxy(self):
+    def test_https_request_via_https_proxy(self) -> None:
         """Tests that TLS-encrypted requests can be made through a proxy"""
         self._do_https_request_via_proxy(
             expect_proxy_ssl=True, expected_auth_credentials=None
@@ -459,7 +471,7 @@ class MatrixFederationAgentTests(TestCase):
         os.environ,
         {"https_proxy": "https://bob:pinkponies@proxy.com", "no_proxy": "unused.com"},
     )
-    def test_https_request_via_https_proxy_with_auth(self):
+    def test_https_request_via_https_proxy_with_auth(self) -> None:
         """Tests that authenticated, TLS-encrypted requests can be made through a proxy"""
         self._do_https_request_via_proxy(
             expect_proxy_ssl=True, expected_auth_credentials=b"bob:pinkponies"
@@ -469,7 +481,7 @@ class MatrixFederationAgentTests(TestCase):
         self,
         expect_proxy_ssl: bool = False,
         expected_auth_credentials: Optional[bytes] = None,
-    ):
+    ) -> None:
         """Send a http request via an agent and check that it is correctly received at
             the proxy. The proxy can use either http or https.
         Args:
@@ -501,6 +513,7 @@ class MatrixFederationAgentTests(TestCase):
             tls_sanlist=[b"DNS:proxy.com"] if expect_proxy_ssl else None,
             expected_sni=b"proxy.com" if expect_proxy_ssl else None,
         )
+        assert isinstance(http_server, HTTPChannel)
 
         # the FakeTransport is async, so we need to pump the reactor
         self.reactor.advance(0)
@@ -542,7 +555,7 @@ class MatrixFederationAgentTests(TestCase):
         self,
         expect_proxy_ssl: bool = False,
         expected_auth_credentials: Optional[bytes] = None,
-    ):
+    ) -> None:
         """Send a https request via an agent and check that it is correctly received at
             the proxy and client. The proxy can use either http or https.
         Args:
@@ -606,10 +619,12 @@ class MatrixFederationAgentTests(TestCase):
         # now we make another test server to act as the upstream HTTP server.
         server_ssl_protocol = _wrap_server_factory_for_tls(
             _get_test_protocol_factory()
-        ).buildProtocol(None)
+        ).buildProtocol(dummy_address)
+        assert isinstance(server_ssl_protocol, TLSMemoryBIOProtocol)
 
         # Tell the HTTP server to send outgoing traffic back via the proxy's transport.
         proxy_server_transport = proxy_server.transport
+        assert proxy_server_transport is not None
         server_ssl_protocol.makeConnection(proxy_server_transport)
 
         # ... and replace the protocol on the proxy's transport with the
@@ -644,6 +659,7 @@ class MatrixFederationAgentTests(TestCase):
 
         # now there should be a pending request
         http_server = server_ssl_protocol.wrappedProtocol
+        assert isinstance(http_server, HTTPChannel)
         self.assertEqual(len(http_server.requests), 1)
 
         request = http_server.requests[0]
@@ -667,7 +683,7 @@ class MatrixFederationAgentTests(TestCase):
         self.assertEqual(body, b"result")
 
     @patch.dict(os.environ, {"http_proxy": "proxy.com:8888"})
-    def test_http_request_via_proxy_with_blacklist(self):
+    def test_http_request_via_proxy_with_blacklist(self) -> None:
         # The blacklist includes the configured proxy IP.
         agent = ProxyAgent(
             BlacklistingReactorWrapper(
@@ -691,6 +707,7 @@ class MatrixFederationAgentTests(TestCase):
         http_server = self._make_connection(
             client_factory, _get_test_protocol_factory()
         )
+        assert isinstance(http_server, HTTPChannel)
 
         # the FakeTransport is async, so we need to pump the reactor
         self.reactor.advance(0)
@@ -712,7 +729,7 @@ class MatrixFederationAgentTests(TestCase):
         self.assertEqual(body, b"result")
 
     @patch.dict(os.environ, {"HTTPS_PROXY": "proxy.com"})
-    def test_https_request_via_uppercase_proxy_with_blacklist(self):
+    def test_https_request_via_uppercase_proxy_with_blacklist(self) -> None:
         # The blacklist includes the configured proxy IP.
         agent = ProxyAgent(
             BlacklistingReactorWrapper(
@@ -737,11 +754,15 @@ class MatrixFederationAgentTests(TestCase):
         proxy_server = self._make_connection(
             client_factory, _get_test_protocol_factory()
         )
+        assert isinstance(proxy_server, HTTPChannel)
 
         # fish the transports back out so that we can do the old switcheroo
         s2c_transport = proxy_server.transport
+        assert isinstance(s2c_transport, FakeTransport)
         client_protocol = s2c_transport.other
+        assert isinstance(client_protocol, _WrappingProtocol)
         c2s_transport = client_protocol.transport
+        assert isinstance(c2s_transport, FakeTransport)
 
         # the FakeTransport is async, so we need to pump the reactor
         self.reactor.advance(0)
@@ -762,8 +783,10 @@ class MatrixFederationAgentTests(TestCase):
 
         # now we can replace the proxy channel with a new, SSL-wrapped HTTP channel
         ssl_factory = _wrap_server_factory_for_tls(_get_test_protocol_factory())
-        ssl_protocol = ssl_factory.buildProtocol(None)
+        ssl_protocol = ssl_factory.buildProtocol(dummy_address)
+        assert isinstance(ssl_protocol, TLSMemoryBIOProtocol)
         http_server = ssl_protocol.wrappedProtocol
+        assert isinstance(http_server, HTTPChannel)
 
         ssl_protocol.makeConnection(
             FakeTransport(client_protocol, self.reactor, ssl_protocol)
@@ -797,28 +820,28 @@ class MatrixFederationAgentTests(TestCase):
         self.assertEqual(body, b"result")
 
     @patch.dict(os.environ, {"http_proxy": "proxy.com:8888"})
-    def test_proxy_with_no_scheme(self):
+    def test_proxy_with_no_scheme(self) -> None:
         http_proxy_agent = ProxyAgent(self.reactor, use_proxy=True)
-        self.assertIsInstance(http_proxy_agent.http_proxy_endpoint, HostnameEndpoint)
+        assert isinstance(http_proxy_agent.http_proxy_endpoint, HostnameEndpoint)
         self.assertEqual(http_proxy_agent.http_proxy_endpoint._hostStr, "proxy.com")
         self.assertEqual(http_proxy_agent.http_proxy_endpoint._port, 8888)
 
     @patch.dict(os.environ, {"http_proxy": "socks://proxy.com:8888"})
-    def test_proxy_with_unsupported_scheme(self):
+    def test_proxy_with_unsupported_scheme(self) -> None:
         with self.assertRaises(ValueError):
             ProxyAgent(self.reactor, use_proxy=True)
 
     @patch.dict(os.environ, {"http_proxy": "http://proxy.com:8888"})
-    def test_proxy_with_http_scheme(self):
+    def test_proxy_with_http_scheme(self) -> None:
         http_proxy_agent = ProxyAgent(self.reactor, use_proxy=True)
-        self.assertIsInstance(http_proxy_agent.http_proxy_endpoint, HostnameEndpoint)
+        assert isinstance(http_proxy_agent.http_proxy_endpoint, HostnameEndpoint)
         self.assertEqual(http_proxy_agent.http_proxy_endpoint._hostStr, "proxy.com")
         self.assertEqual(http_proxy_agent.http_proxy_endpoint._port, 8888)
 
     @patch.dict(os.environ, {"http_proxy": "https://proxy.com:8888"})
-    def test_proxy_with_https_scheme(self):
+    def test_proxy_with_https_scheme(self) -> None:
         https_proxy_agent = ProxyAgent(self.reactor, use_proxy=True)
-        self.assertIsInstance(https_proxy_agent.http_proxy_endpoint, _WrapperEndpoint)
+        assert isinstance(https_proxy_agent.http_proxy_endpoint, _WrapperEndpoint)
         self.assertEqual(
             https_proxy_agent.http_proxy_endpoint._wrappedEndpoint._hostStr, "proxy.com"
         )
@@ -828,7 +851,7 @@ class MatrixFederationAgentTests(TestCase):
 
 
 def _wrap_server_factory_for_tls(
-    factory: IProtocolFactory, sanlist: Iterable[bytes] = None
+    factory: IProtocolFactory, sanlist: Optional[List[bytes]] = None
 ) -> IProtocolFactory:
     """Wrap an existing Protocol Factory with a test TLSMemoryBIOFactory
 
@@ -865,6 +888,6 @@ def _get_test_protocol_factory() -> IProtocolFactory:
     return server_factory
 
 
-def _log_request(request: str):
+def _log_request(request: str) -> None:
     """Implements Factory.log, which is expected by Request.finish"""
     logger.info(f"Completed request {request}")
diff --git a/tests/http/test_servlet.py b/tests/http/test_servlet.py
index 46166292fe..c8d215b6dc 100644
--- a/tests/http/test_servlet.py
+++ b/tests/http/test_servlet.py
@@ -14,7 +14,7 @@
 import json
 from http import HTTPStatus
 from io import BytesIO
-from typing import Tuple
+from typing import Tuple, Union
 from unittest.mock import Mock
 
 from synapse.api.errors import Codes, SynapseError
@@ -33,7 +33,7 @@ from tests import unittest
 from tests.http.server._base import test_disconnect
 
 
-def make_request(content):
+def make_request(content: Union[bytes, JsonDict]) -> Mock:
     """Make an object that acts enough like a request."""
     request = Mock(spec=["method", "uri", "content"])
 
@@ -47,7 +47,7 @@ def make_request(content):
 
 
 class TestServletUtils(unittest.TestCase):
-    def test_parse_json_value(self):
+    def test_parse_json_value(self) -> None:
         """Basic tests for parse_json_value_from_request."""
         # Test round-tripping.
         obj = {"foo": 1}
@@ -78,7 +78,7 @@ class TestServletUtils(unittest.TestCase):
         with self.assertRaises(SynapseError):
             parse_json_value_from_request(make_request(b'{"foo": Infinity}'))
 
-    def test_parse_json_object(self):
+    def test_parse_json_object(self) -> None:
         """Basic tests for parse_json_object_from_request."""
         # Test empty.
         result = parse_json_object_from_request(
diff --git a/tests/http/test_simple_client.py b/tests/http/test_simple_client.py
index c85a3665c1..010601da4b 100644
--- a/tests/http/test_simple_client.py
+++ b/tests/http/test_simple_client.py
@@ -17,22 +17,24 @@ from netaddr import IPSet
 
 from twisted.internet import defer
 from twisted.internet.error import DNSLookupError
+from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.http import RequestTimedOutError
 from synapse.http.client import SimpleHttpClient
 from synapse.server import HomeServer
+from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
 
 
 class SimpleHttpClientTests(HomeserverTestCase):
-    def prepare(self, reactor, clock, hs: "HomeServer"):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: "HomeServer") -> None:
         # Add a DNS entry for a test server
         self.reactor.lookups["testserv"] = "1.2.3.4"
 
         self.cl = hs.get_simple_http_client()
 
-    def test_dns_error(self):
+    def test_dns_error(self) -> None:
         """
         If the DNS lookup returns an error, it will bubble up.
         """
@@ -42,7 +44,7 @@ class SimpleHttpClientTests(HomeserverTestCase):
         f = self.failureResultOf(d)
         self.assertIsInstance(f.value, DNSLookupError)
 
-    def test_client_connection_refused(self):
+    def test_client_connection_refused(self) -> None:
         d = defer.ensureDeferred(self.cl.get_json("http://testserv:8008/foo/bar"))
 
         self.pump()
@@ -63,7 +65,7 @@ class SimpleHttpClientTests(HomeserverTestCase):
 
         self.assertIs(f.value, e)
 
-    def test_client_never_connect(self):
+    def test_client_never_connect(self) -> None:
         """
         If the HTTP request is not connected and is timed out, it'll give a
         ConnectingCancelledError or TimeoutError.
@@ -90,7 +92,7 @@ class SimpleHttpClientTests(HomeserverTestCase):
 
         self.assertIsInstance(f.value, RequestTimedOutError)
 
-    def test_client_connect_no_response(self):
+    def test_client_connect_no_response(self) -> None:
         """
         If the HTTP request is connected, but gets no response before being
         timed out, it'll give a ResponseNeverReceived.
@@ -121,7 +123,7 @@ class SimpleHttpClientTests(HomeserverTestCase):
 
         self.assertIsInstance(f.value, RequestTimedOutError)
 
-    def test_client_ip_range_blacklist(self):
+    def test_client_ip_range_blacklist(self) -> None:
         """Ensure that Synapse does not try to connect to blacklisted IPs"""
 
         # Add some DNS entries we'll blacklist
diff --git a/tests/http/test_site.py b/tests/http/test_site.py
index b2dbf76d33..9a78fede92 100644
--- a/tests/http/test_site.py
+++ b/tests/http/test_site.py
@@ -13,18 +13,20 @@
 # limitations under the License.
 
 from twisted.internet.address import IPv6Address
-from twisted.test.proto_helpers import StringTransport
+from twisted.test.proto_helpers import MemoryReactor, StringTransport
 
 from synapse.app.homeserver import SynapseHomeServer
+from synapse.server import HomeServer
+from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase
 
 
 class SynapseRequestTestCase(HomeserverTestCase):
-    def make_homeserver(self, reactor, clock):
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         return self.setup_test_homeserver(homeserver_to_use=SynapseHomeServer)
 
-    def test_large_request(self):
+    def test_large_request(self) -> None:
         """overlarge HTTP requests should be rejected"""
         self.hs.start_listening()
 
diff --git a/tests/server.py b/tests/server.py
index b1730fcc8d..237bcad8ba 100644
--- a/tests/server.py
+++ b/tests/server.py
@@ -70,7 +70,7 @@ from synapse.logging.context import ContextResourceUsage
 from synapse.server import HomeServer
 from synapse.storage import DataStore
 from synapse.storage.engines import PostgresEngine, create_engine
-from synapse.types import JsonDict
+from synapse.types import ISynapseReactor, JsonDict
 from synapse.util import Clock
 
 from tests.utils import (
@@ -401,7 +401,9 @@ def make_request(
     return channel
 
 
-@implementer(IReactorPluggableNameResolver)
+# ISynapseReactor implies IReactorPluggableNameResolver, but explicitly
+# marking this as an implementer of the latter seems to keep mypy-zope happier.
+@implementer(IReactorPluggableNameResolver, ISynapseReactor)
 class ThreadedMemoryReactorClock(MemoryReactorClock):
     """
     A MemoryReactorClock that supports callFromThread.
-- 
cgit 1.5.1


From 5b55c32d610b2baec8622f0418519b130ab4fa30 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 7 Feb 2023 06:56:09 -0500
Subject: Add tests for using _flatten_dict with an event. (#15002)

---
 changelog.d/15002.misc                   |  1 +
 synapse/push/bulk_push_rule_evaluator.py | 13 +++----
 tests/push/test_push_rule_evaluator.py   | 63 +++++++++++++++++++++++++++++++-
 3 files changed, 68 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/15002.misc

(limited to 'synapse')

diff --git a/changelog.d/15002.misc b/changelog.d/15002.misc
new file mode 100644
index 0000000000..68ac8335fc
--- /dev/null
+++ b/changelog.d/15002.misc
@@ -0,0 +1 @@
+Add tests for `_flatten_dict`.
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index f73dceb128..d9c0a98f44 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -36,7 +36,7 @@ from synapse.api.constants import (
     Membership,
     RelationTypes,
 )
-from synapse.api.room_versions import PushRuleRoomFlag, RoomVersion
+from synapse.api.room_versions import PushRuleRoomFlag
 from synapse.event_auth import auth_types_for_event, get_user_power_level
 from synapse.events import EventBase, relation_from_event
 from synapse.events.snapshot import EventContext
@@ -405,7 +405,7 @@ class BulkPushRuleEvaluator:
             room_mention = mentions.get("room") is True
 
         evaluator = PushRuleEvaluator(
-            _flatten_dict(event, room_version=event.room_version),
+            _flatten_dict(event),
             has_mentions,
             user_mentions,
             room_mention,
@@ -491,7 +491,6 @@ StateGroup = Union[object, int]
 
 def _flatten_dict(
     d: Union[EventBase, Mapping[str, Any]],
-    room_version: Optional[RoomVersion] = None,
     prefix: Optional[List[str]] = None,
     result: Optional[Dict[str, str]] = None,
 ) -> Dict[str, str]:
@@ -511,7 +510,6 @@ def _flatten_dict(
 
     Args:
         d: The event or content to continue flattening.
-        room_version: The room version object.
         prefix: The key prefix (from outer dictionaries).
         result: The result to mutate.
 
@@ -531,14 +529,13 @@ def _flatten_dict(
 
     # `room_version` should only ever be set when looking at the top level of an event
     if (
-        room_version is not None
-        and PushRuleRoomFlag.EXTENSIBLE_EVENTS in room_version.msc3931_push_features
-        and isinstance(d, EventBase)
+        isinstance(d, EventBase)
+        and PushRuleRoomFlag.EXTENSIBLE_EVENTS in d.room_version.msc3931_push_features
     ):
         # Room supports extensible events: replace `content.body` with the plain text
         # representation from `m.markup`, as per MSC1767.
         markup = d.get("content").get("m.markup")
-        if room_version.identifier.startswith("org.matrix.msc1767."):
+        if d.room_version.identifier.startswith("org.matrix.msc1767."):
             markup = d.get("content").get("org.matrix.msc1767.markup")
         if markup is not None and isinstance(markup, list):
             text = ""
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index 7c430c4ecb..da33423871 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -22,7 +22,7 @@ import synapse.rest.admin
 from synapse.api.constants import EventTypes, HistoryVisibility, Membership
 from synapse.api.room_versions import RoomVersions
 from synapse.appservice import ApplicationService
-from synapse.events import FrozenEvent
+from synapse.events import FrozenEvent, make_event_from_dict
 from synapse.push.bulk_push_rule_evaluator import _flatten_dict
 from synapse.push.httppusher import tweaks_for_actions
 from synapse.rest import admin
@@ -60,6 +60,67 @@ class FlattenDictTestCase(unittest.TestCase):
         }
         self.assertEqual({"woo": "woo"}, _flatten_dict(input))
 
+    def test_event(self) -> None:
+        """Events can also be flattened."""
+        event = make_event_from_dict(
+            {
+                "room_id": "!test:test",
+                "type": "m.room.message",
+                "sender": "@alice:test",
+                "content": {
+                    "msgtype": "m.text",
+                    "body": "Hello world!",
+                    "format": "org.matrix.custom.html",
+                    "formatted_body": "<h1>Hello world!</h1>",
+                },
+            },
+            room_version=RoomVersions.V8,
+        )
+        expected = {
+            "content.msgtype": "m.text",
+            "content.body": "hello world!",
+            "content.format": "org.matrix.custom.html",
+            "content.formatted_body": "<h1>hello world!</h1>",
+            "room_id": "!test:test",
+            "sender": "@alice:test",
+            "type": "m.room.message",
+        }
+        self.assertEqual(expected, _flatten_dict(event))
+
+    def test_extensible_events(self) -> None:
+        """Extensible events has compatibility behaviour."""
+        event_dict = {
+            "room_id": "!test:test",
+            "type": "m.room.message",
+            "sender": "@alice:test",
+            "content": {
+                "org.matrix.msc1767.markup": [
+                    {"mimetype": "text/plain", "body": "Hello world!"},
+                    {"mimetype": "text/html", "body": "<h1>Hello world!</h1>"},
+                ]
+            },
+        }
+
+        # For a current room version, there's no special behavior.
+        event = make_event_from_dict(event_dict, room_version=RoomVersions.V8)
+        expected = {
+            "room_id": "!test:test",
+            "sender": "@alice:test",
+            "type": "m.room.message",
+        }
+        self.assertEqual(expected, _flatten_dict(event))
+
+        # For a room version with extensible events, they parse out the text/plain
+        # to a content.body property.
+        event = make_event_from_dict(event_dict, room_version=RoomVersions.MSC1767v10)
+        expected = {
+            "content.body": "hello world!",
+            "room_id": "!test:test",
+            "sender": "@alice:test",
+            "type": "m.room.message",
+        }
+        self.assertEqual(expected, _flatten_dict(event))
+
 
 class PushRuleEvaluatorTestCase(unittest.TestCase):
     def _get_evaluator(
-- 
cgit 1.5.1


From 2dff93099b5aa7e213da76a9c4b3de84385b58e1 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 7 Feb 2023 15:24:44 +0000
Subject: Typecheck tests.rest.media.v1.test_media_storage (#15008)

* Fix MediaStorage type hint

* Typecheck tests.rest.media.v1.test_media_storage

* Changelog

* Remove assert and make the comment succinct

* Fix syntax for olddeps
---
 changelog.d/15008.misc                    |  1 +
 mypy.ini                                  |  1 -
 synapse/rest/media/v1/media_storage.py    |  7 ++---
 tests/rest/media/v1/test_media_storage.py | 49 +++++++++++++++++++------------
 4 files changed, 35 insertions(+), 23 deletions(-)
 create mode 100644 changelog.d/15008.misc

(limited to 'synapse')

diff --git a/changelog.d/15008.misc b/changelog.d/15008.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/15008.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/mypy.ini b/mypy.ini
index 0efafb26b6..4598002c4a 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -33,7 +33,6 @@ exclude = (?x)
    |synapse/storage/schema/
 
    |tests/module_api/test_api.py
-   |tests/rest/media/v1/test_media_storage.py
    |tests/server.py
    )$
 
diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py
index a5c3de192f..db25848744 100644
--- a/synapse/rest/media/v1/media_storage.py
+++ b/synapse/rest/media/v1/media_storage.py
@@ -46,10 +46,9 @@ from ._base import FileInfo, Responder
 from .filepath import MediaFilePaths
 
 if TYPE_CHECKING:
+    from synapse.rest.media.v1.storage_provider import StorageProvider
     from synapse.server import HomeServer
 
-    from .storage_provider import StorageProviderWrapper
-
 logger = logging.getLogger(__name__)
 
 
@@ -68,7 +67,7 @@ class MediaStorage:
         hs: "HomeServer",
         local_media_directory: str,
         filepaths: MediaFilePaths,
-        storage_providers: Sequence["StorageProviderWrapper"],
+        storage_providers: Sequence["StorageProvider"],
     ):
         self.hs = hs
         self.reactor = hs.get_reactor()
@@ -360,7 +359,7 @@ class ReadableFileWrapper:
     clock: Clock
     path: str
 
-    async def write_chunks_to(self, callback: Callable[[bytes], None]) -> None:
+    async def write_chunks_to(self, callback: Callable[[bytes], object]) -> None:
         """Reads the file in chunks and calls the callback with each chunk."""
 
         with open(self.path, "rb") as file:
diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py
index d18fc13c21..17a3b06a8e 100644
--- a/tests/rest/media/v1/test_media_storage.py
+++ b/tests/rest/media/v1/test_media_storage.py
@@ -16,7 +16,7 @@ import shutil
 import tempfile
 from binascii import unhexlify
 from io import BytesIO
-from typing import Any, BinaryIO, Dict, List, Optional, Union
+from typing import Any, BinaryIO, ClassVar, Dict, List, Optional, Tuple, Union
 from unittest.mock import Mock
 from urllib import parse
 
@@ -32,6 +32,7 @@ from twisted.test.proto_helpers import MemoryReactor
 from synapse.api.errors import Codes
 from synapse.events import EventBase
 from synapse.events.spamcheck import load_legacy_spam_checkers
+from synapse.http.types import QueryParams
 from synapse.logging.context import make_deferred_yieldable
 from synapse.module_api import ModuleApi
 from synapse.rest import admin
@@ -41,7 +42,7 @@ from synapse.rest.media.v1.filepath import MediaFilePaths
 from synapse.rest.media.v1.media_storage import MediaStorage, ReadableFileWrapper
 from synapse.rest.media.v1.storage_provider import FileStorageProviderBackend
 from synapse.server import HomeServer
-from synapse.types import RoomAlias
+from synapse.types import JsonDict, RoomAlias
 from synapse.util import Clock
 
 from tests import unittest
@@ -201,36 +202,46 @@ class _TestImage:
     ],
 )
 class MediaRepoTests(unittest.HomeserverTestCase):
-
+    test_image: ClassVar[_TestImage]
     hijack_auth = True
     user_id = "@test:user"
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
 
-        self.fetches = []
+        self.fetches: List[
+            Tuple[
+                "Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]]",
+                str,
+                str,
+                Optional[QueryParams],
+            ]
+        ] = []
 
         def get_file(
             destination: str,
             path: str,
             output_stream: BinaryIO,
-            args: Optional[Dict[str, Union[str, List[str]]]] = None,
+            args: Optional[QueryParams] = None,
+            retry_on_dns_fail: bool = True,
             max_size: Optional[int] = None,
-        ) -> Deferred:
-            """
-            Returns tuple[int,dict,str,int] of file length, response headers,
-            absolute URI, and response code.
-            """
+            ignore_backoff: bool = False,
+        ) -> "Deferred[Tuple[int, Dict[bytes, List[bytes]]]]":
+            """A mock for MatrixFederationHttpClient.get_file."""
 
-            def write_to(r):
+            def write_to(
+                r: Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]
+            ) -> Tuple[int, Dict[bytes, List[bytes]]]:
                 data, response = r
                 output_stream.write(data)
                 return response
 
-            d = Deferred()
-            d.addCallback(write_to)
+            d: Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]] = Deferred()
             self.fetches.append((d, destination, path, args))
-            return make_deferred_yieldable(d)
+            # Note that this callback changes the value held by d.
+            d_after_callback = d.addCallback(write_to)
+            return make_deferred_yieldable(d_after_callback)
 
+        # Mock out the homeserver's MatrixFederationHttpClient
         client = Mock()
         client.get_file = get_file
 
@@ -461,6 +472,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
         # Synapse should regenerate missing thumbnails.
         origin, media_id = self.media_id.split("/")
         info = self.get_success(self.store.get_cached_remote_media(origin, media_id))
+        assert info is not None
         file_id = info["filesystem_id"]
 
         thumbnail_dir = self.media_repo.filepaths.remote_media_thumbnail_dir(
@@ -581,7 +593,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
                         "thumbnail_method": method,
                         "thumbnail_type": self.test_image.content_type,
                         "thumbnail_length": 256,
-                        "filesystem_id": f"thumbnail1{self.test_image.extension}",
+                        "filesystem_id": f"thumbnail1{self.test_image.extension.decode()}",
                     },
                     {
                         "thumbnail_width": 32,
@@ -589,10 +601,10 @@ class MediaRepoTests(unittest.HomeserverTestCase):
                         "thumbnail_method": method,
                         "thumbnail_type": self.test_image.content_type,
                         "thumbnail_length": 256,
-                        "filesystem_id": f"thumbnail2{self.test_image.extension}",
+                        "filesystem_id": f"thumbnail2{self.test_image.extension.decode()}",
                     },
                 ],
-                file_id=f"image{self.test_image.extension}",
+                file_id=f"image{self.test_image.extension.decode()}",
                 url_cache=None,
                 server_name=None,
             )
@@ -637,6 +649,7 @@ class TestSpamCheckerLegacy:
         self.config = config
         self.api = api
 
+    @staticmethod
     def parse_config(config: Dict[str, Any]) -> Dict[str, Any]:
         return config
 
@@ -748,7 +761,7 @@ class SpamCheckerTestCase(unittest.HomeserverTestCase):
 
     async def check_media_file_for_spam(
         self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
-    ) -> Union[Codes, Literal["NOT_SPAM"]]:
+    ) -> Union[Codes, Literal["NOT_SPAM"], Tuple[Codes, JsonDict]]:
         buf = BytesIO()
         await file_wrapper.write_chunks_to(buf.write)
 
-- 
cgit 1.5.1


From 9cd7610f86ab5051c9365dd38d1eec405a5f8ca6 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 7 Feb 2023 15:26:55 +0000
Subject: Revert "Add `event_stream_ordering` column to membership state tables
 (#14979)"

This reverts commit 5fdc12f482c68e2cdbb78d7db5de2cfe621720d4.
---
 synapse/storage/databases/main/events.py           |  23 ++---
 .../storage/databases/main/events_bg_updates.py    | 104 +--------------------
 synapse/storage/databases/main/events_worker.py    |   8 +-
 .../26membership_tables_event_stream_ordering.sql  |  21 -----
 4 files changed, 11 insertions(+), 145 deletions(-)
 delete mode 100644 synapse/storage/schema/main/delta/73/26membership_tables_event_stream_ordering.sql

(limited to 'synapse')

diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index b6cce0a7cc..1536937b67 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1147,15 +1147,11 @@ class PersistEventsStore:
                 # been inserted into room_memberships.
                 txn.execute_batch(
                     """INSERT INTO current_state_events
-                        (room_id, type, state_key, event_id, membership, event_stream_ordering)
-                    VALUES (
-                        ?, ?, ?, ?,
-                        (SELECT membership FROM room_memberships WHERE event_id = ?),
-                        (SELECT stream_ordering FROM events WHERE event_id = ?)
-                    )
+                        (room_id, type, state_key, event_id, membership)
+                    VALUES (?, ?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?))
                     """,
                     [
-                        (room_id, key[0], key[1], ev_id, ev_id, ev_id)
+                        (room_id, key[0], key[1], ev_id, ev_id)
                         for key, ev_id in to_insert.items()
                     ],
                 )
@@ -1182,15 +1178,11 @@ class PersistEventsStore:
             if to_insert:
                 txn.execute_batch(
                     """INSERT INTO local_current_membership
-                        (room_id, user_id, event_id, membership, event_stream_ordering)
-                    VALUES (
-                        ?, ?, ?,
-                        (SELECT membership FROM room_memberships WHERE event_id = ?),
-                        (SELECT stream_ordering FROM events WHERE event_id = ?)
-                    )
+                        (room_id, user_id, event_id, membership)
+                    VALUES (?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?))
                     """,
                     [
-                        (room_id, key[1], ev_id, ev_id, ev_id)
+                        (room_id, key[1], ev_id, ev_id)
                         for key, ev_id in to_insert.items()
                         if key[0] == EventTypes.Member and self.is_mine_id(key[1])
                     ],
@@ -1798,7 +1790,6 @@ class PersistEventsStore:
             table="room_memberships",
             keys=(
                 "event_id",
-                "event_stream_ordering",
                 "user_id",
                 "sender",
                 "room_id",
@@ -1809,7 +1800,6 @@ class PersistEventsStore:
             values=[
                 (
                     event.event_id,
-                    event.internal_metadata.stream_ordering,
                     event.state_key,
                     event.user_id,
                     event.room_id,
@@ -1842,7 +1832,6 @@ class PersistEventsStore:
                     keyvalues={"room_id": event.room_id, "user_id": event.state_key},
                     values={
                         "event_id": event.event_id,
-                        "event_stream_ordering": event.internal_metadata.stream_ordering,
                         "membership": event.membership,
                     },
                 )
diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
index 0e81d38cca..b9d3c36d60 100644
--- a/synapse/storage/databases/main/events_bg_updates.py
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -17,7 +17,7 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Set, Tuple, ca
 
 import attr
 
-from synapse.api.constants import EventContentFields, EventTypes, RelationTypes
+from synapse.api.constants import EventContentFields, RelationTypes
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.events import make_event_from_dict
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
@@ -71,10 +71,6 @@ class _BackgroundUpdates:
 
     EVENTS_JUMP_TO_DATE_INDEX = "events_jump_to_date_index"
 
-    POPULATE_MEMBERSHIP_EVENT_STREAM_ORDERING = (
-        "populate_membership_event_stream_ordering"
-    )
-
 
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class _CalculateChainCover:
@@ -103,10 +99,6 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
     ):
         super().__init__(database, db_conn, hs)
 
-        self.db_pool.updates.register_background_update_handler(
-            _BackgroundUpdates.POPULATE_MEMBERSHIP_EVENT_STREAM_ORDERING,
-            self._populate_membership_event_stream_ordering,
-        )
         self.db_pool.updates.register_background_update_handler(
             _BackgroundUpdates.EVENT_ORIGIN_SERVER_TS_NAME,
             self._background_reindex_origin_server_ts,
@@ -1506,97 +1498,3 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
             )
 
         return batch_size
-
-    async def _populate_membership_event_stream_ordering(
-        self, progress: JsonDict, batch_size: int
-    ) -> int:
-        def _populate_membership_event_stream_ordering(
-            txn: LoggingTransaction,
-        ) -> bool:
-
-            if "max_stream_ordering" in progress:
-                max_stream_ordering = progress["max_stream_ordering"]
-            else:
-                txn.execute("SELECT max(stream_ordering) FROM events")
-                res = txn.fetchone()
-                if res is None or res[0] is None:
-                    return True
-                else:
-                    max_stream_ordering = res[0]
-
-            start = progress.get("stream_ordering", 0)
-            stop = start + batch_size
-
-            sql = f"""
-                SELECT room_id, event_id, stream_ordering
-                FROM events
-                WHERE
-                    type = '{EventTypes.Member}'
-                    AND stream_ordering >= ?
-                    AND stream_ordering < ?
-            """
-            txn.execute(sql, (start, stop))
-
-            rows: List[Tuple[str, str, int]] = cast(
-                List[Tuple[str, str, int]], txn.fetchall()
-            )
-
-            event_ids: List[Tuple[str]] = []
-            event_stream_orderings: List[Tuple[int]] = []
-
-            for _, event_id, event_stream_ordering in rows:
-                event_ids.append((event_id,))
-                event_stream_orderings.append((event_stream_ordering,))
-
-            self.db_pool.simple_update_many_txn(
-                txn,
-                table="current_state_events",
-                key_names=("event_id",),
-                key_values=event_ids,
-                value_names=("event_stream_ordering",),
-                value_values=event_stream_orderings,
-            )
-
-            self.db_pool.simple_update_many_txn(
-                txn,
-                table="room_memberships",
-                key_names=("event_id",),
-                key_values=event_ids,
-                value_names=("event_stream_ordering",),
-                value_values=event_stream_orderings,
-            )
-
-            # NOTE: local_current_membership has no index on event_id, so only
-            # the room ID here will reduce the query rows read.
-            for room_id, event_id, event_stream_ordering in rows:
-                txn.execute(
-                    """
-                        UPDATE local_current_membership
-                        SET event_stream_ordering = ?
-                        WHERE room_id = ? AND event_id = ?
-                    """,
-                    (event_stream_ordering, room_id, event_id),
-                )
-
-            self.db_pool.updates._background_update_progress_txn(
-                txn,
-                _BackgroundUpdates.POPULATE_MEMBERSHIP_EVENT_STREAM_ORDERING,
-                {
-                    "stream_ordering": stop,
-                    "max_stream_ordering": max_stream_ordering,
-                },
-            )
-
-            return stop > max_stream_ordering
-
-        finished = await self.db_pool.runInteraction(
-            "_populate_membership_event_stream_ordering",
-            _populate_membership_event_stream_ordering,
-        )
-
-        if finished:
-            await self.db_pool.updates._end_background_update(
-                _BackgroundUpdates.POPULATE_MEMBERSHIP_EVENT_STREAM_ORDERING
-            )
-
-        return batch_size
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 6d0ef10258..d7d08369ca 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -1779,7 +1779,7 @@ class EventsWorkerStore(SQLBaseStore):
             txn: LoggingTransaction,
         ) -> List[Tuple[int, str, str, str, str, str, str, str, bool, bool]]:
             sql = (
-                "SELECT out.event_stream_ordering, e.event_id, e.room_id, e.type,"
+                "SELECT event_stream_ordering, e.event_id, e.room_id, e.type,"
                 " se.state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL,"
                 " e.outlier"
                 " FROM events AS e"
@@ -1791,10 +1791,10 @@ class EventsWorkerStore(SQLBaseStore):
                 " LEFT JOIN event_relations USING (event_id)"
                 " LEFT JOIN room_memberships USING (event_id)"
                 " LEFT JOIN rejections USING (event_id)"
-                " WHERE ? < out.event_stream_ordering"
-                " AND out.event_stream_ordering <= ?"
+                " WHERE ? < event_stream_ordering"
+                " AND event_stream_ordering <= ?"
                 " AND out.instance_name = ?"
-                " ORDER BY out.event_stream_ordering ASC"
+                " ORDER BY event_stream_ordering ASC"
             )
 
             txn.execute(sql, (last_id, current_id, instance_name))
diff --git a/synapse/storage/schema/main/delta/73/26membership_tables_event_stream_ordering.sql b/synapse/storage/schema/main/delta/73/26membership_tables_event_stream_ordering.sql
deleted file mode 100644
index 7c30a67fc4..0000000000
--- a/synapse/storage/schema/main/delta/73/26membership_tables_event_stream_ordering.sql
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright 2022 Beeper
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-ALTER TABLE current_state_events ADD COLUMN event_stream_ordering BIGINT;
-ALTER TABLE local_current_membership ADD COLUMN event_stream_ordering BIGINT;
-ALTER TABLE room_memberships ADD COLUMN event_stream_ordering BIGINT;
-
-INSERT INTO background_updates (update_name, progress_json) VALUES
-  ('populate_membership_event_stream_ordering', '{}');
-- 
cgit 1.5.1


From f10caa73eee0caa91cf373966104d1ededae2aee Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 7 Feb 2023 15:33:33 +0000
Subject: Disambiguate `get_ex_outlier_stream_rows` query

A backwards-compatible piece of #14979 that's safe to land now.
---
 synapse/storage/databases/main/events_worker.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'synapse')

diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index d7d08369ca..6d0ef10258 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -1779,7 +1779,7 @@ class EventsWorkerStore(SQLBaseStore):
             txn: LoggingTransaction,
         ) -> List[Tuple[int, str, str, str, str, str, str, str, bool, bool]]:
             sql = (
-                "SELECT event_stream_ordering, e.event_id, e.room_id, e.type,"
+                "SELECT out.event_stream_ordering, e.event_id, e.room_id, e.type,"
                 " se.state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL,"
                 " e.outlier"
                 " FROM events AS e"
@@ -1791,10 +1791,10 @@ class EventsWorkerStore(SQLBaseStore):
                 " LEFT JOIN event_relations USING (event_id)"
                 " LEFT JOIN room_memberships USING (event_id)"
                 " LEFT JOIN rejections USING (event_id)"
-                " WHERE ? < event_stream_ordering"
-                " AND event_stream_ordering <= ?"
+                " WHERE ? < out.event_stream_ordering"
+                " AND out.event_stream_ordering <= ?"
                 " AND out.instance_name = ?"
-                " ORDER BY event_stream_ordering ASC"
+                " ORDER BY out.event_stream_ordering ASC"
             )
 
             txn.execute(sql, (last_id, current_id, instance_name))
-- 
cgit 1.5.1


From c78c67c5a909c6749f25b251d46be3df8f56f8c2 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 8 Feb 2023 17:41:55 +0100
Subject: Fix bug in replication where response is cached (#15024)

---
 changelog.d/15024.bugfix          | 1 +
 synapse/replication/http/_base.py | 2 ++
 2 files changed, 3 insertions(+)
 create mode 100644 changelog.d/15024.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15024.bugfix b/changelog.d/15024.bugfix
new file mode 100644
index 0000000000..dddd406322
--- /dev/null
+++ b/changelog.d/15024.bugfix
@@ -0,0 +1 @@
+Fix bug where retried replication requests would return a failure. Introduced in v1.76.0.
diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py
index 908f3f1db7..c20d9c7e9d 100644
--- a/synapse/replication/http/_base.py
+++ b/synapse/replication/http/_base.py
@@ -426,6 +426,8 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
             code, response = await self.response_cache.wrap(
                 txn_id, self._handle_request, request, content, **kwargs
             )
+            # Take a copy so we don't mutate things in the cache.
+            response = dict(response)
         else:
             # The `@cancellable` decorator may be applied to `_handle_request`. But we
             # told `HttpServer.register_paths` that our handler is `_check_auth_and_handle`,
-- 
cgit 1.5.1


From c951fbedcb81895c199c1f4cfe2251d6c3a7b5f4 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 8 Feb 2023 13:09:41 -0500
Subject: MSC3873: Escape keys when flattening dicts. (#15004)

This disambiguates keys which attempt to match fields
with a dot in them (e.g. m.relates_to).

Disabled by default behind an experimental configuration flag.
---
 changelog.d/15004.feature                |  1 +
 synapse/config/experimental.py           |  5 +++++
 synapse/push/bulk_push_rule_evaluator.py | 30 ++++++++++++++++++++++++++----
 tests/push/test_push_rule_evaluator.py   |  8 ++++++++
 4 files changed, 40 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/15004.feature

(limited to 'synapse')

diff --git a/changelog.d/15004.feature b/changelog.d/15004.feature
new file mode 100644
index 0000000000..d11d0aca91
--- /dev/null
+++ b/changelog.d/15004.feature
@@ -0,0 +1 @@
+Implement [MSC3873](https://github.com/matrix-org/matrix-spec-proposals/pull/3873) to disambiguate push rule keys with dots in them.
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 53c0682dfd..5e3a889081 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -169,6 +169,11 @@ class ExperimentalConfig(Config):
         # MSC3925: do not replace events with their edits
         self.msc3925_inhibit_edit = experimental.get("msc3925_inhibit_edit", False)
 
+        # MSC3873: Disambiguate event_match keys.
+        self.msc3783_escape_event_match_key = experimental.get(
+            "msc3783_escape_event_match_key", False
+        )
+
         # MSC3952: Intentional mentions
         self.msc3952_intentional_mentions = experimental.get(
             "msc3952_intentional_mentions", False
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index d9c0a98f44..39d2f88f03 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -271,7 +271,10 @@ class BulkPushRuleEvaluator:
                     related_event_id, allow_none=True
                 )
                 if related_event is not None:
-                    related_events[relation_type] = _flatten_dict(related_event)
+                    related_events[relation_type] = _flatten_dict(
+                        related_event,
+                        msc3783_escape_event_match_key=self.hs.config.experimental.msc3783_escape_event_match_key,
+                    )
 
             reply_event_id = (
                 event.content.get("m.relates_to", {})
@@ -286,7 +289,10 @@ class BulkPushRuleEvaluator:
                 )
 
                 if related_event is not None:
-                    related_events["m.in_reply_to"] = _flatten_dict(related_event)
+                    related_events["m.in_reply_to"] = _flatten_dict(
+                        related_event,
+                        msc3783_escape_event_match_key=self.hs.config.experimental.msc3783_escape_event_match_key,
+                    )
 
                     # indicate that this is from a fallback relation.
                     if relation_type == "m.thread" and event.content.get(
@@ -405,7 +411,10 @@ class BulkPushRuleEvaluator:
             room_mention = mentions.get("room") is True
 
         evaluator = PushRuleEvaluator(
-            _flatten_dict(event),
+            _flatten_dict(
+                event,
+                msc3783_escape_event_match_key=self.hs.config.experimental.msc3783_escape_event_match_key,
+            ),
             has_mentions,
             user_mentions,
             room_mention,
@@ -493,6 +502,8 @@ def _flatten_dict(
     d: Union[EventBase, Mapping[str, Any]],
     prefix: Optional[List[str]] = None,
     result: Optional[Dict[str, str]] = None,
+    *,
+    msc3783_escape_event_match_key: bool = False,
 ) -> Dict[str, str]:
     """
     Given a JSON dictionary (or event) which might contain sub dictionaries,
@@ -521,11 +532,22 @@ def _flatten_dict(
     if result is None:
         result = {}
     for key, value in d.items():
+        if msc3783_escape_event_match_key:
+            # Escape periods in the key with a backslash (and backslashes with an
+            # extra backslash). This is since a period is used as a separator between
+            # nested fields.
+            key = key.replace("\\", "\\\\").replace(".", "\\.")
+
         if isinstance(value, str):
             result[".".join(prefix + [key])] = value.lower()
         elif isinstance(value, Mapping):
             # do not set `room_version` due to recursion considerations below
-            _flatten_dict(value, prefix=(prefix + [key]), result=result)
+            _flatten_dict(
+                value,
+                prefix=(prefix + [key]),
+                result=result,
+                msc3783_escape_event_match_key=msc3783_escape_event_match_key,
+            )
 
     # `room_version` should only ever be set when looking at the top level of an event
     if (
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index da33423871..516b65cc3c 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -48,6 +48,14 @@ class FlattenDictTestCase(unittest.TestCase):
         input = {"foo": {"bar": "abc"}}
         self.assertEqual({"foo.bar": "abc"}, _flatten_dict(input))
 
+        # If a field has a dot in it, escape it.
+        input = {"m.foo": {"b\\ar": "abc"}}
+        self.assertEqual({"m.foo.b\\ar": "abc"}, _flatten_dict(input))
+        self.assertEqual(
+            {"m\\.foo.b\\\\ar": "abc"},
+            _flatten_dict(input, msc3783_escape_event_match_key=True),
+        )
+
     def test_non_string(self) -> None:
         """Non-string items are dropped."""
         input: Dict[str, Any] = {
-- 
cgit 1.5.1


From 55e4d27b36fd69a3cf3eceecbd42706579ef2dc7 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Wed, 8 Feb 2023 11:25:11 -0800
Subject: Limit concurrent event creation for a room to avoid state resolution
 when sending bursts of events to a local room (#14977)

---
 changelog.d/14977.misc      | 1 +
 synapse/handlers/message.py | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/14977.misc

(limited to 'synapse')

diff --git a/changelog.d/14977.misc b/changelog.d/14977.misc
new file mode 100644
index 0000000000..4d551c52b7
--- /dev/null
+++ b/changelog.d/14977.misc
@@ -0,0 +1 @@
+Limit concurrent event creation for a room to avoid state resolution when sending bursts of events to a local room.
\ No newline at end of file
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index e688e00575..5f6da2943f 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -499,9 +499,9 @@ class EventCreationHandler:
 
         self.request_ratelimiter = hs.get_request_ratelimiter()
 
-        # We arbitrarily limit concurrent event creation for a room to 5.
-        # This is to stop us from diverging history *too* much.
-        self.limiter = Linearizer(max_count=5, name="room_event_creation_limit")
+        # We limit concurrent event creation for a room to 1. This prevents state resolution
+        # from occurring when sending bursts of events to a local room
+        self.limiter = Linearizer(max_count=1, name="room_event_creation_limit")
 
         self._bulk_push_rule_evaluator = hs.get_bulk_push_rule_evaluator()
 
-- 
cgit 1.5.1


From 733531ee3e695da92f10e01b24f62ee35e09e4cd Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 9 Feb 2023 09:49:04 -0500
Subject: Add final type hint to synapse.server. (#15035)

---
 changelog.d/15035.misc                   |  1 +
 mypy.ini                                 |  3 ---
 synapse/handlers/room.py                 |  2 +-
 synapse/server.py                        | 12 +++++-------
 synapse/storage/_base.py                 |  2 ++
 synapse/storage/database.py              |  1 +
 synapse/storage/databases/main/events.py |  2 +-
 7 files changed, 11 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/15035.misc

(limited to 'synapse')

diff --git a/changelog.d/15035.misc b/changelog.d/15035.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/15035.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/mypy.ini b/mypy.ini
index 3f144e61fb..57f27ba4f7 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -53,9 +53,6 @@ warn_unused_ignores = False
 [mypy-synapse.util.caches.treecache]
 disallow_untyped_defs = False
 
-[mypy-synapse.server]
-disallow_untyped_defs = False
-
 [mypy-synapse.storage.database]
 disallow_untyped_defs = False
 
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 7ba7c4ff07..0e759b8a5d 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -1076,7 +1076,7 @@ class RoomCreationHandler:
         state_map: MutableStateMap[str] = {}
         # current_state_group of last event created. Used for computing event context of
         # events to be batched
-        current_state_group = None
+        current_state_group: Optional[int] = None
 
         def create_event_dict(etype: str, content: JsonDict, **kwargs: Any) -> JsonDict:
             e = {"type": etype, "content": content}
diff --git a/synapse/server.py b/synapse/server.py
index 9d6d268f49..efc6b5f895 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -21,7 +21,7 @@
 import abc
 import functools
 import logging
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, TypeVar, cast
+from typing import TYPE_CHECKING, Callable, Dict, List, Optional, TypeVar, cast
 
 from twisted.internet.interfaces import IOpenSSLContextFactory
 from twisted.internet.tcp import Port
@@ -144,10 +144,10 @@ if TYPE_CHECKING:
     from synapse.handlers.saml import SamlHandler
 
 
-T = TypeVar("T", bound=Callable[..., Any])
+T = TypeVar("T")
 
 
-def cache_in_self(builder: T) -> T:
+def cache_in_self(builder: Callable[["HomeServer"], T]) -> Callable[["HomeServer"], T]:
     """Wraps a function called e.g. `get_foo`, checking if `self.foo` exists and
     returning if so. If not, calls the given function and sets `self.foo` to it.
 
@@ -166,7 +166,7 @@ def cache_in_self(builder: T) -> T:
     building = [False]
 
     @functools.wraps(builder)
-    def _get(self):
+    def _get(self: "HomeServer") -> T:
         try:
             return getattr(self, depname)
         except AttributeError:
@@ -185,9 +185,7 @@ def cache_in_self(builder: T) -> T:
 
         return dep
 
-    # We cast here as we need to tell mypy that `_get` has the same signature as
-    # `builder`.
-    return cast(T, _get)
+    return _get
 
 
 class HomeServer(metaclass=abc.ABCMeta):
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 41d9111019..481fec72fe 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -37,6 +37,8 @@ class SQLBaseStore(metaclass=ABCMeta):
     per data store (and not one per physical database).
     """
 
+    db_pool: DatabasePool
+
     def __init__(
         self,
         database: DatabasePool,
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index e20c5c5302..feaa6cdd07 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -499,6 +499,7 @@ class DatabasePool:
     """
 
     _TXN_ID = 0
+    engine: BaseDatabaseEngine
 
     def __init__(
         self,
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 1536937b67..cb66376fb4 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -306,7 +306,7 @@ class PersistEventsStore:
 
         # The set of event_ids to return. This includes all soft-failed events
         # and their prev events.
-        existing_prevs = set()
+        existing_prevs: Set[str] = set()
 
         def _get_prevs_before_rejected_txn(
             txn: LoggingTransaction, batch: Collection[str]
-- 
cgit 1.5.1


From cd2484dc2e943e40242337dae61f5170638116a2 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 9 Feb 2023 15:28:26 +0000
Subject: Bump schema version (#15036)

* Bump schema version

This should have been included in
f10caa73eee0caa91cf373966104d1ededae2aee (and #14979).

* Changelog
---
 changelog.d/15036.misc             | 1 +
 synapse/storage/schema/__init__.py | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15036.misc

(limited to 'synapse')

diff --git a/changelog.d/15036.misc b/changelog.d/15036.misc
new file mode 100644
index 0000000000..b0adc9c9d1
--- /dev/null
+++ b/changelog.d/15036.misc
@@ -0,0 +1 @@
+Prepare for future database schema changes.
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 19dbf2da7f..d3103a6c7a 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 73  # remember to update the list below when updating
+SCHEMA_VERSION = 74  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -78,7 +78,7 @@ Changes in SCHEMA_VERSION = 72:
     - Unused column application_services_state.last_txn is dropped
     - Cache invalidation stream id sequence now begins at 2 to match code expectation.
 
-Changes in SCHEMA_VERSION = 73;
+Changes in SCHEMA_VERSION = 73:
     - thread_id column is added to event_push_actions, event_push_actions_staging
       event_push_summary, receipts_linearized, and receipts_graph.
     - Add table `event_failed_pull_attempts` to keep track when we fail to pull
@@ -86,6 +86,11 @@ Changes in SCHEMA_VERSION = 73;
     - Add indexes to various tables (`event_failed_pull_attempts`, `insertion_events`,
       `batch_events`) to make it easy to delete all associated rows when purging a room.
     - `inserted_ts` column is added to `event_push_actions_staging` table.
+
+Changes in SCHEMA_VERSION = 74:
+    - A query on `event_stream_ordering` column has now been disambiguated (i.e. the
+      codebase can handle the `current_state_events`, `local_current_memberships` and
+      `room_memberships` tables having an `event_stream_ordering` column).
 """
 
 
-- 
cgit 1.5.1


From 8a6e0434889ea94893119775b6f56904cbc575c2 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 9 Feb 2023 10:56:02 -0500
Subject: Avoid mutating cached room aliases. (#15038)

This might cause incorrect data in other callers which
are not expecting the canonical alias to be added into
the response.
---
 changelog.d/15038.bugfix      | 1 +
 synapse/handlers/directory.py | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15038.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15038.bugfix b/changelog.d/15038.bugfix
new file mode 100644
index 0000000000..4695a09756
--- /dev/null
+++ b/changelog.d/15038.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where the room aliases returned could be corrupted.
diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py
index 2ea52257cb..d31b0fbb17 100644
--- a/synapse/handlers/directory.py
+++ b/synapse/handlers/directory.py
@@ -485,7 +485,8 @@ class DirectoryHandler:
                 )
             )
             if canonical_alias:
-                room_aliases.append(canonical_alias)
+                # Ensure we do not mutate room_aliases.
+                room_aliases = room_aliases + [canonical_alias]
 
             if not self.config.roomdirectory.is_publishing_room_allowed(
                 user_id, room_id, room_aliases
-- 
cgit 1.5.1


From d22c1c862c8259465a8e95c41eb1f00d0367a640 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 9 Feb 2023 13:04:24 -0500
Subject: Respond correctly to unknown methods on known endpoints (#14605)

Respond with a 405 error if a request is received on a known endpoint,
but to an unknown method, per MSC3743.
---
 changelog.d/14605.bugfix          |  1 +
 docs/admin_api/media_admin_api.md | 10 +++++++-
 docs/upgrade.md                   | 10 ++++++++
 synapse/http/server.py            | 40 ++++++++++++--------------------
 synapse/rest/admin/media.py       | 18 +++++++++++----
 synapse/rest/client/room_keys.py  | 48 ++++++++++++++++++++++++++-------------
 synapse/rest/client/tags.py       |  4 +++-
 tests/rest/admin/test_media.py    |  9 +++++---
 8 files changed, 89 insertions(+), 51 deletions(-)
 create mode 100644 changelog.d/14605.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14605.bugfix b/changelog.d/14605.bugfix
new file mode 100644
index 0000000000..cb95a87d92
--- /dev/null
+++ b/changelog.d/14605.bugfix
@@ -0,0 +1 @@
+Return spec-compliant JSON errors when unknown endpoints are requested.
diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md
index 7f8c8e22c1..30833f3109 100644
--- a/docs/admin_api/media_admin_api.md
+++ b/docs/admin_api/media_admin_api.md
@@ -235,6 +235,14 @@ The following fields are returned in the JSON response body:
 
 Request:
 
+```
+POST /_synapse/admin/v1/media/delete?before_ts=<before_ts>
+
+{}
+```
+
+*Deprecated in Synapse v1.78.0:* This API is available at the deprecated endpoint:
+
 ```
 POST /_synapse/admin/v1/media/<server_name>/delete?before_ts=<before_ts>
 
@@ -243,7 +251,7 @@ POST /_synapse/admin/v1/media/<server_name>/delete?before_ts=<before_ts>
 
 URL Parameters
 
-* `server_name`: string - The name of your local server (e.g `matrix.org`).
+* `server_name`: string - The name of your local server (e.g `matrix.org`). *Deprecated in Synapse v1.78.0.*
 * `before_ts`: string representing a positive integer - Unix timestamp in milliseconds.
 Files that were last used before this timestamp will be deleted. It is the timestamp of
 last access, not the timestamp when the file was created.
diff --git a/docs/upgrade.md b/docs/upgrade.md
index bc143444be..15167b8c58 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -88,6 +88,15 @@ process, for example:
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     ```
 
+# Upgrading to v1.78.0
+
+## Deprecate the `/_synapse/admin/v1/media/<server_name>/delete` admin API
+
+Synapse 1.78.0 replaces the `/_synapse/admin/v1/media/<server_name>/delete`
+admin API with an identical endpoint at `/_synapse/admin/v1/media/delete`. Please
+update your tooling to use the new endpoint. The deprecated version will be removed
+in a future release.
+
 # Upgrading to v1.76.0
 
 ## Faster joins are enabled by default
@@ -137,6 +146,7 @@ and then do `pip install matrix-synapse[user-search]` for a PyPI install.
 Docker images and Debian packages need nothing specific as they already
 include or specify ICU as an explicit dependency.
 
+
 # Upgrading to v1.73.0
 
 ## Legacy Prometheus metric names have now been removed
diff --git a/synapse/http/server.py b/synapse/http/server.py
index 2563858f3c..9314454af1 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -30,7 +30,6 @@ from typing import (
     Iterable,
     Iterator,
     List,
-    NoReturn,
     Optional,
     Pattern,
     Tuple,
@@ -340,7 +339,8 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
 
             return callback_return
 
-        return _unrecognised_request_handler(request)
+        # A request with an unknown method (for a known endpoint) was received.
+        raise UnrecognizedRequestError(code=405)
 
     @abc.abstractmethod
     def _send_response(
@@ -396,7 +396,6 @@ class DirectServeJsonResource(_AsyncResource):
 
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class _PathEntry:
-    pattern: Pattern
     callback: ServletCallback
     servlet_classname: str
 
@@ -425,13 +424,14 @@ class JsonResource(DirectServeJsonResource):
     ):
         super().__init__(canonical_json, extract_context)
         self.clock = hs.get_clock()
-        self.path_regexs: Dict[bytes, List[_PathEntry]] = {}
+        # Map of path regex -> method -> callback.
+        self._routes: Dict[Pattern[str], Dict[bytes, _PathEntry]] = {}
         self.hs = hs
 
     def register_paths(
         self,
         method: str,
-        path_patterns: Iterable[Pattern],
+        path_patterns: Iterable[Pattern[str]],
         callback: ServletCallback,
         servlet_classname: str,
     ) -> None:
@@ -455,8 +455,8 @@ class JsonResource(DirectServeJsonResource):
 
         for path_pattern in path_patterns:
             logger.debug("Registering for %s %s", method, path_pattern.pattern)
-            self.path_regexs.setdefault(method_bytes, []).append(
-                _PathEntry(path_pattern, callback, servlet_classname)
+            self._routes.setdefault(path_pattern, {})[method_bytes] = _PathEntry(
+                callback, servlet_classname
             )
 
     def _get_handler_for_request(
@@ -478,14 +478,17 @@ class JsonResource(DirectServeJsonResource):
 
         # Loop through all the registered callbacks to check if the method
         # and path regex match
-        for path_entry in self.path_regexs.get(request_method, []):
-            m = path_entry.pattern.match(request_path)
+        for path_pattern, methods in self._routes.items():
+            m = path_pattern.match(request_path)
             if m:
-                # We found a match!
+                # We found a matching path!
+                path_entry = methods.get(request_method)
+                if not path_entry:
+                    raise UnrecognizedRequestError(code=405)
                 return path_entry.callback, path_entry.servlet_classname, m.groupdict()
 
-        # Huh. No one wanted to handle that? Fiiiiiine. Send 400.
-        return _unrecognised_request_handler, "unrecognised_request_handler", {}
+        # Huh. No one wanted to handle that? Fiiiiiine.
+        raise UnrecognizedRequestError(code=404)
 
     async def _async_render(self, request: SynapseRequest) -> Tuple[int, Any]:
         callback, servlet_classname, group_dict = self._get_handler_for_request(request)
@@ -567,19 +570,6 @@ class StaticResource(File):
         return super().render_GET(request)
 
 
-def _unrecognised_request_handler(request: Request) -> NoReturn:
-    """Request handler for unrecognised requests
-
-    This is a request handler suitable for return from
-    _get_handler_for_request. It actually just raises an
-    UnrecognizedRequestError.
-
-    Args:
-        request: Unused, but passed in to match the signature of ServletCallback.
-    """
-    raise UnrecognizedRequestError(code=404)
-
-
 class UnrecognizedRequestResource(resource.Resource):
     """
     Similar to twisted.web.resource.NoResource, but returns a JSON 404 with an
diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py
index 0d072c42a7..c134ccfb3d 100644
--- a/synapse/rest/admin/media.py
+++ b/synapse/rest/admin/media.py
@@ -15,7 +15,7 @@
 
 import logging
 from http import HTTPStatus
-from typing import TYPE_CHECKING, Tuple
+from typing import TYPE_CHECKING, Optional, Tuple
 
 from synapse.api.constants import Direction
 from synapse.api.errors import Codes, NotFoundError, SynapseError
@@ -285,7 +285,12 @@ class DeleteMediaByDateSize(RestServlet):
     timestamp and size.
     """
 
-    PATTERNS = admin_patterns("/media/(?P<server_name>[^/]*)/delete$")
+    PATTERNS = [
+        *admin_patterns("/media/delete$"),
+        # This URL is kept around for legacy reasons; it is undesirable since it
+        # overlaps with the DeleteMediaByID servlet.
+        *admin_patterns("/media/(?P<server_name>[^/]*)/delete$"),
+    ]
 
     def __init__(self, hs: "HomeServer"):
         self.store = hs.get_datastores().main
@@ -294,7 +299,7 @@ class DeleteMediaByDateSize(RestServlet):
         self.media_repository = hs.get_media_repository()
 
     async def on_POST(
-        self, request: SynapseRequest, server_name: str
+        self, request: SynapseRequest, server_name: Optional[str] = None
     ) -> Tuple[int, JsonDict]:
         await assert_requester_is_admin(self.auth, request)
 
@@ -322,7 +327,8 @@ class DeleteMediaByDateSize(RestServlet):
                 errcode=Codes.INVALID_PARAM,
             )
 
-        if self.server_name != server_name:
+        # This check is useless, we keep it for the legacy endpoint only.
+        if server_name is not None and self.server_name != server_name:
             raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only delete local media")
 
         logging.info(
@@ -489,6 +495,8 @@ def register_servlets_for_media_repo(hs: "HomeServer", http_server: HttpServer)
     ProtectMediaByID(hs).register(http_server)
     UnprotectMediaByID(hs).register(http_server)
     ListMediaInRoom(hs).register(http_server)
-    DeleteMediaByID(hs).register(http_server)
+    # XXX DeleteMediaByDateSize must be registered before DeleteMediaByID as
+    #     their URL routes overlap.
     DeleteMediaByDateSize(hs).register(http_server)
+    DeleteMediaByID(hs).register(http_server)
     UserMediaRestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/room_keys.py b/synapse/rest/client/room_keys.py
index f7081f638e..4e7ffdb555 100644
--- a/synapse/rest/client/room_keys.py
+++ b/synapse/rest/client/room_keys.py
@@ -259,6 +259,32 @@ class RoomKeysNewVersionServlet(RestServlet):
         self.auth = hs.get_auth()
         self.e2e_room_keys_handler = hs.get_e2e_room_keys_handler()
 
+    async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
+        """
+        Retrieve the version information about the most current backup version (if any)
+
+        It takes out an exclusive lock on this user's room_key backups, to ensure
+        clients only upload to the current backup.
+
+        Returns 404 if no backup version exists.
+
+        GET /room_keys/version HTTP/1.1
+        {
+            "version": "12345",
+            "algorithm": "m.megolm_backup.v1",
+            "auth_data": "dGhpcyBzaG91bGQgYWN0dWFsbHkgYmUgZW5jcnlwdGVkIGpzb24K"
+        }
+        """
+        requester = await self.auth.get_user_by_req(request, allow_guest=False)
+        user_id = requester.user.to_string()
+
+        try:
+            info = await self.e2e_room_keys_handler.get_version_info(user_id)
+        except SynapseError as e:
+            if e.code == 404:
+                raise SynapseError(404, "No backup found", Codes.NOT_FOUND)
+        return 200, info
+
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         """
         Create a new backup version for this user's room_keys with the given
@@ -301,7 +327,7 @@ class RoomKeysNewVersionServlet(RestServlet):
 
 
 class RoomKeysVersionServlet(RestServlet):
-    PATTERNS = client_patterns("/room_keys/version(/(?P<version>[^/]+))?$")
+    PATTERNS = client_patterns("/room_keys/version/(?P<version>[^/]+)$")
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -309,12 +335,11 @@ class RoomKeysVersionServlet(RestServlet):
         self.e2e_room_keys_handler = hs.get_e2e_room_keys_handler()
 
     async def on_GET(
-        self, request: SynapseRequest, version: Optional[str]
+        self, request: SynapseRequest, version: str
     ) -> Tuple[int, JsonDict]:
         """
         Retrieve the version information about a given version of the user's
-        room_keys backup.  If the version part is missing, returns info about the
-        most current backup version (if any)
+        room_keys backup.
 
         It takes out an exclusive lock on this user's room_key backups, to ensure
         clients only upload to the current backup.
@@ -339,20 +364,16 @@ class RoomKeysVersionServlet(RestServlet):
         return 200, info
 
     async def on_DELETE(
-        self, request: SynapseRequest, version: Optional[str]
+        self, request: SynapseRequest, version: str
     ) -> Tuple[int, JsonDict]:
         """
         Delete the information about a given version of the user's
-        room_keys backup.  If the version part is missing, deletes the most
-        current backup version (if any). Doesn't delete the actual room data.
+        room_keys backup. Doesn't delete the actual room data.
 
         DELETE /room_keys/version/12345 HTTP/1.1
         HTTP/1.1 200 OK
         {}
         """
-        if version is None:
-            raise SynapseError(400, "No version specified to delete", Codes.NOT_FOUND)
-
         requester = await self.auth.get_user_by_req(request, allow_guest=False)
         user_id = requester.user.to_string()
 
@@ -360,7 +381,7 @@ class RoomKeysVersionServlet(RestServlet):
         return 200, {}
 
     async def on_PUT(
-        self, request: SynapseRequest, version: Optional[str]
+        self, request: SynapseRequest, version: str
     ) -> Tuple[int, JsonDict]:
         """
         Update the information about a given version of the user's room_keys backup.
@@ -386,11 +407,6 @@ class RoomKeysVersionServlet(RestServlet):
         user_id = requester.user.to_string()
         info = parse_json_object_from_request(request)
 
-        if version is None:
-            raise SynapseError(
-                400, "No version specified to update", Codes.MISSING_PARAM
-            )
-
         await self.e2e_room_keys_handler.update_version(user_id, version, info)
         return 200, {}
 
diff --git a/synapse/rest/client/tags.py b/synapse/rest/client/tags.py
index ca638755c7..dde08417a4 100644
--- a/synapse/rest/client/tags.py
+++ b/synapse/rest/client/tags.py
@@ -34,7 +34,9 @@ class TagListServlet(RestServlet):
     GET /user/{user_id}/rooms/{room_id}/tags HTTP/1.1
     """
 
-    PATTERNS = client_patterns("/user/(?P<user_id>[^/]*)/rooms/(?P<room_id>[^/]*)/tags")
+    PATTERNS = client_patterns(
+        "/user/(?P<user_id>[^/]*)/rooms/(?P<room_id>[^/]*)/tags$"
+    )
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py
index aadb31ca83..db77a45ae3 100644
--- a/tests/rest/admin/test_media.py
+++ b/tests/rest/admin/test_media.py
@@ -213,7 +213,8 @@ class DeleteMediaByDateSizeTestCase(unittest.HomeserverTestCase):
         self.admin_user_tok = self.login("admin", "pass")
 
         self.filepaths = MediaFilePaths(hs.config.media.media_store_path)
-        self.url = "/_synapse/admin/v1/media/%s/delete" % self.server_name
+        self.url = "/_synapse/admin/v1/media/delete"
+        self.legacy_url = "/_synapse/admin/v1/media/%s/delete" % self.server_name
 
         # Move clock up to somewhat realistic time
         self.reactor.advance(1000000000)
@@ -332,11 +333,13 @@ class DeleteMediaByDateSizeTestCase(unittest.HomeserverTestCase):
             channel.json_body["error"],
         )
 
-    def test_delete_media_never_accessed(self) -> None:
+    @parameterized.expand([(True,), (False,)])
+    def test_delete_media_never_accessed(self, use_legacy_url: bool) -> None:
         """
         Tests that media deleted if it is older than `before_ts` and never accessed
         `last_access_ts` is `NULL` and `created_ts` < `before_ts`
         """
+        url = self.legacy_url if use_legacy_url else self.url
 
         # upload and do not access
         server_and_media_id = self._create_media()
@@ -351,7 +354,7 @@ class DeleteMediaByDateSizeTestCase(unittest.HomeserverTestCase):
         now_ms = self.clock.time_msec()
         channel = self.make_request(
             "POST",
-            self.url + "?before_ts=" + str(now_ms),
+            url + "?before_ts=" + str(now_ms),
             access_token=self.admin_user_tok,
         )
         self.assertEqual(200, channel.code, msg=channel.json_body)
-- 
cgit 1.5.1


From c1d2ce2901ab1c7cfaeebb4683af05a2ebf19fa6 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Thu, 9 Feb 2023 19:57:01 +0000
Subject: Do not always start a db txn on Postgres (#14840)

---
 changelog.d/14840.misc              |  1 +
 synapse/storage/prepare_database.py | 13 +++++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14840.misc

(limited to 'synapse')

diff --git a/changelog.d/14840.misc b/changelog.d/14840.misc
new file mode 100644
index 0000000000..ff6084284a
--- /dev/null
+++ b/changelog.d/14840.misc
@@ -0,0 +1 @@
+Prevent "WARNING:  there is already a transaction in progress" lines appearing in PostgreSQL's logs on some occasions.
\ No newline at end of file
diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index 3acdb39da7..6c335a9315 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -23,7 +23,7 @@ from typing_extensions import Counter as CounterType
 
 from synapse.config.homeserver import HomeServerConfig
 from synapse.storage.database import LoggingDatabaseConnection
-from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
 from synapse.storage.schema import SCHEMA_COMPAT_VERSION, SCHEMA_VERSION
 from synapse.storage.types import Cursor
 
@@ -108,9 +108,14 @@ def prepare_database(
         # so we start one before running anything. This ensures that any upgrades
         # are either applied completely, or not at all.
         #
-        # (psycopg2 automatically starts a transaction as soon as we run any statements
-        # at all, so this is redundant but harmless there.)
-        cur.execute("BEGIN TRANSACTION")
+        # psycopg2 does not automatically start transactions when in autocommit mode.
+        # While it is technically harmless to nest transactions in postgres, doing so
+        # results in a warning in Postgres' logs per query. And we'd rather like to
+        # avoid doing that.
+        if isinstance(database_engine, Sqlite3Engine) or (
+            isinstance(database_engine, PostgresEngine) and db_conn.autocommit
+        ):
+            cur.execute("BEGIN TRANSACTION")
 
         logger.info("%r: Checking existing schema version", databases)
         version_info = _get_or_create_schema_state(cur, database_engine)
-- 
cgit 1.5.1


From 03bccd542bcffe3ea12cd35108740a7d62dd38ab Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Thu, 9 Feb 2023 13:05:02 -0800
Subject: Add a class UnpersistedEventContext to allow for the batching up of
 storing state groups (#14675)

* add class UnpersistedEventContext

* modify create new client event to create unpersistedeventcontexts

* persist event contexts after creation

* fix tests to persist unpersisted event contexts

* cleanup

* misc lints + cleanup

* changelog + fix comments

* lints

* fix batch insertion?

* reduce redundant calculation

* add unpersisted event classes

* rework compute_event_context, split into function that returns unpersisted event context and then persists it

* use calculate_context_info to create unpersisted event contexts

* update typing

* $%#^&*

* black

* fix comments and consolidate classes, use attr.s for class

* requested changes

* lint

* requested changes

* requested changes

* refactor to be stupidly explicit

* clearer renaming and flow

* make partial state non-optional

* update docstrings

---------

Co-authored-by: Erik Johnston <erik@matrix.org>
---
 changelog.d/14675.misc                |   1 +
 synapse/events/snapshot.py            | 174 ++++++++++++++++++++++++++++++++-
 synapse/events/third_party_rules.py   |   6 +-
 synapse/handlers/federation.py        |  59 ++++++++----
 synapse/handlers/federation_event.py  |   6 +-
 synapse/handlers/message.py           |  42 +++++---
 synapse/state/__init__.py             | 176 ++++++++++++++--------------------
 tests/handlers/test_user_directory.py |   4 +-
 tests/rest/admin/test_user.py         |   4 +-
 tests/storage/test_redaction.py       |  24 +++--
 tests/storage/test_state.py           |   4 +-
 tests/test_utils/event_injection.py   |   7 +-
 tests/test_visibility.py              |   9 +-
 tests/utils.py                        |   5 +-
 14 files changed, 359 insertions(+), 162 deletions(-)
 create mode 100644 changelog.d/14675.misc

(limited to 'synapse')

diff --git a/changelog.d/14675.misc b/changelog.d/14675.misc
new file mode 100644
index 0000000000..bc1ac1c82a
--- /dev/null
+++ b/changelog.d/14675.misc
@@ -0,0 +1 @@
+Add a class UnpersistedEventContext to allow for the batching up of storing state groups.
diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py
index 6eaef8b57a..e0d82ad81c 100644
--- a/synapse/events/snapshot.py
+++ b/synapse/events/snapshot.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, List, Optional, Tuple
 
 import attr
@@ -26,8 +27,51 @@ if TYPE_CHECKING:
     from synapse.types.state import StateFilter
 
 
+class UnpersistedEventContextBase(ABC):
+    """
+    This is a base class for EventContext and UnpersistedEventContext, objects which
+    hold information relevant to storing an associated event. Note that an
+    UnpersistedEventContext must be converted into an EventContext before it is
+    suitable to send to the db with its associated event.
+
+    Attributes:
+        _storage: storage controllers for interfacing with the database
+        app_service: If the associated event is being sent by a (local) application service, that
+            app service.
+    """
+
+    def __init__(self, storage_controller: "StorageControllers"):
+        self._storage: "StorageControllers" = storage_controller
+        self.app_service: Optional[ApplicationService] = None
+
+    @abstractmethod
+    async def persist(
+        self,
+        event: EventBase,
+    ) -> "EventContext":
+        """
+        A method to convert an UnpersistedEventContext to an EventContext, suitable for
+        sending to the database with the associated event.
+        """
+        pass
+
+    @abstractmethod
+    async def get_prev_state_ids(
+        self, state_filter: Optional["StateFilter"] = None
+    ) -> StateMap[str]:
+        """
+        Gets the room state at the event (ie not including the event if the event is a
+        state event).
+
+        Args:
+            state_filter: specifies the type of state event to fetch from DB, example:
+            EventTypes.JoinRules
+        """
+        pass
+
+
 @attr.s(slots=True, auto_attribs=True)
-class EventContext:
+class EventContext(UnpersistedEventContextBase):
     """
     Holds information relevant to persisting an event
 
@@ -77,9 +121,6 @@ class EventContext:
         delta_ids: If ``prev_group`` is not None, the state delta between ``prev_group``
             and ``state_group``.
 
-        app_service: If this event is being sent by a (local) application service, that
-            app service.
-
         partial_state: if True, we may be storing this event with a temporary,
             incomplete state.
     """
@@ -122,6 +163,9 @@ class EventContext:
         """Return an EventContext instance suitable for persisting an outlier event"""
         return EventContext(storage=storage)
 
+    async def persist(self, event: EventBase) -> "EventContext":
+        return self
+
     async def serialize(self, event: EventBase, store: "DataStore") -> JsonDict:
         """Converts self to a type that can be serialized as JSON, and then
         deserialized by `deserialize`
@@ -254,6 +298,128 @@ class EventContext:
         )
 
 
+@attr.s(slots=True, auto_attribs=True)
+class UnpersistedEventContext(UnpersistedEventContextBase):
+    """
+    The event context holds information about the state groups for an event. It is important
+    to remember that an event technically has two state groups: the state group before the
+    event, and the state group after the event. If the event is not a state event, the state
+    group will not change (ie the state group before the event will be the same as the state
+    group after the event), but if it is a state event the state group before the event
+    will differ from the state group after the event.
+    This is a version of an EventContext before the new state group (if any) has been
+    computed and stored. It contains information about the state before the event (which
+    also may be the information after the event, if the event is not a state event). The
+    UnpersistedEventContext must be converted into an EventContext by calling the method
+    'persist' on it before it is suitable to be sent to the DB for processing.
+
+        state_group_after_event:
+             The state group after the event. This will always be None until it is persisted.
+             If the event is not a state event, this will be the same as
+             state_group_before_event.
+
+        state_group_before_event:
+            The ID of the state group representing the state of the room before this event.
+
+        state_delta_due_to_event:
+            If the event is a state event, then this is the delta of the state between
+             `state_group_after_event` and `state_group_before_event`
+
+        prev_group_for_state_group_before_event:
+            If it is known, ``state_group_before_event``'s previous state group.
+
+        delta_ids_to_state_group_before_event:
+             If ``prev_group_for_state_group_before_event`` is not None, the state delta
+             between ``prev_group_for_state_group_before_event`` and ``state_group_before_event``.
+
+        partial_state:
+            Whether the event has partial state.
+
+        state_map_before_event:
+            A map of the state before the event, i.e. the state at `state_group_before_event`
+    """
+
+    _storage: "StorageControllers"
+    state_group_before_event: Optional[int]
+    state_group_after_event: Optional[int]
+    state_delta_due_to_event: Optional[dict]
+    prev_group_for_state_group_before_event: Optional[int]
+    delta_ids_to_state_group_before_event: Optional[StateMap[str]]
+    partial_state: bool
+    state_map_before_event: Optional[StateMap[str]] = None
+
+    async def get_prev_state_ids(
+        self, state_filter: Optional["StateFilter"] = None
+    ) -> StateMap[str]:
+        """
+        Gets the room state map, excluding this event.
+
+        Args:
+            state_filter: specifies the type of state event to fetch from DB
+
+        Returns:
+            Maps a (type, state_key) to the event ID of the state event matching
+            this tuple.
+        """
+        if self.state_map_before_event:
+            return self.state_map_before_event
+
+        assert self.state_group_before_event is not None
+        return await self._storage.state.get_state_ids_for_group(
+            self.state_group_before_event, state_filter
+        )
+
+    async def persist(self, event: EventBase) -> EventContext:
+        """
+        Creates a full `EventContext` for the event, persisting any referenced state that
+        has not yet been persisted.
+
+        Args:
+             event: event that the EventContext is associated with.
+
+        Returns: An EventContext suitable for sending to the database with the event
+        for persisting
+        """
+        assert self.partial_state is not None
+
+        # If we have a full set of state for before the event but don't have a state
+        # group for that state, we need to get one
+        if self.state_group_before_event is None:
+            assert self.state_map_before_event
+            state_group_before_event = await self._storage.state.store_state_group(
+                event.event_id,
+                event.room_id,
+                prev_group=self.prev_group_for_state_group_before_event,
+                delta_ids=self.delta_ids_to_state_group_before_event,
+                current_state_ids=self.state_map_before_event,
+            )
+            self.state_group_before_event = state_group_before_event
+
+        # if the event isn't a state event the state group doesn't change
+        if not self.state_delta_due_to_event:
+            state_group_after_event = self.state_group_before_event
+
+        # otherwise if it is a state event we need to get a state group for it
+        else:
+            state_group_after_event = await self._storage.state.store_state_group(
+                event.event_id,
+                event.room_id,
+                prev_group=self.state_group_before_event,
+                delta_ids=self.state_delta_due_to_event,
+                current_state_ids=None,
+            )
+
+        return EventContext.with_state(
+            storage=self._storage,
+            state_group=state_group_after_event,
+            state_group_before_event=self.state_group_before_event,
+            state_delta_due_to_event=self.state_delta_due_to_event,
+            partial_state=self.partial_state,
+            prev_group=self.state_group_before_event,
+            delta_ids=self.state_delta_due_to_event,
+        )
+
+
 def _encode_state_dict(
     state_dict: Optional[StateMap[str]],
 ) -> Optional[List[Tuple[str, str, str]]]:
diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py
index 72ab696898..97c61cc258 100644
--- a/synapse/events/third_party_rules.py
+++ b/synapse/events/third_party_rules.py
@@ -18,7 +18,7 @@ from twisted.internet.defer import CancelledError
 
 from synapse.api.errors import ModuleFailedException, SynapseError
 from synapse.events import EventBase
-from synapse.events.snapshot import EventContext
+from synapse.events.snapshot import UnpersistedEventContextBase
 from synapse.storage.roommember import ProfileInfo
 from synapse.types import Requester, StateMap
 from synapse.util.async_helpers import delay_cancellation, maybe_awaitable
@@ -231,7 +231,9 @@ class ThirdPartyEventRules:
             self._on_threepid_bind_callbacks.append(on_threepid_bind)
 
     async def check_event_allowed(
-        self, event: EventBase, context: EventContext
+        self,
+        event: EventBase,
+        context: UnpersistedEventContextBase,
     ) -> Tuple[bool, Optional[dict]]:
         """Check if a provided event should be allowed in the given context.
 
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 7f64130e0a..43ed4a3dd1 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -56,7 +56,7 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion
 from synapse.crypto.event_signing import compute_event_signature
 from synapse.event_auth import validate_event_for_room_version
 from synapse.events import EventBase
-from synapse.events.snapshot import EventContext
+from synapse.events.snapshot import EventContext, UnpersistedEventContextBase
 from synapse.events.validator import EventValidator
 from synapse.federation.federation_client import InvalidResponseError
 from synapse.http.servlet import assert_params_in_dict
@@ -990,7 +990,10 @@ class FederationHandler:
         )
 
         try:
-            event, context = await self.event_creation_handler.create_new_client_event(
+            (
+                event,
+                unpersisted_context,
+            ) = await self.event_creation_handler.create_new_client_event(
                 builder=builder
             )
         except SynapseError as e:
@@ -998,7 +1001,9 @@ class FederationHandler:
             raise
 
         # Ensure the user can even join the room.
-        await self._federation_event_handler.check_join_restrictions(context, event)
+        await self._federation_event_handler.check_join_restrictions(
+            unpersisted_context, event
+        )
 
         # The remote hasn't signed it yet, obviously. We'll do the full checks
         # when we get the event back in `on_send_join_request`
@@ -1178,7 +1183,7 @@ class FederationHandler:
             },
         )
 
-        event, context = await self.event_creation_handler.create_new_client_event(
+        event, _ = await self.event_creation_handler.create_new_client_event(
             builder=builder
         )
 
@@ -1228,12 +1233,13 @@ class FederationHandler:
             },
         )
 
-        event, context = await self.event_creation_handler.create_new_client_event(
-            builder=builder
-        )
+        (
+            event,
+            unpersisted_context,
+        ) = await self.event_creation_handler.create_new_client_event(builder=builder)
 
         event_allowed, _ = await self.third_party_event_rules.check_event_allowed(
-            event, context
+            event, unpersisted_context
         )
         if not event_allowed:
             logger.warning("Creation of knock %s forbidden by third-party rules", event)
@@ -1406,15 +1412,20 @@ class FederationHandler:
                 try:
                     (
                         event,
-                        context,
+                        unpersisted_context,
                     ) = await self.event_creation_handler.create_new_client_event(
                         builder=builder
                     )
 
-                    event, context = await self.add_display_name_to_third_party_invite(
-                        room_version_obj, event_dict, event, context
+                    (
+                        event,
+                        unpersisted_context,
+                    ) = await self.add_display_name_to_third_party_invite(
+                        room_version_obj, event_dict, event, unpersisted_context
                     )
 
+                    context = await unpersisted_context.persist(event)
+
                     EventValidator().validate_new(event, self.config)
 
                     # We need to tell the transaction queue to send this out, even
@@ -1483,14 +1494,19 @@ class FederationHandler:
             try:
                 (
                     event,
-                    context,
+                    unpersisted_context,
                 ) = await self.event_creation_handler.create_new_client_event(
                     builder=builder
                 )
-                event, context = await self.add_display_name_to_third_party_invite(
-                    room_version_obj, event_dict, event, context
+                (
+                    event,
+                    unpersisted_context,
+                ) = await self.add_display_name_to_third_party_invite(
+                    room_version_obj, event_dict, event, unpersisted_context
                 )
 
+                context = await unpersisted_context.persist(event)
+
                 try:
                     validate_event_for_room_version(event)
                     await self._event_auth_handler.check_auth_rules_from_context(event)
@@ -1522,8 +1538,8 @@ class FederationHandler:
         room_version_obj: RoomVersion,
         event_dict: JsonDict,
         event: EventBase,
-        context: EventContext,
-    ) -> Tuple[EventBase, EventContext]:
+        context: UnpersistedEventContextBase,
+    ) -> Tuple[EventBase, UnpersistedEventContextBase]:
         key = (
             EventTypes.ThirdPartyInvite,
             event.content["third_party_invite"]["signed"]["token"],
@@ -1557,11 +1573,14 @@ class FederationHandler:
             room_version_obj, event_dict
         )
         EventValidator().validate_builder(builder)
-        event, context = await self.event_creation_handler.create_new_client_event(
-            builder=builder
-        )
+
+        (
+            event,
+            unpersisted_context,
+        ) = await self.event_creation_handler.create_new_client_event(builder=builder)
+
         EventValidator().validate_new(event, self.config)
-        return event, context
+        return event, unpersisted_context
 
     async def _check_signature(self, event: EventBase, context: EventContext) -> None:
         """
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index e037acbca2..3561f2f1de 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -58,7 +58,7 @@ from synapse.event_auth import (
     validate_event_for_room_version,
 )
 from synapse.events import EventBase
-from synapse.events.snapshot import EventContext
+from synapse.events.snapshot import EventContext, UnpersistedEventContextBase
 from synapse.federation.federation_client import InvalidResponseError, PulledPduInfo
 from synapse.logging.context import nested_logging_context
 from synapse.logging.opentracing import (
@@ -426,7 +426,9 @@ class FederationEventHandler:
         return event, context
 
     async def check_join_restrictions(
-        self, context: EventContext, event: EventBase
+        self,
+        context: UnpersistedEventContextBase,
+        event: EventBase,
     ) -> None:
         """Check that restrictions in restricted join rules are matched
 
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 5f6da2943f..3e30f52e4d 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -48,7 +48,7 @@ from synapse.api.urls import ConsentURIBuilder
 from synapse.event_auth import validate_event_for_room_version
 from synapse.events import EventBase, relation_from_event
 from synapse.events.builder import EventBuilder
-from synapse.events.snapshot import EventContext
+from synapse.events.snapshot import EventContext, UnpersistedEventContextBase
 from synapse.events.utils import maybe_upsert_event_field
 from synapse.events.validator import EventValidator
 from synapse.handlers.directory import DirectoryHandler
@@ -708,7 +708,7 @@ class EventCreationHandler:
 
         builder.internal_metadata.historical = historical
 
-        event, context = await self.create_new_client_event(
+        event, unpersisted_context = await self.create_new_client_event(
             builder=builder,
             requester=requester,
             allow_no_prev_events=allow_no_prev_events,
@@ -721,6 +721,8 @@ class EventCreationHandler:
             current_state_group=current_state_group,
         )
 
+        context = await unpersisted_context.persist(event)
+
         # In an ideal world we wouldn't need the second part of this condition. However,
         # this behaviour isn't spec'd yet, meaning we should be able to deactivate this
         # behaviour. Another reason is that this code is also evaluated each time a new
@@ -1083,13 +1085,14 @@ class EventCreationHandler:
         state_map: Optional[StateMap[str]] = None,
         for_batch: bool = False,
         current_state_group: Optional[int] = None,
-    ) -> Tuple[EventBase, EventContext]:
+    ) -> Tuple[EventBase, UnpersistedEventContextBase]:
         """Create a new event for a local client. If bool for_batch is true, will
         create an event using the prev_event_ids, and will create an event context for
         the event using the parameters state_map and current_state_group, thus these parameters
         must be provided in this case if for_batch is True. The subsequently created event
         and context are suitable for being batched up and bulk persisted to the database
-        with other similarly created events.
+        with other similarly created events. Note that this returns an UnpersistedEventContext,
+        which must be converted to an EventContext before it can be sent to the DB.
 
         Args:
             builder:
@@ -1131,7 +1134,7 @@ class EventCreationHandler:
                 batch persisting
 
         Returns:
-            Tuple of created event, context
+            Tuple of created event, UnpersistedEventContext
         """
         # Strip down the state_event_ids to only what we need to auth the event.
         # For example, we don't need extra m.room.member that don't match event.sender
@@ -1192,9 +1195,16 @@ class EventCreationHandler:
             event = await builder.build(
                 prev_event_ids=prev_event_ids, auth_event_ids=auth_ids, depth=depth
             )
-            context = await self.state.compute_event_context_for_batched(
-                event, state_map, current_state_group
+
+            context: UnpersistedEventContextBase = (
+                await self.state.calculate_context_info(
+                    event,
+                    state_ids_before_event=state_map,
+                    partial_state=False,
+                    state_group_before_event=current_state_group,
+                )
             )
+
         else:
             event = await builder.build(
                 prev_event_ids=prev_event_ids,
@@ -1244,16 +1254,17 @@ class EventCreationHandler:
 
                     state_map_for_event[(data.event_type, data.state_key)] = state_id
 
-                context = await self.state.compute_event_context(
+                # TODO(faster_joins): check how MSC2716 works and whether we can have
+                #   partial state here
+                #   https://github.com/matrix-org/synapse/issues/13003
+                context = await self.state.calculate_context_info(
                     event,
                     state_ids_before_event=state_map_for_event,
-                    # TODO(faster_joins): check how MSC2716 works and whether we can have
-                    #   partial state here
-                    #   https://github.com/matrix-org/synapse/issues/13003
                     partial_state=False,
                 )
+
             else:
-                context = await self.state.compute_event_context(event)
+                context = await self.state.calculate_context_info(event)
 
         if requester:
             context.app_service = requester.app_service
@@ -2082,9 +2093,9 @@ class EventCreationHandler:
 
     async def _rebuild_event_after_third_party_rules(
         self, third_party_result: dict, original_event: EventBase
-    ) -> Tuple[EventBase, EventContext]:
+    ) -> Tuple[EventBase, UnpersistedEventContextBase]:
         # the third_party_event_rules want to replace the event.
-        # we do some basic checks, and then return the replacement event and context.
+        # we do some basic checks, and then return the replacement event.
 
         # Construct a new EventBuilder and validate it, which helps with the
         # rest of these checks.
@@ -2138,5 +2149,6 @@ class EventCreationHandler:
 
         # we rebuild the event context, to be on the safe side. If nothing else,
         # delta_ids might need an update.
-        context = await self.state.compute_event_context(event)
+        context = await self.state.calculate_context_info(event)
+
         return event, context
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index fdfb46ab82..e877e6f1a1 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -39,7 +39,11 @@ from prometheus_client import Counter, Histogram
 from synapse.api.constants import EventTypes
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, StateResolutionVersions
 from synapse.events import EventBase
-from synapse.events.snapshot import EventContext
+from synapse.events.snapshot import (
+    EventContext,
+    UnpersistedEventContext,
+    UnpersistedEventContextBase,
+)
 from synapse.logging.context import ContextResourceUsage
 from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet
 from synapse.state import v1, v2
@@ -262,31 +266,31 @@ class StateHandler:
         state = await entry.get_state(self._state_storage_controller, StateFilter.all())
         return await self.store.get_joined_hosts(room_id, state, entry)
 
-    async def compute_event_context(
+    async def calculate_context_info(
         self,
         event: EventBase,
         state_ids_before_event: Optional[StateMap[str]] = None,
         partial_state: Optional[bool] = None,
-    ) -> EventContext:
-        """Build an EventContext structure for a non-outlier event.
-
-        (for an outlier, call EventContext.for_outlier directly)
-
-        This works out what the current state should be for the event, and
-        generates a new state group if necessary.
-
-        Args:
-            event:
-            state_ids_before_event: The event ids of the state before the event if
-                it can't be calculated from existing events. This is normally
-                only specified when receiving an event from federation where we
-                don't have the prev events, e.g. when backfilling.
-            partial_state:
-                `True` if `state_ids_before_event` is partial and omits non-critical
-                membership events.
-                `False` if `state_ids_before_event` is the full state.
-                `None` when `state_ids_before_event` is not provided. In this case, the
-                flag will be calculated based on `event`'s prev events.
+        state_group_before_event: Optional[int] = None,
+    ) -> UnpersistedEventContextBase:
+        """
+        Calculates the contents of an unpersisted event context, other than the current
+        state group (which is either provided or calculated when the event context is persisted)
+
+        state_ids_before_event:
+            The event ids of the full state before the event if
+            it can't be calculated from existing events. This is normally
+            only specified when receiving an event from federation where we
+            don't have the prev events, e.g. when backfilling or when the event
+            is being created for batch persisting.
+        partial_state:
+            `True` if `state_ids_before_event` is partial and omits non-critical
+            membership events.
+            `False` if `state_ids_before_event` is the full state.
+            `None` when `state_ids_before_event` is not provided. In this case, the
+            flag will be calculated based on `event`'s prev events.
+        state_group_before_event:
+            the current state group at the time of event, if known
         Returns:
             The event context.
 
@@ -294,7 +298,6 @@ class StateHandler:
             RuntimeError if `state_ids_before_event` is not provided and one or more
                 prev events are missing or outliers.
         """
-
         assert not event.internal_metadata.is_outlier()
 
         #
@@ -306,17 +309,6 @@ class StateHandler:
             state_group_before_event_prev_group = None
             deltas_to_state_group_before_event = None
 
-            # .. though we need to get a state group for it.
-            state_group_before_event = (
-                await self._state_storage_controller.store_state_group(
-                    event.event_id,
-                    event.room_id,
-                    prev_group=None,
-                    delta_ids=None,
-                    current_state_ids=state_ids_before_event,
-                )
-            )
-
             # the partial_state flag must be provided
             assert partial_state is not None
         else:
@@ -345,6 +337,7 @@ class StateHandler:
             logger.debug("calling resolve_state_groups from compute_event_context")
             # we've already taken into account partial state, so no need to wait for
             # complete state here.
+
             entry = await self.resolve_state_groups_for_events(
                 event.room_id,
                 event.prev_event_ids(),
@@ -383,18 +376,19 @@ class StateHandler:
         #
 
         if not event.is_state():
-            return EventContext.with_state(
+            return UnpersistedEventContext(
                 storage=self._storage_controllers,
                 state_group_before_event=state_group_before_event,
-                state_group=state_group_before_event,
+                state_group_after_event=state_group_before_event,
                 state_delta_due_to_event={},
-                prev_group=state_group_before_event_prev_group,
-                delta_ids=deltas_to_state_group_before_event,
+                prev_group_for_state_group_before_event=state_group_before_event_prev_group,
+                delta_ids_to_state_group_before_event=deltas_to_state_group_before_event,
                 partial_state=partial_state,
+                state_map_before_event=state_ids_before_event,
             )
 
         #
-        # otherwise, we'll need to create a new state group for after the event
+        # otherwise, we'll need to set up creating a new state group for after the event
         #
 
         key = (event.type, event.state_key)
@@ -412,88 +406,60 @@ class StateHandler:
 
         delta_ids = {key: event.event_id}
 
-        state_group_after_event = (
-            await self._state_storage_controller.store_state_group(
-                event.event_id,
-                event.room_id,
-                prev_group=state_group_before_event,
-                delta_ids=delta_ids,
-                current_state_ids=None,
-            )
-        )
-
-        return EventContext.with_state(
+        return UnpersistedEventContext(
             storage=self._storage_controllers,
-            state_group=state_group_after_event,
             state_group_before_event=state_group_before_event,
+            state_group_after_event=None,
             state_delta_due_to_event=delta_ids,
-            prev_group=state_group_before_event,
-            delta_ids=delta_ids,
+            prev_group_for_state_group_before_event=state_group_before_event_prev_group,
+            delta_ids_to_state_group_before_event=deltas_to_state_group_before_event,
             partial_state=partial_state,
+            state_map_before_event=state_ids_before_event,
         )
 
-    async def compute_event_context_for_batched(
+    async def compute_event_context(
         self,
         event: EventBase,
-        state_ids_before_event: StateMap[str],
-        current_state_group: int,
+        state_ids_before_event: Optional[StateMap[str]] = None,
+        partial_state: Optional[bool] = None,
     ) -> EventContext:
-        """
-        Generate an event context for an event that has not yet been persisted to the
-        database. Intended for use with events that are created to be persisted in a batch.
-        Args:
-            event: the event the context is being computed for
-            state_ids_before_event: a state map consisting of the state ids of the events
-            created prior to this event.
-            current_state_group: the current state group before the event.
-        """
-        state_group_before_event_prev_group = None
-        deltas_to_state_group_before_event = None
-
-        state_group_before_event = current_state_group
-
-        # if the event is not state, we are set
-        if not event.is_state():
-            return EventContext.with_state(
-                storage=self._storage_controllers,
-                state_group_before_event=state_group_before_event,
-                state_group=state_group_before_event,
-                state_delta_due_to_event={},
-                prev_group=state_group_before_event_prev_group,
-                delta_ids=deltas_to_state_group_before_event,
-                partial_state=False,
-            )
+        """Build an EventContext structure for a non-outlier event.
 
-        # otherwise, we'll need to create a new state group for after the event
-        key = (event.type, event.state_key)
+        (for an outlier, call EventContext.for_outlier directly)
 
-        if state_ids_before_event is not None:
-            replaces = state_ids_before_event.get(key)
+        This works out what the current state should be for the event, and
+        generates a new state group if necessary.
 
-        if replaces and replaces != event.event_id:
-            event.unsigned["replaces_state"] = replaces
+        Args:
+            event:
+            state_ids_before_event: The event ids of the state before the event if
+                it can't be calculated from existing events. This is normally
+                only specified when receiving an event from federation where we
+                don't have the prev events, e.g. when backfilling.
+            partial_state:
+                `True` if `state_ids_before_event` is partial and omits non-critical
+                membership events.
+                `False` if `state_ids_before_event` is the full state.
+                `None` when `state_ids_before_event` is not provided. In this case, the
+                flag will be calculated based on `event`'s prev events.
+            entry:
+                A state cache entry for the resolved state across the prev events. We may
+                have already calculated this, so if it's available pass it in
+        Returns:
+            The event context.
 
-        delta_ids = {key: event.event_id}
+        Raises:
+            RuntimeError if `state_ids_before_event` is not provided and one or more
+                prev events are missing or outliers.
+        """
 
-        state_group_after_event = (
-            await self._state_storage_controller.store_state_group(
-                event.event_id,
-                event.room_id,
-                prev_group=state_group_before_event,
-                delta_ids=delta_ids,
-                current_state_ids=None,
-            )
+        unpersisted_context = await self.calculate_context_info(
+            event=event,
+            state_ids_before_event=state_ids_before_event,
+            partial_state=partial_state,
         )
 
-        return EventContext.with_state(
-            storage=self._storage_controllers,
-            state_group=state_group_after_event,
-            state_group_before_event=state_group_before_event,
-            state_delta_due_to_event=delta_ids,
-            prev_group=state_group_before_event,
-            delta_ids=delta_ids,
-            partial_state=False,
-        )
+        return await unpersisted_context.persist(event)
 
     @measure_func()
     async def resolve_state_groups_for_events(
diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py
index 75fc5a17a4..e9be5fb504 100644
--- a/tests/handlers/test_user_directory.py
+++ b/tests/handlers/test_user_directory.py
@@ -949,10 +949,12 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
             },
         )
 
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             self.event_creation_handler.create_new_client_event(builder)
         )
 
+        context = self.get_success(unpersisted_context.persist(event))
+
         self.get_success(
             self.hs.get_storage_controllers().persistence.persist_event(event, context)
         )
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index 5c1ced355f..b50406e129 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -2934,10 +2934,12 @@ class UserMembershipRestTestCase(unittest.HomeserverTestCase):
             },
         )
 
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             event_creation_handler.create_new_client_event(builder)
         )
 
+        context = self.get_success(unpersisted_context.persist(event))
+
         self.get_success(storage_controllers.persistence.persist_event(event, context))
 
         # Now get rooms
diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py
index df4740f9d9..0100f7da14 100644
--- a/tests/storage/test_redaction.py
+++ b/tests/storage/test_redaction.py
@@ -74,10 +74,12 @@ class RedactionTestCase(unittest.HomeserverTestCase):
             },
         )
 
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             self.event_creation_handler.create_new_client_event(builder)
         )
 
+        context = self.get_success(unpersisted_context.persist(event))
+
         self.get_success(self._persistence.persist_event(event, context))
 
         return event
@@ -96,10 +98,12 @@ class RedactionTestCase(unittest.HomeserverTestCase):
             },
         )
 
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             self.event_creation_handler.create_new_client_event(builder)
         )
 
+        context = self.get_success(unpersisted_context.persist(event))
+
         self.get_success(self._persistence.persist_event(event, context))
 
         return event
@@ -119,10 +123,12 @@ class RedactionTestCase(unittest.HomeserverTestCase):
             },
         )
 
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             self.event_creation_handler.create_new_client_event(builder)
         )
 
+        context = self.get_success(unpersisted_context.persist(event))
+
         self.get_success(self._persistence.persist_event(event, context))
 
         return event
@@ -259,7 +265,7 @@ class RedactionTestCase(unittest.HomeserverTestCase):
             def internal_metadata(self) -> _EventInternalMetadata:
                 return self._base_builder.internal_metadata
 
-        event_1, context_1 = self.get_success(
+        event_1, unpersisted_context_1 = self.get_success(
             self.event_creation_handler.create_new_client_event(
                 cast(
                     EventBuilder,
@@ -280,9 +286,11 @@ class RedactionTestCase(unittest.HomeserverTestCase):
             )
         )
 
+        context_1 = self.get_success(unpersisted_context_1.persist(event_1))
+
         self.get_success(self._persistence.persist_event(event_1, context_1))
 
-        event_2, context_2 = self.get_success(
+        event_2, unpersisted_context_2 = self.get_success(
             self.event_creation_handler.create_new_client_event(
                 cast(
                     EventBuilder,
@@ -302,6 +310,8 @@ class RedactionTestCase(unittest.HomeserverTestCase):
                 )
             )
         )
+
+        context_2 = self.get_success(unpersisted_context_2.persist(event_2))
         self.get_success(self._persistence.persist_event(event_2, context_2))
 
         # fetch one of the redactions
@@ -421,10 +431,12 @@ class RedactionTestCase(unittest.HomeserverTestCase):
             },
         )
 
-        redaction_event, context = self.get_success(
+        redaction_event, unpersisted_context = self.get_success(
             self.event_creation_handler.create_new_client_event(builder)
         )
 
+        context = self.get_success(unpersisted_context.persist(redaction_event))
+
         self.get_success(self._persistence.persist_event(redaction_event, context))
 
         # Now lets jump to the future where we have censored the redaction event
diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py
index bad7f0bc60..f730b888f7 100644
--- a/tests/storage/test_state.py
+++ b/tests/storage/test_state.py
@@ -67,10 +67,12 @@ class StateStoreTestCase(HomeserverTestCase):
             },
         )
 
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             self.event_creation_handler.create_new_client_event(builder)
         )
 
+        context = self.get_success(unpersisted_context.persist(event))
+
         assert self.storage.persistence is not None
         self.get_success(self.storage.persistence.persist_event(event, context))
 
diff --git a/tests/test_utils/event_injection.py b/tests/test_utils/event_injection.py
index 1a50c2acf1..a6330ed840 100644
--- a/tests/test_utils/event_injection.py
+++ b/tests/test_utils/event_injection.py
@@ -92,8 +92,13 @@ async def create_event(
     builder = hs.get_event_builder_factory().for_room_version(
         KNOWN_ROOM_VERSIONS[room_version], kwargs
     )
-    event, context = await hs.get_event_creation_handler().create_new_client_event(
+    (
+        event,
+        unpersisted_context,
+    ) = await hs.get_event_creation_handler().create_new_client_event(
         builder, prev_event_ids=prev_event_ids
     )
 
+    context = await unpersisted_context.persist(event)
+
     return event, context
diff --git a/tests/test_visibility.py b/tests/test_visibility.py
index 875e37988f..36d6b37aa4 100644
--- a/tests/test_visibility.py
+++ b/tests/test_visibility.py
@@ -175,9 +175,10 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
             },
         )
 
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             self.event_creation_handler.create_new_client_event(builder)
         )
+        context = self.get_success(unpersisted_context.persist(event))
         self.get_success(
             self._storage_controllers.persistence.persist_event(event, context)
         )
@@ -202,9 +203,10 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
             },
         )
 
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             self.event_creation_handler.create_new_client_event(builder)
         )
+        context = self.get_success(unpersisted_context.persist(event))
 
         self.get_success(
             self._storage_controllers.persistence.persist_event(event, context)
@@ -226,9 +228,10 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
             },
         )
 
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             self.event_creation_handler.create_new_client_event(builder)
         )
+        context = self.get_success(unpersisted_context.persist(event))
 
         self.get_success(
             self._storage_controllers.persistence.persist_event(event, context)
diff --git a/tests/utils.py b/tests/utils.py
index d76bf9716a..15fabbc2d0 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -335,6 +335,9 @@ async def create_room(hs: HomeServer, room_id: str, creator_id: str) -> None:
         },
     )
 
-    event, context = await event_creation_handler.create_new_client_event(builder)
+    event, unpersisted_context = await event_creation_handler.create_new_client_event(
+        builder
+    )
+    context = await unpersisted_context.persist(event)
 
     await persistence_store.persist_event(event, context)
-- 
cgit 1.5.1


From a5a799722db0c33dc61fb2c6c7282ff7e82eb2e9 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 9 Feb 2023 22:33:39 +0000
Subject: Tag federation request spans with the worker name (#15042)

* Systematically include worker name as process info

* Changelog

* don't bother with inner setdefault
---
 changelog.d/15042.feature      |  1 +
 synapse/api/auth.py            |  7 -------
 synapse/logging/opentracing.py | 10 +++++++++-
 3 files changed, 10 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/15042.feature

(limited to 'synapse')

diff --git a/changelog.d/15042.feature b/changelog.d/15042.feature
new file mode 100644
index 0000000000..7a4de89f00
--- /dev/null
+++ b/changelog.d/15042.feature
@@ -0,0 +1 @@
+Tag opentracing spans for federation requests with the name of the worker serving the request.
diff --git a/synapse/api/auth.py b/synapse/api/auth.py
index 3d7f986ac7..66e869bc2d 100644
--- a/synapse/api/auth.py
+++ b/synapse/api/auth.py
@@ -32,7 +32,6 @@ from synapse.appservice import ApplicationService
 from synapse.http import get_request_user_agent
 from synapse.http.site import SynapseRequest
 from synapse.logging.opentracing import (
-    SynapseTags,
     active_span,
     force_tracing,
     start_active_span,
@@ -162,12 +161,6 @@ class Auth:
                 parent_span.set_tag(
                     "authenticated_entity", requester.authenticated_entity
                 )
-                # We tag the Synapse instance name so that it's an easy jumping
-                # off point into the logs. Can also be used to filter for an
-                # instance that is under load.
-                parent_span.set_tag(
-                    SynapseTags.INSTANCE_NAME, self.hs.get_instance_name()
-                )
                 parent_span.set_tag("user_id", requester.user.to_string())
                 if requester.device_id is not None:
                     parent_span.set_tag("device_id", requester.device_id)
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index 8ef9a0dda8..6c7cf1b294 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -466,8 +466,16 @@ def init_tracer(hs: "HomeServer") -> None:
         STRIP_INSTANCE_NUMBER_SUFFIX_REGEX, "", hs.get_instance_name()
     )
 
+    jaeger_config = hs.config.tracing.jaeger_config
+    tags = jaeger_config.setdefault("tags", {})
+
+    # tag the Synapse instance name so that it's an easy jumping
+    # off point into the logs. Can also be used to filter for an
+    # instance that is under load.
+    tags[SynapseTags.INSTANCE_NAME] = hs.get_instance_name()
+
     config = JaegerConfig(
-        config=hs.config.tracing.jaeger_config,
+        config=jaeger_config,
         service_name=f"{hs.config.server.server_name} {instance_name_by_type}",
         scope_manager=LogContextScopeManager(),
         metrics_factory=PrometheusMetricsFactory(),
-- 
cgit 1.5.1


From fd296b7343f2e557519f1ec81325ad836bcbdbf9 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 10 Feb 2023 10:52:35 +0100
Subject: Fix exception on start up about device lists (#15041)

Fixes #15010.
---
 changelog.d/15041.misc                    | 1 +
 synapse/storage/databases/main/devices.py | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 changelog.d/15041.misc

(limited to 'synapse')

diff --git a/changelog.d/15041.misc b/changelog.d/15041.misc
new file mode 100644
index 0000000000..d602b0043a
--- /dev/null
+++ b/changelog.d/15041.misc
@@ -0,0 +1 @@
+Fix a rare exception in logs on start up.
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index e8b6cc6b80..766c2052fb 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -100,6 +100,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
                 ("device_lists_outbound_pokes", "stream_id"),
                 ("device_lists_changes_in_room", "stream_id"),
                 ("device_lists_remote_pending", "stream_id"),
+                ("device_lists_changes_converted_stream_position", "stream_id"),
             ],
             is_writer=hs.config.worker.worker_app is None,
         )
-- 
cgit 1.5.1


From a481fb9f98ad10e5e129bdc7664c59498a7332f6 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 10 Feb 2023 08:09:47 -0500
Subject: Refactor get_user_devices_from_cache to avoid mutating cached values.
 (#15040)

The previous version of the code could mutate a cached value,
but only if the input requested all devices of a user *and* a specific
device.

To avoid this nonsensical situation we no longer fetch a specific
device ID if all of a user's devices are returned.
---
 changelog.d/15040.misc                    |  1 +
 synapse/handlers/e2e_keys.py              | 11 +++++++----
 synapse/storage/databases/main/devices.py | 31 +++++++++++++++++--------------
 3 files changed, 25 insertions(+), 18 deletions(-)
 create mode 100644 changelog.d/15040.misc

(limited to 'synapse')

diff --git a/changelog.d/15040.misc b/changelog.d/15040.misc
new file mode 100644
index 0000000000..ca129b64af
--- /dev/null
+++ b/changelog.d/15040.misc
@@ -0,0 +1 @@
+Avoid mutating a cached value in `get_user_devices_from_cache`.
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index d2188ca08f..43cbece21b 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -159,19 +159,22 @@ class E2eKeysHandler:
             # A map of destination -> user ID -> device IDs.
             remote_queries_not_in_cache: Dict[str, Dict[str, Iterable[str]]] = {}
             if remote_queries:
-                query_list: List[Tuple[str, Optional[str]]] = []
+                user_ids = set()
+                user_and_device_ids: List[Tuple[str, str]] = []
                 for user_id, device_ids in remote_queries.items():
                     if device_ids:
-                        query_list.extend(
+                        user_and_device_ids.extend(
                             (user_id, device_id) for device_id in device_ids
                         )
                     else:
-                        query_list.append((user_id, None))
+                        user_ids.add(user_id)
 
                 (
                     user_ids_not_in_cache,
                     remote_results,
-                ) = await self.store.get_user_devices_from_cache(query_list)
+                ) = await self.store.get_user_devices_from_cache(
+                    user_ids, user_and_device_ids
+                )
 
                 # Check that the homeserver still shares a room with all cached users.
                 # Note that this check may be slightly racy when a remote user leaves a
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 766c2052fb..85c1778a81 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -746,42 +746,45 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
     @trace
     @cancellable
     async def get_user_devices_from_cache(
-        self, query_list: List[Tuple[str, Optional[str]]]
+        self, user_ids: Set[str], user_and_device_ids: List[Tuple[str, str]]
     ) -> Tuple[Set[str], Dict[str, Dict[str, JsonDict]]]:
         """Get the devices (and keys if any) for remote users from the cache.
 
         Args:
-            query_list: List of (user_id, device_ids), if device_ids is
-                falsey then return all device ids for that user.
+            user_ids: users which should have all device IDs returned
+            user_and_device_ids: List of (user_id, device_id) pairs
 
         Returns:
             A tuple of (user_ids_not_in_cache, results_map), where
             user_ids_not_in_cache is a set of user_ids and results_map is a
             mapping of user_id -> device_id -> device_info.
         """
-        user_ids = {user_id for user_id, _ in query_list}
-        user_map = await self.get_device_list_last_stream_id_for_remotes(list(user_ids))
+        unique_user_ids = user_ids | {user_id for user_id, _ in user_and_device_ids}
+        user_map = await self.get_device_list_last_stream_id_for_remotes(
+            list(unique_user_ids)
+        )
 
         # We go and check if any of the users need to have their device lists
         # resynced. If they do then we remove them from the cached list.
         users_needing_resync = await self.get_user_ids_requiring_device_list_resync(
-            user_ids
+            unique_user_ids
         )
         user_ids_in_cache = {
             user_id for user_id, stream_id in user_map.items() if stream_id
         } - users_needing_resync
-        user_ids_not_in_cache = user_ids - user_ids_in_cache
+        user_ids_not_in_cache = unique_user_ids - user_ids_in_cache
 
+        # First fetch the users for which all devices are to be returned.
         results: Dict[str, Dict[str, JsonDict]] = {}
-        for user_id, device_id in query_list:
-            if user_id not in user_ids_in_cache:
-                continue
-
-            if device_id:
+        for user_id in user_ids:
+            if user_id in user_ids_in_cache:
+                results[user_id] = await self.get_cached_devices_for_user(user_id)
+        # Then fetch all device-specific requests, but skip users we've already
+        # fetched all devices for.
+        for user_id, device_id in user_and_device_ids:
+            if user_id in user_ids_in_cache and user_id not in user_ids:
                 device = await self._get_cached_user_device(user_id, device_id)
                 results.setdefault(user_id, {})[device_id] = device
-            else:
-                results[user_id] = await self.get_cached_devices_for_user(user_id)
 
         set_tag("in_cache", str(results))
         set_tag("not_in_cache", str(user_ids_not_in_cache))
-- 
cgit 1.5.1


From b95407908dfde97e483952722b6fa7a533ff5093 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Fri, 10 Feb 2023 13:11:20 +0000
Subject: Avoid mutating cached values in
 `_generate_sync_entry_for_account_data` (#15047)

---
 changelog.d/15047.misc   | 1 +
 synapse/handlers/sync.py | 2 ++
 2 files changed, 3 insertions(+)
 create mode 100644 changelog.d/15047.misc

(limited to 'synapse')

diff --git a/changelog.d/15047.misc b/changelog.d/15047.misc
new file mode 100644
index 0000000000..561dc874de
--- /dev/null
+++ b/changelog.d/15047.misc
@@ -0,0 +1 @@
+Avoid mutating cached values in `_generate_sync_entry_for_account_data`.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 3566537894..202b35eee6 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1753,6 +1753,7 @@ class SyncHandler:
             )
 
             if push_rules_changed:
+                global_account_data = dict(global_account_data)
                 global_account_data["m.push_rules"] = await self.push_rules_for_user(
                     sync_config.user
                 )
@@ -1763,6 +1764,7 @@ class SyncHandler:
                 account_data_by_room,
             ) = await self.store.get_account_data_for_user(sync_config.user.to_string())
 
+            global_account_data = dict(global_account_data)
             global_account_data["m.push_rules"] = await self.push_rules_for_user(
                 sync_config.user
             )
-- 
cgit 1.5.1


From cf5233b783273efc84b991e7242fb4761ccc201a Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 10 Feb 2023 09:22:16 -0500
Subject: Avoid fetching unused account data in sync. (#14973)

The per-room account data is no longer unconditionally
fetched, even if all rooms will be filtered out.

Global account data will not be fetched if it will all be
filtered out.
---
 changelog.d/14973.misc                         |   1 +
 synapse/api/filtering.py                       |  30 +++++-
 synapse/handlers/account_data.py               |  10 +-
 synapse/handlers/initial_sync.py               |   5 +-
 synapse/handlers/room_member.py                |   2 +-
 synapse/handlers/sync.py                       |  88 +++++++++--------
 synapse/rest/admin/users.py                    |   3 +-
 synapse/storage/databases/main/account_data.py | 127 ++++++++++++++++++-------
 8 files changed, 176 insertions(+), 90 deletions(-)
 create mode 100644 changelog.d/14973.misc

(limited to 'synapse')

diff --git a/changelog.d/14973.misc b/changelog.d/14973.misc
new file mode 100644
index 0000000000..3657623602
--- /dev/null
+++ b/changelog.d/14973.misc
@@ -0,0 +1 @@
+Improve performance of `/sync` in a few situations.
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index 83c42fc25a..b9f432cc23 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -219,9 +219,13 @@ class FilterCollection:
         self._room_timeline_filter = Filter(hs, room_filter_json.get("timeline", {}))
         self._room_state_filter = Filter(hs, room_filter_json.get("state", {}))
         self._room_ephemeral_filter = Filter(hs, room_filter_json.get("ephemeral", {}))
-        self._room_account_data = Filter(hs, room_filter_json.get("account_data", {}))
+        self._room_account_data_filter = Filter(
+            hs, room_filter_json.get("account_data", {})
+        )
         self._presence_filter = Filter(hs, filter_json.get("presence", {}))
-        self._account_data = Filter(hs, filter_json.get("account_data", {}))
+        self._global_account_data_filter = Filter(
+            hs, filter_json.get("account_data", {})
+        )
 
         self.include_leave = filter_json.get("room", {}).get("include_leave", False)
         self.event_fields = filter_json.get("event_fields", [])
@@ -256,8 +260,10 @@ class FilterCollection:
     ) -> List[UserPresenceState]:
         return await self._presence_filter.filter(presence_states)
 
-    async def filter_account_data(self, events: Iterable[JsonDict]) -> List[JsonDict]:
-        return await self._account_data.filter(events)
+    async def filter_global_account_data(
+        self, events: Iterable[JsonDict]
+    ) -> List[JsonDict]:
+        return await self._global_account_data_filter.filter(events)
 
     async def filter_room_state(self, events: Iterable[EventBase]) -> List[EventBase]:
         return await self._room_state_filter.filter(
@@ -279,7 +285,7 @@ class FilterCollection:
     async def filter_room_account_data(
         self, events: Iterable[JsonDict]
     ) -> List[JsonDict]:
-        return await self._room_account_data.filter(
+        return await self._room_account_data_filter.filter(
             await self._room_filter.filter(events)
         )
 
@@ -292,6 +298,13 @@ class FilterCollection:
             or self._presence_filter.filters_all_senders()
         )
 
+    def blocks_all_global_account_data(self) -> bool:
+        """True if all global account data will be filtered out."""
+        return (
+            self._global_account_data_filter.filters_all_types()
+            or self._global_account_data_filter.filters_all_senders()
+        )
+
     def blocks_all_room_ephemeral(self) -> bool:
         return (
             self._room_ephemeral_filter.filters_all_types()
@@ -299,6 +312,13 @@ class FilterCollection:
             or self._room_ephemeral_filter.filters_all_rooms()
         )
 
+    def blocks_all_room_account_data(self) -> bool:
+        return (
+            self._room_account_data_filter.filters_all_types()
+            or self._room_account_data_filter.filters_all_senders()
+            or self._room_account_data_filter.filters_all_rooms()
+        )
+
     def blocks_all_room_timeline(self) -> bool:
         return (
             self._room_timeline_filter.filters_all_types()
diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py
index 67e789eef7..797de46dbc 100644
--- a/synapse/handlers/account_data.py
+++ b/synapse/handlers/account_data.py
@@ -343,10 +343,12 @@ class AccountDataEventSource(EventSource[int, JsonDict]):
                 }
             )
 
-        (
-            account_data,
-            room_account_data,
-        ) = await self.store.get_updated_account_data_for_user(user_id, last_stream_id)
+        account_data = await self.store.get_updated_global_account_data_for_user(
+            user_id, last_stream_id
+        )
+        room_account_data = await self.store.get_updated_room_account_data_for_user(
+            user_id, last_stream_id
+        )
 
         for account_data_type, content in account_data.items():
             results.append({"type": account_data_type, "content": content})
diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py
index 191529bd8e..1a29abde98 100644
--- a/synapse/handlers/initial_sync.py
+++ b/synapse/handlers/initial_sync.py
@@ -154,9 +154,8 @@ class InitialSyncHandler:
 
         tags_by_room = await self.store.get_tags_for_user(user_id)
 
-        account_data, account_data_by_room = await self.store.get_account_data_for_user(
-            user_id
-        )
+        account_data = await self.store.get_global_account_data_for_user(user_id)
+        account_data_by_room = await self.store.get_room_account_data_for_user(user_id)
 
         public_room_ids = await self.store.get_public_room_ids()
 
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index d236cc09b5..6e7141d2ef 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -484,7 +484,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             user_id: The user's ID.
         """
         # Retrieve user account data for predecessor room
-        user_account_data, _ = await self.store.get_account_data_for_user(user_id)
+        user_account_data = await self.store.get_global_account_data_for_user(user_id)
 
         # Copy direct message state if applicable
         direct_rooms = user_account_data.get(AccountDataTypes.DIRECT, {})
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 202b35eee6..399685e5b7 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1444,9 +1444,9 @@ class SyncHandler:
 
         logger.debug("Fetching account data")
 
-        account_data_by_room = await self._generate_sync_entry_for_account_data(
-            sync_result_builder
-        )
+        # Global account data is included if it is not filtered out.
+        if not sync_config.filter_collection.blocks_all_global_account_data():
+            await self._generate_sync_entry_for_account_data(sync_result_builder)
 
         # Presence data is included if the server has it enabled and not filtered out.
         include_presence_data = bool(
@@ -1472,9 +1472,7 @@ class SyncHandler:
             (
                 newly_joined_rooms,
                 newly_left_rooms,
-            ) = await self._generate_sync_entry_for_rooms(
-                sync_result_builder, account_data_by_room
-            )
+            ) = await self._generate_sync_entry_for_rooms(sync_result_builder)
 
             # Work out which users have joined or left rooms we're in. We use this
             # to build the presence and device_list parts of the sync response in
@@ -1717,35 +1715,29 @@ class SyncHandler:
 
     async def _generate_sync_entry_for_account_data(
         self, sync_result_builder: "SyncResultBuilder"
-    ) -> Dict[str, Dict[str, JsonDict]]:
-        """Generates the account data portion of the sync response.
+    ) -> None:
+        """Generates the global account data portion of the sync response.
 
         Account data (called "Client Config" in the spec) can be set either globally
         or for a specific room. Account data consists of a list of events which
         accumulate state, much like a room.
 
-        This function retrieves global and per-room account data. The former is written
-        to the given `sync_result_builder`. The latter is returned directly, to be
-        later written to the `sync_result_builder` on a room-by-room basis.
+        This function retrieves global account data and writes it to the given
+        `sync_result_builder`. See `_generate_sync_entry_for_rooms` for handling
+        of per-room account data.
 
         Args:
             sync_result_builder
-
-        Returns:
-            A dictionary whose keys (room ids) map to the per room account data for that
-            room.
         """
         sync_config = sync_result_builder.sync_config
         user_id = sync_result_builder.sync_config.user.to_string()
         since_token = sync_result_builder.since_token
 
         if since_token and not sync_result_builder.full_state:
-            # TODO Do not fetch room account data if it will be unused.
-            (
-                global_account_data,
-                account_data_by_room,
-            ) = await self.store.get_updated_account_data_for_user(
-                user_id, since_token.account_data_key
+            global_account_data = (
+                await self.store.get_updated_global_account_data_for_user(
+                    user_id, since_token.account_data_key
+                )
             )
 
             push_rules_changed = await self.store.have_push_rules_changed_for_user(
@@ -1758,28 +1750,26 @@ class SyncHandler:
                     sync_config.user
                 )
         else:
-            # TODO Do not fetch room account data if it will be unused.
-            (
-                global_account_data,
-                account_data_by_room,
-            ) = await self.store.get_account_data_for_user(sync_config.user.to_string())
+            all_global_account_data = await self.store.get_global_account_data_for_user(
+                user_id
+            )
 
-            global_account_data = dict(global_account_data)
+            global_account_data = dict(all_global_account_data)
             global_account_data["m.push_rules"] = await self.push_rules_for_user(
                 sync_config.user
             )
 
-        account_data_for_user = await sync_config.filter_collection.filter_account_data(
-            [
-                {"type": account_data_type, "content": content}
-                for account_data_type, content in global_account_data.items()
-            ]
+        account_data_for_user = (
+            await sync_config.filter_collection.filter_global_account_data(
+                [
+                    {"type": account_data_type, "content": content}
+                    for account_data_type, content in global_account_data.items()
+                ]
+            )
         )
 
         sync_result_builder.account_data = account_data_for_user
 
-        return account_data_by_room
-
     async def _generate_sync_entry_for_presence(
         self,
         sync_result_builder: "SyncResultBuilder",
@@ -1839,9 +1829,7 @@ class SyncHandler:
         sync_result_builder.presence = presence
 
     async def _generate_sync_entry_for_rooms(
-        self,
-        sync_result_builder: "SyncResultBuilder",
-        account_data_by_room: Dict[str, Dict[str, JsonDict]],
+        self, sync_result_builder: "SyncResultBuilder"
     ) -> Tuple[AbstractSet[str], AbstractSet[str]]:
         """Generates the rooms portion of the sync response. Populates the
         `sync_result_builder` with the result.
@@ -1852,7 +1840,6 @@ class SyncHandler:
 
         Args:
             sync_result_builder
-            account_data_by_room: Dictionary of per room account data
 
         Returns:
             Returns a 2-tuple describing rooms the user has joined or left.
@@ -1865,9 +1852,30 @@ class SyncHandler:
         since_token = sync_result_builder.since_token
         user_id = sync_result_builder.sync_config.user.to_string()
 
+        blocks_all_rooms = (
+            sync_result_builder.sync_config.filter_collection.blocks_all_rooms()
+        )
+
+        # 0. Start by fetching room account data (if required).
+        if (
+            blocks_all_rooms
+            or sync_result_builder.sync_config.filter_collection.blocks_all_room_account_data()
+        ):
+            account_data_by_room: Mapping[str, Mapping[str, JsonDict]] = {}
+        elif since_token and not sync_result_builder.full_state:
+            account_data_by_room = (
+                await self.store.get_updated_room_account_data_for_user(
+                    user_id, since_token.account_data_key
+                )
+            )
+        else:
+            account_data_by_room = await self.store.get_room_account_data_for_user(
+                user_id
+            )
+
         # 1. Start by fetching all ephemeral events in rooms we've joined (if required).
         block_all_room_ephemeral = (
-            sync_result_builder.sync_config.filter_collection.blocks_all_rooms()
+            blocks_all_rooms
             or sync_result_builder.sync_config.filter_collection.blocks_all_room_ephemeral()
         )
         if block_all_room_ephemeral:
@@ -2294,7 +2302,7 @@ class SyncHandler:
         room_builder: "RoomSyncResultBuilder",
         ephemeral: List[JsonDict],
         tags: Optional[Dict[str, Dict[str, Any]]],
-        account_data: Dict[str, JsonDict],
+        account_data: Mapping[str, JsonDict],
         always_include: bool = False,
     ) -> None:
         """Populates the `joined` and `archived` section of `sync_result_builder`
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index b9dca8ef3a..0c0bf540b9 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -1192,7 +1192,8 @@ class AccountDataRestServlet(RestServlet):
         if not await self._store.get_user_by_id(user_id):
             raise NotFoundError("User not found")
 
-        global_data, by_room_data = await self._store.get_account_data_for_user(user_id)
+        global_data = await self._store.get_global_account_data_for_user(user_id)
+        by_room_data = await self._store.get_room_account_data_for_user(user_id)
         return HTTPStatus.OK, {
             "account_data": {
                 "global": global_data,
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 8a359d7eb8..2d6f02c14f 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -21,6 +21,7 @@ from typing import (
     FrozenSet,
     Iterable,
     List,
+    Mapping,
     Optional,
     Tuple,
     cast,
@@ -122,25 +123,25 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
         return self._account_data_id_gen.get_current_token()
 
     @cached()
-    async def get_account_data_for_user(
+    async def get_global_account_data_for_user(
         self, user_id: str
-    ) -> Tuple[Dict[str, JsonDict], Dict[str, Dict[str, JsonDict]]]:
+    ) -> Mapping[str, JsonDict]:
         """
-        Get all the client account_data for a user.
+        Get all the global client account_data for a user.
 
         If experimental MSC3391 support is enabled, any entries with an empty
         content body are excluded; as this means they have been deleted.
 
         Args:
             user_id: The user to get the account_data for.
+
         Returns:
-            A 2-tuple of a dict of global account_data and a dict mapping from
-            room_id string to per room account_data dicts.
+            The global account_data.
         """
 
-        def get_account_data_for_user_txn(
+        def get_global_account_data_for_user(
             txn: LoggingTransaction,
-        ) -> Tuple[Dict[str, JsonDict], Dict[str, Dict[str, JsonDict]]]:
+        ) -> Dict[str, JsonDict]:
             # The 'content != '{}' condition below prevents us from using
             # `simple_select_list_txn` here, as it doesn't support conditions
             # other than 'equals'.
@@ -158,10 +159,34 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             txn.execute(sql, (user_id,))
             rows = self.db_pool.cursor_to_dict(txn)
 
-            global_account_data = {
+            return {
                 row["account_data_type"]: db_to_json(row["content"]) for row in rows
             }
 
+        return await self.db_pool.runInteraction(
+            "get_global_account_data_for_user", get_global_account_data_for_user
+        )
+
+    @cached()
+    async def get_room_account_data_for_user(
+        self, user_id: str
+    ) -> Mapping[str, Mapping[str, JsonDict]]:
+        """
+        Get all of the per-room client account_data for a user.
+
+        If experimental MSC3391 support is enabled, any entries with an empty
+        content body are excluded; as this means they have been deleted.
+
+        Args:
+            user_id: The user to get the account_data for.
+
+        Returns:
+            A dict mapping from room_id string to per-room account_data dicts.
+        """
+
+        def get_room_account_data_for_user_txn(
+            txn: LoggingTransaction,
+        ) -> Dict[str, Dict[str, JsonDict]]:
             # The 'content != '{}' condition below prevents us from using
             # `simple_select_list_txn` here, as it doesn't support conditions
             # other than 'equals'.
@@ -185,10 +210,10 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
 
                 room_data[row["account_data_type"]] = db_to_json(row["content"])
 
-            return global_account_data, by_room
+            return by_room
 
         return await self.db_pool.runInteraction(
-            "get_account_data_for_user", get_account_data_for_user_txn
+            "get_room_account_data_for_user_txn", get_room_account_data_for_user_txn
         )
 
     @cached(num_args=2, max_entries=5000, tree=True)
@@ -342,36 +367,61 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             "get_updated_room_account_data", get_updated_room_account_data_txn
         )
 
-    async def get_updated_account_data_for_user(
+    async def get_updated_global_account_data_for_user(
         self, user_id: str, stream_id: int
-    ) -> Tuple[Dict[str, JsonDict], Dict[str, Dict[str, JsonDict]]]:
-        """Get all the client account_data for a that's changed for a user
+    ) -> Dict[str, JsonDict]:
+        """Get all the global account_data that's changed for a user.
 
         Args:
             user_id: The user to get the account_data for.
             stream_id: The point in the stream since which to get updates
+
         Returns:
-            A deferred pair of a dict of global account_data and a dict
-            mapping from room_id string to per room account_data dicts.
+            A dict of global account_data.
         """
 
-        def get_updated_account_data_for_user_txn(
+        def get_updated_global_account_data_for_user(
             txn: LoggingTransaction,
-        ) -> Tuple[Dict[str, JsonDict], Dict[str, Dict[str, JsonDict]]]:
-            sql = (
-                "SELECT account_data_type, content FROM account_data"
-                " WHERE user_id = ? AND stream_id > ?"
-            )
-
+        ) -> Dict[str, JsonDict]:
+            sql = """
+                SELECT account_data_type, content FROM account_data
+                WHERE user_id = ? AND stream_id > ?
+            """
             txn.execute(sql, (user_id, stream_id))
 
-            global_account_data = {row[0]: db_to_json(row[1]) for row in txn}
+            return {row[0]: db_to_json(row[1]) for row in txn}
 
-            sql = (
-                "SELECT room_id, account_data_type, content FROM room_account_data"
-                " WHERE user_id = ? AND stream_id > ?"
-            )
+        changed = self._account_data_stream_cache.has_entity_changed(
+            user_id, int(stream_id)
+        )
+        if not changed:
+            return {}
+
+        return await self.db_pool.runInteraction(
+            "get_updated_global_account_data_for_user",
+            get_updated_global_account_data_for_user,
+        )
+
+    async def get_updated_room_account_data_for_user(
+        self, user_id: str, stream_id: int
+    ) -> Dict[str, Dict[str, JsonDict]]:
+        """Get all the room account_data that's changed for a user.
 
+        Args:
+            user_id: The user to get the account_data for.
+            stream_id: The point in the stream since which to get updates
+
+        Returns:
+            A dict mapping from room_id string to per room account_data dicts.
+        """
+
+        def get_updated_room_account_data_for_user_txn(
+            txn: LoggingTransaction,
+        ) -> Dict[str, Dict[str, JsonDict]]:
+            sql = """
+                SELECT room_id, account_data_type, content FROM room_account_data
+                WHERE user_id = ? AND stream_id > ?
+            """
             txn.execute(sql, (user_id, stream_id))
 
             account_data_by_room: Dict[str, Dict[str, JsonDict]] = {}
@@ -379,16 +429,17 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
                 room_account_data = account_data_by_room.setdefault(row[0], {})
                 room_account_data[row[1]] = db_to_json(row[2])
 
-            return global_account_data, account_data_by_room
+            return account_data_by_room
 
         changed = self._account_data_stream_cache.has_entity_changed(
             user_id, int(stream_id)
         )
         if not changed:
-            return {}, {}
+            return {}
 
         return await self.db_pool.runInteraction(
-            "get_updated_account_data_for_user", get_updated_account_data_for_user_txn
+            "get_updated_room_account_data_for_user",
+            get_updated_room_account_data_for_user_txn,
         )
 
     @cached(max_entries=5000, iterable=True)
@@ -444,7 +495,8 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
                     self.get_global_account_data_by_type_for_user.invalidate(
                         (row.user_id, row.data_type)
                     )
-                self.get_account_data_for_user.invalidate((row.user_id,))
+                self.get_global_account_data_for_user.invalidate((row.user_id,))
+                self.get_room_account_data_for_user.invalidate((row.user_id,))
                 self.get_account_data_for_room.invalidate((row.user_id, row.room_id))
                 self.get_account_data_for_room_and_type.invalidate(
                     (row.user_id, row.room_id, row.data_type)
@@ -492,7 +544,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             )
 
             self._account_data_stream_cache.entity_has_changed(user_id, next_id)
-            self.get_account_data_for_user.invalidate((user_id,))
+            self.get_room_account_data_for_user.invalidate((user_id,))
             self.get_account_data_for_room.invalidate((user_id, room_id))
             self.get_account_data_for_room_and_type.prefill(
                 (user_id, room_id, account_data_type), content
@@ -558,7 +610,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
                 return None
 
             self._account_data_stream_cache.entity_has_changed(user_id, next_id)
-            self.get_account_data_for_user.invalidate((user_id,))
+            self.get_room_account_data_for_user.invalidate((user_id,))
             self.get_account_data_for_room.invalidate((user_id, room_id))
             self.get_account_data_for_room_and_type.prefill(
                 (user_id, room_id, account_data_type), {}
@@ -593,7 +645,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             )
 
             self._account_data_stream_cache.entity_has_changed(user_id, next_id)
-            self.get_account_data_for_user.invalidate((user_id,))
+            self.get_global_account_data_for_user.invalidate((user_id,))
             self.get_global_account_data_by_type_for_user.invalidate(
                 (user_id, account_data_type)
             )
@@ -761,7 +813,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
                 return None
 
             self._account_data_stream_cache.entity_has_changed(user_id, next_id)
-            self.get_account_data_for_user.invalidate((user_id,))
+            self.get_global_account_data_for_user.invalidate((user_id,))
             self.get_global_account_data_by_type_for_user.prefill(
                 (user_id, account_data_type), {}
             )
@@ -822,7 +874,10 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             txn, self.get_account_data_for_room_and_type, (user_id,)
         )
         self._invalidate_cache_and_stream(
-            txn, self.get_account_data_for_user, (user_id,)
+            txn, self.get_global_account_data_for_user, (user_id,)
+        )
+        self._invalidate_cache_and_stream(
+            txn, self.get_room_account_data_for_user, (user_id,)
         )
         self._invalidate_cache_and_stream(
             txn, self.get_global_account_data_by_type_for_user, (user_id,)
-- 
cgit 1.5.1


From 14be78d492fc31e743e9e5855ddb8b4c9520985a Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 10 Feb 2023 12:37:07 -0500
Subject: Support for MSC3758: exact_event_match push condition (#14964)

This specifies to search for an exact value match, instead of
string globbing. It only works across non-compound JSON values
(null, boolean, integer, and strings).
---
 changelog.d/14964.feature                |   1 +
 rust/benches/evaluator.rs                |  65 +++++++++++---
 rust/src/push/evaluator.rs               |  69 +++++++++++----
 rust/src/push/mod.rs                     |  83 +++++++++++++++++
 stubs/synapse/synapse_rust/push.pyi      |   7 +-
 synapse/config/experimental.py           |   5 ++
 synapse/push/bulk_push_rule_evaluator.py |  18 ++--
 synapse/types/__init__.py                |   2 +
 tests/push/test_push_rule_evaluator.py   | 147 ++++++++++++++++++++++++++++++-
 9 files changed, 356 insertions(+), 41 deletions(-)
 create mode 100644 changelog.d/14964.feature

(limited to 'synapse')

diff --git a/changelog.d/14964.feature b/changelog.d/14964.feature
new file mode 100644
index 0000000000..13c0bc193b
--- /dev/null
+++ b/changelog.d/14964.feature
@@ -0,0 +1 @@
+Implement the experimental `exact_event_match` push rule condition from [MSC3758](https://github.com/matrix-org/matrix-spec-proposals/pull/3758).
diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs
index 35f7a50bce..229553ebf8 100644
--- a/rust/benches/evaluator.rs
+++ b/rust/benches/evaluator.rs
@@ -16,6 +16,7 @@
 use std::collections::BTreeSet;
 use synapse::push::{
     evaluator::PushRuleEvaluator, Condition, EventMatchCondition, FilteredPushRules, PushRules,
+    SimpleJsonValue,
 };
 use test::Bencher;
 
@@ -24,9 +25,18 @@ extern crate test;
 #[bench]
 fn bench_match_exact(b: &mut Bencher) {
     let flattened_keys = [
-        ("type".to_string(), "m.text".to_string()),
-        ("room_id".to_string(), "!room:server".to_string()),
-        ("content.body".to_string(), "test message".to_string()),
+        (
+            "type".to_string(),
+            SimpleJsonValue::Str("m.text".to_string()),
+        ),
+        (
+            "room_id".to_string(),
+            SimpleJsonValue::Str("!room:server".to_string()),
+        ),
+        (
+            "content.body".to_string(),
+            SimpleJsonValue::Str("test message".to_string()),
+        ),
     ]
     .into_iter()
     .collect();
@@ -43,6 +53,7 @@ fn bench_match_exact(b: &mut Bencher) {
         true,
         vec![],
         false,
+        false,
     )
     .unwrap();
 
@@ -63,9 +74,18 @@ fn bench_match_exact(b: &mut Bencher) {
 #[bench]
 fn bench_match_word(b: &mut Bencher) {
     let flattened_keys = [
-        ("type".to_string(), "m.text".to_string()),
-        ("room_id".to_string(), "!room:server".to_string()),
-        ("content.body".to_string(), "test message".to_string()),
+        (
+            "type".to_string(),
+            SimpleJsonValue::Str("m.text".to_string()),
+        ),
+        (
+            "room_id".to_string(),
+            SimpleJsonValue::Str("!room:server".to_string()),
+        ),
+        (
+            "content.body".to_string(),
+            SimpleJsonValue::Str("test message".to_string()),
+        ),
     ]
     .into_iter()
     .collect();
@@ -82,6 +102,7 @@ fn bench_match_word(b: &mut Bencher) {
         true,
         vec![],
         false,
+        false,
     )
     .unwrap();
 
@@ -102,9 +123,18 @@ fn bench_match_word(b: &mut Bencher) {
 #[bench]
 fn bench_match_word_miss(b: &mut Bencher) {
     let flattened_keys = [
-        ("type".to_string(), "m.text".to_string()),
-        ("room_id".to_string(), "!room:server".to_string()),
-        ("content.body".to_string(), "test message".to_string()),
+        (
+            "type".to_string(),
+            SimpleJsonValue::Str("m.text".to_string()),
+        ),
+        (
+            "room_id".to_string(),
+            SimpleJsonValue::Str("!room:server".to_string()),
+        ),
+        (
+            "content.body".to_string(),
+            SimpleJsonValue::Str("test message".to_string()),
+        ),
     ]
     .into_iter()
     .collect();
@@ -121,6 +151,7 @@ fn bench_match_word_miss(b: &mut Bencher) {
         true,
         vec![],
         false,
+        false,
     )
     .unwrap();
 
@@ -141,9 +172,18 @@ fn bench_match_word_miss(b: &mut Bencher) {
 #[bench]
 fn bench_eval_message(b: &mut Bencher) {
     let flattened_keys = [
-        ("type".to_string(), "m.text".to_string()),
-        ("room_id".to_string(), "!room:server".to_string()),
-        ("content.body".to_string(), "test message".to_string()),
+        (
+            "type".to_string(),
+            SimpleJsonValue::Str("m.text".to_string()),
+        ),
+        (
+            "room_id".to_string(),
+            SimpleJsonValue::Str("!room:server".to_string()),
+        ),
+        (
+            "content.body".to_string(),
+            SimpleJsonValue::Str("test message".to_string()),
+        ),
     ]
     .into_iter()
     .collect();
@@ -160,6 +200,7 @@ fn bench_eval_message(b: &mut Bencher) {
         true,
         vec![],
         false,
+        false,
     )
     .unwrap();
 
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index ec7a8c4453..dd6b4343ec 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -22,8 +22,8 @@ use regex::Regex;
 
 use super::{
     utils::{get_glob_matcher, get_localpart_from_id, GlobMatchType},
-    Action, Condition, EventMatchCondition, FilteredPushRules, KnownCondition,
-    RelatedEventMatchCondition,
+    Action, Condition, EventMatchCondition, ExactEventMatchCondition, FilteredPushRules,
+    KnownCondition, RelatedEventMatchCondition, SimpleJsonValue,
 };
 
 lazy_static! {
@@ -61,9 +61,9 @@ impl RoomVersionFeatures {
 /// Allows running a set of push rules against a particular event.
 #[pyclass]
 pub struct PushRuleEvaluator {
-    /// A mapping of "flattened" keys to string values in the event, e.g.
+    /// A mapping of "flattened" keys to simple JSON values in the event, e.g.
     /// includes things like "type" and "content.msgtype".
-    flattened_keys: BTreeMap<String, String>,
+    flattened_keys: BTreeMap<String, SimpleJsonValue>,
 
     /// The "content.body", if any.
     body: String,
@@ -87,7 +87,7 @@ pub struct PushRuleEvaluator {
 
     /// The related events, indexed by relation type. Flattened in the same manner as
     /// `flattened_keys`.
-    related_events_flattened: BTreeMap<String, BTreeMap<String, String>>,
+    related_events_flattened: BTreeMap<String, BTreeMap<String, SimpleJsonValue>>,
 
     /// If msc3664, push rules for related events, is enabled.
     related_event_match_enabled: bool,
@@ -98,6 +98,9 @@ pub struct PushRuleEvaluator {
     /// If MSC3931 (room version feature flags) is enabled. Usually controlled by the same
     /// flag as MSC1767 (extensible events core).
     msc3931_enabled: bool,
+
+    /// If MSC3758 (exact_event_match push rule condition) is enabled.
+    msc3758_exact_event_match: bool,
 }
 
 #[pymethods]
@@ -106,22 +109,23 @@ impl PushRuleEvaluator {
     #[allow(clippy::too_many_arguments)]
     #[new]
     pub fn py_new(
-        flattened_keys: BTreeMap<String, String>,
+        flattened_keys: BTreeMap<String, SimpleJsonValue>,
         has_mentions: bool,
         user_mentions: BTreeSet<String>,
         room_mention: bool,
         room_member_count: u64,
         sender_power_level: Option<i64>,
         notification_power_levels: BTreeMap<String, i64>,
-        related_events_flattened: BTreeMap<String, BTreeMap<String, String>>,
+        related_events_flattened: BTreeMap<String, BTreeMap<String, SimpleJsonValue>>,
         related_event_match_enabled: bool,
         room_version_feature_flags: Vec<String>,
         msc3931_enabled: bool,
+        msc3758_exact_event_match: bool,
     ) -> Result<Self, Error> {
-        let body = flattened_keys
-            .get("content.body")
-            .cloned()
-            .unwrap_or_default();
+        let body = match flattened_keys.get("content.body") {
+            Some(SimpleJsonValue::Str(s)) => s.clone(),
+            _ => String::new(),
+        };
 
         Ok(PushRuleEvaluator {
             flattened_keys,
@@ -136,6 +140,7 @@ impl PushRuleEvaluator {
             related_event_match_enabled,
             room_version_feature_flags,
             msc3931_enabled,
+            msc3758_exact_event_match,
         })
     }
 
@@ -252,6 +257,9 @@ impl PushRuleEvaluator {
             KnownCondition::EventMatch(event_match) => {
                 self.match_event_match(event_match, user_id)?
             }
+            KnownCondition::ExactEventMatch(exact_event_match) => {
+                self.match_exact_event_match(exact_event_match)?
+            }
             KnownCondition::RelatedEventMatch(event_match) => {
                 self.match_related_event_match(event_match, user_id)?
             }
@@ -337,7 +345,9 @@ impl PushRuleEvaluator {
             return Ok(false);
         };
 
-        let haystack = if let Some(haystack) = self.flattened_keys.get(&*event_match.key) {
+        let haystack = if let Some(SimpleJsonValue::Str(haystack)) =
+            self.flattened_keys.get(&*event_match.key)
+        {
             haystack
         } else {
             return Ok(false);
@@ -355,6 +365,27 @@ impl PushRuleEvaluator {
         compiled_pattern.is_match(haystack)
     }
 
+    /// Evaluates an `exact_event_match` condition. (MSC3758)
+    fn match_exact_event_match(
+        &self,
+        exact_event_match: &ExactEventMatchCondition,
+    ) -> Result<bool, Error> {
+        // First check if the feature is enabled.
+        if !self.msc3758_exact_event_match {
+            return Ok(false);
+        }
+
+        let value = &exact_event_match.value;
+
+        let haystack = if let Some(haystack) = self.flattened_keys.get(&*exact_event_match.key) {
+            haystack
+        } else {
+            return Ok(false);
+        };
+
+        Ok(haystack == &**value)
+    }
+
     /// Evaluates a `related_event_match` condition. (MSC3664)
     fn match_related_event_match(
         &self,
@@ -410,7 +441,7 @@ impl PushRuleEvaluator {
             return Ok(false);
         };
 
-        let haystack = if let Some(haystack) = event.get(&**key) {
+        let haystack = if let Some(SimpleJsonValue::Str(haystack)) = event.get(&**key) {
             haystack
         } else {
             return Ok(false);
@@ -455,7 +486,10 @@ impl PushRuleEvaluator {
 #[test]
 fn push_rule_evaluator() {
     let mut flattened_keys = BTreeMap::new();
-    flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string());
+    flattened_keys.insert(
+        "content.body".to_string(),
+        SimpleJsonValue::Str("foo bar bob hello".to_string()),
+    );
     let evaluator = PushRuleEvaluator::py_new(
         flattened_keys,
         false,
@@ -468,6 +502,7 @@ fn push_rule_evaluator() {
         true,
         vec![],
         true,
+        true,
     )
     .unwrap();
 
@@ -482,7 +517,10 @@ fn test_requires_room_version_supports_condition() {
     use crate::push::{PushRule, PushRules};
 
     let mut flattened_keys = BTreeMap::new();
-    flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string());
+    flattened_keys.insert(
+        "content.body".to_string(),
+        SimpleJsonValue::Str("foo bar bob hello".to_string()),
+    );
     let flags = vec![RoomVersionFeatures::ExtensibleEvents.as_str().to_string()];
     let evaluator = PushRuleEvaluator::py_new(
         flattened_keys,
@@ -496,6 +534,7 @@ fn test_requires_room_version_supports_condition() {
         false,
         flags,
         true,
+        true,
     )
     .unwrap();
 
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index 3c4f876cab..79e519fe11 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -56,7 +56,9 @@ use std::collections::{BTreeMap, HashMap, HashSet};
 
 use anyhow::{Context, Error};
 use log::warn;
+use pyo3::exceptions::PyTypeError;
 use pyo3::prelude::*;
+use pyo3::types::{PyBool, PyLong, PyString};
 use pythonize::{depythonize, pythonize};
 use serde::de::Error as _;
 use serde::{Deserialize, Serialize};
@@ -248,6 +250,36 @@ impl<'de> Deserialize<'de> for Action {
     }
 }
 
+/// A simple JSON value (string, int, boolean, or null).
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+#[serde(untagged)]
+pub enum SimpleJsonValue {
+    Str(String),
+    Int(i64),
+    Bool(bool),
+    Null,
+}
+
+impl<'source> FromPyObject<'source> for SimpleJsonValue {
+    fn extract(ob: &'source PyAny) -> PyResult<Self> {
+        if let Ok(s) = <PyString as pyo3::PyTryFrom>::try_from(ob) {
+            Ok(SimpleJsonValue::Str(s.to_string()))
+        // A bool *is* an int, ensure we try bool first.
+        } else if let Ok(b) = <PyBool as pyo3::PyTryFrom>::try_from(ob) {
+            Ok(SimpleJsonValue::Bool(b.extract()?))
+        } else if let Ok(i) = <PyLong as pyo3::PyTryFrom>::try_from(ob) {
+            Ok(SimpleJsonValue::Int(i.extract()?))
+        } else if ob.is_none() {
+            Ok(SimpleJsonValue::Null)
+        } else {
+            Err(PyTypeError::new_err(format!(
+                "Can't convert from {} to SimpleJsonValue",
+                ob.get_type().name()?
+            )))
+        }
+    }
+}
+
 /// A condition used in push rules to match against an event.
 ///
 /// We need this split as `serde` doesn't give us the ability to have a
@@ -267,6 +299,8 @@ pub enum Condition {
 #[serde(tag = "kind")]
 pub enum KnownCondition {
     EventMatch(EventMatchCondition),
+    #[serde(rename = "com.beeper.msc3758.exact_event_match")]
+    ExactEventMatch(ExactEventMatchCondition),
     #[serde(rename = "im.nheko.msc3664.related_event_match")]
     RelatedEventMatch(RelatedEventMatchCondition),
     #[serde(rename = "org.matrix.msc3952.is_user_mention")]
@@ -309,6 +343,13 @@ pub struct EventMatchCondition {
     pub pattern_type: Option<Cow<'static, str>>,
 }
 
+/// The body of a [`KnownCondition::ExactEventMatch`]
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct ExactEventMatchCondition {
+    pub key: Cow<'static, str>,
+    pub value: Cow<'static, SimpleJsonValue>,
+}
+
 /// The body of a [`Condition::RelatedEventMatch`]
 #[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct RelatedEventMatchCondition {
@@ -542,6 +583,48 @@ fn test_deserialize_unstable_msc3931_condition() {
     ));
 }
 
+#[test]
+fn test_deserialize_unstable_msc3758_condition() {
+    // A string condition should work.
+    let json =
+        r#"{"kind":"com.beeper.msc3758.exact_event_match","key":"content.value","value":"foo"}"#;
+
+    let condition: Condition = serde_json::from_str(json).unwrap();
+    assert!(matches!(
+        condition,
+        Condition::Known(KnownCondition::ExactEventMatch(_))
+    ));
+
+    // A boolean condition should work.
+    let json =
+        r#"{"kind":"com.beeper.msc3758.exact_event_match","key":"content.value","value":true}"#;
+
+    let condition: Condition = serde_json::from_str(json).unwrap();
+    assert!(matches!(
+        condition,
+        Condition::Known(KnownCondition::ExactEventMatch(_))
+    ));
+
+    // An integer condition should work.
+    let json = r#"{"kind":"com.beeper.msc3758.exact_event_match","key":"content.value","value":1}"#;
+
+    let condition: Condition = serde_json::from_str(json).unwrap();
+    assert!(matches!(
+        condition,
+        Condition::Known(KnownCondition::ExactEventMatch(_))
+    ));
+
+    // A null condition should work.
+    let json =
+        r#"{"kind":"com.beeper.msc3758.exact_event_match","key":"content.value","value":null}"#;
+
+    let condition: Condition = serde_json::from_str(json).unwrap();
+    assert!(matches!(
+        condition,
+        Condition::Known(KnownCondition::ExactEventMatch(_))
+    ));
+}
+
 #[test]
 fn test_deserialize_unstable_msc3952_user_condition() {
     let json = r#"{"kind":"org.matrix.msc3952.is_user_mention"}"#;
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index 754acab2f9..328f681a29 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -14,7 +14,7 @@
 
 from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Set, Tuple, Union
 
-from synapse.types import JsonDict
+from synapse.types import JsonDict, SimpleJsonValue
 
 class PushRule:
     @property
@@ -56,17 +56,18 @@ def get_base_rule_ids() -> Collection[str]: ...
 class PushRuleEvaluator:
     def __init__(
         self,
-        flattened_keys: Mapping[str, str],
+        flattened_keys: Mapping[str, SimpleJsonValue],
         has_mentions: bool,
         user_mentions: Set[str],
         room_mention: bool,
         room_member_count: int,
         sender_power_level: Optional[int],
         notification_power_levels: Mapping[str, int],
-        related_events_flattened: Mapping[str, Mapping[str, str]],
+        related_events_flattened: Mapping[str, Mapping[str, SimpleJsonValue]],
         related_event_match_enabled: bool,
         room_version_feature_flags: Tuple[str, ...],
         msc3931_enabled: bool,
+        msc3758_exact_event_match: bool,
     ): ...
     def run(
         self,
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 5e3a889081..6ac2f0c10d 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -169,6 +169,11 @@ class ExperimentalConfig(Config):
         # MSC3925: do not replace events with their edits
         self.msc3925_inhibit_edit = experimental.get("msc3925_inhibit_edit", False)
 
+        # MSC3758: exact_event_match push rule condition
+        self.msc3758_exact_event_match = experimental.get(
+            "msc3758_exact_event_match", False
+        )
+
         # MSC3873: Disambiguate event_match keys.
         self.msc3783_escape_event_match_key = experimental.get(
             "msc3783_escape_event_match_key", False
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 39d2f88f03..8568aca528 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -43,6 +43,7 @@ from synapse.events.snapshot import EventContext
 from synapse.state import POWER_KEY
 from synapse.storage.databases.main.roommember import EventIdMembership
 from synapse.synapse_rust.push import FilteredPushRules, PushRuleEvaluator
+from synapse.types import SimpleJsonValue
 from synapse.types.state import StateFilter
 from synapse.util.caches import register_cache
 from synapse.util.metrics import measure_func
@@ -256,13 +257,15 @@ class BulkPushRuleEvaluator:
 
         return pl_event.content if pl_event else {}, sender_level
 
-    async def _related_events(self, event: EventBase) -> Dict[str, Dict[str, str]]:
+    async def _related_events(
+        self, event: EventBase
+    ) -> Dict[str, Dict[str, SimpleJsonValue]]:
         """Fetches the related events for 'event'. Sets the im.vector.is_falling_back key if the event is from a fallback relation
 
         Returns:
             Mapping of relation type to flattened events.
         """
-        related_events: Dict[str, Dict[str, str]] = {}
+        related_events: Dict[str, Dict[str, SimpleJsonValue]] = {}
         if self._related_event_match_enabled:
             related_event_id = event.content.get("m.relates_to", {}).get("event_id")
             relation_type = event.content.get("m.relates_to", {}).get("rel_type")
@@ -425,6 +428,7 @@ class BulkPushRuleEvaluator:
             self._related_event_match_enabled,
             event.room_version.msc3931_push_features,
             self.hs.config.experimental.msc1767_enabled,  # MSC3931 flag
+            self.hs.config.experimental.msc3758_exact_event_match,
         )
 
         users = rules_by_user.keys()
@@ -501,15 +505,15 @@ StateGroup = Union[object, int]
 def _flatten_dict(
     d: Union[EventBase, Mapping[str, Any]],
     prefix: Optional[List[str]] = None,
-    result: Optional[Dict[str, str]] = None,
+    result: Optional[Dict[str, SimpleJsonValue]] = None,
     *,
     msc3783_escape_event_match_key: bool = False,
-) -> Dict[str, str]:
+) -> Dict[str, SimpleJsonValue]:
     """
     Given a JSON dictionary (or event) which might contain sub dictionaries,
     flatten it into a single layer dictionary by combining the keys & sub-keys.
 
-    Any (non-dictionary), non-string value is dropped.
+    Strings, ints, bools, and nulls are kept. Other non-dictionary values are dropped.
 
     Transforms:
 
@@ -538,8 +542,8 @@ def _flatten_dict(
             # nested fields.
             key = key.replace("\\", "\\\\").replace(".", "\\.")
 
-        if isinstance(value, str):
-            result[".".join(prefix + [key])] = value.lower()
+        if isinstance(value, (bool, str)) or type(value) is int or value is None:
+            result[".".join(prefix + [key])] = value
         elif isinstance(value, Mapping):
             # do not set `room_version` due to recursion considerations below
             _flatten_dict(
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index f82d1cfc29..52e366c8ae 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -69,6 +69,8 @@ StateMap = Mapping[StateKey, T]
 MutableStateMap = MutableMapping[StateKey, T]
 
 # JSON types. These could be made stronger, but will do for now.
+# A "simple" (canonical) JSON value.
+SimpleJsonValue = Optional[Union[str, int, bool]]
 # A JSON-serialisable dict.
 JsonDict = Dict[str, Any]
 # A JSON-serialisable mapping; roughly speaking an immutable JSONDict.
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index 516b65cc3c..6603447341 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -57,7 +57,7 @@ class FlattenDictTestCase(unittest.TestCase):
         )
 
     def test_non_string(self) -> None:
-        """Non-string items are dropped."""
+        """Booleans, ints, and nulls should be kept while other items are dropped."""
         input: Dict[str, Any] = {
             "woo": "woo",
             "foo": True,
@@ -66,7 +66,9 @@ class FlattenDictTestCase(unittest.TestCase):
             "fuzz": [],
             "boo": {},
         }
-        self.assertEqual({"woo": "woo"}, _flatten_dict(input))
+        self.assertEqual(
+            {"woo": "woo", "foo": True, "bar": 1, "baz": None}, _flatten_dict(input)
+        )
 
     def test_event(self) -> None:
         """Events can also be flattened."""
@@ -86,9 +88,9 @@ class FlattenDictTestCase(unittest.TestCase):
         )
         expected = {
             "content.msgtype": "m.text",
-            "content.body": "hello world!",
+            "content.body": "Hello world!",
             "content.format": "org.matrix.custom.html",
-            "content.formatted_body": "<h1>hello world!</h1>",
+            "content.formatted_body": "<h1>Hello world!</h1>",
             "room_id": "!test:test",
             "sender": "@alice:test",
             "type": "m.room.message",
@@ -166,6 +168,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
             related_event_match_enabled=True,
             room_version_feature_flags=event.room_version.msc3931_push_features,
             msc3931_enabled=True,
+            msc3758_exact_event_match=True,
         )
 
     def test_display_name(self) -> None:
@@ -410,6 +413,142 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
             "pattern should not match before a newline",
         )
 
+    def test_exact_event_match_string(self) -> None:
+        """Check that exact_event_match conditions work as expected for strings."""
+
+        # Test against a string value.
+        condition = {
+            "kind": "com.beeper.msc3758.exact_event_match",
+            "key": "content.value",
+            "value": "foobaz",
+        }
+        self._assert_matches(
+            condition,
+            {"value": "foobaz"},
+            "exact value should match",
+        )
+        self._assert_not_matches(
+            condition,
+            {"value": "FoobaZ"},
+            "values should match and be case-sensitive",
+        )
+        self._assert_not_matches(
+            condition,
+            {"value": "test foobaz test"},
+            "values must exactly match",
+        )
+        value: Any
+        for value in (True, False, 1, 1.1, None, [], {}):
+            self._assert_not_matches(
+                condition,
+                {"value": value},
+                "incorrect types should not match",
+            )
+
+        # it should work on frozendicts too
+        self._assert_matches(
+            condition,
+            frozendict.frozendict({"value": "foobaz"}),
+            "values should match on frozendicts",
+        )
+
+    def test_exact_event_match_boolean(self) -> None:
+        """Check that exact_event_match conditions work as expected for booleans."""
+
+        # Test against a True boolean value.
+        condition = {
+            "kind": "com.beeper.msc3758.exact_event_match",
+            "key": "content.value",
+            "value": True,
+        }
+        self._assert_matches(
+            condition,
+            {"value": True},
+            "exact value should match",
+        )
+        self._assert_not_matches(
+            condition,
+            {"value": False},
+            "incorrect values should not match",
+        )
+        for value in ("foobaz", 1, 1.1, None, [], {}):
+            self._assert_not_matches(
+                condition,
+                {"value": value},
+                "incorrect types should not match",
+            )
+
+        # Test against a False boolean value.
+        condition = {
+            "kind": "com.beeper.msc3758.exact_event_match",
+            "key": "content.value",
+            "value": False,
+        }
+        self._assert_matches(
+            condition,
+            {"value": False},
+            "exact value should match",
+        )
+        self._assert_not_matches(
+            condition,
+            {"value": True},
+            "incorrect values should not match",
+        )
+        # Choose false-y values to ensure there's no type coercion.
+        for value in ("", 0, 1.1, None, [], {}):
+            self._assert_not_matches(
+                condition,
+                {"value": value},
+                "incorrect types should not match",
+            )
+
+    def test_exact_event_match_null(self) -> None:
+        """Check that exact_event_match conditions work as expected for null."""
+
+        condition = {
+            "kind": "com.beeper.msc3758.exact_event_match",
+            "key": "content.value",
+            "value": None,
+        }
+        self._assert_matches(
+            condition,
+            {"value": None},
+            "exact value should match",
+        )
+        for value in ("foobaz", True, False, 1, 1.1, [], {}):
+            self._assert_not_matches(
+                condition,
+                {"value": value},
+                "incorrect types should not match",
+            )
+
+    def test_exact_event_match_integer(self) -> None:
+        """Check that exact_event_match conditions work as expected for integers."""
+
+        condition = {
+            "kind": "com.beeper.msc3758.exact_event_match",
+            "key": "content.value",
+            "value": 1,
+        }
+        self._assert_matches(
+            condition,
+            {"value": 1},
+            "exact value should match",
+        )
+        value: Any
+        for value in (1.1, -1, 0):
+            self._assert_not_matches(
+                condition,
+                {"value": value},
+                "incorrect values should not match",
+            )
+        for value in ("1", True, False, None, [], {}):
+            self._assert_not_matches(
+                condition,
+                {"value": value},
+                "incorrect types should not match",
+            )
+
     def test_no_body(self) -> None:
         """Not having a body shouldn't break the evaluator."""
         evaluator = self._get_evaluator({})
-- 
cgit 1.5.1


From d0c713cc85f094c323b2ba3f02d8ac411a7f0705 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Fri, 10 Feb 2023 23:29:00 +0000
Subject: Return read-only collections from `@cached` methods (#13755)

It's important that collections returned from `@cached` methods are not
modified, otherwise future retrievals from the cache will return the
modified collection.

This applies to the return values from `@cached` methods and the values
inside the dictionaries returned by `@cachedList` methods. It's not
necessary for the dictionaries returned by `@cachedList` methods
themselves to be read-only.

Signed-off-by: Sean Quah <seanq@matrix.org>
Co-authored-by: David Robertson <davidr@element.io>
---
 changelog.d/13755.misc                             |  1 +
 synapse/app/phone_stats_home.py                    |  4 ++--
 synapse/config/room_directory.py                   |  6 +++---
 synapse/events/builder.py                          |  6 +++---
 synapse/federation/federation_server.py            |  3 ++-
 synapse/handlers/directory.py                      |  6 +++---
 synapse/handlers/receipts.py                       |  4 ++--
 synapse/handlers/room.py                           |  2 +-
 synapse/handlers/sync.py                           |  4 ++--
 synapse/push/bulk_push_rule_evaluator.py           |  4 ++--
 synapse/state/__init__.py                          |  2 +-
 synapse/storage/controllers/state.py               |  6 +++---
 synapse/storage/databases/main/account_data.py     |  2 +-
 synapse/storage/databases/main/appservice.py       |  2 +-
 synapse/storage/databases/main/devices.py          | 17 +++++++++------
 synapse/storage/databases/main/directory.py        |  4 ++--
 synapse/storage/databases/main/end_to_end_keys.py  | 25 +++++++++++++---------
 synapse/storage/databases/main/event_federation.py | 11 ++++++----
 .../storage/databases/main/monthly_active_users.py |  4 ++--
 synapse/storage/databases/main/receipts.py         | 10 +++++----
 synapse/storage/databases/main/registration.py     |  4 ++--
 synapse/storage/databases/main/relations.py        |  7 ++++--
 synapse/storage/databases/main/roommember.py       | 19 ++++++++--------
 synapse/storage/databases/main/signatures.py       |  6 +++---
 synapse/storage/databases/main/tags.py             |  8 ++++---
 synapse/storage/databases/main/user_directory.py   |  4 ++--
 tests/rest/admin/test_server_notice.py             |  4 ++--
 27 files changed, 98 insertions(+), 77 deletions(-)
 create mode 100644 changelog.d/13755.misc

(limited to 'synapse')

diff --git a/changelog.d/13755.misc b/changelog.d/13755.misc
new file mode 100644
index 0000000000..662ee00e99
--- /dev/null
+++ b/changelog.d/13755.misc
@@ -0,0 +1 @@
+Re-type hint some collections as read-only.
diff --git a/synapse/app/phone_stats_home.py b/synapse/app/phone_stats_home.py
index 53db1e85b3..897dd3edac 100644
--- a/synapse/app/phone_stats_home.py
+++ b/synapse/app/phone_stats_home.py
@@ -15,7 +15,7 @@ import logging
 import math
 import resource
 import sys
-from typing import TYPE_CHECKING, List, Sized, Tuple
+from typing import TYPE_CHECKING, List, Mapping, Sized, Tuple
 
 from prometheus_client import Gauge
 
@@ -194,7 +194,7 @@ def start_phone_stats_home(hs: "HomeServer") -> None:
     @wrap_as_background_process("generate_monthly_active_users")
     async def generate_monthly_active_users() -> None:
         current_mau_count = 0
-        current_mau_count_by_service = {}
+        current_mau_count_by_service: Mapping[str, int] = {}
         reserved_users: Sized = ()
         store = hs.get_datastores().main
         if hs.config.server.limit_usage_by_mau or hs.config.server.mau_stats_only:
diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py
index 3ed236217f..8666c22f01 100644
--- a/synapse/config/room_directory.py
+++ b/synapse/config/room_directory.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, List
+from typing import Any, Collection
 
 from matrix_common.regex import glob_to_regex
 
@@ -70,7 +70,7 @@ class RoomDirectoryConfig(Config):
         return False
 
     def is_publishing_room_allowed(
-        self, user_id: str, room_id: str, aliases: List[str]
+        self, user_id: str, room_id: str, aliases: Collection[str]
     ) -> bool:
         """Checks if the given user is allowed to publish the room
 
@@ -122,7 +122,7 @@ class _RoomDirectoryRule:
         except Exception as e:
             raise ConfigError("Failed to parse glob into regex") from e
 
-    def matches(self, user_id: str, room_id: str, aliases: List[str]) -> bool:
+    def matches(self, user_id: str, room_id: str, aliases: Collection[str]) -> bool:
         """Tests if this rule matches the given user_id, room_id and aliases.
 
         Args:
diff --git a/synapse/events/builder.py b/synapse/events/builder.py
index 94dd1298e1..c82745275f 100644
--- a/synapse/events/builder.py
+++ b/synapse/events/builder.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Collection, Dict, List, Optional, Tuple, Union
 
 import attr
 from signedjson.types import SigningKey
@@ -103,7 +103,7 @@ class EventBuilder:
 
     async def build(
         self,
-        prev_event_ids: List[str],
+        prev_event_ids: Collection[str],
         auth_event_ids: Optional[List[str]],
         depth: Optional[int] = None,
     ) -> EventBase:
@@ -136,7 +136,7 @@ class EventBuilder:
 
         format_version = self.room_version.event_format
         # The types of auth/prev events changes between event versions.
-        prev_events: Union[List[str], List[Tuple[str, Dict[str, str]]]]
+        prev_events: Union[Collection[str], List[Tuple[str, Dict[str, str]]]]
         auth_events: Union[List[str], List[Tuple[str, Dict[str, str]]]]
         if format_version == EventFormatVersions.ROOM_V1_V2:
             auth_events = await self._store.add_event_hashes(auth_event_ids)
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 8d36172484..6addc0bb65 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -23,6 +23,7 @@ from typing import (
     Collection,
     Dict,
     List,
+    Mapping,
     Optional,
     Tuple,
     Union,
@@ -1512,7 +1513,7 @@ class FederationHandlerRegistry:
 def _get_event_ids_for_partial_state_join(
     join_event: EventBase,
     prev_state_ids: StateMap[str],
-    summary: Dict[str, MemberSummary],
+    summary: Mapping[str, MemberSummary],
 ) -> Collection[str]:
     """Calculate state to be returned in a partial_state send_join
 
diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py
index d31b0fbb17..a5798e9483 100644
--- a/synapse/handlers/directory.py
+++ b/synapse/handlers/directory.py
@@ -14,7 +14,7 @@
 
 import logging
 import string
-from typing import TYPE_CHECKING, Iterable, List, Optional
+from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence
 
 from typing_extensions import Literal
 
@@ -486,7 +486,7 @@ class DirectoryHandler:
             )
             if canonical_alias:
                 # Ensure we do not mutate room_aliases.
-                room_aliases = room_aliases + [canonical_alias]
+                room_aliases = list(room_aliases) + [canonical_alias]
 
             if not self.config.roomdirectory.is_publishing_room_allowed(
                 user_id, room_id, room_aliases
@@ -529,7 +529,7 @@ class DirectoryHandler:
 
     async def get_aliases_for_room(
         self, requester: Requester, room_id: str
-    ) -> List[str]:
+    ) -> Sequence[str]:
         """
         Get a list of the aliases that currently point to this room on this server
         """
diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py
index 04c61ae3dd..2bacdebfb5 100644
--- a/synapse/handlers/receipts.py
+++ b/synapse/handlers/receipts.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple
+from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence, Tuple
 
 from synapse.api.constants import EduTypes, ReceiptTypes
 from synapse.appservice import ApplicationService
@@ -189,7 +189,7 @@ class ReceiptEventSource(EventSource[int, JsonDict]):
 
     @staticmethod
     def filter_out_private_receipts(
-        rooms: List[JsonDict], user_id: str
+        rooms: Sequence[JsonDict], user_id: str
     ) -> List[JsonDict]:
         """
         Filters a list of serialized receipts (as returned by /sync and /initialSync)
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 0e759b8a5d..060bbcb181 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -1928,6 +1928,6 @@ class RoomShutdownHandler:
         return {
             "kicked_users": kicked_users,
             "failed_to_kick_users": failed_to_kick_users,
-            "local_aliases": aliases_for_room,
+            "local_aliases": list(aliases_for_room),
             "new_room_id": new_room_id,
         }
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 399685e5b7..4bae46158a 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1519,7 +1519,7 @@ class SyncHandler:
             one_time_keys_count = await self.store.count_e2e_one_time_keys(
                 user_id, device_id
             )
-            unused_fallback_key_types = (
+            unused_fallback_key_types = list(
                 await self.store.get_e2e_unused_fallback_key_types(user_id, device_id)
             )
 
@@ -2301,7 +2301,7 @@ class SyncHandler:
         sync_result_builder: "SyncResultBuilder",
         room_builder: "RoomSyncResultBuilder",
         ephemeral: List[JsonDict],
-        tags: Optional[Dict[str, Dict[str, Any]]],
+        tags: Optional[Mapping[str, Mapping[str, Any]]],
         account_data: Mapping[str, JsonDict],
         always_include: bool = False,
     ) -> None:
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 8568aca528..f6a5bffb0f 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -22,6 +22,7 @@ from typing import (
     List,
     Mapping,
     Optional,
+    Sequence,
     Set,
     Tuple,
     Union,
@@ -149,7 +150,7 @@ class BulkPushRuleEvaluator:
         # little, we can skip fetching a huge number of push rules in large rooms.
         # This helps make joins and leaves faster.
         if event.type == EventTypes.Member:
-            local_users = []
+            local_users: Sequence[str] = []
             # We never notify a user about their own actions. This is enforced in
             # `_action_for_event_by_user` in the loop over `rules_by_user`, but we
             # do the same check here to avoid unnecessary DB queries.
@@ -184,7 +185,6 @@ class BulkPushRuleEvaluator:
         if event.type == EventTypes.Member and event.membership == Membership.INVITE:
             invited = event.state_key
             if invited and self.hs.is_mine_id(invited) and invited not in local_users:
-                local_users = list(local_users)
                 local_users.append(invited)
 
         if not local_users:
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index e877e6f1a1..4dc25df67e 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -226,7 +226,7 @@ class StateHandler:
         return await ret.get_state(self._state_storage_controller, state_filter)
 
     async def get_current_user_ids_in_room(
-        self, room_id: str, latest_event_ids: List[str]
+        self, room_id: str, latest_event_ids: Collection[str]
     ) -> Set[str]:
         """
         Get the users IDs who are currently in a room.
diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py
index 52efd4a171..9d7a8a792f 100644
--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@@ -14,6 +14,7 @@
 import logging
 from typing import (
     TYPE_CHECKING,
+    AbstractSet,
     Any,
     Awaitable,
     Callable,
@@ -23,7 +24,6 @@ from typing import (
     List,
     Mapping,
     Optional,
-    Set,
     Tuple,
 )
 
@@ -527,7 +527,7 @@ class StateStorageController:
         )
         return state_map.get(key)
 
-    async def get_current_hosts_in_room(self, room_id: str) -> Set[str]:
+    async def get_current_hosts_in_room(self, room_id: str) -> AbstractSet[str]:
         """Get current hosts in room based on current state.
 
         Blocks until we have full state for the given room. This only happens for rooms
@@ -584,7 +584,7 @@ class StateStorageController:
 
     async def get_users_in_room_with_profiles(
         self, room_id: str
-    ) -> Dict[str, ProfileInfo]:
+    ) -> Mapping[str, ProfileInfo]:
         """
         Get the current users in the room with their profiles.
         If the room is currently partial-stated, this will block until the room has
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 2d6f02c14f..95567826f2 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -240,7 +240,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
     @cached(num_args=2, tree=True)
     async def get_account_data_for_room(
         self, user_id: str, room_id: str
-    ) -> Dict[str, JsonDict]:
+    ) -> Mapping[str, JsonDict]:
         """Get all the client account_data for a user for a room.
 
         Args:
diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py
index 5fb152c4ff..484db175d0 100644
--- a/synapse/storage/databases/main/appservice.py
+++ b/synapse/storage/databases/main/appservice.py
@@ -166,7 +166,7 @@ class ApplicationServiceWorkerStore(RoomMemberWorkerStore):
         room_id: str,
         app_service: "ApplicationService",
         cache_context: _CacheContext,
-    ) -> List[str]:
+    ) -> Sequence[str]:
         """
         Get all users in a room that the appservice controls.
 
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 85c1778a81..1ca66d57d4 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -21,6 +21,7 @@ from typing import (
     Dict,
     Iterable,
     List,
+    Mapping,
     Optional,
     Set,
     Tuple,
@@ -202,7 +203,9 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
     def get_device_stream_token(self) -> int:
         return self._device_list_id_gen.get_current_token()
 
-    async def count_devices_by_users(self, user_ids: Optional[List[str]] = None) -> int:
+    async def count_devices_by_users(
+        self, user_ids: Optional[Collection[str]] = None
+    ) -> int:
         """Retrieve number of all devices of given users.
         Only returns number of devices that are not marked as hidden.
 
@@ -213,7 +216,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
         """
 
         def count_devices_by_users_txn(
-            txn: LoggingTransaction, user_ids: List[str]
+            txn: LoggingTransaction, user_ids: Collection[str]
         ) -> int:
             sql = """
                 SELECT count(*)
@@ -747,7 +750,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
     @cancellable
     async def get_user_devices_from_cache(
         self, user_ids: Set[str], user_and_device_ids: List[Tuple[str, str]]
-    ) -> Tuple[Set[str], Dict[str, Dict[str, JsonDict]]]:
+    ) -> Tuple[Set[str], Dict[str, Mapping[str, JsonDict]]]:
         """Get the devices (and keys if any) for remote users from the cache.
 
         Args:
@@ -775,16 +778,18 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
         user_ids_not_in_cache = unique_user_ids - user_ids_in_cache
 
         # First fetch all the users which all devices are to be returned.
-        results: Dict[str, Dict[str, JsonDict]] = {}
+        results: Dict[str, Mapping[str, JsonDict]] = {}
         for user_id in user_ids:
             if user_id in user_ids_in_cache:
                 results[user_id] = await self.get_cached_devices_for_user(user_id)
         # Then fetch all device-specific requests, but skip users we've already
         # fetched all devices for.
+        device_specific_results: Dict[str, Dict[str, JsonDict]] = {}
         for user_id, device_id in user_and_device_ids:
             if user_id in user_ids_in_cache and user_id not in user_ids:
                 device = await self._get_cached_user_device(user_id, device_id)
-                results.setdefault(user_id, {})[device_id] = device
+                device_specific_results.setdefault(user_id, {})[device_id] = device
+        results.update(device_specific_results)
 
         set_tag("in_cache", str(results))
         set_tag("not_in_cache", str(user_ids_not_in_cache))
@@ -802,7 +807,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
         return db_to_json(content)
 
     @cached()
-    async def get_cached_devices_for_user(self, user_id: str) -> Dict[str, JsonDict]:
+    async def get_cached_devices_for_user(self, user_id: str) -> Mapping[str, JsonDict]:
         devices = await self.db_pool.simple_select_list(
             table="device_lists_remote_cache",
             keyvalues={"user_id": user_id},
diff --git a/synapse/storage/databases/main/directory.py b/synapse/storage/databases/main/directory.py
index 5903fdaf00..44aa181174 100644
--- a/synapse/storage/databases/main/directory.py
+++ b/synapse/storage/databases/main/directory.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Iterable, List, Optional, Tuple
+from typing import Iterable, List, Optional, Sequence, Tuple
 
 import attr
 
@@ -74,7 +74,7 @@ class DirectoryWorkerStore(CacheInvalidationWorkerStore):
         )
 
     @cached(max_entries=5000)
-    async def get_aliases_for_room(self, room_id: str) -> List[str]:
+    async def get_aliases_for_room(self, room_id: str) -> Sequence[str]:
         return await self.db_pool.simple_select_onecol(
             "room_aliases",
             {"room_id": room_id},
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index c4ac6c33ba..752dc16e17 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -20,7 +20,9 @@ from typing import (
     Dict,
     Iterable,
     List,
+    Mapping,
     Optional,
+    Sequence,
     Tuple,
     Union,
     cast,
@@ -691,7 +693,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
     @cached(max_entries=10000)
     async def get_e2e_unused_fallback_key_types(
         self, user_id: str, device_id: str
-    ) -> List[str]:
+    ) -> Sequence[str]:
         """Returns the fallback key types that have an unused key.
 
         Args:
@@ -731,7 +733,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
         return user_keys.get(key_type)
 
     @cached(num_args=1)
-    def _get_bare_e2e_cross_signing_keys(self, user_id: str) -> Dict[str, JsonDict]:
+    def _get_bare_e2e_cross_signing_keys(self, user_id: str) -> Mapping[str, JsonDict]:
         """Dummy function.  Only used to make a cache for
         _get_bare_e2e_cross_signing_keys_bulk.
         """
@@ -744,7 +746,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
     )
     async def _get_bare_e2e_cross_signing_keys_bulk(
         self, user_ids: Iterable[str]
-    ) -> Dict[str, Optional[Dict[str, JsonDict]]]:
+    ) -> Dict[str, Optional[Mapping[str, JsonDict]]]:
         """Returns the cross-signing keys for a set of users.  The output of this
         function should be passed to _get_e2e_cross_signing_signatures_txn if
         the signatures for the calling user need to be fetched.
@@ -765,7 +767,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
         )
 
         # The `Optional` comes from the `@cachedList` decorator.
-        return cast(Dict[str, Optional[Dict[str, JsonDict]]], result)
+        return cast(Dict[str, Optional[Mapping[str, JsonDict]]], result)
 
     def _get_bare_e2e_cross_signing_keys_bulk_txn(
         self,
@@ -924,7 +926,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
     @cancellable
     async def get_e2e_cross_signing_keys_bulk(
         self, user_ids: List[str], from_user_id: Optional[str] = None
-    ) -> Dict[str, Optional[Dict[str, JsonDict]]]:
+    ) -> Dict[str, Optional[Mapping[str, JsonDict]]]:
         """Returns the cross-signing keys for a set of users.
 
         Args:
@@ -940,11 +942,14 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
         result = await self._get_bare_e2e_cross_signing_keys_bulk(user_ids)
 
         if from_user_id:
-            result = await self.db_pool.runInteraction(
-                "get_e2e_cross_signing_signatures",
-                self._get_e2e_cross_signing_signatures_txn,
-                result,
-                from_user_id,
+            result = cast(
+                Dict[str, Optional[Mapping[str, JsonDict]]],
+                await self.db_pool.runInteraction(
+                    "get_e2e_cross_signing_signatures",
+                    self._get_e2e_cross_signing_signatures_txn,
+                    result,
+                    from_user_id,
+                ),
             )
 
         return result
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index bbee02ab18..ca780cca36 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -22,6 +22,7 @@ from typing import (
     Iterable,
     List,
     Optional,
+    Sequence,
     Set,
     Tuple,
     cast,
@@ -1004,7 +1005,9 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
             room_id,
         )
 
-    async def get_max_depth_of(self, event_ids: List[str]) -> Tuple[Optional[str], int]:
+    async def get_max_depth_of(
+        self, event_ids: Collection[str]
+    ) -> Tuple[Optional[str], int]:
         """Returns the event ID and depth for the event that has the max depth from a set of event IDs
 
         Args:
@@ -1141,7 +1144,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
         )
 
     @cached(max_entries=5000, iterable=True)
-    async def get_latest_event_ids_in_room(self, room_id: str) -> List[str]:
+    async def get_latest_event_ids_in_room(self, room_id: str) -> Sequence[str]:
         return await self.db_pool.simple_select_onecol(
             table="event_forward_extremities",
             keyvalues={"room_id": room_id},
@@ -1171,7 +1174,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
     @cancellable
     async def get_forward_extremities_for_room_at_stream_ordering(
         self, room_id: str, stream_ordering: int
-    ) -> List[str]:
+    ) -> Sequence[str]:
         """For a given room_id and stream_ordering, return the forward
         extremeties of the room at that point in "time".
 
@@ -1204,7 +1207,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
     @cached(max_entries=5000, num_args=2)
     async def _get_forward_extremeties_for_room(
         self, room_id: str, stream_ordering: int
-    ) -> List[str]:
+    ) -> Sequence[str]:
         """For a given room_id and stream_ordering, return the forward
         extremeties of the room at that point in "time".
 
diff --git a/synapse/storage/databases/main/monthly_active_users.py b/synapse/storage/databases/main/monthly_active_users.py
index db9a24db5e..4b1061e6d7 100644
--- a/synapse/storage/databases/main/monthly_active_users.py
+++ b/synapse/storage/databases/main/monthly_active_users.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, cast
+from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, cast
 
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.storage.database import (
@@ -95,7 +95,7 @@ class MonthlyActiveUsersWorkerStore(RegistrationWorkerStore):
         return await self.db_pool.runInteraction("count_users", _count_users)
 
     @cached(num_args=0)
-    async def get_monthly_active_count_by_service(self) -> Dict[str, int]:
+    async def get_monthly_active_count_by_service(self) -> Mapping[str, int]:
         """Generates current count of monthly active users broken down by service.
         A service is typically an appservice but also includes native matrix users.
         Since the `monthly_active_users` table is populated from the `user_ips` table
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index 29972d5204..dddf49c2d5 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -21,7 +21,9 @@ from typing import (
     Dict,
     Iterable,
     List,
+    Mapping,
     Optional,
+    Sequence,
     Tuple,
     cast,
 )
@@ -288,7 +290,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
 
     async def get_linearized_receipts_for_room(
         self, room_id: str, to_key: int, from_key: Optional[int] = None
-    ) -> List[dict]:
+    ) -> Sequence[JsonDict]:
         """Get receipts for a single room for sending to clients.
 
         Args:
@@ -311,7 +313,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
     @cached(tree=True)
     async def _get_linearized_receipts_for_room(
         self, room_id: str, to_key: int, from_key: Optional[int] = None
-    ) -> List[JsonDict]:
+    ) -> Sequence[JsonDict]:
         """See get_linearized_receipts_for_room"""
 
         def f(txn: LoggingTransaction) -> List[Dict[str, Any]]:
@@ -354,7 +356,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
     )
     async def _get_linearized_receipts_for_rooms(
         self, room_ids: Collection[str], to_key: int, from_key: Optional[int] = None
-    ) -> Dict[str, List[JsonDict]]:
+    ) -> Dict[str, Sequence[JsonDict]]:
         if not room_ids:
             return {}
 
@@ -416,7 +418,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
     )
     async def get_linearized_receipts_for_all_rooms(
         self, to_key: int, from_key: Optional[int] = None
-    ) -> Dict[str, JsonDict]:
+    ) -> Mapping[str, JsonDict]:
         """Get receipts for all rooms between two stream_ids, up
         to a limit of the latest 100 read receipts.
 
diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py
index 31f0f2bd3d..9a55e17624 100644
--- a/synapse/storage/databases/main/registration.py
+++ b/synapse/storage/databases/main/registration.py
@@ -16,7 +16,7 @@
 import logging
 import random
 import re
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast
+from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple, Union, cast
 
 import attr
 
@@ -192,7 +192,7 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
             )
 
     @cached()
-    async def get_user_by_id(self, user_id: str) -> Optional[Dict[str, Any]]:
+    async def get_user_by_id(self, user_id: str) -> Optional[Mapping[str, Any]]:
         """Deprecated: use get_userinfo_by_id instead"""
 
         def get_user_by_id_txn(txn: LoggingTransaction) -> Optional[Dict[str, Any]]:
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index 0018d6f7ab..fa3266c081 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -22,6 +22,7 @@ from typing import (
     List,
     Mapping,
     Optional,
+    Sequence,
     Set,
     Tuple,
     Union,
@@ -171,7 +172,7 @@ class RelationsWorkerStore(SQLBaseStore):
         direction: Direction = Direction.BACKWARDS,
         from_token: Optional[StreamToken] = None,
         to_token: Optional[StreamToken] = None,
-    ) -> Tuple[List[_RelatedEvent], Optional[StreamToken]]:
+    ) -> Tuple[Sequence[_RelatedEvent], Optional[StreamToken]]:
         """Get a list of relations for an event, ordered by topological ordering.
 
         Args:
@@ -397,7 +398,9 @@ class RelationsWorkerStore(SQLBaseStore):
         return result is not None
 
     @cached()
-    async def get_aggregation_groups_for_event(self, event_id: str) -> List[JsonDict]:
+    async def get_aggregation_groups_for_event(
+        self, event_id: str
+    ) -> Sequence[JsonDict]:
         raise NotImplementedError()
 
     @cachedList(
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index ea6a5e2f34..694a5b802c 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -24,6 +24,7 @@ from typing import (
     List,
     Mapping,
     Optional,
+    Sequence,
     Set,
     Tuple,
     Union,
@@ -153,7 +154,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         return self._known_servers_count
 
     @cached(max_entries=100000, iterable=True)
-    async def get_users_in_room(self, room_id: str) -> List[str]:
+    async def get_users_in_room(self, room_id: str) -> Sequence[str]:
         """Returns a list of users in the room.
 
         Will return inaccurate results for rooms with partial state, since the state for
@@ -190,9 +191,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         )
 
     @cached()
-    def get_user_in_room_with_profile(
-        self, room_id: str, user_id: str
-    ) -> Dict[str, ProfileInfo]:
+    def get_user_in_room_with_profile(self, room_id: str, user_id: str) -> ProfileInfo:
         raise NotImplementedError()
 
     @cachedList(
@@ -246,7 +245,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
     @cached(max_entries=100000, iterable=True)
     async def get_users_in_room_with_profiles(
         self, room_id: str
-    ) -> Dict[str, ProfileInfo]:
+    ) -> Mapping[str, ProfileInfo]:
         """Get a mapping from user ID to profile information for all users in a given room.
 
         The profile information comes directly from this room's `m.room.member`
@@ -285,7 +284,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         )
 
     @cached(max_entries=100000)
-    async def get_room_summary(self, room_id: str) -> Dict[str, MemberSummary]:
+    async def get_room_summary(self, room_id: str) -> Mapping[str, MemberSummary]:
         """Get the details of a room roughly suitable for use by the room
         summary extension to /sync. Useful when lazy loading room members.
         Args:
@@ -357,7 +356,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
     @cached()
     async def get_invited_rooms_for_local_user(
         self, user_id: str
-    ) -> List[RoomsForUser]:
+    ) -> Sequence[RoomsForUser]:
         """Get all the rooms the *local* user is invited to.
 
         Args:
@@ -475,7 +474,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         return results
 
     @cached(iterable=True)
-    async def get_local_users_in_room(self, room_id: str) -> List[str]:
+    async def get_local_users_in_room(self, room_id: str) -> Sequence[str]:
         """
         Retrieves a list of the current roommembers who are local to the server.
         """
@@ -791,7 +790,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         """Returns the set of users who share a room with `user_id`"""
         room_ids = await self.get_rooms_for_user(user_id)
 
-        user_who_share_room = set()
+        user_who_share_room: Set[str] = set()
         for room_id in room_ids:
             user_ids = await self.get_users_in_room(room_id)
             user_who_share_room.update(user_ids)
@@ -953,7 +952,7 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         return True
 
     @cached(iterable=True, max_entries=10000)
-    async def get_current_hosts_in_room(self, room_id: str) -> Set[str]:
+    async def get_current_hosts_in_room(self, room_id: str) -> AbstractSet[str]:
         """Get current hosts in room based on current state."""
 
         # First we check if we already have `get_users_in_room` in the cache, as
diff --git a/synapse/storage/databases/main/signatures.py b/synapse/storage/databases/main/signatures.py
index 05da15074a..5dcb1fc0b5 100644
--- a/synapse/storage/databases/main/signatures.py
+++ b/synapse/storage/databases/main/signatures.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Collection, Dict, List, Tuple
+from typing import Collection, Dict, List, Mapping, Tuple
 
 from unpaddedbase64 import encode_base64
 
@@ -26,7 +26,7 @@ from synapse.util.caches.descriptors import cached, cachedList
 
 class SignatureWorkerStore(EventsWorkerStore):
     @cached()
-    def get_event_reference_hash(self, event_id: str) -> Dict[str, Dict[str, bytes]]:
+    def get_event_reference_hash(self, event_id: str) -> Mapping[str, bytes]:
         # This is a dummy function to allow get_event_reference_hashes
         # to use its cache
         raise NotImplementedError()
@@ -36,7 +36,7 @@ class SignatureWorkerStore(EventsWorkerStore):
     )
     async def get_event_reference_hashes(
         self, event_ids: Collection[str]
-    ) -> Dict[str, Dict[str, bytes]]:
+    ) -> Mapping[str, Mapping[str, bytes]]:
         """Get all hashes for given events.
 
         Args:
diff --git a/synapse/storage/databases/main/tags.py b/synapse/storage/databases/main/tags.py
index d5500cdd47..c149a9eacb 100644
--- a/synapse/storage/databases/main/tags.py
+++ b/synapse/storage/databases/main/tags.py
@@ -15,7 +15,7 @@
 # limitations under the License.
 
 import logging
-from typing import Any, Dict, Iterable, List, Tuple, cast
+from typing import Any, Dict, Iterable, List, Mapping, Tuple, cast
 
 from synapse.api.constants import AccountDataTypes
 from synapse.replication.tcp.streams import AccountDataStream
@@ -32,7 +32,9 @@ logger = logging.getLogger(__name__)
 
 class TagsWorkerStore(AccountDataWorkerStore):
     @cached()
-    async def get_tags_for_user(self, user_id: str) -> Dict[str, Dict[str, JsonDict]]:
+    async def get_tags_for_user(
+        self, user_id: str
+    ) -> Mapping[str, Mapping[str, JsonDict]]:
         """Get all the tags for a user.
 
 
@@ -107,7 +109,7 @@ class TagsWorkerStore(AccountDataWorkerStore):
 
     async def get_updated_tags(
         self, user_id: str, stream_id: int
-    ) -> Dict[str, Dict[str, JsonDict]]:
+    ) -> Mapping[str, Mapping[str, JsonDict]]:
         """Get all the tags for the rooms where the tags have changed since the
         given version
 
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index 14ef5b040d..f6a6fd4079 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -16,9 +16,9 @@ import logging
 import re
 from typing import (
     TYPE_CHECKING,
-    Dict,
     Iterable,
     List,
+    Mapping,
     Optional,
     Sequence,
     Set,
@@ -586,7 +586,7 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
         )
 
     @cached()
-    async def get_user_in_directory(self, user_id: str) -> Optional[Dict[str, str]]:
+    async def get_user_in_directory(self, user_id: str) -> Optional[Mapping[str, str]]:
         return await self.db_pool.simple_select_one(
             table="user_directory",
             keyvalues={"user_id": user_id},
diff --git a/tests/rest/admin/test_server_notice.py b/tests/rest/admin/test_server_notice.py
index a2f347f666..f71ff46d87 100644
--- a/tests/rest/admin/test_server_notice.py
+++ b/tests/rest/admin/test_server_notice.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List
+from typing import List, Sequence
 
 from twisted.test.proto_helpers import MemoryReactor
 
@@ -558,7 +558,7 @@ class ServerNoticeTestCase(unittest.HomeserverTestCase):
 
     def _check_invite_and_join_status(
         self, user_id: str, expected_invites: int, expected_memberships: int
-    ) -> List[RoomsForUser]:
+    ) -> Sequence[RoomsForUser]:
         """Check invite and room membership status of a user.
 
         Args
-- 
cgit 1.5.1


From 6cddf24e361fe43f086307c833cd814dc03363b6 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Sat, 11 Feb 2023 00:31:05 +0100
Subject: Faster joins: don't stall when a user joins during a fast join
 (#14606)

Fixes #12801.
Complement tests are at
https://github.com/matrix-org/complement/pull/567.

Avoid blocking on full state when handling a subsequent join into a
partial state room.

Also always perform a remote join into partial state rooms, since we do
not know whether the joining user has been banned and want to avoid
leaking history to banned users.

Signed-off-by: Mathieu Velten <mathieuv@matrix.org>
Co-authored-by: Sean Quah <seanq@matrix.org>
Co-authored-by: David Robertson <davidr@element.io>
---
 changelog.d/14606.misc                   |   1 +
 synapse/api/errors.py                    |  22 ++++++
 synapse/federation/federation_server.py  |   2 +-
 synapse/handlers/event_auth.py           |  16 ++---
 synapse/handlers/federation.py           |   2 +-
 synapse/handlers/federation_event.py     |  59 ++++++++++++++--
 synapse/handlers/message.py              |   2 +-
 synapse/handlers/room.py                 |   2 +-
 synapse/handlers/room_member.py          | 118 ++++++++++++++++++++++---------
 synapse/handlers/room_member_worker.py   |   5 +-
 synapse/storage/databases/main/events.py |  21 +-----
 tests/handlers/test_federation.py        |  40 +++++------
 12 files changed, 196 insertions(+), 94 deletions(-)
 create mode 100644 changelog.d/14606.misc

(limited to 'synapse')

diff --git a/changelog.d/14606.misc b/changelog.d/14606.misc
new file mode 100644
index 0000000000..e2debc96d8
--- /dev/null
+++ b/changelog.d/14606.misc
@@ -0,0 +1 @@
+Faster joins: don't stall when another user joins during a fast join resync.
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index c2c177fd71..9235ce6536 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -751,3 +751,25 @@ class ModuleFailedException(Exception):
     Raised when a module API callback fails, for example because it raised an
     exception.
     """
+
+
+class PartialStateConflictError(SynapseError):
+    """An internal error raised when attempting to persist an event with partial state
+    after the room containing the event has been un-partial stated.
+
+    This error should be handled by recomputing the event context and trying again.
+
+    This error has an HTTP status code so that it can be transported over replication.
+    It should not be exposed to clients.
+    """
+
+    @staticmethod
+    def message() -> str:
+        return "Cannot persist partial state event in un-partial stated room"
+
+    def __init__(self) -> None:
+        super().__init__(
+            HTTPStatus.CONFLICT,
+            msg=PartialStateConflictError.message(),
+            errcode=Codes.UNKNOWN,
+        )
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 6addc0bb65..6d99845de5 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -48,6 +48,7 @@ from synapse.api.errors import (
     FederationError,
     IncompatibleRoomVersionError,
     NotFoundError,
+    PartialStateConflictError,
     SynapseError,
     UnsupportedRoomVersionError,
 )
@@ -81,7 +82,6 @@ from synapse.replication.http.federation import (
     ReplicationFederationSendEduRestServlet,
     ReplicationGetQueryRestServlet,
 )
-from synapse.storage.databases.main.events import PartialStateConflictError
 from synapse.storage.databases.main.lock import Lock
 from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary
 from synapse.storage.roommember import MemberSummary
diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py
index a23a8ce2a1..46dd63c3f0 100644
--- a/synapse/handlers/event_auth.py
+++ b/synapse/handlers/event_auth.py
@@ -202,7 +202,7 @@ class EventAuthHandler:
         state_ids: StateMap[str],
         room_version: RoomVersion,
         user_id: str,
-        prev_member_event: Optional[EventBase],
+        prev_membership: Optional[str],
     ) -> None:
         """
         Check whether a user can join a room without an invite due to restricted join rules.
@@ -214,15 +214,14 @@ class EventAuthHandler:
             state_ids: The state of the room as it currently is.
             room_version: The room version of the room being joined.
             user_id: The user joining the room.
-            prev_member_event: The current membership event for this user.
+            prev_membership: The current membership state for this user. `None` if the
+                user has never joined the room (equivalent to "leave").
 
         Raises:
             AuthError if the user cannot join the room.
         """
         # If the member is invited or currently joined, then nothing to do.
-        if prev_member_event and (
-            prev_member_event.membership in (Membership.JOIN, Membership.INVITE)
-        ):
+        if prev_membership in (Membership.JOIN, Membership.INVITE):
             return
 
         # This is not a room with a restricted join rule, so we don't need to do the
@@ -255,13 +254,14 @@ class EventAuthHandler:
             )
 
     async def has_restricted_join_rules(
-        self, state_ids: StateMap[str], room_version: RoomVersion
+        self, partial_state_ids: StateMap[str], room_version: RoomVersion
     ) -> bool:
         """
         Return if the room has the proper join rules set for access via rooms.
 
         Args:
-            state_ids: The state of the room as it currently is.
+            partial_state_ids: The state of the room as it currently is. May be
+                full or partial state.
             room_version: The room version of the room to query.
 
         Returns:
@@ -272,7 +272,7 @@ class EventAuthHandler:
             return False
 
         # If there's no join rule, then it defaults to invite (so this doesn't apply).
-        join_rules_event_id = state_ids.get((EventTypes.JoinRules, ""), None)
+        join_rules_event_id = partial_state_ids.get((EventTypes.JoinRules, ""), None)
         if not join_rules_event_id:
             return False
 
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 43ed4a3dd1..08727e4857 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -49,6 +49,7 @@ from synapse.api.errors import (
     FederationPullAttemptBackoffError,
     HttpResponseException,
     NotFoundError,
+    PartialStateConflictError,
     RequestSendFailed,
     SynapseError,
 )
@@ -68,7 +69,6 @@ from synapse.replication.http.federation import (
     ReplicationCleanRoomRestServlet,
     ReplicationStoreRoomOnOutlierMembershipRestServlet,
 )
-from synapse.storage.databases.main.events import PartialStateConflictError
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
 from synapse.types import JsonDict, StrCollection, get_domain_from_id
 from synapse.types.state import StateFilter
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 3561f2f1de..b7136f8d1c 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -47,6 +47,7 @@ from synapse.api.errors import (
     FederationError,
     FederationPullAttemptBackoffError,
     HttpResponseException,
+    PartialStateConflictError,
     RequestSendFailed,
     SynapseError,
 )
@@ -74,7 +75,6 @@ from synapse.replication.http.federation import (
     ReplicationFederationSendEventsRestServlet,
 )
 from synapse.state import StateResolutionStore
-from synapse.storage.databases.main.events import PartialStateConflictError
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
 from synapse.types import (
     PersistedEventPosition,
@@ -441,16 +441,17 @@ class FederationEventHandler:
         # Check if the user is already in the room or invited to the room.
         user_id = event.state_key
         prev_member_event_id = prev_state_ids.get((EventTypes.Member, user_id), None)
-        prev_member_event = None
+        prev_membership = None
         if prev_member_event_id:
             prev_member_event = await self._store.get_event(prev_member_event_id)
+            prev_membership = prev_member_event.membership
 
         # Check if the member should be allowed access via membership in a space.
         await self._event_auth_handler.check_restricted_join_rules(
             prev_state_ids,
             event.room_version,
             user_id,
-            prev_member_event,
+            prev_membership,
         )
 
     @trace
@@ -526,11 +527,57 @@ class FederationEventHandler:
             "Peristing join-via-remote %s (partial_state: %s)", event, partial_state
         )
         with nested_logging_context(suffix=event.event_id):
+            if partial_state:
+                # When handling a second partial state join into a partial state room,
+                # the returned state will exclude the membership from the first join. To
+                # preserve prior memberships, we try to compute the partial state before
+                # the event ourselves if we know about any of the prev events.
+                #
+                # When we don't know about any of the prev events, it's fine to just use
+                # the returned state, since the new join will create a new forward
+                # extremity, and leave the forward extremity containing our prior
+                # memberships alone.
+                prev_event_ids = set(event.prev_event_ids())
+                seen_event_ids = await self._store.have_events_in_timeline(
+                    prev_event_ids
+                )
+                missing_event_ids = prev_event_ids - seen_event_ids
+
+                state_maps_to_resolve: List[StateMap[str]] = []
+
+                # Fetch the state after the prev events that we know about.
+                state_maps_to_resolve.extend(
+                    (
+                        await self._state_storage_controller.get_state_groups_ids(
+                            room_id, seen_event_ids, await_full_state=False
+                        )
+                    ).values()
+                )
+
+                # When there are prev events we do not have the state for, we state
+                # resolve with the state returned by the remote homeserver.
+                if missing_event_ids or len(state_maps_to_resolve) == 0:
+                    state_maps_to_resolve.append(
+                        {(e.type, e.state_key): e.event_id for e in state}
+                    )
+
+                state_ids_before_event = (
+                    await self._state_resolution_handler.resolve_events_with_store(
+                        event.room_id,
+                        room_version.identifier,
+                        state_maps_to_resolve,
+                        event_map=None,
+                        state_res_store=StateResolutionStore(self._store),
+                    )
+                )
+            else:
+                state_ids_before_event = {
+                    (e.type, e.state_key): e.event_id for e in state
+                }
+
             context = await self._state_handler.compute_event_context(
                 event,
-                state_ids_before_event={
-                    (e.type, e.state_key): e.event_id for e in state
-                },
+                state_ids_before_event=state_ids_before_event,
                 partial_state=partial_state,
             )
 
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 3e30f52e4d..8f5b658d9d 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -38,6 +38,7 @@ from synapse.api.errors import (
     Codes,
     ConsentNotGivenError,
     NotFoundError,
+    PartialStateConflictError,
     ShadowBanError,
     SynapseError,
     UnstableSpecAuthError,
@@ -57,7 +58,6 @@ from synapse.logging.context import make_deferred_yieldable, run_in_background
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.replication.http.send_event import ReplicationSendEventRestServlet
 from synapse.replication.http.send_events import ReplicationSendEventsRestServlet
-from synapse.storage.databases.main.events import PartialStateConflictError
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
 from synapse.types import (
     MutableStateMap,
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 060bbcb181..837dabb3b7 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -43,6 +43,7 @@ from synapse.api.errors import (
     Codes,
     LimitExceededError,
     NotFoundError,
+    PartialStateConflictError,
     StoreError,
     SynapseError,
 )
@@ -54,7 +55,6 @@ from synapse.events.utils import copy_and_fixup_power_levels_contents
 from synapse.handlers.relations import BundledAggregations
 from synapse.module_api import NOT_SPAM
 from synapse.rest.admin._base import assert_user_is_admin
-from synapse.storage.databases.main.events import PartialStateConflictError
 from synapse.streams import EventSource
 from synapse.types import (
     JsonDict,
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 6e7141d2ef..a965c7ec76 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -26,7 +26,13 @@ from synapse.api.constants import (
     GuestAccess,
     Membership,
 )
-from synapse.api.errors import AuthError, Codes, ShadowBanError, SynapseError
+from synapse.api.errors import (
+    AuthError,
+    Codes,
+    PartialStateConflictError,
+    ShadowBanError,
+    SynapseError,
+)
 from synapse.api.ratelimiting import Ratelimiter
 from synapse.event_auth import get_named_level, get_power_level_event
 from synapse.events import EventBase
@@ -34,7 +40,6 @@ from synapse.events.snapshot import EventContext
 from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN
 from synapse.logging import opentracing
 from synapse.module_api import NOT_SPAM
-from synapse.storage.databases.main.events import PartialStateConflictError
 from synapse.types import (
     JsonDict,
     Requester,
@@ -56,6 +61,13 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
+class NoKnownServersError(SynapseError):
+    """No server already resident to the room was provided to the join/knock operation."""
+
+    def __init__(self, msg: str = "No known servers"):
+        super().__init__(404, msg)
+
+
 class RoomMemberHandler(metaclass=abc.ABCMeta):
     # TODO(paul): This handler currently contains a messy conflation of
     #   low-level API that works on UserID objects and so on, and REST-level
@@ -185,6 +197,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             room_id: Room that we are trying to join
             user: User who is trying to join
             content: A dict that should be used as the content of the join event.
+
+        Raises:
+            NoKnownServersError: if remote_room_hosts does not contain a server joined to
+                the room.
         """
         raise NotImplementedError()
 
@@ -823,14 +839,19 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
 
         latest_event_ids = await self.store.get_prev_events_for_room(room_id)
 
-        state_before_join = await self.state_handler.compute_state_after_events(
-            room_id, latest_event_ids
+        is_partial_state_room = await self.store.is_partial_state_room(room_id)
+        partial_state_before_join = await self.state_handler.compute_state_after_events(
+            room_id, latest_event_ids, await_full_state=False
         )
+        # `is_partial_state_room` also indicates whether `partial_state_before_join` is
+        # partial.
 
         # TODO: Refactor into dictionary of explicitly allowed transitions
         # between old and new state, with specific error messages for some
         # transitions and generic otherwise
-        old_state_id = state_before_join.get((EventTypes.Member, target.to_string()))
+        old_state_id = partial_state_before_join.get(
+            (EventTypes.Member, target.to_string())
+        )
         if old_state_id:
             old_state = await self.store.get_event(old_state_id, allow_none=True)
             old_membership = old_state.content.get("membership") if old_state else None
@@ -881,11 +902,11 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             if action == "kick":
                 raise AuthError(403, "The target user is not in the room")
 
-        is_host_in_room = await self._is_host_in_room(state_before_join)
+        is_host_in_room = await self._is_host_in_room(partial_state_before_join)
 
         if effective_membership_state == Membership.JOIN:
             if requester.is_guest:
-                guest_can_join = await self._can_guest_join(state_before_join)
+                guest_can_join = await self._can_guest_join(partial_state_before_join)
                 if not guest_can_join:
                     # This should be an auth check, but guests are a local concept,
                     # so don't really fit into the general auth process.
@@ -927,8 +948,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                 room_id,
                 remote_room_hosts,
                 content,
+                is_partial_state_room,
                 is_host_in_room,
-                state_before_join,
+                partial_state_before_join,
             )
             if remote_join:
                 if ratelimit:
@@ -1073,8 +1095,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         room_id: str,
         remote_room_hosts: List[str],
         content: JsonDict,
+        is_partial_state_room: bool,
         is_host_in_room: bool,
-        state_before_join: StateMap[str],
+        partial_state_before_join: StateMap[str],
     ) -> Tuple[bool, List[str]]:
         """
         Check whether the server should do a remote join (as opposed to a local
@@ -1093,9 +1116,12 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             remote_room_hosts: A list of remote room hosts.
             content: The content to use as the event body of the join. This may
                 be modified.
-            is_host_in_room: True if the host is in the room.
-            state_before_join: The state before the join event (i.e. the resolution of
-                the states after its parent events).
+            is_partial_state_room: `True` if the server currently doesn't hold the full
+                state of the room.
+            is_host_in_room: `True` if the host is in the room.
+            partial_state_before_join: The state before the join event (i.e. the
+                resolution of the states after its parent events). May be full or
+                partial state, depending on `is_partial_state_room`.
 
         Returns:
             A tuple of:
@@ -1109,6 +1135,23 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         if not is_host_in_room:
             return True, remote_room_hosts
 
+        prev_member_event_id = partial_state_before_join.get(
+            (EventTypes.Member, user_id), None
+        )
+        previous_membership = None
+        if prev_member_event_id:
+            prev_member_event = await self.store.get_event(prev_member_event_id)
+            previous_membership = prev_member_event.membership
+
+        # If we are not fully joined yet, and the target is not already in the room,
+        # let's do a remote join so another server with the full state can validate
+        # that the user has not been banned for example.
+        # We could just accept the join and wait for state res to resolve that later on
+        # but we would then leak room history to this person until then, which is pretty
+        # bad.
+        if is_partial_state_room and previous_membership != Membership.JOIN:
+            return True, remote_room_hosts
+
         # If the host is in the room, but not one of the authorised hosts
         # for restricted join rules, a remote join must be used.
         room_version = await self.store.get_room_version(room_id)
@@ -1116,21 +1159,19 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         # If restricted join rules are not being used, a local join can always
         # be used.
         if not await self.event_auth_handler.has_restricted_join_rules(
-            state_before_join, room_version
+            partial_state_before_join, room_version
         ):
             return False, []
 
         # If the user is invited to the room or already joined, the join
         # event can always be issued locally.
-        prev_member_event_id = state_before_join.get((EventTypes.Member, user_id), None)
-        prev_member_event = None
-        if prev_member_event_id:
-            prev_member_event = await self.store.get_event(prev_member_event_id)
-            if prev_member_event.membership in (
-                Membership.JOIN,
-                Membership.INVITE,
-            ):
-                return False, []
+        if previous_membership in (Membership.JOIN, Membership.INVITE):
+            return False, []
+
+        # All the partial state cases are covered above. We have been given the full
+        # state of the room.
+        assert not is_partial_state_room
+        state_before_join = partial_state_before_join
 
         # If the local host has a user who can issue invites, then a local
         # join can be done.
@@ -1154,7 +1195,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
 
         # Ensure the member should be allowed access via membership in a room.
         await self.event_auth_handler.check_restricted_join_rules(
-            state_before_join, room_version, user_id, prev_member_event
+            state_before_join, room_version, user_id, previous_membership
         )
 
         # If this is going to be a local join, additional information must
@@ -1304,11 +1345,17 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                 if prev_member_event.membership == Membership.JOIN:
                     await self._user_left_room(target_user, room_id)
 
-    async def _can_guest_join(self, current_state_ids: StateMap[str]) -> bool:
+    async def _can_guest_join(self, partial_current_state_ids: StateMap[str]) -> bool:
         """
         Returns whether a guest can join a room based on its current state.
+
+        Args:
+            partial_current_state_ids: The current state of the room. May be full or
+                partial state.
         """
-        guest_access_id = current_state_ids.get((EventTypes.GuestAccess, ""), None)
+        guest_access_id = partial_current_state_ids.get(
+            (EventTypes.GuestAccess, ""), None
+        )
         if not guest_access_id:
             return False
 
@@ -1634,19 +1681,25 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         )
         return event, stream_id
 
-    async def _is_host_in_room(self, current_state_ids: StateMap[str]) -> bool:
+    async def _is_host_in_room(self, partial_current_state_ids: StateMap[str]) -> bool:
+        """Returns whether the homeserver is in the room based on its current state.
+
+        Args:
+            partial_current_state_ids: The current state of the room. May be full or
+                partial state.
+        """
         # Have we just created the room, and is this about to be the very
         # first member event?
-        create_event_id = current_state_ids.get(("m.room.create", ""))
-        if len(current_state_ids) == 1 and create_event_id:
+        create_event_id = partial_current_state_ids.get(("m.room.create", ""))
+        if len(partial_current_state_ids) == 1 and create_event_id:
             # We can only get here if we're in the process of creating the room
             return True
 
-        for etype, state_key in current_state_ids:
+        for etype, state_key in partial_current_state_ids:
             if etype != EventTypes.Member or not self.hs.is_mine_id(state_key):
                 continue
 
-            event_id = current_state_ids[(etype, state_key)]
+            event_id = partial_current_state_ids[(etype, state_key)]
             event = await self.store.get_event(event_id, allow_none=True)
             if not event:
                 continue
@@ -1715,8 +1768,7 @@ class RoomMemberMasterHandler(RoomMemberHandler):
         ]
 
         if len(remote_room_hosts) == 0:
-            raise SynapseError(
-                404,
+            raise NoKnownServersError(
                 "Can't join remote room because no servers "
                 "that are in the room have been provided.",
             )
@@ -1947,7 +1999,7 @@ class RoomMemberMasterHandler(RoomMemberHandler):
         ]
 
         if len(remote_room_hosts) == 0:
-            raise SynapseError(404, "No known servers")
+            raise NoKnownServersError()
 
         return await self.federation_handler.do_knock(
             remote_room_hosts, room_id, user.to_string(), content=content
diff --git a/synapse/handlers/room_member_worker.py b/synapse/handlers/room_member_worker.py
index 221552a2a6..ba261702d4 100644
--- a/synapse/handlers/room_member_worker.py
+++ b/synapse/handlers/room_member_worker.py
@@ -15,8 +15,7 @@
 import logging
 from typing import TYPE_CHECKING, List, Optional, Tuple
 
-from synapse.api.errors import SynapseError
-from synapse.handlers.room_member import RoomMemberHandler
+from synapse.handlers.room_member import NoKnownServersError, RoomMemberHandler
 from synapse.replication.http.membership import (
     ReplicationRemoteJoinRestServlet as ReplRemoteJoin,
     ReplicationRemoteKnockRestServlet as ReplRemoteKnock,
@@ -52,7 +51,7 @@ class RoomMemberWorkerHandler(RoomMemberHandler):
     ) -> Tuple[str, int]:
         """Implements RoomMemberHandler._remote_join"""
         if len(remote_room_hosts) == 0:
-            raise SynapseError(404, "No known servers")
+            raise NoKnownServersError()
 
         ret = await self._remote_join_client(
             requester=requester,
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index cb66376fb4..ffe766fd56 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -16,7 +16,6 @@
 import itertools
 import logging
 from collections import OrderedDict
-from http import HTTPStatus
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -36,7 +35,7 @@ from prometheus_client import Counter
 
 import synapse.metrics
 from synapse.api.constants import EventContentFields, EventTypes, RelationTypes
-from synapse.api.errors import Codes, SynapseError
+from synapse.api.errors import PartialStateConflictError
 from synapse.api.room_versions import RoomVersions
 from synapse.events import EventBase, relation_from_event
 from synapse.events.snapshot import EventContext
@@ -72,24 +71,6 @@ event_counter = Counter(
 )
 
 
-class PartialStateConflictError(SynapseError):
-    """An internal error raised when attempting to persist an event with partial state
-    after the room containing the event has been un-partial stated.
-
-    This error should be handled by recomputing the event context and trying again.
-
-    This error has an HTTP status code so that it can be transported over replication.
-    It should not be exposed to clients.
-    """
-
-    def __init__(self) -> None:
-        super().__init__(
-            HTTPStatus.CONFLICT,
-            msg="Cannot persist partial state event in un-partial stated room",
-            errcode=Codes.UNKNOWN,
-        )
-
-
 @attr.s(slots=True, auto_attribs=True)
 class DeltaState:
     """Deltas to use to update the `current_state_events` table.
diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py
index 57675fa407..5868eb2da7 100644
--- a/tests/handlers/test_federation.py
+++ b/tests/handlers/test_federation.py
@@ -575,26 +575,6 @@ class PartialJoinTestCase(unittest.FederatingHomeserverTestCase):
         fed_client = fed_handler.federation_client
 
         room_id = "!room:example.com"
-        membership_event = make_event_from_dict(
-            {
-                "room_id": room_id,
-                "type": "m.room.member",
-                "sender": "@alice:test",
-                "state_key": "@alice:test",
-                "content": {"membership": "join"},
-            },
-            RoomVersions.V10,
-        )
-
-        mock_make_membership_event = Mock(
-            return_value=make_awaitable(
-                (
-                    "example.com",
-                    membership_event,
-                    RoomVersions.V10,
-                )
-            )
-        )
 
         EVENT_CREATE = make_event_from_dict(
             {
@@ -640,6 +620,26 @@ class PartialJoinTestCase(unittest.FederatingHomeserverTestCase):
             },
             room_version=RoomVersions.V10,
         )
+        membership_event = make_event_from_dict(
+            {
+                "room_id": room_id,
+                "type": "m.room.member",
+                "sender": "@alice:test",
+                "state_key": "@alice:test",
+                "content": {"membership": "join"},
+                "prev_events": [EVENT_INVITATION_MEMBERSHIP.event_id],
+            },
+            RoomVersions.V10,
+        )
+        mock_make_membership_event = Mock(
+            return_value=make_awaitable(
+                (
+                    "example.com",
+                    membership_event,
+                    RoomVersions.V10,
+                )
+            )
+        )
         mock_send_join = Mock(
             return_value=make_awaitable(
                 SendJoinResult(
-- 
cgit 1.5.1


From c10e13125057e506381d1be8c2ec1394eee45d62 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 13 Feb 2023 11:49:20 +0000
Subject: Apply logging from hotfixes branch to develop (#15054)

* Apply logging from hotfixes branch to develop

Part of #4826.

Originally added in #11882.

* Changelog
---
 changelog.d/15054.misc         | 1 +
 synapse/rest/client/account.py | 3 +++
 2 files changed, 4 insertions(+)
 create mode 100644 changelog.d/15054.misc

(limited to 'synapse')

diff --git a/changelog.d/15054.misc b/changelog.d/15054.misc
new file mode 100644
index 0000000000..d800b107cf
--- /dev/null
+++ b/changelog.d/15054.misc
@@ -0,0 +1 @@
+Merge debug logging from the hotfixes branch.
diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py
index 4373c73662..232f3a976d 100644
--- a/synapse/rest/client/account.py
+++ b/synapse/rest/client/account.py
@@ -415,6 +415,7 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet):
             request, MsisdnRequestTokenBody
         )
         msisdn = phone_number_to_msisdn(body.country, body.phone_number)
+        logger.info("Request #%s to verify ownership of %s", body.send_attempt, msisdn)
 
         if not await check_3pid_allowed(self.hs, "msisdn", msisdn):
             raise SynapseError(
@@ -444,6 +445,7 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet):
                 await self.hs.get_clock().sleep(random.randint(1, 10) / 10)
                 return 200, {"sid": random_string(16)}
 
+            logger.info("MSISDN %s is already in use by %s", msisdn, existing_user_id)
             raise SynapseError(400, "MSISDN is already in use", Codes.THREEPID_IN_USE)
 
         if not self.hs.config.registration.account_threepid_delegate_msisdn:
@@ -468,6 +470,7 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet):
         threepid_send_requests.labels(type="msisdn", reason="add_threepid").observe(
             body.send_attempt
         )
+        logger.info("MSISDN %s: got response from identity server: %s", msisdn, ret)
 
         return 200, ret
 
-- 
cgit 1.5.1


From bdccfd24773d7482ae497263634312640dab01d1 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Mon, 13 Feb 2023 12:12:48 +0000
Subject: Refactor arguments of `try_unbind_threepid(_with_id_server)` from
 dict to separate args (#15053)

---
 changelog.d/15053.misc                 |  1 +
 synapse/handlers/auth.py               |  5 ++--
 synapse/handlers/deactivate_account.py |  7 +----
 synapse/handlers/identity.py           | 47 +++++++++++++++++-----------------
 synapse/rest/client/account.py         |  7 +----
 5 files changed, 28 insertions(+), 39 deletions(-)
 create mode 100644 changelog.d/15053.misc

(limited to 'synapse')

diff --git a/changelog.d/15053.misc b/changelog.d/15053.misc
new file mode 100644
index 0000000000..c27528f5c6
--- /dev/null
+++ b/changelog.d/15053.misc
@@ -0,0 +1 @@
+Refactor arguments of `try_unbind_threepid` and `_try_unbind_threepid_with_id_server` to not use dictionaries.
\ No newline at end of file
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 30f2d46c3c..57a6854b1e 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -1593,9 +1593,8 @@ class AuthHandler:
         if medium == "email":
             address = canonicalise_email(address)
 
-        identity_handler = self.hs.get_identity_handler()
-        result = await identity_handler.try_unbind_threepid(
-            user_id, {"medium": medium, "address": address, "id_server": id_server}
+        result = await self.hs.get_identity_handler().try_unbind_threepid(
+            user_id, medium, address, id_server
         )
 
         await self.store.user_delete_threepid(user_id, medium, address)
diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index d74d135c0c..d24f649382 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -106,12 +106,7 @@ class DeactivateAccountHandler:
         for threepid in threepids:
             try:
                 result = await self._identity_handler.try_unbind_threepid(
-                    user_id,
-                    {
-                        "medium": threepid["medium"],
-                        "address": threepid["address"],
-                        "id_server": id_server,
-                    },
+                    user_id, threepid["medium"], threepid["address"], id_server
                 )
                 identity_server_supports_unbinding &= result
             except Exception:
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index 848e46eb9b..bf0f7acf80 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -219,28 +219,31 @@ class IdentityHandler:
             data = json_decoder.decode(e.msg)  # XXX WAT?
             return data
 
-    async def try_unbind_threepid(self, mxid: str, threepid: dict) -> bool:
-        """Attempt to remove a 3PID from an identity server, or if one is not provided, all
-        identity servers we're aware the binding is present on
+    async def try_unbind_threepid(
+        self, mxid: str, medium: str, address: str, id_server: Optional[str]
+    ) -> bool:
+        """Attempt to remove a 3PID from one or more identity servers.
 
         Args:
             mxid: Matrix user ID of binding to be removed
-            threepid: Dict with medium & address of binding to be
-                removed, and an optional id_server.
+            medium: The medium of the third-party ID.
+            address: The address of the third-party ID.
+            id_server: An identity server to attempt to unbind from. If None,
+                attempt to remove the association from all identity servers
+                known to potentially have it.
 
         Raises:
-            SynapseError: If we failed to contact the identity server
+            SynapseError: If we failed to contact one or more identity servers.
 
         Returns:
-            True on success, otherwise False if the identity
-            server doesn't support unbinding (or no identity server found to
-            contact).
+            True on success, otherwise False if the identity server doesn't
+            support unbinding (or no identity server to contact was found).
         """
-        if threepid.get("id_server"):
-            id_servers = [threepid["id_server"]]
+        if id_server:
+            id_servers = [id_server]
         else:
             id_servers = await self.store.get_id_servers_user_bound(
-                user_id=mxid, medium=threepid["medium"], address=threepid["address"]
+                mxid, medium, address
             )
 
         # We don't know where to unbind, so we don't have a choice but to return
@@ -249,20 +252,21 @@ class IdentityHandler:
 
         changed = True
         for id_server in id_servers:
-            changed &= await self.try_unbind_threepid_with_id_server(
-                mxid, threepid, id_server
+            changed &= await self._try_unbind_threepid_with_id_server(
+                mxid, medium, address, id_server
             )
 
         return changed
 
-    async def try_unbind_threepid_with_id_server(
-        self, mxid: str, threepid: dict, id_server: str
+    async def _try_unbind_threepid_with_id_server(
+        self, mxid: str, medium: str, address: str, id_server: str
     ) -> bool:
         """Removes a binding from an identity server
 
         Args:
             mxid: Matrix user ID of binding to be removed
-            threepid: Dict with medium & address of binding to be removed
+            medium: The medium of the third-party ID
+            address: The address of the third-party ID
             id_server: Identity server to unbind from
 
         Raises:
@@ -286,7 +290,7 @@ class IdentityHandler:
 
         content = {
             "mxid": mxid,
-            "threepid": {"medium": threepid["medium"], "address": threepid["address"]},
+            "threepid": {"medium": medium, "address": address},
         }
 
         # we abuse the federation http client to sign the request, but we have to send it
@@ -319,12 +323,7 @@ class IdentityHandler:
         except RequestTimedOutError:
             raise SynapseError(500, "Timed out contacting identity server")
 
-        await self.store.remove_user_bound_threepid(
-            user_id=mxid,
-            medium=threepid["medium"],
-            address=threepid["address"],
-            id_server=id_server,
-        )
+        await self.store.remove_user_bound_threepid(mxid, medium, address, id_server)
 
         return changed
 
diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py
index 232f3a976d..662f5bf762 100644
--- a/synapse/rest/client/account.py
+++ b/synapse/rest/client/account.py
@@ -737,12 +737,7 @@ class ThreepidUnbindRestServlet(RestServlet):
         # Attempt to unbind the threepid from an identity server. If id_server is None, try to
         # unbind from all identity servers this threepid has been added to in the past
         result = await self.identity_handler.try_unbind_threepid(
-            requester.user.to_string(),
-            {
-                "address": body.address,
-                "medium": body.medium,
-                "id_server": body.id_server,
-            },
+            requester.user.to_string(), body.medium, body.address, body.id_server
         )
         return 200, {"id_server_unbind_result": "success" if result else "no-support"}
 
-- 
cgit 1.5.1


From 3d7aead5d62e6da97e006199b3f957325e54b053 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Mon, 13 Feb 2023 16:30:58 +0000
Subject: Tweak comment on `_is_local_room_accessible` as part of room
 visibility in `/hierarchy` to clarify the condition for a room being visible.
 (#14834)

---
 changelog.d/14834.misc           | 1 +
 synapse/handlers/room_summary.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/14834.misc

(limited to 'synapse')

diff --git a/changelog.d/14834.misc b/changelog.d/14834.misc
new file mode 100644
index 0000000000..e683212dc4
--- /dev/null
+++ b/changelog.d/14834.misc
@@ -0,0 +1 @@
+Tweak comment on `_is_local_room_accessible` as part of room visibility in `/hierarchy` to clarify the condition for a room being visible.
\ No newline at end of file
diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py
index 4472019fbc..807245160d 100644
--- a/synapse/handlers/room_summary.py
+++ b/synapse/handlers/room_summary.py
@@ -521,8 +521,8 @@ class RoomSummaryHandler:
 
         It should return true if:
 
-        * The requester is joined or can join the room (per MSC3173).
-        * The origin server has any user that is joined or can join the room.
+        * The requesting user is joined or can join the room (per MSC3173); or
+        * The origin server has any user that is joined or can join the room; or
         * The history visibility is set to world readable.
 
         Args:
-- 
cgit 1.5.1


From db2b105d69fa331bb3f050df82266314f61577ea Mon Sep 17 00:00:00 2001
From: Harishankar Kumar <31770598+hari01584@users.noreply.github.com>
Date: Tue, 14 Feb 2023 15:07:08 +0530
Subject: Change collection[str] to StrCollection in event_auth code (#14929)

Signed-off-by: Harishankar Kumar <hari01584@gmail.com>
---
 changelog.d/14929.misc                             |  1 +
 synapse/event_auth.py                              | 23 +++++++++-------------
 synapse/events/__init__.py                         |  6 +++---
 synapse/storage/databases/main/events.py           |  7 +++----
 .../storage/databases/main/events_bg_updates.py    |  6 +++---
 5 files changed, 19 insertions(+), 24 deletions(-)
 create mode 100644 changelog.d/14929.misc

(limited to 'synapse')

diff --git a/changelog.d/14929.misc b/changelog.d/14929.misc
new file mode 100644
index 0000000000..2cc3614dfd
--- /dev/null
+++ b/changelog.d/14929.misc
@@ -0,0 +1 @@
+Use `StrCollection` to avoid potential bugs with `Collection[str]`.
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index e0be9f88cc..4d6d1b8ebd 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -16,18 +16,7 @@
 import collections.abc
 import logging
 import typing
-from typing import (
-    Any,
-    Collection,
-    Dict,
-    Iterable,
-    List,
-    Mapping,
-    Optional,
-    Set,
-    Tuple,
-    Union,
-)
+from typing import Any, Dict, Iterable, List, Mapping, Optional, Set, Tuple, Union
 
 from canonicaljson import encode_canonical_json
 from signedjson.key import decode_verify_key_bytes
@@ -56,7 +45,13 @@ from synapse.api.room_versions import (
     RoomVersions,
 )
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
-from synapse.types import MutableStateMap, StateMap, UserID, get_domain_from_id
+from synapse.types import (
+    MutableStateMap,
+    StateMap,
+    StrCollection,
+    UserID,
+    get_domain_from_id,
+)
 
 if typing.TYPE_CHECKING:
     # conditional imports to avoid import cycle
@@ -69,7 +64,7 @@ logger = logging.getLogger(__name__)
 class _EventSourceStore(Protocol):
     async def get_events(
         self,
-        event_ids: Collection[str],
+        event_ids: StrCollection,
         redact_behaviour: EventRedactBehaviour,
         get_prev_content: bool = False,
         allow_rejected: bool = False,
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 8aca9a3ab9..91118a8d84 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -39,7 +39,7 @@ from unpaddedbase64 import encode_base64
 
 from synapse.api.constants import RelationTypes
 from synapse.api.room_versions import EventFormatVersions, RoomVersion, RoomVersions
-from synapse.types import JsonDict, RoomStreamToken
+from synapse.types import JsonDict, RoomStreamToken, StrCollection
 from synapse.util.caches import intern_dict
 from synapse.util.frozenutils import freeze
 from synapse.util.stringutils import strtobool
@@ -413,7 +413,7 @@ class EventBase(metaclass=abc.ABCMeta):
         """
         return [e for e, _ in self._dict["prev_events"]]
 
-    def auth_event_ids(self) -> Sequence[str]:
+    def auth_event_ids(self) -> StrCollection:
         """Returns the list of auth event IDs. The order matches the order
         specified in the event, though there is no meaning to it.
 
@@ -558,7 +558,7 @@ class FrozenEventV2(EventBase):
         """
         return self._dict["prev_events"]
 
-    def auth_event_ids(self) -> Sequence[str]:
+    def auth_event_ids(self) -> StrCollection:
         """Returns the list of auth event IDs. The order matches the order
         specified in the event, though there is no meaning to it.
 
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index ffe766fd56..7996cbb557 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -25,7 +25,6 @@ from typing import (
     Iterable,
     List,
     Optional,
-    Sequence,
     Set,
     Tuple,
 )
@@ -51,7 +50,7 @@ from synapse.storage.databases.main.search import SearchEntry
 from synapse.storage.engines import PostgresEngine
 from synapse.storage.util.id_generators import AbstractStreamIdGenerator
 from synapse.storage.util.sequence import SequenceGenerator
-from synapse.types import JsonDict, StateMap, get_domain_from_id
+from synapse.types import JsonDict, StateMap, StrCollection, get_domain_from_id
 from synapse.util import json_encoder
 from synapse.util.iterutils import batch_iter, sorted_topologically
 from synapse.util.stringutils import non_null_str_or_none
@@ -552,7 +551,7 @@ class PersistEventsStore:
         event_chain_id_gen: SequenceGenerator,
         event_to_room_id: Dict[str, str],
         event_to_types: Dict[str, Tuple[str, str]],
-        event_to_auth_chain: Dict[str, Sequence[str]],
+        event_to_auth_chain: Dict[str, StrCollection],
     ) -> None:
         """Calculate the chain cover index for the given events.
 
@@ -846,7 +845,7 @@ class PersistEventsStore:
         event_chain_id_gen: SequenceGenerator,
         event_to_room_id: Dict[str, str],
         event_to_types: Dict[str, Tuple[str, str]],
-        event_to_auth_chain: Dict[str, Sequence[str]],
+        event_to_auth_chain: Dict[str, StrCollection],
         events_to_calc_chain_id_for: Set[str],
         chain_map: Dict[str, Tuple[int, int]],
     ) -> Dict[str, Tuple[int, int]]:
diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
index b9d3c36d60..584536111d 100644
--- a/synapse/storage/databases/main/events_bg_updates.py
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Set, Tuple, cast
+from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, cast
 
 import attr
 
@@ -29,7 +29,7 @@ from synapse.storage.database import (
 )
 from synapse.storage.databases.main.events import PersistEventsStore
 from synapse.storage.types import Cursor
-from synapse.types import JsonDict
+from synapse.types import JsonDict, StrCollection
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -1061,7 +1061,7 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
             self.event_chain_id_gen,  # type: ignore[attr-defined]
             event_to_room_id,
             event_to_types,
-            cast(Dict[str, Sequence[str]], event_to_auth_chain),
+            cast(Dict[str, StrCollection], event_to_auth_chain),
         )
 
         return _CalculateChainCover(
-- 
cgit 1.5.1


From f09db5c9918b6aaeb1f53ab4fac3a7f05f512c5f Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 14 Feb 2023 12:10:29 +0100
Subject: Skip calculating unread push actions in `/sync` when `enable_push` is
 false. (#14980)

---
 changelog.d/14980.misc                               | 1 +
 synapse/handlers/sync.py                             | 8 ++++++++
 synapse/storage/databases/main/event_push_actions.py | 7 +++++++
 3 files changed, 16 insertions(+)
 create mode 100644 changelog.d/14980.misc

(limited to 'synapse')

diff --git a/changelog.d/14980.misc b/changelog.d/14980.misc
new file mode 100644
index 0000000000..145f4a788b
--- /dev/null
+++ b/changelog.d/14980.misc
@@ -0,0 +1 @@
+Skip calculating unread push actions in /sync when enable_push is false.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 4bae46158a..3a9cddf15a 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -269,6 +269,8 @@ class SyncHandler:
         self._state_storage_controller = self._storage_controllers.state
         self._device_handler = hs.get_device_handler()
 
+        self.should_calculate_push_rules = hs.config.push.enable_push
+
         # TODO: flush cache entries on subsequent sync request.
         #    Once we get the next /sync request (ie, one with the same access token
         #    that sets 'since' to 'next_batch'), we know that device won't need a
@@ -1288,6 +1290,12 @@ class SyncHandler:
     async def unread_notifs_for_room_id(
         self, room_id: str, sync_config: SyncConfig
     ) -> RoomNotifCounts:
+        if not self.should_calculate_push_rules:
+            # If push rules have been universally disabled then we know we won't
+            # have any unread counts in the DB, so we may as well skip asking
+            # the DB.
+            return RoomNotifCounts.empty()
+
         with Measure(self.clock, "unread_notifs_for_room_id"):
 
             return await self.store.get_unread_event_push_actions_by_room_for_user(
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 3a0c370fde..eeccf5db24 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -203,11 +203,18 @@ class RoomNotifCounts:
     # Map of thread ID to the notification counts.
     threads: Dict[str, NotifCounts]
 
+    @staticmethod
+    def empty() -> "RoomNotifCounts":
+        return _EMPTY_ROOM_NOTIF_COUNTS
+
     def __len__(self) -> int:
         # To properly account for the amount of space in any caches.
         return len(self.threads) + 1
 
 
+_EMPTY_ROOM_NOTIF_COUNTS = RoomNotifCounts(NotifCounts(), {})
+
+
 def _serialize_action(
     actions: Collection[Union[Mapping, str]], is_highlight: bool
 ) -> str:
-- 
cgit 1.5.1


From cb262713b701d1abcbca03334d17e2d0f81eee4a Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 14 Feb 2023 12:20:25 +0100
Subject: Fix clashing DB txn name (#15070)

* Fix clashing DB txn name

* Newsfile
---
 changelog.d/15070.misc                            | 1 +
 synapse/storage/databases/main/end_to_end_keys.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15070.misc

(limited to 'synapse')

diff --git a/changelog.d/15070.misc b/changelog.d/15070.misc
new file mode 100644
index 0000000000..0f3244de9f
--- /dev/null
+++ b/changelog.d/15070.misc
@@ -0,0 +1 @@
+Fix clashing database transaction name.
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index 752dc16e17..2c2d145666 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -262,7 +262,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
 
         for batch in batch_iter(signature_query, 50):
             cross_sigs_result = await self.db_pool.runInteraction(
-                "get_e2e_cross_signing_signatures",
+                "get_e2e_cross_signing_signatures_for_devices",
                 self._get_e2e_cross_signing_signatures_for_devices_txn,
                 batch,
             )
-- 
cgit 1.5.1


From 463c19ac3648b242c480e299349d2ef90bf38a0b Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Tue, 14 Feb 2023 12:32:19 +0000
Subject: Faster joins: Omit device list updates from partial state rooms in
 /sync (#15069)

...when lazy loading of members is not enabled. It's weird to notify
a client that another user's device list has changed when the client
doesn't think that they share a room.

Note that when a room is un-partial stated, device list updates are
emitted for every member in that room over /sync.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/15069.misc   | 1 +
 synapse/handlers/sync.py | 5 +++++
 2 files changed, 6 insertions(+)
 create mode 100644 changelog.d/15069.misc

(limited to 'synapse')

diff --git a/changelog.d/15069.misc b/changelog.d/15069.misc
new file mode 100644
index 0000000000..e7a619ad2b
--- /dev/null
+++ b/changelog.d/15069.misc
@@ -0,0 +1 @@
+Faster joins: omit device list updates originating from partial state rooms in /sync responses without lazy loading of members enabled.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 3a9cddf15a..4e4595312c 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1399,6 +1399,11 @@ class SyncHandler:
                 for room_id, is_partial_state in results.items()
                 if is_partial_state
             )
+            membership_change_events = [
+                event
+                for event in membership_change_events
+                if not results.get(event.room_id, False)
+            ]
 
         # Incremental eager syncs should additionally include rooms that
         # - we are joined to
-- 
cgit 1.5.1


From e9b1ff9f31f8ff093e7eaf9c54fa8f40a3b66aa8 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Tue, 14 Feb 2023 15:50:59 +0000
Subject: Prevent clients from reporting nonexistent events. (#13779)

---
 changelog.d/13779.bugfix               |  1 +
 synapse/rest/client/report_event.py    | 11 ++++++++++-
 tests/rest/client/test_report_event.py | 12 ++++++++++++
 3 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/13779.bugfix

(limited to 'synapse')

diff --git a/changelog.d/13779.bugfix b/changelog.d/13779.bugfix
new file mode 100644
index 0000000000..a92c722c6e
--- /dev/null
+++ b/changelog.d/13779.bugfix
@@ -0,0 +1 @@
+Prevent clients from reporting nonexistent events.
\ No newline at end of file
diff --git a/synapse/rest/client/report_event.py b/synapse/rest/client/report_event.py
index e2b410cf32..9be5860221 100644
--- a/synapse/rest/client/report_event.py
+++ b/synapse/rest/client/report_event.py
@@ -16,7 +16,7 @@ import logging
 from http import HTTPStatus
 from typing import TYPE_CHECKING, Tuple
 
-from synapse.api.errors import Codes, SynapseError
+from synapse.api.errors import Codes, NotFoundError, SynapseError
 from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_json_object_from_request
 from synapse.http.site import SynapseRequest
@@ -39,6 +39,7 @@ class ReportEventRestServlet(RestServlet):
         self.auth = hs.get_auth()
         self.clock = hs.get_clock()
         self.store = hs.get_datastores().main
+        self._event_handler = self.hs.get_event_handler()
 
     async def on_POST(
         self, request: SynapseRequest, room_id: str, event_id: str
@@ -61,6 +62,14 @@ class ReportEventRestServlet(RestServlet):
                 Codes.BAD_JSON,
             )
 
+        event = await self._event_handler.get_event(
+            requester.user, room_id, event_id, show_redacted=False
+        )
+        if event is None:
+            raise NotFoundError(
+                "Unable to report event: it does not exist or you aren't able to see it."
+            )
+
         await self.store.add_event_report(
             room_id=room_id,
             event_id=event_id,
diff --git a/tests/rest/client/test_report_event.py b/tests/rest/client/test_report_event.py
index 7cb1017a4a..1250685d39 100644
--- a/tests/rest/client/test_report_event.py
+++ b/tests/rest/client/test_report_event.py
@@ -73,6 +73,18 @@ class ReportEventTestCase(unittest.HomeserverTestCase):
         data = {"reason": None, "score": None}
         self._assert_status(400, data)
 
+    def test_cannot_report_nonexistent_event(self) -> None:
+        """
+        Tests that we don't accept event reports for events which do not exist.
+        """
+        channel = self.make_request(
+            "POST",
+            f"rooms/{self.room_id}/report/$nonsenseeventid:test",
+            {"reason": "i am very sad"},
+            access_token=self.other_user_tok,
+        )
+        self.assertEqual(404, channel.code, msg=channel.result["body"])
+
     def _assert_status(self, response_status: int, data: JsonDict) -> None:
         channel = self.make_request(
             "POST", self.report_path, data, access_token=self.other_user_tok
-- 
cgit 1.5.1


From 119e0795a58548fb38fab299e7c362fcbb388d68 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 14 Feb 2023 14:02:19 -0500
Subject: Implement MSC3966: Add a push rule condition to search for a value in
 an array. (#15045)

The `exact_event_property_contains` condition can be used to
search for a value inside of an array.
---
 changelog.d/15045.feature                |  1 +
 rust/benches/evaluator.rs                | 32 +++++++++-------
 rust/src/push/evaluator.rs               | 65 +++++++++++++++++++++++++-------
 rust/src/push/mod.rs                     | 33 +++++++++++++++-
 stubs/synapse/synapse_rust/push.pyi      |  7 ++--
 synapse/config/experimental.py           |  5 +++
 synapse/push/bulk_push_rule_evaluator.py | 21 +++++++----
 synapse/types/__init__.py                |  1 +
 tests/push/test_push_rule_evaluator.py   | 53 ++++++++++++++++++++++++--
 9 files changed, 176 insertions(+), 42 deletions(-)
 create mode 100644 changelog.d/15045.feature

(limited to 'synapse')

diff --git a/changelog.d/15045.feature b/changelog.d/15045.feature
new file mode 100644
index 0000000000..87766befda
--- /dev/null
+++ b/changelog.d/15045.feature
@@ -0,0 +1 @@
+Experimental support for [MSC3966](https://github.com/matrix-org/matrix-spec-proposals/pull/3966): the `exact_event_property_contains` push rule condition.
diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs
index 229553ebf8..8213dfd9ea 100644
--- a/rust/benches/evaluator.rs
+++ b/rust/benches/evaluator.rs
@@ -15,8 +15,8 @@
 #![feature(test)]
 use std::collections::BTreeSet;
 use synapse::push::{
-    evaluator::PushRuleEvaluator, Condition, EventMatchCondition, FilteredPushRules, PushRules,
-    SimpleJsonValue,
+    evaluator::PushRuleEvaluator, Condition, EventMatchCondition, FilteredPushRules, JsonValue,
+    PushRules, SimpleJsonValue,
 };
 use test::Bencher;
 
@@ -27,15 +27,15 @@ fn bench_match_exact(b: &mut Bencher) {
     let flattened_keys = [
         (
             "type".to_string(),
-            SimpleJsonValue::Str("m.text".to_string()),
+            JsonValue::Value(SimpleJsonValue::Str("m.text".to_string())),
         ),
         (
             "room_id".to_string(),
-            SimpleJsonValue::Str("!room:server".to_string()),
+            JsonValue::Value(SimpleJsonValue::Str("!room:server".to_string())),
         ),
         (
             "content.body".to_string(),
-            SimpleJsonValue::Str("test message".to_string()),
+            JsonValue::Value(SimpleJsonValue::Str("test message".to_string())),
         ),
     ]
     .into_iter()
@@ -54,6 +54,7 @@ fn bench_match_exact(b: &mut Bencher) {
         vec![],
         false,
         false,
+        false,
     )
     .unwrap();
 
@@ -76,15 +77,15 @@ fn bench_match_word(b: &mut Bencher) {
     let flattened_keys = [
         (
             "type".to_string(),
-            SimpleJsonValue::Str("m.text".to_string()),
+            JsonValue::Value(SimpleJsonValue::Str("m.text".to_string())),
         ),
         (
             "room_id".to_string(),
-            SimpleJsonValue::Str("!room:server".to_string()),
+            JsonValue::Value(SimpleJsonValue::Str("!room:server".to_string())),
         ),
         (
             "content.body".to_string(),
-            SimpleJsonValue::Str("test message".to_string()),
+            JsonValue::Value(SimpleJsonValue::Str("test message".to_string())),
         ),
     ]
     .into_iter()
@@ -103,6 +104,7 @@ fn bench_match_word(b: &mut Bencher) {
         vec![],
         false,
         false,
+        false,
     )
     .unwrap();
 
@@ -125,15 +127,15 @@ fn bench_match_word_miss(b: &mut Bencher) {
     let flattened_keys = [
         (
             "type".to_string(),
-            SimpleJsonValue::Str("m.text".to_string()),
+            JsonValue::Value(SimpleJsonValue::Str("m.text".to_string())),
         ),
         (
             "room_id".to_string(),
-            SimpleJsonValue::Str("!room:server".to_string()),
+            JsonValue::Value(SimpleJsonValue::Str("!room:server".to_string())),
         ),
         (
             "content.body".to_string(),
-            SimpleJsonValue::Str("test message".to_string()),
+            JsonValue::Value(SimpleJsonValue::Str("test message".to_string())),
         ),
     ]
     .into_iter()
@@ -152,6 +154,7 @@ fn bench_match_word_miss(b: &mut Bencher) {
         vec![],
         false,
         false,
+        false,
     )
     .unwrap();
 
@@ -174,15 +177,15 @@ fn bench_eval_message(b: &mut Bencher) {
     let flattened_keys = [
         (
             "type".to_string(),
-            SimpleJsonValue::Str("m.text".to_string()),
+            JsonValue::Value(SimpleJsonValue::Str("m.text".to_string())),
         ),
         (
             "room_id".to_string(),
-            SimpleJsonValue::Str("!room:server".to_string()),
+            JsonValue::Value(SimpleJsonValue::Str("!room:server".to_string())),
         ),
         (
             "content.body".to_string(),
-            SimpleJsonValue::Str("test message".to_string()),
+            JsonValue::Value(SimpleJsonValue::Str("test message".to_string())),
         ),
     ]
     .into_iter()
@@ -201,6 +204,7 @@ fn bench_eval_message(b: &mut Bencher) {
         vec![],
         false,
         false,
+        false,
     )
     .unwrap();
 
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index dd6b4343ec..2eaa06ad76 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -14,6 +14,7 @@
 
 use std::collections::{BTreeMap, BTreeSet};
 
+use crate::push::JsonValue;
 use anyhow::{Context, Error};
 use lazy_static::lazy_static;
 use log::warn;
@@ -63,7 +64,7 @@ impl RoomVersionFeatures {
 pub struct PushRuleEvaluator {
     /// A mapping of "flattened" keys to simple JSON values in the event, e.g.
     /// includes things like "type" and "content.msgtype".
-    flattened_keys: BTreeMap<String, SimpleJsonValue>,
+    flattened_keys: BTreeMap<String, JsonValue>,
 
     /// The "content.body", if any.
     body: String,
@@ -87,7 +88,7 @@ pub struct PushRuleEvaluator {
 
     /// The related events, indexed by relation type. Flattened in the same manner as
     /// `flattened_keys`.
-    related_events_flattened: BTreeMap<String, BTreeMap<String, SimpleJsonValue>>,
+    related_events_flattened: BTreeMap<String, BTreeMap<String, JsonValue>>,
 
     /// If msc3664, push rules for related events, is enabled.
     related_event_match_enabled: bool,
@@ -101,6 +102,9 @@ pub struct PushRuleEvaluator {
 
     /// If MSC3758 (exact_event_match push rule condition) is enabled.
     msc3758_exact_event_match: bool,
+
+    /// If MSC3966 (exact_event_property_contains push rule condition) is enabled.
+    msc3966_exact_event_property_contains: bool,
 }
 
 #[pymethods]
@@ -109,21 +113,22 @@ impl PushRuleEvaluator {
     #[allow(clippy::too_many_arguments)]
     #[new]
     pub fn py_new(
-        flattened_keys: BTreeMap<String, SimpleJsonValue>,
+        flattened_keys: BTreeMap<String, JsonValue>,
         has_mentions: bool,
         user_mentions: BTreeSet<String>,
         room_mention: bool,
         room_member_count: u64,
         sender_power_level: Option<i64>,
         notification_power_levels: BTreeMap<String, i64>,
-        related_events_flattened: BTreeMap<String, BTreeMap<String, SimpleJsonValue>>,
+        related_events_flattened: BTreeMap<String, BTreeMap<String, JsonValue>>,
         related_event_match_enabled: bool,
         room_version_feature_flags: Vec<String>,
         msc3931_enabled: bool,
         msc3758_exact_event_match: bool,
+        msc3966_exact_event_property_contains: bool,
     ) -> Result<Self, Error> {
         let body = match flattened_keys.get("content.body") {
-            Some(SimpleJsonValue::Str(s)) => s.clone(),
+            Some(JsonValue::Value(SimpleJsonValue::Str(s))) => s.clone(),
             _ => String::new(),
         };
 
@@ -141,6 +146,7 @@ impl PushRuleEvaluator {
             room_version_feature_flags,
             msc3931_enabled,
             msc3758_exact_event_match,
+            msc3966_exact_event_property_contains,
         })
     }
 
@@ -263,6 +269,9 @@ impl PushRuleEvaluator {
             KnownCondition::RelatedEventMatch(event_match) => {
                 self.match_related_event_match(event_match, user_id)?
             }
+            KnownCondition::ExactEventPropertyContains(exact_event_match) => {
+                self.match_exact_event_property_contains(exact_event_match)?
+            }
             KnownCondition::IsUserMention => {
                 if let Some(uid) = user_id {
                     self.user_mentions.contains(uid)
@@ -345,7 +354,7 @@ impl PushRuleEvaluator {
             return Ok(false);
         };
 
-        let haystack = if let Some(SimpleJsonValue::Str(haystack)) =
+        let haystack = if let Some(JsonValue::Value(SimpleJsonValue::Str(haystack))) =
             self.flattened_keys.get(&*event_match.key)
         {
             haystack
@@ -377,7 +386,9 @@ impl PushRuleEvaluator {
 
         let value = &exact_event_match.value;
 
-        let haystack = if let Some(haystack) = self.flattened_keys.get(&*exact_event_match.key) {
+        let haystack = if let Some(JsonValue::Value(haystack)) =
+            self.flattened_keys.get(&*exact_event_match.key)
+        {
             haystack
         } else {
             return Ok(false);
@@ -441,11 +452,12 @@ impl PushRuleEvaluator {
             return Ok(false);
         };
 
-        let haystack = if let Some(SimpleJsonValue::Str(haystack)) = event.get(&**key) {
-            haystack
-        } else {
-            return Ok(false);
-        };
+        let haystack =
+            if let Some(JsonValue::Value(SimpleJsonValue::Str(haystack))) = event.get(&**key) {
+                haystack
+            } else {
+                return Ok(false);
+            };
 
         // For the content.body we match against "words", but for everything
         // else we match against the entire value.
@@ -459,6 +471,29 @@ impl PushRuleEvaluator {
         compiled_pattern.is_match(haystack)
     }
 
+    /// Evaluates a `exact_event_property_contains` condition. (MSC3966)
+    fn match_exact_event_property_contains(
+        &self,
+        exact_event_match: &ExactEventMatchCondition,
+    ) -> Result<bool, Error> {
+        // First check if the feature is enabled.
+        if !self.msc3966_exact_event_property_contains {
+            return Ok(false);
+        }
+
+        let value = &exact_event_match.value;
+
+        let haystack = if let Some(JsonValue::Array(haystack)) =
+            self.flattened_keys.get(&*exact_event_match.key)
+        {
+            haystack
+        } else {
+            return Ok(false);
+        };
+
+        Ok(haystack.contains(&**value))
+    }
+
     /// Match the member count against an 'is' condition
     /// The `is` condition can be things like '>2', '==3' or even just '4'.
     fn match_member_count(&self, is: &str) -> Result<bool, Error> {
@@ -488,7 +523,7 @@ fn push_rule_evaluator() {
     let mut flattened_keys = BTreeMap::new();
     flattened_keys.insert(
         "content.body".to_string(),
-        SimpleJsonValue::Str("foo bar bob hello".to_string()),
+        JsonValue::Value(SimpleJsonValue::Str("foo bar bob hello".to_string())),
     );
     let evaluator = PushRuleEvaluator::py_new(
         flattened_keys,
@@ -503,6 +538,7 @@ fn push_rule_evaluator() {
         vec![],
         true,
         true,
+        true,
     )
     .unwrap();
 
@@ -519,7 +555,7 @@ fn test_requires_room_version_supports_condition() {
     let mut flattened_keys = BTreeMap::new();
     flattened_keys.insert(
         "content.body".to_string(),
-        SimpleJsonValue::Str("foo bar bob hello".to_string()),
+        JsonValue::Value(SimpleJsonValue::Str("foo bar bob hello".to_string())),
     );
     let flags = vec![RoomVersionFeatures::ExtensibleEvents.as_str().to_string()];
     let evaluator = PushRuleEvaluator::py_new(
@@ -535,6 +571,7 @@ fn test_requires_room_version_supports_condition() {
         flags,
         true,
         true,
+        true,
     )
     .unwrap();
 
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index 79e519fe11..253b5f367c 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -58,7 +58,7 @@ use anyhow::{Context, Error};
 use log::warn;
 use pyo3::exceptions::PyTypeError;
 use pyo3::prelude::*;
-use pyo3::types::{PyBool, PyLong, PyString};
+use pyo3::types::{PyBool, PyList, PyLong, PyString};
 use pythonize::{depythonize, pythonize};
 use serde::de::Error as _;
 use serde::{Deserialize, Serialize};
@@ -280,6 +280,35 @@ impl<'source> FromPyObject<'source> for SimpleJsonValue {
     }
 }
 
+/// A JSON value (list, string, int, boolean, or null).
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+#[serde(untagged)]
+pub enum JsonValue {
+    Array(Vec<SimpleJsonValue>),
+    Value(SimpleJsonValue),
+}
+
+impl<'source> FromPyObject<'source> for JsonValue {
+    fn extract(ob: &'source PyAny) -> PyResult<Self> {
+        if let Ok(l) = <PyList as pyo3::PyTryFrom>::try_from(ob) {
+            match l.iter().map(SimpleJsonValue::extract).collect() {
+                Ok(a) => Ok(JsonValue::Array(a)),
+                Err(e) => Err(PyTypeError::new_err(format!(
+                    "Can't convert to JsonValue::Array: {}",
+                    e
+                ))),
+            }
+        } else if let Ok(v) = SimpleJsonValue::extract(ob) {
+            Ok(JsonValue::Value(v))
+        } else {
+            Err(PyTypeError::new_err(format!(
+                "Can't convert from {} to JsonValue",
+                ob.get_type().name()?
+            )))
+        }
+    }
+}
+
 /// A condition used in push rules to match against an event.
 ///
 /// We need this split as `serde` doesn't give us the ability to have a
@@ -303,6 +332,8 @@ pub enum KnownCondition {
     ExactEventMatch(ExactEventMatchCondition),
     #[serde(rename = "im.nheko.msc3664.related_event_match")]
     RelatedEventMatch(RelatedEventMatchCondition),
+    #[serde(rename = "org.matrix.msc3966.exact_event_property_contains")]
+    ExactEventPropertyContains(ExactEventMatchCondition),
     #[serde(rename = "org.matrix.msc3952.is_user_mention")]
     IsUserMention,
     #[serde(rename = "org.matrix.msc3952.is_room_mention")]
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index 328f681a29..7b33c30cc9 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -14,7 +14,7 @@
 
 from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Set, Tuple, Union
 
-from synapse.types import JsonDict, SimpleJsonValue
+from synapse.types import JsonDict, JsonValue
 
 class PushRule:
     @property
@@ -56,18 +56,19 @@ def get_base_rule_ids() -> Collection[str]: ...
 class PushRuleEvaluator:
     def __init__(
         self,
-        flattened_keys: Mapping[str, SimpleJsonValue],
+        flattened_keys: Mapping[str, JsonValue],
         has_mentions: bool,
         user_mentions: Set[str],
         room_mention: bool,
         room_member_count: int,
         sender_power_level: Optional[int],
         notification_power_levels: Mapping[str, int],
-        related_events_flattened: Mapping[str, Mapping[str, SimpleJsonValue]],
+        related_events_flattened: Mapping[str, Mapping[str, JsonValue]],
         related_event_match_enabled: bool,
         room_version_feature_flags: Tuple[str, ...],
         msc3931_enabled: bool,
         msc3758_exact_event_match: bool,
+        msc3966_exact_event_property_contains: bool,
     ): ...
     def run(
         self,
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 6ac2f0c10d..1d294f8798 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -188,3 +188,8 @@ class ExperimentalConfig(Config):
         self.msc3958_supress_edit_notifs = experimental.get(
             "msc3958_supress_edit_notifs", False
         )
+
+        # MSC3966: exact_event_property_contains push rule condition.
+        self.msc3966_exact_event_property_contains = experimental.get(
+            "msc3966_exact_event_property_contains", False
+        )
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index f6a5bffb0f..2e917c90c4 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -44,7 +44,7 @@ from synapse.events.snapshot import EventContext
 from synapse.state import POWER_KEY
 from synapse.storage.databases.main.roommember import EventIdMembership
 from synapse.synapse_rust.push import FilteredPushRules, PushRuleEvaluator
-from synapse.types import SimpleJsonValue
+from synapse.types import JsonValue
 from synapse.types.state import StateFilter
 from synapse.util.caches import register_cache
 from synapse.util.metrics import measure_func
@@ -259,13 +259,13 @@ class BulkPushRuleEvaluator:
 
     async def _related_events(
         self, event: EventBase
-    ) -> Dict[str, Dict[str, SimpleJsonValue]]:
+    ) -> Dict[str, Dict[str, JsonValue]]:
         """Fetches the related events for 'event'. Sets the im.vector.is_falling_back key if the event is from a fallback relation
 
         Returns:
             Mapping of relation type to flattened events.
         """
-        related_events: Dict[str, Dict[str, SimpleJsonValue]] = {}
+        related_events: Dict[str, Dict[str, JsonValue]] = {}
         if self._related_event_match_enabled:
             related_event_id = event.content.get("m.relates_to", {}).get("event_id")
             relation_type = event.content.get("m.relates_to", {}).get("rel_type")
@@ -429,6 +429,7 @@ class BulkPushRuleEvaluator:
             event.room_version.msc3931_push_features,
             self.hs.config.experimental.msc1767_enabled,  # MSC3931 flag
             self.hs.config.experimental.msc3758_exact_event_match,
+            self.hs.config.experimental.msc3966_exact_event_property_contains,
         )
 
         users = rules_by_user.keys()
@@ -502,18 +503,22 @@ RulesByUser = Dict[str, List[Rule]]
 StateGroup = Union[object, int]
 
 
+def _is_simple_value(value: Any) -> bool:
+    return isinstance(value, (bool, str)) or type(value) is int or value is None
+
+
 def _flatten_dict(
     d: Union[EventBase, Mapping[str, Any]],
     prefix: Optional[List[str]] = None,
-    result: Optional[Dict[str, SimpleJsonValue]] = None,
+    result: Optional[Dict[str, JsonValue]] = None,
     *,
     msc3783_escape_event_match_key: bool = False,
-) -> Dict[str, SimpleJsonValue]:
+) -> Dict[str, JsonValue]:
     """
     Given a JSON dictionary (or event) which might contain sub dictionaries,
     flatten it into a single layer dictionary by combining the keys & sub-keys.
 
-    String, integer, boolean, and null values are kept. All others are dropped.
+    String, integer, boolean, null or lists of those values are kept. All others are dropped.
 
     Transforms:
 
@@ -542,8 +547,10 @@ def _flatten_dict(
             # nested fields.
             key = key.replace("\\", "\\\\").replace(".", "\\.")
 
-        if isinstance(value, (bool, str)) or type(value) is int or value is None:
+        if _is_simple_value(value):
             result[".".join(prefix + [key])] = value
+        elif isinstance(value, (list, tuple)):
+            result[".".join(prefix + [key])] = [v for v in value if _is_simple_value(v)]
         elif isinstance(value, Mapping):
             # do not set `room_version` due to recursion considerations below
             _flatten_dict(
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index 52e366c8ae..33363867c4 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -71,6 +71,7 @@ MutableStateMap = MutableMapping[StateKey, T]
 # JSON types. These could be made stronger, but will do for now.
 # A "simple" (canonical) JSON value.
 SimpleJsonValue = Optional[Union[str, int, bool]]
+JsonValue = Union[List[SimpleJsonValue], Tuple[SimpleJsonValue, ...], SimpleJsonValue]
 # A JSON-serialisable dict.
 JsonDict = Dict[str, Any]
 # A JSON-serialisable mapping; roughly speaking an immutable JSONDict.
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index 6603447341..0554d247bc 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -32,6 +32,7 @@ from synapse.storage.databases.main.appservice import _make_exclusive_regex
 from synapse.synapse_rust.push import PushRuleEvaluator
 from synapse.types import JsonDict, JsonMapping, UserID
 from synapse.util import Clock
+from synapse.util.frozenutils import freeze
 
 from tests import unittest
 from tests.test_utils.event_injection import create_event, inject_member_event
@@ -57,17 +58,24 @@ class FlattenDictTestCase(unittest.TestCase):
         )
 
     def test_non_string(self) -> None:
-        """Booleans, ints, and nulls should be kept while other items are dropped."""
+        """String, booleans, ints, nulls and list of those should be kept while other items are dropped."""
         input: Dict[str, Any] = {
             "woo": "woo",
             "foo": True,
             "bar": 1,
             "baz": None,
-            "fuzz": [],
+            "fuzz": ["woo", True, 1, None, [], {}],
             "boo": {},
         }
         self.assertEqual(
-            {"woo": "woo", "foo": True, "bar": 1, "baz": None}, _flatten_dict(input)
+            {
+                "woo": "woo",
+                "foo": True,
+                "bar": 1,
+                "baz": None,
+                "fuzz": ["woo", True, 1, None],
+            },
+            _flatten_dict(input),
         )
 
     def test_event(self) -> None:
@@ -117,6 +125,7 @@ class FlattenDictTestCase(unittest.TestCase):
             "room_id": "!test:test",
             "sender": "@alice:test",
             "type": "m.room.message",
+            "content.org.matrix.msc1767.markup": [],
         }
         self.assertEqual(expected, _flatten_dict(event))
 
@@ -128,6 +137,7 @@ class FlattenDictTestCase(unittest.TestCase):
             "room_id": "!test:test",
             "sender": "@alice:test",
             "type": "m.room.message",
+            "content.org.matrix.msc1767.markup": [],
         }
         self.assertEqual(expected, _flatten_dict(event))
 
@@ -169,6 +179,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
             room_version_feature_flags=event.room_version.msc3931_push_features,
             msc3931_enabled=True,
             msc3758_exact_event_match=True,
+            msc3966_exact_event_property_contains=True,
         )
 
     def test_display_name(self) -> None:
@@ -549,6 +560,42 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
                 "incorrect types should not match",
             )
 
+    def test_exact_event_property_contains(self) -> None:
+        """Check that exact_event_property_contains conditions work as expected."""
+
+        condition = {
+            "kind": "org.matrix.msc3966.exact_event_property_contains",
+            "key": "content.value",
+            "value": "foobaz",
+        }
+        self._assert_matches(
+            condition,
+            {"value": ["foobaz"]},
+            "exact value should match",
+        )
+        self._assert_matches(
+            condition,
+            {"value": ["foobaz", "bugz"]},
+            "extra values should match",
+        )
+        self._assert_not_matches(
+            condition,
+            {"value": ["FoobaZ"]},
+            "values should match and be case-sensitive",
+        )
+        self._assert_not_matches(
+            condition,
+            {"value": "foobaz"},
+            "does not search in a string",
+        )
+
+        # it should work on frozendicts too
+        self._assert_matches(
+            condition,
+            freeze({"value": ["foobaz"]}),
+            "values should match on frozendicts",
+        )
+
     def test_no_body(self) -> None:
         """Not having a body shouldn't break the evaluator."""
         evaluator = self._get_evaluator({})
-- 
cgit 1.5.1


From 06ba71083eefbe1fd9a8eeed10e541dd7b52796f Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 14 Feb 2023 23:42:29 +0000
Subject: Fix order of partial state tables when purging (#15068)

* Fix order of partial state tables when purging

`partial_state_rooms` has an FK on `events` pointing to the join event we
get from `/send_join`, so we must delete from that table before deleting
from `events`.

**NB:** It would be nice to cancel any resync processes for the room
being purged. We do not do this at present. To do so reliably we'd need
an internal HTTP "replication" endpoint, because the worker doing the
resync process may be different to that handling the purge request.

The first time the resync process tries to write data after the deletion
it will fail because we have deleted necessary data e.g. auth
events. AFAICS it will not retry the resync, so the only downside to
not cancelling the resync is a scary-looking traceback.

(This is presumably extremely race-sensitive.)

* Changelog

* admist(?) -> between

* Warn about a race

* Fix typo, thanks Sean

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

---------

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
---
 changelog.d/15068.bugfix                       | 1 +
 synapse/handlers/federation.py                 | 5 +++++
 synapse/storage/databases/main/purge_events.py | 6 ++++--
 3 files changed, 10 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15068.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15068.bugfix b/changelog.d/15068.bugfix
new file mode 100644
index 0000000000..f09ffa2877
--- /dev/null
+++ b/changelog.d/15068.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.76.0 where partially-joined rooms could not be deleted using the [purge room API](https://matrix-org.github.io/synapse/latest/admin_api/rooms.html#delete-room-api).
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 08727e4857..1d0f6bcd6f 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -1880,6 +1880,11 @@ class FederationHandler:
                 logger.info("Updating current state for %s", room_id)
                 # TODO(faster_joins): notify workers in notify_room_un_partial_stated
                 #   https://github.com/matrix-org/synapse/issues/12994
+                #
+                # NB: there's a potential race here. If room is purged just before we
+                # call this, we _might_ end up inserting rows into current_state_events.
+                # (The logic is hard to chase through.) We think this is fine, but if
+                # not the HS admin should purge the room again.
                 await self.state_handler.update_current_state(room_id)
 
                 logger.info("Handling any pending device list updates")
diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py
index 9213ce0b5a..9c41d01e13 100644
--- a/synapse/storage/databases/main/purge_events.py
+++ b/synapse/storage/databases/main/purge_events.py
@@ -420,12 +420,14 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
             "event_push_actions",
             "event_search",
             "event_failed_pull_attempts",
+            # Note: the partial state tables have foreign keys between each other, and to
+            # `events` and `rooms`. We need to delete from them in the right order.
             "partial_state_events",
+            "partial_state_rooms_servers",
+            "partial_state_rooms",
             "events",
             "federation_inbound_events_staging",
             "local_current_membership",
-            "partial_state_rooms_servers",
-            "partial_state_rooms",
             "receipts_graph",
             "receipts_linearized",
             "room_aliases",
-- 
cgit 1.5.1


From 5febf88b6c5194582f427142dc0850625547c0d9 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Wed, 15 Feb 2023 11:47:57 +0000
Subject: Update the error code for duplicate annotation (#15075)

---
 changelog.d/15075.feature   | 2 ++
 synapse/api/errors.py       | 4 ++++
 synapse/handlers/message.py | 6 +++++-
 3 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15075.feature

(limited to 'synapse')

diff --git a/changelog.d/15075.feature b/changelog.d/15075.feature
new file mode 100644
index 0000000000..d25a7567a4
--- /dev/null
+++ b/changelog.d/15075.feature
@@ -0,0 +1,2 @@
+Update the error code returned when a user sends a duplicate annotation.
+
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index 9235ce6536..e1737de59b 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -108,6 +108,10 @@ class Codes(str, Enum):
 
     USER_AWAITING_APPROVAL = "ORG.MATRIX.MSC3866_USER_AWAITING_APPROVAL"
 
+    # Attempt to send a second annotation with the same event type & annotation key
+    # MSC2677
+    DUPLICATE_ANNOTATION = "M_DUPLICATE_ANNOTATION"
+
 
 class CodeMessageException(RuntimeError):
     """An exception with integer code and message string attributes.
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 8f5b658d9d..aa90d0000d 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -1337,7 +1337,11 @@ class EventCreationHandler:
                 relation.parent_id, event.type, aggregation_key, event.sender
             )
             if already_exists:
-                raise SynapseError(400, "Can't send same reaction twice")
+                raise SynapseError(
+                    400,
+                    "Can't send same reaction twice",
+                    errcode=Codes.DUPLICATE_ANNOTATION,
+                )
 
         # Don't attempt to start a thread if the parent event is a relation.
         elif relation.rel_type == RelationTypes.THREAD:
-- 
cgit 1.5.1


From 27a3a72a50cb24f25e48fad1e6e79aba2cd1bea2 Mon Sep 17 00:00:00 2001
From: 999lakhisidhu <42063995+999lakhisidhu@users.noreply.github.com>
Date: Wed, 15 Feb 2023 16:39:31 +0400
Subject: Support for selecting the Redis logical database. (#15034)

Note that this is only used for the key-value store (cached values)
and not for the pub/sub replication used by Synapse.
---
 changelog.d/15034.feature                        | 1 +
 contrib/docker_compose_workers/README.md         | 1 +
 docs/usage/configuration/config_documentation.md | 4 ++++
 synapse/config/redis.py                          | 1 +
 synapse/server.py                                | 1 +
 5 files changed, 8 insertions(+)
 create mode 100644 changelog.d/15034.feature

(limited to 'synapse')

diff --git a/changelog.d/15034.feature b/changelog.d/15034.feature
new file mode 100644
index 0000000000..34f320da92
--- /dev/null
+++ b/changelog.d/15034.feature
@@ -0,0 +1 @@
+Allow Synapse to use a specific Redis [logical database](https://redis.io/commands/select/) in worker-mode deployments.
diff --git a/contrib/docker_compose_workers/README.md b/contrib/docker_compose_workers/README.md
index bdd3dd32e0..d3cdfe5614 100644
--- a/contrib/docker_compose_workers/README.md
+++ b/contrib/docker_compose_workers/README.md
@@ -68,6 +68,7 @@ redis:
   enabled: true
   host: redis
   port: 6379
+  # dbid:  <redis_logical_db_id>
   # password: <secret_password>  
 ```
 
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 2883f76a26..75483bfb12 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3927,6 +3927,9 @@ This setting has the following sub-options:
 * `host` and `port`: Optional host and port to use to connect to redis. Defaults to
    localhost and 6379
 * `password`: Optional password if configured on the Redis instance.
+* `dbid`: Optional ID of the Redis logical database to connect to, if a specific logical database should be used.
+
+  _Added in Synapse 1.78.0._
 
 Example configuration:
 ```yaml
@@ -3935,6 +3938,7 @@ redis:
   host: localhost
   port: 6379
   password: <secret_password>
+  dbid: <dbid>
 ```
 ---
 ## Individual worker configuration
diff --git a/synapse/config/redis.py b/synapse/config/redis.py
index b42dd2e93a..e6a75be434 100644
--- a/synapse/config/redis.py
+++ b/synapse/config/redis.py
@@ -33,4 +33,5 @@ class RedisConfig(Config):
 
         self.redis_host = redis_config.get("host", "localhost")
         self.redis_port = redis_config.get("port", 6379)
+        self.redis_dbid = redis_config.get("dbid", None)
         self.redis_password = redis_config.get("password")
diff --git a/synapse/server.py b/synapse/server.py
index efc6b5f895..e5a3475247 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -827,6 +827,7 @@ class HomeServer(metaclass=abc.ABCMeta):
             hs=self,
             host=self.config.redis.redis_host,
             port=self.config.redis.redis_port,
+            dbid=self.config.redis.redis_dbid,
             password=self.config.redis.redis_password,
             reconnect=True,
         )
-- 
cgit 1.5.1


From 3ad817bfe561e0b7ddcd8398a76a4a4d3d789138 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Wed, 15 Feb 2023 13:59:06 +0000
Subject: Fix federated joins when the first server in the list is not in the
 room (#15074)

Previously we would give up upon receiving a 404 from the first server,
instead of trying the rest of the servers in the list.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/15074.bugfix                |  1 +
 synapse/federation/federation_client.py | 11 +++++------
 2 files changed, 6 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/15074.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15074.bugfix b/changelog.d/15074.bugfix
new file mode 100644
index 0000000000..d1ceb4f4c8
--- /dev/null
+++ b/changelog.d/15074.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where federated joins would fail if the first server in the list of servers to try is not in the room.
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 0ac85a3be7..7d04560dca 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -884,7 +884,7 @@ class FederationClient(FederationBase):
                 if 500 <= e.code < 600:
                     failover = True
 
-                elif e.code == 400 and synapse_error.errcode in failover_errcodes:
+                elif 400 <= e.code < 500 and synapse_error.errcode in failover_errcodes:
                     failover = True
 
                 elif failover_on_unknown_endpoint and self._is_unknown_endpoint(
@@ -999,14 +999,13 @@ class FederationClient(FederationBase):
 
             return destination, ev, room_version
 
+        failover_errcodes = {Codes.NOT_FOUND}
         # MSC3083 defines additional error codes for room joins. Unfortunately
         # we do not yet know the room version, assume these will only be returned
         # by valid room versions.
-        failover_errcodes = (
-            (Codes.UNABLE_AUTHORISE_JOIN, Codes.UNABLE_TO_GRANT_JOIN)
-            if membership == Membership.JOIN
-            else None
-        )
+        if membership == Membership.JOIN:
+            failover_errcodes.add(Codes.UNABLE_AUTHORISE_JOIN)
+            failover_errcodes.add(Codes.UNABLE_TO_GRANT_JOIN)
 
         return await self._try_destination_list(
             "make_" + membership,
-- 
cgit 1.5.1


From 979f237b282cbdaab8d74cc4c7473117093d63d9 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 16 Feb 2023 09:51:22 -0500
Subject: Update intentional mentions (MSC3952) to depend on
 `exact_event_match` (MSC3758). (#15037)

This replaces the specific `is_room_mention` push rule condition
used in MSC3952 with the generic `exact_event_match` push rule
condition from MSC3758.

No functionality changes due to this.
---
 changelog.d/15037.misc                      |  1 +
 rust/benches/evaluator.rs                   |  4 ----
 rust/src/push/base_rules.rs                 |  7 +++++--
 rust/src/push/evaluator.rs                  |  7 -------
 rust/src/push/mod.rs                        | 13 -------------
 stubs/synapse/synapse_rust/push.pyi         |  1 -
 synapse/config/experimental.py              |  7 ++++---
 synapse/push/bulk_push_rule_evaluator.py    |  4 ----
 tests/push/test_bulk_push_rule_evaluator.py | 18 ++++++++++++++++--
 tests/push/test_push_rule_evaluator.py      | 23 -----------------------
 10 files changed, 26 insertions(+), 59 deletions(-)
 create mode 100644 changelog.d/15037.misc

(limited to 'synapse')

diff --git a/changelog.d/15037.misc b/changelog.d/15037.misc
new file mode 100644
index 0000000000..fabfe77d35
--- /dev/null
+++ b/changelog.d/15037.misc
@@ -0,0 +1 @@
+Update [MSC3952](https://github.com/matrix-org/matrix-spec-proposals/pull/3952) support based on changes to the MSC.
diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs
index 8213dfd9ea..efd19a2165 100644
--- a/rust/benches/evaluator.rs
+++ b/rust/benches/evaluator.rs
@@ -45,7 +45,6 @@ fn bench_match_exact(b: &mut Bencher) {
         flattened_keys,
         false,
         BTreeSet::new(),
-        false,
         10,
         Some(0),
         Default::default(),
@@ -95,7 +94,6 @@ fn bench_match_word(b: &mut Bencher) {
         flattened_keys,
         false,
         BTreeSet::new(),
-        false,
         10,
         Some(0),
         Default::default(),
@@ -145,7 +143,6 @@ fn bench_match_word_miss(b: &mut Bencher) {
         flattened_keys,
         false,
         BTreeSet::new(),
-        false,
         10,
         Some(0),
         Default::default(),
@@ -195,7 +192,6 @@ fn bench_eval_message(b: &mut Bencher) {
         flattened_keys,
         false,
         BTreeSet::new(),
-        false,
         10,
         Some(0),
         Default::default(),
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index dcbca340fe..4a62b9696f 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -21,13 +21,13 @@ use lazy_static::lazy_static;
 use serde_json::Value;
 
 use super::KnownCondition;
-use crate::push::Action;
 use crate::push::Condition;
 use crate::push::EventMatchCondition;
 use crate::push::PushRule;
 use crate::push::RelatedEventMatchCondition;
 use crate::push::SetTweak;
 use crate::push::TweakValue;
+use crate::push::{Action, ExactEventMatchCondition, SimpleJsonValue};
 
 const HIGHLIGHT_ACTION: Action = Action::SetTweak(SetTweak {
     set_tweak: Cow::Borrowed("highlight"),
@@ -168,7 +168,10 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
         rule_id: Cow::Borrowed(".org.matrix.msc3952.is_room_mention"),
         priority_class: 5,
         conditions: Cow::Borrowed(&[
-            Condition::Known(KnownCondition::IsRoomMention),
+            Condition::Known(KnownCondition::ExactEventMatch(ExactEventMatchCondition {
+                key: Cow::Borrowed("content.org.matrix.msc3952.mentions.room"),
+                value: Cow::Borrowed(&SimpleJsonValue::Bool(true)),
+            })),
             Condition::Known(KnownCondition::SenderNotificationPermission {
                 key: Cow::Borrowed("room"),
             }),
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index 2eaa06ad76..55551ecb56 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -73,8 +73,6 @@ pub struct PushRuleEvaluator {
     has_mentions: bool,
     /// The user mentions that were part of the message.
     user_mentions: BTreeSet<String>,
-    /// True if the message is a room message.
-    room_mention: bool,
 
     /// The number of users in the room.
     room_member_count: u64,
@@ -116,7 +114,6 @@ impl PushRuleEvaluator {
         flattened_keys: BTreeMap<String, JsonValue>,
         has_mentions: bool,
         user_mentions: BTreeSet<String>,
-        room_mention: bool,
         room_member_count: u64,
         sender_power_level: Option<i64>,
         notification_power_levels: BTreeMap<String, i64>,
@@ -137,7 +134,6 @@ impl PushRuleEvaluator {
             body,
             has_mentions,
             user_mentions,
-            room_mention,
             room_member_count,
             notification_power_levels,
             sender_power_level,
@@ -279,7 +275,6 @@ impl PushRuleEvaluator {
                     false
                 }
             }
-            KnownCondition::IsRoomMention => self.room_mention,
             KnownCondition::ContainsDisplayName => {
                 if let Some(dn) = display_name {
                     if !dn.is_empty() {
@@ -529,7 +524,6 @@ fn push_rule_evaluator() {
         flattened_keys,
         false,
         BTreeSet::new(),
-        false,
         10,
         Some(0),
         BTreeMap::new(),
@@ -562,7 +556,6 @@ fn test_requires_room_version_supports_condition() {
         flattened_keys,
         false,
         BTreeSet::new(),
-        false,
         10,
         Some(0),
         BTreeMap::new(),
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index 253b5f367c..fdd2b2c143 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -336,8 +336,6 @@ pub enum KnownCondition {
     ExactEventPropertyContains(ExactEventMatchCondition),
     #[serde(rename = "org.matrix.msc3952.is_user_mention")]
     IsUserMention,
-    #[serde(rename = "org.matrix.msc3952.is_room_mention")]
-    IsRoomMention,
     ContainsDisplayName,
     RoomMemberCount {
         #[serde(skip_serializing_if = "Option::is_none")]
@@ -667,17 +665,6 @@ fn test_deserialize_unstable_msc3952_user_condition() {
     ));
 }
 
-#[test]
-fn test_deserialize_unstable_msc3952_room_condition() {
-    let json = r#"{"kind":"org.matrix.msc3952.is_room_mention"}"#;
-
-    let condition: Condition = serde_json::from_str(json).unwrap();
-    assert!(matches!(
-        condition,
-        Condition::Known(KnownCondition::IsRoomMention)
-    ));
-}
-
 #[test]
 fn test_deserialize_custom_condition() {
     let json = r#"{"kind":"custom_tag"}"#;
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index 7b33c30cc9..a8f0ed2435 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -59,7 +59,6 @@ class PushRuleEvaluator:
         flattened_keys: Mapping[str, JsonValue],
         has_mentions: bool,
         user_mentions: Set[str],
-        room_mention: bool,
         room_member_count: int,
         sender_power_level: Optional[int],
         notification_power_levels: Mapping[str, int],
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 1d294f8798..54c91953e1 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -179,9 +179,10 @@ class ExperimentalConfig(Config):
             "msc3783_escape_event_match_key", False
         )
 
-        # MSC3952: Intentional mentions
-        self.msc3952_intentional_mentions = experimental.get(
-            "msc3952_intentional_mentions", False
+        # MSC3952: Intentional mentions, this depends on MSC3758.
+        self.msc3952_intentional_mentions = (
+            experimental.get("msc3952_intentional_mentions", False)
+            and self.msc3758_exact_event_match
         )
 
         # MSC3959: Do not generate notifications for edits.
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 2e917c90c4..5fc38431ba 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -400,7 +400,6 @@ class BulkPushRuleEvaluator:
         mentions = event.content.get(EventContentFields.MSC3952_MENTIONS)
         has_mentions = self._intentional_mentions_enabled and isinstance(mentions, dict)
         user_mentions: Set[str] = set()
-        room_mention = False
         if has_mentions:
             # mypy seems to have lost the type even though it must be a dict here.
             assert isinstance(mentions, dict)
@@ -410,8 +409,6 @@ class BulkPushRuleEvaluator:
                 user_mentions = set(
                     filter(lambda item: isinstance(item, str), user_mentions_raw)
                 )
-            # Room mention is only true if the value is exactly true.
-            room_mention = mentions.get("room") is True
 
         evaluator = PushRuleEvaluator(
             _flatten_dict(
@@ -420,7 +417,6 @@ class BulkPushRuleEvaluator:
             ),
             has_mentions,
             user_mentions,
-            room_mention,
             room_member_count,
             sender_power_level,
             notification_levels,
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
index 7567756135..199e3d7b70 100644
--- a/tests/push/test_bulk_push_rule_evaluator.py
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -227,7 +227,14 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         )
         return len(result) > 0
 
-    @override_config({"experimental_features": {"msc3952_intentional_mentions": True}})
+    @override_config(
+        {
+            "experimental_features": {
+                "msc3758_exact_event_match": True,
+                "msc3952_intentional_mentions": True,
+            }
+        }
+    )
     def test_user_mentions(self) -> None:
         """Test the behavior of an event which includes invalid user mentions."""
         bulk_evaluator = BulkPushRuleEvaluator(self.hs)
@@ -323,7 +330,14 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
             )
         )
 
-    @override_config({"experimental_features": {"msc3952_intentional_mentions": True}})
+    @override_config(
+        {
+            "experimental_features": {
+                "msc3758_exact_event_match": True,
+                "msc3952_intentional_mentions": True,
+            }
+        }
+    )
     def test_room_mentions(self) -> None:
         """Test the behavior of an event which includes invalid room mentions."""
         bulk_evaluator = BulkPushRuleEvaluator(self.hs)
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index 0554d247bc..d320a12f96 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -149,7 +149,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         *,
         has_mentions: bool = False,
         user_mentions: Optional[Set[str]] = None,
-        room_mention: bool = False,
         related_events: Optional[JsonDict] = None,
     ) -> PushRuleEvaluator:
         event = FrozenEvent(
@@ -170,7 +169,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
             _flatten_dict(event),
             has_mentions,
             user_mentions or set(),
-            room_mention,
             room_member_count,
             sender_power_level,
             cast(Dict[str, int], power_levels.get("notifications", {})),
@@ -232,27 +230,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         # Note that invalid data is tested at tests.push.test_bulk_push_rule_evaluator.TestBulkPushRuleEvaluator.test_mentions
         # since the BulkPushRuleEvaluator is what handles data sanitisation.
 
-    def test_room_mentions(self) -> None:
-        """Check for room mentions."""
-        condition = {"kind": "org.matrix.msc3952.is_room_mention"}
-
-        # No room mention shouldn't match.
-        evaluator = self._get_evaluator({}, has_mentions=True)
-        self.assertFalse(evaluator.matches(condition, None, None))
-
-        # Room mention should match.
-        evaluator = self._get_evaluator({}, has_mentions=True, room_mention=True)
-        self.assertTrue(evaluator.matches(condition, None, None))
-
-        # A room mention and user mention is valid.
-        evaluator = self._get_evaluator(
-            {}, has_mentions=True, user_mentions={"@another:test"}, room_mention=True
-        )
-        self.assertTrue(evaluator.matches(condition, None, None))
-
-        # Note that invalid data is tested at tests.push.test_bulk_push_rule_evaluator.TestBulkPushRuleEvaluator.test_mentions
-        # since the BulkPushRuleEvaluator is what handles data sanitisation.
-
     def _assert_matches(
         self, condition: JsonDict, content: JsonMapping, msg: Optional[str] = None
     ) -> None:
-- 
cgit 1.5.1


From ffc2ee521d26f5b842df7902ade5de7a538e602d Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 16 Feb 2023 16:09:11 +0000
Subject: Use mypy 1.0 (#15052)

* Update mypy and mypy-zope
* Remove unused ignores

These used to suppress

```
synapse/storage/engines/__init__.py:28: error: "__new__" must return a
class instance (got "NoReturn")  [misc]
```

and

```
synapse/http/matrixfederationclient.py:1270: error: "BaseException" has no attribute "reasons"  [attr-defined]
```

(note that we check `hasattr(e, "reasons")` above)

* Avoid empty body warnings, sometimes by marking methods as abstract

E.g.

```
tests/handlers/test_register.py:58: error: Missing return statement  [empty-body]
tests/handlers/test_register.py:108: error: Missing return statement  [empty-body]
```

* Suppress false positive about `JaegerConfig`

Complaint was

```
synapse/logging/opentracing.py:450: error: Function "Type[Config]" could always be true in boolean context  [truthy-function]
```

* Fix not calling `is_state()`

Oops!

```
tests/rest/client/test_third_party_rules.py:428: error: Function "Callable[[], bool]" could always be true in boolean context  [truthy-function]
```

* Suppress false positives from ParamSpecs

````
synapse/logging/opentracing.py:971: error: Argument 2 to "_custom_sync_async_decorator" has incompatible type "Callable[[Arg(Callable[P, R], 'func'), **P], _GeneratorContextManager[None]]"; expected "Callable[[Callable[P, R], **P], _GeneratorContextManager[None]]"  [arg-type]
synapse/logging/opentracing.py:1017: error: Argument 2 to "_custom_sync_async_decorator" has incompatible type "Callable[[Arg(Callable[P, R], 'func'), **P], _GeneratorContextManager[None]]"; expected "Callable[[Callable[P, R], **P], _GeneratorContextManager[None]]"  [arg-type]
````

* Drive-by improvement to `wrapping_logic` annotation

* Work around false "unreachable" positives

See https://github.com/Shoobx/mypy-zope/issues/91

```
tests/http/test_proxyagent.py:626: error: Statement is unreachable  [unreachable]
tests/http/test_proxyagent.py:762: error: Statement is unreachable  [unreachable]
tests/http/test_proxyagent.py:826: error: Statement is unreachable  [unreachable]
tests/http/test_proxyagent.py:838: error: Statement is unreachable  [unreachable]
tests/http/test_proxyagent.py:845: error: Statement is unreachable  [unreachable]
tests/http/federation/test_matrix_federation_agent.py:151: error: Statement is unreachable  [unreachable]
tests/http/federation/test_matrix_federation_agent.py:452: error: Statement is unreachable  [unreachable]
tests/logging/test_remote_handler.py:60: error: Statement is unreachable  [unreachable]
tests/logging/test_remote_handler.py:93: error: Statement is unreachable  [unreachable]
tests/logging/test_remote_handler.py:127: error: Statement is unreachable  [unreachable]
tests/logging/test_remote_handler.py:152: error: Statement is unreachable  [unreachable]
```

* Changelog

* Tweak DBAPI2 Protocol to be accepted by mypy 1.0

Some extra context in:
- https://github.com/matrix-org/python-canonicaljson/pull/57
- https://github.com/python/mypy/issues/6002
- https://mypy.readthedocs.io/en/latest/common_issues.html#covariant-subtyping-of-mutable-protocol-members-is-rejected

* Pull in updated canonicaljson lib

so the protocol check just works

* Improve comments in opentracing

I tried to work around the ignores but found it too much trouble.

I think the corresponding issue is
https://github.com/python/mypy/issues/12909. The mypy repo has a PR
claiming to fix this (https://github.com/python/mypy/pull/14677) which
might mean this gets resolved soon?

* Better annotation for INTERACTIVE_AUTH_CHECKERS

* Drive-by AUTH_TYPE annotation, to remove an ignore
---
 changelog.d/15052.misc                             |  1 +
 poetry.lock                                        | 69 ++++++++++----------
 synapse/handlers/auth.py                           |  2 +-
 synapse/handlers/ui_auth/checkers.py               | 18 ++++--
 synapse/http/matrixfederationclient.py             |  2 +-
 synapse/logging/opentracing.py                     | 24 +++++--
 synapse/rest/media/v1/_base.py                     |  9 ++-
 synapse/storage/engines/__init__.py                |  4 +-
 synapse/storage/types.py                           | 74 ++++++++++++++++++----
 synapse/streams/__init__.py                        |  7 +-
 tests/handlers/test_register.py                    |  4 +-
 .../federation/test_matrix_federation_agent.py     | 11 ++--
 tests/http/test_proxyagent.py                      | 40 ++++++------
 tests/logging/test_remote_handler.py               | 17 ++---
 tests/rest/client/test_auth.py                     |  3 +
 tests/rest/client/test_third_party_rules.py        |  2 +-
 tests/utils.py                                     | 26 +++++++-
 17 files changed, 209 insertions(+), 104 deletions(-)
 create mode 100644 changelog.d/15052.misc

(limited to 'synapse')

diff --git a/changelog.d/15052.misc b/changelog.d/15052.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/15052.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/poetry.lock b/poetry.lock
index e534b30d2b..eb1e3d797b 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -146,14 +146,14 @@ css = ["tinycss2 (>=1.1.0,<1.2)"]
 
 [[package]]
 name = "canonicaljson"
-version = "1.6.4"
+version = "1.6.5"
 description = "Canonical JSON"
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "canonicaljson-1.6.4-py3-none-any.whl", hash = "sha256:55d282853b4245dbcd953fe54c39b91571813d7c44e1dbf66e3c4f97ff134a48"},
-    {file = "canonicaljson-1.6.4.tar.gz", hash = "sha256:6c09b2119511f30eb1126cfcd973a10824e20f1cfd25039cde3d1218dd9c8d8f"},
+    {file = "canonicaljson-1.6.5-py3-none-any.whl", hash = "sha256:806ea6f2cbb7405d20259e1c36dd1214ba5c242fa9165f5bd0bf2081f82c23fb"},
+    {file = "canonicaljson-1.6.5.tar.gz", hash = "sha256:68dfc157b011e07d94bf74b5d4ccc01958584ed942d9dfd5fdd706609e81cd4b"},
 ]
 
 [package.dependencies]
@@ -1146,36 +1146,38 @@ files = [
 
 [[package]]
 name = "mypy"
-version = "0.981"
+version = "1.0.0"
 description = "Optional static typing for Python"
 category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "mypy-0.981-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4bc460e43b7785f78862dab78674e62ec3cd523485baecfdf81a555ed29ecfa0"},
-    {file = "mypy-0.981-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:756fad8b263b3ba39e4e204ee53042671b660c36c9017412b43af210ddee7b08"},
-    {file = "mypy-0.981-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a16a0145d6d7d00fbede2da3a3096dcc9ecea091adfa8da48fa6a7b75d35562d"},
-    {file = "mypy-0.981-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce65f70b14a21fdac84c294cde75e6dbdabbcff22975335e20827b3b94bdbf49"},
-    {file = "mypy-0.981-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6e35d764784b42c3e256848fb8ed1d4292c9fc0098413adb28d84974c095b279"},
-    {file = "mypy-0.981-cp310-cp310-win_amd64.whl", hash = "sha256:e53773073c864d5f5cec7f3fc72fbbcef65410cde8cc18d4f7242dea60dac52e"},
-    {file = "mypy-0.981-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6ee196b1d10b8b215e835f438e06965d7a480f6fe016eddbc285f13955cca659"},
-    {file = "mypy-0.981-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ad21d4c9d3673726cf986ea1d0c9fb66905258709550ddf7944c8f885f208be"},
-    {file = "mypy-0.981-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d1debb09043e1f5ee845fa1e96d180e89115b30e47c5d3ce53bc967bab53f62d"},
-    {file = "mypy-0.981-cp37-cp37m-win_amd64.whl", hash = "sha256:9f362470a3480165c4c6151786b5379351b790d56952005be18bdbdd4c7ce0ae"},
-    {file = "mypy-0.981-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c9e0efb95ed6ca1654951bd5ec2f3fa91b295d78bf6527e026529d4aaa1e0c30"},
-    {file = "mypy-0.981-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e178eaffc3c5cd211a87965c8c0df6da91ed7d258b5fc72b8e047c3771317ddb"},
-    {file = "mypy-0.981-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:06e1eac8d99bd404ed8dd34ca29673c4346e76dd8e612ea507763dccd7e13c7a"},
-    {file = "mypy-0.981-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa38f82f53e1e7beb45557ff167c177802ba7b387ad017eab1663d567017c8ee"},
-    {file = "mypy-0.981-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:64e1f6af81c003f85f0dfed52db632817dabb51b65c0318ffbf5ff51995bbb08"},
-    {file = "mypy-0.981-cp38-cp38-win_amd64.whl", hash = "sha256:e1acf62a8c4f7c092462c738aa2c2489e275ed386320c10b2e9bff31f6f7e8d6"},
-    {file = "mypy-0.981-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b6ede64e52257931315826fdbfc6ea878d89a965580d1a65638ef77cb551f56d"},
-    {file = "mypy-0.981-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eb3978b191b9fa0488524bb4ffedf2c573340e8c2b4206fc191d44c7093abfb7"},
-    {file = "mypy-0.981-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:77f8fcf7b4b3cc0c74fb33ae54a4cd00bb854d65645c48beccf65fa10b17882c"},
-    {file = "mypy-0.981-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f64d2ce043a209a297df322eb4054dfbaa9de9e8738291706eaafda81ab2b362"},
-    {file = "mypy-0.981-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2ee3dbc53d4df7e6e3b1c68ac6a971d3a4fb2852bf10a05fda228721dd44fae1"},
-    {file = "mypy-0.981-cp39-cp39-win_amd64.whl", hash = "sha256:8e8e49aa9cc23aa4c926dc200ce32959d3501c4905147a66ce032f05cb5ecb92"},
-    {file = "mypy-0.981-py3-none-any.whl", hash = "sha256:794f385653e2b749387a42afb1e14c2135e18daeb027e0d97162e4b7031210f8"},
-    {file = "mypy-0.981.tar.gz", hash = "sha256:ad77c13037d3402fbeffda07d51e3f228ba078d1c7096a73759c9419ea031bf4"},
+    {file = "mypy-1.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0626db16705ab9f7fa6c249c017c887baf20738ce7f9129da162bb3075fc1af"},
+    {file = "mypy-1.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1ace23f6bb4aec4604b86c4843276e8fa548d667dbbd0cb83a3ae14b18b2db6c"},
+    {file = "mypy-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87edfaf344c9401942883fad030909116aa77b0fa7e6e8e1c5407e14549afe9a"},
+    {file = "mypy-1.0.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0ab090d9240d6b4e99e1fa998c2d0aa5b29fc0fb06bd30e7ad6183c95fa07593"},
+    {file = "mypy-1.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:7cc2c01dfc5a3cbddfa6c13f530ef3b95292f926329929001d45e124342cd6b7"},
+    {file = "mypy-1.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:14d776869a3e6c89c17eb943100f7868f677703c8a4e00b3803918f86aafbc52"},
+    {file = "mypy-1.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bb2782a036d9eb6b5a6efcdda0986774bf798beef86a62da86cb73e2a10b423d"},
+    {file = "mypy-1.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cfca124f0ac6707747544c127880893ad72a656e136adc935c8600740b21ff5"},
+    {file = "mypy-1.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8845125d0b7c57838a10fd8925b0f5f709d0e08568ce587cc862aacce453e3dd"},
+    {file = "mypy-1.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b1b9e1ed40544ef486fa8ac022232ccc57109f379611633ede8e71630d07d2"},
+    {file = "mypy-1.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c7cf862aef988b5fbaa17764ad1d21b4831436701c7d2b653156a9497d92c83c"},
+    {file = "mypy-1.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cd187d92b6939617f1168a4fe68f68add749902c010e66fe574c165c742ed88"},
+    {file = "mypy-1.0.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4e5175026618c178dfba6188228b845b64131034ab3ba52acaffa8f6c361f805"},
+    {file = "mypy-1.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2f6ac8c87e046dc18c7d1d7f6653a66787a4555085b056fe2d599f1f1a2a2d21"},
+    {file = "mypy-1.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7306edca1c6f1b5fa0bc9aa645e6ac8393014fa82d0fa180d0ebc990ebe15964"},
+    {file = "mypy-1.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3cfad08f16a9c6611e6143485a93de0e1e13f48cfb90bcad7d5fde1c0cec3d36"},
+    {file = "mypy-1.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67cced7f15654710386e5c10b96608f1ee3d5c94ca1da5a2aad5889793a824c1"},
+    {file = "mypy-1.0.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a86b794e8a56ada65c573183756eac8ac5b8d3d59daf9d5ebd72ecdbb7867a43"},
+    {file = "mypy-1.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:50979d5efff8d4135d9db293c6cb2c42260e70fb010cbc697b1311a4d7a39ddb"},
+    {file = "mypy-1.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3ae4c7a99e5153496243146a3baf33b9beff714464ca386b5f62daad601d87af"},
+    {file = "mypy-1.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e398652d005a198a7f3c132426b33c6b85d98aa7dc852137a2a3be8890c4072"},
+    {file = "mypy-1.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be78077064d016bc1b639c2cbcc5be945b47b4261a4f4b7d8923f6c69c5c9457"},
+    {file = "mypy-1.0.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92024447a339400ea00ac228369cd242e988dd775640755fa4ac0c126e49bb74"},
+    {file = "mypy-1.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:fe523fcbd52c05040c7bee370d66fee8373c5972171e4fbc323153433198592d"},
+    {file = "mypy-1.0.0-py3-none-any.whl", hash = "sha256:2efa963bdddb27cb4a0d42545cd137a8d2b883bd181bbc4525b568ef6eca258f"},
+    {file = "mypy-1.0.0.tar.gz", hash = "sha256:f34495079c8d9da05b183f9f7daec2878280c2ad7cc81da686ef0b484cea2ecf"},
 ]
 
 [package.dependencies]
@@ -1186,6 +1188,7 @@ typing-extensions = ">=3.10"
 
 [package.extras]
 dmypy = ["psutil (>=4.0)"]
+install-types = ["pip"]
 python2 = ["typed-ast (>=1.4.0,<2)"]
 reports = ["lxml"]
 
@@ -1203,18 +1206,18 @@ files = [
 
 [[package]]
 name = "mypy-zope"
-version = "0.3.11"
+version = "0.9.0"
 description = "Plugin for mypy to support zope interfaces"
 category = "dev"
 optional = false
 python-versions = "*"
 files = [
-    {file = "mypy-zope-0.3.11.tar.gz", hash = "sha256:d4255f9f04d48c79083bbd4e2fea06513a6ac7b8de06f8c4ce563fd85142ca05"},
-    {file = "mypy_zope-0.3.11-py3-none-any.whl", hash = "sha256:ec080a6508d1f7805c8d2054f9fdd13c849742ce96803519e1fdfa3d3cab7140"},
+    {file = "mypy-zope-0.9.0.tar.gz", hash = "sha256:88bf6cd056e38b338e6956055958a7805b4ff84404ccd99e29883a3647a1aeb3"},
+    {file = "mypy_zope-0.9.0-py3-none-any.whl", hash = "sha256:e1bb4b57084f76ff8a154a3e07880a1af2ac6536c491dad4b143d529f72c5d15"},
 ]
 
 [package.dependencies]
-mypy = "0.981"
+mypy = "1.0.0"
 "zope.interface" = "*"
 "zope.schema" = "*"
 
@@ -1705,7 +1708,7 @@ files = [
 cffi = ">=1.4.1"
 
 [package.extras]
-docs = ["sphinx (>=1.6.5)", "sphinx-rtd-theme"]
+docs = ["sphinx (>=1.6.5)", "sphinx_rtd_theme"]
 tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"]
 
 [[package]]
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 57a6854b1e..cf12b55d21 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -201,7 +201,7 @@ class AuthHandler:
         for auth_checker_class in INTERACTIVE_AUTH_CHECKERS:
             inst = auth_checker_class(hs)
             if inst.is_enabled():
-                self.checkers[inst.AUTH_TYPE] = inst  # type: ignore
+                self.checkers[inst.AUTH_TYPE] = inst
 
         self.bcrypt_rounds = hs.config.registration.bcrypt_rounds
 
diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py
index 332edcca24..78a75bfed6 100644
--- a/synapse/handlers/ui_auth/checkers.py
+++ b/synapse/handlers/ui_auth/checkers.py
@@ -13,7 +13,8 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, Any
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Any, ClassVar, Sequence, Type
 
 from twisted.web.client import PartialDownloadError
 
@@ -27,19 +28,28 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
-class UserInteractiveAuthChecker:
+class UserInteractiveAuthChecker(ABC):
     """Abstract base class for an interactive auth checker"""
 
-    def __init__(self, hs: "HomeServer"):
+    # This should really be an "abstract class property", i.e. it should
+    # be an error to instantiate a subclass that doesn't specify an AUTH_TYPE.
+    # But calling this a `ClassVar` is simpler than a decorator stack of
+    # @property @abstractmethod and @classmethod (if that's even the right order).
+    AUTH_TYPE: ClassVar[str]
+
+    def __init__(self, hs: "HomeServer"):  # noqa: B027
         pass
 
+    @abstractmethod
     def is_enabled(self) -> bool:
         """Check if the configuration of the homeserver allows this checker to work
 
         Returns:
             True if this login type is enabled.
         """
+        raise NotImplementedError()
 
+    @abstractmethod
     async def check_auth(self, authdict: dict, clientip: str) -> Any:
         """Given the authentication dict from the client, attempt to check this step
 
@@ -304,7 +314,7 @@ class RegistrationTokenAuthChecker(UserInteractiveAuthChecker):
             )
 
 
-INTERACTIVE_AUTH_CHECKERS = [
+INTERACTIVE_AUTH_CHECKERS: Sequence[Type[UserInteractiveAuthChecker]] = [
     DummyAuthChecker,
     TermsAuthChecker,
     RecaptchaAuthChecker,
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index b92f1d3d1a..312aab4dcc 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -1267,7 +1267,7 @@ class MatrixFederationHttpClient:
 def _flatten_response_never_received(e: BaseException) -> str:
     if hasattr(e, "reasons"):
         reasons = ", ".join(
-            _flatten_response_never_received(f.value) for f in e.reasons  # type: ignore[attr-defined]
+            _flatten_response_never_received(f.value) for f in e.reasons
         )
 
         return "%s:[%s]" % (type(e).__name__, reasons)
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index 6c7cf1b294..5aed71262f 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -188,7 +188,7 @@ from typing import (
 )
 
 import attr
-from typing_extensions import ParamSpec
+from typing_extensions import Concatenate, ParamSpec
 
 from twisted.internet import defer
 from twisted.web.http import Request
@@ -445,7 +445,7 @@ def init_tracer(hs: "HomeServer") -> None:
         opentracing = None  # type: ignore[assignment]
         return
 
-    if not opentracing or not JaegerConfig:
+    if opentracing is None or JaegerConfig is None:
         raise ConfigError(
             "The server has been configured to use opentracing but opentracing is not "
             "installed."
@@ -872,7 +872,7 @@ def extract_text_map(carrier: Dict[str, str]) -> Optional["opentracing.SpanConte
 
 def _custom_sync_async_decorator(
     func: Callable[P, R],
-    wrapping_logic: Callable[[Callable[P, R], Any, Any], ContextManager[None]],
+    wrapping_logic: Callable[Concatenate[Callable[P, R], P], ContextManager[None]],
 ) -> Callable[P, R]:
     """
     Decorates a function that is sync or async (coroutines), or that returns a Twisted
@@ -902,10 +902,14 @@ def _custom_sync_async_decorator(
     """
 
     if inspect.iscoroutinefunction(func):
-
+        # In this branch, R = Awaitable[RInner], for some other type RInner
         @wraps(func)
-        async def _wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
+        async def _wrapper(
+            *args: P.args, **kwargs: P.kwargs
+        ) -> Any:  # Return type is RInner
             with wrapping_logic(func, *args, **kwargs):
+                # type-ignore: func() returns R, but mypy doesn't know that R is
+                # Awaitable here.
                 return await func(*args, **kwargs)  # type: ignore[misc]
 
     else:
@@ -972,7 +976,11 @@ def trace_with_opname(
         if not opentracing:
             return func
 
-        return _custom_sync_async_decorator(func, _wrapping_logic)
+        # type-ignore: mypy seems to be confused by the ParamSpecs here.
+        # I think the problem is https://github.com/python/mypy/issues/12909
+        return _custom_sync_async_decorator(
+            func, _wrapping_logic  # type: ignore[arg-type]
+        )
 
     return _decorator
 
@@ -1018,7 +1026,9 @@ def tag_args(func: Callable[P, R]) -> Callable[P, R]:
         set_tag(SynapseTags.FUNC_KWARGS, str(kwargs))
         yield
 
-    return _custom_sync_async_decorator(func, _wrapping_logic)
+    # type-ignore: mypy seems to be confused by the ParamSpecs here.
+    # I think the problem is https://github.com/python/mypy/issues/12909
+    return _custom_sync_async_decorator(func, _wrapping_logic)  # type: ignore[arg-type]
 
 
 @contextlib.contextmanager
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py
index d30878f704..6e035afcce 100644
--- a/synapse/rest/media/v1/_base.py
+++ b/synapse/rest/media/v1/_base.py
@@ -16,6 +16,7 @@
 import logging
 import os
 import urllib
+from abc import ABC, abstractmethod
 from types import TracebackType
 from typing import Awaitable, Dict, Generator, List, Optional, Tuple, Type
 
@@ -284,13 +285,14 @@ async def respond_with_responder(
     finish_request(request)
 
 
-class Responder:
+class Responder(ABC):
     """Represents a response that can be streamed to the requester.
 
     Responder is a context manager which *must* be used, so that any resources
     held can be cleaned up.
     """
 
+    @abstractmethod
     def write_to_consumer(self, consumer: IConsumer) -> Awaitable:
         """Stream response into consumer
 
@@ -300,11 +302,12 @@ class Responder:
         Returns:
             Resolves once the response has finished being written
         """
+        raise NotImplementedError()
 
-    def __enter__(self) -> None:
+    def __enter__(self) -> None:  # noqa: B027
         pass
 
-    def __exit__(
+    def __exit__(  # noqa: B027
         self,
         exc_type: Optional[Type[BaseException]],
         exc_val: Optional[BaseException],
diff --git a/synapse/storage/engines/__init__.py b/synapse/storage/engines/__init__.py
index a182e8a098..d1ccb7390a 100644
--- a/synapse/storage/engines/__init__.py
+++ b/synapse/storage/engines/__init__.py
@@ -25,7 +25,7 @@ try:
 except ImportError:
 
     class PostgresEngine(BaseDatabaseEngine):  # type: ignore[no-redef]
-        def __new__(cls, *args: object, **kwargs: object) -> NoReturn:  # type: ignore[misc]
+        def __new__(cls, *args: object, **kwargs: object) -> NoReturn:
             raise RuntimeError(
                 f"Cannot create {cls.__name__} -- psycopg2 module is not installed"
             )
@@ -36,7 +36,7 @@ try:
 except ImportError:
 
     class Sqlite3Engine(BaseDatabaseEngine):  # type: ignore[no-redef]
-        def __new__(cls, *args: object, **kwargs: object) -> NoReturn:  # type: ignore[misc]
+        def __new__(cls, *args: object, **kwargs: object) -> NoReturn:
             raise RuntimeError(
                 f"Cannot create {cls.__name__} -- sqlite3 module is not installed"
             )
diff --git a/synapse/storage/types.py b/synapse/storage/types.py
index 0031df1e06..56a0048539 100644
--- a/synapse/storage/types.py
+++ b/synapse/storage/types.py
@@ -12,7 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from types import TracebackType
-from typing import Any, Iterator, List, Mapping, Optional, Sequence, Tuple, Type, Union
+from typing import (
+    Any,
+    Callable,
+    Iterator,
+    List,
+    Mapping,
+    Optional,
+    Sequence,
+    Tuple,
+    Type,
+    Union,
+)
 
 from typing_extensions import Protocol
 
@@ -112,15 +123,35 @@ class DBAPI2Module(Protocol):
     #   extends from this hierarchy. See
     #     https://docs.python.org/3/library/sqlite3.html?highlight=sqlite3#exceptions
     #     https://www.postgresql.org/docs/current/errcodes-appendix.html#ERRCODES-TABLE
-    Warning: Type[Exception]
-    Error: Type[Exception]
+    #
+    # Note: rather than
+    #     x: T
+    # we write
+    #     @property
+    #     def x(self) -> T: ...
+    # which expresses that the protocol attribute `x` is read-only. The mypy docs
+    #     https://mypy.readthedocs.io/en/latest/common_issues.html#covariant-subtyping-of-mutable-protocol-members-is-rejected
+    # explain why this is necessary for safety. TL;DR: we shouldn't be able to write
+    # to `x`, only read from it. See also https://github.com/python/mypy/issues/6002 .
+    @property
+    def Warning(self) -> Type[Exception]:
+        ...
+
+    @property
+    def Error(self) -> Type[Exception]:
+        ...
 
     # Errors are divided into `InterfaceError`s (something went wrong in the database
     # driver) and `DatabaseError`s (something went wrong in the database). These are
     # both subclasses of `Error`, but we can't currently express this in type
     # annotations due to https://github.com/python/mypy/issues/8397
-    InterfaceError: Type[Exception]
-    DatabaseError: Type[Exception]
+    @property
+    def InterfaceError(self) -> Type[Exception]:
+        ...
+
+    @property
+    def DatabaseError(self) -> Type[Exception]:
+        ...
 
     # Everything below is a subclass of `DatabaseError`.
 
@@ -128,7 +159,9 @@ class DBAPI2Module(Protocol):
     # - An integer was too big for its data type.
     # - An invalid date time was provided.
     # - A string contained a null code point.
-    DataError: Type[Exception]
+    @property
+    def DataError(self) -> Type[Exception]:
+        ...
 
     # Roughly: something went wrong in the database, but it's not within the application
     # programmer's control. Examples:
@@ -138,28 +171,45 @@ class DBAPI2Module(Protocol):
     # - A serialisation failure occurred.
     # - The database ran out of resources, such as storage, memory, connections, etc.
     # - The database encountered an error from the operating system.
-    OperationalError: Type[Exception]
+    @property
+    def OperationalError(self) -> Type[Exception]:
+        ...
 
     # Roughly: we've given the database data which breaks a rule we asked it to enforce.
     # Examples:
     # - Stop, criminal scum! You violated the foreign key constraint
     # - Also check constraints, non-null constraints, etc.
-    IntegrityError: Type[Exception]
+    @property
+    def IntegrityError(self) -> Type[Exception]:
+        ...
 
     # Roughly: something went wrong within the database server itself.
-    InternalError: Type[Exception]
+    @property
+    def InternalError(self) -> Type[Exception]:
+        ...
 
     # Roughly: the application did something silly that needs to be fixed. Examples:
     # - We don't have permissions to do something.
     # - We tried to create a table with duplicate column names.
     # - We tried to use a reserved name.
     # - We referred to a column that doesn't exist.
-    ProgrammingError: Type[Exception]
+    @property
+    def ProgrammingError(self) -> Type[Exception]:
+        ...
 
     # Roughly: we've tried to do something that this database doesn't support.
-    NotSupportedError: Type[Exception]
+    @property
+    def NotSupportedError(self) -> Type[Exception]:
+        ...
 
-    def connect(self, **parameters: object) -> Connection:
+    # We originally wrote
+    # def connect(self, *args, **kwargs) -> Connection: ...
+    # But mypy doesn't seem to like that because sqlite3.connect takes a mandatory
+    # positional argument. We can't make that part of the signature though, because
+    # psycopg2.connect doesn't have a mandatory positional argument. Instead, we use
+    # the following slightly unusual workaround.
+    @property
+    def connect(self) -> Callable[..., Connection]:
         ...
 
 
diff --git a/synapse/streams/__init__.py b/synapse/streams/__init__.py
index c6c8a0315c..8a48ffc48d 100644
--- a/synapse/streams/__init__.py
+++ b/synapse/streams/__init__.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+from abc import ABC, abstractmethod
 from typing import Generic, List, Optional, Tuple, TypeVar
 
 from synapse.types import StrCollection, UserID
@@ -22,7 +22,8 @@ K = TypeVar("K")
 R = TypeVar("R")
 
 
-class EventSource(Generic[K, R]):
+class EventSource(ABC, Generic[K, R]):
+    @abstractmethod
     async def get_new_events(
         self,
         user: UserID,
@@ -32,4 +33,4 @@ class EventSource(Generic[K, R]):
         is_guest: bool,
         explicit_room_id: Optional[str] = None,
     ) -> Tuple[List[R], K]:
-        ...
+        raise NotImplementedError()
diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py
index 782ef09cf4..1db99b3c00 100644
--- a/tests/handlers/test_register.py
+++ b/tests/handlers/test_register.py
@@ -62,7 +62,7 @@ class TestSpamChecker:
         request_info: Collection[Tuple[str, str]],
         auth_provider_id: Optional[str],
     ) -> RegistrationBehaviour:
-        pass
+        return RegistrationBehaviour.ALLOW
 
 
 class DenyAll(TestSpamChecker):
@@ -111,7 +111,7 @@ class TestLegacyRegistrationSpamChecker:
         username: Optional[str],
         request_info: Collection[Tuple[str, str]],
     ) -> RegistrationBehaviour:
-        pass
+        return RegistrationBehaviour.ALLOW
 
 
 class LegacyAllowAll(TestLegacyRegistrationSpamChecker):
diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py
index acfdcd3bca..d27422515c 100644
--- a/tests/http/federation/test_matrix_federation_agent.py
+++ b/tests/http/federation/test_matrix_federation_agent.py
@@ -63,7 +63,7 @@ from tests.http import (
     get_test_ca_cert_file,
 )
 from tests.server import FakeTransport, ThreadedMemoryReactorClock
-from tests.utils import default_config
+from tests.utils import checked_cast, default_config
 
 logger = logging.getLogger(__name__)
 
@@ -146,8 +146,10 @@ class MatrixFederationAgentTests(unittest.TestCase):
         #
         # Normally this would be done by the TCP socket code in Twisted, but we are
         # stubbing that out here.
-        client_protocol = client_factory.buildProtocol(dummy_address)
-        assert isinstance(client_protocol, _WrappingProtocol)
+        # NB: we use a checked_cast here to work around https://github.com/Shoobx/mypy-zope/issues/91
+        client_protocol = checked_cast(
+            _WrappingProtocol, client_factory.buildProtocol(dummy_address)
+        )
         client_protocol.makeConnection(
             FakeTransport(server_protocol, self.reactor, client_protocol)
         )
@@ -446,7 +448,6 @@ class MatrixFederationAgentTests(unittest.TestCase):
         server_ssl_protocol = _wrap_server_factory_for_tls(
             _get_test_protocol_factory()
         ).buildProtocol(dummy_address)
-        assert isinstance(server_ssl_protocol, TLSMemoryBIOProtocol)
 
         # Tell the HTTP server to send outgoing traffic back via the proxy's transport.
         proxy_server_transport = proxy_server.transport
@@ -1529,7 +1530,7 @@ def _check_logcontext(context: LoggingContextOrSentinel) -> None:
 
 def _wrap_server_factory_for_tls(
     factory: IProtocolFactory, sanlist: Optional[List[bytes]] = None
-) -> IProtocolFactory:
+) -> TLSMemoryBIOFactory:
     """Wrap an existing Protocol Factory with a test TLSMemoryBIOFactory
     The resultant factory will create a TLS server which presents a certificate
     signed by our test CA, valid for the domains in `sanlist`
diff --git a/tests/http/test_proxyagent.py b/tests/http/test_proxyagent.py
index a817940730..22fdc7f5f2 100644
--- a/tests/http/test_proxyagent.py
+++ b/tests/http/test_proxyagent.py
@@ -43,6 +43,7 @@ from tests.http import (
 )
 from tests.server import FakeTransport, ThreadedMemoryReactorClock
 from tests.unittest import TestCase
+from tests.utils import checked_cast
 
 logger = logging.getLogger(__name__)
 
@@ -620,7 +621,6 @@ class MatrixFederationAgentTests(TestCase):
         server_ssl_protocol = _wrap_server_factory_for_tls(
             _get_test_protocol_factory()
         ).buildProtocol(dummy_address)
-        assert isinstance(server_ssl_protocol, TLSMemoryBIOProtocol)
 
         # Tell the HTTP server to send outgoing traffic back via the proxy's transport.
         proxy_server_transport = proxy_server.transport
@@ -757,12 +757,14 @@ class MatrixFederationAgentTests(TestCase):
         assert isinstance(proxy_server, HTTPChannel)
 
         # fish the transports back out so that we can do the old switcheroo
-        s2c_transport = proxy_server.transport
-        assert isinstance(s2c_transport, FakeTransport)
-        client_protocol = s2c_transport.other
-        assert isinstance(client_protocol, _WrappingProtocol)
-        c2s_transport = client_protocol.transport
-        assert isinstance(c2s_transport, FakeTransport)
+        # To help mypy out with the various Protocols and wrappers and mocks, we do
+        # some explicit casting. Without the casts, we hit the bug I reported at
+        # https://github.com/Shoobx/mypy-zope/issues/91 .
+        # We also double-checked these casts at runtime (test-time) because I found it
+        # quite confusing to deduce these types in the first place!
+        s2c_transport = checked_cast(FakeTransport, proxy_server.transport)
+        client_protocol = checked_cast(_WrappingProtocol, s2c_transport.other)
+        c2s_transport = checked_cast(FakeTransport, client_protocol.transport)
 
         # the FakeTransport is async, so we need to pump the reactor
         self.reactor.advance(0)
@@ -822,9 +824,9 @@ class MatrixFederationAgentTests(TestCase):
     @patch.dict(os.environ, {"http_proxy": "proxy.com:8888"})
     def test_proxy_with_no_scheme(self) -> None:
         http_proxy_agent = ProxyAgent(self.reactor, use_proxy=True)
-        assert isinstance(http_proxy_agent.http_proxy_endpoint, HostnameEndpoint)
-        self.assertEqual(http_proxy_agent.http_proxy_endpoint._hostStr, "proxy.com")
-        self.assertEqual(http_proxy_agent.http_proxy_endpoint._port, 8888)
+        proxy_ep = checked_cast(HostnameEndpoint, http_proxy_agent.http_proxy_endpoint)
+        self.assertEqual(proxy_ep._hostStr, "proxy.com")
+        self.assertEqual(proxy_ep._port, 8888)
 
     @patch.dict(os.environ, {"http_proxy": "socks://proxy.com:8888"})
     def test_proxy_with_unsupported_scheme(self) -> None:
@@ -834,25 +836,21 @@ class MatrixFederationAgentTests(TestCase):
     @patch.dict(os.environ, {"http_proxy": "http://proxy.com:8888"})
     def test_proxy_with_http_scheme(self) -> None:
         http_proxy_agent = ProxyAgent(self.reactor, use_proxy=True)
-        assert isinstance(http_proxy_agent.http_proxy_endpoint, HostnameEndpoint)
-        self.assertEqual(http_proxy_agent.http_proxy_endpoint._hostStr, "proxy.com")
-        self.assertEqual(http_proxy_agent.http_proxy_endpoint._port, 8888)
+        proxy_ep = checked_cast(HostnameEndpoint, http_proxy_agent.http_proxy_endpoint)
+        self.assertEqual(proxy_ep._hostStr, "proxy.com")
+        self.assertEqual(proxy_ep._port, 8888)
 
     @patch.dict(os.environ, {"http_proxy": "https://proxy.com:8888"})
     def test_proxy_with_https_scheme(self) -> None:
         https_proxy_agent = ProxyAgent(self.reactor, use_proxy=True)
-        assert isinstance(https_proxy_agent.http_proxy_endpoint, _WrapperEndpoint)
-        self.assertEqual(
-            https_proxy_agent.http_proxy_endpoint._wrappedEndpoint._hostStr, "proxy.com"
-        )
-        self.assertEqual(
-            https_proxy_agent.http_proxy_endpoint._wrappedEndpoint._port, 8888
-        )
+        proxy_ep = checked_cast(_WrapperEndpoint, https_proxy_agent.http_proxy_endpoint)
+        self.assertEqual(proxy_ep._wrappedEndpoint._hostStr, "proxy.com")
+        self.assertEqual(proxy_ep._wrappedEndpoint._port, 8888)
 
 
 def _wrap_server_factory_for_tls(
     factory: IProtocolFactory, sanlist: Optional[List[bytes]] = None
-) -> IProtocolFactory:
+) -> TLSMemoryBIOFactory:
     """Wrap an existing Protocol Factory with a test TLSMemoryBIOFactory
 
     The resultant factory will create a TLS server which presents a certificate
diff --git a/tests/logging/test_remote_handler.py b/tests/logging/test_remote_handler.py
index c08954d887..5191e31a8a 100644
--- a/tests/logging/test_remote_handler.py
+++ b/tests/logging/test_remote_handler.py
@@ -21,6 +21,7 @@ from synapse.logging import RemoteHandler
 from tests.logging import LoggerCleanupMixin
 from tests.server import FakeTransport, get_clock
 from tests.unittest import TestCase
+from tests.utils import checked_cast
 
 
 def connect_logging_client(
@@ -56,8 +57,8 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
         client, server = connect_logging_client(self.reactor, 0)
 
         # Trigger data being sent
-        assert isinstance(client.transport, FakeTransport)
-        client.transport.flush()
+        client_transport = checked_cast(FakeTransport, client.transport)
+        client_transport.flush()
 
         # One log message, with a single trailing newline
         logs = server.data.decode("utf8").splitlines()
@@ -89,8 +90,8 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
 
         # Allow the reconnection
         client, server = connect_logging_client(self.reactor, 0)
-        assert isinstance(client.transport, FakeTransport)
-        client.transport.flush()
+        client_transport = checked_cast(FakeTransport, client.transport)
+        client_transport.flush()
 
         # Only the 7 infos made it through, the debugs were elided
         logs = server.data.splitlines()
@@ -123,8 +124,8 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
 
         # Allow the reconnection
         client, server = connect_logging_client(self.reactor, 0)
-        assert isinstance(client.transport, FakeTransport)
-        client.transport.flush()
+        client_transport = checked_cast(FakeTransport, client.transport)
+        client_transport.flush()
 
         # The 10 warnings made it through, the debugs and infos were elided
         logs = server.data.splitlines()
@@ -148,8 +149,8 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
 
         # Allow the reconnection
         client, server = connect_logging_client(self.reactor, 0)
-        assert isinstance(client.transport, FakeTransport)
-        client.transport.flush()
+        client_transport = checked_cast(FakeTransport, client.transport)
+        client_transport.flush()
 
         # The first five and last five warnings made it through, the debugs and
         # infos were elided
diff --git a/tests/rest/client/test_auth.py b/tests/rest/client/test_auth.py
index 208ec44829..f4e1e7de43 100644
--- a/tests/rest/client/test_auth.py
+++ b/tests/rest/client/test_auth.py
@@ -43,6 +43,9 @@ class DummyRecaptchaChecker(UserInteractiveAuthChecker):
         super().__init__(hs)
         self.recaptcha_attempts: List[Tuple[dict, str]] = []
 
+    def is_enabled(self) -> bool:
+        return True
+
     def check_auth(self, authdict: dict, clientip: str) -> Any:
         self.recaptcha_attempts.append((authdict, clientip))
         return succeed(True)
diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py
index 3325d43a2f..5fa3440691 100644
--- a/tests/rest/client/test_third_party_rules.py
+++ b/tests/rest/client/test_third_party_rules.py
@@ -425,7 +425,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
         async def test_fn(
             event: EventBase, state_events: StateMap[EventBase]
         ) -> Tuple[bool, Optional[JsonDict]]:
-            if event.is_state and event.type == EventTypes.PowerLevels:
+            if event.is_state() and event.type == EventTypes.PowerLevels:
                 await api.create_and_send_event_into_room(
                     {
                         "room_id": event.room_id,
diff --git a/tests/utils.py b/tests/utils.py
index 15fabbc2d0..a0ac11bc5c 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -15,7 +15,7 @@
 
 import atexit
 import os
-from typing import Any, Callable, Dict, List, Tuple, Union, overload
+from typing import Any, Callable, Dict, List, Tuple, Type, TypeVar, Union, overload
 
 import attr
 from typing_extensions import Literal, ParamSpec
@@ -341,3 +341,27 @@ async def create_room(hs: HomeServer, room_id: str, creator_id: str) -> None:
     context = await unpersisted_context.persist(event)
 
     await persistence_store.persist_event(event, context)
+
+
+T = TypeVar("T")
+
+
+def checked_cast(type: Type[T], x: object) -> T:
+    """A version of typing.cast that is checked at runtime.
+
+    We have our own function for this for two reasons:
+
+    1. typing.cast itself is deliberately a no-op at runtime, see
+       https://docs.python.org/3/library/typing.html#typing.cast
+    2. To help work around a mypy-zope bug https://github.com/Shoobx/mypy-zope/issues/91
+       where mypy would erroneously consider `isinstance(x, type)` to be false in all
+       circumstances.
+
+    For this to make sense, `T` needs to be something that `isinstance` can check; see
+        https://docs.python.org/3/library/functions.html?highlight=isinstance#isinstance
+        https://docs.python.org/3/glossary.html#term-abstract-base-class
+        https://docs.python.org/3/library/typing.html#typing.runtime_checkable
+    for more details.
+    """
+    assert isinstance(x, type)
+    return x
-- 
cgit 1.5.1


From 4f4f27e57fdab1d7cc6e275b8acabc785952205e Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Fri, 17 Feb 2023 09:40:32 +0000
Subject: Mitigate a race where /make_join could 403 for restricted rooms
 (#15080)

Previously, when creating a join event in /make_join, we would decide
whether to include additional fields to satisfy restricted room checks
based on the current state of the room. Then, when building the event,
we would capture the forward extremities of the room to use as prev
events.

This is subject to race conditions. For example, when leaving and
rejoining a room, the following sequence of events leads to a misleading
403 response:
1. /make_join reads the current state of the room and sees that the user
   is still in the room. It decides to omit the field required for
   restricted room joins.
2. The leave event is persisted and the room's forward extremities are
   updated.
3. /make_join builds the event, using the post-leave forward extremities.
   The event then fails the restricted room checks.

To mitigate the race, we move the read of the forward extremities closer
to the read of the current state. Ideally, we would compute the state
based off the chosen prev events, but that can involve state resolution,
which is expensive.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/15080.bugfix       |  1 +
 synapse/handlers/federation.py | 16 +++++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15080.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15080.bugfix b/changelog.d/15080.bugfix
new file mode 100644
index 0000000000..965d0b921e
--- /dev/null
+++ b/changelog.d/15080.bugfix
@@ -0,0 +1 @@
+Reduce the likelihood of a rare race condition where rejoining a restricted room over federation would fail.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 1d0f6bcd6f..5f2057269d 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -952,7 +952,20 @@ class FederationHandler:
         #
         # Note that this requires the /send_join request to come back to the
         # same server.
+        prev_event_ids = None
         if room_version.msc3083_join_rules:
+            # Note that the room's state can change out from under us and render our
+            # nice join rules-conformant event non-conformant by the time we build the
+            # event. When this happens, our validation at the end fails and we respond
+            # to the requesting server with a 403, which is misleading — it indicates
+            # that the user is not allowed to join the room and the joining server
+            # should not bother retrying via this homeserver or any others, when
+            # in fact we've just messed up with building the event.
+            #
+            # To reduce the likelihood of this race, we capture the forward extremities
+            # of the room (prev_event_ids) just before fetching the current state, and
+            # hope that the state we fetch corresponds to the prev events we chose.
+            prev_event_ids = await self.store.get_prev_events_for_room(room_id)
             state_ids = await self._state_storage_controller.get_current_state_ids(
                 room_id
             )
@@ -994,7 +1007,8 @@ class FederationHandler:
                 event,
                 unpersisted_context,
             ) = await self.event_creation_handler.create_new_client_event(
-                builder=builder
+                builder=builder,
+                prev_event_ids=prev_event_ids,
             )
         except SynapseError as e:
             logger.warning("Failed to create join to %s because %s", room_id, e)
-- 
cgit 1.5.1


From 61bfcd669ae596a8df940f434e3e2335059100b1 Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Fri, 17 Feb 2023 14:54:55 +0100
Subject: Add account data to export command (#14969)

* Add account data to export command

* newsfile

* remove not needed function

* update newsfile

* adopt #14973
---
 changelog.d/14969.feature              |  1 +
 docs/usage/administration/admin_faq.md |  3 +++
 synapse/app/admin_cmd.py               | 15 ++++++++++-
 synapse/handlers/admin.py              | 49 +++++++++++++++++++++++-----------
 tests/handlers/test_admin.py           | 27 +++++++++++++++++++
 5 files changed, 79 insertions(+), 16 deletions(-)
 create mode 100644 changelog.d/14969.feature

(limited to 'synapse')

diff --git a/changelog.d/14969.feature b/changelog.d/14969.feature
new file mode 100644
index 0000000000..a4680ef9c8
--- /dev/null
+++ b/changelog.d/14969.feature
@@ -0,0 +1 @@
+Add account data to the command line [user data export tool](https://matrix-org.github.io/synapse/v1.78/usage/administration/admin_faq.html#how-can-i-export-user-data).
\ No newline at end of file
diff --git a/docs/usage/administration/admin_faq.md b/docs/usage/administration/admin_faq.md
index 7a27741199..925e1d175e 100644
--- a/docs/usage/administration/admin_faq.md
+++ b/docs/usage/administration/admin_faq.md
@@ -71,6 +71,9 @@ output-directory
 │       ├───invite_state
 │       └───knock_state
 └───user_data
+    ├───account_data
+    │   ├───global
+    │   └───<room_id>
     ├───connections
     ├───devices
     └───profile
diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py
index fe7afb9475..ad51f33165 100644
--- a/synapse/app/admin_cmd.py
+++ b/synapse/app/admin_cmd.py
@@ -17,7 +17,7 @@ import logging
 import os
 import sys
 import tempfile
-from typing import List, Optional
+from typing import List, Mapping, Optional
 
 from twisted.internet import defer, task
 
@@ -222,6 +222,19 @@ class FileExfiltrationWriter(ExfiltrationWriter):
             with open(connection_file, "a") as f:
                 print(json.dumps(connection), file=f)
 
+    def write_account_data(
+        self, file_name: str, account_data: Mapping[str, JsonDict]
+    ) -> None:
+        account_data_directory = os.path.join(
+            self.base_directory, "user_data", "account_data"
+        )
+        os.makedirs(account_data_directory, exist_ok=True)
+
+        account_data_file = os.path.join(account_data_directory, file_name)
+
+        with open(account_data_file, "a") as f:
+            print(json.dumps(account_data), file=f)
+
     def finished(self) -> str:
         return self.base_directory
 
diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py
index b03c214b14..8b7760b2cc 100644
--- a/synapse/handlers/admin.py
+++ b/synapse/handlers/admin.py
@@ -14,7 +14,7 @@
 
 import abc
 import logging
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set
+from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Set
 
 from synapse.api.constants import Direction, Membership
 from synapse.events import EventBase
@@ -29,7 +29,7 @@ logger = logging.getLogger(__name__)
 
 class AdminHandler:
     def __init__(self, hs: "HomeServer"):
-        self.store = hs.get_datastores().main
+        self._store = hs.get_datastores().main
         self._device_handler = hs.get_device_handler()
         self._storage_controllers = hs.get_storage_controllers()
         self._state_storage_controller = self._storage_controllers.state
@@ -38,7 +38,7 @@ class AdminHandler:
     async def get_whois(self, user: UserID) -> JsonDict:
         connections = []
 
-        sessions = await self.store.get_user_ip_and_agents(user)
+        sessions = await self._store.get_user_ip_and_agents(user)
         for session in sessions:
             connections.append(
                 {
@@ -57,7 +57,7 @@ class AdminHandler:
 
     async def get_user(self, user: UserID) -> Optional[JsonDict]:
         """Function to get user details"""
-        user_info_dict = await self.store.get_user_by_id(user.to_string())
+        user_info_dict = await self._store.get_user_by_id(user.to_string())
         if user_info_dict is None:
             return None
 
@@ -89,11 +89,11 @@ class AdminHandler:
         }
 
         # Add additional user metadata
-        profile = await self.store.get_profileinfo(user.localpart)
-        threepids = await self.store.user_get_threepids(user.to_string())
+        profile = await self._store.get_profileinfo(user.localpart)
+        threepids = await self._store.user_get_threepids(user.to_string())
         external_ids = [
             ({"auth_provider": auth_provider, "external_id": external_id})
-            for auth_provider, external_id in await self.store.get_external_ids_by_user(
+            for auth_provider, external_id in await self._store.get_external_ids_by_user(
                 user.to_string()
             )
         ]
@@ -101,7 +101,7 @@ class AdminHandler:
         user_info_dict["avatar_url"] = profile.avatar_url
         user_info_dict["threepids"] = threepids
         user_info_dict["external_ids"] = external_ids
-        user_info_dict["erased"] = await self.store.is_user_erased(user.to_string())
+        user_info_dict["erased"] = await self._store.is_user_erased(user.to_string())
 
         return user_info_dict
 
@@ -117,7 +117,7 @@ class AdminHandler:
             The returned value is that returned by `writer.finished()`.
         """
         # Get all rooms the user is in or has been in
-        rooms = await self.store.get_rooms_for_local_user_where_membership_is(
+        rooms = await self._store.get_rooms_for_local_user_where_membership_is(
             user_id,
             membership_list=(
                 Membership.JOIN,
@@ -131,7 +131,7 @@ class AdminHandler:
         # We only try and fetch events for rooms the user has been in. If
         # they've been e.g. invited to a room without joining then we handle
         # those separately.
-        rooms_user_has_been_in = await self.store.get_rooms_user_has_been_in(user_id)
+        rooms_user_has_been_in = await self._store.get_rooms_user_has_been_in(user_id)
 
         for index, room in enumerate(rooms):
             room_id = room.room_id
@@ -140,7 +140,7 @@ class AdminHandler:
                 "[%s] Handling room %s, %d/%d", user_id, room_id, index + 1, len(rooms)
             )
 
-            forgotten = await self.store.did_forget(user_id, room_id)
+            forgotten = await self._store.did_forget(user_id, room_id)
             if forgotten:
                 logger.info("[%s] User forgot room %d, ignoring", user_id, room_id)
                 continue
@@ -152,14 +152,14 @@ class AdminHandler:
 
                 if room.membership == Membership.INVITE:
                     event_id = room.event_id
-                    invite = await self.store.get_event(event_id, allow_none=True)
+                    invite = await self._store.get_event(event_id, allow_none=True)
                     if invite:
                         invited_state = invite.unsigned["invite_room_state"]
                         writer.write_invite(room_id, invite, invited_state)
 
                 if room.membership == Membership.KNOCK:
                     event_id = room.event_id
-                    knock = await self.store.get_event(event_id, allow_none=True)
+                    knock = await self._store.get_event(event_id, allow_none=True)
                     if knock:
                         knock_state = knock.unsigned["knock_room_state"]
                         writer.write_knock(room_id, knock, knock_state)
@@ -170,7 +170,7 @@ class AdminHandler:
             # were joined. We estimate that point by looking at the
             # stream_ordering of the last membership if it wasn't a join.
             if room.membership == Membership.JOIN:
-                stream_ordering = self.store.get_room_max_stream_ordering()
+                stream_ordering = self._store.get_room_max_stream_ordering()
             else:
                 stream_ordering = room.stream_ordering
 
@@ -197,7 +197,7 @@ class AdminHandler:
             # events that we have and then filtering, this isn't the most
             # efficient method perhaps but it does guarantee we get everything.
             while True:
-                events, _ = await self.store.paginate_room_events(
+                events, _ = await self._store.paginate_room_events(
                     room_id, from_key, to_key, limit=100, direction=Direction.FORWARDS
                 )
                 if not events:
@@ -263,6 +263,13 @@ class AdminHandler:
             connections["devices"][""]["sessions"][0]["connections"]
         )
 
+        # Get all of the user's account data, both global and per-room
+        global_data = await self._store.get_global_account_data_for_user(user_id)
+        by_room_data = await self._store.get_room_account_data_for_user(user_id)
+        writer.write_account_data("global", global_data)
+        for room_id in by_room_data:
+            writer.write_account_data(room_id, by_room_data[room_id])
+
         return writer.finished()
 
 
@@ -340,6 +347,18 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta):
         """
         raise NotImplementedError()
 
+    @abc.abstractmethod
+    def write_account_data(
+        self, file_name: str, account_data: Mapping[str, JsonDict]
+    ) -> None:
+        """Write the account data of a user.
+
+        Args:
+            file_name: file name to write data
+            account_data: mapping of global or room account_data
+        """
+        raise NotImplementedError()
+
     @abc.abstractmethod
     def finished(self) -> Any:
         """Called when all data has successfully been exported and written.
diff --git a/tests/handlers/test_admin.py b/tests/handlers/test_admin.py
index 6f300b8e11..1b97aaeed1 100644
--- a/tests/handlers/test_admin.py
+++ b/tests/handlers/test_admin.py
@@ -296,3 +296,30 @@ class ExfiltrateData(unittest.HomeserverTestCase):
         self.assertEqual(args[0][0]["user_agent"], "user_agent")
         self.assertGreater(args[0][0]["last_seen"], 0)
         self.assertNotIn("access_token", args[0][0])
+
+    def test_account_data(self) -> None:
+        """Tests that user account data get exported."""
+        # add account data
+        self.get_success(
+            self._store.add_account_data_for_user(self.user2, "m.global", {"a": 1})
+        )
+        self.get_success(
+            self._store.add_account_data_to_room(
+                self.user2, "test_room", "m.per_room", {"b": 2}
+            )
+        )
+
+        writer = Mock()
+
+        self.get_success(self.admin_handler.export_user_data(self.user2, writer))
+
+        # two calls, one call for user data and one call for room data
+        writer.write_account_data.assert_called()
+
+        args = writer.write_account_data.call_args_list[0][0]
+        self.assertEqual(args[0], "global")
+        self.assertEqual(args[1]["m.global"]["a"], 1)
+
+        args = writer.write_account_data.call_args_list[1][0]
+        self.assertEqual(args[0], "test_room")
+        self.assertEqual(args[1]["m.per_room"]["b"], 2)
-- 
cgit 1.5.1


From 1cbc3f197cc1b9732649ffb769b05d90c0e904d7 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Mon, 20 Feb 2023 12:00:18 +0000
Subject: Fix a bug introduced in Synapse v1.74.0 where searching with colons
 when using ICU for search term tokenisation would fail with an error.
 (#15079)

Co-authored-by: David Robertson <davidr@element.io>
---
 changelog.d/15079.bugfix                         |  1 +
 synapse/storage/databases/main/user_directory.py | 24 +++++++--
 tests/handlers/test_user_directory.py            |  7 +++
 tests/storage/test_user_directory.py             | 63 +++++++++++++++++++++++-
 4 files changed, 90 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/15079.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15079.bugfix b/changelog.d/15079.bugfix
new file mode 100644
index 0000000000..907892c1ef
--- /dev/null
+++ b/changelog.d/15079.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse v1.74.0 where searching with colons when using ICU for search term tokenisation would fail with an error.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index f6a6fd4079..30af4b3b6c 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -918,11 +918,19 @@ def _parse_query_postgres(search_term: str) -> Tuple[str, str, str]:
     We use this so that we can add prefix matching, which isn't something
     that is supported by default.
     """
-    results = _parse_words(search_term)
+    escaped_words = []
+    for word in _parse_words(search_term):
+        # Postgres tsvector and tsquery quoting rules:
+        # words potentially containing punctuation should be quoted
+        # and then existing quotes and backslashes should be doubled
+        # See: https://www.postgresql.org/docs/current/datatype-textsearch.html#DATATYPE-TSQUERY
+
+        quoted_word = word.replace("'", "''").replace("\\", "\\\\")
+        escaped_words.append(f"'{quoted_word}'")
 
-    both = " & ".join("(%s:* | %s)" % (result, result) for result in results)
-    exact = " & ".join("%s" % (result,) for result in results)
-    prefix = " & ".join("%s:*" % (result,) for result in results)
+    both = " & ".join("(%s:* | %s)" % (word, word) for word in escaped_words)
+    exact = " & ".join("%s" % (word,) for word in escaped_words)
+    prefix = " & ".join("%s:*" % (word,) for word in escaped_words)
 
     return both, exact, prefix
 
@@ -944,6 +952,14 @@ def _parse_words(search_term: str) -> List[str]:
     if USE_ICU:
         return _parse_words_with_icu(search_term)
 
+    return _parse_words_with_regex(search_term)
+
+
+def _parse_words_with_regex(search_term: str) -> List[str]:
+    """
+    Break down search term into words, when we don't have ICU available.
+    See: `_parse_words`
+    """
     return re.findall(r"([\w\-]+)", search_term, re.UNICODE)
 
 
diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py
index f65a68b9c2..a02c1c6227 100644
--- a/tests/handlers/test_user_directory.py
+++ b/tests/handlers/test_user_directory.py
@@ -192,6 +192,13 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         self.helper.join(room, self.appservice.sender, tok=self.appservice.token)
         self._check_only_one_user_in_directory(user, room)
 
+    def test_search_term_with_colon_in_it_does_not_raise(self) -> None:
+        """
+        Regression test: Test that search terms with colons in them are acceptable.
+        """
+        u1 = self.register_user("user1", "pass")
+        self.get_success(self.handler.search_users(u1, "haha:paamayim-nekudotayim", 10))
+
     def test_user_not_in_users_table(self) -> None:
         """Unclear how it happens, but on matrix.org we've seen join events
         for users who aren't in the users table. Test that we don't fall over
diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py
index f1ca523d23..2d169684cf 100644
--- a/tests/storage/test_user_directory.py
+++ b/tests/storage/test_user_directory.py
@@ -25,6 +25,11 @@ from synapse.rest.client import login, register, room
 from synapse.server import HomeServer
 from synapse.storage import DataStore
 from synapse.storage.background_updates import _BackgroundUpdateHandler
+from synapse.storage.databases.main import user_directory
+from synapse.storage.databases.main.user_directory import (
+    _parse_words_with_icu,
+    _parse_words_with_regex,
+)
 from synapse.storage.roommember import ProfileInfo
 from synapse.util import Clock
 
@@ -42,7 +47,7 @@ ALICE = "@alice:a"
 BOB = "@bob:b"
 BOBBY = "@bobby:a"
 # The localpart isn't 'Bela' on purpose so we can test looking up display names.
-BELA = "@somenickname:a"
+BELA = "@somenickname:example.org"
 
 
 class GetUserDirectoryTables:
@@ -423,6 +428,8 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
 
 
 class UserDirectoryStoreTestCase(HomeserverTestCase):
+    use_icu = False
+
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastores().main
 
@@ -434,6 +441,12 @@ class UserDirectoryStoreTestCase(HomeserverTestCase):
         self.get_success(self.store.update_profile_in_user_dir(BELA, "Bela", None))
         self.get_success(self.store.add_users_in_public_rooms("!room:id", (ALICE, BOB)))
 
+        self._restore_use_icu = user_directory.USE_ICU
+        user_directory.USE_ICU = self.use_icu
+
+    def tearDown(self) -> None:
+        user_directory.USE_ICU = self._restore_use_icu
+
     def test_search_user_dir(self) -> None:
         # normally when alice searches the directory she should just find
         # bob because bobby doesn't share a room with her.
@@ -478,6 +491,26 @@ class UserDirectoryStoreTestCase(HomeserverTestCase):
             {"user_id": BELA, "display_name": "Bela", "avatar_url": None},
         )
 
+    @override_config({"user_directory": {"search_all_users": True}})
+    def test_search_user_dir_start_of_user_id(self) -> None:
+        """Tests that a user can look up another user by searching for the start
+        of their user ID.
+        """
+        r = self.get_success(self.store.search_user_dir(ALICE, "somenickname:exa", 10))
+        self.assertFalse(r["limited"])
+        self.assertEqual(1, len(r["results"]))
+        self.assertDictEqual(
+            r["results"][0],
+            {"user_id": BELA, "display_name": "Bela", "avatar_url": None},
+        )
+
+
+class UserDirectoryStoreTestCaseWithIcu(UserDirectoryStoreTestCase):
+    use_icu = True
+
+    if not icu:
+        skip = "Requires PyICU"
+
 
 class UserDirectoryICUTestCase(HomeserverTestCase):
     if not icu:
@@ -513,3 +546,31 @@ class UserDirectoryICUTestCase(HomeserverTestCase):
             r["results"][0],
             {"user_id": ALICE, "display_name": display_name, "avatar_url": None},
         )
+
+    def test_icu_word_boundary_punctuation(self) -> None:
+        """
+        Tests the behaviour of punctuation with the ICU tokeniser.
+
+        Seems to depend on underlying version of ICU.
+        """
+
+        # Note: either tokenisation is fine, because Postgres actually splits
+        # words itself afterwards.
+        self.assertIn(
+            _parse_words_with_icu("lazy'fox jumped:over the.dog"),
+            (
+                # ICU 66 on Ubuntu 20.04
+                ["lazy'fox", "jumped", "over", "the", "dog"],
+                # ICU 70 on Ubuntu 22.04
+                ["lazy'fox", "jumped:over", "the.dog"],
+            ),
+        )
+
+    def test_regex_word_boundary_punctuation(self) -> None:
+        """
+        Tests the behaviour of punctuation with the non-ICU tokeniser
+        """
+        self.assertEqual(
+            _parse_words_with_regex("lazy'fox jumped:over the.dog"),
+            ["lazy", "fox", "jumped", "over", "the", "dog"],
+        )
-- 
cgit 1.5.1


From 490a3675bd7225b5695e505fea225d7c30127551 Mon Sep 17 00:00:00 2001
From: realtyem <realtyem@gmail.com>
Date: Mon, 20 Feb 2023 06:23:00 -0600
Subject: Allow health listener resource to load (#15096)

* Allow health listener resource to load.

* changelog

* Update changelog.d/15096.bugfix
---
 changelog.d/15096.bugfix | 1 +
 synapse/config/server.py | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 changelog.d/15096.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15096.bugfix b/changelog.d/15096.bugfix
new file mode 100644
index 0000000000..09b4d861f8
--- /dev/null
+++ b/changelog.d/15096.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.76 where workers would fail to start if the `health` listener was configured.
diff --git a/synapse/config/server.py b/synapse/config/server.py
index ecdaa2d9dd..d4ef9930b0 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -177,6 +177,7 @@ KNOWN_RESOURCES = {
     "client",
     "consent",
     "federation",
+    "health",
     "keys",
     "media",
     "metrics",
-- 
cgit 1.5.1


From e26d7d5ae786df8d9d9a4dbd0f734e5c2f08aafd Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 20 Feb 2023 13:35:24 +0000
Subject: Teach portdb about `un_partial_stated_event_stream` (#15108)

* Sort BOOLEAN_COLUMNS and APPEND_ONLY_TABLES

So I can see if a given table is present in logarithmic time, rather
than linear.

* Teach portdb about `un_partial_stated_event_stream`

* Comments comments comments

* Changelog
---
 changelog.d/15108.bugfix            |  1 +
 synapse/_scripts/synapse_port_db.py | 85 +++++++++++++++++++++++--------------
 2 files changed, 53 insertions(+), 33 deletions(-)
 create mode 100644 changelog.d/15108.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15108.bugfix b/changelog.d/15108.bugfix
new file mode 100644
index 0000000000..30af8b439d
--- /dev/null
+++ b/changelog.d/15108.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.75 where the [portdb script](https://matrix-org.github.io/synapse/release-v1.78/postgres.html#porting-from-sqlite) would fail to run after a room had been faster-joined.
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 5e137dbbf7..0d35e0af8f 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -94,61 +94,80 @@ reactor = cast(ISynapseReactor, reactor_)
 logger = logging.getLogger("synapse_port_db")
 
 
+# SQLite doesn't have a dedicated boolean type (it stores True/False as 1/0). This means
+# portdb will read sqlite bools as integers, then try to insert them into postgres
+# boolean columns---which fails. Lacking some Python-parseable metaschema, we must
+# specify which integer columns should be inserted as booleans into postgres.
 BOOLEAN_COLUMNS = {
-    "events": ["processed", "outlier", "contains_url"],
-    "rooms": ["is_public", "has_auth_chain_index"],
+    "access_tokens": ["used"],
+    "account_validity": ["email_sent"],
+    "device_lists_changes_in_room": ["converted_to_destinations"],
+    "device_lists_outbound_pokes": ["sent"],
+    "devices": ["hidden"],
+    "e2e_fallback_keys_json": ["used"],
+    "e2e_room_keys": ["is_verified"],
     "event_edges": ["is_state"],
+    "events": ["processed", "outlier", "contains_url"],
+    "local_media_repository": ["safe_from_quarantine"],
     "presence_list": ["accepted"],
     "presence_stream": ["currently_active"],
     "public_room_list_stream": ["visibility"],
-    "devices": ["hidden"],
-    "device_lists_outbound_pokes": ["sent"],
-    "users_who_share_rooms": ["share_private"],
-    "e2e_room_keys": ["is_verified"],
-    "account_validity": ["email_sent"],
+    "pushers": ["enabled"],
     "redactions": ["have_censored"],
     "room_stats_state": ["is_federatable"],
-    "local_media_repository": ["safe_from_quarantine"],
+    "rooms": ["is_public", "has_auth_chain_index"],
     "users": ["shadow_banned", "approved"],
-    "e2e_fallback_keys_json": ["used"],
-    "access_tokens": ["used"],
-    "device_lists_changes_in_room": ["converted_to_destinations"],
-    "pushers": ["enabled"],
+    "un_partial_stated_event_stream": ["rejection_status_changed"],
+    "users_who_share_rooms": ["share_private"],
 }
 
 
+# These tables are never deleted from in normal operation [*], so we can resume porting
+# over rows from a previous attempt rather than starting from scratch.
+#
+# [*]: We do delete from many of these tables when purging a room, and
+#      presumably when purging old events. So we might e.g.
+#
+#      1. Run portdb and port half of some table.
+#      2. Stop portdb.
+#      3. Purge something, deleting some of the rows we've ported over.
+#      4. Restart portdb. The rows deleted from sqlite are still present in postgres.
+#
+#      But this isn't the end of the world: we should be able to repeat the purge
+#      on the postgres DB when porting completes.
 APPEND_ONLY_TABLES = [
+    "cache_invalidation_stream_by_instance",
+    "event_auth",
+    "event_edges",
+    "event_json",
     "event_reference_hashes",
+    "event_search",
+    "event_to_state_groups",
     "events",
-    "event_json",
-    "state_events",
-    "room_memberships",
-    "topics",
-    "room_names",
-    "rooms",
+    "ex_outlier_stream",
     "local_media_repository",
     "local_media_repository_thumbnails",
+    "presence_stream",
+    "public_room_list_stream",
+    "push_rules_stream",
+    "received_transactions",
+    "redactions",
+    "rejections",
     "remote_media_cache",
     "remote_media_cache_thumbnails",
-    "redactions",
-    "event_edges",
-    "event_auth",
-    "received_transactions",
+    "room_memberships",
+    "room_names",
+    "rooms",
     "sent_transactions",
-    "transaction_id_to_pdu",
-    "users",
+    "state_events",
+    "state_group_edges",
     "state_groups",
     "state_groups_state",
-    "event_to_state_groups",
-    "rejections",
-    "event_search",
-    "presence_stream",
-    "push_rules_stream",
-    "ex_outlier_stream",
-    "cache_invalidation_stream_by_instance",
-    "public_room_list_stream",
-    "state_group_edges",
     "stream_ordering_to_exterm",
+    "topics",
+    "transaction_id_to_pdu",
+    "un_partial_stated_event_stream",
+    "users",
 ]
 
 
-- 
cgit 1.5.1


From addd12f16dc35a4f82cb48807719909e7aed9dcb Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Tue, 21 Feb 2023 12:26:00 +0000
Subject: Tweak logging for when a worker waits for its view of a replication
 stream to catch up. (#15120)

* Improve logging messages for the 'wait for repl stream' read-after-write consistency feature

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

* Update synapse/replication/tcp/client.py

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
---
 changelog.d/15120.misc            |  1 +
 synapse/replication/tcp/client.py | 12 ++++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15120.misc

(limited to 'synapse')

diff --git a/changelog.d/15120.misc b/changelog.d/15120.misc
new file mode 100644
index 0000000000..ebbc0c9027
--- /dev/null
+++ b/changelog.d/15120.misc
@@ -0,0 +1 @@
+Tweak logging for when a worker waits for its view of a replication stream to catch up.
\ No newline at end of file
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index cc0528bd8e..424854efbe 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -370,15 +370,23 @@ class ReplicationDataHandler:
         # We measure here to get in flight counts and average waiting time.
         with Measure(self._clock, "repl.wait_for_stream_position"):
             logger.info(
-                "Waiting for repl stream %r to reach %s (%s)",
+                "Waiting for repl stream %r to reach %s (%s); currently at: %s",
                 stream_name,
                 position,
                 instance_name,
+                current_position,
             )
             try:
                 await make_deferred_yieldable(deferred)
             except defer.TimeoutError:
-                logger.error("Timed out waiting for stream %s", stream_name)
+                logger.error(
+                    "Timed out waiting for repl stream %r to reach %s (%s)"
+                    "; currently at: %s",
+                    stream_name,
+                    position,
+                    instance_name,
+                    self._streams[stream_name].current_token(instance_name),
+                )
                 return
 
             logger.info(
-- 
cgit 1.5.1


From 647ff3ef65e7a54b2719755802b4e6f2f45f5eb6 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 22 Feb 2023 11:07:28 +0000
Subject: Remove unused `room_alias` field from `/createRoom` response (#15093)

* Change `create_room` return type

* Don't return room alias from /createRoom

* Update other callsites

* Fix up mypy complaints

It looks like new_room_user_id is None iff new_room_id is None. It's a
shame we haven't expressed this in a way that mypy can understand.

* Changelog
---
 changelog.d/15093.bugfix                         |  1 +
 synapse/handlers/register.py                     |  4 +--
 synapse/handlers/room.py                         | 38 ++++++++++++------------
 synapse/module_api/__init__.py                   |  6 ++--
 synapse/rest/client/room.py                      |  4 +--
 synapse/server_notices/server_notices_manager.py |  3 +-
 tests/storage/test_cleanup_extrems.py            |  8 ++---
 tests/storage/test_event_metrics.py              |  3 +-
 tests/storage/test_receipts.py                   | 10 ++++---
 tests/test_federation.py                         |  2 +-
 10 files changed, 40 insertions(+), 39 deletions(-)
 create mode 100644 changelog.d/15093.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15093.bugfix b/changelog.d/15093.bugfix
new file mode 100644
index 0000000000..00f1c19391
--- /dev/null
+++ b/changelog.d/15093.bugfix
@@ -0,0 +1 @@
+Remove the unspecced `room_alias` field from the [`/createRoom`](https://spec.matrix.org/v1.6/client-server-api/#post_matrixclientv3createroom) response.
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index c611efb760..e4e506e62c 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -476,7 +476,7 @@ class RegistrationHandler:
                     # create room expects the localpart of the room alias
                     config["room_alias_name"] = room_alias.localpart
 
-                    info, _ = await room_creation_handler.create_room(
+                    room_id, _, _ = await room_creation_handler.create_room(
                         fake_requester,
                         config=config,
                         ratelimit=False,
@@ -490,7 +490,7 @@ class RegistrationHandler:
                                 user_id, authenticated_entity=self._server_name
                             ),
                             target=UserID.from_string(user_id),
-                            room_id=info["room_id"],
+                            room_id=room_id,
                             # Since it was just created, there are no remote hosts.
                             remote_room_hosts=[],
                             action="join",
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 837dabb3b7..37c87c8351 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -690,13 +690,14 @@ class RoomCreationHandler:
         config: JsonDict,
         ratelimit: bool = True,
         creator_join_profile: Optional[JsonDict] = None,
-    ) -> Tuple[dict, int]:
+    ) -> Tuple[str, Optional[RoomAlias], int]:
         """Creates a new room.
 
         Args:
-            requester:
-                The user who requested the room creation.
-            config : A dict of configuration options.
+            requester: The user who requested the room creation.
+            config: A dict of configuration options. This will be the body of
+                a /createRoom request; see
+                https://spec.matrix.org/latest/client-server-api/#post_matrixclientv3createroom
             ratelimit: set to False to disable the rate limiter
 
             creator_join_profile:
@@ -707,14 +708,17 @@ class RoomCreationHandler:
                 `avatar_url` and/or `displayname`.
 
         Returns:
-                First, a dict containing the keys `room_id` and, if an alias
-                was, requested, `room_alias`. Secondly, the stream_id of the
-                last persisted event.
+            A 3-tuple containing:
+                - the room ID;
+                - if requested, the room alias, otherwise None; and
+                - the `stream_id` of the last persisted event.
         Raises:
-            SynapseError if the room ID couldn't be stored, 3pid invitation config
-            validation failed, or something went horribly wrong.
-            ResourceLimitError if server is blocked to some resource being
-            exceeded
+            SynapseError:
+                if the room ID couldn't be stored, 3pid invitation config
+                validation failed, or something went horribly wrong.
+            ResourceLimitError:
+                if the server is blocked due to some resource limit being
+                exceeded
         """
         user_id = requester.user.to_string()
 
@@ -1024,11 +1028,6 @@ class RoomCreationHandler:
             last_sent_event_id = member_event_id
             depth += 1
 
-        result = {"room_id": room_id}
-
-        if room_alias:
-            result["room_alias"] = room_alias.to_string()
-
         # Always wait for room creation to propagate before returning
         await self._replication.wait_for_stream_position(
             self.hs.config.worker.events_shard_config.get_instance(room_id),
@@ -1036,7 +1035,7 @@ class RoomCreationHandler:
             last_stream_id,
         )
 
-        return result, last_stream_id
+        return room_id, room_alias, last_stream_id
 
     async def _send_events_for_new_room(
         self,
@@ -1825,7 +1824,7 @@ class RoomShutdownHandler:
                 new_room_user_id, authenticated_entity=requester_user_id
             )
 
-            info, stream_id = await self._room_creation_handler.create_room(
+            new_room_id, _, stream_id = await self._room_creation_handler.create_room(
                 room_creator_requester,
                 config={
                     "preset": RoomCreationPreset.PUBLIC_CHAT,
@@ -1834,7 +1833,6 @@ class RoomShutdownHandler:
                 },
                 ratelimit=False,
             )
-            new_room_id = info["room_id"]
 
             logger.info(
                 "Shutting down room %r, joining to new room: %r", room_id, new_room_id
@@ -1887,6 +1885,7 @@ class RoomShutdownHandler:
 
                 # Join users to new room
                 if new_room_user_id:
+                    assert new_room_id is not None
                     await self.room_member_handler.update_membership(
                         requester=target_requester,
                         target=target_requester.user,
@@ -1919,6 +1918,7 @@ class RoomShutdownHandler:
 
             aliases_for_room = await self.store.get_aliases_for_room(room_id)
 
+            assert new_room_id is not None
             await self.store.update_aliases_for_room(
                 room_id, new_room_id, requester_user_id
             )
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index d22dd19d38..1964276a54 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -1576,14 +1576,14 @@ class ModuleApi:
             )
 
         requester = create_requester(user_id)
-        room_id_and_alias, _ = await self._hs.get_room_creation_handler().create_room(
+        room_id, room_alias, _ = await self._hs.get_room_creation_handler().create_room(
             requester=requester,
             config=config,
             ratelimit=ratelimit,
             creator_join_profile=creator_join_profile,
         )
-
-        return room_id_and_alias["room_id"], room_id_and_alias.get("room_alias", None)
+        room_alias_str = room_alias.to_string() if room_alias else None
+        return room_id, room_alias_str
 
     async def set_displayname(
         self,
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index d0db85cca7..14b04810a1 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -160,11 +160,11 @@ class RoomCreateRestServlet(TransactionRestServlet):
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
 
-        info, _ = await self._room_creation_handler.create_room(
+        room_id, _, _ = await self._room_creation_handler.create_room(
             requester, self.get_room_config(request)
         )
 
-        return 200, info
+        return 200, {"room_id": room_id}
 
     def get_room_config(self, request: Request) -> JsonDict:
         user_supplied_config = parse_json_object_from_request(request)
diff --git a/synapse/server_notices/server_notices_manager.py b/synapse/server_notices/server_notices_manager.py
index 564e3705c2..9732dbdb6e 100644
--- a/synapse/server_notices/server_notices_manager.py
+++ b/synapse/server_notices/server_notices_manager.py
@@ -178,7 +178,7 @@ class ServerNoticesManager:
                 "avatar_url": self._config.servernotices.server_notices_mxid_avatar_url,
             }
 
-        info, _ = await self._room_creation_handler.create_room(
+        room_id, _, _ = await self._room_creation_handler.create_room(
             requester,
             config={
                 "preset": RoomCreationPreset.PRIVATE_CHAT,
@@ -188,7 +188,6 @@ class ServerNoticesManager:
             ratelimit=False,
             creator_join_profile=join_profile,
         )
-        room_id = info["room_id"]
 
         self.maybe_get_notice_room_for_user.invalidate((user_id,))
 
diff --git a/tests/storage/test_cleanup_extrems.py b/tests/storage/test_cleanup_extrems.py
index d570684c99..7de109966d 100644
--- a/tests/storage/test_cleanup_extrems.py
+++ b/tests/storage/test_cleanup_extrems.py
@@ -43,8 +43,9 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
         # Create a test user and room
         self.user = UserID("alice", "test")
         self.requester = create_requester(self.user)
-        info, _ = self.get_success(self.room_creator.create_room(self.requester, {}))
-        self.room_id = info["room_id"]
+        self.room_id, _, _ = self.get_success(
+            self.room_creator.create_room(self.requester, {})
+        )
 
     def run_background_update(self) -> None:
         """Re run the background update to clean up the extremities."""
@@ -275,10 +276,9 @@ class CleanupExtremDummyEventsTestCase(HomeserverTestCase):
         self.user = UserID.from_string(self.register_user("user1", "password"))
         self.token1 = self.login("user1", "password")
         self.requester = create_requester(self.user)
-        info, _ = self.get_success(
+        self.room_id, _, _ = self.get_success(
             self.room_creator.create_room(self.requester, {"visibility": "public"})
         )
-        self.room_id = info["room_id"]
         self.event_creator = homeserver.get_event_creation_handler()
         homeserver.config.consent.user_consent_version = self.CONSENT_VERSION
 
diff --git a/tests/storage/test_event_metrics.py b/tests/storage/test_event_metrics.py
index a91411168c..6897addbd3 100644
--- a/tests/storage/test_event_metrics.py
+++ b/tests/storage/test_event_metrics.py
@@ -33,8 +33,7 @@ class ExtremStatisticsTestCase(HomeserverTestCase):
         events = [(3, 2), (6, 2), (4, 6)]
 
         for event_count, extrems in events:
-            info, _ = self.get_success(room_creator.create_room(requester, {}))
-            room_id = info["room_id"]
+            room_id, _, _ = self.get_success(room_creator.create_room(requester, {}))
 
             last_event = None
 
diff --git a/tests/storage/test_receipts.py b/tests/storage/test_receipts.py
index 12c17f1073..1b52eef23f 100644
--- a/tests/storage/test_receipts.py
+++ b/tests/storage/test_receipts.py
@@ -50,12 +50,14 @@ class ReceiptTestCase(HomeserverTestCase):
         self.otherRequester = create_requester(self.otherUser)
 
         # Create a test room
-        info, _ = self.get_success(self.room_creator.create_room(self.ourRequester, {}))
-        self.room_id1 = info["room_id"]
+        self.room_id1, _, _ = self.get_success(
+            self.room_creator.create_room(self.ourRequester, {})
+        )
 
         # Create a second test room
-        info, _ = self.get_success(self.room_creator.create_room(self.ourRequester, {}))
-        self.room_id2 = info["room_id"]
+        self.room_id2, _, _ = self.get_success(
+            self.room_creator.create_room(self.ourRequester, {})
+        )
 
         # Join the second user to the first room
         memberEvent, memberEventContext = self.get_success(
diff --git a/tests/test_federation.py b/tests/test_federation.py
index 82dfd88b99..46d2f99eac 100644
--- a/tests/test_federation.py
+++ b/tests/test_federation.py
@@ -47,7 +47,7 @@ class MessageAcceptTests(unittest.HomeserverTestCase):
             room_creator.create_room(
                 our_user, room_creator._presets_dict["public_chat"], ratelimit=False
             )
-        )[0]["room_id"]
+        )[0]
 
         self.store = self.hs.get_datastores().main
 
-- 
cgit 1.5.1


From 6def779a1a7c49cd10e635986fbfa1e422eb20bf Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Wed, 22 Feb 2023 20:29:39 +0100
Subject: Use `json.dump` in `FileExfiltrationWriter` (#15095)

To directly write to the open file, instead of writing to an
in-memory string first.
---
 changelog.d/15095.misc   |  1 +
 synapse/app/admin_cmd.py | 16 ++++++++--------
 2 files changed, 9 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/15095.misc

(limited to 'synapse')

diff --git a/changelog.d/15095.misc b/changelog.d/15095.misc
new file mode 100644
index 0000000000..a2fafe2fff
--- /dev/null
+++ b/changelog.d/15095.misc
@@ -0,0 +1 @@
+Refactor writing json data in `FileExfiltrationWriter`.
\ No newline at end of file
diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py
index ad51f33165..5003777f0d 100644
--- a/synapse/app/admin_cmd.py
+++ b/synapse/app/admin_cmd.py
@@ -149,7 +149,7 @@ class FileExfiltrationWriter(ExfiltrationWriter):
 
         with open(events_file, "a") as f:
             for event in events:
-                print(json.dumps(event.get_pdu_json()), file=f)
+                json.dump(event.get_pdu_json(), fp=f)
 
     def write_state(
         self, room_id: str, event_id: str, state: StateMap[EventBase]
@@ -162,7 +162,7 @@ class FileExfiltrationWriter(ExfiltrationWriter):
 
         with open(event_file, "a") as f:
             for event in state.values():
-                print(json.dumps(event.get_pdu_json()), file=f)
+                json.dump(event.get_pdu_json(), fp=f)
 
     def write_invite(
         self, room_id: str, event: EventBase, state: StateMap[EventBase]
@@ -178,7 +178,7 @@ class FileExfiltrationWriter(ExfiltrationWriter):
 
         with open(invite_state, "a") as f:
             for event in state.values():
-                print(json.dumps(event), file=f)
+                json.dump(event, fp=f)
 
     def write_knock(
         self, room_id: str, event: EventBase, state: StateMap[EventBase]
@@ -194,7 +194,7 @@ class FileExfiltrationWriter(ExfiltrationWriter):
 
         with open(knock_state, "a") as f:
             for event in state.values():
-                print(json.dumps(event), file=f)
+                json.dump(event, fp=f)
 
     def write_profile(self, profile: JsonDict) -> None:
         user_directory = os.path.join(self.base_directory, "user_data")
@@ -202,7 +202,7 @@ class FileExfiltrationWriter(ExfiltrationWriter):
         profile_file = os.path.join(user_directory, "profile")
 
         with open(profile_file, "a") as f:
-            print(json.dumps(profile), file=f)
+            json.dump(profile, fp=f)
 
     def write_devices(self, devices: List[JsonDict]) -> None:
         user_directory = os.path.join(self.base_directory, "user_data")
@@ -211,7 +211,7 @@ class FileExfiltrationWriter(ExfiltrationWriter):
 
         for device in devices:
             with open(device_file, "a") as f:
-                print(json.dumps(device), file=f)
+                json.dump(device, fp=f)
 
     def write_connections(self, connections: List[JsonDict]) -> None:
         user_directory = os.path.join(self.base_directory, "user_data")
@@ -220,7 +220,7 @@ class FileExfiltrationWriter(ExfiltrationWriter):
 
         for connection in connections:
             with open(connection_file, "a") as f:
-                print(json.dumps(connection), file=f)
+                json.dump(connection, fp=f)
 
     def write_account_data(
         self, file_name: str, account_data: Mapping[str, JsonDict]
@@ -233,7 +233,7 @@ class FileExfiltrationWriter(ExfiltrationWriter):
         account_data_file = os.path.join(account_data_directory, file_name)
 
         with open(account_data_file, "a") as f:
-            print(json.dumps(account_data), file=f)
+            json.dump(account_data, fp=f)
 
     def finished(self) -> str:
         return self.base_directory
-- 
cgit 1.5.1


From 4ed08ff72ef8f1abf85ab22de1e51b570f67b27e Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 22 Feb 2023 14:37:18 -0500
Subject: Tighten the default rate limit of creating new devices. (#15135)

---
 changelog.d/15135.misc                           |  1 +
 docs/usage/configuration/config_documentation.md |  6 +++---
 synapse/config/ratelimiting.py                   | 13 +++++++++++--
 3 files changed, 15 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/15135.misc

(limited to 'synapse')

diff --git a/changelog.d/15135.misc b/changelog.d/15135.misc
new file mode 100644
index 0000000000..25c4dbffe1
--- /dev/null
+++ b/changelog.d/15135.misc
@@ -0,0 +1 @@
+Tighten the login ratelimit defaults.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 58c6955689..ab1f9f4963 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1518,11 +1518,11 @@ rc_registration_token_validity:
 
 This option specifies several limits for login:
 * `address` ratelimits login requests based on the client's IP
-      address. Defaults to `per_second: 0.17`, `burst_count: 3`.
+      address. Defaults to `per_second: 0.003`, `burst_count: 5`.
 
 * `account` ratelimits login requests based on the account the
-  client is attempting to log into. Defaults to `per_second: 0.17`,
-  `burst_count: 3`.
+  client is attempting to log into. Defaults to `per_second: 0.003`,
+  `burst_count: 5`.
 
 * `failed_attempts` ratelimits login requests based on the account the
   client is attempting to log into, based on the amount of failed login
diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py
index 5c13fe428a..b733fac617 100644
--- a/synapse/config/ratelimiting.py
+++ b/synapse/config/ratelimiting.py
@@ -87,9 +87,18 @@ class RatelimitConfig(Config):
             defaults={"per_second": 0.1, "burst_count": 5},
         )
 
+        # It is reasonable to login with a bunch of devices at once (i.e. when
+        # setting up an account), but it is *not* valid to continually be
+        # logging into new devices.
         rc_login_config = config.get("rc_login", {})
-        self.rc_login_address = RatelimitSettings(rc_login_config.get("address", {}))
-        self.rc_login_account = RatelimitSettings(rc_login_config.get("account", {}))
+        self.rc_login_address = RatelimitSettings(
+            rc_login_config.get("address", {}),
+            defaults={"per_second": 0.003, "burst_count": 5},
+        )
+        self.rc_login_account = RatelimitSettings(
+            rc_login_config.get("account", {}),
+            defaults={"per_second": 0.003, "burst_count": 5},
+        )
         self.rc_login_failed_attempts = RatelimitSettings(
             rc_login_config.get("failed_attempts", {})
         )
-- 
cgit 1.5.1


From 9bb2eac71962970d02842bca441f4bcdbbf93a11 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 22 Feb 2023 15:29:09 -0500
Subject: Bump black from 22.12.0 to 23.1.0 (#15103)

---
 changelog.d/15103.misc                             |  1 +
 poetry.lock                                        | 42 ++++++++++++++--------
 stubs/sortedcontainers/sortedlist.pyi              |  1 -
 synapse/_scripts/register_new_matrix_user.py       |  2 --
 synapse/_scripts/synapse_port_db.py                |  1 -
 synapse/_scripts/synctl.py                         |  1 -
 synapse/app/_base.py                               |  2 +-
 synapse/app/complement_fork_starter.py             |  2 +-
 synapse/app/generic_worker.py                      |  1 -
 synapse/app/homeserver.py                          |  1 -
 synapse/config/consent.py                          |  1 -
 synapse/config/database.py                         |  1 -
 synapse/config/homeserver.py                       |  1 -
 synapse/config/ratelimiting.py                     |  1 -
 synapse/config/repository.py                       |  1 -
 synapse/config/server.py                           |  1 -
 synapse/config/tls.py                              |  1 -
 synapse/crypto/keyring.py                          |  2 +-
 synapse/events/third_party_rules.py                |  2 --
 synapse/federation/send_queue.py                   |  4 +--
 synapse/handlers/appservice.py                     |  2 +-
 synapse/handlers/auth.py                           |  2 --
 synapse/handlers/directory.py                      |  8 +++--
 synapse/handlers/e2e_room_keys.py                  |  1 -
 synapse/handlers/event_auth.py                     |  1 -
 synapse/handlers/initial_sync.py                   |  1 -
 synapse/handlers/presence.py                       |  2 --
 synapse/handlers/room.py                           |  8 +++--
 synapse/handlers/room_batch.py                     |  2 +-
 synapse/handlers/sync.py                           |  1 -
 synapse/logging/opentracing.py                     |  1 +
 synapse/metrics/__init__.py                        |  1 -
 synapse/metrics/_gc.py                             |  1 -
 synapse/push/bulk_push_rule_evaluator.py           |  1 -
 synapse/replication/http/account_data.py           |  1 -
 synapse/replication/http/devices.py                |  1 -
 synapse/replication/tcp/redis.py                   |  1 -
 synapse/replication/tcp/streams/events.py          |  1 -
 synapse/rest/admin/rooms.py                        |  4 ---
 synapse/rest/admin/users.py                        |  8 +++--
 synapse/rest/client/auth.py                        |  1 -
 synapse/rest/client/filter.py                      |  1 -
 synapse/rest/client/register.py                    | 18 ++++++----
 synapse/rest/media/v1/_base.py                     |  1 -
 synapse/rest/media/v1/thumbnailer.py               |  1 -
 synapse/storage/databases/main/deviceinbox.py      |  5 ++-
 synapse/storage/databases/main/devices.py          |  4 +--
 synapse/storage/databases/main/e2e_room_keys.py    |  2 +-
 synapse/storage/databases/main/end_to_end_keys.py  |  8 ++---
 synapse/storage/databases/main/event_federation.py |  1 -
 synapse/storage/databases/main/events.py           |  1 -
 .../storage/databases/main/events_bg_updates.py    |  4 +--
 synapse/storage/databases/main/events_worker.py    |  2 +-
 synapse/storage/databases/main/media_repository.py |  1 -
 synapse/storage/databases/main/pusher.py           |  3 --
 synapse/storage/databases/main/receipts.py         |  1 -
 synapse/storage/databases/main/room.py             |  1 -
 synapse/storage/databases/main/search.py           |  2 --
 synapse/storage/databases/main/state.py            |  1 -
 synapse/storage/databases/main/stats.py            |  2 +-
 synapse/storage/databases/main/stream.py           |  1 +
 synapse/storage/databases/main/transactions.py     |  1 -
 synapse/storage/databases/main/user_directory.py   |  1 -
 synapse/storage/databases/state/bg_updates.py      |  1 -
 synapse/storage/databases/state/store.py           |  7 ++--
 synapse/storage/prepare_database.py                |  4 +--
 synapse/types/state.py                             |  2 +-
 synapse/util/caches/__init__.py                    |  1 -
 synapse/util/check_dependencies.py                 |  2 +-
 synapse/util/patch_inline_callbacks.py             |  1 -
 synmark/__main__.py                                |  2 --
 synmark/suites/logging.py                          |  1 -
 tests/federation/test_complexity.py                |  4 ---
 tests/federation/test_federation_server.py         |  1 -
 tests/handlers/test_sso.py                         |  1 -
 tests/handlers/test_stats.py                       |  1 -
 tests/http/federation/test_srv_resolver.py         |  1 -
 tests/http/test_client.py                          |  2 +-
 tests/push/test_bulk_push_rule_evaluator.py        |  1 -
 tests/push/test_email.py                           |  2 --
 tests/replication/slave/storage/test_events.py     |  1 -
 tests/rest/admin/test_device.py                    |  3 --
 tests/rest/admin/test_media.py                     |  5 ---
 tests/rest/admin/test_room.py                      |  1 -
 tests/rest/admin/test_server_notice.py             |  1 -
 tests/rest/client/test_account.py                  |  4 ---
 tests/rest/client/test_auth.py                     |  2 --
 tests/rest/client/test_capabilities.py             |  1 -
 tests/rest/client/test_consent.py                  |  1 -
 tests/rest/client/test_directory.py                |  1 -
 tests/rest/client/test_ephemeral_message.py        |  1 -
 tests/rest/client/test_events.py                   |  3 --
 tests/rest/client/test_filter.py                   |  1 -
 tests/rest/client/test_login.py                    |  2 --
 tests/rest/client/test_login_token_request.py      |  1 -
 tests/rest/client/test_presence.py                 |  1 -
 tests/rest/client/test_profile.py                  |  3 --
 tests/rest/client/test_register.py                 |  4 ---
 tests/rest/client/test_rendezvous.py               |  1 -
 tests/rest/client/test_rooms.py                    | 14 ++------
 tests/rest/client/test_sync.py                     |  3 --
 tests/rest/client/test_third_party_rules.py        |  3 ++
 tests/rest/media/test_media_retention.py           |  1 -
 tests/rest/media/v1/test_media_storage.py          |  3 --
 tests/rest/media/v1/test_url_preview.py            |  3 --
 tests/server_notices/test_consent.py               |  2 --
 tests/storage/databases/main/test_deviceinbox.py   |  1 -
 tests/storage/databases/main/test_receipts.py      |  2 +-
 tests/storage/databases/main/test_room.py          |  1 -
 tests/storage/test_client_ips.py                   |  1 -
 tests/storage/test_event_chain.py                  |  2 --
 tests/storage/test_event_federation.py             |  2 +-
 tests/storage/test_event_push_actions.py           |  2 +-
 tests/storage/test_purge.py                        |  1 -
 tests/storage/test_roommember.py                   |  3 --
 tests/storage/test_state.py                        | 30 ++++++++--------
 tests/test_mau.py                                  |  1 -
 117 files changed, 108 insertions(+), 218 deletions(-)
 create mode 100644 changelog.d/15103.misc

(limited to 'synapse')

diff --git a/changelog.d/15103.misc b/changelog.d/15103.misc
new file mode 100644
index 0000000000..65322498c9
--- /dev/null
+++ b/changelog.d/15103.misc
@@ -0,0 +1 @@
+Bump black from 22.12.0 to 23.1.0.
diff --git a/poetry.lock b/poetry.lock
index 4d724ab782..8ffdab7a22 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -90,32 +90,46 @@ typecheck = ["mypy"]
 
 [[package]]
 name = "black"
-version = "22.12.0"
+version = "23.1.0"
 description = "The uncompromising code formatter."
 category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "black-22.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eedd20838bd5d75b80c9f5487dbcb06836a43833a37846cf1d8c1cc01cef59d"},
-    {file = "black-22.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:159a46a4947f73387b4d83e87ea006dbb2337eab6c879620a3ba52699b1f4351"},
-    {file = "black-22.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d30b212bffeb1e252b31dd269dfae69dd17e06d92b87ad26e23890f3efea366f"},
-    {file = "black-22.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:7412e75863aa5c5411886804678b7d083c7c28421210180d67dfd8cf1221e1f4"},
-    {file = "black-22.12.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c116eed0efb9ff870ded8b62fe9f28dd61ef6e9ddd28d83d7d264a38417dcee2"},
-    {file = "black-22.12.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1f58cbe16dfe8c12b7434e50ff889fa479072096d79f0a7f25e4ab8e94cd8350"},
-    {file = "black-22.12.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77d86c9f3db9b1bf6761244bc0b3572a546f5fe37917a044e02f3166d5aafa7d"},
-    {file = "black-22.12.0-cp38-cp38-win_amd64.whl", hash = "sha256:82d9fe8fee3401e02e79767016b4907820a7dc28d70d137eb397b92ef3cc5bfc"},
-    {file = "black-22.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:101c69b23df9b44247bd88e1d7e90154336ac4992502d4197bdac35dd7ee3320"},
-    {file = "black-22.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:559c7a1ba9a006226f09e4916060982fd27334ae1998e7a38b3f33a37f7a2148"},
-    {file = "black-22.12.0-py3-none-any.whl", hash = "sha256:436cc9167dd28040ad90d3b404aec22cedf24a6e4d7de221bec2730ec0c97bcf"},
-    {file = "black-22.12.0.tar.gz", hash = "sha256:229351e5a18ca30f447bf724d007f890f97e13af070bb6ad4c0a441cd7596a2f"},
+    {file = "black-23.1.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:b6a92a41ee34b883b359998f0c8e6eb8e99803aa8bf3123bf2b2e6fec505a221"},
+    {file = "black-23.1.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:57c18c5165c1dbe291d5306e53fb3988122890e57bd9b3dcb75f967f13411a26"},
+    {file = "black-23.1.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:9880d7d419bb7e709b37e28deb5e68a49227713b623c72b2b931028ea65f619b"},
+    {file = "black-23.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6663f91b6feca5d06f2ccd49a10f254f9298cc1f7f49c46e498a0771b507104"},
+    {file = "black-23.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9afd3f493666a0cd8f8df9a0200c6359ac53940cbde049dcb1a7eb6ee2dd7074"},
+    {file = "black-23.1.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:bfffba28dc52a58f04492181392ee380e95262af14ee01d4bc7bb1b1c6ca8d27"},
+    {file = "black-23.1.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:c1c476bc7b7d021321e7d93dc2cbd78ce103b84d5a4cf97ed535fbc0d6660648"},
+    {file = "black-23.1.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:382998821f58e5c8238d3166c492139573325287820963d2f7de4d518bd76958"},
+    {file = "black-23.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bf649fda611c8550ca9d7592b69f0637218c2369b7744694c5e4902873b2f3a"},
+    {file = "black-23.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:121ca7f10b4a01fd99951234abdbd97728e1240be89fde18480ffac16503d481"},
+    {file = "black-23.1.0-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:a8471939da5e824b891b25751955be52ee7f8a30a916d570a5ba8e0f2eb2ecad"},
+    {file = "black-23.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8178318cb74f98bc571eef19068f6ab5613b3e59d4f47771582f04e175570ed8"},
+    {file = "black-23.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:a436e7881d33acaf2536c46a454bb964a50eff59b21b51c6ccf5a40601fbef24"},
+    {file = "black-23.1.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:a59db0a2094d2259c554676403fa2fac3473ccf1354c1c63eccf7ae65aac8ab6"},
+    {file = "black-23.1.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:0052dba51dec07ed029ed61b18183942043e00008ec65d5028814afaab9a22fd"},
+    {file = "black-23.1.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:49f7b39e30f326a34b5c9a4213213a6b221d7ae9d58ec70df1c4a307cf2a1580"},
+    {file = "black-23.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:162e37d49e93bd6eb6f1afc3e17a3d23a823042530c37c3c42eeeaf026f38468"},
+    {file = "black-23.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b70eb40a78dfac24842458476135f9b99ab952dd3f2dab738c1881a9b38b753"},
+    {file = "black-23.1.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:a29650759a6a0944e7cca036674655c2f0f63806ddecc45ed40b7b8aa314b651"},
+    {file = "black-23.1.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:bb460c8561c8c1bec7824ecbc3ce085eb50005883a6203dcfb0122e95797ee06"},
+    {file = "black-23.1.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:c91dfc2c2a4e50df0026f88d2215e166616e0c80e86004d0003ece0488db2739"},
+    {file = "black-23.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a951cc83ab535d248c89f300eccbd625e80ab880fbcfb5ac8afb5f01a258ac9"},
+    {file = "black-23.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:0680d4380db3719ebcfb2613f34e86c8e6d15ffeabcf8ec59355c5e7b85bb555"},
+    {file = "black-23.1.0-py3-none-any.whl", hash = "sha256:7a0f701d314cfa0896b9001df70a530eb2472babb76086344e688829efd97d32"},
+    {file = "black-23.1.0.tar.gz", hash = "sha256:b0bd97bea8903f5a2ba7219257a44e3f1f9d00073d6cc1add68f0beec69692ac"},
 ]
 
 [package.dependencies]
 click = ">=8.0.0"
 mypy-extensions = ">=0.4.3"
+packaging = ">=22.0"
 pathspec = ">=0.9.0"
 platformdirs = ">=2"
-tomli = {version = ">=1.1.0", markers = "python_full_version < \"3.11.0a7\""}
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
 typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\" and implementation_name == \"cpython\""}
 typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}
 
diff --git a/stubs/sortedcontainers/sortedlist.pyi b/stubs/sortedcontainers/sortedlist.pyi
index 1fe1a136f1..0e745c0a79 100644
--- a/stubs/sortedcontainers/sortedlist.pyi
+++ b/stubs/sortedcontainers/sortedlist.pyi
@@ -29,7 +29,6 @@ _Repr = Callable[[], str]
 def recursive_repr(fillvalue: str = ...) -> Callable[[_Repr], _Repr]: ...
 
 class SortedList(MutableSequence[_T]):
-
     DEFAULT_LOAD_FACTOR: int = ...
     def __init__(
         self,
diff --git a/synapse/_scripts/register_new_matrix_user.py b/synapse/_scripts/register_new_matrix_user.py
index 2b74a40166..19ca399d44 100644
--- a/synapse/_scripts/register_new_matrix_user.py
+++ b/synapse/_scripts/register_new_matrix_user.py
@@ -47,7 +47,6 @@ def request_registration(
     _print: Callable[[str], None] = print,
     exit: Callable[[int], None] = sys.exit,
 ) -> None:
-
     url = "%s/_synapse/admin/v1/register" % (server_location.rstrip("/"),)
 
     # Get the nonce
@@ -154,7 +153,6 @@ def register_new_user(
 
 
 def main() -> None:
-
     logging.captureWarnings(True)
 
     parser = argparse.ArgumentParser(
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 0d35e0af8f..2c9cbf8b27 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -1205,7 +1205,6 @@ class CursesProgress(Progress):
         if self.finished:
             status = "Time spent: %s (Done!)" % (duration_str,)
         else:
-
             if self.total_processed > 0:
                 left = float(self.total_remaining) / self.total_processed
 
diff --git a/synapse/_scripts/synctl.py b/synapse/_scripts/synctl.py
index b4c96ad7f3..077b90935e 100755
--- a/synapse/_scripts/synctl.py
+++ b/synapse/_scripts/synctl.py
@@ -167,7 +167,6 @@ Worker = collections.namedtuple(
 
 
 def main() -> None:
-
     parser = argparse.ArgumentParser()
 
     parser.add_argument(
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index a5aa2185a2..28062dd69d 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -213,7 +213,7 @@ def handle_startup_exception(e: Exception) -> NoReturn:
 def redirect_stdio_to_logs() -> None:
     streams = [("stdout", LogLevel.info), ("stderr", LogLevel.error)]
 
-    for (stream, level) in streams:
+    for stream, level in streams:
         oldStream = getattr(sys, stream)
         loggingFile = LoggingFile(
             logger=twisted.logger.Logger(namespace=stream),
diff --git a/synapse/app/complement_fork_starter.py b/synapse/app/complement_fork_starter.py
index 920538f44d..c8dc3f9d76 100644
--- a/synapse/app/complement_fork_starter.py
+++ b/synapse/app/complement_fork_starter.py
@@ -219,7 +219,7 @@ def main() -> None:
     # memory space and don't need to repeat the work of loading the code!
     # Instead of using fork() directly, we use the multiprocessing library,
     # which uses fork() on Unix platforms.
-    for (func, worker_args) in zip(worker_functions, args_by_worker):
+    for func, worker_args in zip(worker_functions, args_by_worker):
         process = multiprocessing.Process(
             target=_worker_entrypoint, args=(func, proxy_reactor, worker_args)
         )
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 946f3a3807..0dec24369a 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -157,7 +157,6 @@ class GenericWorkerServer(HomeServer):
     DATASTORE_CLASS = GenericWorkerSlavedStore  # type: ignore
 
     def _listen_http(self, listener_config: ListenerConfig) -> None:
-
         assert listener_config.http_options is not None
 
         # We always include a health resource.
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 6176a70eb2..b8830b1a9c 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -321,7 +321,6 @@ def setup(config_options: List[str]) -> SynapseHomeServer:
             and not config.registration.registrations_require_3pid
             and not config.registration.registration_requires_token
         ):
-
             raise ConfigError(
                 "You have enabled open registration without any verification. This is a known vector for "
                 "spam and abuse. If you would like to allow public registration, please consider adding email, "
diff --git a/synapse/config/consent.py b/synapse/config/consent.py
index be74609dc4..5bfd0cbb71 100644
--- a/synapse/config/consent.py
+++ b/synapse/config/consent.py
@@ -22,7 +22,6 @@ from ._base import Config
 
 
 class ConsentConfig(Config):
-
     section = "consent"
 
     def __init__(self, *args: Any):
diff --git a/synapse/config/database.py b/synapse/config/database.py
index 928fec8dfe..596d8769fe 100644
--- a/synapse/config/database.py
+++ b/synapse/config/database.py
@@ -154,7 +154,6 @@ class DatabaseConfig(Config):
             logger.warning(NON_SQLITE_DATABASE_PATH_WARNING)
 
     def set_databasepath(self, database_path: str) -> None:
-
         if database_path != ":memory:":
             database_path = self.abspath(database_path)
 
diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py
index 4d2b298a70..c205a78039 100644
--- a/synapse/config/homeserver.py
+++ b/synapse/config/homeserver.py
@@ -56,7 +56,6 @@ from .workers import WorkerConfig
 
 
 class HomeServerConfig(RootConfig):
-
     config_classes = [
         ModulesConfig,
         ServerConfig,
diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py
index b733fac617..a5514e70a2 100644
--- a/synapse/config/ratelimiting.py
+++ b/synapse/config/ratelimiting.py
@@ -46,7 +46,6 @@ class RatelimitConfig(Config):
     section = "ratelimiting"
 
     def read_config(self, config: JsonDict, **kwargs: Any) -> None:
-
         # Load the new-style messages config if it exists. Otherwise fall back
         # to the old method.
         if "rc_message" in config:
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index e4759711ed..2da40c09f0 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -116,7 +116,6 @@ class ContentRepositoryConfig(Config):
     section = "media"
 
     def read_config(self, config: JsonDict, **kwargs: Any) -> None:
-
         # Only enable the media repo if either the media repo is enabled or the
         # current worker app is the media repo.
         if (
diff --git a/synapse/config/server.py b/synapse/config/server.py
index d4ef9930b0..0e46b849cf 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -735,7 +735,6 @@ class ServerConfig(Config):
         listeners: Optional[List[dict]],
         **kwargs: Any,
     ) -> str:
-
         _, bind_port = parse_and_validate_server_name(server_name)
         if bind_port is not None:
             unsecure_port = bind_port - 400
diff --git a/synapse/config/tls.py b/synapse/config/tls.py
index 336fe3e0da..318270ebb8 100644
--- a/synapse/config/tls.py
+++ b/synapse/config/tls.py
@@ -30,7 +30,6 @@ class TlsConfig(Config):
     section = "tls"
 
     def read_config(self, config: JsonDict, **kwargs: Any) -> None:
-
         self.tls_certificate_file = self.abspath(config.get("tls_certificate_path"))
         self.tls_private_key_file = self.abspath(config.get("tls_private_key_path"))
 
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index 86cd4af9bd..d710607c63 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -399,7 +399,7 @@ class Keyring:
         # We now convert the returned list of results into a map from server
         # name to key ID to FetchKeyResult, to return.
         to_return: Dict[str, Dict[str, FetchKeyResult]] = {}
-        for (request, results) in zip(deduped_requests, results_per_request):
+        for request, results in zip(deduped_requests, results_per_request):
             to_return_by_server = to_return.setdefault(request.server_name, {})
             for key_id, key_result in results.items():
                 existing = to_return_by_server.get(key_id)
diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py
index 97c61cc258..9a25ed419b 100644
--- a/synapse/events/third_party_rules.py
+++ b/synapse/events/third_party_rules.py
@@ -78,7 +78,6 @@ def load_legacy_third_party_event_rules(hs: "HomeServer") -> None:
         # correctly, we need to await its result. Therefore it doesn't make a lot of
         # sense to make it go through the run() wrapper.
         if f.__name__ == "check_event_allowed":
-
             # We need to wrap check_event_allowed because its old form would return either
             # a boolean or a dict, but now we want to return the dict separately from the
             # boolean.
@@ -100,7 +99,6 @@ def load_legacy_third_party_event_rules(hs: "HomeServer") -> None:
             return wrap_check_event_allowed
 
         if f.__name__ == "on_create_room":
-
             # We need to wrap on_create_room because its old form would return a boolean
             # if the room creation is denied, but now we just want it to raise an
             # exception.
diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py
index d720b5fd3f..3063df7990 100644
--- a/synapse/federation/send_queue.py
+++ b/synapse/federation/send_queue.py
@@ -314,7 +314,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
         # stream position.
         keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]}
 
-        for ((destination, edu_key), pos) in keyed_edus.items():
+        for (destination, edu_key), pos in keyed_edus.items():
             rows.append(
                 (
                     pos,
@@ -329,7 +329,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
         j = self.edus.bisect_right(to_token) + 1
         edus = self.edus.items()[i:j]
 
-        for (pos, edu) in edus:
+        for pos, edu in edus:
             rows.append((pos, EduRow(edu)))
 
         # Sort rows based on pos
diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py
index 5d1d21cdc8..ec3ab968e9 100644
--- a/synapse/handlers/appservice.py
+++ b/synapse/handlers/appservice.py
@@ -737,7 +737,7 @@ class ApplicationServicesHandler:
         )
 
         ret = []
-        for (success, result) in results:
+        for success, result in results:
             if success:
                 ret.extend(result)
 
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index cf12b55d21..b12bc4c9a3 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -815,7 +815,6 @@ class AuthHandler:
         now_ms = self._clock.time_msec()
 
         if existing_token.expiry_ts is not None and existing_token.expiry_ts < now_ms:
-
             raise SynapseError(
                 HTTPStatus.FORBIDDEN,
                 "The supplied refresh token has expired",
@@ -2259,7 +2258,6 @@ class PasswordAuthProvider:
     async def on_logged_out(
         self, user_id: str, device_id: Optional[str], access_token: str
     ) -> None:
-
         # call all of the on_logged_out callbacks
         for callback in self.on_logged_out_callbacks:
             try:
diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py
index a5798e9483..1fb23cc9bf 100644
--- a/synapse/handlers/directory.py
+++ b/synapse/handlers/directory.py
@@ -497,9 +497,11 @@ class DirectoryHandler:
                 raise SynapseError(403, "Not allowed to publish room")
 
             # Check if publishing is blocked by a third party module
-            allowed_by_third_party_rules = await (
-                self.third_party_event_rules.check_visibility_can_be_modified(
-                    room_id, visibility
+            allowed_by_third_party_rules = (
+                await (
+                    self.third_party_event_rules.check_visibility_can_be_modified(
+                        room_id, visibility
+                    )
                 )
             )
             if not allowed_by_third_party_rules:
diff --git a/synapse/handlers/e2e_room_keys.py b/synapse/handlers/e2e_room_keys.py
index 83f53ceb88..50317ec753 100644
--- a/synapse/handlers/e2e_room_keys.py
+++ b/synapse/handlers/e2e_room_keys.py
@@ -188,7 +188,6 @@ class E2eRoomKeysHandler:
 
         # XXX: perhaps we should use a finer grained lock here?
         async with self._upload_linearizer.queue(user_id):
-
             # Check that the version we're trying to upload is the current version
             try:
                 version_info = await self.store.get_e2e_room_keys_version_info(user_id)
diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py
index 46dd63c3f0..c508861b6a 100644
--- a/synapse/handlers/event_auth.py
+++ b/synapse/handlers/event_auth.py
@@ -236,7 +236,6 @@ class EventAuthHandler:
         # in any of them.
         allowed_rooms = await self.get_rooms_that_allow_join(state_ids)
         if not await self.is_user_in_rooms(allowed_rooms, user_id):
-
             # If this is a remote request, the user might be in an allowed room
             # that we do not know about.
             if get_domain_from_id(user_id) != self._server_name:
diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py
index 1a29abde98..aead0b44b9 100644
--- a/synapse/handlers/initial_sync.py
+++ b/synapse/handlers/initial_sync.py
@@ -124,7 +124,6 @@ class InitialSyncHandler:
         as_client_event: bool = True,
         include_archived: bool = False,
     ) -> JsonDict:
-
         memberships = [Membership.INVITE, Membership.JOIN]
         if include_archived:
             memberships.append(Membership.LEAVE)
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 87af31aa27..4ad2233573 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -777,7 +777,6 @@ class PresenceHandler(BasePresenceHandler):
         )
 
         if self.unpersisted_users_changes:
-
             await self.store.update_presence(
                 [
                     self.user_to_current_state[user_id]
@@ -823,7 +822,6 @@ class PresenceHandler(BasePresenceHandler):
         now = self.clock.time_msec()
 
         with Measure(self.clock, "presence_update_states"):
-
             # NOTE: We purposefully don't await between now and when we've
             # calculated what we want to do with the new states, to avoid races.
 
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 37c87c8351..a26ec02284 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -868,9 +868,11 @@ class RoomCreationHandler:
         )
 
         # Check whether this visibility value is blocked by a third party module
-        allowed_by_third_party_rules = await (
-            self.third_party_event_rules.check_visibility_can_be_modified(
-                room_id, visibility
+        allowed_by_third_party_rules = (
+            await (
+                self.third_party_event_rules.check_visibility_can_be_modified(
+                    room_id, visibility
+                )
             )
         )
         if not allowed_by_third_party_rules:
diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py
index c73d2adaad..5d4ca0e2d2 100644
--- a/synapse/handlers/room_batch.py
+++ b/synapse/handlers/room_batch.py
@@ -374,7 +374,7 @@ class RoomBatchHandler:
         # correct stream_ordering as they are backfilled (which decrements).
         # Events are sorted by (topological_ordering, stream_ordering)
         # where topological_ordering is just depth.
-        for (event, context) in reversed(events_to_persist):
+        for event, context in reversed(events_to_persist):
             # This call can't raise `PartialStateConflictError` since we forbid
             # use of the historical batch API during partial state
             await self.event_creation_handler.handle_new_client_event(
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 4e4595312c..fd6d946c37 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1297,7 +1297,6 @@ class SyncHandler:
             return RoomNotifCounts.empty()
 
         with Measure(self.clock, "unread_notifs_for_room_id"):
-
             return await self.store.get_unread_event_push_actions_by_room_for_user(
                 room_id,
                 sync_config.user.to_string(),
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index 5aed71262f..c70eee649c 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -524,6 +524,7 @@ def whitelisted_homeserver(destination: str) -> bool:
 
 # Start spans and scopes
 
+
 # Could use kwargs but I want these to be explicit
 def start_active_span(
     operation_name: str,
diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index b01372565d..8ce5887229 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -87,7 +87,6 @@ class LaterGauge(Collector):
     ]
 
     def collect(self) -> Iterable[Metric]:
-
         g = GaugeMetricFamily(self.name, self.desc, labels=self.labels)
 
         try:
diff --git a/synapse/metrics/_gc.py b/synapse/metrics/_gc.py
index b7d47ce3e7..a22c4e5bbd 100644
--- a/synapse/metrics/_gc.py
+++ b/synapse/metrics/_gc.py
@@ -139,7 +139,6 @@ def install_gc_manager() -> None:
 
 class PyPyGCStats(Collector):
     def collect(self) -> Iterable[Metric]:
-
         # @stats is a pretty-printer object with __str__() returning a nice table,
         # plus some fields that contain data from that table.
         # unfortunately, fields are pretty-printed themselves (i. e. '4.5MB').
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 5fc38431ba..8f834be774 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -330,7 +330,6 @@ class BulkPushRuleEvaluator:
         context: EventContext,
         event_id_to_event: Mapping[str, EventBase],
     ) -> None:
-
         if (
             not event.internal_metadata.is_notifiable()
             or event.internal_metadata.is_historical()
diff --git a/synapse/replication/http/account_data.py b/synapse/replication/http/account_data.py
index 2374f810c9..111ec07e64 100644
--- a/synapse/replication/http/account_data.py
+++ b/synapse/replication/http/account_data.py
@@ -265,7 +265,6 @@ class ReplicationRemoveTagRestServlet(ReplicationEndpoint):
 
     @staticmethod
     async def _serialize_payload(user_id: str, room_id: str, tag: str) -> JsonDict:  # type: ignore[override]
-
         return {}
 
     async def _handle_request(  # type: ignore[override]
diff --git a/synapse/replication/http/devices.py b/synapse/replication/http/devices.py
index ecea6fc915..cc3929dcf5 100644
--- a/synapse/replication/http/devices.py
+++ b/synapse/replication/http/devices.py
@@ -195,7 +195,6 @@ class ReplicationUploadKeysForUserRestServlet(ReplicationEndpoint):
     async def _serialize_payload(  # type: ignore[override]
         user_id: str, device_id: str, keys: JsonDict
     ) -> JsonDict:
-
         return {
             "user_id": user_id,
             "device_id": device_id,
diff --git a/synapse/replication/tcp/redis.py b/synapse/replication/tcp/redis.py
index fd1c0ec6af..dfc061eb5e 100644
--- a/synapse/replication/tcp/redis.py
+++ b/synapse/replication/tcp/redis.py
@@ -328,7 +328,6 @@ class RedisDirectTcpReplicationClientFactory(SynapseRedisFactory):
         outbound_redis_connection: txredisapi.ConnectionHandler,
         channel_names: List[str],
     ):
-
         super().__init__(
             hs,
             uuid="subscriber",
diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py
index 14b6705862..ad9b760713 100644
--- a/synapse/replication/tcp/streams/events.py
+++ b/synapse/replication/tcp/streams/events.py
@@ -139,7 +139,6 @@ class EventsStream(Stream):
         current_token: Token,
         target_row_count: int,
     ) -> StreamUpdateResult:
-
         # the events stream merges together three separate sources:
         #  * new events
         #  * current_state changes
diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py
index 1d6e4982d7..4de56bf13f 100644
--- a/synapse/rest/admin/rooms.py
+++ b/synapse/rest/admin/rooms.py
@@ -75,7 +75,6 @@ class RoomRestV2Servlet(RestServlet):
     async def on_DELETE(
         self, request: SynapseRequest, room_id: str
     ) -> Tuple[int, JsonDict]:
-
         requester = await self._auth.get_user_by_req(request)
         await assert_user_is_admin(self._auth, requester)
 
@@ -144,7 +143,6 @@ class DeleteRoomStatusByRoomIdRestServlet(RestServlet):
     async def on_GET(
         self, request: SynapseRequest, room_id: str
     ) -> Tuple[int, JsonDict]:
-
         await assert_requester_is_admin(self._auth, request)
 
         if not RoomID.is_valid(room_id):
@@ -181,7 +179,6 @@ class DeleteRoomStatusByDeleteIdRestServlet(RestServlet):
     async def on_GET(
         self, request: SynapseRequest, delete_id: str
     ) -> Tuple[int, JsonDict]:
-
         await assert_requester_is_admin(self._auth, request)
 
         delete_status = self._pagination_handler.get_delete_status(delete_id)
@@ -438,7 +435,6 @@ class RoomStateRestServlet(RestServlet):
 
 
 class JoinRoomAliasServlet(ResolveRoomIdMixin, RestServlet):
-
     PATTERNS = admin_patterns("/join/(?P<room_identifier>[^/]*)$")
 
     def __init__(self, hs: "HomeServer"):
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 0c0bf540b9..7cc4db20d6 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -683,8 +683,12 @@ class AccountValidityRenewServlet(RestServlet):
         await assert_requester_is_admin(self.auth, request)
 
         if self.account_activity_handler.on_legacy_admin_request_callback:
-            expiration_ts = await (
-                self.account_activity_handler.on_legacy_admin_request_callback(request)
+            expiration_ts = (
+                await (
+                    self.account_activity_handler.on_legacy_admin_request_callback(
+                        request
+                    )
+                )
             )
         else:
             body = parse_json_object_from_request(request)
diff --git a/synapse/rest/client/auth.py b/synapse/rest/client/auth.py
index eb77337044..276a1b405d 100644
--- a/synapse/rest/client/auth.py
+++ b/synapse/rest/client/auth.py
@@ -97,7 +97,6 @@ class AuthRestServlet(RestServlet):
         return None
 
     async def on_POST(self, request: Request, stagetype: str) -> None:
-
         session = parse_string(request, "session")
         if not session:
             raise SynapseError(400, "No session supplied")
diff --git a/synapse/rest/client/filter.py b/synapse/rest/client/filter.py
index cc1c2f9731..236199897c 100644
--- a/synapse/rest/client/filter.py
+++ b/synapse/rest/client/filter.py
@@ -79,7 +79,6 @@ class CreateFilterRestServlet(RestServlet):
     async def on_POST(
         self, request: SynapseRequest, user_id: str
     ) -> Tuple[int, JsonDict]:
-
         target_user = UserID.from_string(user_id)
         requester = await self.auth.get_user_by_req(request)
 
diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py
index 3cb1e7e375..bce806f2bb 100644
--- a/synapse/rest/client/register.py
+++ b/synapse/rest/client/register.py
@@ -628,10 +628,12 @@ class RegisterRestServlet(RestServlet):
             if not password_hash:
                 raise SynapseError(400, "Missing params: password", Codes.MISSING_PARAM)
 
-            desired_username = await (
-                self.password_auth_provider.get_username_for_registration(
-                    auth_result,
-                    params,
+            desired_username = (
+                await (
+                    self.password_auth_provider.get_username_for_registration(
+                        auth_result,
+                        params,
+                    )
                 )
             )
 
@@ -682,9 +684,11 @@ class RegisterRestServlet(RestServlet):
                 session_id
             )
 
-            display_name = await (
-                self.password_auth_provider.get_displayname_for_registration(
-                    auth_result, params
+            display_name = (
+                await (
+                    self.password_auth_provider.get_displayname_for_registration(
+                        auth_result, params
+                    )
                 )
             )
 
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py
index 6e035afcce..ef8334ae25 100644
--- a/synapse/rest/media/v1/_base.py
+++ b/synapse/rest/media/v1/_base.py
@@ -270,7 +270,6 @@ async def respond_with_responder(
         logger.debug("Responding to media request with responder %s", responder)
         add_file_headers(request, media_type, file_size, upload_name)
         try:
-
             await responder.write_to_consumer(request)
         except Exception as e:
             # The majority of the time this will be due to the client having gone
diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py
index 9480cc5763..f909a4fb9a 100644
--- a/synapse/rest/media/v1/thumbnailer.py
+++ b/synapse/rest/media/v1/thumbnailer.py
@@ -38,7 +38,6 @@ class ThumbnailError(Exception):
 
 
 class Thumbnailer:
-
     FORMATS = {"image/jpeg": "JPEG", "image/png": "PNG"}
 
     @staticmethod
diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py
index 8e61aba454..0d75d9739a 100644
--- a/synapse/storage/databases/main/deviceinbox.py
+++ b/synapse/storage/databases/main/deviceinbox.py
@@ -721,8 +721,8 @@ class DeviceInboxWorkerStore(SQLBaseStore):
                         ],
                     )
 
-                for (user_id, messages_by_device) in edu["messages"].items():
-                    for (device_id, msg) in messages_by_device.items():
+                for user_id, messages_by_device in edu["messages"].items():
+                    for device_id, msg in messages_by_device.items():
                         with start_active_span("store_outgoing_to_device_message"):
                             set_tag(SynapseTags.TO_DEVICE_EDU_ID, edu["sender"])
                             set_tag(SynapseTags.TO_DEVICE_EDU_ID, edu["message_id"])
@@ -959,7 +959,6 @@ class DeviceInboxBackgroundUpdateStore(SQLBaseStore):
         def _remove_dead_devices_from_device_inbox_txn(
             txn: LoggingTransaction,
         ) -> Tuple[int, bool]:
-
             if "max_stream_id" in progress:
                 max_stream_id = progress["max_stream_id"]
             else:
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 1ca66d57d4..0dd15f16ff 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -512,7 +512,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
             results.append(("org.matrix.signing_key_update", result))
 
         if issue_8631_logger.isEnabledFor(logging.DEBUG):
-            for (user_id, edu) in results:
+            for user_id, edu in results:
                 issue_8631_logger.debug(
                     "device update to %s for %s from %s to %s: %s",
                     destination,
@@ -1316,7 +1316,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
                 )
             """
             count = 0
-            for (destination, user_id, stream_id, device_id) in rows:
+            for destination, user_id, stream_id, device_id in rows:
                 txn.execute(
                     delete_sql, (destination, user_id, stream_id, stream_id, device_id)
                 )
diff --git a/synapse/storage/databases/main/e2e_room_keys.py b/synapse/storage/databases/main/e2e_room_keys.py
index 6240f9a75e..9f8d2e4bea 100644
--- a/synapse/storage/databases/main/e2e_room_keys.py
+++ b/synapse/storage/databases/main/e2e_room_keys.py
@@ -108,7 +108,7 @@ class EndToEndRoomKeyStore(SQLBaseStore):
             raise StoreError(404, "No backup with that version exists")
 
         values = []
-        for (room_id, session_id, room_key) in room_keys:
+        for room_id, session_id, room_key in room_keys:
             values.append(
                 (
                     user_id,
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index 2c2d145666..b9c39b1718 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -268,7 +268,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
             )
 
             # add each cross-signing signature to the correct device in the result dict.
-            for (user_id, key_id, device_id, signature) in cross_sigs_result:
+            for user_id, key_id, device_id, signature in cross_sigs_result:
                 target_device_result = result[user_id][device_id]
                 # We've only looked up cross-signatures for non-deleted devices with key
                 # data.
@@ -311,7 +311,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
         # devices.
         user_list = []
         user_device_list = []
-        for (user_id, device_id) in query_list:
+        for user_id, device_id in query_list:
             if device_id is None:
                 user_list.append(user_id)
             else:
@@ -353,7 +353,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
 
             txn.execute(sql, query_params)
 
-            for (user_id, device_id, display_name, key_json) in txn:
+            for user_id, device_id, display_name, key_json in txn:
                 assert device_id is not None
                 if include_deleted_devices:
                     deleted_devices.remove((user_id, device_id))
@@ -382,7 +382,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
         signature_query_clauses = []
         signature_query_params = []
 
-        for (user_id, device_id) in device_query:
+        for user_id, device_id in device_query:
             signature_query_clauses.append(
                 "target_user_id = ? AND target_device_id = ? AND user_id = ?"
             )
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index ca780cca36..ff3edeb716 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -1612,7 +1612,6 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
         latest_events: List[str],
         limit: int,
     ) -> List[str]:
-
         seen_events = set(earliest_events)
         front = set(latest_events) - seen_events
         event_results: List[str] = []
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 7996cbb557..73b8aea16c 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -469,7 +469,6 @@ class PersistEventsStore:
         txn: LoggingTransaction,
         events: List[EventBase],
     ) -> None:
-
         # We only care about state events, so this if there are no state events.
         if not any(e.is_state() for e in events):
             return
diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
index 584536111d..0a275e6ce6 100644
--- a/synapse/storage/databases/main/events_bg_updates.py
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -709,7 +709,7 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
 
             nbrows = 0
             last_row_event_id = ""
-            for (event_id, event_json_raw) in results:
+            for event_id, event_json_raw in results:
                 try:
                     event_json = db_to_json(event_json_raw)
 
@@ -1167,7 +1167,7 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
             results = list(txn)
             # (event_id, parent_id, rel_type) for each relation
             relations_to_insert: List[Tuple[str, str, str]] = []
-            for (event_id, event_json_raw) in results:
+            for event_id, event_json_raw in results:
                 try:
                     event_json = db_to_json(event_json_raw)
                 except Exception as e:
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 6d0ef10258..b7e7498125 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -1493,7 +1493,7 @@ class EventsWorkerStore(SQLBaseStore):
 
             txn.execute(redactions_sql + clause, args)
 
-            for (redacter, redacted) in txn:
+            for redacter, redacted in txn:
                 d = event_dict.get(redacted)
                 if d:
                     d.redactions.append(redacter)
diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py
index b202c5eb87..fa8be214ce 100644
--- a/synapse/storage/databases/main/media_repository.py
+++ b/synapse/storage/databases/main/media_repository.py
@@ -196,7 +196,6 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
         def get_local_media_by_user_paginate_txn(
             txn: LoggingTransaction,
         ) -> Tuple[List[Dict[str, Any]], int]:
-
             # Set ordering
             order_by_column = MediaSortOrder(order_by).value
 
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
index df53e726e6..fddbc07afa 100644
--- a/synapse/storage/databases/main/pusher.py
+++ b/synapse/storage/databases/main/pusher.py
@@ -344,7 +344,6 @@ class PusherWorkerStore(SQLBaseStore):
         last_user = progress.get("last_user", "")
 
         def _delete_pushers(txn: LoggingTransaction) -> int:
-
             sql = """
                 SELECT name FROM users
                 WHERE deactivated = ? and name > ?
@@ -392,7 +391,6 @@ class PusherWorkerStore(SQLBaseStore):
         last_pusher = progress.get("last_pusher", 0)
 
         def _delete_pushers(txn: LoggingTransaction) -> int:
-
             sql = """
                 SELECT p.id, access_token FROM pushers AS p
                 LEFT JOIN access_tokens AS a ON (p.access_token = a.id)
@@ -449,7 +447,6 @@ class PusherWorkerStore(SQLBaseStore):
         last_pusher = progress.get("last_pusher", 0)
 
         def _delete_pushers(txn: LoggingTransaction) -> int:
-
             sql = """
                 SELECT p.id, p.user_name, p.app_id, p.pushkey
                 FROM pushers AS p
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index dddf49c2d5..92a82240ab 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -887,7 +887,6 @@ class ReceiptsBackgroundUpdateStore(SQLBaseStore):
         def _populate_receipt_event_stream_ordering_txn(
             txn: LoggingTransaction,
         ) -> bool:
-
             if "max_stream_id" in progress:
                 max_stream_id = progress["max_stream_id"]
             else:
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 644bbb8878..39f89291b2 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -2168,7 +2168,6 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
         def _get_event_report_txn(
             txn: LoggingTransaction, report_id: int
         ) -> Optional[Dict[str, Any]]:
-
             sql = """
                 SELECT
                     er.id,
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
index 3fe433f66c..a7aae661d8 100644
--- a/synapse/storage/databases/main/search.py
+++ b/synapse/storage/databases/main/search.py
@@ -122,7 +122,6 @@ class SearchWorkerStore(SQLBaseStore):
 
 
 class SearchBackgroundUpdateStore(SearchWorkerStore):
-
     EVENT_SEARCH_UPDATE_NAME = "event_search"
     EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order"
     EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin"
@@ -615,7 +614,6 @@ class SearchStore(SearchBackgroundUpdateStore):
             """
             count_args = [search_query] + count_args
         elif isinstance(self.database_engine, Sqlite3Engine):
-
             # We use CROSS JOIN here to ensure we use the right indexes.
             # https://sqlite.org/optoverview.html#crossjoin
             #
diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py
index ba325d390b..ebb2ae964f 100644
--- a/synapse/storage/databases/main/state.py
+++ b/synapse/storage/databases/main/state.py
@@ -490,7 +490,6 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
 
 
 class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
-
     CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx"
     EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index"
     DELETE_CURRENT_STATE_UPDATE_NAME = "delete_old_current_state_events"
diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py
index d7b7d0c3c9..d3393d8e49 100644
--- a/synapse/storage/databases/main/stats.py
+++ b/synapse/storage/databases/main/stats.py
@@ -461,7 +461,7 @@ class StatsStore(StateDeltasStore):
         insert_cols = []
         qargs = []
 
-        for (key, val) in chain(
+        for key, val in chain(
             keyvalues.items(), absolutes.items(), additive_relatives.items()
         ):
             insert_cols.append(key)
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 818c46182e..ac5fbf6b86 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -87,6 +87,7 @@ MAX_STREAM_SIZE = 1000
 _STREAM_TOKEN = "stream"
 _TOPOLOGICAL_TOKEN = "topological"
 
+
 # Used as return values for pagination APIs
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class _EventDictReturn:
diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py
index 6b33d809b6..6d72bd9f67 100644
--- a/synapse/storage/databases/main/transactions.py
+++ b/synapse/storage/databases/main/transactions.py
@@ -573,7 +573,6 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
         def get_destination_rooms_paginate_txn(
             txn: LoggingTransaction,
         ) -> Tuple[List[JsonDict], int]:
-
             if direction == Direction.BACKWARDS:
                 order = "DESC"
             else:
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index 30af4b3b6c..c3f2b61bd5 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -98,7 +98,6 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
     async def _populate_user_directory_createtables(
         self, progress: JsonDict, batch_size: int
     ) -> int:
-
         # Get all the rooms that we want to process.
         def _make_staging_area(txn: LoggingTransaction) -> None:
             sql = (
diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py
index d743282f13..097dea5182 100644
--- a/synapse/storage/databases/state/bg_updates.py
+++ b/synapse/storage/databases/state/bg_updates.py
@@ -251,7 +251,6 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore):
 
 
 class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore):
-
     STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication"
     STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index"
     STATE_GROUPS_ROOM_INDEX_UPDATE_NAME = "state_groups_room_id_idx"
diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py
index 1a7232b276..89b1faa6c8 100644
--- a/synapse/storage/databases/state/store.py
+++ b/synapse/storage/databases/state/store.py
@@ -257,14 +257,11 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
         member_filter, non_member_filter = state_filter.get_member_split()
 
         # Now we look them up in the member and non-member caches
-        (
-            non_member_state,
-            incomplete_groups_nm,
-        ) = self._get_state_for_groups_using_cache(
+        non_member_state, incomplete_groups_nm = self._get_state_for_groups_using_cache(
             groups, self._state_group_cache, state_filter=non_member_filter
         )
 
-        (member_state, incomplete_groups_m,) = self._get_state_for_groups_using_cache(
+        member_state, incomplete_groups_m = self._get_state_for_groups_using_cache(
             groups, self._state_group_members_cache, state_filter=member_filter
         )
 
diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index 6c335a9315..2a1c6fa31b 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -563,7 +563,7 @@ def _apply_module_schemas(
     """
     # This is the old way for password_auth_provider modules to make changes
     # to the database. This should instead be done using the module API
-    for (mod, _config) in config.authproviders.password_providers:
+    for mod, _config in config.authproviders.password_providers:
         if not hasattr(mod, "get_db_schema_files"):
             continue
         modname = ".".join((mod.__module__, mod.__name__))
@@ -591,7 +591,7 @@ def _apply_module_schema_files(
         (modname,),
     )
     applied_deltas = {d for d, in cur}
-    for (name, stream) in names_and_streams:
+    for name, stream in names_and_streams:
         if name in applied_deltas:
             continue
 
diff --git a/synapse/types/state.py b/synapse/types/state.py
index 743a4f9217..4b3071acce 100644
--- a/synapse/types/state.py
+++ b/synapse/types/state.py
@@ -120,7 +120,7 @@ class StateFilter:
 
     def to_types(self) -> Iterable[Tuple[str, Optional[str]]]:
         """The inverse to `from_types`."""
-        for (event_type, state_keys) in self.types.items():
+        for event_type, state_keys in self.types.items():
             if state_keys is None:
                 yield event_type, None
             else:
diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py
index 9387632d0d..6ffa56217e 100644
--- a/synapse/util/caches/__init__.py
+++ b/synapse/util/caches/__init__.py
@@ -98,7 +98,6 @@ class EvictionReason(Enum):
 
 @attr.s(slots=True, auto_attribs=True)
 class CacheMetric:
-
     _cache: Sized
     _cache_type: str
     _cache_name: str
diff --git a/synapse/util/check_dependencies.py b/synapse/util/check_dependencies.py
index 3b1e205700..1c0fde4966 100644
--- a/synapse/util/check_dependencies.py
+++ b/synapse/util/check_dependencies.py
@@ -183,7 +183,7 @@ def check_requirements(extra: Optional[str] = None) -> None:
     deps_unfulfilled = []
     errors = []
 
-    for (requirement, must_be_installed) in dependencies:
+    for requirement, must_be_installed in dependencies:
         try:
             dist: metadata.Distribution = metadata.distribution(requirement.name)
         except metadata.PackageNotFoundError:
diff --git a/synapse/util/patch_inline_callbacks.py b/synapse/util/patch_inline_callbacks.py
index f97f98a057..d00d34e652 100644
--- a/synapse/util/patch_inline_callbacks.py
+++ b/synapse/util/patch_inline_callbacks.py
@@ -211,7 +211,6 @@ def _check_yield_points(
                 result = Failure()
 
             if current_context() != expected_context:
-
                 # This happens because the context is lost sometime *after* the
                 # previous yield and *after* the current yield. E.g. the
                 # deferred we waited on didn't follow the rules, or we forgot to
diff --git a/synmark/__main__.py b/synmark/__main__.py
index 35a59e347a..19de639187 100644
--- a/synmark/__main__.py
+++ b/synmark/__main__.py
@@ -34,12 +34,10 @@ def make_test(main):
     """
 
     def _main(loops):
-
         reactor = make_reactor()
 
         file_out = StringIO()
         with redirect_stderr(file_out):
-
             d = Deferred()
             d.addCallback(lambda _: ensureDeferred(main(reactor, loops)))
 
diff --git a/synmark/suites/logging.py b/synmark/suites/logging.py
index 9419892e95..8beb077e0a 100644
--- a/synmark/suites/logging.py
+++ b/synmark/suites/logging.py
@@ -30,7 +30,6 @@ from synapse.util import Clock
 
 
 class LineCounter(LineOnlyReceiver):
-
     delimiter = b"\n"
 
     def __init__(self, *args, **kwargs):
diff --git a/tests/federation/test_complexity.py b/tests/federation/test_complexity.py
index 35dd9a20df..33af8770fd 100644
--- a/tests/federation/test_complexity.py
+++ b/tests/federation/test_complexity.py
@@ -24,7 +24,6 @@ from tests.test_utils import make_awaitable
 
 
 class RoomComplexityTests(unittest.FederatingHomeserverTestCase):
-
     servlets = [
         admin.register_servlets,
         room.register_servlets,
@@ -37,7 +36,6 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase):
         return config
 
     def test_complexity_simple(self) -> None:
-
         u1 = self.register_user("u1", "pass")
         u1_token = self.login("u1", "pass")
 
@@ -71,7 +69,6 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase):
         self.assertEqual(complexity, 1.23)
 
     def test_join_too_large(self) -> None:
-
         u1 = self.register_user("u1", "pass")
 
         handler = self.hs.get_room_member_handler()
@@ -131,7 +128,6 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase):
         self.assertEqual(f.value.errcode, Codes.RESOURCE_LIMIT_EXCEEDED)
 
     def test_join_too_large_once_joined(self) -> None:
-
         u1 = self.register_user("u1", "pass")
         u1_token = self.login("u1", "pass")
 
diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py
index bba6469b55..6c7738d810 100644
--- a/tests/federation/test_federation_server.py
+++ b/tests/federation/test_federation_server.py
@@ -34,7 +34,6 @@ from tests.unittest import override_config
 
 
 class FederationServerTests(unittest.FederatingHomeserverTestCase):
-
     servlets = [
         admin.register_servlets,
         room.register_servlets,
diff --git a/tests/handlers/test_sso.py b/tests/handlers/test_sso.py
index 137deab138..d6f43a98fc 100644
--- a/tests/handlers/test_sso.py
+++ b/tests/handlers/test_sso.py
@@ -113,7 +113,6 @@ async def mock_get_file(
     headers: Optional[RawHeaders] = None,
     is_allowed_content_type: Optional[Callable[[str], bool]] = None,
 ) -> Tuple[int, Dict[bytes, List[bytes]], str, int]:
-
     fake_response = FakeResponse(code=404)
     if url == "http://my.server/me.png":
         fake_response = FakeResponse(
diff --git a/tests/handlers/test_stats.py b/tests/handlers/test_stats.py
index f1a50c5bcb..d11ded6c5b 100644
--- a/tests/handlers/test_stats.py
+++ b/tests/handlers/test_stats.py
@@ -31,7 +31,6 @@ EXPT_NUM_STATE_EVTS_IN_FRESH_PRIVATE_ROOM = 6
 
 
 class StatsRoomTests(unittest.HomeserverTestCase):
-
     servlets = [
         admin.register_servlets_for_client_rest_resource,
         room.register_servlets,
diff --git a/tests/http/federation/test_srv_resolver.py b/tests/http/federation/test_srv_resolver.py
index 7748f56ee6..6ab13357f9 100644
--- a/tests/http/federation/test_srv_resolver.py
+++ b/tests/http/federation/test_srv_resolver.py
@@ -46,7 +46,6 @@ class SrvResolverTestCase(unittest.TestCase):
 
         @defer.inlineCallbacks
         def do_lookup() -> Generator["Deferred[object]", object, List[Server]]:
-
             with LoggingContext("one") as ctx:
                 resolve_d = resolver.resolve_service(service_name)
                 result: List[Server]
diff --git a/tests/http/test_client.py b/tests/http/test_client.py
index 9cfe1ad0de..f6d6684985 100644
--- a/tests/http/test_client.py
+++ b/tests/http/test_client.py
@@ -149,7 +149,7 @@ class BlacklistingAgentTest(TestCase):
         self.allowed_domain, self.allowed_ip = b"allowed.test", b"5.1.1.1"
 
         # Configure the reactor's DNS resolver.
-        for (domain, ip) in (
+        for domain, ip in (
             (self.safe_domain, self.safe_ip),
             (self.unsafe_domain, self.unsafe_ip),
             (self.allowed_domain, self.allowed_ip),
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
index 199e3d7b70..dce6899e78 100644
--- a/tests/push/test_bulk_push_rule_evaluator.py
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -33,7 +33,6 @@ from tests.unittest import HomeserverTestCase, override_config
 
 
 class TestBulkPushRuleEvaluator(HomeserverTestCase):
-
     servlets = [
         admin.register_servlets_for_client_rest_resource,
         room.register_servlets,
diff --git a/tests/push/test_email.py b/tests/push/test_email.py
index 7563f33fdc..0a3aca5c50 100644
--- a/tests/push/test_email.py
+++ b/tests/push/test_email.py
@@ -39,7 +39,6 @@ class _User:
 
 
 class EmailPusherTests(HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets_for_client_rest_resource,
         room.register_servlets,
@@ -48,7 +47,6 @@ class EmailPusherTests(HomeserverTestCase):
     hijack_auth = False
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-
         config = self.default_config()
         config["email"] = {
             "enable_notifs": True,
diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py
index ddca9d696c..57c781a0c3 100644
--- a/tests/replication/slave/storage/test_events.py
+++ b/tests/replication/slave/storage/test_events.py
@@ -64,7 +64,6 @@ def patch__eq__(cls: object) -> Callable[[], None]:
 
 
 class EventsWorkerStoreTestCase(BaseSlavedStoreTestCase):
-
     STORE_TYPE = EventsWorkerStore
 
     def setUp(self) -> None:
diff --git a/tests/rest/admin/test_device.py b/tests/rest/admin/test_device.py
index 03f2112b07..aaa488bced 100644
--- a/tests/rest/admin/test_device.py
+++ b/tests/rest/admin/test_device.py
@@ -28,7 +28,6 @@ from tests import unittest
 
 
 class DeviceRestTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
@@ -291,7 +290,6 @@ class DeviceRestTestCase(unittest.HomeserverTestCase):
 
 
 class DevicesRestTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
@@ -415,7 +413,6 @@ class DevicesRestTestCase(unittest.HomeserverTestCase):
 
 
 class DeleteDevicesRestTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py
index db77a45ae3..f41319a5b6 100644
--- a/tests/rest/admin/test_media.py
+++ b/tests/rest/admin/test_media.py
@@ -34,7 +34,6 @@ INVALID_TIMESTAMP_IN_S = 1893456000  # 2030-01-01 in seconds
 
 
 class DeleteMediaByIDTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         synapse.rest.admin.register_servlets_for_media_repo,
@@ -196,7 +195,6 @@ class DeleteMediaByIDTestCase(unittest.HomeserverTestCase):
 
 
 class DeleteMediaByDateSizeTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         synapse.rest.admin.register_servlets_for_media_repo,
@@ -594,7 +592,6 @@ class DeleteMediaByDateSizeTestCase(unittest.HomeserverTestCase):
 
 
 class QuarantineMediaByIDTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         synapse.rest.admin.register_servlets_for_media_repo,
@@ -724,7 +721,6 @@ class QuarantineMediaByIDTestCase(unittest.HomeserverTestCase):
 
 
 class ProtectMediaByIDTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         synapse.rest.admin.register_servlets_for_media_repo,
@@ -821,7 +817,6 @@ class ProtectMediaByIDTestCase(unittest.HomeserverTestCase):
 
 
 class PurgeMediaCacheTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         synapse.rest.admin.register_servlets_for_media_repo,
diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py
index 453a6e979c..9dbb778679 100644
--- a/tests/rest/admin/test_room.py
+++ b/tests/rest/admin/test_room.py
@@ -1990,7 +1990,6 @@ class RoomMessagesTestCase(unittest.HomeserverTestCase):
 
 
 class JoinAliasRoomTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         room.register_servlets,
diff --git a/tests/rest/admin/test_server_notice.py b/tests/rest/admin/test_server_notice.py
index f71ff46d87..28b999573e 100644
--- a/tests/rest/admin/test_server_notice.py
+++ b/tests/rest/admin/test_server_notice.py
@@ -28,7 +28,6 @@ from tests.unittest import override_config
 
 
 class ServerNoticeTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
diff --git a/tests/rest/client/test_account.py b/tests/rest/client/test_account.py
index e2ee1a1766..2b05dffc7d 100644
--- a/tests/rest/client/test_account.py
+++ b/tests/rest/client/test_account.py
@@ -40,7 +40,6 @@ from tests.unittest import override_config
 
 
 class PasswordResetTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         account.register_servlets,
         synapse.rest.admin.register_servlets_for_client_rest_resource,
@@ -408,7 +407,6 @@ class PasswordResetTestCase(unittest.HomeserverTestCase):
 
 
 class DeactivateTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets_for_client_rest_resource,
         login.register_servlets,
@@ -492,7 +490,6 @@ class DeactivateTestCase(unittest.HomeserverTestCase):
 
 
 class WhoamiTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets_for_client_rest_resource,
         login.register_servlets,
@@ -567,7 +564,6 @@ class WhoamiTestCase(unittest.HomeserverTestCase):
 
 
 class ThreepidEmailRestTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         account.register_servlets,
         login.register_servlets,
diff --git a/tests/rest/client/test_auth.py b/tests/rest/client/test_auth.py
index a144610078..0d8fe77b88 100644
--- a/tests/rest/client/test_auth.py
+++ b/tests/rest/client/test_auth.py
@@ -52,7 +52,6 @@ class DummyRecaptchaChecker(UserInteractiveAuthChecker):
 
 
 class FallbackAuthTests(unittest.HomeserverTestCase):
-
     servlets = [
         auth.register_servlets,
         register.register_servlets,
@@ -60,7 +59,6 @@ class FallbackAuthTests(unittest.HomeserverTestCase):
     hijack_auth = False
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-
         config = self.default_config()
 
         config["enable_registration_captcha"] = True
diff --git a/tests/rest/client/test_capabilities.py b/tests/rest/client/test_capabilities.py
index d1751e1557..c16e8d43f4 100644
--- a/tests/rest/client/test_capabilities.py
+++ b/tests/rest/client/test_capabilities.py
@@ -26,7 +26,6 @@ from tests.unittest import override_config
 
 
 class CapabilitiesTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets_for_client_rest_resource,
         capabilities.register_servlets,
diff --git a/tests/rest/client/test_consent.py b/tests/rest/client/test_consent.py
index b1ca81a911..bb845179d3 100644
--- a/tests/rest/client/test_consent.py
+++ b/tests/rest/client/test_consent.py
@@ -38,7 +38,6 @@ class ConsentResourceTestCase(unittest.HomeserverTestCase):
     hijack_auth = False
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-
         config = self.default_config()
         config["form_secret"] = "123abc"
 
diff --git a/tests/rest/client/test_directory.py b/tests/rest/client/test_directory.py
index 7a88aa2cda..6490e883bf 100644
--- a/tests/rest/client/test_directory.py
+++ b/tests/rest/client/test_directory.py
@@ -28,7 +28,6 @@ from tests.unittest import override_config
 
 
 class DirectoryTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         admin.register_servlets_for_client_rest_resource,
         directory.register_servlets,
diff --git a/tests/rest/client/test_ephemeral_message.py b/tests/rest/client/test_ephemeral_message.py
index 9fa1f82dfe..f31ebc8021 100644
--- a/tests/rest/client/test_ephemeral_message.py
+++ b/tests/rest/client/test_ephemeral_message.py
@@ -26,7 +26,6 @@ from tests import unittest
 
 
 class EphemeralMessageTestCase(unittest.HomeserverTestCase):
-
     user_id = "@user:test"
 
     servlets = [
diff --git a/tests/rest/client/test_events.py b/tests/rest/client/test_events.py
index a9b7db9db2..54df2a252c 100644
--- a/tests/rest/client/test_events.py
+++ b/tests/rest/client/test_events.py
@@ -38,7 +38,6 @@ class EventStreamPermissionsTestCase(unittest.HomeserverTestCase):
     ]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-
         config = self.default_config()
         config["enable_registration_captcha"] = False
         config["enable_registration"] = True
@@ -51,7 +50,6 @@ class EventStreamPermissionsTestCase(unittest.HomeserverTestCase):
         return hs
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-
         # register an account
         self.user_id = self.register_user("sid1", "pass")
         self.token = self.login(self.user_id, "pass")
@@ -142,7 +140,6 @@ class GetEventsTestCase(unittest.HomeserverTestCase):
     ]
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-
         # register an account
         self.user_id = self.register_user("sid1", "pass")
         self.token = self.login(self.user_id, "pass")
diff --git a/tests/rest/client/test_filter.py b/tests/rest/client/test_filter.py
index 830762fd53..91678abf13 100644
--- a/tests/rest/client/test_filter.py
+++ b/tests/rest/client/test_filter.py
@@ -25,7 +25,6 @@ PATH_PREFIX = "/_matrix/client/v2_alpha"
 
 
 class FilterTestCase(unittest.HomeserverTestCase):
-
     user_id = "@apple:test"
     hijack_auth = True
     EXAMPLE_FILTER = {"room": {"timeline": {"types": ["m.room.message"]}}}
diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py
index ff5baa9f0a..62acf4f44e 100644
--- a/tests/rest/client/test_login.py
+++ b/tests/rest/client/test_login.py
@@ -89,7 +89,6 @@ ADDITIONAL_LOGIN_FLOWS = [
 
 
 class LoginRestServletTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets_for_client_rest_resource,
         login.register_servlets,
@@ -737,7 +736,6 @@ class MultiSSOTestCase(unittest.HomeserverTestCase):
 
 
 class CASTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         login.register_servlets,
     ]
diff --git a/tests/rest/client/test_login_token_request.py b/tests/rest/client/test_login_token_request.py
index 6aedc1a11c..b8187db982 100644
--- a/tests/rest/client/test_login_token_request.py
+++ b/tests/rest/client/test_login_token_request.py
@@ -26,7 +26,6 @@ endpoint = "/_matrix/client/unstable/org.matrix.msc3882/login/token"
 
 
 class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         login.register_servlets,
         admin.register_servlets,
diff --git a/tests/rest/client/test_presence.py b/tests/rest/client/test_presence.py
index 67e16880e6..dcbb125a3b 100644
--- a/tests/rest/client/test_presence.py
+++ b/tests/rest/client/test_presence.py
@@ -35,7 +35,6 @@ class PresenceTestCase(unittest.HomeserverTestCase):
     servlets = [presence.register_servlets]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-
         self.presence_handler = Mock(spec=PresenceHandler)
         self.presence_handler.set_state.return_value = make_awaitable(None)
 
diff --git a/tests/rest/client/test_profile.py b/tests/rest/client/test_profile.py
index 8de5a342ae..27c93ad761 100644
--- a/tests/rest/client/test_profile.py
+++ b/tests/rest/client/test_profile.py
@@ -30,7 +30,6 @@ from tests import unittest
 
 
 class ProfileTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         admin.register_servlets_for_client_rest_resource,
         login.register_servlets,
@@ -324,7 +323,6 @@ class ProfileTestCase(unittest.HomeserverTestCase):
 
 
 class ProfilesRestrictedTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         admin.register_servlets_for_client_rest_resource,
         login.register_servlets,
@@ -404,7 +402,6 @@ class ProfilesRestrictedTestCase(unittest.HomeserverTestCase):
 
 
 class OwnProfileUnrestrictedTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         admin.register_servlets_for_client_rest_resource,
         login.register_servlets,
diff --git a/tests/rest/client/test_register.py b/tests/rest/client/test_register.py
index 4c561f9525..b228dba861 100644
--- a/tests/rest/client/test_register.py
+++ b/tests/rest/client/test_register.py
@@ -40,7 +40,6 @@ from tests.unittest import override_config
 
 
 class RegisterRestServletTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         login.register_servlets,
         register.register_servlets,
@@ -797,7 +796,6 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase):
 
 
 class AccountValidityTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         register.register_servlets,
         synapse.rest.admin.register_servlets_for_client_rest_resource,
@@ -913,7 +911,6 @@ class AccountValidityTestCase(unittest.HomeserverTestCase):
 
 
 class AccountValidityRenewalByEmailTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         register.register_servlets,
         synapse.rest.admin.register_servlets_for_client_rest_resource,
@@ -1132,7 +1129,6 @@ class AccountValidityRenewalByEmailTestCase(unittest.HomeserverTestCase):
 
 
 class AccountValidityBackgroundJobTestCase(unittest.HomeserverTestCase):
-
     servlets = [synapse.rest.admin.register_servlets_for_client_rest_resource]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
diff --git a/tests/rest/client/test_rendezvous.py b/tests/rest/client/test_rendezvous.py
index c0eb5d01a6..8dbd64be55 100644
--- a/tests/rest/client/test_rendezvous.py
+++ b/tests/rest/client/test_rendezvous.py
@@ -25,7 +25,6 @@ endpoint = "/_matrix/client/unstable/org.matrix.msc3886/rendezvous"
 
 
 class RendezvousServletTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         rendezvous.register_servlets,
     ]
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index cfad182b2f..4dd763096d 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -65,7 +65,6 @@ class RoomBase(unittest.HomeserverTestCase):
     servlets = [room.register_servlets, room.register_deprecated_servlets]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-
         self.hs = self.setup_test_homeserver(
             "red",
             federation_http_client=None,
@@ -92,7 +91,6 @@ class RoomPermissionsTestCase(RoomBase):
     rmcreator_id = "@notme:red"
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-
         self.helper.auth_user_id = self.rmcreator_id
         # create some rooms under the name rmcreator_id
         self.uncreated_rmid = "!aa:test"
@@ -1127,7 +1125,6 @@ class RoomInviteRatelimitTestCase(RoomBase):
 
 
 class RoomJoinTestCase(RoomBase):
-
     servlets = [
         admin.register_servlets,
         login.register_servlets,
@@ -2102,7 +2099,6 @@ class RoomSearchTestCase(unittest.HomeserverTestCase):
     hijack_auth = False
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-
         # Register the user who does the searching
         self.user_id2 = self.register_user("user", "pass")
         self.access_token = self.login("user", "pass")
@@ -2195,7 +2191,6 @@ class RoomSearchTestCase(unittest.HomeserverTestCase):
 
 
 class PublicRoomsRestrictedTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets_for_client_rest_resource,
         room.register_servlets,
@@ -2203,7 +2198,6 @@ class PublicRoomsRestrictedTestCase(unittest.HomeserverTestCase):
     ]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-
         self.url = b"/_matrix/client/r0/publicRooms"
 
         config = self.default_config()
@@ -2225,7 +2219,6 @@ class PublicRoomsRestrictedTestCase(unittest.HomeserverTestCase):
 
 
 class PublicRoomsRoomTypeFilterTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets_for_client_rest_resource,
         room.register_servlets,
@@ -2233,7 +2226,6 @@ class PublicRoomsRoomTypeFilterTestCase(unittest.HomeserverTestCase):
     ]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-
         config = self.default_config()
         config["allow_public_rooms_without_auth"] = True
         self.hs = self.setup_test_homeserver(config=config)
@@ -2414,7 +2406,6 @@ class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase):
 
 
 class PerRoomProfilesForbiddenTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets_for_client_rest_resource,
         room.register_servlets,
@@ -2983,7 +2974,6 @@ class RelationsTestCase(PaginationTestCase):
 
 
 class ContextTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets_for_client_rest_resource,
         room.register_servlets,
@@ -3359,7 +3349,6 @@ class RoomCanonicalAliasTestCase(unittest.HomeserverTestCase):
 
 
 class ThreepidInviteTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         admin.register_servlets,
         login.register_servlets,
@@ -3438,7 +3427,8 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase):
         """
         Test allowing/blocking threepid invites with a spam-check module.
 
-        In this test, we use the more recent API in which callbacks return a `Union[Codes, Literal["NOT_SPAM"]]`."""
+        In this test, we use the more recent API in which callbacks return a `Union[Codes, Literal["NOT_SPAM"]]`.
+        """
         # Mock a few functions to prevent the test from failing due to failing to talk to
         # a remote IS. We keep the mock for make_and_store_3pid_invite around so we
         # can check its call_count later on during the test.
diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py
index b9047194dd..9c876c7a32 100644
--- a/tests/rest/client/test_sync.py
+++ b/tests/rest/client/test_sync.py
@@ -41,7 +41,6 @@ from tests.server import TimedOutException
 
 
 class FilterTestCase(unittest.HomeserverTestCase):
-
     user_id = "@apple:test"
     servlets = [
         synapse.rest.admin.register_servlets_for_client_rest_resource,
@@ -191,7 +190,6 @@ class SyncFilterTestCase(unittest.HomeserverTestCase):
 
 
 class SyncTypingTests(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets_for_client_rest_resource,
         room.register_servlets,
@@ -892,7 +890,6 @@ class DeviceListSyncTestCase(unittest.HomeserverTestCase):
 
 
 class ExcludeRoomTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py
index 5fa3440691..c0f93f898a 100644
--- a/tests/rest/client/test_third_party_rules.py
+++ b/tests/rest/client/test_third_party_rules.py
@@ -137,6 +137,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
         """Tests that a forbidden event is forbidden from being sent, but an allowed one
         can be sent.
         """
+
         # patch the rules module with a Mock which will return False for some event
         # types
         async def check(
@@ -243,6 +244,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
 
     def test_modify_event(self) -> None:
         """The module can return a modified version of the event"""
+
         # first patch the event checker so that it will modify the event
         async def check(
             ev: EventBase, state: StateMap[EventBase]
@@ -275,6 +277,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
 
     def test_message_edit(self) -> None:
         """Ensure that the module doesn't cause issues with edited messages."""
+
         # first patch the event checker so that it will modify the event
         async def check(
             ev: EventBase, state: StateMap[EventBase]
diff --git a/tests/rest/media/test_media_retention.py b/tests/rest/media/test_media_retention.py
index 23f227aed6..b59d9dfd4d 100644
--- a/tests/rest/media/test_media_retention.py
+++ b/tests/rest/media/test_media_retention.py
@@ -31,7 +31,6 @@ from tests.utils import MockClock
 
 
 class MediaRetentionTestCase(unittest.HomeserverTestCase):
-
     ONE_DAY_IN_MS = 24 * 60 * 60 * 1000
     THIRTY_DAYS_IN_MS = 30 * ONE_DAY_IN_MS
 
diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py
index 17a3b06a8e..8ed27179c4 100644
--- a/tests/rest/media/v1/test_media_storage.py
+++ b/tests/rest/media/v1/test_media_storage.py
@@ -52,7 +52,6 @@ from tests.utils import default_config
 
 
 class MediaStorageTests(unittest.HomeserverTestCase):
-
     needs_threadpool = True
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
@@ -207,7 +206,6 @@ class MediaRepoTests(unittest.HomeserverTestCase):
     user_id = "@test:user"
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-
         self.fetches: List[
             Tuple[
                 "Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]]",
@@ -268,7 +266,6 @@ class MediaRepoTests(unittest.HomeserverTestCase):
         return hs
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-
         media_resource = hs.get_media_repository_resource()
         self.download_resource = media_resource.children[b"download"]
         self.thumbnail_resource = media_resource.children[b"thumbnail"]
diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py
index 2c321f8d04..6fcf60ce19 100644
--- a/tests/rest/media/v1/test_url_preview.py
+++ b/tests/rest/media/v1/test_url_preview.py
@@ -58,7 +58,6 @@ class URLPreviewTests(unittest.HomeserverTestCase):
     )
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-
         config = self.default_config()
         config["url_preview_enabled"] = True
         config["max_spider_size"] = 9999999
@@ -118,7 +117,6 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         return hs
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-
         self.media_repo = hs.get_media_repository_resource()
         self.preview_url = self.media_repo.children[b"preview_url"]
 
@@ -133,7 +131,6 @@ class URLPreviewTests(unittest.HomeserverTestCase):
                 addressTypes: Optional[Sequence[Type[IAddress]]] = None,
                 transportSemantics: str = "TCP",
             ) -> IResolutionReceiver:
-
                 resolution = HostResolution(hostName)
                 resolutionReceiver.resolutionBegan(resolution)
                 if hostName not in self.lookups:
diff --git a/tests/server_notices/test_consent.py b/tests/server_notices/test_consent.py
index 6540ed53f1..3fdf5a6d52 100644
--- a/tests/server_notices/test_consent.py
+++ b/tests/server_notices/test_consent.py
@@ -25,7 +25,6 @@ from tests import unittest
 
 
 class ConsentNoticesTests(unittest.HomeserverTestCase):
-
     servlets = [
         sync.register_servlets,
         synapse.rest.admin.register_servlets_for_client_rest_resource,
@@ -34,7 +33,6 @@ class ConsentNoticesTests(unittest.HomeserverTestCase):
     ]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-
         tmpdir = self.mktemp()
         os.mkdir(tmpdir)
         self.consent_notice_message = "consent %(consent_uri)s"
diff --git a/tests/storage/databases/main/test_deviceinbox.py b/tests/storage/databases/main/test_deviceinbox.py
index 373707b275..b6d5c474b0 100644
--- a/tests/storage/databases/main/test_deviceinbox.py
+++ b/tests/storage/databases/main/test_deviceinbox.py
@@ -23,7 +23,6 @@ from tests.unittest import HomeserverTestCase
 
 
 class DeviceInboxBackgroundUpdateStoreTestCase(HomeserverTestCase):
-
     servlets = [
         admin.register_servlets,
         devices.register_servlets,
diff --git a/tests/storage/databases/main/test_receipts.py b/tests/storage/databases/main/test_receipts.py
index ac77aec003..71db47405e 100644
--- a/tests/storage/databases/main/test_receipts.py
+++ b/tests/storage/databases/main/test_receipts.py
@@ -26,7 +26,6 @@ from tests.unittest import HomeserverTestCase
 
 
 class ReceiptsBackgroundUpdateStoreTestCase(HomeserverTestCase):
-
     servlets = [
         admin.register_servlets,
         room.register_servlets,
@@ -62,6 +61,7 @@ class ReceiptsBackgroundUpdateStoreTestCase(HomeserverTestCase):
                 keys and expected receipt key-values after duplicate receipts have been
                 removed.
         """
+
         # First, undo the background update.
         def drop_receipts_unique_index(txn: LoggingTransaction) -> None:
             txn.execute(f"DROP INDEX IF EXISTS {index_name}")
diff --git a/tests/storage/databases/main/test_room.py b/tests/storage/databases/main/test_room.py
index 3108ca3444..dbd8f3a85e 100644
--- a/tests/storage/databases/main/test_room.py
+++ b/tests/storage/databases/main/test_room.py
@@ -27,7 +27,6 @@ from tests.unittest import HomeserverTestCase
 
 
 class RoomBackgroundUpdateStoreTestCase(HomeserverTestCase):
-
     servlets = [
         admin.register_servlets,
         room.register_servlets,
diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py
index 7f7f4ef892..cd0079871c 100644
--- a/tests/storage/test_client_ips.py
+++ b/tests/storage/test_client_ips.py
@@ -656,7 +656,6 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
 
 
 class ClientIpAuthTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py
index a10e5fa8b1..73d11e7786 100644
--- a/tests/storage/test_event_chain.py
+++ b/tests/storage/test_event_chain.py
@@ -417,7 +417,6 @@ class EventChainStoreTestCase(HomeserverTestCase):
     def fetch_chains(
         self, events: List[EventBase]
     ) -> Tuple[Dict[str, Tuple[int, int]], _LinkMap]:
-
         # Fetch the map from event ID -> (chain ID, sequence number)
         rows = self.get_success(
             self.store.db_pool.simple_select_many_batch(
@@ -492,7 +491,6 @@ class LinkMapTestCase(unittest.TestCase):
 
 
 class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
-
     servlets = [
         admin.register_servlets,
         room.register_servlets,
diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index 8fc7936ab0..3e1984c15c 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -672,7 +672,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
 
         complete_event_dict_map: Dict[str, JsonDict] = {}
         stream_ordering = 0
-        for (event_id, prev_event_ids) in event_graph.items():
+        for event_id, prev_event_ids in event_graph.items():
             depth = depth_map[event_id]
 
             complete_event_dict_map[event_id] = {
diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py
index 76c06a9d1e..aa19c3bd30 100644
--- a/tests/storage/test_event_push_actions.py
+++ b/tests/storage/test_event_push_actions.py
@@ -774,7 +774,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase):
         self.assertEqual(r, 3)
 
         # add a bunch of dummy events to the events table
-        for (stream_ordering, ts) in (
+        for stream_ordering, ts in (
             (3, 110),
             (4, 120),
             (5, 120),
diff --git a/tests/storage/test_purge.py b/tests/storage/test_purge.py
index d8f42c5d05..857e2caf2e 100644
--- a/tests/storage/test_purge.py
+++ b/tests/storage/test_purge.py
@@ -23,7 +23,6 @@ from tests.unittest import HomeserverTestCase
 
 
 class PurgeTests(HomeserverTestCase):
-
     user_id = "@red:server"
     servlets = [room.register_servlets]
 
diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py
index 8794401823..f4c4661aaf 100644
--- a/tests/storage/test_roommember.py
+++ b/tests/storage/test_roommember.py
@@ -27,7 +27,6 @@ from tests.test_utils import event_injection
 
 
 class RoomMemberStoreTestCase(unittest.HomeserverTestCase):
-
     servlets = [
         login.register_servlets,
         register_servlets_for_client_rest_resource,
@@ -35,7 +34,6 @@ class RoomMemberStoreTestCase(unittest.HomeserverTestCase):
     ]
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: TestHomeServer) -> None:  # type: ignore[override]
-
         # We can't test the RoomMemberStore on its own without the other event
         # storage logic
         self.store = hs.get_datastores().main
@@ -48,7 +46,6 @@ class RoomMemberStoreTestCase(unittest.HomeserverTestCase):
         self.u_charlie = UserID.from_string("@charlie:elsewhere")
 
     def test_one_member(self) -> None:
-
         # Alice creates the room, and is automatically joined
         self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice)
 
diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py
index f730b888f7..e82c03f597 100644
--- a/tests/storage/test_state.py
+++ b/tests/storage/test_state.py
@@ -242,7 +242,7 @@ class StateStoreTestCase(HomeserverTestCase):
 
         # test _get_state_for_group_using_cache correctly filters out members
         # with types=[]
-        (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache(
+        state_dict, is_all = self.state_datastore._get_state_for_group_using_cache(
             self.state_datastore._state_group_cache,
             group,
             state_filter=StateFilter(
@@ -259,7 +259,7 @@ class StateStoreTestCase(HomeserverTestCase):
             state_dict,
         )
 
-        (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache(
+        state_dict, is_all = self.state_datastore._get_state_for_group_using_cache(
             self.state_datastore._state_group_members_cache,
             group,
             state_filter=StateFilter(
@@ -272,7 +272,7 @@ class StateStoreTestCase(HomeserverTestCase):
 
         # test _get_state_for_group_using_cache correctly filters in members
         # with wildcard types
-        (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache(
+        state_dict, is_all = self.state_datastore._get_state_for_group_using_cache(
             self.state_datastore._state_group_cache,
             group,
             state_filter=StateFilter(
@@ -289,7 +289,7 @@ class StateStoreTestCase(HomeserverTestCase):
             state_dict,
         )
 
-        (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache(
+        state_dict, is_all = self.state_datastore._get_state_for_group_using_cache(
             self.state_datastore._state_group_members_cache,
             group,
             state_filter=StateFilter(
@@ -309,7 +309,7 @@ class StateStoreTestCase(HomeserverTestCase):
 
         # test _get_state_for_group_using_cache correctly filters in members
         # with specific types
-        (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache(
+        state_dict, is_all = self.state_datastore._get_state_for_group_using_cache(
             self.state_datastore._state_group_cache,
             group,
             state_filter=StateFilter(
@@ -327,7 +327,7 @@ class StateStoreTestCase(HomeserverTestCase):
             state_dict,
         )
 
-        (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache(
+        state_dict, is_all = self.state_datastore._get_state_for_group_using_cache(
             self.state_datastore._state_group_members_cache,
             group,
             state_filter=StateFilter(
@@ -341,7 +341,7 @@ class StateStoreTestCase(HomeserverTestCase):
 
         # test _get_state_for_group_using_cache correctly filters in members
         # with specific types
-        (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache(
+        state_dict, is_all = self.state_datastore._get_state_for_group_using_cache(
             self.state_datastore._state_group_members_cache,
             group,
             state_filter=StateFilter(
@@ -392,7 +392,7 @@ class StateStoreTestCase(HomeserverTestCase):
         # test _get_state_for_group_using_cache correctly filters out members
         # with types=[]
         room_id = self.room.to_string()
-        (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache(
+        state_dict, is_all = self.state_datastore._get_state_for_group_using_cache(
             self.state_datastore._state_group_cache,
             group,
             state_filter=StateFilter(
@@ -404,7 +404,7 @@ class StateStoreTestCase(HomeserverTestCase):
         self.assertDictEqual({}, state_dict)
 
         room_id = self.room.to_string()
-        (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache(
+        state_dict, is_all = self.state_datastore._get_state_for_group_using_cache(
             self.state_datastore._state_group_members_cache,
             group,
             state_filter=StateFilter(
@@ -417,7 +417,7 @@ class StateStoreTestCase(HomeserverTestCase):
 
         # test _get_state_for_group_using_cache correctly filters in members
         # wildcard types
-        (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache(
+        state_dict, is_all = self.state_datastore._get_state_for_group_using_cache(
             self.state_datastore._state_group_cache,
             group,
             state_filter=StateFilter(
@@ -428,7 +428,7 @@ class StateStoreTestCase(HomeserverTestCase):
         self.assertEqual(is_all, False)
         self.assertDictEqual({}, state_dict)
 
-        (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache(
+        state_dict, is_all = self.state_datastore._get_state_for_group_using_cache(
             self.state_datastore._state_group_members_cache,
             group,
             state_filter=StateFilter(
@@ -447,7 +447,7 @@ class StateStoreTestCase(HomeserverTestCase):
 
         # test _get_state_for_group_using_cache correctly filters in members
         # with specific types
-        (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache(
+        state_dict, is_all = self.state_datastore._get_state_for_group_using_cache(
             self.state_datastore._state_group_cache,
             group,
             state_filter=StateFilter(
@@ -459,7 +459,7 @@ class StateStoreTestCase(HomeserverTestCase):
         self.assertEqual(is_all, False)
         self.assertDictEqual({}, state_dict)
 
-        (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache(
+        state_dict, is_all = self.state_datastore._get_state_for_group_using_cache(
             self.state_datastore._state_group_members_cache,
             group,
             state_filter=StateFilter(
@@ -473,7 +473,7 @@ class StateStoreTestCase(HomeserverTestCase):
 
         # test _get_state_for_group_using_cache correctly filters in members
         # with specific types
-        (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache(
+        state_dict, is_all = self.state_datastore._get_state_for_group_using_cache(
             self.state_datastore._state_group_cache,
             group,
             state_filter=StateFilter(
@@ -485,7 +485,7 @@ class StateStoreTestCase(HomeserverTestCase):
         self.assertEqual(is_all, False)
         self.assertDictEqual({}, state_dict)
 
-        (state_dict, is_all,) = self.state_datastore._get_state_for_group_using_cache(
+        state_dict, is_all = self.state_datastore._get_state_for_group_using_cache(
             self.state_datastore._state_group_members_cache,
             group,
             state_filter=StateFilter(
diff --git a/tests/test_mau.py b/tests/test_mau.py
index 4e7665a22b..ff21098a59 100644
--- a/tests/test_mau.py
+++ b/tests/test_mau.py
@@ -32,7 +32,6 @@ from tests.utils import default_config
 
 
 class TestMauLimit(unittest.HomeserverTestCase):
-
     servlets = [register.register_servlets, sync.register_servlets]
 
     def default_config(self) -> JsonDict:
-- 
cgit 1.5.1


From a068ad7dd4910c81bb0886fbf986dde126eeb4ee Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Thu, 23 Feb 2023 19:14:17 +0100
Subject: Add information on uploaded media to user export command. (#15107)

---
 changelog.d/15107.feature              |  1 +
 docs/usage/administration/admin_faq.md | 74 ++++++++++++++++++++++++++--------
 synapse/app/admin_cmd.py               | 10 +++++
 synapse/handlers/admin.py              | 38 +++++++++++++++++
 tests/handlers/test_admin.py           | 29 +++++++++++++
 5 files changed, 136 insertions(+), 16 deletions(-)
 create mode 100644 changelog.d/15107.feature

(limited to 'synapse')

diff --git a/changelog.d/15107.feature b/changelog.d/15107.feature
new file mode 100644
index 0000000000..2bdb6a29fc
--- /dev/null
+++ b/changelog.d/15107.feature
@@ -0,0 +1 @@
+Add media information to the command line [user data export tool](https://matrix-org.github.io/synapse/v1.79/usage/administration/admin_faq.html#how-can-i-export-user-data).
\ No newline at end of file
diff --git a/docs/usage/administration/admin_faq.md b/docs/usage/administration/admin_faq.md
index 925e1d175e..28c3dd53a5 100644
--- a/docs/usage/administration/admin_faq.md
+++ b/docs/usage/administration/admin_faq.md
@@ -70,13 +70,55 @@ output-directory
 │       ├───state
 │       ├───invite_state
 │       └───knock_state
-└───user_data
-    ├───account_data
-    │   ├───global
-    │   └───<room_id>
-    ├───connections
-    ├───devices
-    └───profile
+├───user_data
+│   ├───account_data
+│   │   ├───global
+│   │   └───<room_id>
+│   ├───connections
+│   ├───devices
+│   └───profile
+└───media_ids
+    └───<media_id>
+```
+
+The `media_ids` folder contains only the metadata of the media uploaded by the user.
+It does not contain the media itself.
+Furthermore, only the `media_ids` that Synapse manages itself are exported.
+If another media repository (e.g. [matrix-media-repo](https://github.com/turt2live/matrix-media-repo))
+is used, the data must be exported separately.
+
+With the `media_ids` the media files can be downloaded.
+Media that have been sent in encrypted rooms are only retrieved in encrypted form.
+The following script can help with downloading the media files:
+
+```bash
+#!/usr/bin/env bash
+
+# Parameters
+#
+#   source_directory: Directory which contains the export with the media_ids.
+#   target_directory: Directory into which all files are to be downloaded.
+#   repository_url: Address of the media repository or media worker.
+#   serverName: Name of the server (`server_name` from homeserver.yaml).
+#
+#   Example:
+#       ./download_media.sh /tmp/export_data/media_ids/ /tmp/export_data/media_files/ http://localhost:8008 matrix.example.com
+
+source_directory=$1
+target_directory=$2
+repository_url=$3
+serverName=$4
+
+mkdir -p $target_directory
+
+for file in $source_directory/*; do
+    filename=$(basename ${file})
+    url=$repository_url/_matrix/media/v3/download/$serverName/$filename
+    echo "Downloading $filename - $url"
+    if ! wget -o /dev/null -P $target_directory $url; then
+        echo "Could not download $filename"
+    fi
+done
 ```
 
 Manually resetting passwords
@@ -87,7 +129,7 @@ can reset a user's password using the [admin API](../../admin_api/user_admin_api
 
 I have a problem with my server. Can I just delete my database and start again?
 ---
-Deleting your database is unlikely to make anything better. 
+Deleting your database is unlikely to make anything better.
 
 It's easy to make the mistake of thinking that you can start again from a clean
 slate by dropping your database, but things don't work like that in a federated
@@ -102,7 +144,7 @@ Come and seek help in https://matrix.to/#/#synapse:matrix.org.
 
 There are two exceptions when it might be sensible to delete your database and start again:
 * You have *never* joined any rooms which are federated with other servers. For
-instance, a local deployment which the outside world can't talk to. 
+instance, a local deployment which the outside world can't talk to.
 * You are changing the `server_name` in the homeserver configuration. In effect
 this makes your server a completely new one from the point of view of the network,
 so in this case it makes sense to start with a clean database.
@@ -115,7 +157,7 @@ Using the following curl command:
 curl -H 'Authorization: Bearer <access-token>' -X DELETE https://matrix.org/_matrix/client/r0/directory/room/<room-alias>
 ```
 `<access-token>` - can be obtained in riot by looking in the riot settings, down the bottom is:
-Access Token:\<click to reveal\> 
+Access Token:\<click to reveal\>
 
 `<room-alias>` - the room alias, eg. #my_room:matrix.org this possibly needs to be URL encoded also, for example  %23my_room%3Amatrix.org
 
@@ -152,13 +194,13 @@ What are the biggest rooms on my server?
 ---
 
 ```sql
-SELECT s.canonical_alias, g.room_id, count(*) AS num_rows 
-FROM 
-  state_groups_state AS g, 
-  room_stats_state AS s 
-WHERE g.room_id = s.room_id 
+SELECT s.canonical_alias, g.room_id, count(*) AS num_rows
+FROM
+  state_groups_state AS g,
+  room_stats_state AS s
+WHERE g.room_id = s.room_id
 GROUP BY s.canonical_alias, g.room_id
-ORDER BY num_rows desc 
+ORDER BY num_rows desc
 LIMIT 10;
 ```
 
diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py
index 5003777f0d..b05fe2c589 100644
--- a/synapse/app/admin_cmd.py
+++ b/synapse/app/admin_cmd.py
@@ -44,6 +44,7 @@ from synapse.storage.databases.main.event_push_actions import (
 )
 from synapse.storage.databases.main.events_worker import EventsWorkerStore
 from synapse.storage.databases.main.filtering import FilteringWorkerStore
+from synapse.storage.databases.main.media_repository import MediaRepositoryStore
 from synapse.storage.databases.main.profile import ProfileWorkerStore
 from synapse.storage.databases.main.push_rule import PushRulesWorkerStore
 from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
@@ -86,6 +87,7 @@ class AdminCmdSlavedStore(
     RegistrationWorkerStore,
     RoomWorkerStore,
     ProfileWorkerStore,
+    MediaRepositoryStore,
 ):
     def __init__(
         self,
@@ -235,6 +237,14 @@ class FileExfiltrationWriter(ExfiltrationWriter):
         with open(account_data_file, "a") as f:
             json.dump(account_data, fp=f)
 
+    def write_media_id(self, media_id: str, media_metadata: JsonDict) -> None:
+        file_directory = os.path.join(self.base_directory, "media_ids")
+        os.makedirs(file_directory, exist_ok=True)
+        media_id_file = os.path.join(file_directory, media_id)
+
+        with open(media_id_file, "w") as f:
+            json.dump(media_metadata, fp=f)
+
     def finished(self) -> str:
         return self.base_directory
 
diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py
index 8b7760b2cc..b06f25b03c 100644
--- a/synapse/handlers/admin.py
+++ b/synapse/handlers/admin.py
@@ -252,16 +252,19 @@ class AdminHandler:
         profile = await self.get_user(UserID.from_string(user_id))
         if profile is not None:
             writer.write_profile(profile)
+            logger.info("[%s] Written profile", user_id)
 
         # Get all devices the user has
         devices = await self._device_handler.get_devices_by_user(user_id)
         writer.write_devices(devices)
+        logger.info("[%s] Written %s devices", user_id, len(devices))
 
         # Get all connections the user has
         connections = await self.get_whois(UserID.from_string(user_id))
         writer.write_connections(
             connections["devices"][""]["sessions"][0]["connections"]
         )
+        logger.info("[%s] Written %s connections", user_id, len(connections))
 
         # Get all account data the user has global and in rooms
         global_data = await self._store.get_global_account_data_for_user(user_id)
@@ -269,6 +272,29 @@ class AdminHandler:
         writer.write_account_data("global", global_data)
         for room_id in by_room_data:
             writer.write_account_data(room_id, by_room_data[room_id])
+        logger.info(
+            "[%s] Written account data for %s rooms", user_id, len(by_room_data)
+        )
+
+        # Get all media ids the user has
+        limit = 100
+        start = 0
+        while True:
+            media_ids, total = await self._store.get_local_media_by_user_paginate(
+                start, limit, user_id
+            )
+            for media in media_ids:
+                writer.write_media_id(media["media_id"], media)
+
+            logger.info(
+                "[%s] Written %d media_ids of %s",
+                user_id,
+                (start + len(media_ids)),
+                total,
+            )
+            if (start + limit) >= total:
+                break
+            start += limit
 
         return writer.finished()
 
@@ -359,6 +385,18 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta):
         """
         raise NotImplementedError()
 
+    @abc.abstractmethod
+    def write_media_id(self, media_id: str, media_metadata: JsonDict) -> None:
+        """Write the media's metadata of a user.
+        Exports only the metadata, as this can be fetched from the database via
+        read only. In order to access the files, a connection to the correct
+        media repository would be required.
+
+        Args:
+            media_id: ID of the media.
+            media_metadata: Metadata of one media file.
+        """
+
     @abc.abstractmethod
     def finished(self) -> Any:
         """Called when all data has successfully been exported and written.
diff --git a/tests/handlers/test_admin.py b/tests/handlers/test_admin.py
index 1b97aaeed1..5569ccef8a 100644
--- a/tests/handlers/test_admin.py
+++ b/tests/handlers/test_admin.py
@@ -23,6 +23,7 @@ from synapse.api.constants import EventTypes, JoinRules
 from synapse.api.room_versions import RoomVersions
 from synapse.rest.client import knock, login, room
 from synapse.server import HomeServer
+from synapse.types import UserID
 from synapse.util import Clock
 
 from tests import unittest
@@ -323,3 +324,31 @@ class ExfiltrateData(unittest.HomeserverTestCase):
         args = writer.write_account_data.call_args_list[1][0]
         self.assertEqual(args[0], "test_room")
         self.assertEqual(args[1]["m.per_room"]["b"], 2)
+
+    def test_media_ids(self) -> None:
+        """Tests that media's metadata get exported."""
+
+        self.get_success(
+            self._store.store_local_media(
+                media_id="media_1",
+                media_type="image/png",
+                time_now_ms=self.clock.time_msec(),
+                upload_name=None,
+                media_length=50,
+                user_id=UserID.from_string(self.user2),
+            )
+        )
+
+        writer = Mock()
+
+        self.get_success(self.admin_handler.export_user_data(self.user2, writer))
+
+        writer.write_media_id.assert_called_once()
+
+        args = writer.write_media_id.call_args[0]
+        self.assertEqual(args[0], "media_1")
+        self.assertEqual(args[1]["media_id"], "media_1")
+        self.assertEqual(args[1]["media_length"], 50)
+        self.assertGreater(args[1]["created_ts"], 0)
+        self.assertIsNone(args[1]["upload_name"])
+        self.assertIsNone(args[1]["last_access_ts"])
-- 
cgit 1.5.1


From ec79870f1422be47e8d6e85f315799888278969b Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 23 Feb 2023 16:06:42 -0500
Subject: Fix a typo in MSC3873 config option. (#15138)

Previously the experimental configuration option referred to the wrong
MSC number.
---
 changelog.d/15138.misc                   |  1 +
 synapse/config/experimental.py           |  4 ++--
 synapse/push/bulk_push_rule_evaluator.py | 12 ++++++------
 tests/push/test_push_rule_evaluator.py   |  2 +-
 4 files changed, 10 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/15138.misc

(limited to 'synapse')

diff --git a/changelog.d/15138.misc b/changelog.d/15138.misc
new file mode 100644
index 0000000000..fb706b27f2
--- /dev/null
+++ b/changelog.d/15138.misc
@@ -0,0 +1 @@
+Fix a typo in an experimental config setting.
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 54c91953e1..bc38fae0b6 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -175,8 +175,8 @@ class ExperimentalConfig(Config):
         )
 
         # MSC3873: Disambiguate event_match keys.
-        self.msc3783_escape_event_match_key = experimental.get(
-            "msc3783_escape_event_match_key", False
+        self.msc3873_escape_event_match_key = experimental.get(
+            "msc3873_escape_event_match_key", False
         )
 
         # MSC3952: Intentional mentions, this depends on MSC3758.
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 8f834be774..3c4a152d6b 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -276,7 +276,7 @@ class BulkPushRuleEvaluator:
                 if related_event is not None:
                     related_events[relation_type] = _flatten_dict(
                         related_event,
-                        msc3783_escape_event_match_key=self.hs.config.experimental.msc3783_escape_event_match_key,
+                        msc3873_escape_event_match_key=self.hs.config.experimental.msc3873_escape_event_match_key,
                     )
 
             reply_event_id = (
@@ -294,7 +294,7 @@ class BulkPushRuleEvaluator:
                 if related_event is not None:
                     related_events["m.in_reply_to"] = _flatten_dict(
                         related_event,
-                        msc3783_escape_event_match_key=self.hs.config.experimental.msc3783_escape_event_match_key,
+                        msc3873_escape_event_match_key=self.hs.config.experimental.msc3873_escape_event_match_key,
                     )
 
                     # indicate that this is from a fallback relation.
@@ -412,7 +412,7 @@ class BulkPushRuleEvaluator:
         evaluator = PushRuleEvaluator(
             _flatten_dict(
                 event,
-                msc3783_escape_event_match_key=self.hs.config.experimental.msc3783_escape_event_match_key,
+                msc3873_escape_event_match_key=self.hs.config.experimental.msc3873_escape_event_match_key,
             ),
             has_mentions,
             user_mentions,
@@ -507,7 +507,7 @@ def _flatten_dict(
     prefix: Optional[List[str]] = None,
     result: Optional[Dict[str, JsonValue]] = None,
     *,
-    msc3783_escape_event_match_key: bool = False,
+    msc3873_escape_event_match_key: bool = False,
 ) -> Dict[str, JsonValue]:
     """
     Given a JSON dictionary (or event) which might contain sub dictionaries,
@@ -536,7 +536,7 @@ def _flatten_dict(
     if result is None:
         result = {}
     for key, value in d.items():
-        if msc3783_escape_event_match_key:
+        if msc3873_escape_event_match_key:
             # Escape periods in the key with a backslash (and backslashes with an
             # extra backslash). This is since a period is used as a separator between
             # nested fields.
@@ -552,7 +552,7 @@ def _flatten_dict(
                 value,
                 prefix=(prefix + [key]),
                 result=result,
-                msc3783_escape_event_match_key=msc3783_escape_event_match_key,
+                msc3873_escape_event_match_key=msc3873_escape_event_match_key,
             )
 
     # `room_version` should only ever be set when looking at the top level of an event
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index d320a12f96..4e858fd16f 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -54,7 +54,7 @@ class FlattenDictTestCase(unittest.TestCase):
         self.assertEqual({"m.foo.b\\ar": "abc"}, _flatten_dict(input))
         self.assertEqual(
             {"m\\.foo.b\\\\ar": "abc"},
-            _flatten_dict(input, msc3783_escape_event_match_key=True),
+            _flatten_dict(input, msc3873_escape_event_match_key=True),
         )
 
     def test_non_string(self) -> None:
-- 
cgit 1.5.1


From f8a584ed0259cbb3c3a51726d1008d04c26b4bd8 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 23 Feb 2023 16:07:46 -0500
Subject: Stop parsing the unspecced type parameter on thumbnail requests.
 (#15137)

Ideally we would replace this with parsing of the Accept header
or something else, but for now just make Synapse spec compliant
by ignoring the unspecced parameter.

It does not seem that this is ever sent by a client, and even if it is
there's a reasonable fallback.
---
 changelog.d/15137.removal                   | 1 +
 synapse/rest/media/v1/thumbnail_resource.py | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15137.removal

(limited to 'synapse')

diff --git a/changelog.d/15137.removal b/changelog.d/15137.removal
new file mode 100644
index 0000000000..c533b0c9dd
--- /dev/null
+++ b/changelog.d/15137.removal
@@ -0,0 +1 @@
+Remove the undocumented and unspecced `type` parameter to the `/thumbnail` endpoint.
diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py
index 5f725c7600..3e720018b3 100644
--- a/synapse/rest/media/v1/thumbnail_resource.py
+++ b/synapse/rest/media/v1/thumbnail_resource.py
@@ -69,7 +69,8 @@ class ThumbnailResource(DirectServeJsonResource):
         width = parse_integer(request, "width", required=True)
         height = parse_integer(request, "height", required=True)
         method = parse_string(request, "method", "scale")
-        m_type = parse_string(request, "type", "image/png")
+        # TODO Parse the Accept header to get a prioritised list of thumbnail types.
+        m_type = "image/png"
 
         if server_name == self.server_name:
             if self.dynamic_thumbnails:
-- 
cgit 1.5.1


From 682151a464f688768d5bd8308e16bd4024ad2e57 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 23 Feb 2023 16:08:53 -0500
Subject: Do not fail completely if oEmbed autodiscovery fails. (#15092)

Previously if an autodiscovered oEmbed request failed (e.g. the
oEmbed endpoint is down or does not exist) then the entire URL
preview would fail. Instead we now return everything we can, even
if this additional request fails.
---
 changelog.d/15092.bugfix                      |  1 +
 synapse/rest/media/v1/preview_url_resource.py | 33 ++++++++++++++------
 tests/rest/media/v1/test_url_preview.py       | 44 +++++++++++++++++++++++++--
 3 files changed, 65 insertions(+), 13 deletions(-)
 create mode 100644 changelog.d/15092.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15092.bugfix b/changelog.d/15092.bugfix
new file mode 100644
index 0000000000..67509c5c69
--- /dev/null
+++ b/changelog.d/15092.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where a URL preview would break if the discovered oEmbed failed to download.
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index a8f6fd6b35..4a594ab9d8 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -163,6 +163,10 @@ class PreviewUrlResource(DirectServeJsonResource):
        7. Stores the result in the database cache.
     4. Returns the result.
 
+    If any additional requests (e.g. from oEmbed autodiscovery, step 5.3 or
+    image thumbnailing, step 5.4 or 6.4) fail then the URL preview as a whole
+    does not fail. As much information as possible is returned.
+
     The in-memory cache expires after 1 hour.
 
     Expired entries in the database cache (and their associated media files) are
@@ -364,16 +368,25 @@ class PreviewUrlResource(DirectServeJsonResource):
                 oembed_url = self._oembed.autodiscover_from_html(tree)
                 og_from_oembed: JsonDict = {}
                 if oembed_url:
-                    oembed_info = await self._handle_url(
-                        oembed_url, user, allow_data_urls=True
-                    )
-                    (
-                        og_from_oembed,
-                        author_name,
-                        expiration_ms,
-                    ) = await self._handle_oembed_response(
-                        url, oembed_info, expiration_ms
-                    )
+                    try:
+                        oembed_info = await self._handle_url(
+                            oembed_url, user, allow_data_urls=True
+                        )
+                    except Exception as e:
+                        # Fetching the oEmbed info failed, don't block the entire URL preview.
+                        logger.warning(
+                            "oEmbed fetch failed during URL preview: %s errored with %s",
+                            oembed_url,
+                            e,
+                        )
+                    else:
+                        (
+                            og_from_oembed,
+                            author_name,
+                            expiration_ms,
+                        ) = await self._handle_oembed_response(
+                            url, oembed_info, expiration_ms
+                        )
 
                 # Parse Open Graph information from the HTML in case the oEmbed
                 # response failed or is incomplete.
diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py
index 6fcf60ce19..2acfccec61 100644
--- a/tests/rest/media/v1/test_url_preview.py
+++ b/tests/rest/media/v1/test_url_preview.py
@@ -657,7 +657,7 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         """If the preview image doesn't exist, ensure some data is returned."""
         self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
 
-        end_content = (
+        result = (
             b"""<html><body><img src="http://cdn.matrix.org/foo.jpg"></body></html>"""
         )
 
@@ -678,8 +678,8 @@ class URLPreviewTests(unittest.HomeserverTestCase):
                 b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
                 b'Content-Type: text/html; charset="utf8"\r\n\r\n'
             )
-            % (len(end_content),)
-            + end_content
+            % (len(result),)
+            + result
         )
 
         self.pump()
@@ -688,6 +688,44 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         # The image should not be in the result.
         self.assertNotIn("og:image", channel.json_body)
 
+    def test_oembed_failure(self) -> None:
+        """If the autodiscovered oEmbed URL fails, ensure some data is returned."""
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+        result = b"""
+        <title>oEmbed Autodiscovery Fail</title>
+        <link rel="alternate" type="application/json+oembed"
+            href="http://example.com/oembed?url=http%3A%2F%2Fmatrix.org&format=json"
+            title="matrixdotorg" />
+        """
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://matrix.org",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+            )
+            % (len(result),)
+            + result
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
+
+        # The title parsed from the HTML should still be in the result.
+        self.assertEqual(channel.json_body["og:title"], "oEmbed Autodiscovery Fail")
+
     def test_data_url(self) -> None:
         """
         Requesting to preview a data URL is not supported.
-- 
cgit 1.5.1


From 335f52d595c2c32e4b512b97e2851bc98b819ca7 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Fri, 24 Feb 2023 13:39:45 +0000
Subject: Improve handling of non-ASCII characters in user directory search
 (#15143)

* Fix a long-standing bug where non-ASCII characters in search terms,
  including accented letters, would not match characters in a different
  case.
* Fix a long-standing bug where search terms using combining accents
  would not match display names using precomposed accents and vice
  versa.

To fully take effect, the user directory must be rebuilt after this
change.

Fixes #14630.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/15143.misc                           |   1 +
 synapse/storage/databases/main/user_directory.py |  52 ++++++++-
 tests/storage/test_user_directory.py             | 133 +++++++++++++++++++++++
 3 files changed, 184 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15143.misc

(limited to 'synapse')

diff --git a/changelog.d/15143.misc b/changelog.d/15143.misc
new file mode 100644
index 0000000000..cff4518811
--- /dev/null
+++ b/changelog.d/15143.misc
@@ -0,0 +1 @@
+Fix a long-standing bug where the user directory search was not case-insensitive for accented characters.
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index c3f2b61bd5..f16a509ac4 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -14,6 +14,7 @@
 
 import logging
 import re
+import unicodedata
 from typing import (
     TYPE_CHECKING,
     Iterable,
@@ -490,6 +491,11 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                 values={"display_name": display_name, "avatar_url": avatar_url},
             )
 
+            # The display name that goes into the database index.
+            index_display_name = display_name
+            if index_display_name is not None:
+                index_display_name = _filter_text_for_index(index_display_name)
+
             if isinstance(self.database_engine, PostgresEngine):
                 # We weight the localpart most highly, then display name and finally
                 # server name
@@ -507,11 +513,15 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                         user_id,
                         get_localpart_from_id(user_id),
                         get_domain_from_id(user_id),
-                        display_name,
+                        index_display_name,
                     ),
                 )
             elif isinstance(self.database_engine, Sqlite3Engine):
-                value = "%s %s" % (user_id, display_name) if display_name else user_id
+                value = (
+                    "%s %s" % (user_id, index_display_name)
+                    if index_display_name
+                    else user_id
+                )
                 self.db_pool.simple_upsert_txn(
                     txn,
                     table="user_directory_search",
@@ -896,6 +906,41 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
         return {"limited": limited, "results": results[0:limit]}
 
 
+def _filter_text_for_index(text: str) -> str:
+    """Transforms text before it is inserted into the user directory index, or searched
+    for in the user directory index.
+
+    Note that the user directory search table needs to be rebuilt whenever this function
+    changes.
+    """
+    # Lowercase the text, to make searches case-insensitive.
+    # This is necessary for both PostgreSQL and SQLite. PostgreSQL's
+    # `to_tsquery/to_tsvector` functions don't lowercase non-ASCII characters when using
+    # the "C" collation, while SQLite just doesn't lowercase non-ASCII characters at
+    # all.
+    text = text.lower()
+
+    # Normalize the text. NFKC normalization has two effects:
+    #  1. It canonicalizes the text, ie. maps all visually identical strings to the same
+    #     string. For example, ["e", "◌́"] is mapped to ["é"].
+    #  2. It maps strings that are roughly equivalent to the same string.
+    #     For example, ["ǆ"] is mapped to ["d", "ž"], ["①"] to ["1"] and ["i⁹"] to
+    #     ["i", "9"].
+    text = unicodedata.normalize("NFKC", text)
+
+    # Note that nothing is done to make searches accent-insensitive.
+    # That could be achieved by converting to NFKD form instead (with combining accents
+    # split out) and filtering out combining accents using `unicodedata.combining(c)`.
+    # The downside of this may be noisier search results, since search terms with
+    # explicit accents will match characters with no accents, or completely different
+    # accents.
+    #
+    # text = unicodedata.normalize("NFKD", text)
+    # text = "".join([c for c in text if not unicodedata.combining(c)])
+
+    return text
+
+
 def _parse_query_sqlite(search_term: str) -> str:
     """Takes a plain unicode string from the user and converts it into a form
     that can be passed to database.
@@ -905,6 +950,7 @@ def _parse_query_sqlite(search_term: str) -> str:
     We specifically add both a prefix and non prefix matching term so that
     exact matches get ranked higher.
     """
+    search_term = _filter_text_for_index(search_term)
 
     # Pull out the individual words, discarding any non-word characters.
     results = _parse_words(search_term)
@@ -917,6 +963,8 @@ def _parse_query_postgres(search_term: str) -> Tuple[str, str, str]:
     We use this so that we can add prefix matching, which isn't something
     that is supported by default.
     """
+    search_term = _filter_text_for_index(search_term)
+
     escaped_words = []
     for word in _parse_words(search_term):
         # Postgres tsvector and tsquery quoting rules:
diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py
index 2d169684cf..43b724c4dd 100644
--- a/tests/storage/test_user_directory.py
+++ b/tests/storage/test_user_directory.py
@@ -504,6 +504,139 @@ class UserDirectoryStoreTestCase(HomeserverTestCase):
             {"user_id": BELA, "display_name": "Bela", "avatar_url": None},
         )
 
+    @override_config({"user_directory": {"search_all_users": True}})
+    def test_search_user_dir_ascii_case_insensitivity(self) -> None:
+        """Tests that a user can look up another user by searching for their name in a
+        different case.
+        """
+        CHARLIE = "@someuser:example.org"
+        self.get_success(
+            self.store.update_profile_in_user_dir(CHARLIE, "Charlie", None)
+        )
+
+        r = self.get_success(self.store.search_user_dir(ALICE, "cHARLIE", 10))
+        self.assertFalse(r["limited"])
+        self.assertEqual(1, len(r["results"]))
+        self.assertDictEqual(
+            r["results"][0],
+            {"user_id": CHARLIE, "display_name": "Charlie", "avatar_url": None},
+        )
+
+    @override_config({"user_directory": {"search_all_users": True}})
+    def test_search_user_dir_unicode_case_insensitivity(self) -> None:
+        """Tests that a user can look up another user by searching for their name in a
+        different case.
+        """
+        IVAN = "@someuser:example.org"
+        self.get_success(self.store.update_profile_in_user_dir(IVAN, "Иван", None))
+
+        r = self.get_success(self.store.search_user_dir(ALICE, "иВАН", 10))
+        self.assertFalse(r["limited"])
+        self.assertEqual(1, len(r["results"]))
+        self.assertDictEqual(
+            r["results"][0],
+            {"user_id": IVAN, "display_name": "Иван", "avatar_url": None},
+        )
+
+    @override_config({"user_directory": {"search_all_users": True}})
+    def test_search_user_dir_dotted_dotless_i_case_insensitivity(self) -> None:
+        """Tests that a user can look up another user by searching for their name in a
+        different case, when their name contains dotted or dotless "i"s.
+
+        Some languages have dotted and dotless versions of "i", which are considered to
+        be different letters: i <-> İ, ı <-> I. To make things difficult, they reuse the
+        ASCII "i" and "I" code points, despite having different lowercase / uppercase
+        forms.
+        """
+        USER = "@someuser:example.org"
+
+        expected_matches = [
+            # (search_term, display_name)
+            # A search for "i" should match "İ".
+            ("iiiii", "İİİİİ"),
+            # A search for "I" should match "ı".
+            ("IIIII", "ııııı"),
+            # A search for "ı" should match "I".
+            ("ııııı", "IIIII"),
+            # A search for "İ" should match "i".
+            ("İİİİİ", "iiiii"),
+        ]
+
+        for search_term, display_name in expected_matches:
+            self.get_success(
+                self.store.update_profile_in_user_dir(USER, display_name, None)
+            )
+
+            r = self.get_success(self.store.search_user_dir(ALICE, search_term, 10))
+            self.assertFalse(r["limited"])
+            self.assertEqual(
+                1,
+                len(r["results"]),
+                f"searching for {search_term!r} did not match {display_name!r}",
+            )
+            self.assertDictEqual(
+                r["results"][0],
+                {"user_id": USER, "display_name": display_name, "avatar_url": None},
+            )
+
+        # We don't test for negative matches, to allow implementations that consider all
+        # the i variants to be the same.
+
+    test_search_user_dir_dotted_dotless_i_case_insensitivity.skip = "not supported"  # type: ignore
+
+    @override_config({"user_directory": {"search_all_users": True}})
+    def test_search_user_dir_unicode_normalization(self) -> None:
+        """Tests that a user can look up another user by searching for their name with
+        either composed or decomposed accents.
+        """
+        AMELIE = "@someuser:example.org"
+
+        expected_matches = [
+            # (search_term, display_name)
+            ("Ame\u0301lie", "Amélie"),
+            ("Amélie", "Ame\u0301lie"),
+        ]
+
+        for search_term, display_name in expected_matches:
+            self.get_success(
+                self.store.update_profile_in_user_dir(AMELIE, display_name, None)
+            )
+
+            r = self.get_success(self.store.search_user_dir(ALICE, search_term, 10))
+            self.assertFalse(r["limited"])
+            self.assertEqual(
+                1,
+                len(r["results"]),
+                f"searching for {search_term!r} did not match {display_name!r}",
+            )
+            self.assertDictEqual(
+                r["results"][0],
+                {"user_id": AMELIE, "display_name": display_name, "avatar_url": None},
+            )
+
+    @override_config({"user_directory": {"search_all_users": True}})
+    def test_search_user_dir_accent_insensitivity(self) -> None:
+        """Tests that a user can look up another user by searching for their name
+        without any accents.
+        """
+        AMELIE = "@someuser:example.org"
+        self.get_success(self.store.update_profile_in_user_dir(AMELIE, "Amélie", None))
+
+        r = self.get_success(self.store.search_user_dir(ALICE, "amelie", 10))
+        self.assertFalse(r["limited"])
+        self.assertEqual(1, len(r["results"]))
+        self.assertDictEqual(
+            r["results"][0],
+            {"user_id": AMELIE, "display_name": "Amélie", "avatar_url": None},
+        )
+
+        # It may be desirable for "é"s in search terms to not match plain "e"s and we
+        # really don't want "é"s in search terms to match "e"s with different accents.
+        # But we don't test for this to allow implementations that consider all
+        # "e"-lookalikes to be the same.
+
+    test_search_user_dir_accent_insensitivity.skip = "not supported yet"  # type: ignore
+
 
 class UserDirectoryStoreTestCaseWithIcu(UserDirectoryStoreTestCase):
     use_icu = True
-- 
cgit 1.5.1


From b2357a898cdd1f4a2222609abfe471801ea88dcd Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 24 Feb 2023 14:39:50 +0000
Subject: Fix bug where 5s delays would occasionally happen. (#15150)

This only affects deployments using workers.
---
 changelog.d/15150.bugfix              |  1 +
 synapse/replication/tcp/resource.py   | 18 +++++++++++
 tests/replication/tcp/test_handler.py | 61 +++++++++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+)
 create mode 100644 changelog.d/15150.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15150.bugfix b/changelog.d/15150.bugfix
new file mode 100644
index 0000000000..8668bc587f
--- /dev/null
+++ b/changelog.d/15150.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.76 where 5s delays would occasionally occur in deployments using workers.
diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py
index 9d17eff714..347467d863 100644
--- a/synapse/replication/tcp/resource.py
+++ b/synapse/replication/tcp/resource.py
@@ -238,6 +238,24 @@ class ReplicationStreamer:
                             except Exception:
                                 logger.exception("Failed to replicate")
 
+                        # The last token we send may not match the current
+                        # token, in which case we want to send out a `POSITION`
+                        # to tell other workers the actual current position.
+                        if updates[-1][0] < current_token:
+                            logger.info(
+                                "Sending position: %s -> %s",
+                                stream.NAME,
+                                current_token,
+                            )
+                            self.command_handler.send_command(
+                                PositionCommand(
+                                    stream.NAME,
+                                    self._instance_name,
+                                    updates[-1][0],
+                                    current_token,
+                                )
+                            )
+
             logger.debug("No more pending updates, breaking poke loop")
         finally:
             self.pending_updates = False
diff --git a/tests/replication/tcp/test_handler.py b/tests/replication/tcp/test_handler.py
index bf927beb6a..bab77b2df7 100644
--- a/tests/replication/tcp/test_handler.py
+++ b/tests/replication/tcp/test_handler.py
@@ -141,3 +141,64 @@ class ChannelsTestCase(BaseMultiWorkerStreamTestCase):
         self.get_success(ctx_worker1.__aexit__(None, None, None))
 
         self.assertTrue(d.called)
+
+    def test_wait_for_stream_position_rdata(self) -> None:
+        """Check that wait for stream position correctly waits for an update
+        from the correct instance, when RDATA is sent.
+        """
+        store = self.hs.get_datastores().main
+        cmd_handler = self.hs.get_replication_command_handler()
+        data_handler = self.hs.get_replication_data_handler()
+
+        worker1 = self.make_worker_hs(
+            "synapse.app.generic_worker",
+            extra_config={
+                "worker_name": "worker1",
+                "run_background_tasks_on": "worker1",
+                "redis": {"enabled": True},
+            },
+        )
+
+        cache_id_gen = worker1.get_datastores().main._cache_id_gen
+        assert cache_id_gen is not None
+
+        self.replicate()
+
+        # First, make sure the master knows that `worker1` exists.
+        initial_token = cache_id_gen.get_current_token()
+        cmd_handler.send_command(
+            PositionCommand("caches", "worker1", initial_token, initial_token)
+        )
+        self.replicate()
+
+        # `wait_for_stream_position` should only return once master receives a
+        # notification that `next_token2` has persisted.
+        ctx_worker1 = cache_id_gen.get_next_mult(2)
+        next_token1, next_token2 = self.get_success(ctx_worker1.__aenter__())
+
+        d = defer.ensureDeferred(
+            data_handler.wait_for_stream_position("worker1", "caches", next_token2)
+        )
+        self.assertFalse(d.called)
+
+        # Insert an entry into the cache stream with token `next_token1`, but
+        # not `next_token2`.
+        self.get_success(
+            store.db_pool.simple_insert(
+                table="cache_invalidation_stream_by_instance",
+                values={
+                    "stream_id": next_token1,
+                    "instance_name": "worker1",
+                    "cache_func": "foo",
+                    "keys": [],
+                    "invalidation_ts": 0,
+                },
+            )
+        )
+
+        # Finish the context manager, triggering the data to be sent to master.
+        self.get_success(ctx_worker1.__aexit__(None, None, None))
+
+        # Master should get told about `next_token2`, so the deferred should
+        # resolve.
+        self.assertTrue(d.called)
-- 
cgit 1.5.1


From 1c95ddd09bbc46046a3412e7bb03a87aa3b6f65a Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Fri, 24 Feb 2023 13:15:29 -0800
Subject: Batch up storing state groups when creating new room (#14918)

---
 changelog.d/14918.misc                      |   1 +
 synapse/events/snapshot.py                  |  49 +++++++++++
 synapse/handlers/message.py                 |  16 ++--
 synapse/handlers/room.py                    |  37 ++++----
 synapse/handlers/room_batch.py              |   4 +-
 synapse/handlers/room_member.py             |  13 ++-
 synapse/storage/databases/state/store.py    | 119 ++++++++++++++++++++++++++
 tests/handlers/test_message.py              |  25 ++++--
 tests/handlers/test_register.py             |   3 +-
 tests/push/test_bulk_push_rule_evaluator.py |  13 +--
 tests/rest/client/test_rooms.py             |   4 +-
 tests/storage/test_event_chain.py           |   6 +-
 tests/storage/test_state.py                 | 126 ++++++++++++++++++++++++++++
 tests/unittest.py                           |   4 +-
 14 files changed, 371 insertions(+), 49 deletions(-)
 create mode 100644 changelog.d/14918.misc

(limited to 'synapse')

diff --git a/changelog.d/14918.misc b/changelog.d/14918.misc
new file mode 100644
index 0000000000..828794354a
--- /dev/null
+++ b/changelog.d/14918.misc
@@ -0,0 +1 @@
+Batch up storing state groups when creating a new room.
\ No newline at end of file
diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py
index e0d82ad81c..a91a5d1e3c 100644
--- a/synapse/events/snapshot.py
+++ b/synapse/events/snapshot.py
@@ -23,6 +23,7 @@ from synapse.types import JsonDict, StateMap
 
 if TYPE_CHECKING:
     from synapse.storage.controllers import StorageControllers
+    from synapse.storage.databases import StateGroupDataStore
     from synapse.storage.databases.main import DataStore
     from synapse.types.state import StateFilter
 
@@ -348,6 +349,54 @@ class UnpersistedEventContext(UnpersistedEventContextBase):
     partial_state: bool
     state_map_before_event: Optional[StateMap[str]] = None
 
+    @classmethod
+    async def batch_persist_unpersisted_contexts(
+        cls,
+        events_and_context: List[Tuple[EventBase, "UnpersistedEventContextBase"]],
+        room_id: str,
+        last_known_state_group: int,
+        datastore: "StateGroupDataStore",
+    ) -> List[Tuple[EventBase, EventContext]]:
+        """
+        Takes a list of events and their associated unpersisted contexts and persists
+        the unpersisted contexts, returning a list of events and persisted contexts.
+        Note that all the events must be in a linear chain (ie a <- b <- c).
+
+        Args:
+            events_and_context: A list of events and their unpersisted contexts
+            room_id: the room_id for the events
+            last_known_state_group: the last persisted state group
+            datastore: a state datastore
+        """
+        amended_events_and_context = await datastore.store_state_deltas_for_batched(
+            events_and_context, room_id, last_known_state_group
+        )
+
+        events_and_persisted_context = []
+        for event, unpersisted_context in amended_events_and_context:
+            if event.is_state():
+                context = EventContext(
+                    storage=unpersisted_context._storage,
+                    state_group=unpersisted_context.state_group_after_event,
+                    state_group_before_event=unpersisted_context.state_group_before_event,
+                    state_delta_due_to_event=unpersisted_context.state_delta_due_to_event,
+                    partial_state=unpersisted_context.partial_state,
+                    prev_group=unpersisted_context.state_group_before_event,
+                    delta_ids=unpersisted_context.state_delta_due_to_event,
+                )
+            else:
+                context = EventContext(
+                    storage=unpersisted_context._storage,
+                    state_group=unpersisted_context.state_group_after_event,
+                    state_group_before_event=unpersisted_context.state_group_before_event,
+                    state_delta_due_to_event=unpersisted_context.state_delta_due_to_event,
+                    partial_state=unpersisted_context.partial_state,
+                    prev_group=unpersisted_context.prev_group_for_state_group_before_event,
+                    delta_ids=unpersisted_context.delta_ids_to_state_group_before_event,
+                )
+            events_and_persisted_context.append((event, context))
+        return events_and_persisted_context
+
     async def get_prev_state_ids(
         self, state_filter: Optional["StateFilter"] = None
     ) -> StateMap[str]:
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index aa90d0000d..e433d6b01f 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -574,7 +574,7 @@ class EventCreationHandler:
         state_map: Optional[StateMap[str]] = None,
         for_batch: bool = False,
         current_state_group: Optional[int] = None,
-    ) -> Tuple[EventBase, EventContext]:
+    ) -> Tuple[EventBase, UnpersistedEventContextBase]:
         """
         Given a dict from a client, create a new event. If bool for_batch is true, will
         create an event using the prev_event_ids, and will create an event context for
@@ -721,8 +721,6 @@ class EventCreationHandler:
             current_state_group=current_state_group,
         )
 
-        context = await unpersisted_context.persist(event)
-
         # In an ideal world we wouldn't need the second part of this condition. However,
         # this behaviour isn't spec'd yet, meaning we should be able to deactivate this
         # behaviour. Another reason is that this code is also evaluated each time a new
@@ -739,7 +737,7 @@ class EventCreationHandler:
                 assert state_map is not None
                 prev_event_id = state_map.get((EventTypes.Member, event.sender))
             else:
-                prev_state_ids = await context.get_prev_state_ids(
+                prev_state_ids = await unpersisted_context.get_prev_state_ids(
                     StateFilter.from_types([(EventTypes.Member, None)])
                 )
                 prev_event_id = prev_state_ids.get((EventTypes.Member, event.sender))
@@ -764,8 +762,7 @@ class EventCreationHandler:
                 )
 
         self.validator.validate_new(event, self.config)
-
-        return event, context
+        return event, unpersisted_context
 
     async def _is_exempt_from_privacy_policy(
         self, builder: EventBuilder, requester: Requester
@@ -1005,7 +1002,7 @@ class EventCreationHandler:
         max_retries = 5
         for i in range(max_retries):
             try:
-                event, context = await self.create_event(
+                event, unpersisted_context = await self.create_event(
                     requester,
                     event_dict,
                     txn_id=txn_id,
@@ -1016,6 +1013,7 @@ class EventCreationHandler:
                     historical=historical,
                     depth=depth,
                 )
+                context = await unpersisted_context.persist(event)
 
                 assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % (
                     event.sender,
@@ -1190,7 +1188,6 @@ class EventCreationHandler:
         if for_batch:
             assert prev_event_ids is not None
             assert state_map is not None
-            assert current_state_group is not None
             auth_ids = self._event_auth_handler.compute_auth_events(builder, state_map)
             event = await builder.build(
                 prev_event_ids=prev_event_ids, auth_event_ids=auth_ids, depth=depth
@@ -2046,7 +2043,7 @@ class EventCreationHandler:
                 max_retries = 5
                 for i in range(max_retries):
                     try:
-                        event, context = await self.create_event(
+                        event, unpersisted_context = await self.create_event(
                             requester,
                             {
                                 "type": EventTypes.Dummy,
@@ -2055,6 +2052,7 @@ class EventCreationHandler:
                                 "sender": user_id,
                             },
                         )
+                        context = await unpersisted_context.persist(event)
 
                         event.internal_metadata.proactively_send = False
 
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index a26ec02284..b1784638f4 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -51,6 +51,7 @@ from synapse.api.filtering import Filter
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion
 from synapse.event_auth import validate_event_for_room_version
 from synapse.events import EventBase
+from synapse.events.snapshot import UnpersistedEventContext
 from synapse.events.utils import copy_and_fixup_power_levels_contents
 from synapse.handlers.relations import BundledAggregations
 from synapse.module_api import NOT_SPAM
@@ -211,7 +212,7 @@ class RoomCreationHandler:
                 # the required power level to send the tombstone event.
                 (
                     tombstone_event,
-                    tombstone_context,
+                    tombstone_unpersisted_context,
                 ) = await self.event_creation_handler.create_event(
                     requester,
                     {
@@ -225,6 +226,9 @@ class RoomCreationHandler:
                         },
                     },
                 )
+                tombstone_context = await tombstone_unpersisted_context.persist(
+                    tombstone_event
+                )
                 validate_event_for_room_version(tombstone_event)
                 await self._event_auth_handler.check_auth_rules_from_context(
                     tombstone_event
@@ -1092,7 +1096,7 @@ class RoomCreationHandler:
             content: JsonDict,
             for_batch: bool,
             **kwargs: Any,
-        ) -> Tuple[EventBase, synapse.events.snapshot.EventContext]:
+        ) -> Tuple[EventBase, synapse.events.snapshot.UnpersistedEventContextBase]:
             """
             Creates an event and associated event context.
             Args:
@@ -1111,20 +1115,23 @@ class RoomCreationHandler:
 
             event_dict = create_event_dict(etype, content, **kwargs)
 
-            new_event, new_context = await self.event_creation_handler.create_event(
+            (
+                new_event,
+                new_unpersisted_context,
+            ) = await self.event_creation_handler.create_event(
                 creator,
                 event_dict,
                 prev_event_ids=prev_event,
                 depth=depth,
                 state_map=state_map,
                 for_batch=for_batch,
-                current_state_group=current_state_group,
             )
+
             depth += 1
             prev_event = [new_event.event_id]
             state_map[(new_event.type, new_event.state_key)] = new_event.event_id
 
-            return new_event, new_context
+            return new_event, new_unpersisted_context
 
         try:
             config = self._presets_dict[preset_config]
@@ -1134,10 +1141,10 @@ class RoomCreationHandler:
             )
 
         creation_content.update({"creator": creator_id})
-        creation_event, creation_context = await create_event(
+        creation_event, unpersisted_creation_context = await create_event(
             EventTypes.Create, creation_content, False
         )
-
+        creation_context = await unpersisted_creation_context.persist(creation_event)
         logger.debug("Sending %s in new room", EventTypes.Member)
         ev = await self.event_creation_handler.handle_new_client_event(
             requester=creator,
@@ -1181,7 +1188,6 @@ class RoomCreationHandler:
             power_event, power_context = await create_event(
                 EventTypes.PowerLevels, pl_content, True
             )
-            current_state_group = power_context._state_group
             events_to_send.append((power_event, power_context))
         else:
             power_level_content: JsonDict = {
@@ -1230,14 +1236,12 @@ class RoomCreationHandler:
                 power_level_content,
                 True,
             )
-            current_state_group = pl_context._state_group
             events_to_send.append((pl_event, pl_context))
 
         if room_alias and (EventTypes.CanonicalAlias, "") not in initial_state:
             room_alias_event, room_alias_context = await create_event(
                 EventTypes.CanonicalAlias, {"alias": room_alias.to_string()}, True
             )
-            current_state_group = room_alias_context._state_group
             events_to_send.append((room_alias_event, room_alias_context))
 
         if (EventTypes.JoinRules, "") not in initial_state:
@@ -1246,7 +1250,6 @@ class RoomCreationHandler:
                 {"join_rule": config["join_rules"]},
                 True,
             )
-            current_state_group = join_rules_context._state_group
             events_to_send.append((join_rules_event, join_rules_context))
 
         if (EventTypes.RoomHistoryVisibility, "") not in initial_state:
@@ -1255,7 +1258,6 @@ class RoomCreationHandler:
                 {"history_visibility": config["history_visibility"]},
                 True,
             )
-            current_state_group = visibility_context._state_group
             events_to_send.append((visibility_event, visibility_context))
 
         if config["guest_can_join"]:
@@ -1265,14 +1267,12 @@ class RoomCreationHandler:
                     {EventContentFields.GUEST_ACCESS: GuestAccess.CAN_JOIN},
                     True,
                 )
-                current_state_group = guest_access_context._state_group
                 events_to_send.append((guest_access_event, guest_access_context))
 
         for (etype, state_key), content in initial_state.items():
             event, context = await create_event(
                 etype, content, True, state_key=state_key
             )
-            current_state_group = context._state_group
             events_to_send.append((event, context))
 
         if config["encrypted"]:
@@ -1284,9 +1284,16 @@ class RoomCreationHandler:
             )
             events_to_send.append((encryption_event, encryption_context))
 
+        datastore = self.hs.get_datastores().state
+        events_and_context = (
+            await UnpersistedEventContext.batch_persist_unpersisted_contexts(
+                events_to_send, room_id, current_state_group, datastore
+            )
+        )
+
         last_event = await self.event_creation_handler.handle_new_client_event(
             creator,
-            events_to_send,
+            events_and_context,
             ignore_shadow_ban=True,
             ratelimit=False,
         )
diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py
index 5d4ca0e2d2..bf9df60218 100644
--- a/synapse/handlers/room_batch.py
+++ b/synapse/handlers/room_batch.py
@@ -327,7 +327,7 @@ class RoomBatchHandler:
             # Mark all events as historical
             event_dict["content"][EventContentFields.MSC2716_HISTORICAL] = True
 
-            event, context = await self.event_creation_handler.create_event(
+            event, unpersisted_context = await self.event_creation_handler.create_event(
                 await self.create_requester_for_user_id_from_app_service(
                     ev["sender"], app_service_requester.app_service
                 ),
@@ -345,7 +345,7 @@ class RoomBatchHandler:
                 historical=True,
                 depth=inherited_depth,
             )
-
+            context = await unpersisted_context.persist(event)
             assert context._state_group
 
             # Normally this is done when persisting the event but we have to
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index a965c7ec76..de7476f300 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -414,7 +414,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         max_retries = 5
         for i in range(max_retries):
             try:
-                event, context = await self.event_creation_handler.create_event(
+                (
+                    event,
+                    unpersisted_context,
+                ) = await self.event_creation_handler.create_event(
                     requester,
                     {
                         "type": EventTypes.Member,
@@ -435,7 +438,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                     outlier=outlier,
                     historical=historical,
                 )
-
+                context = await unpersisted_context.persist(event)
                 prev_state_ids = await context.get_prev_state_ids(
                     StateFilter.from_types([(EventTypes.Member, None)])
                 )
@@ -1944,7 +1947,10 @@ class RoomMemberMasterHandler(RoomMemberHandler):
         max_retries = 5
         for i in range(max_retries):
             try:
-                event, context = await self.event_creation_handler.create_event(
+                (
+                    event,
+                    unpersisted_context,
+                ) = await self.event_creation_handler.create_event(
                     requester,
                     event_dict,
                     txn_id=txn_id,
@@ -1952,6 +1958,7 @@ class RoomMemberMasterHandler(RoomMemberHandler):
                     auth_event_ids=auth_event_ids,
                     outlier=True,
                 )
+                context = await unpersisted_context.persist(event)
                 event.internal_metadata.out_of_band_membership = True
 
                 result_event = (
diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py
index 89b1faa6c8..bf4cdfdf29 100644
--- a/synapse/storage/databases/state/store.py
+++ b/synapse/storage/databases/state/store.py
@@ -18,6 +18,8 @@ from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Se
 import attr
 
 from synapse.api.constants import EventTypes
+from synapse.events import EventBase
+from synapse.events.snapshot import UnpersistedEventContext, UnpersistedEventContextBase
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
     DatabasePool,
@@ -401,6 +403,123 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
                 fetched_keys=non_member_types,
             )
 
+    async def store_state_deltas_for_batched(
+        self,
+        events_and_context: List[Tuple[EventBase, UnpersistedEventContextBase]],
+        room_id: str,
+        prev_group: int,
+    ) -> List[Tuple[EventBase, UnpersistedEventContext]]:
+        """Generate and store state deltas for a group of events and contexts created to be
+        batch persisted. Note that all the events must be in a linear chain (ie a <- b <- c).
+
+        Args:
+            events_and_context: the events to generate and store state groups for
+            and their associated contexts
+            room_id: the id of the room the events were created for
+            prev_group: the state group of the last event persisted before the batched events
+            were created
+        """
+
+        def insert_deltas_group_txn(
+            txn: LoggingTransaction,
+            events_and_context: List[Tuple[EventBase, UnpersistedEventContext]],
+            prev_group: int,
+        ) -> List[Tuple[EventBase, UnpersistedEventContext]]:
+            """Generate and store state groups for the provided events and contexts.
+
+            Requires that we have the state as a delta from the last persisted state group.
+
+            Returns:
+                The given events and contexts, with each context's state groups set
+            """
+            is_in_db = self.db_pool.simple_select_one_onecol_txn(
+                txn,
+                table="state_groups",
+                keyvalues={"id": prev_group},
+                retcol="id",
+                allow_none=True,
+            )
+            if not is_in_db:
+                raise Exception(
+                    "Trying to persist state with unpersisted prev_group: %r"
+                    % (prev_group,)
+                )
+
+            num_state_groups = sum(
+                1 for event, _ in events_and_context if event.is_state()
+            )
+
+            state_groups = self._state_group_seq_gen.get_next_mult_txn(
+                txn, num_state_groups
+            )
+
+            sg_before = prev_group
+            state_group_iter = iter(state_groups)
+            for event, context in events_and_context:
+                if not event.is_state():
+                    context.state_group_after_event = sg_before
+                    context.state_group_before_event = sg_before
+                    continue
+
+                sg_after = next(state_group_iter)
+                context.state_group_after_event = sg_after
+                context.state_group_before_event = sg_before
+                context.state_delta_due_to_event = {
+                    (event.type, event.state_key): event.event_id
+                }
+                sg_before = sg_after
+
+            self.db_pool.simple_insert_many_txn(
+                txn,
+                table="state_groups",
+                keys=("id", "room_id", "event_id"),
+                values=[
+                    (context.state_group_after_event, room_id, event.event_id)
+                    for event, context in events_and_context
+                    if event.is_state()
+                ],
+            )
+
+            self.db_pool.simple_insert_many_txn(
+                txn,
+                table="state_group_edges",
+                keys=("state_group", "prev_state_group"),
+                values=[
+                    (
+                        context.state_group_after_event,
+                        context.state_group_before_event,
+                    )
+                    for event, context in events_and_context
+                    if event.is_state()
+                ],
+            )
+
+            self.db_pool.simple_insert_many_txn(
+                txn,
+                table="state_groups_state",
+                keys=("state_group", "room_id", "type", "state_key", "event_id"),
+                values=[
+                    (
+                        context.state_group_after_event,
+                        room_id,
+                        key[0],
+                        key[1],
+                        state_id,
+                    )
+                    for event, context in events_and_context
+                    if context.state_delta_due_to_event is not None
+                    for key, state_id in context.state_delta_due_to_event.items()
+                ],
+            )
+            return events_and_context
+
+        return await self.db_pool.runInteraction(
+            "store_state_deltas_for_batched.insert_deltas_group",
+            insert_deltas_group_txn,
+            events_and_context,
+            prev_group,
+        )
+
     async def store_state_group(
         self,
         event_id: str,
diff --git a/tests/handlers/test_message.py b/tests/handlers/test_message.py
index 69d384442f..9691d66b48 100644
--- a/tests/handlers/test_message.py
+++ b/tests/handlers/test_message.py
@@ -18,7 +18,7 @@ from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.constants import EventTypes
 from synapse.events import EventBase
-from synapse.events.snapshot import EventContext
+from synapse.events.snapshot import EventContext, UnpersistedEventContextBase
 from synapse.rest import admin
 from synapse.rest.client import login, room
 from synapse.server import HomeServer
@@ -79,7 +79,9 @@ class EventCreationTestCase(unittest.HomeserverTestCase):
 
         return memberEvent, memberEventContext
 
-    def _create_duplicate_event(self, txn_id: str) -> Tuple[EventBase, EventContext]:
+    def _create_duplicate_event(
+        self, txn_id: str
+    ) -> Tuple[EventBase, UnpersistedEventContextBase]:
         """Create a new event with the given transaction ID. All events produced
         by this method will be considered duplicates.
         """
@@ -107,7 +109,8 @@ class EventCreationTestCase(unittest.HomeserverTestCase):
 
         txn_id = "something_suitably_random"
 
-        event1, context = self._create_duplicate_event(txn_id)
+        event1, unpersisted_context = self._create_duplicate_event(txn_id)
+        context = self.get_success(unpersisted_context.persist(event1))
 
         ret_event1 = self.get_success(
             self.handler.handle_new_client_event(
@@ -119,7 +122,8 @@ class EventCreationTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual(event1.event_id, ret_event1.event_id)
 
-        event2, context = self._create_duplicate_event(txn_id)
+        event2, unpersisted_context = self._create_duplicate_event(txn_id)
+        context = self.get_success(unpersisted_context.persist(event2))
 
         # We want to test that the deduplication at the persit event end works,
         # so we want to make sure we test with different events.
@@ -140,7 +144,9 @@ class EventCreationTestCase(unittest.HomeserverTestCase):
 
         # Let's test that calling `persist_event` directly also does the right
         # thing.
-        event3, context = self._create_duplicate_event(txn_id)
+        event3, unpersisted_context = self._create_duplicate_event(txn_id)
+        context = self.get_success(unpersisted_context.persist(event3))
+
         self.assertNotEqual(event1.event_id, event3.event_id)
 
         ret_event3, event_pos3, _ = self.get_success(
@@ -154,7 +160,8 @@ class EventCreationTestCase(unittest.HomeserverTestCase):
 
         # Let's test that calling `persist_events` directly also does the right
         # thing.
-        event4, context = self._create_duplicate_event(txn_id)
+        event4, unpersisted_context = self._create_duplicate_event(txn_id)
+        context = self.get_success(unpersisted_context.persist(event4))
         self.assertNotEqual(event1.event_id, event3.event_id)
 
         events, _ = self.get_success(
@@ -174,8 +181,10 @@ class EventCreationTestCase(unittest.HomeserverTestCase):
         txn_id = "something_else_suitably_random"
 
         # Create two duplicate events to persist at the same time
-        event1, context1 = self._create_duplicate_event(txn_id)
-        event2, context2 = self._create_duplicate_event(txn_id)
+        event1, unpersisted_context1 = self._create_duplicate_event(txn_id)
+        context1 = self.get_success(unpersisted_context1.persist(event1))
+        event2, unpersisted_context2 = self._create_duplicate_event(txn_id)
+        context2 = self.get_success(unpersisted_context2.persist(event2))
 
         # Ensure their event IDs are different to start with
         self.assertNotEqual(event1.event_id, event2.event_id)
diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py
index 1db99b3c00..aff1ec4758 100644
--- a/tests/handlers/test_register.py
+++ b/tests/handlers/test_register.py
@@ -507,7 +507,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         # Lower the permissions of the inviter.
         event_creation_handler = self.hs.get_event_creation_handler()
         requester = create_requester(inviter)
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             event_creation_handler.create_event(
                 requester,
                 {
@@ -519,6 +519,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
                 },
             )
         )
+        context = self.get_success(unpersisted_context.persist(event))
         self.get_success(
             event_creation_handler.handle_new_client_event(
                 requester, events_and_context=[(event, context)]
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
index dce6899e78..1458076a90 100644
--- a/tests/push/test_bulk_push_rule_evaluator.py
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -130,7 +130,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
 
         # Create a new message event, and try to evaluate it under the dodgy
         # power level event.
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             self.event_creation_handler.create_event(
                 self.requester,
                 {
@@ -145,6 +145,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
                 prev_event_ids=[pl_event_id],
             )
         )
+        context = self.get_success(unpersisted_context.persist(event))
 
         bulk_evaluator = BulkPushRuleEvaluator(self.hs)
         # should not raise
@@ -170,7 +171,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         """Ensure that push rules are not calculated when disabled in the config"""
 
         # Create a new message event which should cause a notification.
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             self.event_creation_handler.create_event(
                 self.requester,
                 {
@@ -184,6 +185,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
                 },
             )
         )
+        context = self.get_success(unpersisted_context.persist(event))
 
         bulk_evaluator = BulkPushRuleEvaluator(self.hs)
         # Mock the method which calculates push rules -- we do this instead of
@@ -200,7 +202,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
     ) -> bool:
         """Returns true iff the `mentions` trigger an event push action."""
         # Create a new message event which should cause a notification.
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             self.event_creation_handler.create_event(
                 self.requester,
                 {
@@ -211,7 +213,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
                 },
             )
         )
-
+        context = self.get_success(unpersisted_context.persist(event))
         # Execute the push rule machinery.
         self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)]))
 
@@ -390,7 +392,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         bulk_evaluator = BulkPushRuleEvaluator(self.hs)
 
         # Create & persist an event to use as the parent of the relation.
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             self.event_creation_handler.create_event(
                 self.requester,
                 {
@@ -404,6 +406,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
                 },
             )
         )
+        context = self.get_success(unpersisted_context.persist(event))
         self.get_success(
             self.event_creation_handler.handle_new_client_event(
                 self.requester, events_and_context=[(event, context)]
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index 4dd763096d..a4900703c4 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -713,7 +713,7 @@ class RoomsCreateTestCase(RoomBase):
         self.assertEqual(HTTPStatus.OK, channel.code, channel.result)
         self.assertTrue("room_id" in channel.json_body)
         assert channel.resource_usage is not None
-        self.assertEqual(33, channel.resource_usage.db_txn_count)
+        self.assertEqual(30, channel.resource_usage.db_txn_count)
 
     def test_post_room_initial_state(self) -> None:
         # POST with initial_state config key, expect new room id
@@ -726,7 +726,7 @@ class RoomsCreateTestCase(RoomBase):
         self.assertEqual(HTTPStatus.OK, channel.code, channel.result)
         self.assertTrue("room_id" in channel.json_body)
         assert channel.resource_usage is not None
-        self.assertEqual(36, channel.resource_usage.db_txn_count)
+        self.assertEqual(32, channel.resource_usage.db_txn_count)
 
     def test_post_room_visibility_key(self) -> None:
         # POST with visibility config key, expect new room id
diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py
index 73d11e7786..e39b63edac 100644
--- a/tests/storage/test_event_chain.py
+++ b/tests/storage/test_event_chain.py
@@ -522,7 +522,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
         latest_event_ids = self.get_success(
             self.store.get_prev_events_for_room(room_id)
         )
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             event_handler.create_event(
                 self.requester,
                 {
@@ -535,6 +535,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
                 prev_event_ids=latest_event_ids,
             )
         )
+        context = self.get_success(unpersisted_context.persist(event))
         self.get_success(
             event_handler.handle_new_client_event(
                 self.requester, events_and_context=[(event, context)]
@@ -544,7 +545,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
         assert state_ids1 is not None
         state1 = set(state_ids1.values())
 
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             event_handler.create_event(
                 self.requester,
                 {
@@ -557,6 +558,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
                 prev_event_ids=latest_event_ids,
             )
         )
+        context = self.get_success(unpersisted_context.persist(event))
         self.get_success(
             event_handler.handle_new_client_event(
                 self.requester, events_and_context=[(event, context)]
diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py
index e82c03f597..62aed6af0a 100644
--- a/tests/storage/test_state.py
+++ b/tests/storage/test_state.py
@@ -496,3 +496,129 @@ class StateStoreTestCase(HomeserverTestCase):
 
         self.assertEqual(is_all, True)
         self.assertDictEqual({(e5.type, e5.state_key): e5.event_id}, state_dict)
+
+    def test_batched_state_group_storing(self) -> None:
+        creation_event = self.inject_state_event(
+            self.room, self.u_alice, EventTypes.Create, "", {}
+        )
+        state_to_event = self.get_success(
+            self.storage.state.get_state_groups(
+                self.room.to_string(), [creation_event.event_id]
+            )
+        )
+        current_state_group = list(state_to_event.keys())[0]
+
+        # create some unpersisted events and event contexts to store against room
+        events_and_context = []
+        builder = self.event_builder_factory.for_room_version(
+            RoomVersions.V1,
+            {
+                "type": EventTypes.Name,
+                "sender": self.u_alice.to_string(),
+                "state_key": "",
+                "room_id": self.room.to_string(),
+                "content": {"name": "first rename of room"},
+            },
+        )
+
+        event1, unpersisted_context1 = self.get_success(
+            self.event_creation_handler.create_new_client_event(builder)
+        )
+        events_and_context.append((event1, unpersisted_context1))
+
+        builder2 = self.event_builder_factory.for_room_version(
+            RoomVersions.V1,
+            {
+                "type": EventTypes.JoinRules,
+                "sender": self.u_alice.to_string(),
+                "state_key": "",
+                "room_id": self.room.to_string(),
+                "content": {"join_rule": "private"},
+            },
+        )
+
+        event2, unpersisted_context2 = self.get_success(
+            self.event_creation_handler.create_new_client_event(builder2)
+        )
+        events_and_context.append((event2, unpersisted_context2))
+
+        builder3 = self.event_builder_factory.for_room_version(
+            RoomVersions.V1,
+            {
+                "type": EventTypes.Message,
+                "sender": self.u_alice.to_string(),
+                "room_id": self.room.to_string(),
+                "content": {"body": "hello from event 3", "msgtype": "m.text"},
+            },
+        )
+
+        event3, unpersisted_context3 = self.get_success(
+            self.event_creation_handler.create_new_client_event(builder3)
+        )
+        events_and_context.append((event3, unpersisted_context3))
+
+        builder4 = self.event_builder_factory.for_room_version(
+            RoomVersions.V1,
+            {
+                "type": EventTypes.JoinRules,
+                "sender": self.u_alice.to_string(),
+                "state_key": "",
+                "room_id": self.room.to_string(),
+                "content": {"join_rule": "public"},
+            },
+        )
+
+        event4, unpersisted_context4 = self.get_success(
+            self.event_creation_handler.create_new_client_event(builder4)
+        )
+        events_and_context.append((event4, unpersisted_context4))
+
+        processed_events_and_context = self.get_success(
+            self.hs.get_datastores().state.store_state_deltas_for_batched(
+                events_and_context, self.room.to_string(), current_state_group
+            )
+        )
+
+        # check that only state events are in state_groups, and all state events are in state_groups
+        res = self.get_success(
+            self.store.db_pool.simple_select_list(
+                table="state_groups",
+                keyvalues=None,
+                retcols=("event_id",),
+            )
+        )
+
+        events = []
+        for result in res:
+            self.assertNotIn(event3.event_id, result)
+            events.append(result.get("event_id"))
+
+        for event, _ in processed_events_and_context:
+            if event.is_state():
+                self.assertIn(event.event_id, events)
+
+        # check that each unique state has state group in state_groups_state and that the
+        # type/state key is correct, and check that each state event's state group
+        # has an entry and prev event in state_group_edges
+        for event, context in processed_events_and_context:
+            if event.is_state():
+                state = self.get_success(
+                    self.store.db_pool.simple_select_list(
+                        table="state_groups_state",
+                        keyvalues={"state_group": context.state_group_after_event},
+                        retcols=("type", "state_key"),
+                    )
+                )
+                self.assertEqual(event.type, state[0].get("type"))
+                self.assertEqual(event.state_key, state[0].get("state_key"))
+
+                groups = self.get_success(
+                    self.store.db_pool.simple_select_list(
+                        table="state_group_edges",
+                        keyvalues={"state_group": str(context.state_group_after_event)},
+                        retcols=("*",),
+                    )
+                )
+                self.assertEqual(
+                    context.state_group_before_event, groups[0].get("prev_state_group")
+                )
diff --git a/tests/unittest.py b/tests/unittest.py
index b21e7f1221..f9160faa1d 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -723,7 +723,7 @@ class HomeserverTestCase(TestCase):
         event_creator = self.hs.get_event_creation_handler()
         requester = create_requester(user)
 
-        event, context = self.get_success(
+        event, unpersisted_context = self.get_success(
             event_creator.create_event(
                 requester,
                 {
@@ -735,7 +735,7 @@ class HomeserverTestCase(TestCase):
                 prev_event_ids=prev_event_ids,
             )
         )
-
+        context = self.get_success(unpersisted_context.persist(event))
         if soft_failed:
             event.internal_metadata.soft_failed = True
 
-- 
cgit 1.5.1


From 3f2ef205e228282a8a744db59115caa4b17da9a1 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Mon, 27 Feb 2023 13:03:22 +0000
Subject: Small fixes to `MatrixFederationHttpClient` docstrings (#15148)

---
 changelog.d/15148.doc                  |  1 +
 synapse/http/matrixfederationclient.py | 16 +++++++++-------
 2 files changed, 10 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/15148.doc

(limited to 'synapse')

diff --git a/changelog.d/15148.doc b/changelog.d/15148.doc
new file mode 100644
index 0000000000..4e9e163306
--- /dev/null
+++ b/changelog.d/15148.doc
@@ -0,0 +1 @@
+Correct small documentation errors in some `MatrixFederationHttpClient` methods.
\ No newline at end of file
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index 312aab4dcc..3302d4e48a 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -440,7 +440,7 @@ class MatrixFederationHttpClient:
         Args:
             request: details of request to be sent
 
-            retry_on_dns_fail: true if the request should be retied on DNS failures
+            retry_on_dns_fail: true if the request should be retried on DNS failures
 
             timeout: number of milliseconds to wait for the response headers
                 (including connecting to the server), *for each attempt*.
@@ -475,7 +475,7 @@ class MatrixFederationHttpClient:
                 (except 429).
             NotRetryingDestination: If we are not yet ready to retry this
                 server.
-            FederationDeniedError: If this destination  is not on our
+            FederationDeniedError: If this destination is not on our
                 federation whitelist
             RequestSendFailed: If there were problems connecting to the
                 remote, due to e.g. DNS failures, connection timeouts etc.
@@ -871,7 +871,7 @@ class MatrixFederationHttpClient:
                 (except 429).
             NotRetryingDestination: If we are not yet ready to retry this
                 server.
-            FederationDeniedError: If this destination  is not on our
+            FederationDeniedError: If this destination is not on our
                 federation whitelist
             RequestSendFailed: If there were problems connecting to the
                 remote, due to e.g. DNS failures, connection timeouts etc.
@@ -958,7 +958,7 @@ class MatrixFederationHttpClient:
                 (except 429).
             NotRetryingDestination: If we are not yet ready to retry this
                 server.
-            FederationDeniedError: If this destination  is not on our
+            FederationDeniedError: If this destination is not on our
                 federation whitelist
             RequestSendFailed: If there were problems connecting to the
                 remote, due to e.g. DNS failures, connection timeouts etc.
@@ -1036,6 +1036,8 @@ class MatrixFederationHttpClient:
             args: A dictionary used to create query strings, defaults to
                 None.
 
+            retry_on_dns_fail: true if the request should be retried on DNS failures
+
             timeout: number of milliseconds to wait for the response.
                 self._default_timeout (60s) by default.
 
@@ -1063,7 +1065,7 @@ class MatrixFederationHttpClient:
                 (except 429).
             NotRetryingDestination: If we are not yet ready to retry this
                 server.
-            FederationDeniedError: If this destination  is not on our
+            FederationDeniedError: If this destination is not on our
                 federation whitelist
             RequestSendFailed: If there were problems connecting to the
                 remote, due to e.g. DNS failures, connection timeouts etc.
@@ -1141,7 +1143,7 @@ class MatrixFederationHttpClient:
                 (except 429).
             NotRetryingDestination: If we are not yet ready to retry this
                 server.
-            FederationDeniedError: If this destination  is not on our
+            FederationDeniedError: If this destination is not on our
                 federation whitelist
             RequestSendFailed: If there were problems connecting to the
                 remote, due to e.g. DNS failures, connection timeouts etc.
@@ -1197,7 +1199,7 @@ class MatrixFederationHttpClient:
                 (except 429).
             NotRetryingDestination: If we are not yet ready to retry this
                 server.
-            FederationDeniedError: If this destination  is not on our
+            FederationDeniedError: If this destination is not on our
                 federation whitelist
             RequestSendFailed: If there were problems connecting to the
                 remote, due to e.g. DNS failures, connection timeouts etc.
-- 
cgit 1.5.1


From 4fc8875876374ec8f97a3b3cc344a4e3abcf769f Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 27 Feb 2023 08:26:05 -0500
Subject: Refactor media modules. (#15146)

* Removes the `v1` directory from `tests.rest.media.v1`.
* Moves the non-REST code from `synapse.rest.media.v1` to `synapse.media`.
* Flattens the `v1` directory from `synapse.rest.media`, but leaves compatibility
  with 3rd party media repositories and spam checkers.
---
 changelog.d/15146.misc                             |    1 +
 synapse/_scripts/move_remote_media_to_new_store.py |    2 +-
 synapse/config/repository.py                       |   12 +-
 synapse/events/spamcheck.py                        |    4 +-
 synapse/media/_base.py                             |  479 ++++++++
 synapse/media/filepath.py                          |  410 +++++++
 synapse/media/media_repository.py                  | 1038 ++++++++++++++++
 synapse/media/media_storage.py                     |  374 ++++++
 synapse/media/oembed.py                            |  265 +++++
 synapse/media/preview_html.py                      |  501 ++++++++
 synapse/media/storage_provider.py                  |  181 +++
 synapse/media/thumbnailer.py                       |  221 ++++
 synapse/rest/media/config_resource.py              |   41 +
 synapse/rest/media/download_resource.py            |   75 ++
 synapse/rest/media/media_repository_resource.py    |   93 ++
 synapse/rest/media/preview_url_resource.py         |  869 ++++++++++++++
 synapse/rest/media/thumbnail_resource.py           |  554 +++++++++
 synapse/rest/media/upload_resource.py              |  108 ++
 synapse/rest/media/v1/_base.py                     |  470 +-------
 synapse/rest/media/v1/config_resource.py           |   41 -
 synapse/rest/media/v1/download_resource.py         |   76 --
 synapse/rest/media/v1/filepath.py                  |  410 -------
 synapse/rest/media/v1/media_repository.py          | 1112 ------------------
 synapse/rest/media/v1/media_storage.py             |  365 +-----
 synapse/rest/media/v1/oembed.py                    |  265 -----
 synapse/rest/media/v1/preview_html.py              |  501 --------
 synapse/rest/media/v1/preview_url_resource.py      |  871 --------------
 synapse/rest/media/v1/storage_provider.py          |  172 +--
 synapse/rest/media/v1/thumbnail_resource.py        |  555 ---------
 synapse/rest/media/v1/thumbnailer.py               |  221 ----
 synapse/rest/media/v1/upload_resource.py           |  108 --
 synapse/server.py                                  |    6 +-
 tests/media/__init__.py                            |   13 +
 tests/media/test_base.py                           |   38 +
 tests/media/test_filepath.py                       |  595 ++++++++++
 tests/media/test_html_preview.py                   |  542 +++++++++
 tests/media/test_media_storage.py                  |  792 +++++++++++++
 tests/media/test_oembed.py                         |  162 +++
 tests/rest/admin/test_media.py                     |    2 +-
 tests/rest/admin/test_user.py                      |    2 +-
 tests/rest/media/test_url_preview.py               | 1234 ++++++++++++++++++++
 tests/rest/media/v1/__init__.py                    |   13 -
 tests/rest/media/v1/test_base.py                   |   38 -
 tests/rest/media/v1/test_filepath.py               |  595 ----------
 tests/rest/media/v1/test_html_preview.py           |  542 ---------
 tests/rest/media/v1/test_media_storage.py          |  792 -------------
 tests/rest/media/v1/test_oembed.py                 |  162 ---
 tests/rest/media/v1/test_url_preview.py            | 1234 --------------------
 48 files changed, 8612 insertions(+), 8545 deletions(-)
 create mode 100644 changelog.d/15146.misc
 create mode 100644 synapse/media/_base.py
 create mode 100644 synapse/media/filepath.py
 create mode 100644 synapse/media/media_repository.py
 create mode 100644 synapse/media/media_storage.py
 create mode 100644 synapse/media/oembed.py
 create mode 100644 synapse/media/preview_html.py
 create mode 100644 synapse/media/storage_provider.py
 create mode 100644 synapse/media/thumbnailer.py
 create mode 100644 synapse/rest/media/config_resource.py
 create mode 100644 synapse/rest/media/download_resource.py
 create mode 100644 synapse/rest/media/media_repository_resource.py
 create mode 100644 synapse/rest/media/preview_url_resource.py
 create mode 100644 synapse/rest/media/thumbnail_resource.py
 create mode 100644 synapse/rest/media/upload_resource.py
 delete mode 100644 synapse/rest/media/v1/config_resource.py
 delete mode 100644 synapse/rest/media/v1/download_resource.py
 delete mode 100644 synapse/rest/media/v1/filepath.py
 delete mode 100644 synapse/rest/media/v1/media_repository.py
 delete mode 100644 synapse/rest/media/v1/oembed.py
 delete mode 100644 synapse/rest/media/v1/preview_html.py
 delete mode 100644 synapse/rest/media/v1/preview_url_resource.py
 delete mode 100644 synapse/rest/media/v1/thumbnail_resource.py
 delete mode 100644 synapse/rest/media/v1/thumbnailer.py
 delete mode 100644 synapse/rest/media/v1/upload_resource.py
 create mode 100644 tests/media/__init__.py
 create mode 100644 tests/media/test_base.py
 create mode 100644 tests/media/test_filepath.py
 create mode 100644 tests/media/test_html_preview.py
 create mode 100644 tests/media/test_media_storage.py
 create mode 100644 tests/media/test_oembed.py
 create mode 100644 tests/rest/media/test_url_preview.py
 delete mode 100644 tests/rest/media/v1/__init__.py
 delete mode 100644 tests/rest/media/v1/test_base.py
 delete mode 100644 tests/rest/media/v1/test_filepath.py
 delete mode 100644 tests/rest/media/v1/test_html_preview.py
 delete mode 100644 tests/rest/media/v1/test_media_storage.py
 delete mode 100644 tests/rest/media/v1/test_oembed.py
 delete mode 100644 tests/rest/media/v1/test_url_preview.py

(limited to 'synapse')

diff --git a/changelog.d/15146.misc b/changelog.d/15146.misc
new file mode 100644
index 0000000000..8de5f95239
--- /dev/null
+++ b/changelog.d/15146.misc
@@ -0,0 +1 @@
+Refactor the media modules.
diff --git a/synapse/_scripts/move_remote_media_to_new_store.py b/synapse/_scripts/move_remote_media_to_new_store.py
index 819afaaca6..0dd36bee20 100755
--- a/synapse/_scripts/move_remote_media_to_new_store.py
+++ b/synapse/_scripts/move_remote_media_to_new_store.py
@@ -37,7 +37,7 @@ import os
 import shutil
 import sys
 
-from synapse.rest.media.v1.filepath import MediaFilePaths
+from synapse.media.filepath import MediaFilePaths
 
 logger = logging.getLogger()
 
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index 2da40c09f0..ecb3edbe3a 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -178,11 +178,13 @@ class ContentRepositoryConfig(Config):
         for i, provider_config in enumerate(storage_providers):
             # We special case the module "file_system" so as not to need to
             # expose FileStorageProviderBackend
-            if provider_config["module"] == "file_system":
-                provider_config["module"] = (
-                    "synapse.rest.media.v1.storage_provider"
-                    ".FileStorageProviderBackend"
-                )
+            if (
+                provider_config["module"] == "file_system"
+                or provider_config["module"] == "synapse.rest.media.v1.storage_provider"
+            ):
+                provider_config[
+                    "module"
+                ] = "synapse.media.storage_provider.FileStorageProviderBackend"
 
             provider_class, parsed_config = load_module(
                 provider_config, ("media_storage_providers", "<item %i>" % i)
diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py
index 623a2c71ea..765c15bb51 100644
--- a/synapse/events/spamcheck.py
+++ b/synapse/events/spamcheck.py
@@ -33,8 +33,8 @@ from typing_extensions import Literal
 import synapse
 from synapse.api.errors import Codes
 from synapse.logging.opentracing import trace
-from synapse.rest.media.v1._base import FileInfo
-from synapse.rest.media.v1.media_storage import ReadableFileWrapper
+from synapse.media._base import FileInfo
+from synapse.media.media_storage import ReadableFileWrapper
 from synapse.spam_checker_api import RegistrationBehaviour
 from synapse.types import JsonDict, RoomAlias, UserProfile
 from synapse.util.async_helpers import delay_cancellation, maybe_awaitable
diff --git a/synapse/media/_base.py b/synapse/media/_base.py
new file mode 100644
index 0000000000..ef8334ae25
--- /dev/null
+++ b/synapse/media/_base.py
@@ -0,0 +1,479 @@
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2019-2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+import urllib
+from abc import ABC, abstractmethod
+from types import TracebackType
+from typing import Awaitable, Dict, Generator, List, Optional, Tuple, Type
+
+import attr
+
+from twisted.internet.interfaces import IConsumer
+from twisted.protocols.basic import FileSender
+from twisted.web.server import Request
+
+from synapse.api.errors import Codes, SynapseError, cs_error
+from synapse.http.server import finish_request, respond_with_json
+from synapse.http.site import SynapseRequest
+from synapse.logging.context import make_deferred_yieldable
+from synapse.util.stringutils import is_ascii, parse_and_validate_server_name
+
+logger = logging.getLogger(__name__)
+
+# list all text content types that will have the charset default to UTF-8 when
+# none is given
+TEXT_CONTENT_TYPES = [
+    "text/css",
+    "text/csv",
+    "text/html",
+    "text/calendar",
+    "text/plain",
+    "text/javascript",
+    "application/json",
+    "application/ld+json",
+    "application/rtf",
+    "image/svg+xml",
+    "text/xml",
+]
+
+
+def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]:
+    """Parses the server name, media ID and optional file name from the request URI
+
+    Also performs some rough validation on the server name.
+
+    Args:
+        request: The `Request`.
+
+    Returns:
+        A tuple containing the parsed server name, media ID and optional file name.
+
+    Raises:
+        SynapseError(404): if parsing or validation fails for any reason
+    """
+    try:
+        # The type on postpath seems incorrect in Twisted 21.2.0.
+        postpath: List[bytes] = request.postpath  # type: ignore
+        assert postpath
+
+        # This allows users to append e.g. /test.png to the URL. Useful for
+        # clients that parse the URL to see content type.
+        server_name_bytes, media_id_bytes = postpath[:2]
+        server_name = server_name_bytes.decode("utf-8")
+        media_id = media_id_bytes.decode("utf8")
+
+        # Validate the server name, raising if invalid
+        parse_and_validate_server_name(server_name)
+
+        file_name = None
+        if len(postpath) > 2:
+            try:
+                file_name = urllib.parse.unquote(postpath[-1].decode("utf-8"))
+            except UnicodeDecodeError:
+                pass
+        return server_name, media_id, file_name
+    except Exception:
+        raise SynapseError(
+            404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN
+        )
+
+
+def respond_404(request: SynapseRequest) -> None:
+    respond_with_json(
+        request,
+        404,
+        cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND),
+        send_cors=True,
+    )
+
+
+async def respond_with_file(
+    request: SynapseRequest,
+    media_type: str,
+    file_path: str,
+    file_size: Optional[int] = None,
+    upload_name: Optional[str] = None,
+) -> None:
+    logger.debug("Responding with %r", file_path)
+
+    if os.path.isfile(file_path):
+        if file_size is None:
+            stat = os.stat(file_path)
+            file_size = stat.st_size
+
+        add_file_headers(request, media_type, file_size, upload_name)
+
+        with open(file_path, "rb") as f:
+            await make_deferred_yieldable(FileSender().beginFileTransfer(f, request))
+
+        finish_request(request)
+    else:
+        respond_404(request)
+
+
+def add_file_headers(
+    request: Request,
+    media_type: str,
+    file_size: Optional[int],
+    upload_name: Optional[str],
+) -> None:
+    """Adds the correct response headers in preparation for responding with the
+    media.
+
+    Args:
+        request
+        media_type: The media/content type.
+        file_size: Size in bytes of the media, if known.
+        upload_name: The name of the requested file, if any.
+    """
+
+    def _quote(x: str) -> str:
+        return urllib.parse.quote(x.encode("utf-8"))
+
+    # Default to a UTF-8 charset for text content types.
+    # e.g. uses UTF-8 for 'text/css' but not 'text/css; charset=UTF-16'
+    if media_type.lower() in TEXT_CONTENT_TYPES:
+        content_type = media_type + "; charset=UTF-8"
+    else:
+        content_type = media_type
+
+    request.setHeader(b"Content-Type", content_type.encode("UTF-8"))
+    if upload_name:
+        # RFC6266 section 4.1 [1] defines both `filename` and `filename*`.
+        #
+        # `filename` is defined to be a `value`, which is defined by RFC2616
+        # section 3.6 [2] to be a `token` or a `quoted-string`, where a `token`
+        # is (essentially) a single US-ASCII word, and a `quoted-string` is a
+        # US-ASCII string surrounded by double-quotes, using backslash as an
+        # escape character. Note that %-encoding is *not* permitted.
+        #
+        # `filename*` is defined to be an `ext-value`, which is defined in
+        # RFC5987 section 3.2.1 [3] to be `charset "'" [ language ] "'" value-chars`,
+        # where `value-chars` is essentially a %-encoded string in the given charset.
+        #
+        # [1]: https://tools.ietf.org/html/rfc6266#section-4.1
+        # [2]: https://tools.ietf.org/html/rfc2616#section-3.6
+        # [3]: https://tools.ietf.org/html/rfc5987#section-3.2.1
+
+        # We avoid the quoted-string version of `filename`, because (a) synapse didn't
+        # correctly interpret those as of 0.99.2 and (b) they are a bit of a pain and we
+        # may as well just do the filename* version.
+        if _can_encode_filename_as_token(upload_name):
+            disposition = "inline; filename=%s" % (upload_name,)
+        else:
+            disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),)
+
+        request.setHeader(b"Content-Disposition", disposition.encode("ascii"))
+
+    # Let clients and shared caches keep the response for a day.
+    # XXX: we might want to turn this off for data we don't want to
+    # recommend caching as it's sensitive or private - or at least
+    # select `private`. We don't bother setting Expires, as all our
+    # clients are smart enough to be happy with Cache-Control.
+    request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
+    if file_size is not None:
+        request.setHeader(b"Content-Length", b"%d" % (file_size,))
+
+    # Tell web crawlers to not index, archive, or follow links in media. This
+    # should help to prevent things in the media repo from showing up in web
+    # search results.
+    request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex")
+
+
+# separators as defined in RFC2616. SP and HT are handled separately.
+# see _can_encode_filename_as_token.
+_FILENAME_SEPARATOR_CHARS = {
+    "(",
+    ")",
+    "<",
+    ">",
+    "@",
+    ",",
+    ";",
+    ":",
+    "\\",
+    '"',
+    "/",
+    "[",
+    "]",
+    "?",
+    "=",
+    "{",
+    "}",
+}
+
+
+def _can_encode_filename_as_token(x: str) -> bool:
+    for c in x:
+        # from RFC2616:
+        #
+        #        token          = 1*<any CHAR except CTLs or separators>
+        #
+        #        separators     = "(" | ")" | "<" | ">" | "@"
+        #                       | "," | ";" | ":" | "\" | <">
+        #                       | "/" | "[" | "]" | "?" | "="
+        #                       | "{" | "}" | SP | HT
+        #
+        #        CHAR           = <any US-ASCII character (octets 0 - 127)>
+        #
+        #        CTL            = <any US-ASCII control character
+        #                         (octets 0 - 31) and DEL (127)>
+        #
+        if ord(c) >= 127 or ord(c) <= 32 or c in _FILENAME_SEPARATOR_CHARS:
+            return False
+    return True
+
+
+async def respond_with_responder(
+    request: SynapseRequest,
+    responder: "Optional[Responder]",
+    media_type: str,
+    file_size: Optional[int],
+    upload_name: Optional[str] = None,
+) -> None:
+    """Responds to the request with given responder. If responder is None then
+    returns 404.
+
+    Args:
+        request
+        responder
+        media_type: The media/content type.
+        file_size: Size in bytes of the media. If not known it should be None
+        upload_name: The name of the requested file, if any.
+    """
+    if not responder:
+        respond_404(request)
+        return
+
+    # If we have a responder we *must* use it as a context manager.
+    with responder:
+        if request._disconnected:
+            logger.warning(
+                "Not sending response to request %s, already disconnected.", request
+            )
+            return
+
+        logger.debug("Responding to media request with responder %s", responder)
+        add_file_headers(request, media_type, file_size, upload_name)
+        try:
+            await responder.write_to_consumer(request)
+        except Exception as e:
+            # The majority of the time this will be due to the client having gone
+            # away. Unfortunately, Twisted simply throws a generic exception at us
+            # in that case.
+            logger.warning("Failed to write to consumer: %s %s", type(e), e)
+
+            # Unregister the producer, if it has one, so Twisted doesn't complain
+            if request.producer:
+                request.unregisterProducer()
+
+    finish_request(request)
+
+
+class Responder(ABC):
+    """Represents a response that can be streamed to the requester.
+
+    Responder is a context manager which *must* be used, so that any resources
+    held can be cleaned up.
+    """
+
+    @abstractmethod
+    def write_to_consumer(self, consumer: IConsumer) -> Awaitable:
+        """Stream response into consumer
+
+        Args:
+            consumer: The consumer to stream into.
+
+        Returns:
+            Resolves once the response has finished being written
+        """
+        raise NotImplementedError()
+
+    def __enter__(self) -> None:  # noqa: B027
+        pass
+
+    def __exit__(  # noqa: B027
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
+        pass
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class ThumbnailInfo:
+    """Details about a generated thumbnail."""
+
+    width: int
+    height: int
+    method: str
+    # Content type of thumbnail, e.g. image/png
+    type: str
+    # The size of the media file, in bytes.
+    length: Optional[int] = None
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class FileInfo:
+    """Details about a requested/uploaded file."""
+
+    # The server name where the media originated from, or None if local.
+    server_name: Optional[str]
+    # The local ID of the file. For local files this is the same as the media_id
+    file_id: str
+    # Whether the file is for the URL preview cache.
+    url_cache: bool = False
+    # Details of the thumbnail, or None if the file is not a thumbnail.
+    thumbnail: Optional[ThumbnailInfo] = None
+
+    # The below properties exist to maintain compatibility with third-party modules.
+    @property
+    def thumbnail_width(self) -> Optional[int]:
+        if not self.thumbnail:
+            return None
+        return self.thumbnail.width
+
+    @property
+    def thumbnail_height(self) -> Optional[int]:
+        if not self.thumbnail:
+            return None
+        return self.thumbnail.height
+
+    @property
+    def thumbnail_method(self) -> Optional[str]:
+        if not self.thumbnail:
+            return None
+        return self.thumbnail.method
+
+    @property
+    def thumbnail_type(self) -> Optional[str]:
+        if not self.thumbnail:
+            return None
+        return self.thumbnail.type
+
+    @property
+    def thumbnail_length(self) -> Optional[int]:
+        if not self.thumbnail:
+            return None
+        return self.thumbnail.length
+
+
+def get_filename_from_headers(headers: Dict[bytes, List[bytes]]) -> Optional[str]:
+    """
+    Get the filename of the downloaded file by inspecting the
+    Content-Disposition HTTP header.
+
+    Args:
+        headers: The HTTP headers received with the downloaded file.
+
+    Returns:
+        The filename, or None.
+    """
+    content_disposition = headers.get(b"Content-Disposition", [b""])
+
+    # No header, bail out.
+    if not content_disposition[0]:
+        return None
+
+    _, params = _parse_header(content_disposition[0])
+
+    upload_name = None
+
+    # First check if there is a valid UTF-8 filename
+    upload_name_utf8 = params.get(b"filename*", None)
+    if upload_name_utf8:
+        if upload_name_utf8.lower().startswith(b"utf-8''"):
+            upload_name_utf8 = upload_name_utf8[7:]
+            # We have a filename*= section. This MUST be ASCII, and any UTF-8
+            # bytes are %-quoted.
+            try:
+                # Once it is decoded, we can then unquote the %-encoded
+                # parts strictly into a unicode string.
+                upload_name = urllib.parse.unquote(
+                    upload_name_utf8.decode("ascii"), errors="strict"
+                )
+            except UnicodeDecodeError:
+                # Incorrect UTF-8.
+                pass
+
+    # If there isn't one, check for an ASCII name.
+    if not upload_name:
+        upload_name_ascii = params.get(b"filename", None)
+        if upload_name_ascii and is_ascii(upload_name_ascii):
+            upload_name = upload_name_ascii.decode("ascii")
+
+    # This may be None here, indicating we did not find a matching name.
+    return upload_name
+
+
+def _parse_header(line: bytes) -> Tuple[bytes, Dict[bytes, bytes]]:
+    """Parse a Content-type like header.
+
+    Cargo-culted from `cgi`, but works on bytes rather than strings.
+
+    Args:
+        line: header to be parsed
+
+    Returns:
+        The main content-type, followed by the parameter dictionary
+    """
+    parts = _parseparam(b";" + line)
+    key = next(parts)
+    pdict = {}
+    for p in parts:
+        i = p.find(b"=")
+        if i >= 0:
+            name = p[:i].strip().lower()
+            value = p[i + 1 :].strip()
+
+            # strip double-quotes
+            if len(value) >= 2 and value[0:1] == value[-1:] == b'"':
+                value = value[1:-1]
+                value = value.replace(b"\\\\", b"\\").replace(b'\\"', b'"')
+            pdict[name] = value
+
+    return key, pdict
+
+
+def _parseparam(s: bytes) -> Generator[bytes, None, None]:
+    """Generator which splits the input on ;, respecting double-quoted sequences
+
+    Cargo-culted from `cgi`, but works on bytes rather than strings.
+
+    Args:
+        s: header to be parsed
+
+    Returns:
+        The split input
+    """
+    while s[:1] == b";":
+        s = s[1:]
+
+        # look for the next ;
+        end = s.find(b";")
+
+        # if there is an odd number of " marks between here and the next ;, skip to the
+        # next ; instead
+        while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2:
+            end = s.find(b";", end + 1)
+
+        if end < 0:
+            end = len(s)
+        f = s[:end]
+        yield f.strip()
+        s = s[end:]
diff --git a/synapse/media/filepath.py b/synapse/media/filepath.py
new file mode 100644
index 0000000000..1f6441c412
--- /dev/null
+++ b/synapse/media/filepath.py
@@ -0,0 +1,410 @@
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import functools
+import os
+import re
+import string
+from typing import Any, Callable, List, TypeVar, Union, cast
+
+NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d")
+
+
+F = TypeVar("F", bound=Callable[..., str])
+
+
+def _wrap_in_base_path(func: F) -> F:
+    """Takes a function that returns a path relative to the primary media store
+    and wraps it so that the returned path is prefixed with the store's base path
+    """
+
+    @functools.wraps(func)
+    def _wrapped(self: "MediaFilePaths", *args: Any, **kwargs: Any) -> str:
+        path = func(self, *args, **kwargs)
+        return os.path.join(self.base_path, path)
+
+    return cast(F, _wrapped)
+
+
+GetPathMethod = TypeVar(
+    "GetPathMethod", bound=Union[Callable[..., str], Callable[..., List[str]]]
+)
+
+
+def _wrap_with_jail_check(relative: bool) -> Callable[[GetPathMethod], GetPathMethod]:
+    """Wraps a path-returning method to check that the returned path(s) do not escape
+    the media store directory.
+
+    The path-returning method may return either a single path, or a list of paths.
+
+    The check is not expected to ever fail, unless `func` is missing a call to
+    `_validate_path_component`, or `_validate_path_component` is buggy.
+
+    Args:
+        relative: A boolean indicating whether the wrapped method returns paths relative
+            to the media store directory.
+
+    Returns:
+        A method which will wrap a path-returning method, adding a check to ensure that
+        the returned path(s) lie within the media store directory. The check will raise
+        a `ValueError` if it fails.
+    """
+
+    def _wrap_with_jail_check_inner(func: GetPathMethod) -> GetPathMethod:
+        @functools.wraps(func)
+        def _wrapped(
+            self: "MediaFilePaths", *args: Any, **kwargs: Any
+        ) -> Union[str, List[str]]:
+            path_or_paths = func(self, *args, **kwargs)
+
+            if isinstance(path_or_paths, list):
+                paths_to_check = path_or_paths
+            else:
+                paths_to_check = [path_or_paths]
+
+            for path in paths_to_check:
+                # Construct the path that will ultimately be used.
+                # We cannot guess whether `path` is relative to the media store
+                # directory, since the media store directory may itself be a relative
+                # path.
+                if relative:
+                    path = os.path.join(self.base_path, path)
+                normalized_path = os.path.normpath(path)
+
+                # Now that `normpath` has eliminated `../`s and `./`s from the path,
+                # `os.path.commonpath` can be used to check whether it lies within the
+                # media store directory.
+                if (
+                    os.path.commonpath([normalized_path, self.normalized_base_path])
+                    != self.normalized_base_path
+                ):
+                    # The path resolves to outside the media store directory,
+                    # or `self.base_path` is `.`, which is an unlikely configuration.
+                    raise ValueError(f"Invalid media store path: {path!r}")
+
+                # Note that `os.path.normpath`/`abspath` has a subtle caveat:
+                # `a/b/c/../c` will normalize to `a/b/c`, but the former refers to a
+                # different path if `a/b/c` is a symlink. That is, the check above is
+                # not perfect and may allow a certain restricted subset of untrustworthy
+                # paths through. Since the check above is secondary to the main
+                # `_validate_path_component` checks, it's less important for it to be
+                # perfect.
+                #
+                # As an alternative, `os.path.realpath` will resolve symlinks, but
+                # proves problematic if there are symlinks inside the media store.
+                # eg. if `url_store/` is symlinked to elsewhere, its canonical path
+                # won't match that of the main media store directory.
+
+            return path_or_paths
+
+        return cast(GetPathMethod, _wrapped)
+
+    return _wrap_with_jail_check_inner
+
+
+ALLOWED_CHARACTERS = set(
+    string.ascii_letters
+    + string.digits
+    + "_-"
+    + ".[]:"  # Domain names, IPv6 addresses and ports in server names
+)
+FORBIDDEN_NAMES = {
+    "",
+    os.path.curdir,  # "." for the current platform
+    os.path.pardir,  # ".." for the current platform
+}
+
+
+def _validate_path_component(name: str) -> str:
+    """Checks that the given string can be safely used as a path component
+
+    Args:
+        name: The path component to check.
+
+    Returns:
+        The path component if valid.
+
+    Raises:
+        ValueError: If `name` cannot be safely used as a path component.
+    """
+    if not ALLOWED_CHARACTERS.issuperset(name) or name in FORBIDDEN_NAMES:
+        raise ValueError(f"Invalid path component: {name!r}")
+
+    return name
+
+
+class MediaFilePaths:
+    """Describes where files are stored on disk.
+
+    Most of the functions have a `*_rel` variant which returns a file path that
+    is relative to the base media store path. This is mainly used when we want
+    to write to the backup media store (when one is configured)
+    """
+
+    def __init__(self, primary_base_path: str):
+        self.base_path = primary_base_path
+        self.normalized_base_path = os.path.normpath(self.base_path)
+
+        # Refuse to initialize if paths cannot be validated correctly for the current
+        # platform.
+        assert os.path.sep not in ALLOWED_CHARACTERS
+        assert os.path.altsep not in ALLOWED_CHARACTERS
+        # On Windows, paths have all sorts of weirdness which `_validate_path_component`
+        # does not consider. In any case, the remote media store can't work correctly
+        # for certain homeservers there, since ":"s aren't allowed in paths.
+        assert os.name == "posix"
+
+    @_wrap_with_jail_check(relative=True)
+    def local_media_filepath_rel(self, media_id: str) -> str:
+        return os.path.join(
+            "local_content",
+            _validate_path_component(media_id[0:2]),
+            _validate_path_component(media_id[2:4]),
+            _validate_path_component(media_id[4:]),
+        )
+
+    local_media_filepath = _wrap_in_base_path(local_media_filepath_rel)
+
+    @_wrap_with_jail_check(relative=True)
+    def local_media_thumbnail_rel(
+        self, media_id: str, width: int, height: int, content_type: str, method: str
+    ) -> str:
+        top_level_type, sub_type = content_type.split("/")
+        file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method)
+        return os.path.join(
+            "local_thumbnails",
+            _validate_path_component(media_id[0:2]),
+            _validate_path_component(media_id[2:4]),
+            _validate_path_component(media_id[4:]),
+            _validate_path_component(file_name),
+        )
+
+    local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel)
+
+    @_wrap_with_jail_check(relative=False)
+    def local_media_thumbnail_dir(self, media_id: str) -> str:
+        """
+        Retrieve the local store path of thumbnails of a given media_id
+
+        Args:
+            media_id: The media ID to query.
+        Returns:
+            Path of the directory under local_thumbnails for the given media_id
+        """
+        return os.path.join(
+            self.base_path,
+            "local_thumbnails",
+            _validate_path_component(media_id[0:2]),
+            _validate_path_component(media_id[2:4]),
+            _validate_path_component(media_id[4:]),
+        )
+
+    @_wrap_with_jail_check(relative=True)
+    def remote_media_filepath_rel(self, server_name: str, file_id: str) -> str:
+        return os.path.join(
+            "remote_content",
+            _validate_path_component(server_name),
+            _validate_path_component(file_id[0:2]),
+            _validate_path_component(file_id[2:4]),
+            _validate_path_component(file_id[4:]),
+        )
+
+    remote_media_filepath = _wrap_in_base_path(remote_media_filepath_rel)
+
+    @_wrap_with_jail_check(relative=True)
+    def remote_media_thumbnail_rel(
+        self,
+        server_name: str,
+        file_id: str,
+        width: int,
+        height: int,
+        content_type: str,
+        method: str,
+    ) -> str:
+        top_level_type, sub_type = content_type.split("/")
+        file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method)
+        return os.path.join(
+            "remote_thumbnail",
+            _validate_path_component(server_name),
+            _validate_path_component(file_id[0:2]),
+            _validate_path_component(file_id[2:4]),
+            _validate_path_component(file_id[4:]),
+            _validate_path_component(file_name),
+        )
+
+    remote_media_thumbnail = _wrap_in_base_path(remote_media_thumbnail_rel)
+
+    # Legacy path that was used to store thumbnails previously.
+    # Should be removed after some time, when most of the thumbnails are stored
+    # using the new path.
+    @_wrap_with_jail_check(relative=True)
+    def remote_media_thumbnail_rel_legacy(
+        self, server_name: str, file_id: str, width: int, height: int, content_type: str
+    ) -> str:
+        top_level_type, sub_type = content_type.split("/")
+        file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type)
+        return os.path.join(
+            "remote_thumbnail",
+            _validate_path_component(server_name),
+            _validate_path_component(file_id[0:2]),
+            _validate_path_component(file_id[2:4]),
+            _validate_path_component(file_id[4:]),
+            _validate_path_component(file_name),
+        )
+
+    @_wrap_with_jail_check(relative=False)
+    def remote_media_thumbnail_dir(self, server_name: str, file_id: str) -> str:
+        return os.path.join(
+            self.base_path,
+            "remote_thumbnail",
+            _validate_path_component(server_name),
+            _validate_path_component(file_id[0:2]),
+            _validate_path_component(file_id[2:4]),
+            _validate_path_component(file_id[4:]),
+        )
+
+    @_wrap_with_jail_check(relative=True)
+    def url_cache_filepath_rel(self, media_id: str) -> str:
+        if NEW_FORMAT_ID_RE.match(media_id):
+            # Media id is of the form <DATE><RANDOM_STRING>
+            # E.g.: 2017-09-28-fsdRDt24DS234dsf
+            return os.path.join(
+                "url_cache",
+                _validate_path_component(media_id[:10]),
+                _validate_path_component(media_id[11:]),
+            )
+        else:
+            return os.path.join(
+                "url_cache",
+                _validate_path_component(media_id[0:2]),
+                _validate_path_component(media_id[2:4]),
+                _validate_path_component(media_id[4:]),
+            )
+
+    url_cache_filepath = _wrap_in_base_path(url_cache_filepath_rel)
+
+    @_wrap_with_jail_check(relative=False)
+    def url_cache_filepath_dirs_to_delete(self, media_id: str) -> List[str]:
+        "The dirs to try and remove if we delete the media_id file"
+        if NEW_FORMAT_ID_RE.match(media_id):
+            return [
+                os.path.join(
+                    self.base_path, "url_cache", _validate_path_component(media_id[:10])
+                )
+            ]
+        else:
+            return [
+                os.path.join(
+                    self.base_path,
+                    "url_cache",
+                    _validate_path_component(media_id[0:2]),
+                    _validate_path_component(media_id[2:4]),
+                ),
+                os.path.join(
+                    self.base_path, "url_cache", _validate_path_component(media_id[0:2])
+                ),
+            ]
+
+    @_wrap_with_jail_check(relative=True)
+    def url_cache_thumbnail_rel(
+        self, media_id: str, width: int, height: int, content_type: str, method: str
+    ) -> str:
+        # Media id is of the form <DATE><RANDOM_STRING>
+        # E.g.: 2017-09-28-fsdRDt24DS234dsf
+
+        top_level_type, sub_type = content_type.split("/")
+        file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method)
+
+        if NEW_FORMAT_ID_RE.match(media_id):
+            return os.path.join(
+                "url_cache_thumbnails",
+                _validate_path_component(media_id[:10]),
+                _validate_path_component(media_id[11:]),
+                _validate_path_component(file_name),
+            )
+        else:
+            return os.path.join(
+                "url_cache_thumbnails",
+                _validate_path_component(media_id[0:2]),
+                _validate_path_component(media_id[2:4]),
+                _validate_path_component(media_id[4:]),
+                _validate_path_component(file_name),
+            )
+
+    url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel)
+
+    @_wrap_with_jail_check(relative=True)
+    def url_cache_thumbnail_directory_rel(self, media_id: str) -> str:
+        # Media id is of the form <DATE><RANDOM_STRING>
+        # E.g.: 2017-09-28-fsdRDt24DS234dsf
+
+        if NEW_FORMAT_ID_RE.match(media_id):
+            return os.path.join(
+                "url_cache_thumbnails",
+                _validate_path_component(media_id[:10]),
+                _validate_path_component(media_id[11:]),
+            )
+        else:
+            return os.path.join(
+                "url_cache_thumbnails",
+                _validate_path_component(media_id[0:2]),
+                _validate_path_component(media_id[2:4]),
+                _validate_path_component(media_id[4:]),
+            )
+
+    url_cache_thumbnail_directory = _wrap_in_base_path(
+        url_cache_thumbnail_directory_rel
+    )
+
+    @_wrap_with_jail_check(relative=False)
+    def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]:
+        "The dirs to try and remove if we delete the media_id thumbnails"
+        # Media id is of the form <DATE><RANDOM_STRING>
+        # E.g.: 2017-09-28-fsdRDt24DS234dsf
+        if NEW_FORMAT_ID_RE.match(media_id):
+            return [
+                os.path.join(
+                    self.base_path,
+                    "url_cache_thumbnails",
+                    _validate_path_component(media_id[:10]),
+                    _validate_path_component(media_id[11:]),
+                ),
+                os.path.join(
+                    self.base_path,
+                    "url_cache_thumbnails",
+                    _validate_path_component(media_id[:10]),
+                ),
+            ]
+        else:
+            return [
+                os.path.join(
+                    self.base_path,
+                    "url_cache_thumbnails",
+                    _validate_path_component(media_id[0:2]),
+                    _validate_path_component(media_id[2:4]),
+                    _validate_path_component(media_id[4:]),
+                ),
+                os.path.join(
+                    self.base_path,
+                    "url_cache_thumbnails",
+                    _validate_path_component(media_id[0:2]),
+                    _validate_path_component(media_id[2:4]),
+                ),
+                os.path.join(
+                    self.base_path,
+                    "url_cache_thumbnails",
+                    _validate_path_component(media_id[0:2]),
+                ),
+            ]
diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py
new file mode 100644
index 0000000000..b81e3c2b0c
--- /dev/null
+++ b/synapse/media/media_repository.py
@@ -0,0 +1,1038 @@
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2018-2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import errno
+import logging
+import os
+import shutil
+from io import BytesIO
+from typing import IO, TYPE_CHECKING, Dict, List, Optional, Set, Tuple
+
+from matrix_common.types.mxc_uri import MXCUri
+
+import twisted.internet.error
+import twisted.web.http
+from twisted.internet.defer import Deferred
+
+from synapse.api.errors import (
+    FederationDeniedError,
+    HttpResponseException,
+    NotFoundError,
+    RequestSendFailed,
+    SynapseError,
+)
+from synapse.config.repository import ThumbnailRequirement
+from synapse.http.site import SynapseRequest
+from synapse.logging.context import defer_to_thread
+from synapse.media._base import (
+    FileInfo,
+    Responder,
+    ThumbnailInfo,
+    get_filename_from_headers,
+    respond_404,
+    respond_with_responder,
+)
+from synapse.media.filepath import MediaFilePaths
+from synapse.media.media_storage import MediaStorage
+from synapse.media.storage_provider import StorageProviderWrapper
+from synapse.media.thumbnailer import Thumbnailer, ThumbnailError
+from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.types import UserID
+from synapse.util.async_helpers import Linearizer
+from synapse.util.retryutils import NotRetryingDestination
+from synapse.util.stringutils import random_string
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+# How often to run the background job to update the "recently accessed"
+# attribute of local and remote media.
+UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000  # 1 minute
+# How often to run the background job to check for local and remote media
+# that should be purged according to the configured media retention settings.
+MEDIA_RETENTION_CHECK_PERIOD_MS = 60 * 60 * 1000  # 1 hour
+
+
+class MediaRepository:
+    def __init__(self, hs: "HomeServer"):
+        self.hs = hs
+        self.auth = hs.get_auth()
+        self.client = hs.get_federation_http_client()
+        self.clock = hs.get_clock()
+        self.server_name = hs.hostname
+        self.store = hs.get_datastores().main
+        self.max_upload_size = hs.config.media.max_upload_size
+        self.max_image_pixels = hs.config.media.max_image_pixels
+
+        Thumbnailer.set_limits(self.max_image_pixels)
+
+        self.primary_base_path: str = hs.config.media.media_store_path
+        self.filepaths: MediaFilePaths = MediaFilePaths(self.primary_base_path)
+
+        self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
+        self.thumbnail_requirements = hs.config.media.thumbnail_requirements
+
+        self.remote_media_linearizer = Linearizer(name="media_remote")
+
+        self.recently_accessed_remotes: Set[Tuple[str, str]] = set()
+        self.recently_accessed_locals: Set[str] = set()
+
+        self.federation_domain_whitelist = (
+            hs.config.federation.federation_domain_whitelist
+        )
+
+        # List of StorageProviders where we should search for media and
+        # potentially upload to.
+        storage_providers = []
+
+        for (
+            clz,
+            provider_config,
+            wrapper_config,
+        ) in hs.config.media.media_storage_providers:
+            backend = clz(hs, provider_config)
+            provider = StorageProviderWrapper(
+                backend,
+                store_local=wrapper_config.store_local,
+                store_remote=wrapper_config.store_remote,
+                store_synchronous=wrapper_config.store_synchronous,
+            )
+            storage_providers.append(provider)
+
+        self.media_storage = MediaStorage(
+            self.hs, self.primary_base_path, self.filepaths, storage_providers
+        )
+
+        self.clock.looping_call(
+            self._start_update_recently_accessed, UPDATE_RECENTLY_ACCESSED_TS
+        )
+
+        # Media retention configuration options
+        self._media_retention_local_media_lifetime_ms = (
+            hs.config.media.media_retention_local_media_lifetime_ms
+        )
+        self._media_retention_remote_media_lifetime_ms = (
+            hs.config.media.media_retention_remote_media_lifetime_ms
+        )
+
+        # Check whether local or remote media retention is configured
+        if (
+            hs.config.media.media_retention_local_media_lifetime_ms is not None
+            or hs.config.media.media_retention_remote_media_lifetime_ms is not None
+        ):
+            # Run the background job to apply media retention rules routinely,
+            # once every MEDIA_RETENTION_CHECK_PERIOD_MS (a fixed module constant).
+            self.clock.looping_call(
+                self._start_apply_media_retention_rules,
+                MEDIA_RETENTION_CHECK_PERIOD_MS,
+            )
+
+    def _start_update_recently_accessed(self) -> Deferred:
+        """Run `_update_recently_accessed` as a named background process."""
+        job = self._update_recently_accessed
+        return run_as_background_process("update_recently_accessed_media", job)
+
+    def _start_apply_media_retention_rules(self) -> Deferred:
+        """Run `_apply_media_retention_rules` as a named background process."""
+        job = self._apply_media_retention_rules
+        return run_as_background_process("apply_media_retention_rules", job)
+
+    async def _update_recently_accessed(self) -> None:
+        """Hand the pending "recently accessed" sets to the datastore."""
+        # Swap each pending set for a fresh one before awaiting, so the sets
+        # passed to the store are no longer the ones new accesses are added to.
+        remotes, self.recently_accessed_remotes = self.recently_accessed_remotes, set()
+        locals_, self.recently_accessed_locals = self.recently_accessed_locals, set()
+
+        await self.store.update_cached_last_access_time(
+            locals_, remotes, self.clock.time_msec()
+        )
+
+    def mark_recently_accessed(self, server_name: Optional[str], media_id: str) -> None:
+        """Record that a piece of media was just accessed.
+
+        Args:
+            server_name: The media's origin server; falsy (None) means local media
+            media_id: ID of the media that was accessed
+        """
+        if not server_name:
+            self.recently_accessed_locals.add(media_id)
+            return
+        self.recently_accessed_remotes.add((server_name, media_id))
+
+    async def create_content(
+        self,
+        media_type: str,
+        upload_name: Optional[str],
+        content: IO,
+        content_length: int,
+        auth_user: UserID,
+    ) -> MXCUri:
+        """Store uploaded content for a local user and return the mxc URL
+
+        Args:
+            media_type: The content type of the file.
+            upload_name: The name of the file, if provided.
+            content: A file like object that is the content to store
+            content_length: The length of the content
+            auth_user: The user_id of the uploader
+
+        Returns:
+            The mxc url of the stored content
+        """
+
+        media_id = random_string(24)
+
+        file_info = FileInfo(server_name=None, file_id=media_id)
+
+        fname = await self.media_storage.store_file(content, file_info)
+
+        logger.info("Stored local media in file %r", fname)
+
+        await self.store.store_local_media(
+            media_id=media_id,
+            media_type=media_type,
+            time_now_ms=self.clock.time_msec(),
+            upload_name=upload_name,
+            media_length=content_length,
+            user_id=auth_user,
+        )
+
+        await self._generate_thumbnails(None, media_id, media_id, media_type)
+
+        return MXCUri(self.server_name, media_id)
+
+    async def get_local_media(
+        self, request: SynapseRequest, media_id: str, name: Optional[str]
+    ) -> None:
+        """Serve a piece of local media, or respond with a 404.
+
+        Args:
+            request: The incoming request.
+            media_id: The media ID of the content. (For local content this
+                doubles as the file_id.)
+            name: If given, overrides the stored upload name as the filename
+                in the Content-Disposition header of the response.
+
+        Returns:
+            Resolves once a response has been written to the request.
+        """
+        info = await self.store.get_local_media(media_id)
+        # Unknown and quarantined media are both reported as missing.
+        if not info or info["quarantined_by"]:
+            respond_404(request)
+            return
+
+        self.mark_recently_accessed(None, media_id)
+
+        # Fall back to a generic content type when none was recorded.
+        content_type = info["media_type"] or "application/octet-stream"
+        length = info["media_length"]
+        filename = name or info["upload_name"]
+        stored_for_url_cache = bool(info["url_cache"])
+
+        responder = await self.media_storage.fetch_media(
+            FileInfo(None, media_id, url_cache=stored_for_url_cache)
+        )
+        await respond_with_responder(
+            request, responder, content_type, length, filename
+        )
+
+    async def get_remote_media(
+        self,
+        request: SynapseRequest,
+        server_name: str,
+        media_id: str,
+        name: Optional[str],
+    ) -> None:
+        """Respond to requests for remote media.
+
+        Args:
+            request: The incoming request.
+            server_name: Remote server_name where the media originated.
+            media_id: The media ID of the content (as defined by the remote server).
+            name: Optional name that, if specified, will be used as
+                the filename in the Content-Disposition header of the response.
+
+        Returns:
+            Resolves once a response has successfully been written to request
+        """
+        if (
+            self.federation_domain_whitelist is not None
+            and server_name not in self.federation_domain_whitelist
+        ):
+            raise FederationDeniedError(server_name)
+
+        self.mark_recently_accessed(server_name, media_id)
+
+        # We linearize here to ensure that we don't try and download remote
+        # media multiple times concurrently
+        key = (server_name, media_id)
+        async with self.remote_media_linearizer.queue(key):
+            responder, media_info = await self._get_remote_media_impl(
+                server_name, media_id
+            )
+
+        # We deliberately stream the file outside the lock
+        if responder:
+            media_type = media_info["media_type"]
+            media_length = media_info["media_length"]
+            upload_name = name if name else media_info["upload_name"]
+            await respond_with_responder(
+                request, responder, media_type, media_length, upload_name
+            )
+        else:
+            respond_404(request)
+
+    async def get_remote_media_info(self, server_name: str, media_id: str) -> dict:
+        """Gets the media info associated with the remote file, downloading
+        if necessary.
+
+        Args:
+            server_name: Remote server_name where the media originated.
+            media_id: The media ID of the content (as defined by the remote server).
+
+        Returns:
+            The media info of the file
+        """
+        if (
+            self.federation_domain_whitelist is not None
+            and server_name not in self.federation_domain_whitelist
+        ):
+            raise FederationDeniedError(server_name)
+
+        # We linearize here to ensure that we don't try and download remote
+        # media multiple times concurrently
+        key = (server_name, media_id)
+        async with self.remote_media_linearizer.queue(key):
+            responder, media_info = await self._get_remote_media_impl(
+                server_name, media_id
+            )
+
+        # Ensure we actually use the responder so that it releases resources
+        if responder:
+            with responder:
+                pass
+
+        return media_info
+
+    async def _get_remote_media_impl(
+        self, server_name: str, media_id: str
+    ) -> Tuple[Optional[Responder], dict]:
+        """Looks for media in local cache, if not there then attempt to
+        download from remote server.
+
+        Args:
+            server_name: Remote server_name where the media originated.
+            media_id: The media ID of the content (as defined by the
+                remote server).
+
+        Returns:
+            A tuple of the responder (possibly None) and the media info of the file.
+        """
+        media_info = await self.store.get_cached_remote_media(server_name, media_id)
+
+        # file_id is the ID we use to track the file locally. If we've already
+        # seen the file then reuse the existing ID, otherwise a new one is
+        # generated by `_download_remote_file` below.
+
+        # If we have an entry in the DB, try and look for it
+        if media_info:
+            file_id = media_info["filesystem_id"]
+            file_info = FileInfo(server_name, file_id)
+
+            if media_info["quarantined_by"]:
+                logger.info("Media is quarantined")
+                raise NotFoundError()
+
+            if not media_info["media_type"]:
+                media_info["media_type"] = "application/octet-stream"
+
+            responder = await self.media_storage.fetch_media(file_info)
+            if responder:
+                return responder, media_info
+
+        # Failed to find the file anywhere, lets download it.
+
+        try:
+            media_info = await self._download_remote_file(
+                server_name,
+                media_id,
+            )
+        except SynapseError:
+            raise
+        except Exception as e:
+            # An exception may be because we downloaded media in another
+            # process, so let's check if we magically have the media.
+            media_info = await self.store.get_cached_remote_media(server_name, media_id)
+            if not media_info:
+                raise e
+
+        file_id = media_info["filesystem_id"]
+        if not media_info["media_type"]:
+            media_info["media_type"] = "application/octet-stream"
+        file_info = FileInfo(server_name, file_id)
+
+        # We generate thumbnails even if another process downloaded the media
+        # as a) it's conceivable that the other download request dies before it
+        # generates thumbnails, but mainly b) we want to be sure the thumbnails
+        # have finished being generated before responding to the client,
+        # otherwise they'll request thumbnails and get a 404 if they're not
+        # ready yet.
+        await self._generate_thumbnails(
+            server_name, media_id, file_id, media_info["media_type"]
+        )
+
+        responder = await self.media_storage.fetch_media(file_info)
+        return responder, media_info
+
+    async def _download_remote_file(
+        self,
+        server_name: str,
+        media_id: str,
+    ) -> dict:
+        """Attempt to download the remote file from the given server name,
+        storing it locally under a newly generated random file_id.
+
+        Args:
+            server_name: Originating server
+            media_id: The media ID of the content (as defined by the
+                remote server). This is different than the file_id, which is
+                generated inside this method and handed back to the caller
+                under the "filesystem_id" key of the returned media info.
+
+        Returns:
+            The media info of the file.
+        """
+
+        file_id = random_string(24)
+
+        file_info = FileInfo(server_name=server_name, file_id=file_id)
+
+        with self.media_storage.store_into_file(file_info) as (f, fname, finish):
+            request_path = "/".join(
+                ("/_matrix/media/r0/download", server_name, media_id)
+            )
+            try:
+                length, headers = await self.client.get_file(
+                    server_name,
+                    request_path,
+                    output_stream=f,
+                    max_size=self.max_upload_size,
+                    args={
+                        # tell the remote server to 404 if it doesn't
+                        # recognise the server_name, to make sure we don't
+                        # end up with a routing loop.
+                        "allow_remote": "false"
+                    },
+                )
+            except RequestSendFailed as e:
+                logger.warning(
+                    "Request failed fetching remote media %s/%s: %r",
+                    server_name,
+                    media_id,
+                    e,
+                )
+                raise SynapseError(502, "Failed to fetch remote media")
+
+            except HttpResponseException as e:
+                logger.warning(
+                    "HTTP error fetching remote media %s/%s: %s",
+                    server_name,
+                    media_id,
+                    e.response,
+                )
+                if e.code == twisted.web.http.NOT_FOUND:
+                    raise e.to_synapse_error()
+                raise SynapseError(502, "Failed to fetch remote media")
+
+            except SynapseError:
+                logger.warning(
+                    "Failed to fetch remote media %s/%s", server_name, media_id
+                )
+                raise
+            except NotRetryingDestination:
+                logger.warning("Not retrying destination %r", server_name)
+                raise SynapseError(502, "Failed to fetch remote media")
+            except Exception:
+                logger.exception(
+                    "Failed to fetch remote media %s/%s", server_name, media_id
+                )
+                raise SynapseError(502, "Failed to fetch remote media")
+
+            await finish()
+
+            if b"Content-Type" in headers:
+                media_type = headers[b"Content-Type"][0].decode("ascii")
+            else:
+                media_type = "application/octet-stream"
+            upload_name = get_filename_from_headers(headers)
+            time_now_ms = self.clock.time_msec()  # one timestamp for DB row + created_ts
+
+            # Multiple remote media download requests can race (when using
+            # multiple media repos), so this may throw a constraint violation
+            # exception. If it does we'll delete the newly downloaded file from
+            # disk (as we're in the ctx manager).
+            #
+            # However: we've already called `finish()` so we may have also
+            # written to the storage providers. This is preferable to the
+            # alternative where we call `finish()` *after* this, where we could
+            # end up having an entry in the DB but fail to write the files to
+            # the storage providers.
+            await self.store.store_cached_remote_media(
+                origin=server_name,
+                media_id=media_id,
+                media_type=media_type,
+                time_now_ms=time_now_ms,
+                upload_name=upload_name,
+                media_length=length,
+                filesystem_id=file_id,
+            )
+
+        logger.info("Stored remote media in file %r", fname)
+
+        media_info = {
+            "media_type": media_type,
+            "media_length": length,
+            "upload_name": upload_name,
+            "created_ts": time_now_ms,
+            "filesystem_id": file_id,
+        }
+
+        return media_info
+
+    def _get_thumbnail_requirements(
+        self, media_type: str
+    ) -> Tuple[ThumbnailRequirement, ...]:
+        # Look up by the part before any ";" (only when it is not the first char).
+        semicolon_at = media_type.find(";")
+        bare_type = media_type[:semicolon_at] if semicolon_at > 0 else media_type
+        return self.thumbnail_requirements.get(bare_type, ())
+
+    def _generate_thumbnail(
+        self,
+        thumbnailer: Thumbnailer,
+        t_width: int,
+        t_height: int,
+        t_method: str,
+        t_type: str,
+    ) -> Optional[BytesIO]:
+        """Render a single thumbnail of the image held by ``thumbnailer``.
+
+        Returns None when the source image is too large or ``t_method`` is unknown.
+        """
+        src_w, src_h = thumbnailer.width, thumbnailer.height
+        if src_w * src_h >= self.max_image_pixels:
+            logger.info(
+                "Image too large to thumbnail %r x %r > %r",
+                src_w,
+                src_h,
+                self.max_image_pixels,
+            )
+            return None
+
+        # transpose() returns the (width, height) to use from here on.
+        if thumbnailer.transpose_method is not None:
+            src_w, src_h = thumbnailer.transpose()
+        if t_method == "scale":
+            fit_w, fit_h = thumbnailer.aspect(t_width, t_height)
+            # Never scale up beyond the source dimensions.
+            return thumbnailer.scale(min(src_w, fit_w), min(src_h, fit_h), t_type)
+        if t_method == "crop":
+            return thumbnailer.crop(t_width, t_height, t_type)
+        return None
+
+    async def generate_local_exact_thumbnail(
+        self,
+        media_id: str,
+        t_width: int,
+        t_height: int,
+        t_method: str,
+        t_type: str,
+        url_cache: bool,
+    ) -> Optional[str]:
+        input_path = await self.media_storage.ensure_media_is_in_local_cache(
+            FileInfo(None, media_id, url_cache=url_cache)
+        )
+
+        try:
+            thumbnailer = Thumbnailer(input_path)
+        except ThumbnailError as e:
+            logger.warning(
+                "Unable to generate a thumbnail for local media %s using a method of %s and type of %s: %s",
+                media_id,
+                t_method,
+                t_type,
+                e,
+            )
+            return None
+
+        with thumbnailer:
+            t_byte_source = await defer_to_thread(
+                self.hs.get_reactor(),
+                self._generate_thumbnail,
+                thumbnailer,
+                t_width,
+                t_height,
+                t_method,
+                t_type,
+            )
+
+        if t_byte_source:
+            try:
+                file_info = FileInfo(
+                    server_name=None,
+                    file_id=media_id,
+                    url_cache=url_cache,
+                    thumbnail=ThumbnailInfo(
+                        width=t_width,
+                        height=t_height,
+                        method=t_method,
+                        type=t_type,
+                    ),
+                )
+
+                output_path = await self.media_storage.store_file(
+                    t_byte_source, file_info
+                )
+            finally:
+                t_byte_source.close()
+
+            logger.info("Stored thumbnail in file %r", output_path)
+
+            t_len = os.path.getsize(output_path)
+
+            await self.store.store_local_thumbnail(
+                media_id, t_width, t_height, t_type, t_method, t_len
+            )
+
+            return output_path
+
+        # Could not generate thumbnail.
+        return None
+
+    async def generate_remote_exact_thumbnail(
+        self,
+        server_name: str,
+        file_id: str,
+        media_id: str,
+        t_width: int,
+        t_height: int,
+        t_method: str,
+        t_type: str,
+    ) -> Optional[str]:
+        input_path = await self.media_storage.ensure_media_is_in_local_cache(
+            FileInfo(server_name, file_id)
+        )
+
+        try:
+            thumbnailer = Thumbnailer(input_path)
+        except ThumbnailError as e:
+            logger.warning(
+                "Unable to generate a thumbnail for remote media %s from %s using a method of %s and type of %s: %s",
+                media_id,
+                server_name,
+                t_method,
+                t_type,
+                e,
+            )
+            return None
+
+        with thumbnailer:
+            t_byte_source = await defer_to_thread(
+                self.hs.get_reactor(),
+                self._generate_thumbnail,
+                thumbnailer,
+                t_width,
+                t_height,
+                t_method,
+                t_type,
+            )
+
+        if t_byte_source:
+            try:
+                file_info = FileInfo(
+                    server_name=server_name,
+                    file_id=file_id,
+                    thumbnail=ThumbnailInfo(
+                        width=t_width,
+                        height=t_height,
+                        method=t_method,
+                        type=t_type,
+                    ),
+                )
+
+                output_path = await self.media_storage.store_file(
+                    t_byte_source, file_info
+                )
+            finally:
+                t_byte_source.close()
+
+            logger.info("Stored thumbnail in file %r", output_path)
+
+            t_len = os.path.getsize(output_path)
+
+            await self.store.store_remote_media_thumbnail(
+                server_name,
+                media_id,
+                file_id,
+                t_width,
+                t_height,
+                t_type,
+                t_method,
+                t_len,
+            )
+
+            return output_path
+
+        # Could not generate thumbnail.
+        return None
+
+    async def _generate_thumbnails(
+        self,
+        server_name: Optional[str],
+        media_id: str,
+        file_id: str,
+        media_type: str,
+        url_cache: bool = False,
+    ) -> Optional[dict]:
+        """Generate and store thumbnails for an image.
+
+        Args:
+            server_name: The server name if remote media, else None if local
+            media_id: The media ID of the content. (This is the same as
+                the file_id for local content)
+            file_id: Local file ID
+            media_type: The content type of the file
+            url_cache: If we are thumbnailing images downloaded for the URL cache,
+                used exclusively by the url previewer
+
+        Returns:
+            Dict with "width" and "height" keys of original image or None if the
+            media cannot be thumbnailed.
+        """
+        requirements = self._get_thumbnail_requirements(media_type)
+        if not requirements:
+            return None
+
+        input_path = await self.media_storage.ensure_media_is_in_local_cache(
+            FileInfo(server_name, file_id, url_cache=url_cache)
+        )
+
+        try:
+            thumbnailer = Thumbnailer(input_path)
+        except ThumbnailError as e:
+            logger.warning(
+                "Unable to generate thumbnails for remote media %s from %s of type %s: %s",
+                media_id,
+                server_name,
+                media_type,
+                e,
+            )
+            return None
+
+        with thumbnailer:
+            m_width = thumbnailer.width
+            m_height = thumbnailer.height
+
+            if m_width * m_height >= self.max_image_pixels:
+                logger.info(
+                    "Image too large to thumbnail %r x %r > %r",
+                    m_width,
+                    m_height,
+                    self.max_image_pixels,
+                )
+                return None
+
+            if thumbnailer.transpose_method is not None:
+                m_width, m_height = await defer_to_thread(
+                    self.hs.get_reactor(), thumbnailer.transpose
+                )
+
+            # We deduplicate the thumbnail sizes by ignoring the cropped versions if
+            # they have the same dimensions of a scaled one.
+            thumbnails: Dict[Tuple[int, int, str], str] = {}
+            for requirement in requirements:
+                if requirement.method == "crop":
+                    thumbnails.setdefault(
+                        (requirement.width, requirement.height, requirement.media_type),
+                        requirement.method,
+                    )
+                elif requirement.method == "scale":
+                    t_width, t_height = thumbnailer.aspect(
+                        requirement.width, requirement.height
+                    )
+                    t_width = min(m_width, t_width)
+                    t_height = min(m_height, t_height)
+                    thumbnails[
+                        (t_width, t_height, requirement.media_type)
+                    ] = requirement.method
+
+            # Now we generate the thumbnails for each dimension, store it
+            for (t_width, t_height, t_type), t_method in thumbnails.items():
+                # Generate the thumbnail
+                if t_method == "crop":
+                    t_byte_source = await defer_to_thread(
+                        self.hs.get_reactor(),
+                        thumbnailer.crop,
+                        t_width,
+                        t_height,
+                        t_type,
+                    )
+                elif t_method == "scale":
+                    t_byte_source = await defer_to_thread(
+                        self.hs.get_reactor(),
+                        thumbnailer.scale,
+                        t_width,
+                        t_height,
+                        t_type,
+                    )
+                else:
+                    logger.error("Unrecognized method: %r", t_method)
+                    continue
+
+                if not t_byte_source:
+                    continue
+
+                file_info = FileInfo(
+                    server_name=server_name,
+                    file_id=file_id,
+                    url_cache=url_cache,
+                    thumbnail=ThumbnailInfo(
+                        width=t_width,
+                        height=t_height,
+                        method=t_method,
+                        type=t_type,
+                    ),
+                )
+
+                with self.media_storage.store_into_file(file_info) as (
+                    f,
+                    fname,
+                    finish,
+                ):
+                    try:
+                        await self.media_storage.write_to_file(t_byte_source, f)
+                        await finish()
+                    finally:
+                        t_byte_source.close()
+
+                    t_len = os.path.getsize(fname)
+
+                    # Write to database
+                    if server_name:
+                        # Multiple remote media download requests can race (when
+                        # using multiple media repos), so this may throw a violation
+                        # constraint exception. If it does we'll delete the newly
+                        # generated thumbnail from disk (as we're in the ctx
+                        # manager).
+                        #
+                        # However: we've already called `finish()` so we may have
+                        # also written to the storage providers. This is preferable
+                        # to the alternative where we call `finish()` *after* this,
+                        # where we could end up having an entry in the DB but fail
+                        # to write the files to the storage providers.
+                        try:
+                            await self.store.store_remote_media_thumbnail(
+                                server_name,
+                                media_id,
+                                file_id,
+                                t_width,
+                                t_height,
+                                t_type,
+                                t_method,
+                                t_len,
+                            )
+                        except Exception as e:
+                            thumbnail_exists = (
+                                await self.store.get_remote_media_thumbnail(
+                                    server_name,
+                                    media_id,
+                                    t_width,
+                                    t_height,
+                                    t_type,
+                                )
+                            )
+                            if not thumbnail_exists:
+                                raise e
+                    else:
+                        await self.store.store_local_thumbnail(
+                            media_id, t_width, t_height, t_type, t_method, t_len
+                        )
+
+        return {"width": m_width, "height": m_height}
+
+    async def _apply_media_retention_rules(self) -> None:
+        """
+        Purge old local and remote media according to the media retention rules
+        defined in the homeserver config. A lifetime of `None` disables that purge.
+        """
+        # Purge remote media (skipped when no remote lifetime is configured)
+        if self._media_retention_remote_media_lifetime_ms is not None:
+            # Calculate a threshold timestamp derived from the configured lifetime. Any
+            # media that has not been accessed since this timestamp will be removed.
+            remote_media_threshold_timestamp_ms = (
+                self.clock.time_msec() - self._media_retention_remote_media_lifetime_ms
+            )
+
+            logger.info(
+                "Purging remote media last accessed before"
+                f" {remote_media_threshold_timestamp_ms}"
+            )
+
+            await self.delete_old_remote_media(
+                before_ts=remote_media_threshold_timestamp_ms
+            )
+
+        # And now do the same for local media (again skipped when no lifetime is set)
+        if self._media_retention_local_media_lifetime_ms is not None:
+            # Same calculation as the remote threshold: now minus the local lifetime.
+            local_media_threshold_timestamp_ms = (
+                self.clock.time_msec() - self._media_retention_local_media_lifetime_ms
+            )
+
+            logger.info(
+                "Purging local media last accessed before"
+                f" {local_media_threshold_timestamp_ms}"
+            )
+
+            await self.delete_old_local_media(
+                before_ts=local_media_threshold_timestamp_ms,
+                keep_profiles=True,  # keep media still used in profiles/room avatars
+                delete_quarantined_media=False,  # leave quarantined media in place
+                delete_protected_media=False,  # leave protected media in place
+            )
+
+    async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]:
+        """Delete the non-quarantined remote media that the store selects for
+        `before_ts`; returns {"deleted": <number of media entries removed>}."""
+        old_media = await self.store.get_remote_media_ids(
+            before_ts, include_quarantined_media=False
+        )
+        deleted = 0
+
+        for media in old_media:
+            origin = media["media_origin"]
+            media_id = media["media_id"]
+            file_id = media["filesystem_id"]
+            key = (origin, media_id)
+
+            logger.info("Deleting: %r", key)
+
+            # TODO: Should we delete from the backup store
+
+            async with self.remote_media_linearizer.queue(key):
+                full_path = self.filepaths.remote_media_filepath(origin, file_id)
+                try:
+                    os.remove(full_path)
+                except OSError as e:
+                    logger.warning("Failed to remove file: %r: %s", full_path, e)
+                    # A missing file is fine; otherwise keep the DB row and skip.
+                    if e.errno != errno.ENOENT:
+                        continue
+
+                thumbnail_dir = self.filepaths.remote_media_thumbnail_dir(
+                    origin, file_id
+                )
+                shutil.rmtree(thumbnail_dir, ignore_errors=True)
+
+                await self.store.delete_remote_media(origin, media_id)
+                deleted += 1
+
+        return {"deleted": deleted}
+
+    async def delete_local_media_ids(
+        self, media_ids: List[str]
+    ) -> Tuple[List[str], int]:
+        """
+        Delete the given local media IDs from this server.
+
+        Args:
+            media_ids: The media IDs to delete.
+        Returns:
+            A tuple of (list of deleted media IDs, total deleted media IDs).
+        """
+        return await self._remove_local_media_from_disk(media_ids)
+
+    async def delete_old_local_media(
+        self,
+        before_ts: int,
+        size_gt: int = 0,
+        keep_profiles: bool = True,
+        delete_quarantined_media: bool = False,
+        delete_protected_media: bool = False,
+    ) -> Tuple[List[str], int]:
+        """
+        Delete local media from this server by size and timestamp. Removes media
+        files, any thumbnails and cached URLs.
+
+        Args:
+            before_ts: Unix timestamp in ms.
+                Files that were last used before this timestamp will be deleted.
+            size_gt: Size of the media in bytes. Files that are larger will be deleted.
+            keep_profiles: If True, keep files that are still used in image data
+                (e.g. user profile, room avatar). If False these files are deleted too.
+            delete_quarantined_media: If True, media marked as quarantined will be deleted.
+            delete_protected_media: If True, media marked as protected will be deleted.
+
+        Returns:
+            A tuple of (list of deleted media IDs, total deleted media IDs).
+        """
+        old_media = await self.store.get_local_media_ids(
+            before_ts,
+            size_gt,
+            keep_profiles,
+            include_quarantined_media=delete_quarantined_media,
+            include_protected_media=delete_protected_media,
+        )
+        return await self._remove_local_media_from_disk(old_media)
+
+    async def _remove_local_media_from_disk(
+        self, media_ids: List[str]
+    ) -> Tuple[List[str], int]:
+        """
+        Delete local media from this server. Removes media files, any thumbnails
+        and cached URLs.
+
+        Args:
+            media_ids: List of media_id to delete
+        Returns:
+            A tuple of (list of deleted media IDs, total deleted media IDs).
+        """
+        removed_media = []
+        for media_id in media_ids:
+            logger.info("Deleting media with ID '%s'", media_id)
+            full_path = self.filepaths.local_media_filepath(media_id)
+            try:
+                os.remove(full_path)
+            except OSError as e:
+                logger.warning("Failed to remove file: %r: %s", full_path, e)
+                if e.errno == errno.ENOENT:
+                    pass  # already gone: still clean up thumbnails and records
+                else:
+                    continue  # not reported as deleted; its records are kept
+
+            thumbnail_dir = self.filepaths.local_media_thumbnail_dir(media_id)
+            shutil.rmtree(thumbnail_dir, ignore_errors=True)
+            # NOTE(review): local media is deleted via the store's remote-media method.
+            await self.store.delete_remote_media(self.server_name, media_id)
+
+            await self.store.delete_url_cache((media_id,))
+            await self.store.delete_url_cache_media((media_id,))
+
+            removed_media.append(media_id)
+
+        return removed_media, len(removed_media)
diff --git a/synapse/media/media_storage.py b/synapse/media/media_storage.py
new file mode 100644
index 0000000000..a7e22a91e1
--- /dev/null
+++ b/synapse/media/media_storage.py
@@ -0,0 +1,374 @@
+# Copyright 2018-2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import contextlib
+import logging
+import os
+import shutil
+from types import TracebackType
+from typing import (
+    IO,
+    TYPE_CHECKING,
+    Any,
+    Awaitable,
+    BinaryIO,
+    Callable,
+    Generator,
+    Optional,
+    Sequence,
+    Tuple,
+    Type,
+)
+
+import attr
+
+from twisted.internet.defer import Deferred
+from twisted.internet.interfaces import IConsumer
+from twisted.protocols.basic import FileSender
+
+import synapse
+from synapse.api.errors import NotFoundError
+from synapse.logging.context import defer_to_thread, make_deferred_yieldable
+from synapse.util import Clock
+from synapse.util.file_consumer import BackgroundFileConsumer
+
+from ._base import FileInfo, Responder
+from .filepath import MediaFilePaths
+
+if TYPE_CHECKING:
+    from synapse.media.storage_provider import StorageProvider
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+class MediaStorage:
+    """Responsible for storing/fetching files from local sources.
+
+    Args:
+        hs
+        local_media_directory: Base path where we store media on disk
+        filepaths
+        storage_providers: List of StorageProvider that are used to fetch and store files.
+    """
+
+    def __init__(
+        self,
+        hs: "HomeServer",
+        local_media_directory: str,
+        filepaths: MediaFilePaths,
+        storage_providers: Sequence["StorageProvider"],
+    ):
+        self.hs = hs
+        self.reactor = hs.get_reactor()
+        self.local_media_directory = local_media_directory
+        self.filepaths = filepaths
+        self.storage_providers = storage_providers
+        self.spam_checker = hs.get_spam_checker()
+        self.clock = hs.get_clock()
+
+    async def store_file(self, source: IO, file_info: FileInfo) -> str:
+        """Write `source` to the on disk media store, and also any other
+        configured storage providers
+
+        Args:
+            source: A file like object that should be written
+            file_info: Info about the file to store
+
+        Returns:
+            the file path written to in the primary media store
+        """
+
+        with self.store_into_file(file_info) as (f, fname, finish_cb):
+            # Write to the main repository
+            await self.write_to_file(source, f)
+            await finish_cb()
+
+        return fname
+
+    async def write_to_file(self, source: IO, output: IO) -> None:
+        """Asynchronously write the `source` to `output`."""
+        await defer_to_thread(self.reactor, _write_file_synchronously, source, output)
+
+    @contextlib.contextmanager
+    def store_into_file(
+        self, file_info: FileInfo
+    ) -> Generator[Tuple[BinaryIO, str, Callable[[], Awaitable[None]]], None, None]:
+        """Context manager used to get a file like object to write into, as
+        described by file_info.
+
+        Actually yields a 3-tuple (file, fname, finish_cb), where file is a file
+        like object that can be written to, fname is the absolute path of file
+        on disk, and finish_cb is a function that returns an awaitable.
+
+        fname can be used to read the contents from after upload, e.g. to
+        generate thumbnails.
+
+        finish_cb must be called and awaited after the file has been
+        successfully written to. Should not be called if there was an error;
+        exiting the block cleanly without a completed finish_cb raises.
+
+        Args:
+            file_info: Info about the file to store
+
+        Example:
+
+            with media_storage.store_into_file(info) as (f, fname, finish_cb):
+                # .. write into f ...
+                await finish_cb()
+        """
+
+        path = self._file_info_to_path(file_info)
+        fname = os.path.join(self.local_media_directory, path)
+
+        dirname = os.path.dirname(fname)
+        os.makedirs(dirname, exist_ok=True)
+
+        finished_called = [False]  # a list so the nested finish() can mutate it
+
+        try:
+            with open(fname, "wb") as f:
+
+                async def finish() -> None:
+                    # Ensure that all writes have been flushed and close the
+                    # file.
+                    f.flush()
+                    f.close()
+
+                    spam_check = await self.spam_checker.check_media_file_for_spam(
+                        ReadableFileWrapper(self.clock, fname), file_info
+                    )
+                    if spam_check != synapse.module_api.NOT_SPAM:
+                        logger.info("Blocking media due to spam checker")
+                        # Note that we'll delete the stored media, due to the
+                        # try/except below. The media also won't be stored in
+                        # the DB.
+                        # We currently ignore any additional field returned by
+                        # the spam-check API.
+                        raise SpamMediaException(errcode=spam_check[0])
+
+                    for provider in self.storage_providers:
+                        await provider.store_file(path, file_info)
+
+                    finished_called[0] = True
+
+                yield f, fname, finish
+        except Exception as e:
+            try:
+                os.remove(fname)
+            except Exception:
+                pass
+
+            raise e from None
+
+        if not finished_called[0]:  # test the flag: the list itself is always truthy
+            raise Exception("Finished callback not called")
+
+    async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]:
+        """Attempts to fetch media described by file_info from the local cache
+        and configured storage providers.
+
+        Args:
+            file_info: Info about the file to fetch
+
+        Returns:
+            Returns a Responder if the file was found, otherwise None.
+        """
+        paths = [self._file_info_to_path(file_info)]
+
+        # fallback for remote thumbnails with no method in the filename
+        if file_info.thumbnail and file_info.server_name:
+            paths.append(
+                self.filepaths.remote_media_thumbnail_rel_legacy(
+                    server_name=file_info.server_name,
+                    file_id=file_info.file_id,
+                    width=file_info.thumbnail.width,
+                    height=file_info.thumbnail.height,
+                    content_type=file_info.thumbnail.type,
+                )
+            )
+
+        for path in paths:
+            local_path = os.path.join(self.local_media_directory, path)
+            if os.path.exists(local_path):
+                logger.debug("responding with local file %s", local_path)
+                return FileResponder(open(local_path, "rb"))
+            logger.debug("local file %s did not exist", local_path)
+
+        for provider in self.storage_providers:
+            for path in paths:
+                res: Any = await provider.fetch(path, file_info)
+                if res:
+                    logger.debug("Streaming %s from %s", path, provider)
+                    return res
+                logger.debug("%s not found on %s", path, provider)
+
+        return None
+
+    async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str:
+        """Ensures that the given file is in the local cache. Attempts to
+        download it from storage providers if it isn't.
+
+        Args:
+            file_info: Info about the file to look up
+
+        Returns:
+            Full path to local file
+        """
+        path = self._file_info_to_path(file_info)
+        local_path = os.path.join(self.local_media_directory, path)
+        if os.path.exists(local_path):
+            return local_path
+
+        # Fallback for paths without method names
+        # Should be removed in the future
+        if file_info.thumbnail and file_info.server_name:
+            legacy_path = self.filepaths.remote_media_thumbnail_rel_legacy(
+                server_name=file_info.server_name,
+                file_id=file_info.file_id,
+                width=file_info.thumbnail.width,
+                height=file_info.thumbnail.height,
+                content_type=file_info.thumbnail.type,
+            )
+            legacy_local_path = os.path.join(self.local_media_directory, legacy_path)
+            if os.path.exists(legacy_local_path):
+                return legacy_local_path
+
+        dirname = os.path.dirname(local_path)
+        os.makedirs(dirname, exist_ok=True)
+
+        for provider in self.storage_providers:
+            res: Any = await provider.fetch(path, file_info)
+            if res:
+                with res:
+                    consumer = BackgroundFileConsumer(
+                        open(local_path, "wb"), self.reactor
+                    )
+                    await res.write_to_consumer(consumer)
+                    await consumer.wait()
+                return local_path
+
+        raise NotFoundError()
+
+    def _file_info_to_path(self, file_info: FileInfo) -> str:
+        """Converts file_info into a relative path.
+
+        The path is suitable for storing files under a directory, e.g. used to
+        store files on local FS under the base media repository directory.
+        """
+        if file_info.url_cache:
+            if file_info.thumbnail:
+                return self.filepaths.url_cache_thumbnail_rel(
+                    media_id=file_info.file_id,
+                    width=file_info.thumbnail.width,
+                    height=file_info.thumbnail.height,
+                    content_type=file_info.thumbnail.type,
+                    method=file_info.thumbnail.method,
+                )
+            return self.filepaths.url_cache_filepath_rel(file_info.file_id)
+
+        if file_info.server_name:
+            if file_info.thumbnail:
+                return self.filepaths.remote_media_thumbnail_rel(
+                    server_name=file_info.server_name,
+                    file_id=file_info.file_id,
+                    width=file_info.thumbnail.width,
+                    height=file_info.thumbnail.height,
+                    content_type=file_info.thumbnail.type,
+                    method=file_info.thumbnail.method,
+                )
+            return self.filepaths.remote_media_filepath_rel(
+                file_info.server_name, file_info.file_id
+            )
+
+        if file_info.thumbnail:
+            return self.filepaths.local_media_thumbnail_rel(
+                media_id=file_info.file_id,
+                width=file_info.thumbnail.width,
+                height=file_info.thumbnail.height,
+                content_type=file_info.thumbnail.type,
+                method=file_info.thumbnail.method,
+            )
+        return self.filepaths.local_media_filepath_rel(file_info.file_id)
+
+
+def _write_file_synchronously(source: IO, dest: IO) -> None:
+    """Copy all of `source` into the file like `dest` synchronously. Blocks
+    until done, so it should be called from a thread.
+
+    Args:
+        source: A file like object that's to be written (rewound to its start first)
+        dest: A file like object to be written to
+    """
+    source.seek(0)  # Ensure we read from the start of the file
+    shutil.copyfileobj(source, dest)
+
+
+class FileResponder(Responder):
+    """Wraps an open file that can be sent to a request.
+
+    Args:
+        open_file: A file like object to be streamed to the client,
+            is closed when finished streaming.
+    """
+
+    def __init__(self, open_file: IO):
+        self.open_file = open_file
+
+    def write_to_consumer(self, consumer: IConsumer) -> Deferred:
+        return make_deferred_yieldable(
+            FileSender().beginFileTransfer(self.open_file, consumer)
+        )
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
+        self.open_file.close()  # closed whether or not the transfer raised
+
+
+class SpamMediaException(NotFoundError):
+    """The media was blocked by a spam checker. As a NotFoundError subclass, it
+    makes us simply 404 the request (in the same way as if it was quarantined).
+    """
+
+
+@attr.s(slots=True, auto_attribs=True)
+class ReadableFileWrapper:
+    """Wrapper that allows reading a file in chunks, yielding to the reactor,
+    and writing to a callback.
+
+    This is a simplified `FileSender` that writes to a plain callback (e.g. an
+    IO object's `write`) rather than an `IConsumer`.
+    """
+
+    CHUNK_SIZE = 2**14  # bytes read per chunk (16 KiB)
+
+    clock: Clock
+    path: str
+
+    async def write_chunks_to(self, callback: Callable[[bytes], object]) -> None:
+        """Reads the file in chunks and calls the callback with each chunk."""
+
+        with open(self.path, "rb") as file:
+            while True:
+                chunk = file.read(self.CHUNK_SIZE)
+                if not chunk:
+                    break
+
+                callback(chunk)
+
+                # We yield to the reactor by sleeping for 0 seconds.
+                await self.clock.sleep(0)
diff --git a/synapse/media/oembed.py b/synapse/media/oembed.py
new file mode 100644
index 0000000000..c0eaf04be5
--- /dev/null
+++ b/synapse/media/oembed.py
@@ -0,0 +1,265 @@
+#  Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+import html
+import logging
+import urllib.parse
+from typing import TYPE_CHECKING, List, Optional
+
+import attr
+
+from synapse.media.preview_html import parse_html_description
+from synapse.types import JsonDict
+from synapse.util import json_decoder
+
+if TYPE_CHECKING:
+    from lxml import etree
+
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class OEmbedResult:
+    # The Open Graph result (converted from the oEmbed result).
+    open_graph_result: JsonDict
+    # The author_name of the oEmbed result (None if absent or not a string).
+    author_name: Optional[str]
+    # Number of milliseconds to cache the content, according to the oEmbed response.
+    #
+    # This will be None if no cache-age is provided in the oEmbed response (or
+    # if the oEmbed response cannot be turned into an Open Graph response).
+    cache_age: Optional[int]
+
+
+class OEmbedProvider:
+    """
+    A helper for accessing oEmbed content.
+
+    It can be used to check if a URL should be accessed via oEmbed and for
+    requesting/parsing oEmbed content.
+    """
+
+    def __init__(self, hs: "HomeServer"):
+        self._oembed_patterns = {}  # maps each URL pattern to its API endpoint
+        for oembed_endpoint in hs.config.oembed.oembed_patterns:
+            api_endpoint = oembed_endpoint.api_endpoint
+
+            # Only JSON is supported at the moment. This could be declared in
+            # the formats field. Otherwise, if the endpoint ends in .xml assume
+            # it doesn't support JSON.
+            if (
+                oembed_endpoint.formats is not None
+                and "json" not in oembed_endpoint.formats
+            ) or api_endpoint.endswith(".xml"):
+                logger.info(
+                    "Ignoring oEmbed endpoint due to not supporting JSON: %s",
+                    api_endpoint,
+                )
+                continue
+
+            # Iterate through each URL pattern and point it to the endpoint.
+            for pattern in oembed_endpoint.url_patterns:
+                self._oembed_patterns[pattern] = api_endpoint
+
+    def get_oembed_url(self, url: str) -> Optional[str]:
+        """
+        Check whether the URL should be downloaded as oEmbed content instead.
+
+        Args:
+            url: The URL to check.
+
+        Returns:
+            A URL to use instead or None if the original URL should be used.
+        """
+        for url_pattern, endpoint in self._oembed_patterns.items():
+            if url_pattern.fullmatch(url):
+                # TODO Specify max height / width.
+
+                # Note that only the JSON format is supported, some endpoints want
+                # this in the URL, others want it as an argument.
+                endpoint = endpoint.replace("{format}", "json")
+
+                args = {"url": url, "format": "json"}
+                query_str = urllib.parse.urlencode(args, True)
+                return f"{endpoint}?{query_str}"
+
+        # No match.
+        return None
+
+    def autodiscover_from_html(self, tree: "etree.Element") -> Optional[str]:
+        """
+        Search an HTML document for oEmbed autodiscovery information.
+
+        Args:
+            tree: The parsed HTML body.
+
+        Returns:
+            The URL to use for oEmbed information, or None if no URL was found.
+        """
+        # Search for link elements with the proper rel and type attributes.
+        for tag in tree.xpath(
+            "//link[@rel='alternate'][@type='application/json+oembed']"
+        ):
+            if "href" in tag.attrib:
+                return tag.attrib["href"]
+
+        # Some providers (e.g. Flickr) use alternative instead of alternate.
+        for tag in tree.xpath(
+            "//link[@rel='alternative'][@type='application/json+oembed']"
+        ):
+            if "href" in tag.attrib:
+                return tag.attrib["href"]
+
+        return None
+
+    def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult:
+        """
+        Parse the oEmbed response into an Open Graph response.
+
+        Args:
+            url: The URL which is being previewed (not the one which was requested).
+            raw_body: The oEmbed response as JSON encoded as bytes.
+
+        Returns:
+            The Open Graph data, author and cache age; empty if the body is unusable.
+        """
+
+        try:
+            # oEmbed responses *must* be UTF-8 according to the spec.
+            oembed = json_decoder.decode(raw_body.decode("utf-8"))
+        except ValueError:
+            return OEmbedResult({}, None, None)
+        if not isinstance(oembed, dict):  # valid JSON, but not an object
+            return OEmbedResult({}, None, None)
+
+        # The version is required, but is often missing or sent as a number. Be lenient.
+        oembed_version = oembed.get("version", "1.0")
+        if oembed_version != "1.0" and oembed_version != 1:
+            return OEmbedResult({}, None, None)
+
+        # Attempt to parse the cache age, if possible.
+        try:
+            cache_age = int(oembed.get("cache_age")) * 1000
+        except (TypeError, ValueError):
+            # If the cache age cannot be parsed (e.g. wrong type or invalid
+            # string), ignore it.
+            cache_age = None
+
+        # The oEmbed response converted to Open Graph.
+        open_graph_response: JsonDict = {"og:url": url}
+
+        title = oembed.get("title")
+        if title and isinstance(title, str):
+            # A common WordPress plug-in seems to incorrectly escape entities
+            # in the oEmbed response.
+            open_graph_response["og:title"] = html.unescape(title)
+
+        author_name = oembed.get("author_name")
+        if not isinstance(author_name, str):
+            author_name = None
+
+        # Use the provider name as the site name.
+        provider_name = oembed.get("provider_name")
+        if provider_name and isinstance(provider_name, str):
+            open_graph_response["og:site_name"] = provider_name
+
+        # If a thumbnail exists, use it. Note that dimensions will be calculated later.
+        thumbnail_url = oembed.get("thumbnail_url")
+        if thumbnail_url and isinstance(thumbnail_url, str):
+            open_graph_response["og:image"] = thumbnail_url
+
+        # Process each type separately.
+        oembed_type = oembed.get("type")
+        if oembed_type == "rich":
+            html_str = oembed.get("html")
+            if isinstance(html_str, str):
+                calc_description_and_urls(open_graph_response, html_str)
+
+        elif oembed_type == "photo":
+            # If this is a photo, use the full image, not the thumbnail.
+            photo_url = oembed.get("url")
+            if photo_url and isinstance(photo_url, str):
+                open_graph_response["og:image"] = photo_url
+
+        elif oembed_type == "video":
+            open_graph_response["og:type"] = "video.other"
+            html_str = oembed.get("html")
+            if html_str and isinstance(html_str, str):
+                calc_description_and_urls(open_graph_response, html_str)
+            for size in ("width", "height"):
+                val = oembed.get(size)
+                if type(val) is int:  # exact int only (this rejects bool)
+                    open_graph_response[f"og:video:{size}"] = val
+
+        elif oembed_type == "link":
+            open_graph_response["og:type"] = "website"
+
+        else:
+            logger.warning("Unknown oEmbed type: %s", oembed_type)
+
+        return OEmbedResult(open_graph_response, author_name, cache_age)
+
+
+def _fetch_urls(tree: "etree.Element", tag_name: str) -> List[str]:
+    """Collect the `src` attribute of every matched `tag_name` element that has one."""
+    # "//*/<tag>" matches <tag> elements that are a child of any element.
+    return [
+        el.attrib["src"] for el in tree.xpath("//*/" + tag_name) if "src" in el.attrib
+    ]
+
+
+def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> None:
+    """
+    Calculate description (and image/video URLs) for an HTML document.
+
+    This uses lxml to parse the HTML document and extract plaintext. If the
+    document is empty or yields no tree, `open_graph_response` is left untouched.
+
+    Args:
+        open_graph_response: The current Open Graph summary. This is updated with additional fields.
+        html_body: The HTML document, as a string.
+
+    Returns:
+        None; results are written into `open_graph_response`.
+    """
+    # If there's no body, nothing useful is going to be found.
+    if not html_body:
+        return
+
+    from lxml import etree
+
+    # Create an HTML parser (with lxml's `recover` option enabled).
+    parser = etree.HTMLParser(recover=True, encoding="utf-8")
+
+    # Parse the body. NOTE(review): errors raised here propagate to the caller.
+    tree = etree.fromstring(html_body, parser)
+
+    # The data was successfully parsed, but no tree was found.
+    if tree is None:
+        return
+
+    # Attempt to find interesting URLs (images, videos, embeds).
+    if "og:image" not in open_graph_response:
+        image_urls = _fetch_urls(tree, "img")
+        if image_urls:
+            open_graph_response["og:image"] = image_urls[0]
+
+    video_urls = _fetch_urls(tree, "video") + _fetch_urls(tree, "embed")
+    if video_urls:
+        open_graph_response["og:video"] = video_urls[0]
+
+    description = parse_html_description(tree)
+    if description:
+        open_graph_response["og:description"] = description
diff --git a/synapse/media/preview_html.py b/synapse/media/preview_html.py
new file mode 100644
index 0000000000..516d0434f0
--- /dev/null
+++ b/synapse/media/preview_html.py
@@ -0,0 +1,501 @@
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import codecs
+import logging
+import re
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+    Dict,
+    Generator,
+    Iterable,
+    List,
+    Optional,
+    Set,
+    Union,
+)
+
+if TYPE_CHECKING:
+    from lxml import etree
+
+logger = logging.getLogger(__name__)
+
+_charset_match = re.compile(
+    rb'<\s*meta[^>]*charset\s*=\s*"?([a-z0-9_-]+)"?', flags=re.I
+)
+_xml_encoding_match = re.compile(
+    rb'\s*<\s*\?\s*xml[^>]*encoding="([a-z0-9_-]+)"', flags=re.I
+)
+_content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I)
+
+# Certain elements aren't meant for display.
+ARIA_ROLES_TO_IGNORE = {"directory", "menu", "menubar", "toolbar"}
+
+
+def _normalise_encoding(encoding: str) -> Optional[str]:
+    """Use the Python codec's name as the normalised entry."""
+    try:
+        return codecs.lookup(encoding).name
+    except LookupError:
+        return None
+
+
+def _get_html_media_encodings(
+    body: bytes, content_type: Optional[str]
+) -> Iterable[str]:
+    """
+    Get potential encoding of the body based on the (presumably) HTML body or the content-type header.
+
+    The precedence used for finding a character encoding is:
+
+    1. <meta> tag with a charset declared.
+    2. The XML document's character encoding attribute.
+    3. The Content-Type header.
+    4. Fallback to utf-8.
+    5. Fallback to windows-1252.
+
+    This roughly follows the algorithm used by BeautifulSoup's bs4.dammit.EncodingDetector.
+
+    Args:
+        body: The HTML document, as bytes.
+        content_type: The Content-Type header.
+
+    Returns:
+        The candidate character encodings of the body, in order of precedence.
+    """
+    # There's no point in returning an encoding more than once.
+    attempted_encodings: Set[str] = set()
+
+    # Limit searches to the first 1kb, since it ought to be at the top.
+    body_start = body[:1024]
+
+    # Check if it has an encoding set in a meta tag.
+    match = _charset_match.search(body_start)
+    if match:
+        encoding = _normalise_encoding(match.group(1).decode("ascii"))
+        if encoding:
+            attempted_encodings.add(encoding)
+            yield encoding
+
+    # TODO Support <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+
+    # Check if it has an XML document with an encoding.
+    match = _xml_encoding_match.match(body_start)
+    if match:
+        encoding = _normalise_encoding(match.group(1).decode("ascii"))
+        if encoding and encoding not in attempted_encodings:
+            attempted_encodings.add(encoding)
+            yield encoding
+
+    # Check the HTTP Content-Type header for a character set.
+    if content_type:
+        content_match = _content_type_match.match(content_type)
+        if content_match:
+            encoding = _normalise_encoding(content_match.group(1))
+            if encoding and encoding not in attempted_encodings:
+                attempted_encodings.add(encoding)
+                yield encoding
+
+    # Finally, fallback to UTF-8, then windows-1252.
+    for fallback in ("utf-8", "cp1252"):
+        if fallback not in attempted_encodings:
+            yield fallback
+
+
+def decode_body(
+    body: bytes, uri: str, content_type: Optional[str] = None
+) -> Optional["etree.Element"]:
+    """
+    This uses lxml to parse the HTML document.
+
+    Args:
+        body: The HTML document, as bytes.
+        uri: The URI used to download the body.
+        content_type: The Content-Type header.
+
+    Returns:
+        The parsed HTML body, or None if an error occurred during processing.
+    """
+    # If there's no body, nothing useful is going to be found.
+    if not body:
+        return None
+
+    # The idea here is that multiple encodings are tried until one works.
+    # Unfortunately the result is never used and then LXML will decode the string
+    # again with the found encoding.
+    for encoding in _get_html_media_encodings(body, content_type):
+        try:
+            body.decode(encoding)
+        except Exception:
+            pass
+        else:
+            break
+    else:
+        logger.warning("Unable to decode HTML body for %s", uri)
+        return None
+
+    from lxml import etree
+
+    # Create an HTML parser.
+    parser = etree.HTMLParser(recover=True, encoding=encoding)
+
+    # Attempt to parse the body. Returns None if the body was successfully
+    # parsed, but no tree was found.
+    return etree.fromstring(body, parser)
+
+
+def _get_meta_tags(
+    tree: "etree.Element",
+    property: str,
+    prefix: str,
+    property_mapper: Optional[Callable[[str], Optional[str]]] = None,
+) -> Dict[str, Optional[str]]:
+    """
+    Search for meta tags prefixed with a particular string.
+
+    Args:
+        tree: The parsed HTML document.
+        property: The name of the property which contains the tag name, e.g.
+            "property" for Open Graph.
+        prefix: The prefix on the property to search for, e.g. "og" for Open Graph.
+        property_mapper: An optional callable to map the property to the Open Graph
+            form. Can return None for a key to ignore that key.
+
+    Returns:
+        A map of tag name to value.
+    """
+    results: Dict[str, Optional[str]] = {}
+    for tag in tree.xpath(
+        f"//*/meta[starts-with(@{property}, '{prefix}:')][@content][not(@content='')]"
+    ):
+        # if we've got more than 50 tags, someone is taking the piss
+        if len(results) >= 50:
+            logger.warning(
+                "Skipping parsing of Open Graph for page with too many '%s:' tags",
+                prefix,
+            )
+            return {}
+
+        key = tag.attrib[property]
+        if property_mapper:
+            key = property_mapper(key)
+            # None is a special value used to ignore a value.
+            if key is None:
+                continue
+
+        results[key] = tag.attrib["content"]
+
+    return results
+
+
+def _map_twitter_to_open_graph(key: str) -> Optional[str]:
+    """
+    Map a Twitter card property to the analogous Open Graph property.
+
+    Args:
+        key: The Twitter card property (starts with "twitter:").
+
+    Returns:
+        The Open Graph property (starts with "og:") or None to have this property
+        be ignored.
+    """
+    # Twitter card properties with no analogous Open Graph property.
+    if key == "twitter:card" or key == "twitter:creator":
+        return None
+    if key == "twitter:site":
+        return "og:site_name"
+    # Otherwise, swap twitter to og.
+    return "og" + key[7:]
+
+
+def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]:
+    """
+    Parse the HTML document into an Open Graph response.
+
+    This uses lxml to search the HTML document for Open Graph data (or
+    synthesizes it from the document).
+
+    Args:
+        tree: The parsed HTML document.
+
+    Returns:
+        The Open Graph response as a dictionary.
+    """
+
+    # Search for Open Graph (og:) meta tags, e.g.:
+    #
+    # "og:type"         : "video",
+    # "og:url"          : "https://www.youtube.com/watch?v=LXDBoHyjmtw",
+    # "og:site_name"    : "YouTube",
+    # "og:video:type"   : "application/x-shockwave-flash",
+    # "og:description"  : "Fun stuff happening here",
+    # "og:title"        : "RemoteJam - Matrix team hack for Disrupt Europe Hackathon",
+    # "og:image"        : "https://i.ytimg.com/vi/LXDBoHyjmtw/maxresdefault.jpg",
+    # "og:video:url"    : "http://www.youtube.com/v/LXDBoHyjmtw?version=3&autohide=1",
+    # "og:video:width"  : "1280"
+    # "og:video:height" : "720",
+    # "og:video:secure_url": "https://www.youtube.com/v/LXDBoHyjmtw?version=3",
+
+    og = _get_meta_tags(tree, "property", "og")
+
+    # TODO: Search for properties specific to the different Open Graph types,
+    # such as article: meta tags, e.g.:
+    #
+    # "article:publisher" : "https://www.facebook.com/thethudonline" />
+    # "article:author" content="https://www.facebook.com/thethudonline" />
+    # "article:tag" content="baby" />
+    # "article:section" content="Breaking News" />
+    # "article:published_time" content="2016-03-31T19:58:24+00:00" />
+    # "article:modified_time" content="2016-04-01T18:31:53+00:00" />
+
+    # Search for Twitter Card (twitter:) meta tags, e.g.:
+    #
+    # "twitter:site"    : "@matrixdotorg"
+    # "twitter:creator" : "@matrixdotorg"
+    #
+    # Twitter cards tags also duplicate Open Graph tags.
+    #
+    # See https://developer.twitter.com/en/docs/twitter-for-websites/cards/guides/getting-started
+    twitter = _get_meta_tags(tree, "name", "twitter", _map_twitter_to_open_graph)
+    # Merge the Twitter values with the Open Graph values, but do not overwrite
+    # information from Open Graph tags.
+    for key, value in twitter.items():
+        if key not in og:
+            og[key] = value
+
+    if "og:title" not in og:
+        # Attempt to find a title from the title tag, or the biggest header on the page.
+        title = tree.xpath("((//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1])/text()")
+        if title:
+            og["og:title"] = title[0].strip()
+        else:
+            og["og:title"] = None
+
+    if "og:image" not in og:
+        meta_image = tree.xpath(
+            "//*/meta[translate(@itemprop, 'IMAGE', 'image')='image'][not(@content='')]/@content[1]"
+        )
+        # If a meta image is found, use it.
+        if meta_image:
+            og["og:image"] = meta_image[0]
+        else:
+            # Try to find images which are larger than 10px by 10px.
+            #
+            # TODO: consider inlined CSS styles as well as width & height attribs
+            images = tree.xpath("//img[@src][number(@width)>10][number(@height)>10]")
+            images = sorted(
+                images,
+                key=lambda i: (
+                    -1 * float(i.attrib["width"]) * float(i.attrib["height"])
+                ),
+            )
+            # If no images were found, try to find *any* images.
+            if not images:
+                images = tree.xpath("//img[@src][1]")
+            if images:
+                og["og:image"] = images[0].attrib["src"]
+
+            # Finally, fallback to the favicon if nothing else.
+            else:
+                favicons = tree.xpath("//link[@href][contains(@rel, 'icon')]/@href[1]")
+                if favicons:
+                    og["og:image"] = favicons[0]
+
+    if "og:description" not in og:
+        # Check the first meta description tag for content.
+        meta_description = tree.xpath(
+            "//*/meta[translate(@name, 'DESCRIPTION', 'description')='description'][not(@content='')]/@content[1]"
+        )
+        # If a meta description is found with content, use it.
+        if meta_description:
+            og["og:description"] = meta_description[0]
+        else:
+            og["og:description"] = parse_html_description(tree)
+    elif og["og:description"]:
+        # This must be a non-empty string at this point.
+        assert isinstance(og["og:description"], str)
+        og["og:description"] = summarize_paragraphs([og["og:description"]])
+
+    # TODO: delete the url downloads to stop diskfilling,
+    # as we only ever cared about its OG
+    return og
+
+
+def parse_html_description(tree: "etree.Element") -> Optional[str]:
+    """
+    Calculate a text description based on an HTML document.
+
+    Grabs any text nodes which are inside the <body/> tag, unless they are within
+    an HTML5 semantic markup tag (<header/>, <nav/>, <aside/>, <footer/>), or
+    if they are within a <script/>, <svg/> or <style/> tag, or if they are within
+    a tag whose content is usually only shown to old browsers
+    (<iframe/>, <video/>, <canvas/>, <picture/>).
+
+    This is a very very very coarse approximation to a plain text render of the page.
+
+    Args:
+        tree: The parsed HTML document.
+
+    Returns:
+        The plain text description, or None if one cannot be generated.
+    """
+    # We don't just use XPATH here as that is slow on some machines.
+
+    from lxml import etree
+
+    TAGS_TO_REMOVE = {
+        "header",
+        "nav",
+        "aside",
+        "footer",
+        "script",
+        "noscript",
+        "style",
+        "svg",
+        "iframe",
+        "video",
+        "canvas",
+        "img",
+        "picture",
+        etree.Comment,
+    }
+
+    # Split all the text nodes into paragraphs (by splitting on new
+    # lines)
+    text_nodes = (
+        re.sub(r"\s+", "\n", el).strip()
+        for el in _iterate_over_text(tree.find("body"), TAGS_TO_REMOVE)
+    )
+    return summarize_paragraphs(text_nodes)
+
+
+def _iterate_over_text(
+    tree: Optional["etree.Element"],
+    tags_to_ignore: Set[Union[str, "etree.Comment"]],
+    stack_limit: int = 1024,
+) -> Generator[str, None, None]:
+    """Iterate over the tree returning text nodes in a depth first fashion,
+    skipping text nodes inside certain tags.
+
+    Args:
+        tree: The parent element to iterate. Can be None if there isn't one.
+        tags_to_ignore: Set of tags to ignore
+        stack_limit: Maximum stack size limit for depth-first traversal.
+            Nodes will be dropped if this limit is hit, which may truncate the
+            textual result.
+            Intended to limit the maximum working memory when generating a preview.
+    """
+
+    if tree is None:
+        return
+
+    # This is a stack whose items are elements to iterate over *or* strings
+    # to be returned.
+    elements: List[Union[str, "etree.Element"]] = [tree]
+    while elements:
+        el = elements.pop()
+
+        if isinstance(el, str):
+            yield el
+        elif el.tag not in tags_to_ignore:
+            # If the element isn't meant for display, ignore it.
+            if el.get("role") in ARIA_ROLES_TO_IGNORE:
+                continue
+
+            # el.text is the text before the first child, so we can immediately
+            # return it if the text exists.
+            if el.text:
+                yield el.text
+
+            # We add to the stack all the element's children, interspersed with
+            # each child's tail text (if it exists).
+            #
+            # We iterate in reverse order so that earlier pieces of text appear
+            # closer to the top of the stack.
+            for child in el.iterchildren(reversed=True):
+                if len(elements) > stack_limit:
+                    # We've hit our limit for working memory
+                    break
+
+                if child.tail:
+                    # The tail text of a node is text that comes *after* the node,
+                    # so we always include it even if we ignore the child node.
+                    elements.append(child.tail)
+
+                elements.append(child)
+
+
+def summarize_paragraphs(
+    text_nodes: Iterable[str], min_size: int = 200, max_size: int = 500
+) -> Optional[str]:
+    """
+    Try to get a summary respecting first paragraph and then word boundaries.
+
+    Args:
+        text_nodes: The paragraphs to summarize.
+        min_size: The minimum number of characters to include.
+        max_size: The maximum number of characters to include.
+
+    Returns:
+        A summary of the text nodes, or None if that was not possible.
+    """
+
+    # TODO: Respect sentences?
+
+    description = ""
+
+    # Keep adding paragraphs until we get to the MIN_SIZE.
+    for text_node in text_nodes:
+        if len(description) < min_size:
+            text_node = re.sub(r"[\t \r\n]+", " ", text_node)
+            description += text_node + "\n\n"
+        else:
+            break
+
+    description = description.strip()
+    description = re.sub(r"[\t ]+", " ", description)
+    description = re.sub(r"[\t \r\n]*[\r\n]+", "\n\n", description)
+
+    # If the concatenation of paragraphs to get above MIN_SIZE
+    # took us over MAX_SIZE, then we need to truncate mid paragraph
+    if len(description) > max_size:
+        new_desc = ""
+
+        # This splits the paragraph into words, but keeping the
+        # (preceding) whitespace intact so we can easily concat
+        # words back together.
+        for match in re.finditer(r"\s*\S+", description):
+            word = match.group()
+
+            # Keep adding words while the total length is less than
+            # MAX_SIZE.
+            if len(word) + len(new_desc) < max_size:
+                new_desc += word
+            else:
+                # At this point the next word *will* take us over
+                # MAX_SIZE, but we also want to ensure that it's not
+                # a huge word. If it is add it anyway and we'll
+                # truncate later.
+                if len(new_desc) < min_size:
+                    new_desc += word
+                break
+
+        # Double check that we're not over the limit
+        if len(new_desc) > max_size:
+            new_desc = new_desc[:max_size]
+
+        # We always add an ellipsis because at the very least
+        # we chopped mid paragraph.
+        description = new_desc.strip() + "…"
+    return description if description else None
diff --git a/synapse/media/storage_provider.py b/synapse/media/storage_provider.py
new file mode 100644
index 0000000000..1c9b71d69c
--- /dev/null
+++ b/synapse/media/storage_provider.py
@@ -0,0 +1,181 @@
+# Copyright 2018-2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import abc
+import logging
+import os
+import shutil
+from typing import TYPE_CHECKING, Callable, Optional
+
+from synapse.config._base import Config
+from synapse.logging.context import defer_to_thread, run_in_background
+from synapse.util.async_helpers import maybe_awaitable
+
+from ._base import FileInfo, Responder
+from .media_storage import FileResponder
+
+logger = logging.getLogger(__name__)
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+
+class StorageProvider(metaclass=abc.ABCMeta):
+    """A storage provider is a service that can store uploaded media and
+    retrieve them.
+    """
+
+    @abc.abstractmethod
+    async def store_file(self, path: str, file_info: FileInfo) -> None:
+        """Store the file described by file_info. The actual contents can be
+        retrieved by reading the file in file_info.upload_path.
+
+        Args:
+            path: Relative path of file in local cache
+            file_info: The metadata of the file.
+        """
+
+    @abc.abstractmethod
+    async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
+        """Attempt to fetch the file described by file_info and stream it
+        via the returned Responder.
+
+        Args:
+            path: Relative path of file in local cache
+            file_info: The metadata of the file.
+
+        Returns:
+            Returns a Responder if the provider has the file, otherwise returns None.
+        """
+
+
+class StorageProviderWrapper(StorageProvider):
+    """Wraps a storage provider and provides various config options
+
+    Args:
+        backend: The storage provider to wrap.
+        store_local: Whether to store new local files or not.
+        store_synchronous: Whether to wait for file to be successfully
+            uploaded, or to do the upload in the background.
+        store_remote: Whether remote media should be uploaded
+    """
+
+    def __init__(
+        self,
+        backend: StorageProvider,
+        store_local: bool,
+        store_synchronous: bool,
+        store_remote: bool,
+    ):
+        self.backend = backend
+        self.store_local = store_local
+        self.store_synchronous = store_synchronous
+        self.store_remote = store_remote
+
+    def __str__(self) -> str:
+        return "StorageProviderWrapper[%s]" % (self.backend,)
+
+    async def store_file(self, path: str, file_info: FileInfo) -> None:
+        if not file_info.server_name and not self.store_local:
+            return None
+
+        if file_info.server_name and not self.store_remote:
+            return None
+
+        if file_info.url_cache:
+            # The URL preview cache is short lived and not worth offloading or
+            # backing up.
+            return None
+
+        if self.store_synchronous:
+            # store_file is supposed to return an Awaitable, but guard
+            # against improper implementations.
+            await maybe_awaitable(self.backend.store_file(path, file_info))  # type: ignore
+        else:
+            # TODO: Handle errors.
+            async def store() -> None:
+                try:
+                    return await maybe_awaitable(
+                        self.backend.store_file(path, file_info)
+                    )
+                except Exception:
+                    logger.exception("Error storing file")
+
+            run_in_background(store)
+
+    async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
+        if file_info.url_cache:
+            # Files in the URL preview cache definitely aren't stored here,
+            # so avoid any potentially slow I/O or network access.
+            return None
+
+        # fetch is supposed to return an Awaitable, but guard
+        # against improper implementations.
+        return await maybe_awaitable(self.backend.fetch(path, file_info))
+
+
+class FileStorageProviderBackend(StorageProvider):
+    """A storage provider that stores files in a directory on a filesystem.
+
+    Args:
+        hs: The HomeServer instance.
+        config: The config returned by `parse_config`.
+    """
+
+    def __init__(self, hs: "HomeServer", config: str):
+        self.hs = hs
+        self.cache_directory = hs.config.media.media_store_path
+        self.base_directory = config
+
+    def __str__(self) -> str:
+        return "FileStorageProviderBackend[%s]" % (self.base_directory,)
+
+    async def store_file(self, path: str, file_info: FileInfo) -> None:
+        """See StorageProvider.store_file"""
+
+        primary_fname = os.path.join(self.cache_directory, path)
+        backup_fname = os.path.join(self.base_directory, path)
+
+        dirname = os.path.dirname(backup_fname)
+        os.makedirs(dirname, exist_ok=True)
+
+        # mypy needs help inferring the type of the second parameter, which is generic
+        shutil_copyfile: Callable[[str, str], str] = shutil.copyfile
+        await defer_to_thread(
+            self.hs.get_reactor(),
+            shutil_copyfile,
+            primary_fname,
+            backup_fname,
+        )
+
+    async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
+        """See StorageProvider.fetch"""
+
+        backup_fname = os.path.join(self.base_directory, path)
+        if os.path.isfile(backup_fname):
+            return FileResponder(open(backup_fname, "rb"))
+
+        return None
+
+    @staticmethod
+    def parse_config(config: dict) -> str:
+        """Called on startup to parse config supplied. This should parse
+        the config and raise if there is a problem.
+
+        The returned value is passed into the constructor.
+
+        In this case we only care about a single param, the directory, so let's
+        just pull that out.
+        """
+        return Config.ensure_directory(config["directory"])
diff --git a/synapse/media/thumbnailer.py b/synapse/media/thumbnailer.py
new file mode 100644
index 0000000000..f909a4fb9a
--- /dev/null
+++ b/synapse/media/thumbnailer.py
@@ -0,0 +1,221 @@
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from io import BytesIO
+from types import TracebackType
+from typing import Optional, Tuple, Type
+
+from PIL import Image
+
+logger = logging.getLogger(__name__)
+
+EXIF_ORIENTATION_TAG = 0x0112
+EXIF_TRANSPOSE_MAPPINGS = {
+    2: Image.FLIP_LEFT_RIGHT,
+    3: Image.ROTATE_180,
+    4: Image.FLIP_TOP_BOTTOM,
+    5: Image.TRANSPOSE,
+    6: Image.ROTATE_270,
+    7: Image.TRANSVERSE,
+    8: Image.ROTATE_90,
+}
+
+
+class ThumbnailError(Exception):
+    """An error occurred generating a thumbnail."""
+
+
+class Thumbnailer:
+    FORMATS = {"image/jpeg": "JPEG", "image/png": "PNG"}
+
+    @staticmethod
+    def set_limits(max_image_pixels: int) -> None:
+        Image.MAX_IMAGE_PIXELS = max_image_pixels
+
+    def __init__(self, input_path: str):
+        # Have we closed the image?
+        self._closed = False
+
+        try:
+            self.image = Image.open(input_path)
+        except OSError as e:
+            # If an error occurs opening the image, a thumbnail won't be able to
+            # be generated.
+            raise ThumbnailError from e
+        except Image.DecompressionBombError as e:
+            # If an image decompression bomb error occurs opening the image,
+            # then the image exceeds the pixel limit and a thumbnail won't
+            # be able to be generated.
+            raise ThumbnailError from e
+
+        self.width, self.height = self.image.size
+        self.transpose_method = None
+        try:
+            # We don't use ImageOps.exif_transpose since it crashes with big EXIF
+            #
+            # Ignore safety: Pillow seems to acknowledge that this method is
+            # "private, experimental, but generally widely used". Pillow 6
+            # includes a public getexif() method (no underscore) that we might
+            # consider using instead when we can bump that dependency.
+            #
+            # At the time of writing, Debian buster (currently oldstable)
+            # provides version 5.4.1. It's expected to EOL in mid-2022, see
+            # https://wiki.debian.org/DebianReleases#Production_Releases
+            image_exif = self.image._getexif()  # type: ignore
+            if image_exif is not None:
+                image_orientation = image_exif.get(EXIF_ORIENTATION_TAG)
+                assert type(image_orientation) is int
+                self.transpose_method = EXIF_TRANSPOSE_MAPPINGS.get(image_orientation)
+        except Exception as e:
+            # A lot of parsing errors can happen when parsing EXIF
+            logger.info("Error parsing image EXIF information: %s", e)
+
+    def transpose(self) -> Tuple[int, int]:
+        """Transpose the image using its EXIF Orientation tag
+
+        Returns:
+            A tuple containing the new image size in pixels as (width, height).
+        """
+        if self.transpose_method is not None:
+            # Safety: `transpose` takes an int rather than e.g. an IntEnum.
+            # self.transpose_method is set above to be a value in
+            # EXIF_TRANSPOSE_MAPPINGS, and that only contains correct values.
+            with self.image:
+                self.image = self.image.transpose(self.transpose_method)  # type: ignore[arg-type]
+            self.width, self.height = self.image.size
+            self.transpose_method = None
+            # We don't need EXIF any more
+            self.image.info["exif"] = None
+        return self.image.size
+
+    def aspect(self, max_width: int, max_height: int) -> Tuple[int, int]:
+        """Calculate the largest size that preserves aspect ratio which
+        fits within the given rectangle::
+
+            (w_in / h_in) = (w_out / h_out)
+            w_out = max(min(w_max, h_max * (w_in / h_in)), 1)
+            h_out = max(min(h_max, w_max * (h_in / w_in)), 1)
+
+        Args:
+            max_width: The largest possible width.
+            max_height: The largest possible height.
+        """
+
+        if max_width * self.height < max_height * self.width:
+            return max_width, max((max_width * self.height) // self.width, 1)
+        else:
+            return max((max_height * self.width) // self.height, 1), max_height
+
+    def _resize(self, width: int, height: int) -> Image.Image:
+        # 1-bit or 8-bit color palette images need converting to RGB
+        # otherwise they will be scaled using nearest neighbour which
+        # looks awful.
+        #
+        # If the image has transparency, use RGBA instead.
+        if self.image.mode in ["1", "L", "P"]:
+            if self.image.info.get("transparency", None) is not None:
+                with self.image:
+                    self.image = self.image.convert("RGBA")
+            else:
+                with self.image:
+                    self.image = self.image.convert("RGB")
+        return self.image.resize((width, height), Image.ANTIALIAS)
+
+    def scale(self, width: int, height: int, output_type: str) -> BytesIO:
+        """Rescales the image to the given dimensions.
+
+        Returns:
+            The bytes of the encoded image ready to be written to disk
+        """
+        with self._resize(width, height) as scaled:
+            return self._encode_image(scaled, output_type)
+
+    def crop(self, width: int, height: int, output_type: str) -> BytesIO:
+        """Rescales and crops the image to the given dimensions preserving
+        aspect::
+            (w_in / h_in) = (w_scaled / h_scaled)
+            w_scaled = max(w_out, h_out * (w_in / h_in))
+            h_scaled = max(h_out, w_out * (h_in / w_in))
+
+        Args:
+            width: The width of the cropped output image.
+            height: The height of the cropped output image.
+
+        Returns:
+            The bytes of the encoded image ready to be written to disk
+        """
+        if width * self.height > height * self.width:
+            scaled_width = width
+            scaled_height = (width * self.height) // self.width
+            crop_top = (scaled_height - height) // 2
+            crop_bottom = height + crop_top
+            crop = (0, crop_top, width, crop_bottom)
+        else:
+            scaled_width = (height * self.width) // self.height
+            scaled_height = height
+            crop_left = (scaled_width - width) // 2
+            crop_right = width + crop_left
+            crop = (crop_left, 0, crop_right, height)
+
+        with self._resize(scaled_width, scaled_height) as scaled_image:
+            with scaled_image.crop(crop) as cropped:
+                return self._encode_image(cropped, output_type)
+
+    def _encode_image(self, output_image: Image.Image, output_type: str) -> BytesIO:
+        output_bytes_io = BytesIO()
+        fmt = self.FORMATS[output_type]
+        if fmt == "JPEG":
+            output_image = output_image.convert("RGB")
+        output_image.save(output_bytes_io, fmt, quality=80)
+        return output_bytes_io
+
+    def close(self) -> None:
+        """Closes the underlying image file.
+
+        Once closed no other functions can be called.
+
+        Can be called multiple times.
+        """
+
+        if self._closed:
+            return
+
+        self._closed = True
+
+        # Since we run this on the finalizer then we need to handle `__init__`
+        # raising an exception before it can define `self.image`.
+        image = getattr(self, "image", None)
+        if image is None:
+            return
+
+        image.close()
+
+    def __enter__(self) -> "Thumbnailer":
+        """Make `Thumbnailer` a context manager that calls `close` on
+        `__exit__`.
+        """
+        return self
+
+    def __exit__(
+        self,
+        type: Optional[Type[BaseException]],
+        value: Optional[BaseException],
+        traceback: Optional[TracebackType],
+    ) -> None:
+        self.close()
+
+    def __del__(self) -> None:
+        # Make sure we actually do close the image, rather than leak data.
+        self.close()
diff --git a/synapse/rest/media/config_resource.py b/synapse/rest/media/config_resource.py
new file mode 100644
index 0000000000..a95804d327
--- /dev/null
+++ b/synapse/rest/media/config_resource.py
@@ -0,0 +1,41 @@
+# Copyright 2018 Will Hunt <will@half-shot.uk>
+# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import TYPE_CHECKING
+
+from synapse.http.server import DirectServeJsonResource, respond_with_json
+from synapse.http.site import SynapseRequest
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+
+class MediaConfigResource(DirectServeJsonResource):
+    """Serves the media config (currently just the upload size limit) as JSON."""
+    isLeaf = True
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__()
+        self.clock = hs.get_clock()
+        self.auth = hs.get_auth()
+        self.limits_dict = {"m.upload.size": hs.config.media.max_upload_size}
+
+    async def _async_render_GET(self, request: SynapseRequest) -> None:
+        await self.auth.get_user_by_req(request)
+        respond_with_json(request, 200, self.limits_dict, send_cors=True)
+
+    async def _async_render_OPTIONS(self, request: SynapseRequest) -> None:
+        respond_with_json(request, 200, {}, send_cors=True)
diff --git a/synapse/rest/media/download_resource.py b/synapse/rest/media/download_resource.py
new file mode 100644
index 0000000000..8f270cf4cc
--- /dev/null
+++ b/synapse/rest/media/download_resource.py
@@ -0,0 +1,75 @@
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import TYPE_CHECKING
+
+from synapse.http.server import (
+    DirectServeJsonResource,
+    set_corp_headers,
+    set_cors_headers,
+)
+from synapse.http.servlet import parse_boolean
+from synapse.http.site import SynapseRequest
+from synapse.media._base import parse_media_id, respond_404
+
+if TYPE_CHECKING:
+    from synapse.media.media_repository import MediaRepository
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+class DownloadResource(DirectServeJsonResource):
+    """Handles media download requests for both local and remote media."""
+
+    isLeaf = True
+
+    def __init__(self, hs: "HomeServer", media_repo: "MediaRepository"):
+        super().__init__()
+        self.media_repo = media_repo
+        self.server_name = hs.hostname
+
+    async def _async_render_GET(self, request: SynapseRequest) -> None:
+        set_cors_headers(request)
+        set_corp_headers(request)
+        request.setHeader(
+            b"Content-Security-Policy",
+            b"sandbox;"
+            b" default-src 'none';"
+            b" script-src 'none';"
+            b" plugin-types application/pdf;"
+            b" style-src 'unsafe-inline';"
+            b" media-src 'self';"
+            b" object-src 'self';",
+        )
+        # Limited non-standard form of CSP for IE11
+        request.setHeader(b"X-Content-Security-Policy", b"sandbox;")
+        request.setHeader(b"Referrer-Policy", b"no-referrer")
+        server_name, media_id, name = parse_media_id(request)
+        if server_name == self.server_name:
+            await self.media_repo.get_local_media(request, media_id, name)
+            return
+
+        # Remote media is only fetched when the client has not opted out of it.
+        if not parse_boolean(request, "allow_remote", default=True):
+            logger.info(
+                "Rejecting request for remote media %s/%s due to allow_remote",
+                server_name,
+                media_id,
+            )
+            respond_404(request)
+            return
+
+        await self.media_repo.get_remote_media(request, server_name, media_id, name)
diff --git a/synapse/rest/media/media_repository_resource.py b/synapse/rest/media/media_repository_resource.py
new file mode 100644
index 0000000000..5ebaa3b032
--- /dev/null
+++ b/synapse/rest/media/media_repository_resource.py
@@ -0,0 +1,93 @@
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2018-2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import TYPE_CHECKING
+
+from synapse.config._base import ConfigError
+from synapse.http.server import UnrecognizedRequestResource
+
+from .config_resource import MediaConfigResource
+from .download_resource import DownloadResource
+from .preview_url_resource import PreviewUrlResource
+from .thumbnail_resource import ThumbnailResource
+from .upload_resource import UploadResource
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+
+class MediaRepositoryResource(UnrecognizedRequestResource):
+    """File uploading and downloading.
+
+    Uploads are POSTed to a resource which returns a token which is used to GET
+    the download::
+
+        => POST /_matrix/media/r0/upload HTTP/1.1
+           Content-Type: <media-type>
+           Content-Length: <content-length>
+
+           <media>
+
+        <= HTTP/1.1 200 OK
+           Content-Type: application/json
+
+           { "content_uri": "mxc://<server-name>/<media-id>" }
+
+        => GET /_matrix/media/r0/download/<server-name>/<media-id> HTTP/1.1
+
+        <= HTTP/1.1 200 OK
+           Content-Type: <media-type>
+           Content-Disposition: attachment;filename=<upload-filename>
+
+           <media>
+
+    Clients can get thumbnails by supplying a desired width and height and
+    thumbnailing method::
+
+        => GET /_matrix/media/r0/thumbnail/<server_name>
+                /<media-id>?width=<w>&height=<h>&method=<m> HTTP/1.1
+
+        <= HTTP/1.1 200 OK
+           Content-Type: image/jpeg or image/png
+
+           <thumbnail>
+
+    The thumbnail methods are "crop" and "scale". "scale" tries to return an
+    image where either the width or the height is smaller than the requested
+    size. The client should then scale and letterbox the image if it needs to
+    fit within a given rectangle. "crop" tries to return an image where the
+    width and height are close to the requested size and the aspect matches
+    the requested size. The client should scale the image if it needs to fit
+    within a given rectangle.
+    """
+
+    def __init__(self, hs: "HomeServer"):
+        media_config = hs.config.media
+
+        # If we're not configured to use it, raise if we somehow got here.
+        if not media_config.can_load_media_repo:
+            raise ConfigError("Synapse is not configured to use a media repo.")
+
+        super().__init__()
+        media_repo = hs.get_media_repository()
+        storage = media_repo.media_storage
+
+        self.putChild(b"upload", UploadResource(hs, media_repo))
+        self.putChild(b"download", DownloadResource(hs, media_repo))
+        self.putChild(b"thumbnail", ThumbnailResource(hs, media_repo, storage))
+        # The URL preview endpoint is only mounted when previews are enabled.
+        if media_config.url_preview_enabled:
+            preview_resource = PreviewUrlResource(hs, media_repo, storage)
+            self.putChild(b"preview_url", preview_resource)
+        self.putChild(b"config", MediaConfigResource(hs))
diff --git a/synapse/rest/media/preview_url_resource.py b/synapse/rest/media/preview_url_resource.py
new file mode 100644
index 0000000000..7ada728757
--- /dev/null
+++ b/synapse/rest/media/preview_url_resource.py
@@ -0,0 +1,869 @@
+# Copyright 2016 OpenMarket Ltd
+# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import datetime
+import errno
+import fnmatch
+import logging
+import os
+import re
+import shutil
+import sys
+import traceback
+from typing import TYPE_CHECKING, BinaryIO, Iterable, Optional, Tuple
+from urllib.parse import urljoin, urlparse, urlsplit
+from urllib.request import urlopen
+
+import attr
+
+from twisted.internet.defer import Deferred
+from twisted.internet.error import DNSLookupError
+
+from synapse.api.errors import Codes, SynapseError
+from synapse.http.client import SimpleHttpClient
+from synapse.http.server import (
+    DirectServeJsonResource,
+    respond_with_json,
+    respond_with_json_bytes,
+)
+from synapse.http.servlet import parse_integer, parse_string
+from synapse.http.site import SynapseRequest
+from synapse.logging.context import make_deferred_yieldable, run_in_background
+from synapse.media._base import FileInfo, get_filename_from_headers
+from synapse.media.media_storage import MediaStorage
+from synapse.media.oembed import OEmbedProvider
+from synapse.media.preview_html import decode_body, parse_html_to_open_graph
+from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.types import JsonDict, UserID
+from synapse.util import json_encoder
+from synapse.util.async_helpers import ObservableDeferred
+from synapse.util.caches.expiringcache import ExpiringCache
+from synapse.util.stringutils import random_string
+
+if TYPE_CHECKING:
+    from synapse.media.media_repository import MediaRepository
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+OG_TAG_NAME_MAXLEN = 50  # OG tags with a longer name are pruned
+OG_TAG_VALUE_MAXLEN = 1000  # ...as are tags whose stringified value is longer
+
+ONE_HOUR = 60 * 60 * 1000  # in milliseconds
+ONE_DAY = 24 * ONE_HOUR
+IMAGE_CACHE_EXPIRY_MS = 2 * ONE_DAY  # two days
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class DownloadResult:
+    length: int  # length of the media
+    uri: str  # the URL that was downloaded
+    response_code: int  # HTTP response code (200 for a parsed data: URL)
+    media_type: str  # media type of the content
+    download_name: Optional[str]  # file name parsed from the headers, if any
+    expires: int  # number of milliseconds the result is valid for
+    etag: Optional[str]  # ETag header of the response, if any
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class MediaInfo:
+    """
+    Information parsed from downloading media being previewed.
+    """
+
+    # The Content-Type header of the response.
+    media_type: str
+    # The length (in bytes) of the downloaded media.
+    media_length: int
+    # The media filename, according to the server. This is parsed from the
+    # returned headers, if possible.
+    download_name: Optional[str]
+    # The time of the preview (in milliseconds).
+    created_ts_ms: int
+    # Information from the media storage provider about where the file is stored
+    # on disk.
+    filesystem_id: str
+    filename: str
+    # The URI being previewed.
+    uri: str
+    # The HTTP response code.
+    response_code: int
+    # The number of milliseconds for which this preview is considered valid.
+    expires: int
+    # The ETag header of the response.
+    etag: Optional[str]
+
+
+class PreviewUrlResource(DirectServeJsonResource):
+    """
+    The `GET /_matrix/media/r0/preview_url` endpoint provides a generic preview API
+    for URLs which outputs Open Graph (https://ogp.me/) responses (with some Matrix
+    specific additions).
+
+    This does have trade-offs compared to other designs:
+
+    * Pros:
+      * Simple and flexible; can be used by any clients at any point
+    * Cons:
+      * If each homeserver provides one of these independently, all the homeservers in a
+        room may needlessly DoS the target URI
+      * The URL metadata must be stored somewhere, rather than just using Matrix
+        itself to store the media.
+      * Matrix cannot be used to distribute the metadata between homeservers.
+
+    When Synapse is asked to preview a URL it does the following:
+
+    1. Checks against a URL blacklist (defined as `url_preview_url_blacklist` in the
+       config).
+    2. Checks the URL against an in-memory cache and returns the result if it exists. (This
+       is also used to de-duplicate processing of multiple in-flight requests at once.)
+    3. Kicks off a background process to generate a preview:
+       1. Checks URL and timestamp against the database cache and returns the result if it
+          has not expired and was successful (a 2xx return code).
+       2. Checks if the URL matches an oEmbed (https://oembed.com/) pattern. If it
+          does, update the URL to download.
+       3. Downloads the URL and stores it into a file via the media storage provider
+          and saves the local media metadata.
+       4. If the media is an image:
+          1. Generates thumbnails.
+          2. Generates an Open Graph response based on image properties.
+       5. If the media is HTML:
+          1. Decodes the HTML via the stored file.
+          2. Generates an Open Graph response from the HTML.
+          3. If a JSON oEmbed URL was found in the HTML via autodiscovery:
+             1. Downloads the URL and stores it into a file via the media storage provider
+                and saves the local media metadata.
+             2. Convert the oEmbed response to an Open Graph response.
+             3. Override any Open Graph data from the HTML with data from oEmbed.
+          4. If an image exists in the Open Graph response:
+             1. Downloads the URL and stores it into a file via the media storage
+                provider and saves the local media metadata.
+             2. Generates thumbnails.
+             3. Updates the Open Graph response based on image properties.
+       6. If the media is JSON and an oEmbed URL was found:
+          1. Convert the oEmbed response to an Open Graph response.
+          2. If a thumbnail or image is in the oEmbed response:
+             1. Downloads the URL and stores it into a file via the media storage
+                provider and saves the local media metadata.
+             2. Generates thumbnails.
+             3. Updates the Open Graph response based on image properties.
+       7. Stores the result in the database cache.
+    4. Returns the result.
+
+    If any additional request (e.g. from oEmbed autodiscovery, step 5.3 or
+    image thumbnailing, step 5.4 or 6.2) fails then the URL preview as a whole
+    does not fail. As much information as possible is returned.
+
+    The in-memory cache expires after 1 hour.
+
+    Expired entries in the database cache (and their associated media files) are
+    deleted every 10 seconds. The default expiration time is 1 hour from download.
+    """
+
+    isLeaf = True
+
+    def __init__(
+        self,
+        hs: "HomeServer",
+        media_repo: "MediaRepository",
+        media_storage: MediaStorage,
+    ):
+        super().__init__()
+        media_config = hs.config.media
+
+        self.auth = hs.get_auth()
+        self.clock = hs.get_clock()
+        self.filepaths = media_repo.filepaths
+        self.max_spider_size = media_config.max_spider_size
+        self.server_name = hs.hostname
+        self.store = hs.get_datastores().main
+        self.client = SimpleHttpClient(
+            hs,
+            treq_args={"browser_like_redirects": True},
+            ip_whitelist=media_config.url_preview_ip_range_whitelist,
+            ip_blacklist=media_config.url_preview_ip_range_blacklist,
+            use_proxy=True,
+        )
+        self.media_repo = media_repo
+        self.primary_base_path = media_repo.primary_base_path
+        self.media_storage = media_storage
+
+        self._oembed = OEmbedProvider(hs)
+
+        # Background jobs run here if this is the configured instance, or if no
+        # instance is configured at all (in which case we assume that only a
+        # single instance is serving media).
+        job_instance = media_config.media_instance_running_background_jobs
+        self._worker_run_media_background_jobs = (
+            job_instance is None or job_instance == hs.get_instance_name()
+        )
+
+        self.url_preview_url_blacklist = media_config.url_preview_url_blacklist
+        self.url_preview_accept_language = media_config.url_preview_accept_language
+
+        # In-memory cache mapping each URL to an ObservableDeferred that
+        # resolves to the JSON-encoded OG metadata.
+        self._cache: ExpiringCache[str, ObservableDeferred] = ExpiringCache(
+            cache_name="url_previews",
+            clock=self.clock,
+            # don't spider URLs more often than once an hour
+            expiry_ms=ONE_HOUR,
+        )
+
+        if self._worker_run_media_background_jobs:
+            self._cleaner_loop = self.clock.looping_call(
+                self._start_expire_url_cache_data, 10 * 1000
+            )
+
+    async def _async_render_OPTIONS(self, request: SynapseRequest) -> None:
+        request.setHeader(b"Allow", b"OPTIONS, GET")  # methods this resource serves
+        respond_with_json(request, 200, {}, send_cors=True)
+
+    async def _async_render_GET(self, request: SynapseRequest) -> None:
+        # XXX: if get_user_by_req fails, what should we do in an async render?
+        requester = await self.auth.get_user_by_req(request)
+        url = parse_string(request, "url", required=True)
+        ts = parse_integer(request, "ts")
+        if ts is None:
+            ts = self.clock.time_msec()
+
+        # XXX: we could move this into _do_preview if we wanted.
+        url_parts = urlsplit(url)
+        for entry in self.url_preview_url_blacklist:
+            # An entry blocks the URL only if every attribute it lists matches.
+            blocked = True
+            for attrib, pattern in entry.items():
+                value = getattr(url_parts, attrib)
+                logger.debug(
+                    "Matching attrib '%s' with value '%s' against pattern '%s'",
+                    attrib,
+                    value,
+                    pattern,
+                )
+
+                if value is None:
+                    blocked = False
+                    continue
+
+                # urlsplit does not parse every attribute as a string (the port,
+                # for instance, is an int), while the match functions below
+                # expect strings, so convert the value before matching.
+                value_str = str(value)
+
+                # Patterns starting with "^" are regexes; the rest are globs.
+                if pattern.startswith("^"):
+                    attrib_matches = bool(re.match(pattern, value_str))
+                else:
+                    attrib_matches = fnmatch.fnmatch(value_str, pattern)
+                if not attrib_matches:
+                    blocked = False
+
+            if blocked:
+                logger.warning("URL %s blocked by url_blacklist entry %s", url, entry)
+                raise SynapseError(
+                    403, "URL blocked by url pattern blacklist entry", Codes.UNKNOWN
+                )
+
+        # the in-memory cache:
+        # * ensures that only one request is active at a time
+        # * takes load off the DB for the thundering herds
+        # * also caches any failures (unlike the DB) so we don't keep
+        #    requesting the same endpoint
+
+        observable = self._cache.get(url)
+
+        if observable:
+            logger.info("Returning cached response")
+        else:
+            download = run_in_background(self._do_preview, url, requester.user, ts)
+            observable = ObservableDeferred(download, consumeErrors=True)
+            self._cache[url] = observable
+
+        og = await make_deferred_yieldable(observable.observe())
+        respond_with_json_bytes(request, 200, og, send_cors=True)
+
+    async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes:
+        """Check the db, and download the URL and build a preview
+
+        Args:
+            url: The URL to preview.
+            user: The user requesting the preview.
+            ts: The timestamp requested for the preview.
+
+        Returns:
+            json-encoded og data
+        """
+        # Check the URL cache in the DB (which also provides historical previews,
+        # if we have any); only unexpired, successful (2xx) entries are reused.
+        cache_result = await self.store.get_url_cache(url, ts)
+        if (
+            cache_result
+            and cache_result["expires_ts"] > ts
+            and cache_result["response_code"] // 100 == 2
+        ):
+            # It may be stored as text in the database, not as bytes (such as
+            # PostgreSQL). If so, encode it back before handing it on.
+            og = cache_result["og"]
+            if isinstance(og, str):
+                og = og.encode("utf8")
+            return og
+
+        # If this URL can be accessed via oEmbed, use that instead.
+        url_to_download = url
+        oembed_url = self._oembed.get_oembed_url(url)
+        if oembed_url:
+            url_to_download = oembed_url
+
+        media_info = await self._handle_url(url_to_download, user)
+
+        logger.debug("got media_info of '%s'", media_info)
+
+        # The number of milliseconds that the response should be considered valid.
+        expiration_ms = media_info.expires
+        author_name: Optional[str] = None
+
+        if _is_media(media_info.media_type):
+            file_id = media_info.filesystem_id
+            dims = await self.media_repo._generate_thumbnails(
+                None, file_id, file_id, media_info.media_type, url_cache=True
+            )
+
+            # define our OG response for this media
+            og = {
+                "og:description": media_info.download_name,
+                "og:image": f"mxc://{self.server_name}/{media_info.filesystem_id}",
+                "og:image:type": media_info.media_type,
+                "matrix:image:size": media_info.media_length,
+            }
+
+            if dims:
+                og["og:image:width"] = dims["width"]
+                og["og:image:height"] = dims["height"]
+            else:
+                logger.warning("Couldn't get dims for %s", url)
+
+        elif _is_html(media_info.media_type):
+            # TODO: somehow stop a big HTML tree from exploding synapse's RAM
+
+            with open(media_info.filename, "rb") as file:
+                body = file.read()
+
+            tree = decode_body(body, media_info.uri, media_info.media_type)
+            if tree is not None:
+                # Check if this HTML document points to oEmbed information and
+                # defer to that.
+                oembed_url = self._oembed.autodiscover_from_html(tree)
+                og_from_oembed: JsonDict = {}
+                if oembed_url:
+                    try:
+                        oembed_info = await self._handle_url(
+                            oembed_url, user, allow_data_urls=True
+                        )
+                    except Exception as e:
+                        # Fetching the oEmbed info failed, don't block the entire URL preview.
+                        logger.warning(
+                            "oEmbed fetch failed during URL preview: %s errored with %s",
+                            oembed_url,
+                            e,
+                        )
+                    else:
+                        (
+                            og_from_oembed,
+                            author_name,
+                            expiration_ms,
+                        ) = await self._handle_oembed_response(
+                            url, oembed_info, expiration_ms
+                        )
+
+                # Parse Open Graph information from the HTML in case the oEmbed
+                # response failed or is incomplete.
+                og_from_html = parse_html_to_open_graph(tree)
+
+                # Compile the Open Graph response by using the scraped
+                # information from the HTML and overlaying any information
+                # from the oEmbed response.
+                og = {**og_from_html, **og_from_oembed}
+
+                await self._precache_image_url(user, media_info, og)
+            else:
+                og = {}
+
+        elif oembed_url:
+            # Handle the oEmbed information.
+            og, author_name, expiration_ms = await self._handle_oembed_response(
+                url, media_info, expiration_ms
+            )
+            await self._precache_image_url(user, media_info, og)
+
+        else:
+            logger.warning("Failed to find any OG data in %s", url)
+            og = {}
+
+        # If we don't have a title but we have author_name, copy it as
+        # title
+        if not og.get("og:title") and author_name:
+            og["og:title"] = author_name
+
+        # filter out any stupidly long values
+        keys_to_remove = []
+        for k, v in og.items():
+            # values can be numeric as well as strings, hence the cast to str
+            if len(k) > OG_TAG_NAME_MAXLEN or len(str(v)) > OG_TAG_VALUE_MAXLEN:
+                logger.warning(
+                    "Pruning overlong tag %s from OG data", k[:OG_TAG_NAME_MAXLEN]
+                )
+                keys_to_remove.append(k)
+        for k in keys_to_remove:
+            del og[k]
+
+        logger.debug("Calculated OG for %s as %s", url, og)
+
+        jsonog = json_encoder.encode(og)
+
+        # Cap the amount of time to consider a response valid.
+        expiration_ms = min(expiration_ms, ONE_DAY)
+
+        # store OG in history-aware DB cache
+        await self.store.store_url_cache(
+            url,
+            media_info.response_code,
+            media_info.etag,
+            media_info.created_ts_ms + expiration_ms,
+            jsonog,
+            media_info.filesystem_id,
+            media_info.created_ts_ms,
+        )
+
+        return jsonog.encode("utf8")
+
+    async def _download_url(self, url: str, output_stream: BinaryIO) -> DownloadResult:
+        """
+        Downloads a remote URL into a stream and summarises the response.
+
+        Args:
+             url: The URL to fetch.
+             output_stream: The stream to write the content to.
+
+        Returns:
+            A DownloadResult holding the media length, the URL downloaded,
+            the HTTP response code, the media type, the downloaded file
+            name, the number of milliseconds the result is valid for and
+            the etag header.
+        """
+
+        try:
+            logger.debug("Trying to get preview for url '%s'", url)
+            request_headers = {
+                b"Accept-Language": self.url_preview_accept_language,
+                # Use a custom user agent for the preview because some sites will
+                # only return Open Graph metadata to crawler user agents. Omit
+                # the Synapse version string to avoid leaking information.
+                b"User-Agent": [
+                    "Synapse (bot; +https://github.com/matrix-org/synapse)"
+                ],
+            }
+            length, headers, uri, code = await self.client.get_file(
+                url,
+                output_stream=output_stream,
+                max_size=self.max_spider_size,
+                headers=request_headers,
+                is_allowed_content_type=_is_previewable,
+            )
+        except SynapseError:
+            # Pass SynapseErrors through directly, so that the servlet
+            # handler will return a SynapseError to the client instead of
+            # blank data or a 500.
+            raise
+        except DNSLookupError:
+            # DNS lookup returned no results
+            # Note: This will also be the case if one of the resolved IP
+            # addresses is blacklisted
+            raise SynapseError(
+                502,
+                "DNS resolution failure during URL preview generation",
+                Codes.UNKNOWN,
+            )
+        except Exception as exc:
+            # FIXME: pass through 404s and other error messages nicely
+            logger.warning("Error downloading %s: %r", url, exc)
+
+            reason = traceback.format_exception_only(type(exc), exc)
+            raise SynapseError(
+                500, "Failed to download content: %s" % (reason,), Codes.UNKNOWN
+            )
+
+        # Fall back to a generic binary type when there is no Content-Type.
+        media_type = "application/octet-stream"
+        if b"Content-Type" in headers:
+            media_type = headers[b"Content-Type"][0].decode("ascii")
+
+        download_name = get_filename_from_headers(headers)
+
+        etag = None
+        if b"ETag" in headers:
+            etag = headers[b"ETag"][0].decode("ascii")
+
+        # FIXME: we should calculate a proper expiration based on the
+        # Cache-Control and Expire headers.  But for now, assume 1 hour.
+        return DownloadResult(
+            length, uri, code, media_type, download_name, ONE_HOUR, etag
+        )
+
+    async def _parse_data_url(
+        self, url: str, output_stream: BinaryIO
+    ) -> DownloadResult:
+        """
+        Writes the content of a data: URL to a stream and describes the result.
+
+        Args:
+             url: The URL to parse.
+             output_stream: The stream to write the content to.
+
+        Returns:
+            A DownloadResult holding the media length, the URL parsed, a
+            response code of 200, the media type, no file name, the number
+            of milliseconds the result is valid for and no etag.
+            Parsing failures are reported as a SynapseError instead.
+        """
+
+        try:
+            logger.debug("Trying to parse data url '%s'", url)
+            with urlopen(url) as response:
+                # TODO Can this be more efficient.
+                output_stream.write(response.read())
+        except Exception as exc:
+            logger.warning("Error parsing data: URL %s: %r", url, exc)
+
+            reason = traceback.format_exception_only(type(exc), exc)
+            raise SynapseError(
+                500, "Failed to parse data URL: %s" % (reason,), Codes.UNKNOWN
+            )
+
+        # Read back the length that has been written.
+        length = output_stream.tell()
+        # urlopen shoves the media-type from the data URL into the content type
+        # header object.
+        media_type = response.headers.get_content_type()
+        return DownloadResult(
+            length=length,
+            uri=url,
+            # If it was parsed, consider this a 200 OK.
+            response_code=200,
+            media_type=media_type,
+            # Some features are not supported by data: URLs.
+            download_name=None,
+            expires=ONE_HOUR,
+            etag=None,
+        )
+
+    async def _handle_url(
+        self, url: str, user: UserID, allow_data_urls: bool = False
+    ) -> MediaInfo:
+        """
+        Fetches content from a URL and parses the result to generate a MediaInfo.
+
+        It uses the media storage provider to persist the fetched content and
+        stores the mapping into the database.
+
+        Args:
+             url: The URL to fetch.
+             user: The user who has requested this URL.
+             allow_data_urls: True if data URLs should be allowed.
+
+        Returns:
+            A MediaInfo object describing the fetched content.
+        """
+
+        # TODO: we should probably honour robots.txt... except in practice
+        # we're most likely being explicitly triggered by a human rather than a
+        # bot, so are we really a robot?
+
+        file_id = datetime.date.today().isoformat() + "_" + random_string(16)
+
+        file_info = FileInfo(server_name=None, file_id=file_id, url_cache=True)
+
+        with self.media_storage.store_into_file(file_info) as (f, fname, finish):
+            if url.startswith("data:"):
+                if not allow_data_urls:
+                    raise SynapseError(
+                        500, "Previewing of data: URLs is forbidden", Codes.UNKNOWN
+                    )
+
+                download_result = await self._parse_data_url(url, f)
+            else:
+                download_result = await self._download_url(url, f)
+
+            await finish()
+
+        try:
+            time_now_ms = self.clock.time_msec()
+
+            await self.store.store_local_media(
+                media_id=file_id,
+                media_type=download_result.media_type,
+                time_now_ms=time_now_ms,
+                upload_name=download_result.download_name,
+                media_length=download_result.length,
+                user_id=user,
+                url_cache=url,
+            )
+
+        except Exception as e:
+            logger.error("Error handling downloaded %s: %r", url, e)
+            # TODO: we really ought to delete the downloaded file in this
+            # case, since we won't have recorded it in the db, and will
+            # therefore not expire it.
+            raise
+
+        return MediaInfo(
+            media_type=download_result.media_type,
+            media_length=download_result.length,
+            download_name=download_result.download_name,
+            created_ts_ms=time_now_ms,
+            filesystem_id=file_id,
+            filename=fname,
+            uri=download_result.uri,
+            response_code=download_result.response_code,
+            expires=download_result.expires,
+            etag=download_result.etag,
+        )
+
+    async def _precache_image_url(
+        self, user: UserID, media_info: MediaInfo, og: JsonDict
+    ) -> None:
+        """
+        Pre-cache the image (if one exists) for posterity
+
+        Args:
+            user: The user requesting the preview.
+            media_info: The media being previewed.
+            og: The Open Graph dictionary. This is modified with image information.
+        """
+        # If there's no image or it is blank, there's nothing to do.
+        if "og:image" not in og:
+            return
+
+        # Remove the raw image URL, this will be replaced with an MXC URL, if successful.
+        image_url = og.pop("og:image")
+        if not image_url:
+            return
+
+        # The image URL from the HTML might be relative to the previewed page,
+        # convert it to a URL which can be requested directly.
+        url_parts = urlparse(image_url)
+        if url_parts.scheme != "data":
+            image_url = urljoin(media_info.uri, image_url)
+
+        # FIXME: it might be cleaner to use the same flow as the main /preview_url
+        # request itself and benefit from the same caching etc.  But for now we
+        # just rely on the caching on the master request to speed things up.
+        try:
+            image_info = await self._handle_url(image_url, user, allow_data_urls=True)
+        except Exception as e:
+            # Pre-caching the image failed, don't block the entire URL preview.
+            logger.warning(
+                "Pre-caching image failed during URL preview: %s errored with %s",
+                image_url,
+                e,
+            )
+            return
+
+        if _is_media(image_info.media_type):
+            # TODO: make sure we don't choke on white-on-transparent images
+            file_id = image_info.filesystem_id
+            dims = await self.media_repo._generate_thumbnails(
+                None, file_id, file_id, image_info.media_type, url_cache=True
+            )
+            if dims:
+                og["og:image:width"] = dims["width"]
+                og["og:image:height"] = dims["height"]
+            else:
+                logger.warning("Couldn't get dims for %s", image_url)
+
+            og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}"
+            og["og:image:type"] = image_info.media_type
+            og["matrix:image:size"] = image_info.media_length
+
+    async def _handle_oembed_response(
+        self, url: str, media_info: MediaInfo, expiration_ms: int
+    ) -> Tuple[JsonDict, Optional[str], int]:
+        """
+        Parse the downloaded oEmbed info.
+
+        Args:
+            url: The URL which is being previewed (not the one which was
+                requested).
+            media_info: The media being previewed.
+            expiration_ms: The length of time, in milliseconds, the media is valid for.
+
+        Returns:
+            A tuple of:
+                The Open Graph dictionary, if the oEmbed info can be parsed.
+                The author name if it could be retrieved from oEmbed.
+                The (possibly updated) length of time, in milliseconds, the media is valid for.
+        """
+        # If JSON was not returned, there's nothing to do.
+        if not _is_json(media_info.media_type):
+            return {}, None, expiration_ms
+
+        with open(media_info.filename, "rb") as file:
+            body = file.read()
+
+        oembed_response = self._oembed.parse_oembed_response(url, body)
+        open_graph_result = oembed_response.open_graph_result
+
+        # Use the cache age from the oEmbed result, if one was given.
+        if open_graph_result and oembed_response.cache_age is not None:
+            expiration_ms = oembed_response.cache_age
+
+        return open_graph_result, oembed_response.author_name, expiration_ms
+
+    def _start_expire_url_cache_data(self) -> Deferred:
+        return run_as_background_process(
+            "expire_url_cache_data", self._expire_url_cache_data
+        )
+
+    async def _expire_url_cache_data(self) -> None:
+        """Clean up expired url cache content, media and thumbnails."""
+
+        assert self._worker_run_media_background_jobs
+
+        now = self.clock.time_msec()
+
+        logger.debug("Running url preview cache expiry")
+
+        def try_remove_parent_dirs(dirs: Iterable[str]) -> None:
+            """Attempt to remove the given chain of parent directories
+
+            Args:
+                dirs: The list of directory paths to delete, with children appearing
+                    before their parents.
+            """
+            for dir in dirs:
+                try:
+                    os.rmdir(dir)
+                except FileNotFoundError:
+                    # Already deleted, continue with deleting the rest
+                    pass
+                except OSError as e:
+                    # Failed, skip deleting the rest of the parent dirs
+                    if e.errno != errno.ENOTEMPTY:
+                        logger.warning(
+                            "Failed to remove media directory while clearing url preview cache: %r: %s",
+                            dir,
+                            e,
+                        )
+                    break
+
+        # First we delete expired url cache entries
+        media_ids = await self.store.get_expired_url_cache(now)
+
+        removed_media = []
+        for media_id in media_ids:
+            fname = self.filepaths.url_cache_filepath(media_id)
+            try:
+                os.remove(fname)
+            except FileNotFoundError:
+                pass  # If the path doesn't exist, meh
+            except OSError as e:
+                logger.warning(
+                    "Failed to remove media while clearing url preview cache: %r: %s",
+                    media_id,
+                    e,
+                )
+                continue
+
+            removed_media.append(media_id)
+
+            dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
+            try_remove_parent_dirs(dirs)
+
+        await self.store.delete_url_cache(removed_media)
+
+        if removed_media:
+            logger.debug(
+                "Deleted %d entries from url preview cache", len(removed_media)
+            )
+        else:
+            logger.debug("No entries removed from url preview cache")
+
+        # Now we delete old images associated with the url cache.
+        # These may be cached for a bit on the client (e.g., a user
+        # may have a room open with a URL preview displayed).
+        # So we wait a couple of days before deleting, just in case.
+        expire_before = now - IMAGE_CACHE_EXPIRY_MS
+        media_ids = await self.store.get_url_cache_media_before(expire_before)
+
+        removed_media = []
+        for media_id in media_ids:
+            fname = self.filepaths.url_cache_filepath(media_id)
+            try:
+                os.remove(fname)
+            except FileNotFoundError:
+                pass  # If the path doesn't exist, meh
+            except OSError as e:
+                logger.warning(
+                    "Failed to remove media from url preview cache: %r: %s", media_id, e
+                )
+                continue
+
+            dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
+            try_remove_parent_dirs(dirs)
+
+            thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id)
+            try:
+                shutil.rmtree(thumbnail_dir)
+            except FileNotFoundError:
+                pass  # If the path doesn't exist, meh
+            except OSError as e:
+                logger.warning(
+                    "Failed to remove media from url preview cache: %r: %s", media_id, e
+                )
+                continue
+
+            removed_media.append(media_id)
+
+            dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
+            # Note that one of the directories to be deleted has already been
+            # removed by the `rmtree` above.
+            try_remove_parent_dirs(dirs)
+
+        await self.store.delete_url_cache_media(removed_media)
+
+        if removed_media:
+            logger.debug("Deleted %d media from url preview cache", len(removed_media))
+        else:
+            logger.debug("No media removed from url preview cache")
+
+
+def _is_media(content_type: str) -> bool:
+    return content_type.lower().startswith("image/")
+
+
+def _is_html(content_type: str) -> bool:
+    content_type = content_type.lower()
+    return content_type.startswith("text/html") or content_type.startswith(
+        "application/xhtml"
+    )
+
+
+def _is_json(content_type: str) -> bool:
+    return content_type.lower().startswith("application/json")
+
+
+def _is_previewable(content_type: str) -> bool:
+    """Returns True for content types for which we will perform URL preview and False
+    otherwise."""
+
+    return _is_html(content_type) or _is_media(content_type) or _is_json(content_type)
diff --git a/synapse/rest/media/thumbnail_resource.py b/synapse/rest/media/thumbnail_resource.py
new file mode 100644
index 0000000000..4ee2a0dbda
--- /dev/null
+++ b/synapse/rest/media/thumbnail_resource.py
@@ -0,0 +1,554 @@
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import logging
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
+
+from synapse.api.errors import Codes, SynapseError, cs_error
+from synapse.config.repository import THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP
+from synapse.http.server import (
+    DirectServeJsonResource,
+    respond_with_json,
+    set_corp_headers,
+    set_cors_headers,
+)
+from synapse.http.servlet import parse_integer, parse_string
+from synapse.http.site import SynapseRequest
+from synapse.media._base import (
+    FileInfo,
+    ThumbnailInfo,
+    parse_media_id,
+    respond_404,
+    respond_with_file,
+    respond_with_responder,
+)
+from synapse.media.media_storage import MediaStorage
+
+if TYPE_CHECKING:
+    from synapse.media.media_repository import MediaRepository
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+class ThumbnailResource(DirectServeJsonResource):
+    isLeaf = True
+
+    def __init__(
+        self,
+        hs: "HomeServer",
+        media_repo: "MediaRepository",
+        media_storage: MediaStorage,
+    ):
+        super().__init__()
+
+        self.store = hs.get_datastores().main
+        self.media_repo = media_repo
+        self.media_storage = media_storage
+        self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
+        self.server_name = hs.hostname
+
+    async def _async_render_GET(self, request: SynapseRequest) -> None:
+        set_cors_headers(request)
+        set_corp_headers(request)
+        server_name, media_id, _ = parse_media_id(request)
+        width = parse_integer(request, "width", required=True)
+        height = parse_integer(request, "height", required=True)
+        method = parse_string(request, "method", "scale")
+        # TODO Parse the Accept header to get a prioritised list of thumbnail types.
+        m_type = "image/png"
+
+        if server_name == self.server_name:
+            if self.dynamic_thumbnails:
+                await self._select_or_generate_local_thumbnail(
+                    request, media_id, width, height, method, m_type
+                )
+            else:
+                await self._respond_local_thumbnail(
+                    request, media_id, width, height, method, m_type
+                )
+            self.media_repo.mark_recently_accessed(None, media_id)
+        else:
+            if self.dynamic_thumbnails:
+                await self._select_or_generate_remote_thumbnail(
+                    request, server_name, media_id, width, height, method, m_type
+                )
+            else:
+                await self._respond_remote_thumbnail(
+                    request, server_name, media_id, width, height, method, m_type
+                )
+            self.media_repo.mark_recently_accessed(server_name, media_id)
+
+    async def _respond_local_thumbnail(
+        self,
+        request: SynapseRequest,
+        media_id: str,
+        width: int,
+        height: int,
+        method: str,
+        m_type: str,
+    ) -> None:
+        media_info = await self.store.get_local_media(media_id)
+
+        if not media_info:
+            respond_404(request)
+            return
+        if media_info["quarantined_by"]:
+            logger.info("Media is quarantined")
+            respond_404(request)
+            return
+
+        thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
+        await self._select_and_respond_with_thumbnail(
+            request,
+            width,
+            height,
+            method,
+            m_type,
+            thumbnail_infos,
+            media_id,
+            media_id,
+            url_cache=bool(media_info["url_cache"]),
+            server_name=None,
+        )
+
+    async def _select_or_generate_local_thumbnail(
+        self,
+        request: SynapseRequest,
+        media_id: str,
+        desired_width: int,
+        desired_height: int,
+        desired_method: str,
+        desired_type: str,
+    ) -> None:
+        media_info = await self.store.get_local_media(media_id)
+
+        if not media_info:
+            respond_404(request)
+            return
+        if media_info["quarantined_by"]:
+            logger.info("Media is quarantined")
+            respond_404(request)
+            return
+
+        thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
+        for info in thumbnail_infos:
+            t_w = info["thumbnail_width"] == desired_width
+            t_h = info["thumbnail_height"] == desired_height
+            t_method = info["thumbnail_method"] == desired_method
+            t_type = info["thumbnail_type"] == desired_type
+
+            if t_w and t_h and t_method and t_type:
+                file_info = FileInfo(
+                    server_name=None,
+                    file_id=media_id,
+                    url_cache=media_info["url_cache"],
+                    thumbnail=ThumbnailInfo(
+                        width=info["thumbnail_width"],
+                        height=info["thumbnail_height"],
+                        type=info["thumbnail_type"],
+                        method=info["thumbnail_method"],
+                    ),
+                )
+
+                t_type = file_info.thumbnail_type
+                t_length = info["thumbnail_length"]
+
+                responder = await self.media_storage.fetch_media(file_info)
+                if responder:
+                    await respond_with_responder(request, responder, t_type, t_length)
+                    return
+
+        logger.debug("We don't have a thumbnail of that size. Generating")
+
+        # Okay, so we generate one.
+        file_path = await self.media_repo.generate_local_exact_thumbnail(
+            media_id,
+            desired_width,
+            desired_height,
+            desired_method,
+            desired_type,
+            url_cache=bool(media_info["url_cache"]),
+        )
+
+        if file_path:
+            await respond_with_file(request, desired_type, file_path)
+        else:
+            logger.warning("Failed to generate thumbnail")
+            raise SynapseError(400, "Failed to generate thumbnail.")
+
+    async def _select_or_generate_remote_thumbnail(
+        self,
+        request: SynapseRequest,
+        server_name: str,
+        media_id: str,
+        desired_width: int,
+        desired_height: int,
+        desired_method: str,
+        desired_type: str,
+    ) -> None:
+        media_info = await self.media_repo.get_remote_media_info(server_name, media_id)
+
+        thumbnail_infos = await self.store.get_remote_media_thumbnails(
+            server_name, media_id
+        )
+
+        file_id = media_info["filesystem_id"]
+
+        for info in thumbnail_infos:
+            t_w = info["thumbnail_width"] == desired_width
+            t_h = info["thumbnail_height"] == desired_height
+            t_method = info["thumbnail_method"] == desired_method
+            t_type = info["thumbnail_type"] == desired_type
+
+            if t_w and t_h and t_method and t_type:
+                file_info = FileInfo(
+                    server_name=server_name,
+                    file_id=media_info["filesystem_id"],
+                    thumbnail=ThumbnailInfo(
+                        width=info["thumbnail_width"],
+                        height=info["thumbnail_height"],
+                        type=info["thumbnail_type"],
+                        method=info["thumbnail_method"],
+                    ),
+                )
+
+                t_type = file_info.thumbnail_type
+                t_length = info["thumbnail_length"]
+
+                responder = await self.media_storage.fetch_media(file_info)
+                if responder:
+                    await respond_with_responder(request, responder, t_type, t_length)
+                    return
+
+        logger.debug("We don't have a thumbnail of that size. Generating")
+
+        # Okay, so we generate one.
+        file_path = await self.media_repo.generate_remote_exact_thumbnail(
+            server_name,
+            file_id,
+            media_id,
+            desired_width,
+            desired_height,
+            desired_method,
+            desired_type,
+        )
+
+        if file_path:
+            await respond_with_file(request, desired_type, file_path)
+        else:
+            logger.warning("Failed to generate thumbnail")
+            raise SynapseError(400, "Failed to generate thumbnail.")
+
+    async def _respond_remote_thumbnail(
+        self,
+        request: SynapseRequest,
+        server_name: str,
+        media_id: str,
+        width: int,
+        height: int,
+        method: str,
+        m_type: str,
+    ) -> None:
+        # TODO: Don't download the whole remote file
+        # We should proxy the thumbnail from the remote server instead of
+        # downloading the remote file and generating our own thumbnails.
+        media_info = await self.media_repo.get_remote_media_info(server_name, media_id)
+
+        thumbnail_infos = await self.store.get_remote_media_thumbnails(
+            server_name, media_id
+        )
+        await self._select_and_respond_with_thumbnail(
+            request,
+            width,
+            height,
+            method,
+            m_type,
+            thumbnail_infos,
+            media_id,
+            media_info["filesystem_id"],
+            url_cache=False,
+            server_name=server_name,
+        )
+
+    async def _select_and_respond_with_thumbnail(
+        self,
+        request: SynapseRequest,
+        desired_width: int,
+        desired_height: int,
+        desired_method: str,
+        desired_type: str,
+        thumbnail_infos: List[Dict[str, Any]],
+        media_id: str,
+        file_id: str,
+        url_cache: bool,
+        server_name: Optional[str] = None,
+    ) -> None:
+        """
+        Respond to a request with an appropriate thumbnail from the previously generated thumbnails.
+
+        Args:
+            request: The incoming request.
+            desired_width: The desired width, the returned thumbnail may be larger than this.
+            desired_height: The desired height, the returned thumbnail may be larger than this.
+            desired_method: The desired method used to generate the thumbnail.
+            desired_type: The desired content-type of the thumbnail.
+            thumbnail_infos: A list of dictionaries of candidate thumbnails.
+            file_id: The ID of the media that a thumbnail is being requested for.
+            url_cache: True if this is from a URL cache.
+            server_name: The server name, if this is a remote thumbnail.
+        """
+        logger.debug(
+            "_select_and_respond_with_thumbnail: media_id=%s desired=%sx%s (%s) thumbnail_infos=%s",
+            media_id,
+            desired_width,
+            desired_height,
+            desired_method,
+            thumbnail_infos,
+        )
+
+        # If `dynamic_thumbnails` is enabled, we expect Synapse to go down a
+        # different code path to handle it.
+        assert not self.dynamic_thumbnails
+
+        if thumbnail_infos:
+            file_info = self._select_thumbnail(
+                desired_width,
+                desired_height,
+                desired_method,
+                desired_type,
+                thumbnail_infos,
+                file_id,
+                url_cache,
+                server_name,
+            )
+            if not file_info:
+                logger.info("Couldn't find a thumbnail matching the desired inputs")
+                respond_404(request)
+                return
+
+            # The thumbnail property must exist.
+            assert file_info.thumbnail is not None
+
+            responder = await self.media_storage.fetch_media(file_info)
+            if responder:
+                await respond_with_responder(
+                    request,
+                    responder,
+                    file_info.thumbnail.type,
+                    file_info.thumbnail.length,
+                )
+                return
+
+            # If we can't find the thumbnail we regenerate it. This can happen
+            # if e.g. we've deleted the thumbnails but still have the original
+            # image somewhere.
+            #
+            # Since we have an entry for the thumbnail in the DB we a) know we
+            # have successfully generated the thumbnail in the past (so we
+            # don't need to worry about repeatedly failing to generate
+            # thumbnails), and b) have already calculated the appropriate
+            # width/height/method so we can just call the "generate exact"
+            # methods.
+
+            # First let's check that we do actually have the original image
+            # still. This will throw a 404 if we don't.
+            # TODO: We should refetch the thumbnails for remote media.
+            await self.media_storage.ensure_media_is_in_local_cache(
+                FileInfo(server_name, file_id, url_cache=url_cache)
+            )
+
+            if server_name:
+                await self.media_repo.generate_remote_exact_thumbnail(
+                    server_name,
+                    file_id=file_id,
+                    media_id=media_id,
+                    t_width=file_info.thumbnail.width,
+                    t_height=file_info.thumbnail.height,
+                    t_method=file_info.thumbnail.method,
+                    t_type=file_info.thumbnail.type,
+                )
+            else:
+                await self.media_repo.generate_local_exact_thumbnail(
+                    media_id=media_id,
+                    t_width=file_info.thumbnail.width,
+                    t_height=file_info.thumbnail.height,
+                    t_method=file_info.thumbnail.method,
+                    t_type=file_info.thumbnail.type,
+                    url_cache=url_cache,
+                )
+
+            responder = await self.media_storage.fetch_media(file_info)
+            await respond_with_responder(
+                request,
+                responder,
+                file_info.thumbnail.type,
+                file_info.thumbnail.length,
+            )
+        else:
+            # This might be because:
+            # 1. We can't create thumbnails for the given media (corrupted or
+            #    unsupported file type), or
+            # 2. The thumbnailing process never ran or errored out initially
+            #    when the media was first uploaded (these bugs should be
+            #    reported and fixed).
+            # Note that we don't attempt to generate a thumbnail now because
+            # `dynamic_thumbnails` is disabled.
+            logger.info("Failed to find any generated thumbnails")
+
+            respond_with_json(
+                request,
+                400,
+                cs_error(
+                    "Cannot find any thumbnails for the requested media (%r). This might mean the media is not a supported_media_format=(%s) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)"
+                    % (
+                        request.postpath,
+                        ", ".join(THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP.keys()),
+                    ),
+                    code=Codes.UNKNOWN,
+                ),
+                send_cors=True,
+            )
+
+    def _select_thumbnail(
+        self,
+        desired_width: int,
+        desired_height: int,
+        desired_method: str,
+        desired_type: str,
+        thumbnail_infos: List[Dict[str, Any]],
+        file_id: str,
+        url_cache: bool,
+        server_name: Optional[str],
+    ) -> Optional[FileInfo]:
+        """
+        Choose an appropriate thumbnail from the previously generated thumbnails.
+
+        Args:
+            desired_width: The desired width, the returned thumbnail may be larger than this.
+            desired_height: The desired height, the returned thumbnail may be larger than this.
+            desired_method: The desired method used to generate the thumbnail.
+            desired_type: The desired content-type of the thumbnail.
+            thumbnail_infos: A list of dictionaries of candidate thumbnails.
+            file_id: The ID of the media that a thumbnail is being requested for.
+            url_cache: True if this is from a URL cache.
+            server_name: The server name, if this is a remote thumbnail.
+
+        Returns:
+             The thumbnail which best matches the desired parameters.
+        """
+        desired_method = desired_method.lower()
+
+        # The chosen thumbnail.
+        thumbnail_info = None
+
+        d_w = desired_width
+        d_h = desired_height
+
+        if desired_method == "crop":
+            # Thumbnails that match equal or larger sizes of desired width/height.
+            crop_info_list: List[Tuple[int, int, int, bool, int, Dict[str, Any]]] = []
+            # Other thumbnails.
+            crop_info_list2: List[Tuple[int, int, int, bool, int, Dict[str, Any]]] = []
+            for info in thumbnail_infos:
+                # Skip thumbnails generated with different methods.
+                if info["thumbnail_method"] != "crop":
+                    continue
+
+                t_w = info["thumbnail_width"]
+                t_h = info["thumbnail_height"]
+                aspect_quality = abs(d_w * t_h - d_h * t_w)
+                min_quality = 0 if d_w <= t_w and d_h <= t_h else 1
+                size_quality = abs((d_w - t_w) * (d_h - t_h))
+                type_quality = desired_type != info["thumbnail_type"]
+                length_quality = info["thumbnail_length"]
+                if t_w >= d_w or t_h >= d_h:
+                    crop_info_list.append(
+                        (
+                            aspect_quality,
+                            min_quality,
+                            size_quality,
+                            type_quality,
+                            length_quality,
+                            info,
+                        )
+                    )
+                else:
+                    crop_info_list2.append(
+                        (
+                            aspect_quality,
+                            min_quality,
+                            size_quality,
+                            type_quality,
+                            length_quality,
+                            info,
+                        )
+                    )
+            # Pick the most appropriate thumbnail. Some values of `desired_width` and
+            # `desired_height` may result in a tie, in which case we avoid comparing on
+            # the thumbnail info dictionary and pick the thumbnail that appears earlier
+            # in the list of candidates.
+            if crop_info_list:
+                thumbnail_info = min(crop_info_list, key=lambda t: t[:-1])[-1]
+            elif crop_info_list2:
+                thumbnail_info = min(crop_info_list2, key=lambda t: t[:-1])[-1]
+        elif desired_method == "scale":
+            # Thumbnails that match equal or larger sizes of desired width/height.
+            info_list: List[Tuple[int, bool, int, Dict[str, Any]]] = []
+            # Other thumbnails.
+            info_list2: List[Tuple[int, bool, int, Dict[str, Any]]] = []
+
+            for info in thumbnail_infos:
+                # Skip thumbnails generated with different methods.
+                if info["thumbnail_method"] != "scale":
+                    continue
+
+                t_w = info["thumbnail_width"]
+                t_h = info["thumbnail_height"]
+                size_quality = abs((d_w - t_w) * (d_h - t_h))
+                type_quality = desired_type != info["thumbnail_type"]
+                length_quality = info["thumbnail_length"]
+                if t_w >= d_w or t_h >= d_h:
+                    info_list.append((size_quality, type_quality, length_quality, info))
+                else:
+                    info_list2.append(
+                        (size_quality, type_quality, length_quality, info)
+                    )
+            # Pick the most appropriate thumbnail. Some values of `desired_width` and
+            # `desired_height` may result in a tie, in which case we avoid comparing on
+            # the thumbnail info dictionary and pick the thumbnail that appears earlier
+            # in the list of candidates.
+            if info_list:
+                thumbnail_info = min(info_list, key=lambda t: t[:-1])[-1]
+            elif info_list2:
+                thumbnail_info = min(info_list2, key=lambda t: t[:-1])[-1]
+
+        if thumbnail_info:
+            return FileInfo(
+                file_id=file_id,
+                url_cache=url_cache,
+                server_name=server_name,
+                thumbnail=ThumbnailInfo(
+                    width=thumbnail_info["thumbnail_width"],
+                    height=thumbnail_info["thumbnail_height"],
+                    type=thumbnail_info["thumbnail_type"],
+                    method=thumbnail_info["thumbnail_method"],
+                    length=thumbnail_info["thumbnail_length"],
+                ),
+            )
+
+        # No matching thumbnail was found.
+        return None
diff --git a/synapse/rest/media/upload_resource.py b/synapse/rest/media/upload_resource.py
new file mode 100644
index 0000000000..697348613b
--- /dev/null
+++ b/synapse/rest/media/upload_resource.py
@@ -0,0 +1,108 @@
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import IO, TYPE_CHECKING, Dict, List, Optional
+
+from synapse.api.errors import Codes, SynapseError
+from synapse.http.server import DirectServeJsonResource, respond_with_json
+from synapse.http.servlet import parse_bytes_from_args
+from synapse.http.site import SynapseRequest
+from synapse.media.media_storage import SpamMediaException
+
+if TYPE_CHECKING:
+    from synapse.media.media_repository import MediaRepository
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+class UploadResource(DirectServeJsonResource):
+    """Leaf resource that stores uploaded media and replies with its content URI."""
+
+    isLeaf = True
+
+    def __init__(self, hs: "HomeServer", media_repo: "MediaRepository"):
+        super().__init__()
+
+        self.media_repo = media_repo
+        self.filepaths = media_repo.filepaths
+        self.store = hs.get_datastores().main
+        self.clock = hs.get_clock()
+        self.server_name = hs.hostname
+        self.auth = hs.get_auth()
+        self.max_upload_size = hs.config.media.max_upload_size
+
+    async def _async_render_OPTIONS(self, request: SynapseRequest) -> None:
+        # Answer OPTIONS with an empty JSON object, asking for CORS headers.
+        respond_with_json(request, 200, {}, send_cors=True)
+
+    async def _async_render_POST(self, request: SynapseRequest) -> None:
+        """Validate the upload, store it as new media and respond with its URI."""
+        requester = await self.auth.get_user_by_req(request)
+        raw_content_length = request.getHeader("Content-Length")
+        if raw_content_length is None:
+            raise SynapseError(msg="Request must specify a Content-Length", code=400)
+        try:
+            content_length = int(raw_content_length)
+        except ValueError:
+            raise SynapseError(msg="Content-Length value is invalid", code=400)
+        if content_length > self.max_upload_size:
+            raise SynapseError(
+                msg="Upload request body is too large",
+                code=413,
+                errcode=Codes.TOO_LARGE,
+            )
+
+        args: Dict[bytes, List[bytes]] = request.args  # type: ignore
+        upload_name_bytes = parse_bytes_from_args(args, "filename")
+        upload_name: Optional[str] = None  # kept when the filename is missing/empty
+        if upload_name_bytes:
+            try:
+                upload_name = upload_name_bytes.decode("utf8")
+            except UnicodeDecodeError:
+                raise SynapseError(
+                    msg="Invalid UTF-8 filename parameter: %r" % (upload_name_bytes,),
+                    code=400,
+                )
+
+        headers = request.requestHeaders
+        if headers.hasHeader(b"Content-Type"):
+            content_type_headers = headers.getRawHeaders(b"Content-Type")
+            assert content_type_headers  # for mypy
+            try:
+                media_type = content_type_headers[0].decode("ascii")
+            except UnicodeDecodeError:  # non-ASCII header bytes: reject with a 400
+                raise SynapseError(msg="Invalid Content-Type header", code=400)
+        else:
+            media_type = "application/octet-stream"
+
+        # TODO(markjh): parse the Content-Disposition header.
+
+        try:
+            content: IO = request.content  # type: ignore
+            content_uri = await self.media_repo.create_content(
+                media_type, upload_name, content, content_length, requester.user
+            )
+        except SpamMediaException:
+            # For uploading of media we want to respond with a 400, instead of
+            # the default 404, as that would just be confusing.
+            raise SynapseError(400, "Bad content")
+
+        logger.info("Uploaded content with URI '%s'", content_uri)
+
+        respond_with_json(
+            request, 200, {"content_uri": str(content_uri)}, send_cors=True
+        )
diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py
index ef8334ae25..88427a5737 100644
--- a/synapse/rest/media/v1/_base.py
+++ b/synapse/rest/media/v1/_base.py
@@ -1,5 +1,4 @@
-# Copyright 2014-2016 OpenMarket Ltd
-# Copyright 2019-2021 The Matrix.org Foundation C.I.C.
+# Copyright 2023 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,468 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+#
 
-import logging
-import os
-import urllib
-from abc import ABC, abstractmethod
-from types import TracebackType
-from typing import Awaitable, Dict, Generator, List, Optional, Tuple, Type
-
-import attr
-
-from twisted.internet.interfaces import IConsumer
-from twisted.protocols.basic import FileSender
-from twisted.web.server import Request
-
-from synapse.api.errors import Codes, SynapseError, cs_error
-from synapse.http.server import finish_request, respond_with_json
-from synapse.http.site import SynapseRequest
-from synapse.logging.context import make_deferred_yieldable
-from synapse.util.stringutils import is_ascii, parse_and_validate_server_name
-
-logger = logging.getLogger(__name__)
-
-# list all text content types that will have the charset default to UTF-8 when
-# none is given
-TEXT_CONTENT_TYPES = [
-    "text/css",
-    "text/csv",
-    "text/html",
-    "text/calendar",
-    "text/plain",
-    "text/javascript",
-    "application/json",
-    "application/ld+json",
-    "application/rtf",
-    "image/svg+xml",
-    "text/xml",
-]
-
-
-def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]:
-    """Parses the server name, media ID and optional file name from the request URI
-
-    Also performs some rough validation on the server name.
-
-    Args:
-        request: The `Request`.
-
-    Returns:
-        A tuple containing the parsed server name, media ID and optional file name.
-
-    Raises:
-        SynapseError(404): if parsing or validation fail for any reason
-    """
-    try:
-        # The type on postpath seems incorrect in Twisted 21.2.0.
-        postpath: List[bytes] = request.postpath  # type: ignore
-        assert postpath
-
-        # This allows users to append e.g. /test.png to the URL. Useful for
-        # clients that parse the URL to see content type.
-        server_name_bytes, media_id_bytes = postpath[:2]
-        server_name = server_name_bytes.decode("utf-8")
-        media_id = media_id_bytes.decode("utf8")
-
-        # Validate the server name, raising if invalid
-        parse_and_validate_server_name(server_name)
-
-        file_name = None
-        if len(postpath) > 2:
-            try:
-                file_name = urllib.parse.unquote(postpath[-1].decode("utf-8"))
-            except UnicodeDecodeError:
-                pass
-        return server_name, media_id, file_name
-    except Exception:
-        raise SynapseError(
-            404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN
-        )
-
-
-def respond_404(request: SynapseRequest) -> None:
-    respond_with_json(
-        request,
-        404,
-        cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND),
-        send_cors=True,
-    )
-
-
-async def respond_with_file(
-    request: SynapseRequest,
-    media_type: str,
-    file_path: str,
-    file_size: Optional[int] = None,
-    upload_name: Optional[str] = None,
-) -> None:
-    logger.debug("Responding with %r", file_path)
-
-    if os.path.isfile(file_path):
-        if file_size is None:
-            stat = os.stat(file_path)
-            file_size = stat.st_size
-
-        add_file_headers(request, media_type, file_size, upload_name)
-
-        with open(file_path, "rb") as f:
-            await make_deferred_yieldable(FileSender().beginFileTransfer(f, request))
-
-        finish_request(request)
-    else:
-        respond_404(request)
-
-
-def add_file_headers(
-    request: Request,
-    media_type: str,
-    file_size: Optional[int],
-    upload_name: Optional[str],
-) -> None:
-    """Adds the correct response headers in preparation for responding with the
-    media.
-
-    Args:
-        request
-        media_type: The media/content type.
-        file_size: Size in bytes of the media, if known.
-        upload_name: The name of the requested file, if any.
-    """
-
-    def _quote(x: str) -> str:
-        return urllib.parse.quote(x.encode("utf-8"))
-
-    # Default to a UTF-8 charset for text content types.
-    # ex, uses UTF-8 for 'text/css' but not 'text/css; charset=UTF-16'
-    if media_type.lower() in TEXT_CONTENT_TYPES:
-        content_type = media_type + "; charset=UTF-8"
-    else:
-        content_type = media_type
-
-    request.setHeader(b"Content-Type", content_type.encode("UTF-8"))
-    if upload_name:
-        # RFC6266 section 4.1 [1] defines both `filename` and `filename*`.
-        #
-        # `filename` is defined to be a `value`, which is defined by RFC2616
-        # section 3.6 [2] to be a `token` or a `quoted-string`, where a `token`
-        # is (essentially) a single US-ASCII word, and a `quoted-string` is a
-        # US-ASCII string surrounded by double-quotes, using backslash as an
-        # escape character. Note that %-encoding is *not* permitted.
-        #
-        # `filename*` is defined to be an `ext-value`, which is defined in
-        # RFC5987 section 3.2.1 [3] to be `charset "'" [ language ] "'" value-chars`,
-        # where `value-chars` is essentially a %-encoded string in the given charset.
-        #
-        # [1]: https://tools.ietf.org/html/rfc6266#section-4.1
-        # [2]: https://tools.ietf.org/html/rfc2616#section-3.6
-        # [3]: https://tools.ietf.org/html/rfc5987#section-3.2.1
-
-        # We avoid the quoted-string version of `filename`, because (a) synapse didn't
-        # correctly interpret those as of 0.99.2 and (b) they are a bit of a pain and we
-        # may as well just do the filename* version.
-        if _can_encode_filename_as_token(upload_name):
-            disposition = "inline; filename=%s" % (upload_name,)
-        else:
-            disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),)
-
-        request.setHeader(b"Content-Disposition", disposition.encode("ascii"))
-
-    # cache for at least a day.
-    # XXX: we might want to turn this off for data we don't want to
-    # recommend caching as it's sensitive or private - or at least
-    # select private. don't bother setting Expires as all our
-    # clients are smart enough to be happy with Cache-Control
-    request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
-    if file_size is not None:
-        request.setHeader(b"Content-Length", b"%d" % (file_size,))
-
-    # Tell web crawlers to not index, archive, or follow links in media. This
-    # should help to prevent things in the media repo from showing up in web
-    # search results.
-    request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex")
-
-
-# separators as defined in RFC2616. SP and HT are handled separately.
-# see _can_encode_filename_as_token.
-_FILENAME_SEPARATOR_CHARS = {
-    "(",
-    ")",
-    "<",
-    ">",
-    "@",
-    ",",
-    ";",
-    ":",
-    "\\",
-    '"',
-    "/",
-    "[",
-    "]",
-    "?",
-    "=",
-    "{",
-    "}",
-}
-
-
-def _can_encode_filename_as_token(x: str) -> bool:
-    for c in x:
-        # from RFC2616:
-        #
-        #        token          = 1*<any CHAR except CTLs or separators>
-        #
-        #        separators     = "(" | ")" | "<" | ">" | "@"
-        #                       | "," | ";" | ":" | "\" | <">
-        #                       | "/" | "[" | "]" | "?" | "="
-        #                       | "{" | "}" | SP | HT
-        #
-        #        CHAR           = <any US-ASCII character (octets 0 - 127)>
-        #
-        #        CTL            = <any US-ASCII control character
-        #                         (octets 0 - 31) and DEL (127)>
-        #
-        if ord(c) >= 127 or ord(c) <= 32 or c in _FILENAME_SEPARATOR_CHARS:
-            return False
-    return True
-
-
-async def respond_with_responder(
-    request: SynapseRequest,
-    responder: "Optional[Responder]",
-    media_type: str,
-    file_size: Optional[int],
-    upload_name: Optional[str] = None,
-) -> None:
-    """Responds to the request with given responder. If responder is None then
-    returns 404.
-
-    Args:
-        request
-        responder
-        media_type: The media/content type.
-        file_size: Size in bytes of the media. If not known it should be None
-        upload_name: The name of the requested file, if any.
-    """
-    if not responder:
-        respond_404(request)
-        return
-
-    # If we have a responder we *must* use it as a context manager.
-    with responder:
-        if request._disconnected:
-            logger.warning(
-                "Not sending response to request %s, already disconnected.", request
-            )
-            return
-
-        logger.debug("Responding to media request with responder %s", responder)
-        add_file_headers(request, media_type, file_size, upload_name)
-        try:
-            await responder.write_to_consumer(request)
-        except Exception as e:
-            # The majority of the time this will be due to the client having gone
-            # away. Unfortunately, Twisted simply throws a generic exception at us
-            # in that case.
-            logger.warning("Failed to write to consumer: %s %s", type(e), e)
-
-            # Unregister the producer, if it has one, so Twisted doesn't complain
-            if request.producer:
-                request.unregisterProducer()
-
-    finish_request(request)
-
-
-class Responder(ABC):
-    """Represents a response that can be streamed to the requester.
-
-    Responder is a context manager which *must* be used, so that any resources
-    held can be cleaned up.
-    """
-
-    @abstractmethod
-    def write_to_consumer(self, consumer: IConsumer) -> Awaitable:
-        """Stream response into consumer
-
-        Args:
-            consumer: The consumer to stream into.
-
-        Returns:
-            Resolves once the response has finished being written
-        """
-        raise NotImplementedError()
-
-    def __enter__(self) -> None:  # noqa: B027
-        pass
-
-    def __exit__(  # noqa: B027
-        self,
-        exc_type: Optional[Type[BaseException]],
-        exc_val: Optional[BaseException],
-        exc_tb: Optional[TracebackType],
-    ) -> None:
-        pass
-
-
-@attr.s(slots=True, frozen=True, auto_attribs=True)
-class ThumbnailInfo:
-    """Details about a generated thumbnail."""
-
-    width: int
-    height: int
-    method: str
-    # Content type of thumbnail, e.g. image/png
-    type: str
-    # The size of the media file, in bytes.
-    length: Optional[int] = None
-
-
-@attr.s(slots=True, frozen=True, auto_attribs=True)
-class FileInfo:
-    """Details about a requested/uploaded file."""
-
-    # The server name where the media originated from, or None if local.
-    server_name: Optional[str]
-    # The local ID of the file. For local files this is the same as the media_id
-    file_id: str
-    # If the file is for the url preview cache
-    url_cache: bool = False
-    # Whether the file is a thumbnail or not.
-    thumbnail: Optional[ThumbnailInfo] = None
-
-    # The below properties exist to maintain compatibility with third-party modules.
-    @property
-    def thumbnail_width(self) -> Optional[int]:
-        if not self.thumbnail:
-            return None
-        return self.thumbnail.width
-
-    @property
-    def thumbnail_height(self) -> Optional[int]:
-        if not self.thumbnail:
-            return None
-        return self.thumbnail.height
-
-    @property
-    def thumbnail_method(self) -> Optional[str]:
-        if not self.thumbnail:
-            return None
-        return self.thumbnail.method
-
-    @property
-    def thumbnail_type(self) -> Optional[str]:
-        if not self.thumbnail:
-            return None
-        return self.thumbnail.type
-
-    @property
-    def thumbnail_length(self) -> Optional[int]:
-        if not self.thumbnail:
-            return None
-        return self.thumbnail.length
-
-
-def get_filename_from_headers(headers: Dict[bytes, List[bytes]]) -> Optional[str]:
-    """
-    Get the filename of the downloaded file by inspecting the
-    Content-Disposition HTTP header.
-
-    Args:
-        headers: The HTTP request headers.
-
-    Returns:
-        The filename, or None.
-    """
-    content_disposition = headers.get(b"Content-Disposition", [b""])
-
-    # No header, bail out.
-    if not content_disposition[0]:
-        return None
-
-    _, params = _parse_header(content_disposition[0])
-
-    upload_name = None
-
-    # First check if there is a valid UTF-8 filename
-    upload_name_utf8 = params.get(b"filename*", None)
-    if upload_name_utf8:
-        if upload_name_utf8.lower().startswith(b"utf-8''"):
-            upload_name_utf8 = upload_name_utf8[7:]
-            # We have a filename*= section. This MUST be ASCII, and any UTF-8
-            # bytes are %-quoted.
-            try:
-                # Once it is decoded, we can then unquote the %-encoded
-                # parts strictly into a unicode string.
-                upload_name = urllib.parse.unquote(
-                    upload_name_utf8.decode("ascii"), errors="strict"
-                )
-            except UnicodeDecodeError:
-                # Incorrect UTF-8.
-                pass
-
-    # If there isn't check for an ascii name.
-    if not upload_name:
-        upload_name_ascii = params.get(b"filename", None)
-        if upload_name_ascii and is_ascii(upload_name_ascii):
-            upload_name = upload_name_ascii.decode("ascii")
-
-    # This may be None here, indicating we did not find a matching name.
-    return upload_name
-
-
-def _parse_header(line: bytes) -> Tuple[bytes, Dict[bytes, bytes]]:
-    """Parse a Content-type like header.
-
-    Cargo-culted from `cgi`, but works on bytes rather than strings.
-
-    Args:
-        line: header to be parsed
-
-    Returns:
-        The main content-type, followed by the parameter dictionary
-    """
-    parts = _parseparam(b";" + line)
-    key = next(parts)
-    pdict = {}
-    for p in parts:
-        i = p.find(b"=")
-        if i >= 0:
-            name = p[:i].strip().lower()
-            value = p[i + 1 :].strip()
-
-            # strip double-quotes
-            if len(value) >= 2 and value[0:1] == value[-1:] == b'"':
-                value = value[1:-1]
-                value = value.replace(b"\\\\", b"\\").replace(b'\\"', b'"')
-            pdict[name] = value
-
-    return key, pdict
-
-
-def _parseparam(s: bytes) -> Generator[bytes, None, None]:
-    """Generator which splits the input on ;, respecting double-quoted sequences
-
-    Cargo-culted from `cgi`, but works on bytes rather than strings.
-
-    Args:
-        s: header to be parsed
-
-    Returns:
-        The split input
-    """
-    while s[:1] == b";":
-        s = s[1:]
-
-        # look for the next ;
-        end = s.find(b";")
-
-        # if there is an odd number of " marks between here and the next ;, skip to the
-        # next ; instead
-        while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2:
-            end = s.find(b";", end + 1)
-
-        if end < 0:
-            end = len(s)
-        f = s[:end]
-        yield f.strip()
-        s = s[end:]
+# This exists purely for backwards compatibility with media providers and spam checkers.
+from synapse.media._base import FileInfo, Responder  # noqa: F401
diff --git a/synapse/rest/media/v1/config_resource.py b/synapse/rest/media/v1/config_resource.py
deleted file mode 100644
index a95804d327..0000000000
--- a/synapse/rest/media/v1/config_resource.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Copyright 2018 Will Hunt <will@half-shot.uk>
-# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from typing import TYPE_CHECKING
-
-from synapse.http.server import DirectServeJsonResource, respond_with_json
-from synapse.http.site import SynapseRequest
-
-if TYPE_CHECKING:
-    from synapse.server import HomeServer
-
-
-class MediaConfigResource(DirectServeJsonResource):
-    isLeaf = True
-
-    def __init__(self, hs: "HomeServer"):
-        super().__init__()
-        config = hs.config
-        self.clock = hs.get_clock()
-        self.auth = hs.get_auth()
-        self.limits_dict = {"m.upload.size": config.media.max_upload_size}
-
-    async def _async_render_GET(self, request: SynapseRequest) -> None:
-        await self.auth.get_user_by_req(request)
-        respond_with_json(request, 200, self.limits_dict, send_cors=True)
-
-    async def _async_render_OPTIONS(self, request: SynapseRequest) -> None:
-        respond_with_json(request, 200, {}, send_cors=True)
diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py
deleted file mode 100644
index 048a042692..0000000000
--- a/synapse/rest/media/v1/download_resource.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# Copyright 2014-2016 OpenMarket Ltd
-# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import logging
-from typing import TYPE_CHECKING
-
-from synapse.http.server import (
-    DirectServeJsonResource,
-    set_corp_headers,
-    set_cors_headers,
-)
-from synapse.http.servlet import parse_boolean
-from synapse.http.site import SynapseRequest
-
-from ._base import parse_media_id, respond_404
-
-if TYPE_CHECKING:
-    from synapse.rest.media.v1.media_repository import MediaRepository
-    from synapse.server import HomeServer
-
-logger = logging.getLogger(__name__)
-
-
-class DownloadResource(DirectServeJsonResource):
-    isLeaf = True
-
-    def __init__(self, hs: "HomeServer", media_repo: "MediaRepository"):
-        super().__init__()
-        self.media_repo = media_repo
-        self.server_name = hs.hostname
-
-    async def _async_render_GET(self, request: SynapseRequest) -> None:
-        set_cors_headers(request)
-        set_corp_headers(request)
-        request.setHeader(
-            b"Content-Security-Policy",
-            b"sandbox;"
-            b" default-src 'none';"
-            b" script-src 'none';"
-            b" plugin-types application/pdf;"
-            b" style-src 'unsafe-inline';"
-            b" media-src 'self';"
-            b" object-src 'self';",
-        )
-        # Limited non-standard form of CSP for IE11
-        request.setHeader(b"X-Content-Security-Policy", b"sandbox;")
-        request.setHeader(
-            b"Referrer-Policy",
-            b"no-referrer",
-        )
-        server_name, media_id, name = parse_media_id(request)
-        if server_name == self.server_name:
-            await self.media_repo.get_local_media(request, media_id, name)
-        else:
-            allow_remote = parse_boolean(request, "allow_remote", default=True)
-            if not allow_remote:
-                logger.info(
-                    "Rejecting request for remote media %s/%s due to allow_remote",
-                    server_name,
-                    media_id,
-                )
-                respond_404(request)
-                return
-
-            await self.media_repo.get_remote_media(request, server_name, media_id, name)
diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py
deleted file mode 100644
index 1f6441c412..0000000000
--- a/synapse/rest/media/v1/filepath.py
+++ /dev/null
@@ -1,410 +0,0 @@
-# Copyright 2014-2016 OpenMarket Ltd
-# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import functools
-import os
-import re
-import string
-from typing import Any, Callable, List, TypeVar, Union, cast
-
-NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d")
-
-
-F = TypeVar("F", bound=Callable[..., str])
-
-
-def _wrap_in_base_path(func: F) -> F:
-    """Takes a function that returns a relative path and turns it into an
-    absolute path based on the location of the primary media store
-    """
-
-    @functools.wraps(func)
-    def _wrapped(self: "MediaFilePaths", *args: Any, **kwargs: Any) -> str:
-        path = func(self, *args, **kwargs)
-        return os.path.join(self.base_path, path)
-
-    return cast(F, _wrapped)
-
-
-GetPathMethod = TypeVar(
-    "GetPathMethod", bound=Union[Callable[..., str], Callable[..., List[str]]]
-)
-
-
-def _wrap_with_jail_check(relative: bool) -> Callable[[GetPathMethod], GetPathMethod]:
-    """Wraps a path-returning method to check that the returned path(s) do not escape
-    the media store directory.
-
-    The path-returning method may return either a single path, or a list of paths.
-
-    The check is not expected to ever fail, unless `func` is missing a call to
-    `_validate_path_component`, or `_validate_path_component` is buggy.
-
-    Args:
-        relative: A boolean indicating whether the wrapped method returns paths relative
-            to the media store directory.
-
-    Returns:
-        A method which will wrap a path-returning method, adding a check to ensure that
-        the returned path(s) lie within the media store directory. The check will raise
-        a `ValueError` if it fails.
-    """
-
-    def _wrap_with_jail_check_inner(func: GetPathMethod) -> GetPathMethod:
-        @functools.wraps(func)
-        def _wrapped(
-            self: "MediaFilePaths", *args: Any, **kwargs: Any
-        ) -> Union[str, List[str]]:
-            path_or_paths = func(self, *args, **kwargs)
-
-            if isinstance(path_or_paths, list):
-                paths_to_check = path_or_paths
-            else:
-                paths_to_check = [path_or_paths]
-
-            for path in paths_to_check:
-                # Construct the path that will ultimately be used.
-                # We cannot guess whether `path` is relative to the media store
-                # directory, since the media store directory may itself be a relative
-                # path.
-                if relative:
-                    path = os.path.join(self.base_path, path)
-                normalized_path = os.path.normpath(path)
-
-                # Now that `normpath` has eliminated `../`s and `./`s from the path,
-                # `os.path.commonpath` can be used to check whether it lies within the
-                # media store directory.
-                if (
-                    os.path.commonpath([normalized_path, self.normalized_base_path])
-                    != self.normalized_base_path
-                ):
-                    # The path resolves to outside the media store directory,
-                    # or `self.base_path` is `.`, which is an unlikely configuration.
-                    raise ValueError(f"Invalid media store path: {path!r}")
-
-                # Note that `os.path.normpath`/`abspath` has a subtle caveat:
-                # `a/b/c/../c` will normalize to `a/b/c`, but the former refers to a
-                # different path if `a/b/c` is a symlink. That is, the check above is
-                # not perfect and may allow a certain restricted subset of untrustworthy
-                # paths through. Since the check above is secondary to the main
-                # `_validate_path_component` checks, it's less important for it to be
-                # perfect.
-                #
-                # As an alternative, `os.path.realpath` will resolve symlinks, but
-                # proves problematic if there are symlinks inside the media store.
-                # eg. if `url_store/` is symlinked to elsewhere, its canonical path
-                # won't match that of the main media store directory.
-
-            return path_or_paths
-
-        return cast(GetPathMethod, _wrapped)
-
-    return _wrap_with_jail_check_inner
-
-
-ALLOWED_CHARACTERS = set(
-    string.ascii_letters
-    + string.digits
-    + "_-"
-    + ".[]:"  # Domain names, IPv6 addresses and ports in server names
-)
-FORBIDDEN_NAMES = {
-    "",
-    os.path.curdir,  # "." for the current platform
-    os.path.pardir,  # ".." for the current platform
-}
-
-
-def _validate_path_component(name: str) -> str:
-    """Checks that the given string can be safely used as a path component
-
-    Args:
-        name: The path component to check.
-
-    Returns:
-        The path component if valid.
-
-    Raises:
-        ValueError: If `name` cannot be safely used as a path component.
-    """
-    if not ALLOWED_CHARACTERS.issuperset(name) or name in FORBIDDEN_NAMES:
-        raise ValueError(f"Invalid path component: {name!r}")
-
-    return name
-
-
-class MediaFilePaths:
-    """Describes where files are stored on disk.
-
-    Most of the functions have a `*_rel` variant which returns a file path that
-    is relative to the base media store path. This is mainly used when we want
-    to write to the backup media store (when one is configured)
-    """
-
-    def __init__(self, primary_base_path: str):
-        self.base_path = primary_base_path
-        self.normalized_base_path = os.path.normpath(self.base_path)
-
-        # Refuse to initialize if paths cannot be validated correctly for the current
-        # platform.
-        assert os.path.sep not in ALLOWED_CHARACTERS
-        assert os.path.altsep not in ALLOWED_CHARACTERS
-        # On Windows, paths have all sorts of weirdness which `_validate_path_component`
-        # does not consider. In any case, the remote media store can't work correctly
-        # for certain homeservers there, since ":"s aren't allowed in paths.
-        assert os.name == "posix"
-
-    @_wrap_with_jail_check(relative=True)
-    def local_media_filepath_rel(self, media_id: str) -> str:
-        return os.path.join(
-            "local_content",
-            _validate_path_component(media_id[0:2]),
-            _validate_path_component(media_id[2:4]),
-            _validate_path_component(media_id[4:]),
-        )
-
-    local_media_filepath = _wrap_in_base_path(local_media_filepath_rel)
-
-    @_wrap_with_jail_check(relative=True)
-    def local_media_thumbnail_rel(
-        self, media_id: str, width: int, height: int, content_type: str, method: str
-    ) -> str:
-        top_level_type, sub_type = content_type.split("/")
-        file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method)
-        return os.path.join(
-            "local_thumbnails",
-            _validate_path_component(media_id[0:2]),
-            _validate_path_component(media_id[2:4]),
-            _validate_path_component(media_id[4:]),
-            _validate_path_component(file_name),
-        )
-
-    local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel)
-
-    @_wrap_with_jail_check(relative=False)
-    def local_media_thumbnail_dir(self, media_id: str) -> str:
-        """
-        Retrieve the local store path of thumbnails of a given media_id
-
-        Args:
-            media_id: The media ID to query.
-        Returns:
-            Path of local_thumbnails from media_id
-        """
-        return os.path.join(
-            self.base_path,
-            "local_thumbnails",
-            _validate_path_component(media_id[0:2]),
-            _validate_path_component(media_id[2:4]),
-            _validate_path_component(media_id[4:]),
-        )
-
-    @_wrap_with_jail_check(relative=True)
-    def remote_media_filepath_rel(self, server_name: str, file_id: str) -> str:
-        return os.path.join(
-            "remote_content",
-            _validate_path_component(server_name),
-            _validate_path_component(file_id[0:2]),
-            _validate_path_component(file_id[2:4]),
-            _validate_path_component(file_id[4:]),
-        )
-
-    remote_media_filepath = _wrap_in_base_path(remote_media_filepath_rel)
-
-    @_wrap_with_jail_check(relative=True)
-    def remote_media_thumbnail_rel(
-        self,
-        server_name: str,
-        file_id: str,
-        width: int,
-        height: int,
-        content_type: str,
-        method: str,
-    ) -> str:
-        top_level_type, sub_type = content_type.split("/")
-        file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method)
-        return os.path.join(
-            "remote_thumbnail",
-            _validate_path_component(server_name),
-            _validate_path_component(file_id[0:2]),
-            _validate_path_component(file_id[2:4]),
-            _validate_path_component(file_id[4:]),
-            _validate_path_component(file_name),
-        )
-
-    remote_media_thumbnail = _wrap_in_base_path(remote_media_thumbnail_rel)
-
-    # Legacy path that was used to store thumbnails previously.
-    # Should be removed after some time, when most of the thumbnails are stored
-    # using the new path.
-    @_wrap_with_jail_check(relative=True)
-    def remote_media_thumbnail_rel_legacy(
-        self, server_name: str, file_id: str, width: int, height: int, content_type: str
-    ) -> str:
-        top_level_type, sub_type = content_type.split("/")
-        file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type)
-        return os.path.join(
-            "remote_thumbnail",
-            _validate_path_component(server_name),
-            _validate_path_component(file_id[0:2]),
-            _validate_path_component(file_id[2:4]),
-            _validate_path_component(file_id[4:]),
-            _validate_path_component(file_name),
-        )
-
-    @_wrap_with_jail_check(relative=False)
-    def remote_media_thumbnail_dir(self, server_name: str, file_id: str) -> str:
-        return os.path.join(
-            self.base_path,
-            "remote_thumbnail",
-            _validate_path_component(server_name),
-            _validate_path_component(file_id[0:2]),
-            _validate_path_component(file_id[2:4]),
-            _validate_path_component(file_id[4:]),
-        )
-
-    @_wrap_with_jail_check(relative=True)
-    def url_cache_filepath_rel(self, media_id: str) -> str:
-        if NEW_FORMAT_ID_RE.match(media_id):
-            # Media id is of the form <DATE><RANDOM_STRING>
-            # E.g.: 2017-09-28-fsdRDt24DS234dsf
-            return os.path.join(
-                "url_cache",
-                _validate_path_component(media_id[:10]),
-                _validate_path_component(media_id[11:]),
-            )
-        else:
-            return os.path.join(
-                "url_cache",
-                _validate_path_component(media_id[0:2]),
-                _validate_path_component(media_id[2:4]),
-                _validate_path_component(media_id[4:]),
-            )
-
-    url_cache_filepath = _wrap_in_base_path(url_cache_filepath_rel)
-
-    @_wrap_with_jail_check(relative=False)
-    def url_cache_filepath_dirs_to_delete(self, media_id: str) -> List[str]:
-        "The dirs to try and remove if we delete the media_id file"
-        if NEW_FORMAT_ID_RE.match(media_id):
-            return [
-                os.path.join(
-                    self.base_path, "url_cache", _validate_path_component(media_id[:10])
-                )
-            ]
-        else:
-            return [
-                os.path.join(
-                    self.base_path,
-                    "url_cache",
-                    _validate_path_component(media_id[0:2]),
-                    _validate_path_component(media_id[2:4]),
-                ),
-                os.path.join(
-                    self.base_path, "url_cache", _validate_path_component(media_id[0:2])
-                ),
-            ]
-
-    @_wrap_with_jail_check(relative=True)
-    def url_cache_thumbnail_rel(
-        self, media_id: str, width: int, height: int, content_type: str, method: str
-    ) -> str:
-        # Media id is of the form <DATE><RANDOM_STRING>
-        # E.g.: 2017-09-28-fsdRDt24DS234dsf
-
-        top_level_type, sub_type = content_type.split("/")
-        file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method)
-
-        if NEW_FORMAT_ID_RE.match(media_id):
-            return os.path.join(
-                "url_cache_thumbnails",
-                _validate_path_component(media_id[:10]),
-                _validate_path_component(media_id[11:]),
-                _validate_path_component(file_name),
-            )
-        else:
-            return os.path.join(
-                "url_cache_thumbnails",
-                _validate_path_component(media_id[0:2]),
-                _validate_path_component(media_id[2:4]),
-                _validate_path_component(media_id[4:]),
-                _validate_path_component(file_name),
-            )
-
-    url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel)
-
-    @_wrap_with_jail_check(relative=True)
-    def url_cache_thumbnail_directory_rel(self, media_id: str) -> str:
-        # Media id is of the form <DATE><RANDOM_STRING>
-        # E.g.: 2017-09-28-fsdRDt24DS234dsf
-
-        if NEW_FORMAT_ID_RE.match(media_id):
-            return os.path.join(
-                "url_cache_thumbnails",
-                _validate_path_component(media_id[:10]),
-                _validate_path_component(media_id[11:]),
-            )
-        else:
-            return os.path.join(
-                "url_cache_thumbnails",
-                _validate_path_component(media_id[0:2]),
-                _validate_path_component(media_id[2:4]),
-                _validate_path_component(media_id[4:]),
-            )
-
-    url_cache_thumbnail_directory = _wrap_in_base_path(
-        url_cache_thumbnail_directory_rel
-    )
-
-    @_wrap_with_jail_check(relative=False)
-    def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]:
-        "The dirs to try and remove if we delete the media_id thumbnails"
-        # Media id is of the form <DATE><RANDOM_STRING>
-        # E.g.: 2017-09-28-fsdRDt24DS234dsf
-        if NEW_FORMAT_ID_RE.match(media_id):
-            return [
-                os.path.join(
-                    self.base_path,
-                    "url_cache_thumbnails",
-                    _validate_path_component(media_id[:10]),
-                    _validate_path_component(media_id[11:]),
-                ),
-                os.path.join(
-                    self.base_path,
-                    "url_cache_thumbnails",
-                    _validate_path_component(media_id[:10]),
-                ),
-            ]
-        else:
-            return [
-                os.path.join(
-                    self.base_path,
-                    "url_cache_thumbnails",
-                    _validate_path_component(media_id[0:2]),
-                    _validate_path_component(media_id[2:4]),
-                    _validate_path_component(media_id[4:]),
-                ),
-                os.path.join(
-                    self.base_path,
-                    "url_cache_thumbnails",
-                    _validate_path_component(media_id[0:2]),
-                    _validate_path_component(media_id[2:4]),
-                ),
-                os.path.join(
-                    self.base_path,
-                    "url_cache_thumbnails",
-                    _validate_path_component(media_id[0:2]),
-                ),
-            ]
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
deleted file mode 100644
index c70e1837af..0000000000
--- a/synapse/rest/media/v1/media_repository.py
+++ /dev/null
@@ -1,1112 +0,0 @@
-# Copyright 2014-2016 OpenMarket Ltd
-# Copyright 2018-2021 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import errno
-import logging
-import os
-import shutil
-from io import BytesIO
-from typing import IO, TYPE_CHECKING, Dict, List, Optional, Set, Tuple
-
-from matrix_common.types.mxc_uri import MXCUri
-
-import twisted.internet.error
-import twisted.web.http
-from twisted.internet.defer import Deferred
-
-from synapse.api.errors import (
-    FederationDeniedError,
-    HttpResponseException,
-    NotFoundError,
-    RequestSendFailed,
-    SynapseError,
-)
-from synapse.config._base import ConfigError
-from synapse.config.repository import ThumbnailRequirement
-from synapse.http.server import UnrecognizedRequestResource
-from synapse.http.site import SynapseRequest
-from synapse.logging.context import defer_to_thread
-from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.types import UserID
-from synapse.util.async_helpers import Linearizer
-from synapse.util.retryutils import NotRetryingDestination
-from synapse.util.stringutils import random_string
-
-from ._base import (
-    FileInfo,
-    Responder,
-    ThumbnailInfo,
-    get_filename_from_headers,
-    respond_404,
-    respond_with_responder,
-)
-from .config_resource import MediaConfigResource
-from .download_resource import DownloadResource
-from .filepath import MediaFilePaths
-from .media_storage import MediaStorage
-from .preview_url_resource import PreviewUrlResource
-from .storage_provider import StorageProviderWrapper
-from .thumbnail_resource import ThumbnailResource
-from .thumbnailer import Thumbnailer, ThumbnailError
-from .upload_resource import UploadResource
-
-if TYPE_CHECKING:
-    from synapse.server import HomeServer
-
-logger = logging.getLogger(__name__)
-
-# How often to run the background job to update the "recently accessed"
-# attribute of local and remote media.
-UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000  # 1 minute
-# How often to run the background job to check for local and remote media
-# that should be purged according to the configured media retention settings.
-MEDIA_RETENTION_CHECK_PERIOD_MS = 60 * 60 * 1000  # 1 hour
-
-
-class MediaRepository:
-    def __init__(self, hs: "HomeServer"):
-        self.hs = hs
-        self.auth = hs.get_auth()
-        self.client = hs.get_federation_http_client()
-        self.clock = hs.get_clock()
-        self.server_name = hs.hostname
-        self.store = hs.get_datastores().main
-        self.max_upload_size = hs.config.media.max_upload_size
-        self.max_image_pixels = hs.config.media.max_image_pixels
-
-        Thumbnailer.set_limits(self.max_image_pixels)
-
-        self.primary_base_path: str = hs.config.media.media_store_path
-        self.filepaths: MediaFilePaths = MediaFilePaths(self.primary_base_path)
-
-        self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
-        self.thumbnail_requirements = hs.config.media.thumbnail_requirements
-
-        self.remote_media_linearizer = Linearizer(name="media_remote")
-
-        self.recently_accessed_remotes: Set[Tuple[str, str]] = set()
-        self.recently_accessed_locals: Set[str] = set()
-
-        self.federation_domain_whitelist = (
-            hs.config.federation.federation_domain_whitelist
-        )
-
-        # List of StorageProviders where we should search for media and
-        # potentially upload to.
-        storage_providers = []
-
-        for (
-            clz,
-            provider_config,
-            wrapper_config,
-        ) in hs.config.media.media_storage_providers:
-            backend = clz(hs, provider_config)
-            provider = StorageProviderWrapper(
-                backend,
-                store_local=wrapper_config.store_local,
-                store_remote=wrapper_config.store_remote,
-                store_synchronous=wrapper_config.store_synchronous,
-            )
-            storage_providers.append(provider)
-
-        self.media_storage = MediaStorage(
-            self.hs, self.primary_base_path, self.filepaths, storage_providers
-        )
-
-        self.clock.looping_call(
-            self._start_update_recently_accessed, UPDATE_RECENTLY_ACCESSED_TS
-        )
-
-        # Media retention configuration options
-        self._media_retention_local_media_lifetime_ms = (
-            hs.config.media.media_retention_local_media_lifetime_ms
-        )
-        self._media_retention_remote_media_lifetime_ms = (
-            hs.config.media.media_retention_remote_media_lifetime_ms
-        )
-
-        # Check whether local or remote media retention is configured
-        if (
-            hs.config.media.media_retention_local_media_lifetime_ms is not None
-            or hs.config.media.media_retention_remote_media_lifetime_ms is not None
-        ):
-            # Run the background job to apply media retention rules routinely,
-            # with the duration between runs dictated by the homeserver config.
-            self.clock.looping_call(
-                self._start_apply_media_retention_rules,
-                MEDIA_RETENTION_CHECK_PERIOD_MS,
-            )
-
-    def _start_update_recently_accessed(self) -> Deferred:
-        return run_as_background_process(
-            "update_recently_accessed_media", self._update_recently_accessed
-        )
-
-    def _start_apply_media_retention_rules(self) -> Deferred:
-        return run_as_background_process(
-            "apply_media_retention_rules", self._apply_media_retention_rules
-        )
-
-    async def _update_recently_accessed(self) -> None:
-        remote_media = self.recently_accessed_remotes
-        self.recently_accessed_remotes = set()
-
-        local_media = self.recently_accessed_locals
-        self.recently_accessed_locals = set()
-
-        await self.store.update_cached_last_access_time(
-            local_media, remote_media, self.clock.time_msec()
-        )
-
-    def mark_recently_accessed(self, server_name: Optional[str], media_id: str) -> None:
-        """Mark the given media as recently accessed.
-
-        Args:
-            server_name: Origin server of media, or None if local
-            media_id: The media ID of the content
-        """
-        if server_name:
-            self.recently_accessed_remotes.add((server_name, media_id))
-        else:
-            self.recently_accessed_locals.add(media_id)
-
-    async def create_content(
-        self,
-        media_type: str,
-        upload_name: Optional[str],
-        content: IO,
-        content_length: int,
-        auth_user: UserID,
-    ) -> MXCUri:
-        """Store uploaded content for a local user and return the mxc URL
-
-        Args:
-            media_type: The content type of the file.
-            upload_name: The name of the file, if provided.
-            content: A file like object that is the content to store
-            content_length: The length of the content
-            auth_user: The user_id of the uploader
-
-        Returns:
-            The mxc url of the stored content
-        """
-
-        media_id = random_string(24)
-
-        file_info = FileInfo(server_name=None, file_id=media_id)
-
-        fname = await self.media_storage.store_file(content, file_info)
-
-        logger.info("Stored local media in file %r", fname)
-
-        await self.store.store_local_media(
-            media_id=media_id,
-            media_type=media_type,
-            time_now_ms=self.clock.time_msec(),
-            upload_name=upload_name,
-            media_length=content_length,
-            user_id=auth_user,
-        )
-
-        await self._generate_thumbnails(None, media_id, media_id, media_type)
-
-        return MXCUri(self.server_name, media_id)
-
-    async def get_local_media(
-        self, request: SynapseRequest, media_id: str, name: Optional[str]
-    ) -> None:
-        """Responds to requests for local media, if exists, or returns 404.
-
-        Args:
-            request: The incoming request.
-            media_id: The media ID of the content. (This is the same as
-                the file_id for local content.)
-            name: Optional name that, if specified, will be used as
-                the filename in the Content-Disposition header of the response.
-
-        Returns:
-            Resolves once a response has successfully been written to request
-        """
-        media_info = await self.store.get_local_media(media_id)
-        if not media_info or media_info["quarantined_by"]:
-            respond_404(request)
-            return
-
-        self.mark_recently_accessed(None, media_id)
-
-        media_type = media_info["media_type"]
-        if not media_type:
-            media_type = "application/octet-stream"
-        media_length = media_info["media_length"]
-        upload_name = name if name else media_info["upload_name"]
-        url_cache = media_info["url_cache"]
-
-        file_info = FileInfo(None, media_id, url_cache=bool(url_cache))
-
-        responder = await self.media_storage.fetch_media(file_info)
-        await respond_with_responder(
-            request, responder, media_type, media_length, upload_name
-        )
-
-    async def get_remote_media(
-        self,
-        request: SynapseRequest,
-        server_name: str,
-        media_id: str,
-        name: Optional[str],
-    ) -> None:
-        """Respond to requests for remote media.
-
-        Args:
-            request: The incoming request.
-            server_name: Remote server_name where the media originated.
-            media_id: The media ID of the content (as defined by the remote server).
-            name: Optional name that, if specified, will be used as
-                the filename in the Content-Disposition header of the response.
-
-        Returns:
-            Resolves once a response has successfully been written to request
-        """
-        if (
-            self.federation_domain_whitelist is not None
-            and server_name not in self.federation_domain_whitelist
-        ):
-            raise FederationDeniedError(server_name)
-
-        self.mark_recently_accessed(server_name, media_id)
-
-        # We linearize here to ensure that we don't try and download remote
-        # media multiple times concurrently
-        key = (server_name, media_id)
-        async with self.remote_media_linearizer.queue(key):
-            responder, media_info = await self._get_remote_media_impl(
-                server_name, media_id
-            )
-
-        # We deliberately stream the file outside the lock
-        if responder:
-            media_type = media_info["media_type"]
-            media_length = media_info["media_length"]
-            upload_name = name if name else media_info["upload_name"]
-            await respond_with_responder(
-                request, responder, media_type, media_length, upload_name
-            )
-        else:
-            respond_404(request)
-
-    async def get_remote_media_info(self, server_name: str, media_id: str) -> dict:
-        """Gets the media info associated with the remote file, downloading
-        if necessary.
-
-        Args:
-            server_name: Remote server_name where the media originated.
-            media_id: The media ID of the content (as defined by the remote server).
-
-        Returns:
-            The media info of the file
-        """
-        if (
-            self.federation_domain_whitelist is not None
-            and server_name not in self.federation_domain_whitelist
-        ):
-            raise FederationDeniedError(server_name)
-
-        # We linearize here to ensure that we don't try and download remote
-        # media multiple times concurrently
-        key = (server_name, media_id)
-        async with self.remote_media_linearizer.queue(key):
-            responder, media_info = await self._get_remote_media_impl(
-                server_name, media_id
-            )
-
-        # Ensure we actually use the responder so that it releases resources
-        if responder:
-            with responder:
-                pass
-
-        return media_info
-
-    async def _get_remote_media_impl(
-        self, server_name: str, media_id: str
-    ) -> Tuple[Optional[Responder], dict]:
-        """Looks for media in local cache, if not there then attempt to
-        download from remote server.
-
-        Args:
-            server_name: Remote server_name where the media originated.
-            media_id: The media ID of the content (as defined by the
-                remote server).
-
-        Returns:
-            A tuple of responder and the media info of the file.
-        """
-        media_info = await self.store.get_cached_remote_media(server_name, media_id)
-
-        # file_id is the ID we use to track the file locally. If we've already
-        # seen the file then reuse the existing ID, otherwise generate a new
-        # one.
-
-        # If we have an entry in the DB, try and look for it
-        if media_info:
-            file_id = media_info["filesystem_id"]
-            file_info = FileInfo(server_name, file_id)
-
-            if media_info["quarantined_by"]:
-                logger.info("Media is quarantined")
-                raise NotFoundError()
-
-            if not media_info["media_type"]:
-                media_info["media_type"] = "application/octet-stream"
-
-            responder = await self.media_storage.fetch_media(file_info)
-            if responder:
-                return responder, media_info
-
-        # Failed to find the file anywhere, lets download it.
-
-        try:
-            media_info = await self._download_remote_file(
-                server_name,
-                media_id,
-            )
-        except SynapseError:
-            raise
-        except Exception as e:
-            # An exception may be because we downloaded media in another
-            # process, so let's check if we magically have the media.
-            media_info = await self.store.get_cached_remote_media(server_name, media_id)
-            if not media_info:
-                raise e
-
-        file_id = media_info["filesystem_id"]
-        if not media_info["media_type"]:
-            media_info["media_type"] = "application/octet-stream"
-        file_info = FileInfo(server_name, file_id)
-
-        # We generate thumbnails even if another process downloaded the media
-        # as a) it's conceivable that the other download request dies before it
-        # generates thumbnails, but mainly b) we want to be sure the thumbnails
-        # have finished being generated before responding to the client,
-        # otherwise they'll request thumbnails and get a 404 if they're not
-        # ready yet.
-        await self._generate_thumbnails(
-            server_name, media_id, file_id, media_info["media_type"]
-        )
-
-        responder = await self.media_storage.fetch_media(file_info)
-        return responder, media_info
-
-    async def _download_remote_file(
-        self,
-        server_name: str,
-        media_id: str,
-    ) -> dict:
-        """Attempt to download the remote file from the given server name,
-        using the given file_id as the local id.
-
-        Args:
-            server_name: Originating server
-            media_id: The media ID of the content (as defined by the
-                remote server). This is different than the file_id, which is
-                locally generated.
-            file_id: Local file ID
-
-        Returns:
-            The media info of the file.
-        """
-
-        file_id = random_string(24)
-
-        file_info = FileInfo(server_name=server_name, file_id=file_id)
-
-        with self.media_storage.store_into_file(file_info) as (f, fname, finish):
-            request_path = "/".join(
-                ("/_matrix/media/r0/download", server_name, media_id)
-            )
-            try:
-                length, headers = await self.client.get_file(
-                    server_name,
-                    request_path,
-                    output_stream=f,
-                    max_size=self.max_upload_size,
-                    args={
-                        # tell the remote server to 404 if it doesn't
-                        # recognise the server_name, to make sure we don't
-                        # end up with a routing loop.
-                        "allow_remote": "false"
-                    },
-                )
-            except RequestSendFailed as e:
-                logger.warning(
-                    "Request failed fetching remote media %s/%s: %r",
-                    server_name,
-                    media_id,
-                    e,
-                )
-                raise SynapseError(502, "Failed to fetch remote media")
-
-            except HttpResponseException as e:
-                logger.warning(
-                    "HTTP error fetching remote media %s/%s: %s",
-                    server_name,
-                    media_id,
-                    e.response,
-                )
-                if e.code == twisted.web.http.NOT_FOUND:
-                    raise e.to_synapse_error()
-                raise SynapseError(502, "Failed to fetch remote media")
-
-            except SynapseError:
-                logger.warning(
-                    "Failed to fetch remote media %s/%s", server_name, media_id
-                )
-                raise
-            except NotRetryingDestination:
-                logger.warning("Not retrying destination %r", server_name)
-                raise SynapseError(502, "Failed to fetch remote media")
-            except Exception:
-                logger.exception(
-                    "Failed to fetch remote media %s/%s", server_name, media_id
-                )
-                raise SynapseError(502, "Failed to fetch remote media")
-
-            await finish()
-
-            if b"Content-Type" in headers:
-                media_type = headers[b"Content-Type"][0].decode("ascii")
-            else:
-                media_type = "application/octet-stream"
-            upload_name = get_filename_from_headers(headers)
-            time_now_ms = self.clock.time_msec()
-
-            # Multiple remote media download requests can race (when using
-            # multiple media repos), so this may throw a violation constraint
-            # exception. If it does we'll delete the newly downloaded file from
-            # disk (as we're in the ctx manager).
-            #
-            # However: we've already called `finish()` so we may have also
-            # written to the storage providers. This is preferable to the
-            # alternative where we call `finish()` *after* this, where we could
-            # end up having an entry in the DB but fail to write the files to
-            # the storage providers.
-            await self.store.store_cached_remote_media(
-                origin=server_name,
-                media_id=media_id,
-                media_type=media_type,
-                time_now_ms=self.clock.time_msec(),
-                upload_name=upload_name,
-                media_length=length,
-                filesystem_id=file_id,
-            )
-
-        logger.info("Stored remote media in file %r", fname)
-
-        media_info = {
-            "media_type": media_type,
-            "media_length": length,
-            "upload_name": upload_name,
-            "created_ts": time_now_ms,
-            "filesystem_id": file_id,
-        }
-
-        return media_info
-
-    def _get_thumbnail_requirements(
-        self, media_type: str
-    ) -> Tuple[ThumbnailRequirement, ...]:
-        scpos = media_type.find(";")
-        if scpos > 0:
-            media_type = media_type[:scpos]
-        return self.thumbnail_requirements.get(media_type, ())
-
-    def _generate_thumbnail(
-        self,
-        thumbnailer: Thumbnailer,
-        t_width: int,
-        t_height: int,
-        t_method: str,
-        t_type: str,
-    ) -> Optional[BytesIO]:
-        m_width = thumbnailer.width
-        m_height = thumbnailer.height
-
-        if m_width * m_height >= self.max_image_pixels:
-            logger.info(
-                "Image too large to thumbnail %r x %r > %r",
-                m_width,
-                m_height,
-                self.max_image_pixels,
-            )
-            return None
-
-        if thumbnailer.transpose_method is not None:
-            m_width, m_height = thumbnailer.transpose()
-
-        if t_method == "crop":
-            return thumbnailer.crop(t_width, t_height, t_type)
-        elif t_method == "scale":
-            t_width, t_height = thumbnailer.aspect(t_width, t_height)
-            t_width = min(m_width, t_width)
-            t_height = min(m_height, t_height)
-            return thumbnailer.scale(t_width, t_height, t_type)
-
-        return None
-
-    async def generate_local_exact_thumbnail(
-        self,
-        media_id: str,
-        t_width: int,
-        t_height: int,
-        t_method: str,
-        t_type: str,
-        url_cache: bool,
-    ) -> Optional[str]:
-        input_path = await self.media_storage.ensure_media_is_in_local_cache(
-            FileInfo(None, media_id, url_cache=url_cache)
-        )
-
-        try:
-            thumbnailer = Thumbnailer(input_path)
-        except ThumbnailError as e:
-            logger.warning(
-                "Unable to generate a thumbnail for local media %s using a method of %s and type of %s: %s",
-                media_id,
-                t_method,
-                t_type,
-                e,
-            )
-            return None
-
-        with thumbnailer:
-            t_byte_source = await defer_to_thread(
-                self.hs.get_reactor(),
-                self._generate_thumbnail,
-                thumbnailer,
-                t_width,
-                t_height,
-                t_method,
-                t_type,
-            )
-
-        if t_byte_source:
-            try:
-                file_info = FileInfo(
-                    server_name=None,
-                    file_id=media_id,
-                    url_cache=url_cache,
-                    thumbnail=ThumbnailInfo(
-                        width=t_width,
-                        height=t_height,
-                        method=t_method,
-                        type=t_type,
-                    ),
-                )
-
-                output_path = await self.media_storage.store_file(
-                    t_byte_source, file_info
-                )
-            finally:
-                t_byte_source.close()
-
-            logger.info("Stored thumbnail in file %r", output_path)
-
-            t_len = os.path.getsize(output_path)
-
-            await self.store.store_local_thumbnail(
-                media_id, t_width, t_height, t_type, t_method, t_len
-            )
-
-            return output_path
-
-        # Could not generate thumbnail.
-        return None
-
-    async def generate_remote_exact_thumbnail(
-        self,
-        server_name: str,
-        file_id: str,
-        media_id: str,
-        t_width: int,
-        t_height: int,
-        t_method: str,
-        t_type: str,
-    ) -> Optional[str]:
-        input_path = await self.media_storage.ensure_media_is_in_local_cache(
-            FileInfo(server_name, file_id)
-        )
-
-        try:
-            thumbnailer = Thumbnailer(input_path)
-        except ThumbnailError as e:
-            logger.warning(
-                "Unable to generate a thumbnail for remote media %s from %s using a method of %s and type of %s: %s",
-                media_id,
-                server_name,
-                t_method,
-                t_type,
-                e,
-            )
-            return None
-
-        with thumbnailer:
-            t_byte_source = await defer_to_thread(
-                self.hs.get_reactor(),
-                self._generate_thumbnail,
-                thumbnailer,
-                t_width,
-                t_height,
-                t_method,
-                t_type,
-            )
-
-        if t_byte_source:
-            try:
-                file_info = FileInfo(
-                    server_name=server_name,
-                    file_id=file_id,
-                    thumbnail=ThumbnailInfo(
-                        width=t_width,
-                        height=t_height,
-                        method=t_method,
-                        type=t_type,
-                    ),
-                )
-
-                output_path = await self.media_storage.store_file(
-                    t_byte_source, file_info
-                )
-            finally:
-                t_byte_source.close()
-
-            logger.info("Stored thumbnail in file %r", output_path)
-
-            t_len = os.path.getsize(output_path)
-
-            await self.store.store_remote_media_thumbnail(
-                server_name,
-                media_id,
-                file_id,
-                t_width,
-                t_height,
-                t_type,
-                t_method,
-                t_len,
-            )
-
-            return output_path
-
-        # Could not generate thumbnail.
-        return None
-
-    async def _generate_thumbnails(
-        self,
-        server_name: Optional[str],
-        media_id: str,
-        file_id: str,
-        media_type: str,
-        url_cache: bool = False,
-    ) -> Optional[dict]:
-        """Generate and store thumbnails for an image.
-
-        Args:
-            server_name: The server name if remote media, else None if local
-            media_id: The media ID of the content. (This is the same as
-                the file_id for local content)
-            file_id: Local file ID
-            media_type: The content type of the file
-            url_cache: If we are thumbnailing images downloaded for the URL cache,
-                used exclusively by the url previewer
-
-        Returns:
-            Dict with "width" and "height" keys of original image or None if the
-            media cannot be thumbnailed.
-        """
-        requirements = self._get_thumbnail_requirements(media_type)
-        if not requirements:
-            return None
-
-        input_path = await self.media_storage.ensure_media_is_in_local_cache(
-            FileInfo(server_name, file_id, url_cache=url_cache)
-        )
-
-        try:
-            thumbnailer = Thumbnailer(input_path)
-        except ThumbnailError as e:
-            logger.warning(
-                "Unable to generate thumbnails for remote media %s from %s of type %s: %s",
-                media_id,
-                server_name,
-                media_type,
-                e,
-            )
-            return None
-
-        with thumbnailer:
-            m_width = thumbnailer.width
-            m_height = thumbnailer.height
-
-            if m_width * m_height >= self.max_image_pixels:
-                logger.info(
-                    "Image too large to thumbnail %r x %r > %r",
-                    m_width,
-                    m_height,
-                    self.max_image_pixels,
-                )
-                return None
-
-            if thumbnailer.transpose_method is not None:
-                m_width, m_height = await defer_to_thread(
-                    self.hs.get_reactor(), thumbnailer.transpose
-                )
-
-            # We deduplicate the thumbnail sizes by ignoring the cropped versions if
-            # they have the same dimensions of a scaled one.
-            thumbnails: Dict[Tuple[int, int, str], str] = {}
-            for requirement in requirements:
-                if requirement.method == "crop":
-                    thumbnails.setdefault(
-                        (requirement.width, requirement.height, requirement.media_type),
-                        requirement.method,
-                    )
-                elif requirement.method == "scale":
-                    t_width, t_height = thumbnailer.aspect(
-                        requirement.width, requirement.height
-                    )
-                    t_width = min(m_width, t_width)
-                    t_height = min(m_height, t_height)
-                    thumbnails[
-                        (t_width, t_height, requirement.media_type)
-                    ] = requirement.method
-
-            # Now we generate the thumbnails for each dimension, store it
-            for (t_width, t_height, t_type), t_method in thumbnails.items():
-                # Generate the thumbnail
-                if t_method == "crop":
-                    t_byte_source = await defer_to_thread(
-                        self.hs.get_reactor(),
-                        thumbnailer.crop,
-                        t_width,
-                        t_height,
-                        t_type,
-                    )
-                elif t_method == "scale":
-                    t_byte_source = await defer_to_thread(
-                        self.hs.get_reactor(),
-                        thumbnailer.scale,
-                        t_width,
-                        t_height,
-                        t_type,
-                    )
-                else:
-                    logger.error("Unrecognized method: %r", t_method)
-                    continue
-
-                if not t_byte_source:
-                    continue
-
-                file_info = FileInfo(
-                    server_name=server_name,
-                    file_id=file_id,
-                    url_cache=url_cache,
-                    thumbnail=ThumbnailInfo(
-                        width=t_width,
-                        height=t_height,
-                        method=t_method,
-                        type=t_type,
-                    ),
-                )
-
-                with self.media_storage.store_into_file(file_info) as (
-                    f,
-                    fname,
-                    finish,
-                ):
-                    try:
-                        await self.media_storage.write_to_file(t_byte_source, f)
-                        await finish()
-                    finally:
-                        t_byte_source.close()
-
-                    t_len = os.path.getsize(fname)
-
-                    # Write to database
-                    if server_name:
-                        # Multiple remote media download requests can race (when
-                        # using multiple media repos), so this may throw a violation
-                        # constraint exception. If it does we'll delete the newly
-                        # generated thumbnail from disk (as we're in the ctx
-                        # manager).
-                        #
-                        # However: we've already called `finish()` so we may have
-                        # also written to the storage providers. This is preferable
-                        # to the alternative where we call `finish()` *after* this,
-                        # where we could end up having an entry in the DB but fail
-                        # to write the files to the storage providers.
-                        try:
-                            await self.store.store_remote_media_thumbnail(
-                                server_name,
-                                media_id,
-                                file_id,
-                                t_width,
-                                t_height,
-                                t_type,
-                                t_method,
-                                t_len,
-                            )
-                        except Exception as e:
-                            thumbnail_exists = (
-                                await self.store.get_remote_media_thumbnail(
-                                    server_name,
-                                    media_id,
-                                    t_width,
-                                    t_height,
-                                    t_type,
-                                )
-                            )
-                            if not thumbnail_exists:
-                                raise e
-                    else:
-                        await self.store.store_local_thumbnail(
-                            media_id, t_width, t_height, t_type, t_method, t_len
-                        )
-
-        return {"width": m_width, "height": m_height}
-
-    async def _apply_media_retention_rules(self) -> None:
-        """
-        Purge old local and remote media according to the media retention rules
-        defined in the homeserver config.
-        """
-        # Purge remote media
-        if self._media_retention_remote_media_lifetime_ms is not None:
-            # Calculate a threshold timestamp derived from the configured lifetime. Any
-            # media that has not been accessed since this timestamp will be removed.
-            remote_media_threshold_timestamp_ms = (
-                self.clock.time_msec() - self._media_retention_remote_media_lifetime_ms
-            )
-
-            logger.info(
-                "Purging remote media last accessed before"
-                f" {remote_media_threshold_timestamp_ms}"
-            )
-
-            await self.delete_old_remote_media(
-                before_ts=remote_media_threshold_timestamp_ms
-            )
-
-        # And now do the same for local media
-        if self._media_retention_local_media_lifetime_ms is not None:
-            # This works the same as the remote media threshold
-            local_media_threshold_timestamp_ms = (
-                self.clock.time_msec() - self._media_retention_local_media_lifetime_ms
-            )
-
-            logger.info(
-                "Purging local media last accessed before"
-                f" {local_media_threshold_timestamp_ms}"
-            )
-
-            await self.delete_old_local_media(
-                before_ts=local_media_threshold_timestamp_ms,
-                keep_profiles=True,
-                delete_quarantined_media=False,
-                delete_protected_media=False,
-            )
-
-    async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]:
-        old_media = await self.store.get_remote_media_ids(
-            before_ts, include_quarantined_media=False
-        )
-
-        deleted = 0
-
-        for media in old_media:
-            origin = media["media_origin"]
-            media_id = media["media_id"]
-            file_id = media["filesystem_id"]
-            key = (origin, media_id)
-
-            logger.info("Deleting: %r", key)
-
-            # TODO: Should we delete from the backup store
-
-            async with self.remote_media_linearizer.queue(key):
-                full_path = self.filepaths.remote_media_filepath(origin, file_id)
-                try:
-                    os.remove(full_path)
-                except OSError as e:
-                    logger.warning("Failed to remove file: %r", full_path)
-                    if e.errno == errno.ENOENT:
-                        pass
-                    else:
-                        continue
-
-                thumbnail_dir = self.filepaths.remote_media_thumbnail_dir(
-                    origin, file_id
-                )
-                shutil.rmtree(thumbnail_dir, ignore_errors=True)
-
-                await self.store.delete_remote_media(origin, media_id)
-                deleted += 1
-
-        return {"deleted": deleted}
-
-    async def delete_local_media_ids(
-        self, media_ids: List[str]
-    ) -> Tuple[List[str], int]:
-        """
-        Delete the given local or remote media ID from this server
-
-        Args:
-            media_id: The media ID to delete.
-        Returns:
-            A tuple of (list of deleted media IDs, total deleted media IDs).
-        """
-        return await self._remove_local_media_from_disk(media_ids)
-
-    async def delete_old_local_media(
-        self,
-        before_ts: int,
-        size_gt: int = 0,
-        keep_profiles: bool = True,
-        delete_quarantined_media: bool = False,
-        delete_protected_media: bool = False,
-    ) -> Tuple[List[str], int]:
-        """
-        Delete local or remote media from this server by size and timestamp. Removes
-        media files, any thumbnails and cached URLs.
-
-        Args:
-            before_ts: Unix timestamp in ms.
-                Files that were last used before this timestamp will be deleted.
-            size_gt: Size of the media in bytes. Files that are larger will be deleted.
-            keep_profiles: Switch to delete also files that are still used in image data
-                (e.g user profile, room avatar). If false these files will be deleted.
-            delete_quarantined_media: If True, media marked as quarantined will be deleted.
-            delete_protected_media: If True, media marked as protected will be deleted.
-
-        Returns:
-            A tuple of (list of deleted media IDs, total deleted media IDs).
-        """
-        old_media = await self.store.get_local_media_ids(
-            before_ts,
-            size_gt,
-            keep_profiles,
-            include_quarantined_media=delete_quarantined_media,
-            include_protected_media=delete_protected_media,
-        )
-        return await self._remove_local_media_from_disk(old_media)
-
-    async def _remove_local_media_from_disk(
-        self, media_ids: List[str]
-    ) -> Tuple[List[str], int]:
-        """
-        Delete local or remote media from this server. Removes media files,
-        any thumbnails and cached URLs.
-
-        Args:
-            media_ids: List of media_id to delete
-        Returns:
-            A tuple of (list of deleted media IDs, total deleted media IDs).
-        """
-        removed_media = []
-        for media_id in media_ids:
-            logger.info("Deleting media with ID '%s'", media_id)
-            full_path = self.filepaths.local_media_filepath(media_id)
-            try:
-                os.remove(full_path)
-            except OSError as e:
-                logger.warning("Failed to remove file: %r: %s", full_path, e)
-                if e.errno == errno.ENOENT:
-                    pass
-                else:
-                    continue
-
-            thumbnail_dir = self.filepaths.local_media_thumbnail_dir(media_id)
-            shutil.rmtree(thumbnail_dir, ignore_errors=True)
-
-            await self.store.delete_remote_media(self.server_name, media_id)
-
-            await self.store.delete_url_cache((media_id,))
-            await self.store.delete_url_cache_media((media_id,))
-
-            removed_media.append(media_id)
-
-        return removed_media, len(removed_media)
-
-
-class MediaRepositoryResource(UnrecognizedRequestResource):
-    """File uploading and downloading.
-
-    Uploads are POSTed to a resource which returns a token which is used to GET
-    the download::
-
-        => POST /_matrix/media/r0/upload HTTP/1.1
-           Content-Type: <media-type>
-           Content-Length: <content-length>
-
-           <media>
-
-        <= HTTP/1.1 200 OK
-           Content-Type: application/json
-
-           { "content_uri": "mxc://<server-name>/<media-id>" }
-
-        => GET /_matrix/media/r0/download/<server-name>/<media-id> HTTP/1.1
-
-        <= HTTP/1.1 200 OK
-           Content-Type: <media-type>
-           Content-Disposition: attachment;filename=<upload-filename>
-
-           <media>
-
-    Clients can get thumbnails by supplying a desired width and height and
-    thumbnailing method::
-
-        => GET /_matrix/media/r0/thumbnail/<server_name>
-                /<media-id>?width=<w>&height=<h>&method=<m> HTTP/1.1
-
-        <= HTTP/1.1 200 OK
-           Content-Type: image/jpeg or image/png
-
-           <thumbnail>
-
-    The thumbnail methods are "crop" and "scale". "scale" tries to return an
-    image where either the width or the height is smaller than the requested
-    size. The client should then scale and letterbox the image if it needs to
-    fit within a given rectangle. "crop" tries to return an image where the
-    width and height are close to the requested size and the aspect matches
-    the requested size. The client should scale the image if it needs to fit
-    within a given rectangle.
-    """
-
-    def __init__(self, hs: "HomeServer"):
-        # If we're not configured to use it, raise if we somehow got here.
-        if not hs.config.media.can_load_media_repo:
-            raise ConfigError("Synapse is not configured to use a media repo.")
-
-        super().__init__()
-        media_repo = hs.get_media_repository()
-
-        self.putChild(b"upload", UploadResource(hs, media_repo))
-        self.putChild(b"download", DownloadResource(hs, media_repo))
-        self.putChild(
-            b"thumbnail", ThumbnailResource(hs, media_repo, media_repo.media_storage)
-        )
-        if hs.config.media.url_preview_enabled:
-            self.putChild(
-                b"preview_url",
-                PreviewUrlResource(hs, media_repo, media_repo.media_storage),
-            )
-        self.putChild(b"config", MediaConfigResource(hs))
diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py
index db25848744..11b0e8e231 100644
--- a/synapse/rest/media/v1/media_storage.py
+++ b/synapse/rest/media/v1/media_storage.py
@@ -1,4 +1,4 @@
-# Copyright 2018-2021 The Matrix.org Foundation C.I.C.
+# Copyright 2023 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,364 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import contextlib
-import logging
-import os
-import shutil
-from types import TracebackType
-from typing import (
-    IO,
-    TYPE_CHECKING,
-    Any,
-    Awaitable,
-    BinaryIO,
-    Callable,
-    Generator,
-    Optional,
-    Sequence,
-    Tuple,
-    Type,
-)
-
-import attr
-
-from twisted.internet.defer import Deferred
-from twisted.internet.interfaces import IConsumer
-from twisted.protocols.basic import FileSender
-
-import synapse
-from synapse.api.errors import NotFoundError
-from synapse.logging.context import defer_to_thread, make_deferred_yieldable
-from synapse.util import Clock
-from synapse.util.file_consumer import BackgroundFileConsumer
-
-from ._base import FileInfo, Responder
-from .filepath import MediaFilePaths
-
-if TYPE_CHECKING:
-    from synapse.rest.media.v1.storage_provider import StorageProvider
-    from synapse.server import HomeServer
-
-logger = logging.getLogger(__name__)
-
-
-class MediaStorage:
-    """Responsible for storing/fetching files from local sources.
-
-    Args:
-        hs
-        local_media_directory: Base path where we store media on disk
-        filepaths
-        storage_providers: List of StorageProvider that are used to fetch and store files.
-    """
-
-    def __init__(
-        self,
-        hs: "HomeServer",
-        local_media_directory: str,
-        filepaths: MediaFilePaths,
-        storage_providers: Sequence["StorageProvider"],
-    ):
-        self.hs = hs
-        self.reactor = hs.get_reactor()
-        self.local_media_directory = local_media_directory
-        self.filepaths = filepaths
-        self.storage_providers = storage_providers
-        self.spam_checker = hs.get_spam_checker()
-        self.clock = hs.get_clock()
-
-    async def store_file(self, source: IO, file_info: FileInfo) -> str:
-        """Write `source` to the on disk media store, and also any other
-        configured storage providers
-
-        Args:
-            source: A file like object that should be written
-            file_info: Info about the file to store
-
-        Returns:
-            the file path written to in the primary media store
-        """
-
-        with self.store_into_file(file_info) as (f, fname, finish_cb):
-            # Write to the main repository
-            await self.write_to_file(source, f)
-            await finish_cb()
-
-        return fname
-
-    async def write_to_file(self, source: IO, output: IO) -> None:
-        """Asynchronously write the `source` to `output`."""
-        await defer_to_thread(self.reactor, _write_file_synchronously, source, output)
-
-    @contextlib.contextmanager
-    def store_into_file(
-        self, file_info: FileInfo
-    ) -> Generator[Tuple[BinaryIO, str, Callable[[], Awaitable[None]]], None, None]:
-        """Context manager used to get a file like object to write into, as
-        described by file_info.
-
-        Actually yields a 3-tuple (file, fname, finish_cb), where file is a file
-        like object that can be written to, fname is the absolute path of file
-        on disk, and finish_cb is a function that returns an awaitable.
-
-        fname can be used to read the contents from after upload, e.g. to
-        generate thumbnails.
-
-        finish_cb must be called and waited on after the file has been
-        successfully been written to. Should not be called if there was an
-        error.
-
-        Args:
-            file_info: Info about the file to store
-
-        Example:
-
-            with media_storage.store_into_file(info) as (f, fname, finish_cb):
-                # .. write into f ...
-                await finish_cb()
-        """
-
-        path = self._file_info_to_path(file_info)
-        fname = os.path.join(self.local_media_directory, path)
-
-        dirname = os.path.dirname(fname)
-        os.makedirs(dirname, exist_ok=True)
-
-        finished_called = [False]
-
-        try:
-            with open(fname, "wb") as f:
-
-                async def finish() -> None:
-                    # Ensure that all writes have been flushed and close the
-                    # file.
-                    f.flush()
-                    f.close()
-
-                    spam_check = await self.spam_checker.check_media_file_for_spam(
-                        ReadableFileWrapper(self.clock, fname), file_info
-                    )
-                    if spam_check != synapse.module_api.NOT_SPAM:
-                        logger.info("Blocking media due to spam checker")
-                        # Note that we'll delete the stored media, due to the
-                        # try/except below. The media also won't be stored in
-                        # the DB.
-                        # We currently ignore any additional field returned by
-                        # the spam-check API.
-                        raise SpamMediaException(errcode=spam_check[0])
-
-                    for provider in self.storage_providers:
-                        await provider.store_file(path, file_info)
-
-                    finished_called[0] = True
-
-                yield f, fname, finish
-        except Exception as e:
-            try:
-                os.remove(fname)
-            except Exception:
-                pass
-
-            raise e from None
-
-        if not finished_called:
-            raise Exception("Finished callback not called")
-
-    async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]:
-        """Attempts to fetch media described by file_info from the local cache
-        and configured storage providers.
-
-        Args:
-            file_info
-
-        Returns:
-            Returns a Responder if the file was found, otherwise None.
-        """
-        paths = [self._file_info_to_path(file_info)]
-
-        # fallback for remote thumbnails with no method in the filename
-        if file_info.thumbnail and file_info.server_name:
-            paths.append(
-                self.filepaths.remote_media_thumbnail_rel_legacy(
-                    server_name=file_info.server_name,
-                    file_id=file_info.file_id,
-                    width=file_info.thumbnail.width,
-                    height=file_info.thumbnail.height,
-                    content_type=file_info.thumbnail.type,
-                )
-            )
-
-        for path in paths:
-            local_path = os.path.join(self.local_media_directory, path)
-            if os.path.exists(local_path):
-                logger.debug("responding with local file %s", local_path)
-                return FileResponder(open(local_path, "rb"))
-            logger.debug("local file %s did not exist", local_path)
-
-        for provider in self.storage_providers:
-            for path in paths:
-                res: Any = await provider.fetch(path, file_info)
-                if res:
-                    logger.debug("Streaming %s from %s", path, provider)
-                    return res
-                logger.debug("%s not found on %s", path, provider)
-
-        return None
-
-    async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str:
-        """Ensures that the given file is in the local cache. Attempts to
-        download it from storage providers if it isn't.
-
-        Args:
-            file_info
-
-        Returns:
-            Full path to local file
-        """
-        path = self._file_info_to_path(file_info)
-        local_path = os.path.join(self.local_media_directory, path)
-        if os.path.exists(local_path):
-            return local_path
-
-        # Fallback for paths without method names
-        # Should be removed in the future
-        if file_info.thumbnail and file_info.server_name:
-            legacy_path = self.filepaths.remote_media_thumbnail_rel_legacy(
-                server_name=file_info.server_name,
-                file_id=file_info.file_id,
-                width=file_info.thumbnail.width,
-                height=file_info.thumbnail.height,
-                content_type=file_info.thumbnail.type,
-            )
-            legacy_local_path = os.path.join(self.local_media_directory, legacy_path)
-            if os.path.exists(legacy_local_path):
-                return legacy_local_path
-
-        dirname = os.path.dirname(local_path)
-        os.makedirs(dirname, exist_ok=True)
-
-        for provider in self.storage_providers:
-            res: Any = await provider.fetch(path, file_info)
-            if res:
-                with res:
-                    consumer = BackgroundFileConsumer(
-                        open(local_path, "wb"), self.reactor
-                    )
-                    await res.write_to_consumer(consumer)
-                    await consumer.wait()
-                return local_path
-
-        raise NotFoundError()
-
-    def _file_info_to_path(self, file_info: FileInfo) -> str:
-        """Converts file_info into a relative path.
-
-        The path is suitable for storing files under a directory, e.g. used to
-        store files on local FS under the base media repository directory.
-        """
-        if file_info.url_cache:
-            if file_info.thumbnail:
-                return self.filepaths.url_cache_thumbnail_rel(
-                    media_id=file_info.file_id,
-                    width=file_info.thumbnail.width,
-                    height=file_info.thumbnail.height,
-                    content_type=file_info.thumbnail.type,
-                    method=file_info.thumbnail.method,
-                )
-            return self.filepaths.url_cache_filepath_rel(file_info.file_id)
-
-        if file_info.server_name:
-            if file_info.thumbnail:
-                return self.filepaths.remote_media_thumbnail_rel(
-                    server_name=file_info.server_name,
-                    file_id=file_info.file_id,
-                    width=file_info.thumbnail.width,
-                    height=file_info.thumbnail.height,
-                    content_type=file_info.thumbnail.type,
-                    method=file_info.thumbnail.method,
-                )
-            return self.filepaths.remote_media_filepath_rel(
-                file_info.server_name, file_info.file_id
-            )
-
-        if file_info.thumbnail:
-            return self.filepaths.local_media_thumbnail_rel(
-                media_id=file_info.file_id,
-                width=file_info.thumbnail.width,
-                height=file_info.thumbnail.height,
-                content_type=file_info.thumbnail.type,
-                method=file_info.thumbnail.method,
-            )
-        return self.filepaths.local_media_filepath_rel(file_info.file_id)
-
-
-def _write_file_synchronously(source: IO, dest: IO) -> None:
-    """Write `source` to the file like `dest` synchronously. Should be called
-    from a thread.
-
-    Args:
-        source: A file like object that's to be written
-        dest: A file like object to be written to
-    """
-    source.seek(0)  # Ensure we read from the start of the file
-    shutil.copyfileobj(source, dest)
-
-
-class FileResponder(Responder):
-    """Wraps an open file that can be sent to a request.
-
-    Args:
-        open_file: A file like object to be streamed ot the client,
-            is closed when finished streaming.
-    """
-
-    def __init__(self, open_file: IO):
-        self.open_file = open_file
-
-    def write_to_consumer(self, consumer: IConsumer) -> Deferred:
-        return make_deferred_yieldable(
-            FileSender().beginFileTransfer(self.open_file, consumer)
-        )
-
-    def __exit__(
-        self,
-        exc_type: Optional[Type[BaseException]],
-        exc_val: Optional[BaseException],
-        exc_tb: Optional[TracebackType],
-    ) -> None:
-        self.open_file.close()
-
-
-class SpamMediaException(NotFoundError):
-    """The media was blocked by a spam checker, so we simply 404 the request (in
-    the same way as if it was quarantined).
-    """
-
-
-@attr.s(slots=True, auto_attribs=True)
-class ReadableFileWrapper:
-    """Wrapper that allows reading a file in chunks, yielding to the reactor,
-    and writing to a callback.
-
-    This is simplified `FileSender` that takes an IO object rather than an
-    `IConsumer`.
-    """
-
-    CHUNK_SIZE = 2**14
-
-    clock: Clock
-    path: str
-
-    async def write_chunks_to(self, callback: Callable[[bytes], object]) -> None:
-        """Reads the file in chunks and calls the callback with each chunk."""
-
-        with open(self.path, "rb") as file:
-            while True:
-                chunk = file.read(self.CHUNK_SIZE)
-                if not chunk:
-                    break
-
-                callback(chunk)
+#
 
-                # We yield to the reactor by sleeping for 0 seconds.
-                await self.clock.sleep(0)
+# This exists purely for backwards compatibility with spam checkers.
+from synapse.media.media_storage import ReadableFileWrapper  # noqa: F401
diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py
deleted file mode 100644
index 7592aa5d47..0000000000
--- a/synapse/rest/media/v1/oembed.py
+++ /dev/null
@@ -1,265 +0,0 @@
-#  Copyright 2021 The Matrix.org Foundation C.I.C.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-import html
-import logging
-import urllib.parse
-from typing import TYPE_CHECKING, List, Optional
-
-import attr
-
-from synapse.rest.media.v1.preview_html import parse_html_description
-from synapse.types import JsonDict
-from synapse.util import json_decoder
-
-if TYPE_CHECKING:
-    from lxml import etree
-
-    from synapse.server import HomeServer
-
-logger = logging.getLogger(__name__)
-
-
-@attr.s(slots=True, frozen=True, auto_attribs=True)
-class OEmbedResult:
-    # The Open Graph result (converted from the oEmbed result).
-    open_graph_result: JsonDict
-    # The author_name of the oEmbed result
-    author_name: Optional[str]
-    # Number of milliseconds to cache the content, according to the oEmbed response.
-    #
-    # This will be None if no cache-age is provided in the oEmbed response (or
-    # if the oEmbed response cannot be turned into an Open Graph response).
-    cache_age: Optional[int]
-
-
-class OEmbedProvider:
-    """
-    A helper for accessing oEmbed content.
-
-    It can be used to check if a URL should be accessed via oEmbed and for
-    requesting/parsing oEmbed content.
-    """
-
-    def __init__(self, hs: "HomeServer"):
-        self._oembed_patterns = {}
-        for oembed_endpoint in hs.config.oembed.oembed_patterns:
-            api_endpoint = oembed_endpoint.api_endpoint
-
-            # Only JSON is supported at the moment. This could be declared in
-            # the formats field. Otherwise, if the endpoint ends in .xml assume
-            # it doesn't support JSON.
-            if (
-                oembed_endpoint.formats is not None
-                and "json" not in oembed_endpoint.formats
-            ) or api_endpoint.endswith(".xml"):
-                logger.info(
-                    "Ignoring oEmbed endpoint due to not supporting JSON: %s",
-                    api_endpoint,
-                )
-                continue
-
-            # Iterate through each URL pattern and point it to the endpoint.
-            for pattern in oembed_endpoint.url_patterns:
-                self._oembed_patterns[pattern] = api_endpoint
-
-    def get_oembed_url(self, url: str) -> Optional[str]:
-        """
-        Check whether the URL should be downloaded as oEmbed content instead.
-
-        Args:
-            url: The URL to check.
-
-        Returns:
-            A URL to use instead or None if the original URL should be used.
-        """
-        for url_pattern, endpoint in self._oembed_patterns.items():
-            if url_pattern.fullmatch(url):
-                # TODO Specify max height / width.
-
-                # Note that only the JSON format is supported, some endpoints want
-                # this in the URL, others want it as an argument.
-                endpoint = endpoint.replace("{format}", "json")
-
-                args = {"url": url, "format": "json"}
-                query_str = urllib.parse.urlencode(args, True)
-                return f"{endpoint}?{query_str}"
-
-        # No match.
-        return None
-
-    def autodiscover_from_html(self, tree: "etree.Element") -> Optional[str]:
-        """
-        Search an HTML document for oEmbed autodiscovery information.
-
-        Args:
-            tree: The parsed HTML body.
-
-        Returns:
-            The URL to use for oEmbed information, or None if no URL was found.
-        """
-        # Search for link elements with the proper rel and type attributes.
-        for tag in tree.xpath(
-            "//link[@rel='alternate'][@type='application/json+oembed']"
-        ):
-            if "href" in tag.attrib:
-                return tag.attrib["href"]
-
-        # Some providers (e.g. Flickr) use alternative instead of alternate.
-        for tag in tree.xpath(
-            "//link[@rel='alternative'][@type='application/json+oembed']"
-        ):
-            if "href" in tag.attrib:
-                return tag.attrib["href"]
-
-        return None
-
-    def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult:
-        """
-        Parse the oEmbed response into an Open Graph response.
-
-        Args:
-            url: The URL which is being previewed (not the one which was
-                requested).
-            raw_body: The oEmbed response as JSON encoded as bytes.
-
-        Returns:
-            json-encoded Open Graph data
-        """
-
-        try:
-            # oEmbed responses *must* be UTF-8 according to the spec.
-            oembed = json_decoder.decode(raw_body.decode("utf-8"))
-        except ValueError:
-            return OEmbedResult({}, None, None)
-
-        # The version is a required string field, but not always provided,
-        # or sometimes provided as a float. Be lenient.
-        oembed_version = oembed.get("version", "1.0")
-        if oembed_version != "1.0" and oembed_version != 1:
-            return OEmbedResult({}, None, None)
-
-        # Attempt to parse the cache age, if possible.
-        try:
-            cache_age = int(oembed.get("cache_age")) * 1000
-        except (TypeError, ValueError):
-            # If the cache age cannot be parsed (e.g. wrong type or invalid
-            # string), ignore it.
-            cache_age = None
-
-        # The oEmbed response converted to Open Graph.
-        open_graph_response: JsonDict = {"og:url": url}
-
-        title = oembed.get("title")
-        if title and isinstance(title, str):
-            # A common WordPress plug-in seems to incorrectly escape entities
-            # in the oEmbed response.
-            open_graph_response["og:title"] = html.unescape(title)
-
-        author_name = oembed.get("author_name")
-        if not isinstance(author_name, str):
-            author_name = None
-
-        # Use the provider name and as the site.
-        provider_name = oembed.get("provider_name")
-        if provider_name and isinstance(provider_name, str):
-            open_graph_response["og:site_name"] = provider_name
-
-        # If a thumbnail exists, use it. Note that dimensions will be calculated later.
-        thumbnail_url = oembed.get("thumbnail_url")
-        if thumbnail_url and isinstance(thumbnail_url, str):
-            open_graph_response["og:image"] = thumbnail_url
-
-        # Process each type separately.
-        oembed_type = oembed.get("type")
-        if oembed_type == "rich":
-            html_str = oembed.get("html")
-            if isinstance(html_str, str):
-                calc_description_and_urls(open_graph_response, html_str)
-
-        elif oembed_type == "photo":
-            # If this is a photo, use the full image, not the thumbnail.
-            url = oembed.get("url")
-            if url and isinstance(url, str):
-                open_graph_response["og:image"] = url
-
-        elif oembed_type == "video":
-            open_graph_response["og:type"] = "video.other"
-            html_str = oembed.get("html")
-            if html_str and isinstance(html_str, str):
-                calc_description_and_urls(open_graph_response, oembed["html"])
-            for size in ("width", "height"):
-                val = oembed.get(size)
-                if type(val) is int:
-                    open_graph_response[f"og:video:{size}"] = val
-
-        elif oembed_type == "link":
-            open_graph_response["og:type"] = "website"
-
-        else:
-            logger.warning("Unknown oEmbed type: %s", oembed_type)
-
-        return OEmbedResult(open_graph_response, author_name, cache_age)
-
-
-def _fetch_urls(tree: "etree.Element", tag_name: str) -> List[str]:
-    results = []
-    for tag in tree.xpath("//*/" + tag_name):
-        if "src" in tag.attrib:
-            results.append(tag.attrib["src"])
-    return results
-
-
-def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> None:
-    """
-    Calculate description for an HTML document.
-
-    This uses lxml to convert the HTML document into plaintext. If errors
-    occur during processing of the document, an empty response is returned.
-
-    Args:
-        open_graph_response: The current Open Graph summary. This is updated with additional fields.
-        html_body: The HTML document, as bytes.
-
-    Returns:
-        The summary
-    """
-    # If there's no body, nothing useful is going to be found.
-    if not html_body:
-        return
-
-    from lxml import etree
-
-    # Create an HTML parser. If this fails, log and return no metadata.
-    parser = etree.HTMLParser(recover=True, encoding="utf-8")
-
-    # Attempt to parse the body. If this fails, log and return no metadata.
-    tree = etree.fromstring(html_body, parser)
-
-    # The data was successfully parsed, but no tree was found.
-    if tree is None:
-        return
-
-    # Attempt to find interesting URLs (images, videos, embeds).
-    if "og:image" not in open_graph_response:
-        image_urls = _fetch_urls(tree, "img")
-        if image_urls:
-            open_graph_response["og:image"] = image_urls[0]
-
-    video_urls = _fetch_urls(tree, "video") + _fetch_urls(tree, "embed")
-    if video_urls:
-        open_graph_response["og:video"] = video_urls[0]
-
-    description = parse_html_description(tree)
-    if description:
-        open_graph_response["og:description"] = description
diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py
deleted file mode 100644
index 516d0434f0..0000000000
--- a/synapse/rest/media/v1/preview_html.py
+++ /dev/null
@@ -1,501 +0,0 @@
-# Copyright 2021 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import codecs
-import logging
-import re
-from typing import (
-    TYPE_CHECKING,
-    Callable,
-    Dict,
-    Generator,
-    Iterable,
-    List,
-    Optional,
-    Set,
-    Union,
-)
-
-if TYPE_CHECKING:
-    from lxml import etree
-
-logger = logging.getLogger(__name__)
-
-_charset_match = re.compile(
-    rb'<\s*meta[^>]*charset\s*=\s*"?([a-z0-9_-]+)"?', flags=re.I
-)
-_xml_encoding_match = re.compile(
-    rb'\s*<\s*\?\s*xml[^>]*encoding="([a-z0-9_-]+)"', flags=re.I
-)
-_content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I)
-
-# Certain elements aren't meant for display.
-ARIA_ROLES_TO_IGNORE = {"directory", "menu", "menubar", "toolbar"}
-
-
-def _normalise_encoding(encoding: str) -> Optional[str]:
-    """Use the Python codec's name as the normalised entry."""
-    try:
-        return codecs.lookup(encoding).name
-    except LookupError:
-        return None
-
-
-def _get_html_media_encodings(
-    body: bytes, content_type: Optional[str]
-) -> Iterable[str]:
-    """
-    Get potential encoding of the body based on the (presumably) HTML body or the content-type header.
-
-    The precedence used for finding a character encoding is:
-
-    1. <meta> tag with a charset declared.
-    2. The XML document's character encoding attribute.
-    3. The Content-Type header.
-    4. Fallback to utf-8.
-    5. Fallback to windows-1252.
-
-    This roughly follows the algorithm used by BeautifulSoup's bs4.dammit.EncodingDetector.
-
-    Args:
-        body: The HTML document, as bytes.
-        content_type: The Content-Type header.
-
-    Returns:
-        The character encoding of the body, as a string.
-    """
-    # There's no point in returning an encoding more than once.
-    attempted_encodings: Set[str] = set()
-
-    # Limit searches to the first 1kb, since it ought to be at the top.
-    body_start = body[:1024]
-
-    # Check if it has an encoding set in a meta tag.
-    match = _charset_match.search(body_start)
-    if match:
-        encoding = _normalise_encoding(match.group(1).decode("ascii"))
-        if encoding:
-            attempted_encodings.add(encoding)
-            yield encoding
-
-    # TODO Support <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
-
-    # Check if it has an XML document with an encoding.
-    match = _xml_encoding_match.match(body_start)
-    if match:
-        encoding = _normalise_encoding(match.group(1).decode("ascii"))
-        if encoding and encoding not in attempted_encodings:
-            attempted_encodings.add(encoding)
-            yield encoding
-
-    # Check the HTTP Content-Type header for a character set.
-    if content_type:
-        content_match = _content_type_match.match(content_type)
-        if content_match:
-            encoding = _normalise_encoding(content_match.group(1))
-            if encoding and encoding not in attempted_encodings:
-                attempted_encodings.add(encoding)
-                yield encoding
-
-    # Finally, fallback to UTF-8, then windows-1252.
-    for fallback in ("utf-8", "cp1252"):
-        if fallback not in attempted_encodings:
-            yield fallback
-
-
-def decode_body(
-    body: bytes, uri: str, content_type: Optional[str] = None
-) -> Optional["etree.Element"]:
-    """
-    This uses lxml to parse the HTML document.
-
-    Args:
-        body: The HTML document, as bytes.
-        uri: The URI used to download the body.
-        content_type: The Content-Type header.
-
-    Returns:
-        The parsed HTML body, or None if an error occurred during processed.
-    """
-    # If there's no body, nothing useful is going to be found.
-    if not body:
-        return None
-
-    # The idea here is that multiple encodings are tried until one works.
-    # Unfortunately the result is never used and then LXML will decode the string
-    # again with the found encoding.
-    for encoding in _get_html_media_encodings(body, content_type):
-        try:
-            body.decode(encoding)
-        except Exception:
-            pass
-        else:
-            break
-    else:
-        logger.warning("Unable to decode HTML body for %s", uri)
-        return None
-
-    from lxml import etree
-
-    # Create an HTML parser.
-    parser = etree.HTMLParser(recover=True, encoding=encoding)
-
-    # Attempt to parse the body. Returns None if the body was successfully
-    # parsed, but no tree was found.
-    return etree.fromstring(body, parser)
-
-
-def _get_meta_tags(
-    tree: "etree.Element",
-    property: str,
-    prefix: str,
-    property_mapper: Optional[Callable[[str], Optional[str]]] = None,
-) -> Dict[str, Optional[str]]:
-    """
-    Search for meta tags prefixed with a particular string.
-
-    Args:
-        tree: The parsed HTML document.
-        property: The name of the property which contains the tag name, e.g.
-            "property" for Open Graph.
-        prefix: The prefix on the property to search for, e.g. "og" for Open Graph.
-        property_mapper: An optional callable to map the property to the Open Graph
-            form. Can return None for a key to ignore that key.
-
-    Returns:
-        A map of tag name to value.
-    """
-    results: Dict[str, Optional[str]] = {}
-    for tag in tree.xpath(
-        f"//*/meta[starts-with(@{property}, '{prefix}:')][@content][not(@content='')]"
-    ):
-        # if we've got more than 50 tags, someone is taking the piss
-        if len(results) >= 50:
-            logger.warning(
-                "Skipping parsing of Open Graph for page with too many '%s:' tags",
-                prefix,
-            )
-            return {}
-
-        key = tag.attrib[property]
-        if property_mapper:
-            key = property_mapper(key)
-            # None is a special value used to ignore a value.
-            if key is None:
-                continue
-
-        results[key] = tag.attrib["content"]
-
-    return results
-
-
-def _map_twitter_to_open_graph(key: str) -> Optional[str]:
-    """
-    Map a Twitter card property to the analogous Open Graph property.
-
-    Args:
-        key: The Twitter card property (starts with "twitter:").
-
-    Returns:
-        The Open Graph property (starts with "og:") or None to have this property
-        be ignored.
-    """
-    # Twitter card properties with no analogous Open Graph property.
-    if key == "twitter:card" or key == "twitter:creator":
-        return None
-    if key == "twitter:site":
-        return "og:site_name"
-    # Otherwise, swap twitter to og.
-    return "og" + key[7:]
-
-
-def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]:
-    """
-    Parse the HTML document into an Open Graph response.
-
-    This uses lxml to search the HTML document for Open Graph data (or
-    synthesizes it from the document).
-
-    Args:
-        tree: The parsed HTML document.
-
-    Returns:
-        The Open Graph response as a dictionary.
-    """
-
-    # Search for Open Graph (og:) meta tags, e.g.:
-    #
-    # "og:type"         : "video",
-    # "og:url"          : "https://www.youtube.com/watch?v=LXDBoHyjmtw",
-    # "og:site_name"    : "YouTube",
-    # "og:video:type"   : "application/x-shockwave-flash",
-    # "og:description"  : "Fun stuff happening here",
-    # "og:title"        : "RemoteJam - Matrix team hack for Disrupt Europe Hackathon",
-    # "og:image"        : "https://i.ytimg.com/vi/LXDBoHyjmtw/maxresdefault.jpg",
-    # "og:video:url"    : "http://www.youtube.com/v/LXDBoHyjmtw?version=3&autohide=1",
-    # "og:video:width"  : "1280"
-    # "og:video:height" : "720",
-    # "og:video:secure_url": "https://www.youtube.com/v/LXDBoHyjmtw?version=3",
-
-    og = _get_meta_tags(tree, "property", "og")
-
-    # TODO: Search for properties specific to the different Open Graph types,
-    # such as article: meta tags, e.g.:
-    #
-    # "article:publisher" : "https://www.facebook.com/thethudonline" />
-    # "article:author" content="https://www.facebook.com/thethudonline" />
-    # "article:tag" content="baby" />
-    # "article:section" content="Breaking News" />
-    # "article:published_time" content="2016-03-31T19:58:24+00:00" />
-    # "article:modified_time" content="2016-04-01T18:31:53+00:00" />
-
-    # Search for Twitter Card (twitter:) meta tags, e.g.:
-    #
-    # "twitter:site"    : "@matrixdotorg"
-    # "twitter:creator" : "@matrixdotorg"
-    #
-    # Twitter cards tags also duplicate Open Graph tags.
-    #
-    # See https://developer.twitter.com/en/docs/twitter-for-websites/cards/guides/getting-started
-    twitter = _get_meta_tags(tree, "name", "twitter", _map_twitter_to_open_graph)
-    # Merge the Twitter values with the Open Graph values, but do not overwrite
-    # information from Open Graph tags.
-    for key, value in twitter.items():
-        if key not in og:
-            og[key] = value
-
-    if "og:title" not in og:
-        # Attempt to find a title from the title tag, or the biggest header on the page.
-        title = tree.xpath("((//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1])/text()")
-        if title:
-            og["og:title"] = title[0].strip()
-        else:
-            og["og:title"] = None
-
-    if "og:image" not in og:
-        meta_image = tree.xpath(
-            "//*/meta[translate(@itemprop, 'IMAGE', 'image')='image'][not(@content='')]/@content[1]"
-        )
-        # If a meta image is found, use it.
-        if meta_image:
-            og["og:image"] = meta_image[0]
-        else:
-            # Try to find images which are larger than 10px by 10px.
-            #
-            # TODO: consider inlined CSS styles as well as width & height attribs
-            images = tree.xpath("//img[@src][number(@width)>10][number(@height)>10]")
-            images = sorted(
-                images,
-                key=lambda i: (
-                    -1 * float(i.attrib["width"]) * float(i.attrib["height"])
-                ),
-            )
-            # If no images were found, try to find *any* images.
-            if not images:
-                images = tree.xpath("//img[@src][1]")
-            if images:
-                og["og:image"] = images[0].attrib["src"]
-
-            # Finally, fallback to the favicon if nothing else.
-            else:
-                favicons = tree.xpath("//link[@href][contains(@rel, 'icon')]/@href[1]")
-                if favicons:
-                    og["og:image"] = favicons[0]
-
-    if "og:description" not in og:
-        # Check the first meta description tag for content.
-        meta_description = tree.xpath(
-            "//*/meta[translate(@name, 'DESCRIPTION', 'description')='description'][not(@content='')]/@content[1]"
-        )
-        # If a meta description is found with content, use it.
-        if meta_description:
-            og["og:description"] = meta_description[0]
-        else:
-            og["og:description"] = parse_html_description(tree)
-    elif og["og:description"]:
-        # This must be a non-empty string at this point.
-        assert isinstance(og["og:description"], str)
-        og["og:description"] = summarize_paragraphs([og["og:description"]])
-
-    # TODO: delete the url downloads to stop diskfilling,
-    # as we only ever cared about its OG
-    return og
-
-
-def parse_html_description(tree: "etree.Element") -> Optional[str]:
-    """
-    Calculate a text description based on an HTML document.
-
-    Grabs any text nodes which are inside the <body/> tag, unless they are within
-    an HTML5 semantic markup tag (<header/>, <nav/>, <aside/>, <footer/>), or
-    if they are within a <script/>, <svg/> or <style/> tag, or if they are within
-    a tag whose content is usually only shown to old browsers
-    (<iframe/>, <video/>, <canvas/>, <picture/>).
-
-    This is a very very very coarse approximation to a plain text render of the page.
-
-    Args:
-        tree: The parsed HTML document.
-
-    Returns:
-        The plain text description, or None if one cannot be generated.
-    """
-    # We don't just use XPATH here as that is slow on some machines.
-
-    from lxml import etree
-
-    TAGS_TO_REMOVE = {
-        "header",
-        "nav",
-        "aside",
-        "footer",
-        "script",
-        "noscript",
-        "style",
-        "svg",
-        "iframe",
-        "video",
-        "canvas",
-        "img",
-        "picture",
-        etree.Comment,
-    }
-
-    # Split all the text nodes into paragraphs (by splitting on new
-    # lines)
-    text_nodes = (
-        re.sub(r"\s+", "\n", el).strip()
-        for el in _iterate_over_text(tree.find("body"), TAGS_TO_REMOVE)
-    )
-    return summarize_paragraphs(text_nodes)
-
-
-def _iterate_over_text(
-    tree: Optional["etree.Element"],
-    tags_to_ignore: Set[Union[str, "etree.Comment"]],
-    stack_limit: int = 1024,
-) -> Generator[str, None, None]:
-    """Iterate over the tree returning text nodes in a depth first fashion,
-    skipping text nodes inside certain tags.
-
-    Args:
-        tree: The parent element to iterate. Can be None if there isn't one.
-        tags_to_ignore: Set of tags to ignore
-        stack_limit: Maximum stack size limit for depth-first traversal.
-            Nodes will be dropped if this limit is hit, which may truncate the
-            textual result.
-            Intended to limit the maximum working memory when generating a preview.
-    """
-
-    if tree is None:
-        return
-
-    # This is a stack whose items are elements to iterate over *or* strings
-    # to be returned.
-    elements: List[Union[str, "etree.Element"]] = [tree]
-    while elements:
-        el = elements.pop()
-
-        if isinstance(el, str):
-            yield el
-        elif el.tag not in tags_to_ignore:
-            # If the element isn't meant for display, ignore it.
-            if el.get("role") in ARIA_ROLES_TO_IGNORE:
-                continue
-
-            # el.text is the text before the first child, so we can immediately
-            # return it if the text exists.
-            if el.text:
-                yield el.text
-
-            # We add to the stack all the element's children, interspersed with
-            # each child's tail text (if it exists).
-            #
-            # We iterate in reverse order so that earlier pieces of text appear
-            # closer to the top of the stack.
-            for child in el.iterchildren(reversed=True):
-                if len(elements) > stack_limit:
-                    # We've hit our limit for working memory
-                    break
-
-                if child.tail:
-                    # The tail text of a node is text that comes *after* the node,
-                    # so we always include it even if we ignore the child node.
-                    elements.append(child.tail)
-
-                elements.append(child)
-
-
-def summarize_paragraphs(
-    text_nodes: Iterable[str], min_size: int = 200, max_size: int = 500
-) -> Optional[str]:
-    """
-    Try to get a summary respecting first paragraph and then word boundaries.
-
-    Args:
-        text_nodes: The paragraphs to summarize.
-        min_size: The minimum number of words to include.
-        max_size: The maximum number of words to include.
-
-    Returns:
-        A summary of the text nodes, or None if that was not possible.
-    """
-
-    # TODO: Respect sentences?
-
-    description = ""
-
-    # Keep adding paragraphs until we get to the MIN_SIZE.
-    for text_node in text_nodes:
-        if len(description) < min_size:
-            text_node = re.sub(r"[\t \r\n]+", " ", text_node)
-            description += text_node + "\n\n"
-        else:
-            break
-
-    description = description.strip()
-    description = re.sub(r"[\t ]+", " ", description)
-    description = re.sub(r"[\t \r\n]*[\r\n]+", "\n\n", description)
-
-    # If the concatenation of paragraphs to get above MIN_SIZE
-    # took us over MAX_SIZE, then we need to truncate mid paragraph
-    if len(description) > max_size:
-        new_desc = ""
-
-        # This splits the paragraph into words, but keeping the
-        # (preceding) whitespace intact so we can easily concat
-        # words back together.
-        for match in re.finditer(r"\s*\S+", description):
-            word = match.group()
-
-            # Keep adding words while the total length is less than
-            # MAX_SIZE.
-            if len(word) + len(new_desc) < max_size:
-                new_desc += word
-            else:
-                # At this point the next word *will* take us over
-                # MAX_SIZE, but we also want to ensure that its not
-                # a huge word. If it is add it anyway and we'll
-                # truncate later.
-                if len(new_desc) < min_size:
-                    new_desc += word
-                break
-
-        # Double check that we're not over the limit
-        if len(new_desc) > max_size:
-            new_desc = new_desc[:max_size]
-
-        # We always add an ellipsis because at the very least
-        # we chopped mid paragraph.
-        description = new_desc.strip() + "…"
-    return description if description else None
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
deleted file mode 100644
index 4a594ab9d8..0000000000
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ /dev/null
@@ -1,871 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import datetime
-import errno
-import fnmatch
-import logging
-import os
-import re
-import shutil
-import sys
-import traceback
-from typing import TYPE_CHECKING, BinaryIO, Iterable, Optional, Tuple
-from urllib.parse import urljoin, urlparse, urlsplit
-from urllib.request import urlopen
-
-import attr
-
-from twisted.internet.defer import Deferred
-from twisted.internet.error import DNSLookupError
-
-from synapse.api.errors import Codes, SynapseError
-from synapse.http.client import SimpleHttpClient
-from synapse.http.server import (
-    DirectServeJsonResource,
-    respond_with_json,
-    respond_with_json_bytes,
-)
-from synapse.http.servlet import parse_integer, parse_string
-from synapse.http.site import SynapseRequest
-from synapse.logging.context import make_deferred_yieldable, run_in_background
-from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.rest.media.v1._base import get_filename_from_headers
-from synapse.rest.media.v1.media_storage import MediaStorage
-from synapse.rest.media.v1.oembed import OEmbedProvider
-from synapse.rest.media.v1.preview_html import decode_body, parse_html_to_open_graph
-from synapse.types import JsonDict, UserID
-from synapse.util import json_encoder
-from synapse.util.async_helpers import ObservableDeferred
-from synapse.util.caches.expiringcache import ExpiringCache
-from synapse.util.stringutils import random_string
-
-from ._base import FileInfo
-
-if TYPE_CHECKING:
-    from synapse.rest.media.v1.media_repository import MediaRepository
-    from synapse.server import HomeServer
-
-logger = logging.getLogger(__name__)
-
-OG_TAG_NAME_MAXLEN = 50
-OG_TAG_VALUE_MAXLEN = 1000
-
-ONE_HOUR = 60 * 60 * 1000
-ONE_DAY = 24 * ONE_HOUR
-IMAGE_CACHE_EXPIRY_MS = 2 * ONE_DAY
-
-
-@attr.s(slots=True, frozen=True, auto_attribs=True)
-class DownloadResult:
-    length: int
-    uri: str
-    response_code: int
-    media_type: str
-    download_name: Optional[str]
-    expires: int
-    etag: Optional[str]
-
-
-@attr.s(slots=True, frozen=True, auto_attribs=True)
-class MediaInfo:
-    """
-    Information parsed from downloading media being previewed.
-    """
-
-    # The Content-Type header of the response.
-    media_type: str
-    # The length (in bytes) of the downloaded media.
-    media_length: int
-    # The media filename, according to the server. This is parsed from the
-    # returned headers, if possible.
-    download_name: Optional[str]
-    # The time of the preview.
-    created_ts_ms: int
-    # Information from the media storage provider about where the file is stored
-    # on disk.
-    filesystem_id: str
-    filename: str
-    # The URI being previewed.
-    uri: str
-    # The HTTP response code.
-    response_code: int
-    # The timestamp (in milliseconds) of when this preview expires.
-    expires: int
-    # The ETag header of the response.
-    etag: Optional[str]
-
-
-class PreviewUrlResource(DirectServeJsonResource):
-    """
-    The `GET /_matrix/media/r0/preview_url` endpoint provides a generic preview API
-    for URLs which outputs Open Graph (https://ogp.me/) responses (with some Matrix
-    specific additions).
-
-    This does have trade-offs compared to other designs:
-
-    * Pros:
-      * Simple and flexible; can be used by any clients at any point
-    * Cons:
-      * If each homeserver provides one of these independently, all the homeservers in a
-        room may needlessly DoS the target URI
-      * The URL metadata must be stored somewhere, rather than just using Matrix
-        itself to store the media.
-      * Matrix cannot be used to distribute the metadata between homeservers.
-
-    When Synapse is asked to preview a URL it does the following:
-
-    1. Checks against a URL blacklist (defined as `url_preview_url_blacklist` in the
-       config).
-    2. Checks the URL against an in-memory cache and returns the result if it exists. (This
-       is also used to de-duplicate processing of multiple in-flight requests at once.)
-    3. Kicks off a background process to generate a preview:
-       1. Checks URL and timestamp against the database cache and returns the result if it
-          has not expired and was successful (a 2xx return code).
-       2. Checks if the URL matches an oEmbed (https://oembed.com/) pattern. If it
-          does, update the URL to download.
-       3. Downloads the URL and stores it into a file via the media storage provider
-          and saves the local media metadata.
-       4. If the media is an image:
-          1. Generates thumbnails.
-          2. Generates an Open Graph response based on image properties.
-       5. If the media is HTML:
-          1. Decodes the HTML via the stored file.
-          2. Generates an Open Graph response from the HTML.
-          3. If a JSON oEmbed URL was found in the HTML via autodiscovery:
-             1. Downloads the URL and stores it into a file via the media storage provider
-                and saves the local media metadata.
-             2. Convert the oEmbed response to an Open Graph response.
-             3. Override any Open Graph data from the HTML with data from oEmbed.
-          4. If an image exists in the Open Graph response:
-             1. Downloads the URL and stores it into a file via the media storage
-                provider and saves the local media metadata.
-             2. Generates thumbnails.
-             3. Updates the Open Graph response based on image properties.
-       6. If the media is JSON and an oEmbed URL was found:
-          1. Convert the oEmbed response to an Open Graph response.
-          2. If a thumbnail or image is in the oEmbed response:
-             1. Downloads the URL and stores it into a file via the media storage
-                provider and saves the local media metadata.
-             2. Generates thumbnails.
-             3. Updates the Open Graph response based on image properties.
-       7. Stores the result in the database cache.
-    4. Returns the result.
-
-    If any additional requests (e.g. from oEmbed autodiscovery, step 5.3 or
-    image thumbnailing, step 5.4 or 6.4) fails then the URL preview as a whole
-    does not fail. As much information as possible is returned.
-
-    The in-memory cache expires after 1 hour.
-
-    Expired entries in the database cache (and their associated media files) are
-    deleted every 10 seconds. The default expiration time is 1 hour from download.
-    """
-
-    isLeaf = True
-
-    def __init__(
-        self,
-        hs: "HomeServer",
-        media_repo: "MediaRepository",
-        media_storage: MediaStorage,
-    ):
-        super().__init__()
-
-        self.auth = hs.get_auth()
-        self.clock = hs.get_clock()
-        self.filepaths = media_repo.filepaths
-        self.max_spider_size = hs.config.media.max_spider_size
-        self.server_name = hs.hostname
-        self.store = hs.get_datastores().main
-        self.client = SimpleHttpClient(
-            hs,
-            treq_args={"browser_like_redirects": True},
-            ip_whitelist=hs.config.media.url_preview_ip_range_whitelist,
-            ip_blacklist=hs.config.media.url_preview_ip_range_blacklist,
-            use_proxy=True,
-        )
-        self.media_repo = media_repo
-        self.primary_base_path = media_repo.primary_base_path
-        self.media_storage = media_storage
-
-        self._oembed = OEmbedProvider(hs)
-
-        # We run the background jobs if we're the instance specified (or no
-        # instance is specified, where we assume there is only one instance
-        # serving media).
-        instance_running_jobs = hs.config.media.media_instance_running_background_jobs
-        self._worker_run_media_background_jobs = (
-            instance_running_jobs is None
-            or instance_running_jobs == hs.get_instance_name()
-        )
-
-        self.url_preview_url_blacklist = hs.config.media.url_preview_url_blacklist
-        self.url_preview_accept_language = hs.config.media.url_preview_accept_language
-
-        # memory cache mapping urls to an ObservableDeferred returning
-        # JSON-encoded OG metadata
-        self._cache: ExpiringCache[str, ObservableDeferred] = ExpiringCache(
-            cache_name="url_previews",
-            clock=self.clock,
-            # don't spider URLs more often than once an hour
-            expiry_ms=ONE_HOUR,
-        )
-
-        if self._worker_run_media_background_jobs:
-            self._cleaner_loop = self.clock.looping_call(
-                self._start_expire_url_cache_data, 10 * 1000
-            )
-
-    async def _async_render_OPTIONS(self, request: SynapseRequest) -> None:
-        request.setHeader(b"Allow", b"OPTIONS, GET")
-        respond_with_json(request, 200, {}, send_cors=True)
-
-    async def _async_render_GET(self, request: SynapseRequest) -> None:
-        # XXX: if get_user_by_req fails, what should we do in an async render?
-        requester = await self.auth.get_user_by_req(request)
-        url = parse_string(request, "url", required=True)
-        ts = parse_integer(request, "ts")
-        if ts is None:
-            ts = self.clock.time_msec()
-
-        # XXX: we could move this into _do_preview if we wanted.
-        url_tuple = urlsplit(url)
-        for entry in self.url_preview_url_blacklist:
-            match = True
-            for attrib in entry:
-                pattern = entry[attrib]
-                value = getattr(url_tuple, attrib)
-                logger.debug(
-                    "Matching attrib '%s' with value '%s' against pattern '%s'",
-                    attrib,
-                    value,
-                    pattern,
-                )
-
-                if value is None:
-                    match = False
-                    continue
-
-                # Some attributes might not be parsed as strings by urlsplit (such as the
-                # port, which is parsed as an int). Because we use match functions that
-                # expect strings, we want to make sure that's what we give them.
-                value_str = str(value)
-
-                if pattern.startswith("^"):
-                    if not re.match(pattern, value_str):
-                        match = False
-                        continue
-                else:
-                    if not fnmatch.fnmatch(value_str, pattern):
-                        match = False
-                        continue
-            if match:
-                logger.warning("URL %s blocked by url_blacklist entry %s", url, entry)
-                raise SynapseError(
-                    403, "URL blocked by url pattern blacklist entry", Codes.UNKNOWN
-                )
-
-        # the in-memory cache:
-        # * ensures that only one request is active at a time
-        # * takes load off the DB for the thundering herds
-        # * also caches any failures (unlike the DB) so we don't keep
-        #    requesting the same endpoint
-
-        observable = self._cache.get(url)
-
-        if not observable:
-            download = run_in_background(self._do_preview, url, requester.user, ts)
-            observable = ObservableDeferred(download, consumeErrors=True)
-            self._cache[url] = observable
-        else:
-            logger.info("Returning cached response")
-
-        og = await make_deferred_yieldable(observable.observe())
-        respond_with_json_bytes(request, 200, og, send_cors=True)
-
-    async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes:
-        """Check the db, and download the URL and build a preview
-
-        Args:
-            url: The URL to preview.
-            user: The user requesting the preview.
-            ts: The timestamp requested for the preview.
-
-        Returns:
-            json-encoded og data
-        """
-        # check the URL cache in the DB (which will also provide us with
-        # historical previews, if we have any)
-        cache_result = await self.store.get_url_cache(url, ts)
-        if (
-            cache_result
-            and cache_result["expires_ts"] > ts
-            and cache_result["response_code"] / 100 == 2
-        ):
-            # It may be stored as text in the database, not as bytes (such as
-            # PostgreSQL). If so, encode it back before handing it on.
-            og = cache_result["og"]
-            if isinstance(og, str):
-                og = og.encode("utf8")
-            return og
-
-        # If this URL can be accessed via oEmbed, use that instead.
-        url_to_download = url
-        oembed_url = self._oembed.get_oembed_url(url)
-        if oembed_url:
-            url_to_download = oembed_url
-
-        media_info = await self._handle_url(url_to_download, user)
-
-        logger.debug("got media_info of '%s'", media_info)
-
-        # The number of milliseconds that the response should be considered valid.
-        expiration_ms = media_info.expires
-        author_name: Optional[str] = None
-
-        if _is_media(media_info.media_type):
-            file_id = media_info.filesystem_id
-            dims = await self.media_repo._generate_thumbnails(
-                None, file_id, file_id, media_info.media_type, url_cache=True
-            )
-
-            og = {
-                "og:description": media_info.download_name,
-                "og:image": f"mxc://{self.server_name}/{media_info.filesystem_id}",
-                "og:image:type": media_info.media_type,
-                "matrix:image:size": media_info.media_length,
-            }
-
-            if dims:
-                og["og:image:width"] = dims["width"]
-                og["og:image:height"] = dims["height"]
-            else:
-                logger.warning("Couldn't get dims for %s" % url)
-
-            # define our OG response for this media
-        elif _is_html(media_info.media_type):
-            # TODO: somehow stop a big HTML tree from exploding synapse's RAM
-
-            with open(media_info.filename, "rb") as file:
-                body = file.read()
-
-            tree = decode_body(body, media_info.uri, media_info.media_type)
-            if tree is not None:
-                # Check if this HTML document points to oEmbed information and
-                # defer to that.
-                oembed_url = self._oembed.autodiscover_from_html(tree)
-                og_from_oembed: JsonDict = {}
-                if oembed_url:
-                    try:
-                        oembed_info = await self._handle_url(
-                            oembed_url, user, allow_data_urls=True
-                        )
-                    except Exception as e:
-                        # Fetching the oEmbed info failed, don't block the entire URL preview.
-                        logger.warning(
-                            "oEmbed fetch failed during URL preview: %s errored with %s",
-                            oembed_url,
-                            e,
-                        )
-                    else:
-                        (
-                            og_from_oembed,
-                            author_name,
-                            expiration_ms,
-                        ) = await self._handle_oembed_response(
-                            url, oembed_info, expiration_ms
-                        )
-
-                # Parse Open Graph information from the HTML in case the oEmbed
-                # response failed or is incomplete.
-                og_from_html = parse_html_to_open_graph(tree)
-
-                # Compile the Open Graph response by using the scraped
-                # information from the HTML and overlaying any information
-                # from the oEmbed response.
-                og = {**og_from_html, **og_from_oembed}
-
-                await self._precache_image_url(user, media_info, og)
-            else:
-                og = {}
-
-        elif oembed_url:
-            # Handle the oEmbed information.
-            og, author_name, expiration_ms = await self._handle_oembed_response(
-                url, media_info, expiration_ms
-            )
-            await self._precache_image_url(user, media_info, og)
-
-        else:
-            logger.warning("Failed to find any OG data in %s", url)
-            og = {}
-
-        # If we don't have a title but we have author_name, copy it as
-        # title
-        if not og.get("og:title") and author_name:
-            og["og:title"] = author_name
-
-        # filter out any stupidly long values
-        keys_to_remove = []
-        for k, v in og.items():
-            # values can be numeric as well as strings, hence the cast to str
-            if len(k) > OG_TAG_NAME_MAXLEN or len(str(v)) > OG_TAG_VALUE_MAXLEN:
-                logger.warning(
-                    "Pruning overlong tag %s from OG data", k[:OG_TAG_NAME_MAXLEN]
-                )
-                keys_to_remove.append(k)
-        for k in keys_to_remove:
-            del og[k]
-
-        logger.debug("Calculated OG for %s as %s", url, og)
-
-        jsonog = json_encoder.encode(og)
-
-        # Cap the amount of time to consider a response valid.
-        expiration_ms = min(expiration_ms, ONE_DAY)
-
-        # store OG in history-aware DB cache
-        await self.store.store_url_cache(
-            url,
-            media_info.response_code,
-            media_info.etag,
-            media_info.created_ts_ms + expiration_ms,
-            jsonog,
-            media_info.filesystem_id,
-            media_info.created_ts_ms,
-        )
-
-        return jsonog.encode("utf8")
-
-    async def _download_url(self, url: str, output_stream: BinaryIO) -> DownloadResult:
-        """
-        Fetches a remote URL and parses the headers.
-
-        Args:
-             url: The URL to fetch.
-             output_stream: The stream to write the content to.
-
-        Returns:
-            A tuple of:
-                Media length, URL downloaded, the HTTP response code,
-                the media type, the downloaded file name, the number of
-                milliseconds the result is valid for, the etag header.
-        """
-
-        try:
-            logger.debug("Trying to get preview for url '%s'", url)
-            length, headers, uri, code = await self.client.get_file(
-                url,
-                output_stream=output_stream,
-                max_size=self.max_spider_size,
-                headers={
-                    b"Accept-Language": self.url_preview_accept_language,
-                    # Use a custom user agent for the preview because some sites will only return
-                    # Open Graph metadata to crawler user agents. Omit the Synapse version
-                    # string to avoid leaking information.
-                    b"User-Agent": [
-                        "Synapse (bot; +https://github.com/matrix-org/synapse)"
-                    ],
-                },
-                is_allowed_content_type=_is_previewable,
-            )
-        except SynapseError:
-            # Pass SynapseErrors through directly, so that the servlet
-            # handler will return a SynapseError to the client instead of
-            # blank data or a 500.
-            raise
-        except DNSLookupError:
-            # DNS lookup returned no results
-            # Note: This will also be the case if one of the resolved IP
-            # addresses is blacklisted
-            raise SynapseError(
-                502,
-                "DNS resolution failure during URL preview generation",
-                Codes.UNKNOWN,
-            )
-        except Exception as e:
-            # FIXME: pass through 404s and other error messages nicely
-            logger.warning("Error downloading %s: %r", url, e)
-
-            raise SynapseError(
-                500,
-                "Failed to download content: %s"
-                % (traceback.format_exception_only(sys.exc_info()[0], e),),
-                Codes.UNKNOWN,
-            )
-
-        if b"Content-Type" in headers:
-            media_type = headers[b"Content-Type"][0].decode("ascii")
-        else:
-            media_type = "application/octet-stream"
-
-        download_name = get_filename_from_headers(headers)
-
-        # FIXME: we should calculate a proper expiration based on the
-        # Cache-Control and Expire headers.  But for now, assume 1 hour.
-        expires = ONE_HOUR
-        etag = headers[b"ETag"][0].decode("ascii") if b"ETag" in headers else None
-
-        return DownloadResult(
-            length, uri, code, media_type, download_name, expires, etag
-        )
-
-    async def _parse_data_url(
-        self, url: str, output_stream: BinaryIO
-    ) -> DownloadResult:
-        """
-        Parses a data: URL.
-
-        Args:
-             url: The URL to parse.
-             output_stream: The stream to write the content to.
-
-        Returns:
-            A tuple of:
-                Media length, URL downloaded, the HTTP response code,
-                the media type, the downloaded file name, the number of
-                milliseconds the result is valid for, the etag header.
-        """
-
-        try:
-            logger.debug("Trying to parse data url '%s'", url)
-            with urlopen(url) as url_info:
-                # TODO Can this be more efficient.
-                output_stream.write(url_info.read())
-        except Exception as e:
-            logger.warning("Error parsing data: URL %s: %r", url, e)
-
-            raise SynapseError(
-                500,
-                "Failed to parse data URL: %s"
-                % (traceback.format_exception_only(sys.exc_info()[0], e),),
-                Codes.UNKNOWN,
-            )
-
-        return DownloadResult(
-            # Read back the length that has been written.
-            length=output_stream.tell(),
-            uri=url,
-            # If it was parsed, consider this a 200 OK.
-            response_code=200,
-            # urlopen shoves the media-type from the data URL into the content type
-            # header object.
-            media_type=url_info.headers.get_content_type(),
-            # Some features are not supported by data: URLs.
-            download_name=None,
-            expires=ONE_HOUR,
-            etag=None,
-        )
-
-    async def _handle_url(
-        self, url: str, user: UserID, allow_data_urls: bool = False
-    ) -> MediaInfo:
-        """
-        Fetches content from a URL and parses the result to generate a MediaInfo.
-
-        It uses the media storage provider to persist the fetched content and
-        stores the mapping into the database.
-
-        Args:
-             url: The URL to fetch.
-             user: The user who ahs requested this URL.
-             allow_data_urls: True if data URLs should be allowed.
-
-        Returns:
-            A MediaInfo object describing the fetched content.
-        """
-
-        # TODO: we should probably honour robots.txt... except in practice
-        # we're most likely being explicitly triggered by a human rather than a
-        # bot, so are we really a robot?
-
-        file_id = datetime.date.today().isoformat() + "_" + random_string(16)
-
-        file_info = FileInfo(server_name=None, file_id=file_id, url_cache=True)
-
-        with self.media_storage.store_into_file(file_info) as (f, fname, finish):
-            if url.startswith("data:"):
-                if not allow_data_urls:
-                    raise SynapseError(
-                        500, "Previewing of data: URLs is forbidden", Codes.UNKNOWN
-                    )
-
-                download_result = await self._parse_data_url(url, f)
-            else:
-                download_result = await self._download_url(url, f)
-
-            await finish()
-
-        try:
-            time_now_ms = self.clock.time_msec()
-
-            await self.store.store_local_media(
-                media_id=file_id,
-                media_type=download_result.media_type,
-                time_now_ms=time_now_ms,
-                upload_name=download_result.download_name,
-                media_length=download_result.length,
-                user_id=user,
-                url_cache=url,
-            )
-
-        except Exception as e:
-            logger.error("Error handling downloaded %s: %r", url, e)
-            # TODO: we really ought to delete the downloaded file in this
-            # case, since we won't have recorded it in the db, and will
-            # therefore not expire it.
-            raise
-
-        return MediaInfo(
-            media_type=download_result.media_type,
-            media_length=download_result.length,
-            download_name=download_result.download_name,
-            created_ts_ms=time_now_ms,
-            filesystem_id=file_id,
-            filename=fname,
-            uri=download_result.uri,
-            response_code=download_result.response_code,
-            expires=download_result.expires,
-            etag=download_result.etag,
-        )
-
-    async def _precache_image_url(
-        self, user: UserID, media_info: MediaInfo, og: JsonDict
-    ) -> None:
-        """
-        Pre-cache the image (if one exists) for posterity
-
-        Args:
-            user: The user requesting the preview.
-            media_info: The media being previewed.
-            og: The Open Graph dictionary. This is modified with image information.
-        """
-        # If there's no image or it is blank, there's nothing to do.
-        if "og:image" not in og:
-            return
-
-        # Remove the raw image URL, this will be replaced with an MXC URL, if successful.
-        image_url = og.pop("og:image")
-        if not image_url:
-            return
-
-        # The image URL from the HTML might be relative to the previewed page,
-        # convert it to an URL which can be requested directly.
-        url_parts = urlparse(image_url)
-        if url_parts.scheme != "data":
-            image_url = urljoin(media_info.uri, image_url)
-
-        # FIXME: it might be cleaner to use the same flow as the main /preview_url
-        # request itself and benefit from the same caching etc.  But for now we
-        # just rely on the caching on the master request to speed things up.
-        try:
-            image_info = await self._handle_url(image_url, user, allow_data_urls=True)
-        except Exception as e:
-            # Pre-caching the image failed, don't block the entire URL preview.
-            logger.warning(
-                "Pre-caching image failed during URL preview: %s errored with %s",
-                image_url,
-                e,
-            )
-            return
-
-        if _is_media(image_info.media_type):
-            # TODO: make sure we don't choke on white-on-transparent images
-            file_id = image_info.filesystem_id
-            dims = await self.media_repo._generate_thumbnails(
-                None, file_id, file_id, image_info.media_type, url_cache=True
-            )
-            if dims:
-                og["og:image:width"] = dims["width"]
-                og["og:image:height"] = dims["height"]
-            else:
-                logger.warning("Couldn't get dims for %s", image_url)
-
-            og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}"
-            og["og:image:type"] = image_info.media_type
-            og["matrix:image:size"] = image_info.media_length
-
-    async def _handle_oembed_response(
-        self, url: str, media_info: MediaInfo, expiration_ms: int
-    ) -> Tuple[JsonDict, Optional[str], int]:
-        """
-        Parse the downloaded oEmbed info.
-
-        Args:
-            url: The URL which is being previewed (not the one which was
-                requested).
-            media_info: The media being previewed.
-            expiration_ms: The length of time, in milliseconds, the media is valid for.
-
-        Returns:
-            A tuple of:
-                The Open Graph dictionary, if the oEmbed info can be parsed.
-                The author name if it could be retrieved from oEmbed.
-                The (possibly updated) length of time, in milliseconds, the media is valid for.
-        """
-        # If JSON was not returned, there's nothing to do.
-        if not _is_json(media_info.media_type):
-            return {}, None, expiration_ms
-
-        with open(media_info.filename, "rb") as file:
-            body = file.read()
-
-        oembed_response = self._oembed.parse_oembed_response(url, body)
-        open_graph_result = oembed_response.open_graph_result
-
-        # Use the cache age from the oEmbed result, if one was given.
-        if open_graph_result and oembed_response.cache_age is not None:
-            expiration_ms = oembed_response.cache_age
-
-        return open_graph_result, oembed_response.author_name, expiration_ms
-
-    def _start_expire_url_cache_data(self) -> Deferred:
-        return run_as_background_process(
-            "expire_url_cache_data", self._expire_url_cache_data
-        )
-
-    async def _expire_url_cache_data(self) -> None:
-        """Clean up expired url cache content, media and thumbnails."""
-
-        assert self._worker_run_media_background_jobs
-
-        now = self.clock.time_msec()
-
-        logger.debug("Running url preview cache expiry")
-
-        def try_remove_parent_dirs(dirs: Iterable[str]) -> None:
-            """Attempt to remove the given chain of parent directories
-
-            Args:
-                dirs: The list of directory paths to delete, with children appearing
-                    before their parents.
-            """
-            for dir in dirs:
-                try:
-                    os.rmdir(dir)
-                except FileNotFoundError:
-                    # Already deleted, continue with deleting the rest
-                    pass
-                except OSError as e:
-                    # Failed, skip deleting the rest of the parent dirs
-                    if e.errno != errno.ENOTEMPTY:
-                        logger.warning(
-                            "Failed to remove media directory while clearing url preview cache: %r: %s",
-                            dir,
-                            e,
-                        )
-                    break
-
-        # First we delete expired url cache entries
-        media_ids = await self.store.get_expired_url_cache(now)
-
-        removed_media = []
-        for media_id in media_ids:
-            fname = self.filepaths.url_cache_filepath(media_id)
-            try:
-                os.remove(fname)
-            except FileNotFoundError:
-                pass  # If the path doesn't exist, meh
-            except OSError as e:
-                logger.warning(
-                    "Failed to remove media while clearing url preview cache: %r: %s",
-                    media_id,
-                    e,
-                )
-                continue
-
-            removed_media.append(media_id)
-
-            dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
-            try_remove_parent_dirs(dirs)
-
-        await self.store.delete_url_cache(removed_media)
-
-        if removed_media:
-            logger.debug(
-                "Deleted %d entries from url preview cache", len(removed_media)
-            )
-        else:
-            logger.debug("No entries removed from url preview cache")
-
-        # Now we delete old images associated with the url cache.
-        # These may be cached for a bit on the client (i.e., they
-        # may have a room open with a preview url thing open).
-        # So we wait a couple of days before deleting, just in case.
-        expire_before = now - IMAGE_CACHE_EXPIRY_MS
-        media_ids = await self.store.get_url_cache_media_before(expire_before)
-
-        removed_media = []
-        for media_id in media_ids:
-            fname = self.filepaths.url_cache_filepath(media_id)
-            try:
-                os.remove(fname)
-            except FileNotFoundError:
-                pass  # If the path doesn't exist, meh
-            except OSError as e:
-                logger.warning(
-                    "Failed to remove media from url preview cache: %r: %s", media_id, e
-                )
-                continue
-
-            dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
-            try_remove_parent_dirs(dirs)
-
-            thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id)
-            try:
-                shutil.rmtree(thumbnail_dir)
-            except FileNotFoundError:
-                pass  # If the path doesn't exist, meh
-            except OSError as e:
-                logger.warning(
-                    "Failed to remove media from url preview cache: %r: %s", media_id, e
-                )
-                continue
-
-            removed_media.append(media_id)
-
-            dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
-            # Note that one of the directories to be deleted has already been
-            # removed by the `rmtree` above.
-            try_remove_parent_dirs(dirs)
-
-        await self.store.delete_url_cache_media(removed_media)
-
-        if removed_media:
-            logger.debug("Deleted %d media from url preview cache", len(removed_media))
-        else:
-            logger.debug("No media removed from url preview cache")
-
-
-def _is_media(content_type: str) -> bool:
-    return content_type.lower().startswith("image/")
-
-
-def _is_html(content_type: str) -> bool:
-    content_type = content_type.lower()
-    return content_type.startswith("text/html") or content_type.startswith(
-        "application/xhtml"
-    )
-
-
-def _is_json(content_type: str) -> bool:
-    return content_type.lower().startswith("application/json")
-
-
-def _is_previewable(content_type: str) -> bool:
-    """Returns True for content types for which we will perform URL preview and False
-    otherwise."""
-
-    return _is_html(content_type) or _is_media(content_type) or _is_json(content_type)
diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py
index 1c9b71d69c..d7653f30ae 100644
--- a/synapse/rest/media/v1/storage_provider.py
+++ b/synapse/rest/media/v1/storage_provider.py
@@ -1,4 +1,4 @@
-# Copyright 2018-2021 The Matrix.org Foundation C.I.C.
+# Copyright 2023 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,171 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+#
 
-import abc
-import logging
-import os
-import shutil
-from typing import TYPE_CHECKING, Callable, Optional
-
-from synapse.config._base import Config
-from synapse.logging.context import defer_to_thread, run_in_background
-from synapse.util.async_helpers import maybe_awaitable
-
-from ._base import FileInfo, Responder
-from .media_storage import FileResponder
-
-logger = logging.getLogger(__name__)
-
-if TYPE_CHECKING:
-    from synapse.server import HomeServer
-
-
-class StorageProvider(metaclass=abc.ABCMeta):
-    """A storage provider is a service that can store uploaded media and
-    retrieve them.
-    """
-
-    @abc.abstractmethod
-    async def store_file(self, path: str, file_info: FileInfo) -> None:
-        """Store the file described by file_info. The actual contents can be
-        retrieved by reading the file in file_info.upload_path.
-
-        Args:
-            path: Relative path of file in local cache
-            file_info: The metadata of the file.
-        """
-
-    @abc.abstractmethod
-    async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
-        """Attempt to fetch the file described by file_info and stream it
-        into writer.
-
-        Args:
-            path: Relative path of file in local cache
-            file_info: The metadata of the file.
-
-        Returns:
-            Returns a Responder if the provider has the file, otherwise returns None.
-        """
-
-
-class StorageProviderWrapper(StorageProvider):
-    """Wraps a storage provider and provides various config options
-
-    Args:
-        backend: The storage provider to wrap.
-        store_local: Whether to store new local files or not.
-        store_synchronous: Whether to wait for file to be successfully
-            uploaded, or todo the upload in the background.
-        store_remote: Whether remote media should be uploaded
-    """
-
-    def __init__(
-        self,
-        backend: StorageProvider,
-        store_local: bool,
-        store_synchronous: bool,
-        store_remote: bool,
-    ):
-        self.backend = backend
-        self.store_local = store_local
-        self.store_synchronous = store_synchronous
-        self.store_remote = store_remote
-
-    def __str__(self) -> str:
-        return "StorageProviderWrapper[%s]" % (self.backend,)
-
-    async def store_file(self, path: str, file_info: FileInfo) -> None:
-        if not file_info.server_name and not self.store_local:
-            return None
-
-        if file_info.server_name and not self.store_remote:
-            return None
-
-        if file_info.url_cache:
-            # The URL preview cache is short lived and not worth offloading or
-            # backing up.
-            return None
-
-        if self.store_synchronous:
-            # store_file is supposed to return an Awaitable, but guard
-            # against improper implementations.
-            await maybe_awaitable(self.backend.store_file(path, file_info))  # type: ignore
-        else:
-            # TODO: Handle errors.
-            async def store() -> None:
-                try:
-                    return await maybe_awaitable(
-                        self.backend.store_file(path, file_info)
-                    )
-                except Exception:
-                    logger.exception("Error storing file")
-
-            run_in_background(store)
-
-    async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
-        if file_info.url_cache:
-            # Files in the URL preview cache definitely aren't stored here,
-            # so avoid any potentially slow I/O or network access.
-            return None
-
-        # store_file is supposed to return an Awaitable, but guard
-        # against improper implementations.
-        return await maybe_awaitable(self.backend.fetch(path, file_info))
-
-
-class FileStorageProviderBackend(StorageProvider):
-    """A storage provider that stores files in a directory on a filesystem.
-
-    Args:
-        hs
-        config: The config returned by `parse_config`.
-    """
-
-    def __init__(self, hs: "HomeServer", config: str):
-        self.hs = hs
-        self.cache_directory = hs.config.media.media_store_path
-        self.base_directory = config
-
-    def __str__(self) -> str:
-        return "FileStorageProviderBackend[%s]" % (self.base_directory,)
-
-    async def store_file(self, path: str, file_info: FileInfo) -> None:
-        """See StorageProvider.store_file"""
-
-        primary_fname = os.path.join(self.cache_directory, path)
-        backup_fname = os.path.join(self.base_directory, path)
-
-        dirname = os.path.dirname(backup_fname)
-        os.makedirs(dirname, exist_ok=True)
-
-        # mypy needs help inferring the type of the second parameter, which is generic
-        shutil_copyfile: Callable[[str, str], str] = shutil.copyfile
-        await defer_to_thread(
-            self.hs.get_reactor(),
-            shutil_copyfile,
-            primary_fname,
-            backup_fname,
-        )
-
-    async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
-        """See StorageProvider.fetch"""
-
-        backup_fname = os.path.join(self.base_directory, path)
-        if os.path.isfile(backup_fname):
-            return FileResponder(open(backup_fname, "rb"))
-
-        return None
-
-    @staticmethod
-    def parse_config(config: dict) -> str:
-        """Called on startup to parse config supplied. This should parse
-        the config and raise if there is a problem.
-
-        The returned value is passed into the constructor.
-
-        In this case we only care about a single param, the directory, so let's
-        just pull that out.
-        """
-        return Config.ensure_directory(config["directory"])
+# This exists purely for backwards compatibility with media providers.
+from synapse.media.storage_provider import StorageProvider  # noqa: F401
diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py
deleted file mode 100644
index 3e720018b3..0000000000
--- a/synapse/rest/media/v1/thumbnail_resource.py
+++ /dev/null
@@ -1,555 +0,0 @@
-# Copyright 2014-2016 OpenMarket Ltd
-# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import logging
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
-
-from synapse.api.errors import Codes, SynapseError, cs_error
-from synapse.config.repository import THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP
-from synapse.http.server import (
-    DirectServeJsonResource,
-    respond_with_json,
-    set_corp_headers,
-    set_cors_headers,
-)
-from synapse.http.servlet import parse_integer, parse_string
-from synapse.http.site import SynapseRequest
-from synapse.rest.media.v1.media_storage import MediaStorage
-
-from ._base import (
-    FileInfo,
-    ThumbnailInfo,
-    parse_media_id,
-    respond_404,
-    respond_with_file,
-    respond_with_responder,
-)
-
-if TYPE_CHECKING:
-    from synapse.rest.media.v1.media_repository import MediaRepository
-    from synapse.server import HomeServer
-
-logger = logging.getLogger(__name__)
-
-
-class ThumbnailResource(DirectServeJsonResource):
-    isLeaf = True
-
-    def __init__(
-        self,
-        hs: "HomeServer",
-        media_repo: "MediaRepository",
-        media_storage: MediaStorage,
-    ):
-        super().__init__()
-
-        self.store = hs.get_datastores().main
-        self.media_repo = media_repo
-        self.media_storage = media_storage
-        self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
-        self.server_name = hs.hostname
-
-    async def _async_render_GET(self, request: SynapseRequest) -> None:
-        set_cors_headers(request)
-        set_corp_headers(request)
-        server_name, media_id, _ = parse_media_id(request)
-        width = parse_integer(request, "width", required=True)
-        height = parse_integer(request, "height", required=True)
-        method = parse_string(request, "method", "scale")
-        # TODO Parse the Accept header to get an prioritised list of thumbnail types.
-        m_type = "image/png"
-
-        if server_name == self.server_name:
-            if self.dynamic_thumbnails:
-                await self._select_or_generate_local_thumbnail(
-                    request, media_id, width, height, method, m_type
-                )
-            else:
-                await self._respond_local_thumbnail(
-                    request, media_id, width, height, method, m_type
-                )
-            self.media_repo.mark_recently_accessed(None, media_id)
-        else:
-            if self.dynamic_thumbnails:
-                await self._select_or_generate_remote_thumbnail(
-                    request, server_name, media_id, width, height, method, m_type
-                )
-            else:
-                await self._respond_remote_thumbnail(
-                    request, server_name, media_id, width, height, method, m_type
-                )
-            self.media_repo.mark_recently_accessed(server_name, media_id)
-
-    async def _respond_local_thumbnail(
-        self,
-        request: SynapseRequest,
-        media_id: str,
-        width: int,
-        height: int,
-        method: str,
-        m_type: str,
-    ) -> None:
-        media_info = await self.store.get_local_media(media_id)
-
-        if not media_info:
-            respond_404(request)
-            return
-        if media_info["quarantined_by"]:
-            logger.info("Media is quarantined")
-            respond_404(request)
-            return
-
-        thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
-        await self._select_and_respond_with_thumbnail(
-            request,
-            width,
-            height,
-            method,
-            m_type,
-            thumbnail_infos,
-            media_id,
-            media_id,
-            url_cache=bool(media_info["url_cache"]),
-            server_name=None,
-        )
-
-    async def _select_or_generate_local_thumbnail(
-        self,
-        request: SynapseRequest,
-        media_id: str,
-        desired_width: int,
-        desired_height: int,
-        desired_method: str,
-        desired_type: str,
-    ) -> None:
-        media_info = await self.store.get_local_media(media_id)
-
-        if not media_info:
-            respond_404(request)
-            return
-        if media_info["quarantined_by"]:
-            logger.info("Media is quarantined")
-            respond_404(request)
-            return
-
-        thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
-        for info in thumbnail_infos:
-            t_w = info["thumbnail_width"] == desired_width
-            t_h = info["thumbnail_height"] == desired_height
-            t_method = info["thumbnail_method"] == desired_method
-            t_type = info["thumbnail_type"] == desired_type
-
-            if t_w and t_h and t_method and t_type:
-                file_info = FileInfo(
-                    server_name=None,
-                    file_id=media_id,
-                    url_cache=media_info["url_cache"],
-                    thumbnail=ThumbnailInfo(
-                        width=info["thumbnail_width"],
-                        height=info["thumbnail_height"],
-                        type=info["thumbnail_type"],
-                        method=info["thumbnail_method"],
-                    ),
-                )
-
-                t_type = file_info.thumbnail_type
-                t_length = info["thumbnail_length"]
-
-                responder = await self.media_storage.fetch_media(file_info)
-                if responder:
-                    await respond_with_responder(request, responder, t_type, t_length)
-                    return
-
-        logger.debug("We don't have a thumbnail of that size. Generating")
-
-        # Okay, so we generate one.
-        file_path = await self.media_repo.generate_local_exact_thumbnail(
-            media_id,
-            desired_width,
-            desired_height,
-            desired_method,
-            desired_type,
-            url_cache=bool(media_info["url_cache"]),
-        )
-
-        if file_path:
-            await respond_with_file(request, desired_type, file_path)
-        else:
-            logger.warning("Failed to generate thumbnail")
-            raise SynapseError(400, "Failed to generate thumbnail.")
-
-    async def _select_or_generate_remote_thumbnail(
-        self,
-        request: SynapseRequest,
-        server_name: str,
-        media_id: str,
-        desired_width: int,
-        desired_height: int,
-        desired_method: str,
-        desired_type: str,
-    ) -> None:
-        media_info = await self.media_repo.get_remote_media_info(server_name, media_id)
-
-        thumbnail_infos = await self.store.get_remote_media_thumbnails(
-            server_name, media_id
-        )
-
-        file_id = media_info["filesystem_id"]
-
-        for info in thumbnail_infos:
-            t_w = info["thumbnail_width"] == desired_width
-            t_h = info["thumbnail_height"] == desired_height
-            t_method = info["thumbnail_method"] == desired_method
-            t_type = info["thumbnail_type"] == desired_type
-
-            if t_w and t_h and t_method and t_type:
-                file_info = FileInfo(
-                    server_name=server_name,
-                    file_id=media_info["filesystem_id"],
-                    thumbnail=ThumbnailInfo(
-                        width=info["thumbnail_width"],
-                        height=info["thumbnail_height"],
-                        type=info["thumbnail_type"],
-                        method=info["thumbnail_method"],
-                    ),
-                )
-
-                t_type = file_info.thumbnail_type
-                t_length = info["thumbnail_length"]
-
-                responder = await self.media_storage.fetch_media(file_info)
-                if responder:
-                    await respond_with_responder(request, responder, t_type, t_length)
-                    return
-
-        logger.debug("We don't have a thumbnail of that size. Generating")
-
-        # Okay, so we generate one.
-        file_path = await self.media_repo.generate_remote_exact_thumbnail(
-            server_name,
-            file_id,
-            media_id,
-            desired_width,
-            desired_height,
-            desired_method,
-            desired_type,
-        )
-
-        if file_path:
-            await respond_with_file(request, desired_type, file_path)
-        else:
-            logger.warning("Failed to generate thumbnail")
-            raise SynapseError(400, "Failed to generate thumbnail.")
-
-    async def _respond_remote_thumbnail(
-        self,
-        request: SynapseRequest,
-        server_name: str,
-        media_id: str,
-        width: int,
-        height: int,
-        method: str,
-        m_type: str,
-    ) -> None:
-        # TODO: Don't download the whole remote file
-        # We should proxy the thumbnail from the remote server instead of
-        # downloading the remote file and generating our own thumbnails.
-        media_info = await self.media_repo.get_remote_media_info(server_name, media_id)
-
-        thumbnail_infos = await self.store.get_remote_media_thumbnails(
-            server_name, media_id
-        )
-        await self._select_and_respond_with_thumbnail(
-            request,
-            width,
-            height,
-            method,
-            m_type,
-            thumbnail_infos,
-            media_id,
-            media_info["filesystem_id"],
-            url_cache=False,
-            server_name=server_name,
-        )
-
-    async def _select_and_respond_with_thumbnail(
-        self,
-        request: SynapseRequest,
-        desired_width: int,
-        desired_height: int,
-        desired_method: str,
-        desired_type: str,
-        thumbnail_infos: List[Dict[str, Any]],
-        media_id: str,
-        file_id: str,
-        url_cache: bool,
-        server_name: Optional[str] = None,
-    ) -> None:
-        """
-        Respond to a request with an appropriate thumbnail from the previously generated thumbnails.
-
-        Args:
-            request: The incoming request.
-            desired_width: The desired width, the returned thumbnail may be larger than this.
-            desired_height: The desired height, the returned thumbnail may be larger than this.
-            desired_method: The desired method used to generate the thumbnail.
-            desired_type: The desired content-type of the thumbnail.
-            thumbnail_infos: A list of dictionaries of candidate thumbnails.
-            file_id: The ID of the media that a thumbnail is being requested for.
-            url_cache: True if this is from a URL cache.
-            server_name: The server name, if this is a remote thumbnail.
-        """
-        logger.debug(
-            "_select_and_respond_with_thumbnail: media_id=%s desired=%sx%s (%s) thumbnail_infos=%s",
-            media_id,
-            desired_width,
-            desired_height,
-            desired_method,
-            thumbnail_infos,
-        )
-
-        # If `dynamic_thumbnails` is enabled, we expect Synapse to go down a
-        # different code path to handle it.
-        assert not self.dynamic_thumbnails
-
-        if thumbnail_infos:
-            file_info = self._select_thumbnail(
-                desired_width,
-                desired_height,
-                desired_method,
-                desired_type,
-                thumbnail_infos,
-                file_id,
-                url_cache,
-                server_name,
-            )
-            if not file_info:
-                logger.info("Couldn't find a thumbnail matching the desired inputs")
-                respond_404(request)
-                return
-
-            # The thumbnail property must exist.
-            assert file_info.thumbnail is not None
-
-            responder = await self.media_storage.fetch_media(file_info)
-            if responder:
-                await respond_with_responder(
-                    request,
-                    responder,
-                    file_info.thumbnail.type,
-                    file_info.thumbnail.length,
-                )
-                return
-
-            # If we can't find the thumbnail we regenerate it. This can happen
-            # if e.g. we've deleted the thumbnails but still have the original
-            # image somewhere.
-            #
-            # Since we have an entry for the thumbnail in the DB we a) know we
-            # have have successfully generated the thumbnail in the past (so we
-            # don't need to worry about repeatedly failing to generate
-            # thumbnails), and b) have already calculated that appropriate
-            # width/height/method so we can just call the "generate exact"
-            # methods.
-
-            # First let's check that we do actually have the original image
-            # still. This will throw a 404 if we don't.
-            # TODO: We should refetch the thumbnails for remote media.
-            await self.media_storage.ensure_media_is_in_local_cache(
-                FileInfo(server_name, file_id, url_cache=url_cache)
-            )
-
-            if server_name:
-                await self.media_repo.generate_remote_exact_thumbnail(
-                    server_name,
-                    file_id=file_id,
-                    media_id=media_id,
-                    t_width=file_info.thumbnail.width,
-                    t_height=file_info.thumbnail.height,
-                    t_method=file_info.thumbnail.method,
-                    t_type=file_info.thumbnail.type,
-                )
-            else:
-                await self.media_repo.generate_local_exact_thumbnail(
-                    media_id=media_id,
-                    t_width=file_info.thumbnail.width,
-                    t_height=file_info.thumbnail.height,
-                    t_method=file_info.thumbnail.method,
-                    t_type=file_info.thumbnail.type,
-                    url_cache=url_cache,
-                )
-
-            responder = await self.media_storage.fetch_media(file_info)
-            await respond_with_responder(
-                request,
-                responder,
-                file_info.thumbnail.type,
-                file_info.thumbnail.length,
-            )
-        else:
-            # This might be because:
-            # 1. We can't create thumbnails for the given media (corrupted or
-            #    unsupported file type), or
-            # 2. The thumbnailing process never ran or errored out initially
-            #    when the media was first uploaded (these bugs should be
-            #    reported and fixed).
-            # Note that we don't attempt to generate a thumbnail now because
-            # `dynamic_thumbnails` is disabled.
-            logger.info("Failed to find any generated thumbnails")
-
-            respond_with_json(
-                request,
-                400,
-                cs_error(
-                    "Cannot find any thumbnails for the requested media (%r). This might mean the media is not a supported_media_format=(%s) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)"
-                    % (
-                        request.postpath,
-                        ", ".join(THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP.keys()),
-                    ),
-                    code=Codes.UNKNOWN,
-                ),
-                send_cors=True,
-            )
-
-    def _select_thumbnail(
-        self,
-        desired_width: int,
-        desired_height: int,
-        desired_method: str,
-        desired_type: str,
-        thumbnail_infos: List[Dict[str, Any]],
-        file_id: str,
-        url_cache: bool,
-        server_name: Optional[str],
-    ) -> Optional[FileInfo]:
-        """
-        Choose an appropriate thumbnail from the previously generated thumbnails.
-
-        Args:
-            desired_width: The desired width, the returned thumbnail may be larger than this.
-            desired_height: The desired height, the returned thumbnail may be larger than this.
-            desired_method: The desired method used to generate the thumbnail.
-            desired_type: The desired content-type of the thumbnail.
-            thumbnail_infos: A list of dictionaries of candidate thumbnails.
-            file_id: The ID of the media that a thumbnail is being requested for.
-            url_cache: True if this is from a URL cache.
-            server_name: The server name, if this is a remote thumbnail.
-
-        Returns:
-             The thumbnail which best matches the desired parameters.
-        """
-        desired_method = desired_method.lower()
-
-        # The chosen thumbnail.
-        thumbnail_info = None
-
-        d_w = desired_width
-        d_h = desired_height
-
-        if desired_method == "crop":
-            # Thumbnails that match equal or larger sizes of desired width/height.
-            crop_info_list: List[Tuple[int, int, int, bool, int, Dict[str, Any]]] = []
-            # Other thumbnails.
-            crop_info_list2: List[Tuple[int, int, int, bool, int, Dict[str, Any]]] = []
-            for info in thumbnail_infos:
-                # Skip thumbnails generated with different methods.
-                if info["thumbnail_method"] != "crop":
-                    continue
-
-                t_w = info["thumbnail_width"]
-                t_h = info["thumbnail_height"]
-                aspect_quality = abs(d_w * t_h - d_h * t_w)
-                min_quality = 0 if d_w <= t_w and d_h <= t_h else 1
-                size_quality = abs((d_w - t_w) * (d_h - t_h))
-                type_quality = desired_type != info["thumbnail_type"]
-                length_quality = info["thumbnail_length"]
-                if t_w >= d_w or t_h >= d_h:
-                    crop_info_list.append(
-                        (
-                            aspect_quality,
-                            min_quality,
-                            size_quality,
-                            type_quality,
-                            length_quality,
-                            info,
-                        )
-                    )
-                else:
-                    crop_info_list2.append(
-                        (
-                            aspect_quality,
-                            min_quality,
-                            size_quality,
-                            type_quality,
-                            length_quality,
-                            info,
-                        )
-                    )
-            # Pick the most appropriate thumbnail. Some values of `desired_width` and
-            # `desired_height` may result in a tie, in which case we avoid comparing on
-            # the thumbnail info dictionary and pick the thumbnail that appears earlier
-            # in the list of candidates.
-            if crop_info_list:
-                thumbnail_info = min(crop_info_list, key=lambda t: t[:-1])[-1]
-            elif crop_info_list2:
-                thumbnail_info = min(crop_info_list2, key=lambda t: t[:-1])[-1]
-        elif desired_method == "scale":
-            # Thumbnails that match equal or larger sizes of desired width/height.
-            info_list: List[Tuple[int, bool, int, Dict[str, Any]]] = []
-            # Other thumbnails.
-            info_list2: List[Tuple[int, bool, int, Dict[str, Any]]] = []
-
-            for info in thumbnail_infos:
-                # Skip thumbnails generated with different methods.
-                if info["thumbnail_method"] != "scale":
-                    continue
-
-                t_w = info["thumbnail_width"]
-                t_h = info["thumbnail_height"]
-                size_quality = abs((d_w - t_w) * (d_h - t_h))
-                type_quality = desired_type != info["thumbnail_type"]
-                length_quality = info["thumbnail_length"]
-                if t_w >= d_w or t_h >= d_h:
-                    info_list.append((size_quality, type_quality, length_quality, info))
-                else:
-                    info_list2.append(
-                        (size_quality, type_quality, length_quality, info)
-                    )
-            # Pick the most appropriate thumbnail. Some values of `desired_width` and
-            # `desired_height` may result in a tie, in which case we avoid comparing on
-            # the thumbnail info dictionary and pick the thumbnail that appears earlier
-            # in the list of candidates.
-            if info_list:
-                thumbnail_info = min(info_list, key=lambda t: t[:-1])[-1]
-            elif info_list2:
-                thumbnail_info = min(info_list2, key=lambda t: t[:-1])[-1]
-
-        if thumbnail_info:
-            return FileInfo(
-                file_id=file_id,
-                url_cache=url_cache,
-                server_name=server_name,
-                thumbnail=ThumbnailInfo(
-                    width=thumbnail_info["thumbnail_width"],
-                    height=thumbnail_info["thumbnail_height"],
-                    type=thumbnail_info["thumbnail_type"],
-                    method=thumbnail_info["thumbnail_method"],
-                    length=thumbnail_info["thumbnail_length"],
-                ),
-            )
-
-        # No matching thumbnail was found.
-        return None
diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py
deleted file mode 100644
index f909a4fb9a..0000000000
--- a/synapse/rest/media/v1/thumbnailer.py
+++ /dev/null
@@ -1,221 +0,0 @@
-# Copyright 2014-2016 OpenMarket Ltd
-# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import logging
-from io import BytesIO
-from types import TracebackType
-from typing import Optional, Tuple, Type
-
-from PIL import Image
-
-logger = logging.getLogger(__name__)
-
-EXIF_ORIENTATION_TAG = 0x0112
-EXIF_TRANSPOSE_MAPPINGS = {
-    2: Image.FLIP_LEFT_RIGHT,
-    3: Image.ROTATE_180,
-    4: Image.FLIP_TOP_BOTTOM,
-    5: Image.TRANSPOSE,
-    6: Image.ROTATE_270,
-    7: Image.TRANSVERSE,
-    8: Image.ROTATE_90,
-}
-
-
-class ThumbnailError(Exception):
-    """An error occurred generating a thumbnail."""
-
-
-class Thumbnailer:
-    FORMATS = {"image/jpeg": "JPEG", "image/png": "PNG"}
-
-    @staticmethod
-    def set_limits(max_image_pixels: int) -> None:
-        Image.MAX_IMAGE_PIXELS = max_image_pixels
-
-    def __init__(self, input_path: str):
-        # Have we closed the image?
-        self._closed = False
-
-        try:
-            self.image = Image.open(input_path)
-        except OSError as e:
-            # If an error occurs opening the image, a thumbnail won't be able to
-            # be generated.
-            raise ThumbnailError from e
-        except Image.DecompressionBombError as e:
-            # If an image decompression bomb error occurs opening the image,
-            # then the image exceeds the pixel limit and a thumbnail won't
-            # be able to be generated.
-            raise ThumbnailError from e
-
-        self.width, self.height = self.image.size
-        self.transpose_method = None
-        try:
-            # We don't use ImageOps.exif_transpose since it crashes with big EXIF
-            #
-            # Ignore safety: Pillow seems to acknowledge that this method is
-            # "private, experimental, but generally widely used". Pillow 6
-            # includes a public getexif() method (no underscore) that we might
-            # consider using instead when we can bump that dependency.
-            #
-            # At the time of writing, Debian buster (currently oldstable)
-            # provides version 5.4.1. It's expected to EOL in mid-2022, see
-            # https://wiki.debian.org/DebianReleases#Production_Releases
-            image_exif = self.image._getexif()  # type: ignore
-            if image_exif is not None:
-                image_orientation = image_exif.get(EXIF_ORIENTATION_TAG)
-                assert type(image_orientation) is int
-                self.transpose_method = EXIF_TRANSPOSE_MAPPINGS.get(image_orientation)
-        except Exception as e:
-            # A lot of parsing errors can happen when parsing EXIF
-            logger.info("Error parsing image EXIF information: %s", e)
-
-    def transpose(self) -> Tuple[int, int]:
-        """Transpose the image using its EXIF Orientation tag
-
-        Returns:
-            A tuple containing the new image size in pixels as (width, height).
-        """
-        if self.transpose_method is not None:
-            # Safety: `transpose` takes an int rather than e.g. an IntEnum.
-            # self.transpose_method is set above to be a value in
-            # EXIF_TRANSPOSE_MAPPINGS, and that only contains correct values.
-            with self.image:
-                self.image = self.image.transpose(self.transpose_method)  # type: ignore[arg-type]
-            self.width, self.height = self.image.size
-            self.transpose_method = None
-            # We don't need EXIF any more
-            self.image.info["exif"] = None
-        return self.image.size
-
-    def aspect(self, max_width: int, max_height: int) -> Tuple[int, int]:
-        """Calculate the largest size that preserves aspect ratio which
-        fits within the given rectangle::
-
-            (w_in / h_in) = (w_out / h_out)
-            w_out = max(min(w_max, h_max * (w_in / h_in)), 1)
-            h_out = max(min(h_max, w_max * (h_in / w_in)), 1)
-
-        Args:
-            max_width: The largest possible width.
-            max_height: The largest possible height.
-        """
-
-        if max_width * self.height < max_height * self.width:
-            return max_width, max((max_width * self.height) // self.width, 1)
-        else:
-            return max((max_height * self.width) // self.height, 1), max_height
-
-    def _resize(self, width: int, height: int) -> Image.Image:
-        # 1-bit or 8-bit color palette images need converting to RGB
-        # otherwise they will be scaled using nearest neighbour which
-        # looks awful.
-        #
-        # If the image has transparency, use RGBA instead.
-        if self.image.mode in ["1", "L", "P"]:
-            if self.image.info.get("transparency", None) is not None:
-                with self.image:
-                    self.image = self.image.convert("RGBA")
-            else:
-                with self.image:
-                    self.image = self.image.convert("RGB")
-        return self.image.resize((width, height), Image.ANTIALIAS)
-
-    def scale(self, width: int, height: int, output_type: str) -> BytesIO:
-        """Rescales the image to the given dimensions.
-
-        Returns:
-            The bytes of the encoded image ready to be written to disk
-        """
-        with self._resize(width, height) as scaled:
-            return self._encode_image(scaled, output_type)
-
-    def crop(self, width: int, height: int, output_type: str) -> BytesIO:
-        """Rescales and crops the image to the given dimensions preserving
-        aspect::
-            (w_in / h_in) = (w_scaled / h_scaled)
-            w_scaled = max(w_out, h_out * (w_in / h_in))
-            h_scaled = max(h_out, w_out * (h_in / w_in))
-
-        Args:
-            max_width: The largest possible width.
-            max_height: The largest possible height.
-
-        Returns:
-            The bytes of the encoded image ready to be written to disk
-        """
-        if width * self.height > height * self.width:
-            scaled_width = width
-            scaled_height = (width * self.height) // self.width
-            crop_top = (scaled_height - height) // 2
-            crop_bottom = height + crop_top
-            crop = (0, crop_top, width, crop_bottom)
-        else:
-            scaled_width = (height * self.width) // self.height
-            scaled_height = height
-            crop_left = (scaled_width - width) // 2
-            crop_right = width + crop_left
-            crop = (crop_left, 0, crop_right, height)
-
-        with self._resize(scaled_width, scaled_height) as scaled_image:
-            with scaled_image.crop(crop) as cropped:
-                return self._encode_image(cropped, output_type)
-
-    def _encode_image(self, output_image: Image.Image, output_type: str) -> BytesIO:
-        output_bytes_io = BytesIO()
-        fmt = self.FORMATS[output_type]
-        if fmt == "JPEG":
-            output_image = output_image.convert("RGB")
-        output_image.save(output_bytes_io, fmt, quality=80)
-        return output_bytes_io
-
-    def close(self) -> None:
-        """Closes the underlying image file.
-
-        Once closed no other functions can be called.
-
-        Can be called multiple times.
-        """
-
-        if self._closed:
-            return
-
-        self._closed = True
-
-        # Since we run this on the finalizer then we need to handle `__init__`
-        # raising an exception before it can define `self.image`.
-        image = getattr(self, "image", None)
-        if image is None:
-            return
-
-        image.close()
-
-    def __enter__(self) -> "Thumbnailer":
-        """Make `Thumbnailer` a context manager that calls `close` on
-        `__exit__`.
-        """
-        return self
-
-    def __exit__(
-        self,
-        type: Optional[Type[BaseException]],
-        value: Optional[BaseException],
-        traceback: Optional[TracebackType],
-    ) -> None:
-        self.close()
-
-    def __del__(self) -> None:
-        # Make sure we actually do close the image, rather than leak data.
-        self.close()
diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py
deleted file mode 100644
index 97548b54e5..0000000000
--- a/synapse/rest/media/v1/upload_resource.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# Copyright 2014-2016 OpenMarket Ltd
-# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-from typing import IO, TYPE_CHECKING, Dict, List, Optional
-
-from synapse.api.errors import Codes, SynapseError
-from synapse.http.server import DirectServeJsonResource, respond_with_json
-from synapse.http.servlet import parse_bytes_from_args
-from synapse.http.site import SynapseRequest
-from synapse.rest.media.v1.media_storage import SpamMediaException
-
-if TYPE_CHECKING:
-    from synapse.rest.media.v1.media_repository import MediaRepository
-    from synapse.server import HomeServer
-
-logger = logging.getLogger(__name__)
-
-
-class UploadResource(DirectServeJsonResource):
-    isLeaf = True
-
-    def __init__(self, hs: "HomeServer", media_repo: "MediaRepository"):
-        super().__init__()
-
-        self.media_repo = media_repo
-        self.filepaths = media_repo.filepaths
-        self.store = hs.get_datastores().main
-        self.clock = hs.get_clock()
-        self.server_name = hs.hostname
-        self.auth = hs.get_auth()
-        self.max_upload_size = hs.config.media.max_upload_size
-        self.clock = hs.get_clock()
-
-    async def _async_render_OPTIONS(self, request: SynapseRequest) -> None:
-        respond_with_json(request, 200, {}, send_cors=True)
-
-    async def _async_render_POST(self, request: SynapseRequest) -> None:
-        requester = await self.auth.get_user_by_req(request)
-        raw_content_length = request.getHeader("Content-Length")
-        if raw_content_length is None:
-            raise SynapseError(msg="Request must specify a Content-Length", code=400)
-        try:
-            content_length = int(raw_content_length)
-        except ValueError:
-            raise SynapseError(msg="Content-Length value is invalid", code=400)
-        if content_length > self.max_upload_size:
-            raise SynapseError(
-                msg="Upload request body is too large",
-                code=413,
-                errcode=Codes.TOO_LARGE,
-            )
-
-        args: Dict[bytes, List[bytes]] = request.args  # type: ignore
-        upload_name_bytes = parse_bytes_from_args(args, "filename")
-        if upload_name_bytes:
-            try:
-                upload_name: Optional[str] = upload_name_bytes.decode("utf8")
-            except UnicodeDecodeError:
-                raise SynapseError(
-                    msg="Invalid UTF-8 filename parameter: %r" % (upload_name_bytes,),
-                    code=400,
-                )
-
-        # If the name is falsey (e.g. an empty byte string) ensure it is None.
-        else:
-            upload_name = None
-
-        headers = request.requestHeaders
-
-        if headers.hasHeader(b"Content-Type"):
-            content_type_headers = headers.getRawHeaders(b"Content-Type")
-            assert content_type_headers  # for mypy
-            media_type = content_type_headers[0].decode("ascii")
-        else:
-            media_type = "application/octet-stream"
-
-        # if headers.hasHeader(b"Content-Disposition"):
-        #     disposition = headers.getRawHeaders(b"Content-Disposition")[0]
-        # TODO(markjh): parse content-dispostion
-
-        try:
-            content: IO = request.content  # type: ignore
-            content_uri = await self.media_repo.create_content(
-                media_type, upload_name, content, content_length, requester.user
-            )
-        except SpamMediaException:
-            # For uploading of media we want to respond with a 400, instead of
-            # the default 404, as that would just be confusing.
-            raise SynapseError(400, "Bad content")
-
-        logger.info("Uploaded content with URI '%s'", content_uri)
-
-        respond_with_json(
-            request, 200, {"content_uri": str(content_uri)}, send_cors=True
-        )
diff --git a/synapse/server.py b/synapse/server.py
index e5a3475247..a7c32e9a60 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -105,6 +105,7 @@ from synapse.handlers.typing import FollowerTypingHandler, TypingWriterHandler
 from synapse.handlers.user_directory import UserDirectoryHandler
 from synapse.http.client import InsecureInterceptableContextFactory, SimpleHttpClient
 from synapse.http.matrixfederationclient import MatrixFederationHttpClient
+from synapse.media.media_repository import MediaRepository
 from synapse.metrics.common_usage_metrics import CommonUsageMetricsManager
 from synapse.module_api import ModuleApi
 from synapse.notifier import Notifier, ReplicationNotifier
@@ -115,10 +116,7 @@ from synapse.replication.tcp.external_cache import ExternalCache
 from synapse.replication.tcp.handler import ReplicationCommandHandler
 from synapse.replication.tcp.resource import ReplicationStreamer
 from synapse.replication.tcp.streams import STREAMS_MAP, Stream
-from synapse.rest.media.v1.media_repository import (
-    MediaRepository,
-    MediaRepositoryResource,
-)
+from synapse.rest.media.media_repository_resource import MediaRepositoryResource
 from synapse.server_notices.server_notices_manager import ServerNoticesManager
 from synapse.server_notices.server_notices_sender import ServerNoticesSender
 from synapse.server_notices.worker_server_notices_sender import (
diff --git a/tests/media/__init__.py b/tests/media/__init__.py
new file mode 100644
index 0000000000..68910cbf5b
--- /dev/null
+++ b/tests/media/__init__.py
@@ -0,0 +1,13 @@
+#  Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/media/test_base.py b/tests/media/test_base.py
new file mode 100644
index 0000000000..66498c744d
--- /dev/null
+++ b/tests/media/test_base.py
@@ -0,0 +1,38 @@
+# Copyright 2019 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.media._base import get_filename_from_headers
+
+from tests import unittest
+
+
+class GetFileNameFromHeadersTests(unittest.TestCase):
+    # input -> expected result
+    TEST_CASES = {
+        b"inline; filename=abc.txt": "abc.txt",
+        b'inline; filename="azerty"': "azerty",
+        b'inline; filename="aze%20rty"': "aze%20rty",
+        b'inline; filename="aze"rty"': 'aze"rty',
+        b'inline; filename="azer;ty"': "azer;ty",
+        b"inline; filename*=utf-8''foo%C2%A3bar": "foo£bar",
+    }
+
+    def tests(self) -> None:
+        for hdr, expected in self.TEST_CASES.items():
+            res = get_filename_from_headers({b"Content-Disposition": [hdr]})
+            self.assertEqual(
+                res,
+                expected,
+                f"expected output for {hdr!r} to be {expected} but was {res}",
+            )
diff --git a/tests/media/test_filepath.py b/tests/media/test_filepath.py
new file mode 100644
index 0000000000..95e3b83d5a
--- /dev/null
+++ b/tests/media/test_filepath.py
@@ -0,0 +1,595 @@
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import inspect
+import os
+from typing import Iterable
+
+from synapse.media.filepath import MediaFilePaths, _wrap_with_jail_check
+
+from tests import unittest
+
+
+class MediaFilePathsTestCase(unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+
+        self.filepaths = MediaFilePaths("/media_store")
+
+    def test_local_media_filepath(self) -> None:
+        """Test local media paths"""
+        self.assertEqual(
+            self.filepaths.local_media_filepath_rel("GerZNDnDZVjsOtardLuwfIBg"),
+            "local_content/Ge/rZ/NDnDZVjsOtardLuwfIBg",
+        )
+        self.assertEqual(
+            self.filepaths.local_media_filepath("GerZNDnDZVjsOtardLuwfIBg"),
+            "/media_store/local_content/Ge/rZ/NDnDZVjsOtardLuwfIBg",
+        )
+
+    def test_local_media_thumbnail(self) -> None:
+        """Test local media thumbnail paths"""
+        self.assertEqual(
+            self.filepaths.local_media_thumbnail_rel(
+                "GerZNDnDZVjsOtardLuwfIBg", 800, 600, "image/jpeg", "scale"
+            ),
+            "local_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale",
+        )
+        self.assertEqual(
+            self.filepaths.local_media_thumbnail(
+                "GerZNDnDZVjsOtardLuwfIBg", 800, 600, "image/jpeg", "scale"
+            ),
+            "/media_store/local_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale",
+        )
+
+    def test_local_media_thumbnail_dir(self) -> None:
+        """Test local media thumbnail directory paths"""
+        self.assertEqual(
+            self.filepaths.local_media_thumbnail_dir("GerZNDnDZVjsOtardLuwfIBg"),
+            "/media_store/local_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg",
+        )
+
+    def test_remote_media_filepath(self) -> None:
+        """Test remote media paths"""
+        self.assertEqual(
+            self.filepaths.remote_media_filepath_rel(
+                "example.com", "GerZNDnDZVjsOtardLuwfIBg"
+            ),
+            "remote_content/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg",
+        )
+        self.assertEqual(
+            self.filepaths.remote_media_filepath(
+                "example.com", "GerZNDnDZVjsOtardLuwfIBg"
+            ),
+            "/media_store/remote_content/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg",
+        )
+
+    def test_remote_media_thumbnail(self) -> None:
+        """Test remote media thumbnail paths"""
+        self.assertEqual(
+            self.filepaths.remote_media_thumbnail_rel(
+                "example.com",
+                "GerZNDnDZVjsOtardLuwfIBg",
+                800,
+                600,
+                "image/jpeg",
+                "scale",
+            ),
+            "remote_thumbnail/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale",
+        )
+        self.assertEqual(
+            self.filepaths.remote_media_thumbnail(
+                "example.com",
+                "GerZNDnDZVjsOtardLuwfIBg",
+                800,
+                600,
+                "image/jpeg",
+                "scale",
+            ),
+            "/media_store/remote_thumbnail/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale",
+        )
+
+    def test_remote_media_thumbnail_legacy(self) -> None:
+        """Test old-style remote media thumbnail paths"""
+        self.assertEqual(
+            self.filepaths.remote_media_thumbnail_rel_legacy(
+                "example.com", "GerZNDnDZVjsOtardLuwfIBg", 800, 600, "image/jpeg"
+            ),
+            "remote_thumbnail/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg",
+        )
+
+    def test_remote_media_thumbnail_dir(self) -> None:
+        """Test remote media thumbnail directory paths"""
+        self.assertEqual(
+            self.filepaths.remote_media_thumbnail_dir(
+                "example.com", "GerZNDnDZVjsOtardLuwfIBg"
+            ),
+            "/media_store/remote_thumbnail/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg",
+        )
+
+    def test_url_cache_filepath(self) -> None:
+        """Test URL cache paths"""
+        self.assertEqual(
+            self.filepaths.url_cache_filepath_rel("2020-01-02_GerZNDnDZVjsOtar"),
+            "url_cache/2020-01-02/GerZNDnDZVjsOtar",
+        )
+        self.assertEqual(
+            self.filepaths.url_cache_filepath("2020-01-02_GerZNDnDZVjsOtar"),
+            "/media_store/url_cache/2020-01-02/GerZNDnDZVjsOtar",
+        )
+
+    def test_url_cache_filepath_legacy(self) -> None:
+        """Test old-style URL cache paths"""
+        self.assertEqual(
+            self.filepaths.url_cache_filepath_rel("GerZNDnDZVjsOtardLuwfIBg"),
+            "url_cache/Ge/rZ/NDnDZVjsOtardLuwfIBg",
+        )
+        self.assertEqual(
+            self.filepaths.url_cache_filepath("GerZNDnDZVjsOtardLuwfIBg"),
+            "/media_store/url_cache/Ge/rZ/NDnDZVjsOtardLuwfIBg",
+        )
+
+    def test_url_cache_filepath_dirs_to_delete(self) -> None:
+        """Test URL cache cleanup paths"""
+        self.assertEqual(
+            self.filepaths.url_cache_filepath_dirs_to_delete(
+                "2020-01-02_GerZNDnDZVjsOtar"
+            ),
+            ["/media_store/url_cache/2020-01-02"],
+        )
+
+    def test_url_cache_filepath_dirs_to_delete_legacy(self) -> None:
+        """Test old-style URL cache cleanup paths"""
+        self.assertEqual(
+            self.filepaths.url_cache_filepath_dirs_to_delete(
+                "GerZNDnDZVjsOtardLuwfIBg"
+            ),
+            [
+                "/media_store/url_cache/Ge/rZ",
+                "/media_store/url_cache/Ge",
+            ],
+        )
+
+    def test_url_cache_thumbnail(self) -> None:
+        """Test URL cache thumbnail paths"""
+        self.assertEqual(
+            self.filepaths.url_cache_thumbnail_rel(
+                "2020-01-02_GerZNDnDZVjsOtar", 800, 600, "image/jpeg", "scale"
+            ),
+            "url_cache_thumbnails/2020-01-02/GerZNDnDZVjsOtar/800-600-image-jpeg-scale",
+        )
+        self.assertEqual(
+            self.filepaths.url_cache_thumbnail(
+                "2020-01-02_GerZNDnDZVjsOtar", 800, 600, "image/jpeg", "scale"
+            ),
+            "/media_store/url_cache_thumbnails/2020-01-02/GerZNDnDZVjsOtar/800-600-image-jpeg-scale",
+        )
+
+    def test_url_cache_thumbnail_legacy(self) -> None:
+        """Test old-style URL cache thumbnail paths"""
+        self.assertEqual(
+            self.filepaths.url_cache_thumbnail_rel(
+                "GerZNDnDZVjsOtardLuwfIBg", 800, 600, "image/jpeg", "scale"
+            ),
+            "url_cache_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale",
+        )
+        self.assertEqual(
+            self.filepaths.url_cache_thumbnail(
+                "GerZNDnDZVjsOtardLuwfIBg", 800, 600, "image/jpeg", "scale"
+            ),
+            "/media_store/url_cache_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale",
+        )
+
+    def test_url_cache_thumbnail_directory(self) -> None:
+        """Test URL cache thumbnail directory paths"""
+        self.assertEqual(
+            self.filepaths.url_cache_thumbnail_directory_rel(
+                "2020-01-02_GerZNDnDZVjsOtar"
+            ),
+            "url_cache_thumbnails/2020-01-02/GerZNDnDZVjsOtar",
+        )
+        self.assertEqual(
+            self.filepaths.url_cache_thumbnail_directory("2020-01-02_GerZNDnDZVjsOtar"),
+            "/media_store/url_cache_thumbnails/2020-01-02/GerZNDnDZVjsOtar",
+        )
+
+    def test_url_cache_thumbnail_directory_legacy(self) -> None:
+        """Test old-style URL cache thumbnail directory paths"""
+        self.assertEqual(
+            self.filepaths.url_cache_thumbnail_directory_rel(
+                "GerZNDnDZVjsOtardLuwfIBg"
+            ),
+            "url_cache_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg",
+        )
+        self.assertEqual(
+            self.filepaths.url_cache_thumbnail_directory("GerZNDnDZVjsOtardLuwfIBg"),
+            "/media_store/url_cache_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg",
+        )
+
+    def test_url_cache_thumbnail_dirs_to_delete(self) -> None:
+        """Test URL cache thumbnail cleanup paths"""
+        self.assertEqual(
+            self.filepaths.url_cache_thumbnail_dirs_to_delete(
+                "2020-01-02_GerZNDnDZVjsOtar"
+            ),
+            [
+                "/media_store/url_cache_thumbnails/2020-01-02/GerZNDnDZVjsOtar",
+                "/media_store/url_cache_thumbnails/2020-01-02",
+            ],
+        )
+
+    def test_url_cache_thumbnail_dirs_to_delete_legacy(self) -> None:
+        """Test old-style URL cache thumbnail cleanup paths"""
+        self.assertEqual(
+            self.filepaths.url_cache_thumbnail_dirs_to_delete(
+                "GerZNDnDZVjsOtardLuwfIBg"
+            ),
+            [
+                "/media_store/url_cache_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg",
+                "/media_store/url_cache_thumbnails/Ge/rZ",
+                "/media_store/url_cache_thumbnails/Ge",
+            ],
+        )
+
+    def test_server_name_validation(self) -> None:
+        """Test validation of server names"""
+        self._test_path_validation(
+            [
+                "remote_media_filepath_rel",
+                "remote_media_filepath",
+                "remote_media_thumbnail_rel",
+                "remote_media_thumbnail",
+                "remote_media_thumbnail_rel_legacy",
+                "remote_media_thumbnail_dir",
+            ],
+            parameter="server_name",
+            valid_values=[
+                "matrix.org",
+                "matrix.org:8448",
+                "matrix-federation.matrix.org",
+                "matrix-federation.matrix.org:8448",
+                "10.1.12.123",
+                "10.1.12.123:8448",
+                "[fd00:abcd::ffff]",
+                "[fd00:abcd::ffff]:8448",
+            ],
+            invalid_values=[
+                "/matrix.org",
+                "matrix.org/..",
+                "matrix.org\x00",
+                "",
+                ".",
+                "..",
+                "/",
+            ],
+        )
+
+    def test_file_id_validation(self) -> None:
+        """Test validation of local, remote and legacy URL cache file / media IDs"""
+        # File / media IDs get split into three parts to form paths, consisting of the
+        # first two characters, next two characters and rest of the ID.
+        valid_file_ids = [
+            "GerZNDnDZVjsOtardLuwfIBg",
+            # Unexpected, but produces an acceptable path:
+            "GerZN",  # "N" becomes the last directory
+        ]
+        invalid_file_ids = [
+            "/erZNDnDZVjsOtardLuwfIBg",
+            "Ge/ZNDnDZVjsOtardLuwfIBg",
+            "GerZ/DnDZVjsOtardLuwfIBg",
+            "GerZ/..",
+            "G\x00rZNDnDZVjsOtardLuwfIBg",
+            "Ger\x00NDnDZVjsOtardLuwfIBg",
+            "GerZNDnDZVjsOtardLuwfIBg\x00",
+            "",
+            "Ge",
+            "GerZ",
+            "GerZ.",
+            "..rZNDnDZVjsOtardLuwfIBg",
+            "Ge..NDnDZVjsOtardLuwfIBg",
+            "GerZ..",
+            "GerZ/",
+        ]
+
+        self._test_path_validation(
+            [
+                "local_media_filepath_rel",
+                "local_media_filepath",
+                "local_media_thumbnail_rel",
+                "local_media_thumbnail",
+                "local_media_thumbnail_dir",
+                # Legacy URL cache media IDs
+                "url_cache_filepath_rel",
+                "url_cache_filepath",
+                # `url_cache_filepath_dirs_to_delete` is tested below.
+                "url_cache_thumbnail_rel",
+                "url_cache_thumbnail",
+                "url_cache_thumbnail_directory_rel",
+                "url_cache_thumbnail_directory",
+                "url_cache_thumbnail_dirs_to_delete",
+            ],
+            parameter="media_id",
+            valid_values=valid_file_ids,
+            invalid_values=invalid_file_ids,
+        )
+
+        # `url_cache_filepath_dirs_to_delete` ignores what would be the last path
+        # component, so only the first 4 characters matter.
+        self._test_path_validation(
+            [
+                "url_cache_filepath_dirs_to_delete",
+            ],
+            parameter="media_id",
+            valid_values=valid_file_ids,
+            invalid_values=[
+                "/erZNDnDZVjsOtardLuwfIBg",
+                "Ge/ZNDnDZVjsOtardLuwfIBg",
+                "G\x00rZNDnDZVjsOtardLuwfIBg",
+                "Ger\x00NDnDZVjsOtardLuwfIBg",
+                "",
+                "Ge",
+                "..rZNDnDZVjsOtardLuwfIBg",
+                "Ge..NDnDZVjsOtardLuwfIBg",
+            ],
+        )
+
+        self._test_path_validation(
+            [
+                "remote_media_filepath_rel",
+                "remote_media_filepath",
+                "remote_media_thumbnail_rel",
+                "remote_media_thumbnail",
+                "remote_media_thumbnail_rel_legacy",
+                "remote_media_thumbnail_dir",
+            ],
+            parameter="file_id",
+            valid_values=valid_file_ids,
+            invalid_values=invalid_file_ids,
+        )
+
+    def test_url_cache_media_id_validation(self) -> None:
+        """Test validation of URL cache media IDs"""
+        self._test_path_validation(
+            [
+                "url_cache_filepath_rel",
+                "url_cache_filepath",
+                # `url_cache_filepath_dirs_to_delete` only cares about the date prefix
+                "url_cache_thumbnail_rel",
+                "url_cache_thumbnail",
+                "url_cache_thumbnail_directory_rel",
+                "url_cache_thumbnail_directory",
+                "url_cache_thumbnail_dirs_to_delete",
+            ],
+            parameter="media_id",
+            valid_values=[
+                "2020-01-02_GerZNDnDZVjsOtar",
+                "2020-01-02_G",  # Unexpected, but produces an acceptable path
+            ],
+            invalid_values=[
+                "2020-01-02",
+                "2020-01-02-",
+                "2020-01-02-.",
+                "2020-01-02-..",
+                "2020-01-02-/",
+                "2020-01-02-/GerZNDnDZVjsOtar",
+                "2020-01-02-GerZNDnDZVjsOtar/..",
+                "2020-01-02-GerZNDnDZVjsOtar\x00",
+            ],
+        )
+
+    def test_content_type_validation(self) -> None:
+        """Test validation of thumbnail content types"""
+        self._test_path_validation(
+            [
+                "local_media_thumbnail_rel",
+                "local_media_thumbnail",
+                "remote_media_thumbnail_rel",
+                "remote_media_thumbnail",
+                "remote_media_thumbnail_rel_legacy",
+                "url_cache_thumbnail_rel",
+                "url_cache_thumbnail",
+            ],
+            parameter="content_type",
+            valid_values=[
+                "image/jpeg",
+            ],
+            invalid_values=[
+                "",  # ValueError: not enough values to unpack
+                "image/jpeg/abc",  # ValueError: too many values to unpack
+                "image/jpeg\x00",
+            ],
+        )
+
+    def test_thumbnail_method_validation(self) -> None:
+        """Test validation of thumbnail methods"""
+        self._test_path_validation(
+            [
+                "local_media_thumbnail_rel",
+                "local_media_thumbnail",
+                "remote_media_thumbnail_rel",
+                "remote_media_thumbnail",
+                "url_cache_thumbnail_rel",
+                "url_cache_thumbnail",
+            ],
+            parameter="method",
+            valid_values=[
+                "crop",
+                "scale",
+            ],
+            invalid_values=[
+                "/scale",
+                "scale/..",
+                "scale\x00",
+                "/",
+            ],
+        )
+
+    def _test_path_validation(
+        self,
+        methods: Iterable[str],
+        parameter: str,
+        valid_values: Iterable[str],
+        invalid_values: Iterable[str],
+    ) -> None:
+        """Test that the specified methods validate the named parameter as expected
+
+        Args:
+            methods: The names of `MediaFilePaths` methods to test
+            parameter: The name of the parameter to test
+            valid_values: A list of parameter values that are expected to be accepted
+            invalid_values: A list of parameter values that are expected to be rejected
+
+        Raises:
+            AssertionError: If a value was accepted when it should have failed
+                validation.
+            ValueError: If a value failed validation when it should have been accepted.
+        """
+        for method in methods:
+            get_path = getattr(self.filepaths, method)
+
+            parameters = inspect.signature(get_path).parameters
+            kwargs = {
+                "server_name": "matrix.org",
+                "media_id": "GerZNDnDZVjsOtardLuwfIBg",
+                "file_id": "GerZNDnDZVjsOtardLuwfIBg",
+                "width": 800,
+                "height": 600,
+                "content_type": "image/jpeg",
+                "method": "scale",
+            }
+
+            if get_path.__name__.startswith("url_"):
+                kwargs["media_id"] = "2020-01-02_GerZNDnDZVjsOtar"
+
+            kwargs = {k: v for k, v in kwargs.items() if k in parameters}
+            kwargs.pop(parameter)
+
+            for value in valid_values:
+                kwargs[parameter] = value
+                get_path(**kwargs)
+                # No exception should be raised
+
+            for value in invalid_values:
+                with self.assertRaises(ValueError):
+                    kwargs[parameter] = value
+                    path_or_list = get_path(**kwargs)
+                    self.fail(
+                        f"{value!r} unexpectedly passed validation: "
+                        f"{method} returned {path_or_list!r}"
+                    )
+
+
+class MediaFilePathsJailTestCase(unittest.TestCase):
+    def _check_relative_path(self, filepaths: MediaFilePaths, path: str) -> None:
+        """Passes a relative path through the jail check.
+
+        Args:
+            filepaths: The `MediaFilePaths` instance.
+            path: A path relative to the media store directory.
+
+        Raises:
+            ValueError: If the jail check fails.
+        """
+
+        @_wrap_with_jail_check(relative=True)
+        def _make_relative_path(self: MediaFilePaths, path: str) -> str:
+            return path
+
+        _make_relative_path(filepaths, path)
+
+    def _check_absolute_path(self, filepaths: MediaFilePaths, path: str) -> None:
+        """Passes an absolute path through the jail check.
+
+        Args:
+            filepaths: The `MediaFilePaths` instance.
+            path: A path relative to the media store directory.
+
+        Raises:
+            ValueError: If the jail check fails.
+        """
+
+        @_wrap_with_jail_check(relative=False)
+        def _make_absolute_path(self: MediaFilePaths, path: str) -> str:
+            return os.path.join(self.base_path, path)
+
+        _make_absolute_path(filepaths, path)
+
+    def test_traversal_inside(self) -> None:
+        """Test the jail check for paths that stay within the media directory."""
+        # Despite the `../`s, these paths still lie within the media directory and it's
+        # expected for the jail check to allow them through.
+        # These paths ought to trip the other checks in place and should never be
+        # returned.
+        filepaths = MediaFilePaths("/media_store")
+        path = "url_cache/2020-01-02/../../GerZNDnDZVjsOtar"
+        self._check_relative_path(filepaths, path)
+        self._check_absolute_path(filepaths, path)
+
+    def test_traversal_outside(self) -> None:
+        """Test that the jail check fails for paths that escape the media directory."""
+        filepaths = MediaFilePaths("/media_store")
+        path = "url_cache/2020-01-02/../../../GerZNDnDZVjsOtar"
+        with self.assertRaises(ValueError):
+            self._check_relative_path(filepaths, path)
+        with self.assertRaises(ValueError):
+            self._check_absolute_path(filepaths, path)
+
+    def test_traversal_reentry(self) -> None:
+        """Test the jail check for paths that exit and re-enter the media directory."""
+        # These paths lie outside the media directory if it is a symlink, and inside
+        # otherwise. Ideally the check should fail, but this proves difficult.
+        # This test documents the behaviour for this edge case.
+        # These paths ought to trip the other checks in place and should never be
+        # returned.
+        filepaths = MediaFilePaths("/media_store")
+        path = "url_cache/2020-01-02/../../../media_store/GerZNDnDZVjsOtar"
+        self._check_relative_path(filepaths, path)
+        self._check_absolute_path(filepaths, path)
+
+    def test_symlink(self) -> None:
+        """Test that a symlink does not cause the jail check to fail."""
+        media_store_path = self.mktemp()
+
+        # symlink the media store directory
+        os.symlink("/mnt/synapse/media_store", media_store_path)
+
+        # Test that relative and absolute paths don't trip the check
+        # NB: `media_store_path` is a relative path
+        filepaths = MediaFilePaths(media_store_path)
+        self._check_relative_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar")
+        self._check_absolute_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar")
+
+        filepaths = MediaFilePaths(os.path.abspath(media_store_path))
+        self._check_relative_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar")
+        self._check_absolute_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar")
+
+    def test_symlink_subdirectory(self) -> None:
+        """Test that a symlinked subdirectory does not cause the jail check to fail."""
+        media_store_path = self.mktemp()
+        os.mkdir(media_store_path)
+
+        # symlink `url_cache/`
+        os.symlink(
+            "/mnt/synapse/media_store_url_cache",
+            os.path.join(media_store_path, "url_cache"),
+        )
+
+        # Test that relative and absolute paths don't trip the check
+        # NB: `media_store_path` is a relative path
+        filepaths = MediaFilePaths(media_store_path)
+        self._check_relative_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar")
+        self._check_absolute_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar")
+
+        filepaths = MediaFilePaths(os.path.abspath(media_store_path))
+        self._check_relative_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar")
+        self._check_absolute_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar")
diff --git a/tests/media/test_html_preview.py b/tests/media/test_html_preview.py
new file mode 100644
index 0000000000..e7da75db3e
--- /dev/null
+++ b/tests/media/test_html_preview.py
@@ -0,0 +1,542 @@
+# Copyright 2014-2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.media.preview_html import (
+    _get_html_media_encodings,
+    decode_body,
+    parse_html_to_open_graph,
+    summarize_paragraphs,
+)
+
+from tests import unittest
+
+try:
+    import lxml
+except ImportError:
+    lxml = None
+
+
+class SummarizeTestCase(unittest.TestCase):
+    if not lxml:
+        skip = "url preview feature requires lxml"
+
+    def test_long_summarize(self) -> None:
+        example_paras = [
+            """Tromsø (Norwegian pronunciation: [ˈtrʊmsœ] ( listen); Northern Sami:
+            Romsa; Finnish: Tromssa[2] Kven: Tromssa) is a city and municipality in
+            Troms county, Norway. The administrative centre of the municipality is
+            the city of Tromsø. Outside of Norway, Tromso and Tromsö are
+            alternative spellings of the city.Tromsø is considered the northernmost
+            city in the world with a population above 50,000. The most populous town
+            north of it is Alta, Norway, with a population of 14,272 (2013).""",
+            """Tromsø lies in Northern Norway. The municipality has a population of
+            (2015) 72,066, but with an annual influx of students it has over 75,000
+            most of the year. It is the largest urban area in Northern Norway and the
+            third largest north of the Arctic Circle (following Murmansk and Norilsk).
+            Most of Tromsø, including the city centre, is located on the island of
+            Tromsøya, 350 kilometres (217 mi) north of the Arctic Circle. In 2012,
+            Tromsøya had a population of 36,088. Substantial parts of the urban area
+            are also situated on the mainland to the east, and on parts of Kvaløya—a
+            large island to the west. Tromsøya is connected to the mainland by the Tromsø
+            Bridge and the Tromsøysund Tunnel, and to the island of Kvaløya by the
+            Sandnessund Bridge. Tromsø Airport connects the city to many destinations
+            in Europe. The city is warmer than most other places located on the same
+            latitude, due to the warming effect of the Gulf Stream.""",
+            """The city centre of Tromsø contains the highest number of old wooden
+            houses in Northern Norway, the oldest house dating from 1789. The Arctic
+            Cathedral, a modern church from 1965, is probably the most famous landmark
+            in Tromsø. The city is a cultural centre for its region, with several
+            festivals taking place in the summer. Some of Norway's best-known
+             musicians, Torbjørn Brundtland and Svein Berge of the electronica duo
+             Röyksopp and Lene Marlin grew up and started their careers in Tromsø.
+             Noted electronic musician Geir Jenssen also hails from Tromsø.""",
+        ]
+
+        desc = summarize_paragraphs(example_paras, min_size=200, max_size=500)
+
+        self.assertEqual(
+            desc,
+            "Tromsø (Norwegian pronunciation: [ˈtrʊmsœ] ( listen); Northern Sami:"
+            " Romsa; Finnish: Tromssa[2] Kven: Tromssa) is a city and municipality in"
+            " Troms county, Norway. The administrative centre of the municipality is"
+            " the city of Tromsø. Outside of Norway, Tromso and Tromsö are"
+            " alternative spellings of the city.Tromsø is considered the northernmost"
+            " city in the world with a population above 50,000. The most populous town"
+            " north of it is Alta, Norway, with a population of 14,272 (2013).",
+        )
+
+        desc = summarize_paragraphs(example_paras[1:], min_size=200, max_size=500)
+
+        self.assertEqual(
+            desc,
+            "Tromsø lies in Northern Norway. The municipality has a population of"
+            " (2015) 72,066, but with an annual influx of students it has over 75,000"
+            " most of the year. It is the largest urban area in Northern Norway and the"
+            " third largest north of the Arctic Circle (following Murmansk and Norilsk)."
+            " Most of Tromsø, including the city centre, is located on the island of"
+            " Tromsøya, 350 kilometres (217 mi) north of the Arctic Circle. In 2012,"
+            " Tromsøya had a population of 36,088. Substantial parts of the urban…",
+        )
+
+    def test_short_summarize(self) -> None:
+        example_paras = [
+            "Tromsø (Norwegian pronunciation: [ˈtrʊmsœ] ( listen); Northern Sami:"
+            " Romsa; Finnish: Tromssa[2] Kven: Tromssa) is a city and municipality in"
+            " Troms county, Norway.",
+            "Tromsø lies in Northern Norway. The municipality has a population of"
+            " (2015) 72,066, but with an annual influx of students it has over 75,000"
+            " most of the year.",
+            "The city centre of Tromsø contains the highest number of old wooden"
+            " houses in Northern Norway, the oldest house dating from 1789. The Arctic"
+            " Cathedral, a modern church from 1965, is probably the most famous landmark"
+            " in Tromsø.",
+        ]
+
+        desc = summarize_paragraphs(example_paras, min_size=200, max_size=500)
+
+        self.assertEqual(
+            desc,
+            "Tromsø (Norwegian pronunciation: [ˈtrʊmsœ] ( listen); Northern Sami:"
+            " Romsa; Finnish: Tromssa[2] Kven: Tromssa) is a city and municipality in"
+            " Troms county, Norway.\n"
+            "\n"
+            "Tromsø lies in Northern Norway. The municipality has a population of"
+            " (2015) 72,066, but with an annual influx of students it has over 75,000"
+            " most of the year.",
+        )
+
+    def test_small_then_large_summarize(self) -> None:
+        example_paras = [
+            "Tromsø (Norwegian pronunciation: [ˈtrʊmsœ] ( listen); Northern Sami:"
+            " Romsa; Finnish: Tromssa[2] Kven: Tromssa) is a city and municipality in"
+            " Troms county, Norway.",
+            "Tromsø lies in Northern Norway. The municipality has a population of"
+            " (2015) 72,066, but with an annual influx of students it has over 75,000"
+            " most of the year."
+            " The city centre of Tromsø contains the highest number of old wooden"
+            " houses in Northern Norway, the oldest house dating from 1789. The Arctic"
+            " Cathedral, a modern church from 1965, is probably the most famous landmark"
+            " in Tromsø.",
+        ]
+
+        desc = summarize_paragraphs(example_paras, min_size=200, max_size=500)
+        self.assertEqual(
+            desc,
+            "Tromsø (Norwegian pronunciation: [ˈtrʊmsœ] ( listen); Northern Sami:"
+            " Romsa; Finnish: Tromssa[2] Kven: Tromssa) is a city and municipality in"
+            " Troms county, Norway.\n"
+            "\n"
+            "Tromsø lies in Northern Norway. The municipality has a population of"
+            " (2015) 72,066, but with an annual influx of students it has over 75,000"
+            " most of the year. The city centre of Tromsø contains the highest number"
+            " of old wooden houses in Northern Norway, the oldest house dating from"
+            " 1789. The Arctic Cathedral, a modern church from…",
+        )
+
+
+class OpenGraphFromHtmlTestCase(unittest.TestCase):
+    if not lxml:
+        skip = "url preview feature requires lxml"
+
+    def test_simple(self) -> None:
+        html = b"""
+        <html>
+        <head><title>Foo</title></head>
+        <body>
+        Some text.
+        </body>
+        </html>
+        """
+
+        tree = decode_body(html, "http://example.com/test.html")
+        og = parse_html_to_open_graph(tree)
+
+        self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
+
+    def test_comment(self) -> None:
+        html = b"""
+        <html>
+        <head><title>Foo</title></head>
+        <body>
+        <!-- HTML comment -->
+        Some text.
+        </body>
+        </html>
+        """
+
+        tree = decode_body(html, "http://example.com/test.html")
+        og = parse_html_to_open_graph(tree)
+
+        self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
+
+    def test_comment2(self) -> None:
+        html = b"""
+        <html>
+        <head><title>Foo</title></head>
+        <body>
+        Some text.
+        <!-- HTML comment -->
+        Some more text.
+        <p>Text</p>
+        More text
+        </body>
+        </html>
+        """
+
+        tree = decode_body(html, "http://example.com/test.html")
+        og = parse_html_to_open_graph(tree)
+
+        self.assertEqual(
+            og,
+            {
+                "og:title": "Foo",
+                "og:description": "Some text.\n\nSome more text.\n\nText\n\nMore text",
+            },
+        )
+
+    def test_script(self) -> None:
+        html = b"""
+        <html>
+        <head><title>Foo</title></head>
+        <body>
+        <script> (function() {})() </script>
+        Some text.
+        </body>
+        </html>
+        """
+
+        tree = decode_body(html, "http://example.com/test.html")
+        og = parse_html_to_open_graph(tree)
+
+        self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
+
+    def test_missing_title(self) -> None:
+        html = b"""
+        <html>
+        <body>
+        Some text.
+        </body>
+        </html>
+        """
+
+        tree = decode_body(html, "http://example.com/test.html")
+        og = parse_html_to_open_graph(tree)
+
+        self.assertEqual(og, {"og:title": None, "og:description": "Some text."})
+
+        # Another variant is a title with no content.
+        html = b"""
+        <html>
+        <head><title></title></head>
+        <body>
+        <h1>Title</h1>
+        </body>
+        </html>
+        """
+
+        tree = decode_body(html, "http://example.com/test.html")
+        og = parse_html_to_open_graph(tree)
+
+        self.assertEqual(og, {"og:title": "Title", "og:description": "Title"})
+
+    def test_h1_as_title(self) -> None:
+        html = b"""
+        <html>
+        <meta property="og:description" content="Some text."/>
+        <body>
+        <h1>Title</h1>
+        </body>
+        </html>
+        """
+
+        tree = decode_body(html, "http://example.com/test.html")
+        og = parse_html_to_open_graph(tree)
+
+        self.assertEqual(og, {"og:title": "Title", "og:description": "Some text."})
+
+    def test_empty_description(self) -> None:
+        """Description tags with empty content should be ignored."""
+        html = b"""
+        <html>
+        <meta property="og:description" content=""/>
+        <meta property="og:description"/>
+        <meta name="description" content=""/>
+        <meta name="description"/>
+        <meta name="description" content="Finally!"/>
+        <body>
+        <h1>Title</h1>
+        </body>
+        </html>
+        """
+
+        tree = decode_body(html, "http://example.com/test.html")
+        og = parse_html_to_open_graph(tree)
+
+        self.assertEqual(og, {"og:title": "Title", "og:description": "Finally!"})
+
+    def test_missing_title_and_broken_h1(self) -> None:
+        html = b"""
+        <html>
+        <body>
+        <h1><a href="foo"/></h1>
+        Some text.
+        </body>
+        </html>
+        """
+
+        tree = decode_body(html, "http://example.com/test.html")
+        og = parse_html_to_open_graph(tree)
+
+        self.assertEqual(og, {"og:title": None, "og:description": "Some text."})
+
+    def test_empty(self) -> None:
+        """Test a body with no data in it."""
+        html = b""
+        tree = decode_body(html, "http://example.com/test.html")
+        self.assertIsNone(tree)
+
+    def test_no_tree(self) -> None:
+        """A valid body with no tree in it."""
+        html = b"\x00"
+        tree = decode_body(html, "http://example.com/test.html")
+        self.assertIsNone(tree)
+
+    def test_xml(self) -> None:
+        """Test decoding XML and ensure it works properly."""
+        # Note that the strip() call is important to ensure the xml tag starts
+        # at the initial byte.
+        html = b"""
+        <?xml version="1.0" encoding="UTF-8"?>
+
+        <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+        <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+        <head><title>Foo</title></head><body>Some text.</body></html>
+        """.strip()
+        tree = decode_body(html, "http://example.com/test.html")
+        og = parse_html_to_open_graph(tree)
+        self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
+
+    def test_invalid_encoding(self) -> None:
+        """An invalid character encoding should be ignored and treated as UTF-8, if possible."""
+        html = b"""
+        <html>
+        <head><title>Foo</title></head>
+        <body>
+        Some text.
+        </body>
+        </html>
+        """
+        tree = decode_body(html, "http://example.com/test.html", "invalid-encoding")
+        og = parse_html_to_open_graph(tree)
+        self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
+
+    def test_invalid_encoding2(self) -> None:
+        """A body which doesn't match the sent character encoding."""
+        # Note that this contains an invalid UTF-8 sequence in the title.
+        html = b"""
+        <html>
+        <head><title>\xff\xff Foo</title></head>
+        <body>
+        Some text.
+        </body>
+        </html>
+        """
+        tree = decode_body(html, "http://example.com/test.html")
+        og = parse_html_to_open_graph(tree)
+        self.assertEqual(og, {"og:title": "ÿÿ Foo", "og:description": "Some text."})
+
+    def test_windows_1252(self) -> None:
+        """A body which uses cp1252, but doesn't declare that."""
+        html = b"""
+        <html>
+        <head><title>\xf3</title></head>
+        <body>
+        Some text.
+        </body>
+        </html>
+        """
+        tree = decode_body(html, "http://example.com/test.html")
+        og = parse_html_to_open_graph(tree)
+        self.assertEqual(og, {"og:title": "ó", "og:description": "Some text."})
+
+    def test_twitter_tag(self) -> None:
+        """Twitter card tags should be used if nothing else is available."""
+        html = b"""
+        <html>
+        <meta name="twitter:card" content="summary">
+        <meta name="twitter:description" content="Description">
+        <meta name="twitter:site" content="@matrixdotorg">
+        </html>
+        """
+        tree = decode_body(html, "http://example.com/test.html")
+        og = parse_html_to_open_graph(tree)
+        self.assertEqual(
+            og,
+            {
+                "og:title": None,
+                "og:description": "Description",
+                "og:site_name": "@matrixdotorg",
+            },
+        )
+
+        # But they shouldn't override Open Graph values.
+        html = b"""
+        <html>
+        <meta name="twitter:card" content="summary">
+        <meta name="twitter:description" content="Description">
+        <meta property="og:description" content="Real Description">
+        <meta name="twitter:site" content="@matrixdotorg">
+        <meta property="og:site_name" content="matrix.org">
+        </html>
+        """
+        tree = decode_body(html, "http://example.com/test.html")
+        og = parse_html_to_open_graph(tree)
+        self.assertEqual(
+            og,
+            {
+                "og:title": None,
+                "og:description": "Real Description",
+                "og:site_name": "matrix.org",
+            },
+        )
+
+    def test_nested_nodes(self) -> None:
+        """A body with some nested nodes. Tests that we iterate over children
+        in the right order (and don't reverse the order of the text)."""
+        html = b"""
+        <a href="somewhere">Welcome <b>the bold <u>and underlined text <svg>
+        with a cheeky SVG</svg></u> and <strong>some</strong> tail text</b></a>
+        """
+        tree = decode_body(html, "http://example.com/test.html")
+        og = parse_html_to_open_graph(tree)
+        self.assertEqual(
+            og,
+            {
+                "og:title": None,
+                "og:description": "Welcome\n\nthe bold\n\nand underlined text\n\nand\n\nsome\n\ntail text",
+            },
+        )
+
+
+class MediaEncodingTestCase(unittest.TestCase):
+    def test_meta_charset(self) -> None:
+        """A character encoding is found via the meta tag."""
+        encodings = _get_html_media_encodings(
+            b"""
+        <html>
+        <head><meta charset="ascii">
+        </head>
+        </html>
+        """,
+            "text/html",
+        )
+        self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"])
+
+        # A less well-formed version.
+        encodings = _get_html_media_encodings(
+            b"""
+        <html>
+        <head>< meta charset = ascii>
+        </head>
+        </html>
+        """,
+            "text/html",
+        )
+        self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"])
+
+    def test_meta_charset_underscores(self) -> None:
+        """A character encoding name containing an underscore is found."""
+        encodings = _get_html_media_encodings(
+            b"""
+        <html>
+        <head><meta charset="Shift_JIS">
+        </head>
+        </html>
+        """,
+            "text/html",
+        )
+        self.assertEqual(list(encodings), ["shift_jis", "utf-8", "cp1252"])
+
+    def test_xml_encoding(self) -> None:
+        """A character encoding is found via the XML declaration."""
+        encodings = _get_html_media_encodings(
+            b"""
+        <?xml version="1.0" encoding="ascii"?>
+        <html>
+        </html>
+        """,
+            "text/html",
+        )
+        self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"])
+
+    def test_meta_xml_encoding(self) -> None:
+        """Meta tags take precedence over XML encoding."""
+        encodings = _get_html_media_encodings(
+            b"""
+        <?xml version="1.0" encoding="ascii"?>
+        <html>
+        <head><meta charset="UTF-16">
+        </head>
+        </html>
+        """,
+            "text/html",
+        )
+        self.assertEqual(list(encodings), ["utf-16", "ascii", "utf-8", "cp1252"])
+
+    def test_content_type(self) -> None:
+        """A character encoding is found via the Content-Type header."""
+        # Test a few variations of the header.
+        headers = (
+            'text/html; charset="ascii";',
+            "text/html;charset=ascii;",
+            'text/html;  charset="ascii"',
+            "text/html; charset=ascii",
+            'text/html; charset="ascii;',
+            'text/html; charset=ascii";',
+        )
+        for header in headers:
+            encodings = _get_html_media_encodings(b"", header)
+            self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"])
+
+    def test_fallback(self) -> None:
+        """A character encoding cannot be found in the body or header."""
+        encodings = _get_html_media_encodings(b"", "text/html")
+        self.assertEqual(list(encodings), ["utf-8", "cp1252"])
+
+    def test_duplicates(self) -> None:
+        """Ensure each encoding is only attempted once."""
+        encodings = _get_html_media_encodings(
+            b"""
+        <?xml version="1.0" encoding="utf8"?>
+        <html>
+        <head><meta charset="UTF-8">
+        </head>
+        </html>
+        """,
+            'text/html; charset="UTF_8"',
+        )
+        self.assertEqual(list(encodings), ["utf-8", "cp1252"])
+
+    def test_unknown_invalid(self) -> None:
+        """A character encoding should be ignored if it is unknown or invalid."""
+        encodings = _get_html_media_encodings(
+            b"""
+        <html>
+        <head><meta charset="invalid">
+        </head>
+        </html>
+        """,
+            'text/html; charset="invalid"',
+        )
+        self.assertEqual(list(encodings), ["utf-8", "cp1252"])
diff --git a/tests/media/test_media_storage.py b/tests/media/test_media_storage.py
new file mode 100644
index 0000000000..870047d0f2
--- /dev/null
+++ b/tests/media/test_media_storage.py
@@ -0,0 +1,792 @@
+# Copyright 2018-2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import shutil
+import tempfile
+from binascii import unhexlify
+from io import BytesIO
+from typing import Any, BinaryIO, ClassVar, Dict, List, Optional, Tuple, Union
+from unittest.mock import Mock
+from urllib import parse
+
+import attr
+from parameterized import parameterized, parameterized_class
+from PIL import Image as Image
+from typing_extensions import Literal
+
+from twisted.internet import defer
+from twisted.internet.defer import Deferred
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.api.errors import Codes
+from synapse.events import EventBase
+from synapse.events.spamcheck import load_legacy_spam_checkers
+from synapse.http.types import QueryParams
+from synapse.logging.context import make_deferred_yieldable
+from synapse.media._base import FileInfo
+from synapse.media.filepath import MediaFilePaths
+from synapse.media.media_storage import MediaStorage, ReadableFileWrapper
+from synapse.media.storage_provider import FileStorageProviderBackend
+from synapse.module_api import ModuleApi
+from synapse.rest import admin
+from synapse.rest.client import login
+from synapse.server import HomeServer
+from synapse.types import JsonDict, RoomAlias
+from synapse.util import Clock
+
+from tests import unittest
+from tests.server import FakeChannel, FakeSite, make_request
+from tests.test_utils import SMALL_PNG
+from tests.utils import default_config
+
+
+class MediaStorageTests(unittest.HomeserverTestCase):
+    needs_threadpool = True
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-")  # holds both stores
+        self.addCleanup(shutil.rmtree, self.test_dir)  # removed once the test finishes
+
+        self.primary_base_path = os.path.join(self.test_dir, "primary")
+        self.secondary_base_path = os.path.join(self.test_dir, "secondary")
+
+        hs.config.media.media_store_path = self.primary_base_path
+
+        storage_providers = [FileStorageProviderBackend(hs, self.secondary_base_path)]
+
+        self.filepaths = MediaFilePaths(self.primary_base_path)
+        self.media_storage = MediaStorage(
+            hs, self.primary_base_path, self.filepaths, storage_providers
+        )
+
+    def test_ensure_media_is_in_local_cache(self) -> None:
+        """A file held only by a storage provider is copied into the local store."""
+        media_id = "some_media_id"
+        test_body = "Test\n"
+
+        # Create a file that is in the storage provider but not in the primary store.
+        rel_path = self.filepaths.local_media_filepath_rel(media_id)
+        secondary_path = os.path.join(self.secondary_base_path, rel_path)
+
+        os.makedirs(os.path.dirname(secondary_path))
+
+        with open(secondary_path, "w") as f:
+            f.write(test_body)
+
+        # Now we run ensure_media_is_in_local_cache, which should copy the file
+        # to the local cache.
+        file_info = FileInfo(None, media_id)
+
+        # This uses a real blocking threadpool so we have to wait for it to be
+        # actually done :/
+        x = defer.ensureDeferred(
+            self.media_storage.ensure_media_is_in_local_cache(file_info)
+        )
+
+        # Hotloop until the threadpool does its job...
+        self.wait_on_thread(x)
+
+        local_path = self.get_success(x)
+
+        self.assertTrue(os.path.exists(local_path))
+
+        # The file must be under the primary store (compares whole path components).
+        self.assertEqual(
+            os.path.commonpath([self.primary_base_path, local_path]),
+            self.primary_base_path,
+        )
+
+        with open(local_path) as f:
+            body = f.read()
+
+        self.assertEqual(test_body, body)
+
+
+@attr.s(auto_attribs=True, slots=True, frozen=True)
+class _TestImage:
+    """An image for testing thumbnailing with the expected results
+
+    Attributes:
+        data: The raw image to thumbnail
+        content_type: The type of the image as a content type, e.g. "image/png"
+        extension: The extension associated with the format, e.g. ".png"
+        expected_cropped: The expected bytes from cropped thumbnailing, or None if
+            test should just check for success.
+        expected_scaled: The expected bytes from scaled thumbnailing, or None if
+            test should just check for a valid image returned.
+        expected_found: True if the file should exist on the server, or False if
+            a 404/400 is expected.
+        unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or
+            False if the thumbnailing should succeed or a normal 404 is expected.
+    """
+
+    data: bytes
+    content_type: bytes
+    extension: bytes
+    expected_cropped: Optional[bytes] = None
+    expected_scaled: Optional[bytes] = None
+    expected_found: bool = True
+    unable_to_thumbnail: bool = False
+
+
+@parameterized_class(
+    ("test_image",),
+    [
+        # small png
+        (
+            _TestImage(
+                SMALL_PNG,
+                b"image/png",
+                b".png",
+                unhexlify(
+                    b"89504e470d0a1a0a0000000d4948445200000020000000200806"
+                    b"000000737a7af40000001a49444154789cedc101010000008220"
+                    b"ffaf6e484001000000ef0610200001194334ee0000000049454e"
+                    b"44ae426082"
+                ),
+                unhexlify(
+                    b"89504e470d0a1a0a0000000d4948445200000001000000010806"
+                    b"0000001f15c4890000000d49444154789c636060606000000005"
+                    b"0001a5f645400000000049454e44ae426082"
+                ),
+            ),
+        ),
+        # small png with transparency.
+        (
+            _TestImage(
+                unhexlify(
+                    b"89504e470d0a1a0a0000000d49484452000000010000000101000"
+                    b"00000376ef9240000000274524e5300010194fdae0000000a4944"
+                    b"4154789c636800000082008177cd72b60000000049454e44ae426"
+                    b"082"
+                ),
+                b"image/png",
+                b".png",
+                # Note that we don't check the output since it varies across
+                # different versions of Pillow.
+            ),
+        ),
+        # small lossless webp
+        (
+            _TestImage(
+                unhexlify(
+                    b"524946461a000000574542505650384c0d0000002f0000001007"
+                    b"1011118888fe0700"
+                ),
+                b"image/webp",
+                b".webp",
+            ),
+        ),
+        # an empty file
+        (
+            _TestImage(
+                b"",
+                b"image/gif",
+                b".gif",
+                expected_found=False,
+                unable_to_thumbnail=True,
+            ),
+        ),
+    ],
+)
+class MediaRepoTests(unittest.HomeserverTestCase):
+    test_image: ClassVar[_TestImage]
+    hijack_auth = True
+    user_id = "@test:user"
+
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
+        self.fetches: List[
+            Tuple[
+                "Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]]",
+                str,
+                str,
+                Optional[QueryParams],
+            ]
+        ] = []
+
+        def get_file(
+            destination: str,
+            path: str,
+            output_stream: BinaryIO,
+            args: Optional[QueryParams] = None,
+            retry_on_dns_fail: bool = True,
+            max_size: Optional[int] = None,
+            ignore_backoff: bool = False,
+        ) -> "Deferred[Tuple[int, Dict[bytes, List[bytes]]]]":
+            """A mock for MatrixFederationHttpClient.get_file."""
+
+            def write_to(
+                r: Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]
+            ) -> Tuple[int, Dict[bytes, List[bytes]]]:
+                data, response = r
+                output_stream.write(data)
+                return response
+
+            d: Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]] = Deferred()
+            self.fetches.append((d, destination, path, args))
+            # Note that this callback changes the value held by d.
+            d_after_callback = d.addCallback(write_to)
+            return make_deferred_yieldable(d_after_callback)
+
+        # Mock out the homeserver's MatrixFederationHttpClient
+        client = Mock()
+        client.get_file = get_file
+
+        self.storage_path = self.mktemp()
+        self.media_store_path = self.mktemp()
+        os.mkdir(self.storage_path)
+        os.mkdir(self.media_store_path)
+
+        config = self.default_config()
+        config["media_store_path"] = self.media_store_path
+        config["max_image_pixels"] = 2000000
+
+        provider_config = {
+            "module": "synapse.media.storage_provider.FileStorageProviderBackend",
+            "store_local": True,
+            "store_synchronous": False,
+            "store_remote": True,
+            "config": {"directory": self.storage_path},
+        }
+        config["media_storage_providers"] = [provider_config]
+
+        hs = self.setup_test_homeserver(config=config, federation_http_client=client)
+
+        return hs
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        media_resource = hs.get_media_repository_resource()
+        self.download_resource = media_resource.children[b"download"]
+        self.thumbnail_resource = media_resource.children[b"thumbnail"]
+        self.store = hs.get_datastores().main
+        self.media_repo = hs.get_media_repository()
+
+        self.media_id = "example.com/12345"  # "<origin>/<media id>"; tests split on "/"
+
+    def _req(
+        self, content_disposition: Optional[bytes], include_content_type: bool = True
+    ) -> FakeChannel:
+        channel = make_request(
+            self.reactor,
+            FakeSite(self.download_resource, self.reactor),
+            "GET",
+            self.media_id,
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        # We've made one fetch, to example.com, using the media URL, and asking
+        # the other server not to do a remote fetch
+        self.assertEqual(len(self.fetches), 1)
+        self.assertEqual(self.fetches[0][1], "example.com")
+        self.assertEqual(
+            self.fetches[0][2], "/_matrix/media/r0/download/" + self.media_id
+        )
+        self.assertEqual(self.fetches[0][3], {"allow_remote": "false"})
+
+        headers = {  # headers of the simulated remote response
+            b"Content-Length": [b"%d" % (len(self.test_image.data))],
+        }
+
+        if include_content_type:
+            headers[b"Content-Type"] = [self.test_image.content_type]
+
+        if content_disposition:
+            headers[b"Content-Disposition"] = [content_disposition]
+
+        self.fetches[0][0].callback(  # resolve the pending mocked fetch with the image
+            (self.test_image.data, (len(self.test_image.data), headers))
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
+
+        return channel
+
+    def test_handle_missing_content_type(self) -> None:
+        channel = self._req(
+            b"inline; filename=out" + self.test_image.extension,
+            include_content_type=False,
+        )
+        headers = channel.headers
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(
+            headers.getRawHeaders(b"Content-Type"), [b"application/octet-stream"]
+        )
+
+    def test_disposition_filename_ascii(self) -> None:
+        """
+        If the filename is filename=<ascii> then Synapse will decode it as an
+        ASCII string, and use filename= in the response.
+        """
+        channel = self._req(b"inline; filename=out" + self.test_image.extension)
+
+        headers = channel.headers
+        self.assertEqual(
+            headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
+        )
+        self.assertEqual(
+            headers.getRawHeaders(b"Content-Disposition"),
+            [b"inline; filename=out" + self.test_image.extension],
+        )
+
+    def test_disposition_filenamestar_utf8escaped(self) -> None:
+        """
+        If the filename is filename*=utf-8''<utf8 escaped> then Synapse will
+        correctly decode it as the UTF-8 string, and use filename* in the
+        response.
+        """
+        filename = parse.quote("\u2603".encode()).encode("ascii")
+        channel = self._req(
+            b"inline; filename*=utf-8''" + filename + self.test_image.extension
+        )
+
+        headers = channel.headers
+        self.assertEqual(
+            headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
+        )
+        self.assertEqual(
+            headers.getRawHeaders(b"Content-Disposition"),
+            [b"inline; filename*=utf-8''" + filename + self.test_image.extension],
+        )
+
+    def test_disposition_none(self) -> None:
+        """
+        If the remote supplies no Content-Disposition (and so no filename), no
+        Content-Disposition header is set on the response.
+        """
+        channel = self._req(None)
+
+        headers = channel.headers
+        self.assertEqual(
+            headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
+        )
+        self.assertEqual(headers.getRawHeaders(b"Content-Disposition"), None)
+
+    def test_thumbnail_crop(self) -> None:
+        """Test that a cropped remote thumbnail is available."""
+        self._test_thumbnail(
+            "crop",
+            self.test_image.expected_cropped,
+            expected_found=self.test_image.expected_found,
+            unable_to_thumbnail=self.test_image.unable_to_thumbnail,
+        )
+
+    def test_thumbnail_scale(self) -> None:
+        """Test that a scaled remote thumbnail is available."""
+        self._test_thumbnail(
+            "scale",
+            self.test_image.expected_scaled,
+            expected_found=self.test_image.expected_found,
+            unable_to_thumbnail=self.test_image.unable_to_thumbnail,
+        )
+
+    def test_invalid_type(self) -> None:
+        """An invalid thumbnail type is never available."""
+        self._test_thumbnail(
+            "invalid",
+            None,
+            expected_found=False,
+            unable_to_thumbnail=self.test_image.unable_to_thumbnail,
+        )
+
+    @unittest.override_config(
+        {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "scale"}]}
+    )
+    def test_no_thumbnail_crop(self) -> None:
+        """
+        Override the config to generate only scaled thumbnails, but request a cropped one.
+        """
+        self._test_thumbnail(
+            "crop",
+            None,
+            expected_found=False,
+            unable_to_thumbnail=self.test_image.unable_to_thumbnail,
+        )
+
+    @unittest.override_config(
+        {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "crop"}]}
+    )
+    def test_no_thumbnail_scale(self) -> None:
+        """
+        Override the config to generate only cropped thumbnails, but request a scaled one.
+        """
+        self._test_thumbnail(
+            "scale",
+            None,
+            expected_found=False,
+            unable_to_thumbnail=self.test_image.unable_to_thumbnail,
+        )
+
+    def test_thumbnail_repeated_thumbnail(self) -> None:
+        """Test that fetching the same thumbnail works, and deleting the on disk
+        thumbnail regenerates it.
+        """
+        self._test_thumbnail(
+            "scale",
+            self.test_image.expected_scaled,
+            expected_found=self.test_image.expected_found,
+            unable_to_thumbnail=self.test_image.unable_to_thumbnail,
+        )
+
+        if not self.test_image.expected_found:
+            return
+
+        # Fetching again should work, without re-requesting the image from the
+        # remote.
+        params = "?width=32&height=32&method=scale"
+        channel = make_request(
+            self.reactor,
+            FakeSite(self.thumbnail_resource, self.reactor),
+            "GET",
+            self.media_id + params,
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        self.assertEqual(channel.code, 200)
+        if self.test_image.expected_scaled:
+            self.assertEqual(
+                channel.result["body"],
+                self.test_image.expected_scaled,
+                channel.result["body"],
+            )
+
+        # Deleting the thumbnail on disk then re-requesting it should work as
+        # Synapse should regenerate missing thumbnails.
+        origin, media_id = self.media_id.split("/")
+        info = self.get_success(self.store.get_cached_remote_media(origin, media_id))
+        assert info is not None
+        file_id = info["filesystem_id"]
+
+        thumbnail_dir = self.media_repo.filepaths.remote_media_thumbnail_dir(
+            origin, file_id
+        )
+        shutil.rmtree(thumbnail_dir, ignore_errors=True)
+
+        channel = make_request(
+            self.reactor,
+            FakeSite(self.thumbnail_resource, self.reactor),
+            "GET",
+            self.media_id + params,
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        self.assertEqual(channel.code, 200)
+        if self.test_image.expected_scaled:
+            self.assertEqual(
+                channel.result["body"],
+                self.test_image.expected_scaled,
+                channel.result["body"],
+            )
+
+    def _test_thumbnail(
+        self,
+        method: str,
+        expected_body: Optional[bytes],
+        expected_found: bool,
+        unable_to_thumbnail: bool = False,
+    ) -> None:
+        """Test the given thumbnailing method works as expected.
+
+        Args:
+            method: The thumbnailing method to use (crop, scale).
+            expected_body: The expected bytes from thumbnailing, or None if
+                test should just check for a valid image.
+            expected_found: True if the file should exist on the server, or False if
+                a 404/400 is expected.
+            unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or
+                False if the thumbnailing should succeed or a normal 404 is expected.
+        """
+
+        params = "?width=32&height=32&method=" + method
+        channel = make_request(
+            self.reactor,
+            FakeSite(self.thumbnail_resource, self.reactor),
+            "GET",
+            self.media_id + params,
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        headers = {
+            b"Content-Length": [b"%d" % (len(self.test_image.data))],
+            b"Content-Type": [self.test_image.content_type],
+        }
+        self.fetches[0][0].callback(
+            (self.test_image.data, (len(self.test_image.data), headers))
+        )
+        self.pump()
+
+        if expected_found:
+            self.assertEqual(channel.code, 200)
+
+            self.assertEqual(
+                channel.headers.getRawHeaders(b"Cross-Origin-Resource-Policy"),
+                [b"cross-origin"],
+            )
+
+            if expected_body is not None:
+                self.assertEqual(
+                    channel.result["body"], expected_body, channel.result["body"]
+                )
+            else:
+                # ensure that the result is at least some valid image
+                Image.open(BytesIO(channel.result["body"]))
+        elif unable_to_thumbnail:
+            # A 400 with a JSON body.
+            self.assertEqual(channel.code, 400)
+            self.assertEqual(
+                channel.json_body,
+                {
+                    "errcode": "M_UNKNOWN",
+                    "error": "Cannot find any thumbnails for the requested media ([b'example.com', b'12345']). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)",
+                },
+            )
+        else:
+            # A 404 with a JSON body.
+            self.assertEqual(channel.code, 404)
+            self.assertEqual(
+                channel.json_body,
+                {
+                    "errcode": "M_NOT_FOUND",
+                    "error": "Not found [b'example.com', b'12345']",
+                },
+            )
+
+    @parameterized.expand([("crop", 16), ("crop", 64), ("scale", 16), ("scale", 64)])
+    def test_same_quality(self, method: str, desired_size: int) -> None:
+        """Test that choosing between thumbnails with the same quality rating succeeds.
+
+        We are not particular about which thumbnail is chosen."""
+        self.assertIsNotNone(
+            self.thumbnail_resource._select_thumbnail(
+                desired_width=desired_size,
+                desired_height=desired_size,
+                desired_method=method,
+                desired_type=self.test_image.content_type,
+                # Provide two identical thumbnails which are guaranteed to have the same
+                # quality rating.
+                thumbnail_infos=[
+                    {
+                        "thumbnail_width": 32,
+                        "thumbnail_height": 32,
+                        "thumbnail_method": method,
+                        "thumbnail_type": self.test_image.content_type,
+                        "thumbnail_length": 256,
+                        "filesystem_id": f"thumbnail1{self.test_image.extension.decode()}",
+                    },
+                    {
+                        "thumbnail_width": 32,
+                        "thumbnail_height": 32,
+                        "thumbnail_method": method,
+                        "thumbnail_type": self.test_image.content_type,
+                        "thumbnail_length": 256,
+                        "filesystem_id": f"thumbnail2{self.test_image.extension.decode()}",
+                    },
+                ],
+                file_id=f"image{self.test_image.extension.decode()}",
+                url_cache=None,
+                server_name=None,
+            )
+        )
+
+    def test_x_robots_tag_header(self) -> None:
+        """
+        Tests that the `X-Robots-Tag` header is present, which informs web crawlers
+        to not index, archive, or follow links in media.
+        """
+        channel = self._req(b"inline; filename=out" + self.test_image.extension)
+
+        headers = channel.headers
+        self.assertEqual(
+            headers.getRawHeaders(b"X-Robots-Tag"),
+            [b"noindex, nofollow, noarchive, noimageindex"],
+        )
+
+    def test_cross_origin_resource_policy_header(self) -> None:
+        """
+        Test that the Cross-Origin-Resource-Policy header is set to "cross-origin"
+        allowing web clients to embed media from the downloads API.
+        """
+        channel = self._req(b"inline; filename=out" + self.test_image.extension)
+
+        headers = channel.headers
+
+        self.assertEqual(
+            headers.getRawHeaders(b"Cross-Origin-Resource-Policy"),
+            [b"cross-origin"],
+        )
+
+
+class TestSpamCheckerLegacy:
+    """A spam checker module that rejects all media that includes the bytes
+    `evil`.
+
+    Uses the legacy Spam-Checker API.
+    """
+
+    def __init__(self, config: Dict[str, Any], api: ModuleApi) -> None:
+        self.config = config
+        self.api = api
+
+    @staticmethod
+    def parse_config(config: Dict[str, Any]) -> Dict[str, Any]:
+        return config
+
+    async def check_event_for_spam(self, event: EventBase) -> Union[bool, str]:
+        return False  # allow all events
+
+    async def user_may_invite(
+        self,
+        inviter_userid: str,
+        invitee_userid: str,
+        room_id: str,
+    ) -> bool:
+        return True  # allow all invites
+
+    async def user_may_create_room(self, userid: str) -> bool:
+        return True  # allow all room creations
+
+    async def user_may_create_room_alias(
+        self, userid: str, room_alias: RoomAlias
+    ) -> bool:
+        return True  # allow all room aliases
+
+    async def user_may_publish_room(self, userid: str, room_id: str) -> bool:
+        return True  # allow publishing of all rooms
+
+    async def check_media_file_for_spam(
+        self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
+    ) -> bool:
+        buf = BytesIO()
+        await file_wrapper.write_chunks_to(buf.write)
+
+        return b"evil" in buf.getvalue()  # True marks the media as spam (rejected)
+
+
+class SpamCheckerTestCaseLegacy(unittest.HomeserverTestCase):
+    servlets = [
+        login.register_servlets,
+        admin.register_servlets,
+    ]
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.user = self.register_user("user", "pass")
+        self.tok = self.login("user", "pass")
+
+        # Allow for uploading and downloading to/from the media repo
+        self.media_repo = hs.get_media_repository_resource()
+        self.download_resource = self.media_repo.children[b"download"]
+        self.upload_resource = self.media_repo.children[b"upload"]
+
+        load_legacy_spam_checkers(hs)
+
+    def default_config(self) -> Dict[str, Any]:
+        config = default_config("test")
+
+        config.update(
+            {
+                "spam_checker": [
+                    {
+                        "module": TestSpamCheckerLegacy.__module__
+                        + ".TestSpamCheckerLegacy",
+                        "config": {},
+                    }
+                ]
+            }
+        )
+
+        return config
+
+    def test_upload_innocent(self) -> None:
+        """Attempt to upload some innocent data that should be allowed."""
+        self.helper.upload_media(
+            self.upload_resource, SMALL_PNG, tok=self.tok, expect_code=200
+        )
+
+    def test_upload_ban(self) -> None:
+        """Attempt to upload some data that includes bytes "evil", which should
+        get rejected by the spam checker.
+        """
+
+        data = b"Some evil data"
+
+        self.helper.upload_media(
+            self.upload_resource, data, tok=self.tok, expect_code=400
+        )
+
+
+EVIL_DATA = b"Some evil data"
+EVIL_DATA_EXPERIMENT = b"Some evil data to trigger the experimental tuple API"
+
+
+class SpamCheckerTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        login.register_servlets,
+        admin.register_servlets,
+    ]
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.user = self.register_user("user", "pass")
+        self.tok = self.login("user", "pass")
+
+        # Allow for uploading and downloading to/from the media repo
+        self.media_repo = hs.get_media_repository_resource()
+        self.download_resource = self.media_repo.children[b"download"]
+        self.upload_resource = self.media_repo.children[b"upload"]
+
+        hs.get_module_api().register_spam_checker_callbacks(
+            check_media_file_for_spam=self.check_media_file_for_spam
+        )
+
+    async def check_media_file_for_spam(
+        self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
+    ) -> Union[Codes, Literal["NOT_SPAM"], Tuple[Codes, JsonDict]]:
+        sink = BytesIO()
+        await file_wrapper.write_chunks_to(sink.write)
+        payload = sink.getvalue()
+        if payload == EVIL_DATA:
+            return Codes.FORBIDDEN
+        if payload == EVIL_DATA_EXPERIMENT:
+            return (Codes.FORBIDDEN, {})
+        # Any other content is let through.
+        return "NOT_SPAM"
+
+    def test_upload_innocent(self) -> None:
+        """Attempt to upload some innocent data that should be allowed."""
+        self.helper.upload_media(
+            self.upload_resource, SMALL_PNG, tok=self.tok, expect_code=200
+        )
+
+    def test_upload_ban(self) -> None:
+        """Attempt to upload the two exact payloads the spam checker callback
+        rejects, covering both its plain-code and its tuple return forms.
+        """
+
+        self.helper.upload_media(
+            self.upload_resource, EVIL_DATA, tok=self.tok, expect_code=400
+        )
+
+        self.helper.upload_media(
+            self.upload_resource,
+            EVIL_DATA_EXPERIMENT,
+            tok=self.tok,
+            expect_code=400,
+        )
diff --git a/tests/media/test_oembed.py b/tests/media/test_oembed.py
new file mode 100644
index 0000000000..c8bf8421da
--- /dev/null
+++ b/tests/media/test_oembed.py
@@ -0,0 +1,162 @@
+#  Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import json
+
+from parameterized import parameterized
+
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.media.oembed import OEmbedProvider, OEmbedResult
+from synapse.server import HomeServer
+from synapse.types import JsonDict
+from synapse.util import Clock
+
+from tests.unittest import HomeserverTestCase
+
+try:
+    import lxml
+except ImportError:
+    lxml = None
+
+
+class OEmbedTests(HomeserverTestCase):
+    if not lxml:
+        skip = "url preview feature requires lxml"
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.oembed = OEmbedProvider(hs)
+
+    def parse_response(self, response: JsonDict) -> OEmbedResult:
+        return self.oembed.parse_oembed_response(
+            "https://test", json.dumps(response).encode("utf-8")
+        )
+
+    def test_version(self) -> None:
+        """Accept versions that are similar to 1.0 as a string or int (or missing)."""
+        for version in ("1.0", 1.0, 1):
+            result = self.parse_response({"version": version})
+            # An empty Open Graph response is an error, ensure the URL is included.
+            self.assertIn("og:url", result.open_graph_result)
+
+        # A missing version should be treated as 1.0.
+        result = self.parse_response({"type": "link"})
+        self.assertIn("og:url", result.open_graph_result)
+
+        # Invalid versions should be rejected.
+        for version in ("2.0", "1", 1.1, 0, None, {}, []):
+            result = self.parse_response({"version": version, "type": "link"})
+            # A rejected version results in an empty Open Graph response.
+            self.assertEqual({}, result.open_graph_result)
+
+    def test_cache_age(self) -> None:
+        """Ensure a cache-age is parsed properly."""
+        # Correct-ish cache ages are allowed.
+        for cache_age in ("1", 1.0, 1):
+            result = self.parse_response({"cache_age": cache_age})
+            self.assertEqual(result.cache_age, 1000)
+
+        # Invalid cache ages are ignored.
+        for cache_age in ("invalid", {}):
+            result = self.parse_response({"cache_age": cache_age})
+            self.assertIsNone(result.cache_age)
+
+        # Cache age is optional.
+        result = self.parse_response({})
+        self.assertIsNone(result.cache_age)
+
+    @parameterized.expand(
+        [
+            ("title", "title"),
+            ("provider_name", "site_name"),
+            ("thumbnail_url", "image"),
+        ],
+        name_func=lambda func, num, p: f"{func.__name__}_{p.args[0]}",
+    )
+    def test_property(self, oembed_property: str, open_graph_property: str) -> None:
+        """Test properties which must be strings."""
+        result = self.parse_response({oembed_property: "test"})
+        self.assertIn(f"og:{open_graph_property}", result.open_graph_result)
+        self.assertEqual(result.open_graph_result[f"og:{open_graph_property}"], "test")
+
+        result = self.parse_response({oembed_property: 1})
+        self.assertNotIn(f"og:{open_graph_property}", result.open_graph_result)
+
+    def test_author_name(self) -> None:
+        """Test the author_name property."""
+        result = self.parse_response({"author_name": "test"})
+        self.assertEqual(result.author_name, "test")
+
+        result = self.parse_response({"author_name": 1})
+        self.assertIsNone(result.author_name)
+
+    def test_rich(self) -> None:
+        """Test a type of rich."""
+        result = self.parse_response({"html": "test<img src='foo'>", "type": "rich"})
+        self.assertIn("og:description", result.open_graph_result)
+        self.assertIn("og:image", result.open_graph_result)
+        self.assertEqual(result.open_graph_result["og:description"], "test")
+        self.assertEqual(result.open_graph_result["og:image"], "foo")
+
+        result = self.parse_response({"type": "rich"})
+        self.assertNotIn("og:description", result.open_graph_result)
+
+        result = self.parse_response({"html": 1, "type": "rich"})
+        self.assertNotIn("og:description", result.open_graph_result)
+
+    def test_photo(self) -> None:
+        """Test a type of photo."""
+        result = self.parse_response({"url": "test", "type": "photo"})
+        self.assertIn("og:image", result.open_graph_result)
+        self.assertEqual(result.open_graph_result["og:image"], "test")
+
+        result = self.parse_response({"type": "photo"})
+        self.assertNotIn("og:image", result.open_graph_result)
+
+        result = self.parse_response({"url": 1, "type": "photo"})
+        self.assertNotIn("og:image", result.open_graph_result)
+
+    def test_video(self) -> None:
+        """Test a type of video."""
+        result = self.parse_response({"html": "test", "type": "video"})
+        self.assertIn("og:type", result.open_graph_result)
+        self.assertEqual(result.open_graph_result["og:type"], "video.other")
+        self.assertIn("og:description", result.open_graph_result)
+        self.assertEqual(result.open_graph_result["og:description"], "test")
+
+        result = self.parse_response({"type": "video"})
+        self.assertIn("og:type", result.open_graph_result)
+        self.assertEqual(result.open_graph_result["og:type"], "video.other")
+        self.assertNotIn("og:description", result.open_graph_result)
+
+        result = self.parse_response({"url": 1, "type": "video"})
+        self.assertIn("og:type", result.open_graph_result)
+        self.assertEqual(result.open_graph_result["og:type"], "video.other")
+        self.assertNotIn("og:description", result.open_graph_result)
+
+    def test_link(self) -> None:
+        """Test type of link."""
+        result = self.parse_response({"type": "link"})
+        self.assertIn("og:type", result.open_graph_result)
+        self.assertEqual(result.open_graph_result["og:type"], "website")
+
+    def test_title_html_entities(self) -> None:
+        """Test HTML entities in title"""
+        result = self.parse_response(
+            {"title": "Why JSON isn&#8217;t a Good Configuration Language"}
+        )
+        self.assertEqual(
+            result.open_graph_result["og:title"],
+            "Why JSON isn’t a Good Configuration Language",
+        )
diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py
index f41319a5b6..6d04911d67 100644
--- a/tests/rest/admin/test_media.py
+++ b/tests/rest/admin/test_media.py
@@ -20,8 +20,8 @@ from twisted.test.proto_helpers import MemoryReactor
 
 import synapse.rest.admin
 from synapse.api.errors import Codes
+from synapse.media.filepath import MediaFilePaths
 from synapse.rest.client import login, profile, room
-from synapse.rest.media.v1.filepath import MediaFilePaths
 from synapse.server import HomeServer
 from synapse.util import Clock
 
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index f5b213219f..4b8f889a71 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -28,8 +28,8 @@ import synapse.rest.admin
 from synapse.api.constants import ApprovalNoticeMedium, LoginType, UserTypes
 from synapse.api.errors import Codes, HttpResponseException, ResourceLimitError
 from synapse.api.room_versions import RoomVersions
+from synapse.media.filepath import MediaFilePaths
 from synapse.rest.client import devices, login, logout, profile, register, room, sync
-from synapse.rest.media.v1.filepath import MediaFilePaths
 from synapse.server import HomeServer
 from synapse.types import JsonDict, UserID, create_requester
 from synapse.util import Clock
diff --git a/tests/rest/media/test_url_preview.py b/tests/rest/media/test_url_preview.py
new file mode 100644
index 0000000000..e91dc581c2
--- /dev/null
+++ b/tests/rest/media/test_url_preview.py
@@ -0,0 +1,1234 @@
+# Copyright 2018 New Vector Ltd
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import base64
+import json
+import os
+import re
+from typing import Any, Dict, Optional, Sequence, Tuple, Type
+from urllib.parse import quote, urlencode
+
+from twisted.internet._resolver import HostResolution
+from twisted.internet.address import IPv4Address, IPv6Address
+from twisted.internet.error import DNSLookupError
+from twisted.internet.interfaces import IAddress, IResolutionReceiver
+from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactor
+
+from synapse.config.oembed import OEmbedEndpointConfig
+from synapse.rest.media.media_repository_resource import MediaRepositoryResource
+from synapse.rest.media.preview_url_resource import IMAGE_CACHE_EXPIRY_MS
+from synapse.server import HomeServer
+from synapse.types import JsonDict
+from synapse.util import Clock
+from synapse.util.stringutils import parse_and_validate_mxc_uri
+
+from tests import unittest
+from tests.server import FakeTransport
+from tests.test_utils import SMALL_PNG
+from tests.utils import MockClock
+
+try:
+    import lxml
+except ImportError:
+    lxml = None
+
+
+class URLPreviewTests(unittest.HomeserverTestCase):
+    if not lxml:
+        skip = "url preview feature requires lxml"
+
+    hijack_auth = True
+    user_id = "@test:user"
+    end_content = (
+        b"<html><head>"
+        b'<meta property="og:title" content="~matrix~" />'
+        b'<meta property="og:description" content="hi" />'
+        b"</head></html>"
+    )
+
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
+        config = self.default_config()
+        config["url_preview_enabled"] = True
+        config["max_spider_size"] = 9999999
+        config["url_preview_ip_range_blacklist"] = (
+            "192.168.1.1",
+            "1.0.0.0/8",
+            "3fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
+            "2001:800::/21",
+        )
+        config["url_preview_ip_range_whitelist"] = ("1.1.1.1",)
+        config["url_preview_accept_language"] = [
+            "en-UK",
+            "en-US;q=0.9",
+            "fr;q=0.8",
+            "*;q=0.7",
+        ]
+
+        self.storage_path = self.mktemp()
+        self.media_store_path = self.mktemp()
+        os.mkdir(self.storage_path)
+        os.mkdir(self.media_store_path)
+        config["media_store_path"] = self.media_store_path
+
+        provider_config = {
+            "module": "synapse.media.storage_provider.FileStorageProviderBackend",
+            "store_local": True,
+            "store_synchronous": False,
+            "store_remote": True,
+            "config": {"directory": self.storage_path},
+        }
+
+        config["media_storage_providers"] = [provider_config]
+
+        hs = self.setup_test_homeserver(config=config)
+
+        # After the hs is created, modify the parsed oEmbed config (to avoid
+        # messing with files).
+        #
+        # Note that HTTP URLs are used to avoid having to deal with TLS in tests.
+        hs.config.oembed.oembed_patterns = [
+            OEmbedEndpointConfig(
+                api_endpoint="http://publish.twitter.com/oembed",
+                url_patterns=[
+                    re.compile(r"http://twitter\.com/.+/status/.+"),
+                ],
+                formats=None,
+            ),
+            OEmbedEndpointConfig(
+                api_endpoint="http://www.hulu.com/api/oembed.{format}",
+                url_patterns=[
+                    re.compile(r"http://www\.hulu\.com/watch/.+"),
+                ],
+                formats=["json"],
+            ),
+        ]
+
+        return hs
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.media_repo = hs.get_media_repository_resource()
+        self.preview_url = self.media_repo.children[b"preview_url"]
+
+        self.lookups: Dict[str, Any] = {}
+
+        class Resolver:
+            def resolveHostName(
+                _self,
+                resolutionReceiver: IResolutionReceiver,
+                hostName: str,
+                portNumber: int = 0,
+                addressTypes: Optional[Sequence[Type[IAddress]]] = None,
+                transportSemantics: str = "TCP",
+            ) -> IResolutionReceiver:
+                resolution = HostResolution(hostName)
+                resolutionReceiver.resolutionBegan(resolution)
+                if hostName not in self.lookups:
+                    raise DNSLookupError("OH NO")
+
+                for i in self.lookups[hostName]:
+                    resolutionReceiver.addressResolved(i[0]("TCP", i[1], portNumber))
+                resolutionReceiver.resolutionComplete()
+                return resolutionReceiver
+
+        self.reactor.nameResolver = Resolver()  # type: ignore[assignment]
+
+    def create_test_resource(self) -> MediaRepositoryResource:
+        return self.hs.get_media_repository_resource()
+
+    def _assert_small_png(self, json_body: JsonDict) -> None:
+        """Assert properties from the SMALL_PNG test image."""
+        self.assertTrue(json_body["og:image"].startswith("mxc://"))
+        self.assertEqual(json_body["og:image:height"], 1)
+        self.assertEqual(json_body["og:image:width"], 1)
+        self.assertEqual(json_body["og:image:type"], "image/png")
+        self.assertEqual(json_body["matrix:image:size"], 67)
+
+    def test_cache_returns_correct_type(self) -> None:
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://matrix.org",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
+            % (len(self.end_content),)
+            + self.end_content
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(
+            channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
+        )
+
+        # Check the cache returns the correct response
+        channel = self.make_request(
+            "GET", "preview_url?url=http://matrix.org", shorthand=False
+        )
+
+        # Check the cache response has the same content
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(
+            channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
+        )
+
+        # Clear the in-memory cache
+        self.assertIn("http://matrix.org", self.preview_url._cache)
+        self.preview_url._cache.pop("http://matrix.org")
+        self.assertNotIn("http://matrix.org", self.preview_url._cache)
+
+        # Check the database cache returns the correct response
+        channel = self.make_request(
+            "GET", "preview_url?url=http://matrix.org", shorthand=False
+        )
+
+        # Check the cache response has the same content
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(
+            channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
+        )
+
+    def test_non_ascii_preview_httpequiv(self) -> None:
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+        end_content = (
+            b"<html><head>"
+            b'<meta http-equiv="Content-Type" content="text/html; charset=windows-1251"/>'
+            b'<meta property="og:title" content="\xe4\xea\xe0" />'
+            b'<meta property="og:description" content="hi" />'
+            b"</head></html>"
+        )
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://matrix.org",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+            )
+            % (len(end_content),)
+            + end_content
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430")
+
+    def test_video_rejected(self) -> None:
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+        end_content = b"anything"
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://matrix.org",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b"Content-Type: video/mp4\r\n\r\n"
+            )
+            % (len(end_content))
+            + end_content
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 502)
+        self.assertEqual(
+            channel.json_body,
+            {
+                "errcode": "M_UNKNOWN",
+                "error": "Requested file's content type not allowed for this operation: video/mp4",
+            },
+        )
+
+    def test_audio_rejected(self) -> None:
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+        end_content = b"anything"
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://matrix.org",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b"Content-Type: audio/aac\r\n\r\n"
+            )
+            % (len(end_content))
+            + end_content
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 502)
+        self.assertEqual(
+            channel.json_body,
+            {
+                "errcode": "M_UNKNOWN",
+                "error": "Requested file's content type not allowed for this operation: audio/aac",
+            },
+        )
+
+    def test_non_ascii_preview_content_type(self) -> None:
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+        end_content = (
+            b"<html><head>"
+            b'<meta property="og:title" content="\xe4\xea\xe0" />'
+            b'<meta property="og:description" content="hi" />'
+            b"</head></html>"
+        )
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://matrix.org",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: text/html; charset="windows-1251"\r\n\r\n'
+            )
+            % (len(end_content),)
+            + end_content
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430")
+
+    def test_overlong_title(self) -> None:
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+        end_content = (
+            b"<html><head>"
+            b"<title>" + b"x" * 2000 + b"</title>"
+            b'<meta property="og:description" content="hi" />'
+            b"</head></html>"
+        )
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://matrix.org",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: text/html; charset="windows-1251"\r\n\r\n'
+            )
+            % (len(end_content),)
+            + end_content
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
+        res = channel.json_body
+        # We should only see the `og:description` field, as `title` is too long and should be stripped out
+        self.assertCountEqual(["og:description"], res.keys())
+
+    def test_ipaddr(self) -> None:
+        """
+        A hostname that resolves to an IP address outside the blacklist can be previewed.
+        """
+        self.lookups["example.com"] = [(IPv4Address, "10.1.2.3")]
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://example.com",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
+            % (len(self.end_content),)
+            + self.end_content
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(
+            channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
+        )
+
+    def test_blacklisted_ip_specific(self) -> None:
+        """
+        Blacklisted IP addresses, found via DNS, are not spidered.
+        """
+        self.lookups["example.com"] = [(IPv4Address, "192.168.1.1")]
+
+        channel = self.make_request(
+            "GET", "preview_url?url=http://example.com", shorthand=False
+        )
+
+        # No requests made.
+        self.assertEqual(len(self.reactor.tcpClients), 0)
+        self.assertEqual(channel.code, 502)
+        self.assertEqual(
+            channel.json_body,
+            {
+                "errcode": "M_UNKNOWN",
+                "error": "DNS resolution failure during URL preview generation",
+            },
+        )
+
+    def test_blacklisted_ip_range(self) -> None:
+        """
+        Blacklisted IP ranges, IPs found over DNS, are not spidered.
+        """
+        self.lookups["example.com"] = [(IPv4Address, "1.1.1.2")]
+
+        channel = self.make_request(
+            "GET", "preview_url?url=http://example.com", shorthand=False
+        )
+
+        self.assertEqual(channel.code, 502)
+        self.assertEqual(
+            channel.json_body,
+            {
+                "errcode": "M_UNKNOWN",
+                "error": "DNS resolution failure during URL preview generation",
+            },
+        )
+
+    def test_blacklisted_ip_specific_direct(self) -> None:
+        """
+        Blacklisted IP addresses, accessed directly, are not spidered.
+        """
+        channel = self.make_request(
+            "GET", "preview_url?url=http://192.168.1.1", shorthand=False
+        )
+
+        # No requests made.
+        self.assertEqual(len(self.reactor.tcpClients), 0)
+        self.assertEqual(
+            channel.json_body,
+            {
+                "errcode": "M_UNKNOWN",
+                "error": "IP address blocked by IP blacklist entry",
+            },
+        )
+        self.assertEqual(channel.code, 403)
+
+    def test_blacklisted_ip_range_direct(self) -> None:
+        """
+        Blacklisted IP ranges, accessed directly, are not spidered.
+        """
+        channel = self.make_request(
+            "GET", "preview_url?url=http://1.1.1.2", shorthand=False
+        )
+
+        self.assertEqual(channel.code, 403)
+        self.assertEqual(
+            channel.json_body,
+            {
+                "errcode": "M_UNKNOWN",
+                "error": "IP address blocked by IP blacklist entry",
+            },
+        )
+
+    def test_blacklisted_ip_range_whitelisted_ip(self) -> None:
+        """
+        Blacklisted but then subsequently whitelisted IP addresses can be
+        spidered.
+        """
+        self.lookups["example.com"] = [(IPv4Address, "1.1.1.1")]
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://example.com",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+
+        client.dataReceived(
+            b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
+            % (len(self.end_content),)
+            + self.end_content
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(
+            channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
+        )
+
+    def test_blacklisted_ip_with_external_ip(self) -> None:
+        """
+        If a hostname resolves to a blacklisted IP, even if it also resolves
+        to a non-blacklisted one, it will be rejected.
+        """
+        # Hardcode the URL resolving to the IP we want.
+        self.lookups["example.com"] = [
+            (IPv4Address, "1.1.1.2"),
+            (IPv4Address, "10.1.2.3"),
+        ]
+
+        channel = self.make_request(
+            "GET", "preview_url?url=http://example.com", shorthand=False
+        )
+        self.assertEqual(channel.code, 502)
+        self.assertEqual(
+            channel.json_body,
+            {
+                "errcode": "M_UNKNOWN",
+                "error": "DNS resolution failure during URL preview generation",
+            },
+        )
+
+    def test_blacklisted_ipv6_specific(self) -> None:
+        """
+        Blacklisted IPv6 addresses, found via DNS, are not spidered.
+        """
+        self.lookups["example.com"] = [
+            (IPv6Address, "3fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff")
+        ]
+
+        channel = self.make_request(
+            "GET", "preview_url?url=http://example.com", shorthand=False
+        )
+
+        # No requests made.
+        self.assertEqual(len(self.reactor.tcpClients), 0)
+        self.assertEqual(channel.code, 502)
+        self.assertEqual(
+            channel.json_body,
+            {
+                "errcode": "M_UNKNOWN",
+                "error": "DNS resolution failure during URL preview generation",
+            },
+        )
+
+    def test_blacklisted_ipv6_range(self) -> None:
+        """
+        Blacklisted IPv6 ranges, with IPs found via DNS, are not spidered.
+        """
+        self.lookups["example.com"] = [(IPv6Address, "2001:800::1")]
+
+        channel = self.make_request(
+            "GET", "preview_url?url=http://example.com", shorthand=False
+        )
+
+        self.assertEqual(channel.code, 502)
+        self.assertEqual(
+            channel.json_body,
+            {
+                "errcode": "M_UNKNOWN",
+                "error": "DNS resolution failure during URL preview generation",
+            },
+        )
+
+    def test_OPTIONS(self) -> None:
+        """
+        An OPTIONS request returns a 200 with an empty JSON body.
+        """
+        channel = self.make_request(
+            "OPTIONS", "preview_url?url=http://example.com", shorthand=False
+        )
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(channel.json_body, {})
+
+    def test_accept_language_config_option(self) -> None:
+        """
+        Accept-Language header is sent to the remote server
+        """
+        self.lookups["example.com"] = [(IPv4Address, "10.1.2.3")]
+
+        # Build and make a request to the server
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://example.com",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        # Extract Synapse's tcp client
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+
+        # Build a fake remote server to reply with
+        server = AccumulatingProtocol()
+
+        # Connect the two together
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+
+        # Tell Synapse that it has received some data from the remote server
+        client.dataReceived(
+            b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
+            % (len(self.end_content),)
+            + self.end_content
+        )
+
+        # Move the reactor along until we get a response on our original channel
+        self.pump()
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(
+            channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
+        )
+
+        # Check that the server received the Accept-Language header as part
+        # of the request from Synapse
+        self.assertIn(
+            (
+                b"Accept-Language: en-UK\r\n"
+                b"Accept-Language: en-US;q=0.9\r\n"
+                b"Accept-Language: fr;q=0.8\r\n"
+                b"Accept-Language: *;q=0.7"
+            ),
+            server.data,
+        )
+
+    def test_nonexistent_image(self) -> None:
+        """If the preview image doesn't exist, ensure some data is returned."""
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+        result = (
+            b"""<html><body><img src="http://cdn.matrix.org/foo.jpg"></body></html>"""
+        )
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://matrix.org",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+            )
+            % (len(result),)
+            + result
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
+
+        # The image should not be in the result.
+        self.assertNotIn("og:image", channel.json_body)
+
+    def test_oembed_failure(self) -> None:
+        """If the autodiscovered oEmbed URL fails, ensure some data is returned."""
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+        result = b"""
+        <title>oEmbed Autodiscovery Fail</title>
+        <link rel="alternate" type="application/json+oembed"
+            href="http://example.com/oembed?url=http%3A%2F%2Fmatrix.org&format=json"
+            title="matrixdotorg" />
+        """
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://matrix.org",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+            )
+            % (len(result),)
+            + result
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
+
+        # The title from the HTML should still be returned.
+        self.assertEqual(channel.json_body["og:title"], "oEmbed Autodiscovery Fail")
+
+    def test_data_url(self) -> None:
+        """
+        Requesting to preview a data URL is not supported.
+        """
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+        data = base64.b64encode(SMALL_PNG).decode()
+
+        query_params = urlencode(
+            {
+                "url": f'<html><head><img src="data:image/png;base64,{data}" /></head></html>'
+            }
+        )
+
+        channel = self.make_request(
+            "GET",
+            f"preview_url?{query_params}",
+            shorthand=False,
+        )
+        self.pump()
+
+        self.assertEqual(channel.code, 500)
+
+    def test_inline_data_url(self) -> None:
+        """
+        An inline image (as a data URL) should be parsed properly.
+        """
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+        data = base64.b64encode(SMALL_PNG)
+
+        end_content = (
+            b"<html><head>" b'<img src="data:image/png;base64,%s" />' b"</head></html>"
+        ) % (data,)
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://matrix.org",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+            )
+            % (len(end_content),)
+            + end_content
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
+        self._assert_small_png(channel.json_body)
+
+    def test_oembed_photo(self) -> None:
+        """Test an oEmbed endpoint which returns a 'photo' type which redirects the preview to a new URL."""
+        self.lookups["publish.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+        self.lookups["cdn.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+
+        result = {
+            "version": "1.0",
+            "type": "photo",
+            "url": "http://cdn.twitter.com/matrixdotorg",
+        }
+        oembed_content = json.dumps(result).encode("utf-8")
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://twitter.com/matrixdotorg/status/12345",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: application/json; charset="utf8"\r\n\r\n'
+            )
+            % (len(oembed_content),)
+            + oembed_content
+        )
+
+        self.pump()
+
+        # Ensure a second request is made to the photo URL.
+        client = self.reactor.tcpClients[1][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b"Content-Type: image/png\r\n\r\n"
+            )
+            % (len(SMALL_PNG),)
+            + SMALL_PNG
+        )
+
+        self.pump()
+
+        # Ensure the URL is what was requested.
+        self.assertIn(b"/matrixdotorg", server.data)
+
+        self.assertEqual(channel.code, 200)
+        body = channel.json_body
+        self.assertEqual(body["og:url"], "http://twitter.com/matrixdotorg/status/12345")
+        self._assert_small_png(body)
+
+    def test_oembed_rich(self) -> None:
+        """Test an oEmbed endpoint which returns HTML content via the 'rich' type."""
+        self.lookups["publish.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+
+        result = {
+            "version": "1.0",
+            "type": "rich",
+            # Note that this provides the author, not the title.
+            "author_name": "Alice",
+            "html": "<div>Content Preview</div>",
+        }
+        end_content = json.dumps(result).encode("utf-8")
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://twitter.com/matrixdotorg/status/12345",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: application/json; charset="utf8"\r\n\r\n'
+            )
+            % (len(end_content),)
+            + end_content
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
+        body = channel.json_body
+        self.assertEqual(
+            body,
+            {
+                "og:url": "http://twitter.com/matrixdotorg/status/12345",
+                "og:title": "Alice",
+                "og:description": "Content Preview",
+            },
+        )
+
+    def test_oembed_format(self) -> None:
+        """Test an oEmbed endpoint which requires the format in the URL."""
+        self.lookups["www.hulu.com"] = [(IPv4Address, "10.1.2.3")]
+
+        result = {
+            "version": "1.0",
+            "type": "rich",
+            "html": "<div>Content Preview</div>",
+        }
+        end_content = json.dumps(result).encode("utf-8")
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://www.hulu.com/watch/12345",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: application/json; charset="utf8"\r\n\r\n'
+            )
+            % (len(end_content),)
+            + end_content
+        )
+
+        self.pump()
+
+        # The {format} should have been turned into json.
+        self.assertIn(b"/api/oembed.json", server.data)
+        # A URL parameter of format=json should be provided.
+        self.assertIn(b"format=json", server.data)
+
+        self.assertEqual(channel.code, 200)
+        body = channel.json_body
+        self.assertEqual(
+            body,
+            {
+                "og:url": "http://www.hulu.com/watch/12345",
+                "og:description": "Content Preview",
+            },
+        )
+
+    def test_oembed_autodiscovery(self) -> None:
+        """
+        Autodiscovery works by finding the link in the HTML response and then requesting an oEmbed URL.
+        1. Request a preview of a URL which is not known to the oEmbed code.
+        2. It returns HTML including a link to an oEmbed preview.
+        3. The oEmbed preview is requested and returns a URL for an image.
+        4. The image is requested for thumbnailing.
+        """
+        # This is a little cheesy in that we use the www subdomain (which isn't the
+        # in the list of oEmbed patterns) to get a "raw" HTML response.
+        self.lookups["www.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+        self.lookups["publish.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+        self.lookups["cdn.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+
+        result = b"""
+        <link rel="alternate" type="application/json+oembed"
+            href="http://publish.twitter.com/oembed?url=http%3A%2F%2Fcdn.twitter.com%2Fmatrixdotorg%2Fstatus%2F12345&format=json"
+            title="matrixdotorg" />
+        """
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://www.twitter.com/matrixdotorg/status/12345",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+            )
+            % (len(result),)
+            + result
+        )
+
+        self.pump()
+
+        # The oEmbed response.
+        result2 = {
+            "version": "1.0",
+            "type": "photo",
+            "url": "http://cdn.twitter.com/matrixdotorg",
+        }
+        oembed_content = json.dumps(result2).encode("utf-8")
+
+        # Ensure a second request is made to the oEmbed URL.
+        client = self.reactor.tcpClients[1][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: application/json; charset="utf8"\r\n\r\n'
+            )
+            % (len(oembed_content),)
+            + oembed_content
+        )
+
+        self.pump()
+
+        # Ensure the URL is what was requested.
+        self.assertIn(b"/oembed?", server.data)
+
+        # Ensure a third request is made to the photo URL.
+        client = self.reactor.tcpClients[2][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b"Content-Type: image/png\r\n\r\n"
+            )
+            % (len(SMALL_PNG),)
+            + SMALL_PNG
+        )
+
+        self.pump()
+
+        # Ensure the URL is what was requested.
+        self.assertIn(b"/matrixdotorg", server.data)
+
+        self.assertEqual(channel.code, 200)
+        body = channel.json_body
+        self.assertEqual(
+            body["og:url"], "http://www.twitter.com/matrixdotorg/status/12345"
+        )
+        self._assert_small_png(body)
+
+    def _download_image(self) -> Tuple[str, str]:
+        """Downloads an image into the URL cache.
+        Returns:
+            A (host, media_id) tuple representing the MXC URI of the image.
+        """
+        self.lookups["cdn.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://cdn.twitter.com/matrixdotorg",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: image/png\r\n\r\n"
+            % (len(SMALL_PNG),)
+            + SMALL_PNG
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
+        body = channel.json_body
+        mxc_uri = body["og:image"]
+        host, _port, media_id = parse_and_validate_mxc_uri(mxc_uri)
+        self.assertIsNone(_port)
+        return host, media_id
+
+    def test_storage_providers_exclude_files(self) -> None:
+        """Test that files are not stored in or fetched from storage providers."""
+        host, media_id = self._download_image()
+
+        rel_file_path = self.preview_url.filepaths.url_cache_filepath_rel(media_id)
+        media_store_path = os.path.join(self.media_store_path, rel_file_path)
+        storage_provider_path = os.path.join(self.storage_path, rel_file_path)
+
+        # Check storage
+        self.assertTrue(os.path.isfile(media_store_path))
+        self.assertFalse(
+            os.path.isfile(storage_provider_path),
+            "URL cache file was unexpectedly stored in a storage provider",
+        )
+
+        # Check fetching
+        channel = self.make_request(
+            "GET",
+            f"download/{host}/{media_id}",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+        self.assertEqual(channel.code, 200)
+
+        # Move cached file into the storage provider
+        os.makedirs(os.path.dirname(storage_provider_path), exist_ok=True)
+        os.rename(media_store_path, storage_provider_path)
+
+        channel = self.make_request(
+            "GET",
+            f"download/{host}/{media_id}",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+        self.assertEqual(
+            channel.code,
+            404,
+            "URL cache file was unexpectedly retrieved from a storage provider",
+        )
+
+    def test_storage_providers_exclude_thumbnails(self) -> None:
+        """Test that thumbnails are not stored in or fetched from storage providers."""
+        host, media_id = self._download_image()
+
+        rel_thumbnail_path = (
+            self.preview_url.filepaths.url_cache_thumbnail_directory_rel(media_id)
+        )
+        media_store_thumbnail_path = os.path.join(
+            self.media_store_path, rel_thumbnail_path
+        )
+        storage_provider_thumbnail_path = os.path.join(
+            self.storage_path, rel_thumbnail_path
+        )
+
+        # Check storage
+        self.assertTrue(os.path.isdir(media_store_thumbnail_path))
+        self.assertFalse(
+            os.path.isdir(storage_provider_thumbnail_path),
+            "URL cache thumbnails were unexpectedly stored in a storage provider",
+        )
+
+        # Check fetching
+        channel = self.make_request(
+            "GET",
+            f"thumbnail/{host}/{media_id}?width=32&height=32&method=scale",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+        self.assertEqual(channel.code, 200)
+
+        # Remove the original, otherwise thumbnails will regenerate
+        rel_file_path = self.preview_url.filepaths.url_cache_filepath_rel(media_id)
+        media_store_path = os.path.join(self.media_store_path, rel_file_path)
+        os.remove(media_store_path)
+
+        # Move cached thumbnails into the storage provider
+        os.makedirs(os.path.dirname(storage_provider_thumbnail_path), exist_ok=True)
+        os.rename(media_store_thumbnail_path, storage_provider_thumbnail_path)
+
+        channel = self.make_request(
+            "GET",
+            f"thumbnail/{host}/{media_id}?width=32&height=32&method=scale",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+        self.assertEqual(
+            channel.code,
+            404,
+            "URL cache thumbnail was unexpectedly retrieved from a storage provider",
+        )
+
+    def test_cache_expiry(self) -> None:
+        """Test that URL cache files and thumbnails are cleaned up properly on expiry."""
+        self.preview_url.clock = MockClock()
+
+        _host, media_id = self._download_image()
+
+        file_path = self.preview_url.filepaths.url_cache_filepath(media_id)
+        file_dirs = self.preview_url.filepaths.url_cache_filepath_dirs_to_delete(
+            media_id
+        )
+        thumbnail_dir = self.preview_url.filepaths.url_cache_thumbnail_directory(
+            media_id
+        )
+        thumbnail_dirs = self.preview_url.filepaths.url_cache_thumbnail_dirs_to_delete(
+            media_id
+        )
+
+        self.assertTrue(os.path.isfile(file_path))
+        self.assertTrue(os.path.isdir(thumbnail_dir))
+
+        self.preview_url.clock.advance_time_msec(IMAGE_CACHE_EXPIRY_MS + 1)
+        self.get_success(self.preview_url._expire_url_cache_data())
+
+        for path in [file_path] + file_dirs + [thumbnail_dir] + thumbnail_dirs:
+            self.assertFalse(
+                os.path.exists(path),
+                f"{os.path.relpath(path, self.media_store_path)} was not deleted",
+            )
+
+    @unittest.override_config({"url_preview_url_blacklist": [{"port": "*"}]})
+    def test_blacklist_port(self) -> None:
+        """Tests that blacklisting URLs with a port makes previewing such URLs
+        fail with a 403 error and doesn't impact other previews.
+        """
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+        bad_url = quote("http://matrix.org:8888/foo")
+        good_url = quote("http://matrix.org/foo")
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=" + bad_url,
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+        self.assertEqual(channel.code, 403, channel.result)
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=" + good_url,
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
+            % (len(self.end_content),)
+            + self.end_content
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
diff --git a/tests/rest/media/v1/__init__.py b/tests/rest/media/v1/__init__.py
deleted file mode 100644
index b1ee10cfcc..0000000000
--- a/tests/rest/media/v1/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright 2018 New Vector Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/tests/rest/media/v1/test_base.py b/tests/rest/media/v1/test_base.py
deleted file mode 100644
index c73179151a..0000000000
--- a/tests/rest/media/v1/test_base.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright 2019 New Vector Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from synapse.rest.media.v1._base import get_filename_from_headers
-
-from tests import unittest
-
-
-class GetFileNameFromHeadersTests(unittest.TestCase):
-    # input -> expected result
-    TEST_CASES = {
-        b"inline; filename=abc.txt": "abc.txt",
-        b'inline; filename="azerty"': "azerty",
-        b'inline; filename="aze%20rty"': "aze%20rty",
-        b'inline; filename="aze"rty"': 'aze"rty',
-        b'inline; filename="azer;ty"': "azer;ty",
-        b"inline; filename*=utf-8''foo%C2%A3bar": "foo£bar",
-    }
-
-    def tests(self) -> None:
-        for hdr, expected in self.TEST_CASES.items():
-            res = get_filename_from_headers({b"Content-Disposition": [hdr]})
-            self.assertEqual(
-                res,
-                expected,
-                f"expected output for {hdr!r} to be {expected} but was {res}",
-            )
diff --git a/tests/rest/media/v1/test_filepath.py b/tests/rest/media/v1/test_filepath.py
deleted file mode 100644
index 43e6f0f70a..0000000000
--- a/tests/rest/media/v1/test_filepath.py
+++ /dev/null
@@ -1,595 +0,0 @@
-# Copyright 2021 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import inspect
-import os
-from typing import Iterable
-
-from synapse.rest.media.v1.filepath import MediaFilePaths, _wrap_with_jail_check
-
-from tests import unittest
-
-
-class MediaFilePathsTestCase(unittest.TestCase):
-    def setUp(self) -> None:
-        super().setUp()
-
-        self.filepaths = MediaFilePaths("/media_store")
-
-    def test_local_media_filepath(self) -> None:
-        """Test local media paths"""
-        self.assertEqual(
-            self.filepaths.local_media_filepath_rel("GerZNDnDZVjsOtardLuwfIBg"),
-            "local_content/Ge/rZ/NDnDZVjsOtardLuwfIBg",
-        )
-        self.assertEqual(
-            self.filepaths.local_media_filepath("GerZNDnDZVjsOtardLuwfIBg"),
-            "/media_store/local_content/Ge/rZ/NDnDZVjsOtardLuwfIBg",
-        )
-
-    def test_local_media_thumbnail(self) -> None:
-        """Test local media thumbnail paths"""
-        self.assertEqual(
-            self.filepaths.local_media_thumbnail_rel(
-                "GerZNDnDZVjsOtardLuwfIBg", 800, 600, "image/jpeg", "scale"
-            ),
-            "local_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale",
-        )
-        self.assertEqual(
-            self.filepaths.local_media_thumbnail(
-                "GerZNDnDZVjsOtardLuwfIBg", 800, 600, "image/jpeg", "scale"
-            ),
-            "/media_store/local_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale",
-        )
-
-    def test_local_media_thumbnail_dir(self) -> None:
-        """Test local media thumbnail directory paths"""
-        self.assertEqual(
-            self.filepaths.local_media_thumbnail_dir("GerZNDnDZVjsOtardLuwfIBg"),
-            "/media_store/local_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg",
-        )
-
-    def test_remote_media_filepath(self) -> None:
-        """Test remote media paths"""
-        self.assertEqual(
-            self.filepaths.remote_media_filepath_rel(
-                "example.com", "GerZNDnDZVjsOtardLuwfIBg"
-            ),
-            "remote_content/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg",
-        )
-        self.assertEqual(
-            self.filepaths.remote_media_filepath(
-                "example.com", "GerZNDnDZVjsOtardLuwfIBg"
-            ),
-            "/media_store/remote_content/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg",
-        )
-
-    def test_remote_media_thumbnail(self) -> None:
-        """Test remote media thumbnail paths"""
-        self.assertEqual(
-            self.filepaths.remote_media_thumbnail_rel(
-                "example.com",
-                "GerZNDnDZVjsOtardLuwfIBg",
-                800,
-                600,
-                "image/jpeg",
-                "scale",
-            ),
-            "remote_thumbnail/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale",
-        )
-        self.assertEqual(
-            self.filepaths.remote_media_thumbnail(
-                "example.com",
-                "GerZNDnDZVjsOtardLuwfIBg",
-                800,
-                600,
-                "image/jpeg",
-                "scale",
-            ),
-            "/media_store/remote_thumbnail/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale",
-        )
-
-    def test_remote_media_thumbnail_legacy(self) -> None:
-        """Test old-style remote media thumbnail paths"""
-        self.assertEqual(
-            self.filepaths.remote_media_thumbnail_rel_legacy(
-                "example.com", "GerZNDnDZVjsOtardLuwfIBg", 800, 600, "image/jpeg"
-            ),
-            "remote_thumbnail/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg",
-        )
-
-    def test_remote_media_thumbnail_dir(self) -> None:
-        """Test remote media thumbnail directory paths"""
-        self.assertEqual(
-            self.filepaths.remote_media_thumbnail_dir(
-                "example.com", "GerZNDnDZVjsOtardLuwfIBg"
-            ),
-            "/media_store/remote_thumbnail/example.com/Ge/rZ/NDnDZVjsOtardLuwfIBg",
-        )
-
-    def test_url_cache_filepath(self) -> None:
-        """Test URL cache paths"""
-        self.assertEqual(
-            self.filepaths.url_cache_filepath_rel("2020-01-02_GerZNDnDZVjsOtar"),
-            "url_cache/2020-01-02/GerZNDnDZVjsOtar",
-        )
-        self.assertEqual(
-            self.filepaths.url_cache_filepath("2020-01-02_GerZNDnDZVjsOtar"),
-            "/media_store/url_cache/2020-01-02/GerZNDnDZVjsOtar",
-        )
-
-    def test_url_cache_filepath_legacy(self) -> None:
-        """Test old-style URL cache paths"""
-        self.assertEqual(
-            self.filepaths.url_cache_filepath_rel("GerZNDnDZVjsOtardLuwfIBg"),
-            "url_cache/Ge/rZ/NDnDZVjsOtardLuwfIBg",
-        )
-        self.assertEqual(
-            self.filepaths.url_cache_filepath("GerZNDnDZVjsOtardLuwfIBg"),
-            "/media_store/url_cache/Ge/rZ/NDnDZVjsOtardLuwfIBg",
-        )
-
-    def test_url_cache_filepath_dirs_to_delete(self) -> None:
-        """Test URL cache cleanup paths"""
-        self.assertEqual(
-            self.filepaths.url_cache_filepath_dirs_to_delete(
-                "2020-01-02_GerZNDnDZVjsOtar"
-            ),
-            ["/media_store/url_cache/2020-01-02"],
-        )
-
-    def test_url_cache_filepath_dirs_to_delete_legacy(self) -> None:
-        """Test old-style URL cache cleanup paths"""
-        self.assertEqual(
-            self.filepaths.url_cache_filepath_dirs_to_delete(
-                "GerZNDnDZVjsOtardLuwfIBg"
-            ),
-            [
-                "/media_store/url_cache/Ge/rZ",
-                "/media_store/url_cache/Ge",
-            ],
-        )
-
-    def test_url_cache_thumbnail(self) -> None:
-        """Test URL cache thumbnail paths"""
-        self.assertEqual(
-            self.filepaths.url_cache_thumbnail_rel(
-                "2020-01-02_GerZNDnDZVjsOtar", 800, 600, "image/jpeg", "scale"
-            ),
-            "url_cache_thumbnails/2020-01-02/GerZNDnDZVjsOtar/800-600-image-jpeg-scale",
-        )
-        self.assertEqual(
-            self.filepaths.url_cache_thumbnail(
-                "2020-01-02_GerZNDnDZVjsOtar", 800, 600, "image/jpeg", "scale"
-            ),
-            "/media_store/url_cache_thumbnails/2020-01-02/GerZNDnDZVjsOtar/800-600-image-jpeg-scale",
-        )
-
-    def test_url_cache_thumbnail_legacy(self) -> None:
-        """Test old-style URL cache thumbnail paths"""
-        self.assertEqual(
-            self.filepaths.url_cache_thumbnail_rel(
-                "GerZNDnDZVjsOtardLuwfIBg", 800, 600, "image/jpeg", "scale"
-            ),
-            "url_cache_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale",
-        )
-        self.assertEqual(
-            self.filepaths.url_cache_thumbnail(
-                "GerZNDnDZVjsOtardLuwfIBg", 800, 600, "image/jpeg", "scale"
-            ),
-            "/media_store/url_cache_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg/800-600-image-jpeg-scale",
-        )
-
-    def test_url_cache_thumbnail_directory(self) -> None:
-        """Test URL cache thumbnail directory paths"""
-        self.assertEqual(
-            self.filepaths.url_cache_thumbnail_directory_rel(
-                "2020-01-02_GerZNDnDZVjsOtar"
-            ),
-            "url_cache_thumbnails/2020-01-02/GerZNDnDZVjsOtar",
-        )
-        self.assertEqual(
-            self.filepaths.url_cache_thumbnail_directory("2020-01-02_GerZNDnDZVjsOtar"),
-            "/media_store/url_cache_thumbnails/2020-01-02/GerZNDnDZVjsOtar",
-        )
-
-    def test_url_cache_thumbnail_directory_legacy(self) -> None:
-        """Test old-style URL cache thumbnail directory paths"""
-        self.assertEqual(
-            self.filepaths.url_cache_thumbnail_directory_rel(
-                "GerZNDnDZVjsOtardLuwfIBg"
-            ),
-            "url_cache_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg",
-        )
-        self.assertEqual(
-            self.filepaths.url_cache_thumbnail_directory("GerZNDnDZVjsOtardLuwfIBg"),
-            "/media_store/url_cache_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg",
-        )
-
-    def test_url_cache_thumbnail_dirs_to_delete(self) -> None:
-        """Test URL cache thumbnail cleanup paths"""
-        self.assertEqual(
-            self.filepaths.url_cache_thumbnail_dirs_to_delete(
-                "2020-01-02_GerZNDnDZVjsOtar"
-            ),
-            [
-                "/media_store/url_cache_thumbnails/2020-01-02/GerZNDnDZVjsOtar",
-                "/media_store/url_cache_thumbnails/2020-01-02",
-            ],
-        )
-
-    def test_url_cache_thumbnail_dirs_to_delete_legacy(self) -> None:
-        """Test old-style URL cache thumbnail cleanup paths"""
-        self.assertEqual(
-            self.filepaths.url_cache_thumbnail_dirs_to_delete(
-                "GerZNDnDZVjsOtardLuwfIBg"
-            ),
-            [
-                "/media_store/url_cache_thumbnails/Ge/rZ/NDnDZVjsOtardLuwfIBg",
-                "/media_store/url_cache_thumbnails/Ge/rZ",
-                "/media_store/url_cache_thumbnails/Ge",
-            ],
-        )
-
-    def test_server_name_validation(self) -> None:
-        """Test validation of server names"""
-        self._test_path_validation(
-            [
-                "remote_media_filepath_rel",
-                "remote_media_filepath",
-                "remote_media_thumbnail_rel",
-                "remote_media_thumbnail",
-                "remote_media_thumbnail_rel_legacy",
-                "remote_media_thumbnail_dir",
-            ],
-            parameter="server_name",
-            valid_values=[
-                "matrix.org",
-                "matrix.org:8448",
-                "matrix-federation.matrix.org",
-                "matrix-federation.matrix.org:8448",
-                "10.1.12.123",
-                "10.1.12.123:8448",
-                "[fd00:abcd::ffff]",
-                "[fd00:abcd::ffff]:8448",
-            ],
-            invalid_values=[
-                "/matrix.org",
-                "matrix.org/..",
-                "matrix.org\x00",
-                "",
-                ".",
-                "..",
-                "/",
-            ],
-        )
-
-    def test_file_id_validation(self) -> None:
-        """Test validation of local, remote and legacy URL cache file / media IDs"""
-        # File / media IDs get split into three parts to form paths, consisting of the
-        # first two characters, next two characters and rest of the ID.
-        valid_file_ids = [
-            "GerZNDnDZVjsOtardLuwfIBg",
-            # Unexpected, but produces an acceptable path:
-            "GerZN",  # "N" becomes the last directory
-        ]
-        invalid_file_ids = [
-            "/erZNDnDZVjsOtardLuwfIBg",
-            "Ge/ZNDnDZVjsOtardLuwfIBg",
-            "GerZ/DnDZVjsOtardLuwfIBg",
-            "GerZ/..",
-            "G\x00rZNDnDZVjsOtardLuwfIBg",
-            "Ger\x00NDnDZVjsOtardLuwfIBg",
-            "GerZNDnDZVjsOtardLuwfIBg\x00",
-            "",
-            "Ge",
-            "GerZ",
-            "GerZ.",
-            "..rZNDnDZVjsOtardLuwfIBg",
-            "Ge..NDnDZVjsOtardLuwfIBg",
-            "GerZ..",
-            "GerZ/",
-        ]
-
-        self._test_path_validation(
-            [
-                "local_media_filepath_rel",
-                "local_media_filepath",
-                "local_media_thumbnail_rel",
-                "local_media_thumbnail",
-                "local_media_thumbnail_dir",
-                # Legacy URL cache media IDs
-                "url_cache_filepath_rel",
-                "url_cache_filepath",
-                # `url_cache_filepath_dirs_to_delete` is tested below.
-                "url_cache_thumbnail_rel",
-                "url_cache_thumbnail",
-                "url_cache_thumbnail_directory_rel",
-                "url_cache_thumbnail_directory",
-                "url_cache_thumbnail_dirs_to_delete",
-            ],
-            parameter="media_id",
-            valid_values=valid_file_ids,
-            invalid_values=invalid_file_ids,
-        )
-
-        # `url_cache_filepath_dirs_to_delete` ignores what would be the last path
-        # component, so only the first 4 characters matter.
-        self._test_path_validation(
-            [
-                "url_cache_filepath_dirs_to_delete",
-            ],
-            parameter="media_id",
-            valid_values=valid_file_ids,
-            invalid_values=[
-                "/erZNDnDZVjsOtardLuwfIBg",
-                "Ge/ZNDnDZVjsOtardLuwfIBg",
-                "G\x00rZNDnDZVjsOtardLuwfIBg",
-                "Ger\x00NDnDZVjsOtardLuwfIBg",
-                "",
-                "Ge",
-                "..rZNDnDZVjsOtardLuwfIBg",
-                "Ge..NDnDZVjsOtardLuwfIBg",
-            ],
-        )
-
-        self._test_path_validation(
-            [
-                "remote_media_filepath_rel",
-                "remote_media_filepath",
-                "remote_media_thumbnail_rel",
-                "remote_media_thumbnail",
-                "remote_media_thumbnail_rel_legacy",
-                "remote_media_thumbnail_dir",
-            ],
-            parameter="file_id",
-            valid_values=valid_file_ids,
-            invalid_values=invalid_file_ids,
-        )
-
-    def test_url_cache_media_id_validation(self) -> None:
-        """Test validation of URL cache media IDs"""
-        self._test_path_validation(
-            [
-                "url_cache_filepath_rel",
-                "url_cache_filepath",
-                # `url_cache_filepath_dirs_to_delete` only cares about the date prefix
-                "url_cache_thumbnail_rel",
-                "url_cache_thumbnail",
-                "url_cache_thumbnail_directory_rel",
-                "url_cache_thumbnail_directory",
-                "url_cache_thumbnail_dirs_to_delete",
-            ],
-            parameter="media_id",
-            valid_values=[
-                "2020-01-02_GerZNDnDZVjsOtar",
-                "2020-01-02_G",  # Unexpected, but produces an acceptable path
-            ],
-            invalid_values=[
-                "2020-01-02",
-                "2020-01-02-",
-                "2020-01-02-.",
-                "2020-01-02-..",
-                "2020-01-02-/",
-                "2020-01-02-/GerZNDnDZVjsOtar",
-                "2020-01-02-GerZNDnDZVjsOtar/..",
-                "2020-01-02-GerZNDnDZVjsOtar\x00",
-            ],
-        )
-
-    def test_content_type_validation(self) -> None:
-        """Test validation of thumbnail content types"""
-        self._test_path_validation(
-            [
-                "local_media_thumbnail_rel",
-                "local_media_thumbnail",
-                "remote_media_thumbnail_rel",
-                "remote_media_thumbnail",
-                "remote_media_thumbnail_rel_legacy",
-                "url_cache_thumbnail_rel",
-                "url_cache_thumbnail",
-            ],
-            parameter="content_type",
-            valid_values=[
-                "image/jpeg",
-            ],
-            invalid_values=[
-                "",  # ValueError: not enough values to unpack
-                "image/jpeg/abc",  # ValueError: too many values to unpack
-                "image/jpeg\x00",
-            ],
-        )
-
-    def test_thumbnail_method_validation(self) -> None:
-        """Test validation of thumbnail methods"""
-        self._test_path_validation(
-            [
-                "local_media_thumbnail_rel",
-                "local_media_thumbnail",
-                "remote_media_thumbnail_rel",
-                "remote_media_thumbnail",
-                "url_cache_thumbnail_rel",
-                "url_cache_thumbnail",
-            ],
-            parameter="method",
-            valid_values=[
-                "crop",
-                "scale",
-            ],
-            invalid_values=[
-                "/scale",
-                "scale/..",
-                "scale\x00",
-                "/",
-            ],
-        )
-
-    def _test_path_validation(
-        self,
-        methods: Iterable[str],
-        parameter: str,
-        valid_values: Iterable[str],
-        invalid_values: Iterable[str],
-    ) -> None:
-        """Test that the specified methods validate the named parameter as expected
-
-        Args:
-            methods: The names of `MediaFilePaths` methods to test
-            parameter: The name of the parameter to test
-            valid_values: A list of parameter values that are expected to be accepted
-            invalid_values: A list of parameter values that are expected to be rejected
-
-        Raises:
-            AssertionError: If a value was accepted when it should have failed
-                validation.
-            ValueError: If a value failed validation when it should have been accepted.
-        """
-        for method in methods:
-            get_path = getattr(self.filepaths, method)
-
-            parameters = inspect.signature(get_path).parameters
-            kwargs = {
-                "server_name": "matrix.org",
-                "media_id": "GerZNDnDZVjsOtardLuwfIBg",
-                "file_id": "GerZNDnDZVjsOtardLuwfIBg",
-                "width": 800,
-                "height": 600,
-                "content_type": "image/jpeg",
-                "method": "scale",
-            }
-
-            if get_path.__name__.startswith("url_"):
-                kwargs["media_id"] = "2020-01-02_GerZNDnDZVjsOtar"
-
-            kwargs = {k: v for k, v in kwargs.items() if k in parameters}
-            kwargs.pop(parameter)
-
-            for value in valid_values:
-                kwargs[parameter] = value
-                get_path(**kwargs)
-                # No exception should be raised
-
-            for value in invalid_values:
-                with self.assertRaises(ValueError):
-                    kwargs[parameter] = value
-                    path_or_list = get_path(**kwargs)
-                    self.fail(
-                        f"{value!r} unexpectedly passed validation: "
-                        f"{method} returned {path_or_list!r}"
-                    )
-
-
-class MediaFilePathsJailTestCase(unittest.TestCase):
-    def _check_relative_path(self, filepaths: MediaFilePaths, path: str) -> None:
-        """Passes a relative path through the jail check.
-
-        Args:
-            filepaths: The `MediaFilePaths` instance.
-            path: A path relative to the media store directory.
-
-        Raises:
-            ValueError: If the jail check fails.
-        """
-
-        @_wrap_with_jail_check(relative=True)
-        def _make_relative_path(self: MediaFilePaths, path: str) -> str:
-            return path
-
-        _make_relative_path(filepaths, path)
-
-    def _check_absolute_path(self, filepaths: MediaFilePaths, path: str) -> None:
-        """Passes an absolute path through the jail check.
-
-        Args:
-            filepaths: The `MediaFilePaths` instance.
-            path: A path relative to the media store directory.
-
-        Raises:
-            ValueError: If the jail check fails.
-        """
-
-        @_wrap_with_jail_check(relative=False)
-        def _make_absolute_path(self: MediaFilePaths, path: str) -> str:
-            return os.path.join(self.base_path, path)
-
-        _make_absolute_path(filepaths, path)
-
-    def test_traversal_inside(self) -> None:
-        """Test the jail check for paths that stay within the media directory."""
-        # Despite the `../`s, these paths still lie within the media directory and it's
-        # expected for the jail check to allow them through.
-        # These paths ought to trip the other checks in place and should never be
-        # returned.
-        filepaths = MediaFilePaths("/media_store")
-        path = "url_cache/2020-01-02/../../GerZNDnDZVjsOtar"
-        self._check_relative_path(filepaths, path)
-        self._check_absolute_path(filepaths, path)
-
-    def test_traversal_outside(self) -> None:
-        """Test that the jail check fails for paths that escape the media directory."""
-        filepaths = MediaFilePaths("/media_store")
-        path = "url_cache/2020-01-02/../../../GerZNDnDZVjsOtar"
-        with self.assertRaises(ValueError):
-            self._check_relative_path(filepaths, path)
-        with self.assertRaises(ValueError):
-            self._check_absolute_path(filepaths, path)
-
-    def test_traversal_reentry(self) -> None:
-        """Test the jail check for paths that exit and re-enter the media directory."""
-        # These paths lie outside the media directory if it is a symlink, and inside
-        # otherwise. Ideally the check should fail, but this proves difficult.
-        # This test documents the behaviour for this edge case.
-        # These paths ought to trip the other checks in place and should never be
-        # returned.
-        filepaths = MediaFilePaths("/media_store")
-        path = "url_cache/2020-01-02/../../../media_store/GerZNDnDZVjsOtar"
-        self._check_relative_path(filepaths, path)
-        self._check_absolute_path(filepaths, path)
-
-    def test_symlink(self) -> None:
-        """Test that a symlink does not cause the jail check to fail."""
-        media_store_path = self.mktemp()
-
-        # symlink the media store directory
-        os.symlink("/mnt/synapse/media_store", media_store_path)
-
-        # Test that relative and absolute paths don't trip the check
-        # NB: `media_store_path` is a relative path
-        filepaths = MediaFilePaths(media_store_path)
-        self._check_relative_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar")
-        self._check_absolute_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar")
-
-        filepaths = MediaFilePaths(os.path.abspath(media_store_path))
-        self._check_relative_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar")
-        self._check_absolute_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar")
-
-    def test_symlink_subdirectory(self) -> None:
-        """Test that a symlinked subdirectory does not cause the jail check to fail."""
-        media_store_path = self.mktemp()
-        os.mkdir(media_store_path)
-
-        # symlink `url_cache/`
-        os.symlink(
-            "/mnt/synapse/media_store_url_cache",
-            os.path.join(media_store_path, "url_cache"),
-        )
-
-        # Test that relative and absolute paths don't trip the check
-        # NB: `media_store_path` is a relative path
-        filepaths = MediaFilePaths(media_store_path)
-        self._check_relative_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar")
-        self._check_absolute_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar")
-
-        filepaths = MediaFilePaths(os.path.abspath(media_store_path))
-        self._check_relative_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar")
-        self._check_absolute_path(filepaths, "url_cache/2020-01-02/GerZNDnDZVjsOtar")
diff --git a/tests/rest/media/v1/test_html_preview.py b/tests/rest/media/v1/test_html_preview.py
deleted file mode 100644
index 1062081a06..0000000000
--- a/tests/rest/media/v1/test_html_preview.py
+++ /dev/null
@@ -1,542 +0,0 @@
-# Copyright 2014-2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from synapse.rest.media.v1.preview_html import (
-    _get_html_media_encodings,
-    decode_body,
-    parse_html_to_open_graph,
-    summarize_paragraphs,
-)
-
-from tests import unittest
-
-try:
-    import lxml
-except ImportError:
-    lxml = None
-
-
-class SummarizeTestCase(unittest.TestCase):
-    if not lxml:
-        skip = "url preview feature requires lxml"
-
-    def test_long_summarize(self) -> None:
-        example_paras = [
-            """Tromsø (Norwegian pronunciation: [ˈtrʊmsœ] ( listen); Northern Sami:
-            Romsa; Finnish: Tromssa[2] Kven: Tromssa) is a city and municipality in
-            Troms county, Norway. The administrative centre of the municipality is
-            the city of Tromsø. Outside of Norway, Tromso and Tromsö are
-            alternative spellings of the city.Tromsø is considered the northernmost
-            city in the world with a population above 50,000. The most populous town
-            north of it is Alta, Norway, with a population of 14,272 (2013).""",
-            """Tromsø lies in Northern Norway. The municipality has a population of
-            (2015) 72,066, but with an annual influx of students it has over 75,000
-            most of the year. It is the largest urban area in Northern Norway and the
-            third largest north of the Arctic Circle (following Murmansk and Norilsk).
-            Most of Tromsø, including the city centre, is located on the island of
-            Tromsøya, 350 kilometres (217 mi) north of the Arctic Circle. In 2012,
-            Tromsøya had a population of 36,088. Substantial parts of the urban area
-            are also situated on the mainland to the east, and on parts of Kvaløya—a
-            large island to the west. Tromsøya is connected to the mainland by the Tromsø
-            Bridge and the Tromsøysund Tunnel, and to the island of Kvaløya by the
-            Sandnessund Bridge. Tromsø Airport connects the city to many destinations
-            in Europe. The city is warmer than most other places located on the same
-            latitude, due to the warming effect of the Gulf Stream.""",
-            """The city centre of Tromsø contains the highest number of old wooden
-            houses in Northern Norway, the oldest house dating from 1789. The Arctic
-            Cathedral, a modern church from 1965, is probably the most famous landmark
-            in Tromsø. The city is a cultural centre for its region, with several
-            festivals taking place in the summer. Some of Norway's best-known
-             musicians, Torbjørn Brundtland and Svein Berge of the electronica duo
-             Röyksopp and Lene Marlin grew up and started their careers in Tromsø.
-             Noted electronic musician Geir Jenssen also hails from Tromsø.""",
-        ]
-
-        desc = summarize_paragraphs(example_paras, min_size=200, max_size=500)
-
-        self.assertEqual(
-            desc,
-            "Tromsø (Norwegian pronunciation: [ˈtrʊmsœ] ( listen); Northern Sami:"
-            " Romsa; Finnish: Tromssa[2] Kven: Tromssa) is a city and municipality in"
-            " Troms county, Norway. The administrative centre of the municipality is"
-            " the city of Tromsø. Outside of Norway, Tromso and Tromsö are"
-            " alternative spellings of the city.Tromsø is considered the northernmost"
-            " city in the world with a population above 50,000. The most populous town"
-            " north of it is Alta, Norway, with a population of 14,272 (2013).",
-        )
-
-        desc = summarize_paragraphs(example_paras[1:], min_size=200, max_size=500)
-
-        self.assertEqual(
-            desc,
-            "Tromsø lies in Northern Norway. The municipality has a population of"
-            " (2015) 72,066, but with an annual influx of students it has over 75,000"
-            " most of the year. It is the largest urban area in Northern Norway and the"
-            " third largest north of the Arctic Circle (following Murmansk and Norilsk)."
-            " Most of Tromsø, including the city centre, is located on the island of"
-            " Tromsøya, 350 kilometres (217 mi) north of the Arctic Circle. In 2012,"
-            " Tromsøya had a population of 36,088. Substantial parts of the urban…",
-        )
-
-    def test_short_summarize(self) -> None:
-        example_paras = [
-            "Tromsø (Norwegian pronunciation: [ˈtrʊmsœ] ( listen); Northern Sami:"
-            " Romsa; Finnish: Tromssa[2] Kven: Tromssa) is a city and municipality in"
-            " Troms county, Norway.",
-            "Tromsø lies in Northern Norway. The municipality has a population of"
-            " (2015) 72,066, but with an annual influx of students it has over 75,000"
-            " most of the year.",
-            "The city centre of Tromsø contains the highest number of old wooden"
-            " houses in Northern Norway, the oldest house dating from 1789. The Arctic"
-            " Cathedral, a modern church from 1965, is probably the most famous landmark"
-            " in Tromsø.",
-        ]
-
-        desc = summarize_paragraphs(example_paras, min_size=200, max_size=500)
-
-        self.assertEqual(
-            desc,
-            "Tromsø (Norwegian pronunciation: [ˈtrʊmsœ] ( listen); Northern Sami:"
-            " Romsa; Finnish: Tromssa[2] Kven: Tromssa) is a city and municipality in"
-            " Troms county, Norway.\n"
-            "\n"
-            "Tromsø lies in Northern Norway. The municipality has a population of"
-            " (2015) 72,066, but with an annual influx of students it has over 75,000"
-            " most of the year.",
-        )
-
-    def test_small_then_large_summarize(self) -> None:
-        example_paras = [
-            "Tromsø (Norwegian pronunciation: [ˈtrʊmsœ] ( listen); Northern Sami:"
-            " Romsa; Finnish: Tromssa[2] Kven: Tromssa) is a city and municipality in"
-            " Troms county, Norway.",
-            "Tromsø lies in Northern Norway. The municipality has a population of"
-            " (2015) 72,066, but with an annual influx of students it has over 75,000"
-            " most of the year."
-            " The city centre of Tromsø contains the highest number of old wooden"
-            " houses in Northern Norway, the oldest house dating from 1789. The Arctic"
-            " Cathedral, a modern church from 1965, is probably the most famous landmark"
-            " in Tromsø.",
-        ]
-
-        desc = summarize_paragraphs(example_paras, min_size=200, max_size=500)
-        self.assertEqual(
-            desc,
-            "Tromsø (Norwegian pronunciation: [ˈtrʊmsœ] ( listen); Northern Sami:"
-            " Romsa; Finnish: Tromssa[2] Kven: Tromssa) is a city and municipality in"
-            " Troms county, Norway.\n"
-            "\n"
-            "Tromsø lies in Northern Norway. The municipality has a population of"
-            " (2015) 72,066, but with an annual influx of students it has over 75,000"
-            " most of the year. The city centre of Tromsø contains the highest number"
-            " of old wooden houses in Northern Norway, the oldest house dating from"
-            " 1789. The Arctic Cathedral, a modern church from…",
-        )
-
-
-class OpenGraphFromHtmlTestCase(unittest.TestCase):
-    if not lxml:
-        skip = "url preview feature requires lxml"
-
-    def test_simple(self) -> None:
-        html = b"""
-        <html>
-        <head><title>Foo</title></head>
-        <body>
-        Some text.
-        </body>
-        </html>
-        """
-
-        tree = decode_body(html, "http://example.com/test.html")
-        og = parse_html_to_open_graph(tree)
-
-        self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
-
-    def test_comment(self) -> None:
-        html = b"""
-        <html>
-        <head><title>Foo</title></head>
-        <body>
-        <!-- HTML comment -->
-        Some text.
-        </body>
-        </html>
-        """
-
-        tree = decode_body(html, "http://example.com/test.html")
-        og = parse_html_to_open_graph(tree)
-
-        self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
-
-    def test_comment2(self) -> None:
-        html = b"""
-        <html>
-        <head><title>Foo</title></head>
-        <body>
-        Some text.
-        <!-- HTML comment -->
-        Some more text.
-        <p>Text</p>
-        More text
-        </body>
-        </html>
-        """
-
-        tree = decode_body(html, "http://example.com/test.html")
-        og = parse_html_to_open_graph(tree)
-
-        self.assertEqual(
-            og,
-            {
-                "og:title": "Foo",
-                "og:description": "Some text.\n\nSome more text.\n\nText\n\nMore text",
-            },
-        )
-
-    def test_script(self) -> None:
-        html = b"""
-        <html>
-        <head><title>Foo</title></head>
-        <body>
-        <script> (function() {})() </script>
-        Some text.
-        </body>
-        </html>
-        """
-
-        tree = decode_body(html, "http://example.com/test.html")
-        og = parse_html_to_open_graph(tree)
-
-        self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
-
-    def test_missing_title(self) -> None:
-        html = b"""
-        <html>
-        <body>
-        Some text.
-        </body>
-        </html>
-        """
-
-        tree = decode_body(html, "http://example.com/test.html")
-        og = parse_html_to_open_graph(tree)
-
-        self.assertEqual(og, {"og:title": None, "og:description": "Some text."})
-
-        # Another variant is a title with no content.
-        html = b"""
-        <html>
-        <head><title></title></head>
-        <body>
-        <h1>Title</h1>
-        </body>
-        </html>
-        """
-
-        tree = decode_body(html, "http://example.com/test.html")
-        og = parse_html_to_open_graph(tree)
-
-        self.assertEqual(og, {"og:title": "Title", "og:description": "Title"})
-
-    def test_h1_as_title(self) -> None:
-        html = b"""
-        <html>
-        <meta property="og:description" content="Some text."/>
-        <body>
-        <h1>Title</h1>
-        </body>
-        </html>
-        """
-
-        tree = decode_body(html, "http://example.com/test.html")
-        og = parse_html_to_open_graph(tree)
-
-        self.assertEqual(og, {"og:title": "Title", "og:description": "Some text."})
-
-    def test_empty_description(self) -> None:
-        """Description tags with empty content should be ignored."""
-        html = b"""
-        <html>
-        <meta property="og:description" content=""/>
-        <meta property="og:description"/>
-        <meta name="description" content=""/>
-        <meta name="description"/>
-        <meta name="description" content="Finally!"/>
-        <body>
-        <h1>Title</h1>
-        </body>
-        </html>
-        """
-
-        tree = decode_body(html, "http://example.com/test.html")
-        og = parse_html_to_open_graph(tree)
-
-        self.assertEqual(og, {"og:title": "Title", "og:description": "Finally!"})
-
-    def test_missing_title_and_broken_h1(self) -> None:
-        html = b"""
-        <html>
-        <body>
-        <h1><a href="foo"/></h1>
-        Some text.
-        </body>
-        </html>
-        """
-
-        tree = decode_body(html, "http://example.com/test.html")
-        og = parse_html_to_open_graph(tree)
-
-        self.assertEqual(og, {"og:title": None, "og:description": "Some text."})
-
-    def test_empty(self) -> None:
-        """Test a body with no data in it."""
-        html = b""
-        tree = decode_body(html, "http://example.com/test.html")
-        self.assertIsNone(tree)
-
-    def test_no_tree(self) -> None:
-        """A valid body with no tree in it."""
-        html = b"\x00"
-        tree = decode_body(html, "http://example.com/test.html")
-        self.assertIsNone(tree)
-
-    def test_xml(self) -> None:
-        """Test decoding XML and ensure it works properly."""
-        # Note that the strip() call is important to ensure the xml tag starts
-        # at the initial byte.
-        html = b"""
-        <?xml version="1.0" encoding="UTF-8"?>
-
-        <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-        <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
-        <head><title>Foo</title></head><body>Some text.</body></html>
-        """.strip()
-        tree = decode_body(html, "http://example.com/test.html")
-        og = parse_html_to_open_graph(tree)
-        self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
-
-    def test_invalid_encoding(self) -> None:
-        """An invalid character encoding should be ignored and treated as UTF-8, if possible."""
-        html = b"""
-        <html>
-        <head><title>Foo</title></head>
-        <body>
-        Some text.
-        </body>
-        </html>
-        """
-        tree = decode_body(html, "http://example.com/test.html", "invalid-encoding")
-        og = parse_html_to_open_graph(tree)
-        self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
-
-    def test_invalid_encoding2(self) -> None:
-        """A body which doesn't match the sent character encoding."""
-        # Note that this contains an invalid UTF-8 sequence in the title.
-        html = b"""
-        <html>
-        <head><title>\xff\xff Foo</title></head>
-        <body>
-        Some text.
-        </body>
-        </html>
-        """
-        tree = decode_body(html, "http://example.com/test.html")
-        og = parse_html_to_open_graph(tree)
-        self.assertEqual(og, {"og:title": "ÿÿ Foo", "og:description": "Some text."})
-
-    def test_windows_1252(self) -> None:
-        """A body which uses cp1252, but doesn't declare that."""
-        html = b"""
-        <html>
-        <head><title>\xf3</title></head>
-        <body>
-        Some text.
-        </body>
-        </html>
-        """
-        tree = decode_body(html, "http://example.com/test.html")
-        og = parse_html_to_open_graph(tree)
-        self.assertEqual(og, {"og:title": "ó", "og:description": "Some text."})
-
-    def test_twitter_tag(self) -> None:
-        """Twitter card tags should be used if nothing else is available."""
-        html = b"""
-        <html>
-        <meta name="twitter:card" content="summary">
-        <meta name="twitter:description" content="Description">
-        <meta name="twitter:site" content="@matrixdotorg">
-        </html>
-        """
-        tree = decode_body(html, "http://example.com/test.html")
-        og = parse_html_to_open_graph(tree)
-        self.assertEqual(
-            og,
-            {
-                "og:title": None,
-                "og:description": "Description",
-                "og:site_name": "@matrixdotorg",
-            },
-        )
-
-        # But they shouldn't override Open Graph values.
-        html = b"""
-        <html>
-        <meta name="twitter:card" content="summary">
-        <meta name="twitter:description" content="Description">
-        <meta property="og:description" content="Real Description">
-        <meta name="twitter:site" content="@matrixdotorg">
-        <meta property="og:site_name" content="matrix.org">
-        </html>
-        """
-        tree = decode_body(html, "http://example.com/test.html")
-        og = parse_html_to_open_graph(tree)
-        self.assertEqual(
-            og,
-            {
-                "og:title": None,
-                "og:description": "Real Description",
-                "og:site_name": "matrix.org",
-            },
-        )
-
-    def test_nested_nodes(self) -> None:
-        """A body with some nested nodes. Tests that we iterate over children
-        in the right order (and don't reverse the order of the text)."""
-        html = b"""
-        <a href="somewhere">Welcome <b>the bold <u>and underlined text <svg>
-        with a cheeky SVG</svg></u> and <strong>some</strong> tail text</b></a>
-        """
-        tree = decode_body(html, "http://example.com/test.html")
-        og = parse_html_to_open_graph(tree)
-        self.assertEqual(
-            og,
-            {
-                "og:title": None,
-                "og:description": "Welcome\n\nthe bold\n\nand underlined text\n\nand\n\nsome\n\ntail text",
-            },
-        )
-
-
-class MediaEncodingTestCase(unittest.TestCase):
-    def test_meta_charset(self) -> None:
-        """A character encoding is found via the meta tag."""
-        encodings = _get_html_media_encodings(
-            b"""
-        <html>
-        <head><meta charset="ascii">
-        </head>
-        </html>
-        """,
-            "text/html",
-        )
-        self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"])
-
-        # A less well-formed version.
-        encodings = _get_html_media_encodings(
-            b"""
-        <html>
-        <head>< meta charset = ascii>
-        </head>
-        </html>
-        """,
-            "text/html",
-        )
-        self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"])
-
-    def test_meta_charset_underscores(self) -> None:
-        """A character encoding contains underscore."""
-        encodings = _get_html_media_encodings(
-            b"""
-        <html>
-        <head><meta charset="Shift_JIS">
-        </head>
-        </html>
-        """,
-            "text/html",
-        )
-        self.assertEqual(list(encodings), ["shift_jis", "utf-8", "cp1252"])
-
-    def test_xml_encoding(self) -> None:
-        """A character encoding is found via the meta tag."""
-        encodings = _get_html_media_encodings(
-            b"""
-        <?xml version="1.0" encoding="ascii"?>
-        <html>
-        </html>
-        """,
-            "text/html",
-        )
-        self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"])
-
-    def test_meta_xml_encoding(self) -> None:
-        """Meta tags take precedence over XML encoding."""
-        encodings = _get_html_media_encodings(
-            b"""
-        <?xml version="1.0" encoding="ascii"?>
-        <html>
-        <head><meta charset="UTF-16">
-        </head>
-        </html>
-        """,
-            "text/html",
-        )
-        self.assertEqual(list(encodings), ["utf-16", "ascii", "utf-8", "cp1252"])
-
-    def test_content_type(self) -> None:
-        """A character encoding is found via the Content-Type header."""
-        # Test a few variations of the header.
-        headers = (
-            'text/html; charset="ascii";',
-            "text/html;charset=ascii;",
-            'text/html;  charset="ascii"',
-            "text/html; charset=ascii",
-            'text/html; charset="ascii;',
-            'text/html; charset=ascii";',
-        )
-        for header in headers:
-            encodings = _get_html_media_encodings(b"", header)
-            self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"])
-
-    def test_fallback(self) -> None:
-        """A character encoding cannot be found in the body or header."""
-        encodings = _get_html_media_encodings(b"", "text/html")
-        self.assertEqual(list(encodings), ["utf-8", "cp1252"])
-
-    def test_duplicates(self) -> None:
-        """Ensure each encoding is only attempted once."""
-        encodings = _get_html_media_encodings(
-            b"""
-        <?xml version="1.0" encoding="utf8"?>
-        <html>
-        <head><meta charset="UTF-8">
-        </head>
-        </html>
-        """,
-            'text/html; charset="UTF_8"',
-        )
-        self.assertEqual(list(encodings), ["utf-8", "cp1252"])
-
-    def test_unknown_invalid(self) -> None:
-        """A character encoding should be ignored if it is unknown or invalid."""
-        encodings = _get_html_media_encodings(
-            b"""
-        <html>
-        <head><meta charset="invalid">
-        </head>
-        </html>
-        """,
-            'text/html; charset="invalid"',
-        )
-        self.assertEqual(list(encodings), ["utf-8", "cp1252"])
diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py
deleted file mode 100644
index 8ed27179c4..0000000000
--- a/tests/rest/media/v1/test_media_storage.py
+++ /dev/null
@@ -1,792 +0,0 @@
-# Copyright 2018-2021 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import os
-import shutil
-import tempfile
-from binascii import unhexlify
-from io import BytesIO
-from typing import Any, BinaryIO, ClassVar, Dict, List, Optional, Tuple, Union
-from unittest.mock import Mock
-from urllib import parse
-
-import attr
-from parameterized import parameterized, parameterized_class
-from PIL import Image as Image
-from typing_extensions import Literal
-
-from twisted.internet import defer
-from twisted.internet.defer import Deferred
-from twisted.test.proto_helpers import MemoryReactor
-
-from synapse.api.errors import Codes
-from synapse.events import EventBase
-from synapse.events.spamcheck import load_legacy_spam_checkers
-from synapse.http.types import QueryParams
-from synapse.logging.context import make_deferred_yieldable
-from synapse.module_api import ModuleApi
-from synapse.rest import admin
-from synapse.rest.client import login
-from synapse.rest.media.v1._base import FileInfo
-from synapse.rest.media.v1.filepath import MediaFilePaths
-from synapse.rest.media.v1.media_storage import MediaStorage, ReadableFileWrapper
-from synapse.rest.media.v1.storage_provider import FileStorageProviderBackend
-from synapse.server import HomeServer
-from synapse.types import JsonDict, RoomAlias
-from synapse.util import Clock
-
-from tests import unittest
-from tests.server import FakeChannel, FakeSite, make_request
-from tests.test_utils import SMALL_PNG
-from tests.utils import default_config
-
-
-class MediaStorageTests(unittest.HomeserverTestCase):
-    needs_threadpool = True
-
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-")
-        self.addCleanup(shutil.rmtree, self.test_dir)
-
-        self.primary_base_path = os.path.join(self.test_dir, "primary")
-        self.secondary_base_path = os.path.join(self.test_dir, "secondary")
-
-        hs.config.media.media_store_path = self.primary_base_path
-
-        storage_providers = [FileStorageProviderBackend(hs, self.secondary_base_path)]
-
-        self.filepaths = MediaFilePaths(self.primary_base_path)
-        self.media_storage = MediaStorage(
-            hs, self.primary_base_path, self.filepaths, storage_providers
-        )
-
-    def test_ensure_media_is_in_local_cache(self) -> None:
-        media_id = "some_media_id"
-        test_body = "Test\n"
-
-        # First we create a file that is in a storage provider but not in the
-        # local primary media store
-        rel_path = self.filepaths.local_media_filepath_rel(media_id)
-        secondary_path = os.path.join(self.secondary_base_path, rel_path)
-
-        os.makedirs(os.path.dirname(secondary_path))
-
-        with open(secondary_path, "w") as f:
-            f.write(test_body)
-
-        # Now we run ensure_media_is_in_local_cache, which should copy the file
-        # to the local cache.
-        file_info = FileInfo(None, media_id)
-
-        # This uses a real blocking threadpool so we have to wait for it to be
-        # actually done :/
-        x = defer.ensureDeferred(
-            self.media_storage.ensure_media_is_in_local_cache(file_info)
-        )
-
-        # Hotloop until the threadpool does its job...
-        self.wait_on_thread(x)
-
-        local_path = self.get_success(x)
-
-        self.assertTrue(os.path.exists(local_path))
-
-        # Asserts the file is under the expected local cache directory
-        self.assertEqual(
-            os.path.commonprefix([self.primary_base_path, local_path]),
-            self.primary_base_path,
-        )
-
-        with open(local_path) as f:
-            body = f.read()
-
-        self.assertEqual(test_body, body)
-
-
-@attr.s(auto_attribs=True, slots=True, frozen=True)
-class _TestImage:
-    """An image for testing thumbnailing with the expected results
-
-    Attributes:
-        data: The raw image to thumbnail
-        content_type: The type of the image as a content type, e.g. "image/png"
-        extension: The extension associated with the format, e.g. ".png"
-        expected_cropped: The expected bytes from cropped thumbnailing, or None if
-            test should just check for success.
-        expected_scaled: The expected bytes from scaled thumbnailing, or None if
-            test should just check for a valid image returned.
-        expected_found: True if the file should exist on the server, or False if
-            a 404/400 is expected.
-        unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or
-            False if the thumbnailing should succeed or a normal 404 is expected.
-    """
-
-    data: bytes
-    content_type: bytes
-    extension: bytes
-    expected_cropped: Optional[bytes] = None
-    expected_scaled: Optional[bytes] = None
-    expected_found: bool = True
-    unable_to_thumbnail: bool = False
-
-
-@parameterized_class(
-    ("test_image",),
-    [
-        # small png
-        (
-            _TestImage(
-                SMALL_PNG,
-                b"image/png",
-                b".png",
-                unhexlify(
-                    b"89504e470d0a1a0a0000000d4948445200000020000000200806"
-                    b"000000737a7af40000001a49444154789cedc101010000008220"
-                    b"ffaf6e484001000000ef0610200001194334ee0000000049454e"
-                    b"44ae426082"
-                ),
-                unhexlify(
-                    b"89504e470d0a1a0a0000000d4948445200000001000000010806"
-                    b"0000001f15c4890000000d49444154789c636060606000000005"
-                    b"0001a5f645400000000049454e44ae426082"
-                ),
-            ),
-        ),
-        # small png with transparency.
-        (
-            _TestImage(
-                unhexlify(
-                    b"89504e470d0a1a0a0000000d49484452000000010000000101000"
-                    b"00000376ef9240000000274524e5300010194fdae0000000a4944"
-                    b"4154789c636800000082008177cd72b60000000049454e44ae426"
-                    b"082"
-                ),
-                b"image/png",
-                b".png",
-                # Note that we don't check the output since it varies across
-                # different versions of Pillow.
-            ),
-        ),
-        # small lossless webp
-        (
-            _TestImage(
-                unhexlify(
-                    b"524946461a000000574542505650384c0d0000002f0000001007"
-                    b"1011118888fe0700"
-                ),
-                b"image/webp",
-                b".webp",
-            ),
-        ),
-        # an empty file
-        (
-            _TestImage(
-                b"",
-                b"image/gif",
-                b".gif",
-                expected_found=False,
-                unable_to_thumbnail=True,
-            ),
-        ),
-    ],
-)
-class MediaRepoTests(unittest.HomeserverTestCase):
-    test_image: ClassVar[_TestImage]
-    hijack_auth = True
-    user_id = "@test:user"
-
-    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        self.fetches: List[
-            Tuple[
-                "Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]]",
-                str,
-                str,
-                Optional[QueryParams],
-            ]
-        ] = []
-
-        def get_file(
-            destination: str,
-            path: str,
-            output_stream: BinaryIO,
-            args: Optional[QueryParams] = None,
-            retry_on_dns_fail: bool = True,
-            max_size: Optional[int] = None,
-            ignore_backoff: bool = False,
-        ) -> "Deferred[Tuple[int, Dict[bytes, List[bytes]]]]":
-            """A mock for MatrixFederationHttpClient.get_file."""
-
-            def write_to(
-                r: Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]
-            ) -> Tuple[int, Dict[bytes, List[bytes]]]:
-                data, response = r
-                output_stream.write(data)
-                return response
-
-            d: Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]] = Deferred()
-            self.fetches.append((d, destination, path, args))
-            # Note that this callback changes the value held by d.
-            d_after_callback = d.addCallback(write_to)
-            return make_deferred_yieldable(d_after_callback)
-
-        # Mock out the homeserver's MatrixFederationHttpClient
-        client = Mock()
-        client.get_file = get_file
-
-        self.storage_path = self.mktemp()
-        self.media_store_path = self.mktemp()
-        os.mkdir(self.storage_path)
-        os.mkdir(self.media_store_path)
-
-        config = self.default_config()
-        config["media_store_path"] = self.media_store_path
-        config["max_image_pixels"] = 2000000
-
-        provider_config = {
-            "module": "synapse.rest.media.v1.storage_provider.FileStorageProviderBackend",
-            "store_local": True,
-            "store_synchronous": False,
-            "store_remote": True,
-            "config": {"directory": self.storage_path},
-        }
-        config["media_storage_providers"] = [provider_config]
-
-        hs = self.setup_test_homeserver(config=config, federation_http_client=client)
-
-        return hs
-
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        media_resource = hs.get_media_repository_resource()
-        self.download_resource = media_resource.children[b"download"]
-        self.thumbnail_resource = media_resource.children[b"thumbnail"]
-        self.store = hs.get_datastores().main
-        self.media_repo = hs.get_media_repository()
-
-        self.media_id = "example.com/12345"
-
-    def _req(
-        self, content_disposition: Optional[bytes], include_content_type: bool = True
-    ) -> FakeChannel:
-        channel = make_request(
-            self.reactor,
-            FakeSite(self.download_resource, self.reactor),
-            "GET",
-            self.media_id,
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        # We've made one fetch, to example.com, using the media URL, and asking
-        # the other server not to do a remote fetch
-        self.assertEqual(len(self.fetches), 1)
-        self.assertEqual(self.fetches[0][1], "example.com")
-        self.assertEqual(
-            self.fetches[0][2], "/_matrix/media/r0/download/" + self.media_id
-        )
-        self.assertEqual(self.fetches[0][3], {"allow_remote": "false"})
-
-        headers = {
-            b"Content-Length": [b"%d" % (len(self.test_image.data))],
-        }
-
-        if include_content_type:
-            headers[b"Content-Type"] = [self.test_image.content_type]
-
-        if content_disposition:
-            headers[b"Content-Disposition"] = [content_disposition]
-
-        self.fetches[0][0].callback(
-            (self.test_image.data, (len(self.test_image.data), headers))
-        )
-
-        self.pump()
-        self.assertEqual(channel.code, 200)
-
-        return channel
-
-    def test_handle_missing_content_type(self) -> None:
-        channel = self._req(
-            b"inline; filename=out" + self.test_image.extension,
-            include_content_type=False,
-        )
-        headers = channel.headers
-        self.assertEqual(channel.code, 200)
-        self.assertEqual(
-            headers.getRawHeaders(b"Content-Type"), [b"application/octet-stream"]
-        )
-
-    def test_disposition_filename_ascii(self) -> None:
-        """
-        If the filename is filename=<ascii> then Synapse will decode it as an
-        ASCII string, and use filename= in the response.
-        """
-        channel = self._req(b"inline; filename=out" + self.test_image.extension)
-
-        headers = channel.headers
-        self.assertEqual(
-            headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
-        )
-        self.assertEqual(
-            headers.getRawHeaders(b"Content-Disposition"),
-            [b"inline; filename=out" + self.test_image.extension],
-        )
-
-    def test_disposition_filenamestar_utf8escaped(self) -> None:
-        """
-        If the filename is filename=*utf8''<utf8 escaped> then Synapse will
-        correctly decode it as the UTF-8 string, and use filename* in the
-        response.
-        """
-        filename = parse.quote("\u2603".encode()).encode("ascii")
-        channel = self._req(
-            b"inline; filename*=utf-8''" + filename + self.test_image.extension
-        )
-
-        headers = channel.headers
-        self.assertEqual(
-            headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
-        )
-        self.assertEqual(
-            headers.getRawHeaders(b"Content-Disposition"),
-            [b"inline; filename*=utf-8''" + filename + self.test_image.extension],
-        )
-
-    def test_disposition_none(self) -> None:
-        """
-        If there is no filename, one isn't passed on in the Content-Disposition
-        of the request.
-        """
-        channel = self._req(None)
-
-        headers = channel.headers
-        self.assertEqual(
-            headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
-        )
-        self.assertEqual(headers.getRawHeaders(b"Content-Disposition"), None)
-
-    def test_thumbnail_crop(self) -> None:
-        """Test that a cropped remote thumbnail is available."""
-        self._test_thumbnail(
-            "crop",
-            self.test_image.expected_cropped,
-            expected_found=self.test_image.expected_found,
-            unable_to_thumbnail=self.test_image.unable_to_thumbnail,
-        )
-
-    def test_thumbnail_scale(self) -> None:
-        """Test that a scaled remote thumbnail is available."""
-        self._test_thumbnail(
-            "scale",
-            self.test_image.expected_scaled,
-            expected_found=self.test_image.expected_found,
-            unable_to_thumbnail=self.test_image.unable_to_thumbnail,
-        )
-
-    def test_invalid_type(self) -> None:
-        """An invalid thumbnail type is never available."""
-        self._test_thumbnail(
-            "invalid",
-            None,
-            expected_found=False,
-            unable_to_thumbnail=self.test_image.unable_to_thumbnail,
-        )
-
-    @unittest.override_config(
-        {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "scale"}]}
-    )
-    def test_no_thumbnail_crop(self) -> None:
-        """
-        Override the config to generate only scaled thumbnails, but request a cropped one.
-        """
-        self._test_thumbnail(
-            "crop",
-            None,
-            expected_found=False,
-            unable_to_thumbnail=self.test_image.unable_to_thumbnail,
-        )
-
-    @unittest.override_config(
-        {"thumbnail_sizes": [{"width": 32, "height": 32, "method": "crop"}]}
-    )
-    def test_no_thumbnail_scale(self) -> None:
-        """
-        Override the config to generate only cropped thumbnails, but request a scaled one.
-        """
-        self._test_thumbnail(
-            "scale",
-            None,
-            expected_found=False,
-            unable_to_thumbnail=self.test_image.unable_to_thumbnail,
-        )
-
-    def test_thumbnail_repeated_thumbnail(self) -> None:
-        """Test that fetching the same thumbnail works, and deleting the on disk
-        thumbnail regenerates it.
-        """
-        self._test_thumbnail(
-            "scale",
-            self.test_image.expected_scaled,
-            expected_found=self.test_image.expected_found,
-            unable_to_thumbnail=self.test_image.unable_to_thumbnail,
-        )
-
-        if not self.test_image.expected_found:
-            return
-
-        # Fetching again should work, without re-requesting the image from the
-        # remote.
-        params = "?width=32&height=32&method=scale"
-        channel = make_request(
-            self.reactor,
-            FakeSite(self.thumbnail_resource, self.reactor),
-            "GET",
-            self.media_id + params,
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        self.assertEqual(channel.code, 200)
-        if self.test_image.expected_scaled:
-            self.assertEqual(
-                channel.result["body"],
-                self.test_image.expected_scaled,
-                channel.result["body"],
-            )
-
-        # Deleting the thumbnail on disk then re-requesting it should work as
-        # Synapse should regenerate missing thumbnails.
-        origin, media_id = self.media_id.split("/")
-        info = self.get_success(self.store.get_cached_remote_media(origin, media_id))
-        assert info is not None
-        file_id = info["filesystem_id"]
-
-        thumbnail_dir = self.media_repo.filepaths.remote_media_thumbnail_dir(
-            origin, file_id
-        )
-        shutil.rmtree(thumbnail_dir, ignore_errors=True)
-
-        channel = make_request(
-            self.reactor,
-            FakeSite(self.thumbnail_resource, self.reactor),
-            "GET",
-            self.media_id + params,
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        self.assertEqual(channel.code, 200)
-        if self.test_image.expected_scaled:
-            self.assertEqual(
-                channel.result["body"],
-                self.test_image.expected_scaled,
-                channel.result["body"],
-            )
-
-    def _test_thumbnail(
-        self,
-        method: str,
-        expected_body: Optional[bytes],
-        expected_found: bool,
-        unable_to_thumbnail: bool = False,
-    ) -> None:
-        """Test the given thumbnailing method works as expected.
-
-        Args:
-            method: The thumbnailing method to use (crop, scale).
-            expected_body: The expected bytes from thumbnailing, or None if
-                test should just check for a valid image.
-            expected_found: True if the file should exist on the server, or False if
-                a 404/400 is expected.
-            unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or
-                False if the thumbnailing should succeed or a normal 404 is expected.
-        """
-
-        params = "?width=32&height=32&method=" + method
-        channel = make_request(
-            self.reactor,
-            FakeSite(self.thumbnail_resource, self.reactor),
-            "GET",
-            self.media_id + params,
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        headers = {
-            b"Content-Length": [b"%d" % (len(self.test_image.data))],
-            b"Content-Type": [self.test_image.content_type],
-        }
-        self.fetches[0][0].callback(
-            (self.test_image.data, (len(self.test_image.data), headers))
-        )
-        self.pump()
-
-        if expected_found:
-            self.assertEqual(channel.code, 200)
-
-            self.assertEqual(
-                channel.headers.getRawHeaders(b"Cross-Origin-Resource-Policy"),
-                [b"cross-origin"],
-            )
-
-            if expected_body is not None:
-                self.assertEqual(
-                    channel.result["body"], expected_body, channel.result["body"]
-                )
-            else:
-                # ensure that the result is at least some valid image
-                Image.open(BytesIO(channel.result["body"]))
-        elif unable_to_thumbnail:
-            # A 400 with a JSON body.
-            self.assertEqual(channel.code, 400)
-            self.assertEqual(
-                channel.json_body,
-                {
-                    "errcode": "M_UNKNOWN",
-                    "error": "Cannot find any thumbnails for the requested media ([b'example.com', b'12345']). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)",
-                },
-            )
-        else:
-            # A 404 with a JSON body.
-            self.assertEqual(channel.code, 404)
-            self.assertEqual(
-                channel.json_body,
-                {
-                    "errcode": "M_NOT_FOUND",
-                    "error": "Not found [b'example.com', b'12345']",
-                },
-            )
-
-    @parameterized.expand([("crop", 16), ("crop", 64), ("scale", 16), ("scale", 64)])
-    def test_same_quality(self, method: str, desired_size: int) -> None:
-        """Test that choosing between thumbnails with the same quality rating succeeds.
-
-        We are not particular about which thumbnail is chosen."""
-        self.assertIsNotNone(
-            self.thumbnail_resource._select_thumbnail(
-                desired_width=desired_size,
-                desired_height=desired_size,
-                desired_method=method,
-                desired_type=self.test_image.content_type,
-                # Provide two identical thumbnails which are guaranteed to have the same
-                # quality rating.
-                thumbnail_infos=[
-                    {
-                        "thumbnail_width": 32,
-                        "thumbnail_height": 32,
-                        "thumbnail_method": method,
-                        "thumbnail_type": self.test_image.content_type,
-                        "thumbnail_length": 256,
-                        "filesystem_id": f"thumbnail1{self.test_image.extension.decode()}",
-                    },
-                    {
-                        "thumbnail_width": 32,
-                        "thumbnail_height": 32,
-                        "thumbnail_method": method,
-                        "thumbnail_type": self.test_image.content_type,
-                        "thumbnail_length": 256,
-                        "filesystem_id": f"thumbnail2{self.test_image.extension.decode()}",
-                    },
-                ],
-                file_id=f"image{self.test_image.extension.decode()}",
-                url_cache=None,
-                server_name=None,
-            )
-        )
-
-    def test_x_robots_tag_header(self) -> None:
-        """
-        Tests that the `X-Robots-Tag` header is present, which informs web crawlers
-        to not index, archive, or follow links in media.
-        """
-        channel = self._req(b"inline; filename=out" + self.test_image.extension)
-
-        headers = channel.headers
-        self.assertEqual(
-            headers.getRawHeaders(b"X-Robots-Tag"),
-            [b"noindex, nofollow, noarchive, noimageindex"],
-        )
-
-    def test_cross_origin_resource_policy_header(self) -> None:
-        """
-        Test that the Cross-Origin-Resource-Policy header is set to "cross-origin"
-        allowing web clients to embed media from the downloads API.
-        """
-        channel = self._req(b"inline; filename=out" + self.test_image.extension)
-
-        headers = channel.headers
-
-        self.assertEqual(
-            headers.getRawHeaders(b"Cross-Origin-Resource-Policy"),
-            [b"cross-origin"],
-        )
-
-
-class TestSpamCheckerLegacy:
-    """A spam checker module that rejects all media that includes the bytes
-    `evil`.
-
-    Uses the legacy Spam-Checker API.
-    """
-
-    def __init__(self, config: Dict[str, Any], api: ModuleApi) -> None:
-        self.config = config
-        self.api = api
-
-    @staticmethod
-    def parse_config(config: Dict[str, Any]) -> Dict[str, Any]:
-        return config
-
-    async def check_event_for_spam(self, event: EventBase) -> Union[bool, str]:
-        return False  # allow all events
-
-    async def user_may_invite(
-        self,
-        inviter_userid: str,
-        invitee_userid: str,
-        room_id: str,
-    ) -> bool:
-        return True  # allow all invites
-
-    async def user_may_create_room(self, userid: str) -> bool:
-        return True  # allow all room creations
-
-    async def user_may_create_room_alias(
-        self, userid: str, room_alias: RoomAlias
-    ) -> bool:
-        return True  # allow all room aliases
-
-    async def user_may_publish_room(self, userid: str, room_id: str) -> bool:
-        return True  # allow publishing of all rooms
-
-    async def check_media_file_for_spam(
-        self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
-    ) -> bool:
-        buf = BytesIO()
-        await file_wrapper.write_chunks_to(buf.write)
-
-        return b"evil" in buf.getvalue()
-
-
-class SpamCheckerTestCaseLegacy(unittest.HomeserverTestCase):
-    servlets = [
-        login.register_servlets,
-        admin.register_servlets,
-    ]
-
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        self.user = self.register_user("user", "pass")
-        self.tok = self.login("user", "pass")
-
-        # Allow for uploading and downloading to/from the media repo
-        self.media_repo = hs.get_media_repository_resource()
-        self.download_resource = self.media_repo.children[b"download"]
-        self.upload_resource = self.media_repo.children[b"upload"]
-
-        load_legacy_spam_checkers(hs)
-
-    def default_config(self) -> Dict[str, Any]:
-        config = default_config("test")
-
-        config.update(
-            {
-                "spam_checker": [
-                    {
-                        "module": TestSpamCheckerLegacy.__module__
-                        + ".TestSpamCheckerLegacy",
-                        "config": {},
-                    }
-                ]
-            }
-        )
-
-        return config
-
-    def test_upload_innocent(self) -> None:
-        """Attempt to upload some innocent data that should be allowed."""
-        self.helper.upload_media(
-            self.upload_resource, SMALL_PNG, tok=self.tok, expect_code=200
-        )
-
-    def test_upload_ban(self) -> None:
-        """Attempt to upload some data that includes bytes "evil", which should
-        get rejected by the spam checker.
-        """
-
-        data = b"Some evil data"
-
-        self.helper.upload_media(
-            self.upload_resource, data, tok=self.tok, expect_code=400
-        )
-
-
-EVIL_DATA = b"Some evil data"
-EVIL_DATA_EXPERIMENT = b"Some evil data to trigger the experimental tuple API"
-
-
-class SpamCheckerTestCase(unittest.HomeserverTestCase):
-    servlets = [
-        login.register_servlets,
-        admin.register_servlets,
-    ]
-
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        self.user = self.register_user("user", "pass")
-        self.tok = self.login("user", "pass")
-
-        # Allow for uploading and downloading to/from the media repo
-        self.media_repo = hs.get_media_repository_resource()
-        self.download_resource = self.media_repo.children[b"download"]
-        self.upload_resource = self.media_repo.children[b"upload"]
-
-        hs.get_module_api().register_spam_checker_callbacks(
-            check_media_file_for_spam=self.check_media_file_for_spam
-        )
-
-    async def check_media_file_for_spam(
-        self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
-    ) -> Union[Codes, Literal["NOT_SPAM"], Tuple[Codes, JsonDict]]:
-        buf = BytesIO()
-        await file_wrapper.write_chunks_to(buf.write)
-
-        if buf.getvalue() == EVIL_DATA:
-            return Codes.FORBIDDEN
-        elif buf.getvalue() == EVIL_DATA_EXPERIMENT:
-            return (Codes.FORBIDDEN, {})
-        else:
-            return "NOT_SPAM"
-
-    def test_upload_innocent(self) -> None:
-        """Attempt to upload some innocent data that should be allowed."""
-        self.helper.upload_media(
-            self.upload_resource, SMALL_PNG, tok=self.tok, expect_code=200
-        )
-
-    def test_upload_ban(self) -> None:
-        """Attempt to upload some data that includes bytes "evil", which should
-        get rejected by the spam checker.
-        """
-
-        self.helper.upload_media(
-            self.upload_resource, EVIL_DATA, tok=self.tok, expect_code=400
-        )
-
-        self.helper.upload_media(
-            self.upload_resource,
-            EVIL_DATA_EXPERIMENT,
-            tok=self.tok,
-            expect_code=400,
-        )
diff --git a/tests/rest/media/v1/test_oembed.py b/tests/rest/media/v1/test_oembed.py
deleted file mode 100644
index 3f7f1dbab9..0000000000
--- a/tests/rest/media/v1/test_oembed.py
+++ /dev/null
@@ -1,162 +0,0 @@
-#  Copyright 2021 The Matrix.org Foundation C.I.C.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-import json
-
-from parameterized import parameterized
-
-from twisted.test.proto_helpers import MemoryReactor
-
-from synapse.rest.media.v1.oembed import OEmbedProvider, OEmbedResult
-from synapse.server import HomeServer
-from synapse.types import JsonDict
-from synapse.util import Clock
-
-from tests.unittest import HomeserverTestCase
-
-try:
-    import lxml
-except ImportError:
-    lxml = None
-
-
-class OEmbedTests(HomeserverTestCase):
-    if not lxml:
-        skip = "url preview feature requires lxml"
-
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        self.oembed = OEmbedProvider(hs)
-
-    def parse_response(self, response: JsonDict) -> OEmbedResult:
-        return self.oembed.parse_oembed_response(
-            "https://test", json.dumps(response).encode("utf-8")
-        )
-
-    def test_version(self) -> None:
-        """Accept versions that are similar to 1.0 as a string or int (or missing)."""
-        for version in ("1.0", 1.0, 1):
-            result = self.parse_response({"version": version})
-            # An empty Open Graph response is an error, ensure the URL is included.
-            self.assertIn("og:url", result.open_graph_result)
-
-        # A missing version should be treated as 1.0.
-        result = self.parse_response({"type": "link"})
-        self.assertIn("og:url", result.open_graph_result)
-
-        # Invalid versions should be rejected.
-        for version in ("2.0", "1", 1.1, 0, None, {}, []):
-            result = self.parse_response({"version": version, "type": "link"})
-            # An empty Open Graph response is an error, ensure the URL is included.
-            self.assertEqual({}, result.open_graph_result)
-
-    def test_cache_age(self) -> None:
-        """Ensure a cache-age is parsed properly."""
-        # Correct-ish cache ages are allowed.
-        for cache_age in ("1", 1.0, 1):
-            result = self.parse_response({"cache_age": cache_age})
-            self.assertEqual(result.cache_age, 1000)
-
-        # Invalid cache ages are ignored.
-        for cache_age in ("invalid", {}):
-            result = self.parse_response({"cache_age": cache_age})
-            self.assertIsNone(result.cache_age)
-
-        # Cache age is optional.
-        result = self.parse_response({})
-        self.assertIsNone(result.cache_age)
-
-    @parameterized.expand(
-        [
-            ("title", "title"),
-            ("provider_name", "site_name"),
-            ("thumbnail_url", "image"),
-        ],
-        name_func=lambda func, num, p: f"{func.__name__}_{p.args[0]}",
-    )
-    def test_property(self, oembed_property: str, open_graph_property: str) -> None:
-        """Test properties which must be strings."""
-        result = self.parse_response({oembed_property: "test"})
-        self.assertIn(f"og:{open_graph_property}", result.open_graph_result)
-        self.assertEqual(result.open_graph_result[f"og:{open_graph_property}"], "test")
-
-        result = self.parse_response({oembed_property: 1})
-        self.assertNotIn(f"og:{open_graph_property}", result.open_graph_result)
-
-    def test_author_name(self) -> None:
-        """Test the author_name property."""
-        result = self.parse_response({"author_name": "test"})
-        self.assertEqual(result.author_name, "test")
-
-        result = self.parse_response({"author_name": 1})
-        self.assertIsNone(result.author_name)
-
-    def test_rich(self) -> None:
-        """Test a type of rich."""
-        result = self.parse_response({"html": "test<img src='foo'>", "type": "rich"})
-        self.assertIn("og:description", result.open_graph_result)
-        self.assertIn("og:image", result.open_graph_result)
-        self.assertEqual(result.open_graph_result["og:description"], "test")
-        self.assertEqual(result.open_graph_result["og:image"], "foo")
-
-        result = self.parse_response({"type": "rich"})
-        self.assertNotIn("og:description", result.open_graph_result)
-
-        result = self.parse_response({"html": 1, "type": "rich"})
-        self.assertNotIn("og:description", result.open_graph_result)
-
-    def test_photo(self) -> None:
-        """Test a type of photo."""
-        result = self.parse_response({"url": "test", "type": "photo"})
-        self.assertIn("og:image", result.open_graph_result)
-        self.assertEqual(result.open_graph_result["og:image"], "test")
-
-        result = self.parse_response({"type": "photo"})
-        self.assertNotIn("og:image", result.open_graph_result)
-
-        result = self.parse_response({"url": 1, "type": "photo"})
-        self.assertNotIn("og:image", result.open_graph_result)
-
-    def test_video(self) -> None:
-        """Test a type of video."""
-        result = self.parse_response({"html": "test", "type": "video"})
-        self.assertIn("og:type", result.open_graph_result)
-        self.assertEqual(result.open_graph_result["og:type"], "video.other")
-        self.assertIn("og:description", result.open_graph_result)
-        self.assertEqual(result.open_graph_result["og:description"], "test")
-
-        result = self.parse_response({"type": "video"})
-        self.assertIn("og:type", result.open_graph_result)
-        self.assertEqual(result.open_graph_result["og:type"], "video.other")
-        self.assertNotIn("og:description", result.open_graph_result)
-
-        result = self.parse_response({"url": 1, "type": "video"})
-        self.assertIn("og:type", result.open_graph_result)
-        self.assertEqual(result.open_graph_result["og:type"], "video.other")
-        self.assertNotIn("og:description", result.open_graph_result)
-
-    def test_link(self) -> None:
-        """Test type of link."""
-        result = self.parse_response({"type": "link"})
-        self.assertIn("og:type", result.open_graph_result)
-        self.assertEqual(result.open_graph_result["og:type"], "website")
-
-    def test_title_html_entities(self) -> None:
-        """Test HTML entities in title"""
-        result = self.parse_response(
-            {"title": "Why JSON isn&#8217;t a Good Configuration Language"}
-        )
-        self.assertEqual(
-            result.open_graph_result["og:title"],
-            "Why JSON isn’t a Good Configuration Language",
-        )
diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py
deleted file mode 100644
index 2acfccec61..0000000000
--- a/tests/rest/media/v1/test_url_preview.py
+++ /dev/null
@@ -1,1234 +0,0 @@
-# Copyright 2018 New Vector Ltd
-# Copyright 2021 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import base64
-import json
-import os
-import re
-from typing import Any, Dict, Optional, Sequence, Tuple, Type
-from urllib.parse import quote, urlencode
-
-from twisted.internet._resolver import HostResolution
-from twisted.internet.address import IPv4Address, IPv6Address
-from twisted.internet.error import DNSLookupError
-from twisted.internet.interfaces import IAddress, IResolutionReceiver
-from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactor
-
-from synapse.config.oembed import OEmbedEndpointConfig
-from synapse.rest.media.v1.media_repository import MediaRepositoryResource
-from synapse.rest.media.v1.preview_url_resource import IMAGE_CACHE_EXPIRY_MS
-from synapse.server import HomeServer
-from synapse.types import JsonDict
-from synapse.util import Clock
-from synapse.util.stringutils import parse_and_validate_mxc_uri
-
-from tests import unittest
-from tests.server import FakeTransport
-from tests.test_utils import SMALL_PNG
-from tests.utils import MockClock
-
-try:
-    import lxml
-except ImportError:
-    lxml = None
-
-
-class URLPreviewTests(unittest.HomeserverTestCase):
-    if not lxml:
-        skip = "url preview feature requires lxml"
-
-    hijack_auth = True
-    user_id = "@test:user"
-    end_content = (
-        b"<html><head>"
-        b'<meta property="og:title" content="~matrix~" />'
-        b'<meta property="og:description" content="hi" />'
-        b"</head></html>"
-    )
-
-    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        config = self.default_config()
-        config["url_preview_enabled"] = True
-        config["max_spider_size"] = 9999999
-        config["url_preview_ip_range_blacklist"] = (
-            "192.168.1.1",
-            "1.0.0.0/8",
-            "3fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
-            "2001:800::/21",
-        )
-        config["url_preview_ip_range_whitelist"] = ("1.1.1.1",)
-        config["url_preview_accept_language"] = [
-            "en-UK",
-            "en-US;q=0.9",
-            "fr;q=0.8",
-            "*;q=0.7",
-        ]
-
-        self.storage_path = self.mktemp()
-        self.media_store_path = self.mktemp()
-        os.mkdir(self.storage_path)
-        os.mkdir(self.media_store_path)
-        config["media_store_path"] = self.media_store_path
-
-        provider_config = {
-            "module": "synapse.rest.media.v1.storage_provider.FileStorageProviderBackend",
-            "store_local": True,
-            "store_synchronous": False,
-            "store_remote": True,
-            "config": {"directory": self.storage_path},
-        }
-
-        config["media_storage_providers"] = [provider_config]
-
-        hs = self.setup_test_homeserver(config=config)
-
-        # After the hs is created, modify the parsed oEmbed config (to avoid
-        # messing with files).
-        #
-        # Note that HTTP URLs are used to avoid having to deal with TLS in tests.
-        hs.config.oembed.oembed_patterns = [
-            OEmbedEndpointConfig(
-                api_endpoint="http://publish.twitter.com/oembed",
-                url_patterns=[
-                    re.compile(r"http://twitter\.com/.+/status/.+"),
-                ],
-                formats=None,
-            ),
-            OEmbedEndpointConfig(
-                api_endpoint="http://www.hulu.com/api/oembed.{format}",
-                url_patterns=[
-                    re.compile(r"http://www\.hulu\.com/watch/.+"),
-                ],
-                formats=["json"],
-            ),
-        ]
-
-        return hs
-
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        self.media_repo = hs.get_media_repository_resource()
-        self.preview_url = self.media_repo.children[b"preview_url"]
-
-        self.lookups: Dict[str, Any] = {}
-
-        class Resolver:
-            def resolveHostName(
-                _self,
-                resolutionReceiver: IResolutionReceiver,
-                hostName: str,
-                portNumber: int = 0,
-                addressTypes: Optional[Sequence[Type[IAddress]]] = None,
-                transportSemantics: str = "TCP",
-            ) -> IResolutionReceiver:
-                resolution = HostResolution(hostName)
-                resolutionReceiver.resolutionBegan(resolution)
-                if hostName not in self.lookups:
-                    raise DNSLookupError("OH NO")
-
-                for i in self.lookups[hostName]:
-                    resolutionReceiver.addressResolved(i[0]("TCP", i[1], portNumber))
-                resolutionReceiver.resolutionComplete()
-                return resolutionReceiver
-
-        self.reactor.nameResolver = Resolver()  # type: ignore[assignment]
-
-    def create_test_resource(self) -> MediaRepositoryResource:
-        return self.hs.get_media_repository_resource()
-
-    def _assert_small_png(self, json_body: JsonDict) -> None:
-        """Assert properties from the SMALL_PNG test image."""
-        self.assertTrue(json_body["og:image"].startswith("mxc://"))
-        self.assertEqual(json_body["og:image:height"], 1)
-        self.assertEqual(json_body["og:image:width"], 1)
-        self.assertEqual(json_body["og:image:type"], "image/png")
-        self.assertEqual(json_body["matrix:image:size"], 67)
-
-    def test_cache_returns_correct_type(self) -> None:
-        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://matrix.org",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
-            % (len(self.end_content),)
-            + self.end_content
-        )
-
-        self.pump()
-        self.assertEqual(channel.code, 200)
-        self.assertEqual(
-            channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
-        )
-
-        # Check the cache returns the correct response
-        channel = self.make_request(
-            "GET", "preview_url?url=http://matrix.org", shorthand=False
-        )
-
-        # Check the cache response has the same content
-        self.assertEqual(channel.code, 200)
-        self.assertEqual(
-            channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
-        )
-
-        # Clear the in-memory cache
-        self.assertIn("http://matrix.org", self.preview_url._cache)
-        self.preview_url._cache.pop("http://matrix.org")
-        self.assertNotIn("http://matrix.org", self.preview_url._cache)
-
-        # Check the database cache returns the correct response
-        channel = self.make_request(
-            "GET", "preview_url?url=http://matrix.org", shorthand=False
-        )
-
-        # Check the cache response has the same content
-        self.assertEqual(channel.code, 200)
-        self.assertEqual(
-            channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
-        )
-
-    def test_non_ascii_preview_httpequiv(self) -> None:
-        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
-
-        end_content = (
-            b"<html><head>"
-            b'<meta http-equiv="Content-Type" content="text/html; charset=windows-1251"/>'
-            b'<meta property="og:title" content="\xe4\xea\xe0" />'
-            b'<meta property="og:description" content="hi" />'
-            b"</head></html>"
-        )
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://matrix.org",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            (
-                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
-                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
-            )
-            % (len(end_content),)
-            + end_content
-        )
-
-        self.pump()
-        self.assertEqual(channel.code, 200)
-        self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430")
-
-    def test_video_rejected(self) -> None:
-        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
-
-        end_content = b"anything"
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://matrix.org",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            (
-                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
-                b"Content-Type: video/mp4\r\n\r\n"
-            )
-            % (len(end_content))
-            + end_content
-        )
-
-        self.pump()
-        self.assertEqual(channel.code, 502)
-        self.assertEqual(
-            channel.json_body,
-            {
-                "errcode": "M_UNKNOWN",
-                "error": "Requested file's content type not allowed for this operation: video/mp4",
-            },
-        )
-
-    def test_audio_rejected(self) -> None:
-        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
-
-        end_content = b"anything"
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://matrix.org",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            (
-                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
-                b"Content-Type: audio/aac\r\n\r\n"
-            )
-            % (len(end_content))
-            + end_content
-        )
-
-        self.pump()
-        self.assertEqual(channel.code, 502)
-        self.assertEqual(
-            channel.json_body,
-            {
-                "errcode": "M_UNKNOWN",
-                "error": "Requested file's content type not allowed for this operation: audio/aac",
-            },
-        )
-
-    def test_non_ascii_preview_content_type(self) -> None:
-        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
-
-        end_content = (
-            b"<html><head>"
-            b'<meta property="og:title" content="\xe4\xea\xe0" />'
-            b'<meta property="og:description" content="hi" />'
-            b"</head></html>"
-        )
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://matrix.org",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            (
-                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
-                b'Content-Type: text/html; charset="windows-1251"\r\n\r\n'
-            )
-            % (len(end_content),)
-            + end_content
-        )
-
-        self.pump()
-        self.assertEqual(channel.code, 200)
-        self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430")
-
-    def test_overlong_title(self) -> None:
-        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
-
-        end_content = (
-            b"<html><head>"
-            b"<title>" + b"x" * 2000 + b"</title>"
-            b'<meta property="og:description" content="hi" />'
-            b"</head></html>"
-        )
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://matrix.org",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            (
-                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
-                b'Content-Type: text/html; charset="windows-1251"\r\n\r\n'
-            )
-            % (len(end_content),)
-            + end_content
-        )
-
-        self.pump()
-        self.assertEqual(channel.code, 200)
-        res = channel.json_body
-        # We should only see the `og:description` field, as `title` is too long and should be stripped out
-        self.assertCountEqual(["og:description"], res.keys())
-
-    def test_ipaddr(self) -> None:
-        """
-        IP addresses can be previewed directly.
-        """
-        self.lookups["example.com"] = [(IPv4Address, "10.1.2.3")]
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://example.com",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
-            % (len(self.end_content),)
-            + self.end_content
-        )
-
-        self.pump()
-        self.assertEqual(channel.code, 200)
-        self.assertEqual(
-            channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
-        )
-
-    def test_blacklisted_ip_specific(self) -> None:
-        """
-        Blacklisted IP addresses, found via DNS, are not spidered.
-        """
-        self.lookups["example.com"] = [(IPv4Address, "192.168.1.1")]
-
-        channel = self.make_request(
-            "GET", "preview_url?url=http://example.com", shorthand=False
-        )
-
-        # No requests made.
-        self.assertEqual(len(self.reactor.tcpClients), 0)
-        self.assertEqual(channel.code, 502)
-        self.assertEqual(
-            channel.json_body,
-            {
-                "errcode": "M_UNKNOWN",
-                "error": "DNS resolution failure during URL preview generation",
-            },
-        )
-
-    def test_blacklisted_ip_range(self) -> None:
-        """
-        Blacklisted IP ranges, IPs found over DNS, are not spidered.
-        """
-        self.lookups["example.com"] = [(IPv4Address, "1.1.1.2")]
-
-        channel = self.make_request(
-            "GET", "preview_url?url=http://example.com", shorthand=False
-        )
-
-        self.assertEqual(channel.code, 502)
-        self.assertEqual(
-            channel.json_body,
-            {
-                "errcode": "M_UNKNOWN",
-                "error": "DNS resolution failure during URL preview generation",
-            },
-        )
-
-    def test_blacklisted_ip_specific_direct(self) -> None:
-        """
-        Blacklisted IP addresses, accessed directly, are not spidered.
-        """
-        channel = self.make_request(
-            "GET", "preview_url?url=http://192.168.1.1", shorthand=False
-        )
-
-        # No requests made.
-        self.assertEqual(len(self.reactor.tcpClients), 0)
-        self.assertEqual(
-            channel.json_body,
-            {
-                "errcode": "M_UNKNOWN",
-                "error": "IP address blocked by IP blacklist entry",
-            },
-        )
-        self.assertEqual(channel.code, 403)
-
-    def test_blacklisted_ip_range_direct(self) -> None:
-        """
-        Blacklisted IP ranges, accessed directly, are not spidered.
-        """
-        channel = self.make_request(
-            "GET", "preview_url?url=http://1.1.1.2", shorthand=False
-        )
-
-        self.assertEqual(channel.code, 403)
-        self.assertEqual(
-            channel.json_body,
-            {
-                "errcode": "M_UNKNOWN",
-                "error": "IP address blocked by IP blacklist entry",
-            },
-        )
-
-    def test_blacklisted_ip_range_whitelisted_ip(self) -> None:
-        """
-        Blacklisted but then subsequently whitelisted IP addresses can be
-        spidered.
-        """
-        self.lookups["example.com"] = [(IPv4Address, "1.1.1.1")]
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://example.com",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-
-        client.dataReceived(
-            b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
-            % (len(self.end_content),)
-            + self.end_content
-        )
-
-        self.pump()
-        self.assertEqual(channel.code, 200)
-        self.assertEqual(
-            channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
-        )
-
-    def test_blacklisted_ip_with_external_ip(self) -> None:
-        """
-        If a hostname resolves a blacklisted IP, even if there's a
-        non-blacklisted one, it will be rejected.
-        """
-        # Hardcode the URL resolving to the IP we want.
-        self.lookups["example.com"] = [
-            (IPv4Address, "1.1.1.2"),
-            (IPv4Address, "10.1.2.3"),
-        ]
-
-        channel = self.make_request(
-            "GET", "preview_url?url=http://example.com", shorthand=False
-        )
-        self.assertEqual(channel.code, 502)
-        self.assertEqual(
-            channel.json_body,
-            {
-                "errcode": "M_UNKNOWN",
-                "error": "DNS resolution failure during URL preview generation",
-            },
-        )
-
-    def test_blacklisted_ipv6_specific(self) -> None:
-        """
-        Blacklisted IP addresses, found via DNS, are not spidered.
-        """
-        self.lookups["example.com"] = [
-            (IPv6Address, "3fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff")
-        ]
-
-        channel = self.make_request(
-            "GET", "preview_url?url=http://example.com", shorthand=False
-        )
-
-        # No requests made.
-        self.assertEqual(len(self.reactor.tcpClients), 0)
-        self.assertEqual(channel.code, 502)
-        self.assertEqual(
-            channel.json_body,
-            {
-                "errcode": "M_UNKNOWN",
-                "error": "DNS resolution failure during URL preview generation",
-            },
-        )
-
-    def test_blacklisted_ipv6_range(self) -> None:
-        """
-        Blacklisted IP ranges, IPs found over DNS, are not spidered.
-        """
-        self.lookups["example.com"] = [(IPv6Address, "2001:800::1")]
-
-        channel = self.make_request(
-            "GET", "preview_url?url=http://example.com", shorthand=False
-        )
-
-        self.assertEqual(channel.code, 502)
-        self.assertEqual(
-            channel.json_body,
-            {
-                "errcode": "M_UNKNOWN",
-                "error": "DNS resolution failure during URL preview generation",
-            },
-        )
-
-    def test_OPTIONS(self) -> None:
-        """
-        OPTIONS returns the OPTIONS.
-        """
-        channel = self.make_request(
-            "OPTIONS", "preview_url?url=http://example.com", shorthand=False
-        )
-        self.assertEqual(channel.code, 200)
-        self.assertEqual(channel.json_body, {})
-
-    def test_accept_language_config_option(self) -> None:
-        """
-        Accept-Language header is sent to the remote server
-        """
-        self.lookups["example.com"] = [(IPv4Address, "10.1.2.3")]
-
-        # Build and make a request to the server
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://example.com",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        # Extract Synapse's tcp client
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-
-        # Build a fake remote server to reply with
-        server = AccumulatingProtocol()
-
-        # Connect the two together
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-
-        # Tell Synapse that it has received some data from the remote server
-        client.dataReceived(
-            b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
-            % (len(self.end_content),)
-            + self.end_content
-        )
-
-        # Move the reactor along until we get a response on our original channel
-        self.pump()
-        self.assertEqual(channel.code, 200)
-        self.assertEqual(
-            channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
-        )
-
-        # Check that the server received the Accept-Language header as part
-        # of the request from Synapse
-        self.assertIn(
-            (
-                b"Accept-Language: en-UK\r\n"
-                b"Accept-Language: en-US;q=0.9\r\n"
-                b"Accept-Language: fr;q=0.8\r\n"
-                b"Accept-Language: *;q=0.7"
-            ),
-            server.data,
-        )
-
-    def test_nonexistent_image(self) -> None:
-        """If the preview image doesn't exist, ensure some data is returned."""
-        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
-
-        result = (
-            b"""<html><body><img src="http://cdn.matrix.org/foo.jpg"></body></html>"""
-        )
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://matrix.org",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            (
-                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
-                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
-            )
-            % (len(result),)
-            + result
-        )
-
-        self.pump()
-        self.assertEqual(channel.code, 200)
-
-        # The image should not be in the result.
-        self.assertNotIn("og:image", channel.json_body)
-
-    def test_oembed_failure(self) -> None:
-        """If the autodiscovered oEmbed URL fails, ensure some data is returned."""
-        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
-
-        result = b"""
-        <title>oEmbed Autodiscovery Fail</title>
-        <link rel="alternate" type="application/json+oembed"
-            href="http://example.com/oembed?url=http%3A%2F%2Fmatrix.org&format=json"
-            title="matrixdotorg" />
-        """
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://matrix.org",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            (
-                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
-                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
-            )
-            % (len(result),)
-            + result
-        )
-
-        self.pump()
-        self.assertEqual(channel.code, 200)
-
-        # The image should not be in the result.
-        self.assertEqual(channel.json_body["og:title"], "oEmbed Autodiscovery Fail")
-
-    def test_data_url(self) -> None:
-        """
-        Requesting to preview a data URL is not supported.
-        """
-        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
-
-        data = base64.b64encode(SMALL_PNG).decode()
-
-        query_params = urlencode(
-            {
-                "url": f'<html><head><img src="data:image/png;base64,{data}" /></head></html>'
-            }
-        )
-
-        channel = self.make_request(
-            "GET",
-            f"preview_url?{query_params}",
-            shorthand=False,
-        )
-        self.pump()
-
-        self.assertEqual(channel.code, 500)
-
-    def test_inline_data_url(self) -> None:
-        """
-        An inline image (as a data URL) should be parsed properly.
-        """
-        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
-
-        data = base64.b64encode(SMALL_PNG)
-
-        end_content = (
-            b"<html><head>" b'<img src="data:image/png;base64,%s" />' b"</head></html>"
-        ) % (data,)
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://matrix.org",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            (
-                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
-                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
-            )
-            % (len(end_content),)
-            + end_content
-        )
-
-        self.pump()
-        self.assertEqual(channel.code, 200)
-        self._assert_small_png(channel.json_body)
-
-    def test_oembed_photo(self) -> None:
-        """Test an oEmbed endpoint which returns a 'photo' type which redirects the preview to a new URL."""
-        self.lookups["publish.twitter.com"] = [(IPv4Address, "10.1.2.3")]
-        self.lookups["cdn.twitter.com"] = [(IPv4Address, "10.1.2.3")]
-
-        result = {
-            "version": "1.0",
-            "type": "photo",
-            "url": "http://cdn.twitter.com/matrixdotorg",
-        }
-        oembed_content = json.dumps(result).encode("utf-8")
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://twitter.com/matrixdotorg/status/12345",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            (
-                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
-                b'Content-Type: application/json; charset="utf8"\r\n\r\n'
-            )
-            % (len(oembed_content),)
-            + oembed_content
-        )
-
-        self.pump()
-
-        # Ensure a second request is made to the photo URL.
-        client = self.reactor.tcpClients[1][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            (
-                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
-                b"Content-Type: image/png\r\n\r\n"
-            )
-            % (len(SMALL_PNG),)
-            + SMALL_PNG
-        )
-
-        self.pump()
-
-        # Ensure the URL is what was requested.
-        self.assertIn(b"/matrixdotorg", server.data)
-
-        self.assertEqual(channel.code, 200)
-        body = channel.json_body
-        self.assertEqual(body["og:url"], "http://twitter.com/matrixdotorg/status/12345")
-        self._assert_small_png(body)
-
-    def test_oembed_rich(self) -> None:
-        """Test an oEmbed endpoint which returns HTML content via the 'rich' type."""
-        self.lookups["publish.twitter.com"] = [(IPv4Address, "10.1.2.3")]
-
-        result = {
-            "version": "1.0",
-            "type": "rich",
-            # Note that this provides the author, not the title.
-            "author_name": "Alice",
-            "html": "<div>Content Preview</div>",
-        }
-        end_content = json.dumps(result).encode("utf-8")
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://twitter.com/matrixdotorg/status/12345",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            (
-                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
-                b'Content-Type: application/json; charset="utf8"\r\n\r\n'
-            )
-            % (len(end_content),)
-            + end_content
-        )
-
-        self.pump()
-        self.assertEqual(channel.code, 200)
-        body = channel.json_body
-        self.assertEqual(
-            body,
-            {
-                "og:url": "http://twitter.com/matrixdotorg/status/12345",
-                "og:title": "Alice",
-                "og:description": "Content Preview",
-            },
-        )
-
-    def test_oembed_format(self) -> None:
-        """Test an oEmbed endpoint which requires the format in the URL."""
-        self.lookups["www.hulu.com"] = [(IPv4Address, "10.1.2.3")]
-
-        result = {
-            "version": "1.0",
-            "type": "rich",
-            "html": "<div>Content Preview</div>",
-        }
-        end_content = json.dumps(result).encode("utf-8")
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://www.hulu.com/watch/12345",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            (
-                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
-                b'Content-Type: application/json; charset="utf8"\r\n\r\n'
-            )
-            % (len(end_content),)
-            + end_content
-        )
-
-        self.pump()
-
-        # The {format} should have been turned into json.
-        self.assertIn(b"/api/oembed.json", server.data)
-        # A URL parameter of format=json should be provided.
-        self.assertIn(b"format=json", server.data)
-
-        self.assertEqual(channel.code, 200)
-        body = channel.json_body
-        self.assertEqual(
-            body,
-            {
-                "og:url": "http://www.hulu.com/watch/12345",
-                "og:description": "Content Preview",
-            },
-        )
-
-    def test_oembed_autodiscovery(self) -> None:
-        """
-        Autodiscovery works by finding the link in the HTML response and then requesting an oEmbed URL.
-        1. Request a preview of a URL which is not known to the oEmbed code.
-        2. It returns HTML including a link to an oEmbed preview.
-        3. The oEmbed preview is requested and returns a URL for an image.
-        4. The image is requested for thumbnailing.
-        """
-        # This is a little cheesy in that we use the www subdomain (which isn't the
-        # list of oEmbed patterns) to get "raw" HTML response.
-        self.lookups["www.twitter.com"] = [(IPv4Address, "10.1.2.3")]
-        self.lookups["publish.twitter.com"] = [(IPv4Address, "10.1.2.3")]
-        self.lookups["cdn.twitter.com"] = [(IPv4Address, "10.1.2.3")]
-
-        result = b"""
-        <link rel="alternate" type="application/json+oembed"
-            href="http://publish.twitter.com/oembed?url=http%3A%2F%2Fcdn.twitter.com%2Fmatrixdotorg%2Fstatus%2F12345&format=json"
-            title="matrixdotorg" />
-        """
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://www.twitter.com/matrixdotorg/status/12345",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            (
-                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
-                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
-            )
-            % (len(result),)
-            + result
-        )
-
-        self.pump()
-
-        # The oEmbed response.
-        result2 = {
-            "version": "1.0",
-            "type": "photo",
-            "url": "http://cdn.twitter.com/matrixdotorg",
-        }
-        oembed_content = json.dumps(result2).encode("utf-8")
-
-        # Ensure a second request is made to the oEmbed URL.
-        client = self.reactor.tcpClients[1][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            (
-                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
-                b'Content-Type: application/json; charset="utf8"\r\n\r\n'
-            )
-            % (len(oembed_content),)
-            + oembed_content
-        )
-
-        self.pump()
-
-        # Ensure the URL is what was requested.
-        self.assertIn(b"/oembed?", server.data)
-
-        # Ensure a third request is made to the photo URL.
-        client = self.reactor.tcpClients[2][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            (
-                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
-                b"Content-Type: image/png\r\n\r\n"
-            )
-            % (len(SMALL_PNG),)
-            + SMALL_PNG
-        )
-
-        self.pump()
-
-        # Ensure the URL is what was requested.
-        self.assertIn(b"/matrixdotorg", server.data)
-
-        self.assertEqual(channel.code, 200)
-        body = channel.json_body
-        self.assertEqual(
-            body["og:url"], "http://www.twitter.com/matrixdotorg/status/12345"
-        )
-        self._assert_small_png(body)
-
-    def _download_image(self) -> Tuple[str, str]:
-        """Downloads an image into the URL cache.
-        Returns:
-            A (host, media_id) tuple representing the MXC URI of the image.
-        """
-        self.lookups["cdn.twitter.com"] = [(IPv4Address, "10.1.2.3")]
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=http://cdn.twitter.com/matrixdotorg",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: image/png\r\n\r\n"
-            % (len(SMALL_PNG),)
-            + SMALL_PNG
-        )
-
-        self.pump()
-        self.assertEqual(channel.code, 200)
-        body = channel.json_body
-        mxc_uri = body["og:image"]
-        host, _port, media_id = parse_and_validate_mxc_uri(mxc_uri)
-        self.assertIsNone(_port)
-        return host, media_id
-
-    def test_storage_providers_exclude_files(self) -> None:
-        """Test that files are not stored in or fetched from storage providers."""
-        host, media_id = self._download_image()
-
-        rel_file_path = self.preview_url.filepaths.url_cache_filepath_rel(media_id)
-        media_store_path = os.path.join(self.media_store_path, rel_file_path)
-        storage_provider_path = os.path.join(self.storage_path, rel_file_path)
-
-        # Check storage
-        self.assertTrue(os.path.isfile(media_store_path))
-        self.assertFalse(
-            os.path.isfile(storage_provider_path),
-            "URL cache file was unexpectedly stored in a storage provider",
-        )
-
-        # Check fetching
-        channel = self.make_request(
-            "GET",
-            f"download/{host}/{media_id}",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-        self.assertEqual(channel.code, 200)
-
-        # Move cached file into the storage provider
-        os.makedirs(os.path.dirname(storage_provider_path), exist_ok=True)
-        os.rename(media_store_path, storage_provider_path)
-
-        channel = self.make_request(
-            "GET",
-            f"download/{host}/{media_id}",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-        self.assertEqual(
-            channel.code,
-            404,
-            "URL cache file was unexpectedly retrieved from a storage provider",
-        )
-
-    def test_storage_providers_exclude_thumbnails(self) -> None:
-        """Test that thumbnails are not stored in or fetched from storage providers."""
-        host, media_id = self._download_image()
-
-        rel_thumbnail_path = (
-            self.preview_url.filepaths.url_cache_thumbnail_directory_rel(media_id)
-        )
-        media_store_thumbnail_path = os.path.join(
-            self.media_store_path, rel_thumbnail_path
-        )
-        storage_provider_thumbnail_path = os.path.join(
-            self.storage_path, rel_thumbnail_path
-        )
-
-        # Check storage
-        self.assertTrue(os.path.isdir(media_store_thumbnail_path))
-        self.assertFalse(
-            os.path.isdir(storage_provider_thumbnail_path),
-            "URL cache thumbnails were unexpectedly stored in a storage provider",
-        )
-
-        # Check fetching
-        channel = self.make_request(
-            "GET",
-            f"thumbnail/{host}/{media_id}?width=32&height=32&method=scale",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-        self.assertEqual(channel.code, 200)
-
-        # Remove the original, otherwise thumbnails will regenerate
-        rel_file_path = self.preview_url.filepaths.url_cache_filepath_rel(media_id)
-        media_store_path = os.path.join(self.media_store_path, rel_file_path)
-        os.remove(media_store_path)
-
-        # Move cached thumbnails into the storage provider
-        os.makedirs(os.path.dirname(storage_provider_thumbnail_path), exist_ok=True)
-        os.rename(media_store_thumbnail_path, storage_provider_thumbnail_path)
-
-        channel = self.make_request(
-            "GET",
-            f"thumbnail/{host}/{media_id}?width=32&height=32&method=scale",
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-        self.assertEqual(
-            channel.code,
-            404,
-            "URL cache thumbnail was unexpectedly retrieved from a storage provider",
-        )
-
-    def test_cache_expiry(self) -> None:
-        """Test that URL cache files and thumbnails are cleaned up properly on expiry."""
-        self.preview_url.clock = MockClock()
-
-        _host, media_id = self._download_image()
-
-        file_path = self.preview_url.filepaths.url_cache_filepath(media_id)
-        file_dirs = self.preview_url.filepaths.url_cache_filepath_dirs_to_delete(
-            media_id
-        )
-        thumbnail_dir = self.preview_url.filepaths.url_cache_thumbnail_directory(
-            media_id
-        )
-        thumbnail_dirs = self.preview_url.filepaths.url_cache_thumbnail_dirs_to_delete(
-            media_id
-        )
-
-        self.assertTrue(os.path.isfile(file_path))
-        self.assertTrue(os.path.isdir(thumbnail_dir))
-
-        self.preview_url.clock.advance_time_msec(IMAGE_CACHE_EXPIRY_MS + 1)
-        self.get_success(self.preview_url._expire_url_cache_data())
-
-        for path in [file_path] + file_dirs + [thumbnail_dir] + thumbnail_dirs:
-            self.assertFalse(
-                os.path.exists(path),
-                f"{os.path.relpath(path, self.media_store_path)} was not deleted",
-            )
-
-    @unittest.override_config({"url_preview_url_blacklist": [{"port": "*"}]})
-    def test_blacklist_port(self) -> None:
-        """Tests that blacklisting URLs with a port makes previewing such URLs
-        fail with a 403 error and doesn't impact other previews.
-        """
-        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
-
-        bad_url = quote("http://matrix.org:8888/foo")
-        good_url = quote("http://matrix.org/foo")
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=" + bad_url,
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-        self.assertEqual(channel.code, 403, channel.result)
-
-        channel = self.make_request(
-            "GET",
-            "preview_url?url=" + good_url,
-            shorthand=False,
-            await_result=False,
-        )
-        self.pump()
-
-        client = self.reactor.tcpClients[0][2].buildProtocol(None)
-        server = AccumulatingProtocol()
-        server.makeConnection(FakeTransport(client, self.reactor))
-        client.makeConnection(FakeTransport(server, self.reactor))
-        client.dataReceived(
-            b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
-            % (len(self.end_content),)
-            + self.end_content
-        )
-
-        self.pump()
-        self.assertEqual(channel.code, 200)
-- 
cgit 1.5.1


From b40657314e03583f45ad49504711698a70735313 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Mon, 27 Feb 2023 14:19:19 +0000
Subject: Add module API callbacks for adding and deleting local 3PID
 associations (#15044)

---
 changelog.d/15044.feature                      |   1 +
 docs/modules/third_party_rules_callbacks.md    |  45 ++++++++-
 docs/upgrade.md                                |  24 +++++
 synapse/events/third_party_rules.py            |  63 +++++++++++++
 synapse/handlers/auth.py                       |  49 ++++++----
 synapse/handlers/deactivate_account.py         |  20 ++--
 synapse/module_api/__init__.py                 |  10 ++
 synapse/rest/admin/users.py                    |  11 ++-
 synapse/rest/client/account.py                 |   9 +-
 synapse/storage/databases/main/registration.py |  13 ---
 tests/push/test_email.py                       |   6 +-
 tests/rest/client/test_third_party_rules.py    | 121 +++++++++++++++++++++++++
 12 files changed, 324 insertions(+), 48 deletions(-)
 create mode 100644 changelog.d/15044.feature

(limited to 'synapse')

diff --git a/changelog.d/15044.feature b/changelog.d/15044.feature
new file mode 100644
index 0000000000..91e5cda8c3
--- /dev/null
+++ b/changelog.d/15044.feature
@@ -0,0 +1 @@
+Add two new Third Party Rules module API callbacks: [`on_add_user_third_party_identifier`](https://matrix-org.github.io/synapse/v1.79/modules/third_party_rules_callbacks.html#on_add_user_third_party_identifier) and [`on_remove_user_third_party_identifier`](https://matrix-org.github.io/synapse/v1.79/modules/third_party_rules_callbacks.html#on_remove_user_third_party_identifier).
\ No newline at end of file
diff --git a/docs/modules/third_party_rules_callbacks.md b/docs/modules/third_party_rules_callbacks.md
index 888e43bd10..4a27d976fb 100644
--- a/docs/modules/third_party_rules_callbacks.md
+++ b/docs/modules/third_party_rules_callbacks.md
@@ -254,6 +254,11 @@ If multiple modules implement this callback, Synapse runs them all in order.
 
 _First introduced in Synapse v1.56.0_
 
+**<span style="color:red">
+This callback is deprecated in favour of the `on_add_user_third_party_identifier` callback, which
+features the same functionality. The only difference is in name.
+</span>**
+
 ```python
 async def on_threepid_bind(user_id: str, medium: str, address: str) -> None:
 ```
@@ -268,6 +273,44 @@ server_.
 
 If multiple modules implement this callback, Synapse runs them all in order.
 
+### `on_add_user_third_party_identifier`
+
+_First introduced in Synapse v1.79.0_
+
+```python
+async def on_add_user_third_party_identifier(user_id: str, medium: str, address: str) -> None:
+```
+
+Called after successfully creating an association between a user and a third-party identifier
+(email address, phone number). The module is given the Matrix ID of the user the
+association is for, as well as the medium (`email` or `msisdn`) and address of the
+third-party identifier (e.g. an email address).
+
+Note that this callback is _not_ called if a user attempts to bind their third-party identifier
+to an identity server (via a call to [`POST
+/_matrix/client/v3/account/3pid/bind`](https://spec.matrix.org/v1.5/client-server-api/#post_matrixclientv3account3pidbind)).
+
+If multiple modules implement this callback, Synapse runs them all in order.
+
+### `on_remove_user_third_party_identifier`
+
+_First introduced in Synapse v1.79.0_
+
+```python
+async def on_remove_user_third_party_identifier(user_id: str, medium: str, address: str) -> None:
+```
+
+Called after successfully removing an association between a user and a third-party identifier
+(email address, phone number). The module is given the Matrix ID of the user the
+association is for, as well as the medium (`email` or `msisdn`) and address of the
+third-party identifier (e.g. an email address).
+
+Note that this callback is _not_ called if a user attempts to unbind their third-party
+identifier from an identity server (via a call to [`POST
+/_matrix/client/v3/account/3pid/unbind`](https://spec.matrix.org/v1.5/client-server-api/#post_matrixclientv3account3pidunbind)).
+
+If multiple modules implement this callback, Synapse runs them all in order.
+
 ## Example
 
 The example below is a module that implements the third-party rules callback
@@ -300,4 +343,4 @@ class EventCensorer:
         )
         event_dict["content"] = new_event_content
         return event_dict
-```
+```
\ No newline at end of file
diff --git a/docs/upgrade.md b/docs/upgrade.md
index 15167b8c58..f06e874054 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -88,6 +88,30 @@ process, for example:
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     ```
 
+# Upgrading to v1.79.0
+
+## The `on_threepid_bind` module callback method has been deprecated
+
+Synapse v1.79.0 deprecates the
+[`on_threepid_bind`](modules/third_party_rules_callbacks.md#on_threepid_bind)
+"third-party rules" Synapse module callback method in favour of a new module method,
+[`on_add_user_third_party_identifier`](modules/third_party_rules_callbacks.md#on_add_user_third_party_identifier).
+`on_threepid_bind` will be removed in a future version of Synapse. You should check whether any Synapse
+modules in use in your deployment are making use of `on_threepid_bind`, and update them where possible.
+
+The arguments and functionality of the new method are the same.
+
+The justification behind the name change is that the old method's name, `on_threepid_bind`, was
+misleading. A user is considered to "bind" their third-party ID to their Matrix ID only if they
+do so via an [identity server](https://spec.matrix.org/latest/identity-service-api/)
+(so that users on other homeservers may find them). But this method was not called in that case -
+it was only called when a user added a third-party identifier on the local homeserver.
+
+Module developers may also be interested in the related
+[`on_remove_user_third_party_identifier`](modules/third_party_rules_callbacks.md#on_remove_user_third_party_identifier)
+module callback method that was also added in Synapse v1.79.0. This new method is called when a
+user removes a third-party identifier from their account.
+
 # Upgrading to v1.78.0
 
 ## Deprecate the `/_synapse/admin/v1/media/<server_name>/delete` admin API
diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py
index 9a25ed419b..3e4d52c8d8 100644
--- a/synapse/events/third_party_rules.py
+++ b/synapse/events/third_party_rules.py
@@ -45,6 +45,8 @@ CHECK_CAN_DEACTIVATE_USER_CALLBACK = Callable[[str, bool], Awaitable[bool]]
 ON_PROFILE_UPDATE_CALLBACK = Callable[[str, ProfileInfo, bool, bool], Awaitable]
 ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK = Callable[[str, bool, bool], Awaitable]
 ON_THREEPID_BIND_CALLBACK = Callable[[str, str, str], Awaitable]
+ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK = Callable[[str, str, str], Awaitable]
+ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK = Callable[[str, str, str], Awaitable]
 
 
 def load_legacy_third_party_event_rules(hs: "HomeServer") -> None:
@@ -172,6 +174,12 @@ class ThirdPartyEventRules:
             ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK
         ] = []
         self._on_threepid_bind_callbacks: List[ON_THREEPID_BIND_CALLBACK] = []
+        self._on_add_user_third_party_identifier_callbacks: List[
+            ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK
+        ] = []
+        self._on_remove_user_third_party_identifier_callbacks: List[
+            ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK
+        ] = []
 
     def register_third_party_rules_callbacks(
         self,
@@ -191,6 +199,12 @@ class ThirdPartyEventRules:
             ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK
         ] = None,
         on_threepid_bind: Optional[ON_THREEPID_BIND_CALLBACK] = None,
+        on_add_user_third_party_identifier: Optional[
+            ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK
+        ] = None,
+        on_remove_user_third_party_identifier: Optional[
+            ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK
+        ] = None,
     ) -> None:
         """Register callbacks from modules for each hook."""
         if check_event_allowed is not None:
@@ -228,6 +242,11 @@ class ThirdPartyEventRules:
         if on_threepid_bind is not None:
             self._on_threepid_bind_callbacks.append(on_threepid_bind)
 
+        if on_add_user_third_party_identifier is not None:
+            self._on_add_user_third_party_identifier_callbacks.append(
+                on_add_user_third_party_identifier
+            )
+
     async def check_event_allowed(
         self,
         event: EventBase,
@@ -511,6 +530,9 @@ class ThirdPartyEventRules:
         local homeserver, not when it's created on an identity server (and then kept track
         of so that it can be unbound on the same IS later on).
 
+        THIS MODULE CALLBACK METHOD HAS BEEN DEPRECATED. Please use the
+        `on_add_user_third_party_identifier` callback method instead.
+
         Args:
             user_id: the user being associated with the threepid.
             medium: the threepid's medium.
@@ -523,3 +545,44 @@ class ThirdPartyEventRules:
                 logger.exception(
                     "Failed to run module API callback %s: %s", callback, e
                 )
+
+    async def on_add_user_third_party_identifier(
+        self, user_id: str, medium: str, address: str
+    ) -> None:
+        """Called when an association between a user's Matrix ID and a third-party ID
+        (email, phone number) has successfully been registered on the homeserver.
+
+        Args:
+            user_id: The User ID included in the association.
+            medium: The medium of the third-party ID (email, msisdn).
+            address: The address of the third-party ID (i.e. an email address).
+        """
+        for callback in self._on_add_user_third_party_identifier_callbacks:
+            try:
+                await callback(user_id, medium, address)
+            except Exception as e:
+                logger.exception(
+                    "Failed to run module API callback %s: %s", callback, e
+                )
+
+    async def on_remove_user_third_party_identifier(
+        self, user_id: str, medium: str, address: str
+    ) -> None:
+        """Called when an association between a user's Matrix ID and a third-party ID
+        (email, phone number) has been successfully removed on the homeserver.
+
+        This is called *after* any known bindings on identity servers for this
+        association have been removed.
+
+        Args:
+            user_id: The User ID included in the removed association.
+            medium: The medium of the third-party ID (email, msisdn).
+            address: The address of the third-party ID (i.e. an email address).
+        """
+        for callback in self._on_remove_user_third_party_identifier_callbacks:
+            try:
+                await callback(user_id, medium, address)
+            except Exception as e:
+                logger.exception(
+                    "Failed to run module API callback %s: %s", callback, e
+                )
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index b12bc4c9a3..308e38edea 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -1542,6 +1542,17 @@ class AuthHandler:
     async def add_threepid(
         self, user_id: str, medium: str, address: str, validated_at: int
     ) -> None:
+        """
+        Adds an association between a user's Matrix ID and a third-party ID (email,
+        phone number).
+
+        Args:
+            user_id: The ID of the user to associate.
+            medium: The medium of the third-party ID (email, msisdn).
+            address: The address of the third-party ID (i.e. an email address).
+            validated_at: The timestamp in ms of when the validation that the user owns
+                this third-party ID occurred.
+        """
         # check if medium has a valid value
         if medium not in ["email", "msisdn"]:
             raise SynapseError(
@@ -1566,42 +1577,44 @@ class AuthHandler:
             user_id, medium, address, validated_at, self.hs.get_clock().time_msec()
         )
 
+        # Inform Synapse modules that a 3PID association has been created.
+        await self._third_party_rules.on_add_user_third_party_identifier(
+            user_id, medium, address
+        )
+
+        # Deprecated method for informing Synapse modules that a 3PID association
+        # has successfully been created.
         await self._third_party_rules.on_threepid_bind(user_id, medium, address)
 
-    async def delete_threepid(
-        self, user_id: str, medium: str, address: str, id_server: Optional[str] = None
-    ) -> bool:
-        """Attempts to unbind the 3pid on the identity servers and deletes it
-        from the local database.
+    async def delete_local_threepid(
+        self, user_id: str, medium: str, address: str
+    ) -> None:
+        """Deletes an association between a third-party ID and a user ID from the local
+        database. This method does not unbind the association from any identity servers.
+
+        If `medium` is 'email' and a pusher is associated with this third-party ID, the
+        pusher will also be deleted.
 
         Args:
             user_id: ID of user to remove the 3pid from.
             medium: The medium of the 3pid being removed: "email" or "msisdn".
             address: The 3pid address to remove.
-            id_server: Use the given identity server when unbinding
-                any threepids. If None then will attempt to unbind using the
-                identity server specified when binding (if known).
-
-        Returns:
-            Returns True if successfully unbound the 3pid on
-            the identity server, False if identity server doesn't support the
-            unbind API.
         """
-
         # 'Canonicalise' email addresses as per above
         if medium == "email":
             address = canonicalise_email(address)
 
-        result = await self.hs.get_identity_handler().try_unbind_threepid(
-            user_id, medium, address, id_server
+        await self.store.user_delete_threepid(user_id, medium, address)
+
+        # Inform Synapse modules that a 3PID association has been deleted.
+        await self._third_party_rules.on_remove_user_third_party_identifier(
+            user_id, medium, address
         )
 
-        await self.store.user_delete_threepid(user_id, medium, address)
         if medium == "email":
             await self.store.delete_pusher_by_app_id_pushkey_user_id(
                 app_id="m.email", pushkey=address, user_id=user_id
             )
-        return result
 
     async def hash(self, password: str) -> str:
         """Computes a secure hash of password.
diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index d24f649382..d31263c717 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -100,26 +100,28 @@ class DeactivateAccountHandler:
         # unbinding
         identity_server_supports_unbinding = True
 
-        # Retrieve the 3PIDs this user has bound to an identity server
-        threepids = await self.store.user_get_bound_threepids(user_id)
-
-        for threepid in threepids:
+        # Attempt to unbind any known bound threepids to this account from identity
+        # server(s).
+        bound_threepids = await self.store.user_get_bound_threepids(user_id)
+        for threepid in bound_threepids:
             try:
                 result = await self._identity_handler.try_unbind_threepid(
                     user_id, threepid["medium"], threepid["address"], id_server
                 )
-                identity_server_supports_unbinding &= result
             except Exception:
                 # Do we want this to be a fatal error or should we carry on?
                 logger.exception("Failed to remove threepid from ID server")
                 raise SynapseError(400, "Failed to remove threepid from ID server")
-            await self.store.user_delete_threepid(
+
+            identity_server_supports_unbinding &= result
+
+        # Remove any local threepid associations for this account.
+        local_threepids = await self.store.user_get_threepids(user_id)
+        for threepid in local_threepids:
+            await self._auth_handler.delete_local_threepid(
                 user_id, threepid["medium"], threepid["address"]
             )
 
-        # Remove all 3PIDs this user has bound to the homeserver
-        await self.store.user_delete_threepids(user_id)
-
         # delete any devices belonging to the user, which will also
         # delete corresponding access tokens.
         await self._device_handler.delete_all_devices_for_user(user_id)
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 1964276a54..424239e3df 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -64,9 +64,11 @@ from synapse.events.third_party_rules import (
     CHECK_EVENT_ALLOWED_CALLBACK,
     CHECK_THREEPID_CAN_BE_INVITED_CALLBACK,
     CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK,
+    ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK,
     ON_CREATE_ROOM_CALLBACK,
     ON_NEW_EVENT_CALLBACK,
     ON_PROFILE_UPDATE_CALLBACK,
+    ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK,
     ON_THREEPID_BIND_CALLBACK,
     ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK,
 )
@@ -357,6 +359,12 @@ class ModuleApi:
             ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK
         ] = None,
         on_threepid_bind: Optional[ON_THREEPID_BIND_CALLBACK] = None,
+        on_add_user_third_party_identifier: Optional[
+            ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK
+        ] = None,
+        on_remove_user_third_party_identifier: Optional[
+            ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK
+        ] = None,
     ) -> None:
         """Registers callbacks for third party event rules capabilities.
 
@@ -373,6 +381,8 @@ class ModuleApi:
             on_profile_update=on_profile_update,
             on_user_deactivation_status_changed=on_user_deactivation_status_changed,
             on_threepid_bind=on_threepid_bind,
+            on_add_user_third_party_identifier=on_add_user_third_party_identifier,
+            on_remove_user_third_party_identifier=on_remove_user_third_party_identifier,
         )
 
     def register_presence_router_callbacks(
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 7cc4db20d6..357e9a574d 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -304,13 +304,20 @@ class UserRestServletV2(RestServlet):
                 # remove old threepids
                 for medium, address in del_threepids:
                     try:
-                        await self.auth_handler.delete_threepid(
-                            user_id, medium, address, None
+                        # Attempt to remove any known bindings of this third-party ID
+                        # and user ID from identity servers.
+                        await self.hs.get_identity_handler().try_unbind_threepid(
+                            user_id, medium, address, id_server=None
                         )
                     except Exception:
                         logger.exception("Failed to remove threepids")
                         raise SynapseError(500, "Failed to remove threepids")
 
+                    # Delete the local association of this user ID and third-party ID.
+                    await self.auth_handler.delete_local_threepid(
+                        user_id, medium, address
+                    )
+
                 # add new threepids
                 current_time = self.hs.get_clock().time_msec()
                 for medium, address in add_threepids:
diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py
index 662f5bf762..484d7440a4 100644
--- a/synapse/rest/client/account.py
+++ b/synapse/rest/client/account.py
@@ -768,7 +768,9 @@ class ThreepidDeleteRestServlet(RestServlet):
         user_id = requester.user.to_string()
 
         try:
-            ret = await self.auth_handler.delete_threepid(
+            # Attempt to remove any known bindings of this third-party ID
+            # and user ID from identity servers.
+            ret = await self.hs.get_identity_handler().try_unbind_threepid(
                 user_id, body.medium, body.address, body.id_server
             )
         except Exception:
@@ -783,6 +785,11 @@ class ThreepidDeleteRestServlet(RestServlet):
         else:
             id_server_unbind_result = "no-support"
 
+        # Delete the local association of this user ID and third-party ID.
+        await self.auth_handler.delete_local_threepid(
+            user_id, body.medium, body.address
+        )
+
         return 200, {"id_server_unbind_result": id_server_unbind_result}
 
 
diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py
index 9a55e17624..717237e024 100644
--- a/synapse/storage/databases/main/registration.py
+++ b/synapse/storage/databases/main/registration.py
@@ -1002,19 +1002,6 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
             desc="user_delete_threepid",
         )
 
-    async def user_delete_threepids(self, user_id: str) -> None:
-        """Delete all threepid this user has bound
-
-        Args:
-             user_id: The user id to delete all threepids of
-
-        """
-        await self.db_pool.simple_delete(
-            "user_threepids",
-            keyvalues={"user_id": user_id},
-            desc="user_delete_threepids",
-        )
-
     async def add_user_bound_threepid(
         self, user_id: str, medium: str, address: str, id_server: str
     ) -> None:
diff --git a/tests/push/test_email.py b/tests/push/test_email.py
index 0a3aca5c50..4ea5472eb4 100644
--- a/tests/push/test_email.py
+++ b/tests/push/test_email.py
@@ -369,10 +369,8 @@ class EmailPusherTests(HomeserverTestCase):
 
         # disassociate the user's email address
         self.get_success(
-            self.auth_handler.delete_threepid(
-                user_id=self.user_id,
-                medium="email",
-                address="a@example.com",
+            self.auth_handler.delete_local_threepid(
+                user_id=self.user_id, medium="email", address="a@example.com"
             )
         )
 
diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py
index c0f93f898a..3b99513707 100644
--- a/tests/rest/client/test_third_party_rules.py
+++ b/tests/rest/client/test_third_party_rules.py
@@ -934,3 +934,124 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
 
         # Check that the mock was called with the right parameters
         self.assertEqual(args, (user_id, "email", "foo@example.com"))
+
+    def test_on_add_and_remove_user_third_party_identifier(self) -> None:
+        """Tests that the on_add_user_third_party_identifier and
+        on_remove_user_third_party_identifier module callbacks are called
+        just after associating and removing a 3PID to/from an account.
+        """
+        # Pretend to be a Synapse module and register both callbacks as mocks.
+        third_party_rules = self.hs.get_third_party_event_rules()
+        on_add_user_third_party_identifier_callback_mock = Mock(
+            return_value=make_awaitable(None)
+        )
+        on_remove_user_third_party_identifier_callback_mock = Mock(
+            return_value=make_awaitable(None)
+        )
+        third_party_rules._on_threepid_bind_callbacks.append(
+            on_add_user_third_party_identifier_callback_mock
+        )
+        third_party_rules._on_threepid_bind_callbacks.append(
+            on_remove_user_third_party_identifier_callback_mock
+        )
+
+        # Register an admin user.
+        self.register_user("admin", "password", admin=True)
+        admin_tok = self.login("admin", "password")
+
+        # Also register a normal user we can modify.
+        user_id = self.register_user("user", "password")
+
+        # Add a 3PID to the user.
+        channel = self.make_request(
+            "PUT",
+            "/_synapse/admin/v2/users/%s" % user_id,
+            {
+                "threepids": [
+                    {
+                        "medium": "email",
+                        "address": "foo@example.com",
+                    },
+                ],
+            },
+            access_token=admin_tok,
+        )
+
+        # Check that the mocked add callback was called with the appropriate
+        # 3PID details.
+        self.assertEqual(channel.code, 200, channel.json_body)
+        on_add_user_third_party_identifier_callback_mock.assert_called_once()
+        args = on_add_user_third_party_identifier_callback_mock.call_args[0]
+        self.assertEqual(args, (user_id, "email", "foo@example.com"))
+
+        # Now remove the 3PID from the user
+        channel = self.make_request(
+            "PUT",
+            "/_synapse/admin/v2/users/%s" % user_id,
+            {
+                "threepids": [],
+            },
+            access_token=admin_tok,
+        )
+
+        # Check that the mocked remove callback was called with the appropriate
+        # 3PID details.
+        self.assertEqual(channel.code, 200, channel.json_body)
+        on_remove_user_third_party_identifier_callback_mock.assert_called_once()
+        args = on_remove_user_third_party_identifier_callback_mock.call_args[0]
+        self.assertEqual(args, (user_id, "email", "foo@example.com"))
+
+    def test_on_remove_user_third_party_identifier_is_called_on_deactivate(
+        self,
+    ) -> None:
+        """Tests that the on_remove_user_third_party_identifier module callback is called
+        when a user is deactivated and their third-party ID associations are deleted.
+        """
+        # Pretend to be a Synapse module and register the callback as a mock.
+        third_party_rules = self.hs.get_third_party_event_rules()
+        on_remove_user_third_party_identifier_callback_mock = Mock(
+            return_value=make_awaitable(None)
+        )
+        third_party_rules._on_threepid_bind_callbacks.append(
+            on_remove_user_third_party_identifier_callback_mock
+        )
+
+        # Register an admin user.
+        self.register_user("admin", "password", admin=True)
+        admin_tok = self.login("admin", "password")
+
+        # Also register a normal user we can modify.
+        user_id = self.register_user("user", "password")
+
+        # Add a 3PID to the user.
+        channel = self.make_request(
+            "PUT",
+            "/_synapse/admin/v2/users/%s" % user_id,
+            {
+                "threepids": [
+                    {
+                        "medium": "email",
+                        "address": "foo@example.com",
+                    },
+                ],
+            },
+            access_token=admin_tok,
+        )
+        self.assertEqual(channel.code, 200, channel.json_body)
+
+        # Now deactivate the user.
+        channel = self.make_request(
+            "PUT",
+            "/_synapse/admin/v2/users/%s" % user_id,
+            {
+                "deactivated": True,
+            },
+            access_token=admin_tok,
+        )
+
+        # Check that the mocked remove callback was called with the appropriate
+        # 3PID details.
+        self.assertEqual(channel.code, 200, channel.json_body)
+        on_remove_user_third_party_identifier_callback_mock.assert_called_once()
+        args = on_remove_user_third_party_identifier_callback_mock.call_args[0]
+        self.assertEqual(args, (user_id, "email", "foo@example.com"))
-- 
cgit 1.5.1


From 189a878a355f0c3933b24c7d7a4d22b11400f7f0 Mon Sep 17 00:00:00 2001
From: Travis Ralston <travisr@matrix.org>
Date: Mon, 27 Feb 2023 13:08:18 -0700
Subject: Remove dangling reference to being a reference implementation
 (#15167)

* Remove dangling reference to being a reference implementation

* Create 15167.misc
---
 changelog.d/15167.misc | 1 +
 synapse/__init__.py    | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15167.misc

(limited to 'synapse')

diff --git a/changelog.d/15167.misc b/changelog.d/15167.misc
new file mode 100644
index 0000000000..175c2a3b83
--- /dev/null
+++ b/changelog.d/15167.misc
@@ -0,0 +1 @@
+Remove dangling reference to being a reference implementation in docstring.
diff --git a/synapse/__init__.py b/synapse/__init__.py
index fbfd506a43..a203ed533a 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -1,5 +1,6 @@
 # Copyright 2014-2016 OpenMarket Ltd
-# Copyright 2018-9 New Vector Ltd
+# Copyright 2018-2019 New Vector Ltd
+# Copyright 2023 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-""" This is a reference implementation of a Matrix homeserver.
+""" This is an implementation of a Matrix homeserver.
 """
 
 import json
-- 
cgit 1.5.1


From 93f7955eba50c827f96e1b2e8e44ef22a98cecc4 Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Tue, 28 Feb 2023 13:09:10 +0100
Subject: Admin API endpoint to delete a reported event (#15116)

* Admin api to delete event report

* lint +  tests

* newsfile

* Apply suggestions from code review

Co-authored-by: David Robertson <david.m.robertson1@gmail.com>

* revert changes - move to WorkerStore

* update unit test

* Note that timestamp is in milliseconds

---------

Co-authored-by: David Robertson <david.m.robertson1@gmail.com>
---
 changelog.d/15116.feature              |   1 +
 docs/admin_api/event_reports.md        |  14 ++++
 synapse/rest/admin/event_reports.py    |  41 ++++++++--
 synapse/storage/databases/main/room.py |  36 ++++++++-
 tests/rest/admin/test_event_reports.py | 143 ++++++++++++++++++++++++++++++++-
 5 files changed, 224 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/15116.feature

(limited to 'synapse')

diff --git a/changelog.d/15116.feature b/changelog.d/15116.feature
new file mode 100644
index 0000000000..087d8dc7f1
--- /dev/null
+++ b/changelog.d/15116.feature
@@ -0,0 +1 @@
+Add an [admin API](https://matrix-org.github.io/synapse/latest/usage/administration/admin_api/index.html) to delete a [specific event report](https://spec.matrix.org/v1.6/client-server-api/#reporting-content).
\ No newline at end of file
diff --git a/docs/admin_api/event_reports.md b/docs/admin_api/event_reports.md
index beec8bb7ef..83f7dc37f4 100644
--- a/docs/admin_api/event_reports.md
+++ b/docs/admin_api/event_reports.md
@@ -169,3 +169,17 @@ The following fields are returned in the JSON response body:
 * `canonical_alias`: string - The canonical alias of the room. `null` if the room does not
   have a canonical alias set.
 * `event_json`: object - Details of the original event that was reported.
+
+# Delete a specific event report
+
+This API deletes a specific event report. If the request is successful, the response body
+will be an empty JSON object.
+
+The api is:
+```
+DELETE /_synapse/admin/v1/event_reports/<report_id>
+```
+
+**URL parameters:**
+
+* `report_id`: string - The ID of the event report.
diff --git a/synapse/rest/admin/event_reports.py b/synapse/rest/admin/event_reports.py
index a3beb74e2c..c546ef7e23 100644
--- a/synapse/rest/admin/event_reports.py
+++ b/synapse/rest/admin/event_reports.py
@@ -53,11 +53,11 @@ class EventReportsRestServlet(RestServlet):
     PATTERNS = admin_patterns("/event_reports$")
 
     def __init__(self, hs: "HomeServer"):
-        self.auth = hs.get_auth()
-        self.store = hs.get_datastores().main
+        self._auth = hs.get_auth()
+        self._store = hs.get_datastores().main
 
     async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
-        await assert_requester_is_admin(self.auth, request)
+        await assert_requester_is_admin(self._auth, request)
 
         start = parse_integer(request, "from", default=0)
         limit = parse_integer(request, "limit", default=100)
@@ -79,7 +79,7 @@ class EventReportsRestServlet(RestServlet):
                 errcode=Codes.INVALID_PARAM,
             )
 
-        event_reports, total = await self.store.get_event_reports_paginate(
+        event_reports, total = await self._store.get_event_reports_paginate(
             start, limit, direction, user_id, room_id
         )
         ret = {"event_reports": event_reports, "total": total}
@@ -108,13 +108,13 @@ class EventReportDetailRestServlet(RestServlet):
     PATTERNS = admin_patterns("/event_reports/(?P<report_id>[^/]*)$")
 
     def __init__(self, hs: "HomeServer"):
-        self.auth = hs.get_auth()
-        self.store = hs.get_datastores().main
+        self._auth = hs.get_auth()
+        self._store = hs.get_datastores().main
 
     async def on_GET(
         self, request: SynapseRequest, report_id: str
     ) -> Tuple[int, JsonDict]:
-        await assert_requester_is_admin(self.auth, request)
+        await assert_requester_is_admin(self._auth, request)
 
         message = (
             "The report_id parameter must be a string representing a positive integer."
@@ -131,8 +131,33 @@ class EventReportDetailRestServlet(RestServlet):
                 HTTPStatus.BAD_REQUEST, message, errcode=Codes.INVALID_PARAM
             )
 
-        ret = await self.store.get_event_report(resolved_report_id)
+        ret = await self._store.get_event_report(resolved_report_id)
         if not ret:
             raise NotFoundError("Event report not found")
 
         return HTTPStatus.OK, ret
+
+    async def on_DELETE(
+        self, request: SynapseRequest, report_id: str
+    ) -> Tuple[int, JsonDict]:
+        await assert_requester_is_admin(self._auth, request)
+
+        message = (
+            "The report_id parameter must be a string representing a positive integer."
+        )
+        try:
+            resolved_report_id = int(report_id)
+        except ValueError:
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST, message, errcode=Codes.INVALID_PARAM
+            )
+
+        if resolved_report_id < 0:
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST, message, errcode=Codes.INVALID_PARAM
+            )
+
+        if await self._store.delete_event_report(resolved_report_id):
+            return HTTPStatus.OK, {}
+
+        raise NotFoundError("Event report not found")
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 39f89291b2..a2e9519cb6 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -1417,6 +1417,27 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
             get_un_partial_stated_rooms_from_stream_txn,
         )
 
+    async def delete_event_report(self, report_id: int) -> bool:
+        """Remove an event report from database.
+
+        Args:
+            report_id: Report to delete
+
+        Returns:
+            Whether the report was successfully deleted or not.
+        """
+        try:
+            await self.db_pool.simple_delete_one(
+                table="event_reports",
+                keyvalues={"id": report_id},
+                desc="delete_event_report",
+            )
+        except StoreError:
+            # Deletion failed because report does not exist
+            return False
+
+        return True
+
 
 class _BackgroundUpdates:
     REMOVE_TOMESTONED_ROOMS_BG_UPDATE = "remove_tombstoned_rooms_from_directory"
@@ -2139,7 +2160,19 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
         reason: Optional[str],
         content: JsonDict,
         received_ts: int,
-    ) -> None:
+    ) -> int:
+        """Add an event report
+
+        Args:
+            room_id: Room that contains the reported event.
+            event_id: The reported event.
+            user_id: User who reports the event.
+            reason: Description that the user specifies.
+            content: Report request body (score and reason).
+            received_ts: Time when the user submitted the report (milliseconds).
+        Returns:
+            Id of the event report.
+        """
         next_id = self._event_reports_id_gen.get_next()
         await self.db_pool.simple_insert(
             table="event_reports",
@@ -2154,6 +2187,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
             },
             desc="add_event_report",
         )
+        return next_id
 
     async def get_event_report(self, report_id: int) -> Optional[Dict[str, Any]]:
         """Retrieve an event report
diff --git a/tests/rest/admin/test_event_reports.py b/tests/rest/admin/test_event_reports.py
index 233eba3516..f189b07769 100644
--- a/tests/rest/admin/test_event_reports.py
+++ b/tests/rest/admin/test_event_reports.py
@@ -78,7 +78,7 @@ class EventReportsTestCase(unittest.HomeserverTestCase):
         """
         Try to get an event report without authentication.
         """
-        channel = self.make_request("GET", self.url, b"{}")
+        channel = self.make_request("GET", self.url, {})
 
         self.assertEqual(401, channel.code, msg=channel.json_body)
         self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"])
@@ -473,7 +473,7 @@ class EventReportDetailTestCase(unittest.HomeserverTestCase):
         """
         Try to get event report without authentication.
         """
-        channel = self.make_request("GET", self.url, b"{}")
+        channel = self.make_request("GET", self.url, {})
 
         self.assertEqual(401, channel.code, msg=channel.json_body)
         self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"])
@@ -599,3 +599,142 @@ class EventReportDetailTestCase(unittest.HomeserverTestCase):
         self.assertIn("room_id", content["event_json"])
         self.assertIn("sender", content["event_json"])
         self.assertIn("content", content["event_json"])
+
+
+class DeleteEventReportTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        synapse.rest.admin.register_servlets,
+        login.register_servlets,
+    ]
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self._store = hs.get_datastores().main
+
+        self.admin_user = self.register_user("admin", "pass", admin=True)
+        self.admin_user_tok = self.login("admin", "pass")
+
+        self.other_user = self.register_user("user", "pass")
+        self.other_user_tok = self.login("user", "pass")
+
+        # create report
+        event_id = self.get_success(
+            self._store.add_event_report(
+                "room_id",
+                "event_id",
+                self.other_user,
+                "this makes me sad",
+                {},
+                self.clock.time_msec(),
+            )
+        )
+
+        self.url = f"/_synapse/admin/v1/event_reports/{event_id}"
+
+    def test_no_auth(self) -> None:
+        """
+        Try to delete event report without authentication.
+        """
+        channel = self.make_request("DELETE", self.url)
+
+        self.assertEqual(401, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"])
+
+    def test_requester_is_no_admin(self) -> None:
+        """
+        If the user is not a server admin, an error 403 is returned.
+        """
+
+        channel = self.make_request(
+            "DELETE",
+            self.url,
+            access_token=self.other_user_tok,
+        )
+
+        self.assertEqual(403, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"])
+
+    def test_delete_success(self) -> None:
+        """
+        Testing deletion of a report.
+        """
+
+        channel = self.make_request(
+            "DELETE",
+            self.url,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual({}, channel.json_body)
+
+        channel = self.make_request(
+            "GET",
+            self.url,
+            access_token=self.admin_user_tok,
+        )
+
+        # check that report was deleted
+        self.assertEqual(404, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"])
+
+    def test_invalid_report_id(self) -> None:
+        """
+        Testing that an invalid `report_id` returns a 400.
+        """
+
+        # `report_id` is negative
+        channel = self.make_request(
+            "DELETE",
+            "/_synapse/admin/v1/event_reports/-123",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(400, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"])
+        self.assertEqual(
+            "The report_id parameter must be a string representing a positive integer.",
+            channel.json_body["error"],
+        )
+
+        # `report_id` is a non-numerical string
+        channel = self.make_request(
+            "DELETE",
+            "/_synapse/admin/v1/event_reports/abcdef",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(400, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"])
+        self.assertEqual(
+            "The report_id parameter must be a string representing a positive integer.",
+            channel.json_body["error"],
+        )
+
+        # `report_id` is undefined
+        channel = self.make_request(
+            "DELETE",
+            "/_synapse/admin/v1/event_reports/",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(400, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"])
+        self.assertEqual(
+            "The report_id parameter must be a string representing a positive integer.",
+            channel.json_body["error"],
+        )
+
+    def test_report_id_not_found(self) -> None:
+        """
+        Testing that a non-existent `report_id` returns a 404.
+        """
+
+        channel = self.make_request(
+            "DELETE",
+            "/_synapse/admin/v1/event_reports/123",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(404, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"])
+        self.assertEqual("Event report not found", channel.json_body["error"])
-- 
cgit 1.5.1


From c369d82df0eac691ccb549051dd61dd77b83d1e9 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 28 Feb 2023 10:17:55 -0500
Subject: Add missing type hints to InsecureInterceptableContextFactory.
 (#15164)

---
 changelog.d/15164.misc | 1 +
 mypy.ini               | 3 ---
 synapse/http/client.py | 5 +++--
 3 files changed, 4 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/15164.misc

(limited to 'synapse')

diff --git a/changelog.d/15164.misc b/changelog.d/15164.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/15164.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/mypy.ini b/mypy.ini
index 94562d0bce..572734f8e7 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -36,9 +36,6 @@ exclude = (?x)
 [mypy-synapse.federation.transport.client]
 disallow_untyped_defs = False
 
-[mypy-synapse.http.client]
-disallow_untyped_defs = False
-
 [mypy-synapse.http.matrixfederationclient]
 disallow_untyped_defs = False
 
diff --git a/synapse/http/client.py b/synapse/http/client.py
index a05f297933..ae48e7c3f0 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -44,6 +44,7 @@ from twisted.internet.interfaces import (
     IAddress,
     IDelayedCall,
     IHostResolution,
+    IOpenSSLContextFactory,
     IReactorCore,
     IReactorPluggableNameResolver,
     IReactorTime,
@@ -958,8 +959,8 @@ class InsecureInterceptableContextFactory(ssl.ContextFactory):
         self._context = SSL.Context(SSL.SSLv23_METHOD)
         self._context.set_verify(VERIFY_NONE, lambda *_: False)
 
-    def getContext(self, hostname=None, port=None):
+    def getContext(self) -> SSL.Context:
         return self._context
 
-    def creatorForNetloc(self, hostname: bytes, port: int):
+    def creatorForNetloc(self, hostname: bytes, port: int) -> IOpenSSLContextFactory:
         return self
-- 
cgit 1.5.1


From 682d31c7023b6b7299e74bc631e4d2acc60f91ac Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Tue, 28 Feb 2023 16:37:19 +0000
Subject: Allow use of the `/filter` Client-Server APIs on workers. (#15134)

---
 changelog.d/15134.feature                   |  1 +
 docker/configure_workers_and_start.py       |  1 +
 docs/workers.md                             |  1 +
 synapse/rest/__init__.py                    |  3 +--
 synapse/storage/databases/main/__init__.py  |  4 ++--
 synapse/storage/databases/main/filtering.py | 25 +++++++++++++++++++++----
 6 files changed, 27 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/15134.feature

(limited to 'synapse')

diff --git a/changelog.d/15134.feature b/changelog.d/15134.feature
new file mode 100644
index 0000000000..0dbb30bc8f
--- /dev/null
+++ b/changelog.d/15134.feature
@@ -0,0 +1 @@
+Allow use of the `/filter` Client-Server APIs on workers.
\ No newline at end of file
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index 58c62f2231..7f615e5066 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -142,6 +142,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
             "^/_matrix/client/(api/v1|r0|v3|unstable/.*)/rooms/.*/aliases",
             "^/_matrix/client/v1/rooms/.*/timestamp_to_event$",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/search",
+            "^/_matrix/client/(r0|v3|unstable)/user/.*/filter(/|$)",
         ],
         "shared_extra_conf": {},
         "worker_extra_conf": "",
diff --git a/docs/workers.md b/docs/workers.md
index 2eb970ffa6..35a96f12a9 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -232,6 +232,7 @@ information.
     ^/_matrix/client/(api/v1|r0|v3|unstable)/joined_rooms$
     ^/_matrix/client/v1/rooms/.*/timestamp_to_event$
     ^/_matrix/client/(api/v1|r0|v3|unstable)/search$
+    ^/_matrix/client/(r0|v3|unstable)/user/.*/filter(/|$)
 
     # Encryption requests
     ^/_matrix/client/(r0|v3|unstable)/keys/query$
diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py
index 14c4e6ebbb..c327f15043 100644
--- a/synapse/rest/__init__.py
+++ b/synapse/rest/__init__.py
@@ -108,8 +108,7 @@ class ClientRestResource(JsonResource):
         if is_main_process:
             logout.register_servlets(hs, client_resource)
         sync.register_servlets(hs, client_resource)
-        if is_main_process:
-            filter.register_servlets(hs, client_resource)
+        filter.register_servlets(hs, client_resource)
         account.register_servlets(hs, client_resource)
         register.register_servlets(hs, client_resource)
         if is_main_process:
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index 837dc7646e..dc3948c170 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -43,7 +43,7 @@ from .event_federation import EventFederationStore
 from .event_push_actions import EventPushActionsStore
 from .events_bg_updates import EventsBackgroundUpdatesStore
 from .events_forward_extremities import EventForwardExtremitiesStore
-from .filtering import FilteringStore
+from .filtering import FilteringWorkerStore
 from .keys import KeyStore
 from .lock import LockStore
 from .media_repository import MediaRepositoryStore
@@ -99,7 +99,7 @@ class DataStore(
     EventFederationStore,
     MediaRepositoryStore,
     RejectionsStore,
-    FilteringStore,
+    FilteringWorkerStore,
     PusherStore,
     PushRuleStore,
     ApplicationServiceTransactionStore,
diff --git a/synapse/storage/databases/main/filtering.py b/synapse/storage/databases/main/filtering.py
index 12f3b601f1..8e57c8e5a0 100644
--- a/synapse/storage/databases/main/filtering.py
+++ b/synapse/storage/databases/main/filtering.py
@@ -17,7 +17,7 @@ from typing import Optional, Tuple, Union, cast
 
 from canonicaljson import encode_canonical_json
 
-from synapse.api.errors import Codes, SynapseError
+from synapse.api.errors import Codes, StoreError, SynapseError
 from synapse.storage._base import SQLBaseStore, db_to_json
 from synapse.storage.database import LoggingTransaction
 from synapse.types import JsonDict
@@ -46,8 +46,6 @@ class FilteringWorkerStore(SQLBaseStore):
 
         return db_to_json(def_json)
 
-
-class FilteringStore(FilteringWorkerStore):
     async def add_user_filter(self, user_localpart: str, user_filter: JsonDict) -> int:
         def_json = encode_canonical_json(user_filter)
 
@@ -79,4 +77,23 @@ class FilteringStore(FilteringWorkerStore):
 
             return filter_id
 
-        return await self.db_pool.runInteraction("add_user_filter", _do_txn)
+        attempts = 0
+        while True:
+            # Try a few times.
+            # This is technically needed if a user tries to create two filters at once,
+            # leading to two concurrent transactions.
+            # The failure case would be:
+            # - SELECT filter_id ... filter_json = ? → both transactions return no rows
+            # - SELECT MAX(filter_id) ... → both transactions return e.g. 5
+            # - INSERT INTO ... → both transactions insert filter_id = 6
+            # One of the transactions will commit. The other will get a unique key
+            # constraint violation error (IntegrityError). This is not the same as a
+            # serialisability violation, which would be automatically retried by
+            # `runInteraction`.
+            try:
+                return await self.db_pool.runInteraction("add_user_filter", _do_txn)
+            except self.db_pool.engine.module.IntegrityError:
+                attempts += 1
+
+                if attempts >= 5:
+                    raise StoreError(500, "Couldn't generate a filter ID.")
-- 
cgit 1.5.1


From d62cd940cb38e706f7fadc279017b0be3f3f29a3 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Tue, 28 Feb 2023 17:11:26 +0000
Subject: Fix a long-standing bug where an initial sync would not respond to
 changes to the list of ignored users if there was an initial sync cached.
 (#15163)

---
 changelog.d/15163.bugfix                       |  1 +
 synapse/rest/client/sync.py                    | 25 +++++++++++++++++++--
 synapse/storage/databases/main/account_data.py | 31 ++++++++++++++++++++++++++
 tests/storage/test_account_data.py             | 22 ++++++++++++++++++
 4 files changed, 77 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15163.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15163.bugfix b/changelog.d/15163.bugfix
new file mode 100644
index 0000000000..7ff1cd4463
--- /dev/null
+++ b/changelog.d/15163.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where an initial sync would not respond to changes to the list of ignored users if there was an initial sync cached.
\ No newline at end of file
diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py
index f2013faeb2..8fcb8ac3d9 100644
--- a/synapse/rest/client/sync.py
+++ b/synapse/rest/client/sync.py
@@ -16,7 +16,7 @@ import logging
 from collections import defaultdict
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 
-from synapse.api.constants import EduTypes, Membership, PresenceState
+from synapse.api.constants import AccountDataTypes, EduTypes, Membership, PresenceState
 from synapse.api.errors import Codes, StoreError, SynapseError
 from synapse.api.filtering import FilterCollection
 from synapse.api.presence import UserPresenceState
@@ -139,7 +139,28 @@ class SyncRestServlet(RestServlet):
             device_id,
         )
 
-        request_key = (user, timeout, since, filter_id, full_state, device_id)
+        # Stream position of the last ignored users account data event for this user,
+        # if we're initial syncing.
+        # We include this in the request key to invalidate an initial sync
+        # in the response cache once the set of ignored users has changed.
+        # (We filter out ignored users from timeline events, so our sync response
+        # is invalid once the set of ignored users changes.)
+        last_ignore_accdata_streampos: Optional[int] = None
+        if not since:
+            # No `since`, so this is an initial sync.
+            last_ignore_accdata_streampos = await self.store.get_latest_stream_id_for_global_account_data_by_type_for_user(
+                user.to_string(), AccountDataTypes.IGNORED_USER_LIST
+            )
+
+        request_key = (
+            user,
+            timeout,
+            since,
+            filter_id,
+            full_state,
+            device_id,
+            last_ignore_accdata_streampos,
+        )
 
         if filter_id is None:
             filter_collection = self.filtering.DEFAULT_FILTER_COLLECTION
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 95567826f2..308d19440f 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -237,6 +237,37 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
         else:
             return None
 
+    async def get_latest_stream_id_for_global_account_data_by_type_for_user(
+        self, user_id: str, data_type: str
+    ) -> Optional[int]:
+        """
+        Returns:
+            The stream ID of the account data,
+            or None if there is no such account data.
+        """
+
+        def get_latest_stream_id_for_global_account_data_by_type_for_user_txn(
+            txn: LoggingTransaction,
+        ) -> Optional[int]:
+            sql = """
+                SELECT stream_id FROM account_data
+                WHERE user_id = ? AND account_data_type = ?
+                ORDER BY stream_id DESC
+                LIMIT 1
+            """
+            txn.execute(sql, (user_id, data_type))
+
+            row = txn.fetchone()
+            if row:
+                return row[0]
+            else:
+                return None
+
+        return await self.db_pool.runInteraction(
+            "get_latest_stream_id_for_global_account_data_by_type_for_user",
+            get_latest_stream_id_for_global_account_data_by_type_for_user_txn,
+        )
+
     @cached(num_args=2, tree=True)
     async def get_account_data_for_room(
         self, user_id: str, room_id: str
diff --git a/tests/storage/test_account_data.py b/tests/storage/test_account_data.py
index 1bfd11ceae..b12691a9d3 100644
--- a/tests/storage/test_account_data.py
+++ b/tests/storage/test_account_data.py
@@ -140,3 +140,25 @@ class IgnoredUsersTestCase(unittest.HomeserverTestCase):
         # No one ignores the user now.
         self.assert_ignored(self.user, set())
         self.assert_ignorers("@other:test", set())
+
+    def test_ignoring_users_with_latest_stream_ids(self) -> None:
+        """Test that ignoring users updates the latest stream ID for the ignored
+        user list account data."""
+
+        def get_latest_ignore_streampos(user_id: str) -> Optional[int]:
+            return self.get_success(
+                self.store.get_latest_stream_id_for_global_account_data_by_type_for_user(
+                    user_id, AccountDataTypes.IGNORED_USER_LIST
+                )
+            )
+
+        self.assertIsNone(get_latest_ignore_streampos("@user:test"))
+
+        self._update_ignore_list("@other:test", "@another:remote")
+
+        self.assertEqual(get_latest_ignore_streampos("@user:test"), 2)
+
+        # Add one user, remove one user, and leave one user.
+        self._update_ignore_list("@foo:test", "@another:remote")
+
+        self.assertEqual(get_latest_ignore_streampos("@user:test"), 3)
-- 
cgit 1.5.1


From 2b78981736f9004f99b1760e3e77b234f92755a7 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Tue, 28 Feb 2023 18:49:28 +0000
Subject: Remove support for aggregating reactions (#15172)

It turns out that no clients rely on server-side aggregation of `m.annotation`
relationships: it's just not very useful as currently implemented.

It's also non-trivial to calculate.

I want to remove it from MSC2677, so to keep the implementation in line, let's
remove it here.
---
 changelog.d/15172.feature                          |   1 +
 synapse/events/utils.py                            |   5 -
 synapse/handlers/relations.py                      |  76 +--------
 synapse/storage/databases/main/cache.py            |   3 -
 synapse/storage/databases/main/events.py           |   4 -
 .../storage/databases/main/events_bg_updates.py    |   3 -
 synapse/storage/databases/main/relations.py        | 137 ----------------
 tests/rest/client/test_relations.py                | 178 ++++-----------------
 8 files changed, 30 insertions(+), 377 deletions(-)
 create mode 100644 changelog.d/15172.feature

(limited to 'synapse')

diff --git a/changelog.d/15172.feature b/changelog.d/15172.feature
new file mode 100644
index 0000000000..3f789edb7f
--- /dev/null
+++ b/changelog.d/15172.feature
@@ -0,0 +1 @@
+Remove support for server-side aggregation of reactions.
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index ebf8c7ed83..eaa6cad4af 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -516,11 +516,6 @@ class EventClientSerializer:
         # being serialized.
         serialized_aggregations = {}
 
-        if event_aggregations.annotations:
-            serialized_aggregations[
-                RelationTypes.ANNOTATION
-            ] = event_aggregations.annotations
-
         if event_aggregations.references:
             serialized_aggregations[
                 RelationTypes.REFERENCE
diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py
index 0fb15391e0..553053b694 100644
--- a/synapse/handlers/relations.py
+++ b/synapse/handlers/relations.py
@@ -60,13 +60,12 @@ class BundledAggregations:
     Some values require additional processing during serialization.
     """
 
-    annotations: Optional[JsonDict] = None
     references: Optional[JsonDict] = None
     replace: Optional[EventBase] = None
     thread: Optional[_ThreadAggregation] = None
 
     def __bool__(self) -> bool:
-        return bool(self.annotations or self.references or self.replace or self.thread)
+        return bool(self.references or self.replace or self.thread)
 
 
 class RelationsHandler:
@@ -227,67 +226,6 @@ class RelationsHandler:
                     e.msg,
                 )
 
-    async def get_annotations_for_events(
-        self, event_ids: Collection[str], ignored_users: FrozenSet[str] = frozenset()
-    ) -> Dict[str, List[JsonDict]]:
-        """Get a list of annotations to the given events, grouped by event type and
-        aggregation key, sorted by count.
-
-        This is used e.g. to get the what and how many reactions have happened
-        on an event.
-
-        Args:
-            event_ids: Fetch events that relate to these event IDs.
-            ignored_users: The users ignored by the requesting user.
-
-        Returns:
-            A map of event IDs to a list of groups of annotations that match.
-            Each entry is a dict with `type`, `key` and `count` fields.
-        """
-        # Get the base results for all users.
-        full_results = await self._main_store.get_aggregation_groups_for_events(
-            event_ids
-        )
-
-        # Avoid additional logic if there are no ignored users.
-        if not ignored_users:
-            return {
-                event_id: results
-                for event_id, results in full_results.items()
-                if results
-            }
-
-        # Then subtract off the results for any ignored users.
-        ignored_results = await self._main_store.get_aggregation_groups_for_users(
-            [event_id for event_id, results in full_results.items() if results],
-            ignored_users,
-        )
-
-        filtered_results = {}
-        for event_id, results in full_results.items():
-            # If no annotations, skip.
-            if not results:
-                continue
-
-            # If there are not ignored results for this event, copy verbatim.
-            if event_id not in ignored_results:
-                filtered_results[event_id] = results
-                continue
-
-            # Otherwise, subtract out the ignored results.
-            event_ignored_results = ignored_results[event_id]
-            for result in results:
-                key = (result["type"], result["key"])
-                if key in event_ignored_results:
-                    # Ensure to not modify the cache.
-                    result = result.copy()
-                    result["count"] -= event_ignored_results[key]
-                    if result["count"] <= 0:
-                        continue
-                filtered_results.setdefault(event_id, []).append(result)
-
-        return filtered_results
-
     async def get_references_for_events(
         self, event_ids: Collection[str], ignored_users: FrozenSet[str] = frozenset()
     ) -> Dict[str, List[_RelatedEvent]]:
@@ -531,17 +469,6 @@ class RelationsHandler:
                 # (as that is what makes it part of the thread).
                 relations_by_id[latest_thread_event.event_id] = RelationTypes.THREAD
 
-        async def _fetch_annotations() -> None:
-            """Fetch any annotations (ie, reactions) to bundle with this event."""
-            annotations_by_event_id = await self.get_annotations_for_events(
-                events_by_id.keys(), ignored_users=ignored_users
-            )
-            for event_id, annotations in annotations_by_event_id.items():
-                if annotations:
-                    results.setdefault(event_id, BundledAggregations()).annotations = {
-                        "chunk": annotations
-                    }
-
         async def _fetch_references() -> None:
             """Fetch any references to bundle with this event."""
             references_by_event_id = await self.get_references_for_events(
@@ -575,7 +502,6 @@ class RelationsHandler:
         await make_deferred_yieldable(
             gather_results(
                 (
-                    run_in_background(_fetch_annotations),
                     run_in_background(_fetch_references),
                     run_in_background(_fetch_edits),
                 )
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index 5b66431691..096dec7f87 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -266,9 +266,6 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
         if relates_to:
             self._attempt_to_invalidate_cache("get_relations_for_event", (relates_to,))
             self._attempt_to_invalidate_cache("get_references_for_event", (relates_to,))
-            self._attempt_to_invalidate_cache(
-                "get_aggregation_groups_for_event", (relates_to,)
-            )
             self._attempt_to_invalidate_cache("get_applicable_edit", (relates_to,))
             self._attempt_to_invalidate_cache("get_thread_summary", (relates_to,))
             self._attempt_to_invalidate_cache("get_thread_participated", (relates_to,))
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 73b8aea16c..a8a4ed4436 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -2024,10 +2024,6 @@ class PersistEventsStore:
         self.store._invalidate_cache_and_stream(
             txn, self.store.get_relations_for_event, (redacted_relates_to,)
         )
-        if rel_type == RelationTypes.ANNOTATION:
-            self.store._invalidate_cache_and_stream(
-                txn, self.store.get_aggregation_groups_for_event, (redacted_relates_to,)
-            )
         if rel_type == RelationTypes.REFERENCE:
             self.store._invalidate_cache_and_stream(
                 txn, self.store.get_references_for_event, (redacted_relates_to,)
diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
index 0a275e6ce6..daef3685b0 100644
--- a/synapse/storage/databases/main/events_bg_updates.py
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -1219,9 +1219,6 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
                     self._invalidate_cache_and_stream(  # type: ignore[attr-defined]
                         txn, self.get_relations_for_event, cache_tuple  # type: ignore[attr-defined]
                     )
-                    self._invalidate_cache_and_stream(  # type: ignore[attr-defined]
-                        txn, self.get_aggregation_groups_for_event, cache_tuple  # type: ignore[attr-defined]
-                    )
                     self._invalidate_cache_and_stream(  # type: ignore[attr-defined]
                         txn, self.get_thread_summary, cache_tuple  # type: ignore[attr-defined]
                     )
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index fa3266c081..bc3a83919c 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -397,143 +397,6 @@ class RelationsWorkerStore(SQLBaseStore):
         )
         return result is not None
 
-    @cached()
-    async def get_aggregation_groups_for_event(
-        self, event_id: str
-    ) -> Sequence[JsonDict]:
-        raise NotImplementedError()
-
-    @cachedList(
-        cached_method_name="get_aggregation_groups_for_event", list_name="event_ids"
-    )
-    async def get_aggregation_groups_for_events(
-        self, event_ids: Collection[str]
-    ) -> Mapping[str, Optional[List[JsonDict]]]:
-        """Get a list of annotations on the given events, grouped by event type and
-        aggregation key, sorted by count.
-
-        This is used e.g. to get the what and how many reactions have happend
-        on an event.
-
-        Args:
-            event_ids: Fetch events that relate to these event IDs.
-
-        Returns:
-            A map of event IDs to a list of groups of annotations that match.
-            Each entry is a dict with `type`, `key` and `count` fields.
-        """
-        # The number of entries to return per event ID.
-        limit = 5
-
-        clause, args = make_in_list_sql_clause(
-            self.database_engine, "relates_to_id", event_ids
-        )
-        args.append(RelationTypes.ANNOTATION)
-
-        sql = f"""
-            SELECT
-                relates_to_id,
-                annotation.type,
-                aggregation_key,
-                COUNT(DISTINCT annotation.sender)
-            FROM events AS annotation
-            INNER JOIN event_relations USING (event_id)
-            INNER JOIN events AS parent ON
-                parent.event_id = relates_to_id
-                AND parent.room_id = annotation.room_id
-            WHERE
-                {clause}
-                AND relation_type = ?
-            GROUP BY relates_to_id, annotation.type, aggregation_key
-            ORDER BY relates_to_id, COUNT(*) DESC
-        """
-
-        def _get_aggregation_groups_for_events_txn(
-            txn: LoggingTransaction,
-        ) -> Mapping[str, List[JsonDict]]:
-            txn.execute(sql, args)
-
-            result: Dict[str, List[JsonDict]] = {}
-            for event_id, type, key, count in cast(
-                List[Tuple[str, str, str, int]], txn
-            ):
-                event_results = result.setdefault(event_id, [])
-
-                # Limit the number of results per event ID.
-                if len(event_results) == limit:
-                    continue
-
-                event_results.append({"type": type, "key": key, "count": count})
-
-            return result
-
-        return await self.db_pool.runInteraction(
-            "get_aggregation_groups_for_events", _get_aggregation_groups_for_events_txn
-        )
-
-    async def get_aggregation_groups_for_users(
-        self, event_ids: Collection[str], users: FrozenSet[str]
-    ) -> Dict[str, Dict[Tuple[str, str], int]]:
-        """Fetch the partial aggregations for an event for specific users.
-
-        This is used, in conjunction with get_aggregation_groups_for_event, to
-        remove information from the results for ignored users.
-
-        Args:
-            event_ids: Fetch events that relate to these event IDs.
-            users: The users to fetch information for.
-
-        Returns:
-            A map of event ID to a map of (event type, aggregation key) to a
-            count of users.
-        """
-
-        if not users:
-            return {}
-
-        events_sql, args = make_in_list_sql_clause(
-            self.database_engine, "relates_to_id", event_ids
-        )
-
-        users_sql, users_args = make_in_list_sql_clause(
-            self.database_engine, "annotation.sender", users
-        )
-        args.extend(users_args)
-        args.append(RelationTypes.ANNOTATION)
-
-        sql = f"""
-            SELECT
-                relates_to_id,
-                annotation.type,
-                aggregation_key,
-                COUNT(DISTINCT annotation.sender)
-            FROM events AS annotation
-            INNER JOIN event_relations USING (event_id)
-            INNER JOIN events AS parent ON
-                parent.event_id = relates_to_id
-                AND parent.room_id = annotation.room_id
-            WHERE {events_sql} AND {users_sql} AND relation_type = ?
-            GROUP BY relates_to_id, annotation.type, aggregation_key
-            ORDER BY relates_to_id, COUNT(*) DESC
-        """
-
-        def _get_aggregation_groups_for_users_txn(
-            txn: LoggingTransaction,
-        ) -> Dict[str, Dict[Tuple[str, str], int]]:
-            txn.execute(sql, args)
-
-            result: Dict[str, Dict[Tuple[str, str], int]] = {}
-            for event_id, type, key, count in cast(
-                List[Tuple[str, str, str, int]], txn
-            ):
-                result.setdefault(event_id, {})[(type, key)] = count
-
-            return result
-
-        return await self.db_pool.runInteraction(
-            "get_aggregation_groups_for_users", _get_aggregation_groups_for_users_txn
-        )
-
     @cached()
     async def get_references_for_event(self, event_id: str) -> List[JsonDict]:
         raise NotImplementedError()
diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py
index c8a6911d5e..a8a0a16141 100644
--- a/tests/rest/client/test_relations.py
+++ b/tests/rest/client/test_relations.py
@@ -1080,48 +1080,6 @@ class BundledAggregationsTestCase(BaseRelationsTestCase):
         ]
         assert_bundle(self._find_event_in_chunk(chunk))
 
-    def test_annotation(self) -> None:
-        """
-        Test that annotations get correctly bundled.
-        """
-        # Setup by sending a variety of relations.
-        self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a")
-        self._send_relation(
-            RelationTypes.ANNOTATION, "m.reaction", "a", access_token=self.user2_token
-        )
-        self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "b")
-
-        def assert_annotations(bundled_aggregations: JsonDict) -> None:
-            self.assertEqual(
-                {
-                    "chunk": [
-                        {"type": "m.reaction", "key": "a", "count": 2},
-                        {"type": "m.reaction", "key": "b", "count": 1},
-                    ]
-                },
-                bundled_aggregations,
-            )
-
-        self._test_bundled_aggregations(RelationTypes.ANNOTATION, assert_annotations, 7)
-
-    def test_annotation_to_annotation(self) -> None:
-        """Any relation to an annotation should be ignored."""
-        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a")
-        event_id = channel.json_body["event_id"]
-        self._send_relation(
-            RelationTypes.ANNOTATION, "m.reaction", "b", parent_id=event_id
-        )
-
-        # Fetch the initial annotation event to see if it has bundled aggregations.
-        channel = self.make_request(
-            "GET",
-            f"/_matrix/client/v3/rooms/{self.room}/event/{event_id}",
-            access_token=self.user_token,
-        )
-        self.assertEquals(200, channel.code, channel.json_body)
-        # The first annotationt should not have any bundled aggregations.
-        self.assertNotIn("m.relations", channel.json_body["unsigned"])
-
     def test_reference(self) -> None:
         """
         Test that references get correctly bundled.
@@ -1138,7 +1096,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase):
                 bundled_aggregations,
             )
 
-        self._test_bundled_aggregations(RelationTypes.REFERENCE, assert_annotations, 7)
+        self._test_bundled_aggregations(RelationTypes.REFERENCE, assert_annotations, 6)
 
     def test_thread(self) -> None:
         """
@@ -1183,7 +1141,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase):
 
         # The "user" sent the root event and is making queries for the bundled
         # aggregations: they have participated.
-        self._test_bundled_aggregations(RelationTypes.THREAD, _gen_assert(True), 7)
+        self._test_bundled_aggregations(RelationTypes.THREAD, _gen_assert(True), 6)
         # The "user2" sent replies in the thread and is making queries for the
         # bundled aggregations: they have participated.
         #
@@ -1208,9 +1166,10 @@ class BundledAggregationsTestCase(BaseRelationsTestCase):
         channel = self._send_relation(RelationTypes.THREAD, "m.room.test")
         thread_2 = channel.json_body["event_id"]
 
-        self._send_relation(
-            RelationTypes.ANNOTATION, "m.reaction", "a", parent_id=thread_2
+        channel = self._send_relation(
+            RelationTypes.REFERENCE, "org.matrix.test", parent_id=thread_2
         )
+        reference_event_id = channel.json_body["event_id"]
 
         def assert_thread(bundled_aggregations: JsonDict) -> None:
             self.assertEqual(2, bundled_aggregations.get("count"))
@@ -1235,17 +1194,15 @@ class BundledAggregationsTestCase(BaseRelationsTestCase):
             self.assert_dict(
                 {
                     "m.relations": {
-                        RelationTypes.ANNOTATION: {
-                            "chunk": [
-                                {"type": "m.reaction", "key": "a", "count": 1},
-                            ]
+                        RelationTypes.REFERENCE: {
+                            "chunk": [{"event_id": reference_event_id}]
                         },
                     }
                 },
                 bundled_aggregations["latest_event"].get("unsigned"),
             )
 
-        self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 7)
+        self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 6)
 
     def test_nested_thread(self) -> None:
         """
@@ -1363,10 +1320,11 @@ class BundledAggregationsTestCase(BaseRelationsTestCase):
         channel = self._send_relation(RelationTypes.THREAD, "m.room.test")
         thread_id = channel.json_body["event_id"]
 
-        # Annotate the thread.
-        self._send_relation(
-            RelationTypes.ANNOTATION, "m.reaction", "a", parent_id=thread_id
+        # Make a reference to the thread.
+        channel = self._send_relation(
+            RelationTypes.REFERENCE, "org.matrix.test", parent_id=thread_id
         )
+        reference_event_id = channel.json_body["event_id"]
 
         channel = self.make_request(
             "GET",
@@ -1377,9 +1335,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase):
         self.assertEqual(
             channel.json_body["unsigned"].get("m.relations"),
             {
-                RelationTypes.ANNOTATION: {
-                    "chunk": [{"count": 1, "key": "a", "type": "m.reaction"}]
-                },
+                RelationTypes.REFERENCE: {"chunk": [{"event_id": reference_event_id}]},
             },
         )
 
@@ -1396,9 +1352,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase):
         self.assertEqual(
             thread_message["unsigned"].get("m.relations"),
             {
-                RelationTypes.ANNOTATION: {
-                    "chunk": [{"count": 1, "key": "a", "type": "m.reaction"}]
-                },
+                RelationTypes.REFERENCE: {"chunk": [{"event_id": reference_event_id}]},
             },
         )
 
@@ -1410,7 +1364,8 @@ class BundledAggregationsTestCase(BaseRelationsTestCase):
         Note that the spec allows for a server to return additional fields beyond
         what is specified.
         """
-        self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a")
+        channel = self._send_relation(RelationTypes.REFERENCE, "org.matrix.test")
+        reference_event_id = channel.json_body["event_id"]
 
         # Note that the sync filter does not include "unsigned" as a field.
         filter = urllib.parse.quote_plus(
@@ -1428,7 +1383,12 @@ class BundledAggregationsTestCase(BaseRelationsTestCase):
 
         # Ensure there's bundled aggregations on it.
         self.assertIn("unsigned", parent_event)
-        self.assertIn("m.relations", parent_event["unsigned"])
+        self.assertEqual(
+            parent_event["unsigned"].get("m.relations"),
+            {
+                RelationTypes.REFERENCE: {"chunk": [{"event_id": reference_event_id}]},
+            },
+        )
 
 
 class RelationIgnoredUserTestCase(BaseRelationsTestCase):
@@ -1475,53 +1435,8 @@ class RelationIgnoredUserTestCase(BaseRelationsTestCase):
 
         return before_aggregations[relation_type], after_aggregations[relation_type]
 
-    def test_annotation(self) -> None:
-        """Annotations should ignore"""
-        # Send 2 from us, 2 from the to be ignored user.
-        allowed_event_ids = []
-        ignored_event_ids = []
-        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="a")
-        allowed_event_ids.append(channel.json_body["event_id"])
-        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="b")
-        allowed_event_ids.append(channel.json_body["event_id"])
-        channel = self._send_relation(
-            RelationTypes.ANNOTATION,
-            "m.reaction",
-            key="a",
-            access_token=self.user2_token,
-        )
-        ignored_event_ids.append(channel.json_body["event_id"])
-        channel = self._send_relation(
-            RelationTypes.ANNOTATION,
-            "m.reaction",
-            key="c",
-            access_token=self.user2_token,
-        )
-        ignored_event_ids.append(channel.json_body["event_id"])
-
-        before_aggregations, after_aggregations = self._test_ignored_user(
-            RelationTypes.ANNOTATION, allowed_event_ids, ignored_event_ids
-        )
-
-        self.assertCountEqual(
-            before_aggregations["chunk"],
-            [
-                {"type": "m.reaction", "key": "a", "count": 2},
-                {"type": "m.reaction", "key": "b", "count": 1},
-                {"type": "m.reaction", "key": "c", "count": 1},
-            ],
-        )
-
-        self.assertCountEqual(
-            after_aggregations["chunk"],
-            [
-                {"type": "m.reaction", "key": "a", "count": 1},
-                {"type": "m.reaction", "key": "b", "count": 1},
-            ],
-        )
-
     def test_reference(self) -> None:
-        """Annotations should ignore"""
+        """Aggregations should exclude reference relations from ignored users"""
         channel = self._send_relation(RelationTypes.REFERENCE, "m.room.test")
         allowed_event_ids = [channel.json_body["event_id"]]
 
@@ -1544,7 +1459,7 @@ class RelationIgnoredUserTestCase(BaseRelationsTestCase):
         )
 
     def test_thread(self) -> None:
-        """Annotations should ignore"""
+        """Aggregations should exclude thread relations from ignored users"""
         channel = self._send_relation(RelationTypes.THREAD, "m.room.test")
         allowed_event_ids = [channel.json_body["event_id"]]
 
@@ -1618,43 +1533,6 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
             for t in threads
         ]
 
-    def test_redact_relation_annotation(self) -> None:
-        """
-        Test that annotations of an event are properly handled after the
-        annotation is redacted.
-
-        The redacted relation should not be included in bundled aggregations or
-        the response to relations.
-        """
-        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a")
-        to_redact_event_id = channel.json_body["event_id"]
-
-        channel = self._send_relation(
-            RelationTypes.ANNOTATION, "m.reaction", "a", access_token=self.user2_token
-        )
-        unredacted_event_id = channel.json_body["event_id"]
-
-        # Both relations should exist.
-        event_ids = self._get_related_events()
-        relations = self._get_bundled_aggregations()
-        self.assertCountEqual(event_ids, [to_redact_event_id, unredacted_event_id])
-        self.assertEquals(
-            relations["m.annotation"],
-            {"chunk": [{"type": "m.reaction", "key": "a", "count": 2}]},
-        )
-
-        # Redact one of the reactions.
-        self._redact(to_redact_event_id)
-
-        # The unredacted relation should still exist.
-        event_ids = self._get_related_events()
-        relations = self._get_bundled_aggregations()
-        self.assertEquals(event_ids, [unredacted_event_id])
-        self.assertEquals(
-            relations["m.annotation"],
-            {"chunk": [{"type": "m.reaction", "key": "a", "count": 1}]},
-        )
-
     def test_redact_relation_thread(self) -> None:
         """
         Test that thread replies are properly handled after the thread reply redacted.
@@ -1775,14 +1653,14 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
         is redacted.
         """
         # Add a relation
-        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="👍")
+        channel = self._send_relation(RelationTypes.REFERENCE, "org.matrix.test")
         related_event_id = channel.json_body["event_id"]
 
         # The relations should exist.
         event_ids = self._get_related_events()
         relations = self._get_bundled_aggregations()
         self.assertEqual(len(event_ids), 1)
-        self.assertIn(RelationTypes.ANNOTATION, relations)
+        self.assertIn(RelationTypes.REFERENCE, relations)
 
         # Redact the original event.
         self._redact(self.parent_id)
@@ -1792,8 +1670,8 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
         relations = self._get_bundled_aggregations()
         self.assertEquals(event_ids, [related_event_id])
         self.assertEquals(
-            relations["m.annotation"],
-            {"chunk": [{"type": "m.reaction", "key": "👍", "count": 1}]},
+            relations[RelationTypes.REFERENCE],
+            {"chunk": [{"event_id": related_event_id}]},
         )
 
     def test_redact_parent_thread(self) -> None:
-- 
cgit 1.5.1


From 916b8061d20dc0902b7f2d42d994efc20300e9e7 Mon Sep 17 00:00:00 2001
From: Hugh Nimmo-Smith <hughns@users.noreply.github.com>
Date: Thu, 2 Mar 2023 10:34:59 +0000
Subject: Implementation of MSC3967: Don't require UIA for initial upload of
 cross signing keys (#15077)

---
 changelog.d/15077.feature      |   1 +
 synapse/config/experimental.py |   3 +
 synapse/handlers/e2e_keys.py   |  14 ++++
 synapse/rest/client/keys.py    |  32 +++++++---
 tests/rest/client/test_keys.py | 141 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 182 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/15077.feature

(limited to 'synapse')

diff --git a/changelog.d/15077.feature b/changelog.d/15077.feature
new file mode 100644
index 0000000000..384e751056
--- /dev/null
+++ b/changelog.d/15077.feature
@@ -0,0 +1 @@
+Experimental support for MSC3967 to not require UIA for setting up cross-signing on first use.
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index bc38fae0b6..7c81f055b6 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -194,3 +194,6 @@ class ExperimentalConfig(Config):
         self.msc3966_exact_event_property_contains = experimental.get(
             "msc3966_exact_event_property_contains", False
         )
+
+        # MSC3967: Do not require UIA when first uploading cross signing keys
+        self.msc3967_enabled = experimental.get("msc3967_enabled", False)
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 43cbece21b..4e9c8d8db0 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -1301,6 +1301,20 @@ class E2eKeysHandler:
 
         return desired_key_data
 
+    async def is_cross_signing_set_up_for_user(self, user_id: str) -> bool:
+        """Checks if the user has cross-signing set up
+
+        Args:
+            user_id: The user to check
+
+        Returns:
+            True if the user has cross-signing set up, False otherwise
+        """
+        existing_master_key = await self.store.get_e2e_cross_signing_key(
+            user_id, "master"
+        )
+        return existing_master_key is not None
+
 
 def _check_cross_signing_key(
     key: JsonDict, user_id: str, key_type: str, signing_key: Optional[VerifyKey] = None
diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py
index 7873b363c0..32bb8b9a91 100644
--- a/synapse/rest/client/keys.py
+++ b/synapse/rest/client/keys.py
@@ -312,15 +312,29 @@ class SigningKeyUploadServlet(RestServlet):
         user_id = requester.user.to_string()
         body = parse_json_object_from_request(request)
 
-        await self.auth_handler.validate_user_via_ui_auth(
-            requester,
-            request,
-            body,
-            "add a device signing key to your account",
-            # Allow skipping of UI auth since this is frequently called directly
-            # after login and it is silly to ask users to re-auth immediately.
-            can_skip_ui_auth=True,
-        )
+        if self.hs.config.experimental.msc3967_enabled:
+            if await self.e2e_keys_handler.is_cross_signing_set_up_for_user(user_id):
+                # If we already have a master key then cross signing is set up and we require UIA to reset
+                await self.auth_handler.validate_user_via_ui_auth(
+                    requester,
+                    request,
+                    body,
+                    "reset the device signing key on your account",
+                    # Do not allow skipping of UI auth.
+                    can_skip_ui_auth=False,
+                )
+            # Otherwise we don't require UIA since we are setting up cross signing for the first time
+        else:
+            # Previous behaviour is to always require UIA but allow it to be skipped
+            await self.auth_handler.validate_user_via_ui_auth(
+                requester,
+                request,
+                body,
+                "add a device signing key to your account",
+                # Allow skipping of UI auth since this is frequently called directly
+                # after login and it is silly to ask users to re-auth immediately.
+                can_skip_ui_auth=True,
+            )
 
         result = await self.e2e_keys_handler.upload_signing_keys_for_user(user_id, body)
         return 200, result
diff --git a/tests/rest/client/test_keys.py b/tests/rest/client/test_keys.py
index 741fecea77..8ee5489057 100644
--- a/tests/rest/client/test_keys.py
+++ b/tests/rest/client/test_keys.py
@@ -14,12 +14,21 @@
 
 from http import HTTPStatus
 
+from signedjson.key import (
+    encode_verify_key_base64,
+    generate_signing_key,
+    get_verify_key,
+)
+from signedjson.sign import sign_json
+
 from synapse.api.errors import Codes
 from synapse.rest import admin
 from synapse.rest.client import keys, login
+from synapse.types import JsonDict
 
 from tests import unittest
 from tests.http.server._base import make_request_with_cancellation_test
+from tests.unittest import override_config
 
 
 class KeyQueryTestCase(unittest.HomeserverTestCase):
@@ -118,3 +127,135 @@ class KeyQueryTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual(200, channel.code, msg=channel.result["body"])
         self.assertIn(bob, channel.json_body["device_keys"])
+
+    def make_device_keys(self, user_id: str, device_id: str) -> JsonDict:
+        # We only generate a master key to simplify the test.
+        master_signing_key = generate_signing_key(device_id)
+        master_verify_key = encode_verify_key_base64(get_verify_key(master_signing_key))
+
+        return {
+            "master_key": sign_json(
+                {
+                    "user_id": user_id,
+                    "usage": ["master"],
+                    "keys": {"ed25519:" + master_verify_key: master_verify_key},
+                },
+                user_id,
+                master_signing_key,
+            ),
+        }
+
+    def test_device_signing_with_uia(self) -> None:
+        """Device signing key upload requires UIA."""
+        password = "wonderland"
+        device_id = "ABCDEFGHI"
+        alice_id = self.register_user("alice", password)
+        alice_token = self.login("alice", password, device_id=device_id)
+
+        content = self.make_device_keys(alice_id, device_id)
+
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/v3/keys/device_signing/upload",
+            content,
+            alice_token,
+        )
+
+        self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result)
+        # Grab the session
+        session = channel.json_body["session"]
+        # Ensure that flows are what is expected.
+        self.assertIn({"stages": ["m.login.password"]}, channel.json_body["flows"])
+
+        # add UI auth
+        content["auth"] = {
+            "type": "m.login.password",
+            "identifier": {"type": "m.id.user", "user": alice_id},
+            "password": password,
+            "session": session,
+        }
+
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/v3/keys/device_signing/upload",
+            content,
+            alice_token,
+        )
+
+        self.assertEqual(channel.code, HTTPStatus.OK, channel.result)
+
+    @override_config({"ui_auth": {"session_timeout": "15m"}})
+    def test_device_signing_with_uia_session_timeout(self) -> None:
+        """Device signing key upload requires UIA but passes with grace period."""
+        password = "wonderland"
+        device_id = "ABCDEFGHI"
+        alice_id = self.register_user("alice", password)
+        alice_token = self.login("alice", password, device_id=device_id)
+
+        content = self.make_device_keys(alice_id, device_id)
+
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/v3/keys/device_signing/upload",
+            content,
+            alice_token,
+        )
+
+        self.assertEqual(channel.code, HTTPStatus.OK, channel.result)
+
+    @override_config(
+        {
+            "experimental_features": {"msc3967_enabled": True},
+            "ui_auth": {"session_timeout": "15s"},
+        }
+    )
+    def test_device_signing_with_msc3967(self) -> None:
+        """Device signing key follows MSC3967 behaviour when enabled."""
+        password = "wonderland"
+        device_id = "ABCDEFGHI"
+        alice_id = self.register_user("alice", password)
+        alice_token = self.login("alice", password, device_id=device_id)
+
+        keys1 = self.make_device_keys(alice_id, device_id)
+
+        # Initial request should succeed as no existing keys are present.
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/v3/keys/device_signing/upload",
+            keys1,
+            alice_token,
+        )
+        self.assertEqual(channel.code, HTTPStatus.OK, channel.result)
+
+        keys2 = self.make_device_keys(alice_id, device_id)
+
+        # Subsequent request should require UIA as keys already exist even though session_timeout is set.
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/v3/keys/device_signing/upload",
+            keys2,
+            alice_token,
+        )
+        self.assertEqual(channel.code, HTTPStatus.UNAUTHORIZED, channel.result)
+
+        # Grab the session
+        session = channel.json_body["session"]
+        # Ensure that flows are what is expected.
+        self.assertIn({"stages": ["m.login.password"]}, channel.json_body["flows"])
+
+        # add UI auth
+        keys2["auth"] = {
+            "type": "m.login.password",
+            "identifier": {"type": "m.id.user", "user": alice_id},
+            "password": password,
+            "session": session,
+        }
+
+        # Request should complete
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/v3/keys/device_signing/upload",
+            keys2,
+            alice_token,
+        )
+        self.assertEqual(channel.code, HTTPStatus.OK, channel.result)
-- 
cgit 1.5.1


From 65f10afb64127dc9412e24860c5e8a78f3dc9863 Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Thu, 2 Mar 2023 11:38:46 +0100
Subject: Move event_reports to `RoomWorkerStore` (#15165)

---
 changelog.d/15165.misc                 |   1 +
 synapse/storage/databases/main/room.py | 354 ++++++++++++++++-----------------
 2 files changed, 178 insertions(+), 177 deletions(-)
 create mode 100644 changelog.d/15165.misc

(limited to 'synapse')

diff --git a/changelog.d/15165.misc b/changelog.d/15165.misc
new file mode 100644
index 0000000000..a75be84dac
--- /dev/null
+++ b/changelog.d/15165.misc
@@ -0,0 +1 @@
+Move `get_event_report` and `get_event_reports_paginate` from `RoomStore` to `RoomWorkerStore`.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index a2e9519cb6..3825bd6079 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -1417,6 +1417,183 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
             get_un_partial_stated_rooms_from_stream_txn,
         )
 
+    async def get_event_report(self, report_id: int) -> Optional[Dict[str, Any]]:
+        """Retrieve an event report
+
+        Args:
+            report_id: ID of reported event in database
+        Returns:
+            JSON dict of information from an event report or None if the
+            report does not exist.
+        """
+
+        def _get_event_report_txn(
+            txn: LoggingTransaction, report_id: int
+        ) -> Optional[Dict[str, Any]]:
+            sql = """
+                SELECT
+                    er.id,
+                    er.received_ts,
+                    er.room_id,
+                    er.event_id,
+                    er.user_id,
+                    er.content,
+                    events.sender,
+                    room_stats_state.canonical_alias,
+                    room_stats_state.name,
+                    event_json.json AS event_json
+                FROM event_reports AS er
+                LEFT JOIN events
+                    ON events.event_id = er.event_id
+                JOIN event_json
+                    ON event_json.event_id = er.event_id
+                JOIN room_stats_state
+                    ON room_stats_state.room_id = er.room_id
+                WHERE er.id = ?
+            """
+
+            txn.execute(sql, [report_id])
+            row = txn.fetchone()
+
+            if not row:
+                return None
+
+            event_report = {
+                "id": row[0],
+                "received_ts": row[1],
+                "room_id": row[2],
+                "event_id": row[3],
+                "user_id": row[4],
+                "score": db_to_json(row[5]).get("score"),
+                "reason": db_to_json(row[5]).get("reason"),
+                "sender": row[6],
+                "canonical_alias": row[7],
+                "name": row[8],
+                "event_json": db_to_json(row[9]),
+            }
+
+            return event_report
+
+        return await self.db_pool.runInteraction(
+            "get_event_report", _get_event_report_txn, report_id
+        )
+
+    async def get_event_reports_paginate(
+        self,
+        start: int,
+        limit: int,
+        direction: Direction = Direction.BACKWARDS,
+        user_id: Optional[str] = None,
+        room_id: Optional[str] = None,
+    ) -> Tuple[List[Dict[str, Any]], int]:
+        """Retrieve a paginated list of event reports
+
+        Args:
+            start: event offset to begin the query from
+            limit: number of rows to retrieve
+            direction: Whether to fetch the most recent first (backwards) or the
+                oldest first (forwards)
+            user_id: search for user_id. Ignored if user_id is None
+            room_id: search for room_id. Ignored if room_id is None
+        Returns:
+            Tuple of:
+                json list of event reports
+                total number of event reports matching the filter criteria
+        """
+
+        def _get_event_reports_paginate_txn(
+            txn: LoggingTransaction,
+        ) -> Tuple[List[Dict[str, Any]], int]:
+            filters = []
+            args: List[object] = []
+
+            if user_id:
+                filters.append("er.user_id LIKE ?")
+                args.extend(["%" + user_id + "%"])
+            if room_id:
+                filters.append("er.room_id LIKE ?")
+                args.extend(["%" + room_id + "%"])
+
+            if direction == Direction.BACKWARDS:
+                order = "DESC"
+            else:
+                order = "ASC"
+
+            where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else ""
+
+            # We join on room_stats_state despite not using any columns from it
+            # because the join can influence the number of rows returned;
+            # e.g. a room that doesn't have state, maybe because it was deleted.
+            # The query returning the total count should be consistent with
+            # the query returning the results.
+            sql = """
+                SELECT COUNT(*) as total_event_reports
+                FROM event_reports AS er
+                JOIN room_stats_state ON room_stats_state.room_id = er.room_id
+                {}
+                """.format(
+                where_clause
+            )
+            txn.execute(sql, args)
+            count = cast(Tuple[int], txn.fetchone())[0]
+
+            sql = """
+                SELECT
+                    er.id,
+                    er.received_ts,
+                    er.room_id,
+                    er.event_id,
+                    er.user_id,
+                    er.content,
+                    events.sender,
+                    room_stats_state.canonical_alias,
+                    room_stats_state.name
+                FROM event_reports AS er
+                LEFT JOIN events
+                    ON events.event_id = er.event_id
+                JOIN room_stats_state
+                    ON room_stats_state.room_id = er.room_id
+                {where_clause}
+                ORDER BY er.received_ts {order}
+                LIMIT ?
+                OFFSET ?
+            """.format(
+                where_clause=where_clause,
+                order=order,
+            )
+
+            args += [limit, start]
+            txn.execute(sql, args)
+
+            event_reports = []
+            for row in txn:
+                try:
+                    s = db_to_json(row[5]).get("score")
+                    r = db_to_json(row[5]).get("reason")
+                except Exception:
+                    logger.error("Unable to parse json from event_reports: %s", row[0])
+                    continue
+                event_reports.append(
+                    {
+                        "id": row[0],
+                        "received_ts": row[1],
+                        "room_id": row[2],
+                        "event_id": row[3],
+                        "user_id": row[4],
+                        "score": s,
+                        "reason": r,
+                        "sender": row[6],
+                        "canonical_alias": row[7],
+                        "name": row[8],
+                    }
+                )
+
+            return event_reports, count
+
+        return await self.db_pool.runInteraction(
+            "get_event_reports_paginate", _get_event_reports_paginate_txn
+        )
+
     async def delete_event_report(self, report_id: int) -> bool:
         """Remove an event report from database.
 
@@ -2189,183 +2366,6 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
         )
         return next_id
 
-    async def get_event_report(self, report_id: int) -> Optional[Dict[str, Any]]:
-        """Retrieve an event report
-
-        Args:
-            report_id: ID of reported event in database
-        Returns:
-            JSON dict of information from an event report or None if the
-            report does not exist.
-        """
-
-        def _get_event_report_txn(
-            txn: LoggingTransaction, report_id: int
-        ) -> Optional[Dict[str, Any]]:
-            sql = """
-                SELECT
-                    er.id,
-                    er.received_ts,
-                    er.room_id,
-                    er.event_id,
-                    er.user_id,
-                    er.content,
-                    events.sender,
-                    room_stats_state.canonical_alias,
-                    room_stats_state.name,
-                    event_json.json AS event_json
-                FROM event_reports AS er
-                LEFT JOIN events
-                    ON events.event_id = er.event_id
-                JOIN event_json
-                    ON event_json.event_id = er.event_id
-                JOIN room_stats_state
-                    ON room_stats_state.room_id = er.room_id
-                WHERE er.id = ?
-            """
-
-            txn.execute(sql, [report_id])
-            row = txn.fetchone()
-
-            if not row:
-                return None
-
-            event_report = {
-                "id": row[0],
-                "received_ts": row[1],
-                "room_id": row[2],
-                "event_id": row[3],
-                "user_id": row[4],
-                "score": db_to_json(row[5]).get("score"),
-                "reason": db_to_json(row[5]).get("reason"),
-                "sender": row[6],
-                "canonical_alias": row[7],
-                "name": row[8],
-                "event_json": db_to_json(row[9]),
-            }
-
-            return event_report
-
-        return await self.db_pool.runInteraction(
-            "get_event_report", _get_event_report_txn, report_id
-        )
-
-    async def get_event_reports_paginate(
-        self,
-        start: int,
-        limit: int,
-        direction: Direction = Direction.BACKWARDS,
-        user_id: Optional[str] = None,
-        room_id: Optional[str] = None,
-    ) -> Tuple[List[Dict[str, Any]], int]:
-        """Retrieve a paginated list of event reports
-
-        Args:
-            start: event offset to begin the query from
-            limit: number of rows to retrieve
-            direction: Whether to fetch the most recent first (backwards) or the
-                oldest first (forwards)
-            user_id: search for user_id. Ignored if user_id is None
-            room_id: search for room_id. Ignored if room_id is None
-        Returns:
-            Tuple of:
-                json list of event reports
-                total number of event reports matching the filter criteria
-        """
-
-        def _get_event_reports_paginate_txn(
-            txn: LoggingTransaction,
-        ) -> Tuple[List[Dict[str, Any]], int]:
-            filters = []
-            args: List[object] = []
-
-            if user_id:
-                filters.append("er.user_id LIKE ?")
-                args.extend(["%" + user_id + "%"])
-            if room_id:
-                filters.append("er.room_id LIKE ?")
-                args.extend(["%" + room_id + "%"])
-
-            if direction == Direction.BACKWARDS:
-                order = "DESC"
-            else:
-                order = "ASC"
-
-            where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else ""
-
-            # We join on room_stats_state despite not using any columns from it
-            # because the join can influence the number of rows returned;
-            # e.g. a room that doesn't have state, maybe because it was deleted.
-            # The query returning the total count should be consistent with
-            # the query returning the results.
-            sql = """
-                SELECT COUNT(*) as total_event_reports
-                FROM event_reports AS er
-                JOIN room_stats_state ON room_stats_state.room_id = er.room_id
-                {}
-                """.format(
-                where_clause
-            )
-            txn.execute(sql, args)
-            count = cast(Tuple[int], txn.fetchone())[0]
-
-            sql = """
-                SELECT
-                    er.id,
-                    er.received_ts,
-                    er.room_id,
-                    er.event_id,
-                    er.user_id,
-                    er.content,
-                    events.sender,
-                    room_stats_state.canonical_alias,
-                    room_stats_state.name
-                FROM event_reports AS er
-                LEFT JOIN events
-                    ON events.event_id = er.event_id
-                JOIN room_stats_state
-                    ON room_stats_state.room_id = er.room_id
-                {where_clause}
-                ORDER BY er.received_ts {order}
-                LIMIT ?
-                OFFSET ?
-            """.format(
-                where_clause=where_clause,
-                order=order,
-            )
-
-            args += [limit, start]
-            txn.execute(sql, args)
-
-            event_reports = []
-            for row in txn:
-                try:
-                    s = db_to_json(row[5]).get("score")
-                    r = db_to_json(row[5]).get("reason")
-                except Exception:
-                    logger.error("Unable to parse json from event_reports: %s", row[0])
-                    continue
-                event_reports.append(
-                    {
-                        "id": row[0],
-                        "received_ts": row[1],
-                        "room_id": row[2],
-                        "event_id": row[3],
-                        "user_id": row[4],
-                        "score": s,
-                        "reason": r,
-                        "sender": row[6],
-                        "canonical_alias": row[7],
-                        "name": row[8],
-                    }
-                )
-
-            return event_reports, count
-
-        return await self.db_pool.runInteraction(
-            "get_event_reports_paginate", _get_event_reports_paginate_txn
-        )
-
     async def block_room(self, room_id: str, user_id: str) -> None:
         """Marks the room as blocked.
 
-- 
cgit 1.5.1


From 33a85cf08ccf3713599a168ae1ed10d35ada2009 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 2 Mar 2023 07:24:29 -0500
Subject: Fix conflicting URLs for dehydrated devices. (#15180)

---
 changelog.d/15180.bugfix       | 1 +
 synapse/rest/client/devices.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15180.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15180.bugfix b/changelog.d/15180.bugfix
new file mode 100644
index 0000000000..e7a3dcd41a
--- /dev/null
+++ b/changelog.d/15180.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.78.0 where requests to claim dehydrated devices would fail with a `405` error.
diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py
index 486c6dbbc5..dab4a77f7e 100644
--- a/synapse/rest/client/devices.py
+++ b/synapse/rest/client/devices.py
@@ -255,7 +255,7 @@ class DehydratedDeviceServlet(RestServlet):
 
     """
 
-    PATTERNS = client_patterns("/org.matrix.msc2697.v2/dehydrated_device", releases=())
+    PATTERNS = client_patterns("/org.matrix.msc2697.v2/dehydrated_device$", releases=())
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
-- 
cgit 1.5.1


From 8ef324ea6f1390876940989eacc8734fe0d15582 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 2 Mar 2023 08:30:51 -0500
Subject: Update intentional mentions (MSC3952) to depend on
 `exact_event_property_contains` (MSC3966). (#15051)

This replaces the specific `is_user_mention` push rule condition
used in MSC3952 with the generic `exact_event_property_contains`
push rule condition from MSC3966.
---
 changelog.d/15051.misc                      |  1 +
 rust/benches/evaluator.rs                   |  4 ---
 rust/src/push/base_rules.rs                 |  9 ++++--
 rust/src/push/evaluator.rs                  | 50 ++++++++++++++++-------------
 rust/src/push/mod.rs                        | 28 ++++++++--------
 stubs/synapse/synapse_rust/push.pyi         |  3 +-
 synapse/config/experimental.py              |  8 ++++-
 synapse/push/bulk_push_rule_evaluator.py    | 18 +++--------
 synapse/push/clientformat.py                | 11 ++++---
 tests/push/test_bulk_push_rule_evaluator.py |  2 ++
 tests/push/test_push_rule_evaluator.py      | 33 ++-----------------
 11 files changed, 73 insertions(+), 94 deletions(-)
 create mode 100644 changelog.d/15051.misc

(limited to 'synapse')

diff --git a/changelog.d/15051.misc b/changelog.d/15051.misc
new file mode 100644
index 0000000000..fabfe77d35
--- /dev/null
+++ b/changelog.d/15051.misc
@@ -0,0 +1 @@
+Update [MSC3952](https://github.com/matrix-org/matrix-spec-proposals/pull/3952) support based on changes to the MSC.
diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs
index 9a871f5693..7c987d4948 100644
--- a/rust/benches/evaluator.rs
+++ b/rust/benches/evaluator.rs
@@ -44,7 +44,6 @@ fn bench_match_exact(b: &mut Bencher) {
     let eval = PushRuleEvaluator::py_new(
         flattened_keys,
         false,
-        BTreeSet::new(),
         10,
         Some(0),
         Default::default(),
@@ -92,7 +91,6 @@ fn bench_match_word(b: &mut Bencher) {
     let eval = PushRuleEvaluator::py_new(
         flattened_keys,
         false,
-        BTreeSet::new(),
         10,
         Some(0),
         Default::default(),
@@ -140,7 +138,6 @@ fn bench_match_word_miss(b: &mut Bencher) {
     let eval = PushRuleEvaluator::py_new(
         flattened_keys,
         false,
-        BTreeSet::new(),
         10,
         Some(0),
         Default::default(),
@@ -188,7 +185,6 @@ fn bench_eval_message(b: &mut Bencher) {
     let eval = PushRuleEvaluator::py_new(
         flattened_keys,
         false,
-        BTreeSet::new(),
         10,
         Some(0),
         Default::default(),
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index 62de51d915..3d72a4a4c3 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -21,13 +21,13 @@ use lazy_static::lazy_static;
 use serde_json::Value;
 
 use super::KnownCondition;
-use crate::push::PushRule;
 use crate::push::RelatedEventMatchTypeCondition;
 use crate::push::SetTweak;
 use crate::push::TweakValue;
 use crate::push::{Action, ExactEventMatchCondition, SimpleJsonValue};
 use crate::push::{Condition, EventMatchTypeCondition};
 use crate::push::{EventMatchCondition, EventMatchPatternType};
+use crate::push::{ExactEventMatchTypeCondition, PushRule};
 
 const HIGHLIGHT_ACTION: Action = Action::SetTweak(SetTweak {
     set_tweak: Cow::Borrowed("highlight"),
@@ -144,7 +144,12 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
     PushRule {
         rule_id: Cow::Borrowed(".org.matrix.msc3952.is_user_mention"),
         priority_class: 5,
-        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::IsUserMention)]),
+        conditions: Cow::Borrowed(&[Condition::Known(
+            KnownCondition::ExactEventPropertyContainsType(ExactEventMatchTypeCondition {
+                key: Cow::Borrowed("content.org.matrix.msc3952.mentions.user_ids"),
+                value_type: Cow::Borrowed(&EventMatchPatternType::UserId),
+            }),
+        )]),
         actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION, SOUND_ACTION]),
         default: true,
         default_enabled: true,
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index a65c645caf..55846627cc 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 use std::borrow::Cow;
-use std::collections::{BTreeMap, BTreeSet};
+use std::collections::BTreeMap;
 
 use crate::push::{EventMatchPatternType, JsonValue};
 use anyhow::{Context, Error};
@@ -72,8 +72,6 @@ pub struct PushRuleEvaluator {
 
     /// True if the event has a mentions property and MSC3952 support is enabled.
     has_mentions: bool,
-    /// The user mentions that were part of the message.
-    user_mentions: BTreeSet<String>,
 
     /// The number of users in the room.
     room_member_count: u64,
@@ -114,7 +112,6 @@ impl PushRuleEvaluator {
     pub fn py_new(
         flattened_keys: BTreeMap<String, JsonValue>,
         has_mentions: bool,
-        user_mentions: BTreeSet<String>,
         room_member_count: u64,
         sender_power_level: Option<i64>,
         notification_power_levels: BTreeMap<String, i64>,
@@ -134,7 +131,6 @@ impl PushRuleEvaluator {
             flattened_keys,
             body,
             has_mentions,
-            user_mentions,
             room_member_count,
             notification_power_levels,
             sender_power_level,
@@ -310,15 +306,30 @@ impl PushRuleEvaluator {
                     Some(Cow::Borrowed(pattern)),
                 )?
             }
-            KnownCondition::ExactEventPropertyContains(exact_event_match) => {
-                self.match_exact_event_property_contains(exact_event_match)?
-            }
-            KnownCondition::IsUserMention => {
-                if let Some(uid) = user_id {
-                    self.user_mentions.contains(uid)
+            KnownCondition::ExactEventPropertyContains(exact_event_match) => self
+                .match_exact_event_property_contains(
+                    exact_event_match.key.clone(),
+                    exact_event_match.value.clone(),
+                )?,
+            KnownCondition::ExactEventPropertyContainsType(exact_event_match) => {
+                // The `value_type` can either be "user_id" or "user_localpart",
+                // either way if we don't have a `user_id` then the condition can't
+                // match.
+                let user_id = if let Some(user_id) = user_id {
+                    user_id
                 } else {
-                    false
-                }
+                    return Ok(false);
+                };
+
+                let pattern = match &*exact_event_match.value_type {
+                    EventMatchPatternType::UserId => user_id,
+                    EventMatchPatternType::UserLocalpart => get_localpart_from_id(user_id)?,
+                };
+
+                self.match_exact_event_property_contains(
+                    exact_event_match.key.clone(),
+                    Cow::Borrowed(&SimpleJsonValue::Str(pattern.to_string())),
+                )?
             }
             KnownCondition::ContainsDisplayName => {
                 if let Some(dn) = display_name {
@@ -456,24 +467,21 @@ impl PushRuleEvaluator {
     /// Evaluates a `exact_event_property_contains` condition. (MSC3758)
     fn match_exact_event_property_contains(
         &self,
-        exact_event_match: &ExactEventMatchCondition,
+        key: Cow<str>,
+        value: Cow<SimpleJsonValue>,
     ) -> Result<bool, Error> {
         // First check if the feature is enabled.
         if !self.msc3966_exact_event_property_contains {
             return Ok(false);
         }
 
-        let value = &exact_event_match.value;
-
-        let haystack = if let Some(JsonValue::Array(haystack)) =
-            self.flattened_keys.get(&*exact_event_match.key)
-        {
+        let haystack = if let Some(JsonValue::Array(haystack)) = self.flattened_keys.get(&*key) {
             haystack
         } else {
             return Ok(false);
         };
 
-        Ok(haystack.contains(&**value))
+        Ok(haystack.contains(&value))
     }
 
     /// Match the member count against an 'is' condition
@@ -510,7 +518,6 @@ fn push_rule_evaluator() {
     let evaluator = PushRuleEvaluator::py_new(
         flattened_keys,
         false,
-        BTreeSet::new(),
         10,
         Some(0),
         BTreeMap::new(),
@@ -542,7 +549,6 @@ fn test_requires_room_version_supports_condition() {
     let evaluator = PushRuleEvaluator::py_new(
         flattened_keys,
         false,
-        BTreeSet::new(),
         10,
         Some(0),
         BTreeMap::new(),
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index 97feb6efc9..6391d2ed47 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -340,8 +340,12 @@ pub enum KnownCondition {
     RelatedEventMatchType(RelatedEventMatchTypeCondition),
     #[serde(rename = "org.matrix.msc3966.exact_event_property_contains")]
     ExactEventPropertyContains(ExactEventMatchCondition),
-    #[serde(rename = "org.matrix.msc3952.is_user_mention")]
-    IsUserMention,
+    // Identical to exact_event_property_contains but gives predefined patterns. Cannot be added by users.
+    #[serde(
+        skip_deserializing,
+        rename = "org.matrix.msc3966.exact_event_property_contains"
+    )]
+    ExactEventPropertyContainsType(ExactEventMatchTypeCondition),
     ContainsDisplayName,
     RoomMemberCount {
         #[serde(skip_serializing_if = "Option::is_none")]
@@ -398,6 +402,15 @@ pub struct ExactEventMatchCondition {
     pub value: Cow<'static, SimpleJsonValue>,
 }
 
+/// The body of a [`Condition::ExactEventMatch`] that uses user_id or user_localpart as a pattern.
+#[derive(Serialize, Debug, Clone)]
+pub struct ExactEventMatchTypeCondition {
+    pub key: Cow<'static, str>,
+    // During serialization, the value_type property gets replaced with a
+    // value property of the correct value in synapse.push.clientformat.format_push_rules_for_user.
+    pub value_type: Cow<'static, EventMatchPatternType>,
+}
+
 /// The body of a [`Condition::RelatedEventMatch`]
 #[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct RelatedEventMatchCondition {
@@ -739,17 +752,6 @@ fn test_deserialize_unstable_msc3758_condition() {
     ));
 }
 
-#[test]
-fn test_deserialize_unstable_msc3952_user_condition() {
-    let json = r#"{"kind":"org.matrix.msc3952.is_user_mention"}"#;
-
-    let condition: Condition = serde_json::from_str(json).unwrap();
-    assert!(matches!(
-        condition,
-        Condition::Known(KnownCondition::IsUserMention)
-    ));
-}
-
 #[test]
 fn test_deserialize_custom_condition() {
     let json = r#"{"kind":"custom_tag"}"#;
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index a8f0ed2435..c17796ffbd 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Set, Tuple, Union
+from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Tuple, Union
 
 from synapse.types import JsonDict, JsonValue
 
@@ -58,7 +58,6 @@ class PushRuleEvaluator:
         self,
         flattened_keys: Mapping[str, JsonValue],
         has_mentions: bool,
-        user_mentions: Set[str],
         room_member_count: int,
         sender_power_level: Optional[int],
         notification_power_levels: Mapping[str, int],
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 7c81f055b6..fc64f2bda1 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -179,10 +179,16 @@ class ExperimentalConfig(Config):
             "msc3873_escape_event_match_key", False
         )
 
-        # MSC3952: Intentional mentions, this depends on MSC3758.
+        # MSC3966: exact_event_property_contains push rule condition.
+        self.msc3966_exact_event_property_contains = experimental.get(
+            "msc3966_exact_event_property_contains", False
+        )
+
+        # MSC3952: Intentional mentions, this depends on MSC3758 and MSC3966.
         self.msc3952_intentional_mentions = (
             experimental.get("msc3952_intentional_mentions", False)
             and self.msc3758_exact_event_match
+            and self.msc3966_exact_event_property_contains
         )
 
         # MSC3959: Do not generate notifications for edits.
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 3c4a152d6b..abcf687f05 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -23,7 +23,6 @@ from typing import (
     Mapping,
     Optional,
     Sequence,
-    Set,
     Tuple,
     Union,
 )
@@ -396,18 +395,10 @@ class BulkPushRuleEvaluator:
                         del notification_levels[key]
 
         # Pull out any user and room mentions.
-        mentions = event.content.get(EventContentFields.MSC3952_MENTIONS)
-        has_mentions = self._intentional_mentions_enabled and isinstance(mentions, dict)
-        user_mentions: Set[str] = set()
-        if has_mentions:
-            # mypy seems to have lost the type even though it must be a dict here.
-            assert isinstance(mentions, dict)
-            # Remove out any non-string items and convert to a set.
-            user_mentions_raw = mentions.get("user_ids")
-            if isinstance(user_mentions_raw, list):
-                user_mentions = set(
-                    filter(lambda item: isinstance(item, str), user_mentions_raw)
-                )
+        has_mentions = (
+            self._intentional_mentions_enabled
+            and EventContentFields.MSC3952_MENTIONS in event.content
+        )
 
         evaluator = PushRuleEvaluator(
             _flatten_dict(
@@ -415,7 +406,6 @@ class BulkPushRuleEvaluator:
                 msc3873_escape_event_match_key=self.hs.config.experimental.msc3873_escape_event_match_key,
             ),
             has_mentions,
-            user_mentions,
             room_member_count,
             sender_power_level,
             notification_levels,
diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py
index bb76c169c6..222afbdcc8 100644
--- a/synapse/push/clientformat.py
+++ b/synapse/push/clientformat.py
@@ -41,11 +41,12 @@ def format_push_rules_for_user(
 
         rulearray.append(template_rule)
 
-        pattern_type = template_rule.pop("pattern_type", None)
-        if pattern_type == "user_id":
-            template_rule["pattern"] = user.to_string()
-        elif pattern_type == "user_localpart":
-            template_rule["pattern"] = user.localpart
+        for type_key in ("pattern", "value"):
+            type_value = template_rule.pop(f"{type_key}_type", None)
+            if type_value == "user_id":
+                template_rule[type_key] = user.to_string()
+            elif type_value == "user_localpart":
+                template_rule[type_key] = user.localpart
 
         template_rule["enabled"] = enabled
 
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
index 1458076a90..73fecfd4ad 100644
--- a/tests/push/test_bulk_push_rule_evaluator.py
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -233,6 +233,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
             "experimental_features": {
                 "msc3758_exact_event_match": True,
                 "msc3952_intentional_mentions": True,
+                "msc3966_exact_event_property_contains": True,
             }
         }
     )
@@ -336,6 +337,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
             "experimental_features": {
                 "msc3758_exact_event_match": True,
                 "msc3952_intentional_mentions": True,
+                "msc3966_exact_event_property_contains": True,
             }
         }
     )
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index 1d30e3c3e4..d4a4bc4d93 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, List, Optional, Set, Union, cast
+from typing import Any, Dict, List, Optional, Union, cast
 
 import frozendict
 
@@ -147,8 +147,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         self,
         content: JsonMapping,
         *,
-        has_mentions: bool = False,
-        user_mentions: Optional[Set[str]] = None,
         related_events: Optional[JsonDict] = None,
     ) -> PushRuleEvaluator:
         event = FrozenEvent(
@@ -167,8 +165,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         power_levels: Dict[str, Union[int, Dict[str, int]]] = {}
         return PushRuleEvaluator(
             _flatten_dict(event),
-            has_mentions,
-            user_mentions or set(),
+            False,
             room_member_count,
             sender_power_level,
             cast(Dict[str, int], power_levels.get("notifications", {})),
@@ -204,32 +201,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         # A display name with spaces should work fine.
         self.assertTrue(evaluator.matches(condition, "@user:test", "foo bar"))
 
-    def test_user_mentions(self) -> None:
-        """Check for user mentions."""
-        condition = {"kind": "org.matrix.msc3952.is_user_mention"}
-
-        # No mentions shouldn't match.
-        evaluator = self._get_evaluator({}, has_mentions=True)
-        self.assertFalse(evaluator.matches(condition, "@user:test", None))
-
-        # An empty set shouldn't match
-        evaluator = self._get_evaluator({}, has_mentions=True, user_mentions=set())
-        self.assertFalse(evaluator.matches(condition, "@user:test", None))
-
-        # The Matrix ID appearing anywhere in the mentions list should match
-        evaluator = self._get_evaluator(
-            {}, has_mentions=True, user_mentions={"@user:test"}
-        )
-        self.assertTrue(evaluator.matches(condition, "@user:test", None))
-
-        evaluator = self._get_evaluator(
-            {}, has_mentions=True, user_mentions={"@another:test", "@user:test"}
-        )
-        self.assertTrue(evaluator.matches(condition, "@user:test", None))
-
-        # Note that invalid data is tested at tests.push.test_bulk_push_rule_evaluator.TestBulkPushRuleEvaluator.test_mentions
-        # since the BulkPushRuleEvaluator is what handles data sanitisation.
-
     def _assert_matches(
         self, condition: JsonDict, content: JsonMapping, msg: Optional[str] = None
     ) -> None:
-- 
cgit 1.5.1


From c8665dd25d18fa7d7176984cee191834002909a0 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Thu, 2 Mar 2023 18:16:54 +0100
Subject: Remove the unspecced and bugged PUT /knock/{roomIdOrAlias} endpoint
 (#15189)

---
 changelog.d/15189.misc       |  1 +
 synapse/rest/client/knock.py | 16 +---------------
 2 files changed, 2 insertions(+), 15 deletions(-)
 create mode 100644 changelog.d/15189.misc

(limited to 'synapse')

diff --git a/changelog.d/15189.misc b/changelog.d/15189.misc
new file mode 100644
index 0000000000..ded2feb79e
--- /dev/null
+++ b/changelog.d/15189.misc
@@ -0,0 +1 @@
+Remove the unspecced `PUT` on the `/knock/{roomIdOrAlias}` endpoint.
diff --git a/synapse/rest/client/knock.py b/synapse/rest/client/knock.py
index ad025c8a45..10975224c0 100644
--- a/synapse/rest/client/knock.py
+++ b/synapse/rest/client/knock.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import TYPE_CHECKING, Awaitable, Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, Dict, List, Tuple
 
 from synapse.api.constants import Membership
 from synapse.api.errors import SynapseError
@@ -24,8 +24,6 @@ from synapse.http.servlet import (
     parse_strings_from_args,
 )
 from synapse.http.site import SynapseRequest
-from synapse.logging.opentracing import set_tag
-from synapse.rest.client.transactions import HttpTransactionCache
 from synapse.types import JsonDict, RoomAlias, RoomID
 
 if TYPE_CHECKING:
@@ -45,7 +43,6 @@ class KnockRoomAliasServlet(RestServlet):
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
-        self.txns = HttpTransactionCache(hs)
         self.room_member_handler = hs.get_room_member_handler()
         self.auth = hs.get_auth()
 
@@ -53,7 +50,6 @@ class KnockRoomAliasServlet(RestServlet):
         self,
         request: SynapseRequest,
         room_identifier: str,
-        txn_id: Optional[str] = None,
     ) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
 
@@ -86,7 +82,6 @@ class KnockRoomAliasServlet(RestServlet):
             target=requester.user,
             room_id=room_id,
             action=Membership.KNOCK,
-            txn_id=txn_id,
             third_party_signed=None,
             remote_room_hosts=remote_room_hosts,
             content=event_content,
@@ -94,15 +89,6 @@ class KnockRoomAliasServlet(RestServlet):
 
         return 200, {"room_id": room_id}
 
-    def on_PUT(
-        self, request: SynapseRequest, room_identifier: str, txn_id: str
-    ) -> Awaitable[Tuple[int, JsonDict]]:
-        set_tag("txn_id", txn_id)
-
-        return self.txns.fetch_or_execute_request(
-            request, self.on_POST, request, room_identifier, txn_id
-        )
-
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     KnockRoomAliasServlet(hs).register(http_server)
-- 
cgit 1.5.1


From ecbe0ddbe7c47e05bc27b39dc10a9c30eafd2960 Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Thu, 2 Mar 2023 18:59:53 +0100
Subject: Add support for knocking to workers. (#15133)

---
 changelog.d/15133.feature              |  1 +
 docker/configure_workers_and_start.py  |  1 +
 docs/workers.md                        |  1 +
 synapse/handlers/room_member.py        |  4 +++-
 synapse/handlers/room_member_worker.py |  4 +++-
 synapse/replication/http/membership.py | 15 ++++-----------
 synapse/rest/__init__.py               |  2 +-
 synapse/rest/client/knock.py           |  1 -
 synapse/rest/client/room.py            |  2 +-
 9 files changed, 15 insertions(+), 16 deletions(-)
 create mode 100644 changelog.d/15133.feature

(limited to 'synapse')

diff --git a/changelog.d/15133.feature b/changelog.d/15133.feature
new file mode 100644
index 0000000000..e0af0d4554
--- /dev/null
+++ b/changelog.d/15133.feature
@@ -0,0 +1 @@
+Add support for knocking to workers.
\ No newline at end of file
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index 81368069ec..add8bb1ff6 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -205,6 +205,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
             "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/send",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/join/",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/knock/",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/profile/",
             "^/_matrix/client/(v1|unstable/org.matrix.msc2716)/rooms/.*/batch_send",
         ],
diff --git a/docs/workers.md b/docs/workers.md
index 35a96f12a9..fa536cd310 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -252,6 +252,7 @@ information.
     ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state/
     ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$
     ^/_matrix/client/(api/v1|r0|v3|unstable)/join/
+    ^/_matrix/client/(api/v1|r0|v3|unstable)/knock/
     ^/_matrix/client/(api/v1|r0|v3|unstable)/profile/
 
     # Account data requests
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index de7476f300..509c557889 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -207,6 +207,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
     @abc.abstractmethod
     async def remote_knock(
         self,
+        requester: Requester,
         remote_room_hosts: List[str],
         room_id: str,
         user: UserID,
@@ -1073,7 +1074,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                     )
 
                 return await self.remote_knock(
-                    remote_room_hosts, room_id, target, content
+                    requester, remote_room_hosts, room_id, target, content
                 )
 
         return await self._local_membership_update(
@@ -1984,6 +1985,7 @@ class RoomMemberMasterHandler(RoomMemberHandler):
 
     async def remote_knock(
         self,
+        requester: Requester,
         remote_room_hosts: List[str],
         room_id: str,
         user: UserID,
diff --git a/synapse/handlers/room_member_worker.py b/synapse/handlers/room_member_worker.py
index ba261702d4..76e36b8a6d 100644
--- a/synapse/handlers/room_member_worker.py
+++ b/synapse/handlers/room_member_worker.py
@@ -113,6 +113,7 @@ class RoomMemberWorkerHandler(RoomMemberHandler):
 
     async def remote_knock(
         self,
+        requester: Requester,
         remote_room_hosts: List[str],
         room_id: str,
         user: UserID,
@@ -123,9 +124,10 @@ class RoomMemberWorkerHandler(RoomMemberHandler):
         Implements RoomMemberHandler.remote_knock
         """
         ret = await self._remote_knock_client(
+            requester=requester,
             remote_room_hosts=remote_room_hosts,
             room_id=room_id,
-            user=user,
+            user_id=user.to_string(),
             content=content,
         )
         return ret["event_id"], ret["stream_id"]
diff --git a/synapse/replication/http/membership.py b/synapse/replication/http/membership.py
index 9fa1060d48..67b01db67e 100644
--- a/synapse/replication/http/membership.py
+++ b/synapse/replication/http/membership.py
@@ -142,17 +142,12 @@ class ReplicationRemoteKnockRestServlet(ReplicationEndpoint):
         }
 
     async def _handle_request(  # type: ignore[override]
-        self,
-        request: SynapseRequest,
-        content: JsonDict,
-        room_id: str,
-        user_id: str,
+        self, request: SynapseRequest, content: JsonDict, room_id: str, user_id: str
     ) -> Tuple[int, JsonDict]:
         remote_room_hosts = content["remote_room_hosts"]
         event_content = content["content"]
 
         requester = Requester.deserialize(self.store, content["requester"])
-
         request.requester = requester
 
         logger.debug("remote_knock: %s on room: %s", user_id, room_id)
@@ -277,16 +272,12 @@ class ReplicationRemoteRescindKnockRestServlet(ReplicationEndpoint):
         }
 
     async def _handle_request(  # type: ignore[override]
-        self,
-        request: SynapseRequest,
-        content: JsonDict,
-        knock_event_id: str,
+        self, request: SynapseRequest, content: JsonDict, knock_event_id: str
     ) -> Tuple[int, JsonDict]:
         txn_id = content["txn_id"]
         event_content = content["content"]
 
         requester = Requester.deserialize(self.store, content["requester"])
-
         request.requester = requester
 
         # hopefully we're now on the master, so this won't recurse!
@@ -363,3 +354,5 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     ReplicationRemoteJoinRestServlet(hs).register(http_server)
     ReplicationRemoteRejectInviteRestServlet(hs).register(http_server)
     ReplicationUserJoinedLeftRoomRestServlet(hs).register(http_server)
+    ReplicationRemoteKnockRestServlet(hs).register(http_server)
+    ReplicationRemoteRescindKnockRestServlet(hs).register(http_server)
diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py
index c327f15043..2e19e055d3 100644
--- a/synapse/rest/__init__.py
+++ b/synapse/rest/__init__.py
@@ -139,7 +139,7 @@ class ClientRestResource(JsonResource):
         relations.register_servlets(hs, client_resource)
         if is_main_process:
             password_policy.register_servlets(hs, client_resource)
-            knock.register_servlets(hs, client_resource)
+        knock.register_servlets(hs, client_resource)
 
         # moving to /_synapse/admin
         if is_main_process:
diff --git a/synapse/rest/client/knock.py b/synapse/rest/client/knock.py
index 10975224c0..4fa66904ba 100644
--- a/synapse/rest/client/knock.py
+++ b/synapse/rest/client/knock.py
@@ -63,7 +63,6 @@ class KnockRoomAliasServlet(RestServlet):
 
             # twisted.web.server.Request.args is incorrectly defined as Optional[Any]
             args: Dict[bytes, List[bytes]] = request.args  # type: ignore
-
             remote_room_hosts = parse_strings_from_args(
                 args, "server_name", required=False
             )
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index 14b04810a1..45aee3d3fe 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -926,7 +926,7 @@ class RoomMembershipRestServlet(TransactionRestServlet):
         self.auth = hs.get_auth()
 
     def register(self, http_server: HttpServer) -> None:
-        # /rooms/$roomid/[invite|join|leave]
+        # /rooms/$roomid/[join|invite|leave|ban|unban|kick]
         PATTERNS = (
             "/rooms/(?P<room_id>[^/]*)/"
             "(?P<membership_action>join|invite|leave|ban|unban|kick)"
-- 
cgit 1.5.1


From 1eea662780a6325af0a61ceb447b4c91a2d3ac98 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Thu, 2 Mar 2023 18:27:00 +0000
Subject: Add a `get_next_txn` method to `StreamIdGenerator` to match
 `MultiWriterIdGenerator` (#15191)

---
 changelog.d/15191.misc                         |  1 +
 synapse/storage/databases/main/account_data.py | 11 ++-----
 synapse/storage/util/id_generators.py          | 45 +++++++++++++++++++++++++-
 synapse/storage/util/sequence.py               |  2 +-
 4 files changed, 48 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/15191.misc

(limited to 'synapse')

diff --git a/changelog.d/15191.misc b/changelog.d/15191.misc
new file mode 100644
index 0000000000..579f76d451
--- /dev/null
+++ b/changelog.d/15191.misc
@@ -0,0 +1 @@
+Add a `get_next_txn` method to `StreamIdGenerator` to match `MultiWriterIdGenerator`.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 308d19440f..2d2ba74347 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -40,7 +40,6 @@ from synapse.storage.databases.main.push_rule import PushRulesWorkerStore
 from synapse.storage.engines import PostgresEngine
 from synapse.storage.util.id_generators import (
     AbstractStreamIdGenerator,
-    AbstractStreamIdTracker,
     MultiWriterIdGenerator,
     StreamIdGenerator,
 )
@@ -64,14 +63,12 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
     ):
         super().__init__(database, db_conn, hs)
 
-        # `_can_write_to_account_data` indicates whether the current worker is allowed
-        # to write account data. A value of `True` implies that `_account_data_id_gen`
-        # is an `AbstractStreamIdGenerator` and not just a tracker.
-        self._account_data_id_gen: AbstractStreamIdTracker
         self._can_write_to_account_data = (
             self._instance_name in hs.config.worker.writers.account_data
         )
 
+        self._account_data_id_gen: AbstractStreamIdGenerator
+
         if isinstance(database.engine, PostgresEngine):
             self._account_data_id_gen = MultiWriterIdGenerator(
                 db_conn=db_conn,
@@ -558,7 +555,6 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             The maximum stream ID.
         """
         assert self._can_write_to_account_data
-        assert isinstance(self._account_data_id_gen, AbstractStreamIdGenerator)
 
         content_json = json_encoder.encode(content)
 
@@ -598,7 +594,6 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             data to delete.
         """
         assert self._can_write_to_account_data
-        assert isinstance(self._account_data_id_gen, AbstractStreamIdGenerator)
 
         def _remove_account_data_for_room_txn(
             txn: LoggingTransaction, next_id: int
@@ -663,7 +658,6 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             The maximum stream ID.
         """
         assert self._can_write_to_account_data
-        assert isinstance(self._account_data_id_gen, AbstractStreamIdGenerator)
 
         async with self._account_data_id_gen.get_next() as next_id:
             await self.db_pool.runInteraction(
@@ -770,7 +764,6 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             to delete.
         """
         assert self._can_write_to_account_data
-        assert isinstance(self._account_data_id_gen, AbstractStreamIdGenerator)
 
         def _remove_account_data_for_user_txn(
             txn: LoggingTransaction, next_id: int
diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py
index 9adff3f4f5..334d3d718b 100644
--- a/synapse/storage/util/id_generators.py
+++ b/synapse/storage/util/id_generators.py
@@ -158,6 +158,15 @@ class AbstractStreamIdGenerator(AbstractStreamIdTracker):
         """
         raise NotImplementedError()
 
+    @abc.abstractmethod
+    def get_next_txn(self, txn: LoggingTransaction) -> int:
+        """
+        Usage:
+            stream_id_gen.get_next_txn(txn)
+            # ... persist events ...
+        """
+        raise NotImplementedError()
+
 
 class StreamIdGenerator(AbstractStreamIdGenerator):
     """Generates and tracks stream IDs for a stream with a single writer.
@@ -263,6 +272,40 @@ class StreamIdGenerator(AbstractStreamIdGenerator):
 
         return _AsyncCtxManagerWrapper(manager())
 
+    def get_next_txn(self, txn: LoggingTransaction) -> int:
+        """
+        Retrieve the next stream ID from within a database transaction.
+
+        Clean-up functions will be called when the transaction finishes.
+
+        Args:
+            txn: The database transaction object.
+
+        Returns:
+            The next stream ID.
+        """
+        if not self._is_writer:
+            raise Exception("Tried to allocate stream ID on non-writer")
+
+        # Get the next stream ID.
+        with self._lock:
+            self._current += self._step
+            next_id = self._current
+
+            self._unfinished_ids[next_id] = next_id
+
+        def clear_unfinished_id(id_to_clear: int) -> None:
+            """A function to mark processing this ID as finished"""
+            with self._lock:
+                self._unfinished_ids.pop(id_to_clear)
+
+        # Mark this ID as finished once the database transaction itself finishes.
+        txn.call_after(clear_unfinished_id, next_id)
+        txn.call_on_exception(clear_unfinished_id, next_id)
+
+        # Return the new ID.
+        return next_id
+
     def get_current_token(self) -> int:
         if not self._is_writer:
             return self._current
@@ -568,7 +611,7 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator):
         """
         Usage:
 
-            stream_id = stream_id_gen.get_next(txn)
+            stream_id = stream_id_gen.get_next_txn(txn)
             # ... persist event ...
         """
 
diff --git a/synapse/storage/util/sequence.py b/synapse/storage/util/sequence.py
index 75268cbe15..80915216de 100644
--- a/synapse/storage/util/sequence.py
+++ b/synapse/storage/util/sequence.py
@@ -205,7 +205,7 @@ class LocalSequenceGenerator(SequenceGenerator):
         """
         Args:
             get_first_callback: a callback which is called on the first call to
-                 get_next_id_txn; should return the curreent maximum id
+                 get_next_id_txn; should return the current maximum id
         """
         # the callback. this is cleared after it is called, so that it can be GCed.
         self._callback: Optional[GetFirstCallbackType] = get_first_callback
-- 
cgit 1.5.1


From 15e975f68fc354843a0647e53f285696e86de89b Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Fri, 3 Mar 2023 10:51:57 +0000
Subject: Experimental MSC3890 Implementation: Fix deleting account data when
 using an account data writer worker (#14869)

---
 changelog.d/14869.bugfix                       |  1 +
 synapse/handlers/account_data.py               |  7 ------
 synapse/storage/databases/main/account_data.py | 34 ++++++++++++--------------
 3 files changed, 16 insertions(+), 26 deletions(-)
 create mode 100644 changelog.d/14869.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14869.bugfix b/changelog.d/14869.bugfix
new file mode 100644
index 0000000000..865b597741
--- /dev/null
+++ b/changelog.d/14869.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in v1.75.0rc1 that caused experimental support for deleting account data to raise an internal server error while using an account data writer worker.
\ No newline at end of file
diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py
index 797de46dbc..7e01c18c6c 100644
--- a/synapse/handlers/account_data.py
+++ b/synapse/handlers/account_data.py
@@ -155,9 +155,6 @@ class AccountDataHandler:
             max_stream_id = await self._store.remove_account_data_for_room(
                 user_id, room_id, account_data_type
             )
-            if max_stream_id is None:
-                # The referenced account data did not exist, so no delete occurred.
-                return None
 
             self._notifier.on_new_event(
                 StreamKeyType.ACCOUNT_DATA, max_stream_id, users=[user_id]
@@ -230,9 +227,6 @@ class AccountDataHandler:
             max_stream_id = await self._store.remove_account_data_for_user(
                 user_id, account_data_type
             )
-            if max_stream_id is None:
-                # The referenced account data did not exist, so no delete occurred.
-                return None
 
             self._notifier.on_new_event(
                 StreamKeyType.ACCOUNT_DATA, max_stream_id, users=[user_id]
@@ -248,7 +242,6 @@ class AccountDataHandler:
                 instance_name=random.choice(self._account_data_writers),
                 user_id=user_id,
                 account_data_type=account_data_type,
-                content={},
             )
             return response["max_stream_id"]
 
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 2d2ba74347..a9843f6e17 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -581,7 +581,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
 
     async def remove_account_data_for_room(
         self, user_id: str, room_id: str, account_data_type: str
-    ) -> Optional[int]:
+    ) -> int:
         """Delete the room account data for the user of a given type.
 
         Args:
@@ -632,15 +632,13 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
                 next_id,
             )
 
-            if not row_updated:
-                return None
-
-            self._account_data_stream_cache.entity_has_changed(user_id, next_id)
-            self.get_room_account_data_for_user.invalidate((user_id,))
-            self.get_account_data_for_room.invalidate((user_id, room_id))
-            self.get_account_data_for_room_and_type.prefill(
-                (user_id, room_id, account_data_type), {}
-            )
+            if row_updated:
+                self._account_data_stream_cache.entity_has_changed(user_id, next_id)
+                self.get_room_account_data_for_user.invalidate((user_id,))
+                self.get_account_data_for_room.invalidate((user_id, room_id))
+                self.get_account_data_for_room_and_type.prefill(
+                    (user_id, room_id, account_data_type), {}
+                )
 
         return self._account_data_id_gen.get_current_token()
 
@@ -747,7 +745,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
         self,
         user_id: str,
         account_data_type: str,
-    ) -> Optional[int]:
+    ) -> int:
         """
         Delete a single piece of user account data by type.
 
@@ -833,14 +831,12 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
                 next_id,
             )
 
-            if not row_updated:
-                return None
-
-            self._account_data_stream_cache.entity_has_changed(user_id, next_id)
-            self.get_global_account_data_for_user.invalidate((user_id,))
-            self.get_global_account_data_by_type_for_user.prefill(
-                (user_id, account_data_type), {}
-            )
+            if row_updated:
+                self._account_data_stream_cache.entity_has_changed(user_id, next_id)
+                self.get_global_account_data_for_user.invalidate((user_id,))
+                self.get_global_account_data_by_type_for_user.prefill(
+                    (user_id, account_data_type), {}
+                )
 
         return self._account_data_id_gen.get_current_token()
 
-- 
cgit 1.5.1


From 848f7e3d5ff38ca28e56e6143e584974da1eec42 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Fri, 3 Mar 2023 13:22:49 +0100
Subject: Remove unspecced and buggy `PUT` method on the unstable
 `/rooms/<room_id>/batch_send` endpoint. (#15199)

---
 changelog.d/15199.misc            |  1 +
 synapse/rest/client/room_batch.py | 16 +---------------
 2 files changed, 2 insertions(+), 15 deletions(-)
 create mode 100644 changelog.d/15199.misc

(limited to 'synapse')

diff --git a/changelog.d/15199.misc b/changelog.d/15199.misc
new file mode 100644
index 0000000000..145b03fe16
--- /dev/null
+++ b/changelog.d/15199.misc
@@ -0,0 +1 @@
+Remove unspecced and buggy `PUT` method on the unstable `/rooms/<room_id>/batch_send` endpoint.
diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py
index 10be4a781b..ef284ecc11 100644
--- a/synapse/rest/client/room_batch.py
+++ b/synapse/rest/client/room_batch.py
@@ -15,9 +15,7 @@
 import logging
 import re
 from http import HTTPStatus
-from typing import TYPE_CHECKING, Awaitable, Tuple
-
-from twisted.web.server import Request
+from typing import TYPE_CHECKING, Tuple
 
 from synapse.api.constants import EventContentFields
 from synapse.api.errors import AuthError, Codes, SynapseError
@@ -30,7 +28,6 @@ from synapse.http.servlet import (
     parse_strings_from_args,
 )
 from synapse.http.site import SynapseRequest
-from synapse.rest.client.transactions import HttpTransactionCache
 from synapse.types import JsonDict
 
 if TYPE_CHECKING:
@@ -79,7 +76,6 @@ class RoomBatchSendEventRestServlet(RestServlet):
         self.event_creation_handler = hs.get_event_creation_handler()
         self.auth = hs.get_auth()
         self.room_batch_handler = hs.get_room_batch_handler()
-        self.txns = HttpTransactionCache(hs)
 
     async def on_POST(
         self, request: SynapseRequest, room_id: str
@@ -249,16 +245,6 @@ class RoomBatchSendEventRestServlet(RestServlet):
 
         return HTTPStatus.OK, response_dict
 
-    def on_GET(self, request: Request, room_id: str) -> Tuple[int, str]:
-        return HTTPStatus.NOT_IMPLEMENTED, "Not implemented"
-
-    def on_PUT(
-        self, request: SynapseRequest, room_id: str
-    ) -> Awaitable[Tuple[int, JsonDict]]:
-        return self.txns.fetch_or_execute_request(
-            request, self.on_POST, request, room_id
-        )
-
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     msc2716_enabled = hs.config.experimental.msc2716_enabled
-- 
cgit 1.5.1


From 02f74f3a997a4356b5bda957ebc51a829dad15f9 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 3 Mar 2023 08:13:37 -0500
Subject: Combine AbstractStreamIdTracker and AbstractStreamIdGenerator.
 (#15192)

AbstractStreamIdTracker (now) has only a single sub-class: AbstractStreamIdGenerator,
combine them to simplify some code and remove any direct references to
AbstractStreamIdTracker.
---
 changelog.d/15192.misc                          |  1 +
 synapse/storage/databases/main/devices.py       |  7 ++-----
 synapse/storage/databases/main/events_worker.py |  5 ++---
 synapse/storage/databases/main/push_rule.py     |  3 +--
 synapse/storage/databases/main/pusher.py        |  3 +--
 synapse/storage/databases/main/receipts.py      |  6 +++---
 synapse/storage/util/id_generators.py           | 17 +++++------------
 7 files changed, 15 insertions(+), 27 deletions(-)
 create mode 100644 changelog.d/15192.misc

(limited to 'synapse')

diff --git a/changelog.d/15192.misc b/changelog.d/15192.misc
new file mode 100644
index 0000000000..1076686875
--- /dev/null
+++ b/changelog.d/15192.misc
@@ -0,0 +1 @@
+Combine `AbstractStreamIdTracker` and `AbstractStreamIdGenerator`.
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 0dd15f16ff..5503621ad6 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -52,7 +52,6 @@ from synapse.storage.databases.main.roommember import RoomMemberWorkerStore
 from synapse.storage.types import Cursor
 from synapse.storage.util.id_generators import (
     AbstractStreamIdGenerator,
-    AbstractStreamIdTracker,
     StreamIdGenerator,
 )
 from synapse.types import JsonDict, StrCollection, get_verify_key_from_cross_signing_key
@@ -91,7 +90,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
 
         # In the worker store this is an ID tracker which we overwrite in the non-worker
         # class below that is used on the main process.
-        self._device_list_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
+        self._device_list_id_gen = StreamIdGenerator(
             db_conn,
             hs.get_replication_notifier(),
             "device_lists_stream",
@@ -712,9 +711,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
             The new stream ID.
         """
 
-        # TODO: this looks like it's _writing_. Should this be on DeviceStore rather
-        #  than DeviceWorkerStore?
-        async with self._device_list_id_gen.get_next() as stream_id:  # type: ignore[attr-defined]
+        async with self._device_list_id_gen.get_next() as stream_id:
             await self.db_pool.runInteraction(
                 "add_user_sig_change_to_streams",
                 self._add_user_signature_change_txn,
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index b7e7498125..20b7a68362 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -72,7 +72,6 @@ from synapse.storage.engines import PostgresEngine
 from synapse.storage.types import Cursor
 from synapse.storage.util.id_generators import (
     AbstractStreamIdGenerator,
-    AbstractStreamIdTracker,
     MultiWriterIdGenerator,
     StreamIdGenerator,
 )
@@ -187,8 +186,8 @@ class EventsWorkerStore(SQLBaseStore):
     ):
         super().__init__(database, db_conn, hs)
 
-        self._stream_id_gen: AbstractStreamIdTracker
-        self._backfill_id_gen: AbstractStreamIdTracker
+        self._stream_id_gen: AbstractStreamIdGenerator
+        self._backfill_id_gen: AbstractStreamIdGenerator
         if isinstance(database.engine, PostgresEngine):
             # If we're using Postgres than we can use `MultiWriterIdGenerator`
             # regardless of whether this process writes to the streams or not.
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index 9b2bbe060d..9f862f00c1 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -46,7 +46,6 @@ from synapse.storage.engines import PostgresEngine, Sqlite3Engine
 from synapse.storage.push_rule import InconsistentRuleException, RuleNotFoundException
 from synapse.storage.util.id_generators import (
     AbstractStreamIdGenerator,
-    AbstractStreamIdTracker,
     IdGenerator,
     StreamIdGenerator,
 )
@@ -118,7 +117,7 @@ class PushRulesWorkerStore(
 
         # In the worker store this is an ID tracker which we overwrite in the non-worker
         # class below that is used on the main process.
-        self._push_rules_stream_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
+        self._push_rules_stream_id_gen = StreamIdGenerator(
             db_conn,
             hs.get_replication_notifier(),
             "push_rules_stream",
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
index fddbc07afa..9a24f7a655 100644
--- a/synapse/storage/databases/main/pusher.py
+++ b/synapse/storage/databases/main/pusher.py
@@ -36,7 +36,6 @@ from synapse.storage.database import (
 )
 from synapse.storage.util.id_generators import (
     AbstractStreamIdGenerator,
-    AbstractStreamIdTracker,
     StreamIdGenerator,
 )
 from synapse.types import JsonDict
@@ -60,7 +59,7 @@ class PusherWorkerStore(SQLBaseStore):
 
         # In the worker store this is an ID tracker which we overwrite in the non-worker
         # class below that is used on the main process.
-        self._pushers_id_gen: AbstractStreamIdTracker = StreamIdGenerator(
+        self._pushers_id_gen = StreamIdGenerator(
             db_conn,
             hs.get_replication_notifier(),
             "pushers",
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index 92a82240ab..074942b167 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -39,7 +39,7 @@ from synapse.storage.database import (
 from synapse.storage.engines import PostgresEngine
 from synapse.storage.engines._base import IsolationLevel
 from synapse.storage.util.id_generators import (
-    AbstractStreamIdTracker,
+    AbstractStreamIdGenerator,
     MultiWriterIdGenerator,
     StreamIdGenerator,
 )
@@ -65,7 +65,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
 
         # In the worker store this is an ID tracker which we overwrite in the non-worker
         # class below that is used on the main process.
-        self._receipts_id_gen: AbstractStreamIdTracker
+        self._receipts_id_gen: AbstractStreamIdGenerator
 
         if isinstance(database.engine, PostgresEngine):
             self._can_write_to_receipts = (
@@ -768,7 +768,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
                 "insert_receipt_conv", self._graph_to_linear, room_id, event_ids
             )
 
-        async with self._receipts_id_gen.get_next() as stream_id:  # type: ignore[attr-defined]
+        async with self._receipts_id_gen.get_next() as stream_id:
             event_ts = await self.db_pool.runInteraction(
                 "insert_linearized_receipt",
                 self._insert_linearized_receipt_txn,
diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py
index 334d3d718b..d2c874b9a8 100644
--- a/synapse/storage/util/id_generators.py
+++ b/synapse/storage/util/id_generators.py
@@ -93,8 +93,11 @@ def _load_current_id(
     return res
 
 
-class AbstractStreamIdTracker(metaclass=abc.ABCMeta):
-    """Tracks the "current" stream ID of a stream that may have multiple writers.
+class AbstractStreamIdGenerator(metaclass=abc.ABCMeta):
+    """Generates or tracks stream IDs for a stream that may have multiple writers.
+
+    Each stream ID represents a write transaction, whose completion is tracked
+    so that the "current" stream ID of the stream can be determined.
 
     Stream IDs are monotonically increasing or decreasing integers representing write
     transactions. The "current" stream ID is the stream ID such that all transactions
@@ -130,16 +133,6 @@ class AbstractStreamIdTracker(metaclass=abc.ABCMeta):
         """
         raise NotImplementedError()
 
-
-class AbstractStreamIdGenerator(AbstractStreamIdTracker):
-    """Generates stream IDs for a stream that may have multiple writers.
-
-    Each stream ID represents a write transaction, whose completion is tracked
-    so that the "current" stream ID of the stream can be determined.
-
-    See `AbstractStreamIdTracker` for more details.
-    """
-
     @abc.abstractmethod
     def get_next(self) -> AsyncContextManager[int]:
         """
-- 
cgit 1.5.1


From fd9cadcf532ce0dbd005541fe635b214aa6d2438 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 6 Mar 2023 08:38:01 -0500
Subject: Stabilize support for MSC3758: event_property_is push condition
 (#15185)

This removes the configuration flag & updates the identifiers to
use the stable version.
---
 changelog.d/15185.feature                   |  1 +
 rust/benches/evaluator.rs                   |  4 ----
 rust/src/push/base_rules.rs                 |  8 +++----
 rust/src/push/evaluator.rs                  | 36 ++++++++++-------------------
 rust/src/push/mod.rs                        | 36 +++++++++++++----------------
 stubs/synapse/synapse_rust/push.pyi         |  1 -
 synapse/config/experimental.py              |  8 +------
 synapse/push/bulk_push_rule_evaluator.py    |  1 -
 tests/push/test_bulk_push_rule_evaluator.py |  2 --
 tests/push/test_push_rule_evaluator.py      | 23 ++++--------------
 10 files changed, 39 insertions(+), 81 deletions(-)
 create mode 100644 changelog.d/15185.feature

(limited to 'synapse')

diff --git a/changelog.d/15185.feature b/changelog.d/15185.feature
new file mode 100644
index 0000000000..901900bdec
--- /dev/null
+++ b/changelog.d/15185.feature
@@ -0,0 +1 @@
+Stabilise support for [MSC3758](https://github.com/matrix-org/matrix-spec-proposals/pull/3758): `event_property_is` push condition.
diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs
index 44477e63f7..79b553dbb0 100644
--- a/rust/benches/evaluator.rs
+++ b/rust/benches/evaluator.rs
@@ -53,7 +53,6 @@ fn bench_match_exact(b: &mut Bencher) {
         vec![],
         false,
         false,
-        false,
     )
     .unwrap();
 
@@ -100,7 +99,6 @@ fn bench_match_word(b: &mut Bencher) {
         vec![],
         false,
         false,
-        false,
     )
     .unwrap();
 
@@ -147,7 +145,6 @@ fn bench_match_word_miss(b: &mut Bencher) {
         vec![],
         false,
         false,
-        false,
     )
     .unwrap();
 
@@ -194,7 +191,6 @@ fn bench_eval_message(b: &mut Bencher) {
         vec![],
         false,
         false,
-        false,
     )
     .unwrap();
 
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index 3d72a4a4c3..ec8d96656a 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -24,10 +24,10 @@ use super::KnownCondition;
 use crate::push::RelatedEventMatchTypeCondition;
 use crate::push::SetTweak;
 use crate::push::TweakValue;
-use crate::push::{Action, ExactEventMatchCondition, SimpleJsonValue};
+use crate::push::{Action, EventPropertyIsCondition, SimpleJsonValue};
 use crate::push::{Condition, EventMatchTypeCondition};
 use crate::push::{EventMatchCondition, EventMatchPatternType};
-use crate::push::{ExactEventMatchTypeCondition, PushRule};
+use crate::push::{EventPropertyIsTypeCondition, PushRule};
 
 const HIGHLIGHT_ACTION: Action = Action::SetTweak(SetTweak {
     set_tweak: Cow::Borrowed("highlight"),
@@ -145,7 +145,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
         rule_id: Cow::Borrowed(".org.matrix.msc3952.is_user_mention"),
         priority_class: 5,
         conditions: Cow::Borrowed(&[Condition::Known(
-            KnownCondition::ExactEventPropertyContainsType(ExactEventMatchTypeCondition {
+            KnownCondition::ExactEventPropertyContainsType(EventPropertyIsTypeCondition {
                 key: Cow::Borrowed("content.org.matrix.msc3952.mentions.user_ids"),
                 value_type: Cow::Borrowed(&EventMatchPatternType::UserId),
             }),
@@ -166,7 +166,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
         rule_id: Cow::Borrowed(".org.matrix.msc3952.is_room_mention"),
         priority_class: 5,
         conditions: Cow::Borrowed(&[
-            Condition::Known(KnownCondition::ExactEventMatch(ExactEventMatchCondition {
+            Condition::Known(KnownCondition::EventPropertyIs(EventPropertyIsCondition {
                 key: Cow::Borrowed("content.org.matrix.msc3952.mentions.room"),
                 value: Cow::Borrowed(&SimpleJsonValue::Bool(true)),
             })),
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index 1c2a05ad9a..67fe6a4823 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -23,7 +23,7 @@ use regex::Regex;
 
 use super::{
     utils::{get_glob_matcher, get_localpart_from_id, GlobMatchType},
-    Action, Condition, ExactEventMatchCondition, FilteredPushRules, KnownCondition,
+    Action, Condition, EventPropertyIsCondition, FilteredPushRules, KnownCondition,
     SimpleJsonValue,
 };
 use crate::push::{EventMatchPatternType, JsonValue};
@@ -97,9 +97,6 @@ pub struct PushRuleEvaluator {
     /// flag as MSC1767 (extensible events core).
     msc3931_enabled: bool,
 
-    /// If MSC3758 (exact_event_match push rule condition) is enabled.
-    msc3758_exact_event_match: bool,
-
     /// If MSC3966 (exact_event_property_contains push rule condition) is enabled.
     msc3966_exact_event_property_contains: bool,
 }
@@ -119,7 +116,6 @@ impl PushRuleEvaluator {
         related_event_match_enabled: bool,
         room_version_feature_flags: Vec<String>,
         msc3931_enabled: bool,
-        msc3758_exact_event_match: bool,
         msc3966_exact_event_property_contains: bool,
     ) -> Result<Self, Error> {
         let body = match flattened_keys.get("content.body") {
@@ -138,7 +134,6 @@ impl PushRuleEvaluator {
             related_event_match_enabled,
             room_version_feature_flags,
             msc3931_enabled,
-            msc3758_exact_event_match,
             msc3966_exact_event_property_contains,
         })
     }
@@ -275,8 +270,8 @@ impl PushRuleEvaluator {
 
                 self.match_event_match(&self.flattened_keys, &event_match.key, pattern)?
             }
-            KnownCondition::ExactEventMatch(exact_event_match) => {
-                self.match_exact_event_match(exact_event_match)?
+            KnownCondition::EventPropertyIs(event_property_is) => {
+                self.match_event_property_is(event_property_is)?
             }
             KnownCondition::RelatedEventMatch(event_match) => self.match_related_event_match(
                 &event_match.rel_type.clone(),
@@ -306,10 +301,10 @@ impl PushRuleEvaluator {
                     Some(Cow::Borrowed(pattern)),
                 )?
             }
-            KnownCondition::ExactEventPropertyContains(exact_event_match) => self
+            KnownCondition::ExactEventPropertyContains(event_property_is) => self
                 .match_exact_event_property_contains(
-                    exact_event_match.key.clone(),
-                    exact_event_match.value.clone(),
+                    event_property_is.key.clone(),
+                    event_property_is.value.clone(),
                 )?,
             KnownCondition::ExactEventPropertyContainsType(exact_event_match) => {
                 // The `pattern_type` can either be "user_id" or "user_localpart",
@@ -405,20 +400,15 @@ impl PushRuleEvaluator {
         compiled_pattern.is_match(haystack)
     }
 
-    /// Evaluates a `exact_event_match` condition. (MSC3758)
-    fn match_exact_event_match(
+    /// Evaluates a `event_property_is` condition.
+    fn match_event_property_is(
         &self,
-        exact_event_match: &ExactEventMatchCondition,
+        event_property_is: &EventPropertyIsCondition,
     ) -> Result<bool, Error> {
-        // First check if the feature is enabled.
-        if !self.msc3758_exact_event_match {
-            return Ok(false);
-        }
-
-        let value = &exact_event_match.value;
+        let value = &event_property_is.value;
 
         let haystack = if let Some(JsonValue::Value(haystack)) =
-            self.flattened_keys.get(&*exact_event_match.key)
+            self.flattened_keys.get(&*event_property_is.key)
         {
             haystack
         } else {
@@ -464,7 +454,7 @@ impl PushRuleEvaluator {
         }
     }
 
-    /// Evaluates a `exact_event_property_contains` condition. (MSC3758)
+    /// Evaluates a `exact_event_property_contains` condition. (MSC3966)
     fn match_exact_event_property_contains(
         &self,
         key: Cow<str>,
@@ -526,7 +516,6 @@ fn push_rule_evaluator() {
         vec![],
         true,
         true,
-        true,
     )
     .unwrap();
 
@@ -557,7 +546,6 @@ fn test_requires_room_version_supports_condition() {
         flags,
         true,
         true,
-        true,
     )
     .unwrap();
 
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index 6391d2ed47..7fde88e825 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -331,21 +331,20 @@ pub enum KnownCondition {
     // Identical to event_match but gives predefined patterns. Cannot be added by users.
     #[serde(skip_deserializing, rename = "event_match")]
     EventMatchType(EventMatchTypeCondition),
-    #[serde(rename = "com.beeper.msc3758.exact_event_match")]
-    ExactEventMatch(ExactEventMatchCondition),
+    EventPropertyIs(EventPropertyIsCondition),
     #[serde(rename = "im.nheko.msc3664.related_event_match")]
     RelatedEventMatch(RelatedEventMatchCondition),
     // Identical to related_event_match but gives predefined patterns. Cannot be added by users.
     #[serde(skip_deserializing, rename = "im.nheko.msc3664.related_event_match")]
     RelatedEventMatchType(RelatedEventMatchTypeCondition),
     #[serde(rename = "org.matrix.msc3966.exact_event_property_contains")]
-    ExactEventPropertyContains(ExactEventMatchCondition),
+    ExactEventPropertyContains(EventPropertyIsCondition),
     // Identical to exact_event_property_contains but gives predefined patterns. Cannot be added by users.
     #[serde(
         skip_deserializing,
         rename = "org.matrix.msc3966.exact_event_property_contains"
     )]
-    ExactEventPropertyContainsType(ExactEventMatchTypeCondition),
+    ExactEventPropertyContainsType(EventPropertyIsTypeCondition),
     ContainsDisplayName,
     RoomMemberCount {
         #[serde(skip_serializing_if = "Option::is_none")]
@@ -395,16 +394,16 @@ pub struct EventMatchTypeCondition {
     pub pattern_type: Cow<'static, EventMatchPatternType>,
 }
 
-/// The body of a [`Condition::ExactEventMatch`]
+/// The body of a [`Condition::EventPropertyIs`]
 #[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct ExactEventMatchCondition {
+pub struct EventPropertyIsCondition {
     pub key: Cow<'static, str>,
     pub value: Cow<'static, SimpleJsonValue>,
 }
 
-/// The body of a [`Condition::ExactEventMatch`] that uses user_id or user_localpart as a pattern.
+/// The body of a [`Condition::EventPropertyIs`] that uses user_id or user_localpart as a pattern.
 #[derive(Serialize, Debug, Clone)]
-pub struct ExactEventMatchTypeCondition {
+pub struct EventPropertyIsTypeCondition {
     pub key: Cow<'static, str>,
     // During serialization, the pattern_type property gets replaced with a
     // pattern property of the correct value in synapse.push.clientformat.format_push_rules_for_user.
@@ -711,44 +710,41 @@ fn test_deserialize_unstable_msc3931_condition() {
 }
 
 #[test]
-fn test_deserialize_unstable_msc3758_condition() {
+fn test_deserialize_event_property_is_condition() {
     // A string condition should work.
-    let json =
-        r#"{"kind":"com.beeper.msc3758.exact_event_match","key":"content.value","value":"foo"}"#;
+    let json = r#"{"kind":"event_property_is","key":"content.value","value":"foo"}"#;
 
     let condition: Condition = serde_json::from_str(json).unwrap();
     assert!(matches!(
         condition,
-        Condition::Known(KnownCondition::ExactEventMatch(_))
+        Condition::Known(KnownCondition::EventPropertyIs(_))
     ));
 
     // A boolean condition should work.
-    let json =
-        r#"{"kind":"com.beeper.msc3758.exact_event_match","key":"content.value","value":true}"#;
+    let json = r#"{"kind":"event_property_is","key":"content.value","value":true}"#;
 
     let condition: Condition = serde_json::from_str(json).unwrap();
     assert!(matches!(
         condition,
-        Condition::Known(KnownCondition::ExactEventMatch(_))
+        Condition::Known(KnownCondition::EventPropertyIs(_))
     ));
 
     // An integer condition should work.
-    let json = r#"{"kind":"com.beeper.msc3758.exact_event_match","key":"content.value","value":1}"#;
+    let json = r#"{"kind":"event_property_is","key":"content.value","value":1}"#;
 
     let condition: Condition = serde_json::from_str(json).unwrap();
     assert!(matches!(
         condition,
-        Condition::Known(KnownCondition::ExactEventMatch(_))
+        Condition::Known(KnownCondition::EventPropertyIs(_))
     ));
 
     // A null condition should work
-    let json =
-        r#"{"kind":"com.beeper.msc3758.exact_event_match","key":"content.value","value":null}"#;
+    let json = r#"{"kind":"event_property_is","key":"content.value","value":null}"#;
 
     let condition: Condition = serde_json::from_str(json).unwrap();
     assert!(matches!(
         condition,
-        Condition::Known(KnownCondition::ExactEventMatch(_))
+        Condition::Known(KnownCondition::EventPropertyIs(_))
     ));
 }
 
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index c17796ffbd..c040944aac 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -65,7 +65,6 @@ class PushRuleEvaluator:
         related_event_match_enabled: bool,
         room_version_feature_flags: Tuple[str, ...],
         msc3931_enabled: bool,
-        msc3758_exact_event_match: bool,
         msc3966_exact_event_property_contains: bool,
     ): ...
     def run(
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index fc64f2bda1..9c58cee2c8 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -169,11 +169,6 @@ class ExperimentalConfig(Config):
         # MSC3925: do not replace events with their edits
         self.msc3925_inhibit_edit = experimental.get("msc3925_inhibit_edit", False)
 
-        # MSC3758: exact_event_match push rule condition
-        self.msc3758_exact_event_match = experimental.get(
-            "msc3758_exact_event_match", False
-        )
-
         # MSC3873: Disambiguate event_match keys.
         self.msc3873_escape_event_match_key = experimental.get(
             "msc3873_escape_event_match_key", False
@@ -184,10 +179,9 @@ class ExperimentalConfig(Config):
             "msc3966_exact_event_property_contains", False
         )
 
-        # MSC3952: Intentional mentions, this depends on MSC3758 and MSC3966.
+        # MSC3952: Intentional mentions, this depends on MSC3966.
         self.msc3952_intentional_mentions = (
             experimental.get("msc3952_intentional_mentions", False)
-            and self.msc3758_exact_event_match
             and self.msc3966_exact_event_property_contains
         )
 
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index abcf687f05..ba12b6d79a 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -413,7 +413,6 @@ class BulkPushRuleEvaluator:
             self._related_event_match_enabled,
             event.room_version.msc3931_push_features,
             self.hs.config.experimental.msc1767_enabled,  # MSC3931 flag
-            self.hs.config.experimental.msc3758_exact_event_match,
             self.hs.config.experimental.msc3966_exact_event_property_contains,
         )
 
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
index 73fecfd4ad..c6591c50de 100644
--- a/tests/push/test_bulk_push_rule_evaluator.py
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -231,7 +231,6 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
     @override_config(
         {
             "experimental_features": {
-                "msc3758_exact_event_match": True,
                 "msc3952_intentional_mentions": True,
                 "msc3966_exact_event_property_contains": True,
             }
@@ -335,7 +334,6 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
     @override_config(
         {
             "experimental_features": {
-                "msc3758_exact_event_match": True,
                 "msc3952_intentional_mentions": True,
                 "msc3966_exact_event_property_contains": True,
             }
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index d4a4bc4d93..ff5a9a66f5 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -173,7 +173,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
             related_event_match_enabled=True,
             room_version_feature_flags=event.room_version.msc3931_push_features,
             msc3931_enabled=True,
-            msc3758_exact_event_match=True,
             msc3966_exact_event_property_contains=True,
         )
 
@@ -404,7 +403,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
 
         # Test against a string value.
         condition = {
-            "kind": "com.beeper.msc3758.exact_event_match",
+            "kind": "event_property_is",
             "key": "content.value",
             "value": "foobaz",
         }
@@ -442,11 +441,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         """Check that exact_event_match conditions work as expected for booleans."""
 
         # Test against a True boolean value.
-        condition = {
-            "kind": "com.beeper.msc3758.exact_event_match",
-            "key": "content.value",
-            "value": True,
-        }
+        condition = {"kind": "event_property_is", "key": "content.value", "value": True}
         self._assert_matches(
             condition,
             {"value": True},
@@ -466,7 +461,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
 
         # Test against a False boolean value.
         condition = {
-            "kind": "com.beeper.msc3758.exact_event_match",
+            "kind": "event_property_is",
             "key": "content.value",
             "value": False,
         }
@@ -491,11 +486,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
     def test_exact_event_match_null(self) -> None:
         """Check that exact_event_match conditions work as expected for null."""
 
-        condition = {
-            "kind": "com.beeper.msc3758.exact_event_match",
-            "key": "content.value",
-            "value": None,
-        }
+        condition = {"kind": "event_property_is", "key": "content.value", "value": None}
         self._assert_matches(
             condition,
             {"value": None},
@@ -511,11 +502,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
     def test_exact_event_match_integer(self) -> None:
         """Check that exact_event_match conditions work as expected for integers."""
 
-        condition = {
-            "kind": "com.beeper.msc3758.exact_event_match",
-            "key": "content.value",
-            "value": 1,
-        }
+        condition = {"kind": "event_property_is", "key": "content.value", "value": 1}
         self._assert_matches(
             condition,
             {"value": 1},
-- 
cgit 1.5.1


From 05e0a4089a013979e5d0642f6a0f1d22ad865ee1 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 6 Mar 2023 09:43:01 -0500
Subject: Stop applying edits to event contents (MSC3925). (#15193)

Enables MSC3925 support by default, which:

* Includes the full edit event in the bundled aggregations of an
  edited event.
* Stops modifying the original event's content to return the new
  content from the edit event.

This is a backwards-incompatible change that is considered to be
"correct" by the spec.
---
 changelog.d/15193.bugfix            |  1 +
 synapse/config/experimental.py      |  3 --
 synapse/events/utils.py             | 57 ++---------------------------------
 synapse/rest/client/room.py         |  2 +-
 synapse/server.py                   |  2 +-
 tests/rest/client/test_relations.py | 59 +++++++------------------------------
 6 files changed, 15 insertions(+), 109 deletions(-)
 create mode 100644 changelog.d/15193.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15193.bugfix b/changelog.d/15193.bugfix
new file mode 100644
index 0000000000..ca781e9631
--- /dev/null
+++ b/changelog.d/15193.bugfix
@@ -0,0 +1 @@
+Stop applying edits when bundling aggregations, per [MSC3925](https://github.com/matrix-org/matrix-spec-proposals/pull/3925).
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 9c58cee2c8..489f2601ac 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -166,9 +166,6 @@ class ExperimentalConfig(Config):
         # MSC3391: Removing account data.
         self.msc3391_enabled = experimental.get("msc3391_enabled", False)
 
-        # MSC3925: do not replace events with their edits
-        self.msc3925_inhibit_edit = experimental.get("msc3925_inhibit_edit", False)
-
         # MSC3873: Disambiguate event_match keys.
         self.msc3873_escape_event_match_key = experimental.get(
             "msc3873_escape_event_match_key", False
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index eaa6cad4af..45f46949a1 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -39,7 +39,6 @@ from synapse.api.constants import (
 from synapse.api.errors import Codes, SynapseError
 from synapse.api.room_versions import RoomVersion
 from synapse.types import JsonDict
-from synapse.util.frozenutils import unfreeze
 
 from . import EventBase
 
@@ -403,14 +402,6 @@ class EventClientSerializer:
     clients.
     """
 
-    def __init__(self, inhibit_replacement_via_edits: bool = False):
-        """
-        Args:
-            inhibit_replacement_via_edits: If this is set to True, then events are
-               never replaced by their edits.
-        """
-        self._inhibit_replacement_via_edits = inhibit_replacement_via_edits
-
     def serialize_event(
         self,
         event: Union[JsonDict, EventBase],
@@ -418,7 +409,6 @@ class EventClientSerializer:
         *,
         config: SerializeEventConfig = _DEFAULT_SERIALIZE_EVENT_CONFIG,
         bundle_aggregations: Optional[Dict[str, "BundledAggregations"]] = None,
-        apply_edits: bool = True,
     ) -> JsonDict:
         """Serializes a single event.
 
@@ -428,10 +418,7 @@ class EventClientSerializer:
             config: Event serialization config
             bundle_aggregations: A map from event_id to the aggregations to be bundled
                into the event.
-            apply_edits: Whether the content of the event should be modified to reflect
-               any replacement in `bundle_aggregations[<event_id>].replace`.
-               See also the `inhibit_replacement_via_edits` constructor arg: if that is
-               set to True, then this argument is ignored.
+
         Returns:
             The serialized event
         """
@@ -450,38 +437,10 @@ class EventClientSerializer:
                     config,
                     bundle_aggregations,
                     serialized_event,
-                    apply_edits=apply_edits,
                 )
 
         return serialized_event
 
-    def _apply_edit(
-        self, orig_event: EventBase, serialized_event: JsonDict, edit: EventBase
-    ) -> None:
-        """Replace the content, preserving existing relations of the serialized event.
-
-        Args:
-            orig_event: The original event.
-            serialized_event: The original event, serialized. This is modified.
-            edit: The event which edits the above.
-        """
-
-        # Ensure we take copies of the edit content, otherwise we risk modifying
-        # the original event.
-        edit_content = edit.content.copy()
-
-        # Unfreeze the event content if necessary, so that we may modify it below
-        edit_content = unfreeze(edit_content)
-        serialized_event["content"] = edit_content.get("m.new_content", {})
-
-        # Check for existing relations
-        relates_to = orig_event.content.get("m.relates_to")
-        if relates_to:
-            # Keep the relations, ensuring we use a dict copy of the original
-            serialized_event["content"]["m.relates_to"] = relates_to.copy()
-        else:
-            serialized_event["content"].pop("m.relates_to", None)
-
     def _inject_bundled_aggregations(
         self,
         event: EventBase,
@@ -489,7 +448,6 @@ class EventClientSerializer:
         config: SerializeEventConfig,
         bundled_aggregations: Dict[str, "BundledAggregations"],
         serialized_event: JsonDict,
-        apply_edits: bool,
     ) -> None:
         """Potentially injects bundled aggregations into the unsigned portion of the serialized event.
 
@@ -504,9 +462,6 @@ class EventClientSerializer:
                 While serializing the bundled aggregations this map may be searched
                 again for additional events in a recursive manner.
             serialized_event: The serialized event which may be modified.
-            apply_edits: Whether the content of the event should be modified to reflect
-               any replacement in `aggregations.replace` (subject to the
-               `inhibit_replacement_via_edits` constructor arg).
         """
 
         # We have already checked that aggregations exist for this event.
@@ -522,11 +477,6 @@ class EventClientSerializer:
             ] = event_aggregations.references
 
         if event_aggregations.replace:
-            # If there is an edit, optionally apply it to the event.
-            edit = event_aggregations.replace
-            if apply_edits and not self._inhibit_replacement_via_edits:
-                self._apply_edit(event, serialized_event, edit)
-
             # Include information about it in the relations dict.
             #
             # Matrix spec v1.5 (https://spec.matrix.org/v1.5/client-server-api/#server-side-aggregation-of-mreplace-relationships)
@@ -534,10 +484,7 @@ class EventClientSerializer:
             # `sender` of the edit; however MSC3925 proposes extending it to the whole
             # of the edit, which is what we do here.
             serialized_aggregations[RelationTypes.REPLACE] = self.serialize_event(
-                edit,
-                time_now,
-                config=config,
-                apply_edits=False,
+                event_aggregations.replace, time_now, config=config
             )
 
         # Include any threaded replies to this event.
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index 45aee3d3fe..c5af07816a 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -818,7 +818,7 @@ class RoomEventServlet(RestServlet):
             # per MSC2676, /rooms/{roomId}/event/{eventId}, should return the
             # *original* event, rather than the edited version
             event_dict = self._event_serializer.serialize_event(
-                event, time_now, bundle_aggregations=aggregations, apply_edits=False
+                event, time_now, bundle_aggregations=aggregations
             )
             return 200, event_dict
 
diff --git a/synapse/server.py b/synapse/server.py
index a7c32e9a60..df80fc1beb 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -743,7 +743,7 @@ class HomeServer(metaclass=abc.ABCMeta):
 
     @cache_in_self
     def get_event_client_serializer(self) -> EventClientSerializer:
-        return EventClientSerializer(self.config.experimental.msc3925_inhibit_edit)
+        return EventClientSerializer()
 
     @cache_in_self
     def get_password_policy_handler(self) -> PasswordPolicyHandler:
diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py
index a8a0a16141..fbbbcb23f1 100644
--- a/tests/rest/client/test_relations.py
+++ b/tests/rest/client/test_relations.py
@@ -30,7 +30,6 @@ from tests import unittest
 from tests.server import FakeChannel
 from tests.test_utils import make_awaitable
 from tests.test_utils.event_injection import inject_event
-from tests.unittest import override_config
 
 
 class BaseRelationsTestCase(unittest.HomeserverTestCase):
@@ -403,7 +402,7 @@ class RelationsTestCase(BaseRelationsTestCase):
 
     def test_edit(self) -> None:
         """Test that a simple edit works."""
-
+        orig_body = {"body": "Hi!", "msgtype": "m.text"}
         new_body = {"msgtype": "m.text", "body": "I've been edited!"}
         edit_event_content = {
             "msgtype": "m.text",
@@ -424,9 +423,7 @@ class RelationsTestCase(BaseRelationsTestCase):
             access_token=self.user_token,
         )
         self.assertEqual(200, channel.code, channel.json_body)
-        self.assertEqual(
-            channel.json_body["content"], {"body": "Hi!", "msgtype": "m.text"}
-        )
+        self.assertEqual(channel.json_body["content"], orig_body)
         self._assert_edit_bundle(channel.json_body, edit_event_id, edit_event_content)
 
         # Request the room messages.
@@ -443,7 +440,7 @@ class RelationsTestCase(BaseRelationsTestCase):
         )
 
         # Request the room context.
-        # /context should return the edited event.
+        # /context should return the event.
         channel = self.make_request(
             "GET",
             f"/rooms/{self.room}/context/{self.parent_id}",
@@ -453,7 +450,7 @@ class RelationsTestCase(BaseRelationsTestCase):
         self._assert_edit_bundle(
             channel.json_body["event"], edit_event_id, edit_event_content
         )
-        self.assertEqual(channel.json_body["event"]["content"], new_body)
+        self.assertEqual(channel.json_body["event"]["content"], orig_body)
 
         # Request sync, but limit the timeline so it becomes limited (and includes
         # bundled aggregations).
@@ -491,45 +488,11 @@ class RelationsTestCase(BaseRelationsTestCase):
             edit_event_content,
         )
 
-    @override_config({"experimental_features": {"msc3925_inhibit_edit": True}})
-    def test_edit_inhibit_replace(self) -> None:
-        """
-        If msc3925_inhibit_edit is enabled, then the original event should not be
-        replaced.
-        """
-
-        new_body = {"msgtype": "m.text", "body": "I've been edited!"}
-        edit_event_content = {
-            "msgtype": "m.text",
-            "body": "foo",
-            "m.new_content": new_body,
-        }
-        channel = self._send_relation(
-            RelationTypes.REPLACE,
-            "m.room.message",
-            content=edit_event_content,
-        )
-        edit_event_id = channel.json_body["event_id"]
-
-        # /context should return the *original* event.
-        channel = self.make_request(
-            "GET",
-            f"/rooms/{self.room}/context/{self.parent_id}",
-            access_token=self.user_token,
-        )
-        self.assertEqual(200, channel.code, channel.json_body)
-        self.assertEqual(
-            channel.json_body["event"]["content"], {"body": "Hi!", "msgtype": "m.text"}
-        )
-        self._assert_edit_bundle(
-            channel.json_body["event"], edit_event_id, edit_event_content
-        )
-
     def test_multi_edit(self) -> None:
         """Test that multiple edits, including attempts by people who
         shouldn't be allowed, are correctly handled.
         """
-
+        orig_body = orig_body = {"body": "Hi!", "msgtype": "m.text"}
         self._send_relation(
             RelationTypes.REPLACE,
             "m.room.message",
@@ -570,7 +533,7 @@ class RelationsTestCase(BaseRelationsTestCase):
         )
         self.assertEqual(200, channel.code, channel.json_body)
 
-        self.assertEqual(channel.json_body["event"]["content"], new_body)
+        self.assertEqual(channel.json_body["event"]["content"], orig_body)
         self._assert_edit_bundle(
             channel.json_body["event"], edit_event_id, edit_event_content
         )
@@ -642,6 +605,7 @@ class RelationsTestCase(BaseRelationsTestCase):
 
     def test_edit_edit(self) -> None:
         """Test that an edit cannot be edited."""
+        orig_body = {"body": "Hi!", "msgtype": "m.text"}
         new_body = {"msgtype": "m.text", "body": "Initial edit"}
         edit_event_content = {
             "msgtype": "m.text",
@@ -675,14 +639,12 @@ class RelationsTestCase(BaseRelationsTestCase):
             access_token=self.user_token,
         )
         self.assertEqual(200, channel.code, channel.json_body)
-        self.assertEqual(
-            channel.json_body["content"], {"body": "Hi!", "msgtype": "m.text"}
-        )
+        self.assertEqual(channel.json_body["content"], orig_body)
 
         # The relations information should not include the edit to the edit.
         self._assert_edit_bundle(channel.json_body, edit_event_id, edit_event_content)
 
-        # /context should return the event updated for the *first* edit
+        # /context should return the bundled edit for the *first* edit
         # (The edit to the edit should be ignored.)
         channel = self.make_request(
             "GET",
@@ -690,7 +652,7 @@ class RelationsTestCase(BaseRelationsTestCase):
             access_token=self.user_token,
         )
         self.assertEqual(200, channel.code, channel.json_body)
-        self.assertEqual(channel.json_body["event"]["content"], new_body)
+        self.assertEqual(channel.json_body["event"]["content"], orig_body)
         self._assert_edit_bundle(
             channel.json_body["event"], edit_event_id, edit_event_content
         )
@@ -1287,7 +1249,6 @@ class BundledAggregationsTestCase(BaseRelationsTestCase):
         thread_summary = relations_dict[RelationTypes.THREAD]
         self.assertIn("latest_event", thread_summary)
         latest_event_in_thread = thread_summary["latest_event"]
-        self.assertEqual(latest_event_in_thread["content"]["body"], "I've been edited!")
         # The latest event in the thread should have the edit appear under the
         # bundled aggregations.
         self.assertDictContainsSubset(
-- 
cgit 1.5.1


From 41f127e06861230024f43aa4ce272116dc886700 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Mon, 6 Mar 2023 17:08:39 +0100
Subject: Pass the requester during event serialization. (#15174)

This allows Synapse to properly include the transaction ID in the
unsigned data of events.
---
 changelog.d/15174.bugfix             |  1 +
 synapse/events/utils.py              | 30 +++++++++++++++------
 synapse/handlers/events.py           | 20 +++++++-------
 synapse/handlers/initial_sync.py     | 51 +++++++++++++++++++++++++-----------
 synapse/handlers/message.py          |  9 ++++---
 synapse/handlers/pagination.py       |  4 ++-
 synapse/handlers/relations.py        | 12 +++++++--
 synapse/handlers/search.py           | 43 +++++++++++++++++++-----------
 synapse/rest/client/events.py        | 16 ++++++-----
 synapse/rest/client/notifications.py | 12 ++++++---
 synapse/rest/client/room.py          | 18 +++++++++----
 synapse/rest/client/sync.py          | 10 +++----
 12 files changed, 151 insertions(+), 75 deletions(-)
 create mode 100644 changelog.d/15174.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15174.bugfix b/changelog.d/15174.bugfix
new file mode 100644
index 0000000000..a0c70cbe22
--- /dev/null
+++ b/changelog.d/15174.bugfix
@@ -0,0 +1 @@
+Include the `transaction_id` in the events returned in many endpoints' responses.
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index 45f46949a1..b9c15ffcdb 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -38,7 +38,7 @@ from synapse.api.constants import (
 )
 from synapse.api.errors import Codes, SynapseError
 from synapse.api.room_versions import RoomVersion
-from synapse.types import JsonDict
+from synapse.types import JsonDict, Requester
 
 from . import EventBase
 
@@ -316,8 +316,9 @@ class SerializeEventConfig:
     as_client_event: bool = True
     # Function to convert from federation format to client format
     event_format: Callable[[JsonDict], JsonDict] = format_event_for_client_v1
-    # ID of the user's auth token - used for namespacing of transaction IDs
-    token_id: Optional[int] = None
+    # The entity that requested the event. This is used to determine whether to include
+    # the transaction_id in the unsigned section of the event.
+    requester: Optional[Requester] = None
     # List of event fields to include. If empty, all fields will be returned.
     only_event_fields: Optional[List[str]] = None
     # Some events can have stripped room state stored in the `unsigned` field.
@@ -367,11 +368,24 @@ def serialize_event(
             e.unsigned["redacted_because"], time_now_ms, config=config
         )
 
-    if config.token_id is not None:
-        if config.token_id == getattr(e.internal_metadata, "token_id", None):
-            txn_id = getattr(e.internal_metadata, "txn_id", None)
-            if txn_id is not None:
-                d["unsigned"]["transaction_id"] = txn_id
+    # If we have a txn_id saved in the internal_metadata, we should include it in the
+    # unsigned section of the event if it was sent by the same session as the one
+    # requesting the event.
+    # There is a special case for guests, because they only have one access token
+    # without associated access_token_id, so we always include the txn_id for events
+    # they sent.
+    txn_id = getattr(e.internal_metadata, "txn_id", None)
+    if txn_id is not None and config.requester is not None:
+        event_token_id = getattr(e.internal_metadata, "token_id", None)
+        if config.requester.user.to_string() == e.sender and (
+            (
+                event_token_id is not None
+                and config.requester.access_token_id is not None
+                and event_token_id == config.requester.access_token_id
+            )
+            or config.requester.is_guest
+        ):
+            d["unsigned"]["transaction_id"] = txn_id
 
     # invite_room_state and knock_room_state are a list of stripped room state events
     # that are meant to provide metadata about a room to an invitee/knocker. They are
diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py
index 949b69cb41..68c07f0265 100644
--- a/synapse/handlers/events.py
+++ b/synapse/handlers/events.py
@@ -23,7 +23,7 @@ from synapse.events.utils import SerializeEventConfig
 from synapse.handlers.presence import format_user_presence_state
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
 from synapse.streams.config import PaginationConfig
-from synapse.types import JsonDict, UserID
+from synapse.types import JsonDict, Requester, UserID
 from synapse.visibility import filter_events_for_client
 
 if TYPE_CHECKING:
@@ -46,13 +46,12 @@ class EventStreamHandler:
 
     async def get_stream(
         self,
-        auth_user_id: str,
+        requester: Requester,
         pagin_config: PaginationConfig,
         timeout: int = 0,
         as_client_event: bool = True,
         affect_presence: bool = True,
         room_id: Optional[str] = None,
-        is_guest: bool = False,
     ) -> JsonDict:
         """Fetches the events stream for a given user."""
 
@@ -62,13 +61,12 @@ class EventStreamHandler:
                 raise SynapseError(403, "This room has been blocked on this server")
 
         # send any outstanding server notices to the user.
-        await self._server_notices_sender.on_user_syncing(auth_user_id)
+        await self._server_notices_sender.on_user_syncing(requester.user.to_string())
 
-        auth_user = UserID.from_string(auth_user_id)
         presence_handler = self.hs.get_presence_handler()
 
         context = await presence_handler.user_syncing(
-            auth_user_id,
+            requester.user.to_string(),
             affect_presence=affect_presence,
             presence_state=PresenceState.ONLINE,
         )
@@ -82,10 +80,10 @@ class EventStreamHandler:
                 timeout = random.randint(int(timeout * 0.9), int(timeout * 1.1))
 
             stream_result = await self.notifier.get_events_for(
-                auth_user,
+                requester.user,
                 pagin_config,
                 timeout,
-                is_guest=is_guest,
+                is_guest=requester.is_guest,
                 explicit_room_id=room_id,
             )
             events = stream_result.events
@@ -102,7 +100,7 @@ class EventStreamHandler:
                     if event.membership != Membership.JOIN:
                         continue
                     # Send down presence.
-                    if event.state_key == auth_user_id:
+                    if event.state_key == requester.user.to_string():
                         # Send down presence for everyone in the room.
                         users: Iterable[str] = await self.store.get_users_in_room(
                             event.room_id
@@ -124,7 +122,9 @@ class EventStreamHandler:
             chunks = self._event_serializer.serialize_events(
                 events,
                 time_now,
-                config=SerializeEventConfig(as_client_event=as_client_event),
+                config=SerializeEventConfig(
+                    as_client_event=as_client_event, requester=requester
+                ),
             )
 
             chunk = {
diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py
index aead0b44b9..b3be7a86f0 100644
--- a/synapse/handlers/initial_sync.py
+++ b/synapse/handlers/initial_sync.py
@@ -318,11 +318,9 @@ class InitialSyncHandler:
         )
         is_peeking = member_event_id is None
 
-        user_id = requester.user.to_string()
-
         if membership == Membership.JOIN:
             result = await self._room_initial_sync_joined(
-                user_id, room_id, pagin_config, membership, is_peeking
+                requester, room_id, pagin_config, membership, is_peeking
             )
         elif membership == Membership.LEAVE:
             # The member_event_id will always be available if membership is set
@@ -330,10 +328,16 @@ class InitialSyncHandler:
             assert member_event_id
 
             result = await self._room_initial_sync_parted(
-                user_id, room_id, pagin_config, membership, member_event_id, is_peeking
+                requester,
+                room_id,
+                pagin_config,
+                membership,
+                member_event_id,
+                is_peeking,
             )
 
         account_data_events = []
+        user_id = requester.user.to_string()
         tags = await self.store.get_tags_for_room(user_id, room_id)
         if tags:
             account_data_events.append(
@@ -350,7 +354,7 @@ class InitialSyncHandler:
 
     async def _room_initial_sync_parted(
         self,
-        user_id: str,
+        requester: Requester,
         room_id: str,
         pagin_config: PaginationConfig,
         membership: str,
@@ -369,13 +373,17 @@ class InitialSyncHandler:
         )
 
         messages = await filter_events_for_client(
-            self._storage_controllers, user_id, messages, is_peeking=is_peeking
+            self._storage_controllers,
+            requester.user.to_string(),
+            messages,
+            is_peeking=is_peeking,
         )
 
         start_token = StreamToken.START.copy_and_replace(StreamKeyType.ROOM, token)
         end_token = StreamToken.START.copy_and_replace(StreamKeyType.ROOM, stream_token)
 
         time_now = self.clock.time_msec()
+        serialize_options = SerializeEventConfig(requester=requester)
 
         return {
             "membership": membership,
@@ -383,14 +391,18 @@ class InitialSyncHandler:
             "messages": {
                 "chunk": (
                     # Don't bundle aggregations as this is a deprecated API.
-                    self._event_serializer.serialize_events(messages, time_now)
+                    self._event_serializer.serialize_events(
+                        messages, time_now, config=serialize_options
+                    )
                 ),
                 "start": await start_token.to_string(self.store),
                 "end": await end_token.to_string(self.store),
             },
             "state": (
                 # Don't bundle aggregations as this is a deprecated API.
-                self._event_serializer.serialize_events(room_state.values(), time_now)
+                self._event_serializer.serialize_events(
+                    room_state.values(), time_now, config=serialize_options
+                )
             ),
             "presence": [],
             "receipts": [],
@@ -398,7 +410,7 @@ class InitialSyncHandler:
 
     async def _room_initial_sync_joined(
         self,
-        user_id: str,
+        requester: Requester,
         room_id: str,
         pagin_config: PaginationConfig,
         membership: str,
@@ -410,9 +422,12 @@ class InitialSyncHandler:
 
         # TODO: These concurrently
         time_now = self.clock.time_msec()
+        serialize_options = SerializeEventConfig(requester=requester)
         # Don't bundle aggregations as this is a deprecated API.
         state = self._event_serializer.serialize_events(
-            current_state.values(), time_now
+            current_state.values(),
+            time_now,
+            config=serialize_options,
         )
 
         now_token = self.hs.get_event_sources().get_current_token()
@@ -450,7 +465,10 @@ class InitialSyncHandler:
             if not receipts:
                 return []
 
-            return ReceiptEventSource.filter_out_private_receipts(receipts, user_id)
+            return ReceiptEventSource.filter_out_private_receipts(
+                receipts,
+                requester.user.to_string(),
+            )
 
         presence, receipts, (messages, token) = await make_deferred_yieldable(
             gather_results(
@@ -469,20 +487,23 @@ class InitialSyncHandler:
         )
 
         messages = await filter_events_for_client(
-            self._storage_controllers, user_id, messages, is_peeking=is_peeking
+            self._storage_controllers,
+            requester.user.to_string(),
+            messages,
+            is_peeking=is_peeking,
         )
 
         start_token = now_token.copy_and_replace(StreamKeyType.ROOM, token)
         end_token = now_token
 
-        time_now = self.clock.time_msec()
-
         ret = {
             "room_id": room_id,
             "messages": {
                 "chunk": (
                     # Don't bundle aggregations as this is a deprecated API.
-                    self._event_serializer.serialize_events(messages, time_now)
+                    self._event_serializer.serialize_events(
+                        messages, time_now, config=serialize_options
+                    )
                 ),
                 "start": await start_token.to_string(self.store),
                 "end": await end_token.to_string(self.store),
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index e433d6b01f..da129ec16a 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -50,7 +50,7 @@ from synapse.event_auth import validate_event_for_room_version
 from synapse.events import EventBase, relation_from_event
 from synapse.events.builder import EventBuilder
 from synapse.events.snapshot import EventContext, UnpersistedEventContextBase
-from synapse.events.utils import maybe_upsert_event_field
+from synapse.events.utils import SerializeEventConfig, maybe_upsert_event_field
 from synapse.events.validator import EventValidator
 from synapse.handlers.directory import DirectoryHandler
 from synapse.logging import opentracing
@@ -245,8 +245,11 @@ class MessageHandler:
                 )
                 room_state = room_state_events[membership_event_id]
 
-        now = self.clock.time_msec()
-        events = self._event_serializer.serialize_events(room_state.values(), now)
+        events = self._event_serializer.serialize_events(
+            room_state.values(),
+            self.clock.time_msec(),
+            config=SerializeEventConfig(requester=requester),
+        )
         return events
 
     async def _user_can_see_state_at_event(
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index ceefa16b49..8c79c055ba 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -579,7 +579,9 @@ class PaginationHandler:
 
         time_now = self.clock.time_msec()
 
-        serialize_options = SerializeEventConfig(as_client_event=as_client_event)
+        serialize_options = SerializeEventConfig(
+            as_client_event=as_client_event, requester=requester
+        )
 
         chunk = {
             "chunk": (
diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py
index 553053b694..1d09fdf135 100644
--- a/synapse/handlers/relations.py
+++ b/synapse/handlers/relations.py
@@ -20,6 +20,7 @@ import attr
 from synapse.api.constants import Direction, EventTypes, RelationTypes
 from synapse.api.errors import SynapseError
 from synapse.events import EventBase, relation_from_event
+from synapse.events.utils import SerializeEventConfig
 from synapse.logging.context import make_deferred_yieldable, run_in_background
 from synapse.logging.opentracing import trace
 from synapse.storage.databases.main.relations import ThreadsNextBatch, _RelatedEvent
@@ -151,16 +152,23 @@ class RelationsHandler:
         )
 
         now = self._clock.time_msec()
+        serialize_options = SerializeEventConfig(requester=requester)
         return_value: JsonDict = {
             "chunk": self._event_serializer.serialize_events(
-                events, now, bundle_aggregations=aggregations
+                events,
+                now,
+                bundle_aggregations=aggregations,
+                config=serialize_options,
             ),
         }
         if include_original_event:
             # Do not bundle aggregations when retrieving the original event because
             # we want the content before relations are applied to it.
             return_value["original_event"] = self._event_serializer.serialize_event(
-                event, now, bundle_aggregations=None
+                event,
+                now,
+                bundle_aggregations=None,
+                config=serialize_options,
             )
 
         if next_token:
diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py
index 9bbf83047d..aad4706f14 100644
--- a/synapse/handlers/search.py
+++ b/synapse/handlers/search.py
@@ -23,7 +23,8 @@ from synapse.api.constants import EventTypes, Membership
 from synapse.api.errors import NotFoundError, SynapseError
 from synapse.api.filtering import Filter
 from synapse.events import EventBase
-from synapse.types import JsonDict, StrCollection, StreamKeyType, UserID
+from synapse.events.utils import SerializeEventConfig
+from synapse.types import JsonDict, Requester, StrCollection, StreamKeyType, UserID
 from synapse.types.state import StateFilter
 from synapse.visibility import filter_events_for_client
 
@@ -109,12 +110,12 @@ class SearchHandler:
         return historical_room_ids
 
     async def search(
-        self, user: UserID, content: JsonDict, batch: Optional[str] = None
+        self, requester: Requester, content: JsonDict, batch: Optional[str] = None
     ) -> JsonDict:
         """Performs a full text search for a user.
 
         Args:
-            user: The user performing the search.
+            requester: The user performing the search.
             content: Search parameters
             batch: The next_batch parameter. Used for pagination.
 
@@ -199,7 +200,7 @@ class SearchHandler:
             )
 
         return await self._search(
-            user,
+            requester,
             batch_group,
             batch_group_key,
             batch_token,
@@ -217,7 +218,7 @@ class SearchHandler:
 
     async def _search(
         self,
-        user: UserID,
+        requester: Requester,
         batch_group: Optional[str],
         batch_group_key: Optional[str],
         batch_token: Optional[str],
@@ -235,7 +236,7 @@ class SearchHandler:
         """Performs a full text search for a user.
 
         Args:
-            user: The user performing the search.
+            requester: The user performing the search.
             batch_group: Pagination information.
             batch_group_key: Pagination information.
             batch_token: Pagination information.
@@ -269,7 +270,7 @@ class SearchHandler:
 
         # TODO: Search through left rooms too
         rooms = await self.store.get_rooms_for_local_user_where_membership_is(
-            user.to_string(),
+            requester.user.to_string(),
             membership_list=[Membership.JOIN],
             # membership_list=[Membership.JOIN, Membership.LEAVE, Membership.Ban],
         )
@@ -303,13 +304,13 @@ class SearchHandler:
 
         if order_by == "rank":
             search_result, sender_group = await self._search_by_rank(
-                user, room_ids, search_term, keys, search_filter
+                requester.user, room_ids, search_term, keys, search_filter
             )
             # Unused return values for rank search.
             global_next_batch = None
         elif order_by == "recent":
             search_result, global_next_batch = await self._search_by_recent(
-                user,
+                requester.user,
                 room_ids,
                 search_term,
                 keys,
@@ -334,7 +335,7 @@ class SearchHandler:
             assert after_limit is not None
 
             contexts = await self._calculate_event_contexts(
-                user,
+                requester.user,
                 search_result.allowed_events,
                 before_limit,
                 after_limit,
@@ -363,27 +364,37 @@ class SearchHandler:
                 # The returned events.
                 search_result.allowed_events,
             ),
-            user.to_string(),
+            requester.user.to_string(),
         )
 
         # We're now about to serialize the events. We should not make any
         # blocking calls after this. Otherwise, the 'age' will be wrong.
 
         time_now = self.clock.time_msec()
+        serialize_options = SerializeEventConfig(requester=requester)
 
         for context in contexts.values():
             context["events_before"] = self._event_serializer.serialize_events(
-                context["events_before"], time_now, bundle_aggregations=aggregations
+                context["events_before"],
+                time_now,
+                bundle_aggregations=aggregations,
+                config=serialize_options,
             )
             context["events_after"] = self._event_serializer.serialize_events(
-                context["events_after"], time_now, bundle_aggregations=aggregations
+                context["events_after"],
+                time_now,
+                bundle_aggregations=aggregations,
+                config=serialize_options,
             )
 
         results = [
             {
                 "rank": search_result.rank_map[e.event_id],
                 "result": self._event_serializer.serialize_event(
-                    e, time_now, bundle_aggregations=aggregations
+                    e,
+                    time_now,
+                    bundle_aggregations=aggregations,
+                    config=serialize_options,
                 ),
                 "context": contexts.get(e.event_id, {}),
             }
@@ -398,7 +409,9 @@ class SearchHandler:
 
         if state_results:
             rooms_cat_res["state"] = {
-                room_id: self._event_serializer.serialize_events(state_events, time_now)
+                room_id: self._event_serializer.serialize_events(
+                    state_events, time_now, config=serialize_options
+                )
                 for room_id, state_events in state_results.items()
             }
 
diff --git a/synapse/rest/client/events.py b/synapse/rest/client/events.py
index 782e7d14e8..694d77d287 100644
--- a/synapse/rest/client/events.py
+++ b/synapse/rest/client/events.py
@@ -17,6 +17,7 @@ import logging
 from typing import TYPE_CHECKING, Dict, List, Tuple, Union
 
 from synapse.api.errors import SynapseError
+from synapse.events.utils import SerializeEventConfig
 from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_string
 from synapse.http.site import SynapseRequest
@@ -43,9 +44,8 @@ class EventStreamRestServlet(RestServlet):
 
     async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request, allow_guest=True)
-        is_guest = requester.is_guest
         args: Dict[bytes, List[bytes]] = request.args  # type: ignore
-        if is_guest:
+        if requester.is_guest:
             if b"room_id" not in args:
                 raise SynapseError(400, "Guest users must specify room_id param")
         room_id = parse_string(request, "room_id")
@@ -63,13 +63,12 @@ class EventStreamRestServlet(RestServlet):
         as_client_event = b"raw" not in args
 
         chunk = await self.event_stream_handler.get_stream(
-            requester.user.to_string(),
+            requester,
             pagin_config,
             timeout=timeout,
             as_client_event=as_client_event,
-            affect_presence=(not is_guest),
+            affect_presence=(not requester.is_guest),
             room_id=room_id,
-            is_guest=is_guest,
         )
 
         return 200, chunk
@@ -91,9 +90,12 @@ class EventRestServlet(RestServlet):
         requester = await self.auth.get_user_by_req(request)
         event = await self.event_handler.get_event(requester.user, None, event_id)
 
-        time_now = self.clock.time_msec()
         if event:
-            result = self._event_serializer.serialize_event(event, time_now)
+            result = self._event_serializer.serialize_event(
+                event,
+                self.clock.time_msec(),
+                config=SerializeEventConfig(requester=requester),
+            )
             return 200, result
         else:
             return 404, "Event not found."
diff --git a/synapse/rest/client/notifications.py b/synapse/rest/client/notifications.py
index 61268e3af1..ea10042569 100644
--- a/synapse/rest/client/notifications.py
+++ b/synapse/rest/client/notifications.py
@@ -72,6 +72,12 @@ class NotificationsServlet(RestServlet):
 
         next_token = None
 
+        serialize_options = SerializeEventConfig(
+            event_format=format_event_for_client_v2_without_room_id,
+            requester=requester,
+        )
+        now = self.clock.time_msec()
+
         for pa in push_actions:
             returned_pa = {
                 "room_id": pa.room_id,
@@ -81,10 +87,8 @@ class NotificationsServlet(RestServlet):
                 "event": (
                     self._event_serializer.serialize_event(
                         notif_events[pa.event_id],
-                        self.clock.time_msec(),
-                        config=SerializeEventConfig(
-                            event_format=format_event_for_client_v2_without_room_id
-                        ),
+                        now,
+                        config=serialize_options,
                     )
                 ),
             }
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index c5af07816a..61e4cf0213 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -37,7 +37,7 @@ from synapse.api.errors import (
     UnredactedContentDeletedError,
 )
 from synapse.api.filtering import Filter
-from synapse.events.utils import format_event_for_client_v2
+from synapse.events.utils import SerializeEventConfig, format_event_for_client_v2
 from synapse.http.server import HttpServer
 from synapse.http.servlet import (
     ResolveRoomIdMixin,
@@ -814,11 +814,13 @@ class RoomEventServlet(RestServlet):
                 [event], requester.user.to_string()
             )
 
-            time_now = self.clock.time_msec()
             # per MSC2676, /rooms/{roomId}/event/{eventId}, should return the
             # *original* event, rather than the edited version
             event_dict = self._event_serializer.serialize_event(
-                event, time_now, bundle_aggregations=aggregations
+                event,
+                self.clock.time_msec(),
+                bundle_aggregations=aggregations,
+                config=SerializeEventConfig(requester=requester),
             )
             return 200, event_dict
 
@@ -863,24 +865,30 @@ class RoomEventContextServlet(RestServlet):
             raise SynapseError(404, "Event not found.", errcode=Codes.NOT_FOUND)
 
         time_now = self.clock.time_msec()
+        serializer_options = SerializeEventConfig(requester=requester)
         results = {
             "events_before": self._event_serializer.serialize_events(
                 event_context.events_before,
                 time_now,
                 bundle_aggregations=event_context.aggregations,
+                config=serializer_options,
             ),
             "event": self._event_serializer.serialize_event(
                 event_context.event,
                 time_now,
                 bundle_aggregations=event_context.aggregations,
+                config=serializer_options,
             ),
             "events_after": self._event_serializer.serialize_events(
                 event_context.events_after,
                 time_now,
                 bundle_aggregations=event_context.aggregations,
+                config=serializer_options,
             ),
             "state": self._event_serializer.serialize_events(
-                event_context.state, time_now
+                event_context.state,
+                time_now,
+                config=serializer_options,
             ),
             "start": event_context.start,
             "end": event_context.end,
@@ -1192,7 +1200,7 @@ class SearchRestServlet(RestServlet):
         content = parse_json_object_from_request(request)
 
         batch = parse_string(request, "next_batch")
-        results = await self.search_handler.search(requester.user, content, batch)
+        results = await self.search_handler.search(requester, content, batch)
 
         return 200, results
 
diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py
index 8fcb8ac3d9..e578b26fa3 100644
--- a/synapse/rest/client/sync.py
+++ b/synapse/rest/client/sync.py
@@ -38,7 +38,7 @@ from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_boolean, parse_integer, parse_string
 from synapse.http.site import SynapseRequest
 from synapse.logging.opentracing import trace_with_opname
-from synapse.types import JsonDict, StreamToken
+from synapse.types import JsonDict, Requester, StreamToken
 from synapse.util import json_decoder
 
 from ._base import client_patterns, set_timeline_upper_limit
@@ -226,7 +226,7 @@ class SyncRestServlet(RestServlet):
         # We know that the the requester has an access token since appservices
         # cannot use sync.
         response_content = await self.encode_response(
-            time_now, sync_result, requester.access_token_id, filter_collection
+            time_now, sync_result, requester, filter_collection
         )
 
         logger.debug("Event formatting complete")
@@ -237,7 +237,7 @@ class SyncRestServlet(RestServlet):
         self,
         time_now: int,
         sync_result: SyncResult,
-        access_token_id: Optional[int],
+        requester: Requester,
         filter: FilterCollection,
     ) -> JsonDict:
         logger.debug("Formatting events in sync response")
@@ -250,12 +250,12 @@ class SyncRestServlet(RestServlet):
 
         serialize_options = SerializeEventConfig(
             event_format=event_formatter,
-            token_id=access_token_id,
+            requester=requester,
             only_event_fields=filter.event_fields,
         )
         stripped_serialize_options = SerializeEventConfig(
             event_format=event_formatter,
-            token_id=access_token_id,
+            requester=requester,
             include_stripped_room_state=True,
         )
 
-- 
cgit 1.5.1


From c69aae94cda9b62b2a82584b2f5ee72a95feb435 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 7 Mar 2023 08:51:34 +0000
Subject: Split up txn for fetching device keys (#15215)

We look up keys in batches, but we should do that outside of the
transaction to avoid starving the database pool.
---
 changelog.d/15215.misc                            |  1 +
 synapse/storage/database.py                       | 10 +++++++++-
 synapse/storage/databases/main/end_to_end_keys.py | 24 +++++++++++++++--------
 3 files changed, 26 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/15215.misc

(limited to 'synapse')

diff --git a/changelog.d/15215.misc b/changelog.d/15215.misc
new file mode 100644
index 0000000000..fe52a56a7e
--- /dev/null
+++ b/changelog.d/15215.misc
@@ -0,0 +1 @@
+Refactor database transaction for querying users' devices to reduce database pool contention.
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index feaa6cdd07..5efe31aa19 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -672,7 +672,15 @@ class DatabasePool:
             f = cast(types.FunctionType, func)  # type: ignore[redundant-cast]
             if f.__closure__:
                 for i, cell in enumerate(f.__closure__):
-                    if inspect.isgenerator(cell.cell_contents):
+                    try:
+                        contents = cell.cell_contents
+                    except ValueError:
+                        # cell.cell_contents can raise if the "cell" is empty,
+                        # which indicates that the variable is currently
+                        # unbound.
+                        continue
+
+                    if inspect.isgenerator(contents):
                         logger.error(
                             "Programming error: function %s references generator %s "
                             "via its closure",
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index b9c39b1718..a3b6c8ae8e 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -244,9 +244,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
         set_tag("include_all_devices", include_all_devices)
         set_tag("include_deleted_devices", include_deleted_devices)
 
-        result = await self.db_pool.runInteraction(
-            "get_e2e_device_keys",
-            self._get_e2e_device_keys_txn,
+        result = await self._get_e2e_device_keys(
             query_list,
             include_all_devices,
             include_deleted_devices,
@@ -285,9 +283,8 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
         log_kv(result)
         return result
 
-    def _get_e2e_device_keys_txn(
+    async def _get_e2e_device_keys(
         self,
-        txn: LoggingTransaction,
         query_list: Collection[Tuple[str, Optional[str]]],
         include_all_devices: bool = False,
         include_deleted_devices: bool = False,
@@ -319,7 +316,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
 
         if user_list:
             user_id_in_list_clause, user_args = make_in_list_sql_clause(
-                txn.database_engine, "user_id", user_list
+                self.database_engine, "user_id", user_list
             )
             query_clauses.append(user_id_in_list_clause)
             query_params_list.append(user_args)
@@ -332,13 +329,16 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
                     user_device_id_in_list_clause,
                     user_device_args,
                 ) = make_tuple_in_list_sql_clause(
-                    txn.database_engine, ("user_id", "device_id"), user_device_batch
+                    self.database_engine, ("user_id", "device_id"), user_device_batch
                 )
                 query_clauses.append(user_device_id_in_list_clause)
                 query_params_list.append(user_device_args)
 
         result: Dict[str, Dict[str, Optional[DeviceKeyLookupResult]]] = {}
-        for query_clause, query_params in zip(query_clauses, query_params_list):
+
+        def get_e2e_device_keys_txn(
+            txn: LoggingTransaction, query_clause: str, query_params: list
+        ) -> None:
             sql = (
                 "SELECT user_id, device_id, "
                 "    d.display_name, "
@@ -361,6 +361,14 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
                     display_name, db_to_json(key_json) if key_json else None
                 )
 
+        for query_clause, query_params in zip(query_clauses, query_params_list):
+            await self.db_pool.runInteraction(
+                "_get_e2e_device_keys",
+                get_e2e_device_keys_txn,
+                query_clause,
+                query_params,
+            )
+
         if include_deleted_devices:
             for user_id, device_id in deleted_devices:
                 if device_id is None:
-- 
cgit 1.5.1


From 820f02b70badfc04d35c95f8ffb9682c8310e91e Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 7 Mar 2023 10:06:02 -0500
Subject: Stabilize support for MSC3966: event_property_contains push
 condition. (#15187)

This removes the configuration flag & updates the identifiers to
use the stable version.
---
 changelog.d/15187.feature                   |  1 +
 rust/benches/evaluator.rs                   |  4 ----
 rust/src/push/evaluator.rs                  | 22 +++++-----------------
 rust/src/push/mod.rs                        |  8 ++------
 stubs/synapse/synapse_rust/push.pyi         |  1 -
 synapse/config/experimental.py              | 10 ++--------
 synapse/push/bulk_push_rule_evaluator.py    |  1 -
 tests/push/test_bulk_push_rule_evaluator.py | 18 ++----------------
 tests/push/test_push_rule_evaluator.py      |  3 +--
 9 files changed, 13 insertions(+), 55 deletions(-)
 create mode 100644 changelog.d/15187.feature

(limited to 'synapse')

diff --git a/changelog.d/15187.feature b/changelog.d/15187.feature
new file mode 100644
index 0000000000..f2b7689255
--- /dev/null
+++ b/changelog.d/15187.feature
@@ -0,0 +1 @@
+Stabilise support for [MSC3966](https://github.com/matrix-org/matrix-spec-proposals/pull/3966): `event_property_contains` push condition.
diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs
index 79b553dbb0..64e13f6486 100644
--- a/rust/benches/evaluator.rs
+++ b/rust/benches/evaluator.rs
@@ -52,7 +52,6 @@ fn bench_match_exact(b: &mut Bencher) {
         true,
         vec![],
         false,
-        false,
     )
     .unwrap();
 
@@ -98,7 +97,6 @@ fn bench_match_word(b: &mut Bencher) {
         true,
         vec![],
         false,
-        false,
     )
     .unwrap();
 
@@ -144,7 +142,6 @@ fn bench_match_word_miss(b: &mut Bencher) {
         true,
         vec![],
         false,
-        false,
     )
     .unwrap();
 
@@ -190,7 +187,6 @@ fn bench_eval_message(b: &mut Bencher) {
         true,
         vec![],
         false,
-        false,
     )
     .unwrap();
 
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index 67fe6a4823..6941c61ea4 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -96,9 +96,6 @@ pub struct PushRuleEvaluator {
     /// If MSC3931 (room version feature flags) is enabled. Usually controlled by the same
     /// flag as MSC1767 (extensible events core).
     msc3931_enabled: bool,
-
-    /// If MSC3966 (exact_event_property_contains push rule condition) is enabled.
-    msc3966_exact_event_property_contains: bool,
 }
 
 #[pymethods]
@@ -116,7 +113,6 @@ impl PushRuleEvaluator {
         related_event_match_enabled: bool,
         room_version_feature_flags: Vec<String>,
         msc3931_enabled: bool,
-        msc3966_exact_event_property_contains: bool,
     ) -> Result<Self, Error> {
         let body = match flattened_keys.get("content.body") {
             Some(JsonValue::Value(SimpleJsonValue::Str(s))) => s.clone(),
@@ -134,7 +130,6 @@ impl PushRuleEvaluator {
             related_event_match_enabled,
             room_version_feature_flags,
             msc3931_enabled,
-            msc3966_exact_event_property_contains,
         })
     }
 
@@ -301,8 +296,8 @@ impl PushRuleEvaluator {
                     Some(Cow::Borrowed(pattern)),
                 )?
             }
-            KnownCondition::ExactEventPropertyContains(event_property_is) => self
-                .match_exact_event_property_contains(
+            KnownCondition::EventPropertyContains(event_property_is) => self
+                .match_event_property_contains(
                     event_property_is.key.clone(),
                     event_property_is.value.clone(),
                 )?,
@@ -321,7 +316,7 @@ impl PushRuleEvaluator {
                     EventMatchPatternType::UserLocalpart => get_localpart_from_id(user_id)?,
                 };
 
-                self.match_exact_event_property_contains(
+                self.match_event_property_contains(
                     exact_event_match.key.clone(),
                     Cow::Borrowed(&SimpleJsonValue::Str(pattern.to_string())),
                 )?
@@ -454,17 +449,12 @@ impl PushRuleEvaluator {
         }
     }
 
-    /// Evaluates a `exact_event_property_contains` condition. (MSC3966)
-    fn match_exact_event_property_contains(
+    /// Evaluates a `event_property_contains` condition.
+    fn match_event_property_contains(
         &self,
         key: Cow<str>,
         value: Cow<SimpleJsonValue>,
     ) -> Result<bool, Error> {
-        // First check if the feature is enabled.
-        if !self.msc3966_exact_event_property_contains {
-            return Ok(false);
-        }
-
         let haystack = if let Some(JsonValue::Array(haystack)) = self.flattened_keys.get(&*key) {
             haystack
         } else {
@@ -515,7 +505,6 @@ fn push_rule_evaluator() {
         true,
         vec![],
         true,
-        true,
     )
     .unwrap();
 
@@ -545,7 +534,6 @@ fn test_requires_room_version_supports_condition() {
         false,
         flags,
         true,
-        true,
     )
     .unwrap();
 
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index 7fde88e825..575a1c1e68 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -337,13 +337,9 @@ pub enum KnownCondition {
     // Identical to related_event_match but gives predefined patterns. Cannot be added by users.
     #[serde(skip_deserializing, rename = "im.nheko.msc3664.related_event_match")]
     RelatedEventMatchType(RelatedEventMatchTypeCondition),
-    #[serde(rename = "org.matrix.msc3966.exact_event_property_contains")]
-    ExactEventPropertyContains(EventPropertyIsCondition),
+    EventPropertyContains(EventPropertyIsCondition),
     // Identical to exact_event_property_contains but gives predefined patterns. Cannot be added by users.
-    #[serde(
-        skip_deserializing,
-        rename = "org.matrix.msc3966.exact_event_property_contains"
-    )]
+    #[serde(skip_deserializing, rename = "event_property_contains")]
     ExactEventPropertyContainsType(EventPropertyIsTypeCondition),
     ContainsDisplayName,
     RoomMemberCount {
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index c040944aac..5d0ce4b1a4 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -65,7 +65,6 @@ class PushRuleEvaluator:
         related_event_match_enabled: bool,
         room_version_feature_flags: Tuple[str, ...],
         msc3931_enabled: bool,
-        msc3966_exact_event_property_contains: bool,
     ): ...
     def run(
         self,
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 489f2601ac..9ff382ccc3 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -171,15 +171,9 @@ class ExperimentalConfig(Config):
             "msc3873_escape_event_match_key", False
         )
 
-        # MSC3966: exact_event_property_contains push rule condition.
-        self.msc3966_exact_event_property_contains = experimental.get(
-            "msc3966_exact_event_property_contains", False
-        )
-
         # MSC3952: Intentional mentions, this depends on MSC3966.
-        self.msc3952_intentional_mentions = (
-            experimental.get("msc3952_intentional_mentions", False)
-            and self.msc3966_exact_event_property_contains
+        self.msc3952_intentional_mentions = experimental.get(
+            "msc3952_intentional_mentions", False
         )
 
         # MSC3959: Do not generate notifications for edits.
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index ba12b6d79a..45622a9e9b 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -413,7 +413,6 @@ class BulkPushRuleEvaluator:
             self._related_event_match_enabled,
             event.room_version.msc3931_push_features,
             self.hs.config.experimental.msc1767_enabled,  # MSC3931 flag
-            self.hs.config.experimental.msc3966_exact_event_property_contains,
         )
 
         users = rules_by_user.keys()
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
index c6591c50de..46df0102f7 100644
--- a/tests/push/test_bulk_push_rule_evaluator.py
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -228,14 +228,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         )
         return len(result) > 0
 
-    @override_config(
-        {
-            "experimental_features": {
-                "msc3952_intentional_mentions": True,
-                "msc3966_exact_event_property_contains": True,
-            }
-        }
-    )
+    @override_config({"experimental_features": {"msc3952_intentional_mentions": True}})
     def test_user_mentions(self) -> None:
         """Test the behavior of an event which includes invalid user mentions."""
         bulk_evaluator = BulkPushRuleEvaluator(self.hs)
@@ -331,14 +324,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
             )
         )
 
-    @override_config(
-        {
-            "experimental_features": {
-                "msc3952_intentional_mentions": True,
-                "msc3966_exact_event_property_contains": True,
-            }
-        }
-    )
+    @override_config({"experimental_features": {"msc3952_intentional_mentions": True}})
     def test_room_mentions(self) -> None:
         """Test the behavior of an event which includes invalid room mentions."""
         bulk_evaluator = BulkPushRuleEvaluator(self.hs)
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index ff5a9a66f5..6deee0fd02 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -173,7 +173,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
             related_event_match_enabled=True,
             room_version_feature_flags=event.room_version.msc3931_push_features,
             msc3931_enabled=True,
-            msc3966_exact_event_property_contains=True,
         )
 
     def test_display_name(self) -> None:
@@ -526,7 +525,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
         """Check that exact_event_property_contains conditions work as expected."""
 
         condition = {
-            "kind": "org.matrix.msc3966.exact_event_property_contains",
+            "kind": "event_property_contains",
             "key": "content.value",
             "value": "foobaz",
         }
-- 
cgit 1.5.1


From 47bc84dd53b81523d9400af462dcff68185b22fd Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Tue, 7 Mar 2023 17:05:22 +0100
Subject: Pass the Requester down to the HttpTransactionCache. (#15200)

---
 changelog.d/15200.misc                      |   1 +
 synapse/rest/admin/server_notice_servlet.py |  34 ++++--
 synapse/rest/client/room.py                 | 174 +++++++++++++++++-----------
 synapse/rest/client/sendtodevice.py         |  25 ++--
 synapse/rest/client/transactions.py         |  55 ++++-----
 tests/rest/client/test_transactions.py      |  55 ++++++---
 6 files changed, 215 insertions(+), 129 deletions(-)
 create mode 100644 changelog.d/15200.misc

(limited to 'synapse')

diff --git a/changelog.d/15200.misc b/changelog.d/15200.misc
new file mode 100644
index 0000000000..dc66172226
--- /dev/null
+++ b/changelog.d/15200.misc
@@ -0,0 +1 @@
+Make the `HttpTransactionCache` use the `Requester` in addition to just the `Request` to build the transaction key.
diff --git a/synapse/rest/admin/server_notice_servlet.py b/synapse/rest/admin/server_notice_servlet.py
index 15da9cd881..7dd1c10b91 100644
--- a/synapse/rest/admin/server_notice_servlet.py
+++ b/synapse/rest/admin/server_notice_servlet.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from http import HTTPStatus
-from typing import TYPE_CHECKING, Awaitable, Optional, Tuple
+from typing import TYPE_CHECKING, Optional, Tuple
 
 from synapse.api.constants import EventTypes
 from synapse.api.errors import NotFoundError, SynapseError
@@ -23,10 +23,10 @@ from synapse.http.servlet import (
     parse_json_object_from_request,
 )
 from synapse.http.site import SynapseRequest
-from synapse.rest.admin import assert_requester_is_admin
-from synapse.rest.admin._base import admin_patterns
+from synapse.logging.opentracing import set_tag
+from synapse.rest.admin._base import admin_patterns, assert_user_is_admin
 from synapse.rest.client.transactions import HttpTransactionCache
-from synapse.types import JsonDict, UserID
+from synapse.types import JsonDict, Requester, UserID
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -70,10 +70,13 @@ class SendServerNoticeServlet(RestServlet):
             self.__class__.__name__,
         )
 
-    async def on_POST(
-        self, request: SynapseRequest, txn_id: Optional[str] = None
+    async def _do(
+        self,
+        request: SynapseRequest,
+        requester: Requester,
+        txn_id: Optional[str],
     ) -> Tuple[int, JsonDict]:
-        await assert_requester_is_admin(self.auth, request)
+        await assert_user_is_admin(self.auth, requester)
         body = parse_json_object_from_request(request)
         assert_params_in_dict(body, ("user_id", "content"))
         event_type = body.get("type", EventTypes.Message)
@@ -106,9 +109,18 @@ class SendServerNoticeServlet(RestServlet):
 
         return HTTPStatus.OK, {"event_id": event.event_id}
 
-    def on_PUT(
+    async def on_POST(
+        self,
+        request: SynapseRequest,
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request)
+        return await self._do(request, requester, None)
+
+    async def on_PUT(
         self, request: SynapseRequest, txn_id: str
-    ) -> Awaitable[Tuple[int, JsonDict]]:
-        return self.txns.fetch_or_execute_request(
-            request, self.on_POST, request, txn_id
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request)
+        set_tag("txn_id", txn_id)
+        return await self.txns.fetch_or_execute_request(
+            request, requester, self._do, request, requester, txn_id
         )
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index 61e4cf0213..129b6fe6b0 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -57,7 +57,7 @@ from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.rest.client._base import client_patterns
 from synapse.rest.client.transactions import HttpTransactionCache
 from synapse.streams.config import PaginationConfig
-from synapse.types import JsonDict, StreamToken, ThirdPartyInstanceID, UserID
+from synapse.types import JsonDict, Requester, StreamToken, ThirdPartyInstanceID, UserID
 from synapse.types.state import StateFilter
 from synapse.util import json_decoder
 from synapse.util.cancellation import cancellable
@@ -151,15 +151,22 @@ class RoomCreateRestServlet(TransactionRestServlet):
         PATTERNS = "/createRoom"
         register_txn_path(self, PATTERNS, http_server)
 
-    def on_PUT(
+    async def on_PUT(
         self, request: SynapseRequest, txn_id: str
-    ) -> Awaitable[Tuple[int, JsonDict]]:
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request)
         set_tag("txn_id", txn_id)
-        return self.txns.fetch_or_execute_request(request, self.on_POST, request)
+        return await self.txns.fetch_or_execute_request(
+            request, requester, self._do, request, requester
+        )
 
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
+        return await self._do(request, requester)
 
+    async def _do(
+        self, request: SynapseRequest, requester: Requester
+    ) -> Tuple[int, JsonDict]:
         room_id, _, _ = await self._room_creation_handler.create_room(
             requester, self.get_room_config(request)
         )
@@ -172,9 +179,9 @@ class RoomCreateRestServlet(TransactionRestServlet):
 
 
 # TODO: Needs unit testing for generic events
-class RoomStateEventRestServlet(TransactionRestServlet):
+class RoomStateEventRestServlet(RestServlet):
     def __init__(self, hs: "HomeServer"):
-        super().__init__(hs)
+        super().__init__()
         self.event_creation_handler = hs.get_event_creation_handler()
         self.room_member_handler = hs.get_room_member_handler()
         self.message_handler = hs.get_message_handler()
@@ -324,16 +331,16 @@ class RoomSendEventRestServlet(TransactionRestServlet):
     def register(self, http_server: HttpServer) -> None:
         # /rooms/$roomid/send/$event_type[/$txn_id]
         PATTERNS = "/rooms/(?P<room_id>[^/]*)/send/(?P<event_type>[^/]*)"
-        register_txn_path(self, PATTERNS, http_server, with_get=True)
+        register_txn_path(self, PATTERNS, http_server)
 
-    async def on_POST(
+    async def _do(
         self,
         request: SynapseRequest,
+        requester: Requester,
         room_id: str,
         event_type: str,
-        txn_id: Optional[str] = None,
+        txn_id: Optional[str],
     ) -> Tuple[int, JsonDict]:
-        requester = await self.auth.get_user_by_req(request, allow_guest=True)
         content = parse_json_object_from_request(request)
 
         event_dict: JsonDict = {
@@ -362,18 +369,30 @@ class RoomSendEventRestServlet(TransactionRestServlet):
         set_tag("event_id", event_id)
         return 200, {"event_id": event_id}
 
-    def on_GET(
-        self, request: SynapseRequest, room_id: str, event_type: str, txn_id: str
-    ) -> Tuple[int, str]:
-        return 200, "Not implemented"
+    async def on_POST(
+        self,
+        request: SynapseRequest,
+        room_id: str,
+        event_type: str,
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request, allow_guest=True)
+        return await self._do(request, requester, room_id, event_type, None)
 
-    def on_PUT(
+    async def on_PUT(
         self, request: SynapseRequest, room_id: str, event_type: str, txn_id: str
-    ) -> Awaitable[Tuple[int, JsonDict]]:
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request, allow_guest=True)
         set_tag("txn_id", txn_id)
 
-        return self.txns.fetch_or_execute_request(
-            request, self.on_POST, request, room_id, event_type, txn_id
+        return await self.txns.fetch_or_execute_request(
+            request,
+            requester,
+            self._do,
+            request,
+            requester,
+            room_id,
+            event_type,
+            txn_id,
         )
 
 
@@ -389,14 +408,13 @@ class JoinRoomAliasServlet(ResolveRoomIdMixin, TransactionRestServlet):
         PATTERNS = "/join/(?P<room_identifier>[^/]*)"
         register_txn_path(self, PATTERNS, http_server)
 
-    async def on_POST(
+    async def _do(
         self,
         request: SynapseRequest,
+        requester: Requester,
         room_identifier: str,
-        txn_id: Optional[str] = None,
+        txn_id: Optional[str],
     ) -> Tuple[int, JsonDict]:
-        requester = await self.auth.get_user_by_req(request, allow_guest=True)
-
         content = parse_json_object_from_request(request, allow_empty_body=True)
 
         # twisted.web.server.Request.args is incorrectly defined as Optional[Any]
@@ -420,22 +438,31 @@ class JoinRoomAliasServlet(ResolveRoomIdMixin, TransactionRestServlet):
 
         return 200, {"room_id": room_id}
 
-    def on_PUT(
+    async def on_POST(
+        self,
+        request: SynapseRequest,
+        room_identifier: str,
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request, allow_guest=True)
+        return await self._do(request, requester, room_identifier, None)
+
+    async def on_PUT(
         self, request: SynapseRequest, room_identifier: str, txn_id: str
-    ) -> Awaitable[Tuple[int, JsonDict]]:
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request, allow_guest=True)
         set_tag("txn_id", txn_id)
 
-        return self.txns.fetch_or_execute_request(
-            request, self.on_POST, request, room_identifier, txn_id
+        return await self.txns.fetch_or_execute_request(
+            request, requester, self._do, request, requester, room_identifier, txn_id
         )
 
 
 # TODO: Needs unit testing
-class PublicRoomListRestServlet(TransactionRestServlet):
+class PublicRoomListRestServlet(RestServlet):
     PATTERNS = client_patterns("/publicRooms$", v1=True)
 
     def __init__(self, hs: "HomeServer"):
-        super().__init__(hs)
+        super().__init__()
         self.hs = hs
         self.auth = hs.get_auth()
 
@@ -907,22 +934,25 @@ class RoomForgetRestServlet(TransactionRestServlet):
         PATTERNS = "/rooms/(?P<room_id>[^/]*)/forget"
         register_txn_path(self, PATTERNS, http_server)
 
-    async def on_POST(
-        self, request: SynapseRequest, room_id: str, txn_id: Optional[str] = None
-    ) -> Tuple[int, JsonDict]:
-        requester = await self.auth.get_user_by_req(request, allow_guest=False)
-
+    async def _do(self, requester: Requester, room_id: str) -> Tuple[int, JsonDict]:
         await self.room_member_handler.forget(user=requester.user, room_id=room_id)
 
         return 200, {}
 
-    def on_PUT(
+    async def on_POST(
+        self, request: SynapseRequest, room_id: str
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request, allow_guest=False)
+        return await self._do(requester, room_id)
+
+    async def on_PUT(
         self, request: SynapseRequest, room_id: str, txn_id: str
-    ) -> Awaitable[Tuple[int, JsonDict]]:
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request, allow_guest=False)
         set_tag("txn_id", txn_id)
 
-        return self.txns.fetch_or_execute_request(
-            request, self.on_POST, request, room_id, txn_id
+        return await self.txns.fetch_or_execute_request(
+            request, requester, self._do, requester, room_id
         )
 
 
@@ -941,15 +971,14 @@ class RoomMembershipRestServlet(TransactionRestServlet):
         )
         register_txn_path(self, PATTERNS, http_server)
 
-    async def on_POST(
+    async def _do(
         self,
         request: SynapseRequest,
+        requester: Requester,
         room_id: str,
         membership_action: str,
-        txn_id: Optional[str] = None,
+        txn_id: Optional[str],
     ) -> Tuple[int, JsonDict]:
-        requester = await self.auth.get_user_by_req(request, allow_guest=True)
-
         if requester.is_guest and membership_action not in {
             Membership.JOIN,
             Membership.LEAVE,
@@ -1014,13 +1043,30 @@ class RoomMembershipRestServlet(TransactionRestServlet):
 
         return 200, return_value
 
-    def on_PUT(
+    async def on_POST(
+        self,
+        request: SynapseRequest,
+        room_id: str,
+        membership_action: str,
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request, allow_guest=True)
+        return await self._do(request, requester, room_id, membership_action, None)
+
+    async def on_PUT(
         self, request: SynapseRequest, room_id: str, membership_action: str, txn_id: str
-    ) -> Awaitable[Tuple[int, JsonDict]]:
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request, allow_guest=True)
         set_tag("txn_id", txn_id)
 
-        return self.txns.fetch_or_execute_request(
-            request, self.on_POST, request, room_id, membership_action, txn_id
+        return await self.txns.fetch_or_execute_request(
+            request,
+            requester,
+            self._do,
+            request,
+            requester,
+            room_id,
+            membership_action,
+            txn_id,
         )
 
 
@@ -1036,14 +1082,14 @@ class RoomRedactEventRestServlet(TransactionRestServlet):
         PATTERNS = "/rooms/(?P<room_id>[^/]*)/redact/(?P<event_id>[^/]*)"
         register_txn_path(self, PATTERNS, http_server)
 
-    async def on_POST(
+    async def _do(
         self,
         request: SynapseRequest,
+        requester: Requester,
         room_id: str,
         event_id: str,
-        txn_id: Optional[str] = None,
+        txn_id: Optional[str],
     ) -> Tuple[int, JsonDict]:
-        requester = await self.auth.get_user_by_req(request)
         content = parse_json_object_from_request(request)
 
         try:
@@ -1094,13 +1140,23 @@ class RoomRedactEventRestServlet(TransactionRestServlet):
         set_tag("event_id", event_id)
         return 200, {"event_id": event_id}
 
-    def on_PUT(
+    async def on_POST(
+        self,
+        request: SynapseRequest,
+        room_id: str,
+        event_id: str,
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request)
+        return await self._do(request, requester, room_id, event_id, None)
+
+    async def on_PUT(
         self, request: SynapseRequest, room_id: str, event_id: str, txn_id: str
-    ) -> Awaitable[Tuple[int, JsonDict]]:
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request)
         set_tag("txn_id", txn_id)
 
-        return self.txns.fetch_or_execute_request(
-            request, self.on_POST, request, room_id, event_id, txn_id
+        return await self.txns.fetch_or_execute_request(
+            request, requester, self._do, request, requester, room_id, event_id, txn_id
         )
 
 
@@ -1224,7 +1280,6 @@ def register_txn_path(
     servlet: RestServlet,
     regex_string: str,
     http_server: HttpServer,
-    with_get: bool = False,
 ) -> None:
     """Registers a transaction-based path.
 
@@ -1236,7 +1291,6 @@ def register_txn_path(
         regex_string: The regex string to register. Must NOT have a
             trailing $ as this string will be appended to.
         http_server: The http_server to register paths with.
-        with_get: True to also register respective GET paths for the PUTs.
     """
     on_POST = getattr(servlet, "on_POST", None)
     on_PUT = getattr(servlet, "on_PUT", None)
@@ -1254,18 +1308,6 @@ def register_txn_path(
         on_PUT,
         servlet.__class__.__name__,
     )
-    on_GET = getattr(servlet, "on_GET", None)
-    if with_get:
-        if on_GET is None:
-            raise RuntimeError(
-                "register_txn_path called with with_get = True, but no on_GET method exists"
-            )
-        http_server.register_paths(
-            "GET",
-            client_patterns(regex_string + "/(?P<txn_id>[^/]*)$", v1=True),
-            on_GET,
-            servlet.__class__.__name__,
-        )
 
 
 class TimestampLookupRestServlet(RestServlet):
diff --git a/synapse/rest/client/sendtodevice.py b/synapse/rest/client/sendtodevice.py
index 55d52f0b28..110af6df47 100644
--- a/synapse/rest/client/sendtodevice.py
+++ b/synapse/rest/client/sendtodevice.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, Awaitable, Tuple
+from typing import TYPE_CHECKING, Tuple
 
 from synapse.http import servlet
 from synapse.http.server import HttpServer
@@ -21,7 +21,7 @@ from synapse.http.servlet import assert_params_in_dict, parse_json_object_from_r
 from synapse.http.site import SynapseRequest
 from synapse.logging.opentracing import set_tag
 from synapse.rest.client.transactions import HttpTransactionCache
-from synapse.types import JsonDict
+from synapse.types import JsonDict, Requester
 
 from ._base import client_patterns
 
@@ -43,19 +43,26 @@ class SendToDeviceRestServlet(servlet.RestServlet):
         self.txns = HttpTransactionCache(hs)
         self.device_message_handler = hs.get_device_message_handler()
 
-    def on_PUT(
+    async def on_PUT(
         self, request: SynapseRequest, message_type: str, txn_id: str
-    ) -> Awaitable[Tuple[int, JsonDict]]:
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request, allow_guest=True)
         set_tag("txn_id", txn_id)
-        return self.txns.fetch_or_execute_request(
-            request, self._put, request, message_type, txn_id
+        return await self.txns.fetch_or_execute_request(
+            request,
+            requester,
+            self._put,
+            request,
+            requester,
+            message_type,
         )
 
     async def _put(
-        self, request: SynapseRequest, message_type: str, txn_id: str
+        self,
+        request: SynapseRequest,
+        requester: Requester,
+        message_type: str,
     ) -> Tuple[int, JsonDict]:
-        requester = await self.auth.get_user_by_req(request, allow_guest=True)
-
         content = parse_json_object_from_request(request)
         assert_params_in_dict(content, ("messages",))
 
diff --git a/synapse/rest/client/transactions.py b/synapse/rest/client/transactions.py
index 3f40f1874a..f2aaab6227 100644
--- a/synapse/rest/client/transactions.py
+++ b/synapse/rest/client/transactions.py
@@ -15,16 +15,16 @@
 """This module contains logic for storing HTTP PUT transactions. This is used
 to ensure idempotency when performing PUTs using the REST API."""
 import logging
-from typing import TYPE_CHECKING, Awaitable, Callable, Dict, Tuple
+from typing import TYPE_CHECKING, Awaitable, Callable, Dict, Hashable, Tuple
 
 from typing_extensions import ParamSpec
 
 from twisted.internet.defer import Deferred
 from twisted.python.failure import Failure
-from twisted.web.server import Request
+from twisted.web.iweb import IRequest
 
 from synapse.logging.context import make_deferred_yieldable, run_in_background
-from synapse.types import JsonDict
+from synapse.types import JsonDict, Requester
 from synapse.util.async_helpers import ObservableDeferred
 
 if TYPE_CHECKING:
@@ -41,53 +41,47 @@ P = ParamSpec("P")
 class HttpTransactionCache:
     def __init__(self, hs: "HomeServer"):
         self.hs = hs
-        self.auth = self.hs.get_auth()
         self.clock = self.hs.get_clock()
         # $txn_key: (ObservableDeferred<(res_code, res_json_body)>, timestamp)
         self.transactions: Dict[
-            str, Tuple[ObservableDeferred[Tuple[int, JsonDict]], int]
+            Hashable, Tuple[ObservableDeferred[Tuple[int, JsonDict]], int]
         ] = {}
         # Try to clean entries every 30 mins. This means entries will exist
         # for at *LEAST* 30 mins, and at *MOST* 60 mins.
         self.cleaner = self.clock.looping_call(self._cleanup, CLEANUP_PERIOD_MS)
 
-    def _get_transaction_key(self, request: Request) -> str:
+    def _get_transaction_key(self, request: IRequest, requester: Requester) -> Hashable:
         """A helper function which returns a transaction key that can be used
         with TransactionCache for idempotent requests.
 
         Idempotency is based on the returned key being the same for separate
         requests to the same endpoint. The key is formed from the HTTP request
-        path and the access_token for the requesting user.
+        path and attributes from the requester: the access_token_id for regular users,
+        the user ID for guest users, and the appservice ID for appservice users.
 
         Args:
-            request: The incoming request. Must contain an access_token.
+            request: The incoming request.
+            requester: The requester doing the request.
         Returns:
             A transaction key
         """
         assert request.path is not None
-        token = self.auth.get_access_token_from_request(request)
-        return request.path.decode("utf8") + "/" + token
+        path: str = request.path.decode("utf8")
+        if requester.is_guest:
+            assert requester.user is not None, "Guest requester must have a user ID set"
+            return (path, "guest", requester.user)
+        elif requester.app_service is not None:
+            return (path, "appservice", requester.app_service.id)
+        else:
+            assert (
+                requester.access_token_id is not None
+            ), "Requester must have an access_token_id"
+            return (path, "user", requester.access_token_id)
 
     def fetch_or_execute_request(
         self,
-        request: Request,
-        fn: Callable[P, Awaitable[Tuple[int, JsonDict]]],
-        *args: P.args,
-        **kwargs: P.kwargs,
-    ) -> Awaitable[Tuple[int, JsonDict]]:
-        """A helper function for fetch_or_execute which extracts
-        a transaction key from the given request.
-
-        See:
-            fetch_or_execute
-        """
-        return self.fetch_or_execute(
-            self._get_transaction_key(request), fn, *args, **kwargs
-        )
-
-    def fetch_or_execute(
-        self,
-        txn_key: str,
+        request: IRequest,
+        requester: Requester,
         fn: Callable[P, Awaitable[Tuple[int, JsonDict]]],
         *args: P.args,
         **kwargs: P.kwargs,
@@ -96,14 +90,15 @@ class HttpTransactionCache:
         to produce a response for this transaction.
 
         Args:
-            txn_key: A key to ensure idempotency should fetch_or_execute be
-                called again at a later point in time.
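+            request: The incoming request; its path is part of the transaction key.
+            requester: The requester making the request; also part of the key.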
             fn: A function which returns a tuple of (response_code, response_dict).
             *args: Arguments to pass to fn.
             **kwargs: Keyword arguments to pass to fn.
         Returns:
             Deferred which resolves to a tuple of (response_code, response_dict).
         """
+        txn_key = self._get_transaction_key(request, requester)
         if txn_key in self.transactions:
             observable = self.transactions[txn_key][0]
         else:
diff --git a/tests/rest/client/test_transactions.py b/tests/rest/client/test_transactions.py
index 3086e1b565..d8dc56261a 100644
--- a/tests/rest/client/test_transactions.py
+++ b/tests/rest/client/test_transactions.py
@@ -39,15 +39,23 @@ class HttpTransactionCacheTestCase(unittest.TestCase):
         self.cache = HttpTransactionCache(self.hs)
 
         self.mock_http_response = (HTTPStatus.OK, {"result": "GOOD JOB!"})
-        self.mock_key = "foo"
+
+        # Here we make sure that we're setting all the fields that HttpTransactionCache
+        # uses to build the transaction key.
+        self.mock_request = Mock()
+        self.mock_request.path = b"/foo/bar"
+        self.mock_requester = Mock()
+        self.mock_requester.app_service = None
+        self.mock_requester.is_guest = False
+        self.mock_requester.access_token_id = 1234
 
     @defer.inlineCallbacks
     def test_executes_given_function(
         self,
     ) -> Generator["defer.Deferred[Any]", object, None]:
         cb = Mock(return_value=make_awaitable(self.mock_http_response))
-        res = yield self.cache.fetch_or_execute(
-            self.mock_key, cb, "some_arg", keyword="arg"
+        res = yield self.cache.fetch_or_execute_request(
+            self.mock_request, self.mock_requester, cb, "some_arg", keyword="arg"
         )
         cb.assert_called_once_with("some_arg", keyword="arg")
         self.assertEqual(res, self.mock_http_response)
@@ -58,8 +66,13 @@ class HttpTransactionCacheTestCase(unittest.TestCase):
     ) -> Generator["defer.Deferred[Any]", object, None]:
         cb = Mock(return_value=make_awaitable(self.mock_http_response))
         for i in range(3):  # invoke multiple times
-            res = yield self.cache.fetch_or_execute(
-                self.mock_key, cb, "some_arg", keyword="arg", changing_args=i
+            res = yield self.cache.fetch_or_execute_request(
+                self.mock_request,
+                self.mock_requester,
+                cb,
+                "some_arg",
+                keyword="arg",
+                changing_args=i,
             )
             self.assertEqual(res, self.mock_http_response)
         # expect only a single call to do the work
@@ -77,7 +90,9 @@ class HttpTransactionCacheTestCase(unittest.TestCase):
         @defer.inlineCallbacks
         def test() -> Generator["defer.Deferred[Any]", object, None]:
             with LoggingContext("c") as c1:
-                res = yield self.cache.fetch_or_execute(self.mock_key, cb)
+                res = yield self.cache.fetch_or_execute_request(
+                    self.mock_request, self.mock_requester, cb
+                )
                 self.assertIs(current_context(), c1)
                 self.assertEqual(res, (1, {}))
 
@@ -106,12 +121,16 @@ class HttpTransactionCacheTestCase(unittest.TestCase):
 
         with LoggingContext("test") as test_context:
             try:
-                yield self.cache.fetch_or_execute(self.mock_key, cb)
+                yield self.cache.fetch_or_execute_request(
+                    self.mock_request, self.mock_requester, cb
+                )
             except Exception as e:
                 self.assertEqual(e.args[0], "boo")
             self.assertIs(current_context(), test_context)
 
-            res = yield self.cache.fetch_or_execute(self.mock_key, cb)
+            res = yield self.cache.fetch_or_execute_request(
+                self.mock_request, self.mock_requester, cb
+            )
             self.assertEqual(res, self.mock_http_response)
             self.assertIs(current_context(), test_context)
 
@@ -134,29 +153,39 @@ class HttpTransactionCacheTestCase(unittest.TestCase):
 
         with LoggingContext("test") as test_context:
             try:
-                yield self.cache.fetch_or_execute(self.mock_key, cb)
+                yield self.cache.fetch_or_execute_request(
+                    self.mock_request, self.mock_requester, cb
+                )
             except Exception as e:
                 self.assertEqual(e.args[0], "boo")
             self.assertIs(current_context(), test_context)
 
-            res = yield self.cache.fetch_or_execute(self.mock_key, cb)
+            res = yield self.cache.fetch_or_execute_request(
+                self.mock_request, self.mock_requester, cb
+            )
             self.assertEqual(res, self.mock_http_response)
             self.assertIs(current_context(), test_context)
 
     @defer.inlineCallbacks
     def test_cleans_up(self) -> Generator["defer.Deferred[Any]", object, None]:
         cb = Mock(return_value=make_awaitable(self.mock_http_response))
-        yield self.cache.fetch_or_execute(self.mock_key, cb, "an arg")
+        yield self.cache.fetch_or_execute_request(
+            self.mock_request, self.mock_requester, cb, "an arg"
+        )
         # should NOT have cleaned up yet
         self.clock.advance_time_msec(CLEANUP_PERIOD_MS / 2)
 
-        yield self.cache.fetch_or_execute(self.mock_key, cb, "an arg")
+        yield self.cache.fetch_or_execute_request(
+            self.mock_request, self.mock_requester, cb, "an arg"
+        )
         # still using cache
         cb.assert_called_once_with("an arg")
 
         self.clock.advance_time_msec(CLEANUP_PERIOD_MS)
 
-        yield self.cache.fetch_or_execute(self.mock_key, cb, "an arg")
+        yield self.cache.fetch_or_execute_request(
+            self.mock_request, self.mock_requester, cb, "an arg"
+        )
         # no longer using cache
         self.assertEqual(cb.call_count, 2)
         self.assertEqual(cb.call_args_list, [call("an arg"), call("an arg")])
-- 
cgit 1.5.1


From 20ed8c926b518809e67e4d1696189413e851d2e4 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 7 Mar 2023 11:27:57 -0500
Subject: Stabilize support for MSC3873: disambiguated event push keys.
 (#15190)

This removes the experimental configuration option and
always escapes the push rule condition keys.

Also escapes any (experimental) push rule condition keys
in the base rules which contain a dot in a field name.
---
 changelog.d/15190.bugfix                 |  1 +
 rust/src/push/base_rules.rs              |  6 +++---
 synapse/config/experimental.py           | 10 ----------
 synapse/push/bulk_push_rule_evaluator.py | 33 ++++++++------------------------
 tests/push/test_push_rule_evaluator.py   | 10 +++-------
 5 files changed, 15 insertions(+), 45 deletions(-)
 create mode 100644 changelog.d/15190.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15190.bugfix b/changelog.d/15190.bugfix
new file mode 100644
index 0000000000..5c3a86320e
--- /dev/null
+++ b/changelog.d/15190.bugfix
@@ -0,0 +1 @@
+Implement [MSC3873](https://github.com/matrix-org/matrix-spec-proposals/pull/3873) to fix a long-standing bug where properties with dots were handled ambiguously in push rules.
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index ec8d96656a..d7c73c1f25 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -71,7 +71,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
         priority_class: 5,
         conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch(
             EventMatchCondition {
-                key: Cow::Borrowed("content.m.relates_to.rel_type"),
+                key: Cow::Borrowed("content.m\\.relates_to.rel_type"),
                 pattern: Cow::Borrowed("m.replace"),
             },
         ))]),
@@ -146,7 +146,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
         priority_class: 5,
         conditions: Cow::Borrowed(&[Condition::Known(
             KnownCondition::ExactEventPropertyContainsType(EventPropertyIsTypeCondition {
-                key: Cow::Borrowed("content.org.matrix.msc3952.mentions.user_ids"),
+                key: Cow::Borrowed("content.org\\.matrix\\.msc3952\\.mentions.user_ids"),
                 value_type: Cow::Borrowed(&EventMatchPatternType::UserId),
             }),
         )]),
@@ -167,7 +167,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
         priority_class: 5,
         conditions: Cow::Borrowed(&[
             Condition::Known(KnownCondition::EventPropertyIs(EventPropertyIsCondition {
-                key: Cow::Borrowed("content.org.matrix.msc3952.mentions.room"),
+                key: Cow::Borrowed("content.org\\.matrix\\.msc3952\\.mentions.room"),
                 value: Cow::Borrowed(&SimpleJsonValue::Bool(true)),
             })),
             Condition::Known(KnownCondition::SenderNotificationPermission {
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 9ff382ccc3..7e05f78f70 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -166,11 +166,6 @@ class ExperimentalConfig(Config):
         # MSC3391: Removing account data.
         self.msc3391_enabled = experimental.get("msc3391_enabled", False)
 
-        # MSC3873: Disambiguate event_match keys.
-        self.msc3873_escape_event_match_key = experimental.get(
-            "msc3873_escape_event_match_key", False
-        )
-
         # MSC3952: Intentional mentions, this depends on MSC3966.
         self.msc3952_intentional_mentions = experimental.get(
             "msc3952_intentional_mentions", False
@@ -181,10 +176,5 @@ class ExperimentalConfig(Config):
             "msc3958_supress_edit_notifs", False
         )
 
-        # MSC3966: exact_event_property_contains push rule condition.
-        self.msc3966_exact_event_property_contains = experimental.get(
-            "msc3966_exact_event_property_contains", False
-        )
-
         # MSC3967: Do not require UIA when first uploading cross signing keys
         self.msc3967_enabled = experimental.get("msc3967_enabled", False)
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 45622a9e9b..199337673f 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -273,10 +273,7 @@ class BulkPushRuleEvaluator:
                     related_event_id, allow_none=True
                 )
                 if related_event is not None:
-                    related_events[relation_type] = _flatten_dict(
-                        related_event,
-                        msc3873_escape_event_match_key=self.hs.config.experimental.msc3873_escape_event_match_key,
-                    )
+                    related_events[relation_type] = _flatten_dict(related_event)
 
             reply_event_id = (
                 event.content.get("m.relates_to", {})
@@ -291,10 +288,7 @@ class BulkPushRuleEvaluator:
                 )
 
                 if related_event is not None:
-                    related_events["m.in_reply_to"] = _flatten_dict(
-                        related_event,
-                        msc3873_escape_event_match_key=self.hs.config.experimental.msc3873_escape_event_match_key,
-                    )
+                    related_events["m.in_reply_to"] = _flatten_dict(related_event)
 
                     # indicate that this is from a fallback relation.
                     if relation_type == "m.thread" and event.content.get(
@@ -401,10 +395,7 @@ class BulkPushRuleEvaluator:
         )
 
         evaluator = PushRuleEvaluator(
-            _flatten_dict(
-                event,
-                msc3873_escape_event_match_key=self.hs.config.experimental.msc3873_escape_event_match_key,
-            ),
+            _flatten_dict(event),
             has_mentions,
             room_member_count,
             sender_power_level,
@@ -494,8 +485,6 @@ def _flatten_dict(
     d: Union[EventBase, Mapping[str, Any]],
     prefix: Optional[List[str]] = None,
     result: Optional[Dict[str, JsonValue]] = None,
-    *,
-    msc3873_escape_event_match_key: bool = False,
 ) -> Dict[str, JsonValue]:
     """
     Given a JSON dictionary (or event) which might contain sub dictionaries,
@@ -524,11 +513,10 @@ def _flatten_dict(
     if result is None:
         result = {}
     for key, value in d.items():
-        if msc3873_escape_event_match_key:
-            # Escape periods in the key with a backslash (and backslashes with an
-            # extra backslash). This is since a period is used as a separator between
-            # nested fields.
-            key = key.replace("\\", "\\\\").replace(".", "\\.")
+        # Escape periods in the key with a backslash (and backslashes with an
+        # extra backslash). This is since a period is used as a separator between
+        # nested fields.
+        key = key.replace("\\", "\\\\").replace(".", "\\.")
 
         if _is_simple_value(value):
             result[".".join(prefix + [key])] = value
@@ -536,12 +524,7 @@ def _flatten_dict(
             result[".".join(prefix + [key])] = [v for v in value if _is_simple_value(v)]
         elif isinstance(value, Mapping):
             # do not set `room_version` due to recursion considerations below
-            _flatten_dict(
-                value,
-                prefix=(prefix + [key]),
-                result=result,
-                msc3873_escape_event_match_key=msc3873_escape_event_match_key,
-            )
+            _flatten_dict(value, prefix=(prefix + [key]), result=result)
 
     # `room_version` should only ever be set when looking at the top level of an event
     if (
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index 6deee0fd02..52c4aafea6 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -51,11 +51,7 @@ class FlattenDictTestCase(unittest.TestCase):
 
         # If a field has a dot in it, escape it.
         input = {"m.foo": {"b\\ar": "abc"}}
-        self.assertEqual({"m.foo.b\\ar": "abc"}, _flatten_dict(input))
-        self.assertEqual(
-            {"m\\.foo.b\\\\ar": "abc"},
-            _flatten_dict(input, msc3873_escape_event_match_key=True),
-        )
+        self.assertEqual({"m\\.foo.b\\\\ar": "abc"}, _flatten_dict(input))
 
     def test_non_string(self) -> None:
         """String, booleans, ints, nulls and list of those should be kept while other items are dropped."""
@@ -125,7 +121,7 @@ class FlattenDictTestCase(unittest.TestCase):
             "room_id": "!test:test",
             "sender": "@alice:test",
             "type": "m.room.message",
-            "content.org.matrix.msc1767.markup": [],
+            "content.org\\.matrix\\.msc1767\\.markup": [],
         }
         self.assertEqual(expected, _flatten_dict(event))
 
@@ -137,7 +133,7 @@ class FlattenDictTestCase(unittest.TestCase):
             "room_id": "!test:test",
             "sender": "@alice:test",
             "type": "m.room.message",
-            "content.org.matrix.msc1767.markup": [],
+            "content.org\\.matrix\\.msc1767\\.markup": [],
         }
         self.assertEqual(expected, _flatten_dict(event))
 
-- 
cgit 1.5.1


From a368d30c1cfe7457fca4fcdd03ae481ba65a226c Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Tue, 7 Mar 2023 13:54:39 -0800
Subject: More speedups/fixes to creating batched events (#15195)

---
 changelog.d/15195.misc         |  1 +
 synapse/event_auth.py          | 23 +++++++++++++++++------
 synapse/events/snapshot.py     |  1 +
 synapse/handlers/event_auth.py | 13 +++++++++++--
 synapse/handlers/room.py       |  4 +++-
 5 files changed, 33 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/15195.misc

(limited to 'synapse')

diff --git a/changelog.d/15195.misc b/changelog.d/15195.misc
new file mode 100644
index 0000000000..d8beea917d
--- /dev/null
+++ b/changelog.d/15195.misc
@@ -0,0 +1 @@
+Improve performance of creating and authenticating events.
\ No newline at end of file
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index 4d6d1b8ebd..af55874b5c 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -168,13 +168,24 @@ async def check_state_independent_auth_rules(
         return
 
     # 2. Reject if event has auth_events that: ...
-    auth_events = await store.get_events(
-        event.auth_event_ids(),
-        redact_behaviour=EventRedactBehaviour.as_is,
-        allow_rejected=True,
-    )
     if batched_auth_events:
-        auth_events.update(batched_auth_events)
+        # Copy the batched auth events to avoid mutating them.
+        auth_events = dict(batched_auth_events)
+        needed_auth_event_ids = set(event.auth_event_ids()) - batched_auth_events.keys()
+        if needed_auth_event_ids:
+            auth_events.update(
+                await store.get_events(
+                    needed_auth_event_ids,
+                    redact_behaviour=EventRedactBehaviour.as_is,
+                    allow_rejected=True,
+                )
+            )
+    else:
+        auth_events = await store.get_events(
+            event.auth_event_ids(),
+            redact_behaviour=EventRedactBehaviour.as_is,
+            allow_rejected=True,
+        )
 
     room_id = event.room_id
     auth_dict: MutableStateMap[str] = {}
diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py
index a91a5d1e3c..c04ad08cbb 100644
--- a/synapse/events/snapshot.py
+++ b/synapse/events/snapshot.py
@@ -293,6 +293,7 @@ class EventContext(UnpersistedEventContextBase):
             Maps a (type, state_key) to the event ID of the state event matching
             this tuple.
         """
+
         assert self.state_group_before_event is not None
         return await self._storage.state.get_state_ids_for_group(
             self.state_group_before_event, state_filter
diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py
index c508861b6a..0db0bd7304 100644
--- a/synapse/handlers/event_auth.py
+++ b/synapse/handlers/event_auth.py
@@ -63,9 +63,18 @@ class EventAuthHandler:
             self._store, event, batched_auth_events
         )
         auth_event_ids = event.auth_event_ids()
-        auth_events_by_id = await self._store.get_events(auth_event_ids)
+
         if batched_auth_events:
-            auth_events_by_id.update(batched_auth_events)
+            # Copy the batched auth events to avoid mutating them.
+            auth_events_by_id = dict(batched_auth_events)
+            needed_auth_event_ids = set(auth_event_ids) - set(batched_auth_events)
+            if needed_auth_event_ids:
+                auth_events_by_id.update(
+                    await self._store.get_events(needed_auth_event_ids)
+                )
+        else:
+            auth_events_by_id = await self._store.get_events(auth_event_ids)
+
         check_state_dependent_auth_rules(event, auth_events_by_id.values())
 
     def compute_auth_events(
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index b1784638f4..32451670f3 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -1123,7 +1123,9 @@ class RoomCreationHandler:
                 event_dict,
                 prev_event_ids=prev_event,
                 depth=depth,
-                state_map=state_map,
+                # Take a copy to ensure each event gets a unique copy of
+                # state_map since it is modified below.
+                state_map=dict(state_map),
                 for_batch=for_batch,
             )
 
-- 
cgit 1.5.1


From 88efc75bab2849b7b1cee52770dea3cf9925b2e8 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 8 Mar 2023 15:08:56 -0500
Subject: Include the room ID in more purge room log lines. (#15222)

---
 changelog.d/15222.misc                         |  1 +
 synapse/handlers/pagination.py                 |  2 +-
 synapse/storage/controllers/purge_events.py    | 22 +++++++++++++---------
 synapse/storage/databases/main/purge_events.py | 11 ++++++-----
 synapse/storage/databases/state/store.py       |  2 ++
 5 files changed, 23 insertions(+), 15 deletions(-)
 create mode 100644 changelog.d/15222.misc

(limited to 'synapse')

diff --git a/changelog.d/15222.misc b/changelog.d/15222.misc
new file mode 100644
index 0000000000..6361676a15
--- /dev/null
+++ b/changelog.d/15222.misc
@@ -0,0 +1 @@
+Improve log lines when purging rooms.
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index 8c79c055ba..63b35c8d62 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -683,7 +683,7 @@ class PaginationHandler:
 
                     await self._storage_controllers.purge_events.purge_room(room_id)
 
-            logger.info("complete")
+            logger.info("purge complete for room_id %s", room_id)
             self._delete_by_id[delete_id].status = DeleteStatus.STATUS_COMPLETE
         except Exception:
             f = Failure()
diff --git a/synapse/storage/controllers/purge_events.py b/synapse/storage/controllers/purge_events.py
index 9ca50d6a09..c599397b86 100644
--- a/synapse/storage/controllers/purge_events.py
+++ b/synapse/storage/controllers/purge_events.py
@@ -16,6 +16,7 @@ import itertools
 import logging
 from typing import TYPE_CHECKING, Set
 
+from synapse.logging.context import nested_logging_context
 from synapse.storage.databases import Databases
 
 if TYPE_CHECKING:
@@ -33,8 +34,9 @@ class PurgeEventsStorageController:
     async def purge_room(self, room_id: str) -> None:
         """Deletes all record of a room"""
 
-        state_groups_to_delete = await self.stores.main.purge_room(room_id)
-        await self.stores.state.purge_room_state(room_id, state_groups_to_delete)
+        with nested_logging_context(room_id):
+            state_groups_to_delete = await self.stores.main.purge_room(room_id)
+            await self.stores.state.purge_room_state(room_id, state_groups_to_delete)
 
     async def purge_history(
         self, room_id: str, token: str, delete_local_events: bool
@@ -51,15 +53,17 @@ class PurgeEventsStorageController:
                 (instead of just marking them as outliers and deleting their
                 state groups).
         """
-        state_groups = await self.stores.main.purge_history(
-            room_id, token, delete_local_events
-        )
-
-        logger.info("[purge] finding state groups that can be deleted")
+        with nested_logging_context(room_id):
+            state_groups = await self.stores.main.purge_history(
+                room_id, token, delete_local_events
+            )
 
-        sg_to_delete = await self._find_unreferenced_groups(state_groups)
+            logger.info("[purge] finding state groups that can be deleted")
+            sg_to_delete = await self._find_unreferenced_groups(state_groups)
 
-        await self.stores.state.purge_unreferenced_state_groups(room_id, sg_to_delete)
+            await self.stores.state.purge_unreferenced_state_groups(
+                room_id, sg_to_delete
+            )
 
     async def _find_unreferenced_groups(self, state_groups: Set[int]) -> Set[int]:
         """Used when purging history to figure out which state groups can be
diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py
index 9c41d01e13..7a7c0d9c75 100644
--- a/synapse/storage/databases/main/purge_events.py
+++ b/synapse/storage/databases/main/purge_events.py
@@ -325,6 +325,7 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
         # We then run the same purge a second time without this isolation level to
         # purge any of those rows which were added during the first.
 
+        logger.info("[purge] Starting initial main purge of [1/2]")
         state_groups_to_delete = await self.db_pool.runInteraction(
             "purge_room",
             self._purge_room_txn,
@@ -332,6 +333,7 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
             isolation_level=IsolationLevel.READ_COMMITTED,
         )
 
+        logger.info("[purge] Starting secondary main purge of [2/2]")
         state_groups_to_delete.extend(
             await self.db_pool.runInteraction(
                 "purge_room",
@@ -339,6 +341,7 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
                 room_id=room_id,
             ),
         )
+        logger.info("[purge] Done with main purge")
 
         return state_groups_to_delete
 
@@ -376,7 +379,7 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
         )
         referenced_chain_id_tuples = list(txn)
 
-        logger.info("[purge] removing events from event_auth_chain_links")
+        logger.info("[purge] removing from event_auth_chain_links")
         txn.executemany(
             """
             DELETE FROM event_auth_chain_links WHERE
@@ -399,7 +402,7 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
             "rejections",
             "state_events",
         ):
-            logger.info("[purge] removing %s from %s", room_id, table)
+            logger.info("[purge] removing from %s", table)
 
             txn.execute(
                 """
@@ -454,7 +457,7 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
             # happy
             "rooms",
         ):
-            logger.info("[purge] removing %s from %s", room_id, table)
+            logger.info("[purge] removing from %s", table)
             txn.execute("DELETE FROM %s WHERE room_id=?" % (table,), (room_id,))
 
         # Other tables we do NOT need to clear out:
@@ -486,6 +489,4 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
         #   that already exist.
         self._invalidate_cache_and_stream(txn, self.have_seen_event, (room_id,))
 
-        logger.info("[purge] done")
-
         return state_groups
diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py
index bf4cdfdf29..29ff64e876 100644
--- a/synapse/storage/databases/state/store.py
+++ b/synapse/storage/databases/state/store.py
@@ -805,12 +805,14 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
             state_groups_to_delete: State groups to delete
         """
 
+        logger.info("[purge] Starting state purge")
         await self.db_pool.runInteraction(
             "purge_room_state",
             self._purge_room_state_txn,
             room_id,
             state_groups_to_delete,
         )
+        logger.info("[purge] Done with state purge")
 
     def _purge_room_state_txn(
         self,
-- 
cgit 1.5.1


From be4ea209e8bf92fb3660807c1fe8ad3d7d05621f Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Wed, 8 Mar 2023 19:27:20 -0800
Subject: Add topic and name events to group of events that are batch persisted
 when creating a room. (#15229)

---
 changelog.d/15229.misc   |   1 +
 synapse/handlers/room.py | 108 +++++++++++++++++++++++------------------------
 2 files changed, 53 insertions(+), 56 deletions(-)
 create mode 100644 changelog.d/15229.misc

(limited to 'synapse')

diff --git a/changelog.d/15229.misc b/changelog.d/15229.misc
new file mode 100644
index 0000000000..4d8ea03b27
--- /dev/null
+++ b/changelog.d/15229.misc
@@ -0,0 +1 @@
+Add topic and name events to group of events that are batch persisted when creating a room.
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 32451670f3..be120cb12f 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -569,7 +569,7 @@ class RoomCreationHandler:
             new_room_id,
             # we expect to override all the presets with initial_state, so this is
             # somewhat arbitrary.
-            preset_config=RoomCreationPreset.PRIVATE_CHAT,
+            room_config={"preset": RoomCreationPreset.PRIVATE_CHAT},
             invite_list=[],
             initial_state=initial_state,
             creation_content=creation_content,
@@ -904,13 +904,6 @@ class RoomCreationHandler:
                 check_membership=False,
             )
 
-        preset_config = config.get(
-            "preset",
-            RoomCreationPreset.PRIVATE_CHAT
-            if visibility == "private"
-            else RoomCreationPreset.PUBLIC_CHAT,
-        )
-
         raw_initial_state = config.get("initial_state", [])
 
         initial_state = OrderedDict()
@@ -929,7 +922,7 @@ class RoomCreationHandler:
         ) = await self._send_events_for_new_room(
             requester,
             room_id,
-            preset_config=preset_config,
+            room_config=config,
             invite_list=invite_list,
             initial_state=initial_state,
             creation_content=creation_content,
@@ -938,48 +931,6 @@ class RoomCreationHandler:
             creator_join_profile=creator_join_profile,
         )
 
-        if "name" in config:
-            name = config["name"]
-            (
-                name_event,
-                last_stream_id,
-            ) = await self.event_creation_handler.create_and_send_nonmember_event(
-                requester,
-                {
-                    "type": EventTypes.Name,
-                    "room_id": room_id,
-                    "sender": user_id,
-                    "state_key": "",
-                    "content": {"name": name},
-                },
-                ratelimit=False,
-                prev_event_ids=[last_sent_event_id],
-                depth=depth,
-            )
-            last_sent_event_id = name_event.event_id
-            depth += 1
-
-        if "topic" in config:
-            topic = config["topic"]
-            (
-                topic_event,
-                last_stream_id,
-            ) = await self.event_creation_handler.create_and_send_nonmember_event(
-                requester,
-                {
-                    "type": EventTypes.Topic,
-                    "room_id": room_id,
-                    "sender": user_id,
-                    "state_key": "",
-                    "content": {"topic": topic},
-                },
-                ratelimit=False,
-                prev_event_ids=[last_sent_event_id],
-                depth=depth,
-            )
-            last_sent_event_id = topic_event.event_id
-            depth += 1
-
         # we avoid dropping the lock between invites, as otherwise joins can
         # start coming in and making the createRoom slow.
         #
@@ -1047,7 +998,7 @@ class RoomCreationHandler:
         self,
         creator: Requester,
         room_id: str,
-        preset_config: str,
+        room_config: JsonDict,
         invite_list: List[str],
         initial_state: MutableStateMap,
         creation_content: JsonDict,
@@ -1064,11 +1015,33 @@ class RoomCreationHandler:
 
         Rate limiting should already have been applied by this point.
 
+        Args:
+            creator:
+                the user requesting the room creation
+            room_id:
+                room id for the room being created
+            room_config:
+                A dict of configuration options. This will be the body of
+                a /createRoom request; see
+                https://spec.matrix.org/latest/client-server-api/#post_matrixclientv3createroom
+            invite_list:
+                a list of user ids to invite to the room
+            initial_state:
+                A list of state events to set in the new room.
+            creation_content:
+                Extra keys, such as m.federate, to be added to the content of the m.room.create event.
+            room_alias:
+                alias for the room
+            power_level_content_override:
+                The power level content to override in the default power level event.
+            creator_join_profile:
+                Set to override the displayname and avatar for the creating
+                user in this room.
+
         Returns:
             A tuple containing the stream ID, event ID and depth of the last
             event sent to the room.
         """
-
         creator_id = creator.user.to_string()
         event_keys = {"room_id": room_id, "sender": creator_id, "state_key": ""}
         depth = 1
@@ -1079,9 +1052,6 @@ class RoomCreationHandler:
         # created (but not persisted to the db) to determine state for future created events
         # (as this info can't be pulled from the db)
         state_map: MutableStateMap[str] = {}
-        # current_state_group of last event created. Used for computing event context of
-        # events to be batched
-        current_state_group: Optional[int] = None
 
         def create_event_dict(etype: str, content: JsonDict, **kwargs: Any) -> JsonDict:
             e = {"type": etype, "content": content}
@@ -1135,6 +1105,14 @@ class RoomCreationHandler:
 
             return new_event, new_unpersisted_context
 
+        visibility = room_config.get("visibility", "private")
+        preset_config = room_config.get(
+            "preset",
+            RoomCreationPreset.PRIVATE_CHAT
+            if visibility == "private"
+            else RoomCreationPreset.PUBLIC_CHAT,
+        )
+
         try:
             config = self._presets_dict[preset_config]
         except KeyError:
@@ -1286,6 +1264,24 @@ class RoomCreationHandler:
             )
             events_to_send.append((encryption_event, encryption_context))
 
+        if "name" in room_config:
+            name = room_config["name"]
+            name_event, name_context = await create_event(
+                EventTypes.Name,
+                {"name": name},
+                True,
+            )
+            events_to_send.append((name_event, name_context))
+
+        if "topic" in room_config:
+            topic = room_config["topic"]
+            topic_event, topic_context = await create_event(
+                EventTypes.Topic,
+                {"topic": topic},
+                True,
+            )
+            events_to_send.append((topic_event, topic_context))
+
         datastore = self.hs.get_datastores().state
         events_and_context = (
             await UnpersistedEventContext.batch_persist_unpersisted_contexts(
-- 
cgit 1.5.1


From e7c3832ba65aa3b82d3738c6f8554e21d9d87d04 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 9 Mar 2023 07:09:49 -0500
Subject: Pull in netaddr type hints. (#15231)

And fix any issues from having those type hints.
---
 changelog.d/15231.misc                             |  1 +
 mypy.ini                                           |  5 -----
 poetry.lock                                        | 16 ++++++++++++++--
 pyproject.toml                                     |  1 +
 synapse/http/client.py                             |  8 +++++---
 synapse/http/federation/matrix_federation_agent.py |  2 +-
 tests/http/test_client.py                          |  2 +-
 7 files changed, 23 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/15231.misc

(limited to 'synapse')

diff --git a/changelog.d/15231.misc b/changelog.d/15231.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/15231.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/mypy.ini b/mypy.ini
index 572734f8e7..cad3716389 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -74,11 +74,6 @@ ignore_missing_imports = True
 [mypy-msgpack]
 ignore_missing_imports = True
 
-# Note: WIP stubs available at
-# https://github.com/microsoft/python-type-stubs/tree/64934207f523ad6b611e6cfe039d85d7175d7d0d/netaddr
-[mypy-netaddr]
-ignore_missing_imports = True
-
 [mypy-parameterized.*]
 ignore_missing_imports = True
 
diff --git a/poetry.lock b/poetry.lock
index 24adc4c876..cd89418dd7 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1722,7 +1722,7 @@ files = [
 cffi = ">=1.4.1"
 
 [package.extras]
-docs = ["sphinx (>=1.6.5)", "sphinx-rtd-theme"]
+docs = ["sphinx (>=1.6.5)", "sphinx_rtd_theme"]
 tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"]
 
 [[package]]
@@ -2597,6 +2597,18 @@ files = [
     {file = "types_jsonschema-4.17.0.5-py3-none-any.whl", hash = "sha256:79ac8a7763fe728947af90a24168b91621edf7e8425bf3670abd4ea0d4758fba"},
 ]
 
+[[package]]
+name = "types-netaddr"
+version = "0.8.0.6"
+description = "Typing stubs for netaddr"
+category = "dev"
+optional = false
+python-versions = "*"
+files = [
+    {file = "types-netaddr-0.8.0.6.tar.gz", hash = "sha256:e5048640c2412e7ea2d3eb02c94ae1b50442b2c7a50a7c48e957676139cdf19b"},
+    {file = "types_netaddr-0.8.0.6-py3-none-any.whl", hash = "sha256:d4d40d1ba35430a4e4c929596542cd37e6831f5d08676b33dc84e06e01a840f6"},
+]
+
 [[package]]
 name = "types-opentracing"
 version = "2.4.10.3"
@@ -2990,4 +3002,4 @@ user-search = ["pyicu"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.7.1"
-content-hash = "7bcffef7b6e6d4b1113222e2ca152b3798c997872789c8a1ea01238f199d56fe"
+content-hash = "de2c4c8de336593478ce02581a5336afe2544db93ea82f3955b34c3653c29a26"
diff --git a/pyproject.toml b/pyproject.toml
index 90a1187416..074ac2c11e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -321,6 +321,7 @@ mypy-zope = "*"
 types-bleach = ">=4.1.0"
 types-commonmark = ">=0.9.2"
 types-jsonschema = ">=3.2.0"
+types-netaddr = ">=0.8.0.6"
 types-opentracing = ">=2.4.2"
 types-Pillow = ">=8.3.4"
 types-psycopg2 = ">=2.9.9"
diff --git a/synapse/http/client.py b/synapse/http/client.py
index ae48e7c3f0..d777d59ccf 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -268,8 +268,8 @@ class BlacklistingAgentWrapper(Agent):
     def __init__(
         self,
         agent: IAgent,
+        ip_blacklist: IPSet,
         ip_whitelist: Optional[IPSet] = None,
-        ip_blacklist: Optional[IPSet] = None,
     ):
         """
         Args:
@@ -291,7 +291,9 @@ class BlacklistingAgentWrapper(Agent):
         h = urllib.parse.urlparse(uri.decode("ascii"))
 
         try:
-            ip_address = IPAddress(h.hostname)
+            # h.hostname is Optional[str], None raises an AddrFormatError, so
+            # this is safe even though IPAddress requires a str.
+            ip_address = IPAddress(h.hostname)  # type: ignore[arg-type]
         except AddrFormatError:
             # Not an IP
             pass
@@ -388,8 +390,8 @@ class SimpleHttpClient:
             # by the DNS resolution.
             self.agent = BlacklistingAgentWrapper(
                 self.agent,
-                ip_whitelist=self._ip_whitelist,
                 ip_blacklist=self._ip_blacklist,
+                ip_whitelist=self._ip_whitelist,
             )
 
     async def request(
diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py
index 0359231e7d..8d7d0a3875 100644
--- a/synapse/http/federation/matrix_federation_agent.py
+++ b/synapse/http/federation/matrix_federation_agent.py
@@ -87,7 +87,7 @@ class MatrixFederationAgent:
         reactor: ISynapseReactor,
         tls_client_options_factory: Optional[FederationPolicyForHTTPS],
         user_agent: bytes,
-        ip_whitelist: IPSet,
+        ip_whitelist: Optional[IPSet],
         ip_blacklist: IPSet,
         _srv_resolver: Optional[SrvResolver] = None,
         _well_known_resolver: Optional[WellKnownResolver] = None,
diff --git a/tests/http/test_client.py b/tests/http/test_client.py
index f6d6684985..57b6a84e23 100644
--- a/tests/http/test_client.py
+++ b/tests/http/test_client.py
@@ -210,8 +210,8 @@ class BlacklistingAgentTest(TestCase):
         """Apply the blacklisting agent and ensure it properly blocks connections to particular IPs."""
         agent = BlacklistingAgentWrapper(
             Agent(self.reactor),
-            ip_whitelist=self.ip_whitelist,
             ip_blacklist=self.ip_blacklist,
+            ip_whitelist=self.ip_whitelist,
         )
 
         # The unsafe IPs should be rejected.
-- 
cgit 1.5.1


From 3d060eae6c836f4153a3150c6970cb9b10516da6 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 9 Mar 2023 07:10:09 -0500
Subject: Add missing type hints to `synapse.storage.database`. (#15230)

---
 changelog.d/15230.misc      |  1 +
 mypy.ini                    |  3 ---
 synapse/storage/database.py | 21 ++++++++++++++++-----
 3 files changed, 17 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/15230.misc

(limited to 'synapse')

diff --git a/changelog.d/15230.misc b/changelog.d/15230.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/15230.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/mypy.ini b/mypy.ini
index cad3716389..945f7925cb 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -48,9 +48,6 @@ warn_unused_ignores = False
 [mypy-synapse.util.caches.treecache]
 disallow_untyped_defs = False
 
-[mypy-synapse.storage.database]
-disallow_untyped_defs = False
-
 [mypy-tests.util.caches.test_descriptors]
 disallow_untyped_defs = False
 
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 5efe31aa19..fec4ae5b97 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -34,6 +34,7 @@ from typing import (
     Tuple,
     Type,
     TypeVar,
+    Union,
     cast,
     overload,
 )
@@ -100,6 +101,15 @@ UNIQUE_INDEX_BACKGROUND_UPDATES = {
 }
 
 
+class _PoolConnection(Connection):
+    """
+    A Connection from twisted.enterprise.adbapi.Connection.
+    """
+
+    def reconnect(self) -> None:
+        ...
+
+
 def make_pool(
     reactor: IReactorCore,
     db_config: DatabaseConnectionConfig,
@@ -856,7 +866,8 @@ class DatabasePool:
             try:
                 with opentracing.start_active_span(f"db.{desc}"):
                     result = await self.runWithConnection(
-                        self.new_transaction,
+                        # mypy seems to have an issue with this, maybe a bug?
+                        self.new_transaction,  # type: ignore[arg-type]
                         desc,
                         after_callbacks,
                         async_after_callbacks,
@@ -892,7 +903,7 @@ class DatabasePool:
 
     async def runWithConnection(
         self,
-        func: Callable[..., R],
+        func: Callable[Concatenate[LoggingDatabaseConnection, P], R],
         *args: Any,
         db_autocommit: bool = False,
         isolation_level: Optional[int] = None,
@@ -926,7 +937,7 @@ class DatabasePool:
 
         start_time = monotonic_time()
 
-        def inner_func(conn, *args, **kwargs):
+        def inner_func(conn: _PoolConnection, *args: P.args, **kwargs: P.kwargs) -> R:
             # We shouldn't be in a transaction. If we are then something
             # somewhere hasn't committed after doing work. (This is likely only
             # possible during startup, as `run*` will ensure changes are
@@ -1019,7 +1030,7 @@ class DatabasePool:
         decoder: Optional[Callable[[Cursor], R]],
         query: str,
         *args: Any,
-    ) -> R:
+    ) -> Union[List[Tuple[Any, ...]], R]:
         """Runs a single query for a result set.
 
         Args:
@@ -1032,7 +1043,7 @@ class DatabasePool:
             The result of decoder(results)
         """
 
-        def interaction(txn):
+        def interaction(txn: LoggingTransaction) -> Union[List[Tuple[Any, ...]], R]:
             txn.execute(query, args)
             if decoder:
                 return decoder(txn)
-- 
cgit 1.5.1


From caf43c3d7c51edad250e50add5595e44720ba32f Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Thu, 9 Mar 2023 14:18:39 +0000
Subject: Faster joins: Fix spurious errors on incremental sync (#15232)

When pushing events in partial state rooms down incremental /sync, we
try to find the `m.room.member` state event for their senders by digging
through their auth events, so that we can present the membership to the
client. Events usually have a membership event in their auth events,
with the exception of the `m.room.create` event and a user's first join
into the room.

When implementing #13477, we took the case of a user's first join into
account, but forgot to handle the `m.room.create` case. This change
fixes that.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/15232.bugfix | 1 +
 synapse/handlers/sync.py | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15232.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15232.bugfix b/changelog.d/15232.bugfix
new file mode 100644
index 0000000000..d75a4f2d99
--- /dev/null
+++ b/changelog.d/15232.bugfix
@@ -0,0 +1 @@
+Faster joins: Fix a bug introduced in Synapse 1.66 where spurious "Failed to find memberships ..." errors would be logged.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index fd6d946c37..9f5b83ed54 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1226,6 +1226,10 @@ class SyncHandler:
                 continue
 
             event_with_membership_auth = events_with_membership_auth[member]
+            is_create = (
+                event_with_membership_auth.is_state()
+                and event_with_membership_auth.type == EventTypes.Create
+            )
             is_join = (
                 event_with_membership_auth.is_state()
                 and event_with_membership_auth.type == EventTypes.Member
@@ -1233,9 +1237,10 @@ class SyncHandler:
                 and event_with_membership_auth.content.get("membership")
                 == Membership.JOIN
             )
-            if not is_join:
+            if not is_create and not is_join:
                 # The event must include the desired membership as an auth event, unless
-                # it's the first join event for a given user.
+                # it's the `m.room.create` event for a room or the first join event for
+                # a given user.
                 missing_members.add(member)
             auth_event_ids.update(event_with_membership_auth.auth_event_ids())
 
-- 
cgit 1.5.1


From ce54477f6fa0264ef00b15bc3e0c2503d85ab061 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 9 Mar 2023 19:12:09 +0000
Subject: Give PyCharm some help with `@cache_in_self` (#15238)

* Give PyCharm some help with `@cache_in_self`

* Changelog

* Fix import for old python versions
---
 changelog.d/15238.misc |  1 +
 synapse/server.py      | 29 ++++++++++++++++++++++++++---
 2 files changed, 27 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/15238.misc

(limited to 'synapse')

diff --git a/changelog.d/15238.misc b/changelog.d/15238.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/15238.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/synapse/server.py b/synapse/server.py
index df80fc1beb..8078463530 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -23,6 +23,8 @@ import functools
 import logging
 from typing import TYPE_CHECKING, Callable, Dict, List, Optional, TypeVar, cast
 
+from typing_extensions import TypeAlias
+
 from twisted.internet.interfaces import IOpenSSLContextFactory
 from twisted.internet.tcp import Port
 from twisted.web.iweb import IPolicyForHTTPS
@@ -142,10 +144,31 @@ if TYPE_CHECKING:
     from synapse.handlers.saml import SamlHandler
 
 
-T = TypeVar("T")
+# The annotation for `cache_in_self` used to be
+#     def (builder: Callable[["HomeServer"],T]) -> Callable[["HomeServer"],T]
+# which mypy was happy with.
+#
+# But PyCharm was confused by this. If `foo` was decorated by `@cache_in_self`, then
+# an expression like `hs.foo()`
+#
+# - would erroneously warn that we hadn't provided a `hs` argument to foo (PyCharm
+#   confused about boundmethods and unbound methods?), and
+# - would be considered to have type `Any`, making for a poor autocomplete and
+#   cross-referencing experience.
+#
+# Instead, use a typevar `F` to express that `@cache_in_self` returns exactly the
+# same type it receives. This isn't strictly true [*], but it's more than good
+# enough to keep PyCharm and mypy happy.
+#
+# [*]: (e.g. `builder` could be an object with a __call__ attribute rather than a
+#      types.FunctionType instance, whereas the return value is always a
+#      types.FunctionType instance.)
+
+T: TypeAlias = object
+F = TypeVar("F", bound=Callable[["HomeServer"], T])
 
 
-def cache_in_self(builder: Callable[["HomeServer"], T]) -> Callable[["HomeServer"], T]:
+def cache_in_self(builder: F) -> F:
     """Wraps a function called e.g. `get_foo`, checking if `self.foo` exists and
     returning if so. If not, calls the given function and sets `self.foo` to it.
 
@@ -183,7 +206,7 @@ def cache_in_self(builder: Callable[["HomeServer"], T]) -> Callable[["HomeServer
 
         return dep
 
-    return _get
+    return cast(F, _get)
 
 
 class HomeServer(metaclass=abc.ABCMeta):
-- 
cgit 1.5.1


From e157c63f68ef93c0c25d2df0c63f7da0b30f95ca Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Fri, 10 Mar 2023 10:35:18 +0000
Subject: Fix missing conditional for registering
 `on_remove_user_third_party_identifier` module api callbacks (#15227)

---
 changelog.d/15227.bugfix                    |  1 +
 synapse/events/third_party_rules.py         |  5 +++++
 tests/rest/client/test_third_party_rules.py | 19 ++++++++++---------
 3 files changed, 16 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/15227.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15227.bugfix b/changelog.d/15227.bugfix
new file mode 100644
index 0000000000..eaa26c8f7f
--- /dev/null
+++ b/changelog.d/15227.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.79.0rc1 where attempting to register an `on_remove_user_third_party_identifier` module API callback would be a no-op.
\ No newline at end of file
diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py
index 3e4d52c8d8..61d4530be7 100644
--- a/synapse/events/third_party_rules.py
+++ b/synapse/events/third_party_rules.py
@@ -247,6 +247,11 @@ class ThirdPartyEventRules:
                 on_add_user_third_party_identifier
             )
 
+        if on_remove_user_third_party_identifier is not None:
+            self._on_remove_user_third_party_identifier_callbacks.append(
+                on_remove_user_third_party_identifier
+            )
+
     async def check_event_allowed(
         self,
         event: EventBase,
diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py
index 3b99513707..753ecc8d16 100644
--- a/tests/rest/client/test_third_party_rules.py
+++ b/tests/rest/client/test_third_party_rules.py
@@ -941,18 +941,16 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
         just before associating and removing a 3PID to/from an account.
         """
         # Pretend to be a Synapse module and register both callbacks as mocks.
-        third_party_rules = self.hs.get_third_party_event_rules()
         on_add_user_third_party_identifier_callback_mock = Mock(
             return_value=make_awaitable(None)
         )
         on_remove_user_third_party_identifier_callback_mock = Mock(
             return_value=make_awaitable(None)
         )
-        third_party_rules._on_threepid_bind_callbacks.append(
-            on_add_user_third_party_identifier_callback_mock
-        )
-        third_party_rules._on_threepid_bind_callbacks.append(
-            on_remove_user_third_party_identifier_callback_mock
+        third_party_rules = self.hs.get_third_party_event_rules()
+        third_party_rules.register_third_party_rules_callbacks(
+            on_add_user_third_party_identifier=on_add_user_third_party_identifier_callback_mock,
+            on_remove_user_third_party_identifier=on_remove_user_third_party_identifier_callback_mock,
         )
 
         # Register an admin user.
@@ -1008,12 +1006,12 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
         when a user is deactivated and their third-party ID associations are deleted.
         """
         # Pretend to be a Synapse module and register both callbacks as mocks.
-        third_party_rules = self.hs.get_third_party_event_rules()
         on_remove_user_third_party_identifier_callback_mock = Mock(
             return_value=make_awaitable(None)
         )
-        third_party_rules._on_threepid_bind_callbacks.append(
-            on_remove_user_third_party_identifier_callback_mock
+        third_party_rules = self.hs.get_third_party_event_rules()
+        third_party_rules.register_third_party_rules_callbacks(
+            on_remove_user_third_party_identifier=on_remove_user_third_party_identifier_callback_mock,
         )
 
         # Register an admin user.
@@ -1039,6 +1037,9 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
         )
         self.assertEqual(channel.code, 200, channel.json_body)
 
+        # Check that the mock was not called on the act of adding a third-party ID.
+        on_remove_user_third_party_identifier_callback_mock.assert_not_called()
+
         # Now deactivate the user.
         channel = self.make_request(
             "PUT",
-- 
cgit 1.5.1


From 4bb26c95a931e0be79d6ab9649e4338f7467a987 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 10 Mar 2023 15:31:25 +0000
Subject: Refactor `filter_events_for_server` (#15240)

* Tweak docstring and type hint

* Flip logic and provide better name

* Separate decision from action

* Track a set of strings, not EventBases

* Require explicit boolean options from callers

* Add explicit option for partial state rooms

* Changelog

* Rename param
---
 changelog.d/15240.misc                             |  1 +
 synapse/federation/sender/per_destination_queue.py |  2 +
 synapse/handlers/federation.py                     | 29 ++++++++--
 synapse/visibility.py                              | 67 +++++++++++++++-------
 tests/test_visibility.py                           | 40 +++++++++++--
 5 files changed, 109 insertions(+), 30 deletions(-)
 create mode 100644 changelog.d/15240.misc

(limited to 'synapse')

diff --git a/changelog.d/15240.misc b/changelog.d/15240.misc
new file mode 100644
index 0000000000..2b7edf916e
--- /dev/null
+++ b/changelog.d/15240.misc
@@ -0,0 +1 @@
+Refactor `filter_events_for_server`.
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index ffc9d95ee7..478187ce44 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -547,6 +547,8 @@ class PerDestinationQueue:
                         self._server_name,
                         new_pdus,
                         redact=False,
+                        filter_out_erased_senders=True,
+                        filter_out_remote_partial_state_events=True,
                     )
 
                     # If we've filtered out all the extremities, fall back to
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 5f2057269d..80156ef343 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -392,7 +392,7 @@ class FederationHandler:
                 get_prev_content=False,
             )
 
-            # We set `check_history_visibility_only` as we might otherwise get false
+            # We unset `filter_out_erased_senders` as we might otherwise get false
             # positives from users having been erased.
             filtered_extremities = await filter_events_for_server(
                 self._storage_controllers,
@@ -400,7 +400,8 @@ class FederationHandler:
                 self.server_name,
                 events_to_check,
                 redact=False,
-                check_history_visibility_only=True,
+                filter_out_erased_senders=False,
+                filter_out_remote_partial_state_events=False,
             )
             if filtered_extremities:
                 extremities_to_request.append(bp.event_id)
@@ -1331,7 +1332,13 @@ class FederationHandler:
         )
 
         events = await filter_events_for_server(
-            self._storage_controllers, origin, self.server_name, events
+            self._storage_controllers,
+            origin,
+            self.server_name,
+            events,
+            redact=True,
+            filter_out_erased_senders=True,
+            filter_out_remote_partial_state_events=True,
         )
 
         return events
@@ -1362,7 +1369,13 @@ class FederationHandler:
         await self._event_auth_handler.assert_host_in_room(event.room_id, origin)
 
         events = await filter_events_for_server(
-            self._storage_controllers, origin, self.server_name, [event]
+            self._storage_controllers,
+            origin,
+            self.server_name,
+            [event],
+            redact=True,
+            filter_out_erased_senders=True,
+            filter_out_remote_partial_state_events=True,
         )
         event = events[0]
         return event
@@ -1390,7 +1403,13 @@ class FederationHandler:
         )
 
         missing_events = await filter_events_for_server(
-            self._storage_controllers, origin, self.server_name, missing_events
+            self._storage_controllers,
+            origin,
+            self.server_name,
+            missing_events,
+            redact=True,
+            filter_out_erased_senders=True,
+            filter_out_remote_partial_state_events=True,
         )
 
         return missing_events
diff --git a/synapse/visibility.py b/synapse/visibility.py
index e442de3173..468e22f8f6 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -14,7 +14,17 @@
 # limitations under the License.
 import logging
 from enum import Enum, auto
-from typing import Collection, Dict, FrozenSet, List, Optional, Tuple
+from typing import (
+    Collection,
+    Dict,
+    FrozenSet,
+    List,
+    Mapping,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+)
 
 import attr
 from typing_extensions import Final
@@ -565,29 +575,43 @@ async def filter_events_for_server(
     storage: StorageControllers,
     target_server_name: str,
     local_server_name: str,
-    events: List[EventBase],
-    redact: bool = True,
-    check_history_visibility_only: bool = False,
+    events: Sequence[EventBase],
+    *,
+    redact: bool,
+    filter_out_erased_senders: bool,
+    filter_out_remote_partial_state_events: bool,
 ) -> List[EventBase]:
-    """Filter a list of events based on whether given server is allowed to
+    """Filter a list of events based on whether the target server is allowed to
     see them.
 
+    For a fully stated room, the target server is allowed to see an event E if:
+      - the state at E has world readable or shared history vis, OR
+      - the state at E says that the target server is in the room.
+
+    For a partially stated room, the target server is allowed to see E if:
+      - E was created by this homeserver, AND:
+          - the partial state at E has world readable or shared history vis, OR
+          - the partial state at E says that the target server is in the room.
+
+    TODO: state before or state after?
+
     Args:
         storage
-        server_name
+        target_server_name
+        local_server_name
         events
-        redact: Whether to return a redacted version of the event, or
-            to filter them out entirely.
-        check_history_visibility_only: Whether to only check the
-            history visibility, rather than things like if the sender has been
+        redact: Controls what to do with events which have been filtered out.
+            If True, include their redacted forms; if False, omit them entirely.
+        filter_out_erased_senders: If true, also filter out events whose sender has been
             erased. This is used e.g. during pagination to decide whether to
             backfill or not.
-
+        filter_out_remote_partial_state_events: If True, also filter out events in
+            partial state rooms created by other homeservers.
     Returns
         The filtered events.
     """
 
-    def is_sender_erased(event: EventBase, erased_senders: Dict[str, bool]) -> bool:
+    def is_sender_erased(event: EventBase, erased_senders: Mapping[str, bool]) -> bool:
         if erased_senders and erased_senders[event.sender]:
             logger.info("Sender of %s has been erased, redacting", event.event_id)
             return True
@@ -616,7 +640,7 @@ async def filter_events_for_server(
         # server has no users in the room: redact
         return False
 
-    if not check_history_visibility_only:
+    if filter_out_erased_senders:
         erased_senders = await storage.main.are_users_erased(e.sender for e in events)
     else:
         # We don't want to check whether users are erased, which is equivalent
@@ -631,15 +655,15 @@ async def filter_events_for_server(
     # otherwise a room could be fully joined after we retrieve those, which would then bypass
     # this check but would base the filtering on an outdated view of the membership events.
 
-    partial_state_invisible_events = set()
-    if not check_history_visibility_only:
+    partial_state_invisible_event_ids: Set[str] = set()
+    if filter_out_remote_partial_state_events:
         for e in events:
             sender_domain = get_domain_from_id(e.sender)
             if (
                 sender_domain != local_server_name
                 and await storage.main.is_partial_state_room(e.room_id)
             ):
-                partial_state_invisible_events.add(e)
+                partial_state_invisible_event_ids.add(e.event_id)
 
     # Let's check to see if all the events have a history visibility
     # of "shared" or "world_readable". If that's the case then we don't
@@ -658,17 +682,20 @@ async def filter_events_for_server(
         target_server_name,
     )
 
-    to_return = []
-    for e in events:
+    def include_event_in_output(e: EventBase) -> bool:
         erased = is_sender_erased(e, erased_senders)
         visible = check_event_is_visible(
             event_to_history_vis[e.event_id], event_to_memberships.get(e.event_id, {})
         )
 
-        if e in partial_state_invisible_events:
+        if e.event_id in partial_state_invisible_event_ids:
             visible = False
 
-        if visible and not erased:
+        return visible and not erased
+
+    to_return = []
+    for e in events:
+        if include_event_in_output(e):
             to_return.append(e)
         elif redact:
             to_return.append(prune_event(e))
diff --git a/tests/test_visibility.py b/tests/test_visibility.py
index 2801a950a8..9ed330f554 100644
--- a/tests/test_visibility.py
+++ b/tests/test_visibility.py
@@ -63,7 +63,13 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
 
         filtered = self.get_success(
             filter_events_for_server(
-                self._storage_controllers, "test_server", "hs", events_to_filter
+                self._storage_controllers,
+                "test_server",
+                "hs",
+                events_to_filter,
+                redact=True,
+                filter_out_erased_senders=True,
+                filter_out_remote_partial_state_events=True,
             )
         )
 
@@ -85,7 +91,13 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
         self.assertEqual(
             self.get_success(
                 filter_events_for_server(
-                    self._storage_controllers, "remote_hs", "hs", [outlier]
+                    self._storage_controllers,
+                    "remote_hs",
+                    "hs",
+                    [outlier],
+                    redact=True,
+                    filter_out_erased_senders=True,
+                    filter_out_remote_partial_state_events=True,
                 )
             ),
             [outlier],
@@ -96,7 +108,13 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
 
         filtered = self.get_success(
             filter_events_for_server(
-                self._storage_controllers, "remote_hs", "local_hs", [outlier, evt]
+                self._storage_controllers,
+                "remote_hs",
+                "local_hs",
+                [outlier, evt],
+                redact=True,
+                filter_out_erased_senders=True,
+                filter_out_remote_partial_state_events=True,
             )
         )
         self.assertEqual(len(filtered), 2, f"expected 2 results, got: {filtered}")
@@ -108,7 +126,13 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
         # be redacted)
         filtered = self.get_success(
             filter_events_for_server(
-                self._storage_controllers, "other_server", "local_hs", [outlier, evt]
+                self._storage_controllers,
+                "other_server",
+                "local_hs",
+                [outlier, evt],
+                redact=True,
+                filter_out_erased_senders=True,
+                filter_out_remote_partial_state_events=True,
             )
         )
         self.assertEqual(filtered[0], outlier)
@@ -143,7 +167,13 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
         # ... and the filtering happens.
         filtered = self.get_success(
             filter_events_for_server(
-                self._storage_controllers, "test_server", "local_hs", events_to_filter
+                self._storage_controllers,
+                "test_server",
+                "local_hs",
+                events_to_filter,
+                redact=True,
+                filter_out_erased_senders=True,
+                filter_out_remote_partial_state_events=True,
             )
         )
 
-- 
cgit 1.5.1


From c071cd5a0ebc2983e5576036ffef3668ba2a30cd Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 13 Mar 2023 12:31:19 +0000
Subject: Ensure fed-sender catchup does not block for full state (#15248)

* Reproduce bad scenario in test
* Avoid catchup optimisation for partial state rooms
---
 changelog.d/15248.bugfix                           |  1 +
 synapse/federation/sender/per_destination_queue.py |  9 ++-
 tests/federation/test_federation_catch_up.py       | 87 +++++++++++++++++++++-
 tests/test_utils/event_injection.py                | 31 ++++++++
 4 files changed, 125 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/15248.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15248.bugfix b/changelog.d/15248.bugfix
new file mode 100644
index 0000000000..8665acb493
--- /dev/null
+++ b/changelog.d/15248.bugfix
@@ -0,0 +1 @@
+Fix a rare bug introduced in Synapse 1.73 where events could remain unsent to other homeservers after a faster-join to a room.
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index 478187ce44..31c5c2b7de 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -497,8 +497,8 @@ class PerDestinationQueue:
             #
             # Note: `catchup_pdus` will have exactly one PDU per room.
             for pdu in catchup_pdus:
-                # The PDU from the DB will be the last PDU in the room from
-                # *this server* that wasn't sent to the remote. However, other
+                # The PDU from the DB will be the newest PDU in the room from
+                # *this server* that we tried to send to the remote but could not. Other
                 # servers may have sent lots of events since then, and we want
                 # to try and tell the remote only about the *latest* events in
                 # the room. This is so that it doesn't get inundated by events
@@ -516,6 +516,11 @@ class PerDestinationQueue:
                     # If the event is in the extremities, then great! We can just
                     # use that without having to do further checks.
                     room_catchup_pdus = [pdu]
+                elif await self._store.is_partial_state_room(pdu.room_id):
+                    # We can't be sure which events the destination should
+                    # see using only partial state. Avoid doing so, and just retry
+                    # sending the newest PDU the remote is missing from us.
+                    room_catchup_pdus = [pdu]
                 else:
                     # If not, fetch the extremities and figure out which we can
                     # send.
diff --git a/tests/federation/test_federation_catch_up.py b/tests/federation/test_federation_catch_up.py
index 6381583c24..391ae51707 100644
--- a/tests/federation/test_federation_catch_up.py
+++ b/tests/federation/test_federation_catch_up.py
@@ -1,4 +1,5 @@
-from typing import Callable, List, Optional, Tuple
+from typing import Callable, Collection, List, Optional, Tuple
+from unittest import mock
 from unittest.mock import Mock
 
 from twisted.test.proto_helpers import MemoryReactor
@@ -500,3 +501,87 @@ class FederationCatchUpTestCases(FederatingHomeserverTestCase):
         self.assertEqual(len(sent_pdus), 1)
         self.assertEqual(sent_pdus[0].event_id, event_2.event_id)
         self.assertFalse(per_dest_queue._catching_up)
+
+    def test_catch_up_is_not_blocked_by_remote_event_in_partial_state_room(
+        self,
+    ) -> None:
+        """Detects (part of?) https://github.com/matrix-org/synapse/issues/15220."""
+        # ARRANGE:
+        # - a local user (u1)
+        # - a room which contains u1 and two remote users, @u2:host2 and @u3:other
+        # - events in that room such that
+        #   - history visibility is restricted
+        #   - u1 sent message events e1 and e2
+        #   - afterwards, u3 sent a remote event e3
+        # - catchup to begin for host2; last successfully sent event was e1
+        per_dest_queue, sent_pdus = self.make_fake_destination_queue()
+
+        self.register_user("u1", "you the one")
+        u1_token = self.login("u1", "you the one")
+        room = self.helper.create_room_as("u1", tok=u1_token)
+        self.helper.send_state(
+            room_id=room,
+            event_type="m.room.history_visibility",
+            body={"history_visibility": "joined"},
+            tok=u1_token,
+        )
+        self.get_success(
+            event_injection.inject_member_event(self.hs, room, "@u2:host2", "join")
+        )
+        self.get_success(
+            event_injection.inject_member_event(self.hs, room, "@u3:other", "join")
+        )
+
+        # create some events
+        event_id_1 = self.helper.send(room, "hello", tok=u1_token)["event_id"]
+        event_id_2 = self.helper.send(room, "world", tok=u1_token)["event_id"]
+        # pretend that u3 changes their displayname
+        event_id_3 = self.get_success(
+            event_injection.inject_member_event(self.hs, room, "@u3:other", "join")
+        ).event_id
+
+        # destination_rooms should already be populated, but let us pretend that we already
+        # sent (successfully) up to and including event id 1
+        event_1 = self.get_success(self.hs.get_datastores().main.get_event(event_id_1))
+        assert event_1.internal_metadata.stream_ordering is not None
+        self.get_success(
+            self.hs.get_datastores().main.set_destination_last_successful_stream_ordering(
+                "host2", event_1.internal_metadata.stream_ordering
+            )
+        )
+
+        # also fetch event 2 so we can compare its stream ordering to the sender's
+        # last_successful_stream_ordering later
+        event_2 = self.get_success(self.hs.get_datastores().main.get_event(event_id_2))
+
+        # Mock event 3 as having partial state
+        self.get_success(
+            event_injection.mark_event_as_partial_state(self.hs, event_id_3, room)
+        )
+
+        # Fail the test if we block on full state for event 3.
+        async def mock_await_full_state(event_ids: Collection[str]) -> None:
+            if event_id_3 in event_ids:
+                raise AssertionError("Tried to await full state for event_id_3")
+
+        # ACT
+        with mock.patch.object(
+            self.hs.get_storage_controllers().state._partial_state_events_tracker,
+            "await_full_state",
+            mock_await_full_state,
+        ):
+            self.get_success(per_dest_queue._catch_up_transmission_loop())
+
+        # ASSERT
+        # We should have:
+        # - not sent event 3: it's not ours, and the room is partial stated
+        # - fallen back to sending event 2: it's the most recent event in the room
+        #   we tried to send to host2
+        # - completed catch-up
+        self.assertEqual(len(sent_pdus), 1)
+        self.assertEqual(sent_pdus[0].event_id, event_id_2)
+        self.assertFalse(per_dest_queue._catching_up)
+        self.assertEqual(
+            per_dest_queue._last_successful_stream_ordering,
+            event_2.internal_metadata.stream_ordering,
+        )
diff --git a/tests/test_utils/event_injection.py b/tests/test_utils/event_injection.py
index a6330ed840..9679904c33 100644
--- a/tests/test_utils/event_injection.py
+++ b/tests/test_utils/event_injection.py
@@ -102,3 +102,34 @@ async def create_event(
     context = await unpersisted_context.persist(event)
 
     return event, context
+
+
+async def mark_event_as_partial_state(
+    hs: synapse.server.HomeServer,
+    event_id: str,
+    room_id: str,
+) -> None:
+    """
+    (Falsely) mark an event as having partial state.
+
+    Naughty, but occasionally useful when checking that partial state doesn't
+    block something from happening.
+
+    If the event already has partial state, this insert will fail (event_id is unique
+    in this table).
+    """
+    store = hs.get_datastores().main
+    await store.db_pool.simple_upsert(
+        table="partial_state_rooms",
+        keyvalues={"room_id": room_id},
+        values={},
+        insertion_values={"room_id": room_id},
+    )
+
+    await store.db_pool.simple_insert(
+        table="partial_state_events",
+        values={
+            "room_id": room_id,
+            "event_id": event_id,
+        },
+    )
-- 
cgit 1.5.1


From e7b559d2ca3d8ea11c32946bd1607078dc2873f8 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 14 Mar 2023 08:18:49 -0400
Subject: Avoid unneeded work if auto-join rooms aren't configured. (#15262)

It is not necessary to reach out to the database to check some
parameters if the auto-join rooms are not configured, or (in some cases)
if auto-create rooms is not configured.
---
 changelog.d/15262.misc       |  1 +
 synapse/handlers/register.py | 10 ++++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15262.misc

(limited to 'synapse')

diff --git a/changelog.d/15262.misc b/changelog.d/15262.misc
new file mode 100644
index 0000000000..d519f151c4
--- /dev/null
+++ b/changelog.d/15262.misc
@@ -0,0 +1 @@
+Skip processing of auto-join room behaviour if there are no auto-join rooms configured.
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index e4e506e62c..6b110dcb6e 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -596,14 +596,20 @@ class RegistrationHandler:
         Args:
             user_id: The user to join
         """
+        # If there are no rooms to auto-join, just bail.
+        if not self.hs.config.registration.auto_join_rooms:
+            return
+
         # auto-join the user to any rooms we're supposed to dump them into
 
         # try to create the room if we're the first real user on the server. Note
         # that an auto-generated support or bot user is not a real user and will never be
         # the user to create the room
         should_auto_create_rooms = False
-        is_real_user = await self.store.is_real_user(user_id)
-        if self.hs.config.registration.autocreate_auto_join_rooms and is_real_user:
+        if (
+            self.hs.config.registration.autocreate_auto_join_rooms
+            and await self.store.is_real_user(user_id)
+        ):
             count = await self.store.count_real_users()
             should_auto_create_rooms = count == 1
 
-- 
cgit 1.5.1


From d0fe417f5c16db298fc56f0f5ebd32eeb0d6cf44 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Tue, 14 Mar 2023 17:32:46 +0000
Subject: Remove unused store method `_set_destination_retry_timings_emulated`.
 (#15266)

---
 changelog.d/15266.misc                         |  1 +
 synapse/storage/databases/main/transactions.py | 56 +-------------------------
 2 files changed, 3 insertions(+), 54 deletions(-)
 create mode 100644 changelog.d/15266.misc

(limited to 'synapse')

diff --git a/changelog.d/15266.misc b/changelog.d/15266.misc
new file mode 100644
index 0000000000..285b72cdd1
--- /dev/null
+++ b/changelog.d/15266.misc
@@ -0,0 +1 @@
+Remove unused store method `_set_destination_retry_timings_emulated`.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py
index 6d72bd9f67..c3bd36efc9 100644
--- a/synapse/storage/databases/main/transactions.py
+++ b/synapse/storage/databases/main/transactions.py
@@ -224,7 +224,7 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
 
         await self.db_pool.runInteraction(
             "set_destination_retry_timings",
-            self._set_destination_retry_timings_native,
+            self._set_destination_retry_timings_txn,
             destination,
             failure_ts,
             retry_last_ts,
@@ -232,7 +232,7 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
             db_autocommit=True,  # Safe as it's a single upsert
         )
 
-    def _set_destination_retry_timings_native(
+    def _set_destination_retry_timings_txn(
         self,
         txn: LoggingTransaction,
         destination: str,
@@ -266,58 +266,6 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
             txn, self.get_destination_retry_timings, (destination,)
         )
 
-    def _set_destination_retry_timings_emulated(
-        self,
-        txn: LoggingTransaction,
-        destination: str,
-        failure_ts: Optional[int],
-        retry_last_ts: int,
-        retry_interval: int,
-    ) -> None:
-        self.database_engine.lock_table(txn, "destinations")
-
-        # We need to be careful here as the data may have changed from under us
-        # due to a worker setting the timings.
-
-        prev_row = self.db_pool.simple_select_one_txn(
-            txn,
-            table="destinations",
-            keyvalues={"destination": destination},
-            retcols=("failure_ts", "retry_last_ts", "retry_interval"),
-            allow_none=True,
-        )
-
-        if not prev_row:
-            self.db_pool.simple_insert_txn(
-                txn,
-                table="destinations",
-                values={
-                    "destination": destination,
-                    "failure_ts": failure_ts,
-                    "retry_last_ts": retry_last_ts,
-                    "retry_interval": retry_interval,
-                },
-            )
-        elif (
-            retry_interval == 0
-            or prev_row["retry_interval"] is None
-            or prev_row["retry_interval"] < retry_interval
-        ):
-            self.db_pool.simple_update_one_txn(
-                txn,
-                "destinations",
-                keyvalues={"destination": destination},
-                updatevalues={
-                    "failure_ts": failure_ts,
-                    "retry_last_ts": retry_last_ts,
-                    "retry_interval": retry_interval,
-                },
-            )
-
-        self._invalidate_cache_and_stream(
-            txn, self.get_destination_retry_timings, (destination,)
-        )
-
     async def store_destination_rooms_entries(
         self,
         destinations: Iterable[str],
-- 
cgit 1.5.1


From 63d87c08c8e9acedb64ef6ee135acda34e4e370d Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Wed, 15 Mar 2023 09:25:58 +0000
Subject: Add schema comments about the `destinations` and `destination_rooms`
 tables. (#15247)

---
 changelog.d/15247.misc                             |  1 +
 .../delta/74/90COMMENTS_destinations.sql.postgres  | 52 ++++++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 changelog.d/15247.misc
 create mode 100644 synapse/storage/schema/main/delta/74/90COMMENTS_destinations.sql.postgres

(limited to 'synapse')

diff --git a/changelog.d/15247.misc b/changelog.d/15247.misc
new file mode 100644
index 0000000000..6e2ce1d4d8
--- /dev/null
+++ b/changelog.d/15247.misc
@@ -0,0 +1 @@
+Add schema comments about the `destinations` and `destination_rooms` tables.
\ No newline at end of file
diff --git a/synapse/storage/schema/main/delta/74/90COMMENTS_destinations.sql.postgres b/synapse/storage/schema/main/delta/74/90COMMENTS_destinations.sql.postgres
new file mode 100644
index 0000000000..cc7dda1a11
--- /dev/null
+++ b/synapse/storage/schema/main/delta/74/90COMMENTS_destinations.sql.postgres
@@ -0,0 +1,52 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+--- destinations
+COMMENT ON TABLE destinations IS
+    'Information about remote homeservers and the health of our connection to them.';
+
+COMMENT ON COLUMN destinations.destination IS 'server name of remote homeserver in question';
+
+COMMENT ON COLUMN destinations.last_successful_stream_ordering IS
+$$Stream ordering of the most recently successfully sent PDU to this server, sent through normal send (not e.g. backfill).
+In Catch-Up Mode, the original PDU persisted by us is represented here, even if we sent a later forward extremity in its stead.
+See `destination_rooms` for more information about catch-up.$$;
+
+COMMENT ON COLUMN destinations.retry_last_ts IS
+$$The last time we tried and failed to reach the remote server, in ms.
+This field is reset to `0` when we succeed in connecting again.$$;
+
+COMMENT ON COLUMN destinations.retry_interval IS
+$$How long, in milliseconds, to wait since the last time we tried to reach the remote server before trying again.
+This field is reset to `0` when we succeed in connecting again.$$;
+
+COMMENT ON COLUMN destinations.failure_ts IS
+$$The first time we tried and failed to reach the remote server, in ms.
+This field is reset to `NULL` when we succeed in connecting again.$$;
+
+
+
+--- destination_rooms
+COMMENT ON TABLE destination_rooms IS
+    'Information about transmission of PDUs in a given room to a given remote homeserver.';
+
+COMMENT ON COLUMN destination_rooms.destination IS 'server name of remote homeserver in question';
+
+COMMENT ON COLUMN destination_rooms.room_id IS 'room ID in question';
+
+COMMENT ON COLUMN destination_rooms.stream_ordering IS
+$$`stream_ordering` of the most recent PDU in this room that needs to be sent (by us) to this homeserver.
+This can only be pointing to our own PDU because we are only responsible for sending our own PDUs.$$;
-- 
cgit 1.5.1


From 3bf973edc7ecf911daab6d8c58d1264891f3ed39 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 15 Mar 2023 15:42:20 -0400
Subject: Remove unused class: DirectTcpReplicationClientFactory. (#15272)

---
 changelog.d/15272.misc            |  1 +
 synapse/replication/tcp/client.py | 51 ---------------------------------------
 2 files changed, 1 insertion(+), 51 deletions(-)
 create mode 100644 changelog.d/15272.misc

(limited to 'synapse')

diff --git a/changelog.d/15272.misc b/changelog.d/15272.misc
new file mode 100644
index 0000000000..7a3ef323e9
--- /dev/null
+++ b/changelog.d/15272.misc
@@ -0,0 +1 @@
+Remove unused class `DirectTcpReplicationClientFactory`.
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 424854efbe..200f667fdf 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -18,16 +18,12 @@ from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Tuple
 
 from twisted.internet import defer
 from twisted.internet.defer import Deferred
-from twisted.internet.interfaces import IAddress, IConnector
-from twisted.internet.protocol import ReconnectingClientFactory
-from twisted.python.failure import Failure
 
 from synapse.api.constants import EventTypes, Membership, ReceiptTypes
 from synapse.federation import send_queue
 from synapse.federation.sender import FederationSender
 from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable
 from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.replication.tcp.protocol import ClientReplicationStreamProtocol
 from synapse.replication.tcp.streams import (
     AccountDataStream,
     DeviceListsStream,
@@ -53,7 +49,6 @@ from synapse.util.async_helpers import Linearizer, timeout_deferred
 from synapse.util.metrics import Measure
 
 if TYPE_CHECKING:
-    from synapse.replication.tcp.handler import ReplicationCommandHandler
     from synapse.server import HomeServer
 
 logger = logging.getLogger(__name__)
@@ -62,52 +57,6 @@ logger = logging.getLogger(__name__)
 _WAIT_FOR_REPLICATION_TIMEOUT_SECONDS = 5
 
 
-class DirectTcpReplicationClientFactory(ReconnectingClientFactory):
-    """Factory for building connections to the master. Will reconnect if the
-    connection is lost.
-
-    Accepts a handler that is passed to `ClientReplicationStreamProtocol`.
-    """
-
-    initialDelay = 0.1
-    maxDelay = 1  # Try at least once every N seconds
-
-    def __init__(
-        self,
-        hs: "HomeServer",
-        client_name: str,
-        command_handler: "ReplicationCommandHandler",
-    ):
-        self.client_name = client_name
-        self.command_handler = command_handler
-        self.server_name = hs.config.server.server_name
-        self.hs = hs
-        self._clock = hs.get_clock()  # As self.clock is defined in super class
-
-        hs.get_reactor().addSystemEventTrigger("before", "shutdown", self.stopTrying)
-
-    def startedConnecting(self, connector: IConnector) -> None:
-        logger.info("Connecting to replication: %r", connector.getDestination())
-
-    def buildProtocol(self, addr: IAddress) -> ClientReplicationStreamProtocol:
-        logger.info("Connected to replication: %r", addr)
-        return ClientReplicationStreamProtocol(
-            self.hs,
-            self.client_name,
-            self.server_name,
-            self._clock,
-            self.command_handler,
-        )
-
-    def clientConnectionLost(self, connector: IConnector, reason: Failure) -> None:
-        logger.error("Lost replication conn: %r", reason)
-        ReconnectingClientFactory.clientConnectionLost(self, connector, reason)
-
-    def clientConnectionFailed(self, connector: IConnector, reason: Failure) -> None:
-        logger.error("Failed to connect to replication: %r", reason)
-        ReconnectingClientFactory.clientConnectionFailed(self, connector, reason)
-
-
 class ReplicationDataHandler:
     """Handles incoming stream updates from replication.
 
-- 
cgit 1.5.1


From f54f877f273b7115777b93524983ea7455be5919 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Thu, 16 Mar 2023 09:55:19 +0000
Subject: Preparatory work to fix the user directory assuming that any remote
 membership state events represent a profile change. [rei:userdirpriv]
 (#14755)

* Remove special-case method for new memberships only, use more generic method

* Only collect profiles from state events in public rooms

* Add a table to track stale remote user profiles

* Add store methods to set and delete rows in this new table

* Mark remote profiles as stale when a member state event comes in to a private room

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

* Simplify by removing Optionality of `event_id`

* Replace names and avatars with None if they're set to dodgy things

I think this makes more sense anyway.

* Move schema delta to 74 (I missed the boat?)

* Turns out these can be None after all

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 changelog.d/14755.bugfix                           |  1 +
 synapse/handlers/user_directory.py                 | 81 +++++++++++++---------
 synapse/storage/databases/main/user_directory.py   | 40 +++++++++++
 .../74/01_user_directory_stale_remote_users.sql    | 39 +++++++++++
 4 files changed, 127 insertions(+), 34 deletions(-)
 create mode 100644 changelog.d/14755.bugfix
 create mode 100644 synapse/storage/schema/main/delta/74/01_user_directory_stale_remote_users.sql

(limited to 'synapse')

diff --git a/changelog.d/14755.bugfix b/changelog.d/14755.bugfix
new file mode 100644
index 0000000000..12f979e9d0
--- /dev/null
+++ b/changelog.d/14755.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug in which the user directory would assume any remote membership state events represent a profile change.
\ No newline at end of file
diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index 3610b6bf78..0815be79fa 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -28,6 +28,11 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
+# Don't refresh a stale user directory entry, using a Federation /profile request,
+# for 60 seconds. This gives time for other state events to arrive (which will
+# then be coalesced such that only one /profile request is made).
+USER_DIRECTORY_STALE_REFRESH_TIME_MS = 60 * 1000
+
 
 class UserDirectoryHandler(StateDeltasHandler):
     """Handles queries and updates for the user_directory.
@@ -200,8 +205,8 @@ class UserDirectoryHandler(StateDeltasHandler):
             typ = delta["type"]
             state_key = delta["state_key"]
             room_id = delta["room_id"]
-            event_id = delta["event_id"]
-            prev_event_id = delta["prev_event_id"]
+            event_id: Optional[str] = delta["event_id"]
+            prev_event_id: Optional[str] = delta["prev_event_id"]
 
             logger.debug("Handling: %r %r, %s", typ, state_key, event_id)
 
@@ -297,8 +302,8 @@ class UserDirectoryHandler(StateDeltasHandler):
     async def _handle_room_membership_event(
         self,
         room_id: str,
-        prev_event_id: str,
-        event_id: str,
+        prev_event_id: Optional[str],
+        event_id: Optional[str],
         state_key: str,
     ) -> None:
         """Process a single room membershp event.
@@ -348,7 +353,8 @@ class UserDirectoryHandler(StateDeltasHandler):
             # Handle any profile changes for remote users.
             # (For local users the rest of the application calls
             # `handle_local_profile_change`.)
-            if is_remote:
+            # Only process if there is an event_id.
+            if is_remote and event_id is not None:
                 await self._handle_possible_remote_profile_change(
                     state_key, room_id, prev_event_id, event_id
                 )
@@ -356,29 +362,13 @@ class UserDirectoryHandler(StateDeltasHandler):
             # This may be the first time we've seen a remote user. If
             # so, ensure we have a directory entry for them. (For local users,
             # the rest of the application calls `handle_local_profile_change`.)
-            if is_remote:
-                await self._upsert_directory_entry_for_remote_user(state_key, event_id)
+            # Only process if there is an event_id.
+            if is_remote and event_id is not None:
+                await self._handle_possible_remote_profile_change(
+                    state_key, room_id, None, event_id
+                )
             await self._track_user_joined_room(room_id, state_key)
 
-    async def _upsert_directory_entry_for_remote_user(
-        self, user_id: str, event_id: str
-    ) -> None:
-        """A remote user has just joined a room. Ensure they have an entry in
-        the user directory. The caller is responsible for making sure they're
-        remote.
-        """
-        event = await self.store.get_event(event_id, allow_none=True)
-        # It isn't expected for this event to not exist, but we
-        # don't want the entire background process to break.
-        if event is None:
-            return
-
-        logger.debug("Adding new user to dir, %r", user_id)
-
-        await self.store.update_profile_in_user_dir(
-            user_id, event.content.get("displayname"), event.content.get("avatar_url")
-        )
-
     async def _track_user_joined_room(self, room_id: str, joining_user_id: str) -> None:
         """Someone's just joined a room. Update `users_in_public_rooms` or
         `users_who_share_private_rooms` as appropriate.
@@ -460,14 +450,17 @@ class UserDirectoryHandler(StateDeltasHandler):
         user_id: str,
         room_id: str,
         prev_event_id: Optional[str],
-        event_id: Optional[str],
+        event_id: str,
     ) -> None:
         """Check member event changes for any profile changes and update the
         database if there are. This is intended for remote users only. The caller
         is responsible for checking that the given user is remote.
         """
-        if not prev_event_id or not event_id:
-            return
+
+        if not prev_event_id:
+            # If we don't have an older event to fall back on, just fetch the same
+            # event itself.
+            prev_event_id = event_id
 
         prev_event = await self.store.get_event(prev_event_id, allow_none=True)
         event = await self.store.get_event(event_id, allow_none=True)
@@ -478,17 +471,37 @@ class UserDirectoryHandler(StateDeltasHandler):
         if event.membership != Membership.JOIN:
             return
 
+        is_public = await self.store.is_room_world_readable_or_publicly_joinable(
+            room_id
+        )
+        if not is_public:
+            # Don't collect user profiles from private rooms as they are not guaranteed
+            # to be the same as the user's global profile.
+            now_ts = self.clock.time_msec()
+            await self.store.set_remote_user_profile_in_user_dir_stale(
+                user_id,
+                next_try_at_ms=now_ts + USER_DIRECTORY_STALE_REFRESH_TIME_MS,
+                retry_counter=0,
+            )
+            return
+
         prev_name = prev_event.content.get("displayname")
         new_name = event.content.get("displayname")
-        # If the new name is an unexpected form, do not update the directory.
+        # If the new name is an unexpected form, replace with None.
         if not isinstance(new_name, str):
-            new_name = prev_name
+            new_name = None
 
         prev_avatar = prev_event.content.get("avatar_url")
         new_avatar = event.content.get("avatar_url")
-        # If the new avatar is an unexpected form, do not update the directory.
+        # If the new avatar is an unexpected form, replace with None.
         if not isinstance(new_avatar, str):
-            new_avatar = prev_avatar
+            new_avatar = None
 
-        if prev_name != new_name or prev_avatar != new_avatar:
+        if (
+            prev_name != new_name
+            or prev_avatar != new_avatar
+            or prev_event_id == event_id
+        ):
+            # Only update if something has changed, or we didn't have a previous event
+            # in the first place.
             await self.store.update_profile_in_user_dir(user_id, new_name, new_avatar)
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index f16a509ac4..9cf01b7f36 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -54,6 +54,7 @@ from synapse.storage.databases.main.state_deltas import StateDeltasStore
 from synapse.storage.engines import PostgresEngine, Sqlite3Engine
 from synapse.types import (
     JsonDict,
+    UserID,
     UserProfile,
     get_domain_from_id,
     get_localpart_from_id,
@@ -473,11 +474,42 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
 
         return False
 
+    async def set_remote_user_profile_in_user_dir_stale(
+        self, user_id: str, next_try_at_ms: int, retry_counter: int
+    ) -> None:
+        """
+        Marks a remote user as having a possibly-stale user directory profile.
+
+        Args:
+            user_id: the remote user who may have a stale profile on this server.
+            next_try_at_ms: timestamp in ms after which the user directory profile can be
+                refreshed.
+            retry_counter: number of failures in refreshing the profile so far. Used for
+                exponential backoff calculations.
+        """
+        assert not self.hs.is_mine_id(
+            user_id
+        ), "Can't mark a local user as a stale remote user."
+
+        server_name = UserID.from_string(user_id).domain
+
+        await self.db_pool.simple_upsert(
+            table="user_directory_stale_remote_users",
+            keyvalues={"user_id": user_id},
+            values={
+                "next_try_at_ts": next_try_at_ms,
+                "retry_counter": retry_counter,
+                "user_server_name": server_name,
+            },
+            desc="set_remote_user_profile_in_user_dir_stale",
+        )
+
     async def update_profile_in_user_dir(
         self, user_id: str, display_name: Optional[str], avatar_url: Optional[str]
     ) -> None:
         """
         Update or add a user's profile in the user directory.
+        If the user is remote, the profile will be marked as not stale.
         """
         # If the display name or avatar URL are unexpected types, replace with None.
         display_name = non_null_str_or_none(display_name)
@@ -491,6 +523,14 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                 values={"display_name": display_name, "avatar_url": avatar_url},
             )
 
+            if not self.hs.is_mine_id(user_id):
+                # Remote users: Make sure the profile is not marked as stale anymore.
+                self.db_pool.simple_delete_txn(
+                    txn,
+                    table="user_directory_stale_remote_users",
+                    keyvalues={"user_id": user_id},
+                )
+
             # The display name that goes into the database index.
             index_display_name = display_name
             if index_display_name is not None:
diff --git a/synapse/storage/schema/main/delta/74/01_user_directory_stale_remote_users.sql b/synapse/storage/schema/main/delta/74/01_user_directory_stale_remote_users.sql
new file mode 100644
index 0000000000..dcb38f3d7b
--- /dev/null
+++ b/synapse/storage/schema/main/delta/74/01_user_directory_stale_remote_users.sql
@@ -0,0 +1,39 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Table containing a list of remote users whose profiles may have changed
+-- since their last update in the user directory.
+CREATE TABLE user_directory_stale_remote_users (
+    -- The User ID of the remote user whose profile may be stale.
+    user_id TEXT NOT NULL PRIMARY KEY,
+
+    -- The server name of the user.
+    user_server_name TEXT NOT NULL,
+
+    -- The timestamp (in ms) after which we should next try to request the user's
+    -- latest profile.
+    next_try_at_ts BIGINT NOT NULL,
+
+    -- The number of retries so far.
+    -- 0 means we have not yet attempted to refresh the profile.
+    -- Used for calculating exponential backoff.
+    retry_counter INTEGER NOT NULL
+);
+
+-- Create an index so we can easily query upcoming servers to try.
+CREATE INDEX user_directory_stale_remote_users_next_try_idx ON user_directory_stale_remote_users(next_try_at_ts, user_server_name);
+
+-- Create an index so we can easily query upcoming users to try for a particular server.
+CREATE INDEX user_directory_stale_remote_users_next_try_by_server_idx ON user_directory_stale_remote_users(user_server_name, next_try_at_ts);
-- 
cgit 1.5.1


From 4953cd71dfbb1925dc4a477efd2ed48c2dfd70d6 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Thu, 16 Mar 2023 10:35:31 +0000
Subject: Move Account Validity callbacks to a dedicated file (#15237)

---
 changelog.d/15237.misc                             |  1 +
 synapse/handlers/account_validity.py               | 99 +++-------------------
 synapse/module_api/__init__.py                     | 18 ++--
 synapse/module_api/callbacks/__init__.py           | 22 +++++
 .../callbacks/account_validity_callbacks.py        | 93 ++++++++++++++++++++
 synapse/rest/admin/users.py                        | 17 ++--
 synapse/server.py                                  |  5 ++
 tests/rest/client/test_account.py                  |  5 +-
 8 files changed, 154 insertions(+), 106 deletions(-)
 create mode 100644 changelog.d/15237.misc
 create mode 100644 synapse/module_api/callbacks/__init__.py
 create mode 100644 synapse/module_api/callbacks/account_validity_callbacks.py

(limited to 'synapse')

diff --git a/changelog.d/15237.misc b/changelog.d/15237.misc
new file mode 100644
index 0000000000..9981606c32
--- /dev/null
+++ b/changelog.d/15237.misc
@@ -0,0 +1 @@
+Move various module API callback registration methods to a dedicated class.
\ No newline at end of file
diff --git a/synapse/handlers/account_validity.py b/synapse/handlers/account_validity.py
index 33e45e3a11..4aa4ebf7e4 100644
--- a/synapse/handlers/account_validity.py
+++ b/synapse/handlers/account_validity.py
@@ -15,9 +15,7 @@
 import email.mime.multipart
 import email.utils
 import logging
-from typing import TYPE_CHECKING, Awaitable, Callable, List, Optional, Tuple
-
-from twisted.web.http import Request
+from typing import TYPE_CHECKING, List, Optional, Tuple
 
 from synapse.api.errors import AuthError, StoreError, SynapseError
 from synapse.metrics.background_process_metrics import wrap_as_background_process
@@ -30,25 +28,17 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
-# Types for callbacks to be registered via the module api
-IS_USER_EXPIRED_CALLBACK = Callable[[str], Awaitable[Optional[bool]]]
-ON_USER_REGISTRATION_CALLBACK = Callable[[str], Awaitable]
-# Temporary hooks to allow for a transition from `/_matrix/client` endpoints
-# to `/_synapse/client/account_validity`. See `register_account_validity_callbacks`.
-ON_LEGACY_SEND_MAIL_CALLBACK = Callable[[str], Awaitable]
-ON_LEGACY_RENEW_CALLBACK = Callable[[str], Awaitable[Tuple[bool, bool, int]]]
-ON_LEGACY_ADMIN_REQUEST = Callable[[Request], Awaitable]
-
 
 class AccountValidityHandler:
     def __init__(self, hs: "HomeServer"):
         self.hs = hs
         self.config = hs.config
-        self.store = self.hs.get_datastores().main
-        self.send_email_handler = self.hs.get_send_email_handler()
-        self.clock = self.hs.get_clock()
+        self.store = hs.get_datastores().main
+        self.send_email_handler = hs.get_send_email_handler()
+        self.clock = hs.get_clock()
 
-        self._app_name = self.hs.config.email.email_app_name
+        self._app_name = hs.config.email.email_app_name
+        self._module_api_callbacks = hs.get_module_api_callbacks().account_validity
 
         self._account_validity_enabled = (
             hs.config.account_validity.account_validity_enabled
@@ -78,69 +68,6 @@ class AccountValidityHandler:
             if hs.config.worker.run_background_tasks:
                 self.clock.looping_call(self._send_renewal_emails, 30 * 60 * 1000)
 
-        self._is_user_expired_callbacks: List[IS_USER_EXPIRED_CALLBACK] = []
-        self._on_user_registration_callbacks: List[ON_USER_REGISTRATION_CALLBACK] = []
-        self._on_legacy_send_mail_callback: Optional[
-            ON_LEGACY_SEND_MAIL_CALLBACK
-        ] = None
-        self._on_legacy_renew_callback: Optional[ON_LEGACY_RENEW_CALLBACK] = None
-
-        # The legacy admin requests callback isn't a protected attribute because we need
-        # to access it from the admin servlet, which is outside of this handler.
-        self.on_legacy_admin_request_callback: Optional[ON_LEGACY_ADMIN_REQUEST] = None
-
-    def register_account_validity_callbacks(
-        self,
-        is_user_expired: Optional[IS_USER_EXPIRED_CALLBACK] = None,
-        on_user_registration: Optional[ON_USER_REGISTRATION_CALLBACK] = None,
-        on_legacy_send_mail: Optional[ON_LEGACY_SEND_MAIL_CALLBACK] = None,
-        on_legacy_renew: Optional[ON_LEGACY_RENEW_CALLBACK] = None,
-        on_legacy_admin_request: Optional[ON_LEGACY_ADMIN_REQUEST] = None,
-    ) -> None:
-        """Register callbacks from module for each hook."""
-        if is_user_expired is not None:
-            self._is_user_expired_callbacks.append(is_user_expired)
-
-        if on_user_registration is not None:
-            self._on_user_registration_callbacks.append(on_user_registration)
-
-        # The builtin account validity feature exposes 3 endpoints (send_mail, renew, and
-        # an admin one). As part of moving the feature into a module, we need to change
-        # the path from /_matrix/client/unstable/account_validity/... to
-        # /_synapse/client/account_validity, because:
-        #
-        #   * the feature isn't part of the Matrix spec thus shouldn't live under /_matrix
-        #   * the way we register servlets means that modules can't register resources
-        #     under /_matrix/client
-        #
-        # We need to allow for a transition period between the old and new endpoints
-        # in order to allow for clients to update (and for emails to be processed).
-        #
-        # Once the email-account-validity module is loaded, it will take control of account
-        # validity by moving the rows from our `account_validity` table into its own table.
-        #
-        # Therefore, we need to allow modules (in practice just the one implementing the
-        # email-based account validity) to temporarily hook into the legacy endpoints so we
-        # can route the traffic coming into the old endpoints into the module, which is
-        # why we have the following three temporary hooks.
-        if on_legacy_send_mail is not None:
-            if self._on_legacy_send_mail_callback is not None:
-                raise RuntimeError("Tried to register on_legacy_send_mail twice")
-
-            self._on_legacy_send_mail_callback = on_legacy_send_mail
-
-        if on_legacy_renew is not None:
-            if self._on_legacy_renew_callback is not None:
-                raise RuntimeError("Tried to register on_legacy_renew twice")
-
-            self._on_legacy_renew_callback = on_legacy_renew
-
-        if on_legacy_admin_request is not None:
-            if self.on_legacy_admin_request_callback is not None:
-                raise RuntimeError("Tried to register on_legacy_admin_request twice")
-
-            self.on_legacy_admin_request_callback = on_legacy_admin_request
-
     async def is_user_expired(self, user_id: str) -> bool:
         """Checks if a user has expired against third-party modules.
 
@@ -150,7 +77,7 @@ class AccountValidityHandler:
         Returns:
             Whether the user has expired.
         """
-        for callback in self._is_user_expired_callbacks:
+        for callback in self._module_api_callbacks.is_user_expired_callbacks:
             expired = await delay_cancellation(callback(user_id))
             if expired is not None:
                 return expired
@@ -168,7 +95,7 @@ class AccountValidityHandler:
         Args:
             user_id: The ID of the newly registered user.
         """
-        for callback in self._on_user_registration_callbacks:
+        for callback in self._module_api_callbacks.on_user_registration_callbacks:
             await callback(user_id)
 
     @wrap_as_background_process("send_renewals")
@@ -198,8 +125,8 @@ class AccountValidityHandler:
         """
         # If a module supports sending a renewal email from here, do that, otherwise do
         # the legacy dance.
-        if self._on_legacy_send_mail_callback is not None:
-            await self._on_legacy_send_mail_callback(user_id)
+        if self._module_api_callbacks.on_legacy_send_mail_callback is not None:
+            await self._module_api_callbacks.on_legacy_send_mail_callback(user_id)
             return
 
         if not self._account_validity_renew_by_email_enabled:
@@ -336,8 +263,10 @@ class AccountValidityHandler:
         """
         # If a module supports triggering a renew from here, do that, otherwise do the
         # legacy dance.
-        if self._on_legacy_renew_callback is not None:
-            return await self._on_legacy_renew_callback(renewal_token)
+        if self._module_api_callbacks.on_legacy_renew_callback is not None:
+            return await self._module_api_callbacks.on_legacy_renew_callback(
+                renewal_token
+            )
 
         try:
             (
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 424239e3df..595c23e78d 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -73,13 +73,6 @@ from synapse.events.third_party_rules import (
     ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK,
 )
 from synapse.handlers.account_data import ON_ACCOUNT_DATA_UPDATED_CALLBACK
-from synapse.handlers.account_validity import (
-    IS_USER_EXPIRED_CALLBACK,
-    ON_LEGACY_ADMIN_REQUEST,
-    ON_LEGACY_RENEW_CALLBACK,
-    ON_LEGACY_SEND_MAIL_CALLBACK,
-    ON_USER_REGISTRATION_CALLBACK,
-)
 from synapse.handlers.auth import (
     CHECK_3PID_AUTH_CALLBACK,
     CHECK_AUTH_CALLBACK,
@@ -105,6 +98,13 @@ from synapse.logging.context import (
     run_in_background,
 )
 from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.module_api.callbacks.account_validity_callbacks import (
+    IS_USER_EXPIRED_CALLBACK,
+    ON_LEGACY_ADMIN_REQUEST,
+    ON_LEGACY_RENEW_CALLBACK,
+    ON_LEGACY_SEND_MAIL_CALLBACK,
+    ON_USER_REGISTRATION_CALLBACK,
+)
 from synapse.rest.client.login import LoginResponse
 from synapse.storage import DataStore
 from synapse.storage.background_updates import (
@@ -250,6 +250,7 @@ class ModuleApi:
         self._push_rules_handler = hs.get_push_rules_handler()
         self._device_handler = hs.get_device_handler()
         self.custom_template_dir = hs.config.server.custom_template_directory
+        self._callbacks = hs.get_module_api_callbacks()
 
         try:
             app_name = self._hs.config.email.email_app_name
@@ -271,7 +272,6 @@ class ModuleApi:
         self._account_data_manager = AccountDataManager(hs)
 
         self._spam_checker = hs.get_spam_checker()
-        self._account_validity_handler = hs.get_account_validity_handler()
         self._third_party_event_rules = hs.get_third_party_event_rules()
         self._password_auth_provider = hs.get_password_auth_provider()
         self._presence_router = hs.get_presence_router()
@@ -332,7 +332,7 @@ class ModuleApi:
 
         Added in Synapse v1.39.0.
         """
-        return self._account_validity_handler.register_account_validity_callbacks(
+        return self._callbacks.account_validity.register_callbacks(
             is_user_expired=is_user_expired,
             on_user_registration=on_user_registration,
             on_legacy_send_mail=on_legacy_send_mail,
diff --git a/synapse/module_api/callbacks/__init__.py b/synapse/module_api/callbacks/__init__.py
new file mode 100644
index 0000000000..3d977bf655
--- /dev/null
+++ b/synapse/module_api/callbacks/__init__.py
@@ -0,0 +1,22 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.module_api.callbacks.account_validity_callbacks import (
+    AccountValidityModuleApiCallbacks,
+)
+
+
+class ModuleApiCallbacks:
+    def __init__(self) -> None:
+        self.account_validity = AccountValidityModuleApiCallbacks()
diff --git a/synapse/module_api/callbacks/account_validity_callbacks.py b/synapse/module_api/callbacks/account_validity_callbacks.py
new file mode 100644
index 0000000000..531d0c9ddc
--- /dev/null
+++ b/synapse/module_api/callbacks/account_validity_callbacks.py
@@ -0,0 +1,93 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import Awaitable, Callable, List, Optional, Tuple
+
+from twisted.web.http import Request
+
+logger = logging.getLogger(__name__)
+
+# Types for callbacks to be registered via the module api
+IS_USER_EXPIRED_CALLBACK = Callable[[str], Awaitable[Optional[bool]]]
+ON_USER_REGISTRATION_CALLBACK = Callable[[str], Awaitable]
+# Temporary hooks to allow for a transition from `/_matrix/client` endpoints
+# to `/_synapse/client/account_validity`. See `register_callbacks` below.
+ON_LEGACY_SEND_MAIL_CALLBACK = Callable[[str], Awaitable]
+ON_LEGACY_RENEW_CALLBACK = Callable[[str], Awaitable[Tuple[bool, bool, int]]]
+ON_LEGACY_ADMIN_REQUEST = Callable[[Request], Awaitable]
+
+
+class AccountValidityModuleApiCallbacks:
+    def __init__(self) -> None:
+        self.is_user_expired_callbacks: List[IS_USER_EXPIRED_CALLBACK] = []
+        self.on_user_registration_callbacks: List[ON_USER_REGISTRATION_CALLBACK] = []
+        self.on_legacy_send_mail_callback: Optional[ON_LEGACY_SEND_MAIL_CALLBACK] = None
+        self.on_legacy_renew_callback: Optional[ON_LEGACY_RENEW_CALLBACK] = None
+
+        # Unlike the callbacks above, which the account validity handler consumes, the
+        # legacy admin request callback is read directly by the admin servlet.
+        self.on_legacy_admin_request_callback: Optional[ON_LEGACY_ADMIN_REQUEST] = None
+
+    def register_callbacks(
+        self,
+        is_user_expired: Optional[IS_USER_EXPIRED_CALLBACK] = None,
+        on_user_registration: Optional[ON_USER_REGISTRATION_CALLBACK] = None,
+        on_legacy_send_mail: Optional[ON_LEGACY_SEND_MAIL_CALLBACK] = None,
+        on_legacy_renew: Optional[ON_LEGACY_RENEW_CALLBACK] = None,
+        on_legacy_admin_request: Optional[ON_LEGACY_ADMIN_REQUEST] = None,
+    ) -> None:
+        """Register callbacks from module for each hook."""
+        if is_user_expired is not None:
+            self.is_user_expired_callbacks.append(is_user_expired)
+
+        if on_user_registration is not None:
+            self.on_user_registration_callbacks.append(on_user_registration)
+
+        # The builtin account validity feature exposes 3 endpoints (send_mail, renew, and
+        # an admin one). As part of moving the feature into a module, we need to change
+        # the path from /_matrix/client/unstable/account_validity/... to
+        # /_synapse/client/account_validity, because:
+        #
+        #   * the feature isn't part of the Matrix spec thus shouldn't live under /_matrix
+        #   * the way we register servlets means that modules can't register resources
+        #     under /_matrix/client
+        #
+        # We need to allow for a transition period between the old and new endpoints
+        # in order to allow for clients to update (and for emails to be processed).
+        #
+        # Once the email-account-validity module is loaded, it will take control of account
+        # validity by moving the rows from our `account_validity` table into its own table.
+        #
+        # Therefore, we need to allow modules (in practice just the one implementing the
+        # email-based account validity) to temporarily hook into the legacy endpoints so we
+        # can route the traffic coming into the old endpoints into the module, which is
+        # why we have the following three temporary hooks.
+        if on_legacy_send_mail is not None:
+            if self.on_legacy_send_mail_callback is not None:
+                raise RuntimeError("Tried to register on_legacy_send_mail twice")
+
+            self.on_legacy_send_mail_callback = on_legacy_send_mail
+
+        if on_legacy_renew is not None:
+            if self.on_legacy_renew_callback is not None:
+                raise RuntimeError("Tried to register on_legacy_renew twice")
+
+            self.on_legacy_renew_callback = on_legacy_renew
+
+        if on_legacy_admin_request is not None:
+            if self.on_legacy_admin_request_callback is not None:
+                raise RuntimeError("Tried to register on_legacy_admin_request twice")
+
+            self.on_legacy_admin_request_callback = on_legacy_admin_request
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 357e9a574d..281e8fd0ad 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -683,19 +683,18 @@ class AccountValidityRenewServlet(RestServlet):
     PATTERNS = admin_patterns("/account_validity/validity$")
 
     def __init__(self, hs: "HomeServer"):
-        self.account_activity_handler = hs.get_account_validity_handler()
+        self.account_validity_handler = hs.get_account_validity_handler()
+        self.account_validity_module_callbacks = (
+            hs.get_module_api_callbacks().account_validity
+        )
         self.auth = hs.get_auth()
 
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         await assert_requester_is_admin(self.auth, request)
 
-        if self.account_activity_handler.on_legacy_admin_request_callback:
-            expiration_ts = (
-                await (
-                    self.account_activity_handler.on_legacy_admin_request_callback(
-                        request
-                    )
-                )
+        if self.account_validity_module_callbacks.on_legacy_admin_request_callback:
+            expiration_ts = await self.account_validity_module_callbacks.on_legacy_admin_request_callback(
+                request
             )
         else:
             body = parse_json_object_from_request(request)
@@ -706,7 +705,7 @@ class AccountValidityRenewServlet(RestServlet):
                     "Missing property 'user_id' in the request body",
                 )
 
-            expiration_ts = await self.account_activity_handler.renew_account_for_user(
+            expiration_ts = await self.account_validity_handler.renew_account_for_user(
                 body["user_id"],
                 body.get("expiration_ts"),
                 not body.get("enable_renewal_emails", True),
diff --git a/synapse/server.py b/synapse/server.py
index 8078463530..a191c19993 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -110,6 +110,7 @@ from synapse.http.matrixfederationclient import MatrixFederationHttpClient
 from synapse.media.media_repository import MediaRepository
 from synapse.metrics.common_usage_metrics import CommonUsageMetricsManager
 from synapse.module_api import ModuleApi
+from synapse.module_api.callbacks import ModuleApiCallbacks
 from synapse.notifier import Notifier, ReplicationNotifier
 from synapse.push.bulk_push_rule_evaluator import BulkPushRuleEvaluator
 from synapse.push.pusherpool import PusherPool
@@ -800,6 +801,10 @@ class HomeServer(metaclass=abc.ABCMeta):
     def get_module_api(self) -> ModuleApi:
         return ModuleApi(self, self.get_auth_handler())
 
+    @cache_in_self
+    def get_module_api_callbacks(self) -> ModuleApiCallbacks:
+        return ModuleApiCallbacks()
+
     @cache_in_self
     def get_account_data_handler(self) -> AccountDataHandler:
         return AccountDataHandler(self)
diff --git a/tests/rest/client/test_account.py b/tests/rest/client/test_account.py
index 2b05dffc7d..7f675c44a2 100644
--- a/tests/rest/client/test_account.py
+++ b/tests/rest/client/test_account.py
@@ -1249,9 +1249,8 @@ class AccountStatusTestCase(unittest.HomeserverTestCase):
             # account status will fail.
             return UserID.from_string(user_id).localpart == "someuser"
 
-        self.hs.get_account_validity_handler()._is_user_expired_callbacks.append(
-            is_expired
-        )
+        account_validity_callbacks = self.hs.get_module_api_callbacks().account_validity
+        account_validity_callbacks.is_user_expired_callbacks.append(is_expired)
 
         self._test_status(
             users=[user],
-- 
cgit 1.5.1


From 1f5473465d4cb08239bcc97dbbbf185af6841863 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Thu, 16 Mar 2023 11:44:11 +0000
Subject: Refresh remote profiles that have been marked as stale, in order to
 fill the user directory. [rei:userdirpriv] (#14756)

* Scaffolding for background process to refresh profiles

* Add scaffolding for background process to refresh profiles for a given server

* Implement the code to select servers to refresh from

* Ensure we don't build up multiple looping calls

* Make `get_profile` able to respect backoffs

* Add logic for refreshing users

* When backing off, schedule a refresh when the backoff is over

* Wake up the background processes when we receive an interesting state event

* Add tests

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

* Add comment about 1<<62

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 changelog.d/14756.bugfix                         |   1 +
 synapse/handlers/profile.py                      |   4 +-
 synapse/handlers/user_directory.py               | 242 +++++++++++++++++++++++
 synapse/storage/databases/main/user_directory.py |  74 +++++++
 tests/handlers/test_user_directory.py            | 187 +++++++++++++++++-
 5 files changed, 504 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14756.bugfix

(limited to 'synapse')

diff --git a/changelog.d/14756.bugfix b/changelog.d/14756.bugfix
new file mode 100644
index 0000000000..12f979e9d0
--- /dev/null
+++ b/changelog.d/14756.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug in which the user directory would assume any remote membership state events represent a profile change.
\ No newline at end of file
diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py
index 4bf9a047a3..9a81a77cbd 100644
--- a/synapse/handlers/profile.py
+++ b/synapse/handlers/profile.py
@@ -63,7 +63,7 @@ class ProfileHandler:
 
         self._third_party_rules = hs.get_third_party_event_rules()
 
-    async def get_profile(self, user_id: str) -> JsonDict:
+    async def get_profile(self, user_id: str, ignore_backoff: bool = True) -> JsonDict:
         target_user = UserID.from_string(user_id)
 
         if self.hs.is_mine(target_user):
@@ -81,7 +81,7 @@ class ProfileHandler:
                     destination=target_user.domain,
                     query_type="profile",
                     args={"user_id": user_id},
-                    ignore_backoff=True,
+                    ignore_backoff=ignore_backoff,
                 )
                 return result
             except RequestSendFailed as e:
diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index 0815be79fa..28a92d41d6 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -13,15 +13,22 @@
 # limitations under the License.
 
 import logging
+from http import HTTPStatus
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple
 
+from twisted.internet.interfaces import IDelayedCall
+
 import synapse.metrics
 from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules, Membership
+from synapse.api.errors import Codes, SynapseError
 from synapse.handlers.state_deltas import MatchChange, StateDeltasHandler
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.databases.main.user_directory import SearchResult
 from synapse.storage.roommember import ProfileInfo
+from synapse.types import UserID
 from synapse.util.metrics import Measure
+from synapse.util.retryutils import NotRetryingDestination
+from synapse.util.stringutils import non_null_str_or_none
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -33,6 +40,25 @@ logger = logging.getLogger(__name__)
 # then be coalesced such that only one /profile request is made).
 USER_DIRECTORY_STALE_REFRESH_TIME_MS = 60 * 1000
 
+# Maximum number of remote servers that we will attempt to refresh profiles for
+# in one go.
+MAX_SERVERS_TO_REFRESH_PROFILES_FOR_IN_ONE_GO = 5
+
+# As long as we have servers to refresh (without backoff), keep adding more
+# every 15 seconds.
+INTERVAL_TO_ADD_MORE_SERVERS_TO_REFRESH_PROFILES = 15
+
+
+def calculate_time_of_next_retry(now_ts: int, retry_count: int) -> int:
+    """
+    Calculates the time of a next retry given `now_ts` in ms and the number
+    of failures encountered thus far.
+
+    Currently the sequence goes:
+    1 min, 5 min, 25 min, 2 hour, 10 hour, 52 hour, 10 day, 7.75 week
+    """
+    return now_ts + 60_000 * (5 ** min(retry_count, 7))
+
 
 class UserDirectoryHandler(StateDeltasHandler):
     """Handles queries and updates for the user_directory.
@@ -69,12 +95,24 @@ class UserDirectoryHandler(StateDeltasHandler):
         self.update_user_directory = hs.config.worker.should_update_user_directory
         self.search_all_users = hs.config.userdirectory.user_directory_search_all_users
         self.spam_checker = hs.get_spam_checker()
+        self._hs = hs
+
         # The current position in the current_state_delta stream
         self.pos: Optional[int] = None
 
         # Guard to ensure we only process deltas one at a time
         self._is_processing = False
 
+        # Guard to ensure we only have one process for refreshing remote profiles
+        self._is_refreshing_remote_profiles = False
+        # Handle to cancel the `call_later` of `kick_off_remote_profile_refresh_process`
+        self._refresh_remote_profiles_call_later: Optional[IDelayedCall] = None
+
+        # Guard to ensure we only have one process for refreshing remote profiles
+        # for the given servers.
+        # Set of server names.
+        self._is_refreshing_remote_profiles_for_servers: Set[str] = set()
+
         if self.update_user_directory:
             self.notifier.add_replication_callback(self.notify_new_event)
 
@@ -82,6 +120,11 @@ class UserDirectoryHandler(StateDeltasHandler):
             # we start populating the user directory
             self.clock.call_later(0, self.notify_new_event)
 
+            # Kick off the profile refresh process on startup
+            self._refresh_remote_profiles_call_later = self.clock.call_later(
+                10, self.kick_off_remote_profile_refresh_process
+            )
+
     async def search_users(
         self, user_id: str, search_term: str, limit: int
     ) -> SearchResult:
@@ -483,6 +526,20 @@ class UserDirectoryHandler(StateDeltasHandler):
                 next_try_at_ms=now_ts + USER_DIRECTORY_STALE_REFRESH_TIME_MS,
                 retry_counter=0,
             )
+            # Schedule a wake-up to refresh the user directory for this server.
+            # We intentionally wake up this server directly because we don't want
+            # other servers ahead of it in the queue to get in the way of updating
+            # the profile if the server only just sent us an event.
+            self.clock.call_later(
+                USER_DIRECTORY_STALE_REFRESH_TIME_MS // 1000 + 1,
+                self.kick_off_remote_profile_refresh_process_for_remote_server,
+                UserID.from_string(user_id).domain,
+            )
+            # Schedule a wake-up to handle any backoffs that may occur in the future.
+            self.clock.call_later(
+                2 * USER_DIRECTORY_STALE_REFRESH_TIME_MS // 1000 + 1,
+                self.kick_off_remote_profile_refresh_process,
+            )
             return
 
         prev_name = prev_event.content.get("displayname")
@@ -505,3 +562,188 @@ class UserDirectoryHandler(StateDeltasHandler):
             # Only update if something has changed, or we didn't have a previous event
             # in the first place.
             await self.store.update_profile_in_user_dir(user_id, new_name, new_avatar)
+
+    def kick_off_remote_profile_refresh_process(self) -> None:
+        """Called when there may be remote users with stale profiles to be refreshed"""
+        if not self.update_user_directory:
+            return
+
+        if self._is_refreshing_remote_profiles:
+            return
+
+        if self._refresh_remote_profiles_call_later:
+            if self._refresh_remote_profiles_call_later.active():
+                self._refresh_remote_profiles_call_later.cancel()
+            self._refresh_remote_profiles_call_later = None
+
+        async def process() -> None:
+            try:
+                await self._unsafe_refresh_remote_profiles()
+            finally:
+                self._is_refreshing_remote_profiles = False
+
+        self._is_refreshing_remote_profiles = True
+        run_as_background_process("user_directory.refresh_remote_profiles", process)
+
+    async def _unsafe_refresh_remote_profiles(self) -> None:
+        limit = MAX_SERVERS_TO_REFRESH_PROFILES_FOR_IN_ONE_GO - len(
+            self._is_refreshing_remote_profiles_for_servers
+        )
+        if limit <= 0:
+            # nothing to do: already refreshing the maximum number of servers
+            # at once.
+            # Come back later.
+            self._refresh_remote_profiles_call_later = self.clock.call_later(
+                INTERVAL_TO_ADD_MORE_SERVERS_TO_REFRESH_PROFILES,
+                self.kick_off_remote_profile_refresh_process,
+            )
+            return
+
+        servers_to_refresh = (
+            await self.store.get_remote_servers_with_profiles_to_refresh(
+                now_ts=self.clock.time_msec(), limit=limit
+            )
+        )
+
+        if not servers_to_refresh:
+            # Do we have any backing-off servers that we should try again
+            # for eventually?
+            # By setting `now` to a point in the far future, we can ask for
+            # which server/user is next to be refreshed, even though it is
+            # not actually refreshable *now*.
+            end_of_time = 1 << 62
+            backing_off_servers = (
+                await self.store.get_remote_servers_with_profiles_to_refresh(
+                    now_ts=end_of_time, limit=1
+                )
+            )
+            if backing_off_servers:
+                # Find out when the next user is refreshable and schedule a
+                # refresh then.
+                backing_off_server_name = backing_off_servers[0]
+                users = await self.store.get_remote_users_to_refresh_on_server(
+                    backing_off_server_name, now_ts=end_of_time, limit=1
+                )
+                if not users:
+                    return
+                _, _, next_try_at_ts = users[0]
+                self._refresh_remote_profiles_call_later = self.clock.call_later(
+                    ((next_try_at_ts - self.clock.time_msec()) // 1000) + 2,
+                    self.kick_off_remote_profile_refresh_process,
+                )
+
+            return
+
+        for server_to_refresh in servers_to_refresh:
+            self.kick_off_remote_profile_refresh_process_for_remote_server(
+                server_to_refresh
+            )
+
+        self._refresh_remote_profiles_call_later = self.clock.call_later(
+            INTERVAL_TO_ADD_MORE_SERVERS_TO_REFRESH_PROFILES,
+            self.kick_off_remote_profile_refresh_process,
+        )
+
+    def kick_off_remote_profile_refresh_process_for_remote_server(
+        self, server_name: str
+    ) -> None:
+        """Called when there may be remote users with stale profiles to be refreshed
+        on the given server."""
+        if not self.update_user_directory:
+            return
+
+        if server_name in self._is_refreshing_remote_profiles_for_servers:
+            return
+
+        async def process() -> None:
+            try:
+                await self._unsafe_refresh_remote_profiles_for_remote_server(
+                    server_name
+                )
+            finally:
+                self._is_refreshing_remote_profiles_for_servers.remove(server_name)
+
+        self._is_refreshing_remote_profiles_for_servers.add(server_name)
+        run_as_background_process(
+            "user_directory.refresh_remote_profiles_for_remote_server", process
+        )
+
+    async def _unsafe_refresh_remote_profiles_for_remote_server(
+        self, server_name: str
+    ) -> None:
+        logger.info("Refreshing profiles in user directory for %s", server_name)
+
+        while True:
+            # Get a handful of users to process.
+            next_batch = await self.store.get_remote_users_to_refresh_on_server(
+                server_name, now_ts=self.clock.time_msec(), limit=10
+            )
+            if not next_batch:
+                # Finished for now
+                return
+
+            for user_id, retry_counter, _ in next_batch:
+                # Request the profile of the user.
+                try:
+                    profile = await self._hs.get_profile_handler().get_profile(
+                        user_id, ignore_backoff=False
+                    )
+                except NotRetryingDestination as e:
+                    logger.info(
+                        "Failed to refresh profile for %r because the destination is undergoing backoff",
+                        user_id,
+                    )
+                    # As a special-case, we back off until the destination is no longer
+                    # backed off from.
+                    await self.store.set_remote_user_profile_in_user_dir_stale(
+                        user_id,
+                        e.retry_last_ts + e.retry_interval,
+                        retry_counter=retry_counter + 1,
+                    )
+                    continue
+                except SynapseError as e:
+                    if e.code == HTTPStatus.NOT_FOUND and e.errcode == Codes.NOT_FOUND:
+                        # The profile doesn't exist.
+                        # TODO Does this mean we should clear it from our user
+                        #      directory?
+                        await self.store.clear_remote_user_profile_in_user_dir_stale(
+                            user_id
+                        )
+                        logger.warning(
+                            "Refresh of remote profile %r: not found (%r)",
+                            user_id,
+                            e.msg,
+                        )
+                        continue
+
+                    logger.warning(
+                        "Failed to refresh profile for %r because %r", user_id, e
+                    )
+                    await self.store.set_remote_user_profile_in_user_dir_stale(
+                        user_id,
+                        calculate_time_of_next_retry(
+                            self.clock.time_msec(), retry_counter + 1
+                        ),
+                        retry_counter=retry_counter + 1,
+                    )
+                    continue
+                except Exception:
+                    logger.error(
+                        "Failed to refresh profile for %r due to unhandled exception",
+                        user_id,
+                        exc_info=True,
+                    )
+                    await self.store.set_remote_user_profile_in_user_dir_stale(
+                        user_id,
+                        calculate_time_of_next_retry(
+                            self.clock.time_msec(), retry_counter + 1
+                        ),
+                        retry_counter=retry_counter + 1,
+                    )
+                    continue
+
+                await self.store.update_profile_in_user_dir(
+                    user_id,
+                    display_name=non_null_str_or_none(profile.get("displayname")),
+                    avatar_url=non_null_str_or_none(profile.get("avatar_url")),
+                )
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index 9cf01b7f36..97f09b73dd 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -504,6 +504,80 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
             desc="set_remote_user_profile_in_user_dir_stale",
         )
 
+    async def clear_remote_user_profile_in_user_dir_stale(self, user_id: str) -> None:
+        """
+        Marks a remote user as no longer having a possibly-stale user directory profile.
+
+        Args:
+            user_id: the remote user who no longer has a stale profile on this server.
+        """
+        await self.db_pool.simple_delete(
+            table="user_directory_stale_remote_users",
+            keyvalues={"user_id": user_id},
+            desc="clear_remote_user_profile_in_user_dir_stale",
+        )
+
+    async def get_remote_servers_with_profiles_to_refresh(
+        self, now_ts: int, limit: int
+    ) -> List[str]:
+        """
+        Get a list of up to `limit` server names which have users whose
+        locally-cached profiles we believe to be stale
+        and are refreshable given the current time `now_ts` in milliseconds.
+        """
+
+        def _get_remote_servers_with_refreshable_profiles_txn(
+            txn: LoggingTransaction,
+        ) -> List[str]:
+            sql = """
+                SELECT user_server_name
+                FROM user_directory_stale_remote_users
+                WHERE next_try_at_ts < ?
+                GROUP BY user_server_name
+                ORDER BY MIN(next_try_at_ts), user_server_name
+                LIMIT ?
+            """
+            txn.execute(sql, (now_ts, limit))
+            return [row[0] for row in txn]
+
+        return await self.db_pool.runInteraction(
+            "get_remote_servers_with_profiles_to_refresh",
+            _get_remote_servers_with_refreshable_profiles_txn,
+        )
+
+    async def get_remote_users_to_refresh_on_server(
+        self, server_name: str, now_ts: int, limit: int
+    ) -> List[Tuple[str, int, int]]:
+        """
+        Get a list of up to `limit` user IDs from the server `server_name`
+        whose locally-cached profiles we believe to be stale
+        and are refreshable given the current time `now_ts` in milliseconds.
+
+        Returns:
+            tuple of:
+                - User ID
+                - Retry counter (number of failures so far)
+                - Time the retry is scheduled for, in milliseconds
+        """
+
+        def _get_remote_users_to_refresh_on_server_txn(
+            txn: LoggingTransaction,
+        ) -> List[Tuple[str, int, int]]:
+            sql = """
+                SELECT user_id, retry_counter, next_try_at_ts
+                FROM user_directory_stale_remote_users
+                WHERE user_server_name = ? AND next_try_at_ts < ?
+                ORDER BY next_try_at_ts
+                LIMIT ?
+            """
+            txn.execute(sql, (server_name, now_ts, limit))
+            return cast(List[Tuple[str, int, int]], txn.fetchall())
+
+        return await self.db_pool.runInteraction(
+            "get_remote_users_to_refresh_on_server",
+            _get_remote_users_to_refresh_on_server_txn,
+        )
+
     async def update_profile_in_user_dir(
         self, user_id: str, display_name: Optional[str], avatar_url: Optional[str]
     ) -> None:
diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py
index a02c1c6227..da4d240826 100644
--- a/tests/handlers/test_user_directory.py
+++ b/tests/handlers/test_user_directory.py
@@ -19,17 +19,18 @@ from twisted.test.proto_helpers import MemoryReactor
 
 import synapse.rest.admin
 from synapse.api.constants import UserTypes
+from synapse.api.errors import SynapseError
 from synapse.api.room_versions import RoomVersion, RoomVersions
 from synapse.appservice import ApplicationService
 from synapse.rest.client import login, register, room, user_directory
 from synapse.server import HomeServer
 from synapse.storage.roommember import ProfileInfo
-from synapse.types import UserProfile, create_requester
+from synapse.types import JsonDict, UserProfile, create_requester
 from synapse.util import Clock
 
 from tests import unittest
 from tests.storage.test_user_directory import GetUserDirectoryTables
-from tests.test_utils import make_awaitable
+from tests.test_utils import event_injection, make_awaitable
 from tests.test_utils.event_injection import inject_member_event
 from tests.unittest import override_config
 
@@ -1103,3 +1104,185 @@ class TestUserDirSearchDisabled(unittest.HomeserverTestCase):
         )
         self.assertEqual(200, channel.code, channel.result)
         self.assertTrue(len(channel.json_body["results"]) == 0)
+
+
+class UserDirectoryRemoteProfileTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        login.register_servlets,
+        synapse.rest.admin.register_servlets,
+        register.register_servlets,
+        room.register_servlets,
+    ]
+
+    def default_config(self) -> JsonDict:
+        config = super().default_config()
+        # Re-enables updating the user directory, as that functionality is needed below.
+        config["update_user_directory_from_worker"] = None
+        return config
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.store = hs.get_datastores().main
+        self.alice = self.register_user("alice", "alice123")
+        self.alice_tok = self.login("alice", "alice123")
+        self.user_dir_helper = GetUserDirectoryTables(self.store)
+        self.user_dir_handler = hs.get_user_directory_handler()
+        self.profile_handler = hs.get_profile_handler()
+
+        # Cancel the startup call: in the steady-state case we can't rely on it anyway.
+        assert self.user_dir_handler._refresh_remote_profiles_call_later is not None
+        self.user_dir_handler._refresh_remote_profiles_call_later.cancel()
+
+    def test_public_rooms_have_profiles_collected(self) -> None:
+        """
+        In a public room, member state events are treated as reflecting the user's
+        real profile and they are accepted.
+        (The main motivation for accepting this is to prevent having to query
+        *every* single profile change over federation.)
+        """
+        room_id = self.helper.create_room_as(
+            self.alice, is_public=True, tok=self.alice_tok
+        )
+        self.get_success(
+            event_injection.inject_member_event(
+                self.hs,
+                room_id,
+                "@bruce:remote",
+                "join",
+                "@bruce:remote",
+                extra_content={
+                    "displayname": "Bruce!",
+                    "avatar_url": "mxc://remote/123",
+                },
+            )
+        )
+        # Sending this event makes the streams move forward after the injection...
+        self.helper.send(room_id, "Test", tok=self.alice_tok)
+        self.pump(0.1)
+
+        profiles = self.get_success(
+            self.user_dir_helper.get_profiles_in_user_directory()
+        )
+        self.assertEqual(
+            profiles.get("@bruce:remote"),
+            ProfileInfo(display_name="Bruce!", avatar_url="mxc://remote/123"),
+        )
+
+    def test_private_rooms_do_not_have_profiles_collected(self) -> None:
+        """
+        In a private room, member state events are not pulled out and used to populate
+        the user directory.
+        """
+        room_id = self.helper.create_room_as(
+            self.alice, is_public=False, tok=self.alice_tok
+        )
+        self.get_success(
+            event_injection.inject_member_event(
+                self.hs,
+                room_id,
+                "@bruce:remote",
+                "join",
+                "@bruce:remote",
+                extra_content={
+                    "displayname": "super-duper bruce",
+                    "avatar_url": "mxc://remote/456",
+                },
+            )
+        )
+        # Sending this event makes the streams move forward after the injection...
+        self.helper.send(room_id, "Test", tok=self.alice_tok)
+        self.pump(0.1)
+
+        profiles = self.get_success(
+            self.user_dir_helper.get_profiles_in_user_directory()
+        )
+        self.assertNotIn("@bruce:remote", profiles)
+
+    def test_private_rooms_have_profiles_requested(self) -> None:
+        """
+        When a name changes in a private room, the homeserver instead requests
+        the user's global profile over federation.
+        """
+
+        async def get_remote_profile(
+            user_id: str, ignore_backoff: bool = True
+        ) -> JsonDict:
+            if user_id == "@bruce:remote":
+                return {
+                    "displayname": "Sir Bruce Bruceson",
+                    "avatar_url": "mxc://remote/789",
+                }
+            else:
+                raise ValueError(f"unable to fetch {user_id}")
+
+        with patch.object(self.profile_handler, "get_profile", get_remote_profile):
+            # Continue from the earlier test...
+            self.test_private_rooms_do_not_have_profiles_collected()
+
+            # Advance by a minute
+            self.reactor.advance(61.0)
+
+        profiles = self.get_success(
+            self.user_dir_helper.get_profiles_in_user_directory()
+        )
+        self.assertEqual(
+            profiles.get("@bruce:remote"),
+            ProfileInfo(
+                display_name="Sir Bruce Bruceson", avatar_url="mxc://remote/789"
+            ),
+        )
+
+    def test_profile_requests_are_retried(self) -> None:
+        """
+        When we fail to fetch the user's profile over federation,
+        we try again later.
+        """
+        has_failed_once = False
+
+        async def get_remote_profile(
+            user_id: str, ignore_backoff: bool = True
+        ) -> JsonDict:
+            nonlocal has_failed_once
+            if user_id == "@bruce:remote":
+                if not has_failed_once:
+                    has_failed_once = True
+                    raise SynapseError(502, "temporary network problem")
+
+                return {
+                    "displayname": "Sir Bruce Bruceson",
+                    "avatar_url": "mxc://remote/789",
+                }
+            else:
+                raise ValueError(f"unable to fetch {user_id}")
+
+        with patch.object(self.profile_handler, "get_profile", get_remote_profile):
+            # Continue from the earlier test...
+            self.test_private_rooms_do_not_have_profiles_collected()
+
+            # Advance by a minute
+            self.reactor.advance(61.0)
+
+            # The request has already failed once
+            self.assertTrue(has_failed_once)
+
+            # The profile has yet to be updated.
+            profiles = self.get_success(
+                self.user_dir_helper.get_profiles_in_user_directory()
+            )
+            self.assertNotIn(
+                "@bruce:remote",
+                profiles,
+            )
+
+            # Advance by five minutes, after the backoff has finished
+            self.reactor.advance(301.0)
+
+            # The profile should have been updated now
+            profiles = self.get_success(
+                self.user_dir_helper.get_profiles_in_user_directory()
+            )
+            self.assertEqual(
+                profiles.get("@bruce:remote"),
+                ProfileInfo(
+                    display_name="Sir Bruce Bruceson", avatar_url="mxc://remote/789"
+                ),
+            )
-- 
cgit 1.5.1


From b0a0fb5c97449720c679045f1bb5a5f393b1c267 Mon Sep 17 00:00:00 2001
From: Tulir Asokan <tulir@maunium.net>
Date: Thu, 16 Mar 2023 16:00:03 +0200
Subject: Implement MSC2659: application service ping endpoint (#15249)

Signed-off-by: Tulir Asokan <tulir@maunium.net>
---
 changelog.d/15249.feature              |   1 +
 synapse/api/errors.py                  |   5 ++
 synapse/appservice/api.py              |  13 ++++
 synapse/config/experimental.py         |   3 +
 synapse/rest/__init__.py               |   2 +
 synapse/rest/client/appservice_ping.py | 115 +++++++++++++++++++++++++++++++++
 synapse/rest/client/versions.py        |   2 +
 7 files changed, 141 insertions(+)
 create mode 100644 changelog.d/15249.feature
 create mode 100644 synapse/rest/client/appservice_ping.py

(limited to 'synapse')

diff --git a/changelog.d/15249.feature b/changelog.d/15249.feature
new file mode 100644
index 0000000000..92d48a2087
--- /dev/null
+++ b/changelog.d/15249.feature
@@ -0,0 +1 @@
+Implement [MSC2659](https://github.com/matrix-org/matrix-spec-proposals/pull/2659): application service ping endpoint. Contributed by Tulir @ Beeper.
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index e1737de59b..8c6822f3c6 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -108,6 +108,11 @@ class Codes(str, Enum):
 
     USER_AWAITING_APPROVAL = "ORG.MATRIX.MSC3866_USER_AWAITING_APPROVAL"
 
+    AS_PING_URL_NOT_SET = "FI.MAU.MSC2659_URL_NOT_SET"
+    AS_PING_BAD_STATUS = "FI.MAU.MSC2659_BAD_STATUS"
+    AS_PING_CONNECTION_TIMEOUT = "FI.MAU.MSC2659_CONNECTION_TIMEOUT"
+    AS_PING_CONNECTION_FAILED = "FI.MAU.MSC2659_CONNECTION_FAILED"
+
     # Attempt to send a second annotation with the same event type & annotation key
     # MSC2677
     DUPLICATE_ANNOTATION = "M_DUPLICATE_ANNOTATION"
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index 1a6f69e7d3..4812fb4496 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -266,6 +266,19 @@ class ApplicationServiceApi(SimpleHttpClient):
         key = (service.id, protocol)
         return await self.protocol_meta_cache.wrap(key, _get)
 
+    async def ping(self, service: "ApplicationService", txn_id: Optional[str]) -> None:
+        # The caller should check that url is set
+        assert service.url is not None, "ping called without URL being set"
+
+        # This is required by the configuration.
+        assert service.hs_token is not None
+
+        await self.post_json_get_json(
+            uri=service.url + "/_matrix/app/unstable/fi.mau.msc2659/ping",
+            post_json={"transaction_id": txn_id},
+            headers={"Authorization": [f"Bearer {service.hs_token}"]},
+        )
+
     async def push_bulk(
         self,
         service: "ApplicationService",
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 7e05f78f70..99dcd27c74 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -178,3 +178,6 @@ class ExperimentalConfig(Config):
 
         # MSC3967: Do not require UIA when first uploading cross signing keys
         self.msc3967_enabled = experimental.get("msc3967_enabled", False)
+
+        # MSC2659: Application service ping endpoint
+        self.msc2659_enabled = experimental.get("msc2659_enabled", False)
diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py
index 2e19e055d3..55b448adfd 100644
--- a/synapse/rest/__init__.py
+++ b/synapse/rest/__init__.py
@@ -20,6 +20,7 @@ from synapse.rest.client import (
     account,
     account_data,
     account_validity,
+    appservice_ping,
     auth,
     capabilities,
     devices,
@@ -140,6 +141,7 @@ class ClientRestResource(JsonResource):
         if is_main_process:
             password_policy.register_servlets(hs, client_resource)
         knock.register_servlets(hs, client_resource)
+        appservice_ping.register_servlets(hs, client_resource)
 
         # moving to /_synapse/admin
         if is_main_process:
diff --git a/synapse/rest/client/appservice_ping.py b/synapse/rest/client/appservice_ping.py
new file mode 100644
index 0000000000..31466a4ad4
--- /dev/null
+++ b/synapse/rest/client/appservice_ping.py
@@ -0,0 +1,115 @@
+# Copyright 2023 Tulir Asokan
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import time
+from http import HTTPStatus
+from typing import TYPE_CHECKING, Any, Dict, Tuple
+
+from synapse.api.errors import (
+    CodeMessageException,
+    Codes,
+    HttpResponseException,
+    SynapseError,
+)
+from synapse.http import RequestTimedOutError
+from synapse.http.server import HttpServer
+from synapse.http.servlet import RestServlet, parse_json_object_from_request
+from synapse.http.site import SynapseRequest
+from synapse.types import JsonDict
+
+from ._base import client_patterns
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+class AppservicePingRestServlet(RestServlet):
+    PATTERNS = client_patterns(
+        "/fi.mau.msc2659/appservice/(?P<appservice_id>[^/]*)/ping",
+        unstable=True,
+        releases=(),
+    )
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__()
+        self.as_api = hs.get_application_service_api()
+        self.auth = hs.get_auth()
+
+    async def on_POST(
+        self, request: SynapseRequest, appservice_id: str
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request)
+
+        if not requester.app_service:
+            raise SynapseError(
+                HTTPStatus.FORBIDDEN,
+                "Only application services can use the /appservice/ping endpoint",
+                Codes.FORBIDDEN,
+            )
+        elif requester.app_service.id != appservice_id:
+            raise SynapseError(
+                HTTPStatus.FORBIDDEN,
+                "Mismatching application service ID in path",
+                Codes.FORBIDDEN,
+            )
+        elif not requester.app_service.url:
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST,
+                "The application service does not have a URL set",
+                Codes.AS_PING_URL_NOT_SET,
+            )
+
+        content = parse_json_object_from_request(request)
+        txn_id = content.get("transaction_id", None)
+
+        start = time.monotonic()
+        try:
+            await self.as_api.ping(requester.app_service, txn_id)
+        except RequestTimedOutError as e:
+            raise SynapseError(
+                HTTPStatus.GATEWAY_TIMEOUT,
+                e.msg,
+                Codes.AS_PING_CONNECTION_TIMEOUT,
+            )
+        except CodeMessageException as e:
+            additional_fields: Dict[str, Any] = {"status": e.code}
+            if isinstance(e, HttpResponseException):
+                try:
+                    additional_fields["body"] = e.response.decode("utf-8")
+                except UnicodeDecodeError:
+                    pass
+            raise SynapseError(
+                HTTPStatus.BAD_GATEWAY,
+                f"HTTP {e.code} {e.msg}",
+                Codes.AS_PING_BAD_STATUS,
+                additional_fields=additional_fields,
+            )
+        except Exception as e:
+            raise SynapseError(
+                HTTPStatus.BAD_GATEWAY,
+                f"{type(e).__name__}: {e}",
+                Codes.AS_PING_CONNECTION_FAILED,
+            )
+
+        duration = time.monotonic() - start
+
+        return HTTPStatus.OK, {"duration": int(duration * 1000)}
+
+
+def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
+    if hs.config.experimental.msc2659_enabled:
+        AppservicePingRestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index e19c0946c0..dba0f0891a 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -109,6 +109,8 @@ class VersionsRestServlet(RestServlet):
                     "org.matrix.msc3773": self.config.experimental.msc3773_enabled,
                     # Allows moderators to fetch redacted event content as described in MSC2815
                     "fi.mau.msc2815": self.config.experimental.msc2815_enabled,
+                    # Adds a ping endpoint for appservices to check HS->AS connection
+                    "fi.mau.msc2659": self.config.experimental.msc2659_enabled,
                     # Adds support for login token requests as per MSC3882
                     "org.matrix.msc3882": self.config.experimental.msc3882_enabled,
                     # Adds support for remotely enabling/disabling pushers, as per MSC3881
-- 
cgit 1.5.1


From afb216c202feb143ce70c74c16fa50ca93da6157 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 16 Mar 2023 11:13:30 -0400
Subject: Remove no-op send_command for Redis replication. (#15274)

With Redis, commands do not need to be re-issued by the main
process (they fan-out to all processes at once) and thus it is no
longer necessary to worry about them reflecting recursively forever.
---
 changelog.d/15272.misc                         |  2 +-
 changelog.d/15274.misc                         |  1 +
 synapse/replication/tcp/handler.py             | 26 +----------
 tests/replication/tcp/test_remote_server_up.py | 63 --------------------------
 4 files changed, 3 insertions(+), 89 deletions(-)
 create mode 100644 changelog.d/15274.misc
 delete mode 100644 tests/replication/tcp/test_remote_server_up.py

(limited to 'synapse')

diff --git a/changelog.d/15272.misc b/changelog.d/15272.misc
index 7a3ef323e9..f7c0276ecc 100644
--- a/changelog.d/15272.misc
+++ b/changelog.d/15272.misc
@@ -1 +1 @@
-Remove unused class `DirectTcpReplicationClientFactory`.
+Clean-up direct TCP replication code.
diff --git a/changelog.d/15274.misc b/changelog.d/15274.misc
new file mode 100644
index 0000000000..f7c0276ecc
--- /dev/null
+++ b/changelog.d/15274.misc
@@ -0,0 +1 @@
+Clean-up direct TCP replication code.
diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py
index d03a53d764..2290b3e6fe 100644
--- a/synapse/replication/tcp/handler.py
+++ b/synapse/replication/tcp/handler.py
@@ -625,23 +625,6 @@ class ReplicationCommandHandler:
 
         self._notifier.notify_remote_server_up(cmd.data)
 
-        # We relay to all other connections to ensure every instance gets the
-        # notification.
-        #
-        # When configured to use redis we'll always only have one connection and
-        # so this is a no-op (all instances will have already received the same
-        # REMOTE_SERVER_UP command).
-        #
-        # For direct TCP connections this will relay to all other connections
-        # connected to us. When on master this will correctly fan out to all
-        # other direct TCP clients and on workers there'll only be the one
-        # connection to master.
-        #
-        # (The logic here should also be sound if we have a mix of Redis and
-        # direct TCP connections so long as there is only one traffic route
-        # between two instances, but that is not currently supported).
-        self.send_command(cmd, ignore_conn=conn)
-
     def new_connection(self, connection: IReplicationConnection) -> None:
         """Called when we have a new connection."""
         self._connections.append(connection)
@@ -689,21 +672,14 @@ class ReplicationCommandHandler:
         """
         return bool(self._connections)
 
-    def send_command(
-        self, cmd: Command, ignore_conn: Optional[IReplicationConnection] = None
-    ) -> None:
+    def send_command(self, cmd: Command) -> None:
         """Send a command to all connected connections.
 
         Args:
             cmd
-            ignore_conn: If set don't send command to the given connection.
-                Used when relaying commands from one connection to all others.
         """
         if self._connections:
             for connection in self._connections:
-                if connection == ignore_conn:
-                    continue
-
                 try:
                     connection.send_command(cmd)
                 except Exception:
diff --git a/tests/replication/tcp/test_remote_server_up.py b/tests/replication/tcp/test_remote_server_up.py
deleted file mode 100644
index b75fc05fd5..0000000000
--- a/tests/replication/tcp/test_remote_server_up.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# Copyright 2020 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Tuple
-
-from twisted.internet.address import IPv4Address
-from twisted.internet.interfaces import IProtocol
-from twisted.test.proto_helpers import MemoryReactor, StringTransport
-
-from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory
-from synapse.server import HomeServer
-from synapse.util import Clock
-
-from tests.unittest import HomeserverTestCase
-
-
-class RemoteServerUpTestCase(HomeserverTestCase):
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        self.factory = ReplicationStreamProtocolFactory(hs)
-
-    def _make_client(self) -> Tuple[IProtocol, StringTransport]:
-        """Create a new direct TCP replication connection"""
-
-        proto = self.factory.buildProtocol(IPv4Address("TCP", "127.0.0.1", 0))
-        transport = StringTransport()
-        proto.makeConnection(transport)
-
-        # We can safely ignore the commands received during connection.
-        self.pump()
-        transport.clear()
-
-        return proto, transport
-
-    def test_relay(self) -> None:
-        """Test that Synapse will relay REMOTE_SERVER_UP commands to all
-        other connections, but not the one that sent it.
-        """
-
-        proto1, transport1 = self._make_client()
-
-        # We shouldn't receive an echo.
-        proto1.dataReceived(b"REMOTE_SERVER_UP example.com\n")
-        self.pump()
-        self.assertEqual(transport1.value(), b"")
-
-        # But we should see an echo if we connect another client
-        proto2, transport2 = self._make_client()
-        proto1.dataReceived(b"REMOTE_SERVER_UP example.com\n")
-
-        self.pump()
-        self.assertEqual(transport1.value(), b"")
-        self.assertEqual(transport2.value(), b"REMOTE_SERVER_UP example.com\n")
-- 
cgit 1.5.1


From 3d70cc393fb32235bbeb94a0b97691dff5531f4d Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Fri, 17 Mar 2023 08:50:31 -0500
Subject: Load `/register/available` endpoint on workers (#15268)

---
 changelog.d/15268.feature             | 1 +
 docker/configure_workers_and_start.py | 1 +
 docs/workers.md                       | 1 +
 synapse/rest/client/register.py       | 2 +-
 4 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15268.feature

(limited to 'synapse')

diff --git a/changelog.d/15268.feature b/changelog.d/15268.feature
new file mode 100644
index 0000000000..5f1f1a0f58
--- /dev/null
+++ b/changelog.d/15268.feature
@@ -0,0 +1 @@
+Allow loading `/register/available` endpoint on workers.
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index 376f9ed635..3f2f5c2daf 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -163,6 +163,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
             "^/_matrix/client/versions$",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/voip/turnServer$",
             "^/_matrix/client/(r0|v3|unstable)/register$",
+            "^/_matrix/client/(r0|v3|unstable)/register/available$",
             "^/_matrix/client/(r0|v3|unstable)/auth/.*/fallback/web$",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/messages$",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/event",
diff --git a/docs/workers.md b/docs/workers.md
index e0e99b4453..bf7690f5af 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -245,6 +245,7 @@ information.
     # Registration/login requests
     ^/_matrix/client/(api/v1|r0|v3|unstable)/login$
     ^/_matrix/client/(r0|v3|unstable)/register$
+    ^/_matrix/client/(r0|v3|unstable)/register/available$
     ^/_matrix/client/v1/register/m.login.registration_token/validity$
 
     # Event sending requests
diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py
index bce806f2bb..4adb5271d2 100644
--- a/synapse/rest/client/register.py
+++ b/synapse/rest/client/register.py
@@ -956,7 +956,7 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     if hs.config.worker.worker_app is None:
         EmailRegisterRequestTokenRestServlet(hs).register(http_server)
         MsisdnRegisterRequestTokenRestServlet(hs).register(http_server)
-        UsernameAvailabilityRestServlet(hs).register(http_server)
         RegistrationSubmitTokenServlet(hs).register(http_server)
+    UsernameAvailabilityRestServlet(hs).register(http_server)
     RegistrationTokenValidityRestServlet(hs).register(http_server)
     RegisterRestServlet(hs).register(http_server)
-- 
cgit 1.5.1


From 25006acc1766ca51fba0d898b4613d2588cfffb8 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 20 Mar 2023 11:47:21 -0400
Subject: Add /versions flag for MSC3952. (#15293)

---
 changelog.d/15293.misc          | 1 +
 synapse/rest/client/versions.py | 2 ++
 2 files changed, 3 insertions(+)
 create mode 100644 changelog.d/15293.misc

(limited to 'synapse')

diff --git a/changelog.d/15293.misc b/changelog.d/15293.misc
new file mode 100644
index 0000000000..5744795620
--- /dev/null
+++ b/changelog.d/15293.misc
@@ -0,0 +1 @@
+Add a `/versions` flag for [MSC3952](https://github.com/matrix-org/matrix-spec-proposals/pull/3952).
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index dba0f0891a..ec171582b3 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -122,6 +122,8 @@ class VersionsRestServlet(RestServlet):
                     is not None,
                     # Adds support for relation-based redactions as per MSC3912.
                     "org.matrix.msc3912": self.config.experimental.msc3912_enabled,
+                    # Adds support for unstable "intentional mentions" behaviour.
+                    "org.matrix.msc3952_intentional_mentions": self.config.experimental.msc3952_intentional_mentions,
                 },
             },
         )
-- 
cgit 1.5.1


From 5ab7146e191c4160cbecf4c6dec6a0f2ed00e171 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Mon, 20 Mar 2023 11:14:05 -0700
Subject: Add Synapse-Trace-Id to access-control-expose-headers header (#14974)

---
 changelog.d/14974.misc | 1 +
 synapse/http/server.py | 4 ++++
 tests/test_server.py   | 4 ++++
 3 files changed, 9 insertions(+)
 create mode 100644 changelog.d/14974.misc

(limited to 'synapse')

diff --git a/changelog.d/14974.misc b/changelog.d/14974.misc
new file mode 100644
index 0000000000..05c5f01444
--- /dev/null
+++ b/changelog.d/14974.misc
@@ -0,0 +1 @@
+Add `Synapse-Trace-Id` to `access-control-expose-headers` header.
diff --git a/synapse/http/server.py b/synapse/http/server.py
index 9314454af1..7b760505b2 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -892,6 +892,10 @@ def set_cors_headers(request: SynapseRequest) -> None:
             b"Access-Control-Allow-Headers",
             b"X-Requested-With, Content-Type, Authorization, Date",
         )
+        request.setHeader(
+            b"Access-Control-Expose-Headers",
+            b"Synapse-Trace-Id",
+        )
 
 
 def set_corp_headers(request: Request) -> None:
diff --git a/tests/test_server.py b/tests/test_server.py
index d67d7722a4..e266c06a2c 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -266,6 +266,10 @@ class OptionsResourceTests(unittest.TestCase):
             [b"X-Requested-With, Content-Type, Authorization, Date"],
             "has correct CORS Headers header",
         )
+        self.assertEqual(
+            channel.headers.getRawHeaders(b"Access-Control-Expose-Headers"),
+            [b"Synapse-Trace-Id"],
+        )
 
     def _check_cors_msc3886_headers(self, channel: FakeChannel) -> None:
         # Ensure the correct CORS headers have been added
-- 
cgit 1.5.1


From a5fb382a29991c8eafcb8c54cdd8c7aab260c237 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 20 Mar 2023 14:32:26 -0400
Subject: Separate HTTP preview code and URL previewer. (#15269)

Separates REST layer code from the actual URL previewing.
---
 changelog.d/15269.misc                     |   1 +
 synapse/media/url_previewer.py             | 833 +++++++++++++++++++++++++++++
 synapse/rest/media/preview_url_resource.py | 796 +--------------------------
 tests/rest/media/test_url_preview.py       |  34 +-
 4 files changed, 854 insertions(+), 810 deletions(-)
 create mode 100644 changelog.d/15269.misc
 create mode 100644 synapse/media/url_previewer.py

(limited to 'synapse')

diff --git a/changelog.d/15269.misc b/changelog.d/15269.misc
new file mode 100644
index 0000000000..b3126fb1f4
--- /dev/null
+++ b/changelog.d/15269.misc
@@ -0,0 +1 @@
+Reorganize URL preview code.
diff --git a/synapse/media/url_previewer.py b/synapse/media/url_previewer.py
new file mode 100644
index 0000000000..c8a4a809f1
--- /dev/null
+++ b/synapse/media/url_previewer.py
@@ -0,0 +1,833 @@
+# Copyright 2016 OpenMarket Ltd
+# Copyright 2020-2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import datetime
+import errno
+import fnmatch
+import logging
+import os
+import re
+import shutil
+import sys
+import traceback
+from typing import TYPE_CHECKING, BinaryIO, Iterable, Optional, Tuple
+from urllib.parse import urljoin, urlparse, urlsplit
+from urllib.request import urlopen
+
+import attr
+
+from twisted.internet.defer import Deferred
+from twisted.internet.error import DNSLookupError
+
+from synapse.api.errors import Codes, SynapseError
+from synapse.http.client import SimpleHttpClient
+from synapse.logging.context import make_deferred_yieldable, run_in_background
+from synapse.media._base import FileInfo, get_filename_from_headers
+from synapse.media.media_storage import MediaStorage
+from synapse.media.oembed import OEmbedProvider
+from synapse.media.preview_html import decode_body, parse_html_to_open_graph
+from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.types import JsonDict, UserID
+from synapse.util import json_encoder
+from synapse.util.async_helpers import ObservableDeferred
+from synapse.util.caches.expiringcache import ExpiringCache
+from synapse.util.stringutils import random_string
+
+if TYPE_CHECKING:
+    from synapse.media.media_repository import MediaRepository
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+OG_TAG_NAME_MAXLEN = 50
+OG_TAG_VALUE_MAXLEN = 1000
+
+ONE_HOUR = 60 * 60 * 1000
+ONE_DAY = 24 * ONE_HOUR
+IMAGE_CACHE_EXPIRY_MS = 2 * ONE_DAY
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class DownloadResult:
+    length: int
+    uri: str
+    response_code: int
+    media_type: str
+    download_name: Optional[str]
+    expires: int
+    etag: Optional[str]
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class MediaInfo:
+    """
+    Information parsed from downloading media being previewed.
+    """
+
+    # The Content-Type header of the response.
+    media_type: str
+    # The length (in bytes) of the downloaded media.
+    media_length: int
+    # The media filename, according to the server. This is parsed from the
+    # returned headers, if possible.
+    download_name: Optional[str]
+    # The time of the preview.
+    created_ts_ms: int
+    # Information from the media storage provider about where the file is stored
+    # on disk.
+    filesystem_id: str
+    filename: str
+    # The URI being previewed.
+    uri: str
+    # The HTTP response code.
+    response_code: int
+    # The timestamp (in milliseconds) of when this preview expires.
+    expires: int
+    # The ETag header of the response.
+    etag: Optional[str]
+
+
+class UrlPreviewer:
+    """
+    Generates an Open Graph (https://ogp.me/) response (with some Matrix
+    specific additions) for a given URL.
+
+    When Synapse is asked to preview a URL it does the following:
+
+    1. Checks against a URL blacklist (defined as `url_preview_url_blacklist` in the
+       config).
+    2. Checks the URL against an in-memory cache and returns the result if it exists. (This
+       is also used to de-duplicate processing of multiple in-flight requests at once.)
+    3. Kicks off a background process to generate a preview:
+       1. Checks URL and timestamp against the database cache and returns the result if it
+          has not expired and was successful (a 2xx return code).
+       2. Checks if the URL matches an oEmbed (https://oembed.com/) pattern. If it
+          does, update the URL to download.
+       3. Downloads the URL and stores it into a file via the media storage provider
+          and saves the local media metadata.
+       4. If the media is an image:
+          1. Generates thumbnails.
+          2. Generates an Open Graph response based on image properties.
+       5. If the media is HTML:
+          1. Decodes the HTML via the stored file.
+          2. Generates an Open Graph response from the HTML.
+          3. If a JSON oEmbed URL was found in the HTML via autodiscovery:
+             1. Downloads the URL and stores it into a file via the media storage provider
+                and saves the local media metadata.
+             2. Convert the oEmbed response to an Open Graph response.
+             3. Override any Open Graph data from the HTML with data from oEmbed.
+          4. If an image exists in the Open Graph response:
+             1. Downloads the URL and stores it into a file via the media storage
+                provider and saves the local media metadata.
+             2. Generates thumbnails.
+             3. Updates the Open Graph response based on image properties.
+       6. If the media is JSON and an oEmbed URL was found:
+          1. Convert the oEmbed response to an Open Graph response.
+          2. If a thumbnail or image is in the oEmbed response:
+             1. Downloads the URL and stores it into a file via the media storage
+                provider and saves the local media metadata.
+             2. Generates thumbnails.
+             3. Updates the Open Graph response based on image properties.
+       7. Stores the result in the database cache.
+    4. Returns the result.
+
+    If any additional request (e.g. from oEmbed autodiscovery, step 5.3 or
+    image thumbnailing, step 5.4 or 6.2) fails then the URL preview as a whole
+    does not fail. As much information as possible is returned.
+
+    The in-memory cache expires after 1 hour.
+
+    Expired entries in the database cache (and their associated media files) are
+    deleted every 10 seconds. The default expiration time is 1 hour from download.
+    """
+
+    def __init__(
+        self,
+        hs: "HomeServer",
+        media_repo: "MediaRepository",
+        media_storage: MediaStorage,
+    ):
+        self.clock = hs.get_clock()
+        self.filepaths = media_repo.filepaths
+        self.max_spider_size = hs.config.media.max_spider_size
+        self.server_name = hs.hostname
+        self.store = hs.get_datastores().main
+        self.client = SimpleHttpClient(
+            hs,
+            treq_args={"browser_like_redirects": True},
+            ip_whitelist=hs.config.media.url_preview_ip_range_whitelist,
+            ip_blacklist=hs.config.media.url_preview_ip_range_blacklist,
+            use_proxy=True,
+        )
+        self.media_repo = media_repo
+        self.primary_base_path = media_repo.primary_base_path
+        self.media_storage = media_storage
+
+        self._oembed = OEmbedProvider(hs)
+
+        # We run the background jobs if we're the instance specified (or no
+        # instance is specified, where we assume there is only one instance
+        # serving media).
+        instance_running_jobs = hs.config.media.media_instance_running_background_jobs
+        self._worker_run_media_background_jobs = (
+            instance_running_jobs is None
+            or instance_running_jobs == hs.get_instance_name()
+        )
+
+        self.url_preview_url_blacklist = hs.config.media.url_preview_url_blacklist
+        self.url_preview_accept_language = hs.config.media.url_preview_accept_language
+
+        # memory cache mapping urls to an ObservableDeferred returning
+        # JSON-encoded OG metadata
+        self._cache: ExpiringCache[str, ObservableDeferred] = ExpiringCache(
+            cache_name="url_previews",
+            clock=self.clock,
+            # don't spider URLs more often than once an hour
+            expiry_ms=ONE_HOUR,
+        )
+
+        if self._worker_run_media_background_jobs:
+            self._cleaner_loop = self.clock.looping_call(
+                self._start_expire_url_cache_data, 10 * 1000
+            )
+
+    async def preview(self, url: str, user: UserID, ts: int) -> bytes:
+        # XXX: we could move this into _do_preview if we wanted.
+        url_tuple = urlsplit(url)
+        for entry in self.url_preview_url_blacklist:
+            match = True
+            for attrib in entry:
+                pattern = entry[attrib]
+                value = getattr(url_tuple, attrib)
+                logger.debug(
+                    "Matching attrib '%s' with value '%s' against pattern '%s'",
+                    attrib,
+                    value,
+                    pattern,
+                )
+
+                if value is None:
+                    match = False
+                    continue
+
+                # Some attributes might not be parsed as strings by urlsplit (such as the
+                # port, which is parsed as an int). Because we use match functions that
+                # expect strings, we want to make sure that's what we give them.
+                value_str = str(value)
+
+                if pattern.startswith("^"):
+                    if not re.match(pattern, value_str):
+                        match = False
+                        continue
+                else:
+                    if not fnmatch.fnmatch(value_str, pattern):
+                        match = False
+                        continue
+            if match:
+                logger.warning("URL %s blocked by url_blacklist entry %s", url, entry)
+                raise SynapseError(
+                    403, "URL blocked by url pattern blacklist entry", Codes.UNKNOWN
+                )
+
+        # the in-memory cache:
+        # * ensures that only one request is active at a time
+        # * takes load off the DB for the thundering herds
+        # * also caches any failures (unlike the DB) so we don't keep
+        #    requesting the same endpoint
+
+        observable = self._cache.get(url)
+
+        if not observable:
+            download = run_in_background(self._do_preview, url, user, ts)
+            observable = ObservableDeferred(download, consumeErrors=True)
+            self._cache[url] = observable
+        else:
+            logger.info("Returning cached response")
+
+        return await make_deferred_yieldable(observable.observe())
+
+    async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes:
+        """Check the db, and download the URL and build a preview
+
+        Args:
+            url: The URL to preview.
+            user: The user requesting the preview.
+            ts: The timestamp requested for the preview.
+
+        Returns:
+            json-encoded og data
+        """
+        # check the URL cache in the DB (which will also provide us with
+        # historical previews, if we have any)
+        cache_result = await self.store.get_url_cache(url, ts)
+        if (
+            cache_result
+            and cache_result["expires_ts"] > ts
+            and cache_result["response_code"] / 100 == 2
+        ):
+            # It may be stored as text in the database, not as bytes (such as
+            # PostgreSQL). If so, encode it back before handing it on.
+            og = cache_result["og"]
+            if isinstance(og, str):
+                og = og.encode("utf8")
+            return og
+
+        # If this URL can be accessed via oEmbed, use that instead.
+        url_to_download = url
+        oembed_url = self._oembed.get_oembed_url(url)
+        if oembed_url:
+            url_to_download = oembed_url
+
+        media_info = await self._handle_url(url_to_download, user)
+
+        logger.debug("got media_info of '%s'", media_info)
+
+        # The number of milliseconds that the response should be considered valid.
+        expiration_ms = media_info.expires
+        author_name: Optional[str] = None
+
+        if _is_media(media_info.media_type):
+            file_id = media_info.filesystem_id
+            dims = await self.media_repo._generate_thumbnails(
+                None, file_id, file_id, media_info.media_type, url_cache=True
+            )
+
+            og = {
+                "og:description": media_info.download_name,
+                "og:image": f"mxc://{self.server_name}/{media_info.filesystem_id}",
+                "og:image:type": media_info.media_type,
+                "matrix:image:size": media_info.media_length,
+            }
+
+            if dims:
+                og["og:image:width"] = dims["width"]
+                og["og:image:height"] = dims["height"]
+            else:
+                logger.warning("Couldn't get dims for %s" % url)
+
+            # define our OG response for this media
+        elif _is_html(media_info.media_type):
+            # TODO: somehow stop a big HTML tree from exploding synapse's RAM
+
+            with open(media_info.filename, "rb") as file:
+                body = file.read()
+
+            tree = decode_body(body, media_info.uri, media_info.media_type)
+            if tree is not None:
+                # Check if this HTML document points to oEmbed information and
+                # defer to that.
+                oembed_url = self._oembed.autodiscover_from_html(tree)
+                og_from_oembed: JsonDict = {}
+                if oembed_url:
+                    try:
+                        oembed_info = await self._handle_url(
+                            oembed_url, user, allow_data_urls=True
+                        )
+                    except Exception as e:
+                        # Fetching the oEmbed info failed, don't block the entire URL preview.
+                        logger.warning(
+                            "oEmbed fetch failed during URL preview: %s errored with %s",
+                            oembed_url,
+                            e,
+                        )
+                    else:
+                        (
+                            og_from_oembed,
+                            author_name,
+                            expiration_ms,
+                        ) = await self._handle_oembed_response(
+                            url, oembed_info, expiration_ms
+                        )
+
+                # Parse Open Graph information from the HTML in case the oEmbed
+                # response failed or is incomplete.
+                og_from_html = parse_html_to_open_graph(tree)
+
+                # Compile the Open Graph response by using the scraped
+                # information from the HTML and overlaying any information
+                # from the oEmbed response.
+                og = {**og_from_html, **og_from_oembed}
+
+                await self._precache_image_url(user, media_info, og)
+            else:
+                og = {}
+
+        elif oembed_url:
+            # Handle the oEmbed information.
+            og, author_name, expiration_ms = await self._handle_oembed_response(
+                url, media_info, expiration_ms
+            )
+            await self._precache_image_url(user, media_info, og)
+
+        else:
+            logger.warning("Failed to find any OG data in %s", url)
+            og = {}
+
+        # If we don't have a title but we have author_name, copy it as
+        # title
+        if not og.get("og:title") and author_name:
+            og["og:title"] = author_name
+
+        # filter out any stupidly long values
+        keys_to_remove = []
+        for k, v in og.items():
+            # values can be numeric as well as strings, hence the cast to str
+            if len(k) > OG_TAG_NAME_MAXLEN or len(str(v)) > OG_TAG_VALUE_MAXLEN:
+                logger.warning(
+                    "Pruning overlong tag %s from OG data", k[:OG_TAG_NAME_MAXLEN]
+                )
+                keys_to_remove.append(k)
+        for k in keys_to_remove:
+            del og[k]
+
+        logger.debug("Calculated OG for %s as %s", url, og)
+
+        jsonog = json_encoder.encode(og)
+
+        # Cap the amount of time to consider a response valid.
+        expiration_ms = min(expiration_ms, ONE_DAY)
+
+        # store OG in history-aware DB cache
+        await self.store.store_url_cache(
+            url,
+            media_info.response_code,
+            media_info.etag,
+            media_info.created_ts_ms + expiration_ms,
+            jsonog,
+            media_info.filesystem_id,
+            media_info.created_ts_ms,
+        )
+
+        return jsonog.encode("utf8")
+
+    async def _download_url(self, url: str, output_stream: BinaryIO) -> DownloadResult:
+        """
+        Fetches a remote URL and parses the headers.
+
+        Args:
+             url: The URL to fetch.
+             output_stream: The stream to write the content to.
+
+        Returns:
+            A DownloadResult containing:
+                Media length, URL downloaded, the HTTP response code,
+                the media type, the downloaded file name, the number of
+                milliseconds the result is valid for, the etag header.
+        """
+
+        try:
+            logger.debug("Trying to get preview for url '%s'", url)
+            length, headers, uri, code = await self.client.get_file(
+                url,
+                output_stream=output_stream,
+                max_size=self.max_spider_size,
+                headers={
+                    b"Accept-Language": self.url_preview_accept_language,
+                    # Use a custom user agent for the preview because some sites will only return
+                    # Open Graph metadata to crawler user agents. Omit the Synapse version
+                    # string to avoid leaking information.
+                    b"User-Agent": [
+                        "Synapse (bot; +https://github.com/matrix-org/synapse)"
+                    ],
+                },
+                is_allowed_content_type=_is_previewable,
+            )
+        except SynapseError:
+            # Pass SynapseErrors through directly, so that the servlet
+            # handler will return a SynapseError to the client instead of
+            # blank data or a 500.
+            raise
+        except DNSLookupError:
+            # DNS lookup returned no results
+            # Note: This will also be the case if one of the resolved IP
+            # addresses is blacklisted
+            raise SynapseError(
+                502,
+                "DNS resolution failure during URL preview generation",
+                Codes.UNKNOWN,
+            )
+        except Exception as e:
+            # FIXME: pass through 404s and other error messages nicely
+            logger.warning("Error downloading %s: %r", url, e)
+
+            raise SynapseError(
+                500,
+                "Failed to download content: %s"
+                % (traceback.format_exception_only(sys.exc_info()[0], e),),
+                Codes.UNKNOWN,
+            )
+
+        if b"Content-Type" in headers:
+            media_type = headers[b"Content-Type"][0].decode("ascii")
+        else:
+            media_type = "application/octet-stream"
+
+        download_name = get_filename_from_headers(headers)
+
+        # FIXME: we should calculate a proper expiration based on the
+        # Cache-Control and Expire headers.  But for now, assume 1 hour.
+        expires = ONE_HOUR
+        etag = headers[b"ETag"][0].decode("ascii") if b"ETag" in headers else None
+
+        return DownloadResult(
+            length, uri, code, media_type, download_name, expires, etag
+        )
+
+    async def _parse_data_url(
+        self, url: str, output_stream: BinaryIO
+    ) -> DownloadResult:
+        """
+        Parses a data: URL.
+
+        Args:
+             url: The URL to parse.
+             output_stream: The stream to write the content to.
+
+        Returns:
+            A DownloadResult containing:
+                Media length, URL downloaded, the HTTP response code,
+                the media type, the downloaded file name, the number of
+                milliseconds the result is valid for, the etag header.
+        """
+
+        try:
+            logger.debug("Trying to parse data url '%s'", url)
+            with urlopen(url) as url_info:
+                # TODO: Can this be made more efficient?
+                output_stream.write(url_info.read())
+        except Exception as e:
+            logger.warning("Error parsing data: URL %s: %r", url, e)
+
+            raise SynapseError(
+                500,
+                "Failed to parse data URL: %s"
+                % (traceback.format_exception_only(sys.exc_info()[0], e),),
+                Codes.UNKNOWN,
+            )
+
+        return DownloadResult(
+            # Read back the length that has been written.
+            length=output_stream.tell(),
+            uri=url,
+            # If it was parsed, consider this a 200 OK.
+            response_code=200,
+            # urlopen shoves the media-type from the data URL into the content type
+            # header object.
+            media_type=url_info.headers.get_content_type(),
+            # Some features are not supported by data: URLs.
+            download_name=None,
+            expires=ONE_HOUR,
+            etag=None,
+        )
+
+    async def _handle_url(
+        self, url: str, user: UserID, allow_data_urls: bool = False
+    ) -> MediaInfo:
+        """
+        Fetches content from a URL and parses the result to generate a MediaInfo.
+
+        It uses the media storage provider to persist the fetched content and
+        stores the mapping into the database.
+
+        Args:
+             url: The URL to fetch.
+             user: The user who has requested this URL.
+             allow_data_urls: True if data URLs should be allowed.
+
+        Returns:
+            A MediaInfo object describing the fetched content.
+        """
+
+        # TODO: we should probably honour robots.txt... except in practice
+        # we're most likely being explicitly triggered by a human rather than a
+        # bot, so are we really a robot?
+
+        file_id = datetime.date.today().isoformat() + "_" + random_string(16)
+
+        file_info = FileInfo(server_name=None, file_id=file_id, url_cache=True)
+
+        with self.media_storage.store_into_file(file_info) as (f, fname, finish):
+            if url.startswith("data:"):
+                if not allow_data_urls:
+                    raise SynapseError(
+                        500, "Previewing of data: URLs is forbidden", Codes.UNKNOWN
+                    )
+
+                download_result = await self._parse_data_url(url, f)
+            else:
+                download_result = await self._download_url(url, f)
+
+            await finish()
+
+        try:
+            time_now_ms = self.clock.time_msec()
+
+            await self.store.store_local_media(
+                media_id=file_id,
+                media_type=download_result.media_type,
+                time_now_ms=time_now_ms,
+                upload_name=download_result.download_name,
+                media_length=download_result.length,
+                user_id=user,
+                url_cache=url,
+            )
+
+        except Exception as e:
+            logger.error("Error handling downloaded %s: %r", url, e)
+            # TODO: we really ought to delete the downloaded file in this
+            # case, since we won't have recorded it in the db, and will
+            # therefore not expire it.
+            raise
+
+        return MediaInfo(
+            media_type=download_result.media_type,
+            media_length=download_result.length,
+            download_name=download_result.download_name,
+            created_ts_ms=time_now_ms,
+            filesystem_id=file_id,
+            filename=fname,
+            uri=download_result.uri,
+            response_code=download_result.response_code,
+            expires=download_result.expires,
+            etag=download_result.etag,
+        )
+
+    async def _precache_image_url(
+        self, user: UserID, media_info: MediaInfo, og: JsonDict
+    ) -> None:
+        """
+        Pre-cache the image (if one exists) for posterity
+
+        Args:
+            user: The user requesting the preview.
+            media_info: The media being previewed.
+            og: The Open Graph dictionary. This is modified with image information.
+        """
+        # If there's no image or it is blank, there's nothing to do.
+        if "og:image" not in og:
+            return
+
+        # Remove the raw image URL, this will be replaced with an MXC URL, if successful.
+        image_url = og.pop("og:image")
+        if not image_url:
+            return
+
+        # The image URL from the HTML might be relative to the previewed page,
+        # convert it to a URL which can be requested directly.
+        url_parts = urlparse(image_url)
+        if url_parts.scheme != "data":
+            image_url = urljoin(media_info.uri, image_url)
+
+        # FIXME: it might be cleaner to use the same flow as the main /preview_url
+        # request itself and benefit from the same caching etc.  But for now we
+        # just rely on the caching on the master request to speed things up.
+        try:
+            image_info = await self._handle_url(image_url, user, allow_data_urls=True)
+        except Exception as e:
+            # Pre-caching the image failed, don't block the entire URL preview.
+            logger.warning(
+                "Pre-caching image failed during URL preview: %s errored with %s",
+                image_url,
+                e,
+            )
+            return
+
+        if _is_media(image_info.media_type):
+            # TODO: make sure we don't choke on white-on-transparent images
+            file_id = image_info.filesystem_id
+            dims = await self.media_repo._generate_thumbnails(
+                None, file_id, file_id, image_info.media_type, url_cache=True
+            )
+            if dims:
+                og["og:image:width"] = dims["width"]
+                og["og:image:height"] = dims["height"]
+            else:
+                logger.warning("Couldn't get dims for %s", image_url)
+
+            og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}"
+            og["og:image:type"] = image_info.media_type
+            og["matrix:image:size"] = image_info.media_length
+
+    async def _handle_oembed_response(
+        self, url: str, media_info: MediaInfo, expiration_ms: int
+    ) -> Tuple[JsonDict, Optional[str], int]:
+        """
+        Parse the downloaded oEmbed info.
+
+        Args:
+            url: The URL which is being previewed (not the one which was
+                requested).
+            media_info: The media being previewed.
+            expiration_ms: The length of time, in milliseconds, the media is valid for.
+
+        Returns:
+            A tuple of:
+                The Open Graph dictionary, if the oEmbed info can be parsed.
+                The author name if it could be retrieved from oEmbed.
+                The (possibly updated) length of time, in milliseconds, the media is valid for.
+        """
+        # If JSON was not returned, there's nothing to do.
+        if not _is_json(media_info.media_type):
+            return {}, None, expiration_ms
+
+        with open(media_info.filename, "rb") as file:
+            body = file.read()
+
+        oembed_response = self._oembed.parse_oembed_response(url, body)
+        open_graph_result = oembed_response.open_graph_result
+
+        # Use the cache age from the oEmbed result, if one was given.
+        if open_graph_result and oembed_response.cache_age is not None:
+            expiration_ms = oembed_response.cache_age
+
+        return open_graph_result, oembed_response.author_name, expiration_ms
+
+    def _start_expire_url_cache_data(self) -> Deferred:
+        return run_as_background_process(
+            "expire_url_cache_data", self._expire_url_cache_data
+        )
+
+    async def _expire_url_cache_data(self) -> None:
+        """Clean up expired url cache content, media and thumbnails."""
+
+        assert self._worker_run_media_background_jobs
+
+        now = self.clock.time_msec()
+
+        logger.debug("Running url preview cache expiry")
+
+        def try_remove_parent_dirs(dirs: Iterable[str]) -> None:
+            """Attempt to remove the given chain of parent directories
+
+            Args:
+                dirs: The list of directory paths to delete, with children appearing
+                    before their parents.
+            """
+            for dir in dirs:
+                try:
+                    os.rmdir(dir)
+                except FileNotFoundError:
+                    # Already deleted, continue with deleting the rest
+                    pass
+                except OSError as e:
+                    # Failed, skip deleting the rest of the parent dirs
+                    if e.errno != errno.ENOTEMPTY:
+                        logger.warning(
+                            "Failed to remove media directory while clearing url preview cache: %r: %s",
+                            dir,
+                            e,
+                        )
+                    break
+
+        # First we delete expired url cache entries
+        media_ids = await self.store.get_expired_url_cache(now)
+
+        removed_media = []
+        for media_id in media_ids:
+            fname = self.filepaths.url_cache_filepath(media_id)
+            try:
+                os.remove(fname)
+            except FileNotFoundError:
+                pass  # If the path doesn't exist, meh
+            except OSError as e:
+                logger.warning(
+                    "Failed to remove media while clearing url preview cache: %r: %s",
+                    media_id,
+                    e,
+                )
+                continue
+
+            removed_media.append(media_id)
+
+            dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
+            try_remove_parent_dirs(dirs)
+
+        await self.store.delete_url_cache(removed_media)
+
+        if removed_media:
+            logger.debug(
+                "Deleted %d entries from url preview cache", len(removed_media)
+            )
+        else:
+            logger.debug("No entries removed from url preview cache")
+
+        # Now we delete old images associated with the url cache.
+        # These may be cached for a bit on the client (e.g., the user
+        # may have a room open which is displaying a URL preview).
+        # So we wait a couple of days before deleting, just in case.
+        expire_before = now - IMAGE_CACHE_EXPIRY_MS
+        media_ids = await self.store.get_url_cache_media_before(expire_before)
+
+        removed_media = []
+        for media_id in media_ids:
+            fname = self.filepaths.url_cache_filepath(media_id)
+            try:
+                os.remove(fname)
+            except FileNotFoundError:
+                pass  # If the path doesn't exist, meh
+            except OSError as e:
+                logger.warning(
+                    "Failed to remove media from url preview cache: %r: %s", media_id, e
+                )
+                continue
+
+            dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
+            try_remove_parent_dirs(dirs)
+
+            thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id)
+            try:
+                shutil.rmtree(thumbnail_dir)
+            except FileNotFoundError:
+                pass  # If the path doesn't exist, meh
+            except OSError as e:
+                logger.warning(
+                    "Failed to remove media from url preview cache: %r: %s", media_id, e
+                )
+                continue
+
+            removed_media.append(media_id)
+
+            dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
+            # Note that one of the directories to be deleted has already been
+            # removed by the `rmtree` above.
+            try_remove_parent_dirs(dirs)
+
+        await self.store.delete_url_cache_media(removed_media)
+
+        if removed_media:
+            logger.debug("Deleted %d media from url preview cache", len(removed_media))
+        else:
+            logger.debug("No media removed from url preview cache")
+
+
+def _is_media(content_type: str) -> bool:
+    return content_type.lower().startswith("image/")
+
+
+def _is_html(content_type: str) -> bool:
+    content_type = content_type.lower()
+    return content_type.startswith("text/html") or content_type.startswith(
+        "application/xhtml"
+    )
+
+
+def _is_json(content_type: str) -> bool:
+    return content_type.lower().startswith("application/json")
+
+
+def _is_previewable(content_type: str) -> bool:
+    """Returns True for content types for which we will perform URL preview and False
+    otherwise."""
+
+    return _is_html(content_type) or _is_media(content_type) or _is_json(content_type)
diff --git a/synapse/rest/media/preview_url_resource.py b/synapse/rest/media/preview_url_resource.py
index 7ada728757..58513c4be4 100644
--- a/synapse/rest/media/preview_url_resource.py
+++ b/synapse/rest/media/preview_url_resource.py
@@ -12,26 +12,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import datetime
-import errno
-import fnmatch
-import logging
-import os
-import re
-import shutil
-import sys
-import traceback
-from typing import TYPE_CHECKING, BinaryIO, Iterable, Optional, Tuple
-from urllib.parse import urljoin, urlparse, urlsplit
-from urllib.request import urlopen
 
-import attr
+from typing import TYPE_CHECKING
 
-from twisted.internet.defer import Deferred
-from twisted.internet.error import DNSLookupError
-
-from synapse.api.errors import Codes, SynapseError
-from synapse.http.client import SimpleHttpClient
 from synapse.http.server import (
     DirectServeJsonResource,
     respond_with_json,
@@ -39,71 +22,13 @@ from synapse.http.server import (
 )
 from synapse.http.servlet import parse_integer, parse_string
 from synapse.http.site import SynapseRequest
-from synapse.logging.context import make_deferred_yieldable, run_in_background
-from synapse.media._base import FileInfo, get_filename_from_headers
 from synapse.media.media_storage import MediaStorage
-from synapse.media.oembed import OEmbedProvider
-from synapse.media.preview_html import decode_body, parse_html_to_open_graph
-from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.types import JsonDict, UserID
-from synapse.util import json_encoder
-from synapse.util.async_helpers import ObservableDeferred
-from synapse.util.caches.expiringcache import ExpiringCache
-from synapse.util.stringutils import random_string
+from synapse.media.url_previewer import UrlPreviewer
 
 if TYPE_CHECKING:
     from synapse.media.media_repository import MediaRepository
     from synapse.server import HomeServer
 
-logger = logging.getLogger(__name__)
-
-OG_TAG_NAME_MAXLEN = 50
-OG_TAG_VALUE_MAXLEN = 1000
-
-ONE_HOUR = 60 * 60 * 1000
-ONE_DAY = 24 * ONE_HOUR
-IMAGE_CACHE_EXPIRY_MS = 2 * ONE_DAY
-
-
-@attr.s(slots=True, frozen=True, auto_attribs=True)
-class DownloadResult:
-    length: int
-    uri: str
-    response_code: int
-    media_type: str
-    download_name: Optional[str]
-    expires: int
-    etag: Optional[str]
-
-
-@attr.s(slots=True, frozen=True, auto_attribs=True)
-class MediaInfo:
-    """
-    Information parsed from downloading media being previewed.
-    """
-
-    # The Content-Type header of the response.
-    media_type: str
-    # The length (in bytes) of the downloaded media.
-    media_length: int
-    # The media filename, according to the server. This is parsed from the
-    # returned headers, if possible.
-    download_name: Optional[str]
-    # The time of the preview.
-    created_ts_ms: int
-    # Information from the media storage provider about where the file is stored
-    # on disk.
-    filesystem_id: str
-    filename: str
-    # The URI being previewed.
-    uri: str
-    # The HTTP response code.
-    response_code: int
-    # The timestamp (in milliseconds) of when this preview expires.
-    expires: int
-    # The ETag header of the response.
-    etag: Optional[str]
-
 
 class PreviewUrlResource(DirectServeJsonResource):
     """
@@ -121,54 +46,6 @@ class PreviewUrlResource(DirectServeJsonResource):
       * The URL metadata must be stored somewhere, rather than just using Matrix
         itself to store the media.
       * Matrix cannot be used to distribute the metadata between homeservers.
-
-    When Synapse is asked to preview a URL it does the following:
-
-    1. Checks against a URL blacklist (defined as `url_preview_url_blacklist` in the
-       config).
-    2. Checks the URL against an in-memory cache and returns the result if it exists. (This
-       is also used to de-duplicate processing of multiple in-flight requests at once.)
-    3. Kicks off a background process to generate a preview:
-       1. Checks URL and timestamp against the database cache and returns the result if it
-          has not expired and was successful (a 2xx return code).
-       2. Checks if the URL matches an oEmbed (https://oembed.com/) pattern. If it
-          does, update the URL to download.
-       3. Downloads the URL and stores it into a file via the media storage provider
-          and saves the local media metadata.
-       4. If the media is an image:
-          1. Generates thumbnails.
-          2. Generates an Open Graph response based on image properties.
-       5. If the media is HTML:
-          1. Decodes the HTML via the stored file.
-          2. Generates an Open Graph response from the HTML.
-          3. If a JSON oEmbed URL was found in the HTML via autodiscovery:
-             1. Downloads the URL and stores it into a file via the media storage provider
-                and saves the local media metadata.
-             2. Convert the oEmbed response to an Open Graph response.
-             3. Override any Open Graph data from the HTML with data from oEmbed.
-          4. If an image exists in the Open Graph response:
-             1. Downloads the URL and stores it into a file via the media storage
-                provider and saves the local media metadata.
-             2. Generates thumbnails.
-             3. Updates the Open Graph response based on image properties.
-       6. If the media is JSON and an oEmbed URL was found:
-          1. Convert the oEmbed response to an Open Graph response.
-          2. If a thumbnail or image is in the oEmbed response:
-             1. Downloads the URL and stores it into a file via the media storage
-                provider and saves the local media metadata.
-             2. Generates thumbnails.
-             3. Updates the Open Graph response based on image properties.
-       7. Stores the result in the database cache.
-    4. Returns the result.
-
-    If any additional requests (e.g. from oEmbed autodiscovery, step 5.3 or
-    image thumbnailing, step 5.4 or 6.4) fails then the URL preview as a whole
-    does not fail. As much information as possible is returned.
-
-    The in-memory cache expires after 1 hour.
-
-    Expired entries in the database cache (and their associated media files) are
-    deleted every 10 seconds. The default expiration time is 1 hour from download.
     """
 
     isLeaf = True
@@ -183,48 +60,10 @@ class PreviewUrlResource(DirectServeJsonResource):
 
         self.auth = hs.get_auth()
         self.clock = hs.get_clock()
-        self.filepaths = media_repo.filepaths
-        self.max_spider_size = hs.config.media.max_spider_size
-        self.server_name = hs.hostname
-        self.store = hs.get_datastores().main
-        self.client = SimpleHttpClient(
-            hs,
-            treq_args={"browser_like_redirects": True},
-            ip_whitelist=hs.config.media.url_preview_ip_range_whitelist,
-            ip_blacklist=hs.config.media.url_preview_ip_range_blacklist,
-            use_proxy=True,
-        )
         self.media_repo = media_repo
-        self.primary_base_path = media_repo.primary_base_path
         self.media_storage = media_storage
 
-        self._oembed = OEmbedProvider(hs)
-
-        # We run the background jobs if we're the instance specified (or no
-        # instance is specified, where we assume there is only one instance
-        # serving media).
-        instance_running_jobs = hs.config.media.media_instance_running_background_jobs
-        self._worker_run_media_background_jobs = (
-            instance_running_jobs is None
-            or instance_running_jobs == hs.get_instance_name()
-        )
-
-        self.url_preview_url_blacklist = hs.config.media.url_preview_url_blacklist
-        self.url_preview_accept_language = hs.config.media.url_preview_accept_language
-
-        # memory cache mapping urls to an ObservableDeferred returning
-        # JSON-encoded OG metadata
-        self._cache: ExpiringCache[str, ObservableDeferred] = ExpiringCache(
-            cache_name="url_previews",
-            clock=self.clock,
-            # don't spider URLs more often than once an hour
-            expiry_ms=ONE_HOUR,
-        )
-
-        if self._worker_run_media_background_jobs:
-            self._cleaner_loop = self.clock.looping_call(
-                self._start_expire_url_cache_data, 10 * 1000
-            )
+        self._url_previewer = UrlPreviewer(hs, media_repo, media_storage)
 
     async def _async_render_OPTIONS(self, request: SynapseRequest) -> None:
         request.setHeader(b"Allow", b"OPTIONS, GET")
@@ -238,632 +77,5 @@ class PreviewUrlResource(DirectServeJsonResource):
         if ts is None:
             ts = self.clock.time_msec()
 
-        # XXX: we could move this into _do_preview if we wanted.
-        url_tuple = urlsplit(url)
-        for entry in self.url_preview_url_blacklist:
-            match = True
-            for attrib in entry:
-                pattern = entry[attrib]
-                value = getattr(url_tuple, attrib)
-                logger.debug(
-                    "Matching attrib '%s' with value '%s' against pattern '%s'",
-                    attrib,
-                    value,
-                    pattern,
-                )
-
-                if value is None:
-                    match = False
-                    continue
-
-                # Some attributes might not be parsed as strings by urlsplit (such as the
-                # port, which is parsed as an int). Because we use match functions that
-                # expect strings, we want to make sure that's what we give them.
-                value_str = str(value)
-
-                if pattern.startswith("^"):
-                    if not re.match(pattern, value_str):
-                        match = False
-                        continue
-                else:
-                    if not fnmatch.fnmatch(value_str, pattern):
-                        match = False
-                        continue
-            if match:
-                logger.warning("URL %s blocked by url_blacklist entry %s", url, entry)
-                raise SynapseError(
-                    403, "URL blocked by url pattern blacklist entry", Codes.UNKNOWN
-                )
-
-        # the in-memory cache:
-        # * ensures that only one request is active at a time
-        # * takes load off the DB for the thundering herds
-        # * also caches any failures (unlike the DB) so we don't keep
-        #    requesting the same endpoint
-
-        observable = self._cache.get(url)
-
-        if not observable:
-            download = run_in_background(self._do_preview, url, requester.user, ts)
-            observable = ObservableDeferred(download, consumeErrors=True)
-            self._cache[url] = observable
-        else:
-            logger.info("Returning cached response")
-
-        og = await make_deferred_yieldable(observable.observe())
+        og = await self._url_previewer.preview(url, requester.user, ts)
         respond_with_json_bytes(request, 200, og, send_cors=True)
-
-    async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes:
-        """Check the db, and download the URL and build a preview
-
-        Args:
-            url: The URL to preview.
-            user: The user requesting the preview.
-            ts: The timestamp requested for the preview.
-
-        Returns:
-            json-encoded og data
-        """
-        # check the URL cache in the DB (which will also provide us with
-        # historical previews, if we have any)
-        cache_result = await self.store.get_url_cache(url, ts)
-        if (
-            cache_result
-            and cache_result["expires_ts"] > ts
-            and cache_result["response_code"] / 100 == 2
-        ):
-            # It may be stored as text in the database, not as bytes (such as
-            # PostgreSQL). If so, encode it back before handing it on.
-            og = cache_result["og"]
-            if isinstance(og, str):
-                og = og.encode("utf8")
-            return og
-
-        # If this URL can be accessed via oEmbed, use that instead.
-        url_to_download = url
-        oembed_url = self._oembed.get_oembed_url(url)
-        if oembed_url:
-            url_to_download = oembed_url
-
-        media_info = await self._handle_url(url_to_download, user)
-
-        logger.debug("got media_info of '%s'", media_info)
-
-        # The number of milliseconds that the response should be considered valid.
-        expiration_ms = media_info.expires
-        author_name: Optional[str] = None
-
-        if _is_media(media_info.media_type):
-            file_id = media_info.filesystem_id
-            dims = await self.media_repo._generate_thumbnails(
-                None, file_id, file_id, media_info.media_type, url_cache=True
-            )
-
-            og = {
-                "og:description": media_info.download_name,
-                "og:image": f"mxc://{self.server_name}/{media_info.filesystem_id}",
-                "og:image:type": media_info.media_type,
-                "matrix:image:size": media_info.media_length,
-            }
-
-            if dims:
-                og["og:image:width"] = dims["width"]
-                og["og:image:height"] = dims["height"]
-            else:
-                logger.warning("Couldn't get dims for %s" % url)
-
-            # define our OG response for this media
-        elif _is_html(media_info.media_type):
-            # TODO: somehow stop a big HTML tree from exploding synapse's RAM
-
-            with open(media_info.filename, "rb") as file:
-                body = file.read()
-
-            tree = decode_body(body, media_info.uri, media_info.media_type)
-            if tree is not None:
-                # Check if this HTML document points to oEmbed information and
-                # defer to that.
-                oembed_url = self._oembed.autodiscover_from_html(tree)
-                og_from_oembed: JsonDict = {}
-                if oembed_url:
-                    try:
-                        oembed_info = await self._handle_url(
-                            oembed_url, user, allow_data_urls=True
-                        )
-                    except Exception as e:
-                        # Fetching the oEmbed info failed, don't block the entire URL preview.
-                        logger.warning(
-                            "oEmbed fetch failed during URL preview: %s errored with %s",
-                            oembed_url,
-                            e,
-                        )
-                    else:
-                        (
-                            og_from_oembed,
-                            author_name,
-                            expiration_ms,
-                        ) = await self._handle_oembed_response(
-                            url, oembed_info, expiration_ms
-                        )
-
-                # Parse Open Graph information from the HTML in case the oEmbed
-                # response failed or is incomplete.
-                og_from_html = parse_html_to_open_graph(tree)
-
-                # Compile the Open Graph response by using the scraped
-                # information from the HTML and overlaying any information
-                # from the oEmbed response.
-                og = {**og_from_html, **og_from_oembed}
-
-                await self._precache_image_url(user, media_info, og)
-            else:
-                og = {}
-
-        elif oembed_url:
-            # Handle the oEmbed information.
-            og, author_name, expiration_ms = await self._handle_oembed_response(
-                url, media_info, expiration_ms
-            )
-            await self._precache_image_url(user, media_info, og)
-
-        else:
-            logger.warning("Failed to find any OG data in %s", url)
-            og = {}
-
-        # If we don't have a title but we have author_name, copy it as
-        # title
-        if not og.get("og:title") and author_name:
-            og["og:title"] = author_name
-
-        # filter out any stupidly long values
-        keys_to_remove = []
-        for k, v in og.items():
-            # values can be numeric as well as strings, hence the cast to str
-            if len(k) > OG_TAG_NAME_MAXLEN or len(str(v)) > OG_TAG_VALUE_MAXLEN:
-                logger.warning(
-                    "Pruning overlong tag %s from OG data", k[:OG_TAG_NAME_MAXLEN]
-                )
-                keys_to_remove.append(k)
-        for k in keys_to_remove:
-            del og[k]
-
-        logger.debug("Calculated OG for %s as %s", url, og)
-
-        jsonog = json_encoder.encode(og)
-
-        # Cap the amount of time to consider a response valid.
-        expiration_ms = min(expiration_ms, ONE_DAY)
-
-        # store OG in history-aware DB cache
-        await self.store.store_url_cache(
-            url,
-            media_info.response_code,
-            media_info.etag,
-            media_info.created_ts_ms + expiration_ms,
-            jsonog,
-            media_info.filesystem_id,
-            media_info.created_ts_ms,
-        )
-
-        return jsonog.encode("utf8")
-
-    async def _download_url(self, url: str, output_stream: BinaryIO) -> DownloadResult:
-        """
-        Fetches a remote URL and parses the headers.
-
-        Args:
-             url: The URL to fetch.
-             output_stream: The stream to write the content to.
-
-        Returns:
-            A tuple of:
-                Media length, URL downloaded, the HTTP response code,
-                the media type, the downloaded file name, the number of
-                milliseconds the result is valid for, the etag header.
-        """
-
-        try:
-            logger.debug("Trying to get preview for url '%s'", url)
-            length, headers, uri, code = await self.client.get_file(
-                url,
-                output_stream=output_stream,
-                max_size=self.max_spider_size,
-                headers={
-                    b"Accept-Language": self.url_preview_accept_language,
-                    # Use a custom user agent for the preview because some sites will only return
-                    # Open Graph metadata to crawler user agents. Omit the Synapse version
-                    # string to avoid leaking information.
-                    b"User-Agent": [
-                        "Synapse (bot; +https://github.com/matrix-org/synapse)"
-                    ],
-                },
-                is_allowed_content_type=_is_previewable,
-            )
-        except SynapseError:
-            # Pass SynapseErrors through directly, so that the servlet
-            # handler will return a SynapseError to the client instead of
-            # blank data or a 500.
-            raise
-        except DNSLookupError:
-            # DNS lookup returned no results
-            # Note: This will also be the case if one of the resolved IP
-            # addresses is blacklisted
-            raise SynapseError(
-                502,
-                "DNS resolution failure during URL preview generation",
-                Codes.UNKNOWN,
-            )
-        except Exception as e:
-            # FIXME: pass through 404s and other error messages nicely
-            logger.warning("Error downloading %s: %r", url, e)
-
-            raise SynapseError(
-                500,
-                "Failed to download content: %s"
-                % (traceback.format_exception_only(sys.exc_info()[0], e),),
-                Codes.UNKNOWN,
-            )
-
-        if b"Content-Type" in headers:
-            media_type = headers[b"Content-Type"][0].decode("ascii")
-        else:
-            media_type = "application/octet-stream"
-
-        download_name = get_filename_from_headers(headers)
-
-        # FIXME: we should calculate a proper expiration based on the
-        # Cache-Control and Expire headers.  But for now, assume 1 hour.
-        expires = ONE_HOUR
-        etag = headers[b"ETag"][0].decode("ascii") if b"ETag" in headers else None
-
-        return DownloadResult(
-            length, uri, code, media_type, download_name, expires, etag
-        )
-
-    async def _parse_data_url(
-        self, url: str, output_stream: BinaryIO
-    ) -> DownloadResult:
-        """
-        Parses a data: URL.
-
-        Args:
-             url: The URL to parse.
-             output_stream: The stream to write the content to.
-
-        Returns:
-            A tuple of:
-                Media length, URL downloaded, the HTTP response code,
-                the media type, the downloaded file name, the number of
-                milliseconds the result is valid for, the etag header.
-        """
-
-        try:
-            logger.debug("Trying to parse data url '%s'", url)
-            with urlopen(url) as url_info:
-                # TODO Can this be more efficient.
-                output_stream.write(url_info.read())
-        except Exception as e:
-            logger.warning("Error parsing data: URL %s: %r", url, e)
-
-            raise SynapseError(
-                500,
-                "Failed to parse data URL: %s"
-                % (traceback.format_exception_only(sys.exc_info()[0], e),),
-                Codes.UNKNOWN,
-            )
-
-        return DownloadResult(
-            # Read back the length that has been written.
-            length=output_stream.tell(),
-            uri=url,
-            # If it was parsed, consider this a 200 OK.
-            response_code=200,
-            # urlopen shoves the media-type from the data URL into the content type
-            # header object.
-            media_type=url_info.headers.get_content_type(),
-            # Some features are not supported by data: URLs.
-            download_name=None,
-            expires=ONE_HOUR,
-            etag=None,
-        )
-
-    async def _handle_url(
-        self, url: str, user: UserID, allow_data_urls: bool = False
-    ) -> MediaInfo:
-        """
-        Fetches content from a URL and parses the result to generate a MediaInfo.
-
-        It uses the media storage provider to persist the fetched content and
-        stores the mapping into the database.
-
-        Args:
-             url: The URL to fetch.
-             user: The user who ahs requested this URL.
-             allow_data_urls: True if data URLs should be allowed.
-
-        Returns:
-            A MediaInfo object describing the fetched content.
-        """
-
-        # TODO: we should probably honour robots.txt... except in practice
-        # we're most likely being explicitly triggered by a human rather than a
-        # bot, so are we really a robot?
-
-        file_id = datetime.date.today().isoformat() + "_" + random_string(16)
-
-        file_info = FileInfo(server_name=None, file_id=file_id, url_cache=True)
-
-        with self.media_storage.store_into_file(file_info) as (f, fname, finish):
-            if url.startswith("data:"):
-                if not allow_data_urls:
-                    raise SynapseError(
-                        500, "Previewing of data: URLs is forbidden", Codes.UNKNOWN
-                    )
-
-                download_result = await self._parse_data_url(url, f)
-            else:
-                download_result = await self._download_url(url, f)
-
-            await finish()
-
-        try:
-            time_now_ms = self.clock.time_msec()
-
-            await self.store.store_local_media(
-                media_id=file_id,
-                media_type=download_result.media_type,
-                time_now_ms=time_now_ms,
-                upload_name=download_result.download_name,
-                media_length=download_result.length,
-                user_id=user,
-                url_cache=url,
-            )
-
-        except Exception as e:
-            logger.error("Error handling downloaded %s: %r", url, e)
-            # TODO: we really ought to delete the downloaded file in this
-            # case, since we won't have recorded it in the db, and will
-            # therefore not expire it.
-            raise
-
-        return MediaInfo(
-            media_type=download_result.media_type,
-            media_length=download_result.length,
-            download_name=download_result.download_name,
-            created_ts_ms=time_now_ms,
-            filesystem_id=file_id,
-            filename=fname,
-            uri=download_result.uri,
-            response_code=download_result.response_code,
-            expires=download_result.expires,
-            etag=download_result.etag,
-        )
-
-    async def _precache_image_url(
-        self, user: UserID, media_info: MediaInfo, og: JsonDict
-    ) -> None:
-        """
-        Pre-cache the image (if one exists) for posterity
-
-        Args:
-            user: The user requesting the preview.
-            media_info: The media being previewed.
-            og: The Open Graph dictionary. This is modified with image information.
-        """
-        # If there's no image or it is blank, there's nothing to do.
-        if "og:image" not in og:
-            return
-
-        # Remove the raw image URL, this will be replaced with an MXC URL, if successful.
-        image_url = og.pop("og:image")
-        if not image_url:
-            return
-
-        # The image URL from the HTML might be relative to the previewed page,
-        # convert it to an URL which can be requested directly.
-        url_parts = urlparse(image_url)
-        if url_parts.scheme != "data":
-            image_url = urljoin(media_info.uri, image_url)
-
-        # FIXME: it might be cleaner to use the same flow as the main /preview_url
-        # request itself and benefit from the same caching etc.  But for now we
-        # just rely on the caching on the master request to speed things up.
-        try:
-            image_info = await self._handle_url(image_url, user, allow_data_urls=True)
-        except Exception as e:
-            # Pre-caching the image failed, don't block the entire URL preview.
-            logger.warning(
-                "Pre-caching image failed during URL preview: %s errored with %s",
-                image_url,
-                e,
-            )
-            return
-
-        if _is_media(image_info.media_type):
-            # TODO: make sure we don't choke on white-on-transparent images
-            file_id = image_info.filesystem_id
-            dims = await self.media_repo._generate_thumbnails(
-                None, file_id, file_id, image_info.media_type, url_cache=True
-            )
-            if dims:
-                og["og:image:width"] = dims["width"]
-                og["og:image:height"] = dims["height"]
-            else:
-                logger.warning("Couldn't get dims for %s", image_url)
-
-            og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}"
-            og["og:image:type"] = image_info.media_type
-            og["matrix:image:size"] = image_info.media_length
-
-    async def _handle_oembed_response(
-        self, url: str, media_info: MediaInfo, expiration_ms: int
-    ) -> Tuple[JsonDict, Optional[str], int]:
-        """
-        Parse the downloaded oEmbed info.
-
-        Args:
-            url: The URL which is being previewed (not the one which was
-                requested).
-            media_info: The media being previewed.
-            expiration_ms: The length of time, in milliseconds, the media is valid for.
-
-        Returns:
-            A tuple of:
-                The Open Graph dictionary, if the oEmbed info can be parsed.
-                The author name if it could be retrieved from oEmbed.
-                The (possibly updated) length of time, in milliseconds, the media is valid for.
-        """
-        # If JSON was not returned, there's nothing to do.
-        if not _is_json(media_info.media_type):
-            return {}, None, expiration_ms
-
-        with open(media_info.filename, "rb") as file:
-            body = file.read()
-
-        oembed_response = self._oembed.parse_oembed_response(url, body)
-        open_graph_result = oembed_response.open_graph_result
-
-        # Use the cache age from the oEmbed result, if one was given.
-        if open_graph_result and oembed_response.cache_age is not None:
-            expiration_ms = oembed_response.cache_age
-
-        return open_graph_result, oembed_response.author_name, expiration_ms
-
-    def _start_expire_url_cache_data(self) -> Deferred:
-        return run_as_background_process(
-            "expire_url_cache_data", self._expire_url_cache_data
-        )
-
-    async def _expire_url_cache_data(self) -> None:
-        """Clean up expired url cache content, media and thumbnails."""
-
-        assert self._worker_run_media_background_jobs
-
-        now = self.clock.time_msec()
-
-        logger.debug("Running url preview cache expiry")
-
-        def try_remove_parent_dirs(dirs: Iterable[str]) -> None:
-            """Attempt to remove the given chain of parent directories
-
-            Args:
-                dirs: The list of directory paths to delete, with children appearing
-                    before their parents.
-            """
-            for dir in dirs:
-                try:
-                    os.rmdir(dir)
-                except FileNotFoundError:
-                    # Already deleted, continue with deleting the rest
-                    pass
-                except OSError as e:
-                    # Failed, skip deleting the rest of the parent dirs
-                    if e.errno != errno.ENOTEMPTY:
-                        logger.warning(
-                            "Failed to remove media directory while clearing url preview cache: %r: %s",
-                            dir,
-                            e,
-                        )
-                    break
-
-        # First we delete expired url cache entries
-        media_ids = await self.store.get_expired_url_cache(now)
-
-        removed_media = []
-        for media_id in media_ids:
-            fname = self.filepaths.url_cache_filepath(media_id)
-            try:
-                os.remove(fname)
-            except FileNotFoundError:
-                pass  # If the path doesn't exist, meh
-            except OSError as e:
-                logger.warning(
-                    "Failed to remove media while clearing url preview cache: %r: %s",
-                    media_id,
-                    e,
-                )
-                continue
-
-            removed_media.append(media_id)
-
-            dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
-            try_remove_parent_dirs(dirs)
-
-        await self.store.delete_url_cache(removed_media)
-
-        if removed_media:
-            logger.debug(
-                "Deleted %d entries from url preview cache", len(removed_media)
-            )
-        else:
-            logger.debug("No entries removed from url preview cache")
-
-        # Now we delete old images associated with the url cache.
-        # These may be cached for a bit on the client (i.e., they
-        # may have a room open with a preview url thing open).
-        # So we wait a couple of days before deleting, just in case.
-        expire_before = now - IMAGE_CACHE_EXPIRY_MS
-        media_ids = await self.store.get_url_cache_media_before(expire_before)
-
-        removed_media = []
-        for media_id in media_ids:
-            fname = self.filepaths.url_cache_filepath(media_id)
-            try:
-                os.remove(fname)
-            except FileNotFoundError:
-                pass  # If the path doesn't exist, meh
-            except OSError as e:
-                logger.warning(
-                    "Failed to remove media from url preview cache: %r: %s", media_id, e
-                )
-                continue
-
-            dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
-            try_remove_parent_dirs(dirs)
-
-            thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id)
-            try:
-                shutil.rmtree(thumbnail_dir)
-            except FileNotFoundError:
-                pass  # If the path doesn't exist, meh
-            except OSError as e:
-                logger.warning(
-                    "Failed to remove media from url preview cache: %r: %s", media_id, e
-                )
-                continue
-
-            removed_media.append(media_id)
-
-            dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
-            # Note that one of the directories to be deleted has already been
-            # removed by the `rmtree` above.
-            try_remove_parent_dirs(dirs)
-
-        await self.store.delete_url_cache_media(removed_media)
-
-        if removed_media:
-            logger.debug("Deleted %d media from url preview cache", len(removed_media))
-        else:
-            logger.debug("No media removed from url preview cache")
-
-
-def _is_media(content_type: str) -> bool:
-    return content_type.lower().startswith("image/")
-
-
-def _is_html(content_type: str) -> bool:
-    content_type = content_type.lower()
-    return content_type.startswith("text/html") or content_type.startswith(
-        "application/xhtml"
-    )
-
-
-def _is_json(content_type: str) -> bool:
-    return content_type.lower().startswith("application/json")
-
-
-def _is_previewable(content_type: str) -> bool:
-    """Returns True for content types for which we will perform URL preview and False
-    otherwise."""
-
-    return _is_html(content_type) or _is_media(content_type) or _is_json(content_type)
diff --git a/tests/rest/media/test_url_preview.py b/tests/rest/media/test_url_preview.py
index e91dc581c2..e44beae8c1 100644
--- a/tests/rest/media/test_url_preview.py
+++ b/tests/rest/media/test_url_preview.py
@@ -26,8 +26,8 @@ from twisted.internet.interfaces import IAddress, IResolutionReceiver
 from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactor
 
 from synapse.config.oembed import OEmbedEndpointConfig
+from synapse.media.url_previewer import IMAGE_CACHE_EXPIRY_MS
 from synapse.rest.media.media_repository_resource import MediaRepositoryResource
-from synapse.rest.media.preview_url_resource import IMAGE_CACHE_EXPIRY_MS
 from synapse.server import HomeServer
 from synapse.types import JsonDict
 from synapse.util import Clock
@@ -36,7 +36,6 @@ from synapse.util.stringutils import parse_and_validate_mxc_uri
 from tests import unittest
 from tests.server import FakeTransport
 from tests.test_utils import SMALL_PNG
-from tests.utils import MockClock
 
 try:
     import lxml
@@ -117,8 +116,9 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         return hs
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        self.media_repo = hs.get_media_repository_resource()
-        self.preview_url = self.media_repo.children[b"preview_url"]
+        self.media_repo = hs.get_media_repository()
+        media_repo_resource = hs.get_media_repository_resource()
+        self.preview_url = media_repo_resource.children[b"preview_url"]
 
         self.lookups: Dict[str, Any] = {}
 
@@ -193,9 +193,9 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         )
 
         # Clear the in-memory cache
-        self.assertIn("http://matrix.org", self.preview_url._cache)
-        self.preview_url._cache.pop("http://matrix.org")
-        self.assertNotIn("http://matrix.org", self.preview_url._cache)
+        self.assertIn("http://matrix.org", self.preview_url._url_previewer._cache)
+        self.preview_url._url_previewer._cache.pop("http://matrix.org")
+        self.assertNotIn("http://matrix.org", self.preview_url._url_previewer._cache)
 
         # Check the database cache returns the correct response
         channel = self.make_request(
@@ -1073,7 +1073,7 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         """Test that files are not stored in or fetched from storage providers."""
         host, media_id = self._download_image()
 
-        rel_file_path = self.preview_url.filepaths.url_cache_filepath_rel(media_id)
+        rel_file_path = self.media_repo.filepaths.url_cache_filepath_rel(media_id)
         media_store_path = os.path.join(self.media_store_path, rel_file_path)
         storage_provider_path = os.path.join(self.storage_path, rel_file_path)
 
@@ -1116,7 +1116,7 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         host, media_id = self._download_image()
 
         rel_thumbnail_path = (
-            self.preview_url.filepaths.url_cache_thumbnail_directory_rel(media_id)
+            self.media_repo.filepaths.url_cache_thumbnail_directory_rel(media_id)
         )
         media_store_thumbnail_path = os.path.join(
             self.media_store_path, rel_thumbnail_path
@@ -1143,7 +1143,7 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         self.assertEqual(channel.code, 200)
 
         # Remove the original, otherwise thumbnails will regenerate
-        rel_file_path = self.preview_url.filepaths.url_cache_filepath_rel(media_id)
+        rel_file_path = self.media_repo.filepaths.url_cache_filepath_rel(media_id)
         media_store_path = os.path.join(self.media_store_path, rel_file_path)
         os.remove(media_store_path)
 
@@ -1166,26 +1166,24 @@ class URLPreviewTests(unittest.HomeserverTestCase):
 
     def test_cache_expiry(self) -> None:
         """Test that URL cache files and thumbnails are cleaned up properly on expiry."""
-        self.preview_url.clock = MockClock()
-
         _host, media_id = self._download_image()
 
-        file_path = self.preview_url.filepaths.url_cache_filepath(media_id)
-        file_dirs = self.preview_url.filepaths.url_cache_filepath_dirs_to_delete(
+        file_path = self.media_repo.filepaths.url_cache_filepath(media_id)
+        file_dirs = self.media_repo.filepaths.url_cache_filepath_dirs_to_delete(
             media_id
         )
-        thumbnail_dir = self.preview_url.filepaths.url_cache_thumbnail_directory(
+        thumbnail_dir = self.media_repo.filepaths.url_cache_thumbnail_directory(
             media_id
         )
-        thumbnail_dirs = self.preview_url.filepaths.url_cache_thumbnail_dirs_to_delete(
+        thumbnail_dirs = self.media_repo.filepaths.url_cache_thumbnail_dirs_to_delete(
             media_id
         )
 
         self.assertTrue(os.path.isfile(file_path))
         self.assertTrue(os.path.isdir(thumbnail_dir))
 
-        self.preview_url.clock.advance_time_msec(IMAGE_CACHE_EXPIRY_MS + 1)
-        self.get_success(self.preview_url._expire_url_cache_data())
+        self.reactor.advance(IMAGE_CACHE_EXPIRY_MS * 1000 + 1)
+        self.get_success(self.preview_url._url_previewer._expire_url_cache_data())
 
         for path in [file_path] + file_dirs + [thumbnail_dir] + thumbnail_dirs:
             self.assertFalse(
-- 
cgit 1.5.1


From 827f198177c4cf547b9d2d1eed41411e945fc199 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 21 Mar 2023 09:13:43 +0000
Subject: Fix error when sending message into deleted room. (#15235)

When a room is deleted in Synapse we remove the event forward
extremities in the room. If someone (say, a bot) then tries to send a
message into the room, we error out because we cannot calculate prev
events for the new event *before* we check if the sender is in the room.

Fixes #8094
---
 changelog.d/15235.bugfix      |  1 +
 synapse/handlers/message.py   | 17 +++++++++++++++--
 tests/rest/admin/test_room.py | 15 +++++++++++++++
 3 files changed, 31 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15235.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15235.bugfix b/changelog.d/15235.bugfix
new file mode 100644
index 0000000000..e6a6bb1b9d
--- /dev/null
+++ b/changelog.d/15235.bugfix
@@ -0,0 +1 @@
+Fix long-standing error when sending message into deleted room.
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index da129ec16a..4c75433a63 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -987,10 +987,11 @@ class EventCreationHandler:
         # a situation where event persistence can't keep up, causing
         # extremities to pile up, which in turn leads to state resolution
         # taking longer.
-        async with self.limiter.queue(event_dict["room_id"]):
+        room_id = event_dict["room_id"]
+        async with self.limiter.queue(room_id):
             if txn_id:
                 event = await self.get_event_from_transaction(
-                    requester, txn_id, event_dict["room_id"]
+                    requester, txn_id, room_id
                 )
                 if event:
                     # we know it was persisted, so must have a stream ordering
@@ -1000,6 +1001,18 @@ class EventCreationHandler:
                         event.internal_metadata.stream_ordering,
                     )
 
+        # If we don't have any prev event IDs specified then we need to
+        # check that the host is in the room (as otherwise populating the
+        # prev events will fail), at which point we may as well check the
+        # local user is in the room.
+        if not prev_event_ids:
+            user_id = requester.user.to_string()
+            is_user_in_room = await self.store.check_local_user_in_room(
+                user_id, room_id
+            )
+            if not is_user_in_room:
+                raise AuthError(403, f"User {user_id} not in room {room_id}")
+
         # Try several times, it could fail with PartialStateConflictError
         # in handle_new_client_event, cf comment in except block.
         max_retries = 5
diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py
index 9dbb778679..eb50086c50 100644
--- a/tests/rest/admin/test_room.py
+++ b/tests/rest/admin/test_room.py
@@ -402,6 +402,21 @@ class DeleteRoomTestCase(unittest.HomeserverTestCase):
         # Assert we can no longer peek into the room
         self._assert_peek(self.room_id, expect_code=403)
 
+    def test_room_delete_send(self) -> None:
+        """Test that sending into a deleted room returns a 403"""
+        channel = self.make_request(
+            "DELETE",
+            self.url,
+            content={},
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+
+        self.helper.send(
+            self.room_id, "test message", expect_code=403, tok=self.other_user_tok
+        )
+
     def _is_blocked(self, room_id: str, expect: bool = True) -> None:
         """Assert that the room is blocked or not"""
         d = self.store.is_room_blocked(room_id)
-- 
cgit 1.5.1


From b6aef593347924d39b4ff8b07e375eb656001545 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Tue, 21 Mar 2023 13:23:47 +0000
Subject: Make `EventHandler.get_event` return `None` when the requested event
 is not found (#15298)

---
 changelog.d/15298.bugfix               | 1 +
 synapse/handlers/events.py             | 9 +++++----
 tests/rest/client/test_report_event.py | 5 +++++
 3 files changed, 11 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/15298.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15298.bugfix b/changelog.d/15298.bugfix
new file mode 100644
index 0000000000..8f29b08444
--- /dev/null
+++ b/changelog.d/15298.bugfix
@@ -0,0 +1 @@
+Fix a bug in which the [`POST /_matrix/client/v3/rooms/{roomId}/report/{eventId}`](https://spec.matrix.org/v1.6/client-server-api/#post_matrixclientv3roomsroomidreporteventid) endpoint would return the wrong error if the user did not have permission to view the event. This aligns Synapse's implementation with [MSC2249](https://github.com/matrix-org/matrix-spec-proposals/pull/2249).
\ No newline at end of file
diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py
index 68c07f0265..33359f6ed7 100644
--- a/synapse/handlers/events.py
+++ b/synapse/handlers/events.py
@@ -159,15 +159,16 @@ class EventHandler:
         Returns:
             An event, or None if there is no event matching this ID.
         Raises:
-            SynapseError if there was a problem retrieving this event, or
-            AuthError if the user does not have the rights to inspect this
-            event.
+            AuthError: if the user does not have the rights to inspect this event.
         """
         redact_behaviour = (
             EventRedactBehaviour.as_is if show_redacted else EventRedactBehaviour.redact
         )
         event = await self.store.get_event(
-            event_id, check_room_id=room_id, redact_behaviour=redact_behaviour
+            event_id,
+            check_room_id=room_id,
+            redact_behaviour=redact_behaviour,
+            allow_none=True,
         )
 
         if not event:
diff --git a/tests/rest/client/test_report_event.py b/tests/rest/client/test_report_event.py
index 1250685d39..1a8ab067a9 100644
--- a/tests/rest/client/test_report_event.py
+++ b/tests/rest/client/test_report_event.py
@@ -84,6 +84,11 @@ class ReportEventTestCase(unittest.HomeserverTestCase):
             access_token=self.other_user_tok,
         )
         self.assertEqual(404, channel.code, msg=channel.result["body"])
+        self.assertEqual(
+            "Unable to report event: it does not exist or you aren't able to see it.",
+            channel.json_body["error"],
+            msg=channel.result["body"],
+        )
 
     def _assert_status(self, response_status: int, data: JsonDict) -> None:
         channel = self.make_request(
-- 
cgit 1.5.1


From ec9224bf9a7bebb6c429ef45e0d1a293f0986836 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Tue, 21 Mar 2023 13:24:03 +0000
Subject: Make `POST /_matrix/client/v3/rooms/{roomId}/report/{eventId}`
 endpoint return 404 if event exists, but the user lacks access (#15300)

---
 changelog.d/15300.bugfix                        |  1 +
 docs/upgrade.md                                 | 12 ++++++++
 synapse/rest/client/report_event.py             | 16 +++++++----
 synapse/storage/databases/main/events_worker.py |  1 -
 tests/rest/client/test_report_event.py          | 37 +++++++++++++++++++++++++
 5 files changed, 61 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/15300.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15300.bugfix b/changelog.d/15300.bugfix
new file mode 100644
index 0000000000..8f29b08444
--- /dev/null
+++ b/changelog.d/15300.bugfix
@@ -0,0 +1 @@
+Fix a bug in which the [`POST /_matrix/client/v3/rooms/{roomId}/report/{eventId}`](https://spec.matrix.org/v1.6/client-server-api/#post_matrixclientv3roomsroomidreporteventid) endpoint would return the wrong error if the user did not have permission to view the event. This aligns Synapse's implementation with [MSC2249](https://github.com/matrix-org/matrix-spec-proposals/pull/2249).
\ No newline at end of file
diff --git a/docs/upgrade.md b/docs/upgrade.md
index f06e874054..f14444a400 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -88,6 +88,18 @@ process, for example:
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     ```
 
+# Upgrading to v1.80.0
+
+## Reporting events error code change
+
+Before this update, the
+[`POST /_matrix/client/v3/rooms/{roomId}/report/{eventId}`](https://spec.matrix.org/v1.6/client-server-api/#post_matrixclientv3roomsroomidreporteventid)
+endpoint would return a `403` if a user attempted to report an event that they did not have access to.
+This endpoint will now return a `404` in this case instead.
+
+Clients that implement event reporting should check that their error handling code will handle this
+change.
+
 # Upgrading to v1.79.0
 
 ## The `on_threepid_bind` module callback method has been deprecated
diff --git a/synapse/rest/client/report_event.py b/synapse/rest/client/report_event.py
index 9be5860221..ac1a63ca27 100644
--- a/synapse/rest/client/report_event.py
+++ b/synapse/rest/client/report_event.py
@@ -16,7 +16,7 @@ import logging
 from http import HTTPStatus
 from typing import TYPE_CHECKING, Tuple
 
-from synapse.api.errors import Codes, NotFoundError, SynapseError
+from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError
 from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_json_object_from_request
 from synapse.http.site import SynapseRequest
@@ -62,12 +62,18 @@ class ReportEventRestServlet(RestServlet):
                 Codes.BAD_JSON,
             )
 
-        event = await self._event_handler.get_event(
-            requester.user, room_id, event_id, show_redacted=False
-        )
+        try:
+            event = await self._event_handler.get_event(
+                requester.user, room_id, event_id, show_redacted=False
+            )
+        except AuthError:
+            # The event exists, but this user is not allowed to access this event.
+            event = None
+
         if event is None:
             raise NotFoundError(
-                "Unable to report event: it does not exist or you aren't able to see it."
+                "Unable to report event: "
+                "it does not exist or you aren't able to see it."
             )
 
         await self.store.add_event_report(
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 20b7a68362..0cf46626d2 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -805,7 +805,6 @@ class EventsWorkerStore(SQLBaseStore):
                 # the events have been redacted, and if so pulling the redaction event
                 # out of the database to check it.
                 #
-                missing_events = {}
                 try:
                     # Try to fetch from any external cache. We already checked the
                     # in-memory cache above.
diff --git a/tests/rest/client/test_report_event.py b/tests/rest/client/test_report_event.py
index 1a8ab067a9..b88f1d61a0 100644
--- a/tests/rest/client/test_report_event.py
+++ b/tests/rest/client/test_report_event.py
@@ -90,6 +90,43 @@ class ReportEventTestCase(unittest.HomeserverTestCase):
             msg=channel.result["body"],
         )
 
+    def test_cannot_report_event_if_not_in_room(self) -> None:
+        """
+        Tests that we don't accept event reports for events that exist, but for which
+        the reporter should not be able to view (because they are not in the room).
+        """
+        # Have the admin user create a room (the "other" user will not join this room).
+        new_room_id = self.helper.create_room_as(tok=self.admin_user_tok)
+
+        # Have the admin user send an event in this room.
+        response = self.helper.send_event(
+            new_room_id,
+            "m.room.message",
+            content={
+                "msgtype": "m.text",
+                "body": "This event has some bad words in it! Flip!",
+            },
+            tok=self.admin_user_tok,
+        )
+        event_id = response["event_id"]
+
+        # Have the "other" user attempt to report it. Perhaps they found the event ID
+        # in a screenshot or something...
+        channel = self.make_request(
+            "POST",
+            f"rooms/{new_room_id}/report/{event_id}",
+            {"reason": "I'm not in this room but I have opinions anyways!"},
+            access_token=self.other_user_tok,
+        )
+
+        # The "other" user is not in the room, so their report should be rejected.
+        self.assertEqual(404, channel.code, msg=channel.result["body"])
+        self.assertEqual(
+            "Unable to report event: it does not exist or you aren't able to see it.",
+            channel.json_body["error"],
+            msg=channel.result["body"],
+        )
+
     def _assert_status(self, response_status: int, data: JsonDict) -> None:
         channel = self.make_request(
             "POST", self.report_path, data, access_token=self.other_user_tok
-- 
cgit 1.5.1


From 96bcc5d9028e745df2f708c92b15e4e5bfc91328 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Tue, 21 Mar 2023 10:49:25 -0700
Subject: Revert  "check sqlite database file exists before porting/#14692" 
 (#15301)

---
 changelog.d/15301.bugfix            | 3 +++
 synapse/_scripts/synapse_port_db.py | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15301.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15301.bugfix b/changelog.d/15301.bugfix
new file mode 100644
index 0000000000..c8f3628d34
--- /dev/null
+++ b/changelog.d/15301.bugfix
@@ -0,0 +1,3 @@
+Fix a bug introduced in Synapse 1.75.0rc1 where the [SQLite port_db script](https://matrix-org.github.io/synapse/latest/postgres.html#porting-from-sqlite)
+would fail to open the SQLite database.
+
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 2c9cbf8b27..78d76d38ad 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -1329,7 +1329,7 @@ def main() -> None:
     sqlite_config = {
         "name": "sqlite3",
         "args": {
-            "database": "file:{}?mode=rw".format(args.sqlite_database),
+            "database": args.sqlite_database,
             "cp_min": 1,
             "cp_max": 1,
             "check_same_thread": False,
-- 
cgit 1.5.1


From 1bc4feb6c9762216e930daf0ddbdb86c77bf7724 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 21 Mar 2023 14:19:54 -0400
Subject: Apply & bundle edits for non-message events. (#15295)

---
 changelog.d/15295.bugfix                    |  1 +
 synapse/storage/databases/main/relations.py | 11 ++++-------
 2 files changed, 5 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/15295.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15295.bugfix b/changelog.d/15295.bugfix
new file mode 100644
index 0000000000..e437ef3a01
--- /dev/null
+++ b/changelog.d/15295.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where edits of non-`m.room.message` events would not be correctly bundled or have their new content applied.
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index bc3a83919c..3955a8a9a5 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -472,12 +472,11 @@ class RelationsWorkerStore(SQLBaseStore):
             the event will map to None.
         """
 
-        # We only allow edits for `m.room.message` events that have the same sender
-        # and event type. We can't assert these things during regular event auth so
-        # we have to do the checks post hoc.
+        # We only allow edits for events that have the same sender and event type.
+        # We can't assert these things during regular event auth so we have to do
+        # the checks post hoc.
 
-        # Fetches latest edit that has the same type and sender as the
-        # original, and is an `m.room.message`.
+        # Fetches latest edit that has the same type and sender as the original.
         if isinstance(self.database_engine, PostgresEngine):
             # The `DISTINCT ON` clause will pick the *first* row it encounters,
             # so ordering by origin server ts + event ID desc will ensure we get
@@ -493,7 +492,6 @@ class RelationsWorkerStore(SQLBaseStore):
                 WHERE
                     %s
                     AND relation_type = ?
-                    AND edit.type = 'm.room.message'
                 ORDER by original.event_id DESC, edit.origin_server_ts DESC, edit.event_id DESC
             """
         else:
@@ -512,7 +510,6 @@ class RelationsWorkerStore(SQLBaseStore):
                 WHERE
                     %s
                     AND relation_type = ?
-                    AND edit.type = 'm.room.message'
                 ORDER by edit.origin_server_ts, edit.event_id
             """
 
-- 
cgit 1.5.1


From 72f3f23c4db3f549a1aa060a7e98ba07812e570b Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Tue, 21 Mar 2023 17:59:55 -0700
Subject: Change the parameter `immediate` of `send_device_messages` to default
 to `True` (#15297)

---
 changelog.d/15297.bugfix              | 1 +
 synapse/federation/send_queue.py      | 2 +-
 synapse/federation/sender/__init__.py | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15297.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15297.bugfix b/changelog.d/15297.bugfix
new file mode 100644
index 0000000000..b5735fe4da
--- /dev/null
+++ b/changelog.d/15297.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse v1.55.0 which could delay remote homeservers being able to decrypt encrypted messages sent by local users.
diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py
index 3063df7990..0b7c81677e 100644
--- a/synapse/federation/send_queue.py
+++ b/synapse/federation/send_queue.py
@@ -244,7 +244,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
 
         self.notifier.on_new_replication_data()
 
-    def send_device_messages(self, destination: str, immediate: bool = False) -> None:
+    def send_device_messages(self, destination: str, immediate: bool = True) -> None:
         """As per FederationSender"""
         # We don't need to replicate this as it gets sent down a different
         # stream.
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index 43421a9c72..106daa9184 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -783,7 +783,7 @@ class FederationSender(AbstractFederationSender):
         else:
             queue.send_edu(edu)
 
-    def send_device_messages(self, destination: str, immediate: bool = False) -> None:
+    def send_device_messages(self, destination: str, immediate: bool = True) -> None:
         if destination == self.server_name:
             logger.warning("Not sending device update to ourselves")
             return
-- 
cgit 1.5.1


From 1bc9985eb7feca2a8eb4a0125cd03dfa3ac631fe Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 22 Mar 2023 12:53:55 +0000
Subject: Have replication clients remove _INT_STREAM_POS (#15309)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Have replication clients remove _INT_STREAM_POS

Suppose worker A makes an internal http request from worker B. B may
make changes that A later learns about over replication. We want A's
request to block until it has seen those changes—mainly to ensure A's
caches are invalidated promptly. This helps provide read-after-write
consistency, eliminating entire categories of races and test flakes.

To implement this, B includes a top-level field `_INT_STREAM_POS` in its
response JSON. Roughly speaking, the field's value tells A what to wait
for. But we weren't removing that internal field before A's request
completed!

Introduced in https://github.com/matrix-org/synapse/pull/14820.
Fixes #15308.

* Changelog
---
 changelog.d/15309.bugfix          | 1 +
 synapse/replication/http/_base.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15309.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15309.bugfix b/changelog.d/15309.bugfix
new file mode 100644
index 0000000000..4d3fe4e4b1
--- /dev/null
+++ b/changelog.d/15309.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.76.0 where responses from worker deployments could include an internal `_INT_STREAM_POS` key.
diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py
index c20d9c7e9d..8c2c54c07a 100644
--- a/synapse/replication/http/_base.py
+++ b/synapse/replication/http/_base.py
@@ -345,7 +345,7 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
                 _outgoing_request_counter.labels(cls.NAME, 200).inc()
 
                 # Wait on any streams that the remote may have written to.
-                for stream_name, position in result.get(
+                for stream_name, position in result.pop(
                     _STREAM_POSITION_KEY, {}
                 ).items():
                     await replication.wait_for_stream_position(
-- 
cgit 1.5.1


From 7f02fafa28b112fe9d136dc3fcb8799f5958fb95 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Wed, 22 Mar 2023 08:36:42 -0700
Subject: Add a check to SQLite port DB script to ensure that the sqlite
 database passed to the script exists before trying to port from it (#15306)

---
 changelog.d/15306.bugfix            | 2 ++
 synapse/_scripts/synapse_port_db.py | 8 ++++++++
 2 files changed, 10 insertions(+)
 create mode 100644 changelog.d/15306.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15306.bugfix b/changelog.d/15306.bugfix
new file mode 100644
index 0000000000..f5eb716f12
--- /dev/null
+++ b/changelog.d/15306.bugfix
@@ -0,0 +1,2 @@
+Add a check to [SQLite port_db script](https://matrix-org.github.io/synapse/latest/postgres.html#porting-from-sqlite)
+to ensure that the sqlite database passed to the script exists before trying to port from it.
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 78d76d38ad..94b86c1d6f 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -18,6 +18,7 @@
 import argparse
 import curses
 import logging
+import os
 import sys
 import time
 import traceback
@@ -1326,6 +1327,13 @@ def main() -> None:
         filename="port-synapse.log" if args.curses else None,
     )
 
+    if not os.path.isfile(args.sqlite_database):
+        sys.stderr.write(
+            "The sqlite database you specified does not exist, please check that you have the"
+            " correct path."
+        )
+        sys.exit(1)
+
     sqlite_config = {
         "name": "sqlite3",
         "args": {
-- 
cgit 1.5.1


From 3b0083c92adf76daf4161908565de9e5efc08074 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 22 Mar 2023 17:15:34 +0000
Subject: Use immutabledict instead of frozendict (#15113)

Additionally:

* Consistently use `freeze()` in test

---------

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
Co-authored-by: 6543 <6543@obermui.de>
---
 changelog.d/15113.misc                   |   1 +
 poetry.lock                              | 125 ++++---------------------------
 pyproject.toml                           |  12 ++-
 stubs/frozendict.pyi                     |  39 ----------
 synapse/__init__.py                      |  19 +++--
 synapse/crypto/event_signing.py          |   2 +-
 synapse/events/snapshot.py               |   4 +-
 synapse/events/utils.py                  |   2 +-
 synapse/events/validator.py              |   2 +-
 synapse/state/__init__.py                |  10 ++-
 synapse/storage/databases/main/stream.py |   4 +-
 synapse/types/__init__.py                |  12 +--
 synapse/types/state.py                   |  26 ++++---
 synapse/util/__init__.py                 |  20 ++---
 synapse/util/frozenutils.py              |   6 +-
 tests/api/test_filtering.py              |   6 +-
 tests/config/test_workers.py             |   6 +-
 tests/push/test_push_rule_evaluator.py   |  18 ++---
 tests/storage/test_state.py              |  40 +++++-----
 tests/types/test_state.py                |  14 ++--
 20 files changed, 124 insertions(+), 244 deletions(-)
 create mode 100644 changelog.d/15113.misc
 delete mode 100644 stubs/frozendict.pyi

(limited to 'synapse')

diff --git a/changelog.d/15113.misc b/changelog.d/15113.misc
new file mode 100644
index 0000000000..6917dd5652
--- /dev/null
+++ b/changelog.d/15113.misc
@@ -0,0 +1 @@
+Use `immutabledict` instead of `frozendict`.
diff --git a/poetry.lock b/poetry.lock
index ff8b43bac7..76fbfafcf9 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -160,23 +160,16 @@ css = ["tinycss2 (>=1.1.0,<1.2)"]
 
 [[package]]
 name = "canonicaljson"
-version = "1.6.5"
+version = "2.0.0"
 description = "Canonical JSON"
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "canonicaljson-1.6.5-py3-none-any.whl", hash = "sha256:806ea6f2cbb7405d20259e1c36dd1214ba5c242fa9165f5bd0bf2081f82c23fb"},
-    {file = "canonicaljson-1.6.5.tar.gz", hash = "sha256:68dfc157b011e07d94bf74b5d4ccc01958584ed942d9dfd5fdd706609e81cd4b"},
+    {file = "canonicaljson-2.0.0-py3-none-any.whl", hash = "sha256:c38a315de3b5a0532f1ec1f9153cd3d716abfc565a558d00a4835428a34fca5b"},
+    {file = "canonicaljson-2.0.0.tar.gz", hash = "sha256:e2fdaef1d7fadc5d9cb59bd3d0d41b064ddda697809ac4325dced721d12f113f"},
 ]
 
-[package.dependencies]
-simplejson = ">=3.14.0"
-typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.8\""}
-
-[package.extras]
-frozendict = ["frozendict (>=1.0)"]
-
 [[package]]
 name = "certifi"
 version = "2022.12.7"
@@ -453,33 +446,6 @@ files = [
 [package.extras]
 dev = ["Sphinx", "coverage", "flake8", "lxml", "memory-profiler", "mypy (==0.910)", "tox", "xmlschema (>=1.8.0)"]
 
-[[package]]
-name = "frozendict"
-version = "2.3.4"
-description = "A simple immutable dictionary"
-category = "main"
-optional = false
-python-versions = ">=3.6"
-files = [
-    {file = "frozendict-2.3.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4a3b32d47282ae0098b9239a6d53ec539da720258bd762d62191b46f2f87c5fc"},
-    {file = "frozendict-2.3.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84c9887179a245a66a50f52afa08d4d92ae0f269839fab82285c70a0fa0dd782"},
-    {file = "frozendict-2.3.4-cp310-cp310-win_amd64.whl", hash = "sha256:b98a0d65a59af6da03f794f90b0c3085a7ee14e7bf8f0ef36b079ee8aa992439"},
-    {file = "frozendict-2.3.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:3d8042b7dab5e992e30889c9b71b781d5feef19b372d47d735e4d7d45846fd4a"},
-    {file = "frozendict-2.3.4-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25a6d2e8b7cf6b6e5677a1a4b53b4073e5d9ec640d1db30dc679627668d25e90"},
-    {file = "frozendict-2.3.4-cp36-cp36m-win_amd64.whl", hash = "sha256:dbbe1339ac2646523e0bb00d1896085d1f70de23780e4927ca82b36ab8a044d3"},
-    {file = "frozendict-2.3.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95bac22f7f09d81f378f2b3f672b7a50a974ca180feae1507f5e21bc147e8bc8"},
-    {file = "frozendict-2.3.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dae686722c144b333c4dbdc16323a5de11406d26b76d2be1cc175f90afacb5ba"},
-    {file = "frozendict-2.3.4-cp37-cp37m-win_amd64.whl", hash = "sha256:389f395a74eb16992217ac1521e689c1dea2d70113bcb18714669ace1ed623b9"},
-    {file = "frozendict-2.3.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ccb6450a416c9cc9acef7683e637e28356e3ceeabf83521f74cc2718883076b7"},
-    {file = "frozendict-2.3.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aca59108b77cadc13ba7dfea7e8f50811208c7652a13dc6c7f92d7782a24d299"},
-    {file = "frozendict-2.3.4-cp38-cp38-win_amd64.whl", hash = "sha256:3ec86ebf143dd685184215c27ec416c36e0ba1b80d81b1b9482f7d380c049b4e"},
-    {file = "frozendict-2.3.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5809e6ff6b7257043a486f7a3b73a7da71cf69a38980b4171e4741291d0d9eb3"},
-    {file = "frozendict-2.3.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c550ed7fdf1962984bec21630c584d722b3ee5d5f57a0ae2527a0121dc0414a"},
-    {file = "frozendict-2.3.4-cp39-cp39-win_amd64.whl", hash = "sha256:3e93aebc6e69a8ef329bbe9afb8342bd33c7b5c7a0c480cb9f7e60b0cbe48072"},
-    {file = "frozendict-2.3.4-py3-none-any.whl", hash = "sha256:d722f3d89db6ae35ef35ecc243c40c800eb344848c83dba4798353312cd37b15"},
-    {file = "frozendict-2.3.4.tar.gz", hash = "sha256:15b4b18346259392b0d27598f240e9390fafbff882137a9c48a1e0104fb17f78"},
-]
-
 [[package]]
 name = "gitdb"
 version = "4.0.9"
@@ -725,6 +691,18 @@ files = [
     {file = "ijson-3.2.0.post0.tar.gz", hash = "sha256:80a5bd7e9923cab200701f67ad2372104328b99ddf249dbbe8834102c852d316"},
 ]
 
+[[package]]
+name = "immutabledict"
+version = "2.2.3"
+description = "Immutable wrapper around dictionaries (a fork of frozendict)"
+category = "main"
+optional = false
+python-versions = ">=3.7,<4.0"
+files = [
+    {file = "immutabledict-2.2.3-py3-none-any.whl", hash = "sha256:a7b078ebcc4a58ddc73b55f808b26e7c8c2d5183fad325615112689e1a63e714"},
+    {file = "immutabledict-2.2.3.tar.gz", hash = "sha256:0e1e8a3f2b3ff062daa19795f947e9ec7a58add269d44e34d3ab4319e1343853"},
+]
+
 [[package]]
 name = "importlib-metadata"
 version = "6.0.0"
@@ -2174,77 +2152,6 @@ unpaddedbase64 = ">=1.0.1"
 [package.extras]
 dev = ["typing-extensions (>=3.5)"]
 
-[[package]]
-name = "simplejson"
-version = "3.17.6"
-description = "Simple, fast, extensible JSON encoder/decoder for Python"
-category = "main"
-optional = false
-python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*"
-files = [
-    {file = "simplejson-3.17.6-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a89acae02b2975b1f8e4974cb8cdf9bf9f6c91162fb8dec50c259ce700f2770a"},
-    {file = "simplejson-3.17.6-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:82ff356ff91be0ab2293fc6d8d262451eb6ac4fd999244c4b5f863e049ba219c"},
-    {file = "simplejson-3.17.6-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:0de783e9c2b87bdd75b57efa2b6260c24b94605b5c9843517577d40ee0c3cc8a"},
-    {file = "simplejson-3.17.6-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:d24a9e61df7a7787b338a58abfba975414937b609eb6b18973e25f573bc0eeeb"},
-    {file = "simplejson-3.17.6-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:e8603e691580487f11306ecb066c76f1f4a8b54fb3bdb23fa40643a059509366"},
-    {file = "simplejson-3.17.6-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:9b01e7b00654115965a206e3015f0166674ec1e575198a62a977355597c0bef5"},
-    {file = "simplejson-3.17.6-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:37bc0cf0e5599f36072077e56e248f3336917ded1d33d2688624d8ed3cefd7d2"},
-    {file = "simplejson-3.17.6-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:cf6e7d5fe2aeb54898df18db1baf479863eae581cce05410f61f6b4188c8ada1"},
-    {file = "simplejson-3.17.6-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:bdfc54b4468ed4cd7415928cbe782f4d782722a81aeb0f81e2ddca9932632211"},
-    {file = "simplejson-3.17.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:dd16302d39c4d6f4afde80edd0c97d4db643327d355a312762ccd9bd2ca515ed"},
-    {file = "simplejson-3.17.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:deac4bdafa19bbb89edfb73b19f7f69a52d0b5bd3bb0c4ad404c1bbfd7b4b7fd"},
-    {file = "simplejson-3.17.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a8bbdb166e2fb816e43ab034c865147edafe28e1b19c72433147789ac83e2dda"},
-    {file = "simplejson-3.17.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7854326920d41c3b5d468154318fe6ba4390cb2410480976787c640707e0180"},
-    {file = "simplejson-3.17.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:04e31fa6ac8e326480703fb6ded1488bfa6f1d3f760d32e29dbf66d0838982ce"},
-    {file = "simplejson-3.17.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f63600ec06982cdf480899026f4fda622776f5fabed9a869fdb32d72bc17e99a"},
-    {file = "simplejson-3.17.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e03c3b8cc7883a54c3f34a6a135c4a17bc9088a33f36796acdb47162791b02f6"},
-    {file = "simplejson-3.17.6-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a2d30d6c1652140181dc6861f564449ad71a45e4f165a6868c27d36745b65d40"},
-    {file = "simplejson-3.17.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a1aa6e4cae8e3b8d5321be4f51c5ce77188faf7baa9fe1e78611f93a8eed2882"},
-    {file = "simplejson-3.17.6-cp310-cp310-win32.whl", hash = "sha256:97202f939c3ff341fc3fa84d15db86156b1edc669424ba20b0a1fcd4a796a045"},
-    {file = "simplejson-3.17.6-cp310-cp310-win_amd64.whl", hash = "sha256:80d3bc9944be1d73e5b1726c3bbfd2628d3d7fe2880711b1eb90b617b9b8ac70"},
-    {file = "simplejson-3.17.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:9fa621b3c0c05d965882c920347b6593751b7ab20d8fa81e426f1735ca1a9fc7"},
-    {file = "simplejson-3.17.6-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd2fb11922f58df8528adfca123f6a84748ad17d066007e7ac977720063556bd"},
-    {file = "simplejson-3.17.6-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:724c1fe135aa437d5126138d977004d165a3b5e2ee98fc4eb3e7c0ef645e7e27"},
-    {file = "simplejson-3.17.6-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4ff4ac6ff3aa8f814ac0f50bf218a2e1a434a17aafad4f0400a57a8cc62ef17f"},
-    {file = "simplejson-3.17.6-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:67093a526e42981fdd954868062e56c9b67fdd7e712616cc3265ad0c210ecb51"},
-    {file = "simplejson-3.17.6-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:5d6b4af7ad7e4ac515bc6e602e7b79e2204e25dbd10ab3aa2beef3c5a9cad2c7"},
-    {file = "simplejson-3.17.6-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:1c9b1ed7ed282b36571638297525f8ef80f34b3e2d600a56f962c6044f24200d"},
-    {file = "simplejson-3.17.6-cp36-cp36m-win32.whl", hash = "sha256:632ecbbd2228575e6860c9e49ea3cc5423764d5aa70b92acc4e74096fb434044"},
-    {file = "simplejson-3.17.6-cp36-cp36m-win_amd64.whl", hash = "sha256:4c09868ddb86bf79b1feb4e3e7e4a35cd6e61ddb3452b54e20cf296313622566"},
-    {file = "simplejson-3.17.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4b6bd8144f15a491c662f06814bd8eaa54b17f26095bb775411f39bacaf66837"},
-    {file = "simplejson-3.17.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5decdc78849617917c206b01e9fc1d694fd58caa961be816cb37d3150d613d9a"},
-    {file = "simplejson-3.17.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:521877c7bd060470806eb6335926e27453d740ac1958eaf0d8c00911bc5e1802"},
-    {file = "simplejson-3.17.6-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:65b998193bd7b0c7ecdfffbc825d808eac66279313cb67d8892bb259c9d91494"},
-    {file = "simplejson-3.17.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ac786f6cb7aa10d44e9641c7a7d16d7f6e095b138795cd43503769d4154e0dc2"},
-    {file = "simplejson-3.17.6-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:3ff5b3464e1ce86a8de8c88e61d4836927d5595c2162cab22e96ff551b916e81"},
-    {file = "simplejson-3.17.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:69bd56b1d257a91e763256d63606937ae4eb890b18a789b66951c00062afec33"},
-    {file = "simplejson-3.17.6-cp37-cp37m-win32.whl", hash = "sha256:b81076552d34c27e5149a40187a8f7e2abb2d3185576a317aaf14aeeedad862a"},
-    {file = "simplejson-3.17.6-cp37-cp37m-win_amd64.whl", hash = "sha256:07ecaafc1b1501f275bf5acdee34a4ad33c7c24ede287183ea77a02dc071e0c0"},
-    {file = "simplejson-3.17.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:068670af975247acbb9fc3d5393293368cda17026db467bf7a51548ee8f17ee1"},
-    {file = "simplejson-3.17.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4d1c135af0c72cb28dd259cf7ba218338f4dc027061262e46fe058b4e6a4c6a3"},
-    {file = "simplejson-3.17.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:23fe704da910ff45e72543cbba152821685a889cf00fc58d5c8ee96a9bad5f94"},
-    {file = "simplejson-3.17.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f444762fed1bc1fd75187ef14a20ed900c1fbb245d45be9e834b822a0223bc81"},
-    {file = "simplejson-3.17.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:681eb4d37c9a9a6eb9b3245a5e89d7f7b2b9895590bb08a20aa598c1eb0a1d9d"},
-    {file = "simplejson-3.17.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8e8607d8f6b4f9d46fee11447e334d6ab50e993dd4dbfb22f674616ce20907ab"},
-    {file = "simplejson-3.17.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b10556817f09d46d420edd982dd0653940b90151d0576f09143a8e773459f6fe"},
-    {file = "simplejson-3.17.6-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:e1ec8a9ee0987d4524ffd6299e778c16cc35fef6d1a2764e609f90962f0b293a"},
-    {file = "simplejson-3.17.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0b4126cac7d69ac06ff22efd3e0b3328a4a70624fcd6bca4fc1b4e6d9e2e12bf"},
-    {file = "simplejson-3.17.6-cp38-cp38-win32.whl", hash = "sha256:35a49ebef25f1ebdef54262e54ae80904d8692367a9f208cdfbc38dbf649e00a"},
-    {file = "simplejson-3.17.6-cp38-cp38-win_amd64.whl", hash = "sha256:743cd768affaa508a21499f4858c5b824ffa2e1394ed94eb85caf47ac0732198"},
-    {file = "simplejson-3.17.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:fb62d517a516128bacf08cb6a86ecd39fb06d08e7c4980251f5d5601d29989ba"},
-    {file = "simplejson-3.17.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:12133863178a8080a3dccbf5cb2edfab0001bc41e5d6d2446af2a1131105adfe"},
-    {file = "simplejson-3.17.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5540fba2d437edaf4aa4fbb80f43f42a8334206ad1ad3b27aef577fd989f20d9"},
-    {file = "simplejson-3.17.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d74ee72b5071818a1a5dab47338e87f08a738cb938a3b0653b9e4d959ddd1fd9"},
-    {file = "simplejson-3.17.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:28221620f4dcabdeac310846629b976e599a13f59abb21616356a85231ebd6ad"},
-    {file = "simplejson-3.17.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b09bc62e5193e31d7f9876220fb429ec13a6a181a24d897b9edfbbdbcd678851"},
-    {file = "simplejson-3.17.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7255a37ff50593c9b2f1afa8fafd6ef5763213c1ed5a9e2c6f5b9cc925ab979f"},
-    {file = "simplejson-3.17.6-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:401d40969cee3df7bda211e57b903a534561b77a7ade0dd622a8d1a31eaa8ba7"},
-    {file = "simplejson-3.17.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a649d0f66029c7eb67042b15374bd93a26aae202591d9afd71e111dd0006b198"},
-    {file = "simplejson-3.17.6-cp39-cp39-win32.whl", hash = "sha256:522fad7be85de57430d6d287c4b635813932946ebf41b913fe7e880d154ade2e"},
-    {file = "simplejson-3.17.6-cp39-cp39-win_amd64.whl", hash = "sha256:3fe87570168b2ae018391e2b43fbf66e8593a86feccb4b0500d134c998983ccc"},
-    {file = "simplejson-3.17.6.tar.gz", hash = "sha256:cf98038d2abf63a1ada5730e91e84c642ba6c225b0198c3684151b1f80c5f8a6"},
-]
-
 [[package]]
 name = "six"
 version = "1.16.0"
@@ -3013,4 +2920,4 @@ user-search = ["pyicu"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.7.1"
-content-hash = "de2c4c8de336593478ce02581a5336afe2544db93ea82f3955b34c3653c29a26"
+content-hash = "0ca92e52a1952f9485172efe25a039351280c28f0a158869557dc2f8855786fe"
diff --git a/pyproject.toml b/pyproject.toml
index 19dc7c1536..c0111dd796 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -153,15 +153,13 @@ python = "^3.7.1"
 # ----------------------
 # we use the TYPE_CHECKER.redefine method added in jsonschema 3.0.0
 jsonschema = ">=3.0.0"
-# frozendict 2.1.2 is broken on Debian 10: https://github.com/Marco-Sulla/python-frozendict/issues/41
-# We cannot test our wheels against the 2.3.5 release in CI. Putting in an upper bound for this
-# because frozendict has been more trouble than it's worth; we would like to move to immutabledict.
-frozendict = ">=1,!=2.1.2,<2.3.5"
+# We choose 2.0 as a lower bound: the most recent backwards incompatible release.
+# It seems generally available, judging by https://pkgs.org/search/?q=immutabledict
+immutabledict = ">=2.0"
 # We require 2.1.0 or higher for type hints. Previous guard was >= 1.1.0
 unpaddedbase64 = ">=2.1.0"
-# We require 1.5.0 to work around an issue when running against the C implementation of
-# frozendict: https://github.com/matrix-org/python-canonicaljson/issues/36
-canonicaljson = "^1.5.0"
+# We require 2.0.0 for immutabledict support.
+canonicaljson = "^2.0.0"
 # we use the type definitions added in signedjson 1.1.
 signedjson = "^1.1.0"
 # validating SSL certs for IP addresses requires service_identity 18.1.
diff --git a/stubs/frozendict.pyi b/stubs/frozendict.pyi
deleted file mode 100644
index 196dee4461..0000000000
--- a/stubs/frozendict.pyi
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright 2020 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Stub for frozendict.
-
-from __future__ import annotations
-
-from typing import Any, Hashable, Iterable, Iterator, Mapping, Tuple, TypeVar, overload
-
-_KT = TypeVar("_KT", bound=Hashable)  # Key type.
-_VT = TypeVar("_VT")  # Value type.
-
-class frozendict(Mapping[_KT, _VT]):
-    @overload
-    def __init__(self, **kwargs: _VT) -> None: ...
-    @overload
-    def __init__(self, __map: Mapping[_KT, _VT], **kwargs: _VT) -> None: ...
-    @overload
-    def __init__(
-        self, __iterable: Iterable[Tuple[_KT, _VT]], **kwargs: _VT
-    ) -> None: ...
-    def __getitem__(self, key: _KT) -> _VT: ...
-    def __contains__(self, key: Any) -> bool: ...
-    def copy(self, **add_or_replace: Any) -> frozendict: ...
-    def __iter__(self) -> Iterator[_KT]: ...
-    def __len__(self) -> int: ...
-    def __repr__(self) -> str: ...
-    def __hash__(self) -> int: ...
diff --git a/synapse/__init__.py b/synapse/__init__.py
index a203ed533a..b97ee59f15 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -17,9 +17,9 @@
 """ This is an implementation of a Matrix homeserver.
 """
 
-import json
 import os
 import sys
+from typing import Any, Dict
 
 from synapse.util.rust import check_rust_lib_up_to_date
 from synapse.util.stringutils import strtobool
@@ -61,11 +61,20 @@ try:
 except ImportError:
     pass
 
-# Use the standard library json implementation instead of simplejson.
+# Teach canonicaljson how to serialise immutabledicts.
 try:
-    from canonicaljson import set_json_library
-
-    set_json_library(json)
+    from canonicaljson import register_preserialisation_callback
+    from immutabledict import immutabledict
+
+    def _immutabledict_cb(d: immutabledict) -> Dict[str, Any]:
+        try:
+            return d._dict
+        except Exception:
+            # Paranoia: fall back to a `dict()` call, in case a future version of
+            # immutabledict removes `_dict` from the implementation.
+            return dict(d)
+
+    register_preserialisation_callback(immutabledict, _immutabledict_cb)
 except ImportError:
     pass
 
diff --git a/synapse/crypto/event_signing.py b/synapse/crypto/event_signing.py
index 23b799ac32..1a293f1df0 100644
--- a/synapse/crypto/event_signing.py
+++ b/synapse/crypto/event_signing.py
@@ -51,7 +51,7 @@ def check_event_content_hash(
     # some malformed events lack a 'hashes'. Protect against it being missing
     # or a weird type by basically treating it the same as an unhashed event.
     hashes = event.get("hashes")
-    # nb it might be a frozendict or a dict
+    # nb it might be an immutabledict or a dict
     if not isinstance(hashes, collections.abc.Mapping):
         raise SynapseError(
             400, "Malformed 'hashes': %s" % (type(hashes),), Codes.UNAUTHORIZED
diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py
index c04ad08cbb..9b4d692cf4 100644
--- a/synapse/events/snapshot.py
+++ b/synapse/events/snapshot.py
@@ -15,7 +15,7 @@ from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, List, Optional, Tuple
 
 import attr
-from frozendict import frozendict
+from immutabledict import immutabledict
 
 from synapse.appservice import ApplicationService
 from synapse.events import EventBase
@@ -489,4 +489,4 @@ def _decode_state_dict(
     if input is None:
         return None
 
-    return frozendict({(etype, state_key): v for etype, state_key, v in input})
+    return immutabledict({(etype, state_key): v for etype, state_key, v in input})
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index b9c15ffcdb..e41c7a4b83 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -567,7 +567,7 @@ PowerLevelsContent = Mapping[str, Union[_PowerLevel, Mapping[str, _PowerLevel]]]
 def copy_and_fixup_power_levels_contents(
     old_power_levels: PowerLevelsContent,
 ) -> Dict[str, Union[int, Dict[str, int]]]:
-    """Copy the content of a power_levels event, unfreezing frozendicts along the way.
+    """Copy the content of a power_levels event, unfreezing immutabledicts along the way.
 
     We accept as input power level values which are strings, provided they represent an
     integer, e.g. `"`100"` instead of 100. Such strings are converted to integers
diff --git a/synapse/events/validator.py b/synapse/events/validator.py
index fb1737b910..6f0e4386d3 100644
--- a/synapse/events/validator.py
+++ b/synapse/events/validator.py
@@ -258,7 +258,7 @@ POWER_LEVELS_SCHEMA = {
 def _create_power_level_validator() -> Type[jsonschema.Draft7Validator]:
     validator = jsonschema.validators.validator_for(POWER_LEVELS_SCHEMA)
 
-    # by default jsonschema does not consider a frozendict to be an object so
+    # by default jsonschema does not consider an immutabledict to be an object so
     # we need to use a custom type checker
     # https://python-jsonschema.readthedocs.io/en/stable/validate/?highlight=object#validating-with-additional-types
     type_checker = validator.TYPE_CHECKER.redefine(
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index 4dc25df67e..6031095249 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -33,7 +33,7 @@ from typing import (
 )
 
 import attr
-from frozendict import frozendict
+from immutabledict import immutabledict
 from prometheus_client import Counter, Histogram
 
 from synapse.api.constants import EventTypes
@@ -105,14 +105,18 @@ class _StateCacheEntry:
         #
         # This can be None if we have a `state_group` (as then we can fetch the
         # state from the DB.)
-        self._state = frozendict(state) if state is not None else None
+        self._state: Optional[StateMap[str]] = (
+            immutabledict(state) if state is not None else None
+        )
 
         # the ID of a state group if one and only one is involved.
         # otherwise, None otherwise?
         self.state_group = state_group
 
         self.prev_group = prev_group
-        self.delta_ids = frozendict(delta_ids) if delta_ids is not None else None
+        self.delta_ids: Optional[StateMap[str]] = (
+            immutabledict(delta_ids) if delta_ids is not None else None
+        )
 
     async def get_state(
         self,
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index ac5fbf6b86..2b8779bbb8 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -50,7 +50,7 @@ from typing import (
 )
 
 import attr
-from frozendict import frozendict
+from immutabledict import immutabledict
 from typing_extensions import Literal
 
 from twisted.internet import defer
@@ -557,7 +557,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
                 if p > min_pos
             }
 
-        return RoomStreamToken(None, min_pos, frozendict(positions))
+        return RoomStreamToken(None, min_pos, immutabledict(positions))
 
     async def get_room_events_stream_for_rooms(
         self,
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index 33363867c4..c09b9cf87d 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -35,7 +35,7 @@ from typing import (
 )
 
 import attr
-from frozendict import frozendict
+from immutabledict import immutabledict
 from signedjson.key import decode_verify_key_bytes
 from signedjson.types import VerifyKey
 from typing_extensions import Final, TypedDict
@@ -490,12 +490,12 @@ class RoomStreamToken:
     )
     stream: int = attr.ib(validator=attr.validators.instance_of(int))
 
-    instance_map: "frozendict[str, int]" = attr.ib(
-        factory=frozendict,
+    instance_map: "immutabledict[str, int]" = attr.ib(
+        factory=immutabledict,
         validator=attr.validators.deep_mapping(
             key_validator=attr.validators.instance_of(str),
             value_validator=attr.validators.instance_of(int),
-            mapping_validator=attr.validators.instance_of(frozendict),
+            mapping_validator=attr.validators.instance_of(immutabledict),
         ),
     )
 
@@ -531,7 +531,7 @@ class RoomStreamToken:
                 return cls(
                     topological=None,
                     stream=stream,
-                    instance_map=frozendict(instance_map),
+                    instance_map=immutabledict(instance_map),
                 )
         except CancelledError:
             raise
@@ -566,7 +566,7 @@ class RoomStreamToken:
             for instance in set(self.instance_map).union(other.instance_map)
         }
 
-        return RoomStreamToken(None, max_stream, frozendict(instance_map))
+        return RoomStreamToken(None, max_stream, immutabledict(instance_map))
 
     def as_historical_tuple(self) -> Tuple[int, int]:
         """Returns a tuple of `(topological, stream)` for historical tokens.
diff --git a/synapse/types/state.py b/synapse/types/state.py
index 4b3071acce..1e78a74047 100644
--- a/synapse/types/state.py
+++ b/synapse/types/state.py
@@ -28,7 +28,7 @@ from typing import (
 )
 
 import attr
-from frozendict import frozendict
+from immutabledict import immutabledict
 
 from synapse.api.constants import EventTypes
 from synapse.types import MutableStateMap, StateKey, StateMap
@@ -56,7 +56,7 @@ class StateFilter:
             appear in `types`.
     """
 
-    types: "frozendict[str, Optional[FrozenSet[str]]]"
+    types: "immutabledict[str, Optional[FrozenSet[str]]]"
     include_others: bool = False
 
     def __attrs_post_init__(self) -> None:
@@ -67,7 +67,7 @@ class StateFilter:
             object.__setattr__(
                 self,
                 "types",
-                frozendict({k: v for k, v in self.types.items() if v is not None}),
+                immutabledict({k: v for k, v in self.types.items() if v is not None}),
             )
 
     @staticmethod
@@ -112,7 +112,7 @@ class StateFilter:
             type_dict.setdefault(typ, set()).add(s)  # type: ignore
 
         return StateFilter(
-            types=frozendict(
+            types=immutabledict(
                 (k, frozenset(v) if v is not None else None)
                 for k, v in type_dict.items()
             )
@@ -139,7 +139,7 @@ class StateFilter:
             The new state filter
         """
         return StateFilter(
-            types=frozendict({EventTypes.Member: frozenset(members)}),
+            types=immutabledict({EventTypes.Member: frozenset(members)}),
             include_others=True,
         )
 
@@ -159,7 +159,7 @@ class StateFilter:
                 types_with_frozen_values[state_types] = None
 
         return StateFilter(
-            frozendict(types_with_frozen_values), include_others=include_others
+            immutabledict(types_with_frozen_values), include_others=include_others
         )
 
     def return_expanded(self) -> "StateFilter":
@@ -217,7 +217,7 @@ class StateFilter:
             # We want to return all non-members, but only particular
             # memberships
             return StateFilter(
-                types=frozendict({EventTypes.Member: self.types[EventTypes.Member]}),
+                types=immutabledict({EventTypes.Member: self.types[EventTypes.Member]}),
                 include_others=True,
             )
         else:
@@ -381,14 +381,16 @@ class StateFilter:
             if state_keys is None:
                 member_filter = StateFilter.all()
             else:
-                member_filter = StateFilter(frozendict({EventTypes.Member: state_keys}))
+                member_filter = StateFilter(
+                    immutabledict({EventTypes.Member: state_keys})
+                )
         elif self.include_others:
             member_filter = StateFilter.all()
         else:
             member_filter = StateFilter.none()
 
         non_member_filter = StateFilter(
-            types=frozendict(
+            types=immutabledict(
                 {k: v for k, v in self.types.items() if k != EventTypes.Member}
             ),
             include_others=self.include_others,
@@ -578,8 +580,8 @@ class StateFilter:
         return False
 
 
-_ALL_STATE_FILTER = StateFilter(types=frozendict(), include_others=True)
+_ALL_STATE_FILTER = StateFilter(types=immutabledict(), include_others=True)
 _ALL_NON_MEMBER_STATE_FILTER = StateFilter(
-    types=frozendict({EventTypes.Member: frozenset()}), include_others=True
+    types=immutabledict({EventTypes.Member: frozenset()}), include_others=True
 )
-_NONE_STATE_FILTER = StateFilter(types=frozendict(), include_others=False)
+_NONE_STATE_FILTER = StateFilter(types=immutabledict(), include_others=False)
diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py
index 7be9d5f113..9ddd26ccaa 100644
--- a/synapse/util/__init__.py
+++ b/synapse/util/__init__.py
@@ -18,7 +18,7 @@ import typing
 from typing import Any, Callable, Dict, Generator, Optional, Sequence
 
 import attr
-from frozendict import frozendict
+from immutabledict import immutabledict
 from matrix_common.versionstring import get_distribution_version_string
 from typing_extensions import ParamSpec
 
@@ -41,22 +41,18 @@ def _reject_invalid_json(val: Any) -> None:
     raise ValueError("Invalid JSON value: '%s'" % val)
 
 
-def _handle_frozendict(obj: Any) -> Dict[Any, Any]:
-    """Helper for json_encoder. Makes frozendicts serializable by returning
+def _handle_immutabledict(obj: Any) -> Dict[Any, Any]:
+    """Helper for json_encoder. Makes immutabledicts serializable by returning
     the underlying dict
     """
-    if type(obj) is frozendict:
+    if type(obj) is immutabledict:
         # fishing the protected dict out of the object is a bit nasty,
         # but we don't really want the overhead of copying the dict.
         try:
             # Safety: we catch the AttributeError immediately below.
-            # See https://github.com/matrix-org/python-canonicaljson/issues/36#issuecomment-927816293
-            # for discussion on how frozendict's internals have changed over time.
-            return obj._dict  # type: ignore[attr-defined]
+            return obj._dict
         except AttributeError:
-            # When the C implementation of frozendict is used,
-            # there isn't a `_dict` attribute with a dict
-            # so we resort to making a copy of the frozendict
+            # If all else fails, resort to making a copy of the immutabledict
             return dict(obj)
     raise TypeError(
         "Object of type %s is not JSON serializable" % obj.__class__.__name__
@@ -64,11 +60,11 @@ def _handle_frozendict(obj: Any) -> Dict[Any, Any]:
 
 
 # A custom JSON encoder which:
-#   * handles frozendicts
+#   * handles immutabledicts
 #   * produces valid JSON (no NaNs etc)
 #   * reduces redundant whitespace
 json_encoder = json.JSONEncoder(
-    allow_nan=False, separators=(",", ":"), default=_handle_frozendict
+    allow_nan=False, separators=(",", ":"), default=_handle_immutabledict
 )
 
 # Create a custom decoder to reject Python extensions to JSON.
diff --git a/synapse/util/frozenutils.py b/synapse/util/frozenutils.py
index 7223af1a36..889caa2601 100644
--- a/synapse/util/frozenutils.py
+++ b/synapse/util/frozenutils.py
@@ -14,14 +14,14 @@
 import collections.abc
 from typing import Any
 
-from frozendict import frozendict
+from immutabledict import immutabledict
 
 
 def freeze(o: Any) -> Any:
     if isinstance(o, dict):
-        return frozendict({k: freeze(v) for k, v in o.items()})
+        return immutabledict({k: freeze(v) for k, v in o.items()})
 
-    if isinstance(o, frozendict):
+    if isinstance(o, immutabledict):
         return o
 
     if isinstance(o, (bytes, str)):
diff --git a/tests/api/test_filtering.py b/tests/api/test_filtering.py
index 0f45615160..6c6a9ab4b4 100644
--- a/tests/api/test_filtering.py
+++ b/tests/api/test_filtering.py
@@ -18,7 +18,6 @@ from typing import List
 from unittest.mock import patch
 
 import jsonschema
-from frozendict import frozendict
 
 from twisted.test.proto_helpers import MemoryReactor
 
@@ -29,6 +28,7 @@ from synapse.api.presence import UserPresenceState
 from synapse.server import HomeServer
 from synapse.types import JsonDict
 from synapse.util import Clock
+from synapse.util.frozenutils import freeze
 
 from tests import unittest
 from tests.events.test_utils import MockEvent
@@ -343,12 +343,12 @@ class FilteringTestCase(unittest.HomeserverTestCase):
 
         self.assertFalse(Filter(self.hs, definition)._check(event))
 
-        # check it works with frozendicts too
+        # check it works with frozen dictionaries too
         event = MockEvent(
             sender="@foo:bar",
             type="m.room.message",
             room_id="!secretbase:unknown",
-            content=frozendict({EventContentFields.LABELS: ["#fun"]}),
+            content=freeze({EventContentFields.LABELS: ["#fun"]}),
         )
         self.assertTrue(Filter(self.hs, definition)._check(event))
 
diff --git a/tests/config/test_workers.py b/tests/config/test_workers.py
index ef6294ecb2..49a6bdf408 100644
--- a/tests/config/test_workers.py
+++ b/tests/config/test_workers.py
@@ -14,14 +14,14 @@
 from typing import Any, Mapping, Optional
 from unittest.mock import Mock
 
-from frozendict import frozendict
+from immutabledict import immutabledict
 
 from synapse.config import ConfigError
 from synapse.config.workers import WorkerConfig
 
 from tests.unittest import TestCase
 
-_EMPTY_FROZENDICT: Mapping[str, Any] = frozendict()
+_EMPTY_IMMUTABLEDICT: Mapping[str, Any] = immutabledict()
 
 
 class WorkerDutyConfigTestCase(TestCase):
@@ -29,7 +29,7 @@ class WorkerDutyConfigTestCase(TestCase):
         self,
         worker_app: str,
         worker_name: Optional[str],
-        extras: Mapping[str, Any] = _EMPTY_FROZENDICT,
+        extras: Mapping[str, Any] = _EMPTY_IMMUTABLEDICT,
     ) -> WorkerConfig:
         root_config = Mock()
         root_config.worker_app = worker_app
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py
index 52c4aafea6..b2536562e0 100644
--- a/tests/push/test_push_rule_evaluator.py
+++ b/tests/push/test_push_rule_evaluator.py
@@ -14,8 +14,6 @@
 
 from typing import Any, Dict, List, Optional, Union, cast
 
-import frozendict
-
 from twisted.test.proto_helpers import MemoryReactor
 
 import synapse.rest.admin
@@ -318,11 +316,11 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
             "pattern should only match at the start/end of the value",
         )
 
-        # it should work on frozendicts too
+        # it should work on frozen dictionaries too
         self._assert_matches(
             condition,
-            frozendict.frozendict({"value": "FoobaZ"}),
-            "patterns should match on frozendicts",
+            freeze({"value": "FoobaZ"}),
+            "patterns should match on frozen dictionaries",
         )
 
         # wildcards should match
@@ -425,11 +423,11 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
                 "incorrect types should not match",
             )
 
-        # it should work on frozendicts too
+        # it should work on frozen dictionaries too
         self._assert_matches(
             condition,
-            frozendict.frozendict({"value": "foobaz"}),
-            "values should match on frozendicts",
+            freeze({"value": "foobaz"}),
+            "values should match on frozen dictionaries",
         )
 
     def test_exact_event_match_boolean(self) -> None:
@@ -546,11 +544,11 @@ class PushRuleEvaluatorTestCase(unittest.TestCase):
             "does not search in a string",
         )
 
-        # it should work on frozendicts too
+        # it should work on frozen dictionaries too
         self._assert_matches(
             condition,
             freeze({"value": ["foobaz"]}),
-            "values should match on frozendicts",
+            "values should match on frozen dictionaries",
         )
 
     def test_no_body(self) -> None:
diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py
index 62aed6af0a..0b9446c36c 100644
--- a/tests/storage/test_state.py
+++ b/tests/storage/test_state.py
@@ -14,7 +14,7 @@
 
 import logging
 
-from frozendict import frozendict
+from immutabledict import immutabledict
 
 from twisted.test.proto_helpers import MemoryReactor
 
@@ -198,7 +198,7 @@ class StateStoreTestCase(HomeserverTestCase):
             self.storage.state.get_state_for_event(
                 e5.event_id,
                 state_filter=StateFilter(
-                    types=frozendict(
+                    types=immutabledict(
                         {EventTypes.Member: frozenset({self.u_alice.to_string()})}
                     ),
                     include_others=True,
@@ -220,7 +220,7 @@ class StateStoreTestCase(HomeserverTestCase):
             self.storage.state.get_state_for_event(
                 e5.event_id,
                 state_filter=StateFilter(
-                    types=frozendict({EventTypes.Member: frozenset()}),
+                    types=immutabledict({EventTypes.Member: frozenset()}),
                     include_others=True,
                 ),
             )
@@ -246,7 +246,8 @@ class StateStoreTestCase(HomeserverTestCase):
             self.state_datastore._state_group_cache,
             group,
             state_filter=StateFilter(
-                types=frozendict({EventTypes.Member: frozenset()}), include_others=True
+                types=immutabledict({EventTypes.Member: frozenset()}),
+                include_others=True,
             ),
         )
 
@@ -263,7 +264,8 @@ class StateStoreTestCase(HomeserverTestCase):
             self.state_datastore._state_group_members_cache,
             group,
             state_filter=StateFilter(
-                types=frozendict({EventTypes.Member: frozenset()}), include_others=True
+                types=immutabledict({EventTypes.Member: frozenset()}),
+                include_others=True,
             ),
         )
 
@@ -276,7 +278,7 @@ class StateStoreTestCase(HomeserverTestCase):
             self.state_datastore._state_group_cache,
             group,
             state_filter=StateFilter(
-                types=frozendict({EventTypes.Member: None}), include_others=True
+                types=immutabledict({EventTypes.Member: None}), include_others=True
             ),
         )
 
@@ -293,7 +295,7 @@ class StateStoreTestCase(HomeserverTestCase):
             self.state_datastore._state_group_members_cache,
             group,
             state_filter=StateFilter(
-                types=frozendict({EventTypes.Member: None}), include_others=True
+                types=immutabledict({EventTypes.Member: None}), include_others=True
             ),
         )
 
@@ -313,7 +315,7 @@ class StateStoreTestCase(HomeserverTestCase):
             self.state_datastore._state_group_cache,
             group,
             state_filter=StateFilter(
-                types=frozendict({EventTypes.Member: frozenset({e5.state_key})}),
+                types=immutabledict({EventTypes.Member: frozenset({e5.state_key})}),
                 include_others=True,
             ),
         )
@@ -331,7 +333,7 @@ class StateStoreTestCase(HomeserverTestCase):
             self.state_datastore._state_group_members_cache,
             group,
             state_filter=StateFilter(
-                types=frozendict({EventTypes.Member: frozenset({e5.state_key})}),
+                types=immutabledict({EventTypes.Member: frozenset({e5.state_key})}),
                 include_others=True,
             ),
         )
@@ -345,7 +347,7 @@ class StateStoreTestCase(HomeserverTestCase):
             self.state_datastore._state_group_members_cache,
             group,
             state_filter=StateFilter(
-                types=frozendict({EventTypes.Member: frozenset({e5.state_key})}),
+                types=immutabledict({EventTypes.Member: frozenset({e5.state_key})}),
                 include_others=False,
             ),
         )
@@ -396,7 +398,8 @@ class StateStoreTestCase(HomeserverTestCase):
             self.state_datastore._state_group_cache,
             group,
             state_filter=StateFilter(
-                types=frozendict({EventTypes.Member: frozenset()}), include_others=True
+                types=immutabledict({EventTypes.Member: frozenset()}),
+                include_others=True,
             ),
         )
 
@@ -408,7 +411,8 @@ class StateStoreTestCase(HomeserverTestCase):
             self.state_datastore._state_group_members_cache,
             group,
             state_filter=StateFilter(
-                types=frozendict({EventTypes.Member: frozenset()}), include_others=True
+                types=immutabledict({EventTypes.Member: frozenset()}),
+                include_others=True,
             ),
         )
 
@@ -421,7 +425,7 @@ class StateStoreTestCase(HomeserverTestCase):
             self.state_datastore._state_group_cache,
             group,
             state_filter=StateFilter(
-                types=frozendict({EventTypes.Member: None}), include_others=True
+                types=immutabledict({EventTypes.Member: None}), include_others=True
             ),
         )
 
@@ -432,7 +436,7 @@ class StateStoreTestCase(HomeserverTestCase):
             self.state_datastore._state_group_members_cache,
             group,
             state_filter=StateFilter(
-                types=frozendict({EventTypes.Member: None}), include_others=True
+                types=immutabledict({EventTypes.Member: None}), include_others=True
             ),
         )
 
@@ -451,7 +455,7 @@ class StateStoreTestCase(HomeserverTestCase):
             self.state_datastore._state_group_cache,
             group,
             state_filter=StateFilter(
-                types=frozendict({EventTypes.Member: frozenset({e5.state_key})}),
+                types=immutabledict({EventTypes.Member: frozenset({e5.state_key})}),
                 include_others=True,
             ),
         )
@@ -463,7 +467,7 @@ class StateStoreTestCase(HomeserverTestCase):
             self.state_datastore._state_group_members_cache,
             group,
             state_filter=StateFilter(
-                types=frozendict({EventTypes.Member: frozenset({e5.state_key})}),
+                types=immutabledict({EventTypes.Member: frozenset({e5.state_key})}),
                 include_others=True,
             ),
         )
@@ -477,7 +481,7 @@ class StateStoreTestCase(HomeserverTestCase):
             self.state_datastore._state_group_cache,
             group,
             state_filter=StateFilter(
-                types=frozendict({EventTypes.Member: frozenset({e5.state_key})}),
+                types=immutabledict({EventTypes.Member: frozenset({e5.state_key})}),
                 include_others=False,
             ),
         )
@@ -489,7 +493,7 @@ class StateStoreTestCase(HomeserverTestCase):
             self.state_datastore._state_group_members_cache,
             group,
             state_filter=StateFilter(
-                types=frozendict({EventTypes.Member: frozenset({e5.state_key})}),
+                types=immutabledict({EventTypes.Member: frozenset({e5.state_key})}),
                 include_others=False,
             ),
         )
diff --git a/tests/types/test_state.py b/tests/types/test_state.py
index eb809f9fb7..1d89582c44 100644
--- a/tests/types/test_state.py
+++ b/tests/types/test_state.py
@@ -1,4 +1,4 @@
-from frozendict import frozendict
+from immutabledict import immutabledict
 
 from synapse.api.constants import EventTypes
 from synapse.types.state import StateFilter
@@ -172,7 +172,7 @@ class StateFilterDifferenceTestCase(TestCase):
                 },
                 include_others=False,
             ),
-            StateFilter(types=frozendict(), include_others=True),
+            StateFilter(types=immutabledict(), include_others=True),
         )
 
         # (wildcard on state keys) - (no state keys)
@@ -188,7 +188,7 @@ class StateFilterDifferenceTestCase(TestCase):
                 include_others=False,
             ),
             StateFilter(
-                types=frozendict(),
+                types=immutabledict(),
                 include_others=True,
             ),
         )
@@ -279,7 +279,7 @@ class StateFilterDifferenceTestCase(TestCase):
                 {EventTypes.Member: None, EventTypes.CanonicalAlias: None},
                 include_others=True,
             ),
-            StateFilter(types=frozendict(), include_others=False),
+            StateFilter(types=immutabledict(), include_others=False),
         )
 
         # (wildcard on state keys) - (specific state keys)
@@ -332,7 +332,7 @@ class StateFilterDifferenceTestCase(TestCase):
                 include_others=True,
             ),
             StateFilter(
-                types=frozendict(),
+                types=immutabledict(),
                 include_others=False,
             ),
         )
@@ -403,7 +403,7 @@ class StateFilterDifferenceTestCase(TestCase):
                 {EventTypes.Member: None, EventTypes.CanonicalAlias: None},
                 include_others=True,
             ),
-            StateFilter(types=frozendict(), include_others=False),
+            StateFilter(types=immutabledict(), include_others=False),
         )
 
         # (wildcard on state keys) - (specific state keys)
@@ -450,7 +450,7 @@ class StateFilterDifferenceTestCase(TestCase):
                 include_others=True,
             ),
             StateFilter(
-                types=frozendict(),
+                types=immutabledict(),
                 include_others=False,
             ),
         )
-- 
cgit 1.5.1


From 98fd558382226b347d78e5f6e6782c6e74c25e69 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Thu, 23 Mar 2023 12:11:14 +0000
Subject: Add a primitive helper script for listing worker endpoints. (#15243)

Co-authored-by: Patrick Cloke <patrickc@matrix.org>
---
 changelog.d/15243.feature                         |   1 +
 synapse/_scripts/generate_workers_map.py          | 302 ++++++++++++++++++++++
 synapse/federation/transport/server/__init__.py   |   2 +
 synapse/federation/transport/server/federation.py |  27 ++
 synapse/rest/client/_base.py                      |  23 +-
 synapse/rest/client/account.py                    |   4 +
 synapse/rest/client/account_data.py               |   2 +
 synapse/rest/client/devices.py                    |   2 +
 synapse/rest/client/events.py                     |   2 +
 synapse/rest/client/filter.py                     |   2 +
 synapse/rest/client/initial_sync.py               |   1 +
 synapse/rest/client/keys.py                       |   4 +
 synapse/rest/client/knock.py                      |   1 +
 synapse/rest/client/login.py                      |   4 +
 synapse/rest/client/presence.py                   |   1 +
 synapse/rest/client/profile.py                    |   3 +
 synapse/rest/client/push_rule.py                  |   3 +
 synapse/rest/client/read_marker.py                |   1 +
 synapse/rest/client/receipts.py                   |   1 +
 synapse/rest/client/register.py                   |   2 +
 synapse/rest/client/relations.py                  |   2 +
 synapse/rest/client/room.py                       |  33 ++-
 synapse/rest/client/room_batch.py                 |   1 +
 synapse/rest/client/room_keys.py                  |   3 +
 synapse/rest/client/sendtodevice.py               |   1 +
 synapse/rest/client/sync.py                       |   1 +
 synapse/rest/client/tags.py                       |   2 +
 synapse/rest/client/user_directory.py             |   1 +
 synapse/rest/client/versions.py                   |   1 +
 synapse/rest/client/voip.py                       |   1 +
 synapse/rest/key/v2/remote_key_resource.py        |   2 +
 31 files changed, 424 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/15243.feature
 create mode 100755 synapse/_scripts/generate_workers_map.py

(limited to 'synapse')

diff --git a/changelog.d/15243.feature b/changelog.d/15243.feature
new file mode 100644
index 0000000000..c45e974c4c
--- /dev/null
+++ b/changelog.d/15243.feature
@@ -0,0 +1 @@
+Add a primitive helper script for listing worker endpoints.
\ No newline at end of file
diff --git a/synapse/_scripts/generate_workers_map.py b/synapse/_scripts/generate_workers_map.py
new file mode 100755
index 0000000000..6c08878523
--- /dev/null
+++ b/synapse/_scripts/generate_workers_map.py
@@ -0,0 +1,302 @@
+#!/usr/bin/env python
+# Copyright 2022-2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import logging
+import re
+from collections import defaultdict
+from dataclasses import dataclass
+from typing import Dict, Iterable, Optional, Pattern, Set, Tuple
+
+import yaml
+
+from synapse.config.homeserver import HomeServerConfig
+from synapse.federation.transport.server import (
+    TransportLayerServer,
+    register_servlets as register_federation_servlets,
+)
+from synapse.http.server import HttpServer, ServletCallback
+from synapse.rest import ClientRestResource
+from synapse.rest.key.v2 import RemoteKey
+from synapse.server import HomeServer
+from synapse.storage import DataStore
+
+logger = logging.getLogger("generate_workers_map")
+
+
+class MockHomeserver(HomeServer):  # HomeServer with an overridden worker app
+    DATASTORE_CLASS = DataStore  # type: ignore
+
+    def __init__(self, config: HomeServerConfig, worker_app: Optional[str]) -> None:
+        super().__init__(config.server.server_name, config=config)
+        self.config.worker.worker_app = worker_app  # `None` means "not a worker"
+
+
+GROUP_PATTERN = re.compile(r"\(\?P<[^>]+?>(.+?)\)")
+
+
+@dataclass
+class EndpointDescription:
+    """
+    Describes an endpoint: the servlet class that handles it and its routing category.
+    """
+
+    # The servlet class whose bound method was registered as this endpoint's callback
+    servlet_class: object
+
+    # The category of this endpoint, read from the `CATEGORY` constant on the servlet
+    # class; `None` when the servlet class does not define one.
+    category: Optional[str]
+
+    # TODO:
+    #  - does it need to be routed based on a stream writer config?
+    #  - does it benefit from any optimised, but optional, routing?
+    #  - what 'opinionated synapse worker class' (event_creator, synchrotron, etc) does
+    #    it go in?
+
+
+class EnumerationResource(HttpServer):
+    """
+    A stand-in `HttpServer` that records servlet registrations, building up a
+    description of every endpoint that gets registered with it.
+    """
+
+    def __init__(self, is_worker: bool) -> None:
+        self._is_worker = is_worker
+        self.registrations: Dict[Tuple[str, str], EndpointDescription] = {}
+
+    def register_paths(
+        self,
+        method: str,
+        path_patterns: Iterable[Pattern],
+        callback: ServletCallback,
+        servlet_classname: str,
+    ) -> None:
+        # Federation servlets register a wrapped callback; look through the wrapper
+        # to find the bound method, and from that the servlet class it belongs to.
+        bound_method = getattr(callback, "__wrapped__", callback)
+        servlet_class = bound_method.__self__.__class__  # type: ignore
+
+        if self._is_worker:
+            denied = getattr(servlet_class, "WORKERS_DENIED_METHODS", ())
+            if method in denied:
+                # Calling this endpoint on a worker would cause an error, so
+                # pretend it was never registered!
+                return
+
+        description = EndpointDescription(
+            servlet_class=servlet_class,
+            category=getattr(servlet_class, "CATEGORY", None),
+        )
+
+        # Every pattern of this registration shares the one description object.
+        for pattern in path_patterns:
+            self.registrations[(method, pattern.pattern)] = description
+
+
+def get_registered_paths_for_hs(
+    hs: HomeServer,
+) -> Dict[Tuple[str, str], EndpointDescription]:
+    """
+    Collects descriptions of the client, federation and key server endpoints of `hs`.
+    """
+    is_worker = hs.config.worker.worker_app is not None
+    recorder = EnumerationResource(is_worker=is_worker)
+    ClientRestResource.register_servlets(recorder, hs)
+    transport = TransportLayerServer(hs)
+
+    # `transport.register_servlets` can't be used here, but this call does the
+    # same thing, only it registers against our recorder instead.
+    register_federation_servlets(
+        transport.hs,
+        resource=recorder,
+        ratelimiter=transport.ratelimiter,
+        authenticator=transport.authenticator,
+        servlet_groups=transport.servlet_groups,
+    )
+
+    # The key server endpoints are registered separately again.
+    RemoteKey(hs).register(recorder)
+
+    return recorder.registrations
+
+
+def get_registered_paths_for_default(
+    worker_app: Optional[str], base_config: HomeServerConfig
+) -> Dict[Tuple[str, str], EndpointDescription]:
+    """
+    Builds a mock homeserver for `worker_app` (`None` for a non-worker) from
+    `base_config`, sets it up, and returns what it registers:
+
+        Dict from (method, path) to EndpointDescription
+
+    TODO Don't require passing in a config
+    """
+    mock_hs = MockHomeserver(base_config, worker_app)
+
+    # TODO We only do this to avoid an error, but don't need the database etc
+    mock_hs.setup()
+    return get_registered_paths_for_hs(mock_hs)
+
+
+def elide_http_methods_if_unconflicting(
+    registrations: Dict[Tuple[str, str], EndpointDescription],
+    all_possible_registrations: Dict[Tuple[str, str], EndpointDescription],
+) -> Dict[Tuple[str, str], EndpointDescription]:
+    """
+    Collapses the HTTP methods of a path into `*` whenever the worker described by
+    `registrations` can serve every method that is registered for that path anywhere.
+
+    Paths for which the worker handles only some of the possible methods keep one
+    entry per method, so those are the only entries left that are not keyed by `*`.
+    """
+
+    def group_methods_by_path(
+        pairs: Iterable[Tuple[str, str]]
+    ) -> Dict[str, Set[str]]:
+        """
+        Turns (method, path) pairs into a dict mapping each path to the set of
+        methods seen for that path.
+        """
+        by_path: Dict[str, Set[str]] = {}
+        for http_method, endpoint_path in pairs:
+            by_path.setdefault(endpoint_path, set()).add(http_method)
+        return by_path
+
+    possible_by_path = group_methods_by_path(all_possible_registrations)
+    handled_by_path = group_methods_by_path(registrations)
+
+    elided = {}
+
+    for path, methods in handled_by_path.items():
+        if methods != possible_by_path[path]:
+            # Partial coverage of this path: keep every handled method explicit.
+            for method in methods:
+                elided[(method, path)] = registrations[(method, path)]
+            continue
+        # TODO This assumes that all methods have the same servlet.
+        #      I suppose that's possibly dubious?
+        elided[("*", path)] = registrations[(next(iter(methods)), path)]
+
+    return elided
+
+
+def simplify_path_regexes(
+    registrations: Dict[Tuple[str, str], EndpointDescription]
+) -> Dict[Tuple[str, str], EndpointDescription]:
+    """
+    Returns a new dict of endpoint descriptions in which every path regex has been
+    simplified: each named capturing group (e.g. `(?P<blah>xyz)`) is replaced with
+    `.*`, which is available in more common regex dialects.
+
+    This avoids the Python-specific regex extensions (and also removes needlessly
+    specific detail).
+    """
+    simplified = {}
+
+    for (method, path_regex), description in registrations.items():
+        # TODO it's hard to choose between these two;
+        #      `.*` is a vague simplification
+        # simple_path = GROUP_PATTERN.sub(r"\1", path_regex)
+        simple_path = GROUP_PATTERN.sub(r".*", path_regex)
+        simplified[(method, simple_path)] = description
+
+    return simplified
+
+
+def main() -> None:
+    """Prints the endpoints a generic worker can handle, grouped by category."""
+
+    parser = argparse.ArgumentParser(
+        description="Lists the endpoints that a generic worker can handle, by category."
+    )
+    parser.add_argument("-v", action="store_true")
+    parser.add_argument(
+        "--config-path",
+        type=argparse.FileType("r"),
+        required=True,
+        help="Synapse configuration file",
+    )
+
+    args = parser.parse_args()
+
+    # TODO
+    # logging.basicConfig(**logging_config)
+
+    # Load, process and sanity-check the config.
+    with args.config_path as config_file:
+        hs_config = yaml.safe_load(config_file)
+
+    config = HomeServerConfig()
+    config.parse_config_dict(hs_config, "", "")
+
+    master_paths = get_registered_paths_for_default(None, config)
+    worker_paths = get_registered_paths_for_default(
+        "synapse.app.generic_worker", config
+    )
+
+    all_paths = {**master_paths, **worker_paths}
+
+    # Only the worker's endpoints are printed, so only those need their methods elided.
+    elided_worker_paths = elide_http_methods_if_unconflicting(worker_paths, all_paths)
+
+    # TODO SSO endpoints (pick_idp etc) NOT REGISTERED BY THIS SCRIPT
+
+    categories_to_methods_and_paths: Dict[
+        Optional[str], Dict[Tuple[str, str], EndpointDescription]
+    ] = defaultdict(dict)
+
+    for (method, path), desc in elided_worker_paths.items():
+        categories_to_methods_and_paths[desc.category][method, path] = desc
+
+    for category, contents in categories_to_methods_and_paths.items():
+        print_category(category, contents)
+
+
+def print_category(
+    category_name: Optional[str],
+    elided_worker_paths: Dict[Tuple[str, str], EndpointDescription],
+) -> None:
+    """
+    Writes one category to stdout in the style of a documentation page: a heading,
+    the paths that accept any method, then the paths prefixed by their method.
+
+    Example:
+    ```
+    # Category name
+    /path/xyz
+
+    GET /path/abc
+    ```
+    """
+
+    heading = category_name if category_name else "(Uncategorised requests)"
+    print(f"# {heading}")
+
+    simplified = simplify_path_regexes(elided_worker_paths)
+    any_method_paths = [path for method, path in simplified if method == "*"]
+    per_method_lines = [
+        f"{method:6} {path}" for method, path in simplified if method != "*"
+    ]
+
+    # Each group is printed sorted and is followed by a blank line.
+    for group in (any_method_paths, per_method_lines):
+        for line in sorted(group):
+            print(line)
+        print()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/synapse/federation/transport/server/__init__.py b/synapse/federation/transport/server/__init__.py
index 2725f53cf6..753372fc54 100644
--- a/synapse/federation/transport/server/__init__.py
+++ b/synapse/federation/transport/server/__init__.py
@@ -108,6 +108,7 @@ class PublicRoomList(BaseFederationServlet):
     """
 
     PATH = "/publicRooms"
+    CATEGORY = "Federation requests"
 
     def __init__(
         self,
@@ -212,6 +213,7 @@ class OpenIdUserInfo(BaseFederationServlet):
     """
 
     PATH = "/openid/userinfo"
+    CATEGORY = "Federation requests"
 
     REQUIRE_AUTH = False
 
diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py
index f7ca87adc4..ec5b5eeafa 100644
--- a/synapse/federation/transport/server/federation.py
+++ b/synapse/federation/transport/server/federation.py
@@ -70,6 +70,7 @@ class BaseFederationServerServlet(BaseFederationServlet):
 
 class FederationSendServlet(BaseFederationServerServlet):
     PATH = "/send/(?P<transaction_id>[^/]*)/?"
+    CATEGORY = "Inbound federation transaction request"
 
     # We ratelimit manually in the handler as we queue up the requests and we
     # don't want to fill up the ratelimiter with blocked requests.
@@ -138,6 +139,7 @@ class FederationSendServlet(BaseFederationServerServlet):
 
 class FederationEventServlet(BaseFederationServerServlet):
     PATH = "/event/(?P<event_id>[^/]*)/?"
+    CATEGORY = "Federation requests"
 
     # This is when someone asks for a data item for a given server data_id pair.
     async def on_GET(
@@ -152,6 +154,7 @@ class FederationEventServlet(BaseFederationServerServlet):
 
 class FederationStateV1Servlet(BaseFederationServerServlet):
     PATH = "/state/(?P<room_id>[^/]*)/?"
+    CATEGORY = "Federation requests"
 
     # This is when someone asks for all data for a given room.
     async def on_GET(
@@ -170,6 +173,7 @@ class FederationStateV1Servlet(BaseFederationServerServlet):
 
 class FederationStateIdsServlet(BaseFederationServerServlet):
     PATH = "/state_ids/(?P<room_id>[^/]*)/?"
+    CATEGORY = "Federation requests"
 
     async def on_GET(
         self,
@@ -187,6 +191,7 @@ class FederationStateIdsServlet(BaseFederationServerServlet):
 
 class FederationBackfillServlet(BaseFederationServerServlet):
     PATH = "/backfill/(?P<room_id>[^/]*)/?"
+    CATEGORY = "Federation requests"
 
     async def on_GET(
         self,
@@ -225,6 +230,7 @@ class FederationTimestampLookupServlet(BaseFederationServerServlet):
     """
 
     PATH = "/timestamp_to_event/(?P<room_id>[^/]*)/?"
+    CATEGORY = "Federation requests"
 
     async def on_GET(
         self,
@@ -246,6 +252,7 @@ class FederationTimestampLookupServlet(BaseFederationServerServlet):
 
 class FederationQueryServlet(BaseFederationServerServlet):
     PATH = "/query/(?P<query_type>[^/]*)"
+    CATEGORY = "Federation requests"
 
     # This is when we receive a server-server Query
     async def on_GET(
@@ -262,6 +269,7 @@ class FederationQueryServlet(BaseFederationServerServlet):
 
 class FederationMakeJoinServlet(BaseFederationServerServlet):
     PATH = "/make_join/(?P<room_id>[^/]*)/(?P<user_id>[^/]*)"
+    CATEGORY = "Federation requests"
 
     async def on_GET(
         self,
@@ -297,6 +305,7 @@ class FederationMakeJoinServlet(BaseFederationServerServlet):
 
 class FederationMakeLeaveServlet(BaseFederationServerServlet):
     PATH = "/make_leave/(?P<room_id>[^/]*)/(?P<user_id>[^/]*)"
+    CATEGORY = "Federation requests"
 
     async def on_GET(
         self,
@@ -312,6 +321,7 @@ class FederationMakeLeaveServlet(BaseFederationServerServlet):
 
 class FederationV1SendLeaveServlet(BaseFederationServerServlet):
     PATH = "/send_leave/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+    CATEGORY = "Federation requests"
 
     async def on_PUT(
         self,
@@ -327,6 +337,7 @@ class FederationV1SendLeaveServlet(BaseFederationServerServlet):
 
 class FederationV2SendLeaveServlet(BaseFederationServerServlet):
     PATH = "/send_leave/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+    CATEGORY = "Federation requests"
 
     PREFIX = FEDERATION_V2_PREFIX
 
@@ -344,6 +355,7 @@ class FederationV2SendLeaveServlet(BaseFederationServerServlet):
 
 class FederationMakeKnockServlet(BaseFederationServerServlet):
     PATH = "/make_knock/(?P<room_id>[^/]*)/(?P<user_id>[^/]*)"
+    CATEGORY = "Federation requests"
 
     async def on_GET(
         self,
@@ -366,6 +378,7 @@ class FederationMakeKnockServlet(BaseFederationServerServlet):
 
 class FederationV1SendKnockServlet(BaseFederationServerServlet):
     PATH = "/send_knock/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+    CATEGORY = "Federation requests"
 
     async def on_PUT(
         self,
@@ -381,6 +394,7 @@ class FederationV1SendKnockServlet(BaseFederationServerServlet):
 
 class FederationEventAuthServlet(BaseFederationServerServlet):
     PATH = "/event_auth/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+    CATEGORY = "Federation requests"
 
     async def on_GET(
         self,
@@ -395,6 +409,7 @@ class FederationEventAuthServlet(BaseFederationServerServlet):
 
 class FederationV1SendJoinServlet(BaseFederationServerServlet):
     PATH = "/send_join/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+    CATEGORY = "Federation requests"
 
     async def on_PUT(
         self,
@@ -412,6 +427,7 @@ class FederationV1SendJoinServlet(BaseFederationServerServlet):
 
 class FederationV2SendJoinServlet(BaseFederationServerServlet):
     PATH = "/send_join/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+    CATEGORY = "Federation requests"
 
     PREFIX = FEDERATION_V2_PREFIX
 
@@ -455,6 +471,7 @@ class FederationV2SendJoinServlet(BaseFederationServerServlet):
 
 class FederationV1InviteServlet(BaseFederationServerServlet):
     PATH = "/invite/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+    CATEGORY = "Federation requests"
 
     async def on_PUT(
         self,
@@ -479,6 +496,7 @@ class FederationV1InviteServlet(BaseFederationServerServlet):
 
 class FederationV2InviteServlet(BaseFederationServerServlet):
     PATH = "/invite/(?P<room_id>[^/]*)/(?P<event_id>[^/]*)"
+    CATEGORY = "Federation requests"
 
     PREFIX = FEDERATION_V2_PREFIX
 
@@ -515,6 +533,7 @@ class FederationV2InviteServlet(BaseFederationServerServlet):
 
 class FederationThirdPartyInviteExchangeServlet(BaseFederationServerServlet):
     PATH = "/exchange_third_party_invite/(?P<room_id>[^/]*)"
+    CATEGORY = "Federation requests"
 
     async def on_PUT(
         self,
@@ -529,6 +548,7 @@ class FederationThirdPartyInviteExchangeServlet(BaseFederationServerServlet):
 
 class FederationClientKeysQueryServlet(BaseFederationServerServlet):
     PATH = "/user/keys/query"
+    CATEGORY = "Federation requests"
 
     async def on_POST(
         self, origin: str, content: JsonDict, query: Dict[bytes, List[bytes]]
@@ -538,6 +558,7 @@ class FederationClientKeysQueryServlet(BaseFederationServerServlet):
 
 class FederationUserDevicesQueryServlet(BaseFederationServerServlet):
     PATH = "/user/devices/(?P<user_id>[^/]*)"
+    CATEGORY = "Federation requests"
 
     async def on_GET(
         self,
@@ -551,6 +572,7 @@ class FederationUserDevicesQueryServlet(BaseFederationServerServlet):
 
 class FederationClientKeysClaimServlet(BaseFederationServerServlet):
     PATH = "/user/keys/claim"
+    CATEGORY = "Federation requests"
 
     async def on_POST(
         self, origin: str, content: JsonDict, query: Dict[bytes, List[bytes]]
@@ -561,6 +583,7 @@ class FederationClientKeysClaimServlet(BaseFederationServerServlet):
 
 class FederationGetMissingEventsServlet(BaseFederationServerServlet):
     PATH = "/get_missing_events/(?P<room_id>[^/]*)"
+    CATEGORY = "Federation requests"
 
     async def on_POST(
         self,
@@ -586,6 +609,7 @@ class FederationGetMissingEventsServlet(BaseFederationServerServlet):
 
 class On3pidBindServlet(BaseFederationServerServlet):
     PATH = "/3pid/onbind"
+    CATEGORY = "Federation requests"
 
     REQUIRE_AUTH = False
 
@@ -618,6 +642,7 @@ class On3pidBindServlet(BaseFederationServerServlet):
 
 class FederationVersionServlet(BaseFederationServlet):
     PATH = "/version"
+    CATEGORY = "Federation requests"
 
     REQUIRE_AUTH = False
 
@@ -640,6 +665,7 @@ class FederationVersionServlet(BaseFederationServlet):
 
 class FederationRoomHierarchyServlet(BaseFederationServlet):
     PATH = "/hierarchy/(?P<room_id>[^/]*)"
+    CATEGORY = "Federation requests"
 
     def __init__(
         self,
@@ -672,6 +698,7 @@ class RoomComplexityServlet(BaseFederationServlet):
 
     PATH = "/rooms/(?P<room_id>[^/]*)/complexity"
     PREFIX = FEDERATION_UNSTABLE_PREFIX
+    CATEGORY = "Federation requests (unstable)"
 
     def __init__(
         self,
diff --git a/synapse/rest/client/_base.py b/synapse/rest/client/_base.py
index b4cb90cb76..5c1c19e1f3 100644
--- a/synapse/rest/client/_base.py
+++ b/synapse/rest/client/_base.py
@@ -43,19 +43,22 @@ def client_patterns(
     Returns:
         An iterable of patterns.
     """
-    patterns = []
+    versions = []
 
-    if unstable:
-        unstable_prefix = CLIENT_API_PREFIX + "/unstable"
-        patterns.append(re.compile("^" + unstable_prefix + path_regex))
     if v1:
-        v1_prefix = CLIENT_API_PREFIX + "/api/v1"
-        patterns.append(re.compile("^" + v1_prefix + path_regex))
-    for release in releases:
-        new_prefix = CLIENT_API_PREFIX + f"/{release}"
-        patterns.append(re.compile("^" + new_prefix + path_regex))
+        versions.append("api/v1")
+    versions.extend(releases)
+    if unstable:
+        versions.append("unstable")
+
+    if len(versions) == 1:
+        versions_str = versions[0]
+    elif len(versions) > 1:
+        versions_str = "(" + "|".join(versions) + ")"
+    else:
+        raise RuntimeError("Must have at least one version for a URL")
 
-    return patterns
+    return [re.compile("^" + CLIENT_API_PREFIX + "/" + versions_str + path_regex)]
 
 
 def set_timeline_upper_limit(filter_json: JsonDict, filter_timeline_limit: int) -> None:
diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py
index 484d7440a4..3d0c55daa0 100644
--- a/synapse/rest/client/account.py
+++ b/synapse/rest/client/account.py
@@ -576,6 +576,9 @@ class AddThreepidMsisdnSubmitTokenServlet(RestServlet):
 
 class ThreepidRestServlet(RestServlet):
     PATTERNS = client_patterns("/account/3pid$")
+    # This is used as a proxy for all the 3pid endpoints.
+
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -834,6 +837,7 @@ def assert_valid_next_link(hs: "HomeServer", next_link: str) -> None:
 
 class WhoamiRestServlet(RestServlet):
     PATTERNS = client_patterns("/account/whoami$")
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/account_data.py b/synapse/rest/client/account_data.py
index e805196fec..43193ad086 100644
--- a/synapse/rest/client/account_data.py
+++ b/synapse/rest/client/account_data.py
@@ -38,6 +38,7 @@ class AccountDataServlet(RestServlet):
     PATTERNS = client_patterns(
         "/user/(?P<user_id>[^/]*)/account_data/(?P<account_data_type>[^/]*)"
     )
+    CATEGORY = "Account data requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -136,6 +137,7 @@ class RoomAccountDataServlet(RestServlet):
         "/rooms/(?P<room_id>[^/]*)"
         "/account_data/(?P<account_data_type>[^/]*)"
     )
+    CATEGORY = "Account data requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py
index dab4a77f7e..e97d0bf475 100644
--- a/synapse/rest/client/devices.py
+++ b/synapse/rest/client/devices.py
@@ -40,6 +40,7 @@ logger = logging.getLogger(__name__)
 
 class DevicesRestServlet(RestServlet):
     PATTERNS = client_patterns("/devices$")
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -123,6 +124,7 @@ class DeleteDevicesRestServlet(RestServlet):
 
 class DeviceRestServlet(RestServlet):
     PATTERNS = client_patterns("/devices/(?P<device_id>[^/]*)$")
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/events.py b/synapse/rest/client/events.py
index 694d77d287..3eca4fe21f 100644
--- a/synapse/rest/client/events.py
+++ b/synapse/rest/client/events.py
@@ -33,6 +33,7 @@ logger = logging.getLogger(__name__)
 
 class EventStreamRestServlet(RestServlet):
     PATTERNS = client_patterns("/events$", v1=True)
+    CATEGORY = "Sync requests"
 
     DEFAULT_LONGPOLL_TIME_MS = 30000
 
@@ -76,6 +77,7 @@ class EventStreamRestServlet(RestServlet):
 
 class EventRestServlet(RestServlet):
     PATTERNS = client_patterns("/events/(?P<event_id>[^/]*)$", v1=True)
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/filter.py b/synapse/rest/client/filter.py
index 236199897c..ab7d8c9419 100644
--- a/synapse/rest/client/filter.py
+++ b/synapse/rest/client/filter.py
@@ -31,6 +31,7 @@ logger = logging.getLogger(__name__)
 
 class GetFilterRestServlet(RestServlet):
     PATTERNS = client_patterns("/user/(?P<user_id>[^/]*)/filter/(?P<filter_id>[^/]*)")
+    CATEGORY = "Encryption requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -69,6 +70,7 @@ class GetFilterRestServlet(RestServlet):
 
 class CreateFilterRestServlet(RestServlet):
     PATTERNS = client_patterns("/user/(?P<user_id>[^/]*)/filter")
+    CATEGORY = "Encryption requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/initial_sync.py b/synapse/rest/client/initial_sync.py
index 9b1bb8b521..046a4364f2 100644
--- a/synapse/rest/client/initial_sync.py
+++ b/synapse/rest/client/initial_sync.py
@@ -28,6 +28,7 @@ if TYPE_CHECKING:
 # TODO: Needs unit testing
 class InitialSyncRestServlet(RestServlet):
     PATTERNS = client_patterns("/initialSync$", v1=True)
+    CATEGORY = "Sync requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py
index 32bb8b9a91..6209b79b01 100644
--- a/synapse/rest/client/keys.py
+++ b/synapse/rest/client/keys.py
@@ -89,6 +89,7 @@ class KeyUploadServlet(RestServlet):
     """
 
     PATTERNS = client_patterns("/keys/upload(/(?P<device_id>[^/]+))?$")
+    CATEGORY = "Encryption requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -182,6 +183,7 @@ class KeyQueryServlet(RestServlet):
     """
 
     PATTERNS = client_patterns("/keys/query$")
+    CATEGORY = "Encryption requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -225,6 +227,7 @@ class KeyChangesServlet(RestServlet):
     """
 
     PATTERNS = client_patterns("/keys/changes$")
+    CATEGORY = "Encryption requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -274,6 +277,7 @@ class OneTimeKeyServlet(RestServlet):
     """
 
     PATTERNS = client_patterns("/keys/claim$")
+    CATEGORY = "Encryption requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/knock.py b/synapse/rest/client/knock.py
index 4fa66904ba..0dc7960872 100644
--- a/synapse/rest/client/knock.py
+++ b/synapse/rest/client/knock.py
@@ -40,6 +40,7 @@ class KnockRoomAliasServlet(RestServlet):
     """
 
     PATTERNS = client_patterns("/knock/(?P<room_identifier>[^/]*)")
+    CATEGORY = "Event sending requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py
index 8adced41e5..b7e9c8f6b5 100644
--- a/synapse/rest/client/login.py
+++ b/synapse/rest/client/login.py
@@ -72,6 +72,8 @@ class LoginResponse(TypedDict, total=False):
 
 class LoginRestServlet(RestServlet):
     PATTERNS = client_patterns("/login$", v1=True)
+    CATEGORY = "Registration/login requests"
+
     CAS_TYPE = "m.login.cas"
     SSO_TYPE = "m.login.sso"
     TOKEN_TYPE = "m.login.token"
@@ -537,6 +539,7 @@ def _get_auth_flow_dict_for_idp(idp: SsoIdentityProvider) -> JsonDict:
 
 class RefreshTokenServlet(RestServlet):
     PATTERNS = client_patterns("/refresh$")
+    CATEGORY = "Registration/login requests"
 
     def __init__(self, hs: "HomeServer"):
         self._auth_handler = hs.get_auth_handler()
@@ -590,6 +593,7 @@ class SsoRedirectServlet(RestServlet):
             + "/(r0|v3)/login/sso/redirect/(?P<idp_id>[A-Za-z0-9_.~-]+)$"
         )
     ]
+    CATEGORY = "SSO requests needed for all SSO providers"
 
     def __init__(self, hs: "HomeServer"):
         # make sure that the relevant handlers are instantiated, so that they
diff --git a/synapse/rest/client/presence.py b/synapse/rest/client/presence.py
index 94dd4fe2f4..8e193330f8 100644
--- a/synapse/rest/client/presence.py
+++ b/synapse/rest/client/presence.py
@@ -33,6 +33,7 @@ logger = logging.getLogger(__name__)
 
 class PresenceStatusRestServlet(RestServlet):
     PATTERNS = client_patterns("/presence/(?P<user_id>[^/]*)/status", v1=True)
+    CATEGORY = "Presence requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/profile.py b/synapse/rest/client/profile.py
index e69fa0829d..493e1acea0 100644
--- a/synapse/rest/client/profile.py
+++ b/synapse/rest/client/profile.py
@@ -29,6 +29,7 @@ if TYPE_CHECKING:
 
 class ProfileDisplaynameRestServlet(RestServlet):
     PATTERNS = client_patterns("/profile/(?P<user_id>[^/]*)/displayname", v1=True)
+    CATEGORY = "Event sending requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -86,6 +87,7 @@ class ProfileDisplaynameRestServlet(RestServlet):
 
 class ProfileAvatarURLRestServlet(RestServlet):
     PATTERNS = client_patterns("/profile/(?P<user_id>[^/]*)/avatar_url", v1=True)
+    CATEGORY = "Event sending requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -142,6 +144,7 @@ class ProfileAvatarURLRestServlet(RestServlet):
 
 class ProfileRestServlet(RestServlet):
     PATTERNS = client_patterns("/profile/(?P<user_id>[^/]*)", v1=True)
+    CATEGORY = "Event sending requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/push_rule.py b/synapse/rest/client/push_rule.py
index ad5c10c99d..1147b6f8ec 100644
--- a/synapse/rest/client/push_rule.py
+++ b/synapse/rest/client/push_rule.py
@@ -44,6 +44,9 @@ class PushRuleRestServlet(RestServlet):
         "Unrecognised request: You probably wanted a trailing slash"
     )
 
+    WORKERS_DENIED_METHODS = ["PUT", "DELETE"]
+    CATEGORY = "Push rule requests"
+
     def __init__(self, hs: "HomeServer"):
         super().__init__()
         self.auth = hs.get_auth()
diff --git a/synapse/rest/client/read_marker.py b/synapse/rest/client/read_marker.py
index 852838515c..4f96e51eeb 100644
--- a/synapse/rest/client/read_marker.py
+++ b/synapse/rest/client/read_marker.py
@@ -31,6 +31,7 @@ logger = logging.getLogger(__name__)
 
 class ReadMarkerRestServlet(RestServlet):
     PATTERNS = client_patterns("/rooms/(?P<room_id>[^/]*)/read_markers$")
+    CATEGORY = "Receipts requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py
index 28b7d30ea8..316e7b9982 100644
--- a/synapse/rest/client/receipts.py
+++ b/synapse/rest/client/receipts.py
@@ -36,6 +36,7 @@ class ReceiptRestServlet(RestServlet):
         "/receipt/(?P<receipt_type>[^/]*)"
         "/(?P<event_id>[^/]*)$"
     )
+    CATEGORY = "Receipts requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py
index 4adb5271d2..7f84a17e29 100644
--- a/synapse/rest/client/register.py
+++ b/synapse/rest/client/register.py
@@ -367,6 +367,7 @@ class RegistrationTokenValidityRestServlet(RestServlet):
         f"/register/{LoginType.REGISTRATION_TOKEN}/validity",
         releases=("v1",),
     )
+    CATEGORY = "Registration/login requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -395,6 +396,7 @@ class RegistrationTokenValidityRestServlet(RestServlet):
 
 class RegisterRestServlet(RestServlet):
     PATTERNS = client_patterns("/register$")
+    CATEGORY = "Registration/login requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py
index 7456d6f507..b8b296bc0c 100644
--- a/synapse/rest/client/relations.py
+++ b/synapse/rest/client/relations.py
@@ -42,6 +42,7 @@ class RelationPaginationServlet(RestServlet):
         "(/(?P<relation_type>[^/]*)(/(?P<event_type>[^/]*))?)?$",
         releases=("v1",),
     )
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -84,6 +85,7 @@ class RelationPaginationServlet(RestServlet):
 
 class ThreadsServlet(RestServlet):
     PATTERNS = (re.compile("^/_matrix/client/v1/rooms/(?P<room_id>[^/]*)/threads"),)
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index 129b6fe6b0..c0705d4291 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -140,7 +140,7 @@ class TransactionRestServlet(RestServlet):
 
 
 class RoomCreateRestServlet(TransactionRestServlet):
-    # No PATTERN; we have custom dispatch rules here
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
@@ -180,6 +180,8 @@ class RoomCreateRestServlet(TransactionRestServlet):
 
 # TODO: Needs unit testing for generic events
 class RoomStateEventRestServlet(RestServlet):
+    CATEGORY = "Event sending requests"
+
     def __init__(self, hs: "HomeServer"):
         super().__init__()
         self.event_creation_handler = hs.get_event_creation_handler()
@@ -323,6 +325,8 @@ class RoomStateEventRestServlet(RestServlet):
 
 # TODO: Needs unit testing for generic events + feedback
 class RoomSendEventRestServlet(TransactionRestServlet):
+    CATEGORY = "Event sending requests"
+
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
         self.event_creation_handler = hs.get_event_creation_handler()
@@ -398,6 +402,8 @@ class RoomSendEventRestServlet(TransactionRestServlet):
 
 # TODO: Needs unit testing for room ID + alias joins
 class JoinRoomAliasServlet(ResolveRoomIdMixin, TransactionRestServlet):
+    CATEGORY = "Event sending requests"
+
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
         super(ResolveRoomIdMixin, self).__init__(hs)  # ensure the Mixin is set up
@@ -460,6 +466,7 @@ class JoinRoomAliasServlet(ResolveRoomIdMixin, TransactionRestServlet):
 # TODO: Needs unit testing
 class PublicRoomListRestServlet(RestServlet):
     PATTERNS = client_patterns("/publicRooms$", v1=True)
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -578,6 +585,7 @@ class PublicRoomListRestServlet(RestServlet):
 # TODO: Needs unit testing
 class RoomMemberListRestServlet(RestServlet):
     PATTERNS = client_patterns("/rooms/(?P<room_id>[^/]*)/members$", v1=True)
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -633,6 +641,7 @@ class RoomMemberListRestServlet(RestServlet):
 # except it does custom AS logic and has a simpler return format
 class JoinedRoomMemberListRestServlet(RestServlet):
     PATTERNS = client_patterns("/rooms/(?P<room_id>[^/]*)/joined_members$", v1=True)
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -654,6 +663,10 @@ class JoinedRoomMemberListRestServlet(RestServlet):
 # TODO: Needs better unit testing
 class RoomMessageListRestServlet(RestServlet):
     PATTERNS = client_patterns("/rooms/(?P<room_id>[^/]*)/messages$", v1=True)
+    # TODO The routing information should be exposed programmatically.
+    #      I want to do this but for now I felt bad about leaving this without
+    #      at least a visible warning on it.
+    CATEGORY = "Client API requests (ALL FOR SAME ROOM MUST GO TO SAME WORKER)"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -720,6 +733,7 @@ class RoomMessageListRestServlet(RestServlet):
 # TODO: Needs unit testing
 class RoomStateRestServlet(RestServlet):
     PATTERNS = client_patterns("/rooms/(?P<room_id>[^/]*)/state$", v1=True)
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -742,6 +756,7 @@ class RoomStateRestServlet(RestServlet):
 # TODO: Needs unit testing
 class RoomInitialSyncRestServlet(RestServlet):
     PATTERNS = client_patterns("/rooms/(?P<room_id>[^/]*)/initialSync$", v1=True)
+    CATEGORY = "Sync requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -766,6 +781,7 @@ class RoomEventServlet(RestServlet):
     PATTERNS = client_patterns(
         "/rooms/(?P<room_id>[^/]*)/event/(?P<event_id>[^/]*)$", v1=True
     )
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -858,6 +874,7 @@ class RoomEventContextServlet(RestServlet):
     PATTERNS = client_patterns(
         "/rooms/(?P<room_id>[^/]*)/context/(?P<event_id>[^/]*)$", v1=True
     )
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -958,6 +975,8 @@ class RoomForgetRestServlet(TransactionRestServlet):
 
 # TODO: Needs unit testing
 class RoomMembershipRestServlet(TransactionRestServlet):
+    CATEGORY = "Event sending requests"
+
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
         self.room_member_handler = hs.get_room_member_handler()
@@ -1071,6 +1090,8 @@ class RoomMembershipRestServlet(TransactionRestServlet):
 
 
 class RoomRedactEventRestServlet(TransactionRestServlet):
+    CATEGORY = "Event sending requests"
+
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
         self.event_creation_handler = hs.get_event_creation_handler()
@@ -1164,6 +1185,7 @@ class RoomTypingRestServlet(RestServlet):
     PATTERNS = client_patterns(
         "/rooms/(?P<room_id>[^/]*)/typing/(?P<user_id>[^/]*)$", v1=True
     )
+    CATEGORY = "The typing stream"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -1195,7 +1217,7 @@ class RoomTypingRestServlet(RestServlet):
         # Limit timeout to stop people from setting silly typing timeouts.
         timeout = min(content.get("timeout", 30000), 120000)
 
-        # Defer getting the typing handler since it will raise on workers.
+        # Defer getting the typing handler since it will raise on workers.
         typing_handler = self.hs.get_typing_writer_handler()
 
         try:
@@ -1224,6 +1246,7 @@ class RoomAliasListServlet(RestServlet):
             r"/rooms/(?P<room_id>[^/]*)/aliases"
         ),
     ] + list(client_patterns("/rooms/(?P<room_id>[^/]*)/aliases$", unstable=False))
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -1244,6 +1267,7 @@ class RoomAliasListServlet(RestServlet):
 
 class SearchRestServlet(RestServlet):
     PATTERNS = client_patterns("/search$", v1=True)
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -1263,6 +1287,7 @@ class SearchRestServlet(RestServlet):
 
 class JoinedRoomsRestServlet(RestServlet):
     PATTERNS = client_patterns("/joined_rooms$", v1=True)
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -1334,6 +1359,7 @@ class TimestampLookupRestServlet(RestServlet):
     PATTERNS = (
         re.compile("^/_matrix/client/v1/rooms/(?P<room_id>[^/]*)/timestamp_to_event$"),
     )
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -1365,6 +1391,8 @@ class TimestampLookupRestServlet(RestServlet):
 
 class RoomHierarchyRestServlet(RestServlet):
     PATTERNS = (re.compile("^/_matrix/client/v1/rooms/(?P<room_id>[^/]*)/hierarchy$"),)
+    WORKERS = PATTERNS
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -1405,6 +1433,7 @@ class RoomSummaryRestServlet(ResolveRoomIdMixin, RestServlet):
             "/rooms/(?P<room_identifier>[^/]*)/summary$"
         ),
     )
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py
index ef284ecc11..69f85112d8 100644
--- a/synapse/rest/client/room_batch.py
+++ b/synapse/rest/client/room_batch.py
@@ -69,6 +69,7 @@ class RoomBatchSendEventRestServlet(RestServlet):
             "/rooms/(?P<room_id>[^/]*)/batch_send$"
         ),
     )
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/room_keys.py b/synapse/rest/client/room_keys.py
index 4e7ffdb555..aad54f8c54 100644
--- a/synapse/rest/client/room_keys.py
+++ b/synapse/rest/client/room_keys.py
@@ -37,6 +37,7 @@ class RoomKeysServlet(RestServlet):
     PATTERNS = client_patterns(
         "/room_keys/keys(/(?P<room_id>[^/]+))?(/(?P<session_id>[^/]+))?$"
     )
+    CATEGORY = "Encryption requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -253,6 +254,7 @@ class RoomKeysServlet(RestServlet):
 
 class RoomKeysNewVersionServlet(RestServlet):
     PATTERNS = client_patterns("/room_keys/version$")
+    CATEGORY = "Encryption requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -328,6 +330,7 @@ class RoomKeysNewVersionServlet(RestServlet):
 
 class RoomKeysVersionServlet(RestServlet):
     PATTERNS = client_patterns("/room_keys/version/(?P<version>[^/]+)$")
+    CATEGORY = "Encryption requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/sendtodevice.py b/synapse/rest/client/sendtodevice.py
index 110af6df47..7dfa3a2596 100644
--- a/synapse/rest/client/sendtodevice.py
+++ b/synapse/rest/client/sendtodevice.py
@@ -35,6 +35,7 @@ class SendToDeviceRestServlet(servlet.RestServlet):
     PATTERNS = client_patterns(
         "/sendToDevice/(?P<message_type>[^/]*)/(?P<txn_id>[^/]*)$"
     )
+    CATEGORY = "The to_device stream"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py
index e578b26fa3..03b0578945 100644
--- a/synapse/rest/client/sync.py
+++ b/synapse/rest/client/sync.py
@@ -87,6 +87,7 @@ class SyncRestServlet(RestServlet):
 
     PATTERNS = client_patterns("/sync$")
     ALLOWED_PRESENCE = {"online", "offline", "unavailable"}
+    CATEGORY = "Sync requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/tags.py b/synapse/rest/client/tags.py
index dde08417a4..94bd51fe8f 100644
--- a/synapse/rest/client/tags.py
+++ b/synapse/rest/client/tags.py
@@ -37,6 +37,7 @@ class TagListServlet(RestServlet):
     PATTERNS = client_patterns(
         "/user/(?P<user_id>[^/]*)/rooms/(?P<room_id>[^/]*)/tags$"
     )
+    CATEGORY = "Account data requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
@@ -64,6 +65,7 @@ class TagServlet(RestServlet):
     PATTERNS = client_patterns(
         "/user/(?P<user_id>[^/]*)/rooms/(?P<room_id>[^/]*)/tags/(?P<tag>[^/]*)"
     )
+    CATEGORY = "Account data requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/user_directory.py b/synapse/rest/client/user_directory.py
index 4670fad608..5136497c77 100644
--- a/synapse/rest/client/user_directory.py
+++ b/synapse/rest/client/user_directory.py
@@ -31,6 +31,7 @@ logger = logging.getLogger(__name__)
 
 class UserDirectorySearchRestServlet(RestServlet):
     PATTERNS = client_patterns("/user_directory/search$")
+    CATEGORY = "User directory search requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index ec171582b3..59aed66464 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -34,6 +34,7 @@ logger = logging.getLogger(__name__)
 
 class VersionsRestServlet(RestServlet):
     PATTERNS = [re.compile("^/_matrix/client/versions$")]
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/client/voip.py b/synapse/rest/client/voip.py
index ea7e025156..133790c97c 100644
--- a/synapse/rest/client/voip.py
+++ b/synapse/rest/client/voip.py
@@ -29,6 +29,7 @@ if TYPE_CHECKING:
 
 class VoipRestServlet(RestServlet):
     PATTERNS = client_patterns("/voip/turnServer$", v1=True)
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py
index 19820886f5..3bdb6ec909 100644
--- a/synapse/rest/key/v2/remote_key_resource.py
+++ b/synapse/rest/key/v2/remote_key_resource.py
@@ -93,6 +93,8 @@ class RemoteKey(RestServlet):
     }
     """
 
+    CATEGORY = "Federation requests"
+
     def __init__(self, hs: "HomeServer"):
         self.fetcher = ServerKeyFetcher(hs)
         self.store = hs.get_datastores().main
-- 
cgit 1.5.1


From e6af49fbea939d9e69ed05e0a0ced5948c722ea4 Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@fizzadar.com>
Date: Fri, 24 Mar 2023 11:44:01 +0000
Subject: Reintroduce membership tables event stream ordering (#15128)

* Add `event_stream_ordering` column to membership state tables

Specifically this adds the column to `current_state_events`,
`local_current_membership` and `room_memberships`. Each of these tables
is regularly joined with the `events` table to get the stream ordering
and denormalising this into each table will yield significant query
performance improvements once used.

* Make denormalised `event_stream_ordering` columns foreign keys
* Add comment in schema file explaining new denormalised columns
* Add triggers to enforce consistency of `event_stream_ordering` columns
* Re-order purge room tables to account for foreign keys
* Bump schema version to 75

Co-authored-by: David Robertson <david.m.robertson1@gmail.com>
Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
---
 changelog.d/15128.misc                             |  1 +
 synapse/storage/databases/main/events.py           | 23 +++++--
 synapse/storage/databases/main/purge_events.py     |  6 +-
 synapse/storage/schema/__init__.py                 | 14 ++--
 .../01membership_tables_event_stream_ordering.sql  | 20 ++++++
 ...ership_tables_event_stream_ordering_triggers.py | 79 ++++++++++++++++++++++
 6 files changed, 131 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/15128.misc
 create mode 100644 synapse/storage/schema/main/delta/74/01membership_tables_event_stream_ordering.sql
 create mode 100644 synapse/storage/schema/main/delta/74/02membership_tables_event_stream_ordering_triggers.py

(limited to 'synapse')

diff --git a/changelog.d/15128.misc b/changelog.d/15128.misc
new file mode 100644
index 0000000000..c09911e48d
--- /dev/null
+++ b/changelog.d/15128.misc
@@ -0,0 +1 @@
+Add denormalised event stream ordering column to membership state tables for future use. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index a8a4ed4436..193959b250 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1126,11 +1126,15 @@ class PersistEventsStore:
                 # been inserted into room_memberships.
                 txn.execute_batch(
                     """INSERT INTO current_state_events
-                        (room_id, type, state_key, event_id, membership)
-                    VALUES (?, ?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?))
+                        (room_id, type, state_key, event_id, membership, event_stream_ordering)
+                    VALUES (
+                        ?, ?, ?, ?,
+                        (SELECT membership FROM room_memberships WHERE event_id = ?),
+                        (SELECT stream_ordering FROM events WHERE event_id = ?)
+                    )
                     """,
                     [
-                        (room_id, key[0], key[1], ev_id, ev_id)
+                        (room_id, key[0], key[1], ev_id, ev_id, ev_id)
                         for key, ev_id in to_insert.items()
                     ],
                 )
@@ -1157,11 +1161,15 @@ class PersistEventsStore:
             if to_insert:
                 txn.execute_batch(
                     """INSERT INTO local_current_membership
-                        (room_id, user_id, event_id, membership)
-                    VALUES (?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?))
+                        (room_id, user_id, event_id, membership, event_stream_ordering)
+                    VALUES (
+                        ?, ?, ?,
+                        (SELECT membership FROM room_memberships WHERE event_id = ?),
+                        (SELECT stream_ordering FROM events WHERE event_id = ?)
+                    )
                     """,
                     [
-                        (room_id, key[1], ev_id, ev_id)
+                        (room_id, key[1], ev_id, ev_id, ev_id)
                         for key, ev_id in to_insert.items()
                         if key[0] == EventTypes.Member and self.is_mine_id(key[1])
                     ],
@@ -1769,6 +1777,7 @@ class PersistEventsStore:
             table="room_memberships",
             keys=(
                 "event_id",
+                "event_stream_ordering",
                 "user_id",
                 "sender",
                 "room_id",
@@ -1779,6 +1788,7 @@ class PersistEventsStore:
             values=[
                 (
                     event.event_id,
+                    event.internal_metadata.stream_ordering,
                     event.state_key,
                     event.user_id,
                     event.room_id,
@@ -1811,6 +1821,7 @@ class PersistEventsStore:
                     keyvalues={"room_id": event.room_id, "user_id": event.state_key},
                     values={
                         "event_id": event.event_id,
+                        "event_stream_ordering": event.internal_metadata.stream_ordering,
                         "membership": event.membership,
                     },
                 )
diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py
index 7a7c0d9c75..efbd3e75d9 100644
--- a/synapse/storage/databases/main/purge_events.py
+++ b/synapse/storage/databases/main/purge_events.py
@@ -428,14 +428,16 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
             "partial_state_events",
             "partial_state_rooms_servers",
             "partial_state_rooms",
+            # Note: the _membership(s) tables have foreign keys to the `events` table
+            # so must be deleted first.
+            "local_current_membership",
+            "room_memberships",
             "events",
             "federation_inbound_events_staging",
-            "local_current_membership",
             "receipts_graph",
             "receipts_linearized",
             "room_aliases",
             "room_depth",
-            "room_memberships",
             "room_stats_state",
             "room_stats_current",
             "room_stats_earliest_token",
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index d3103a6c7a..a28f2b997c 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 74  # remember to update the list below when updating
+SCHEMA_VERSION = 75  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -91,13 +91,19 @@ Changes in SCHEMA_VERSION = 74:
     - A query on `event_stream_ordering` column has now been disambiguated (i.e. the
       codebase can handle the `current_state_events`, `local_current_memberships` and
       `room_memberships` tables having an `event_stream_ordering` column).
+
+Changes in SCHEMA_VERSION = 75:
+    - The `event_stream_ordering` column in membership tables (`current_state_events`,
+      `local_current_membership` & `room_memberships`) is now being populated for new
+      rows. When the background job to populate historical rows lands this will
+      become the compat schema version.
 """
 
 
 SCHEMA_COMPAT_VERSION = (
-    # The threads_id column must exist for event_push_actions, event_push_summary,
-    # receipts_linearized, and receipts_graph.
-    73
+    # Queries against `event_stream_ordering` columns in membership tables must
+    # be disambiguated.
+    74
 )
 """Limit on how far the synapse codebase can be rolled back without breaking db compat
 
diff --git a/synapse/storage/schema/main/delta/74/01membership_tables_event_stream_ordering.sql b/synapse/storage/schema/main/delta/74/01membership_tables_event_stream_ordering.sql
new file mode 100644
index 0000000000..e2608f3a2e
--- /dev/null
+++ b/synapse/storage/schema/main/delta/74/01membership_tables_event_stream_ordering.sql
@@ -0,0 +1,20 @@
+/* Copyright 2022 Beeper
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Each of these is a denormalised copy of `stream_ordering` from the corresponding row in `events` which
+-- we use to improve database performance by reducing JOINs.
+ALTER TABLE current_state_events ADD COLUMN event_stream_ordering BIGINT REFERENCES events(stream_ordering);
+ALTER TABLE local_current_membership ADD COLUMN event_stream_ordering BIGINT REFERENCES events(stream_ordering);
+ALTER TABLE room_memberships ADD COLUMN event_stream_ordering BIGINT REFERENCES events(stream_ordering);
diff --git a/synapse/storage/schema/main/delta/74/02membership_tables_event_stream_ordering_triggers.py b/synapse/storage/schema/main/delta/74/02membership_tables_event_stream_ordering_triggers.py
new file mode 100644
index 0000000000..e32e9083b3
--- /dev/null
+++ b/synapse/storage/schema/main/delta/74/02membership_tables_event_stream_ordering_triggers.py
@@ -0,0 +1,79 @@
+# Copyright 2022 Beeper
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+This migration adds triggers to the room membership tables to enforce consistency.
+Triggers cannot be expressed in .sql files, so we have to use a separate file.
+"""
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
+from synapse.storage.types import Cursor
+
+
+def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
+    # Complain if the `event_stream_ordering` in membership tables doesn't match
+    # the `stream_ordering` of the row with the same `event_id` in `events`.
+    if isinstance(database_engine, Sqlite3Engine):
+        for table in (
+            "current_state_events",
+            "local_current_membership",
+            "room_memberships",
+        ):
+            cur.execute(
+                f"""
+                CREATE TRIGGER IF NOT EXISTS {table}_bad_event_stream_ordering
+                BEFORE INSERT ON {table}
+                FOR EACH ROW
+                BEGIN
+                    SELECT RAISE(ABORT, 'Incorrect event_stream_ordering in {table}')
+                    WHERE EXISTS (
+                        SELECT 1 FROM events
+                        WHERE events.event_id = NEW.event_id
+                           AND events.stream_ordering != NEW.event_stream_ordering
+                    );
+                END;
+                """
+            )
+    elif isinstance(database_engine, PostgresEngine):
+        cur.execute(
+            """
+            CREATE OR REPLACE FUNCTION check_event_stream_ordering() RETURNS trigger AS $BODY$
+            BEGIN
+                IF EXISTS (
+                    SELECT 1 FROM events
+                    WHERE events.event_id = NEW.event_id
+                       AND events.stream_ordering != NEW.event_stream_ordering
+                ) THEN
+                    RAISE EXCEPTION 'Incorrect event_stream_ordering';
+                END IF;
+                RETURN NEW;
+            END;
+            $BODY$ LANGUAGE plpgsql;
+            """
+        )
+
+        for table in (
+            "current_state_events",
+            "local_current_membership",
+            "room_memberships",
+        ):
+            cur.execute(
+                f"""
+                CREATE TRIGGER check_event_stream_ordering BEFORE INSERT OR UPDATE ON {table}
+                FOR EACH ROW
+                EXECUTE PROCEDURE check_event_stream_ordering()
+                """
+            )
+    else:
+        raise NotImplementedError("Unknown database engine")
-- 
cgit 1.5.1


From 68a671731207645f693e4e48676781b9a1acb838 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 24 Mar 2023 08:31:14 -0400
Subject: Reject mentions on the C-S API which are invalid. (#15311)

Invalid mentions data received over the Client-Server API should
be rejected with a 400 error. This will hopefully stop clients from
sending invalid data, although does not help with data received
over federation.
---
 changelog.d/15311.misc                      |  1 +
 synapse/events/validator.py                 | 42 ++++++++++---
 synapse/http/servlet.py                     | 22 +++++--
 tests/push/test_bulk_push_rule_evaluator.py | 94 +++++++++++++++++------------
 4 files changed, 105 insertions(+), 54 deletions(-)
 create mode 100644 changelog.d/15311.misc

(limited to 'synapse')

diff --git a/changelog.d/15311.misc b/changelog.d/15311.misc
new file mode 100644
index 0000000000..ce03cb9523
--- /dev/null
+++ b/changelog.d/15311.misc
@@ -0,0 +1 @@
+Reject events with an invalid "mentions" property per [MSC3952](https://github.com/matrix-org/matrix-spec-proposals/pull/3952).
diff --git a/synapse/events/validator.py b/synapse/events/validator.py
index 6f0e4386d3..47203209db 100644
--- a/synapse/events/validator.py
+++ b/synapse/events/validator.py
@@ -12,11 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import collections.abc
-from typing import Iterable, Type, Union, cast
+from typing import Iterable, List, Type, Union, cast
 
 import jsonschema
+from pydantic import Field, StrictBool, StrictStr
 
-from synapse.api.constants import MAX_ALIAS_LENGTH, EventTypes, Membership
+from synapse.api.constants import (
+    MAX_ALIAS_LENGTH,
+    EventContentFields,
+    EventTypes,
+    Membership,
+)
 from synapse.api.errors import Codes, SynapseError
 from synapse.api.room_versions import EventFormatVersions
 from synapse.config.homeserver import HomeServerConfig
@@ -28,6 +34,8 @@ from synapse.events.utils import (
     validate_canonicaljson,
 )
 from synapse.federation.federation_server import server_matches_acl_event
+from synapse.http.servlet import validate_json_object
+from synapse.rest.models import RequestBodyModel
 from synapse.types import EventID, JsonDict, RoomID, UserID
 
 
@@ -88,27 +96,27 @@ class EventValidator:
                             Codes.INVALID_PARAM,
                         )
 
-        if event.type == EventTypes.Retention:
+        elif event.type == EventTypes.Retention:
             self._validate_retention(event)
 
-        if event.type == EventTypes.ServerACL:
+        elif event.type == EventTypes.ServerACL:
             if not server_matches_acl_event(config.server.server_name, event):
                 raise SynapseError(
                     400, "Can't create an ACL event that denies the local server"
                 )
 
-        if event.type == EventTypes.PowerLevels:
+        elif event.type == EventTypes.PowerLevels:
             try:
                 jsonschema.validate(
                     instance=event.content,
                     schema=POWER_LEVELS_SCHEMA,
-                    cls=plValidator,
+                    cls=POWER_LEVELS_VALIDATOR,
                 )
             except jsonschema.ValidationError as e:
                 if e.path:
                     # example: "users_default": '0' is not of type 'integer'
                     # cast safety: path entries can be integers, if we fail to validate
-                    # items in an array. However the POWER_LEVELS_SCHEMA doesn't expect
+                    # items in an array. However, the POWER_LEVELS_SCHEMA doesn't expect
                     # to see any arrays.
                     message = (
                         '"' + cast(str, e.path[-1]) + '": ' + e.message  # noqa: B306
@@ -125,6 +133,15 @@ class EventValidator:
                     errcode=Codes.BAD_JSON,
                 )
 
+        # If the event contains a mentions key, validate it.
+        if (
+            EventContentFields.MSC3952_MENTIONS in event.content
+            and config.experimental.msc3952_intentional_mentions
+        ):
+            validate_json_object(
+                event.content[EventContentFields.MSC3952_MENTIONS], Mentions
+            )
+
     def _validate_retention(self, event: EventBase) -> None:
         """Checks that an event that defines the retention policy for a room respects the
         format enforced by the spec.
@@ -253,10 +270,15 @@ POWER_LEVELS_SCHEMA = {
 }
 
 
+class Mentions(RequestBodyModel):
+    user_ids: List[StrictStr] = Field(default_factory=list)
+    room: StrictBool = False
+
+
 # This could return something newer than Draft 7, but that's the current "latest"
 # validator.
-def _create_power_level_validator() -> Type[jsonschema.Draft7Validator]:
-    validator = jsonschema.validators.validator_for(POWER_LEVELS_SCHEMA)
+def _create_validator(schema: JsonDict) -> Type[jsonschema.Draft7Validator]:
+    validator = jsonschema.validators.validator_for(schema)
 
     # by default jsonschema does not consider a immutabledict to be an object so
     # we need to use a custom type checker
@@ -268,4 +290,4 @@ def _create_power_level_validator() -> Type[jsonschema.Draft7Validator]:
     return jsonschema.validators.extend(validator, type_checker=type_checker)
 
 
-plValidator = _create_power_level_validator()
+POWER_LEVELS_VALIDATOR = _create_validator(POWER_LEVELS_SCHEMA)
diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py
index 0070bd2940..fc62793628 100644
--- a/synapse/http/servlet.py
+++ b/synapse/http/servlet.py
@@ -778,17 +778,13 @@ def parse_json_object_from_request(
 Model = TypeVar("Model", bound=BaseModel)
 
 
-def parse_and_validate_json_object_from_request(
-    request: Request, model_type: Type[Model]
-) -> Model:
-    """Parse a JSON object from the body of a twisted HTTP request, then deserialise and
-    validate using the given pydantic model.
+def validate_json_object(content: JsonDict, model_type: Type[Model]) -> Model:
+    """Validate a deserialized JSON object using the given pydantic model.
 
     Raises:
         SynapseError if the request body couldn't be decoded as JSON or
             if it wasn't a JSON object.
     """
-    content = parse_json_object_from_request(request, allow_empty_body=False)
     try:
         instance = model_type.parse_obj(content)
     except ValidationError as e:
@@ -811,6 +807,20 @@ def parse_and_validate_json_object_from_request(
     return instance
 
 
+def parse_and_validate_json_object_from_request(
+    request: Request, model_type: Type[Model]
+) -> Model:
+    """Parse a JSON object from the body of a twisted HTTP request, then deserialise and
+    validate using the given pydantic model.
+
+    Raises:
+        SynapseError if the request body couldn't be decoded as JSON or
+            if it wasn't a JSON object.
+    """
+    content = parse_json_object_from_request(request, allow_empty_body=False)
+    return validate_json_object(content, model_type)
+
+
 def assert_params_in_dict(body: JsonDict, required: Iterable[str]) -> None:
     absent = []
     for k in required:
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
index 46df0102f7..9501096a77 100644
--- a/tests/push/test_bulk_push_rule_evaluator.py
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -243,22 +243,28 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         )
 
         # Non-dict mentions should be ignored.
-        mentions: Any
-        for mentions in (None, True, False, 1, "foo", []):
-            self.assertFalse(
-                self._create_and_process(
-                    bulk_evaluator, {EventContentFields.MSC3952_MENTIONS: mentions}
+        #
+        # Avoid C-S validation as these aren't expected.
+        with patch(
+            "synapse.events.validator.EventValidator.validate_new",
+            new=lambda s, event, config: True,
+        ):
+            mentions: Any
+            for mentions in (None, True, False, 1, "foo", []):
+                self.assertFalse(
+                    self._create_and_process(
+                        bulk_evaluator, {EventContentFields.MSC3952_MENTIONS: mentions}
+                    )
                 )
-            )
 
-        # A non-list should be ignored.
-        for mentions in (None, True, False, 1, "foo", {}):
-            self.assertFalse(
-                self._create_and_process(
-                    bulk_evaluator,
-                    {EventContentFields.MSC3952_MENTIONS: {"user_ids": mentions}},
+            # A non-list should be ignored.
+            for mentions in (None, True, False, 1, "foo", {}):
+                self.assertFalse(
+                    self._create_and_process(
+                        bulk_evaluator,
+                        {EventContentFields.MSC3952_MENTIONS: {"user_ids": mentions}},
+                    )
                 )
-            )
 
         # The Matrix ID appearing anywhere in the list should notify.
         self.assertTrue(
@@ -291,26 +297,32 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         )
 
         # Invalid entries in the list are ignored.
-        self.assertFalse(
-            self._create_and_process(
-                bulk_evaluator,
-                {
-                    EventContentFields.MSC3952_MENTIONS: {
-                        "user_ids": [None, True, False, {}, []]
-                    }
-                },
+        #
+        # Avoid C-S validation as these aren't expected.
+        with patch(
+            "synapse.events.validator.EventValidator.validate_new",
+            new=lambda s, event, config: True,
+        ):
+            self.assertFalse(
+                self._create_and_process(
+                    bulk_evaluator,
+                    {
+                        EventContentFields.MSC3952_MENTIONS: {
+                            "user_ids": [None, True, False, {}, []]
+                        }
+                    },
+                )
             )
-        )
-        self.assertTrue(
-            self._create_and_process(
-                bulk_evaluator,
-                {
-                    EventContentFields.MSC3952_MENTIONS: {
-                        "user_ids": [None, True, False, {}, [], self.alice]
-                    }
-                },
+            self.assertTrue(
+                self._create_and_process(
+                    bulk_evaluator,
+                    {
+                        EventContentFields.MSC3952_MENTIONS: {
+                            "user_ids": [None, True, False, {}, [], self.alice]
+                        }
+                    },
+                )
             )
-        )
 
         # The legacy push rule should not mention if the mentions field exists.
         self.assertFalse(
@@ -351,14 +363,20 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         )
 
         # Invalid data should not notify.
-        mentions: Any
-        for mentions in (None, False, 1, "foo", [], {}):
-            self.assertFalse(
-                self._create_and_process(
-                    bulk_evaluator,
-                    {EventContentFields.MSC3952_MENTIONS: {"room": mentions}},
+        #
+        # Avoid C-S validation as these aren't expected.
+        with patch(
+            "synapse.events.validator.EventValidator.validate_new",
+            new=lambda s, event, config: True,
+        ):
+            mentions: Any
+            for mentions in (None, False, 1, "foo", [], {}):
+                self.assertFalse(
+                    self._create_and_process(
+                        bulk_evaluator,
+                        {EventContentFields.MSC3952_MENTIONS: {"room": mentions}},
+                    )
                 )
-            )
 
         # The legacy push rule should not mention if the mentions field exists.
         self.assertFalse(
-- 
cgit 1.5.1


From 5b70f240cf70b390db7e74ab614ace108fc08d70 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Fri, 24 Mar 2023 16:09:39 +0100
Subject: Make cleaning up pushers depend on the device_id instead of the
 token_id (#15280)

This makes it so that we rely on the `device_id` to delete pushers on logout,
instead of relying on the `access_token_id`. This ensures we're not removing
pushers on token refresh, and prepares for a world without access token IDs
(also known as the OIDC).

This also adds the schema delta that actually schedules the
`set_device_id_for_pushers` background update, which was forgotten in #13831.

Note that for backwards compatibility it still deletes pushers based on the
`access_token` until the background update finishes.
---
 changelog.d/15280.misc                             |  1 +
 synapse/_scripts/synapse_port_db.py                |  6 ++-
 synapse/handlers/auth.py                           |  8 ++-
 synapse/handlers/device.py                         |  2 +
 synapse/handlers/register.py                       |  4 +-
 synapse/push/__init__.py                           |  7 ++-
 synapse/push/pusherpool.py                         | 58 ++++++++++++++++------
 synapse/rest/admin/users.py                        |  1 -
 synapse/rest/client/pusher.py                      |  1 -
 synapse/storage/databases/main/pusher.py           | 40 +++++++++++----
 .../74/02_set_device_id_for_pushers_bg_update.sql  | 19 +++++++
 tests/push/test_email.py                           |  6 +--
 tests/push/test_http.py                            | 46 ++++++++---------
 tests/replication/test_pusher_shard.py             |  4 +-
 tests/rest/admin/test_user.py                      |  4 +-
 15 files changed, 142 insertions(+), 65 deletions(-)
 create mode 100644 changelog.d/15280.misc
 create mode 100644 synapse/storage/schema/main/delta/74/02_set_device_id_for_pushers_bg_update.sql

(limited to 'synapse')

diff --git a/changelog.d/15280.misc b/changelog.d/15280.misc
new file mode 100644
index 0000000000..41d56b0cf0
--- /dev/null
+++ b/changelog.d/15280.misc
@@ -0,0 +1 @@
+Make the pushers rely on the `device_id` instead of the `access_token_id` for various operations.
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 94b86c1d6f..1dcb397ba4 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -68,7 +68,10 @@ from synapse.storage.databases.main.media_repository import (
     MediaRepositoryBackgroundUpdateStore,
 )
 from synapse.storage.databases.main.presence import PresenceBackgroundUpdateStore
-from synapse.storage.databases.main.pusher import PusherWorkerStore
+from synapse.storage.databases.main.pusher import (
+    PusherBackgroundUpdatesStore,
+    PusherWorkerStore,
+)
 from synapse.storage.databases.main.receipts import ReceiptsBackgroundUpdateStore
 from synapse.storage.databases.main.registration import (
     RegistrationBackgroundUpdateStore,
@@ -226,6 +229,7 @@ class Store(
     AccountDataWorkerStore,
     PushRuleStore,
     PusherWorkerStore,
+    PusherBackgroundUpdatesStore,
     PresenceBackgroundUpdateStore,
     ReceiptsBackgroundUpdateStore,
     RelationsWorkerStore,
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 308e38edea..1e89447044 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -1504,8 +1504,10 @@ class AuthHandler:
         )
 
         # delete pushers associated with this access token
+        # XXX(quenting): This is only needed until the 'set_device_id_for_pushers'
+        # background update completes.
         if token.token_id is not None:
-            await self.hs.get_pusherpool().remove_pushers_by_access_token(
+            await self.hs.get_pusherpool().remove_pushers_by_access_tokens(
                 token.user_id, (token.token_id,)
             )
 
@@ -1535,7 +1537,9 @@ class AuthHandler:
             )
 
         # delete pushers associated with the access tokens
-        await self.hs.get_pusherpool().remove_pushers_by_access_token(
+        # XXX(quenting): This is only needed until the 'set_device_id_for_pushers'
+        # background update completes.
+        await self.hs.get_pusherpool().remove_pushers_by_access_tokens(
             user_id, (token_id for _, token_id, _ in tokens_and_devices)
         )
 
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 6f7963df43..9ded6389ac 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -503,6 +503,8 @@ class DeviceHandler(DeviceWorkerHandler):
             else:
                 raise
 
+        await self.hs.get_pusherpool().remove_pushers_by_devices(user_id, device_ids)
+
         # Delete data specific to each device. Not optimised as it is not
         # considered as part of a critical path.
         for device_id in device_ids:
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index 6b110dcb6e..c8bf2439af 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -1013,11 +1013,11 @@ class RegistrationHandler:
             user_tuple = await self.store.get_user_by_access_token(token)
             # The token better still exist.
             assert user_tuple
-            token_id = user_tuple.token_id
+            device_id = user_tuple.device_id
 
             await self.pusher_pool.add_or_update_pusher(
                 user_id=user_id,
-                access_token=token_id,
+                device_id=device_id,
                 kind="email",
                 app_id="m.email",
                 app_display_name="Email Notifications",
diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py
index a0c760239d..9e3a98741a 100644
--- a/synapse/push/__init__.py
+++ b/synapse/push/__init__.py
@@ -103,7 +103,7 @@ class PusherConfig:
 
     id: Optional[str]
     user_name: str
-    access_token: Optional[int]
+
     profile_tag: str
     kind: str
     app_id: str
@@ -119,6 +119,11 @@ class PusherConfig:
     enabled: bool
     device_id: Optional[str]
 
+    # XXX(quenting): The access_token is not persisted anymore for new pushers, but we
+    # keep it when reading from the database, so that we don't get stale pushers
+    # while the "set_device_id_for_pushers" background update is running.
+    access_token: Optional[int]
+
     def as_dict(self) -> Dict[str, Any]:
         """Information that can be retrieved about a pusher after creation."""
         return {
diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py
index e2648cbc93..6517e3566f 100644
--- a/synapse/push/pusherpool.py
+++ b/synapse/push/pusherpool.py
@@ -25,7 +25,7 @@ from synapse.metrics.background_process_metrics import (
 from synapse.push import Pusher, PusherConfig, PusherConfigException
 from synapse.push.pusher import PusherFactory
 from synapse.replication.http.push import ReplicationRemovePusherRestServlet
-from synapse.types import JsonDict, RoomStreamToken
+from synapse.types import JsonDict, RoomStreamToken, StrCollection
 from synapse.util.async_helpers import concurrently_execute
 from synapse.util.threepids import canonicalise_email
 
@@ -97,7 +97,6 @@ class PusherPool:
     async def add_or_update_pusher(
         self,
         user_id: str,
-        access_token: Optional[int],
         kind: str,
         app_id: str,
         app_display_name: str,
@@ -128,6 +127,22 @@ class PusherPool:
         # stream ordering, so it will process pushes from this point onwards.
         last_stream_ordering = self.store.get_room_max_stream_ordering()
 
+        # Before we actually persist the pusher, we check if the user already has one
+        # for this app ID and pushkey. If so, we want to keep the access token and
+        # device ID in place, since this could be one device modifying
+        # (e.g. enabling/disabling) another device's pusher.
+        # XXX(quenting): Even though we're not persisting the access_token_id for new
+        # pushers anymore, we still need to copy existing access_token_ids over when
+        # updating a pusher, in case the "set_device_id_for_pushers" background update
+        # hasn't run yet.
+        access_token_id = None
+        existing_config = await self._get_pusher_config_for_user_by_app_id_and_pushkey(
+            user_id, app_id, pushkey
+        )
+        if existing_config:
+            device_id = existing_config.device_id
+            access_token_id = existing_config.access_token
+
         # we try to create the pusher just to validate the config: it
         # will then get pulled out of the database,
         # recreated, added and started: this means we have only one
@@ -136,7 +151,6 @@ class PusherPool:
             PusherConfig(
                 id=None,
                 user_name=user_id,
-                access_token=access_token,
                 profile_tag=profile_tag,
                 kind=kind,
                 app_id=app_id,
@@ -151,23 +165,12 @@ class PusherPool:
                 failing_since=None,
                 enabled=enabled,
                 device_id=device_id,
+                access_token=access_token_id,
             )
         )
 
-        # Before we actually persist the pusher, we check if the user already has one
-        # this app ID and pushkey. If so, we want to keep the access token and device ID
-        # in place, since this could be one device modifying (e.g. enabling/disabling)
-        # another device's pusher.
-        existing_config = await self._get_pusher_config_for_user_by_app_id_and_pushkey(
-            user_id, app_id, pushkey
-        )
-        if existing_config:
-            access_token = existing_config.access_token
-            device_id = existing_config.device_id
-
         await self.store.add_pusher(
             user_id=user_id,
-            access_token=access_token,
             kind=kind,
             app_id=app_id,
             app_display_name=app_display_name,
@@ -180,6 +183,7 @@ class PusherPool:
             profile_tag=profile_tag,
             enabled=enabled,
             device_id=device_id,
+            access_token_id=access_token_id,
         )
         pusher = await self.process_pusher_change_by_id(app_id, pushkey, user_id)
 
@@ -199,7 +203,7 @@ class PusherPool:
                 )
                 await self.remove_pusher(p.app_id, p.pushkey, p.user_name)
 
-    async def remove_pushers_by_access_token(
+    async def remove_pushers_by_access_tokens(
         self, user_id: str, access_tokens: Iterable[int]
     ) -> None:
         """Remove the pushers for a given user corresponding to a set of
@@ -209,6 +213,8 @@ class PusherPool:
             user_id: user to remove pushers for
             access_tokens: access token *ids* to remove pushers for
         """
+        # XXX(quenting): This is only needed until the "set_device_id_for_pushers"
+        # background update finishes
         tokens = set(access_tokens)
         for p in await self.store.get_pushers_by_user_id(user_id):
             if p.access_token in tokens:
@@ -220,6 +226,26 @@ class PusherPool:
                 )
                 await self.remove_pusher(p.app_id, p.pushkey, p.user_name)
 
+    async def remove_pushers_by_devices(
+        self, user_id: str, devices: StrCollection
+    ) -> None:
+        """Remove the pushers for a given user corresponding to a set of devices
+
+        Args:
+            user_id: user to remove pushers for
+            devices: device IDs to remove pushers for
+        """
+        device_ids = set(devices)
+        for p in await self.store.get_pushers_by_user_id(user_id):
+            if p.device_id in device_ids:
+                logger.info(
+                    "Removing pusher for app id %s, pushkey %s, user %s",
+                    p.app_id,
+                    p.pushkey,
+                    p.user_name,
+                )
+                await self.remove_pusher(p.app_id, p.pushkey, p.user_name)
+
     def on_new_notifications(self, max_token: RoomStreamToken) -> None:
         if not self.pushers:
             # nothing to do here.
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 281e8fd0ad..331f225116 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -425,7 +425,6 @@ class UserRestServletV2(RestServlet):
                     ):
                         await self.pusher_pool.add_or_update_pusher(
                             user_id=user_id,
-                            access_token=None,
                             kind="email",
                             app_id="m.email",
                             app_display_name="Email Notifications",
diff --git a/synapse/rest/client/pusher.py b/synapse/rest/client/pusher.py
index 975eef2144..1a8f5292ac 100644
--- a/synapse/rest/client/pusher.py
+++ b/synapse/rest/client/pusher.py
@@ -126,7 +126,6 @@ class PushersSetRestServlet(RestServlet):
         try:
             await self.pusher_pool.add_or_update_pusher(
                 user_id=user.to_string(),
-                access_token=requester.access_token_id,
                 kind=content["kind"],
                 app_id=content["app_id"],
                 app_display_name=content["app_display_name"],
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
index 9a24f7a655..ab76b754e0 100644
--- a/synapse/storage/databases/main/pusher.py
+++ b/synapse/storage/databases/main/pusher.py
@@ -509,19 +509,24 @@ class PusherBackgroundUpdatesStore(SQLBaseStore):
     async def _set_device_id_for_pushers(
         self, progress: JsonDict, batch_size: int
     ) -> int:
-        """Background update to populate the device_id column of the pushers table."""
+        """
+        Background update to populate the device_id column and clear the access_token
+        column for the pushers table.
+        """
         last_pusher_id = progress.get("pusher_id", 0)
 
         def set_device_id_for_pushers_txn(txn: LoggingTransaction) -> int:
             txn.execute(
                 """
-                    SELECT p.id, at.device_id
+                    SELECT 
+                        p.id AS pusher_id,
+                        p.device_id AS pusher_device_id,
+                        at.device_id AS token_device_id
                     FROM pushers AS p
-                    INNER JOIN access_tokens AS at
+                    LEFT JOIN access_tokens AS at
                         ON p.access_token = at.id
                     WHERE
                         p.access_token IS NOT NULL
-                        AND at.device_id IS NOT NULL
                         AND p.id > ?
                     ORDER BY p.id
                     LIMIT ?
@@ -533,13 +538,27 @@ class PusherBackgroundUpdatesStore(SQLBaseStore):
             if len(rows) == 0:
                 return 0
 
+            # The reason we're clearing the access_token column here is a bit subtle.
+            # When a user logs out, we:
+            #  (1) delete the access token
+            #  (2) delete the device
+            #
+            # Ideally, we would delete the pushers only via its link to the device
+            # during (2), but since this background update might not have fully run yet,
+            # we're still deleting the pushers via the access token during (1).
             self.db_pool.simple_update_many_txn(
                 txn=txn,
                 table="pushers",
                 key_names=("id",),
-                key_values=[(row["id"],) for row in rows],
-                value_names=("device_id",),
-                value_values=[(row["device_id"],) for row in rows],
+                key_values=[(row["pusher_id"],) for row in rows],
+                value_names=("device_id", "access_token"),
+                # If there was already a device_id on the pusher, we only want to clear
+                # the access_token column, so we keep the existing device_id. Otherwise,
+                # we set the device_id we got from joining the access_tokens table.
+                value_values=[
+                    (row["pusher_device_id"] or row["token_device_id"], None)
+                    for row in rows
+                ],
             )
 
             self.db_pool.updates._background_update_progress_txn(
@@ -568,7 +587,6 @@ class PusherStore(PusherWorkerStore, PusherBackgroundUpdatesStore):
     async def add_pusher(
         self,
         user_id: str,
-        access_token: Optional[int],
         kind: str,
         app_id: str,
         app_display_name: str,
@@ -581,13 +599,13 @@ class PusherStore(PusherWorkerStore, PusherBackgroundUpdatesStore):
         profile_tag: str = "",
         enabled: bool = True,
         device_id: Optional[str] = None,
+        access_token_id: Optional[int] = None,
     ) -> None:
         async with self._pushers_id_gen.get_next() as stream_id:
             await self.db_pool.simple_upsert(
                 table="pushers",
                 keyvalues={"app_id": app_id, "pushkey": pushkey, "user_name": user_id},
                 values={
-                    "access_token": access_token,
                     "kind": kind,
                     "app_display_name": app_display_name,
                     "device_display_name": device_display_name,
@@ -599,6 +617,10 @@ class PusherStore(PusherWorkerStore, PusherBackgroundUpdatesStore):
                     "id": stream_id,
                     "enabled": enabled,
                     "device_id": device_id,
+                    # XXX(quenting): We're only really persisting the access token ID
+                    # when updating an existing pusher. This is in case the
+                    # 'set_device_id_for_pushers' background update hasn't finished yet.
+                    "access_token": access_token_id,
                 },
                 desc="add_pusher",
             )
diff --git a/synapse/storage/schema/main/delta/74/02_set_device_id_for_pushers_bg_update.sql b/synapse/storage/schema/main/delta/74/02_set_device_id_for_pushers_bg_update.sql
new file mode 100644
index 0000000000..1367fb6267
--- /dev/null
+++ b/synapse/storage/schema/main/delta/74/02_set_device_id_for_pushers_bg_update.sql
@@ -0,0 +1,19 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Triggers the background update to set the device_id for pushers
+-- that don't have one, and clear the access_token column.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+    (7402, 'set_device_id_for_pushers', '{}');
diff --git a/tests/push/test_email.py b/tests/push/test_email.py
index 4ea5472eb4..4b5c96aeae 100644
--- a/tests/push/test_email.py
+++ b/tests/push/test_email.py
@@ -105,7 +105,7 @@ class EmailPusherTests(HomeserverTestCase):
             self.hs.get_datastores().main.get_user_by_access_token(self.access_token)
         )
         assert user_tuple is not None
-        self.token_id = user_tuple.token_id
+        self.device_id = user_tuple.device_id
 
         # We need to add email to account before we can create a pusher.
         self.get_success(
@@ -117,7 +117,7 @@ class EmailPusherTests(HomeserverTestCase):
         pusher = self.get_success(
             self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=self.user_id,
-                access_token=self.token_id,
+                device_id=self.device_id,
                 kind="email",
                 app_id="m.email",
                 app_display_name="Email Notifications",
@@ -141,7 +141,7 @@ class EmailPusherTests(HomeserverTestCase):
             self.get_success_or_raise(
                 self.hs.get_pusherpool().add_or_update_pusher(
                     user_id=self.user_id,
-                    access_token=self.token_id,
+                    device_id=self.device_id,
                     kind="email",
                     app_id="m.email",
                     app_display_name="Email Notifications",
diff --git a/tests/push/test_http.py b/tests/push/test_http.py
index c280ddcdf6..99cec0836b 100644
--- a/tests/push/test_http.py
+++ b/tests/push/test_http.py
@@ -67,13 +67,13 @@ class HTTPPusherTests(HomeserverTestCase):
             self.hs.get_datastores().main.get_user_by_access_token(access_token)
         )
         assert user_tuple is not None
-        token_id = user_tuple.token_id
+        device_id = user_tuple.device_id
 
         def test_data(data: Any) -> None:
             self.get_failure(
                 self.hs.get_pusherpool().add_or_update_pusher(
                     user_id=user_id,
-                    access_token=token_id,
+                    device_id=device_id,
                     kind="http",
                     app_id="m.http",
                     app_display_name="HTTP Push Notifications",
@@ -114,12 +114,12 @@ class HTTPPusherTests(HomeserverTestCase):
             self.hs.get_datastores().main.get_user_by_access_token(access_token)
         )
         assert user_tuple is not None
-        token_id = user_tuple.token_id
+        device_id = user_tuple.device_id
 
         self.get_success(
             self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=user_id,
-                access_token=token_id,
+                device_id=device_id,
                 kind="http",
                 app_id="m.http",
                 app_display_name="HTTP Push Notifications",
@@ -235,12 +235,12 @@ class HTTPPusherTests(HomeserverTestCase):
             self.hs.get_datastores().main.get_user_by_access_token(access_token)
         )
         assert user_tuple is not None
-        token_id = user_tuple.token_id
+        device_id = user_tuple.device_id
 
         self.get_success(
             self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=user_id,
-                access_token=token_id,
+                device_id=device_id,
                 kind="http",
                 app_id="m.http",
                 app_display_name="HTTP Push Notifications",
@@ -356,12 +356,12 @@ class HTTPPusherTests(HomeserverTestCase):
             self.hs.get_datastores().main.get_user_by_access_token(access_token)
         )
         assert user_tuple is not None
-        token_id = user_tuple.token_id
+        device_id = user_tuple.device_id
 
         self.get_success(
             self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=user_id,
-                access_token=token_id,
+                device_id=device_id,
                 kind="http",
                 app_id="m.http",
                 app_display_name="HTTP Push Notifications",
@@ -443,12 +443,12 @@ class HTTPPusherTests(HomeserverTestCase):
             self.hs.get_datastores().main.get_user_by_access_token(access_token)
         )
         assert user_tuple is not None
-        token_id = user_tuple.token_id
+        device_id = user_tuple.device_id
 
         self.get_success(
             self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=user_id,
-                access_token=token_id,
+                device_id=device_id,
                 kind="http",
                 app_id="m.http",
                 app_display_name="HTTP Push Notifications",
@@ -521,12 +521,12 @@ class HTTPPusherTests(HomeserverTestCase):
             self.hs.get_datastores().main.get_user_by_access_token(access_token)
         )
         assert user_tuple is not None
-        token_id = user_tuple.token_id
+        device_id = user_tuple.device_id
 
         self.get_success(
             self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=user_id,
-                access_token=token_id,
+                device_id=device_id,
                 kind="http",
                 app_id="m.http",
                 app_display_name="HTTP Push Notifications",
@@ -628,12 +628,12 @@ class HTTPPusherTests(HomeserverTestCase):
             self.hs.get_datastores().main.get_user_by_access_token(access_token)
         )
         assert user_tuple is not None
-        token_id = user_tuple.token_id
+        device_id = user_tuple.device_id
 
         self.get_success(
             self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=user_id,
-                access_token=token_id,
+                device_id=device_id,
                 kind="http",
                 app_id="m.http",
                 app_display_name="HTTP Push Notifications",
@@ -764,12 +764,12 @@ class HTTPPusherTests(HomeserverTestCase):
             self.hs.get_datastores().main.get_user_by_access_token(access_token)
         )
         assert user_tuple is not None
-        token_id = user_tuple.token_id
+        device_id = user_tuple.device_id
 
         self.get_success(
             self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=user_id,
-                access_token=token_id,
+                device_id=device_id,
                 kind="http",
                 app_id="m.http",
                 app_display_name="HTTP Push Notifications",
@@ -778,7 +778,6 @@ class HTTPPusherTests(HomeserverTestCase):
                 lang=None,
                 data={"url": "http://example.com/_matrix/push/v1/notify"},
                 enabled=enabled,
-                device_id=user_tuple.device_id,
             )
         )
 
@@ -895,19 +894,17 @@ class HTTPPusherTests(HomeserverTestCase):
 
     def test_update_different_device_access_token_device_id(self) -> None:
         """Tests that if we create a pusher from one device, the update it from another
-        device, the access token and device ID associated with the pusher stays the
-        same.
+        device, the device ID associated with the pusher stays the same.
         """
         # Create a user with a pusher.
         user_id, access_token = self._make_user_with_pusher("user")
 
-        # Get the token ID for the current access token, since that's what we store in
-        # the pushers table. Also get the device ID from it.
+        # Get the device ID for the current access token, since that's what we store in
+        # the pushers table.
         user_tuple = self.get_success(
             self.hs.get_datastores().main.get_user_by_access_token(access_token)
         )
         assert user_tuple is not None
-        token_id = user_tuple.token_id
         device_id = user_tuple.device_id
 
         # Generate a new access token, and update the pusher with it.
@@ -920,10 +917,9 @@ class HTTPPusherTests(HomeserverTestCase):
         )
         pushers: List[PusherConfig] = list(ret)
 
-        # Check that we still have one pusher, and that the access token and device ID
-        # associated with it didn't change.
+        # Check that we still have one pusher, and that the device ID associated with
+        # it didn't change.
         self.assertEqual(len(pushers), 1)
-        self.assertEqual(pushers[0].access_token, token_id)
         self.assertEqual(pushers[0].device_id, device_id)
 
     @override_config({"experimental_features": {"msc3881_enabled": True}})
diff --git a/tests/replication/test_pusher_shard.py b/tests/replication/test_pusher_shard.py
index 0798b021c3..dcb3e6669b 100644
--- a/tests/replication/test_pusher_shard.py
+++ b/tests/replication/test_pusher_shard.py
@@ -51,12 +51,12 @@ class PusherShardTestCase(BaseMultiWorkerStreamTestCase):
             self.hs.get_datastores().main.get_user_by_access_token(access_token)
         )
         assert user_dict is not None
-        token_id = user_dict.token_id
+        device_id = user_dict.device_id
 
         self.get_success(
             self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=user_id,
-                access_token=token_id,
+                device_id=device_id,
                 kind="http",
                 app_id="m.http",
                 app_display_name="HTTP Push Notifications",
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index 4b8f889a71..b4241ceaf0 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -3047,12 +3047,12 @@ class PushersRestTestCase(unittest.HomeserverTestCase):
             self.store.get_user_by_access_token(other_user_token)
         )
         assert user_tuple is not None
-        token_id = user_tuple.token_id
+        device_id = user_tuple.device_id
 
         self.get_success(
             self.hs.get_pusherpool().add_or_update_pusher(
                 user_id=self.other_user,
-                access_token=token_id,
+                device_id=device_id,
                 kind="http",
                 app_id="m.http",
                 app_display_name="HTTP Push Notifications",
-- 
cgit 1.5.1


From 5f7c9082805846cc07bfef2d48c6f6cfc9f723e9 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Fri, 24 Mar 2023 15:31:12 +0000
Subject: As an optimisation, use `TRUNCATE` on Postgres when clearing the user
 directory tables. (#15316)

---
 changelog.d/15316.misc                           |  1 +
 synapse/storage/databases/main/user_directory.py | 15 +++++++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/15316.misc

(limited to 'synapse')

diff --git a/changelog.d/15316.misc b/changelog.d/15316.misc
new file mode 100644
index 0000000000..1f408739f0
--- /dev/null
+++ b/changelog.d/15316.misc
@@ -0,0 +1 @@
+As an optimisation, use `TRUNCATE` on Postgres when clearing the user directory tables.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index 97f09b73dd..9fced4b997 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -698,10 +698,17 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
         """Delete the entire user directory"""
 
         def _delete_all_from_user_dir_txn(txn: LoggingTransaction) -> None:
-            txn.execute("DELETE FROM user_directory")
-            txn.execute("DELETE FROM user_directory_search")
-            txn.execute("DELETE FROM users_in_public_rooms")
-            txn.execute("DELETE FROM users_who_share_private_rooms")
+            # SQLite doesn't support TRUNCATE.
+            # On Postgres, DELETE FROM does a table scan but TRUNCATE is more efficient.
+            truncate = (
+                "DELETE FROM"
+                if isinstance(self.database_engine, Sqlite3Engine)
+                else "TRUNCATE"
+            )
+            txn.execute(f"{truncate} user_directory")
+            txn.execute(f"{truncate} user_directory_search")
+            txn.execute(f"{truncate} users_in_public_rooms")
+            txn.execute(f"{truncate} users_who_share_private_rooms")
             txn.call_after(self.get_user_in_directory.invalidate_all)
 
         await self.db_pool.runInteraction(
-- 
cgit 1.5.1


From d5324ee111ea56fa466eab7e3974dc4894a64d46 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Fri, 24 Mar 2023 16:41:10 +0000
Subject: Add developer documentation for the Federation Sender and add a
 documentation mechanism using Sphinx. (#15265)

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
---
 .ci/scripts/prepare_old_deps.sh       |    7 +-
 .github/workflows/docs.yaml           |   77 +-
 .gitignore                            |    1 +
 changelog.d/15265.misc                |    1 +
 dev-docs/Makefile                     |   20 +
 dev-docs/conf.py                      |   50 ++
 dev-docs/index.rst                    |   22 +
 dev-docs/modules/federation_sender.md |    5 +
 poetry.lock                           | 1480 ++++++++++++++++++++++-----------
 pyproject.toml                        |   12 +
 scripts-dev/lint.sh                   |    1 +
 synapse/federation/sender/__init__.py |  113 +++
 12 files changed, 1280 insertions(+), 509 deletions(-)
 create mode 100644 changelog.d/15265.misc
 create mode 100644 dev-docs/Makefile
 create mode 100644 dev-docs/conf.py
 create mode 100644 dev-docs/index.rst
 create mode 100644 dev-docs/modules/federation_sender.md

(limited to 'synapse')

diff --git a/.ci/scripts/prepare_old_deps.sh b/.ci/scripts/prepare_old_deps.sh
index 3398193ee5..e536a9db8b 100755
--- a/.ci/scripts/prepare_old_deps.sh
+++ b/.ci/scripts/prepare_old_deps.sh
@@ -35,9 +35,9 @@ sed -i \
 # compatible (as far the package metadata declares, anyway); pip's package resolver
 # is more lax.
 #
-# Rather than `poetry install --no-dev`, we drop all dev dependencies from the
-# toml file. This means we don't have to ensure compatibility between old deps and
-# dev tools.
+# Rather than `poetry install --no-dev`, we drop all dev dependencies and the dev-docs
+# group from the toml file. This means we don't have to ensure compatibility between
+# old deps and dev tools.
 
 pip install toml wheel
 
@@ -47,6 +47,7 @@ with open('pyproject.toml', 'r') as f:
     data = toml.loads(f.read())
 
 del data['tool']['poetry']['dev-dependencies']
+del data['tool']['poetry']['group']['dev-docs']
 
 with open('pyproject.toml', 'w') as f:
     toml.dump(data, f)
diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 55b4b287f6..20a29e7cbf 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -13,25 +13,10 @@ on:
   workflow_dispatch:
 
 jobs:
-  pages:
-    name: GitHub Pages
+  pre:
+    name: Calculate variables for GitHub Pages deployment
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-
-      - name: Setup mdbook
-        uses: peaceiris/actions-mdbook@adeb05db28a0c0004681db83893d56c0388ea9ea # v1.2.0
-        with:
-          mdbook-version: '0.4.17'
-
-      - name: Build the documentation
-        # mdbook will only create an index.html if we're including docs/README.md in SUMMARY.md.
-        # However, we're using docs/README.md for other purposes and need to pick a new page
-        # as the default. Let's opt for the welcome page instead.
-        run: |
-          mdbook build
-          cp book/welcome_and_overview.html book/index.html
-
       # Figure out the target directory.
       #
       # The target directory depends on the name of the branch
@@ -55,11 +40,65 @@ jobs:
 
           # finally, set the 'branch-version' var.
           echo "branch-version=$branch" >> "$GITHUB_OUTPUT"
-          
+    outputs:
+      branch-version: ${{ steps.vars.outputs.branch-version }}
+
+################################################################################
+  pages-docs:
+    name: GitHub Pages
+    runs-on: ubuntu-latest
+    needs:
+      - pre
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Setup mdbook
+        uses: peaceiris/actions-mdbook@adeb05db28a0c0004681db83893d56c0388ea9ea # v1.2.0
+        with:
+          mdbook-version: '0.4.17'
+
+      - name: Build the documentation
+        # mdbook will only create an index.html if we're including docs/README.md in SUMMARY.md.
+        # However, we're using docs/README.md for other purposes and need to pick a new page
+        # as the default. Let's opt for the welcome page instead.
+        run: |
+          mdbook build
+          cp book/welcome_and_overview.html book/index.html
+
       # Deploy to the target directory.
       - name: Deploy to gh pages
         uses: peaceiris/actions-gh-pages@bd8c6b06eba6b3d25d72b7a1767993c0aeee42e7 # v3.9.2
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           publish_dir: ./book
-          destination_dir: ./${{ steps.vars.outputs.branch-version }}
+          destination_dir: ./${{ needs.pre.outputs.branch-version }}
+
+################################################################################
+  pages-devdocs:
+    name: GitHub Pages (developer docs)
+    runs-on: ubuntu-latest
+    needs:
+      - pre
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: "Set up Sphinx"
+        uses: matrix-org/setup-python-poetry@v1
+        with:
+          python-version: "3.x"
+          poetry-version: "1.3.2"
+          groups: "dev-docs"
+          extras: ""
+
+      - name: Build the documentation
+        run: |
+          cd dev-docs
+          poetry run make html
+
+      # Deploy to the target directory.
+      - name: Deploy to gh pages
+        uses: peaceiris/actions-gh-pages@bd8c6b06eba6b3d25d72b7a1767993c0aeee42e7 # v3.9.2
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: ./dev-docs/_build/html
+          destination_dir: ./dev-docs/${{ needs.pre.outputs.branch-version }}
diff --git a/.gitignore b/.gitignore
index 6937de88bc..96c451258e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -53,6 +53,7 @@ __pycache__/
 /coverage.*
 /dist/
 /docs/build/
+/dev-docs/_build/
 /htmlcov
 /pip-wheel-metadata/
 
diff --git a/changelog.d/15265.misc b/changelog.d/15265.misc
new file mode 100644
index 0000000000..355c3cae2b
--- /dev/null
+++ b/changelog.d/15265.misc
@@ -0,0 +1 @@
+Add developer documentation for the Federation Sender and add a documentation mechanism using Sphinx.
\ No newline at end of file
diff --git a/dev-docs/Makefile b/dev-docs/Makefile
new file mode 100644
index 0000000000..d4bb2cbb9e
--- /dev/null
+++ b/dev-docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/dev-docs/conf.py b/dev-docs/conf.py
new file mode 100644
index 0000000000..826d578c0b
--- /dev/null
+++ b/dev-docs/conf.py
@@ -0,0 +1,50 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = "Synapse development"
+copyright = "2023, The Matrix.org Foundation C.I.C."
+author = "The Synapse Maintainers and Community"
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = [
+    "autodoc2",
+    "myst_parser",
+]
+
+templates_path = ["_templates"]
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+
+
+# -- Options for Autodoc2 ----------------------------------------------------
+
+autodoc2_docstring_parser_regexes = [
+    # this will render all docstrings as 'MyST' Markdown
+    (r".*", "myst"),
+]
+
+autodoc2_packages = [
+    {
+        "path": "../synapse",
+        # Don't render documentation for everything as a matter of course
+        "auto_mode": False,
+    },
+]
+
+
+# -- Options for MyST (Markdown) ---------------------------------------------
+
+# myst_heading_anchors = 2
+
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+html_theme = "furo"
+html_static_path = ["_static"]
diff --git a/dev-docs/index.rst b/dev-docs/index.rst
new file mode 100644
index 0000000000..1ef210460a
--- /dev/null
+++ b/dev-docs/index.rst
@@ -0,0 +1,22 @@
+.. Synapse Developer Documentation master file, created by
+   sphinx-quickstart on Mon Mar 13 08:59:51 2023.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to the Synapse Developer Documentation!
+===========================================================
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   modules/federation_sender
+
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/dev-docs/modules/federation_sender.md b/dev-docs/modules/federation_sender.md
new file mode 100644
index 0000000000..dac6852c16
--- /dev/null
+++ b/dev-docs/modules/federation_sender.md
@@ -0,0 +1,5 @@
+Federation Sender
+=================
+
+```{autodoc2-docstring} synapse.federation.sender
+```
diff --git a/poetry.lock b/poetry.lock
index 76fbfafcf9..d42b3a5710 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,5 +1,37 @@
 # This file is automatically @generated by Poetry and should not be changed by hand.
 
+[[package]]
+name = "alabaster"
+version = "0.7.13"
+description = "A configurable sidebar-enabled Sphinx theme"
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "alabaster-0.7.13-py3-none-any.whl", hash = "sha256:1ee19aca801bbabb5ba3f5f258e4422dfa86f82f3e9cefb0859b283cdd7f62a3"},
+    {file = "alabaster-0.7.13.tar.gz", hash = "sha256:a27a4a084d5e690e16e01e03ad2b2e552c61a65469419b907243193de1a84ae2"},
+]
+
+[[package]]
+name = "astroid"
+version = "2.15.0"
+description = "An abstract syntax tree for Python with inference support."
+category = "dev"
+optional = false
+python-versions = ">=3.7.2"
+files = [
+    {file = "astroid-2.15.0-py3-none-any.whl", hash = "sha256:e3e4d0ffc2d15d954065579689c36aac57a339a4679a679579af6401db4d3fdb"},
+    {file = "astroid-2.15.0.tar.gz", hash = "sha256:525f126d5dc1b8b0b6ee398b33159105615d92dc4a17f2cd064125d57f6186fa"},
+]
+
+[package.dependencies]
+lazy-object-proxy = ">=1.4.0"
+typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""}
+wrapt = [
+    {version = ">=1.11,<2", markers = "python_version < \"3.11\""},
+    {version = ">=1.14,<2", markers = "python_version >= \"3.11\""},
+]
+
 [[package]]
 name = "attrs"
 version = "22.2.0"
@@ -53,6 +85,21 @@ six = "*"
 [package.extras]
 visualize = ["Twisted (>=16.1.1)", "graphviz (>0.5.1)"]
 
+[[package]]
+name = "babel"
+version = "2.12.1"
+description = "Internationalization utilities"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "Babel-2.12.1-py3-none-any.whl", hash = "sha256:b4246fb7677d3b98f501a39d43396d3cafdc8eadb045f4a31be01863f655c610"},
+    {file = "Babel-2.12.1.tar.gz", hash = "sha256:cc2d99999cd01d44420ae725a21c9e3711b3aadc7976d6147f622d8581963455"},
+]
+
+[package.dependencies]
+pytz = {version = ">=2015.7", markers = "python_version < \"3.9\""}
+
 [[package]]
 name = "bcrypt"
 version = "4.0.1"
@@ -88,6 +135,25 @@ files = [
 tests = ["pytest (>=3.2.1,!=3.3.0)"]
 typecheck = ["mypy"]
 
+[[package]]
+name = "beautifulsoup4"
+version = "4.12.0"
+description = "Screen-scraping library"
+category = "dev"
+optional = false
+python-versions = ">=3.6.0"
+files = [
+    {file = "beautifulsoup4-4.12.0-py3-none-any.whl", hash = "sha256:2130a5ad7f513200fae61a17abb5e338ca980fa28c439c0571014bc0217e9591"},
+    {file = "beautifulsoup4-4.12.0.tar.gz", hash = "sha256:c5fceeaec29d09c84970e47c65f2f0efe57872f7cff494c9691a26ec0ff13234"},
+]
+
+[package.dependencies]
+soupsieve = ">1.2"
+
+[package.extras]
+html5lib = ["html5lib"]
+lxml = ["lxml"]
+
 [[package]]
 name = "black"
 version = "23.1.0"
@@ -261,19 +327,89 @@ pycparser = "*"
 
 [[package]]
 name = "charset-normalizer"
-version = "2.0.12"
+version = "3.1.0"
 description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
 category = "main"
 optional = false
-python-versions = ">=3.5.0"
+python-versions = ">=3.7.0"
 files = [
-    {file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"},
-    {file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"},
+    {file = "charset-normalizer-3.1.0.tar.gz", hash = "sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e0ac8959c929593fee38da1c2b64ee9778733cdf03c482c9ff1d508b6b593b2b"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d7fc3fca01da18fbabe4625d64bb612b533533ed10045a2ac3dd194bfa656b60"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:04eefcee095f58eaabe6dc3cc2262f3bcd776d2c67005880894f447b3f2cb9c1"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20064ead0717cf9a73a6d1e779b23d149b53daf971169289ed2ed43a71e8d3b0"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1435ae15108b1cb6fffbcea2af3d468683b7afed0169ad718451f8db5d1aff6f"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c84132a54c750fda57729d1e2599bb598f5fa0344085dbde5003ba429a4798c0"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75f2568b4189dda1c567339b48cba4ac7384accb9c2a7ed655cd86b04055c795"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11d3bcb7be35e7b1bba2c23beedac81ee893ac9871d0ba79effc7fc01167db6c"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:891cf9b48776b5c61c700b55a598621fdb7b1e301a550365571e9624f270c203"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5f008525e02908b20e04707a4f704cd286d94718f48bb33edddc7d7b584dddc1"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:b06f0d3bf045158d2fb8837c5785fe9ff9b8c93358be64461a1089f5da983137"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:49919f8400b5e49e961f320c735388ee686a62327e773fa5b3ce6721f7e785ce"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:22908891a380d50738e1f978667536f6c6b526a2064156203d418f4856d6e86a"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-win32.whl", hash = "sha256:12d1a39aa6b8c6f6248bb54550efcc1c38ce0d8096a146638fd4738e42284448"},
+    {file = "charset_normalizer-3.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:65ed923f84a6844de5fd29726b888e58c62820e0769b76565480e1fdc3d062f8"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9a3267620866c9d17b959a84dd0bd2d45719b817245e49371ead79ed4f710d19"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6734e606355834f13445b6adc38b53c0fd45f1a56a9ba06c2058f86893ae8017"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f8303414c7b03f794347ad062c0516cee0e15f7a612abd0ce1e25caf6ceb47df"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaf53a6cebad0eae578f062c7d462155eada9c172bd8c4d250b8c1d8eb7f916a"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3dc5b6a8ecfdc5748a7e429782598e4f17ef378e3e272eeb1340ea57c9109f41"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e1b25e3ad6c909f398df8921780d6a3d120d8c09466720226fc621605b6f92b1"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b82fab78e0b1329e183a65260581de4375f619167478dddab510c6c6fb04d9b6"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bd7163182133c0c7701b25e604cf1611c0d87712e56e88e7ee5d72deab3e76b5"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:11d117e6c63e8f495412d37e7dc2e2fff09c34b2d09dbe2bee3c6229577818be"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:cf6511efa4801b9b38dc5546d7547d5b5c6ef4b081c60b23e4d941d0eba9cbeb"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:abc1185d79f47c0a7aaf7e2412a0eb2c03b724581139193d2d82b3ad8cbb00ac"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cb7b2ab0188829593b9de646545175547a70d9a6e2b63bf2cd87a0a391599324"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-win32.whl", hash = "sha256:c36bcbc0d5174a80d6cccf43a0ecaca44e81d25be4b7f90f0ed7bcfbb5a00909"},
+    {file = "charset_normalizer-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:cca4def576f47a09a943666b8f829606bcb17e2bc2d5911a46c8f8da45f56755"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0c95f12b74681e9ae127728f7e5409cbbef9cd914d5896ef238cc779b8152373"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fca62a8301b605b954ad2e9c3666f9d97f63872aa4efcae5492baca2056b74ab"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac0aa6cd53ab9a31d397f8303f92c42f534693528fafbdb997c82bae6e477ad9"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3af8e0f07399d3176b179f2e2634c3ce9c1301379a6b8c9c9aeecd481da494f"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a5fc78f9e3f501a1614a98f7c54d3969f3ad9bba8ba3d9b438c3bc5d047dd28"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:628c985afb2c7d27a4800bfb609e03985aaecb42f955049957814e0491d4006d"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:74db0052d985cf37fa111828d0dd230776ac99c740e1a758ad99094be4f1803d"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e8fcdd8f672a1c4fc8d0bd3a2b576b152d2a349782d1eb0f6b8e52e9954731d"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:04afa6387e2b282cf78ff3dbce20f0cc071c12dc8f685bd40960cc68644cfea6"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:dd5653e67b149503c68c4018bf07e42eeed6b4e956b24c00ccdf93ac79cdff84"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d2686f91611f9e17f4548dbf050e75b079bbc2a82be565832bc8ea9047b61c8c"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-win32.whl", hash = "sha256:4155b51ae05ed47199dc5b2a4e62abccb274cee6b01da5b895099b61b1982974"},
+    {file = "charset_normalizer-3.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:322102cdf1ab682ecc7d9b1c5eed4ec59657a65e1c146a0da342b78f4112db23"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e633940f28c1e913615fd624fcdd72fdba807bf53ea6925d6a588e84e1151531"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3a06f32c9634a8705f4ca9946d667609f52cf130d5548881401f1eb2c39b1e2c"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7381c66e0561c5757ffe616af869b916c8b4e42b367ab29fedc98481d1e74e14"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3573d376454d956553c356df45bb824262c397c6e26ce43e8203c4c540ee0acb"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e89df2958e5159b811af9ff0f92614dabf4ff617c03a4c1c6ff53bf1c399e0e1"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:78cacd03e79d009d95635e7d6ff12c21eb89b894c354bd2b2ed0b4763373693b"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de5695a6f1d8340b12a5d6d4484290ee74d61e467c39ff03b39e30df62cf83a0"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c60b9c202d00052183c9be85e5eaf18a4ada0a47d188a83c8f5c5b23252f649"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f645caaf0008bacf349875a974220f1f1da349c5dbe7c4ec93048cdc785a3326"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ea9f9c6034ea2d93d9147818f17c2a0860d41b71c38b9ce4d55f21b6f9165a11"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:80d1543d58bd3d6c271b66abf454d437a438dff01c3e62fdbcd68f2a11310d4b"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:73dc03a6a7e30b7edc5b01b601e53e7fc924b04e1835e8e407c12c037e81adbd"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6f5c2e7bc8a4bf7c426599765b1bd33217ec84023033672c1e9a8b35eaeaaaf8"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-win32.whl", hash = "sha256:12a2b561af122e3d94cdb97fe6fb2bb2b82cef0cdca131646fdb940a1eda04f0"},
+    {file = "charset_normalizer-3.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3160a0fd9754aab7d47f95a6b63ab355388d890163eb03b2d2b87ab0a30cfa59"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38e812a197bf8e71a59fe55b757a84c1f946d0ac114acafaafaf21667a7e169e"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6baf0baf0d5d265fa7944feb9f7451cc316bfe30e8df1a61b1bb08577c554f31"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f25e17ab3039b05f762b0a55ae0b3632b2e073d9c8fc88e89aca31a6198e88f"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3747443b6a904001473370d7810aa19c3a180ccd52a7157aacc264a5ac79265e"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b116502087ce8a6b7a5f1814568ccbd0e9f6cfd99948aa59b0e241dc57cf739f"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d16fd5252f883eb074ca55cb622bc0bee49b979ae4e8639fff6ca3ff44f9f854"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f6c7a8a57e9405cad7485f4c9d3172ae486cfef1344b5ddd8e5239582d7355e"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ac3775e3311661d4adace3697a52ac0bab17edd166087d493b52d4f4f553f9f0"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:10c93628d7497c81686e8e5e557aafa78f230cd9e77dd0c40032ef90c18f2230"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:6f4f4668e1831850ebcc2fd0b1cd11721947b6dc7c00bf1c6bd3c929ae14f2c7"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0be65ccf618c1e7ac9b849c315cc2e8a8751d9cfdaa43027d4f6624bd587ab7e"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:53d0a3fa5f8af98a1e261de6a3943ca631c526635eb5817a87a59d9a57ebf48f"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-win32.whl", hash = "sha256:a04f86f41a8916fe45ac5024ec477f41f886b3c435da2d4e3d2709b22ab02af1"},
+    {file = "charset_normalizer-3.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:830d2948a5ec37c386d3170c483063798d7879037492540f10a475e3fd6f244b"},
+    {file = "charset_normalizer-3.1.0-py3-none-any.whl", hash = "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d"},
 ]
 
-[package.extras]
-unicode-backport = ["unicodedata2"]
-
 [[package]]
 name = "click"
 version = "8.1.3"
@@ -306,14 +442,14 @@ click = "*"
 
 [[package]]
 name = "colorama"
-version = "0.4.4"
+version = "0.4.6"
 description = "Cross-platform colored terminal text."
 category = "dev"
 optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
 files = [
-    {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
-    {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
+    {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
+    {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
 ]
 
 [[package]]
@@ -345,35 +481,31 @@ files = [
 
 [[package]]
 name = "cryptography"
-version = "39.0.2"
+version = "40.0.0"
 description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
 category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "cryptography-39.0.2-cp36-abi3-macosx_10_12_universal2.whl", hash = "sha256:2725672bb53bb92dc7b4150d233cd4b8c59615cd8288d495eaa86db00d4e5c06"},
-    {file = "cryptography-39.0.2-cp36-abi3-macosx_10_12_x86_64.whl", hash = "sha256:23df8ca3f24699167daf3e23e51f7ba7334d504af63a94af468f468b975b7dd7"},
-    {file = "cryptography-39.0.2-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:eb40fe69cfc6f5cdab9a5ebd022131ba21453cf7b8a7fd3631f45bbf52bed612"},
-    {file = "cryptography-39.0.2-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc0521cce2c1d541634b19f3ac661d7a64f9555135e9d8af3980965be717fd4a"},
-    {file = "cryptography-39.0.2-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffd394c7896ed7821a6d13b24657c6a34b6e2650bd84ae063cf11ccffa4f1a97"},
-    {file = "cryptography-39.0.2-cp36-abi3-manylinux_2_24_x86_64.whl", hash = "sha256:e8a0772016feeb106efd28d4a328e77dc2edae84dfbac06061319fdb669ff828"},
-    {file = "cryptography-39.0.2-cp36-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8f35c17bd4faed2bc7797d2a66cbb4f986242ce2e30340ab832e5d99ae60e011"},
-    {file = "cryptography-39.0.2-cp36-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:b49a88ff802e1993b7f749b1eeb31134f03c8d5c956e3c125c75558955cda536"},
-    {file = "cryptography-39.0.2-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:5f8c682e736513db7d04349b4f6693690170f95aac449c56f97415c6980edef5"},
-    {file = "cryptography-39.0.2-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:d7d84a512a59f4412ca8549b01f94be4161c94efc598bf09d027d67826beddc0"},
-    {file = "cryptography-39.0.2-cp36-abi3-win32.whl", hash = "sha256:c43ac224aabcbf83a947eeb8b17eaf1547bce3767ee2d70093b461f31729a480"},
-    {file = "cryptography-39.0.2-cp36-abi3-win_amd64.whl", hash = "sha256:788b3921d763ee35dfdb04248d0e3de11e3ca8eb22e2e48fef880c42e1f3c8f9"},
-    {file = "cryptography-39.0.2-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:d15809e0dbdad486f4ad0979753518f47980020b7a34e9fc56e8be4f60702fac"},
-    {file = "cryptography-39.0.2-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:50cadb9b2f961757e712a9737ef33d89b8190c3ea34d0fb6675e00edbe35d074"},
-    {file = "cryptography-39.0.2-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:103e8f7155f3ce2ffa0049fe60169878d47a4364b277906386f8de21c9234aa1"},
-    {file = "cryptography-39.0.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:6236a9610c912b129610eb1a274bdc1350b5df834d124fa84729ebeaf7da42c3"},
-    {file = "cryptography-39.0.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e944fe07b6f229f4c1a06a7ef906a19652bdd9fd54c761b0ff87e83ae7a30354"},
-    {file = "cryptography-39.0.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:35d658536b0a4117c885728d1a7032bdc9a5974722ae298d6c533755a6ee3915"},
-    {file = "cryptography-39.0.2-pp39-pypy39_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:30b1d1bfd00f6fc80d11300a29f1d8ab2b8d9febb6ed4a38a76880ec564fae84"},
-    {file = "cryptography-39.0.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:e029b844c21116564b8b61216befabca4b500e6816fa9f0ba49527653cae2108"},
-    {file = "cryptography-39.0.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fa507318e427169ade4e9eccef39e9011cdc19534f55ca2f36ec3f388c1f70f3"},
-    {file = "cryptography-39.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:8bc0008ef798231fac03fe7d26e82d601d15bd16f3afaad1c6113771566570f3"},
-    {file = "cryptography-39.0.2.tar.gz", hash = "sha256:bc5b871e977c8ee5a1bbc42fa8d19bcc08baf0c51cbf1586b0e87a2694dde42f"},
+    {file = "cryptography-40.0.0-cp36-abi3-macosx_10_12_universal2.whl", hash = "sha256:168ded448fb5d82dfa911156ab8b13b1716de65bd50ff977f4657643f998fa05"},
+    {file = "cryptography-40.0.0-cp36-abi3-macosx_10_12_x86_64.whl", hash = "sha256:71cb346b9dd1537102e7466a2d629385b01847f8d96cd7405f0e717d91cebc8e"},
+    {file = "cryptography-40.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e63fb48e2615cfab5a9c4bb457d35e7ae03ea8593996bfbe257e78244d12d0"},
+    {file = "cryptography-40.0.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2c4134d29cdce0735c16abf48fa8435f001a7b0031e68dd9a9ee1c80a29374a"},
+    {file = "cryptography-40.0.0-cp36-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:754dc5ab648113dc54197f242db43234a04e4d61193fb5d3ebb42bd569dca571"},
+    {file = "cryptography-40.0.0-cp36-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:43089be365c0ca4235c6e4e781f3bc125bc1fff576c9dd22cdfb585309b9bb9d"},
+    {file = "cryptography-40.0.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:23c42c59c2b5b9ddc6a85b5c46b8fabc4d63a1714f4dbea4bf20d25690bf2365"},
+    {file = "cryptography-40.0.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6b36e2864e04c82634879c7e7aad48824b1847fdb06b64cd410d2ec5e51d1b31"},
+    {file = "cryptography-40.0.0-cp36-abi3-win32.whl", hash = "sha256:e917a07094217edeefe8f6ea960b45d7aab650b982e4209da078332cc9d3ac3a"},
+    {file = "cryptography-40.0.0-cp36-abi3-win_amd64.whl", hash = "sha256:fba36ec552794a06a07ac8bdc5ad83a587f6959d98547f373d401975d55c7c9e"},
+    {file = "cryptography-40.0.0-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:fafa997b9e6818db333ded4b379f5b7679b48bd88ac878428cea2a1aa6e79fd8"},
+    {file = "cryptography-40.0.0-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:b05c9f25a1ea42e427085230815bbdebe15a53bb6163c4c06022e5630645046b"},
+    {file = "cryptography-40.0.0-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:14da8c26755ffa5c7863ffa5e8b87cb9596a21b6c34852cb19e0f48c226c64fb"},
+    {file = "cryptography-40.0.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:7162ae4530958114ca2eee30a56eca46527def33493f622f059dc2e825fd0913"},
+    {file = "cryptography-40.0.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e5855a80c77565fe2464e88e0095764e25d8ddb2d24df2b1d31773e80be94435"},
+    {file = "cryptography-40.0.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:34f502619964210939bb7ee7cd5df53178534eb08d3526f941695a8f7aa0efe4"},
+    {file = "cryptography-40.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:722cfddae79684166840be2cbbae154f44a455519e644b60bf274a50ccb834db"},
+    {file = "cryptography-40.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7cc9fc3ffcb766c313ed0515d77d0deabb4f36bdcff3a9f115c43e5ec611b82a"},
+    {file = "cryptography-40.0.0.tar.gz", hash = "sha256:f421f6777592eb199ca8abac7c20b9ecef27c50ad63546e6c614b29771b46d0d"},
 ]
 
 [package.dependencies]
@@ -382,10 +514,10 @@ cffi = ">=1.12"
 [package.extras]
 docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"]
 docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"]
-pep8test = ["black", "check-manifest", "mypy", "ruff", "types-pytz", "types-requests"]
+pep8test = ["black", "check-manifest", "mypy", "ruff"]
 sdist = ["setuptools-rust (>=0.11.4)"]
 ssh = ["bcrypt (>=3.1.5)"]
-test = ["hypothesis (>=1.11.4,!=3.79.2)", "iso8601", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-shard (>=0.1.2)", "pytest-subtests", "pytest-xdist", "pytz"]
+test = ["iso8601", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-shard (>=0.1.2)", "pytest-subtests", "pytest-xdist"]
 test-randomorder = ["pytest-randomly"]
 tox = ["tox"]
 
@@ -421,41 +553,59 @@ dev = ["PyTest", "PyTest (<5)", "PyTest-Cov", "PyTest-Cov (<2.6)", "bump2version
 
 [[package]]
 name = "docutils"
-version = "0.18.1"
+version = "0.19"
 description = "Docutils -- Python Documentation Utilities"
 category = "dev"
 optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+python-versions = ">=3.7"
 files = [
-    {file = "docutils-0.18.1-py2.py3-none-any.whl", hash = "sha256:23010f129180089fbcd3bc08cfefccb3b890b0050e1ca00c867036e9d161b98c"},
-    {file = "docutils-0.18.1.tar.gz", hash = "sha256:679987caf361a7539d76e584cbeddc311e3aee937877c87346f31debc63e9d06"},
+    {file = "docutils-0.19-py3-none-any.whl", hash = "sha256:5e1de4d849fee02c63b040a4a3fd567f4ab104defd8a5511fbbc24a8a017efbc"},
+    {file = "docutils-0.19.tar.gz", hash = "sha256:33995a6753c30b7f577febfc2c50411fec6aac7f7ffeb7c4cfe5991072dcf9e6"},
 ]
 
 [[package]]
 name = "elementpath"
-version = "2.5.0"
-description = "XPath 1.0/2.0 parsers and selectors for ElementTree and lxml"
+version = "4.1.0"
+description = "XPath 1.0/2.0/3.0/3.1 parsers and selectors for ElementTree and lxml"
 category = "main"
 optional = true
 python-versions = ">=3.7"
 files = [
-    {file = "elementpath-2.5.0-py3-none-any.whl", hash = "sha256:2a432775e37a19e4362443078130a7dbfc457d7d093cd421c03958d9034cc08b"},
-    {file = "elementpath-2.5.0.tar.gz", hash = "sha256:3a27aaf3399929fccda013899cb76d3ff111734abf4281e5f9d3721ba0b9ffa3"},
+    {file = "elementpath-4.1.0-py3-none-any.whl", hash = "sha256:2b1b524223d70fd6dd63a36b9bc32e4919c96a272c2d1454094c4d85086bc6f8"},
+    {file = "elementpath-4.1.0.tar.gz", hash = "sha256:dbd7eba3cf0b3b4934f627ba24851a3e0798ef2bc9104555a4cd831f2e6e8e14"},
 ]
 
 [package.extras]
-dev = ["Sphinx", "coverage", "flake8", "lxml", "memory-profiler", "mypy (==0.910)", "tox", "xmlschema (>=1.8.0)"]
+dev = ["Sphinx", "coverage", "flake8", "lxml", "lxml-stubs", "memory-profiler", "memray", "mypy", "tox", "xmlschema (>=2.0.0)"]
+
+[[package]]
+name = "furo"
+version = "2022.12.7"
+description = "A clean customisable Sphinx documentation theme."
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "furo-2022.12.7-py3-none-any.whl", hash = "sha256:7cb76c12a25ef65db85ab0743df907573d03027a33631f17d267e598ebb191f7"},
+    {file = "furo-2022.12.7.tar.gz", hash = "sha256:d8008f8efbe7587a97ba533c8b2df1f9c21ee9b3e5cad0d27f61193d38b1a986"},
+]
+
+[package.dependencies]
+beautifulsoup4 = "*"
+pygments = ">=2.7"
+sphinx = ">=5.0,<7.0"
+sphinx-basic-ng = "*"
 
 [[package]]
 name = "gitdb"
-version = "4.0.9"
+version = "4.0.10"
 description = "Git Object Database"
 category = "dev"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 files = [
-    {file = "gitdb-4.0.9-py3-none-any.whl", hash = "sha256:8033ad4e853066ba6ca92050b9df2f89301b8fc8bf7e9324d412a63f8bf1a8fd"},
-    {file = "gitdb-4.0.9.tar.gz", hash = "sha256:bac2fd45c0a1c9cf619e63a90d62bdc63892ef92387424b855792a6cabe789aa"},
+    {file = "gitdb-4.0.10-py3-none-any.whl", hash = "sha256:c286cf298426064079ed96a9e4a9d39e7f3e9bf15ba60701e95f5492f28415c7"},
+    {file = "gitdb-4.0.10.tar.gz", hash = "sha256:6eb990b69df4e15bad899ea868dc46572c3f75339735663b81de79b06f17eb9a"},
 ]
 
 [package.dependencies]
@@ -691,6 +841,18 @@ files = [
     {file = "ijson-3.2.0.post0.tar.gz", hash = "sha256:80a5bd7e9923cab200701f67ad2372104328b99ddf249dbbe8834102c852d316"},
 ]
 
+[[package]]
+name = "imagesize"
+version = "1.4.1"
+description = "Getting image size from png/jpeg/jpeg2000/gif file"
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+files = [
+    {file = "imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b"},
+    {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"},
+]
+
 [[package]]
 name = "immutabledict"
 version = "2.2.3"
@@ -705,14 +867,14 @@ files = [
 
 [[package]]
 name = "importlib-metadata"
-version = "6.0.0"
+version = "6.1.0"
 description = "Read metadata from Python packages"
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "importlib_metadata-6.0.0-py3-none-any.whl", hash = "sha256:7efb448ec9a5e313a57655d35aa54cd3e01b7e1fbcf72dce1bf06119420f5bad"},
-    {file = "importlib_metadata-6.0.0.tar.gz", hash = "sha256:e354bedeb60efa6affdcc8ae121b73544a7aa74156d047311948f6d711cd378d"},
+    {file = "importlib_metadata-6.1.0-py3-none-any.whl", hash = "sha256:ff80f3b5394912eb1b108fcfd444dc78b7f1f3e16b16188054bd01cb9cb86f09"},
+    {file = "importlib_metadata-6.1.0.tar.gz", hash = "sha256:43ce9281e097583d758c2c708c4376371261a02c34682491a8e98352365aad20"},
 ]
 
 [package.dependencies]
@@ -726,36 +888,37 @@ testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packag
 
 [[package]]
 name = "importlib-resources"
-version = "5.4.0"
+version = "5.12.0"
 description = "Read resources from Python packages"
 category = "main"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 files = [
-    {file = "importlib_resources-5.4.0-py3-none-any.whl", hash = "sha256:33a95faed5fc19b4bc16b29a6eeae248a3fe69dd55d4d229d2b480e23eeaad45"},
-    {file = "importlib_resources-5.4.0.tar.gz", hash = "sha256:d756e2f85dd4de2ba89be0b21dba2a3bbec2e871a42a3a16719258a11f87506b"},
+    {file = "importlib_resources-5.12.0-py3-none-any.whl", hash = "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a"},
+    {file = "importlib_resources-5.12.0.tar.gz", hash = "sha256:4be82589bf5c1d7999aedf2a45159d10cb3ca4f19b2271f8792bc8e6da7b22f6"},
 ]
 
 [package.dependencies]
 zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""}
 
 [package.extras]
-docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"]
-testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy"]
+docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]
 
 [[package]]
 name = "incremental"
-version = "21.3.0"
-description = "A small library that versions your Python projects."
+version = "22.10.0"
+description = "\"A small library that versions your Python projects.\""
 category = "main"
 optional = false
 python-versions = "*"
 files = [
-    {file = "incremental-21.3.0-py2.py3-none-any.whl", hash = "sha256:92014aebc6a20b78a8084cdd5645eeaa7f74b8933f70fa3ada2cfbd1e3b54321"},
-    {file = "incremental-21.3.0.tar.gz", hash = "sha256:02f5de5aff48f6b9f665d99d48bfc7ec03b6e3943210de7cfc88856d755d6f57"},
+    {file = "incremental-22.10.0-py2.py3-none-any.whl", hash = "sha256:b864a1f30885ee72c5ac2835a761b8fe8aa9c28b9395cacf27286602688d3e51"},
+    {file = "incremental-22.10.0.tar.gz", hash = "sha256:912feeb5e0f7e0188e6f42241d2f450002e11bbc0937c65865045854c24c0bd0"},
 ]
 
 [package.extras]
+mypy = ["click (>=6.0)", "mypy (==0.812)", "twisted (>=16.4.0)"]
 scripts = ["click (>=6.0)", "twisted (>=16.4.0)"]
 
 [[package]]
@@ -796,20 +959,39 @@ tornado = ">=4.3"
 [package.extras]
 tests = ["codecov", "coverage", "flake8", "flake8-quotes", "flake8-typing-imports", "mock", "mypy", "opentracing_instrumentation (>=3,<4)", "prometheus_client (==0.11.0)", "pycurl", "pytest", "pytest-benchmark[histogram]", "pytest-cov", "pytest-localserver", "pytest-timeout", "pytest-tornado", "tchannel (==2.1.0)"]
 
+[[package]]
+name = "jaraco-classes"
+version = "3.2.3"
+description = "Utility functions for Python class constructs"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "jaraco.classes-3.2.3-py3-none-any.whl", hash = "sha256:2353de3288bc6b82120752201c6b1c1a14b058267fa424ed5ce5984e3b922158"},
+    {file = "jaraco.classes-3.2.3.tar.gz", hash = "sha256:89559fa5c1d3c34eff6f631ad80bb21f378dbcbb35dd161fd2c6b93f5be2f98a"},
+]
+
+[package.dependencies]
+more-itertools = "*"
+
+[package.extras]
+docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)"]
+testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]
+
 [[package]]
 name = "jeepney"
-version = "0.7.1"
+version = "0.8.0"
 description = "Low-level, pure Python DBus protocol wrapper."
 category = "dev"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 files = [
-    {file = "jeepney-0.7.1-py3-none-any.whl", hash = "sha256:1b5a0ea5c0e7b166b2f5895b91a08c14de8915afda4407fb5022a195224958ac"},
-    {file = "jeepney-0.7.1.tar.gz", hash = "sha256:fa9e232dfa0c498bd0b8a3a73b8d8a31978304dcef0515adc859d4e096f96f4f"},
+    {file = "jeepney-0.8.0-py3-none-any.whl", hash = "sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755"},
+    {file = "jeepney-0.8.0.tar.gz", hash = "sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806"},
 ]
 
 [package.extras]
-test = ["async-timeout", "pytest", "pytest-asyncio", "pytest-trio", "testpath", "trio"]
+test = ["async-timeout", "pytest", "pytest-asyncio (>=0.17)", "pytest-trio", "testpath", "trio"]
 trio = ["async_generator", "trio"]
 
 [[package]]
@@ -856,25 +1038,74 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-
 
 [[package]]
 name = "keyring"
-version = "23.5.0"
+version = "23.13.1"
 description = "Store and access your passwords safely."
 category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "keyring-23.5.0-py3-none-any.whl", hash = "sha256:b0d28928ac3ec8e42ef4cc227822647a19f1d544f21f96457965dc01cf555261"},
-    {file = "keyring-23.5.0.tar.gz", hash = "sha256:9012508e141a80bd1c0b6778d5c610dd9f8c464d75ac6774248500503f972fb9"},
+    {file = "keyring-23.13.1-py3-none-any.whl", hash = "sha256:771ed2a91909389ed6148631de678f82ddc73737d85a927f382a8a1b157898cd"},
+    {file = "keyring-23.13.1.tar.gz", hash = "sha256:ba2e15a9b35e21908d0aaf4e0a47acc52d6ae33444df0da2b49d41a46ef6d678"},
 ]
 
 [package.dependencies]
-importlib-metadata = ">=3.6"
+importlib-metadata = {version = ">=4.11.4", markers = "python_version < \"3.12\""}
+importlib-resources = {version = "*", markers = "python_version < \"3.9\""}
+"jaraco.classes" = "*"
 jeepney = {version = ">=0.4.2", markers = "sys_platform == \"linux\""}
-pywin32-ctypes = {version = "<0.1.0 || >0.1.0,<0.1.1 || >0.1.1", markers = "sys_platform == \"win32\""}
+pywin32-ctypes = {version = ">=0.2.0", markers = "sys_platform == \"win32\""}
 SecretStorage = {version = ">=3.2", markers = "sys_platform == \"linux\""}
 
 [package.extras]
-docs = ["jaraco.packaging (>=8.2)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"]
-testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy"]
+completion = ["shtab"]
+docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)"]
+testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]
+
+[[package]]
+name = "lazy-object-proxy"
+version = "1.9.0"
+description = "A fast and thorough lazy object proxy."
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "lazy-object-proxy-1.9.0.tar.gz", hash = "sha256:659fb5809fa4629b8a1ac5106f669cfc7bef26fbb389dda53b3e010d1ac4ebae"},
+    {file = "lazy_object_proxy-1.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b40387277b0ed2d0602b8293b94d7257e17d1479e257b4de114ea11a8cb7f2d7"},
+    {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8c6cfb338b133fbdbc5cfaa10fe3c6aeea827db80c978dbd13bc9dd8526b7d4"},
+    {file = "lazy_object_proxy-1.9.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:721532711daa7db0d8b779b0bb0318fa87af1c10d7fe5e52ef30f8eff254d0cd"},
+    {file = "lazy_object_proxy-1.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:66a3de4a3ec06cd8af3f61b8e1ec67614fbb7c995d02fa224813cb7afefee701"},
+    {file = "lazy_object_proxy-1.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1aa3de4088c89a1b69f8ec0dcc169aa725b0ff017899ac568fe44ddc1396df46"},
+    {file = "lazy_object_proxy-1.9.0-cp310-cp310-win32.whl", hash = "sha256:f0705c376533ed2a9e5e97aacdbfe04cecd71e0aa84c7c0595d02ef93b6e4455"},
+    {file = "lazy_object_proxy-1.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:ea806fd4c37bf7e7ad82537b0757999264d5f70c45468447bb2b91afdbe73a6e"},
+    {file = "lazy_object_proxy-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:946d27deaff6cf8452ed0dba83ba38839a87f4f7a9732e8f9fd4107b21e6ff07"},
+    {file = "lazy_object_proxy-1.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79a31b086e7e68b24b99b23d57723ef7e2c6d81ed21007b6281ebcd1688acb0a"},
+    {file = "lazy_object_proxy-1.9.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f699ac1c768270c9e384e4cbd268d6e67aebcfae6cd623b4d7c3bfde5a35db59"},
+    {file = "lazy_object_proxy-1.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bfb38f9ffb53b942f2b5954e0f610f1e721ccebe9cce9025a38c8ccf4a5183a4"},
+    {file = "lazy_object_proxy-1.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:189bbd5d41ae7a498397287c408617fe5c48633e7755287b21d741f7db2706a9"},
+    {file = "lazy_object_proxy-1.9.0-cp311-cp311-win32.whl", hash = "sha256:81fc4d08b062b535d95c9ea70dbe8a335c45c04029878e62d744bdced5141586"},
+    {file = "lazy_object_proxy-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:f2457189d8257dd41ae9b434ba33298aec198e30adf2dcdaaa3a28b9994f6adb"},
+    {file = "lazy_object_proxy-1.9.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d9e25ef10a39e8afe59a5c348a4dbf29b4868ab76269f81ce1674494e2565a6e"},
+    {file = "lazy_object_proxy-1.9.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cbf9b082426036e19c6924a9ce90c740a9861e2bdc27a4834fd0a910742ac1e8"},
+    {file = "lazy_object_proxy-1.9.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f5fa4a61ce2438267163891961cfd5e32ec97a2c444e5b842d574251ade27d2"},
+    {file = "lazy_object_proxy-1.9.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:8fa02eaab317b1e9e03f69aab1f91e120e7899b392c4fc19807a8278a07a97e8"},
+    {file = "lazy_object_proxy-1.9.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e7c21c95cae3c05c14aafffe2865bbd5e377cfc1348c4f7751d9dc9a48ca4bda"},
+    {file = "lazy_object_proxy-1.9.0-cp37-cp37m-win32.whl", hash = "sha256:f12ad7126ae0c98d601a7ee504c1122bcef553d1d5e0c3bfa77b16b3968d2734"},
+    {file = "lazy_object_proxy-1.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:edd20c5a55acb67c7ed471fa2b5fb66cb17f61430b7a6b9c3b4a1e40293b1671"},
+    {file = "lazy_object_proxy-1.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2d0daa332786cf3bb49e10dc6a17a52f6a8f9601b4cf5c295a4f85854d61de63"},
+    {file = "lazy_object_proxy-1.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cd077f3d04a58e83d04b20e334f678c2b0ff9879b9375ed107d5d07ff160171"},
+    {file = "lazy_object_proxy-1.9.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:660c94ea760b3ce47d1855a30984c78327500493d396eac4dfd8bd82041b22be"},
+    {file = "lazy_object_proxy-1.9.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:212774e4dfa851e74d393a2370871e174d7ff0ebc980907723bb67d25c8a7c30"},
+    {file = "lazy_object_proxy-1.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f0117049dd1d5635bbff65444496c90e0baa48ea405125c088e93d9cf4525b11"},
+    {file = "lazy_object_proxy-1.9.0-cp38-cp38-win32.whl", hash = "sha256:0a891e4e41b54fd5b8313b96399f8b0e173bbbfc03c7631f01efbe29bb0bcf82"},
+    {file = "lazy_object_proxy-1.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:9990d8e71b9f6488e91ad25f322898c136b008d87bf852ff65391b004da5e17b"},
+    {file = "lazy_object_proxy-1.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9e7551208b2aded9c1447453ee366f1c4070602b3d932ace044715d89666899b"},
+    {file = "lazy_object_proxy-1.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f83ac4d83ef0ab017683d715ed356e30dd48a93746309c8f3517e1287523ef4"},
+    {file = "lazy_object_proxy-1.9.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7322c3d6f1766d4ef1e51a465f47955f1e8123caee67dd641e67d539a534d006"},
+    {file = "lazy_object_proxy-1.9.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:18b78ec83edbbeb69efdc0e9c1cb41a3b1b1ed11ddd8ded602464c3fc6020494"},
+    {file = "lazy_object_proxy-1.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:09763491ce220c0299688940f8dc2c5d05fd1f45af1e42e636b2e8b2303e4382"},
+    {file = "lazy_object_proxy-1.9.0-cp39-cp39-win32.whl", hash = "sha256:9090d8e53235aa280fc9239a86ae3ea8ac58eff66a705fa6aa2ec4968b95c821"},
+    {file = "lazy_object_proxy-1.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:db1c1722726f47e10e0b5fdbf15ac3b8adb58c091d12b3ab713965795036985f"},
+]
 
 [[package]]
 name = "ldap3"
@@ -984,54 +1215,90 @@ html5 = ["html5lib"]
 htmlsoup = ["BeautifulSoup4"]
 source = ["Cython (>=0.29.7)"]
 
+[[package]]
+name = "markdown-it-py"
+version = "2.2.0"
+description = "Python port of markdown-it. Markdown parsing, done right!"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "markdown-it-py-2.2.0.tar.gz", hash = "sha256:7c9a5e412688bc771c67432cbfebcdd686c93ce6484913dccf06cb5a0bea35a1"},
+    {file = "markdown_it_py-2.2.0-py3-none-any.whl", hash = "sha256:5a35f8d1870171d9acc47b99612dc146129b631baf04970128b568f190d0cc30"},
+]
+
+[package.dependencies]
+mdurl = ">=0.1,<1.0"
+typing_extensions = {version = ">=3.7.4", markers = "python_version < \"3.8\""}
+
+[package.extras]
+benchmarking = ["psutil", "pytest", "pytest-benchmark"]
+code-style = ["pre-commit (>=3.0,<4.0)"]
+compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"]
+linkify = ["linkify-it-py (>=1,<3)"]
+plugins = ["mdit-py-plugins"]
+profiling = ["gprof2dot"]
+rtd = ["attrs", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"]
+testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
+
 [[package]]
 name = "markupsafe"
-version = "2.1.0"
+version = "2.1.2"
 description = "Safely add untrusted strings to HTML/XML markup."
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "MarkupSafe-2.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3028252424c72b2602a323f70fbf50aa80a5d3aa616ea6add4ba21ae9cc9da4c"},
-    {file = "MarkupSafe-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:290b02bab3c9e216da57c1d11d2ba73a9f73a614bbdcc027d299a60cdfabb11a"},
-    {file = "MarkupSafe-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e104c0c2b4cd765b4e83909cde7ec61a1e313f8a75775897db321450e928cce"},
-    {file = "MarkupSafe-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24c3be29abb6b34052fd26fc7a8e0a49b1ee9d282e3665e8ad09a0a68faee5b3"},
-    {file = "MarkupSafe-2.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:204730fd5fe2fe3b1e9ccadb2bd18ba8712b111dcabce185af0b3b5285a7c989"},
-    {file = "MarkupSafe-2.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d3b64c65328cb4cd252c94f83e66e3d7acf8891e60ebf588d7b493a55a1dbf26"},
-    {file = "MarkupSafe-2.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:96de1932237abe0a13ba68b63e94113678c379dca45afa040a17b6e1ad7ed076"},
-    {file = "MarkupSafe-2.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:75bb36f134883fdbe13d8e63b8675f5f12b80bb6627f7714c7d6c5becf22719f"},
-    {file = "MarkupSafe-2.1.0-cp310-cp310-win32.whl", hash = "sha256:4056f752015dfa9828dce3140dbadd543b555afb3252507348c493def166d454"},
-    {file = "MarkupSafe-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:d4e702eea4a2903441f2735799d217f4ac1b55f7d8ad96ab7d4e25417cb0827c"},
-    {file = "MarkupSafe-2.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:f0eddfcabd6936558ec020130f932d479930581171368fd728efcfb6ef0dd357"},
-    {file = "MarkupSafe-2.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ddea4c352a488b5e1069069f2f501006b1a4362cb906bee9a193ef1245a7a61"},
-    {file = "MarkupSafe-2.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:09c86c9643cceb1d87ca08cdc30160d1b7ab49a8a21564868921959bd16441b8"},
-    {file = "MarkupSafe-2.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a0a0abef2ca47b33fb615b491ce31b055ef2430de52c5b3fb19a4042dbc5cadb"},
-    {file = "MarkupSafe-2.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:736895a020e31b428b3382a7887bfea96102c529530299f426bf2e636aacec9e"},
-    {file = "MarkupSafe-2.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:679cbb78914ab212c49c67ba2c7396dc599a8479de51b9a87b174700abd9ea49"},
-    {file = "MarkupSafe-2.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:84ad5e29bf8bab3ad70fd707d3c05524862bddc54dc040982b0dbcff36481de7"},
-    {file = "MarkupSafe-2.1.0-cp37-cp37m-win32.whl", hash = "sha256:8da5924cb1f9064589767b0f3fc39d03e3d0fb5aa29e0cb21d43106519bd624a"},
-    {file = "MarkupSafe-2.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:454ffc1cbb75227d15667c09f164a0099159da0c1f3d2636aa648f12675491ad"},
-    {file = "MarkupSafe-2.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:142119fb14a1ef6d758912b25c4e803c3ff66920635c44078666fe7cc3f8f759"},
-    {file = "MarkupSafe-2.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b2a5a856019d2833c56a3dcac1b80fe795c95f401818ea963594b345929dffa7"},
-    {file = "MarkupSafe-2.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d1fb9b2eec3c9714dd936860850300b51dbaa37404209c8d4cb66547884b7ed"},
-    {file = "MarkupSafe-2.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62c0285e91414f5c8f621a17b69fc0088394ccdaa961ef469e833dbff64bd5ea"},
-    {file = "MarkupSafe-2.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc3150f85e2dbcf99e65238c842d1cfe69d3e7649b19864c1cc043213d9cd730"},
-    {file = "MarkupSafe-2.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f02cf7221d5cd915d7fa58ab64f7ee6dd0f6cddbb48683debf5d04ae9b1c2cc1"},
-    {file = "MarkupSafe-2.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d5653619b3eb5cbd35bfba3c12d575db2a74d15e0e1c08bf1db788069d410ce8"},
-    {file = "MarkupSafe-2.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:7d2f5d97fcbd004c03df8d8fe2b973fe2b14e7bfeb2cfa012eaa8759ce9a762f"},
-    {file = "MarkupSafe-2.1.0-cp38-cp38-win32.whl", hash = "sha256:3cace1837bc84e63b3fd2dfce37f08f8c18aeb81ef5cf6bb9b51f625cb4e6cd8"},
-    {file = "MarkupSafe-2.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:fabbe18087c3d33c5824cb145ffca52eccd053061df1d79d4b66dafa5ad2a5ea"},
-    {file = "MarkupSafe-2.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:023af8c54fe63530545f70dd2a2a7eed18d07a9a77b94e8bf1e2ff7f252db9a3"},
-    {file = "MarkupSafe-2.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d66624f04de4af8bbf1c7f21cc06649c1c69a7f84109179add573ce35e46d448"},
-    {file = "MarkupSafe-2.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c532d5ab79be0199fa2658e24a02fce8542df196e60665dd322409a03db6a52c"},
-    {file = "MarkupSafe-2.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e67ec74fada3841b8c5f4c4f197bea916025cb9aa3fe5abf7d52b655d042f956"},
-    {file = "MarkupSafe-2.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30c653fde75a6e5eb814d2a0a89378f83d1d3f502ab710904ee585c38888816c"},
-    {file = "MarkupSafe-2.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:961eb86e5be7d0973789f30ebcf6caab60b844203f4396ece27310295a6082c7"},
-    {file = "MarkupSafe-2.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:598b65d74615c021423bd45c2bc5e9b59539c875a9bdb7e5f2a6b92dfcfc268d"},
-    {file = "MarkupSafe-2.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:599941da468f2cf22bf90a84f6e2a65524e87be2fce844f96f2dd9a6c9d1e635"},
-    {file = "MarkupSafe-2.1.0-cp39-cp39-win32.whl", hash = "sha256:e6f7f3f41faffaea6596da86ecc2389672fa949bd035251eab26dc6697451d05"},
-    {file = "MarkupSafe-2.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:b8811d48078d1cf2a6863dafb896e68406c5f513048451cd2ded0473133473c7"},
-    {file = "MarkupSafe-2.1.0.tar.gz", hash = "sha256:80beaf63ddfbc64a0452b841d8036ca0611e049650e20afcb882f5d3c266d65f"},
+    {file = "MarkupSafe-2.1.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:665a36ae6f8f20a4676b53224e33d456a6f5a72657d9c83c2aa00765072f31f7"},
+    {file = "MarkupSafe-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:340bea174e9761308703ae988e982005aedf427de816d1afe98147668cc03036"},
+    {file = "MarkupSafe-2.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22152d00bf4a9c7c83960521fc558f55a1adbc0631fbb00a9471e097b19d72e1"},
+    {file = "MarkupSafe-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28057e985dace2f478e042eaa15606c7efccb700797660629da387eb289b9323"},
+    {file = "MarkupSafe-2.1.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca244fa73f50a800cf8c3ebf7fd93149ec37f5cb9596aa8873ae2c1d23498601"},
+    {file = "MarkupSafe-2.1.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d9d971ec1e79906046aa3ca266de79eac42f1dbf3612a05dc9368125952bd1a1"},
+    {file = "MarkupSafe-2.1.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7e007132af78ea9df29495dbf7b5824cb71648d7133cf7848a2a5dd00d36f9ff"},
+    {file = "MarkupSafe-2.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7313ce6a199651c4ed9d7e4cfb4aa56fe923b1adf9af3b420ee14e6d9a73df65"},
+    {file = "MarkupSafe-2.1.2-cp310-cp310-win32.whl", hash = "sha256:c4a549890a45f57f1ebf99c067a4ad0cb423a05544accaf2b065246827ed9603"},
+    {file = "MarkupSafe-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:835fb5e38fd89328e9c81067fd642b3593c33e1e17e2fdbf77f5676abb14a156"},
+    {file = "MarkupSafe-2.1.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2ec4f2d48ae59bbb9d1f9d7efb9236ab81429a764dedca114f5fdabbc3788013"},
+    {file = "MarkupSafe-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:608e7073dfa9e38a85d38474c082d4281f4ce276ac0010224eaba11e929dd53a"},
+    {file = "MarkupSafe-2.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:65608c35bfb8a76763f37036547f7adfd09270fbdbf96608be2bead319728fcd"},
+    {file = "MarkupSafe-2.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2bfb563d0211ce16b63c7cb9395d2c682a23187f54c3d79bfec33e6705473c6"},
+    {file = "MarkupSafe-2.1.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:da25303d91526aac3672ee6d49a2f3db2d9502a4a60b55519feb1a4c7714e07d"},
+    {file = "MarkupSafe-2.1.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:9cad97ab29dfc3f0249b483412c85c8ef4766d96cdf9dcf5a1e3caa3f3661cf1"},
+    {file = "MarkupSafe-2.1.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:085fd3201e7b12809f9e6e9bc1e5c96a368c8523fad5afb02afe3c051ae4afcc"},
+    {file = "MarkupSafe-2.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1bea30e9bf331f3fef67e0a3877b2288593c98a21ccb2cf29b74c581a4eb3af0"},
+    {file = "MarkupSafe-2.1.2-cp311-cp311-win32.whl", hash = "sha256:7df70907e00c970c60b9ef2938d894a9381f38e6b9db73c5be35e59d92e06625"},
+    {file = "MarkupSafe-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:e55e40ff0cc8cc5c07996915ad367fa47da6b3fc091fdadca7f5403239c5fec3"},
+    {file = "MarkupSafe-2.1.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a6e40afa7f45939ca356f348c8e23048e02cb109ced1eb8420961b2f40fb373a"},
+    {file = "MarkupSafe-2.1.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf877ab4ed6e302ec1d04952ca358b381a882fbd9d1b07cccbfd61783561f98a"},
+    {file = "MarkupSafe-2.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63ba06c9941e46fa389d389644e2d8225e0e3e5ebcc4ff1ea8506dce646f8c8a"},
+    {file = "MarkupSafe-2.1.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f1cd098434e83e656abf198f103a8207a8187c0fc110306691a2e94a78d0abb2"},
+    {file = "MarkupSafe-2.1.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:55f44b440d491028addb3b88f72207d71eeebfb7b5dbf0643f7c023ae1fba619"},
+    {file = "MarkupSafe-2.1.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a6f2fcca746e8d5910e18782f976489939d54a91f9411c32051b4aab2bd7c513"},
+    {file = "MarkupSafe-2.1.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0b462104ba25f1ac006fdab8b6a01ebbfbce9ed37fd37fd4acd70c67c973e460"},
+    {file = "MarkupSafe-2.1.2-cp37-cp37m-win32.whl", hash = "sha256:7668b52e102d0ed87cb082380a7e2e1e78737ddecdde129acadb0eccc5423859"},
+    {file = "MarkupSafe-2.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6d6607f98fcf17e534162f0709aaad3ab7a96032723d8ac8750ffe17ae5a0666"},
+    {file = "MarkupSafe-2.1.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a806db027852538d2ad7555b203300173dd1b77ba116de92da9afbc3a3be3eed"},
+    {file = "MarkupSafe-2.1.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a4abaec6ca3ad8660690236d11bfe28dfd707778e2442b45addd2f086d6ef094"},
+    {file = "MarkupSafe-2.1.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f03a532d7dee1bed20bc4884194a16160a2de9ffc6354b3878ec9682bb623c54"},
+    {file = "MarkupSafe-2.1.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4cf06cdc1dda95223e9d2d3c58d3b178aa5dacb35ee7e3bbac10e4e1faacb419"},
+    {file = "MarkupSafe-2.1.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22731d79ed2eb25059ae3df1dfc9cb1546691cc41f4e3130fe6bfbc3ecbbecfa"},
+    {file = "MarkupSafe-2.1.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f8ffb705ffcf5ddd0e80b65ddf7bed7ee4f5a441ea7d3419e861a12eaf41af58"},
+    {file = "MarkupSafe-2.1.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8db032bf0ce9022a8e41a22598eefc802314e81b879ae093f36ce9ddf39ab1ba"},
+    {file = "MarkupSafe-2.1.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2298c859cfc5463f1b64bd55cb3e602528db6fa0f3cfd568d3605c50678f8f03"},
+    {file = "MarkupSafe-2.1.2-cp38-cp38-win32.whl", hash = "sha256:50c42830a633fa0cf9e7d27664637532791bfc31c731a87b202d2d8ac40c3ea2"},
+    {file = "MarkupSafe-2.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:bb06feb762bade6bf3c8b844462274db0c76acc95c52abe8dbed28ae3d44a147"},
+    {file = "MarkupSafe-2.1.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99625a92da8229df6d44335e6fcc558a5037dd0a760e11d84be2260e6f37002f"},
+    {file = "MarkupSafe-2.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8bca7e26c1dd751236cfb0c6c72d4ad61d986e9a41bbf76cb445f69488b2a2bd"},
+    {file = "MarkupSafe-2.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40627dcf047dadb22cd25ea7ecfe9cbf3bbbad0482ee5920b582f3809c97654f"},
+    {file = "MarkupSafe-2.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40dfd3fefbef579ee058f139733ac336312663c6706d1163b82b3003fb1925c4"},
+    {file = "MarkupSafe-2.1.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:090376d812fb6ac5f171e5938e82e7f2d7adc2b629101cec0db8b267815c85e2"},
+    {file = "MarkupSafe-2.1.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2e7821bffe00aa6bd07a23913b7f4e01328c3d5cc0b40b36c0bd81d362faeb65"},
+    {file = "MarkupSafe-2.1.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c0a33bc9f02c2b17c3ea382f91b4db0e6cde90b63b296422a939886a7a80de1c"},
+    {file = "MarkupSafe-2.1.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b8526c6d437855442cdd3d87eede9c425c4445ea011ca38d937db299382e6fa3"},
+    {file = "MarkupSafe-2.1.2-cp39-cp39-win32.whl", hash = "sha256:137678c63c977754abe9086a3ec011e8fd985ab90631145dfb9294ad09c102a7"},
+    {file = "MarkupSafe-2.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:0576fe974b40a400449768941d5d0858cc624e3249dfd1e0c33674e5c7ca7aed"},
+    {file = "MarkupSafe-2.1.2.tar.gz", hash = "sha256:abcabc8c2b26036d62d4c746381a6f7cf60aafcc653198ad678306986b09450d"},
 ]
 
 [[package]]
@@ -1074,6 +1341,50 @@ Twisted = ">=15.1.0"
 [package.extras]
 dev = ["black (==22.3.0)", "flake8 (==4.0.1)", "isort (==5.9.3)", "ldaptor", "matrix-synapse", "mypy (==0.910)", "tox", "types-setuptools"]
 
+[[package]]
+name = "mdit-py-plugins"
+version = "0.3.5"
+description = "Collection of plugins for markdown-it-py"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "mdit-py-plugins-0.3.5.tar.gz", hash = "sha256:eee0adc7195e5827e17e02d2a258a2ba159944a0748f59c5099a4a27f78fcf6a"},
+    {file = "mdit_py_plugins-0.3.5-py3-none-any.whl", hash = "sha256:ca9a0714ea59a24b2b044a1831f48d817dd0c817e84339f20e7889f392d77c4e"},
+]
+
+[package.dependencies]
+markdown-it-py = ">=1.0.0,<3.0.0"
+
+[package.extras]
+code-style = ["pre-commit"]
+rtd = ["attrs", "myst-parser (>=0.16.1,<0.17.0)", "sphinx-book-theme (>=0.1.0,<0.2.0)"]
+testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
+
+[[package]]
+name = "mdurl"
+version = "0.1.2"
+description = "Markdown URL utilities"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"},
+    {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
+]
+
+[[package]]
+name = "more-itertools"
+version = "9.1.0"
+description = "More routines for operating on iterables, beyond itertools"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "more-itertools-9.1.0.tar.gz", hash = "sha256:cabaa341ad0389ea83c17a94566a53ae4c9d07349861ecb14dc6d0345cf9ac5d"},
+    {file = "more_itertools-9.1.0-py3-none-any.whl", hash = "sha256:d2bc7f02446e86a68911e58ded76d6561eea00cddfb2a91e7019bbb586c799f3"},
+]
+
 [[package]]
 name = "msgpack"
 version = "1.0.5"
@@ -1197,14 +1508,14 @@ reports = ["lxml"]
 
 [[package]]
 name = "mypy-extensions"
-version = "0.4.3"
-description = "Experimental type system extensions for programs checked with the mypy typechecker."
+version = "1.0.0"
+description = "Type system extensions for programs checked with the mypy type checker."
 category = "dev"
 optional = false
-python-versions = "*"
+python-versions = ">=3.5"
 files = [
-    {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"},
-    {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"},
+    {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
+    {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
 ]
 
 [[package]]
@@ -1227,6 +1538,33 @@ mypy = "1.0.0"
 [package.extras]
 test = ["lxml", "pytest (>=4.6)", "pytest-cov"]
 
+[[package]]
+name = "myst-parser"
+version = "1.0.0"
+description = "An extended [CommonMark](https://spec.commonmark.org/) compliant parser,"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "myst-parser-1.0.0.tar.gz", hash = "sha256:502845659313099542bd38a2ae62f01360e7dd4b1310f025dd014dfc0439cdae"},
+    {file = "myst_parser-1.0.0-py3-none-any.whl", hash = "sha256:69fb40a586c6fa68995e6521ac0a525793935db7e724ca9bac1d33be51be9a4c"},
+]
+
+[package.dependencies]
+docutils = ">=0.15,<0.20"
+jinja2 = "*"
+markdown-it-py = ">=1.0.0,<3.0.0"
+mdit-py-plugins = ">=0.3.4,<0.4.0"
+pyyaml = "*"
+sphinx = ">=5,<7"
+
+[package.extras]
+code-style = ["pre-commit (>=3.0,<4.0)"]
+linkify = ["linkify-it-py (>=1.0,<2.0)"]
+rtd = ["ipython", "pydata-sphinx-theme (==v0.13.0rc4)", "sphinx-autodoc2 (>=0.4.2,<0.5.0)", "sphinx-book-theme (==1.0.0rc2)", "sphinx-copybutton", "sphinx-design2", "sphinx-pyscript", "sphinx-tippy (>=0.3.1)", "sphinx-togglebutton", "sphinxext-opengraph (>=0.7.5,<0.8.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"]
+testing = ["beautifulsoup4", "coverage[toml]", "pytest (>=7,<8)", "pytest-cov", "pytest-param-files (>=0.3.4,<0.4.0)", "pytest-regressions", "sphinx-pytest"]
+testing-docutils = ["pygments", "pytest (>=7,<8)", "pytest-param-files (>=0.3.4,<0.4.0)"]
+
 [[package]]
 name = "netaddr"
 version = "0.8.0"
@@ -1282,26 +1620,26 @@ dev = ["jinja2"]
 
 [[package]]
 name = "pathspec"
-version = "0.9.0"
+version = "0.11.1"
 description = "Utility library for gitignore style pattern matching of file paths."
 category = "dev"
 optional = false
-python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
+python-versions = ">=3.7"
 files = [
-    {file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"},
-    {file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"},
+    {file = "pathspec-0.11.1-py3-none-any.whl", hash = "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293"},
+    {file = "pathspec-0.11.1.tar.gz", hash = "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687"},
 ]
 
 [[package]]
 name = "phonenumbers"
-version = "8.13.5"
+version = "8.13.7"
 description = "Python version of Google's common library for parsing, formatting, storing and validating international phone numbers."
 category = "main"
 optional = false
 python-versions = "*"
 files = [
-    {file = "phonenumbers-8.13.5-py2.py3-none-any.whl", hash = "sha256:2e3fd1f3fde226b289489275517c76edf223eafd9f43a2c2c36498a44b73d4b0"},
-    {file = "phonenumbers-8.13.5.tar.gz", hash = "sha256:6eb2faf29c19f946baf10f1c977a1f856cab90819fe7735b8e141d5407420c4a"},
+    {file = "phonenumbers-8.13.7-py2.py3-none-any.whl", hash = "sha256:d3e3555b38c89b121f5b2e917847003bdd07027569d758d5f40156c01aeac089"},
+    {file = "phonenumbers-8.13.7.tar.gz", hash = "sha256:253bb0e01250d21a11f2b42b3e6e161b7f6cb2ac440e2e2a95c1da71d221ee1a"},
 ]
 
 [[package]]
@@ -1397,21 +1735,21 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa
 
 [[package]]
 name = "pkginfo"
-version = "1.8.2"
-description = "Query metadatdata from sdists / bdists / installed packages."
+version = "1.9.6"
+description = "Query metadata from sdists / bdists / installed packages."
 category = "dev"
 optional = false
-python-versions = "*"
+python-versions = ">=3.6"
 files = [
-    {file = "pkginfo-1.8.2-py2.py3-none-any.whl", hash = "sha256:c24c487c6a7f72c66e816ab1796b96ac6c3d14d49338293d2141664330b55ffc"},
-    {file = "pkginfo-1.8.2.tar.gz", hash = "sha256:542e0d0b6750e2e21c20179803e40ab50598d8066d51097a0e382cba9eb02bff"},
+    {file = "pkginfo-1.9.6-py3-none-any.whl", hash = "sha256:4b7a555a6d5a22169fcc9cf7bfd78d296b0361adad412a346c1226849af5e546"},
+    {file = "pkginfo-1.9.6.tar.gz", hash = "sha256:8fd5896e8718a4372f0ea9cc9d96f6417c9b986e23a4d116dda26b62cc29d046"},
 ]
 
 [package.extras]
-testing = ["coverage", "nose"]
+testing = ["pytest", "pytest-cov"]
 
 [[package]]
-name = "pkgutil_resolve_name"
+name = "pkgutil-resolve-name"
 version = "1.3.10"
 description = "Resolve a name to an object."
 category = "main"
@@ -1424,19 +1762,22 @@ files = [
 
 [[package]]
 name = "platformdirs"
-version = "2.5.1"
-description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
+version = "3.1.1"
+description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
 category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "platformdirs-2.5.1-py3-none-any.whl", hash = "sha256:bcae7cab893c2d310a711b70b24efb93334febe65f8de776ee320b517471e227"},
-    {file = "platformdirs-2.5.1.tar.gz", hash = "sha256:7535e70dfa32e84d4b34996ea99c5e432fa29a708d0f4e394bbcb2a8faa4f16d"},
+    {file = "platformdirs-3.1.1-py3-none-any.whl", hash = "sha256:e5986afb596e4bb5bde29a79ac9061aa955b94fca2399b7aaac4090860920dd8"},
+    {file = "platformdirs-3.1.1.tar.gz", hash = "sha256:024996549ee88ec1a9aa99ff7f8fc819bb59e2c3477b410d90a16d32d6e707aa"},
 ]
 
+[package.dependencies]
+typing-extensions = {version = ">=4.4", markers = "python_version < \"3.8\""}
+
 [package.extras]
-docs = ["Sphinx (>=4)", "furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)"]
-test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)"]
+docs = ["furo (>=2022.12.7)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.22,!=1.23.4)"]
+test = ["appdirs (==1.4.4)", "covdefaults (>=2.2.2)", "pytest (>=7.2.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"]
 
 [[package]]
 name = "prometheus-client"
@@ -1546,48 +1887,48 @@ files = [
 
 [[package]]
 name = "pydantic"
-version = "1.10.6"
+version = "1.10.7"
 description = "Data validation and settings management using python type hints"
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pydantic-1.10.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f9289065611c48147c1dd1fd344e9d57ab45f1d99b0fb26c51f1cf72cd9bcd31"},
-    {file = "pydantic-1.10.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8c32b6bba301490d9bb2bf5f631907803135e8085b6aa3e5fe5a770d46dd0160"},
-    {file = "pydantic-1.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd9b9e98068fa1068edfc9eabde70a7132017bdd4f362f8b4fd0abed79c33083"},
-    {file = "pydantic-1.10.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c84583b9df62522829cbc46e2b22e0ec11445625b5acd70c5681ce09c9b11c4"},
-    {file = "pydantic-1.10.6-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:b41822064585fea56d0116aa431fbd5137ce69dfe837b599e310034171996084"},
-    {file = "pydantic-1.10.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:61f1f08adfaa9cc02e0cbc94f478140385cbd52d5b3c5a657c2fceb15de8d1fb"},
-    {file = "pydantic-1.10.6-cp310-cp310-win_amd64.whl", hash = "sha256:32937835e525d92c98a1512218db4eed9ddc8f4ee2a78382d77f54341972c0e7"},
-    {file = "pydantic-1.10.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bbd5c531b22928e63d0cb1868dee76123456e1de2f1cb45879e9e7a3f3f1779b"},
-    {file = "pydantic-1.10.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e277bd18339177daa62a294256869bbe84df1fb592be2716ec62627bb8d7c81d"},
-    {file = "pydantic-1.10.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f15277d720aa57e173954d237628a8d304896364b9de745dcb722f584812c7"},
-    {file = "pydantic-1.10.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b243b564cea2576725e77aeeda54e3e0229a168bc587d536cd69941e6797543d"},
-    {file = "pydantic-1.10.6-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3ce13a558b484c9ae48a6a7c184b1ba0e5588c5525482681db418268e5f86186"},
-    {file = "pydantic-1.10.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3ac1cd4deed871dfe0c5f63721e29debf03e2deefa41b3ed5eb5f5df287c7b70"},
-    {file = "pydantic-1.10.6-cp311-cp311-win_amd64.whl", hash = "sha256:b1eb6610330a1dfba9ce142ada792f26bbef1255b75f538196a39e9e90388bf4"},
-    {file = "pydantic-1.10.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4ca83739c1263a044ec8b79df4eefc34bbac87191f0a513d00dd47d46e307a65"},
-    {file = "pydantic-1.10.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea4e2a7cb409951988e79a469f609bba998a576e6d7b9791ae5d1e0619e1c0f2"},
-    {file = "pydantic-1.10.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53de12b4608290992a943801d7756f18a37b7aee284b9ffa794ee8ea8153f8e2"},
-    {file = "pydantic-1.10.6-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:60184e80aac3b56933c71c48d6181e630b0fbc61ae455a63322a66a23c14731a"},
-    {file = "pydantic-1.10.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:415a3f719ce518e95a92effc7ee30118a25c3d032455d13e121e3840985f2efd"},
-    {file = "pydantic-1.10.6-cp37-cp37m-win_amd64.whl", hash = "sha256:72cb30894a34d3a7ab6d959b45a70abac8a2a93b6480fc5a7bfbd9c935bdc4fb"},
-    {file = "pydantic-1.10.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3091d2eaeda25391405e36c2fc2ed102b48bac4b384d42b2267310abae350ca6"},
-    {file = "pydantic-1.10.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:751f008cd2afe812a781fd6aa2fb66c620ca2e1a13b6a2152b1ad51553cb4b77"},
-    {file = "pydantic-1.10.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:12e837fd320dd30bd625be1b101e3b62edc096a49835392dcf418f1a5ac2b832"},
-    {file = "pydantic-1.10.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:587d92831d0115874d766b1f5fddcdde0c5b6c60f8c6111a394078ec227fca6d"},
-    {file = "pydantic-1.10.6-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:476f6674303ae7965730a382a8e8d7fae18b8004b7b69a56c3d8fa93968aa21c"},
-    {file = "pydantic-1.10.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3a2be0a0f32c83265fd71a45027201e1278beaa82ea88ea5b345eea6afa9ac7f"},
-    {file = "pydantic-1.10.6-cp38-cp38-win_amd64.whl", hash = "sha256:0abd9c60eee6201b853b6c4be104edfba4f8f6c5f3623f8e1dba90634d63eb35"},
-    {file = "pydantic-1.10.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6195ca908045054dd2d57eb9c39a5fe86409968b8040de8c2240186da0769da7"},
-    {file = "pydantic-1.10.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:43cdeca8d30de9a897440e3fb8866f827c4c31f6c73838e3a01a14b03b067b1d"},
-    {file = "pydantic-1.10.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c19eb5163167489cb1e0161ae9220dadd4fc609a42649e7e84a8fa8fff7a80f"},
-    {file = "pydantic-1.10.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:012c99a9c0d18cfde7469aa1ebff922e24b0c706d03ead96940f5465f2c9cf62"},
-    {file = "pydantic-1.10.6-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:528dcf7ec49fb5a84bf6fe346c1cc3c55b0e7603c2123881996ca3ad79db5bfc"},
-    {file = "pydantic-1.10.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:163e79386c3547c49366e959d01e37fc30252285a70619ffc1b10ede4758250a"},
-    {file = "pydantic-1.10.6-cp39-cp39-win_amd64.whl", hash = "sha256:189318051c3d57821f7233ecc94708767dd67687a614a4e8f92b4a020d4ffd06"},
-    {file = "pydantic-1.10.6-py3-none-any.whl", hash = "sha256:acc6783751ac9c9bc4680379edd6d286468a1dc8d7d9906cd6f1186ed682b2b0"},
-    {file = "pydantic-1.10.6.tar.gz", hash = "sha256:cf95adb0d1671fc38d8c43dd921ad5814a735e7d9b4d9e437c088002863854fd"},
+    {file = "pydantic-1.10.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e79e999e539872e903767c417c897e729e015872040e56b96e67968c3b918b2d"},
+    {file = "pydantic-1.10.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:01aea3a42c13f2602b7ecbbea484a98169fb568ebd9e247593ea05f01b884b2e"},
+    {file = "pydantic-1.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:516f1ed9bc2406a0467dd777afc636c7091d71f214d5e413d64fef45174cfc7a"},
+    {file = "pydantic-1.10.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae150a63564929c675d7f2303008d88426a0add46efd76c3fc797cd71cb1b46f"},
+    {file = "pydantic-1.10.7-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ecbbc51391248116c0a055899e6c3e7ffbb11fb5e2a4cd6f2d0b93272118a209"},
+    {file = "pydantic-1.10.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f4a2b50e2b03d5776e7f21af73e2070e1b5c0d0df255a827e7c632962f8315af"},
+    {file = "pydantic-1.10.7-cp310-cp310-win_amd64.whl", hash = "sha256:a7cd2251439988b413cb0a985c4ed82b6c6aac382dbaff53ae03c4b23a70e80a"},
+    {file = "pydantic-1.10.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:68792151e174a4aa9e9fc1b4e653e65a354a2fa0fed169f7b3d09902ad2cb6f1"},
+    {file = "pydantic-1.10.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dfe2507b8ef209da71b6fb5f4e597b50c5a34b78d7e857c4f8f3115effaef5fe"},
+    {file = "pydantic-1.10.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10a86d8c8db68086f1e30a530f7d5f83eb0685e632e411dbbcf2d5c0150e8dcd"},
+    {file = "pydantic-1.10.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d75ae19d2a3dbb146b6f324031c24f8a3f52ff5d6a9f22f0683694b3afcb16fb"},
+    {file = "pydantic-1.10.7-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:464855a7ff7f2cc2cf537ecc421291b9132aa9c79aef44e917ad711b4a93163b"},
+    {file = "pydantic-1.10.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:193924c563fae6ddcb71d3f06fa153866423ac1b793a47936656e806b64e24ca"},
+    {file = "pydantic-1.10.7-cp311-cp311-win_amd64.whl", hash = "sha256:b4a849d10f211389502059c33332e91327bc154acc1845f375a99eca3afa802d"},
+    {file = "pydantic-1.10.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cc1dde4e50a5fc1336ee0581c1612215bc64ed6d28d2c7c6f25d2fe3e7c3e918"},
+    {file = "pydantic-1.10.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0cfe895a504c060e5d36b287ee696e2fdad02d89e0d895f83037245218a87fe"},
+    {file = "pydantic-1.10.7-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:670bb4683ad1e48b0ecb06f0cfe2178dcf74ff27921cdf1606e527d2617a81ee"},
+    {file = "pydantic-1.10.7-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:950ce33857841f9a337ce07ddf46bc84e1c4946d2a3bba18f8280297157a3fd1"},
+    {file = "pydantic-1.10.7-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c15582f9055fbc1bfe50266a19771bbbef33dd28c45e78afbe1996fd70966c2a"},
+    {file = "pydantic-1.10.7-cp37-cp37m-win_amd64.whl", hash = "sha256:82dffb306dd20bd5268fd6379bc4bfe75242a9c2b79fec58e1041fbbdb1f7914"},
+    {file = "pydantic-1.10.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8c7f51861d73e8b9ddcb9916ae7ac39fb52761d9ea0df41128e81e2ba42886cd"},
+    {file = "pydantic-1.10.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6434b49c0b03a51021ade5c4daa7d70c98f7a79e95b551201fff682fc1661245"},
+    {file = "pydantic-1.10.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64d34ab766fa056df49013bb6e79921a0265204c071984e75a09cbceacbbdd5d"},
+    {file = "pydantic-1.10.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:701daea9ffe9d26f97b52f1d157e0d4121644f0fcf80b443248434958fd03dc3"},
+    {file = "pydantic-1.10.7-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:cf135c46099ff3f919d2150a948ce94b9ce545598ef2c6c7bf55dca98a304b52"},
+    {file = "pydantic-1.10.7-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0f85904f73161817b80781cc150f8b906d521fa11e3cdabae19a581c3606209"},
+    {file = "pydantic-1.10.7-cp38-cp38-win_amd64.whl", hash = "sha256:9f6f0fd68d73257ad6685419478c5aece46432f4bdd8d32c7345f1986496171e"},
+    {file = "pydantic-1.10.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c230c0d8a322276d6e7b88c3f7ce885f9ed16e0910354510e0bae84d54991143"},
+    {file = "pydantic-1.10.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:976cae77ba6a49d80f461fd8bba183ff7ba79f44aa5cfa82f1346b5626542f8e"},
+    {file = "pydantic-1.10.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d45fc99d64af9aaf7e308054a0067fdcd87ffe974f2442312372dfa66e1001d"},
+    {file = "pydantic-1.10.7-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d2a5ebb48958754d386195fe9e9c5106f11275867051bf017a8059410e9abf1f"},
+    {file = "pydantic-1.10.7-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:abfb7d4a7cd5cc4e1d1887c43503a7c5dd608eadf8bc615413fc498d3e4645cd"},
+    {file = "pydantic-1.10.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:80b1fab4deb08a8292d15e43a6edccdffa5377a36a4597bb545b93e79c5ff0a5"},
+    {file = "pydantic-1.10.7-cp39-cp39-win_amd64.whl", hash = "sha256:d71e69699498b020ea198468e2480a2f1e7433e32a3a99760058c6520e2bea7e"},
+    {file = "pydantic-1.10.7-py3-none-any.whl", hash = "sha256:0cd181f1d0b1d00e2b705f1bf1ac7799a2d938cce3376b8007df62b29be3c2c6"},
+    {file = "pydantic-1.10.7.tar.gz", hash = "sha256:cfc83c0678b6ba51b0532bea66860617c4cd4251ecf76e9846fa5a9f3454e97e"},
 ]
 
 [package.dependencies]
@@ -1617,16 +1958,19 @@ requests = ">=2.14.0"
 
 [[package]]
 name = "pygments"
-version = "2.11.2"
+version = "2.14.0"
 description = "Pygments is a syntax highlighting package written in Python."
 category = "dev"
 optional = false
-python-versions = ">=3.5"
+python-versions = ">=3.6"
 files = [
-    {file = "Pygments-2.11.2-py3-none-any.whl", hash = "sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65"},
-    {file = "Pygments-2.11.2.tar.gz", hash = "sha256:4e426f72023d88d03b2fa258de560726ce890ff3b630f88c21cbb8b2503b8c6a"},
+    {file = "Pygments-2.14.0-py3-none-any.whl", hash = "sha256:fa7bd7bd2771287c0de303af8bfdfc731f51bd2c6a47ab69d117138893b82717"},
+    {file = "Pygments-2.14.0.tar.gz", hash = "sha256:b3ed06a9e8ac9a9aae5a6f5dbe78a8a58655d17b43b93c078f094ddc476ae297"},
 ]
 
+[package.extras]
+plugins = ["importlib-metadata"]
+
 [[package]]
 name = "pyicu"
 version = "2.10.2"
@@ -1640,23 +1984,23 @@ files = [
 
 [[package]]
 name = "pyjwt"
-version = "2.4.0"
+version = "2.6.0"
 description = "JSON Web Token implementation in Python"
 category = "dev"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 files = [
-    {file = "PyJWT-2.4.0-py3-none-any.whl", hash = "sha256:72d1d253f32dbd4f5c88eaf1fdc62f3a19f676ccbadb9dbc5d07e951b2b26daf"},
-    {file = "PyJWT-2.4.0.tar.gz", hash = "sha256:d42908208c699b3b973cbeb01a969ba6a96c821eefb1c5bfe4c390c01d67abba"},
+    {file = "PyJWT-2.6.0-py3-none-any.whl", hash = "sha256:d83c3d892a77bbb74d3e1a2cfa90afaadb60945205d1095d9221f04466f64c14"},
+    {file = "PyJWT-2.6.0.tar.gz", hash = "sha256:69285c7e31fc44f68a1feb309e948e0df53259d579295e6cfe2b1792329f05fd"},
 ]
 
 [package.dependencies]
-cryptography = {version = ">=3.3.1", optional = true, markers = "extra == \"crypto\""}
+cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"crypto\""}
 
 [package.extras]
-crypto = ["cryptography (>=3.3.1)"]
-dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.3.1)", "mypy", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx", "sphinx-rtd-theme", "zope.interface"]
-docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"]
+crypto = ["cryptography (>=3.4.0)"]
+dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"]
+docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"]
 tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"]
 
 [[package]]
@@ -1716,18 +2060,18 @@ tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"]
 
 [[package]]
 name = "pyopenssl"
-version = "23.0.0"
+version = "23.1.0"
 description = "Python wrapper module around the OpenSSL library"
 category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "pyOpenSSL-23.0.0-py3-none-any.whl", hash = "sha256:df5fc28af899e74e19fccb5510df423581047e10ab6f1f4ba1763ff5fde844c0"},
-    {file = "pyOpenSSL-23.0.0.tar.gz", hash = "sha256:c1cc5f86bcacefc84dada7d31175cae1b1518d5f60d3d0bb595a67822a868a6f"},
+    {file = "pyOpenSSL-23.1.0-py3-none-any.whl", hash = "sha256:fb96e936866ad65662c22d0de84ca0fba58397893cdfe0f01334fa93382af23c"},
+    {file = "pyOpenSSL-23.1.0.tar.gz", hash = "sha256:8cb78010a1eb2c8e24b851693b7b04dfe9b1dc0a5ab3843927b10a85b1dfbb2e"},
 ]
 
 [package.dependencies]
-cryptography = ">=38.0.0,<40"
+cryptography = ">=38.0.0,<41"
 
 [package.extras]
 docs = ["sphinx (!=5.2.0,!=5.2.0.post0)", "sphinx-rtd-theme"]
@@ -1735,33 +2079,39 @@ test = ["flaky", "pretend", "pytest (>=3.0.1)"]
 
 [[package]]
 name = "pyrsistent"
-version = "0.18.1"
+version = "0.19.3"
 description = "Persistent/Functional/Immutable data structures"
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pyrsistent-0.18.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:df46c854f490f81210870e509818b729db4488e1f30f2a1ce1698b2295a878d1"},
-    {file = "pyrsistent-0.18.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d45866ececf4a5fff8742c25722da6d4c9e180daa7b405dc0a2a2790d668c26"},
-    {file = "pyrsistent-0.18.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4ed6784ceac462a7d6fcb7e9b663e93b9a6fb373b7f43594f9ff68875788e01e"},
-    {file = "pyrsistent-0.18.1-cp310-cp310-win32.whl", hash = "sha256:e4f3149fd5eb9b285d6bfb54d2e5173f6a116fe19172686797c056672689daf6"},
-    {file = "pyrsistent-0.18.1-cp310-cp310-win_amd64.whl", hash = "sha256:636ce2dc235046ccd3d8c56a7ad54e99d5c1cd0ef07d9ae847306c91d11b5fec"},
-    {file = "pyrsistent-0.18.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e92a52c166426efbe0d1ec1332ee9119b6d32fc1f0bbfd55d5c1088070e7fc1b"},
-    {file = "pyrsistent-0.18.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7a096646eab884bf8bed965bad63ea327e0d0c38989fc83c5ea7b8a87037bfc"},
-    {file = "pyrsistent-0.18.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cdfd2c361b8a8e5d9499b9082b501c452ade8bbf42aef97ea04854f4a3f43b22"},
-    {file = "pyrsistent-0.18.1-cp37-cp37m-win32.whl", hash = "sha256:7ec335fc998faa4febe75cc5268a9eac0478b3f681602c1f27befaf2a1abe1d8"},
-    {file = "pyrsistent-0.18.1-cp37-cp37m-win_amd64.whl", hash = "sha256:6455fc599df93d1f60e1c5c4fe471499f08d190d57eca040c0ea182301321286"},
-    {file = "pyrsistent-0.18.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fd8da6d0124efa2f67d86fa70c851022f87c98e205f0594e1fae044e7119a5a6"},
-    {file = "pyrsistent-0.18.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bfe2388663fd18bd8ce7db2c91c7400bf3e1a9e8bd7d63bf7e77d39051b85ec"},
-    {file = "pyrsistent-0.18.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e3e1fcc45199df76053026a51cc59ab2ea3fc7c094c6627e93b7b44cdae2c8c"},
-    {file = "pyrsistent-0.18.1-cp38-cp38-win32.whl", hash = "sha256:b568f35ad53a7b07ed9b1b2bae09eb15cdd671a5ba5d2c66caee40dbf91c68ca"},
-    {file = "pyrsistent-0.18.1-cp38-cp38-win_amd64.whl", hash = "sha256:d1b96547410f76078eaf66d282ddca2e4baae8964364abb4f4dcdde855cd123a"},
-    {file = "pyrsistent-0.18.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f87cc2863ef33c709e237d4b5f4502a62a00fab450c9e020892e8e2ede5847f5"},
-    {file = "pyrsistent-0.18.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bc66318fb7ee012071b2792024564973ecc80e9522842eb4e17743604b5e045"},
-    {file = "pyrsistent-0.18.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:914474c9f1d93080338ace89cb2acee74f4f666fb0424896fcfb8d86058bf17c"},
-    {file = "pyrsistent-0.18.1-cp39-cp39-win32.whl", hash = "sha256:1b34eedd6812bf4d33814fca1b66005805d3640ce53140ab8bbb1e2651b0d9bc"},
-    {file = "pyrsistent-0.18.1-cp39-cp39-win_amd64.whl", hash = "sha256:e24a828f57e0c337c8d8bb9f6b12f09dfdf0273da25fda9e314f0b684b415a07"},
-    {file = "pyrsistent-0.18.1.tar.gz", hash = "sha256:d4d61f8b993a7255ba714df3aca52700f8125289f84f704cf80916517c46eb96"},
+    {file = "pyrsistent-0.19.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:20460ac0ea439a3e79caa1dbd560344b64ed75e85d8703943e0b66c2a6150e4a"},
+    {file = "pyrsistent-0.19.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c18264cb84b5e68e7085a43723f9e4c1fd1d935ab240ce02c0324a8e01ccb64"},
+    {file = "pyrsistent-0.19.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b774f9288dda8d425adb6544e5903f1fb6c273ab3128a355c6b972b7df39dcf"},
+    {file = "pyrsistent-0.19.3-cp310-cp310-win32.whl", hash = "sha256:5a474fb80f5e0d6c9394d8db0fc19e90fa540b82ee52dba7d246a7791712f74a"},
+    {file = "pyrsistent-0.19.3-cp310-cp310-win_amd64.whl", hash = "sha256:49c32f216c17148695ca0e02a5c521e28a4ee6c5089f97e34fe24163113722da"},
+    {file = "pyrsistent-0.19.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f0774bf48631f3a20471dd7c5989657b639fd2d285b861237ea9e82c36a415a9"},
+    {file = "pyrsistent-0.19.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ab2204234c0ecd8b9368dbd6a53e83c3d4f3cab10ecaf6d0e772f456c442393"},
+    {file = "pyrsistent-0.19.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e42296a09e83028b3476f7073fcb69ffebac0e66dbbfd1bd847d61f74db30f19"},
+    {file = "pyrsistent-0.19.3-cp311-cp311-win32.whl", hash = "sha256:64220c429e42a7150f4bfd280f6f4bb2850f95956bde93c6fda1b70507af6ef3"},
+    {file = "pyrsistent-0.19.3-cp311-cp311-win_amd64.whl", hash = "sha256:016ad1afadf318eb7911baa24b049909f7f3bb2c5b1ed7b6a8f21db21ea3faa8"},
+    {file = "pyrsistent-0.19.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c4db1bd596fefd66b296a3d5d943c94f4fac5bcd13e99bffe2ba6a759d959a28"},
+    {file = "pyrsistent-0.19.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aeda827381f5e5d65cced3024126529ddc4289d944f75e090572c77ceb19adbf"},
+    {file = "pyrsistent-0.19.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:42ac0b2f44607eb92ae88609eda931a4f0dfa03038c44c772e07f43e738bcac9"},
+    {file = "pyrsistent-0.19.3-cp37-cp37m-win32.whl", hash = "sha256:e8f2b814a3dc6225964fa03d8582c6e0b6650d68a232df41e3cc1b66a5d2f8d1"},
+    {file = "pyrsistent-0.19.3-cp37-cp37m-win_amd64.whl", hash = "sha256:c9bb60a40a0ab9aba40a59f68214eed5a29c6274c83b2cc206a359c4a89fa41b"},
+    {file = "pyrsistent-0.19.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a2471f3f8693101975b1ff85ffd19bb7ca7dd7c38f8a81701f67d6b4f97b87d8"},
+    {file = "pyrsistent-0.19.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc5d149f31706762c1f8bda2e8c4f8fead6e80312e3692619a75301d3dbb819a"},
+    {file = "pyrsistent-0.19.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3311cb4237a341aa52ab8448c27e3a9931e2ee09561ad150ba94e4cfd3fc888c"},
+    {file = "pyrsistent-0.19.3-cp38-cp38-win32.whl", hash = "sha256:f0e7c4b2f77593871e918be000b96c8107da48444d57005b6a6bc61fb4331b2c"},
+    {file = "pyrsistent-0.19.3-cp38-cp38-win_amd64.whl", hash = "sha256:c147257a92374fde8498491f53ffa8f4822cd70c0d85037e09028e478cababb7"},
+    {file = "pyrsistent-0.19.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b735e538f74ec31378f5a1e3886a26d2ca6351106b4dfde376a26fc32a044edc"},
+    {file = "pyrsistent-0.19.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99abb85579e2165bd8522f0c0138864da97847875ecbd45f3e7e2af569bfc6f2"},
+    {file = "pyrsistent-0.19.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a8cb235fa6d3fd7aae6a4f1429bbb1fec1577d978098da1252f0489937786f3"},
+    {file = "pyrsistent-0.19.3-cp39-cp39-win32.whl", hash = "sha256:c74bed51f9b41c48366a286395c67f4e894374306b197e62810e0fdaf2364da2"},
+    {file = "pyrsistent-0.19.3-cp39-cp39-win_amd64.whl", hash = "sha256:878433581fc23e906d947a6814336eee031a00e6defba224234169ae3d3d6a98"},
+    {file = "pyrsistent-0.19.3-py3-none-any.whl", hash = "sha256:ccf0d6bd208f8111179f0c26fdf84ed7c3891982f2edaeae7422575f47e66b64"},
+    {file = "pyrsistent-0.19.3.tar.gz", hash = "sha256:1a2994773706bbb4995c31a97bc94f1418314923bd1048c6d964837040376440"},
 ]
 
 [[package]]
@@ -1807,14 +2157,14 @@ six = ">=1.5"
 
 [[package]]
 name = "pytz"
-version = "2021.3"
+version = "2022.7.1"
 description = "World timezone definitions, modern and historical"
 category = "main"
-optional = true
+optional = false
 python-versions = "*"
 files = [
-    {file = "pytz-2021.3-py2.py3-none-any.whl", hash = "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c"},
-    {file = "pytz-2021.3.tar.gz", hash = "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"},
+    {file = "pytz-2022.7.1-py2.py3-none-any.whl", hash = "sha256:78f4f37d8198e0627c5f1143240bb0206b8691d8d7ac6d78fee88b78733f8c4a"},
+    {file = "pytz-2022.7.1.tar.gz", hash = "sha256:01a0681c4b9684a28304615eba55d1ab31ae00bf68ec157ec3708a8182dbbcd0"},
 ]
 
 [[package]]
@@ -1881,14 +2231,14 @@ files = [
 
 [[package]]
 name = "readme-renderer"
-version = "37.2"
+version = "37.3"
 description = "readme_renderer is a library for rendering \"readme\" descriptions for Warehouse"
 category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "readme_renderer-37.2-py3-none-any.whl", hash = "sha256:d3f06a69e8c40fca9ab3174eca48f96d9771eddb43517b17d96583418427b106"},
-    {file = "readme_renderer-37.2.tar.gz", hash = "sha256:e8ad25293c98f781dbc2c5a36a309929390009f902f99e1798c761aaf04a7923"},
+    {file = "readme_renderer-37.3-py3-none-any.whl", hash = "sha256:f67a16caedfa71eef48a31b39708637a6f4664c4394801a7b0d6432d13907343"},
+    {file = "readme_renderer-37.3.tar.gz", hash = "sha256:cd653186dfc73055656f090f227f5cb22a046d7f71a841dfa305f55c9a513273"},
 ]
 
 [package.dependencies]
@@ -1901,36 +2251,36 @@ md = ["cmarkgfm (>=0.8.0)"]
 
 [[package]]
 name = "requests"
-version = "2.27.1"
+version = "2.28.2"
 description = "Python HTTP for Humans."
 category = "main"
 optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
+python-versions = ">=3.7, <4"
 files = [
-    {file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"},
-    {file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"},
+    {file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"},
+    {file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"},
 ]
 
 [package.dependencies]
 certifi = ">=2017.4.17"
-charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""}
-idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""}
+charset-normalizer = ">=2,<4"
+idna = ">=2.5,<4"
 urllib3 = ">=1.21.1,<1.27"
 
 [package.extras]
-socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
-use-chardet-on-py3 = ["chardet (>=3.0.2,<5)"]
+socks = ["PySocks (>=1.5.6,!=1.5.7)"]
+use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
 
 [[package]]
 name = "requests-toolbelt"
-version = "0.9.1"
+version = "0.10.1"
 description = "A utility belt for advanced users of python-requests"
 category = "dev"
 optional = false
-python-versions = "*"
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
 files = [
-    {file = "requests-toolbelt-0.9.1.tar.gz", hash = "sha256:968089d4584ad4ad7c171454f0a5c6dac23971e9472521ea3b6d49d610aa6fc0"},
-    {file = "requests_toolbelt-0.9.1-py2.py3-none-any.whl", hash = "sha256:380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f"},
+    {file = "requests-toolbelt-0.10.1.tar.gz", hash = "sha256:62e09f7ff5ccbda92772a29f394a49c3ad6cb181d568b1337626b2abb628a63d"},
+    {file = "requests_toolbelt-0.10.1-py2.py3-none-any.whl", hash = "sha256:18565aa58116d9951ac39baa288d3adb5b3ff975c4f25eee78555d89e8f247f7"},
 ]
 
 [package.dependencies]
@@ -1953,23 +2303,23 @@ idna2008 = ["idna"]
 
 [[package]]
 name = "rich"
-version = "12.6.0"
+version = "13.3.2"
 description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
 category = "dev"
 optional = false
-python-versions = ">=3.6.3,<4.0.0"
+python-versions = ">=3.7.0"
 files = [
-    {file = "rich-12.6.0-py3-none-any.whl", hash = "sha256:a4eb26484f2c82589bd9a17c73d32a010b1e29d89f1604cd9bf3a2097b81bb5e"},
-    {file = "rich-12.6.0.tar.gz", hash = "sha256:ba3a3775974105c221d31141f2c116f4fd65c5ceb0698657a11e9f295ec93fd0"},
+    {file = "rich-13.3.2-py3-none-any.whl", hash = "sha256:a104f37270bf677148d8acb07d33be1569eeee87e2d1beb286a4e9113caf6f2f"},
+    {file = "rich-13.3.2.tar.gz", hash = "sha256:91954fe80cfb7985727a467ca98a7618e5dd15178cc2da10f553b36a93859001"},
 ]
 
 [package.dependencies]
-commonmark = ">=0.9.0,<0.10.0"
-pygments = ">=2.6.0,<3.0.0"
+markdown-it-py = ">=2.2.0,<3.0.0"
+pygments = ">=2.13.0,<3.0.0"
 typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""}
 
 [package.extras]
-jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"]
+jupyter = ["ipywidgets (>=7.5.1,<9)"]
 
 [[package]]
 name = "ruff"
@@ -2000,14 +2350,14 @@ files = [
 
 [[package]]
 name = "secretstorage"
-version = "3.3.1"
+version = "3.3.3"
 description = "Python bindings to FreeDesktop.org Secret Service API"
 category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "SecretStorage-3.3.1-py3-none-any.whl", hash = "sha256:422d82c36172d88d6a0ed5afdec956514b189ddbfb72fefab0c8a1cee4eaf71f"},
-    {file = "SecretStorage-3.3.1.tar.gz", hash = "sha256:fd666c51a6bf200643495a04abb261f83229dcb6fd8472ec393df7ffc8b6f195"},
+    {file = "SecretStorage-3.3.3-py3-none-any.whl", hash = "sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99"},
+    {file = "SecretStorage-3.3.3.tar.gz", hash = "sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77"},
 ]
 
 [package.dependencies]
@@ -2098,18 +2448,18 @@ tests = ["coverage[toml] (>=5.0.2)", "pytest"]
 
 [[package]]
 name = "setuptools"
-version = "65.5.1"
+version = "67.6.0"
 description = "Easily download, build, install, upgrade, and uninstall Python packages"
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "setuptools-65.5.1-py3-none-any.whl", hash = "sha256:d0b9a8433464d5800cbe05094acf5c6d52a91bfac9b52bcfc4d41382be5d5d31"},
-    {file = "setuptools-65.5.1.tar.gz", hash = "sha256:e197a19aa8ec9722928f2206f8de752def0e4c9fc6953527360d1c36d94ddb2f"},
+    {file = "setuptools-67.6.0-py3-none-any.whl", hash = "sha256:b78aaa36f6b90a074c1fa651168723acbf45d14cb1196b6f02c0fd07f17623b2"},
+    {file = "setuptools-67.6.0.tar.gz", hash = "sha256:2ee892cd5f29f3373097f5a814697e397cf3ce313616df0af11231e2ad118077"},
 ]
 
 [package.extras]
-docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
+docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
 testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
 testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
 
@@ -2176,6 +2526,18 @@ files = [
     {file = "smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"},
 ]
 
+[[package]]
+name = "snowballstemmer"
+version = "2.2.0"
+description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms."
+category = "dev"
+optional = false
+python-versions = "*"
+files = [
+    {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"},
+    {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"},
+]
+
 [[package]]
 name = "sortedcontainers"
 version = "2.4.0"
@@ -2188,6 +2550,190 @@ files = [
     {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"},
 ]
 
+[[package]]
+name = "soupsieve"
+version = "2.4"
+description = "A modern CSS selector implementation for Beautiful Soup."
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "soupsieve-2.4-py3-none-any.whl", hash = "sha256:49e5368c2cda80ee7e84da9dbe3e110b70a4575f196efb74e51b94549d921955"},
+    {file = "soupsieve-2.4.tar.gz", hash = "sha256:e28dba9ca6c7c00173e34e4ba57448f0688bb681b7c5e8bf4971daafc093d69a"},
+]
+
+[[package]]
+name = "sphinx"
+version = "6.1.3"
+description = "Python documentation generator"
+category = "dev"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "Sphinx-6.1.3.tar.gz", hash = "sha256:0dac3b698538ffef41716cf97ba26c1c7788dba73ce6f150c1ff5b4720786dd2"},
+    {file = "sphinx-6.1.3-py3-none-any.whl", hash = "sha256:807d1cb3d6be87eb78a381c3e70ebd8d346b9a25f3753e9947e866b2786865fc"},
+]
+
+[package.dependencies]
+alabaster = ">=0.7,<0.8"
+babel = ">=2.9"
+colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
+docutils = ">=0.18,<0.20"
+imagesize = ">=1.3"
+importlib-metadata = {version = ">=4.8", markers = "python_version < \"3.10\""}
+Jinja2 = ">=3.0"
+packaging = ">=21.0"
+Pygments = ">=2.13"
+requests = ">=2.25.0"
+snowballstemmer = ">=2.0"
+sphinxcontrib-applehelp = "*"
+sphinxcontrib-devhelp = "*"
+sphinxcontrib-htmlhelp = ">=2.0.0"
+sphinxcontrib-jsmath = "*"
+sphinxcontrib-qthelp = "*"
+sphinxcontrib-serializinghtml = ">=1.1.5"
+
+[package.extras]
+docs = ["sphinxcontrib-websupport"]
+lint = ["docutils-stubs", "flake8 (>=3.5.0)", "flake8-simplify", "isort", "mypy (>=0.990)", "ruff", "sphinx-lint", "types-requests"]
+test = ["cython", "html5lib", "pytest (>=4.6)"]
+
+[[package]]
+name = "sphinx-autodoc2"
+version = "0.4.2"
+description = "Analyse a python project and create documentation for it."
+category = "dev"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "sphinx-autodoc2-0.4.2.tar.gz", hash = "sha256:06da226a25a4339e173b34bb0e590e0ba9b4570b414796140aee1939d09acb3a"},
+    {file = "sphinx_autodoc2-0.4.2-py3-none-any.whl", hash = "sha256:00835ba8c980b9c510ea794c3e2060e5a254a74c6c22badc9bfd3642dc1034b4"},
+]
+
+[package.dependencies]
+astroid = ">=2.7"
+tomli = {version = "*", markers = "python_version < \"3.11\""}
+typing-extensions = "*"
+
+[package.extras]
+cli = ["typer[all]"]
+docs = ["furo", "myst-parser", "sphinx (>=4.0.0)"]
+sphinx = ["sphinx (>=4.0.0)"]
+testing = ["pytest", "pytest-cov", "pytest-regressions", "sphinx (>=4.0.0)"]
+
+[[package]]
+name = "sphinx-basic-ng"
+version = "1.0.0b1"
+description = "A modern skeleton for Sphinx themes."
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "sphinx_basic_ng-1.0.0b1-py3-none-any.whl", hash = "sha256:ade597a3029c7865b24ad0eda88318766bcc2f9f4cef60df7e28126fde94db2a"},
+    {file = "sphinx_basic_ng-1.0.0b1.tar.gz", hash = "sha256:89374bd3ccd9452a301786781e28c8718e99960f2d4f411845ea75fc7bb5a9b0"},
+]
+
+[package.dependencies]
+sphinx = ">=4.0"
+
+[package.extras]
+docs = ["furo", "ipython", "myst-parser", "sphinx-copybutton", "sphinx-inline-tabs"]
+
+[[package]]
+name = "sphinxcontrib-applehelp"
+version = "1.0.4"
+description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books"
+category = "dev"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "sphinxcontrib-applehelp-1.0.4.tar.gz", hash = "sha256:828f867945bbe39817c210a1abfd1bc4895c8b73fcaade56d45357a348a07d7e"},
+    {file = "sphinxcontrib_applehelp-1.0.4-py3-none-any.whl", hash = "sha256:29d341f67fb0f6f586b23ad80e072c8e6ad0b48417db2bde114a4c9746feb228"},
+]
+
+[package.extras]
+lint = ["docutils-stubs", "flake8", "mypy"]
+test = ["pytest"]
+
+[[package]]
+name = "sphinxcontrib-devhelp"
+version = "1.0.2"
+description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp document."
+category = "dev"
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "sphinxcontrib-devhelp-1.0.2.tar.gz", hash = "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4"},
+    {file = "sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e"},
+]
+
+[package.extras]
+lint = ["docutils-stubs", "flake8", "mypy"]
+test = ["pytest"]
+
+[[package]]
+name = "sphinxcontrib-htmlhelp"
+version = "2.0.1"
+description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files"
+category = "dev"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "sphinxcontrib-htmlhelp-2.0.1.tar.gz", hash = "sha256:0cbdd302815330058422b98a113195c9249825d681e18f11e8b1f78a2f11efff"},
+    {file = "sphinxcontrib_htmlhelp-2.0.1-py3-none-any.whl", hash = "sha256:c38cb46dccf316c79de6e5515e1770414b797162b23cd3d06e67020e1d2a6903"},
+]
+
+[package.extras]
+lint = ["docutils-stubs", "flake8", "mypy"]
+test = ["html5lib", "pytest"]
+
+[[package]]
+name = "sphinxcontrib-jsmath"
+version = "1.0.1"
+description = "A sphinx extension which renders display math in HTML via JavaScript"
+category = "dev"
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"},
+    {file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"},
+]
+
+[package.extras]
+test = ["flake8", "mypy", "pytest"]
+
+[[package]]
+name = "sphinxcontrib-qthelp"
+version = "1.0.3"
+description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp document."
+category = "dev"
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "sphinxcontrib-qthelp-1.0.3.tar.gz", hash = "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72"},
+    {file = "sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl", hash = "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6"},
+]
+
+[package.extras]
+lint = ["docutils-stubs", "flake8", "mypy"]
+test = ["pytest"]
+
+[[package]]
+name = "sphinxcontrib-serializinghtml"
+version = "1.1.5"
+description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)."
+category = "dev"
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "sphinxcontrib-serializinghtml-1.1.5.tar.gz", hash = "sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952"},
+    {file = "sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl", hash = "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd"},
+]
+
+[package.extras]
+lint = ["docutils-stubs", "flake8", "mypy"]
+test = ["pytest"]
+
 [[package]]
 name = "systemd-python"
 version = "235"
@@ -2216,13 +2762,13 @@ tornado = "*"
 
 [[package]]
 name = "thrift"
-version = "0.15.0"
+version = "0.16.0"
 description = "Python bindings for the Apache Thrift RPC system"
 category = "main"
 optional = true
 python-versions = "*"
 files = [
-    {file = "thrift-0.15.0.tar.gz", hash = "sha256:87c8205a71cf8bbb111cb99b1f7495070fbc9cabb671669568854210da5b3e29"},
+    {file = "thrift-0.16.0.tar.gz", hash = "sha256:2b5b6488fcded21f9d312aa23c9ff6a0195d0f6ae26ddbd5ad9e3e25dfc14408"},
 ]
 
 [package.dependencies]
@@ -2235,65 +2781,35 @@ twisted = ["twisted"]
 
 [[package]]
 name = "tomli"
-version = "1.2.3"
+version = "2.0.1"
 description = "A lil' TOML parser"
 category = "dev"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 files = [
-    {file = "tomli-1.2.3-py3-none-any.whl", hash = "sha256:e3069e4be3ead9668e21cb9b074cd948f7b3113fd9c8bba083f48247aab8b11c"},
-    {file = "tomli-1.2.3.tar.gz", hash = "sha256:05b6166bff487dc068d322585c7ea4ef78deed501cc124060e0f238e89a9231f"},
+    {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
+    {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
 ]
 
 [[package]]
 name = "tornado"
-version = "6.1"
+version = "6.2"
 description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed."
 category = "main"
 optional = true
-python-versions = ">= 3.5"
-files = [
-    {file = "tornado-6.1-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:d371e811d6b156d82aa5f9a4e08b58debf97c302a35714f6f45e35139c332e32"},
-    {file = "tornado-6.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:0d321a39c36e5f2c4ff12b4ed58d41390460f798422c4504e09eb5678e09998c"},
-    {file = "tornado-6.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9de9e5188a782be6b1ce866e8a51bc76a0fbaa0e16613823fc38e4fc2556ad05"},
-    {file = "tornado-6.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:61b32d06ae8a036a6607805e6720ef00a3c98207038444ba7fd3d169cd998910"},
-    {file = "tornado-6.1-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:3e63498f680547ed24d2c71e6497f24bca791aca2fe116dbc2bd0ac7f191691b"},
-    {file = "tornado-6.1-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:6c77c9937962577a6a76917845d06af6ab9197702a42e1346d8ae2e76b5e3675"},
-    {file = "tornado-6.1-cp35-cp35m-win32.whl", hash = "sha256:6286efab1ed6e74b7028327365cf7346b1d777d63ab30e21a0f4d5b275fc17d5"},
-    {file = "tornado-6.1-cp35-cp35m-win_amd64.whl", hash = "sha256:fa2ba70284fa42c2a5ecb35e322e68823288a4251f9ba9cc77be04ae15eada68"},
-    {file = "tornado-6.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:0a00ff4561e2929a2c37ce706cb8233b7907e0cdc22eab98888aca5dd3775feb"},
-    {file = "tornado-6.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:748290bf9112b581c525e6e6d3820621ff020ed95af6f17fedef416b27ed564c"},
-    {file = "tornado-6.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:e385b637ac3acaae8022e7e47dfa7b83d3620e432e3ecb9a3f7f58f150e50921"},
-    {file = "tornado-6.1-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:25ad220258349a12ae87ede08a7b04aca51237721f63b1808d39bdb4b2164558"},
-    {file = "tornado-6.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:65d98939f1a2e74b58839f8c4dab3b6b3c1ce84972ae712be02845e65391ac7c"},
-    {file = "tornado-6.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:e519d64089b0876c7b467274468709dadf11e41d65f63bba207e04217f47c085"},
-    {file = "tornado-6.1-cp36-cp36m-win32.whl", hash = "sha256:b87936fd2c317b6ee08a5741ea06b9d11a6074ef4cc42e031bc6403f82a32575"},
-    {file = "tornado-6.1-cp36-cp36m-win_amd64.whl", hash = "sha256:cc0ee35043162abbf717b7df924597ade8e5395e7b66d18270116f8745ceb795"},
-    {file = "tornado-6.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7250a3fa399f08ec9cb3f7b1b987955d17e044f1ade821b32e5f435130250d7f"},
-    {file = "tornado-6.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:ed3ad863b1b40cd1d4bd21e7498329ccaece75db5a5bf58cd3c9f130843e7102"},
-    {file = "tornado-6.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:dcef026f608f678c118779cd6591c8af6e9b4155c44e0d1bc0c87c036fb8c8c4"},
-    {file = "tornado-6.1-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:70dec29e8ac485dbf57481baee40781c63e381bebea080991893cd297742b8fd"},
-    {file = "tornado-6.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:d3f7594930c423fd9f5d1a76bee85a2c36fd8b4b16921cae7e965f22575e9c01"},
-    {file = "tornado-6.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:3447475585bae2e77ecb832fc0300c3695516a47d46cefa0528181a34c5b9d3d"},
-    {file = "tornado-6.1-cp37-cp37m-win32.whl", hash = "sha256:e7229e60ac41a1202444497ddde70a48d33909e484f96eb0da9baf8dc68541df"},
-    {file = "tornado-6.1-cp37-cp37m-win_amd64.whl", hash = "sha256:cb5ec8eead331e3bb4ce8066cf06d2dfef1bfb1b2a73082dfe8a161301b76e37"},
-    {file = "tornado-6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:20241b3cb4f425e971cb0a8e4ffc9b0a861530ae3c52f2b0434e6c1b57e9fd95"},
-    {file = "tornado-6.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:c77da1263aa361938476f04c4b6c8916001b90b2c2fdd92d8d535e1af48fba5a"},
-    {file = "tornado-6.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:fba85b6cd9c39be262fcd23865652920832b61583de2a2ca907dbd8e8a8c81e5"},
-    {file = "tornado-6.1-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:1e8225a1070cd8eec59a996c43229fe8f95689cb16e552d130b9793cb570a288"},
-    {file = "tornado-6.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:d14d30e7f46a0476efb0deb5b61343b1526f73ebb5ed84f23dc794bdb88f9d9f"},
-    {file = "tornado-6.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:8f959b26f2634a091bb42241c3ed8d3cedb506e7c27b8dd5c7b9f745318ddbb6"},
-    {file = "tornado-6.1-cp38-cp38-win32.whl", hash = "sha256:34ca2dac9e4d7afb0bed4677512e36a52f09caa6fded70b4e3e1c89dbd92c326"},
-    {file = "tornado-6.1-cp38-cp38-win_amd64.whl", hash = "sha256:6196a5c39286cc37c024cd78834fb9345e464525d8991c21e908cc046d1cc02c"},
-    {file = "tornado-6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f0ba29bafd8e7e22920567ce0d232c26d4d47c8b5cf4ed7b562b5db39fa199c5"},
-    {file = "tornado-6.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:33892118b165401f291070100d6d09359ca74addda679b60390b09f8ef325ffe"},
-    {file = "tornado-6.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:7da13da6f985aab7f6f28debab00c67ff9cbacd588e8477034c0652ac141feea"},
-    {file = "tornado-6.1-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:e0791ac58d91ac58f694d8d2957884df8e4e2f6687cdf367ef7eb7497f79eaa2"},
-    {file = "tornado-6.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:66324e4e1beede9ac79e60f88de548da58b1f8ab4b2f1354d8375774f997e6c0"},
-    {file = "tornado-6.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:a48900ecea1cbb71b8c71c620dee15b62f85f7c14189bdeee54966fbd9a0c5bd"},
-    {file = "tornado-6.1-cp39-cp39-win32.whl", hash = "sha256:d3d20ea5782ba63ed13bc2b8c291a053c8d807a8fa927d941bd718468f7b950c"},
-    {file = "tornado-6.1-cp39-cp39-win_amd64.whl", hash = "sha256:548430be2740e327b3fe0201abe471f314741efcb0067ec4f2d7dcfb4825f3e4"},
-    {file = "tornado-6.1.tar.gz", hash = "sha256:33c6e81d7bd55b468d2e793517c909b139960b6c790a60b7991b9b6b76fb9791"},
+python-versions = ">= 3.7"
+files = [
+    {file = "tornado-6.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:20f638fd8cc85f3cbae3c732326e96addff0a15e22d80f049e00121651e82e72"},
+    {file = "tornado-6.2-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:87dcafae3e884462f90c90ecc200defe5e580a7fbbb4365eda7c7c1eb809ebc9"},
+    {file = "tornado-6.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba09ef14ca9893954244fd872798b4ccb2367c165946ce2dd7376aebdde8e3ac"},
+    {file = "tornado-6.2-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b8150f721c101abdef99073bf66d3903e292d851bee51910839831caba341a75"},
+    {file = "tornado-6.2-cp37-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3a2f5999215a3a06a4fc218026cd84c61b8b2b40ac5296a6db1f1451ef04c1e"},
+    {file = "tornado-6.2-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:5f8c52d219d4995388119af7ccaa0bcec289535747620116a58d830e7c25d8a8"},
+    {file = "tornado-6.2-cp37-abi3-musllinux_1_1_i686.whl", hash = "sha256:6fdfabffd8dfcb6cf887428849d30cf19a3ea34c2c248461e1f7d718ad30b66b"},
+    {file = "tornado-6.2-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:1d54d13ab8414ed44de07efecb97d4ef7c39f7438cf5e976ccd356bebb1b5fca"},
+    {file = "tornado-6.2-cp37-abi3-win32.whl", hash = "sha256:5c87076709343557ef8032934ce5f637dbb552efa7b21d08e89ae7619ed0eb23"},
+    {file = "tornado-6.2-cp37-abi3-win_amd64.whl", hash = "sha256:e5f923aa6a47e133d1cf87d60700889d7eae68988704e20c75fb2d65677a8e4b"},
+    {file = "tornado-6.2.tar.gz", hash = "sha256:9b630419bde84ec666bfd7ea0a4cb2a8a651c2d5cccdbdd1972a0c859dfc3c13"},
 ]
 
 [[package]]
@@ -2447,48 +2963,48 @@ twisted = "*"
 
 [[package]]
 name = "typed-ast"
-version = "1.5.2"
+version = "1.5.4"
 description = "a fork of Python 2 and 3 ast modules with type comment support"
 category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "typed_ast-1.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:183b183b7771a508395d2cbffd6db67d6ad52958a5fdc99f450d954003900266"},
-    {file = "typed_ast-1.5.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:676d051b1da67a852c0447621fdd11c4e104827417bf216092ec3e286f7da596"},
-    {file = "typed_ast-1.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc2542e83ac8399752bc16e0b35e038bdb659ba237f4222616b4e83fb9654985"},
-    {file = "typed_ast-1.5.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:74cac86cc586db8dfda0ce65d8bcd2bf17b58668dfcc3652762f3ef0e6677e76"},
-    {file = "typed_ast-1.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:18fe320f354d6f9ad3147859b6e16649a0781425268c4dde596093177660e71a"},
-    {file = "typed_ast-1.5.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:31d8c6b2df19a777bc8826770b872a45a1f30cfefcfd729491baa5237faae837"},
-    {file = "typed_ast-1.5.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:963a0ccc9a4188524e6e6d39b12c9ca24cc2d45a71cfdd04a26d883c922b4b78"},
-    {file = "typed_ast-1.5.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0eb77764ea470f14fcbb89d51bc6bbf5e7623446ac4ed06cbd9ca9495b62e36e"},
-    {file = "typed_ast-1.5.2-cp36-cp36m-win_amd64.whl", hash = "sha256:294a6903a4d087db805a7656989f613371915fc45c8cc0ddc5c5a0a8ad9bea4d"},
-    {file = "typed_ast-1.5.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:26a432dc219c6b6f38be20a958cbe1abffcc5492821d7e27f08606ef99e0dffd"},
-    {file = "typed_ast-1.5.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7407cfcad702f0b6c0e0f3e7ab876cd1d2c13b14ce770e412c0c4b9728a0f88"},
-    {file = "typed_ast-1.5.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f30ddd110634c2d7534b2d4e0e22967e88366b0d356b24de87419cc4410c41b7"},
-    {file = "typed_ast-1.5.2-cp37-cp37m-win_amd64.whl", hash = "sha256:8c08d6625bb258179b6e512f55ad20f9dfef019bbfbe3095247401e053a3ea30"},
-    {file = "typed_ast-1.5.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:90904d889ab8e81a956f2c0935a523cc4e077c7847a836abee832f868d5c26a4"},
-    {file = "typed_ast-1.5.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bbebc31bf11762b63bf61aaae232becb41c5bf6b3461b80a4df7e791fabb3aca"},
-    {file = "typed_ast-1.5.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c29dd9a3a9d259c9fa19d19738d021632d673f6ed9b35a739f48e5f807f264fb"},
-    {file = "typed_ast-1.5.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:58ae097a325e9bb7a684572d20eb3e1809802c5c9ec7108e85da1eb6c1a3331b"},
-    {file = "typed_ast-1.5.2-cp38-cp38-win_amd64.whl", hash = "sha256:da0a98d458010bf4fe535f2d1e367a2e2060e105978873c04c04212fb20543f7"},
-    {file = "typed_ast-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:33b4a19ddc9fc551ebabca9765d54d04600c4a50eda13893dadf67ed81d9a098"},
-    {file = "typed_ast-1.5.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1098df9a0592dd4c8c0ccfc2e98931278a6c6c53cb3a3e2cf7e9ee3b06153344"},
-    {file = "typed_ast-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42c47c3b43fe3a39ddf8de1d40dbbfca60ac8530a36c9b198ea5b9efac75c09e"},
-    {file = "typed_ast-1.5.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f290617f74a610849bd8f5514e34ae3d09eafd521dceaa6cf68b3f4414266d4e"},
-    {file = "typed_ast-1.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:df05aa5b241e2e8045f5f4367a9f6187b09c4cdf8578bb219861c4e27c443db5"},
-    {file = "typed_ast-1.5.2.tar.gz", hash = "sha256:525a2d4088e70a9f75b08b3f87a51acc9cde640e19cc523c7e41aa355564ae27"},
+    {file = "typed_ast-1.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:669dd0c4167f6f2cd9f57041e03c3c2ebf9063d0757dc89f79ba1daa2bfca9d4"},
+    {file = "typed_ast-1.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:211260621ab1cd7324e0798d6be953d00b74e0428382991adfddb352252f1d62"},
+    {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:267e3f78697a6c00c689c03db4876dd1efdfea2f251a5ad6555e82a26847b4ac"},
+    {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c542eeda69212fa10a7ada75e668876fdec5f856cd3d06829e6aa64ad17c8dfe"},
+    {file = "typed_ast-1.5.4-cp310-cp310-win_amd64.whl", hash = "sha256:a9916d2bb8865f973824fb47436fa45e1ebf2efd920f2b9f99342cb7fab93f72"},
+    {file = "typed_ast-1.5.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:79b1e0869db7c830ba6a981d58711c88b6677506e648496b1f64ac7d15633aec"},
+    {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a94d55d142c9265f4ea46fab70977a1944ecae359ae867397757d836ea5a3f47"},
+    {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:183afdf0ec5b1b211724dfef3d2cad2d767cbefac291f24d69b00546c1837fb6"},
+    {file = "typed_ast-1.5.4-cp36-cp36m-win_amd64.whl", hash = "sha256:639c5f0b21776605dd6c9dbe592d5228f021404dafd377e2b7ac046b0349b1a1"},
+    {file = "typed_ast-1.5.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cf4afcfac006ece570e32d6fa90ab74a17245b83dfd6655a6f68568098345ff6"},
+    {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed855bbe3eb3715fca349c80174cfcfd699c2f9de574d40527b8429acae23a66"},
+    {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6778e1b2f81dfc7bc58e4b259363b83d2e509a65198e85d5700dfae4c6c8ff1c"},
+    {file = "typed_ast-1.5.4-cp37-cp37m-win_amd64.whl", hash = "sha256:0261195c2062caf107831e92a76764c81227dae162c4f75192c0d489faf751a2"},
+    {file = "typed_ast-1.5.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2efae9db7a8c05ad5547d522e7dbe62c83d838d3906a3716d1478b6c1d61388d"},
+    {file = "typed_ast-1.5.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7d5d014b7daa8b0bf2eaef684295acae12b036d79f54178b92a2b6a56f92278f"},
+    {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:370788a63915e82fd6f212865a596a0fefcbb7d408bbbb13dea723d971ed8bdc"},
+    {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4e964b4ff86550a7a7d56345c7864b18f403f5bd7380edf44a3c1fb4ee7ac6c6"},
+    {file = "typed_ast-1.5.4-cp38-cp38-win_amd64.whl", hash = "sha256:683407d92dc953c8a7347119596f0b0e6c55eb98ebebd9b23437501b28dcbb8e"},
+    {file = "typed_ast-1.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4879da6c9b73443f97e731b617184a596ac1235fe91f98d279a7af36c796da35"},
+    {file = "typed_ast-1.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e123d878ba170397916557d31c8f589951e353cc95fb7f24f6bb69adc1a8a97"},
+    {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebd9d7f80ccf7a82ac5f88c521115cc55d84e35bf8b446fcd7836eb6b98929a3"},
+    {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98f80dee3c03455e92796b58b98ff6ca0b2a6f652120c263efdba4d6c5e58f72"},
+    {file = "typed_ast-1.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:0fdbcf2fef0ca421a3f5912555804296f0b0960f0418c440f5d6d3abb549f3e1"},
+    {file = "typed_ast-1.5.4.tar.gz", hash = "sha256:39e21ceb7388e4bb37f4c679d72707ed46c2fbf2a5609b8b8ebc4b067d977df2"},
 ]
 
 [[package]]
 name = "types-bleach"
-version = "6.0.0.0"
+version = "6.0.0.1"
 description = "Typing stubs for bleach"
 category = "dev"
 optional = false
 python-versions = "*"
 files = [
-    {file = "types-bleach-6.0.0.0.tar.gz", hash = "sha256:770ce9c7ea6173743ef1a4a70f2619bb1819bf53c7cd0336d939af93f488fbe2"},
-    {file = "types_bleach-6.0.0.0-py3-none-any.whl", hash = "sha256:75f55f035837c5fce2cd0bd5162a2a90057680a89c9275588a5c12f5f597a14a"},
+    {file = "types-bleach-6.0.0.1.tar.gz", hash = "sha256:43d9129deb9e82918747437edf78f09ff440f2973f4702625b61994f3e698518"},
+    {file = "types_bleach-6.0.0.1-py3-none-any.whl", hash = "sha256:440df967254007be80bb0f4d851f026c29c709cc48359bf4935d2b2f3a6f9f90"},
 ]
 
 [[package]]
@@ -2505,14 +3021,14 @@ files = [
 
 [[package]]
 name = "types-jsonschema"
-version = "4.17.0.5"
+version = "4.17.0.6"
 description = "Typing stubs for jsonschema"
 category = "dev"
 optional = false
 python-versions = "*"
 files = [
-    {file = "types-jsonschema-4.17.0.5.tar.gz", hash = "sha256:7adc7bfca4afe291de0c93eca9367aa72a4fbe8ce87fe15642c600ad97d45dd6"},
-    {file = "types_jsonschema-4.17.0.5-py3-none-any.whl", hash = "sha256:79ac8a7763fe728947af90a24168b91621edf7e8425bf3670abd4ea0d4758fba"},
+    {file = "types-jsonschema-4.17.0.6.tar.gz", hash = "sha256:e9b15e34b4f2fd5587bd68530fa0eb2a17c73ead212f4471d71eea032d231c46"},
+    {file = "types_jsonschema-4.17.0.6-py3-none-any.whl", hash = "sha256:ecef99bc64848f3798ad18922dfb2b40da25f17796fafcee50da984a21c5d6e6"},
 ]
 
 [[package]]
@@ -2580,26 +3096,26 @@ cryptography = ">=35.0.0"
 
 [[package]]
 name = "types-pyyaml"
-version = "6.0.12.3"
+version = "6.0.12.8"
 description = "Typing stubs for PyYAML"
 category = "dev"
 optional = false
 python-versions = "*"
 files = [
-    {file = "types-PyYAML-6.0.12.3.tar.gz", hash = "sha256:17ce17b3ead8f06e416a3b1d5b8ddc6cb82a422bb200254dd8b469434b045ffc"},
-    {file = "types_PyYAML-6.0.12.3-py3-none-any.whl", hash = "sha256:879700e9f215afb20ab5f849590418ab500989f83a57e635689e1d50ccc63f0c"},
+    {file = "types-PyYAML-6.0.12.8.tar.gz", hash = "sha256:19304869a89d49af00be681e7b267414df213f4eb89634c4495fa62e8f942b9f"},
+    {file = "types_PyYAML-6.0.12.8-py3-none-any.whl", hash = "sha256:5314a4b2580999b2ea06b2e5f9a7763d860d6e09cdf21c0e9561daa9cbd60178"},
 ]
 
 [[package]]
 name = "types-requests"
-version = "2.28.11.15"
+version = "2.28.11.16"
 description = "Typing stubs for requests"
 category = "dev"
 optional = false
 python-versions = "*"
 files = [
-    {file = "types-requests-2.28.11.15.tar.gz", hash = "sha256:fc8eaa09cc014699c6b63c60c2e3add0c8b09a410c818b5ac6e65f92a26dde09"},
-    {file = "types_requests-2.28.11.15-py3-none-any.whl", hash = "sha256:a05e4c7bc967518fba5789c341ea8b0c942776ee474c7873129a61161978e586"},
+    {file = "types-requests-2.28.11.16.tar.gz", hash = "sha256:9d4002056df7ebc4ec1f28fd701fba82c5c22549c4477116cb2656aa30ace6db"},
+    {file = "types_requests-2.28.11.16-py3-none-any.whl", hash = "sha256:a86921028335fdcc3aaf676c9d3463f867db6af2303fc65aa309b13ae1e6dd53"},
 ]
 
 [package.dependencies]
@@ -2607,26 +3123,26 @@ types-urllib3 = "<1.27"
 
 [[package]]
 name = "types-setuptools"
-version = "67.5.0.0"
+version = "67.6.0.5"
 description = "Typing stubs for setuptools"
 category = "dev"
 optional = false
 python-versions = "*"
 files = [
-    {file = "types-setuptools-67.5.0.0.tar.gz", hash = "sha256:fa6f231eeb27e86b1d6e8260f73de300e91f99c205b9a5e21debd49f3726a849"},
-    {file = "types_setuptools-67.5.0.0-py3-none-any.whl", hash = "sha256:f7f4bf4ab777e88631d3a387bbfdd4d480a2a4693ca896130f8ef738370377b8"},
+    {file = "types-setuptools-67.6.0.5.tar.gz", hash = "sha256:3a708e66c7bdc620e4d0439f344c750c57a4340c895a4c3ed2d0fc4ae8eb9962"},
+    {file = "types_setuptools-67.6.0.5-py3-none-any.whl", hash = "sha256:dae5a4a659dbb6dba57773440f6e2dbdd8ef282dc136a174a8a59bd33d949945"},
 ]
 
 [[package]]
 name = "types-urllib3"
-version = "1.26.10"
+version = "1.26.25.8"
 description = "Typing stubs for urllib3"
 category = "dev"
 optional = false
 python-versions = "*"
 files = [
-    {file = "types-urllib3-1.26.10.tar.gz", hash = "sha256:a26898f530e6c3f43f25b907f2b884486868ffd56a9faa94cbf9b3eb6e165d6a"},
-    {file = "types_urllib3-1.26.10-py3-none-any.whl", hash = "sha256:d755278d5ecd7a7a6479a190e54230f241f1a99c19b81518b756b19dc69e518c"},
+    {file = "types-urllib3-1.26.25.8.tar.gz", hash = "sha256:ecf43c42d8ee439d732a1110b4901e9017a79a38daca26f08e42c8460069392c"},
+    {file = "types_urllib3-1.26.25.8-py3-none-any.whl", hash = "sha256:95ea847fbf0bf675f50c8ae19a665baedcf07e6b4641662c4c3c72e7b2edf1a9"},
 ]
 
 [[package]]
@@ -2655,14 +3171,14 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "1.26.12"
+version = "1.26.15"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 category = "main"
 optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4"
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
 files = [
-    {file = "urllib3-1.26.12-py2.py3-none-any.whl", hash = "sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997"},
-    {file = "urllib3-1.26.12.tar.gz", hash = "sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e"},
+    {file = "urllib3-1.26.15-py2.py3-none-any.whl", hash = "sha256:aa751d169e23c7479ce47a0cb0da579e3ede798f994f5816a74e4f4500dcea42"},
+    {file = "urllib3-1.26.15.tar.gz", hash = "sha256:8a388717b9476f934a21484e8c8e61875ab60644d29b9b39e11e4b9dc1c6b305"},
 ]
 
 [package.extras]
@@ -2684,124 +3200,135 @@ files = [
 
 [[package]]
 name = "wrapt"
-version = "1.14.1"
+version = "1.15.0"
 description = "Module for decorators, wrappers and monkey patching."
 category = "dev"
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
 files = [
-    {file = "wrapt-1.14.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:1b376b3f4896e7930f1f772ac4b064ac12598d1c38d04907e696cc4d794b43d3"},
-    {file = "wrapt-1.14.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:903500616422a40a98a5a3c4ff4ed9d0066f3b4c951fa286018ecdf0750194ef"},
-    {file = "wrapt-1.14.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5a9a0d155deafd9448baff28c08e150d9b24ff010e899311ddd63c45c2445e28"},
-    {file = "wrapt-1.14.1-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ddaea91abf8b0d13443f6dac52e89051a5063c7d014710dcb4d4abb2ff811a59"},
-    {file = "wrapt-1.14.1-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:36f582d0c6bc99d5f39cd3ac2a9062e57f3cf606ade29a0a0d6b323462f4dd87"},
-    {file = "wrapt-1.14.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:7ef58fb89674095bfc57c4069e95d7a31cfdc0939e2a579882ac7d55aadfd2a1"},
-    {file = "wrapt-1.14.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:e2f83e18fe2f4c9e7db597e988f72712c0c3676d337d8b101f6758107c42425b"},
-    {file = "wrapt-1.14.1-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:ee2b1b1769f6707a8a445162ea16dddf74285c3964f605877a20e38545c3c462"},
-    {file = "wrapt-1.14.1-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:833b58d5d0b7e5b9832869f039203389ac7cbf01765639c7309fd50ef619e0b1"},
-    {file = "wrapt-1.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:80bb5c256f1415f747011dc3604b59bc1f91c6e7150bd7db03b19170ee06b320"},
-    {file = "wrapt-1.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:07f7a7d0f388028b2df1d916e94bbb40624c59b48ecc6cbc232546706fac74c2"},
-    {file = "wrapt-1.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02b41b633c6261feff8ddd8d11c711df6842aba629fdd3da10249a53211a72c4"},
-    {file = "wrapt-1.14.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2fe803deacd09a233e4762a1adcea5db5d31e6be577a43352936179d14d90069"},
-    {file = "wrapt-1.14.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:257fd78c513e0fb5cdbe058c27a0624c9884e735bbd131935fd49e9fe719d310"},
-    {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4fcc4649dc762cddacd193e6b55bc02edca674067f5f98166d7713b193932b7f"},
-    {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:11871514607b15cfeb87c547a49bca19fde402f32e2b1c24a632506c0a756656"},
-    {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"},
-    {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"},
-    {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"},
-    {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"},
-    {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"},
-    {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"},
-    {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:a85d2b46be66a71bedde836d9e41859879cc54a2a04fad1191eb50c2066f6e9d"},
-    {file = "wrapt-1.14.1-cp35-cp35m-win32.whl", hash = "sha256:dbcda74c67263139358f4d188ae5faae95c30929281bc6866d00573783c422b7"},
-    {file = "wrapt-1.14.1-cp35-cp35m-win_amd64.whl", hash = "sha256:b21bb4c09ffabfa0e85e3a6b623e19b80e7acd709b9f91452b8297ace2a8ab00"},
-    {file = "wrapt-1.14.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:9e0fd32e0148dd5dea6af5fee42beb949098564cc23211a88d799e434255a1f4"},
-    {file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9736af4641846491aedb3c3f56b9bc5568d92b0692303b5a305301a95dfd38b1"},
-    {file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b02d65b9ccf0ef6c34cba6cf5bf2aab1bb2f49c6090bafeecc9cd81ad4ea1c1"},
-    {file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21ac0156c4b089b330b7666db40feee30a5d52634cc4560e1905d6529a3897ff"},
-    {file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:9f3e6f9e05148ff90002b884fbc2a86bd303ae847e472f44ecc06c2cd2fcdb2d"},
-    {file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:6e743de5e9c3d1b7185870f480587b75b1cb604832e380d64f9504a0535912d1"},
-    {file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d79d7d5dc8a32b7093e81e97dad755127ff77bcc899e845f41bf71747af0c569"},
-    {file = "wrapt-1.14.1-cp36-cp36m-win32.whl", hash = "sha256:81b19725065dcb43df02b37e03278c011a09e49757287dca60c5aecdd5a0b8ed"},
-    {file = "wrapt-1.14.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b014c23646a467558be7da3d6b9fa409b2c567d2110599b7cf9a0c5992b3b471"},
-    {file = "wrapt-1.14.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:88bd7b6bd70a5b6803c1abf6bca012f7ed963e58c68d76ee20b9d751c74a3248"},
-    {file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5901a312f4d14c59918c221323068fad0540e34324925c8475263841dbdfe68"},
-    {file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d77c85fedff92cf788face9bfa3ebaa364448ebb1d765302e9af11bf449ca36d"},
-    {file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d649d616e5c6a678b26d15ece345354f7c2286acd6db868e65fcc5ff7c24a77"},
-    {file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7d2872609603cb35ca513d7404a94d6d608fc13211563571117046c9d2bcc3d7"},
-    {file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:ee6acae74a2b91865910eef5e7de37dc6895ad96fa23603d1d27ea69df545015"},
-    {file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:2b39d38039a1fdad98c87279b48bc5dce2c0ca0d73483b12cb72aa9609278e8a"},
-    {file = "wrapt-1.14.1-cp37-cp37m-win32.whl", hash = "sha256:60db23fa423575eeb65ea430cee741acb7c26a1365d103f7b0f6ec412b893853"},
-    {file = "wrapt-1.14.1-cp37-cp37m-win_amd64.whl", hash = "sha256:709fe01086a55cf79d20f741f39325018f4df051ef39fe921b1ebe780a66184c"},
-    {file = "wrapt-1.14.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8c0ce1e99116d5ab21355d8ebe53d9460366704ea38ae4d9f6933188f327b456"},
-    {file = "wrapt-1.14.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e3fb1677c720409d5f671e39bac6c9e0e422584e5f518bfd50aa4cbbea02433f"},
-    {file = "wrapt-1.14.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:642c2e7a804fcf18c222e1060df25fc210b9c58db7c91416fb055897fc27e8cc"},
-    {file = "wrapt-1.14.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b7c050ae976e286906dd3f26009e117eb000fb2cf3533398c5ad9ccc86867b1"},
-    {file = "wrapt-1.14.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef3f72c9666bba2bab70d2a8b79f2c6d2c1a42a7f7e2b0ec83bb2f9e383950af"},
-    {file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:01c205616a89d09827986bc4e859bcabd64f5a0662a7fe95e0d359424e0e071b"},
-    {file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5a0f54ce2c092aaf439813735584b9537cad479575a09892b8352fea5e988dc0"},
-    {file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2cf71233a0ed05ccdabe209c606fe0bac7379fdcf687f39b944420d2a09fdb57"},
-    {file = "wrapt-1.14.1-cp38-cp38-win32.whl", hash = "sha256:aa31fdcc33fef9eb2552cbcbfee7773d5a6792c137b359e82879c101e98584c5"},
-    {file = "wrapt-1.14.1-cp38-cp38-win_amd64.whl", hash = "sha256:d1967f46ea8f2db647c786e78d8cc7e4313dbd1b0aca360592d8027b8508e24d"},
-    {file = "wrapt-1.14.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3232822c7d98d23895ccc443bbdf57c7412c5a65996c30442ebe6ed3df335383"},
-    {file = "wrapt-1.14.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:988635d122aaf2bdcef9e795435662bcd65b02f4f4c1ae37fbee7401c440b3a7"},
-    {file = "wrapt-1.14.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cca3c2cdadb362116235fdbd411735de4328c61425b0aa9f872fd76d02c4e86"},
-    {file = "wrapt-1.14.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d52a25136894c63de15a35bc0bdc5adb4b0e173b9c0d07a2be9d3ca64a332735"},
-    {file = "wrapt-1.14.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40e7bc81c9e2b2734ea4bc1aceb8a8f0ceaac7c5299bc5d69e37c44d9081d43b"},
-    {file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b9b7a708dd92306328117d8c4b62e2194d00c365f18eff11a9b53c6f923b01e3"},
-    {file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6a9a25751acb379b466ff6be78a315e2b439d4c94c1e99cb7266d40a537995d3"},
-    {file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:34aa51c45f28ba7f12accd624225e2b1e5a3a45206aa191f6f9aac931d9d56fe"},
-    {file = "wrapt-1.14.1-cp39-cp39-win32.whl", hash = "sha256:dee0ce50c6a2dd9056c20db781e9c1cfd33e77d2d569f5d1d9321c641bb903d5"},
-    {file = "wrapt-1.14.1-cp39-cp39-win_amd64.whl", hash = "sha256:dee60e1de1898bde3b238f18340eec6148986da0455d8ba7848d50470a7a32fb"},
-    {file = "wrapt-1.14.1.tar.gz", hash = "sha256:380a85cf89e0e69b7cfbe2ea9f765f004ff419f34194018a6827ac0e3edfed4d"},
+    {file = "wrapt-1.15.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ca1cccf838cd28d5a0883b342474c630ac48cac5df0ee6eacc9c7290f76b11c1"},
+    {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e826aadda3cae59295b95343db8f3d965fb31059da7de01ee8d1c40a60398b29"},
+    {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5fc8e02f5984a55d2c653f5fea93531e9836abbd84342c1d1e17abc4a15084c2"},
+    {file = "wrapt-1.15.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:96e25c8603a155559231c19c0349245eeb4ac0096fe3c1d0be5c47e075bd4f46"},
+    {file = "wrapt-1.15.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:40737a081d7497efea35ab9304b829b857f21558acfc7b3272f908d33b0d9d4c"},
+    {file = "wrapt-1.15.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:f87ec75864c37c4c6cb908d282e1969e79763e0d9becdfe9fe5473b7bb1e5f09"},
+    {file = "wrapt-1.15.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:1286eb30261894e4c70d124d44b7fd07825340869945c79d05bda53a40caa079"},
+    {file = "wrapt-1.15.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:493d389a2b63c88ad56cdc35d0fa5752daac56ca755805b1b0c530f785767d5e"},
+    {file = "wrapt-1.15.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:58d7a75d731e8c63614222bcb21dd992b4ab01a399f1f09dd82af17bbfc2368a"},
+    {file = "wrapt-1.15.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:21f6d9a0d5b3a207cdf7acf8e58d7d13d463e639f0c7e01d82cdb671e6cb7923"},
+    {file = "wrapt-1.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ce42618f67741d4697684e501ef02f29e758a123aa2d669e2d964ff734ee00ee"},
+    {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41d07d029dd4157ae27beab04d22b8e261eddfc6ecd64ff7000b10dc8b3a5727"},
+    {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54accd4b8bc202966bafafd16e69da9d5640ff92389d33d28555c5fd4f25ccb7"},
+    {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fbfbca668dd15b744418265a9607baa970c347eefd0db6a518aaf0cfbd153c0"},
+    {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:76e9c727a874b4856d11a32fb0b389afc61ce8aaf281ada613713ddeadd1cfec"},
+    {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e20076a211cd6f9b44a6be58f7eeafa7ab5720eb796975d0c03f05b47d89eb90"},
+    {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a74d56552ddbde46c246b5b89199cb3fd182f9c346c784e1a93e4dc3f5ec9975"},
+    {file = "wrapt-1.15.0-cp310-cp310-win32.whl", hash = "sha256:26458da5653aa5b3d8dc8b24192f574a58984c749401f98fff994d41d3f08da1"},
+    {file = "wrapt-1.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:75760a47c06b5974aa5e01949bf7e66d2af4d08cb8c1d6516af5e39595397f5e"},
+    {file = "wrapt-1.15.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ba1711cda2d30634a7e452fc79eabcadaffedf241ff206db2ee93dd2c89a60e7"},
+    {file = "wrapt-1.15.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:56374914b132c702aa9aa9959c550004b8847148f95e1b824772d453ac204a72"},
+    {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a89ce3fd220ff144bd9d54da333ec0de0399b52c9ac3d2ce34b569cf1a5748fb"},
+    {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bbe623731d03b186b3d6b0d6f51865bf598587c38d6f7b0be2e27414f7f214e"},
+    {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3abbe948c3cbde2689370a262a8d04e32ec2dd4f27103669a45c6929bcdbfe7c"},
+    {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b67b819628e3b748fd3c2192c15fb951f549d0f47c0449af0764d7647302fda3"},
+    {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7eebcdbe3677e58dd4c0e03b4f2cfa346ed4049687d839adad68cc38bb559c92"},
+    {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:74934ebd71950e3db69960a7da29204f89624dde411afbfb3b4858c1409b1e98"},
+    {file = "wrapt-1.15.0-cp311-cp311-win32.whl", hash = "sha256:bd84395aab8e4d36263cd1b9308cd504f6cf713b7d6d3ce25ea55670baec5416"},
+    {file = "wrapt-1.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:a487f72a25904e2b4bbc0817ce7a8de94363bd7e79890510174da9d901c38705"},
+    {file = "wrapt-1.15.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:4ff0d20f2e670800d3ed2b220d40984162089a6e2c9646fdb09b85e6f9a8fc29"},
+    {file = "wrapt-1.15.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9ed6aa0726b9b60911f4aed8ec5b8dd7bf3491476015819f56473ffaef8959bd"},
+    {file = "wrapt-1.15.0-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:896689fddba4f23ef7c718279e42f8834041a21342d95e56922e1c10c0cc7afb"},
+    {file = "wrapt-1.15.0-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:75669d77bb2c071333417617a235324a1618dba66f82a750362eccbe5b61d248"},
+    {file = "wrapt-1.15.0-cp35-cp35m-win32.whl", hash = "sha256:fbec11614dba0424ca72f4e8ba3c420dba07b4a7c206c8c8e4e73f2e98f4c559"},
+    {file = "wrapt-1.15.0-cp35-cp35m-win_amd64.whl", hash = "sha256:fd69666217b62fa5d7c6aa88e507493a34dec4fa20c5bd925e4bc12fce586639"},
+    {file = "wrapt-1.15.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b0724f05c396b0a4c36a3226c31648385deb6a65d8992644c12a4963c70326ba"},
+    {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbeccb1aa40ab88cd29e6c7d8585582c99548f55f9b2581dfc5ba68c59a85752"},
+    {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38adf7198f8f154502883242f9fe7333ab05a5b02de7d83aa2d88ea621f13364"},
+    {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:578383d740457fa790fdf85e6d346fda1416a40549fe8db08e5e9bd281c6a475"},
+    {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:a4cbb9ff5795cd66f0066bdf5947f170f5d63a9274f99bdbca02fd973adcf2a8"},
+    {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:af5bd9ccb188f6a5fdda9f1f09d9f4c86cc8a539bd48a0bfdc97723970348418"},
+    {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:b56d5519e470d3f2fe4aa7585f0632b060d532d0696c5bdfb5e8319e1d0f69a2"},
+    {file = "wrapt-1.15.0-cp36-cp36m-win32.whl", hash = "sha256:77d4c1b881076c3ba173484dfa53d3582c1c8ff1f914c6461ab70c8428b796c1"},
+    {file = "wrapt-1.15.0-cp36-cp36m-win_amd64.whl", hash = "sha256:077ff0d1f9d9e4ce6476c1a924a3332452c1406e59d90a2cf24aeb29eeac9420"},
+    {file = "wrapt-1.15.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5c5aa28df055697d7c37d2099a7bc09f559d5053c3349b1ad0c39000e611d317"},
+    {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a8564f283394634a7a7054b7983e47dbf39c07712d7b177b37e03f2467a024e"},
+    {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780c82a41dc493b62fc5884fb1d3a3b81106642c5c5c78d6a0d4cbe96d62ba7e"},
+    {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e169e957c33576f47e21864cf3fc9ff47c223a4ebca8960079b8bd36cb014fd0"},
+    {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b02f21c1e2074943312d03d243ac4388319f2456576b2c6023041c4d57cd7019"},
+    {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f2e69b3ed24544b0d3dbe2c5c0ba5153ce50dcebb576fdc4696d52aa22db6034"},
+    {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d787272ed958a05b2c86311d3a4135d3c2aeea4fc655705f074130aa57d71653"},
+    {file = "wrapt-1.15.0-cp37-cp37m-win32.whl", hash = "sha256:02fce1852f755f44f95af51f69d22e45080102e9d00258053b79367d07af39c0"},
+    {file = "wrapt-1.15.0-cp37-cp37m-win_amd64.whl", hash = "sha256:abd52a09d03adf9c763d706df707c343293d5d106aea53483e0ec8d9e310ad5e"},
+    {file = "wrapt-1.15.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cdb4f085756c96a3af04e6eca7f08b1345e94b53af8921b25c72f096e704e145"},
+    {file = "wrapt-1.15.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:230ae493696a371f1dbffaad3dafbb742a4d27a0afd2b1aecebe52b740167e7f"},
+    {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63424c681923b9f3bfbc5e3205aafe790904053d42ddcc08542181a30a7a51bd"},
+    {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6bcbfc99f55655c3d93feb7ef3800bd5bbe963a755687cbf1f490a71fb7794b"},
+    {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c99f4309f5145b93eca6e35ac1a988f0dc0a7ccf9ccdcd78d3c0adf57224e62f"},
+    {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b130fe77361d6771ecf5a219d8e0817d61b236b7d8b37cc045172e574ed219e6"},
+    {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:96177eb5645b1c6985f5c11d03fc2dbda9ad24ec0f3a46dcce91445747e15094"},
+    {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5fe3e099cf07d0fb5a1e23d399e5d4d1ca3e6dfcbe5c8570ccff3e9208274f7"},
+    {file = "wrapt-1.15.0-cp38-cp38-win32.whl", hash = "sha256:abd8f36c99512755b8456047b7be10372fca271bf1467a1caa88db991e7c421b"},
+    {file = "wrapt-1.15.0-cp38-cp38-win_amd64.whl", hash = "sha256:b06fa97478a5f478fb05e1980980a7cdf2712015493b44d0c87606c1513ed5b1"},
+    {file = "wrapt-1.15.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2e51de54d4fb8fb50d6ee8327f9828306a959ae394d3e01a1ba8b2f937747d86"},
+    {file = "wrapt-1.15.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0970ddb69bba00670e58955f8019bec4a42d1785db3faa043c33d81de2bf843c"},
+    {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76407ab327158c510f44ded207e2f76b657303e17cb7a572ffe2f5a8a48aa04d"},
+    {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd525e0e52a5ff16653a3fc9e3dd827981917d34996600bbc34c05d048ca35cc"},
+    {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d37ac69edc5614b90516807de32d08cb8e7b12260a285ee330955604ed9dd29"},
+    {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:078e2a1a86544e644a68422f881c48b84fef6d18f8c7a957ffd3f2e0a74a0d4a"},
+    {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2cf56d0e237280baed46f0b5316661da892565ff58309d4d2ed7dba763d984b8"},
+    {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7dc0713bf81287a00516ef43137273b23ee414fe41a3c14be10dd95ed98a2df9"},
+    {file = "wrapt-1.15.0-cp39-cp39-win32.whl", hash = "sha256:46ed616d5fb42f98630ed70c3529541408166c22cdfd4540b88d5f21006b0eff"},
+    {file = "wrapt-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:eef4d64c650f33347c1f9266fa5ae001440b232ad9b98f1f43dfe7a79435c0a6"},
+    {file = "wrapt-1.15.0-py3-none-any.whl", hash = "sha256:64b1df0f83706b4ef4cfb4fb0e4c2669100fd7ecacfb59e091fad300d4e04640"},
+    {file = "wrapt-1.15.0.tar.gz", hash = "sha256:d06730c6aed78cee4126234cf2d071e01b44b915e725a6cb439a879ec9754a3a"},
 ]
 
 [[package]]
 name = "xmlschema"
-version = "1.10.0"
+version = "2.2.2"
 description = "An XML Schema validator and decoder"
 category = "main"
 optional = true
 python-versions = ">=3.7"
 files = [
-    {file = "xmlschema-1.10.0-py3-none-any.whl", hash = "sha256:dbd68bded2fef00c19cf37110ca0565eca34cf0b6c9e1d3b62ad0de8cbb582ca"},
-    {file = "xmlschema-1.10.0.tar.gz", hash = "sha256:be1eedce6a4b911fd3a7f4060d0811951820a13410e61f0454b30e9f4e7cf197"},
+    {file = "xmlschema-2.2.2-py3-none-any.whl", hash = "sha256:557f3632b54b6ff10576736bba62e43db84eb60f6465a83818576cd9ffcc1799"},
+    {file = "xmlschema-2.2.2.tar.gz", hash = "sha256:0caa96668807b4b51c42a0fe2b6610752bc59f069615df3e34dcfffb962973fd"},
 ]
 
 [package.dependencies]
-elementpath = ">=2.5.0,<3.0.0"
+elementpath = ">=4.0.0,<5.0.0"
 
 [package.extras]
-codegen = ["elementpath (>=2.5.0,<3.0.0)", "jinja2"]
-dev = ["Sphinx", "coverage", "elementpath (>=2.5.0,<3.0.0)", "flake8", "jinja2", "lxml", "lxml-stubs", "memory-profiler", "mypy", "sphinx-rtd-theme", "tox"]
-docs = ["Sphinx", "elementpath (>=2.5.0,<3.0.0)", "jinja2", "sphinx-rtd-theme"]
+codegen = ["elementpath (>=4.0.0,<5.0.0)", "jinja2"]
+dev = ["Sphinx", "coverage", "elementpath (>=4.0.0,<5.0.0)", "flake8", "jinja2", "lxml", "lxml-stubs", "memory-profiler", "mypy", "sphinx-rtd-theme", "tox"]
+docs = ["Sphinx", "elementpath (>=4.0.0,<5.0.0)", "jinja2", "sphinx-rtd-theme"]
 
 [[package]]
 name = "zipp"
-version = "3.7.0"
+version = "3.15.0"
 description = "Backport of pathlib-compatible object wrapper for zip files"
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "zipp-3.7.0-py3-none-any.whl", hash = "sha256:b47250dd24f92b7dd6a0a8fc5244da14608f3ca90a5efcd37a3b1642fac9a375"},
-    {file = "zipp-3.7.0.tar.gz", hash = "sha256:9f50f446828eb9d45b267433fd3e9da8d801f614129124863f9c51ebceafb87d"},
+    {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"},
+    {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"},
 ]
 
 [package.extras]
-docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"]
-testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy"]
+docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]
 
 [[package]]
-name = "zope.event"
-version = "4.5.0"
+name = "zope-event"
+version = "4.6"
 description = "Very basic event publishing system"
 category = "dev"
 optional = false
 python-versions = "*"
 files = [
-    {file = "zope.event-4.5.0-py2.py3-none-any.whl", hash = "sha256:2666401939cdaa5f4e0c08cf7f20c9b21423b95e88f4675b1443973bdb080c42"},
-    {file = "zope.event-4.5.0.tar.gz", hash = "sha256:5e76517f5b9b119acf37ca8819781db6c16ea433f7e2062c4afc2b6fbedb1330"},
+    {file = "zope.event-4.6-py2.py3-none-any.whl", hash = "sha256:73d9e3ef750cca14816a9c322c7250b0d7c9dbc337df5d1b807ff8d3d0b9e97c"},
+    {file = "zope.event-4.6.tar.gz", hash = "sha256:81d98813046fc86cc4136e3698fee628a3282f9c320db18658c21749235fce80"},
 ]
 
 [package.dependencies]
@@ -2812,64 +3339,43 @@ docs = ["Sphinx"]
 test = ["zope.testrunner"]
 
 [[package]]
-name = "zope.interface"
-version = "5.4.0"
+name = "zope-interface"
+version = "6.0"
 description = "Interfaces for Python"
 category = "main"
 optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+python-versions = ">=3.7"
 files = [
-    {file = "zope.interface-5.4.0-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:7df1e1c05304f26faa49fa752a8c690126cf98b40b91d54e6e9cc3b7d6ffe8b7"},
-    {file = "zope.interface-5.4.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:2c98384b254b37ce50eddd55db8d381a5c53b4c10ee66e1e7fe749824f894021"},
-    {file = "zope.interface-5.4.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:08f9636e99a9d5410181ba0729e0408d3d8748026ea938f3b970a0249daa8192"},
-    {file = "zope.interface-5.4.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:0ea1d73b7c9dcbc5080bb8aaffb776f1c68e807767069b9ccdd06f27a161914a"},
-    {file = "zope.interface-5.4.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:273f158fabc5ea33cbc936da0ab3d4ba80ede5351babc4f577d768e057651531"},
-    {file = "zope.interface-5.4.0-cp27-cp27m-win32.whl", hash = "sha256:a1e6e96217a0f72e2b8629e271e1b280c6fa3fe6e59fa8f6701bec14e3354325"},
-    {file = "zope.interface-5.4.0-cp27-cp27m-win_amd64.whl", hash = "sha256:877473e675fdcc113c138813a5dd440da0769a2d81f4d86614e5d62b69497155"},
-    {file = "zope.interface-5.4.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:f7ee479e96f7ee350db1cf24afa5685a5899e2b34992fb99e1f7c1b0b758d263"},
-    {file = "zope.interface-5.4.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:b0297b1e05fd128d26cc2460c810d42e205d16d76799526dfa8c8ccd50e74959"},
-    {file = "zope.interface-5.4.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:af310ec8335016b5e52cae60cda4a4f2a60a788cbb949a4fbea13d441aa5a09e"},
-    {file = "zope.interface-5.4.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:9a9845c4c6bb56e508651f005c4aeb0404e518c6f000d5a1123ab077ab769f5c"},
-    {file = "zope.interface-5.4.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:0b465ae0962d49c68aa9733ba92a001b2a0933c317780435f00be7ecb959c702"},
-    {file = "zope.interface-5.4.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:5dd9ca406499444f4c8299f803d4a14edf7890ecc595c8b1c7115c2342cadc5f"},
-    {file = "zope.interface-5.4.0-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:469e2407e0fe9880ac690a3666f03eb4c3c444411a5a5fddfdabc5d184a79f05"},
-    {file = "zope.interface-5.4.0-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:52de7fc6c21b419078008f697fd4103dbc763288b1406b4562554bd47514c004"},
-    {file = "zope.interface-5.4.0-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:3dd4952748521205697bc2802e4afac5ed4b02909bb799ba1fe239f77fd4e117"},
-    {file = "zope.interface-5.4.0-cp35-cp35m-win32.whl", hash = "sha256:dd93ea5c0c7f3e25335ab7d22a507b1dc43976e1345508f845efc573d3d779d8"},
-    {file = "zope.interface-5.4.0-cp35-cp35m-win_amd64.whl", hash = "sha256:3748fac0d0f6a304e674955ab1365d515993b3a0a865e16a11ec9d86fb307f63"},
-    {file = "zope.interface-5.4.0-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:66c0061c91b3b9cf542131148ef7ecbecb2690d48d1612ec386de9d36766058f"},
-    {file = "zope.interface-5.4.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:d0c1bc2fa9a7285719e5678584f6b92572a5b639d0e471bb8d4b650a1a910920"},
-    {file = "zope.interface-5.4.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:2876246527c91e101184f63ccd1d716ec9c46519cc5f3d5375a3351c46467c46"},
-    {file = "zope.interface-5.4.0-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:334701327f37c47fa628fc8b8d28c7d7730ce7daaf4bda1efb741679c2b087fc"},
-    {file = "zope.interface-5.4.0-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:71aace0c42d53abe6fc7f726c5d3b60d90f3c5c055a447950ad6ea9cec2e37d9"},
-    {file = "zope.interface-5.4.0-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:5bb3489b4558e49ad2c5118137cfeaf59434f9737fa9c5deefc72d22c23822e2"},
-    {file = "zope.interface-5.4.0-cp36-cp36m-win32.whl", hash = "sha256:1c0e316c9add0db48a5b703833881351444398b04111188069a26a61cfb4df78"},
-    {file = "zope.interface-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:6f0c02cbb9691b7c91d5009108f975f8ffeab5dff8f26d62e21c493060eff2a1"},
-    {file = "zope.interface-5.4.0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:7d97a4306898b05404a0dcdc32d9709b7d8832c0c542b861d9a826301719794e"},
-    {file = "zope.interface-5.4.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:867a5ad16892bf20e6c4ea2aab1971f45645ff3102ad29bd84c86027fa99997b"},
-    {file = "zope.interface-5.4.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5f931a1c21dfa7a9c573ec1f50a31135ccce84e32507c54e1ea404894c5eb96f"},
-    {file = "zope.interface-5.4.0-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:194d0bcb1374ac3e1e023961610dc8f2c78a0f5f634d0c737691e215569e640d"},
-    {file = "zope.interface-5.4.0-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:8270252effc60b9642b423189a2fe90eb6b59e87cbee54549db3f5562ff8d1b8"},
-    {file = "zope.interface-5.4.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:15e7d1f7a6ee16572e21e3576d2012b2778cbacf75eb4b7400be37455f5ca8bf"},
-    {file = "zope.interface-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:8892f89999ffd992208754851e5a052f6b5db70a1e3f7d54b17c5211e37a98c7"},
-    {file = "zope.interface-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2e5a26f16503be6c826abca904e45f1a44ff275fdb7e9d1b75c10671c26f8b94"},
-    {file = "zope.interface-5.4.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:0f91b5b948686659a8e28b728ff5e74b1be6bf40cb04704453617e5f1e945ef3"},
-    {file = "zope.interface-5.4.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:4de4bc9b6d35c5af65b454d3e9bc98c50eb3960d5a3762c9438df57427134b8e"},
-    {file = "zope.interface-5.4.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:bf68f4b2b6683e52bec69273562df15af352e5ed25d1b6641e7efddc5951d1a7"},
-    {file = "zope.interface-5.4.0-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:63b82bb63de7c821428d513607e84c6d97d58afd1fe2eb645030bdc185440120"},
-    {file = "zope.interface-5.4.0-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:db1fa631737dab9fa0b37f3979d8d2631e348c3b4e8325d6873c2541d0ae5a48"},
-    {file = "zope.interface-5.4.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:f44e517131a98f7a76696a7b21b164bcb85291cee106a23beccce454e1f433a4"},
-    {file = "zope.interface-5.4.0-cp38-cp38-win32.whl", hash = "sha256:a9506a7e80bcf6eacfff7f804c0ad5350c8c95b9010e4356a4b36f5322f09abb"},
-    {file = "zope.interface-5.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:3c02411a3b62668200910090a0dff17c0b25aaa36145082a5a6adf08fa281e54"},
-    {file = "zope.interface-5.4.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:0cee5187b60ed26d56eb2960136288ce91bcf61e2a9405660d271d1f122a69a4"},
-    {file = "zope.interface-5.4.0-cp39-cp39-manylinux1_i686.whl", hash = "sha256:a8156e6a7f5e2a0ff0c5b21d6bcb45145efece1909efcbbbf48c56f8da68221d"},
-    {file = "zope.interface-5.4.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:205e40ccde0f37496904572035deea747390a8b7dc65146d30b96e2dd1359a83"},
-    {file = "zope.interface-5.4.0-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:3f24df7124c323fceb53ff6168da70dbfbae1442b4f3da439cd441681f54fe25"},
-    {file = "zope.interface-5.4.0-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:5208ebd5152e040640518a77827bdfcc73773a15a33d6644015b763b9c9febc1"},
-    {file = "zope.interface-5.4.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:17776ecd3a1fdd2b2cd5373e5ef8b307162f581c693575ec62e7c5399d80794c"},
-    {file = "zope.interface-5.4.0-cp39-cp39-win32.whl", hash = "sha256:d4d9d6c1a455d4babd320203b918ccc7fcbefe308615c521062bc2ba1aa4d26e"},
-    {file = "zope.interface-5.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:0cba8477e300d64a11a9789ed40ee8932b59f9ee05f85276dbb4b59acee5dd09"},
-    {file = "zope.interface-5.4.0.tar.gz", hash = "sha256:5dba5f530fec3f0988d83b78cc591b58c0b6eb8431a85edd1569a0539a8a5a0e"},
+    {file = "zope.interface-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f299c020c6679cb389814a3b81200fe55d428012c5e76da7e722491f5d205990"},
+    {file = "zope.interface-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ee4b43f35f5dc15e1fec55ccb53c130adb1d11e8ad8263d68b1284b66a04190d"},
+    {file = "zope.interface-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a158846d0fca0a908c1afb281ddba88744d403f2550dc34405c3691769cdd85"},
+    {file = "zope.interface-6.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f72f23bab1848edb7472309e9898603141644faec9fd57a823ea6b4d1c4c8995"},
+    {file = "zope.interface-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48f4d38cf4b462e75fac78b6f11ad47b06b1c568eb59896db5b6ec1094eb467f"},
+    {file = "zope.interface-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:87b690bbee9876163210fd3f500ee59f5803e4a6607d1b1238833b8885ebd410"},
+    {file = "zope.interface-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f2363e5fd81afb650085c6686f2ee3706975c54f331b426800b53531191fdf28"},
+    {file = "zope.interface-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:af169ba897692e9cd984a81cb0f02e46dacdc07d6cf9fd5c91e81f8efaf93d52"},
+    {file = "zope.interface-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa90bac61c9dc3e1a563e5babb3fd2c0c1c80567e815442ddbe561eadc803b30"},
+    {file = "zope.interface-6.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:89086c9d3490a0f265a3c4b794037a84541ff5ffa28bb9c24cc9f66566968464"},
+    {file = "zope.interface-6.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:809fe3bf1a91393abc7e92d607976bbb8586512913a79f2bf7d7ec15bd8ea518"},
+    {file = "zope.interface-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:0ec9653825f837fbddc4e4b603d90269b501486c11800d7c761eee7ce46d1bbb"},
+    {file = "zope.interface-6.0-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:790c1d9d8f9c92819c31ea660cd43c3d5451df1df61e2e814a6f99cebb292788"},
+    {file = "zope.interface-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b39b8711578dcfd45fc0140993403b8a81e879ec25d53189f3faa1f006087dca"},
+    {file = "zope.interface-6.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eba51599370c87088d8882ab74f637de0c4f04a6d08a312dce49368ba9ed5c2a"},
+    {file = "zope.interface-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ee934f023f875ec2cfd2b05a937bd817efcc6c4c3f55c5778cbf78e58362ddc"},
+    {file = "zope.interface-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:042f2381118b093714081fd82c98e3b189b68db38ee7d35b63c327c470ef8373"},
+    {file = "zope.interface-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:dfbbbf0809a3606046a41f8561c3eada9db811be94138f42d9135a5c47e75f6f"},
+    {file = "zope.interface-6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:424d23b97fa1542d7be882eae0c0fc3d6827784105264a8169a26ce16db260d8"},
+    {file = "zope.interface-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e538f2d4a6ffb6edfb303ce70ae7e88629ac6e5581870e66c306d9ad7b564a58"},
+    {file = "zope.interface-6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12175ca6b4db7621aedd7c30aa7cfa0a2d65ea3a0105393e05482d7a2d367446"},
+    {file = "zope.interface-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3d7dfd897a588ec27e391edbe3dd320a03684457470415870254e714126b1f"},
+    {file = "zope.interface-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:b3f543ae9d3408549a9900720f18c0194ac0fe810cecda2a584fd4dca2eb3bb8"},
+    {file = "zope.interface-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d0583b75f2e70ec93f100931660328965bb9ff65ae54695fb3fa0a1255daa6f2"},
+    {file = "zope.interface-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:23ac41d52fd15dd8be77e3257bc51bbb82469cf7f5e9a30b75e903e21439d16c"},
+    {file = "zope.interface-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99856d6c98a326abbcc2363827e16bd6044f70f2ef42f453c0bd5440c4ce24e5"},
+    {file = "zope.interface-6.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1592f68ae11e557b9ff2bc96ac8fc30b187e77c45a3c9cd876e3368c53dc5ba8"},
+    {file = "zope.interface-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4407b1435572e3e1610797c9203ad2753666c62883b921318c5403fb7139dec2"},
+    {file = "zope.interface-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:5171eb073474a5038321409a630904fd61f12dd1856dd7e9d19cd6fe092cbbc5"},
+    {file = "zope.interface-6.0.tar.gz", hash = "sha256:aab584725afd10c710b8f1e6e208dbee2d0ad009f57d674cb9d1b3964037275d"},
 ]
 
 [package.dependencies]
@@ -2881,15 +3387,15 @@ test = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 
 [[package]]
-name = "zope.schema"
-version = "6.2.0"
+name = "zope-schema"
+version = "7.0.1"
 description = "zope.interface extension for defining data schemas"
 category = "dev"
 optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+python-versions = ">=3.7"
 files = [
-    {file = "zope.schema-6.2.0-py2.py3-none-any.whl", hash = "sha256:03150d8670549590b45109e06b7b964f4e751fa9cb5297ec4985c3bc38641b07"},
-    {file = "zope.schema-6.2.0.tar.gz", hash = "sha256:2201aef8ad75ee5a881284d7a6acd384661d6dca7bde5e80a22839a77124595b"},
+    {file = "zope.schema-7.0.1-py3-none-any.whl", hash = "sha256:cf006c678793b00e0075ad54d55281c8785ea21e5bc1f5ec0584787719c2aab2"},
+    {file = "zope.schema-7.0.1.tar.gz", hash = "sha256:ead4dbcb03354d4e410c9a3b904451eb44d90254751b1cbdedf4a61aede9fbb9"},
 ]
 
 [package.dependencies]
@@ -2920,4 +3426,4 @@ user-search = ["pyicu"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.7.1"
-content-hash = "0ca92e52a1952f9485172efe25a039351280c28f0a158869557dc2f8855786fe"
+content-hash = "1455d7b3de98a85a0361ae7018c5cde92264aa35d0bf842d79fe4d778787bbf6"
diff --git a/pyproject.toml b/pyproject.toml
index c0111dd796..d10c390043 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -350,6 +350,18 @@ towncrier = ">=18.6.0rc1"
 # Used for checking the Poetry lockfile
 tomli = ">=1.2.3"
 
+
+# Dependencies for building the development documentation
+[tool.poetry.group.dev-docs]
+optional = true
+
+[tool.poetry.group.dev-docs.dependencies]
+sphinx = {version = "^6.1", python = "^3.8"}
+sphinx-autodoc2 = {version = "^0.4.2", python = "^3.8"}
+myst-parser = {version = "^1.0.0", python = "^3.8"}
+furo = "^2022.12.7"
+
+
 [build-system]
 # The upper bounds here are defensive, intended to prevent situations like
 # #13849 and #14079 where we see buildtime or runtime errors caused by build
diff --git a/scripts-dev/lint.sh b/scripts-dev/lint.sh
index 9e4ed3246e..1c0e6582f6 100755
--- a/scripts-dev/lint.sh
+++ b/scripts-dev/lint.sh
@@ -91,6 +91,7 @@ else
           "synapse" "docker" "tests"
           "scripts-dev"
           "contrib" "synmark" "stubs" ".ci"
+          "dev-docs"
       )
   fi
 fi
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index 106daa9184..edc4b1768c 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -11,6 +11,119 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+The Federation Sender is responsible for sending Persistent Data Units (PDUs)
+and Ephemeral Data Units (EDUs) to other homeservers using
+the `/send` Federation API.
+
+
+## How do PDUs get sent?
+
+The Federation Sender is made aware of new PDUs due to `FederationSender.notify_new_events`.
+When the sender is notified about a newly-persisted PDU that originates from this homeserver
+and is not an out-of-band event, we pass the PDU to the `PerDestinationQueue` for each
+remote homeserver that is in the room at that point in the DAG.
+
+
+### Per-Destination Queues
+
+There is one `PerDestinationQueue` per 'destination' homeserver.
+The `PerDestinationQueue` maintains the following information about the destination:
+
+- whether the destination is currently in [catch-up mode (see below)](#catch-up-mode);
+- a queue of PDUs to be sent to the destination; and
+- a queue of EDUs to be sent to the destination (not considered in this section).
+
+Upon a new PDU being enqueued, `attempt_new_transaction` is called to start a new
+transaction if there is not already one in progress.
+
+
+### Transactions and the Transaction Transmission Loop
+
+Each federation HTTP request to the `/send` endpoint is referred to as a 'transaction'.
+The body of the HTTP request contains a list of PDUs and EDUs to send to the destination.
+
+The *Transaction Transmission Loop* (`_transaction_transmission_loop`) is responsible
+for emptying the queued PDUs (and EDUs) from a `PerDestinationQueue` by sending
+them to the destination.
+
+There can only be one transaction in flight for a given destination at any time.
+(Besides preventing us from overloading the destination, this also makes the sender easier
+to reason about, because we process events sequentially for each destination.
+This is useful for *Catch-Up Mode*, described later.)
+
+The loop continues so long as there is anything to send. At each iteration of the loop, we:
+
+- dequeue up to 50 PDUs (and up to 100 EDUs).
+- make the `/send` request to the destination homeserver with the dequeued PDUs and EDUs.
+- if successful, make note of the fact that we succeeded in transmitting PDUs up to
+  the `stream_ordering` of the latest PDU, by advancing `last_successful_stream_ordering`.
+- if unsuccessful, back off from the remote homeserver for some time.
+  If we have been unsuccessful for too long (when the backoff interval grows to exceed 1 hour),
+  the in-memory queues are emptied and we enter [*Catch-Up Mode*, described below](#catch-up-mode).
+
+
+### Catch-Up Mode
+
+When the `PerDestinationQueue` has the catch-up flag set, the *Catch-Up Transmission Loop*
+(`_catch_up_transmission_loop`) is used in lieu of the regular `_transaction_transmission_loop`.
+(Only once the catch-up mode has been exited can the regular transaction transmission behaviour
+be resumed.)
+
+*Catch-Up Mode*, entered upon Synapse startup or once a homeserver has fallen behind due to
+connection problems, is responsible for sending PDUs that have been missed by the destination
+homeserver. (PDUs can be missed because the `PerDestinationQueue` is volatile — i.e. resets
+on startup — and it does not hold PDUs forever if `/send` requests to the destination fail.)
+
+The catch-up mechanism makes use of the `last_successful_stream_ordering` column in the
+`destinations` table (which gives the `stream_ordering` of the most recent successfully
+sent PDU) and the `stream_ordering` column in the `destination_rooms` table (which gives,
+for each room, the `stream_ordering` of the most recent PDU that needs to be sent to this
+destination).
+
+Each iteration of the loop pulls out 50 `destination_rooms` entries with the oldest
+`stream_ordering`s that are greater than the `last_successful_stream_ordering`.
+In other words, from the set of latest PDUs in each room to be sent to the destination,
+the 50 oldest such PDUs are pulled out.
+
+These PDUs could, in principle, now be directly sent to the destination. However, as an
+optimisation intended to prevent overloading destination homeservers, we instead attempt
+to send the latest forward extremities so long as the destination homeserver is still
+eligible to receive those.
+This reduces load on the destination **in aggregate** because all Synapse homeservers
+will behave according to this principle and therefore avoid sending lots of different PDUs
+at different points in the DAG to a recovering homeserver.
+*This optimisation is not currently valid in rooms which are partial-state on this homeserver,
+since we are unable to determine whether the destination homeserver is eligible to receive
+the latest forward extremities unless this homeserver sent those PDUs — in this case, we
+just send the latest PDUs originating from this server and skip this optimisation.*
+
+Whilst PDUs are sent through this mechanism, the position of `last_successful_stream_ordering`
+is advanced as normal.
+Once there are no longer any rooms containing outstanding PDUs to be sent to the destination
+*that are not already in the `PerDestinationQueue` because they arrived since Catch-Up Mode
+was enabled*, Catch-Up Mode is exited and we return to `_transaction_transmission_loop`.
+
+
+#### A note on failures and back-offs
+
+If a remote server is unreachable over federation, we back off from that server,
+with an exponentially-increasing retry interval.
+Whilst we don't automatically retry after the interval, we prevent making new attempts
+until such time as the back-off has cleared.
+Once the back-off is cleared and a new PDU or EDU arrives for transmission, the transmission
+loop resumes and empties the queue by making federation requests.
+
+If the backoff grows too large (> 1 hour), the in-memory queue is emptied (to prevent
+unbounded growth) and Catch-Up Mode is entered.
+
+It is worth noting that the back-off for a remote server is cleared once an inbound
+request from that remote server is received (see `notify_remote_server_up`).
+At this point, the transaction transmission loop is also started up, to proactively
+send missed PDUs and EDUs to the destination (i.e. you don't need to wait for a new PDU
+or EDU, destined for that destination, to be created in order to send out missed PDUs and
+EDUs).
+"""
 
 import abc
 import logging
-- 
cgit 1.5.1


From 4fc85e5a921c7200a54a7fd1e9b56b5d2fedc453 Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Mon, 27 Mar 2023 13:37:17 +0200
Subject: Load `/password_policy` endpoint on workers. (#15331)

---
 changelog.d/15331.feature              | 1 +
 docker/configure_workers_and_start.py  | 1 +
 docs/workers.md                        | 1 +
 synapse/rest/__init__.py               | 3 +--
 synapse/rest/client/password_policy.py | 1 +
 5 files changed, 5 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15331.feature

(limited to 'synapse')

diff --git a/changelog.d/15331.feature b/changelog.d/15331.feature
new file mode 100644
index 0000000000..b4c2eddc48
--- /dev/null
+++ b/changelog.d/15331.feature
@@ -0,0 +1 @@
+Allow loading `/password_policy` endpoint on workers.
\ No newline at end of file
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index 3f2f5c2daf..2a50ee1e4b 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -172,6 +172,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
             "^/_matrix/client/v1/rooms/.*/timestamp_to_event$",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/search",
             "^/_matrix/client/(r0|v3|unstable)/user/.*/filter(/|$)",
+            "^/_matrix/client/(r0|v3|unstable)/password_policy$",
         ],
         "shared_extra_conf": {},
         "worker_extra_conf": "",
diff --git a/docs/workers.md b/docs/workers.md
index bf7690f5af..e9a477d32c 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -247,6 +247,7 @@ information.
     ^/_matrix/client/(r0|v3|unstable)/register$
     ^/_matrix/client/(r0|v3|unstable)/register/available$
     ^/_matrix/client/v1/register/m.login.registration_token/validity$
+    ^/_matrix/client/(r0|v3|unstable)/password_policy$
 
     # Event sending requests
     ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/redact
diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py
index 55b448adfd..1d7c11b42d 100644
--- a/synapse/rest/__init__.py
+++ b/synapse/rest/__init__.py
@@ -138,8 +138,7 @@ class ClientRestResource(JsonResource):
             capabilities.register_servlets(hs, client_resource)
             account_validity.register_servlets(hs, client_resource)
         relations.register_servlets(hs, client_resource)
-        if is_main_process:
-            password_policy.register_servlets(hs, client_resource)
+        password_policy.register_servlets(hs, client_resource)
         knock.register_servlets(hs, client_resource)
         appservice_ping.register_servlets(hs, client_resource)
 
diff --git a/synapse/rest/client/password_policy.py b/synapse/rest/client/password_policy.py
index 9f1908004b..0ee4f9da16 100644
--- a/synapse/rest/client/password_policy.py
+++ b/synapse/rest/client/password_policy.py
@@ -31,6 +31,7 @@ logger = logging.getLogger(__name__)
 
 class PasswordPolicyServlet(RestServlet):
     PATTERNS = client_patterns("/password_policy$")
+    CATEGORY = "Registration/login requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
-- 
cgit 1.5.1


From 96f163d932626e2c95a9ebdda293a14ee5b1dfea Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 27 Mar 2023 14:32:36 +0100
Subject: Prune old typing notifications (#15332)

Rather than keeping them around forever in memory, slowing things down.

Fixes #11750.
---
 changelog.d/15332.bugfix   |  1 +
 synapse/handlers/typing.py | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+)
 create mode 100644 changelog.d/15332.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15332.bugfix b/changelog.d/15332.bugfix
new file mode 100644
index 0000000000..ca6fb1d2fd
--- /dev/null
+++ b/changelog.d/15332.bugfix
@@ -0,0 +1 @@
+Fix bug in worker mode where on a rolling restart of workers the "typing" worker would consume 100% CPU until it got restarted.
diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py
index 3f656ea4f5..39ae44ea95 100644
--- a/synapse/handlers/typing.py
+++ b/synapse/handlers/typing.py
@@ -52,6 +52,11 @@ FEDERATION_TIMEOUT = 60 * 1000
 FEDERATION_PING_INTERVAL = 40 * 1000
 
 
+# How long to remember a typing notification happened in a room before
+# forgetting about it.
+FORGET_TIMEOUT = 10 * 60 * 1000
+
+
 class FollowerTypingHandler:
     """A typing handler on a different process than the writer that is updated
     via replication.
@@ -83,7 +88,10 @@ class FollowerTypingHandler:
         self.wheel_timer: WheelTimer[RoomMember] = WheelTimer(bucket_size=5000)
         self._latest_room_serial = 0
 
+        self._rooms_updated: Set[str] = set()
+
         self.clock.looping_call(self._handle_timeouts, 5000)
+        self.clock.looping_call(self._prune_old_typing, FORGET_TIMEOUT)
 
     def _reset(self) -> None:
         """Reset the typing handler's data caches."""
@@ -92,6 +100,8 @@ class FollowerTypingHandler:
         # map room IDs to sets of users currently typing
         self._room_typing = {}
 
+        self._rooms_updated = set()
+
         self._member_last_federation_poke = {}
         self.wheel_timer = WheelTimer(bucket_size=5000)
 
@@ -178,6 +188,7 @@ class FollowerTypingHandler:
             prev_typing = self._room_typing.get(row.room_id, set())
             now_typing = set(row.user_ids)
             self._room_typing[row.room_id] = now_typing
+            self._rooms_updated.add(row.room_id)
 
             if self.federation:
                 run_as_background_process(
@@ -209,6 +220,19 @@ class FollowerTypingHandler:
     def get_current_token(self) -> int:
         return self._latest_room_serial
 
+    def _prune_old_typing(self) -> None:
+        """Prune rooms that haven't seen typing updates since last time.
+
+        This is safe to do as clients should time out old typing notifications.
+        """
+        stale_rooms = self._room_serials.keys() - self._rooms_updated
+
+        for room_id in stale_rooms:
+            self._room_serials.pop(room_id, None)
+            self._room_typing.pop(room_id, None)
+
+        self._rooms_updated = set()
+
 
 class TypingWriterHandler(FollowerTypingHandler):
     def __init__(self, hs: "HomeServer"):
@@ -388,6 +412,7 @@ class TypingWriterHandler(FollowerTypingHandler):
         self._typing_stream_change_cache.entity_has_changed(
             member.room_id, self._latest_room_serial
         )
+        self._rooms_updated.add(member.room_id)
 
         self.notifier.on_new_event(
             StreamKeyType.TYPING, self._latest_room_serial, rooms=[member.room_id]
-- 
cgit 1.5.1


From bd4d958aaf7c2123abb3665e7a7b199cf8ce27ee Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 28 Mar 2023 09:46:47 +0100
Subject: Bump ruff from 0.0.252 to 0.0.259 (#15328)

* Bump ruff from 0.0.252 to 0.0.259

Bumps [ruff](https://github.com/charliermarsh/ruff) from 0.0.252 to 0.0.259.
- [Release notes](https://github.com/charliermarsh/ruff/releases)
- [Changelog](https://github.com/charliermarsh/ruff/blob/main/BREAKING_CHANGES.md)
- [Commits](https://github.com/charliermarsh/ruff/compare/v0.0.252...v0.0.259)

---
updated-dependencies:
- dependency-name: ruff
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Fix new warnings

* Mypy

* Newsfile

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Erik Johnston <erik@matrix.org>
---
 changelog.d/15328.misc                         |  1 +
 poetry.lock                                    | 38 +++++++++++++-------------
 pyproject.toml                                 |  2 +-
 synapse/events/__init__.py                     |  4 +--
 synapse/events/utils.py                        |  2 +-
 synapse/storage/database.py                    |  4 +--
 synapse/storage/databases/main/events.py       |  5 ++--
 synapse/storage/databases/main/pusher.py       |  2 +-
 synapse/storage/databases/main/stats.py        | 14 ++++++++--
 synapse/storage/databases/main/stream.py       |  5 +++-
 tests/replication/slave/storage/test_events.py |  2 +-
 tests/server.py                                | 10 +++++--
 12 files changed, 54 insertions(+), 35 deletions(-)
 create mode 100644 changelog.d/15328.misc

(limited to 'synapse')

diff --git a/changelog.d/15328.misc b/changelog.d/15328.misc
new file mode 100644
index 0000000000..e3e5953332
--- /dev/null
+++ b/changelog.d/15328.misc
@@ -0,0 +1 @@
+Bump ruff from 0.0.252 to 0.0.259.
diff --git a/poetry.lock b/poetry.lock
index 294ce49a8d..978a6e1598 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2323,29 +2323,29 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"]
 
 [[package]]
 name = "ruff"
-version = "0.0.252"
+version = "0.0.259"
 description = "An extremely fast Python linter, written in Rust."
 category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "ruff-0.0.252-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:349367a227c4db7abbc3a9993efea8a608b5bea4bb4a1e5fc6f0d56819524f92"},
-    {file = "ruff-0.0.252-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:ce77f9106d96b4faf7865860fb5155b9deaf6f699d9c279118c5ad947739ecaf"},
-    {file = "ruff-0.0.252-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edadb0b050293b4e60dab979ba6a4e734d9c899cbe316a0ee5b65e3cdd39c750"},
-    {file = "ruff-0.0.252-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4efdae98937d1e4d23ab0b7fc7e8e6b6836cc7d2d42238ceeacbc793ef780542"},
-    {file = "ruff-0.0.252-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c8546d879f7d3f669379a03e7b103d90e11901976ab508aeda59c03dfd8a359e"},
-    {file = "ruff-0.0.252-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:83fdc7169b6c1fb5fe8d1cdf345697f558c1b433ef97df9ca11defa2a8f3ee9e"},
-    {file = "ruff-0.0.252-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84ed9be1a17e2a556a571a5b959398633dd10910abd8dcf8b098061e746e892d"},
-    {file = "ruff-0.0.252-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f5e77bd9ba4438cf2ee32154e2673afe22f538ef29f5d65ca47e3dc46c42cf8"},
-    {file = "ruff-0.0.252-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a5179b94b45c0f8512eaff3ab304c14714a46df2e9ca72a9d96084adc376b71"},
-    {file = "ruff-0.0.252-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:92efd8a71157595df5bc46aaaa0613d8a2fbc5cddc53ae7b749c16025c324732"},
-    {file = "ruff-0.0.252-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fd350fc10832cfd28e681d829a8aa83ea3e653326e0ea9d98637dfb8d46177d2"},
-    {file = "ruff-0.0.252-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f119240c9631216e846166e06023b1d878e25fbac93bf20da50069e91cfbfaee"},
-    {file = "ruff-0.0.252-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5c5a49f89f5ede93d16eddfeeadd7e5739ec703e8f63ac95eac30236b9e49da3"},
-    {file = "ruff-0.0.252-py3-none-win32.whl", hash = "sha256:89a897dc743f2fe063483ea666097e72e848f4bbe40493fe0533e61799959f6e"},
-    {file = "ruff-0.0.252-py3-none-win_amd64.whl", hash = "sha256:cdc89ad6ff88519b1fb1816ac82a9ad910762c90ff5fd64dda7691b72d36aff7"},
-    {file = "ruff-0.0.252-py3-none-win_arm64.whl", hash = "sha256:4b594a17cf53077165429486650658a0e1b2ac6ab88954f5afd50d2b1b5657a9"},
-    {file = "ruff-0.0.252.tar.gz", hash = "sha256:6992611ab7bdbe7204e4831c95ddd3febfeece2e6f5e44bbed044454c7db0f63"},
+    {file = "ruff-0.0.259-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:f3938dc45e2a3f818e9cbd53007265c22246fbfded8837b2c563bf0ebde1a226"},
+    {file = "ruff-0.0.259-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:22e1e35bf5f12072cd644d22afd9203641ccf258bc14ff91aa1c43dc14f6047d"},
+    {file = "ruff-0.0.259-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2fb20e89e85d147c85caa807707a1488bccc1f3854dc3d53533e89b52a0c5ff"},
+    {file = "ruff-0.0.259-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:49e903bcda19f6bb0725a962c058eb5d61f40d84ef52ed53b61939b69402ab4e"},
+    {file = "ruff-0.0.259-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:71f0ef1985e9a6696fa97da8459917fa34bdaa2c16bd33bd5edead585b7d44f7"},
+    {file = "ruff-0.0.259-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:7cfef26619cba184d59aa7fa17b48af5891d51fc0b755a9bc533478a10d4d066"},
+    {file = "ruff-0.0.259-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79b02fa17ec1fd8d306ae302cb47fb614b71e1f539997858243769bcbe78c6d9"},
+    {file = "ruff-0.0.259-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:428507fb321b386dda70d66cd1a8aa0abf51d7c197983d83bb9e4fa5ee60300b"},
+    {file = "ruff-0.0.259-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5fbaea9167f1852757f02133e5daacdb8c75b3431343205395da5b10499927a"},
+    {file = "ruff-0.0.259-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:40ae87f2638484b7e8a7567b04a7af719f1c484c5bf132038b702bb32e1f6577"},
+    {file = "ruff-0.0.259-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:29e2b77b7d5da6a7dd5cf9b738b511355c5734ece56f78e500d4b5bffd58c1a0"},
+    {file = "ruff-0.0.259-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5b3c1beacf6037e7f0781d4699d9a2dd4ba2462f475be5b1f45cf84c4ba3c69d"},
+    {file = "ruff-0.0.259-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:daaea322e7e85f4c13d82be9536309e1c4b8b9851bb0cbc7eeb15d490fd46bf9"},
+    {file = "ruff-0.0.259-py3-none-win32.whl", hash = "sha256:38704f151323aa5858370a2f792e122cc25e5d1aabe7d42ceeab83da18f0b456"},
+    {file = "ruff-0.0.259-py3-none-win_amd64.whl", hash = "sha256:aa9449b898287e621942cc71b9327eceb8f0c357e4065fecefb707ef2d978df8"},
+    {file = "ruff-0.0.259-py3-none-win_arm64.whl", hash = "sha256:e4f39e18702de69faaaee3969934b92d7467285627f99a5b6ecd55a7d9f5d086"},
+    {file = "ruff-0.0.259.tar.gz", hash = "sha256:8b56496063ab3bfdf72339a5fbebb8bd46e5c5fee25ef11a9f03b208fa0562ec"},
 ]
 
 [[package]]
@@ -3426,4 +3426,4 @@ user-search = ["pyicu"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.7.1"
-content-hash = "0a1dd4be3dff3c8cc71bd57a4eb48e1d92f155db7230e61fbb54f8af03619509"
+content-hash = "102eed4faa13eab195555ea070f235acd1e3f0ff9cf028afcac6c51b3e409071"
diff --git a/pyproject.toml b/pyproject.toml
index b04edb611d..9a6306ee70 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -311,7 +311,7 @@ all = [
 # We pin black so that our tests don't start failing on new releases.
 isort = ">=5.10.1"
 black = ">=22.3.0"
-ruff = "0.0.252"
+ruff = "0.0.259"
 
 # Typechecking
 mypy = "*"
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 91118a8d84..d475fe7ae5 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -462,7 +462,7 @@ class FrozenEvent(EventBase):
         # Signatures is a dict of dicts, and this is faster than doing a
         # copy.deepcopy
         signatures = {
-            name: {sig_id: sig for sig_id, sig in sigs.items()}
+            name: dict(sigs.items())
             for name, sigs in event_dict.pop("signatures", {}).items()
         }
 
@@ -510,7 +510,7 @@ class FrozenEventV2(EventBase):
         # Signatures is a dict of dicts, and this is faster than doing a
         # copy.deepcopy
         signatures = {
-            name: {sig_id: sig for sig_id, sig in sigs.items()}
+            name: dict(sigs.items())
             for name, sigs in event_dict.pop("signatures", {}).items()
         }
 
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index e41c7a4b83..c14c7791db 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -355,7 +355,7 @@ def serialize_event(
     time_now_ms = int(time_now_ms)
 
     # Should this strip out None's?
-    d = {k: v for k, v in e.get_dict().items()}
+    d = dict(e.get_dict().items())
 
     d["event_id"] = e.event_id
 
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index fec4ae5b97..226ccc1671 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -1504,8 +1504,8 @@ class DatabasePool:
         self.engine.lock_table(txn, "user_ips")
 
         for keyv, valv in zip(key_values, value_values):
-            _keys = {x: y for x, y in zip(key_names, keyv)}
-            _vals = {x: y for x, y in zip(value_names, valv)}
+            _keys = dict(zip(key_names, keyv))
+            _vals = dict(zip(value_names, valv))
 
             self.simple_upsert_txn_emulated(txn, table, _keys, _vals, lock=False)
 
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 193959b250..ccd9f9d141 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -27,6 +27,7 @@ from typing import (
     Optional,
     Set,
     Tuple,
+    cast,
 )
 
 import attr
@@ -1348,9 +1349,7 @@ class PersistEventsStore:
             [event.event_id for event, _ in events_and_contexts],
         )
 
-        have_persisted: Dict[str, bool] = {
-            event_id: outlier for event_id, outlier in txn
-        }
+        have_persisted = dict(cast(Iterable[Tuple[str, bool]], txn))
 
         logger.debug(
             "_update_outliers_txn: events=%s have_persisted=%s",
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
index ab76b754e0..aeb6034f46 100644
--- a/synapse/storage/databases/main/pusher.py
+++ b/synapse/storage/databases/main/pusher.py
@@ -518,7 +518,7 @@ class PusherBackgroundUpdatesStore(SQLBaseStore):
         def set_device_id_for_pushers_txn(txn: LoggingTransaction) -> int:
             txn.execute(
                 """
-                    SELECT 
+                    SELECT
                         p.id AS pusher_id,
                         p.device_id AS pusher_device_id,
                         at.device_id AS token_device_id
diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py
index d3393d8e49..97c4dc2603 100644
--- a/synapse/storage/databases/main/stats.py
+++ b/synapse/storage/databases/main/stats.py
@@ -16,7 +16,17 @@
 import logging
 from enum import Enum
 from itertools import chain
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+)
 
 from typing_extensions import Counter
 
@@ -523,7 +533,7 @@ class StatsStore(StateDeltasStore):
                 """,
                 (room_id,),
             )
-            membership_counts = {membership: cnt for membership, cnt in txn}
+            membership_counts = dict(cast(Iterable[Tuple[str, int]], txn))
 
             txn.execute(
                 """
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 2b8779bbb8..92cbe262a6 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -41,6 +41,7 @@ from typing import (
     Any,
     Collection,
     Dict,
+    Iterable,
     List,
     Optional,
     Set,
@@ -1343,7 +1344,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
             GROUP BY type
         """
         txn.execute(sql)
-        min_positions = {typ: pos for typ, pos in txn}  # Map from type -> min position
+        min_positions = dict(
+            cast(Iterable[Tuple[str, int]], txn)
+        )  # Map from type -> min position
 
         # Ensure we do actually have some values here
         assert set(min_positions) == {"federation", "events"}
diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py
index 57c781a0c3..b2125b1fea 100644
--- a/tests/replication/slave/storage/test_events.py
+++ b/tests/replication/slave/storage/test_events.py
@@ -412,7 +412,7 @@ class EventsWorkerStoreTestCase(BaseSlavedStoreTestCase):
         self.get_success(
             self.master_store.add_push_actions_to_staging(
                 event.event_id,
-                {user_id: actions for user_id, actions in push_actions},
+                dict(push_actions),
                 False,
                 "main",
             )
diff --git a/tests/server.py b/tests/server.py
index 5de9722766..bb059630fa 100644
--- a/tests/server.py
+++ b/tests/server.py
@@ -983,7 +983,9 @@ def setup_test_homeserver(
                     dropped = True
                 except psycopg2.OperationalError as e:
                     warnings.warn(
-                        "Couldn't drop old db: " + str(e), category=UserWarning
+                        "Couldn't drop old db: " + str(e),
+                        category=UserWarning,
+                        stacklevel=2,
                     )
                     time.sleep(0.5)
 
@@ -991,7 +993,11 @@ def setup_test_homeserver(
             db_conn.close()
 
             if not dropped:
-                warnings.warn("Failed to drop old DB.", category=UserWarning)
+                warnings.warn(
+                    "Failed to drop old DB.",
+                    category=UserWarning,
+                    stacklevel=2,
+                )
 
         if not LEAVE_DB:
             # Register the cleanup hook
-- 
cgit 1.5.1


From 5282ba1e2bbff2635dc09aec45fd42a56c1a4545 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 28 Mar 2023 14:26:27 -0400
Subject: Implement MSC3983 to proxy /keys/claim queries to appservices.
 (#15314)

Experimental support for MSC3983 is behind a configuration flag.
If enabled, then for users which are exclusively owned by an application
service, the appservice will be queried for one-time keys *if*
there are none uploaded to Synapse.
---
 changelog.d/15314.feature                         |  1 +
 synapse/appservice/api.py                         | 56 +++++++++++++++++
 synapse/config/experimental.py                    |  5 ++
 synapse/federation/federation_server.py           | 20 +++---
 synapse/handlers/appservice.py                    | 74 +++++++++++++++++++++-
 synapse/handlers/e2e_keys.py                      | 57 ++++++++++++++---
 synapse/storage/databases/main/end_to_end_keys.py | 36 ++++++++---
 tests/appservice/test_api.py                      | 59 ++++++++++++++++++
 tests/handlers/test_e2e_keys.py                   | 76 ++++++++++++++++++++++-
 9 files changed, 355 insertions(+), 29 deletions(-)
 create mode 100644 changelog.d/15314.feature

(limited to 'synapse')

diff --git a/changelog.d/15314.feature b/changelog.d/15314.feature
new file mode 100644
index 0000000000..68b289b0cc
--- /dev/null
+++ b/changelog.d/15314.feature
@@ -0,0 +1 @@
+Experimental support for passing One Time Key requests to application services ([MSC3983](https://github.com/matrix-org/matrix-spec-proposals/pull/3983)).
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index 4812fb4496..51ee0e79df 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -388,6 +388,62 @@ class ApplicationServiceApi(SimpleHttpClient):
         failed_transactions_counter.labels(service.id).inc()
         return False
 
+    async def claim_client_keys(
+        self, service: "ApplicationService", query: List[Tuple[str, str, str]]
+    ) -> Tuple[Dict[str, Dict[str, Dict[str, JsonDict]]], List[Tuple[str, str, str]]]:
+        """Claim one time keys from an application service.
+
+        Args:
+            query: An iterable of tuples of (user ID, device ID, algorithm).
+
+        Returns:
+            A tuple of:
+                A map of user ID -> a map device ID -> a map of key ID -> JSON dict.
+
+                A copy of the input which has not been fulfilled because the
+                appservice doesn't support this endpoint or has not returned
+                data for that tuple.
+        """
+        if service.url is None:
+            return {}, query
+
+        # This is required by the configuration.
+        assert service.hs_token is not None
+
+        # Create the expected payload shape.
+        body: Dict[str, Dict[str, List[str]]] = {}
+        for user_id, device, algorithm in query:
+            body.setdefault(user_id, {}).setdefault(device, []).append(algorithm)
+
+        uri = f"{service.url}/_matrix/app/unstable/org.matrix.msc3983/keys/claim"
+        try:
+            response = await self.post_json_get_json(
+                uri,
+                body,
+                headers={"Authorization": [f"Bearer {service.hs_token}"]},
+            )
+        except CodeMessageException as e:
+            # The appservice doesn't support this endpoint.
+            if e.code == 404 or e.code == 405:
+                return {}, query
+            logger.warning("claim_keys to %s received %s", uri, e.code)
+            return {}, query
+        except Exception as ex:
+            logger.warning("claim_keys to %s threw exception %s", uri, ex)
+            return {}, query
+
+        # Check if the appservice fulfilled all of the queried user/device/algorithms
+        # or if some are still missing.
+        #
+        # TODO This places a lot of faith in the response shape being correct.
+        missing = [
+            (user_id, device, algorithm)
+            for user_id, device, algorithm in query
+            if algorithm not in response.get(user_id, {}).get(device, [])
+        ]
+
+        return response, missing
+
     def _serialize(
         self, service: "ApplicationService", events: Iterable[EventBase]
     ) -> List[JsonDict]:
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 99dcd27c74..53e6fc2b54 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -74,6 +74,11 @@ class ExperimentalConfig(Config):
             "msc3202_transaction_extensions", False
         )
 
+        # MSC3983: Proxying OTK claim requests to exclusive ASes.
+        self.msc3983_appservice_otk_claims: bool = experimental.get(
+            "msc3983_appservice_otk_claims", False
+        )
+
         # MSC3706 (server-side support for partial state in /send_join responses)
         # Synapse will always serve partial state responses to requests using the stable
         # query parameter `omit_members`. If this flag is set, Synapse will also serve
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 6d99845de5..64e99292ec 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -86,7 +86,7 @@ from synapse.storage.databases.main.lock import Lock
 from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary
 from synapse.storage.roommember import MemberSummary
 from synapse.types import JsonDict, StateMap, get_domain_from_id
-from synapse.util import json_decoder, unwrapFirstError
+from synapse.util import unwrapFirstError
 from synapse.util.async_helpers import Linearizer, concurrently_execute, gather_results
 from synapse.util.caches.response_cache import ResponseCache
 from synapse.util.stringutils import parse_server_name
@@ -135,6 +135,7 @@ class FederationServer(FederationBase):
         self.state = hs.get_state_handler()
         self._event_auth_handler = hs.get_event_auth_handler()
         self._room_member_handler = hs.get_room_member_handler()
+        self._e2e_keys_handler = hs.get_e2e_keys_handler()
 
         self._state_storage_controller = hs.get_storage_controllers().state
 
@@ -1012,15 +1013,14 @@ class FederationServer(FederationBase):
                 query.append((user_id, device_id, algorithm))
 
         log_kv({"message": "Claiming one time keys.", "user, device pairs": query})
-        results = await self.store.claim_e2e_one_time_keys(query)
-
-        json_result: Dict[str, Dict[str, dict]] = {}
-        for user_id, device_keys in results.items():
-            for device_id, keys in device_keys.items():
-                for key_id, json_str in keys.items():
-                    json_result.setdefault(user_id, {})[device_id] = {
-                        key_id: json_decoder.decode(json_str)
-                    }
+        results = await self._e2e_keys_handler.claim_local_one_time_keys(query)
+
+        json_result: Dict[str, Dict[str, Dict[str, JsonDict]]] = {}
+        for result in results:
+            for user_id, device_keys in result.items():
+                for device_id, keys in device_keys.items():
+                    for key_id, key in keys.items():
+                        json_result.setdefault(user_id, {})[device_id] = {key_id: key}
 
         logger.info(
             "Claimed one-time-keys: %s",
diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py
index ec3ab968e9..953df4d9cd 100644
--- a/synapse/handlers/appservice.py
+++ b/synapse/handlers/appservice.py
@@ -12,7 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Union
+from typing import (
+    TYPE_CHECKING,
+    Collection,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Tuple,
+    Union,
+)
 
 from prometheus_client import Counter
 
@@ -829,3 +838,66 @@ class ApplicationServicesHandler:
         if unknown_user:
             return await self.query_user_exists(user_id)
         return True
+
+    async def claim_e2e_one_time_keys(
+        self, query: Iterable[Tuple[str, str, str]]
+    ) -> Tuple[
+        Iterable[Dict[str, Dict[str, Dict[str, JsonDict]]]], List[Tuple[str, str, str]]
+    ]:
+        """Claim one time keys from application services.
+
+        Args:
+            query: An iterable of tuples of (user ID, device ID, algorithm).
+
+        Returns:
+            A tuple of:
+                An iterable of maps of user ID -> a map device ID -> a map of key ID -> JSON dict.
+
+                A copy of the input which has not been fulfilled (either because
+                they are not appservice users or the appservice does not support
+                providing OTKs).
+        """
+        services = self.store.get_app_services()
+
+        # Partition the users by appservice.
+        query_by_appservice: Dict[str, List[Tuple[str, str, str]]] = {}
+        missing = []
+        for user_id, device, algorithm in query:
+            if not self.store.get_if_app_services_interested_in_user(user_id):
+                missing.append((user_id, device, algorithm))
+                continue
+
+            # Find the associated appservice.
+            for service in services:
+                if service.is_exclusive_user(user_id):
+                    query_by_appservice.setdefault(service.id, []).append(
+                        (user_id, device, algorithm)
+                    )
+                    continue
+
+        # Query each service in parallel.
+        results = await make_deferred_yieldable(
+            defer.DeferredList(
+                [
+                    run_in_background(
+                        self.appservice_api.claim_client_keys,
+                        # We know this must be an app service.
+                        self.store.get_app_service_by_id(service_id),  # type: ignore[arg-type]
+                        service_query,
+                    )
+                    for service_id, service_query in query_by_appservice.items()
+                ],
+                consumeErrors=True,
+            )
+        )
+
+        # Patch together the results -- they are all independent (since they
+        # require exclusive control over the users). They get returned as a list
+        # and the caller combines them.
+        claimed_keys: List[Dict[str, Dict[str, Dict[str, JsonDict]]]] = []
+        for success, result in results:
+            if success:
+                claimed_keys.append(result[0])
+                missing.extend(result[1])
+
+        return claimed_keys, missing
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 4e9c8d8db0..9e7c2c45b5 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -13,7 +13,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import logging
 from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Optional, Tuple
 
@@ -53,6 +52,7 @@ class E2eKeysHandler:
         self.store = hs.get_datastores().main
         self.federation = hs.get_federation_client()
         self.device_handler = hs.get_device_handler()
+        self._appservice_handler = hs.get_application_service_handler()
         self.is_mine = hs.is_mine
         self.clock = hs.get_clock()
 
@@ -88,6 +88,10 @@ class E2eKeysHandler:
             max_count=10,
         )
 
+        self._query_appservices_for_otks = (
+            hs.config.experimental.msc3983_appservice_otk_claims
+        )
+
     @trace
     @cancellable
     async def query_devices(
@@ -542,6 +546,42 @@ class E2eKeysHandler:
 
         return ret
 
+    async def claim_local_one_time_keys(
+        self, local_query: List[Tuple[str, str, str]]
+    ) -> Iterable[Dict[str, Dict[str, Dict[str, JsonDict]]]]:
+        """Claim one time keys for local users.
+
+        1. Attempt to claim OTKs from the database.
+        2. Ask application services if they provide OTKs.
+        3. Attempt to fetch fallback keys from the database.
+
+        Args:
+            local_query: An iterable of tuples of (user ID, device ID, algorithm).
+
+        Returns:
+            An iterable of maps of user ID -> a map device ID -> a map of key ID -> JSON dict.
+        """
+
+        otk_results, not_found = await self.store.claim_e2e_one_time_keys(local_query)
+
+        # If the application services have not provided any keys via the C-S
+        # API, query them directly for one-time keys.
+        if self._query_appservices_for_otks:
+            (
+                appservice_results,
+                not_found,
+            ) = await self._appservice_handler.claim_e2e_one_time_keys(not_found)
+        else:
+            appservice_results = []
+
+        # For each user that does not have a one-time key available, see if
+        # there is a fallback key.
+        fallback_results = await self.store.claim_e2e_fallback_keys(not_found)
+
+        # Return the results in order, each item from the input query should
+        # only appear once in the combined list.
+        return (otk_results, *appservice_results, fallback_results)
+
     @trace
     async def claim_one_time_keys(
         self, query: Dict[str, Dict[str, Dict[str, str]]], timeout: Optional[int]
@@ -561,17 +601,18 @@ class E2eKeysHandler:
         set_tag("local_key_query", str(local_query))
         set_tag("remote_key_query", str(remote_queries))
 
-        results = await self.store.claim_e2e_one_time_keys(local_query)
+        results = await self.claim_local_one_time_keys(local_query)
 
         # A map of user ID -> device ID -> key ID -> key.
         json_result: Dict[str, Dict[str, Dict[str, JsonDict]]] = {}
+        for result in results:
+            for user_id, device_keys in result.items():
+                for device_id, keys in device_keys.items():
+                    for key_id, key in keys.items():
+                        json_result.setdefault(user_id, {})[device_id] = {key_id: key}
+
+        # Remote failures.
         failures: Dict[str, JsonDict] = {}
-        for user_id, device_keys in results.items():
-            for device_id, keys in device_keys.items():
-                for key_id, json_str in keys.items():
-                    json_result.setdefault(user_id, {})[device_id] = {
-                        key_id: json_decoder.decode(json_str)
-                    }
 
         @trace
         async def claim_client_keys(destination: str) -> None:
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index a3b6c8ae8e..dc7768c50c 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -51,7 +51,7 @@ from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
 from synapse.storage.engines import PostgresEngine
 from synapse.storage.util.id_generators import StreamIdGenerator
 from synapse.types import JsonDict
-from synapse.util import json_encoder
+from synapse.util import json_decoder, json_encoder
 from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.cancellation import cancellable
 from synapse.util.iterutils import batch_iter
@@ -1028,14 +1028,17 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
 
     async def claim_e2e_one_time_keys(
         self, query_list: Iterable[Tuple[str, str, str]]
-    ) -> Dict[str, Dict[str, Dict[str, str]]]:
+    ) -> Tuple[Dict[str, Dict[str, Dict[str, JsonDict]]], List[Tuple[str, str, str]]]:
         """Take a list of one time keys out of the database.
 
         Args:
             query_list: An iterable of tuples of (user ID, device ID, algorithm).
 
         Returns:
-            A map of user ID -> a map device ID -> a map of key ID -> JSON bytes.
+            A tuple of:
+                A map of user ID -> a map device ID -> a map of key ID -> JSON.
+
+                A copy of the input which has not been fulfilled.
         """
 
         @trace
@@ -1115,7 +1118,8 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
             key_id, key_json = otk_row
             return f"{algorithm}:{key_id}", key_json
 
-        results: Dict[str, Dict[str, Dict[str, str]]] = {}
+        results: Dict[str, Dict[str, Dict[str, JsonDict]]] = {}
+        missing: List[Tuple[str, str, str]] = []
         for user_id, device_id, algorithm in query_list:
             if self.database_engine.supports_returning:
                 # If we support RETURNING clause we can use a single query that
@@ -1138,11 +1142,25 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
                 device_results = results.setdefault(user_id, {}).setdefault(
                     device_id, {}
                 )
-                device_results[claim_row[0]] = claim_row[1]
-                continue
+                device_results[claim_row[0]] = json_decoder.decode(claim_row[1])
+            else:
+                missing.append((user_id, device_id, algorithm))
+
+        return results, missing
+
+    async def claim_e2e_fallback_keys(
+        self, query_list: Iterable[Tuple[str, str, str]]
+    ) -> Dict[str, Dict[str, Dict[str, JsonDict]]]:
+        """Take a list of fallback keys out of the database.
 
-            # No one-time key available, so see if there's a fallback
-            # key
+        Args:
+            query_list: An iterable of tuples of (user ID, device ID, algorithm).
+
+        Returns:
+            A map of user ID -> a map device ID -> a map of key ID -> JSON.
+        """
+        results: Dict[str, Dict[str, Dict[str, JsonDict]]] = {}
+        for user_id, device_id, algorithm in query_list:
             row = await self.db_pool.simple_select_one(
                 table="e2e_fallback_keys_json",
                 keyvalues={
@@ -1179,7 +1197,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
                 )
 
             device_results = results.setdefault(user_id, {}).setdefault(device_id, {})
-            device_results[f"{algorithm}:{key_id}"] = key_json
+            device_results[f"{algorithm}:{key_id}"] = json_decoder.decode(key_json)
 
         return results
 
diff --git a/tests/appservice/test_api.py b/tests/appservice/test_api.py
index 9d183b733e..0dd02b7d58 100644
--- a/tests/appservice/test_api.py
+++ b/tests/appservice/test_api.py
@@ -105,3 +105,62 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
         )
         self.assertEqual(self.request_url, URL_LOCATION)
         self.assertEqual(result, SUCCESS_RESULT_LOCATION)
+
+    def test_claim_keys(self) -> None:
+        """
+        Tests that the /keys/claim response is properly parsed for missing
+        keys.
+        """
+
+        RESPONSE: JsonDict = {
+            "@alice:example.org": {
+                "DEVICE_1": {
+                    "signed_curve25519:AAAAHg": {
+                        # We don't really care about the content of the keys,
+                        # they get passed back transparently.
+                    },
+                    "signed_curve25519:BBBBHg": {},
+                },
+                "DEVICE_2": {"signed_curve25519:CCCCHg": {}},
+            },
+        }
+
+        async def post_json_get_json(
+            uri: str,
+            post_json: Any,
+            headers: Mapping[Union[str, bytes], Sequence[Union[str, bytes]]],
+        ) -> JsonDict:
+            # Ensure the access token is passed as a header.
+            if not headers.get("Authorization"):
+                raise RuntimeError("Access token not provided")
+
+            self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"])
+            return RESPONSE
+
+        # We assign to a method, which mypy doesn't like.
+        self.api.post_json_get_json = Mock(side_effect=post_json_get_json)  # type: ignore[assignment]
+
+        MISSING_KEYS = [
+            # Known user, known device, missing algorithm.
+            ("@alice:example.org", "DEVICE_1", "signed_curve25519:DDDDHg"),
+            # Known user, missing device.
+            ("@alice:example.org", "DEVICE_3", "signed_curve25519:EEEEHg"),
+            # Unknown user.
+            ("@bob:example.org", "DEVICE_4", "signed_curve25519:FFFFHg"),
+        ]
+
+        claimed_keys, missing = self.get_success(
+            self.api.claim_client_keys(
+                self.service,
+                [
+                    # Found devices
+                    ("@alice:example.org", "DEVICE_1", "signed_curve25519:AAAAHg"),
+                    ("@alice:example.org", "DEVICE_1", "signed_curve25519:BBBBHg"),
+                    ("@alice:example.org", "DEVICE_2", "signed_curve25519:CCCCHg"),
+                ]
+                + MISSING_KEYS,
+            )
+        )
+
+        self.assertEqual(claimed_keys, RESPONSE)
+        self.assertEqual(missing, MISSING_KEYS)
diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py
index 6b4cba65d0..4ff04fc66b 100644
--- a/tests/handlers/test_e2e_keys.py
+++ b/tests/handlers/test_e2e_keys.py
@@ -23,18 +23,24 @@ from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.constants import RoomEncryptionAlgorithms
 from synapse.api.errors import Codes, SynapseError
+from synapse.appservice import ApplicationService
 from synapse.handlers.device import DeviceHandler
 from synapse.server import HomeServer
+from synapse.storage.databases.main.appservice import _make_exclusive_regex
 from synapse.types import JsonDict
 from synapse.util import Clock
 
 from tests import unittest
 from tests.test_utils import make_awaitable
+from tests.unittest import override_config
 
 
 class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        return self.setup_test_homeserver(federation_client=mock.Mock())
+        self.appservice_api = mock.Mock()
+        return self.setup_test_homeserver(
+            federation_client=mock.Mock(), application_service_api=self.appservice_api
+        )
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.handler = hs.get_e2e_keys_handler()
@@ -941,3 +947,71 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
 
             # The two requests to the local homeserver should be identical.
             self.assertEqual(response_1, response_2)
+
+    @override_config({"experimental_features": {"msc3983_appservice_otk_claims": True}})
+    def test_query_appservice(self) -> None:
+        local_user = "@boris:" + self.hs.hostname
+        device_id_1 = "xyz"
+        fallback_key = {"alg1:k1": "fallback_key1"}
+        device_id_2 = "abc"
+        otk = {"alg1:k2": "key2"}
+
+        # Inject an appservice interested in this user.
+        appservice = ApplicationService(
+            token="i_am_an_app_service",
+            id="1234",
+            namespaces={"users": [{"regex": r"@boris:*", "exclusive": True}]},
+            # Note: this user does not have to match the regex above
+            sender="@as_main:test",
+        )
+        self.hs.get_datastores().main.services_cache = [appservice]
+        self.hs.get_datastores().main.exclusive_user_regex = _make_exclusive_regex(
+            [appservice]
+        )
+
+        # Setup a response, but only for device 2.
+        self.appservice_api.claim_client_keys.return_value = make_awaitable(
+            ({local_user: {device_id_2: otk}}, [(local_user, device_id_1, "alg1")])
+        )
+
+        # we shouldn't have any unused fallback keys yet
+        res = self.get_success(
+            self.store.get_e2e_unused_fallback_key_types(local_user, device_id_1)
+        )
+        self.assertEqual(res, [])
+
+        self.get_success(
+            self.handler.upload_keys_for_user(
+                local_user,
+                device_id_1,
+                {"fallback_keys": fallback_key},
+            )
+        )
+
+        # we should now have an unused alg1 key
+        fallback_res = self.get_success(
+            self.store.get_e2e_unused_fallback_key_types(local_user, device_id_1)
+        )
+        self.assertEqual(fallback_res, ["alg1"])
+
+        # claiming an OTK when no OTKs are available should ask the appservice, then
+        # query the fallback keys.
+        claim_res = self.get_success(
+            self.handler.claim_one_time_keys(
+                {
+                    "one_time_keys": {
+                        local_user: {device_id_1: "alg1", device_id_2: "alg1"}
+                    }
+                },
+                timeout=None,
+            )
+        )
+        self.assertEqual(
+            claim_res,
+            {
+                "failures": {},
+                "one_time_keys": {
+                    local_user: {device_id_1: fallback_key, device_id_2: otk}
+                },
+            },
+        )
-- 
cgit 1.5.1


From 753d1d9cde08940edfd3851d230faaf18a2ba1ff Mon Sep 17 00:00:00 2001
From: "DeepBlueV7.X" <nicolas.werner@hotmail.de>
Date: Wed, 29 Mar 2023 08:37:27 +0000
Subject: Fix joining rooms you have been unbanned from (#15323)

* Fix joining rooms you have been unbanned from

Since forever synapse did not allow you to join a room after you have
been unbanned from it over federation. This was not actually because of
the unban event not federating. Synapse simply used outdated state to
validate the join transition. This skips the validation if we are not in
the room and for that reason won't have the current room state.

Fixes #1563

Signed-off-by: Nicolas Werner <nicolas.werner@hotmail.de>

* Add changelog

Signed-off-by: Nicolas Werner <nicolas.werner@hotmail.de>

* Update changelog.d/15323.bugfix

---------

Signed-off-by: Nicolas Werner <nicolas.werner@hotmail.de>
---
 changelog.d/15323.bugfix             |   1 +
 synapse/handlers/federation_event.py |   2 +-
 synapse/handlers/room_member.py      | 109 ++++++++++++++++++-----------------
 3 files changed, 59 insertions(+), 53 deletions(-)
 create mode 100644 changelog.d/15323.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15323.bugfix b/changelog.d/15323.bugfix
new file mode 100644
index 0000000000..bc1ab35532
--- /dev/null
+++ b/changelog.d/15323.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug preventing users from joining rooms, that they had been unbanned from, over federation. Contributed by Nico.
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index b7136f8d1c..648843cdbe 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -583,7 +583,7 @@ class FederationEventHandler:
 
             await self._check_event_auth(origin, event, context)
             if context.rejected:
-                raise SynapseError(400, "Join event was rejected")
+                raise SynapseError(403, "Join event was rejected")
 
             # the remote server is responsible for sending our join event to the rest
             # of the federation. Indeed, attempting to do so will result in problems
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 509c557889..1d8b0aee6f 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -850,63 +850,68 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         # `is_partial_state_room` also indicates whether `partial_state_before_join` is
         # partial.
 
-        # TODO: Refactor into dictionary of explicitly allowed transitions
-        # between old and new state, with specific error messages for some
-        # transitions and generic otherwise
-        old_state_id = partial_state_before_join.get(
-            (EventTypes.Member, target.to_string())
-        )
-        if old_state_id:
-            old_state = await self.store.get_event(old_state_id, allow_none=True)
-            old_membership = old_state.content.get("membership") if old_state else None
-            if action == "unban" and old_membership != "ban":
-                raise SynapseError(
-                    403,
-                    "Cannot unban user who was not banned"
-                    " (membership=%s)" % old_membership,
-                    errcode=Codes.BAD_STATE,
-                )
-            if old_membership == "ban" and action not in ["ban", "unban", "leave"]:
-                raise SynapseError(
-                    403,
-                    "Cannot %s user who was banned" % (action,),
-                    errcode=Codes.BAD_STATE,
-                )
-
-            if old_state:
-                same_content = content == old_state.content
-                same_membership = old_membership == effective_membership_state
-                same_sender = requester.user.to_string() == old_state.sender
-                if same_sender and same_membership and same_content:
-                    # duplicate event.
-                    # we know it was persisted, so must have a stream ordering.
-                    assert old_state.internal_metadata.stream_ordering
-                    return (
-                        old_state.event_id,
-                        old_state.internal_metadata.stream_ordering,
-                    )
+        is_host_in_room = await self._is_host_in_room(partial_state_before_join)
 
-            if old_membership in ["ban", "leave"] and action == "kick":
-                raise AuthError(403, "The target user is not in the room")
+        # if we are not in the room, we won't have the current state
+        if is_host_in_room:
+            # TODO: Refactor into dictionary of explicitly allowed transitions
+            # between old and new state, with specific error messages for some
+            # transitions and generic otherwise
+            old_state_id = partial_state_before_join.get(
+                (EventTypes.Member, target.to_string())
+            )
 
-            # we don't allow people to reject invites to the server notice
-            # room, but they can leave it once they are joined.
-            if (
-                old_membership == Membership.INVITE
-                and effective_membership_state == Membership.LEAVE
-            ):
-                is_blocked = await self.store.is_server_notice_room(room_id)
-                if is_blocked:
+            if old_state_id:
+                old_state = await self.store.get_event(old_state_id, allow_none=True)
+                old_membership = (
+                    old_state.content.get("membership") if old_state else None
+                )
+                if action == "unban" and old_membership != "ban":
                     raise SynapseError(
-                        HTTPStatus.FORBIDDEN,
-                        "You cannot reject this invite",
-                        errcode=Codes.CANNOT_LEAVE_SERVER_NOTICE_ROOM,
+                        403,
+                        "Cannot unban user who was not banned"
+                        " (membership=%s)" % old_membership,
+                        errcode=Codes.BAD_STATE,
+                    )
+                if old_membership == "ban" and action not in ["ban", "unban", "leave"]:
+                    raise SynapseError(
+                        403,
+                        "Cannot %s user who was banned" % (action,),
+                        errcode=Codes.BAD_STATE,
                     )
-        else:
-            if action == "kick":
-                raise AuthError(403, "The target user is not in the room")
 
-        is_host_in_room = await self._is_host_in_room(partial_state_before_join)
+                if old_state:
+                    same_content = content == old_state.content
+                    same_membership = old_membership == effective_membership_state
+                    same_sender = requester.user.to_string() == old_state.sender
+                    if same_sender and same_membership and same_content:
+                        # duplicate event.
+                        # we know it was persisted, so must have a stream ordering.
+                        assert old_state.internal_metadata.stream_ordering
+                        return (
+                            old_state.event_id,
+                            old_state.internal_metadata.stream_ordering,
+                        )
+
+                if old_membership in ["ban", "leave"] and action == "kick":
+                    raise AuthError(403, "The target user is not in the room")
+
+                # we don't allow people to reject invites to the server notice
+                # room, but they can leave it once they are joined.
+                if (
+                    old_membership == Membership.INVITE
+                    and effective_membership_state == Membership.LEAVE
+                ):
+                    is_blocked = await self.store.is_server_notice_room(room_id)
+                    if is_blocked:
+                        raise SynapseError(
+                            HTTPStatus.FORBIDDEN,
+                            "You cannot reject this invite",
+                            errcode=Codes.CANNOT_LEAVE_SERVER_NOTICE_ROOM,
+                        )
+            else:
+                if action == "kick":
+                    raise AuthError(403, "The target user is not in the room")
 
         if effective_membership_state == Membership.JOIN:
             if requester.is_guest:
-- 
cgit 1.5.1


From 78cdb72cd6b0e007c314d9fed9f629dfc5b937a6 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 29 Mar 2023 12:07:14 +0100
Subject: Delete stale non-e2e devices for users, take 3 (#15183)

This should help reduce the number of devices that e.g. simple bots which repeatedly log in rack up.

We only delete non-e2e devices as they should be safe to delete, whereas if we delete e2e devices for a user we may accidentally break their ability to receive e2e keys for a message.
---
 changelog.d/15183.misc                    |  1 +
 synapse/handlers/device.py                |  2 +-
 synapse/handlers/register.py              | 50 ++++++++++++++++++-
 synapse/storage/databases/main/devices.py | 80 ++++++++++++++++++++++++++++++-
 tests/handlers/test_admin.py              |  2 +-
 tests/handlers/test_device.py             |  2 +-
 tests/storage/test_client_ips.py          |  4 +-
 7 files changed, 134 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/15183.misc

(limited to 'synapse')

diff --git a/changelog.d/15183.misc b/changelog.d/15183.misc
new file mode 100644
index 0000000000..f9bfc581ad
--- /dev/null
+++ b/changelog.d/15183.misc
@@ -0,0 +1 @@
+Prune user's old devices on login if they have too many.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 9ded6389ac..0fc165a8d6 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -485,7 +485,7 @@ class DeviceHandler(DeviceWorkerHandler):
             device_ids = [d for d in device_ids if d != except_device_id]
         await self.delete_devices(user_id, device_ids)
 
-    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
+    async def delete_devices(self, user_id: str, device_ids: StrCollection) -> None:
         """Delete several devices
 
         Args:
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index c8bf2439af..bb1df1e60f 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -16,7 +16,7 @@
 """Contains functions for registering clients."""
 
 import logging
-from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple
+from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple
 
 from prometheus_client import Counter
 from typing_extensions import TypedDict
@@ -40,6 +40,7 @@ from synapse.appservice import ApplicationService
 from synapse.config.server import is_threepid_reserved
 from synapse.handlers.device import DeviceHandler
 from synapse.http.servlet import assert_params_in_dict
+from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.replication.http.login import RegisterDeviceReplicationServlet
 from synapse.replication.http.register import (
     ReplicationPostRegisterActionsServlet,
@@ -48,6 +49,7 @@ from synapse.replication.http.register import (
 from synapse.spam_checker_api import RegistrationBehaviour
 from synapse.types import RoomAlias, UserID, create_requester
 from synapse.types.state import StateFilter
+from synapse.util.iterutils import batch_iter
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -110,6 +112,10 @@ class RegistrationHandler:
         self._server_notices_mxid = hs.config.servernotices.server_notices_mxid
         self._server_name = hs.hostname
 
+        # The set of users that we're currently pruning devices for. Ensures
+        # that we don't have two such jobs for the same user running at once.
+        self._currently_pruning_devices_for_users: Set[str] = set()
+
         self.spam_checker = hs.get_spam_checker()
 
         if hs.config.worker.worker_app:
@@ -121,7 +127,10 @@ class RegistrationHandler:
                 ReplicationPostRegisterActionsServlet.make_client(hs)
             )
         else:
-            self.device_handler = hs.get_device_handler()
+            device_handler = hs.get_device_handler()
+            assert isinstance(device_handler, DeviceHandler)
+            self.device_handler = device_handler
+
             self._register_device_client = self.register_device_inner
             self.pusher_pool = hs.get_pusherpool()
 
@@ -851,6 +860,9 @@ class RegistrationHandler:
         # This can only run on the main process.
         assert isinstance(self.device_handler, DeviceHandler)
 
+        # Prune the user's device list if they already have a lot of devices.
+        await self._maybe_prune_too_many_devices(user_id)
+
         registered_device_id = await self.device_handler.check_device_registered(
             user_id,
             device_id,
@@ -919,6 +931,40 @@ class RegistrationHandler:
             "refresh_token": refresh_token,
         }
 
+    async def _maybe_prune_too_many_devices(self, user_id: str) -> None:
+        """Delete any excess old devices this user may have."""
+
+        if user_id in self._currently_pruning_devices_for_users:
+            return
+
+        # We also cap the number of users whose devices we prune at the same
+        # time, to avoid performance problems.
+        if len(self._currently_pruning_devices_for_users) > 5:
+            return
+
+        device_ids = await self.store.check_too_many_devices_for_user(user_id)
+        if not device_ids:
+            return
+
+        # Now spawn a background loop that deletes said devices.
+        async def _prune_too_many_devices_loop() -> None:
+            if user_id in self._currently_pruning_devices_for_users:
+                return
+
+            self._currently_pruning_devices_for_users.add(user_id)
+
+            try:
+                for batch in batch_iter(device_ids, 10):
+                    await self.device_handler.delete_devices(user_id, batch)
+
+                    await self.clock.sleep(60)
+            finally:
+                self._currently_pruning_devices_for_users.discard(user_id)
+
+        run_as_background_process(
+            "_prune_too_many_devices_loop", _prune_too_many_devices_loop
+        )
+
     async def post_registration_actions(
         self, user_id: str, auth_result: dict, access_token: Optional[str]
     ) -> None:
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 5503621ad6..7647cda2c6 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1599,6 +1599,73 @@ class DeviceBackgroundUpdateStore(SQLBaseStore):
 
         return rows
 
+    async def check_too_many_devices_for_user(self, user_id: str) -> List[str]:
+        """Check if the user has a lot of devices, and if so return the set of
+        devices we can prune.
+
+        This does *not* return hidden devices or devices with E2E keys.
+        """
+
+        num_devices = await self.db_pool.simple_select_one_onecol(
+            table="devices",
+            keyvalues={"user_id": user_id, "hidden": False},
+            retcol="COALESCE(COUNT(*), 0)",
+            desc="count_devices",
+        )
+
+        # We let users have up to ten devices without pruning.
+        if num_devices <= 10:
+            return []
+
+        # We always prune devices not seen in the last 14 days...
+        max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000
+
+        # ... but we also cap the maximum number of devices the user can have to
+        # 50.
+        if num_devices > 50:
+            # Choose a last seen that ensures we keep at most 50 devices.
+            sql = """
+                SELECT last_seen FROM devices
+                LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
+                WHERE
+                    user_id = ?
+                    AND NOT hidden
+                    AND last_seen IS NOT NULL
+                    AND key_json IS NULL
+                ORDER BY last_seen DESC
+                LIMIT 1
+                OFFSET 50
+            """
+
+            rows = await self.db_pool.execute(
+                "check_too_many_devices_for_user_last_seen", None, sql, (user_id,)
+            )
+            if rows:
+                max_last_seen = max(rows[0][0], max_last_seen)
+
+        # Fetch the devices to delete.
+        sql = """
+            SELECT DISTINCT device_id FROM devices
+            LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
+            WHERE
+                user_id = ?
+                AND NOT hidden
+                AND last_seen < ?
+                AND key_json IS NULL
+            ORDER BY last_seen
+        """
+
+        def check_too_many_devices_for_user_txn(
+            txn: LoggingTransaction,
+        ) -> List[str]:
+            txn.execute(sql, (user_id, max_last_seen))
+            return [device_id for device_id, in txn]
+
+        return await self.db_pool.runInteraction(
+            "check_too_many_devices_for_user",
+            check_too_many_devices_for_user_txn,
+        )
+
 
 class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
     # Because we have write access, this will be a StreamIdGenerator
@@ -1657,6 +1724,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
                 values={},
                 insertion_values={
                     "display_name": initial_device_display_name,
+                    "last_seen": self._clock.time_msec(),
                     "hidden": False,
                 },
                 desc="store_device",
@@ -1702,7 +1770,15 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
             )
             raise StoreError(500, "Problem storing device.")
 
-    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
+    @cached(max_entries=0)
+    async def delete_device(self, user_id: str, device_id: str) -> None:
+        raise NotImplementedError()
+
+    # Note: sometimes deleting rows out of `device_inbox` can take a long time,
+    # so we use a cache so that we deduplicate in flight requests to delete
+    # devices.
+    @cachedList(cached_method_name="delete_device", list_name="device_ids")
+    async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> dict:
         """Deletes several devices.
 
         Args:
@@ -1739,6 +1815,8 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         for device_id in device_ids:
             self.device_id_exists_cache.invalidate((user_id, device_id))
 
+        return {}
+
     async def update_device(
         self, user_id: str, device_id: str, new_display_name: Optional[str] = None
     ) -> None:
diff --git a/tests/handlers/test_admin.py b/tests/handlers/test_admin.py
index 5569ccef8a..f0ba3775c8 100644
--- a/tests/handlers/test_admin.py
+++ b/tests/handlers/test_admin.py
@@ -272,7 +272,7 @@ class ExfiltrateData(unittest.HomeserverTestCase):
         self.assertIn("device_id", args[0][0])
         self.assertIsNone(args[0][0]["display_name"])
         self.assertIsNone(args[0][0]["last_seen_user_agent"])
-        self.assertIsNone(args[0][0]["last_seen_ts"])
+        self.assertEqual(args[0][0]["last_seen_ts"], 600)
         self.assertIsNone(args[0][0]["last_seen_ip"])
 
     def test_connections(self) -> None:
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index ce7525e29c..a456bffd63 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -115,7 +115,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
                 "device_id": "xyz",
                 "display_name": "display 0",
                 "last_seen_ip": None,
-                "last_seen_ts": None,
+                "last_seen_ts": 1000000,
             },
             device_map["xyz"],
         )
diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py
index cd0079871c..f989986538 100644
--- a/tests/storage/test_client_ips.py
+++ b/tests/storage/test_client_ips.py
@@ -170,6 +170,8 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
             )
         )
 
+        last_seen = self.clock.time_msec()
+
         if after_persisting:
             # Trigger the storage loop
             self.reactor.advance(10)
@@ -190,7 +192,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
                         "device_id": device_id,
                         "ip": None,
                         "user_agent": None,
-                        "last_seen": None,
+                        "last_seen": last_seen,
                     },
                 ],
             )
-- 
cgit 1.5.1


From 5350b5d04da8aca80b60a6b4970020d13d789501 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 29 Mar 2023 13:24:28 +0100
Subject: Revert "Reintroduce membership tables event stream ordering (#15128)"
 (#15347)

This reverts commit e6af49fbea939d9e69ed05e0a0ced5948c722ea4.
---
 changelog.d/15128.misc                             |  1 -
 synapse/storage/databases/main/events.py           | 23 ++-----
 synapse/storage/databases/main/purge_events.py     |  6 +-
 synapse/storage/schema/__init__.py                 | 14 ++--
 .../01membership_tables_event_stream_ordering.sql  | 20 ------
 ...ership_tables_event_stream_ordering_triggers.py | 79 ----------------------
 6 files changed, 12 insertions(+), 131 deletions(-)
 delete mode 100644 changelog.d/15128.misc
 delete mode 100644 synapse/storage/schema/main/delta/74/01membership_tables_event_stream_ordering.sql
 delete mode 100644 synapse/storage/schema/main/delta/74/02membership_tables_event_stream_ordering_triggers.py

(limited to 'synapse')

diff --git a/changelog.d/15128.misc b/changelog.d/15128.misc
deleted file mode 100644
index c09911e48d..0000000000
--- a/changelog.d/15128.misc
+++ /dev/null
@@ -1 +0,0 @@
-Add denormalised event stream ordering column to membership state tables for future use. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index ccd9f9d141..9c1e506da6 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1127,15 +1127,11 @@ class PersistEventsStore:
                 # been inserted into room_memberships.
                 txn.execute_batch(
                     """INSERT INTO current_state_events
-                        (room_id, type, state_key, event_id, membership, event_stream_ordering)
-                    VALUES (
-                        ?, ?, ?, ?,
-                        (SELECT membership FROM room_memberships WHERE event_id = ?),
-                        (SELECT stream_ordering FROM events WHERE event_id = ?)
-                    )
+                        (room_id, type, state_key, event_id, membership)
+                    VALUES (?, ?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?))
                     """,
                     [
-                        (room_id, key[0], key[1], ev_id, ev_id, ev_id)
+                        (room_id, key[0], key[1], ev_id, ev_id)
                         for key, ev_id in to_insert.items()
                     ],
                 )
@@ -1162,15 +1158,11 @@ class PersistEventsStore:
             if to_insert:
                 txn.execute_batch(
                     """INSERT INTO local_current_membership
-                        (room_id, user_id, event_id, membership, event_stream_ordering)
-                    VALUES (
-                        ?, ?, ?,
-                        (SELECT membership FROM room_memberships WHERE event_id = ?),
-                        (SELECT stream_ordering FROM events WHERE event_id = ?)
-                    )
+                        (room_id, user_id, event_id, membership)
+                    VALUES (?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?))
                     """,
                     [
-                        (room_id, key[1], ev_id, ev_id, ev_id)
+                        (room_id, key[1], ev_id, ev_id)
                         for key, ev_id in to_insert.items()
                         if key[0] == EventTypes.Member and self.is_mine_id(key[1])
                     ],
@@ -1776,7 +1768,6 @@ class PersistEventsStore:
             table="room_memberships",
             keys=(
                 "event_id",
-                "event_stream_ordering",
                 "user_id",
                 "sender",
                 "room_id",
@@ -1787,7 +1778,6 @@ class PersistEventsStore:
             values=[
                 (
                     event.event_id,
-                    event.internal_metadata.stream_ordering,
                     event.state_key,
                     event.user_id,
                     event.room_id,
@@ -1820,7 +1810,6 @@ class PersistEventsStore:
                     keyvalues={"room_id": event.room_id, "user_id": event.state_key},
                     values={
                         "event_id": event.event_id,
-                        "event_stream_ordering": event.internal_metadata.stream_ordering,
                         "membership": event.membership,
                     },
                 )
diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py
index efbd3e75d9..7a7c0d9c75 100644
--- a/synapse/storage/databases/main/purge_events.py
+++ b/synapse/storage/databases/main/purge_events.py
@@ -428,16 +428,14 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
             "partial_state_events",
             "partial_state_rooms_servers",
             "partial_state_rooms",
-            # Note: the _membership(s) tables have foreign keys to the `events` table
-            # so must be deleted first.
-            "local_current_membership",
-            "room_memberships",
             "events",
             "federation_inbound_events_staging",
+            "local_current_membership",
             "receipts_graph",
             "receipts_linearized",
             "room_aliases",
             "room_depth",
+            "room_memberships",
             "room_stats_state",
             "room_stats_current",
             "room_stats_earliest_token",
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index a28f2b997c..d3103a6c7a 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 75  # remember to update the list below when updating
+SCHEMA_VERSION = 74  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -91,19 +91,13 @@ Changes in SCHEMA_VERSION = 74:
     - A query on `event_stream_ordering` column has now been disambiguated (i.e. the
       codebase can handle the `current_state_events`, `local_current_memberships` and
       `room_memberships` tables having an `event_stream_ordering` column).
-
-Changes in SCHEMA_VERSION = 75:
-    - The `event_stream_ordering` column in membership tables (`current_state_events`,
-      `local_current_membership` & `room_memberships`) is now being populated for new
-      rows. When the background job to populate historical rows lands this will
-      become the compat schema version.
 """
 
 
 SCHEMA_COMPAT_VERSION = (
-    # Queries against `event_stream_ordering` columns in membership tables must
-    # be disambiguated.
-    74
+    # The threads_id column must exist for event_push_actions, event_push_summary,
+    # receipts_linearized, and receipts_graph.
+    73
 )
 """Limit on how far the synapse codebase can be rolled back without breaking db compat
 
diff --git a/synapse/storage/schema/main/delta/74/01membership_tables_event_stream_ordering.sql b/synapse/storage/schema/main/delta/74/01membership_tables_event_stream_ordering.sql
deleted file mode 100644
index e2608f3a2e..0000000000
--- a/synapse/storage/schema/main/delta/74/01membership_tables_event_stream_ordering.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Copyright 2022 Beeper
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- Each of these are denormalised copies of `stream_ordering` from the corresponding row in` events` which
--- we use to improve database performance by reduring JOINs.
-ALTER TABLE current_state_events ADD COLUMN event_stream_ordering BIGINT REFERENCES events(stream_ordering);
-ALTER TABLE local_current_membership ADD COLUMN event_stream_ordering BIGINT REFERENCES events(stream_ordering);
-ALTER TABLE room_memberships ADD COLUMN event_stream_ordering BIGINT REFERENCES events(stream_ordering);
diff --git a/synapse/storage/schema/main/delta/74/02membership_tables_event_stream_ordering_triggers.py b/synapse/storage/schema/main/delta/74/02membership_tables_event_stream_ordering_triggers.py
deleted file mode 100644
index e32e9083b3..0000000000
--- a/synapse/storage/schema/main/delta/74/02membership_tables_event_stream_ordering_triggers.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright 2022 Beeper
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-"""
-This migration adds triggers to the room membership tables to enforce consistency.
-Triggers cannot be expressed in .sql files, so we have to use a separate file.
-"""
-from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
-from synapse.storage.types import Cursor
-
-
-def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
-    # Complain if the `event_stream_ordering` in membership tables doesn't match
-    # the `stream_ordering` row with the same `event_id` in `events`.
-    if isinstance(database_engine, Sqlite3Engine):
-        for table in (
-            "current_state_events",
-            "local_current_membership",
-            "room_memberships",
-        ):
-            cur.execute(
-                f"""
-                CREATE TRIGGER IF NOT EXISTS {table}_bad_event_stream_ordering
-                BEFORE INSERT ON {table}
-                FOR EACH ROW
-                BEGIN
-                    SELECT RAISE(ABORT, 'Incorrect event_stream_ordering in {table}')
-                    WHERE EXISTS (
-                        SELECT 1 FROM events
-                        WHERE events.event_id = NEW.event_id
-                           AND events.stream_ordering != NEW.event_stream_ordering
-                    );
-                END;
-                """
-            )
-    elif isinstance(database_engine, PostgresEngine):
-        cur.execute(
-            """
-            CREATE OR REPLACE FUNCTION check_event_stream_ordering() RETURNS trigger AS $BODY$
-            BEGIN
-                IF EXISTS (
-                    SELECT 1 FROM events
-                    WHERE events.event_id = NEW.event_id
-                       AND events.stream_ordering != NEW.event_stream_ordering
-                ) THEN
-                    RAISE EXCEPTION 'Incorrect event_stream_ordering';
-                END IF;
-                RETURN NEW;
-            END;
-            $BODY$ LANGUAGE plpgsql;
-            """
-        )
-
-        for table in (
-            "current_state_events",
-            "local_current_membership",
-            "room_memberships",
-        ):
-            cur.execute(
-                f"""
-                CREATE TRIGGER check_event_stream_ordering BEFORE INSERT OR UPDATE ON {table}
-                FOR EACH ROW
-                EXECUTE PROCEDURE check_event_stream_ordering()
-                """
-            )
-    else:
-        raise NotImplementedError("Unknown database engine")
-- 
cgit 1.5.1


From f0d8f66eaaacfa75bed65bc5d0c602fbc5339c85 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 29 Mar 2023 14:37:06 +0100
Subject: Fix registering a device on an account with lots of devices (#15348)

Fixes up #15183
---
 changelog.d/15348.misc                    |  1 +
 synapse/handlers/register.py              |  2 ++
 synapse/storage/databases/main/devices.py |  9 ++++--
 tests/rest/client/test_register.py        | 47 +++++++++++++++++++++++++++++++
 4 files changed, 56 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/15348.misc

(limited to 'synapse')

diff --git a/changelog.d/15348.misc b/changelog.d/15348.misc
new file mode 100644
index 0000000000..f9bfc581ad
--- /dev/null
+++ b/changelog.d/15348.misc
@@ -0,0 +1 @@
+Prune user's old devices on login if they have too many.
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index bb1df1e60f..7e9d065f50 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -946,6 +946,8 @@ class RegistrationHandler:
         if not device_ids:
             return
 
+        logger.info("Pruning %d stale devices for %s", len(device_ids), user_id)
+
         # Now spawn a background loop that deletes said devices.
         async def _prune_too_many_devices_loop() -> None:
             if user_id in self._currently_pruning_devices_for_users:
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 7647cda2c6..f61b7bc96e 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1638,19 +1638,22 @@ class DeviceBackgroundUpdateStore(SQLBaseStore):
             """
 
             rows = await self.db_pool.execute(
-                "check_too_many_devices_for_user_last_seen", None, sql, (user_id,)
+                "check_too_many_devices_for_user_last_seen",
+                None,
+                sql,
+                user_id,
             )
             if rows:
                 max_last_seen = max(rows[0][0], max_last_seen)
 
         # Fetch the devices to delete.
         sql = """
-            SELECT DISTINCT device_id FROM devices
+            SELECT device_id FROM devices
             LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
             WHERE
                 user_id = ?
                 AND NOT hidden
-                AND last_seen < ?
+                AND last_seen <= ?
                 AND key_json IS NULL
             ORDER BY last_seen
         """
diff --git a/tests/rest/client/test_register.py b/tests/rest/client/test_register.py
index b228dba861..7ae84e3139 100644
--- a/tests/rest/client/test_register.py
+++ b/tests/rest/client/test_register.py
@@ -794,6 +794,53 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase):
             ApprovalNoticeMedium.NONE, channel.json_body["approval_notice_medium"]
         )
 
+    def test_check_stale_devices_get_pruned(self) -> None:
+        """Check that if a user has some stale devices we log them out when they
+        log in a new device."""
+
+        # Register some devices, but not so many that we go over the threshold
+        # where we prune more aggressively.
+        user_id = self.register_user("user", "pass")
+        for _ in range(0, 50):
+            self.login(user_id, "pass")
+
+        store = self.hs.get_datastores().main
+
+        res = self.get_success(store.get_devices_by_user(user_id))
+        self.assertEqual(len(res), 50)
+
+        # Advance time so that the above devices are considered "old".
+        self.reactor.advance(30 * 24 * 60 * 60 * 1000)
+
+        self.login(user_id, "pass")
+
+        self.reactor.pump([60] * 10)  # Ensure background job runs
+
+        # We expect all old devices to have been logged out
+        res = self.get_success(store.get_devices_by_user(user_id))
+        self.assertEqual(len(res), 1)
+
+    def test_check_recent_devices_get_pruned(self) -> None:
+        """Check that if a user has many devices we log out the last oldest
+        ones.
+
+        Note: this is similar to above, except if we have lots of devices we prune
+        devices even if they're not old.
+        """
+
+        # Register a lot of devices in a short amount of time
+        user_id = self.register_user("user", "pass")
+        for _ in range(0, 100):
+            self.login(user_id, "pass")
+            self.reactor.advance(100)
+
+        store = self.hs.get_datastores().main
+
+        # We keep up to 50 devices that have been used in the last week, plus
+        # the device that was last logged in.
+        res = self.get_success(store.get_devices_by_user(user_id))
+        self.assertEqual(len(res), 51)
+
 
 class AccountValidityTestCase(unittest.HomeserverTestCase):
     servlets = [
-- 
cgit 1.5.1


From 9d641d88b785dd4b4e6e7dca3356678a42a3ac23 Mon Sep 17 00:00:00 2001
From: Cyberes <64224601+Cyberes@users.noreply.github.com>
Date: Thu, 30 Mar 2023 04:44:53 -0600
Subject: Fix missing app variable in mail subject for password resets (#15352)

* Update mailer.py

Fix `KeyError: 'app'`

* Create 15352.bugfix

Signed-off-by: Cyberes <cyberes@evulid.cc>

---------

Signed-off-by: Cyberes <cyberes@evulid.cc>
---
 changelog.d/15352.bugfix | 1 +
 synapse/push/mailer.py   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15352.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15352.bugfix b/changelog.d/15352.bugfix
new file mode 100644
index 0000000000..36d6615cac
--- /dev/null
+++ b/changelog.d/15352.bugfix
@@ -0,0 +1 @@
+Fix missing app variable in mail subject for password resets. Contributed by Cyberes.
diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py
index 93b255ced5..491a09b71d 100644
--- a/synapse/push/mailer.py
+++ b/synapse/push/mailer.py
@@ -149,7 +149,7 @@ class Mailer:
         await self.send_email(
             email_address,
             self.email_subjects.password_reset
-            % {"server_name": self.hs.config.server.server_name},
+            % {"server_name": self.hs.config.server.server_name, "app": self.app_name},
             template_vars,
         )
 
-- 
cgit 1.5.1


From 9228ae633f209212ed55de7943d1a8aee3645b57 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Thu, 30 Mar 2023 12:51:35 +0200
Subject: Add some clarification to the doc/comments regarding TCP replication
 (#15354)

---
 changelog.d/15354.misc                   |  1 +
 docs/tcp_replication.md                  |  9 +++++----
 synapse/replication/tcp/protocol.py      | 31 +------------------------------
 synapse/replication/tcp/streams/_base.py |  4 ++--
 4 files changed, 9 insertions(+), 36 deletions(-)
 create mode 100644 changelog.d/15354.misc

(limited to 'synapse')

diff --git a/changelog.d/15354.misc b/changelog.d/15354.misc
new file mode 100644
index 0000000000..862444edfb
--- /dev/null
+++ b/changelog.d/15354.misc
@@ -0,0 +1 @@
+Add some clarification to the doc/comments regarding TCP replication.
diff --git a/docs/tcp_replication.md b/docs/tcp_replication.md
index 15df949deb..083cda8413 100644
--- a/docs/tcp_replication.md
+++ b/docs/tcp_replication.md
@@ -25,7 +25,7 @@ position of all streams. The server then periodically sends `RDATA` commands
 which have the format `RDATA <stream_name> <instance_name> <token> <row>`, where
 the format of `<row>` is defined by the individual streams. The
 `<instance_name>` is the name of the Synapse process that generated the data
-(usually "master").
+(usually "master"). We expect an RDATA for every row in the DB.
 
 Error reporting happens by either the client or server sending an ERROR
 command, and usually the connection will be closed.
@@ -107,7 +107,7 @@ reconnect, following the steps above.
 If the server sends messages faster than the client can consume them the
 server will first buffer a (fairly large) number of commands and then
 disconnect the client. This ensures that we don't queue up an unbounded
-number of commands in memory and gives us a potential oppurtunity to
+number of commands in memory and gives us a potential opportunity to
 squawk loudly. When/if the client recovers it can reconnect to the
 server and ask for missed messages.
 
@@ -122,7 +122,7 @@ since these include tokens which can be used to restart the stream on
 connection errors.
 
 The client should keep track of the token in the last RDATA command
-received for each stream so that on reconneciton it can start streaming
+received for each stream so that on reconnection it can start streaming
 from the correct place. Note: not all RDATA have valid tokens due to
 batching. See `RdataCommand` for more details.
 
@@ -188,7 +188,8 @@ client (C):
    Two positions are included, the "new" position and the last position sent respectively.
    This allows servers to tell instances that the positions have advanced but no
    data has been written, without clients needlessly checking to see if they
-   have missed any updates.
+   have missed any updates. Instances will only fetch stuff if there is a gap between
+   their current position and the given last position.
 
 #### ERROR (S, C)
 
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index 56a5c21910..a7248d7b2e 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -14,36 +14,7 @@
 """This module contains the implementation of both the client and server
 protocols.
 
-The basic structure of the protocol is line based, where the initial word of
-each line specifies the command. The rest of the line is parsed based on the
-command. For example, the `RDATA` command is defined as::
-
-    RDATA <stream_name> <token> <row_json>
-
-(Note that `<row_json>` may contains spaces, but cannot contain newlines.)
-
-Blank lines are ignored.
-
-# Example
-
-An example iteraction is shown below. Each line is prefixed with '>' or '<' to
-indicate which side is sending, these are *not* included on the wire::
-
-    * connection established *
-    > SERVER localhost:8823
-    > PING 1490197665618
-    < NAME synapse.app.appservice
-    < PING 1490197665618
-    < REPLICATE
-    > POSITION events 1
-    > POSITION backfill 1
-    > POSITION caches 1
-    > RDATA caches 2 ["get_user_by_id",["@01register-user:localhost:8823"],1490197670513]
-    > RDATA events 14 ["ev", ["$149019767112vOHxz:localhost:8823",
-        "!AFDCvgApUmpdfVjIXm:localhost:8823","m.room.guest_access","",null]]
-    < PING 1490197675618
-    > ERROR server stopping
-    * connection closed by server *
+An explanation of this protocol is available in docs/tcp_replication.md
 """
 import fcntl
 import logging
diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py
index a4bdb48c0c..c6088a0f99 100644
--- a/synapse/replication/tcp/streams/_base.py
+++ b/synapse/replication/tcp/streams/_base.py
@@ -152,8 +152,8 @@ class Stream:
         Returns:
             A triplet `(updates, new_last_token, limited)`, where `updates` is
             a list of `(token, row)` entries, `new_last_token` is the new
-            position in stream, and `limited` is whether there are more updates
-            to fetch.
+            position in stream (ie the highest token returned in the updates),
+            and `limited` is whether there are more updates to fetch.
         """
         current_token = self.current_token(self.local_instance_name)
         updates, current_token, limited = await self.get_updates_since(
-- 
cgit 1.5.1


From a3bad89d57645b2ea304d2900adab71a786b0172 Mon Sep 17 00:00:00 2001
From: Warren Bailey <warren@warrenbailey.net>
Date: Thu, 30 Mar 2023 12:09:41 +0100
Subject: Add the ability to enable/disable registrations when in the OIDC flow
 (#14978)

Signed-off-by: Warren Bailey <warren@warrenbailey.net>
---
 changelog.d/14978.feature                        |  1 +
 docs/usage/configuration/config_documentation.md |  6 ++++++
 synapse/config/oidc.py                           |  5 +++++
 synapse/handlers/oidc.py                         |  1 +
 synapse/handlers/sso.py                          | 17 +++++++++++++++--
 tests/handlers/test_oidc.py                      | 17 ++++++++++++++++-
 6 files changed, 44 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/14978.feature

(limited to 'synapse')

diff --git a/changelog.d/14978.feature b/changelog.d/14978.feature
new file mode 100644
index 0000000000..14f6fee658
--- /dev/null
+++ b/changelog.d/14978.feature
@@ -0,0 +1 @@
+Add the ability to enable/disable registrations when in the OIDC flow.
\ No newline at end of file
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 060d0d5e69..c5c2c2b615 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3100,6 +3100,11 @@ Options for each entry include:
    match a pre-existing account instead of failing. This could be used if
    switching from password logins to OIDC. Defaults to false.
 
+* `enable_registration`: set to 'false' to disable automatic registration of new
+   users. This allows the OIDC SSO flow to be limited to sign in only, rather than
+   automatically registering users that have a valid SSO login but do not have
+   a pre-registered account. Defaults to true.
+
 * `user_mapping_provider`: Configuration for how attributes returned from a OIDC
    provider are mapped onto a matrix user. This setting has the following
    sub-properties:
@@ -3216,6 +3221,7 @@ oidc_providers:
     userinfo_endpoint: "https://accounts.example.com/userinfo"
     jwks_uri: "https://accounts.example.com/.well-known/jwks.json"
     skip_verification: true
+    enable_registration: true
     user_mapping_provider:
       config:
         subject_claim: "id"
diff --git a/synapse/config/oidc.py b/synapse/config/oidc.py
index df8c422043..77c1d1dc8e 100644
--- a/synapse/config/oidc.py
+++ b/synapse/config/oidc.py
@@ -136,6 +136,7 @@ OIDC_PROVIDER_CONFIG_SCHEMA = {
             "type": "array",
             "items": SsoAttributeRequirement.JSON_SCHEMA,
         },
+        "enable_registration": {"type": "boolean"},
     },
 }
 
@@ -306,6 +307,7 @@ def _parse_oidc_config_dict(
         user_mapping_provider_class=user_mapping_provider_class,
         user_mapping_provider_config=user_mapping_provider_config,
         attribute_requirements=attribute_requirements,
+        enable_registration=oidc_config.get("enable_registration", True),
     )
 
 
@@ -405,3 +407,6 @@ class OidcProviderConfig:
 
     # required attributes to require in userinfo to allow login/registration
     attribute_requirements: List[SsoAttributeRequirement]
+
+    # Whether automatic registrations are enabled in the OIDC flow. Defaults to True
+    enable_registration: bool
diff --git a/synapse/handlers/oidc.py b/synapse/handlers/oidc.py
index 0fc829acf7..e7e0b5e049 100644
--- a/synapse/handlers/oidc.py
+++ b/synapse/handlers/oidc.py
@@ -1239,6 +1239,7 @@ class OidcProvider:
             grandfather_existing_users,
             extra_attributes,
             auth_provider_session_id=sid,
+            registration_enabled=self._config.enable_registration,
         )
 
     def _remote_id_from_userinfo(self, userinfo: UserInfo) -> str:
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index 4a27c0f051..c28325323c 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -383,6 +383,7 @@ class SsoHandler:
         grandfather_existing_users: Callable[[], Awaitable[Optional[str]]],
         extra_login_attributes: Optional[JsonDict] = None,
         auth_provider_session_id: Optional[str] = None,
+        registration_enabled: bool = True,
     ) -> None:
         """
         Given an SSO ID, retrieve the user ID for it and possibly register the user.
@@ -435,6 +436,10 @@ class SsoHandler:
 
             auth_provider_session_id: An optional session ID from the IdP.
 
+            registration_enabled: An optional boolean to enable/disable automatic
+            registrations of new users. If false and the user does not exist then the
+            flow is aborted. Defaults to true.
+
         Raises:
             MappingException if there was a problem mapping the response to a user.
             RedirectException: if the mapping provider needs to redirect the user
@@ -462,8 +467,16 @@ class SsoHandler:
                         auth_provider_id, remote_user_id, user_id
                     )
 
-            # Otherwise, generate a new user.
-            if not user_id:
+            if not user_id and not registration_enabled:
+                logger.info(
+                    "User does not exist and registration are disabled for IdP '%s' and remote_user_id '%s'",
+                    auth_provider_id,
+                    remote_user_id,
+                )
+                raise MappingException(
+                    "User does not exist and registrations are disabled"
+                )
+            elif not user_id:  # Otherwise, generate a new user.
                 attributes = await self._call_attribute_mapper(sso_to_matrix_id_mapper)
 
                 next_step_url = self._get_url_for_next_new_user_step(
diff --git a/tests/handlers/test_oidc.py b/tests/handlers/test_oidc.py
index 951caaa6b3..0a8bae54fb 100644
--- a/tests/handlers/test_oidc.py
+++ b/tests/handlers/test_oidc.py
@@ -922,7 +922,7 @@ class OidcHandlerTestCase(HomeserverTestCase):
             auth_provider_session_id=None,
         )
 
-    @override_config({"oidc_config": DEFAULT_CONFIG})
+    @override_config({"oidc_config": {**DEFAULT_CONFIG, "enable_registration": True}})
     def test_map_userinfo_to_user(self) -> None:
         """Ensure that mapping the userinfo returned from a provider to an MXID works properly."""
         userinfo: dict = {
@@ -975,6 +975,21 @@ class OidcHandlerTestCase(HomeserverTestCase):
             "Mapping provider does not support de-duplicating Matrix IDs",
         )
 
+    @override_config({"oidc_config": {**DEFAULT_CONFIG, "enable_registration": False}})
+    def test_map_userinfo_to_user_does_not_register_new_user(self) -> None:
+        """Ensures new users are not registered if the enabled registration flag is disabled."""
+        userinfo: dict = {
+            "sub": "test_user",
+            "username": "test_user",
+        }
+        request, _ = self.start_authorization(userinfo)
+        self.get_success(self.handler.handle_oidc_callback(request))
+        self.complete_sso_login.assert_not_called()
+        self.assertRenderedError(
+            "mapping_error",
+            "User does not exist and registrations are disabled",
+        )
+
     @override_config({"oidc_config": {**DEFAULT_CONFIG, "allow_existing_users": True}})
     def test_map_userinfo_to_existing_user(self) -> None:
         """Existing users can log in with OpenID Connect when allow_existing_users is True."""
-- 
cgit 1.5.1


From d9f694932c64d68e791ecb4c860e911e21a0baeb Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Thu, 30 Mar 2023 13:36:41 +0100
Subject: Fix spinloop during partial state sync when a prev event is in
 backoff (#15351)

Previously, we would spin in a tight loop until
`update_state_for_partial_state_event` stopped raising
`FederationPullAttemptBackoffError`s. Replace the spinloop with a wait
until the backoff period has expired.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/15351.bugfix                           |  1 +
 synapse/api/errors.py                              | 17 +++++++---
 synapse/handlers/federation.py                     | 36 +++++++++++-----------
 synapse/handlers/federation_event.py               | 24 ++++++++++-----
 synapse/storage/databases/main/event_federation.py | 35 ++++++++++++---------
 tests/storage/test_event_federation.py             | 13 +++++---
 6 files changed, 79 insertions(+), 47 deletions(-)
 create mode 100644 changelog.d/15351.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15351.bugfix b/changelog.d/15351.bugfix
new file mode 100644
index 0000000000..e68023c671
--- /dev/null
+++ b/changelog.d/15351.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.70.0 where the background sync from a faster join could spin for hours when one of the events involved had been marked for backoff.
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index 8c6822f3c6..f2d6f9ab2d 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -27,7 +27,7 @@ from synapse.util import json_decoder
 
 if typing.TYPE_CHECKING:
     from synapse.config.homeserver import HomeServerConfig
-    from synapse.types import JsonDict
+    from synapse.types import JsonDict, StrCollection
 
 logger = logging.getLogger(__name__)
 
@@ -682,18 +682,27 @@ class FederationPullAttemptBackoffError(RuntimeError):
     Attributes:
         event_id: The event_id which we are refusing to pull
         message: A custom error message that gives more context
+        retry_after_ms: The remaining backoff interval, in milliseconds
     """
 
-    def __init__(self, event_ids: List[str], message: Optional[str]):
-        self.event_ids = event_ids
+    def __init__(
+        self, event_ids: "StrCollection", message: Optional[str], retry_after_ms: int
+    ):
+        event_ids = list(event_ids)
 
         if message:
             error_message = message
         else:
-            error_message = f"Not attempting to pull event_ids={self.event_ids} because we already tried to pull them recently (backing off)."
+            error_message = (
+                f"Not attempting to pull event_ids={event_ids} because we already "
+                "tried to pull them recently (backing off)."
+            )
 
         super().__init__(error_message)
 
+        self.event_ids = event_ids
+        self.retry_after_ms = retry_after_ms
+
 
 class HttpResponseException(CodeMessageException):
     """
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 80156ef343..65461a0787 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -1949,27 +1949,25 @@ class FederationHandler:
             )
             for event in events:
                 for attempt in itertools.count():
+                    # We try a new destination on every iteration.
                     try:
-                        await self._federation_event_handler.update_state_for_partial_state_event(
-                            destination, event
-                        )
+                        while True:
+                            try:
+                                await self._federation_event_handler.update_state_for_partial_state_event(
+                                    destination, event
+                                )
+                                break
+                            except FederationPullAttemptBackoffError as e:
+                                # We are in the backoff period for one of the event's
+                                # prev_events. Wait it out and try again after.
+                                logger.warning(
+                                    "%s; waiting for %d ms...", e, e.retry_after_ms
+                                )
+                                await self.clock.sleep(e.retry_after_ms / 1000)
+
+                        # Success, no need to try the rest of the destinations.
                         break
-                    except FederationPullAttemptBackoffError as exc:
-                        # Log a warning about why we failed to process the event (the error message
-                        # for `FederationPullAttemptBackoffError` is pretty good)
-                        logger.warning("_sync_partial_state_room: %s", exc)
-                        # We do not record a failed pull attempt when we backoff fetching a missing
-                        # `prev_event` because not being able to fetch the `prev_events` just means
-                        # we won't be able to de-outlier the pulled event. But we can still use an
-                        # `outlier` in the state/auth chain for another event. So we shouldn't stop
-                        # a downstream event from trying to pull it.
-                        #
-                        # This avoids a cascade of backoff for all events in the DAG downstream from
-                        # one event backoff upstream.
                     except FederationError as e:
-                        # TODO: We should `record_event_failed_pull_attempt` here,
-                        #   see https://github.com/matrix-org/synapse/issues/13700
-
                         if attempt == len(destinations) - 1:
                             # We have tried every remote server for this event. Give up.
                             # TODO(faster_joins) giving up isn't the right thing to do
@@ -1986,6 +1984,8 @@ class FederationHandler:
                                 destination,
                                 e,
                             )
+                            # TODO: We should `record_event_failed_pull_attempt` here,
+                            #   see https://github.com/matrix-org/synapse/issues/13700
                             raise
 
                         # Try the next remote server.
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 648843cdbe..982c8d3b2f 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -140,6 +140,7 @@ class FederationEventHandler:
     """
 
     def __init__(self, hs: "HomeServer"):
+        self._clock = hs.get_clock()
         self._store = hs.get_datastores().main
         self._storage_controllers = hs.get_storage_controllers()
         self._state_storage_controller = self._storage_controllers.state
@@ -1038,8 +1039,8 @@ class FederationEventHandler:
 
         Raises:
             FederationPullAttemptBackoffError if we are are deliberately not attempting
-                to pull the given event over federation because we've already done so
-                recently and are backing off.
+                to pull one of the given event's `prev_event`s over federation because
+                we've already done so recently and are backing off.
             FederationError if we fail to get the state from the remote server after any
                 missing `prev_event`s.
         """
@@ -1053,13 +1054,22 @@ class FederationEventHandler:
         # If we've already recently attempted to pull this missing event, don't
         # try it again so soon. Since we have to fetch all of the prev_events, we can
         # bail early here if we find any to ignore.
-        prevs_to_ignore = await self._store.get_event_ids_to_not_pull_from_backoff(
-            room_id, missing_prevs
+        prevs_with_pull_backoff = (
+            await self._store.get_event_ids_to_not_pull_from_backoff(
+                room_id, missing_prevs
+            )
         )
-        if len(prevs_to_ignore) > 0:
+        if len(prevs_with_pull_backoff) > 0:
             raise FederationPullAttemptBackoffError(
-                event_ids=prevs_to_ignore,
-                message=f"While computing context for event={event_id}, not attempting to pull missing prev_event={prevs_to_ignore[0]} because we already tried to pull recently (backing off).",
+                event_ids=prevs_with_pull_backoff.keys(),
+                message=(
+                    f"While computing context for event={event_id}, not attempting to "
+                    f"pull missing prev_events={list(prevs_with_pull_backoff.keys())} "
+                    "because we already tried to pull recently (backing off)."
+                ),
+                retry_after_ms=(
+                    max(prevs_with_pull_backoff.values()) - self._clock.time_msec()
+                ),
             )
 
         if not missing_prevs:
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index ff3edeb716..a19ba88bf8 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -1544,7 +1544,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
         self,
         room_id: str,
         event_ids: Collection[str],
-    ) -> List[str]:
+    ) -> Dict[str, int]:
         """
         Filter down the events to ones that we've failed to pull before recently. Uses
         exponential backoff.
@@ -1554,7 +1554,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
             event_ids: A list of events to filter down
 
         Returns:
-            List of event_ids that should not be attempted to be pulled
+            A dictionary of event_ids that should not be attempted to be pulled and the
+            next timestamp at which we may try pulling them again.
         """
         event_failed_pull_attempts = await self.db_pool.simple_select_many_batch(
             table="event_failed_pull_attempts",
@@ -1570,22 +1571,28 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
         )
 
         current_time = self._clock.time_msec()
-        return [
-            event_failed_pull_attempt["event_id"]
-            for event_failed_pull_attempt in event_failed_pull_attempts
+
+        event_ids_with_backoff = {}
+        for event_failed_pull_attempt in event_failed_pull_attempts:
+            event_id = event_failed_pull_attempt["event_id"]
             # Exponential back-off (up to the upper bound) so we don't try to
             # pull the same event over and over. ex. 2hr, 4hr, 8hr, 16hr, etc.
-            if current_time
-            < event_failed_pull_attempt["last_attempt_ts"]
-            + (
-                2
-                ** min(
-                    event_failed_pull_attempt["num_attempts"],
-                    BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS,
+            backoff_end_time = (
+                event_failed_pull_attempt["last_attempt_ts"]
+                + (
+                    2
+                    ** min(
+                        event_failed_pull_attempt["num_attempts"],
+                        BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS,
+                    )
                 )
+                * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_MILLISECONDS
             )
-            * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_MILLISECONDS
-        ]
+
+            if current_time < backoff_end_time:  # `backoff_end_time` is exclusive
+                event_ids_with_backoff[event_id] = backoff_end_time
+
+        return event_ids_with_backoff
 
     async def get_missing_events(
         self,
diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index 3e1984c15c..81e50bdd55 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -1143,19 +1143,24 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         tok = self.login("alice", "test")
         room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
 
+        failure_time = self.clock.time_msec()
         self.get_success(
             self.store.record_event_failed_pull_attempt(
                 room_id, "$failed_event_id", "fake cause"
             )
         )
 
-        event_ids_to_backoff = self.get_success(
+        event_ids_with_backoff = self.get_success(
             self.store.get_event_ids_to_not_pull_from_backoff(
                 room_id=room_id, event_ids=["$failed_event_id", "$normal_event_id"]
             )
         )
 
-        self.assertEqual(event_ids_to_backoff, ["$failed_event_id"])
+        self.assertEqual(
+            event_ids_with_backoff,
+            # We expect a 2^1 hour backoff after a single failed attempt.
+            {"$failed_event_id": failure_time + 2 * 60 * 60 * 1000},
+        )
 
     def test_get_event_ids_to_not_pull_from_backoff_retry_after_backoff_duration(
         self,
@@ -1179,14 +1184,14 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         # attempt (2^1 hours).
         self.reactor.advance(datetime.timedelta(hours=2).total_seconds())
 
-        event_ids_to_backoff = self.get_success(
+        event_ids_with_backoff = self.get_success(
             self.store.get_event_ids_to_not_pull_from_backoff(
                 room_id=room_id, event_ids=["$failed_event_id", "$normal_event_id"]
             )
         )
         # Since this function only returns events we should backoff from, time has
         # elapsed past the backoff range so there is no events to backoff from.
-        self.assertEqual(event_ids_to_backoff, [])
+        self.assertEqual(event_ids_with_backoff, {})
 
 
 @attr.s(auto_attribs=True)
-- 
cgit 1.5.1


From ae4acda1bb903f504e946442bfc66dd0e5757dad Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 30 Mar 2023 08:39:38 -0400
Subject: Implement MSC3984 to proxy /keys/query requests to appservices.
 (#15321)

If enabled, the application service which exclusively owns a user will
be queried for that user's devices, in addition to any information
stored in the Synapse database.
---
 changelog.d/15314.feature               |   2 +-
 changelog.d/15321.feature               |   1 +
 synapse/appservice/api.py               |  54 ++++++++++++--
 synapse/config/experimental.py          |   5 ++
 synapse/federation/federation_client.py |  48 ++-----------
 synapse/handlers/appservice.py          |  61 ++++++++++++++++
 synapse/handlers/e2e_keys.py            |  16 +++++
 synapse/http/client.py                  |  38 ++++++++++
 tests/handlers/test_e2e_keys.py         | 121 +++++++++++++++++++++++++++++++-
 9 files changed, 298 insertions(+), 48 deletions(-)
 create mode 100644 changelog.d/15321.feature

(limited to 'synapse')

diff --git a/changelog.d/15314.feature b/changelog.d/15314.feature
index 68b289b0cc..5ce0c029ce 100644
--- a/changelog.d/15314.feature
+++ b/changelog.d/15314.feature
@@ -1 +1 @@
-Experimental support for passing One Time Key requests to application services ([MSC3983](https://github.com/matrix-org/matrix-spec-proposals/pull/3983)).
+Experimental support for passing One Time Key and device key requests to application services ([MSC3983](https://github.com/matrix-org/matrix-spec-proposals/pull/3983) and [MSC3984](https://github.com/matrix-org/matrix-spec-proposals/pull/3984)).
diff --git a/changelog.d/15321.feature b/changelog.d/15321.feature
new file mode 100644
index 0000000000..5ce0c029ce
--- /dev/null
+++ b/changelog.d/15321.feature
@@ -0,0 +1 @@
+Experimental support for passing One Time Key and device key requests to application services ([MSC3983](https://github.com/matrix-org/matrix-spec-proposals/pull/3983) and [MSC3984](https://github.com/matrix-org/matrix-spec-proposals/pull/3984)).
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index 51ee0e79df..b27eedef99 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -30,7 +30,7 @@ from prometheus_client import Counter
 from typing_extensions import TypeGuard
 
 from synapse.api.constants import EventTypes, Membership, ThirdPartyEntityKind
-from synapse.api.errors import CodeMessageException
+from synapse.api.errors import CodeMessageException, HttpResponseException
 from synapse.appservice import (
     ApplicationService,
     TransactionOneTimeKeysCount,
@@ -38,7 +38,7 @@ from synapse.appservice import (
 )
 from synapse.events import EventBase
 from synapse.events.utils import SerializeEventConfig, serialize_event
-from synapse.http.client import SimpleHttpClient
+from synapse.http.client import SimpleHttpClient, is_unknown_endpoint
 from synapse.types import DeviceListUpdates, JsonDict, ThirdPartyInstanceID
 from synapse.util.caches.response_cache import ResponseCache
 
@@ -393,7 +393,11 @@ class ApplicationServiceApi(SimpleHttpClient):
     ) -> Tuple[Dict[str, Dict[str, Dict[str, JsonDict]]], List[Tuple[str, str, str]]]:
         """Claim one time keys from an application service.
 
+        Note that any error (including a timeout) is treated as the application
+        service having no information.
+
         Args:
+            service: The application service to query.
             query: An iterable of tuples of (user ID, device ID, algorithm).
 
         Returns:
@@ -422,9 +426,9 @@ class ApplicationServiceApi(SimpleHttpClient):
                 body,
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
-        except CodeMessageException as e:
+        except HttpResponseException as e:
             # The appservice doesn't support this endpoint.
-            if e.code == 404 or e.code == 405:
+            if is_unknown_endpoint(e):
                 return {}, query
             logger.warning("claim_keys to %s received %s", uri, e.code)
             return {}, query
@@ -444,6 +448,48 @@ class ApplicationServiceApi(SimpleHttpClient):
 
         return response, missing
 
+    async def query_keys(
+        self, service: "ApplicationService", query: Dict[str, List[str]]
+    ) -> Dict[str, Dict[str, Dict[str, JsonDict]]]:
+        """Query the application service for keys.
+
+        Note that any error (including a timeout) is treated as the application
+        service having no information.
+
+        Args:
+            service: The application service to query.
+            query: A map from user ID to a list of device IDs to query.
+
+        Returns:
+            A map of device_keys/master_keys/self_signing_keys/user_signing_keys:
+
+            device_keys is a map of user ID -> a map device ID -> device info.
+        """
+        if service.url is None:
+            return {}
+
+        # This is required by the configuration.
+        assert service.hs_token is not None
+
+        uri = f"{service.url}/_matrix/app/unstable/org.matrix.msc3984/keys/query"
+        try:
+            response = await self.post_json_get_json(
+                uri,
+                query,
+                headers={"Authorization": [f"Bearer {service.hs_token}"]},
+            )
+        except HttpResponseException as e:
+            # The appservice doesn't support this endpoint.
+            if is_unknown_endpoint(e):
+                return {}
+            logger.warning("query_keys to %s received %s", uri, e.code)
+            return {}
+        except Exception as ex:
+            logger.warning("query_keys to %s threw exception %s", uri, ex)
+            return {}
+
+        return response
+
     def _serialize(
         self, service: "ApplicationService", events: Iterable[EventBase]
     ) -> List[JsonDict]:
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 53e6fc2b54..7687c80ea0 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -79,6 +79,11 @@ class ExperimentalConfig(Config):
             "msc3983_appservice_otk_claims", False
         )
 
+        # MSC3984: Proxying key queries to exclusive ASes.
+        self.msc3984_appservice_key_query: bool = experimental.get(
+            "msc3984_appservice_key_query", False
+        )
+
         # MSC3706 (server-side support for partial state in /send_join responses)
         # Synapse will always serve partial state responses to requests using the stable
         # query parameter `omit_members`. If this flag is set, Synapse will also serve
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 7d04560dca..4cf4957a42 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -61,6 +61,7 @@ from synapse.federation.federation_base import (
     event_from_pdu_json,
 )
 from synapse.federation.transport.client import SendJoinResponse
+from synapse.http.client import is_unknown_endpoint
 from synapse.http.types import QueryParams
 from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, tag_args, trace
 from synapse.types import JsonDict, UserID, get_domain_from_id
@@ -759,43 +760,6 @@ class FederationClient(FederationBase):
 
         return signed_auth
 
-    def _is_unknown_endpoint(
-        self, e: HttpResponseException, synapse_error: Optional[SynapseError] = None
-    ) -> bool:
-        """
-        Returns true if the response was due to an endpoint being unimplemented.
-
-        Args:
-            e: The error response received from the remote server.
-            synapse_error: The above error converted to a SynapseError. This is
-                automatically generated if not provided.
-
-        """
-        if synapse_error is None:
-            synapse_error = e.to_synapse_error()
-        # MSC3743 specifies that servers should return a 404 or 405 with an errcode
-        # of M_UNRECOGNIZED when they receive a request to an unknown endpoint or
-        # to an unknown method, respectively.
-        #
-        # Older versions of servers don't properly handle this. This needs to be
-        # rather specific as some endpoints truly do return 404 errors.
-        return (
-            # 404 is an unknown endpoint, 405 is a known endpoint, but unknown method.
-            (e.code == 404 or e.code == 405)
-            and (
-                # Older Dendrites returned a text or empty body.
-                # Older Conduit returned an empty body.
-                not e.response
-                or e.response == b"404 page not found"
-                # The proper response JSON with M_UNRECOGNIZED errcode.
-                or synapse_error.errcode == Codes.UNRECOGNIZED
-            )
-        ) or (
-            # Older Synapses returned a 400 error.
-            e.code == 400
-            and synapse_error.errcode == Codes.UNRECOGNIZED
-        )
-
     async def _try_destination_list(
         self,
         description: str,
@@ -887,7 +851,7 @@ class FederationClient(FederationBase):
                 elif 400 <= e.code < 500 and synapse_error.errcode in failover_errcodes:
                     failover = True
 
-                elif failover_on_unknown_endpoint and self._is_unknown_endpoint(
+                elif failover_on_unknown_endpoint and is_unknown_endpoint(
                     e, synapse_error
                 ):
                     failover = True
@@ -1223,7 +1187,7 @@ class FederationClient(FederationBase):
             # If an error is received that is due to an unrecognised endpoint,
             # fallback to the v1 endpoint. Otherwise, consider it a legitimate error
             # and raise.
-            if not self._is_unknown_endpoint(e):
+            if not is_unknown_endpoint(e):
                 raise
 
         logger.debug("Couldn't send_join with the v2 API, falling back to the v1 API")
@@ -1297,7 +1261,7 @@ class FederationClient(FederationBase):
             # fallback to the v1 endpoint if the room uses old-style event IDs.
             # Otherwise, consider it a legitimate error and raise.
             err = e.to_synapse_error()
-            if self._is_unknown_endpoint(e, err):
+            if is_unknown_endpoint(e, err):
                 if room_version.event_format != EventFormatVersions.ROOM_V1_V2:
                     raise SynapseError(
                         400,
@@ -1358,7 +1322,7 @@ class FederationClient(FederationBase):
             # If an error is received that is due to an unrecognised endpoint,
             # fallback to the v1 endpoint. Otherwise, consider it a legitimate error
             # and raise.
-            if not self._is_unknown_endpoint(e):
+            if not is_unknown_endpoint(e):
                 raise
 
         logger.debug("Couldn't send_leave with the v2 API, falling back to the v1 API")
@@ -1629,7 +1593,7 @@ class FederationClient(FederationBase):
                 # If an error is received that is due to an unrecognised endpoint,
                 # fallback to the unstable endpoint. Otherwise, consider it a
                 # legitimate error and raise.
-                if not self._is_unknown_endpoint(e):
+                if not is_unknown_endpoint(e):
                     raise
 
                 logger.debug(
diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py
index 953df4d9cd..da887647d4 100644
--- a/synapse/handlers/appservice.py
+++ b/synapse/handlers/appservice.py
@@ -18,6 +18,7 @@ from typing import (
     Dict,
     Iterable,
     List,
+    Mapping,
     Optional,
     Tuple,
     Union,
@@ -846,6 +847,10 @@ class ApplicationServicesHandler:
     ]:
         """Claim one time keys from application services.
 
+        For users which are exclusively owned by an application service, a key
+        claim request is sent to that application service to check if it
+        provides keys directly.
+
         Args:
             query: An iterable of tuples of (user ID, device ID, algorithm).
 
@@ -901,3 +906,59 @@ class ApplicationServicesHandler:
                 missing.extend(result[1])
 
         return claimed_keys, missing
+
+    async def query_keys(
+        self, query: Mapping[str, Optional[List[str]]]
+    ) -> Dict[str, Dict[str, Dict[str, JsonDict]]]:
+        """Query application services for device keys.
+
+        Users which are exclusively owned by an application service are queried
+        for keys to check if the application service provides keys directly.
+
+        Args:
+            query: map from user_id to a list of devices to query
+
+        Returns:
+            A map from user_id -> device_id -> device details
+        """
+        services = self.store.get_app_services()
+
+        # Partition the users by appservice.
+        query_by_appservice: Dict[str, Dict[str, List[str]]] = {}
+        for user_id, device_ids in query.items():
+            if not self.store.get_if_app_services_interested_in_user(user_id):
+                continue
+
+            # Find the associated appservice.
+            for service in services:
+                if service.is_exclusive_user(user_id):
+                    query_by_appservice.setdefault(service.id, {})[user_id] = (
+                        device_ids or []
+                    )
+                    continue
+
+        # Query each service in parallel.
+        results = await make_deferred_yieldable(
+            defer.DeferredList(
+                [
+                    run_in_background(
+                        self.appservice_api.query_keys,
+                        # We know this must be an app service.
+                        self.store.get_app_service_by_id(service_id),  # type: ignore[arg-type]
+                        service_query,
+                    )
+                    for service_id, service_query in query_by_appservice.items()
+                ],
+                consumeErrors=True,
+            )
+        )
+
+        # Patch together the results -- they are all independent (since they
+        # require exclusive control over the users). They get returned as a single
+        # dictionary.
+        key_queries: Dict[str, Dict[str, Dict[str, JsonDict]]] = {}
+        for success, result in results:
+            if success:
+                key_queries.update(result)
+
+        return key_queries
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 9e7c2c45b5..0073667470 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -91,6 +91,9 @@ class E2eKeysHandler:
         self._query_appservices_for_otks = (
             hs.config.experimental.msc3983_appservice_otk_claims
         )
+        self._query_appservices_for_keys = (
+            hs.config.experimental.msc3984_appservice_key_query
+        )
 
     @trace
     @cancellable
@@ -497,6 +500,19 @@ class E2eKeysHandler:
             local_query, include_displaynames
         )
 
+        # Check if the application services have any additional results.
+        if self._query_appservices_for_keys:
+            # Query the appservices for any keys.
+            appservice_results = await self._appservice_handler.query_keys(query)
+
+            # Merge results, overriding with what the appservice returned.
+            for user_id, devices in appservice_results.get("device_keys", {}).items():
+                # Copy the appservice device info over the homeserver device info, but
+                # don't completely overwrite it.
+                results.setdefault(user_id, {}).update(devices)
+
+            # TODO Handle cross-signing keys.
+
         # Build the result structure
         for user_id, device_keys in results.items():
             for device_id, device_info in device_keys.items():
diff --git a/synapse/http/client.py b/synapse/http/client.py
index d777d59ccf..5ee55981d9 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -966,3 +966,41 @@ class InsecureInterceptableContextFactory(ssl.ContextFactory):
 
     def creatorForNetloc(self, hostname: bytes, port: int) -> IOpenSSLContextFactory:
         return self
+
+
+def is_unknown_endpoint(
+    e: HttpResponseException, synapse_error: Optional[SynapseError] = None
+) -> bool:
+    """
+    Returns true if the response was due to an endpoint being unimplemented.
+
+    Args:
+        e: The error response received from the remote server.
+        synapse_error: The above error converted to a SynapseError. This is
+            automatically generated if not provided.
+
+    """
+    if synapse_error is None:
+        synapse_error = e.to_synapse_error()
+    # MSC3743 specifies that servers should return a 404 or 405 with an errcode
+    # of M_UNRECOGNIZED when they receive a request to an unknown endpoint or
+    # to an unknown method, respectively.
+    #
+    # Older versions of servers don't properly handle this. This needs to be
+    # rather specific as some endpoints truly do return 404 errors.
+    return (
+        # 404 is an unknown endpoint, 405 is a known endpoint, but unknown method.
+        (e.code == 404 or e.code == 405)
+        and (
+            # Older Dendrites returned a text body or empty body.
+            # Older Conduit returned an empty body.
+            not e.response
+            or e.response == b"404 page not found"
+            # The proper response JSON with M_UNRECOGNIZED errcode.
+            or synapse_error.errcode == Codes.UNRECOGNIZED
+        )
+    ) or (
+        # Older Synapses returned a 400 error.
+        e.code == 400
+        and synapse_error.errcode == Codes.UNRECOGNIZED
+    )
diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py
index 4ff04fc66b..013b9ee550 100644
--- a/tests/handlers/test_e2e_keys.py
+++ b/tests/handlers/test_e2e_keys.py
@@ -960,7 +960,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         appservice = ApplicationService(
             token="i_am_an_app_service",
             id="1234",
-            namespaces={"users": [{"regex": r"@boris:*", "exclusive": True}]},
+            namespaces={"users": [{"regex": r"@boris:.+", "exclusive": True}]},
             # Note: this user does not have to match the regex above
             sender="@as_main:test",
         )
@@ -1015,3 +1015,122 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
                 },
             },
         )
+
+    @override_config({"experimental_features": {"msc3984_appservice_key_query": True}})
+    def test_query_local_devices_appservice(self) -> None:
+        """Test that querying of appservices for keys overrides responses from the database."""
+        local_user = "@boris:" + self.hs.hostname
+        device_1 = "abc"
+        device_2 = "def"
+        device_3 = "ghi"
+
+        # There are 3 devices:
+        #
+        # 1. One which is uploaded to the homeserver.
+        # 2. One which is uploaded to the homeserver, but a newer copy is returned
+        #     by the appservice.
+        # 3. One which is only returned by the appservice.
+        device_key_1: JsonDict = {
+            "user_id": local_user,
+            "device_id": device_1,
+            "algorithms": [
+                "m.olm.curve25519-aes-sha2",
+                RoomEncryptionAlgorithms.MEGOLM_V1_AES_SHA2,
+            ],
+            "keys": {
+                "ed25519:abc": "base64+ed25519+key",
+                "curve25519:abc": "base64+curve25519+key",
+            },
+            "signatures": {local_user: {"ed25519:abc": "base64+signature"}},
+        }
+        device_key_2a: JsonDict = {
+            "user_id": local_user,
+            "device_id": device_2,
+            "algorithms": [
+                "m.olm.curve25519-aes-sha2",
+                RoomEncryptionAlgorithms.MEGOLM_V1_AES_SHA2,
+            ],
+            "keys": {
+                "ed25519:def": "base64+ed25519+key",
+                "curve25519:def": "base64+curve25519+key",
+            },
+            "signatures": {local_user: {"ed25519:def": "base64+signature"}},
+        }
+
+        device_key_2b: JsonDict = {
+            "user_id": local_user,
+            "device_id": device_2,
+            "algorithms": [
+                "m.olm.curve25519-aes-sha2",
+                RoomEncryptionAlgorithms.MEGOLM_V1_AES_SHA2,
+            ],
+            # The device ID is the same (above), but the keys are different.
+            "keys": {
+                "ed25519:xyz": "base64+ed25519+key",
+                "curve25519:xyz": "base64+curve25519+key",
+            },
+            "signatures": {local_user: {"ed25519:xyz": "base64+signature"}},
+        }
+        device_key_3: JsonDict = {
+            "user_id": local_user,
+            "device_id": device_3,
+            "algorithms": [
+                "m.olm.curve25519-aes-sha2",
+                RoomEncryptionAlgorithms.MEGOLM_V1_AES_SHA2,
+            ],
+            "keys": {
+                "ed25519:jkl": "base64+ed25519+key",
+                "curve25519:jkl": "base64+curve25519+key",
+            },
+            "signatures": {local_user: {"ed25519:jkl": "base64+signature"}},
+        }
+
+        # Upload keys for devices 1 & 2a.
+        self.get_success(
+            self.handler.upload_keys_for_user(
+                local_user, device_1, {"device_keys": device_key_1}
+            )
+        )
+        self.get_success(
+            self.handler.upload_keys_for_user(
+                local_user, device_2, {"device_keys": device_key_2a}
+            )
+        )
+
+        # Inject an appservice interested in this user.
+        appservice = ApplicationService(
+            token="i_am_an_app_service",
+            id="1234",
+            namespaces={"users": [{"regex": r"@boris:.+", "exclusive": True}]},
+            # Note: this user does not have to match the regex above
+            sender="@as_main:test",
+        )
+        self.hs.get_datastores().main.services_cache = [appservice]
+        self.hs.get_datastores().main.exclusive_user_regex = _make_exclusive_regex(
+            [appservice]
+        )
+
+        # Setup a response.
+        self.appservice_api.query_keys.return_value = make_awaitable(
+            {
+                "device_keys": {
+                    local_user: {device_2: device_key_2b, device_3: device_key_3}
+                }
+            }
+        )
+
+        # Request all devices.
+        res = self.get_success(self.handler.query_local_devices({local_user: None}))
+        self.assertIn(local_user, res)
+        for res_key in res[local_user].values():
+            res_key.pop("unsigned", None)
+        self.assertDictEqual(
+            res,
+            {
+                local_user: {
+                    device_1: device_key_1,
+                    device_2: device_key_2b,
+                    device_3: device_key_3,
+                }
+            },
+        )
-- 
cgit 1.5.1


From 91c3f32673e0c62ea603dcc18f7f21f73c011f33 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 30 Mar 2023 16:21:12 +0100
Subject: Speed up SQLite unit test CI (#15334)

Tests now take 40% of the time.
---
 changelog.d/15334.misc            |  1 +
 synapse/storage/engines/sqlite.py | 17 ++++++++++++++++-
 tests/server.py                   | 23 +++++++++++++++++++++++
 tests/unittest.py                 | 16 +++++++++++++---
 4 files changed, 53 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/15334.misc

(limited to 'synapse')

diff --git a/changelog.d/15334.misc b/changelog.d/15334.misc
new file mode 100644
index 0000000000..0c30818ed0
--- /dev/null
+++ b/changelog.d/15334.misc
@@ -0,0 +1 @@
+Speed up unit tests when using SQLite3.
diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py
index 28751e89a5..ca8c59297c 100644
--- a/synapse/storage/engines/sqlite.py
+++ b/synapse/storage/engines/sqlite.py
@@ -34,6 +34,13 @@ class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection, sqlite3.Cursor]):
             ":memory:",
         )
 
+        # A connection to a database that has already been prepared, to use as a
+        # base for an in-memory connection. This is used during unit tests to
+        # speed up setting up the DB.
+        self._prepped_conn: Optional[sqlite3.Connection] = database_config.get(
+            "_TEST_PREPPED_CONN"
+        )
+
         if platform.python_implementation() == "PyPy":
             # pypy's sqlite3 module doesn't handle bytearrays, convert them
             # back to bytes.
@@ -84,7 +91,15 @@ class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection, sqlite3.Cursor]):
             # In memory databases need to be rebuilt each time. Ideally we'd
             # reuse the same connection as we do when starting up, but that
             # would involve using adbapi before we have started the reactor.
-            prepare_database(db_conn, self, config=None)
+            #
+            # If we have a `prepped_conn` we can use that to initialise the DB,
+            # otherwise we need to call `prepare_database`.
+            if self._prepped_conn is not None:
+                # Initialise the new DB from the pre-prepared DB.
+                assert isinstance(db_conn.conn, sqlite3.Connection)
+                self._prepped_conn.backup(db_conn.conn)
+            else:
+                prepare_database(db_conn, self, config=None)
 
         db_conn.create_function("rank", 1, _rank)
         db_conn.execute("PRAGMA foreign_keys = ON;")
diff --git a/tests/server.py b/tests/server.py
index bb059630fa..b52ff1c463 100644
--- a/tests/server.py
+++ b/tests/server.py
@@ -16,6 +16,7 @@ import json
 import logging
 import os
 import os.path
+import sqlite3
 import time
 import uuid
 import warnings
@@ -79,7 +80,9 @@ from synapse.http.site import SynapseRequest
 from synapse.logging.context import ContextResourceUsage
 from synapse.server import HomeServer
 from synapse.storage import DataStore
+from synapse.storage.database import LoggingDatabaseConnection
 from synapse.storage.engines import PostgresEngine, create_engine
+from synapse.storage.prepare_database import prepare_database
 from synapse.types import ISynapseReactor, JsonDict
 from synapse.util import Clock
 
@@ -104,6 +107,10 @@ P = ParamSpec("P")
 # the type of thing that can be passed into `make_request` in the headers list
 CustomHeaderType = Tuple[Union[str, bytes], Union[str, bytes]]
 
+# A pre-prepared SQLite DB that is used as a template when creating a new SQLite
+# DB for each test run. This dramatically speeds up test setup when using SQLite.
+PREPPED_SQLITE_DB_CONN: Optional[LoggingDatabaseConnection] = None
+
 
 class TimedOutException(Exception):
     """
@@ -899,6 +906,22 @@ def setup_test_homeserver(
             "args": {"database": test_db_location, "cp_min": 1, "cp_max": 1},
         }
 
+        # Check if we have set up a DB that we can use as a template.
+        global PREPPED_SQLITE_DB_CONN
+        if PREPPED_SQLITE_DB_CONN is None:
+            temp_engine = create_engine(database_config)
+            PREPPED_SQLITE_DB_CONN = LoggingDatabaseConnection(
+                sqlite3.connect(":memory:"), temp_engine, "PREPPED_CONN"
+            )
+
+            database = DatabaseConnectionConfig("master", database_config)
+            config.database.databases = [database]
+            prepare_database(
+                PREPPED_SQLITE_DB_CONN, create_engine(database_config), config
+            )
+
+        database_config["_TEST_PREPPED_CONN"] = PREPPED_SQLITE_DB_CONN
+
     if "db_txn_limit" in kwargs:
         database_config["txn_limit"] = kwargs["db_txn_limit"]
 
diff --git a/tests/unittest.py b/tests/unittest.py
index f9160faa1d..8a16fd3665 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -146,6 +146,9 @@ class TestCase(unittest.TestCase):
                     % (current_context(),)
                 )
 
+            # Disable GC for duration of test. See below for why.
+            gc.disable()
+
             old_level = logging.getLogger().level
             if level is not None and old_level != level:
 
@@ -163,12 +166,19 @@ class TestCase(unittest.TestCase):
 
             return orig()
 
+        # We want to force a GC to work around problems with deferreds leaking
+        # logcontexts when they are GCed (see the logcontext docs).
+        #
+        # The easiest way to do this would be to do a full GC after each test
+        # run, but that is very expensive. Instead, we disable GC (above) for
+        # the duration of the test so that we only need to run a gen-0 GC, which
+        # is a lot quicker.
+
         @around(self)
         def tearDown(orig: Callable[[], R]) -> R:
             ret = orig()
-            # force a GC to workaround problems with deferreds leaking logcontexts when
-            # they are GCed (see the logcontext docs)
-            gc.collect()
+            gc.collect(0)
+            gc.enable()
             set_current_context(SENTINEL_CONTEXT)
 
             return ret
-- 
cgit 1.5.1


From 6f68e32bfbe439435410e81ac70fdca10f28fbf7 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Thu, 30 Mar 2023 19:41:14 +0200
Subject: to_device updates could be dropped when consuming the replication
 stream (#15349)

Co-authored-by: reivilibre <oliverw@matrix.org>
---
 changelog.d/15349.bugfix                           |  1 +
 synapse/storage/databases/main/deviceinbox.py      | 14 +---
 tests/replication/_base.py                         |  4 +
 tests/replication/tcp/streams/test_account_data.py |  5 --
 tests/replication/tcp/streams/test_to_device.py    | 89 ++++++++++++++++++++++
 5 files changed, 98 insertions(+), 15 deletions(-)
 create mode 100644 changelog.d/15349.bugfix
 create mode 100644 tests/replication/tcp/streams/test_to_device.py

(limited to 'synapse')

diff --git a/changelog.d/15349.bugfix b/changelog.d/15349.bugfix
new file mode 100644
index 0000000000..65ea7ae7eb
--- /dev/null
+++ b/changelog.d/15349.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where some to_device messages could be dropped when using workers.
diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py
index 0d75d9739a..b471fcb064 100644
--- a/synapse/storage/databases/main/deviceinbox.py
+++ b/synapse/storage/databases/main/deviceinbox.py
@@ -617,14 +617,14 @@ class DeviceInboxWorkerStore(SQLBaseStore):
             # We limit like this as we might have multiple rows per stream_id, and
             # we want to make sure we always get all entries for any stream_id
             # we return.
-            upper_pos = min(current_id, last_id + limit)
+            upto_token = min(current_id, last_id + limit)
             sql = (
                 "SELECT max(stream_id), user_id"
                 " FROM device_inbox"
                 " WHERE ? < stream_id AND stream_id <= ?"
                 " GROUP BY user_id"
             )
-            txn.execute(sql, (last_id, upper_pos))
+            txn.execute(sql, (last_id, upto_token))
             updates = [(row[0], row[1:]) for row in txn]
 
             sql = (
@@ -633,19 +633,13 @@ class DeviceInboxWorkerStore(SQLBaseStore):
                 " WHERE ? < stream_id AND stream_id <= ?"
                 " GROUP BY destination"
             )
-            txn.execute(sql, (last_id, upper_pos))
+            txn.execute(sql, (last_id, upto_token))
             updates.extend((row[0], row[1:]) for row in txn)
 
             # Order by ascending stream ordering
             updates.sort()
 
-            limited = False
-            upto_token = current_id
-            if len(updates) >= limit:
-                upto_token = updates[-1][0]
-                limited = True
-
-            return updates, upto_token, limited
+            return updates, upto_token, upto_token < current_id
 
         return await self.db_pool.runInteraction(
             "get_all_new_device_messages", get_all_new_device_messages_txn
diff --git a/tests/replication/_base.py b/tests/replication/_base.py
index 46a8e2013e..0f1a8a145f 100644
--- a/tests/replication/_base.py
+++ b/tests/replication/_base.py
@@ -54,6 +54,10 @@ class BaseStreamTestCase(unittest.HomeserverTestCase):
     if not hiredis:
         skip = "Requires hiredis"
 
+    if not USE_POSTGRES_FOR_TESTS:
+        # Redis replication only takes place on Postgres
+        skip = "Requires Postgres"
+
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         # build a replication server
         server_factory = ReplicationStreamProtocolFactory(hs)
diff --git a/tests/replication/tcp/streams/test_account_data.py b/tests/replication/tcp/streams/test_account_data.py
index 01df1be047..b9075e3f20 100644
--- a/tests/replication/tcp/streams/test_account_data.py
+++ b/tests/replication/tcp/streams/test_account_data.py
@@ -37,11 +37,6 @@ class AccountDataStreamTestCase(BaseStreamTestCase):
         # also one global update
         self.get_success(store.add_account_data_for_user("test_user", "m.global", {}))
 
-        # tell the notifier to catch up to avoid duplicate rows.
-        # workaround for https://github.com/matrix-org/synapse/issues/7360
-        # FIXME remove this when the above is fixed
-        self.replicate()
-
         # check we're testing what we think we are: no rows should yet have been
         # received
         self.assertEqual([], self.test_handler.received_rdata_rows)
diff --git a/tests/replication/tcp/streams/test_to_device.py b/tests/replication/tcp/streams/test_to_device.py
new file mode 100644
index 0000000000..fb9eac668f
--- /dev/null
+++ b/tests/replication/tcp/streams/test_to_device.py
@@ -0,0 +1,89 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+
+import synapse
+from synapse.replication.tcp.streams._base import _STREAM_UPDATE_TARGET_ROW_COUNT
+from synapse.types import JsonDict
+
+from tests.replication._base import BaseStreamTestCase
+
+logger = logging.getLogger(__name__)
+
+
+class ToDeviceStreamTestCase(BaseStreamTestCase):
+    servlets = [
+        synapse.rest.admin.register_servlets,
+        synapse.rest.client.login.register_servlets,
+    ]
+
+    def test_to_device_stream(self) -> None:
+        store = self.hs.get_datastores().main
+
+        user1 = self.register_user("user1", "pass")
+        self.login("user1", "pass", "device")
+        user2 = self.register_user("user2", "pass")
+        self.login("user2", "pass", "device")
+
+        # connect to pull the updates related to user creation/login
+        self.reconnect()
+        self.replicate()
+        self.test_handler.received_rdata_rows.clear()
+        # disconnect so we can accumulate the updates without pulling them
+        self.disconnect()
+
+        msg: JsonDict = {}
+        msg["sender"] = "@sender:example.org"
+        msg["type"] = "m.new_device"
+
+        # add messages to the device inbox for user1 up until the
+        # limit defined for a stream update batch
+        for i in range(0, _STREAM_UPDATE_TARGET_ROW_COUNT):
+            msg["content"] = {"device": {}}
+            messages = {user1: {"device": msg}}
+
+            self.get_success(
+                store.add_messages_from_remote_to_device_inbox(
+                    "example.org",
+                    f"{i}",
+                    messages,
+                )
+            )
+
+        # add one more message, for user2 this time
+        # this message would be dropped before fixing #15335
+        msg["content"] = {"device": {}}
+        messages = {user2: {"device": msg}}
+
+        self.get_success(
+            store.add_messages_from_remote_to_device_inbox(
+                "example.org",
+                f"{_STREAM_UPDATE_TARGET_ROW_COUNT}",
+                messages,
+            )
+        )
+
+        # replication is disconnected so we shouldn't get any updates yet
+        self.assertEqual([], self.test_handler.received_rdata_rows)
+
+        # now reconnect to pull the updates
+        self.reconnect()
+        self.replicate()
+
+        # we should receive the fact that we have to_device updates
+        # for user1 and user2
+        received_rows = self.test_handler.received_rdata_rows
+        self.assertEqual(len(received_rows), 2)
+        self.assertEqual(received_rows[0][2].entity, user1)
+        self.assertEqual(received_rows[1][2].entity, user2)
-- 
cgit 1.5.1


From 2a234b788e2b5706ee83cf8eb86dfd004bc7c166 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 30 Mar 2023 15:11:31 -0400
Subject: Set thread_id column to non-null for
 event_push_{actions,actions_staging,summary} (#15350)

Clean-up from adding the thread_id column, which was initially
null but backfilled with values. It is desirable to require it to now
be non-null.

In addition to altering this column to be non-null, we clean up
obsolete background jobs, indexes, and just-in-time updating
code.
---
 changelog.d/15350.misc                             |   1 +
 .../storage/databases/main/event_push_actions.py   | 240 ---------------------
 synapse/storage/schema/__init__.py                 |   6 +-
 .../delta/74/02thread_notifications_backfill.sql   |  28 +++
 .../03thread_notifications_not_null.sql.postgres   |  23 ++
 .../74/03thread_notifications_not_null.sql.sqlite  |  99 +++++++++
 6 files changed, 154 insertions(+), 243 deletions(-)
 create mode 100644 changelog.d/15350.misc
 create mode 100644 synapse/storage/schema/main/delta/74/02thread_notifications_backfill.sql
 create mode 100644 synapse/storage/schema/main/delta/74/03thread_notifications_not_null.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/74/03thread_notifications_not_null.sql.sqlite

(limited to 'synapse')

diff --git a/changelog.d/15350.misc b/changelog.d/15350.misc
new file mode 100644
index 0000000000..2dea23784f
--- /dev/null
+++ b/changelog.d/15350.misc
@@ -0,0 +1 @@
+Make the `thread_id` column on `event_push_actions`, `event_push_actions_staging`, and `event_push_summary` non-null.
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index eeccf5db24..6afc51320a 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -100,7 +100,6 @@ from synapse.storage.database import (
 )
 from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
 from synapse.storage.databases.main.stream import StreamWorkerStore
-from synapse.types import JsonDict
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached
 
@@ -289,180 +288,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             unique=True,
         )
 
-        self.db_pool.updates.register_background_update_handler(
-            "event_push_backfill_thread_id",
-            self._background_backfill_thread_id,
-        )
-
-        # Indexes which will be used to quickly make the thread_id column non-null.
-        self.db_pool.updates.register_background_index_update(
-            "event_push_actions_thread_id_null",
-            index_name="event_push_actions_thread_id_null",
-            table="event_push_actions",
-            columns=["thread_id"],
-            where_clause="thread_id IS NULL",
-        )
-        self.db_pool.updates.register_background_index_update(
-            "event_push_summary_thread_id_null",
-            index_name="event_push_summary_thread_id_null",
-            table="event_push_summary",
-            columns=["thread_id"],
-            where_clause="thread_id IS NULL",
-        )
-
-        # Check ASAP (and then later, every 1s) to see if we have finished
-        # background updates the event_push_actions and event_push_summary tables.
-        self._clock.call_later(0.0, self._check_event_push_backfill_thread_id)
-        self._event_push_backfill_thread_id_done = False
-
-    @wrap_as_background_process("check_event_push_backfill_thread_id")
-    async def _check_event_push_backfill_thread_id(self) -> None:
-        """
-        Has thread_id finished backfilling?
-
-        If not, we need to just-in-time update it so the queries work.
-        """
-        done = await self.db_pool.updates.has_completed_background_update(
-            "event_push_backfill_thread_id"
-        )
-
-        if done:
-            self._event_push_backfill_thread_id_done = True
-        else:
-            # Reschedule to run.
-            self._clock.call_later(15.0, self._check_event_push_backfill_thread_id)
-
-    async def _background_backfill_thread_id(
-        self, progress: JsonDict, batch_size: int
-    ) -> int:
-        """
-        Fill in the thread_id field for event_push_actions and event_push_summary.
-
-        This is preparatory so that it can be made non-nullable in the future.
-
-        Because all current (null) data is done in an unthreaded manner this
-        simply assumes it is on the "main" timeline. Since event_push_actions
-        are periodically cleared it is not possible to correctly re-calculate
-        the thread_id.
-        """
-        event_push_actions_done = progress.get("event_push_actions_done", False)
-
-        def add_thread_id_txn(
-            txn: LoggingTransaction, start_stream_ordering: int
-        ) -> int:
-            sql = """
-            SELECT stream_ordering
-            FROM event_push_actions
-            WHERE
-                thread_id IS NULL
-                AND stream_ordering > ?
-            ORDER BY stream_ordering
-            LIMIT ?
-            """
-            txn.execute(sql, (start_stream_ordering, batch_size))
-
-            # No more rows to process.
-            rows = txn.fetchall()
-            if not rows:
-                progress["event_push_actions_done"] = True
-                self.db_pool.updates._background_update_progress_txn(
-                    txn, "event_push_backfill_thread_id", progress
-                )
-                return 0
-
-            # Update the thread ID for any of those rows.
-            max_stream_ordering = rows[-1][0]
-
-            sql = """
-            UPDATE event_push_actions
-            SET thread_id = 'main'
-            WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL
-            """
-            txn.execute(
-                sql,
-                (
-                    start_stream_ordering,
-                    max_stream_ordering,
-                ),
-            )
-
-            # Update progress.
-            processed_rows = txn.rowcount
-            progress["max_event_push_actions_stream_ordering"] = max_stream_ordering
-            self.db_pool.updates._background_update_progress_txn(
-                txn, "event_push_backfill_thread_id", progress
-            )
-
-            return processed_rows
-
-        def add_thread_id_summary_txn(txn: LoggingTransaction) -> int:
-            min_user_id = progress.get("max_summary_user_id", "")
-            min_room_id = progress.get("max_summary_room_id", "")
-
-            # Slightly overcomplicated query for getting the Nth user ID / room
-            # ID tuple, or the last if there are less than N remaining.
-            sql = """
-            SELECT user_id, room_id FROM (
-                SELECT user_id, room_id FROM event_push_summary
-                WHERE (user_id, room_id) > (?, ?)
-                    AND thread_id IS NULL
-                ORDER BY user_id, room_id
-                LIMIT ?
-            ) AS e
-            ORDER BY user_id DESC, room_id DESC
-            LIMIT 1
-            """
-
-            txn.execute(sql, (min_user_id, min_room_id, batch_size))
-            row = txn.fetchone()
-            if not row:
-                return 0
-
-            max_user_id, max_room_id = row
-
-            sql = """
-            UPDATE event_push_summary
-            SET thread_id = 'main'
-            WHERE
-                (?, ?) < (user_id, room_id) AND (user_id, room_id) <= (?, ?)
-                AND thread_id IS NULL
-            """
-            txn.execute(sql, (min_user_id, min_room_id, max_user_id, max_room_id))
-            processed_rows = txn.rowcount
-
-            progress["max_summary_user_id"] = max_user_id
-            progress["max_summary_room_id"] = max_room_id
-            self.db_pool.updates._background_update_progress_txn(
-                txn, "event_push_backfill_thread_id", progress
-            )
-
-            return processed_rows
-
-        # First update the event_push_actions table, then the event_push_summary table.
-        #
-        # Note that the event_push_actions_staging table is ignored since it is
-        # assumed that items in that table will only exist for a short period of
-        # time.
-        if not event_push_actions_done:
-            result = await self.db_pool.runInteraction(
-                "event_push_backfill_thread_id",
-                add_thread_id_txn,
-                progress.get("max_event_push_actions_stream_ordering", 0),
-            )
-        else:
-            result = await self.db_pool.runInteraction(
-                "event_push_backfill_thread_id",
-                add_thread_id_summary_txn,
-            )
-
-            # Only done after the event_push_summary table is done.
-            if not result:
-                await self.db_pool.updates._end_background_update(
-                    "event_push_backfill_thread_id"
-                )
-
-        return result
-
     async def get_unread_counts_by_room_for_user(self, user_id: str) -> Dict[str, int]:
         """Get the notification count by room for a user. Only considers notifications,
         not highlight or unread counts, and threads are currently aggregated under their room.
@@ -711,25 +536,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE),
         )
 
-        # First ensure that the existing rows have an updated thread_id field.
-        if not self._event_push_backfill_thread_id_done:
-            txn.execute(
-                """
-                UPDATE event_push_summary
-                SET thread_id = ?
-                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
-                """,
-                (MAIN_TIMELINE, room_id, user_id),
-            )
-            txn.execute(
-                """
-                UPDATE event_push_actions
-                SET thread_id = ?
-                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
-                """,
-                (MAIN_TIMELINE, room_id, user_id),
-            )
-
         # First we pull the counts from the summary table.
         #
         # We check that `last_receipt_stream_ordering` matches the stream ordering of the
@@ -1545,25 +1351,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 (room_id, user_id, stream_ordering, *thread_args),
             )
 
-            # First ensure that the existing rows have an updated thread_id field.
-            if not self._event_push_backfill_thread_id_done:
-                txn.execute(
-                    """
-                    UPDATE event_push_summary
-                    SET thread_id = ?
-                    WHERE room_id = ? AND user_id = ? AND thread_id is NULL
-                    """,
-                    (MAIN_TIMELINE, room_id, user_id),
-                )
-                txn.execute(
-                    """
-                    UPDATE event_push_actions
-                    SET thread_id = ?
-                    WHERE room_id = ? AND user_id = ? AND thread_id is NULL
-                    """,
-                    (MAIN_TIMELINE, room_id, user_id),
-                )
-
             # Fetch the notification counts between the stream ordering of the
             # latest receipt and what was previously summarised.
             unread_counts = self._get_notif_unread_count_for_user_room(
@@ -1698,19 +1485,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             rotate_to_stream_ordering: The new maximum event stream ordering to summarise.
         """
 
-        # Ensure that any new actions have an updated thread_id.
-        if not self._event_push_backfill_thread_id_done:
-            txn.execute(
-                """
-                UPDATE event_push_actions
-                SET thread_id = ?
-                WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL
-                """,
-                (MAIN_TIMELINE, old_rotate_stream_ordering, rotate_to_stream_ordering),
-            )
-
-        # XXX Do we need to update summaries here too?
-
         # Calculate the new counts that should be upserted into event_push_summary
         sql = """
             SELECT user_id, room_id, thread_id,
@@ -1773,20 +1547,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
         logger.info("Rotating notifications, handling %d rows", len(summaries))
 
-        # Ensure that any updated threads have the proper thread_id.
-        if not self._event_push_backfill_thread_id_done:
-            txn.execute_batch(
-                """
-                UPDATE event_push_summary
-                SET thread_id = ?
-                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
-                """,
-                [
-                    (MAIN_TIMELINE, room_id, user_id)
-                    for user_id, room_id, _ in summaries
-                ],
-            )
-
         self.db_pool.simple_upsert_many_txn(
             txn,
             table="event_push_summary",
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index d3103a6c7a..72bbb3a7c2 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -95,9 +95,9 @@ Changes in SCHEMA_VERSION = 74:
 
 
 SCHEMA_COMPAT_VERSION = (
-    # The threads_id column must exist for event_push_actions, event_push_summary,
-    # receipts_linearized, and receipts_graph.
-    73
+    # The threads_id column must written to with non-null values event_push_actions,
+    # event_push_actions_staging, and event_push_summary.
+    74
 )
 """Limit on how far the synapse codebase can be rolled back without breaking db compat
 
diff --git a/synapse/storage/schema/main/delta/74/02thread_notifications_backfill.sql b/synapse/storage/schema/main/delta/74/02thread_notifications_backfill.sql
new file mode 100644
index 0000000000..ce6f9ff937
--- /dev/null
+++ b/synapse/storage/schema/main/delta/74/02thread_notifications_backfill.sql
@@ -0,0 +1,28 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Force the background updates from 06thread_notifications.sql to run in the
+-- foreground as code will now require those to be "done".
+
+DELETE FROM background_updates WHERE update_name = 'event_push_backfill_thread_id';
+
+-- Overwrite any null thread_id values.
+UPDATE event_push_actions_staging SET thread_id = 'main' WHERE thread_id IS NULL;
+UPDATE event_push_actions SET thread_id = 'main' WHERE thread_id IS NULL;
+UPDATE event_push_summary SET thread_id = 'main' WHERE thread_id IS NULL;
+
+-- Drop the background updates to calculate the indexes used to find null thread_ids.
+DELETE FROM background_updates WHERE update_name = 'event_push_actions_thread_id_null';
+DELETE FROM background_updates WHERE update_name = 'event_push_summary_thread_id_null';
diff --git a/synapse/storage/schema/main/delta/74/03thread_notifications_not_null.sql.postgres b/synapse/storage/schema/main/delta/74/03thread_notifications_not_null.sql.postgres
new file mode 100644
index 0000000000..5f68667425
--- /dev/null
+++ b/synapse/storage/schema/main/delta/74/03thread_notifications_not_null.sql.postgres
@@ -0,0 +1,23 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Drop the indexes used to find null thread_ids.
+DROP INDEX IF EXISTS event_push_actions_thread_id_null;
+DROP INDEX IF EXISTS event_push_summary_thread_id_null;
+
+-- The thread_id columns can now be made non-nullable.
+ALTER TABLE event_push_actions_staging ALTER COLUMN thread_id SET NOT NULL;
+ALTER TABLE event_push_actions ALTER COLUMN thread_id SET NOT NULL;
+ALTER TABLE event_push_summary ALTER COLUMN thread_id SET NOT NULL;
diff --git a/synapse/storage/schema/main/delta/74/03thread_notifications_not_null.sql.sqlite b/synapse/storage/schema/main/delta/74/03thread_notifications_not_null.sql.sqlite
new file mode 100644
index 0000000000..f46b233560
--- /dev/null
+++ b/synapse/storage/schema/main/delta/74/03thread_notifications_not_null.sql.sqlite
@@ -0,0 +1,99 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ -- The thread_id columns can now be made non-nullable.
+--
+-- SQLite doesn't support modifying columns to an existing table, so it must
+-- be recreated.
+
+-- Create the new tables.
+CREATE TABLE event_push_actions_staging_new (
+    event_id TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    actions TEXT NOT NULL,
+    notif SMALLINT NOT NULL,
+    highlight SMALLINT NOT NULL,
+    unread SMALLINT,
+    thread_id TEXT NOT NULL,
+    inserted_ts BIGINT
+);
+
+CREATE TABLE event_push_actions_new (
+    room_id TEXT NOT NULL,
+    event_id TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    profile_tag VARCHAR(32),
+    actions TEXT NOT NULL,
+    topological_ordering BIGINT,
+    stream_ordering BIGINT,
+    notif SMALLINT,
+    highlight SMALLINT,
+    unread SMALLINT,
+    thread_id TEXT NOT NULL,
+    CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag)
+);
+
+CREATE TABLE event_push_summary_new (
+    user_id TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+    notif_count BIGINT NOT NULL,
+    stream_ordering BIGINT NOT NULL,
+    unread_count BIGINT,
+    last_receipt_stream_ordering BIGINT,
+    thread_id TEXT NOT NULL
+);
+
+-- Copy the data.
+INSERT INTO event_push_actions_staging_new (event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts)
+    SELECT event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts
+    FROM event_push_actions_staging;
+
+INSERT INTO event_push_actions_new (room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id)
+    SELECT room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id
+    FROM event_push_actions;
+
+INSERT INTO event_push_summary_new (user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id)
+    SELECT user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id
+    FROM event_push_summary;
+
+-- Drop the old tables.
+DROP TABLE event_push_actions_staging;
+DROP TABLE event_push_actions;
+DROP TABLE event_push_summary;
+
+-- Rename the tables.
+ALTER TABLE event_push_actions_staging_new RENAME TO event_push_actions_staging;
+ALTER TABLE event_push_actions_new RENAME TO event_push_actions;
+ALTER TABLE event_push_summary_new RENAME TO event_push_summary;
+
+-- Recreate the indexes.
+CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging(event_id);
+
+CREATE INDEX event_push_actions_highlights_index ON event_push_actions (user_id, room_id, topological_ordering, stream_ordering);
+CREATE INDEX event_push_actions_rm_tokens on event_push_actions( user_id, room_id, topological_ordering, stream_ordering );
+CREATE INDEX event_push_actions_room_id_user_id on event_push_actions(room_id, user_id);
+CREATE INDEX event_push_actions_stream_ordering on event_push_actions( stream_ordering, user_id );
+CREATE INDEX event_push_actions_u_highlight ON event_push_actions (user_id, stream_ordering);
+
+CREATE UNIQUE INDEX event_push_summary_unique_index2 ON event_push_summary (user_id, room_id, thread_id) ;
+
+-- Recreate some indexes in the background, by re-running the background updates
+-- from 72/02event_push_actions_index.sql and 72/06thread_notifications.sql.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7403, 'event_push_summary_unique_index2', '{}')
+  ON CONFLICT (update_name) DO UPDATE SET progress_json = '{}';
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7403, 'event_push_actions_stream_highlight_index', '{}')
+  ON CONFLICT (update_name) DO UPDATE SET progress_json = '{}';
-- 
cgit 1.5.1


From 72d2ceaa9adfbc0f19a0c540e5a63263af47755f Mon Sep 17 00:00:00 2001
From: "Olivier Wilkinson (reivilibre)" <oliverw@matrix.org>
Date: Fri, 31 Mar 2023 12:10:09 +0100
Subject: Revert "Set thread_id column to non-null for
 event_push_{actions,actions_staging,summary} (#15350)"

This reverts commit 2a234b788e2b5706ee83cf8eb86dfd004bc7c166.

See #15359 for context.
---
 changelog.d/15350.misc                             |   1 -
 .../storage/databases/main/event_push_actions.py   | 240 +++++++++++++++++++++
 synapse/storage/schema/__init__.py                 |   6 +-
 .../delta/74/02thread_notifications_backfill.sql   |  28 ---
 .../03thread_notifications_not_null.sql.postgres   |  23 --
 .../74/03thread_notifications_not_null.sql.sqlite  |  99 ---------
 6 files changed, 243 insertions(+), 154 deletions(-)
 delete mode 100644 changelog.d/15350.misc
 delete mode 100644 synapse/storage/schema/main/delta/74/02thread_notifications_backfill.sql
 delete mode 100644 synapse/storage/schema/main/delta/74/03thread_notifications_not_null.sql.postgres
 delete mode 100644 synapse/storage/schema/main/delta/74/03thread_notifications_not_null.sql.sqlite

(limited to 'synapse')

diff --git a/changelog.d/15350.misc b/changelog.d/15350.misc
deleted file mode 100644
index 2dea23784f..0000000000
--- a/changelog.d/15350.misc
+++ /dev/null
@@ -1 +0,0 @@
-Make the `thread_id` column on `event_push_actions`, `event_push_actions_staging`, and `event_push_summary` non-null.
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 6afc51320a..eeccf5db24 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -100,6 +100,7 @@ from synapse.storage.database import (
 )
 from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
 from synapse.storage.databases.main.stream import StreamWorkerStore
+from synapse.types import JsonDict
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached
 
@@ -288,6 +289,180 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             unique=True,
         )
 
+        self.db_pool.updates.register_background_update_handler(
+            "event_push_backfill_thread_id",
+            self._background_backfill_thread_id,
+        )
+
+        # Indexes which will be used to quickly make the thread_id column non-null.
+        self.db_pool.updates.register_background_index_update(
+            "event_push_actions_thread_id_null",
+            index_name="event_push_actions_thread_id_null",
+            table="event_push_actions",
+            columns=["thread_id"],
+            where_clause="thread_id IS NULL",
+        )
+        self.db_pool.updates.register_background_index_update(
+            "event_push_summary_thread_id_null",
+            index_name="event_push_summary_thread_id_null",
+            table="event_push_summary",
+            columns=["thread_id"],
+            where_clause="thread_id IS NULL",
+        )
+
+        # Check ASAP (and then later, every 1s) to see if we have finished
+        # background updates the event_push_actions and event_push_summary tables.
+        self._clock.call_later(0.0, self._check_event_push_backfill_thread_id)
+        self._event_push_backfill_thread_id_done = False
+
+    @wrap_as_background_process("check_event_push_backfill_thread_id")
+    async def _check_event_push_backfill_thread_id(self) -> None:
+        """
+        Has thread_id finished backfilling?
+
+        If not, we need to just-in-time update it so the queries work.
+        """
+        done = await self.db_pool.updates.has_completed_background_update(
+            "event_push_backfill_thread_id"
+        )
+
+        if done:
+            self._event_push_backfill_thread_id_done = True
+        else:
+            # Reschedule to run.
+            self._clock.call_later(15.0, self._check_event_push_backfill_thread_id)
+
+    async def _background_backfill_thread_id(
+        self, progress: JsonDict, batch_size: int
+    ) -> int:
+        """
+        Fill in the thread_id field for event_push_actions and event_push_summary.
+
+        This is preparatory so that it can be made non-nullable in the future.
+
+        Because all current (null) data is done in an unthreaded manner this
+        simply assumes it is on the "main" timeline. Since event_push_actions
+        are periodically cleared it is not possible to correctly re-calculate
+        the thread_id.
+        """
+        event_push_actions_done = progress.get("event_push_actions_done", False)
+
+        def add_thread_id_txn(
+            txn: LoggingTransaction, start_stream_ordering: int
+        ) -> int:
+            sql = """
+            SELECT stream_ordering
+            FROM event_push_actions
+            WHERE
+                thread_id IS NULL
+                AND stream_ordering > ?
+            ORDER BY stream_ordering
+            LIMIT ?
+            """
+            txn.execute(sql, (start_stream_ordering, batch_size))
+
+            # No more rows to process.
+            rows = txn.fetchall()
+            if not rows:
+                progress["event_push_actions_done"] = True
+                self.db_pool.updates._background_update_progress_txn(
+                    txn, "event_push_backfill_thread_id", progress
+                )
+                return 0
+
+            # Update the thread ID for any of those rows.
+            max_stream_ordering = rows[-1][0]
+
+            sql = """
+            UPDATE event_push_actions
+            SET thread_id = 'main'
+            WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL
+            """
+            txn.execute(
+                sql,
+                (
+                    start_stream_ordering,
+                    max_stream_ordering,
+                ),
+            )
+
+            # Update progress.
+            processed_rows = txn.rowcount
+            progress["max_event_push_actions_stream_ordering"] = max_stream_ordering
+            self.db_pool.updates._background_update_progress_txn(
+                txn, "event_push_backfill_thread_id", progress
+            )
+
+            return processed_rows
+
+        def add_thread_id_summary_txn(txn: LoggingTransaction) -> int:
+            min_user_id = progress.get("max_summary_user_id", "")
+            min_room_id = progress.get("max_summary_room_id", "")
+
+            # Slightly overcomplicated query for getting the Nth user ID / room
+            # ID tuple, or the last if there are less than N remaining.
+            sql = """
+            SELECT user_id, room_id FROM (
+                SELECT user_id, room_id FROM event_push_summary
+                WHERE (user_id, room_id) > (?, ?)
+                    AND thread_id IS NULL
+                ORDER BY user_id, room_id
+                LIMIT ?
+            ) AS e
+            ORDER BY user_id DESC, room_id DESC
+            LIMIT 1
+            """
+
+            txn.execute(sql, (min_user_id, min_room_id, batch_size))
+            row = txn.fetchone()
+            if not row:
+                return 0
+
+            max_user_id, max_room_id = row
+
+            sql = """
+            UPDATE event_push_summary
+            SET thread_id = 'main'
+            WHERE
+                (?, ?) < (user_id, room_id) AND (user_id, room_id) <= (?, ?)
+                AND thread_id IS NULL
+            """
+            txn.execute(sql, (min_user_id, min_room_id, max_user_id, max_room_id))
+            processed_rows = txn.rowcount
+
+            progress["max_summary_user_id"] = max_user_id
+            progress["max_summary_room_id"] = max_room_id
+            self.db_pool.updates._background_update_progress_txn(
+                txn, "event_push_backfill_thread_id", progress
+            )
+
+            return processed_rows
+
+        # First update the event_push_actions table, then the event_push_summary table.
+        #
+        # Note that the event_push_actions_staging table is ignored since it is
+        # assumed that items in that table will only exist for a short period of
+        # time.
+        if not event_push_actions_done:
+            result = await self.db_pool.runInteraction(
+                "event_push_backfill_thread_id",
+                add_thread_id_txn,
+                progress.get("max_event_push_actions_stream_ordering", 0),
+            )
+        else:
+            result = await self.db_pool.runInteraction(
+                "event_push_backfill_thread_id",
+                add_thread_id_summary_txn,
+            )
+
+            # Only done after the event_push_summary table is done.
+            if not result:
+                await self.db_pool.updates._end_background_update(
+                    "event_push_backfill_thread_id"
+                )
+
+        return result
+
     async def get_unread_counts_by_room_for_user(self, user_id: str) -> Dict[str, int]:
         """Get the notification count by room for a user. Only considers notifications,
         not highlight or unread counts, and threads are currently aggregated under their room.
@@ -536,6 +711,25 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE),
         )
 
+        # First ensure that the existing rows have an updated thread_id field.
+        if not self._event_push_backfill_thread_id_done:
+            txn.execute(
+                """
+                UPDATE event_push_summary
+                SET thread_id = ?
+                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+                """,
+                (MAIN_TIMELINE, room_id, user_id),
+            )
+            txn.execute(
+                """
+                UPDATE event_push_actions
+                SET thread_id = ?
+                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+                """,
+                (MAIN_TIMELINE, room_id, user_id),
+            )
+
         # First we pull the counts from the summary table.
         #
         # We check that `last_receipt_stream_ordering` matches the stream ordering of the
@@ -1351,6 +1545,25 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 (room_id, user_id, stream_ordering, *thread_args),
             )
 
+            # First ensure that the existing rows have an updated thread_id field.
+            if not self._event_push_backfill_thread_id_done:
+                txn.execute(
+                    """
+                    UPDATE event_push_summary
+                    SET thread_id = ?
+                    WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+                    """,
+                    (MAIN_TIMELINE, room_id, user_id),
+                )
+                txn.execute(
+                    """
+                    UPDATE event_push_actions
+                    SET thread_id = ?
+                    WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+                    """,
+                    (MAIN_TIMELINE, room_id, user_id),
+                )
+
             # Fetch the notification counts between the stream ordering of the
             # latest receipt and what was previously summarised.
             unread_counts = self._get_notif_unread_count_for_user_room(
@@ -1485,6 +1698,19 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             rotate_to_stream_ordering: The new maximum event stream ordering to summarise.
         """
 
+        # Ensure that any new actions have an updated thread_id.
+        if not self._event_push_backfill_thread_id_done:
+            txn.execute(
+                """
+                UPDATE event_push_actions
+                SET thread_id = ?
+                WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL
+                """,
+                (MAIN_TIMELINE, old_rotate_stream_ordering, rotate_to_stream_ordering),
+            )
+
+        # XXX Do we need to update summaries here too?
+
         # Calculate the new counts that should be upserted into event_push_summary
         sql = """
             SELECT user_id, room_id, thread_id,
@@ -1547,6 +1773,20 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
         logger.info("Rotating notifications, handling %d rows", len(summaries))
 
+        # Ensure that any updated threads have the proper thread_id.
+        if not self._event_push_backfill_thread_id_done:
+            txn.execute_batch(
+                """
+                UPDATE event_push_summary
+                SET thread_id = ?
+                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+                """,
+                [
+                    (MAIN_TIMELINE, room_id, user_id)
+                    for user_id, room_id, _ in summaries
+                ],
+            )
+
         self.db_pool.simple_upsert_many_txn(
             txn,
             table="event_push_summary",
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 72bbb3a7c2..d3103a6c7a 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -95,9 +95,9 @@ Changes in SCHEMA_VERSION = 74:
 
 
 SCHEMA_COMPAT_VERSION = (
-    # The threads_id column must written to with non-null values event_push_actions,
-    # event_push_actions_staging, and event_push_summary.
-    74
+    # The threads_id column must exist for event_push_actions, event_push_summary,
+    # receipts_linearized, and receipts_graph.
+    73
 )
 """Limit on how far the synapse codebase can be rolled back without breaking db compat
 
diff --git a/synapse/storage/schema/main/delta/74/02thread_notifications_backfill.sql b/synapse/storage/schema/main/delta/74/02thread_notifications_backfill.sql
deleted file mode 100644
index ce6f9ff937..0000000000
--- a/synapse/storage/schema/main/delta/74/02thread_notifications_backfill.sql
+++ /dev/null
@@ -1,28 +0,0 @@
-/* Copyright 2023 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- Force the background updates from 06thread_notifications.sql to run in the
--- foreground as code will now require those to be "done".
-
-DELETE FROM background_updates WHERE update_name = 'event_push_backfill_thread_id';
-
--- Overwrite any null thread_id values.
-UPDATE event_push_actions_staging SET thread_id = 'main' WHERE thread_id IS NULL;
-UPDATE event_push_actions SET thread_id = 'main' WHERE thread_id IS NULL;
-UPDATE event_push_summary SET thread_id = 'main' WHERE thread_id IS NULL;
-
--- Drop the background updates to calculate the indexes used to find null thread_ids.
-DELETE FROM background_updates WHERE update_name = 'event_push_actions_thread_id_null';
-DELETE FROM background_updates WHERE update_name = 'event_push_summary_thread_id_null';
diff --git a/synapse/storage/schema/main/delta/74/03thread_notifications_not_null.sql.postgres b/synapse/storage/schema/main/delta/74/03thread_notifications_not_null.sql.postgres
deleted file mode 100644
index 5f68667425..0000000000
--- a/synapse/storage/schema/main/delta/74/03thread_notifications_not_null.sql.postgres
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Copyright 2022 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- Drop the indexes used to find null thread_ids.
-DROP INDEX IF EXISTS event_push_actions_thread_id_null;
-DROP INDEX IF EXISTS event_push_summary_thread_id_null;
-
--- The thread_id columns can now be made non-nullable.
-ALTER TABLE event_push_actions_staging ALTER COLUMN thread_id SET NOT NULL;
-ALTER TABLE event_push_actions ALTER COLUMN thread_id SET NOT NULL;
-ALTER TABLE event_push_summary ALTER COLUMN thread_id SET NOT NULL;
diff --git a/synapse/storage/schema/main/delta/74/03thread_notifications_not_null.sql.sqlite b/synapse/storage/schema/main/delta/74/03thread_notifications_not_null.sql.sqlite
deleted file mode 100644
index f46b233560..0000000000
--- a/synapse/storage/schema/main/delta/74/03thread_notifications_not_null.sql.sqlite
+++ /dev/null
@@ -1,99 +0,0 @@
-/* Copyright 2022 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- -- The thread_id columns can now be made non-nullable.
---
--- SQLite doesn't support modifying columns to an existing table, so it must
--- be recreated.
-
--- Create the new tables.
-CREATE TABLE event_push_actions_staging_new (
-    event_id TEXT NOT NULL,
-    user_id TEXT NOT NULL,
-    actions TEXT NOT NULL,
-    notif SMALLINT NOT NULL,
-    highlight SMALLINT NOT NULL,
-    unread SMALLINT,
-    thread_id TEXT NOT NULL,
-    inserted_ts BIGINT
-);
-
-CREATE TABLE event_push_actions_new (
-    room_id TEXT NOT NULL,
-    event_id TEXT NOT NULL,
-    user_id TEXT NOT NULL,
-    profile_tag VARCHAR(32),
-    actions TEXT NOT NULL,
-    topological_ordering BIGINT,
-    stream_ordering BIGINT,
-    notif SMALLINT,
-    highlight SMALLINT,
-    unread SMALLINT,
-    thread_id TEXT NOT NULL,
-    CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag)
-);
-
-CREATE TABLE event_push_summary_new (
-    user_id TEXT NOT NULL,
-    room_id TEXT NOT NULL,
-    notif_count BIGINT NOT NULL,
-    stream_ordering BIGINT NOT NULL,
-    unread_count BIGINT,
-    last_receipt_stream_ordering BIGINT,
-    thread_id TEXT NOT NULL
-);
-
--- Copy the data.
-INSERT INTO event_push_actions_staging_new (event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts)
-    SELECT event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts
-    FROM event_push_actions_staging;
-
-INSERT INTO event_push_actions_new (room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id)
-    SELECT room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id
-    FROM event_push_actions;
-
-INSERT INTO event_push_summary_new (user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id)
-    SELECT user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id
-    FROM event_push_summary;
-
--- Drop the old tables.
-DROP TABLE event_push_actions_staging;
-DROP TABLE event_push_actions;
-DROP TABLE event_push_summary;
-
--- Rename the tables.
-ALTER TABLE event_push_actions_staging_new RENAME TO event_push_actions_staging;
-ALTER TABLE event_push_actions_new RENAME TO event_push_actions;
-ALTER TABLE event_push_summary_new RENAME TO event_push_summary;
-
--- Recreate the indexes.
-CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging(event_id);
-
-CREATE INDEX event_push_actions_highlights_index ON event_push_actions (user_id, room_id, topological_ordering, stream_ordering);
-CREATE INDEX event_push_actions_rm_tokens on event_push_actions( user_id, room_id, topological_ordering, stream_ordering );
-CREATE INDEX event_push_actions_room_id_user_id on event_push_actions(room_id, user_id);
-CREATE INDEX event_push_actions_stream_ordering on event_push_actions( stream_ordering, user_id );
-CREATE INDEX event_push_actions_u_highlight ON event_push_actions (user_id, stream_ordering);
-
-CREATE UNIQUE INDEX event_push_summary_unique_index2 ON event_push_summary (user_id, room_id, thread_id) ;
-
--- Recreate some indexes in the background, by re-running the background updates
--- from 72/02event_push_actions_index.sql and 72/06thread_notifications.sql.
-INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
-  (7403, 'event_push_summary_unique_index2', '{}')
-  ON CONFLICT (update_name) DO UPDATE SET progress_json = '{}';
-INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
-  (7403, 'event_push_actions_stream_highlight_index', '{}')
-  ON CONFLICT (update_name) DO UPDATE SET progress_json = '{}';
-- 
cgit 1.5.1


From 6204c3663eabec57e897e7e75180b959a936e1fe Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 31 Mar 2023 13:51:51 +0100
Subject: Revert pruning of old devices (#15360)

* Revert "Fix registering a device on an account with lots of devices (#15348)"

This reverts commit f0d8f66eaaacfa75bed65bc5d0c602fbc5339c85.

* Revert "Delete stale non-e2e devices for users, take 3 (#15183)"

This reverts commit 78cdb72cd6b0e007c314d9fed9f629dfc5b937a6.
---
 changelog.d/15183.misc                    |  1 -
 changelog.d/15348.misc                    |  1 -
 synapse/handlers/device.py                |  2 +-
 synapse/handlers/register.py              | 52 +------------------
 synapse/storage/databases/main/devices.py | 83 +------------------------------
 tests/handlers/test_admin.py              |  2 +-
 tests/handlers/test_device.py             |  2 +-
 tests/rest/client/test_register.py        | 47 -----------------
 tests/storage/test_client_ips.py          |  4 +-
 9 files changed, 7 insertions(+), 187 deletions(-)
 delete mode 100644 changelog.d/15183.misc
 delete mode 100644 changelog.d/15348.misc

(limited to 'synapse')

diff --git a/changelog.d/15183.misc b/changelog.d/15183.misc
deleted file mode 100644
index f9bfc581ad..0000000000
--- a/changelog.d/15183.misc
+++ /dev/null
@@ -1 +0,0 @@
-Prune user's old devices on login if they have too many.
diff --git a/changelog.d/15348.misc b/changelog.d/15348.misc
deleted file mode 100644
index f9bfc581ad..0000000000
--- a/changelog.d/15348.misc
+++ /dev/null
@@ -1 +0,0 @@
-Prune user's old devices on login if they have too many.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 0fc165a8d6..9ded6389ac 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -485,7 +485,7 @@ class DeviceHandler(DeviceWorkerHandler):
             device_ids = [d for d in device_ids if d != except_device_id]
         await self.delete_devices(user_id, device_ids)
 
-    async def delete_devices(self, user_id: str, device_ids: StrCollection) -> None:
+    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
         """Delete several devices
 
         Args:
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index 7e9d065f50..c8bf2439af 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -16,7 +16,7 @@
 """Contains functions for registering clients."""
 
 import logging
-from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple
+from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple
 
 from prometheus_client import Counter
 from typing_extensions import TypedDict
@@ -40,7 +40,6 @@ from synapse.appservice import ApplicationService
 from synapse.config.server import is_threepid_reserved
 from synapse.handlers.device import DeviceHandler
 from synapse.http.servlet import assert_params_in_dict
-from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.replication.http.login import RegisterDeviceReplicationServlet
 from synapse.replication.http.register import (
     ReplicationPostRegisterActionsServlet,
@@ -49,7 +48,6 @@ from synapse.replication.http.register import (
 from synapse.spam_checker_api import RegistrationBehaviour
 from synapse.types import RoomAlias, UserID, create_requester
 from synapse.types.state import StateFilter
-from synapse.util.iterutils import batch_iter
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -112,10 +110,6 @@ class RegistrationHandler:
         self._server_notices_mxid = hs.config.servernotices.server_notices_mxid
         self._server_name = hs.hostname
 
-        # The set of users that we're currently pruning devices for. Ensures
-        # that we don't have two such jobs for the same user running at once.
-        self._currently_pruning_devices_for_users: Set[str] = set()
-
         self.spam_checker = hs.get_spam_checker()
 
         if hs.config.worker.worker_app:
@@ -127,10 +121,7 @@ class RegistrationHandler:
                 ReplicationPostRegisterActionsServlet.make_client(hs)
             )
         else:
-            device_handler = hs.get_device_handler()
-            assert isinstance(device_handler, DeviceHandler)
-            self.device_handler = device_handler
-
+            self.device_handler = hs.get_device_handler()
             self._register_device_client = self.register_device_inner
             self.pusher_pool = hs.get_pusherpool()
 
@@ -860,9 +851,6 @@ class RegistrationHandler:
         # This can only run on the main process.
         assert isinstance(self.device_handler, DeviceHandler)
 
-        # Prune the user's device list if they already have a lot of devices.
-        await self._maybe_prune_too_many_devices(user_id)
-
         registered_device_id = await self.device_handler.check_device_registered(
             user_id,
             device_id,
@@ -931,42 +919,6 @@ class RegistrationHandler:
             "refresh_token": refresh_token,
         }
 
-    async def _maybe_prune_too_many_devices(self, user_id: str) -> None:
-        """Delete any excess old devices this user may have."""
-
-        if user_id in self._currently_pruning_devices_for_users:
-            return
-
-        # We also cap the number of users whose devices we prune at the same
-        # time, to avoid performance problems.
-        if len(self._currently_pruning_devices_for_users) > 5:
-            return
-
-        device_ids = await self.store.check_too_many_devices_for_user(user_id)
-        if not device_ids:
-            return
-
-        logger.info("Pruning %d stale devices for %s", len(device_ids), user_id)
-
-        # Now spawn a background loop that deletes said devices.
-        async def _prune_too_many_devices_loop() -> None:
-            if user_id in self._currently_pruning_devices_for_users:
-                return
-
-            self._currently_pruning_devices_for_users.add(user_id)
-
-            try:
-                for batch in batch_iter(device_ids, 10):
-                    await self.device_handler.delete_devices(user_id, batch)
-
-                    await self.clock.sleep(60)
-            finally:
-                self._currently_pruning_devices_for_users.discard(user_id)
-
-        run_as_background_process(
-            "_prune_too_many_devices_loop", _prune_too_many_devices_loop
-        )
-
     async def post_registration_actions(
         self, user_id: str, auth_result: dict, access_token: Optional[str]
     ) -> None:
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index f61b7bc96e..5503621ad6 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1599,76 +1599,6 @@ class DeviceBackgroundUpdateStore(SQLBaseStore):
 
         return rows
 
-    async def check_too_many_devices_for_user(self, user_id: str) -> List[str]:
-        """Check if the user has a lot of devices, and if so return the set of
-        devices we can prune.
-
-        This does *not* return hidden devices or devices with E2E keys.
-        """
-
-        num_devices = await self.db_pool.simple_select_one_onecol(
-            table="devices",
-            keyvalues={"user_id": user_id, "hidden": False},
-            retcol="COALESCE(COUNT(*), 0)",
-            desc="count_devices",
-        )
-
-        # We let users have up to ten devices without pruning.
-        if num_devices <= 10:
-            return []
-
-        # We always prune devices not seen in the last 14 days...
-        max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000
-
-        # ... but we also cap the maximum number of devices the user can have to
-        # 50.
-        if num_devices > 50:
-            # Choose a last seen that ensures we keep at most 50 devices.
-            sql = """
-                SELECT last_seen FROM devices
-                LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
-                WHERE
-                    user_id = ?
-                    AND NOT hidden
-                    AND last_seen IS NOT NULL
-                    AND key_json IS NULL
-                ORDER BY last_seen DESC
-                LIMIT 1
-                OFFSET 50
-            """
-
-            rows = await self.db_pool.execute(
-                "check_too_many_devices_for_user_last_seen",
-                None,
-                sql,
-                user_id,
-            )
-            if rows:
-                max_last_seen = max(rows[0][0], max_last_seen)
-
-        # Fetch the devices to delete.
-        sql = """
-            SELECT device_id FROM devices
-            LEFT JOIN e2e_device_keys_json USING (user_id, device_id)
-            WHERE
-                user_id = ?
-                AND NOT hidden
-                AND last_seen <= ?
-                AND key_json IS NULL
-            ORDER BY last_seen
-        """
-
-        def check_too_many_devices_for_user_txn(
-            txn: LoggingTransaction,
-        ) -> List[str]:
-            txn.execute(sql, (user_id, max_last_seen))
-            return [device_id for device_id, in txn]
-
-        return await self.db_pool.runInteraction(
-            "check_too_many_devices_for_user",
-            check_too_many_devices_for_user_txn,
-        )
-
 
 class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
     # Because we have write access, this will be a StreamIdGenerator
@@ -1727,7 +1657,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
                 values={},
                 insertion_values={
                     "display_name": initial_device_display_name,
-                    "last_seen": self._clock.time_msec(),
                     "hidden": False,
                 },
                 desc="store_device",
@@ -1773,15 +1702,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
             )
             raise StoreError(500, "Problem storing device.")
 
-    @cached(max_entries=0)
-    async def delete_device(self, user_id: str, device_id: str) -> None:
-        raise NotImplementedError()
-
-    # Note: sometimes deleting rows out of `device_inbox` can take a long time,
-    # so we use a cache so that we deduplicate in flight requests to delete
-    # devices.
-    @cachedList(cached_method_name="delete_device", list_name="device_ids")
-    async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> dict:
+    async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
         """Deletes several devices.
 
         Args:
@@ -1818,8 +1739,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         for device_id in device_ids:
             self.device_id_exists_cache.invalidate((user_id, device_id))
 
-        return {}
-
     async def update_device(
         self, user_id: str, device_id: str, new_display_name: Optional[str] = None
     ) -> None:
diff --git a/tests/handlers/test_admin.py b/tests/handlers/test_admin.py
index f0ba3775c8..5569ccef8a 100644
--- a/tests/handlers/test_admin.py
+++ b/tests/handlers/test_admin.py
@@ -272,7 +272,7 @@ class ExfiltrateData(unittest.HomeserverTestCase):
         self.assertIn("device_id", args[0][0])
         self.assertIsNone(args[0][0]["display_name"])
         self.assertIsNone(args[0][0]["last_seen_user_agent"])
-        self.assertEqual(args[0][0]["last_seen_ts"], 600)
+        self.assertIsNone(args[0][0]["last_seen_ts"])
         self.assertIsNone(args[0][0]["last_seen_ip"])
 
     def test_connections(self) -> None:
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index a456bffd63..ce7525e29c 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -115,7 +115,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
                 "device_id": "xyz",
                 "display_name": "display 0",
                 "last_seen_ip": None,
-                "last_seen_ts": 1000000,
+                "last_seen_ts": None,
             },
             device_map["xyz"],
         )
diff --git a/tests/rest/client/test_register.py b/tests/rest/client/test_register.py
index 7ae84e3139..b228dba861 100644
--- a/tests/rest/client/test_register.py
+++ b/tests/rest/client/test_register.py
@@ -794,53 +794,6 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase):
             ApprovalNoticeMedium.NONE, channel.json_body["approval_notice_medium"]
         )
 
-    def test_check_stale_devices_get_pruned(self) -> None:
-        """Check that if a user has some stale devices we log them out when they
-        log in a new device."""
-
-        # Register some devices, but not too many that we go over the threshold
-        # where we prune more aggressively.
-        user_id = self.register_user("user", "pass")
-        for _ in range(0, 50):
-            self.login(user_id, "pass")
-
-        store = self.hs.get_datastores().main
-
-        res = self.get_success(store.get_devices_by_user(user_id))
-        self.assertEqual(len(res), 50)
-
-        # Advance time so that the above devices are considered "old".
-        self.reactor.advance(30 * 24 * 60 * 60 * 1000)
-
-        self.login(user_id, "pass")
-
-        self.reactor.pump([60] * 10)  # Ensure background job runs
-
-        # We expect all old devices to have been logged out
-        res = self.get_success(store.get_devices_by_user(user_id))
-        self.assertEqual(len(res), 1)
-
-    def test_check_recent_devices_get_pruned(self) -> None:
-        """Check that if a user has many devices we log out the last oldest
-        ones.
-
-        Note: this is similar to above, except if we lots of devices we prune
-        devices even if they're not old.
-        """
-
-        # Register a lot of devices in a short amount of time
-        user_id = self.register_user("user", "pass")
-        for _ in range(0, 100):
-            self.login(user_id, "pass")
-            self.reactor.advance(100)
-
-        store = self.hs.get_datastores().main
-
-        # We keep up to 50 devices that have been used in the last week, plus
-        # the device that was last logged in.
-        res = self.get_success(store.get_devices_by_user(user_id))
-        self.assertEqual(len(res), 51)
-
 
 class AccountValidityTestCase(unittest.HomeserverTestCase):
     servlets = [
diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py
index f989986538..cd0079871c 100644
--- a/tests/storage/test_client_ips.py
+++ b/tests/storage/test_client_ips.py
@@ -170,8 +170,6 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
             )
         )
 
-        last_seen = self.clock.time_msec()
-
         if after_persisting:
             # Trigger the storage loop
             self.reactor.advance(10)
@@ -192,7 +190,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
                         "device_id": device_id,
                         "ip": None,
                         "user_agent": None,
-                        "last_seen": last_seen,
+                        "last_seen": None,
                     },
                 ],
             )
-- 
cgit 1.5.1


From 157092d97ad14f79878ae90009b9c60489679a8f Mon Sep 17 00:00:00 2001
From: Jason Robinson <jasonr@element.io>
Date: Fri, 31 Mar 2023 20:20:40 +0300
Subject: Fix copyright year in SSO footer template (#15358)

---
 changelog.d/15358.misc                | 1 +
 synapse/res/templates/sso_footer.html | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15358.misc

(limited to 'synapse')

diff --git a/changelog.d/15358.misc b/changelog.d/15358.misc
new file mode 100644
index 0000000000..daf261a9f1
--- /dev/null
+++ b/changelog.d/15358.misc
@@ -0,0 +1 @@
+Fix copyright year in SSO footer template.
diff --git a/synapse/res/templates/sso_footer.html b/synapse/res/templates/sso_footer.html
index b46e0d83fe..fdcb206c32 100644
--- a/synapse/res/templates/sso_footer.html
+++ b/synapse/res/templates/sso_footer.html
@@ -15,5 +15,5 @@
               </g>
           </g>
       </svg>
-      <p>An open network for secure, decentralized communication.<br>© 2022 The Matrix.org Foundation C.I.C.</p>
-</footer>
\ No newline at end of file
+      <p>An open network for secure, decentralized communication.<br>© 2023 The Matrix.org Foundation C.I.C.</p>
+</footer>
-- 
cgit 1.5.1


From 56efa9b1672abcdf1f42bebcc3c7dd0b4fa40067 Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Mon, 3 Apr 2023 04:27:51 -0500
Subject: Experimental Unix socket support (#15353)

* Add IReactorUNIX to ISynapseReactor type hint.

* Create listen_unix().

Two options: 'path' to the socket file and 'mode' for its permissions (not a umask; 666 is
recommended as the default, as nginx/other reverse proxies write to it and are set up as user www-data)

For the moment, leave the option to always create a PID lockfile turned on by default

* Create UnixListenerConfig and wire it up.

Rename ListenerConfig to TCPListenerConfig, then Union them together into ListenerConfig.
This spidered around a bit, but I think I got it all. Metrics and manhole have been placed
behind a conditional in case they are accidentally put onto a unix socket.

Use new helpers to get if a listener is configured for TLS, and to help create a site tag
for logging.

There are 2 TODO things in parse_listener_def() to finish up at a later point.

* Refactor SynapseRequest to handle logging correctly when using a unix socket.

This prevents an exception when an IP address can not be retrieved for a request.

* Make the 'Synapse now listening on Unix socket' log line a little prettier.

* No silent failures on generic workers when trying to use a unix socket with metrics or manhole.

* Inline variables in app/_base.py

* Update docstring for listen_unix() to remove reference to a hardcoded permission of 0o666 and add a few comments saying where the default IS declared.

* Disallow both a unix socket and a ip/port combo on the same listener resource

* Linting

* Changelog

* review: simplify how listen_unix returns (and get rid of a type: ignore)

* review: fix typo from ConfigError in app/homeserver.py

* review: roll conditional for http_options.tag into get_site_tag() helper (and add docstring)

* review: enhance the conditionals for checking if a port or path is valid, remove a TODO line

* review: Try updating comment in get_client_ip_if_available to clarify what is being retrieved and why

* Pretty up how 'Synapse now listening on Unix Socket' looks by decoding the byte string.

* review: In parse_listener_def(), raise ConfigError if neither socket_path nor port is declared (and fix a typo)
---
 changelog.d/15353.misc        |   1 +
 synapse/app/_base.py          |  92 ++++++++++++++++++++++----------
 synapse/app/generic_worker.py |  34 ++++++++----
 synapse/app/homeserver.py     |  42 +++++++++------
 synapse/config/server.py      | 118 ++++++++++++++++++++++++++++++++----------
 synapse/config/workers.py     |  13 +++--
 synapse/http/site.py          |  27 ++++++++--
 synapse/types/__init__.py     |   2 +
 8 files changed, 239 insertions(+), 90 deletions(-)
 create mode 100644 changelog.d/15353.misc

(limited to 'synapse')

diff --git a/changelog.d/15353.misc b/changelog.d/15353.misc
new file mode 100644
index 0000000000..23927fea8f
--- /dev/null
+++ b/changelog.d/15353.misc
@@ -0,0 +1 @@
+Add experimental support for Unix sockets. Contributed by Jason Little.
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 28062dd69d..f7b866978c 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -41,7 +41,12 @@ from typing_extensions import ParamSpec
 
 import twisted
 from twisted.internet import defer, error, reactor as _reactor
-from twisted.internet.interfaces import IOpenSSLContextFactory, IReactorSSL, IReactorTCP
+from twisted.internet.interfaces import (
+    IOpenSSLContextFactory,
+    IReactorSSL,
+    IReactorTCP,
+    IReactorUNIX,
+)
 from twisted.internet.protocol import ServerFactory
 from twisted.internet.tcp import Port
 from twisted.logger import LoggingFile, LogLevel
@@ -56,7 +61,7 @@ from synapse.app.phone_stats_home import start_phone_stats_home
 from synapse.config import ConfigError
 from synapse.config._base import format_config_error
 from synapse.config.homeserver import HomeServerConfig
-from synapse.config.server import ListenerConfig, ManholeConfig
+from synapse.config.server import ListenerConfig, ManholeConfig, TCPListenerConfig
 from synapse.crypto import context_factory
 from synapse.events.presence_router import load_legacy_presence_router
 from synapse.events.spamcheck import load_legacy_spam_checkers
@@ -351,6 +356,28 @@ def listen_tcp(
     return r  # type: ignore[return-value]
 
 
+def listen_unix(
+    path: str,
+    mode: int,
+    factory: ServerFactory,
+    reactor: IReactorUNIX = reactor,
+    backlog: int = 50,
+) -> List[Port]:
+    """
+    Create a UNIX socket for a given path and 'mode' permission
+
+    Returns:
+        list of twisted.internet.tcp.Port listening for Unix socket connections
+    """
+    wantPID = True
+
+    return [
+        # IReactorUNIX returns an object implementing IListeningPort from listenUNIX,
+        # but we know it will be a Port instance.
+        cast(Port, reactor.listenUNIX(path, factory, backlog, mode, wantPID))
+    ]
+
+
 def listen_http(
     listener_config: ListenerConfig,
     root_resource: Resource,
@@ -359,18 +386,13 @@ def listen_http(
     context_factory: Optional[IOpenSSLContextFactory],
     reactor: ISynapseReactor = reactor,
 ) -> List[Port]:
-    port = listener_config.port
-    bind_addresses = listener_config.bind_addresses
-    tls = listener_config.tls
-
     assert listener_config.http_options is not None
 
-    site_tag = listener_config.http_options.tag
-    if site_tag is None:
-        site_tag = str(port)
+    site_tag = listener_config.get_site_tag()
 
     site = SynapseSite(
-        "synapse.access.%s.%s" % ("https" if tls else "http", site_tag),
+        "synapse.access.%s.%s"
+        % ("https" if listener_config.is_tls() else "http", site_tag),
         site_tag,
         listener_config,
         root_resource,
@@ -378,25 +400,41 @@ def listen_http(
         max_request_body_size=max_request_body_size,
         reactor=reactor,
     )
-    if tls:
-        # refresh_certificate should have been called before this.
-        assert context_factory is not None
-        ports = listen_ssl(
-            bind_addresses,
-            port,
-            site,
-            context_factory,
-            reactor=reactor,
-        )
-        logger.info("Synapse now listening on TCP port %d (TLS)", port)
+
+    if isinstance(listener_config, TCPListenerConfig):
+        if listener_config.is_tls():
+            # refresh_certificate should have been called before this.
+            assert context_factory is not None
+            ports = listen_ssl(
+                listener_config.bind_addresses,
+                listener_config.port,
+                site,
+                context_factory,
+                reactor=reactor,
+            )
+            logger.info(
+                "Synapse now listening on TCP port %d (TLS)", listener_config.port
+            )
+        else:
+            ports = listen_tcp(
+                listener_config.bind_addresses,
+                listener_config.port,
+                site,
+                reactor=reactor,
+            )
+            logger.info("Synapse now listening on TCP port %d", listener_config.port)
+
     else:
-        ports = listen_tcp(
-            bind_addresses,
-            port,
-            site,
-            reactor=reactor,
+        ports = listen_unix(
+            listener_config.path, listener_config.mode, site, reactor=reactor
         )
-        logger.info("Synapse now listening on TCP port %d", port)
+        # getHost() returns a UNIXAddress whose 'name' instance variable is the socket
+        # path encoded as a byte string. Decode it as utf-8 so the log line is readable.
+        logger.info(
+            "Synapse now listening on Unix Socket at: "
+            f"{ports[0].getHost().name.decode('utf-8')}"
+        )
+
     return ports
 
 
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 0dec24369a..e17ce35b8e 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -38,7 +38,7 @@ from synapse.app._base import (
 from synapse.config._base import ConfigError
 from synapse.config.homeserver import HomeServerConfig
 from synapse.config.logger import setup_logging
-from synapse.config.server import ListenerConfig
+from synapse.config.server import ListenerConfig, TCPListenerConfig
 from synapse.federation.transport.server import TransportLayerServer
 from synapse.http.server import JsonResource, OptionsResource
 from synapse.logging.context import LoggingContext
@@ -236,12 +236,18 @@ class GenericWorkerServer(HomeServer):
             if listener.type == "http":
                 self._listen_http(listener)
             elif listener.type == "manhole":
-                _base.listen_manhole(
-                    listener.bind_addresses,
-                    listener.port,
-                    manhole_settings=self.config.server.manhole_settings,
-                    manhole_globals={"hs": self},
-                )
+                if isinstance(listener, TCPListenerConfig):
+                    _base.listen_manhole(
+                        listener.bind_addresses,
+                        listener.port,
+                        manhole_settings=self.config.server.manhole_settings,
+                        manhole_globals={"hs": self},
+                    )
+                else:
+                    raise ConfigError(
+                        "Can not using a unix socket for manhole at this time."
+                    )
+
             elif listener.type == "metrics":
                 if not self.config.metrics.enable_metrics:
                     logger.warning(
@@ -249,10 +255,16 @@ class GenericWorkerServer(HomeServer):
                         "enable_metrics is not True!"
                     )
                 else:
-                    _base.listen_metrics(
-                        listener.bind_addresses,
-                        listener.port,
-                    )
+                    if isinstance(listener, TCPListenerConfig):
+                        _base.listen_metrics(
+                            listener.bind_addresses,
+                            listener.port,
+                        )
+                    else:
+                        raise ConfigError(
+                            "Can not use a unix socket for metrics at this time."
+                        )
+
             else:
                 logger.warning("Unsupported listener type: %s", listener.type)
 
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index b8830b1a9c..84236ac299 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -44,7 +44,7 @@ from synapse.app._base import (
 )
 from synapse.config._base import ConfigError, format_config_error
 from synapse.config.homeserver import HomeServerConfig
-from synapse.config.server import ListenerConfig
+from synapse.config.server import ListenerConfig, TCPListenerConfig
 from synapse.federation.transport.server import TransportLayerServer
 from synapse.http.additional_resource import AdditionalResource
 from synapse.http.server import (
@@ -78,14 +78,13 @@ class SynapseHomeServer(HomeServer):
     DATASTORE_CLASS = DataStore  # type: ignore
 
     def _listener_http(
-        self, config: HomeServerConfig, listener_config: ListenerConfig
+        self,
+        config: HomeServerConfig,
+        listener_config: ListenerConfig,
     ) -> Iterable[Port]:
-        port = listener_config.port
         # Must exist since this is an HTTP listener.
         assert listener_config.http_options is not None
-        site_tag = listener_config.http_options.tag
-        if site_tag is None:
-            site_tag = str(port)
+        site_tag = listener_config.get_site_tag()
 
         # We always include a health resource.
         resources: Dict[str, Resource] = {"/health": HealthResource()}
@@ -252,12 +251,17 @@ class SynapseHomeServer(HomeServer):
                     self._listener_http(self.config, listener)
                 )
             elif listener.type == "manhole":
-                _base.listen_manhole(
-                    listener.bind_addresses,
-                    listener.port,
-                    manhole_settings=self.config.server.manhole_settings,
-                    manhole_globals={"hs": self},
-                )
+                if isinstance(listener, TCPListenerConfig):
+                    _base.listen_manhole(
+                        listener.bind_addresses,
+                        listener.port,
+                        manhole_settings=self.config.server.manhole_settings,
+                        manhole_globals={"hs": self},
+                    )
+                else:
+                    raise ConfigError(
+                        "Can not use a unix socket for manhole at this time."
+                    )
             elif listener.type == "metrics":
                 if not self.config.metrics.enable_metrics:
                     logger.warning(
@@ -265,10 +269,16 @@ class SynapseHomeServer(HomeServer):
                         "enable_metrics is not True!"
                     )
                 else:
-                    _base.listen_metrics(
-                        listener.bind_addresses,
-                        listener.port,
-                    )
+                    if isinstance(listener, TCPListenerConfig):
+                        _base.listen_metrics(
+                            listener.bind_addresses,
+                            listener.port,
+                        )
+                    else:
+                        raise ConfigError(
+                            "Can not use a unix socket for metrics at this time."
+                        )
+
             else:
                 # this shouldn't happen, as the listener type should have been checked
                 # during parsing
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 0e46b849cf..386c3194b8 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -214,17 +214,52 @@ class HttpListenerConfig:
 
 
 @attr.s(slots=True, frozen=True, auto_attribs=True)
-class ListenerConfig:
-    """Object describing the configuration of a single listener."""
+class TCPListenerConfig:
+    """Object describing the configuration of a single TCP listener."""
 
     port: int = attr.ib(validator=attr.validators.instance_of(int))
-    bind_addresses: List[str]
+    bind_addresses: List[str] = attr.ib(validator=attr.validators.instance_of(List))
     type: str = attr.ib(validator=attr.validators.in_(KNOWN_LISTENER_TYPES))
     tls: bool = False
 
     # http_options is only populated if type=http
     http_options: Optional[HttpListenerConfig] = None
 
+    def get_site_tag(self) -> str:
+        """Retrieves http_options.tag if it exists, otherwise the port number."""
+        if self.http_options and self.http_options.tag is not None:
+            return self.http_options.tag
+        else:
+            return str(self.port)
+
+    def is_tls(self) -> bool:
+        return self.tls
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class UnixListenerConfig:
+    """Object describing the configuration of a single Unix socket listener."""
+
+    # Note: unix sockets can not be tls encrypted, so HAVE to be behind a tls-handling
+    # reverse proxy
+    path: str = attr.ib()
+    # A default(0o666) for this is set in parse_listener_def() below
+    mode: int
+    type: str = attr.ib(validator=attr.validators.in_(KNOWN_LISTENER_TYPES))
+
+    # http_options is only populated if type=http
+    http_options: Optional[HttpListenerConfig] = None
+
+    def get_site_tag(self) -> str:
+        return "unix"
+
+    def is_tls(self) -> bool:
+        """Unix sockets can't have TLS"""
+        return False
+
+
+ListenerConfig = Union[TCPListenerConfig, UnixListenerConfig]
+
 
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class ManholeConfig:
@@ -531,12 +566,12 @@ class ServerConfig(Config):
 
         self.listeners = [parse_listener_def(i, x) for i, x in enumerate(listeners)]
 
-        # no_tls is not really supported any more, but let's grandfather it in
-        # here.
+        # no_tls is not really supported anymore, but let's grandfather it in here.
         if config.get("no_tls", False):
             l2 = []
             for listener in self.listeners:
-                if listener.tls:
+                if isinstance(listener, TCPListenerConfig) and listener.tls:
+                    # The isinstance() check guarantees this listener *has* a port
                     logger.info(
                         "Ignoring TLS-enabled listener on port %i due to no_tls",
                         listener.port,
@@ -577,7 +612,7 @@ class ServerConfig(Config):
             )
 
             self.listeners.append(
-                ListenerConfig(
+                TCPListenerConfig(
                     port=bind_port,
                     bind_addresses=[bind_host],
                     tls=True,
@@ -589,7 +624,7 @@ class ServerConfig(Config):
             unsecure_port = config.get("unsecure_port", bind_port - 400)
             if unsecure_port:
                 self.listeners.append(
-                    ListenerConfig(
+                    TCPListenerConfig(
                         port=unsecure_port,
                         bind_addresses=[bind_host],
                         tls=False,
@@ -601,7 +636,7 @@ class ServerConfig(Config):
         manhole = config.get("manhole")
         if manhole:
             self.listeners.append(
-                ListenerConfig(
+                TCPListenerConfig(
                     port=manhole,
                     bind_addresses=["127.0.0.1"],
                     type="manhole",
@@ -648,7 +683,7 @@ class ServerConfig(Config):
             logger.warning(METRICS_PORT_WARNING)
 
             self.listeners.append(
-                ListenerConfig(
+                TCPListenerConfig(
                     port=metrics_port,
                     bind_addresses=[config.get("metrics_bind_host", "127.0.0.1")],
                     type="http",
@@ -724,7 +759,7 @@ class ServerConfig(Config):
             self.delete_stale_devices_after = None
 
     def has_tls_listener(self) -> bool:
-        return any(listener.tls for listener in self.listeners)
+        return any(listener.is_tls() for listener in self.listeners)
 
     def generate_config_section(
         self,
@@ -904,25 +939,25 @@ def parse_listener_def(num: int, listener: Any) -> ListenerConfig:
         raise ConfigError(DIRECT_TCP_ERROR, ("listeners", str(num), "type"))
 
     port = listener.get("port")
-    if type(port) is not int:
+    socket_path = listener.get("path")
+    # Either a port or a path should be declared at a minimum. Using both would be bad.
+    if port is not None and not isinstance(port, int):
         raise ConfigError("Listener configuration is lacking a valid 'port' option")
+    if socket_path is not None and not isinstance(socket_path, str):
+        raise ConfigError("Listener configuration is lacking a valid 'path' option")
+    if port and socket_path:
+        raise ConfigError(
+            "Can not have both a UNIX socket and an IP/port declared for the same "
+            "resource!"
+        )
+    if port is None and socket_path is None:
+        raise ConfigError(
+            "Must have either a UNIX socket or an IP/port declared for a given "
+            "resource!"
+        )
 
     tls = listener.get("tls", False)
 
-    bind_addresses = listener.get("bind_addresses", [])
-    bind_address = listener.get("bind_address")
-    # if bind_address was specified, add it to the list of addresses
-    if bind_address:
-        bind_addresses.append(bind_address)
-
-    # if we still have an empty list of addresses, use the default list
-    if not bind_addresses:
-        if listener_type == "metrics":
-            # the metrics listener doesn't support IPv6
-            bind_addresses.append("0.0.0.0")
-        else:
-            bind_addresses.extend(DEFAULT_BIND_ADDRESSES)
-
     http_config = None
     if listener_type == "http":
         try:
@@ -932,8 +967,12 @@ def parse_listener_def(num: int, listener: Any) -> ListenerConfig:
         except ValueError as e:
             raise ConfigError("Unknown listener resource") from e
 
+        # For a unix socket, default x_forwarded to True, as this is the only way of
+        # getting a client IP.
+        # Note: a reverse proxy is required anyway, as there is no way of exposing a
+        # unix socket to the internet.
         http_config = HttpListenerConfig(
-            x_forwarded=listener.get("x_forwarded", False),
+            x_forwarded=listener.get("x_forwarded", (True if socket_path else False)),
             resources=resources,
             additional_resources=listener.get("additional_resources", {}),
             tag=listener.get("tag"),
@@ -941,7 +980,30 @@ def parse_listener_def(num: int, listener: Any) -> ListenerConfig:
             experimental_cors_msc3886=listener.get("experimental_cors_msc3886", False),
         )
 
-    return ListenerConfig(port, bind_addresses, listener_type, tls, http_config)
+    if socket_path:
+        # TODO: Add in path validation, like if the directory exists and is writable?
+        # Set a default for the permission, in case it's left out
+        socket_mode = listener.get("mode", 0o666)
+
+        return UnixListenerConfig(socket_path, socket_mode, listener_type, http_config)
+
+    else:
+        assert port is not None
+        bind_addresses = listener.get("bind_addresses", [])
+        bind_address = listener.get("bind_address")
+        # if bind_address was specified, add it to the list of addresses
+        if bind_address:
+            bind_addresses.append(bind_address)
+
+        # if we still have an empty list of addresses, use the default list
+        if not bind_addresses:
+            if listener_type == "metrics":
+                # the metrics listener doesn't support IPv6
+                bind_addresses.append("0.0.0.0")
+            else:
+                bind_addresses.extend(DEFAULT_BIND_ADDRESSES)
+
+        return TCPListenerConfig(port, bind_addresses, listener_type, tls, http_config)
 
 
 _MANHOLE_SETTINGS_SCHEMA = {
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 2580660b6c..1dfbe27e89 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -19,15 +19,18 @@ from typing import Any, Dict, List, Union
 
 import attr
 
-from synapse.types import JsonDict
-
-from ._base import (
+from synapse.config._base import (
     Config,
     ConfigError,
     RoutableShardedWorkerHandlingConfig,
     ShardedWorkerHandlingConfig,
 )
-from .server import DIRECT_TCP_ERROR, ListenerConfig, parse_listener_def
+from synapse.config.server import (
+    DIRECT_TCP_ERROR,
+    TCPListenerConfig,
+    parse_listener_def,
+)
+from synapse.types import JsonDict
 
 _DEPRECATED_WORKER_DUTY_OPTION_USED = """
 The '%s' configuration option is deprecated and will be removed in a future
@@ -161,7 +164,7 @@ class WorkerConfig(Config):
         manhole = config.get("worker_manhole")
         if manhole:
             self.worker_listeners.append(
-                ListenerConfig(
+                TCPListenerConfig(
                     port=manhole,
                     bind_addresses=["127.0.0.1"],
                     type="manhole",
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 6a1dbf7f33..c530966ef3 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -19,6 +19,7 @@ from typing import TYPE_CHECKING, Any, Generator, Optional, Tuple, Union
 import attr
 from zope.interface import implementer
 
+from twisted.internet.address import UNIXAddress
 from twisted.internet.defer import Deferred
 from twisted.internet.interfaces import IAddress, IReactorTime
 from twisted.python.failure import Failure
@@ -257,7 +258,7 @@ class SynapseRequest(Request):
             request_id,
             request=ContextRequest(
                 request_id=request_id,
-                ip_address=self.getClientAddress().host,
+                ip_address=self.get_client_ip_if_available(),
                 site_tag=self.synapse_site.site_tag,
                 # The requester is going to be unknown at this point.
                 requester=None,
@@ -414,7 +415,7 @@ class SynapseRequest(Request):
 
         self.synapse_site.access_logger.debug(
             "%s - %s - Received request: %s %s",
-            self.getClientAddress().host,
+            self.get_client_ip_if_available(),
             self.synapse_site.site_tag,
             self.get_method(),
             self.get_redacted_uri(),
@@ -462,7 +463,7 @@ class SynapseRequest(Request):
             "%s - %s - {%s}"
             " Processed request: %.3fsec/%.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
             ' %sB %s "%s %s %s" "%s" [%d dbevts]',
-            self.getClientAddress().host,
+            self.get_client_ip_if_available(),
             self.synapse_site.site_tag,
             requester,
             processing_time,
@@ -500,6 +501,26 @@ class SynapseRequest(Request):
 
         return True
 
+    def get_client_ip_if_available(self) -> str:
+        """Logging helper. Return something useful when a client IP is not retrievable
+        from a unix socket.
+
+        In practice, this returns the socket file path on a SynapseRequest if using a
+        unix socket and the normal IP address for TCP sockets.
+
+        """
+        # getClientAddress().host returns a proper IP address for a TCP socket. But
+        # unix sockets have no concept of IP addresses or ports and return a
+        # UNIXAddress containing a 'None' value. In order to get something usable for
+        # logs (where this is used), get the unix socket file. getHost() returns a
+        # UNIXAddress containing a value of the socket file and has an instance
+        # variable of 'name' encoded as a byte string containing the path we want.
+        # Decode to utf-8 so it looks nice.
+        if isinstance(self.getClientAddress(), UNIXAddress):
+            return self.getHost().name.decode("utf-8")
+        else:
+            return self.getClientAddress().host
+
 
 class XForwardedForRequest(SynapseRequest):
     """Request object which honours proxy headers
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index c09b9cf87d..5cee9c3194 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -50,6 +50,7 @@ from twisted.internet.interfaces import (
     IReactorTCP,
     IReactorThreads,
     IReactorTime,
+    IReactorUNIX,
 )
 
 from synapse.api.errors import Codes, SynapseError
@@ -91,6 +92,7 @@ StrCollection = Union[Tuple[str, ...], List[str], AbstractSet[str]]
 class ISynapseReactor(
     IReactorTCP,
     IReactorSSL,
+    IReactorUNIX,
     IReactorPluggableNameResolver,
     IReactorTime,
     IReactorCore,
-- 
cgit 1.5.1


From cf2f2934ad6c94a269e750684d1d8170b1173b7a Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 3 Apr 2023 13:20:32 -0400
Subject: Call appservices on modern paths, falling back to legacy paths.
 (#15317)

This uses the specced /_matrix/app/v1/... paths instead of the
"legacy" paths. If the homeserver receives an error it will retry
using the legacy path.
---
 changelog.d/15317.bugfix     |   1 +
 docs/upgrade.md              |  16 ++++++
 synapse/appservice/api.py    | 133 ++++++++++++++++++++++++++++++-------------
 synapse/http/client.py       |  13 +++--
 tests/appservice/test_api.py |  57 ++++++++++++++++++-
 5 files changed, 172 insertions(+), 48 deletions(-)
 create mode 100644 changelog.d/15317.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15317.bugfix b/changelog.d/15317.bugfix
new file mode 100644
index 0000000000..194e4c46c6
--- /dev/null
+++ b/changelog.d/15317.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where Synapse only used the [legacy appservice routes](https://spec.matrix.org/v1.6/application-service-api/#legacy-routes).
diff --git a/docs/upgrade.md b/docs/upgrade.md
index f14444a400..1ddfc31ff6 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -88,6 +88,22 @@ process, for example:
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     ```
 
+# Upgrading to v1.81.0
+
+## Application service path & authentication deprecations
+
+Synapse now attempts the versioned appservice paths before falling back to the
+[legacy paths](https://spec.matrix.org/v1.6/application-service-api/#legacy-routes).
+Usage of the legacy routes should be considered deprecated.
+
+Additionally, Synapse has supported sending the application service access token
+via [the `Authorization` header](https://spec.matrix.org/v1.6/application-service-api/#authorization)
+since v1.70.0. For backwards compatibility it is *also* sent as the `access_token`
+query parameter. This is insecure and should be considered deprecated.
+
+A future version of Synapse (v1.88.0 or later) will remove support for legacy
+application service routes and query parameter authorization.
+
 # Upgrading to v1.80.0
 
 ## Reporting events error code change
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index b27eedef99..86ddb1bb28 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -17,6 +17,8 @@ import urllib.parse
 from typing import (
     TYPE_CHECKING,
     Any,
+    Awaitable,
+    Callable,
     Dict,
     Iterable,
     List,
@@ -24,10 +26,11 @@ from typing import (
     Optional,
     Sequence,
     Tuple,
+    TypeVar,
 )
 
 from prometheus_client import Counter
-from typing_extensions import TypeGuard
+from typing_extensions import Concatenate, ParamSpec, TypeGuard
 
 from synapse.api.constants import EventTypes, Membership, ThirdPartyEntityKind
 from synapse.api.errors import CodeMessageException, HttpResponseException
@@ -78,7 +81,11 @@ sent_todevice_counter = Counter(
 HOUR_IN_MS = 60 * 60 * 1000
 
 
-APP_SERVICE_PREFIX = "/_matrix/app/unstable"
+APP_SERVICE_PREFIX = "/_matrix/app/v1"
+APP_SERVICE_UNSTABLE_PREFIX = "/_matrix/app/unstable"
+
+P = ParamSpec("P")
+R = TypeVar("R")
 
 
 def _is_valid_3pe_metadata(info: JsonDict) -> bool:
@@ -121,6 +128,47 @@ class ApplicationServiceApi(SimpleHttpClient):
             hs.get_clock(), "as_protocol_meta", timeout_ms=HOUR_IN_MS
         )
 
+    async def _send_with_fallbacks(
+        self,
+        service: "ApplicationService",
+        prefixes: List[str],
+        path: str,
+        func: Callable[Concatenate[str, P], Awaitable[R]],
+        *args: P.args,
+        **kwargs: P.kwargs,
+    ) -> R:
+        """
+        Attempt to call an application service with multiple paths, falling back
+        until one succeeds.
+
+        Args:
+            service: The application service, this provides the base URL.
+            prefixes: A list of paths to try in order for the requests.
+            path: A suffix to append to each prefix.
+            func: The function to call, the first argument will be the full
+                endpoint to fetch. Other arguments are provided by args/kwargs.
+
+        Returns:
+            The return value of func.
+        """
+        for i, prefix in enumerate(prefixes, start=1):
+            uri = f"{service.url}{prefix}{path}"
+            try:
+                return await func(uri, *args, **kwargs)
+            except HttpResponseException as e:
+                # If an error is received that is due to an unrecognised path,
+                # fallback to next path (if one exists). Otherwise, consider it
+                # a legitimate error and raise.
+                if i < len(prefixes) and is_unknown_endpoint(e):
+                    continue
+                raise
+            except Exception:
+                # Unexpected exceptions get sent to the caller.
+                raise
+
+        # The function should always exit via the return or raise above this.
+        raise RuntimeError("Unexpected fallback behaviour. This should never be seen.")
+
     async def query_user(self, service: "ApplicationService", user_id: str) -> bool:
         if service.url is None:
             return False
@@ -128,10 +176,12 @@ class ApplicationServiceApi(SimpleHttpClient):
         # This is required by the configuration.
         assert service.hs_token is not None
 
-        uri = service.url + ("/users/%s" % urllib.parse.quote(user_id))
         try:
-            response = await self.get_json(
-                uri,
+            response = await self._send_with_fallbacks(
+                service,
+                [APP_SERVICE_PREFIX, ""],
+                f"/users/{urllib.parse.quote(user_id)}",
+                self.get_json,
                 {"access_token": service.hs_token},
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
@@ -140,9 +190,9 @@ class ApplicationServiceApi(SimpleHttpClient):
         except CodeMessageException as e:
             if e.code == 404:
                 return False
-            logger.warning("query_user to %s received %s", uri, e.code)
+            logger.warning("query_user to %s received %s", service.url, e.code)
         except Exception as ex:
-            logger.warning("query_user to %s threw exception %s", uri, ex)
+            logger.warning("query_user to %s threw exception %s", service.url, ex)
         return False
 
     async def query_alias(self, service: "ApplicationService", alias: str) -> bool:
@@ -152,21 +202,23 @@ class ApplicationServiceApi(SimpleHttpClient):
         # This is required by the configuration.
         assert service.hs_token is not None
 
-        uri = service.url + ("/rooms/%s" % urllib.parse.quote(alias))
         try:
-            response = await self.get_json(
-                uri,
+            response = await self._send_with_fallbacks(
+                service,
+                [APP_SERVICE_PREFIX, ""],
+                f"/rooms/{urllib.parse.quote(alias)}",
+                self.get_json,
                 {"access_token": service.hs_token},
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
             if response is not None:  # just an empty json object
                 return True
         except CodeMessageException as e:
-            logger.warning("query_alias to %s received %s", uri, e.code)
+            logger.warning("query_alias to %s received %s", service.url, e.code)
             if e.code == 404:
                 return False
         except Exception as ex:
-            logger.warning("query_alias to %s threw exception %s", uri, ex)
+            logger.warning("query_alias to %s threw exception %s", service.url, ex)
         return False
 
     async def query_3pe(
@@ -188,25 +240,24 @@ class ApplicationServiceApi(SimpleHttpClient):
         # This is required by the configuration.
         assert service.hs_token is not None
 
-        uri = "%s%s/thirdparty/%s/%s" % (
-            service.url,
-            APP_SERVICE_PREFIX,
-            kind,
-            urllib.parse.quote(protocol),
-        )
         try:
             args: Mapping[Any, Any] = {
                 **fields,
                 b"access_token": service.hs_token,
             }
-            response = await self.get_json(
-                uri,
+            response = await self._send_with_fallbacks(
+                service,
+                [APP_SERVICE_PREFIX, APP_SERVICE_UNSTABLE_PREFIX],
+                f"/thirdparty/{kind}/{urllib.parse.quote(protocol)}",
+                self.get_json,
                 args=args,
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
             if not isinstance(response, list):
                 logger.warning(
-                    "query_3pe to %s returned an invalid response %r", uri, response
+                    "query_3pe to %s returned an invalid response %r",
+                    service.url,
+                    response,
                 )
                 return []
 
@@ -216,12 +267,12 @@ class ApplicationServiceApi(SimpleHttpClient):
                     ret.append(r)
                 else:
                     logger.warning(
-                        "query_3pe to %s returned an invalid result %r", uri, r
+                        "query_3pe to %s returned an invalid result %r", service.url, r
                     )
 
             return ret
         except Exception as ex:
-            logger.warning("query_3pe to %s threw exception %s", uri, ex)
+            logger.warning("query_3pe to %s threw exception %s", service.url, ex)
             return []
 
     async def get_3pe_protocol(
@@ -233,21 +284,20 @@ class ApplicationServiceApi(SimpleHttpClient):
         async def _get() -> Optional[JsonDict]:
             # This is required by the configuration.
             assert service.hs_token is not None
-            uri = "%s%s/thirdparty/protocol/%s" % (
-                service.url,
-                APP_SERVICE_PREFIX,
-                urllib.parse.quote(protocol),
-            )
             try:
-                info = await self.get_json(
-                    uri,
+                info = await self._send_with_fallbacks(
+                    service,
+                    [APP_SERVICE_PREFIX, APP_SERVICE_UNSTABLE_PREFIX],
+                    f"/thirdparty/protocol/{urllib.parse.quote(protocol)}",
+                    self.get_json,
                     {"access_token": service.hs_token},
                     headers={"Authorization": [f"Bearer {service.hs_token}"]},
                 )
 
                 if not _is_valid_3pe_metadata(info):
                     logger.warning(
-                        "query_3pe_protocol to %s did not return a valid result", uri
+                        "query_3pe_protocol to %s did not return a valid result",
+                        service.url,
                     )
                     return None
 
@@ -260,7 +310,9 @@ class ApplicationServiceApi(SimpleHttpClient):
 
                 return info
             except Exception as ex:
-                logger.warning("query_3pe_protocol to %s threw exception %s", uri, ex)
+                logger.warning(
+                    "query_3pe_protocol to %s threw exception %s", service.url, ex
+                )
                 return None
 
         key = (service.id, protocol)
@@ -274,7 +326,7 @@ class ApplicationServiceApi(SimpleHttpClient):
         assert service.hs_token is not None
 
         await self.post_json_get_json(
-            uri=service.url + "/_matrix/app/unstable/fi.mau.msc2659/ping",
+            uri=f"{service.url}{APP_SERVICE_UNSTABLE_PREFIX}/fi.mau.msc2659/ping",
             post_json={"transaction_id": txn_id},
             headers={"Authorization": [f"Bearer {service.hs_token}"]},
         )
@@ -318,8 +370,6 @@ class ApplicationServiceApi(SimpleHttpClient):
             )
             txn_id = 0
 
-        uri = service.url + ("/transactions/%s" % urllib.parse.quote(str(txn_id)))
-
         # Never send ephemeral events to appservices that do not support it
         body: JsonDict = {"events": serialized_events}
         if service.supports_ephemeral:
@@ -351,8 +401,11 @@ class ApplicationServiceApi(SimpleHttpClient):
                 }
 
         try:
-            await self.put_json(
-                uri=uri,
+            await self._send_with_fallbacks(
+                service,
+                [APP_SERVICE_PREFIX, ""],
+                f"/transactions/{urllib.parse.quote(str(txn_id))}",
+                self.put_json,
                 json_body=body,
                 args={"access_token": service.hs_token},
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
@@ -360,7 +413,7 @@ class ApplicationServiceApi(SimpleHttpClient):
             if logger.isEnabledFor(logging.DEBUG):
                 logger.debug(
                     "push_bulk to %s succeeded! events=%s",
-                    uri,
+                    service.url,
                     [event.get("event_id") for event in events],
                 )
             sent_transactions_counter.labels(service.id).inc()
@@ -371,7 +424,7 @@ class ApplicationServiceApi(SimpleHttpClient):
         except CodeMessageException as e:
             logger.warning(
                 "push_bulk to %s received code=%s msg=%s",
-                uri,
+                service.url,
                 e.code,
                 e.msg,
                 exc_info=logger.isEnabledFor(logging.DEBUG),
@@ -379,7 +432,7 @@ class ApplicationServiceApi(SimpleHttpClient):
         except Exception as ex:
             logger.warning(
                 "push_bulk to %s threw exception(%s) %s args=%s",
-                uri,
+                service.url,
                 type(ex).__name__,
                 ex,
                 ex.args,
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 5ee55981d9..b5cf8123ce 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -982,20 +982,21 @@ def is_unknown_endpoint(
     """
     if synapse_error is None:
         synapse_error = e.to_synapse_error()
-    # MSC3743 specifies that servers should return a 404 or 405 with an errcode
+
+    # Matrix v1.6 specifies that servers should return a 404 or 405 with an errcode
     # of M_UNRECOGNIZED when they receive a request to an unknown endpoint or
     # to an unknown method, respectively.
     #
-    # Older versions of servers don't properly handle this. This needs to be
-    # rather specific as some endpoints truly do return 404 errors.
+    # Older versions of servers don't return proper errors, so be graceful. But,
+    # also handle that some endpoints truly do return 404 errors.
     return (
         # 404 is an unknown endpoint, 405 is a known endpoint, but unknown method.
         (e.code == 404 or e.code == 405)
         and (
-            # Older Dendrites returned a text body or empty body.
-            # Older Conduit returned an empty body.
+            # Consider empty body or non-JSON bodies to be unrecognised (matches
+            # older Dendrites & Conduits).
             not e.response
-            or e.response == b"404 page not found"
+            or not e.response.startswith(b"{")
             # The proper response JSON with M_UNRECOGNIZED errcode.
             or synapse_error.errcode == Codes.UNRECOGNIZED
         )
diff --git a/tests/appservice/test_api.py b/tests/appservice/test_api.py
index 0dd02b7d58..7deb923a28 100644
--- a/tests/appservice/test_api.py
+++ b/tests/appservice/test_api.py
@@ -16,6 +16,7 @@ from unittest.mock import Mock
 
 from twisted.test.proto_helpers import MemoryReactor
 
+from synapse.api.errors import HttpResponseException
 from synapse.appservice import ApplicationService
 from synapse.server import HomeServer
 from synapse.types import JsonDict
@@ -64,8 +65,8 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
             }
         ]
 
-        URL_USER = f"{URL}/_matrix/app/unstable/thirdparty/user/{PROTOCOL}"
-        URL_LOCATION = f"{URL}/_matrix/app/unstable/thirdparty/location/{PROTOCOL}"
+        URL_USER = f"{URL}/_matrix/app/v1/thirdparty/user/{PROTOCOL}"
+        URL_LOCATION = f"{URL}/_matrix/app/v1/thirdparty/location/{PROTOCOL}"
 
         self.request_url = None
 
@@ -106,6 +107,58 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
         self.assertEqual(self.request_url, URL_LOCATION)
         self.assertEqual(result, SUCCESS_RESULT_LOCATION)
 
+    def test_fallback(self) -> None:
+        """
+        Tests that the fallback to legacy URLs works.
+        """
+        SUCCESS_RESULT_USER = [
+            {
+                "protocol": PROTOCOL,
+                "userid": "@a:user",
+                "fields": {
+                    "more": "fields",
+                },
+            }
+        ]
+
+        URL_USER = f"{URL}/_matrix/app/v1/thirdparty/user/{PROTOCOL}"
+        FALLBACK_URL_USER = f"{URL}/_matrix/app/unstable/thirdparty/user/{PROTOCOL}"
+
+        self.request_url = None
+        self.v1_seen = False
+
+        async def get_json(
+            url: str,
+            args: Mapping[Any, Any],
+            headers: Mapping[Union[str, bytes], Sequence[Union[str, bytes]]],
+        ) -> List[JsonDict]:
+            # Ensure the access token is passed as both a header and query arg.
+            if not headers.get("Authorization") or not args.get(b"access_token"):
+                raise RuntimeError("Access token not provided")
+
+            self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"])
+            self.assertEqual(args.get(b"access_token"), TOKEN)
+            self.request_url = url
+            if url == URL_USER:
+                self.v1_seen = True
+                raise HttpResponseException(404, "NOT_FOUND", b"NOT_FOUND")
+            elif url == FALLBACK_URL_USER:
+                return SUCCESS_RESULT_USER
+            else:
+                raise RuntimeError(
+                    "URL provided was invalid. This should never be seen."
+                )
+
+        # We assign to a method, which mypy doesn't like.
+        self.api.get_json = Mock(side_effect=get_json)  # type: ignore[assignment]
+
+        result = self.get_success(
+            self.api.query_3pe(self.service, "user", PROTOCOL, {b"some": [b"field"]})
+        )
+        self.assertTrue(self.v1_seen)
+        self.assertEqual(self.request_url, FALLBACK_URL_USER)
+        self.assertEqual(result, SUCCESS_RESULT_USER)
+
     def test_claim_keys(self) -> None:
         """
         Tests that the /keys/claim response is properly parsed for missing
-- 
cgit 1.5.1


From 89a71e73905ffa1c97ae8be27d521cd2ef3f3a0c Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Tue, 4 Apr 2023 13:10:25 +0100
Subject: Fix a rare bug where initial /syncs would fail (#15383)

This change fixes a rare bug where initial /syncs would fail with a
`KeyError` under the following circumstances:
 1. A user fast joins a remote room.
 2. The user is kicked from the room before the room's full state has
    been synced.
 3. A second local user fast joins the room.
 4. Events are backfilled into the room with a higher topological
    ordering than the original user's leave. They are assigned a
    negative stream ordering. It's not clear how backfill happened here,
    since it is expected to be equivalent to syncing the full state.
 5. The second local user leaves the room before the room's full state
    has been synced. The homeserver does not complete the sync.
 6. The original user performs an initial /sync with lazy_load_members
    enabled.
     * Because they were kicked from the room, the room is included in
       the /sync response even though the include_leave option is not
       specified.
     * To populate the room's timeline, `_load_filtered_recents` /
       `get_recent_events_for_room` fetches events with a lower stream
       ordering than the leave event and picks the ones with the highest
       topological orderings (which are most recent). This captures the
       backfilled events after the leave, since they have a negative
       stream ordering. These events are filtered out of the timeline,
       since the user was not in the room at the time and cannot view
       them. The sync code ends up with an empty timeline for the room
       that notably does not include the user's leave event.
       This seems buggy, but at least we don't disclose events the user
       isn't allowed to see.
     * Normally, `compute_state_delta` would fetch the state at the
       start and end of the room's timeline to generate the sync
       response. Since the timeline is empty, it fetches the state at
       `min(now, last event in the room)`, which corresponds with the
       second user's leave. The state during the entirety of the second
       user's membership does not include the membership for the first
       user because of partial state.
       This part is also questionable, since we are fetching state from
       outside the bounds of the user's membership.
     * `compute_state_delta` then tries and fails to find the user's
       membership in the auth events of timeline events. Because there
       is no timeline event whose auth events are expected to contain
       the user's membership, a `KeyError` is raised.

Also contains a drive-by fix for a separate unlikely race condition.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/15383.bugfix |  1 +
 synapse/handlers/sync.py | 24 +++++++++++++++++++-----
 2 files changed, 20 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/15383.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15383.bugfix b/changelog.d/15383.bugfix
new file mode 100644
index 0000000000..28c66ef454
--- /dev/null
+++ b/changelog.d/15383.bugfix
@@ -0,0 +1 @@
+Fix a rare bug introduced in Synapse 1.66.0 where initial syncs would fail when the user had been kicked from a faster joined room that had not finished syncing.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 9f5b83ed54..64d298408d 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -943,6 +943,8 @@ class SyncHandler:
 
                 timeline_state = {}
 
+                # Membership events to fetch that can be found in the room state, or in
+                # the case of partial state rooms, the auth events of timeline events.
                 members_to_fetch = set()
                 first_event_by_sender_map = {}
                 for event in batch.events:
@@ -964,9 +966,19 @@ class SyncHandler:
                     # (if we are) to fix https://github.com/vector-im/riot-web/issues/7209
                     # We only need apply this on full state syncs given we disabled
                     # LL for incr syncs in #3840.
-                    members_to_fetch.add(sync_config.user.to_string())
-
-                state_filter = StateFilter.from_lazy_load_member_list(members_to_fetch)
+                    # We don't insert ourselves into `members_to_fetch`, because in some
+                    # rare cases (an empty event batch with a now_token after the user's
+                    # leave in a partial state room which another local user has
+                    # joined), the room state will be missing our membership and there
+                    # is no guarantee that our membership will be in the auth events of
+                    # timeline events when the room is partial stated.
+                    state_filter = StateFilter.from_lazy_load_member_list(
+                        members_to_fetch.union((sync_config.user.to_string(),))
+                    )
+                else:
+                    state_filter = StateFilter.from_lazy_load_member_list(
+                        members_to_fetch
+                    )
 
                 # We are happy to use partial state to compute the `/sync` response.
                 # Since partial state may not include the lazy-loaded memberships we
@@ -988,7 +1000,9 @@ class SyncHandler:
             # sync's timeline and the start of the current sync's timeline.
             # See the docstring above for details.
             state_ids: StateMap[str]
-
+            # We need to know whether the state we fetch may be partial, so check
+            # whether the room is partial stated *before* fetching it.
+            is_partial_state_room = await self.store.is_partial_state_room(room_id)
             if full_state:
                 if batch:
                     state_at_timeline_end = (
@@ -1119,7 +1133,7 @@ class SyncHandler:
             # If we only have partial state for the room, `state_ids` may be missing the
             # memberships we wanted. We attempt to find some by digging through the auth
             # events of timeline events.
-            if lazy_load_members and await self.store.is_partial_state_room(room_id):
+            if lazy_load_members and is_partial_state_room:
                 assert members_to_fetch is not None
                 assert first_event_by_sender_map is not None
 
-- 
cgit 1.5.1


From 79d2e2e79c97b21a4b3b786594d0d9ebebd33964 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 4 Apr 2023 14:11:34 +0100
Subject: Speed up membership queries for users with forgotten rooms (#15385)

---
 changelog.d/15385.misc                                |  1 +
 synapse/storage/databases/main/roommember.py          | 12 +++++++++++-
 .../schema/main/delta/74/03_room_membership_index.sql | 19 +++++++++++++++++++
 3 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15385.misc
 create mode 100644 synapse/storage/schema/main/delta/74/03_room_membership_index.sql

(limited to 'synapse')

diff --git a/changelog.d/15385.misc b/changelog.d/15385.misc
new file mode 100644
index 0000000000..76350c398d
--- /dev/null
+++ b/changelog.d/15385.misc
@@ -0,0 +1 @@
+Speed up membership queries for users with forgotten rooms.
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 694a5b802c..daad58291a 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -419,7 +419,11 @@ class RoomMemberWorkerStore(EventsWorkerStore):
         )
 
         # Now we filter out forgotten and excluded rooms
-        rooms_to_exclude = await self.get_forgotten_rooms_for_user(user_id)
+        rooms_to_exclude: AbstractSet[str] = set()
+
+        # Users can't forget joined/invited rooms, so we skip the check for such lookups.
+        if any(m not in (Membership.JOIN, Membership.INVITE) for m in membership_list):
+            rooms_to_exclude = await self.get_forgotten_rooms_for_user(user_id)
 
         if excluded_rooms is not None:
             # Take a copy to avoid mutating the in-cache set
@@ -1391,6 +1395,12 @@ class RoomMemberBackgroundUpdateStore(SQLBaseStore):
             columns=["user_id", "room_id"],
             where_clause="forgotten = 1",
         )
+        self.db_pool.updates.register_background_index_update(
+            "room_membership_user_room_index",
+            index_name="room_membership_user_room_idx",
+            table="room_memberships",
+            columns=["user_id", "room_id"],
+        )
 
     async def _background_add_membership_profile(
         self, progress: JsonDict, batch_size: int
diff --git a/synapse/storage/schema/main/delta/74/03_room_membership_index.sql b/synapse/storage/schema/main/delta/74/03_room_membership_index.sql
new file mode 100644
index 0000000000..81a7d9ff9c
--- /dev/null
+++ b/synapse/storage/schema/main/delta/74/03_room_membership_index.sql
@@ -0,0 +1,19 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Add an index to `room_memberships(user_id, room_id)` to make querying for
+-- forgotten rooms faster.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+    (7403, 'room_membership_user_room_index', '{}');
-- 
cgit 1.5.1


From 6b23d74ad160d96e06bcc5b62acad56ade06bf6e Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Tue, 4 Apr 2023 13:16:08 -0700
Subject: Delete server-side backup keys when deactivating an account. (#15181)

---
 changelog.d/15181.bugfix                           |   1 +
 synapse/_scripts/synapse_port_db.py                |   2 +
 synapse/handlers/deactivate_account.py             |   3 +
 synapse/storage/databases/main/e2e_room_keys.py    | 114 ++++++++++++++-
 ...elete_e2e_backup_keys_for_deactivated_users.sql |  17 +++
 tests/rest/client/test_account.py                  | 157 +++++++++++++++++++++
 6 files changed, 291 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/15181.bugfix
 create mode 100644 synapse/storage/schema/main/delta/74/04_delete_e2e_backup_keys_for_deactivated_users.sql

(limited to 'synapse')

diff --git a/changelog.d/15181.bugfix b/changelog.d/15181.bugfix
new file mode 100644
index 0000000000..191bb6f611
--- /dev/null
+++ b/changelog.d/15181.bugfix
@@ -0,0 +1 @@
+Delete server-side backup keys when deactivating an account.
\ No newline at end of file
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 1dcb397ba4..a58ae2a308 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -59,6 +59,7 @@ from synapse.storage.databases.main.account_data import AccountDataWorkerStore
 from synapse.storage.databases.main.client_ips import ClientIpBackgroundUpdateStore
 from synapse.storage.databases.main.deviceinbox import DeviceInboxBackgroundUpdateStore
 from synapse.storage.databases.main.devices import DeviceBackgroundUpdateStore
+from synapse.storage.databases.main.e2e_room_keys import EndToEndRoomKeyBackgroundStore
 from synapse.storage.databases.main.end_to_end_keys import EndToEndKeyBackgroundStore
 from synapse.storage.databases.main.event_push_actions import EventPushActionsStore
 from synapse.storage.databases.main.events_bg_updates import (
@@ -225,6 +226,7 @@ class Store(
     MainStateBackgroundUpdateStore,
     UserDirectoryBackgroundUpdateStore,
     EndToEndKeyBackgroundStore,
+    EndToEndRoomKeyBackgroundStore,
     StatsStore,
     AccountDataWorkerStore,
     PushRuleStore,
diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index d31263c717..bd5867491b 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -176,6 +176,9 @@ class DeactivateAccountHandler:
         # Remove account data (including ignored users and push rules).
         await self.store.purge_account_data_for_user(user_id)
 
+        # Delete any server-side backup keys
+        await self.store.bulk_delete_backup_keys_and_versions_for_user(user_id)
+
         # Let modules know the user has been deactivated.
         await self._third_party_rules.on_user_deactivation_status_changed(
             user_id,
diff --git a/synapse/storage/databases/main/e2e_room_keys.py b/synapse/storage/databases/main/e2e_room_keys.py
index 9f8d2e4bea..d01f28cc80 100644
--- a/synapse/storage/databases/main/e2e_room_keys.py
+++ b/synapse/storage/databases/main/e2e_room_keys.py
@@ -13,17 +13,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict, Iterable, Mapping, Optional, Tuple, cast
+from typing import TYPE_CHECKING, Dict, Iterable, Mapping, Optional, Tuple, cast
 
 from typing_extensions import Literal, TypedDict
 
 from synapse.api.errors import StoreError
 from synapse.logging.opentracing import log_kv, trace
 from synapse.storage._base import SQLBaseStore, db_to_json
-from synapse.storage.database import LoggingTransaction
+from synapse.storage.database import (
+    DatabasePool,
+    LoggingDatabaseConnection,
+    LoggingTransaction,
+)
 from synapse.types import JsonDict, JsonSerializable, StreamKeyType
 from synapse.util import json_encoder
 
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
 
 class RoomKey(TypedDict):
     """`KeyBackupData` in the Matrix spec.
@@ -37,7 +44,82 @@ class RoomKey(TypedDict):
     session_data: JsonSerializable
 
 
-class EndToEndRoomKeyStore(SQLBaseStore):
+class EndToEndRoomKeyBackgroundStore(SQLBaseStore):
+    def __init__(
+        self,
+        database: DatabasePool,
+        db_conn: LoggingDatabaseConnection,
+        hs: "HomeServer",
+    ):
+        super().__init__(database, db_conn, hs)
+
+        self.db_pool.updates.register_background_update_handler(
+            "delete_e2e_backup_keys_for_deactivated_users",
+            self._delete_e2e_backup_keys_for_deactivated_users,
+        )
+
+    def _delete_keys_txn(self, txn: LoggingTransaction, user_id: str) -> None:
+        self.db_pool.simple_delete_txn(
+            txn,
+            table="e2e_room_keys",
+            keyvalues={"user_id": user_id},
+        )
+
+        self.db_pool.simple_delete_txn(
+            txn,
+            table="e2e_room_keys_versions",
+            keyvalues={"user_id": user_id},
+        )
+
+    async def _delete_e2e_backup_keys_for_deactivated_users(
+        self, progress: JsonDict, batch_size: int
+    ) -> int:
+        """
+        Retroactively deletes backup keys for users that have already been deactivated.
+        Gets run as a background update caused by a schema delta.
+        """
+
+        last_user: str = progress.get("last_user", "")
+
+        def _delete_backup_keys_for_deactivated_users_txn(
+            txn: LoggingTransaction,
+        ) -> int:
+            sql = """
+                SELECT name FROM users
+                WHERE deactivated = ? and name > ?
+                ORDER BY name ASC
+                LIMIT ?
+            """
+
+            txn.execute(sql, (1, last_user, batch_size))
+            users = [row[0] for row in txn]
+
+            for user in users:
+                self._delete_keys_txn(txn, user)
+
+            if users:
+                self.db_pool.updates._background_update_progress_txn(
+                    txn,
+                    "delete_e2e_backup_keys_for_deactivated_users",
+                    {"last_user": users[-1]},
+                )
+
+            return len(users)
+
+        number_deleted = await self.db_pool.runInteraction(
+            "_delete_backup_keys_for_deactivated_users",
+            _delete_backup_keys_for_deactivated_users_txn,
+        )
+
+        if number_deleted < batch_size:
+            await self.db_pool.updates._end_background_update(
+                "delete_e2e_backup_keys_for_deactivated_users"
+            )
+
+        return number_deleted
+
+
+class EndToEndRoomKeyStore(EndToEndRoomKeyBackgroundStore):
     """The store for end to end room key backups.
 
     See https://spec.matrix.org/v1.1/client-server-api/#server-side-key-backups
@@ -550,3 +632,29 @@ class EndToEndRoomKeyStore(SQLBaseStore):
         await self.db_pool.runInteraction(
             "delete_e2e_room_keys_version", _delete_e2e_room_keys_version_txn
         )
+
+    async def bulk_delete_backup_keys_and_versions_for_user(self, user_id: str) -> None:
+        """
+        Bulk deletes all backup room keys and versions for a given user.
+
+        Args:
+            user_id: the user whose backup keys and versions we're deleting
+        """
+
+        def _delete_all_e2e_room_keys_and_versions_txn(txn: LoggingTransaction) -> None:
+            self.db_pool.simple_delete_txn(
+                txn,
+                table="e2e_room_keys",
+                keyvalues={"user_id": user_id},
+            )
+
+            self.db_pool.simple_delete_txn(
+                txn,
+                table="e2e_room_keys_versions",
+                keyvalues={"user_id": user_id},
+            )
+
+        await self.db_pool.runInteraction(
+            "delete_all_e2e_room_keys_and_versions",
+            _delete_all_e2e_room_keys_and_versions_txn,
+        )
diff --git a/synapse/storage/schema/main/delta/74/04_delete_e2e_backup_keys_for_deactivated_users.sql b/synapse/storage/schema/main/delta/74/04_delete_e2e_backup_keys_for_deactivated_users.sql
new file mode 100644
index 0000000000..a194f4cece
--- /dev/null
+++ b/synapse/storage/schema/main/delta/74/04_delete_e2e_backup_keys_for_deactivated_users.sql
@@ -0,0 +1,17 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7404, 'delete_e2e_backup_keys_for_deactivated_users', '{}');
\ No newline at end of file
diff --git a/tests/rest/client/test_account.py b/tests/rest/client/test_account.py
index 7f675c44a2..ac19f3c6da 100644
--- a/tests/rest/client/test_account.py
+++ b/tests/rest/client/test_account.py
@@ -474,6 +474,163 @@ class DeactivateTestCase(unittest.HomeserverTestCase):
         self.assertEqual(len(memberships), 1, memberships)
         self.assertEqual(memberships[0].room_id, room_id, memberships)
 
+    def test_deactivate_account_deletes_server_side_backup_keys(self) -> None:
+        key_handler = self.hs.get_e2e_room_keys_handler()
+        room_keys = {
+            "rooms": {
+                "!abc:matrix.org": {
+                    "sessions": {
+                        "c0ff33": {
+                            "first_message_index": 1,
+                            "forwarded_count": 1,
+                            "is_verified": False,
+                            "session_data": "SSBBTSBBIEZJU0gK",
+                        }
+                    }
+                }
+            }
+        }
+
+        user_id = self.register_user("missPiggy", "test")
+        tok = self.login("missPiggy", "test")
+
+        # add some backup keys/versions
+        version = self.get_success(
+            key_handler.create_version(
+                user_id,
+                {
+                    "algorithm": "m.megolm_backup.v1",
+                    "auth_data": "first_version_auth_data",
+                },
+            )
+        )
+
+        self.get_success(key_handler.upload_room_keys(user_id, version, room_keys))
+
+        version2 = self.get_success(
+            key_handler.create_version(
+                user_id,
+                {
+                    "algorithm": "m.megolm_backup.v1",
+                    "auth_data": "second_version_auth_data",
+                },
+            )
+        )
+
+        self.get_success(key_handler.upload_room_keys(user_id, version2, room_keys))
+
+        self.deactivate(user_id, tok)
+        store = self.hs.get_datastores().main
+
+        # Check that the user has been marked as deactivated.
+        self.assertTrue(self.get_success(store.get_user_deactivated_status(user_id)))
+
+        # Check that there are no entries in `e2e_room_keys` and `e2e_room_keys_versions`
+        res = self.get_success(
+            self.hs.get_datastores().main.db_pool.simple_select_list(
+                "e2e_room_keys", {"user_id": user_id}, "*", "simple_select"
+            )
+        )
+        self.assertEqual(len(res), 0)
+
+        res2 = self.get_success(
+            self.hs.get_datastores().main.db_pool.simple_select_list(
+                "e2e_room_keys_versions", {"user_id": user_id}, "*", "simple_select"
+            )
+        )
+        self.assertEqual(len(res2), 0)
+
+    def test_background_update_deletes_deactivated_users_server_side_backup_keys(
+        self,
+    ) -> None:
+        key_handler = self.hs.get_e2e_room_keys_handler()
+        room_keys = {
+            "rooms": {
+                "!abc:matrix.org": {
+                    "sessions": {
+                        "c0ff33": {
+                            "first_message_index": 1,
+                            "forwarded_count": 1,
+                            "is_verified": False,
+                            "session_data": "SSBBTSBBIEZJU0gK",
+                        }
+                    }
+                }
+            }
+        }
+        self.store = self.hs.get_datastores().main
+
+        # create a bunch of users and add keys for them
+        users = []
+        for i in range(0, 20):
+            user_id = self.register_user("missPiggy" + str(i), "test")
+            users.append((user_id,))
+
+            # add some backup keys/versions
+            version = self.get_success(
+                key_handler.create_version(
+                    user_id,
+                    {
+                        "algorithm": "m.megolm_backup.v1",
+                        "auth_data": str(i) + "_version_auth_data",
+                    },
+                )
+            )
+
+            self.get_success(key_handler.upload_room_keys(user_id, version, room_keys))
+
+            version2 = self.get_success(
+                key_handler.create_version(
+                    user_id,
+                    {
+                        "algorithm": "m.megolm_backup.v1",
+                        "auth_data": str(i) + "_version_auth_data",
+                    },
+                )
+            )
+
+            self.get_success(key_handler.upload_room_keys(user_id, version2, room_keys))
+
+        # deactivate most of the users by editing DB
+        self.get_success(
+            self.store.db_pool.simple_update_many(
+                table="users",
+                key_names=("name",),
+                key_values=users[0:18],
+                value_names=("deactivated",),
+                value_values=[(1,) for i in range(1, 19)],
+                desc="",
+            )
+        )
+
+        # run background update
+        self.get_success(
+            self.store.db_pool.simple_insert(
+                "background_updates",
+                {
+                    "update_name": "delete_e2e_backup_keys_for_deactivated_users",
+                    "progress_json": "{}",
+                },
+            )
+        )
+        self.store.db_pool.updates._all_done = False
+        self.wait_for_background_updates()
+
+        # check that keys are deleted for the deactivated users but not the others
+        res = self.get_success(
+            self.hs.get_datastores().main.db_pool.simple_select_list(
+                "e2e_room_keys", None, ("user_id",), "simple_select"
+            )
+        )
+        self.assertEqual(len(res), 4)
+
+        res2 = self.get_success(
+            self.hs.get_datastores().main.db_pool.simple_select_list(
+                "e2e_room_keys_versions", None, ("user_id",), "simple_select"
+            )
+        )
+        self.assertEqual(len(res2), 4)
+
     def deactivate(self, user_id: str, tok: str) -> None:
         request_data = {
             "auth": {
-- 
cgit 1.5.1


From 6eb3edec473899f9145124d33a85b153e4a0cda9 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Wed, 5 Apr 2023 13:49:15 +0200
Subject: Fix the 'set_device_id_for_pushers_txn' background update. (#15391)

Refer to the correct field from the response when updating
the background update progress.
---
 changelog.d/15391.bugfix                 | 1 +
 synapse/storage/databases/main/pusher.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15391.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15391.bugfix b/changelog.d/15391.bugfix
new file mode 100644
index 0000000000..22b3bfe668
--- /dev/null
+++ b/changelog.d/15391.bugfix
@@ -0,0 +1 @@
+Fix the `set_device_id_for_pushers_txn` background update crash.
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
index aeb6034f46..87e28e22d3 100644
--- a/synapse/storage/databases/main/pusher.py
+++ b/synapse/storage/databases/main/pusher.py
@@ -562,7 +562,7 @@ class PusherBackgroundUpdatesStore(SQLBaseStore):
             )
 
             self.db_pool.updates._background_update_progress_txn(
-                txn, "set_device_id_for_pushers", {"pusher_id": rows[-1]["id"]}
+                txn, "set_device_id_for_pushers", {"pusher_id": rows[-1]["pusher_id"]}
             )
 
             return len(rows)
-- 
cgit 1.5.1


From 83649b891db5fcca7b96c606293925bc47358730 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 5 Apr 2023 14:42:46 -0400
Subject: Implement MSC3989 to redact the origin field. (#15393)

This will be done in a future room version, for now an unstable
room version is added which redacts the origin field.
---
 changelog.d/15393.misc       |  1 +
 synapse/api/room_versions.py | 37 +++++++++++++++++++++++++++++++++++++
 synapse/events/utils.py      |  5 ++++-
 tests/events/test_utils.py   |  7 +++++++
 4 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15393.misc

(limited to 'synapse')

diff --git a/changelog.d/15393.misc b/changelog.d/15393.misc
new file mode 100644
index 0000000000..24483c8d78
--- /dev/null
+++ b/changelog.d/15393.misc
@@ -0,0 +1 @@
+Implement [MSC3989](https://github.com/matrix-org/matrix-spec-proposals/pull/3989) redaction algorithm.
diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py
index c397920fe5..bc15f2d063 100644
--- a/synapse/api/room_versions.py
+++ b/synapse/api/room_versions.py
@@ -104,6 +104,8 @@ class RoomVersion:
     # support the flag. Unknown flags are ignored by the evaluator, making conditions
     # fail if used.
     msc3931_push_features: Tuple[str, ...]  # values from PushRuleRoomFlag
+    # MSC3989: Redact the origin field.
+    msc3989_redaction_rules: bool
 
 
 class RoomVersions:
@@ -125,6 +127,7 @@ class RoomVersions:
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
+        msc3989_redaction_rules=False,
     )
     V2 = RoomVersion(
         "2",
@@ -144,6 +147,7 @@ class RoomVersions:
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
+        msc3989_redaction_rules=False,
     )
     V3 = RoomVersion(
         "3",
@@ -163,6 +167,7 @@ class RoomVersions:
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
+        msc3989_redaction_rules=False,
     )
     V4 = RoomVersion(
         "4",
@@ -182,6 +187,7 @@ class RoomVersions:
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
+        msc3989_redaction_rules=False,
     )
     V5 = RoomVersion(
         "5",
@@ -201,6 +207,7 @@ class RoomVersions:
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
+        msc3989_redaction_rules=False,
     )
     V6 = RoomVersion(
         "6",
@@ -220,6 +227,7 @@ class RoomVersions:
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
+        msc3989_redaction_rules=False,
     )
     MSC2176 = RoomVersion(
         "org.matrix.msc2176",
@@ -239,6 +247,7 @@ class RoomVersions:
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
+        msc3989_redaction_rules=False,
     )
     V7 = RoomVersion(
         "7",
@@ -258,6 +267,7 @@ class RoomVersions:
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
+        msc3989_redaction_rules=False,
     )
     V8 = RoomVersion(
         "8",
@@ -277,6 +287,7 @@ class RoomVersions:
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
+        msc3989_redaction_rules=False,
     )
     V9 = RoomVersion(
         "9",
@@ -296,6 +307,7 @@ class RoomVersions:
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
+        msc3989_redaction_rules=False,
     )
     MSC3787 = RoomVersion(
         "org.matrix.msc3787",
@@ -315,6 +327,7 @@ class RoomVersions:
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
+        msc3989_redaction_rules=False,
     )
     V10 = RoomVersion(
         "10",
@@ -334,6 +347,7 @@ class RoomVersions:
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=True,
         msc3931_push_features=(),
+        msc3989_redaction_rules=False,
     )
     MSC2716v4 = RoomVersion(
         "org.matrix.msc2716v4",
@@ -353,6 +367,7 @@ class RoomVersions:
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
+        msc3989_redaction_rules=False,
     )
     MSC1767v10 = RoomVersion(
         # MSC1767 (Extensible Events) based on room version "10"
@@ -373,6 +388,27 @@ class RoomVersions:
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=True,
         msc3931_push_features=(PushRuleRoomFlag.EXTENSIBLE_EVENTS,),
+        msc3989_redaction_rules=False,
+    )
+    MSC3989 = RoomVersion(
+        "org.matrix.msc3989",
+        RoomDisposition.UNSTABLE,
+        EventFormatVersions.ROOM_V4_PLUS,
+        StateResolutionVersions.V2,
+        enforce_key_validity=True,
+        special_case_aliases_auth=False,
+        strict_canonicaljson=True,
+        limit_notifications_power_levels=True,
+        msc2176_redaction_rules=False,
+        msc3083_join_rules=True,
+        msc3375_redaction_rules=True,
+        msc2403_knocking=True,
+        msc2716_historical=False,
+        msc2716_redactions=False,
+        msc3787_knock_restricted_join_rule=True,
+        msc3667_int_only_power_levels=True,
+        msc3931_push_features=(),
+        msc3989_redaction_rules=True,
     )
 
 
@@ -392,6 +428,7 @@ KNOWN_ROOM_VERSIONS: Dict[str, RoomVersion] = {
         RoomVersions.MSC3787,
         RoomVersions.V10,
         RoomVersions.MSC2716v4,
+        RoomVersions.MSC3989,
     )
 }
 
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index c14c7791db..1d5d7491cd 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -106,7 +106,6 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
         "depth",
         "prev_events",
         "auth_events",
-        "origin",
         "origin_server_ts",
     ]
 
@@ -114,6 +113,10 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
     if not room_version.msc2176_redaction_rules:
         allowed_keys.extend(["prev_state", "membership"])
 
+    # Room versions before MSC3989 kept the origin field.
+    if not room_version.msc3989_redaction_rules:
+        allowed_keys.append("origin")
+
     event_type = event_dict["type"]
 
     new_content = {}
diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py
index 4174a237ec..c35f58f462 100644
--- a/tests/events/test_utils.py
+++ b/tests/events/test_utils.py
@@ -143,6 +143,13 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
             room_version=RoomVersions.MSC2176,
         )
 
+        # As of MSC3989 we now redact the origin key.
+        self.run_test(
+            {"type": "A", "origin": "example.com"},
+            {"type": "A", "content": {}, "signatures": {}, "unsigned": {}},
+            room_version=RoomVersions.MSC3989,
+        )
+
     def test_unsigned(self) -> None:
         """Ensure that unsigned properties get stripped (except age_ts and replaces_state)."""
         self.run_test(
-- 
cgit 1.5.1


From 485b9fdefb9f45df172ff5044d6a02a177b7de19 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 6 Apr 2023 17:42:39 +0100
Subject: Don't keep old stream_ordering_to_exterm around (#15382)

---
 changelog.d/15382.misc                             |  1 +
 synapse/handlers/device.py                         | 10 +++++
 synapse/storage/databases/main/event_federation.py | 52 +++++++++++++++++-----
 3 files changed, 53 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/15382.misc

(limited to 'synapse')

diff --git a/changelog.d/15382.misc b/changelog.d/15382.misc
new file mode 100644
index 0000000000..c5b054d19e
--- /dev/null
+++ b/changelog.d/15382.misc
@@ -0,0 +1 @@
+Improve DB performance of clearing out old data from `stream_ordering_to_exterm`.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 9ded6389ac..d2063d4435 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -215,6 +215,16 @@ class DeviceWorkerHandler:
         possibly_changed = set(changed)
         possibly_left = set()
         for room_id in rooms_changed:
+            # Check if the forward extremities have changed. If not then we know
+            # the current state won't have changed, and so we can skip this room.
+            try:
+                if not await self.store.have_room_forward_extremities_changed_since(
+                    room_id, stream_ordering
+                ):
+                    continue
+            except errors.StoreError:
+                pass
+
             current_state_ids = await self._state_storage.get_current_state_ids(
                 room_id, await_full_state=False
             )
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index a19ba88bf8..9e6011e8ea 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -1171,6 +1171,38 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
 
         return int(min_depth) if min_depth is not None else None
 
+    async def have_room_forward_extremities_changed_since(
+        self,
+        room_id: str,
+        stream_ordering: int,
+    ) -> bool:
+        """Check if the forward extremities in a room have changed since the
+        given stream ordering
+
+        Throws a StoreError if we have since purged the index for
+        stream_orderings from that point.
+        """
+
+        if stream_ordering <= self.stream_ordering_month_ago:  # type: ignore[attr-defined]
+            raise StoreError(400, f"stream_ordering too old {stream_ordering}")
+
+        sql = """
+            SELECT 1 FROM stream_ordering_to_exterm
+            WHERE stream_ordering > ? AND room_id = ?
+            LIMIT 1
+        """
+
+        def have_room_forward_extremities_changed_since_txn(
+            txn: LoggingTransaction,
+        ) -> bool:
+            txn.execute(sql, (stream_ordering, room_id))
+            return txn.fetchone() is not None
+
+        return await self.db_pool.runInteraction(
+            "have_room_forward_extremities_changed_since",
+            have_room_forward_extremities_changed_since_txn,
+        )
+
     @cancellable
     async def get_forward_extremities_for_room_at_stream_ordering(
         self, room_id: str, stream_ordering: int
@@ -1232,10 +1264,17 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
             txn.execute(sql, (stream_ordering, room_id))
             return [event_id for event_id, in txn]
 
-        return await self.db_pool.runInteraction(
+        event_ids = await self.db_pool.runInteraction(
             "get_forward_extremeties_for_room", get_forward_extremeties_for_room_txn
         )
 
+        # If we didn't find any IDs, then we must have cleared out the
+        # associated `stream_ordering_to_exterm`.
+        if not event_ids:
+            raise StoreError(400, "stream_ordering too old %s" % (stream_ordering,))
+
+        return event_ids
+
     def _get_connected_batch_event_backfill_results_txn(
         self, txn: LoggingTransaction, insertion_event_id: str, limit: int
     ) -> List[BackfillQueueNavigationItem]:
@@ -1664,19 +1703,12 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
     @wrap_as_background_process("delete_old_forward_extrem_cache")
     async def _delete_old_forward_extrem_cache(self) -> None:
         def _delete_old_forward_extrem_cache_txn(txn: LoggingTransaction) -> None:
-            # Delete entries older than a month, while making sure we don't delete
-            # the only entries for a room.
             sql = """
                 DELETE FROM stream_ordering_to_exterm
-                WHERE
-                room_id IN (
-                    SELECT room_id
-                    FROM stream_ordering_to_exterm
-                    WHERE stream_ordering > ?
-                ) AND stream_ordering < ?
+                WHERE stream_ordering < ?
             """
             txn.execute(
-                sql, (self.stream_ordering_month_ago, self.stream_ordering_month_ago)  # type: ignore[attr-defined]
+                sql, (self.stream_ordering_month_ago,)  # type: ignore[attr-defined]
             )
 
         await self.db_pool.runInteraction(
-- 
cgit 1.5.1


From d07d2558305057cba6a7e2cb7644c24cf2dcf9be Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 6 Apr 2023 16:26:28 -0400
Subject: Implement MSC2175: remove the creator field from create events.
 (#15394)

---
 changelog.d/15394.misc                 |  1 +
 synapse/api/constants.py               |  2 ++
 synapse/api/room_versions.py           | 17 +++++++++++++++++
 synapse/event_auth.py                  | 25 ++++++++++++++++++-------
 synapse/handlers/federation_event.py   |  5 ++++-
 synapse/handlers/message.py            |  7 ++++++-
 synapse/handlers/room.py               | 22 ++++++++++++----------
 synapse/storage/databases/main/room.py | 17 ++++++++++++-----
 8 files changed, 72 insertions(+), 24 deletions(-)
 create mode 100644 changelog.d/15394.misc

(limited to 'synapse')

diff --git a/changelog.d/15394.misc b/changelog.d/15394.misc
new file mode 100644
index 0000000000..91e6540438
--- /dev/null
+++ b/changelog.d/15394.misc
@@ -0,0 +1 @@
+Implement [MSC2175](https://github.com/matrix-org/matrix-doc/pull/2175) to stop adding `creator` to create events.
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index 0f224b34cd..c56b2f2561 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -215,6 +215,8 @@ class EventContentFields:
     FEDERATE: Final = "m.federate"
 
     # The creator of the room, as used in `m.room.create` events.
+    #
+    # This is deprecated in MSC2175.
     ROOM_CREATOR: Final = "creator"
 
     # Used in m.room.guest_access events.
diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py
index bc15f2d063..3dcae12161 100644
--- a/synapse/api/room_versions.py
+++ b/synapse/api/room_versions.py
@@ -78,6 +78,8 @@ class RoomVersion:
     # MSC2209: Check 'notifications' key while verifying
     # m.room.power_levels auth rules.
     limit_notifications_power_levels: bool
+    # MSC2175: No longer include the creator in m.room.create events.
+    msc2175_implicit_room_creator: bool
     # MSC2174/MSC2176: Apply updated redaction rules algorithm.
     msc2176_redaction_rules: bool
     # MSC3083: Support the 'restricted' join_rule.
@@ -118,6 +120,7 @@ class RoomVersions:
         special_case_aliases_auth=True,
         strict_canonicaljson=False,
         limit_notifications_power_levels=False,
+        msc2175_implicit_room_creator=False,
         msc2176_redaction_rules=False,
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
@@ -138,6 +141,7 @@ class RoomVersions:
         special_case_aliases_auth=True,
         strict_canonicaljson=False,
         limit_notifications_power_levels=False,
+        msc2175_implicit_room_creator=False,
         msc2176_redaction_rules=False,
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
@@ -158,6 +162,7 @@ class RoomVersions:
         special_case_aliases_auth=True,
         strict_canonicaljson=False,
         limit_notifications_power_levels=False,
+        msc2175_implicit_room_creator=False,
         msc2176_redaction_rules=False,
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
@@ -178,6 +183,7 @@ class RoomVersions:
         special_case_aliases_auth=True,
         strict_canonicaljson=False,
         limit_notifications_power_levels=False,
+        msc2175_implicit_room_creator=False,
         msc2176_redaction_rules=False,
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
@@ -198,6 +204,7 @@ class RoomVersions:
         special_case_aliases_auth=True,
         strict_canonicaljson=False,
         limit_notifications_power_levels=False,
+        msc2175_implicit_room_creator=False,
         msc2176_redaction_rules=False,
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
@@ -218,6 +225,7 @@ class RoomVersions:
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
+        msc2175_implicit_room_creator=False,
         msc2176_redaction_rules=False,
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
@@ -238,6 +246,7 @@ class RoomVersions:
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
+        msc2175_implicit_room_creator=False,
         msc2176_redaction_rules=True,
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
@@ -258,6 +267,7 @@ class RoomVersions:
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
+        msc2175_implicit_room_creator=False,
         msc2176_redaction_rules=False,
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
@@ -278,6 +288,7 @@ class RoomVersions:
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
+        msc2175_implicit_room_creator=False,
         msc2176_redaction_rules=False,
         msc3083_join_rules=True,
         msc3375_redaction_rules=False,
@@ -298,6 +309,7 @@ class RoomVersions:
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
+        msc2175_implicit_room_creator=False,
         msc2176_redaction_rules=False,
         msc3083_join_rules=True,
         msc3375_redaction_rules=True,
@@ -318,6 +330,7 @@ class RoomVersions:
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
+        msc2175_implicit_room_creator=False,
         msc2176_redaction_rules=False,
         msc3083_join_rules=True,
         msc3375_redaction_rules=True,
@@ -338,6 +351,7 @@ class RoomVersions:
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
+        msc2175_implicit_room_creator=False,
         msc2176_redaction_rules=False,
         msc3083_join_rules=True,
         msc3375_redaction_rules=True,
@@ -358,6 +372,7 @@ class RoomVersions:
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
+        msc2175_implicit_room_creator=False,
         msc2176_redaction_rules=False,
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
@@ -379,6 +394,7 @@ class RoomVersions:
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
+        msc2175_implicit_room_creator=False,
         msc2176_redaction_rules=False,
         msc3083_join_rules=True,
         msc3375_redaction_rules=True,
@@ -399,6 +415,7 @@ class RoomVersions:
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
+        msc2175_implicit_room_creator=False,
         msc2176_redaction_rules=False,
         msc3083_join_rules=True,
         msc3375_redaction_rules=True,
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index af55874b5c..f95d00d472 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -455,8 +455,11 @@ def _check_create(event: "EventBase") -> None:
             "room appears to have unsupported version %s" % (room_version_prop,),
         )
 
-    # 1.4 If content has no creator field, reject.
-    if EventContentFields.ROOM_CREATOR not in event.content:
+    # 1.4 If content has no creator field, reject if the room version requires it.
+    if (
+        not event.room_version.msc2175_implicit_room_creator
+        and EventContentFields.ROOM_CREATOR not in event.content
+    ):
         raise AuthError(403, "Create event lacks a 'creator' property")
 
 
@@ -491,7 +494,11 @@ def _is_membership_change_allowed(
         key = (EventTypes.Create, "")
         create = auth_events.get(key)
         if create and event.prev_event_ids()[0] == create.event_id:
-            if create.content["creator"] == event.state_key:
+            if room_version.msc2175_implicit_room_creator:
+                creator = create.sender
+            else:
+                creator = create.content[EventContentFields.ROOM_CREATOR]
+            if creator == event.state_key:
                 return
 
     target_user_id = event.state_key
@@ -1004,10 +1011,14 @@ def get_user_power_level(user_id: str, auth_events: StateMap["EventBase"]) -> in
         # that.
         key = (EventTypes.Create, "")
         create_event = auth_events.get(key)
-        if create_event is not None and create_event.content["creator"] == user_id:
-            return 100
-        else:
-            return 0
+        if create_event is not None:
+            if create_event.room_version.msc2175_implicit_room_creator:
+                creator = create_event.sender
+            else:
+                creator = create_event.content[EventContentFields.ROOM_CREATOR]
+            if creator == user_id:
+                return 100
+        return 0
 
 
 def get_named_level(auth_events: StateMap["EventBase"], name: str, default: int) -> int:
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 982c8d3b2f..8d5be81a92 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -1515,7 +1515,10 @@ class FederationEventHandler:
         # support it or the event is not from the room creator.
         room_version = await self._store.get_room_version(marker_event.room_id)
         create_event = await self._store.get_create_event_for_room(marker_event.room_id)
-        room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR)
+        if not room_version.msc2175_implicit_room_creator:
+            room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR)
+        else:
+            room_creator = create_event.sender
         if not room_version.msc2716_historical and (
             not self._config.experimental.msc2716_enabled
             or marker_event.sender != room_creator
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 4c75433a63..a17fe3bf53 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -1909,7 +1909,12 @@ class EventCreationHandler:
                 room_version_obj = KNOWN_ROOM_VERSIONS[room_version]
 
                 create_event = await self.store.get_create_event_for_room(event.room_id)
-                room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR)
+                if not room_version_obj.msc2175_implicit_room_creator:
+                    room_creator = create_event.content.get(
+                        EventContentFields.ROOM_CREATOR
+                    )
+                else:
+                    room_creator = create_event.sender
 
                 # Only check an insertion event if the room version
                 # supports it or the event is from the room creator.
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index be120cb12f..2d69cabf43 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -567,6 +567,7 @@ class RoomCreationHandler:
         await self._send_events_for_new_room(
             requester,
             new_room_id,
+            new_room_version,
             # we expect to override all the presets with initial_state, so this is
             # somewhat arbitrary.
             room_config={"preset": RoomCreationPreset.PRIVATE_CHAT},
@@ -922,6 +923,7 @@ class RoomCreationHandler:
         ) = await self._send_events_for_new_room(
             requester,
             room_id,
+            room_version,
             room_config=config,
             invite_list=invite_list,
             initial_state=initial_state,
@@ -998,6 +1000,7 @@ class RoomCreationHandler:
         self,
         creator: Requester,
         room_id: str,
+        room_version: RoomVersion,
         room_config: JsonDict,
         invite_list: List[str],
         initial_state: MutableStateMap,
@@ -1020,6 +1023,8 @@ class RoomCreationHandler:
                 the user requesting the room creation
             room_id:
                 room id for the room being created
+            room_version:
+                The room version of the new room.
             room_config:
                 A dict of configuration options. This will be the body of
                 a /createRoom request; see
@@ -1053,14 +1058,6 @@ class RoomCreationHandler:
         # (as this info can't be pulled from the db)
         state_map: MutableStateMap[str] = {}
 
-        def create_event_dict(etype: str, content: JsonDict, **kwargs: Any) -> JsonDict:
-            e = {"type": etype, "content": content}
-
-            e.update(event_keys)
-            e.update(kwargs)
-
-            return e
-
         async def create_event(
             etype: str,
             content: JsonDict,
@@ -1083,7 +1080,10 @@ class RoomCreationHandler:
             nonlocal depth
             nonlocal prev_event
 
-            event_dict = create_event_dict(etype, content, **kwargs)
+            # Create the event dictionary.
+            event_dict = {"type": etype, "content": content}
+            event_dict.update(event_keys)
+            event_dict.update(kwargs)
 
             (
                 new_event,
@@ -1120,7 +1120,9 @@ class RoomCreationHandler:
                 400, f"'{preset_config}' is not a valid preset", errcode=Codes.BAD_JSON
             )
 
-        creation_content.update({"creator": creator_id})
+        # MSC2175 removes the creator field from the create event.
+        if not room_version.msc2175_implicit_room_creator:
+            creation_content["creator"] = creator_id
         creation_event, unpersisted_creation_context = await create_event(
             EventTypes.Create, creation_content, False
         )
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 3825bd6079..dd7dbb6901 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -1998,6 +1998,9 @@ class RoomBackgroundUpdateStore(SQLBaseStore):
             for room_id, event_json in room_id_to_create_event_results:
                 event_dict = db_to_json(event_json)
 
+                # The creator property might not exist in newer room versions, but
+                # for those versions the creator column should be properly populated
+                # during room creation.
                 creator = event_dict.get("content").get(EventContentFields.ROOM_CREATOR)
 
                 self.db_pool.simple_update_txn(
@@ -2132,12 +2135,16 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
             # invalid, and it would fail auth checks anyway.
             raise StoreError(400, "No create event in state")
 
-        room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR)
+        # Before MSC2175, the room creator was a separate field.
+        if not room_version.msc2175_implicit_room_creator:
+            room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR)
 
-        if not isinstance(room_creator, str):
-            # If the create event does not have a creator then the room is
-            # invalid, and it would fail auth checks anyway.
-            raise StoreError(400, "No creator defined on the create event")
+            if not isinstance(room_creator, str):
+                # If the create event does not have a creator then the room is
+                # invalid, and it would fail auth checks anyway.
+                raise StoreError(400, "No creator defined on the create event")
+        else:
+            room_creator = create_event.sender
 
         await self.db_pool.simple_upsert(
             desc="upsert_room_on_join",
-- 
cgit 1.5.1


From 253e86a72e0c8e4e014b4b08fa587afe1de4db22 Mon Sep 17 00:00:00 2001
From: Will Hunt <will@half-shot.uk>
Date: Wed, 12 Apr 2023 12:28:46 +0100
Subject: Throw if the appservice config list is the wrong type (#15425)

* raise a ConfigError on an invalid app_service_config_files

* changelog

* Move config check to read_config

* Add test

* Ensure list also contains strings
---
 changelog.d/15425.bugfix        |  1 +
 synapse/config/appservice.py    | 14 ++++++++++----
 tests/config/test_appservice.py | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 51 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/15425.bugfix
 create mode 100644 tests/config/test_appservice.py

(limited to 'synapse')

diff --git a/changelog.d/15425.bugfix b/changelog.d/15425.bugfix
new file mode 100644
index 0000000000..fd104a63b3
--- /dev/null
+++ b/changelog.d/15425.bugfix
@@ -0,0 +1 @@
+Synapse now correctly fails to start if the config option `app_service_config_files` is not a list.
\ No newline at end of file
diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py
index 00182090b2..fd89960e72 100644
--- a/synapse/config/appservice.py
+++ b/synapse/config/appservice.py
@@ -33,6 +33,16 @@ class AppServiceConfig(Config):
 
     def read_config(self, config: JsonDict, **kwargs: Any) -> None:
         self.app_service_config_files = config.get("app_service_config_files", [])
+        if not isinstance(self.app_service_config_files, list) or not all(
+            type(x) is str for x in self.app_service_config_files
+        ):
+            # type-ignore: this function gets arbitrary json value; we do use this path.
+            raise ConfigError(
+                "Expected '%s' to be a list of AS config files:"
+                % (self.app_service_config_files),
+                "app_service_config_files",
+            )
+
         self.track_appservice_user_ips = config.get("track_appservice_user_ips", False)
 
 
@@ -40,10 +50,6 @@ def load_appservices(
     hostname: str, config_files: List[str]
 ) -> List[ApplicationService]:
     """Returns a list of Application Services from the config files."""
-    if not isinstance(config_files, list):
-        # type-ignore: this function gets arbitrary json value; we do use this path.
-        logger.warning("Expected %s to be a list of AS config files.", config_files)  # type: ignore[unreachable]
-        return []
 
     # Dicts of value -> filename
     seen_as_tokens: Dict[str, str] = {}
diff --git a/tests/config/test_appservice.py b/tests/config/test_appservice.py
new file mode 100644
index 0000000000..d2d1a40dfc
--- /dev/null
+++ b/tests/config/test_appservice.py
@@ -0,0 +1,40 @@
+# Copyright 2023 Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.config.appservice import AppServiceConfig, ConfigError
+
+from tests.unittest import TestCase
+
+
+class AppServiceConfigTest(TestCase):
+    def test_invalid_app_service_config_files(self) -> None:
+        for invalid_value in [
+            "foobar",
+            1,
+            None,
+            True,
+            False,
+            {},
+            ["foo", "bar", False],
+        ]:
+            with self.assertRaises(ConfigError):
+                AppServiceConfig().read_config(
+                    {"app_service_config_files": invalid_value}
+                )
+
+    def test_valid_app_service_config_files(self) -> None:
+        AppServiceConfig().read_config({"app_service_config_files": []})
+        AppServiceConfig().read_config(
+            {"app_service_config_files": ["/not/a/real/path", "/not/a/real/path/2"]}
+        )
-- 
cgit 1.5.1


From be36600327b47b93f8462bdf343c4c12f6c966b9 Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Thu, 13 Apr 2023 13:28:55 +0200
Subject: Disable loading `RefreshTokenServlet` on workers (#15428)

---
 changelog.d/15428.bugfix     | 1 +
 synapse/rest/client/login.py | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15428.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15428.bugfix b/changelog.d/15428.bugfix
new file mode 100644
index 0000000000..1083f00b81
--- /dev/null
+++ b/changelog.d/15428.bugfix
@@ -0,0 +1 @@
+Disable loading `RefreshTokenServlet` (`/_matrix/client/(r0|v3|unstable)/refresh`) on workers.
\ No newline at end of file
diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py
index b7e9c8f6b5..32c2f5ce0c 100644
--- a/synapse/rest/client/login.py
+++ b/synapse/rest/client/login.py
@@ -670,7 +670,10 @@ class CasTicketServlet(RestServlet):
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     LoginRestServlet(hs).register(http_server)
-    if hs.config.registration.refreshable_access_token_lifetime is not None:
+    if (
+        hs.config.worker.worker_app is None
+        and hs.config.registration.refreshable_access_token_lifetime is not None
+    ):
         RefreshTokenServlet(hs).register(http_server)
     SsoRedirectServlet(hs).register(http_server)
     if hs.config.cas.cas_enabled:
-- 
cgit 1.5.1


From c9723a1c1fbae1cc172fc9257fd1f1f259d2a23f Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Thu, 13 Apr 2023 15:08:00 +0200
Subject: Only load the SSO redirect servlet if SSO is enabled. (#15421)

---
 changelog.d/15421.misc       | 1 +
 synapse/rest/client/login.py | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15421.misc

(limited to 'synapse')

diff --git a/changelog.d/15421.misc b/changelog.d/15421.misc
new file mode 100644
index 0000000000..5deea3ac5b
--- /dev/null
+++ b/changelog.d/15421.misc
@@ -0,0 +1 @@
+Only load the SSO redirect servlet if SSO is enabled.
\ No newline at end of file
diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py
index 32c2f5ce0c..a348720131 100644
--- a/synapse/rest/client/login.py
+++ b/synapse/rest/client/login.py
@@ -675,7 +675,12 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
         and hs.config.registration.refreshable_access_token_lifetime is not None
     ):
         RefreshTokenServlet(hs).register(http_server)
-    SsoRedirectServlet(hs).register(http_server)
+    if (
+        hs.config.cas.cas_enabled
+        or hs.config.saml2.saml2_enabled
+        or hs.config.oidc.oidc_enabled
+    ):
+        SsoRedirectServlet(hs).register(http_server)
     if hs.config.cas.cas_enabled:
         CasTicketServlet(hs).register(http_server)
 
-- 
cgit 1.5.1


From 2503126d5245586b89c76e5f15f27c0a07774a45 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 13 Apr 2023 09:47:07 -0400
Subject: Implement MSC2174: move redacts to a content property. (#15395)

This moves `redacts` from being a top-level property to
a `content` property in a new room version.

MSC2176 (which was previously implemented) states that this
property must not be redacted.
---
 changelog.d/15395.misc               |  1 +
 synapse/api/room_versions.py         |  3 ++-
 synapse/event_auth.py                |  2 +-
 synapse/events/__init__.py           |  8 +++++++-
 synapse/events/builder.py            |  4 +++-
 synapse/rest/client/room.py          | 35 +++++++++++++++++++++++---------
 tests/events/test_utils.py           | 12 +++++++++--
 tests/rest/client/test_redactions.py | 39 ++++++++++++++++++++++++++++++++++--
 8 files changed, 87 insertions(+), 17 deletions(-)
 create mode 100644 changelog.d/15395.misc

(limited to 'synapse')

diff --git a/changelog.d/15395.misc b/changelog.d/15395.misc
new file mode 100644
index 0000000000..ee93845241
--- /dev/null
+++ b/changelog.d/15395.misc
@@ -0,0 +1 @@
+Implement [MSC2174](https://github.com/matrix-org/matrix-spec-proposals/pull/2174) to move the `redacts` key to a `content` property.
diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py
index 3dcae12161..5d9c13e3c3 100644
--- a/synapse/api/room_versions.py
+++ b/synapse/api/room_versions.py
@@ -80,7 +80,8 @@ class RoomVersion:
     limit_notifications_power_levels: bool
     # MSC2175: No longer include the creator in m.room.create events.
     msc2175_implicit_room_creator: bool
-    # MSC2174/MSC2176: Apply updated redaction rules algorithm.
+    # MSC2174/MSC2176: Apply updated redaction rules algorithm, move redacts to
+    # content property.
     msc2176_redaction_rules: bool
     # MSC3083: Support the 'restricted' join_rule.
     msc3083_join_rules: bool
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index f95d00d472..25898b95a5 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -793,7 +793,7 @@ def check_redaction(
     """Check whether the event sender is allowed to redact the target event.
 
     Returns:
-        True if the the sender is allowed to redact the target event if the
+        True if the sender is allowed to redact the target event if the
         target event was created by them.
         False if the sender is allowed to redact the target event with no
         further checks.
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index d475fe7ae5..4501518cf0 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -326,7 +326,6 @@ class EventBase(metaclass=abc.ABCMeta):
     hashes: DictProperty[Dict[str, str]] = DictProperty("hashes")
     origin: DictProperty[str] = DictProperty("origin")
     origin_server_ts: DictProperty[int] = DictProperty("origin_server_ts")
-    redacts: DefaultDictProperty[Optional[str]] = DefaultDictProperty("redacts", None)
     room_id: DictProperty[str] = DictProperty("room_id")
     sender: DictProperty[str] = DictProperty("sender")
     # TODO state_key should be Optional[str]. This is generally asserted in Synapse
@@ -346,6 +345,13 @@ class EventBase(metaclass=abc.ABCMeta):
     def membership(self) -> str:
         return self.content["membership"]
 
+    @property
+    def redacts(self) -> Optional[str]:
+        """MSC2174 moved the redacts field into the content."""
+        if self.room_version.msc2176_redaction_rules:
+            return self.content.get("redacts")
+        return self.get("redacts")
+
     def is_state(self) -> bool:
         return self.get_state_key() is not None
 
diff --git a/synapse/events/builder.py b/synapse/events/builder.py
index c82745275f..a254548c6c 100644
--- a/synapse/events/builder.py
+++ b/synapse/events/builder.py
@@ -173,7 +173,9 @@ class EventBuilder:
         if self.is_state():
             event_dict["state_key"] = self._state_key
 
-        if self._redacts is not None:
+        # MSC2174 moves the redacts property to the content, it is invalid to
+        # provide it as a top-level property.
+        if self._redacts is not None and not self.room_version.msc2176_redaction_rules:
             event_dict["redacts"] = self._redacts
 
         if self._origin_server_ts is not None:
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index c0705d4291..7699cc8d1b 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -1096,6 +1096,7 @@ class RoomRedactEventRestServlet(TransactionRestServlet):
         super().__init__(hs)
         self.event_creation_handler = hs.get_event_creation_handler()
         self.auth = hs.get_auth()
+        self._store = hs.get_datastores().main
         self._relation_handler = hs.get_relations_handler()
         self._msc3912_enabled = hs.config.experimental.msc3912_enabled
 
@@ -1113,6 +1114,19 @@ class RoomRedactEventRestServlet(TransactionRestServlet):
     ) -> Tuple[int, JsonDict]:
         content = parse_json_object_from_request(request)
 
+        # Ensure the redacts property in the content matches the one provided in
+        # the URL.
+        room_version = await self._store.get_room_version(room_id)
+        if room_version.msc2176_redaction_rules:
+            if "redacts" in content and content["redacts"] != event_id:
+                raise SynapseError(
+                    400,
+                    "Cannot provide a redacts value incoherent with the event_id of the URL parameter",
+                    Codes.INVALID_PARAM,
+                )
+            else:
+                content["redacts"] = event_id
+
         try:
             with_relations = None
             if self._msc3912_enabled and "org.matrix.msc3912.with_relations" in content:
@@ -1128,20 +1142,23 @@ class RoomRedactEventRestServlet(TransactionRestServlet):
                     requester, txn_id, room_id
                 )
 
+            # Event is not yet redacted, create a new event to redact it.
             if event is None:
+                event_dict = {
+                    "type": EventTypes.Redaction,
+                    "content": content,
+                    "room_id": room_id,
+                    "sender": requester.user.to_string(),
+                }
+                # Earlier room versions had a top-level redacts property.
+                if not room_version.msc2176_redaction_rules:
+                    event_dict["redacts"] = event_id
+
                 (
                     event,
                     _,
                 ) = await self.event_creation_handler.create_and_send_nonmember_event(
-                    requester,
-                    {
-                        "type": EventTypes.Redaction,
-                        "content": content,
-                        "room_id": room_id,
-                        "sender": requester.user.to_string(),
-                        "redacts": event_id,
-                    },
-                    txn_id=txn_id,
+                    requester, event_dict, txn_id=txn_id
                 )
 
                 if with_relations:
diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py
index c35f58f462..1b179acb20 100644
--- a/tests/events/test_utils.py
+++ b/tests/events/test_utils.py
@@ -318,7 +318,11 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
         """Redaction events have no special behaviour until MSC2174/MSC2176."""
 
         self.run_test(
-            {"type": "m.room.redaction", "content": {"redacts": "$test2:domain"}},
+            {
+                "type": "m.room.redaction",
+                "content": {"redacts": "$test2:domain"},
+                "redacts": "$test2:domain",
+            },
             {
                 "type": "m.room.redaction",
                 "content": {},
@@ -330,7 +334,11 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
 
         # After MSC2174, redaction events keep the redacts content key.
         self.run_test(
-            {"type": "m.room.redaction", "content": {"redacts": "$test2:domain"}},
+            {
+                "type": "m.room.redaction",
+                "content": {"redacts": "$test2:domain"},
+                "redacts": "$test2:domain",
+            },
             {
                 "type": "m.room.redaction",
                 "content": {"redacts": "$test2:domain"},
diff --git a/tests/rest/client/test_redactions.py b/tests/rest/client/test_redactions.py
index 5dfe44defb..84a60c0b07 100644
--- a/tests/rest/client/test_redactions.py
+++ b/tests/rest/client/test_redactions.py
@@ -16,6 +16,7 @@ from typing import List, Optional
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.constants import EventTypes, RelationTypes
+from synapse.api.room_versions import RoomVersions
 from synapse.rest import admin
 from synapse.rest.client import login, room, sync
 from synapse.server import HomeServer
@@ -74,6 +75,7 @@ class RedactionsTestCase(HomeserverTestCase):
         event_id: str,
         expect_code: int = 200,
         with_relations: Optional[List[str]] = None,
+        content: Optional[JsonDict] = None,
     ) -> JsonDict:
         """Helper function to send a redaction event.
 
@@ -81,7 +83,7 @@ class RedactionsTestCase(HomeserverTestCase):
         """
         path = "/_matrix/client/r0/rooms/%s/redact/%s" % (room_id, event_id)
 
-        request_content = {}
+        request_content = content or {}
         if with_relations:
             request_content["org.matrix.msc3912.with_relations"] = with_relations
 
@@ -92,7 +94,7 @@ class RedactionsTestCase(HomeserverTestCase):
         return channel.json_body
 
     def _sync_room_timeline(self, access_token: str, room_id: str) -> List[JsonDict]:
-        channel = self.make_request("GET", "sync", access_token=self.mod_access_token)
+        channel = self.make_request("GET", "sync", access_token=access_token)
         self.assertEqual(channel.code, 200)
         room_sync = channel.json_body["rooms"]["join"][room_id]
         return room_sync["timeline"]["events"]
@@ -466,3 +468,36 @@ class RedactionsTestCase(HomeserverTestCase):
         )
         self.assertIn("body", event_dict["content"], event_dict)
         self.assertEqual("I'm in a thread!", event_dict["content"]["body"])
+
+    def test_content_redaction(self) -> None:
+        """MSC2174 moved the redacts property to the content."""
+        # Create a room with the newer room version.
+        room_id = self.helper.create_room_as(
+            self.mod_user_id,
+            tok=self.mod_access_token,
+            room_version=RoomVersions.MSC2176.identifier,
+        )
+
+        # Create an event.
+        b = self.helper.send(room_id=room_id, tok=self.mod_access_token)
+        event_id = b["event_id"]
+
+        # Attempt to redact it with a bogus event ID.
+        self._redact_event(
+            self.mod_access_token,
+            room_id,
+            event_id,
+            expect_code=400,
+            content={"redacts": "foo"},
+        )
+
+        # Redact it for real.
+        self._redact_event(self.mod_access_token, room_id, event_id)
+
+        # Sync the room, to get the redaction event that was just sent.
+        timeline = self._sync_room_timeline(self.mod_access_token, room_id)
+        redact_event = timeline[-1]
+        self.assertEqual(redact_event["type"], EventTypes.Redaction)
+        # The redacts key should be in the content.
+        self.assertNotIn("redacts", redact_event)
+        self.assertEquals(redact_event["content"]["redacts"], event_id)
-- 
cgit 1.5.1


From 38272be03710f0675d7f73d15a8a9c4398619b68 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Thu, 13 Apr 2023 14:06:25 +0000
Subject: Add comma missing from #15382. (#15429)

* Add missing comma

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 changelog.d/15429.misc                             | 1 +
 synapse/storage/databases/main/event_federation.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15429.misc

(limited to 'synapse')

diff --git a/changelog.d/15429.misc b/changelog.d/15429.misc
new file mode 100644
index 0000000000..c5b054d19e
--- /dev/null
+++ b/changelog.d/15429.misc
@@ -0,0 +1 @@
+Improve DB performance of clearing out old data from `stream_ordering_to_exterm`.
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 9e6011e8ea..2ad6fa7d5e 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -1708,7 +1708,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
                 WHERE stream_ordering < ?
             """
             txn.execute(
-                sql, (self.stream_ordering_month_ago)  # type: ignore[attr-defined]
+                sql, (self.stream_ordering_month_ago,)  # type: ignore[attr-defined]
             )
 
         await self.db_pool.runInteraction(
-- 
cgit 1.5.1


From edae20f926d9d1225111f1d40a1073ce3f1d3fb7 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Thu, 13 Apr 2023 14:35:03 +0000
Subject: Improve robustness when handling a perspective key response by
 deduplicating received server keys. (#15423)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Change `store_server_verify_keys` to take a `Mapping[(str, str), FKR]`

This is because we already can't handle duplicate keys — leads to cardinality violation

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 changelog.d/15423.bugfix               |  1 +
 synapse/crypto/keyring.py              | 26 ++++++++++++++++++++++----
 synapse/storage/databases/main/keys.py |  6 +++---
 tests/crypto/test_keyring.py           |  4 ++--
 tests/storage/test_keys.py             | 18 +++++++++---------
 tests/unittest.py                      | 16 ++++++----------
 6 files changed, 43 insertions(+), 28 deletions(-)
 create mode 100644 changelog.d/15423.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15423.bugfix b/changelog.d/15423.bugfix
new file mode 100644
index 0000000000..dfb60ddd2f
--- /dev/null
+++ b/changelog.d/15423.bugfix
@@ -0,0 +1 @@
+Improve robustness when handling a perspective key response by deduplicating received server keys.
\ No newline at end of file
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index d710607c63..d2f99dc2ac 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -721,7 +721,7 @@ class PerspectivesKeyFetcher(BaseV2KeyFetcher):
         )
 
         keys: Dict[str, Dict[str, FetchKeyResult]] = {}
-        added_keys: List[Tuple[str, str, FetchKeyResult]] = []
+        added_keys: Dict[Tuple[str, str], FetchKeyResult] = {}
 
         time_now_ms = self.clock.time_msec()
 
@@ -752,9 +752,27 @@ class PerspectivesKeyFetcher(BaseV2KeyFetcher):
                 # we continue to process the rest of the response
                 continue
 
-            added_keys.extend(
-                (server_name, key_id, key) for key_id, key in processed_response.items()
-            )
+            for key_id, key in processed_response.items():
+                dict_key = (server_name, key_id)
+                if dict_key in added_keys:
+                    already_present_key = added_keys[dict_key]
+                    logger.warning(
+                        "Duplicate server keys for %s (%s) from perspective %s (%r, %r)",
+                        server_name,
+                        key_id,
+                        perspective_name,
+                        already_present_key,
+                        key,
+                    )
+
+                    if already_present_key.valid_until_ts > key.valid_until_ts:
+                        # Favour the entry with the largest valid_until_ts,
+                        # as `old_verify_keys` are also collected from this
+                        # response.
+                        continue
+
+                added_keys[dict_key] = key
+
             keys.setdefault(server_name, {}).update(processed_response)
 
         await self.store.store_server_verify_keys(
diff --git a/synapse/storage/databases/main/keys.py b/synapse/storage/databases/main/keys.py
index 0a19f607bd..89c37a4eb5 100644
--- a/synapse/storage/databases/main/keys.py
+++ b/synapse/storage/databases/main/keys.py
@@ -15,7 +15,7 @@
 
 import itertools
 import logging
-from typing import Any, Dict, Iterable, List, Optional, Tuple
+from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple
 
 from signedjson.key import decode_verify_key_bytes
 
@@ -95,7 +95,7 @@ class KeyStore(SQLBaseStore):
         self,
         from_server: str,
         ts_added_ms: int,
-        verify_keys: Iterable[Tuple[str, str, FetchKeyResult]],
+        verify_keys: Mapping[Tuple[str, str], FetchKeyResult],
     ) -> None:
         """Stores NACL verification keys for remote servers.
         Args:
@@ -108,7 +108,7 @@ class KeyStore(SQLBaseStore):
         key_values = []
         value_values = []
         invalidations = []
-        for server_name, key_id, fetch_result in verify_keys:
+        for (server_name, key_id), fetch_result in verify_keys.items():
             key_values.append((server_name, key_id))
             value_values.append(
                 (
diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py
index 1b9696748f..66102ab934 100644
--- a/tests/crypto/test_keyring.py
+++ b/tests/crypto/test_keyring.py
@@ -193,7 +193,7 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         r = self.hs.get_datastores().main.store_server_verify_keys(
             "server9",
             int(time.time() * 1000),
-            [("server9", get_key_id(key1), FetchKeyResult(get_verify_key(key1), 1000))],
+            {("server9", get_key_id(key1)): FetchKeyResult(get_verify_key(key1), 1000)},
         )
         self.get_success(r)
 
@@ -291,7 +291,7 @@ class KeyringTestCase(unittest.HomeserverTestCase):
             # None is not a valid value in FetchKeyResult, but we're abusing this
             # API to insert null values into the database. The nulls get converted
             # to 0 when fetched in KeyStore.get_server_verify_keys.
-            [("server9", get_key_id(key1), FetchKeyResult(get_verify_key(key1), None))],  # type: ignore[arg-type]
+            {("server9", get_key_id(key1)): FetchKeyResult(get_verify_key(key1), None)},  # type: ignore[arg-type]
         )
         self.get_success(r)
 
diff --git a/tests/storage/test_keys.py b/tests/storage/test_keys.py
index ba68171ad7..5901d80f26 100644
--- a/tests/storage/test_keys.py
+++ b/tests/storage/test_keys.py
@@ -46,10 +46,10 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase):
             store.store_server_verify_keys(
                 "from_server",
                 10,
-                [
-                    ("server1", key_id_1, FetchKeyResult(KEY_1, 100)),
-                    ("server1", key_id_2, FetchKeyResult(KEY_2, 200)),
-                ],
+                {
+                    ("server1", key_id_1): FetchKeyResult(KEY_1, 100),
+                    ("server1", key_id_2): FetchKeyResult(KEY_2, 200),
+                },
             )
         )
 
@@ -90,10 +90,10 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase):
             store.store_server_verify_keys(
                 "from_server",
                 0,
-                [
-                    ("srv1", key_id_1, FetchKeyResult(KEY_1, 100)),
-                    ("srv1", key_id_2, FetchKeyResult(KEY_2, 200)),
-                ],
+                {
+                    ("srv1", key_id_1): FetchKeyResult(KEY_1, 100),
+                    ("srv1", key_id_2): FetchKeyResult(KEY_2, 200),
+                },
             )
         )
 
@@ -119,7 +119,7 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase):
             signedjson.key.generate_signing_key("key2")
         )
         d = store.store_server_verify_keys(
-            "from_server", 10, [("srv1", key_id_2, FetchKeyResult(new_key_2, 300))]
+            "from_server", 10, {("srv1", key_id_2): FetchKeyResult(new_key_2, 300)}
         )
         self.get_success(d)
 
diff --git a/tests/unittest.py b/tests/unittest.py
index 8a16fd3665..93fee1c0e6 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -793,16 +793,12 @@ class FederatingHomeserverTestCase(HomeserverTestCase):
             hs.get_datastores().main.store_server_verify_keys(
                 from_server=self.OTHER_SERVER_NAME,
                 ts_added_ms=clock.time_msec(),
-                verify_keys=[
-                    (
-                        self.OTHER_SERVER_NAME,
-                        verify_key_id,
-                        FetchKeyResult(
-                            verify_key=verify_key,
-                            valid_until_ts=clock.time_msec() + 10000,
-                        ),
-                    )
-                ],
+                verify_keys={
+                    (self.OTHER_SERVER_NAME, verify_key_id): FetchKeyResult(
+                        verify_key=verify_key,
+                        valid_until_ts=clock.time_msec() + 10000,
+                    ),
+                },
             )
         )
 
-- 
cgit 1.5.1


From d751f65e71bef80ec4181e4495c954551ddf33f7 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 13 Apr 2023 11:36:29 -0400
Subject: Remove registration fallback code. (#15405)

The registration fallback is broken and unspecced. This removes it
since there is no plan to spec it.

Note that this does not modify the login fallback code.
---
 changelog.d/15405.removal                          |   1 +
 synapse/res/templates/recaptcha.html               |   6 +-
 synapse/res/templates/registration_token.html      |   6 +-
 synapse/res/templates/style.css                    |   4 +
 synapse/res/templates/terms.html                   |   7 +-
 synapse/static/client/register/index.html          |  34 ------
 .../static/client/register/js/jquery-3.4.1.min.js  |   2 -
 synapse/static/client/register/js/register.js      | 117 ---------------------
 .../client/register/register_config.sample.js      |   3 -
 synapse/static/client/register/style.css           |  64 -----------
 10 files changed, 17 insertions(+), 227 deletions(-)
 create mode 100644 changelog.d/15405.removal
 delete mode 100644 synapse/static/client/register/index.html
 delete mode 100644 synapse/static/client/register/js/jquery-3.4.1.min.js
 delete mode 100644 synapse/static/client/register/js/register.js
 delete mode 100644 synapse/static/client/register/register_config.sample.js
 delete mode 100644 synapse/static/client/register/style.css

(limited to 'synapse')

diff --git a/changelog.d/15405.removal b/changelog.d/15405.removal
new file mode 100644
index 0000000000..8334004153
--- /dev/null
+++ b/changelog.d/15405.removal
@@ -0,0 +1 @@
+Remove the broken, unspecced registration fallback. Note that the *login* fallback is unaffected by this change.
diff --git a/synapse/res/templates/recaptcha.html b/synapse/res/templates/recaptcha.html
index f00992a24b..b80e5e8f24 100644
--- a/synapse/res/templates/recaptcha.html
+++ b/synapse/res/templates/recaptcha.html
@@ -3,7 +3,11 @@
 
 {% block header %}
 <script src="https://www.recaptcha.net/recaptcha/api.js" async defer></script>
-<link rel="stylesheet" href="/_matrix/static/client/register/style.css">
+<style type="text/css">
+  .g-recaptcha div {
+    margin: auto;
+  }
+</style>
 <script>
 function captchaDone() {
     document.getElementById('registrationForm').submit(); 
diff --git a/synapse/res/templates/registration_token.html b/synapse/res/templates/registration_token.html
index ee4e5295e7..179e994279 100644
--- a/synapse/res/templates/registration_token.html
+++ b/synapse/res/templates/registration_token.html
@@ -1,12 +1,8 @@
 {% extends "_base.html" %}
 {% block title %}Authentication{% endblock %}
 
-{% block header %}
-<link rel="stylesheet" href="/_matrix/static/client/register/style.css">
-{% endblock %}
-
 {% block body %}
-<form id="registrationForm" method="post" action="{{ myurl }}">
+<form method="post" action="{{ myurl }}">
     <div>
         {% if error is defined %}
             <p class="error"><strong>Error: {{ error }}</strong></p>
diff --git a/synapse/res/templates/style.css b/synapse/res/templates/style.css
index 097b235ae5..9899238bb6 100644
--- a/synapse/res/templates/style.css
+++ b/synapse/res/templates/style.css
@@ -27,3 +27,7 @@ body {
     h3 { font-size: .85rem; }
     h4 { font-size: .8rem; }
 }
+
+.error {
+    color: red;
+}
diff --git a/synapse/res/templates/terms.html b/synapse/res/templates/terms.html
index ffabebdd8b..66c40a7000 100644
--- a/synapse/res/templates/terms.html
+++ b/synapse/res/templates/terms.html
@@ -2,7 +2,12 @@
 {% block title %}Authentication{% endblock %}
 
 {% block header %}
-<link rel="stylesheet" href="/_matrix/static/client/register/style.css">
+<style type="text/css">
+  #registrationForm input {
+    display: block;
+    margin: auto;
+  }
+</style>
 {% endblock %}
 
 {% block body %}
diff --git a/synapse/static/client/register/index.html b/synapse/static/client/register/index.html
deleted file mode 100644
index 27bbd76f51..0000000000
--- a/synapse/static/client/register/index.html
+++ /dev/null
@@ -1,34 +0,0 @@
-<!doctype html>
-<html>
-<head>
-<title> Registration </title>
-<meta http-equiv="X-UA-Compatible" content="IE=edge">
-<meta name="viewport" content="width=device-width, initial-scale=1.0">
-<link rel="stylesheet" href="style.css">
-<script src="js/jquery-3.4.1.min.js"></script>
-<script src="https://www.recaptcha.net/recaptcha/api/js/recaptcha_ajax.js"></script>
-<script src="register_config.js"></script>
-<script src="js/register.js"></script>
-</head>
-<body onload="matrixRegistration.onLoad()">
-<form id="registrationForm" onsubmit="matrixRegistration.signUp(); return false;">
-    <div>
-        Create account:<br/>
-        
-        <div style="text-align: center">
-            <input id="desired_user_id" size="32" type="text" placeholder="Matrix ID (e.g. bob)" autocapitalize="off" autocorrect="off" />
-            <br/>
-            <input id="pwd1" size="32" type="password" placeholder="Type a password"/>
-            <br/>
-            <input id="pwd2" size="32" type="password" placeholder="Confirm your password"/>
-            <br/>
-            <span id="feedback" style="color: #f00"></span>
-            <br/>
-            <div id="regcaptcha"></div>
-
-            <button type="submit" style="margin: 10px">Sign up</button>
-        </div>
-    </div>
-</form>
-</body>
-</html>
diff --git a/synapse/static/client/register/js/jquery-3.4.1.min.js b/synapse/static/client/register/js/jquery-3.4.1.min.js
deleted file mode 100644
index a1c07fd803..0000000000
--- a/synapse/static/client/register/js/jquery-3.4.1.min.js
+++ /dev/null
@@ -1,2 +0,0 @@
-/*! jQuery v3.4.1 | (c) JS Foundation and other contributors | jquery.org/license */
-!function(e,t){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=e.document?t(e,!0):function(e){if(!e.document)throw new Error("jQuery requires a window with a document");return t(e)}:t(e)}("undefined"!=typeof window?window:this,function(C,e){"use strict";var t=[],E=C.document,r=Object.getPrototypeOf,s=t.slice,g=t.concat,u=t.push,i=t.indexOf,n={},o=n.toString,v=n.hasOwnProperty,a=v.toString,l=a.call(Object),y={},m=function(e){return"function"==typeof e&&"number"!=typeof e.nodeType},x=function(e){return null!=e&&e===e.window},c={type:!0,src:!0,nonce:!0,noModule:!0};function b(e,t,n){var r,i,o=(n=n||E).createElement("script");if(o.text=e,t)for(r in c)(i=t[r]||t.getAttribute&&t.getAttribute(r))&&o.setAttribute(r,i);n.head.appendChild(o).parentNode.removeChild(o)}function w(e){return null==e?e+"":"object"==typeof e||"function"==typeof e?n[o.call(e)]||"object":typeof e}var f="3.4.1",k=function(e,t){return new k.fn.init(e,t)},p=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g;function d(e){var t=!!e&&"length"in e&&e.length,n=w(e);return!m(e)&&!x(e)&&("array"===n||0===t||"number"==typeof t&&0<t&&t-1 in e)}k.fn=k.prototype={jquery:f,constructor:k,length:0,toArray:function(){return s.call(this)},get:function(e){return null==e?s.call(this):e<0?this[e+this.length]:this[e]},pushStack:function(e){var t=k.merge(this.constructor(),e);return t.prevObject=this,t},each:function(e){return k.each(this,e)},map:function(n){return this.pushStack(k.map(this,function(e,t){return n.call(e,t,e)}))},slice:function(){return this.pushStack(s.apply(this,arguments))},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},eq:function(e){var t=this.length,n=+e+(e<0?t:0);return this.pushStack(0<=n&&n<t?[this[n]]:[])},end:function(){return this.prevObject||this.constructor()},push:u,sort:t.sort,splice:t.splice},k.extend=k.fn.extend=function(){var e,t,n,r,i,o,a=arguments[0]||{},s=1,u=arguments.length,l=!1;for("boolean"==typeof 
a&&(l=a,a=arguments[s]||{},s++),"object"==typeof a||m(a)||(a={}),s===u&&(a=this,s--);s<u;s++)if(null!=(e=arguments[s]))for(t in e)r=e[t],"__proto__"!==t&&a!==r&&(l&&r&&(k.isPlainObject(r)||(i=Array.isArray(r)))?(n=a[t],o=i&&!Array.isArray(n)?[]:i||k.isPlainObject(n)?n:{},i=!1,a[t]=k.extend(l,o,r)):void 0!==r&&(a[t]=r));return a},k.extend({expando:"jQuery"+(f+Math.random()).replace(/\D/g,""),isReady:!0,error:function(e){throw new Error(e)},noop:function(){},isPlainObject:function(e){var t,n;return!(!e||"[object Object]"!==o.call(e))&&(!(t=r(e))||"function"==typeof(n=v.call(t,"constructor")&&t.constructor)&&a.call(n)===l)},isEmptyObject:function(e){var t;for(t in e)return!1;return!0},globalEval:function(e,t){b(e,{nonce:t&&t.nonce})},each:function(e,t){var n,r=0;if(d(e)){for(n=e.length;r<n;r++)if(!1===t.call(e[r],r,e[r]))break}else for(r in e)if(!1===t.call(e[r],r,e[r]))break;return e},trim:function(e){return null==e?"":(e+"").replace(p,"")},makeArray:function(e,t){var n=t||[];return null!=e&&(d(Object(e))?k.merge(n,"string"==typeof e?[e]:e):u.call(n,e)),n},inArray:function(e,t,n){return null==t?-1:i.call(t,e,n)},merge:function(e,t){for(var n=+t.length,r=0,i=e.length;r<n;r++)e[i++]=t[r];return e.length=i,e},grep:function(e,t,n){for(var r=[],i=0,o=e.length,a=!n;i<o;i++)!t(e[i],i)!==a&&r.push(e[i]);return r},map:function(e,t,n){var r,i,o=0,a=[];if(d(e))for(r=e.length;o<r;o++)null!=(i=t(e[o],o,n))&&a.push(i);else for(o in e)null!=(i=t(e[o],o,n))&&a.push(i);return g.apply([],a)},guid:1,support:y}),"function"==typeof Symbol&&(k.fn[Symbol.iterator]=t[Symbol.iterator]),k.each("Boolean Number String Function Array Date RegExp Object Error Symbol".split(" "),function(e,t){n["[object "+t+"]"]=t.toLowerCase()});var h=function(n){var e,d,b,o,i,h,f,g,w,u,l,T,C,a,E,v,s,c,y,k="sizzle"+1*new Date,m=n.document,S=0,r=0,p=ue(),x=ue(),N=ue(),A=ue(),D=function(e,t){return e===t&&(l=!0),0},j={}.hasOwnProperty,t=[],q=t.pop,L=t.push,H=t.push,O=t.slice,P=function(e,t){for(var 
n=0,r=e.length;n<r;n++)if(e[n]===t)return n;return-1},R="checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|ismap|loop|multiple|open|readonly|required|scoped",M="[\\x20\\t\\r\\n\\f]",I="(?:\\\\.|[\\w-]|[^\0-\\xa0])+",W="\\["+M+"*("+I+")(?:"+M+"*([*^$|!~]?=)"+M+"*(?:'((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\"|("+I+"))|)"+M+"*\\]",$=":("+I+")(?:\\((('((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\")|((?:\\\\.|[^\\\\()[\\]]|"+W+")*)|.*)\\)|)",F=new RegExp(M+"+","g"),B=new RegExp("^"+M+"+|((?:^|[^\\\\])(?:\\\\.)*)"+M+"+$","g"),_=new RegExp("^"+M+"*,"+M+"*"),z=new RegExp("^"+M+"*([>+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new RegExp($),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+$),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new RegExp("\\\\([\\da-f]{1,6}"+M+"?|("+M+")|.)","ig"),ne=function(e,t,n){var r="0x"+t-65536;return r!=r||n?t:r<0?String.fromCharCode(r+65536):String.fromCharCode(r>>10|55296,1023&r|56320)},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" ":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(m.childNodes),m.childNodes),t[m.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var 
n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&((e?e.ownerDocument||e:m)!==C&&T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!A[t+" "]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&U.test(t)){(s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=k),o=(l=h(t)).length;while(o--)l[o]="#"+s+" "+xe(l[o]);c=l.join(","),f=ee.test(t)&&ye(e.parentNode)||e}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){A(t,!0)}finally{s===k&&e.removeAttribute("id")}}}return g(t.replace(B,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" ")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[k]=!0,e}function ce(e){var t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in 
e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e.namespaceURI,n=(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:m;return r!==C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),m!==C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=k,!C.getElementsByName||!C.getElementsByName(k).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return 
o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){a.appendChild(e).innerHTML="<a id='"+k+"'></a><select id='"+k+"-\r\\' msallowcapture=''><option selected=''></option></select>",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+k+"-]").length||v.push("~="),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+k+"+*").length||v.push(".#.+[+~]")}),ce(function(e){e.innerHTML="<a href='' disabled='disabled'></a><select disabled='disabled'><option/></select>";var t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",$)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},D=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return 
n||(1&(n=(e.ownerDocument||e)===(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e===C||e.ownerDocument===m&&y(m,e)?-1:t===C||t.ownerDocument===m&&y(m,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e===C?-1:t===C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]===m?-1:s[r]===m?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if((e.ownerDocument||e)!==C&&T(e),d.matchesSelector&&E&&!A[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){A(t,!0)}return 0<se(t,C,null,[e]).length},se.contains=function(e,t){return(e.ownerDocument||e)!==C&&T(e),y(e,t)},se.attr=function(e,t){(e.ownerDocument||e)!==C&&T(e);var n=b.attrHandle[t.toLowerCase()],r=n&&j.call(b.attrHandle,t.toLowerCase())?n(e,t,!E):void 0;return void 0!==r?r:d.attributes||!E?e.getAttribute(t):(r=e.getAttributeNode(t))&&r.specified?r.value:null},se.escape=function(e){return(e+"").replace(re,ie)},se.error=function(e){throw new Error("Syntax error, unrecognized expression: "+e)},se.uniqueSort=function(e){var t,n=[],r=0,i=0;if(l=!d.detectDuplicates,u=!d.sortStable&&e.slice(0),e.sort(D),l){while(t=e[i++])t===e[i]&&(r=n.push(i));while(r--)e.splice(n[r],1)}return u=null,e},o=se.getText=function(e){var t,n="",r=0,i=e.nodeType;if(i){if(1===i||9===i||11===i){if("string"==typeof e.textContent)return e.textContent;for(e=e.firstChild;e;e=e.nextSibling)n+=o(e)}else if(3===i||4===i)return e.nodeValue}else while(t=e[r++])n+=o(t);return n},(b=se.selectors={cacheLength:50,createPseudo:le,match:G,attrHandle:{},find:{},relative:{">":{dir:"parentNode",first:!0}," 
":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=p[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&p(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1<t.indexOf(i):"$="===r?i&&t.slice(-i.length)===i:"~="===r?-1<(" "+t.replace(F," ")+" ").indexOf(i):"|="===r&&(t===i||t.slice(0,i.length+1)===i+"-"))}},CHILD:function(h,e,t,g,v){var y="nth"!==h.slice(0,3),m="last"!==h.slice(-4),x="of-type"===e;return 1===g&&0===v?function(e){return!!e.parentNode}:function(e,t,n){var 
r,i,o,a,s,u,l=y!==m?"nextSibling":"previousSibling",c=e.parentNode,f=x&&e.nodeName.toLowerCase(),p=!n&&!x,d=!1;if(c){if(y){while(l){a=e;while(a=a[l])if(x?a.nodeName.toLowerCase()===f:1===a.nodeType)return!1;u=l="only"===h&&!u&&"nextSibling"}return!0}if(u=[m?c.firstChild:c.lastChild],m&&p){d=(s=(r=(i=(o=(a=c)[k]||(a[k]={}))[a.uniqueID]||(o[a.uniqueID]={}))[h]||[])[0]===S&&r[1])&&r[2],a=s&&c.childNodes[s];while(a=++s&&a&&a[l]||(d=s=0)||u.pop())if(1===a.nodeType&&++d&&a===e){i[h]=[S,s,d];break}}else if(p&&(d=s=(r=(i=(o=(a=e)[k]||(a[k]={}))[a.uniqueID]||(o[a.uniqueID]={}))[h]||[])[0]===S&&r[1]),!1===d)while(a=++s&&a&&a[l]||(d=s=0)||u.pop())if((x?a.nodeName.toLowerCase()===f:1===a.nodeType)&&++d&&(p&&((i=(o=a[k]||(a[k]={}))[a.uniqueID]||(o[a.uniqueID]={}))[h]=[S,d]),a===e))break;return(d-=v)===g||d%g==0&&0<=d/g}}},PSEUDO:function(e,o){var t,a=b.pseudos[e]||b.setFilters[e.toLowerCase()]||se.error("unsupported pseudo: "+e);return a[k]?a(o):1<a.length?(t=[e,e,"",o],b.setFilters.hasOwnProperty(e.toLowerCase())?le(function(e,t){var n,r=a(e,o),i=r.length;while(i--)e[n=P(e,r[i])]=!(t[n]=r[i])}):function(e){return a(e,0,t)}):a}},pseudos:{not:le(function(e){var r=[],i=[],s=f(e.replace(B,"$1"));return s[k]?le(function(e,t,n,r){var i,o=s(e,null,r,[]),a=e.length;while(a--)(i=o[a])&&(e[a]=!(t[a]=i))}):function(e,t,n){return r[0]=e,s(r,null,n,i),r[0]=null,!i.pop()}}),has:le(function(t){return function(e){return 0<se(t,e).length}}),contains:le(function(t){return t=t.replace(te,ne),function(e){return-1<(e.textContent||o(e)).indexOf(t)}}),lang:le(function(n){return V.test(n||"")||se.error("unsupported lang: "+n),n=n.replace(te,ne).toLowerCase(),function(e){var t;do{if(t=E?e.lang:e.getAttribute("xml:lang")||e.getAttribute("lang"))return(t=t.toLowerCase())===n||0===t.indexOf(n+"-")}while((e=e.parentNode)&&1===e.nodeType);return!1}}),target:function(e){var t=n.location&&n.location.hash;return t&&t.slice(1)===e.id},root:function(e){return e===a},focus:function(e){return 
e===C.activeElement&&(!C.hasFocus||C.hasFocus())&&!!(e.type||e.href||~e.tabIndex)},enabled:ge(!1),disabled:ge(!0),checked:function(e){var t=e.nodeName.toLowerCase();return"input"===t&&!!e.checked||"option"===t&&!!e.selected},selected:function(e){return e.parentNode&&e.parentNode.selectedIndex,!0===e.selected},empty:function(e){for(e=e.firstChild;e;e=e.nextSibling)if(e.nodeType<6)return!1;return!0},parent:function(e){return!b.pseudos.empty(e)},header:function(e){return J.test(e.nodeName)},input:function(e){return Q.test(e.nodeName)},button:function(e){var t=e.nodeName.toLowerCase();return"input"===t&&"button"===e.type||"button"===t},text:function(e){var t;return"input"===e.nodeName.toLowerCase()&&"text"===e.type&&(null==(t=e.getAttribute("type"))||"text"===t.toLowerCase())},first:ve(function(){return[0]}),last:ve(function(e,t){return[t-1]}),eq:ve(function(e,t,n){return[n<0?n+t:n]}),even:ve(function(e,t){for(var n=0;n<t;n+=2)e.push(n);return e}),odd:ve(function(e,t){for(var n=1;n<t;n+=2)e.push(n);return e}),lt:ve(function(e,t,n){for(var r=n<0?n+t:t<n?t:n;0<=--r;)e.push(r);return e}),gt:ve(function(e,t,n){for(var r=n<0?n+t:n;++r<t;)e.push(r);return e})}}).pseudos.nth=b.pseudos.eq,{radio:!0,checkbox:!0,file:!0,password:!0,image:!0})b.pseudos[e]=de(e);for(e in{submit:!0,reset:!0})b.pseudos[e]=he(e);function me(){}function xe(e){for(var t=0,n=e.length,r="";t<n;t++)r+=e[t].value;return r}function be(s,e,t){var u=e.dir,l=e.next,c=l||u,f=t&&"parentNode"===c,p=r++;return e.first?function(e,t,n){while(e=e[u])if(1===e.nodeType||f)return s(e,t,n);return!1}:function(e,t,n){var r,i,o,a=[S,p];if(n){while(e=e[u])if((1===e.nodeType||f)&&s(e,t,n))return!0}else while(e=e[u])if(1===e.nodeType||f)if(i=(o=e[k]||(e[k]={}))[e.uniqueID]||(o[e.uniqueID]={}),l&&l===e.nodeName.toLowerCase())e=e[u]||e;else{if((r=i[c])&&r[0]===S&&r[1]===p)return a[2]=r[2];if((i[c]=a)[2]=s(e,t,n))return!0}return!1}}function we(i){return 1<i.length?function(e,t,n){var 
r=i.length;while(r--)if(!i[r](e,t,n))return!1;return!0}:i[0]}function Te(e,t,n,r,i){for(var o,a=[],s=0,u=e.length,l=null!=t;s<u;s++)(o=e[s])&&(n&&!n(o,r,i)||(a.push(o),l&&t.push(s)));return a}function Ce(d,h,g,v,y,e){return v&&!v[k]&&(v=Ce(v)),y&&!y[k]&&(y=Ce(y,e)),le(function(e,t,n,r){var i,o,a,s=[],u=[],l=t.length,c=e||function(e,t,n){for(var r=0,i=t.length;r<i;r++)se(e,t[r],n);return n}(h||"*",n.nodeType?[n]:n,[]),f=!d||!e&&h?c:Te(c,s,d,n,r),p=g?y||(e?d:l||v)?[]:t:f;if(g&&g(f,p,n,r),v){i=Te(p,u),v(i,[],n,r),o=i.length;while(o--)(a=i[o])&&(p[u[o]]=!(f[u[o]]=a))}if(e){if(y||d){if(y){i=[],o=p.length;while(o--)(a=p[o])&&i.push(f[o]=a);y(null,p=[],i,r)}o=p.length;while(o--)(a=p[o])&&-1<(i=y?P(e,a):s[o])&&(e[i]=!(t[i]=a))}}else p=Te(p===t?p.splice(l,p.length):p),y?y(null,t,p,r):H.apply(t,p)})}function Ee(e){for(var i,t,n,r=e.length,o=b.relative[e[0].type],a=o||b.relative[" "],s=o?1:0,u=be(function(e){return e===i},a,!0),l=be(function(e){return-1<P(i,e)},a,!0),c=[function(e,t,n){var r=!o&&(n||t!==w)||((i=t).nodeType?u(e,t,n):l(e,t,n));return i=null,r}];s<r;s++)if(t=b.relative[e[s].type])c=[be(we(c),t)];else{if((t=b.filter[e[s].type].apply(null,e[s].matches))[k]){for(n=++s;n<r;n++)if(b.relative[e[n].type])break;return Ce(1<s&&we(c),1<s&&xe(e.slice(0,s-1).concat({value:" "===e[s-2].type?"*":""})).replace(B,"$1"),t,s<n&&Ee(e.slice(s,n)),n<r&&Ee(e=e.slice(n)),n<r&&xe(e))}c.push(t)}return we(c)}return me.prototype=b.filters=b.pseudos,b.setFilters=new me,h=se.tokenize=function(e,t){var n,r,i,o,a,s,u,l=x[e+" "];if(l)return t?0:l.slice(0);a=e,s=[],u=b.preFilter;while(a){for(o in n&&!(r=_.exec(a))||(r&&(a=a.slice(r[0].length)||a),s.push(i=[])),n=!1,(r=z.exec(a))&&(n=r.shift(),i.push({value:n,type:r[0].replace(B," ")}),a=a.slice(n.length)),b.filter)!(r=G[o].exec(a))||u[o]&&!(r=u[o](r))||(n=r.shift(),i.push({value:n,type:o,matches:r}),a=a.slice(n.length));if(!n)break}return t?a.length:a?se.error(e):x(e,s).slice(0)},f=se.compile=function(e,t){var n,v,y,m,x,r,i=[],o=[],a=N[e+" 
"];if(!a){t||(t=h(e)),n=t.length;while(n--)(a=Ee(t[n]))[k]?i.push(a):o.push(a);(a=N(e,(v=o,m=0<(y=i).length,x=0<v.length,r=function(e,t,n,r,i){var o,a,s,u=0,l="0",c=e&&[],f=[],p=w,d=e||x&&b.find.TAG("*",i),h=S+=null==p?1:Math.random()||.1,g=d.length;for(i&&(w=t===C||t||i);l!==g&&null!=(o=d[l]);l++){if(x&&o){a=0,t||o.ownerDocument===C||(T(o),n=!E);while(s=v[a++])if(s(o,t||C,n)){r.push(o);break}i&&(S=h)}m&&((o=!s&&o)&&u--,e&&c.push(o))}if(u+=l,m&&l!==u){a=0;while(s=y[a++])s(c,f,t,n);if(e){if(0<u)while(l--)c[l]||f[l]||(f[l]=q.call(r));f=Te(f)}H.apply(r,f),i&&!e&&0<f.length&&1<u+y.length&&se.uniqueSort(r)}return i&&(S=h,w=p),c},m?le(r):r))).selector=e}return a},g=se.select=function(e,t,n,r){var i,o,a,s,u,l="function"==typeof e&&e,c=!r&&h(e=l.selector||e);if(n=n||[],1===c.length){if(2<(o=c[0]=c[0].slice(0)).length&&"ID"===(a=o[0]).type&&9===t.nodeType&&E&&b.relative[o[1].type]){if(!(t=(b.find.ID(a.matches[0].replace(te,ne),t)||[])[0]))return n;l&&(t=t.parentNode),e=e.slice(o.shift().value.length)}i=G.needsContext.test(e)?0:o.length;while(i--){if(a=o[i],b.relative[s=a.type])break;if((u=b.find[s])&&(r=u(a.matches[0].replace(te,ne),ee.test(o[0].type)&&ye(t.parentNode)||t))){if(o.splice(i,1),!(e=r.length&&xe(o)))return H.apply(n,r),n;break}}}return(l||f(e,c))(r,t,!E,n,!t||ee.test(e)&&ye(t.parentNode)||t),n},d.sortStable=k.split("").sort(D).join("")===k,d.detectDuplicates=!!l,T(),d.sortDetached=ce(function(e){return 1&e.compareDocumentPosition(C.createElement("fieldset"))}),ce(function(e){return e.innerHTML="<a href='#'></a>","#"===e.firstChild.getAttribute("href")})||fe("type|href|height|width",function(e,t,n){if(!n)return e.getAttribute(t,"type"===t.toLowerCase()?1:2)}),d.attributes&&ce(function(e){return e.innerHTML="<input/>",e.firstChild.setAttribute("value",""),""===e.firstChild.getAttribute("value")})||fe("value",function(e,t,n){if(!n&&"input"===e.nodeName.toLowerCase())return e.defaultValue}),ce(function(e){return 
null==e.getAttribute("disabled")})||fe(R,function(e,t,n){var r;if(!n)return!0===e[t]?t.toLowerCase():(r=e.getAttributeNode(t))&&r.specified?r.value:null}),se}(C);k.find=h,k.expr=h.selectors,k.expr[":"]=k.expr.pseudos,k.uniqueSort=k.unique=h.uniqueSort,k.text=h.getText,k.isXMLDoc=h.isXML,k.contains=h.contains,k.escapeSelector=h.escape;var T=function(e,t,n){var r=[],i=void 0!==n;while((e=e[t])&&9!==e.nodeType)if(1===e.nodeType){if(i&&k(e).is(n))break;r.push(e)}return r},S=function(e,t){for(var n=[];e;e=e.nextSibling)1===e.nodeType&&e!==t&&n.push(e);return n},N=k.expr.match.needsContext;function A(e,t){return e.nodeName&&e.nodeName.toLowerCase()===t.toLowerCase()}var D=/^<([a-z][^\/\0>:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function j(e,n,r){return m(n)?k.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?k.grep(e,function(e){return e===n!==r}):"string"!=typeof n?k.grep(e,function(e){return-1<i.call(n,e)!==r}):k.filter(n,e,r)}k.filter=function(e,t,n){var r=t[0];return n&&(e=":not("+e+")"),1===t.length&&1===r.nodeType?k.find.matchesSelector(r,e)?[r]:[]:k.find.matches(e,k.grep(t,function(e){return 1===e.nodeType}))},k.fn.extend({find:function(e){var t,n,r=this.length,i=this;if("string"!=typeof e)return this.pushStack(k(e).filter(function(){for(t=0;t<r;t++)if(k.contains(i[t],this))return!0}));for(n=this.pushStack([]),t=0;t<r;t++)k.find(e,i[t],n);return 1<r?k.uniqueSort(n):n},filter:function(e){return this.pushStack(j(this,e||[],!1))},not:function(e){return this.pushStack(j(this,e||[],!0))},is:function(e){return!!j(this,"string"==typeof e&&N.test(e)?k(e):e||[],!1).length}});var q,L=/^(?:\s*(<[\w\W]+>)[^>]*|#([\w-]+))$/;(k.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||q,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:L.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof 
k?t[0]:t,k.merge(this,k.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),D.test(r[1])&&k.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(k):k.makeArray(e,this)}).prototype=k.fn,q=k(E);var H=/^(?:parents|prev(?:Until|All))/,O={children:!0,contents:!0,next:!0,prev:!0};function P(e,t){while((e=e[t])&&1!==e.nodeType);return e}k.fn.extend({has:function(e){var t=k(e,this),n=t.length;return this.filter(function(){for(var e=0;e<n;e++)if(k.contains(this,t[e]))return!0})},closest:function(e,t){var n,r=0,i=this.length,o=[],a="string"!=typeof e&&k(e);if(!N.test(e))for(;r<i;r++)for(n=this[r];n&&n!==t;n=n.parentNode)if(n.nodeType<11&&(a?-1<a.index(n):1===n.nodeType&&k.find.matchesSelector(n,e))){o.push(n);break}return this.pushStack(1<o.length?k.uniqueSort(o):o)},index:function(e){return e?"string"==typeof e?i.call(k(e),this[0]):i.call(this,e.jquery?e[0]:e):this[0]&&this[0].parentNode?this.first().prevAll().length:-1},add:function(e,t){return this.pushStack(k.uniqueSort(k.merge(this.get(),k(e,t))))},addBack:function(e){return this.add(null==e?this.prevObject:this.prevObject.filter(e))}}),k.each({parent:function(e){var t=e.parentNode;return t&&11!==t.nodeType?t:null},parents:function(e){return T(e,"parentNode")},parentsUntil:function(e,t,n){return T(e,"parentNode",n)},next:function(e){return P(e,"nextSibling")},prev:function(e){return P(e,"previousSibling")},nextAll:function(e){return T(e,"nextSibling")},prevAll:function(e){return T(e,"previousSibling")},nextUntil:function(e,t,n){return T(e,"nextSibling",n)},prevUntil:function(e,t,n){return T(e,"previousSibling",n)},siblings:function(e){return S((e.parentNode||{}).firstChild,e)},children:function(e){return S(e.firstChild)},contents:function(e){return"undefined"!=typeof 
e.contentDocument?e.contentDocument:(A(e,"template")&&(e=e.content||e),k.merge([],e.childNodes))}},function(r,i){k.fn[r]=function(e,t){var n=k.map(this,i,e);return"Until"!==r.slice(-5)&&(t=e),t&&"string"==typeof t&&(n=k.filter(t,n)),1<this.length&&(O[r]||k.uniqueSort(n),H.test(r)&&n.reverse()),this.pushStack(n)}});var R=/[^\x20\t\r\n\f]+/g;function M(e){return e}function I(e){throw e}function W(e,t,n,r){var i;try{e&&m(i=e.promise)?i.call(e).done(t).fail(n):e&&m(i=e.then)?i.call(e,t,n):t.apply(void 0,[e].slice(r))}catch(e){n.apply(void 0,[e])}}k.Callbacks=function(r){var e,n;r="string"==typeof r?(e=r,n={},k.each(e.match(R)||[],function(e,t){n[t]=!0}),n):k.extend({},r);var i,t,o,a,s=[],u=[],l=-1,c=function(){for(a=a||r.once,o=i=!0;u.length;l=-1){t=u.shift();while(++l<s.length)!1===s[l].apply(t[0],t[1])&&r.stopOnFalse&&(l=s.length,t=!1)}r.memory||(t=!1),i=!1,a&&(s=t?[]:"")},f={add:function(){return s&&(t&&!i&&(l=s.length-1,u.push(t)),function n(e){k.each(e,function(e,t){m(t)?r.unique&&f.has(t)||s.push(t):t&&t.length&&"string"!==w(t)&&n(t)})}(arguments),t&&!i&&c()),this},remove:function(){return k.each(arguments,function(e,t){var n;while(-1<(n=k.inArray(t,s,n)))s.splice(n,1),n<=l&&l--}),this},has:function(e){return e?-1<k.inArray(e,s):0<s.length},empty:function(){return s&&(s=[]),this},disable:function(){return a=u=[],s=t="",this},disabled:function(){return!s},lock:function(){return a=u=[],t||i||(s=t=""),this},locked:function(){return!!a},fireWith:function(e,t){return a||(t=[e,(t=t||[]).slice?t.slice():t],u.push(t),i||c()),this},fire:function(){return f.fireWith(this,arguments),this},fired:function(){return!!o}};return f},k.extend({Deferred:function(e){var o=[["notify","progress",k.Callbacks("memory"),k.Callbacks("memory"),2],["resolve","done",k.Callbacks("once memory"),k.Callbacks("once memory"),0,"resolved"],["reject","fail",k.Callbacks("once memory"),k.Callbacks("once memory"),1,"rejected"]],i="pending",a={state:function(){return i},always:function(){return 
s.done(arguments).fail(arguments),this},"catch":function(e){return a.then(null,e)},pipe:function(){var i=arguments;return k.Deferred(function(r){k.each(o,function(e,t){var n=m(i[t[4]])&&i[t[4]];s[t[1]](function(){var e=n&&n.apply(this,arguments);e&&m(e.promise)?e.promise().progress(r.notify).done(r.resolve).fail(r.reject):r[t[0]+"With"](this,n?[e]:arguments)})}),i=null}).promise()},then:function(t,n,r){var u=0;function l(i,o,a,s){return function(){var n=this,r=arguments,e=function(){var e,t;if(!(i<u)){if((e=a.apply(n,r))===o.promise())throw new TypeError("Thenable self-resolution");t=e&&("object"==typeof e||"function"==typeof e)&&e.then,m(t)?s?t.call(e,l(u,o,M,s),l(u,o,I,s)):(u++,t.call(e,l(u,o,M,s),l(u,o,I,s),l(u,o,M,o.notifyWith))):(a!==M&&(n=void 0,r=[e]),(s||o.resolveWith)(n,r))}},t=s?e:function(){try{e()}catch(e){k.Deferred.exceptionHook&&k.Deferred.exceptionHook(e,t.stackTrace),u<=i+1&&(a!==I&&(n=void 0,r=[e]),o.rejectWith(n,r))}};i?t():(k.Deferred.getStackHook&&(t.stackTrace=k.Deferred.getStackHook()),C.setTimeout(t))}}return k.Deferred(function(e){o[0][3].add(l(0,e,m(r)?r:M,e.notifyWith)),o[1][3].add(l(0,e,m(t)?t:M)),o[2][3].add(l(0,e,m(n)?n:I))}).promise()},promise:function(e){return null!=e?k.extend(e,a):a}},s={};return k.each(o,function(e,t){var n=t[2],r=t[5];a[t[1]]=n.add,r&&n.add(function(){i=r},o[3-e][2].disable,o[3-e][3].disable,o[0][2].lock,o[0][3].lock),n.add(t[3].fire),s[t[0]]=function(){return s[t[0]+"With"](this===s?void 0:this,arguments),this},s[t[0]+"With"]=n.fireWith}),a.promise(s),e&&e.call(s,s),s},when:function(e){var n=arguments.length,t=n,r=Array(t),i=s.call(arguments),o=k.Deferred(),a=function(t){return function(e){r[t]=this,i[t]=1<arguments.length?s.call(arguments):e,--n||o.resolveWith(r,i)}};if(n<=1&&(W(e,o.done(a(t)).resolve,o.reject,!n),"pending"===o.state()||m(i[t]&&i[t].then)))return o.then();while(t--)W(i[t],a(t),o.reject);return o.promise()}});var 
$=/^(Eval|Internal|Range|Reference|Syntax|Type|URI)Error$/;k.Deferred.exceptionHook=function(e,t){C.console&&C.console.warn&&e&&$.test(e.name)&&C.console.warn("jQuery.Deferred exception: "+e.message,e.stack,t)},k.readyException=function(e){C.setTimeout(function(){throw e})};var F=k.Deferred();function B(){E.removeEventListener("DOMContentLoaded",B),C.removeEventListener("load",B),k.ready()}k.fn.ready=function(e){return F.then(e)["catch"](function(e){k.readyException(e)}),this},k.extend({isReady:!1,readyWait:1,ready:function(e){(!0===e?--k.readyWait:k.isReady)||(k.isReady=!0)!==e&&0<--k.readyWait||F.resolveWith(E,[k])}}),k.ready.then=F.then,"complete"===E.readyState||"loading"!==E.readyState&&!E.documentElement.doScroll?C.setTimeout(k.ready):(E.addEventListener("DOMContentLoaded",B),C.addEventListener("load",B));var _=function(e,t,n,r,i,o,a){var s=0,u=e.length,l=null==n;if("object"===w(n))for(s in i=!0,n)_(e,t,s,n[s],!0,o,a);else if(void 0!==r&&(i=!0,m(r)||(a=!0),l&&(a?(t.call(e,r),t=null):(l=t,t=function(e,t,n){return l.call(k(e),n)})),t))for(;s<u;s++)t(e[s],n,a?r:r.call(e[s],s,t(e[s],n)));return i?e:l?t.call(e):u?t(e[0],n):o},z=/^-ms-/,U=/-([a-z])/g;function X(e,t){return t.toUpperCase()}function V(e){return e.replace(z,"ms-").replace(U,X)}var G=function(e){return 1===e.nodeType||9===e.nodeType||!+e.nodeType};function Y(){this.expando=k.expando+Y.uid++}Y.uid=1,Y.prototype={cache:function(e){var t=e[this.expando];return t||(t={},G(e)&&(e.nodeType?e[this.expando]=t:Object.defineProperty(e,this.expando,{value:t,configurable:!0}))),t},set:function(e,t,n){var r,i=this.cache(e);if("string"==typeof t)i[V(t)]=n;else for(r in t)i[V(r)]=t[r];return i},get:function(e,t){return void 0===t?this.cache(e):e[this.expando]&&e[this.expando][V(t)]},access:function(e,t,n){return void 0===t||t&&"string"==typeof t&&void 0===n?this.get(e,t):(this.set(e,t,n),void 0!==n?n:t)},remove:function(e,t){var n,r=e[this.expando];if(void 0!==r){if(void 
0!==t){n=(t=Array.isArray(t)?t.map(V):(t=V(t))in r?[t]:t.match(R)||[]).length;while(n--)delete r[t[n]]}(void 0===t||k.isEmptyObject(r))&&(e.nodeType?e[this.expando]=void 0:delete e[this.expando])}},hasData:function(e){var t=e[this.expando];return void 0!==t&&!k.isEmptyObject(t)}};var Q=new Y,J=new Y,K=/^(?:\{[\w\W]*\}|\[[\w\W]*\])$/,Z=/[A-Z]/g;function ee(e,t,n){var r,i;if(void 0===n&&1===e.nodeType)if(r="data-"+t.replace(Z,"-$&").toLowerCase(),"string"==typeof(n=e.getAttribute(r))){try{n="true"===(i=n)||"false"!==i&&("null"===i?null:i===+i+""?+i:K.test(i)?JSON.parse(i):i)}catch(e){}J.set(e,t,n)}else n=void 0;return n}k.extend({hasData:function(e){return J.hasData(e)||Q.hasData(e)},data:function(e,t,n){return J.access(e,t,n)},removeData:function(e,t){J.remove(e,t)},_data:function(e,t,n){return Q.access(e,t,n)},_removeData:function(e,t){Q.remove(e,t)}}),k.fn.extend({data:function(n,e){var t,r,i,o=this[0],a=o&&o.attributes;if(void 0===n){if(this.length&&(i=J.get(o),1===o.nodeType&&!Q.get(o,"hasDataAttrs"))){t=a.length;while(t--)a[t]&&0===(r=a[t].name).indexOf("data-")&&(r=V(r.slice(5)),ee(o,r,i[r]));Q.set(o,"hasDataAttrs",!0)}return i}return"object"==typeof n?this.each(function(){J.set(this,n)}):_(this,function(e){var t;if(o&&void 0===e)return void 0!==(t=J.get(o,n))?t:void 0!==(t=ee(o,n))?t:void 0;this.each(function(){J.set(this,n,e)})},null,e,1<arguments.length,null,!0)},removeData:function(e){return this.each(function(){J.remove(this,e)})}}),k.extend({queue:function(e,t,n){var r;if(e)return t=(t||"fx")+"queue",r=Q.get(e,t),n&&(!r||Array.isArray(n)?r=Q.access(e,t,k.makeArray(n)):r.push(n)),r||[]},dequeue:function(e,t){t=t||"fx";var n=k.queue(e,t),r=n.length,i=n.shift(),o=k._queueHooks(e,t);"inprogress"===i&&(i=n.shift(),r--),i&&("fx"===t&&n.unshift("inprogress"),delete o.stop,i.call(e,function(){k.dequeue(e,t)},o)),!r&&o&&o.empty.fire()},_queueHooks:function(e,t){var n=t+"queueHooks";return Q.get(e,n)||Q.access(e,n,{empty:k.Callbacks("once 
memory").add(function(){Q.remove(e,[t+"queue",n])})})}}),k.fn.extend({queue:function(t,n){var e=2;return"string"!=typeof t&&(n=t,t="fx",e--),arguments.length<e?k.queue(this[0],t):void 0===n?this:this.each(function(){var e=k.queue(this,t,n);k._queueHooks(this,t),"fx"===t&&"inprogress"!==e[0]&&k.dequeue(this,t)})},dequeue:function(e){return this.each(function(){k.dequeue(this,e)})},clearQueue:function(e){return this.queue(e||"fx",[])},promise:function(e,t){var n,r=1,i=k.Deferred(),o=this,a=this.length,s=function(){--r||i.resolveWith(o,[o])};"string"!=typeof e&&(t=e,e=void 0),e=e||"fx";while(a--)(n=Q.get(o[a],e+"queueHooks"))&&n.empty&&(r++,n.empty.add(s));return s(),i.promise(t)}});var te=/[+-]?(?:\d*\.|)\d+(?:[eE][+-]?\d+|)/.source,ne=new RegExp("^(?:([+-])=|)("+te+")([a-z%]*)$","i"),re=["Top","Right","Bottom","Left"],ie=E.documentElement,oe=function(e){return k.contains(e.ownerDocument,e)},ae={composed:!0};ie.getRootNode&&(oe=function(e){return k.contains(e.ownerDocument,e)||e.getRootNode(ae)===e.ownerDocument});var se=function(e,t){return"none"===(e=t||e).style.display||""===e.style.display&&oe(e)&&"none"===k.css(e,"display")},ue=function(e,t,n,r){var i,o,a={};for(o in t)a[o]=e.style[o],e.style[o]=t[o];for(o in i=n.apply(e,r||[]),t)e.style[o]=a[o];return i};function le(e,t,n,r){var i,o,a=20,s=r?function(){return r.cur()}:function(){return k.css(e,t,"")},u=s(),l=n&&n[3]||(k.cssNumber[t]?"":"px"),c=e.nodeType&&(k.cssNumber[t]||"px"!==l&&+u)&&ne.exec(k.css(e,t));if(c&&c[3]!==l){u/=2,l=l||c[3],c=+u||1;while(a--)k.style(e,t,c+l),(1-o)*(1-(o=s()/u||.5))<=0&&(a=0),c/=o;c*=2,k.style(e,t,c+l),n=n||[]}return n&&(c=+c||+u||0,i=n[1]?c+(n[1]+1)*n[2]:+n[2],r&&(r.unit=l,r.start=c,r.end=i)),i}var ce={};function fe(e,t){for(var n,r,i,o,a,s,u,l=[],c=0,f=e.length;c<f;c++)(r=e[c]).style&&(n=r.style.display,t?("none"===n&&(l[c]=Q.get(r,"display")||null,l[c]||(r.style.display="")),""===r.style.display&&se(r)&&(l[c]=(u=a=o=void 
0,a=(i=r).ownerDocument,s=i.nodeName,(u=ce[s])||(o=a.body.appendChild(a.createElement(s)),u=k.css(o,"display"),o.parentNode.removeChild(o),"none"===u&&(u="block"),ce[s]=u)))):"none"!==n&&(l[c]="none",Q.set(r,"display",n)));for(c=0;c<f;c++)null!=l[c]&&(e[c].style.display=l[c]);return e}k.fn.extend({show:function(){return fe(this,!0)},hide:function(){return fe(this)},toggle:function(e){return"boolean"==typeof e?e?this.show():this.hide():this.each(function(){se(this)?k(this).show():k(this).hide()})}});var pe=/^(?:checkbox|radio)$/i,de=/<([a-z][^\/\0>\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i,ge={option:[1,"<select multiple='multiple'>","</select>"],thead:[1,"<table>","</table>"],col:[2,"<table><colgroup>","</colgroup></table>"],tr:[2,"<table><tbody>","</tbody></table>"],td:[3,"<table><tbody><tr>","</tr></tbody></table>"],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?k.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;n<r;n++)Q.set(e[n],"globalEval",!t||Q.get(t[n],"globalEval"))}ge.optgroup=ge.option,ge.tbody=ge.tfoot=ge.colgroup=ge.caption=ge.thead,ge.th=ge.td;var me,xe,be=/<|&#?\w+;/;function we(e,t,n,r,i){for(var o,a,s,u,l,c,f=t.createDocumentFragment(),p=[],d=0,h=e.length;d<h;d++)if((o=e[d])||0===o)if("object"===w(o))k.merge(p,o.nodeType?[o]:o);else if(be.test(o)){a=a||f.appendChild(t.createElement("div")),s=(de.exec(o)||["",""])[1].toLowerCase(),u=ge[s]||ge._default,a.innerHTML=u[1]+k.htmlPrefilter(o)+u[2],c=u[0];while(c--)a=a.lastChild;k.merge(p,a.childNodes),(a=f.firstChild).textContent=""}else p.push(t.createTextNode(o));f.textContent="",d=0;while(o=p[d++])if(r&&-1<k.inArray(o,r))i&&i.push(o);else if(l=oe(o),a=ve(f.appendChild(o),"script"),l&&ye(a),n){c=0;while(o=a[c++])he.test(o.type||"")&&n.push(o)}return 
f}me=E.createDocumentFragment().appendChild(E.createElement("div")),(xe=E.createElement("input")).setAttribute("type","radio"),xe.setAttribute("checked","checked"),xe.setAttribute("name","t"),me.appendChild(xe),y.checkClone=me.cloneNode(!0).cloneNode(!0).lastChild.checked,me.innerHTML="<textarea>x</textarea>",y.noCloneChecked=!!me.cloneNode(!0).lastChild.defaultValue;var Te=/^key/,Ce=/^(?:mouse|pointer|contextmenu|drag|drop)|click/,Ee=/^([^.]*)(?:\.(.+)|)/;function ke(){return!0}function Se(){return!1}function Ne(e,t){return e===function(){try{return E.activeElement}catch(e){}}()==("focus"===t)}function Ae(e,t,n,r,i,o){var a,s;if("object"==typeof t){for(s in"string"!=typeof n&&(r=r||n,n=void 0),t)Ae(e,s,n,r,t[s],o);return e}if(null==r&&null==i?(i=n,r=n=void 0):null==i&&("string"==typeof n?(i=r,r=void 0):(i=r,r=n,n=void 0)),!1===i)i=Se;else if(!i)return e;return 1===o&&(a=i,(i=function(e){return k().off(e),a.apply(this,arguments)}).guid=a.guid||(a.guid=k.guid++)),e.each(function(){k.event.add(this,t,i,r,n)})}function De(e,i,o){o?(Q.set(e,i,!1),k.event.add(e,i,{namespace:!1,handler:function(e){var t,n,r=Q.get(this,i);if(1&e.isTrigger&&this[i]){if(r.length)(k.event.special[i]||{}).delegateType&&e.stopPropagation();else if(r=s.call(arguments),Q.set(this,i,r),t=o(this,i),this[i](),r!==(n=Q.get(this,i))||t?Q.set(this,i,!1):n={},r!==n)return e.stopImmediatePropagation(),e.preventDefault(),n.value}else r.length&&(Q.set(this,i,{value:k.event.trigger(k.extend(r[0],k.Event.prototype),r.slice(1),this)}),e.stopImmediatePropagation())}})):void 0===Q.get(e,i)&&k.event.add(e,i,ke)}k.event={global:{},add:function(t,e,n,r,i){var o,a,s,u,l,c,f,p,d,h,g,v=Q.get(t);if(v){n.handler&&(n=(o=n).handler,i=o.selector),i&&k.find.matchesSelector(ie,i),n.guid||(n.guid=k.guid++),(u=v.events)||(u=v.events={}),(a=v.handle)||(a=v.handle=function(e){return"undefined"!=typeof k&&k.event.triggered!==e.type?k.event.dispatch.apply(t,arguments):void 
0}),l=(e=(e||"").match(R)||[""]).length;while(l--)d=g=(s=Ee.exec(e[l])||[])[1],h=(s[2]||"").split(".").sort(),d&&(f=k.event.special[d]||{},d=(i?f.delegateType:f.bindType)||d,f=k.event.special[d]||{},c=k.extend({type:d,origType:g,data:r,handler:n,guid:n.guid,selector:i,needsContext:i&&k.expr.match.needsContext.test(i),namespace:h.join(".")},o),(p=u[d])||((p=u[d]=[]).delegateCount=0,f.setup&&!1!==f.setup.call(t,r,h,a)||t.addEventListener&&t.addEventListener(d,a)),f.add&&(f.add.call(t,c),c.handler.guid||(c.handler.guid=n.guid)),i?p.splice(p.delegateCount++,0,c):p.push(c),k.event.global[d]=!0)}},remove:function(e,t,n,r,i){var o,a,s,u,l,c,f,p,d,h,g,v=Q.hasData(e)&&Q.get(e);if(v&&(u=v.events)){l=(t=(t||"").match(R)||[""]).length;while(l--)if(d=g=(s=Ee.exec(t[l])||[])[1],h=(s[2]||"").split(".").sort(),d){f=k.event.special[d]||{},p=u[d=(r?f.delegateType:f.bindType)||d]||[],s=s[2]&&new RegExp("(^|\\.)"+h.join("\\.(?:.*\\.|)")+"(\\.|$)"),a=o=p.length;while(o--)c=p[o],!i&&g!==c.origType||n&&n.guid!==c.guid||s&&!s.test(c.namespace)||r&&r!==c.selector&&("**"!==r||!c.selector)||(p.splice(o,1),c.selector&&p.delegateCount--,f.remove&&f.remove.call(e,c));a&&!p.length&&(f.teardown&&!1!==f.teardown.call(e,h,v.handle)||k.removeEvent(e,d,v.handle),delete u[d])}else for(d in u)k.event.remove(e,d+t[l],n,r,!0);k.isEmptyObject(u)&&Q.remove(e,"handle events")}},dispatch:function(e){var t,n,r,i,o,a,s=k.event.fix(e),u=new Array(arguments.length),l=(Q.get(this,"events")||{})[s.type]||[],c=k.event.special[s.type]||{};for(u[0]=s,t=1;t<arguments.length;t++)u[t]=arguments[t];if(s.delegateTarget=this,!c.preDispatch||!1!==c.preDispatch.call(this,s)){a=k.event.handlers.call(this,s,l),t=0;while((i=a[t++])&&!s.isPropagationStopped()){s.currentTarget=i.elem,n=0;while((o=i.handlers[n++])&&!s.isImmediatePropagationStopped())s.rnamespace&&!1!==o.namespace&&!s.rnamespace.test(o.namespace)||(s.handleObj=o,s.data=o.data,void 
0!==(r=((k.event.special[o.origType]||{}).handle||o.handler).apply(i.elem,u))&&!1===(s.result=r)&&(s.preventDefault(),s.stopPropagation()))}return c.postDispatch&&c.postDispatch.call(this,s),s.result}},handlers:function(e,t){var n,r,i,o,a,s=[],u=t.delegateCount,l=e.target;if(u&&l.nodeType&&!("click"===e.type&&1<=e.button))for(;l!==this;l=l.parentNode||this)if(1===l.nodeType&&("click"!==e.type||!0!==l.disabled)){for(o=[],a={},n=0;n<u;n++)void 0===a[i=(r=t[n]).selector+" "]&&(a[i]=r.needsContext?-1<k(i,this).index(l):k.find(i,this,null,[l]).length),a[i]&&o.push(r);o.length&&s.push({elem:l,handlers:o})}return l=this,u<t.length&&s.push({elem:l,handlers:t.slice(u)}),s},addProp:function(t,e){Object.defineProperty(k.Event.prototype,t,{enumerable:!0,configurable:!0,get:m(e)?function(){if(this.originalEvent)return e(this.originalEvent)}:function(){if(this.originalEvent)return this.originalEvent[t]},set:function(e){Object.defineProperty(this,t,{enumerable:!0,configurable:!0,writable:!0,value:e})}})},fix:function(e){return e[k.expando]?e:new k.Event(e)},special:{load:{noBubble:!0},click:{setup:function(e){var t=this||e;return pe.test(t.type)&&t.click&&A(t,"input")&&De(t,"click",ke),!1},trigger:function(e){var t=this||e;return pe.test(t.type)&&t.click&&A(t,"input")&&De(t,"click"),!0},_default:function(e){var t=e.target;return pe.test(t.type)&&t.click&&A(t,"input")&&Q.get(t,"click")||A(t,"a")}},beforeunload:{postDispatch:function(e){void 0!==e.result&&e.originalEvent&&(e.originalEvent.returnValue=e.result)}}}},k.removeEvent=function(e,t,n){e.removeEventListener&&e.removeEventListener(t,n)},k.Event=function(e,t){if(!(this instanceof k.Event))return new k.Event(e,t);e&&e.type?(this.originalEvent=e,this.type=e.type,this.isDefaultPrevented=e.defaultPrevented||void 
0===e.defaultPrevented&&!1===e.returnValue?ke:Se,this.target=e.target&&3===e.target.nodeType?e.target.parentNode:e.target,this.currentTarget=e.currentTarget,this.relatedTarget=e.relatedTarget):this.type=e,t&&k.extend(this,t),this.timeStamp=e&&e.timeStamp||Date.now(),this[k.expando]=!0},k.Event.prototype={constructor:k.Event,isDefaultPrevented:Se,isPropagationStopped:Se,isImmediatePropagationStopped:Se,isSimulated:!1,preventDefault:function(){var e=this.originalEvent;this.isDefaultPrevented=ke,e&&!this.isSimulated&&e.preventDefault()},stopPropagation:function(){var e=this.originalEvent;this.isPropagationStopped=ke,e&&!this.isSimulated&&e.stopPropagation()},stopImmediatePropagation:function(){var e=this.originalEvent;this.isImmediatePropagationStopped=ke,e&&!this.isSimulated&&e.stopImmediatePropagation(),this.stopPropagation()}},k.each({altKey:!0,bubbles:!0,cancelable:!0,changedTouches:!0,ctrlKey:!0,detail:!0,eventPhase:!0,metaKey:!0,pageX:!0,pageY:!0,shiftKey:!0,view:!0,"char":!0,code:!0,charCode:!0,key:!0,keyCode:!0,button:!0,buttons:!0,clientX:!0,clientY:!0,offsetX:!0,offsetY:!0,pointerId:!0,pointerType:!0,screenX:!0,screenY:!0,targetTouches:!0,toElement:!0,touches:!0,which:function(e){var t=e.button;return null==e.which&&Te.test(e.type)?null!=e.charCode?e.charCode:e.keyCode:!e.which&&void 0!==t&&Ce.test(e.type)?1&t?1:2&t?3:4&t?2:0:e.which}},k.event.addProp),k.each({focus:"focusin",blur:"focusout"},function(e,t){k.event.special[e]={setup:function(){return De(this,e,Ne),!1},trigger:function(){return De(this,e),!0},delegateType:t}}),k.each({mouseenter:"mouseover",mouseleave:"mouseout",pointerenter:"pointerover",pointerleave:"pointerout"},function(e,i){k.event.special[e]={delegateType:i,bindType:i,handle:function(e){var t,n=e.relatedTarget,r=e.handleObj;return n&&(n===this||k.contains(this,n))||(e.type=r.origType,t=r.handler.apply(this,arguments),e.type=i),t}}}),k.fn.extend({on:function(e,t,n,r){return Ae(this,e,t,n,r)},one:function(e,t,n,r){return 
Ae(this,e,t,n,r,1)},off:function(e,t,n){var r,i;if(e&&e.preventDefault&&e.handleObj)return r=e.handleObj,k(e.delegateTarget).off(r.namespace?r.origType+"."+r.namespace:r.origType,r.selector,r.handler),this;if("object"==typeof e){for(i in e)this.off(i,t,e[i]);return this}return!1!==t&&"function"!=typeof t||(n=t,t=void 0),!1===n&&(n=Se),this.each(function(){k.event.remove(this,e,n,t)})}});var je=/<(?!area|br|col|embed|hr|img|input|link|meta|param)(([a-z][^\/\0>\x20\t\r\n\f]*)[^>]*)\/>/gi,qe=/<script|<style|<link/i,Le=/checked\s*(?:[^=]|=\s*.checked.)/i,He=/^\s*<!(?:\[CDATA\[|--)|(?:\]\]|--)>\s*$/g;function Oe(e,t){return A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&k(e).children("tbody")[0]||e}function Pe(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function Re(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Me(e,t){var n,r,i,o,a,s,u,l;if(1===t.nodeType){if(Q.hasData(e)&&(o=Q.access(e),a=Q.set(t,o),l=o.events))for(i in delete a.handle,a.events={},l)for(n=0,r=l[i].length;n<r;n++)k.event.add(t,i,l[i][n]);J.hasData(e)&&(s=J.access(e),u=k.extend({},s),J.set(t,u))}}function Ie(n,r,i,o){r=g.apply([],r);var e,t,a,s,u,l,c=0,f=n.length,p=f-1,d=r[0],h=m(d);if(h||1<f&&"string"==typeof d&&!y.checkClone&&Le.test(d))return n.each(function(e){var t=n.eq(e);h&&(r[0]=d.call(this,e,t.html())),Ie(t,r,i,o)});if(f&&(t=(e=we(r,n[0].ownerDocument,!1,n,o)).firstChild,1===e.childNodes.length&&(e=t),t||o)){for(s=(a=k.map(ve(e,"script"),Pe)).length;c<f;c++)u=e,c!==p&&(u=k.clone(u,!0,!0),s&&k.merge(a,ve(u,"script"))),i.call(n[c],u,c);if(s)for(l=a[a.length-1].ownerDocument,k.map(a,Re),c=0;c<s;c++)u=a[c],he.test(u.type||"")&&!Q.access(u,"globalEval")&&k.contains(l,u)&&(u.src&&"module"!==(u.type||"").toLowerCase()?k._evalUrl&&!u.noModule&&k._evalUrl(u.src,{nonce:u.nonce||u.getAttribute("nonce")}):b(u.textContent.replace(He,""),u,l))}return n}function We(e,t,n){for(var 
r,i=t?k.filter(t,e):e,o=0;null!=(r=i[o]);o++)n||1!==r.nodeType||k.cleanData(ve(r)),r.parentNode&&(n&&oe(r)&&ye(ve(r,"script")),r.parentNode.removeChild(r));return e}k.extend({htmlPrefilter:function(e){return e.replace(je,"<$1></$2>")},clone:function(e,t,n){var r,i,o,a,s,u,l,c=e.cloneNode(!0),f=oe(e);if(!(y.noCloneChecked||1!==e.nodeType&&11!==e.nodeType||k.isXMLDoc(e)))for(a=ve(c),r=0,i=(o=ve(e)).length;r<i;r++)s=o[r],u=a[r],void 0,"input"===(l=u.nodeName.toLowerCase())&&pe.test(s.type)?u.checked=s.checked:"input"!==l&&"textarea"!==l||(u.defaultValue=s.defaultValue);if(t)if(n)for(o=o||ve(e),a=a||ve(c),r=0,i=o.length;r<i;r++)Me(o[r],a[r]);else Me(e,c);return 0<(a=ve(c,"script")).length&&ye(a,!f&&ve(e,"script")),c},cleanData:function(e){for(var t,n,r,i=k.event.special,o=0;void 0!==(n=e[o]);o++)if(G(n)){if(t=n[Q.expando]){if(t.events)for(r in t.events)i[r]?k.event.remove(n,r):k.removeEvent(n,r,t.handle);n[Q.expando]=void 0}n[J.expando]&&(n[J.expando]=void 0)}}}),k.fn.extend({detach:function(e){return We(this,e,!0)},remove:function(e){return We(this,e)},text:function(e){return _(this,function(e){return void 0===e?k.text(this):this.empty().each(function(){1!==this.nodeType&&11!==this.nodeType&&9!==this.nodeType||(this.textContent=e)})},null,e,arguments.length)},append:function(){return Ie(this,arguments,function(e){1!==this.nodeType&&11!==this.nodeType&&9!==this.nodeType||Oe(this,e).appendChild(e)})},prepend:function(){return Ie(this,arguments,function(e){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var t=Oe(this,e);t.insertBefore(e,t.firstChild)}})},before:function(){return Ie(this,arguments,function(e){this.parentNode&&this.parentNode.insertBefore(e,this)})},after:function(){return Ie(this,arguments,function(e){this.parentNode&&this.parentNode.insertBefore(e,this.nextSibling)})},empty:function(){for(var e,t=0;null!=(e=this[t]);t++)1===e.nodeType&&(k.cleanData(ve(e,!1)),e.textContent="");return this},clone:function(e,t){return 
e=null!=e&&e,t=null==t?e:t,this.map(function(){return k.clone(this,e,t)})},html:function(e){return _(this,function(e){var t=this[0]||{},n=0,r=this.length;if(void 0===e&&1===t.nodeType)return t.innerHTML;if("string"==typeof e&&!qe.test(e)&&!ge[(de.exec(e)||["",""])[1].toLowerCase()]){e=k.htmlPrefilter(e);try{for(;n<r;n++)1===(t=this[n]||{}).nodeType&&(k.cleanData(ve(t,!1)),t.innerHTML=e);t=0}catch(e){}}t&&this.empty().append(e)},null,e,arguments.length)},replaceWith:function(){var n=[];return Ie(this,arguments,function(e){var t=this.parentNode;k.inArray(this,n)<0&&(k.cleanData(ve(this)),t&&t.replaceChild(e,this))},n)}}),k.each({appendTo:"append",prependTo:"prepend",insertBefore:"before",insertAfter:"after",replaceAll:"replaceWith"},function(e,a){k.fn[e]=function(e){for(var t,n=[],r=k(e),i=r.length-1,o=0;o<=i;o++)t=o===i?this:this.clone(!0),k(r[o])[a](t),u.apply(n,t.get());return this.pushStack(n)}});var $e=new RegExp("^("+te+")(?!px)[a-z%]+$","i"),Fe=function(e){var t=e.ownerDocument.defaultView;return t&&t.opener||(t=C),t.getComputedStyle(e)},Be=new RegExp(re.join("|"),"i");function _e(e,t,n){var r,i,o,a,s=e.style;return(n=n||Fe(e))&&(""!==(a=n.getPropertyValue(t)||n[t])||oe(e)||(a=k.style(e,t)),!y.pixelBoxStyles()&&$e.test(a)&&Be.test(t)&&(r=s.width,i=s.minWidth,o=s.maxWidth,s.minWidth=s.maxWidth=s.width=a,a=n.width,s.width=r,s.minWidth=i,s.maxWidth=o)),void 0!==a?a+"":a}function ze(e,t){return{get:function(){if(!e())return(this.get=t).apply(this,arguments);delete this.get}}}!function(){function e(){if(u){s.style.cssText="position:absolute;left:-11111px;width:60px;margin-top:1px;padding:0;border:0",u.style.cssText="position:relative;display:block;box-sizing:border-box;overflow:scroll;margin:auto;border:1px;padding:1px;width:60%;top:1%",ie.appendChild(s).appendChild(u);var 
e=C.getComputedStyle(u);n="1%"!==e.top,a=12===t(e.marginLeft),u.style.right="60%",o=36===t(e.right),r=36===t(e.width),u.style.position="absolute",i=12===t(u.offsetWidth/3),ie.removeChild(s),u=null}}function t(e){return Math.round(parseFloat(e))}var n,r,i,o,a,s=E.createElement("div"),u=E.createElement("div");u.style&&(u.style.backgroundClip="content-box",u.cloneNode(!0).style.backgroundClip="",y.clearCloneStyle="content-box"===u.style.backgroundClip,k.extend(y,{boxSizingReliable:function(){return e(),r},pixelBoxStyles:function(){return e(),o},pixelPosition:function(){return e(),n},reliableMarginLeft:function(){return e(),a},scrollboxSize:function(){return e(),i}}))}();var Ue=["Webkit","Moz","ms"],Xe=E.createElement("div").style,Ve={};function Ge(e){var t=k.cssProps[e]||Ve[e];return t||(e in Xe?e:Ve[e]=function(e){var t=e[0].toUpperCase()+e.slice(1),n=Ue.length;while(n--)if((e=Ue[n]+t)in Xe)return e}(e)||e)}var Ye=/^(none|table(?!-c[ea]).+)/,Qe=/^--/,Je={position:"absolute",visibility:"hidden",display:"block"},Ke={letterSpacing:"0",fontWeight:"400"};function Ze(e,t,n){var r=ne.exec(t);return r?Math.max(0,r[2]-(n||0))+(r[3]||"px"):t}function et(e,t,n,r,i,o){var a="width"===t?1:0,s=0,u=0;if(n===(r?"border":"content"))return 0;for(;a<4;a+=2)"margin"===n&&(u+=k.css(e,n+re[a],!0,i)),r?("content"===n&&(u-=k.css(e,"padding"+re[a],!0,i)),"margin"!==n&&(u-=k.css(e,"border"+re[a]+"Width",!0,i))):(u+=k.css(e,"padding"+re[a],!0,i),"padding"!==n?u+=k.css(e,"border"+re[a]+"Width",!0,i):s+=k.css(e,"border"+re[a]+"Width",!0,i));return!r&&0<=o&&(u+=Math.max(0,Math.ceil(e["offset"+t[0].toUpperCase()+t.slice(1)]-o-u-s-.5))||0),u}function tt(e,t,n){var r=Fe(e),i=(!y.boxSizingReliable()||n)&&"border-box"===k.css(e,"boxSizing",!1,r),o=i,a=_e(e,t,r),s="offset"+t[0].toUpperCase()+t.slice(1);if($e.test(a)){if(!n)return 
a;a="auto"}return(!y.boxSizingReliable()&&i||"auto"===a||!parseFloat(a)&&"inline"===k.css(e,"display",!1,r))&&e.getClientRects().length&&(i="border-box"===k.css(e,"boxSizing",!1,r),(o=s in e)&&(a=e[s])),(a=parseFloat(a)||0)+et(e,t,n||(i?"border":"content"),o,r,a)+"px"}function nt(e,t,n,r,i){return new nt.prototype.init(e,t,n,r,i)}k.extend({cssHooks:{opacity:{get:function(e,t){if(t){var n=_e(e,"opacity");return""===n?"1":n}}}},cssNumber:{animationIterationCount:!0,columnCount:!0,fillOpacity:!0,flexGrow:!0,flexShrink:!0,fontWeight:!0,gridArea:!0,gridColumn:!0,gridColumnEnd:!0,gridColumnStart:!0,gridRow:!0,gridRowEnd:!0,gridRowStart:!0,lineHeight:!0,opacity:!0,order:!0,orphans:!0,widows:!0,zIndex:!0,zoom:!0},cssProps:{},style:function(e,t,n,r){if(e&&3!==e.nodeType&&8!==e.nodeType&&e.style){var i,o,a,s=V(t),u=Qe.test(t),l=e.style;if(u||(t=Ge(s)),a=k.cssHooks[t]||k.cssHooks[s],void 0===n)return a&&"get"in a&&void 0!==(i=a.get(e,!1,r))?i:l[t];"string"===(o=typeof n)&&(i=ne.exec(n))&&i[1]&&(n=le(e,t,i),o="number"),null!=n&&n==n&&("number"!==o||u||(n+=i&&i[3]||(k.cssNumber[s]?"":"px")),y.clearCloneStyle||""!==n||0!==t.indexOf("background")||(l[t]="inherit"),a&&"set"in a&&void 0===(n=a.set(e,n,r))||(u?l.setProperty(t,n):l[t]=n))}},css:function(e,t,n,r){var i,o,a,s=V(t);return Qe.test(t)||(t=Ge(s)),(a=k.cssHooks[t]||k.cssHooks[s])&&"get"in a&&(i=a.get(e,!0,n)),void 0===i&&(i=_e(e,t,r)),"normal"===i&&t in Ke&&(i=Ke[t]),""===n||n?(o=parseFloat(i),!0===n||isFinite(o)?o||0:i):i}}),k.each(["height","width"],function(e,u){k.cssHooks[u]={get:function(e,t,n){if(t)return!Ye.test(k.css(e,"display"))||e.getClientRects().length&&e.getBoundingClientRect().width?tt(e,u,n):ue(e,Je,function(){return tt(e,u,n)})},set:function(e,t,n){var r,i=Fe(e),o=!y.scrollboxSize()&&"absolute"===i.position,a=(o||n)&&"border-box"===k.css(e,"boxSizing",!1,i),s=n?et(e,u,n,a,i):0;return 
a&&o&&(s-=Math.ceil(e["offset"+u[0].toUpperCase()+u.slice(1)]-parseFloat(i[u])-et(e,u,"border",!1,i)-.5)),s&&(r=ne.exec(t))&&"px"!==(r[3]||"px")&&(e.style[u]=t,t=k.css(e,u)),Ze(0,t,s)}}}),k.cssHooks.marginLeft=ze(y.reliableMarginLeft,function(e,t){if(t)return(parseFloat(_e(e,"marginLeft"))||e.getBoundingClientRect().left-ue(e,{marginLeft:0},function(){return e.getBoundingClientRect().left}))+"px"}),k.each({margin:"",padding:"",border:"Width"},function(i,o){k.cssHooks[i+o]={expand:function(e){for(var t=0,n={},r="string"==typeof e?e.split(" "):[e];t<4;t++)n[i+re[t]+o]=r[t]||r[t-2]||r[0];return n}},"margin"!==i&&(k.cssHooks[i+o].set=Ze)}),k.fn.extend({css:function(e,t){return _(this,function(e,t,n){var r,i,o={},a=0;if(Array.isArray(t)){for(r=Fe(e),i=t.length;a<i;a++)o[t[a]]=k.css(e,t[a],!1,r);return o}return void 0!==n?k.style(e,t,n):k.css(e,t)},e,t,1<arguments.length)}}),((k.Tween=nt).prototype={constructor:nt,init:function(e,t,n,r,i,o){this.elem=e,this.prop=n,this.easing=i||k.easing._default,this.options=t,this.start=this.now=this.cur(),this.end=r,this.unit=o||(k.cssNumber[n]?"":"px")},cur:function(){var e=nt.propHooks[this.prop];return e&&e.get?e.get(this):nt.propHooks._default.get(this)},run:function(e){var t,n=nt.propHooks[this.prop];return this.options.duration?this.pos=t=k.easing[this.easing](e,this.options.duration*e,0,1,this.options.duration):this.pos=t=e,this.now=(this.end-this.start)*t+this.start,this.options.step&&this.options.step.call(this.elem,this.now,this),n&&n.set?n.set(this):nt.propHooks._default.set(this),this}}).init.prototype=nt.prototype,(nt.propHooks={_default:{get:function(e){var t;return 
1!==e.elem.nodeType||null!=e.elem[e.prop]&&null==e.elem.style[e.prop]?e.elem[e.prop]:(t=k.css(e.elem,e.prop,""))&&"auto"!==t?t:0},set:function(e){k.fx.step[e.prop]?k.fx.step[e.prop](e):1!==e.elem.nodeType||!k.cssHooks[e.prop]&&null==e.elem.style[Ge(e.prop)]?e.elem[e.prop]=e.now:k.style(e.elem,e.prop,e.now+e.unit)}}}).scrollTop=nt.propHooks.scrollLeft={set:function(e){e.elem.nodeType&&e.elem.parentNode&&(e.elem[e.prop]=e.now)}},k.easing={linear:function(e){return e},swing:function(e){return.5-Math.cos(e*Math.PI)/2},_default:"swing"},k.fx=nt.prototype.init,k.fx.step={};var rt,it,ot,at,st=/^(?:toggle|show|hide)$/,ut=/queueHooks$/;function lt(){it&&(!1===E.hidden&&C.requestAnimationFrame?C.requestAnimationFrame(lt):C.setTimeout(lt,k.fx.interval),k.fx.tick())}function ct(){return C.setTimeout(function(){rt=void 0}),rt=Date.now()}function ft(e,t){var n,r=0,i={height:e};for(t=t?1:0;r<4;r+=2-t)i["margin"+(n=re[r])]=i["padding"+n]=e;return t&&(i.opacity=i.width=e),i}function pt(e,t,n){for(var r,i=(dt.tweeners[t]||[]).concat(dt.tweeners["*"]),o=0,a=i.length;o<a;o++)if(r=i[o].call(n,t,e))return r}function dt(o,e,t){var n,a,r=0,i=dt.prefilters.length,s=k.Deferred().always(function(){delete u.elem}),u=function(){if(a)return!1;for(var e=rt||ct(),t=Math.max(0,l.startTime+l.duration-e),n=1-(t/l.duration||0),r=0,i=l.tweens.length;r<i;r++)l.tweens[r].run(n);return s.notifyWith(o,[l,n,t]),n<1&&i?t:(i||s.notifyWith(o,[l,1,0]),s.resolveWith(o,[l]),!1)},l=s.promise({elem:o,props:k.extend({},e),opts:k.extend(!0,{specialEasing:{},easing:k.easing._default},t),originalProperties:e,originalOptions:t,startTime:rt||ct(),duration:t.duration,tweens:[],createTween:function(e,t){var n=k.Tween(o,l.opts,e,t,l.opts.specialEasing[e]||l.opts.easing);return l.tweens.push(n),n},stop:function(e){var t=0,n=e?l.tweens.length:0;if(a)return this;for(a=!0;t<n;t++)l.tweens[t].run(1);return e?(s.notifyWith(o,[l,1,0]),s.resolveWith(o,[l,e])):s.rejectWith(o,[l,e]),this}}),c=l.props;for(!function(e,t){var 
n,r,i,o,a;for(n in e)if(i=t[r=V(n)],o=e[n],Array.isArray(o)&&(i=o[1],o=e[n]=o[0]),n!==r&&(e[r]=o,delete e[n]),(a=k.cssHooks[r])&&"expand"in a)for(n in o=a.expand(o),delete e[r],o)n in e||(e[n]=o[n],t[n]=i);else t[r]=i}(c,l.opts.specialEasing);r<i;r++)if(n=dt.prefilters[r].call(l,o,c,l.opts))return m(n.stop)&&(k._queueHooks(l.elem,l.opts.queue).stop=n.stop.bind(n)),n;return k.map(c,pt,l),m(l.opts.start)&&l.opts.start.call(o,l),l.progress(l.opts.progress).done(l.opts.done,l.opts.complete).fail(l.opts.fail).always(l.opts.always),k.fx.timer(k.extend(u,{elem:o,anim:l,queue:l.opts.queue})),l}k.Animation=k.extend(dt,{tweeners:{"*":[function(e,t){var n=this.createTween(e,t);return le(n.elem,e,ne.exec(t),n),n}]},tweener:function(e,t){m(e)?(t=e,e=["*"]):e=e.match(R);for(var n,r=0,i=e.length;r<i;r++)n=e[r],dt.tweeners[n]=dt.tweeners[n]||[],dt.tweeners[n].unshift(t)},prefilters:[function(e,t,n){var r,i,o,a,s,u,l,c,f="width"in t||"height"in t,p=this,d={},h=e.style,g=e.nodeType&&se(e),v=Q.get(e,"fxshow");for(r in n.queue||(null==(a=k._queueHooks(e,"fx")).unqueued&&(a.unqueued=0,s=a.empty.fire,a.empty.fire=function(){a.unqueued||s()}),a.unqueued++,p.always(function(){p.always(function(){a.unqueued--,k.queue(e,"fx").length||a.empty.fire()})})),t)if(i=t[r],st.test(i)){if(delete t[r],o=o||"toggle"===i,i===(g?"hide":"show")){if("show"!==i||!v||void 0===v[r])continue;g=!0}d[r]=v&&v[r]||k.style(e,r)}if((u=!k.isEmptyObject(t))||!k.isEmptyObject(d))for(r in 
f&&1===e.nodeType&&(n.overflow=[h.overflow,h.overflowX,h.overflowY],null==(l=v&&v.display)&&(l=Q.get(e,"display")),"none"===(c=k.css(e,"display"))&&(l?c=l:(fe([e],!0),l=e.style.display||l,c=k.css(e,"display"),fe([e]))),("inline"===c||"inline-block"===c&&null!=l)&&"none"===k.css(e,"float")&&(u||(p.done(function(){h.display=l}),null==l&&(c=h.display,l="none"===c?"":c)),h.display="inline-block")),n.overflow&&(h.overflow="hidden",p.always(function(){h.overflow=n.overflow[0],h.overflowX=n.overflow[1],h.overflowY=n.overflow[2]})),u=!1,d)u||(v?"hidden"in v&&(g=v.hidden):v=Q.access(e,"fxshow",{display:l}),o&&(v.hidden=!g),g&&fe([e],!0),p.done(function(){for(r in g||fe([e]),Q.remove(e,"fxshow"),d)k.style(e,r,d[r])})),u=pt(g?v[r]:0,r,p),r in v||(v[r]=u.start,g&&(u.end=u.start,u.start=0))}],prefilter:function(e,t){t?dt.prefilters.unshift(e):dt.prefilters.push(e)}}),k.speed=function(e,t,n){var r=e&&"object"==typeof e?k.extend({},e):{complete:n||!n&&t||m(e)&&e,duration:e,easing:n&&t||t&&!m(t)&&t};return k.fx.off?r.duration=0:"number"!=typeof r.duration&&(r.duration in k.fx.speeds?r.duration=k.fx.speeds[r.duration]:r.duration=k.fx.speeds._default),null!=r.queue&&!0!==r.queue||(r.queue="fx"),r.old=r.complete,r.complete=function(){m(r.old)&&r.old.call(this),r.queue&&k.dequeue(this,r.queue)},r},k.fn.extend({fadeTo:function(e,t,n,r){return this.filter(se).css("opacity",0).show().end().animate({opacity:t},e,n,r)},animate:function(t,e,n,r){var i=k.isEmptyObject(t),o=k.speed(e,n,r),a=function(){var e=dt(this,k.extend({},t),o);(i||Q.get(this,"finish"))&&e.stop(!0)};return a.finish=a,i||!1===o.queue?this.each(a):this.queue(o.queue,a)},stop:function(i,e,o){var a=function(e){var t=e.stop;delete e.stop,t(o)};return"string"!=typeof i&&(o=e,e=i,i=void 0),e&&!1!==i&&this.queue(i||"fx",[]),this.each(function(){var e=!0,t=null!=i&&i+"queueHooks",n=k.timers,r=Q.get(this);if(t)r[t]&&r[t].stop&&a(r[t]);else for(t in 
r)r[t]&&r[t].stop&&ut.test(t)&&a(r[t]);for(t=n.length;t--;)n[t].elem!==this||null!=i&&n[t].queue!==i||(n[t].anim.stop(o),e=!1,n.splice(t,1));!e&&o||k.dequeue(this,i)})},finish:function(a){return!1!==a&&(a=a||"fx"),this.each(function(){var e,t=Q.get(this),n=t[a+"queue"],r=t[a+"queueHooks"],i=k.timers,o=n?n.length:0;for(t.finish=!0,k.queue(this,a,[]),r&&r.stop&&r.stop.call(this,!0),e=i.length;e--;)i[e].elem===this&&i[e].queue===a&&(i[e].anim.stop(!0),i.splice(e,1));for(e=0;e<o;e++)n[e]&&n[e].finish&&n[e].finish.call(this);delete t.finish})}}),k.each(["toggle","show","hide"],function(e,r){var i=k.fn[r];k.fn[r]=function(e,t,n){return null==e||"boolean"==typeof e?i.apply(this,arguments):this.animate(ft(r,!0),e,t,n)}}),k.each({slideDown:ft("show"),slideUp:ft("hide"),slideToggle:ft("toggle"),fadeIn:{opacity:"show"},fadeOut:{opacity:"hide"},fadeToggle:{opacity:"toggle"}},function(e,r){k.fn[e]=function(e,t,n){return this.animate(r,e,t,n)}}),k.timers=[],k.fx.tick=function(){var e,t=0,n=k.timers;for(rt=Date.now();t<n.length;t++)(e=n[t])()||n[t]!==e||n.splice(t--,1);n.length||k.fx.stop(),rt=void 0},k.fx.timer=function(e){k.timers.push(e),k.fx.start()},k.fx.interval=13,k.fx.start=function(){it||(it=!0,lt())},k.fx.stop=function(){it=null},k.fx.speeds={slow:600,fast:200,_default:400},k.fn.delay=function(r,e){return r=k.fx&&k.fx.speeds[r]||r,e=e||"fx",this.queue(e,function(e,t){var n=C.setTimeout(e,r);t.stop=function(){C.clearTimeout(n)}})},ot=E.createElement("input"),at=E.createElement("select").appendChild(E.createElement("option")),ot.type="checkbox",y.checkOn=""!==ot.value,y.optSelected=at.selected,(ot=E.createElement("input")).value="t",ot.type="radio",y.radioValue="t"===ot.value;var ht,gt=k.expr.attrHandle;k.fn.extend({attr:function(e,t){return _(this,k.attr,e,t,1<arguments.length)},removeAttr:function(e){return this.each(function(){k.removeAttr(this,e)})}}),k.extend({attr:function(e,t,n){var r,i,o=e.nodeType;if(3!==o&&8!==o&&2!==o)return"undefined"==typeof 
e.getAttribute?k.prop(e,t,n):(1===o&&k.isXMLDoc(e)||(i=k.attrHooks[t.toLowerCase()]||(k.expr.match.bool.test(t)?ht:void 0)),void 0!==n?null===n?void k.removeAttr(e,t):i&&"set"in i&&void 0!==(r=i.set(e,n,t))?r:(e.setAttribute(t,n+""),n):i&&"get"in i&&null!==(r=i.get(e,t))?r:null==(r=k.find.attr(e,t))?void 0:r)},attrHooks:{type:{set:function(e,t){if(!y.radioValue&&"radio"===t&&A(e,"input")){var n=e.value;return e.setAttribute("type",t),n&&(e.value=n),t}}}},removeAttr:function(e,t){var n,r=0,i=t&&t.match(R);if(i&&1===e.nodeType)while(n=i[r++])e.removeAttribute(n)}}),ht={set:function(e,t,n){return!1===t?k.removeAttr(e,n):e.setAttribute(n,n),n}},k.each(k.expr.match.bool.source.match(/\w+/g),function(e,t){var a=gt[t]||k.find.attr;gt[t]=function(e,t,n){var r,i,o=t.toLowerCase();return n||(i=gt[o],gt[o]=r,r=null!=a(e,t,n)?o:null,gt[o]=i),r}});var vt=/^(?:input|select|textarea|button)$/i,yt=/^(?:a|area)$/i;function mt(e){return(e.match(R)||[]).join(" ")}function xt(e){return e.getAttribute&&e.getAttribute("class")||""}function bt(e){return Array.isArray(e)?e:"string"==typeof e&&e.match(R)||[]}k.fn.extend({prop:function(e,t){return _(this,k.prop,e,t,1<arguments.length)},removeProp:function(e){return this.each(function(){delete this[k.propFix[e]||e]})}}),k.extend({prop:function(e,t,n){var r,i,o=e.nodeType;if(3!==o&&8!==o&&2!==o)return 1===o&&k.isXMLDoc(e)||(t=k.propFix[t]||t,i=k.propHooks[t]),void 0!==n?i&&"set"in i&&void 0!==(r=i.set(e,n,t))?r:e[t]=n:i&&"get"in i&&null!==(r=i.get(e,t))?r:e[t]},propHooks:{tabIndex:{get:function(e){var t=k.find.attr(e,"tabindex");return t?parseInt(t,10):vt.test(e.nodeName)||yt.test(e.nodeName)&&e.href?0:-1}}},propFix:{"for":"htmlFor","class":"className"}}),y.optSelected||(k.propHooks.selected={get:function(e){var t=e.parentNode;return t&&t.parentNode&&t.parentNode.selectedIndex,null},set:function(e){var 
t=e.parentNode;t&&(t.selectedIndex,t.parentNode&&t.parentNode.selectedIndex)}}),k.each(["tabIndex","readOnly","maxLength","cellSpacing","cellPadding","rowSpan","colSpan","useMap","frameBorder","contentEditable"],function(){k.propFix[this.toLowerCase()]=this}),k.fn.extend({addClass:function(t){var e,n,r,i,o,a,s,u=0;if(m(t))return this.each(function(e){k(this).addClass(t.call(this,e,xt(this)))});if((e=bt(t)).length)while(n=this[u++])if(i=xt(n),r=1===n.nodeType&&" "+mt(i)+" "){a=0;while(o=e[a++])r.indexOf(" "+o+" ")<0&&(r+=o+" ");i!==(s=mt(r))&&n.setAttribute("class",s)}return this},removeClass:function(t){var e,n,r,i,o,a,s,u=0;if(m(t))return this.each(function(e){k(this).removeClass(t.call(this,e,xt(this)))});if(!arguments.length)return this.attr("class","");if((e=bt(t)).length)while(n=this[u++])if(i=xt(n),r=1===n.nodeType&&" "+mt(i)+" "){a=0;while(o=e[a++])while(-1<r.indexOf(" "+o+" "))r=r.replace(" "+o+" "," ");i!==(s=mt(r))&&n.setAttribute("class",s)}return this},toggleClass:function(i,t){var o=typeof i,a="string"===o||Array.isArray(i);return"boolean"==typeof t&&a?t?this.addClass(i):this.removeClass(i):m(i)?this.each(function(e){k(this).toggleClass(i.call(this,e,xt(this),t),t)}):this.each(function(){var e,t,n,r;if(a){t=0,n=k(this),r=bt(i);while(e=r[t++])n.hasClass(e)?n.removeClass(e):n.addClass(e)}else void 0!==i&&"boolean"!==o||((e=xt(this))&&Q.set(this,"__className__",e),this.setAttribute&&this.setAttribute("class",e||!1===i?"":Q.get(this,"__className__")||""))})},hasClass:function(e){var t,n,r=0;t=" "+e+" ";while(n=this[r++])if(1===n.nodeType&&-1<(" "+mt(xt(n))+" ").indexOf(t))return!0;return!1}});var wt=/\r/g;k.fn.extend({val:function(n){var r,e,i,t=this[0];return arguments.length?(i=m(n),this.each(function(e){var t;1===this.nodeType&&(null==(t=i?n.call(this,e,k(this).val()):n)?t="":"number"==typeof t?t+="":Array.isArray(t)&&(t=k.map(t,function(e){return null==e?"":e+""})),(r=k.valHooks[this.type]||k.valHooks[this.nodeName.toLowerCase()])&&"set"in r&&void 
0!==r.set(this,t,"value")||(this.value=t))})):t?(r=k.valHooks[t.type]||k.valHooks[t.nodeName.toLowerCase()])&&"get"in r&&void 0!==(e=r.get(t,"value"))?e:"string"==typeof(e=t.value)?e.replace(wt,""):null==e?"":e:void 0}}),k.extend({valHooks:{option:{get:function(e){var t=k.find.attr(e,"value");return null!=t?t:mt(k.text(e))}},select:{get:function(e){var t,n,r,i=e.options,o=e.selectedIndex,a="select-one"===e.type,s=a?null:[],u=a?o+1:i.length;for(r=o<0?u:a?o:0;r<u;r++)if(((n=i[r]).selected||r===o)&&!n.disabled&&(!n.parentNode.disabled||!A(n.parentNode,"optgroup"))){if(t=k(n).val(),a)return t;s.push(t)}return s},set:function(e,t){var n,r,i=e.options,o=k.makeArray(t),a=i.length;while(a--)((r=i[a]).selected=-1<k.inArray(k.valHooks.option.get(r),o))&&(n=!0);return n||(e.selectedIndex=-1),o}}}}),k.each(["radio","checkbox"],function(){k.valHooks[this]={set:function(e,t){if(Array.isArray(t))return e.checked=-1<k.inArray(k(e).val(),t)}},y.checkOn||(k.valHooks[this].get=function(e){return null===e.getAttribute("value")?"on":e.value})}),y.focusin="onfocusin"in C;var Tt=/^(?:focusinfocus|focusoutblur)$/,Ct=function(e){e.stopPropagation()};k.extend(k.event,{trigger:function(e,t,n,r){var i,o,a,s,u,l,c,f,p=[n||E],d=v.call(e,"type")?e.type:e,h=v.call(e,"namespace")?e.namespace.split("."):[];if(o=f=a=n=n||E,3!==n.nodeType&&8!==n.nodeType&&!Tt.test(d+k.event.triggered)&&(-1<d.indexOf(".")&&(d=(h=d.split(".")).shift(),h.sort()),u=d.indexOf(":")<0&&"on"+d,(e=e[k.expando]?e:new k.Event(d,"object"==typeof e&&e)).isTrigger=r?2:3,e.namespace=h.join("."),e.rnamespace=e.namespace?new RegExp("(^|\\.)"+h.join("\\.(?:.*\\.|)")+"(\\.|$)"):null,e.result=void 
0,e.target||(e.target=n),t=null==t?[e]:k.makeArray(t,[e]),c=k.event.special[d]||{},r||!c.trigger||!1!==c.trigger.apply(n,t))){if(!r&&!c.noBubble&&!x(n)){for(s=c.delegateType||d,Tt.test(s+d)||(o=o.parentNode);o;o=o.parentNode)p.push(o),a=o;a===(n.ownerDocument||E)&&p.push(a.defaultView||a.parentWindow||C)}i=0;while((o=p[i++])&&!e.isPropagationStopped())f=o,e.type=1<i?s:c.bindType||d,(l=(Q.get(o,"events")||{})[e.type]&&Q.get(o,"handle"))&&l.apply(o,t),(l=u&&o[u])&&l.apply&&G(o)&&(e.result=l.apply(o,t),!1===e.result&&e.preventDefault());return e.type=d,r||e.isDefaultPrevented()||c._default&&!1!==c._default.apply(p.pop(),t)||!G(n)||u&&m(n[d])&&!x(n)&&((a=n[u])&&(n[u]=null),k.event.triggered=d,e.isPropagationStopped()&&f.addEventListener(d,Ct),n[d](),e.isPropagationStopped()&&f.removeEventListener(d,Ct),k.event.triggered=void 0,a&&(n[u]=a)),e.result}},simulate:function(e,t,n){var r=k.extend(new k.Event,n,{type:e,isSimulated:!0});k.event.trigger(r,null,t)}}),k.fn.extend({trigger:function(e,t){return this.each(function(){k.event.trigger(e,t,this)})},triggerHandler:function(e,t){var n=this[0];if(n)return k.event.trigger(e,t,n,!0)}}),y.focusin||k.each({focus:"focusin",blur:"focusout"},function(n,r){var i=function(e){k.event.simulate(r,e.target,k.event.fix(e))};k.event.special[r]={setup:function(){var e=this.ownerDocument||this,t=Q.access(e,r);t||e.addEventListener(n,i,!0),Q.access(e,r,(t||0)+1)},teardown:function(){var e=this.ownerDocument||this,t=Q.access(e,r)-1;t?Q.access(e,r,t):(e.removeEventListener(n,i,!0),Q.remove(e,r))}}});var Et=C.location,kt=Date.now(),St=/\?/;k.parseXML=function(e){var t;if(!e||"string"!=typeof e)return null;try{t=(new C.DOMParser).parseFromString(e,"text/xml")}catch(e){t=void 0}return t&&!t.getElementsByTagName("parsererror").length||k.error("Invalid XML: "+e),t};var Nt=/\[\]$/,At=/\r?\n/g,Dt=/^(?:submit|button|image|reset|file)$/i,jt=/^(?:input|select|textarea|keygen)/i;function qt(n,e,r,i){var 
t;if(Array.isArray(e))k.each(e,function(e,t){r||Nt.test(n)?i(n,t):qt(n+"["+("object"==typeof t&&null!=t?e:"")+"]",t,r,i)});else if(r||"object"!==w(e))i(n,e);else for(t in e)qt(n+"["+t+"]",e[t],r,i)}k.param=function(e,t){var n,r=[],i=function(e,t){var n=m(t)?t():t;r[r.length]=encodeURIComponent(e)+"="+encodeURIComponent(null==n?"":n)};if(null==e)return"";if(Array.isArray(e)||e.jquery&&!k.isPlainObject(e))k.each(e,function(){i(this.name,this.value)});else for(n in e)qt(n,e[n],t,i);return r.join("&")},k.fn.extend({serialize:function(){return k.param(this.serializeArray())},serializeArray:function(){return this.map(function(){var e=k.prop(this,"elements");return e?k.makeArray(e):this}).filter(function(){var e=this.type;return this.name&&!k(this).is(":disabled")&&jt.test(this.nodeName)&&!Dt.test(e)&&(this.checked||!pe.test(e))}).map(function(e,t){var n=k(this).val();return null==n?null:Array.isArray(n)?k.map(n,function(e){return{name:t.name,value:e.replace(At,"\r\n")}}):{name:t.name,value:n.replace(At,"\r\n")}}).get()}});var Lt=/%20/g,Ht=/#.*$/,Ot=/([?&])_=[^&]*/,Pt=/^(.*?):[ \t]*([^\r\n]*)$/gm,Rt=/^(?:GET|HEAD)$/,Mt=/^\/\//,It={},Wt={},$t="*/".concat("*"),Ft=E.createElement("a");function Bt(o){return function(e,t){"string"!=typeof e&&(t=e,e="*");var n,r=0,i=e.toLowerCase().match(R)||[];if(m(t))while(n=i[r++])"+"===n[0]?(n=n.slice(1)||"*",(o[n]=o[n]||[]).unshift(t)):(o[n]=o[n]||[]).push(t)}}function _t(t,i,o,a){var s={},u=t===Wt;function l(e){var r;return s[e]=!0,k.each(t[e]||[],function(e,t){var n=t(i,o,a);return"string"!=typeof n||u||s[n]?u?!(r=n):void 0:(i.dataTypes.unshift(n),l(n),!1)}),r}return l(i.dataTypes[0])||!s["*"]&&l("*")}function zt(e,t){var n,r,i=k.ajaxSettings.flatOptions||{};for(n in t)void 0!==t[n]&&((i[n]?e:r||(r={}))[n]=t[n]);return 
r&&k.extend(!0,e,r),e}Ft.href=Et.href,k.extend({active:0,lastModified:{},etag:{},ajaxSettings:{url:Et.href,type:"GET",isLocal:/^(?:about|app|app-storage|.+-extension|file|res|widget):$/.test(Et.protocol),global:!0,processData:!0,async:!0,contentType:"application/x-www-form-urlencoded; charset=UTF-8",accepts:{"*":$t,text:"text/plain",html:"text/html",xml:"application/xml, text/xml",json:"application/json, text/javascript"},contents:{xml:/\bxml\b/,html:/\bhtml/,json:/\bjson\b/},responseFields:{xml:"responseXML",text:"responseText",json:"responseJSON"},converters:{"* text":String,"text html":!0,"text json":JSON.parse,"text xml":k.parseXML},flatOptions:{url:!0,context:!0}},ajaxSetup:function(e,t){return t?zt(zt(e,k.ajaxSettings),t):zt(k.ajaxSettings,e)},ajaxPrefilter:Bt(It),ajaxTransport:Bt(Wt),ajax:function(e,t){"object"==typeof e&&(t=e,e=void 0),t=t||{};var c,f,p,n,d,r,h,g,i,o,v=k.ajaxSetup({},t),y=v.context||v,m=v.context&&(y.nodeType||y.jquery)?k(y):k.event,x=k.Deferred(),b=k.Callbacks("once memory"),w=v.statusCode||{},a={},s={},u="canceled",T={readyState:0,getResponseHeader:function(e){var t;if(h){if(!n){n={};while(t=Pt.exec(p))n[t[1].toLowerCase()+" "]=(n[t[1].toLowerCase()+" "]||[]).concat(t[2])}t=n[e.toLowerCase()+" "]}return null==t?null:t.join(", ")},getAllResponseHeaders:function(){return h?p:null},setRequestHeader:function(e,t){return null==h&&(e=s[e.toLowerCase()]=s[e.toLowerCase()]||e,a[e]=t),this},overrideMimeType:function(e){return null==h&&(v.mimeType=e),this},statusCode:function(e){var t;if(e)if(h)T.always(e[T.status]);else for(t in e)w[t]=[w[t],e[t]];return this},abort:function(e){var t=e||u;return 
c&&c.abort(t),l(0,t),this}};if(x.promise(T),v.url=((e||v.url||Et.href)+"").replace(Mt,Et.protocol+"//"),v.type=t.method||t.type||v.method||v.type,v.dataTypes=(v.dataType||"*").toLowerCase().match(R)||[""],null==v.crossDomain){r=E.createElement("a");try{r.href=v.url,r.href=r.href,v.crossDomain=Ft.protocol+"//"+Ft.host!=r.protocol+"//"+r.host}catch(e){v.crossDomain=!0}}if(v.data&&v.processData&&"string"!=typeof v.data&&(v.data=k.param(v.data,v.traditional)),_t(It,v,t,T),h)return T;for(i in(g=k.event&&v.global)&&0==k.active++&&k.event.trigger("ajaxStart"),v.type=v.type.toUpperCase(),v.hasContent=!Rt.test(v.type),f=v.url.replace(Ht,""),v.hasContent?v.data&&v.processData&&0===(v.contentType||"").indexOf("application/x-www-form-urlencoded")&&(v.data=v.data.replace(Lt,"+")):(o=v.url.slice(f.length),v.data&&(v.processData||"string"==typeof v.data)&&(f+=(St.test(f)?"&":"?")+v.data,delete v.data),!1===v.cache&&(f=f.replace(Ot,"$1"),o=(St.test(f)?"&":"?")+"_="+kt+++o),v.url=f+o),v.ifModified&&(k.lastModified[f]&&T.setRequestHeader("If-Modified-Since",k.lastModified[f]),k.etag[f]&&T.setRequestHeader("If-None-Match",k.etag[f])),(v.data&&v.hasContent&&!1!==v.contentType||t.contentType)&&T.setRequestHeader("Content-Type",v.contentType),T.setRequestHeader("Accept",v.dataTypes[0]&&v.accepts[v.dataTypes[0]]?v.accepts[v.dataTypes[0]]+("*"!==v.dataTypes[0]?", "+$t+"; q=0.01":""):v.accepts["*"]),v.headers)T.setRequestHeader(i,v.headers[i]);if(v.beforeSend&&(!1===v.beforeSend.call(y,T,v)||h))return T.abort();if(u="abort",b.add(v.complete),T.done(v.success),T.fail(v.error),c=_t(Wt,v,t,T)){if(T.readyState=1,g&&m.trigger("ajaxSend",[T,v]),h)return T;v.async&&0<v.timeout&&(d=C.setTimeout(function(){T.abort("timeout")},v.timeout));try{h=!1,c.send(a,l)}catch(e){if(h)throw e;l(-1,e)}}else l(-1,"No Transport");function l(e,t,n,r){var i,o,a,s,u,l=t;h||(h=!0,d&&C.clearTimeout(d),c=void 0,p=r||"",T.readyState=0<e?4:0,i=200<=e&&e<300||304===e,n&&(s=function(e,t,n){var 
r,i,o,a,s=e.contents,u=e.dataTypes;while("*"===u[0])u.shift(),void 0===r&&(r=e.mimeType||t.getResponseHeader("Content-Type"));if(r)for(i in s)if(s[i]&&s[i].test(r)){u.unshift(i);break}if(u[0]in n)o=u[0];else{for(i in n){if(!u[0]||e.converters[i+" "+u[0]]){o=i;break}a||(a=i)}o=o||a}if(o)return o!==u[0]&&u.unshift(o),n[o]}(v,T,n)),s=function(e,t,n,r){var i,o,a,s,u,l={},c=e.dataTypes.slice();if(c[1])for(a in e.converters)l[a.toLowerCase()]=e.converters[a];o=c.shift();while(o)if(e.responseFields[o]&&(n[e.responseFields[o]]=t),!u&&r&&e.dataFilter&&(t=e.dataFilter(t,e.dataType)),u=o,o=c.shift())if("*"===o)o=u;else if("*"!==u&&u!==o){if(!(a=l[u+" "+o]||l["* "+o]))for(i in l)if((s=i.split(" "))[1]===o&&(a=l[u+" "+s[0]]||l["* "+s[0]])){!0===a?a=l[i]:!0!==l[i]&&(o=s[0],c.unshift(s[1]));break}if(!0!==a)if(a&&e["throws"])t=a(t);else try{t=a(t)}catch(e){return{state:"parsererror",error:a?e:"No conversion from "+u+" to "+o}}}return{state:"success",data:t}}(v,s,T,i),i?(v.ifModified&&((u=T.getResponseHeader("Last-Modified"))&&(k.lastModified[f]=u),(u=T.getResponseHeader("etag"))&&(k.etag[f]=u)),204===e||"HEAD"===v.type?l="nocontent":304===e?l="notmodified":(l=s.state,o=s.data,i=!(a=s.error))):(a=l,!e&&l||(l="error",e<0&&(e=0))),T.status=e,T.statusText=(t||l)+"",i?x.resolveWith(y,[o,l,T]):x.rejectWith(y,[T,l,a]),T.statusCode(w),w=void 0,g&&m.trigger(i?"ajaxSuccess":"ajaxError",[T,v,i?o:a]),b.fireWith(y,[T,l]),g&&(m.trigger("ajaxComplete",[T,v]),--k.active||k.event.trigger("ajaxStop")))}return T},getJSON:function(e,t,n){return k.get(e,t,n,"json")},getScript:function(e,t){return k.get(e,void 0,t,"script")}}),k.each(["get","post"],function(e,i){k[i]=function(e,t,n,r){return m(t)&&(r=r||n,n=t,t=void 0),k.ajax(k.extend({url:e,type:i,dataType:r,data:t,success:n},k.isPlainObject(e)&&e))}}),k._evalUrl=function(e,t){return k.ajax({url:e,type:"GET",dataType:"script",cache:!0,async:!1,global:!1,converters:{"text 
script":function(){}},dataFilter:function(e){k.globalEval(e,t)}})},k.fn.extend({wrapAll:function(e){var t;return this[0]&&(m(e)&&(e=e.call(this[0])),t=k(e,this[0].ownerDocument).eq(0).clone(!0),this[0].parentNode&&t.insertBefore(this[0]),t.map(function(){var e=this;while(e.firstElementChild)e=e.firstElementChild;return e}).append(this)),this},wrapInner:function(n){return m(n)?this.each(function(e){k(this).wrapInner(n.call(this,e))}):this.each(function(){var e=k(this),t=e.contents();t.length?t.wrapAll(n):e.append(n)})},wrap:function(t){var n=m(t);return this.each(function(e){k(this).wrapAll(n?t.call(this,e):t)})},unwrap:function(e){return this.parent(e).not("body").each(function(){k(this).replaceWith(this.childNodes)}),this}}),k.expr.pseudos.hidden=function(e){return!k.expr.pseudos.visible(e)},k.expr.pseudos.visible=function(e){return!!(e.offsetWidth||e.offsetHeight||e.getClientRects().length)},k.ajaxSettings.xhr=function(){try{return new C.XMLHttpRequest}catch(e){}};var Ut={0:200,1223:204},Xt=k.ajaxSettings.xhr();y.cors=!!Xt&&"withCredentials"in Xt,y.ajax=Xt=!!Xt,k.ajaxTransport(function(i){var o,a;if(y.cors||Xt&&!i.crossDomain)return{send:function(e,t){var n,r=i.xhr();if(r.open(i.type,i.url,i.async,i.username,i.password),i.xhrFields)for(n in i.xhrFields)r[n]=i.xhrFields[n];for(n in i.mimeType&&r.overrideMimeType&&r.overrideMimeType(i.mimeType),i.crossDomain||e["X-Requested-With"]||(e["X-Requested-With"]="XMLHttpRequest"),e)r.setRequestHeader(n,e[n]);o=function(e){return function(){o&&(o=a=r.onload=r.onerror=r.onabort=r.ontimeout=r.onreadystatechange=null,"abort"===e?r.abort():"error"===e?"number"!=typeof r.status?t(0,"error"):t(r.status,r.statusText):t(Ut[r.status]||r.status,r.statusText,"text"!==(r.responseType||"text")||"string"!=typeof r.responseText?{binary:r.response}:{text:r.responseText},r.getAllResponseHeaders()))}},r.onload=o(),a=r.onerror=r.ontimeout=o("error"),void 
0!==r.onabort?r.onabort=a:r.onreadystatechange=function(){4===r.readyState&&C.setTimeout(function(){o&&a()})},o=o("abort");try{r.send(i.hasContent&&i.data||null)}catch(e){if(o)throw e}},abort:function(){o&&o()}}}),k.ajaxPrefilter(function(e){e.crossDomain&&(e.contents.script=!1)}),k.ajaxSetup({accepts:{script:"text/javascript, application/javascript, application/ecmascript, application/x-ecmascript"},contents:{script:/\b(?:java|ecma)script\b/},converters:{"text script":function(e){return k.globalEval(e),e}}}),k.ajaxPrefilter("script",function(e){void 0===e.cache&&(e.cache=!1),e.crossDomain&&(e.type="GET")}),k.ajaxTransport("script",function(n){var r,i;if(n.crossDomain||n.scriptAttrs)return{send:function(e,t){r=k("<script>").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var Vt,Gt=[],Yt=/(=)\?(?=&|$)|\?\?/;k.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=Gt.pop()||k.expando+"_"+kt++;return this[e]=!0,e}}),k.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Yt.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Yt.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Yt,"$1"+r):!1!==e.jsonp&&(e.url+=(St.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||k.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?k(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,Gt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((Vt=E.implementation.createHTMLDocument("").body).innerHTML="<form></form><form></form>",2===Vt.childNodes.length),k.parseHTML=function(e,t,n){return"string"!=typeof 
e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=D.exec(e))?[t.createElement(i[1])]:(i=we([e],t,o),o&&o.length&&k(o).remove(),k.merge([],i.childNodes)));var r,i,o},k.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1<s&&(r=mt(e.slice(s)),e=e.slice(0,s)),m(t)?(n=t,t=void 0):t&&"object"==typeof t&&(i="POST"),0<a.length&&k.ajax({url:e,type:i||"GET",dataType:"html",data:t}).done(function(e){o=arguments,a.html(r?k("<div>").append(k.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},k.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){k.fn[t]=function(e){return this.on(t,e)}}),k.expr.pseudos.animated=function(t){return k.grep(k.timers,function(e){return t===e.elem}).length},k.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=k.css(e,"position"),c=k(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=k.css(e,"top"),u=k.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,k.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):c.css(f)}},k.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){k.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var 
e,t,n,r=this[0],i={top:0,left:0};if("fixed"===k.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===k.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=k(e).offset()).top+=k.css(e,"borderTopWidth",!0),i.left+=k.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-k.css(r,"marginTop",!0),left:t.left-i.left-k.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===k.css(e,"position"))e=e.offsetParent;return e||ie})}}),k.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;k.fn[t]=function(e){return _(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),k.each(["top","left"],function(e,n){k.cssHooks[n]=ze(y.pixelPosition,function(e,t){if(t)return t=_e(e,n),$e.test(t)?k(e).position()[n]+"px":t})}),k.each({Height:"height",Width:"width"},function(a,s){k.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){k.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return _(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?k.css(e,t,i):k.style(e,t,n,i)},s,n?e:void 0,n)}})}),k.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){k.fn[n]=function(e,t){return 0<arguments.length?this.on(n,null,e,t):this.trigger(n)}}),k.fn.extend({hover:function(e,t){return 
this.mouseenter(e).mouseleave(t||e)}}),k.fn.extend({bind:function(e,t,n){return this.on(e,null,t,n)},unbind:function(e,t){return this.off(e,null,t)},delegate:function(e,t,n,r){return this.on(t,e,n,r)},undelegate:function(e,t,n){return 1===arguments.length?this.off(e,"**"):this.off(t,e||"**",n)}}),k.proxy=function(e,t){var n,r,i;if("string"==typeof t&&(n=e[t],t=e,e=n),m(e))return r=s.call(arguments,2),(i=function(){return e.apply(t||this,r.concat(s.call(arguments)))}).guid=e.guid=e.guid||k.guid++,i},k.holdReady=function(e){e?k.readyWait++:k.ready(!0)},k.isArray=Array.isArray,k.parseJSON=JSON.parse,k.nodeName=A,k.isFunction=m,k.isWindow=x,k.camelCase=V,k.type=w,k.now=Date.now,k.isNumeric=function(e){var t=k.type(e);return("number"===t||"string"===t)&&!isNaN(e-parseFloat(e))},"function"==typeof define&&define.amd&&define("jquery",[],function(){return k});var Qt=C.jQuery,Jt=C.$;return k.noConflict=function(e){return C.$===k&&(C.$=Jt),e&&C.jQuery===k&&(C.jQuery=Qt),k},e||(C.jQuery=C.$=k),k});
diff --git a/synapse/static/client/register/js/register.js b/synapse/static/client/register/js/register.js
deleted file mode 100644
index 3547f7be4f..0000000000
--- a/synapse/static/client/register/js/register.js
+++ /dev/null
@@ -1,117 +0,0 @@
-window.matrixRegistration = {
-    endpoint: location.origin + "/_matrix/client/api/v1/register"
-};
-
-var setupCaptcha = function() {
-    if (!window.matrixRegistrationConfig) {
-        return;
-    }
-    $.get(matrixRegistration.endpoint, function(response) {
-        var serverExpectsCaptcha = false;
-        for (var i=0; i<response.flows.length; i++) {
-            var flow = response.flows[i];
-            if ("m.login.recaptcha" === flow.type) {
-                serverExpectsCaptcha = true;
-                break;
-            }
-        }
-        if (!serverExpectsCaptcha) {
-            console.log("This server does not require a captcha.");
-            return;
-        }
-        console.log("Setting up ReCaptcha for "+matrixRegistration.endpoint);
-        var public_key = window.matrixRegistrationConfig.recaptcha_public_key;
-        if (public_key === undefined) {
-            console.error("No public key defined for captcha!");
-            setFeedbackString("Misconfigured captcha for server. Contact server admin.");
-            return;
-        }
-        Recaptcha.create(public_key,
-        "regcaptcha",
-        {
-            theme: "red",
-            callback: Recaptcha.focus_response_field
-        });
-        window.matrixRegistration.isUsingRecaptcha = true;
-    }).fail(errorFunc);
-    
-};
-
-var submitCaptcha = function(user, pwd) {
-    var challengeToken = Recaptcha.get_challenge();
-    var captchaEntry = Recaptcha.get_response();
-    var data = {
-        type: "m.login.recaptcha",
-        challenge: challengeToken,
-        response: captchaEntry
-    };
-    console.log("Submitting captcha");
-    $.post(matrixRegistration.endpoint, JSON.stringify(data), function(response) {
-        console.log("Success -> "+JSON.stringify(response));
-        submitPassword(user, pwd, response.session);
-    }).fail(function(err) {
-        Recaptcha.reload();
-        errorFunc(err);
-    });
-};
-
-var submitPassword = function(user, pwd, session) {
-    console.log("Registering...");
-    var data = {
-        type: "m.login.password",
-        user: user,
-        password: pwd,
-        session: session
-    };
-    $.post(matrixRegistration.endpoint, JSON.stringify(data), function(response) {
-        matrixRegistration.onRegistered(
-            response.home_server, response.user_id, response.access_token
-        );
-    }).fail(errorFunc);
-};
-
-var errorFunc = function(err) {
-    if (err.responseJSON && err.responseJSON.error) {
-        setFeedbackString(err.responseJSON.error + " (" + err.responseJSON.errcode + ")");
-    }
-    else {
-        setFeedbackString("Request failed: " + err.status);
-    }
-};
-
-var setFeedbackString = function(text) {
-    $("#feedback").text(text);
-};
-
-matrixRegistration.onLoad = function() {
-    setupCaptcha();
-};
-
-matrixRegistration.signUp = function() {
-    var user = $("#desired_user_id").val();
-    if (user.length == 0) {
-        setFeedbackString("Must specify a username.");
-        return;
-    }
-    var pwd1 = $("#pwd1").val();
-    var pwd2 = $("#pwd2").val();
-    if (pwd1.length < 6) {
-        setFeedbackString("Password: min. 6 characters.");
-        return;
-    }
-    if (pwd1 != pwd2) {
-        setFeedbackString("Passwords do not match.");
-        return;
-    }
-    if (window.matrixRegistration.isUsingRecaptcha) {
-        submitCaptcha(user, pwd1);
-    }
-    else {
-        submitPassword(user, pwd1);
-    }
-};
-
-matrixRegistration.onRegistered = function(hs_url, user_id, access_token) {
-    // clobber this function
-    console.warn("onRegistered - This function should be replaced to proceed.");
-};
diff --git a/synapse/static/client/register/register_config.sample.js b/synapse/static/client/register/register_config.sample.js
deleted file mode 100644
index c7ea180dee..0000000000
--- a/synapse/static/client/register/register_config.sample.js
+++ /dev/null
@@ -1,3 +0,0 @@
-window.matrixRegistrationConfig = {
-  recaptcha_public_key: "YOUR_PUBLIC_KEY"
-};
diff --git a/synapse/static/client/register/style.css b/synapse/static/client/register/style.css
deleted file mode 100644
index 8a39b5d0f5..0000000000
--- a/synapse/static/client/register/style.css
+++ /dev/null
@@ -1,64 +0,0 @@
-html {
-    height: 100%;
-}
-
-body {
-    height: 100%;
-    font-family: "Myriad Pro", "Myriad", Helvetica, Arial, sans-serif;
-    font-size: 12pt;
-    margin: 0px;
-}
-
-h1 {
-    font-size: 20pt;
-}
-
-a:link    { color: #666; }
-a:visited { color: #666; }
-a:hover   { color: #000; }
-a:active  { color: #000; }
-
-input {
-   width: 100%
-}
-
-textarea, input {
-   font-family: inherit;
-   font-size: inherit;
-}
-
-.smallPrint {
-    color: #888;
-    font-size: 9pt ! important;
-    font-style: italic ! important;
-}
-
-#recaptcha_area { 
-    margin: auto
-}
-
-.g-recaptcha div {
-    margin: auto;
-}
-
-#registrationForm {
-    text-align: left;
-    padding: 5px;
-    margin-bottom: 40px;
-    display: inline-block;
-    
-    -webkit-border-radius: 10px;
-    -moz-border-radius: 10px;
-    border-radius: 10px;
-    
-    -webkit-box-shadow: 0px 0px 20px 0px rgba(0,0,0,0.15);
-    -moz-box-shadow: 0px 0px 20px 0px rgba(0,0,0,0.15);
-    box-shadow: 0px 0px 20px 0px rgba(0,0,0,0.15);
-    
-    background-color: #f8f8f8;
-    border: 1px #ccc solid;
-}
-
-.error {
-	color: red;
-}
-- 
cgit 1.5.1


From 4af0aec54dad261bcad240d8a878a1c16934e77c Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Fri, 14 Apr 2023 11:24:06 +0200
Subject: Load `/directory/room/{roomAlias}` endpoint on workers (#15333)

* Enable `directory`

* move to worker store

* newsfile

* disable `ClientDirectoryListServer` and `ClientAppserviceDirectoryListServer` for workers
---
 changelog.d/15333.feature                   | 1 +
 docker/configure_workers_and_start.py       | 1 +
 docs/workers.md                             | 1 +
 synapse/rest/__init__.py                    | 3 +--
 synapse/rest/client/directory.py            | 6 ++++--
 synapse/storage/databases/main/directory.py | 6 ++++--
 6 files changed, 12 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/15333.feature

(limited to 'synapse')

diff --git a/changelog.d/15333.feature b/changelog.d/15333.feature
new file mode 100644
index 0000000000..35ea89ad89
--- /dev/null
+++ b/changelog.d/15333.feature
@@ -0,0 +1 @@
+Allow loading `/directory/room/{roomAlias}` endpoint on workers.
\ No newline at end of file
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index 2a50ee1e4b..26f92b3f1a 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -173,6 +173,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
             "^/_matrix/client/(api/v1|r0|v3|unstable)/search",
             "^/_matrix/client/(r0|v3|unstable)/user/.*/filter(/|$)",
             "^/_matrix/client/(r0|v3|unstable)/password_policy$",
+            "^/_matrix/client/(api/v1|r0|v3|unstable)/directory/room/.*$",
         ],
         "shared_extra_conf": {},
         "worker_extra_conf": "",
diff --git a/docs/workers.md b/docs/workers.md
index e9a477d32c..cb2333e4a5 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -234,6 +234,7 @@ information.
     ^/_matrix/client/(api/v1|r0|v3|unstable/.*)/rooms/.*/aliases
     ^/_matrix/client/(api/v1|r0|v3|unstable)/search$
     ^/_matrix/client/(r0|v3|unstable)/user/.*/filter(/|$)
+    ^/_matrix/client/(api/v1|r0|v3|unstable)/directory/room/.*$
 
     # Encryption requests
     ^/_matrix/client/(r0|v3|unstable)/keys/query$
diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py
index 1d7c11b42d..19603ed137 100644
--- a/synapse/rest/__init__.py
+++ b/synapse/rest/__init__.py
@@ -100,8 +100,7 @@ class ClientRestResource(JsonResource):
         login.register_servlets(hs, client_resource)
         profile.register_servlets(hs, client_resource)
         presence.register_servlets(hs, client_resource)
-        if is_main_process:
-            directory.register_servlets(hs, client_resource)
+        directory.register_servlets(hs, client_resource)
         voip.register_servlets(hs, client_resource)
         if is_main_process:
             pusher.register_servlets(hs, client_resource)
diff --git a/synapse/rest/client/directory.py b/synapse/rest/client/directory.py
index f17b4c8d22..570bb52747 100644
--- a/synapse/rest/client/directory.py
+++ b/synapse/rest/client/directory.py
@@ -39,12 +39,14 @@ logger = logging.getLogger(__name__)
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     ClientDirectoryServer(hs).register(http_server)
-    ClientDirectoryListServer(hs).register(http_server)
-    ClientAppserviceDirectoryListServer(hs).register(http_server)
+    if hs.config.worker.worker_app is None:
+        ClientDirectoryListServer(hs).register(http_server)
+        ClientAppserviceDirectoryListServer(hs).register(http_server)
 
 
 class ClientDirectoryServer(RestServlet):
     PATTERNS = client_patterns("/directory/room/(?P<room_alias>[^/]*)$", v1=True)
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
diff --git a/synapse/storage/databases/main/directory.py b/synapse/storage/databases/main/directory.py
index 44aa181174..3cb4c90729 100644
--- a/synapse/storage/databases/main/directory.py
+++ b/synapse/storage/databases/main/directory.py
@@ -129,8 +129,6 @@ class DirectoryWorkerStore(CacheInvalidationWorkerStore):
                 409, "Room alias %s already exists" % room_alias.to_string()
             )
 
-
-class DirectoryStore(DirectoryWorkerStore):
     async def delete_room_alias(self, room_alias: RoomAlias) -> Optional[str]:
         room_id = await self.db_pool.runInteraction(
             "delete_room_alias", self._delete_room_alias_txn, room_alias
@@ -201,3 +199,7 @@ class DirectoryStore(DirectoryWorkerStore):
         await self.db_pool.runInteraction(
             "_update_aliases_for_room_txn", _update_aliases_for_room_txn
         )
+
+
+class DirectoryStore(DirectoryWorkerStore):
+    pass
-- 
cgit 1.5.1


From dabbb94fafd335966bbdb5bd2187678872731a0d Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Fri, 14 Apr 2023 14:12:37 +0200
Subject: Delete pushers after calling on_logged_out module hook on device
 delete (#15410)

---
 changelog.d/15410.bugfix                         |  1 +
 docs/modules/password_auth_provider_callbacks.md |  3 ++
 synapse/handlers/device.py                       |  6 ++-
 tests/module_api/test_api.py                     | 51 +++++++++++++++++++++++-
 4 files changed, 58 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/15410.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15410.bugfix b/changelog.d/15410.bugfix
new file mode 100644
index 0000000000..eb540e33c5
--- /dev/null
+++ b/changelog.d/15410.bugfix
@@ -0,0 +1 @@
+Fix and document untold assumption that `on_logged_out` module hooks will be called before pushers deletion.
diff --git a/docs/modules/password_auth_provider_callbacks.md b/docs/modules/password_auth_provider_callbacks.md
index f6349d5404..8275f7ebdc 100644
--- a/docs/modules/password_auth_provider_callbacks.md
+++ b/docs/modules/password_auth_provider_callbacks.md
@@ -103,6 +103,9 @@ Called during a logout request for a user. It is passed the qualified user ID, t
 deactivated device (if any: access tokens are occasionally created without an associated
 device ID), and the (now deactivated) access token.
 
+Deleting the related pushers is done after calling `on_logged_out`, so you can rely on them
+to still be present.
+
 If multiple modules implement this callback, Synapse runs them all in order.
 
 ### `get_username_for_registration`
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index d2063d4435..ae1d9337ad 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -513,8 +513,6 @@ class DeviceHandler(DeviceWorkerHandler):
             else:
                 raise
 
-        await self.hs.get_pusherpool().remove_pushers_by_devices(user_id, device_ids)
-
         # Delete data specific to each device. Not optimised as it is not
         # considered as part of a critical path.
         for device_id in device_ids:
@@ -533,6 +531,10 @@ class DeviceHandler(DeviceWorkerHandler):
                     f"org.matrix.msc3890.local_notification_settings.{device_id}",
                 )
 
+        # Pushers are deleted after `delete_access_tokens_for_user` is called so that
+        # modules using `on_logged_out` hook can use them if needed.
+        await self.hs.get_pusherpool().remove_pushers_by_devices(user_id, device_ids)
+
         await self.notify_device_update(user_id, device_ids)
 
     async def update_device(self, user_id: str, device_id: str, content: dict) -> None:
diff --git a/tests/module_api/test_api.py b/tests/module_api/test_api.py
index 3a1929691e..758b4bc38b 100644
--- a/tests/module_api/test_api.py
+++ b/tests/module_api/test_api.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Dict
+from typing import Any, Dict, Optional
 from unittest.mock import Mock
 
 from twisted.internet import defer
@@ -21,6 +21,7 @@ from synapse.api.constants import EduTypes, EventTypes
 from synapse.api.errors import NotFoundError
 from synapse.events import EventBase
 from synapse.federation.units import Transaction
+from synapse.handlers.device import DeviceHandler
 from synapse.handlers.presence import UserPresenceState
 from synapse.handlers.push_rules import InvalidRuleException
 from synapse.module_api import ModuleApi
@@ -773,6 +774,54 @@ class ModuleApiTestCase(BaseModuleApiTestCase):
         # Check room alias.
         self.assertIsNone(room_alias)
 
+    def test_on_logged_out(self) -> None:
+        """Test that on_logged_out module hook is properly called when logging out
+        a device, and that related pushers are still available at this time.
+        """
+        device_id = "AAAAAAA"
+        user_id = self.register_user("test_on_logged_out", "secret")
+        self.login("test_on_logged_out", "secret", device_id)
+
+        self.get_success(
+            self.hs.get_pusherpool().add_or_update_pusher(
+                user_id=user_id,
+                device_id=device_id,
+                kind="http",
+                app_id="m.http",
+                app_display_name="HTTP Push Notifications",
+                device_display_name="pushy push",
+                pushkey="a@example.com",
+                lang=None,
+                data={"url": "http://example.com/_matrix/push/v1/notify"},
+            )
+        )
+
+        # Setup a callback counting the number of pushers.
+        number_of_pushers_in_callback: Optional[int] = None
+
+        async def _on_logged_out_mock(
+            user_id: str, device_id: Optional[str], access_token: str
+        ) -> None:
+            nonlocal number_of_pushers_in_callback
+            number_of_pushers_in_callback = len(
+                self.hs.get_pusherpool().pushers[user_id].values()
+            )
+
+        self.module_api.register_password_auth_provider_callbacks(
+            on_logged_out=_on_logged_out_mock
+        )
+
+        # Delete the device.
+        device_handler = self.hs.get_device_handler()
+        assert isinstance(device_handler, DeviceHandler)
+        self.get_success(device_handler.delete_devices(user_id, [device_id]))
+
+        # Check that the callback was called and the pushers still existed.
+        self.assertEqual(number_of_pushers_in_callback, 1)
+
+        # Ensure the pushers were deleted after the callback.
+        self.assertEqual(len(self.hs.get_pusherpool().pushers[user_id].values()), 0)
+
 
 class ModuleApiWorkerTestCase(BaseModuleApiTestCase, BaseMultiWorkerStreamTestCase):
     """For testing ModuleApi functionality in a multi-worker setup"""
-- 
cgit 1.5.1


From b5192355f6ac11eec4781d73a59b14cfc8732d1f Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 14 Apr 2023 16:10:32 +0100
Subject: User directory background update speedup (#15435)

c.f. #15264

The two changes are:
1. Add indexes so that the select / deletes don't do sequential scans
2. Don't repeatedly call `SELECT count(*)` each iteration, as that's slow
---
 changelog.d/15435.misc                           |  1 +
 synapse/storage/databases/main/user_directory.py | 89 ++++++++++++------------
 2 files changed, 45 insertions(+), 45 deletions(-)
 create mode 100644 changelog.d/15435.misc

(limited to 'synapse')

diff --git a/changelog.d/15435.misc b/changelog.d/15435.misc
new file mode 100644
index 0000000000..e0f591b6d1
--- /dev/null
+++ b/changelog.d/15435.misc
@@ -0,0 +1 @@
+Speed up the user directory background update.
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index 9fced4b997..5d65faed16 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -102,44 +102,34 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
     ) -> int:
         # Get all the rooms that we want to process.
         def _make_staging_area(txn: LoggingTransaction) -> None:
-            sql = (
-                "CREATE TABLE IF NOT EXISTS "
-                + TEMP_TABLE
-                + "_rooms(room_id TEXT NOT NULL, events BIGINT NOT NULL)"
-            )
-            txn.execute(sql)
-
-            sql = (
-                "CREATE TABLE IF NOT EXISTS "
-                + TEMP_TABLE
-                + "_position(position TEXT NOT NULL)"
-            )
-            txn.execute(sql)
-
-            # Get rooms we want to process from the database
-            sql = """
-                SELECT room_id, count(*) FROM current_state_events
+            sql = f"""
+                CREATE TABLE IF NOT EXISTS {TEMP_TABLE}_rooms AS
+                SELECT room_id, count(*) AS events
+                FROM current_state_events
                 GROUP BY room_id
             """
             txn.execute(sql)
-            rooms = list(txn.fetchall())
-            self.db_pool.simple_insert_many_txn(
-                txn, TEMP_TABLE + "_rooms", keys=("room_id", "events"), values=rooms
+            txn.execute(
+                f"CREATE INDEX IF NOT EXISTS {TEMP_TABLE}_rooms_rm ON {TEMP_TABLE}_rooms (room_id)"
             )
-            del rooms
-
-            sql = (
-                "CREATE TABLE IF NOT EXISTS "
-                + TEMP_TABLE
-                + "_users(user_id TEXT NOT NULL)"
+            txn.execute(
+                f"CREATE INDEX IF NOT EXISTS {TEMP_TABLE}_rooms_evs ON {TEMP_TABLE}_rooms (events)"
             )
-            txn.execute(sql)
 
-            txn.execute("SELECT name FROM users")
-            users = list(txn.fetchall())
+            sql = f"""
+                CREATE TABLE IF NOT EXISTS {TEMP_TABLE}_position (
+                    position TEXT NOT NULL
+                )
+            """
+            txn.execute(sql)
 
-            self.db_pool.simple_insert_many_txn(
-                txn, TEMP_TABLE + "_users", keys=("user_id",), values=users
+            sql = f"""
+                CREATE TABLE IF NOT EXISTS {TEMP_TABLE}_users AS
+                SELECT name AS user_id FROM users
+            """
+            txn.execute(sql)
+            txn.execute(
+                f"CREATE INDEX IF NOT EXISTS {TEMP_TABLE}_users_idx ON {TEMP_TABLE}_users (user_id)"
             )
 
         new_pos = await self.get_max_stream_id_in_current_state_deltas()
@@ -222,12 +212,13 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
             if not rooms_to_work_on:
                 return None
 
-            # Get how many are left to process, so we can give status on how
-            # far we are in processing
-            txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms")
-            result = txn.fetchone()
-            assert result is not None
-            progress["remaining"] = result[0]
+            if "remaining" not in progress:
+                # Get how many are left to process, so we can give status on how
+                # far we are in processing
+                txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms")
+                result = txn.fetchone()
+                assert result is not None
+                progress["remaining"] = result[0]
 
             return rooms_to_work_on
 
@@ -332,7 +323,14 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
 
             if processed_event_count > batch_size:
                 # Don't process any more rooms, we've hit our batch size.
-                return processed_event_count
+                break
+
+        await self.db_pool.runInteraction(
+            "populate_user_directory",
+            self.db_pool.updates._background_update_progress_txn,
+            "populate_user_directory_process_rooms",
+            progress,
+        )
 
         return processed_event_count
 
@@ -356,13 +354,14 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
 
             users_to_work_on = [x[0] for x in user_result]
 
-            # Get how many are left to process, so we can give status on how
-            # far we are in processing
-            sql = "SELECT COUNT(*) FROM " + TEMP_TABLE + "_users"
-            txn.execute(sql)
-            count_result = txn.fetchone()
-            assert count_result is not None
-            progress["remaining"] = count_result[0]
+            if "remaining" not in progress:
+                # Get how many are left to process, so we can give status on how
+                # far we are in processing
+                sql = "SELECT COUNT(*) FROM " + TEMP_TABLE + "_users"
+                txn.execute(sql)
+                count_result = txn.fetchone()
+                assert count_result is not None
+                progress["remaining"] = count_result[0]
 
             return users_to_work_on
 
-- 
cgit 1.5.1


From e4a25d022c1e4b71e043b07324d95362f7fb4067 Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Fri, 14 Apr 2023 18:26:07 +0200
Subject: Load `/capabilities` endpoint on workers (#15436)

---
 changelog.d/15436.feature             | 1 +
 docker/configure_workers_and_start.py | 1 +
 docs/workers.md                       | 1 +
 synapse/rest/__init__.py              | 2 +-
 synapse/rest/client/capabilities.py   | 1 +
 5 files changed, 5 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15436.feature

(limited to 'synapse')

diff --git a/changelog.d/15436.feature b/changelog.d/15436.feature
new file mode 100644
index 0000000000..d83f8c3e4a
--- /dev/null
+++ b/changelog.d/15436.feature
@@ -0,0 +1 @@
+Allow loading `/capabilities` endpoint on workers.
\ No newline at end of file
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index 26f92b3f1a..4beec3daaf 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -174,6 +174,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
             "^/_matrix/client/(r0|v3|unstable)/user/.*/filter(/|$)",
             "^/_matrix/client/(r0|v3|unstable)/password_policy$",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/directory/room/.*$",
+            "^/_matrix/client/(r0|v3|unstable)/capabilities$",
         ],
         "shared_extra_conf": {},
         "worker_extra_conf": "",
diff --git a/docs/workers.md b/docs/workers.md
index cb2333e4a5..6192a46e09 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -235,6 +235,7 @@ information.
     ^/_matrix/client/(api/v1|r0|v3|unstable)/search$
     ^/_matrix/client/(r0|v3|unstable)/user/.*/filter(/|$)
     ^/_matrix/client/(api/v1|r0|v3|unstable)/directory/room/.*$
+    ^/_matrix/client/(r0|v3|unstable)/capabilities$
 
     # Encryption requests
     ^/_matrix/client/(r0|v3|unstable)/keys/query$
diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py
index 19603ed137..1af8d99d20 100644
--- a/synapse/rest/__init__.py
+++ b/synapse/rest/__init__.py
@@ -133,8 +133,8 @@ class ClientRestResource(JsonResource):
         if is_main_process:
             room_upgrade_rest_servlet.register_servlets(hs, client_resource)
         room_batch.register_servlets(hs, client_resource)
+        capabilities.register_servlets(hs, client_resource)
         if is_main_process:
-            capabilities.register_servlets(hs, client_resource)
             account_validity.register_servlets(hs, client_resource)
         relations.register_servlets(hs, client_resource)
         password_policy.register_servlets(hs, client_resource)
diff --git a/synapse/rest/client/capabilities.py b/synapse/rest/client/capabilities.py
index e84dde31b1..0dbf8f6818 100644
--- a/synapse/rest/client/capabilities.py
+++ b/synapse/rest/client/capabilities.py
@@ -33,6 +33,7 @@ class CapabilitiesRestServlet(RestServlet):
     """End point to expose the capabilities of the server."""
 
     PATTERNS = client_patterns("/capabilities$")
+    CATEGORY = "Client API requests"
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
-- 
cgit 1.5.1


From 24b61f32ff7a2f49aaf2d3d81045d2187eccce7d Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Fri, 14 Apr 2023 19:49:47 +0200
Subject: Disable directory listing for `StaticResource` (#15438)

---
 changelog.d/15438.misc |  1 +
 synapse/http/server.py | 10 ++++++++++
 2 files changed, 11 insertions(+)
 create mode 100644 changelog.d/15438.misc

(limited to 'synapse')

diff --git a/changelog.d/15438.misc b/changelog.d/15438.misc
new file mode 100644
index 0000000000..1edcbac7e2
--- /dev/null
+++ b/changelog.d/15438.misc
@@ -0,0 +1 @@
+Disable directory listing for static resources in `/_matrix/static/`.
\ No newline at end of file
diff --git a/synapse/http/server.py b/synapse/http/server.py
index 7b760505b2..101dc2e747 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -46,6 +46,13 @@ from twisted.internet import defer, interfaces
 from twisted.internet.defer import CancelledError
 from twisted.python import failure
 from twisted.web import resource
+
+try:
+    from twisted.web.pages import notFound
+except ImportError:
+    from twisted.web.resource import NoResource as notFound  # type: ignore[assignment]
+
+from twisted.web.resource import IResource
 from twisted.web.server import NOT_DONE_YET, Request
 from twisted.web.static import File
 from twisted.web.util import redirectTo
@@ -569,6 +576,9 @@ class StaticResource(File):
         set_clickjacking_protection_headers(request)
         return super().render_GET(request)
 
+    def directoryListing(self) -> IResource:
+        return notFound()
+
 
 class UnrecognizedRequestResource(resource.Resource):
     """
-- 
cgit 1.5.1


From 8a47d6e3a685bd45237b7dae9c138209df509f64 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 14 Apr 2023 19:04:49 +0100
Subject: More precise type for LoggingTransaction.execute (#15432)

* More precise type for LoggingTransaction.execute
* Add an annotation for stream_ordering_month_ago

This would have spotted the error that was fixed in "Add comma missing from #15382. (#15429)"
---
 changelog.d/15432.misc                             |  1 +
 synapse/storage/database.py                        | 20 +++++++++++++++++---
 synapse/storage/databases/main/event_federation.py | 19 +++++++++++--------
 synapse/storage/types.py                           |  6 +++---
 4 files changed, 32 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/15432.misc

(limited to 'synapse')

diff --git a/changelog.d/15432.misc b/changelog.d/15432.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/15432.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 226ccc1671..1f5f5eb6f8 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -58,7 +58,7 @@ from synapse.metrics import register_threadpool
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.background_updates import BackgroundUpdater
 from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
-from synapse.storage.types import Connection, Cursor
+from synapse.storage.types import Connection, Cursor, SQLQueryParameters
 from synapse.util.async_helpers import delay_cancellation
 from synapse.util.iterutils import batch_iter
 
@@ -371,10 +371,18 @@ class LoggingTransaction:
         if isinstance(self.database_engine, PostgresEngine):
             from psycopg2.extras import execute_batch
 
+            # TODO: is it safe for values to be Iterable[Iterable[Any]] here?
+            # https://www.psycopg.org/docs/extras.html?highlight=execute_batch#psycopg2.extras.execute_batch
+            # suggests each arg in args should be a sequence or mapping
             self._do_execute(
                 lambda the_sql: execute_batch(self.txn, the_sql, args), sql
             )
         else:
+            # TODO: is it safe for values to be Iterable[Iterable[Any]] here?
+            # https://docs.python.org/3/library/sqlite3.html?highlight=sqlite3#sqlite3.Cursor.executemany
+            # suggests that the outer collection may be iterable, but
+            # https://docs.python.org/3/library/sqlite3.html?highlight=sqlite3#how-to-use-placeholders-to-bind-values-in-sql-queries
+            # suggests that the inner collection should be a sequence or dict.
             self.executemany(sql, args)
 
     def execute_values(
@@ -390,14 +398,20 @@ class LoggingTransaction:
         from psycopg2.extras import execute_values
 
         return self._do_execute(
+            # TODO: is it safe for values to be Iterable[Iterable[Any]] here?
+            # https://www.psycopg.org/docs/extras.html?highlight=execute_batch#psycopg2.extras.execute_values says values should be Sequence[Sequence]
             lambda the_sql: execute_values(self.txn, the_sql, values, fetch=fetch),
             sql,
         )
 
-    def execute(self, sql: str, *args: Any) -> None:
-        self._do_execute(self.txn.execute, sql, *args)
+    def execute(self, sql: str, parameters: SQLQueryParameters = ()) -> None:
+        self._do_execute(self.txn.execute, sql, parameters)
 
     def executemany(self, sql: str, *args: Any) -> None:
+        # TODO: we should add a type for *args here. Looking at Cursor.executemany
+        # and DBAPI2 it ought to be Sequence[_Parameter], but we pass in
+        # Iterable[Iterable[Any]] in execute_batch and execute_values above, which mypy
+        # complains about.
         self._do_execute(self.txn.executemany, sql, *args)
 
     def executescript(self, sql: str) -> None:
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 2ad6fa7d5e..ac19de183c 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -114,6 +114,10 @@ class _NoChainCoverIndex(Exception):
 
 
 class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBaseStore):
+    # TODO: this attribute comes from EventPushActionWorkerStore. Should we inherit from
+    # that store so that mypy can deduce this for itself?
+    stream_ordering_month_ago: Optional[int]
+
     def __init__(
         self,
         database: DatabasePool,
@@ -1182,8 +1186,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
         Throws a StoreError if we have since purged the index for
         stream_orderings from that point.
         """
-
-        if stream_ordering <= self.stream_ordering_month_ago:  # type: ignore[attr-defined]
+        assert self.stream_ordering_month_ago is not None
+        if stream_ordering <= self.stream_ordering_month_ago:
             raise StoreError(400, f"stream_ordering too old {stream_ordering}")
 
         sql = """
@@ -1231,7 +1235,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
 
         # provided the last_change is recent enough, we now clamp the requested
         # stream_ordering to it.
-        if last_change > self.stream_ordering_month_ago:  # type: ignore[attr-defined]
+        assert self.stream_ordering_month_ago is not None
+        if last_change > self.stream_ordering_month_ago:
             stream_ordering = min(last_change, stream_ordering)
 
         return await self._get_forward_extremeties_for_room(room_id, stream_ordering)
@@ -1246,8 +1251,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
         Throws a StoreError if we have since purged the index for
         stream_orderings from that point.
         """
-
-        if stream_ordering <= self.stream_ordering_month_ago:  # type: ignore[attr-defined]
+        assert self.stream_ordering_month_ago is not None
+        if stream_ordering <= self.stream_ordering_month_ago:
             raise StoreError(400, "stream_ordering too old %s" % (stream_ordering,))
 
         sql = """
@@ -1707,9 +1712,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
                 DELETE FROM stream_ordering_to_exterm
                 WHERE stream_ordering < ?
             """
-            txn.execute(
-                sql, (self.stream_ordering_month_ago,)  # type: ignore[attr-defined]
-            )
+            txn.execute(sql, (self.stream_ordering_month_ago,))
 
         await self.db_pool.runInteraction(
             "_delete_old_forward_extrem_cache",
diff --git a/synapse/storage/types.py b/synapse/storage/types.py
index 56a0048539..34ac807530 100644
--- a/synapse/storage/types.py
+++ b/synapse/storage/types.py
@@ -31,14 +31,14 @@ from typing_extensions import Protocol
 Some very basic protocol definitions for the DB-API2 classes specified in PEP-249
 """
 
-_Parameters = Union[Sequence[Any], Mapping[str, Any]]
+SQLQueryParameters = Union[Sequence[Any], Mapping[str, Any]]
 
 
 class Cursor(Protocol):
-    def execute(self, sql: str, parameters: _Parameters = ...) -> Any:
+    def execute(self, sql: str, parameters: SQLQueryParameters = ...) -> Any:
         ...
 
-    def executemany(self, sql: str, parameters: Sequence[_Parameters]) -> Any:
+    def executemany(self, sql: str, parameters: Sequence[SQLQueryParameters]) -> Any:
         ...
 
     def fetchone(self) -> Optional[Tuple]:
-- 
cgit 1.5.1


From c9326140dc9f8ea849356b5a8397e468636df8d4 Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Fri, 14 Apr 2023 15:46:04 -0500
Subject: Refactor `SimpleHttpClient` to pull out reusable methods (#15427)

Pulls out some methods to `BaseHttpClient` to eventually be
reused in other contexts.
---
 changelog.d/15427.misc |   1 +
 synapse/http/client.py | 132 ++++++++++++++++++++++++++++---------------------
 2 files changed, 77 insertions(+), 56 deletions(-)
 create mode 100644 changelog.d/15427.misc

(limited to 'synapse')

diff --git a/changelog.d/15427.misc b/changelog.d/15427.misc
new file mode 100644
index 0000000000..ef873e3b2b
--- /dev/null
+++ b/changelog.d/15427.misc
@@ -0,0 +1 @@
+Refactor `SimpleHttpClient` to pull out a base class.
diff --git a/synapse/http/client.py b/synapse/http/client.py
index b5cf8123ce..91fe474f36 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -312,35 +312,27 @@ class BlacklistingAgentWrapper(Agent):
         )
 
 
-class SimpleHttpClient:
+class BaseHttpClient:
     """
     A simple, no-frills HTTP client with methods that wrap up common ways of
-    using HTTP in Matrix
+    using HTTP in Matrix. Does not come with a default Agent, subclasses will need to
+    define their own.
+
+    Args:
+        hs: The HomeServer instance to pass in
+        treq_args: Extra keyword arguments to be given to treq.request.
     """
 
+    agent: IAgent
+
     def __init__(
         self,
         hs: "HomeServer",
         treq_args: Optional[Dict[str, Any]] = None,
-        ip_whitelist: Optional[IPSet] = None,
-        ip_blacklist: Optional[IPSet] = None,
-        use_proxy: bool = False,
     ):
-        """
-        Args:
-            hs
-            treq_args: Extra keyword arguments to be given to treq.request.
-            ip_blacklist: The IP addresses that are blacklisted that
-                we may not request.
-            ip_whitelist: The whitelisted IP addresses, that we can
-               request if it were otherwise caught in a blacklist.
-            use_proxy: Whether proxy settings should be discovered and used
-                from conventional environment variables.
-        """
         self.hs = hs
+        self.reactor = hs.get_reactor()
 
-        self._ip_whitelist = ip_whitelist
-        self._ip_blacklist = ip_blacklist
         self._extra_treq_args = treq_args or {}
         self.clock = hs.get_clock()
 
@@ -356,44 +348,6 @@ class SimpleHttpClient:
         # reactor.
         self._cooperator = Cooperator(scheduler=_make_scheduler(hs.get_reactor()))
 
-        if self._ip_blacklist:
-            # If we have an IP blacklist, we need to use a DNS resolver which
-            # filters out blacklisted IP addresses, to prevent DNS rebinding.
-            self.reactor: ISynapseReactor = BlacklistingReactorWrapper(
-                hs.get_reactor(), self._ip_whitelist, self._ip_blacklist
-            )
-        else:
-            self.reactor = hs.get_reactor()
-
-        # the pusher makes lots of concurrent SSL connections to sygnal, and
-        # tends to do so in batches, so we need to allow the pool to keep
-        # lots of idle connections around.
-        pool = HTTPConnectionPool(self.reactor)
-        # XXX: The justification for using the cache factor here is that larger instances
-        # will need both more cache and more connections.
-        # Still, this should probably be a separate dial
-        pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5)
-        pool.cachedConnectionTimeout = 2 * 60
-
-        self.agent: IAgent = ProxyAgent(
-            self.reactor,
-            hs.get_reactor(),
-            connectTimeout=15,
-            contextFactory=self.hs.get_http_client_context_factory(),
-            pool=pool,
-            use_proxy=use_proxy,
-        )
-
-        if self._ip_blacklist:
-            # If we have an IP blacklist, we then install the blacklisting Agent
-            # which prevents direct access to IP addresses, that are not caught
-            # by the DNS resolution.
-            self.agent = BlacklistingAgentWrapper(
-                self.agent,
-                ip_blacklist=self._ip_blacklist,
-                ip_whitelist=self._ip_whitelist,
-            )
-
     async def request(
         self,
         method: str,
@@ -799,6 +753,72 @@ class SimpleHttpClient:
         )
 
 
+class SimpleHttpClient(BaseHttpClient):
+    """
+    An HTTP client capable of crossing a proxy and respecting a block/allow list.
+
+    This also configures a larger / longer lasting HTTP connection pool.
+
+    Args:
+        hs: The HomeServer instance to pass in
+        treq_args: Extra keyword arguments to be given to treq.request.
+        ip_blacklist: The IP addresses that are blacklisted that
+            we may not request.
+        ip_whitelist: The whitelisted IP addresses, that we can
+           request if it were otherwise caught in a blacklist.
+        use_proxy: Whether proxy settings should be discovered and used
+            from conventional environment variables.
+    """
+
+    def __init__(
+        self,
+        hs: "HomeServer",
+        treq_args: Optional[Dict[str, Any]] = None,
+        ip_whitelist: Optional[IPSet] = None,
+        ip_blacklist: Optional[IPSet] = None,
+        use_proxy: bool = False,
+    ):
+        super().__init__(hs, treq_args=treq_args)
+        self._ip_whitelist = ip_whitelist
+        self._ip_blacklist = ip_blacklist
+
+        if self._ip_blacklist:
+            # If we have an IP blacklist, we need to use a DNS resolver which
+            # filters out blacklisted IP addresses, to prevent DNS rebinding.
+            self.reactor: ISynapseReactor = BlacklistingReactorWrapper(
+                self.reactor, self._ip_whitelist, self._ip_blacklist
+            )
+
+        # the pusher makes lots of concurrent SSL connections to Sygnal, and tends to
+        # do so in batches, so we need to allow the pool to keep lots of idle
+        # connections around.
+        pool = HTTPConnectionPool(self.reactor)
+        # XXX: The justification for using the cache factor here is that larger
+        # instances will need both more cache and more connections.
+        # Still, this should probably be a separate dial
+        pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5)
+        pool.cachedConnectionTimeout = 2 * 60
+
+        self.agent: IAgent = ProxyAgent(
+            self.reactor,
+            hs.get_reactor(),
+            connectTimeout=15,
+            contextFactory=self.hs.get_http_client_context_factory(),
+            pool=pool,
+            use_proxy=use_proxy,
+        )
+
+        if self._ip_blacklist:
+            # If we have an IP blacklist, we then install the blacklisting Agent
+            # which prevents direct access to IP addresses, that are not caught
+            # by the DNS resolution.
+            self.agent = BlacklistingAgentWrapper(
+                self.agent,
+                ip_blacklist=self._ip_blacklist,
+                ip_whitelist=self._ip_whitelist,
+            )
+
+
 def _timeout_to_request_timed_out_error(f: Failure) -> Failure:
     if f.check(twisted_error.TimeoutError, twisted_error.ConnectingCancelledError):
         # The TCP connection has its own timeout (set by the 'connectTimeout' param
-- 
cgit 1.5.1


From e12d788bb7526b7d9ab09bb3921b3cd17f20a939 Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Mon, 17 Apr 2023 18:53:43 -0500
Subject: Switch `InstanceLocationConfig` to a pydantic `BaseModel` (#15431)

* Switch InstanceLocationConfig to a pydantic BaseModel, apply Strict* types and add a few helper methods (that will make more sense in follow-up work).

Co-authored-by: David Robertson <davidr@element.io>
---
 changelog.d/15431.feature |  1 +
 synapse/config/_util.py   | 28 ++++++++++++++++++++++++-
 synapse/config/workers.py | 52 +++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 71 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/15431.feature

(limited to 'synapse')

diff --git a/changelog.d/15431.feature b/changelog.d/15431.feature
new file mode 100644
index 0000000000..4492406b49
--- /dev/null
+++ b/changelog.d/15431.feature
@@ -0,0 +1 @@
+Add some validation to `instance_map` configuration loading.
diff --git a/synapse/config/_util.py b/synapse/config/_util.py
index d3a4b484ab..dfc5d12210 100644
--- a/synapse/config/_util.py
+++ b/synapse/config/_util.py
@@ -11,9 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Iterable
+from typing import Any, Dict, Iterable, Type, TypeVar
 
 import jsonschema
+from pydantic import BaseModel, ValidationError, parse_obj_as
 
 from synapse.config._base import ConfigError
 from synapse.types import JsonDict
@@ -64,3 +65,28 @@ def json_error_to_config_error(
         else:
             path.append(str(p))
     return ConfigError(e.message, path)
+
+
+Model = TypeVar("Model", bound=BaseModel)
+
+
+def parse_and_validate_mapping(
+    config: Any,
+    model_type: Type[Model],
+) -> Dict[str, Model]:
+    """Parse `config` as a mapping from strings to a given `Model` type.
+    Args:
+        config: The configuration data to check
+        model_type: The BaseModel to validate and parse against.
+    Returns:
+        Fully validated and parsed Dict[str, Model].
+    Raises:
+        ConfigError, if given improper input.
+    """
+    try:
+        # type-ignore: mypy doesn't like constructing `Dict[str, model_type]` because
+        # `model_type` is a runtime variable. Pydantic is fine with this.
+        instances = parse_obj_as(Dict[str, model_type], config)  # type: ignore[valid-type]
+    except ValidationError as e:
+        raise ConfigError(str(e)) from e
+    return instances
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 1dfbe27e89..95b4047f1d 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -18,6 +18,7 @@ import logging
 from typing import Any, Dict, List, Union
 
 import attr
+from pydantic import BaseModel, Extra, StrictBool, StrictInt, StrictStr
 
 from synapse.config._base import (
     Config,
@@ -25,6 +26,7 @@ from synapse.config._base import (
     RoutableShardedWorkerHandlingConfig,
     ShardedWorkerHandlingConfig,
 )
+from synapse.config._util import parse_and_validate_mapping
 from synapse.config.server import (
     DIRECT_TCP_ERROR,
     TCPListenerConfig,
@@ -50,13 +52,43 @@ def _instance_to_list_converter(obj: Union[str, List[str]]) -> List[str]:
     return obj
 
 
-@attr.s(auto_attribs=True)
-class InstanceLocationConfig:
+class ConfigModel(BaseModel):
+    """A custom version of Pydantic's BaseModel which
+
+     - ignores unknown fields and
+     - does not allow fields to be overwritten after construction,
+
+    but otherwise uses Pydantic's default behaviour.
+
+    For now, ignore unknown fields. In the future, we could change this so that unknown
+    config values cause a ValidationError, provided the error messages are meaningful to
+    server operators.
+
+    Subclassing in this way is recommended by
+    https://pydantic-docs.helpmanual.io/usage/model_config/#change-behaviour-globally
+    """
+
+    class Config:
+        # By default, ignore fields that we don't recognise.
+        extra = Extra.ignore
+        # By default, don't allow fields to be reassigned after parsing.
+        allow_mutation = False
+
+
+class InstanceLocationConfig(ConfigModel):
     """The host and port to talk to an instance via HTTP replication."""
 
-    host: str
-    port: int
-    tls: bool = False
+    host: StrictStr
+    port: StrictInt
+    tls: StrictBool = False
+
+    def scheme(self) -> str:
+        """Hardcode a retrievable scheme based on self.tls"""
+        return "https" if self.tls else "http"
+
+    def netloc(self) -> str:
+        """Nicely format the network location data"""
+        return f"{self.host}:{self.port}"
 
 
 @attr.s
@@ -183,10 +215,12 @@ class WorkerConfig(Config):
         )
 
         # A map from instance name to host/port of their HTTP replication endpoint.
-        instance_map = config.get("instance_map") or {}
-        self.instance_map = {
-            name: InstanceLocationConfig(**c) for name, c in instance_map.items()
-        }
+        self.instance_map: Dict[
+            str, InstanceLocationConfig
+        ] = parse_and_validate_mapping(
+            config.get("instance_map", {}),
+            InstanceLocationConfig,
+        )
 
         # Map from type of streams to source, c.f. WriterLocations.
         writers = config.get("stream_writers") or {}
-- 
cgit 1.5.1


From aec639e3e33f5ca2f3456c715d28fd7a63c63c8a Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Mon, 17 Apr 2023 18:57:40 -0600
Subject: Move Spam Checker callbacks to a dedicated file (#15453)

---
 changelog.d/15453.misc                             |   1 +
 synapse/app/_base.py                               |   2 +-
 synapse/events/spamcheck.py                        | 822 ---------------------
 synapse/federation/federation_base.py              |   6 +-
 synapse/federation/federation_server.py            |   8 +-
 synapse/handlers/directory.py                      |  14 +-
 synapse/handlers/federation.py                     |   4 +-
 synapse/handlers/message.py                        |  10 +-
 synapse/handlers/register.py                       |   4 +-
 synapse/handlers/room.py                           |  10 +-
 synapse/handlers/room_member.py                    |  22 +-
 synapse/handlers/user_directory.py                 |   6 +-
 synapse/media/media_storage.py                     |   7 +-
 synapse/module_api/__init__.py                     |  33 +-
 synapse/module_api/callbacks/__init__.py           |  11 +-
 .../module_api/callbacks/spamchecker_callbacks.py  | 821 ++++++++++++++++++++
 synapse/server.py                                  |   7 +-
 tests/handlers/test_user_directory.py              |   2 +-
 tests/media/test_media_storage.py                  |   2 +-
 tests/rest/client/test_rooms.py                    |  26 +-
 tests/server.py                                    |   2 +-
 21 files changed, 927 insertions(+), 893 deletions(-)
 create mode 100644 changelog.d/15453.misc
 delete mode 100644 synapse/events/spamcheck.py
 create mode 100644 synapse/module_api/callbacks/spamchecker_callbacks.py

(limited to 'synapse')

diff --git a/changelog.d/15453.misc b/changelog.d/15453.misc
new file mode 100644
index 0000000000..9981606c32
--- /dev/null
+++ b/changelog.d/15453.misc
@@ -0,0 +1 @@
+Move various module API callback registration methods to a dedicated class.
\ No newline at end of file
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index f7b866978c..954402e4d2 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -64,7 +64,6 @@ from synapse.config.homeserver import HomeServerConfig
 from synapse.config.server import ListenerConfig, ManholeConfig, TCPListenerConfig
 from synapse.crypto import context_factory
 from synapse.events.presence_router import load_legacy_presence_router
-from synapse.events.spamcheck import load_legacy_spam_checkers
 from synapse.events.third_party_rules import load_legacy_third_party_event_rules
 from synapse.handlers.auth import load_legacy_password_auth_providers
 from synapse.http.site import SynapseSite
@@ -73,6 +72,7 @@ from synapse.logging.opentracing import init_tracer
 from synapse.metrics import install_gc_manager, register_threadpool
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.metrics.jemalloc import setup_jemalloc_stats
+from synapse.module_api.callbacks.spamchecker_callbacks import load_legacy_spam_checkers
 from synapse.types import ISynapseReactor
 from synapse.util import SYNAPSE_VERSION
 from synapse.util.caches.lrucache import setup_expire_lru_cache_entries
diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py
deleted file mode 100644
index 765c15bb51..0000000000
--- a/synapse/events/spamcheck.py
+++ /dev/null
@@ -1,822 +0,0 @@
-# Copyright 2017 New Vector Ltd
-# Copyright 2019 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import inspect
-import logging
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Awaitable,
-    Callable,
-    Collection,
-    List,
-    Optional,
-    Tuple,
-    Union,
-)
-
-# `Literal` appears with Python 3.8.
-from typing_extensions import Literal
-
-import synapse
-from synapse.api.errors import Codes
-from synapse.logging.opentracing import trace
-from synapse.media._base import FileInfo
-from synapse.media.media_storage import ReadableFileWrapper
-from synapse.spam_checker_api import RegistrationBehaviour
-from synapse.types import JsonDict, RoomAlias, UserProfile
-from synapse.util.async_helpers import delay_cancellation, maybe_awaitable
-from synapse.util.metrics import Measure
-
-if TYPE_CHECKING:
-    import synapse.events
-    import synapse.server
-
-logger = logging.getLogger(__name__)
-
-CHECK_EVENT_FOR_SPAM_CALLBACK = Callable[
-    ["synapse.events.EventBase"],
-    Awaitable[
-        Union[
-            str,
-            Codes,
-            # Highly experimental, not officially part of the spamchecker API, may
-            # disappear without warning depending on the results of ongoing
-            # experiments.
-            # Use this to return additional information as part of an error.
-            Tuple[Codes, JsonDict],
-            # Deprecated
-            bool,
-        ]
-    ],
-]
-SHOULD_DROP_FEDERATED_EVENT_CALLBACK = Callable[
-    ["synapse.events.EventBase"],
-    Awaitable[Union[bool, str]],
-]
-USER_MAY_JOIN_ROOM_CALLBACK = Callable[
-    [str, str, bool],
-    Awaitable[
-        Union[
-            Literal["NOT_SPAM"],
-            Codes,
-            # Highly experimental, not officially part of the spamchecker API, may
-            # disappear without warning depending on the results of ongoing
-            # experiments.
-            # Use this to return additional information as part of an error.
-            Tuple[Codes, JsonDict],
-            # Deprecated
-            bool,
-        ]
-    ],
-]
-USER_MAY_INVITE_CALLBACK = Callable[
-    [str, str, str],
-    Awaitable[
-        Union[
-            Literal["NOT_SPAM"],
-            Codes,
-            # Highly experimental, not officially part of the spamchecker API, may
-            # disappear without warning depending on the results of ongoing
-            # experiments.
-            # Use this to return additional information as part of an error.
-            Tuple[Codes, JsonDict],
-            # Deprecated
-            bool,
-        ]
-    ],
-]
-USER_MAY_SEND_3PID_INVITE_CALLBACK = Callable[
-    [str, str, str, str],
-    Awaitable[
-        Union[
-            Literal["NOT_SPAM"],
-            Codes,
-            # Highly experimental, not officially part of the spamchecker API, may
-            # disappear without warning depending on the results of ongoing
-            # experiments.
-            # Use this to return additional information as part of an error.
-            Tuple[Codes, JsonDict],
-            # Deprecated
-            bool,
-        ]
-    ],
-]
-USER_MAY_CREATE_ROOM_CALLBACK = Callable[
-    [str],
-    Awaitable[
-        Union[
-            Literal["NOT_SPAM"],
-            Codes,
-            # Highly experimental, not officially part of the spamchecker API, may
-            # disappear without warning depending on the results of ongoing
-            # experiments.
-            # Use this to return additional information as part of an error.
-            Tuple[Codes, JsonDict],
-            # Deprecated
-            bool,
-        ]
-    ],
-]
-USER_MAY_CREATE_ROOM_ALIAS_CALLBACK = Callable[
-    [str, RoomAlias],
-    Awaitable[
-        Union[
-            Literal["NOT_SPAM"],
-            Codes,
-            # Highly experimental, not officially part of the spamchecker API, may
-            # disappear without warning depending on the results of ongoing
-            # experiments.
-            # Use this to return additional information as part of an error.
-            Tuple[Codes, JsonDict],
-            # Deprecated
-            bool,
-        ]
-    ],
-]
-USER_MAY_PUBLISH_ROOM_CALLBACK = Callable[
-    [str, str],
-    Awaitable[
-        Union[
-            Literal["NOT_SPAM"],
-            Codes,
-            # Highly experimental, not officially part of the spamchecker API, may
-            # disappear without warning depending on the results of ongoing
-            # experiments.
-            # Use this to return additional information as part of an error.
-            Tuple[Codes, JsonDict],
-            # Deprecated
-            bool,
-        ]
-    ],
-]
-CHECK_USERNAME_FOR_SPAM_CALLBACK = Callable[[UserProfile], Awaitable[bool]]
-LEGACY_CHECK_REGISTRATION_FOR_SPAM_CALLBACK = Callable[
-    [
-        Optional[dict],
-        Optional[str],
-        Collection[Tuple[str, str]],
-    ],
-    Awaitable[RegistrationBehaviour],
-]
-CHECK_REGISTRATION_FOR_SPAM_CALLBACK = Callable[
-    [
-        Optional[dict],
-        Optional[str],
-        Collection[Tuple[str, str]],
-        Optional[str],
-    ],
-    Awaitable[RegistrationBehaviour],
-]
-CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK = Callable[
-    [ReadableFileWrapper, FileInfo],
-    Awaitable[
-        Union[
-            Literal["NOT_SPAM"],
-            Codes,
-            # Highly experimental, not officially part of the spamchecker API, may
-            # disappear without warning depending on the results of ongoing
-            # experiments.
-            # Use this to return additional information as part of an error.
-            Tuple[Codes, JsonDict],
-            # Deprecated
-            bool,
-        ]
-    ],
-]
-
-
-def load_legacy_spam_checkers(hs: "synapse.server.HomeServer") -> None:
-    """Wrapper that loads spam checkers configured using the old configuration, and
-    registers the spam checker hooks they implement.
-    """
-    spam_checkers: List[Any] = []
-    api = hs.get_module_api()
-    for module, config in hs.config.spamchecker.spam_checkers:
-        # Older spam checkers don't accept the `api` argument, so we
-        # try and detect support.
-        spam_args = inspect.getfullargspec(module)
-        if "api" in spam_args.args:
-            spam_checkers.append(module(config=config, api=api))
-        else:
-            spam_checkers.append(module(config=config))
-
-    # The known spam checker hooks. If a spam checker module implements a method
-    # which name appears in this set, we'll want to register it.
-    spam_checker_methods = {
-        "check_event_for_spam",
-        "user_may_invite",
-        "user_may_create_room",
-        "user_may_create_room_alias",
-        "user_may_publish_room",
-        "check_username_for_spam",
-        "check_registration_for_spam",
-        "check_media_file_for_spam",
-    }
-
-    for spam_checker in spam_checkers:
-        # Methods on legacy spam checkers might not be async, so we wrap them around a
-        # wrapper that will call maybe_awaitable on the result.
-        def async_wrapper(f: Optional[Callable]) -> Optional[Callable[..., Awaitable]]:
-            # f might be None if the callback isn't implemented by the module. In this
-            # case we don't want to register a callback at all so we return None.
-            if f is None:
-                return None
-
-            wrapped_func = f
-
-            if f.__name__ == "check_registration_for_spam":
-                checker_args = inspect.signature(f)
-                if len(checker_args.parameters) == 3:
-                    # Backwards compatibility; some modules might implement a hook that
-                    # doesn't expect a 4th argument. In this case, wrap it in a function
-                    # that gives it only 3 arguments and drops the auth_provider_id on
-                    # the floor.
-                    def wrapper(
-                        email_threepid: Optional[dict],
-                        username: Optional[str],
-                        request_info: Collection[Tuple[str, str]],
-                        auth_provider_id: Optional[str],
-                    ) -> Union[Awaitable[RegistrationBehaviour], RegistrationBehaviour]:
-                        # Assertion required because mypy can't prove we won't
-                        # change `f` back to `None`. See
-                        # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions
-                        assert f is not None
-
-                        return f(
-                            email_threepid,
-                            username,
-                            request_info,
-                        )
-
-                    wrapped_func = wrapper
-                elif len(checker_args.parameters) != 4:
-                    raise RuntimeError(
-                        "Bad signature for callback check_registration_for_spam",
-                    )
-
-            def run(*args: Any, **kwargs: Any) -> Awaitable:
-                # Assertion required because mypy can't prove we won't change `f`
-                # back to `None`. See
-                # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions
-                assert wrapped_func is not None
-
-                return maybe_awaitable(wrapped_func(*args, **kwargs))
-
-            return run
-
-        # Register the hooks through the module API.
-        hooks = {
-            hook: async_wrapper(getattr(spam_checker, hook, None))
-            for hook in spam_checker_methods
-        }
-
-        api.register_spam_checker_callbacks(**hooks)
-
-
-class SpamChecker:
-    NOT_SPAM: Literal["NOT_SPAM"] = "NOT_SPAM"
-
-    def __init__(self, hs: "synapse.server.HomeServer") -> None:
-        self.hs = hs
-        self.clock = hs.get_clock()
-
-        self._check_event_for_spam_callbacks: List[CHECK_EVENT_FOR_SPAM_CALLBACK] = []
-        self._should_drop_federated_event_callbacks: List[
-            SHOULD_DROP_FEDERATED_EVENT_CALLBACK
-        ] = []
-        self._user_may_join_room_callbacks: List[USER_MAY_JOIN_ROOM_CALLBACK] = []
-        self._user_may_invite_callbacks: List[USER_MAY_INVITE_CALLBACK] = []
-        self._user_may_send_3pid_invite_callbacks: List[
-            USER_MAY_SEND_3PID_INVITE_CALLBACK
-        ] = []
-        self._user_may_create_room_callbacks: List[USER_MAY_CREATE_ROOM_CALLBACK] = []
-        self._user_may_create_room_alias_callbacks: List[
-            USER_MAY_CREATE_ROOM_ALIAS_CALLBACK
-        ] = []
-        self._user_may_publish_room_callbacks: List[USER_MAY_PUBLISH_ROOM_CALLBACK] = []
-        self._check_username_for_spam_callbacks: List[
-            CHECK_USERNAME_FOR_SPAM_CALLBACK
-        ] = []
-        self._check_registration_for_spam_callbacks: List[
-            CHECK_REGISTRATION_FOR_SPAM_CALLBACK
-        ] = []
-        self._check_media_file_for_spam_callbacks: List[
-            CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK
-        ] = []
-
-    def register_callbacks(
-        self,
-        check_event_for_spam: Optional[CHECK_EVENT_FOR_SPAM_CALLBACK] = None,
-        should_drop_federated_event: Optional[
-            SHOULD_DROP_FEDERATED_EVENT_CALLBACK
-        ] = None,
-        user_may_join_room: Optional[USER_MAY_JOIN_ROOM_CALLBACK] = None,
-        user_may_invite: Optional[USER_MAY_INVITE_CALLBACK] = None,
-        user_may_send_3pid_invite: Optional[USER_MAY_SEND_3PID_INVITE_CALLBACK] = None,
-        user_may_create_room: Optional[USER_MAY_CREATE_ROOM_CALLBACK] = None,
-        user_may_create_room_alias: Optional[
-            USER_MAY_CREATE_ROOM_ALIAS_CALLBACK
-        ] = None,
-        user_may_publish_room: Optional[USER_MAY_PUBLISH_ROOM_CALLBACK] = None,
-        check_username_for_spam: Optional[CHECK_USERNAME_FOR_SPAM_CALLBACK] = None,
-        check_registration_for_spam: Optional[
-            CHECK_REGISTRATION_FOR_SPAM_CALLBACK
-        ] = None,
-        check_media_file_for_spam: Optional[CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK] = None,
-    ) -> None:
-        """Register callbacks from module for each hook."""
-        if check_event_for_spam is not None:
-            self._check_event_for_spam_callbacks.append(check_event_for_spam)
-
-        if should_drop_federated_event is not None:
-            self._should_drop_federated_event_callbacks.append(
-                should_drop_federated_event
-            )
-
-        if user_may_join_room is not None:
-            self._user_may_join_room_callbacks.append(user_may_join_room)
-
-        if user_may_invite is not None:
-            self._user_may_invite_callbacks.append(user_may_invite)
-
-        if user_may_send_3pid_invite is not None:
-            self._user_may_send_3pid_invite_callbacks.append(
-                user_may_send_3pid_invite,
-            )
-
-        if user_may_create_room is not None:
-            self._user_may_create_room_callbacks.append(user_may_create_room)
-
-        if user_may_create_room_alias is not None:
-            self._user_may_create_room_alias_callbacks.append(
-                user_may_create_room_alias,
-            )
-
-        if user_may_publish_room is not None:
-            self._user_may_publish_room_callbacks.append(user_may_publish_room)
-
-        if check_username_for_spam is not None:
-            self._check_username_for_spam_callbacks.append(check_username_for_spam)
-
-        if check_registration_for_spam is not None:
-            self._check_registration_for_spam_callbacks.append(
-                check_registration_for_spam,
-            )
-
-        if check_media_file_for_spam is not None:
-            self._check_media_file_for_spam_callbacks.append(check_media_file_for_spam)
-
-    @trace
-    async def check_event_for_spam(
-        self, event: "synapse.events.EventBase"
-    ) -> Union[Tuple[Codes, JsonDict], str]:
-        """Checks if a given event is considered "spammy" by this server.
-
-        If the server considers an event spammy, then it will be rejected if
-        sent by a local user. If it is sent by a user on another server, the
-        event is soft-failed.
-
-        Args:
-            event: the event to be checked
-
-        Returns:
-            - `NOT_SPAM` if the event is considered good (non-spammy) and should be let
-                through. Other spamcheck filters may still reject it.
-            - A `Code` if the event is considered spammy and is rejected with a specific
-                error message/code.
-            - A string that isn't `NOT_SPAM` if the event is considered spammy and the
-                string should be used as the client-facing error message. This usage is
-                generally discouraged as it doesn't support internationalization.
-        """
-        for callback in self._check_event_for_spam_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
-                res = await delay_cancellation(callback(event))
-                if res is False or res == self.NOT_SPAM:
-                    # This spam-checker accepts the event.
-                    # Other spam-checkers may reject it, though.
-                    continue
-                elif res is True:
-                    # This spam-checker rejects the event with deprecated
-                    # return value `True`
-                    return synapse.api.errors.Codes.FORBIDDEN, {}
-                elif (
-                    isinstance(res, tuple)
-                    and len(res) == 2
-                    and isinstance(res[0], synapse.api.errors.Codes)
-                    and isinstance(res[1], dict)
-                ):
-                    return res
-                elif isinstance(res, synapse.api.errors.Codes):
-                    return res, {}
-                elif not isinstance(res, str):
-                    # mypy complains that we can't reach this code because of the
-                    # return type in CHECK_EVENT_FOR_SPAM_CALLBACK, but we don't know
-                    # for sure that the module actually returns it.
-                    logger.warning(
-                        "Module returned invalid value, rejecting message as spam"
-                    )
-                    res = "This message has been rejected as probable spam"
-                else:
-                    # The module rejected the event either with a `Codes`
-                    # or some other `str`. In either case, we stop here.
-                    pass
-
-                return res
-
-        # No spam-checker has rejected the event, let it pass.
-        return self.NOT_SPAM
-
-    async def should_drop_federated_event(
-        self, event: "synapse.events.EventBase"
-    ) -> Union[bool, str]:
-        """Checks if a given federated event is considered "spammy" by this
-        server.
-
-        If the server considers an event spammy, it will be silently dropped,
-        and in doing so will split-brain our view of the room's DAG.
-
-        Args:
-            event: the event to be checked
-
-        Returns:
-            True if the event should be silently dropped
-        """
-        for callback in self._should_drop_federated_event_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
-                res: Union[bool, str] = await delay_cancellation(callback(event))
-            if res:
-                return res
-
-        return False
-
-    async def user_may_join_room(
-        self, user_id: str, room_id: str, is_invited: bool
-    ) -> Union[Tuple[Codes, JsonDict], Literal["NOT_SPAM"]]:
-        """Checks if a given users is allowed to join a room.
-        Not called when a user creates a room.
-
-        Args:
-            userid: The ID of the user wanting to join the room
-            room_id: The ID of the room the user wants to join
-            is_invited: Whether the user is invited into the room
-
-        Returns:
-            NOT_SPAM if the operation is permitted, [Codes, Dict] otherwise.
-        """
-        for callback in self._user_may_join_room_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
-                res = await delay_cancellation(callback(user_id, room_id, is_invited))
-                # Normalize return values to `Codes` or `"NOT_SPAM"`.
-                if res is True or res is self.NOT_SPAM:
-                    continue
-                elif res is False:
-                    return synapse.api.errors.Codes.FORBIDDEN, {}
-                elif isinstance(res, synapse.api.errors.Codes):
-                    return res, {}
-                elif (
-                    isinstance(res, tuple)
-                    and len(res) == 2
-                    and isinstance(res[0], synapse.api.errors.Codes)
-                    and isinstance(res[1], dict)
-                ):
-                    return res
-                else:
-                    logger.warning(
-                        "Module returned invalid value, rejecting join as spam"
-                    )
-                    return synapse.api.errors.Codes.FORBIDDEN, {}
-
-        # No spam-checker has rejected the request, let it pass.
-        return self.NOT_SPAM
-
-    async def user_may_invite(
-        self, inviter_userid: str, invitee_userid: str, room_id: str
-    ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]:
-        """Checks if a given user may send an invite
-
-        Args:
-            inviter_userid: The user ID of the sender of the invitation
-            invitee_userid: The user ID targeted in the invitation
-            room_id: The room ID
-
-        Returns:
-            NOT_SPAM if the operation is permitted, Codes otherwise.
-        """
-        for callback in self._user_may_invite_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
-                res = await delay_cancellation(
-                    callback(inviter_userid, invitee_userid, room_id)
-                )
-                # Normalize return values to `Codes` or `"NOT_SPAM"`.
-                if res is True or res is self.NOT_SPAM:
-                    continue
-                elif res is False:
-                    return synapse.api.errors.Codes.FORBIDDEN, {}
-                elif isinstance(res, synapse.api.errors.Codes):
-                    return res, {}
-                elif (
-                    isinstance(res, tuple)
-                    and len(res) == 2
-                    and isinstance(res[0], synapse.api.errors.Codes)
-                    and isinstance(res[1], dict)
-                ):
-                    return res
-                else:
-                    logger.warning(
-                        "Module returned invalid value, rejecting invite as spam"
-                    )
-                    return synapse.api.errors.Codes.FORBIDDEN, {}
-
-        # No spam-checker has rejected the request, let it pass.
-        return self.NOT_SPAM
-
-    async def user_may_send_3pid_invite(
-        self, inviter_userid: str, medium: str, address: str, room_id: str
-    ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]:
-        """Checks if a given user may invite a given threepid into the room
-
-        Note that if the threepid is already associated with a Matrix user ID, Synapse
-        will call user_may_invite with said user ID instead.
-
-        Args:
-            inviter_userid: The user ID of the sender of the invitation
-            medium: The 3PID's medium (e.g. "email")
-            address: The 3PID's address (e.g. "alice@example.com")
-            room_id: The room ID
-
-        Returns:
-            NOT_SPAM if the operation is permitted, Codes otherwise.
-        """
-        for callback in self._user_may_send_3pid_invite_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
-                res = await delay_cancellation(
-                    callback(inviter_userid, medium, address, room_id)
-                )
-                # Normalize return values to `Codes` or `"NOT_SPAM"`.
-                if res is True or res is self.NOT_SPAM:
-                    continue
-                elif res is False:
-                    return synapse.api.errors.Codes.FORBIDDEN, {}
-                elif isinstance(res, synapse.api.errors.Codes):
-                    return res, {}
-                elif (
-                    isinstance(res, tuple)
-                    and len(res) == 2
-                    and isinstance(res[0], synapse.api.errors.Codes)
-                    and isinstance(res[1], dict)
-                ):
-                    return res
-                else:
-                    logger.warning(
-                        "Module returned invalid value, rejecting 3pid invite as spam"
-                    )
-                    return synapse.api.errors.Codes.FORBIDDEN, {}
-
-        return self.NOT_SPAM
-
-    async def user_may_create_room(
-        self, userid: str
-    ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]:
-        """Checks if a given user may create a room
-
-        Args:
-            userid: The ID of the user attempting to create a room
-        """
-        for callback in self._user_may_create_room_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
-                res = await delay_cancellation(callback(userid))
-                if res is True or res is self.NOT_SPAM:
-                    continue
-                elif res is False:
-                    return synapse.api.errors.Codes.FORBIDDEN, {}
-                elif isinstance(res, synapse.api.errors.Codes):
-                    return res, {}
-                elif (
-                    isinstance(res, tuple)
-                    and len(res) == 2
-                    and isinstance(res[0], synapse.api.errors.Codes)
-                    and isinstance(res[1], dict)
-                ):
-                    return res
-                else:
-                    logger.warning(
-                        "Module returned invalid value, rejecting room creation as spam"
-                    )
-                    return synapse.api.errors.Codes.FORBIDDEN, {}
-
-        return self.NOT_SPAM
-
-    async def user_may_create_room_alias(
-        self, userid: str, room_alias: RoomAlias
-    ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]:
-        """Checks if a given user may create a room alias
-
-        Args:
-            userid: The ID of the user attempting to create a room alias
-            room_alias: The alias to be created
-
-        """
-        for callback in self._user_may_create_room_alias_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
-                res = await delay_cancellation(callback(userid, room_alias))
-                if res is True or res is self.NOT_SPAM:
-                    continue
-                elif res is False:
-                    return synapse.api.errors.Codes.FORBIDDEN, {}
-                elif isinstance(res, synapse.api.errors.Codes):
-                    return res, {}
-                elif (
-                    isinstance(res, tuple)
-                    and len(res) == 2
-                    and isinstance(res[0], synapse.api.errors.Codes)
-                    and isinstance(res[1], dict)
-                ):
-                    return res
-                else:
-                    logger.warning(
-                        "Module returned invalid value, rejecting room create as spam"
-                    )
-                    return synapse.api.errors.Codes.FORBIDDEN, {}
-
-        return self.NOT_SPAM
-
-    async def user_may_publish_room(
-        self, userid: str, room_id: str
-    ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]:
-        """Checks if a given user may publish a room to the directory
-
-        Args:
-            userid: The user ID attempting to publish the room
-            room_id: The ID of the room that would be published
-        """
-        for callback in self._user_may_publish_room_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
-                res = await delay_cancellation(callback(userid, room_id))
-                if res is True or res is self.NOT_SPAM:
-                    continue
-                elif res is False:
-                    return synapse.api.errors.Codes.FORBIDDEN, {}
-                elif isinstance(res, synapse.api.errors.Codes):
-                    return res, {}
-                elif (
-                    isinstance(res, tuple)
-                    and len(res) == 2
-                    and isinstance(res[0], synapse.api.errors.Codes)
-                    and isinstance(res[1], dict)
-                ):
-                    return res
-                else:
-                    logger.warning(
-                        "Module returned invalid value, rejecting room publication as spam"
-                    )
-                    return synapse.api.errors.Codes.FORBIDDEN, {}
-
-        return self.NOT_SPAM
-
-    async def check_username_for_spam(self, user_profile: UserProfile) -> bool:
-        """Checks if a user ID or display name are considered "spammy" by this server.
-
-        If the server considers a username spammy, then it will not be included in
-        user directory results.
-
-        Args:
-            user_profile: The user information to check, it contains the keys:
-                * user_id
-                * display_name
-                * avatar_url
-
-        Returns:
-            True if the user is spammy.
-        """
-        for callback in self._check_username_for_spam_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
-                # Make a copy of the user profile object to ensure the spam checker cannot
-                # modify it.
-                res = await delay_cancellation(callback(user_profile.copy()))
-            if res:
-                return True
-
-        return False
-
-    async def check_registration_for_spam(
-        self,
-        email_threepid: Optional[dict],
-        username: Optional[str],
-        request_info: Collection[Tuple[str, str]],
-        auth_provider_id: Optional[str] = None,
-    ) -> RegistrationBehaviour:
-        """Checks if we should allow the given registration request.
-
-        Args:
-            email_threepid: The email threepid used for registering, if any
-            username: The request user name, if any
-            request_info: List of tuples of user agent and IP that
-                were used during the registration process.
-            auth_provider_id: The SSO IdP the user used, e.g "oidc", "saml",
-                "cas". If any. Note this does not include users registered
-                via a password provider.
-
-        Returns:
-            Enum for how the request should be handled
-        """
-
-        for callback in self._check_registration_for_spam_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
-                behaviour = await delay_cancellation(
-                    callback(email_threepid, username, request_info, auth_provider_id)
-                )
-            assert isinstance(behaviour, RegistrationBehaviour)
-            if behaviour != RegistrationBehaviour.ALLOW:
-                return behaviour
-
-        return RegistrationBehaviour.ALLOW
-
-    async def check_media_file_for_spam(
-        self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
-    ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]:
-        """Checks if a piece of newly uploaded media should be blocked.
-
-        This will be called for local uploads, downloads of remote media, each
-        thumbnail generated for those, and web pages/images used for URL
-        previews.
-
-        Note that care should be taken to not do blocking IO operations in the
-        main thread. For example, to get the contents of a file a module
-        should do::
-
-            async def check_media_file_for_spam(
-                self, file: ReadableFileWrapper, file_info: FileInfo
-            ) -> Union[Codes, Literal["NOT_SPAM"]]:
-                buffer = BytesIO()
-                await file.write_chunks_to(buffer.write)
-
-                if buffer.getvalue() == b"Hello World":
-                    return synapse.module_api.NOT_SPAM
-
-                return Codes.FORBIDDEN
-
-
-        Args:
-            file: An object that allows reading the contents of the media.
-            file_info: Metadata about the file.
-        """
-
-        for callback in self._check_media_file_for_spam_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
-                res = await delay_cancellation(callback(file_wrapper, file_info))
-                # Normalize return values to `Codes` or `"NOT_SPAM"`.
-                if res is False or res is self.NOT_SPAM:
-                    continue
-                elif res is True:
-                    return synapse.api.errors.Codes.FORBIDDEN, {}
-                elif isinstance(res, synapse.api.errors.Codes):
-                    return res, {}
-                elif (
-                    isinstance(res, tuple)
-                    and len(res) == 2
-                    and isinstance(res[0], synapse.api.errors.Codes)
-                    and isinstance(res[1], dict)
-                ):
-                    return res
-                else:
-                    logger.warning(
-                        "Module returned invalid value, rejecting media file as spam"
-                    )
-                    return synapse.api.errors.Codes.FORBIDDEN, {}
-
-        return self.NOT_SPAM
diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py
index 29fae716f5..3df975958d 100644
--- a/synapse/federation/federation_base.py
+++ b/synapse/federation/federation_base.py
@@ -51,7 +51,7 @@ class FederationBase:
 
         self.server_name = hs.hostname
         self.keyring = hs.get_keyring()
-        self.spam_checker = hs.get_spam_checker()
+        self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
         self.store = hs.get_datastores().main
         self._clock = hs.get_clock()
         self._storage_controllers = hs.get_storage_controllers()
@@ -137,9 +137,9 @@ class FederationBase:
                     )
             return redacted_event
 
-        spam_check = await self.spam_checker.check_event_for_spam(pdu)
+        spam_check = await self._spam_checker_module_callbacks.check_event_for_spam(pdu)
 
-        if spam_check != self.spam_checker.NOT_SPAM:
+        if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
             logger.warning("Event contains spam, soft-failing %s", pdu.event_id)
             log_kv(
                 {
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 64e99292ec..d7740eb3b4 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -130,7 +130,7 @@ class FederationServer(FederationBase):
         super().__init__(hs)
 
         self.handler = hs.get_federation_handler()
-        self._spam_checker = hs.get_spam_checker()
+        self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
         self._federation_event_handler = hs.get_federation_event_handler()
         self.state = hs.get_state_handler()
         self._event_auth_handler = hs.get_event_auth_handler()
@@ -1129,7 +1129,7 @@ class FederationServer(FederationBase):
             logger.warning("event id %s: %s", pdu.event_id, e)
             raise FederationError("ERROR", 403, str(e), affected=pdu.event_id)
 
-        if await self._spam_checker.should_drop_federated_event(pdu):
+        if await self._spam_checker_module_callbacks.should_drop_federated_event(pdu):
             logger.warning(
                 "Unstaged federated event contains spam, dropping %s", pdu.event_id
             )
@@ -1174,7 +1174,9 @@ class FederationServer(FederationBase):
 
             origin, event = next
 
-            if await self._spam_checker.should_drop_federated_event(event):
+            if await self._spam_checker_module_callbacks.should_drop_federated_event(
+                event
+            ):
                 logger.warning(
                     "Staged federated event contains spam, dropping %s",
                     event.event_id,
diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py
index 1fb23cc9bf..5e8316e2e5 100644
--- a/synapse/handlers/directory.py
+++ b/synapse/handlers/directory.py
@@ -60,7 +60,7 @@ class DirectoryHandler:
             "directory", self.on_directory_query
         )
 
-        self.spam_checker = hs.get_spam_checker()
+        self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
 
     async def _create_association(
         self,
@@ -145,10 +145,12 @@ class DirectoryHandler:
                         403, "You must be in the room to create an alias for it"
                     )
 
-            spam_check = await self.spam_checker.user_may_create_room_alias(
-                user_id, room_alias
+            spam_check = (
+                await self._spam_checker_module_callbacks.user_may_create_room_alias(
+                    user_id, room_alias
+                )
             )
-            if spam_check != self.spam_checker.NOT_SPAM:
+            if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
                 raise AuthError(
                     403,
                     "This user is not permitted to create this alias",
@@ -444,7 +446,9 @@ class DirectoryHandler:
         """
         user_id = requester.user.to_string()
 
-        spam_check = await self.spam_checker.user_may_publish_room(user_id, room_id)
+        spam_check = await self._spam_checker_module_callbacks.user_may_publish_room(
+            user_id, room_id
+        )
         if spam_check != NOT_SPAM:
             raise AuthError(
                 403,
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 65461a0787..d1a88cc604 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -141,7 +141,7 @@ class FederationHandler:
         self.server_name = hs.hostname
         self.keyring = hs.get_keyring()
         self.is_mine_id = hs.is_mine_id
-        self.spam_checker = hs.get_spam_checker()
+        self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
         self.event_creation_handler = hs.get_event_creation_handler()
         self.event_builder_factory = hs.get_event_builder_factory()
         self._event_auth_handler = hs.get_event_auth_handler()
@@ -1042,7 +1042,7 @@ class FederationHandler:
         if self.hs.config.server.block_non_admin_invites:
             raise SynapseError(403, "This server does not accept room invites")
 
-        spam_check = await self.spam_checker.user_may_invite(
+        spam_check = await self._spam_checker_module_callbacks.user_may_invite(
             event.sender, event.state_key, event.room_id
         )
         if spam_check != NOT_SPAM:
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index a17fe3bf53..2e964ed37e 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -508,7 +508,7 @@ class EventCreationHandler:
 
         self._bulk_push_rule_evaluator = hs.get_bulk_push_rule_evaluator()
 
-        self.spam_checker = hs.get_spam_checker()
+        self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
         self.third_party_event_rules: "ThirdPartyEventRules" = (
             self.hs.get_third_party_event_rules()
         )
@@ -1035,8 +1035,12 @@ class EventCreationHandler:
                     event.sender,
                 )
 
-                spam_check_result = await self.spam_checker.check_event_for_spam(event)
-                if spam_check_result != self.spam_checker.NOT_SPAM:
+                spam_check_result = (
+                    await self._spam_checker_module_callbacks.check_event_for_spam(
+                        event
+                    )
+                )
+                if spam_check_result != self._spam_checker_module_callbacks.NOT_SPAM:
                     if isinstance(spam_check_result, tuple):
                         try:
                             [code, dict] = spam_check_result
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index c8bf2439af..61c4b833bd 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -110,7 +110,7 @@ class RegistrationHandler:
         self._server_notices_mxid = hs.config.servernotices.server_notices_mxid
         self._server_name = hs.hostname
 
-        self.spam_checker = hs.get_spam_checker()
+        self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
 
         if hs.config.worker.worker_app:
             self._register_client = ReplicationRegisterServlet.make_client(hs)
@@ -259,7 +259,7 @@ class RegistrationHandler:
 
         await self.check_registration_ratelimit(address)
 
-        result = await self.spam_checker.check_registration_for_spam(
+        result = await self._spam_checker_module_callbacks.check_registration_for_spam(
             threepid,
             localpart,
             user_agent_ips or [],
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 2d69cabf43..efd9612d90 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -106,7 +106,7 @@ class RoomCreationHandler:
         self.auth_blocking = hs.get_auth_blocking()
         self.clock = hs.get_clock()
         self.hs = hs
-        self.spam_checker = hs.get_spam_checker()
+        self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
         self.event_creation_handler = hs.get_event_creation_handler()
         self.room_member_handler = hs.get_room_member_handler()
         self._event_auth_handler = hs.get_event_auth_handler()
@@ -449,7 +449,9 @@ class RoomCreationHandler:
         """
         user_id = requester.user.to_string()
 
-        spam_check = await self.spam_checker.user_may_create_room(user_id)
+        spam_check = await self._spam_checker_module_callbacks.user_may_create_room(
+            user_id
+        )
         if spam_check != NOT_SPAM:
             raise SynapseError(
                 403,
@@ -761,7 +763,9 @@ class RoomCreationHandler:
                 )
 
         if not is_requester_admin:
-            spam_check = await self.spam_checker.user_may_create_room(user_id)
+            spam_check = await self._spam_checker_module_callbacks.user_may_create_room(
+                user_id
+            )
             if spam_check != NOT_SPAM:
                 raise SynapseError(
                     403,
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 1d8b0aee6f..ec317e6023 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -96,7 +96,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         self.member_as_limiter = Linearizer(max_count=10, name="member_as_limiter")
 
         self.clock = hs.get_clock()
-        self.spam_checker = hs.get_spam_checker()
+        self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
         self.third_party_event_rules = hs.get_third_party_event_rules()
         self._server_notices_mxid = self.config.servernotices.server_notices_mxid
         self._enable_lookup = hs.config.registration.enable_3pid_lookup
@@ -806,7 +806,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                     )
                     block_invite_result = (Codes.FORBIDDEN, {})
 
-                spam_check = await self.spam_checker.user_may_invite(
+                spam_check = await self._spam_checker_module_callbacks.user_may_invite(
                     requester.user.to_string(), target_id, room_id
                 )
                 if spam_check != NOT_SPAM:
@@ -940,8 +940,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                 # a room then they're allowed to join it.
                 and not new_room
             ):
-                spam_check = await self.spam_checker.user_may_join_room(
-                    target.to_string(), room_id, is_invited=inviter is not None
+                spam_check = (
+                    await self._spam_checker_module_callbacks.user_may_join_room(
+                        target.to_string(), room_id, is_invited=inviter is not None
+                    )
                 )
                 if spam_check != NOT_SPAM:
                     raise SynapseError(
@@ -1550,11 +1552,13 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             )
         else:
             # Check if the spamchecker(s) allow this invite to go through.
-            spam_check = await self.spam_checker.user_may_send_3pid_invite(
-                inviter_userid=requester.user.to_string(),
-                medium=medium,
-                address=address,
-                room_id=room_id,
+            spam_check = (
+                await self._spam_checker_module_callbacks.user_may_send_3pid_invite(
+                    inviter_userid=requester.user.to_string(),
+                    medium=medium,
+                    address=address,
+                    room_id=room_id,
+                )
             )
             if spam_check != NOT_SPAM:
                 raise SynapseError(
diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index 28a92d41d6..05197edc95 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -94,7 +94,7 @@ class UserDirectoryHandler(StateDeltasHandler):
         self.is_mine_id = hs.is_mine_id
         self.update_user_directory = hs.config.worker.should_update_user_directory
         self.search_all_users = hs.config.userdirectory.user_directory_search_all_users
-        self.spam_checker = hs.get_spam_checker()
+        self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
         self._hs = hs
 
         # The current position in the current_state_delta stream
@@ -149,7 +149,9 @@ class UserDirectoryHandler(StateDeltasHandler):
         # Remove any spammy users from the results.
         non_spammy_users = []
         for user in results["results"]:
-            if not await self.spam_checker.check_username_for_spam(user):
+            if not await self._spam_checker_module_callbacks.check_username_for_spam(
+                user
+            ):
                 non_spammy_users.append(user)
         results["results"] = non_spammy_users
 
diff --git a/synapse/media/media_storage.py b/synapse/media/media_storage.py
index a7e22a91e1..a819d95407 100644
--- a/synapse/media/media_storage.py
+++ b/synapse/media/media_storage.py
@@ -36,7 +36,6 @@ from twisted.internet.defer import Deferred
 from twisted.internet.interfaces import IConsumer
 from twisted.protocols.basic import FileSender
 
-import synapse
 from synapse.api.errors import NotFoundError
 from synapse.logging.context import defer_to_thread, make_deferred_yieldable
 from synapse.util import Clock
@@ -74,7 +73,7 @@ class MediaStorage:
         self.local_media_directory = local_media_directory
         self.filepaths = filepaths
         self.storage_providers = storage_providers
-        self.spam_checker = hs.get_spam_checker()
+        self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
         self.clock = hs.get_clock()
 
     async def store_file(self, source: IO, file_info: FileInfo) -> str:
@@ -145,10 +144,10 @@ class MediaStorage:
                     f.flush()
                     f.close()
 
-                    spam_check = await self.spam_checker.check_media_file_for_spam(
+                    spam_check = await self._spam_checker_module_callbacks.check_media_file_for_spam(
                         ReadableFileWrapper(self.clock, fname), file_info
                     )
-                    if spam_check != synapse.module_api.NOT_SPAM:
+                    if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
                         logger.info("Blocking media due to spam checker")
                         # Note that we'll delete the stored media, due to the
                         # try/except below. The media also won't be stored in
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 595c23e78d..eeafea74d1 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -44,20 +44,6 @@ from synapse.events.presence_router import (
     GET_USERS_FOR_STATES_CALLBACK,
     PresenceRouter,
 )
-from synapse.events.spamcheck import (
-    CHECK_EVENT_FOR_SPAM_CALLBACK,
-    CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK,
-    CHECK_REGISTRATION_FOR_SPAM_CALLBACK,
-    CHECK_USERNAME_FOR_SPAM_CALLBACK,
-    SHOULD_DROP_FEDERATED_EVENT_CALLBACK,
-    USER_MAY_CREATE_ROOM_ALIAS_CALLBACK,
-    USER_MAY_CREATE_ROOM_CALLBACK,
-    USER_MAY_INVITE_CALLBACK,
-    USER_MAY_JOIN_ROOM_CALLBACK,
-    USER_MAY_PUBLISH_ROOM_CALLBACK,
-    USER_MAY_SEND_3PID_INVITE_CALLBACK,
-    SpamChecker,
-)
 from synapse.events.third_party_rules import (
     CHECK_CAN_DEACTIVATE_USER_CALLBACK,
     CHECK_CAN_SHUTDOWN_ROOM_CALLBACK,
@@ -105,6 +91,20 @@ from synapse.module_api.callbacks.account_validity_callbacks import (
     ON_LEGACY_SEND_MAIL_CALLBACK,
     ON_USER_REGISTRATION_CALLBACK,
 )
+from synapse.module_api.callbacks.spamchecker_callbacks import (
+    CHECK_EVENT_FOR_SPAM_CALLBACK,
+    CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK,
+    CHECK_REGISTRATION_FOR_SPAM_CALLBACK,
+    CHECK_USERNAME_FOR_SPAM_CALLBACK,
+    SHOULD_DROP_FEDERATED_EVENT_CALLBACK,
+    USER_MAY_CREATE_ROOM_ALIAS_CALLBACK,
+    USER_MAY_CREATE_ROOM_CALLBACK,
+    USER_MAY_INVITE_CALLBACK,
+    USER_MAY_JOIN_ROOM_CALLBACK,
+    USER_MAY_PUBLISH_ROOM_CALLBACK,
+    USER_MAY_SEND_3PID_INVITE_CALLBACK,
+    SpamCheckerModuleApiCallbacks,
+)
 from synapse.rest.client.login import LoginResponse
 from synapse.storage import DataStore
 from synapse.storage.background_updates import (
@@ -147,7 +147,7 @@ are loaded into Synapse.
 """
 
 PRESENCE_ALL_USERS = PresenceRouter.ALL_USERS
-NOT_SPAM = SpamChecker.NOT_SPAM
+NOT_SPAM = SpamCheckerModuleApiCallbacks.NOT_SPAM
 
 __all__ = [
     "errors",
@@ -271,7 +271,6 @@ class ModuleApi:
         self._public_room_list_manager = PublicRoomListManager(hs)
         self._account_data_manager = AccountDataManager(hs)
 
-        self._spam_checker = hs.get_spam_checker()
         self._third_party_event_rules = hs.get_third_party_event_rules()
         self._password_auth_provider = hs.get_password_auth_provider()
         self._presence_router = hs.get_presence_router()
@@ -305,7 +304,7 @@ class ModuleApi:
 
         Added in Synapse v1.37.0.
         """
-        return self._spam_checker.register_callbacks(
+        return self._callbacks.spam_checker.register_callbacks(
             check_event_for_spam=check_event_for_spam,
             should_drop_federated_event=should_drop_federated_event,
             user_may_join_room=user_may_join_room,
diff --git a/synapse/module_api/callbacks/__init__.py b/synapse/module_api/callbacks/__init__.py
index 3d977bf655..5cdb2c003a 100644
--- a/synapse/module_api/callbacks/__init__.py
+++ b/synapse/module_api/callbacks/__init__.py
@@ -12,11 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
 from synapse.module_api.callbacks.account_validity_callbacks import (
     AccountValidityModuleApiCallbacks,
 )
+from synapse.module_api.callbacks.spamchecker_callbacks import (
+    SpamCheckerModuleApiCallbacks,
+)
 
 
 class ModuleApiCallbacks:
-    def __init__(self) -> None:
+    def __init__(self, hs: "HomeServer") -> None:
         self.account_validity = AccountValidityModuleApiCallbacks()
+        self.spam_checker = SpamCheckerModuleApiCallbacks(hs)
diff --git a/synapse/module_api/callbacks/spamchecker_callbacks.py b/synapse/module_api/callbacks/spamchecker_callbacks.py
new file mode 100644
index 0000000000..4456d1b81e
--- /dev/null
+++ b/synapse/module_api/callbacks/spamchecker_callbacks.py
@@ -0,0 +1,821 @@
+# Copyright 2017 New Vector Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import inspect
+import logging
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Awaitable,
+    Callable,
+    Collection,
+    List,
+    Optional,
+    Tuple,
+    Union,
+)
+
+# `Literal` appears with Python 3.8.
+from typing_extensions import Literal
+
+import synapse
+from synapse.api.errors import Codes
+from synapse.logging.opentracing import trace
+from synapse.media._base import FileInfo
+from synapse.media.media_storage import ReadableFileWrapper
+from synapse.spam_checker_api import RegistrationBehaviour
+from synapse.types import JsonDict, RoomAlias, UserProfile
+from synapse.util.async_helpers import delay_cancellation, maybe_awaitable
+from synapse.util.metrics import Measure
+
+if TYPE_CHECKING:
+    import synapse.events
+    import synapse.server
+
+logger = logging.getLogger(__name__)
+
+CHECK_EVENT_FOR_SPAM_CALLBACK = Callable[
+    ["synapse.events.EventBase"],
+    Awaitable[
+        Union[
+            str,
+            Codes,
+            # Highly experimental, not officially part of the spamchecker API, may
+            # disappear without warning depending on the results of ongoing
+            # experiments.
+            # Use this to return additional information as part of an error.
+            Tuple[Codes, JsonDict],
+            # Deprecated
+            bool,
+        ]
+    ],
+]
+SHOULD_DROP_FEDERATED_EVENT_CALLBACK = Callable[
+    ["synapse.events.EventBase"],
+    Awaitable[Union[bool, str]],
+]
+USER_MAY_JOIN_ROOM_CALLBACK = Callable[
+    [str, str, bool],
+    Awaitable[
+        Union[
+            Literal["NOT_SPAM"],
+            Codes,
+            # Highly experimental, not officially part of the spamchecker API, may
+            # disappear without warning depending on the results of ongoing
+            # experiments.
+            # Use this to return additional information as part of an error.
+            Tuple[Codes, JsonDict],
+            # Deprecated
+            bool,
+        ]
+    ],
+]
+USER_MAY_INVITE_CALLBACK = Callable[
+    [str, str, str],
+    Awaitable[
+        Union[
+            Literal["NOT_SPAM"],
+            Codes,
+            # Highly experimental, not officially part of the spamchecker API, may
+            # disappear without warning depending on the results of ongoing
+            # experiments.
+            # Use this to return additional information as part of an error.
+            Tuple[Codes, JsonDict],
+            # Deprecated
+            bool,
+        ]
+    ],
+]
+USER_MAY_SEND_3PID_INVITE_CALLBACK = Callable[
+    [str, str, str, str],
+    Awaitable[
+        Union[
+            Literal["NOT_SPAM"],
+            Codes,
+            # Highly experimental, not officially part of the spamchecker API, may
+            # disappear without warning depending on the results of ongoing
+            # experiments.
+            # Use this to return additional information as part of an error.
+            Tuple[Codes, JsonDict],
+            # Deprecated
+            bool,
+        ]
+    ],
+]
+USER_MAY_CREATE_ROOM_CALLBACK = Callable[
+    [str],
+    Awaitable[
+        Union[
+            Literal["NOT_SPAM"],
+            Codes,
+            # Highly experimental, not officially part of the spamchecker API, may
+            # disappear without warning depending on the results of ongoing
+            # experiments.
+            # Use this to return additional information as part of an error.
+            Tuple[Codes, JsonDict],
+            # Deprecated
+            bool,
+        ]
+    ],
+]
+USER_MAY_CREATE_ROOM_ALIAS_CALLBACK = Callable[
+    [str, RoomAlias],
+    Awaitable[
+        Union[
+            Literal["NOT_SPAM"],
+            Codes,
+            # Highly experimental, not officially part of the spamchecker API, may
+            # disappear without warning depending on the results of ongoing
+            # experiments.
+            # Use this to return additional information as part of an error.
+            Tuple[Codes, JsonDict],
+            # Deprecated
+            bool,
+        ]
+    ],
+]
+USER_MAY_PUBLISH_ROOM_CALLBACK = Callable[
+    [str, str],
+    Awaitable[
+        Union[
+            Literal["NOT_SPAM"],
+            Codes,
+            # Highly experimental, not officially part of the spamchecker API, may
+            # disappear without warning depending on the results of ongoing
+            # experiments.
+            # Use this to return additional information as part of an error.
+            Tuple[Codes, JsonDict],
+            # Deprecated
+            bool,
+        ]
+    ],
+]
+CHECK_USERNAME_FOR_SPAM_CALLBACK = Callable[[UserProfile], Awaitable[bool]]
+LEGACY_CHECK_REGISTRATION_FOR_SPAM_CALLBACK = Callable[
+    [
+        Optional[dict],
+        Optional[str],
+        Collection[Tuple[str, str]],
+    ],
+    Awaitable[RegistrationBehaviour],
+]
+CHECK_REGISTRATION_FOR_SPAM_CALLBACK = Callable[
+    [
+        Optional[dict],
+        Optional[str],
+        Collection[Tuple[str, str]],
+        Optional[str],
+    ],
+    Awaitable[RegistrationBehaviour],
+]
+CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK = Callable[
+    [ReadableFileWrapper, FileInfo],
+    Awaitable[
+        Union[
+            Literal["NOT_SPAM"],
+            Codes,
+            # Highly experimental, not officially part of the spamchecker API, may
+            # disappear without warning depending on the results of ongoing
+            # experiments.
+            # Use this to return additional information as part of an error.
+            Tuple[Codes, JsonDict],
+            # Deprecated
+            bool,
+        ]
+    ],
+]
+
+
+def load_legacy_spam_checkers(hs: "synapse.server.HomeServer") -> None:
+    """Wrapper that loads spam checkers configured using the old configuration, and
+    registers the spam checker hooks they implement.
+    """
+    spam_checkers: List[Any] = []
+    api = hs.get_module_api()
+    for module, config in hs.config.spamchecker.spam_checkers:
+        # Older spam checkers don't accept the `api` argument, so we
+        # try and detect support.
+        spam_args = inspect.getfullargspec(module)
+        if "api" in spam_args.args:
+            spam_checkers.append(module(config=config, api=api))
+        else:
+            spam_checkers.append(module(config=config))
+
+    # The known spam checker hooks. If a spam checker module implements a method
+    # whose name appears in this set, we'll want to register it.
+    spam_checker_methods = {
+        "check_event_for_spam",
+        "user_may_invite",
+        "user_may_create_room",
+        "user_may_create_room_alias",
+        "user_may_publish_room",
+        "check_username_for_spam",
+        "check_registration_for_spam",
+        "check_media_file_for_spam",
+    }
+
+    for spam_checker in spam_checkers:
+        # Methods on legacy spam checkers might not be async, so we wrap them in a
+        # wrapper that will call maybe_awaitable on the result.
+        def async_wrapper(f: Optional[Callable]) -> Optional[Callable[..., Awaitable]]:
+            # f might be None if the callback isn't implemented by the module. In this
+            # case we don't want to register a callback at all so we return None.
+            if f is None:
+                return None
+
+            wrapped_func = f
+
+            if f.__name__ == "check_registration_for_spam":
+                checker_args = inspect.signature(f)
+                if len(checker_args.parameters) == 3:
+                    # Backwards compatibility; some modules might implement a hook that
+                    # doesn't expect a 4th argument. In this case, wrap it in a function
+                    # that gives it only 3 arguments and drops the auth_provider_id on
+                    # the floor.
+                    def wrapper(
+                        email_threepid: Optional[dict],
+                        username: Optional[str],
+                        request_info: Collection[Tuple[str, str]],
+                        auth_provider_id: Optional[str],
+                    ) -> Union[Awaitable[RegistrationBehaviour], RegistrationBehaviour]:
+                        # Assertion required because mypy can't prove we won't
+                        # change `f` back to `None`. See
+                        # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions
+                        assert f is not None
+
+                        return f(
+                            email_threepid,
+                            username,
+                            request_info,
+                        )
+
+                    wrapped_func = wrapper
+                elif len(checker_args.parameters) != 4:
+                    raise RuntimeError(
+                        "Bad signature for callback check_registration_for_spam",
+                    )
+
+            def run(*args: Any, **kwargs: Any) -> Awaitable:
+                # Assertion required because mypy can't prove we won't change `f`
+                # back to `None`. See
+                # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions
+                assert wrapped_func is not None
+
+                return maybe_awaitable(wrapped_func(*args, **kwargs))
+
+            return run
+
+        # Register the hooks through the module API.
+        hooks = {
+            hook: async_wrapper(getattr(spam_checker, hook, None))
+            for hook in spam_checker_methods
+        }
+
+        api.register_spam_checker_callbacks(**hooks)
+
+
+class SpamCheckerModuleApiCallbacks:
+    NOT_SPAM: Literal["NOT_SPAM"] = "NOT_SPAM"
+
+    def __init__(self, hs: "synapse.server.HomeServer") -> None:
+        self.clock = hs.get_clock()
+
+        self._check_event_for_spam_callbacks: List[CHECK_EVENT_FOR_SPAM_CALLBACK] = []
+        self._should_drop_federated_event_callbacks: List[
+            SHOULD_DROP_FEDERATED_EVENT_CALLBACK
+        ] = []
+        self._user_may_join_room_callbacks: List[USER_MAY_JOIN_ROOM_CALLBACK] = []
+        self._user_may_invite_callbacks: List[USER_MAY_INVITE_CALLBACK] = []
+        self._user_may_send_3pid_invite_callbacks: List[
+            USER_MAY_SEND_3PID_INVITE_CALLBACK
+        ] = []
+        self._user_may_create_room_callbacks: List[USER_MAY_CREATE_ROOM_CALLBACK] = []
+        self._user_may_create_room_alias_callbacks: List[
+            USER_MAY_CREATE_ROOM_ALIAS_CALLBACK
+        ] = []
+        self._user_may_publish_room_callbacks: List[USER_MAY_PUBLISH_ROOM_CALLBACK] = []
+        self._check_username_for_spam_callbacks: List[
+            CHECK_USERNAME_FOR_SPAM_CALLBACK
+        ] = []
+        self._check_registration_for_spam_callbacks: List[
+            CHECK_REGISTRATION_FOR_SPAM_CALLBACK
+        ] = []
+        self._check_media_file_for_spam_callbacks: List[
+            CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK
+        ] = []
+
+    def register_callbacks(
+        self,
+        check_event_for_spam: Optional[CHECK_EVENT_FOR_SPAM_CALLBACK] = None,
+        should_drop_federated_event: Optional[
+            SHOULD_DROP_FEDERATED_EVENT_CALLBACK
+        ] = None,
+        user_may_join_room: Optional[USER_MAY_JOIN_ROOM_CALLBACK] = None,
+        user_may_invite: Optional[USER_MAY_INVITE_CALLBACK] = None,
+        user_may_send_3pid_invite: Optional[USER_MAY_SEND_3PID_INVITE_CALLBACK] = None,
+        user_may_create_room: Optional[USER_MAY_CREATE_ROOM_CALLBACK] = None,
+        user_may_create_room_alias: Optional[
+            USER_MAY_CREATE_ROOM_ALIAS_CALLBACK
+        ] = None,
+        user_may_publish_room: Optional[USER_MAY_PUBLISH_ROOM_CALLBACK] = None,
+        check_username_for_spam: Optional[CHECK_USERNAME_FOR_SPAM_CALLBACK] = None,
+        check_registration_for_spam: Optional[
+            CHECK_REGISTRATION_FOR_SPAM_CALLBACK
+        ] = None,
+        check_media_file_for_spam: Optional[CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK] = None,
+    ) -> None:
+        """Register callbacks from module for each hook."""
+        if check_event_for_spam is not None:
+            self._check_event_for_spam_callbacks.append(check_event_for_spam)
+
+        if should_drop_federated_event is not None:
+            self._should_drop_federated_event_callbacks.append(
+                should_drop_federated_event
+            )
+
+        if user_may_join_room is not None:
+            self._user_may_join_room_callbacks.append(user_may_join_room)
+
+        if user_may_invite is not None:
+            self._user_may_invite_callbacks.append(user_may_invite)
+
+        if user_may_send_3pid_invite is not None:
+            self._user_may_send_3pid_invite_callbacks.append(
+                user_may_send_3pid_invite,
+            )
+
+        if user_may_create_room is not None:
+            self._user_may_create_room_callbacks.append(user_may_create_room)
+
+        if user_may_create_room_alias is not None:
+            self._user_may_create_room_alias_callbacks.append(
+                user_may_create_room_alias,
+            )
+
+        if user_may_publish_room is not None:
+            self._user_may_publish_room_callbacks.append(user_may_publish_room)
+
+        if check_username_for_spam is not None:
+            self._check_username_for_spam_callbacks.append(check_username_for_spam)
+
+        if check_registration_for_spam is not None:
+            self._check_registration_for_spam_callbacks.append(
+                check_registration_for_spam,
+            )
+
+        if check_media_file_for_spam is not None:
+            self._check_media_file_for_spam_callbacks.append(check_media_file_for_spam)
+
+    @trace
+    async def check_event_for_spam(
+        self, event: "synapse.events.EventBase"
+    ) -> Union[Tuple[Codes, JsonDict], str]:
+        """Checks if a given event is considered "spammy" by this server.
+
+        If the server considers an event spammy, then it will be rejected if
+        sent by a local user. If it is sent by a user on another server, the
+        event is soft-failed.
+
+        Args:
+            event: the event to be checked
+
+        Returns:
+            - `NOT_SPAM` if the event is considered good (non-spammy) and should be let
+                through. Other spamcheck filters may still reject it.
+            - A `(Codes, dict)` tuple if the event is considered spammy and is rejected
+                with a specific error code.
+            - A string that isn't `NOT_SPAM` if the event is considered spammy and the
+                string should be used as the client-facing error message. This usage is
+                generally discouraged as it doesn't support internationalization.
+        """
+        for callback in self._check_event_for_spam_callbacks:
+            with Measure(
+                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
+            ):
+                res = await delay_cancellation(callback(event))
+                if res is False or res == self.NOT_SPAM:
+                    # This spam-checker accepts the event.
+                    # Other spam-checkers may reject it, though.
+                    continue
+                elif res is True:
+                    # This spam-checker rejects the event with deprecated
+                    # return value `True`
+                    return synapse.api.errors.Codes.FORBIDDEN, {}
+                elif (
+                    isinstance(res, tuple)
+                    and len(res) == 2
+                    and isinstance(res[0], synapse.api.errors.Codes)
+                    and isinstance(res[1], dict)
+                ):
+                    return res
+                elif isinstance(res, synapse.api.errors.Codes):
+                    return res, {}
+                elif not isinstance(res, str):
+                    # mypy complains that we can't reach this code because of the
+                    # return type in CHECK_EVENT_FOR_SPAM_CALLBACK, but we don't know
+                    # for sure that the module actually returns it.
+                    logger.warning(
+                        "Module returned invalid value, rejecting message as spam"
+                    )
+                    res = "This message has been rejected as probable spam"
+                else:
+                    # The module rejected the event with a plain `str` (any
+                    # `Codes` was already handled above). We stop here.
+                    pass
+
+                return res
+
+        # No spam-checker has rejected the event, let it pass.
+        return self.NOT_SPAM
+
+    async def should_drop_federated_event(
+        self, event: "synapse.events.EventBase"
+    ) -> Union[bool, str]:
+        """Checks if a given federated event is considered "spammy" by this
+        server.
+
+        If the server considers an event spammy, it will be silently dropped,
+        and in doing so will split-brain our view of the room's DAG.
+
+        Args:
+            event: the event to be checked
+
+        Returns:
+            True if the event should be silently dropped
+        """
+        for callback in self._should_drop_federated_event_callbacks:
+            with Measure(
+                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
+            ):
+                res: Union[bool, str] = await delay_cancellation(callback(event))
+            if res:
+                return res
+
+        return False
+
+    async def user_may_join_room(
+        self, user_id: str, room_id: str, is_invited: bool
+    ) -> Union[Tuple[Codes, JsonDict], Literal["NOT_SPAM"]]:
+        """Checks if a given user is allowed to join a room.
+        Not called when a user creates a room.
+
+        Args:
+            user_id: The ID of the user wanting to join the room
+            room_id: The ID of the room the user wants to join
+            is_invited: Whether the user is invited into the room
+
+        Returns:
+            NOT_SPAM if the operation is permitted, [Codes, Dict] otherwise.
+        """
+        for callback in self._user_may_join_room_callbacks:
+            with Measure(
+                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
+            ):
+                res = await delay_cancellation(callback(user_id, room_id, is_invited))
+                # Normalize return values to `Codes` or `"NOT_SPAM"`.
+                if res is True or res is self.NOT_SPAM:
+                    continue
+                elif res is False:
+                    return synapse.api.errors.Codes.FORBIDDEN, {}
+                elif isinstance(res, synapse.api.errors.Codes):
+                    return res, {}
+                elif (
+                    isinstance(res, tuple)
+                    and len(res) == 2
+                    and isinstance(res[0], synapse.api.errors.Codes)
+                    and isinstance(res[1], dict)
+                ):
+                    return res
+                else:
+                    logger.warning(
+                        "Module returned invalid value, rejecting join as spam"
+                    )
+                    return synapse.api.errors.Codes.FORBIDDEN, {}
+
+        # No spam-checker has rejected the request, let it pass.
+        return self.NOT_SPAM
+
+    async def user_may_invite(
+        self, inviter_userid: str, invitee_userid: str, room_id: str
+    ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]:
+        """Checks if a given user may send an invite
+
+        Args:
+            inviter_userid: The user ID of the sender of the invitation
+            invitee_userid: The user ID targeted in the invitation
+            room_id: The room ID
+
+        Returns:
+            NOT_SPAM if the operation is permitted, a (Codes, dict) tuple otherwise.
+        """
+        for callback in self._user_may_invite_callbacks:
+            with Measure(
+                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
+            ):
+                res = await delay_cancellation(
+                    callback(inviter_userid, invitee_userid, room_id)
+                )
+                # Normalize return values to `Codes` or `"NOT_SPAM"`.
+                if res is True or res is self.NOT_SPAM:
+                    continue
+                elif res is False:
+                    return synapse.api.errors.Codes.FORBIDDEN, {}
+                elif isinstance(res, synapse.api.errors.Codes):
+                    return res, {}
+                elif (
+                    isinstance(res, tuple)
+                    and len(res) == 2
+                    and isinstance(res[0], synapse.api.errors.Codes)
+                    and isinstance(res[1], dict)
+                ):
+                    return res
+                else:
+                    logger.warning(
+                        "Module returned invalid value, rejecting invite as spam"
+                    )
+                    return synapse.api.errors.Codes.FORBIDDEN, {}
+
+        # No spam-checker has rejected the request, let it pass.
+        return self.NOT_SPAM
+
+    async def user_may_send_3pid_invite(
+        self, inviter_userid: str, medium: str, address: str, room_id: str
+    ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]:
+        """Checks if a given user may invite a given threepid into the room
+
+        Note that if the threepid is already associated with a Matrix user ID, Synapse
+        will call user_may_invite with said user ID instead.
+
+        Args:
+            inviter_userid: The user ID of the sender of the invitation
+            medium: The 3PID's medium (e.g. "email")
+            address: The 3PID's address (e.g. "alice@example.com")
+            room_id: The room ID
+
+        Returns:
+            NOT_SPAM if the operation is permitted, a (Codes, dict) tuple otherwise.
+        """
+        for callback in self._user_may_send_3pid_invite_callbacks:
+            with Measure(
+                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
+            ):
+                res = await delay_cancellation(
+                    callback(inviter_userid, medium, address, room_id)
+                )
+                # Normalize return values to `Codes` or `"NOT_SPAM"`.
+                if res is True or res is self.NOT_SPAM:
+                    continue
+                elif res is False:
+                    return synapse.api.errors.Codes.FORBIDDEN, {}
+                elif isinstance(res, synapse.api.errors.Codes):
+                    return res, {}
+                elif (
+                    isinstance(res, tuple)
+                    and len(res) == 2
+                    and isinstance(res[0], synapse.api.errors.Codes)
+                    and isinstance(res[1], dict)
+                ):
+                    return res
+                else:
+                    logger.warning(
+                        "Module returned invalid value, rejecting 3pid invite as spam"
+                    )
+                    return synapse.api.errors.Codes.FORBIDDEN, {}
+
+        return self.NOT_SPAM
+
+    async def user_may_create_room(
+        self, userid: str
+    ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]:
+        """Checks if a given user may create a room
+
+        Args:
+            userid: The ID of the user attempting to create a room
+        """
+        for callback in self._user_may_create_room_callbacks:
+            with Measure(
+                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
+            ):
+                res = await delay_cancellation(callback(userid))
+                if res is True or res is self.NOT_SPAM:
+                    continue
+                elif res is False:
+                    return synapse.api.errors.Codes.FORBIDDEN, {}
+                elif isinstance(res, synapse.api.errors.Codes):
+                    return res, {}
+                elif (
+                    isinstance(res, tuple)
+                    and len(res) == 2
+                    and isinstance(res[0], synapse.api.errors.Codes)
+                    and isinstance(res[1], dict)
+                ):
+                    return res
+                else:
+                    logger.warning(
+                        "Module returned invalid value, rejecting room creation as spam"
+                    )
+                    return synapse.api.errors.Codes.FORBIDDEN, {}
+
+        return self.NOT_SPAM
+
+    async def user_may_create_room_alias(
+        self, userid: str, room_alias: RoomAlias
+    ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]:
+        """Checks if a given user may create a room alias
+
+        Args:
+            userid: The ID of the user attempting to create a room alias
+            room_alias: The alias to be created
+
+        """
+        for callback in self._user_may_create_room_alias_callbacks:
+            with Measure(
+                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
+            ):
+                res = await delay_cancellation(callback(userid, room_alias))
+                if res is True or res is self.NOT_SPAM:
+                    continue
+                elif res is False:
+                    return synapse.api.errors.Codes.FORBIDDEN, {}
+                elif isinstance(res, synapse.api.errors.Codes):
+                    return res, {}
+                elif (
+                    isinstance(res, tuple)
+                    and len(res) == 2
+                    and isinstance(res[0], synapse.api.errors.Codes)
+                    and isinstance(res[1], dict)
+                ):
+                    return res
+                else:
+                    logger.warning(
+                        "Module returned invalid value, rejecting room create as spam"
+                    )
+                    return synapse.api.errors.Codes.FORBIDDEN, {}
+
+        return self.NOT_SPAM
+
+    async def user_may_publish_room(
+        self, userid: str, room_id: str
+    ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]:
+        """Checks if a given user may publish a room to the directory
+
+        Args:
+            userid: The user ID attempting to publish the room
+            room_id: The ID of the room that would be published
+        """
+        for callback in self._user_may_publish_room_callbacks:
+            with Measure(
+                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
+            ):
+                res = await delay_cancellation(callback(userid, room_id))
+                if res is True or res is self.NOT_SPAM:
+                    continue
+                elif res is False:
+                    return synapse.api.errors.Codes.FORBIDDEN, {}
+                elif isinstance(res, synapse.api.errors.Codes):
+                    return res, {}
+                elif (
+                    isinstance(res, tuple)
+                    and len(res) == 2
+                    and isinstance(res[0], synapse.api.errors.Codes)
+                    and isinstance(res[1], dict)
+                ):
+                    return res
+                else:
+                    logger.warning(
+                        "Module returned invalid value, rejecting room publication as spam"
+                    )
+                    return synapse.api.errors.Codes.FORBIDDEN, {}
+
+        return self.NOT_SPAM
+
+    async def check_username_for_spam(self, user_profile: UserProfile) -> bool:
+        """Checks if a user ID or display name are considered "spammy" by this server.
+
+        If the server considers a username spammy, then it will not be included in
+        user directory results.
+
+        Args:
+            user_profile: The user information to check, it contains the keys:
+                * user_id
+                * display_name
+                * avatar_url
+
+        Returns:
+            True if the user is spammy.
+        """
+        for callback in self._check_username_for_spam_callbacks:
+            with Measure(
+                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
+            ):
+                # Make a copy of the user profile object to ensure the spam checker cannot
+                # modify it.
+                res = await delay_cancellation(callback(user_profile.copy()))
+            if res:
+                return True
+
+        return False
+
+    async def check_registration_for_spam(
+        self,
+        email_threepid: Optional[dict],
+        username: Optional[str],
+        request_info: Collection[Tuple[str, str]],
+        auth_provider_id: Optional[str] = None,
+    ) -> RegistrationBehaviour:
+        """Checks if we should allow the given registration request.
+
+        Args:
+            email_threepid: The email threepid used for registering, if any
+            username: The requested user name, if any
+            request_info: List of tuples of user agent and IP that
+                were used during the registration process.
+            auth_provider_id: The SSO IdP the user used, e.g. "oidc", "saml",
+                "cas". If any. Note this does not include users registered
+                via a password provider.
+
+        Returns:
+            Enum for how the request should be handled
+        """
+
+        for callback in self._check_registration_for_spam_callbacks:
+            with Measure(
+                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
+            ):
+                behaviour = await delay_cancellation(
+                    callback(email_threepid, username, request_info, auth_provider_id)
+                )
+            assert isinstance(behaviour, RegistrationBehaviour)
+            if behaviour != RegistrationBehaviour.ALLOW:
+                return behaviour
+
+        return RegistrationBehaviour.ALLOW
+
+    async def check_media_file_for_spam(
+        self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
+    ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]:
+        """Checks if a piece of newly uploaded media should be blocked.
+
+        This will be called for local uploads, downloads of remote media, each
+        thumbnail generated for those, and web pages/images used for URL
+        previews.
+
+        Note that care should be taken to not do blocking IO operations in the
+        main thread. For example, to get the contents of a file a module
+        should do::
+
+            async def check_media_file_for_spam(
+                self, file: ReadableFileWrapper, file_info: FileInfo
+            ) -> Union[Codes, Literal["NOT_SPAM"]]:
+                buffer = BytesIO()
+                await file.write_chunks_to(buffer.write)
+
+                if buffer.getvalue() == b"Hello World":
+                    return synapse.module_api.NOT_SPAM
+
+                return Codes.FORBIDDEN
+
+
+        Args:
+            file_wrapper: An object that allows reading the contents of the media.
+            file_info: Metadata about the file.
+        """
+
+        for callback in self._check_media_file_for_spam_callbacks:
+            with Measure(
+                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
+            ):
+                res = await delay_cancellation(callback(file_wrapper, file_info))
+                # Normalize return values to `Codes` or `"NOT_SPAM"`.
+                if res is False or res is self.NOT_SPAM:
+                    continue
+                elif res is True:
+                    return synapse.api.errors.Codes.FORBIDDEN, {}
+                elif isinstance(res, synapse.api.errors.Codes):
+                    return res, {}
+                elif (
+                    isinstance(res, tuple)
+                    and len(res) == 2
+                    and isinstance(res[0], synapse.api.errors.Codes)
+                    and isinstance(res[1], dict)
+                ):
+                    return res
+                else:
+                    logger.warning(
+                        "Module returned invalid value, rejecting media file as spam"
+                    )
+                    return synapse.api.errors.Codes.FORBIDDEN, {}
+
+        return self.NOT_SPAM
diff --git a/synapse/server.py b/synapse/server.py
index a191c19993..559724594b 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -42,7 +42,6 @@ from synapse.crypto.context_factory import RegularPolicyForHTTPS
 from synapse.crypto.keyring import Keyring
 from synapse.events.builder import EventBuilderFactory
 from synapse.events.presence_router import PresenceRouter
-from synapse.events.spamcheck import SpamChecker
 from synapse.events.third_party_rules import ThirdPartyEventRules
 from synapse.events.utils import EventClientSerializer
 from synapse.federation.federation_client import FederationClient
@@ -687,10 +686,6 @@ class HomeServer(metaclass=abc.ABCMeta):
     def get_stats_handler(self) -> StatsHandler:
         return StatsHandler(self)
 
-    @cache_in_self
-    def get_spam_checker(self) -> SpamChecker:
-        return SpamChecker(self)
-
     @cache_in_self
     def get_third_party_event_rules(self) -> ThirdPartyEventRules:
         return ThirdPartyEventRules(self)
@@ -803,7 +798,7 @@ class HomeServer(metaclass=abc.ABCMeta):
 
     @cache_in_self
     def get_module_api_callbacks(self) -> ModuleApiCallbacks:
-        return ModuleApiCallbacks()
+        return ModuleApiCallbacks(self)
 
     @cache_in_self
     def get_account_data_handler(self) -> AccountDataHandler:
diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py
index da4d240826..15a7dc6818 100644
--- a/tests/handlers/test_user_directory.py
+++ b/tests/handlers/test_user_directory.py
@@ -792,7 +792,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
             return False
 
         # Configure a spam checker that does not filter any users.
-        spam_checker = self.hs.get_spam_checker()
+        spam_checker = self.hs.get_module_api_callbacks().spam_checker
         spam_checker._check_username_for_spam_callbacks = [allow_all]
 
         # The results do not change:
diff --git a/tests/media/test_media_storage.py b/tests/media/test_media_storage.py
index 870047d0f2..f0f2da65db 100644
--- a/tests/media/test_media_storage.py
+++ b/tests/media/test_media_storage.py
@@ -31,7 +31,6 @@ from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.errors import Codes
 from synapse.events import EventBase
-from synapse.events.spamcheck import load_legacy_spam_checkers
 from synapse.http.types import QueryParams
 from synapse.logging.context import make_deferred_yieldable
 from synapse.media._base import FileInfo
@@ -39,6 +38,7 @@ from synapse.media.filepath import MediaFilePaths
 from synapse.media.media_storage import MediaStorage, ReadableFileWrapper
 from synapse.media.storage_provider import FileStorageProviderBackend
 from synapse.module_api import ModuleApi
+from synapse.module_api.callbacks.spamchecker_callbacks import load_legacy_spam_checkers
 from synapse.rest import admin
 from synapse.rest.client import login
 from synapse.server import HomeServer
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index a4900703c4..4d39c89f6f 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -814,7 +814,9 @@ class RoomsCreateTestCase(RoomBase):
             return False
 
         join_mock = Mock(side_effect=user_may_join_room)
-        self.hs.get_spam_checker()._user_may_join_room_callbacks.append(join_mock)
+        self.hs.get_module_api_callbacks().spam_checker._user_may_join_room_callbacks.append(
+            join_mock
+        )
 
         channel = self.make_request(
             "POST",
@@ -840,7 +842,9 @@ class RoomsCreateTestCase(RoomBase):
             return Codes.CONSENT_NOT_GIVEN
 
         join_mock = Mock(side_effect=user_may_join_room_codes)
-        self.hs.get_spam_checker()._user_may_join_room_callbacks.append(join_mock)
+        self.hs.get_module_api_callbacks().spam_checker._user_may_join_room_callbacks.append(
+            join_mock
+        )
 
         channel = self.make_request(
             "POST",
@@ -1162,7 +1166,9 @@ class RoomJoinTestCase(RoomBase):
         # `spec` argument is needed for this function mock to have `__qualname__`, which
         # is needed for `Measure` metrics buried in SpamChecker.
         callback_mock = Mock(side_effect=user_may_join_room, spec=lambda *x: None)
-        self.hs.get_spam_checker()._user_may_join_room_callbacks.append(callback_mock)
+        self.hs.get_module_api_callbacks().spam_checker._user_may_join_room_callbacks.append(
+            callback_mock
+        )
 
         # Join a first room, without being invited to it.
         self.helper.join(self.room1, self.user2, tok=self.tok2)
@@ -1227,7 +1233,9 @@ class RoomJoinTestCase(RoomBase):
         # `spec` argument is needed for this function mock to have `__qualname__`, which
         # is needed for `Measure` metrics buried in SpamChecker.
         callback_mock = Mock(side_effect=user_may_join_room, spec=lambda *x: None)
-        self.hs.get_spam_checker()._user_may_join_room_callbacks.append(callback_mock)
+        self.hs.get_module_api_callbacks().spam_checker._user_may_join_room_callbacks.append(
+            callback_mock
+        )
 
         # Join a first room, without being invited to it.
         self.helper.join(self.room1, self.user2, tok=self.tok2)
@@ -1643,7 +1651,7 @@ class RoomMessagesTestCase(RoomBase):
 
         spam_checker = SpamCheck()
 
-        self.hs.get_spam_checker()._check_event_for_spam_callbacks.append(
+        self.hs.get_module_api_callbacks().spam_checker._check_event_for_spam_callbacks.append(
             spam_checker.check_event_for_spam
         )
 
@@ -3381,7 +3389,9 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase):
         # `spec` argument is needed for this function mock to have `__qualname__`, which
         # is needed for `Measure` metrics buried in SpamChecker.
         mock = Mock(return_value=make_awaitable(True), spec=lambda *x: None)
-        self.hs.get_spam_checker()._user_may_send_3pid_invite_callbacks.append(mock)
+        self.hs.get_module_api_callbacks().spam_checker._user_may_send_3pid_invite_callbacks.append(
+            mock
+        )
 
         # Send a 3PID invite into the room and check that it succeeded.
         email_to_invite = "teresa@example.com"
@@ -3446,7 +3456,9 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase):
             return_value=make_awaitable(synapse.module_api.NOT_SPAM),
             spec=lambda *x: None,
         )
-        self.hs.get_spam_checker()._user_may_send_3pid_invite_callbacks.append(mock)
+        self.hs.get_module_api_callbacks().spam_checker._user_may_send_3pid_invite_callbacks.append(
+            mock
+        )
 
         # Send a 3PID invite into the room and check that it succeeded.
         email_to_invite = "teresa@example.com"
diff --git a/tests/server.py b/tests/server.py
index b52ff1c463..a49dc90e32 100644
--- a/tests/server.py
+++ b/tests/server.py
@@ -73,11 +73,11 @@ from twisted.web.server import Request, Site
 from synapse.config.database import DatabaseConnectionConfig
 from synapse.config.homeserver import HomeServerConfig
 from synapse.events.presence_router import load_legacy_presence_router
-from synapse.events.spamcheck import load_legacy_spam_checkers
 from synapse.events.third_party_rules import load_legacy_third_party_event_rules
 from synapse.handlers.auth import load_legacy_password_auth_providers
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import ContextResourceUsage
+from synapse.module_api.callbacks.spamchecker_callbacks import load_legacy_spam_checkers
 from synapse.server import HomeServer
 from synapse.storage import DataStore
 from synapse.storage.database import LoggingDatabaseConnection
-- 
cgit 1.5.1


From 5e024a0645733a816dc0da21a034dd70b053f2be Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 20 Apr 2023 12:30:32 -0400
Subject: Modify StoreKeyFetcher to read from server_keys_json. (#15417)

Before this change:

* `PerspectivesKeyFetcher` and `ServerKeyFetcher` write to `server_keys_json`.
* `PerspectivesKeyFetcher` also writes to `server_signature_keys`.
* `StoreKeyFetcher` reads from `server_signature_keys`.

After this change:

* `PerspectivesKeyFetcher` and `ServerKeyFetcher` write to `server_keys_json`.
* `PerspectivesKeyFetcher` also writes to `server_signature_keys`.
* `StoreKeyFetcher` reads from `server_keys_json`.

This results in `StoreKeyFetcher` now using the results from `ServerKeyFetcher`
in addition to those from `PerspectivesKeyFetcher`, i.e. keys which are directly
fetched from a server will now be pulled from the database instead of refetched.

An additional minor change is included to avoid creating a `PerspectivesKeyFetcher`
(and checking it) if no `trusted_key_servers` are configured.

The overall impact of this should be better usage of cached results:

* If a server has no trusted key servers configured then it should reduce how often keys
  are fetched.
* If a server's trusted key server does not have a requested server's keys cached then it
  should reduce how often keys are directly fetched.
---
 changelog.d/15417.bugfix                   |  1 +
 synapse/crypto/keyring.py                  | 30 ++++-----
 synapse/rest/key/v2/remote_key_resource.py |  2 +-
 synapse/storage/databases/main/keys.py     | 99 ++++++++++++++++++++++++++----
 tests/crypto/test_keyring.py               | 62 +++++++++----------
 tests/storage/test_keys.py                 | 16 ++---
 tests/unittest.py                          | 25 +++++---
 tests/utils.py                             |  3 +
 8 files changed, 162 insertions(+), 76 deletions(-)
 create mode 100644 changelog.d/15417.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15417.bugfix b/changelog.d/15417.bugfix
new file mode 100644
index 0000000000..300635cbdc
--- /dev/null
+++ b/changelog.d/15417.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where cached key results which were directly fetched would not be properly re-used.
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index d2f99dc2ac..afdf6863d6 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -150,18 +150,19 @@ class Keyring:
     def __init__(
         self, hs: "HomeServer", key_fetchers: "Optional[Iterable[KeyFetcher]]" = None
     ):
-        self.clock = hs.get_clock()
-
         if key_fetchers is None:
-            key_fetchers = (
-                # Fetch keys from the database.
-                StoreKeyFetcher(hs),
-                # Fetch keys from a configured Perspectives server.
-                PerspectivesKeyFetcher(hs),
-                # Fetch keys from the origin server directly.
-                ServerKeyFetcher(hs),
-            )
-        self._key_fetchers = key_fetchers
+            # Always fetch keys from the database.
+            mutable_key_fetchers: List[KeyFetcher] = [StoreKeyFetcher(hs)]
+            # Fetch keys from configured trusted key servers, if any exist.
+            key_servers = hs.config.key.key_servers
+            if key_servers:
+                mutable_key_fetchers.append(PerspectivesKeyFetcher(hs))
+            # Finally, fetch keys from the origin server directly.
+            mutable_key_fetchers.append(ServerKeyFetcher(hs))
+
+            self._key_fetchers: Iterable[KeyFetcher] = tuple(mutable_key_fetchers)
+        else:
+            self._key_fetchers = key_fetchers
 
         self._fetch_keys_queue: BatchingQueue[
             _FetchKeyRequest, Dict[str, Dict[str, FetchKeyResult]]
@@ -510,7 +511,7 @@ class StoreKeyFetcher(KeyFetcher):
             for key_id in queue_value.key_ids
         )
 
-        res = await self.store.get_server_verify_keys(key_ids_to_fetch)
+        res = await self.store.get_server_keys_json(key_ids_to_fetch)
         keys: Dict[str, Dict[str, FetchKeyResult]] = {}
         for (server_name, key_id), key in res.items():
             keys.setdefault(server_name, {})[key_id] = key
@@ -522,7 +523,6 @@ class BaseV2KeyFetcher(KeyFetcher):
         super().__init__(hs)
 
         self.store = hs.get_datastores().main
-        self.config = hs.config
 
     async def process_v2_response(
         self, from_server: str, response_json: JsonDict, time_added_ms: int
@@ -626,7 +626,7 @@ class PerspectivesKeyFetcher(BaseV2KeyFetcher):
         super().__init__(hs)
         self.clock = hs.get_clock()
         self.client = hs.get_federation_http_client()
-        self.key_servers = self.config.key.key_servers
+        self.key_servers = hs.config.key.key_servers
 
     async def _fetch_keys(
         self, keys_to_fetch: List[_FetchKeyRequest]
@@ -775,7 +775,7 @@ class PerspectivesKeyFetcher(BaseV2KeyFetcher):
 
             keys.setdefault(server_name, {}).update(processed_response)
 
-        await self.store.store_server_verify_keys(
+        await self.store.store_server_signature_keys(
             perspective_name, time_now_ms, added_keys
         )
 
diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py
index 3bdb6ec909..ff0454ca57 100644
--- a/synapse/rest/key/v2/remote_key_resource.py
+++ b/synapse/rest/key/v2/remote_key_resource.py
@@ -155,7 +155,7 @@ class RemoteKey(RestServlet):
             for key_id in key_ids:
                 store_queries.append((server_name, key_id, None))
 
-        cached = await self.store.get_server_keys_json(store_queries)
+        cached = await self.store.get_server_keys_json_for_remote(store_queries)
 
         json_results: Set[bytes] = set()
 
diff --git a/synapse/storage/databases/main/keys.py b/synapse/storage/databases/main/keys.py
index 89c37a4eb5..1666e3c43b 100644
--- a/synapse/storage/databases/main/keys.py
+++ b/synapse/storage/databases/main/keys.py
@@ -14,10 +14,12 @@
 # limitations under the License.
 
 import itertools
+import json
 import logging
 from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple
 
 from signedjson.key import decode_verify_key_bytes
+from unpaddedbase64 import decode_base64
 
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import LoggingTransaction
@@ -36,15 +38,16 @@ class KeyStore(SQLBaseStore):
     """Persistence for signature verification keys"""
 
     @cached()
-    def _get_server_verify_key(
+    def _get_server_signature_key(
         self, server_name_and_key_id: Tuple[str, str]
     ) -> FetchKeyResult:
         raise NotImplementedError()
 
     @cachedList(
-        cached_method_name="_get_server_verify_key", list_name="server_name_and_key_ids"
+        cached_method_name="_get_server_signature_key",
+        list_name="server_name_and_key_ids",
     )
-    async def get_server_verify_keys(
+    async def get_server_signature_keys(
         self, server_name_and_key_ids: Iterable[Tuple[str, str]]
     ) -> Dict[Tuple[str, str], FetchKeyResult]:
         """
@@ -62,10 +65,12 @@ class KeyStore(SQLBaseStore):
             """Processes a batch of keys to fetch, and adds the result to `keys`."""
 
             # batch_iter always returns tuples so it's safe to do len(batch)
-            sql = (
-                "SELECT server_name, key_id, verify_key, ts_valid_until_ms "
-                "FROM server_signature_keys WHERE 1=0"
-            ) + " OR (server_name=? AND key_id=?)" * len(batch)
+            sql = """
+            SELECT server_name, key_id, verify_key, ts_valid_until_ms
+            FROM server_signature_keys WHERE 1=0
+            """ + " OR (server_name=? AND key_id=?)" * len(
+                batch
+            )
 
             txn.execute(sql, tuple(itertools.chain.from_iterable(batch)))
 
@@ -89,9 +94,9 @@ class KeyStore(SQLBaseStore):
                 _get_keys(txn, batch)
             return keys
 
-        return await self.db_pool.runInteraction("get_server_verify_keys", _txn)
+        return await self.db_pool.runInteraction("get_server_signature_keys", _txn)
 
-    async def store_server_verify_keys(
+    async def store_server_signature_keys(
         self,
         from_server: str,
         ts_added_ms: int,
@@ -119,7 +124,7 @@ class KeyStore(SQLBaseStore):
                 )
             )
             # invalidate takes a tuple corresponding to the params of
-            # _get_server_verify_key. _get_server_verify_key only takes one
+            # _get_server_signature_key. _get_server_signature_key only takes one
             # param, which is itself the 2-tuple (server_name, key_id).
             invalidations.append((server_name, key_id))
 
@@ -134,10 +139,10 @@ class KeyStore(SQLBaseStore):
                 "verify_key",
             ),
             value_values=value_values,
-            desc="store_server_verify_keys",
+            desc="store_server_signature_keys",
         )
 
-        invalidate = self._get_server_verify_key.invalidate
+        invalidate = self._get_server_signature_key.invalidate
         for i in invalidations:
             invalidate((i,))
 
@@ -180,7 +185,75 @@ class KeyStore(SQLBaseStore):
             desc="store_server_keys_json",
         )
 
+        # invalidate takes a tuple corresponding to the params of
+        # _get_server_keys_json. _get_server_keys_json only takes one
+        # param, which is itself the 2-tuple (server_name, key_id).
+        self._get_server_keys_json.invalidate((((server_name, key_id),)))
+
+    @cached()
+    def _get_server_keys_json(
+        self, server_name_and_key_id: Tuple[str, str]
+    ) -> FetchKeyResult:
+        raise NotImplementedError()
+
+    @cachedList(
+        cached_method_name="_get_server_keys_json", list_name="server_name_and_key_ids"
+    )
     async def get_server_keys_json(
+        self, server_name_and_key_ids: Iterable[Tuple[str, str]]
+    ) -> Dict[Tuple[str, str], FetchKeyResult]:
+        """
+        Args:
+            server_name_and_key_ids:
+                iterable of (server_name, key-id) tuples to fetch keys for
+
+        Returns:
+            A map from (server_name, key_id) -> FetchKeyResult, or None if the
+            key is unknown
+        """
+        keys = {}
+
+        def _get_keys(txn: Cursor, batch: Tuple[Tuple[str, str], ...]) -> None:
+            """Processes a batch of keys to fetch, and adds the result to `keys`."""
+
+            # batch_iter always returns tuples so it's safe to do len(batch)
+            sql = """
+            SELECT server_name, key_id, key_json, ts_valid_until_ms
+            FROM server_keys_json WHERE 1=0
+            """ + " OR (server_name=? AND key_id=?)" * len(
+                batch
+            )
+
+            txn.execute(sql, tuple(itertools.chain.from_iterable(batch)))
+
+            for server_name, key_id, key_json_bytes, ts_valid_until_ms in txn:
+                if ts_valid_until_ms is None:
+                    # Old keys may be stored with a ts_valid_until_ms of null,
+                    # in which case we treat this as if it was set to `0`, i.e.
+                    # it won't match key requests that define a minimum
+                    # `ts_valid_until_ms`.
+                    ts_valid_until_ms = 0
+
+                # The entire signed JSON response is stored in server_keys_json,
+                # fetch out the bits needed.
+                key_json = json.loads(bytes(key_json_bytes))
+                key_base64 = key_json["verify_keys"][key_id]["key"]
+
+                keys[(server_name, key_id)] = FetchKeyResult(
+                    verify_key=decode_verify_key_bytes(
+                        key_id, decode_base64(key_base64)
+                    ),
+                    valid_until_ts=ts_valid_until_ms,
+                )
+
+        def _txn(txn: Cursor) -> Dict[Tuple[str, str], FetchKeyResult]:
+            for batch in batch_iter(server_name_and_key_ids, 50):
+                _get_keys(txn, batch)
+            return keys
+
+        return await self.db_pool.runInteraction("get_server_keys_json", _txn)
+
+    async def get_server_keys_json_for_remote(
         self, server_keys: Iterable[Tuple[str, Optional[str], Optional[str]]]
     ) -> Dict[Tuple[str, Optional[str], Optional[str]], List[Dict[str, Any]]]:
         """Retrieve the key json for a list of server_keys and key ids.
@@ -188,8 +261,10 @@ class KeyStore(SQLBaseStore):
         that server, key_id, and source triplet entry will be an empty list.
         The JSON is returned as a byte array so that it can be efficiently
         used in an HTTP response.
+
         Args:
             server_keys: List of (server_name, key_id, source) triplets.
+
         Returns:
             A mapping from (server_name, key_id, source) triplets to a list of dicts
         """
diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py
index 66102ab934..7c63b2ea4c 100644
--- a/tests/crypto/test_keyring.py
+++ b/tests/crypto/test_keyring.py
@@ -190,10 +190,23 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         kr = keyring.Keyring(self.hs)
 
         key1 = signedjson.key.generate_signing_key("1")
-        r = self.hs.get_datastores().main.store_server_verify_keys(
+        r = self.hs.get_datastores().main.store_server_keys_json(
             "server9",
-            int(time.time() * 1000),
-            {("server9", get_key_id(key1)): FetchKeyResult(get_verify_key(key1), 1000)},
+            get_key_id(key1),
+            from_server="test",
+            ts_now_ms=int(time.time() * 1000),
+            ts_expires_ms=1000,
+            # The entire response gets signed & stored, just include the bits we
+            # care about.
+            key_json_bytes=canonicaljson.encode_canonical_json(
+                {
+                    "verify_keys": {
+                        get_key_id(key1): {
+                            "key": encode_verify_key_base64(get_verify_key(key1))
+                        }
+                    }
+                }
+            ),
         )
         self.get_success(r)
 
@@ -280,17 +293,13 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         mock_fetcher = Mock()
         mock_fetcher.get_keys = Mock(return_value=make_awaitable({}))
 
-        kr = keyring.Keyring(
-            self.hs, key_fetchers=(StoreKeyFetcher(self.hs), mock_fetcher)
-        )
-
         key1 = signedjson.key.generate_signing_key("1")
-        r = self.hs.get_datastores().main.store_server_verify_keys(
+        r = self.hs.get_datastores().main.store_server_signature_keys(
             "server9",
             int(time.time() * 1000),
             # None is not a valid value in FetchKeyResult, but we're abusing this
             # API to insert null values into the database. The nulls get converted
-            # to 0 when fetched in KeyStore.get_server_verify_keys.
+            # to 0 when fetched in KeyStore.get_server_signature_keys.
             {("server9", get_key_id(key1)): FetchKeyResult(get_verify_key(key1), None)},  # type: ignore[arg-type]
         )
         self.get_success(r)
@@ -298,27 +307,12 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         json1: JsonDict = {}
         signedjson.sign.sign_json(json1, "server9", key1)
 
-        # should fail immediately on an unsigned object
-        d = kr.verify_json_for_server("server9", {}, 0)
-        self.get_failure(d, SynapseError)
-
-        # should fail on a signed object with a non-zero minimum_valid_until_ms,
-        # as it tries to refetch the keys and fails.
-        d = kr.verify_json_for_server("server9", json1, 500)
-        self.get_failure(d, SynapseError)
-
-        # We expect the keyring tried to refetch the key once.
-        mock_fetcher.get_keys.assert_called_once_with(
-            "server9", [get_key_id(key1)], 500
-        )
-
         # should succeed on a signed object with a 0 minimum_valid_until_ms
-        d = kr.verify_json_for_server(
-            "server9",
-            json1,
-            0,
+        d = self.hs.get_datastores().main.get_server_signature_keys(
+            [("server9", get_key_id(key1))]
         )
-        self.get_success(d)
+        result = self.get_success(d)
+        self.assertEquals(result[("server9", get_key_id(key1))].valid_until_ts, 0)
 
     def test_verify_json_dedupes_key_requests(self) -> None:
         """Two requests for the same key should be deduped."""
@@ -464,7 +458,9 @@ class ServerKeyFetcherTestCase(unittest.HomeserverTestCase):
         # check that the perspectives store is correctly updated
         lookup_triplet = (SERVER_NAME, testverifykey_id, None)
         key_json = self.get_success(
-            self.hs.get_datastores().main.get_server_keys_json([lookup_triplet])
+            self.hs.get_datastores().main.get_server_keys_json_for_remote(
+                [lookup_triplet]
+            )
         )
         res_keys = key_json[lookup_triplet]
         self.assertEqual(len(res_keys), 1)
@@ -582,7 +578,9 @@ class PerspectivesKeyFetcherTestCase(unittest.HomeserverTestCase):
         # check that the perspectives store is correctly updated
         lookup_triplet = (SERVER_NAME, testverifykey_id, None)
         key_json = self.get_success(
-            self.hs.get_datastores().main.get_server_keys_json([lookup_triplet])
+            self.hs.get_datastores().main.get_server_keys_json_for_remote(
+                [lookup_triplet]
+            )
         )
         res_keys = key_json[lookup_triplet]
         self.assertEqual(len(res_keys), 1)
@@ -703,7 +701,9 @@ class PerspectivesKeyFetcherTestCase(unittest.HomeserverTestCase):
         # check that the perspectives store is correctly updated
         lookup_triplet = (SERVER_NAME, testverifykey_id, None)
         key_json = self.get_success(
-            self.hs.get_datastores().main.get_server_keys_json([lookup_triplet])
+            self.hs.get_datastores().main.get_server_keys_json_for_remote(
+                [lookup_triplet]
+            )
         )
         res_keys = key_json[lookup_triplet]
         self.assertEqual(len(res_keys), 1)
diff --git a/tests/storage/test_keys.py b/tests/storage/test_keys.py
index 5901d80f26..5d7c13e6d0 100644
--- a/tests/storage/test_keys.py
+++ b/tests/storage/test_keys.py
@@ -37,13 +37,13 @@ KEY_2 = decode_verify_key_base64(
 
 
 class KeyStoreTestCase(tests.unittest.HomeserverTestCase):
-    def test_get_server_verify_keys(self) -> None:
+    def test_get_server_signature_keys(self) -> None:
         store = self.hs.get_datastores().main
 
         key_id_1 = "ed25519:key1"
         key_id_2 = "ed25519:KEY_ID_2"
         self.get_success(
-            store.store_server_verify_keys(
+            store.store_server_signature_keys(
                 "from_server",
                 10,
                 {
@@ -54,7 +54,7 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase):
         )
 
         res = self.get_success(
-            store.get_server_verify_keys(
+            store.get_server_signature_keys(
                 [
                     ("server1", key_id_1),
                     ("server1", key_id_2),
@@ -87,7 +87,7 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase):
         key_id_2 = "ed25519:key2"
 
         self.get_success(
-            store.store_server_verify_keys(
+            store.store_server_signature_keys(
                 "from_server",
                 0,
                 {
@@ -98,7 +98,7 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase):
         )
 
         res = self.get_success(
-            store.get_server_verify_keys([("srv1", key_id_1), ("srv1", key_id_2)])
+            store.get_server_signature_keys([("srv1", key_id_1), ("srv1", key_id_2)])
         )
         self.assertEqual(len(res.keys()), 2)
 
@@ -111,20 +111,20 @@ class KeyStoreTestCase(tests.unittest.HomeserverTestCase):
         self.assertEqual(res2.valid_until_ts, 200)
 
         # we should be able to look up the same thing again without a db hit
-        res = self.get_success(store.get_server_verify_keys([("srv1", key_id_1)]))
+        res = self.get_success(store.get_server_signature_keys([("srv1", key_id_1)]))
         self.assertEqual(len(res.keys()), 1)
         self.assertEqual(res[("srv1", key_id_1)].verify_key, KEY_1)
 
         new_key_2 = signedjson.key.get_verify_key(
             signedjson.key.generate_signing_key("key2")
         )
-        d = store.store_server_verify_keys(
+        d = store.store_server_signature_keys(
             "from_server", 10, {("srv1", key_id_2): FetchKeyResult(new_key_2, 300)}
         )
         self.get_success(d)
 
         res = self.get_success(
-            store.get_server_verify_keys([("srv1", key_id_1), ("srv1", key_id_2)])
+            store.get_server_signature_keys([("srv1", key_id_1), ("srv1", key_id_2)])
         )
         self.assertEqual(len(res.keys()), 2)
 
diff --git a/tests/unittest.py b/tests/unittest.py
index 96ae8fca67..ee2f78ab01 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -69,7 +69,6 @@ from synapse.logging.context import (
 )
 from synapse.rest import RegisterServletsFunc
 from synapse.server import HomeServer
-from synapse.storage.keys import FetchKeyResult
 from synapse.types import JsonDict, Requester, UserID, create_requester
 from synapse.util import Clock
 from synapse.util.httpresourcetree import create_resource_tree
@@ -848,15 +847,23 @@ class FederatingHomeserverTestCase(HomeserverTestCase):
         verify_key_id = "%s:%s" % (verify_key.alg, verify_key.version)
 
         self.get_success(
-            hs.get_datastores().main.store_server_verify_keys(
+            hs.get_datastores().main.store_server_keys_json(
+                self.OTHER_SERVER_NAME,
+                verify_key_id,
                 from_server=self.OTHER_SERVER_NAME,
-                ts_added_ms=clock.time_msec(),
-                verify_keys={
-                    (self.OTHER_SERVER_NAME, verify_key_id): FetchKeyResult(
-                        verify_key=verify_key,
-                        valid_until_ts=clock.time_msec() + 10000,
-                    ),
-                },
+                ts_now_ms=clock.time_msec(),
+                ts_expires_ms=clock.time_msec() + 10000,
+                key_json_bytes=canonicaljson.encode_canonical_json(
+                    {
+                        "verify_keys": {
+                            verify_key_id: {
+                                "key": signedjson.key.encode_verify_key_base64(
+                                    verify_key
+                                )
+                            }
+                        }
+                    }
+                ),
             )
         )
 
diff --git a/tests/utils.py b/tests/utils.py
index a0ac11bc5c..e73b46944b 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -131,6 +131,9 @@ def default_config(
         # the test signing key is just an arbitrary ed25519 key to keep the config
         # parser happy
         "signing_key": "ed25519 a_lPym qvioDNmfExFBRPgdTU+wtFYKq4JfwFRv7sYVgWvmgJg",
+        # Disable trusted key servers, otherwise unit tests might try to actually
+        # reach out to matrix.org.
+        "trusted_key_servers": [],
         "event_cache_size": 1,
         "enable_registration": True,
         "enable_registration_captcha": False,
-- 
cgit 1.5.1


From 197fbb123bc1a08a3d64e3acd658d3765f86d2fc Mon Sep 17 00:00:00 2001
From: Alok Kumar Singh <62210712+akstron@users.noreply.github.com>
Date: Fri, 21 Apr 2023 16:36:39 +0530
Subject: Remove legacy code of single user device resync api (#15418)

* Removed single-user resync usage and updated callers to use the multi-user counterpart

Signed-off-by: Alok Kumar Singh alokaks601@gmail.com
---
 changelog.d/15418.misc               |  1 +
 synapse/handlers/device.py           | 58 ++++--------------------------------
 synapse/handlers/devicemessage.py    | 14 +++++----
 synapse/handlers/federation_event.py | 14 +++++----
 synapse/replication/http/devices.py  | 57 -----------------------------------
 tests/test_federation.py             |  4 ++-
 6 files changed, 26 insertions(+), 122 deletions(-)
 create mode 100644 changelog.d/15418.misc

(limited to 'synapse')

diff --git a/changelog.d/15418.misc b/changelog.d/15418.misc
new file mode 100644
index 0000000000..ca6f995a9c
--- /dev/null
+++ b/changelog.d/15418.misc
@@ -0,0 +1 @@
+Always use multi-user device resync replication endpoints.
\ No newline at end of file
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index ae1d9337ad..b9d3b7fbc6 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from http import HTTPStatus
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -921,12 +920,8 @@ class DeviceListWorkerUpdater:
     def __init__(self, hs: "HomeServer"):
         from synapse.replication.http.devices import (
             ReplicationMultiUserDevicesResyncRestServlet,
-            ReplicationUserDevicesResyncRestServlet,
         )
 
-        self._user_device_resync_client = (
-            ReplicationUserDevicesResyncRestServlet.make_client(hs)
-        )
         self._multi_user_device_resync_client = (
             ReplicationMultiUserDevicesResyncRestServlet.make_client(hs)
         )
@@ -948,37 +943,7 @@ class DeviceListWorkerUpdater:
             # Shortcut empty requests
             return {}
 
-        try:
-            return await self._multi_user_device_resync_client(user_ids=user_ids)
-        except SynapseError as err:
-            if not (
-                err.code == HTTPStatus.NOT_FOUND and err.errcode == Codes.UNRECOGNIZED
-            ):
-                raise
-
-            # Fall back to single requests
-            result: Dict[str, Optional[JsonDict]] = {}
-            for user_id in user_ids:
-                result[user_id] = await self._user_device_resync_client(user_id=user_id)
-            return result
-
-    async def user_device_resync(
-        self, user_id: str, mark_failed_as_stale: bool = True
-    ) -> Optional[JsonDict]:
-        """Fetches all devices for a user and updates the device cache with them.
-
-        Args:
-            user_id: The user's id whose device_list will be updated.
-            mark_failed_as_stale: Whether to mark the user's device list as stale
-                if the attempt to resync failed.
-        Returns:
-            A dict with device info as under the "devices" in the result of this
-            request:
-            https://matrix.org/docs/spec/server_server/r0.1.2#get-matrix-federation-v1-user-devices-userid
-            None when we weren't able to fetch the device info for some reason,
-            e.g. due to a connection problem.
-        """
-        return (await self.multi_user_device_resync([user_id]))[user_id]
+        return await self._multi_user_device_resync_client(user_ids=user_ids)
 
 
 class DeviceListUpdater(DeviceListWorkerUpdater):
@@ -1131,7 +1096,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
                 )
 
             if resync:
-                await self.user_device_resync(user_id)
+                await self.multi_user_device_resync([user_id])
             else:
                 # Simply update the single device, since we know that is the only
                 # change (because of the single prev_id matching the current cache)
@@ -1198,10 +1163,9 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
             for user_id in need_resync:
                 try:
                     # Try to resync the current user's devices list.
-                    result = await self.user_device_resync(
-                        user_id=user_id,
-                        mark_failed_as_stale=False,
-                    )
+                    result = (await self.multi_user_device_resync([user_id], False))[
+                        user_id
+                    ]
 
                     # user_device_resync only returns a result if it managed to
                     # successfully resync and update the database. Updating the table
@@ -1260,18 +1224,6 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
 
         return result
 
-    async def user_device_resync(
-        self, user_id: str, mark_failed_as_stale: bool = True
-    ) -> Optional[JsonDict]:
-        result, failed = await self._user_device_resync_returning_failed(user_id)
-
-        if failed and mark_failed_as_stale:
-            # Mark the remote user's device list as stale so we know we need to retry
-            # it later.
-            await self.store.mark_remote_users_device_caches_as_stale((user_id,))
-
-        return result
-
     async def _user_device_resync_returning_failed(
         self, user_id: str
     ) -> Tuple[Optional[JsonDict], bool]:
diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py
index 00c403db49..3caf9b31cc 100644
--- a/synapse/handlers/devicemessage.py
+++ b/synapse/handlers/devicemessage.py
@@ -25,7 +25,9 @@ from synapse.logging.opentracing import (
     log_kv,
     set_tag,
 )
-from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet
+from synapse.replication.http.devices import (
+    ReplicationMultiUserDevicesResyncRestServlet,
+)
 from synapse.types import JsonDict, Requester, StreamKeyType, UserID, get_domain_from_id
 from synapse.util import json_encoder
 from synapse.util.stringutils import random_string
@@ -71,12 +73,12 @@ class DeviceMessageHandler:
         # sync. We do all device list resyncing on the master instance, so if
         # we're on a worker we hit the device resync replication API.
         if hs.config.worker.worker_app is None:
-            self._user_device_resync = (
-                hs.get_device_handler().device_list_updater.user_device_resync
+            self._multi_user_device_resync = (
+                hs.get_device_handler().device_list_updater.multi_user_device_resync
             )
         else:
-            self._user_device_resync = (
-                ReplicationUserDevicesResyncRestServlet.make_client(hs)
+            self._multi_user_device_resync = (
+                ReplicationMultiUserDevicesResyncRestServlet.make_client(hs)
             )
 
         # a rate limiter for room key requests.  The keys are
@@ -198,7 +200,7 @@ class DeviceMessageHandler:
             await self.store.mark_remote_users_device_caches_as_stale((sender_user_id,))
 
             # Immediately attempt a resync in the background
-            run_in_background(self._user_device_resync, user_id=sender_user_id)
+            run_in_background(self._multi_user_device_resync, user_ids=[sender_user_id])
 
     async def send_device_message(
         self,
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 8d5be81a92..06609fab93 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -70,7 +70,9 @@ from synapse.logging.opentracing import (
     trace,
 )
 from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet
+from synapse.replication.http.devices import (
+    ReplicationMultiUserDevicesResyncRestServlet,
+)
 from synapse.replication.http.federation import (
     ReplicationFederationSendEventsRestServlet,
 )
@@ -167,8 +169,8 @@ class FederationEventHandler:
 
         self._send_events = ReplicationFederationSendEventsRestServlet.make_client(hs)
         if hs.config.worker.worker_app:
-            self._user_device_resync = (
-                ReplicationUserDevicesResyncRestServlet.make_client(hs)
+            self._multi_user_device_resync = (
+                ReplicationMultiUserDevicesResyncRestServlet.make_client(hs)
             )
         else:
             self._device_list_updater = hs.get_device_handler().device_list_updater
@@ -1487,9 +1489,11 @@ class FederationEventHandler:
 
             # Immediately attempt a resync in the background
             if self._config.worker.worker_app:
-                await self._user_device_resync(user_id=sender)
+                await self._multi_user_device_resync(user_ids=[sender])
             else:
-                await self._device_list_updater.user_device_resync(sender)
+                await self._device_list_updater.multi_user_device_resync(
+                    user_ids=[sender]
+                )
         except Exception:
             logger.exception("Failed to resync device for %s", sender)
 
diff --git a/synapse/replication/http/devices.py b/synapse/replication/http/devices.py
index cc3929dcf5..f874f072f9 100644
--- a/synapse/replication/http/devices.py
+++ b/synapse/replication/http/devices.py
@@ -28,62 +28,6 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
-class ReplicationUserDevicesResyncRestServlet(ReplicationEndpoint):
-    """Ask master to resync the device list for a user by contacting their
-    server.
-
-    This must happen on master so that the results can be correctly cached in
-    the database and streamed to workers.
-
-    Request format:
-
-        POST /_synapse/replication/user_device_resync/:user_id
-
-        {}
-
-    Response is equivalent to ` /_matrix/federation/v1/user/devices/:user_id`
-    response, e.g.:
-
-        {
-            "user_id": "@alice:example.org",
-            "devices": [
-                {
-                    "device_id": "JLAFKJWSCS",
-                    "keys": { ... },
-                    "device_display_name": "Alice's Mobile Phone"
-                }
-            ]
-        }
-    """
-
-    NAME = "user_device_resync"
-    PATH_ARGS = ("user_id",)
-    CACHE = False
-
-    def __init__(self, hs: "HomeServer"):
-        super().__init__(hs)
-
-        from synapse.handlers.device import DeviceHandler
-
-        handler = hs.get_device_handler()
-        assert isinstance(handler, DeviceHandler)
-        self.device_list_updater = handler.device_list_updater
-
-        self.store = hs.get_datastores().main
-        self.clock = hs.get_clock()
-
-    @staticmethod
-    async def _serialize_payload(user_id: str) -> JsonDict:  # type: ignore[override]
-        return {}
-
-    async def _handle_request(  # type: ignore[override]
-        self, request: Request, content: JsonDict, user_id: str
-    ) -> Tuple[int, Optional[JsonDict]]:
-        user_devices = await self.device_list_updater.user_device_resync(user_id)
-
-        return 200, user_devices
-
-
 class ReplicationMultiUserDevicesResyncRestServlet(ReplicationEndpoint):
     """Ask master to resync the device list for multiple users from the same
     remote server by contacting their server.
@@ -216,6 +160,5 @@ class ReplicationUploadKeysForUserRestServlet(ReplicationEndpoint):
 
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
-    ReplicationUserDevicesResyncRestServlet(hs).register(http_server)
     ReplicationMultiUserDevicesResyncRestServlet(hs).register(http_server)
     ReplicationUploadKeysForUserRestServlet(hs).register(http_server)
diff --git a/tests/test_federation.py b/tests/test_federation.py
index 46d2f99eac..6d15ac7597 100644
--- a/tests/test_federation.py
+++ b/tests/test_federation.py
@@ -267,7 +267,9 @@ class MessageAcceptTests(unittest.HomeserverTestCase):
         # Resync the device list.
         device_handler = self.hs.get_device_handler()
         self.get_success(
-            device_handler.device_list_updater.user_device_resync(remote_user_id),
+            device_handler.device_list_updater.multi_user_device_resync(
+                [remote_user_id]
+            ),
         )
 
         # Retrieve the cross-signing keys for this user.
-- 
cgit 1.5.1


From ea5c3ede4f70b5702ebd1fd18286dff9ed0328d2 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 24 Apr 2023 13:12:06 -0400
Subject: Finish type hints for federation client HTTP code. (#15465)

---
 changelog.d/15465.misc                    |  1 +
 mypy.ini                                  |  6 ---
 synapse/federation/federation_client.py   |  8 +---
 synapse/federation/transport/client.py    | 17 +++++--
 synapse/http/matrixfederationclient.py    | 76 +++++++++++++++++++++++--------
 tests/federation/test_complexity.py       | 10 ++--
 tests/http/test_matrixfederationclient.py |  6 +--
 7 files changed, 82 insertions(+), 42 deletions(-)
 create mode 100644 changelog.d/15465.misc

(limited to 'synapse')

diff --git a/changelog.d/15465.misc b/changelog.d/15465.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/15465.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/mypy.ini b/mypy.ini
index 945f7925cb..8fb87b9b74 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -33,12 +33,6 @@ exclude = (?x)
    |synapse/storage/schema/
    )$
 
-[mypy-synapse.federation.transport.client]
-disallow_untyped_defs = False
-
-[mypy-synapse.http.matrixfederationclient]
-disallow_untyped_defs = False
-
 [mypy-synapse.metrics._reactor_metrics]
 disallow_untyped_defs = False
 # This module imports select.epoll. That exists on Linux, but doesn't on macOS.
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 4cf4957a42..ba34573d46 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -280,15 +280,11 @@ class FederationClient(FederationBase):
         logger.debug("backfill transaction_data=%r", transaction_data)
 
         if not isinstance(transaction_data, dict):
-            # TODO we probably want an exception type specific to federation
-            # client validation.
-            raise TypeError("Backfill transaction_data is not a dict.")
+            raise InvalidResponseError("Backfill transaction_data is not a dict.")
 
         transaction_data_pdus = transaction_data.get("pdus")
         if not isinstance(transaction_data_pdus, list):
-            # TODO we probably want an exception type specific to federation
-            # client validation.
-            raise TypeError("transaction_data.pdus is not a list.")
+            raise InvalidResponseError("transaction_data.pdus is not a list.")
 
         room_version = await self.store.get_room_version(room_id)
 
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index c05d598b70..bedbd23ded 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -16,6 +16,7 @@
 import logging
 import urllib
 from typing import (
+    TYPE_CHECKING,
     Any,
     Callable,
     Collection,
@@ -42,18 +43,21 @@ from synapse.api.urls import (
 )
 from synapse.events import EventBase, make_event_from_dict
 from synapse.federation.units import Transaction
-from synapse.http.matrixfederationclient import ByteParser
+from synapse.http.matrixfederationclient import ByteParser, LegacyJsonSendParser
 from synapse.http.types import QueryParams
 from synapse.types import JsonDict
 from synapse.util import ExceptionBundle
 
+if TYPE_CHECKING:
+    from synapse.app.homeserver import HomeServer
+
 logger = logging.getLogger(__name__)
 
 
 class TransportLayerClient:
     """Sends federation HTTP requests to other servers"""
 
-    def __init__(self, hs):
+    def __init__(self, hs: "HomeServer"):
         self.server_name = hs.hostname
         self.client = hs.get_federation_http_client()
         self._faster_joins_enabled = hs.config.experimental.faster_joins_enabled
@@ -133,7 +137,7 @@ class TransportLayerClient:
 
     async def backfill(
         self, destination: str, room_id: str, event_tuples: Collection[str], limit: int
-    ) -> Optional[JsonDict]:
+    ) -> Optional[Union[JsonDict, list]]:
         """Requests `limit` previous PDUs in a given context before list of
         PDUs.
 
@@ -388,6 +392,7 @@ class TransportLayerClient:
             # server was just having a momentary blip, the room will be out of
             # sync.
             ignore_backoff=True,
+            parser=LegacyJsonSendParser(),
         )
 
     async def send_leave_v2(
@@ -445,7 +450,11 @@ class TransportLayerClient:
         path = _create_v1_path("/invite/%s/%s", room_id, event_id)
 
         return await self.client.put_json(
-            destination=destination, path=path, data=content, ignore_backoff=True
+            destination=destination,
+            path=path,
+            data=content,
+            ignore_backoff=True,
+            parser=LegacyJsonSendParser(),
         )
 
     async def send_invite_v2(
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index 3302d4e48a..634882487c 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -17,7 +17,6 @@ import codecs
 import logging
 import random
 import sys
-import typing
 import urllib.parse
 from http import HTTPStatus
 from io import BytesIO, StringIO
@@ -30,9 +29,11 @@ from typing import (
     Generic,
     List,
     Optional,
+    TextIO,
     Tuple,
     TypeVar,
     Union,
+    cast,
     overload,
 )
 
@@ -183,20 +184,61 @@ class MatrixFederationRequest:
         return self.json
 
 
-class JsonParser(ByteParser[Union[JsonDict, list]]):
+class _BaseJsonParser(ByteParser[T]):
     """A parser that buffers the response and tries to parse it as JSON."""
 
     CONTENT_TYPE = "application/json"
 
-    def __init__(self) -> None:
+    def __init__(
+        self, validator: Optional[Callable[[Optional[object]], bool]] = None
+    ) -> None:
+        """
+        Args:
+            validator: A callable which takes the parsed JSON value and returns
+                true if the value is valid.
+        """
         self._buffer = StringIO()
         self._binary_wrapper = BinaryIOWrapper(self._buffer)
+        self._validator = validator
 
     def write(self, data: bytes) -> int:
         return self._binary_wrapper.write(data)
 
-    def finish(self) -> Union[JsonDict, list]:
-        return json_decoder.decode(self._buffer.getvalue())
+    def finish(self) -> T:
+        result = json_decoder.decode(self._buffer.getvalue())
+        if self._validator is not None and not self._validator(result):
+            raise ValueError(
+                f"Received incorrect JSON value: {result.__class__.__name__}"
+            )
+        return result
+
+
+class JsonParser(_BaseJsonParser[JsonDict]):
+    """A parser that buffers the response and tries to parse it as a JSON object."""
+
+    def __init__(self) -> None:
+        super().__init__(self._validate)
+
+    @staticmethod
+    def _validate(v: Any) -> bool:
+        return isinstance(v, dict)
+
+
+class LegacyJsonSendParser(_BaseJsonParser[Tuple[int, JsonDict]]):
+    """Ensure the legacy responses of /send_join & /send_leave are correct."""
+
+    def __init__(self) -> None:
+        super().__init__(self._validate)
+
+    @staticmethod
+    def _validate(v: Any) -> bool:
+        # Match [integer, JSON dict]
+        return (
+            isinstance(v, list)
+            and len(v) == 2
+            and type(v[0]) == int
+            and isinstance(v[1], dict)
+        )
 
 
 async def _handle_response(
@@ -313,9 +355,7 @@ async def _handle_response(
 class BinaryIOWrapper:
     """A wrapper for a TextIO which converts from bytes on the fly."""
 
-    def __init__(
-        self, file: typing.TextIO, encoding: str = "utf-8", errors: str = "strict"
-    ):
+    def __init__(self, file: TextIO, encoding: str = "utf-8", errors: str = "strict"):
         self.decoder = codecs.getincrementaldecoder(encoding)(errors)
         self.file = file
 
@@ -793,7 +833,7 @@ class MatrixFederationHttpClient:
         backoff_on_404: bool = False,
         try_trailing_slash_on_400: bool = False,
         parser: Literal[None] = None,
-    ) -> Union[JsonDict, list]:
+    ) -> JsonDict:
         ...
 
     @overload
@@ -825,8 +865,8 @@ class MatrixFederationHttpClient:
         ignore_backoff: bool = False,
         backoff_on_404: bool = False,
         try_trailing_slash_on_400: bool = False,
-        parser: Optional[ByteParser] = None,
-    ):
+        parser: Optional[ByteParser[T]] = None,
+    ) -> Union[JsonDict, T]:
         """Sends the specified json data using PUT
 
         Args:
@@ -902,7 +942,7 @@ class MatrixFederationHttpClient:
             _sec_timeout = self.default_timeout
 
         if parser is None:
-            parser = JsonParser()
+            parser = cast(ByteParser[T], JsonParser())
 
         body = await _handle_response(
             self.reactor,
@@ -924,7 +964,7 @@ class MatrixFederationHttpClient:
         timeout: Optional[int] = None,
         ignore_backoff: bool = False,
         args: Optional[QueryParams] = None,
-    ) -> Union[JsonDict, list]:
+    ) -> JsonDict:
         """Sends the specified json data using POST
 
         Args:
@@ -998,7 +1038,7 @@ class MatrixFederationHttpClient:
         ignore_backoff: bool = False,
         try_trailing_slash_on_400: bool = False,
         parser: Literal[None] = None,
-    ) -> Union[JsonDict, list]:
+    ) -> JsonDict:
         ...
 
     @overload
@@ -1024,8 +1064,8 @@ class MatrixFederationHttpClient:
         timeout: Optional[int] = None,
         ignore_backoff: bool = False,
         try_trailing_slash_on_400: bool = False,
-        parser: Optional[ByteParser] = None,
-    ):
+        parser: Optional[ByteParser[T]] = None,
+    ) -> Union[JsonDict, T]:
         """GETs some json from the given host homeserver and path
 
         Args:
@@ -1091,7 +1131,7 @@ class MatrixFederationHttpClient:
             _sec_timeout = self.default_timeout
 
         if parser is None:
-            parser = JsonParser()
+            parser = cast(ByteParser[T], JsonParser())
 
         body = await _handle_response(
             self.reactor,
@@ -1112,7 +1152,7 @@ class MatrixFederationHttpClient:
         timeout: Optional[int] = None,
         ignore_backoff: bool = False,
         args: Optional[QueryParams] = None,
-    ) -> Union[JsonDict, list]:
+    ) -> JsonDict:
         """Send a DELETE request to the remote expecting some json response
 
         Args:
diff --git a/tests/federation/test_complexity.py b/tests/federation/test_complexity.py
index 33af8770fd..129d7cfd93 100644
--- a/tests/federation/test_complexity.py
+++ b/tests/federation/test_complexity.py
@@ -75,7 +75,7 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase):
         fed_transport = self.hs.get_federation_transport_client()
 
         # Mock out some things, because we don't want to test the whole join
-        fed_transport.client.get_json = Mock(return_value=make_awaitable({"v1": 9999}))
+        fed_transport.client.get_json = Mock(return_value=make_awaitable({"v1": 9999}))  # type: ignore[assignment]
         handler.federation_handler.do_invite_join = Mock(  # type: ignore[assignment]
             return_value=make_awaitable(("", 1))
         )
@@ -106,7 +106,7 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase):
         fed_transport = self.hs.get_federation_transport_client()
 
         # Mock out some things, because we don't want to test the whole join
-        fed_transport.client.get_json = Mock(return_value=make_awaitable({"v1": 9999}))
+        fed_transport.client.get_json = Mock(return_value=make_awaitable({"v1": 9999}))  # type: ignore[assignment]
         handler.federation_handler.do_invite_join = Mock(  # type: ignore[assignment]
             return_value=make_awaitable(("", 1))
         )
@@ -143,7 +143,7 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase):
         fed_transport = self.hs.get_federation_transport_client()
 
         # Mock out some things, because we don't want to test the whole join
-        fed_transport.client.get_json = Mock(return_value=make_awaitable(None))
+        fed_transport.client.get_json = Mock(return_value=make_awaitable(None))  # type: ignore[assignment]
         handler.federation_handler.do_invite_join = Mock(  # type: ignore[assignment]
             return_value=make_awaitable(("", 1))
         )
@@ -200,7 +200,7 @@ class RoomComplexityAdminTests(unittest.FederatingHomeserverTestCase):
         fed_transport = self.hs.get_federation_transport_client()
 
         # Mock out some things, because we don't want to test the whole join
-        fed_transport.client.get_json = Mock(return_value=make_awaitable({"v1": 9999}))
+        fed_transport.client.get_json = Mock(return_value=make_awaitable({"v1": 9999}))  # type: ignore[assignment]
         handler.federation_handler.do_invite_join = Mock(  # type: ignore[assignment]
             return_value=make_awaitable(("", 1))
         )
@@ -230,7 +230,7 @@ class RoomComplexityAdminTests(unittest.FederatingHomeserverTestCase):
         fed_transport = self.hs.get_federation_transport_client()
 
         # Mock out some things, because we don't want to test the whole join
-        fed_transport.client.get_json = Mock(return_value=make_awaitable({"v1": 9999}))
+        fed_transport.client.get_json = Mock(return_value=make_awaitable({"v1": 9999}))  # type: ignore[assignment]
         handler.federation_handler.do_invite_join = Mock(  # type: ignore[assignment]
             return_value=make_awaitable(("", 1))
         )
diff --git a/tests/http/test_matrixfederationclient.py b/tests/http/test_matrixfederationclient.py
index fdd22a8e94..d89a91c59d 100644
--- a/tests/http/test_matrixfederationclient.py
+++ b/tests/http/test_matrixfederationclient.py
@@ -26,7 +26,7 @@ from twisted.web.http import HTTPChannel
 
 from synapse.api.errors import RequestSendFailed
 from synapse.http.matrixfederationclient import (
-    JsonParser,
+    ByteParser,
     MatrixFederationHttpClient,
     MatrixFederationRequest,
 )
@@ -618,9 +618,9 @@ class FederationClientTests(HomeserverTestCase):
         while not test_d.called:
             protocol.dataReceived(b"a" * chunk_size)
             sent += chunk_size
-            self.assertLessEqual(sent, JsonParser.MAX_RESPONSE_SIZE)
+            self.assertLessEqual(sent, ByteParser.MAX_RESPONSE_SIZE)
 
-        self.assertEqual(sent, JsonParser.MAX_RESPONSE_SIZE)
+        self.assertEqual(sent, ByteParser.MAX_RESPONSE_SIZE)
 
         f = self.failureResultOf(test_d)
         self.assertIsInstance(f.value, RequestSendFailed)
-- 
cgit 1.5.1


From 8b3a50299658a27175f55f1051e9470553c76d8e Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Tue, 25 Apr 2023 10:37:09 +0200
Subject: Experimental support for MSC3970: per-device transaction IDs (#15318)

---
 changelog.d/15318.feature                          |  1 +
 synapse/config/experimental.py                     |  3 +
 synapse/events/__init__.py                         |  9 ++-
 synapse/events/utils.py                            | 58 +++++++++++++-----
 synapse/handlers/message.py                        | 38 ++++++++++--
 synapse/handlers/room_member.py                    | 33 ++++++++---
 synapse/rest/client/transactions.py                | 13 +++++
 synapse/server.py                                  |  4 +-
 synapse/storage/databases/main/events.py           | 68 +++++++++++++++++-----
 synapse/storage/databases/main/events_worker.py    | 33 +++++++++--
 .../main/delta/74/05_events_txn_id_device_id.sql   | 53 +++++++++++++++++
 11 files changed, 265 insertions(+), 48 deletions(-)
 create mode 100644 changelog.d/15318.feature
 create mode 100644 synapse/storage/schema/main/delta/74/05_events_txn_id_device_id.sql

(limited to 'synapse')

diff --git a/changelog.d/15318.feature b/changelog.d/15318.feature
new file mode 100644
index 0000000000..47bb2e17a7
--- /dev/null
+++ b/changelog.d/15318.feature
@@ -0,0 +1 @@
+Experimental support for MSC3970: Scope transaction IDs to devices.
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 7687c80ea0..6599679731 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -191,3 +191,6 @@ class ExperimentalConfig(Config):
 
         # MSC2659: Application service ping endpoint
         self.msc2659_enabled = experimental.get("msc2659_enabled", False)
+
+        # MSC3970: Scope transaction IDs to devices
+        self.msc3970_enabled = experimental.get("msc3970_enabled", False)
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 4501518cf0..de7e5be42b 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -198,9 +198,16 @@ class _EventInternalMetadata:
     soft_failed: DictProperty[bool] = DictProperty("soft_failed")
     proactively_send: DictProperty[bool] = DictProperty("proactively_send")
     redacted: DictProperty[bool] = DictProperty("redacted")
+    historical: DictProperty[bool] = DictProperty("historical")
+
     txn_id: DictProperty[str] = DictProperty("txn_id")
+    """The transaction ID, if it was set when the event was created."""
+
     token_id: DictProperty[int] = DictProperty("token_id")
-    historical: DictProperty[bool] = DictProperty("historical")
+    """The access token ID of the user who sent this event, if any."""
+
+    device_id: DictProperty[str] = DictProperty("device_id")
+    """The device ID of the user who sent this event, if any."""
 
     # XXX: These are set by StreamWorkerStore._set_before_and_after.
     # I'm pretty sure that these are never persisted to the database, so shouldn't
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index 1d5d7491cd..0802eb1963 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -339,6 +339,7 @@ def serialize_event(
     time_now_ms: int,
     *,
     config: SerializeEventConfig = _DEFAULT_SERIALIZE_EVENT_CONFIG,
+    msc3970_enabled: bool = False,
 ) -> JsonDict:
     """Serialize event for clients
 
@@ -346,6 +347,8 @@ def serialize_event(
         e
         time_now_ms
         config: Event serialization config
+        msc3970_enabled: Whether MSC3970 is enabled. It changes whether we should
+            include the `transaction_id` in the event's `unsigned` section.
 
     Returns:
         The serialized event dictionary.
@@ -368,27 +371,43 @@ def serialize_event(
 
     if "redacted_because" in e.unsigned:
         d["unsigned"]["redacted_because"] = serialize_event(
-            e.unsigned["redacted_because"], time_now_ms, config=config
+            e.unsigned["redacted_because"],
+            time_now_ms,
+            config=config,
+            msc3970_enabled=msc3970_enabled,
         )
 
     # If we have a txn_id saved in the internal_metadata, we should include it in the
     # unsigned section of the event if it was sent by the same session as the one
     # requesting the event.
-    # There is a special case for guests, because they only have one access token
-    # without associated access_token_id, so we always include the txn_id for events
-    # they sent.
-    txn_id = getattr(e.internal_metadata, "txn_id", None)
+    txn_id: Optional[str] = getattr(e.internal_metadata, "txn_id", None)
     if txn_id is not None and config.requester is not None:
-        event_token_id = getattr(e.internal_metadata, "token_id", None)
-        if config.requester.user.to_string() == e.sender and (
-            (
-                event_token_id is not None
-                and config.requester.access_token_id is not None
-                and event_token_id == config.requester.access_token_id
+        # For the MSC3970 rules to be applied, we *need* to have the device ID in the
+        # event internal metadata. Since we were not recording it before, if it hasn't
+        # been recorded, we fall back to the old behaviour.
+        event_device_id: Optional[str] = getattr(e.internal_metadata, "device_id", None)
+        if msc3970_enabled and event_device_id is not None:
+            if event_device_id == config.requester.device_id:
+                d["unsigned"]["transaction_id"] = txn_id
+
+        else:
+            # The pre-MSC3970 behaviour is to only include the transaction ID if the
+            # event was sent from the same access token. For regular users, we can use
+            # the access token ID to determine this. For guests, we can't, but since
+            # each guest only has one access token, we can just check that the event was
+            # sent by the same user as the one requesting the event.
+            event_token_id: Optional[int] = getattr(
+                e.internal_metadata, "token_id", None
             )
-            or config.requester.is_guest
-        ):
-            d["unsigned"]["transaction_id"] = txn_id
+            if config.requester.user.to_string() == e.sender and (
+                (
+                    event_token_id is not None
+                    and config.requester.access_token_id is not None
+                    and event_token_id == config.requester.access_token_id
+                )
+                or config.requester.is_guest
+            ):
+                d["unsigned"]["transaction_id"] = txn_id
 
     # invite_room_state and knock_room_state are a list of stripped room state events
     # that are meant to provide metadata about a room to an invitee/knocker. They are
@@ -419,6 +438,9 @@ class EventClientSerializer:
     clients.
     """
 
+    def __init__(self, *, msc3970_enabled: bool = False):
+        self._msc3970_enabled = msc3970_enabled
+
     def serialize_event(
         self,
         event: Union[JsonDict, EventBase],
@@ -443,7 +465,9 @@ class EventClientSerializer:
         if not isinstance(event, EventBase):
             return event
 
-        serialized_event = serialize_event(event, time_now, config=config)
+        serialized_event = serialize_event(
+            event, time_now, config=config, msc3970_enabled=self._msc3970_enabled
+        )
 
         # Check if there are any bundled aggregations to include with the event.
         if bundle_aggregations:
@@ -501,7 +525,9 @@ class EventClientSerializer:
             # `sender` of the edit; however MSC3925 proposes extending it to the whole
             # of the edit, which is what we do here.
             serialized_aggregations[RelationTypes.REPLACE] = self.serialize_event(
-                event_aggregations.replace, time_now, config=config
+                event_aggregations.replace,
+                time_now,
+                config=config,
             )
 
         # Include any threaded replies to this event.
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 2e964ed37e..ac1932a7f9 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -561,6 +561,8 @@ class EventCreationHandler:
                 expiry_ms=30 * 60 * 1000,
             )
 
+        self._msc3970_enabled = hs.config.experimental.msc3970_enabled
+
     async def create_event(
         self,
         requester: Requester,
@@ -701,9 +703,16 @@ class EventCreationHandler:
         if require_consent and not is_exempt:
             await self.assert_accepted_privacy_policy(requester)
 
+        # Save the access token ID, the device ID and the transaction ID in the event
+        # internal metadata. This is useful to determine if we should echo the
+        # transaction_id in events.
+        # See `synapse.events.utils.EventClientSerializer.serialize_event`
         if requester.access_token_id is not None:
             builder.internal_metadata.token_id = requester.access_token_id
 
+        if requester.device_id is not None:
+            builder.internal_metadata.device_id = requester.device_id
+
         if txn_id is not None:
             builder.internal_metadata.txn_id = txn_id
 
@@ -897,12 +906,31 @@ class EventCreationHandler:
         Returns:
             An event if one could be found, None otherwise.
         """
+
+        if self._msc3970_enabled and requester.device_id:
+            # When MSC3970 is enabled, we look up events sent by the same device first,
+            # and fall back to the old behaviour if none were found.
+            existing_event_id = (
+                await self.store.get_event_id_from_transaction_id_and_device_id(
+                    room_id,
+                    requester.user.to_string(),
+                    requester.device_id,
+                    txn_id,
+                )
+            )
+            if existing_event_id:
+                return await self.store.get_event(existing_event_id)
+
+        # Pre-MSC3970, we looked up events that were sent by the same session by
+        # using the access token ID.
         if requester.access_token_id:
-            existing_event_id = await self.store.get_event_id_from_transaction_id(
-                room_id,
-                requester.user.to_string(),
-                requester.access_token_id,
-                txn_id,
+            existing_event_id = (
+                await self.store.get_event_id_from_transaction_id_and_token_id(
+                    room_id,
+                    requester.user.to_string(),
+                    requester.access_token_id,
+                    txn_id,
+                )
             )
             if existing_event_id:
                 return await self.store.get_event(existing_event_id)
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index ec317e6023..ed805d6ec8 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -169,6 +169,8 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         self.request_ratelimiter = hs.get_request_ratelimiter()
         hs.get_notifier().add_new_join_in_room_callback(self._on_user_joined_room)
 
+        self._msc3970_enabled = hs.config.experimental.msc3970_enabled
+
     def _on_user_joined_room(self, event_id: str, room_id: str) -> None:
         """Notify the rate limiter that a room join has occurred.
 
@@ -399,13 +401,30 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         # Check if we already have an event with a matching transaction ID. (We
         # do this check just before we persist an event as well, but may as well
         # do it up front for efficiency.)
-        if txn_id and requester.access_token_id:
-            existing_event_id = await self.store.get_event_id_from_transaction_id(
-                room_id,
-                requester.user.to_string(),
-                requester.access_token_id,
-                txn_id,
-            )
+        if txn_id:
+            existing_event_id = None
+            if self._msc3970_enabled and requester.device_id:
+                # When MSC3970 is enabled, we look up events sent by the same device
+                # first, and fall back to the old behaviour if none were found.
+                existing_event_id = (
+                    await self.store.get_event_id_from_transaction_id_and_device_id(
+                        room_id,
+                        requester.user.to_string(),
+                        requester.device_id,
+                        txn_id,
+                    )
+                )
+
+            if requester.access_token_id and not existing_event_id:
+                existing_event_id = (
+                    await self.store.get_event_id_from_transaction_id_and_token_id(
+                        room_id,
+                        requester.user.to_string(),
+                        requester.access_token_id,
+                        txn_id,
+                    )
+                )
+
             if existing_event_id:
                 event_pos = await self.store.get_position_for_event(existing_event_id)
                 return existing_event_id, event_pos.stream
diff --git a/synapse/rest/client/transactions.py b/synapse/rest/client/transactions.py
index f2aaab6227..0d8a63d8be 100644
--- a/synapse/rest/client/transactions.py
+++ b/synapse/rest/client/transactions.py
@@ -50,6 +50,8 @@ class HttpTransactionCache:
         # for at *LEAST* 30 mins, and at *MOST* 60 mins.
         self.cleaner = self.clock.looping_call(self._cleanup, CLEANUP_PERIOD_MS)
 
+        self._msc3970_enabled = hs.config.experimental.msc3970_enabled
+
     def _get_transaction_key(self, request: IRequest, requester: Requester) -> Hashable:
         """A helper function which returns a transaction key that can be used
         with TransactionCache for idempotent requests.
@@ -58,6 +60,7 @@ class HttpTransactionCache:
         requests to the same endpoint. The key is formed from the HTTP request
         path and attributes from the requester: the access_token_id for regular users,
         the user ID for guest users, and the appservice ID for appservice users.
+        With MSC3970, for regular users, the key is based on the user ID and device ID.
 
         Args:
             request: The incoming request.
@@ -67,11 +70,21 @@ class HttpTransactionCache:
         """
         assert request.path is not None
         path: str = request.path.decode("utf8")
+
         if requester.is_guest:
             assert requester.user is not None, "Guest requester must have a user ID set"
             return (path, "guest", requester.user)
+
         elif requester.app_service is not None:
             return (path, "appservice", requester.app_service.id)
+
+        # With MSC3970, we use the user ID and device ID as the transaction key
+        elif self._msc3970_enabled:
+            assert requester.user, "Requester must have a user"
+            assert requester.device_id, "Requester must have a device_id"
+            return (path, "user", requester.user, requester.device_id)
+
+        # Otherwise, the pre-MSC3970 behaviour is to use the access token ID
         else:
             assert (
                 requester.access_token_id is not None
diff --git a/synapse/server.py b/synapse/server.py
index 559724594b..08ad97b952 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -762,7 +762,9 @@ class HomeServer(metaclass=abc.ABCMeta):
 
     @cache_in_self
     def get_event_client_serializer(self) -> EventClientSerializer:
-        return EventClientSerializer()
+        return EventClientSerializer(
+            msc3970_enabled=self.config.experimental.msc3970_enabled
+        )
 
     @cache_in_self
     def get_password_policy_handler(self) -> PasswordPolicyHandler:
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 9c1e506da6..c229de48c8 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -127,6 +127,8 @@ class PersistEventsStore:
         self._backfill_id_gen: AbstractStreamIdGenerator = self.store._backfill_id_gen
         self._stream_id_gen: AbstractStreamIdGenerator = self.store._stream_id_gen
 
+        self._msc3970_enabled = hs.config.experimental.msc3970_enabled
+
     @trace
     async def _persist_events_and_state_updates(
         self,
@@ -977,23 +979,43 @@ class PersistEventsStore:
     ) -> None:
         """Persist the mapping from transaction IDs to event IDs (if defined)."""
 
-        to_insert = []
+        inserted_ts = self._clock.time_msec()
+        to_insert_token_id: List[Tuple[str, str, str, int, str, int]] = []
+        to_insert_device_id: List[Tuple[str, str, str, str, str, int]] = []
         for event, _ in events_and_contexts:
-            token_id = getattr(event.internal_metadata, "token_id", None)
             txn_id = getattr(event.internal_metadata, "txn_id", None)
-            if token_id and txn_id:
-                to_insert.append(
-                    (
-                        event.event_id,
-                        event.room_id,
-                        event.sender,
-                        token_id,
-                        txn_id,
-                        self._clock.time_msec(),
+            token_id = getattr(event.internal_metadata, "token_id", None)
+            device_id = getattr(event.internal_metadata, "device_id", None)
+
+            if txn_id is not None:
+                if token_id is not None:
+                    to_insert_token_id.append(
+                        (
+                            event.event_id,
+                            event.room_id,
+                            event.sender,
+                            token_id,
+                            txn_id,
+                            inserted_ts,
+                        )
                     )
-                )
 
-        if to_insert:
+                if device_id is not None:
+                    to_insert_device_id.append(
+                        (
+                            event.event_id,
+                            event.room_id,
+                            event.sender,
+                            device_id,
+                            txn_id,
+                            inserted_ts,
+                        )
+                    )
+
+        # Pre-MSC3970, we rely on the access_token_id to scope the txn_id for events.
+        # Since this is an experimental flag, we still store the mapping even if the
+        # flag is disabled.
+        if to_insert_token_id:
             self.db_pool.simple_insert_many_txn(
                 txn,
                 table="event_txn_id",
@@ -1005,7 +1027,25 @@ class PersistEventsStore:
                     "txn_id",
                     "inserted_ts",
                 ),
-                values=to_insert,
+                values=to_insert_token_id,
+            )
+
+        # With MSC3970, we rely on the device_id instead to scope the txn_id for events.
+        # We only insert if MSC3970 is *enabled*, because otherwise the pre-MSC3970
+        # behaviour would allow for a UNIQUE constraint violation on this table.
+        if to_insert_device_id and self._msc3970_enabled:
+            self.db_pool.simple_insert_many_txn(
+                txn,
+                table="event_txn_id_device_id",
+                keys=(
+                    "event_id",
+                    "room_id",
+                    "user_id",
+                    "device_id",
+                    "txn_id",
+                    "inserted_ts",
+                ),
+                values=to_insert_device_id,
             )
 
     async def update_current_state(
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 0cf46626d2..0ff3fc7369 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -2022,7 +2022,7 @@ class EventsWorkerStore(SQLBaseStore):
             desc="get_next_event_to_expire", func=get_next_event_to_expire_txn
         )
 
-    async def get_event_id_from_transaction_id(
+    async def get_event_id_from_transaction_id_and_token_id(
         self, room_id: str, user_id: str, token_id: int, txn_id: str
     ) -> Optional[str]:
         """Look up if we have already persisted an event for the transaction ID,
@@ -2038,7 +2038,26 @@ class EventsWorkerStore(SQLBaseStore):
             },
             retcol="event_id",
             allow_none=True,
-            desc="get_event_id_from_transaction_id",
+            desc="get_event_id_from_transaction_id_and_token_id",
+        )
+
+    async def get_event_id_from_transaction_id_and_device_id(
+        self, room_id: str, user_id: str, device_id: str, txn_id: str
+    ) -> Optional[str]:
+        """Look up if we have already persisted an event for the transaction ID,
+        returning the event ID if so.
+        """
+        return await self.db_pool.simple_select_one_onecol(
+            table="event_txn_id_device_id",
+            keyvalues={
+                "room_id": room_id,
+                "user_id": user_id,
+                "device_id": device_id,
+                "txn_id": txn_id,
+            },
+            retcol="event_id",
+            allow_none=True,
+            desc="get_event_id_from_transaction_id_and_device_id",
         )
 
     async def get_already_persisted_events(
@@ -2068,7 +2087,7 @@ class EventsWorkerStore(SQLBaseStore):
 
                 # Check if this is a duplicate of an event we've already
                 # persisted.
-                existing = await self.get_event_id_from_transaction_id(
+                existing = await self.get_event_id_from_transaction_id_and_token_id(
                     event.room_id, event.sender, token_id, txn_id
                 )
                 if existing:
@@ -2084,11 +2103,17 @@ class EventsWorkerStore(SQLBaseStore):
         """Cleans out transaction id mappings older than 24hrs."""
 
         def _cleanup_old_transaction_ids_txn(txn: LoggingTransaction) -> None:
+            one_day_ago = self._clock.time_msec() - 24 * 60 * 60 * 1000
             sql = """
                 DELETE FROM event_txn_id
                 WHERE inserted_ts < ?
             """
-            one_day_ago = self._clock.time_msec() - 24 * 60 * 60 * 1000
+            txn.execute(sql, (one_day_ago,))
+
+            sql = """
+                DELETE FROM event_txn_id_device_id
+                WHERE inserted_ts < ?
+            """
             txn.execute(sql, (one_day_ago,))
 
         return await self.db_pool.runInteraction(
diff --git a/synapse/storage/schema/main/delta/74/05_events_txn_id_device_id.sql b/synapse/storage/schema/main/delta/74/05_events_txn_id_device_id.sql
new file mode 100644
index 0000000000..517a821a56
--- /dev/null
+++ b/synapse/storage/schema/main/delta/74/05_events_txn_id_device_id.sql
@@ -0,0 +1,53 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- For MSC3970, in addition to the (room_id, user_id, token_id, txn_id) -> event_id mapping for each local event,
+-- we also store the (room_id, user_id, device_id, txn_id) -> event_id mapping.
+--
+-- This adds a new event_txn_id_device_id table.
+
+-- A map of recent events persisted with transaction IDs. Used to deduplicate
+-- send event requests with the same transaction ID.
+--
+-- Note: with MSC3970, transaction IDs are scoped to the 
+-- room ID/user ID/device ID that was used to make the request.
+--
+-- Note: The foreign key constraints are ON DELETE CASCADE, as if we delete the
+-- event or device we don't want to try and de-duplicate the event.
+CREATE TABLE IF NOT EXISTS event_txn_id_device_id (
+    event_id TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    device_id TEXT NOT NULL,
+    txn_id TEXT NOT NULL,
+    inserted_ts BIGINT NOT NULL,
+    FOREIGN KEY (event_id)
+        REFERENCES events (event_id) ON DELETE CASCADE,
+    FOREIGN KEY (user_id, device_id)
+        REFERENCES devices (user_id, device_id) ON DELETE CASCADE
+);
+
+-- This ensures that there is only one mapping per event_id.
+CREATE UNIQUE INDEX IF NOT EXISTS event_txn_id_device_id_event_id
+    ON event_txn_id_device_id(event_id);
+
+-- This ensures that there is only one mapping per (room_id, user_id, device_id, txn_id) tuple.
+-- Events are usually looked up using this index.
+CREATE UNIQUE INDEX IF NOT EXISTS event_txn_id_device_id_txn_id 
+    ON event_txn_id_device_id(room_id, user_id, device_id, txn_id);
+
+-- This table is cleaned up regularly, removing the oldest entries, hence this index.
+CREATE INDEX IF NOT EXISTS event_txn_id_device_id_ts
+    ON event_txn_id_device_id(inserted_ts);
-- 
cgit 1.5.1


From c55293c2306ce72b168203f3685ace48300b9a76 Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Tue, 25 Apr 2023 09:44:29 +0100
Subject: Re re introduce membership tables event stream ordering (#15356)

---
 changelog.d/15356.misc                             |  1 +
 synapse/storage/databases/main/events.py           | 23 +++++--
 synapse/storage/databases/main/purge_events.py     |  6 +-
 synapse/storage/schema/__init__.py                 | 14 ++--
 ...rship_tables_event_stream_ordering.sql.postgres | 29 ++++++++
 ...bership_tables_event_stream_ordering.sql.sqlite | 23 +++++++
 ...ership_tables_event_stream_ordering_triggers.py | 79 ++++++++++++++++++++++
 7 files changed, 163 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/15356.misc
 create mode 100644 synapse/storage/schema/main/delta/74/03_membership_tables_event_stream_ordering.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/74/03_membership_tables_event_stream_ordering.sql.sqlite
 create mode 100644 synapse/storage/schema/main/delta/74/04_membership_tables_event_stream_ordering_triggers.py

(limited to 'synapse')

diff --git a/changelog.d/15356.misc b/changelog.d/15356.misc
new file mode 100644
index 0000000000..c09911e48d
--- /dev/null
+++ b/changelog.d/15356.misc
@@ -0,0 +1 @@
+Add denormalised event stream ordering column to membership state tables for future use. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index c229de48c8..e2e6eb479f 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1167,11 +1167,15 @@ class PersistEventsStore:
                 # been inserted into room_memberships.
                 txn.execute_batch(
                     """INSERT INTO current_state_events
-                        (room_id, type, state_key, event_id, membership)
-                    VALUES (?, ?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?))
+                        (room_id, type, state_key, event_id, membership, event_stream_ordering)
+                    VALUES (
+                        ?, ?, ?, ?,
+                        (SELECT membership FROM room_memberships WHERE event_id = ?),
+                        (SELECT stream_ordering FROM events WHERE event_id = ?)
+                    )
                     """,
                     [
-                        (room_id, key[0], key[1], ev_id, ev_id)
+                        (room_id, key[0], key[1], ev_id, ev_id, ev_id)
                         for key, ev_id in to_insert.items()
                     ],
                 )
@@ -1198,11 +1202,15 @@ class PersistEventsStore:
             if to_insert:
                 txn.execute_batch(
                     """INSERT INTO local_current_membership
-                        (room_id, user_id, event_id, membership)
-                    VALUES (?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?))
+                        (room_id, user_id, event_id, membership, event_stream_ordering)
+                    VALUES (
+                        ?, ?, ?,
+                        (SELECT membership FROM room_memberships WHERE event_id = ?),
+                        (SELECT stream_ordering FROM events WHERE event_id = ?)
+                    )
                     """,
                     [
-                        (room_id, key[1], ev_id, ev_id)
+                        (room_id, key[1], ev_id, ev_id, ev_id)
                         for key, ev_id in to_insert.items()
                         if key[0] == EventTypes.Member and self.is_mine_id(key[1])
                     ],
@@ -1808,6 +1816,7 @@ class PersistEventsStore:
             table="room_memberships",
             keys=(
                 "event_id",
+                "event_stream_ordering",
                 "user_id",
                 "sender",
                 "room_id",
@@ -1818,6 +1827,7 @@ class PersistEventsStore:
             values=[
                 (
                     event.event_id,
+                    event.internal_metadata.stream_ordering,
                     event.state_key,
                     event.user_id,
                     event.room_id,
@@ -1850,6 +1860,7 @@ class PersistEventsStore:
                     keyvalues={"room_id": event.room_id, "user_id": event.state_key},
                     values={
                         "event_id": event.event_id,
+                        "event_stream_ordering": event.internal_metadata.stream_ordering,
                         "membership": event.membership,
                     },
                 )
diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py
index 7a7c0d9c75..efbd3e75d9 100644
--- a/synapse/storage/databases/main/purge_events.py
+++ b/synapse/storage/databases/main/purge_events.py
@@ -428,14 +428,16 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
             "partial_state_events",
             "partial_state_rooms_servers",
             "partial_state_rooms",
+            # Note: the _membership(s) tables have foreign keys to the `events` table
+            # so must be deleted first.
+            "local_current_membership",
+            "room_memberships",
             "events",
             "federation_inbound_events_staging",
-            "local_current_membership",
             "receipts_graph",
             "receipts_linearized",
             "room_aliases",
             "room_depth",
-            "room_memberships",
             "room_stats_state",
             "room_stats_current",
             "room_stats_earliest_token",
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index d3103a6c7a..a28f2b997c 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 74  # remember to update the list below when updating
+SCHEMA_VERSION = 75  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -91,13 +91,19 @@ Changes in SCHEMA_VERSION = 74:
     - A query on `event_stream_ordering` column has now been disambiguated (i.e. the
       codebase can handle the `current_state_events`, `local_current_memberships` and
       `room_memberships` tables having an `event_stream_ordering` column).
+
+Changes in SCHEMA_VERSION = 75:
+    - The `event_stream_ordering` column in membership tables (`current_state_events`,
+      `local_current_membership` & `room_memberships`) is now being populated for new
+      rows. When the background job to populate historical rows lands this will
+      become the compat schema version.
 """
 
 
 SCHEMA_COMPAT_VERSION = (
-    # The threads_id column must exist for event_push_actions, event_push_summary,
-    # receipts_linearized, and receipts_graph.
-    73
+    # Queries against `event_stream_ordering` columns in membership tables must
+    # be disambiguated.
+    74
 )
 """Limit on how far the synapse codebase can be rolled back without breaking db compat
 
diff --git a/synapse/storage/schema/main/delta/74/03_membership_tables_event_stream_ordering.sql.postgres b/synapse/storage/schema/main/delta/74/03_membership_tables_event_stream_ordering.sql.postgres
new file mode 100644
index 0000000000..ceb750a9fa
--- /dev/null
+++ b/synapse/storage/schema/main/delta/74/03_membership_tables_event_stream_ordering.sql.postgres
@@ -0,0 +1,29 @@
+/* Copyright 2022 Beeper
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Each of these is a denormalised copy of `stream_ordering` from the corresponding row in the `events` table,
+-- which we use to improve database performance by reducing JOINs.
+
+-- NOTE: these are set to NOT VALID to prevent locks while adding the column on large existing tables,
+-- which will be validated in a later migration. For all new/updated rows the FKEY will be checked.
+
+ALTER TABLE current_state_events ADD COLUMN event_stream_ordering BIGINT;
+ALTER TABLE current_state_events ADD CONSTRAINT event_stream_ordering_fkey FOREIGN KEY (event_stream_ordering) REFERENCES events(stream_ordering) NOT VALID;
+
+ALTER TABLE local_current_membership ADD COLUMN event_stream_ordering BIGINT;
+ALTER TABLE local_current_membership ADD CONSTRAINT event_stream_ordering_fkey FOREIGN KEY (event_stream_ordering) REFERENCES events(stream_ordering) NOT VALID;
+
+ALTER TABLE room_memberships ADD COLUMN event_stream_ordering BIGINT;
+ALTER TABLE room_memberships ADD CONSTRAINT event_stream_ordering_fkey FOREIGN KEY (event_stream_ordering) REFERENCES events(stream_ordering) NOT VALID;
diff --git a/synapse/storage/schema/main/delta/74/03_membership_tables_event_stream_ordering.sql.sqlite b/synapse/storage/schema/main/delta/74/03_membership_tables_event_stream_ordering.sql.sqlite
new file mode 100644
index 0000000000..6f6283fdb7
--- /dev/null
+++ b/synapse/storage/schema/main/delta/74/03_membership_tables_event_stream_ordering.sql.sqlite
@@ -0,0 +1,23 @@
+/* Copyright 2022 Beeper
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Each of these is a denormalised copy of `stream_ordering` from the corresponding row in the `events` table,
+-- which we use to improve database performance by reducing JOINs.
+
+-- NOTE: sqlite does not support ADD CONSTRAINT so we add the new columns with FK constraint as-is
+
+ALTER TABLE current_state_events ADD COLUMN event_stream_ordering BIGINT REFERENCES events(stream_ordering);
+ALTER TABLE local_current_membership ADD COLUMN event_stream_ordering BIGINT REFERENCES events(stream_ordering);
+ALTER TABLE room_memberships ADD COLUMN event_stream_ordering BIGINT REFERENCES events(stream_ordering);
diff --git a/synapse/storage/schema/main/delta/74/04_membership_tables_event_stream_ordering_triggers.py b/synapse/storage/schema/main/delta/74/04_membership_tables_event_stream_ordering_triggers.py
new file mode 100644
index 0000000000..e32e9083b3
--- /dev/null
+++ b/synapse/storage/schema/main/delta/74/04_membership_tables_event_stream_ordering_triggers.py
@@ -0,0 +1,79 @@
+# Copyright 2022 Beeper
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+This migration adds triggers to the room membership tables to enforce consistency.
+Triggers cannot be expressed in .sql files, so we have to use a separate file.
+"""
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
+from synapse.storage.types import Cursor
+
+
+def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
+    # Complain if the `event_stream_ordering` in membership tables doesn't match
+    # the `stream_ordering` row with the same `event_id` in `events`.
+    if isinstance(database_engine, Sqlite3Engine):
+        for table in (
+            "current_state_events",
+            "local_current_membership",
+            "room_memberships",
+        ):
+            cur.execute(
+                f"""
+                CREATE TRIGGER IF NOT EXISTS {table}_bad_event_stream_ordering
+                BEFORE INSERT ON {table}
+                FOR EACH ROW
+                BEGIN
+                    SELECT RAISE(ABORT, 'Incorrect event_stream_ordering in {table}')
+                    WHERE EXISTS (
+                        SELECT 1 FROM events
+                        WHERE events.event_id = NEW.event_id
+                           AND events.stream_ordering != NEW.event_stream_ordering
+                    );
+                END;
+                """
+            )
+    elif isinstance(database_engine, PostgresEngine):
+        cur.execute(
+            """
+            CREATE OR REPLACE FUNCTION check_event_stream_ordering() RETURNS trigger AS $BODY$
+            BEGIN
+                IF EXISTS (
+                    SELECT 1 FROM events
+                    WHERE events.event_id = NEW.event_id
+                       AND events.stream_ordering != NEW.event_stream_ordering
+                ) THEN
+                    RAISE EXCEPTION 'Incorrect event_stream_ordering';
+                END IF;
+                RETURN NEW;
+            END;
+            $BODY$ LANGUAGE plpgsql;
+            """
+        )
+
+        for table in (
+            "current_state_events",
+            "local_current_membership",
+            "room_memberships",
+        ):
+            cur.execute(
+                f"""
+                CREATE TRIGGER check_event_stream_ordering BEFORE INSERT OR UPDATE ON {table}
+                FOR EACH ROW
+                EXECUTE PROCEDURE check_event_stream_ordering()
+                """
+            )
+    else:
+        raise NotImplementedError("Unknown database engine")
-- 
cgit 1.5.1


From 8e9739449dd6d3c133adf9e995d27d06518a0bcf Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 25 Apr 2023 13:30:41 -0400
Subject: Add unstable /keys/claim endpoint which always returns fallback keys.
 (#15462)

It can be useful to always return the fallback key when attempting to
claim keys. This adds an unstable endpoint for `/keys/claim` which
always returns fallback keys in addition to one-time-keys.

The fallback key(s) are not marked as "used" unless there are no
corresponding OTKs.

This is currently defined in MSC3983 (although likely to be split out
to a separate MSC). The endpoint shape may change or be requested
differently (i.e. a keyword parameter on the current endpoint), but the
core logic should be reasonable.
---
 changelog.d/15462.misc                            |   1 +
 synapse/federation/federation_server.py           |   6 +-
 synapse/federation/transport/server/__init__.py   |   6 +
 synapse/federation/transport/server/federation.py |  23 ++-
 synapse/handlers/appservice.py                    |  13 +-
 synapse/handlers/e2e_keys.py                      |  70 ++++++-
 synapse/rest/client/keys.py                       |  31 ++-
 synapse/storage/databases/main/end_to_end_keys.py |   9 +-
 tests/handlers/test_e2e_keys.py                   | 241 +++++++++++++++++++++-
 9 files changed, 371 insertions(+), 29 deletions(-)
 create mode 100644 changelog.d/15462.misc

(limited to 'synapse')

diff --git a/changelog.d/15462.misc b/changelog.d/15462.misc
new file mode 100644
index 0000000000..36e4bffbc8
--- /dev/null
+++ b/changelog.d/15462.misc
@@ -0,0 +1 @@
+Update support for [MSC3983](https://github.com/matrix-org/matrix-spec-proposals/pull/3983) to allow always returning fallback-keys in a `/keys/claim` request.
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index d7740eb3b4..c618f3d7a6 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -1005,7 +1005,7 @@ class FederationServer(FederationBase):
 
     @trace
     async def on_claim_client_keys(
-        self, origin: str, content: JsonDict
+        self, origin: str, content: JsonDict, always_include_fallback_keys: bool
     ) -> Dict[str, Any]:
         query = []
         for user_id, device_keys in content.get("one_time_keys", {}).items():
@@ -1013,7 +1013,9 @@ class FederationServer(FederationBase):
                 query.append((user_id, device_id, algorithm))
 
         log_kv({"message": "Claiming one time keys.", "user, device pairs": query})
-        results = await self._e2e_keys_handler.claim_local_one_time_keys(query)
+        results = await self._e2e_keys_handler.claim_local_one_time_keys(
+            query, always_include_fallback_keys=always_include_fallback_keys
+        )
 
         json_result: Dict[str, Dict[str, Dict[str, JsonDict]]] = {}
         for result in results:
diff --git a/synapse/federation/transport/server/__init__.py b/synapse/federation/transport/server/__init__.py
index 753372fc54..55d2cd0a9a 100644
--- a/synapse/federation/transport/server/__init__.py
+++ b/synapse/federation/transport/server/__init__.py
@@ -25,6 +25,7 @@ from synapse.federation.transport.server._base import (
 from synapse.federation.transport.server.federation import (
     FEDERATION_SERVLET_CLASSES,
     FederationAccountStatusServlet,
+    FederationUnstableClientKeysClaimServlet,
 )
 from synapse.http.server import HttpServer, JsonResource
 from synapse.http.servlet import (
@@ -298,6 +299,11 @@ def register_servlets(
                 and not hs.config.experimental.msc3720_enabled
             ):
                 continue
+            if (
+                servletclass == FederationUnstableClientKeysClaimServlet
+                and not hs.config.experimental.msc3983_appservice_otk_claims
+            ):
+                continue
 
             servletclass(
                 hs=hs,
diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py
index ec5b5eeafa..e2340d70d5 100644
--- a/synapse/federation/transport/server/federation.py
+++ b/synapse/federation/transport/server/federation.py
@@ -577,7 +577,28 @@ class FederationClientKeysClaimServlet(BaseFederationServerServlet):
     async def on_POST(
         self, origin: str, content: JsonDict, query: Dict[bytes, List[bytes]]
     ) -> Tuple[int, JsonDict]:
-        response = await self.handler.on_claim_client_keys(origin, content)
+        response = await self.handler.on_claim_client_keys(
+            origin, content, always_include_fallback_keys=False
+        )
+        return 200, response
+
+
+class FederationUnstableClientKeysClaimServlet(BaseFederationServerServlet):
+    """
+    Identical to the stable endpoint (FederationClientKeysClaimServlet) except it
+    always includes fallback keys in the response.
+    """
+
+    PREFIX = FEDERATION_UNSTABLE_PREFIX
+    PATH = "/user/keys/claim"
+    CATEGORY = "Federation requests"
+
+    async def on_POST(
+        self, origin: str, content: JsonDict, query: Dict[bytes, List[bytes]]
+    ) -> Tuple[int, JsonDict]:
+        response = await self.handler.on_claim_client_keys(
+            origin, content, always_include_fallback_keys=True
+        )
         return 200, response
 
 
diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py
index da887647d4..4ca2bc0420 100644
--- a/synapse/handlers/appservice.py
+++ b/synapse/handlers/appservice.py
@@ -842,9 +842,7 @@ class ApplicationServicesHandler:
 
     async def claim_e2e_one_time_keys(
         self, query: Iterable[Tuple[str, str, str]]
-    ) -> Tuple[
-        Iterable[Dict[str, Dict[str, Dict[str, JsonDict]]]], List[Tuple[str, str, str]]
-    ]:
+    ) -> Tuple[Dict[str, Dict[str, Dict[str, JsonDict]]], List[Tuple[str, str, str]]]:
         """Claim one time keys from application services.
 
         Users which are exclusively owned by an application service are sent a
@@ -856,7 +854,7 @@ class ApplicationServicesHandler:
 
         Returns:
             A tuple of:
-                An iterable of maps of user ID -> a map device ID -> a map of key ID -> JSON bytes.
+                A map of user ID -> a map device ID -> a map of key ID -> JSON.
 
                 A copy of the input which has not been fulfilled (either because
                 they are not appservice users or the appservice does not support
@@ -897,12 +895,11 @@ class ApplicationServicesHandler:
         )
 
         # Patch together the results -- they are all independent (since they
-        # require exclusive control over the users). They get returned as a list
-        # and the caller combines them.
-        claimed_keys: List[Dict[str, Dict[str, Dict[str, JsonDict]]]] = []
+        # require exclusive control over the users, which is the outermost key).
+        claimed_keys: Dict[str, Dict[str, Dict[str, JsonDict]]] = {}
         for success, result in results:
             if success:
-                claimed_keys.append(result[0])
+                claimed_keys.update(result[0])
                 missing.extend(result[1])
 
         return claimed_keys, missing
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 0073667470..d1ab95126c 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -563,7 +563,9 @@ class E2eKeysHandler:
         return ret
 
     async def claim_local_one_time_keys(
-        self, local_query: List[Tuple[str, str, str]]
+        self,
+        local_query: List[Tuple[str, str, str]],
+        always_include_fallback_keys: bool,
     ) -> Iterable[Dict[str, Dict[str, Dict[str, JsonDict]]]]:
         """Claim one time keys for local users.
 
@@ -573,6 +575,7 @@ class E2eKeysHandler:
 
         Args:
             local_query: An iterable of tuples of (user ID, device ID, algorithm).
+            always_include_fallback_keys: True to always include fallback keys.
 
         Returns:
             An iterable of maps of user ID -> a map device ID -> a map of key ID -> JSON bytes.
@@ -583,24 +586,73 @@ class E2eKeysHandler:
         # If the application services have not provided any keys via the C-S
         # API, query it directly for one-time keys.
         if self._query_appservices_for_otks:
+            # TODO Should this query for fallback keys of uploaded OTKs if
+            #      always_include_fallback_keys is True? The MSC is ambiguous.
             (
                 appservice_results,
                 not_found,
             ) = await self._appservice_handler.claim_e2e_one_time_keys(not_found)
         else:
-            appservice_results = []
+            appservice_results = {}
+
+        # Calculate which user ID / device ID / algorithm tuples to get fallback
+        # keys for. This can be either only missing results *or* all results
+        # (which don't already have a fallback key).
+        if always_include_fallback_keys:
+            # Build the fallback query as any part of the original query where
+            # the appservice didn't respond with a fallback key.
+            fallback_query = []
+
+            # Iterate each item in the original query and search the results
+            # from the appservice for that user ID / device ID. If it is found,
+            # check if any of the keys match the requested algorithm & are a
+            # fallback key.
+            for user_id, device_id, algorithm in local_query:
+                # Check if the appservice responded for this query.
+                as_result = appservice_results.get(user_id, {}).get(device_id, {})
+                found_otk = False
+                for key_id, key_json in as_result.items():
+                    if key_id.startswith(f"{algorithm}:"):
+                        # A OTK or fallback key was found for this query.
+                        found_otk = True
+                        # A fallback key was found for this query, no need to
+                        # query further.
+                        if key_json.get("fallback", False):
+                            break
+
+                else:
+                    # No fallback key was found from appservices, query for it.
+                    # Only mark the fallback key as used if no OTK was found
+                    # (from either the database or appservices).
+                    mark_as_used = not found_otk and not any(
+                        key_id.startswith(f"{algorithm}:")
+                        for key_id in otk_results.get(user_id, {})
+                        .get(device_id, {})
+                        .keys()
+                    )
+                    fallback_query.append((user_id, device_id, algorithm, mark_as_used))
+
+        else:
+            # All fallback keys get marked as used.
+            fallback_query = [
+                (user_id, device_id, algorithm, True)
+                for user_id, device_id, algorithm in not_found
+            ]
 
         # For each user that does not have a one-time keys available, see if
         # there is a fallback key.
-        fallback_results = await self.store.claim_e2e_fallback_keys(not_found)
+        fallback_results = await self.store.claim_e2e_fallback_keys(fallback_query)
 
         # Return the results in order, each item from the input query should
         # only appear once in the combined list.
-        return (otk_results, *appservice_results, fallback_results)
+        return (otk_results, appservice_results, fallback_results)
 
     @trace
     async def claim_one_time_keys(
-        self, query: Dict[str, Dict[str, Dict[str, str]]], timeout: Optional[int]
+        self,
+        query: Dict[str, Dict[str, Dict[str, str]]],
+        timeout: Optional[int],
+        always_include_fallback_keys: bool,
     ) -> JsonDict:
         local_query: List[Tuple[str, str, str]] = []
         remote_queries: Dict[str, Dict[str, Dict[str, str]]] = {}
@@ -617,7 +669,9 @@ class E2eKeysHandler:
         set_tag("local_key_query", str(local_query))
         set_tag("remote_key_query", str(remote_queries))
 
-        results = await self.claim_local_one_time_keys(local_query)
+        results = await self.claim_local_one_time_keys(
+            local_query, always_include_fallback_keys
+        )
 
         # A map of user ID -> device ID -> key ID -> key.
         json_result: Dict[str, Dict[str, Dict[str, JsonDict]]] = {}
@@ -625,7 +679,9 @@ class E2eKeysHandler:
             for user_id, device_keys in result.items():
                 for device_id, keys in device_keys.items():
                     for key_id, key in keys.items():
-                        json_result.setdefault(user_id, {})[device_id] = {key_id: key}
+                        json_result.setdefault(user_id, {}).setdefault(
+                            device_id, {}
+                        ).update({key_id: key})
 
         # Remote failures.
         failures: Dict[str, JsonDict] = {}
diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py
index 6209b79b01..2a25094109 100644
--- a/synapse/rest/client/keys.py
+++ b/synapse/rest/client/keys.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 
 import logging
+import re
 from typing import TYPE_CHECKING, Any, Optional, Tuple
 
 from synapse.api.errors import InvalidAPICallError, SynapseError
@@ -288,7 +289,33 @@ class OneTimeKeyServlet(RestServlet):
         await self.auth.get_user_by_req(request, allow_guest=True)
         timeout = parse_integer(request, "timeout", 10 * 1000)
         body = parse_json_object_from_request(request)
-        result = await self.e2e_keys_handler.claim_one_time_keys(body, timeout)
+        result = await self.e2e_keys_handler.claim_one_time_keys(
+            body, timeout, always_include_fallback_keys=False
+        )
+        return 200, result
+
+
+class UnstableOneTimeKeyServlet(RestServlet):
+    """
+    Identical to the stable endpoint (OneTimeKeyServlet) except it always includes
+    fallback keys in the response.
+    """
+
+    PATTERNS = [re.compile(r"^/_matrix/client/unstable/org.matrix.msc3983/keys/claim$")]
+    CATEGORY = "Encryption requests"
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__()
+        self.auth = hs.get_auth()
+        self.e2e_keys_handler = hs.get_e2e_keys_handler()
+
+    async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
+        await self.auth.get_user_by_req(request, allow_guest=True)
+        timeout = parse_integer(request, "timeout", 10 * 1000)
+        body = parse_json_object_from_request(request)
+        result = await self.e2e_keys_handler.claim_one_time_keys(
+            body, timeout, always_include_fallback_keys=True
+        )
         return 200, result
 
 
@@ -394,6 +421,8 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     KeyQueryServlet(hs).register(http_server)
     KeyChangesServlet(hs).register(http_server)
     OneTimeKeyServlet(hs).register(http_server)
+    if hs.config.experimental.msc3983_appservice_otk_claims:
+        UnstableOneTimeKeyServlet(hs).register(http_server)
     if hs.config.worker.worker_app is None:
         SigningKeyUploadServlet(hs).register(http_server)
         SignaturesUploadServlet(hs).register(http_server)
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index dc7768c50c..1a4ae55304 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -1149,18 +1149,19 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
         return results, missing
 
     async def claim_e2e_fallback_keys(
-        self, query_list: Iterable[Tuple[str, str, str]]
+        self, query_list: Iterable[Tuple[str, str, str, bool]]
     ) -> Dict[str, Dict[str, Dict[str, JsonDict]]]:
         """Take a list of fallback keys out of the database.
 
         Args:
-            query_list: An iterable of tuples of (user ID, device ID, algorithm).
+            query_list: An iterable of tuples of
+                (user ID, device ID, algorithm, whether the key should be marked as used).
 
         Returns:
             A map of user ID -> a map device ID -> a map of key ID -> JSON.
         """
         results: Dict[str, Dict[str, Dict[str, JsonDict]]] = {}
-        for user_id, device_id, algorithm in query_list:
+        for user_id, device_id, algorithm, mark_as_used in query_list:
             row = await self.db_pool.simple_select_one(
                 table="e2e_fallback_keys_json",
                 keyvalues={
@@ -1180,7 +1181,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
             used = row["used"]
 
             # Mark fallback key as used if not already.
-            if not used:
+            if not used and mark_as_used:
                 await self.db_pool.simple_update_one(
                     table="e2e_fallback_keys_json",
                     keyvalues={
diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py
index 013b9ee550..18edebd652 100644
--- a/tests/handlers/test_e2e_keys.py
+++ b/tests/handlers/test_e2e_keys.py
@@ -160,7 +160,9 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
 
         res2 = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id: "alg1"}}}, timeout=None
+                {"one_time_keys": {local_user: {device_id: "alg1"}}},
+                timeout=None,
+                always_include_fallback_keys=False,
             )
         )
         self.assertEqual(
@@ -203,7 +205,9 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         # key
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id: "alg1"}}}, timeout=None
+                {"one_time_keys": {local_user: {device_id: "alg1"}}},
+                timeout=None,
+                always_include_fallback_keys=False,
             )
         )
         self.assertEqual(
@@ -220,7 +224,9 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         # claiming an OTK again should return the same fallback key
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id: "alg1"}}}, timeout=None
+                {"one_time_keys": {local_user: {device_id: "alg1"}}},
+                timeout=None,
+                always_include_fallback_keys=False,
             )
         )
         self.assertEqual(
@@ -267,7 +273,9 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
 
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id: "alg1"}}}, timeout=None
+                {"one_time_keys": {local_user: {device_id: "alg1"}}},
+                timeout=None,
+                always_include_fallback_keys=False,
             )
         )
         self.assertEqual(
@@ -277,7 +285,9 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
 
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id: "alg1"}}}, timeout=None
+                {"one_time_keys": {local_user: {device_id: "alg1"}}},
+                timeout=None,
+                always_include_fallback_keys=False,
             )
         )
         self.assertEqual(
@@ -296,7 +306,9 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
 
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id: "alg1"}}}, timeout=None
+                {"one_time_keys": {local_user: {device_id: "alg1"}}},
+                timeout=None,
+                always_include_fallback_keys=False,
             )
         )
         self.assertEqual(
@@ -304,6 +316,75 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
             {"failures": {}, "one_time_keys": {local_user: {device_id: fallback_key3}}},
         )
 
+    def test_fallback_key_always_returned(self) -> None:
+        local_user = "@boris:" + self.hs.hostname
+        device_id = "xyz"
+        fallback_key = {"alg1:k1": "fallback_key1"}
+        otk = {"alg1:k2": "key2"}
+
+        # we shouldn't have any unused fallback keys yet
+        res = self.get_success(
+            self.store.get_e2e_unused_fallback_key_types(local_user, device_id)
+        )
+        self.assertEqual(res, [])
+
+        # Upload a OTK & fallback key.
+        self.get_success(
+            self.handler.upload_keys_for_user(
+                local_user,
+                device_id,
+                {"one_time_keys": otk, "fallback_keys": fallback_key},
+            )
+        )
+
+        # we should now have an unused alg1 key
+        fallback_res = self.get_success(
+            self.store.get_e2e_unused_fallback_key_types(local_user, device_id)
+        )
+        self.assertEqual(fallback_res, ["alg1"])
+
+        # Claiming an OTK and requesting to always return the fallback key should
+        # return both.
+        claim_res = self.get_success(
+            self.handler.claim_one_time_keys(
+                {"one_time_keys": {local_user: {device_id: "alg1"}}},
+                timeout=None,
+                always_include_fallback_keys=True,
+            )
+        )
+        self.assertEqual(
+            claim_res,
+            {
+                "failures": {},
+                "one_time_keys": {local_user: {device_id: {**fallback_key, **otk}}},
+            },
+        )
+
+        # This should not mark the key as used.
+        fallback_res = self.get_success(
+            self.store.get_e2e_unused_fallback_key_types(local_user, device_id)
+        )
+        self.assertEqual(fallback_res, ["alg1"])
+
+        # Claiming an OTK again should return only the fallback key.
+        claim_res = self.get_success(
+            self.handler.claim_one_time_keys(
+                {"one_time_keys": {local_user: {device_id: "alg1"}}},
+                timeout=None,
+                always_include_fallback_keys=True,
+            )
+        )
+        self.assertEqual(
+            claim_res,
+            {"failures": {}, "one_time_keys": {local_user: {device_id: fallback_key}}},
+        )
+
+        # And mark it as used.
+        fallback_res = self.get_success(
+            self.store.get_e2e_unused_fallback_key_types(local_user, device_id)
+        )
+        self.assertEqual(fallback_res, [])
+
     def test_replace_master_key(self) -> None:
         """uploading a new signing key should make the old signing key unavailable"""
         local_user = "@boris:" + self.hs.hostname
@@ -1004,6 +1085,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
                     }
                 },
                 timeout=None,
+                always_include_fallback_keys=False,
             )
         )
         self.assertEqual(
@@ -1016,6 +1098,153 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
             },
         )
 
+    @override_config({"experimental_features": {"msc3983_appservice_otk_claims": True}})
+    def test_query_appservice_with_fallback(self) -> None:
+        local_user = "@boris:" + self.hs.hostname
+        device_id_1 = "xyz"
+        fallback_key = {"alg1:k1": {"desc": "fallback_key1", "fallback": True}}
+        otk = {"alg1:k2": {"desc": "key2"}}
+        as_fallback_key = {"alg1:k3": {"desc": "fallback_key3", "fallback": True}}
+        as_otk = {"alg1:k4": {"desc": "key4"}}
+
+        # Inject an appservice interested in this user.
+        appservice = ApplicationService(
+            token="i_am_an_app_service",
+            id="1234",
+            namespaces={"users": [{"regex": r"@boris:.+", "exclusive": True}]},
+            # Note: this user does not have to match the regex above
+            sender="@as_main:test",
+        )
+        self.hs.get_datastores().main.services_cache = [appservice]
+        self.hs.get_datastores().main.exclusive_user_regex = _make_exclusive_regex(
+            [appservice]
+        )
+
+        # Setup a response.
+        self.appservice_api.claim_client_keys.return_value = make_awaitable(
+            ({local_user: {device_id_1: {**as_otk, **as_fallback_key}}}, [])
+        )
+
+        # Claim OTKs, which will ask the appservice and do nothing else.
+        claim_res = self.get_success(
+            self.handler.claim_one_time_keys(
+                {"one_time_keys": {local_user: {device_id_1: "alg1"}}},
+                timeout=None,
+                always_include_fallback_keys=True,
+            )
+        )
+        self.assertEqual(
+            claim_res,
+            {
+                "failures": {},
+                "one_time_keys": {
+                    local_user: {device_id_1: {**as_otk, **as_fallback_key}}
+                },
+            },
+        )
+
+        # Now upload a fallback key.
+        res = self.get_success(
+            self.store.get_e2e_unused_fallback_key_types(local_user, device_id_1)
+        )
+        self.assertEqual(res, [])
+
+        self.get_success(
+            self.handler.upload_keys_for_user(
+                local_user,
+                device_id_1,
+                {"fallback_keys": fallback_key},
+            )
+        )
+
+        # we should now have an unused alg1 key
+        fallback_res = self.get_success(
+            self.store.get_e2e_unused_fallback_key_types(local_user, device_id_1)
+        )
+        self.assertEqual(fallback_res, ["alg1"])
+
+        # The appservice will return only the OTK.
+        self.appservice_api.claim_client_keys.return_value = make_awaitable(
+            ({local_user: {device_id_1: as_otk}}, [])
+        )
+
+        # Claim OTKs, which should return the OTK from the appservice and the
+        # uploaded fallback key.
+        claim_res = self.get_success(
+            self.handler.claim_one_time_keys(
+                {"one_time_keys": {local_user: {device_id_1: "alg1"}}},
+                timeout=None,
+                always_include_fallback_keys=True,
+            )
+        )
+        self.assertEqual(
+            claim_res,
+            {
+                "failures": {},
+                "one_time_keys": {
+                    local_user: {device_id_1: {**as_otk, **fallback_key}}
+                },
+            },
+        )
+
+        # But the fallback key should not be marked as used.
+        fallback_res = self.get_success(
+            self.store.get_e2e_unused_fallback_key_types(local_user, device_id_1)
+        )
+        self.assertEqual(fallback_res, ["alg1"])
+
+        # Now upload a OTK.
+        self.get_success(
+            self.handler.upload_keys_for_user(
+                local_user,
+                device_id_1,
+                {"one_time_keys": otk},
+            )
+        )
+
+        # Claim OTKs, which will return information only from the database.
+        claim_res = self.get_success(
+            self.handler.claim_one_time_keys(
+                {"one_time_keys": {local_user: {device_id_1: "alg1"}}},
+                timeout=None,
+                always_include_fallback_keys=True,
+            )
+        )
+        self.assertEqual(
+            claim_res,
+            {
+                "failures": {},
+                "one_time_keys": {local_user: {device_id_1: {**otk, **fallback_key}}},
+            },
+        )
+
+        # But the fallback key should not be marked as used.
+        fallback_res = self.get_success(
+            self.store.get_e2e_unused_fallback_key_types(local_user, device_id_1)
+        )
+        self.assertEqual(fallback_res, ["alg1"])
+
+        # Finally, return only the fallback key from the appservice.
+        self.appservice_api.claim_client_keys.return_value = make_awaitable(
+            ({local_user: {device_id_1: as_fallback_key}}, [])
+        )
+
+        # Claim OTKs, which will return only the fallback key from the appservice.
+        claim_res = self.get_success(
+            self.handler.claim_one_time_keys(
+                {"one_time_keys": {local_user: {device_id_1: "alg1"}}},
+                timeout=None,
+                always_include_fallback_keys=True,
+            )
+        )
+        self.assertEqual(
+            claim_res,
+            {
+                "failures": {},
+                "one_time_keys": {local_user: {device_id_1: as_fallback_key}},
+            },
+        )
+
     @override_config({"experimental_features": {"msc3984_appservice_key_query": True}})
     def test_query_local_devices_appservice(self) -> None:
         """Test that querying of appservices for keys overrides responses from the database."""
-- 
cgit 1.5.1


From 9900f7c231f8af536fce229117b0a406dc629293 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 26 Apr 2023 17:00:11 +0100
Subject: Add admin endpoint to query room sizes (#15482)

---
 changelog.d/15482.feature               |   1 +
 docs/admin_api/statistics.md            |  49 ++++++++++++++
 synapse/rest/admin/__init__.py          |   6 +-
 synapse/rest/admin/statistics.py        |  25 +++++++
 synapse/storage/controllers/__init__.py |   2 +
 synapse/storage/controllers/stats.py    | 113 ++++++++++++++++++++++++++++++++
 6 files changed, 195 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15482.feature
 create mode 100644 synapse/storage/controllers/stats.py

(limited to 'synapse')

diff --git a/changelog.d/15482.feature b/changelog.d/15482.feature
new file mode 100644
index 0000000000..f3e9f2a5b2
--- /dev/null
+++ b/changelog.d/15482.feature
@@ -0,0 +1 @@
+Add admin endpoint to query the largest rooms by disk space used in the database.
diff --git a/docs/admin_api/statistics.md b/docs/admin_api/statistics.md
index 03b3621e55..2bd417e900 100644
--- a/docs/admin_api/statistics.md
+++ b/docs/admin_api/statistics.md
@@ -81,3 +81,52 @@ The following fields are returned in the JSON response body:
   - `user_id` - string - Fully-qualified user ID (ex. `@user:server.com`).
 * `next_token` - integer - Opaque value used for pagination. See above.
 * `total` - integer - Total number of users after filtering.
+
+
+# Get largest rooms by size in database
+
+Returns the 10 largest rooms and an estimate of how much space in the database
+they are taking.
+
+This does not include the size of any media associated with the room.
+
+Returns an error on SQLite.
+
+*Note:* This uses the planner statistics from PostgreSQL to do the estimates,
+which means that the returned information can vary widely from reality. However,
+it should be enough to get a rough idea of where database disk space is going.
+
+
+The API is:
+
+```
+GET /_synapse/admin/v1/statistics/database/rooms
+```
+
+A response body like the following is returned:
+
+```json
+{
+  "rooms": [
+    {
+      "room_id": "!OGEhHVWSdvArJzumhm:matrix.org",
+      "estimated_size": 47325417353
+    }
+  ]
+}
+```
+
+
+
+**Response**
+
+The following fields are returned in the JSON response body:
+
+* `rooms` - An array of objects, sorted by largest room first. Objects contain
+  the following fields:
+  - `room_id` - string - The room ID.
+  - `estimated_size` - integer - Estimated disk space used in bytes by the room
+    in the database.
+
+
+*Added in Synapse 1.83.0*
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index 79f22a59f1..770df261ce 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -68,7 +68,10 @@ from synapse.rest.admin.rooms import (
     RoomTimestampToEventRestServlet,
 )
 from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet
-from synapse.rest.admin.statistics import UserMediaStatisticsRestServlet
+from synapse.rest.admin.statistics import (
+    LargestRoomsStatistics,
+    UserMediaStatisticsRestServlet,
+)
 from synapse.rest.admin.username_available import UsernameAvailableRestServlet
 from synapse.rest.admin.users import (
     AccountDataRestServlet,
@@ -259,6 +262,7 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     UserRestServletV2(hs).register(http_server)
     UsersRestServletV2(hs).register(http_server)
     UserMediaStatisticsRestServlet(hs).register(http_server)
+    LargestRoomsStatistics(hs).register(http_server)
     EventReportDetailRestServlet(hs).register(http_server)
     EventReportsRestServlet(hs).register(http_server)
     AccountDataRestServlet(hs).register(http_server)
diff --git a/synapse/rest/admin/statistics.py b/synapse/rest/admin/statistics.py
index 9c45f4650d..19780e4b4c 100644
--- a/synapse/rest/admin/statistics.py
+++ b/synapse/rest/admin/statistics.py
@@ -113,3 +113,28 @@ class UserMediaStatisticsRestServlet(RestServlet):
             ret["next_token"] = start + len(users_media)
 
         return HTTPStatus.OK, ret
+
+
+class LargestRoomsStatistics(RestServlet):
+    """Get the largest rooms by database size.
+
+    Only works when using PostgreSQL.
+    """
+
+    PATTERNS = admin_patterns("/statistics/database/rooms$")
+
+    def __init__(self, hs: "HomeServer"):
+        self.auth = hs.get_auth()
+        self.stats_controller = hs.get_storage_controllers().stats
+
+    async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
+        await assert_requester_is_admin(self.auth, request)
+
+        room_sizes = await self.stats_controller.get_room_db_size_estimate()
+
+        return HTTPStatus.OK, {
+            "rooms": [
+                {"room_id": room_id, "estimated_size": size}
+                for room_id, size in room_sizes
+            ]
+        }
diff --git a/synapse/storage/controllers/__init__.py b/synapse/storage/controllers/__init__.py
index 45101cda7a..0ef8602631 100644
--- a/synapse/storage/controllers/__init__.py
+++ b/synapse/storage/controllers/__init__.py
@@ -19,6 +19,7 @@ from synapse.storage.controllers.persist_events import (
 )
 from synapse.storage.controllers.purge_events import PurgeEventsStorageController
 from synapse.storage.controllers.state import StateStorageController
+from synapse.storage.controllers.stats import StatsController
 from synapse.storage.databases import Databases
 from synapse.storage.databases.main import DataStore
 
@@ -40,6 +41,7 @@ class StorageControllers:
 
         self.purge_events = PurgeEventsStorageController(hs, stores)
         self.state = StateStorageController(hs, stores)
+        self.stats = StatsController(hs, stores)
 
         self.persistence = None
         if stores.persist_events:
diff --git a/synapse/storage/controllers/stats.py b/synapse/storage/controllers/stats.py
new file mode 100644
index 0000000000..988e44c6af
--- /dev/null
+++ b/synapse/storage/controllers/stats.py
@@ -0,0 +1,113 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from collections import Counter
+from typing import TYPE_CHECKING, Collection, List, Tuple
+
+from synapse.api.errors import SynapseError
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.databases import Databases
+from synapse.storage.engines import PostgresEngine
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+class StatsController:
+    """High level interface for getting statistics."""
+
+    def __init__(self, hs: "HomeServer", stores: Databases):
+        self.stores = stores
+
+    async def get_room_db_size_estimate(self) -> List[Tuple[str, int]]:
+        """Get an estimate of the largest rooms and how much database space they
+        use, in bytes.
+
+        Only works against PostgreSQL.
+
+        Note: this uses the postgres statistics so is a very rough estimate.
+        """
+
+        # Note: We look at both tables on the main and state databases.
+        if not isinstance(self.stores.main.database_engine, PostgresEngine):
+            raise SynapseError(400, "Endpoint requires using PostgreSQL")
+
+        if not isinstance(self.stores.state.database_engine, PostgresEngine):
+            raise SynapseError(400, "Endpoint requires using PostgreSQL")
+
+        # For each "large" table, we go through and get the largest rooms
+        # and an estimate of how much space they take. We can then sum the
+        # results and return the top 10.
+        #
+        # This isn't the most accurate, but given all of these are estimates
+        # anyway it's good enough.
+        room_estimates: Counter[str] = Counter()
+
+        # Return size of the table on disk, including indexes and TOAST.
+        table_sql = """
+            SELECT pg_total_relation_size(?)
+        """
+
+        # Get an estimate for the largest rooms and their frequency.
+        #
+        # Note: the cast here is a hack to cast from `anyarray` to an actual
+        # type. This ensures that psycopg2 passes us back a Python list.
+        column_sql = """
+            SELECT
+                most_common_vals::TEXT::TEXT[], most_common_freqs::TEXT::NUMERIC[]
+            FROM pg_stats
+            WHERE tablename = ? and attname = 'room_id'
+        """
+
+        def get_room_db_size_estimate_txn(
+            txn: LoggingTransaction,
+            tables: Collection[str],
+        ) -> None:
+            for table in tables:
+                txn.execute(table_sql, (table,))
+                row = txn.fetchone()
+                assert row is not None
+                (table_size,) = row
+
+                txn.execute(column_sql, (table,))
+                row = txn.fetchone()
+                assert row is not None
+                vals, freqs = row
+
+                for room_id, freq in zip(vals, freqs):
+                    room_estimates[room_id] += int(freq * table_size)
+
+        await self.stores.main.db_pool.runInteraction(
+            "get_room_db_size_estimate_main",
+            get_room_db_size_estimate_txn,
+            (
+                "event_json",
+                "events",
+                "event_search",
+                "event_edges",
+                "event_push_actions",
+                "stream_ordering_to_exterm",
+            ),
+        )
+
+        await self.stores.state.db_pool.runInteraction(
+            "get_room_db_size_estimate_state",
+            get_room_db_size_estimate_txn,
+            ("state_groups_state",),
+        )
+
+        return room_estimates.most_common(10)
-- 
cgit 1.5.1


From 247e6a8a7883a38d4e9f5f8981f4f6af69d2314d Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Wed, 26 Apr 2023 21:10:51 +0200
Subject: Add a module API to send an HTTP push notification (#15387)

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
---
 changelog.d/15387.feature      |   1 +
 synapse/module_api/__init__.py |  46 ++++++++++
 synapse/push/httppusher.py     | 190 ++++++++++++++++++++++++-----------------
 3 files changed, 160 insertions(+), 77 deletions(-)
 create mode 100644 changelog.d/15387.feature

(limited to 'synapse')

diff --git a/changelog.d/15387.feature b/changelog.d/15387.feature
new file mode 100644
index 0000000000..b36e331520
--- /dev/null
+++ b/changelog.d/15387.feature
@@ -0,0 +1 @@
+Add a module API to send an HTTP push notification.
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index eeafea74d1..90eff030b5 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -105,6 +105,7 @@ from synapse.module_api.callbacks.spamchecker_callbacks import (
     USER_MAY_SEND_3PID_INVITE_CALLBACK,
     SpamCheckerModuleApiCallbacks,
 )
+from synapse.push.httppusher import HttpPusher
 from synapse.rest.client.login import LoginResponse
 from synapse.storage import DataStore
 from synapse.storage.background_updates import (
@@ -248,6 +249,7 @@ class ModuleApi:
         self._registration_handler = hs.get_registration_handler()
         self._send_email_handler = hs.get_send_email_handler()
         self._push_rules_handler = hs.get_push_rules_handler()
+        self._pusherpool = hs.get_pusherpool()
         self._device_handler = hs.get_device_handler()
         self.custom_template_dir = hs.config.server.custom_template_directory
         self._callbacks = hs.get_module_api_callbacks()
@@ -1225,6 +1227,50 @@ class ModuleApi:
 
         await self._clock.sleep(seconds)
 
+    async def send_http_push_notification(
+        self,
+        user_id: str,
+        device_id: Optional[str],
+        content: JsonDict,
+        tweaks: Optional[JsonMapping] = None,
+        default_payload: Optional[JsonMapping] = None,
+    ) -> Dict[str, bool]:
+        """Send an HTTP push notification that is forwarded to the registered push gateway
+        for the specified user/device.
+
+        Added in Synapse v1.82.0.
+
+        Args:
+            user_id: The user ID to send the push notification to.
+            device_id: The device ID of the device where to send the push notification. If `None`,
+            the notification will be sent to all registered HTTP pushers of the user.
+            content: A dict of values that will be put in the `notification` field of the push
+            (cf Push Gateway spec). `devices` field will be overridden if included.
+            tweaks: A dict of `tweaks` that will be inserted in the `devices` section, cf spec.
+            default_payload: default payload to add in `devices[0].data.default_payload`.
+            This will be merged (and override if some matching values already exist there)
+            with existing `default_payload`.
+
+        Returns:
+            a dict representing the status of the push per device ID
+        """
+        status = {}
+        if user_id in self._pusherpool.pushers:
+            for p in self._pusherpool.pushers[user_id].values():
+                if isinstance(p, HttpPusher) and (
+                    not device_id or p.device_id == device_id
+                ):
+                    res = await p.dispatch_push(content, tweaks, default_payload)
+                    # Check if the push was successful and no pushers were rejected.
+                    sent = res is not False and not res
+
+                    # This is mainly to accommodate mypy
+                    # device_id should never be empty after the `set_device_id_for_pushers`
+                    # background job has been properly run.
+                    if p.device_id:
+                        status[p.device_id] = sent
+        return status
+
     async def send_mail(
         self,
         recipient: str,
diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py
index b048b03a74..4f8fa445d9 100644
--- a/synapse/push/httppusher.py
+++ b/synapse/push/httppusher.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 import logging
 import urllib.parse
-from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
+from typing import TYPE_CHECKING, Dict, List, Optional, Union
 
 from prometheus_client import Counter
 
@@ -27,6 +27,7 @@ from synapse.logging import opentracing
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.push import Pusher, PusherConfig, PusherConfigException
 from synapse.storage.databases.main.event_push_actions import HttpPushAction
+from synapse.types import JsonDict, JsonMapping
 
 from . import push_tools
 
@@ -56,7 +57,7 @@ http_badges_failed_counter = Counter(
 )
 
 
-def tweaks_for_actions(actions: List[Union[str, Dict]]) -> Dict[str, Any]:
+def tweaks_for_actions(actions: List[Union[str, Dict]]) -> JsonMapping:
     """
     Converts a list of actions into a `tweaks` dict (which can then be passed to
         the push gateway).
@@ -101,6 +102,7 @@ class HttpPusher(Pusher):
         self._storage_controllers = self.hs.get_storage_controllers()
         self.app_display_name = pusher_config.app_display_name
         self.device_display_name = pusher_config.device_display_name
+        self.device_id = pusher_config.device_id
         self.pushkey_ts = pusher_config.ts
         self.data = pusher_config.data
         self.backoff_delay = HttpPusher.INITIAL_BACKOFF_SEC
@@ -324,7 +326,7 @@ class HttpPusher(Pusher):
         event = await self.store.get_event(push_action.event_id, allow_none=True)
         if event is None:
             return True  # It's been redacted
-        rejected = await self.dispatch_push(event, tweaks, badge)
+        rejected = await self.dispatch_push_event(event, tweaks, badge)
         if rejected is False:
             return False
 
@@ -342,9 +344,83 @@ class HttpPusher(Pusher):
                     await self._pusherpool.remove_pusher(self.app_id, pk, self.user_id)
         return True
 
-    async def _build_notification_dict(
-        self, event: EventBase, tweaks: Dict[str, bool], badge: int
-    ) -> Dict[str, Any]:
+    async def dispatch_push(
+        self,
+        content: JsonDict,
+        tweaks: Optional[JsonMapping] = None,
+        default_payload: Optional[JsonMapping] = None,
+    ) -> Union[bool, List[str]]:
+        """Send a notification to the registered push gateway, with `content` being
+        the content of the `notification` top property specified in the spec.
+        Note that the `devices` property will be added with device-specific
+        information for this pusher.
+
+        Args:
+            content: the content
+            tweaks: tweaks to add into the `devices` section
+            default_payload: default payload to add in `devices[0].data.default_payload`.
+                This will be merged (and override if some matching values already exist there)
+                with existing `default_payload`.
+
+        Returns:
+            False if an error occurred when calling the push gateway, or an array of
+            rejected push keys otherwise. If this array is empty, the push fully
+            succeeded.
+        """
+        content = content.copy()
+
+        data = self.data_minus_url.copy()
+        if default_payload:
+            data.setdefault("default_payload", {}).update(default_payload)
+
+        device = {
+            "app_id": self.app_id,
+            "pushkey": self.pushkey,
+            "pushkey_ts": int(self.pushkey_ts / 1000),
+            "data": data,
+        }
+        if tweaks:
+            device["tweaks"] = tweaks
+
+        content["devices"] = [device]
+
+        try:
+            resp = await self.http_client.post_json_get_json(
+                self.url, {"notification": content}
+            )
+        except Exception as e:
+            logger.warning(
+                "Failed to push data to %s: %s %s",
+                self.name,
+                type(e),
+                e,
+            )
+            return False
+        rejected = []
+        if "rejected" in resp:
+            rejected = resp["rejected"]
+        return rejected
+
+    async def dispatch_push_event(
+        self,
+        event: EventBase,
+        tweaks: JsonMapping,
+        badge: int,
+    ) -> Union[bool, List[str]]:
+        """Send a notification to the registered push gateway by building it
+        from an event.
+
+        Args:
+            event: the event
+            tweaks: tweaks to add into the `devices` section, used to decide the
+                push priority
+            badge: unread count to send with the push notification
+
+        Returns:
+            False if an error occurred when calling the push gateway, or an array of
+            rejected push keys otherwise. If this array is empty, the push fully
+            succeeded.
+        """
         priority = "low"
         if (
             event.type == EventTypes.Encrypted
@@ -358,30 +434,20 @@ class HttpPusher(Pusher):
         # This was checked in the __init__, but mypy doesn't seem to know that.
         assert self.data is not None
         if self.data.get("format") == "event_id_only":
-            d: Dict[str, Any] = {
-                "notification": {
-                    "event_id": event.event_id,
-                    "room_id": event.room_id,
-                    "counts": {"unread": badge},
-                    "prio": priority,
-                    "devices": [
-                        {
-                            "app_id": self.app_id,
-                            "pushkey": self.pushkey,
-                            "pushkey_ts": int(self.pushkey_ts / 1000),
-                            "data": self.data_minus_url,
-                        }
-                    ],
-                }
+            content: JsonDict = {
+                "event_id": event.event_id,
+                "room_id": event.room_id,
+                "counts": {"unread": badge},
+                "prio": priority,
             }
-            return d
-
-        ctx = await push_tools.get_context_for_event(
-            self._storage_controllers, event, self.user_id
-        )
+            # event_id_only doesn't include the tweaks, so override them.
+            tweaks = {}
+        else:
+            ctx = await push_tools.get_context_for_event(
+                self._storage_controllers, event, self.user_id
+            )
 
-        d = {
-            "notification": {
+            content = {
                 "id": event.event_id,  # deprecated: remove soon
                 "event_id": event.event_id,
                 "room_id": event.room_id,
@@ -392,57 +458,27 @@ class HttpPusher(Pusher):
                     "unread": badge,
                     # 'missed_calls': 2
                 },
-                "devices": [
-                    {
-                        "app_id": self.app_id,
-                        "pushkey": self.pushkey,
-                        "pushkey_ts": int(self.pushkey_ts / 1000),
-                        "data": self.data_minus_url,
-                        "tweaks": tweaks,
-                    }
-                ],
             }
-        }
-        if event.type == "m.room.member" and event.is_state():
-            d["notification"]["membership"] = event.content["membership"]
-            d["notification"]["user_is_target"] = event.state_key == self.user_id
-        if self.hs.config.push.push_include_content and event.content:
-            d["notification"]["content"] = event.content
-
-        # We no longer send aliases separately, instead, we send the human
-        # readable name of the room, which may be an alias.
-        if "sender_display_name" in ctx and len(ctx["sender_display_name"]) > 0:
-            d["notification"]["sender_display_name"] = ctx["sender_display_name"]
-        if "name" in ctx and len(ctx["name"]) > 0:
-            d["notification"]["room_name"] = ctx["name"]
-
-        return d
-
-    async def dispatch_push(
-        self, event: EventBase, tweaks: Dict[str, bool], badge: int
-    ) -> Union[bool, Iterable[str]]:
-        notification_dict = await self._build_notification_dict(event, tweaks, badge)
-        if not notification_dict:
-            return []
-        try:
-            resp = await self.http_client.post_json_get_json(
-                self.url, notification_dict
-            )
-        except Exception as e:
-            logger.warning(
-                "Failed to push event %s to %s: %s %s",
-                event.event_id,
-                self.name,
-                type(e),
-                e,
-            )
-            return False
-        rejected = []
-        if "rejected" in resp:
-            rejected = resp["rejected"]
-        if not rejected:
+            if event.type == "m.room.member" and event.is_state():
+                content["membership"] = event.content["membership"]
+                content["user_is_target"] = event.state_key == self.user_id
+            if self.hs.config.push.push_include_content and event.content:
+                content["content"] = event.content
+
+            # We no longer send aliases separately, instead, we send the human
+            # readable name of the room, which may be an alias.
+            if "sender_display_name" in ctx and len(ctx["sender_display_name"]) > 0:
+                content["sender_display_name"] = ctx["sender_display_name"]
+            if "name" in ctx and len(ctx["name"]) > 0:
+                content["room_name"] = ctx["name"]
+
+        res = await self.dispatch_push(content, tweaks)
+
+        # If the push is successful and none are rejected, update the badge count.
+        if res is not False and not res:
             self.badge_count_last_call = badge
-        return rejected
+
+        return res
 
     async def _send_badge(self, badge: int) -> None:
         """
-- 
cgit 1.5.1


From 301b4156d5574521e4fa3df8fed2f8a1c8617745 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Wed, 26 Apr 2023 16:03:26 -0700
Subject: Add column `full_user_id` to tables `profiles` and `user_filters`.
 (#15458)

---
 changelog.d/15458.misc                             |  1 +
 synapse/_scripts/synapse_port_db.py                |  5 ++-
 synapse/api/filtering.py                           |  6 +--
 synapse/handlers/profile.py                        |  8 +---
 synapse/rest/client/filter.py                      |  2 +-
 synapse/storage/databases/main/filtering.py        | 47 ++++++++++++++++++----
 synapse/storage/databases/main/profile.py          | 42 +++++++++++++++----
 synapse/storage/databases/main/registration.py     |  4 +-
 synapse/storage/schema/__init__.py                 |  5 ++-
 .../76/01_add_profiles_full_user_id_column.sql     | 20 +++++++++
 .../76/02_add_user_filters_full_user_id_column.sql | 20 +++++++++
 tests/api/test_filtering.py                        | 16 ++++----
 tests/handlers/test_profile.py                     | 26 ++++++------
 tests/rest/admin/test_user.py                      | 30 +++++++++++---
 tests/rest/client/test_filter.py                   |  4 +-
 tests/storage/test_main.py                         |  4 +-
 tests/storage/test_profile.py                      | 20 +++------
 17 files changed, 186 insertions(+), 74 deletions(-)
 create mode 100644 changelog.d/15458.misc
 create mode 100644 synapse/storage/schema/main/delta/76/01_add_profiles_full_user_id_column.sql
 create mode 100644 synapse/storage/schema/main/delta/76/02_add_user_filters_full_user_id_column.sql

(limited to 'synapse')

diff --git a/changelog.d/15458.misc b/changelog.d/15458.misc
new file mode 100644
index 0000000000..5183161d25
--- /dev/null
+++ b/changelog.d/15458.misc
@@ -0,0 +1 @@
+Add column `full_user_id` to tables `profiles` and `user_filters`.
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index a58ae2a308..56d5aeb0dd 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -54,7 +54,7 @@ from synapse.logging.context import (
 )
 from synapse.notifier import ReplicationNotifier
 from synapse.storage.database import DatabasePool, LoggingTransaction, make_conn
-from synapse.storage.databases.main import PushRuleStore
+from synapse.storage.databases.main import FilteringWorkerStore, PushRuleStore
 from synapse.storage.databases.main.account_data import AccountDataWorkerStore
 from synapse.storage.databases.main.client_ips import ClientIpBackgroundUpdateStore
 from synapse.storage.databases.main.deviceinbox import DeviceInboxBackgroundUpdateStore
@@ -69,6 +69,7 @@ from synapse.storage.databases.main.media_repository import (
     MediaRepositoryBackgroundUpdateStore,
 )
 from synapse.storage.databases.main.presence import PresenceBackgroundUpdateStore
+from synapse.storage.databases.main.profile import ProfileWorkerStore
 from synapse.storage.databases.main.pusher import (
     PusherBackgroundUpdatesStore,
     PusherWorkerStore,
@@ -229,6 +230,8 @@ class Store(
     EndToEndRoomKeyBackgroundStore,
     StatsStore,
     AccountDataWorkerStore,
+    FilteringWorkerStore,
+    ProfileWorkerStore,
     PushRuleStore,
     PusherWorkerStore,
     PusherBackgroundUpdatesStore,
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index b9f432cc23..de7c56bc0f 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -170,11 +170,9 @@ class Filtering:
         result = await self.store.get_user_filter(user_localpart, filter_id)
         return FilterCollection(self._hs, result)
 
-    def add_user_filter(
-        self, user_localpart: str, user_filter: JsonDict
-    ) -> Awaitable[int]:
+    def add_user_filter(self, user_id: UserID, user_filter: JsonDict) -> Awaitable[int]:
         self.check_valid_filter(user_filter)
-        return self.store.add_user_filter(user_localpart, user_filter)
+        return self.store.add_user_filter(user_id, user_filter)
 
     # TODO(paul): surely we should probably add a delete_user_filter or
     #   replace_user_filter at some point? There's no REST API specified for
diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py
index 9a81a77cbd..440d3f4acd 100644
--- a/synapse/handlers/profile.py
+++ b/synapse/handlers/profile.py
@@ -178,9 +178,7 @@ class ProfileHandler:
                 authenticated_entity=requester.authenticated_entity,
             )
 
-        await self.store.set_profile_displayname(
-            target_user.localpart, displayname_to_set
-        )
+        await self.store.set_profile_displayname(target_user, displayname_to_set)
 
         profile = await self.store.get_profileinfo(target_user.localpart)
         await self.user_directory_handler.handle_local_profile_change(
@@ -272,9 +270,7 @@ class ProfileHandler:
                 target_user, authenticated_entity=requester.authenticated_entity
             )
 
-        await self.store.set_profile_avatar_url(
-            target_user.localpart, avatar_url_to_set
-        )
+        await self.store.set_profile_avatar_url(target_user, avatar_url_to_set)
 
         profile = await self.store.get_profileinfo(target_user.localpart)
         await self.user_directory_handler.handle_local_profile_change(
diff --git a/synapse/rest/client/filter.py b/synapse/rest/client/filter.py
index ab7d8c9419..04561f36d7 100644
--- a/synapse/rest/client/filter.py
+++ b/synapse/rest/client/filter.py
@@ -94,7 +94,7 @@ class CreateFilterRestServlet(RestServlet):
         set_timeline_upper_limit(content, self.hs.config.server.filter_timeline_limit)
 
         filter_id = await self.filtering.add_user_filter(
-            user_localpart=target_user.localpart, user_filter=content
+            user_id=target_user, user_filter=content
         )
 
         return 200, {"filter_id": str(filter_id)}
diff --git a/synapse/storage/databases/main/filtering.py b/synapse/storage/databases/main/filtering.py
index 8e57c8e5a0..50516402f9 100644
--- a/synapse/storage/databases/main/filtering.py
+++ b/synapse/storage/databases/main/filtering.py
@@ -16,15 +16,38 @@
 from typing import Optional, Tuple, Union, cast
 
 from canonicaljson import encode_canonical_json
+from typing_extensions import TYPE_CHECKING
 
 from synapse.api.errors import Codes, StoreError, SynapseError
 from synapse.storage._base import SQLBaseStore, db_to_json
-from synapse.storage.database import LoggingTransaction
-from synapse.types import JsonDict
+from synapse.storage.database import (
+    DatabasePool,
+    LoggingDatabaseConnection,
+    LoggingTransaction,
+)
+from synapse.types import JsonDict, UserID
 from synapse.util.caches.descriptors import cached
 
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
 
 class FilteringWorkerStore(SQLBaseStore):
+    def __init__(
+        self,
+        database: DatabasePool,
+        db_conn: LoggingDatabaseConnection,
+        hs: "HomeServer",
+    ):
+        super().__init__(database, db_conn, hs)
+        self.db_pool.updates.register_background_index_update(
+            "full_users_filters_unique_idx",
+            index_name="full_users_unique_idx",
+            table="user_filters",
+            columns=["full_user_id, filter_id"],
+            unique=True,
+        )
+
     @cached(num_args=2)
     async def get_user_filter(
         self, user_localpart: str, filter_id: Union[int, str]
@@ -46,7 +69,7 @@ class FilteringWorkerStore(SQLBaseStore):
 
         return db_to_json(def_json)
 
-    async def add_user_filter(self, user_localpart: str, user_filter: JsonDict) -> int:
+    async def add_user_filter(self, user_id: UserID, user_filter: JsonDict) -> int:
         def_json = encode_canonical_json(user_filter)
 
         # Need an atomic transaction to SELECT the maximal ID so far then
@@ -56,13 +79,13 @@ class FilteringWorkerStore(SQLBaseStore):
                 "SELECT filter_id FROM user_filters "
                 "WHERE user_id = ? AND filter_json = ?"
             )
-            txn.execute(sql, (user_localpart, bytearray(def_json)))
+            txn.execute(sql, (user_id.localpart, bytearray(def_json)))
             filter_id_response = txn.fetchone()
             if filter_id_response is not None:
                 return filter_id_response[0]
 
             sql = "SELECT MAX(filter_id) FROM user_filters WHERE user_id = ?"
-            txn.execute(sql, (user_localpart,))
+            txn.execute(sql, (user_id.localpart,))
             max_id = cast(Tuple[Optional[int]], txn.fetchone())[0]
             if max_id is None:
                 filter_id = 0
@@ -70,10 +93,18 @@ class FilteringWorkerStore(SQLBaseStore):
                 filter_id = max_id + 1
 
             sql = (
-                "INSERT INTO user_filters (user_id, filter_id, filter_json)"
-                "VALUES(?, ?, ?)"
+                "INSERT INTO user_filters (full_user_id, user_id, filter_id, filter_json)"
+                "VALUES(?, ?, ?, ?)"
+            )
+            txn.execute(
+                sql,
+                (
+                    user_id.to_string(),
+                    user_id.localpart,
+                    filter_id,
+                    bytearray(def_json),
+                ),
             )
-            txn.execute(sql, (user_localpart, filter_id, bytearray(def_json)))
 
             return filter_id
 
diff --git a/synapse/storage/databases/main/profile.py b/synapse/storage/databases/main/profile.py
index a1747f04ce..b109f8c07f 100644
--- a/synapse/storage/databases/main/profile.py
+++ b/synapse/storage/databases/main/profile.py
@@ -11,14 +11,34 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
 
 from synapse.api.errors import StoreError
 from synapse.storage._base import SQLBaseStore
+from synapse.storage.database import DatabasePool, LoggingDatabaseConnection
 from synapse.storage.databases.main.roommember import ProfileInfo
+from synapse.types import UserID
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
 
 
 class ProfileWorkerStore(SQLBaseStore):
+    def __init__(
+        self,
+        database: DatabasePool,
+        db_conn: LoggingDatabaseConnection,
+        hs: "HomeServer",
+    ):
+        super().__init__(database, db_conn, hs)
+        self.db_pool.updates.register_background_index_update(
+            "profiles_full_user_id_key_idx",
+            index_name="profiles_full_user_id_key",
+            table="profiles",
+            columns=["full_user_id"],
+            unique=True,
+        )
+
     async def get_profileinfo(self, user_localpart: str) -> ProfileInfo:
         try:
             profile = await self.db_pool.simple_select_one(
@@ -54,28 +74,36 @@ class ProfileWorkerStore(SQLBaseStore):
             desc="get_profile_avatar_url",
         )
 
-    async def create_profile(self, user_localpart: str) -> None:
+    async def create_profile(self, user_id: UserID) -> None:
+        user_localpart = user_id.localpart
         await self.db_pool.simple_insert(
-            table="profiles", values={"user_id": user_localpart}, desc="create_profile"
+            table="profiles",
+            values={"user_id": user_localpart, "full_user_id": user_id.to_string()},
+            desc="create_profile",
         )
 
     async def set_profile_displayname(
-        self, user_localpart: str, new_displayname: Optional[str]
+        self, user_id: UserID, new_displayname: Optional[str]
     ) -> None:
+        user_localpart = user_id.localpart
         await self.db_pool.simple_upsert(
             table="profiles",
             keyvalues={"user_id": user_localpart},
-            values={"displayname": new_displayname},
+            values={
+                "displayname": new_displayname,
+                "full_user_id": user_id.to_string(),
+            },
             desc="set_profile_displayname",
         )
 
     async def set_profile_avatar_url(
-        self, user_localpart: str, new_avatar_url: Optional[str]
+        self, user_id: UserID, new_avatar_url: Optional[str]
     ) -> None:
+        user_localpart = user_id.localpart
         await self.db_pool.simple_upsert(
             table="profiles",
             keyvalues={"user_id": user_localpart},
-            values={"avatar_url": new_avatar_url},
+            values={"avatar_url": new_avatar_url, "full_user_id": user_id.to_string()},
             desc="set_profile_avatar_url",
         )
 
diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py
index 717237e024..676d03bb7e 100644
--- a/synapse/storage/databases/main/registration.py
+++ b/synapse/storage/databases/main/registration.py
@@ -2414,8 +2414,8 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore):
             # *obviously* the 'profiles' table uses localpart for user_id
             # while everything else uses the full mxid.
             txn.execute(
-                "INSERT INTO profiles(user_id, displayname) VALUES (?,?)",
-                (user_id_obj.localpart, create_profile_with_displayname),
+                "INSERT INTO profiles(full_user_id, user_id, displayname) VALUES (?,?,?)",
+                (user_id, user_id_obj.localpart, create_profile_with_displayname),
             )
 
         if self.hs.config.stats.stats_enabled:
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index a28f2b997c..1672976209 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 75  # remember to update the list below when updating
+SCHEMA_VERSION = 76  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -97,6 +97,9 @@ Changes in SCHEMA_VERSION = 75:
       `local_current_membership` & `room_memberships`) is now being populated for new
       rows. When the background job to populate historical rows lands this will
       become the compat schema version.
+
+Changes in SCHEMA_VERSION = 76:
+    - Adds a full_user_id column to tables profiles and user_filters.
 """
 
 
diff --git a/synapse/storage/schema/main/delta/76/01_add_profiles_full_user_id_column.sql b/synapse/storage/schema/main/delta/76/01_add_profiles_full_user_id_column.sql
new file mode 100644
index 0000000000..9cd680325a
--- /dev/null
+++ b/synapse/storage/schema/main/delta/76/01_add_profiles_full_user_id_column.sql
@@ -0,0 +1,20 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE profiles ADD COLUMN full_user_id TEXT;
+
+-- Make sure the column has a unique constraint, mirroring the `profiles_user_id_key`
+-- constraint.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES (7501, 'profiles_full_user_id_key_idx', '{}');
diff --git a/synapse/storage/schema/main/delta/76/02_add_user_filters_full_user_id_column.sql b/synapse/storage/schema/main/delta/76/02_add_user_filters_full_user_id_column.sql
new file mode 100644
index 0000000000..fd231adeef
--- /dev/null
+++ b/synapse/storage/schema/main/delta/76/02_add_user_filters_full_user_id_column.sql
@@ -0,0 +1,20 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE user_filters ADD COLUMN full_user_id TEXT;
+
+-- Add a unique index on the new column, mirroring the `user_filters_unique` unique
+-- index.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES (7502, 'full_users_filters_unique_idx', '{}');
\ No newline at end of file
diff --git a/tests/api/test_filtering.py b/tests/api/test_filtering.py
index 6c6a9ab4b4..222449baac 100644
--- a/tests/api/test_filtering.py
+++ b/tests/api/test_filtering.py
@@ -26,13 +26,15 @@ from synapse.api.errors import SynapseError
 from synapse.api.filtering import Filter
 from synapse.api.presence import UserPresenceState
 from synapse.server import HomeServer
-from synapse.types import JsonDict
+from synapse.types import JsonDict, UserID
 from synapse.util import Clock
 from synapse.util.frozenutils import freeze
 
 from tests import unittest
 from tests.events.test_utils import MockEvent
 
+user_id = UserID.from_string("@test_user:test")
+user2_id = UserID.from_string("@test_user2:test")
 user_localpart = "test_user"
 
 
@@ -437,7 +439,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         user_filter_json = {"presence": {"senders": ["@foo:bar"]}}
         filter_id = self.get_success(
             self.datastore.add_user_filter(
-                user_localpart=user_localpart, user_filter=user_filter_json
+                user_id=user_id, user_filter=user_filter_json
             )
         )
         presence_states = [
@@ -467,7 +469,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
 
         filter_id = self.get_success(
             self.datastore.add_user_filter(
-                user_localpart=user_localpart + "2", user_filter=user_filter_json
+                user_id=user2_id, user_filter=user_filter_json
             )
         )
         presence_states = [
@@ -495,7 +497,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         user_filter_json = {"room": {"state": {"types": ["m.*"]}}}
         filter_id = self.get_success(
             self.datastore.add_user_filter(
-                user_localpart=user_localpart, user_filter=user_filter_json
+                user_id=user_id, user_filter=user_filter_json
             )
         )
         event = MockEvent(sender="@foo:bar", type="m.room.topic", room_id="!foo:bar")
@@ -514,7 +516,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         user_filter_json = {"room": {"state": {"types": ["m.*"]}}}
         filter_id = self.get_success(
             self.datastore.add_user_filter(
-                user_localpart=user_localpart, user_filter=user_filter_json
+                user_id=user_id, user_filter=user_filter_json
             )
         )
         event = MockEvent(
@@ -598,7 +600,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
 
         filter_id = self.get_success(
             self.filtering.add_user_filter(
-                user_localpart=user_localpart, user_filter=user_filter_json
+                user_id=user_id, user_filter=user_filter_json
             )
         )
 
@@ -619,7 +621,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
 
         filter_id = self.get_success(
             self.datastore.add_user_filter(
-                user_localpart=user_localpart, user_filter=user_filter_json
+                user_id=user_id, user_filter=user_filter_json
             )
         )
 
diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py
index 7c174782da..64a9a22afe 100644
--- a/tests/handlers/test_profile.py
+++ b/tests/handlers/test_profile.py
@@ -66,9 +66,7 @@ class ProfileTestCase(unittest.HomeserverTestCase):
         self.handler = hs.get_profile_handler()
 
     def test_get_my_name(self) -> None:
-        self.get_success(
-            self.store.set_profile_displayname(self.frank.localpart, "Frank")
-        )
+        self.get_success(self.store.set_profile_displayname(self.frank, "Frank"))
 
         displayname = self.get_success(self.handler.get_displayname(self.frank))
 
@@ -121,9 +119,7 @@ class ProfileTestCase(unittest.HomeserverTestCase):
         self.hs.config.registration.enable_set_displayname = False
 
         # Setting displayname for the first time is allowed
-        self.get_success(
-            self.store.set_profile_displayname(self.frank.localpart, "Frank")
-        )
+        self.get_success(self.store.set_profile_displayname(self.frank, "Frank"))
 
         self.assertEqual(
             (
@@ -166,8 +162,14 @@ class ProfileTestCase(unittest.HomeserverTestCase):
         )
 
     def test_incoming_fed_query(self) -> None:
-        self.get_success(self.store.create_profile("caroline"))
-        self.get_success(self.store.set_profile_displayname("caroline", "Caroline"))
+        self.get_success(
+            self.store.create_profile(UserID.from_string("@caroline:test"))
+        )
+        self.get_success(
+            self.store.set_profile_displayname(
+                UserID.from_string("@caroline:test"), "Caroline"
+            )
+        )
 
         response = self.get_success(
             self.query_handlers["profile"](
@@ -183,9 +185,7 @@ class ProfileTestCase(unittest.HomeserverTestCase):
 
     def test_get_my_avatar(self) -> None:
         self.get_success(
-            self.store.set_profile_avatar_url(
-                self.frank.localpart, "http://my.server/me.png"
-            )
+            self.store.set_profile_avatar_url(self.frank, "http://my.server/me.png")
         )
         avatar_url = self.get_success(self.handler.get_avatar_url(self.frank))
 
@@ -237,9 +237,7 @@ class ProfileTestCase(unittest.HomeserverTestCase):
 
         # Setting displayname for the first time is allowed
         self.get_success(
-            self.store.set_profile_avatar_url(
-                self.frank.localpart, "http://my.server/me.png"
-            )
+            self.store.set_profile_avatar_url(self.frank, "http://my.server/me.png")
         )
 
         self.assertEqual(
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index b4241ceaf0..434bb56d44 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -802,9 +802,21 @@ class UsersListTestCase(unittest.HomeserverTestCase):
 
         # Set avatar URL to all users, that no user has a NULL value to avoid
         # different sort order between SQlite and PostreSQL
-        self.get_success(self.store.set_profile_avatar_url("user1", "mxc://url3"))
-        self.get_success(self.store.set_profile_avatar_url("user2", "mxc://url2"))
-        self.get_success(self.store.set_profile_avatar_url("admin", "mxc://url1"))
+        self.get_success(
+            self.store.set_profile_avatar_url(
+                UserID.from_string("@user1:test"), "mxc://url3"
+            )
+        )
+        self.get_success(
+            self.store.set_profile_avatar_url(
+                UserID.from_string("@user2:test"), "mxc://url2"
+            )
+        )
+        self.get_success(
+            self.store.set_profile_avatar_url(
+                UserID.from_string("@admin:test"), "mxc://url1"
+            )
+        )
 
         # order by default (name)
         self._order_test([self.admin_user, user1, user2], None)
@@ -1127,7 +1139,9 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
 
         # set attributes for user
         self.get_success(
-            self.store.set_profile_avatar_url("user", "mxc://servername/mediaid")
+            self.store.set_profile_avatar_url(
+                UserID.from_string("@user:test"), "mxc://servername/mediaid"
+            )
         )
         self.get_success(
             self.store.user_add_threepid("@user:test", "email", "foo@bar.com", 0, 0)
@@ -1257,7 +1271,9 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
         Reproduces #12257.
         """
         # Patch `self.other_user` to have an empty string as their avatar.
-        self.get_success(self.store.set_profile_avatar_url("user", ""))
+        self.get_success(
+            self.store.set_profile_avatar_url(UserID.from_string("@user:test"), "")
+        )
 
         # Check we can still erase them.
         channel = self.make_request(
@@ -2311,7 +2327,9 @@ class UserRestTestCase(unittest.HomeserverTestCase):
 
         # set attributes for user
         self.get_success(
-            self.store.set_profile_avatar_url("user", "mxc://servername/mediaid")
+            self.store.set_profile_avatar_url(
+                UserID.from_string("@user:test"), "mxc://servername/mediaid"
+            )
         )
         self.get_success(
             self.store.user_add_threepid("@user:test", "email", "foo@bar.com", 0, 0)
diff --git a/tests/rest/client/test_filter.py b/tests/rest/client/test_filter.py
index 91678abf13..9faa9de050 100644
--- a/tests/rest/client/test_filter.py
+++ b/tests/rest/client/test_filter.py
@@ -17,6 +17,7 @@ from twisted.test.proto_helpers import MemoryReactor
 from synapse.api.errors import Codes
 from synapse.rest.client import filter
 from synapse.server import HomeServer
+from synapse.types import UserID
 from synapse.util import Clock
 
 from tests import unittest
@@ -76,7 +77,8 @@ class FilterTestCase(unittest.HomeserverTestCase):
     def test_get_filter(self) -> None:
         filter_id = self.get_success(
             self.filtering.add_user_filter(
-                user_localpart="apple", user_filter=self.EXAMPLE_FILTER
+                user_id=UserID.from_string("@apple:test"),
+                user_filter=self.EXAMPLE_FILTER,
             )
         )
         self.reactor.advance(1)
diff --git a/tests/storage/test_main.py b/tests/storage/test_main.py
index 5806cb0e4b..27f450e22d 100644
--- a/tests/storage/test_main.py
+++ b/tests/storage/test_main.py
@@ -29,9 +29,9 @@ class DataStoreTestCase(unittest.HomeserverTestCase):
 
     def test_get_users_paginate(self) -> None:
         self.get_success(self.store.register_user(self.user.to_string(), "pass"))
-        self.get_success(self.store.create_profile(self.user.localpart))
+        self.get_success(self.store.create_profile(self.user))
         self.get_success(
-            self.store.set_profile_displayname(self.user.localpart, self.displayname)
+            self.store.set_profile_displayname(self.user, self.displayname)
         )
 
         users, total = self.get_success(
diff --git a/tests/storage/test_profile.py b/tests/storage/test_profile.py
index a019d06e09..6ec34997ea 100644
--- a/tests/storage/test_profile.py
+++ b/tests/storage/test_profile.py
@@ -27,11 +27,9 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase):
         self.u_frank = UserID.from_string("@frank:test")
 
     def test_displayname(self) -> None:
-        self.get_success(self.store.create_profile(self.u_frank.localpart))
+        self.get_success(self.store.create_profile(self.u_frank))
 
-        self.get_success(
-            self.store.set_profile_displayname(self.u_frank.localpart, "Frank")
-        )
+        self.get_success(self.store.set_profile_displayname(self.u_frank, "Frank"))
 
         self.assertEqual(
             "Frank",
@@ -43,21 +41,17 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase):
         )
 
         # test set to None
-        self.get_success(
-            self.store.set_profile_displayname(self.u_frank.localpart, None)
-        )
+        self.get_success(self.store.set_profile_displayname(self.u_frank, None))
 
         self.assertIsNone(
             self.get_success(self.store.get_profile_displayname(self.u_frank.localpart))
         )
 
     def test_avatar_url(self) -> None:
-        self.get_success(self.store.create_profile(self.u_frank.localpart))
+        self.get_success(self.store.create_profile(self.u_frank))
 
         self.get_success(
-            self.store.set_profile_avatar_url(
-                self.u_frank.localpart, "http://my.site/here"
-            )
+            self.store.set_profile_avatar_url(self.u_frank, "http://my.site/here")
         )
 
         self.assertEqual(
@@ -70,9 +64,7 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase):
         )
 
         # test set to None
-        self.get_success(
-            self.store.set_profile_avatar_url(self.u_frank.localpart, None)
-        )
+        self.get_success(self.store.set_profile_avatar_url(self.u_frank, None))
 
         self.assertIsNone(
             self.get_success(self.store.get_profile_avatar_url(self.u_frank.localpart))
-- 
cgit 1.5.1


From 486c059479c3f1fefa8470d41f34a9571d447c00 Mon Sep 17 00:00:00 2001
From: mcalinghee <mcalinghee.dev@gmail.com>
Date: Thu, 27 Apr 2023 12:32:02 +0200
Subject: Disable push rule evaluation for rooms excluded from sync (#15361)

* no push for excluded room from sync

* add changelog
Signed-off-by: Maghen Calinghee <maghen.calinghee@beta.gouv.fr>

* correct changelog
---
 changelog.d/15361.bugfix                 | 1 +
 synapse/push/bulk_push_rule_evaluator.py | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 changelog.d/15361.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15361.bugfix b/changelog.d/15361.bugfix
new file mode 100644
index 0000000000..2cd795e576
--- /dev/null
+++ b/changelog.d/15361.bugfix
@@ -0,0 +1 @@
+Disable push rule evaluation for rooms excluded from sync.
\ No newline at end of file
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 199337673f..320084f5f5 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -326,6 +326,7 @@ class BulkPushRuleEvaluator:
         if (
             not event.internal_metadata.is_notifiable()
             or event.internal_metadata.is_historical()
+            or event.room_id in self.hs.config.server.rooms_to_exclude_from_sync
         ):
             # Push rules for events that aren't notifiable can't be processed by this and
             # we want to skip push notification actions for historical messages
-- 
cgit 1.5.1


From a346b43837ed83e311bc7fe6108a789f91a5199f Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 27 Apr 2023 07:59:14 -0400
Subject: Check databases/__init__ and main/cache with mypy. (#15496)

---
 changelog.d/15496.misc                  |  1 +
 mypy.ini                                |  3 ---
 synapse/storage/databases/__init__.py   |  4 ++--
 synapse/storage/databases/main/cache.py | 16 +++++++++-------
 4 files changed, 12 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/15496.misc

(limited to 'synapse')

diff --git a/changelog.d/15496.misc b/changelog.d/15496.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/15496.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/mypy.ini b/mypy.ini
index 8fb87b9b74..3b17c59dfc 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -28,13 +28,10 @@ files =
 # https://docs.python.org/3/library/re.html#re.X
 exclude = (?x)
   ^(
-   |synapse/storage/databases/__init__.py
-   |synapse/storage/databases/main/cache.py
    |synapse/storage/schema/
    )$
 
 [mypy-synapse.metrics._reactor_metrics]
-disallow_untyped_defs = False
 # This module imports select.epoll. That exists on Linux, but doesn't on macOS.
 # See https://github.com/matrix-org/synapse/pull/11771.
 warn_unused_ignores = False
diff --git a/synapse/storage/databases/__init__.py b/synapse/storage/databases/__init__.py
index ce3d1d4e94..7aa24ccf21 100644
--- a/synapse/storage/databases/__init__.py
+++ b/synapse/storage/databases/__init__.py
@@ -95,7 +95,7 @@ class Databases(Generic[DataStoreT]):
                     # If we're on a process that can persist events also
                     # instantiate a `PersistEventsStore`
                     if hs.get_instance_name() in hs.config.worker.writers.events:
-                        persist_events = PersistEventsStore(hs, database, main, db_conn)
+                        persist_events = PersistEventsStore(hs, database, main, db_conn)  # type: ignore[arg-type]
 
                 if "state" in database_config.databases:
                     logger.info(
@@ -133,6 +133,6 @@ class Databases(Generic[DataStoreT]):
 
         # We use local variables here to ensure that the databases do not have
         # optional types.
-        self.main = main
+        self.main = main  # type: ignore[assignment]
         self.state = state
         self.persist_events = persist_events
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index 096dec7f87..bd07d20171 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -205,13 +205,13 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
             )
         elif row.type == EventsStreamCurrentStateRow.TypeId:
             assert isinstance(data, EventsStreamCurrentStateRow)
-            self._curr_state_delta_stream_cache.entity_has_changed(data.room_id, token)
+            self._curr_state_delta_stream_cache.entity_has_changed(data.room_id, token)  # type: ignore[attr-defined]
 
             if data.type == EventTypes.Member:
-                self.get_rooms_for_user_with_stream_ordering.invalidate(
+                self.get_rooms_for_user_with_stream_ordering.invalidate(  # type: ignore[attr-defined]
                     (data.state_key,)
                 )
-                self.get_rooms_for_user.invalidate((data.state_key,))
+                self.get_rooms_for_user.invalidate((data.state_key,))  # type: ignore[attr-defined]
         else:
             raise Exception("Unknown events stream row type %s" % (row.type,))
 
@@ -229,7 +229,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
         # This invalidates any local in-memory cached event objects, the original
         # process triggering the invalidation is responsible for clearing any external
         # cached objects.
-        self._invalidate_local_get_event_cache(event_id)
+        self._invalidate_local_get_event_cache(event_id)  # type: ignore[attr-defined]
 
         self._attempt_to_invalidate_cache("have_seen_event", (room_id, event_id))
         self._attempt_to_invalidate_cache("get_latest_event_ids_in_room", (room_id,))
@@ -242,10 +242,10 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
         self._attempt_to_invalidate_cache("_get_membership_from_event_id", (event_id,))
 
         if not backfilled:
-            self._events_stream_cache.entity_has_changed(room_id, stream_ordering)
+            self._events_stream_cache.entity_has_changed(room_id, stream_ordering)  # type: ignore[attr-defined]
 
         if redacts:
-            self._invalidate_local_get_event_cache(redacts)
+            self._invalidate_local_get_event_cache(redacts)  # type: ignore[attr-defined]
             # Caches which might leak edits must be invalidated for the event being
             # redacted.
             self._attempt_to_invalidate_cache("get_relations_for_event", (redacts,))
@@ -254,7 +254,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
             self._attempt_to_invalidate_cache("get_thread_id_for_receipts", (redacts,))
 
         if etype == EventTypes.Member:
-            self._membership_stream_cache.entity_has_changed(state_key, stream_ordering)
+            self._membership_stream_cache.entity_has_changed(state_key, stream_ordering)  # type: ignore[attr-defined]
             self._attempt_to_invalidate_cache(
                 "get_invited_rooms_for_local_user", (state_key,)
             )
@@ -378,6 +378,8 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
             )
 
         if isinstance(self.database_engine, PostgresEngine):
+            assert self._cache_id_gen is not None
+
             # get_next() returns a context manager which is designed to wrap
             # the transaction. However, we want to only get an ID when we want
             # to use it, here, so we need to call __enter__ manually, and have
-- 
cgit 1.5.1


From 6efa6740044bc240691115135660d901db358ce9 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 27 Apr 2023 08:44:53 -0400
Subject: Add type hints to schema deltas (#15497)

Cleans up the schema delta files:

* Removes no-op functions.
* Adds missing type hints to function parameters.
* Fixes any issues with type hints.

This also renames one (very old) schema delta to avoid a conflict
that mypy complains about.
---
 changelog.d/15497.misc                             |  1 +
 mypy.ini                                           | 10 ---
 synapse/storage/prepare_database.py                |  8 +-
 synapse/storage/schema/main/delta/20/pushers.py    | 13 ++--
 synapse/storage/schema/main/delta/25/fts.py        |  9 +--
 synapse/storage/schema/main/delta/27/ts.py         |  8 +-
 synapse/storage/schema/main/delta/30/as_users.py   | 16 +++-
 synapse/storage/schema/main/delta/31/pushers.py    | 86 ----------------------
 synapse/storage/schema/main/delta/31/pushers_0.py  | 85 +++++++++++++++++++++
 .../storage/schema/main/delta/31/search_update.py  |  9 +--
 .../storage/schema/main/delta/33/event_fields.py   |  8 +-
 .../schema/main/delta/33/remote_media_ts.py        | 12 ++-
 .../storage/schema/main/delta/34/cache_stream.py   |  9 +--
 .../schema/main/delta/34/received_txn_purge.py     |  9 +--
 .../schema/main/delta/37/remove_auth_idx.py        |  9 +--
 synapse/storage/schema/main/delta/42/user_dir.py   |  9 +--
 .../schema/main/delta/48/group_unique_indexes.py   | 10 +--
 .../main/delta/50/make_event_content_nullable.py   | 17 +++--
 .../main/delta/56/unique_user_filter_index.py      |  9 +--
 .../main/delta/57/local_current_membership.py      | 13 +++-
 .../schema/main/delta/58/06dlols_unique_idx.py     |  8 +-
 .../storage/schema/main/delta/58/11user_id_seq.py  |  9 +--
 .../storage/schema/main/delta/59/01ignored_user.py |  8 +-
 .../schema/main/delta/61/03recreate_min_depth.py   |  8 +-
 .../delta/68/05partial_state_rooms_triggers.py     |  4 +-
 .../storage/schema/main/delta/69/01as_txn_seq.py   |  7 +-
 .../main/delta/72/03bg_populate_events_columns.py  |  9 ++-
 ...force_update_current_state_events_membership.py |  9 ++-
 .../delta/73/10_update_sqlite_fts4_tokenizer.py    |  6 +-
 ...ership_tables_event_stream_ordering_triggers.py |  4 +-
 .../schema/state/delta/47/state_group_seq.py       | 10 +--
 31 files changed, 210 insertions(+), 222 deletions(-)
 create mode 100644 changelog.d/15497.misc
 delete mode 100644 synapse/storage/schema/main/delta/31/pushers.py
 create mode 100644 synapse/storage/schema/main/delta/31/pushers_0.py

(limited to 'synapse')

diff --git a/changelog.d/15497.misc b/changelog.d/15497.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/15497.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/mypy.ini b/mypy.ini
index 3b17c59dfc..5e7057cfb7 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -21,16 +21,6 @@ files =
   tests/,
   build_rust.py
 
-# Note: Better exclusion syntax coming in mypy > 0.910
-# https://github.com/python/mypy/pull/11329
-#
-# For now, set the (?x) flag enable "verbose" regexes
-# https://docs.python.org/3/library/re.html#re.X
-exclude = (?x)
-  ^(
-   |synapse/storage/schema/
-   )$
-
 [mypy-synapse.metrics._reactor_metrics]
 # This module imports select.epoll. That exists on Linux, but doesn't on macOS.
 # See https://github.com/matrix-org/synapse/pull/11771.
diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index 2a1c6fa31b..38b7abd801 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -22,7 +22,7 @@ import attr
 from typing_extensions import Counter as CounterType
 
 from synapse.config.homeserver import HomeServerConfig
-from synapse.storage.database import LoggingDatabaseConnection
+from synapse.storage.database import LoggingDatabaseConnection, LoggingTransaction
 from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
 from synapse.storage.schema import SCHEMA_COMPAT_VERSION, SCHEMA_VERSION
 from synapse.storage.types import Cursor
@@ -168,7 +168,9 @@ def prepare_database(
 
 
 def _setup_new_database(
-    cur: Cursor, database_engine: BaseDatabaseEngine, databases: Collection[str]
+    cur: LoggingTransaction,
+    database_engine: BaseDatabaseEngine,
+    databases: Collection[str],
 ) -> None:
     """Sets up the physical database by finding a base set of "full schemas" and
     then applying any necessary deltas, including schemas from the given data
@@ -289,7 +291,7 @@ def _setup_new_database(
 
 
 def _upgrade_existing_database(
-    cur: Cursor,
+    cur: LoggingTransaction,
     current_schema_state: _SchemaState,
     database_engine: BaseDatabaseEngine,
     config: Optional[HomeServerConfig],
diff --git a/synapse/storage/schema/main/delta/20/pushers.py b/synapse/storage/schema/main/delta/20/pushers.py
index 45b846e6a7..08ae0efc21 100644
--- a/synapse/storage/schema/main/delta/20/pushers.py
+++ b/synapse/storage/schema/main/delta/20/pushers.py
@@ -24,10 +24,13 @@ UTF-8 bytes, so we have to do it in Python.
 
 import logging
 
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine
+
 logger = logging.getLogger(__name__)
 
 
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     logger.info("Porting pushers table...")
     cur.execute(
         """
@@ -61,8 +64,8 @@ def run_create(cur, database_engine, *args, **kwargs):
     """
     )
     count = 0
-    for row in cur.fetchall():
-        row = list(row)
+    for tuple_row in cur.fetchall():
+        row = list(tuple_row)
         row[8] = bytes(row[8]).decode("utf-8")
         row[11] = bytes(row[11]).decode("utf-8")
         cur.execute(
@@ -81,7 +84,3 @@ def run_create(cur, database_engine, *args, **kwargs):
     cur.execute("DROP TABLE pushers")
     cur.execute("ALTER TABLE pushers2 RENAME TO pushers")
     logger.info("Moved %d pushers to new table", count)
-
-
-def run_upgrade(*args, **kwargs):
-    pass
diff --git a/synapse/storage/schema/main/delta/25/fts.py b/synapse/storage/schema/main/delta/25/fts.py
index 21f57825d4..831f8e914d 100644
--- a/synapse/storage/schema/main/delta/25/fts.py
+++ b/synapse/storage/schema/main/delta/25/fts.py
@@ -14,7 +14,8 @@
 import json
 import logging
 
-from synapse.storage.engines import PostgresEngine, Sqlite3Engine
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
 from synapse.storage.prepare_database import get_statements
 
 logger = logging.getLogger(__name__)
@@ -41,7 +42,7 @@ SQLITE_TABLE = (
 )
 
 
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     if isinstance(database_engine, PostgresEngine):
         for statement in get_statements(POSTGRES_TABLE.splitlines()):
             cur.execute(statement)
@@ -72,7 +73,3 @@ def run_create(cur, database_engine, *args, **kwargs):
         )
 
         cur.execute(sql, ("event_search", progress_json))
-
-
-def run_upgrade(*args, **kwargs):
-    pass
diff --git a/synapse/storage/schema/main/delta/27/ts.py b/synapse/storage/schema/main/delta/27/ts.py
index 1c6058063f..8962afdeda 100644
--- a/synapse/storage/schema/main/delta/27/ts.py
+++ b/synapse/storage/schema/main/delta/27/ts.py
@@ -14,6 +14,8 @@
 import json
 import logging
 
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine
 from synapse.storage.prepare_database import get_statements
 
 logger = logging.getLogger(__name__)
@@ -25,7 +27,7 @@ ALTER_TABLE = (
 )
 
 
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     for statement in get_statements(ALTER_TABLE.splitlines()):
         cur.execute(statement)
 
@@ -51,7 +53,3 @@ def run_create(cur, database_engine, *args, **kwargs):
         )
 
         cur.execute(sql, ("event_origin_server_ts", progress_json))
-
-
-def run_upgrade(*args, **kwargs):
-    pass
diff --git a/synapse/storage/schema/main/delta/30/as_users.py b/synapse/storage/schema/main/delta/30/as_users.py
index 4b4b166e37..b9d8df1231 100644
--- a/synapse/storage/schema/main/delta/30/as_users.py
+++ b/synapse/storage/schema/main/delta/30/as_users.py
@@ -12,13 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+from typing import Dict, Iterable, List, Tuple, cast
 
 from synapse.config.appservice import load_appservices
+from synapse.config.homeserver import HomeServerConfig
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine
 
 logger = logging.getLogger(__name__)
 
 
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     # NULL indicates user was not registered by an appservice.
     try:
         cur.execute("ALTER TABLE users ADD COLUMN appservice_id TEXT")
@@ -27,9 +31,13 @@ def run_create(cur, database_engine, *args, **kwargs):
         pass
 
 
-def run_upgrade(cur, database_engine, config, *args, **kwargs):
+def run_upgrade(
+    cur: LoggingTransaction,
+    database_engine: BaseDatabaseEngine,
+    config: HomeServerConfig,
+) -> None:
     cur.execute("SELECT name FROM users")
-    rows = cur.fetchall()
+    rows = cast(Iterable[Tuple[str]], cur.fetchall())
 
     config_files = []
     try:
@@ -39,7 +47,7 @@ def run_upgrade(cur, database_engine, config, *args, **kwargs):
 
     appservices = load_appservices(config.server.server_name, config_files)
 
-    owned = {}
+    owned: Dict[str, List[str]] = {}
 
     for row in rows:
         user_id = row[0]
diff --git a/synapse/storage/schema/main/delta/31/pushers.py b/synapse/storage/schema/main/delta/31/pushers.py
deleted file mode 100644
index 5be81c806a..0000000000
--- a/synapse/storage/schema/main/delta/31/pushers.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Change the last_token to last_stream_ordering now that pushers no longer
-# listen on an event stream but instead select out of the event_push_actions
-# table.
-
-
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-def token_to_stream_ordering(token):
-    return int(token[1:].split("_")[0])
-
-
-def run_create(cur, database_engine, *args, **kwargs):
-    logger.info("Porting pushers table, delta 31...")
-    cur.execute(
-        """
-        CREATE TABLE IF NOT EXISTS pushers2 (
-          id BIGINT PRIMARY KEY,
-          user_name TEXT NOT NULL,
-          access_token BIGINT DEFAULT NULL,
-          profile_tag VARCHAR(32) NOT NULL,
-          kind VARCHAR(8) NOT NULL,
-          app_id VARCHAR(64) NOT NULL,
-          app_display_name VARCHAR(64) NOT NULL,
-          device_display_name VARCHAR(128) NOT NULL,
-          pushkey TEXT NOT NULL,
-          ts BIGINT NOT NULL,
-          lang VARCHAR(8),
-          data TEXT,
-          last_stream_ordering INTEGER,
-          last_success BIGINT,
-          failing_since BIGINT,
-          UNIQUE (app_id, pushkey, user_name)
-        )
-    """
-    )
-    cur.execute(
-        """SELECT
-        id, user_name, access_token, profile_tag, kind,
-        app_id, app_display_name, device_display_name,
-        pushkey, ts, lang, data, last_token, last_success,
-        failing_since
-        FROM pushers
-    """
-    )
-    count = 0
-    for row in cur.fetchall():
-        row = list(row)
-        row[12] = token_to_stream_ordering(row[12])
-        cur.execute(
-            """
-                INSERT into pushers2 (
-                id, user_name, access_token, profile_tag, kind,
-                app_id, app_display_name, device_display_name,
-                pushkey, ts, lang, data, last_stream_ordering, last_success,
-                failing_since
-                ) values (%s)
-            """
-            % (",".join(["?" for _ in range(len(row))])),
-            row,
-        )
-        count += 1
-    cur.execute("DROP TABLE pushers")
-    cur.execute("ALTER TABLE pushers2 RENAME TO pushers")
-    logger.info("Moved %d pushers to new table", count)
-
-
-def run_upgrade(cur, database_engine, *args, **kwargs):
-    pass
diff --git a/synapse/storage/schema/main/delta/31/pushers_0.py b/synapse/storage/schema/main/delta/31/pushers_0.py
new file mode 100644
index 0000000000..e772e2dc65
--- /dev/null
+++ b/synapse/storage/schema/main/delta/31/pushers_0.py
@@ -0,0 +1,85 @@
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Change the last_token to last_stream_ordering now that pushers no longer
+# listen on an event stream but instead select out of the event_push_actions
+# table.
+
+
+import logging
+
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine
+
+logger = logging.getLogger(__name__)
+
+
+def token_to_stream_ordering(token: str) -> int:
+    return int(token[1:].split("_")[0])
+
+
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
+    logger.info("Porting pushers table, delta 31...")
+    cur.execute(
+        """
+        CREATE TABLE IF NOT EXISTS pushers2 (
+          id BIGINT PRIMARY KEY,
+          user_name TEXT NOT NULL,
+          access_token BIGINT DEFAULT NULL,
+          profile_tag VARCHAR(32) NOT NULL,
+          kind VARCHAR(8) NOT NULL,
+          app_id VARCHAR(64) NOT NULL,
+          app_display_name VARCHAR(64) NOT NULL,
+          device_display_name VARCHAR(128) NOT NULL,
+          pushkey TEXT NOT NULL,
+          ts BIGINT NOT NULL,
+          lang VARCHAR(8),
+          data TEXT,
+          last_stream_ordering INTEGER,
+          last_success BIGINT,
+          failing_since BIGINT,
+          UNIQUE (app_id, pushkey, user_name)
+        )
+    """
+    )
+    cur.execute(
+        """SELECT
+        id, user_name, access_token, profile_tag, kind,
+        app_id, app_display_name, device_display_name,
+        pushkey, ts, lang, data, last_token, last_success,
+        failing_since
+        FROM pushers
+    """
+    )
+    count = 0
+    for tuple_row in cur.fetchall():
+        row = list(tuple_row)
+        row[12] = token_to_stream_ordering(row[12])
+        cur.execute(
+            """
+                INSERT into pushers2 (
+                id, user_name, access_token, profile_tag, kind,
+                app_id, app_display_name, device_display_name,
+                pushkey, ts, lang, data, last_stream_ordering, last_success,
+                failing_since
+                ) values (%s)
+            """
+            % (",".join(["?" for _ in range(len(row))])),
+            row,
+        )
+        count += 1
+    cur.execute("DROP TABLE pushers")
+    cur.execute("ALTER TABLE pushers2 RENAME TO pushers")
+    logger.info("Moved %d pushers to new table", count)
diff --git a/synapse/storage/schema/main/delta/31/search_update.py b/synapse/storage/schema/main/delta/31/search_update.py
index b84c844e3a..e20e92e454 100644
--- a/synapse/storage/schema/main/delta/31/search_update.py
+++ b/synapse/storage/schema/main/delta/31/search_update.py
@@ -14,7 +14,8 @@
 import json
 import logging
 
-from synapse.storage.engines import PostgresEngine
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
 from synapse.storage.prepare_database import get_statements
 
 logger = logging.getLogger(__name__)
@@ -26,7 +27,7 @@ ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT;
 """
 
 
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     if not isinstance(database_engine, PostgresEngine):
         return
 
@@ -56,7 +57,3 @@ def run_create(cur, database_engine, *args, **kwargs):
         )
 
         cur.execute(sql, ("event_search_order", progress_json))
-
-
-def run_upgrade(cur, database_engine, *args, **kwargs):
-    pass
diff --git a/synapse/storage/schema/main/delta/33/event_fields.py b/synapse/storage/schema/main/delta/33/event_fields.py
index e928c66a8f..8d806f5b52 100644
--- a/synapse/storage/schema/main/delta/33/event_fields.py
+++ b/synapse/storage/schema/main/delta/33/event_fields.py
@@ -14,6 +14,8 @@
 import json
 import logging
 
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine
 from synapse.storage.prepare_database import get_statements
 
 logger = logging.getLogger(__name__)
@@ -25,7 +27,7 @@ ALTER TABLE events ADD COLUMN contains_url BOOLEAN;
 """
 
 
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     for statement in get_statements(ALTER_TABLE.splitlines()):
         cur.execute(statement)
 
@@ -51,7 +53,3 @@ def run_create(cur, database_engine, *args, **kwargs):
         )
 
         cur.execute(sql, ("event_fields_sender_url", progress_json))
-
-
-def run_upgrade(cur, database_engine, *args, **kwargs):
-    pass
diff --git a/synapse/storage/schema/main/delta/33/remote_media_ts.py b/synapse/storage/schema/main/delta/33/remote_media_ts.py
index 3907189e29..35499e43b5 100644
--- a/synapse/storage/schema/main/delta/33/remote_media_ts.py
+++ b/synapse/storage/schema/main/delta/33/remote_media_ts.py
@@ -14,14 +14,22 @@
 
 import time
 
+from synapse.config.homeserver import HomeServerConfig
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine
+
 ALTER_TABLE = "ALTER TABLE remote_media_cache ADD COLUMN last_access_ts BIGINT"
 
 
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     cur.execute(ALTER_TABLE)
 
 
-def run_upgrade(cur, database_engine, *args, **kwargs):
+def run_upgrade(
+    cur: LoggingTransaction,
+    database_engine: BaseDatabaseEngine,
+    config: HomeServerConfig,
+) -> None:
     cur.execute(
         "UPDATE remote_media_cache SET last_access_ts = ?",
         (int(time.time() * 1000),),
diff --git a/synapse/storage/schema/main/delta/34/cache_stream.py b/synapse/storage/schema/main/delta/34/cache_stream.py
index cf09e43e2b..682c86da1a 100644
--- a/synapse/storage/schema/main/delta/34/cache_stream.py
+++ b/synapse/storage/schema/main/delta/34/cache_stream.py
@@ -14,7 +14,8 @@
 
 import logging
 
-from synapse.storage.engines import PostgresEngine
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
 from synapse.storage.prepare_database import get_statements
 
 logger = logging.getLogger(__name__)
@@ -34,13 +35,9 @@ CREATE INDEX cache_invalidation_stream_id ON cache_invalidation_stream(stream_id
 """
 
 
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     if not isinstance(database_engine, PostgresEngine):
         return
 
     for statement in get_statements(CREATE_TABLE.splitlines()):
         cur.execute(statement)
-
-
-def run_upgrade(cur, database_engine, *args, **kwargs):
-    pass
diff --git a/synapse/storage/schema/main/delta/34/received_txn_purge.py b/synapse/storage/schema/main/delta/34/received_txn_purge.py
index 67d505e68b..dcfe3bc45a 100644
--- a/synapse/storage/schema/main/delta/34/received_txn_purge.py
+++ b/synapse/storage/schema/main/delta/34/received_txn_purge.py
@@ -14,19 +14,16 @@
 
 import logging
 
-from synapse.storage.engines import PostgresEngine
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
 
 logger = logging.getLogger(__name__)
 
 
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     if isinstance(database_engine, PostgresEngine):
         cur.execute("TRUNCATE received_transactions")
     else:
         cur.execute("DELETE FROM received_transactions")
 
     cur.execute("CREATE INDEX received_transactions_ts ON received_transactions(ts)")
-
-
-def run_upgrade(cur, database_engine, *args, **kwargs):
-    pass
diff --git a/synapse/storage/schema/main/delta/37/remove_auth_idx.py b/synapse/storage/schema/main/delta/37/remove_auth_idx.py
index a377884169..d672f9b43c 100644
--- a/synapse/storage/schema/main/delta/37/remove_auth_idx.py
+++ b/synapse/storage/schema/main/delta/37/remove_auth_idx.py
@@ -14,7 +14,8 @@
 
 import logging
 
-from synapse.storage.engines import PostgresEngine
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
 from synapse.storage.prepare_database import get_statements
 
 logger = logging.getLogger(__name__)
@@ -68,7 +69,7 @@ CREATE INDEX evauth_edges_id ON event_auth(event_id);
 """
 
 
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     for statement in get_statements(DROP_INDICES.splitlines()):
         cur.execute(statement)
 
@@ -79,7 +80,3 @@ def run_create(cur, database_engine, *args, **kwargs):
 
     for statement in get_statements(drop_constraint.splitlines()):
         cur.execute(statement)
-
-
-def run_upgrade(cur, database_engine, *args, **kwargs):
-    pass
diff --git a/synapse/storage/schema/main/delta/42/user_dir.py b/synapse/storage/schema/main/delta/42/user_dir.py
index 506f326f4d..7e5c307c62 100644
--- a/synapse/storage/schema/main/delta/42/user_dir.py
+++ b/synapse/storage/schema/main/delta/42/user_dir.py
@@ -14,7 +14,8 @@
 
 import logging
 
-from synapse.storage.engines import PostgresEngine, Sqlite3Engine
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
 from synapse.storage.prepare_database import get_statements
 
 logger = logging.getLogger(__name__)
@@ -66,7 +67,7 @@ CREATE VIRTUAL TABLE user_directory_search
 """
 
 
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     for statement in get_statements(BOTH_TABLES.splitlines()):
         cur.execute(statement)
 
@@ -78,7 +79,3 @@ def run_create(cur, database_engine, *args, **kwargs):
             cur.execute(statement)
     else:
         raise Exception("Unrecognized database engine")
-
-
-def run_upgrade(*args, **kwargs):
-    pass
diff --git a/synapse/storage/schema/main/delta/48/group_unique_indexes.py b/synapse/storage/schema/main/delta/48/group_unique_indexes.py
index 49f5f2c003..ad2da4c8af 100644
--- a/synapse/storage/schema/main/delta/48/group_unique_indexes.py
+++ b/synapse/storage/schema/main/delta/48/group_unique_indexes.py
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from synapse.storage.engines import PostgresEngine
+
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
 from synapse.storage.prepare_database import get_statements
 
 FIX_INDEXES = """
@@ -34,7 +36,7 @@ CREATE INDEX group_rooms_r_idx ON group_rooms(room_id);
 """
 
 
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     rowid = "ctid" if isinstance(database_engine, PostgresEngine) else "rowid"
 
     # remove duplicates from group_users & group_invites tables
@@ -57,7 +59,3 @@ def run_create(cur, database_engine, *args, **kwargs):
 
     for statement in get_statements(FIX_INDEXES.splitlines()):
         cur.execute(statement)
-
-
-def run_upgrade(*args, **kwargs):
-    pass
diff --git a/synapse/storage/schema/main/delta/50/make_event_content_nullable.py b/synapse/storage/schema/main/delta/50/make_event_content_nullable.py
index acd6ad1e1f..3e8a348b8a 100644
--- a/synapse/storage/schema/main/delta/50/make_event_content_nullable.py
+++ b/synapse/storage/schema/main/delta/50/make_event_content_nullable.py
@@ -53,16 +53,13 @@ SQLite:
 
 import logging
 
-from synapse.storage.engines import PostgresEngine
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
 
 logger = logging.getLogger(__name__)
 
 
-def run_create(cur, database_engine, *args, **kwargs):
-    pass
-
-
-def run_upgrade(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     if isinstance(database_engine, PostgresEngine):
         cur.execute(
             """
@@ -76,7 +73,9 @@ def run_upgrade(cur, database_engine, *args, **kwargs):
     cur.execute(
         "SELECT sql FROM sqlite_master WHERE tbl_name='events' AND type='table'"
     )
-    (oldsql,) = cur.fetchone()
+    row = cur.fetchone()
+    assert row is not None
+    (oldsql,) = row
 
     sql = oldsql.replace("content TEXT NOT NULL", "content TEXT")
     if sql == oldsql:
@@ -85,7 +84,9 @@ def run_upgrade(cur, database_engine, *args, **kwargs):
     logger.info("Replacing definition of 'events' with: %s", sql)
 
     cur.execute("PRAGMA schema_version")
-    (oldver,) = cur.fetchone()
+    row = cur.fetchone()
+    assert row is not None
+    (oldver,) = row
     cur.execute("PRAGMA writable_schema=ON")
     cur.execute(
         "UPDATE sqlite_master SET sql=? WHERE tbl_name='events' AND type='table'",
diff --git a/synapse/storage/schema/main/delta/56/unique_user_filter_index.py b/synapse/storage/schema/main/delta/56/unique_user_filter_index.py
index bb7296852a..2461f87d77 100644
--- a/synapse/storage/schema/main/delta/56/unique_user_filter_index.py
+++ b/synapse/storage/schema/main/delta/56/unique_user_filter_index.py
@@ -1,7 +1,8 @@
 import logging
 from io import StringIO
 
-from synapse.storage.engines import PostgresEngine
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
 from synapse.storage.prepare_database import execute_statements_from_stream
 
 logger = logging.getLogger(__name__)
@@ -16,11 +17,7 @@ This migration updates the user_filters table as follows:
 """
 
 
-def run_upgrade(cur, database_engine, *args, **kwargs):
-    pass
-
-
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     if isinstance(database_engine, PostgresEngine):
         select_clause = """
             SELECT DISTINCT ON (user_id, filter_id) user_id, filter_id, filter_json
diff --git a/synapse/storage/schema/main/delta/57/local_current_membership.py b/synapse/storage/schema/main/delta/57/local_current_membership.py
index d25093c19f..cc0f2109bb 100644
--- a/synapse/storage/schema/main/delta/57/local_current_membership.py
+++ b/synapse/storage/schema/main/delta/57/local_current_membership.py
@@ -27,7 +27,16 @@
 # equivalent behaviour as if the server had remained in the room).
 
 
-def run_upgrade(cur, database_engine, config, *args, **kwargs):
+from synapse.config.homeserver import HomeServerConfig
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine
+
+
+def run_upgrade(
+    cur: LoggingTransaction,
+    database_engine: BaseDatabaseEngine,
+    config: HomeServerConfig,
+) -> None:
     # We need to do the insert in `run_upgrade` section as we don't have access
     # to `config` in `run_create`.
 
@@ -77,7 +86,7 @@ def run_upgrade(cur, database_engine, config, *args, **kwargs):
     )
 
 
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     cur.execute(
         """
         CREATE TABLE local_current_membership (
diff --git a/synapse/storage/schema/main/delta/58/06dlols_unique_idx.py b/synapse/storage/schema/main/delta/58/06dlols_unique_idx.py
index d353f2bcb3..4eaab9e086 100644
--- a/synapse/storage/schema/main/delta/58/06dlols_unique_idx.py
+++ b/synapse/storage/schema/main/delta/58/06dlols_unique_idx.py
@@ -20,18 +20,14 @@ entries, and with a UNIQUE index.
 import logging
 from io import StringIO
 
+from synapse.storage.database import LoggingTransaction
 from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
 from synapse.storage.prepare_database import execute_statements_from_stream
-from synapse.storage.types import Cursor
 
 logger = logging.getLogger(__name__)
 
 
-def run_upgrade(*args, **kwargs):
-    pass
-
-
-def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     # some instances might already have this index, in which case we can skip this
     if isinstance(database_engine, PostgresEngine):
         cur.execute(
diff --git a/synapse/storage/schema/main/delta/58/11user_id_seq.py b/synapse/storage/schema/main/delta/58/11user_id_seq.py
index 4310ec12ce..32f7e0a252 100644
--- a/synapse/storage/schema/main/delta/58/11user_id_seq.py
+++ b/synapse/storage/schema/main/delta/58/11user_id_seq.py
@@ -16,19 +16,16 @@
 Adds a postgres SEQUENCE for generating guest user IDs.
 """
 
+from synapse.storage.database import LoggingTransaction
 from synapse.storage.databases.main.registration import (
     find_max_generated_user_id_localpart,
 )
-from synapse.storage.engines import PostgresEngine
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
 
 
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     if not isinstance(database_engine, PostgresEngine):
         return
 
     next_id = find_max_generated_user_id_localpart(cur) + 1
     cur.execute("CREATE SEQUENCE user_id_seq START WITH %s", (next_id,))
-
-
-def run_upgrade(*args, **kwargs):
-    pass
diff --git a/synapse/storage/schema/main/delta/59/01ignored_user.py b/synapse/storage/schema/main/delta/59/01ignored_user.py
index 9e8f35c1d2..c53e2bade2 100644
--- a/synapse/storage/schema/main/delta/59/01ignored_user.py
+++ b/synapse/storage/schema/main/delta/59/01ignored_user.py
@@ -20,18 +20,14 @@ import logging
 from io import StringIO
 
 from synapse.storage._base import db_to_json
+from synapse.storage.database import LoggingTransaction
 from synapse.storage.engines import BaseDatabaseEngine
 from synapse.storage.prepare_database import execute_statements_from_stream
-from synapse.storage.types import Cursor
 
 logger = logging.getLogger(__name__)
 
 
-def run_upgrade(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
-    pass
-
-
-def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     logger.info("Creating ignored_users table")
     execute_statements_from_stream(cur, StringIO(_create_commands))
 
diff --git a/synapse/storage/schema/main/delta/61/03recreate_min_depth.py b/synapse/storage/schema/main/delta/61/03recreate_min_depth.py
index f8d7db9f2e..4a06b65888 100644
--- a/synapse/storage/schema/main/delta/61/03recreate_min_depth.py
+++ b/synapse/storage/schema/main/delta/61/03recreate_min_depth.py
@@ -16,11 +16,11 @@
 This migration handles the process of changing the type of `room_depth.min_depth` to
 a BIGINT.
 """
+from synapse.storage.database import LoggingTransaction
 from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
-from synapse.storage.types import Cursor
 
 
-def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     if not isinstance(database_engine, PostgresEngine):
         # this only applies to postgres - sqlite does not distinguish between big and
         # little ints.
@@ -64,7 +64,3 @@ def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs
             (6103, 'replace_room_depth_min_depth', '{}', 'populate_room_depth2')
         """
     )
-
-
-def run_upgrade(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
-    pass
diff --git a/synapse/storage/schema/main/delta/68/05partial_state_rooms_triggers.py b/synapse/storage/schema/main/delta/68/05partial_state_rooms_triggers.py
index a2ec4fc26e..9210026dde 100644
--- a/synapse/storage/schema/main/delta/68/05partial_state_rooms_triggers.py
+++ b/synapse/storage/schema/main/delta/68/05partial_state_rooms_triggers.py
@@ -18,11 +18,11 @@ This migration adds triggers to the partial_state_events tables to enforce uniqu
 
 Triggers cannot be expressed in .sql files, so we have to use a separate file.
 """
+from synapse.storage.database import LoggingTransaction
 from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
-from synapse.storage.types import Cursor
 
 
-def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     # complain if the room_id in partial_state_events doesn't match
     # that in `events`. We already have a fk constraint which ensures that the event
     # exists in `events`, so all we have to do is raise if there is a row with a
diff --git a/synapse/storage/schema/main/delta/69/01as_txn_seq.py b/synapse/storage/schema/main/delta/69/01as_txn_seq.py
index 24bd4b391e..6c112425f2 100644
--- a/synapse/storage/schema/main/delta/69/01as_txn_seq.py
+++ b/synapse/storage/schema/main/delta/69/01as_txn_seq.py
@@ -17,10 +17,11 @@
 Adds a postgres SEQUENCE for generating application service transaction IDs.
 """
 
-from synapse.storage.engines import PostgresEngine
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
 
 
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     if isinstance(database_engine, PostgresEngine):
         # If we already have some AS TXNs we want to start from the current
         # maximum value. There are two potential places this is stored - the
@@ -30,10 +31,12 @@ def run_create(cur, database_engine, *args, **kwargs):
 
         cur.execute("SELECT COALESCE(max(txn_id), 0) FROM application_services_txns")
         row = cur.fetchone()
+        assert row is not None
         txn_max = row[0]
 
         cur.execute("SELECT COALESCE(max(last_txn), 0) FROM application_services_state")
         row = cur.fetchone()
+        assert row is not None
         last_txn_max = row[0]
 
         start_val = max(last_txn_max, txn_max) + 1
diff --git a/synapse/storage/schema/main/delta/72/03bg_populate_events_columns.py b/synapse/storage/schema/main/delta/72/03bg_populate_events_columns.py
index 55a5d092cc..2ec1830c6f 100644
--- a/synapse/storage/schema/main/delta/72/03bg_populate_events_columns.py
+++ b/synapse/storage/schema/main/delta/72/03bg_populate_events_columns.py
@@ -14,10 +14,11 @@
 
 import json
 
-from synapse.storage.types import Cursor
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine
 
 
-def run_create(cur: Cursor, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     """Add a bg update to populate the `state_key` and `rejection_reason` columns of `events`"""
 
     # we know that any new events will have the columns populated (and that has been
@@ -27,7 +28,9 @@ def run_create(cur: Cursor, database_engine, *args, **kwargs):
     # current min and max stream orderings, since that is guaranteed to include all
     # the events that were stored before the new columns were added.
     cur.execute("SELECT MIN(stream_ordering), MAX(stream_ordering) FROM events")
-    (min_stream_ordering, max_stream_ordering) = cur.fetchone()
+    row = cur.fetchone()
+    assert row is not None
+    (min_stream_ordering, max_stream_ordering) = row
 
     if min_stream_ordering is None:
         # no rows, nothing to do.
diff --git a/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py b/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py
index b5853d125c..5c3e3584a2 100644
--- a/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py
+++ b/synapse/storage/schema/main/delta/72/07force_update_current_state_events_membership.py
@@ -19,9 +19,16 @@ for its completion can be removed.
 
 Note the background job must still remain defined in the database class.
 """
+from synapse.config.homeserver import HomeServerConfig
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine
 
 
-def run_upgrade(cur, database_engine, *args, **kwargs):
+def run_upgrade(
+    cur: LoggingTransaction,
+    database_engine: BaseDatabaseEngine,
+    config: HomeServerConfig,
+) -> None:
     cur.execute("SELECT update_name FROM background_updates")
     rows = cur.fetchall()
     for row in rows:
diff --git a/synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py b/synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py
index 3de0a709eb..c7ed258e9d 100644
--- a/synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py
+++ b/synapse/storage/schema/main/delta/73/10_update_sqlite_fts4_tokenizer.py
@@ -13,11 +13,11 @@
 # limitations under the License.
 import json
 
+from synapse.storage.database import LoggingTransaction
 from synapse.storage.engines import BaseDatabaseEngine, Sqlite3Engine
-from synapse.storage.types import Cursor
 
 
-def run_create(cur: Cursor, database_engine: BaseDatabaseEngine) -> None:
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     """
     Upgrade the event_search table to use the porter tokenizer if it isn't already
 
@@ -38,6 +38,7 @@ def run_create(cur: Cursor, database_engine: BaseDatabaseEngine) -> None:
     # Re-run the background job to re-populate the event_search table.
     cur.execute("SELECT MIN(stream_ordering) FROM events")
     row = cur.fetchone()
+    assert row is not None
     min_stream_id = row[0]
 
     # If there are not any events, nothing to do.
@@ -46,6 +47,7 @@ def run_create(cur: Cursor, database_engine: BaseDatabaseEngine) -> None:
 
     cur.execute("SELECT MAX(stream_ordering) FROM events")
     row = cur.fetchone()
+    assert row is not None
     max_stream_id = row[0]
 
     progress = {
diff --git a/synapse/storage/schema/main/delta/74/04_membership_tables_event_stream_ordering_triggers.py b/synapse/storage/schema/main/delta/74/04_membership_tables_event_stream_ordering_triggers.py
index e32e9083b3..2ee2bc9422 100644
--- a/synapse/storage/schema/main/delta/74/04_membership_tables_event_stream_ordering_triggers.py
+++ b/synapse/storage/schema/main/delta/74/04_membership_tables_event_stream_ordering_triggers.py
@@ -17,11 +17,11 @@
 This migration adds triggers to the room membership tables to enforce consistency.
 Triggers cannot be expressed in .sql files, so we have to use a separate file.
 """
+from synapse.storage.database import LoggingTransaction
 from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
-from synapse.storage.types import Cursor
 
 
-def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     # Complain if the `event_stream_ordering` in membership tables doesn't match
     # the `stream_ordering` row with the same `event_id` in `events`.
     if isinstance(database_engine, Sqlite3Engine):
diff --git a/synapse/storage/schema/state/delta/47/state_group_seq.py b/synapse/storage/schema/state/delta/47/state_group_seq.py
index 9fd1ccf6f7..42aff50227 100644
--- a/synapse/storage/schema/state/delta/47/state_group_seq.py
+++ b/synapse/storage/schema/state/delta/47/state_group_seq.py
@@ -12,15 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from synapse.storage.engines import PostgresEngine
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
 
 
-def run_create(cur, database_engine, *args, **kwargs):
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
     if isinstance(database_engine, PostgresEngine):
         # if we already have some state groups, we want to start making new
         # ones with a higher id.
         cur.execute("SELECT max(id) FROM state_groups")
         row = cur.fetchone()
+        assert row is not None
 
         if row[0] is None:
             start_val = 1
@@ -28,7 +30,3 @@ def run_create(cur, database_engine, *args, **kwargs):
             start_val = row[0] + 1
 
         cur.execute("CREATE SEQUENCE state_group_id_seq START WITH %s", (start_val,))
-
-
-def run_upgrade(*args, **kwargs):
-    pass
-- 
cgit 1.5.1


From 57aeeb308b39c4fd455682966eabc9c0fa17c65d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 27 Apr 2023 12:57:46 -0400
Subject: Add support for claiming multiple OTKs at once. (#15468)

MSC3983 provides a way to request multiple OTKs at once from appservices,
this extends this concept to the Client-Server API.

Note that this will likely be split out into a separate MSC, but is currently part of
MSC3983.
---
 changelog.d/15468.misc                            |  1 +
 synapse/appservice/api.py                         | 31 ++++++---
 synapse/federation/federation_client.py           | 49 ++++++++++++++-
 synapse/federation/federation_server.py           |  7 +--
 synapse/federation/transport/client.py            | 49 ++++++++++++++-
 synapse/federation/transport/server/federation.py | 25 ++++++--
 synapse/handlers/appservice.py                    | 14 +++--
 synapse/handlers/e2e_keys.py                      | 31 ++++++---
 synapse/rest/client/keys.py                       | 42 +++++++++++--
 synapse/storage/databases/main/end_to_end_keys.py | 77 ++++++++++++++---------
 tests/appservice/test_api.py                      | 11 ++--
 tests/handlers/test_e2e_keys.py                   | 32 +++++-----
 12 files changed, 271 insertions(+), 98 deletions(-)
 create mode 100644 changelog.d/15468.misc

(limited to 'synapse')

diff --git a/changelog.d/15468.misc b/changelog.d/15468.misc
new file mode 100644
index 0000000000..e0a94f36fd
--- /dev/null
+++ b/changelog.d/15468.misc
@@ -0,0 +1 @@
+Support claiming more than one OTK at a time.
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index 86ddb1bb28..024098e9cb 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -442,8 +442,10 @@ class ApplicationServiceApi(SimpleHttpClient):
         return False
 
     async def claim_client_keys(
-        self, service: "ApplicationService", query: List[Tuple[str, str, str]]
-    ) -> Tuple[Dict[str, Dict[str, Dict[str, JsonDict]]], List[Tuple[str, str, str]]]:
+        self, service: "ApplicationService", query: List[Tuple[str, str, str, int]]
+    ) -> Tuple[
+        Dict[str, Dict[str, Dict[str, JsonDict]]], List[Tuple[str, str, str, int]]
+    ]:
         """Claim one time keys from an application service.
 
         Note that any error (including a timeout) is treated as the application
@@ -469,8 +471,10 @@ class ApplicationServiceApi(SimpleHttpClient):
 
         # Create the expected payload shape.
         body: Dict[str, Dict[str, List[str]]] = {}
-        for user_id, device, algorithm in query:
-            body.setdefault(user_id, {}).setdefault(device, []).append(algorithm)
+        for user_id, device, algorithm, count in query:
+            body.setdefault(user_id, {}).setdefault(device, []).extend(
+                [algorithm] * count
+            )
 
         uri = f"{service.url}/_matrix/app/unstable/org.matrix.msc3983/keys/claim"
         try:
@@ -493,11 +497,20 @@ class ApplicationServiceApi(SimpleHttpClient):
         # or if some are still missing.
         #
         # TODO This places a lot of faith in the response shape being correct.
-        missing = [
-            (user_id, device, algorithm)
-            for user_id, device, algorithm in query
-            if algorithm not in response.get(user_id, {}).get(device, [])
-        ]
+        missing = []
+        for user_id, device, algorithm, count in query:
+            # Count the number of keys in the response for this algorithm by
+            # checking which key IDs start with the algorithm. This uses that
+            # True == 1 in Python to generate a count.
+            response_count = sum(
+                key_id.startswith(f"{algorithm}:")
+                for key_id in response.get(user_id, {}).get(device, {})
+            )
+            count -= response_count
+            # If the appservice responds with fewer keys than requested, then
+            # consider the request unfulfilled.
+            if count > 0:
+                missing.append((user_id, device, algorithm, count))
 
         return response, missing
 
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index ba34573d46..0b2d1a78f7 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -235,7 +235,10 @@ class FederationClient(FederationBase):
         )
 
     async def claim_client_keys(
-        self, destination: str, content: JsonDict, timeout: Optional[int]
+        self,
+        destination: str,
+        query: Dict[str, Dict[str, Dict[str, int]]],
+        timeout: Optional[int],
     ) -> JsonDict:
         """Claims one-time keys for a device hosted on a remote server.
 
@@ -247,6 +250,50 @@ class FederationClient(FederationBase):
             The JSON object from the response
         """
         sent_queries_counter.labels("client_one_time_keys").inc()
+
+        # Convert the query with counts into a stable and unstable query and check
+        # if attempting to claim more than 1 OTK.
+        content: Dict[str, Dict[str, str]] = {}
+        unstable_content: Dict[str, Dict[str, List[str]]] = {}
+        use_unstable = False
+        for user_id, one_time_keys in query.items():
+            for device_id, algorithms in one_time_keys.items():
+                if any(count > 1 for count in algorithms.values()):
+                    use_unstable = True
+                if algorithms:
+                    # For the stable query, choose only the first algorithm.
+                    content.setdefault(user_id, {})[device_id] = next(iter(algorithms))
+                    # For the unstable query, repeat each algorithm by count, then
+                    # splat those into chain to get a flattened list of all algorithms.
+                    #
+                    # Converts from {"algo1": 2, "algo2": 2} to ["algo1", "algo1", "algo2", "algo2"].
+                    unstable_content.setdefault(user_id, {})[device_id] = list(
+                        itertools.chain(
+                            *(
+                                itertools.repeat(algorithm, count)
+                                for algorithm, count in algorithms.items()
+                            )
+                        )
+                    )
+
+        if use_unstable:
+            try:
+                return await self.transport_layer.claim_client_keys_unstable(
+                    destination, unstable_content, timeout
+                )
+            except HttpResponseException as e:
+                # If an error is received that is due to an unrecognised endpoint,
+                # fallback to the v1 endpoint. Otherwise, consider it a legitimate error
+                # and raise.
+                if not is_unknown_endpoint(e):
+                    raise
+
+            logger.debug(
+                "Couldn't claim client keys with the unstable API, falling back to the v1 API"
+            )
+        else:
+            logger.debug("Skipping unstable claim client keys API")
+
         return await self.transport_layer.claim_client_keys(
             destination, content, timeout
         )
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index c618f3d7a6..ca43c7bfc0 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -1005,13 +1005,8 @@ class FederationServer(FederationBase):
 
     @trace
     async def on_claim_client_keys(
-        self, origin: str, content: JsonDict, always_include_fallback_keys: bool
+        self, query: List[Tuple[str, str, str, int]], always_include_fallback_keys: bool
     ) -> Dict[str, Any]:
-        query = []
-        for user_id, device_keys in content.get("one_time_keys", {}).items():
-            for device_id, algorithm in device_keys.items():
-                query.append((user_id, device_id, algorithm))
-
         log_kv({"message": "Claiming one time keys.", "user, device pairs": query})
         results = await self._e2e_keys_handler.claim_local_one_time_keys(
             query, always_include_fallback_keys=always_include_fallback_keys
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index bedbd23ded..bc70b94f68 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -650,10 +650,10 @@ class TransportLayerClient:
 
         Response:
             {
-              "device_keys": {
+              "one_time_keys": {
                 "<user_id>": {
                   "<device_id>": {
-                    "<algorithm>:<key_id>": "<key_base64>"
+                    "<algorithm>:<key_id>": <OTK JSON>
                   }
                 }
               }
@@ -669,7 +669,50 @@ class TransportLayerClient:
         path = _create_v1_path("/user/keys/claim")
 
         return await self.client.post_json(
-            destination=destination, path=path, data=query_content, timeout=timeout
+            destination=destination,
+            path=path,
+            data={"one_time_keys": query_content},
+            timeout=timeout,
+        )
+
+    async def claim_client_keys_unstable(
+        self, destination: str, query_content: JsonDict, timeout: Optional[int]
+    ) -> JsonDict:
+        """Claim one-time keys for a list of devices hosted on a remote server.
+
+        Request:
+            {
+              "one_time_keys": {
+                "<user_id>": {
+                  "<device_id>": ["<algorithm>", ...]
+                }
+              }
+            }
+
+        Response:
+            {
+              "one_time_keys": {
+                "<user_id>": {
+                  "<device_id>": {
+                    "<algorithm>:<key_id>": <OTK JSON>
+                  }
+                }
+              }
+            }
+
+        Args:
+            destination: The server to query.
+            query_content: The user ids to query.
+        Returns:
+            A dict containing the one-time keys.
+        """
+        path = _create_path(FEDERATION_UNSTABLE_PREFIX, "/user/keys/claim")
+
+        return await self.client.post_json(
+            destination=destination,
+            path=path,
+            data={"one_time_keys": query_content},
+            timeout=timeout,
         )
 
     async def get_missing_events(
diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py
index e2340d70d5..36b0362504 100644
--- a/synapse/federation/transport/server/federation.py
+++ b/synapse/federation/transport/server/federation.py
@@ -12,6 +12,7 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 import logging
+from collections import Counter
 from typing import (
     TYPE_CHECKING,
     Dict,
@@ -577,16 +578,23 @@ class FederationClientKeysClaimServlet(BaseFederationServerServlet):
     async def on_POST(
         self, origin: str, content: JsonDict, query: Dict[bytes, List[bytes]]
     ) -> Tuple[int, JsonDict]:
+        # Generate a count for each algorithm, which is hard-coded to 1.
+        key_query: List[Tuple[str, str, str, int]] = []
+        for user_id, device_keys in content.get("one_time_keys", {}).items():
+            for device_id, algorithm in device_keys.items():
+                key_query.append((user_id, device_id, algorithm, 1))
+
         response = await self.handler.on_claim_client_keys(
-            origin, content, always_include_fallback_keys=False
+            key_query, always_include_fallback_keys=False
         )
         return 200, response
 
 
 class FederationUnstableClientKeysClaimServlet(BaseFederationServerServlet):
     """
-    Identical to the stable endpoint (FederationClientKeysClaimServlet) except it
-    always includes fallback keys in the response.
+    Identical to the stable endpoint (FederationClientKeysClaimServlet) except
+    it allows for querying for multiple OTKs at once and always includes fallback
+    keys in the response.
     """
 
     PREFIX = FEDERATION_UNSTABLE_PREFIX
@@ -596,8 +604,16 @@ class FederationUnstableClientKeysClaimServlet(BaseFederationServerServlet):
     async def on_POST(
         self, origin: str, content: JsonDict, query: Dict[bytes, List[bytes]]
     ) -> Tuple[int, JsonDict]:
+        # Generate a count for each algorithm.
+        key_query: List[Tuple[str, str, str, int]] = []
+        for user_id, device_keys in content.get("one_time_keys", {}).items():
+            for device_id, algorithms in device_keys.items():
+                counts = Counter(algorithms)
+                for algorithm, count in counts.items():
+                    key_query.append((user_id, device_id, algorithm, count))
+
         response = await self.handler.on_claim_client_keys(
-            origin, content, always_include_fallback_keys=True
+            key_query, always_include_fallback_keys=True
         )
         return 200, response
 
@@ -805,6 +821,7 @@ FEDERATION_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = (
     FederationClientKeysQueryServlet,
     FederationUserDevicesQueryServlet,
     FederationClientKeysClaimServlet,
+    FederationUnstableClientKeysClaimServlet,
     FederationThirdPartyInviteExchangeServlet,
     On3pidBindServlet,
     FederationVersionServlet,
diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py
index 4ca2bc0420..6429545c98 100644
--- a/synapse/handlers/appservice.py
+++ b/synapse/handlers/appservice.py
@@ -841,8 +841,10 @@ class ApplicationServicesHandler:
         return True
 
     async def claim_e2e_one_time_keys(
-        self, query: Iterable[Tuple[str, str, str]]
-    ) -> Tuple[Dict[str, Dict[str, Dict[str, JsonDict]]], List[Tuple[str, str, str]]]:
+        self, query: Iterable[Tuple[str, str, str, int]]
+    ) -> Tuple[
+        Dict[str, Dict[str, Dict[str, JsonDict]]], List[Tuple[str, str, str, int]]
+    ]:
         """Claim one time keys from application services.
 
         Users which are exclusively owned by an application service are sent a
@@ -863,18 +865,18 @@ class ApplicationServicesHandler:
         services = self.store.get_app_services()
 
         # Partition the users by appservice.
-        query_by_appservice: Dict[str, List[Tuple[str, str, str]]] = {}
+        query_by_appservice: Dict[str, List[Tuple[str, str, str, int]]] = {}
         missing = []
-        for user_id, device, algorithm in query:
+        for user_id, device, algorithm, count in query:
             if not self.store.get_if_app_services_interested_in_user(user_id):
-                missing.append((user_id, device, algorithm))
+                missing.append((user_id, device, algorithm, count))
                 continue
 
             # Find the associated appservice.
             for service in services:
                 if service.is_exclusive_user(user_id):
                     query_by_appservice.setdefault(service.id, []).append(
-                        (user_id, device, algorithm)
+                        (user_id, device, algorithm, count)
                     )
                     continue
 
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index d1ab95126c..24741b667b 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -564,7 +564,7 @@ class E2eKeysHandler:
 
     async def claim_local_one_time_keys(
         self,
-        local_query: List[Tuple[str, str, str]],
+        local_query: List[Tuple[str, str, str, int]],
         always_include_fallback_keys: bool,
     ) -> Iterable[Dict[str, Dict[str, Dict[str, JsonDict]]]]:
         """Claim one time keys for local users.
@@ -581,6 +581,12 @@ class E2eKeysHandler:
             An iterable of maps of user ID -> a map device ID -> a map of key ID -> JSON bytes.
         """
 
+        # Cap the number of OTKs that can be claimed at once to avoid abuse.
+        local_query = [
+            (user_id, device_id, algorithm, min(count, 5))
+            for user_id, device_id, algorithm, count in local_query
+        ]
+
         otk_results, not_found = await self.store.claim_e2e_one_time_keys(local_query)
 
         # If the application services have not provided any keys via the C-S
@@ -607,7 +613,7 @@ class E2eKeysHandler:
             # from the appservice for that user ID / device ID. If it is found,
             # check if any of the keys match the requested algorithm & are a
             # fallback key.
-            for user_id, device_id, algorithm in local_query:
+            for user_id, device_id, algorithm, _count in local_query:
                 # Check if the appservice responded for this query.
                 as_result = appservice_results.get(user_id, {}).get(device_id, {})
                 found_otk = False
@@ -630,13 +636,17 @@ class E2eKeysHandler:
                         .get(device_id, {})
                         .keys()
                     )
+                    # Note that it doesn't make sense to request more than 1 fallback key
+                    # per (user_id, device_id, algorithm).
                     fallback_query.append((user_id, device_id, algorithm, mark_as_used))
 
         else:
             # All fallback keys get marked as used.
             fallback_query = [
+                # Note that it doesn't make sense to request more than 1 fallback key
+                # per (user_id, device_id, algorithm).
                 (user_id, device_id, algorithm, True)
-                for user_id, device_id, algorithm in not_found
+                for user_id, device_id, algorithm, count in not_found
             ]
 
         # For each user that does not have a one-time keys available, see if
@@ -650,18 +660,19 @@ class E2eKeysHandler:
     @trace
     async def claim_one_time_keys(
         self,
-        query: Dict[str, Dict[str, Dict[str, str]]],
+        query: Dict[str, Dict[str, Dict[str, int]]],
         timeout: Optional[int],
         always_include_fallback_keys: bool,
     ) -> JsonDict:
-        local_query: List[Tuple[str, str, str]] = []
-        remote_queries: Dict[str, Dict[str, Dict[str, str]]] = {}
+        local_query: List[Tuple[str, str, str, int]] = []
+        remote_queries: Dict[str, Dict[str, Dict[str, Dict[str, int]]]] = {}
 
-        for user_id, one_time_keys in query.get("one_time_keys", {}).items():
+        for user_id, one_time_keys in query.items():
             # we use UserID.from_string to catch invalid user ids
             if self.is_mine(UserID.from_string(user_id)):
-                for device_id, algorithm in one_time_keys.items():
-                    local_query.append((user_id, device_id, algorithm))
+                for device_id, algorithms in one_time_keys.items():
+                    for algorithm, count in algorithms.items():
+                        local_query.append((user_id, device_id, algorithm, count))
             else:
                 domain = get_domain_from_id(user_id)
                 remote_queries.setdefault(domain, {})[user_id] = one_time_keys
@@ -692,7 +703,7 @@ class E2eKeysHandler:
             device_keys = remote_queries[destination]
             try:
                 remote_result = await self.federation.claim_client_keys(
-                    destination, {"one_time_keys": device_keys}, timeout=timeout
+                    destination, device_keys, timeout=timeout
                 )
                 for user_id, keys in remote_result["one_time_keys"].items():
                     if user_id in device_keys:
diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py
index 2a25094109..9bbab5e624 100644
--- a/synapse/rest/client/keys.py
+++ b/synapse/rest/client/keys.py
@@ -16,7 +16,8 @@
 
 import logging
 import re
-from typing import TYPE_CHECKING, Any, Optional, Tuple
+from collections import Counter
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
 
 from synapse.api.errors import InvalidAPICallError, SynapseError
 from synapse.http.server import HttpServer
@@ -289,16 +290,40 @@ class OneTimeKeyServlet(RestServlet):
         await self.auth.get_user_by_req(request, allow_guest=True)
         timeout = parse_integer(request, "timeout", 10 * 1000)
         body = parse_json_object_from_request(request)
+
+        # Generate a count for each algorithm, which is hard-coded to 1.
+        query: Dict[str, Dict[str, Dict[str, int]]] = {}
+        for user_id, one_time_keys in body.get("one_time_keys", {}).items():
+            for device_id, algorithm in one_time_keys.items():
+                query.setdefault(user_id, {})[device_id] = {algorithm: 1}
+
         result = await self.e2e_keys_handler.claim_one_time_keys(
-            body, timeout, always_include_fallback_keys=False
+            query, timeout, always_include_fallback_keys=False
         )
         return 200, result
 
 
 class UnstableOneTimeKeyServlet(RestServlet):
     """
-    Identical to the stable endpoint (OneTimeKeyServlet) except it always includes
-    fallback keys in the response.
+    Identical to the stable endpoint (OneTimeKeyServlet) except it allows for
+    querying for multiple OTKs at once and always includes fallback keys in the
+    response.
+
+    POST /keys/claim HTTP/1.1
+    {
+      "one_time_keys": {
+        "<user_id>": {
+          "<device_id>": ["<algorithm>", ...]
+    } } }
+
+    HTTP/1.1 200 OK
+    {
+      "one_time_keys": {
+        "<user_id>": {
+          "<device_id>": {
+            "<algorithm>:<key_id>": "<key_base64>"
+    } } } }
+
     """
 
     PATTERNS = [re.compile(r"^/_matrix/client/unstable/org.matrix.msc3983/keys/claim$")]
@@ -313,8 +338,15 @@ class UnstableOneTimeKeyServlet(RestServlet):
         await self.auth.get_user_by_req(request, allow_guest=True)
         timeout = parse_integer(request, "timeout", 10 * 1000)
         body = parse_json_object_from_request(request)
+
+        # Generate a count for each algorithm.
+        query: Dict[str, Dict[str, Dict[str, int]]] = {}
+        for user_id, one_time_keys in body.get("one_time_keys", {}).items():
+            for device_id, algorithms in one_time_keys.items():
+                query.setdefault(user_id, {})[device_id] = Counter(algorithms)
+
         result = await self.e2e_keys_handler.claim_one_time_keys(
-            body, timeout, always_include_fallback_keys=True
+            query, timeout, always_include_fallback_keys=True
         )
         return 200, result
 
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index 1a4ae55304..4bc391f213 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -1027,8 +1027,10 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
         ...
 
     async def claim_e2e_one_time_keys(
-        self, query_list: Iterable[Tuple[str, str, str]]
-    ) -> Tuple[Dict[str, Dict[str, Dict[str, JsonDict]]], List[Tuple[str, str, str]]]:
+        self, query_list: Iterable[Tuple[str, str, str, int]]
+    ) -> Tuple[
+        Dict[str, Dict[str, Dict[str, JsonDict]]], List[Tuple[str, str, str, int]]
+    ]:
         """Take a list of one time keys out of the database.
 
         Args:
@@ -1043,8 +1045,12 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
 
         @trace
         def _claim_e2e_one_time_key_simple(
-            txn: LoggingTransaction, user_id: str, device_id: str, algorithm: str
-        ) -> Optional[Tuple[str, str]]:
+            txn: LoggingTransaction,
+            user_id: str,
+            device_id: str,
+            algorithm: str,
+            count: int,
+        ) -> List[Tuple[str, str]]:
             """Claim OTK for device for DBs that don't support RETURNING.
 
             Returns:
@@ -1055,36 +1061,41 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
             sql = """
                 SELECT key_id, key_json FROM e2e_one_time_keys_json
                 WHERE user_id = ? AND device_id = ? AND algorithm = ?
-                LIMIT 1
+                LIMIT ?
             """
 
-            txn.execute(sql, (user_id, device_id, algorithm))
-            otk_row = txn.fetchone()
-            if otk_row is None:
-                return None
+            txn.execute(sql, (user_id, device_id, algorithm, count))
+            otk_rows = list(txn)
+            if not otk_rows:
+                return []
 
-            key_id, key_json = otk_row
-
-            self.db_pool.simple_delete_one_txn(
+            self.db_pool.simple_delete_many_txn(
                 txn,
                 table="e2e_one_time_keys_json",
+                column="key_id",
+                values=[otk_row[0] for otk_row in otk_rows],
                 keyvalues={
                     "user_id": user_id,
                     "device_id": device_id,
                     "algorithm": algorithm,
-                    "key_id": key_id,
                 },
             )
             self._invalidate_cache_and_stream(
                 txn, self.count_e2e_one_time_keys, (user_id, device_id)
             )
 
-            return f"{algorithm}:{key_id}", key_json
+            return [
+                (f"{algorithm}:{key_id}", key_json) for key_id, key_json in otk_rows
+            ]
 
         @trace
         def _claim_e2e_one_time_key_returning(
-            txn: LoggingTransaction, user_id: str, device_id: str, algorithm: str
-        ) -> Optional[Tuple[str, str]]:
+            txn: LoggingTransaction,
+            user_id: str,
+            device_id: str,
+            algorithm: str,
+            count: int,
+        ) -> List[Tuple[str, str]]:
             """Claim OTK for device for DBs that support RETURNING.
 
             Returns:
@@ -1099,28 +1110,30 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
                     AND key_id IN (
                         SELECT key_id FROM e2e_one_time_keys_json
                         WHERE user_id = ? AND device_id = ? AND algorithm = ?
-                        LIMIT 1
+                        LIMIT ?
                     )
                 RETURNING key_id, key_json
             """
 
             txn.execute(
-                sql, (user_id, device_id, algorithm, user_id, device_id, algorithm)
+                sql,
+                (user_id, device_id, algorithm, user_id, device_id, algorithm, count),
             )
-            otk_row = txn.fetchone()
-            if otk_row is None:
-                return None
+            otk_rows = list(txn)
+            if not otk_rows:
+                return []
 
             self._invalidate_cache_and_stream(
                 txn, self.count_e2e_one_time_keys, (user_id, device_id)
             )
 
-            key_id, key_json = otk_row
-            return f"{algorithm}:{key_id}", key_json
+            return [
+                (f"{algorithm}:{key_id}", key_json) for key_id, key_json in otk_rows
+            ]
 
         results: Dict[str, Dict[str, Dict[str, JsonDict]]] = {}
-        missing: List[Tuple[str, str, str]] = []
-        for user_id, device_id, algorithm in query_list:
+        missing: List[Tuple[str, str, str, int]] = []
+        for user_id, device_id, algorithm, count in query_list:
             if self.database_engine.supports_returning:
                 # If we support RETURNING clause we can use a single query that
                 # allows us to use autocommit mode.
@@ -1130,21 +1143,25 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
                 _claim_e2e_one_time_key = _claim_e2e_one_time_key_simple
                 db_autocommit = False
 
-            claim_row = await self.db_pool.runInteraction(
+            claim_rows = await self.db_pool.runInteraction(
                 "claim_e2e_one_time_keys",
                 _claim_e2e_one_time_key,
                 user_id,
                 device_id,
                 algorithm,
+                count,
                 db_autocommit=db_autocommit,
             )
-            if claim_row:
+            if claim_rows:
                 device_results = results.setdefault(user_id, {}).setdefault(
                     device_id, {}
                 )
-                device_results[claim_row[0]] = json_decoder.decode(claim_row[1])
-            else:
-                missing.append((user_id, device_id, algorithm))
+                for claim_row in claim_rows:
+                    device_results[claim_row[0]] = json_decoder.decode(claim_row[1])
+            # Did we get enough OTKs?
+            count -= len(claim_rows)
+            if count:
+                missing.append((user_id, device_id, algorithm, count))
 
         return results, missing
 
diff --git a/tests/appservice/test_api.py b/tests/appservice/test_api.py
index 7deb923a28..15fce165b6 100644
--- a/tests/appservice/test_api.py
+++ b/tests/appservice/test_api.py
@@ -195,11 +195,11 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
 
         MISSING_KEYS = [
             # Known user, known device, missing algorithm.
-            ("@alice:example.org", "DEVICE_1", "signed_curve25519:DDDDHg"),
+            ("@alice:example.org", "DEVICE_2", "xyz", 1),
             # Known user, missing device.
-            ("@alice:example.org", "DEVICE_3", "signed_curve25519:EEEEHg"),
+            ("@alice:example.org", "DEVICE_3", "signed_curve25519", 1),
             # Unknown user.
-            ("@bob:example.org", "DEVICE_4", "signed_curve25519:FFFFHg"),
+            ("@bob:example.org", "DEVICE_4", "signed_curve25519", 1),
         ]
 
         claimed_keys, missing = self.get_success(
@@ -207,9 +207,8 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
                 self.service,
                 [
                     # Found devices
-                    ("@alice:example.org", "DEVICE_1", "signed_curve25519:AAAAHg"),
-                    ("@alice:example.org", "DEVICE_1", "signed_curve25519:BBBBHg"),
-                    ("@alice:example.org", "DEVICE_2", "signed_curve25519:CCCCHg"),
+                    ("@alice:example.org", "DEVICE_1", "signed_curve25519", 1),
+                    ("@alice:example.org", "DEVICE_2", "signed_curve25519", 1),
                 ]
                 + MISSING_KEYS,
             )
diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py
index 18edebd652..72d0584061 100644
--- a/tests/handlers/test_e2e_keys.py
+++ b/tests/handlers/test_e2e_keys.py
@@ -160,7 +160,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
 
         res2 = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id: "alg1"}}},
+                {local_user: {device_id: {"alg1": 1}}},
                 timeout=None,
                 always_include_fallback_keys=False,
             )
@@ -205,7 +205,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         # key
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id: "alg1"}}},
+                {local_user: {device_id: {"alg1": 1}}},
                 timeout=None,
                 always_include_fallback_keys=False,
             )
@@ -224,7 +224,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         # claiming an OTK again should return the same fallback key
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id: "alg1"}}},
+                {local_user: {device_id: {"alg1": 1}}},
                 timeout=None,
                 always_include_fallback_keys=False,
             )
@@ -273,7 +273,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
 
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id: "alg1"}}},
+                {local_user: {device_id: {"alg1": 1}}},
                 timeout=None,
                 always_include_fallback_keys=False,
             )
@@ -285,7 +285,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
 
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id: "alg1"}}},
+                {local_user: {device_id: {"alg1": 1}}},
                 timeout=None,
                 always_include_fallback_keys=False,
             )
@@ -306,7 +306,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
 
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id: "alg1"}}},
+                {local_user: {device_id: {"alg1": 1}}},
                 timeout=None,
                 always_include_fallback_keys=False,
             )
@@ -347,7 +347,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         # return both.
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id: "alg1"}}},
+                {local_user: {device_id: {"alg1": 1}}},
                 timeout=None,
                 always_include_fallback_keys=True,
             )
@@ -369,7 +369,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         # Claiming an OTK again should return only the fallback key.
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id: "alg1"}}},
+                {local_user: {device_id: {"alg1": 1}}},
                 timeout=None,
                 always_include_fallback_keys=True,
             )
@@ -1052,7 +1052,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
 
         # Setup a response, but only for device 2.
         self.appservice_api.claim_client_keys.return_value = make_awaitable(
-            ({local_user: {device_id_2: otk}}, [(local_user, device_id_1, "alg1")])
+            ({local_user: {device_id_2: otk}}, [(local_user, device_id_1, "alg1", 1)])
         )
 
         # we shouldn't have any unused fallback keys yet
@@ -1079,11 +1079,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         # query the fallback keys.
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {
-                    "one_time_keys": {
-                        local_user: {device_id_1: "alg1", device_id_2: "alg1"}
-                    }
-                },
+                {local_user: {device_id_1: {"alg1": 1}, device_id_2: {"alg1": 1}}},
                 timeout=None,
                 always_include_fallback_keys=False,
             )
@@ -1128,7 +1124,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         # Claim OTKs, which will ask the appservice and do nothing else.
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id_1: "alg1"}}},
+                {local_user: {device_id_1: {"alg1": 1}}},
                 timeout=None,
                 always_include_fallback_keys=True,
             )
@@ -1172,7 +1168,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         # uploaded fallback key.
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id_1: "alg1"}}},
+                {local_user: {device_id_1: {"alg1": 1}}},
                 timeout=None,
                 always_include_fallback_keys=True,
             )
@@ -1205,7 +1201,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         # Claim OTKs, which will return information only from the database.
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id_1: "alg1"}}},
+                {local_user: {device_id_1: {"alg1": 1}}},
                 timeout=None,
                 always_include_fallback_keys=True,
             )
@@ -1232,7 +1228,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         # Claim OTKs, which will return only the fallback key from the database.
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
-                {"one_time_keys": {local_user: {device_id_1: "alg1"}}},
+                {local_user: {device_id_1: {"alg1": 1}}},
                 timeout=None,
                 always_include_fallback_keys=True,
             )
-- 
cgit 1.5.1


From 89f6fb0d5a87d7415d1e67c600f47cb2b4370971 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Fri, 28 Apr 2023 11:33:45 -0700
Subject: Add an admin API endpoint to support per-user feature flags (#15344)

---
 changelog.d/15344.feature                          |   1 +
 docs/admin_api/experimental_features.md            |  54 +++++++++
 synapse/_scripts/synapse_port_db.py                |   1 +
 synapse/rest/admin/__init__.py                     |   2 +
 synapse/rest/admin/experimental_features.py        | 119 +++++++++++++++++++
 synapse/storage/databases/main/__init__.py         |   2 +
 .../databases/main/experimental_features.py        |  75 ++++++++++++
 .../delta/76/03_per_user_experimental_features.sql |  27 +++++
 tests/rest/admin/test_admin.py                     | 127 +++++++++++++++++++++
 9 files changed, 408 insertions(+)
 create mode 100644 changelog.d/15344.feature
 create mode 100644 docs/admin_api/experimental_features.md
 create mode 100644 synapse/rest/admin/experimental_features.py
 create mode 100644 synapse/storage/databases/main/experimental_features.py
 create mode 100644 synapse/storage/schema/main/delta/76/03_per_user_experimental_features.sql

(limited to 'synapse')

diff --git a/changelog.d/15344.feature b/changelog.d/15344.feature
new file mode 100644
index 0000000000..44262e9bd8
--- /dev/null
+++ b/changelog.d/15344.feature
@@ -0,0 +1 @@
+Add an admin API endpoint to support per-user feature flags.
diff --git a/docs/admin_api/experimental_features.md b/docs/admin_api/experimental_features.md
new file mode 100644
index 0000000000..c1aebe4b01
--- /dev/null
+++ b/docs/admin_api/experimental_features.md
@@ -0,0 +1,54 @@
+# Experimental Features API
+
+This API allows a server administrator to enable or disable some experimental features on a per-user
+basis. Currently supported features are [msc3026](https://github.com/matrix-org/matrix-spec-proposals/pull/3026): busy 
+presence state enabled, [msc2654](https://github.com/matrix-org/matrix-spec-proposals/pull/2654): enable unread counts,
+[msc3881](https://github.com/matrix-org/matrix-spec-proposals/pull/3881): enable remotely toggling push notifications 
+for another client, and [msc3967](https://github.com/matrix-org/matrix-spec-proposals/pull/3967): do not require
+UIA when first uploading cross-signing keys. 
+
+
+To use it, you will need to authenticate by providing an `access_token`
+for a server admin: see [Admin API](../usage/administration/admin_api/).
+
+## Enabling/Disabling Features
+
+This API allows a server administrator to enable or disable experimental features for a given user. The user ID is
+given in the request path, and the request body must list the features to enable/disable in the following format:
+```json
+{
+   "features": {
+      "msc3026":true,
+      "msc2654":true
+   }
+}
+```
+where `true` is used to enable the feature, and `false` is used to disable it.
+
+
+The API is:
+
+```
+PUT /_synapse/admin/v1/experimental_features/<user_id>
+```
+
+## Listing Enabled Features
+ 
+To list which features are enabled/disabled for a given user, send a request to the following API:
+
+```
+GET /_synapse/admin/v1/experimental_features/<user_id>
+```
+
+It will return a list of possible features and indicate whether they are enabled or disabled for the
+user like so:
+```json
+{
+   "features": {
+      "msc3026": true,
+      "msc2654": true,
+      "msc3881": false,
+      "msc3967": false
+   }
+}
+```
\ No newline at end of file
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 56d5aeb0dd..27fee3d9a9 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -125,6 +125,7 @@ BOOLEAN_COLUMNS = {
     "users": ["shadow_banned", "approved"],
     "un_partial_stated_event_stream": ["rejection_status_changed"],
     "users_who_share_rooms": ["share_private"],
+    "per_user_experimental_features": ["enabled"],
 }
 
 
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index 770df261ce..c729364839 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -39,6 +39,7 @@ from synapse.rest.admin.event_reports import (
     EventReportDetailRestServlet,
     EventReportsRestServlet,
 )
+from synapse.rest.admin.experimental_features import ExperimentalFeaturesRestServlet
 from synapse.rest.admin.federation import (
     DestinationMembershipRestServlet,
     DestinationResetConnectionRestServlet,
@@ -292,6 +293,7 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     BackgroundUpdateEnabledRestServlet(hs).register(http_server)
     BackgroundUpdateRestServlet(hs).register(http_server)
     BackgroundUpdateStartJobRestServlet(hs).register(http_server)
+    ExperimentalFeaturesRestServlet(hs).register(http_server)
 
 
 def register_servlets_for_client_rest_resource(
diff --git a/synapse/rest/admin/experimental_features.py b/synapse/rest/admin/experimental_features.py
new file mode 100644
index 0000000000..1d409ac2b7
--- /dev/null
+++ b/synapse/rest/admin/experimental_features.py
@@ -0,0 +1,119 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+
+from enum import Enum
+from http import HTTPStatus
+from typing import TYPE_CHECKING, Dict, Tuple
+
+from synapse.api.errors import SynapseError
+from synapse.http.servlet import RestServlet, parse_json_object_from_request
+from synapse.http.site import SynapseRequest
+from synapse.rest.admin import admin_patterns, assert_requester_is_admin
+from synapse.types import JsonDict, UserID
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+
+class ExperimentalFeature(str, Enum):
+    """
+    Currently supported per-user features
+    """
+
+    MSC3026 = "msc3026"
+    MSC2654 = "msc2654"
+    MSC3881 = "msc3881"
+    MSC3967 = "msc3967"
+
+
+class ExperimentalFeaturesRestServlet(RestServlet):
+    """
+    Enable or disable experimental features for a user or determine which features are enabled
+    for a given user
+    """
+
+    PATTERNS = admin_patterns("/experimental_features/(?P<user_id>[^/]*)")
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__()
+        self.auth = hs.get_auth()
+        self.store = hs.get_datastores().main
+        self.is_mine = hs.is_mine
+
+    async def on_GET(
+        self,
+        request: SynapseRequest,
+        user_id: str,
+    ) -> Tuple[int, JsonDict]:
+        """
+        List which features are enabled for a given user
+        """
+        await assert_requester_is_admin(self.auth, request)
+
+        target_user = UserID.from_string(user_id)
+        if not self.is_mine(target_user):
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST,
+                "User must be local to check what experimental features are enabled.",
+            )
+
+        enabled_features = await self.store.list_enabled_features(user_id)
+
+        user_features = {}
+        for feature in ExperimentalFeature:
+            if feature in enabled_features:
+                user_features[feature] = True
+            else:
+                user_features[feature] = False
+        return HTTPStatus.OK, {"features": user_features}
+
+    async def on_PUT(
+        self, request: SynapseRequest, user_id: str
+    ) -> Tuple[HTTPStatus, Dict]:
+        """
+        Enable or disable the provided features for the given user
+        """
+        await assert_requester_is_admin(self.auth, request)
+
+        body = parse_json_object_from_request(request)
+
+        target_user = UserID.from_string(user_id)
+        if not self.is_mine(target_user):
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST,
+                "User must be local to enable experimental features.",
+            )
+
+        features = body.get("features")
+        if not features:
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST, "You must provide features to set."
+            )
+
+        # validate the provided features
+        validated_features = {}
+        for feature, enabled in features.items():
+            try:
+                validated_feature = ExperimentalFeature(feature)
+                validated_features[validated_feature] = enabled
+            except ValueError:
+                raise SynapseError(
+                    HTTPStatus.BAD_REQUEST,
+                    f"{feature!r} is not recognised as a valid experimental feature.",
+                )
+
+        await self.store.set_features_for_user(user_id, validated_features)
+
+        return HTTPStatus.OK, {}
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index dc3948c170..0032a92f49 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -43,6 +43,7 @@ from .event_federation import EventFederationStore
 from .event_push_actions import EventPushActionsStore
 from .events_bg_updates import EventsBackgroundUpdatesStore
 from .events_forward_extremities import EventForwardExtremitiesStore
+from .experimental_features import ExperimentalFeaturesStore
 from .filtering import FilteringWorkerStore
 from .keys import KeyStore
 from .lock import LockStore
@@ -82,6 +83,7 @@ logger = logging.getLogger(__name__)
 
 class DataStore(
     EventsBackgroundUpdatesStore,
+    ExperimentalFeaturesStore,
     DeviceStore,
     RoomMemberStore,
     RoomStore,
diff --git a/synapse/storage/databases/main/experimental_features.py b/synapse/storage/databases/main/experimental_features.py
new file mode 100644
index 0000000000..cf3226ae5a
--- /dev/null
+++ b/synapse/storage/databases/main/experimental_features.py
@@ -0,0 +1,75 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from typing import TYPE_CHECKING, Dict
+
+from synapse.storage.database import DatabasePool, LoggingDatabaseConnection
+from synapse.storage.databases.main import CacheInvalidationWorkerStore
+from synapse.types import StrCollection
+from synapse.util.caches.descriptors import cached
+
+if TYPE_CHECKING:
+    from synapse.rest.admin.experimental_features import ExperimentalFeature
+    from synapse.server import HomeServer
+
+
+class ExperimentalFeaturesStore(CacheInvalidationWorkerStore):
+    def __init__(
+        self,
+        database: DatabasePool,
+        db_conn: LoggingDatabaseConnection,
+        hs: "HomeServer",
+    ) -> None:
+        super().__init__(database, db_conn, hs)
+
+    @cached()
+    async def list_enabled_features(self, user_id: str) -> StrCollection:
+        """
+        Checks to see what features are enabled for a given user
+        Args:
+            user_id:
+                the user to be queried on
+        Returns:
+            the features currently enabled for the user
+        """
+        enabled = await self.db_pool.simple_select_list(
+            "per_user_experimental_features",
+            {"user_id": user_id, "enabled": True},
+            ["feature"],
+        )
+
+        return [feature["feature"] for feature in enabled]
+
+    async def set_features_for_user(
+        self,
+        user: str,
+        features: Dict["ExperimentalFeature", bool],
+    ) -> None:
+        """
+        Enables or disables features for a given user
+        Args:
+            user:
+                the user for whom to enable/disable the features
+            features:
+                pairs of features and True/False for whether the feature should be enabled
+        """
+        for feature, enabled in features.items():
+            await self.db_pool.simple_upsert(
+                table="per_user_experimental_features",
+                keyvalues={"feature": feature, "user_id": user},
+                values={"enabled": enabled},
+                insertion_values={"user_id": user, "feature": feature},
+            )
+
+            await self.invalidate_cache_and_stream("list_enabled_features", (user,))
diff --git a/synapse/storage/schema/main/delta/76/03_per_user_experimental_features.sql b/synapse/storage/schema/main/delta/76/03_per_user_experimental_features.sql
new file mode 100644
index 0000000000..c4ef81846c
--- /dev/null
+++ b/synapse/storage/schema/main/delta/76/03_per_user_experimental_features.sql
@@ -0,0 +1,27 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Table containing experimental features and whether they are enabled for a given user
+CREATE TABLE per_user_experimental_features (
+    -- The User ID to check/set the feature for
+    user_id TEXT NOT NULL,
+    -- Contains features to be enabled/disabled
+    feature TEXT NOT NULL,
+    -- whether the feature is enabled/disabled for a given user, defaults to disabled
+    enabled BOOLEAN DEFAULT FALSE,
+    FOREIGN KEY (user_id) REFERENCES users(name),
+    PRIMARY KEY (user_id, feature)
+);
+
diff --git a/tests/rest/admin/test_admin.py b/tests/rest/admin/test_admin.py
index a8f6436836..645a00b4b1 100644
--- a/tests/rest/admin/test_admin.py
+++ b/tests/rest/admin/test_admin.py
@@ -372,3 +372,130 @@ class PurgeHistoryTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual("complete", channel.json_body["status"])
+
+
+class ExperimentalFeaturesTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        synapse.rest.admin.register_servlets,
+        login.register_servlets,
+    ]
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.admin_user = self.register_user("admin", "pass", admin=True)
+        self.admin_user_tok = self.login("admin", "pass")
+
+        self.other_user = self.register_user("user", "pass")
+        self.other_user_tok = self.login("user", "pass")
+
+        self.url = "/_synapse/admin/v1/experimental_features"
+
+    def test_enable_and_disable(self) -> None:
+        """
+        Test basic functionality of ExperimentalFeatures endpoint
+        """
+        # test enabling features works
+        url = f"{self.url}/{self.other_user}"
+        channel = self.make_request(
+            "PUT",
+            url,
+            content={
+                "features": {"msc3026": True, "msc2654": True},
+            },
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(channel.code, 200)
+
+        # list which features are enabled and ensure the ones we enabled are listed
+        self.assertEqual(channel.code, 200)
+        url = f"{self.url}/{self.other_user}"
+        channel = self.make_request(
+            "GET",
+            url,
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(
+            True,
+            channel.json_body["features"]["msc3026"],
+        )
+        self.assertEqual(
+            True,
+            channel.json_body["features"]["msc2654"],
+        )
+
+        # test disabling a feature works
+        url = f"{self.url}/{self.other_user}"
+        channel = self.make_request(
+            "PUT",
+            url,
+            content={"features": {"msc3026": False}},
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(channel.code, 200)
+
+        # list the features enabled/disabled and ensure they are still correct
+        self.assertEqual(channel.code, 200)
+        url = f"{self.url}/{self.other_user}"
+        channel = self.make_request(
+            "GET",
+            url,
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(
+            False,
+            channel.json_body["features"]["msc3026"],
+        )
+        self.assertEqual(
+            True,
+            channel.json_body["features"]["msc2654"],
+        )
+        self.assertEqual(
+            False,
+            channel.json_body["features"]["msc3881"],
+        )
+        self.assertEqual(
+            False,
+            channel.json_body["features"]["msc3967"],
+        )
+
+        # test nothing blows up if you try to disable a feature that isn't already enabled
+        url = f"{self.url}/{self.other_user}"
+        channel = self.make_request(
+            "PUT",
+            url,
+            content={"features": {"msc3026": False}},
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(channel.code, 200)
+
+        # test trying to enable a feature without an admin access token is denied
+        url = f"{self.url}/f{self.other_user}"
+        channel = self.make_request(
+            "PUT",
+            url,
+            content={"features": {"msc3881": True}},
+            access_token=self.other_user_tok,
+        )
+        self.assertEqual(channel.code, 403)
+        self.assertEqual(
+            channel.json_body,
+            {"errcode": "M_FORBIDDEN", "error": "You are not a server admin"},
+        )
+
+        # test trying to enable a bogus msc is denied
+        url = f"{self.url}/{self.other_user}"
+        channel = self.make_request(
+            "PUT",
+            url,
+            content={"features": {"msc6666": True}},
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(channel.code, 400)
+        self.assertEqual(
+            channel.json_body,
+            {
+                "errcode": "M_UNKNOWN",
+                "error": "'msc6666' is not recognised as a valid experimental feature.",
+            },
+        )
-- 
cgit 1.5.1


From 07b1c70d6b11d6b8feca23442a09b60ab0c930e3 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 2 May 2023 07:59:55 -0400
Subject: Initial implementation of MSC3981: recursive relations API (#15315)

Adds an optional keyword argument to the /relations API which
will recurse a limited number of event relationships.

This will cause the API to return not just the events related to the
parent event, but also events related to those related to the parent
event, etc.

This is disabled by default behind an experimental configuration
flag and is currently implemented using prefixed parameters.
---
 changelog.d/15315.feature                   |   1 +
 synapse/config/experimental.py              |   5 ++
 synapse/handlers/relations.py               |   3 +
 synapse/rest/client/relations.py            |  10 ++-
 synapse/storage/databases/main/relations.py |  65 +++++++++++----
 tests/rest/client/test_relations.py         | 120 ++++++++++++++++++++++++++++
 6 files changed, 186 insertions(+), 18 deletions(-)
 create mode 100644 changelog.d/15315.feature

(limited to 'synapse')

diff --git a/changelog.d/15315.feature b/changelog.d/15315.feature
new file mode 100644
index 0000000000..30b2abdc62
--- /dev/null
+++ b/changelog.d/15315.feature
@@ -0,0 +1 @@
+Experimental support to recursively provide relations per [MSC3981](https://github.com/matrix-org/matrix-spec-proposals/pull/3981).
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 6599679731..cab7ccf4b7 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -192,5 +192,10 @@ class ExperimentalConfig(Config):
         # MSC2659: Application service ping endpoint
         self.msc2659_enabled = experimental.get("msc2659_enabled", False)
 
+        # MSC3981: Recurse relations
+        self.msc3981_recurse_relations = experimental.get(
+            "msc3981_recurse_relations", False
+        )
+
         # MSC3970: Scope transaction IDs to devices
         self.msc3970_enabled = experimental.get("msc3970_enabled", False)
diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py
index 1d09fdf135..4824635162 100644
--- a/synapse/handlers/relations.py
+++ b/synapse/handlers/relations.py
@@ -85,6 +85,7 @@ class RelationsHandler:
         event_id: str,
         room_id: str,
         pagin_config: PaginationConfig,
+        recurse: bool,
         include_original_event: bool,
         relation_type: Optional[str] = None,
         event_type: Optional[str] = None,
@@ -98,6 +99,7 @@ class RelationsHandler:
             event_id: Fetch events that relate to this event ID.
             room_id: The room the event belongs to.
             pagin_config: The pagination config rules to apply, if any.
+            recurse: Whether to recursively find relations.
             include_original_event: Whether to include the parent event.
             relation_type: Only fetch events with this relation type, if given.
             event_type: Only fetch events with this event type, if given.
@@ -132,6 +134,7 @@ class RelationsHandler:
             direction=pagin_config.direction,
             from_token=pagin_config.from_token,
             to_token=pagin_config.to_token,
+            recurse=recurse,
         )
 
         events = await self._main_store.get_events_as_list(
diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py
index b8b296bc0c..785dfa08d8 100644
--- a/synapse/rest/client/relations.py
+++ b/synapse/rest/client/relations.py
@@ -19,7 +19,7 @@ from typing import TYPE_CHECKING, Optional, Tuple
 from synapse.api.constants import Direction
 from synapse.handlers.relations import ThreadsListInclude
 from synapse.http.server import HttpServer
-from synapse.http.servlet import RestServlet, parse_integer, parse_string
+from synapse.http.servlet import RestServlet, parse_boolean, parse_integer, parse_string
 from synapse.http.site import SynapseRequest
 from synapse.rest.client._base import client_patterns
 from synapse.storage.databases.main.relations import ThreadsNextBatch
@@ -49,6 +49,7 @@ class RelationPaginationServlet(RestServlet):
         self.auth = hs.get_auth()
         self._store = hs.get_datastores().main
         self._relations_handler = hs.get_relations_handler()
+        self._support_recurse = hs.config.experimental.msc3981_recurse_relations
 
     async def on_GET(
         self,
@@ -63,6 +64,12 @@ class RelationPaginationServlet(RestServlet):
         pagination_config = await PaginationConfig.from_request(
             self._store, request, default_limit=5, default_dir=Direction.BACKWARDS
         )
+        if self._support_recurse:
+            recurse = parse_boolean(
+                request, "org.matrix.msc3981.recurse", default=False
+            )
+        else:
+            recurse = False
 
         # The unstable version of this API returns an extra field for client
         # compatibility, see https://github.com/matrix-org/synapse/issues/12930.
@@ -75,6 +82,7 @@ class RelationPaginationServlet(RestServlet):
             event_id=parent_id,
             room_id=room_id,
             pagin_config=pagination_config,
+            recurse=recurse,
             include_original_event=include_original_event,
             relation_type=relation_type,
             event_type=event_type,
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index 3955a8a9a5..4a6c6c724d 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -172,6 +172,7 @@ class RelationsWorkerStore(SQLBaseStore):
         direction: Direction = Direction.BACKWARDS,
         from_token: Optional[StreamToken] = None,
         to_token: Optional[StreamToken] = None,
+        recurse: bool = False,
     ) -> Tuple[Sequence[_RelatedEvent], Optional[StreamToken]]:
         """Get a list of relations for an event, ordered by topological ordering.
 
@@ -186,6 +187,7 @@ class RelationsWorkerStore(SQLBaseStore):
                 oldest first (forwards).
             from_token: Fetch rows from the given token, or from the start if None.
             to_token: Fetch rows up to the given token, or up to the end if None.
+            recurse: Whether to recursively find relations.
 
         Returns:
             A tuple of:
@@ -200,8 +202,8 @@ class RelationsWorkerStore(SQLBaseStore):
         # Ensure bad limits aren't being passed in.
         assert limit >= 0
 
-        where_clause = ["relates_to_id = ?", "room_id = ?"]
-        where_args: List[Union[str, int]] = [event.event_id, room_id]
+        where_clause = ["room_id = ?"]
+        where_args: List[Union[str, int]] = [room_id]
         is_redacted = event.internal_metadata.is_redacted()
 
         if relation_type is not None:
@@ -229,23 +231,52 @@ class RelationsWorkerStore(SQLBaseStore):
         if pagination_clause:
             where_clause.append(pagination_clause)
 
-        sql = """
-            SELECT event_id, relation_type, sender, topological_ordering, stream_ordering
-            FROM event_relations
-            INNER JOIN events USING (event_id)
-            WHERE %s
-            ORDER BY topological_ordering %s, stream_ordering %s
-            LIMIT ?
-        """ % (
-            " AND ".join(where_clause),
-            order,
-            order,
-        )
+        # If a recursive query is requested then the filters are applied after
+        # recursively following relationships from the requested event to children;
+        # direct children are depth 0 and rows with depth <= 3 are expanded further.
+        #
+        # If no recursion is needed then the event_relations table is queried
+        # for direct children of the requested event.
+        if recurse:
+            sql = """
+                WITH RECURSIVE related_events AS (
+                    SELECT event_id, relation_type, relates_to_id, 0 AS depth
+                    FROM event_relations
+                    WHERE relates_to_id = ?
+                    UNION SELECT e.event_id, e.relation_type, e.relates_to_id, depth + 1
+                    FROM event_relations e
+                    INNER JOIN related_events r ON r.event_id = e.relates_to_id
+                    WHERE depth <= 3
+                )
+                SELECT event_id, relation_type, sender, topological_ordering, stream_ordering
+                FROM related_events
+                INNER JOIN events USING (event_id)
+                WHERE %s
+                ORDER BY topological_ordering %s, stream_ordering %s
+                LIMIT ?;
+            """ % (
+                " AND ".join(where_clause),
+                order,
+                order,
+            )
+        else:
+            sql = """
+                SELECT event_id, relation_type, sender, topological_ordering, stream_ordering
+                FROM event_relations
+                INNER JOIN events USING (event_id)
+                WHERE relates_to_id = ? AND %s
+                ORDER BY topological_ordering %s, stream_ordering %s
+                LIMIT ?
+            """ % (
+                " AND ".join(where_clause),
+                order,
+                order,
+            )
 
         def _get_recent_references_for_event_txn(
             txn: LoggingTransaction,
         ) -> Tuple[List[_RelatedEvent], Optional[StreamToken]]:
-            txn.execute(sql, where_args + [limit + 1])
+            txn.execute(sql, [event.event_id] + where_args + [limit + 1])
 
             events = []
             topo_orderings: List[int] = []
@@ -965,7 +996,7 @@ class RelationsWorkerStore(SQLBaseStore):
         # relation.
         sql = """
             WITH RECURSIVE related_events AS (
-                SELECT event_id, relates_to_id, relation_type, 0 depth
+                SELECT event_id, relates_to_id, relation_type, 0 AS depth
                 FROM event_relations
                 WHERE event_id = ?
                 UNION SELECT e.event_id, e.relates_to_id, e.relation_type, depth + 1
@@ -1025,7 +1056,7 @@ class RelationsWorkerStore(SQLBaseStore):
         sql = """
         SELECT relates_to_id FROM event_relations WHERE relates_to_id = COALESCE((
             WITH RECURSIVE related_events AS (
-                SELECT event_id, relates_to_id, relation_type, 0 depth
+                SELECT event_id, relates_to_id, relation_type, 0 AS depth
                 FROM event_relations
                 WHERE event_id = ?
                 UNION SELECT e.event_id, e.relates_to_id, e.relation_type, depth + 1
diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py
index fbbbcb23f1..75439416c1 100644
--- a/tests/rest/client/test_relations.py
+++ b/tests/rest/client/test_relations.py
@@ -30,6 +30,7 @@ from tests import unittest
 from tests.server import FakeChannel
 from tests.test_utils import make_awaitable
 from tests.test_utils.event_injection import inject_event
+from tests.unittest import override_config
 
 
 class BaseRelationsTestCase(unittest.HomeserverTestCase):
@@ -949,6 +950,125 @@ class RelationPaginationTestCase(BaseRelationsTestCase):
             )
 
 
+class RecursiveRelationTestCase(BaseRelationsTestCase):
+    @override_config({"experimental_features": {"msc3981_recurse_relations": True}})
+    def test_recursive_relations(self) -> None:
+        """Generate a complex, multi-level relationship tree and query it."""
+        # Create a thread with a few messages in it.
+        channel = self._send_relation(RelationTypes.THREAD, "m.room.test")
+        thread_1 = channel.json_body["event_id"]
+
+        channel = self._send_relation(RelationTypes.THREAD, "m.room.test")
+        thread_2 = channel.json_body["event_id"]
+
+        # Add annotations.
+        channel = self._send_relation(
+            RelationTypes.ANNOTATION, "m.reaction", "a", parent_id=thread_2
+        )
+        annotation_1 = channel.json_body["event_id"]
+
+        channel = self._send_relation(
+            RelationTypes.ANNOTATION, "m.reaction", "b", parent_id=thread_1
+        )
+        annotation_2 = channel.json_body["event_id"]
+
+        # Add a reference to part of the thread, then edit the reference and annotate it.
+        channel = self._send_relation(
+            RelationTypes.REFERENCE, "m.room.test", parent_id=thread_2
+        )
+        reference_1 = channel.json_body["event_id"]
+
+        channel = self._send_relation(
+            RelationTypes.ANNOTATION, "m.reaction", "c", parent_id=reference_1
+        )
+        annotation_3 = channel.json_body["event_id"]
+
+        channel = self._send_relation(
+            RelationTypes.REPLACE,
+            "m.room.test",
+            parent_id=reference_1,
+        )
+        edit = channel.json_body["event_id"]
+
+        # Also more events off the root.
+        channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "d")
+        annotation_4 = channel.json_body["event_id"]
+
+        channel = self.make_request(
+            "GET",
+            f"/_matrix/client/v1/rooms/{self.room}/relations/{self.parent_id}"
+            "?dir=f&limit=20&org.matrix.msc3981.recurse=true",
+            access_token=self.user_token,
+        )
+        self.assertEqual(200, channel.code, channel.json_body)
+
+        # The above events should be returned in creation order.
+        event_ids = [ev["event_id"] for ev in channel.json_body["chunk"]]
+        self.assertEqual(
+            event_ids,
+            [
+                thread_1,
+                thread_2,
+                annotation_1,
+                annotation_2,
+                reference_1,
+                annotation_3,
+                edit,
+                annotation_4,
+            ],
+        )
+
+    @override_config({"experimental_features": {"msc3981_recurse_relations": True}})
+    def test_recursive_relations_with_filter(self) -> None:
+        """The event_type and rel_type still apply."""
+        # Create a thread with a few messages in it.
+        channel = self._send_relation(RelationTypes.THREAD, "m.room.test")
+        thread_1 = channel.json_body["event_id"]
+
+        # Add annotations.
+        channel = self._send_relation(
+            RelationTypes.ANNOTATION, "m.reaction", "b", parent_id=thread_1
+        )
+        annotation_1 = channel.json_body["event_id"]
+
+        # Add a reference to part of the thread, then annotate it.
+        channel = self._send_relation(
+            RelationTypes.REFERENCE, "m.room.test", parent_id=thread_1
+        )
+        reference_1 = channel.json_body["event_id"]
+
+        channel = self._send_relation(
+            RelationTypes.ANNOTATION, "org.matrix.reaction", "c", parent_id=reference_1
+        )
+        annotation_2 = channel.json_body["event_id"]
+
+        # Fetch only annotations, but recursively.
+        channel = self.make_request(
+            "GET",
+            f"/_matrix/client/v1/rooms/{self.room}/relations/{self.parent_id}/{RelationTypes.ANNOTATION}"
+            "?dir=f&limit=20&org.matrix.msc3981.recurse=true",
+            access_token=self.user_token,
+        )
+        self.assertEqual(200, channel.code, channel.json_body)
+
+        # The above events should be returned in creation order.
+        event_ids = [ev["event_id"] for ev in channel.json_body["chunk"]]
+        self.assertEqual(event_ids, [annotation_1, annotation_2])
+
+        # Fetch only m.reactions, but recursively.
+        channel = self.make_request(
+            "GET",
+            f"/_matrix/client/v1/rooms/{self.room}/relations/{self.parent_id}/{RelationTypes.ANNOTATION}/m.reaction"
+            "?dir=f&limit=20&org.matrix.msc3981.recurse=true",
+            access_token=self.user_token,
+        )
+        self.assertEqual(200, channel.code, channel.json_body)
+
+        # The above events should be returned in creation order.
+        event_ids = [ev["event_id"] for ev in channel.json_body["chunk"]]
+        self.assertEqual(event_ids, [annotation_1])
+
+
 class BundledAggregationsTestCase(BaseRelationsTestCase):
     """
     See RelationsTestCase.test_edit for a similar test for edits.
-- 
cgit 1.5.1


From 6aca4e7cb8818a6d0928108f5e25a6b582842a7d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 2 May 2023 09:29:40 -0400
Subject: Reduce the size of the HTTP connection pool for non-pushers. (#15514)

Pushers tend to make many connections to the same HTTP host
(e.g. a new event comes in, causes events to be pushed, and then
the homeserver connects to the same host many times). Due to this
the per-host HTTP connection pool size was increased, but this does
not make sense for other SimpleHttpClients.

Add a parameter for the connection pool and override it for pushers
(making a separate SimpleHttpClient for pushers with the increased
configuration).

This returns the HTTP connection pool settings to the default Twisted
ones for non-pusher HTTP clients.
---
 changelog.d/15514.misc                 |  1 +
 synapse/http/client.py                 | 14 +++-----------
 synapse/push/httppusher.py             |  3 ++-
 synapse/server.py                      | 21 +++++++++++++++++++++
 tests/push/test_http.py                |  2 +-
 tests/replication/test_pusher_shard.py |  6 +++---
 6 files changed, 31 insertions(+), 16 deletions(-)
 create mode 100644 changelog.d/15514.misc

(limited to 'synapse')

diff --git a/changelog.d/15514.misc b/changelog.d/15514.misc
new file mode 100644
index 0000000000..f24c18cf47
--- /dev/null
+++ b/changelog.d/15514.misc
@@ -0,0 +1 @@
+Reduce the size of the HTTP connection pool for non-pushers.
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 91fe474f36..164abe9fc7 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -768,6 +768,7 @@ class SimpleHttpClient(BaseHttpClient):
            request if it were otherwise caught in a blacklist.
         use_proxy: Whether proxy settings should be discovered and used
             from conventional environment variables.
+        connection_pool: The connection pool to use for this client's agent.
     """
 
     def __init__(
@@ -777,6 +778,7 @@ class SimpleHttpClient(BaseHttpClient):
         ip_whitelist: Optional[IPSet] = None,
         ip_blacklist: Optional[IPSet] = None,
         use_proxy: bool = False,
+        connection_pool: Optional[HTTPConnectionPool] = None,
     ):
         super().__init__(hs, treq_args=treq_args)
         self._ip_whitelist = ip_whitelist
@@ -789,22 +791,12 @@ class SimpleHttpClient(BaseHttpClient):
                 self.reactor, self._ip_whitelist, self._ip_blacklist
             )
 
-        # the pusher makes lots of concurrent SSL connections to Sygnal, and tends to
-        # do so in batches, so we need to allow the pool to keep lots of idle
-        # connections around.
-        pool = HTTPConnectionPool(self.reactor)
-        # XXX: The justification for using the cache factor here is that larger
-        # instances will need both more cache and more connections.
-        # Still, this should probably be a separate dial
-        pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5)
-        pool.cachedConnectionTimeout = 2 * 60
-
         self.agent: IAgent = ProxyAgent(
             self.reactor,
             hs.get_reactor(),
             connectTimeout=15,
             contextFactory=self.hs.get_http_client_context_factory(),
-            pool=pool,
+            pool=connection_pool,
             use_proxy=use_proxy,
         )
 
diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py
index 4f8fa445d9..a01445e374 100644
--- a/synapse/push/httppusher.py
+++ b/synapse/push/httppusher.py
@@ -140,7 +140,8 @@ class HttpPusher(Pusher):
             )
 
         self.url = url
-        self.http_client = hs.get_proxied_blacklisted_http_client()
+        self.http_client = hs.get_pusher_http_client()
+
         self.data_minus_url = {}
         self.data_minus_url.update(self.data)
         del self.data_minus_url["url"]
diff --git a/synapse/server.py b/synapse/server.py
index 08ad97b952..75a902d64d 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -27,6 +27,7 @@ from typing_extensions import TypeAlias
 
 from twisted.internet.interfaces import IOpenSSLContextFactory
 from twisted.internet.tcp import Port
+from twisted.web.client import HTTPConnectionPool
 from twisted.web.iweb import IPolicyForHTTPS
 from twisted.web.resource import Resource
 
@@ -453,6 +454,26 @@ class HomeServer(metaclass=abc.ABCMeta):
             use_proxy=True,
         )
 
+    @cache_in_self
+    def get_pusher_http_client(self) -> SimpleHttpClient:
+        # the pusher makes lots of concurrent SSL connections to Sygnal, and tends to
+        # do so in batches, so we need to allow the pool to keep lots of idle
+        # connections around.
+        pool = HTTPConnectionPool(self.get_reactor())
+        # XXX: The justification for using the cache factor here is that larger
+        # instances will need both more cache and more connections.
+        # Still, this should probably be a separate dial
+        pool.maxPersistentPerHost = max(int(100 * self.config.caches.global_factor), 5)
+        pool.cachedConnectionTimeout = 2 * 60
+
+        return SimpleHttpClient(
+            self,
+            ip_whitelist=self.config.server.ip_range_whitelist,
+            ip_blacklist=self.config.server.ip_range_blacklist,
+            use_proxy=True,
+            connection_pool=pool,
+        )
+
     @cache_in_self
     def get_federation_http_client(self) -> MatrixFederationHttpClient:
         """
diff --git a/tests/push/test_http.py b/tests/push/test_http.py
index 99cec0836b..0fbbef7c8b 100644
--- a/tests/push/test_http.py
+++ b/tests/push/test_http.py
@@ -52,7 +52,7 @@ class HTTPPusherTests(HomeserverTestCase):
 
         m.post_json_get_json = post_json_get_json
 
-        hs = self.setup_test_homeserver(proxied_blacklisted_http_client=m)
+        hs = self.setup_test_homeserver(pusher_http_client=m)
 
         return hs
 
diff --git a/tests/replication/test_pusher_shard.py b/tests/replication/test_pusher_shard.py
index dcb3e6669b..b9bb1a6497 100644
--- a/tests/replication/test_pusher_shard.py
+++ b/tests/replication/test_pusher_shard.py
@@ -93,7 +93,7 @@ class PusherShardTestCase(BaseMultiWorkerStreamTestCase):
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {"worker_name": "pusher1", "pusher_instances": ["pusher1"]},
-            proxied_blacklisted_http_client=http_client_mock,
+            pusher_http_client=http_client_mock,
         )
 
         event_id = self._create_pusher_and_send_msg("user")
@@ -126,7 +126,7 @@ class PusherShardTestCase(BaseMultiWorkerStreamTestCase):
                 "worker_name": "pusher1",
                 "pusher_instances": ["pusher1", "pusher2"],
             },
-            proxied_blacklisted_http_client=http_client_mock1,
+            pusher_http_client=http_client_mock1,
         )
 
         http_client_mock2 = Mock(spec_set=["post_json_get_json"])
@@ -140,7 +140,7 @@ class PusherShardTestCase(BaseMultiWorkerStreamTestCase):
                 "worker_name": "pusher2",
                 "pusher_instances": ["pusher1", "pusher2"],
             },
-            proxied_blacklisted_http_client=http_client_mock2,
+            pusher_http_client=http_client_mock2,
         )
 
         # We choose a user name that we know should go to pusher1.
-- 
cgit 1.5.1


From 4de271a7fcde6b46611ba2aa9d45cdc6cc7275ab Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 2 May 2023 17:45:44 +0100
Subject: Allow adding random delay to push (#15516)

This is to discourage timing-based profiling on the push gateways.
---
 changelog.d/15516.feature                        |  1 +
 docs/usage/configuration/config_documentation.md |  4 +++
 synapse/config/push.py                           | 10 +++++--
 synapse/push/httppusher.py                       | 18 ++++++++++++
 tests/push/test_http.py                          | 37 ++++++++++++++++++++++++
 5 files changed, 68 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15516.feature

(limited to 'synapse')

diff --git a/changelog.d/15516.feature b/changelog.d/15516.feature
new file mode 100644
index 0000000000..02a101bb88
--- /dev/null
+++ b/changelog.d/15516.feature
@@ -0,0 +1 @@
+Add a config option to delay push notifications by a random amount, to discourage time-based profiling.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 1b6f256949..b6516191e8 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3442,6 +3442,9 @@ This option has a number of sub-options. They are as follows:
    user has unread messages in. Defaults to true, meaning push clients will see the number of
    rooms with unread messages in them. Set to false to instead send the number
    of unread messages.
+* `jitter_delay`: Delays push notifications by a random amount up to the given
+  duration. Useful for mitigating timing attacks. Optional, defaults to no
+  delay. _Added in Synapse 1.84.0._
 
 Example configuration:
 ```yaml
@@ -3449,6 +3452,7 @@ push:
   enabled: true
   include_content: false
   group_unread_count_by_room: false
+  jitter_delay: "10s"
 ```
 ---
 ## Rooms
diff --git a/synapse/config/push.py b/synapse/config/push.py
index 3b5378e6ea..8177ff52e2 100644
--- a/synapse/config/push.py
+++ b/synapse/config/push.py
@@ -42,11 +42,17 @@ class PushConfig(Config):
 
         # Now check for the one in the 'email' section and honour it,
         # with a warning.
-        push_config = config.get("email") or {}
-        redact_content = push_config.get("redact_content")
+        email_push_config = config.get("email") or {}
+        redact_content = email_push_config.get("redact_content")
         if redact_content is not None:
             print(
                 "The 'email.redact_content' option is deprecated: "
                 "please set push.include_content instead"
             )
             self.push_include_content = not redact_content
+
+        # Whether to apply a random delay to outbound push.
+        self.push_jitter_delay_ms = None
+        push_jitter_delay = push_config.get("jitter_delay", None)
+        if push_jitter_delay:
+            self.push_jitter_delay_ms = self.parse_duration(push_jitter_delay)
diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py
index a01445e374..e628b484a9 100644
--- a/synapse/push/httppusher.py
+++ b/synapse/push/httppusher.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+import random
 import urllib.parse
 from typing import TYPE_CHECKING, Dict, List, Optional, Union
 
@@ -114,6 +115,8 @@ class HttpPusher(Pusher):
         )
         self._pusherpool = hs.get_pusherpool()
 
+        self.push_jitter_delay_ms = hs.config.push.push_jitter_delay_ms
+
         self.data = pusher_config.data
         if self.data is None:
             raise PusherConfigException("'data' key can not be null for HTTP pusher")
@@ -327,6 +330,21 @@ class HttpPusher(Pusher):
         event = await self.store.get_event(push_action.event_id, allow_none=True)
         if event is None:
             return True  # It's been redacted
+
+        # Check if we should delay sending out the notification by a random
+        # amount.
+        #
+        # Note: we base the delay off of when the event was sent, rather than
+        # now, to handle the case where we need to send out many notifications
+        # at once. If we just slept the random amount each loop then the last
+        # push notification in the set could be delayed by many times the max
+        # delay.
+        if self.push_jitter_delay_ms:
+            delay_ms = random.randint(1, self.push_jitter_delay_ms)
+            diff_ms = event.origin_server_ts + delay_ms - self.clock.time_msec()
+            if diff_ms > 0:
+                await self.clock.sleep(diff_ms / 1000)
+
         rejected = await self.dispatch_push_event(event, tweaks, badge)
         if rejected is False:
             return False
diff --git a/tests/push/test_http.py b/tests/push/test_http.py
index 0fbbef7c8b..4f811bb9c0 100644
--- a/tests/push/test_http.py
+++ b/tests/push/test_http.py
@@ -962,3 +962,40 @@ class HTTPPusherTests(HomeserverTestCase):
             channel.json_body["pushers"][0]["org.matrix.msc3881.device_id"],
             lookup_result.device_id,
         )
+
+    @override_config({"push": {"jitter_delay": "10s"}})
+    def test_jitter(self) -> None:
+        """Tests that enabling jitter actually delays sending push."""
+        user_id, access_token = self._make_user_with_pusher("user")
+        other_user_id, other_access_token = self._make_user_with_pusher("otheruser")
+
+        room = self.helper.create_room_as(user_id, tok=access_token)
+        self.helper.join(room=room, user=other_user_id, tok=other_access_token)
+
+        # Send a message and check that it did not generate a push, as it should
+        # be delayed.
+        self.helper.send(room, body="Hi!", tok=other_access_token)
+        self.assertEqual(len(self.push_attempts), 0)
+
+        # Now advance time past the max jitter, and assert the message was sent.
+        self.reactor.advance(15)
+        self.assertEqual(len(self.push_attempts), 1)
+
+        self.push_attempts[0][0].callback({})
+
+        # Now we send a bunch of messages and assert that they were all sent
+        # within the 10s max delay.
+        for _ in range(10):
+            self.helper.send(room, body="Hi!", tok=other_access_token)
+
+        index = 1
+        for _ in range(11):
+            while len(self.push_attempts) > index:
+                self.push_attempts[index][0].callback({})
+                self.pump()
+                index += 1
+
+            self.reactor.advance(1)
+            self.pump()
+
+        self.assertEqual(len(self.push_attempts), 11)
-- 
cgit 1.5.1


From 0e8aa2a1b28dfce374294450a015d18884c89d36 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Tue, 2 May 2023 14:21:36 -0700
Subject: Remove references to supporting per-user flag for msc2654 (#15522)

---
 changelog.d/15522.misc                      |  1 +
 docs/admin_api/experimental_features.md     | 13 +++++++------
 synapse/rest/admin/experimental_features.py |  1 -
 tests/rest/admin/test_admin.py              |  8 ++------
 4 files changed, 10 insertions(+), 13 deletions(-)
 create mode 100644 changelog.d/15522.misc

(limited to 'synapse')

diff --git a/changelog.d/15522.misc b/changelog.d/15522.misc
new file mode 100644
index 0000000000..a5a229e4a0
--- /dev/null
+++ b/changelog.d/15522.misc
@@ -0,0 +1 @@
+Remove references to supporting per-user flag for [MSC2654](https://github.com/matrix-org/matrix-spec-proposals/pull/2654) (#15522).
diff --git a/docs/admin_api/experimental_features.md b/docs/admin_api/experimental_features.md
index c1aebe4b01..07b630915d 100644
--- a/docs/admin_api/experimental_features.md
+++ b/docs/admin_api/experimental_features.md
@@ -1,10 +1,12 @@
 # Experimental Features API
 
 This API allows a server administrator to enable or disable some experimental features on a per-user
-basis. Currently supported features are [msc3026](https://github.com/matrix-org/matrix-spec-proposals/pull/3026): busy 
-presence state enabled, [msc2654](https://github.com/matrix-org/matrix-spec-proposals/pull/2654): enable unread counts,
-[msc3881](https://github.com/matrix-org/matrix-spec-proposals/pull/3881): enable remotely toggling push notifications 
-for another client, and [msc3967](https://github.com/matrix-org/matrix-spec-proposals/pull/3967): do not require
+basis. The currently supported features are: 
+- [MSC3026](https://github.com/matrix-org/matrix-spec-proposals/pull/3026): busy 
+presence state enabled
+- [MSC3881](https://github.com/matrix-org/matrix-spec-proposals/pull/3881): enable remotely toggling push notifications 
+for another client 
+- [MSC3967](https://github.com/matrix-org/matrix-spec-proposals/pull/3967): do not require
 UIA when first uploading cross-signing keys. 
 
 
@@ -19,7 +21,7 @@ provide a body containing the user id and listing the features to enable/disable
 {
    "features": {
       "msc3026":true,
-      "msc2654":true
+      "msc3881":true
    }
 }
 ```
@@ -46,7 +48,6 @@ user like so:
 {
    "features": {
       "msc3026": true,
-      "msc2654": true,
       "msc3881": false,
       "msc3967": false
    }
diff --git a/synapse/rest/admin/experimental_features.py b/synapse/rest/admin/experimental_features.py
index 1d409ac2b7..abf273af10 100644
--- a/synapse/rest/admin/experimental_features.py
+++ b/synapse/rest/admin/experimental_features.py
@@ -33,7 +33,6 @@ class ExperimentalFeature(str, Enum):
     """
 
     MSC3026 = "msc3026"
-    MSC2654 = "msc2654"
     MSC3881 = "msc3881"
     MSC3967 = "msc3967"
 
diff --git a/tests/rest/admin/test_admin.py b/tests/rest/admin/test_admin.py
index 645a00b4b1..695e84357a 100644
--- a/tests/rest/admin/test_admin.py
+++ b/tests/rest/admin/test_admin.py
@@ -399,7 +399,7 @@ class ExperimentalFeaturesTestCase(unittest.HomeserverTestCase):
             "PUT",
             url,
             content={
-                "features": {"msc3026": True, "msc2654": True},
+                "features": {"msc3026": True, "msc3881": True},
             },
             access_token=self.admin_user_tok,
         )
@@ -420,7 +420,7 @@ class ExperimentalFeaturesTestCase(unittest.HomeserverTestCase):
         )
         self.assertEqual(
             True,
-            channel.json_body["features"]["msc2654"],
+            channel.json_body["features"]["msc3881"],
         )
 
         # test disabling a feature works
@@ -448,10 +448,6 @@ class ExperimentalFeaturesTestCase(unittest.HomeserverTestCase):
         )
         self.assertEqual(
             True,
-            channel.json_body["features"]["msc2654"],
-        )
-        self.assertEqual(
-            False,
             channel.json_body["features"]["msc3881"],
         )
         self.assertEqual(
-- 
cgit 1.5.1


From 04e79e6a185f466c9a2c8d79f6c9de7f42efc6f7 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Wed, 3 May 2023 12:27:33 +0100
Subject: Add config option to forget rooms automatically when users leave them
 (#15224)

This is largely based off the stats and user directory updater code.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/15224.feature                          |   1 +
 docs/usage/configuration/config_documentation.md   |  10 ++
 synapse/config/room.py                             |   4 +
 synapse/handlers/room_member.py                    | 173 ++++++++++++++++++---
 synapse/handlers/room_member_worker.py             |   3 -
 synapse/server.py                                  |  11 +-
 synapse/storage/databases/main/roommember.py       |  69 +++++---
 .../schema/main/delta/76/04_add_room_forgetter.sql |  24 +++
 tests/handlers/test_room_member.py                 |  11 ++
 9 files changed, 259 insertions(+), 47 deletions(-)
 create mode 100644 changelog.d/15224.feature
 create mode 100644 synapse/storage/schema/main/delta/76/04_add_room_forgetter.sql

(limited to 'synapse')

diff --git a/changelog.d/15224.feature b/changelog.d/15224.feature
new file mode 100644
index 0000000000..5d8413f8be
--- /dev/null
+++ b/changelog.d/15224.feature
@@ -0,0 +1 @@
+Add `forget_rooms_on_leave` config option to automatically forget rooms when users leave them or are removed from them.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index b6516191e8..14c21f73fe 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3699,6 +3699,16 @@ default_power_level_content_override:
    trusted_private_chat: null
    public_chat: null
 ```
+---
+### `forget_rooms_on_leave`
+
+Set to true to automatically forget rooms for users when they leave them, either
+normally or via a kick or ban. Defaults to false.
+
+Example configuration:
+```yaml
+forget_rooms_on_leave: false
+```
 
 ---
 ## Opentracing
diff --git a/synapse/config/room.py b/synapse/config/room.py
index 4a7ac00540..b6696cd129 100644
--- a/synapse/config/room.py
+++ b/synapse/config/room.py
@@ -75,3 +75,7 @@ class RoomConfig(Config):
                         % preset
                     )
                 # We validate the actual overrides when we try to apply them.
+
+        # When enabled, users will forget rooms when they leave them, either via a
+        # leave, kick or ban.
+        self.forget_on_leave = config.get("forget_rooms_on_leave", False)
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index ed805d6ec8..fbef600acd 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -16,7 +16,7 @@ import abc
 import logging
 import random
 from http import HTTPStatus
-from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple
 
 from synapse import types
 from synapse.api.constants import (
@@ -38,7 +38,10 @@ from synapse.event_auth import get_named_level, get_power_level_event
 from synapse.events import EventBase
 from synapse.events.snapshot import EventContext
 from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN
+from synapse.handlers.state_deltas import MatchChange, StateDeltasHandler
 from synapse.logging import opentracing
+from synapse.metrics import event_processing_positions
+from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.module_api import NOT_SPAM
 from synapse.types import (
     JsonDict,
@@ -280,9 +283,25 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         """
         raise NotImplementedError()
 
-    @abc.abstractmethod
     async def forget(self, user: UserID, room_id: str) -> None:
-        raise NotImplementedError()
+        user_id = user.to_string()
+
+        member = await self._storage_controllers.state.get_current_state_event(
+            room_id=room_id, event_type=EventTypes.Member, state_key=user_id
+        )
+        membership = member.membership if member else None
+
+        if membership is not None and membership not in [
+            Membership.LEAVE,
+            Membership.BAN,
+        ]:
+            raise SynapseError(400, "User %s in room %s" % (user_id, room_id))
+
+        # In normal case this call is only required if `membership` is not `None`.
+        # But: After the last member had left the room, the background update
+        # `_background_remove_left_rooms` is deleting rows related to this room from
+        # the table `current_state_events` and `get_current_state_events` is `None`.
+        await self.store.forget(user_id, room_id)
 
     async def ratelimit_multiple_invites(
         self,
@@ -2046,25 +2065,141 @@ class RoomMemberMasterHandler(RoomMemberHandler):
         """Implements RoomMemberHandler._user_left_room"""
         user_left_room(self.distributor, target, room_id)
 
-    async def forget(self, user: UserID, room_id: str) -> None:
-        user_id = user.to_string()
 
-        member = await self._storage_controllers.state.get_current_state_event(
-            room_id=room_id, event_type=EventTypes.Member, state_key=user_id
-        )
-        membership = member.membership if member else None
+class RoomForgetterHandler(StateDeltasHandler):
+    """Forgets rooms when they are left, when enabled in the homeserver config.
 
-        if membership is not None and membership not in [
-            Membership.LEAVE,
-            Membership.BAN,
-        ]:
-            raise SynapseError(400, "User %s in room %s" % (user_id, room_id))
+    For the purposes of this feature, kicks, bans and "leaves" via state resolution
+    weirdness are all considered to be leaves.
 
-        # In normal case this call is only required if `membership` is not `None`.
-        # But: After the last member had left the room, the background update
-        # `_background_remove_left_rooms` is deleting rows related to this room from
-        # the table `current_state_events` and `get_current_state_events` is `None`.
-        await self.store.forget(user_id, room_id)
+    Derived from `StatsHandler` and `UserDirectoryHandler`.
+    """
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__(hs)
+
+        self._hs = hs
+        self._store = hs.get_datastores().main
+        self._storage_controllers = hs.get_storage_controllers()
+        self._clock = hs.get_clock()
+        self._notifier = hs.get_notifier()
+        self._room_member_handler = hs.get_room_member_handler()
+
+        # The current position in the current_state_delta stream
+        self.pos: Optional[int] = None
+
+        # Guard to ensure we only process deltas one at a time
+        self._is_processing = False
+
+        if hs.config.worker.run_background_tasks:
+            self._notifier.add_replication_callback(self.notify_new_event)
+
+            # We kick this off to pick up outstanding work from before the last restart.
+            self._clock.call_later(0, self.notify_new_event)
+
+    def notify_new_event(self) -> None:
+        """Called when there may be more deltas to process"""
+        if self._is_processing:
+            return
+
+        self._is_processing = True
+
+        async def process() -> None:
+            try:
+                await self._unsafe_process()
+            finally:
+                self._is_processing = False
+
+        run_as_background_process("room_forgetter.notify_new_event", process)
+
+    async def _unsafe_process(self) -> None:
+        # If self.pos is None then means we haven't fetched it from DB
+        if self.pos is None:
+            self.pos = await self._store.get_room_forgetter_stream_pos()
+            room_max_stream_ordering = self._store.get_room_max_stream_ordering()
+            if self.pos > room_max_stream_ordering:
+                # apparently, we've processed more events than exist in the database!
+                # this can happen if events are removed with history purge or similar.
+                logger.warning(
+                    "Event stream ordering appears to have gone backwards (%i -> %i): "
+                    "rewinding room forgetter processor",
+                    self.pos,
+                    room_max_stream_ordering,
+                )
+                self.pos = room_max_stream_ordering
+
+        if not self._hs.config.room.forget_on_leave:
+            # Update the processing position, so that if the server admin turns the
+            # feature on at a later date, we don't decide to forget every room that
+            # has ever been left in the past.
+            self.pos = self._store.get_room_max_stream_ordering()
+            await self._store.update_room_forgetter_stream_pos(self.pos)
+            return
+
+        # Loop round handling deltas until we're up to date
+
+        while True:
+            # Be sure to read the max stream_ordering *before* checking if there are any outstanding
+            # deltas, since there is otherwise a chance that we could miss updates which arrive
+            # after we check the deltas.
+            room_max_stream_ordering = self._store.get_room_max_stream_ordering()
+            if self.pos == room_max_stream_ordering:
+                break
+
+            logger.debug(
+                "Processing room forgetting %s->%s", self.pos, room_max_stream_ordering
+            )
+            (
+                max_pos,
+                deltas,
+            ) = await self._storage_controllers.state.get_current_state_deltas(
+                self.pos, room_max_stream_ordering
+            )
+
+            logger.debug("Handling %d state deltas", len(deltas))
+            await self._handle_deltas(deltas)
+
+            self.pos = max_pos
+
+            # Expose current event processing position to prometheus
+            event_processing_positions.labels("room_forgetter").set(max_pos)
+
+            await self._store.update_room_forgetter_stream_pos(max_pos)
+
+    async def _handle_deltas(self, deltas: List[Dict[str, Any]]) -> None:
+        """Called with the state deltas to process"""
+        for delta in deltas:
+            typ = delta["type"]
+            state_key = delta["state_key"]
+            room_id = delta["room_id"]
+            event_id = delta["event_id"]
+            prev_event_id = delta["prev_event_id"]
+
+            if typ != EventTypes.Member:
+                continue
+
+            if not self._hs.is_mine_id(state_key):
+                continue
+
+            change = await self._get_key_change(
+                prev_event_id,
+                event_id,
+                key_name="membership",
+                public_value=Membership.JOIN,
+            )
+            is_leave = change is MatchChange.now_false
+
+            if is_leave:
+                try:
+                    await self._room_member_handler.forget(
+                        UserID.from_string(state_key), room_id
+                    )
+                except SynapseError as e:
+                    if e.code == 400:
+                        # The user is back in the room.
+                        pass
+                    else:
+                        raise
 
 
 def get_users_which_can_issue_invite(auth_events: StateMap[EventBase]) -> List[str]:
diff --git a/synapse/handlers/room_member_worker.py b/synapse/handlers/room_member_worker.py
index 76e36b8a6d..e8ff1ad063 100644
--- a/synapse/handlers/room_member_worker.py
+++ b/synapse/handlers/room_member_worker.py
@@ -137,6 +137,3 @@ class RoomMemberWorkerHandler(RoomMemberHandler):
         await self._notify_change_client(
             user_id=target.to_string(), room_id=room_id, change="left"
         )
-
-    async def forget(self, target: UserID, room_id: str) -> None:
-        raise RuntimeError("Cannot forget rooms on workers.")
diff --git a/synapse/server.py b/synapse/server.py
index 75a902d64d..a0036578b1 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -94,7 +94,11 @@ from synapse.handlers.room import (
 )
 from synapse.handlers.room_batch import RoomBatchHandler
 from synapse.handlers.room_list import RoomListHandler
-from synapse.handlers.room_member import RoomMemberHandler, RoomMemberMasterHandler
+from synapse.handlers.room_member import (
+    RoomForgetterHandler,
+    RoomMemberHandler,
+    RoomMemberMasterHandler,
+)
 from synapse.handlers.room_member_worker import RoomMemberWorkerHandler
 from synapse.handlers.room_summary import RoomSummaryHandler
 from synapse.handlers.search import SearchHandler
@@ -233,6 +237,7 @@ class HomeServer(metaclass=abc.ABCMeta):
         "message",
         "pagination",
         "profile",
+        "room_forgetter",
         "stats",
     ]
 
@@ -847,6 +852,10 @@ class HomeServer(metaclass=abc.ABCMeta):
     def get_push_rules_handler(self) -> PushRulesHandler:
         return PushRulesHandler(self)
 
+    @cache_in_self
+    def get_room_forgetter_handler(self) -> RoomForgetterHandler:
+        return RoomForgetterHandler(self)
+
     @cache_in_self
     def get_outbound_redis_connection(self) -> "ConnectionHandler":
         """
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index daad58291a..e068f27a10 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -82,7 +82,7 @@ class EventIdMembership:
     membership: str
 
 
-class RoomMemberWorkerStore(EventsWorkerStore):
+class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
     def __init__(
         self,
         database: DatabasePool,
@@ -1372,6 +1372,50 @@ class RoomMemberWorkerStore(EventsWorkerStore):
             _is_local_host_in_room_ignoring_users_txn,
         )
 
+    async def forget(self, user_id: str, room_id: str) -> None:
+        """Indicate that user_id wishes to discard history for room_id."""
+
+        def f(txn: LoggingTransaction) -> None:
+            self.db_pool.simple_update_txn(
+                txn,
+                table="room_memberships",
+                keyvalues={"user_id": user_id, "room_id": room_id},
+                updatevalues={"forgotten": 1},
+            )
+
+            self._invalidate_cache_and_stream(txn, self.did_forget, (user_id, room_id))
+            self._invalidate_cache_and_stream(
+                txn, self.get_forgotten_rooms_for_user, (user_id,)
+            )
+
+        await self.db_pool.runInteraction("forget_membership", f)
+
+    async def get_room_forgetter_stream_pos(self) -> int:
+        """Get the stream position of the background process to forget rooms when left
+        by users.
+        """
+        return await self.db_pool.simple_select_one_onecol(
+            table="room_forgetter_stream_pos",
+            keyvalues={},
+            retcol="stream_id",
+            desc="room_forgetter_stream_pos",
+        )
+
+    async def update_room_forgetter_stream_pos(self, stream_id: int) -> None:
+        """Update the stream position of the background process to forget rooms when
+        left by users.
+
+        Must only be used by the worker running the background process.
+        """
+        assert self.hs.config.worker.run_background_tasks
+
+        await self.db_pool.simple_update_one(
+            table="room_forgetter_stream_pos",
+            keyvalues={},
+            updatevalues={"stream_id": stream_id},
+            desc="room_forgetter_stream_pos",
+        )
+
 
 class RoomMemberBackgroundUpdateStore(SQLBaseStore):
     def __init__(
@@ -1553,29 +1597,6 @@ class RoomMemberStore(
     ):
         super().__init__(database, db_conn, hs)
 
-    async def forget(self, user_id: str, room_id: str) -> None:
-        """Indicate that user_id wishes to discard history for room_id."""
-
-        def f(txn: LoggingTransaction) -> None:
-            sql = (
-                "UPDATE"
-                "  room_memberships"
-                " SET"
-                "  forgotten = 1"
-                " WHERE"
-                "  user_id = ?"
-                " AND"
-                "  room_id = ?"
-            )
-            txn.execute(sql, (user_id, room_id))
-
-            self._invalidate_cache_and_stream(txn, self.did_forget, (user_id, room_id))
-            self._invalidate_cache_and_stream(
-                txn, self.get_forgotten_rooms_for_user, (user_id,)
-            )
-
-        await self.db_pool.runInteraction("forget_membership", f)
-
 
 def extract_heroes_from_room_summary(
     details: Mapping[str, MemberSummary], me: str
diff --git a/synapse/storage/schema/main/delta/76/04_add_room_forgetter.sql b/synapse/storage/schema/main/delta/76/04_add_room_forgetter.sql
new file mode 100644
index 0000000000..be4b57d86f
--- /dev/null
+++ b/synapse/storage/schema/main/delta/76/04_add_room_forgetter.sql
@@ -0,0 +1,24 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE room_forgetter_stream_pos (
+    Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE,  -- Makes sure this table only has one row.
+    stream_id  BIGINT NOT NULL,
+    CHECK (Lock='X')
+);
+
+INSERT INTO room_forgetter_stream_pos (
+    stream_id
+) SELECT COALESCE(MAX(stream_ordering), 0) from events;
diff --git a/tests/handlers/test_room_member.py b/tests/handlers/test_room_member.py
index 6a38893b68..a444d822cd 100644
--- a/tests/handlers/test_room_member.py
+++ b/tests/handlers/test_room_member.py
@@ -333,6 +333,17 @@ class RoomMemberMasterHandlerTestCase(HomeserverTestCase):
             self.get_success(self.store.is_locally_forgotten_room(self.room_id))
         )
 
+    @override_config({"forget_rooms_on_leave": True})
+    def test_leave_and_auto_forget(self) -> None:
+        """Tests the `forget_rooms_on_leave` config option."""
+        self.helper.join(self.room_id, user=self.bob, tok=self.bob_token)
+
+        # alice is not the last room member that leaves and forgets the room
+        self.helper.leave(self.room_id, user=self.alice, tok=self.alice_token)
+        self.assertTrue(
+            self.get_success(self.store.did_forget(self.alice, self.room_id))
+        )
+
     def test_leave_and_forget_last_user(self) -> None:
         """Tests that forget a room is successfully when the last user has left the room."""
 
-- 
cgit 1.5.1


From a7b3e9ce65335e452de216cb42b9e724e8f3ad1d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 3 May 2023 07:49:03 -0400
Subject: Set thread_id column to non-null for
 event_push_{actions,actions_staging,summary} (#15437)

Updates the database schema to require a thread_id (by adding a
constraint that the column is non-null) for event_push_actions,
event_push_actions_staging, and event_push_summary.

For PostgreSQL we add the constraint as NOT VALID, then
VALIDATE the constraint in a background job to avoid locking
the table during an upgrade.

For SQLite we simply rebuild the table & copy the data.
---
 changelog.d/15437.misc                             |   1 +
 synapse/storage/background_updates.py              |  44 ++++
 .../storage/databases/main/event_push_actions.py   | 244 +--------------------
 synapse/storage/schema/__init__.py                 |   3 +
 .../delta/76/04thread_notifications_backfill.sql   |  28 +++
 .../05thread_notifications_not_null.sql.postgres   |  37 ++++
 .../76/05thread_notifications_not_null.sql.sqlite  | 102 +++++++++
 7 files changed, 225 insertions(+), 234 deletions(-)
 create mode 100644 changelog.d/15437.misc
 create mode 100644 synapse/storage/schema/main/delta/76/04thread_notifications_backfill.sql
 create mode 100644 synapse/storage/schema/main/delta/76/05thread_notifications_not_null.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/76/05thread_notifications_not_null.sql.sqlite

(limited to 'synapse')

diff --git a/changelog.d/15437.misc b/changelog.d/15437.misc
new file mode 100644
index 0000000000..2dea23784f
--- /dev/null
+++ b/changelog.d/15437.misc
@@ -0,0 +1 @@
+Make the `thread_id` column on `event_push_actions`, `event_push_actions_staging`, and `event_push_summary` non-null.
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index a99aea8926..ca085ef800 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -561,6 +561,50 @@ class BackgroundUpdater:
             updater, oneshot=True
         )
 
+    def register_background_validate_constraint(
+        self, update_name: str, constraint_name: str, table: str
+    ) -> None:
+        """Helper for store classes to do a background validate constraint.
+
+        This only applies on PostgreSQL.
+
+        To use:
+
+        1. use a schema delta file to add a background update. Example:
+            INSERT INTO background_updates (update_name, progress_json) VALUES
+                ('validate_my_constraint', '{}');
+
+        2. In the Store constructor, call this method
+
+        Args:
+            update_name: update_name to register for
+            constraint_name: name of constraint to validate
+            table: table the constraint is applied to
+        """
+
+        def runner(conn: Connection) -> None:
+            c = conn.cursor()
+
+            sql = f"""
+            ALTER TABLE {table} VALIDATE CONSTRAINT {constraint_name};
+            """
+            logger.debug("[SQL] %s", sql)
+            c.execute(sql)
+
+        async def updater(progress: JsonDict, batch_size: int) -> int:
+            assert isinstance(
+                self.db_pool.engine, engines.PostgresEngine
+            ), "validate constraint background update registered for non-Postres database"
+
+            logger.info("Validating constraint %s to %s", constraint_name, table)
+            await self.db_pool.runWithConnection(runner)
+            await self._end_background_update(update_name)
+            return 1
+
+        self._background_update_handlers[update_name] = _BackgroundUpdateHandler(
+            updater, oneshot=True
+        )
+
     async def create_index_in_background(
         self,
         index_name: str,
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index eeccf5db24..ab8f354dc1 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -100,7 +100,6 @@ from synapse.storage.database import (
 )
 from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
 from synapse.storage.databases.main.stream import StreamWorkerStore
-from synapse.types import JsonDict
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached
 
@@ -289,180 +288,22 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             unique=True,
         )
 
-        self.db_pool.updates.register_background_update_handler(
-            "event_push_backfill_thread_id",
-            self._background_backfill_thread_id,
+        self.db_pool.updates.register_background_validate_constraint(
+            "event_push_actions_staging_thread_id",
+            constraint_name="event_push_actions_staging_thread_id",
+            table="event_push_actions_staging",
         )
-
-        # Indexes which will be used to quickly make the thread_id column non-null.
-        self.db_pool.updates.register_background_index_update(
-            "event_push_actions_thread_id_null",
-            index_name="event_push_actions_thread_id_null",
+        self.db_pool.updates.register_background_validate_constraint(
+            "event_push_actions_thread_id",
+            constraint_name="event_push_actions_thread_id",
             table="event_push_actions",
-            columns=["thread_id"],
-            where_clause="thread_id IS NULL",
         )
-        self.db_pool.updates.register_background_index_update(
-            "event_push_summary_thread_id_null",
-            index_name="event_push_summary_thread_id_null",
+        self.db_pool.updates.register_background_validate_constraint(
+            "event_push_summary_thread_id",
+            constraint_name="event_push_summary_thread_id",
             table="event_push_summary",
-            columns=["thread_id"],
-            where_clause="thread_id IS NULL",
         )
 
-        # Check ASAP (and then later, every 1s) to see if we have finished
-        # background updates the event_push_actions and event_push_summary tables.
-        self._clock.call_later(0.0, self._check_event_push_backfill_thread_id)
-        self._event_push_backfill_thread_id_done = False
-
-    @wrap_as_background_process("check_event_push_backfill_thread_id")
-    async def _check_event_push_backfill_thread_id(self) -> None:
-        """
-        Has thread_id finished backfilling?
-
-        If not, we need to just-in-time update it so the queries work.
-        """
-        done = await self.db_pool.updates.has_completed_background_update(
-            "event_push_backfill_thread_id"
-        )
-
-        if done:
-            self._event_push_backfill_thread_id_done = True
-        else:
-            # Reschedule to run.
-            self._clock.call_later(15.0, self._check_event_push_backfill_thread_id)
-
-    async def _background_backfill_thread_id(
-        self, progress: JsonDict, batch_size: int
-    ) -> int:
-        """
-        Fill in the thread_id field for event_push_actions and event_push_summary.
-
-        This is preparatory so that it can be made non-nullable in the future.
-
-        Because all current (null) data is done in an unthreaded manner this
-        simply assumes it is on the "main" timeline. Since event_push_actions
-        are periodically cleared it is not possible to correctly re-calculate
-        the thread_id.
-        """
-        event_push_actions_done = progress.get("event_push_actions_done", False)
-
-        def add_thread_id_txn(
-            txn: LoggingTransaction, start_stream_ordering: int
-        ) -> int:
-            sql = """
-            SELECT stream_ordering
-            FROM event_push_actions
-            WHERE
-                thread_id IS NULL
-                AND stream_ordering > ?
-            ORDER BY stream_ordering
-            LIMIT ?
-            """
-            txn.execute(sql, (start_stream_ordering, batch_size))
-
-            # No more rows to process.
-            rows = txn.fetchall()
-            if not rows:
-                progress["event_push_actions_done"] = True
-                self.db_pool.updates._background_update_progress_txn(
-                    txn, "event_push_backfill_thread_id", progress
-                )
-                return 0
-
-            # Update the thread ID for any of those rows.
-            max_stream_ordering = rows[-1][0]
-
-            sql = """
-            UPDATE event_push_actions
-            SET thread_id = 'main'
-            WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL
-            """
-            txn.execute(
-                sql,
-                (
-                    start_stream_ordering,
-                    max_stream_ordering,
-                ),
-            )
-
-            # Update progress.
-            processed_rows = txn.rowcount
-            progress["max_event_push_actions_stream_ordering"] = max_stream_ordering
-            self.db_pool.updates._background_update_progress_txn(
-                txn, "event_push_backfill_thread_id", progress
-            )
-
-            return processed_rows
-
-        def add_thread_id_summary_txn(txn: LoggingTransaction) -> int:
-            min_user_id = progress.get("max_summary_user_id", "")
-            min_room_id = progress.get("max_summary_room_id", "")
-
-            # Slightly overcomplicated query for getting the Nth user ID / room
-            # ID tuple, or the last if there are less than N remaining.
-            sql = """
-            SELECT user_id, room_id FROM (
-                SELECT user_id, room_id FROM event_push_summary
-                WHERE (user_id, room_id) > (?, ?)
-                    AND thread_id IS NULL
-                ORDER BY user_id, room_id
-                LIMIT ?
-            ) AS e
-            ORDER BY user_id DESC, room_id DESC
-            LIMIT 1
-            """
-
-            txn.execute(sql, (min_user_id, min_room_id, batch_size))
-            row = txn.fetchone()
-            if not row:
-                return 0
-
-            max_user_id, max_room_id = row
-
-            sql = """
-            UPDATE event_push_summary
-            SET thread_id = 'main'
-            WHERE
-                (?, ?) < (user_id, room_id) AND (user_id, room_id) <= (?, ?)
-                AND thread_id IS NULL
-            """
-            txn.execute(sql, (min_user_id, min_room_id, max_user_id, max_room_id))
-            processed_rows = txn.rowcount
-
-            progress["max_summary_user_id"] = max_user_id
-            progress["max_summary_room_id"] = max_room_id
-            self.db_pool.updates._background_update_progress_txn(
-                txn, "event_push_backfill_thread_id", progress
-            )
-
-            return processed_rows
-
-        # First update the event_push_actions table, then the event_push_summary table.
-        #
-        # Note that the event_push_actions_staging table is ignored since it is
-        # assumed that items in that table will only exist for a short period of
-        # time.
-        if not event_push_actions_done:
-            result = await self.db_pool.runInteraction(
-                "event_push_backfill_thread_id",
-                add_thread_id_txn,
-                progress.get("max_event_push_actions_stream_ordering", 0),
-            )
-        else:
-            result = await self.db_pool.runInteraction(
-                "event_push_backfill_thread_id",
-                add_thread_id_summary_txn,
-            )
-
-            # Only done after the event_push_summary table is done.
-            if not result:
-                await self.db_pool.updates._end_background_update(
-                    "event_push_backfill_thread_id"
-                )
-
-        return result
-
     async def get_unread_counts_by_room_for_user(self, user_id: str) -> Dict[str, int]:
         """Get the notification count by room for a user. Only considers notifications,
         not highlight or unread counts, and threads are currently aggregated under their room.
@@ -711,25 +552,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE),
         )
 
-        # First ensure that the existing rows have an updated thread_id field.
-        if not self._event_push_backfill_thread_id_done:
-            txn.execute(
-                """
-                UPDATE event_push_summary
-                SET thread_id = ?
-                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
-                """,
-                (MAIN_TIMELINE, room_id, user_id),
-            )
-            txn.execute(
-                """
-                UPDATE event_push_actions
-                SET thread_id = ?
-                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
-                """,
-                (MAIN_TIMELINE, room_id, user_id),
-            )
-
         # First we pull the counts from the summary table.
         #
         # We check that `last_receipt_stream_ordering` matches the stream ordering of the
@@ -1545,25 +1367,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 (room_id, user_id, stream_ordering, *thread_args),
             )
 
-            # First ensure that the existing rows have an updated thread_id field.
-            if not self._event_push_backfill_thread_id_done:
-                txn.execute(
-                    """
-                    UPDATE event_push_summary
-                    SET thread_id = ?
-                    WHERE room_id = ? AND user_id = ? AND thread_id is NULL
-                    """,
-                    (MAIN_TIMELINE, room_id, user_id),
-                )
-                txn.execute(
-                    """
-                    UPDATE event_push_actions
-                    SET thread_id = ?
-                    WHERE room_id = ? AND user_id = ? AND thread_id is NULL
-                    """,
-                    (MAIN_TIMELINE, room_id, user_id),
-                )
-
             # Fetch the notification counts between the stream ordering of the
             # latest receipt and what was previously summarised.
             unread_counts = self._get_notif_unread_count_for_user_room(
@@ -1698,19 +1501,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             rotate_to_stream_ordering: The new maximum event stream ordering to summarise.
         """
 
-        # Ensure that any new actions have an updated thread_id.
-        if not self._event_push_backfill_thread_id_done:
-            txn.execute(
-                """
-                UPDATE event_push_actions
-                SET thread_id = ?
-                WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL
-                """,
-                (MAIN_TIMELINE, old_rotate_stream_ordering, rotate_to_stream_ordering),
-            )
-
-        # XXX Do we need to update summaries here too?
-
         # Calculate the new counts that should be upserted into event_push_summary
         sql = """
             SELECT user_id, room_id, thread_id,
@@ -1773,20 +1563,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
         logger.info("Rotating notifications, handling %d rows", len(summaries))
 
-        # Ensure that any updated threads have the proper thread_id.
-        if not self._event_push_backfill_thread_id_done:
-            txn.execute_batch(
-                """
-                UPDATE event_push_summary
-                SET thread_id = ?
-                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
-                """,
-                [
-                    (MAIN_TIMELINE, room_id, user_id)
-                    for user_id, room_id, _ in summaries
-                ],
-            )
-
         self.db_pool.simple_upsert_many_txn(
             txn,
             table="event_push_summary",
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 1672976209..741563abc6 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -106,6 +106,9 @@ Changes in SCHEMA_VERSION = 76:
 SCHEMA_COMPAT_VERSION = (
     # Queries against `event_stream_ordering` columns in membership tables must
     # be disambiguated.
+    #
+    # The thread_id column must be written to with non-null values for the
+    # event_push_actions, event_push_actions_staging, and event_push_summary tables.
     74
 )
 """Limit on how far the synapse codebase can be rolled back without breaking db compat
diff --git a/synapse/storage/schema/main/delta/76/04thread_notifications_backfill.sql b/synapse/storage/schema/main/delta/76/04thread_notifications_backfill.sql
new file mode 100644
index 0000000000..ce6f9ff937
--- /dev/null
+++ b/synapse/storage/schema/main/delta/76/04thread_notifications_backfill.sql
@@ -0,0 +1,28 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Force the background updates from 06thread_notifications.sql to run in the
+-- foreground as code will now require those to be "done".
+
+DELETE FROM background_updates WHERE update_name = 'event_push_backfill_thread_id';
+
+-- Overwrite any null thread_id values.
+UPDATE event_push_actions_staging SET thread_id = 'main' WHERE thread_id IS NULL;
+UPDATE event_push_actions SET thread_id = 'main' WHERE thread_id IS NULL;
+UPDATE event_push_summary SET thread_id = 'main' WHERE thread_id IS NULL;
+
+-- Drop the background updates to calculate the indexes used to find null thread_ids.
+DELETE FROM background_updates WHERE update_name = 'event_push_actions_thread_id_null';
+DELETE FROM background_updates WHERE update_name = 'event_push_summary_thread_id_null';
diff --git a/synapse/storage/schema/main/delta/76/05thread_notifications_not_null.sql.postgres b/synapse/storage/schema/main/delta/76/05thread_notifications_not_null.sql.postgres
new file mode 100644
index 0000000000..40936def6f
--- /dev/null
+++ b/synapse/storage/schema/main/delta/76/05thread_notifications_not_null.sql.postgres
@@ -0,0 +1,37 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- The thread_id columns can now be made non-nullable; this is done by using a
+-- constraint (and not altering the column) to avoid taking out a full table lock.
+--
+-- We initially add an invalid constraint which guards against new data (this
+-- doesn't lock the table).
+ALTER TABLE event_push_actions_staging
+    ADD CONSTRAINT event_push_actions_staging_thread_id CHECK (thread_id IS NOT NULL) NOT VALID;
+ALTER TABLE event_push_actions
+    ADD CONSTRAINT event_push_actions_thread_id CHECK (thread_id IS NOT NULL) NOT VALID;
+ALTER TABLE event_push_summary
+    ADD CONSTRAINT event_push_summary_thread_id CHECK (thread_id IS NOT NULL) NOT VALID;
+
+-- We then validate the constraint which doesn't need to worry about new data. It
+-- only needs a SHARE UPDATE EXCLUSIVE lock but can still take a while to complete.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7605, 'event_push_actions_staging_thread_id', '{}'),
+  (7605, 'event_push_actions_thread_id', '{}'),
+  (7605, 'event_push_summary_thread_id', '{}');
+
+-- Drop the indexes used to find null thread_ids.
+DROP INDEX IF EXISTS event_push_actions_thread_id_null;
+DROP INDEX IF EXISTS event_push_summary_thread_id_null;
diff --git a/synapse/storage/schema/main/delta/76/05thread_notifications_not_null.sql.sqlite b/synapse/storage/schema/main/delta/76/05thread_notifications_not_null.sql.sqlite
new file mode 100644
index 0000000000..e9372b6cf9
--- /dev/null
+++ b/synapse/storage/schema/main/delta/76/05thread_notifications_not_null.sql.sqlite
@@ -0,0 +1,102 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- The thread_id columns can now be made non-nullable.
+--
+-- SQLite doesn't support modifying the columns of an existing table, so each
+-- table must be recreated.
+
+-- Create the new tables.
+CREATE TABLE event_push_actions_staging_new (
+    event_id TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    actions TEXT NOT NULL,
+    notif SMALLINT NOT NULL,
+    highlight SMALLINT NOT NULL,
+    unread SMALLINT,
+    thread_id TEXT,
+    inserted_ts BIGINT,
+    CONSTRAINT event_push_actions_staging_thread_id CHECK (thread_id is NOT NULL)
+);
+
+CREATE TABLE event_push_actions_new (
+    room_id TEXT NOT NULL,
+    event_id TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    profile_tag VARCHAR(32),
+    actions TEXT NOT NULL,
+    topological_ordering BIGINT,
+    stream_ordering BIGINT,
+    notif SMALLINT,
+    highlight SMALLINT,
+    unread SMALLINT,
+    thread_id TEXT,
+    CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag),
+    CONSTRAINT event_push_actions_thread_id CHECK (thread_id is NOT NULL)
+);
+
+CREATE TABLE event_push_summary_new (
+    user_id TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+    notif_count BIGINT NOT NULL,
+    stream_ordering BIGINT NOT NULL,
+    unread_count BIGINT,
+    last_receipt_stream_ordering BIGINT,
+    thread_id TEXT,
+    CONSTRAINT event_push_summary_thread_id CHECK (thread_id is NOT NULL)
+);
+
+-- Copy the data.
+INSERT INTO event_push_actions_staging_new (event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts)
+    SELECT event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts
+    FROM event_push_actions_staging;
+
+INSERT INTO event_push_actions_new (room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id)
+    SELECT room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id
+    FROM event_push_actions;
+
+INSERT INTO event_push_summary_new (user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id)
+    SELECT user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id
+    FROM event_push_summary;
+
+-- Drop the old tables.
+DROP TABLE event_push_actions_staging;
+DROP TABLE event_push_actions;
+DROP TABLE event_push_summary;
+
+-- Rename the tables.
+ALTER TABLE event_push_actions_staging_new RENAME TO event_push_actions_staging;
+ALTER TABLE event_push_actions_new RENAME TO event_push_actions;
+ALTER TABLE event_push_summary_new RENAME TO event_push_summary;
+
+-- Recreate the indexes.
+CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging(event_id);
+
+CREATE INDEX event_push_actions_highlights_index ON event_push_actions (user_id, room_id, topological_ordering, stream_ordering);
+CREATE INDEX event_push_actions_rm_tokens on event_push_actions( user_id, room_id, topological_ordering, stream_ordering );
+CREATE INDEX event_push_actions_room_id_user_id on event_push_actions(room_id, user_id);
+CREATE INDEX event_push_actions_stream_ordering on event_push_actions( stream_ordering, user_id );
+CREATE INDEX event_push_actions_u_highlight ON event_push_actions (user_id, stream_ordering);
+
+CREATE UNIQUE INDEX event_push_summary_unique_index2 ON event_push_summary (user_id, room_id, thread_id) ;
+
+-- Recreate some indexes in the background, by re-running the background updates
+-- from 72/02event_push_actions_index.sql and 72/06thread_notifications.sql.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7403, 'event_push_summary_unique_index2', '{}')
+  ON CONFLICT (update_name) DO UPDATE SET progress_json = '{}';
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7403, 'event_push_actions_stream_highlight_index', '{}')
+  ON CONFLICT (update_name) DO UPDATE SET progress_json = '{}';
-- 
cgit 1.5.1


From 3b837d856c4f867377d738eacb262cad28b14ad7 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Wed, 3 May 2023 13:09:20 +0100
Subject: Revert "Reduce the size of the HTTP connection pool for non-pushers"
 (#15530)

#15514 introduced a regression where Synapse would encounter
`PartialDownloadError`s when fetching OpenID metadata for certain
providers on startup. Due to #8088, this prevents Synapse from starting
entirely.

Revert the change while we decide what to do about the regression.
---
 CHANGES.md                             |  1 -
 synapse/http/client.py                 | 14 +++++++++++---
 synapse/push/httppusher.py             |  3 +--
 synapse/server.py                      | 21 ---------------------
 tests/push/test_http.py                |  2 +-
 tests/replication/test_pusher_shard.py |  6 +++---
 6 files changed, 16 insertions(+), 31 deletions(-)

(limited to 'synapse')

diff --git a/CHANGES.md b/CHANGES.md
index b047697f8f..f055772ca0 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -48,7 +48,6 @@ Internal Changes
 - Bump packaging from 23.0 to 23.1. ([\#15510](https://github.com/matrix-org/synapse/issues/15510))
 - Bump types-requests from 2.28.11.16 to 2.29.0.0. ([\#15511](https://github.com/matrix-org/synapse/issues/15511))
 - Bump setuptools-rust from 1.5.2 to 1.6.0. ([\#15512](https://github.com/matrix-org/synapse/issues/15512))
-- Reduce the size of the HTTP connection pool for non-pushers. ([\#15514](https://github.com/matrix-org/synapse/issues/15514))
 - Update the check_schema_delta script to account for when the schema version has been bumped locally. ([\#15466](https://github.com/matrix-org/synapse/issues/15466))
 
 
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 164abe9fc7..91fe474f36 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -768,7 +768,6 @@ class SimpleHttpClient(BaseHttpClient):
            request if it were otherwise caught in a blacklist.
         use_proxy: Whether proxy settings should be discovered and used
             from conventional environment variables.
-        connection_pool: The connection pool to use for this client's agent.
     """
 
     def __init__(
@@ -778,7 +777,6 @@ class SimpleHttpClient(BaseHttpClient):
         ip_whitelist: Optional[IPSet] = None,
         ip_blacklist: Optional[IPSet] = None,
         use_proxy: bool = False,
-        connection_pool: Optional[HTTPConnectionPool] = None,
     ):
         super().__init__(hs, treq_args=treq_args)
         self._ip_whitelist = ip_whitelist
@@ -791,12 +789,22 @@ class SimpleHttpClient(BaseHttpClient):
                 self.reactor, self._ip_whitelist, self._ip_blacklist
             )
 
+        # the pusher makes lots of concurrent SSL connections to Sygnal, and tends to
+        # do so in batches, so we need to allow the pool to keep lots of idle
+        # connections around.
+        pool = HTTPConnectionPool(self.reactor)
+        # XXX: The justification for using the cache factor here is that larger
+        # instances will need both more cache and more connections.
+        # Still, this should probably be a separate dial
+        pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5)
+        pool.cachedConnectionTimeout = 2 * 60
+
         self.agent: IAgent = ProxyAgent(
             self.reactor,
             hs.get_reactor(),
             connectTimeout=15,
             contextFactory=self.hs.get_http_client_context_factory(),
-            pool=connection_pool,
+            pool=pool,
             use_proxy=use_proxy,
         )
 
diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py
index a01445e374..4f8fa445d9 100644
--- a/synapse/push/httppusher.py
+++ b/synapse/push/httppusher.py
@@ -140,8 +140,7 @@ class HttpPusher(Pusher):
             )
 
         self.url = url
-        self.http_client = hs.get_pusher_http_client()
-
+        self.http_client = hs.get_proxied_blacklisted_http_client()
         self.data_minus_url = {}
         self.data_minus_url.update(self.data)
         del self.data_minus_url["url"]
diff --git a/synapse/server.py b/synapse/server.py
index 75a902d64d..08ad97b952 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -27,7 +27,6 @@ from typing_extensions import TypeAlias
 
 from twisted.internet.interfaces import IOpenSSLContextFactory
 from twisted.internet.tcp import Port
-from twisted.web.client import HTTPConnectionPool
 from twisted.web.iweb import IPolicyForHTTPS
 from twisted.web.resource import Resource
 
@@ -454,26 +453,6 @@ class HomeServer(metaclass=abc.ABCMeta):
             use_proxy=True,
         )
 
-    @cache_in_self
-    def get_pusher_http_client(self) -> SimpleHttpClient:
-        # the pusher makes lots of concurrent SSL connections to Sygnal, and tends to
-        # do so in batches, so we need to allow the pool to keep lots of idle
-        # connections around.
-        pool = HTTPConnectionPool(self.get_reactor())
-        # XXX: The justification for using the cache factor here is that larger
-        # instances will need both more cache and more connections.
-        # Still, this should probably be a separate dial
-        pool.maxPersistentPerHost = max(int(100 * self.config.caches.global_factor), 5)
-        pool.cachedConnectionTimeout = 2 * 60
-
-        return SimpleHttpClient(
-            self,
-            ip_whitelist=self.config.server.ip_range_whitelist,
-            ip_blacklist=self.config.server.ip_range_blacklist,
-            use_proxy=True,
-            connection_pool=pool,
-        )
-
     @cache_in_self
     def get_federation_http_client(self) -> MatrixFederationHttpClient:
         """
diff --git a/tests/push/test_http.py b/tests/push/test_http.py
index 0fbbef7c8b..99cec0836b 100644
--- a/tests/push/test_http.py
+++ b/tests/push/test_http.py
@@ -52,7 +52,7 @@ class HTTPPusherTests(HomeserverTestCase):
 
         m.post_json_get_json = post_json_get_json
 
-        hs = self.setup_test_homeserver(pusher_http_client=m)
+        hs = self.setup_test_homeserver(proxied_blacklisted_http_client=m)
 
         return hs
 
diff --git a/tests/replication/test_pusher_shard.py b/tests/replication/test_pusher_shard.py
index b9bb1a6497..dcb3e6669b 100644
--- a/tests/replication/test_pusher_shard.py
+++ b/tests/replication/test_pusher_shard.py
@@ -93,7 +93,7 @@ class PusherShardTestCase(BaseMultiWorkerStreamTestCase):
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {"worker_name": "pusher1", "pusher_instances": ["pusher1"]},
-            pusher_http_client=http_client_mock,
+            proxied_blacklisted_http_client=http_client_mock,
         )
 
         event_id = self._create_pusher_and_send_msg("user")
@@ -126,7 +126,7 @@ class PusherShardTestCase(BaseMultiWorkerStreamTestCase):
                 "worker_name": "pusher1",
                 "pusher_instances": ["pusher1", "pusher2"],
             },
-            pusher_http_client=http_client_mock1,
+            proxied_blacklisted_http_client=http_client_mock1,
         )
 
         http_client_mock2 = Mock(spec_set=["post_json_get_json"])
@@ -140,7 +140,7 @@ class PusherShardTestCase(BaseMultiWorkerStreamTestCase):
                 "worker_name": "pusher2",
                 "pusher_instances": ["pusher1", "pusher2"],
             },
-            pusher_http_client=http_client_mock2,
+            proxied_blacklisted_http_client=http_client_mock2,
         )
 
         # We choose a user name that we know should go to pusher1.
-- 
cgit 1.5.1


From fc3a878220f934a248b008277e89b85ad187d220 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 3 May 2023 14:41:37 +0100
Subject: Speed up rebuilding of the user directory for local users (#15529)

The idea here is to batch up the work.
---
 changelog.d/15529.misc                           |   1 +
 synapse/storage/database.py                      |  13 +-
 synapse/storage/databases/main/user_directory.py | 235 +++++++++++++++--------
 3 files changed, 172 insertions(+), 77 deletions(-)
 create mode 100644 changelog.d/15529.misc

(limited to 'synapse')

diff --git a/changelog.d/15529.misc b/changelog.d/15529.misc
new file mode 100644
index 0000000000..7ad424d8df
--- /dev/null
+++ b/changelog.d/15529.misc
@@ -0,0 +1 @@
+Speed up rebuilding of the user directory for local users.
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 1f5f5eb6f8..313cf1a8d0 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -386,13 +386,20 @@ class LoggingTransaction:
             self.executemany(sql, args)
 
     def execute_values(
-        self, sql: str, values: Iterable[Iterable[Any]], fetch: bool = True
+        self,
+        sql: str,
+        values: Iterable[Iterable[Any]],
+        template: Optional[str] = None,
+        fetch: bool = True,
     ) -> List[Tuple]:
         """Corresponds to psycopg2.extras.execute_values. Only available when
         using postgres.
 
         The `fetch` parameter must be set to False if the query does not return
         rows (e.g. INSERTs).
+
+        The `template` is the snippet that is merged with every item in `values`
+        to compose the query.
         """
         assert isinstance(self.database_engine, PostgresEngine)
         from psycopg2.extras import execute_values
@@ -400,7 +407,9 @@ class LoggingTransaction:
         return self._do_execute(
             # TODO: is it safe for values to be Iterable[Iterable[Any]] here?
             # https://www.psycopg.org/docs/extras.html?highlight=execute_batch#psycopg2.extras.execute_values says values should be Sequence[Sequence]
-            lambda the_sql: execute_values(self.txn, the_sql, values, fetch=fetch),
+            lambda the_sql: execute_values(
+                self.txn, the_sql, values, template=template, fetch=fetch
+            ),
             sql,
         )
 
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index 5d65faed16..b7d58978de 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -27,6 +27,8 @@ from typing import (
     cast,
 )
 
+import attr
+
 try:
     # Figure out if ICU support is available for searching users.
     import icu
@@ -66,6 +68,19 @@ logger = logging.getLogger(__name__)
 TEMP_TABLE = "_temp_populate_user_directory"
 
 
+@attr.s(auto_attribs=True, frozen=True)
+class _UserDirProfile:
+    """Helper type for the user directory code for an entry to be inserted into
+    the directory.
+    """
+
+    user_id: str
+
+    # If the display name or avatar URL are unexpected types, replace with None
+    display_name: Optional[str] = attr.ib(default=None, converter=non_null_str_or_none)
+    avatar_url: Optional[str] = attr.ib(default=None, converter=non_null_str_or_none)
+
+
 class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
     # How many records do we calculate before sending it to
     # add_users_who_share_private_rooms?
@@ -381,25 +396,65 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
             % (len(users_to_work_on), progress["remaining"])
         )
 
-        for user_id in users_to_work_on:
-            if await self.should_include_local_user_in_dir(user_id):
-                profile = await self.get_profileinfo(get_localpart_from_id(user_id))  # type: ignore[attr-defined]
-                await self.update_profile_in_user_dir(
-                    user_id, profile.display_name, profile.avatar_url
-                )
-
-            # We've finished processing a user. Delete it from the table.
-            await self.db_pool.simple_delete_one(
-                TEMP_TABLE + "_users", {"user_id": user_id}
-            )
-            # Update the remaining counter.
-            progress["remaining"] -= 1
-            await self.db_pool.runInteraction(
-                "populate_user_directory",
-                self.db_pool.updates._background_update_progress_txn,
-                "populate_user_directory_process_users",
-                progress,
+        # First filter down to users we want to insert into the user directory.
+        users_to_insert = [
+            user_id
+            for user_id in users_to_work_on
+            if await self.should_include_local_user_in_dir(user_id)
+        ]
+
+        # Next fetch their profiles. Note that the `user_id` here is the
+        # *localpart*, and that not all users have profiles.
+        profile_rows = await self.db_pool.simple_select_many_batch(
+            table="profiles",
+            column="user_id",
+            iterable=[get_localpart_from_id(u) for u in users_to_insert],
+            retcols=(
+                "user_id",
+                "displayname",
+                "avatar_url",
+            ),
+            keyvalues={},
+            desc="populate_user_directory_process_users_get_profiles",
+        )
+        profiles = {
+            f"@{row['user_id']}:{self.server_name}": _UserDirProfile(
+                f"@{row['user_id']}:{self.server_name}",
+                row["displayname"],
+                row["avatar_url"],
             )
+            for row in profile_rows
+        }
+
+        profiles_to_insert = [
+            profiles.get(user_id) or _UserDirProfile(user_id)
+            for user_id in users_to_insert
+        ]
+
+        # Actually insert the users with their profiles into the directory.
+        await self.db_pool.runInteraction(
+            "populate_user_directory_process_users_insertion",
+            self._update_profiles_in_user_dir_txn,
+            profiles_to_insert,
+        )
+
+        # We've finished processing the users. Delete them from the table.
+        await self.db_pool.simple_delete_many(
+            table=TEMP_TABLE + "_users",
+            column="user_id",
+            iterable=users_to_work_on,
+            keyvalues={},
+            desc="populate_user_directory_process_users_delete",
+        )
+
+        # Update the remaining counter.
+        progress["remaining"] -= len(users_to_work_on)
+        await self.db_pool.runInteraction(
+            "populate_user_directory",
+            self.db_pool.updates._background_update_progress_txn,
+            "populate_user_directory_process_users",
+            progress,
+        )
 
         return len(users_to_work_on)
 
@@ -584,72 +639,102 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
         Update or add a user's profile in the user directory.
         If the user is remote, the profile will be marked as not stale.
         """
-        # If the display name or avatar URL are unexpected types, replace with None.
-        display_name = non_null_str_or_none(display_name)
-        avatar_url = non_null_str_or_none(avatar_url)
+        await self.db_pool.runInteraction(
+            "update_profiles_in_user_dir",
+            self._update_profiles_in_user_dir_txn,
+            [_UserDirProfile(user_id, display_name, avatar_url)],
+        )
+
+    def _update_profiles_in_user_dir_txn(
+        self,
+        txn: LoggingTransaction,
+        profiles: Sequence[_UserDirProfile],
+    ) -> None:
+        self.db_pool.simple_upsert_many_txn(
+            txn,
+            table="user_directory",
+            key_names=("user_id",),
+            key_values=[(p.user_id,) for p in profiles],
+            value_names=("display_name", "avatar_url"),
+            value_values=[
+                (
+                    p.display_name,
+                    p.avatar_url,
+                )
+                for p in profiles
+            ],
+        )
 
-        def _update_profile_in_user_dir_txn(txn: LoggingTransaction) -> None:
-            self.db_pool.simple_upsert_txn(
+        # Remote users: Make sure the profile is not marked as stale anymore.
+        remote_users = [
+            p.user_id for p in profiles if not self.hs.is_mine_id(p.user_id)
+        ]
+        if remote_users:
+            self.db_pool.simple_delete_many_txn(
                 txn,
-                table="user_directory",
-                keyvalues={"user_id": user_id},
-                values={"display_name": display_name, "avatar_url": avatar_url},
+                table="user_directory_stale_remote_users",
+                column="user_id",
+                values=remote_users,
+                keyvalues={},
             )
 
-            if not self.hs.is_mine_id(user_id):
-                # Remote users: Make sure the profile is not marked as stale anymore.
-                self.db_pool.simple_delete_txn(
-                    txn,
-                    table="user_directory_stale_remote_users",
-                    keyvalues={"user_id": user_id},
+        if isinstance(self.database_engine, PostgresEngine):
+            # We weight the localpart most highly, then display name and finally
+            # server name
+            template = """
+                (
+                    %s,
+                    setweight(to_tsvector('simple', %s), 'A')
+                    || setweight(to_tsvector('simple', %s), 'D')
+                    || setweight(to_tsvector('simple', COALESCE(%s, '')), 'B')
                 )
+            """
 
-            # The display name that goes into the database index.
-            index_display_name = display_name
-            if index_display_name is not None:
-                index_display_name = _filter_text_for_index(index_display_name)
-
-            if isinstance(self.database_engine, PostgresEngine):
-                # We weight the localpart most highly, then display name and finally
-                # server name
-                sql = """
-                        INSERT INTO user_directory_search(user_id, vector)
-                        VALUES (?,
-                            setweight(to_tsvector('simple', ?), 'A')
-                            || setweight(to_tsvector('simple', ?), 'D')
-                            || setweight(to_tsvector('simple', COALESCE(?, '')), 'B')
-                        ) ON CONFLICT (user_id) DO UPDATE SET vector=EXCLUDED.vector
-                    """
-                txn.execute(
-                    sql,
+            sql = """
+                    INSERT INTO user_directory_search(user_id, vector)
+                    VALUES ? ON CONFLICT (user_id) DO UPDATE SET vector=EXCLUDED.vector
+                """
+            txn.execute_values(
+                sql,
+                [
                     (
-                        user_id,
-                        get_localpart_from_id(user_id),
-                        get_domain_from_id(user_id),
-                        index_display_name,
-                    ),
-                )
-            elif isinstance(self.database_engine, Sqlite3Engine):
-                value = (
-                    "%s %s" % (user_id, index_display_name)
-                    if index_display_name
-                    else user_id
-                )
-                self.db_pool.simple_upsert_txn(
-                    txn,
-                    table="user_directory_search",
-                    keyvalues={"user_id": user_id},
-                    values={"value": value},
-                )
-            else:
-                # This should be unreachable.
-                raise Exception("Unrecognized database engine")
+                        p.user_id,
+                        get_localpart_from_id(p.user_id),
+                        get_domain_from_id(p.user_id),
+                        _filter_text_for_index(p.display_name)
+                        if p.display_name
+                        else None,
+                    )
+                    for p in profiles
+                ],
+                template=template,
+                fetch=False,
+            )
+        elif isinstance(self.database_engine, Sqlite3Engine):
+            values = []
+            for p in profiles:
+                if p.display_name is not None:
+                    index_display_name = _filter_text_for_index(p.display_name)
+                    value = f"{p.user_id} {index_display_name}"
+                else:
+                    value = p.user_id
 
-            txn.call_after(self.get_user_in_directory.invalidate, (user_id,))
+                values.append((value,))
 
-        await self.db_pool.runInteraction(
-            "update_profile_in_user_dir", _update_profile_in_user_dir_txn
-        )
+            self.db_pool.simple_upsert_many_txn(
+                txn,
+                table="user_directory_search",
+                key_names=("user_id",),
+                key_values=[(p.user_id,) for p in profiles],
+                value_names=("value",),
+                value_values=values,
+            )
+        else:
+            # This should be unreachable.
+            raise Exception("Unrecognized database engine")
+
+        for p in profiles:
+            txn.call_after(self.get_user_in_directory.invalidate, (p.user_id,))
 
     async def add_users_who_share_private_room(
         self, room_id: str, user_id_tuples: Iterable[Tuple[str, str]]
-- 
cgit 1.5.1


From 28ac1a1a91c972c19649e21a6e8d92bb786d8a57 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 3 May 2023 14:42:43 +0100
Subject: Speed up deleting of old rows in `event_push_actions` (#15531)

Enforce that we use index scans (rather than seq scans), which we also do for state queries. The reason to enforce this is that we can't get PostgreSQL to understand that the distribution of `stream_ordering` depends on `highlight`, and so it always defaults (on matrix.org) to sequential scans.
---
 changelog.d/15531.misc                               | 1 +
 synapse/storage/databases/main/event_push_actions.py | 9 +++++++++
 2 files changed, 10 insertions(+)
 create mode 100644 changelog.d/15531.misc

(limited to 'synapse')

diff --git a/changelog.d/15531.misc b/changelog.d/15531.misc
new file mode 100644
index 0000000000..6d4da961b5
--- /dev/null
+++ b/changelog.d/15531.misc
@@ -0,0 +1 @@
+Speed up deleting of old rows in `event_push_actions`.
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index ab8f354dc1..2e98a29fef 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -1612,6 +1612,15 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             # deletes.
             batch_size = self._rotate_count
 
+            if isinstance(self.database_engine, PostgresEngine):
+                # Temporarily disable sequential scans in this transaction. We
+                # need to do this as the postgres statistics don't take into
+                # account the `highlight = 0` part when estimating the
+                # distribution of `stream_ordering`. I.e. since we keep old
+                # highlight rows the query planner thinks there are way more old
+                # rows to delete than there actually are.
+                txn.execute("SET LOCAL enable_seqscan=off")
+
             txn.execute(
                 """
                 SELECT stream_ordering FROM event_push_actions
-- 
cgit 1.5.1


From 5f8822854d0427d132674615f13becf420c57bd2 Mon Sep 17 00:00:00 2001
From: Sandro <sandro.jaeckel@gmail.com>
Date: Wed, 3 May 2023 18:54:42 +0200
Subject: Use oEmbed for YouTube Shorts (#15025)

It seems that YouTube Shorts previews do not work in some
regions, but the oEmbed information for those areas is still
valid.

This causes YouTube Shorts to always use (only) the oEmbed
endpoint which is a minor regression for regions where the URL
preview was already working -- some of the additional video
metadata is lost. It is not likely that clients are using this today
and it is more beneficial to have a limited preview working everywhere
than unused metadata in the Open Graph response.
---
 changelog.d/15025.misc     |  1 +
 synapse/res/providers.json | 13 +++++++++++++
 2 files changed, 14 insertions(+)
 create mode 100644 changelog.d/15025.misc

(limited to 'synapse')

diff --git a/changelog.d/15025.misc b/changelog.d/15025.misc
new file mode 100644
index 0000000000..1f04d85729
--- /dev/null
+++ b/changelog.d/15025.misc
@@ -0,0 +1 @@
+Use oEmbed to generate URL previews for YouTube Shorts.
diff --git a/synapse/res/providers.json b/synapse/res/providers.json
index 7b9958e454..2dc9fec8e3 100644
--- a/synapse/res/providers.json
+++ b/synapse/res/providers.json
@@ -11,5 +11,18 @@
                 "url": "https://publish.twitter.com/oembed"
             }
         ]
+    },
+    {
+        "provider_name": "YouTube Shorts",
+        "provider_url": "http://www.youtube.com/",
+        "endpoints": [
+            {
+                "schemes": [
+                    "https://youtube.com/shorts/*",
+                    "https://*.youtube.com/shorts/*"
+                ],
+                "url": "https://www.youtube.com/oembed"
+            }
+        ]
     }
 ]
-- 
cgit 1.5.1


From ded8f3d349d8481d1c9a48835cde0b94f785e371 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 4 May 2023 07:54:13 -0400
Subject: Update the base rules to remove the dont_notify action. (MSC3987)
 (#15534)

A dont_notify action is a no-op (and coalesce is undefined). These are
both considered no-ops by the spec, per MSC3987, and the predefined
push rules were updated to remove dont_notify from the list of actions.
---
 changelog.d/15534.misc         | 1 +
 rust/src/push/base_rules.rs    | 6 +++---
 rust/src/push/evaluator.rs     | 7 ++++---
 rust/src/push/mod.rs           | 6 ++++--
 synapse/handlers/push_rules.py | 2 ++
 5 files changed, 14 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/15534.misc

(limited to 'synapse')

diff --git a/changelog.d/15534.misc b/changelog.d/15534.misc
new file mode 100644
index 0000000000..fd9ba2a6e1
--- /dev/null
+++ b/changelog.d/15534.misc
@@ -0,0 +1 @@
+Implement [MSC3987](https://github.com/matrix-org/matrix-spec-proposals/pull/3987) by removing `"dont_notify"` from the list of actions in default push rules.
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index d7c73c1f25..51372e1553 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -57,7 +57,7 @@ pub const BASE_PREPEND_OVERRIDE_RULES: &[PushRule] = &[PushRule {
     rule_id: Cow::Borrowed("global/override/.m.rule.master"),
     priority_class: 5,
     conditions: Cow::Borrowed(&[]),
-    actions: Cow::Borrowed(&[Action::DontNotify]),
+    actions: Cow::Borrowed(&[]),
     default: true,
     default_enabled: false,
 }];
@@ -88,7 +88,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
                 pattern: Cow::Borrowed("m.notice"),
             },
         ))]),
-        actions: Cow::Borrowed(&[Action::DontNotify]),
+        actions: Cow::Borrowed(&[]),
         default: true,
         default_enabled: true,
     },
@@ -122,7 +122,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
                 pattern: Cow::Borrowed("m.room.member"),
             },
         ))]),
-        actions: Cow::Borrowed(&[Action::DontNotify]),
+        actions: Cow::Borrowed(&[]),
         default: true,
         default_enabled: true,
     },
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index 6941c61ea4..2d7c4c06be 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -140,7 +140,7 @@ impl PushRuleEvaluator {
     /// name.
     ///
     /// Returns the set of actions, if any, that match (filtering out any
-    /// `dont_notify` actions).
+    /// `dont_notify` and `coalesce` actions).
     pub fn run(
         &self,
         push_rules: &FilteredPushRules,
@@ -198,8 +198,9 @@ impl PushRuleEvaluator {
             let actions = push_rule
                 .actions
                 .iter()
-                // Filter out "dont_notify" actions, as we don't store them.
-                .filter(|a| **a != Action::DontNotify)
+                // Filter out "dont_notify" and "coalesce" actions, as we don't store them
+                // (since they result in no action by the pushers).
+                .filter(|a| **a != Action::DontNotify && **a != Action::Coalesce)
                 .cloned()
                 .collect();
 
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index 42c7c84132..f19d3c739f 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -164,11 +164,13 @@ impl PushRule {
 /// The "action" Synapse should perform for a matching push rule.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum Action {
-    DontNotify,
     Notify,
-    Coalesce,
     SetTweak(SetTweak),
 
+    // Legacy actions that should be understood, but are equivalent to no-ops.
+    DontNotify,
+    Coalesce,
+
     // An unrecognized custom action.
     Unknown(Value),
 }
diff --git a/synapse/handlers/push_rules.py b/synapse/handlers/push_rules.py
index 1219672a59..813f3aa2d5 100644
--- a/synapse/handlers/push_rules.py
+++ b/synapse/handlers/push_rules.py
@@ -129,6 +129,8 @@ def check_actions(actions: List[Union[str, JsonDict]]) -> None:
         raise InvalidRuleException("No actions found")
 
     for a in actions:
+        # "dont_notify" and "coalesce" are legacy actions. They are allowed, but
+        # ignored (resulting in no action from the pusher).
         if a in ["notify", "dont_notify", "coalesce"]:
             pass
         elif isinstance(a, dict) and "set_tweak" in a:
-- 
cgit 1.5.1


From 2e59e97ebd02e93da39e6c90335d3b24ed01217a Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Thu, 4 May 2023 15:18:22 +0100
Subject: Move ThirdPartyEventRules into module_api/callbacks (#15535)

---
 changelog.d/15535.misc                             |   1 +
 synapse/app/_base.py                               |   4 +-
 synapse/events/third_party_rules.py                | 593 ---------------------
 synapse/handlers/auth.py                           |   2 +-
 synapse/handlers/deactivate_account.py             |   4 +-
 synapse/handlers/directory.py                      |   6 +-
 synapse/handlers/federation.py                     |   6 +-
 synapse/handlers/federation_event.py               |   4 +-
 synapse/handlers/message.py                        |   7 +-
 synapse/handlers/profile.py                        |   2 +-
 synapse/handlers/room.py                           |  10 +-
 synapse/handlers/room_member.py                    |   6 +-
 synapse/module_api/__init__.py                     |  31 +-
 synapse/module_api/callbacks/__init__.py           |   4 +
 .../callbacks/third_party_event_rules_callbacks.py | 591 ++++++++++++++++++++
 synapse/notifier.py                                |   2 +-
 synapse/rest/admin/rooms.py                        |   2 +-
 synapse/server.py                                  |   5 -
 tests/rest/client/test_third_party_rules.py        |  56 +-
 tests/server.py                                    |   4 +-
 20 files changed, 682 insertions(+), 658 deletions(-)
 create mode 100644 changelog.d/15535.misc
 delete mode 100644 synapse/events/third_party_rules.py
 create mode 100644 synapse/module_api/callbacks/third_party_event_rules_callbacks.py

(limited to 'synapse')

diff --git a/changelog.d/15535.misc b/changelog.d/15535.misc
new file mode 100644
index 0000000000..9981606c32
--- /dev/null
+++ b/changelog.d/15535.misc
@@ -0,0 +1 @@
+Move various module API callback registration methods to a dedicated class.
\ No newline at end of file
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 954402e4d2..7f83b34d89 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -64,7 +64,6 @@ from synapse.config.homeserver import HomeServerConfig
 from synapse.config.server import ListenerConfig, ManholeConfig, TCPListenerConfig
 from synapse.crypto import context_factory
 from synapse.events.presence_router import load_legacy_presence_router
-from synapse.events.third_party_rules import load_legacy_third_party_event_rules
 from synapse.handlers.auth import load_legacy_password_auth_providers
 from synapse.http.site import SynapseSite
 from synapse.logging.context import PreserveLoggingContext
@@ -73,6 +72,9 @@ from synapse.metrics import install_gc_manager, register_threadpool
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.metrics.jemalloc import setup_jemalloc_stats
 from synapse.module_api.callbacks.spamchecker_callbacks import load_legacy_spam_checkers
+from synapse.module_api.callbacks.third_party_event_rules_callbacks import (
+    load_legacy_third_party_event_rules,
+)
 from synapse.types import ISynapseReactor
 from synapse.util import SYNAPSE_VERSION
 from synapse.util.caches.lrucache import setup_expire_lru_cache_entries
diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py
deleted file mode 100644
index 61d4530be7..0000000000
--- a/synapse/events/third_party_rules.py
+++ /dev/null
@@ -1,593 +0,0 @@
-# Copyright 2019 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import logging
-from typing import TYPE_CHECKING, Any, Awaitable, Callable, List, Optional, Tuple
-
-from twisted.internet.defer import CancelledError
-
-from synapse.api.errors import ModuleFailedException, SynapseError
-from synapse.events import EventBase
-from synapse.events.snapshot import UnpersistedEventContextBase
-from synapse.storage.roommember import ProfileInfo
-from synapse.types import Requester, StateMap
-from synapse.util.async_helpers import delay_cancellation, maybe_awaitable
-
-if TYPE_CHECKING:
-    from synapse.server import HomeServer
-
-logger = logging.getLogger(__name__)
-
-
-CHECK_EVENT_ALLOWED_CALLBACK = Callable[
-    [EventBase, StateMap[EventBase]], Awaitable[Tuple[bool, Optional[dict]]]
-]
-ON_CREATE_ROOM_CALLBACK = Callable[[Requester, dict, bool], Awaitable]
-CHECK_THREEPID_CAN_BE_INVITED_CALLBACK = Callable[
-    [str, str, StateMap[EventBase]], Awaitable[bool]
-]
-CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK = Callable[
-    [str, StateMap[EventBase], str], Awaitable[bool]
-]
-ON_NEW_EVENT_CALLBACK = Callable[[EventBase, StateMap[EventBase]], Awaitable]
-CHECK_CAN_SHUTDOWN_ROOM_CALLBACK = Callable[[str, str], Awaitable[bool]]
-CHECK_CAN_DEACTIVATE_USER_CALLBACK = Callable[[str, bool], Awaitable[bool]]
-ON_PROFILE_UPDATE_CALLBACK = Callable[[str, ProfileInfo, bool, bool], Awaitable]
-ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK = Callable[[str, bool, bool], Awaitable]
-ON_THREEPID_BIND_CALLBACK = Callable[[str, str, str], Awaitable]
-ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK = Callable[[str, str, str], Awaitable]
-ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK = Callable[[str, str, str], Awaitable]
-
-
-def load_legacy_third_party_event_rules(hs: "HomeServer") -> None:
-    """Wrapper that loads a third party event rules module configured using the old
-    configuration, and registers the hooks they implement.
-    """
-    if hs.config.thirdpartyrules.third_party_event_rules is None:
-        return
-
-    module, config = hs.config.thirdpartyrules.third_party_event_rules
-
-    api = hs.get_module_api()
-    third_party_rules = module(config=config, module_api=api)
-
-    # The known hooks. If a module implements a method which name appears in this set,
-    # we'll want to register it.
-    third_party_event_rules_methods = {
-        "check_event_allowed",
-        "on_create_room",
-        "check_threepid_can_be_invited",
-        "check_visibility_can_be_modified",
-    }
-
-    def async_wrapper(f: Optional[Callable]) -> Optional[Callable[..., Awaitable]]:
-        # f might be None if the callback isn't implemented by the module. In this
-        # case we don't want to register a callback at all so we return None.
-        if f is None:
-            return None
-
-        # We return a separate wrapper for these methods because, in order to wrap them
-        # correctly, we need to await its result. Therefore it doesn't make a lot of
-        # sense to make it go through the run() wrapper.
-        if f.__name__ == "check_event_allowed":
-            # We need to wrap check_event_allowed because its old form would return either
-            # a boolean or a dict, but now we want to return the dict separately from the
-            # boolean.
-            async def wrap_check_event_allowed(
-                event: EventBase,
-                state_events: StateMap[EventBase],
-            ) -> Tuple[bool, Optional[dict]]:
-                # Assertion required because mypy can't prove we won't change
-                # `f` back to `None`. See
-                # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions
-                assert f is not None
-
-                res = await f(event, state_events)
-                if isinstance(res, dict):
-                    return True, res
-                else:
-                    return res, None
-
-            return wrap_check_event_allowed
-
-        if f.__name__ == "on_create_room":
-            # We need to wrap on_create_room because its old form would return a boolean
-            # if the room creation is denied, but now we just want it to raise an
-            # exception.
-            async def wrap_on_create_room(
-                requester: Requester, config: dict, is_requester_admin: bool
-            ) -> None:
-                # Assertion required because mypy can't prove we won't change
-                # `f` back to `None`. See
-                # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions
-                assert f is not None
-
-                res = await f(requester, config, is_requester_admin)
-                if res is False:
-                    raise SynapseError(
-                        403,
-                        "Room creation forbidden with these parameters",
-                    )
-
-            return wrap_on_create_room
-
-        def run(*args: Any, **kwargs: Any) -> Awaitable:
-            # Assertion required because mypy can't prove we won't change  `f`
-            # back to `None`. See
-            # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions
-            assert f is not None
-
-            return maybe_awaitable(f(*args, **kwargs))
-
-        return run
-
-    # Register the hooks through the module API.
-    hooks = {
-        hook: async_wrapper(getattr(third_party_rules, hook, None))
-        for hook in third_party_event_rules_methods
-    }
-
-    api.register_third_party_rules_callbacks(**hooks)
-
-
-class ThirdPartyEventRules:
-    """Allows server admins to provide a Python module implementing an extra
-    set of rules to apply when processing events.
-
-    This is designed to help admins of closed federations with enforcing custom
-    behaviours.
-    """
-
-    def __init__(self, hs: "HomeServer"):
-        self.third_party_rules = None
-
-        self.store = hs.get_datastores().main
-        self._storage_controllers = hs.get_storage_controllers()
-
-        self._check_event_allowed_callbacks: List[CHECK_EVENT_ALLOWED_CALLBACK] = []
-        self._on_create_room_callbacks: List[ON_CREATE_ROOM_CALLBACK] = []
-        self._check_threepid_can_be_invited_callbacks: List[
-            CHECK_THREEPID_CAN_BE_INVITED_CALLBACK
-        ] = []
-        self._check_visibility_can_be_modified_callbacks: List[
-            CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK
-        ] = []
-        self._on_new_event_callbacks: List[ON_NEW_EVENT_CALLBACK] = []
-        self._check_can_shutdown_room_callbacks: List[
-            CHECK_CAN_SHUTDOWN_ROOM_CALLBACK
-        ] = []
-        self._check_can_deactivate_user_callbacks: List[
-            CHECK_CAN_DEACTIVATE_USER_CALLBACK
-        ] = []
-        self._on_profile_update_callbacks: List[ON_PROFILE_UPDATE_CALLBACK] = []
-        self._on_user_deactivation_status_changed_callbacks: List[
-            ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK
-        ] = []
-        self._on_threepid_bind_callbacks: List[ON_THREEPID_BIND_CALLBACK] = []
-        self._on_add_user_third_party_identifier_callbacks: List[
-            ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK
-        ] = []
-        self._on_remove_user_third_party_identifier_callbacks: List[
-            ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK
-        ] = []
-
-    def register_third_party_rules_callbacks(
-        self,
-        check_event_allowed: Optional[CHECK_EVENT_ALLOWED_CALLBACK] = None,
-        on_create_room: Optional[ON_CREATE_ROOM_CALLBACK] = None,
-        check_threepid_can_be_invited: Optional[
-            CHECK_THREEPID_CAN_BE_INVITED_CALLBACK
-        ] = None,
-        check_visibility_can_be_modified: Optional[
-            CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK
-        ] = None,
-        on_new_event: Optional[ON_NEW_EVENT_CALLBACK] = None,
-        check_can_shutdown_room: Optional[CHECK_CAN_SHUTDOWN_ROOM_CALLBACK] = None,
-        check_can_deactivate_user: Optional[CHECK_CAN_DEACTIVATE_USER_CALLBACK] = None,
-        on_profile_update: Optional[ON_PROFILE_UPDATE_CALLBACK] = None,
-        on_user_deactivation_status_changed: Optional[
-            ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK
-        ] = None,
-        on_threepid_bind: Optional[ON_THREEPID_BIND_CALLBACK] = None,
-        on_add_user_third_party_identifier: Optional[
-            ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK
-        ] = None,
-        on_remove_user_third_party_identifier: Optional[
-            ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK
-        ] = None,
-    ) -> None:
-        """Register callbacks from modules for each hook."""
-        if check_event_allowed is not None:
-            self._check_event_allowed_callbacks.append(check_event_allowed)
-
-        if on_create_room is not None:
-            self._on_create_room_callbacks.append(on_create_room)
-
-        if check_threepid_can_be_invited is not None:
-            self._check_threepid_can_be_invited_callbacks.append(
-                check_threepid_can_be_invited,
-            )
-
-        if check_visibility_can_be_modified is not None:
-            self._check_visibility_can_be_modified_callbacks.append(
-                check_visibility_can_be_modified,
-            )
-
-        if on_new_event is not None:
-            self._on_new_event_callbacks.append(on_new_event)
-
-        if check_can_shutdown_room is not None:
-            self._check_can_shutdown_room_callbacks.append(check_can_shutdown_room)
-
-        if check_can_deactivate_user is not None:
-            self._check_can_deactivate_user_callbacks.append(check_can_deactivate_user)
-        if on_profile_update is not None:
-            self._on_profile_update_callbacks.append(on_profile_update)
-
-        if on_user_deactivation_status_changed is not None:
-            self._on_user_deactivation_status_changed_callbacks.append(
-                on_user_deactivation_status_changed,
-            )
-
-        if on_threepid_bind is not None:
-            self._on_threepid_bind_callbacks.append(on_threepid_bind)
-
-        if on_add_user_third_party_identifier is not None:
-            self._on_add_user_third_party_identifier_callbacks.append(
-                on_add_user_third_party_identifier
-            )
-
-        if on_remove_user_third_party_identifier is not None:
-            self._on_remove_user_third_party_identifier_callbacks.append(
-                on_remove_user_third_party_identifier
-            )
-
-    async def check_event_allowed(
-        self,
-        event: EventBase,
-        context: UnpersistedEventContextBase,
-    ) -> Tuple[bool, Optional[dict]]:
-        """Check if a provided event should be allowed in the given context.
-
-        The module can return:
-            * True: the event is allowed.
-            * False: the event is not allowed, and should be rejected with M_FORBIDDEN.
-
-        If the event is allowed, the module can also return a dictionary to use as a
-        replacement for the event.
-
-        Args:
-            event: The event to be checked.
-            context: The context of the event.
-
-        Returns:
-            The result from the ThirdPartyRules module, as above.
-        """
-        # Bail out early without hitting the store if we don't have any callbacks to run.
-        if len(self._check_event_allowed_callbacks) == 0:
-            return True, None
-
-        prev_state_ids = await context.get_prev_state_ids()
-
-        # Retrieve the state events from the database.
-        events = await self.store.get_events(prev_state_ids.values())
-        state_events = {(ev.type, ev.state_key): ev for ev in events.values()}
-
-        # Ensure that the event is frozen, to make sure that the module is not tempted
-        # to try to modify it. Any attempt to modify it at this point will invalidate
-        # the hashes and signatures.
-        event.freeze()
-
-        for callback in self._check_event_allowed_callbacks:
-            try:
-                res, replacement_data = await delay_cancellation(
-                    callback(event, state_events)
-                )
-            except CancelledError:
-                raise
-            except SynapseError as e:
-                # FIXME: Being able to throw SynapseErrors is relied upon by
-                # some modules. PR #10386 accidentally broke this ability.
-                # That said, we aren't keen on exposing this implementation detail
-                # to modules and we should one day have a proper way to do what
-                # is wanted.
-                # This module callback needs a rework so that hacks such as
-                # this one are not necessary.
-                raise e
-            except Exception:
-                raise ModuleFailedException(
-                    "Failed to run `check_event_allowed` module API callback"
-                )
-
-            # Return if the event shouldn't be allowed or if the module came up with a
-            # replacement dict for the event.
-            if res is False:
-                return res, None
-            elif isinstance(replacement_data, dict):
-                return True, replacement_data
-
-        return True, None
-
-    async def on_create_room(
-        self, requester: Requester, config: dict, is_requester_admin: bool
-    ) -> None:
-        """Intercept requests to create room to maybe deny it (via an exception) or
-        update the request config.
-
-        Args:
-            requester
-            config: The creation config from the client.
-            is_requester_admin: If the requester is an admin
-        """
-        for callback in self._on_create_room_callbacks:
-            try:
-                await callback(requester, config, is_requester_admin)
-            except Exception as e:
-                # Don't silence the errors raised by this callback since we expect it to
-                # raise an exception to deny the creation of the room; instead make sure
-                # it's a SynapseError we can send to clients.
-                if not isinstance(e, SynapseError):
-                    e = SynapseError(
-                        403, "Room creation forbidden with these parameters"
-                    )
-
-                raise e
-
-    async def check_threepid_can_be_invited(
-        self, medium: str, address: str, room_id: str
-    ) -> bool:
-        """Check if a provided 3PID can be invited in the given room.
-
-        Args:
-            medium: The 3PID's medium.
-            address: The 3PID's address.
-            room_id: The room we want to invite the threepid to.
-
-        Returns:
-            True if the 3PID can be invited, False if not.
-        """
-        # Bail out early without hitting the store if we don't have any callbacks to run.
-        if len(self._check_threepid_can_be_invited_callbacks) == 0:
-            return True
-
-        state_events = await self._get_state_map_for_room(room_id)
-
-        for callback in self._check_threepid_can_be_invited_callbacks:
-            try:
-                threepid_can_be_invited = await delay_cancellation(
-                    callback(medium, address, state_events)
-                )
-                if threepid_can_be_invited is False:
-                    return False
-            except CancelledError:
-                raise
-            except Exception as e:
-                logger.warning("Failed to run module API callback %s: %s", callback, e)
-
-        return True
-
-    async def check_visibility_can_be_modified(
-        self, room_id: str, new_visibility: str
-    ) -> bool:
-        """Check if a room is allowed to be published to, or removed from, the public room
-        list.
-
-        Args:
-            room_id: The ID of the room.
-            new_visibility: The new visibility state. Either "public" or "private".
-
-        Returns:
-            True if the room's visibility can be modified, False if not.
-        """
-        # Bail out early without hitting the store if we don't have any callback
-        if len(self._check_visibility_can_be_modified_callbacks) == 0:
-            return True
-
-        state_events = await self._get_state_map_for_room(room_id)
-
-        for callback in self._check_visibility_can_be_modified_callbacks:
-            try:
-                visibility_can_be_modified = await delay_cancellation(
-                    callback(room_id, state_events, new_visibility)
-                )
-                if visibility_can_be_modified is False:
-                    return False
-            except CancelledError:
-                raise
-            except Exception as e:
-                logger.warning("Failed to run module API callback %s: %s", callback, e)
-
-        return True
-
-    async def on_new_event(self, event_id: str) -> None:
-        """Let modules act on events after they've been sent (e.g. auto-accepting
-        invites, etc.)
-
-        Args:
-            event_id: The ID of the event.
-        """
-        # Bail out early without hitting the store if we don't have any callbacks
-        if len(self._on_new_event_callbacks) == 0:
-            return
-
-        event = await self.store.get_event(event_id)
-        state_events = await self._get_state_map_for_room(event.room_id)
-
-        for callback in self._on_new_event_callbacks:
-            try:
-                await callback(event, state_events)
-            except Exception as e:
-                logger.exception(
-                    "Failed to run module API callback %s: %s", callback, e
-                )
-
-    async def check_can_shutdown_room(self, user_id: str, room_id: str) -> bool:
-        """Intercept requests to shutdown a room. If `False` is returned, the
-         room must not be shut down.
-
-        Args:
-            requester: The ID of the user requesting the shutdown.
-            room_id: The ID of the room.
-        """
-        for callback in self._check_can_shutdown_room_callbacks:
-            try:
-                can_shutdown_room = await delay_cancellation(callback(user_id, room_id))
-                if can_shutdown_room is False:
-                    return False
-            except CancelledError:
-                raise
-            except Exception as e:
-                logger.exception(
-                    "Failed to run module API callback %s: %s", callback, e
-                )
-        return True
-
-    async def check_can_deactivate_user(
-        self,
-        user_id: str,
-        by_admin: bool,
-    ) -> bool:
-        """Intercept requests to deactivate a user. If `False` is returned, the
-        user should not be deactivated.
-
-        Args:
-            requester
-            user_id: The ID of the room.
-        """
-        for callback in self._check_can_deactivate_user_callbacks:
-            try:
-                can_deactivate_user = await delay_cancellation(
-                    callback(user_id, by_admin)
-                )
-                if can_deactivate_user is False:
-                    return False
-            except CancelledError:
-                raise
-            except Exception as e:
-                logger.exception(
-                    "Failed to run module API callback %s: %s", callback, e
-                )
-        return True
-
-    async def _get_state_map_for_room(self, room_id: str) -> StateMap[EventBase]:
-        """Given a room ID, return the state events of that room.
-
-        Args:
-            room_id: The ID of the room.
-
-        Returns:
-            A dict mapping (event type, state key) to state event.
-        """
-        return await self._storage_controllers.state.get_current_state(room_id)
-
-    async def on_profile_update(
-        self, user_id: str, new_profile: ProfileInfo, by_admin: bool, deactivation: bool
-    ) -> None:
-        """Called after the global profile of a user has been updated. Does not include
-        per-room profile changes.
-
-        Args:
-            user_id: The user whose profile was changed.
-            new_profile: The updated profile for the user.
-            by_admin: Whether the profile update was performed by a server admin.
-            deactivation: Whether this change was made while deactivating the user.
-        """
-        for callback in self._on_profile_update_callbacks:
-            try:
-                await callback(user_id, new_profile, by_admin, deactivation)
-            except Exception as e:
-                logger.exception(
-                    "Failed to run module API callback %s: %s", callback, e
-                )
-
-    async def on_user_deactivation_status_changed(
-        self, user_id: str, deactivated: bool, by_admin: bool
-    ) -> None:
-        """Called after a user has been deactivated or reactivated.
-
-        Args:
-            user_id: The deactivated user.
-            deactivated: Whether the user is now deactivated.
-            by_admin: Whether the deactivation was performed by a server admin.
-        """
-        for callback in self._on_user_deactivation_status_changed_callbacks:
-            try:
-                await callback(user_id, deactivated, by_admin)
-            except Exception as e:
-                logger.exception(
-                    "Failed to run module API callback %s: %s", callback, e
-                )
-
-    async def on_threepid_bind(self, user_id: str, medium: str, address: str) -> None:
-        """Called after a threepid association has been verified and stored.
-
-        Note that this callback is called when an association is created on the
-        local homeserver, not when it's created on an identity server (and then kept track
-        of so that it can be unbound on the same IS later on).
-
-        THIS MODULE CALLBACK METHOD HAS BEEN DEPRECATED. Please use the
-        `on_add_user_third_party_identifier` callback method instead.
-
-        Args:
-            user_id: the user being associated with the threepid.
-            medium: the threepid's medium.
-            address: the threepid's address.
-        """
-        for callback in self._on_threepid_bind_callbacks:
-            try:
-                await callback(user_id, medium, address)
-            except Exception as e:
-                logger.exception(
-                    "Failed to run module API callback %s: %s", callback, e
-                )
-
-    async def on_add_user_third_party_identifier(
-        self, user_id: str, medium: str, address: str
-    ) -> None:
-        """Called when an association between a user's Matrix ID and a third-party ID
-        (email, phone number) has successfully been registered on the homeserver.
-
-        Args:
-            user_id: The User ID included in the association.
-            medium: The medium of the third-party ID (email, msisdn).
-            address: The address of the third-party ID (i.e. an email address).
-        """
-        for callback in self._on_add_user_third_party_identifier_callbacks:
-            try:
-                await callback(user_id, medium, address)
-            except Exception as e:
-                logger.exception(
-                    "Failed to run module API callback %s: %s", callback, e
-                )
-
-    async def on_remove_user_third_party_identifier(
-        self, user_id: str, medium: str, address: str
-    ) -> None:
-        """Called when an association between a user's Matrix ID and a third-party ID
-        (email, phone number) has been successfully removed on the homeserver.
-
-        This is called *after* any known bindings on identity servers for this
-        association have been removed.
-
-        Args:
-            user_id: The User ID included in the removed association.
-            medium: The medium of the third-party ID (email, msisdn).
-            address: The address of the third-party ID (i.e. an email address).
-        """
-        for callback in self._on_remove_user_third_party_identifier_callbacks:
-            try:
-                await callback(user_id, medium, address)
-            except Exception as e:
-                logger.exception(
-                    "Failed to run module API callback %s: %s", callback, e
-                )
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 1e89447044..59e340974d 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -212,7 +212,7 @@ class AuthHandler:
         self._password_enabled_for_login = hs.config.auth.password_enabled_for_login
         self._password_enabled_for_reauth = hs.config.auth.password_enabled_for_reauth
         self._password_localdb_enabled = hs.config.auth.password_localdb_enabled
-        self._third_party_rules = hs.get_third_party_event_rules()
+        self._third_party_rules = hs.get_module_api_callbacks().third_party_event_rules
 
         # Ratelimiter for failed auth during UIA. Uses same ratelimit config
         # as per `rc_login.failed_attempts`.
diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index bd5867491b..f299b89a1b 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -39,11 +39,11 @@ class DeactivateAccountHandler:
         self._profile_handler = hs.get_profile_handler()
         self.user_directory_handler = hs.get_user_directory_handler()
         self._server_name = hs.hostname
-        self._third_party_rules = hs.get_third_party_event_rules()
+        self._third_party_rules = hs.get_module_api_callbacks().third_party_event_rules
 
         # Flag that indicates whether the process to part users from rooms is running
         self._user_parter_running = False
-        self._third_party_rules = hs.get_third_party_event_rules()
+        self._third_party_rules = hs.get_module_api_callbacks().third_party_event_rules
 
         # Start the user parter loop so it can resume parting users from rooms where
         # it left off (if it has work left to do).
diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py
index 5e8316e2e5..1e0623c7f8 100644
--- a/synapse/handlers/directory.py
+++ b/synapse/handlers/directory.py
@@ -52,7 +52,9 @@ class DirectoryHandler:
         self.config = hs.config
         self.enable_room_list_search = hs.config.roomdirectory.enable_room_list_search
         self.require_membership = hs.config.server.require_membership_for_aliases
-        self.third_party_event_rules = hs.get_third_party_event_rules()
+        self._third_party_event_rules = (
+            hs.get_module_api_callbacks().third_party_event_rules
+        )
         self.server_name = hs.hostname
 
         self.federation = hs.get_federation_client()
@@ -503,7 +505,7 @@ class DirectoryHandler:
             # Check if publishing is blocked by a third party module
             allowed_by_third_party_rules = (
                 await (
-                    self.third_party_event_rules.check_visibility_can_be_modified(
+                    self._third_party_event_rules.check_visibility_can_be_modified(
                         room_id, visibility
                     )
                 )
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index d1a88cc604..4ad808a5b4 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -169,7 +169,9 @@ class FederationHandler:
 
         self._room_backfill = Linearizer("room_backfill")
 
-        self.third_party_event_rules = hs.get_third_party_event_rules()
+        self._third_party_event_rules = (
+            hs.get_module_api_callbacks().third_party_event_rules
+        )
 
         # Tracks running partial state syncs by room ID.
         # Partial state syncs currently only run on the main process, so it's okay to
@@ -1253,7 +1255,7 @@ class FederationHandler:
             unpersisted_context,
         ) = await self.event_creation_handler.create_new_client_event(builder=builder)
 
-        event_allowed, _ = await self.third_party_event_rules.check_event_allowed(
+        event_allowed, _ = await self._third_party_event_rules.check_event_allowed(
             event, unpersisted_context
         )
         if not event_allowed:
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 06609fab93..fc15024166 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -157,7 +157,9 @@ class FederationEventHandler:
         self._get_room_member_handler = hs.get_room_member_handler
 
         self._federation_client = hs.get_federation_client()
-        self._third_party_event_rules = hs.get_third_party_event_rules()
+        self._third_party_event_rules = (
+            hs.get_module_api_callbacks().third_party_event_rules
+        )
         self._notifier = hs.get_notifier()
 
         self._is_mine_id = hs.is_mine_id
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index ac1932a7f9..0b61c2272b 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -77,7 +77,6 @@ from synapse.util.metrics import measure_func
 from synapse.visibility import get_effective_room_visibility_from_state
 
 if TYPE_CHECKING:
-    from synapse.events.third_party_rules import ThirdPartyEventRules
     from synapse.server import HomeServer
 
 logger = logging.getLogger(__name__)
@@ -509,8 +508,8 @@ class EventCreationHandler:
         self._bulk_push_rule_evaluator = hs.get_bulk_push_rule_evaluator()
 
         self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
-        self.third_party_event_rules: "ThirdPartyEventRules" = (
-            self.hs.get_third_party_event_rules()
+        self._third_party_event_rules = (
+            self.hs.get_module_api_callbacks().third_party_event_rules
         )
 
         self._block_events_without_consent_error = (
@@ -1314,7 +1313,7 @@ class EventCreationHandler:
         if requester:
             context.app_service = requester.app_service
 
-        res, new_content = await self.third_party_event_rules.check_event_allowed(
+        res, new_content = await self._third_party_event_rules.check_event_allowed(
             event, context
         )
         if res is False:
diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py
index 440d3f4acd..983b9b66fb 100644
--- a/synapse/handlers/profile.py
+++ b/synapse/handlers/profile.py
@@ -61,7 +61,7 @@ class ProfileHandler:
 
         self.server_name = hs.config.server.server_name
 
-        self._third_party_rules = hs.get_third_party_event_rules()
+        self._third_party_rules = hs.get_module_api_callbacks().third_party_event_rules
 
     async def get_profile(self, user_id: str, ignore_backoff: bool = True) -> JsonDict:
         target_user = UserID.from_string(user_id)
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index efd9612d90..5e1702d78a 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -160,7 +160,9 @@ class RoomCreationHandler:
         )
         self._server_notices_mxid = hs.config.servernotices.server_notices_mxid
 
-        self.third_party_event_rules = hs.get_third_party_event_rules()
+        self._third_party_event_rules = (
+            hs.get_module_api_callbacks().third_party_event_rules
+        )
 
     async def upgrade_room(
         self, requester: Requester, old_room_id: str, new_version: RoomVersion
@@ -742,7 +744,7 @@ class RoomCreationHandler:
 
         # Let the third party rules modify the room creation config if needed, or abort
         # the room creation entirely with an exception.
-        await self.third_party_event_rules.on_create_room(
+        await self._third_party_event_rules.on_create_room(
             requester, config, is_requester_admin=is_requester_admin
         )
 
@@ -879,7 +881,7 @@ class RoomCreationHandler:
         # Check whether this visibility value is blocked by a third party module
         allowed_by_third_party_rules = (
             await (
-                self.third_party_event_rules.check_visibility_can_be_modified(
+                self._third_party_event_rules.check_visibility_can_be_modified(
                     room_id, visibility
                 )
             )
@@ -1731,7 +1733,7 @@ class RoomShutdownHandler:
         self.room_member_handler = hs.get_room_member_handler()
         self._room_creation_handler = hs.get_room_creation_handler()
         self._replication = hs.get_replication_data_handler()
-        self._third_party_rules = hs.get_third_party_event_rules()
+        self._third_party_rules = hs.get_module_api_callbacks().third_party_event_rules
         self.event_creation_handler = hs.get_event_creation_handler()
         self.store = hs.get_datastores().main
 
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index fbef600acd..af0ca5c26d 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -100,7 +100,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
 
         self.clock = hs.get_clock()
         self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
-        self.third_party_event_rules = hs.get_third_party_event_rules()
+        self._third_party_event_rules = (
+            hs.get_module_api_callbacks().third_party_event_rules
+        )
         self._server_notices_mxid = self.config.servernotices.server_notices_mxid
         self._enable_lookup = hs.config.registration.enable_3pid_lookup
         self.allow_per_room_profiles = self.config.server.allow_per_room_profiles
@@ -1560,7 +1562,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         # can't just rely on the standard ratelimiting of events.
         await self._third_party_invite_limiter.ratelimit(requester)
 
-        can_invite = await self.third_party_event_rules.check_threepid_can_be_invited(
+        can_invite = await self._third_party_event_rules.check_threepid_can_be_invited(
             medium, address, room_id
         )
         if not can_invite:
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 90eff030b5..4b59e6825b 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -44,20 +44,6 @@ from synapse.events.presence_router import (
     GET_USERS_FOR_STATES_CALLBACK,
     PresenceRouter,
 )
-from synapse.events.third_party_rules import (
-    CHECK_CAN_DEACTIVATE_USER_CALLBACK,
-    CHECK_CAN_SHUTDOWN_ROOM_CALLBACK,
-    CHECK_EVENT_ALLOWED_CALLBACK,
-    CHECK_THREEPID_CAN_BE_INVITED_CALLBACK,
-    CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK,
-    ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK,
-    ON_CREATE_ROOM_CALLBACK,
-    ON_NEW_EVENT_CALLBACK,
-    ON_PROFILE_UPDATE_CALLBACK,
-    ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK,
-    ON_THREEPID_BIND_CALLBACK,
-    ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK,
-)
 from synapse.handlers.account_data import ON_ACCOUNT_DATA_UPDATED_CALLBACK
 from synapse.handlers.auth import (
     CHECK_3PID_AUTH_CALLBACK,
@@ -105,6 +91,20 @@ from synapse.module_api.callbacks.spamchecker_callbacks import (
     USER_MAY_SEND_3PID_INVITE_CALLBACK,
     SpamCheckerModuleApiCallbacks,
 )
+from synapse.module_api.callbacks.third_party_event_rules_callbacks import (
+    CHECK_CAN_DEACTIVATE_USER_CALLBACK,
+    CHECK_CAN_SHUTDOWN_ROOM_CALLBACK,
+    CHECK_EVENT_ALLOWED_CALLBACK,
+    CHECK_THREEPID_CAN_BE_INVITED_CALLBACK,
+    CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK,
+    ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK,
+    ON_CREATE_ROOM_CALLBACK,
+    ON_NEW_EVENT_CALLBACK,
+    ON_PROFILE_UPDATE_CALLBACK,
+    ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK,
+    ON_THREEPID_BIND_CALLBACK,
+    ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK,
+)
 from synapse.push.httppusher import HttpPusher
 from synapse.rest.client.login import LoginResponse
 from synapse.storage import DataStore
@@ -273,7 +273,6 @@ class ModuleApi:
         self._public_room_list_manager = PublicRoomListManager(hs)
         self._account_data_manager = AccountDataManager(hs)
 
-        self._third_party_event_rules = hs.get_third_party_event_rules()
         self._password_auth_provider = hs.get_password_auth_provider()
         self._presence_router = hs.get_presence_router()
         self._account_data_handler = hs.get_account_data_handler()
@@ -371,7 +370,7 @@ class ModuleApi:
 
         Added in Synapse v1.39.0.
         """
-        return self._third_party_event_rules.register_third_party_rules_callbacks(
+        return self._callbacks.third_party_event_rules.register_third_party_rules_callbacks(
             check_event_allowed=check_event_allowed,
             on_create_room=on_create_room,
             check_threepid_can_be_invited=check_threepid_can_be_invited,
diff --git a/synapse/module_api/callbacks/__init__.py b/synapse/module_api/callbacks/__init__.py
index 5cdb2c003a..dcb036552b 100644
--- a/synapse/module_api/callbacks/__init__.py
+++ b/synapse/module_api/callbacks/__init__.py
@@ -23,9 +23,13 @@ from synapse.module_api.callbacks.account_validity_callbacks import (
 from synapse.module_api.callbacks.spamchecker_callbacks import (
     SpamCheckerModuleApiCallbacks,
 )
+from synapse.module_api.callbacks.third_party_event_rules_callbacks import (
+    ThirdPartyEventRulesModuleApiCallbacks,
+)
 
 
 class ModuleApiCallbacks:
     def __init__(self, hs: "HomeServer") -> None:
         self.account_validity = AccountValidityModuleApiCallbacks()
         self.spam_checker = SpamCheckerModuleApiCallbacks(hs)
+        self.third_party_event_rules = ThirdPartyEventRulesModuleApiCallbacks(hs)
diff --git a/synapse/module_api/callbacks/third_party_event_rules_callbacks.py b/synapse/module_api/callbacks/third_party_event_rules_callbacks.py
new file mode 100644
index 0000000000..911f37ba42
--- /dev/null
+++ b/synapse/module_api/callbacks/third_party_event_rules_callbacks.py
@@ -0,0 +1,591 @@
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import TYPE_CHECKING, Any, Awaitable, Callable, List, Optional, Tuple
+
+from twisted.internet.defer import CancelledError
+
+from synapse.api.errors import ModuleFailedException, SynapseError
+from synapse.events import EventBase
+from synapse.events.snapshot import UnpersistedEventContextBase
+from synapse.storage.roommember import ProfileInfo
+from synapse.types import Requester, StateMap
+from synapse.util.async_helpers import delay_cancellation, maybe_awaitable
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+CHECK_EVENT_ALLOWED_CALLBACK = Callable[
+    [EventBase, StateMap[EventBase]], Awaitable[Tuple[bool, Optional[dict]]]
+]
+ON_CREATE_ROOM_CALLBACK = Callable[[Requester, dict, bool], Awaitable]
+CHECK_THREEPID_CAN_BE_INVITED_CALLBACK = Callable[
+    [str, str, StateMap[EventBase]], Awaitable[bool]
+]
+CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK = Callable[
+    [str, StateMap[EventBase], str], Awaitable[bool]
+]
+ON_NEW_EVENT_CALLBACK = Callable[[EventBase, StateMap[EventBase]], Awaitable]
+CHECK_CAN_SHUTDOWN_ROOM_CALLBACK = Callable[[str, str], Awaitable[bool]]
+CHECK_CAN_DEACTIVATE_USER_CALLBACK = Callable[[str, bool], Awaitable[bool]]
+ON_PROFILE_UPDATE_CALLBACK = Callable[[str, ProfileInfo, bool, bool], Awaitable]
+ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK = Callable[[str, bool, bool], Awaitable]
+ON_THREEPID_BIND_CALLBACK = Callable[[str, str, str], Awaitable]
+ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK = Callable[[str, str, str], Awaitable]
+ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK = Callable[[str, str, str], Awaitable]
+
+
+def load_legacy_third_party_event_rules(hs: "HomeServer") -> None:
+    """Wrapper that loads a third party event rules module configured using the old
+    configuration, and registers the hooks they implement.
+    """
+    if hs.config.thirdpartyrules.third_party_event_rules is None:
+        return
+
+    module, config = hs.config.thirdpartyrules.third_party_event_rules
+
+    api = hs.get_module_api()
+    third_party_rules = module(config=config, module_api=api)
+
+    # The known hooks. If a module implements a method whose name appears in this set,
+    # we'll want to register it.
+    third_party_event_rules_methods = {
+        "check_event_allowed",
+        "on_create_room",
+        "check_threepid_can_be_invited",
+        "check_visibility_can_be_modified",
+    }
+
+    def async_wrapper(f: Optional[Callable]) -> Optional[Callable[..., Awaitable]]:
+        # f might be None if the callback isn't implemented by the module. In this
+        # case we don't want to register a callback at all so we return None.
+        if f is None:
+            return None
+
+        # We return a separate wrapper for these methods because, in order to wrap them
+        # correctly, we need to await their results. Therefore it doesn't make a lot of
+        # sense to make them go through the run() wrapper.
+        if f.__name__ == "check_event_allowed":
+            # We need to wrap check_event_allowed because its old form would return either
+            # a boolean or a dict, but now we want to return the dict separately from the
+            # boolean.
+            async def wrap_check_event_allowed(
+                event: EventBase,
+                state_events: StateMap[EventBase],
+            ) -> Tuple[bool, Optional[dict]]:
+                # Assertion required because mypy can't prove we won't change
+                # `f` back to `None`. See
+                # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions
+                assert f is not None
+
+                res = await f(event, state_events)
+                if isinstance(res, dict):
+                    return True, res
+                else:
+                    return res, None
+
+            return wrap_check_event_allowed
+
+        if f.__name__ == "on_create_room":
+            # We need to wrap on_create_room because its old form would return a boolean
+            # if the room creation is denied, but now we just want it to raise an
+            # exception.
+            async def wrap_on_create_room(
+                requester: Requester, config: dict, is_requester_admin: bool
+            ) -> None:
+                # Assertion required because mypy can't prove we won't change
+                # `f` back to `None`. See
+                # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions
+                assert f is not None
+
+                res = await f(requester, config, is_requester_admin)
+                if res is False:
+                    raise SynapseError(
+                        403,
+                        "Room creation forbidden with these parameters",
+                    )
+
+            return wrap_on_create_room
+
+        def run(*args: Any, **kwargs: Any) -> Awaitable:
+            # Assertion required because mypy can't prove we won't change `f`
+            # back to `None`. See
+            # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions
+            assert f is not None
+
+            return maybe_awaitable(f(*args, **kwargs))
+
+        return run
+
+    # Register the hooks through the module API.
+    hooks = {
+        hook: async_wrapper(getattr(third_party_rules, hook, None))
+        for hook in third_party_event_rules_methods
+    }
+
+    api.register_third_party_rules_callbacks(**hooks)
+
+
+class ThirdPartyEventRulesModuleApiCallbacks:
+    """Allows server admins to provide a Python module implementing an extra
+    set of rules to apply when processing events.
+
+    This is designed to help admins of closed federations with enforcing custom
+    behaviours.
+    """
+
+    def __init__(self, hs: "HomeServer"):
+        self.store = hs.get_datastores().main
+        self._storage_controllers = hs.get_storage_controllers()
+
+        self._check_event_allowed_callbacks: List[CHECK_EVENT_ALLOWED_CALLBACK] = []
+        self._on_create_room_callbacks: List[ON_CREATE_ROOM_CALLBACK] = []
+        self._check_threepid_can_be_invited_callbacks: List[
+            CHECK_THREEPID_CAN_BE_INVITED_CALLBACK
+        ] = []
+        self._check_visibility_can_be_modified_callbacks: List[
+            CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK
+        ] = []
+        self._on_new_event_callbacks: List[ON_NEW_EVENT_CALLBACK] = []
+        self._check_can_shutdown_room_callbacks: List[
+            CHECK_CAN_SHUTDOWN_ROOM_CALLBACK
+        ] = []
+        self._check_can_deactivate_user_callbacks: List[
+            CHECK_CAN_DEACTIVATE_USER_CALLBACK
+        ] = []
+        self._on_profile_update_callbacks: List[ON_PROFILE_UPDATE_CALLBACK] = []
+        self._on_user_deactivation_status_changed_callbacks: List[
+            ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK
+        ] = []
+        self._on_threepid_bind_callbacks: List[ON_THREEPID_BIND_CALLBACK] = []
+        self._on_add_user_third_party_identifier_callbacks: List[
+            ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK
+        ] = []
+        self._on_remove_user_third_party_identifier_callbacks: List[
+            ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK
+        ] = []
+
+    def register_third_party_rules_callbacks(
+        self,
+        check_event_allowed: Optional[CHECK_EVENT_ALLOWED_CALLBACK] = None,
+        on_create_room: Optional[ON_CREATE_ROOM_CALLBACK] = None,
+        check_threepid_can_be_invited: Optional[
+            CHECK_THREEPID_CAN_BE_INVITED_CALLBACK
+        ] = None,
+        check_visibility_can_be_modified: Optional[
+            CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK
+        ] = None,
+        on_new_event: Optional[ON_NEW_EVENT_CALLBACK] = None,
+        check_can_shutdown_room: Optional[CHECK_CAN_SHUTDOWN_ROOM_CALLBACK] = None,
+        check_can_deactivate_user: Optional[CHECK_CAN_DEACTIVATE_USER_CALLBACK] = None,
+        on_profile_update: Optional[ON_PROFILE_UPDATE_CALLBACK] = None,
+        on_user_deactivation_status_changed: Optional[
+            ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK
+        ] = None,
+        on_threepid_bind: Optional[ON_THREEPID_BIND_CALLBACK] = None,
+        on_add_user_third_party_identifier: Optional[
+            ON_ADD_USER_THIRD_PARTY_IDENTIFIER_CALLBACK
+        ] = None,
+        on_remove_user_third_party_identifier: Optional[
+            ON_REMOVE_USER_THIRD_PARTY_IDENTIFIER_CALLBACK
+        ] = None,
+    ) -> None:
+        """Register callbacks from modules for each hook."""
+        if check_event_allowed is not None:
+            self._check_event_allowed_callbacks.append(check_event_allowed)
+
+        if on_create_room is not None:
+            self._on_create_room_callbacks.append(on_create_room)
+
+        if check_threepid_can_be_invited is not None:
+            self._check_threepid_can_be_invited_callbacks.append(
+                check_threepid_can_be_invited,
+            )
+
+        if check_visibility_can_be_modified is not None:
+            self._check_visibility_can_be_modified_callbacks.append(
+                check_visibility_can_be_modified,
+            )
+
+        if on_new_event is not None:
+            self._on_new_event_callbacks.append(on_new_event)
+
+        if check_can_shutdown_room is not None:
+            self._check_can_shutdown_room_callbacks.append(check_can_shutdown_room)
+
+        if check_can_deactivate_user is not None:
+            self._check_can_deactivate_user_callbacks.append(check_can_deactivate_user)
+        if on_profile_update is not None:
+            self._on_profile_update_callbacks.append(on_profile_update)
+
+        if on_user_deactivation_status_changed is not None:
+            self._on_user_deactivation_status_changed_callbacks.append(
+                on_user_deactivation_status_changed,
+            )
+
+        if on_threepid_bind is not None:
+            self._on_threepid_bind_callbacks.append(on_threepid_bind)
+
+        if on_add_user_third_party_identifier is not None:
+            self._on_add_user_third_party_identifier_callbacks.append(
+                on_add_user_third_party_identifier
+            )
+
+        if on_remove_user_third_party_identifier is not None:
+            self._on_remove_user_third_party_identifier_callbacks.append(
+                on_remove_user_third_party_identifier
+            )
+
+    async def check_event_allowed(
+        self,
+        event: EventBase,
+        context: UnpersistedEventContextBase,
+    ) -> Tuple[bool, Optional[dict]]:
+        """Check if a provided event should be allowed in the given context.
+
+        The module can return:
+            * True: the event is allowed.
+            * False: the event is not allowed, and should be rejected with M_FORBIDDEN.
+
+        If the event is allowed, the module can also return a dictionary to use as a
+        replacement for the event.
+
+        Args:
+            event: The event to be checked.
+            context: The context of the event.
+
+        Returns:
+            The result from the ThirdPartyRules module, as above.
+        """
+        # Bail out early without hitting the store if we don't have any callbacks to run.
+        if len(self._check_event_allowed_callbacks) == 0:
+            return True, None
+
+        prev_state_ids = await context.get_prev_state_ids()
+
+        # Retrieve the state events from the database.
+        events = await self.store.get_events(prev_state_ids.values())
+        state_events = {(ev.type, ev.state_key): ev for ev in events.values()}
+
+        # Ensure that the event is frozen, to make sure that the module is not tempted
+        # to try to modify it. Any attempt to modify it at this point will invalidate
+        # the hashes and signatures.
+        event.freeze()
+
+        for callback in self._check_event_allowed_callbacks:
+            try:
+                res, replacement_data = await delay_cancellation(
+                    callback(event, state_events)
+                )
+            except CancelledError:
+                raise
+            except SynapseError as e:
+                # FIXME: Being able to throw SynapseErrors is relied upon by
+                # some modules. PR #10386 accidentally broke this ability.
+                # That said, we aren't keen on exposing this implementation detail
+                # to modules and we should one day have a proper way to do what
+                # is wanted.
+                # This module callback needs a rework so that hacks such as
+                # this one are not necessary.
+                raise e
+            except Exception:
+                raise ModuleFailedException(
+                    "Failed to run `check_event_allowed` module API callback"
+                )
+
+            # Return if the event shouldn't be allowed or if the module came up with a
+            # replacement dict for the event.
+            if res is False:
+                return res, None
+            elif isinstance(replacement_data, dict):
+                return True, replacement_data
+
+        return True, None
+
+    async def on_create_room(
+        self, requester: Requester, config: dict, is_requester_admin: bool
+    ) -> None:
+        """Intercept requests to create a room, to maybe deny them (via an exception)
+        or update the request config.
+
+        Args:
+            requester: The requester asking to create the room.
+            config: The creation config from the client.
+            is_requester_admin: Whether the requester is an admin.
+        """
+        """
+        for callback in self._on_create_room_callbacks:
+            try:
+                await callback(requester, config, is_requester_admin)
+            except Exception as e:
+                # Don't silence the errors raised by this callback since we expect it to
+                # raise an exception to deny the creation of the room; instead make sure
+                # it's a SynapseError we can send to clients.
+                if not isinstance(e, SynapseError):
+                    e = SynapseError(
+                        403, "Room creation forbidden with these parameters"
+                    )
+
+                raise e
+
+    async def check_threepid_can_be_invited(
+        self, medium: str, address: str, room_id: str
+    ) -> bool:
+        """Check if a provided 3PID can be invited in the given room.
+
+        Args:
+            medium: The 3PID's medium.
+            address: The 3PID's address.
+            room_id: The room we want to invite the threepid to.
+
+        Returns:
+            True if the 3PID can be invited, False if not.
+        """
+        # Bail out early without hitting the store if we don't have any callbacks to run.
+        if len(self._check_threepid_can_be_invited_callbacks) == 0:
+            return True
+
+        state_events = await self._get_state_map_for_room(room_id)
+
+        for callback in self._check_threepid_can_be_invited_callbacks:
+            try:
+                threepid_can_be_invited = await delay_cancellation(
+                    callback(medium, address, state_events)
+                )
+                if threepid_can_be_invited is False:
+                    return False
+            except CancelledError:
+                raise
+            except Exception as e:
+                logger.warning("Failed to run module API callback %s: %s", callback, e)
+
+        return True
+
+    async def check_visibility_can_be_modified(
+        self, room_id: str, new_visibility: str
+    ) -> bool:
+        """Check if a room is allowed to be published to, or removed from, the public room
+        list.
+
+        Args:
+            room_id: The ID of the room.
+            new_visibility: The new visibility state. Either "public" or "private".
+
+        Returns:
+            True if the room's visibility can be modified, False if not.
+        """
+        # Bail out early without hitting the store if we don't have any callbacks to run.
+        if len(self._check_visibility_can_be_modified_callbacks) == 0:
+            return True
+
+        state_events = await self._get_state_map_for_room(room_id)
+
+        for callback in self._check_visibility_can_be_modified_callbacks:
+            try:
+                visibility_can_be_modified = await delay_cancellation(
+                    callback(room_id, state_events, new_visibility)
+                )
+                if visibility_can_be_modified is False:
+                    return False
+            except CancelledError:
+                raise
+            except Exception as e:
+                logger.warning("Failed to run module API callback %s: %s", callback, e)
+
+        return True
+
+    async def on_new_event(self, event_id: str) -> None:
+        """Let modules act on events after they've been sent (e.g. auto-accepting
+        invites, etc.)
+
+        Args:
+            event_id: The ID of the event.
+        """
+        # Bail out early without hitting the store if we don't have any callbacks
+        if len(self._on_new_event_callbacks) == 0:
+            return
+
+        event = await self.store.get_event(event_id)
+        state_events = await self._get_state_map_for_room(event.room_id)
+
+        for callback in self._on_new_event_callbacks:
+            try:
+                await callback(event, state_events)
+            except Exception as e:
+                logger.exception(
+                    "Failed to run module API callback %s: %s", callback, e
+                )
+
+    async def check_can_shutdown_room(self, user_id: str, room_id: str) -> bool:
+        """Intercept requests to shutdown a room. If `False` is returned, the
+        room must not be shut down.
+
+        Args:
+            user_id: The ID of the user requesting the shutdown.
+            room_id: The ID of the room.
+        """
+        for callback in self._check_can_shutdown_room_callbacks:
+            try:
+                can_shutdown_room = await delay_cancellation(callback(user_id, room_id))
+                if can_shutdown_room is False:
+                    return False
+            except CancelledError:
+                raise
+            except Exception as e:
+                logger.exception(
+                    "Failed to run module API callback %s: %s", callback, e
+                )
+        return True
+
+    async def check_can_deactivate_user(
+        self,
+        user_id: str,
+        by_admin: bool,
+    ) -> bool:
+        """Intercept requests to deactivate a user. If `False` is returned, the
+        user should not be deactivated.
+
+        Args:
+            user_id: The ID of the user to deactivate.
+            by_admin: Whether the deactivation is being performed by a server admin.
+        """
+        for callback in self._check_can_deactivate_user_callbacks:
+            try:
+                can_deactivate_user = await delay_cancellation(
+                    callback(user_id, by_admin)
+                )
+                if can_deactivate_user is False:
+                    return False
+            except CancelledError:
+                raise
+            except Exception as e:
+                logger.exception(
+                    "Failed to run module API callback %s: %s", callback, e
+                )
+        return True
+
+    async def _get_state_map_for_room(self, room_id: str) -> StateMap[EventBase]:
+        """Given a room ID, return the state events of that room.
+
+        Args:
+            room_id: The ID of the room.
+
+        Returns:
+            A dict mapping (event type, state key) to state event.
+        """
+        return await self._storage_controllers.state.get_current_state(room_id)
+
+    async def on_profile_update(
+        self, user_id: str, new_profile: ProfileInfo, by_admin: bool, deactivation: bool
+    ) -> None:
+        """Called after the global profile of a user has been updated. Does not include
+        per-room profile changes.
+
+        Args:
+            user_id: The user whose profile was changed.
+            new_profile: The updated profile for the user.
+            by_admin: Whether the profile update was performed by a server admin.
+            deactivation: Whether this change was made while deactivating the user.
+        """
+        for callback in self._on_profile_update_callbacks:
+            try:
+                await callback(user_id, new_profile, by_admin, deactivation)
+            except Exception as e:
+                logger.exception(
+                    "Failed to run module API callback %s: %s", callback, e
+                )
+
+    async def on_user_deactivation_status_changed(
+        self, user_id: str, deactivated: bool, by_admin: bool
+    ) -> None:
+        """Called after a user has been deactivated or reactivated.
+
+        Args:
+            user_id: The user whose deactivation status changed.
+            deactivated: Whether the user is now deactivated.
+            by_admin: Whether the change was performed by a server admin.
+        """
+        for callback in self._on_user_deactivation_status_changed_callbacks:
+            try:
+                await callback(user_id, deactivated, by_admin)
+            except Exception as e:
+                logger.exception(
+                    "Failed to run module API callback %s: %s", callback, e
+                )
+
+    async def on_threepid_bind(self, user_id: str, medium: str, address: str) -> None:
+        """Called after a threepid association has been verified and stored.
+
+        Note that this callback is called when an association is created on the
+        local homeserver, not when it's created on an identity server (and then kept track
+        of so that it can be unbound on the same IS later on).
+
+        THIS MODULE CALLBACK METHOD HAS BEEN DEPRECATED. Please use the
+        `on_add_user_third_party_identifier` callback method instead.
+
+        Args:
+            user_id: the user being associated with the threepid.
+            medium: the threepid's medium.
+            address: the threepid's address.
+        """
+        for callback in self._on_threepid_bind_callbacks:
+            try:
+                await callback(user_id, medium, address)
+            except Exception as e:
+                logger.exception(
+                    "Failed to run module API callback %s: %s", callback, e
+                )
+
+    async def on_add_user_third_party_identifier(
+        self, user_id: str, medium: str, address: str
+    ) -> None:
+        """Called when an association between a user's Matrix ID and a third-party ID
+        (email, phone number) has successfully been registered on the homeserver.
+
+        Args:
+            user_id: The User ID included in the association.
+            medium: The medium of the third-party ID (email, msisdn).
+            address: The address of the third-party ID (i.e. an email address).
+        """
+        for callback in self._on_add_user_third_party_identifier_callbacks:
+            try:
+                await callback(user_id, medium, address)
+            except Exception as e:
+                logger.exception(
+                    "Failed to run module API callback %s: %s", callback, e
+                )
+
+    async def on_remove_user_third_party_identifier(
+        self, user_id: str, medium: str, address: str
+    ) -> None:
+        """Called when an association between a user's Matrix ID and a third-party ID
+        (email, phone number) has been successfully removed on the homeserver.
+
+        This is called *after* any known bindings on identity servers for this
+        association have been removed.
+
+        Args:
+            user_id: The User ID included in the removed association.
+            medium: The medium of the third-party ID (email, msisdn).
+            address: The address of the third-party ID (i.e. an email address).
+        """
+        for callback in self._on_remove_user_third_party_identifier_callbacks:
+            try:
+                await callback(user_id, medium, address)
+            except Exception as e:
+                logger.exception(
+                    "Failed to run module API callback %s: %s", callback, e
+                )
diff --git a/synapse/notifier.py b/synapse/notifier.py
index a8832a3f8e..897272ad5b 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -232,7 +232,7 @@ class Notifier:
 
         self._federation_client = hs.get_federation_http_client()
 
-        self._third_party_rules = hs.get_third_party_event_rules()
+        self._third_party_rules = hs.get_module_api_callbacks().third_party_event_rules
 
         self.clock = hs.get_clock()
         self.appservice_handler = hs.get_application_service_handler()
diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py
index 4de56bf13f..1d65560265 100644
--- a/synapse/rest/admin/rooms.py
+++ b/synapse/rest/admin/rooms.py
@@ -70,7 +70,7 @@ class RoomRestV2Servlet(RestServlet):
         self._auth = hs.get_auth()
         self._store = hs.get_datastores().main
         self._pagination_handler = hs.get_pagination_handler()
-        self._third_party_rules = hs.get_third_party_event_rules()
+        self._third_party_rules = hs.get_module_api_callbacks().third_party_event_rules
 
     async def on_DELETE(
         self, request: SynapseRequest, room_id: str
diff --git a/synapse/server.py b/synapse/server.py
index e597627a6d..c557c60482 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -42,7 +42,6 @@ from synapse.crypto.context_factory import RegularPolicyForHTTPS
 from synapse.crypto.keyring import Keyring
 from synapse.events.builder import EventBuilderFactory
 from synapse.events.presence_router import PresenceRouter
-from synapse.events.third_party_rules import ThirdPartyEventRules
 from synapse.events.utils import EventClientSerializer
 from synapse.federation.federation_client import FederationClient
 from synapse.federation.federation_server import (
@@ -691,10 +690,6 @@ class HomeServer(metaclass=abc.ABCMeta):
     def get_stats_handler(self) -> StatsHandler:
         return StatsHandler(self)
 
-    @cache_in_self
-    def get_third_party_event_rules(self) -> ThirdPartyEventRules:
-        return ThirdPartyEventRules(self)
-
     @cache_in_self
     def get_password_auth_provider(self) -> PasswordAuthProvider:
         return PasswordAuthProvider()
diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py
index 753ecc8d16..e5ba5a9706 100644
--- a/tests/rest/client/test_third_party_rules.py
+++ b/tests/rest/client/test_third_party_rules.py
@@ -22,7 +22,9 @@ from synapse.api.errors import SynapseError
 from synapse.api.room_versions import RoomVersion
 from synapse.config.homeserver import HomeServerConfig
 from synapse.events import EventBase
-from synapse.events.third_party_rules import load_legacy_third_party_event_rules
+from synapse.module_api.callbacks.third_party_event_rules_callbacks import (
+    load_legacy_third_party_event_rules,
+)
 from synapse.rest import admin
 from synapse.rest.client import account, login, profile, room
 from synapse.server import HomeServer
@@ -146,7 +148,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
             return ev.type != "foo.bar.forbidden", None
 
         callback = Mock(spec=[], side_effect=check)
-        self.hs.get_third_party_event_rules()._check_event_allowed_callbacks = [
+        self.hs.get_module_api_callbacks().third_party_event_rules._check_event_allowed_callbacks = [
             callback
         ]
 
@@ -202,7 +204,9 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
         ) -> Tuple[bool, Optional[JsonDict]]:
             raise NastyHackException(429, "message")
 
-        self.hs.get_third_party_event_rules()._check_event_allowed_callbacks = [check]
+        self.hs.get_module_api_callbacks().third_party_event_rules._check_event_allowed_callbacks = [
+            check
+        ]
 
         # Make a request
         channel = self.make_request(
@@ -229,7 +233,9 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
             ev.content = {"x": "y"}
             return True, None
 
-        self.hs.get_third_party_event_rules()._check_event_allowed_callbacks = [check]
+        self.hs.get_module_api_callbacks().third_party_event_rules._check_event_allowed_callbacks = [
+            check
+        ]
 
         # now send the event
         channel = self.make_request(
@@ -253,7 +259,9 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
             d["content"] = {"x": "y"}
             return True, d
 
-        self.hs.get_third_party_event_rules()._check_event_allowed_callbacks = [check]
+        self.hs.get_module_api_callbacks().third_party_event_rules._check_event_allowed_callbacks = [
+            check
+        ]
 
         # now send the event
         channel = self.make_request(
@@ -289,7 +297,9 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
             }
             return True, d
 
-        self.hs.get_third_party_event_rules()._check_event_allowed_callbacks = [check]
+        self.hs.get_module_api_callbacks().third_party_event_rules._check_event_allowed_callbacks = [
+            check
+        ]
 
         # Send an event, then edit it.
         channel = self.make_request(
@@ -440,7 +450,9 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
                 )
             return True, None
 
-        self.hs.get_third_party_event_rules()._check_event_allowed_callbacks = [test_fn]
+        self.hs.get_module_api_callbacks().third_party_event_rules._check_event_allowed_callbacks = [
+            test_fn
+        ]
 
         # Sometimes the bug might not happen the first time the event type is added
         # to the state but might happen when an event updates the state of the room for
@@ -466,7 +478,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
     def test_on_new_event(self) -> None:
         """Test that the on_new_event callback is called on new events"""
         on_new_event = Mock(make_awaitable(None))
-        self.hs.get_third_party_event_rules()._on_new_event_callbacks.append(
+        self.hs.get_module_api_callbacks().third_party_event_rules._on_new_event_callbacks.append(
             on_new_event
         )
 
@@ -569,7 +581,9 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
 
         # Register a mock callback.
         m = Mock(return_value=make_awaitable(None))
-        self.hs.get_third_party_event_rules()._on_profile_update_callbacks.append(m)
+        self.hs.get_module_api_callbacks().third_party_event_rules._on_profile_update_callbacks.append(
+            m
+        )
 
         # Change the display name.
         channel = self.make_request(
@@ -628,7 +642,9 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
 
         # Register a mock callback.
         m = Mock(return_value=make_awaitable(None))
-        self.hs.get_third_party_event_rules()._on_profile_update_callbacks.append(m)
+        self.hs.get_module_api_callbacks().third_party_event_rules._on_profile_update_callbacks.append(
+            m
+        )
 
         # Register an admin user.
         self.register_user("admin", "password", admin=True)
@@ -667,7 +683,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
         """
         # Register a mocked callback.
         deactivation_mock = Mock(return_value=make_awaitable(None))
-        third_party_rules = self.hs.get_third_party_event_rules()
+        third_party_rules = self.hs.get_module_api_callbacks().third_party_event_rules
         third_party_rules._on_user_deactivation_status_changed_callbacks.append(
             deactivation_mock,
         )
@@ -675,7 +691,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
         # deactivation code calls it in a way that let modules know the user is being
         # deactivated.
         profile_mock = Mock(return_value=make_awaitable(None))
-        self.hs.get_third_party_event_rules()._on_profile_update_callbacks.append(
+        self.hs.get_module_api_callbacks().third_party_event_rules._on_profile_update_callbacks.append(
             profile_mock,
         )
 
@@ -725,7 +741,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
         """
         # Register a mock callback.
         m = Mock(return_value=make_awaitable(None))
-        third_party_rules = self.hs.get_third_party_event_rules()
+        third_party_rules = self.hs.get_module_api_callbacks().third_party_event_rules
         third_party_rules._on_user_deactivation_status_changed_callbacks.append(m)
 
         # Register an admin user.
@@ -779,7 +795,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
         """
         # Register a mocked callback.
         deactivation_mock = Mock(return_value=make_awaitable(False))
-        third_party_rules = self.hs.get_third_party_event_rules()
+        third_party_rules = self.hs.get_module_api_callbacks().third_party_event_rules
         third_party_rules._check_can_deactivate_user_callbacks.append(
             deactivation_mock,
         )
@@ -825,7 +841,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
         """
         # Register a mocked callback.
         deactivation_mock = Mock(return_value=make_awaitable(False))
-        third_party_rules = self.hs.get_third_party_event_rules()
+        third_party_rules = self.hs.get_module_api_callbacks().third_party_event_rules
         third_party_rules._check_can_deactivate_user_callbacks.append(
             deactivation_mock,
         )
@@ -864,7 +880,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
         """
         # Register a mocked callback.
         shutdown_mock = Mock(return_value=make_awaitable(False))
-        third_party_rules = self.hs.get_third_party_event_rules()
+        third_party_rules = self.hs.get_module_api_callbacks().third_party_event_rules
         third_party_rules._check_can_shutdown_room_callbacks.append(
             shutdown_mock,
         )
@@ -900,7 +916,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
         """
         # Register a mocked callback.
         threepid_bind_mock = Mock(return_value=make_awaitable(None))
-        third_party_rules = self.hs.get_third_party_event_rules()
+        third_party_rules = self.hs.get_module_api_callbacks().third_party_event_rules
         third_party_rules._on_threepid_bind_callbacks.append(threepid_bind_mock)
 
         # Register an admin user.
@@ -947,8 +963,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
         on_remove_user_third_party_identifier_callback_mock = Mock(
             return_value=make_awaitable(None)
         )
-        third_party_rules = self.hs.get_third_party_event_rules()
-        third_party_rules.register_third_party_rules_callbacks(
+        self.hs.get_module_api().register_third_party_rules_callbacks(
             on_add_user_third_party_identifier=on_add_user_third_party_identifier_callback_mock,
             on_remove_user_third_party_identifier=on_remove_user_third_party_identifier_callback_mock,
         )
@@ -1009,8 +1024,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
         on_remove_user_third_party_identifier_callback_mock = Mock(
             return_value=make_awaitable(None)
         )
-        third_party_rules = self.hs.get_third_party_event_rules()
-        third_party_rules.register_third_party_rules_callbacks(
+        self.hs.get_module_api().register_third_party_rules_callbacks(
             on_remove_user_third_party_identifier=on_remove_user_third_party_identifier_callback_mock,
         )
 
diff --git a/tests/server.py b/tests/server.py
index a49dc90e32..7296f0a552 100644
--- a/tests/server.py
+++ b/tests/server.py
@@ -73,11 +73,13 @@ from twisted.web.server import Request, Site
 from synapse.config.database import DatabaseConnectionConfig
 from synapse.config.homeserver import HomeServerConfig
 from synapse.events.presence_router import load_legacy_presence_router
-from synapse.events.third_party_rules import load_legacy_third_party_event_rules
 from synapse.handlers.auth import load_legacy_password_auth_providers
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import ContextResourceUsage
 from synapse.module_api.callbacks.spamchecker_callbacks import load_legacy_spam_checkers
+from synapse.module_api.callbacks.third_party_event_rules_callbacks import (
+    load_legacy_third_party_event_rules,
+)
 from synapse.server import HomeServer
 from synapse.storage import DataStore
 from synapse.storage.database import LoggingDatabaseConnection
-- 
cgit 1.5.1


From e46d5f3586025a491d11a31ce2be4c540c38d404 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Fri, 5 May 2023 15:06:22 +0100
Subject: Factor out an `is_mine_server_name` method (#15542)

Add an `is_mine_server_name` method, similar to `is_mine_id`.

Ideally we would use this consistently, instead of sometimes comparing
against `hs.hostname` and other times reaching into
`hs.config.server.server_name`.

Also fix a bug in the tests where `hs.hostname` would sometimes differ
from `hs.config.server.server_name`.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/15542.misc                       |  1 +
 synapse/api/auth_blocking.py                 |  4 ++--
 synapse/crypto/keyring.py                    |  4 ++--
 synapse/federation/federation_base.py        |  2 +-
 synapse/federation/federation_client.py      |  4 ++--
 synapse/federation/federation_server.py      |  3 ++-
 synapse/federation/send_queue.py             |  3 ++-
 synapse/federation/sender/__init__.py        | 11 ++++++-----
 synapse/federation/transport/client.py       |  4 ++--
 synapse/federation/transport/server/_base.py |  5 ++++-
 synapse/handlers/event_auth.py               |  5 +++--
 synapse/handlers/federation.py               |  3 ++-
 synapse/handlers/federation_event.py         |  3 ++-
 synapse/handlers/profile.py                  |  4 ++--
 synapse/handlers/sso.py                      |  3 ++-
 synapse/handlers/typing.py                   |  3 ++-
 synapse/rest/admin/media.py                  |  4 ++--
 synapse/rest/client/room.py                  |  4 ++--
 synapse/rest/media/download_resource.py      |  4 ++--
 synapse/rest/media/thumbnail_resource.py     |  4 ++--
 synapse/server.py                            |  4 ++++
 synapse/storage/databases/main/room.py       |  2 +-
 tests/unittest.py                            | 16 ++++++++++++++--
 23 files changed, 64 insertions(+), 36 deletions(-)
 create mode 100644 changelog.d/15542.misc

(limited to 'synapse')

diff --git a/changelog.d/15542.misc b/changelog.d/15542.misc
new file mode 100644
index 0000000000..32e3d678a1
--- /dev/null
+++ b/changelog.d/15542.misc
@@ -0,0 +1 @@
+Factor out an `is_mine_server_name` method.
diff --git a/synapse/api/auth_blocking.py b/synapse/api/auth_blocking.py
index 22348d2d86..fcf5b842c6 100644
--- a/synapse/api/auth_blocking.py
+++ b/synapse/api/auth_blocking.py
@@ -39,7 +39,7 @@ class AuthBlocking:
         self._mau_limits_reserved_threepids = (
             hs.config.server.mau_limits_reserved_threepids
         )
-        self._server_name = hs.hostname
+        self._is_mine_server_name = hs.is_mine_server_name
         self._track_appservice_user_ips = hs.config.appservice.track_appservice_user_ips
 
     async def check_auth_blocking(
@@ -77,7 +77,7 @@ class AuthBlocking:
         if requester:
             if requester.authenticated_entity.startswith("@"):
                 user_id = requester.authenticated_entity
-            elif requester.authenticated_entity == self._server_name:
+            elif self._is_mine_server_name(requester.authenticated_entity):
                 # We never block the server from doing actions on behalf of
                 # users.
                 return
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index afdf6863d6..260aab3241 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -173,7 +173,7 @@ class Keyring:
             process_batch_callback=self._inner_fetch_key_requests,
         )
 
-        self._hostname = hs.hostname
+        self._is_mine_server_name = hs.is_mine_server_name
 
         # build a FetchKeyResult for each of our own keys, to shortcircuit the
         # fetcher.
@@ -277,7 +277,7 @@ class Keyring:
 
         # If we are the originating server, short-circuit the key-fetch for any keys
         # we already have
-        if verify_request.server_name == self._hostname:
+        if self._is_mine_server_name(verify_request.server_name):
             for key_id in verify_request.key_ids:
                 if key_id in self._local_verify_keys:
                     found_keys[key_id] = self._local_verify_keys[key_id]
diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py
index 3df975958d..b77022b406 100644
--- a/synapse/federation/federation_base.py
+++ b/synapse/federation/federation_base.py
@@ -49,7 +49,7 @@ class FederationBase:
     def __init__(self, hs: "HomeServer"):
         self.hs = hs
 
-        self.server_name = hs.hostname
+        self._is_mine_server_name = hs.is_mine_server_name
         self.keyring = hs.get_keyring()
         self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
         self.store = hs.get_datastores().main
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 0b2d1a78f7..076b9287c6 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -854,7 +854,7 @@ class FederationClient(FederationBase):
 
         for destination in destinations:
             # We don't want to ask our own server for information we don't have
-            if destination == self.server_name:
+            if self._is_mine_server_name(destination):
                 continue
 
             try:
@@ -1536,7 +1536,7 @@ class FederationClient(FederationBase):
         self, destinations: Iterable[str], room_id: str, event_dict: JsonDict
     ) -> None:
         for destination in destinations:
-            if destination == self.server_name:
+            if self._is_mine_server_name(destination):
                 continue
 
             try:
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index ca43c7bfc0..c590d8f96f 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -129,6 +129,7 @@ class FederationServer(FederationBase):
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
 
+        self.server_name = hs.hostname
         self.handler = hs.get_federation_handler()
         self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
         self._federation_event_handler = hs.get_federation_event_handler()
@@ -942,7 +943,7 @@ class FederationServer(FederationBase):
             authorising_server = get_domain_from_id(
                 event.content[EventContentFields.AUTHORISING_USER]
             )
-            if authorising_server != self.server_name:
+            if not self._is_mine_server_name(authorising_server):
                 raise SynapseError(
                     400,
                     f"Cannot authorise request from resident server: {authorising_server}",
diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py
index 0b7c81677e..fb448f2155 100644
--- a/synapse/federation/send_queue.py
+++ b/synapse/federation/send_queue.py
@@ -68,6 +68,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
         self.clock = hs.get_clock()
         self.notifier = hs.get_notifier()
         self.is_mine_id = hs.is_mine_id
+        self.is_mine_server_name = hs.is_mine_server_name
 
         # We may have multiple federation sender instances, so we need to track
         # their positions separately.
@@ -198,7 +199,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
         key: Optional[Hashable] = None,
     ) -> None:
         """As per FederationSender"""
-        if destination == self.server_name:
+        if self.is_mine_server_name(destination):
             logger.info("Not sending EDU to ourselves")
             return
 
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index edc4b1768c..f3bdc5a4d2 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -362,6 +362,7 @@ class FederationSender(AbstractFederationSender):
 
         self.clock = hs.get_clock()
         self.is_mine_id = hs.is_mine_id
+        self.is_mine_server_name = hs.is_mine_server_name
 
         self._presence_router: Optional["PresenceRouter"] = None
         self._transaction_manager = TransactionManager(hs)
@@ -766,7 +767,7 @@ class FederationSender(AbstractFederationSender):
         domains = [
             d
             for d in domains_set
-            if d != self.server_name
+            if not self.is_mine_server_name(d)
             and self._federation_shard_config.should_handle(self._instance_name, d)
         ]
         if not domains:
@@ -832,7 +833,7 @@ class FederationSender(AbstractFederationSender):
             assert self.is_mine_id(state.user_id)
 
         for destination in destinations:
-            if destination == self.server_name:
+            if self.is_mine_server_name(destination):
                 continue
             if not self._federation_shard_config.should_handle(
                 self._instance_name, destination
@@ -860,7 +861,7 @@ class FederationSender(AbstractFederationSender):
             content: content of EDU
             key: clobbering key for this edu
         """
-        if destination == self.server_name:
+        if self.is_mine_server_name(destination):
             logger.info("Not sending EDU to ourselves")
             return
 
@@ -897,7 +898,7 @@ class FederationSender(AbstractFederationSender):
             queue.send_edu(edu)
 
     def send_device_messages(self, destination: str, immediate: bool = True) -> None:
-        if destination == self.server_name:
+        if self.is_mine_server_name(destination):
             logger.warning("Not sending device update to ourselves")
             return
 
@@ -919,7 +920,7 @@ class FederationSender(AbstractFederationSender):
         might have come back.
         """
 
-        if destination == self.server_name:
+        if self.is_mine_server_name(destination):
             logger.warning("Not waking up ourselves")
             return
 
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index bc70b94f68..d2fa9976da 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -58,9 +58,9 @@ class TransportLayerClient:
     """Sends federation HTTP requests to other servers"""
 
     def __init__(self, hs: "HomeServer"):
-        self.server_name = hs.hostname
         self.client = hs.get_federation_http_client()
         self._faster_joins_enabled = hs.config.experimental.faster_joins_enabled
+        self._is_mine_server_name = hs.is_mine_server_name
 
     async def get_room_state_ids(
         self, destination: str, room_id: str, event_id: str
@@ -235,7 +235,7 @@ class TransportLayerClient:
             transaction.transaction_id,
         )
 
-        if transaction.destination == self.server_name:
+        if self._is_mine_server_name(transaction.destination):
             raise RuntimeError("Transport layer cannot send to itself!")
 
         # FIXME: This is only used by the tests. The actual json sent is
diff --git a/synapse/federation/transport/server/_base.py b/synapse/federation/transport/server/_base.py
index cdaf0d5de7..b6e9c58760 100644
--- a/synapse/federation/transport/server/_base.py
+++ b/synapse/federation/transport/server/_base.py
@@ -57,6 +57,7 @@ class Authenticator:
         self._clock = hs.get_clock()
         self.keyring = hs.get_keyring()
         self.server_name = hs.hostname
+        self._is_mine_server_name = hs.is_mine_server_name
         self.store = hs.get_datastores().main
         self.federation_domain_whitelist = (
             hs.config.federation.federation_domain_whitelist
@@ -100,7 +101,9 @@ class Authenticator:
                 json_request["signatures"].setdefault(origin, {})[key] = sig
 
                 # if the origin_server sent a destination along it needs to match our own server_name
-                if destination is not None and destination != self.server_name:
+                if destination is not None and not self._is_mine_server_name(
+                    destination
+                ):
                     raise AuthenticationError(
                         HTTPStatus.UNAUTHORIZED,
                         "Destination mismatch in auth header",
diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py
index 0db0bd7304..3e37c0cbe2 100644
--- a/synapse/handlers/event_auth.py
+++ b/synapse/handlers/event_auth.py
@@ -29,7 +29,7 @@ from synapse.event_auth import (
 )
 from synapse.events import EventBase
 from synapse.events.builder import EventBuilder
-from synapse.types import StateMap, StrCollection, get_domain_from_id
+from synapse.types import StateMap, StrCollection
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -47,6 +47,7 @@ class EventAuthHandler:
         self._store = hs.get_datastores().main
         self._state_storage_controller = hs.get_storage_controllers().state
         self._server_name = hs.hostname
+        self._is_mine_id = hs.is_mine_id
 
     async def check_auth_rules_from_context(
         self,
@@ -247,7 +248,7 @@ class EventAuthHandler:
         if not await self.is_user_in_rooms(allowed_rooms, user_id):
             # If this is a remote request, the user might be in an allowed room
             # that we do not know about.
-            if get_domain_from_id(user_id) != self._server_name:
+            if not self._is_mine_id(user_id):
                 for room_id in allowed_rooms:
                     if not await self._store.is_host_joined(room_id, self._server_name):
                         raise SynapseError(
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 4ad808a5b4..19dec4812f 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -141,6 +141,7 @@ class FederationHandler:
         self.server_name = hs.hostname
         self.keyring = hs.get_keyring()
         self.is_mine_id = hs.is_mine_id
+        self.is_mine_server_name = hs.is_mine_server_name
         self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
         self.event_creation_handler = hs.get_event_creation_handler()
         self.event_builder_factory = hs.get_event_builder_factory()
@@ -453,7 +454,7 @@ class FederationHandler:
 
             for dom in domains:
                 # We don't want to ask our own server for information we don't have
-                if dom == self.server_name:
+                if self.is_mine_server_name(dom):
                     continue
 
                 try:
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index fc15024166..06343d40e4 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -163,6 +163,7 @@ class FederationEventHandler:
         self._notifier = hs.get_notifier()
 
         self._is_mine_id = hs.is_mine_id
+        self._is_mine_server_name = hs.is_mine_server_name
         self._server_name = hs.hostname
         self._instance_name = hs.get_instance_name()
 
@@ -688,7 +689,7 @@ class FederationEventHandler:
         server from invalid events (there is probably no point in trying to
         re-fetch invalid events from every other HS in the room.)
         """
-        if dest == self._server_name:
+        if self._is_mine_server_name(dest):
             raise SynapseError(400, "Can't backfill from self.")
 
         events = await self._federation_client.backfill(
diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py
index 983b9b66fb..48f9858931 100644
--- a/synapse/handlers/profile.py
+++ b/synapse/handlers/profile.py
@@ -59,7 +59,7 @@ class ProfileHandler:
         self.max_avatar_size = hs.config.server.max_avatar_size
         self.allowed_avatar_mimetypes = hs.config.server.allowed_avatar_mimetypes
 
-        self.server_name = hs.config.server.server_name
+        self._is_mine_server_name = hs.is_mine_server_name
 
         self._third_party_rules = hs.get_module_api_callbacks().third_party_event_rules
 
@@ -309,7 +309,7 @@ class ProfileHandler:
         else:
             server_name = host
 
-        if server_name == self.server_name:
+        if self._is_mine_server_name(server_name):
             media_info = await self.store.get_local_media(media_id)
         else:
             media_info = await self.store.get_cached_remote_media(server_name, media_id)
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index c28325323c..92c3742625 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -194,6 +194,7 @@ class SsoHandler:
         self._clock = hs.get_clock()
         self._store = hs.get_datastores().main
         self._server_name = hs.hostname
+        self._is_mine_server_name = hs.is_mine_server_name
         self._registration_handler = hs.get_registration_handler()
         self._auth_handler = hs.get_auth_handler()
         self._device_handler = hs.get_device_handler()
@@ -802,7 +803,7 @@ class SsoHandler:
             if profile["avatar_url"] is not None:
                 server_name = profile["avatar_url"].split("/")[-2]
                 media_id = profile["avatar_url"].split("/")[-1]
-                if server_name == self._server_name:
+                if self._is_mine_server_name(server_name):
                     media = await self._media_repo.store.get_local_media(media_id)
                     if media is not None and upload_name == media["upload_name"]:
                         logger.info("skipping saving the user avatar")
diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py
index 39ae44ea95..7aeae5319c 100644
--- a/synapse/handlers/typing.py
+++ b/synapse/handlers/typing.py
@@ -68,6 +68,7 @@ class FollowerTypingHandler:
         self.server_name = hs.config.server.server_name
         self.clock = hs.get_clock()
         self.is_mine_id = hs.is_mine_id
+        self.is_mine_server_name = hs.is_mine_server_name
 
         self.federation = None
         if hs.should_send_federation():
@@ -153,7 +154,7 @@ class FollowerTypingHandler:
                 member.room_id
             )
             for domain in hosts:
-                if domain != self.server_name:
+                if not self.is_mine_server_name(domain):
                     logger.debug("sending typing update to %s", domain)
                     self.federation.build_and_send_edu(
                         destination=domain,
diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py
index c134ccfb3d..b7637dff0b 100644
--- a/synapse/rest/admin/media.py
+++ b/synapse/rest/admin/media.py
@@ -258,7 +258,7 @@ class DeleteMediaByID(RestServlet):
     def __init__(self, hs: "HomeServer"):
         self.store = hs.get_datastores().main
         self.auth = hs.get_auth()
-        self.server_name = hs.hostname
+        self._is_mine_server_name = hs.is_mine_server_name
         self.media_repository = hs.get_media_repository()
 
     async def on_DELETE(
@@ -266,7 +266,7 @@ class DeleteMediaByID(RestServlet):
     ) -> Tuple[int, JsonDict]:
         await assert_requester_is_admin(self.auth, request)
 
-        if self.server_name != server_name:
+        if not self._is_mine_server_name(server_name):
             raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only delete local media")
 
         if await self.store.get_local_media(media_id) is None:
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index 7699cc8d1b..951bd033f5 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -501,7 +501,7 @@ class PublicRoomListRestServlet(RestServlet):
             limit = None
 
         handler = self.hs.get_room_list_handler()
-        if server and server != self.hs.config.server.server_name:
+        if server and not self.hs.is_mine_server_name(server):
             # Ensure the server is valid.
             try:
                 parse_and_validate_server_name(server)
@@ -551,7 +551,7 @@ class PublicRoomListRestServlet(RestServlet):
             limit = None
 
         handler = self.hs.get_room_list_handler()
-        if server and server != self.hs.config.server.server_name:
+        if server and not self.hs.is_mine_server_name(server):
             # Ensure the server is valid.
             try:
                 parse_and_validate_server_name(server)
diff --git a/synapse/rest/media/download_resource.py b/synapse/rest/media/download_resource.py
index 8f270cf4cc..3c618ef60a 100644
--- a/synapse/rest/media/download_resource.py
+++ b/synapse/rest/media/download_resource.py
@@ -37,7 +37,7 @@ class DownloadResource(DirectServeJsonResource):
     def __init__(self, hs: "HomeServer", media_repo: "MediaRepository"):
         super().__init__()
         self.media_repo = media_repo
-        self.server_name = hs.hostname
+        self._is_mine_server_name = hs.is_mine_server_name
 
     async def _async_render_GET(self, request: SynapseRequest) -> None:
         set_cors_headers(request)
@@ -59,7 +59,7 @@ class DownloadResource(DirectServeJsonResource):
             b"no-referrer",
         )
         server_name, media_id, name = parse_media_id(request)
-        if server_name == self.server_name:
+        if self._is_mine_server_name(server_name):
             await self.media_repo.get_local_media(request, media_id, name)
         else:
             allow_remote = parse_boolean(request, "allow_remote", default=True)
diff --git a/synapse/rest/media/thumbnail_resource.py b/synapse/rest/media/thumbnail_resource.py
index 4ee2a0dbda..a6396fb05a 100644
--- a/synapse/rest/media/thumbnail_resource.py
+++ b/synapse/rest/media/thumbnail_resource.py
@@ -59,7 +59,7 @@ class ThumbnailResource(DirectServeJsonResource):
         self.media_repo = media_repo
         self.media_storage = media_storage
         self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
-        self.server_name = hs.hostname
+        self._is_mine_server_name = hs.is_mine_server_name
 
     async def _async_render_GET(self, request: SynapseRequest) -> None:
         set_cors_headers(request)
@@ -71,7 +71,7 @@ class ThumbnailResource(DirectServeJsonResource):
         # TODO Parse the Accept header to get an prioritised list of thumbnail types.
         m_type = "image/png"
 
-        if server_name == self.server_name:
+        if self._is_mine_server_name(server_name):
             if self.dynamic_thumbnails:
                 await self._select_or_generate_local_thumbnail(
                     request, media_id, width, height, method, m_type
diff --git a/synapse/server.py b/synapse/server.py
index c557c60482..fd29c28173 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -377,6 +377,10 @@ class HomeServer(metaclass=abc.ABCMeta):
             return False
         return localpart_hostname[1] == self.hostname
 
+    def is_mine_server_name(self, server_name: str) -> bool:
+        """Determines whether a server name refers to this homeserver."""
+        return server_name == self.hostname
+
     @cache_in_self
     def get_clock(self) -> Clock:
         return Clock(self._reactor)
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index dd7dbb6901..ca8be8c80d 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -996,7 +996,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
                 If it is `None` media will be removed from quarantine
         """
         logger.info("Quarantining media: %s/%s", server_name, media_id)
-        is_local = server_name == self.config.server.server_name
+        is_local = self.hs.is_mine_server_name(server_name)
 
         def _quarantine_media_by_id_txn(txn: LoggingTransaction) -> int:
             local_mxcs = [media_id] if is_local else []
diff --git a/tests/unittest.py b/tests/unittest.py
index ee2f78ab01..b6fdf69635 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -566,7 +566,9 @@ class HomeserverTestCase(TestCase):
             client_ip,
         )
 
-    def setup_test_homeserver(self, *args: Any, **kwargs: Any) -> HomeServer:
+    def setup_test_homeserver(
+        self, name: Optional[str] = None, **kwargs: Any
+    ) -> HomeServer:
         """
         Set up the test homeserver, meant to be called by the overridable
         make_homeserver. It automatically passes through the test class's
@@ -585,15 +587,25 @@ class HomeserverTestCase(TestCase):
         else:
             config = kwargs["config"]
 
+        # The server name can be specified using either the `name` argument or a config
+        # override. The `name` argument takes precedence over any config overrides.
+        if name is not None:
+            config["server_name"] = name
+
         # Parse the config from a config dict into a HomeServerConfig
         config_obj = make_homeserver_config_obj(config)
         kwargs["config"] = config_obj
 
+        # The server name in the config is now `name`, if provided, or the `server_name`
+        # from a config override, or the default of "test". Whichever it is, we
+        # construct a homeserver with a matching name.
+        kwargs["name"] = config_obj.server.server_name
+
         async def run_bg_updates() -> None:
             with LoggingContext("run_bg_updates"):
                 self.get_success(stor.db_pool.updates.run_background_updates(False))
 
-        hs = setup_test_homeserver(self.addCleanup, *args, **kwargs)
+        hs = setup_test_homeserver(self.addCleanup, **kwargs)
         stor = hs.get_datastores().main
 
         # Run the database background updates, when running against "master".
-- 
cgit 1.5.1


From 7c95b65873c7a858388b9c99c7e9e15dc5ccb2b5 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Fri, 5 May 2023 15:51:46 +0100
Subject: Clean up and clarify "Create or modify Account" Admin API
 documentation (#15544)

---
 changelog.d/15544.doc                     |  1 +
 docs/admin_api/user_admin_api.md          | 87 +++++++++++++++++--------------
 synapse/handlers/profile.py               |  4 +-
 synapse/rest/admin/users.py               |  2 +-
 synapse/storage/databases/main/profile.py | 16 ++++++
 synapse/util/msisdn.py                    |  6 ++-
 6 files changed, 74 insertions(+), 42 deletions(-)
 create mode 100644 changelog.d/15544.doc

(limited to 'synapse')

diff --git a/changelog.d/15544.doc b/changelog.d/15544.doc
new file mode 100644
index 0000000000..a6d1e96900
--- /dev/null
+++ b/changelog.d/15544.doc
@@ -0,0 +1 @@
+Clarify documentation of the "Create or modify account" Admin API.
\ No newline at end of file
diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md
index 86c29ab380..6b952ba396 100644
--- a/docs/admin_api/user_admin_api.md
+++ b/docs/admin_api/user_admin_api.md
@@ -62,7 +62,7 @@ URL parameters:
 
 - `user_id`: fully-qualified user id: for example, `@user:server.com`.
 
-## Create or modify Account
+## Create or modify account
 
 This API allows an administrator to create or modify a user account with a
 specific `user_id`.
@@ -78,28 +78,29 @@ with a body of:
 ```json
 {
     "password": "user_password",
-    "displayname": "User",
+    "logout_devices": false,
+    "displayname": "Alice Marigold",
+    "avatar_url": "mxc://example.com/abcde12345",
     "threepids": [
         {
             "medium": "email",
-            "address": "<user_mail_1>"
+            "address": "alice@example.com"
         },
         {
             "medium": "email",
-            "address": "<user_mail_2>"
+            "address": "alice@domain.org"
         }
     ],
     "external_ids": [
         {
-            "auth_provider": "<provider1>",
-            "external_id": "<user_id_provider_1>"
+            "auth_provider": "example",
+            "external_id": "12345"
         },
         {
-            "auth_provider": "<provider2>",
-            "external_id": "<user_id_provider_2>"
+            "auth_provider": "example2",
+            "external_id": "abc54321"
         }
     ],
-    "avatar_url": "<avatar_url>",
     "admin": false,
     "deactivated": false,
     "user_type": null
@@ -112,41 +113,51 @@ Returns HTTP status code:
 
 URL parameters:
 
-- `user_id`: fully-qualified user id: for example, `@user:server.com`.
+- `user_id` - A fully-qualified user id. For example, `@user:server.com`.
 
 Body parameters:
 
-- `password` - string, optional. If provided, the user's password is updated and all
+- `password` - **string**, optional. If provided, the user's password is updated and all
   devices are logged out, unless `logout_devices` is set to `false`.
-- `logout_devices` - bool, optional, defaults to `true`. If set to false, devices aren't
+- `logout_devices` - **bool**, optional, defaults to `true`. If set to `false`, devices aren't
   logged out even when `password` is provided.
-- `displayname` - string, optional, defaults to the value of `user_id`.
-- `threepids` - array, optional, allows setting the third-party IDs (email, msisdn)
-  - `medium` - string. Kind of third-party ID, either `email` or `msisdn`.
-  - `address` - string. Value of third-party ID.
-  belonging to a user.
-- `external_ids` - array, optional. Allow setting the identifier of the external identity
-  provider for SSO (Single sign-on). Details in the configuration manual under the
-  sections [sso](../usage/configuration/config_documentation.md#sso) and [oidc_providers](../usage/configuration/config_documentation.md#oidc_providers).
-  - `auth_provider` - string. ID of the external identity provider. Value of `idp_id`
-    in the homeserver configuration. Note that no error is raised if the provided
-    value is not in the homeserver configuration.
-  - `external_id` - string, user ID in the external identity provider.
-- `avatar_url` - string, optional, must be a
+- `displayname` - **string**, optional. If set to an empty string (`""`), the user's display name
+  will be removed.
+- `avatar_url` - **string**, optional. Must be a
   [MXC URI](https://matrix.org/docs/spec/client_server/r0.6.0#matrix-content-mxc-uris).
-- `admin` - bool, optional, defaults to `false`.
-- `deactivated` - bool, optional. If unspecified, deactivation state will be left
-  unchanged on existing accounts and set to `false` for new accounts.
-  A user cannot be erased by deactivating with this API. For details on
-  deactivating users see [Deactivate Account](#deactivate-account).
-- `user_type` - string or null, optional. If provided, the user type will be
-  adjusted. If `null` given, the user type will be cleared. Other 
-  allowed options are: `bot` and `support`.
-
-If the user already exists then optional parameters default to the current value.
-
-In order to re-activate an account `deactivated` must be set to `false`. If
-users do not login via single-sign-on, a new `password` must be provided.
+  If set to an empty string (`""`), the user's avatar is removed.
+- `threepids` - **array**, optional. If provided, the user's third-party IDs (email, msisdn) are
+  entirely replaced with the given list. Each item in the array is an object with the following
+  fields:
+  - `medium` - **string**, required. The type of third-party ID, either `email` or `msisdn` (phone number).
+  - `address` - **string**, required. The third-party ID itself, e.g. `alice@example.com` for `email` or
+    `447470274584` (for a phone number with country code "44") and `19254857364` (for a phone number
+    with country code "1") for `msisdn`.
+  Note: If a threepid is removed from a user via this option, Synapse will also attempt to remove
+  that threepid from any identity servers it is aware has a binding for it.
+- `external_ids` - **array**, optional. Allow setting the identifier of the external identity
+  provider for SSO (Single sign-on). More details are in the configuration manual under the
+  sections [sso](../usage/configuration/config_documentation.md#sso) and [oidc_providers](../usage/configuration/config_documentation.md#oidc_providers).
+  - `auth_provider` - **string**, required. The unique, internal ID of the external identity provider.
+    The same as `idp_id` from the homeserver configuration. Note that no error is raised if the
+    provided value is not in the homeserver configuration.
+  - `external_id` - **string**, required. An identifier for the user in the external identity provider.
+    When the user logs in to the identity provider, this must be the unique ID that they map to.
+- `admin` - **bool**, optional, defaults to `false`. Whether the user is a homeserver administrator,
+  granting them access to the Admin API, among other things.
+- `deactivated` - **bool**, optional. If unspecified, deactivation state will be left unchanged.
+
+  Note: the `password` field must also be set if both of the following are true:
+  - `deactivated` is set to `false` and the user was previously deactivated (you are reactivating this user)
+  - Users are allowed to set their password on this homeserver (both `password_config.enabled` and
+    `password_config.localdb_enabled` config options are set to `true`).
+  Users' passwords are wiped upon account deactivation, hence the need to set a new one here.
+
+  Note: a user cannot be erased with this API. For more details on
+  deactivating and erasing users see [Deactivate Account](#deactivate-account).
+- `user_type` - **string** or null, optional. If not provided, the user type will
+  not be changed. If `null` is given, the user type will be cleared.
+  Other allowed options are: `bot` and `support`.
 
 ## List Accounts
 
diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py
index 48f9858931..a9160c87e3 100644
--- a/synapse/handlers/profile.py
+++ b/synapse/handlers/profile.py
@@ -170,8 +170,8 @@ class ProfileHandler:
             displayname_to_set = None
 
         # If the admin changes the display name of a user, the requesting user cannot send
-        # the join event to update the displayname in the rooms.
-        # This must be done by the target user himself.
+        # the join event to update the display name in the rooms.
+        # This must be done by the target user themselves.
         if by_admin:
             requester = create_requester(
                 target_user,
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 331f225116..932333ae57 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -336,7 +336,7 @@ class UserRestServletV2(RestServlet):
                         HTTPStatus.CONFLICT, "External id is already in use."
                     )
 
-            if "avatar_url" in body and isinstance(body["avatar_url"], str):
+            if "avatar_url" in body:
                 await self.profile_handler.set_avatar_url(
                     target_user, requester, body["avatar_url"], True
                 )
diff --git a/synapse/storage/databases/main/profile.py b/synapse/storage/databases/main/profile.py
index b109f8c07f..c4022d2427 100644
--- a/synapse/storage/databases/main/profile.py
+++ b/synapse/storage/databases/main/profile.py
@@ -85,6 +85,14 @@ class ProfileWorkerStore(SQLBaseStore):
     async def set_profile_displayname(
         self, user_id: UserID, new_displayname: Optional[str]
     ) -> None:
+        """
+        Set the display name of a user.
+
+        Args:
+            user_id: The user's ID.
+            new_displayname: The new display name. If this is None, the user's display
+                name is removed.
+        """
         user_localpart = user_id.localpart
         await self.db_pool.simple_upsert(
             table="profiles",
@@ -99,6 +107,14 @@ class ProfileWorkerStore(SQLBaseStore):
     async def set_profile_avatar_url(
         self, user_id: UserID, new_avatar_url: Optional[str]
     ) -> None:
+        """
+        Set the avatar of a user.
+
+        Args:
+            user_id: The user's ID.
+            new_avatar_url: The new avatar URL. If this is None, the user's avatar is
+                removed.
+        """
         user_localpart = user_id.localpart
         await self.db_pool.simple_upsert(
             table="profiles",
diff --git a/synapse/util/msisdn.py b/synapse/util/msisdn.py
index 1046224f15..3721a1558e 100644
--- a/synapse/util/msisdn.py
+++ b/synapse/util/msisdn.py
@@ -22,12 +22,16 @@ def phone_number_to_msisdn(country: str, number: str) -> str:
     Takes an ISO-3166-1 2 letter country code and phone number and
     returns an msisdn representing the canonical version of that
     phone number.
+
+    As an example, if `country` is "GB" and `number` is "7470674927", this
+    function will return "447470674927".
+
     Args:
         country: ISO-3166-1 2 letter country code
         number: Phone number in a national or international format
 
     Returns:
-        The canonical form of the phone number, as an msisdn
+        The canonical form of the phone number, as an msisdn.
     Raises:
         SynapseError if the number could not be parsed.
     """
-- 
cgit 1.5.1


From a0f53afd62273767b0f54f227fd0020f64c3f6de Mon Sep 17 00:00:00 2001
From: Zdziszek <132405081+zdzichu-rks@users.noreply.github.com>
Date: Fri, 5 May 2023 14:54:32 +0000
Subject: Handle `DNSNotImplementedError` in SRV resolver (#15523)

Signed-off-by: Zdzichu <zdzichu.rks@protonmail.com>
---
 changelog.d/15523.bugfix                | 1 +
 synapse/http/federation/srv_resolver.py | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15523.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15523.bugfix b/changelog.d/15523.bugfix
new file mode 100644
index 0000000000..c00754019f
--- /dev/null
+++ b/changelog.d/15523.bugfix
@@ -0,0 +1 @@
+Don't fail on federation over Tor where SRV queries are not supported. Contributed by Zdzichu.
diff --git a/synapse/http/federation/srv_resolver.py b/synapse/http/federation/srv_resolver.py
index de0e882b33..285baddeb7 100644
--- a/synapse/http/federation/srv_resolver.py
+++ b/synapse/http/federation/srv_resolver.py
@@ -22,7 +22,7 @@ import attr
 
 from twisted.internet.error import ConnectError
 from twisted.names import client, dns
-from twisted.names.error import DNSNameError, DomainError
+from twisted.names.error import DNSNameError, DNSNotImplementedError, DomainError
 
 from synapse.logging.context import make_deferred_yieldable
 
@@ -145,6 +145,9 @@ class SrvResolver:
             # TODO: cache this. We can get the SOA out of the exception, and use
             # the negative-TTL value.
             return []
+        except DNSNotImplementedError:
+            # For .onion homeservers this is unavailable, just fall back to host:8448
+            return []
         except DomainError as e:
             # We failed to resolve the name (other than a NameError)
             # Try something in the cache, else rereaise
-- 
cgit 1.5.1


From 36df9c5e36cbad2a378d922085453726a21ae80c Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 5 May 2023 12:13:50 -0400
Subject: Implement MSC4009 to widen the allowed Matrix ID grammar (#15536)

Behind a configuration flag this adds + to the list of allowed
characters in Matrix IDs. The main feature this enables is
using full E.164 phone numbers as Matrix IDs.
---
 changelog.d/15536.feature       |  1 +
 synapse/config/experimental.py  |  3 +++
 synapse/handlers/register.py    | 27 ++++++++++++++-------------
 synapse/handlers/sso.py         |  6 ++++--
 synapse/types/__init__.py       | 21 +++++++++++++++++++--
 tests/handlers/test_register.py | 13 +++++++++++++
 6 files changed, 54 insertions(+), 17 deletions(-)
 create mode 100644 changelog.d/15536.feature

(limited to 'synapse')

diff --git a/changelog.d/15536.feature b/changelog.d/15536.feature
new file mode 100644
index 0000000000..824c24575f
--- /dev/null
+++ b/changelog.d/15536.feature
@@ -0,0 +1 @@
+Implement [MSC4009](https://github.com/matrix-org/matrix-spec-proposals/pull/4009) to expand the supported characters in Matrix IDs.
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index cab7ccf4b7..514d87cb2c 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -199,3 +199,6 @@ class ExperimentalConfig(Config):
 
         # MSC3970: Scope transaction IDs to devices
         self.msc3970_enabled = experimental.get("msc3970_enabled", False)
+
+        # MSC4009: E.164 Matrix IDs
+        self.msc4009_e164_mxids = experimental.get("msc4009_e164_mxids", False)
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index 61c4b833bd..c80946c2e9 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -46,7 +46,7 @@ from synapse.replication.http.register import (
     ReplicationRegisterServlet,
 )
 from synapse.spam_checker_api import RegistrationBehaviour
-from synapse.types import RoomAlias, UserID, create_requester
+from synapse.types import GUEST_USER_ID_PATTERN, RoomAlias, UserID, create_requester
 from synapse.types.state import StateFilter
 
 if TYPE_CHECKING:
@@ -143,10 +143,15 @@ class RegistrationHandler:
         assigned_user_id: Optional[str] = None,
         inhibit_user_in_use_error: bool = False,
     ) -> None:
-        if types.contains_invalid_mxid_characters(localpart):
+        if types.contains_invalid_mxid_characters(
+            localpart, self.hs.config.experimental.msc4009_e164_mxids
+        ):
+            extra_chars = (
+                "=_-./+" if self.hs.config.experimental.msc4009_e164_mxids else "=_-./"
+            )
             raise SynapseError(
                 400,
-                "User ID can only contain characters a-z, 0-9, or '=_-./'",
+                f"User ID can only contain characters a-z, 0-9, or '{extra_chars}'",
                 Codes.INVALID_USERNAME,
             )
 
@@ -195,16 +200,12 @@ class RegistrationHandler:
                         errcode=Codes.FORBIDDEN,
                     )
 
-        if guest_access_token is None:
-            try:
-                int(localpart)
-                raise SynapseError(
-                    400,
-                    "Numeric user IDs are reserved for guest users.",
-                    errcode=Codes.INVALID_USERNAME,
-                )
-            except ValueError:
-                pass
+        if guest_access_token is None and GUEST_USER_ID_PATTERN.fullmatch(localpart):
+            raise SynapseError(
+                400,
+                "Numeric user IDs are reserved for guest users.",
+                errcode=Codes.INVALID_USERNAME,
+            )
 
     async def register_user(
         self,
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index 92c3742625..25fd2eb3a1 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -225,6 +225,8 @@ class SsoHandler:
 
         self._consent_at_registration = hs.config.consent.user_consent_at_registration
 
+        self._e164_mxids = hs.config.experimental.msc4009_e164_mxids
+
     def register_identity_provider(self, p: SsoIdentityProvider) -> None:
         p_id = p.idp_id
         assert p_id not in self._identity_providers
@@ -711,7 +713,7 @@ class SsoHandler:
         # Since the localpart is provided via a potentially untrusted module,
         # ensure the MXID is valid before registering.
         if not attributes.localpart or contains_invalid_mxid_characters(
-            attributes.localpart
+            attributes.localpart, self._e164_mxids
         ):
             raise MappingException("localpart is invalid: %s" % (attributes.localpart,))
 
@@ -944,7 +946,7 @@ class SsoHandler:
             localpart,
         )
 
-        if contains_invalid_mxid_characters(localpart):
+        if contains_invalid_mxid_characters(localpart, self._e164_mxids):
             raise SynapseError(400, "localpart is invalid: %s" % (localpart,))
         user_id = UserID(localpart, self._server_name).to_string()
         user_infos = await self._store.get_users_by_id_case_insensitive(user_id)
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index 5cee9c3194..325219656a 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -335,18 +335,35 @@ class EventID(DomainSpecificString):
 mxid_localpart_allowed_characters = set(
     "_-./=" + string.ascii_lowercase + string.digits
 )
+# MSC4009 adds the + to the allowed characters.
+#
+# TODO If this was accepted, update the SSO code to support this, see the callers
+#      of map_username_to_mxid_localpart.
+extended_mxid_localpart_allowed_characters = mxid_localpart_allowed_characters | {"+"}
+
+# Guest user IDs are purely numeric.
+GUEST_USER_ID_PATTERN = re.compile(r"^\d+$")
 
 
-def contains_invalid_mxid_characters(localpart: str) -> bool:
+def contains_invalid_mxid_characters(
+    localpart: str, use_extended_character_set: bool
+) -> bool:
     """Check for characters not allowed in an mxid or groupid localpart
 
     Args:
         localpart: the localpart to be checked
+        use_extended_character_set: True to use the extended allowed characters
+            from MSC4009.
 
     Returns:
         True if there are any naughty characters
     """
-    return any(c not in mxid_localpart_allowed_characters for c in localpart)
+    allowed_characters = (
+        extended_mxid_localpart_allowed_characters
+        if use_extended_character_set
+        else mxid_localpart_allowed_characters
+    )
+    return any(c not in allowed_characters for c in localpart)
 
 
 UPPER_CASE_PATTERN = re.compile(b"[A-Z_]")
diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py
index aff1ec4758..73822b07a5 100644
--- a/tests/handlers/test_register.py
+++ b/tests/handlers/test_register.py
@@ -586,6 +586,19 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         d = self.store.is_support_user(user_id)
         self.assertFalse(self.get_success(d))
 
+    def test_invalid_user_id(self) -> None:
+        invalid_user_id = "+abcd"
+        self.get_failure(
+            self.handler.register_user(localpart=invalid_user_id), SynapseError
+        )
+
+    @override_config({"experimental_features": {"msc4009_e164_mxids": True}})
+    def text_extended_user_ids(self) -> None:
+        """+ should be allowed according to MSC4009."""
+        valid_user_id = "+1234"
+        user_id = self.get_success(self.handler.register_user(localpart=valid_user_id))
+        self.assertEqual(user_id, valid_user_id)
+
     def test_invalid_user_id_length(self) -> None:
         invalid_user_id = "x" * 256
         self.get_failure(
-- 
cgit 1.5.1


From 28bceef84e489faf31d14ac1df7ffdb3e4126a9e Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 5 May 2023 15:18:47 -0400
Subject: Check appservices for devices during a /user/devices query. (#15539)

MSC3984 proxies /keys/query requests to appservices, but servers
can also request devices / keys from the /user/devices endpoint.

The formats are close enough that we can "proxy" that /user/devices to
appservices (by calling /keys/query) and then change the format of the
returned data before returning it over federation.
---
 changelog.d/15539.misc        |   1 +
 synapse/handlers/device.py    |  28 +++++++++
 tests/handlers/test_device.py | 135 +++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 163 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15539.misc

(limited to 'synapse')

diff --git a/changelog.d/15539.misc b/changelog.d/15539.misc
new file mode 100644
index 0000000000..e5af5dee5c
--- /dev/null
+++ b/changelog.d/15539.misc
@@ -0,0 +1 @@
+Proxy `/user/devices` federation queries to application services for [MSC3984](https://github.com/matrix-org/matrix-spec-proposals/pull/3984).
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index b9d3b7fbc6..5d12a39e26 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -75,10 +75,14 @@ class DeviceWorkerHandler:
         self.store = hs.get_datastores().main
         self.notifier = hs.get_notifier()
         self.state = hs.get_state_handler()
+        self._appservice_handler = hs.get_application_service_handler()
         self._state_storage = hs.get_storage_controllers().state
         self._auth_handler = hs.get_auth_handler()
         self.server_name = hs.hostname
         self._msc3852_enabled = hs.config.experimental.msc3852_enabled
+        self._query_appservices_for_keys = (
+            hs.config.experimental.msc3984_appservice_key_query
+        )
 
         self.device_list_updater = DeviceListWorkerUpdater(hs)
 
@@ -328,6 +332,30 @@ class DeviceWorkerHandler:
             user_id, "self_signing"
         )
 
+        # Check if the application services have any results.
+        if self._query_appservices_for_keys:
+            # Query the appservice for all devices for this user.
+            query: Dict[str, Optional[List[str]]] = {user_id: None}
+
+            # Query the appservices for any keys.
+            appservice_results = await self._appservice_handler.query_keys(query)
+
+            # Merge results, overriding anything from the database.
+            appservice_devices = appservice_results.get("device_keys", {}).get(
+                user_id, {}
+            )
+
+            # Filter the database results to only those devices that the appservice has
+            # *not* responded with.
+            devices = [d for d in devices if d["device_id"] not in appservice_devices]
+            # Append the appservice response by wrapping each result in another dictionary.
+            devices.extend(
+                {"device_id": device_id, "keys": device}
+                for device_id, device in appservice_devices.items()
+            )
+
+            # TODO Handle cross-signing keys.
+
         return {
             "user_id": user_id,
             "stream_id": stream_id,
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index ce7525e29c..ee48f9e546 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -15,15 +15,22 @@
 # limitations under the License.
 
 from typing import Optional
+from unittest import mock
 
 from twisted.test.proto_helpers import MemoryReactor
 
+from synapse.api.constants import RoomEncryptionAlgorithms
 from synapse.api.errors import NotFoundError, SynapseError
+from synapse.appservice import ApplicationService
 from synapse.handlers.device import MAX_DEVICE_DISPLAY_NAME_LEN, DeviceHandler
 from synapse.server import HomeServer
+from synapse.storage.databases.main.appservice import _make_exclusive_regex
+from synapse.types import JsonDict
 from synapse.util import Clock
 
 from tests import unittest
+from tests.test_utils import make_awaitable
+from tests.unittest import override_config
 
 user1 = "@boris:aaa"
 user2 = "@theresa:bbb"
@@ -31,7 +38,12 @@ user2 = "@theresa:bbb"
 
 class DeviceTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver("server", federation_http_client=None)
+        self.appservice_api = mock.Mock()
+        hs = self.setup_test_homeserver(
+            "server",
+            federation_http_client=None,
+            application_service_api=self.appservice_api,
+        )
         handler = hs.get_device_handler()
         assert isinstance(handler, DeviceHandler)
         self.handler = handler
@@ -265,6 +277,127 @@ class DeviceTestCase(unittest.HomeserverTestCase):
             )
             self.reactor.advance(1000)
 
+    @override_config({"experimental_features": {"msc3984_appservice_key_query": True}})
+    def test_on_federation_query_user_devices_appservice(self) -> None:
+        """Test that querying of appservices for keys overrides responses from the database."""
+        local_user = "@boris:" + self.hs.hostname
+        device_1 = "abc"
+        device_2 = "def"
+        device_3 = "ghi"
+
+        # There are 3 devices:
+        #
+        # 1. One which is uploaded to the homeserver.
+        # 2. One which is uploaded to the homeserver, but a newer copy is returned
+        #     by the appservice.
+        # 3. One which is only returned by the appservice.
+        device_key_1: JsonDict = {
+            "user_id": local_user,
+            "device_id": device_1,
+            "algorithms": [
+                "m.olm.curve25519-aes-sha2",
+                RoomEncryptionAlgorithms.MEGOLM_V1_AES_SHA2,
+            ],
+            "keys": {
+                "ed25519:abc": "base64+ed25519+key",
+                "curve25519:abc": "base64+curve25519+key",
+            },
+            "signatures": {local_user: {"ed25519:abc": "base64+signature"}},
+        }
+        device_key_2a: JsonDict = {
+            "user_id": local_user,
+            "device_id": device_2,
+            "algorithms": [
+                "m.olm.curve25519-aes-sha2",
+                RoomEncryptionAlgorithms.MEGOLM_V1_AES_SHA2,
+            ],
+            "keys": {
+                "ed25519:def": "base64+ed25519+key",
+                "curve25519:def": "base64+curve25519+key",
+            },
+            "signatures": {local_user: {"ed25519:def": "base64+signature"}},
+        }
+
+        device_key_2b: JsonDict = {
+            "user_id": local_user,
+            "device_id": device_2,
+            "algorithms": [
+                "m.olm.curve25519-aes-sha2",
+                RoomEncryptionAlgorithms.MEGOLM_V1_AES_SHA2,
+            ],
+            # The device ID is the same (above), but the keys are different.
+            "keys": {
+                "ed25519:xyz": "base64+ed25519+key",
+                "curve25519:xyz": "base64+curve25519+key",
+            },
+            "signatures": {local_user: {"ed25519:xyz": "base64+signature"}},
+        }
+        device_key_3: JsonDict = {
+            "user_id": local_user,
+            "device_id": device_3,
+            "algorithms": [
+                "m.olm.curve25519-aes-sha2",
+                RoomEncryptionAlgorithms.MEGOLM_V1_AES_SHA2,
+            ],
+            "keys": {
+                "ed25519:jkl": "base64+ed25519+key",
+                "curve25519:jkl": "base64+curve25519+key",
+            },
+            "signatures": {local_user: {"ed25519:jkl": "base64+signature"}},
+        }
+
+        # Upload keys for devices 1 & 2a.
+        e2e_keys_handler = self.hs.get_e2e_keys_handler()
+        self.get_success(
+            e2e_keys_handler.upload_keys_for_user(
+                local_user, device_1, {"device_keys": device_key_1}
+            )
+        )
+        self.get_success(
+            e2e_keys_handler.upload_keys_for_user(
+                local_user, device_2, {"device_keys": device_key_2a}
+            )
+        )
+
+        # Inject an appservice interested in this user.
+        appservice = ApplicationService(
+            token="i_am_an_app_service",
+            id="1234",
+            namespaces={"users": [{"regex": r"@boris:.+", "exclusive": True}]},
+            # Note: this user does not have to match the regex above
+            sender="@as_main:test",
+        )
+        self.hs.get_datastores().main.services_cache = [appservice]
+        self.hs.get_datastores().main.exclusive_user_regex = _make_exclusive_regex(
+            [appservice]
+        )
+
+        # Setup a response.
+        self.appservice_api.query_keys.return_value = make_awaitable(
+            {
+                "device_keys": {
+                    local_user: {device_2: device_key_2b, device_3: device_key_3}
+                }
+            }
+        )
+
+        # Request all devices.
+        res = self.get_success(
+            self.handler.on_federation_query_user_devices(local_user)
+        )
+        self.assertIn("devices", res)
+        res_devices = res["devices"]
+        for device in res_devices:
+            device["keys"].pop("unsigned", None)
+        self.assertEqual(
+            res_devices,
+            [
+                {"device_id": device_1, "keys": device_key_1},
+                {"device_id": device_2, "keys": device_key_2b},
+                {"device_id": device_3, "keys": device_key_3},
+            ],
+        )
+
 
 class DehydrationTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-- 
cgit 1.5.1


From 2bfe3f0b8193b62a92975b1f89f6b2e0eb643091 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 9 May 2023 07:23:27 -0400
Subject: Use account data constants in more places. (#15554)

---
 changelog.d/15554.misc              |  1 +
 synapse/api/constants.py            |  1 +
 synapse/handlers/read_marker.py     |  5 +++--
 synapse/handlers/sync.py            | 12 ++++++------
 synapse/rest/client/account_data.py |  3 ++-
 5 files changed, 13 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/15554.misc

(limited to 'synapse')

diff --git a/changelog.d/15554.misc b/changelog.d/15554.misc
new file mode 100644
index 0000000000..002e3f5315
--- /dev/null
+++ b/changelog.d/15554.misc
@@ -0,0 +1 @@
+Use account data constants in more places.
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index c56b2f2561..cde9a2ecef 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -257,6 +257,7 @@ class AccountDataTypes:
     DIRECT: Final = "m.direct"
     IGNORED_USER_LIST: Final = "m.ignored_user_list"
     TAG: Final = "m.tag"
+    PUSH_RULES: Final = "m.push_rules"
 
 
 class HistoryVisibility:
diff --git a/synapse/handlers/read_marker.py b/synapse/handlers/read_marker.py
index 05122fd5a6..6d35e61880 100644
--- a/synapse/handlers/read_marker.py
+++ b/synapse/handlers/read_marker.py
@@ -15,6 +15,7 @@
 import logging
 from typing import TYPE_CHECKING
 
+from synapse.api.constants import ReceiptTypes
 from synapse.util.async_helpers import Linearizer
 
 if TYPE_CHECKING:
@@ -42,7 +43,7 @@ class ReadMarkerHandler:
 
         async with self.read_marker_linearizer.queue((room_id, user_id)):
             existing_read_marker = await self.store.get_account_data_for_room_and_type(
-                user_id, room_id, "m.fully_read"
+                user_id, room_id, ReceiptTypes.FULLY_READ
             )
 
             should_update = True
@@ -56,5 +57,5 @@ class ReadMarkerHandler:
             if should_update:
                 content = {"event_id": event_id}
                 await self.account_data_handler.add_account_data_to_room(
-                    user_id, room_id, "m.fully_read", content
+                    user_id, room_id, ReceiptTypes.FULLY_READ, content
                 )
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 64d298408d..cc05b0afa0 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1777,18 +1777,18 @@ class SyncHandler:
 
             if push_rules_changed:
                 global_account_data = dict(global_account_data)
-                global_account_data["m.push_rules"] = await self.push_rules_for_user(
-                    sync_config.user
-                )
+                global_account_data[
+                    AccountDataTypes.PUSH_RULES
+                ] = await self.push_rules_for_user(sync_config.user)
         else:
             all_global_account_data = await self.store.get_global_account_data_for_user(
                 user_id
             )
 
             global_account_data = dict(all_global_account_data)
-            global_account_data["m.push_rules"] = await self.push_rules_for_user(
-                sync_config.user
-            )
+            global_account_data[
+                AccountDataTypes.PUSH_RULES
+            ] = await self.push_rules_for_user(sync_config.user)
 
         account_data_for_user = (
             await sync_config.filter_collection.filter_global_account_data(
diff --git a/synapse/rest/client/account_data.py b/synapse/rest/client/account_data.py
index 43193ad086..8eebb21c76 100644
--- a/synapse/rest/client/account_data.py
+++ b/synapse/rest/client/account_data.py
@@ -15,6 +15,7 @@
 import logging
 from typing import TYPE_CHECKING, Tuple
 
+from synapse.api.constants import ReceiptTypes
 from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError
 from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_json_object_from_request
@@ -166,7 +167,7 @@ class RoomAccountDataServlet(RestServlet):
 
         body = parse_json_object_from_request(request)
 
-        if account_data_type == "m.fully_read":
+        if account_data_type == ReceiptTypes.FULLY_READ:
             raise SynapseError(
                 405,
                 "Cannot set m.fully_read through this API."
-- 
cgit 1.5.1


From 4b4e0dc3cecbe9ad65c4728c1ec461321d15789f Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 9 May 2023 10:34:10 -0400
Subject: Error if attempting to set m.push_rules account data, per MSC4010.
 (#15555)

m.push_rules, like m.fully_read, is a special account data type that cannot
be set using the normal /account_data endpoint. Return an error instead
of allowing data that will not be used to be stored.
---
 changelog.d/15554.bugfix            |  1 +
 changelog.d/15554.misc              |  1 -
 changelog.d/15555.bugfix            |  1 +
 synapse/config/experimental.py      |  5 +++
 synapse/handlers/push_rules.py      | 16 ++++++-
 synapse/handlers/sync.py            | 12 ++----
 synapse/push/clientformat.py        |  2 +-
 synapse/rest/client/account_data.py | 85 +++++++++++++++++++++++++++++--------
 synapse/rest/client/push_rule.py    |  7 +--
 9 files changed, 95 insertions(+), 35 deletions(-)
 create mode 100644 changelog.d/15554.bugfix
 delete mode 100644 changelog.d/15554.misc
 create mode 100644 changelog.d/15555.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15554.bugfix b/changelog.d/15554.bugfix
new file mode 100644
index 0000000000..0fd9de8c65
--- /dev/null
+++ b/changelog.d/15554.bugfix
@@ -0,0 +1 @@
+Experimental support for [MSC4010](https://github.com/matrix-org/matrix-spec-proposals/pull/4010) which rejects setting the `"m.push_rules"` via account data.
diff --git a/changelog.d/15554.misc b/changelog.d/15554.misc
deleted file mode 100644
index 002e3f5315..0000000000
--- a/changelog.d/15554.misc
+++ /dev/null
@@ -1 +0,0 @@
-Use account data constants in more places.
diff --git a/changelog.d/15555.bugfix b/changelog.d/15555.bugfix
new file mode 100644
index 0000000000..0fd9de8c65
--- /dev/null
+++ b/changelog.d/15555.bugfix
@@ -0,0 +1 @@
+Experimental support for [MSC4010](https://github.com/matrix-org/matrix-spec-proposals/pull/4010) which rejects setting the `"m.push_rules"` via account data.
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 514d87cb2c..7af6dbcd09 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -202,3 +202,8 @@ class ExperimentalConfig(Config):
 
         # MSC4009: E.164 Matrix IDs
         self.msc4009_e164_mxids = experimental.get("msc4009_e164_mxids", False)
+
+        # MSC4010: Do not allow setting m.push_rules account data.
+        self.msc4010_push_rules_account_data = experimental.get(
+            "msc4010_push_rules_account_data", False
+        )
diff --git a/synapse/handlers/push_rules.py b/synapse/handlers/push_rules.py
index 813f3aa2d5..7ed88a3611 100644
--- a/synapse/handlers/push_rules.py
+++ b/synapse/handlers/push_rules.py
@@ -11,14 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import TYPE_CHECKING, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
 
 import attr
 
 from synapse.api.errors import SynapseError, UnrecognizedRequestError
+from synapse.push.clientformat import format_push_rules_for_user
 from synapse.storage.push_rule import RuleNotFoundException
 from synapse.synapse_rust.push import get_base_rule_ids
-from synapse.types import JsonDict
+from synapse.types import JsonDict, UserID
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -115,6 +116,17 @@ class PushRulesHandler:
         stream_id = self._main_store.get_max_push_rules_stream_id()
         self._notifier.on_new_event("push_rules_key", stream_id, users=[user_id])
 
+    async def push_rules_for_user(
+        self, user: UserID
+    ) -> Dict[str, Dict[str, List[Dict[str, Any]]]]:
+        """
+        Push rules aren't really account data, but get formatted as such for /sync.
+        """
+        user_id = user.to_string()
+        rules_raw = await self._main_store.get_push_rules_for_user(user_id)
+        rules = format_push_rules_for_user(user, rules_raw)
+        return rules
+
 
 def check_actions(actions: List[Union[str, JsonDict]]) -> None:
     """Check if the given actions are spec compliant.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index cc05b0afa0..c010405be6 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -50,7 +50,6 @@ from synapse.logging.opentracing import (
     start_active_span,
     trace,
 )
-from synapse.push.clientformat import format_push_rules_for_user
 from synapse.storage.databases.main.event_push_actions import RoomNotifCounts
 from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary
 from synapse.storage.roommember import MemberSummary
@@ -261,6 +260,7 @@ class SyncHandler:
         self.notifier = hs.get_notifier()
         self.presence_handler = hs.get_presence_handler()
         self._relations_handler = hs.get_relations_handler()
+        self._push_rules_handler = hs.get_push_rules_handler()
         self.event_sources = hs.get_event_sources()
         self.clock = hs.get_clock()
         self.state = hs.get_state_handler()
@@ -428,12 +428,6 @@ class SyncHandler:
             set_tag(SynapseTags.SYNC_RESULT, bool(sync_result))
             return sync_result
 
-    async def push_rules_for_user(self, user: UserID) -> Dict[str, Dict[str, list]]:
-        user_id = user.to_string()
-        rules_raw = await self.store.get_push_rules_for_user(user_id)
-        rules = format_push_rules_for_user(user, rules_raw)
-        return rules
-
     async def ephemeral_by_room(
         self,
         sync_result_builder: "SyncResultBuilder",
@@ -1779,7 +1773,7 @@ class SyncHandler:
                 global_account_data = dict(global_account_data)
                 global_account_data[
                     AccountDataTypes.PUSH_RULES
-                ] = await self.push_rules_for_user(sync_config.user)
+                ] = await self._push_rules_handler.push_rules_for_user(sync_config.user)
         else:
             all_global_account_data = await self.store.get_global_account_data_for_user(
                 user_id
@@ -1788,7 +1782,7 @@ class SyncHandler:
             global_account_data = dict(all_global_account_data)
             global_account_data[
                 AccountDataTypes.PUSH_RULES
-            ] = await self.push_rules_for_user(sync_config.user)
+            ] = await self._push_rules_handler.push_rules_for_user(sync_config.user)
 
         account_data_for_user = (
             await sync_config.filter_collection.filter_global_account_data(
diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py
index 222afbdcc8..88b52c26a0 100644
--- a/synapse/push/clientformat.py
+++ b/synapse/push/clientformat.py
@@ -22,7 +22,7 @@ from synapse.types import UserID
 
 def format_push_rules_for_user(
     user: UserID, ruleslist: FilteredPushRules
-) -> Dict[str, Dict[str, list]]:
+) -> Dict[str, Dict[str, List[Dict[str, Any]]]]:
     """Converts a list of rawrules and a enabled map into nested dictionaries
     to match the Matrix client-server format for push rules"""
 
diff --git a/synapse/rest/client/account_data.py b/synapse/rest/client/account_data.py
index 8eebb21c76..b1f9e9dc9b 100644
--- a/synapse/rest/client/account_data.py
+++ b/synapse/rest/client/account_data.py
@@ -13,9 +13,9 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, Tuple
+from typing import TYPE_CHECKING, Optional, Tuple
 
-from synapse.api.constants import ReceiptTypes
+from synapse.api.constants import AccountDataTypes, ReceiptTypes
 from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError
 from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_json_object_from_request
@@ -30,6 +30,23 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
+def _check_can_set_account_data_type(account_data_type: str) -> None:
+    """The fully read marker and push rules cannot be directly set via /account_data."""
+    if account_data_type == ReceiptTypes.FULLY_READ:
+        raise SynapseError(
+            405,
+            "Cannot set m.fully_read through this API."
+            " Use /rooms/!roomId:server.name/read_markers",
+            Codes.BAD_JSON,
+        )
+    elif account_data_type == AccountDataTypes.PUSH_RULES:
+        raise SynapseError(
+            405,
+            "Cannot set m.push_rules through this API. Use /pushrules",
+            Codes.BAD_JSON,
+        )
+
+
 class AccountDataServlet(RestServlet):
     """
     PUT /user/{user_id}/account_data/{account_dataType} HTTP/1.1
@@ -47,6 +64,7 @@ class AccountDataServlet(RestServlet):
         self.auth = hs.get_auth()
         self.store = hs.get_datastores().main
         self.handler = hs.get_account_data_handler()
+        self._push_rules_handler = hs.get_push_rules_handler()
 
     async def on_PUT(
         self, request: SynapseRequest, user_id: str, account_data_type: str
@@ -55,6 +73,10 @@ class AccountDataServlet(RestServlet):
         if user_id != requester.user.to_string():
             raise AuthError(403, "Cannot add account data for other users.")
 
+        # Raise an error if the account data type cannot be set directly.
+        if self._hs.config.experimental.msc4010_push_rules_account_data:
+            _check_can_set_account_data_type(account_data_type)
+
         body = parse_json_object_from_request(request)
 
         # If experimental support for MSC3391 is enabled, then providing an empty dict
@@ -78,19 +100,28 @@ class AccountDataServlet(RestServlet):
         if user_id != requester.user.to_string():
             raise AuthError(403, "Cannot get account data for other users.")
 
-        event = await self.store.get_global_account_data_by_type_for_user(
-            user_id, account_data_type
-        )
+        # Push rules are stored in a separate table and must be queried separately.
+        if (
+            self._hs.config.experimental.msc4010_push_rules_account_data
+            and account_data_type == AccountDataTypes.PUSH_RULES
+        ):
+            account_data: Optional[
+                JsonDict
+            ] = await self._push_rules_handler.push_rules_for_user(requester.user)
+        else:
+            account_data = await self.store.get_global_account_data_by_type_for_user(
+                user_id, account_data_type
+            )
 
-        if event is None:
+        if account_data is None:
             raise NotFoundError("Account data not found")
 
         # If experimental support for MSC3391 is enabled, then this endpoint should
         # return a 404 if the content for an account data type is an empty dict.
-        if self._hs.config.experimental.msc3391_enabled and event == {}:
+        if self._hs.config.experimental.msc3391_enabled and account_data == {}:
             raise NotFoundError("Account data not found")
 
-        return 200, event
+        return 200, account_data
 
 
 class UnstableAccountDataServlet(RestServlet):
@@ -109,6 +140,7 @@ class UnstableAccountDataServlet(RestServlet):
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
+        self._hs = hs
         self.auth = hs.get_auth()
         self.handler = hs.get_account_data_handler()
 
@@ -122,6 +154,10 @@ class UnstableAccountDataServlet(RestServlet):
         if user_id != requester.user.to_string():
             raise AuthError(403, "Cannot delete account data for other users.")
 
+        # Raise an error if the account data type cannot be set directly.
+        if self._hs.config.experimental.msc4010_push_rules_account_data:
+            _check_can_set_account_data_type(account_data_type)
+
         await self.handler.remove_account_data_for_user(user_id, account_data_type)
 
         return 200, {}
@@ -165,9 +201,10 @@ class RoomAccountDataServlet(RestServlet):
                 Codes.INVALID_PARAM,
             )
 
-        body = parse_json_object_from_request(request)
-
-        if account_data_type == ReceiptTypes.FULLY_READ:
+        # Raise an error if the account data type cannot be set directly.
+        if self._hs.config.experimental.msc4010_push_rules_account_data:
+            _check_can_set_account_data_type(account_data_type)
+        elif account_data_type == ReceiptTypes.FULLY_READ:
             raise SynapseError(
                 405,
                 "Cannot set m.fully_read through this API."
@@ -175,6 +212,8 @@ class RoomAccountDataServlet(RestServlet):
                 Codes.BAD_JSON,
             )
 
+        body = parse_json_object_from_request(request)
+
         # If experimental support for MSC3391 is enabled, then providing an empty dict
         # as the value for an account data type should be functionally equivalent to
         # calling the DELETE method on the same type.
@@ -209,19 +248,26 @@ class RoomAccountDataServlet(RestServlet):
                 Codes.INVALID_PARAM,
             )
 
-        event = await self.store.get_account_data_for_room_and_type(
-            user_id, room_id, account_data_type
-        )
+        # Room-specific push rules are not currently supported.
+        if (
+            self._hs.config.experimental.msc4010_push_rules_account_data
+            and account_data_type == AccountDataTypes.PUSH_RULES
+        ):
+            account_data: Optional[JsonDict] = {}
+        else:
+            account_data = await self.store.get_account_data_for_room_and_type(
+                user_id, room_id, account_data_type
+            )
 
-        if event is None:
+        if account_data is None:
             raise NotFoundError("Room account data not found")
 
         # If experimental support for MSC3391 is enabled, then this endpoint should
         # return a 404 if the content for an account data type is an empty dict.
-        if self._hs.config.experimental.msc3391_enabled and event == {}:
+        if self._hs.config.experimental.msc3391_enabled and account_data == {}:
             raise NotFoundError("Room account data not found")
 
-        return 200, event
+        return 200, account_data
 
 
 class UnstableRoomAccountDataServlet(RestServlet):
@@ -241,6 +287,7 @@ class UnstableRoomAccountDataServlet(RestServlet):
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
+        self._hs = hs
         self.auth = hs.get_auth()
         self.handler = hs.get_account_data_handler()
 
@@ -262,6 +309,10 @@ class UnstableRoomAccountDataServlet(RestServlet):
                 Codes.INVALID_PARAM,
             )
 
+        # Raise an error if the account data type cannot be set directly.
+        if self._hs.config.experimental.msc4010_push_rules_account_data:
+            _check_can_set_account_data_type(account_data_type)
+
         await self.handler.remove_account_data_for_room(
             user_id, room_id, account_data_type
         )
diff --git a/synapse/rest/client/push_rule.py b/synapse/rest/client/push_rule.py
index 1147b6f8ec..5c9fece3ba 100644
--- a/synapse/rest/client/push_rule.py
+++ b/synapse/rest/client/push_rule.py
@@ -28,7 +28,6 @@ from synapse.http.servlet import (
     parse_string,
 )
 from synapse.http.site import SynapseRequest
-from synapse.push.clientformat import format_push_rules_for_user
 from synapse.push.rulekinds import PRIORITY_CLASS_MAP
 from synapse.rest.client._base import client_patterns
 from synapse.storage.push_rule import InconsistentRuleException, RuleNotFoundException
@@ -146,14 +145,12 @@ class PushRuleRestServlet(RestServlet):
 
     async def on_GET(self, request: SynapseRequest, path: str) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
-        user_id = requester.user.to_string()
+        requester.user.to_string()
 
         # we build up the full structure and then decide which bits of it
         # to send which means doing unnecessary work sometimes but is
         # is probably not going to make a whole lot of difference
-        rules_raw = await self.store.get_push_rules_for_user(user_id)
-
-        rules = format_push_rules_for_user(requester.user, rules_raw)
+        rules = await self._push_rules_handler.push_rules_for_user(requester.user)
 
         path_parts = path.split("/")[1:]
 
-- 
cgit 1.5.1


From ab4535b6082db97e8c48a69ea6674fe3b7c5e956 Mon Sep 17 00:00:00 2001
From: Travis Ralston <travisr@matrix.org>
Date: Tue, 9 May 2023 12:08:51 -0600
Subject: Add config option to prevent media downloads from listed domains.
 (#15197)

This stops local users from accessing media (and thumbnails) that
originates from the listed domains. It does not delete any media from
those domains that is already cached locally, but it does prevent that
media from being accessed.

Note that admin APIs are unaffected by this change.
---
 changelog.d/15197.feature                        |   1 +
 docs/usage/configuration/config_documentation.md |  24 ++++
 synapse/config/repository.py                     |   4 +
 synapse/media/media_repository.py                |   9 ++
 synapse/rest/media/thumbnail_resource.py         |   9 ++
 tests/rest/media/test_domain_blocking.py         | 139 +++++++++++++++++++++++
 6 files changed, 186 insertions(+)
 create mode 100644 changelog.d/15197.feature
 create mode 100644 tests/rest/media/test_domain_blocking.py

(limited to 'synapse')

diff --git a/changelog.d/15197.feature b/changelog.d/15197.feature
new file mode 100644
index 0000000000..c8a6f114e8
--- /dev/null
+++ b/changelog.d/15197.feature
@@ -0,0 +1 @@
+Add an option to prevent media downloads from configured domains.
\ No newline at end of file
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 14c21f73fe..6dd1a639ed 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1768,6 +1768,30 @@ Example configuration:
 max_image_pixels: 35M
 ```
 ---
+### `prevent_media_downloads_from`
+
+A list of domains to never download media from. Media from these
+domains that is already downloaded will not be deleted, but will be
+inaccessible to users. This option does not affect admin APIs trying
+to download/operate on media.
+
+This will not prevent the listed domains from accessing media themselves.
+It simply prevents users on this server from downloading media originating
+from the listed servers.
+
+This will have no effect on media originating from the local server.
+This only affects media downloaded from other Matrix servers. To
+block domains from URL previews, see [`url_preview_url_blacklist`](#url_preview_url_blacklist).
+
+Defaults to an empty list (nothing blocked).
+
+Example configuration:
+```yaml
+prevent_media_downloads_from:
+  - evil.example.org
+  - evil2.example.org
+```
+---
 ### `dynamic_thumbnails`
 
 Whether to generate new thumbnails on the fly to precisely match
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index ecb3edbe3a..655f06505b 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -137,6 +137,10 @@ class ContentRepositoryConfig(Config):
         self.max_image_pixels = self.parse_size(config.get("max_image_pixels", "32M"))
         self.max_spider_size = self.parse_size(config.get("max_spider_size", "10M"))
 
+        self.prevent_media_downloads_from = config.get(
+            "prevent_media_downloads_from", []
+        )
+
         self.media_store_path = self.ensure_directory(
             config.get("media_store_path", "media_store")
         )
diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py
index b81e3c2b0c..e81c987b10 100644
--- a/synapse/media/media_repository.py
+++ b/synapse/media/media_repository.py
@@ -93,6 +93,7 @@ class MediaRepository:
         self.federation_domain_whitelist = (
             hs.config.federation.federation_domain_whitelist
         )
+        self.prevent_media_downloads_from = hs.config.media.prevent_media_downloads_from
 
         # List of StorageProviders where we should search for media and
         # potentially upload to.
@@ -276,6 +277,14 @@ class MediaRepository:
         ):
             raise FederationDeniedError(server_name)
 
+        # Don't let users download media from domains listed in the config, even
+        # if we might have the media to serve. This is Trust & Safety tooling to
+        # block some servers' media from being accessible to local users.
+        # See `prevent_media_downloads_from` config docs for more info.
+        if server_name in self.prevent_media_downloads_from:
+            respond_404(request)
+            return
+
         self.mark_recently_accessed(server_name, media_id)
 
         # We linearize here to ensure that we don't try and download remote
diff --git a/synapse/rest/media/thumbnail_resource.py b/synapse/rest/media/thumbnail_resource.py
index a6396fb05a..661e604b85 100644
--- a/synapse/rest/media/thumbnail_resource.py
+++ b/synapse/rest/media/thumbnail_resource.py
@@ -60,6 +60,7 @@ class ThumbnailResource(DirectServeJsonResource):
         self.media_storage = media_storage
         self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
         self._is_mine_server_name = hs.is_mine_server_name
+        self.prevent_media_downloads_from = hs.config.media.prevent_media_downloads_from
 
     async def _async_render_GET(self, request: SynapseRequest) -> None:
         set_cors_headers(request)
@@ -82,6 +83,14 @@ class ThumbnailResource(DirectServeJsonResource):
                 )
             self.media_repo.mark_recently_accessed(None, media_id)
         else:
+            # Don't let users download media from configured domains, even if it
+            # is already downloaded. This is Trust & Safety tooling to make some
+            # media inaccessible to local users.
+            # See `prevent_media_downloads_from` config docs for more info.
+            if server_name in self.prevent_media_downloads_from:
+                respond_404(request)
+                return
+
             if self.dynamic_thumbnails:
                 await self._select_or_generate_remote_thumbnail(
                     request, server_name, media_id, width, height, method, m_type
diff --git a/tests/rest/media/test_domain_blocking.py b/tests/rest/media/test_domain_blocking.py
new file mode 100644
index 0000000000..9beeeab843
--- /dev/null
+++ b/tests/rest/media/test_domain_blocking.py
@@ -0,0 +1,139 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Dict
+
+from twisted.test.proto_helpers import MemoryReactor
+from twisted.web.resource import Resource
+
+from synapse.media._base import FileInfo
+from synapse.server import HomeServer
+from synapse.util import Clock
+
+from tests import unittest
+from tests.test_utils import SMALL_PNG
+from tests.unittest import override_config
+
+
+class MediaDomainBlockingTests(unittest.HomeserverTestCase):
+    remote_media_id = "doesnotmatter"
+    remote_server_name = "evil.com"
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.store = hs.get_datastores().main
+
+        # Inject a piece of media. We'll use this to ensure we're returning a sane
+        # response when we're not supposed to block it, distinguishing a media block
+        # from a regular 404.
+        file_id = "abcdefg12345"
+        file_info = FileInfo(server_name=self.remote_server_name, file_id=file_id)
+        with hs.get_media_repository().media_storage.store_into_file(file_info) as (
+            f,
+            fname,
+            finish,
+        ):
+            f.write(SMALL_PNG)
+            self.get_success(finish())
+
+        self.get_success(
+            self.store.store_cached_remote_media(
+                origin=self.remote_server_name,
+                media_id=self.remote_media_id,
+                media_type="image/png",
+                media_length=1,
+                time_now_ms=clock.time_msec(),
+                upload_name="test.png",
+                filesystem_id=file_id,
+            )
+        )
+
+    def create_resource_dict(self) -> Dict[str, Resource]:
+        # We need to manually set the resource tree to include media, the
+        # default only does `/_matrix/client` APIs.
+        return {"/_matrix/media": self.hs.get_media_repository_resource()}
+
+    @override_config(
+        {
+            # Disable downloads from the domain we'll be trying to download from.
+            # Should result in a 404.
+            "prevent_media_downloads_from": ["evil.com"]
+        }
+    )
+    def test_cannot_download_blocked_media(self) -> None:
+        """
+        Tests to ensure that remote media which is blocked cannot be downloaded.
+        """
+        response = self.make_request(
+            "GET",
+            f"/_matrix/media/v3/download/evil.com/{self.remote_media_id}",
+            shorthand=False,
+        )
+        self.assertEqual(response.code, 404)
+
+    @override_config(
+        {
+            # Disable downloads from a domain we won't be requesting downloads from.
+            # This proves we haven't broken anything.
+            "prevent_media_downloads_from": ["not-listed.com"]
+        }
+    )
+    def test_remote_media_normally_unblocked(self) -> None:
+        """
+        Tests to ensure that remote media is normally able to be downloaded
+        when no domain block is in place.
+        """
+        response = self.make_request(
+            "GET",
+            f"/_matrix/media/v3/download/evil.com/{self.remote_media_id}",
+            shorthand=False,
+        )
+        self.assertEqual(response.code, 200)
+
+    @override_config(
+        {
+            # Disable downloads from the domain we'll be trying to download from.
+            # Should result in a 404.
+            "prevent_media_downloads_from": ["evil.com"],
+            "dynamic_thumbnails": True,
+        }
+    )
+    def test_cannot_download_blocked_media_thumbnail(self) -> None:
+        """
+        Same test as test_cannot_download_blocked_media but for thumbnails.
+        """
+        response = self.make_request(
+            "GET",
+            f"/_matrix/media/v3/thumbnail/evil.com/{self.remote_media_id}?width=100&height=100",
+            shorthand=False,
+            content={"width": 100, "height": 100},
+        )
+        self.assertEqual(response.code, 404)
+
+    @override_config(
+        {
+            # Disable downloads from a domain we won't be requesting downloads from.
+            # This proves we haven't broken anything.
+            "prevent_media_downloads_from": ["not-listed.com"],
+            "dynamic_thumbnails": True,
+        }
+    )
+    def test_remote_media_thumbnail_normally_unblocked(self) -> None:
+        """
+        Same test as test_remote_media_normally_unblocked but for thumbnails.
+        """
+        response = self.make_request(
+            "GET",
+            f"/_matrix/media/v3/thumbnail/evil.com/{self.remote_media_id}?width=100&height=100",
+            shorthand=False,
+        )
+        self.assertEqual(response.code, 200)
-- 
cgit 1.5.1


From d3bd03559b14272dd68499ab7cff4b190858b285 Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Tue, 9 May 2023 13:25:20 -0500
Subject: HTTP Replication Client (#15470)

Separate out an HTTP client for replication in preparation for
also supporting UNIX sockets. The major difference from
the base class is that this does not use treq to handle HTTP
requests.
---
 changelog.d/15470.misc            |   1 +
 synapse/http/client.py            | 133 ++++++++++++++++++++++++++++++++-
 synapse/http/replicationagent.py  | 150 ++++++++++++++++++++++++++++++++++++++
 synapse/replication/http/_base.py |   2 +-
 synapse/server.py                 |  13 +++-
 tests/test_state.py               |   1 +
 6 files changed, 297 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/15470.misc
 create mode 100644 synapse/http/replicationagent.py

(limited to 'synapse')

diff --git a/changelog.d/15470.misc b/changelog.d/15470.misc
new file mode 100644
index 0000000000..0af0b499c6
--- /dev/null
+++ b/changelog.d/15470.misc
@@ -0,0 +1 @@
+Create new `Client` for use with HTTP Replication between workers. Contributed by Jason Little.
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 91fe474f36..c9479c81ff 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -74,8 +74,9 @@ from twisted.web.iweb import (
 from synapse.api.errors import Codes, HttpResponseException, SynapseError
 from synapse.http import QuieterFileBodyProducer, RequestTimedOutError, redact_uri
 from synapse.http.proxyagent import ProxyAgent
+from synapse.http.replicationagent import ReplicationAgent
 from synapse.http.types import QueryParams
-from synapse.logging.context import make_deferred_yieldable
+from synapse.logging.context import make_deferred_yieldable, run_in_background
 from synapse.logging.opentracing import set_tag, start_active_span, tags
 from synapse.types import ISynapseReactor
 from synapse.util import json_decoder
@@ -819,6 +820,136 @@ class SimpleHttpClient(BaseHttpClient):
             )
 
 
+class ReplicationClient(BaseHttpClient):
+    """Client for connecting to replication endpoints via HTTP and HTTPS.
+
+    Attributes:
+        agent: The custom Twisted Agent used for constructing the connection.
+    """
+
+    def __init__(
+        self,
+        hs: "HomeServer",
+    ):
+        """
+        Args:
+            hs: The HomeServer instance to pass in
+        """
+        super().__init__(hs)
+
+        # Use a pool, but a very small one.
+        pool = HTTPConnectionPool(self.reactor)
+        pool.maxPersistentPerHost = 5
+        pool.cachedConnectionTimeout = 2 * 60
+
+        self.agent: IAgent = ReplicationAgent(
+            hs.get_reactor(),
+            contextFactory=hs.get_http_client_context_factory(),
+            pool=pool,
+        )
+
+    async def request(
+        self,
+        method: str,
+        uri: str,
+        data: Optional[bytes] = None,
+        headers: Optional[Headers] = None,
+    ) -> IResponse:
+        """
+        Make a request, differs from BaseHttpClient.request in that it does not use treq.
+
+        Args:
+            method: HTTP method to use.
+            uri: URI to query.
+            data: Data to send in the request body, if applicable.
+            headers: Request headers.
+
+        Returns:
+            Response object, once the headers have been read.
+
+        Raises:
+            RequestTimedOutError if the request times out before the headers are read
+
+        """
+        outgoing_requests_counter.labels(method).inc()
+
+        logger.debug("Sending request %s %s", method, uri)
+
+        with start_active_span(
+            "outgoing-replication-request",
+            tags={
+                tags.SPAN_KIND: tags.SPAN_KIND_RPC_CLIENT,
+                tags.HTTP_METHOD: method,
+                tags.HTTP_URL: uri,
+            },
+            finish_on_close=True,
+        ):
+            try:
+                body_producer = None
+                if data is not None:
+                    body_producer = QuieterFileBodyProducer(
+                        BytesIO(data),
+                        cooperator=self._cooperator,
+                    )
+
+                # Skip the fancy treq stuff, we don't need cookie handling, redirects,
+                # or buffered response bodies.
+                method_bytes = method.encode("ascii")
+                uri_bytes = uri.encode("ascii")
+
+                # To preserve the logging context, the timeout is treated
+                # in a similar way to `defer.gatherResults`:
+                # * Each logging context-preserving fork is wrapped in
+                #   `run_in_background`. In this case there is only one,
+                #   since the timeout fork is not logging-context aware.
+                # * The `Deferred` that joins the forks back together is
+                #   wrapped in `make_deferred_yieldable` to restore the
+                #   logging context regardless of the path taken.
+                # (The logic/comments for this came from MatrixFederationHttpClient)
+                request_deferred = run_in_background(
+                    self.agent.request,
+                    method_bytes,
+                    uri_bytes,
+                    headers,
+                    bodyProducer=body_producer,
+                )
+
+                # we use our own timeout mechanism rather than twisted's as a workaround
+                # for https://twistedmatrix.com/trac/ticket/9534.
+                # (Updated url https://github.com/twisted/twisted/issues/9534)
+                request_deferred = timeout_deferred(
+                    request_deferred,
+                    60,
+                    self.hs.get_reactor(),
+                )
+
+                # turn timeouts into RequestTimedOutErrors
+                request_deferred.addErrback(_timeout_to_request_timed_out_error)
+
+                response = await make_deferred_yieldable(request_deferred)
+
+                incoming_responses_counter.labels(method, response.code).inc()
+                logger.info(
+                    "Received response to %s %s: %s",
+                    method,
+                    uri,
+                    response.code,
+                )
+                return response
+            except Exception as e:
+                incoming_responses_counter.labels(method, "ERR").inc()
+                logger.info(
+                    "Error sending request to  %s %s: %s %s",
+                    method,
+                    uri,
+                    type(e).__name__,
+                    e.args[0],
+                )
+                set_tag(tags.ERROR, True)
+                set_tag("error_reason", e.args[0])
+                raise
+
+
 def _timeout_to_request_timed_out_error(f: Failure) -> Failure:
     if f.check(twisted_error.TimeoutError, twisted_error.ConnectingCancelledError):
         # The TCP connection has its own timeout (set by the 'connectTimeout' param
diff --git a/synapse/http/replicationagent.py b/synapse/http/replicationagent.py
new file mode 100644
index 0000000000..5ecd08be0f
--- /dev/null
+++ b/synapse/http/replicationagent.py
@@ -0,0 +1,150 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import Optional
+
+from zope.interface import implementer
+
+from twisted.internet import defer
+from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
+from twisted.internet.interfaces import IStreamClientEndpoint
+from twisted.python.failure import Failure
+from twisted.web.client import URI, HTTPConnectionPool, _AgentBase
+from twisted.web.error import SchemeNotSupported
+from twisted.web.http_headers import Headers
+from twisted.web.iweb import (
+    IAgent,
+    IAgentEndpointFactory,
+    IBodyProducer,
+    IPolicyForHTTPS,
+    IResponse,
+)
+
+from synapse.types import ISynapseReactor
+
+logger = logging.getLogger(__name__)
+
+
+@implementer(IAgentEndpointFactory)
+class ReplicationEndpointFactory:
+    """Connect to a given TCP socket"""
+
+    def __init__(
+        self,
+        reactor: ISynapseReactor,
+        context_factory: IPolicyForHTTPS,
+    ) -> None:
+        self.reactor = reactor
+        self.context_factory = context_factory
+
+    def endpointForURI(self, uri: URI) -> IStreamClientEndpoint:
+        """
+        This part of the factory decides what kind of endpoint is being connected to.
+
+        Args:
+            uri: The pre-parsed URI object containing all the uri data
+
+        Returns: The correct client endpoint object
+        """
+        if uri.scheme in (b"http", b"https"):
+            endpoint = HostnameEndpoint(self.reactor, uri.host, uri.port)
+            if uri.scheme == b"https":
+                endpoint = wrapClientTLS(
+                    self.context_factory.creatorForNetloc(uri.host, uri.port), endpoint
+                )
+            return endpoint
+        else:
+            raise SchemeNotSupported(f"Unsupported scheme: {uri.scheme!r}")
+
+
+@implementer(IAgent)
+class ReplicationAgent(_AgentBase):
+    """
+    Client for connecting to replication endpoints via HTTP and HTTPS.
+
+    Much of this code is copied from Twisted's twisted.web.client.Agent.
+    """
+
+    def __init__(
+        self,
+        reactor: ISynapseReactor,
+        contextFactory: IPolicyForHTTPS,
+        connectTimeout: Optional[float] = None,
+        bindAddress: Optional[bytes] = None,
+        pool: Optional[HTTPConnectionPool] = None,
+    ):
+        """
+        Create a ReplicationAgent.
+
+        Args:
+            reactor: A reactor for this Agent to place outgoing connections.
+            contextFactory: A factory for TLS contexts, to control the
+                verification parameters of OpenSSL. It is passed to the
+                ReplicationEndpointFactory, which uses it to wrap
+                connections for `https` URIs in TLS.
+            connectTimeout: The amount of time that this Agent will wait
+                for the peer to accept a connection.
+            bindAddress: The local address for client sockets to bind to.
+            pool: An HTTPConnectionPool instance, or None, in which
+                case a non-persistent HTTPConnectionPool instance will be
+                created.
+        """
+        _AgentBase.__init__(self, reactor, pool)
+        endpoint_factory = ReplicationEndpointFactory(reactor, contextFactory)
+        self._endpointFactory = endpoint_factory
+
+    def request(
+        self,
+        method: bytes,
+        uri: bytes,
+        headers: Optional[Headers] = None,
+        bodyProducer: Optional[IBodyProducer] = None,
+    ) -> "defer.Deferred[IResponse]":
+        """
+        Issue a request to the server indicated by the given uri.
+
+        An existing connection from the connection pool may be used or a new
+        one may be created.
+
+        Currently, HTTP and HTTPS schemes are supported in uri.
+
+        This is copied from twisted.web.client.Agent, except:
+
+        * It uses a different pool key (combining the host & port).
+        * It does not call _ensureValidURI(...) since it breaks on some
+          UNIX paths.
+
+        See: twisted.web.iweb.IAgent.request
+        """
+        parsedURI = URI.fromBytes(uri)
+        try:
+            endpoint = self._endpointFactory.endpointForURI(parsedURI)
+        except SchemeNotSupported:
+            return defer.fail(Failure())
+
+        # This sets the Pool key to be:
+        #  (http(s), <host:port>)
+        key = (parsedURI.scheme, parsedURI.netloc)
+
+        # _requestWithEndpoint comes from _AgentBase class
+        return self._requestWithEndpoint(
+            key,
+            endpoint,
+            method,
+            parsedURI,
+            headers,
+            bodyProducer,
+            parsedURI.originForm,
+        )
diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py
index 8c2c54c07a..23129962e9 100644
--- a/synapse/replication/http/_base.py
+++ b/synapse/replication/http/_base.py
@@ -194,7 +194,7 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
         the `instance_map` config).
         """
         clock = hs.get_clock()
-        client = hs.get_simple_http_client()
+        client = hs.get_replication_client()
         local_instance_name = hs.get_instance_name()
 
         # The value of these option should match the replication listener settings
diff --git a/synapse/server.py b/synapse/server.py
index fd29c28173..b307295789 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -107,7 +107,11 @@ from synapse.handlers.stats import StatsHandler
 from synapse.handlers.sync import SyncHandler
 from synapse.handlers.typing import FollowerTypingHandler, TypingWriterHandler
 from synapse.handlers.user_directory import UserDirectoryHandler
-from synapse.http.client import InsecureInterceptableContextFactory, SimpleHttpClient
+from synapse.http.client import (
+    InsecureInterceptableContextFactory,
+    ReplicationClient,
+    SimpleHttpClient,
+)
 from synapse.http.matrixfederationclient import MatrixFederationHttpClient
 from synapse.media.media_repository import MediaRepository
 from synapse.metrics.common_usage_metrics import CommonUsageMetricsManager
@@ -471,6 +475,13 @@ class HomeServer(metaclass=abc.ABCMeta):
         )
         return MatrixFederationHttpClient(self, tls_client_options_factory)
 
+    @cache_in_self
+    def get_replication_client(self) -> ReplicationClient:
+        """
+        An HTTP client for HTTP replication.
+        """
+        return ReplicationClient(self)
+
     @cache_in_self
     def get_room_creation_handler(self) -> RoomCreationHandler:
         return RoomCreationHandler(self)
diff --git a/tests/test_state.py b/tests/test_state.py
index b20a26e1ff..2029d3d60a 100644
--- a/tests/test_state.py
+++ b/tests/test_state.py
@@ -228,6 +228,7 @@ class StateTestCase(unittest.TestCase):
                 "get_macaroon_generator",
                 "get_instance_name",
                 "get_simple_http_client",
+                "get_replication_client",
                 "hostname",
             ]
         )
-- 
cgit 1.5.1


From 86d541f37c1bc9197a6f561b31f3aa359740b4bd Mon Sep 17 00:00:00 2001
From: Tulir Asokan <tulir@maunium.net>
Date: Tue, 9 May 2023 22:02:36 +0300
Subject: Stabilize MSC2659 support for AS ping endpoint. (#15528)

---
 changelog.d/15528.feature              |  1 +
 synapse/api/errors.py                  |  8 ++++----
 synapse/appservice/api.py              |  2 +-
 synapse/config/experimental.py         |  3 ---
 synapse/rest/client/appservice_ping.py | 10 ++++------
 synapse/rest/client/versions.py        |  2 +-
 6 files changed, 11 insertions(+), 15 deletions(-)
 create mode 100644 changelog.d/15528.feature

(limited to 'synapse')

diff --git a/changelog.d/15528.feature b/changelog.d/15528.feature
new file mode 100644
index 0000000000..aae9fa1ecf
--- /dev/null
+++ b/changelog.d/15528.feature
@@ -0,0 +1 @@
+Stabilize support for [MSC2659](https://github.com/matrix-org/matrix-spec-proposals/pull/2659): application service ping endpoint. Contributed by Tulir @ Beeper.
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index f2d6f9ab2d..8c7c94b045 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -108,10 +108,10 @@ class Codes(str, Enum):
 
     USER_AWAITING_APPROVAL = "ORG.MATRIX.MSC3866_USER_AWAITING_APPROVAL"
 
-    AS_PING_URL_NOT_SET = "FI.MAU.MSC2659_URL_NOT_SET"
-    AS_PING_BAD_STATUS = "FI.MAU.MSC2659_BAD_STATUS"
-    AS_PING_CONNECTION_TIMEOUT = "FI.MAU.MSC2659_CONNECTION_TIMEOUT"
-    AS_PING_CONNECTION_FAILED = "FI.MAU.MSC2659_CONNECTION_FAILED"
+    AS_PING_URL_NOT_SET = "M_URL_NOT_SET"
+    AS_PING_BAD_STATUS = "M_BAD_STATUS"
+    AS_PING_CONNECTION_TIMEOUT = "M_CONNECTION_TIMEOUT"
+    AS_PING_CONNECTION_FAILED = "M_CONNECTION_FAILED"
 
     # Attempt to send a second annotation with the same event type & annotation key
     # MSC2677
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index 024098e9cb..5fb3d5083d 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -326,7 +326,7 @@ class ApplicationServiceApi(SimpleHttpClient):
         assert service.hs_token is not None
 
         await self.post_json_get_json(
-            uri=f"{service.url}{APP_SERVICE_UNSTABLE_PREFIX}/fi.mau.msc2659/ping",
+            uri=f"{service.url}{APP_SERVICE_PREFIX}/ping",
             post_json={"transaction_id": txn_id},
             headers={"Authorization": [f"Bearer {service.hs_token}"]},
         )
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 7af6dbcd09..6e453bd963 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -189,9 +189,6 @@ class ExperimentalConfig(Config):
         # MSC3967: Do not require UIA when first uploading cross signing keys
         self.msc3967_enabled = experimental.get("msc3967_enabled", False)
 
-        # MSC2659: Application service ping endpoint
-        self.msc2659_enabled = experimental.get("msc2659_enabled", False)
-
         # MSC3981: Recurse relations
         self.msc3981_recurse_relations = experimental.get(
             "msc3981_recurse_relations", False
diff --git a/synapse/rest/client/appservice_ping.py b/synapse/rest/client/appservice_ping.py
index 31466a4ad4..3f553d14d1 100644
--- a/synapse/rest/client/appservice_ping.py
+++ b/synapse/rest/client/appservice_ping.py
@@ -39,9 +39,8 @@ logger = logging.getLogger(__name__)
 
 class AppservicePingRestServlet(RestServlet):
     PATTERNS = client_patterns(
-        "/fi.mau.msc2659/appservice/(?P<appservice_id>[^/]*)/ping",
-        unstable=True,
-        releases=(),
+        "/appservice/(?P<appservice_id>[^/]*)/ping",
+        releases=("v1",),
     )
 
     def __init__(self, hs: "HomeServer"):
@@ -107,9 +106,8 @@ class AppservicePingRestServlet(RestServlet):
 
         duration = time.monotonic() - start
 
-        return HTTPStatus.OK, {"duration": int(duration * 1000)}
+        return HTTPStatus.OK, {"duration_ms": int(duration * 1000)}
 
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
-    if hs.config.experimental.msc2659_enabled:
-        AppservicePingRestServlet(hs).register(http_server)
+    AppservicePingRestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 59aed66464..5c98916ec2 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -111,7 +111,7 @@ class VersionsRestServlet(RestServlet):
                     # Allows moderators to fetch redacted event content as described in MSC2815
                     "fi.mau.msc2815": self.config.experimental.msc2815_enabled,
                     # Adds a ping endpoint for appservices to check HS->AS connection
-                    "fi.mau.msc2659": self.config.experimental.msc2659_enabled,
+                    "fi.mau.msc2659.stable": True,  # TODO: remove when "v1.7" is added above
                     # Adds support for login token requests as per MSC3882
                     "org.matrix.msc3882": self.config.experimental.msc3882_enabled,
                     # Adds support for remotely enabling/disabling pushers, as per MSC3881
-- 
cgit 1.5.1


From 722ccc30b5b66592099c39c3622e48fcf552d2e2 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Thu, 11 May 2023 10:38:32 +0100
Subject: Add an unstable feature flag for MSC3391 to the /versions endpoint
 (#15562)

---
 changelog.d/15562.misc          | 1 +
 synapse/rest/client/versions.py | 2 ++
 2 files changed, 3 insertions(+)
 create mode 100644 changelog.d/15562.misc

(limited to 'synapse')

diff --git a/changelog.d/15562.misc b/changelog.d/15562.misc
new file mode 100644
index 0000000000..eeeb553d8f
--- /dev/null
+++ b/changelog.d/15562.misc
@@ -0,0 +1 @@
+Declare unstable support for [MSC3391](https://github.com/matrix-org/matrix-spec-proposals/pull/3391) under `/_matrix/client/versions` if the experimental implementation is enabled.
\ No newline at end of file
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 5c98916ec2..2d2be6ef38 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -125,6 +125,8 @@ class VersionsRestServlet(RestServlet):
                     "org.matrix.msc3912": self.config.experimental.msc3912_enabled,
                     # Adds support for unstable "intentional mentions" behaviour.
                     "org.matrix.msc3952_intentional_mentions": self.config.experimental.msc3952_intentional_mentions,
+                    # Adds support for deleting account data.
+                    "org.matrix.msc3391": self.config.experimental.msc3391_enabled,
                 },
             },
         )
-- 
cgit 1.5.1


From e4f545c452df817daa2f22dfda906f3451d98351 Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Thu, 11 May 2023 05:30:56 -0500
Subject: Remove `worker_replication_*` settings (#15491)

* Add master to the instance_map as part of Complement, have ReplicationEndpoint look at instance_map for master.

* Fix typo in drive by.

* Remove unnecessary worker_replication_* bits from unit tests and add master to instance_map(hopefully in the right place)

* Several updates:

1. Switch from master to main for naming the main process in the instance_map. Add useful constants for easier adjustment of names in the future.
2. Add backwards compatibility for worker_replication_* to allow time to transition to new style. Make sure to prioritize declaring main directly on the instance_map.
3. Clean up old comments/commented out code.
4. Adjust unit tests to match with new code.
5. Adjust Complement setup infrastructure to only add main to the instance_map if workers are used and remove now unused options from the worker.yaml template.

* Initial Docs upload

* Changelog

* Missed some commented out code that can go now

* Remove TODO comment that no longer holds true.

* Fix links in docs

* More docs

* Remove debug logging

* Apply suggestions from code review

Co-authored-by: reivilibre <olivier@librepush.net>

* Apply suggestions from code review

Co-authored-by: reivilibre <olivier@librepush.net>

* Update version to latest, include completeish before/after examples in upgrade notes.

* Fix up and docs too

---------

Co-authored-by: reivilibre <olivier@librepush.net>
---
 changelog.d/15491.misc                             |  1 +
 docker/conf-workers/worker.yaml.j2                 |  4 --
 docker/configure_workers_and_start.py              | 15 ++++-
 .../workers/generic_worker.yaml                    |  4 --
 docs/upgrade.md                                    | 78 ++++++++++++++++++++++
 docs/usage/configuration/config_documentation.md   | 20 ++++--
 docs/workers.md                                    | 41 ++++++++----
 synapse/config/workers.py                          | 78 +++++++++++++++++-----
 synapse/replication/http/_base.py                  | 16 ++---
 tests/module_api/test_api.py                       |  1 +
 tests/replication/_base.py                         |  8 +--
 tests/replication/test_auth.py                     |  3 -
 tests/replication/test_client_reader_shard.py      |  2 -
 tests/replication/test_sharded_event_persister.py  |  1 +
 14 files changed, 206 insertions(+), 66 deletions(-)
 create mode 100644 changelog.d/15491.misc

(limited to 'synapse')

diff --git a/changelog.d/15491.misc b/changelog.d/15491.misc
new file mode 100644
index 0000000000..98f88dbf19
--- /dev/null
+++ b/changelog.d/15491.misc
@@ -0,0 +1 @@
+Remove need for `worker_replication_*` based settings in worker configuration yaml by placing this data directly on the `instance_map` instead.
diff --git a/docker/conf-workers/worker.yaml.j2 b/docker/conf-workers/worker.yaml.j2
index 42131afc95..44c6e413cf 100644
--- a/docker/conf-workers/worker.yaml.j2
+++ b/docker/conf-workers/worker.yaml.j2
@@ -6,10 +6,6 @@
 worker_app: "{{ app }}"
 worker_name: "{{ name }}"
 
-# The replication listener on the main synapse process.
-worker_replication_host: 127.0.0.1
-worker_replication_http_port: 9093
-
 worker_listeners:
   - type: http
     port: {{ port }}
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index 4beec3daaf..79b5b87397 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -69,6 +69,9 @@ import yaml
 from jinja2 import Environment, FileSystemLoader
 
 MAIN_PROCESS_HTTP_LISTENER_PORT = 8080
+MAIN_PROCESS_INSTANCE_NAME = "main"
+MAIN_PROCESS_LOCALHOST_ADDRESS = "127.0.0.1"
+MAIN_PROCESS_REPLICATION_PORT = 9093
 
 # A simple name used as a placeholder in the WORKERS_CONFIG below. This will be replaced
 # during processing with the name of the worker.
@@ -719,8 +722,8 @@ def generate_worker_files(
     # shared config file.
     listeners = [
         {
-            "port": 9093,
-            "bind_address": "127.0.0.1",
+            "port": MAIN_PROCESS_REPLICATION_PORT,
+            "bind_address": MAIN_PROCESS_LOCALHOST_ADDRESS,
             "type": "http",
             "resources": [{"names": ["replication"]}],
         }
@@ -870,6 +873,14 @@ def generate_worker_files(
 
     workers_in_use = len(requested_worker_types) > 0
 
+    # If there are workers, add the main process to the instance_map too.
+    if workers_in_use:
+        instance_map = shared_config.setdefault("instance_map", {})
+        instance_map[MAIN_PROCESS_INSTANCE_NAME] = {
+            "host": MAIN_PROCESS_LOCALHOST_ADDRESS,
+            "port": MAIN_PROCESS_REPLICATION_PORT,
+        }
+
     # Shared homeserver config
     convert(
         "/conf/shared.yaml.j2",
diff --git a/docs/systemd-with-workers/workers/generic_worker.yaml b/docs/systemd-with-workers/workers/generic_worker.yaml
index a858f99ed1..db6436ee6e 100644
--- a/docs/systemd-with-workers/workers/generic_worker.yaml
+++ b/docs/systemd-with-workers/workers/generic_worker.yaml
@@ -1,10 +1,6 @@
 worker_app: synapse.app.generic_worker
 worker_name: generic_worker1
 
-# The replication listener on the main synapse process.
-worker_replication_host: 127.0.0.1
-worker_replication_http_port: 9093
-
 worker_listeners:
   - type: http
     port: 8083
diff --git a/docs/upgrade.md b/docs/upgrade.md
index 0886b03115..0625de8afb 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -88,6 +88,84 @@ process, for example:
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     ```
 
+# Upgrading to v1.84.0
+
+## Deprecation of `worker_replication_*` configuration settings
+
+When using workers, 
+* `worker_replication_host`
+* `worker_replication_http_port`
+* `worker_replication_http_tls`
+ 
+can now be removed from individual worker YAML configuration ***if*** you add the main process to the `instance_map` in the shared YAML configuration,
+using the name `main`.
+
+### Before:
+Shared YAML
+```yaml
+instance_map:
+  generic_worker1:
+    host: localhost
+    port: 5678
+    tls: false
+```
+Worker YAML
+```yaml
+worker_app: synapse.app.generic_worker
+worker_name: generic_worker1
+
+worker_replication_host: localhost
+worker_replication_http_port: 3456
+worker_replication_http_tls: false
+
+worker_listeners:
+  - type: http
+    port: 1234
+    resources:
+      - names: [client, federation]
+  - type: http
+    port: 5678
+    resources:
+      - names: [replication]
+
+worker_log_config: /etc/matrix-synapse/generic-worker-log.yaml
+```
+### After:
+Shared YAML
+```yaml
+instance_map:
+  main:
+    host: localhost
+    port: 3456
+    tls: false
+  generic_worker1:
+    host: localhost
+    port: 5678
+    tls: false
+```
+Worker YAML
+```yaml
+worker_app: synapse.app.generic_worker
+worker_name: generic_worker1
+
+worker_listeners:
+  - type: http
+    port: 1234
+    resources:
+      - names: [client, federation]
+  - type: http
+    port: 5678
+    resources:
+      - names: [replication]
+
+worker_log_config: /etc/matrix-synapse/generic-worker-log.yaml
+
+```
+Notes: 
+* `tls` is optional but mirrors the functionality of `worker_replication_http_tls`
+
+
+
 # Upgrading to v1.81.0
 
 ## Application service path & authentication deprecations
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 6dd1a639ed..dc965b4119 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3884,15 +3884,20 @@ federation_sender_instances:
 ### `instance_map`
 
 When using workers this should be a map from [`worker_name`](#worker_name) to the
-HTTP replication listener of the worker, if configured.
+HTTP replication listener of the worker, if configured, and to the main process.
 Each worker declared under [`stream_writers`](../../workers.md#stream-writers) needs
 a HTTP replication listener, and that listener should be included in the `instance_map`.
-(The main process also needs an HTTP replication listener, but it should not be
-listed in the `instance_map`.)
+The main process also needs an entry on the `instance_map`, and it should be listed under
+`main` **if even one other worker exists**. Ensure the port matches what is declared
+in the `listeners` block for the `replication` listener.
+
 
 Example configuration:
 ```yaml
 instance_map:
+  main:
+    host: localhost
+    port: 8030
   worker1:
     host: localhost
     port: 8034
@@ -4024,6 +4029,7 @@ worker_name: generic_worker1
 ```
 ---
 ### `worker_replication_host`
+*Deprecated as of version 1.84.0. Place `host` under the `main` entry of the [`instance_map`](#instance_map) in your shared yaml configuration instead.*
 
 The HTTP replication endpoint that it should talk to on the main Synapse process.
 The main Synapse process defines this with a `replication` resource in
@@ -4035,6 +4041,7 @@ worker_replication_host: 127.0.0.1
 ```
 ---
 ### `worker_replication_http_port`
+*Deprecated as of version 1.84.0. Place `port` under the `main` entry of the [`instance_map`](#instance_map) in your shared yaml configuration instead.*
 
 The HTTP replication port that it should talk to on the main Synapse process.
 The main Synapse process defines this with a `replication` resource in
@@ -4046,6 +4053,7 @@ worker_replication_http_port: 9093
 ```
 ---
 ### `worker_replication_http_tls`
+*Deprecated as of version 1.84.0. Place `tls` under the `main` entry of the [`instance_map`](#instance_map) in your shared yaml configuration instead.*
 
 Whether TLS should be used for talking to the HTTP replication port on the main
 Synapse process.
@@ -4071,9 +4079,9 @@ A worker can handle HTTP requests. To do so, a `worker_listeners` option
 must be declared, in the same way as the [`listeners` option](#listeners)
 in the shared config.
 
-Workers declared in [`stream_writers`](#stream_writers) will need to include a
-`replication` listener here, in order to accept internal HTTP requests from
-other workers.
+Workers declared in [`stream_writers`](#stream_writers) and [`instance_map`](#instance_map)
+ will need to include a `replication` listener here, in order to accept internal HTTP 
+requests from other workers.
 
 Example configuration:
 ```yaml
diff --git a/docs/workers.md b/docs/workers.md
index 765f03c263..991814c0bc 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -87,12 +87,18 @@ shared configuration file.
 
 ### Shared configuration
 
-Normally, only a couple of changes are needed to make an existing configuration
-file suitable for use with workers. First, you need to enable an
+Normally, only a few changes are needed to make an existing configuration
+file suitable for use with workers:
+* First, you need to enable an
 ["HTTP replication listener"](usage/configuration/config_documentation.md#listeners)
-for the main process; and secondly, you need to enable
-[redis-based replication](usage/configuration/config_documentation.md#redis).
-Optionally, a [shared secret](usage/configuration/config_documentation.md#worker_replication_secret)
+for the main process
+* Secondly, you need to enable
+[redis-based replication](usage/configuration/config_documentation.md#redis)
+* You will need to add an [`instance_map`](usage/configuration/config_documentation.md#instance_map) 
+with the `main` process defined, as well as the relevant connection information from 
+its HTTP `replication` listener (defined in step 1 above). Note that the `host` defined
+is the address at which workers should look for the `main` process, not necessarily the same address that the listener is bound to.
+* Optionally, a [shared secret](usage/configuration/config_documentation.md#worker_replication_secret)
 can be used to authenticate HTTP traffic between workers. For example:
 
 ```yaml
@@ -111,6 +117,11 @@ worker_replication_secret: ""
 
 redis:
     enabled: true
+
+instance_map:
+    main:
+        host: 'localhost'
+        port: 9093
 ```
 
 See the [configuration manual](usage/configuration/config_documentation.md)
@@ -130,13 +141,13 @@ In the config file for each worker, you must specify:
  * The type of worker ([`worker_app`](usage/configuration/config_documentation.md#worker_app)).
    The currently available worker applications are listed [below](#available-worker-applications).
  * A unique name for the worker ([`worker_name`](usage/configuration/config_documentation.md#worker_name)).
- * The HTTP replication endpoint that it should talk to on the main synapse process
-   ([`worker_replication_host`](usage/configuration/config_documentation.md#worker_replication_host) and
-   [`worker_replication_http_port`](usage/configuration/config_documentation.md#worker_replication_http_port)).
  * If handling HTTP requests, a [`worker_listeners`](usage/configuration/config_documentation.md#worker_listeners) option
    with an `http` listener.
  * **Synapse 1.72 and older:** if handling the `^/_matrix/client/v3/keys/upload` endpoint, the HTTP URI for
    the main process (`worker_main_http_uri`). This config option is no longer required and is ignored when running Synapse 1.73 and newer.
+ * **Synapse 1.83 and older:** The HTTP replication endpoint that the worker should talk to on the main synapse process
+   ([`worker_replication_host`](usage/configuration/config_documentation.md#worker_replication_host) and
+   [`worker_replication_http_port`](usage/configuration/config_documentation.md#worker_replication_http_port)). With Synapse 1.84 and newer, these are not needed if `main` is defined in the `instance_map` of the [shared configuration](#shared-configuration).
 
 For example:
 
@@ -417,11 +428,14 @@ effects of bursts of events from that bridge on events sent by normal users.
 Additionally, the writing of specific streams (such as events) can be moved off
 of the main process to a particular worker.
 
-To enable this, the worker must have a
-[HTTP `replication` listener](usage/configuration/config_documentation.md#listeners) configured,
-have a [`worker_name`](usage/configuration/config_documentation.md#worker_name)
+To enable this, the worker must:
+* Have an [HTTP `replication` listener](usage/configuration/config_documentation.md#listeners) configured,
+* Have a [`worker_name`](usage/configuration/config_documentation.md#worker_name)
 and be listed in the [`instance_map`](usage/configuration/config_documentation.md#instance_map)
-config. The same worker can handle multiple streams, but unless otherwise documented,
+config. 
+* Have the main process declared on the [`instance_map`](usage/configuration/config_documentation.md#instance_map) as well.
+
+Note: The same worker can handle multiple streams, but unless otherwise documented,
 each stream can only have a single writer.
 
 For example, to move event persistence off to a dedicated worker, the shared
@@ -429,6 +443,9 @@ configuration would include:
 
 ```yaml
 instance_map:
+    main:
+        host: localhost
+        port: 8030
     event_persister1:
         host: localhost
         port: 8034
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 95b4047f1d..d2311cc857 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -39,6 +39,19 @@ The '%s' configuration option is deprecated and will be removed in a future
 Synapse version. Please use ``%s: name_of_worker`` instead.
 """
 
+_MISSING_MAIN_PROCESS_INSTANCE_MAP_DATA = """
+Missing data for a worker to connect to main process. Please include '%s' in the
+`instance_map` declared in your shared yaml configuration, or optionally(as a deprecated
+solution) in every worker's yaml as various `worker_replication_*` settings as defined
+in workers documentation here:
+`https://matrix-org.github.io/synapse/latest/workers.html#worker-configuration`
+"""
+# This allows for a handy knob when it's time to change from 'master' to
+# something with less 'history'
+MAIN_PROCESS_INSTANCE_NAME = "master"
+# Use this to adjust what the main process is known as in the yaml instance_map
+MAIN_PROCESS_INSTANCE_MAP_NAME = "main"
+
 logger = logging.getLogger(__name__)
 
 
@@ -161,27 +174,15 @@ class WorkerConfig(Config):
             raise ConfigError("worker_log_config must be a string")
         self.worker_log_config = worker_log_config
 
-        # The host used to connect to the main synapse
-        self.worker_replication_host = config.get("worker_replication_host", None)
-
         # The port on the main synapse for TCP replication
         if "worker_replication_port" in config:
             raise ConfigError(DIRECT_TCP_ERROR, ("worker_replication_port",))
 
-        # The port on the main synapse for HTTP replication endpoint
-        self.worker_replication_http_port = config.get("worker_replication_http_port")
-
-        # The tls mode on the main synapse for HTTP replication endpoint.
-        # For backward compatibility this defaults to False.
-        self.worker_replication_http_tls = config.get(
-            "worker_replication_http_tls", False
-        )
-
         # The shared secret used for authentication when connecting to the main synapse.
         self.worker_replication_secret = config.get("worker_replication_secret", None)
 
         self.worker_name = config.get("worker_name", self.worker_app)
-        self.instance_name = self.worker_name or "master"
+        self.instance_name = self.worker_name or MAIN_PROCESS_INSTANCE_NAME
 
         # FIXME: Remove this check after a suitable amount of time.
         self.worker_main_http_uri = config.get("worker_main_http_uri", None)
@@ -215,12 +216,55 @@ class WorkerConfig(Config):
         )
 
         # A map from instance name to host/port of their HTTP replication endpoint.
+        # Check if the main process is declared. Inject it into the map if it's not,
+        # based first on if a 'main' block is declared then on 'worker_replication_*'
+        # data. If both are available, default to instance_map. The main process
+        # itself doesn't need this data as it would never have to talk to itself.
+        instance_map: Dict[str, Any] = config.get("instance_map", {})
+
+        if instance_map and self.instance_name is not MAIN_PROCESS_INSTANCE_NAME:
+            # The host used to connect to the main synapse
+            main_host = config.get("worker_replication_host", None)
+
+            # The port on the main synapse for HTTP replication endpoint
+            main_port = config.get("worker_replication_http_port")
+
+            # The tls mode on the main synapse for HTTP replication endpoint.
+            # For backward compatibility this defaults to False.
+            main_tls = config.get("worker_replication_http_tls", False)
+
+            # For now, accept 'main' in the instance_map, but the replication system
+            # expects 'master', force that into being until it's changed later.
+            if MAIN_PROCESS_INSTANCE_MAP_NAME in instance_map:
+                instance_map[MAIN_PROCESS_INSTANCE_NAME] = instance_map[
+                    MAIN_PROCESS_INSTANCE_MAP_NAME
+                ]
+                del instance_map[MAIN_PROCESS_INSTANCE_MAP_NAME]
+
+            # This is the backwards compatibility bit that handles the
+            # worker_replication_* bits using setdefault() to not overwrite anything.
+            elif main_host is not None and main_port is not None:
+                instance_map.setdefault(
+                    MAIN_PROCESS_INSTANCE_NAME,
+                    {
+                        "host": main_host,
+                        "port": main_port,
+                        "tls": main_tls,
+                    },
+                )
+
+            else:
+                # If we've gotten here, it means that the main process is not on the
+                # instance_map and that not enough worker_replication_* variables
+                # were declared in the worker's yaml.
+                raise ConfigError(
+                    _MISSING_MAIN_PROCESS_INSTANCE_MAP_DATA
+                    % MAIN_PROCESS_INSTANCE_MAP_NAME
+                )
+
         self.instance_map: Dict[
             str, InstanceLocationConfig
-        ] = parse_and_validate_mapping(
-            config.get("instance_map", {}),
-            InstanceLocationConfig,
-        )
+        ] = parse_and_validate_mapping(instance_map, InstanceLocationConfig)
 
         # Map from type of streams to source, c.f. WriterLocations.
         writers = config.get("stream_writers") or {}
diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py
index 23129962e9..dc7820f963 100644
--- a/synapse/replication/http/_base.py
+++ b/synapse/replication/http/_base.py
@@ -25,6 +25,7 @@ from twisted.internet.error import ConnectError, DNSLookupError
 from twisted.web.server import Request
 
 from synapse.api.errors import HttpResponseException, SynapseError
+from synapse.config.workers import MAIN_PROCESS_INSTANCE_NAME
 from synapse.http import RequestTimedOutError
 from synapse.http.server import HttpServer
 from synapse.http.servlet import parse_json_object_from_request
@@ -197,11 +198,6 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
         client = hs.get_replication_client()
         local_instance_name = hs.get_instance_name()
 
-        # The value of these option should match the replication listener settings
-        master_host = hs.config.worker.worker_replication_host
-        master_port = hs.config.worker.worker_replication_http_port
-        master_tls = hs.config.worker.worker_replication_http_tls
-
         instance_map = hs.config.worker.instance_map
 
         outgoing_gauge = _pending_outgoing_requests.labels(cls.NAME)
@@ -213,7 +209,9 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
             )
 
         @trace_with_opname("outgoing_replication_request")
-        async def send_request(*, instance_name: str = "master", **kwargs: Any) -> Any:
+        async def send_request(
+            *, instance_name: str = MAIN_PROCESS_INSTANCE_NAME, **kwargs: Any
+        ) -> Any:
             # We have to pull these out here to avoid circular dependencies...
             streams = hs.get_replication_command_handler().get_streams_to_replicate()
             replication = hs.get_replication_data_handler()
@@ -221,11 +219,7 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
             with outgoing_gauge.track_inprogress():
                 if instance_name == local_instance_name:
                     raise Exception("Trying to send HTTP request to self")
-                if instance_name == "master":
-                    host = master_host
-                    port = master_port
-                    tls = master_tls
-                elif instance_name in instance_map:
+                if instance_name in instance_map:
                     host = instance_map[instance_name].host
                     port = instance_map[instance_name].port
                     tls = instance_map[instance_name].tls
diff --git a/tests/module_api/test_api.py b/tests/module_api/test_api.py
index 758b4bc38b..bff7114cd8 100644
--- a/tests/module_api/test_api.py
+++ b/tests/module_api/test_api.py
@@ -837,6 +837,7 @@ class ModuleApiWorkerTestCase(BaseModuleApiTestCase, BaseMultiWorkerStreamTestCa
         conf = super().default_config()
         conf["stream_writers"] = {"presence": ["presence_writer"]}
         conf["instance_map"] = {
+            "main": {"host": "testserv", "port": 8765},
             "presence_writer": {"host": "testserv", "port": 1001},
         }
         return conf
diff --git a/tests/replication/_base.py b/tests/replication/_base.py
index 0f1a8a145f..eb9b1f1cd9 100644
--- a/tests/replication/_base.py
+++ b/tests/replication/_base.py
@@ -110,8 +110,7 @@ class BaseStreamTestCase(unittest.HomeserverTestCase):
     def _get_worker_hs_config(self) -> dict:
         config = self.default_config()
         config["worker_app"] = "synapse.app.generic_worker"
-        config["worker_replication_host"] = "testserv"
-        config["worker_replication_http_port"] = "8765"
+        config["instance_map"] = {"main": {"host": "testserv", "port": 8765}}
         return config
 
     def _build_replication_data_handler(self) -> "TestReplicationDataHandler":
@@ -249,6 +248,7 @@ class BaseMultiWorkerStreamTestCase(unittest.HomeserverTestCase):
         """
         base = super().default_config()
         base["redis"] = {"enabled": True}
+        base["instance_map"] = {"main": {"host": "testserv", "port": 8765}}
         return base
 
     def setUp(self) -> None:
@@ -310,7 +310,7 @@ class BaseMultiWorkerStreamTestCase(unittest.HomeserverTestCase):
     def make_worker_hs(
         self, worker_app: str, extra_config: Optional[dict] = None, **kwargs: Any
     ) -> HomeServer:
-        """Make a new worker HS instance, correctly connecting replcation
+        """Make a new worker HS instance, correctly connecting replication
         stream to the master HS.
 
         Args:
@@ -388,8 +388,6 @@ class BaseMultiWorkerStreamTestCase(unittest.HomeserverTestCase):
 
     def _get_worker_hs_config(self) -> dict:
         config = self.default_config()
-        config["worker_replication_host"] = "testserv"
-        config["worker_replication_http_port"] = "8765"
         return config
 
     def replicate(self) -> None:
diff --git a/tests/replication/test_auth.py b/tests/replication/test_auth.py
index 98602371e4..f7bca0063d 100644
--- a/tests/replication/test_auth.py
+++ b/tests/replication/test_auth.py
@@ -43,9 +43,6 @@ class WorkerAuthenticationTestCase(BaseMultiWorkerStreamTestCase):
     def _get_worker_hs_config(self) -> dict:
         config = self.default_config()
         config["worker_app"] = "synapse.app.generic_worker"
-        config["worker_replication_host"] = "testserv"
-        config["worker_replication_http_port"] = "8765"
-
         return config
 
     def _test_register(self) -> FakeChannel:
diff --git a/tests/replication/test_client_reader_shard.py b/tests/replication/test_client_reader_shard.py
index eca5033761..a18859099f 100644
--- a/tests/replication/test_client_reader_shard.py
+++ b/tests/replication/test_client_reader_shard.py
@@ -29,8 +29,6 @@ class ClientReaderTestCase(BaseMultiWorkerStreamTestCase):
     def _get_worker_hs_config(self) -> dict:
         config = self.default_config()
         config["worker_app"] = "synapse.app.generic_worker"
-        config["worker_replication_host"] = "testserv"
-        config["worker_replication_http_port"] = "8765"
         return config
 
     def test_register_single_worker(self) -> None:
diff --git a/tests/replication/test_sharded_event_persister.py b/tests/replication/test_sharded_event_persister.py
index 7f9cc67e73..4623d737fb 100644
--- a/tests/replication/test_sharded_event_persister.py
+++ b/tests/replication/test_sharded_event_persister.py
@@ -50,6 +50,7 @@ class EventPersisterShardTestCase(BaseMultiWorkerStreamTestCase):
         conf = super().default_config()
         conf["stream_writers"] = {"events": ["worker1", "worker2"]}
         conf["instance_map"] = {
+            "main": {"host": "testserv", "port": 8765},
             "worker1": {"host": "testserv", "port": 1001},
             "worker2": {"host": "testserv", "port": 1002},
         }
-- 
cgit 1.5.1


From 2611433b70fc30c436f6b9b950a3bcc533b3df5b Mon Sep 17 00:00:00 2001
From: Roel ter Maat <roel.termaat@nedap.com>
Date: Thu, 11 May 2023 14:02:51 +0200
Subject: Add redis SSL configuration options (#15312)

* Add SSL options to redis config

* fix lint issues

* Add documentation and changelog file

* add missing . at the end of the changelog

* Move client context factory to new file

* Rename ssl to tls and fix typo

* fix lint issues

* Document when the redis attributes were added
---
 changelog.d/15312.feature                        |  1 +
 contrib/docker_compose_workers/README.md         |  4 +++
 docs/usage/configuration/config_documentation.md | 11 ++++++++
 synapse/config/redis.py                          |  6 +++++
 synapse/replication/tcp/context.py               | 34 ++++++++++++++++++++++++
 synapse/replication/tcp/handler.py               | 29 +++++++++++++++-----
 synapse/replication/tcp/redis.py                 | 27 ++++++++++++++-----
 7 files changed, 98 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/15312.feature
 create mode 100644 synapse/replication/tcp/context.py

(limited to 'synapse')

diff --git a/changelog.d/15312.feature b/changelog.d/15312.feature
new file mode 100644
index 0000000000..e4e972cfeb
--- /dev/null
+++ b/changelog.d/15312.feature
@@ -0,0 +1 @@
+Add redis TLS configuration options.
\ No newline at end of file
diff --git a/contrib/docker_compose_workers/README.md b/contrib/docker_compose_workers/README.md
index d3cdfe5614..ebb225fba6 100644
--- a/contrib/docker_compose_workers/README.md
+++ b/contrib/docker_compose_workers/README.md
@@ -70,6 +70,10 @@ redis:
   port: 6379
   # dbid:  <redis_logical_db_id>
   # password: <secret_password>  
+  # use_tls: True
+  # certificate_file: <path_to_certificate>
+  # private_key_file: <path_to_private_key>
+  # ca_file: <path_to_ca_certificate>
 ```
 
 This assumes that your Redis service is called `redis` in your Docker Compose file.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index dc965b4119..93b132b6e4 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3981,9 +3981,16 @@ This setting has the following sub-options:
    localhost and 6379
 * `password`: Optional password if configured on the Redis instance.
 * `dbid`: Optional redis dbid if needs to connect to specific redis logical db.
+* `use_tls`: Whether to use a TLS connection. Defaults to false.
+* `certificate_file`: Optional path to the certificate file.
+* `private_key_file`: Optional path to the private key file.
+* `ca_file`: Optional path to the CA certificate file. Use either this or `ca_path`.
+* `ca_path`: Optional path to the folder containing the CA certificate file. Use either this or `ca_file`.
 
   _Added in Synapse 1.78.0._
 
+  _Changed in Synapse 1.84.0: Added use\_tls, certificate\_file, private\_key\_file, ca\_file and ca\_path attributes_
+
 Example configuration:
 ```yaml
 redis:
@@ -3992,6 +3999,10 @@ redis:
   port: 6379
   password: <secret_password>
   dbid: <dbid>
+  #use_tls: True
+  #certificate_file: <path_to_the_certificate_file>
+  #private_key_file: <path_to_the_private_key_file>
+  #ca_file: <path_to_the_ca_certificate_file>
 ```
 ---
 ## Individual worker configuration
diff --git a/synapse/config/redis.py b/synapse/config/redis.py
index e6a75be434..636cb450b8 100644
--- a/synapse/config/redis.py
+++ b/synapse/config/redis.py
@@ -35,3 +35,9 @@ class RedisConfig(Config):
         self.redis_port = redis_config.get("port", 6379)
         self.redis_dbid = redis_config.get("dbid", None)
         self.redis_password = redis_config.get("password")
+
+        self.redis_use_tls = redis_config.get("use_tls", False)
+        self.redis_certificate = redis_config.get("certificate_file", None)
+        self.redis_private_key = redis_config.get("private_key_file", None)
+        self.redis_ca_file = redis_config.get("ca_file", None)
+        self.redis_ca_path = redis_config.get("ca_path", None)
diff --git a/synapse/replication/tcp/context.py b/synapse/replication/tcp/context.py
new file mode 100644
index 0000000000..4688b2200b
--- /dev/null
+++ b/synapse/replication/tcp/context.py
@@ -0,0 +1,34 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from OpenSSL.SSL import Context
+from twisted.internet import ssl
+
+from synapse.config.redis import RedisConfig
+
+
+class ClientContextFactory(ssl.ClientContextFactory):
+    def __init__(self, redis_config: RedisConfig):
+        self.redis_config = redis_config
+
+    def getContext(self) -> Context:
+        ctx = super().getContext()
+        if self.redis_config.redis_certificate:
+            ctx.use_certificate_file(self.redis_config.redis_certificate)
+        if self.redis_config.redis_private_key:
+            ctx.use_privatekey_file(self.redis_config.redis_private_key)
+        if self.redis_config.redis_ca_file:
+            ctx.load_verify_locations(cafile=self.redis_config.redis_ca_file)
+        elif self.redis_config.redis_ca_path:
+            ctx.load_verify_locations(capath=self.redis_config.redis_ca_path)
+        return ctx
diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py
index 2290b3e6fe..233ad61d49 100644
--- a/synapse/replication/tcp/handler.py
+++ b/synapse/replication/tcp/handler.py
@@ -46,6 +46,7 @@ from synapse.replication.tcp.commands import (
     UserIpCommand,
     UserSyncCommand,
 )
+from synapse.replication.tcp.context import ClientContextFactory
 from synapse.replication.tcp.protocol import IReplicationConnection
 from synapse.replication.tcp.streams import (
     STREAMS_MAP,
@@ -348,13 +349,27 @@ class ReplicationCommandHandler:
             outbound_redis_connection,
             channel_names=self._channels_to_subscribe_to,
         )
-        hs.get_reactor().connectTCP(
-            hs.config.redis.redis_host,
-            hs.config.redis.redis_port,
-            self._factory,
-            timeout=30,
-            bindAddress=None,
-        )
+
+        reactor = hs.get_reactor()
+        redis_config = hs.config.redis
+        if hs.config.redis.redis_use_tls:
+            ssl_context_factory = ClientContextFactory(hs.config.redis)
+            reactor.connectSSL(
+                redis_config.redis_host,
+                redis_config.redis_port,
+                self._factory,
+                ssl_context_factory,
+                timeout=30,
+                bindAddress=None,
+            )
+        else:
+            reactor.connectTCP(
+                redis_config.redis_host,
+                redis_config.redis_port,
+                self._factory,
+                timeout=30,
+                bindAddress=None,
+            )
 
     def get_streams(self) -> Dict[str, Stream]:
         """Get a map from stream name to all streams."""
diff --git a/synapse/replication/tcp/redis.py b/synapse/replication/tcp/redis.py
index dfc061eb5e..c8f4bf8b27 100644
--- a/synapse/replication/tcp/redis.py
+++ b/synapse/replication/tcp/redis.py
@@ -35,6 +35,7 @@ from synapse.replication.tcp.commands import (
     ReplicateCommand,
     parse_command_from_line,
 )
+from synapse.replication.tcp.context import ClientContextFactory
 from synapse.replication.tcp.protocol import (
     IReplicationConnection,
     tcp_inbound_commands_counter,
@@ -386,12 +387,24 @@ def lazyConnection(
     factory.continueTrying = reconnect
 
     reactor = hs.get_reactor()
-    reactor.connectTCP(
-        host,
-        port,
-        factory,
-        timeout=30,
-        bindAddress=None,
-    )
+
+    if hs.config.redis.redis_use_tls:
+        ssl_context_factory = ClientContextFactory(hs.config.redis)
+        reactor.connectSSL(
+            host,
+            port,
+            factory,
+            ssl_context_factory,
+            timeout=30,
+            bindAddress=None,
+        )
+    else:
+        reactor.connectTCP(
+            host,
+            port,
+            factory,
+            timeout=30,
+            bindAddress=None,
+        )
 
     return factory.handler
-- 
cgit 1.5.1


From d19d1edbcf78a58da3483ecf51f107fedb1f3fd0 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Thu, 11 May 2023 11:50:46 -0500
Subject: Print full startup/initialization error (#15569)

I found the error in the **Before** example really vague and obtuse, and didn't realize port `5432` was the Postgres port until I searched the codebase. The message says to check the logs, but that wasn't my first instinct. It's more obvious if we print the full error, which gives the error type and a traceback pointing to the relevant area of code.

#### Before

```
$ poetry run python -m synapse.app.homeserver -c homeserver.yaml
**********************************************************************************
 Error during initialisation:
    connection to server at "localhost" (::1), port 5432 failed: Connection refused
 	Is the server running on that host and accepting TCP/IP connections?
 connection to server at "localhost" (127.0.0.1), port 5432 failed: Connection refused
 	Is the server running on that host and accepting TCP/IP connections?

 There may be more information in the logs.
**********************************************************************************
```

#### After

```sh
$ poetry run python -m synapse.app.homeserver -c homeserver.yaml
**********************************************************************************
 Error during initialisation:
     Traceback (most recent call last):
       File "/home/eric/Documents/github/element/synapse/synapse/app/homeserver.py", line 352, in setup
         hs.setup()
       File "/home/eric/Documents/github/element/synapse/synapse/server.py", line 337, in setup
         self.datastores = Databases(self.DATASTORE_CLASS, self)
       File "/home/eric/Documents/github/element/synapse/synapse/storage/databases/__init__.py", line 65, in __init__
         with make_conn(database_config, engine, "startup") as db_conn:
       File "/home/eric/Documents/github/element/synapse/synapse/storage/database.py", line 161, in make_conn
         native_db_conn = engine.module.connect(**db_params)
       File "/home/eric/.cache/pypoetry/virtualenvs/matrix-synapse-xCtC9ulO-py3.10/lib/python3.10/site-packages/psycopg2/__init__.py", line 122, in connect
         conn = _connect(dsn, connection_factory=connection_factory, **kwasync)
     psycopg2.OperationalError: connection to server at "localhost" (::1), port 5432 failed: Connection refused
     	Is the server running on that host and accepting TCP/IP connections?
     connection to server at "localhost" (127.0.0.1), port 5432 failed: Connection refused
     	Is the server running on that host and accepting TCP/IP connections?


 There may be more information in the logs.
**********************************************************************************
```
---
 changelog.d/15569.feature | 1 +
 synapse/app/_base.py      | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15569.feature

(limited to 'synapse')

diff --git a/changelog.d/15569.feature b/changelog.d/15569.feature
new file mode 100644
index 0000000000..b58af8ad55
--- /dev/null
+++ b/changelog.d/15569.feature
@@ -0,0 +1 @@
+Print full error and stack-trace of any exception that occurs during startup/initialization.
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 7f83b34d89..4dfcf484fa 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -21,6 +21,7 @@ import socket
 import sys
 import traceback
 import warnings
+from textwrap import indent
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -212,8 +213,12 @@ def handle_startup_exception(e: Exception) -> NoReturn:
     # Exceptions that occur between setting up the logging and forking or starting
     # the reactor are written to the logs, followed by a summary to stderr.
     logger.exception("Exception during startup")
+
+    error_string = "".join(traceback.format_exception(e))
+    indented_error_string = indent(error_string, "    ")
+
     quit_with_error(
-        f"Error during initialisation:\n   {e}\nThere may be more information in the logs."
+        f"Error during initialisation:\n{indented_error_string}\nThere may be more information in the logs."
     )
 
 
-- 
cgit 1.5.1


From 808105bd31ff400b222001755a60c77c89dc9f6c Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 12 May 2023 11:38:16 +0100
Subject: Revert "Set thread_id column to non-null for
 event_push_{actions,actions_staging,summary} (#15437)" (#15580)

This reverts commit a7b3e9ce65335e452de216cb42b9e724e8f3ad1d.
---
 changelog.d/15437.misc                             |   1 -
 synapse/storage/background_updates.py              |  44 ----
 .../storage/databases/main/event_push_actions.py   | 244 ++++++++++++++++++++-
 synapse/storage/schema/__init__.py                 |   3 -
 .../delta/76/04thread_notifications_backfill.sql   |  28 ---
 .../05thread_notifications_not_null.sql.postgres   |  37 ----
 .../76/05thread_notifications_not_null.sql.sqlite  | 102 ---------
 7 files changed, 234 insertions(+), 225 deletions(-)
 delete mode 100644 changelog.d/15437.misc
 delete mode 100644 synapse/storage/schema/main/delta/76/04thread_notifications_backfill.sql
 delete mode 100644 synapse/storage/schema/main/delta/76/05thread_notifications_not_null.sql.postgres
 delete mode 100644 synapse/storage/schema/main/delta/76/05thread_notifications_not_null.sql.sqlite

(limited to 'synapse')

diff --git a/changelog.d/15437.misc b/changelog.d/15437.misc
deleted file mode 100644
index 2dea23784f..0000000000
--- a/changelog.d/15437.misc
+++ /dev/null
@@ -1 +0,0 @@
-Make the `thread_id` column on `event_push_actions`, `event_push_actions_staging`, and `event_push_summary` non-null.
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index ca085ef800..a99aea8926 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -561,50 +561,6 @@ class BackgroundUpdater:
             updater, oneshot=True
         )
 
-    def register_background_validate_constraint(
-        self, update_name: str, constraint_name: str, table: str
-    ) -> None:
-        """Helper for store classes to do a background validate constraint.
-
-        This only applies on PostgreSQL.
-
-        To use:
-
-        1. use a schema delta file to add a background update. Example:
-            INSERT INTO background_updates (update_name, progress_json) VALUES
-                ('validate_my_constraint', '{}');
-
-        2. In the Store constructor, call this method
-
-        Args:
-            update_name: update_name to register for
-            constraint_name: name of constraint to validate
-            table: table the constraint is applied to
-        """
-
-        def runner(conn: Connection) -> None:
-            c = conn.cursor()
-
-            sql = f"""
-            ALTER TABLE {table} VALIDATE CONSTRAINT {constraint_name};
-            """
-            logger.debug("[SQL] %s", sql)
-            c.execute(sql)
-
-        async def updater(progress: JsonDict, batch_size: int) -> int:
-            assert isinstance(
-                self.db_pool.engine, engines.PostgresEngine
-            ), "validate constraint background update registered for non-Postres database"
-
-            logger.info("Validating constraint %s to %s", constraint_name, table)
-            await self.db_pool.runWithConnection(runner)
-            await self._end_background_update(update_name)
-            return 1
-
-        self._background_update_handlers[update_name] = _BackgroundUpdateHandler(
-            updater, oneshot=True
-        )
-
     async def create_index_in_background(
         self,
         index_name: str,
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 2e98a29fef..6fdb1e292e 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -100,6 +100,7 @@ from synapse.storage.database import (
 )
 from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
 from synapse.storage.databases.main.stream import StreamWorkerStore
+from synapse.types import JsonDict
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached
 
@@ -288,22 +289,180 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             unique=True,
         )
 
-        self.db_pool.updates.register_background_validate_constraint(
-            "event_push_actions_staging_thread_id",
-            constraint_name="event_push_actions_staging_thread_id",
-            table="event_push_actions_staging",
+        self.db_pool.updates.register_background_update_handler(
+            "event_push_backfill_thread_id",
+            self._background_backfill_thread_id,
         )
-        self.db_pool.updates.register_background_validate_constraint(
-            "event_push_actions_thread_id",
-            constraint_name="event_push_actions_thread_id",
+
+        # Indexes which will be used to quickly make the thread_id column non-null.
+        self.db_pool.updates.register_background_index_update(
+            "event_push_actions_thread_id_null",
+            index_name="event_push_actions_thread_id_null",
             table="event_push_actions",
+            columns=["thread_id"],
+            where_clause="thread_id IS NULL",
         )
-        self.db_pool.updates.register_background_validate_constraint(
-            "event_push_summary_thread_id",
-            constraint_name="event_push_summary_thread_id",
+        self.db_pool.updates.register_background_index_update(
+            "event_push_summary_thread_id_null",
+            index_name="event_push_summary_thread_id_null",
             table="event_push_summary",
+            columns=["thread_id"],
+            where_clause="thread_id IS NULL",
         )
 
+        # Check ASAP (and then every 15s) to see if the background updates of the
+        # event_push_actions and event_push_summary tables have finished.
+        self._clock.call_later(0.0, self._check_event_push_backfill_thread_id)
+        self._event_push_backfill_thread_id_done = False
+
+    @wrap_as_background_process("check_event_push_backfill_thread_id")
+    async def _check_event_push_backfill_thread_id(self) -> None:
+        """
+        Has thread_id finished backfilling?
+
+        If not, we need to just-in-time update it so the queries work.
+        """
+        done = await self.db_pool.updates.has_completed_background_update(
+            "event_push_backfill_thread_id"
+        )
+
+        if done:
+            self._event_push_backfill_thread_id_done = True
+        else:
+            # Reschedule to run.
+            self._clock.call_later(15.0, self._check_event_push_backfill_thread_id)
+
+    async def _background_backfill_thread_id(
+        self, progress: JsonDict, batch_size: int
+    ) -> int:
+        """
+        Fill in the thread_id field for event_push_actions and event_push_summary.
+
+        This is preparatory so that it can be made non-nullable in the future.
+
+        Because all current (null) data is done in an unthreaded manner this
+        simply assumes it is on the "main" timeline. Since event_push_actions
+        are periodically cleared it is not possible to correctly re-calculate
+        the thread_id.
+        """
+        event_push_actions_done = progress.get("event_push_actions_done", False)
+
+        def add_thread_id_txn(
+            txn: LoggingTransaction, start_stream_ordering: int
+        ) -> int:
+            sql = """
+            SELECT stream_ordering
+            FROM event_push_actions
+            WHERE
+                thread_id IS NULL
+                AND stream_ordering > ?
+            ORDER BY stream_ordering
+            LIMIT ?
+            """
+            txn.execute(sql, (start_stream_ordering, batch_size))
+
+            # No more rows to process.
+            rows = txn.fetchall()
+            if not rows:
+                progress["event_push_actions_done"] = True
+                self.db_pool.updates._background_update_progress_txn(
+                    txn, "event_push_backfill_thread_id", progress
+                )
+                return 0
+
+            # Update the thread ID for any of those rows.
+            max_stream_ordering = rows[-1][0]
+
+            sql = """
+            UPDATE event_push_actions
+            SET thread_id = 'main'
+            WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL
+            """
+            txn.execute(
+                sql,
+                (
+                    start_stream_ordering,
+                    max_stream_ordering,
+                ),
+            )
+
+            # Update progress.
+            processed_rows = txn.rowcount
+            progress["max_event_push_actions_stream_ordering"] = max_stream_ordering
+            self.db_pool.updates._background_update_progress_txn(
+                txn, "event_push_backfill_thread_id", progress
+            )
+
+            return processed_rows
+
+        def add_thread_id_summary_txn(txn: LoggingTransaction) -> int:
+            min_user_id = progress.get("max_summary_user_id", "")
+            min_room_id = progress.get("max_summary_room_id", "")
+
+            # Slightly overcomplicated query for getting the Nth user ID / room
+            # ID tuple, or the last if there are less than N remaining.
+            sql = """
+            SELECT user_id, room_id FROM (
+                SELECT user_id, room_id FROM event_push_summary
+                WHERE (user_id, room_id) > (?, ?)
+                    AND thread_id IS NULL
+                ORDER BY user_id, room_id
+                LIMIT ?
+            ) AS e
+            ORDER BY user_id DESC, room_id DESC
+            LIMIT 1
+            """
+
+            txn.execute(sql, (min_user_id, min_room_id, batch_size))
+            row = txn.fetchone()
+            if not row:
+                return 0
+
+            max_user_id, max_room_id = row
+
+            sql = """
+            UPDATE event_push_summary
+            SET thread_id = 'main'
+            WHERE
+                (?, ?) < (user_id, room_id) AND (user_id, room_id) <= (?, ?)
+                AND thread_id IS NULL
+            """
+            txn.execute(sql, (min_user_id, min_room_id, max_user_id, max_room_id))
+            processed_rows = txn.rowcount
+
+            progress["max_summary_user_id"] = max_user_id
+            progress["max_summary_room_id"] = max_room_id
+            self.db_pool.updates._background_update_progress_txn(
+                txn, "event_push_backfill_thread_id", progress
+            )
+
+            return processed_rows
+
+        # First update the event_push_actions table, then the event_push_summary table.
+        #
+        # Note that the event_push_actions_staging table is ignored since it is
+        # assumed that items in that table will only exist for a short period of
+        # time.
+        if not event_push_actions_done:
+            result = await self.db_pool.runInteraction(
+                "event_push_backfill_thread_id",
+                add_thread_id_txn,
+                progress.get("max_event_push_actions_stream_ordering", 0),
+            )
+        else:
+            result = await self.db_pool.runInteraction(
+                "event_push_backfill_thread_id",
+                add_thread_id_summary_txn,
+            )
+
+            # Only done after the event_push_summary table is done.
+            if not result:
+                await self.db_pool.updates._end_background_update(
+                    "event_push_backfill_thread_id"
+                )
+
+        return result
+
     async def get_unread_counts_by_room_for_user(self, user_id: str) -> Dict[str, int]:
         """Get the notification count by room for a user. Only considers notifications,
         not highlight or unread counts, and threads are currently aggregated under their room.
@@ -552,6 +711,25 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE),
         )
 
+        # First ensure that the existing rows have an updated thread_id field.
+        if not self._event_push_backfill_thread_id_done:
+            txn.execute(
+                """
+                UPDATE event_push_summary
+                SET thread_id = ?
+                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+                """,
+                (MAIN_TIMELINE, room_id, user_id),
+            )
+            txn.execute(
+                """
+                UPDATE event_push_actions
+                SET thread_id = ?
+                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+                """,
+                (MAIN_TIMELINE, room_id, user_id),
+            )
+
         # First we pull the counts from the summary table.
         #
         # We check that `last_receipt_stream_ordering` matches the stream ordering of the
@@ -1367,6 +1545,25 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 (room_id, user_id, stream_ordering, *thread_args),
             )
 
+            # First ensure that the existing rows have an updated thread_id field.
+            if not self._event_push_backfill_thread_id_done:
+                txn.execute(
+                    """
+                    UPDATE event_push_summary
+                    SET thread_id = ?
+                    WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+                    """,
+                    (MAIN_TIMELINE, room_id, user_id),
+                )
+                txn.execute(
+                    """
+                    UPDATE event_push_actions
+                    SET thread_id = ?
+                    WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+                    """,
+                    (MAIN_TIMELINE, room_id, user_id),
+                )
+
             # Fetch the notification counts between the stream ordering of the
             # latest receipt and what was previously summarised.
             unread_counts = self._get_notif_unread_count_for_user_room(
@@ -1501,6 +1698,19 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             rotate_to_stream_ordering: The new maximum event stream ordering to summarise.
         """
 
+        # Ensure that any new actions have an updated thread_id.
+        if not self._event_push_backfill_thread_id_done:
+            txn.execute(
+                """
+                UPDATE event_push_actions
+                SET thread_id = ?
+                WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL
+                """,
+                (MAIN_TIMELINE, old_rotate_stream_ordering, rotate_to_stream_ordering),
+            )
+
+        # XXX Do we need to update summaries here too?
+
         # Calculate the new counts that should be upserted into event_push_summary
         sql = """
             SELECT user_id, room_id, thread_id,
@@ -1563,6 +1773,20 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
         logger.info("Rotating notifications, handling %d rows", len(summaries))
 
+        # Ensure that any updated threads have the proper thread_id.
+        if not self._event_push_backfill_thread_id_done:
+            txn.execute_batch(
+                """
+                UPDATE event_push_summary
+                SET thread_id = ?
+                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
+                """,
+                [
+                    (MAIN_TIMELINE, room_id, user_id)
+                    for user_id, room_id, _ in summaries
+                ],
+            )
+
         self.db_pool.simple_upsert_many_txn(
             txn,
             table="event_push_summary",
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 741563abc6..1672976209 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -106,9 +106,6 @@ Changes in SCHEMA_VERSION = 76:
 SCHEMA_COMPAT_VERSION = (
     # Queries against `event_stream_ordering` columns in membership tables must
     # be disambiguated.
-    #
-    # The threads_id column must written to with non-null values for the
-    # event_push_actions, event_push_actions_staging, and event_push_summary tables.
     74
 )
 """Limit on how far the synapse codebase can be rolled back without breaking db compat
diff --git a/synapse/storage/schema/main/delta/76/04thread_notifications_backfill.sql b/synapse/storage/schema/main/delta/76/04thread_notifications_backfill.sql
deleted file mode 100644
index ce6f9ff937..0000000000
--- a/synapse/storage/schema/main/delta/76/04thread_notifications_backfill.sql
+++ /dev/null
@@ -1,28 +0,0 @@
-/* Copyright 2023 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- Force the background updates from 06thread_notifications.sql to run in the
--- foreground as code will now require those to be "done".
-
-DELETE FROM background_updates WHERE update_name = 'event_push_backfill_thread_id';
-
--- Overwrite any null thread_id values.
-UPDATE event_push_actions_staging SET thread_id = 'main' WHERE thread_id IS NULL;
-UPDATE event_push_actions SET thread_id = 'main' WHERE thread_id IS NULL;
-UPDATE event_push_summary SET thread_id = 'main' WHERE thread_id IS NULL;
-
--- Drop the background updates to calculate the indexes used to find null thread_ids.
-DELETE FROM background_updates WHERE update_name = 'event_push_actions_thread_id_null';
-DELETE FROM background_updates WHERE update_name = 'event_push_summary_thread_id_null';
diff --git a/synapse/storage/schema/main/delta/76/05thread_notifications_not_null.sql.postgres b/synapse/storage/schema/main/delta/76/05thread_notifications_not_null.sql.postgres
deleted file mode 100644
index 40936def6f..0000000000
--- a/synapse/storage/schema/main/delta/76/05thread_notifications_not_null.sql.postgres
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright 2022 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- The thread_id columns can now be made non-nullable, this is done by using a
--- constraint (and not altering the column) to avoid taking out a full table lock.
---
--- We initially add an invalid constraint which guards against new data (this
--- doesn't lock the table).
-ALTER TABLE event_push_actions_staging
-    ADD CONSTRAINT event_push_actions_staging_thread_id CHECK (thread_id IS NOT NULL) NOT VALID;
-ALTER TABLE event_push_actions
-    ADD CONSTRAINT event_push_actions_thread_id CHECK (thread_id IS NOT NULL) NOT VALID;
-ALTER TABLE event_push_summary
-    ADD CONSTRAINT event_push_summary_thread_id CHECK (thread_id IS NOT NULL) NOT VALID;
-
--- We then validate the constraint which doesn't need to worry about new data. It
--- only needs a SHARE UPDATE EXCLUSIVE lock but can still take a while to complete.
-INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
-  (7605, 'event_push_actions_staging_thread_id', '{}'),
-  (7605, 'event_push_actions_thread_id', '{}'),
-  (7605, 'event_push_summary_thread_id', '{}');
-
--- Drop the indexes used to find null thread_ids.
-DROP INDEX IF EXISTS event_push_actions_thread_id_null;
-DROP INDEX IF EXISTS event_push_summary_thread_id_null;
diff --git a/synapse/storage/schema/main/delta/76/05thread_notifications_not_null.sql.sqlite b/synapse/storage/schema/main/delta/76/05thread_notifications_not_null.sql.sqlite
deleted file mode 100644
index e9372b6cf9..0000000000
--- a/synapse/storage/schema/main/delta/76/05thread_notifications_not_null.sql.sqlite
+++ /dev/null
@@ -1,102 +0,0 @@
-/* Copyright 2022 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- -- The thread_id columns can now be made non-nullable.
---
--- SQLite doesn't support modifying columns to an existing table, so it must
--- be recreated.
-
--- Create the new tables.
-CREATE TABLE event_push_actions_staging_new (
-    event_id TEXT NOT NULL,
-    user_id TEXT NOT NULL,
-    actions TEXT NOT NULL,
-    notif SMALLINT NOT NULL,
-    highlight SMALLINT NOT NULL,
-    unread SMALLINT,
-    thread_id TEXT,
-    inserted_ts BIGINT,
-    CONSTRAINT event_push_actions_staging_thread_id CHECK (thread_id is NOT NULL)
-);
-
-CREATE TABLE event_push_actions_new (
-    room_id TEXT NOT NULL,
-    event_id TEXT NOT NULL,
-    user_id TEXT NOT NULL,
-    profile_tag VARCHAR(32),
-    actions TEXT NOT NULL,
-    topological_ordering BIGINT,
-    stream_ordering BIGINT,
-    notif SMALLINT,
-    highlight SMALLINT,
-    unread SMALLINT,
-    thread_id TEXT,
-    CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag),
-    CONSTRAINT event_push_actions_thread_id CHECK (thread_id is NOT NULL)
-);
-
-CREATE TABLE event_push_summary_new (
-    user_id TEXT NOT NULL,
-    room_id TEXT NOT NULL,
-    notif_count BIGINT NOT NULL,
-    stream_ordering BIGINT NOT NULL,
-    unread_count BIGINT,
-    last_receipt_stream_ordering BIGINT,
-    thread_id TEXT,
-    CONSTRAINT event_push_summary_thread_id CHECK (thread_id is NOT NULL)
-);
-
--- Copy the data.
-INSERT INTO event_push_actions_staging_new (event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts)
-    SELECT event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts
-    FROM event_push_actions_staging;
-
-INSERT INTO event_push_actions_new (room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id)
-    SELECT room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id
-    FROM event_push_actions;
-
-INSERT INTO event_push_summary_new (user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id)
-    SELECT user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id
-    FROM event_push_summary;
-
--- Drop the old tables.
-DROP TABLE event_push_actions_staging;
-DROP TABLE event_push_actions;
-DROP TABLE event_push_summary;
-
--- Rename the tables.
-ALTER TABLE event_push_actions_staging_new RENAME TO event_push_actions_staging;
-ALTER TABLE event_push_actions_new RENAME TO event_push_actions;
-ALTER TABLE event_push_summary_new RENAME TO event_push_summary;
-
--- Recreate the indexes.
-CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging(event_id);
-
-CREATE INDEX event_push_actions_highlights_index ON event_push_actions (user_id, room_id, topological_ordering, stream_ordering);
-CREATE INDEX event_push_actions_rm_tokens on event_push_actions( user_id, room_id, topological_ordering, stream_ordering );
-CREATE INDEX event_push_actions_room_id_user_id on event_push_actions(room_id, user_id);
-CREATE INDEX event_push_actions_stream_ordering on event_push_actions( stream_ordering, user_id );
-CREATE INDEX event_push_actions_u_highlight ON event_push_actions (user_id, stream_ordering);
-
-CREATE UNIQUE INDEX event_push_summary_unique_index2 ON event_push_summary (user_id, room_id, thread_id) ;
-
--- Recreate some indexes in the background, by re-running the background updates
--- from 72/02event_push_actions_index.sql and 72/06thread_notifications.sql.
-INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
-  (7403, 'event_push_summary_unique_index2', '{}')
-  ON CONFLICT (update_name) DO UPDATE SET progress_json = '{}';
-INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
-  (7403, 'event_push_actions_stream_highlight_index', '{}')
-  ON CONFLICT (update_name) DO UPDATE SET progress_json = '{}';
-- 
cgit 1.5.1


From def480442d752f1951cf7f790be873489a09c432 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 12 May 2023 07:31:50 -0400
Subject: Declare support for Matrix 1.6 (#15559)

Adds logging for key server requests which include a key ID.
This is technically in violation of the 1.6 spec, but is the only
way to remain backwards compatible with earlier versions of
Synapse (and possibly other homeservers) which *did* include
the key ID.
---
 changelog.d/15559.feature                  |  1 +
 synapse/rest/client/versions.py            |  1 +
 synapse/rest/key/v2/local_key_resource.py  | 11 +++++++++++
 synapse/rest/key/v2/remote_key_resource.py | 11 ++++++++++-
 4 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15559.feature

(limited to 'synapse')

diff --git a/changelog.d/15559.feature b/changelog.d/15559.feature
new file mode 100644
index 0000000000..07f729e38c
--- /dev/null
+++ b/changelog.d/15559.feature
@@ -0,0 +1 @@
+Advertise support for Matrix 1.6 on `/_matrix/client/versions`.
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 2d2be6ef38..e9b56fc3f8 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -79,6 +79,7 @@ class VersionsRestServlet(RestServlet):
                     "v1.3",
                     "v1.4",
                     "v1.5",
+                    "v1.6",
                 ],
                 # as per MSC1497:
                 "unstable_features": {
diff --git a/synapse/rest/key/v2/local_key_resource.py b/synapse/rest/key/v2/local_key_resource.py
index d03e728d42..22e7bf9d86 100644
--- a/synapse/rest/key/v2/local_key_resource.py
+++ b/synapse/rest/key/v2/local_key_resource.py
@@ -34,6 +34,8 @@ class LocalKey(RestServlet):
     """HTTP resource containing encoding the TLS X.509 certificate and NACL
     signature verification keys for this server::
 
+        GET /_matrix/key/v2/server HTTP/1.1
+
         GET /_matrix/key/v2/server/a.key.id HTTP/1.1
 
         HTTP/1.1 200 OK
@@ -100,6 +102,15 @@ class LocalKey(RestServlet):
     def on_GET(
         self, request: Request, key_id: Optional[str] = None
     ) -> Tuple[int, JsonDict]:
+        # Matrix 1.6 drops support for passing the key_id, this is incompatible
+        # with earlier versions and is allowed in order to support both.
+        # A warning is issued to help determine when it is safe to drop this.
+        if key_id:
+            logger.warning(
+                "Request for local server key with deprecated key ID (logging to determine usage level for future removal): %s",
+                key_id,
+            )
+
         time_now = self.clock.time_msec()
         # Update the expiry time if less than half the interval remains.
         if time_now + self.config.key.key_refresh_interval / 2 > self.valid_until_ts:
diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py
index ff0454ca57..8f3865d412 100644
--- a/synapse/rest/key/v2/remote_key_resource.py
+++ b/synapse/rest/key/v2/remote_key_resource.py
@@ -126,6 +126,15 @@ class RemoteKey(RestServlet):
         self, request: Request, server: str, key_id: Optional[str] = None
     ) -> Tuple[int, JsonDict]:
         if server and key_id:
+            # Matrix 1.6 drops support for passing the key_id, this is incompatible
+            # with earlier versions and is allowed in order to support both.
+            # A warning is issued to help determine when it is safe to drop this.
+            logger.warning(
+                "Request for remote server key with deprecated key ID (logging to determine usage level for future removal): %s / %s",
+                server,
+                key_id,
+            )
+
             minimum_valid_until_ts = parse_integer(request, "minimum_valid_until_ts")
             arguments = {}
             if minimum_valid_until_ts is not None:
@@ -161,7 +170,7 @@ class RemoteKey(RestServlet):
 
         time_now_ms = self.clock.time_msec()
 
-        # Map server_name->key_id->int. Note that the value of the init is unused.
+        # Map server_name->key_id->int. Note that the value of the int is unused.
         # XXX: why don't we just use a set?
         cache_misses: Dict[str, Dict[str, int]] = {}
         for (server_name, key_id, _), key_results in cached.items():
-- 
cgit 1.5.1


From 3690d5bd89e696264ed2d56759c216f47bf23fca Mon Sep 17 00:00:00 2001
From: Michael Weimann <mail@michael-weimann.eu>
Date: Mon, 15 May 2023 10:54:49 +0200
Subject: Add an unstable feature flag for MSC3981 to the /versions endpoint
 (#15558)

Signed-off-by: Michael Weimann <michaelw@matrix.org>
Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
---
 changelog.d/15558.misc          | 1 +
 synapse/rest/client/versions.py | 2 ++
 2 files changed, 3 insertions(+)
 create mode 100644 changelog.d/15558.misc

(limited to 'synapse')

diff --git a/changelog.d/15558.misc b/changelog.d/15558.misc
new file mode 100644
index 0000000000..a7cfee2513
--- /dev/null
+++ b/changelog.d/15558.misc
@@ -0,0 +1 @@
+Add `org.matrix.msc3981` info to `client/versions`.
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index e9b56fc3f8..58c5b07390 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -126,6 +126,8 @@ class VersionsRestServlet(RestServlet):
                     "org.matrix.msc3912": self.config.experimental.msc3912_enabled,
                     # Adds support for unstable "intentional mentions" behaviour.
                     "org.matrix.msc3952_intentional_mentions": self.config.experimental.msc3952_intentional_mentions,
+                    # Whether recursively providing relations is supported.
+                    "org.matrix.msc3981": self.config.experimental.msc3981_recurse_relations,
                     # Adds support for deleting account data.
                     "org.matrix.msc3391": self.config.experimental.msc3391_enabled,
                 },
-- 
cgit 1.5.1


From ba6b21c81e67583ac850eab5d96fe5666620d614 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 15 May 2023 08:58:09 -0400
Subject: Implement MSC3389 to protect relations from redaction. (#15565)

MSC3389 proposes protecting the relation type & parent event ID
from redaction. This keeps the relation information intact after
redaction which helps with some UX flaws (e.g. deleting an
event causes it to no longer be in a thread, which is confusing).
---
 changelog.d/15565.misc       |  1 +
 synapse/api/room_versions.py | 17 +++++++++
 synapse/events/utils.py      | 12 ++++++
 tests/events/test_utils.py   | 90 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 120 insertions(+)
 create mode 100644 changelog.d/15565.misc

(limited to 'synapse')

diff --git a/changelog.d/15565.misc b/changelog.d/15565.misc
new file mode 100644
index 0000000000..5adc1aab9d
--- /dev/null
+++ b/changelog.d/15565.misc
@@ -0,0 +1 @@
+Implement updated redaction rules from [MSC3389](https://github.com/matrix-org/matrix-spec-proposals/pull/3389).
diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py
index 5d9c13e3c3..e65b9a0287 100644
--- a/synapse/api/room_versions.py
+++ b/synapse/api/room_versions.py
@@ -96,6 +96,8 @@ class RoomVersion:
     msc2716_historical: bool
     # MSC2716: Adds support for redacting "insertion", "chunk", and "marker" events
     msc2716_redactions: bool
+    # MSC3389: Protect relation information from redaction.
+    msc3389_relation_redactions: bool
     # MSC3787: Adds support for a `knock_restricted` join rule, mixing concepts of
     # knocks and restricted join rules into the same join condition.
     msc3787_knock_restricted_join_rule: bool
@@ -128,6 +130,7 @@ class RoomVersions:
         msc2403_knocking=False,
         msc2716_historical=False,
         msc2716_redactions=False,
+        msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
@@ -149,6 +152,7 @@ class RoomVersions:
         msc2403_knocking=False,
         msc2716_historical=False,
         msc2716_redactions=False,
+        msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
@@ -170,6 +174,7 @@ class RoomVersions:
         msc2403_knocking=False,
         msc2716_historical=False,
         msc2716_redactions=False,
+        msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
@@ -191,6 +196,7 @@ class RoomVersions:
         msc2403_knocking=False,
         msc2716_historical=False,
         msc2716_redactions=False,
+        msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
@@ -212,6 +218,7 @@ class RoomVersions:
         msc2403_knocking=False,
         msc2716_historical=False,
         msc2716_redactions=False,
+        msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
@@ -233,6 +240,7 @@ class RoomVersions:
         msc2403_knocking=False,
         msc2716_historical=False,
         msc2716_redactions=False,
+        msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
@@ -254,6 +262,7 @@ class RoomVersions:
         msc2403_knocking=False,
         msc2716_historical=False,
         msc2716_redactions=False,
+        msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
@@ -275,6 +284,7 @@ class RoomVersions:
         msc2403_knocking=True,
         msc2716_historical=False,
         msc2716_redactions=False,
+        msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
@@ -296,6 +306,7 @@ class RoomVersions:
         msc2403_knocking=True,
         msc2716_historical=False,
         msc2716_redactions=False,
+        msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
@@ -317,6 +328,7 @@ class RoomVersions:
         msc2403_knocking=True,
         msc2716_historical=False,
         msc2716_redactions=False,
+        msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
@@ -338,6 +350,7 @@ class RoomVersions:
         msc2403_knocking=True,
         msc2716_historical=False,
         msc2716_redactions=False,
+        msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
@@ -359,6 +372,7 @@ class RoomVersions:
         msc2403_knocking=True,
         msc2716_historical=False,
         msc2716_redactions=False,
+        msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=True,
         msc3931_push_features=(),
@@ -380,6 +394,7 @@ class RoomVersions:
         msc2403_knocking=True,
         msc2716_historical=True,
         msc2716_redactions=True,
+        msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
         msc3931_push_features=(),
@@ -402,6 +417,7 @@ class RoomVersions:
         msc2403_knocking=True,
         msc2716_historical=False,
         msc2716_redactions=False,
+        msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=True,
         msc3931_push_features=(PushRuleRoomFlag.EXTENSIBLE_EVENTS,),
@@ -423,6 +439,7 @@ class RoomVersions:
         msc2403_knocking=True,
         msc2716_historical=False,
         msc2716_redactions=False,
+        msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=True,
         msc3931_push_features=(),
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index 0802eb1963..e540f1582a 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -171,6 +171,18 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
     elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_MARKER:
         add_fields(EventContentFields.MSC2716_INSERTION_EVENT_REFERENCE)
 
+    # Protect the rel_type and event_id fields under the m.relates_to field.
+    if room_version.msc3389_relation_redactions:
+        relates_to = event_dict["content"].get("m.relates_to")
+        if isinstance(relates_to, collections.abc.Mapping):
+            new_relates_to = {}
+            for field in ("rel_type", "event_id"):
+                if field in relates_to:
+                    new_relates_to[field] = relates_to[field]
+            # Only include a non-empty relates_to field.
+            if new_relates_to:
+                new_content["m.relates_to"] = new_relates_to
+
     allowed_fields = {k: v for k, v in event_dict.items() if k in allowed_keys}
 
     allowed_fields["content"] = new_content
diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py
index 1b179acb20..02f0800a31 100644
--- a/tests/events/test_utils.py
+++ b/tests/events/test_utils.py
@@ -15,6 +15,8 @@
 import unittest as stdlib_unittest
 from typing import Any, List, Mapping, Optional
 
+import attr
+
 from synapse.api.constants import EventContentFields
 from synapse.api.room_versions import RoomVersions
 from synapse.events import EventBase, make_event_from_dict
@@ -435,6 +437,94 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
             room_version=RoomVersions.V9,
         )
 
+    def test_relations(self) -> None:
+        """Event relations get redacted until MSC3389."""
+        # Normally the m.relates_to field is redacted.
+        self.run_test(
+            {
+                "type": "m.room.message",
+                "content": {
+                    "body": "foo",
+                    "m.relates_to": {
+                        "rel_type": "rel_type",
+                        "event_id": "$parent:domain",
+                        "other": "stripped",
+                    },
+                },
+            },
+            {
+                "type": "m.room.message",
+                "content": {},
+                "signatures": {},
+                "unsigned": {},
+            },
+            room_version=RoomVersions.V10,
+        )
+
+        # Create a new room version.
+        msc3389_room_ver = attr.evolve(
+            RoomVersions.V10, msc3389_relation_redactions=True
+        )
+
+        self.run_test(
+            {
+                "type": "m.room.message",
+                "content": {
+                    "body": "foo",
+                    "m.relates_to": {
+                        "rel_type": "rel_type",
+                        "event_id": "$parent:domain",
+                        "other": "stripped",
+                    },
+                },
+            },
+            {
+                "type": "m.room.message",
+                "content": {
+                    "m.relates_to": {
+                        "rel_type": "rel_type",
+                        "event_id": "$parent:domain",
+                    },
+                },
+                "signatures": {},
+                "unsigned": {},
+            },
+            room_version=msc3389_room_ver,
+        )
+
+        # If the field is not an object, redact it.
+        self.run_test(
+            {
+                "type": "m.room.message",
+                "content": {
+                    "body": "foo",
+                    "m.relates_to": "stripped",
+                },
+            },
+            {
+                "type": "m.room.message",
+                "content": {},
+                "signatures": {},
+                "unsigned": {},
+            },
+            room_version=msc3389_room_ver,
+        )
+
+        # If the m.relates_to property would be empty, redact it.
+        self.run_test(
+            {
+                "type": "m.room.message",
+                "content": {"body": "foo", "m.relates_to": {"foo": "stripped"}},
+            },
+            {
+                "type": "m.room.message",
+                "content": {},
+                "signatures": {},
+                "unsigned": {},
+            },
+            room_version=msc3389_room_ver,
+        )
+
 
 class SerializeEventTestCase(stdlib_unittest.TestCase):
     def serialize(self, ev: EventBase, fields: Optional[List[str]]) -> JsonDict:
-- 
cgit 1.5.1


From eb3c1823d8b059073903354facfed81ed41efbce Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 15 May 2023 15:01:29 -0400
Subject: Reject instead of erroring on invalid membership events. (#15564)

Instead of resulting in an internal server error for invalid events,
return that the event is invalid.
---
 changelog.d/15564.bugfix |  1 +
 synapse/event_auth.py    | 17 +++++++++++------
 2 files changed, 12 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/15564.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15564.bugfix b/changelog.d/15564.bugfix
new file mode 100644
index 0000000000..667114ba42
--- /dev/null
+++ b/changelog.d/15564.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where an invalid membership event could cause an internal server error.
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index 25898b95a5..b4b43ec4d7 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -1054,10 +1054,15 @@ def _verify_third_party_invite(
     """
     if "third_party_invite" not in event.content:
         return False
-    if "signed" not in event.content["third_party_invite"]:
+    third_party_invite = event.content["third_party_invite"]
+    if not isinstance(third_party_invite, collections.abc.Mapping):
         return False
-    signed = event.content["third_party_invite"]["signed"]
-    for key in {"mxid", "token"}:
+    if "signed" not in third_party_invite:
+        return False
+    signed = third_party_invite["signed"]
+    if not isinstance(signed, collections.abc.Mapping):
+        return False
+    for key in {"mxid", "token", "signatures"}:
         if key not in signed:
             return False
 
@@ -1075,8 +1080,6 @@ def _verify_third_party_invite(
 
     if signed["mxid"] != event.state_key:
         return False
-    if signed["token"] != token:
-        return False
 
     for public_key_object in get_public_keys(invite_event):
         public_key = public_key_object["public_key"]
@@ -1088,7 +1091,9 @@ def _verify_third_party_invite(
                     verify_key = decode_verify_key_bytes(
                         key_name, decode_base64(public_key)
                     )
-                    verify_signed_json(signed, server, verify_key)
+                    # verify_signed_json incorrectly states it wants a dict, it
+                    # just needs a mapping.
+                    verify_signed_json(signed, server, verify_key)  # type: ignore[arg-type]
 
                     # We got the public key from the invite, so we know that the
                     # correct server signed the signed bundle.
-- 
cgit 1.5.1


From f2905d827f8e5360907dadfd205da588f92aa286 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 15 May 2023 15:02:24 -0400
Subject: Implement MSC3821 to update redaction rules
 (`third_party_invite.signed`) (#15563)

Updates the redaction rules to protect enough information that the
event can still be properly verified.
---
 changelog.d/15563.misc       |  1 +
 synapse/api/room_versions.py | 40 +++++++++++++++++++++++
 synapse/events/utils.py      | 10 ++++++
 tests/events/test_utils.py   | 75 +++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 125 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15563.misc

(limited to 'synapse')

diff --git a/changelog.d/15563.misc b/changelog.d/15563.misc
new file mode 100644
index 0000000000..8bfecf2b95
--- /dev/null
+++ b/changelog.d/15563.misc
@@ -0,0 +1 @@
+Implement [MSC3821](https://github.com/matrix-org/matrix-spec-proposals/pull/3821) to update the redaction rules.
diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py
index e65b9a0287..7030b133d3 100644
--- a/synapse/api/room_versions.py
+++ b/synapse/api/room_versions.py
@@ -103,6 +103,8 @@ class RoomVersion:
     msc3787_knock_restricted_join_rule: bool
     # MSC3667: Enforce integer power levels
     msc3667_int_only_power_levels: bool
+    # MSC3821: Do not redact the third_party_invite content field for membership events.
+    msc3821_redaction_rules: bool
     # MSC3931: Adds a push rule condition for "room version feature flags", making
     # some push rules room version dependent. Note that adding a flag to this list
     # is not enough to mark it "supported": the push rule evaluator also needs to
@@ -133,6 +135,7 @@ class RoomVersions:
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3821_redaction_rules=False,
         msc3931_push_features=(),
         msc3989_redaction_rules=False,
     )
@@ -155,6 +158,7 @@ class RoomVersions:
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3821_redaction_rules=False,
         msc3931_push_features=(),
         msc3989_redaction_rules=False,
     )
@@ -177,6 +181,7 @@ class RoomVersions:
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3821_redaction_rules=False,
         msc3931_push_features=(),
         msc3989_redaction_rules=False,
     )
@@ -199,6 +204,7 @@ class RoomVersions:
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3821_redaction_rules=False,
         msc3931_push_features=(),
         msc3989_redaction_rules=False,
     )
@@ -221,6 +227,7 @@ class RoomVersions:
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3821_redaction_rules=False,
         msc3931_push_features=(),
         msc3989_redaction_rules=False,
     )
@@ -243,6 +250,7 @@ class RoomVersions:
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3821_redaction_rules=False,
         msc3931_push_features=(),
         msc3989_redaction_rules=False,
     )
@@ -265,6 +273,7 @@ class RoomVersions:
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3821_redaction_rules=False,
         msc3931_push_features=(),
         msc3989_redaction_rules=False,
     )
@@ -287,6 +296,7 @@ class RoomVersions:
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3821_redaction_rules=False,
         msc3931_push_features=(),
         msc3989_redaction_rules=False,
     )
@@ -309,6 +319,7 @@ class RoomVersions:
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3821_redaction_rules=False,
         msc3931_push_features=(),
         msc3989_redaction_rules=False,
     )
@@ -331,6 +342,7 @@ class RoomVersions:
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3821_redaction_rules=False,
         msc3931_push_features=(),
         msc3989_redaction_rules=False,
     )
@@ -353,6 +365,30 @@ class RoomVersions:
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=False,
+        msc3821_redaction_rules=False,
+        msc3931_push_features=(),
+        msc3989_redaction_rules=False,
+    )
+    MSC3821 = RoomVersion(
+        "org.matrix.msc3821.opt1",
+        RoomDisposition.UNSTABLE,
+        EventFormatVersions.ROOM_V4_PLUS,
+        StateResolutionVersions.V2,
+        enforce_key_validity=True,
+        special_case_aliases_auth=False,
+        strict_canonicaljson=True,
+        limit_notifications_power_levels=True,
+        msc2175_implicit_room_creator=False,
+        msc2176_redaction_rules=False,
+        msc3083_join_rules=True,
+        msc3375_redaction_rules=True,
+        msc2403_knocking=True,
+        msc2716_historical=False,
+        msc2716_redactions=False,
+        msc3389_relation_redactions=False,
+        msc3787_knock_restricted_join_rule=False,
+        msc3667_int_only_power_levels=False,
+        msc3821_redaction_rules=True,
         msc3931_push_features=(),
         msc3989_redaction_rules=False,
     )
@@ -375,6 +411,7 @@ class RoomVersions:
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=True,
+        msc3821_redaction_rules=False,
         msc3931_push_features=(),
         msc3989_redaction_rules=False,
     )
@@ -397,6 +434,7 @@ class RoomVersions:
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
+        msc3821_redaction_rules=False,
         msc3931_push_features=(),
         msc3989_redaction_rules=False,
     )
@@ -420,6 +458,7 @@ class RoomVersions:
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=True,
+        msc3821_redaction_rules=False,
         msc3931_push_features=(PushRuleRoomFlag.EXTENSIBLE_EVENTS,),
         msc3989_redaction_rules=False,
     )
@@ -442,6 +481,7 @@ class RoomVersions:
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=True,
+        msc3821_redaction_rules=False,
         msc3931_push_features=(),
         msc3989_redaction_rules=True,
     )
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index e540f1582a..e6d040176b 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -130,6 +130,16 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
         add_fields("membership")
         if room_version.msc3375_redaction_rules:
             add_fields(EventContentFields.AUTHORISING_USER)
+        if room_version.msc3821_redaction_rules:
+            # Preserve the signed field under third_party_invite.
+            third_party_invite = event_dict["content"].get("third_party_invite")
+            if isinstance(third_party_invite, collections.abc.Mapping):
+                new_content["third_party_invite"] = {}
+                if "signed" in third_party_invite:
+                    new_content["third_party_invite"]["signed"] = third_party_invite[
+                        "signed"
+                    ]
+
     elif event_type == EventTypes.Create:
         # MSC2176 rules state that create events cannot be redacted.
         if room_version.msc2176_redaction_rules:
diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py
index 02f0800a31..e40eac2eb0 100644
--- a/tests/events/test_utils.py
+++ b/tests/events/test_utils.py
@@ -394,7 +394,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
         )
 
     def test_member(self) -> None:
-        """Member events have changed behavior starting with MSC3375."""
+        """Member events have changed behavior in MSC3375 and MSC3821."""
         self.run_test(
             {
                 "type": "m.room.member",
@@ -437,6 +437,79 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
             room_version=RoomVersions.V9,
         )
 
+        # After MSC3821, the signed key under third_party_invite is protected
+        # from redaction.
+        THIRD_PARTY_INVITE = {
+            "display_name": "alice",
+            "signed": {
+                "mxid": "@alice:example.org",
+                "signatures": {
+                    "magic.forest": {
+                        "ed25519:3": "fQpGIW1Snz+pwLZu6sTy2aHy/DYWWTspTJRPyNp0PKkymfIsNffysMl6ObMMFdIJhk6g6pwlIqZ54rxo8SLmAg"
+                    }
+                },
+                "token": "abc123",
+            },
+        }
+
+        self.run_test(
+            {
+                "type": "m.room.member",
+                "content": {
+                    "membership": "invite",
+                    "third_party_invite": THIRD_PARTY_INVITE,
+                    "other_key": "stripped",
+                },
+            },
+            {
+                "type": "m.room.member",
+                "content": {
+                    "membership": "invite",
+                    "third_party_invite": {"signed": THIRD_PARTY_INVITE["signed"]},
+                },
+                "signatures": {},
+                "unsigned": {},
+            },
+            room_version=RoomVersions.MSC3821,
+        )
+
+        # Ensure this doesn't break if an invalid field is sent.
+        self.run_test(
+            {
+                "type": "m.room.member",
+                "content": {
+                    "membership": "invite",
+                    "third_party_invite": {},
+                    "other_key": "stripped",
+                },
+            },
+            {
+                "type": "m.room.member",
+                "content": {"membership": "invite", "third_party_invite": {}},
+                "signatures": {},
+                "unsigned": {},
+            },
+            room_version=RoomVersions.MSC3821,
+        )
+
+        self.run_test(
+            {
+                "type": "m.room.member",
+                "content": {
+                    "membership": "invite",
+                    "third_party_invite": "stripped",
+                    "other_key": "stripped",
+                },
+            },
+            {
+                "type": "m.room.member",
+                "content": {"membership": "invite"},
+                "signatures": {},
+                "unsigned": {},
+            },
+            room_version=RoomVersions.MSC3821,
+        )
+
     def test_relations(self) -> None:
         """Event relations get redacted until MSC3389."""
         # Normally the m._relates_to field is redacted.
-- 
cgit 1.5.1


From ba572647b291e593e70a30e45c234c9766472ff3 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Mon, 15 May 2023 13:11:21 -0700
Subject: Export `run_as_background_process` from the module API (#15577)

---
 changelog.d/15577.misc         | 1 +
 synapse/module_api/__init__.py | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 changelog.d/15577.misc

(limited to 'synapse')

diff --git a/changelog.d/15577.misc b/changelog.d/15577.misc
new file mode 100644
index 0000000000..74a7f495de
--- /dev/null
+++ b/changelog.d/15577.misc
@@ -0,0 +1 @@
+Export `run_as_background_process` from the module API.
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 4b59e6825b..2c9d181acf 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -156,6 +156,7 @@ __all__ = [
     "parse_json_object_from_request",
     "respond_with_html",
     "run_in_background",
+    "run_as_background_process",
     "cached",
     "NOT_SPAM",
     "UserID",
-- 
cgit 1.5.1


From b6a7d49b6f1f7c494372fd1b9aab3982c9a299c7 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 16 May 2023 08:56:42 -0500
Subject: `traceback.format_exception(...)` usage that is compatible with
 Python 3.7 and 3.11 (#15599)

* Usage that is compatible with Python 3.7 and 3.11

> Since Python 3.10, instead of passing value and tb, an exception object can
  be passed as the first argument. If value and tb are provided, the first
  argument is ignored in order to provide backwards compatibility.
>
> -- https://docs.python.org/3/library/traceback.html

* Add changelog
---
 changelog.d/15599.bugfix | 1 +
 synapse/app/_base.py     | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15599.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15599.bugfix b/changelog.d/15599.bugfix
new file mode 100644
index 0000000000..b58af8ad55
--- /dev/null
+++ b/changelog.d/15599.bugfix
@@ -0,0 +1 @@
+Print full error and stack-trace of any exception that occurs during startup/initialization.
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 4dfcf484fa..936b1b0430 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -214,7 +214,7 @@ def handle_startup_exception(e: Exception) -> NoReturn:
     # the reactor are written to the logs, followed by a summary to stderr.
     logger.exception("Exception during startup")
 
-    error_string = "".join(traceback.format_exception(e))
+    error_string = "".join(traceback.format_exception(type(e), e, e.__traceback__))
     indented_error_string = indent(error_string, "    ")
 
     quit_with_error(
-- 
cgit 1.5.1


From c51d2e6199a901113f2dabeb64fc64b015751988 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 16 May 2023 12:19:46 -0500
Subject: Fix subscriptable type usage in Python <3.9 (#15604)

Fix the following `mypy` errors when running `mypy` with Python 3.7:
```
synapse/storage/controllers/stats.py:58: error: "Counter" is not subscriptable, use "typing.Counter" instead  [misc]

tests/test_state.py:267: error: "dict" is not subscriptable, use "typing.Dict" instead  [misc]
```

Part of https://github.com/matrix-org/synapse/issues/15603

Since Python 3.9, the generic aliases in `typing` (e.g. `typing.Dict`) are deprecated and the builtin collection types are subscriptable (generic) by default, see https://peps.python.org/pep-0585/#implementation
---
 changelog.d/15604.misc               | 1 +
 synapse/storage/controllers/stats.py | 3 +--
 tests/test_state.py                  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/15604.misc

(limited to 'synapse')

diff --git a/changelog.d/15604.misc b/changelog.d/15604.misc
new file mode 100644
index 0000000000..92d1d600bc
--- /dev/null
+++ b/changelog.d/15604.misc
@@ -0,0 +1 @@
+Fix subscriptable type usage in Python <3.9.
diff --git a/synapse/storage/controllers/stats.py b/synapse/storage/controllers/stats.py
index 988e44c6af..2a03528fee 100644
--- a/synapse/storage/controllers/stats.py
+++ b/synapse/storage/controllers/stats.py
@@ -13,8 +13,7 @@
 # limitations under the License.
 
 import logging
-from collections import Counter
-from typing import TYPE_CHECKING, Collection, List, Tuple
+from typing import TYPE_CHECKING, Collection, Counter, List, Tuple
 
 from synapse.api.errors import SynapseError
 from synapse.storage.database import LoggingTransaction
diff --git a/tests/test_state.py b/tests/test_state.py
index 2029d3d60a..ddf59916b1 100644
--- a/tests/test_state.py
+++ b/tests/test_state.py
@@ -264,7 +264,7 @@ class StateTestCase(unittest.TestCase):
 
         self.dummy_store.register_events(graph.walk())
 
-        context_store: dict[str, EventContext] = {}
+        context_store: Dict[str, EventContext] = {}
 
         for event in graph.walk():
             context = yield defer.ensureDeferred(
-- 
cgit 1.5.1


From 77cda342be3c81fa8557d208e67dc1662ddb462a Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 16 May 2023 08:56:42 -0500
Subject: `traceback.format_exception(...)` usage that is compatible with
 Python 3.7 and 3.11 (#15599)

* Usage that is compatible with Python 3.7 and 3.11

> Since Python 3.10, instead of passing value and tb, an exception object can
  be passed as the first argument. If value and tb are provided, the first
  argument is ignored in order to provide backwards compatibility.
>
> -- https://docs.python.org/3/library/traceback.html

* Add changelog
---
 changelog.d/15599.bugfix | 1 +
 synapse/app/_base.py     | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15599.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15599.bugfix b/changelog.d/15599.bugfix
new file mode 100644
index 0000000000..b58af8ad55
--- /dev/null
+++ b/changelog.d/15599.bugfix
@@ -0,0 +1 @@
+Print full error and stack-trace of any exception that occurs during startup/initialization.
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 4dfcf484fa..936b1b0430 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -214,7 +214,7 @@ def handle_startup_exception(e: Exception) -> NoReturn:
     # the reactor are written to the logs, followed by a summary to stderr.
     logger.exception("Exception during startup")
 
-    error_string = "".join(traceback.format_exception(e))
+    error_string = "".join(traceback.format_exception(type(e), e, e.__traceback__))
     indented_error_string = indent(error_string, "    ")
 
     quit_with_error(
-- 
cgit 1.5.1


From 9f6ff6a0eb94a9f81b9948bc3b651a1eb78de460 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Tue, 16 May 2023 10:57:39 -0700
Subject: Add not null constraint to column `full_user_id` of tables `profiles`
 and `user_filters` (#15537)

---
 changelog.d/15537.misc                             |   1 +
 synapse/storage/databases/main/filtering.py        |  95 +++++++++++++++++++
 synapse/storage/databases/main/profile.py          | 102 ++++++++++++++++++++-
 synapse/storage/schema/__init__.py                 |  10 +-
 .../01_add_profiles_not_valid_check.sql.postgres   |  16 ++++
 ...2_add_user_filters_not_valid_check.sql.postgres |  16 ++++
 .../77/03bg_populate_full_user_id_profiles.sql     |  16 ++++
 .../77/04bg_populate_full_user_id_user_filters.sql |  16 ++++
 tests/storage/test_profile.py                      |  63 +++++++++++++
 tests/storage/test_user_filters.py                 |  94 +++++++++++++++++++
 10 files changed, 425 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/15537.misc
 create mode 100644 synapse/storage/schema/main/delta/77/01_add_profiles_not_valid_check.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/77/02_add_user_filters_not_valid_check.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/77/03bg_populate_full_user_id_profiles.sql
 create mode 100644 synapse/storage/schema/main/delta/77/04bg_populate_full_user_id_user_filters.sql
 create mode 100644 tests/storage/test_user_filters.py

(limited to 'synapse')

diff --git a/changelog.d/15537.misc b/changelog.d/15537.misc
new file mode 100644
index 0000000000..979e0ba977
--- /dev/null
+++ b/changelog.d/15537.misc
@@ -0,0 +1 @@
+Add not null constraint to column full_user_id of tables profiles and user_filters.
diff --git a/synapse/storage/databases/main/filtering.py b/synapse/storage/databases/main/filtering.py
index 50516402f9..da31eb44dc 100644
--- a/synapse/storage/databases/main/filtering.py
+++ b/synapse/storage/databases/main/filtering.py
@@ -25,6 +25,7 @@ from synapse.storage.database import (
     LoggingDatabaseConnection,
     LoggingTransaction,
 )
+from synapse.storage.engines import PostgresEngine
 from synapse.types import JsonDict, UserID
 from synapse.util.caches.descriptors import cached
 
@@ -40,6 +41,8 @@ class FilteringWorkerStore(SQLBaseStore):
         hs: "HomeServer",
     ):
         super().__init__(database, db_conn, hs)
+        self.server_name: str = hs.hostname
+        self.database_engine = database.engine
         self.db_pool.updates.register_background_index_update(
             "full_users_filters_unique_idx",
             index_name="full_users_unique_idx",
@@ -48,6 +51,98 @@ class FilteringWorkerStore(SQLBaseStore):
             unique=True,
         )
 
+        self.db_pool.updates.register_background_update_handler(
+            "populate_full_user_id_user_filters",
+            self.populate_full_user_id_user_filters,
+        )
+
+    async def populate_full_user_id_user_filters(
+        self, progress: JsonDict, batch_size: int
+    ) -> int:
+        """
+        Background update to populate the column `full_user_id` of the table
+        user_filters from entries in the column `user_id` of the same table
+        """
+
+        lower_bound_id = progress.get("lower_bound_id", "")
+
+        def _get_last_id(txn: LoggingTransaction) -> Optional[str]:
+            sql = """
+                    SELECT user_id FROM user_filters
+                    WHERE user_id > ?
+                    ORDER BY user_id
+                    LIMIT 1 OFFSET 50
+                  """
+            txn.execute(sql, (lower_bound_id,))
+            res = txn.fetchone()
+            if res:
+                upper_bound_id = res[0]
+                return upper_bound_id
+            else:
+                return None
+
+        def _process_batch(
+            txn: LoggingTransaction, lower_bound_id: str, upper_bound_id: str
+        ) -> None:
+            sql = """
+                    UPDATE user_filters
+                    SET full_user_id = '@' || user_id || ?
+                    WHERE ? < user_id AND user_id <= ? AND full_user_id IS NULL
+                   """
+            txn.execute(sql, (f":{self.server_name}", lower_bound_id, upper_bound_id))
+
+        def _final_batch(txn: LoggingTransaction, lower_bound_id: str) -> None:
+            sql = """
+                    UPDATE user_filters
+                    SET full_user_id = '@' || user_id || ?
+                    WHERE ? < user_id AND full_user_id IS NULL
+                   """
+            txn.execute(
+                sql,
+                (
+                    f":{self.server_name}",
+                    lower_bound_id,
+                ),
+            )
+
+            if isinstance(self.database_engine, PostgresEngine):
+                sql = """
+                        ALTER TABLE user_filters VALIDATE CONSTRAINT full_user_id_not_null
+                      """
+                txn.execute(sql)
+
+        upper_bound_id = await self.db_pool.runInteraction(
+            "populate_full_user_id_user_filters", _get_last_id
+        )
+
+        if upper_bound_id is None:
+            await self.db_pool.runInteraction(
+                "populate_full_user_id_user_filters", _final_batch, lower_bound_id
+            )
+
+            await self.db_pool.updates._end_background_update(
+                "populate_full_user_id_user_filters"
+            )
+            return 1
+
+        await self.db_pool.runInteraction(
+            "populate_full_user_id_user_filters",
+            _process_batch,
+            lower_bound_id,
+            upper_bound_id,
+        )
+
+        progress["lower_bound_id"] = upper_bound_id
+
+        await self.db_pool.runInteraction(
+            "populate_full_user_id_user_filters",
+            self.db_pool.updates._background_update_progress_txn,
+            "populate_full_user_id_user_filters",
+            progress,
+        )
+
+        return 50
+
     @cached(num_args=2)
     async def get_user_filter(
         self, user_localpart: str, filter_id: Union[int, str]
diff --git a/synapse/storage/databases/main/profile.py b/synapse/storage/databases/main/profile.py
index c4022d2427..65c92bef51 100644
--- a/synapse/storage/databases/main/profile.py
+++ b/synapse/storage/databases/main/profile.py
@@ -15,9 +15,14 @@ from typing import TYPE_CHECKING, Optional
 
 from synapse.api.errors import StoreError
 from synapse.storage._base import SQLBaseStore
-from synapse.storage.database import DatabasePool, LoggingDatabaseConnection
+from synapse.storage.database import (
+    DatabasePool,
+    LoggingDatabaseConnection,
+    LoggingTransaction,
+)
 from synapse.storage.databases.main.roommember import ProfileInfo
-from synapse.types import UserID
+from synapse.storage.engines import PostgresEngine
+from synapse.types import JsonDict, UserID
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -31,6 +36,8 @@ class ProfileWorkerStore(SQLBaseStore):
         hs: "HomeServer",
     ):
         super().__init__(database, db_conn, hs)
+        self.server_name: str = hs.hostname
+        self.database_engine = database.engine
         self.db_pool.updates.register_background_index_update(
             "profiles_full_user_id_key_idx",
             index_name="profiles_full_user_id_key",
@@ -39,6 +46,97 @@ class ProfileWorkerStore(SQLBaseStore):
             unique=True,
         )
 
+        self.db_pool.updates.register_background_update_handler(
+            "populate_full_user_id_profiles", self.populate_full_user_id_profiles
+        )
+
+    async def populate_full_user_id_profiles(
+        self, progress: JsonDict, batch_size: int
+    ) -> int:
+        """
+        Background update to populate the column `full_user_id` of the table
+        profiles from entries in the column `user_id` of the same table
+        """
+
+        lower_bound_id = progress.get("lower_bound_id", "")
+
+        def _get_last_id(txn: LoggingTransaction) -> Optional[str]:
+            sql = """
+                    SELECT user_id FROM profiles
+                    WHERE user_id > ?
+                    ORDER BY user_id
+                    LIMIT 1 OFFSET 50
+                  """
+            txn.execute(sql, (lower_bound_id,))
+            res = txn.fetchone()
+            if res:
+                upper_bound_id = res[0]
+                return upper_bound_id
+            else:
+                return None
+
+        def _process_batch(
+            txn: LoggingTransaction, lower_bound_id: str, upper_bound_id: str
+        ) -> None:
+            sql = """
+                    UPDATE profiles
+                    SET full_user_id = '@' || user_id || ?
+                    WHERE ? < user_id AND user_id <= ? AND full_user_id IS NULL
+                   """
+            txn.execute(sql, (f":{self.server_name}", lower_bound_id, upper_bound_id))
+
+        def _final_batch(txn: LoggingTransaction, lower_bound_id: str) -> None:
+            sql = """
+                    UPDATE profiles
+                    SET full_user_id = '@' || user_id || ?
+                    WHERE ? < user_id AND full_user_id IS NULL
+                   """
+            txn.execute(
+                sql,
+                (
+                    f":{self.server_name}",
+                    lower_bound_id,
+                ),
+            )
+
+            if isinstance(self.database_engine, PostgresEngine):
+                sql = """
+                        ALTER TABLE profiles VALIDATE CONSTRAINT full_user_id_not_null
+                      """
+                txn.execute(sql)
+
+        upper_bound_id = await self.db_pool.runInteraction(
+            "populate_full_user_id_profiles", _get_last_id
+        )
+
+        if upper_bound_id is None:
+            await self.db_pool.runInteraction(
+                "populate_full_user_id_profiles", _final_batch, lower_bound_id
+            )
+
+            await self.db_pool.updates._end_background_update(
+                "populate_full_user_id_profiles"
+            )
+            return 1
+
+        await self.db_pool.runInteraction(
+            "populate_full_user_id_profiles",
+            _process_batch,
+            lower_bound_id,
+            upper_bound_id,
+        )
+
+        progress["lower_bound_id"] = upper_bound_id
+
+        await self.db_pool.runInteraction(
+            "populate_full_user_id_profiles",
+            self.db_pool.updates._background_update_progress_txn,
+            "populate_full_user_id_profiles",
+            progress,
+        )
+
+        return 50
+
     async def get_profileinfo(self, user_localpart: str) -> ProfileInfo:
         try:
             profile = await self.db_pool.simple_select_one(
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 1672976209..df2cc31ca6 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 76  # remember to update the list below when updating
+SCHEMA_VERSION = 77  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -100,13 +100,19 @@ Changes in SCHEMA_VERSION = 75:
 
 Changes in SCHEMA_VERSION = 76:
     - Adds a full_user_id column to tables profiles and user_filters.
+
+Changes in SCHEMA_VERSION = 77:
+    - (Postgres) Add NOT VALID CHECK (full_user_id IS NOT NULL) to tables profiles and user_filters
 """
 
 
 SCHEMA_COMPAT_VERSION = (
     # Queries against `event_stream_ordering` columns in membership tables must
     # be disambiguated.
-    74
+    #
+    # insertions to the column `full_user_id` of tables profiles and user_filters can no
+    # longer be null
+    76
 )
 """Limit on how far the synapse codebase can be rolled back without breaking db compat
 
diff --git a/synapse/storage/schema/main/delta/77/01_add_profiles_not_valid_check.sql.postgres b/synapse/storage/schema/main/delta/77/01_add_profiles_not_valid_check.sql.postgres
new file mode 100644
index 0000000000..3eb226c648
--- /dev/null
+++ b/synapse/storage/schema/main/delta/77/01_add_profiles_not_valid_check.sql.postgres
@@ -0,0 +1,16 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE profiles ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID;
\ No newline at end of file
diff --git a/synapse/storage/schema/main/delta/77/02_add_user_filters_not_valid_check.sql.postgres b/synapse/storage/schema/main/delta/77/02_add_user_filters_not_valid_check.sql.postgres
new file mode 100644
index 0000000000..ba037daf47
--- /dev/null
+++ b/synapse/storage/schema/main/delta/77/02_add_user_filters_not_valid_check.sql.postgres
@@ -0,0 +1,16 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE user_filters ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID;
\ No newline at end of file
diff --git a/synapse/storage/schema/main/delta/77/03bg_populate_full_user_id_profiles.sql b/synapse/storage/schema/main/delta/77/03bg_populate_full_user_id_profiles.sql
new file mode 100644
index 0000000000..12101ab914
--- /dev/null
+++ b/synapse/storage/schema/main/delta/77/03bg_populate_full_user_id_profiles.sql
@@ -0,0 +1,16 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES (7703, 'populate_full_user_id_profiles', '{}');
\ No newline at end of file
diff --git a/synapse/storage/schema/main/delta/77/04bg_populate_full_user_id_user_filters.sql b/synapse/storage/schema/main/delta/77/04bg_populate_full_user_id_user_filters.sql
new file mode 100644
index 0000000000..1f4d683cac
--- /dev/null
+++ b/synapse/storage/schema/main/delta/77/04bg_populate_full_user_id_user_filters.sql
@@ -0,0 +1,16 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES (7704, 'populate_full_user_id_user_filters', '{}');
\ No newline at end of file
diff --git a/tests/storage/test_profile.py b/tests/storage/test_profile.py
index 6ec34997ea..f9cf0fcb82 100644
--- a/tests/storage/test_profile.py
+++ b/tests/storage/test_profile.py
@@ -14,6 +14,8 @@
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.server import HomeServer
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import PostgresEngine
 from synapse.types import UserID
 from synapse.util import Clock
 
@@ -69,3 +71,64 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase):
         self.assertIsNone(
             self.get_success(self.store.get_profile_avatar_url(self.u_frank.localpart))
         )
+
+    def test_profiles_bg_migration(self) -> None:
+        """
+        Test background job that copies entries from column user_id to full_user_id, adding
+        the hostname in the process.
+        """
+        updater = self.hs.get_datastores().main.db_pool.updates
+
+        # drop the constraint so we can insert nulls in full_user_id to populate the test
+        if isinstance(self.store.database_engine, PostgresEngine):
+
+            def f(txn: LoggingTransaction) -> None:
+                txn.execute(
+                    "ALTER TABLE profiles DROP CONSTRAINT full_user_id_not_null"
+                )
+
+            self.get_success(self.store.db_pool.runInteraction("", f))
+
+        for i in range(0, 70):
+            self.get_success(
+                self.store.db_pool.simple_insert(
+                    "profiles",
+                    {"user_id": f"hello{i:02}"},
+                )
+            )
+
+        # re-add the constraint so that when it's validated it actually exists
+        if isinstance(self.store.database_engine, PostgresEngine):
+
+            def f(txn: LoggingTransaction) -> None:
+                txn.execute(
+                    "ALTER TABLE profiles ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID"
+                )
+
+            self.get_success(self.store.db_pool.runInteraction("", f))
+
+        self.get_success(
+            self.store.db_pool.simple_insert(
+                "background_updates",
+                values={
+                    "update_name": "populate_full_user_id_profiles",
+                    "progress_json": "{}",
+                },
+            )
+        )
+
+        self.get_success(
+            updater.run_background_updates(False),
+        )
+
+        expected_values = []
+        for i in range(0, 70):
+            expected_values.append((f"@hello{i:02}:{self.hs.hostname}",))
+
+        res = self.get_success(
+            self.store.db_pool.execute(
+                "", None, "SELECT full_user_id from profiles ORDER BY full_user_id"
+            )
+        )
+        self.assertEqual(len(res), len(expected_values))
+        self.assertEqual(res, expected_values)
diff --git a/tests/storage/test_user_filters.py b/tests/storage/test_user_filters.py
new file mode 100644
index 0000000000..bab802f56e
--- /dev/null
+++ b/tests/storage/test_user_filters.py
@@ -0,0 +1,94 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.server import HomeServer
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import PostgresEngine
+from synapse.util import Clock
+
+from tests import unittest
+
+
+class UserFiltersStoreTestCase(unittest.HomeserverTestCase):
+    """
+    Test the `populate_full_user_id_user_filters` background update, which fills in
+    `full_user_id` from `user_id` by prepending `@` and appending `:<hostname>`.
+    """
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.store = hs.get_datastores().main
+
+    def test_bg_migration(self) -> None:
+        updater = self.hs.get_datastores().main.db_pool.updates
+
+        # Postgres only: drop the check so rows with a null full_user_id can be inserted
+        if isinstance(self.store.database_engine, PostgresEngine):
+
+            def f(txn: LoggingTransaction) -> None:
+                txn.execute(
+                    "ALTER TABLE user_filters DROP CONSTRAINT full_user_id_not_null"
+                )
+
+            self.get_success(self.store.db_pool.runInteraction("", f))
+
+        for i in range(0, 70):
+            self.get_success(
+                self.store.db_pool.simple_insert(
+                    "user_filters",
+                    {
+                        "user_id": f"hello{i:02}",  # zero-padded so ORDER BY sorts by i
+                        "filter_id": i,
+                        "filter_json": bytearray(i),  # i zero bytes; never read back here
+                    },
+                )
+            )
+
+        # re-add the constraint as NOT VALID so it exists when it's later validated
+        if isinstance(self.store.database_engine, PostgresEngine):
+
+            def f(txn: LoggingTransaction) -> None:
+                txn.execute(
+                    "ALTER TABLE user_filters ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID"
+                )
+
+            self.get_success(self.store.db_pool.runInteraction("", f))
+
+        self.get_success(
+            self.store.db_pool.simple_insert(
+                "background_updates",
+                values={
+                    "update_name": "populate_full_user_id_user_filters",
+                    "progress_json": "{}",
+                },
+            )
+        )
+
+        self.get_success(
+            updater.run_background_updates(False),
+        )
+
+        expected_values = []
+        for i in range(0, 70):
+            expected_values.append((f"@hello{i:02}:{self.hs.hostname}",))
+
+        res = self.get_success(
+            self.store.db_pool.execute(
+                "", None, "SELECT full_user_id from user_filters ORDER BY full_user_id"
+            )
+        )
+        self.assertEqual(len(res), len(expected_values))
+        self.assertEqual(res, expected_values)
-- 
cgit 1.5.1


From 375b0a8a119bb925ca280f050a25a931662fcbb5 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 16 May 2023 15:56:38 -0400
Subject: Update code to refer to "workers". (#15606)

A bunch of comments and variables are out of date and use
obsolete terms.
---
 changelog.d/15606.misc                             |   1 +
 docs/replication.md                                |   6 -
 synapse/app/admin_cmd.py                           |   4 +-
 synapse/app/generic_worker.py                      |   4 +-
 synapse/module_api/__init__.py                     |   6 +-
 synapse/replication/tcp/client.py                  |   4 +-
 synapse/storage/databases/main/account_data.py     |   7 +-
 synapse/storage/databases/main/cache.py            |  14 +-
 synapse/storage/databases/main/devices.py          |   2 -
 synapse/storage/databases/main/events_worker.py    |   7 +-
 synapse/storage/databases/main/receipts.py         |   7 +-
 .../storage/schema/main/delta/34/cache_stream.py   |   2 +-
 tests/app/test_openid_listener.py                  |   2 +-
 tests/replication/slave/__init__.py                |  13 -
 tests/replication/slave/storage/__init__.py        |  13 -
 tests/replication/slave/storage/_base.py           |  72 ----
 tests/replication/slave/storage/test_events.py     | 420 ---------------------
 tests/replication/storage/__init__.py              |  13 +
 tests/replication/storage/_base.py                 |  72 ++++
 tests/replication/storage/test_events.py           | 420 +++++++++++++++++++++
 20 files changed, 529 insertions(+), 560 deletions(-)
 create mode 100644 changelog.d/15606.misc
 delete mode 100644 tests/replication/slave/__init__.py
 delete mode 100644 tests/replication/slave/storage/__init__.py
 delete mode 100644 tests/replication/slave/storage/_base.py
 delete mode 100644 tests/replication/slave/storage/test_events.py
 create mode 100644 tests/replication/storage/__init__.py
 create mode 100644 tests/replication/storage/_base.py
 create mode 100644 tests/replication/storage/test_events.py

(limited to 'synapse')

diff --git a/changelog.d/15606.misc b/changelog.d/15606.misc
new file mode 100644
index 0000000000..44265fbf02
--- /dev/null
+++ b/changelog.d/15606.misc
@@ -0,0 +1 @@
+Update internal terminology for workers.
diff --git a/docs/replication.md b/docs/replication.md
index 108da9a065..25145daaf5 100644
--- a/docs/replication.md
+++ b/docs/replication.md
@@ -30,12 +30,6 @@ minimal.
 
 See [the TCP replication documentation](tcp_replication.md).
 
-### The Slaved DataStore
-
-There are read-only version of the synapse storage layer in
-`synapse/replication/slave/storage` that use the response of the
-replication API to invalidate their caches.
-
 ### The TCP Replication Module
 Information about how the tcp replication module is structured, including how
 the classes interact, can be found in
diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py
index b05fe2c589..f9aada269a 100644
--- a/synapse/app/admin_cmd.py
+++ b/synapse/app/admin_cmd.py
@@ -64,7 +64,7 @@ from synapse.util.logcontext import LoggingContext
 logger = logging.getLogger("synapse.app.admin_cmd")
 
 
-class AdminCmdSlavedStore(
+class AdminCmdStore(
     FilteringWorkerStore,
     ClientIpWorkerStore,
     DeviceWorkerStore,
@@ -103,7 +103,7 @@ class AdminCmdSlavedStore(
 
 
 class AdminCmdServer(HomeServer):
-    DATASTORE_CLASS = AdminCmdSlavedStore  # type: ignore
+    DATASTORE_CLASS = AdminCmdStore  # type: ignore
 
 
 async def export_data_command(hs: HomeServer, args: argparse.Namespace) -> None:
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index e17ce35b8e..909ebccf78 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -102,7 +102,7 @@ from synapse.util.httpresourcetree import create_resource_tree
 logger = logging.getLogger("synapse.app.generic_worker")
 
 
-class GenericWorkerSlavedStore(
+class GenericWorkerStore(
     # FIXME(#3714): We need to add UserDirectoryStore as we write directly
     # rather than going via the correct worker.
     UserDirectoryStore,
@@ -154,7 +154,7 @@ class GenericWorkerSlavedStore(
 
 
 class GenericWorkerServer(HomeServer):
-    DATASTORE_CLASS = GenericWorkerSlavedStore  # type: ignore
+    DATASTORE_CLASS = GenericWorkerStore  # type: ignore
 
     def _listen_http(self, listener_config: ListenerConfig) -> None:
         assert listener_config.http_options is not None
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 2c9d181acf..0e9f366cba 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -134,7 +134,7 @@ from synapse.util.caches.descriptors import CachedFunction, cached as _cached
 from synapse.util.frozenutils import freeze
 
 if TYPE_CHECKING:
-    from synapse.app.generic_worker import GenericWorkerSlavedStore
+    from synapse.app.generic_worker import GenericWorkerStore
     from synapse.server import HomeServer
 
 
@@ -237,9 +237,7 @@ class ModuleApi:
 
         # TODO: Fix this type hint once the types for the data stores have been ironed
         #       out.
-        self._store: Union[
-            DataStore, "GenericWorkerSlavedStore"
-        ] = hs.get_datastores().main
+        self._store: Union[DataStore, "GenericWorkerStore"] = hs.get_datastores().main
         self._storage_controllers = hs.get_storage_controllers()
         self._auth = hs.get_auth()
         self._auth_handler = auth_handler
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 200f667fdf..139f57cf86 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -60,7 +60,7 @@ _WAIT_FOR_REPLICATION_TIMEOUT_SECONDS = 5
 class ReplicationDataHandler:
     """Handles incoming stream updates from replication.
 
-    This instance notifies the slave data store about updates. Can be subclassed
+    This instance notifies the data store about updates. Can be subclassed
     to handle updates in additional ways.
     """
 
@@ -91,7 +91,7 @@ class ReplicationDataHandler:
     ) -> None:
         """Called to handle a batch of replication data with a given stream token.
 
-        By default this just pokes the slave store. Can be overridden in subclasses to
+        By default, this just pokes the data store. Can be overridden in subclasses to
         handle more.
 
         Args:
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index a9843f6e17..8f7bdbc61a 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -85,13 +85,10 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
                 writers=hs.config.worker.writers.account_data,
             )
         else:
+            # Multiple writers are not supported for SQLite.
+            #
             # We shouldn't be running in worker mode with SQLite, but its useful
             # to support it for unit tests.
-            #
-            # If this process is the writer than we need to use
-            # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
-            # updated over replication. (Multiple writers are not supported for
-            # SQLite).
             self._account_data_id_gen = StreamIdGenerator(
                 db_conn,
                 hs.get_replication_notifier(),
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index bd07d20171..46fa0a73f9 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -274,11 +274,11 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
     async def invalidate_cache_and_stream(
         self, cache_name: str, keys: Tuple[Any, ...]
     ) -> None:
-        """Invalidates the cache and adds it to the cache stream so slaves
+        """Invalidates the cache and adds it to the cache stream so other workers
         will know to invalidate their caches.
 
-        This should only be used to invalidate caches where slaves won't
-        otherwise know from other replication streams that the cache should
+        This should only be used to invalidate caches where other workers won't
+        otherwise have known from other replication streams that the cache should
         be invalidated.
         """
         cache_func = getattr(self, cache_name, None)
@@ -297,11 +297,11 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
         cache_func: CachedFunction,
         keys: Tuple[Any, ...],
     ) -> None:
-        """Invalidates the cache and adds it to the cache stream so slaves
+        """Invalidates the cache and adds it to the cache stream so other workers
         will know to invalidate their caches.
 
-        This should only be used to invalidate caches where slaves won't
-        otherwise know from other replication streams that the cache should
+        This should only be used to invalidate caches where other workers won't
+        otherwise have known from other replication streams that the cache should
         be invalidated.
         """
         txn.call_after(cache_func.invalidate, keys)
@@ -310,7 +310,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
     def _invalidate_all_cache_and_stream(
         self, txn: LoggingTransaction, cache_func: CachedFunction
     ) -> None:
-        """Invalidates the entire cache and adds it to the cache stream so slaves
+        """Invalidates the entire cache and adds it to the cache stream so other workers
         will know to invalidate their caches.
         """
 
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 5503621ad6..a67fdb3c22 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -105,8 +105,6 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
             is_writer=hs.config.worker.worker_app is None,
         )
 
-        # Type-ignore: _device_list_id_gen is mixed in from either DataStore (as a
-        # StreamIdGenerator) or SlavedDataStore (as a SlavedIdTracker).
         device_list_max = self._device_list_id_gen.get_current_token()
         device_list_prefill, min_device_list_id = self.db_pool.get_cache_dict(
             db_conn,
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 0ff3fc7369..53aa5933d5 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -213,13 +213,10 @@ class EventsWorkerStore(SQLBaseStore):
                 writers=hs.config.worker.writers.events,
             )
         else:
+            # Multiple writers are not supported for SQLite.
+            #
             # We shouldn't be running in worker mode with SQLite, but its useful
             # to support it for unit tests.
-            #
-            # If this process is the writer than we need to use
-            # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
-            # updated over replication. (Multiple writers are not supported for
-            # SQLite).
             self._stream_id_gen = StreamIdGenerator(
                 db_conn,
                 hs.get_replication_notifier(),
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index 074942b167..5ee5c7ad9f 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -85,13 +85,10 @@ class ReceiptsWorkerStore(SQLBaseStore):
         else:
             self._can_write_to_receipts = True
 
+            # Multiple writers are not supported for SQLite.
+            #
             # We shouldn't be running in worker mode with SQLite, but its useful
             # to support it for unit tests.
-            #
-            # If this process is the writer than we need to use
-            # `StreamIdGenerator`, otherwise we use `SlavedIdTracker` which gets
-            # updated over replication. (Multiple writers are not supported for
-            # SQLite).
             self._receipts_id_gen = StreamIdGenerator(
                 db_conn,
                 hs.get_replication_notifier(),
diff --git a/synapse/storage/schema/main/delta/34/cache_stream.py b/synapse/storage/schema/main/delta/34/cache_stream.py
index 682c86da1a..882f9b893b 100644
--- a/synapse/storage/schema/main/delta/34/cache_stream.py
+++ b/synapse/storage/schema/main/delta/34/cache_stream.py
@@ -21,7 +21,7 @@ from synapse.storage.prepare_database import get_statements
 logger = logging.getLogger(__name__)
 
 
-# This stream is used to notify replication slaves that some caches have
+# This stream is used to notify workers over replication that some caches have
 # been invalidated that they cannot infer from the other streams.
 CREATE_TABLE = """
 CREATE TABLE cache_invalidation_stream (
diff --git a/tests/app/test_openid_listener.py b/tests/app/test_openid_listener.py
index 2ee343d8a4..6e0413400e 100644
--- a/tests/app/test_openid_listener.py
+++ b/tests/app/test_openid_listener.py
@@ -38,7 +38,7 @@ class FederationReaderOpenIDListenerTests(HomeserverTestCase):
 
     def default_config(self) -> JsonDict:
         conf = super().default_config()
-        # we're using FederationReaderServer, which uses a SlavedStore, so we
+        # we're using GenericWorkerServer, which uses a GenericWorkerStore, so we
         # have to tell the FederationHandler not to try to access stuff that is only
         # in the primary store.
         conf["worker_app"] = "yes"
diff --git a/tests/replication/slave/__init__.py b/tests/replication/slave/__init__.py
deleted file mode 100644
index f43a360a80..0000000000
--- a/tests/replication/slave/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/tests/replication/slave/storage/__init__.py b/tests/replication/slave/storage/__init__.py
deleted file mode 100644
index f43a360a80..0000000000
--- a/tests/replication/slave/storage/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py
deleted file mode 100644
index 4c9b494344..0000000000
--- a/tests/replication/slave/storage/_base.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-# Copyright 2018 New Vector Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Any, Iterable, Optional
-from unittest.mock import Mock
-
-from twisted.test.proto_helpers import MemoryReactor
-
-from synapse.server import HomeServer
-from synapse.util import Clock
-
-from tests.replication._base import BaseStreamTestCase
-
-
-class BaseSlavedStoreTestCase(BaseStreamTestCase):
-    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        return self.setup_test_homeserver(federation_client=Mock())
-
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        super().prepare(reactor, clock, hs)
-
-        self.reconnect()
-
-        self.master_store = hs.get_datastores().main
-        self.slaved_store = self.worker_hs.get_datastores().main
-        persistence = hs.get_storage_controllers().persistence
-        assert persistence is not None
-        self.persistance = persistence
-
-    def replicate(self) -> None:
-        """Tell the master side of replication that something has happened, and then
-        wait for the replication to occur.
-        """
-        self.streamer.on_notifier_poke()
-        self.pump(0.1)
-
-    def check(
-        self, method: str, args: Iterable[Any], expected_result: Optional[Any] = None
-    ) -> None:
-        master_result = self.get_success(getattr(self.master_store, method)(*args))
-        slaved_result = self.get_success(getattr(self.slaved_store, method)(*args))
-        if expected_result is not None:
-            self.assertEqual(
-                master_result,
-                expected_result,
-                "Expected master result to be %r but was %r"
-                % (expected_result, master_result),
-            )
-            self.assertEqual(
-                slaved_result,
-                expected_result,
-                "Expected slave result to be %r but was %r"
-                % (expected_result, slaved_result),
-            )
-        self.assertEqual(
-            master_result,
-            slaved_result,
-            "Slave result %r does not match master result %r"
-            % (slaved_result, master_result),
-        )
diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py
deleted file mode 100644
index b2125b1fea..0000000000
--- a/tests/replication/slave/storage/test_events.py
+++ /dev/null
@@ -1,420 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import logging
-from typing import Any, Callable, Iterable, List, Optional, Tuple
-
-from canonicaljson import encode_canonical_json
-from parameterized import parameterized
-
-from twisted.test.proto_helpers import MemoryReactor
-
-from synapse.api.constants import ReceiptTypes
-from synapse.api.room_versions import RoomVersions
-from synapse.events import EventBase, _EventInternalMetadata, make_event_from_dict
-from synapse.events.snapshot import EventContext
-from synapse.handlers.room import RoomEventSource
-from synapse.server import HomeServer
-from synapse.storage.databases.main.event_push_actions import (
-    NotifCounts,
-    RoomNotifCounts,
-)
-from synapse.storage.databases.main.events_worker import EventsWorkerStore
-from synapse.storage.roommember import GetRoomsForUserWithStreamOrdering, RoomsForUser
-from synapse.types import PersistedEventPosition
-from synapse.util import Clock
-
-from tests.server import FakeTransport
-
-from ._base import BaseSlavedStoreTestCase
-
-USER_ID = "@feeling:test"
-USER_ID_2 = "@bright:test"
-OUTLIER = {"outlier": True}
-ROOM_ID = "!room:test"
-
-logger = logging.getLogger(__name__)
-
-
-def dict_equals(self: EventBase, other: EventBase) -> bool:
-    me = encode_canonical_json(self.get_pdu_json())
-    them = encode_canonical_json(other.get_pdu_json())
-    return me == them
-
-
-def patch__eq__(cls: object) -> Callable[[], None]:
-    eq = getattr(cls, "__eq__", None)
-    cls.__eq__ = dict_equals  # type: ignore[assignment]
-
-    def unpatch() -> None:
-        if eq is not None:
-            cls.__eq__ = eq  # type: ignore[assignment]
-
-    return unpatch
-
-
-class EventsWorkerStoreTestCase(BaseSlavedStoreTestCase):
-    STORE_TYPE = EventsWorkerStore
-
-    def setUp(self) -> None:
-        # Patch up the equality operator for events so that we can check
-        # whether lists of events match using assertEqual
-        self.unpatches = [patch__eq__(_EventInternalMetadata), patch__eq__(EventBase)]
-        super().setUp()
-
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        super().prepare(reactor, clock, hs)
-
-        self.get_success(
-            self.master_store.store_room(
-                ROOM_ID,
-                USER_ID,
-                is_public=False,
-                room_version=RoomVersions.V1,
-            )
-        )
-
-    def tearDown(self) -> None:
-        [unpatch() for unpatch in self.unpatches]
-
-    def test_get_latest_event_ids_in_room(self) -> None:
-        create = self.persist(type="m.room.create", key="", creator=USER_ID)
-        self.replicate()
-        self.check("get_latest_event_ids_in_room", (ROOM_ID,), [create.event_id])
-
-        join = self.persist(
-            type="m.room.member",
-            key=USER_ID,
-            membership="join",
-            prev_events=[(create.event_id, {})],
-        )
-        self.replicate()
-        self.check("get_latest_event_ids_in_room", (ROOM_ID,), [join.event_id])
-
-    def test_redactions(self) -> None:
-        self.persist(type="m.room.create", key="", creator=USER_ID)
-        self.persist(type="m.room.member", key=USER_ID, membership="join")
-
-        msg = self.persist(type="m.room.message", msgtype="m.text", body="Hello")
-        self.replicate()
-        self.check("get_event", [msg.event_id], msg)
-
-        redaction = self.persist(type="m.room.redaction", redacts=msg.event_id)
-        self.replicate()
-
-        msg_dict = msg.get_dict()
-        msg_dict["content"] = {}
-        msg_dict["unsigned"]["redacted_by"] = redaction.event_id
-        msg_dict["unsigned"]["redacted_because"] = redaction
-        redacted = make_event_from_dict(
-            msg_dict, internal_metadata_dict=msg.internal_metadata.get_dict()
-        )
-        self.check("get_event", [msg.event_id], redacted)
-
-    def test_backfilled_redactions(self) -> None:
-        self.persist(type="m.room.create", key="", creator=USER_ID)
-        self.persist(type="m.room.member", key=USER_ID, membership="join")
-
-        msg = self.persist(type="m.room.message", msgtype="m.text", body="Hello")
-        self.replicate()
-        self.check("get_event", [msg.event_id], msg)
-
-        redaction = self.persist(
-            type="m.room.redaction", redacts=msg.event_id, backfill=True
-        )
-        self.replicate()
-
-        msg_dict = msg.get_dict()
-        msg_dict["content"] = {}
-        msg_dict["unsigned"]["redacted_by"] = redaction.event_id
-        msg_dict["unsigned"]["redacted_because"] = redaction
-        redacted = make_event_from_dict(
-            msg_dict, internal_metadata_dict=msg.internal_metadata.get_dict()
-        )
-        self.check("get_event", [msg.event_id], redacted)
-
-    def test_invites(self) -> None:
-        self.persist(type="m.room.create", key="", creator=USER_ID)
-        self.check("get_invited_rooms_for_local_user", [USER_ID_2], [])
-        event = self.persist(type="m.room.member", key=USER_ID_2, membership="invite")
-        assert event.internal_metadata.stream_ordering is not None
-
-        self.replicate()
-
-        self.check(
-            "get_invited_rooms_for_local_user",
-            [USER_ID_2],
-            [
-                RoomsForUser(
-                    ROOM_ID,
-                    USER_ID,
-                    "invite",
-                    event.event_id,
-                    event.internal_metadata.stream_ordering,
-                    RoomVersions.V1.identifier,
-                )
-            ],
-        )
-
-    @parameterized.expand([(True,), (False,)])
-    def test_push_actions_for_user(self, send_receipt: bool) -> None:
-        self.persist(type="m.room.create", key="", creator=USER_ID)
-        self.persist(type="m.room.member", key=USER_ID, membership="join")
-        self.persist(
-            type="m.room.member", sender=USER_ID, key=USER_ID_2, membership="join"
-        )
-        event1 = self.persist(type="m.room.message", msgtype="m.text", body="hello")
-        self.replicate()
-
-        if send_receipt:
-            self.get_success(
-                self.master_store.insert_receipt(
-                    ROOM_ID, ReceiptTypes.READ, USER_ID_2, [event1.event_id], None, {}
-                )
-            )
-
-        self.check(
-            "get_unread_event_push_actions_by_room_for_user",
-            [ROOM_ID, USER_ID_2],
-            RoomNotifCounts(
-                NotifCounts(highlight_count=0, unread_count=0, notify_count=0), {}
-            ),
-        )
-
-        self.persist(
-            type="m.room.message",
-            msgtype="m.text",
-            body="world",
-            push_actions=[(USER_ID_2, ["notify"])],
-        )
-        self.replicate()
-        self.check(
-            "get_unread_event_push_actions_by_room_for_user",
-            [ROOM_ID, USER_ID_2],
-            RoomNotifCounts(
-                NotifCounts(highlight_count=0, unread_count=0, notify_count=1), {}
-            ),
-        )
-
-        self.persist(
-            type="m.room.message",
-            msgtype="m.text",
-            body="world",
-            push_actions=[
-                (USER_ID_2, ["notify", {"set_tweak": "highlight", "value": True}])
-            ],
-        )
-        self.replicate()
-        self.check(
-            "get_unread_event_push_actions_by_room_for_user",
-            [ROOM_ID, USER_ID_2],
-            RoomNotifCounts(
-                NotifCounts(highlight_count=1, unread_count=0, notify_count=2), {}
-            ),
-        )
-
-    def test_get_rooms_for_user_with_stream_ordering(self) -> None:
-        """Check that the cache on get_rooms_for_user_with_stream_ordering is invalidated
-        by rows in the events stream
-        """
-        self.persist(type="m.room.create", key="", creator=USER_ID)
-        self.persist(type="m.room.member", key=USER_ID, membership="join")
-        self.replicate()
-        self.check("get_rooms_for_user_with_stream_ordering", (USER_ID_2,), set())
-
-        j2 = self.persist(
-            type="m.room.member", sender=USER_ID_2, key=USER_ID_2, membership="join"
-        )
-        assert j2.internal_metadata.stream_ordering is not None
-        self.replicate()
-
-        expected_pos = PersistedEventPosition(
-            "master", j2.internal_metadata.stream_ordering
-        )
-        self.check(
-            "get_rooms_for_user_with_stream_ordering",
-            (USER_ID_2,),
-            {GetRoomsForUserWithStreamOrdering(ROOM_ID, expected_pos)},
-        )
-
-    def test_get_rooms_for_user_with_stream_ordering_with_multi_event_persist(
-        self,
-    ) -> None:
-        """Check that current_state invalidation happens correctly with multiple events
-        in the persistence batch.
-
-        This test attempts to reproduce a race condition between the event persistence
-        loop and a worker-based Sync handler.
-
-        The problem occurred when the master persisted several events in one batch. It
-        only updates the current_state at the end of each batch, so the obvious thing
-        to do is then to issue a current_state_delta stream update corresponding to the
-        last stream_id in the batch.
-
-        However, that raises the possibility that a worker will see the replication
-        notification for a join event before the current_state caches are invalidated.
-
-        The test involves:
-         * creating a join and a message event for a user, and persisting them in the
-           same batch
-
-         * controlling the replication stream so that updates are sent gradually
-
-         * between each bunch of replication updates, check that we see a consistent
-           snapshot of the state.
-        """
-        self.persist(type="m.room.create", key="", creator=USER_ID)
-        self.persist(type="m.room.member", key=USER_ID, membership="join")
-        self.replicate()
-        self.check("get_rooms_for_user_with_stream_ordering", (USER_ID_2,), set())
-
-        # limit the replication rate
-        repl_transport = self._server_transport
-        assert isinstance(repl_transport, FakeTransport)
-        repl_transport.autoflush = False
-
-        # build the join and message events and persist them in the same batch.
-        logger.info("----- build test events ------")
-        j2, j2ctx = self.build_event(
-            type="m.room.member", sender=USER_ID_2, key=USER_ID_2, membership="join"
-        )
-        msg, msgctx = self.build_event()
-        self.get_success(self.persistance.persist_events([(j2, j2ctx), (msg, msgctx)]))
-        self.replicate()
-        assert j2.internal_metadata.stream_ordering is not None
-
-        event_source = RoomEventSource(self.hs)
-        event_source.store = self.slaved_store
-        current_token = event_source.get_current_key()
-
-        # gradually stream out the replication
-        while repl_transport.buffer:
-            logger.info("------ flush ------")
-            repl_transport.flush(30)
-            self.pump(0)
-
-            prev_token = current_token
-            current_token = event_source.get_current_key()
-
-            # attempt to replicate the behaviour of the sync handler.
-            #
-            # First, we get a list of the rooms we are joined to
-            joined_rooms = self.get_success(
-                self.slaved_store.get_rooms_for_user_with_stream_ordering(USER_ID_2)
-            )
-
-            # Then, we get a list of the events since the last sync
-            membership_changes = self.get_success(
-                self.slaved_store.get_membership_changes_for_user(
-                    USER_ID_2, prev_token, current_token
-                )
-            )
-
-            logger.info(
-                "%s->%s: joined_rooms=%r membership_changes=%r",
-                prev_token,
-                current_token,
-                joined_rooms,
-                membership_changes,
-            )
-
-            # the membership change is only any use to us if the room is in the
-            # joined_rooms list.
-            if membership_changes:
-                expected_pos = PersistedEventPosition(
-                    "master", j2.internal_metadata.stream_ordering
-                )
-                self.assertEqual(
-                    joined_rooms,
-                    {GetRoomsForUserWithStreamOrdering(ROOM_ID, expected_pos)},
-                )
-
-    event_id = 0
-
-    def persist(self, backfill: bool = False, **kwargs: Any) -> EventBase:
-        """
-        Returns:
-            The event that was persisted.
-        """
-        event, context = self.build_event(**kwargs)
-
-        if backfill:
-            self.get_success(
-                self.persistance.persist_events([(event, context)], backfilled=True)
-            )
-        else:
-            self.get_success(self.persistance.persist_event(event, context))
-
-        return event
-
-    def build_event(
-        self,
-        sender: str = USER_ID,
-        room_id: str = ROOM_ID,
-        type: str = "m.room.message",
-        key: Optional[str] = None,
-        internal: Optional[dict] = None,
-        depth: Optional[int] = None,
-        prev_events: Optional[List[Tuple[str, dict]]] = None,
-        auth_events: Optional[List[str]] = None,
-        prev_state: Optional[List[str]] = None,
-        redacts: Optional[str] = None,
-        push_actions: Iterable = frozenset(),
-        **content: object,
-    ) -> Tuple[EventBase, EventContext]:
-        prev_events = prev_events or []
-        auth_events = auth_events or []
-        prev_state = prev_state or []
-
-        if depth is None:
-            depth = self.event_id
-
-        if not prev_events:
-            latest_event_ids = self.get_success(
-                self.master_store.get_latest_event_ids_in_room(room_id)
-            )
-            prev_events = [(ev_id, {}) for ev_id in latest_event_ids]
-
-        event_dict = {
-            "sender": sender,
-            "type": type,
-            "content": content,
-            "event_id": "$%d:blue" % (self.event_id,),
-            "room_id": room_id,
-            "depth": depth,
-            "origin_server_ts": self.event_id,
-            "prev_events": prev_events,
-            "auth_events": auth_events,
-        }
-        if key is not None:
-            event_dict["state_key"] = key
-            event_dict["prev_state"] = prev_state
-
-        if redacts is not None:
-            event_dict["redacts"] = redacts
-
-        event = make_event_from_dict(event_dict, internal_metadata_dict=internal or {})
-
-        self.event_id += 1
-        state_handler = self.hs.get_state_handler()
-        context = self.get_success(state_handler.compute_event_context(event))
-
-        self.get_success(
-            self.master_store.add_push_actions_to_staging(
-                event.event_id,
-                dict(push_actions),
-                False,
-                "main",
-            )
-        )
-        return event, context
diff --git a/tests/replication/storage/__init__.py b/tests/replication/storage/__init__.py
new file mode 100644
index 0000000000..f43a360a80
--- /dev/null
+++ b/tests/replication/storage/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/replication/storage/_base.py b/tests/replication/storage/_base.py
new file mode 100644
index 0000000000..de26a62ae1
--- /dev/null
+++ b/tests/replication/storage/_base.py
@@ -0,0 +1,72 @@
+# Copyright 2016 OpenMarket Ltd
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Iterable, Optional
+from unittest.mock import Mock
+
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.server import HomeServer
+from synapse.util import Clock
+
+from tests.replication._base import BaseStreamTestCase
+
+
+class BaseWorkerStoreTestCase(BaseStreamTestCase):
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
+        return self.setup_test_homeserver(federation_client=Mock())
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        super().prepare(reactor, clock, hs)
+
+        self.reconnect()
+
+        self.master_store = hs.get_datastores().main
+        self.worker_store = self.worker_hs.get_datastores().main
+        persistence = hs.get_storage_controllers().persistence
+        assert persistence is not None
+        self.persistance = persistence
+
+    def replicate(self) -> None:
+        """Tell the master side of replication that something has happened, and then
+        wait for the replication to occur.
+        """
+        self.streamer.on_notifier_poke()
+        self.pump(0.1)
+
+    def check(
+        self, method: str, args: Iterable[Any], expected_result: Optional[Any] = None
+    ) -> None:
+        master_result = self.get_success(getattr(self.master_store, method)(*args))
+        worker_result = self.get_success(getattr(self.worker_store, method)(*args))
+        if expected_result is not None:
+            self.assertEqual(
+                master_result,
+                expected_result,
+                "Expected master result to be %r but was %r"
+                % (expected_result, master_result),
+            )
+            self.assertEqual(
+                worker_result,
+                expected_result,
+                "Expected worker result to be %r but was %r"
+                % (expected_result, worker_result),
+            )
+        self.assertEqual(
+            master_result,
+            worker_result,
+            "Worker result %r does not match master result %r"
+            % (worker_result, master_result),
+        )
diff --git a/tests/replication/storage/test_events.py b/tests/replication/storage/test_events.py
new file mode 100644
index 0000000000..f7c6417a09
--- /dev/null
+++ b/tests/replication/storage/test_events.py
@@ -0,0 +1,420 @@
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import Any, Callable, Iterable, List, Optional, Tuple
+
+from canonicaljson import encode_canonical_json
+from parameterized import parameterized
+
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.api.constants import ReceiptTypes
+from synapse.api.room_versions import RoomVersions
+from synapse.events import EventBase, _EventInternalMetadata, make_event_from_dict
+from synapse.events.snapshot import EventContext
+from synapse.handlers.room import RoomEventSource
+from synapse.server import HomeServer
+from synapse.storage.databases.main.event_push_actions import (
+    NotifCounts,
+    RoomNotifCounts,
+)
+from synapse.storage.databases.main.events_worker import EventsWorkerStore
+from synapse.storage.roommember import GetRoomsForUserWithStreamOrdering, RoomsForUser
+from synapse.types import PersistedEventPosition
+from synapse.util import Clock
+
+from tests.server import FakeTransport
+
+from ._base import BaseWorkerStoreTestCase
+
+USER_ID = "@feeling:test"
+USER_ID_2 = "@bright:test"
+OUTLIER = {"outlier": True}
+ROOM_ID = "!room:test"
+
+logger = logging.getLogger(__name__)
+
+
+def dict_equals(self: EventBase, other: EventBase) -> bool:
+    me = encode_canonical_json(self.get_pdu_json())
+    them = encode_canonical_json(other.get_pdu_json())
+    return me == them
+
+
+def patch__eq__(cls: object) -> Callable[[], None]:
+    eq = getattr(cls, "__eq__", None)
+    cls.__eq__ = dict_equals  # type: ignore[assignment]
+
+    def unpatch() -> None:
+        if eq is not None:
+            cls.__eq__ = eq  # type: ignore[assignment]
+
+    return unpatch
+
+
+class EventsWorkerStoreTestCase(BaseWorkerStoreTestCase):
+    STORE_TYPE = EventsWorkerStore
+
+    def setUp(self) -> None:
+        # Patch up the equality operator for events so that we can check
+        # whether lists of events match using assertEqual
+        self.unpatches = [patch__eq__(_EventInternalMetadata), patch__eq__(EventBase)]
+        super().setUp()
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        super().prepare(reactor, clock, hs)
+
+        self.get_success(
+            self.master_store.store_room(
+                ROOM_ID,
+                USER_ID,
+                is_public=False,
+                room_version=RoomVersions.V1,
+            )
+        )
+
+    def tearDown(self) -> None:
+        [unpatch() for unpatch in self.unpatches]
+
+    def test_get_latest_event_ids_in_room(self) -> None:
+        create = self.persist(type="m.room.create", key="", creator=USER_ID)
+        self.replicate()
+        self.check("get_latest_event_ids_in_room", (ROOM_ID,), [create.event_id])
+
+        join = self.persist(
+            type="m.room.member",
+            key=USER_ID,
+            membership="join",
+            prev_events=[(create.event_id, {})],
+        )
+        self.replicate()
+        self.check("get_latest_event_ids_in_room", (ROOM_ID,), [join.event_id])
+
+    def test_redactions(self) -> None:
+        self.persist(type="m.room.create", key="", creator=USER_ID)
+        self.persist(type="m.room.member", key=USER_ID, membership="join")
+
+        msg = self.persist(type="m.room.message", msgtype="m.text", body="Hello")
+        self.replicate()
+        self.check("get_event", [msg.event_id], msg)
+
+        redaction = self.persist(type="m.room.redaction", redacts=msg.event_id)
+        self.replicate()
+
+        msg_dict = msg.get_dict()
+        msg_dict["content"] = {}
+        msg_dict["unsigned"]["redacted_by"] = redaction.event_id
+        msg_dict["unsigned"]["redacted_because"] = redaction
+        redacted = make_event_from_dict(
+            msg_dict, internal_metadata_dict=msg.internal_metadata.get_dict()
+        )
+        self.check("get_event", [msg.event_id], redacted)
+
+    def test_backfilled_redactions(self) -> None:
+        self.persist(type="m.room.create", key="", creator=USER_ID)
+        self.persist(type="m.room.member", key=USER_ID, membership="join")
+
+        msg = self.persist(type="m.room.message", msgtype="m.text", body="Hello")
+        self.replicate()
+        self.check("get_event", [msg.event_id], msg)
+
+        redaction = self.persist(
+            type="m.room.redaction", redacts=msg.event_id, backfill=True
+        )
+        self.replicate()
+
+        msg_dict = msg.get_dict()
+        msg_dict["content"] = {}
+        msg_dict["unsigned"]["redacted_by"] = redaction.event_id
+        msg_dict["unsigned"]["redacted_because"] = redaction
+        redacted = make_event_from_dict(
+            msg_dict, internal_metadata_dict=msg.internal_metadata.get_dict()
+        )
+        self.check("get_event", [msg.event_id], redacted)
+
+    def test_invites(self) -> None:
+        self.persist(type="m.room.create", key="", creator=USER_ID)
+        self.check("get_invited_rooms_for_local_user", [USER_ID_2], [])
+        event = self.persist(type="m.room.member", key=USER_ID_2, membership="invite")
+        assert event.internal_metadata.stream_ordering is not None
+
+        self.replicate()
+
+        self.check(
+            "get_invited_rooms_for_local_user",
+            [USER_ID_2],
+            [
+                RoomsForUser(
+                    ROOM_ID,
+                    USER_ID,
+                    "invite",
+                    event.event_id,
+                    event.internal_metadata.stream_ordering,
+                    RoomVersions.V1.identifier,
+                )
+            ],
+        )
+
+    @parameterized.expand([(True,), (False,)])
+    def test_push_actions_for_user(self, send_receipt: bool) -> None:
+        self.persist(type="m.room.create", key="", creator=USER_ID)
+        self.persist(type="m.room.member", key=USER_ID, membership="join")
+        self.persist(
+            type="m.room.member", sender=USER_ID, key=USER_ID_2, membership="join"
+        )
+        event1 = self.persist(type="m.room.message", msgtype="m.text", body="hello")
+        self.replicate()
+
+        if send_receipt:
+            self.get_success(
+                self.master_store.insert_receipt(
+                    ROOM_ID, ReceiptTypes.READ, USER_ID_2, [event1.event_id], None, {}
+                )
+            )
+
+        self.check(
+            "get_unread_event_push_actions_by_room_for_user",
+            [ROOM_ID, USER_ID_2],
+            RoomNotifCounts(
+                NotifCounts(highlight_count=0, unread_count=0, notify_count=0), {}
+            ),
+        )
+
+        self.persist(
+            type="m.room.message",
+            msgtype="m.text",
+            body="world",
+            push_actions=[(USER_ID_2, ["notify"])],
+        )
+        self.replicate()
+        self.check(
+            "get_unread_event_push_actions_by_room_for_user",
+            [ROOM_ID, USER_ID_2],
+            RoomNotifCounts(
+                NotifCounts(highlight_count=0, unread_count=0, notify_count=1), {}
+            ),
+        )
+
+        self.persist(
+            type="m.room.message",
+            msgtype="m.text",
+            body="world",
+            push_actions=[
+                (USER_ID_2, ["notify", {"set_tweak": "highlight", "value": True}])
+            ],
+        )
+        self.replicate()
+        self.check(
+            "get_unread_event_push_actions_by_room_for_user",
+            [ROOM_ID, USER_ID_2],
+            RoomNotifCounts(
+                NotifCounts(highlight_count=1, unread_count=0, notify_count=2), {}
+            ),
+        )
+
+    def test_get_rooms_for_user_with_stream_ordering(self) -> None:
+        """Check that the cache on get_rooms_for_user_with_stream_ordering is invalidated
+        by rows in the events stream
+        """
+        self.persist(type="m.room.create", key="", creator=USER_ID)
+        self.persist(type="m.room.member", key=USER_ID, membership="join")
+        self.replicate()
+        self.check("get_rooms_for_user_with_stream_ordering", (USER_ID_2,), set())
+
+        j2 = self.persist(
+            type="m.room.member", sender=USER_ID_2, key=USER_ID_2, membership="join"
+        )
+        assert j2.internal_metadata.stream_ordering is not None
+        self.replicate()
+
+        expected_pos = PersistedEventPosition(
+            "master", j2.internal_metadata.stream_ordering
+        )
+        self.check(
+            "get_rooms_for_user_with_stream_ordering",
+            (USER_ID_2,),
+            {GetRoomsForUserWithStreamOrdering(ROOM_ID, expected_pos)},
+        )
+
+    def test_get_rooms_for_user_with_stream_ordering_with_multi_event_persist(
+        self,
+    ) -> None:
+        """Check that current_state invalidation happens correctly with multiple events
+        in the persistence batch.
+
+        This test attempts to reproduce a race condition between the event persistence
+        loop and a worker-based Sync handler.
+
+        The problem occurred when the master persisted several events in one batch. It
+        only updates the current_state at the end of each batch, so the obvious thing
+        to do is then to issue a current_state_delta stream update corresponding to the
+        last stream_id in the batch.
+
+        However, that raises the possibility that a worker will see the replication
+        notification for a join event before the current_state caches are invalidated.
+
+        The test involves:
+         * creating a join and a message event for a user, and persisting them in the
+           same batch
+
+         * controlling the replication stream so that updates are sent gradually
+
+         * between each bunch of replication updates, check that we see a consistent
+           snapshot of the state.
+        """
+        self.persist(type="m.room.create", key="", creator=USER_ID)
+        self.persist(type="m.room.member", key=USER_ID, membership="join")
+        self.replicate()
+        self.check("get_rooms_for_user_with_stream_ordering", (USER_ID_2,), set())
+
+        # limit the replication rate
+        repl_transport = self._server_transport
+        assert isinstance(repl_transport, FakeTransport)
+        repl_transport.autoflush = False
+
+        # build the join and message events and persist them in the same batch.
+        logger.info("----- build test events ------")
+        j2, j2ctx = self.build_event(
+            type="m.room.member", sender=USER_ID_2, key=USER_ID_2, membership="join"
+        )
+        msg, msgctx = self.build_event()
+        self.get_success(self.persistance.persist_events([(j2, j2ctx), (msg, msgctx)]))
+        self.replicate()
+        assert j2.internal_metadata.stream_ordering is not None
+
+        event_source = RoomEventSource(self.hs)
+        event_source.store = self.worker_store
+        current_token = event_source.get_current_key()
+
+        # gradually stream out the replication
+        while repl_transport.buffer:
+            logger.info("------ flush ------")
+            repl_transport.flush(30)
+            self.pump(0)
+
+            prev_token = current_token
+            current_token = event_source.get_current_key()
+
+            # attempt to replicate the behaviour of the sync handler.
+            #
+            # First, we get a list of the rooms we are joined to
+            joined_rooms = self.get_success(
+                self.worker_store.get_rooms_for_user_with_stream_ordering(USER_ID_2)
+            )
+
+            # Then, we get a list of the events since the last sync
+            membership_changes = self.get_success(
+                self.worker_store.get_membership_changes_for_user(
+                    USER_ID_2, prev_token, current_token
+                )
+            )
+
+            logger.info(
+                "%s->%s: joined_rooms=%r membership_changes=%r",
+                prev_token,
+                current_token,
+                joined_rooms,
+                membership_changes,
+            )
+
+            # the membership change is only any use to us if the room is in the
+            # joined_rooms list.
+            if membership_changes:
+                expected_pos = PersistedEventPosition(
+                    "master", j2.internal_metadata.stream_ordering
+                )
+                self.assertEqual(
+                    joined_rooms,
+                    {GetRoomsForUserWithStreamOrdering(ROOM_ID, expected_pos)},
+                )
+
+    event_id = 0
+
+    def persist(self, backfill: bool = False, **kwargs: Any) -> EventBase:
+        """
+        Returns:
+            The event that was persisted.
+        """
+        event, context = self.build_event(**kwargs)
+
+        if backfill:
+            self.get_success(
+                self.persistance.persist_events([(event, context)], backfilled=True)
+            )
+        else:
+            self.get_success(self.persistance.persist_event(event, context))
+
+        return event
+
+    def build_event(
+        self,
+        sender: str = USER_ID,
+        room_id: str = ROOM_ID,
+        type: str = "m.room.message",
+        key: Optional[str] = None,
+        internal: Optional[dict] = None,
+        depth: Optional[int] = None,
+        prev_events: Optional[List[Tuple[str, dict]]] = None,
+        auth_events: Optional[List[str]] = None,
+        prev_state: Optional[List[str]] = None,
+        redacts: Optional[str] = None,
+        push_actions: Iterable = frozenset(),
+        **content: object,
+    ) -> Tuple[EventBase, EventContext]:
+        prev_events = prev_events or []
+        auth_events = auth_events or []
+        prev_state = prev_state or []
+
+        if depth is None:
+            depth = self.event_id
+
+        if not prev_events:
+            latest_event_ids = self.get_success(
+                self.master_store.get_latest_event_ids_in_room(room_id)
+            )
+            prev_events = [(ev_id, {}) for ev_id in latest_event_ids]
+
+        event_dict = {
+            "sender": sender,
+            "type": type,
+            "content": content,
+            "event_id": "$%d:blue" % (self.event_id,),
+            "room_id": room_id,
+            "depth": depth,
+            "origin_server_ts": self.event_id,
+            "prev_events": prev_events,
+            "auth_events": auth_events,
+        }
+        if key is not None:
+            event_dict["state_key"] = key
+            event_dict["prev_state"] = prev_state
+
+        if redacts is not None:
+            event_dict["redacts"] = redacts
+
+        event = make_event_from_dict(event_dict, internal_metadata_dict=internal or {})
+
+        self.event_id += 1
+        state_handler = self.hs.get_state_handler()
+        context = self.get_success(state_handler.compute_event_context(event))
+
+        self.get_success(
+            self.master_store.add_push_actions_to_staging(
+                event.event_id,
+                dict(push_actions),
+                False,
+                "main",
+            )
+        )
+        return event, context
-- 
cgit 1.5.1


From 4ee82c0576baed6358e3818e8c22e01bde6afd02 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 16 May 2023 16:25:01 -0400
Subject: Apply url_preview_url_blacklist to oEmbed and pre-cached images
 (#15601)

There are two situations which were previously not properly checked:

1. If the requested URL was replaced with an oEmbed URL, then the
   oEmbed URL was not checked against url_preview_url_blacklist.
2. Follow-up URLs (either via autodiscovery of oEmbed or to pre-cache
   images) were not checked against url_preview_url_blacklist.
---
 changelog.d/15601.bugfix             |   1 +
 synapse/media/url_previewer.py       | 121 +++++++++++++---------
 tests/media/test_url_previewer.py    | 113 ++++++++++++++++++++
 tests/rest/media/test_url_preview.py | 194 ++++++++++++++++++++++++++++++++++-
 4 files changed, 379 insertions(+), 50 deletions(-)
 create mode 100644 changelog.d/15601.bugfix
 create mode 100644 tests/media/test_url_previewer.py

(limited to 'synapse')

diff --git a/changelog.d/15601.bugfix b/changelog.d/15601.bugfix
new file mode 100644
index 0000000000..426db6cea3
--- /dev/null
+++ b/changelog.d/15601.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where the `url_preview_url_blacklist` configuration setting was not applied to oEmbed or image URLs found while previewing a URL.
diff --git a/synapse/media/url_previewer.py b/synapse/media/url_previewer.py
index c8a4a809f1..dbdb1fd20e 100644
--- a/synapse/media/url_previewer.py
+++ b/synapse/media/url_previewer.py
@@ -113,7 +113,7 @@ class UrlPreviewer:
        1. Checks URL and timestamp against the database cache and returns the result if it
           has not expired and was successful (a 2xx return code).
        2. Checks if the URL matches an oEmbed (https://oembed.com/) pattern. If it
-          does, update the URL to download.
+          does and the new URL is not blocked, update the URL to download.
        3. Downloads the URL and stores it into a file via the media storage provider
           and saves the local media metadata.
        4. If the media is an image:
@@ -127,14 +127,14 @@ class UrlPreviewer:
                 and saves the local media metadata.
              2. Convert the oEmbed response to an Open Graph response.
              3. Override any Open Graph data from the HTML with data from oEmbed.
-          4. If an image exists in the Open Graph response:
+          4. If an image URL exists in the Open Graph response:
              1. Downloads the URL and stores it into a file via the media storage
                 provider and saves the local media metadata.
              2. Generates thumbnails.
              3. Updates the Open Graph response based on image properties.
-       6. If the media is JSON and an oEmbed URL was found:
+       6. If an oEmbed URL was found and the media is JSON:
           1. Convert the oEmbed response to an Open Graph response.
-          2. If a thumbnail or image is in the oEmbed response:
+          2. If an image URL is in the oEmbed response:
              1. Downloads the URL and stores it into a file via the media storage
                 provider and saves the local media metadata.
              2. Generates thumbnails.
@@ -144,7 +144,8 @@ class UrlPreviewer:
 
     If any additional requests (e.g. from oEmbed autodiscovery, step 5.3 or
     image thumbnailing, step 5.4 or 6.4) fails then the URL preview as a whole
-    does not fail. As much information as possible is returned.
+    does not fail. If any of them are blocked, then those additional requests
+    are skipped. As much information as possible is returned.
 
     The in-memory cache expires after 1 hour.
 
@@ -203,48 +204,14 @@ class UrlPreviewer:
             )
 
     async def preview(self, url: str, user: UserID, ts: int) -> bytes:
-        # XXX: we could move this into _do_preview if we wanted.
-        url_tuple = urlsplit(url)
-        for entry in self.url_preview_url_blacklist:
-            match = True
-            for attrib in entry:
-                pattern = entry[attrib]
-                value = getattr(url_tuple, attrib)
-                logger.debug(
-                    "Matching attrib '%s' with value '%s' against pattern '%s'",
-                    attrib,
-                    value,
-                    pattern,
-                )
-
-                if value is None:
-                    match = False
-                    continue
-
-                # Some attributes might not be parsed as strings by urlsplit (such as the
-                # port, which is parsed as an int). Because we use match functions that
-                # expect strings, we want to make sure that's what we give them.
-                value_str = str(value)
-
-                if pattern.startswith("^"):
-                    if not re.match(pattern, value_str):
-                        match = False
-                        continue
-                else:
-                    if not fnmatch.fnmatch(value_str, pattern):
-                        match = False
-                        continue
-            if match:
-                logger.warning("URL %s blocked by url_blacklist entry %s", url, entry)
-                raise SynapseError(
-                    403, "URL blocked by url pattern blacklist entry", Codes.UNKNOWN
-                )
-
         # the in-memory cache:
-        # * ensures that only one request is active at a time
+        # * ensures that only one request to a URL is active at a time
         # * takes load off the DB for the thundering herds
         # * also caches any failures (unlike the DB) so we don't keep
-        #    requesting the same endpoint
+        #   requesting the same endpoint
+        #
+        # Note that autodiscovered oEmbed URLs and pre-caching of images
+        # are not captured in the in-memory cache.
 
         observable = self._cache.get(url)
 
@@ -283,7 +250,7 @@ class UrlPreviewer:
                 og = og.encode("utf8")
             return og
 
-        # If this URL can be accessed via oEmbed, use that instead.
+        # If this URL can be accessed via an allowed oEmbed, use that instead.
         url_to_download = url
         oembed_url = self._oembed.get_oembed_url(url)
         if oembed_url:
@@ -329,6 +296,7 @@ class UrlPreviewer:
                 # defer to that.
                 oembed_url = self._oembed.autodiscover_from_html(tree)
                 og_from_oembed: JsonDict = {}
+                # Only download the oEmbed URL if it is allowed.
                 if oembed_url:
                     try:
                         oembed_info = await self._handle_url(
@@ -411,6 +379,59 @@ class UrlPreviewer:
 
         return jsonog.encode("utf8")
 
+    def _is_url_blocked(self, url: str) -> bool:
+        """
+        Check whether the URL is allowed to be previewed (according to the homeserver
+        configuration).
+
+        Args:
+            url: The requested URL.
+
+        Returns:
+            True if the URL is blocked, False if it is allowed.
+        """
+        url_tuple = urlsplit(url)
+        for entry in self.url_preview_url_blacklist:
+            match = True
+            # Iterate over each entry. If *all* attributes of that entry match
+            # the current URL, then reject it.
+            for attrib, pattern in entry.items():
+                value = getattr(url_tuple, attrib)
+                logger.debug(
+                    "Matching attrib '%s' with value '%s' against pattern '%s'",
+                    attrib,
+                    value,
+                    pattern,
+                )
+
+                if value is None:
+                    match = False
+                    break
+
+                # Some attributes might not be parsed as strings by urlsplit (such as the
+                # port, which is parsed as an int). Because we use match functions that
+                # expect strings, we want to make sure that's what we give them.
+                value_str = str(value)
+
+                # Check the value against the pattern as either a regular expression or
+                # a glob. If it doesn't match, the entry doesn't match.
+                if pattern.startswith("^"):
+                    if not re.match(pattern, value_str):
+                        match = False
+                        break
+                else:
+                    if not fnmatch.fnmatch(value_str, pattern):
+                        match = False
+                        break
+
+            # All fields matched, return true (the URL is blocked).
+            if match:
+                logger.warning("URL %s blocked by url_blacklist entry %s", url, entry)
+                return match
+
+        # No matches were found, the URL is allowed.
+        return False
+
     async def _download_url(self, url: str, output_stream: BinaryIO) -> DownloadResult:
         """
         Fetches a remote URL and parses the headers.
@@ -547,8 +568,16 @@ class UrlPreviewer:
 
         Returns:
             A MediaInfo object describing the fetched content.
+
+        Raises:
+            SynapseError if the URL is blocked.
         """
 
+        if self._is_url_blocked(url):
+            raise SynapseError(
+                403, "URL blocked by url pattern blacklist entry", Codes.UNKNOWN
+            )
+
         # TODO: we should probably honour robots.txt... except in practice
         # we're most likely being explicitly triggered by a human rather than a
         # bot, so are we really a robot?
@@ -624,7 +653,7 @@ class UrlPreviewer:
             return
 
         # The image URL from the HTML might be relative to the previewed page,
-        # convert it to an URL which can be requested directly.
+        # convert it to a URL which can be requested directly.
         url_parts = urlparse(image_url)
         if url_parts.scheme != "data":
             image_url = urljoin(media_info.uri, image_url)
diff --git a/tests/media/test_url_previewer.py b/tests/media/test_url_previewer.py
new file mode 100644
index 0000000000..3c4c7d6765
--- /dev/null
+++ b/tests/media/test_url_previewer.py
@@ -0,0 +1,113 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.server import HomeServer
+from synapse.util import Clock
+
+from tests import unittest
+from tests.unittest import override_config
+
+try:
+    import lxml
+except ImportError:
+    lxml = None
+
+
+class URLPreviewTests(unittest.HomeserverTestCase):
+    if not lxml:
+        skip = "url preview feature requires lxml"
+
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
+        config = self.default_config()
+        config["url_preview_enabled"] = True
+        config["max_spider_size"] = 9999999
+        config["url_preview_ip_range_blacklist"] = (
+            "192.168.1.1",
+            "1.0.0.0/8",
+            "3fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
+            "2001:800::/21",
+        )
+
+        self.storage_path = self.mktemp()
+        self.media_store_path = self.mktemp()
+        os.mkdir(self.storage_path)
+        os.mkdir(self.media_store_path)
+        config["media_store_path"] = self.media_store_path
+
+        provider_config = {
+            "module": "synapse.media.storage_provider.FileStorageProviderBackend",
+            "store_local": True,
+            "store_synchronous": False,
+            "store_remote": True,
+            "config": {"directory": self.storage_path},
+        }
+
+        config["media_storage_providers"] = [provider_config]
+
+        return self.setup_test_homeserver(config=config)
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        media_repo_resource = hs.get_media_repository_resource()
+        preview_url = media_repo_resource.children[b"preview_url"]
+        self.url_previewer = preview_url._url_previewer
+
+    def test_all_urls_allowed(self) -> None:
+        self.assertFalse(self.url_previewer._is_url_blocked("http://matrix.org"))
+        self.assertFalse(self.url_previewer._is_url_blocked("https://matrix.org"))
+        self.assertFalse(self.url_previewer._is_url_blocked("http://localhost:8000"))
+        self.assertFalse(
+            self.url_previewer._is_url_blocked("http://user:pass@matrix.org")
+        )
+
+    @override_config(
+        {
+            "url_preview_url_blacklist": [
+                {"username": "user"},
+                {"scheme": "http", "netloc": "matrix.org"},
+            ]
+        }
+    )
+    def test_blocked_url(self) -> None:
+        # Blocked via scheme and netloc.
+        self.assertTrue(self.url_previewer._is_url_blocked("http://matrix.org"))
+        # Not blocked because all components must match.
+        self.assertFalse(self.url_previewer._is_url_blocked("https://matrix.org"))
+
+        # Blocked due to the user.
+        self.assertTrue(
+            self.url_previewer._is_url_blocked("http://user:pass@example.com")
+        )
+        self.assertTrue(self.url_previewer._is_url_blocked("http://user@example.com"))
+
+    @override_config({"url_preview_url_blacklist": [{"netloc": "*.example.com"}]})
+    def test_glob_blocked_url(self) -> None:
+        # All subdomains are blocked.
+        self.assertTrue(self.url_previewer._is_url_blocked("http://foo.example.com"))
+        self.assertTrue(self.url_previewer._is_url_blocked("http://.example.com"))
+
+        # The bare domain is not blocked.
+        self.assertFalse(self.url_previewer._is_url_blocked("https://example.com"))
+
+    @override_config({"url_preview_url_blacklist": [{"netloc": "^.+\\.example\\.com"}]})
+    def test_regex_blocked_urL(self) -> None:
+        # All subdomains are blocked.
+        self.assertTrue(self.url_previewer._is_url_blocked("http://foo.example.com"))
+        # Requires a non-empty subdomain.
+        self.assertFalse(self.url_previewer._is_url_blocked("http://.example.com"))
+
+        # The bare domain is not blocked.
+        self.assertFalse(self.url_previewer._is_url_blocked("https://example.com"))
diff --git a/tests/rest/media/test_url_preview.py b/tests/rest/media/test_url_preview.py
index e44beae8c1..7517155cf3 100644
--- a/tests/rest/media/test_url_preview.py
+++ b/tests/rest/media/test_url_preview.py
@@ -653,6 +653,57 @@ class URLPreviewTests(unittest.HomeserverTestCase):
             server.data,
         )
 
+    def test_image(self) -> None:
+        """An image should be precached if mentioned in the HTML."""
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+        self.lookups["cdn.matrix.org"] = [(IPv4Address, "10.1.2.4")]
+
+        result = (
+            b"""<html><body><img src="http://cdn.matrix.org/foo.png"></body></html>"""
+        )
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://matrix.org",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        # Respond with the HTML.
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+            )
+            % (len(result),)
+            + result
+        )
+        self.pump()
+
+        # Respond with the photo.
+        client = self.reactor.tcpClients[1][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b"Content-Type: image/png\r\n\r\n"
+            )
+            % (len(SMALL_PNG),)
+            + SMALL_PNG
+        )
+        self.pump()
+
+        # The image should be in the result.
+        self.assertEqual(channel.code, 200)
+        self._assert_small_png(channel.json_body)
+
     def test_nonexistent_image(self) -> None:
         """If the preview image doesn't exist, ensure some data is returned."""
         self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
@@ -683,9 +734,53 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         )
 
         self.pump()
+
+        # There should not be a second connection.
+        self.assertEqual(len(self.reactor.tcpClients), 1)
+
+        # The image should not be in the result.
         self.assertEqual(channel.code, 200)
+        self.assertNotIn("og:image", channel.json_body)
+
+    @unittest.override_config(
+        {"url_preview_url_blacklist": [{"netloc": "cdn.matrix.org"}]}
+    )
+    def test_image_blocked(self) -> None:
+        """If the preview image is blocked, ensure some data is still returned."""
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+        self.lookups["cdn.matrix.org"] = [(IPv4Address, "10.1.2.4")]
+
+        result = (
+            b"""<html><body><img src="http://cdn.matrix.org/foo.jpg"></body></html>"""
+        )
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://matrix.org",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+            )
+            % (len(result),)
+            + result
+        )
+        self.pump()
+
+        # There should not be a second connection.
+        self.assertEqual(len(self.reactor.tcpClients), 1)
 
         # The image should not be in the result.
+        self.assertEqual(channel.code, 200)
         self.assertNotIn("og:image", channel.json_body)
 
     def test_oembed_failure(self) -> None:
@@ -880,6 +975,11 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         )
 
         self.pump()
+
+        # Double check that the proper host is being connected to. (Note that
+        # twitter.com can't be resolved so this is already implicitly checked.)
+        self.assertIn(b"\r\nHost: publish.twitter.com\r\n", server.data)
+
         self.assertEqual(channel.code, 200)
         body = channel.json_body
         self.assertEqual(
@@ -940,6 +1040,22 @@ class URLPreviewTests(unittest.HomeserverTestCase):
             },
         )
 
+    @unittest.override_config(
+        {"url_preview_url_blacklist": [{"netloc": "publish.twitter.com"}]}
+    )
+    def test_oembed_blocked(self) -> None:
+        """The oEmbed URL should not be downloaded if the oEmbed URL is blocked."""
+        self.lookups["twitter.com"] = [(IPv4Address, "10.1.2.3")]
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://twitter.com/matrixdotorg/status/12345",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+        self.assertEqual(channel.code, 403, channel.result)
+
     def test_oembed_autodiscovery(self) -> None:
         """
         Autodiscovery works by finding the link in the HTML response and then requesting an oEmbed URL.
@@ -980,7 +1096,6 @@ class URLPreviewTests(unittest.HomeserverTestCase):
             % (len(result),)
             + result
         )
-
         self.pump()
 
         # The oEmbed response.
@@ -1004,7 +1119,6 @@ class URLPreviewTests(unittest.HomeserverTestCase):
             % (len(oembed_content),)
             + oembed_content
         )
-
         self.pump()
 
         # Ensure the URL is what was requested.
@@ -1023,7 +1137,6 @@ class URLPreviewTests(unittest.HomeserverTestCase):
             % (len(SMALL_PNG),)
             + SMALL_PNG
         )
-
         self.pump()
 
         # Ensure the URL is what was requested.
@@ -1036,6 +1149,59 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         )
         self._assert_small_png(body)
 
+    @unittest.override_config(
+        {"url_preview_url_blacklist": [{"netloc": "publish.twitter.com"}]}
+    )
+    def test_oembed_autodiscovery_blocked(self) -> None:
+        """
+        If the discovered oEmbed URL is blocked, it should be discarded.
+        """
+        # This is a little cheesy in that we use the www subdomain (which isn't in the
+        # list of oEmbed patterns) to get a "raw" HTML response.
+        self.lookups["www.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+        self.lookups["publish.twitter.com"] = [(IPv4Address, "10.1.2.4")]
+
+        result = b"""
+        <title>Test</title>
+        <link rel="alternate" type="application/json+oembed"
+            href="http://publish.twitter.com/oembed?url=http%3A%2F%2Fcdn.twitter.com%2Fmatrixdotorg%2Fstatus%2F12345&format=json"
+            title="matrixdotorg" />
+        """
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://www.twitter.com/matrixdotorg/status/12345",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+            )
+            % (len(result),)
+            + result
+        )
+
+        self.pump()
+
+        # Ensure there's no additional connections.
+        self.assertEqual(len(self.reactor.tcpClients), 1)
+
+        # Ensure the URL is what was requested.
+        self.assertIn(b"\r\nHost: www.twitter.com\r\n", server.data)
+
+        self.assertEqual(channel.code, 200)
+        body = channel.json_body
+        self.assertEqual(body["og:title"], "Test")
+        self.assertNotIn("og:image", body)
+
     def _download_image(self) -> Tuple[str, str]:
         """Downloads an image into the URL cache.
         Returns:
@@ -1192,7 +1358,7 @@ class URLPreviewTests(unittest.HomeserverTestCase):
             )
 
     @unittest.override_config({"url_preview_url_blacklist": [{"port": "*"}]})
-    def test_blacklist_port(self) -> None:
+    def test_blocked_port(self) -> None:
         """Tests that blacklisting URLs with a port makes previewing such URLs
         fail with a 403 error and doesn't impact other previews.
         """
@@ -1230,3 +1396,23 @@ class URLPreviewTests(unittest.HomeserverTestCase):
 
         self.pump()
         self.assertEqual(channel.code, 200)
+
+    @unittest.override_config(
+        {"url_preview_url_blacklist": [{"netloc": "example.com"}]}
+    )
+    def test_blocked_url(self) -> None:
+        """Tests that blacklisting URLs with a host makes previewing such URLs
+        fail with a 403 error.
+        """
+        self.lookups["example.com"] = [(IPv4Address, "10.1.2.3")]
+
+        bad_url = quote("http://example.com/foo")
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=" + bad_url,
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+        self.assertEqual(channel.code, 403, channel.result)
-- 
cgit 1.5.1


From 41b9def9f2c02118796e147f63abf23bc2d7dc04 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Wed, 17 May 2023 16:39:06 +0200
Subject: Add a new admin API to create a new device for a user. (#15611)

This allows an external service (e.g. the matrix-authentication-service)
to create devices for users.
---
 changelog.d/15611.feature        |  1 +
 docs/admin_api/user_admin_api.md | 27 +++++++++++++++++++++++++++
 synapse/rest/admin/devices.py    | 29 +++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+)
 create mode 100644 changelog.d/15611.feature

(limited to 'synapse')

diff --git a/changelog.d/15611.feature b/changelog.d/15611.feature
new file mode 100644
index 0000000000..7cfb46fd0a
--- /dev/null
+++ b/changelog.d/15611.feature
@@ -0,0 +1 @@
+Add a new admin API to create a new device for a user.
diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md
index 6b952ba396..229942b311 100644
--- a/docs/admin_api/user_admin_api.md
+++ b/docs/admin_api/user_admin_api.md
@@ -813,6 +813,33 @@ The following fields are returned in the JSON response body:
 
 - `total` - Total number of user's devices.
 
+### Create a device
+
+Creates a new device for a specific `user_id` and `device_id`. Does nothing if the `device_id` 
+exists already.
+
+The API is:
+
+```
+POST /_synapse/admin/v2/users/<user_id>/devices
+
+{
+  "device_id": "QBUAZIFURK"
+}
+```
+
+An empty JSON dict is returned.
+
+**Parameters**
+
+The following parameters should be set in the URL:
+
+- `user_id` - fully qualified: for example, `@user:server.com`.
+
+The following fields are required in the JSON request body:
+
+- `device_id` - The device ID to create.
+
 ### Delete multiple devices
 Deletes the given devices for a specific `user_id`, and invalidates
 any access token associated with them.
diff --git a/synapse/rest/admin/devices.py b/synapse/rest/admin/devices.py
index 3b2f2d9abb..11ebed9bfd 100644
--- a/synapse/rest/admin/devices.py
+++ b/synapse/rest/admin/devices.py
@@ -137,6 +137,35 @@ class DevicesRestServlet(RestServlet):
         devices = await self.device_handler.get_devices_by_user(target_user.to_string())
         return HTTPStatus.OK, {"devices": devices, "total": len(devices)}
 
+    async def on_POST(
+        self, request: SynapseRequest, user_id: str
+    ) -> Tuple[int, JsonDict]:
+        """Creates a new device for the user."""
+        await assert_requester_is_admin(self.auth, request)
+
+        target_user = UserID.from_string(user_id)
+        if not self.is_mine(target_user):
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST, "Can only create devices for local users"
+            )
+
+        u = await self.store.get_user_by_id(target_user.to_string())
+        if u is None:
+            raise NotFoundError("Unknown user")
+
+        body = parse_json_object_from_request(request)
+        device_id = body.get("device_id")
+        if not device_id:
+            raise SynapseError(HTTPStatus.BAD_REQUEST, "Missing device_id")
+        if not isinstance(device_id, str):
+            raise SynapseError(HTTPStatus.BAD_REQUEST, "device_id must be a string")
+
+        await self.device_handler.check_device_registered(
+            user_id=user_id, device_id=device_id
+        )
+
+        return HTTPStatus.CREATED, {}
+
 
 class DeleteDevicesRestServlet(RestServlet):
     """
-- 
cgit 1.5.1


From e15aa00bc08f68c3a1c1b91f3a59e63554d7aa70 Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Thu, 18 May 2023 10:58:13 +0100
Subject: Fix error message when `app_service_config_files` validation fails
 (#15614)

The second argument of `ConfigError` is a path, passed as an optional
`Iterable[str]` and not a `str`. If a string is passed directly,
Synapse unhelpfully emits "Error in configuration at
a.p.p._.s.e.r.v.i.c.e._.c.o.n.f.i.g._.f.i.l.e.s'" when the config
option has the wrong data type.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/15614.bugfix     | 1 +
 synapse/config/appservice.py | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15614.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15614.bugfix b/changelog.d/15614.bugfix
new file mode 100644
index 0000000000..b523ae6eb1
--- /dev/null
+++ b/changelog.d/15614.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.82.0 where the error message displayed when validation of the `app_service_config_files` config option fails would be incorrectly formatted.
diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py
index fd89960e72..c2710fdf04 100644
--- a/synapse/config/appservice.py
+++ b/synapse/config/appservice.py
@@ -36,11 +36,10 @@ class AppServiceConfig(Config):
         if not isinstance(self.app_service_config_files, list) or not all(
             type(x) is str for x in self.app_service_config_files
         ):
-            # type-ignore: this function gets arbitrary json value; we do use this path.
             raise ConfigError(
                 "Expected '%s' to be a list of AS config files:"
                 % (self.app_service_config_files),
-                "app_service_config_files",
+                ("app_service_config_files",),
             )
 
         self.track_appservice_user_ips = config.get("track_appservice_user_ips", False)
-- 
cgit 1.5.1


From 68dcd2cbcb3c01787ade9cf3725486712a7cafda Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Thu, 18 May 2023 11:11:30 +0100
Subject: Re-type config paths in `ConfigError`s to be `StrSequence`s (#15615)

Part of #14809.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/15615.misc        |  1 +
 synapse/config/_base.py       |  3 ++-
 synapse/config/_base.pyi      |  3 ++-
 synapse/config/_util.py       |  8 ++++----
 synapse/config/oembed.py      |  6 +++---
 synapse/config/server.py      |  4 ++--
 synapse/types/__init__.py     |  8 ++++++++
 synapse/util/module_loader.py | 24 +++++++++---------------
 8 files changed, 31 insertions(+), 26 deletions(-)
 create mode 100644 changelog.d/15615.misc

(limited to 'synapse')

diff --git a/changelog.d/15615.misc b/changelog.d/15615.misc
new file mode 100644
index 0000000000..a39fd0a098
--- /dev/null
+++ b/changelog.d/15615.misc
@@ -0,0 +1 @@
+Re-type config paths in `ConfigError`s to be `StrSequence`s instead of `Iterable[str]`s.
diff --git a/synapse/config/_base.py b/synapse/config/_base.py
index 2ce60610ca..1d268a1817 100644
--- a/synapse/config/_base.py
+++ b/synapse/config/_base.py
@@ -44,6 +44,7 @@ import jinja2
 import pkg_resources
 import yaml
 
+from synapse.types import StrSequence
 from synapse.util.templates import _create_mxc_to_http_filter, _format_ts_filter
 
 logger = logging.getLogger(__name__)
@@ -58,7 +59,7 @@ class ConfigError(Exception):
            the problem lies.
     """
 
-    def __init__(self, msg: str, path: Optional[Iterable[str]] = None):
+    def __init__(self, msg: str, path: Optional[StrSequence] = None):
         self.msg = msg
         self.path = path
 
diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi
index b5cec132b4..fc51aed234 100644
--- a/synapse/config/_base.pyi
+++ b/synapse/config/_base.pyi
@@ -61,9 +61,10 @@ from synapse.config import (  # noqa: F401
     voip,
     workers,
 )
+from synapse.types import StrSequence
 
 class ConfigError(Exception):
-    def __init__(self, msg: str, path: Optional[Iterable[str]] = None):
+    def __init__(self, msg: str, path: Optional[StrSequence] = None):
         self.msg = msg
         self.path = path
 
diff --git a/synapse/config/_util.py b/synapse/config/_util.py
index dfc5d12210..acccca413b 100644
--- a/synapse/config/_util.py
+++ b/synapse/config/_util.py
@@ -11,17 +11,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Dict, Iterable, Type, TypeVar
+from typing import Any, Dict, Type, TypeVar
 
 import jsonschema
 from pydantic import BaseModel, ValidationError, parse_obj_as
 
 from synapse.config._base import ConfigError
-from synapse.types import JsonDict
+from synapse.types import JsonDict, StrSequence
 
 
 def validate_config(
-    json_schema: JsonDict, config: Any, config_path: Iterable[str]
+    json_schema: JsonDict, config: Any, config_path: StrSequence
 ) -> None:
     """Validates a config setting against a JsonSchema definition
 
@@ -45,7 +45,7 @@ def validate_config(
 
 
 def json_error_to_config_error(
-    e: jsonschema.ValidationError, config_path: Iterable[str]
+    e: jsonschema.ValidationError, config_path: StrSequence
 ) -> ConfigError:
     """Converts a json validation error to a user-readable ConfigError
 
diff --git a/synapse/config/oembed.py b/synapse/config/oembed.py
index 0d32aba70a..d7959639ee 100644
--- a/synapse/config/oembed.py
+++ b/synapse/config/oembed.py
@@ -19,7 +19,7 @@ from urllib import parse as urlparse
 import attr
 import pkg_resources
 
-from synapse.types import JsonDict
+from synapse.types import JsonDict, StrSequence
 
 from ._base import Config, ConfigError
 from ._util import validate_config
@@ -80,7 +80,7 @@ class OembedConfig(Config):
             )
 
     def _parse_and_validate_provider(
-        self, providers: List[JsonDict], config_path: Iterable[str]
+        self, providers: List[JsonDict], config_path: StrSequence
     ) -> Iterable[OEmbedEndpointConfig]:
         # Ensure it is the proper form.
         validate_config(
@@ -112,7 +112,7 @@ class OembedConfig(Config):
                     api_endpoint, patterns, endpoint.get("formats")
                 )
 
-    def _glob_to_pattern(self, glob: str, config_path: Iterable[str]) -> Pattern:
+    def _glob_to_pattern(self, glob: str, config_path: StrSequence) -> Pattern:
         """
         Convert the glob into a sane regular expression to match against. The
         rules followed will be slightly different for the domain portion vs.
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 386c3194b8..64201238d6 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -27,7 +27,7 @@ from netaddr import AddrFormatError, IPNetwork, IPSet
 from twisted.conch.ssh.keys import Key
 
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
-from synapse.types import JsonDict
+from synapse.types import JsonDict, StrSequence
 from synapse.util.module_loader import load_module
 from synapse.util.stringutils import parse_and_validate_server_name
 
@@ -73,7 +73,7 @@ def _6to4(network: IPNetwork) -> IPNetwork:
 def generate_ip_set(
     ip_addresses: Optional[Iterable[str]],
     extra_addresses: Optional[Iterable[str]] = None,
-    config_path: Optional[Iterable[str]] = None,
+    config_path: Optional[StrSequence] = None,
 ) -> IPSet:
     """
     Generate an IPSet from a list of IP addresses or CIDRs.
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index 325219656a..42baf8ac6b 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -84,7 +84,15 @@ JsonSerializable = object
 
 # Collection[str] that does not include str itself; str being a Sequence[str]
 # is very misleading and results in bugs.
+#
+# StrCollection is an unordered collection of strings. If ordering is important,
+# StrSequence can be used instead.
 StrCollection = Union[Tuple[str, ...], List[str], AbstractSet[str]]
+# Sequence[str] that does not include str itself; str being a Sequence[str]
+# is very misleading and results in bugs.
+#
+# Unlike StrCollection, StrSequence is an ordered collection of strings.
+StrSequence = Union[Tuple[str, ...], List[str]]
 
 
 # Note that this seems to require inheriting *directly* from Interface in order
diff --git a/synapse/util/module_loader.py b/synapse/util/module_loader.py
index 5a638c6e9a..e3a54df48b 100644
--- a/synapse/util/module_loader.py
+++ b/synapse/util/module_loader.py
@@ -14,17 +14,17 @@
 
 import importlib
 import importlib.util
-import itertools
 from types import ModuleType
-from typing import Any, Iterable, Tuple, Type
+from typing import Any, Tuple, Type
 
 import jsonschema
 
 from synapse.config._base import ConfigError
 from synapse.config._util import json_error_to_config_error
+from synapse.types import StrSequence
 
 
-def load_module(provider: dict, config_path: Iterable[str]) -> Tuple[Type, Any]:
+def load_module(provider: dict, config_path: StrSequence) -> Tuple[Type, Any]:
     """Loads a synapse module with its config
 
     Args:
@@ -39,9 +39,7 @@ def load_module(provider: dict, config_path: Iterable[str]) -> Tuple[Type, Any]:
 
     modulename = provider.get("module")
     if not isinstance(modulename, str):
-        raise ConfigError(
-            "expected a string", path=itertools.chain(config_path, ("module",))
-        )
+        raise ConfigError("expected a string", path=tuple(config_path) + ("module",))
 
     # We need to import the module, and then pick the class out of
     # that, so we split based on the last dot.
@@ -55,19 +53,17 @@ def load_module(provider: dict, config_path: Iterable[str]) -> Tuple[Type, Any]:
         try:
             provider_config = provider_class.parse_config(module_config)
         except jsonschema.ValidationError as e:
-            raise json_error_to_config_error(
-                e, itertools.chain(config_path, ("config",))
-            )
+            raise json_error_to_config_error(e, tuple(config_path) + ("config",))
         except ConfigError as e:
             raise _wrap_config_error(
                 "Failed to parse config for module %r" % (modulename,),
-                prefix=itertools.chain(config_path, ("config",)),
+                prefix=tuple(config_path) + ("config",),
                 e=e,
             )
         except Exception as e:
             raise ConfigError(
                 "Failed to parse config for module %r" % (modulename,),
-                path=itertools.chain(config_path, ("config",)),
+                path=tuple(config_path) + ("config",),
             ) from e
     else:
         provider_config = module_config
@@ -92,9 +88,7 @@ def load_python_module(location: str) -> ModuleType:
     return mod
 
 
-def _wrap_config_error(
-    msg: str, prefix: Iterable[str], e: ConfigError
-) -> "ConfigError":
+def _wrap_config_error(msg: str, prefix: StrSequence, e: ConfigError) -> "ConfigError":
     """Wrap a relative ConfigError with a new path
 
     This is useful when we have a ConfigError with a relative path due to a problem
@@ -102,7 +96,7 @@ def _wrap_config_error(
     """
     path = prefix
     if e.path:
-        path = itertools.chain(prefix, e.path)
+        path = tuple(prefix) + tuple(e.path)
 
     e1 = ConfigError(msg, path)
 
-- 
cgit 1.5.1


From e5b4d93770fe5cfc45f1e769d8cb00a2075d68fa Mon Sep 17 00:00:00 2001
From: Jonathan de Jong <jonathan@automatia.nl>
Date: Thu, 18 May 2023 18:49:12 +0200
Subject: Update Mutual Rooms (MSC2666) implementation (#15621)

To track changes in MSC2666:

- The change from `/mutual_rooms/{user_id}` to `/mutual_rooms?user_id={user_id}`.
- The addition of `next_batch_token` (and logic).
- Unstable flag now being `uk.half-shot.msc2666.query_mutual_rooms`.
- The error code when your own user is requested.
---
 changelog.d/15621.misc                 |  1 +
 synapse/rest/client/mutual_rooms.py    | 43 ++++++++++++++++++++++++----------
 synapse/rest/client/versions.py        |  2 +-
 tests/rest/client/test_mutual_rooms.py |  6 +++--
 4 files changed, 37 insertions(+), 15 deletions(-)
 create mode 100644 changelog.d/15621.misc

(limited to 'synapse')

diff --git a/changelog.d/15621.misc b/changelog.d/15621.misc
new file mode 100644
index 0000000000..5d060f4dbc
--- /dev/null
+++ b/changelog.d/15621.misc
@@ -0,0 +1 @@
+Update Mutual Rooms (MSC2666) implementation to match new proposal text.
\ No newline at end of file
diff --git a/synapse/rest/client/mutual_rooms.py b/synapse/rest/client/mutual_rooms.py
index 38ef4e459f..c99445da30 100644
--- a/synapse/rest/client/mutual_rooms.py
+++ b/synapse/rest/client/mutual_rooms.py
@@ -12,13 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import TYPE_CHECKING, Tuple
+from http import HTTPStatus
+from typing import TYPE_CHECKING, Dict, List, Tuple
 
 from synapse.api.errors import Codes, SynapseError
 from synapse.http.server import HttpServer
-from synapse.http.servlet import RestServlet
+from synapse.http.servlet import RestServlet, parse_strings_from_args
 from synapse.http.site import SynapseRequest
-from synapse.types import JsonDict, UserID
+from synapse.types import JsonDict
 
 from ._base import client_patterns
 
@@ -30,11 +31,11 @@ logger = logging.getLogger(__name__)
 
 class UserMutualRoomsServlet(RestServlet):
     """
-    GET /uk.half-shot.msc2666/user/mutual_rooms/{user_id} HTTP/1.1
+    GET /uk.half-shot.msc2666/user/mutual_rooms?user_id={user_id} HTTP/1.1
     """
 
     PATTERNS = client_patterns(
-        "/uk.half-shot.msc2666/user/mutual_rooms/(?P<user_id>[^/]*)",
+        "/uk.half-shot.msc2666/user/mutual_rooms$",
         releases=(),  # This is an unstable feature
     )
 
@@ -43,17 +44,35 @@ class UserMutualRoomsServlet(RestServlet):
         self.auth = hs.get_auth()
         self.store = hs.get_datastores().main
 
-    async def on_GET(
-        self, request: SynapseRequest, user_id: str
-    ) -> Tuple[int, JsonDict]:
-        UserID.from_string(user_id)
+    async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
+        # twisted.web.server.Request.args is incorrectly defined as Optional[Any]
+        args: Dict[bytes, List[bytes]] = request.args  # type: ignore
+
+        user_ids = parse_strings_from_args(args, "user_id", required=True)
+
+        if len(user_ids) > 1:
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST,
+                "Duplicate user_id query parameter",
+                errcode=Codes.INVALID_PARAM,
+            )
+
+        # We don't do batching, so a batch token is illegal by default
+        if b"batch_token" in args:
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST,
+                "Unknown batch_token",
+                errcode=Codes.INVALID_PARAM,
+            )
+
+        user_id = user_ids[0]
 
         requester = await self.auth.get_user_by_req(request)
         if user_id == requester.user.to_string():
             raise SynapseError(
-                code=400,
-                msg="You cannot request a list of shared rooms with yourself",
-                errcode=Codes.FORBIDDEN,
+                HTTPStatus.UNPROCESSABLE_ENTITY,
+                "You cannot request a list of shared rooms with yourself",
+                errcode=Codes.INVALID_PARAM,
             )
 
         rooms = await self.store.get_mutual_rooms_between_users(
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 58c5b07390..32df054f56 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -91,7 +91,7 @@ class VersionsRestServlet(RestServlet):
                     # Implements additional endpoints as described in MSC2432
                     "org.matrix.msc2432": True,
                     # Implements additional endpoints as described in MSC2666
-                    "uk.half-shot.msc2666.mutual_rooms": True,
+                    "uk.half-shot.msc2666.query_mutual_rooms": True,
                     # Whether new rooms will be set to encrypted or not (based on presets).
                     "io.element.e2ee_forced.public": self.e2ee_forced_public,
                     "io.element.e2ee_forced.private": self.e2ee_forced_private,
diff --git a/tests/rest/client/test_mutual_rooms.py b/tests/rest/client/test_mutual_rooms.py
index a4327f7ace..22fddbd6d6 100644
--- a/tests/rest/client/test_mutual_rooms.py
+++ b/tests/rest/client/test_mutual_rooms.py
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from urllib.parse import quote
+
 from twisted.test.proto_helpers import MemoryReactor
 
 import synapse.rest.admin
@@ -44,8 +46,8 @@ class UserMutualRoomsTest(unittest.HomeserverTestCase):
     def _get_mutual_rooms(self, token: str, other_user: str) -> FakeChannel:
         return self.make_request(
             "GET",
-            "/_matrix/client/unstable/uk.half-shot.msc2666/user/mutual_rooms/%s"
-            % other_user,
+            "/_matrix/client/unstable/uk.half-shot.msc2666/user/mutual_rooms"
+            f"?user_id={quote(other_user)}",
             access_token=token,
         )
 
-- 
cgit 1.5.1


From ad50510a06d035a674f0eeed5db5dd3060bc0b1c Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Thu, 18 May 2023 19:37:31 +0100
Subject: Handle missing previous read marker event. (#15464)

If the previous read marker is pointing to an event that no longer exists
(e.g. due to retention) then assume that the newly given read marker
is newer.
---
 changelog.d/15464.bugfix                        |   1 +
 synapse/handlers/read_marker.py                 |  18 ++-
 synapse/storage/databases/main/events_worker.py |   6 -
 tests/rest/client/test_read_marker.py           | 147 ++++++++++++++++++++++++
 4 files changed, 162 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/15464.bugfix
 create mode 100644 tests/rest/client/test_read_marker.py

(limited to 'synapse')

diff --git a/changelog.d/15464.bugfix b/changelog.d/15464.bugfix
new file mode 100644
index 0000000000..3c655989b3
--- /dev/null
+++ b/changelog.d/15464.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where setting the read marker could fail when using message retention. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/handlers/read_marker.py b/synapse/handlers/read_marker.py
index 6d35e61880..49a497a860 100644
--- a/synapse/handlers/read_marker.py
+++ b/synapse/handlers/read_marker.py
@@ -16,6 +16,7 @@ import logging
 from typing import TYPE_CHECKING
 
 from synapse.api.constants import ReceiptTypes
+from synapse.api.errors import SynapseError
 from synapse.util.async_helpers import Linearizer
 
 if TYPE_CHECKING:
@@ -47,12 +48,21 @@ class ReadMarkerHandler:
             )
 
             should_update = True
+            # Get event ordering, this also ensures we know about the event
+            event_ordering = await self.store.get_event_ordering(event_id)
 
             if existing_read_marker:
-                # Only update if the new marker is ahead in the stream
-                should_update = await self.store.is_event_after(
-                    event_id, existing_read_marker["event_id"]
-                )
+                try:
+                    old_event_ordering = await self.store.get_event_ordering(
+                        existing_read_marker["event_id"]
+                    )
+                except SynapseError:
+                    # Old event no longer exists, assume new is ahead. This may
+                    # happen if the old event was removed due to retention.
+                    pass
+                else:
+                    # Only update if the new marker is ahead in the stream
+                    should_update = event_ordering > old_event_ordering
 
             if should_update:
                 content = {"event_id": event_id}
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 53aa5933d5..a39bc90974 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -1973,12 +1973,6 @@ class EventsWorkerStore(SQLBaseStore):
 
         return rows, to_token, True
 
-    async def is_event_after(self, event_id1: str, event_id2: str) -> bool:
-        """Returns True if event_id1 is after event_id2 in the stream"""
-        to_1, so_1 = await self.get_event_ordering(event_id1)
-        to_2, so_2 = await self.get_event_ordering(event_id2)
-        return (to_1, so_1) > (to_2, so_2)
-
     @cached(max_entries=5000)
     async def get_event_ordering(self, event_id: str) -> Tuple[int, int]:
         res = await self.db_pool.simple_select_one(
diff --git a/tests/rest/client/test_read_marker.py b/tests/rest/client/test_read_marker.py
new file mode 100644
index 0000000000..0eedcdb476
--- /dev/null
+++ b/tests/rest/client/test_read_marker.py
@@ -0,0 +1,147 @@
+# Copyright 2023 Beeper
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from twisted.test.proto_helpers import MemoryReactor
+
+import synapse.rest.admin
+from synapse.api.constants import EventTypes
+from synapse.rest import admin
+from synapse.rest.client import login, read_marker, register, room
+from synapse.server import HomeServer
+from synapse.util import Clock
+
+from tests import unittest
+
+ONE_HOUR_MS = 3600000
+ONE_DAY_MS = ONE_HOUR_MS * 24
+
+
+class ReadMarkerTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        login.register_servlets,
+        register.register_servlets,
+        read_marker.register_servlets,
+        room.register_servlets,
+        synapse.rest.admin.register_servlets,
+        admin.register_servlets,
+    ]
+
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
+        config = self.default_config()
+
+        # merge this default retention config with anything that was specified in
+        # @override_config
+        retention_config = {
+            "enabled": True,
+            "allowed_lifetime_min": ONE_DAY_MS,
+            "allowed_lifetime_max": ONE_DAY_MS * 3,
+        }
+        retention_config.update(config.get("retention", {}))
+        config["retention"] = retention_config
+
+        self.hs = self.setup_test_homeserver(config=config)
+
+        return self.hs
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.owner = self.register_user("owner", "pass")
+        self.owner_tok = self.login("owner", "pass")
+        self.store = self.hs.get_datastores().main
+        self.clock = self.hs.get_clock()
+
+    def test_send_read_marker(self) -> None:
+        room_id = self.helper.create_room_as(self.owner, tok=self.owner_tok)
+
+        def send_message() -> str:
+            res = self.helper.send(room_id=room_id, body="1", tok=self.owner_tok)
+            return res["event_id"]
+
+        # Test setting the read marker on the room
+        event_id_1 = send_message()
+
+        channel = self.make_request(
+            "POST",
+            "/rooms/!abc:beep/read_markers",
+            content={
+                "m.fully_read": event_id_1,
+            },
+            access_token=self.owner_tok,
+        )
+        self.assertEqual(channel.code, 200, channel.result)
+
+        # Test moving the read marker to a newer event
+        event_id_2 = send_message()
+        channel = self.make_request(
+            "POST",
+            "/rooms/!abc:beep/read_markers",
+            content={
+                "m.fully_read": event_id_2,
+            },
+            access_token=self.owner_tok,
+        )
+        self.assertEqual(channel.code, 200, channel.result)
+
+    def test_send_read_marker_missing_previous_event(self) -> None:
+        """
+        Test moving a read marker from an event that previously existed but was
+        later removed due to retention rules.
+        """
+
+        room_id = self.helper.create_room_as(self.owner, tok=self.owner_tok)
+
+        # Set retention rule on the room so we remove old events to test this case
+        self.helper.send_state(
+            room_id=room_id,
+            event_type=EventTypes.Retention,
+            body={"max_lifetime": ONE_DAY_MS},
+            tok=self.owner_tok,
+        )
+
+        def send_message() -> str:
+            res = self.helper.send(room_id=room_id, body="1", tok=self.owner_tok)
+            return res["event_id"]
+
+        # Test setting the read marker on the room
+        event_id_1 = send_message()
+
+        channel = self.make_request(
+            "POST",
+            "/rooms/!abc:beep/read_markers",
+            content={
+                "m.fully_read": event_id_1,
+            },
+            access_token=self.owner_tok,
+        )
+
+        # Send a second message (retention will not remove the latest event ever)
+        send_message()
+        # And then advance so retention rules remove the first event (where the marker is)
+        self.reactor.advance(ONE_DAY_MS * 2 / 1000)
+
+        event = self.get_success(self.store.get_event(event_id_1, allow_none=True))
+        assert event is None
+
+        # TODO See https://github.com/matrix-org/synapse/issues/13476
+        self.store.get_event_ordering.invalidate_all()
+
+        # Test moving the read marker to a newer event
+        event_id_2 = send_message()
+        channel = self.make_request(
+            "POST",
+            "/rooms/!abc:beep/read_markers",
+            content={
+                "m.fully_read": event_id_2,
+            },
+            access_token=self.owner_tok,
+        )
+        self.assertEqual(channel.code, 200, channel.result)
-- 
cgit 1.5.1


From d0de452d1222ada8d219a8c5bc42498a89e5ecea Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Fri, 19 May 2023 11:17:12 +0100
Subject: Fix `HomeServer`s leaking during `trial` test runs (#15630)

This change fixes two memory leaks during `trial` test runs.

Garbage collection is disabled during each test case and a gen-0 GC is
run at the end of each test. However, when the gen-0 GC is run, the
`TestCase` object usually still holds references to the `HomeServer`
used during the test. As a result, the `HomeServer` gets promoted to
gen-1 and then never garbage collected.

Fix this by periodically running full GCs.

Additionally, fix `HomeServer`s leaking after tests that touch inbound
federation due to `FederationRateLimiter`s adding themselves to a global
set, by turning the set into a `WeakSet`.

Resolves #15622.

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/15630.misc         |  1 +
 synapse/util/ratelimitutils.py |  6 +++++-
 tests/unittest.py              | 11 +++++++++--
 3 files changed, 15 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/15630.misc

(limited to 'synapse')

diff --git a/changelog.d/15630.misc b/changelog.d/15630.misc
new file mode 100644
index 0000000000..a30304bfd6
--- /dev/null
+++ b/changelog.d/15630.misc
@@ -0,0 +1 @@
+Fix two memory leaks in `trial` test runs.
diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py
index f262bf95a0..2ad55ac13e 100644
--- a/synapse/util/ratelimitutils.py
+++ b/synapse/util/ratelimitutils.py
@@ -25,10 +25,12 @@ from typing import (
     Iterator,
     List,
     Mapping,
+    MutableSet,
     Optional,
     Set,
     Tuple,
 )
+from weakref import WeakSet
 
 from prometheus_client.core import Counter
 from typing_extensions import ContextManager
@@ -86,7 +88,9 @@ queue_wait_timer = Histogram(
 )
 
 
-_rate_limiter_instances: Set["FederationRateLimiter"] = set()
+# This must be a `WeakSet`, otherwise we indirectly hold on to entire `HomeServer`s
+# during trial test runs and leak a lot of memory.
+_rate_limiter_instances: MutableSet["FederationRateLimiter"] = WeakSet()
 # Protects the _rate_limiter_instances set from concurrent access
 _rate_limiter_instances_lock = threading.Lock()
 
diff --git a/tests/unittest.py b/tests/unittest.py
index b6fdf69635..623c5a75a2 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -229,13 +229,20 @@ class TestCase(unittest.TestCase):
         #
         # The easiest way to do this would be to do a full GC after each test
         # run, but that is very expensive. Instead, we disable GC (above) for
-        # the duration of the test so that we only need to run a gen-0 GC, which
-        # is a lot quicker.
+        # the duration of the test and only run a gen-0 GC, which is a lot
+        # quicker. This doesn't clean up everything, since the TestCase
+        # instance still holds references to objects created during the test,
+        # such as HomeServers, so we do a full GC every so often.
 
         @around(self)
         def tearDown(orig: Callable[[], R]) -> R:
             ret = orig()
             gc.collect(0)
+            # Run a full GC every 50 gen-0 GCs.
+            gen0_stats = gc.get_stats()[0]
+            gen0_collections = gen0_stats["collections"]
+            if gen0_collections % 50 == 0:
+                gc.collect()
             gc.enable()
             set_current_context(SENTINEL_CONTEXT)
 
-- 
cgit 1.5.1


From 07771fa487e1d281fdcae35a47db87ab675cb6b3 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 19 May 2023 07:23:09 -0400
Subject: Remove experimental configuration flags & unstable values for faster
 joins (#15625)

Synapse will no longer send (or respond to) the unstable flags
for faster joins. These were only available behind a configuration
flag and handled in parallel with the stable flags.
---
 changelog.d/15625.misc                            |  1 +
 synapse/config/experimental.py                    | 12 --------
 synapse/federation/federation_server.py           |  2 --
 synapse/federation/transport/client.py            | 29 ++-----------------
 synapse/federation/transport/server/federation.py | 12 +-------
 tests/federation/transport/test_client.py         | 35 ++---------------------
 6 files changed, 8 insertions(+), 83 deletions(-)
 create mode 100644 changelog.d/15625.misc

(limited to 'synapse')

diff --git a/changelog.d/15625.misc b/changelog.d/15625.misc
new file mode 100644
index 0000000000..7ea8cc9433
--- /dev/null
+++ b/changelog.d/15625.misc
@@ -0,0 +1 @@
+Remove the unstable identifiers from faster joins ([MSC3706](https://github.com/matrix-org/matrix-spec-proposals/pull/3706)).
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 6e453bd963..d769b7f668 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -84,18 +84,6 @@ class ExperimentalConfig(Config):
             "msc3984_appservice_key_query", False
         )
 
-        # MSC3706 (server-side support for partial state in /send_join responses)
-        # Synapse will always serve partial state responses to requests using the stable
-        # query parameter `omit_members`. If this flag is set, Synapse will also serve
-        # partial state responses to requests using the unstable query parameter
-        # `org.matrix.msc3706.partial_state`.
-        self.msc3706_enabled: bool = experimental.get("msc3706_enabled", False)
-
-        # experimental support for faster joins over federation
-        # (MSC2775, MSC3706, MSC3895)
-        # requires a target server that can provide a partial join response (MSC3706)
-        self.faster_joins_enabled: bool = experimental.get("faster_joins", True)
-
         # MSC3720 (Account status endpoint)
         self.msc3720_enabled: bool = experimental.get("msc3720_enabled", False)
 
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index c590d8f96f..f4ca70a698 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -739,12 +739,10 @@ class FederationServer(FederationBase):
             "event": event_json,
             "state": [p.get_pdu_json(time_now) for p in state_events],
             "auth_chain": [p.get_pdu_json(time_now) for p in auth_chain_events],
-            "org.matrix.msc3706.partial_state": caller_supports_partial_state,
             "members_omitted": caller_supports_partial_state,
         }
 
         if servers_in_room is not None:
-            resp["org.matrix.msc3706.servers_in_room"] = list(servers_in_room)
             resp["servers_in_room"] = list(servers_in_room)
 
         return resp
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index d2fa9976da..1cfc4446c4 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -59,7 +59,6 @@ class TransportLayerClient:
 
     def __init__(self, hs: "HomeServer"):
         self.client = hs.get_federation_http_client()
-        self._faster_joins_enabled = hs.config.experimental.faster_joins_enabled
         self._is_mine_server_name = hs.is_mine_server_name
 
     async def get_room_state_ids(
@@ -363,12 +362,8 @@ class TransportLayerClient:
     ) -> "SendJoinResponse":
         path = _create_v2_path("/send_join/%s/%s", room_id, event_id)
         query_params: Dict[str, str] = {}
-        if self._faster_joins_enabled:
-            # lazy-load state on join
-            query_params["org.matrix.msc3706.partial_state"] = (
-                "true" if omit_members else "false"
-            )
-            query_params["omit_members"] = "true" if omit_members else "false"
+        # lazy-load state on join
+        query_params["omit_members"] = "true" if omit_members else "false"
 
         return await self.client.put_json(
             destination=destination,
@@ -902,9 +897,7 @@ def _members_omitted_parser(response: SendJoinResponse) -> Generator[None, Any,
     while True:
         val = yield
         if not isinstance(val, bool):
-            raise TypeError(
-                "members_omitted (formerly org.matrix.msc370c.partial_state) must be a boolean"
-            )
+            raise TypeError("members_omitted must be a boolean")
         response.members_omitted = val
 
 
@@ -964,14 +957,6 @@ class SendJoinParser(ByteParser[SendJoinResponse]):
         ]
 
         if not v1_api:
-            self._coros.append(
-                ijson.items_coro(
-                    _members_omitted_parser(self._response),
-                    "org.matrix.msc3706.partial_state",
-                    use_float="True",
-                )
-            )
-            # The stable field name comes last, so it "wins" if the fields disagree
             self._coros.append(
                 ijson.items_coro(
                     _members_omitted_parser(self._response),
@@ -980,14 +965,6 @@ class SendJoinParser(ByteParser[SendJoinResponse]):
                 )
             )
 
-            self._coros.append(
-                ijson.items_coro(
-                    _servers_in_room_parser(self._response),
-                    "org.matrix.msc3706.servers_in_room",
-                    use_float="True",
-                )
-            )
-
             # Again, stable field name comes last
             self._coros.append(
                 ijson.items_coro(
diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py
index 36b0362504..3a744e25be 100644
--- a/synapse/federation/transport/server/federation.py
+++ b/synapse/federation/transport/server/federation.py
@@ -440,7 +440,6 @@ class FederationV2SendJoinServlet(BaseFederationServerServlet):
         server_name: str,
     ):
         super().__init__(hs, authenticator, ratelimiter, server_name)
-        self._read_msc3706_query_param = hs.config.experimental.msc3706_enabled
 
     async def on_PUT(
         self,
@@ -453,16 +452,7 @@ class FederationV2SendJoinServlet(BaseFederationServerServlet):
         # TODO(paul): assert that event_id parsed from path actually
         #   match those given in content
 
-        partial_state = False
-        # The stable query parameter wins, if it disagrees with the unstable
-        # parameter for some reason.
-        stable_param = parse_boolean_from_args(query, "omit_members", default=None)
-        if stable_param is not None:
-            partial_state = stable_param
-        elif self._read_msc3706_query_param:
-            partial_state = parse_boolean_from_args(
-                query, "org.matrix.msc3706.partial_state", default=False
-            )
+        partial_state = parse_boolean_from_args(query, "omit_members", default=False)
 
         result = await self.handler.on_send_join_request(
             origin, content, room_id, caller_supports_partial_state=partial_state
diff --git a/tests/federation/transport/test_client.py b/tests/federation/transport/test_client.py
index 3d61b1e8a9..93e5c85a27 100644
--- a/tests/federation/transport/test_client.py
+++ b/tests/federation/transport/test_client.py
@@ -86,18 +86,7 @@ class SendJoinParserTestCase(TestCase):
             return parsed_response.members_omitted
 
         self.assertTrue(parse({"members_omitted": True}))
-        self.assertTrue(parse({"org.matrix.msc3706.partial_state": True}))
-
         self.assertFalse(parse({"members_omitted": False}))
-        self.assertFalse(parse({"org.matrix.msc3706.partial_state": False}))
-
-        # If there's a conflict, the stable field wins.
-        self.assertTrue(
-            parse({"members_omitted": True, "org.matrix.msc3706.partial_state": False})
-        )
-        self.assertFalse(
-            parse({"members_omitted": False, "org.matrix.msc3706.partial_state": True})
-        )
 
     def test_servers_in_room(self) -> None:
         """Check that the servers_in_room field is correctly parsed"""
@@ -113,28 +102,10 @@ class SendJoinParserTestCase(TestCase):
             parsed_response = parser.finish()
             return parsed_response.servers_in_room
 
-        self.assertEqual(
-            parse({"org.matrix.msc3706.servers_in_room": ["hs1", "hs2"]}),
-            ["hs1", "hs2"],
-        )
         self.assertEqual(parse({"servers_in_room": ["example.com"]}), ["example.com"])
 
-        # If both are provided, the stable identifier should win
-        self.assertEqual(
-            parse(
-                {
-                    "org.matrix.msc3706.servers_in_room": ["old"],
-                    "servers_in_room": ["new"],
-                }
-            ),
-            ["new"],
-        )
-
-        # And lastly, we should be able to tell if neither field was present.
-        self.assertEqual(
-            parse({}),
-            None,
-        )
+        # We should be able to tell the field is not present.
+        self.assertEqual(parse({}), None)
 
     def test_errors_closing_coroutines(self) -> None:
         """Check we close all coroutines, even if closing the first raises an Exception.
@@ -143,7 +114,7 @@ class SendJoinParserTestCase(TestCase):
         assertions about its attributes or type.
         """
         parser = SendJoinParser(RoomVersions.V1, False)
-        response = {"org.matrix.msc3706.servers_in_room": ["hs1", "hs2"]}
+        response = {"servers_in_room": ["hs1", "hs2"]}
         serialisation = json.dumps(response).encode()
 
         # Mock the coroutines managed by this parser.
-- 
cgit 1.5.1


From 89a23c940672944acd98db58085cdc38191515a8 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 19 May 2023 08:06:54 -0400
Subject: Do not allow deactivated users to login with JWT. (#15624)

To improve the organization of this code, this change moves the JWT
login checks to a separate handler and then fixes the bug (and a
deprecation warning).
---
 changelog.d/15624.bugfix        |   1 +
 synapse/handlers/jwt.py         | 118 ++++++++++++++++++++++++++++++++++++++++
 synapse/rest/client/login.py    |  77 ++++----------------------
 synapse/server.py               |   7 +++
 tests/rest/client/test_login.py |  20 ++++++-
 5 files changed, 156 insertions(+), 67 deletions(-)
 create mode 100644 changelog.d/15624.bugfix
 create mode 100644 synapse/handlers/jwt.py

(limited to 'synapse')

diff --git a/changelog.d/15624.bugfix b/changelog.d/15624.bugfix
new file mode 100644
index 0000000000..fde515ba62
--- /dev/null
+++ b/changelog.d/15624.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where deactivated users were still able to login using the custom `org.matrix.login.jwt` login type (if enabled).
diff --git a/synapse/handlers/jwt.py b/synapse/handlers/jwt.py
new file mode 100644
index 0000000000..5fddc0e315
--- /dev/null
+++ b/synapse/handlers/jwt.py
@@ -0,0 +1,118 @@
+# Copyright 2023 Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import TYPE_CHECKING
+
+from authlib.jose import JsonWebToken, JWTClaims
+from authlib.jose.errors import BadSignatureError, InvalidClaimError, JoseError
+
+from synapse.api.errors import Codes, LoginError, StoreError, UserDeactivatedError
+from synapse.types import JsonDict, UserID
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+
+class JwtHandler:
+    def __init__(self, hs: "HomeServer"):
+        self.hs = hs
+        self._main_store = hs.get_datastores().main
+
+        self.jwt_secret = hs.config.jwt.jwt_secret
+        self.jwt_subject_claim = hs.config.jwt.jwt_subject_claim
+        self.jwt_algorithm = hs.config.jwt.jwt_algorithm
+        self.jwt_issuer = hs.config.jwt.jwt_issuer
+        self.jwt_audiences = hs.config.jwt.jwt_audiences
+
+    async def validate_login(self, login_submission: JsonDict) -> str:
+        """
+        Authenticates the user for the /login API
+
+        Args:
+            login_submission: the whole of the login submission
+                (including 'type' and other relevant fields)
+
+        Returns:
+            The user ID that is logging in.
+
+        Raises:
+            LoginError if there was an authentication problem.
+        """
+        token = login_submission.get("token", None)
+        if token is None:
+            raise LoginError(
+                403, "Token field for JWT is missing", errcode=Codes.FORBIDDEN
+            )
+
+        jwt = JsonWebToken([self.jwt_algorithm])
+        claim_options = {}
+        if self.jwt_issuer is not None:
+            claim_options["iss"] = {"value": self.jwt_issuer, "essential": True}
+        if self.jwt_audiences is not None:
+            claim_options["aud"] = {"values": self.jwt_audiences, "essential": True}
+
+        try:
+            claims = jwt.decode(
+                token,
+                key=self.jwt_secret,
+                claims_cls=JWTClaims,
+                claims_options=claim_options,
+            )
+        except BadSignatureError:
+            # We handle this case separately to provide a better error message
+            raise LoginError(
+                403,
+                "JWT validation failed: Signature verification failed",
+                errcode=Codes.FORBIDDEN,
+            )
+        except JoseError as e:
+            # A JWT error occurred, return some info back to the client.
+            raise LoginError(
+                403,
+                "JWT validation failed: %s" % (str(e),),
+                errcode=Codes.FORBIDDEN,
+            )
+
+        try:
+            claims.validate(leeway=120)  # allows 2 min of clock skew
+
+            # Enforce the old behavior which is rolled out in productive
+            # servers: if the JWT contains an 'aud' claim but none is
+            # configured, the login attempt will fail
+            if claims.get("aud") is not None:
+                if self.jwt_audiences is None or len(self.jwt_audiences) == 0:
+                    raise InvalidClaimError("aud")
+        except JoseError as e:
+            raise LoginError(
+                403,
+                "JWT validation failed: %s" % (str(e),),
+                errcode=Codes.FORBIDDEN,
+            )
+
+        user = claims.get(self.jwt_subject_claim, None)
+        if user is None:
+            raise LoginError(403, "Invalid JWT", errcode=Codes.FORBIDDEN)
+
+        user_id = UserID(user, self.hs.hostname).to_string()
+
+        # If the account has been deactivated, do not proceed with the login
+        # flow.
+        try:
+            deactivated = await self._main_store.get_user_deactivated_status(user_id)
+        except StoreError:
+            # JWT lazily creates users, so they may not exist in the database yet.
+            deactivated = False
+        if deactivated:
+            raise UserDeactivatedError("This account has been deactivated")
+
+        return user_id
diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py
index a348720131..afdbf821b5 100644
--- a/synapse/rest/client/login.py
+++ b/synapse/rest/client/login.py
@@ -87,11 +87,6 @@ class LoginRestServlet(RestServlet):
 
         # JWT configuration variables.
         self.jwt_enabled = hs.config.jwt.jwt_enabled
-        self.jwt_secret = hs.config.jwt.jwt_secret
-        self.jwt_subject_claim = hs.config.jwt.jwt_subject_claim
-        self.jwt_algorithm = hs.config.jwt.jwt_algorithm
-        self.jwt_issuer = hs.config.jwt.jwt_issuer
-        self.jwt_audiences = hs.config.jwt.jwt_audiences
 
         # SSO configuration.
         self.saml2_enabled = hs.config.saml2.saml2_enabled
@@ -427,7 +422,7 @@ class LoginRestServlet(RestServlet):
         self, login_submission: JsonDict, should_issue_refresh_token: bool = False
     ) -> LoginResponse:
         """
-        Handle the final stage of SSO login.
+        Handle token login.
 
         Args:
             login_submission: The JSON request body.
@@ -452,72 +447,24 @@ class LoginRestServlet(RestServlet):
     async def _do_jwt_login(
         self, login_submission: JsonDict, should_issue_refresh_token: bool = False
     ) -> LoginResponse:
-        token = login_submission.get("token", None)
-        if token is None:
-            raise LoginError(
-                403, "Token field for JWT is missing", errcode=Codes.FORBIDDEN
-            )
-
-        from authlib.jose import JsonWebToken, JWTClaims
-        from authlib.jose.errors import BadSignatureError, InvalidClaimError, JoseError
-
-        jwt = JsonWebToken([self.jwt_algorithm])
-        claim_options = {}
-        if self.jwt_issuer is not None:
-            claim_options["iss"] = {"value": self.jwt_issuer, "essential": True}
-        if self.jwt_audiences is not None:
-            claim_options["aud"] = {"values": self.jwt_audiences, "essential": True}
-
-        try:
-            claims = jwt.decode(
-                token,
-                key=self.jwt_secret,
-                claims_cls=JWTClaims,
-                claims_options=claim_options,
-            )
-        except BadSignatureError:
-            # We handle this case separately to provide a better error message
-            raise LoginError(
-                403,
-                "JWT validation failed: Signature verification failed",
-                errcode=Codes.FORBIDDEN,
-            )
-        except JoseError as e:
-            # A JWT error occurred, return some info back to the client.
-            raise LoginError(
-                403,
-                "JWT validation failed: %s" % (str(e),),
-                errcode=Codes.FORBIDDEN,
-            )
-
-        try:
-            claims.validate(leeway=120)  # allows 2 min of clock skew
-
-            # Enforce the old behavior which is rolled out in productive
-            # servers: if the JWT contains an 'aud' claim but none is
-            # configured, the login attempt will fail
-            if claims.get("aud") is not None:
-                if self.jwt_audiences is None or len(self.jwt_audiences) == 0:
-                    raise InvalidClaimError("aud")
-        except JoseError as e:
-            raise LoginError(
-                403,
-                "JWT validation failed: %s" % (str(e),),
-                errcode=Codes.FORBIDDEN,
-            )
+        """
+        Handle the custom JWT login.
 
-        user = claims.get(self.jwt_subject_claim, None)
-        if user is None:
-            raise LoginError(403, "Invalid JWT", errcode=Codes.FORBIDDEN)
+        Args:
+            login_submission: The JSON request body.
+            should_issue_refresh_token: True if this login should issue
+                a refresh token alongside the access token.
 
-        user_id = UserID(user, self.hs.hostname).to_string()
-        result = await self._complete_login(
+        Returns:
+            The body of the JSON response.
+        """
+        user_id = await self.hs.get_jwt_handler().validate_login(login_submission)
+        return await self._complete_login(
             user_id,
             login_submission,
             create_non_existent_users=True,
             should_issue_refresh_token=should_issue_refresh_token,
         )
-        return result
 
 
 def _get_auth_flow_dict_for_idp(idp: SsoIdentityProvider) -> JsonDict:
diff --git a/synapse/server.py b/synapse/server.py
index b307295789..aa90465047 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -147,6 +147,7 @@ logger = logging.getLogger(__name__)
 if TYPE_CHECKING:
     from txredisapi import ConnectionHandler
 
+    from synapse.handlers.jwt import JwtHandler
     from synapse.handlers.oidc import OidcHandler
     from synapse.handlers.saml import SamlHandler
 
@@ -533,6 +534,12 @@ class HomeServer(metaclass=abc.ABCMeta):
     def get_sso_handler(self) -> SsoHandler:
         return SsoHandler(self)
 
+    @cache_in_self
+    def get_jwt_handler(self) -> "JwtHandler":
+        from synapse.handlers.jwt import JwtHandler
+
+        return JwtHandler(self)
+
     @cache_in_self
     def get_sync_handler(self) -> SyncHandler:
         return SyncHandler(self)
diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py
index 62acf4f44e..dc32982e22 100644
--- a/tests/rest/client/test_login.py
+++ b/tests/rest/client/test_login.py
@@ -42,7 +42,7 @@ from tests.test_utils.html_parsers import TestHtmlParser
 from tests.unittest import HomeserverTestCase, override_config, skip_unless
 
 try:
-    from authlib.jose import jwk, jwt
+    from authlib.jose import JsonWebKey, jwt
 
     HAS_JWT = True
 except ImportError:
@@ -1054,6 +1054,22 @@ class JWTTestCase(unittest.HomeserverTestCase):
         self.assertEqual(channel.json_body["errcode"], "M_FORBIDDEN")
         self.assertEqual(channel.json_body["error"], "Token field for JWT is missing")
 
+    def test_deactivated_user(self) -> None:
+        """Logging in as a deactivated account should error."""
+        user_id = self.register_user("kermit", "monkey")
+        self.get_success(
+            self.hs.get_deactivate_account_handler().deactivate_account(
+                user_id, erase_data=False, requester=create_requester(user_id)
+            )
+        )
+
+        channel = self.jwt_login({"sub": "kermit"})
+        self.assertEqual(channel.code, 403, msg=channel.result)
+        self.assertEqual(channel.json_body["errcode"], "M_USER_DEACTIVATED")
+        self.assertEqual(
+            channel.json_body["error"], "This account has been deactivated"
+        )
+
 
 # The JWTPubKeyTestCase is a complement to JWTTestCase where we instead use
 # RSS256, with a public key configured in synapse as "jwt_secret", and tokens
@@ -1121,7 +1137,7 @@ class JWTPubKeyTestCase(unittest.HomeserverTestCase):
     def jwt_encode(self, payload: Dict[str, Any], secret: str = jwt_privatekey) -> str:
         header = {"alg": "RS256"}
         if secret.startswith("-----BEGIN RSA PRIVATE KEY-----"):
-            secret = jwk.dumps(secret, kty="RSA")
+            secret = JsonWebKey.import_key(secret, {"kty": "RSA"})
         result: bytes = jwt.encode(header, payload, secret)
         return result.decode("ascii")
 
-- 
cgit 1.5.1


From 1e89976b268c296e1fd8fface36ade29c0354254 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 19 May 2023 08:25:25 -0400
Subject: Rename blacklist/whitelist internally. (#15620)

Avoid renaming configuration settings for now and rename internal code
to use blocklist and allowlist instead.
---
 changelog.d/15606.misc                             |   2 +-
 changelog.d/15620.misc                             |   1 +
 synapse/config/repository.py                       |   8 +-
 synapse/config/server.py                           |  24 ++---
 synapse/handlers/federation.py                     |   2 +-
 synapse/handlers/identity.py                       |  18 ++--
 synapse/handlers/sso.py                            |   2 +-
 synapse/http/client.py                             | 119 ++++++++++-----------
 synapse/http/federation/matrix_federation_agent.py |  24 ++---
 synapse/http/matrixfederationclient.py             |  14 +--
 synapse/http/proxyagent.py                         |   2 +-
 synapse/media/url_previewer.py                     |  16 +--
 synapse/push/httppusher.py                         |   2 +-
 synapse/server.py                                  |  10 +-
 synapse/storage/database.py                        |   5 +-
 tests/federation/test_federation_server.py         |   2 +-
 tests/handlers/test_sso.py                         |   2 +-
 .../federation/test_matrix_federation_agent.py     |   8 +-
 tests/http/test_client.py                          |  26 ++---
 tests/http/test_matrixfederationclient.py          |  16 +--
 tests/http/test_proxyagent.py                      |  18 ++--
 tests/http/test_simple_client.py                   |  18 ++--
 tests/push/test_http.py                            |   2 +-
 tests/replication/test_pusher_shard.py             |   6 +-
 tests/rest/media/test_url_preview.py               |  48 ++++-----
 25 files changed, 189 insertions(+), 206 deletions(-)
 create mode 100644 changelog.d/15620.misc

(limited to 'synapse')

diff --git a/changelog.d/15606.misc b/changelog.d/15606.misc
index 44265fbf02..568c0d3fc5 100644
--- a/changelog.d/15606.misc
+++ b/changelog.d/15606.misc
@@ -1 +1 @@
-Update internal terminology for workers.
+Update internal terminology.
diff --git a/changelog.d/15620.misc b/changelog.d/15620.misc
new file mode 100644
index 0000000000..568c0d3fc5
--- /dev/null
+++ b/changelog.d/15620.misc
@@ -0,0 +1 @@
+Update internal terminology.
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index 655f06505b..f6cfdd3e04 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -224,20 +224,20 @@ class ContentRepositoryConfig(Config):
                 if "http" in proxy_env or "https" in proxy_env:
                     logger.warning("".join(HTTP_PROXY_SET_WARNING))
 
-            # we always blacklist '0.0.0.0' and '::', which are supposed to be
+            # we always block '0.0.0.0' and '::', which are supposed to be
             # unroutable addresses.
-            self.url_preview_ip_range_blacklist = generate_ip_set(
+            self.url_preview_ip_range_blocklist = generate_ip_set(
                 config["url_preview_ip_range_blacklist"],
                 ["0.0.0.0", "::"],
                 config_path=("url_preview_ip_range_blacklist",),
             )
 
-            self.url_preview_ip_range_whitelist = generate_ip_set(
+            self.url_preview_ip_range_allowlist = generate_ip_set(
                 config.get("url_preview_ip_range_whitelist", ()),
                 config_path=("url_preview_ip_range_whitelist",),
             )
 
-            self.url_preview_url_blacklist = config.get("url_preview_url_blacklist", ())
+            self.url_preview_url_blocklist = config.get("url_preview_url_blacklist", ())
 
             self.url_preview_accept_language = config.get(
                 "url_preview_accept_language"
diff --git a/synapse/config/server.py b/synapse/config/server.py
index 64201238d6..b46fa51593 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -115,7 +115,7 @@ def generate_ip_set(
 
 
 # IP ranges that are considered private / unroutable / don't make sense.
-DEFAULT_IP_RANGE_BLACKLIST = [
+DEFAULT_IP_RANGE_BLOCKLIST = [
     # Localhost
     "127.0.0.0/8",
     # Private networks.
@@ -501,36 +501,36 @@ class ServerConfig(Config):
         # due to resource constraints
         self.admin_contact = config.get("admin_contact", None)
 
-        ip_range_blacklist = config.get(
-            "ip_range_blacklist", DEFAULT_IP_RANGE_BLACKLIST
+        ip_range_blocklist = config.get(
+            "ip_range_blacklist", DEFAULT_IP_RANGE_BLOCKLIST
         )
 
         # Attempt to create an IPSet from the given ranges
 
-        # Always blacklist 0.0.0.0, ::
-        self.ip_range_blacklist = generate_ip_set(
-            ip_range_blacklist, ["0.0.0.0", "::"], config_path=("ip_range_blacklist",)
+        # Always block 0.0.0.0, ::
+        self.ip_range_blocklist = generate_ip_set(
+            ip_range_blocklist, ["0.0.0.0", "::"], config_path=("ip_range_blacklist",)
         )
 
-        self.ip_range_whitelist = generate_ip_set(
+        self.ip_range_allowlist = generate_ip_set(
             config.get("ip_range_whitelist", ()), config_path=("ip_range_whitelist",)
         )
         # The federation_ip_range_blacklist is used for backwards-compatibility
         # and only applies to federation and identity servers.
         if "federation_ip_range_blacklist" in config:
-            # Always blacklist 0.0.0.0, ::
-            self.federation_ip_range_blacklist = generate_ip_set(
+            # Always block 0.0.0.0, ::
+            self.federation_ip_range_blocklist = generate_ip_set(
                 config["federation_ip_range_blacklist"],
                 ["0.0.0.0", "::"],
                 config_path=("federation_ip_range_blacklist",),
             )
             # 'federation_ip_range_whitelist' was never a supported configuration option.
-            self.federation_ip_range_whitelist = None
+            self.federation_ip_range_allowlist = None
         else:
             # No backwards-compatiblity requrired, as federation_ip_range_blacklist
             # is not given. Default to ip_range_blacklist and ip_range_whitelist.
-            self.federation_ip_range_blacklist = self.ip_range_blacklist
-            self.federation_ip_range_whitelist = self.ip_range_whitelist
+            self.federation_ip_range_blocklist = self.ip_range_blocklist
+            self.federation_ip_range_allowlist = self.ip_range_allowlist
 
         # (undocumented) option for torturing the worker-mode replication a bit,
         # for testing. The value defines the number of milliseconds to pause before
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 19dec4812f..2eb28d55ac 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -148,7 +148,7 @@ class FederationHandler:
         self._event_auth_handler = hs.get_event_auth_handler()
         self._server_notices_mxid = hs.config.servernotices.server_notices_mxid
         self.config = hs.config
-        self.http_client = hs.get_proxied_blacklisted_http_client()
+        self.http_client = hs.get_proxied_blocklisted_http_client()
         self._replication = hs.get_replication_data_handler()
         self._federation_event_handler = hs.get_federation_event_handler()
         self._device_handler = hs.get_device_handler()
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index bf0f7acf80..3031384d25 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -52,10 +52,10 @@ class IdentityHandler:
         # An HTTP client for contacting trusted URLs.
         self.http_client = SimpleHttpClient(hs)
         # An HTTP client for contacting identity servers specified by clients.
-        self.blacklisting_http_client = SimpleHttpClient(
+        self._http_client = SimpleHttpClient(
             hs,
-            ip_blacklist=hs.config.server.federation_ip_range_blacklist,
-            ip_whitelist=hs.config.server.federation_ip_range_whitelist,
+            ip_blocklist=hs.config.server.federation_ip_range_blocklist,
+            ip_allowlist=hs.config.server.federation_ip_range_allowlist,
         )
         self.federation_http_client = hs.get_federation_http_client()
         self.hs = hs
@@ -197,7 +197,7 @@ class IdentityHandler:
         try:
             # Use the blacklisting http client as this call is only to identity servers
             # provided by a client
-            data = await self.blacklisting_http_client.post_json_get_json(
+            data = await self._http_client.post_json_get_json(
                 bind_url, bind_data, headers=headers
             )
 
@@ -308,9 +308,7 @@ class IdentityHandler:
         try:
             # Use the blacklisting http client as this call is only to identity servers
             # provided by a client
-            await self.blacklisting_http_client.post_json_get_json(
-                url, content, headers
-            )
+            await self._http_client.post_json_get_json(url, content, headers)
             changed = True
         except HttpResponseException as e:
             changed = False
@@ -579,7 +577,7 @@ class IdentityHandler:
         """
         # Check what hashing details are supported by this identity server
         try:
-            hash_details = await self.blacklisting_http_client.get_json(
+            hash_details = await self._http_client.get_json(
                 "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server),
                 {"access_token": id_access_token},
             )
@@ -646,7 +644,7 @@ class IdentityHandler:
         headers = {"Authorization": create_id_access_token_header(id_access_token)}
 
         try:
-            lookup_results = await self.blacklisting_http_client.post_json_get_json(
+            lookup_results = await self._http_client.post_json_get_json(
                 "%s%s/_matrix/identity/v2/lookup" % (id_server_scheme, id_server),
                 {
                     "addresses": [lookup_value],
@@ -752,7 +750,7 @@ class IdentityHandler:
 
         url = "%s%s/_matrix/identity/v2/store-invite" % (id_server_scheme, id_server)
         try:
-            data = await self.blacklisting_http_client.post_json_get_json(
+            data = await self._http_client.post_json_get_json(
                 url,
                 invite_config,
                 {"Authorization": create_id_access_token_header(id_access_token)},
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index 25fd2eb3a1..c3a51722bd 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -204,7 +204,7 @@ class SsoHandler:
         self._media_repo = (
             hs.get_media_repository() if hs.config.media.can_load_media_repo else None
         )
-        self._http_client = hs.get_proxied_blacklisted_http_client()
+        self._http_client = hs.get_proxied_blocklisted_http_client()
 
         # The following template is shown after a successful user interactive
         # authentication session. It tells the user they can close the window.
diff --git a/synapse/http/client.py b/synapse/http/client.py
index c9479c81ff..f1ab7a8bc9 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -117,22 +117,22 @@ RawHeaderValue = Union[
 ]
 
 
-def check_against_blacklist(
-    ip_address: IPAddress, ip_whitelist: Optional[IPSet], ip_blacklist: IPSet
+def _is_ip_blocked(
+    ip_address: IPAddress, allowlist: Optional[IPSet], blocklist: IPSet
 ) -> bool:
     """
     Compares an IP address to allowed and disallowed IP sets.
 
     Args:
         ip_address: The IP address to check
-        ip_whitelist: Allowed IP addresses.
-        ip_blacklist: Disallowed IP addresses.
+        allowlist: Allowed IP addresses.
+        blocklist: Disallowed IP addresses.
 
     Returns:
-        True if the IP address is in the blacklist and not in the whitelist.
+        True if the IP address is in the blocklist and not in the allowlist.
     """
-    if ip_address in ip_blacklist:
-        if ip_whitelist is None or ip_address not in ip_whitelist:
+    if ip_address in blocklist:
+        if allowlist is None or ip_address not in allowlist:
             return True
     return False
 
@@ -154,27 +154,27 @@ def _make_scheduler(
     return _scheduler
 
 
-class _IPBlacklistingResolver:
+class _IPBlockingResolver:
     """
-    A proxy for reactor.nameResolver which only produces non-blacklisted IP
-    addresses, preventing DNS rebinding attacks on URL preview.
+    A proxy for reactor.nameResolver which only produces non-blocklisted IP
+    addresses, preventing DNS rebinding attacks.
     """
 
     def __init__(
         self,
         reactor: IReactorPluggableNameResolver,
-        ip_whitelist: Optional[IPSet],
-        ip_blacklist: IPSet,
+        ip_allowlist: Optional[IPSet],
+        ip_blocklist: IPSet,
     ):
         """
         Args:
             reactor: The twisted reactor.
-            ip_whitelist: IP addresses to allow.
-            ip_blacklist: IP addresses to disallow.
+            ip_allowlist: IP addresses to allow.
+            ip_blocklist: IP addresses to disallow.
         """
         self._reactor = reactor
-        self._ip_whitelist = ip_whitelist
-        self._ip_blacklist = ip_blacklist
+        self._ip_allowlist = ip_allowlist
+        self._ip_blocklist = ip_blocklist
 
     def resolveHostName(
         self, recv: IResolutionReceiver, hostname: str, portNumber: int = 0
@@ -191,16 +191,13 @@ class _IPBlacklistingResolver:
 
                 ip_address = IPAddress(address.host)
 
-                if check_against_blacklist(
-                    ip_address, self._ip_whitelist, self._ip_blacklist
-                ):
+                if _is_ip_blocked(ip_address, self._ip_allowlist, self._ip_blocklist):
                     logger.info(
-                        "Dropped %s from DNS resolution to %s due to blacklist"
-                        % (ip_address, hostname)
+                        "Blocked %s from DNS resolution to %s" % (ip_address, hostname)
                     )
                     has_bad_ip = True
 
-            # if we have a blacklisted IP, we'd like to raise an error to block the
+            # if we have a blocked IP, we'd like to raise an error to block the
             # request, but all we can really do from here is claim that there were no
             # valid results.
             if not has_bad_ip:
@@ -232,24 +229,24 @@ class _IPBlacklistingResolver:
 # ISynapseReactor implies IReactorCore, but explicitly marking it this as an implementer
 # of IReactorCore seems to keep mypy-zope happier.
 @implementer(IReactorCore, ISynapseReactor)
-class BlacklistingReactorWrapper:
+class BlocklistingReactorWrapper:
     """
-    A Reactor wrapper which will prevent DNS resolution to blacklisted IP
+    A Reactor wrapper which will prevent DNS resolution to blocked IP
     addresses, to prevent DNS rebinding.
     """
 
     def __init__(
         self,
         reactor: IReactorPluggableNameResolver,
-        ip_whitelist: Optional[IPSet],
-        ip_blacklist: IPSet,
+        ip_allowlist: Optional[IPSet],
+        ip_blocklist: IPSet,
     ):
         self._reactor = reactor
 
-        # We need to use a DNS resolver which filters out blacklisted IP
+        # We need to use a DNS resolver which filters out blocked IP
         # addresses, to prevent DNS rebinding.
-        self._nameResolver = _IPBlacklistingResolver(
-            self._reactor, ip_whitelist, ip_blacklist
+        self._nameResolver = _IPBlockingResolver(
+            self._reactor, ip_allowlist, ip_blocklist
         )
 
     def __getattr__(self, attr: str) -> Any:
@@ -260,7 +257,7 @@ class BlacklistingReactorWrapper:
             return getattr(self._reactor, attr)
 
 
-class BlacklistingAgentWrapper(Agent):
+class BlocklistingAgentWrapper(Agent):
     """
     An Agent wrapper which will prevent access to IP addresses being accessed
     directly (without an IP address lookup).
@@ -269,18 +266,18 @@ class BlacklistingAgentWrapper(Agent):
     def __init__(
         self,
         agent: IAgent,
-        ip_blacklist: IPSet,
-        ip_whitelist: Optional[IPSet] = None,
+        ip_blocklist: IPSet,
+        ip_allowlist: Optional[IPSet] = None,
     ):
         """
         Args:
             agent: The Agent to wrap.
-            ip_whitelist: IP addresses to allow.
-            ip_blacklist: IP addresses to disallow.
+            ip_allowlist: IP addresses to allow.
+            ip_blocklist: IP addresses to disallow.
         """
         self._agent = agent
-        self._ip_whitelist = ip_whitelist
-        self._ip_blacklist = ip_blacklist
+        self._ip_allowlist = ip_allowlist
+        self._ip_blocklist = ip_blocklist
 
     def request(
         self,
@@ -299,13 +296,9 @@ class BlacklistingAgentWrapper(Agent):
             # Not an IP
             pass
         else:
-            if check_against_blacklist(
-                ip_address, self._ip_whitelist, self._ip_blacklist
-            ):
-                logger.info("Blocking access to %s due to blacklist" % (ip_address,))
-                e = SynapseError(
-                    HTTPStatus.FORBIDDEN, "IP address blocked by IP blacklist entry"
-                )
+            if _is_ip_blocked(ip_address, self._ip_allowlist, self._ip_blocklist):
+                logger.info("Blocking access to %s" % (ip_address,))
+                e = SynapseError(HTTPStatus.FORBIDDEN, "IP address blocked")
                 return defer.fail(Failure(e))
 
         return self._agent.request(
@@ -763,10 +756,9 @@ class SimpleHttpClient(BaseHttpClient):
     Args:
         hs: The HomeServer instance to pass in
         treq_args: Extra keyword arguments to be given to treq.request.
-        ip_blacklist: The IP addresses that are blacklisted that
-            we may not request.
-        ip_whitelist: The whitelisted IP addresses, that we can
-           request if it were otherwise caught in a blacklist.
+        ip_blocklist: The IP addresses that we may not request.
+        ip_allowlist: The allowed IP addresses, that we can
+           request if it were otherwise caught in a blocklist.
         use_proxy: Whether proxy settings should be discovered and used
             from conventional environment variables.
     """
@@ -775,19 +767,19 @@ class SimpleHttpClient(BaseHttpClient):
         self,
         hs: "HomeServer",
         treq_args: Optional[Dict[str, Any]] = None,
-        ip_whitelist: Optional[IPSet] = None,
-        ip_blacklist: Optional[IPSet] = None,
+        ip_allowlist: Optional[IPSet] = None,
+        ip_blocklist: Optional[IPSet] = None,
         use_proxy: bool = False,
     ):
         super().__init__(hs, treq_args=treq_args)
-        self._ip_whitelist = ip_whitelist
-        self._ip_blacklist = ip_blacklist
-
-        if self._ip_blacklist:
-            # If we have an IP blacklist, we need to use a DNS resolver which
-            # filters out blacklisted IP addresses, to prevent DNS rebinding.
-            self.reactor: ISynapseReactor = BlacklistingReactorWrapper(
-                self.reactor, self._ip_whitelist, self._ip_blacklist
+        self._ip_allowlist = ip_allowlist
+        self._ip_blocklist = ip_blocklist
+
+        if self._ip_blocklist:
+            # If we have an IP blocklist, we need to use a DNS resolver which
+            # filters out blocked IP addresses, to prevent DNS rebinding.
+            self.reactor: ISynapseReactor = BlocklistingReactorWrapper(
+                self.reactor, self._ip_allowlist, self._ip_blocklist
             )
 
         # the pusher makes lots of concurrent SSL connections to Sygnal, and tends to
@@ -809,14 +801,13 @@ class SimpleHttpClient(BaseHttpClient):
             use_proxy=use_proxy,
         )
 
-        if self._ip_blacklist:
-            # If we have an IP blacklist, we then install the blacklisting Agent
-            # which prevents direct access to IP addresses, that are not caught
-            # by the DNS resolution.
-            self.agent = BlacklistingAgentWrapper(
+        if self._ip_blocklist:
+            # If we have an IP blocklist, we then install the Agent which prevents
+            # direct access to IP addresses, that are not caught by the DNS resolution.
+            self.agent = BlocklistingAgentWrapper(
                 self.agent,
-                ip_blacklist=self._ip_blacklist,
-                ip_whitelist=self._ip_whitelist,
+                ip_blocklist=self._ip_blocklist,
+                ip_allowlist=self._ip_allowlist,
             )
 
 
diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py
index 8d7d0a3875..7e8cf31682 100644
--- a/synapse/http/federation/matrix_federation_agent.py
+++ b/synapse/http/federation/matrix_federation_agent.py
@@ -36,7 +36,7 @@ from twisted.web.iweb import IAgent, IAgentEndpointFactory, IBodyProducer, IResp
 
 from synapse.crypto.context_factory import FederationPolicyForHTTPS
 from synapse.http import proxyagent
-from synapse.http.client import BlacklistingAgentWrapper, BlacklistingReactorWrapper
+from synapse.http.client import BlocklistingAgentWrapper, BlocklistingReactorWrapper
 from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint
 from synapse.http.federation.srv_resolver import Server, SrvResolver
 from synapse.http.federation.well_known_resolver import WellKnownResolver
@@ -65,12 +65,12 @@ class MatrixFederationAgent:
         user_agent:
             The user agent header to use for federation requests.
 
-        ip_whitelist: Allowed IP addresses.
+        ip_allowlist: Allowed IP addresses.
 
-        ip_blacklist: Disallowed IP addresses.
+        ip_blocklist: Disallowed IP addresses.
 
         proxy_reactor: twisted reactor to use for connections to the proxy server
-           reactor might have some blacklisting applied (i.e. for DNS queries),
+           reactor might have some blocking applied (i.e. for DNS queries),
            but we need unblocked access to the proxy.
 
         _srv_resolver:
@@ -87,17 +87,17 @@ class MatrixFederationAgent:
         reactor: ISynapseReactor,
         tls_client_options_factory: Optional[FederationPolicyForHTTPS],
         user_agent: bytes,
-        ip_whitelist: Optional[IPSet],
-        ip_blacklist: IPSet,
+        ip_allowlist: Optional[IPSet],
+        ip_blocklist: IPSet,
         _srv_resolver: Optional[SrvResolver] = None,
         _well_known_resolver: Optional[WellKnownResolver] = None,
     ):
-        # proxy_reactor is not blacklisted
+        # proxy_reactor is not a blocklisting reactor
         proxy_reactor = reactor
 
-        # We need to use a DNS resolver which filters out blacklisted IP
+        # We need to use a DNS resolver which filters out blocked IP
         # addresses, to prevent DNS rebinding.
-        reactor = BlacklistingReactorWrapper(reactor, ip_whitelist, ip_blacklist)
+        reactor = BlocklistingReactorWrapper(reactor, ip_allowlist, ip_blocklist)
 
         self._clock = Clock(reactor)
         self._pool = HTTPConnectionPool(reactor)
@@ -120,7 +120,7 @@ class MatrixFederationAgent:
         if _well_known_resolver is None:
             _well_known_resolver = WellKnownResolver(
                 reactor,
-                agent=BlacklistingAgentWrapper(
+                agent=BlocklistingAgentWrapper(
                     ProxyAgent(
                         reactor,
                         proxy_reactor,
@@ -128,7 +128,7 @@ class MatrixFederationAgent:
                         contextFactory=tls_client_options_factory,
                         use_proxy=True,
                     ),
-                    ip_blacklist=ip_blacklist,
+                    ip_blocklist=ip_blocklist,
                 ),
                 user_agent=self.user_agent,
             )
@@ -256,7 +256,7 @@ class MatrixHostnameEndpoint:
     Args:
         reactor: twisted reactor to use for underlying requests
         proxy_reactor: twisted reactor to use for connections to the proxy server.
-           'reactor' might have some blacklisting applied (i.e. for DNS queries),
+           'reactor' might have some blocking applied (i.e. for DNS queries),
            but we need unblocked access to the proxy.
         tls_client_options_factory:
             factory to use for fetching client tls options, or none to disable TLS.
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index 634882487c..9094dab0fe 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -64,7 +64,7 @@ from synapse.api.errors import (
 from synapse.crypto.context_factory import FederationPolicyForHTTPS
 from synapse.http import QuieterFileBodyProducer
 from synapse.http.client import (
-    BlacklistingAgentWrapper,
+    BlocklistingAgentWrapper,
     BodyExceededMaxSize,
     ByteWriteable,
     _make_scheduler,
@@ -392,15 +392,15 @@ class MatrixFederationHttpClient:
             self.reactor,
             tls_client_options_factory,
             user_agent.encode("ascii"),
-            hs.config.server.federation_ip_range_whitelist,
-            hs.config.server.federation_ip_range_blacklist,
+            hs.config.server.federation_ip_range_allowlist,
+            hs.config.server.federation_ip_range_blocklist,
         )
 
-        # Use a BlacklistingAgentWrapper to prevent circumventing the IP
-        # blacklist via IP literals in server names
-        self.agent = BlacklistingAgentWrapper(
+        # Use a BlocklistingAgentWrapper to prevent circumventing the IP
+        # blocking via IP literals in server names
+        self.agent = BlocklistingAgentWrapper(
             federation_agent,
-            ip_blacklist=hs.config.server.federation_ip_range_blacklist,
+            ip_blocklist=hs.config.server.federation_ip_range_blocklist,
         )
 
         self.clock = hs.get_clock()
diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py
index 94ef737b9e..7bdc4acae7 100644
--- a/synapse/http/proxyagent.py
+++ b/synapse/http/proxyagent.py
@@ -53,7 +53,7 @@ class ProxyAgent(_AgentBase):
             connections.
 
         proxy_reactor: twisted reactor to use for connections to the proxy server
-                       reactor might have some blacklisting applied (i.e. for DNS queries),
+                       reactor might have some blocking applied (i.e. for DNS queries),
                        but we need unblocked access to the proxy.
 
         contextFactory: A factory for TLS contexts, to control the
diff --git a/synapse/media/url_previewer.py b/synapse/media/url_previewer.py
index dbdb1fd20e..70b32cee17 100644
--- a/synapse/media/url_previewer.py
+++ b/synapse/media/url_previewer.py
@@ -105,7 +105,7 @@ class UrlPreviewer:
 
     When Synapse is asked to preview a URL it does the following:
 
-    1. Checks against a URL blacklist (defined as `url_preview_url_blacklist` in the
+    1. Checks against a URL blocklist (defined as `url_preview_url_blacklist` in the
        config).
     2. Checks the URL against an in-memory cache and returns the result if it exists. (This
        is also used to de-duplicate processing of multiple in-flight requests at once.)
@@ -167,8 +167,8 @@ class UrlPreviewer:
         self.client = SimpleHttpClient(
             hs,
             treq_args={"browser_like_redirects": True},
-            ip_whitelist=hs.config.media.url_preview_ip_range_whitelist,
-            ip_blacklist=hs.config.media.url_preview_ip_range_blacklist,
+            ip_allowlist=hs.config.media.url_preview_ip_range_allowlist,
+            ip_blocklist=hs.config.media.url_preview_ip_range_blocklist,
             use_proxy=True,
         )
         self.media_repo = media_repo
@@ -186,7 +186,7 @@ class UrlPreviewer:
             or instance_running_jobs == hs.get_instance_name()
         )
 
-        self.url_preview_url_blacklist = hs.config.media.url_preview_url_blacklist
+        self.url_preview_url_blocklist = hs.config.media.url_preview_url_blocklist
         self.url_preview_accept_language = hs.config.media.url_preview_accept_language
 
         # memory cache mapping urls to an ObservableDeferred returning
@@ -391,7 +391,7 @@ class UrlPreviewer:
             True if the URL is blocked, False if it is allowed.
         """
         url_tuple = urlsplit(url)
-        for entry in self.url_preview_url_blacklist:
+        for entry in self.url_preview_url_blocklist:
             match = True
             # Iterate over each entry. If *all* attributes of that entry match
             # the current URL, then reject it.
@@ -426,7 +426,7 @@ class UrlPreviewer:
 
             # All fields matched, return true (the URL is blocked).
             if match:
-                logger.warning("URL %s blocked by url_blacklist entry %s", url, entry)
+                logger.warning("URL %s blocked by entry %s", url, entry)
                 return match
 
         # No matches were found, the URL is allowed.
@@ -472,7 +472,7 @@ class UrlPreviewer:
         except DNSLookupError:
             # DNS lookup returned no results
             # Note: This will also be the case if one of the resolved IP
-            # addresses is blacklisted
+            # addresses is blocked.
             raise SynapseError(
                 502,
                 "DNS resolution failure during URL preview generation",
@@ -575,7 +575,7 @@ class UrlPreviewer:
 
         if self._is_url_blocked(url):
             raise SynapseError(
-                403, "URL blocked by url pattern blacklist entry", Codes.UNKNOWN
+                403, "URL blocked by url pattern blocklist entry", Codes.UNKNOWN
             )
 
         # TODO: we should probably honour robots.txt... except in practice
diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py
index e91ee05e99..50027680cb 100644
--- a/synapse/push/httppusher.py
+++ b/synapse/push/httppusher.py
@@ -143,7 +143,7 @@ class HttpPusher(Pusher):
             )
 
         self.url = url
-        self.http_client = hs.get_proxied_blacklisted_http_client()
+        self.http_client = hs.get_proxied_blocklisted_http_client()
         self.data_minus_url = {}
         self.data_minus_url.update(self.data)
         del self.data_minus_url["url"]
diff --git a/synapse/server.py b/synapse/server.py
index aa90465047..f6e245569c 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -454,15 +454,15 @@ class HomeServer(metaclass=abc.ABCMeta):
         return SimpleHttpClient(self, use_proxy=True)
 
     @cache_in_self
-    def get_proxied_blacklisted_http_client(self) -> SimpleHttpClient:
+    def get_proxied_blocklisted_http_client(self) -> SimpleHttpClient:
         """
-        An HTTP client that uses configured HTTP(S) proxies and blacklists IPs
-        based on the IP range blacklist/whitelist.
+        An HTTP client that uses configured HTTP(S) proxies and blocks IPs
+        based on the configured IP ranges.
         """
         return SimpleHttpClient(
             self,
-            ip_whitelist=self.config.server.ip_range_whitelist,
-            ip_blacklist=self.config.server.ip_range_blacklist,
+            ip_allowlist=self.config.server.ip_range_allowlist,
+            ip_blocklist=self.config.server.ip_range_blocklist,
             use_proxy=True,
         )
 
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 313cf1a8d0..bdaa508dbe 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -565,9 +565,8 @@ class DatabasePool:
         # A set of tables that are not safe to use native upserts in.
         self._unsafe_to_upsert_tables = set(UNIQUE_INDEX_BACKGROUND_UPDATES.keys())
 
-        # We add the user_directory_search table to the blacklist on SQLite
-        # because the existing search table does not have an index, making it
-        # unsafe to use native upserts.
+        # The user_directory_search table is unsafe to use native upserts
+        # on SQLite because the existing search table does not have an index.
         if isinstance(self.engine, Sqlite3Engine):
             self._unsafe_to_upsert_tables.add("user_directory_search")
 
diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py
index 6c7738d810..5c850d1843 100644
--- a/tests/federation/test_federation_server.py
+++ b/tests/federation/test_federation_server.py
@@ -63,7 +63,7 @@ class FederationServerTests(unittest.FederatingHomeserverTestCase):
 
 
 class ServerACLsTestCase(unittest.TestCase):
-    def test_blacklisted_server(self) -> None:
+    def test_blocked_server(self) -> None:
         e = _create_acl_event({"allow": ["*"], "deny": ["evil.com"]})
         logging.info("ACL event: %s", e.content)
 
diff --git a/tests/handlers/test_sso.py b/tests/handlers/test_sso.py
index 620ae3a4ba..b9ffdb4ced 100644
--- a/tests/handlers/test_sso.py
+++ b/tests/handlers/test_sso.py
@@ -31,7 +31,7 @@ class TestSSOHandler(unittest.HomeserverTestCase):
         self.http_client.get_file.side_effect = mock_get_file
         self.http_client.user_agent = b"Synapse Test"
         hs = self.setup_test_homeserver(
-            proxied_blacklisted_http_client=self.http_client
+            proxied_blocklisted_http_client=self.http_client
         )
         return hs
 
diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py
index eb7f53fee5..105b4caefa 100644
--- a/tests/http/federation/test_matrix_federation_agent.py
+++ b/tests/http/federation/test_matrix_federation_agent.py
@@ -269,8 +269,8 @@ class MatrixFederationAgentTests(unittest.TestCase):
             reactor=cast(ISynapseReactor, self.reactor),
             tls_client_options_factory=self.tls_factory,
             user_agent=b"test-agent",  # Note that this is unused since _well_known_resolver is provided.
-            ip_whitelist=IPSet(),
-            ip_blacklist=IPSet(),
+            ip_allowlist=IPSet(),
+            ip_blocklist=IPSet(),
             _srv_resolver=self.mock_resolver,
             _well_known_resolver=self.well_known_resolver,
         )
@@ -997,8 +997,8 @@ class MatrixFederationAgentTests(unittest.TestCase):
             reactor=self.reactor,
             tls_client_options_factory=tls_factory,
             user_agent=b"test-agent",  # This is unused since _well_known_resolver is passed below.
-            ip_whitelist=IPSet(),
-            ip_blacklist=IPSet(),
+            ip_allowlist=IPSet(),
+            ip_blocklist=IPSet(),
             _srv_resolver=self.mock_resolver,
             _well_known_resolver=WellKnownResolver(
                 cast(ISynapseReactor, self.reactor),
diff --git a/tests/http/test_client.py b/tests/http/test_client.py
index 57b6a84e23..a05b9f17a6 100644
--- a/tests/http/test_client.py
+++ b/tests/http/test_client.py
@@ -27,8 +27,8 @@ from twisted.web.iweb import UNKNOWN_LENGTH
 
 from synapse.api.errors import SynapseError
 from synapse.http.client import (
-    BlacklistingAgentWrapper,
-    BlacklistingReactorWrapper,
+    BlocklistingAgentWrapper,
+    BlocklistingReactorWrapper,
     BodyExceededMaxSize,
     _DiscardBodyWithMaxSizeProtocol,
     read_body_with_max_size,
@@ -140,7 +140,7 @@ class ReadBodyWithMaxSizeTests(TestCase):
         self.assertEqual(result.getvalue(), b"")
 
 
-class BlacklistingAgentTest(TestCase):
+class BlocklistingAgentTest(TestCase):
     def setUp(self) -> None:
         self.reactor, self.clock = get_clock()
 
@@ -157,16 +157,16 @@ class BlacklistingAgentTest(TestCase):
             self.reactor.lookups[domain.decode()] = ip.decode()
             self.reactor.lookups[ip.decode()] = ip.decode()
 
-        self.ip_whitelist = IPSet([self.allowed_ip.decode()])
-        self.ip_blacklist = IPSet(["5.0.0.0/8"])
+        self.ip_allowlist = IPSet([self.allowed_ip.decode()])
+        self.ip_blocklist = IPSet(["5.0.0.0/8"])
 
     def test_reactor(self) -> None:
-        """Apply the blacklisting reactor and ensure it properly blocks connections to particular domains and IPs."""
+        """Apply the blocklisting reactor and ensure it properly blocks connections to particular domains and IPs."""
         agent = Agent(
-            BlacklistingReactorWrapper(
+            BlocklistingReactorWrapper(
                 self.reactor,
-                ip_whitelist=self.ip_whitelist,
-                ip_blacklist=self.ip_blacklist,
+                ip_allowlist=self.ip_allowlist,
+                ip_blocklist=self.ip_blocklist,
             ),
         )
 
@@ -207,11 +207,11 @@ class BlacklistingAgentTest(TestCase):
             self.assertEqual(response.code, 200)
 
     def test_agent(self) -> None:
-        """Apply the blacklisting agent and ensure it properly blocks connections to particular IPs."""
-        agent = BlacklistingAgentWrapper(
+        """Apply the blocklisting agent and ensure it properly blocks connections to particular IPs."""
+        agent = BlocklistingAgentWrapper(
             Agent(self.reactor),
-            ip_blacklist=self.ip_blacklist,
-            ip_whitelist=self.ip_whitelist,
+            ip_blocklist=self.ip_blocklist,
+            ip_allowlist=self.ip_allowlist,
         )
 
         # The unsafe IPs should be rejected.
diff --git a/tests/http/test_matrixfederationclient.py b/tests/http/test_matrixfederationclient.py
index d89a91c59d..0dfc03ce50 100644
--- a/tests/http/test_matrixfederationclient.py
+++ b/tests/http/test_matrixfederationclient.py
@@ -231,11 +231,11 @@ class FederationClientTests(HomeserverTestCase):
         self.assertIsInstance(f.value, RequestSendFailed)
         self.assertIsInstance(f.value.inner_exception, ResponseNeverReceived)
 
-    def test_client_ip_range_blacklist(self) -> None:
-        """Ensure that Synapse does not try to connect to blacklisted IPs"""
+    def test_client_ip_range_blocklist(self) -> None:
+        """Ensure that Synapse does not try to connect to blocked IPs"""
 
-        # Set up the ip_range blacklist
-        self.hs.config.server.federation_ip_range_blacklist = IPSet(
+        # Set up the ip_range blocklist
+        self.hs.config.server.federation_ip_range_blocklist = IPSet(
             ["127.0.0.0/8", "fe80::/64"]
         )
         self.reactor.lookups["internal"] = "127.0.0.1"
@@ -243,7 +243,7 @@ class FederationClientTests(HomeserverTestCase):
         self.reactor.lookups["fine"] = "10.20.30.40"
         cl = MatrixFederationHttpClient(self.hs, None)
 
-        # Try making a GET request to a blacklisted IPv4 address
+        # Try making a GET request to a blocked IPv4 address
         # ------------------------------------------------------
         # Make the request
         d = defer.ensureDeferred(cl.get_json("internal:8008", "foo/bar", timeout=10000))
@@ -261,7 +261,7 @@ class FederationClientTests(HomeserverTestCase):
         self.assertIsInstance(f.value, RequestSendFailed)
         self.assertIsInstance(f.value.inner_exception, DNSLookupError)
 
-        # Try making a POST request to a blacklisted IPv6 address
+        # Try making a POST request to a blocked IPv6 address
         # -------------------------------------------------------
         # Make the request
         d = defer.ensureDeferred(
@@ -278,11 +278,11 @@ class FederationClientTests(HomeserverTestCase):
         clients = self.reactor.tcpClients
         self.assertEqual(len(clients), 0)
 
-        # Check that it was due to a blacklisted DNS lookup
+        # Check that it was due to a blocked DNS lookup
         f = self.failureResultOf(d, RequestSendFailed)
         self.assertIsInstance(f.value.inner_exception, DNSLookupError)
 
-        # Try making a GET request to a non-blacklisted IPv4 address
+        # Try making a GET request to an allowed IPv4 address
         # ----------------------------------------------------------
         # Make the request
         d = defer.ensureDeferred(cl.post_json("fine:8008", "foo/bar", timeout=10000))
diff --git a/tests/http/test_proxyagent.py b/tests/http/test_proxyagent.py
index cc175052ac..e0ae5a88ff 100644
--- a/tests/http/test_proxyagent.py
+++ b/tests/http/test_proxyagent.py
@@ -32,7 +32,7 @@ from twisted.internet.protocol import Factory, Protocol
 from twisted.protocols.tls import TLSMemoryBIOFactory, TLSMemoryBIOProtocol
 from twisted.web.http import HTTPChannel
 
-from synapse.http.client import BlacklistingReactorWrapper
+from synapse.http.client import BlocklistingReactorWrapper
 from synapse.http.connectproxyclient import ProxyCredentials
 from synapse.http.proxyagent import ProxyAgent, parse_proxy
 
@@ -684,11 +684,11 @@ class MatrixFederationAgentTests(TestCase):
         self.assertEqual(body, b"result")
 
     @patch.dict(os.environ, {"http_proxy": "proxy.com:8888"})
-    def test_http_request_via_proxy_with_blacklist(self) -> None:
-        # The blacklist includes the configured proxy IP.
+    def test_http_request_via_proxy_with_blocklist(self) -> None:
+        # The blocklist includes the configured proxy IP.
         agent = ProxyAgent(
-            BlacklistingReactorWrapper(
-                self.reactor, ip_whitelist=None, ip_blacklist=IPSet(["1.0.0.0/8"])
+            BlocklistingReactorWrapper(
+                self.reactor, ip_allowlist=None, ip_blocklist=IPSet(["1.0.0.0/8"])
             ),
             self.reactor,
             use_proxy=True,
@@ -730,11 +730,11 @@ class MatrixFederationAgentTests(TestCase):
         self.assertEqual(body, b"result")
 
     @patch.dict(os.environ, {"HTTPS_PROXY": "proxy.com"})
-    def test_https_request_via_uppercase_proxy_with_blacklist(self) -> None:
-        # The blacklist includes the configured proxy IP.
+    def test_https_request_via_uppercase_proxy_with_blocklist(self) -> None:
+        # The blocklist includes the configured proxy IP.
         agent = ProxyAgent(
-            BlacklistingReactorWrapper(
-                self.reactor, ip_whitelist=None, ip_blacklist=IPSet(["1.0.0.0/8"])
+            BlocklistingReactorWrapper(
+                self.reactor, ip_allowlist=None, ip_blocklist=IPSet(["1.0.0.0/8"])
             ),
             self.reactor,
             contextFactory=get_test_https_policy(),
diff --git a/tests/http/test_simple_client.py b/tests/http/test_simple_client.py
index 010601da4b..be731645bf 100644
--- a/tests/http/test_simple_client.py
+++ b/tests/http/test_simple_client.py
@@ -123,17 +123,17 @@ class SimpleHttpClientTests(HomeserverTestCase):
 
         self.assertIsInstance(f.value, RequestTimedOutError)
 
-    def test_client_ip_range_blacklist(self) -> None:
-        """Ensure that Synapse does not try to connect to blacklisted IPs"""
+    def test_client_ip_range_blocklist(self) -> None:
+        """Ensure that Synapse does not try to connect to blocked IPs"""
 
-        # Add some DNS entries we'll blacklist
+        # Add some DNS entries we'll block
         self.reactor.lookups["internal"] = "127.0.0.1"
         self.reactor.lookups["internalv6"] = "fe80:0:0:0:0:8a2e:370:7337"
-        ip_blacklist = IPSet(["127.0.0.0/8", "fe80::/64"])
+        ip_blocklist = IPSet(["127.0.0.0/8", "fe80::/64"])
 
-        cl = SimpleHttpClient(self.hs, ip_blacklist=ip_blacklist)
+        cl = SimpleHttpClient(self.hs, ip_blocklist=ip_blocklist)
 
-        # Try making a GET request to a blacklisted IPv4 address
+        # Try making a GET request to a blocked IPv4 address
         # ------------------------------------------------------
         # Make the request
         d = defer.ensureDeferred(cl.get_json("http://internal:8008/foo/bar"))
@@ -145,7 +145,7 @@ class SimpleHttpClientTests(HomeserverTestCase):
 
         self.failureResultOf(d, DNSLookupError)
 
-        # Try making a POST request to a blacklisted IPv6 address
+        # Try making a POST request to a blocked IPv6 address
         # -------------------------------------------------------
         # Make the request
         d = defer.ensureDeferred(
@@ -159,10 +159,10 @@ class SimpleHttpClientTests(HomeserverTestCase):
         clients = self.reactor.tcpClients
         self.assertEqual(len(clients), 0)
 
-        # Check that it was due to a blacklisted DNS lookup
+        # Check that it was due to a blocked DNS lookup
         self.failureResultOf(d, DNSLookupError)
 
-        # Try making a GET request to a non-blacklisted IPv4 address
+        # Try making a GET request to a non-blocked IPv4 address
         # ----------------------------------------------------------
         # Make the request
         d = defer.ensureDeferred(cl.get_json("http://testserv:8008/foo/bar"))
diff --git a/tests/push/test_http.py b/tests/push/test_http.py
index 54f558742d..e68a979ee0 100644
--- a/tests/push/test_http.py
+++ b/tests/push/test_http.py
@@ -52,7 +52,7 @@ class HTTPPusherTests(HomeserverTestCase):
 
         m.post_json_get_json = post_json_get_json
 
-        hs = self.setup_test_homeserver(proxied_blacklisted_http_client=m)
+        hs = self.setup_test_homeserver(proxied_blocklisted_http_client=m)
 
         return hs
 
diff --git a/tests/replication/test_pusher_shard.py b/tests/replication/test_pusher_shard.py
index dcb3e6669b..875811669c 100644
--- a/tests/replication/test_pusher_shard.py
+++ b/tests/replication/test_pusher_shard.py
@@ -93,7 +93,7 @@ class PusherShardTestCase(BaseMultiWorkerStreamTestCase):
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {"worker_name": "pusher1", "pusher_instances": ["pusher1"]},
-            proxied_blacklisted_http_client=http_client_mock,
+            proxied_blocklisted_http_client=http_client_mock,
         )
 
         event_id = self._create_pusher_and_send_msg("user")
@@ -126,7 +126,7 @@ class PusherShardTestCase(BaseMultiWorkerStreamTestCase):
                 "worker_name": "pusher1",
                 "pusher_instances": ["pusher1", "pusher2"],
             },
-            proxied_blacklisted_http_client=http_client_mock1,
+            proxied_blocklisted_http_client=http_client_mock1,
         )
 
         http_client_mock2 = Mock(spec_set=["post_json_get_json"])
@@ -140,7 +140,7 @@ class PusherShardTestCase(BaseMultiWorkerStreamTestCase):
                 "worker_name": "pusher2",
                 "pusher_instances": ["pusher1", "pusher2"],
             },
-            proxied_blacklisted_http_client=http_client_mock2,
+            proxied_blocklisted_http_client=http_client_mock2,
         )
 
         # We choose a user name that we know should go to pusher1.
diff --git a/tests/rest/media/test_url_preview.py b/tests/rest/media/test_url_preview.py
index 7517155cf3..170fb0534a 100644
--- a/tests/rest/media/test_url_preview.py
+++ b/tests/rest/media/test_url_preview.py
@@ -418,9 +418,9 @@ class URLPreviewTests(unittest.HomeserverTestCase):
             channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
         )
 
-    def test_blacklisted_ip_specific(self) -> None:
+    def test_blocked_ip_specific(self) -> None:
         """
-        Blacklisted IP addresses, found via DNS, are not spidered.
+        Blocked IP addresses, found via DNS, are not spidered.
         """
         self.lookups["example.com"] = [(IPv4Address, "192.168.1.1")]
 
@@ -439,9 +439,9 @@ class URLPreviewTests(unittest.HomeserverTestCase):
             },
         )
 
-    def test_blacklisted_ip_range(self) -> None:
+    def test_blocked_ip_range(self) -> None:
         """
-        Blacklisted IP ranges, IPs found over DNS, are not spidered.
+        Blocked IP ranges, IPs found over DNS, are not spidered.
         """
         self.lookups["example.com"] = [(IPv4Address, "1.1.1.2")]
 
@@ -458,9 +458,9 @@ class URLPreviewTests(unittest.HomeserverTestCase):
             },
         )
 
-    def test_blacklisted_ip_specific_direct(self) -> None:
+    def test_blocked_ip_specific_direct(self) -> None:
         """
-        Blacklisted IP addresses, accessed directly, are not spidered.
+        Blocked IP addresses, accessed directly, are not spidered.
         """
         channel = self.make_request(
             "GET", "preview_url?url=http://192.168.1.1", shorthand=False
@@ -470,16 +470,13 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         self.assertEqual(len(self.reactor.tcpClients), 0)
         self.assertEqual(
             channel.json_body,
-            {
-                "errcode": "M_UNKNOWN",
-                "error": "IP address blocked by IP blacklist entry",
-            },
+            {"errcode": "M_UNKNOWN", "error": "IP address blocked"},
         )
         self.assertEqual(channel.code, 403)
 
-    def test_blacklisted_ip_range_direct(self) -> None:
+    def test_blocked_ip_range_direct(self) -> None:
         """
-        Blacklisted IP ranges, accessed directly, are not spidered.
+        Blocked IP ranges, accessed directly, are not spidered.
         """
         channel = self.make_request(
             "GET", "preview_url?url=http://1.1.1.2", shorthand=False
@@ -488,15 +485,12 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         self.assertEqual(channel.code, 403)
         self.assertEqual(
             channel.json_body,
-            {
-                "errcode": "M_UNKNOWN",
-                "error": "IP address blocked by IP blacklist entry",
-            },
+            {"errcode": "M_UNKNOWN", "error": "IP address blocked"},
         )
 
-    def test_blacklisted_ip_range_whitelisted_ip(self) -> None:
+    def test_blocked_ip_range_whitelisted_ip(self) -> None:
         """
-        Blacklisted but then subsequently whitelisted IP addresses can be
+        Blocked but then subsequently whitelisted IP addresses can be
         spidered.
         """
         self.lookups["example.com"] = [(IPv4Address, "1.1.1.1")]
@@ -527,10 +521,10 @@ class URLPreviewTests(unittest.HomeserverTestCase):
             channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
         )
 
-    def test_blacklisted_ip_with_external_ip(self) -> None:
+    def test_blocked_ip_with_external_ip(self) -> None:
         """
-        If a hostname resolves a blacklisted IP, even if there's a
-        non-blacklisted one, it will be rejected.
+        If a hostname resolves a blocked IP, even if there's a non-blocked one,
+        it will be rejected.
         """
         # Hardcode the URL resolving to the IP we want.
         self.lookups["example.com"] = [
@@ -550,9 +544,9 @@ class URLPreviewTests(unittest.HomeserverTestCase):
             },
         )
 
-    def test_blacklisted_ipv6_specific(self) -> None:
+    def test_blocked_ipv6_specific(self) -> None:
         """
-        Blacklisted IP addresses, found via DNS, are not spidered.
+        Blocked IP addresses, found via DNS, are not spidered.
         """
         self.lookups["example.com"] = [
             (IPv6Address, "3fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff")
@@ -573,9 +567,9 @@ class URLPreviewTests(unittest.HomeserverTestCase):
             },
         )
 
-    def test_blacklisted_ipv6_range(self) -> None:
+    def test_blocked_ipv6_range(self) -> None:
         """
-        Blacklisted IP ranges, IPs found over DNS, are not spidered.
+        Blocked IP ranges, IPs found over DNS, are not spidered.
         """
         self.lookups["example.com"] = [(IPv6Address, "2001:800::1")]
 
@@ -1359,7 +1353,7 @@ class URLPreviewTests(unittest.HomeserverTestCase):
 
     @unittest.override_config({"url_preview_url_blacklist": [{"port": "*"}]})
     def test_blocked_port(self) -> None:
-        """Tests that blacklisting URLs with a port makes previewing such URLs
+        """Tests that blocking URLs with a port makes previewing such URLs
         fail with a 403 error and doesn't impact other previews.
         """
         self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
@@ -1401,7 +1395,7 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         {"url_preview_url_blacklist": [{"netloc": "example.com"}]}
     )
     def test_blocked_url(self) -> None:
-        """Tests that blacklisting URLs with a host makes previewing such URLs
+        """Tests that blocking URLs with a host makes previewing such URLs
         fail with a 403 error.
         """
         self.lookups["example.com"] = [(IPv4Address, "10.1.2.3")]
-- 
cgit 1.5.1


From 736199b7638175c439fff10a1f8a2d7da96838e5 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Fri, 19 May 2023 16:13:44 +0000
Subject: Remove old R30 because R30v2 supersedes it (#10428)

R30v2 has been out since 2021-07-19 (https://github.com/matrix-org/synapse/pull/10332)
and we started collecting stats on 2021-08-16. Since it's been over a year now
(almost 2 years), that is a long enough grace period for us to rip it out.
---
 changelog.d/10428.removal                          |   1 +
 .../reporting_homeserver_usage_statistics.md       |   5 -
 synapse/app/phone_stats_home.py                    |   4 -
 synapse/storage/databases/main/metrics.py          |  83 -----------
 tests/app/test_phone_stats_home.py                 | 154 ---------------------
 5 files changed, 1 insertion(+), 246 deletions(-)
 create mode 100644 changelog.d/10428.removal

(limited to 'synapse')

diff --git a/changelog.d/10428.removal b/changelog.d/10428.removal
new file mode 100644
index 0000000000..c056e89585
--- /dev/null
+++ b/changelog.d/10428.removal
@@ -0,0 +1 @@
+Remove the old version of the R30 (30-day retained users) phone-home metric.
diff --git a/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md b/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md
index 3a7ed7c806..60b758e33b 100644
--- a/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md
+++ b/docs/usage/administration/monitoring/reporting_homeserver_usage_statistics.md
@@ -42,11 +42,6 @@ The following statistics are sent to the configured reporting endpoint:
 | `daily_e2ee_messages`      | int    | The number of (state) events with the type `m.room.encrypted` seen in the last 24 hours.                                                                                                                                                                                                        |
 | `daily_sent_messages`      | int    | The number of (state) events sent by a local user with the type `m.room.message` seen in the last 24 hours.                                                                                                                                                                                     |
 | `daily_sent_e2ee_messages` | int    | The number of (state) events sent by a local user with the type `m.room.encrypted` seen in the last 24 hours.                                                                                                                                                                                   |
-| `r30_users_all`            | int    | The number of 30 day retained users, defined as users who have created their accounts more than 30 days ago, where they were last seen at most 30 days ago and where those two timestamps are over 30 days apart. Includes clients that do not fit into the below r30 client types.             |
-| `r30_users_android`        | int    | The number of 30 day retained users, as defined above. Filtered only to clients with "Android" in the user agent string.                                                                                                                                                                        |
-| `r30_users_ios`            | int    | The number of 30 day retained users, as defined above. Filtered only to clients with "iOS" in the user agent string.                                                                                                                                                                            |
-| `r30_users_electron`       | int    | The number of 30 day retained users, as defined above. Filtered only to clients with "Electron" in the user agent string.                                                                                                                                                                       |
-| `r30_users_web`            | int    | The number of 30 day retained users, as defined above. Filtered only to clients with "Mozilla" or "Gecko" in the user agent string.                                                                                                                                                             |
 | `r30v2_users_all`          | int    | The number of 30 day retained users, with a revised algorithm. Defined as users that appear more than once in the past 60 days, and have more than 30 days between the most and least recent appearances in the past 60 days. Includes clients that do not fit into the below r30 client types. |
 | `r30v2_users_android`      | int    | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "android" (case-insensitive) in the user agent string.                                                                                                                           |
 | `r30v2_users_ios`          | int    | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "ios" (case-insensitive) in the user agent string.                                                                                                                               |
diff --git a/synapse/app/phone_stats_home.py b/synapse/app/phone_stats_home.py
index 897dd3edac..09988670da 100644
--- a/synapse/app/phone_stats_home.py
+++ b/synapse/app/phone_stats_home.py
@@ -127,10 +127,6 @@ async def phone_stats_home(
     daily_sent_messages = await store.count_daily_sent_messages()
     stats["daily_sent_messages"] = daily_sent_messages
 
-    r30_results = await store.count_r30_users()
-    for name, count in r30_results.items():
-        stats["r30_users_" + name] = count
-
     r30v2_results = await store.count_r30v2_users()
     for name, count in r30v2_results.items():
         stats["r30v2_users_" + name] = count
diff --git a/synapse/storage/databases/main/metrics.py b/synapse/storage/databases/main/metrics.py
index 14294a0bb8..595e22982e 100644
--- a/synapse/storage/databases/main/metrics.py
+++ b/synapse/storage/databases/main/metrics.py
@@ -248,89 +248,6 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore):
         (count,) = cast(Tuple[int], txn.fetchone())
         return count
 
-    async def count_r30_users(self) -> Dict[str, int]:
-        """
-        Counts the number of 30 day retained users, defined as:-
-         * Users who have created their accounts more than 30 days ago
-         * Where last seen at most 30 days ago
-         * Where account creation and last_seen are > 30 days apart
-
-        Returns:
-             A mapping of counts globally as well as broken out by platform.
-        """
-
-        def _count_r30_users(txn: LoggingTransaction) -> Dict[str, int]:
-            thirty_days_in_secs = 86400 * 30
-            now = int(self._clock.time())
-            thirty_days_ago_in_secs = now - thirty_days_in_secs
-
-            sql = """
-                SELECT platform, COUNT(*) FROM (
-                     SELECT
-                        users.name, platform, users.creation_ts * 1000,
-                        MAX(uip.last_seen)
-                     FROM users
-                     INNER JOIN (
-                         SELECT
-                         user_id,
-                         last_seen,
-                         CASE
-                             WHEN user_agent LIKE '%%Android%%' THEN 'android'
-                             WHEN user_agent LIKE '%%iOS%%' THEN 'ios'
-                             WHEN user_agent LIKE '%%Electron%%' THEN 'electron'
-                             WHEN user_agent LIKE '%%Mozilla%%' THEN 'web'
-                             WHEN user_agent LIKE '%%Gecko%%' THEN 'web'
-                             ELSE 'unknown'
-                         END
-                         AS platform
-                         FROM user_ips
-                     ) uip
-                     ON users.name = uip.user_id
-                     AND users.appservice_id is NULL
-                     AND users.creation_ts < ?
-                     AND uip.last_seen/1000 > ?
-                     AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
-                     GROUP BY users.name, platform, users.creation_ts
-                ) u GROUP BY platform
-            """
-
-            results = {}
-            txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs))
-
-            for row in txn:
-                if row[0] == "unknown":
-                    pass
-                results[row[0]] = row[1]
-
-            sql = """
-                SELECT COUNT(*) FROM (
-                    SELECT users.name, users.creation_ts * 1000,
-                                                        MAX(uip.last_seen)
-                    FROM users
-                    INNER JOIN (
-                        SELECT
-                        user_id,
-                        last_seen
-                        FROM user_ips
-                    ) uip
-                    ON users.name = uip.user_id
-                    AND appservice_id is NULL
-                    AND users.creation_ts < ?
-                    AND uip.last_seen/1000 > ?
-                    AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
-                    GROUP BY users.name, users.creation_ts
-                ) u
-            """
-
-            txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs))
-
-            (count,) = cast(Tuple[int], txn.fetchone())
-            results["all"] = count
-
-            return results
-
-        return await self.db_pool.runInteraction("count_r30_users", _count_r30_users)
-
     async def count_r30v2_users(self) -> Dict[str, int]:
         """
         Counts the number of 30 day retained users, defined as users that:
diff --git a/tests/app/test_phone_stats_home.py b/tests/app/test_phone_stats_home.py
index a860eedbcf..9305b758d7 100644
--- a/tests/app/test_phone_stats_home.py
+++ b/tests/app/test_phone_stats_home.py
@@ -4,7 +4,6 @@ from synapse.rest.client import login, room
 from synapse.server import HomeServer
 from synapse.util import Clock
 
-from tests import unittest
 from tests.server import ThreadedMemoryReactorClock
 from tests.unittest import HomeserverTestCase
 
@@ -12,154 +11,6 @@ FIVE_MINUTES_IN_SECONDS = 300
 ONE_DAY_IN_SECONDS = 86400
 
 
-class PhoneHomeTestCase(HomeserverTestCase):
-    servlets = [
-        synapse.rest.admin.register_servlets_for_client_rest_resource,
-        room.register_servlets,
-        login.register_servlets,
-    ]
-
-    # Override the retention time for the user_ips table because otherwise it
-    # gets pruned too aggressively for our R30 test.
-    @unittest.override_config({"user_ips_max_age": "365d"})
-    def test_r30_minimum_usage(self) -> None:
-        """
-        Tests the minimum amount of interaction necessary for the R30 metric
-        to consider a user 'retained'.
-        """
-
-        # Register a user, log it in, create a room and send a message
-        user_id = self.register_user("u1", "secret!")
-        access_token = self.login("u1", "secret!")
-        room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token)
-        self.helper.send(room_id, "message", tok=access_token)
-
-        # Check the R30 results do not count that user.
-        r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
-        self.assertEqual(r30_results, {"all": 0})
-
-        # Advance 30 days (+ 1 second, because strict inequality causes issues if we are
-        # bang on 30 days later).
-        self.reactor.advance(30 * ONE_DAY_IN_SECONDS + 1)
-
-        # (Make sure the user isn't somehow counted by this point.)
-        r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
-        self.assertEqual(r30_results, {"all": 0})
-
-        # Send a message (this counts as activity)
-        self.helper.send(room_id, "message2", tok=access_token)
-
-        # We have to wait some time for _update_client_ips_batch to get
-        # called and update the user_ips table.
-        self.reactor.advance(2 * 60 * 60)
-
-        # *Now* the user is counted.
-        r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
-        self.assertEqual(r30_results, {"all": 1, "unknown": 1})
-
-        # Advance 29 days. The user has now not posted for 29 days.
-        self.reactor.advance(29 * ONE_DAY_IN_SECONDS)
-
-        # The user is still counted.
-        r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
-        self.assertEqual(r30_results, {"all": 1, "unknown": 1})
-
-        # Advance another day. The user has now not posted for 30 days.
-        self.reactor.advance(ONE_DAY_IN_SECONDS)
-
-        # The user is now no longer counted in R30.
-        r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
-        self.assertEqual(r30_results, {"all": 0})
-
-    def test_r30_minimum_usage_using_default_config(self) -> None:
-        """
-        Tests the minimum amount of interaction necessary for the R30 metric
-        to consider a user 'retained'.
-
-        N.B. This test does not override the `user_ips_max_age` config setting,
-        which defaults to 28 days.
-        """
-
-        # Register a user, log it in, create a room and send a message
-        user_id = self.register_user("u1", "secret!")
-        access_token = self.login("u1", "secret!")
-        room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token)
-        self.helper.send(room_id, "message", tok=access_token)
-
-        # Check the R30 results do not count that user.
-        r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
-        self.assertEqual(r30_results, {"all": 0})
-
-        # Advance 30 days (+ 1 second, because strict inequality causes issues if we are
-        # bang on 30 days later).
-        self.reactor.advance(30 * ONE_DAY_IN_SECONDS + 1)
-
-        # (Make sure the user isn't somehow counted by this point.)
-        r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
-        self.assertEqual(r30_results, {"all": 0})
-
-        # Send a message (this counts as activity)
-        self.helper.send(room_id, "message2", tok=access_token)
-
-        # We have to wait some time for _update_client_ips_batch to get
-        # called and update the user_ips table.
-        self.reactor.advance(2 * 60 * 60)
-
-        # *Now* the user is counted.
-        r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
-        self.assertEqual(r30_results, {"all": 1, "unknown": 1})
-
-        # Advance 27 days. The user has now not posted for 27 days.
-        self.reactor.advance(27 * ONE_DAY_IN_SECONDS)
-
-        # The user is still counted.
-        r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
-        self.assertEqual(r30_results, {"all": 1, "unknown": 1})
-
-        # Advance another day. The user has now not posted for 28 days.
-        self.reactor.advance(ONE_DAY_IN_SECONDS)
-
-        # The user is now no longer counted in R30.
-        # (This is because the user_ips table has been pruned, which by default
-        # only preserves the last 28 days of entries.)
-        r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
-        self.assertEqual(r30_results, {"all": 0})
-
-    def test_r30_user_must_be_retained_for_at_least_a_month(self) -> None:
-        """
-        Tests that a newly-registered user must be retained for a whole month
-        before appearing in the R30 statistic, even if they post every day
-        during that time!
-        """
-        # Register a user and send a message
-        user_id = self.register_user("u1", "secret!")
-        access_token = self.login("u1", "secret!")
-        room_id = self.helper.create_room_as(room_creator=user_id, tok=access_token)
-        self.helper.send(room_id, "message", tok=access_token)
-
-        # Check the user does not contribute to R30 yet.
-        r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
-        self.assertEqual(r30_results, {"all": 0})
-
-        for _ in range(30):
-            # This loop posts a message every day for 30 days
-            self.reactor.advance(ONE_DAY_IN_SECONDS)
-            self.helper.send(room_id, "I'm still here", tok=access_token)
-
-            # Notice that the user *still* does not contribute to R30!
-            r30_results = self.get_success(
-                self.hs.get_datastores().main.count_r30_users()
-            )
-            self.assertEqual(r30_results, {"all": 0})
-
-        self.reactor.advance(ONE_DAY_IN_SECONDS)
-        self.helper.send(room_id, "Still here!", tok=access_token)
-
-        # *Now* the user appears in R30.
-        r30_results = self.get_success(self.hs.get_datastores().main.count_r30_users())
-        self.assertEqual(r30_results, {"all": 1, "unknown": 1})
-
-
 class PhoneHomeR30V2TestCase(HomeserverTestCase):
     servlets = [
         synapse.rest.admin.register_servlets_for_client_rest_resource,
@@ -363,11 +214,6 @@ class PhoneHomeR30V2TestCase(HomeserverTestCase):
             r30_results, {"all": 0, "android": 0, "electron": 0, "ios": 0, "web": 0}
         )
 
-        # Check that this is a situation where old R30 differs:
-        # old R30 DOES count this as 'retained'.
-        r30_results = self.get_success(store.count_r30_users())
-        self.assertEqual(r30_results, {"all": 1, "ios": 1})
-
         # Now we want to check that the user will still be able to appear in
         # R30v2 as long as the user performs some other activity between
         # 30 and 60 days later.
-- 
cgit 1.5.1


From ca3c07e833816e69bbaf0372e6cc79f52e6db88e Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Fri, 19 May 2023 11:18:45 -0500
Subject: Trace how many new events from the backfill response we need to
 process (#15633)

You can kinda derive this information from how many `_process_pulled_event` spans there are, but it would be nice to be able to see it at a glance.
---
 changelog.d/15633.misc               | 1 +
 synapse/handlers/federation_event.py | 5 +++++
 2 files changed, 6 insertions(+)
 create mode 100644 changelog.d/15633.misc

(limited to 'synapse')

diff --git a/changelog.d/15633.misc b/changelog.d/15633.misc
new file mode 100644
index 0000000000..4126a20602
--- /dev/null
+++ b/changelog.d/15633.misc
@@ -0,0 +1 @@
+Trace how many new events from the backfill response we need to process.
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 06343d40e4..9a08618da5 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -890,6 +890,11 @@ class FederationEventHandler:
             # Continue on with the events that are new to us.
             new_events.append(event)
 
+        set_tag(
+            SynapseTags.RESULT_PREFIX + "new_events.length",
+            str(len(new_events)),
+        )
+
         # We want to sort these by depth so we process them and
         # tell clients about them in order.
         sorted_events = sorted(new_events, key=lambda x: x.depth)
-- 
cgit 1.5.1


From 703a8f9c67cfe25b956dfdcca654818d52fa7ebd Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Fri, 19 May 2023 12:26:58 -0500
Subject: Instrument `state` and `state_group` storage related things (tracing)
 (#15610)

Instrument `state` and `state_group` storage related things (tracing) so it's a little more clear where these database transactions are coming from, as there are a lot of wires crossing in these functions.

Part of `/messages` performance investigation: https://github.com/matrix-org/synapse/issues/13356
---
 changelog.d/15610.misc                        |  1 +
 synapse/events/snapshot.py                    |  5 ++++
 synapse/state/__init__.py                     |  4 ++++
 synapse/storage/controllers/state.py          | 33 +++++++++++++++++++++++++++
 synapse/storage/databases/state/bg_updates.py |  5 ++++
 synapse/storage/databases/state/store.py      | 15 ++++++++++++
 6 files changed, 63 insertions(+)
 create mode 100644 changelog.d/15610.misc

(limited to 'synapse')

diff --git a/changelog.d/15610.misc b/changelog.d/15610.misc
new file mode 100644
index 0000000000..2eff30f6e3
--- /dev/null
+++ b/changelog.d/15610.misc
@@ -0,0 +1 @@
+Instrument `state` and `state_group` storage-related operations to better picture what's happening when tracing.
diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py
index 9b4d692cf4..e7e8225b8e 100644
--- a/synapse/events/snapshot.py
+++ b/synapse/events/snapshot.py
@@ -19,6 +19,7 @@ from immutabledict import immutabledict
 
 from synapse.appservice import ApplicationService
 from synapse.events import EventBase
+from synapse.logging.opentracing import tag_args, trace
 from synapse.types import JsonDict, StateMap
 
 if TYPE_CHECKING:
@@ -242,6 +243,8 @@ class EventContext(UnpersistedEventContextBase):
 
         return self._state_group
 
+    @trace
+    @tag_args
     async def get_current_state_ids(
         self, state_filter: Optional["StateFilter"] = None
     ) -> Optional[StateMap[str]]:
@@ -275,6 +278,8 @@ class EventContext(UnpersistedEventContextBase):
 
         return prev_state_ids
 
+    @trace
+    @tag_args
     async def get_prev_state_ids(
         self, state_filter: Optional["StateFilter"] = None
     ) -> StateMap[str]:
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index 6031095249..9bc0c3b7b9 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -45,6 +45,7 @@ from synapse.events.snapshot import (
     UnpersistedEventContextBase,
 )
 from synapse.logging.context import ContextResourceUsage
+from synapse.logging.opentracing import tag_args, trace
 from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet
 from synapse.state import v1, v2
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
@@ -270,6 +271,8 @@ class StateHandler:
         state = await entry.get_state(self._state_storage_controller, StateFilter.all())
         return await self.store.get_joined_hosts(room_id, state, entry)
 
+    @trace
+    @tag_args
     async def calculate_context_info(
         self,
         event: EventBase,
@@ -465,6 +468,7 @@ class StateHandler:
 
         return await unpersisted_context.persist(event)
 
+    @trace
     @measure_func()
     async def resolve_state_groups_for_events(
         self, room_id: str, event_ids: Collection[str], await_full_state: bool = True
diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py
index 9d7a8a792f..06a80869eb 100644
--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@@ -67,6 +67,8 @@ class StateStorageController:
         """
         self._partial_state_room_tracker.notify_un_partial_stated(room_id)
 
+    @trace
+    @tag_args
     async def get_state_group_delta(
         self, state_group: int
     ) -> Tuple[Optional[int], Optional[StateMap[str]]]:
@@ -84,6 +86,8 @@ class StateStorageController:
         state_group_delta = await self.stores.state.get_state_group_delta(state_group)
         return state_group_delta.prev_group, state_group_delta.delta_ids
 
+    @trace
+    @tag_args
     async def get_state_groups_ids(
         self, _room_id: str, event_ids: Collection[str], await_full_state: bool = True
     ) -> Dict[int, MutableStateMap[str]]:
@@ -114,6 +118,8 @@ class StateStorageController:
 
         return group_to_state
 
+    @trace
+    @tag_args
     async def get_state_ids_for_group(
         self, state_group: int, state_filter: Optional[StateFilter] = None
     ) -> StateMap[str]:
@@ -130,6 +136,8 @@ class StateStorageController:
 
         return group_to_state[state_group]
 
+    @trace
+    @tag_args
     async def get_state_groups(
         self, room_id: str, event_ids: Collection[str]
     ) -> Dict[int, List[EventBase]]:
@@ -165,6 +173,8 @@ class StateStorageController:
             for group, event_id_map in group_to_ids.items()
         }
 
+    @trace
+    @tag_args
     def _get_state_groups_from_groups(
         self, groups: List[int], state_filter: StateFilter
     ) -> Awaitable[Dict[int, StateMap[str]]]:
@@ -183,6 +193,7 @@ class StateStorageController:
         return self.stores.state._get_state_groups_from_groups(groups, state_filter)
 
     @trace
+    @tag_args
     async def get_state_for_events(
         self, event_ids: Collection[str], state_filter: Optional[StateFilter] = None
     ) -> Dict[str, StateMap[EventBase]]:
@@ -280,6 +291,8 @@ class StateStorageController:
 
         return {event: event_to_state[event] for event in event_ids}
 
+    @trace
+    @tag_args
     async def get_state_for_event(
         self, event_id: str, state_filter: Optional[StateFilter] = None
     ) -> StateMap[EventBase]:
@@ -303,6 +316,7 @@ class StateStorageController:
         return state_map[event_id]
 
     @trace
+    @tag_args
     async def get_state_ids_for_event(
         self,
         event_id: str,
@@ -333,6 +347,8 @@ class StateStorageController:
         )
         return state_map[event_id]
 
+    @trace
+    @tag_args
     def get_state_for_groups(
         self, groups: Iterable[int], state_filter: Optional[StateFilter] = None
     ) -> Awaitable[Dict[int, MutableStateMap[str]]]:
@@ -402,6 +418,8 @@ class StateStorageController:
             event_id, room_id, prev_group, delta_ids, current_state_ids
         )
 
+    @trace
+    @tag_args
     @cancellable
     async def get_current_state_ids(
         self,
@@ -442,6 +460,8 @@ class StateStorageController:
                 room_id, on_invalidate=on_invalidate
             )
 
+    @trace
+    @tag_args
     async def get_canonical_alias_for_room(self, room_id: str) -> Optional[str]:
         """Get canonical alias for room, if any
 
@@ -466,6 +486,8 @@ class StateStorageController:
 
         return event.content.get("canonical_alias")
 
+    @trace
+    @tag_args
     async def get_current_state_deltas(
         self, prev_stream_id: int, max_stream_id: int
     ) -> Tuple[int, List[Dict[str, Any]]]:
@@ -500,6 +522,7 @@ class StateStorageController:
         )
 
     @trace
+    @tag_args
     async def get_current_state(
         self, room_id: str, state_filter: Optional[StateFilter] = None
     ) -> StateMap[EventBase]:
@@ -516,6 +539,8 @@ class StateStorageController:
 
         return state_map
 
+    @trace
+    @tag_args
     async def get_current_state_event(
         self, room_id: str, event_type: str, state_key: str
     ) -> Optional[EventBase]:
@@ -527,6 +552,8 @@ class StateStorageController:
         )
         return state_map.get(key)
 
+    @trace
+    @tag_args
     async def get_current_hosts_in_room(self, room_id: str) -> AbstractSet[str]:
         """Get current hosts in room based on current state.
 
@@ -538,6 +565,8 @@ class StateStorageController:
 
         return await self.stores.main.get_current_hosts_in_room(room_id)
 
+    @trace
+    @tag_args
     async def get_current_hosts_in_room_ordered(self, room_id: str) -> List[str]:
         """Get current hosts in room based on current state.
 
@@ -553,6 +582,8 @@ class StateStorageController:
 
         return await self.stores.main.get_current_hosts_in_room_ordered(room_id)
 
+    @trace
+    @tag_args
     async def get_current_hosts_in_room_or_partial_state_approximation(
         self, room_id: str
     ) -> Collection[str]:
@@ -582,6 +613,8 @@ class StateStorageController:
 
         return hosts
 
+    @trace
+    @tag_args
     async def get_users_in_room_with_profiles(
         self, room_id: str
     ) -> Mapping[str, ProfileInfo]:
diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py
index 097dea5182..86eb1a8a08 100644
--- a/synapse/storage/databases/state/bg_updates.py
+++ b/synapse/storage/databases/state/bg_updates.py
@@ -15,6 +15,7 @@
 import logging
 from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Union
 
+from synapse.logging.opentracing import tag_args, trace
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
     DatabasePool,
@@ -40,6 +41,8 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore):
     updates.
     """
 
+    @trace
+    @tag_args
     def _count_state_group_hops_txn(
         self, txn: LoggingTransaction, state_group: int
     ) -> int:
@@ -83,6 +86,8 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore):
 
             return count
 
+    @trace
+    @tag_args
     def _get_state_groups_from_groups_txn(
         self,
         txn: LoggingTransaction,
diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py
index 29ff64e876..6984d11352 100644
--- a/synapse/storage/databases/state/store.py
+++ b/synapse/storage/databases/state/store.py
@@ -20,6 +20,7 @@ import attr
 from synapse.api.constants import EventTypes
 from synapse.events import EventBase
 from synapse.events.snapshot import UnpersistedEventContext, UnpersistedEventContextBase
+from synapse.logging.opentracing import tag_args, trace
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
     DatabasePool,
@@ -159,6 +160,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
             "get_state_group_delta", _get_state_group_delta_txn
         )
 
+    @trace
+    @tag_args
     @cancellable
     async def _get_state_groups_from_groups(
         self, groups: List[int], state_filter: StateFilter
@@ -187,6 +190,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
 
         return results
 
+    @trace
+    @tag_args
     def _get_state_for_group_using_cache(
         self,
         cache: DictionaryCache[int, StateKey, str],
@@ -239,6 +244,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
 
         return state_filter.filter_state(state_dict_ids), not missing_types
 
+    @trace
+    @tag_args
     @cancellable
     async def _get_state_for_groups(
         self, groups: Iterable[int], state_filter: Optional[StateFilter] = None
@@ -305,6 +312,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
 
         return state
 
+    @trace
+    @tag_args
     def _get_state_for_groups_using_cache(
         self,
         groups: Iterable[int],
@@ -403,6 +412,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
                 fetched_keys=non_member_types,
             )
 
+    @trace
+    @tag_args
     async def store_state_deltas_for_batched(
         self,
         events_and_context: List[Tuple[EventBase, UnpersistedEventContextBase]],
@@ -520,6 +531,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
             prev_group,
         )
 
+    @trace
+    @tag_args
     async def store_state_group(
         self,
         event_id: str,
@@ -772,6 +785,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
             ((sg,) for sg in state_groups_to_delete),
         )
 
+    @trace
+    @tag_args
     async def get_previous_state_groups(
         self, state_groups: Iterable[int]
     ) -> Dict[int, int]:
-- 
cgit 1.5.1


From c5d1e6d414fa7b4074bc72ca3719c1341a1c5379 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 22 May 2023 11:31:22 -0400
Subject: Properly parse event_fields in filters (#15607)

The event_fields property in filters should use the proper
escape rules, namely backslashes can be escaped with
an additional backslash.

This adds tests (adapted from matrix-js-sdk) and implements
the logic to properly split the event_fields strings.
---
 changelog.d/15607.bugfix    |  1 +
 synapse/api/filtering.py    | 15 +---------
 synapse/events/utils.py     | 72 ++++++++++++++++++++++++++++++++++++---------
 tests/api/test_filtering.py |  6 ----
 tests/events/test_utils.py  | 39 ++++++++++++++++++++++++
 5 files changed, 99 insertions(+), 34 deletions(-)
 create mode 100644 changelog.d/15607.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15607.bugfix b/changelog.d/15607.bugfix
new file mode 100644
index 0000000000..a2767adbe2
--- /dev/null
+++ b/changelog.d/15607.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where filters with multiple backslashes were rejected.
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index de7c56bc0f..82aeef8d19 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -128,20 +128,7 @@ USER_FILTER_SCHEMA = {
         "account_data": {"$ref": "#/definitions/filter"},
         "room": {"$ref": "#/definitions/room_filter"},
         "event_format": {"type": "string", "enum": ["client", "federation"]},
-        "event_fields": {
-            "type": "array",
-            "items": {
-                "type": "string",
-                # Don't allow '\\' in event field filters. This makes matching
-                # events a lot easier as we can then use a negative lookbehind
-                # assertion to split '\.' If we allowed \\ then it would
-                # incorrectly split '\\.' See synapse.events.utils.serialize_event
-                #
-                # Note that because this is a regular expression, we have to escape
-                # each backslash in the pattern.
-                "pattern": r"^((?!\\\\).)*$",
-            },
-        },
+        "event_fields": {"type": "array", "items": {"type": "string"}},
     },
     "additionalProperties": True,  # Allow new fields for forward compatibility
 }
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index e6d040176b..e7b7b78b84 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -22,6 +22,7 @@ from typing import (
     Iterable,
     List,
     Mapping,
+    Match,
     MutableMapping,
     Optional,
     Union,
@@ -46,12 +47,10 @@ if TYPE_CHECKING:
     from synapse.handlers.relations import BundledAggregations
 
 
-# Split strings on "." but not "\." This uses a negative lookbehind assertion for '\'
-# (?<!stuff) matches if the current position in the string is not preceded
-# by a match for 'stuff'.
-# TODO: This is fast, but fails to handle "foo\\.bar" which should be treated as
-#       the literal fields "foo\" and "bar" but will instead be treated as "foo\\.bar"
-SPLIT_FIELD_REGEX = re.compile(r"(?<!\\)\.")
+# Split strings on "." but not "\." (or "\\\.").
+SPLIT_FIELD_REGEX = re.compile(r"\\*\.")
+# Find escaped characters, i.e. those with a \ in front of them.
+ESCAPE_SEQUENCE_PATTERN = re.compile(r"\\(.)")
 
 CANONICALJSON_MAX_INT = (2**53) - 1
 CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT
@@ -253,6 +252,57 @@ def _copy_field(src: JsonDict, dst: JsonDict, field: List[str]) -> None:
     sub_out_dict[key_to_move] = sub_dict[key_to_move]
 
 
+def _escape_slash(m: Match[str]) -> str:
+    """
+    Replacement function; replace a backslash-backslash or backslash-dot with the
+    second character. Leaves any other string alone.
+    """
+    if m.group(1) in ("\\", "."):
+        return m.group(1)
+    return m.group(0)
+
+
+def _split_field(field: str) -> List[str]:
+    """
+    Splits strings on unescaped dots and removes escaping.
+
+    Args:
+        field: A string representing a path to a field.
+
+    Returns:
+        A list of nested fields to traverse.
+    """
+
+    # Convert the field and remove escaping:
+    #
+    # 1. "content.body.thing\.with\.dots"
+    # 2. ["content", "body", "thing\.with\.dots"]
+    # 3. ["content", "body", "thing.with.dots"]
+
+    # Find all dots (and their preceding backslashes). If the dot is unescaped
+    # then emit a new field part.
+    result = []
+    prev_start = 0
+    for match in SPLIT_FIELD_REGEX.finditer(field):
+        # If the match is an *even* number of characters then the dot was escaped.
+        if len(match.group()) % 2 == 0:
+            continue
+
+        # Add a new part (up to the dot, exclusive) after escaping.
+        result.append(
+            ESCAPE_SEQUENCE_PATTERN.sub(
+                _escape_slash, field[prev_start : match.end() - 1]
+            )
+        )
+        prev_start = match.end()
+
+    # Add any part of the field after the last unescaped dot. (Note that if the
+    # last character is an unescaped dot this correctly adds a blank string.)
+    result.append(re.sub(r"\\(.)", _escape_slash, field[prev_start:]))
+
+    return result
+
+
 def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
     """Return a new dict with only the fields in 'dictionary' which are present
     in 'fields'.
@@ -260,7 +310,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
     If there are no event fields specified then all fields are included.
     The entries may include '.' characters to indicate sub-fields.
     So ['content.body'] will include the 'body' field of the 'content' object.
-    A literal '.' character in a field name may be escaped using a '\'.
+    A literal '.' or '\' character in a field name may be escaped using a '\'.
 
     Args:
         dictionary: The dictionary to read from.
@@ -275,13 +325,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
 
     # for each field, convert it:
     # ["content.body.thing\.with\.dots"] => [["content", "body", "thing\.with\.dots"]]
-    split_fields = [SPLIT_FIELD_REGEX.split(f) for f in fields]
-
-    # for each element of the output array of arrays:
-    # remove escaping so we can use the right key names.
-    split_fields[:] = [
-        [f.replace(r"\.", r".") for f in field_array] for field_array in split_fields
-    ]
+    split_fields = [_split_field(f) for f in fields]
 
     output: JsonDict = {}
     for field_array in split_fields:
diff --git a/tests/api/test_filtering.py b/tests/api/test_filtering.py
index 222449baac..aa6af5ad7b 100644
--- a/tests/api/test_filtering.py
+++ b/tests/api/test_filtering.py
@@ -48,8 +48,6 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         invalid_filters: List[JsonDict] = [
             # `account_data` must be a dictionary
             {"account_data": "Hello World"},
-            # `event_fields` entries must not contain backslashes
-            {"event_fields": [r"\\foo"]},
             # `event_format` must be "client" or "federation"
             {"event_format": "other"},
             # `not_rooms` must contain valid room IDs
@@ -114,10 +112,6 @@ class FilteringTestCase(unittest.HomeserverTestCase):
                 "event_format": "client",
                 "event_fields": ["type", "content", "sender"],
             },
-            # a single backslash should be permitted (though it is debatable whether
-            # it should be permitted before anything other than `.`, and what that
-            # actually means)
-            #
             # (note that event_fields is implemented in
             # synapse.events.utils.serialize_event, and so whether this actually works
             # is tested elsewhere. We just want to check that it is allowed through the
diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py
index e40eac2eb0..c9a610db9a 100644
--- a/tests/events/test_utils.py
+++ b/tests/events/test_utils.py
@@ -16,6 +16,7 @@ import unittest as stdlib_unittest
 from typing import Any, List, Mapping, Optional
 
 import attr
+from parameterized import parameterized
 
 from synapse.api.constants import EventContentFields
 from synapse.api.room_versions import RoomVersions
@@ -23,6 +24,7 @@ from synapse.events import EventBase, make_event_from_dict
 from synapse.events.utils import (
     PowerLevelsContent,
     SerializeEventConfig,
+    _split_field,
     copy_and_fixup_power_levels_contents,
     maybe_upsert_event_field,
     prune_event,
@@ -794,3 +796,40 @@ class CopyPowerLevelsContentTestCase(stdlib_unittest.TestCase):
     def test_invalid_nesting_raises_type_error(self) -> None:
         with self.assertRaises(TypeError):
             copy_and_fixup_power_levels_contents({"a": {"b": {"c": 1}}})  # type: ignore[dict-item]
+
+
+class SplitFieldTestCase(stdlib_unittest.TestCase):
+    @parameterized.expand(
+        [
+            # A field with no dots.
+            ["m", ["m"]],
+            # Simple dotted fields.
+            ["m.foo", ["m", "foo"]],
+            ["m.foo.bar", ["m", "foo", "bar"]],
+            # Backslash is used as an escape character.
+            [r"m\.foo", ["m.foo"]],
+            [r"m\\.foo", ["m\\", "foo"]],
+            [r"m\\\.foo", [r"m\.foo"]],
+            [r"m\\\\.foo", ["m\\\\", "foo"]],
+            [r"m\foo", [r"m\foo"]],
+            [r"m\\foo", [r"m\foo"]],
+            [r"m\\\foo", [r"m\\foo"]],
+            [r"m\\\\foo", [r"m\\foo"]],
+            # Ensure that escapes at the end don't cause issues.
+            ["m.foo\\", ["m", "foo\\"]],
+            ["m.foo\\", ["m", "foo\\"]],
+            [r"m.foo\.", ["m", "foo."]],
+            [r"m.foo\\.", ["m", "foo\\", ""]],
+            [r"m.foo\\\.", ["m", r"foo\."]],
+            # Empty parts (corresponding to properties which are an empty string) are allowed.
+            [".m", ["", "m"]],
+            ["..m", ["", "", "m"]],
+            ["m.", ["m", ""]],
+            ["m..", ["m", "", ""]],
+            ["m..foo", ["m", "", "foo"]],
+            # Invalid escape sequences.
+            [r"\m", [r"\m"]],
+        ]
+    )
+    def test_split_field(self, input: str, expected: str) -> None:
+        self.assertEqual(_split_field(input), expected)
-- 
cgit 1.5.1


From 737f7ddf5873a28d4334dc7f6b25edbaaaf934c7 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Mon, 22 May 2023 18:58:58 +0100
Subject: Remove outdated comment in log config (#15648)

---
 changelog.d/15648.doc       | 1 +
 docs/sample_log_config.yaml | 4 +---
 synapse/config/logger.py    | 4 +---
 3 files changed, 3 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/15648.doc

(limited to 'synapse')

diff --git a/changelog.d/15648.doc b/changelog.d/15648.doc
new file mode 100644
index 0000000000..70f65ebbff
--- /dev/null
+++ b/changelog.d/15648.doc
@@ -0,0 +1 @@
+Remove outdated comment from the generated and sample homeserver log configs.
\ No newline at end of file
diff --git a/docs/sample_log_config.yaml b/docs/sample_log_config.yaml
index 6339160d00..ae0318122e 100644
--- a/docs/sample_log_config.yaml
+++ b/docs/sample_log_config.yaml
@@ -68,9 +68,7 @@ root:
     # Write logs to the `buffer` handler, which will buffer them together in memory,
     # then write them to a file.
     #
-    # Replace "buffer" with "console" to log to stderr instead. (Note that you'll
-    # also need to update the configuration for the `twisted` logger above, in
-    # this case.)
+    # Replace "buffer" with "console" to log to stderr instead.
     #
     handlers: [buffer]
 
diff --git a/synapse/config/logger.py b/synapse/config/logger.py
index 56db875b25..1e080133dc 100644
--- a/synapse/config/logger.py
+++ b/synapse/config/logger.py
@@ -117,9 +117,7 @@ root:
     # Write logs to the `buffer` handler, which will buffer them together in memory,
     # then write them to a file.
     #
-    # Replace "buffer" with "console" to log to stderr instead. (Note that you'll
-    # also need to update the configuration for the `twisted` logger above, in
-    # this case.)
+    # Replace "buffer" with "console" to log to stderr instead.
     #
     handlers: [buffer]
 
-- 
cgit 1.5.1


From 1df0221bda65cc90ee3a15d210b87e8065bc865f Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Tue, 23 May 2023 08:05:30 -0500
Subject: Use a custom scheme & the worker name for replication requests.
 (#15578)

All the information needed is already in the `instance_map`, so
use that instead of passing the hostname / IP & port manually
for each replication request.

This consolidates logic for future improvements of using e.g.
UNIX sockets for workers.
---
 changelog.d/15578.misc            |  1 +
 synapse/http/client.py            |  1 +
 synapse/http/replicationagent.py  | 34 +++++++++++++++++++++++++++-------
 synapse/replication/http/_base.py | 18 ++++++------------
 4 files changed, 35 insertions(+), 19 deletions(-)
 create mode 100644 changelog.d/15578.misc

(limited to 'synapse')

diff --git a/changelog.d/15578.misc b/changelog.d/15578.misc
new file mode 100644
index 0000000000..a54422239b
--- /dev/null
+++ b/changelog.d/15578.misc
@@ -0,0 +1 @@
+Allow connecting to HTTP Replication Endpoints by using `worker_name` when constructing the request.
diff --git a/synapse/http/client.py b/synapse/http/client.py
index f1ab7a8bc9..09ea93e10d 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -835,6 +835,7 @@ class ReplicationClient(BaseHttpClient):
 
         self.agent: IAgent = ReplicationAgent(
             hs.get_reactor(),
+            hs.config.worker.instance_map,
             contextFactory=hs.get_http_client_context_factory(),
             pool=pool,
         )
diff --git a/synapse/http/replicationagent.py b/synapse/http/replicationagent.py
index 5ecd08be0f..800f21873d 100644
--- a/synapse/http/replicationagent.py
+++ b/synapse/http/replicationagent.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import logging
-from typing import Optional
+from typing import Dict, Optional
 
 from zope.interface import implementer
 
@@ -32,6 +32,7 @@ from twisted.web.iweb import (
     IResponse,
 )
 
+from synapse.config.workers import InstanceLocationConfig
 from synapse.types import ISynapseReactor
 
 logger = logging.getLogger(__name__)
@@ -44,9 +45,11 @@ class ReplicationEndpointFactory:
     def __init__(
         self,
         reactor: ISynapseReactor,
+        instance_map: Dict[str, InstanceLocationConfig],
         context_factory: IPolicyForHTTPS,
     ) -> None:
         self.reactor = reactor
+        self.instance_map = instance_map
         self.context_factory = context_factory
 
     def endpointForURI(self, uri: URI) -> IStreamClientEndpoint:
@@ -58,15 +61,29 @@ class ReplicationEndpointFactory:
 
         Returns: The correct client endpoint object
         """
-        if uri.scheme in (b"http", b"https"):
-            endpoint = HostnameEndpoint(self.reactor, uri.host, uri.port)
-            if uri.scheme == b"https":
+        # The given URI has a special scheme and includes the worker name. The
+        # actual connection details are pulled from the instance map.
+        worker_name = uri.netloc.decode("utf-8")
+        scheme = self.instance_map[worker_name].scheme()
+
+        if scheme in ("http", "https"):
+            endpoint = HostnameEndpoint(
+                self.reactor,
+                self.instance_map[worker_name].host,
+                self.instance_map[worker_name].port,
+            )
+            if scheme == "https":
                 endpoint = wrapClientTLS(
-                    self.context_factory.creatorForNetloc(uri.host, uri.port), endpoint
+                    # The 'port' argument below isn't actually used by the function
+                    self.context_factory.creatorForNetloc(
+                        self.instance_map[worker_name].host,
+                        self.instance_map[worker_name].port,
+                    ),
+                    endpoint,
                 )
             return endpoint
         else:
-            raise SchemeNotSupported(f"Unsupported scheme: {uri.scheme!r}")
+            raise SchemeNotSupported(f"Unsupported scheme: {scheme}")
 
 
 @implementer(IAgent)
@@ -80,6 +97,7 @@ class ReplicationAgent(_AgentBase):
     def __init__(
         self,
         reactor: ISynapseReactor,
+        instance_map: Dict[str, InstanceLocationConfig],
         contextFactory: IPolicyForHTTPS,
         connectTimeout: Optional[float] = None,
         bindAddress: Optional[bytes] = None,
@@ -102,7 +120,9 @@ class ReplicationAgent(_AgentBase):
                 created.
         """
         _AgentBase.__init__(self, reactor, pool)
-        endpoint_factory = ReplicationEndpointFactory(reactor, contextFactory)
+        endpoint_factory = ReplicationEndpointFactory(
+            reactor, instance_map, contextFactory
+        )
         self._endpointFactory = endpoint_factory
 
     def request(
diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py
index dc7820f963..63cf24a14d 100644
--- a/synapse/replication/http/_base.py
+++ b/synapse/replication/http/_base.py
@@ -219,11 +219,7 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
             with outgoing_gauge.track_inprogress():
                 if instance_name == local_instance_name:
                     raise Exception("Trying to send HTTP request to self")
-                if instance_name in instance_map:
-                    host = instance_map[instance_name].host
-                    port = instance_map[instance_name].port
-                    tls = instance_map[instance_name].tls
-                else:
+                if instance_name not in instance_map:
                     raise Exception(
                         "Instance %r not in 'instance_map' config" % (instance_name,)
                     )
@@ -271,13 +267,11 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
                         "Unknown METHOD on %s replication endpoint" % (cls.NAME,)
                     )
 
-                # Here the protocol is hard coded to be http by default or https in case the replication
-                # port is set to have tls true.
-                scheme = "https" if tls else "http"
-                uri = "%s://%s:%s/_synapse/replication/%s/%s" % (
-                    scheme,
-                    host,
-                    port,
+                # Hard code a special scheme to show this is only used for replication.
+                # The instance_name will be passed into the ReplicationEndpointFactory
+                # to determine connection details from the instance_map.
+                uri = "synapse-replication://%s/_synapse/replication/%s/%s" % (
+                    instance_name,
                     cls.NAME,
                     "/".join(url_args),
                 )
-- 
cgit 1.5.1


From 7c9b91790c013d11ca88a9d01e0054939eda8523 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 23 May 2023 10:35:43 -0400
Subject: Consolidate logic to check for deactivated users. (#15634)

This moves the deactivated user check to the method which
all login types call.

Additionally updates the application service tests to be more
realistic by removing invalid tests and fixing server names.
---
 changelog.d/15634.bugfix                         |  1 +
 docs/modules/password_auth_provider_callbacks.md |  3 ++
 synapse/appservice/__init__.py                   |  3 +-
 synapse/handlers/auth.py                         | 14 ++----
 synapse/handlers/jwt.py                          | 19 ++------
 synapse/rest/client/login.py                     | 23 +++++++--
 tests/handlers/test_password_providers.py        | 59 +++++++++---------------
 7 files changed, 55 insertions(+), 67 deletions(-)
 create mode 100644 changelog.d/15634.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15634.bugfix b/changelog.d/15634.bugfix
new file mode 100644
index 0000000000..ef39e8a689
--- /dev/null
+++ b/changelog.d/15634.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where deactivated users were able to login in uncommon situations.
diff --git a/docs/modules/password_auth_provider_callbacks.md b/docs/modules/password_auth_provider_callbacks.md
index 8275f7ebdc..d66ac7df31 100644
--- a/docs/modules/password_auth_provider_callbacks.md
+++ b/docs/modules/password_auth_provider_callbacks.md
@@ -46,6 +46,9 @@ instead.
 
 If the authentication is unsuccessful, the module must return `None`.
 
+Note that the user is not automatically registered; the `register_user(..)` method of
+the [module API](writing_a_module.html) can be used to lazily create users.
+
 If multiple modules register an auth checker for the same login type but with different
 fields, Synapse will refuse to start.
 
diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py
index 35c330a3c4..2260a8f589 100644
--- a/synapse/appservice/__init__.py
+++ b/synapse/appservice/__init__.py
@@ -86,6 +86,7 @@ class ApplicationService:
             url.rstrip("/") if isinstance(url, str) else None
         )  # url must not end with a slash
         self.hs_token = hs_token
+        # The full Matrix ID for this application service's sender.
         self.sender = sender
         self.namespaces = self._check_namespaces(namespaces)
         self.id = id
@@ -212,7 +213,7 @@ class ApplicationService:
             True if the application service is interested in the user, False if not.
         """
         return (
-            # User is the appservice's sender_localpart user
+            # User is the appservice's configured sender_localpart user
             user_id == self.sender
             # User is in the appservice's user namespace
             or self.is_user_in_namespace(user_id)
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 59e340974d..d001f2fb2f 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -52,7 +52,6 @@ from synapse.api.errors import (
     NotFoundError,
     StoreError,
     SynapseError,
-    UserDeactivatedError,
 )
 from synapse.api.ratelimiting import Ratelimiter
 from synapse.handlers.ui_auth import (
@@ -1419,12 +1418,6 @@ class AuthHandler:
             return None
         (user_id, password_hash) = lookupres
 
-        # If the password hash is None, the account has likely been deactivated
-        if not password_hash:
-            deactivated = await self.store.get_user_deactivated_status(user_id)
-            if deactivated:
-                raise UserDeactivatedError("This account has been deactivated")
-
         result = await self.validate_hash(password, password_hash)
         if not result:
             logger.warning("Failed password login for user %s", user_id)
@@ -1749,8 +1742,11 @@ class AuthHandler:
                 registered.
             auth_provider_session_id: The session ID from the SSO IdP received during login.
         """
-        # If the account has been deactivated, do not proceed with the login
-        # flow.
+        # If the account has been deactivated, do not proceed with the login.
+        #
+        # This gets checked again when the token is submitted but this lets us
+        # provide an HTML error page to the user (instead of issuing a token and
+        # having it error later).
         deactivated = await self.store.get_user_deactivated_status(registered_user_id)
         if deactivated:
             respond_with_html(request, 403, self._sso_account_deactivated_template)
diff --git a/synapse/handlers/jwt.py b/synapse/handlers/jwt.py
index 5fddc0e315..740bf9b3c4 100644
--- a/synapse/handlers/jwt.py
+++ b/synapse/handlers/jwt.py
@@ -16,7 +16,7 @@ from typing import TYPE_CHECKING
 from authlib.jose import JsonWebToken, JWTClaims
 from authlib.jose.errors import BadSignatureError, InvalidClaimError, JoseError
 
-from synapse.api.errors import Codes, LoginError, StoreError, UserDeactivatedError
+from synapse.api.errors import Codes, LoginError
 from synapse.types import JsonDict, UserID
 
 if TYPE_CHECKING:
@@ -26,7 +26,6 @@ if TYPE_CHECKING:
 class JwtHandler:
     def __init__(self, hs: "HomeServer"):
         self.hs = hs
-        self._main_store = hs.get_datastores().main
 
         self.jwt_secret = hs.config.jwt.jwt_secret
         self.jwt_subject_claim = hs.config.jwt.jwt_subject_claim
@@ -34,7 +33,7 @@ class JwtHandler:
         self.jwt_issuer = hs.config.jwt.jwt_issuer
         self.jwt_audiences = hs.config.jwt.jwt_audiences
 
-    async def validate_login(self, login_submission: JsonDict) -> str:
+    def validate_login(self, login_submission: JsonDict) -> str:
         """
         Authenticates the user for the /login API
 
@@ -103,16 +102,4 @@ class JwtHandler:
         if user is None:
             raise LoginError(403, "Invalid JWT", errcode=Codes.FORBIDDEN)
 
-        user_id = UserID(user, self.hs.hostname).to_string()
-
-        # If the account has been deactivated, do not proceed with the login
-        # flow.
-        try:
-            deactivated = await self._main_store.get_user_deactivated_status(user_id)
-        except StoreError:
-            # JWT lazily creates users, so they may not exist in the database yet.
-            deactivated = False
-        if deactivated:
-            raise UserDeactivatedError("This account has been deactivated")
-
-        return user_id
+        return UserID(user, self.hs.hostname).to_string()
diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py
index afdbf821b5..6ca61ffbd0 100644
--- a/synapse/rest/client/login.py
+++ b/synapse/rest/client/login.py
@@ -35,6 +35,7 @@ from synapse.api.errors import (
     LoginError,
     NotApprovedError,
     SynapseError,
+    UserDeactivatedError,
 )
 from synapse.api.ratelimiting import Ratelimiter
 from synapse.api.urls import CLIENT_API_PREFIX
@@ -84,6 +85,7 @@ class LoginRestServlet(RestServlet):
     def __init__(self, hs: "HomeServer"):
         super().__init__()
         self.hs = hs
+        self._main_store = hs.get_datastores().main
 
         # JWT configuration variables.
         self.jwt_enabled = hs.config.jwt.jwt_enabled
@@ -112,13 +114,13 @@ class LoginRestServlet(RestServlet):
 
         self._well_known_builder = WellKnownBuilder(hs)
         self._address_ratelimiter = Ratelimiter(
-            store=hs.get_datastores().main,
+            store=self._main_store,
             clock=hs.get_clock(),
             rate_hz=self.hs.config.ratelimiting.rc_login_address.per_second,
             burst_count=self.hs.config.ratelimiting.rc_login_address.burst_count,
         )
         self._account_ratelimiter = Ratelimiter(
-            store=hs.get_datastores().main,
+            store=self._main_store,
             clock=hs.get_clock(),
             rate_hz=self.hs.config.ratelimiting.rc_login_account.per_second,
             burst_count=self.hs.config.ratelimiting.rc_login_account.burst_count,
@@ -280,6 +282,9 @@ class LoginRestServlet(RestServlet):
             login_submission,
             ratelimit=appservice.is_rate_limited(),
             should_issue_refresh_token=should_issue_refresh_token,
+            # The user represented by an appservice's configured sender_localpart
+            # is not actually created in Synapse.
+            should_check_deactivated=qualified_user_id != appservice.sender,
         )
 
     async def _do_other_login(
@@ -326,6 +331,7 @@ class LoginRestServlet(RestServlet):
         auth_provider_id: Optional[str] = None,
         should_issue_refresh_token: bool = False,
         auth_provider_session_id: Optional[str] = None,
+        should_check_deactivated: bool = True,
     ) -> LoginResponse:
         """Called when we've successfully authed the user and now need to
         actually login them in (e.g. create devices). This gets called on
@@ -345,6 +351,11 @@ class LoginRestServlet(RestServlet):
             should_issue_refresh_token: True if this login should issue
                 a refresh token alongside the access token.
             auth_provider_session_id: The session ID got during login from the SSO IdP.
+            should_check_deactivated: True if the user should be checked for
+                deactivation status before logging in.
+
+                This exists purely for appservice's configured sender_localpart
+                which doesn't have an associated user in the database.
 
         Returns:
             Dictionary of account information after successful login.
@@ -364,6 +375,12 @@ class LoginRestServlet(RestServlet):
                 )
             user_id = canonical_uid
 
+        # If the account has been deactivated, do not proceed with the login.
+        if should_check_deactivated:
+            deactivated = await self._main_store.get_user_deactivated_status(user_id)
+            if deactivated:
+                raise UserDeactivatedError("This account has been deactivated")
+
         device_id = login_submission.get("device_id")
 
         # If device_id is present, check that device_id is not longer than a reasonable 512 characters
@@ -458,7 +475,7 @@ class LoginRestServlet(RestServlet):
         Returns:
             The body of the JSON response.
         """
-        user_id = await self.hs.get_jwt_handler().validate_login(login_submission)
+        user_id = self.hs.get_jwt_handler().validate_login(login_submission)
         return await self._complete_login(
             user_id,
             login_submission,
diff --git a/tests/handlers/test_password_providers.py b/tests/handlers/test_password_providers.py
index aa91bc0a3d..394006f5f3 100644
--- a/tests/handlers/test_password_providers.py
+++ b/tests/handlers/test_password_providers.py
@@ -18,13 +18,17 @@ from http import HTTPStatus
 from typing import Any, Dict, List, Optional, Type, Union
 from unittest.mock import Mock
 
+from twisted.test.proto_helpers import MemoryReactor
+
 import synapse
 from synapse.api.constants import LoginType
 from synapse.api.errors import Codes
 from synapse.handlers.account import AccountHandler
 from synapse.module_api import ModuleApi
 from synapse.rest.client import account, devices, login, logout, register
+from synapse.server import HomeServer
 from synapse.types import JsonDict, UserID
+from synapse.util import Clock
 
 from tests import unittest
 from tests.server import FakeChannel
@@ -162,10 +166,16 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
     CALLBACK_USERNAME = "get_username_for_registration"
     CALLBACK_DISPLAYNAME = "get_displayname_for_registration"
 
-    def setUp(self) -> None:
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
         # we use a global mock device, so make sure we are starting with a clean slate
         mock_password_provider.reset_mock()
-        super().setUp()
+
+        # The mock password provider doesn't register the users, so ensure they
+        # are registered first.
+        self.register_user("u", "not-the-tested-password")
+        self.register_user("user", "not-the-tested-password")
 
     @override_config(legacy_providers_config(LegacyPasswordOnlyAuthProvider))
     def test_password_only_auth_progiver_login_legacy(self) -> None:
@@ -185,22 +195,12 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
         mock_password_provider.reset_mock()
 
         # login with mxid should work too
-        channel = self._send_password_login("@u:bz", "p")
+        channel = self._send_password_login("@u:test", "p")
         self.assertEqual(channel.code, HTTPStatus.OK, channel.result)
-        self.assertEqual("@u:bz", channel.json_body["user_id"])
-        mock_password_provider.check_password.assert_called_once_with("@u:bz", "p")
+        self.assertEqual("@u:test", channel.json_body["user_id"])
+        mock_password_provider.check_password.assert_called_once_with("@u:test", "p")
         mock_password_provider.reset_mock()
 
-        # try a weird username / pass. Honestly it's unclear what we *expect* to happen
-        # in these cases, but at least we can guard against the API changing
-        # unexpectedly
-        channel = self._send_password_login(" USER🙂NAME ", " pASS\U0001F622word ")
-        self.assertEqual(channel.code, HTTPStatus.OK, channel.result)
-        self.assertEqual("@ USER🙂NAME :test", channel.json_body["user_id"])
-        mock_password_provider.check_password.assert_called_once_with(
-            "@ USER🙂NAME :test", " pASS😢word "
-        )
-
     @override_config(legacy_providers_config(LegacyPasswordOnlyAuthProvider))
     def test_password_only_auth_provider_ui_auth_legacy(self) -> None:
         self.password_only_auth_provider_ui_auth_test_body()
@@ -208,10 +208,6 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
     def password_only_auth_provider_ui_auth_test_body(self) -> None:
         """UI Auth should delegate correctly to the password provider"""
 
-        # create the user, otherwise access doesn't work
-        module_api = self.hs.get_module_api()
-        self.get_success(module_api.register_user("u"))
-
         # log in twice, to get two devices
         mock_password_provider.check_password.return_value = make_awaitable(True)
         tok1 = self.login("u", "p")
@@ -401,29 +397,16 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
         mock_password_provider.check_auth.assert_not_called()
 
         mock_password_provider.check_auth.return_value = make_awaitable(
-            ("@user:bz", None)
+            ("@user:test", None)
         )
         channel = self._send_login("test.login_type", "u", test_field="y")
         self.assertEqual(channel.code, HTTPStatus.OK, channel.result)
-        self.assertEqual("@user:bz", channel.json_body["user_id"])
+        self.assertEqual("@user:test", channel.json_body["user_id"])
         mock_password_provider.check_auth.assert_called_once_with(
             "u", "test.login_type", {"test_field": "y"}
         )
         mock_password_provider.reset_mock()
 
-        # try a weird username. Again, it's unclear what we *expect* to happen
-        # in these cases, but at least we can guard against the API changing
-        # unexpectedly
-        mock_password_provider.check_auth.return_value = make_awaitable(
-            ("@ MALFORMED! :bz", None)
-        )
-        channel = self._send_login("test.login_type", " USER🙂NAME ", test_field=" abc ")
-        self.assertEqual(channel.code, HTTPStatus.OK, channel.result)
-        self.assertEqual("@ MALFORMED! :bz", channel.json_body["user_id"])
-        mock_password_provider.check_auth.assert_called_once_with(
-            " USER🙂NAME ", "test.login_type", {"test_field": " abc "}
-        )
-
     @override_config(legacy_providers_config(LegacyCustomAuthProvider))
     def test_custom_auth_provider_ui_auth_legacy(self) -> None:
         self.custom_auth_provider_ui_auth_test_body()
@@ -465,7 +448,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
 
         # right params, but authing as the wrong user
         mock_password_provider.check_auth.return_value = make_awaitable(
-            ("@user:bz", None)
+            ("@user:test", None)
         )
         body["auth"]["test_field"] = "foo"
         channel = self._delete_device(tok1, "dev2", body)
@@ -498,11 +481,11 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
         callback = Mock(return_value=make_awaitable(None))
 
         mock_password_provider.check_auth.return_value = make_awaitable(
-            ("@user:bz", callback)
+            ("@user:test", callback)
         )
         channel = self._send_login("test.login_type", "u", test_field="y")
         self.assertEqual(channel.code, HTTPStatus.OK, channel.result)
-        self.assertEqual("@user:bz", channel.json_body["user_id"])
+        self.assertEqual("@user:test", channel.json_body["user_id"])
         mock_password_provider.check_auth.assert_called_once_with(
             "u", "test.login_type", {"test_field": "y"}
         )
@@ -512,7 +495,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
         call_args, call_kwargs = callback.call_args
         # should be one positional arg
         self.assertEqual(len(call_args), 1)
-        self.assertEqual(call_args[0]["user_id"], "@user:bz")
+        self.assertEqual(call_args[0]["user_id"], "@user:test")
         for p in ["user_id", "access_token", "device_id", "home_server"]:
             self.assertIn(p, call_args[0])
 
-- 
cgit 1.5.1


From 379eb2d7abc8e3215cc9fd14deefb975137c9494 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 23 May 2023 12:26:25 -0500
Subject: Fix `@trace` not wrapping some state methods that return coroutines
 correctly (#15647)

```
2023-05-21 09:30:09,288 - synapse.logging.opentracing - 940 - ERROR - POST-1 - @trace may not have wrapped StateStorageController.get_state_for_groups correctly! The function is not async but returned a coroutine
```

Tracing instrumentation for these functions was originally introduced in https://github.com/matrix-org/synapse/pull/15610
---
 changelog.d/15647.bugfix             |  1 +
 synapse/storage/controllers/state.py | 15 ++++++++-------
 2 files changed, 9 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/15647.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15647.bugfix b/changelog.d/15647.bugfix
new file mode 100644
index 0000000000..2eff30f6e3
--- /dev/null
+++ b/changelog.d/15647.bugfix
@@ -0,0 +1 @@
+Instrument `state` and `state_group` storage-related operations to better picture what's happening when tracing.
diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py
index 06a80869eb..7089b0a1d8 100644
--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@@ -16,7 +16,6 @@ from typing import (
     TYPE_CHECKING,
     AbstractSet,
     Any,
-    Awaitable,
     Callable,
     Collection,
     Dict,
@@ -175,9 +174,9 @@ class StateStorageController:
 
     @trace
     @tag_args
-    def _get_state_groups_from_groups(
+    async def _get_state_groups_from_groups(
         self, groups: List[int], state_filter: StateFilter
-    ) -> Awaitable[Dict[int, StateMap[str]]]:
+    ) -> Dict[int, StateMap[str]]:
         """Returns the state groups for a given set of groups, filtering on
         types of state events.
 
@@ -190,7 +189,9 @@ class StateStorageController:
             Dict of state group to state map.
         """
 
-        return self.stores.state._get_state_groups_from_groups(groups, state_filter)
+        return await self.stores.state._get_state_groups_from_groups(
+            groups, state_filter
+        )
 
     @trace
     @tag_args
@@ -349,9 +350,9 @@ class StateStorageController:
 
     @trace
     @tag_args
-    def get_state_for_groups(
+    async def get_state_for_groups(
         self, groups: Iterable[int], state_filter: Optional[StateFilter] = None
-    ) -> Awaitable[Dict[int, MutableStateMap[str]]]:
+    ) -> Dict[int, MutableStateMap[str]]:
         """Gets the state at each of a list of state groups, optionally
         filtering by type/state_key
 
@@ -363,7 +364,7 @@ class StateStorageController:
         Returns:
             Dict of state group to state map.
         """
-        return self.stores.state._get_state_for_groups(
+        return await self.stores.state._get_state_for_groups(
             groups, state_filter or StateFilter.all()
         )
 
-- 
cgit 1.5.1


From 1f55c04cbca6dc56085896dd980defa26ffe3b5b Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 24 May 2023 08:59:31 -0400
Subject: Improve type hints for cached decorator. (#15658)

The cached decorators always return a Deferred, which was not
properly propagated. It was close enough when wrapping coroutines,
but failed if a bare function was wrapped.
---
 changelog.d/15658.misc                       |  1 +
 scripts-dev/mypy_synapse_plugin.py           | 34 +++++++++++-
 synapse/storage/databases/main/roommember.py |  2 +-
 synapse/util/caches/descriptors.py           |  6 +-
 tests/appservice/test_appservice.py          | 82 ++++++++++------------------
 tests/storage/test_transactions.py           | 11 ++--
 6 files changed, 73 insertions(+), 63 deletions(-)
 create mode 100644 changelog.d/15658.misc

(limited to 'synapse')

diff --git a/changelog.d/15658.misc b/changelog.d/15658.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/15658.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/scripts-dev/mypy_synapse_plugin.py b/scripts-dev/mypy_synapse_plugin.py
index 2c377533c0..8058e9c993 100644
--- a/scripts-dev/mypy_synapse_plugin.py
+++ b/scripts-dev/mypy_synapse_plugin.py
@@ -18,10 +18,11 @@ can crop up, e.g the cache descriptors.
 
 from typing import Callable, Optional, Type
 
+from mypy.erasetype import remove_instance_last_known_values
 from mypy.nodes import ARG_NAMED_OPT
 from mypy.plugin import MethodSigContext, Plugin
 from mypy.typeops import bind_self
-from mypy.types import CallableType, NoneType, UnionType
+from mypy.types import CallableType, Instance, NoneType, UnionType
 
 
 class SynapsePlugin(Plugin):
@@ -92,10 +93,41 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType:
     arg_names.append("on_invalidate")
     arg_kinds.append(ARG_NAMED_OPT)  # Arg is an optional kwarg.
 
+    # Finally we ensure the return type is a Deferred.
+    if (
+        isinstance(signature.ret_type, Instance)
+        and signature.ret_type.type.fullname == "twisted.internet.defer.Deferred"
+    ):
+        # If it is already a Deferred, nothing to do.
+        ret_type = signature.ret_type
+    else:
+        ret_arg = None
+        if isinstance(signature.ret_type, Instance):
+            # If a coroutine, wrap the coroutine's return type in a Deferred.
+            if signature.ret_type.type.fullname == "typing.Coroutine":
+                ret_arg = signature.ret_type.args[2]
+
+            # If an awaitable, wrap the awaitable's final value in a Deferred.
+            elif signature.ret_type.type.fullname == "typing.Awaitable":
+                ret_arg = signature.ret_type.args[0]
+
+        # Otherwise, wrap the return value in a Deferred.
+        if ret_arg is None:
+            ret_arg = signature.ret_type
+
+        # This should be able to use ctx.api.named_generic_type, but that doesn't seem
+        # to find the correct symbol for anything more than 1 module deep.
+        #
+        # modules is not part of CheckerPluginInterface. The following is a combination
+        # of TypeChecker.named_generic_type and TypeChecker.lookup_typeinfo.
+        sym = ctx.api.modules["twisted.internet.defer"].names.get("Deferred")  # type: ignore[attr-defined]
+        ret_type = Instance(sym.node, [remove_instance_last_known_values(ret_arg)])
+
     signature = signature.copy_modified(
         arg_types=arg_types,
         arg_names=arg_names,
         arg_kinds=arg_kinds,
+        ret_type=ret_type,
     )
 
     return signature
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index e068f27a10..ae9c201b87 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -1099,7 +1099,7 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
         # `get_joined_hosts` is called with the "current" state group for the
         # room, and so consecutive calls will be for consecutive state groups
         # which point to the previous state group.
-        cache = await self._get_joined_hosts_cache(room_id)  # type: ignore[misc]
+        cache = await self._get_joined_hosts_cache(room_id)
 
         # If the state group in the cache matches, we already have the data we need.
         if state_entry.state_group == cache.state_group:
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 81df71a0c5..8514a75a1c 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -220,7 +220,9 @@ class DeferredCacheDescriptor(_CacheDescriptorBase):
         self.iterable = iterable
         self.prune_unread_entries = prune_unread_entries
 
-    def __get__(self, obj: Optional[Any], owner: Optional[Type]) -> Callable[..., Any]:
+    def __get__(
+        self, obj: Optional[Any], owner: Optional[Type]
+    ) -> Callable[..., "defer.Deferred[Any]"]:
         cache: DeferredCache[CacheKey, Any] = DeferredCache(
             name=self.name,
             max_entries=self.max_entries,
@@ -232,7 +234,7 @@ class DeferredCacheDescriptor(_CacheDescriptorBase):
         get_cache_key = self.cache_key_builder
 
         @functools.wraps(self.orig)
-        def _wrapped(*args: Any, **kwargs: Any) -> Any:
+        def _wrapped(*args: Any, **kwargs: Any) -> "defer.Deferred[Any]":
             # If we're passed a cache_context then we'll want to call its invalidate()
             # whenever we are invalidated
             invalidate_callback = kwargs.pop("on_invalidate", None)
diff --git a/tests/appservice/test_appservice.py b/tests/appservice/test_appservice.py
index dee976356f..66753c60c4 100644
--- a/tests/appservice/test_appservice.py
+++ b/tests/appservice/test_appservice.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import re
-from typing import Generator
+from typing import Any, Generator
 from unittest.mock import Mock
 
 from twisted.internet import defer
@@ -49,15 +49,13 @@ class ApplicationServiceTestCase(unittest.TestCase):
     @defer.inlineCallbacks
     def test_regex_user_id_prefix_match(
         self,
-    ) -> Generator["defer.Deferred[object]", object, None]:
+    ) -> Generator["defer.Deferred[Any]", object, None]:
         self.service.namespaces[ApplicationService.NS_USERS].append(_regex("@irc_.*"))
         self.event.sender = "@irc_foobar:matrix.org"
         self.assertTrue(
             (
-                yield defer.ensureDeferred(
-                    self.service.is_interested_in_event(
-                        self.event.event_id, self.event, self.store
-                    )
+                yield self.service.is_interested_in_event(
+                    self.event.event_id, self.event, self.store
                 )
             )
         )
@@ -65,15 +63,13 @@ class ApplicationServiceTestCase(unittest.TestCase):
     @defer.inlineCallbacks
     def test_regex_user_id_prefix_no_match(
         self,
-    ) -> Generator["defer.Deferred[object]", object, None]:
+    ) -> Generator["defer.Deferred[Any]", object, None]:
         self.service.namespaces[ApplicationService.NS_USERS].append(_regex("@irc_.*"))
         self.event.sender = "@someone_else:matrix.org"
         self.assertFalse(
             (
-                yield defer.ensureDeferred(
-                    self.service.is_interested_in_event(
-                        self.event.event_id, self.event, self.store
-                    )
+                yield self.service.is_interested_in_event(
+                    self.event.event_id, self.event, self.store
                 )
             )
         )
@@ -81,17 +77,15 @@ class ApplicationServiceTestCase(unittest.TestCase):
     @defer.inlineCallbacks
     def test_regex_room_member_is_checked(
         self,
-    ) -> Generator["defer.Deferred[object]", object, None]:
+    ) -> Generator["defer.Deferred[Any]", object, None]:
         self.service.namespaces[ApplicationService.NS_USERS].append(_regex("@irc_.*"))
         self.event.sender = "@someone_else:matrix.org"
         self.event.type = "m.room.member"
         self.event.state_key = "@irc_foobar:matrix.org"
         self.assertTrue(
             (
-                yield defer.ensureDeferred(
-                    self.service.is_interested_in_event(
-                        self.event.event_id, self.event, self.store
-                    )
+                yield self.service.is_interested_in_event(
+                    self.event.event_id, self.event, self.store
                 )
             )
         )
@@ -99,17 +93,15 @@ class ApplicationServiceTestCase(unittest.TestCase):
     @defer.inlineCallbacks
     def test_regex_room_id_match(
         self,
-    ) -> Generator["defer.Deferred[object]", object, None]:
+    ) -> Generator["defer.Deferred[Any]", object, None]:
         self.service.namespaces[ApplicationService.NS_ROOMS].append(
             _regex("!some_prefix.*some_suffix:matrix.org")
         )
         self.event.room_id = "!some_prefixs0m3th1nGsome_suffix:matrix.org"
         self.assertTrue(
             (
-                yield defer.ensureDeferred(
-                    self.service.is_interested_in_event(
-                        self.event.event_id, self.event, self.store
-                    )
+                yield self.service.is_interested_in_event(
+                    self.event.event_id, self.event, self.store
                 )
             )
         )
@@ -117,25 +109,21 @@ class ApplicationServiceTestCase(unittest.TestCase):
     @defer.inlineCallbacks
     def test_regex_room_id_no_match(
         self,
-    ) -> Generator["defer.Deferred[object]", object, None]:
+    ) -> Generator["defer.Deferred[Any]", object, None]:
         self.service.namespaces[ApplicationService.NS_ROOMS].append(
             _regex("!some_prefix.*some_suffix:matrix.org")
         )
         self.event.room_id = "!XqBunHwQIXUiqCaoxq:matrix.org"
         self.assertFalse(
             (
-                yield defer.ensureDeferred(
-                    self.service.is_interested_in_event(
-                        self.event.event_id, self.event, self.store
-                    )
+                yield self.service.is_interested_in_event(
+                    self.event.event_id, self.event, self.store
                 )
             )
         )
 
     @defer.inlineCallbacks
-    def test_regex_alias_match(
-        self,
-    ) -> Generator["defer.Deferred[object]", object, None]:
+    def test_regex_alias_match(self) -> Generator["defer.Deferred[Any]", object, None]:
         self.service.namespaces[ApplicationService.NS_ALIASES].append(
             _regex("#irc_.*:matrix.org")
         )
@@ -145,10 +133,8 @@ class ApplicationServiceTestCase(unittest.TestCase):
         self.store.get_local_users_in_room = simple_async_mock([])
         self.assertTrue(
             (
-                yield defer.ensureDeferred(
-                    self.service.is_interested_in_event(
-                        self.event.event_id, self.event, self.store
-                    )
+                yield self.service.is_interested_in_event(
+                    self.event.event_id, self.event, self.store
                 )
             )
         )
@@ -192,7 +178,7 @@ class ApplicationServiceTestCase(unittest.TestCase):
     @defer.inlineCallbacks
     def test_regex_alias_no_match(
         self,
-    ) -> Generator["defer.Deferred[object]", object, None]:
+    ) -> Generator["defer.Deferred[Any]", object, None]:
         self.service.namespaces[ApplicationService.NS_ALIASES].append(
             _regex("#irc_.*:matrix.org")
         )
@@ -213,7 +199,7 @@ class ApplicationServiceTestCase(unittest.TestCase):
     @defer.inlineCallbacks
     def test_regex_multiple_matches(
         self,
-    ) -> Generator["defer.Deferred[object]", object, None]:
+    ) -> Generator["defer.Deferred[Any]", object, None]:
         self.service.namespaces[ApplicationService.NS_ALIASES].append(
             _regex("#irc_.*:matrix.org")
         )
@@ -223,18 +209,14 @@ class ApplicationServiceTestCase(unittest.TestCase):
         self.store.get_local_users_in_room = simple_async_mock([])
         self.assertTrue(
             (
-                yield defer.ensureDeferred(
-                    self.service.is_interested_in_event(
-                        self.event.event_id, self.event, self.store
-                    )
+                yield self.service.is_interested_in_event(
+                    self.event.event_id, self.event, self.store
                 )
             )
         )
 
     @defer.inlineCallbacks
-    def test_interested_in_self(
-        self,
-    ) -> Generator["defer.Deferred[object]", object, None]:
+    def test_interested_in_self(self) -> Generator["defer.Deferred[Any]", object, None]:
         # make sure invites get through
         self.service.sender = "@appservice:name"
         self.service.namespaces[ApplicationService.NS_USERS].append(_regex("@irc_.*"))
@@ -243,18 +225,14 @@ class ApplicationServiceTestCase(unittest.TestCase):
         self.event.state_key = self.service.sender
         self.assertTrue(
             (
-                yield defer.ensureDeferred(
-                    self.service.is_interested_in_event(
-                        self.event.event_id, self.event, self.store
-                    )
+                yield self.service.is_interested_in_event(
+                    self.event.event_id, self.event, self.store
                 )
             )
         )
 
     @defer.inlineCallbacks
-    def test_member_list_match(
-        self,
-    ) -> Generator["defer.Deferred[object]", object, None]:
+    def test_member_list_match(self) -> Generator["defer.Deferred[Any]", object, None]:
         self.service.namespaces[ApplicationService.NS_USERS].append(_regex("@irc_.*"))
         # Note that @irc_fo:here is the AS user.
         self.store.get_local_users_in_room = simple_async_mock(
@@ -265,10 +243,8 @@ class ApplicationServiceTestCase(unittest.TestCase):
         self.event.sender = "@xmpp_foobar:matrix.org"
         self.assertTrue(
             (
-                yield defer.ensureDeferred(
-                    self.service.is_interested_in_event(
-                        self.event.event_id, self.event, self.store
-                    )
+                yield self.service.is_interested_in_event(
+                    self.event.event_id, self.event, self.store
                 )
             )
         )
diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py
index db9ee9955e..2fab84a529 100644
--- a/tests/storage/test_transactions.py
+++ b/tests/storage/test_transactions.py
@@ -33,15 +33,14 @@ class TransactionStoreTestCase(HomeserverTestCase):
         destination retries, as well as testing tht we can set and get
         correctly.
         """
-        d = self.store.get_destination_retry_timings("example.com")
-        r = self.get_success(d)
+        r = self.get_success(self.store.get_destination_retry_timings("example.com"))
         self.assertIsNone(r)
 
-        d = self.store.set_destination_retry_timings("example.com", 1000, 50, 100)
-        self.get_success(d)
+        self.get_success(
+            self.store.set_destination_retry_timings("example.com", 1000, 50, 100)
+        )
 
-        d = self.store.get_destination_retry_timings("example.com")
-        r = self.get_success(d)
+        r = self.get_success(self.store.get_destination_retry_timings("example.com"))
 
         self.assertEqual(
             DestinationRetryTimings(
-- 
cgit 1.5.1


From c7e9c1d5ae2fd0fa68b28c51a3bce503194c4718 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 24 May 2023 15:13:28 +0100
Subject: Speed up user directory rebuild for users some more... (#15665)

---
 changelog.d/15665.misc                           |   1 +
 synapse/storage/databases/main/user_directory.py | 190 ++++++++++++++---------
 2 files changed, 115 insertions(+), 76 deletions(-)
 create mode 100644 changelog.d/15665.misc

(limited to 'synapse')

diff --git a/changelog.d/15665.misc b/changelog.d/15665.misc
new file mode 100644
index 0000000000..7ad424d8df
--- /dev/null
+++ b/changelog.d/15665.misc
@@ -0,0 +1 @@
+Speed up rebuilding of the user directory for local users.
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index b7d58978de..a0319575f0 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -17,6 +17,7 @@ import re
 import unicodedata
 from typing import (
     TYPE_CHECKING,
+    Collection,
     Iterable,
     List,
     Mapping,
@@ -45,7 +46,7 @@ from synapse.util.stringutils import non_null_str_or_none
 if TYPE_CHECKING:
     from synapse.server import HomeServer
 
-from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules
+from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules, UserTypes
 from synapse.storage.database import (
     DatabasePool,
     LoggingDatabaseConnection,
@@ -356,13 +357,30 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
         Add all local users to the user directory.
         """
 
-        def _get_next_batch(txn: LoggingTransaction) -> Optional[List[str]]:
-            sql = "SELECT user_id FROM %s LIMIT %s" % (
-                TEMP_TABLE + "_users",
-                str(batch_size),
-            )
-            txn.execute(sql)
-            user_result = cast(List[Tuple[str]], txn.fetchall())
+        def _populate_user_directory_process_users_txn(
+            txn: LoggingTransaction,
+        ) -> Optional[int]:
+            if self.database_engine.supports_returning:
+                # Note: we use an ORDER BY in the SELECT to force usage of an
+                # index. Otherwise, postgres does a sequential scan that is
+                # surprisingly slow (I think due to the fact it will read/skip
+                # over lots of already deleted rows).
+                sql = f"""
+                    DELETE FROM {TEMP_TABLE + "_users"}
+                    WHERE user_id IN (
+                        SELECT user_id FROM {TEMP_TABLE + "_users"} ORDER BY user_id LIMIT ?
+                    )
+                    RETURNING user_id
+                """
+                txn.execute(sql, (batch_size,))
+                user_result = cast(List[Tuple[str]], txn.fetchall())
+            else:
+                sql = "SELECT user_id FROM %s ORDER BY user_id LIMIT %s" % (
+                    TEMP_TABLE + "_users",
+                    str(batch_size),
+                )
+                txn.execute(sql)
+                user_result = cast(List[Tuple[str]], txn.fetchall())
 
             if not user_result:
                 return None
@@ -378,85 +396,81 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                 assert count_result is not None
                 progress["remaining"] = count_result[0]
 
-            return users_to_work_on
-
-        users_to_work_on = await self.db_pool.runInteraction(
-            "populate_user_directory_temp_read", _get_next_batch
-        )
+            if not users_to_work_on:
+                return None
 
-        # No more users -- complete the transaction.
-        if not users_to_work_on:
-            await self.db_pool.updates._end_background_update(
-                "populate_user_directory_process_users"
+            logger.debug(
+                "Processing the next %d users of %d remaining",
+                len(users_to_work_on),
+                progress["remaining"],
             )
-            return 1
-
-        logger.debug(
-            "Processing the next %d users of %d remaining"
-            % (len(users_to_work_on), progress["remaining"])
-        )
 
-        # First filter down to users we want to insert into the user directory.
-        users_to_insert = [
-            user_id
-            for user_id in users_to_work_on
-            if await self.should_include_local_user_in_dir(user_id)
-        ]
+            # First filter down to users we want to insert into the user directory.
+            users_to_insert = self._filter_local_users_for_dir_txn(
+                txn, users_to_work_on
+            )
 
-        # Next fetch their profiles. Note that the `user_id` here is the
-        # *localpart*, and that not all users have profiles.
-        profile_rows = await self.db_pool.simple_select_many_batch(
-            table="profiles",
-            column="user_id",
-            iterable=[get_localpart_from_id(u) for u in users_to_insert],
-            retcols=(
-                "user_id",
-                "displayname",
-                "avatar_url",
-            ),
-            keyvalues={},
-            desc="populate_user_directory_process_users_get_profiles",
-        )
-        profiles = {
-            f"@{row['user_id']}:{self.server_name}": _UserDirProfile(
-                f"@{row['user_id']}:{self.server_name}",
-                row["displayname"],
-                row["avatar_url"],
+            # Next fetch their profiles. Note that the `user_id` here is the
+            # *localpart*, and that not all users have profiles.
+            profile_rows = self.db_pool.simple_select_many_txn(
+                txn,
+                table="profiles",
+                column="user_id",
+                iterable=[get_localpart_from_id(u) for u in users_to_insert],
+                retcols=(
+                    "user_id",
+                    "displayname",
+                    "avatar_url",
+                ),
+                keyvalues={},
             )
-            for row in profile_rows
-        }
+            profiles = {
+                f"@{row['user_id']}:{self.server_name}": _UserDirProfile(
+                    f"@{row['user_id']}:{self.server_name}",
+                    row["displayname"],
+                    row["avatar_url"],
+                )
+                for row in profile_rows
+            }
 
-        profiles_to_insert = [
-            profiles.get(user_id) or _UserDirProfile(user_id)
-            for user_id in users_to_insert
-        ]
+            profiles_to_insert = [
+                profiles.get(user_id) or _UserDirProfile(user_id)
+                for user_id in users_to_insert
+            ]
+
+            # Actually insert the users with their profiles into the directory.
+            self._update_profiles_in_user_dir_txn(txn, profiles_to_insert)
+
+            # We've finished processing the users. Delete it from the table, if
+            # we haven't already.
+            if not self.database_engine.supports_returning:
+                self.db_pool.simple_delete_many_txn(
+                    txn,
+                    table=TEMP_TABLE + "_users",
+                    column="user_id",
+                    values=users_to_work_on,
+                    keyvalues={},
+                )
 
-        # Actually insert the users with their profiles into the directory.
-        await self.db_pool.runInteraction(
-            "populate_user_directory_process_users_insertion",
-            self._update_profiles_in_user_dir_txn,
-            profiles_to_insert,
-        )
+            # Update the remaining counter.
+            progress["remaining"] -= len(users_to_work_on)
+            self.db_pool.updates._background_update_progress_txn(
+                txn, "populate_user_directory_process_users", progress
+            )
+            return len(users_to_work_on)
 
-        # We've finished processing the users. Delete it from the table.
-        await self.db_pool.simple_delete_many(
-            table=TEMP_TABLE + "_users",
-            column="user_id",
-            iterable=users_to_work_on,
-            keyvalues={},
-            desc="populate_user_directory_process_users_delete",
+        processed_count = await self.db_pool.runInteraction(
+            "populate_user_directory_temp", _populate_user_directory_process_users_txn
         )
 
-        # Update the remaining counter.
-        progress["remaining"] -= len(users_to_work_on)
-        await self.db_pool.runInteraction(
-            "populate_user_directory",
-            self.db_pool.updates._background_update_progress_txn,
-            "populate_user_directory_process_users",
-            progress,
-        )
+        # No more users -- complete the transaction.
+        if not processed_count:
+            await self.db_pool.updates._end_background_update(
+                "populate_user_directory_process_users"
+            )
+            return 1
 
-        return len(users_to_work_on)
+        return processed_count
 
     async def should_include_local_user_in_dir(self, user: str) -> bool:
         """Certain classes of local user are omitted from the user directory.
@@ -494,6 +508,30 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
 
         return True
 
+    def _filter_local_users_for_dir_txn(
+        self, txn: LoggingTransaction, users: Collection[str]
+    ) -> Collection[str]:
+        """A batched version of `should_include_local_user_in_dir`"""
+        users = [
+            user
+            for user in users
+            if self.get_app_service_by_user_id(user) is None  # type: ignore[attr-defined]
+            and not self.get_if_app_services_interested_in_user(user)  # type: ignore[attr-defined]
+        ]
+
+        rows = self.db_pool.simple_select_many_txn(
+            txn,
+            table="users",
+            column="name",
+            iterable=users,
+            keyvalues={
+                "deactivated": 0,
+            },
+            retcols=("name", "user_type"),
+        )
+
+        return [row["name"] for row in rows if row["user_type"] != UserTypes.SUPPORT]
+
     async def is_room_world_readable_or_publicly_joinable(self, room_id: str) -> bool:
         """Check if the room is either world_readable or publically joinable"""
 
-- 
cgit 1.5.1


From 8839b6c2f8b07d5d122a15e79b1ebdbdd5f3e26b Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Wed, 24 May 2023 13:23:26 -0700
Subject: Add requesting user id parameter to key claim methods in
 `TransportLayerClient` (#15663)

---
 changelog.d/15663.misc                  |  1 +
 synapse/federation/federation_client.py |  6 ++++--
 synapse/federation/transport/client.py  | 16 +++++++++++++---
 synapse/handlers/e2e_keys.py            |  3 ++-
 synapse/rest/client/keys.py             |  8 ++++----
 tests/handlers/test_e2e_keys.py         | 16 +++++++++++++++-
 6 files changed, 39 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/15663.misc

(limited to 'synapse')

diff --git a/changelog.d/15663.misc b/changelog.d/15663.misc
new file mode 100644
index 0000000000..cc5f801543
--- /dev/null
+++ b/changelog.d/15663.misc
@@ -0,0 +1 @@
+Add requesting user id parameter to key claim methods in `TransportLayerClient`.
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 076b9287c6..a2cf3a96c6 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -236,6 +236,7 @@ class FederationClient(FederationBase):
 
     async def claim_client_keys(
         self,
+        user: UserID,
         destination: str,
         query: Dict[str, Dict[str, Dict[str, int]]],
         timeout: Optional[int],
@@ -243,6 +244,7 @@ class FederationClient(FederationBase):
         """Claims one-time keys for a device hosted on a remote server.
 
         Args:
+            user: The user id of the requesting user
             destination: Domain name of the remote homeserver
             content: The query content.
 
@@ -279,7 +281,7 @@ class FederationClient(FederationBase):
         if use_unstable:
             try:
                 return await self.transport_layer.claim_client_keys_unstable(
-                    destination, unstable_content, timeout
+                    user, destination, unstable_content, timeout
                 )
             except HttpResponseException as e:
                 # If an error is received that is due to an unrecognised endpoint,
@@ -295,7 +297,7 @@ class FederationClient(FederationBase):
             logger.debug("Skipping unstable claim client keys API")
 
         return await self.transport_layer.claim_client_keys(
-            destination, content, timeout
+            user, destination, content, timeout
         )
 
     @trace
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 1cfc4446c4..0b17f713ea 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -45,7 +45,7 @@ from synapse.events import EventBase, make_event_from_dict
 from synapse.federation.units import Transaction
 from synapse.http.matrixfederationclient import ByteParser, LegacyJsonSendParser
 from synapse.http.types import QueryParams
-from synapse.types import JsonDict
+from synapse.types import JsonDict, UserID
 from synapse.util import ExceptionBundle
 
 if TYPE_CHECKING:
@@ -630,7 +630,11 @@ class TransportLayerClient:
         )
 
     async def claim_client_keys(
-        self, destination: str, query_content: JsonDict, timeout: Optional[int]
+        self,
+        user: UserID,
+        destination: str,
+        query_content: JsonDict,
+        timeout: Optional[int],
     ) -> JsonDict:
         """Claim one-time keys for a list of devices hosted on a remote server.
 
@@ -655,6 +659,7 @@ class TransportLayerClient:
             }
 
         Args:
+            user: the user_id of the requesting user
             destination: The server to query.
             query_content: The user ids to query.
         Returns:
@@ -671,7 +676,11 @@ class TransportLayerClient:
         )
 
     async def claim_client_keys_unstable(
-        self, destination: str, query_content: JsonDict, timeout: Optional[int]
+        self,
+        user: UserID,
+        destination: str,
+        query_content: JsonDict,
+        timeout: Optional[int],
     ) -> JsonDict:
         """Claim one-time keys for a list of devices hosted on a remote server.
 
@@ -696,6 +705,7 @@ class TransportLayerClient:
             }
 
         Args:
+            user: the user_id of the requesting user
             destination: The server to query.
             query_content: The user ids to query.
         Returns:
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 24741b667b..ad075497c8 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -661,6 +661,7 @@ class E2eKeysHandler:
     async def claim_one_time_keys(
         self,
         query: Dict[str, Dict[str, Dict[str, int]]],
+        user: UserID,
         timeout: Optional[int],
         always_include_fallback_keys: bool,
     ) -> JsonDict:
@@ -703,7 +704,7 @@ class E2eKeysHandler:
             device_keys = remote_queries[destination]
             try:
                 remote_result = await self.federation.claim_client_keys(
-                    destination, device_keys, timeout=timeout
+                    user, destination, device_keys, timeout=timeout
                 )
                 for user_id, keys in remote_result["one_time_keys"].items():
                     if user_id in device_keys:
diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py
index 9bbab5e624..413edd8a4d 100644
--- a/synapse/rest/client/keys.py
+++ b/synapse/rest/client/keys.py
@@ -287,7 +287,7 @@ class OneTimeKeyServlet(RestServlet):
         self.e2e_keys_handler = hs.get_e2e_keys_handler()
 
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
-        await self.auth.get_user_by_req(request, allow_guest=True)
+        requester = await self.auth.get_user_by_req(request, allow_guest=True)
         timeout = parse_integer(request, "timeout", 10 * 1000)
         body = parse_json_object_from_request(request)
 
@@ -298,7 +298,7 @@ class OneTimeKeyServlet(RestServlet):
                 query.setdefault(user_id, {})[device_id] = {algorithm: 1}
 
         result = await self.e2e_keys_handler.claim_one_time_keys(
-            query, timeout, always_include_fallback_keys=False
+            query, requester.user, timeout, always_include_fallback_keys=False
         )
         return 200, result
 
@@ -335,7 +335,7 @@ class UnstableOneTimeKeyServlet(RestServlet):
         self.e2e_keys_handler = hs.get_e2e_keys_handler()
 
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
-        await self.auth.get_user_by_req(request, allow_guest=True)
+        requester = await self.auth.get_user_by_req(request, allow_guest=True)
         timeout = parse_integer(request, "timeout", 10 * 1000)
         body = parse_json_object_from_request(request)
 
@@ -346,7 +346,7 @@ class UnstableOneTimeKeyServlet(RestServlet):
                 query.setdefault(user_id, {})[device_id] = Counter(algorithms)
 
         result = await self.e2e_keys_handler.claim_one_time_keys(
-            query, timeout, always_include_fallback_keys=True
+            query, requester.user, timeout, always_include_fallback_keys=True
         )
         return 200, result
 
diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py
index 72d0584061..2eaffe511e 100644
--- a/tests/handlers/test_e2e_keys.py
+++ b/tests/handlers/test_e2e_keys.py
@@ -27,7 +27,7 @@ from synapse.appservice import ApplicationService
 from synapse.handlers.device import DeviceHandler
 from synapse.server import HomeServer
 from synapse.storage.databases.main.appservice import _make_exclusive_regex
-from synapse.types import JsonDict
+from synapse.types import JsonDict, UserID
 from synapse.util import Clock
 
 from tests import unittest
@@ -45,6 +45,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.handler = hs.get_e2e_keys_handler()
         self.store = self.hs.get_datastores().main
+        self.requester = UserID.from_string(f"@test_requester:{self.hs.hostname}")
 
     def test_query_local_devices_no_devices(self) -> None:
         """If the user has no devices, we expect an empty list."""
@@ -161,6 +162,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         res2 = self.get_success(
             self.handler.claim_one_time_keys(
                 {local_user: {device_id: {"alg1": 1}}},
+                self.requester,
                 timeout=None,
                 always_include_fallback_keys=False,
             )
@@ -206,6 +208,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
                 {local_user: {device_id: {"alg1": 1}}},
+                self.requester,
                 timeout=None,
                 always_include_fallback_keys=False,
             )
@@ -225,6 +228,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
                 {local_user: {device_id: {"alg1": 1}}},
+                self.requester,
                 timeout=None,
                 always_include_fallback_keys=False,
             )
@@ -274,6 +278,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
                 {local_user: {device_id: {"alg1": 1}}},
+                self.requester,
                 timeout=None,
                 always_include_fallback_keys=False,
             )
@@ -286,6 +291,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
                 {local_user: {device_id: {"alg1": 1}}},
+                self.requester,
                 timeout=None,
                 always_include_fallback_keys=False,
             )
@@ -307,6 +313,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
                 {local_user: {device_id: {"alg1": 1}}},
+                self.requester,
                 timeout=None,
                 always_include_fallback_keys=False,
             )
@@ -348,6 +355,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
                 {local_user: {device_id: {"alg1": 1}}},
+                self.requester,
                 timeout=None,
                 always_include_fallback_keys=True,
             )
@@ -370,6 +378,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
                 {local_user: {device_id: {"alg1": 1}}},
+                self.requester,
                 timeout=None,
                 always_include_fallback_keys=True,
             )
@@ -1080,6 +1089,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
                 {local_user: {device_id_1: {"alg1": 1}, device_id_2: {"alg1": 1}}},
+                self.requester,
                 timeout=None,
                 always_include_fallback_keys=False,
             )
@@ -1125,6 +1135,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
                 {local_user: {device_id_1: {"alg1": 1}}},
+                self.requester,
                 timeout=None,
                 always_include_fallback_keys=True,
             )
@@ -1169,6 +1180,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
                 {local_user: {device_id_1: {"alg1": 1}}},
+                self.requester,
                 timeout=None,
                 always_include_fallback_keys=True,
             )
@@ -1202,6 +1214,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
                 {local_user: {device_id_1: {"alg1": 1}}},
+                self.requester,
                 timeout=None,
                 always_include_fallback_keys=True,
             )
@@ -1229,6 +1242,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         claim_res = self.get_success(
             self.handler.claim_one_time_keys(
                 {local_user: {device_id_1: {"alg1": 1}}},
+                self.requester,
                 timeout=None,
                 always_include_fallback_keys=True,
             )
-- 
cgit 1.5.1


From 77156a4bc1f87e98754e3f7f86e52a84a4253a10 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Wed, 24 May 2023 23:22:24 -0500
Subject: Process previously failed backfill events in the background (#15585)

Process previously failed backfill events in the background: they are likely to fail again, so there is no need to hold up the request while we retry them.

Fix https://github.com/matrix-org/synapse/issues/13623

Follow-up to https://github.com/matrix-org/synapse/issues/13621 and https://github.com/matrix-org/synapse/issues/13622

Part of making `/messages` faster: https://github.com/matrix-org/synapse/issues/13356
---
 changelog.d/15585.feature                          |  1 +
 synapse/handlers/federation_event.py               | 70 ++++++++++++++--
 synapse/storage/databases/main/event_federation.py | 31 ++++++-
 synapse/util/iterutils.py                          | 27 ++++++
 tests/handlers/test_federation_event.py            | 95 ++++++++++++++++++++++
 tests/storage/test_event_federation.py             | 37 +++++++++
 6 files changed, 252 insertions(+), 9 deletions(-)
 create mode 100644 changelog.d/15585.feature

(limited to 'synapse')

diff --git a/changelog.d/15585.feature b/changelog.d/15585.feature
new file mode 100644
index 0000000000..1adcfb69ee
--- /dev/null
+++ b/changelog.d/15585.feature
@@ -0,0 +1 @@
+Process previously failed backfill events in the background to avoid blocking requests for something that is bound to fail again.
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 9a08618da5..42141d3670 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -88,7 +88,7 @@ from synapse.types import (
 )
 from synapse.types.state import StateFilter
 from synapse.util.async_helpers import Linearizer, concurrently_execute
-from synapse.util.iterutils import batch_iter
+from synapse.util.iterutils import batch_iter, partition
 from synapse.util.retryutils import NotRetryingDestination
 from synapse.util.stringutils import shortstr
 
@@ -865,7 +865,7 @@ class FederationEventHandler:
             [event.event_id for event in events]
         )
 
-        new_events = []
+        new_events: List[EventBase] = []
         for event in events:
             event_id = event.event_id
 
@@ -895,12 +895,66 @@ class FederationEventHandler:
             str(len(new_events)),
         )
 
-        # We want to sort these by depth so we process them and
-        # tell clients about them in order.
-        sorted_events = sorted(new_events, key=lambda x: x.depth)
-        for ev in sorted_events:
-            with nested_logging_context(ev.event_id):
-                await self._process_pulled_event(origin, ev, backfilled=backfilled)
+        @trace
+        async def _process_new_pulled_events(new_events: Collection[EventBase]) -> None:
+            # We want to sort these by depth so we process them and tell clients about
+            # them in order. It's also more efficient to backfill this way (`depth`
+            # ascending) because one backfill event is likely to be the `prev_event` of
+            # the next event we're going to process.
+            sorted_events = sorted(new_events, key=lambda x: x.depth)
+            for ev in sorted_events:
+                with nested_logging_context(ev.event_id):
+                    await self._process_pulled_event(origin, ev, backfilled=backfilled)
+
+        # Check if we've already tried to process these events at some point in the
+        # past. We aren't concerned with the exponential backoff here, just whether it
+        # has failed to be processed before.
+        event_ids_with_failed_pull_attempts = (
+            await self._store.get_event_ids_with_failed_pull_attempts(
+                [event.event_id for event in new_events]
+            )
+        )
+
+        # We construct the event lists in source order from `/backfill` response because
+        # it's a) easiest, but also b) the order in which we process things matters for
+        # MSC2716 historical batches because many historical events are all at the same
+        # `depth` and we rely on the tenuous sort that the other server gave us and hope
+        # they're doing their best. The brittle nature of this ordering for historical
+        # messages over federation is one of the reasons why we don't want to continue
+        # on MSC2716 until we have online topological ordering.
+        events_with_failed_pull_attempts, fresh_events = partition(
+            new_events, lambda e: e.event_id in event_ids_with_failed_pull_attempts
+        )
+        set_tag(
+            SynapseTags.FUNC_ARG_PREFIX + "events_with_failed_pull_attempts",
+            str(event_ids_with_failed_pull_attempts),
+        )
+        set_tag(
+            SynapseTags.RESULT_PREFIX + "events_with_failed_pull_attempts.length",
+            str(len(events_with_failed_pull_attempts)),
+        )
+        set_tag(
+            SynapseTags.FUNC_ARG_PREFIX + "fresh_events",
+            str([event.event_id for event in fresh_events]),
+        )
+        set_tag(
+            SynapseTags.RESULT_PREFIX + "fresh_events.length",
+            str(len(fresh_events)),
+        )
+
+        # Process previously failed backfill events in the background to not waste
+        # time on something that is likely to fail again.
+        if len(events_with_failed_pull_attempts) > 0:
+            run_as_background_process(
+                "_process_new_pulled_events_with_failed_pull_attempts",
+                _process_new_pulled_events,
+                events_with_failed_pull_attempts,
+            )
+
+        # We can optimistically try to process and wait for the event to be fully
+        # persisted if we've never tried before.
+        if len(fresh_events) > 0:
+            await _process_new_pulled_events(fresh_events)
 
     @trace
     @tag_args
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index ac19de183c..2681917d0b 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -46,7 +46,7 @@ from synapse.storage.database import (
 from synapse.storage.databases.main.events_worker import EventsWorkerStore
 from synapse.storage.databases.main.signatures import SignatureWorkerStore
 from synapse.storage.engines import PostgresEngine, Sqlite3Engine
-from synapse.types import JsonDict
+from synapse.types import JsonDict, StrCollection
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached
 from synapse.util.caches.lrucache import LruCache
@@ -1583,6 +1583,35 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
 
         txn.execute(sql, (room_id, event_id, 1, self._clock.time_msec(), cause))
 
+    @trace
+    async def get_event_ids_with_failed_pull_attempts(
+        self, event_ids: StrCollection
+    ) -> Set[str]:
+        """
+        Filter the given list of `event_ids` and return events which have any failed
+        pull attempts.
+
+        Args:
+            event_ids: A list of events to filter down.
+
+        Returns:
+            The set of given `event_ids` that have previous failed pull attempts.
+        """
+
+        rows = await self.db_pool.simple_select_many_batch(
+            table="event_failed_pull_attempts",
+            column="event_id",
+            iterable=event_ids,
+            keyvalues={},
+            retcols=("event_id",),
+            desc="get_event_ids_with_failed_pull_attempts",
+        )
+        event_ids_with_failed_pull_attempts: Set[str] = {
+            row["event_id"] for row in rows
+        }
+
+        return event_ids_with_failed_pull_attempts
+
     @trace
     async def get_event_ids_to_not_pull_from_backoff(
         self,
diff --git a/synapse/util/iterutils.py b/synapse/util/iterutils.py
index 4938ddf703..a0efb96d3b 100644
--- a/synapse/util/iterutils.py
+++ b/synapse/util/iterutils.py
@@ -15,11 +15,13 @@
 import heapq
 from itertools import islice
 from typing import (
+    Callable,
     Collection,
     Dict,
     Generator,
     Iterable,
     Iterator,
+    List,
     Mapping,
     Set,
     Sized,
@@ -71,6 +73,31 @@ def chunk_seq(iseq: S, maxlen: int) -> Iterator[S]:
     return (iseq[i : i + maxlen] for i in range(0, len(iseq), maxlen))
 
 
+def partition(
+    iterable: Iterable[T], predicate: Callable[[T], bool]
+) -> Tuple[List[T], List[T]]:
+    """
+    Separate a given iterable into two lists based on the result of a predicate function.
+
+    Args:
+        iterable: the iterable to partition (separate)
+        predicate: a function that takes an item from the iterable and returns a boolean
+
+    Returns:
+        A tuple of two lists, the first containing all items for which the predicate
+        returned True, the second containing all items for which the predicate returned
+        False
+    """
+    true_results = []
+    false_results = []
+    for item in iterable:
+        if predicate(item):
+            true_results.append(item)
+        else:
+            false_results.append(item)
+    return true_results, false_results
+
+
 def sorted_topologically(
     nodes: Iterable[T],
     graph: Mapping[T, Collection[T]],
diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py
index c067e5bfe3..23f1b33b2f 100644
--- a/tests/handlers/test_federation_event.py
+++ b/tests/handlers/test_federation_event.py
@@ -664,6 +664,101 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase):
             StoreError,
         )
 
+    def test_backfill_process_previously_failed_pull_attempt_event_in_the_background(
+        self,
+    ) -> None:
+        """
+        Sanity check that events which already have failed pull attempts are still
+        processed, even though that processing happens in the background.
+        """
+        OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}"
+        main_store = self.hs.get_datastores().main
+
+        # Create the room
+        user_id = self.register_user("kermit", "test")
+        tok = self.login("kermit", "test")
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
+        room_version = self.get_success(main_store.get_room_version(room_id))
+
+        # Allow the remote user to send state events
+        self.helper.send_state(
+            room_id,
+            "m.room.power_levels",
+            {"events_default": 0, "state_default": 0},
+            tok=tok,
+        )
+
+        # Add the remote user to the room
+        member_event = self.get_success(
+            event_injection.inject_member_event(self.hs, room_id, OTHER_USER, "join")
+        )
+
+        initial_state_map = self.get_success(
+            main_store.get_partial_current_state_ids(room_id)
+        )
+
+        auth_event_ids = [
+            initial_state_map[("m.room.create", "")],
+            initial_state_map[("m.room.power_levels", "")],
+            member_event.event_id,
+        ]
+
+        # Create a regular event that should process
+        pulled_event = make_event_from_dict(
+            self.add_hashes_and_signatures_from_other_server(
+                {
+                    "type": "test_regular_type",
+                    "room_id": room_id,
+                    "sender": OTHER_USER,
+                    "prev_events": [
+                        member_event.event_id,
+                    ],
+                    "auth_events": auth_event_ids,
+                    "origin_server_ts": 1,
+                    "depth": 12,
+                    "content": {"body": "pulled_event"},
+                }
+            ),
+            room_version,
+        )
+
+        # Record a failed pull attempt for this event which will cause us to backfill it
+        # in the background from here on out.
+        self.get_success(
+            main_store.record_event_failed_pull_attempt(
+                room_id, pulled_event.event_id, "fake cause"
+            )
+        )
+
+        # We expect an outbound request to /backfill, so stub that out
+        self.mock_federation_transport_client.backfill.return_value = make_awaitable(
+            {
+                "origin": self.OTHER_SERVER_NAME,
+                "origin_server_ts": 123,
+                "pdus": [
+                    pulled_event.get_pdu_json(),
+                ],
+            }
+        )
+
+        # The function under test: try to backfill and process the pulled event
+        with LoggingContext("test"):
+            self.get_success(
+                self.hs.get_federation_event_handler().backfill(
+                    self.OTHER_SERVER_NAME,
+                    room_id,
+                    limit=1,
+                    extremities=["$some_extremity"],
+                )
+            )
+
+        # Ensure `run_as_background_process(...)` has a chance to run (essentially
+        # `wait_for_background_processes()`)
+        self.reactor.pump((0.1,))
+
+        # Make sure we processed and persisted the pulled event
+        self.get_success(main_store.get_event(pulled_event.event_id, allow_none=False))
+
     def test_process_pulled_event_with_rejected_missing_state(self) -> None:
         """Ensure that we correctly handle pulled events with missing state containing a
         rejected state event
diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index 81e50bdd55..4b8d8328d7 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -1134,6 +1134,43 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
         self.assertEqual(backfill_event_ids, ["insertion_eventA"])
 
+    def test_get_event_ids_with_failed_pull_attempts(self) -> None:
+        """
+        Test to make sure we properly get event_ids based on whether they have any
+        failed pull attempts.
+        """
+        # Create the room
+        user_id = self.register_user("alice", "test")
+        tok = self.login("alice", "test")
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
+
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(
+                room_id, "$failed_event_id1", "fake cause"
+            )
+        )
+        self.get_success(
+            self.store.record_event_failed_pull_attempt(
+                room_id, "$failed_event_id2", "fake cause"
+            )
+        )
+
+        event_ids_with_failed_pull_attempts = self.get_success(
+            self.store.get_event_ids_with_failed_pull_attempts(
+                event_ids=[
+                    "$failed_event_id1",
+                    "$fresh_event_id1",
+                    "$failed_event_id2",
+                    "$fresh_event_id2",
+                ]
+            )
+        )
+
+        self.assertEqual(
+            event_ids_with_failed_pull_attempts,
+            {"$failed_event_id1", "$failed_event_id2"},
+        )
+
     def test_get_event_ids_to_not_pull_from_backoff(self) -> None:
         """
         Test to make sure only event IDs we should backoff from are returned.
-- 
cgit 1.5.1


From 4e013093a87094c711eb047a41e2de3807c7873e Mon Sep 17 00:00:00 2001
From: Travis Ralston <travisr@matrix.org>
Date: Fri, 26 May 2023 05:46:13 -0600
Subject: Add MSC3820 (room version 11) option 2 unstable room version.
 (#15666)

---
 changelog.d/15666.misc       |  1 +
 synapse/api/room_versions.py | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+)
 create mode 100644 changelog.d/15666.misc

(limited to 'synapse')

diff --git a/changelog.d/15666.misc b/changelog.d/15666.misc
new file mode 100644
index 0000000000..92eae49952
--- /dev/null
+++ b/changelog.d/15666.misc
@@ -0,0 +1 @@
+Implement "option 2" for [MSC3820](https://github.com/matrix-org/matrix-spec-proposals/pull/3820): Room version 11.
\ No newline at end of file
diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py
index 7030b133d3..035a14171b 100644
--- a/synapse/api/room_versions.py
+++ b/synapse/api/room_versions.py
@@ -485,6 +485,30 @@ class RoomVersions:
         msc3931_push_features=(),
         msc3989_redaction_rules=True,
     )
+    MSC3820opt2 = RoomVersion(
+        # Based upon v10
+        "org.matrix.msc3820.opt2",
+        RoomDisposition.UNSTABLE,
+        EventFormatVersions.ROOM_V4_PLUS,
+        StateResolutionVersions.V2,
+        enforce_key_validity=True,
+        special_case_aliases_auth=False,
+        strict_canonicaljson=True,
+        limit_notifications_power_levels=True,
+        msc2175_implicit_room_creator=True,  # Used by MSC3820
+        msc2176_redaction_rules=True,  # Used by MSC3820
+        msc3083_join_rules=True,
+        msc3375_redaction_rules=True,
+        msc2403_knocking=True,
+        msc2716_historical=False,
+        msc2716_redactions=False,
+        msc3389_relation_redactions=False,
+        msc3787_knock_restricted_join_rule=True,
+        msc3667_int_only_power_levels=True,
+        msc3821_redaction_rules=True,  # Used by MSC3820
+        msc3931_push_features=(),
+        msc3989_redaction_rules=True,  # Used by MSC3820
+    )
 
 
 KNOWN_ROOM_VERSIONS: Dict[str, RoomVersion] = {
-- 
cgit 1.5.1


From c775d80b73b7930b9541e353fc24dcef66579e48 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Fri, 26 May 2023 14:28:55 +0000
Subject: Fix a bug introduced in Synapse v1.84.0 where workers do not start up
 when no `instance_map` was provided. (#15672)

* Fix #15669: always populate instance map even if it was empty

* Fix some tests

* Fix more tests

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

* CI fix: don't forget to update apt repository sources before installing olddeps deps

* Add test testing the backwards compatibility

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 .github/workflows/tests.yml              |  1 +
 changelog.d/15672.bugfix                 |  1 +
 synapse/config/workers.py                |  2 +-
 tests/app/test_homeserver_start.py       |  2 ++
 tests/app/test_openid_listener.py        |  1 +
 tests/config/test_workers.py             | 43 +++++++++++++++++++++++++++++---
 tests/replication/test_federation_ack.py |  1 +
 tests/storage/test_rollback_worker.py    |  1 +
 8 files changed, 47 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/15672.bugfix

(limited to 'synapse')

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 51cbeb3298..ce3a57fb01 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -314,6 +314,7 @@ jobs:
       # There aren't wheels for some of the older deps, so we need to install
       # their build dependencies
       - run: |
+          sudo apt-get -qq update
           sudo apt-get -qq install build-essential libffi-dev python-dev \
           libxml2-dev libxslt-dev xmlsec1 zlib1g-dev libjpeg-dev libwebp-dev
 
diff --git a/changelog.d/15672.bugfix b/changelog.d/15672.bugfix
new file mode 100644
index 0000000000..c81d7332b7
--- /dev/null
+++ b/changelog.d/15672.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse v1.84.0 where workers do not start up when no `instance_map` was provided.
\ No newline at end of file
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index d2311cc857..38e13dd7b5 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -222,7 +222,7 @@ class WorkerConfig(Config):
         # itself doesn't need this data as it would never have to talk to itself.
         instance_map: Dict[str, Any] = config.get("instance_map", {})
 
-        if instance_map and self.instance_name is not MAIN_PROCESS_INSTANCE_NAME:
+        if self.instance_name is not MAIN_PROCESS_INSTANCE_NAME:
             # The host used to connect to the main synapse
             main_host = config.get("worker_replication_host", None)
 
diff --git a/tests/app/test_homeserver_start.py b/tests/app/test_homeserver_start.py
index 788c935537..cd117b7394 100644
--- a/tests/app/test_homeserver_start.py
+++ b/tests/app/test_homeserver_start.py
@@ -25,6 +25,8 @@ class HomeserverAppStartTestCase(ConfigFileTestCase):
         # Add a blank line as otherwise the next addition ends up on a line with a comment
         self.add_lines_to_config(["  "])
         self.add_lines_to_config(["worker_app: test_worker_app"])
+        self.add_lines_to_config(["worker_replication_host: 127.0.0.1"])
+        self.add_lines_to_config(["worker_replication_http_port: 0"])
 
         # Ensure that starting master process with worker config raises an exception
         with self.assertRaises(ConfigError):
diff --git a/tests/app/test_openid_listener.py b/tests/app/test_openid_listener.py
index 2ee343d8a4..056d9402a4 100644
--- a/tests/app/test_openid_listener.py
+++ b/tests/app/test_openid_listener.py
@@ -42,6 +42,7 @@ class FederationReaderOpenIDListenerTests(HomeserverTestCase):
         # have to tell the FederationHandler not to try to access stuff that is only
         # in the primary store.
         conf["worker_app"] = "yes"
+        conf["instance_map"] = {"main": {"host": "127.0.0.1", "port": 0}}
 
         return conf
 
diff --git a/tests/config/test_workers.py b/tests/config/test_workers.py
index 49a6bdf408..086359fd71 100644
--- a/tests/config/test_workers.py
+++ b/tests/config/test_workers.py
@@ -17,7 +17,7 @@ from unittest.mock import Mock
 from immutabledict import immutabledict
 
 from synapse.config import ConfigError
-from synapse.config.workers import WorkerConfig
+from synapse.config.workers import InstanceLocationConfig, WorkerConfig
 
 from tests.unittest import TestCase
 
@@ -94,6 +94,7 @@ class WorkerDutyConfigTestCase(TestCase):
                 # so that it doesn't raise an exception here.
                 # (This is not read by `_should_this_worker_perform_duty`.)
                 "notify_appservices": False,
+                "instance_map": {"main": {"host": "127.0.0.1", "port": 0}},
             },
         )
 
@@ -138,7 +139,9 @@ class WorkerDutyConfigTestCase(TestCase):
         """
 
         main_process_config = self._make_worker_config(
-            worker_app="synapse.app.homeserver", worker_name=None
+            worker_app="synapse.app.homeserver",
+            worker_name=None,
+            extras={"instance_map": {"main": {"host": "127.0.0.1", "port": 0}}},
         )
 
         self.assertTrue(
@@ -203,6 +206,7 @@ class WorkerDutyConfigTestCase(TestCase):
                 # so that it doesn't raise an exception here.
                 # (This is not read by `_should_this_worker_perform_duty`.)
                 "notify_appservices": False,
+                "instance_map": {"main": {"host": "127.0.0.1", "port": 0}},
             },
         )
 
@@ -236,7 +240,9 @@ class WorkerDutyConfigTestCase(TestCase):
         Tests new config options. This is for the master's config.
         """
         main_process_config = self._make_worker_config(
-            worker_app="synapse.app.homeserver", worker_name=None
+            worker_app="synapse.app.homeserver",
+            worker_name=None,
+            extras={"instance_map": {"main": {"host": "127.0.0.1", "port": 0}}},
         )
 
         self.assertTrue(
@@ -262,7 +268,9 @@ class WorkerDutyConfigTestCase(TestCase):
         Tests new config options. This is for the worker's config.
         """
         appservice_worker_config = self._make_worker_config(
-            worker_app="synapse.app.generic_worker", worker_name="worker1"
+            worker_app="synapse.app.generic_worker",
+            worker_name="worker1",
+            extras={"instance_map": {"main": {"host": "127.0.0.1", "port": 0}}},
         )
 
         self.assertTrue(
@@ -298,6 +306,7 @@ class WorkerDutyConfigTestCase(TestCase):
             extras={
                 "notify_appservices_from_worker": "worker2",
                 "update_user_directory_from_worker": "worker1",
+                "instance_map": {"main": {"host": "127.0.0.1", "port": 0}},
             },
         )
         self.assertFalse(worker1_config.should_notify_appservices)
@@ -309,7 +318,33 @@ class WorkerDutyConfigTestCase(TestCase):
             extras={
                 "notify_appservices_from_worker": "worker2",
                 "update_user_directory_from_worker": "worker1",
+                "instance_map": {"main": {"host": "127.0.0.1", "port": 0}},
             },
         )
         self.assertTrue(worker2_config.should_notify_appservices)
         self.assertFalse(worker2_config.should_update_user_directory)
+
+    def test_worker_instance_map_compat(self) -> None:
+        """
+        Test that `worker_replication_*` settings are compatibly handled by
+        adding them to the instance map as a `main` entry.
+        """
+
+        worker1_config = self._make_worker_config(
+            worker_app="synapse.app.generic_worker",
+            worker_name="worker1",
+            extras={
+                "notify_appservices_from_worker": "worker2",
+                "update_user_directory_from_worker": "worker1",
+                "worker_replication_host": "127.0.0.42",
+                "worker_replication_http_port": 1979,
+            },
+        )
+        self.assertEqual(
+            worker1_config.instance_map,
+            {
+                "master": InstanceLocationConfig(
+                    host="127.0.0.42", port=1979, tls=False
+                ),
+            },
+        )
diff --git a/tests/replication/test_federation_ack.py b/tests/replication/test_federation_ack.py
index 12668b34c5..cf59b1a204 100644
--- a/tests/replication/test_federation_ack.py
+++ b/tests/replication/test_federation_ack.py
@@ -32,6 +32,7 @@ class FederationAckTestCase(HomeserverTestCase):
         config["worker_app"] = "synapse.app.generic_worker"
         config["worker_name"] = "federation_sender1"
         config["federation_sender_instances"] = ["federation_sender1"]
+        config["instance_map"] = {"main": {"host": "127.0.0.1", "port": 0}}
         return config
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
diff --git a/tests/storage/test_rollback_worker.py b/tests/storage/test_rollback_worker.py
index 966aafea6f..6861d3a6c9 100644
--- a/tests/storage/test_rollback_worker.py
+++ b/tests/storage/test_rollback_worker.py
@@ -55,6 +55,7 @@ class WorkerSchemaTests(HomeserverTestCase):
 
         # Mark this as a worker app.
         conf["worker_app"] = "yes"
+        conf["instance_map"] = {"main": {"host": "127.0.0.1", "port": 0}}
 
         return conf
 
-- 
cgit 1.5.1


From 2ad91ec628126753590c1a90c432270d6c8fa8fd Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 26 May 2023 13:16:08 -0400
Subject: Set thread_id column to non-null for
 event_push_{actions,actions_staging,summary} (#15597)

Updates the database schema to require a thread_id (by adding a
constraint that the column is non-null) for event_push_actions,
event_push_actions_staging, and event_push_actions_summary.

For PostgreSQL we add the constraint as NOT VALID, then
VALIDATE the constraint in a background job to avoid locking
the table during an upgrade.

Each table is updated as a separate schema delta to avoid
deadlocks between them.

For SQLite we simply rebuild the table & copy the data.
---
 changelog.d/15597.misc                             |   1 +
 synapse/storage/background_updates.py              |  44 ++++
 .../storage/databases/main/event_push_actions.py   | 254 +++------------------
 synapse/storage/schema/__init__.py                 |   3 +
 .../delta/77/05thread_notifications_backfill.sql   |  28 +++
 .../77/06thread_notifications_not_null.sql.sqlite  | 102 +++++++++
 ...ations_not_null_event_push_actions.sql.postgres |  27 +++
 ...ot_null_event_push_actions_staging.sql.postgres |  27 +++
 ...ations_not_null_event_push_summary.sql.postgres |  29 +++
 9 files changed, 292 insertions(+), 223 deletions(-)
 create mode 100644 changelog.d/15597.misc
 create mode 100644 synapse/storage/schema/main/delta/77/05thread_notifications_backfill.sql
 create mode 100644 synapse/storage/schema/main/delta/77/06thread_notifications_not_null.sql.sqlite
 create mode 100644 synapse/storage/schema/main/delta/77/06thread_notifications_not_null_event_push_actions.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/77/06thread_notifications_not_null_event_push_actions_staging.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/77/06thread_notifications_not_null_event_push_summary.sql.postgres

(limited to 'synapse')

diff --git a/changelog.d/15597.misc b/changelog.d/15597.misc
new file mode 100644
index 0000000000..2dea23784f
--- /dev/null
+++ b/changelog.d/15597.misc
@@ -0,0 +1 @@
+Make the `thread_id` column on `event_push_actions`, `event_push_actions_staging`, and `event_push_summary` non-null.
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index a99aea8926..ca085ef800 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -561,6 +561,50 @@ class BackgroundUpdater:
             updater, oneshot=True
         )
 
+    def register_background_validate_constraint(
+        self, update_name: str, constraint_name: str, table: str
+    ) -> None:
+        """Helper for store classes to do a background validate constraint.
+
+        This only applies on PostgreSQL.
+
+        To use:
+
+        1. use a schema delta file to add a background update. Example:
+            INSERT INTO background_updates (update_name, progress_json) VALUES
+                ('validate_my_constraint', '{}');
+
+        2. In the Store constructor, call this method
+
+        Args:
+            update_name: update_name to register for
+            constraint_name: name of constraint to validate
+            table: table the constraint is applied to
+        """
+
+        def runner(conn: Connection) -> None:
+            c = conn.cursor()
+
+            sql = f"""
+            ALTER TABLE {table} VALIDATE CONSTRAINT {constraint_name};
+            """
+            logger.debug("[SQL] %s", sql)
+            c.execute(sql)
+
+        async def updater(progress: JsonDict, batch_size: int) -> int:
+            assert isinstance(
+                self.db_pool.engine, engines.PostgresEngine
+            ), "validate constraint background update registered for non-Postgres database"
+
+            logger.info("Validating constraint %s to %s", constraint_name, table)
+            await self.db_pool.runWithConnection(runner)
+            await self._end_background_update(update_name)
+            return 1
+
+        self._background_update_handlers[update_name] = _BackgroundUpdateHandler(
+            updater, oneshot=True
+        )
+
     async def create_index_in_background(
         self,
         index_name: str,
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 6fdb1e292e..07bda7d6be 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -289,179 +289,52 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             unique=True,
         )
 
-        self.db_pool.updates.register_background_update_handler(
-            "event_push_backfill_thread_id",
-            self._background_backfill_thread_id,
+        self.db_pool.updates.register_background_validate_constraint(
+            "event_push_actions_staging_thread_id",
+            constraint_name="event_push_actions_staging_thread_id",
+            table="event_push_actions_staging",
         )
-
-        # Indexes which will be used to quickly make the thread_id column non-null.
-        self.db_pool.updates.register_background_index_update(
-            "event_push_actions_thread_id_null",
-            index_name="event_push_actions_thread_id_null",
+        self.db_pool.updates.register_background_validate_constraint(
+            "event_push_actions_thread_id",
+            constraint_name="event_push_actions_thread_id",
             table="event_push_actions",
-            columns=["thread_id"],
-            where_clause="thread_id IS NULL",
         )
-        self.db_pool.updates.register_background_index_update(
-            "event_push_summary_thread_id_null",
-            index_name="event_push_summary_thread_id_null",
+        self.db_pool.updates.register_background_validate_constraint(
+            "event_push_summary_thread_id",
+            constraint_name="event_push_summary_thread_id",
             table="event_push_summary",
-            columns=["thread_id"],
-            where_clause="thread_id IS NULL",
         )
 
-        # Check ASAP (and then later, every 1s) to see if we have finished
-        # background updates the event_push_actions and event_push_summary tables.
-        self._clock.call_later(0.0, self._check_event_push_backfill_thread_id)
-        self._event_push_backfill_thread_id_done = False
-
-    @wrap_as_background_process("check_event_push_backfill_thread_id")
-    async def _check_event_push_backfill_thread_id(self) -> None:
-        """
-        Has thread_id finished backfilling?
-
-        If not, we need to just-in-time update it so the queries work.
-        """
-        done = await self.db_pool.updates.has_completed_background_update(
-            "event_push_backfill_thread_id"
+        self.db_pool.updates.register_background_update_handler(
+            "event_push_drop_null_thread_id_indexes",
+            self._background_drop_null_thread_id_indexes,
         )
 
-        if done:
-            self._event_push_backfill_thread_id_done = True
-        else:
-            # Reschedule to run.
-            self._clock.call_later(15.0, self._check_event_push_backfill_thread_id)
-
-    async def _background_backfill_thread_id(
+    async def _background_drop_null_thread_id_indexes(
         self, progress: JsonDict, batch_size: int
     ) -> int:
         """
-        Fill in the thread_id field for event_push_actions and event_push_summary.
-
-        This is preparatory so that it can be made non-nullable in the future.
-
-        Because all current (null) data is done in an unthreaded manner this
-        simply assumes it is on the "main" timeline. Since event_push_actions
-        are periodically cleared it is not possible to correctly re-calculate
-        the thread_id.
+        Drop the indexes used to find null thread_ids for event_push_actions and
+        event_push_summary.
         """
-        event_push_actions_done = progress.get("event_push_actions_done", False)
 
-        def add_thread_id_txn(
-            txn: LoggingTransaction, start_stream_ordering: int
-        ) -> int:
-            sql = """
-            SELECT stream_ordering
-            FROM event_push_actions
-            WHERE
-                thread_id IS NULL
-                AND stream_ordering > ?
-            ORDER BY stream_ordering
-            LIMIT ?
-            """
-            txn.execute(sql, (start_stream_ordering, batch_size))
-
-            # No more rows to process.
-            rows = txn.fetchall()
-            if not rows:
-                progress["event_push_actions_done"] = True
-                self.db_pool.updates._background_update_progress_txn(
-                    txn, "event_push_backfill_thread_id", progress
-                )
-                return 0
+        def drop_null_thread_id_indexes_txn(txn: LoggingTransaction) -> None:
+            sql = "DROP INDEX IF EXISTS event_push_actions_thread_id_null"
+            logger.debug("[SQL] %s", sql)
+            txn.execute(sql)
 
-            # Update the thread ID for any of those rows.
-            max_stream_ordering = rows[-1][0]
+            sql = "DROP INDEX IF EXISTS event_push_summary_thread_id_null"
+            logger.debug("[SQL] %s", sql)
+            txn.execute(sql)
 
-            sql = """
-            UPDATE event_push_actions
-            SET thread_id = 'main'
-            WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL
-            """
-            txn.execute(
-                sql,
-                (
-                    start_stream_ordering,
-                    max_stream_ordering,
-                ),
-            )
-
-            # Update progress.
-            processed_rows = txn.rowcount
-            progress["max_event_push_actions_stream_ordering"] = max_stream_ordering
-            self.db_pool.updates._background_update_progress_txn(
-                txn, "event_push_backfill_thread_id", progress
-            )
-
-            return processed_rows
-
-        def add_thread_id_summary_txn(txn: LoggingTransaction) -> int:
-            min_user_id = progress.get("max_summary_user_id", "")
-            min_room_id = progress.get("max_summary_room_id", "")
-
-            # Slightly overcomplicated query for getting the Nth user ID / room
-            # ID tuple, or the last if there are less than N remaining.
-            sql = """
-            SELECT user_id, room_id FROM (
-                SELECT user_id, room_id FROM event_push_summary
-                WHERE (user_id, room_id) > (?, ?)
-                    AND thread_id IS NULL
-                ORDER BY user_id, room_id
-                LIMIT ?
-            ) AS e
-            ORDER BY user_id DESC, room_id DESC
-            LIMIT 1
-            """
-
-            txn.execute(sql, (min_user_id, min_room_id, batch_size))
-            row = txn.fetchone()
-            if not row:
-                return 0
-
-            max_user_id, max_room_id = row
-
-            sql = """
-            UPDATE event_push_summary
-            SET thread_id = 'main'
-            WHERE
-                (?, ?) < (user_id, room_id) AND (user_id, room_id) <= (?, ?)
-                AND thread_id IS NULL
-            """
-            txn.execute(sql, (min_user_id, min_room_id, max_user_id, max_room_id))
-            processed_rows = txn.rowcount
-
-            progress["max_summary_user_id"] = max_user_id
-            progress["max_summary_room_id"] = max_room_id
-            self.db_pool.updates._background_update_progress_txn(
-                txn, "event_push_backfill_thread_id", progress
-            )
-
-            return processed_rows
-
-        # First update the event_push_actions table, then the event_push_summary table.
-        #
-        # Note that the event_push_actions_staging table is ignored since it is
-        # assumed that items in that table will only exist for a short period of
-        # time.
-        if not event_push_actions_done:
-            result = await self.db_pool.runInteraction(
-                "event_push_backfill_thread_id",
-                add_thread_id_txn,
-                progress.get("max_event_push_actions_stream_ordering", 0),
-            )
-        else:
-            result = await self.db_pool.runInteraction(
-                "event_push_backfill_thread_id",
-                add_thread_id_summary_txn,
-            )
-
-            # Only done after the event_push_summary table is done.
-            if not result:
-                await self.db_pool.updates._end_background_update(
-                    "event_push_backfill_thread_id"
-                )
-
-        return result
+        await self.db_pool.runInteraction(
+            "drop_null_thread_id_indexes_txn",
+            drop_null_thread_id_indexes_txn,
+        )
+        await self.db_pool.updates._end_background_update(
+            "event_push_drop_null_thread_id_indexes"
+        )
+        return 0
 
     async def get_unread_counts_by_room_for_user(self, user_id: str) -> Dict[str, int]:
         """Get the notification count by room for a user. Only considers notifications,
@@ -711,25 +584,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE),
         )
 
-        # First ensure that the existing rows have an updated thread_id field.
-        if not self._event_push_backfill_thread_id_done:
-            txn.execute(
-                """
-                UPDATE event_push_summary
-                SET thread_id = ?
-                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
-                """,
-                (MAIN_TIMELINE, room_id, user_id),
-            )
-            txn.execute(
-                """
-                UPDATE event_push_actions
-                SET thread_id = ?
-                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
-                """,
-                (MAIN_TIMELINE, room_id, user_id),
-            )
-
         # First we pull the counts from the summary table.
         #
         # We check that `last_receipt_stream_ordering` matches the stream ordering of the
@@ -1545,25 +1399,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
                 (room_id, user_id, stream_ordering, *thread_args),
             )
 
-            # First ensure that the existing rows have an updated thread_id field.
-            if not self._event_push_backfill_thread_id_done:
-                txn.execute(
-                    """
-                    UPDATE event_push_summary
-                    SET thread_id = ?
-                    WHERE room_id = ? AND user_id = ? AND thread_id is NULL
-                    """,
-                    (MAIN_TIMELINE, room_id, user_id),
-                )
-                txn.execute(
-                    """
-                    UPDATE event_push_actions
-                    SET thread_id = ?
-                    WHERE room_id = ? AND user_id = ? AND thread_id is NULL
-                    """,
-                    (MAIN_TIMELINE, room_id, user_id),
-                )
-
             # Fetch the notification counts between the stream ordering of the
             # latest receipt and what was previously summarised.
             unread_counts = self._get_notif_unread_count_for_user_room(
@@ -1698,19 +1533,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             rotate_to_stream_ordering: The new maximum event stream ordering to summarise.
         """
 
-        # Ensure that any new actions have an updated thread_id.
-        if not self._event_push_backfill_thread_id_done:
-            txn.execute(
-                """
-                UPDATE event_push_actions
-                SET thread_id = ?
-                WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL
-                """,
-                (MAIN_TIMELINE, old_rotate_stream_ordering, rotate_to_stream_ordering),
-            )
-
-        # XXX Do we need to update summaries here too?
-
         # Calculate the new counts that should be upserted into event_push_summary
         sql = """
             SELECT user_id, room_id, thread_id,
@@ -1773,20 +1595,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
 
         logger.info("Rotating notifications, handling %d rows", len(summaries))
 
-        # Ensure that any updated threads have the proper thread_id.
-        if not self._event_push_backfill_thread_id_done:
-            txn.execute_batch(
-                """
-                UPDATE event_push_summary
-                SET thread_id = ?
-                WHERE room_id = ? AND user_id = ? AND thread_id is NULL
-                """,
-                [
-                    (MAIN_TIMELINE, room_id, user_id)
-                    for user_id, room_id, _ in summaries
-                ],
-            )
-
         self.db_pool.simple_upsert_many_txn(
             txn,
             table="event_push_summary",
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index df2cc31ca6..5cc786f030 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -110,6 +110,9 @@ SCHEMA_COMPAT_VERSION = (
     # Queries against `event_stream_ordering` columns in membership tables must
     # be disambiguated.
     #
+    # The thread_id column must be written to with non-null values for the
+    # event_push_actions, event_push_actions_staging, and event_push_summary tables.
+    #
     # insertions to the column `full_user_id` of tables profiles and user_filters can no
     # longer be null
     76
diff --git a/synapse/storage/schema/main/delta/77/05thread_notifications_backfill.sql b/synapse/storage/schema/main/delta/77/05thread_notifications_backfill.sql
new file mode 100644
index 0000000000..ce6f9ff937
--- /dev/null
+++ b/synapse/storage/schema/main/delta/77/05thread_notifications_backfill.sql
@@ -0,0 +1,28 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Force the background updates from 06thread_notifications.sql to run in the
+-- foreground as code will now require those to be "done".
+
+DELETE FROM background_updates WHERE update_name = 'event_push_backfill_thread_id';
+
+-- Overwrite any null thread_id values.
+UPDATE event_push_actions_staging SET thread_id = 'main' WHERE thread_id IS NULL;
+UPDATE event_push_actions SET thread_id = 'main' WHERE thread_id IS NULL;
+UPDATE event_push_summary SET thread_id = 'main' WHERE thread_id IS NULL;
+
+-- Drop the background updates to calculate the indexes used to find null thread_ids.
+DELETE FROM background_updates WHERE update_name = 'event_push_actions_thread_id_null';
+DELETE FROM background_updates WHERE update_name = 'event_push_summary_thread_id_null';
diff --git a/synapse/storage/schema/main/delta/77/06thread_notifications_not_null.sql.sqlite b/synapse/storage/schema/main/delta/77/06thread_notifications_not_null.sql.sqlite
new file mode 100644
index 0000000000..d19b9648b5
--- /dev/null
+++ b/synapse/storage/schema/main/delta/77/06thread_notifications_not_null.sql.sqlite
@@ -0,0 +1,102 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ -- The thread_id columns can now be made non-nullable.
+--
+-- SQLite doesn't support modifying columns of an existing table, so it must
+-- be recreated.
+
+-- Create the new tables.
+CREATE TABLE event_push_actions_staging_new (
+    event_id TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    actions TEXT NOT NULL,
+    notif SMALLINT NOT NULL,
+    highlight SMALLINT NOT NULL,
+    unread SMALLINT,
+    thread_id TEXT,
+    inserted_ts BIGINT,
+    CONSTRAINT event_push_actions_staging_thread_id CHECK (thread_id is NOT NULL)
+);
+
+CREATE TABLE event_push_actions_new (
+    room_id TEXT NOT NULL,
+    event_id TEXT NOT NULL,
+    user_id TEXT NOT NULL,
+    profile_tag VARCHAR(32),
+    actions TEXT NOT NULL,
+    topological_ordering BIGINT,
+    stream_ordering BIGINT,
+    notif SMALLINT,
+    highlight SMALLINT,
+    unread SMALLINT,
+    thread_id TEXT,
+    CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag),
+    CONSTRAINT event_push_actions_thread_id CHECK (thread_id is NOT NULL)
+);
+
+CREATE TABLE event_push_summary_new (
+    user_id TEXT NOT NULL,
+    room_id TEXT NOT NULL,
+    notif_count BIGINT NOT NULL,
+    stream_ordering BIGINT NOT NULL,
+    unread_count BIGINT,
+    last_receipt_stream_ordering BIGINT,
+    thread_id TEXT,
+    CONSTRAINT event_push_summary_thread_id CHECK (thread_id is NOT NULL)
+);
+
+-- Copy the data.
+INSERT INTO event_push_actions_staging_new (event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts)
+    SELECT event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts
+    FROM event_push_actions_staging;
+
+INSERT INTO event_push_actions_new (room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id)
+    SELECT room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id
+    FROM event_push_actions;
+
+INSERT INTO event_push_summary_new (user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id)
+    SELECT user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id
+    FROM event_push_summary;
+
+-- Drop the old tables.
+DROP TABLE event_push_actions_staging;
+DROP TABLE event_push_actions;
+DROP TABLE event_push_summary;
+
+-- Rename the tables.
+ALTER TABLE event_push_actions_staging_new RENAME TO event_push_actions_staging;
+ALTER TABLE event_push_actions_new RENAME TO event_push_actions;
+ALTER TABLE event_push_summary_new RENAME TO event_push_summary;
+
+-- Recreate the indexes.
+CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging(event_id);
+
+CREATE INDEX event_push_actions_highlights_index ON event_push_actions (user_id, room_id, topological_ordering, stream_ordering);
+CREATE INDEX event_push_actions_rm_tokens on event_push_actions( user_id, room_id, topological_ordering, stream_ordering );
+CREATE INDEX event_push_actions_room_id_user_id on event_push_actions(room_id, user_id);
+CREATE INDEX event_push_actions_stream_ordering on event_push_actions( stream_ordering, user_id );
+CREATE INDEX event_push_actions_u_highlight ON event_push_actions (user_id, stream_ordering);
+
+CREATE UNIQUE INDEX event_push_summary_unique_index2 ON event_push_summary (user_id, room_id, thread_id) ;
+
+-- Recreate some indexes in the background, by re-running the background updates
+-- from 72/02event_push_actions_index.sql and 72/06thread_notifications.sql.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7706, 'event_push_summary_unique_index2', '{}')
+  ON CONFLICT (update_name) DO UPDATE SET progress_json = '{}';
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7706, 'event_push_actions_stream_highlight_index', '{}')
+  ON CONFLICT (update_name) DO UPDATE SET progress_json = '{}';
diff --git a/synapse/storage/schema/main/delta/77/06thread_notifications_not_null_event_push_actions.sql.postgres b/synapse/storage/schema/main/delta/77/06thread_notifications_not_null_event_push_actions.sql.postgres
new file mode 100644
index 0000000000..381184b5e2
--- /dev/null
+++ b/synapse/storage/schema/main/delta/77/06thread_notifications_not_null_event_push_actions.sql.postgres
@@ -0,0 +1,27 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- The thread_id columns can now be made non-nullable; this is done by using a
+-- constraint (and not altering the column) to avoid taking out a full table lock.
+--
+-- We initially add an invalid constraint which guards against new data (this
+-- doesn't lock the table).
+ALTER TABLE event_push_actions
+    ADD CONSTRAINT event_push_actions_thread_id CHECK (thread_id IS NOT NULL) NOT VALID;
+
+-- We then validate the constraint which doesn't need to worry about new data. It
+-- only needs a SHARE UPDATE EXCLUSIVE lock but can still take a while to complete.
+INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
+  (7706, 'event_push_actions_thread_id', '{}', 'event_push_actions_staging_thread_id');
diff --git a/synapse/storage/schema/main/delta/77/06thread_notifications_not_null_event_push_actions_staging.sql.postgres b/synapse/storage/schema/main/delta/77/06thread_notifications_not_null_event_push_actions_staging.sql.postgres
new file mode 100644
index 0000000000..395f9c7260
--- /dev/null
+++ b/synapse/storage/schema/main/delta/77/06thread_notifications_not_null_event_push_actions_staging.sql.postgres
@@ -0,0 +1,27 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- The thread_id columns can now be made non-nullable; this is done by using a
+-- constraint (and not altering the column) to avoid taking out a full table lock.
+--
+-- We initially add an invalid constraint which guards against new data (this
+-- doesn't lock the table).
+ALTER TABLE event_push_actions_staging
+    ADD CONSTRAINT event_push_actions_staging_thread_id CHECK (thread_id IS NOT NULL) NOT VALID;
+
+-- We then validate the constraint which doesn't need to worry about new data. It
+-- only needs a SHARE UPDATE EXCLUSIVE lock but can still take a while to complete.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7706, 'event_push_actions_staging_thread_id', '{}');
diff --git a/synapse/storage/schema/main/delta/77/06thread_notifications_not_null_event_push_summary.sql.postgres b/synapse/storage/schema/main/delta/77/06thread_notifications_not_null_event_push_summary.sql.postgres
new file mode 100644
index 0000000000..140ceff1fa
--- /dev/null
+++ b/synapse/storage/schema/main/delta/77/06thread_notifications_not_null_event_push_summary.sql.postgres
@@ -0,0 +1,29 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- The thread_id columns can now be made non-nullable; this is done by using a
+-- constraint (and not altering the column) to avoid taking out a full table lock.
+--
+-- We initially add an invalid constraint which guards against new data (this
+-- doesn't lock the table).
+ALTER TABLE event_push_summary
+    ADD CONSTRAINT event_push_summary_thread_id CHECK (thread_id IS NOT NULL) NOT VALID;
+
+-- We then validate the constraint which doesn't need to worry about new data. It
+-- only needs a SHARE UPDATE EXCLUSIVE lock but can still take a while to complete.
+INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
+  (7706, 'event_push_summary_thread_id', '{}', 'event_push_actions_thread_id'),
+  -- Also clean-up the old indexes.
+  (7706, 'event_push_drop_null_thread_id_indexes', '{}', 'event_push_summary_thread_id');
-- 
cgit 1.5.1


From 50918c494057dc93bfa6e37f7d140d68711846d1 Mon Sep 17 00:00:00 2001
From: Travis Ralston <travisr@matrix.org>
Date: Fri, 26 May 2023 12:05:24 -0600
Subject: Add `MSC3820opt2` as a known room version (#15678)

---
 changelog.d/15678.misc       | 1 +
 synapse/api/room_versions.py | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 changelog.d/15678.misc

(limited to 'synapse')

diff --git a/changelog.d/15678.misc b/changelog.d/15678.misc
new file mode 100644
index 0000000000..92eae49952
--- /dev/null
+++ b/changelog.d/15678.misc
@@ -0,0 +1 @@
+Implement "option 2" for [MSC3820](https://github.com/matrix-org/matrix-spec-proposals/pull/3820): Room version 11.
\ No newline at end of file
diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py
index 035a14171b..c5c71e242f 100644
--- a/synapse/api/room_versions.py
+++ b/synapse/api/room_versions.py
@@ -528,6 +528,7 @@ KNOWN_ROOM_VERSIONS: Dict[str, RoomVersion] = {
         RoomVersions.V10,
         RoomVersions.MSC2716v4,
         RoomVersions.MSC3989,
+        RoomVersions.MSC3820opt2,
     )
 }
 
-- 
cgit 1.5.1


From c835befd10ae0087c3c54a36989ba347313b68af Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Fri, 26 May 2023 14:28:39 -0500
Subject: Add Unix socket support for Redis connections (#15644)

Adds a new configuration setting to connect to Redis via a Unix
socket instead of over TCP. Disabled by default.
---
 changelog.d/15644.feature                        |  1 +
 docs/usage/configuration/config_documentation.md |  4 ++
 stubs/txredisapi.pyi                             |  3 ++
 synapse/config/redis.py                          |  1 +
 synapse/replication/tcp/handler.py               | 10 +++-
 synapse/replication/tcp/redis.py                 | 62 +++++++++++++++++++++---
 synapse/server.py                                | 42 ++++++++++------
 7 files changed, 100 insertions(+), 23 deletions(-)
 create mode 100644 changelog.d/15644.feature

(limited to 'synapse')

diff --git a/changelog.d/15644.feature b/changelog.d/15644.feature
new file mode 100644
index 0000000000..1b6126af53
--- /dev/null
+++ b/changelog.d/15644.feature
@@ -0,0 +1 @@
+Add Unix socket support for Redis connections. Contributed by Jason Little.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 93b132b6e4..5ede6d0a82 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3979,6 +3979,8 @@ This setting has the following sub-options:
 * `enabled`: whether to use Redis support. Defaults to false.
 * `host` and `port`: Optional host and port to use to connect to redis. Defaults to
    localhost and 6379
+* `path`: The full path to a local Unix socket file. **If this is used, `host` and
+ `port` are ignored.** Defaults to `/tmp/redis.sock`
 * `password`: Optional password if configured on the Redis instance.
 * `dbid`: Optional redis dbid if needs to connect to specific redis logical db.
 * `use_tls`: Whether to use tls connection. Defaults to false.
@@ -3991,6 +3993,8 @@ This setting has the following sub-options:
 
   _Changed in Synapse 1.84.0: Added use\_tls, certificate\_file, private\_key\_file, ca\_file and ca\_path attributes_
 
+  _Changed in Synapse 1.85.0: Added path option to use a local Unix socket_
+
 Example configuration:
 ```yaml
 redis:
diff --git a/stubs/txredisapi.pyi b/stubs/txredisapi.pyi
index 695a2307c2..b7bd59d2ea 100644
--- a/stubs/txredisapi.pyi
+++ b/stubs/txredisapi.pyi
@@ -61,6 +61,9 @@ def lazyConnection(
 # most methods to it via ConnectionHandler.__getattr__.
 class ConnectionHandler(RedisProtocol):
     def disconnect(self) -> "Deferred[None]": ...
+    def __repr__(self) -> str: ...
+
+class UnixConnectionHandler(ConnectionHandler): ...
 
 class RedisFactory(protocol.ReconnectingClientFactory):
     continueTrying: bool
diff --git a/synapse/config/redis.py b/synapse/config/redis.py
index 636cb450b8..3c4c499e22 100644
--- a/synapse/config/redis.py
+++ b/synapse/config/redis.py
@@ -33,6 +33,7 @@ class RedisConfig(Config):
 
         self.redis_host = redis_config.get("host", "localhost")
         self.redis_port = redis_config.get("port", 6379)
+        self.redis_path = redis_config.get("path", None)
         self.redis_dbid = redis_config.get("dbid", None)
         self.redis_password = redis_config.get("password")
 
diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py
index 233ad61d49..5d108fe11b 100644
--- a/synapse/replication/tcp/handler.py
+++ b/synapse/replication/tcp/handler.py
@@ -352,7 +352,15 @@ class ReplicationCommandHandler:
 
         reactor = hs.get_reactor()
         redis_config = hs.config.redis
-        if hs.config.redis.redis_use_tls:
+        if redis_config.redis_path is not None:
+            reactor.connectUNIX(
+                redis_config.redis_path,
+                self._factory,
+                timeout=30,
+                checkPID=False,
+            )
+
+        elif hs.config.redis.redis_use_tls:
             ssl_context_factory = ClientContextFactory(hs.config.redis)
             reactor.connectSSL(
                 redis_config.redis_host,
diff --git a/synapse/replication/tcp/redis.py b/synapse/replication/tcp/redis.py
index c8f4bf8b27..7e96145b3b 100644
--- a/synapse/replication/tcp/redis.py
+++ b/synapse/replication/tcp/redis.py
@@ -17,7 +17,12 @@ from inspect import isawaitable
 from typing import TYPE_CHECKING, Any, Generic, List, Optional, Type, TypeVar, cast
 
 import attr
-import txredisapi
+from txredisapi import (
+    ConnectionHandler,
+    RedisFactory,
+    SubscriberProtocol,
+    UnixConnectionHandler,
+)
 from zope.interface import implementer
 
 from twisted.internet.address import IPv4Address, IPv6Address
@@ -68,7 +73,7 @@ class ConstantProperty(Generic[T, V]):
 
 
 @implementer(IReplicationConnection)
-class RedisSubscriber(txredisapi.SubscriberProtocol):
+class RedisSubscriber(SubscriberProtocol):
     """Connection to redis subscribed to replication stream.
 
     This class fulfils two functions:
@@ -95,7 +100,7 @@ class RedisSubscriber(txredisapi.SubscriberProtocol):
     synapse_handler: "ReplicationCommandHandler"
     synapse_stream_prefix: str
     synapse_channel_names: List[str]
-    synapse_outbound_redis_connection: txredisapi.ConnectionHandler
+    synapse_outbound_redis_connection: ConnectionHandler
 
     def __init__(self, *args: Any, **kwargs: Any):
         super().__init__(*args, **kwargs)
@@ -229,7 +234,7 @@ class RedisSubscriber(txredisapi.SubscriberProtocol):
         )
 
 
-class SynapseRedisFactory(txredisapi.RedisFactory):
+class SynapseRedisFactory(RedisFactory):
     """A subclass of RedisFactory that periodically sends pings to ensure that
     we detect dead connections.
     """
@@ -245,7 +250,7 @@ class SynapseRedisFactory(txredisapi.RedisFactory):
         dbid: Optional[int],
         poolsize: int,
         isLazy: bool = False,
-        handler: Type = txredisapi.ConnectionHandler,
+        handler: Type = ConnectionHandler,
         charset: str = "utf-8",
         password: Optional[str] = None,
         replyTimeout: int = 30,
@@ -326,7 +331,7 @@ class RedisDirectTcpReplicationClientFactory(SynapseRedisFactory):
     def __init__(
         self,
         hs: "HomeServer",
-        outbound_redis_connection: txredisapi.ConnectionHandler,
+        outbound_redis_connection: ConnectionHandler,
         channel_names: List[str],
     ):
         super().__init__(
@@ -368,7 +373,7 @@ def lazyConnection(
     reconnect: bool = True,
     password: Optional[str] = None,
     replyTimeout: int = 30,
-) -> txredisapi.ConnectionHandler:
+) -> ConnectionHandler:
     """Creates a connection to Redis that is lazily set up and reconnects if the
     connections is lost.
     """
@@ -380,7 +385,7 @@ def lazyConnection(
         dbid=dbid,
         poolsize=1,
         isLazy=True,
-        handler=txredisapi.ConnectionHandler,
+        handler=ConnectionHandler,
         password=password,
         replyTimeout=replyTimeout,
     )
@@ -408,3 +413,44 @@ def lazyConnection(
         )
 
     return factory.handler
+
+
+def lazyUnixConnection(
+    hs: "HomeServer",
+    path: str = "/tmp/redis.sock",
+    dbid: Optional[int] = None,
+    reconnect: bool = True,
+    password: Optional[str] = None,
+    replyTimeout: int = 30,
+) -> ConnectionHandler:
+    """Creates a connection to Redis that is lazily set up and reconnects if the
+    connection is lost.
+
+    Returns:
+        A subclass of ConnectionHandler, which is a UnixConnectionHandler in this case.
+    """
+
+    uuid = path
+
+    factory = SynapseRedisFactory(
+        hs,
+        uuid=uuid,
+        dbid=dbid,
+        poolsize=1,
+        isLazy=True,
+        handler=UnixConnectionHandler,
+        password=password,
+        replyTimeout=replyTimeout,
+    )
+    factory.continueTrying = reconnect
+
+    reactor = hs.get_reactor()
+
+    reactor.connectUNIX(
+        path,
+        factory,
+        timeout=30,
+        checkPID=False,
+    )
+
+    return factory.handler
diff --git a/synapse/server.py b/synapse/server.py
index f6e245569c..cce5fb66ff 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -864,22 +864,36 @@ class HomeServer(metaclass=abc.ABCMeta):
 
         # We only want to import redis module if we're using it, as we have
         # `txredisapi` as an optional dependency.
-        from synapse.replication.tcp.redis import lazyConnection
+        from synapse.replication.tcp.redis import lazyConnection, lazyUnixConnection
 
-        logger.info(
-            "Connecting to redis (host=%r port=%r) for external cache",
-            self.config.redis.redis_host,
-            self.config.redis.redis_port,
-        )
+        if self.config.redis.redis_path is None:
+            logger.info(
+                "Connecting to redis (host=%r port=%r) for external cache",
+                self.config.redis.redis_host,
+                self.config.redis.redis_port,
+            )
 
-        return lazyConnection(
-            hs=self,
-            host=self.config.redis.redis_host,
-            port=self.config.redis.redis_port,
-            dbid=self.config.redis.redis_dbid,
-            password=self.config.redis.redis_password,
-            reconnect=True,
-        )
+            return lazyConnection(
+                hs=self,
+                host=self.config.redis.redis_host,
+                port=self.config.redis.redis_port,
+                dbid=self.config.redis.redis_dbid,
+                password=self.config.redis.redis_password,
+                reconnect=True,
+            )
+        else:
+            logger.info(
+                "Connecting to redis (path=%r) for external cache",
+                self.config.redis.redis_path,
+            )
+
+            return lazyUnixConnection(
+                hs=self,
+                path=self.config.redis.redis_path,
+                dbid=self.config.redis.redis_dbid,
+                password=self.config.redis.redis_password,
+                reconnect=True,
+            )
 
     def should_send_federation(self) -> bool:
         "Should this server be sending federation traffic directly?"
-- 
cgit 1.5.1


From 5d8c659373ae2b169892fc9d99d54bd1b3baf65a Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Tue, 30 May 2023 14:37:39 +0100
Subject: Remove unused `FederationServer.__str__` override (#15690)

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/15690.misc                  | 1 +
 synapse/federation/federation_server.py | 3 ---
 2 files changed, 1 insertion(+), 3 deletions(-)
 create mode 100644 changelog.d/15690.misc

(limited to 'synapse')

diff --git a/changelog.d/15690.misc b/changelog.d/15690.misc
new file mode 100644
index 0000000000..c6c259eb7d
--- /dev/null
+++ b/changelog.d/15690.misc
@@ -0,0 +1 @@
+Remove some unused code.
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index f4ca70a698..e17cb840de 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -1291,9 +1291,6 @@ class FederationServer(FederationBase):
                 return
             lock = new_lock
 
-    def __str__(self) -> str:
-        return "<ReplicationLayer(%s)>" % self.server_name
-
     async def exchange_third_party_invite(
         self, sender_user_id: str, target_user_id: str, room_id: str, signed: Dict
     ) -> None:
-- 
cgit 1.5.1


From e2c8458bba5ab20f84c93a6c68e293b2d304cdc0 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Fri, 17 Jun 2022 14:48:55 +0200
Subject: Make the api.auth.Auth a Protocol

---
 synapse/api/auth.py             | 602 ----------------------------------------
 synapse/api/auth/__init__.py    | 175 ++++++++++++
 synapse/api/auth/base.py        | 273 ++++++++++++++++++
 synapse/api/auth/internal.py    | 369 ++++++++++++++++++++++++
 synapse/server.py               |   3 +-
 tests/api/test_auth.py          |   4 +-
 tests/handlers/test_register.py |   4 +-
 tests/test_state.py             |   4 +-
 8 files changed, 825 insertions(+), 609 deletions(-)
 delete mode 100644 synapse/api/auth.py
 create mode 100644 synapse/api/auth/__init__.py
 create mode 100644 synapse/api/auth/base.py
 create mode 100644 synapse/api/auth/internal.py

(limited to 'synapse')

diff --git a/synapse/api/auth.py b/synapse/api/auth.py
deleted file mode 100644
index 66e869bc2d..0000000000
--- a/synapse/api/auth.py
+++ /dev/null
@@ -1,602 +0,0 @@
-# Copyright 2014 - 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import logging
-from typing import TYPE_CHECKING, Optional, Tuple
-
-import pymacaroons
-from netaddr import IPAddress
-
-from twisted.web.server import Request
-
-from synapse import event_auth
-from synapse.api.constants import EventTypes, HistoryVisibility, Membership
-from synapse.api.errors import (
-    AuthError,
-    Codes,
-    InvalidClientTokenError,
-    MissingClientTokenError,
-    UnstableSpecAuthError,
-)
-from synapse.appservice import ApplicationService
-from synapse.http import get_request_user_agent
-from synapse.http.site import SynapseRequest
-from synapse.logging.opentracing import (
-    active_span,
-    force_tracing,
-    start_active_span,
-    trace,
-)
-from synapse.types import Requester, create_requester
-from synapse.util.cancellation import cancellable
-
-if TYPE_CHECKING:
-    from synapse.server import HomeServer
-
-logger = logging.getLogger(__name__)
-
-
-# guests always get this device id.
-GUEST_DEVICE_ID = "guest_device"
-
-
-class Auth:
-    """
-    This class contains functions for authenticating users of our client-server API.
-    """
-
-    def __init__(self, hs: "HomeServer"):
-        self.hs = hs
-        self.clock = hs.get_clock()
-        self.store = hs.get_datastores().main
-        self._account_validity_handler = hs.get_account_validity_handler()
-        self._storage_controllers = hs.get_storage_controllers()
-        self._macaroon_generator = hs.get_macaroon_generator()
-
-        self._track_appservice_user_ips = hs.config.appservice.track_appservice_user_ips
-        self._track_puppeted_user_ips = hs.config.api.track_puppeted_user_ips
-        self._force_tracing_for_users = hs.config.tracing.force_tracing_for_users
-
-    async def check_user_in_room(
-        self,
-        room_id: str,
-        requester: Requester,
-        allow_departed_users: bool = False,
-    ) -> Tuple[str, Optional[str]]:
-        """Check if the user is in the room, or was at some point.
-        Args:
-            room_id: The room to check.
-
-            requester: The user making the request, according to the access token.
-
-            current_state: Optional map of the current state of the room.
-                If provided then that map is used to check whether they are a
-                member of the room. Otherwise the current membership is
-                loaded from the database.
-
-            allow_departed_users: if True, accept users that were previously
-                members but have now departed.
-
-        Raises:
-            AuthError if the user is/was not in the room.
-        Returns:
-            The current membership of the user in the room and the
-            membership event ID of the user.
-        """
-
-        user_id = requester.user.to_string()
-        (
-            membership,
-            member_event_id,
-        ) = await self.store.get_local_current_membership_for_user_in_room(
-            user_id=user_id,
-            room_id=room_id,
-        )
-
-        if membership:
-            if membership == Membership.JOIN:
-                return membership, member_event_id
-
-            # XXX this looks totally bogus. Why do we not allow users who have been banned,
-            # or those who were members previously and have been re-invited?
-            if allow_departed_users and membership == Membership.LEAVE:
-                forgot = await self.store.did_forget(user_id, room_id)
-                if not forgot:
-                    return membership, member_event_id
-        raise UnstableSpecAuthError(
-            403,
-            "User %s not in room %s" % (user_id, room_id),
-            errcode=Codes.NOT_JOINED,
-        )
-
-    @cancellable
-    async def get_user_by_req(
-        self,
-        request: SynapseRequest,
-        allow_guest: bool = False,
-        allow_expired: bool = False,
-    ) -> Requester:
-        """Get a registered user's ID.
-
-        Args:
-            request: An HTTP request with an access_token query parameter.
-            allow_guest: If False, will raise an AuthError if the user making the
-                request is a guest.
-            allow_expired: If True, allow the request through even if the account
-                is expired, or session token lifetime has ended. Note that
-                /login will deliver access tokens regardless of expiration.
-
-        Returns:
-            Resolves to the requester
-        Raises:
-            InvalidClientCredentialsError if no user by that token exists or the token
-                is invalid.
-            AuthError if access is denied for the user in the access token
-        """
-        parent_span = active_span()
-        with start_active_span("get_user_by_req"):
-            requester = await self._wrapped_get_user_by_req(
-                request, allow_guest, allow_expired
-            )
-
-            if parent_span:
-                if requester.authenticated_entity in self._force_tracing_for_users:
-                    # request tracing is enabled for this user, so we need to force it
-                    # tracing on for the parent span (which will be the servlet span).
-                    #
-                    # It's too late for the get_user_by_req span to inherit the setting,
-                    # so we also force it on for that.
-                    force_tracing()
-                    force_tracing(parent_span)
-                parent_span.set_tag(
-                    "authenticated_entity", requester.authenticated_entity
-                )
-                parent_span.set_tag("user_id", requester.user.to_string())
-                if requester.device_id is not None:
-                    parent_span.set_tag("device_id", requester.device_id)
-                if requester.app_service is not None:
-                    parent_span.set_tag("appservice_id", requester.app_service.id)
-            return requester
-
-    @cancellable
-    async def _wrapped_get_user_by_req(
-        self,
-        request: SynapseRequest,
-        allow_guest: bool,
-        allow_expired: bool,
-    ) -> Requester:
-        """Helper for get_user_by_req
-
-        Once get_user_by_req has set up the opentracing span, this does the actual work.
-        """
-        try:
-            ip_addr = request.getClientAddress().host
-            user_agent = get_request_user_agent(request)
-
-            access_token = self.get_access_token_from_request(request)
-
-            # First check if it could be a request from an appservice
-            requester = await self._get_appservice_user(request)
-            if not requester:
-                # If not, it should be from a regular user
-                requester = await self.get_user_by_access_token(
-                    access_token, allow_expired=allow_expired
-                )
-
-                # Deny the request if the user account has expired.
-                # This check is only done for regular users, not appservice ones.
-                if not allow_expired:
-                    if await self._account_validity_handler.is_user_expired(
-                        requester.user.to_string()
-                    ):
-                        # Raise the error if either an account validity module has determined
-                        # the account has expired, or the legacy account validity
-                        # implementation is enabled and determined the account has expired
-                        raise AuthError(
-                            403,
-                            "User account has expired",
-                            errcode=Codes.EXPIRED_ACCOUNT,
-                        )
-
-            if ip_addr and (
-                not requester.app_service or self._track_appservice_user_ips
-            ):
-                # XXX(quenting): I'm 95% confident that we could skip setting the
-                # device_id to "dummy-device" for appservices, and that the only impact
-                # would be some rows which whould not deduplicate in the 'user_ips'
-                # table during the transition
-                recorded_device_id = (
-                    "dummy-device"
-                    if requester.device_id is None and requester.app_service is not None
-                    else requester.device_id
-                )
-                await self.store.insert_client_ip(
-                    user_id=requester.authenticated_entity,
-                    access_token=access_token,
-                    ip=ip_addr,
-                    user_agent=user_agent,
-                    device_id=recorded_device_id,
-                )
-
-                # Track also the puppeted user client IP if enabled and the user is puppeting
-                if (
-                    requester.user.to_string() != requester.authenticated_entity
-                    and self._track_puppeted_user_ips
-                ):
-                    await self.store.insert_client_ip(
-                        user_id=requester.user.to_string(),
-                        access_token=access_token,
-                        ip=ip_addr,
-                        user_agent=user_agent,
-                        device_id=requester.device_id,
-                    )
-
-            if requester.is_guest and not allow_guest:
-                raise AuthError(
-                    403,
-                    "Guest access not allowed",
-                    errcode=Codes.GUEST_ACCESS_FORBIDDEN,
-                )
-
-            request.requester = requester
-            return requester
-        except KeyError:
-            raise MissingClientTokenError()
-
-    async def validate_appservice_can_control_user_id(
-        self, app_service: ApplicationService, user_id: str
-    ) -> None:
-        """Validates that the app service is allowed to control
-        the given user.
-
-        Args:
-            app_service: The app service that controls the user
-            user_id: The author MXID that the app service is controlling
-
-        Raises:
-            AuthError: If the application service is not allowed to control the user
-                (user namespace regex does not match, wrong homeserver, etc)
-                or if the user has not been registered yet.
-        """
-
-        # It's ok if the app service is trying to use the sender from their registration
-        if app_service.sender == user_id:
-            pass
-        # Check to make sure the app service is allowed to control the user
-        elif not app_service.is_interested_in_user(user_id):
-            raise AuthError(
-                403,
-                "Application service cannot masquerade as this user (%s)." % user_id,
-            )
-        # Check to make sure the user is already registered on the homeserver
-        elif not (await self.store.get_user_by_id(user_id)):
-            raise AuthError(
-                403, "Application service has not registered this user (%s)" % user_id
-            )
-
-    @cancellable
-    async def _get_appservice_user(self, request: Request) -> Optional[Requester]:
-        """
-        Given a request, reads the request parameters to determine:
-        - whether it's an application service that's making this request
-        - what user the application service should be treated as controlling
-          (the user_id URI parameter allows an application service to masquerade
-          any applicable user in its namespace)
-        - what device the application service should be treated as controlling
-          (the device_id[^1] URI parameter allows an application service to masquerade
-          as any device that exists for the relevant user)
-
-        [^1] Unstable and provided by MSC3202.
-             Must use `org.matrix.msc3202.device_id` in place of `device_id` for now.
-
-        Returns:
-            the application service `Requester` of that request
-
-        Postconditions:
-        - The `app_service` field in the returned `Requester` is set
-        - The `user_id` field in the returned `Requester` is either the application
-          service sender or the controlled user set by the `user_id` URI parameter
-        - The returned application service is permitted to control the returned user ID.
-        - The returned device ID, if present, has been checked to be a valid device ID
-          for the returned user ID.
-        """
-        DEVICE_ID_ARG_NAME = b"org.matrix.msc3202.device_id"
-
-        app_service = self.store.get_app_service_by_token(
-            self.get_access_token_from_request(request)
-        )
-        if app_service is None:
-            return None
-
-        if app_service.ip_range_whitelist:
-            ip_address = IPAddress(request.getClientAddress().host)
-            if ip_address not in app_service.ip_range_whitelist:
-                return None
-
-        # This will always be set by the time Twisted calls us.
-        assert request.args is not None
-
-        if b"user_id" in request.args:
-            effective_user_id = request.args[b"user_id"][0].decode("utf8")
-            await self.validate_appservice_can_control_user_id(
-                app_service, effective_user_id
-            )
-        else:
-            effective_user_id = app_service.sender
-
-        effective_device_id: Optional[str] = None
-
-        if (
-            self.hs.config.experimental.msc3202_device_masquerading_enabled
-            and DEVICE_ID_ARG_NAME in request.args
-        ):
-            effective_device_id = request.args[DEVICE_ID_ARG_NAME][0].decode("utf8")
-            # We only just set this so it can't be None!
-            assert effective_device_id is not None
-            device_opt = await self.store.get_device(
-                effective_user_id, effective_device_id
-            )
-            if device_opt is None:
-                # For now, use 400 M_EXCLUSIVE if the device doesn't exist.
-                # This is an open thread of discussion on MSC3202 as of 2021-12-09.
-                raise AuthError(
-                    400,
-                    f"Application service trying to use a device that doesn't exist ('{effective_device_id}' for {effective_user_id})",
-                    Codes.EXCLUSIVE,
-                )
-
-        return create_requester(
-            effective_user_id, app_service=app_service, device_id=effective_device_id
-        )
-
-    async def get_user_by_access_token(
-        self,
-        token: str,
-        allow_expired: bool = False,
-    ) -> Requester:
-        """Validate access token and get user_id from it
-
-        Args:
-            token: The access token to get the user by
-            allow_expired: If False, raises an InvalidClientTokenError
-                if the token is expired
-
-        Raises:
-            InvalidClientTokenError if a user by that token exists, but the token is
-                expired
-            InvalidClientCredentialsError if no user by that token exists or the token
-                is invalid
-        """
-
-        # First look in the database to see if the access token is present
-        # as an opaque token.
-        user_info = await self.store.get_user_by_access_token(token)
-        if user_info:
-            valid_until_ms = user_info.valid_until_ms
-            if (
-                not allow_expired
-                and valid_until_ms is not None
-                and valid_until_ms < self.clock.time_msec()
-            ):
-                # there was a valid access token, but it has expired.
-                # soft-logout the user.
-                raise InvalidClientTokenError(
-                    msg="Access token has expired", soft_logout=True
-                )
-
-            # Mark the token as used. This is used to invalidate old refresh
-            # tokens after some time.
-            await self.store.mark_access_token_as_used(user_info.token_id)
-
-            requester = create_requester(
-                user_id=user_info.user_id,
-                access_token_id=user_info.token_id,
-                is_guest=user_info.is_guest,
-                shadow_banned=user_info.shadow_banned,
-                device_id=user_info.device_id,
-                authenticated_entity=user_info.token_owner,
-            )
-
-            return requester
-
-        # If the token isn't found in the database, then it could still be a
-        # macaroon for a guest, so we check that here.
-        try:
-            user_id = self._macaroon_generator.verify_guest_token(token)
-
-            # Guest access tokens are not stored in the database (there can
-            # only be one access token per guest, anyway).
-            #
-            # In order to prevent guest access tokens being used as regular
-            # user access tokens (and hence getting around the invalidation
-            # process), we look up the user id and check that it is indeed
-            # a guest user.
-            #
-            # It would of course be much easier to store guest access
-            # tokens in the database as well, but that would break existing
-            # guest tokens.
-            stored_user = await self.store.get_user_by_id(user_id)
-            if not stored_user:
-                raise InvalidClientTokenError("Unknown user_id %s" % user_id)
-            if not stored_user["is_guest"]:
-                raise InvalidClientTokenError(
-                    "Guest access token used for regular user"
-                )
-
-            return create_requester(
-                user_id=user_id,
-                is_guest=True,
-                # all guests get the same device id
-                device_id=GUEST_DEVICE_ID,
-                authenticated_entity=user_id,
-            )
-        except (
-            pymacaroons.exceptions.MacaroonException,
-            TypeError,
-            ValueError,
-        ) as e:
-            logger.warning(
-                "Invalid access token in auth: %s %s.",
-                type(e),
-                e,
-            )
-            raise InvalidClientTokenError("Invalid access token passed.")
-
-    async def is_server_admin(self, requester: Requester) -> bool:
-        """Check if the given user is a local server admin.
-
-        Args:
-            requester: The user making the request, according to the access token.
-
-        Returns:
-            True if the user is an admin
-        """
-        return await self.store.is_server_admin(requester.user)
-
-    async def check_can_change_room_list(
-        self, room_id: str, requester: Requester
-    ) -> bool:
-        """Determine whether the user is allowed to edit the room's entry in the
-        published room list.
-
-        Args:
-            room_id: The room to check.
-            requester: The user making the request, according to the access token.
-        """
-
-        is_admin = await self.is_server_admin(requester)
-        if is_admin:
-            return True
-
-        await self.check_user_in_room(room_id, requester)
-
-        # We currently require the user is a "moderator" in the room. We do this
-        # by checking if they would (theoretically) be able to change the
-        # m.room.canonical_alias events
-
-        power_level_event = (
-            await self._storage_controllers.state.get_current_state_event(
-                room_id, EventTypes.PowerLevels, ""
-            )
-        )
-
-        auth_events = {}
-        if power_level_event:
-            auth_events[(EventTypes.PowerLevels, "")] = power_level_event
-
-        send_level = event_auth.get_send_level(
-            EventTypes.CanonicalAlias, "", power_level_event
-        )
-        user_level = event_auth.get_user_power_level(
-            requester.user.to_string(), auth_events
-        )
-
-        return user_level >= send_level
-
-    @staticmethod
-    def has_access_token(request: Request) -> bool:
-        """Checks if the request has an access_token.
-
-        Returns:
-            False if no access_token was given, True otherwise.
-        """
-        # This will always be set by the time Twisted calls us.
-        assert request.args is not None
-
-        query_params = request.args.get(b"access_token")
-        auth_headers = request.requestHeaders.getRawHeaders(b"Authorization")
-        return bool(query_params) or bool(auth_headers)
-
-    @staticmethod
-    @cancellable
-    def get_access_token_from_request(request: Request) -> str:
-        """Extracts the access_token from the request.
-
-        Args:
-            request: The http request.
-        Returns:
-            The access_token
-        Raises:
-            MissingClientTokenError: If there isn't a single access_token in the
-                request
-        """
-        # This will always be set by the time Twisted calls us.
-        assert request.args is not None
-
-        auth_headers = request.requestHeaders.getRawHeaders(b"Authorization")
-        query_params = request.args.get(b"access_token")
-        if auth_headers:
-            # Try the get the access_token from a "Authorization: Bearer"
-            # header
-            if query_params is not None:
-                raise MissingClientTokenError(
-                    "Mixing Authorization headers and access_token query parameters."
-                )
-            if len(auth_headers) > 1:
-                raise MissingClientTokenError("Too many Authorization headers.")
-            parts = auth_headers[0].split(b" ")
-            if parts[0] == b"Bearer" and len(parts) == 2:
-                return parts[1].decode("ascii")
-            else:
-                raise MissingClientTokenError("Invalid Authorization header.")
-        else:
-            # Try to get the access_token from the query params.
-            if not query_params:
-                raise MissingClientTokenError()
-
-            return query_params[0].decode("ascii")
-
-    @trace
-    async def check_user_in_room_or_world_readable(
-        self, room_id: str, requester: Requester, allow_departed_users: bool = False
-    ) -> Tuple[str, Optional[str]]:
-        """Checks that the user is or was in the room or the room is world
-        readable. If it isn't then an exception is raised.
-
-        Args:
-            room_id: The room to check.
-            requester: The user making the request, according to the access token.
-            allow_departed_users: If True, accept users that were previously
-                members but have now departed.
-
-        Returns:
-            Resolves to the current membership of the user in the room and the
-            membership event ID of the user. If the user is not in the room and
-            never has been, then `(Membership.JOIN, None)` is returned.
-        """
-
-        try:
-            # check_user_in_room will return the most recent membership
-            # event for the user if:
-            #  * The user is a non-guest user, and was ever in the room
-            #  * The user is a guest user, and has joined the room
-            # else it will throw.
-            return await self.check_user_in_room(
-                room_id, requester, allow_departed_users=allow_departed_users
-            )
-        except AuthError:
-            visibility = await self._storage_controllers.state.get_current_state_event(
-                room_id, EventTypes.RoomHistoryVisibility, ""
-            )
-            if (
-                visibility
-                and visibility.content.get("history_visibility")
-                == HistoryVisibility.WORLD_READABLE
-            ):
-                return Membership.JOIN, None
-            raise UnstableSpecAuthError(
-                403,
-                "User %s not in room %s, and room previews are disabled"
-                % (requester.user, room_id),
-                errcode=Codes.NOT_JOINED,
-            )
diff --git a/synapse/api/auth/__init__.py b/synapse/api/auth/__init__.py
new file mode 100644
index 0000000000..90cfe39d76
--- /dev/null
+++ b/synapse/api/auth/__init__.py
@@ -0,0 +1,175 @@
+# Copyright 2023 The Matrix.org Foundation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Optional, Tuple
+
+from typing_extensions import Protocol
+
+from twisted.web.server import Request
+
+from synapse.appservice import ApplicationService
+from synapse.http.site import SynapseRequest
+from synapse.types import Requester
+
+# guests always get this device id.
+GUEST_DEVICE_ID = "guest_device"
+
+
+class Auth(Protocol):
+    """The interface that an auth provider must implement."""
+
+    async def check_user_in_room(
+        self,
+        room_id: str,
+        requester: Requester,
+        allow_departed_users: bool = False,
+    ) -> Tuple[str, Optional[str]]:
+        """Check if the user is in the room, or was at some point.
+
+        Args:
+            room_id: The room to check.
+
+            requester: The user making the request, according to the access
+                token. Their membership of the room is what gets checked.
+
+            allow_departed_users: if True, accept users that were previously
+                members but have now departed.
+
+        Raises:
+            AuthError if the user is/was not in the room.
+
+        Returns:
+            The current membership of the user in the room and the
+            membership event ID of the user.
+        """
+
+    async def get_user_by_req(
+        self,
+        request: SynapseRequest,
+        allow_guest: bool = False,
+        allow_expired: bool = False,
+    ) -> Requester:
+        """Get the requester for an HTTP request.
+
+        Args:
+            request: An HTTP request carrying an access token.
+            allow_guest: If False, will raise an AuthError if the user making the
+                request is a guest.
+            allow_expired: If True, allow the request through even if the account
+                is expired, or session token lifetime has ended. Note that
+                /login will deliver access tokens regardless of expiration.
+
+        Returns:
+            Resolves to the requester
+        Raises:
+            InvalidClientCredentialsError if no user by that token exists or the token
+                is invalid.
+            AuthError if access is denied for the user in the access token
+        """
+
+    async def validate_appservice_can_control_user_id(
+        self, app_service: ApplicationService, user_id: str
+    ) -> None:
+        """Validates that the app service is allowed to control
+        the given user.
+
+        Args:
+            app_service: The app service that controls the user
+            user_id: The author MXID that the app service is controlling
+
+        Raises:
+            AuthError: If the application service is not allowed to control the user
+                (user namespace regex does not match, wrong homeserver, etc)
+                or if the user has not been registered yet.
+        """
+
+    async def get_user_by_access_token(
+        self,
+        token: str,
+        allow_expired: bool = False,
+    ) -> Requester:
+        """Validate access token and get user_id from it
+
+        Args:
+            token: The access token to get the user by
+            allow_expired: If False, raises an InvalidClientTokenError
+                if the token is expired
+
+        Returns:
+            The requester identified by the access token.
+        Raises:
+            InvalidClientTokenError if a user by that token exists, but the token is
+                expired
+            InvalidClientCredentialsError if no user by that token exists or the token
+                is invalid
+        """
+
+    async def is_server_admin(self, requester: Requester) -> bool:
+        """Check if the given user is a local server admin.
+
+        Args:
+            requester: user to check
+
+        Returns:
+            True if the user is an admin
+        """
+
+    async def check_can_change_room_list(
+        self, room_id: str, requester: Requester
+    ) -> bool:
+        """Determine whether the user is allowed to edit the room's entry in the
+        published room list.
+
+        Args:
+            room_id: The room to check.
+            requester: The user making the request, according to the access token.
+        """
+
+    @staticmethod
+    def has_access_token(request: Request) -> bool:
+        """Checks if the request has an access_token.
+
+        Returns:
+            False if no access_token was given, True otherwise.
+        """
+
+    @staticmethod
+    def get_access_token_from_request(request: Request) -> str:
+        """Extracts the access_token from the request.
+
+        Args:
+            request: The http request.
+        Returns:
+            The access_token
+        Raises:
+            MissingClientTokenError: If there isn't a single access_token in the
+                request
+        """
+
+    async def check_user_in_room_or_world_readable(
+        self, room_id: str, requester: Requester, allow_departed_users: bool = False
+    ) -> Tuple[str, Optional[str]]:
+        """Checks that the user is or was in the room or the room is world
+        readable. If it isn't then an exception is raised.
+
+        Args:
+            room_id: room to check
+            requester: user to check
+            allow_departed_users: if True, accept users that were previously
+                members but have now departed
+
+        Returns:
+            Resolves to the current membership of the user in the room and the
+            membership event ID of the user. If the user is not in the room and
+            never has been, then `(Membership.JOIN, None)` is returned.
+        """
diff --git a/synapse/api/auth/base.py b/synapse/api/auth/base.py
new file mode 100644
index 0000000000..240f2b90de
--- /dev/null
+++ b/synapse/api/auth/base.py
@@ -0,0 +1,273 @@
+# Copyright 2023 The Matrix.org Foundation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import TYPE_CHECKING, Optional, Tuple
+
+from twisted.web.server import Request
+
+from synapse import event_auth
+from synapse.api.constants import EventTypes, HistoryVisibility, Membership
+from synapse.api.errors import (
+    AuthError,
+    Codes,
+    MissingClientTokenError,
+    UnstableSpecAuthError,
+)
+from synapse.appservice import ApplicationService
+from synapse.logging.opentracing import trace
+from synapse.types import Requester
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+class BaseAuth:
+    """Common base class for all auth implementations."""
+
+    def __init__(self, hs: "HomeServer"):
+        self.hs = hs
+        self.store = hs.get_datastores().main
+        self._storage_controllers = hs.get_storage_controllers()
+
+    async def check_user_in_room(
+        self,
+        room_id: str,
+        requester: Requester,
+        allow_departed_users: bool = False,
+    ) -> Tuple[str, Optional[str]]:
+        """Check if the user is in the room, or was at some point.
+
+        Args:
+            room_id: The room to check.
+            requester: The user making the request, according to the access token.
+            allow_departed_users: if True, accept users that were previously
+                members but have now departed.
+
+        Raises:
+            AuthError if the user is/was not in the room.
+        Returns:
+            The user's current membership in the room and its membership event ID.
+        """
+
+        user_id = requester.user.to_string()
+        (
+            membership,
+            member_event_id,
+        ) = await self.store.get_local_current_membership_for_user_in_room(
+            user_id=user_id,
+            room_id=room_id,
+        )
+
+        if membership:
+            if membership == Membership.JOIN:
+                return membership, member_event_id
+
+            # XXX this looks totally bogus. Why do we not allow users who have been banned,
+            # or those who were members previously and have been re-invited?
+            if allow_departed_users and membership == Membership.LEAVE:
+                forgot = await self.store.did_forget(user_id, room_id)
+                if not forgot:
+                    return membership, member_event_id
+        raise UnstableSpecAuthError(
+            403,
+            "User %s not in room %s" % (user_id, room_id),
+            errcode=Codes.NOT_JOINED,
+        )
+
+    @trace
+    async def check_user_in_room_or_world_readable(
+        self, room_id: str, requester: Requester, allow_departed_users: bool = False
+    ) -> Tuple[str, Optional[str]]:
+        """Checks that the user is or was in the room or the room is world
+        readable. If it isn't then an exception is raised.
+
+        Args:
+            room_id: room to check
+            requester: user to check
+            allow_departed_users: if True, accept users that were previously
+                members but have now departed
+
+        Returns:
+            The current membership of the user in the room and the membership
+            event ID of the user. If the membership check fails but the room is
+            world readable, then `(Membership.JOIN, None)` is returned.
+        Raises:
+            UnstableSpecAuthError (403, NOT_JOINED) if neither condition holds.
+        """
+
+        try:
+            # check_user_in_room will return the most recent membership
+            # event for the user if:
+            #  * The user is a non-guest user, and was ever in the room
+            #  * The user is a guest user, and has joined the room
+            # else it will throw.
+            return await self.check_user_in_room(
+                room_id, requester, allow_departed_users=allow_departed_users
+            )
+        except AuthError:
+            visibility = await self._storage_controllers.state.get_current_state_event(
+                room_id, EventTypes.RoomHistoryVisibility, ""
+            )
+            if (
+                visibility
+                and visibility.content.get("history_visibility")
+                == HistoryVisibility.WORLD_READABLE
+            ):
+                return Membership.JOIN, None
+            raise UnstableSpecAuthError(
+                403,
+                "User %s not in room %s, and room previews are disabled"
+                % (requester.user, room_id),
+                errcode=Codes.NOT_JOINED,
+            )
+
+    async def validate_appservice_can_control_user_id(
+        self, app_service: ApplicationService, user_id: str
+    ) -> None:
+        """Validates that the app service is allowed to control
+        the given user.
+
+        Args:
+            app_service: The app service that controls the user
+            user_id: The author MXID that the app service is controlling
+
+        Raises:
+            AuthError: If the application service is not allowed to control the user
+                (user namespace regex does not match, wrong homeserver, etc)
+                or if the user has not been registered yet.
+        """
+
+        # It's ok if the app service is trying to use the sender from their registration
+        if app_service.sender == user_id:
+            pass
+        # Check to make sure the app service is allowed to control the user
+        elif not app_service.is_interested_in_user(user_id):
+            raise AuthError(
+                403,
+                "Application service cannot masquerade as this user (%s)." % user_id,
+            )
+        # Check to make sure the user is already registered on the homeserver
+        elif not (await self.store.get_user_by_id(user_id)):
+            raise AuthError(
+                403, "Application service has not registered this user (%s)" % user_id
+            )
+
+    async def is_server_admin(self, requester: Requester) -> bool:
+        """Check if the given user is a local server admin.
+
+        Not implemented here: this base class always raises NotImplementedError.
+
+        Args:
+            requester: user to check
+        Returns:
+            True if the user is an admin
+        """
+        raise NotImplementedError()
+
+    async def check_can_change_room_list(
+        self, room_id: str, requester: Requester
+    ) -> bool:
+        """Determine whether the user is allowed to edit the room's entry in the
+        published room list.
+
+        Args:
+            room_id: The room to check.
+            requester: The user making the request, according to the access token.
+
+        Returns:
+            Whether the requester is a server admin or a "moderator" in the room.
+        """
+
+        is_admin = await self.is_server_admin(requester)
+        if is_admin:
+            return True
+
+        await self.check_user_in_room(room_id, requester)
+
+        # We currently require the user is a "moderator" in the room. We do this
+        # by checking if they would (theoretically) be able to change the
+        # m.room.canonical_alias events
+
+        power_level_event = (
+            await self._storage_controllers.state.get_current_state_event(
+                room_id, EventTypes.PowerLevels, ""
+            )
+        )
+
+        auth_events = {}
+        if power_level_event:
+            auth_events[(EventTypes.PowerLevels, "")] = power_level_event
+
+        send_level = event_auth.get_send_level(
+            EventTypes.CanonicalAlias, "", power_level_event
+        )
+        user_level = event_auth.get_user_power_level(
+            requester.user.to_string(), auth_events
+        )
+
+        return user_level >= send_level
+
+    @staticmethod
+    def has_access_token(request: Request) -> bool:
+        """Checks if the request has an access_token.
+
+        Returns:
+            False if no access_token was given, True otherwise.
+        """
+        # This will always be set by the time Twisted calls us.
+        assert request.args is not None
+
+        query_params = request.args.get(b"access_token")
+        auth_headers = request.requestHeaders.getRawHeaders(b"Authorization")
+        return bool(query_params) or bool(auth_headers)
+
+    @staticmethod
+    def get_access_token_from_request(request: Request) -> str:
+        """Extracts the access_token from the request.
+
+        Args:
+            request: The http request.
+        Returns:
+            The access_token
+        Raises:
+            MissingClientTokenError: If there isn't a single access_token in the
+                request
+        """
+        # This will always be set by the time Twisted calls us.
+        assert request.args is not None
+
+        auth_headers = request.requestHeaders.getRawHeaders(b"Authorization")
+        query_params = request.args.get(b"access_token")
+        if auth_headers:
+            # Try to get the access_token from a "Authorization: Bearer"
+            # header
+            if query_params is not None:
+                raise MissingClientTokenError(
+                    "Mixing Authorization headers and access_token query parameters."
+                )
+            if len(auth_headers) > 1:
+                raise MissingClientTokenError("Too many Authorization headers.")
+            parts = auth_headers[0].split(b" ")
+            if parts[0] == b"Bearer" and len(parts) == 2:
+                return parts[1].decode("ascii")
+            else:
+                raise MissingClientTokenError("Invalid Authorization header.")
+        else:
+            # Try to get the access_token from the query params.
+            if not query_params:
+                raise MissingClientTokenError()
+
+            return query_params[0].decode("ascii")
diff --git a/synapse/api/auth/internal.py b/synapse/api/auth/internal.py
new file mode 100644
index 0000000000..813d537e53
--- /dev/null
+++ b/synapse/api/auth/internal.py
@@ -0,0 +1,369 @@
+# Copyright 2023 The Matrix.org Foundation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import TYPE_CHECKING, Optional
+
+import pymacaroons
+from netaddr import IPAddress
+
+from twisted.web.server import Request
+
+from synapse.api.errors import (
+    AuthError,
+    Codes,
+    InvalidClientTokenError,
+    MissingClientTokenError,
+)
+from synapse.http import get_request_user_agent
+from synapse.http.site import SynapseRequest
+from synapse.logging.opentracing import active_span, force_tracing, start_active_span
+from synapse.types import Requester, create_requester
+from synapse.util.cancellation import cancellable
+
+from . import GUEST_DEVICE_ID
+from .base import BaseAuth
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+class InternalAuth(BaseAuth):
+    """
+    This class contains functions for authenticating users of our client-server API.
+    """
+
+    def __init__(self, hs: "HomeServer"):
+        """Cache the config flags and homeserver helpers used during auth."""
+        super().__init__(hs)
+        self._force_tracing_for_users = hs.config.tracing.force_tracing_for_users
+        self._track_puppeted_user_ips = hs.config.api.track_puppeted_user_ips
+        self._track_appservice_user_ips = hs.config.appservice.track_appservice_user_ips
+        self.clock = hs.get_clock()
+        self._account_validity_handler = hs.get_account_validity_handler()
+        self._macaroon_generator = hs.get_macaroon_generator()
+
+    @cancellable
+    async def get_user_by_req(
+        self,
+        request: SynapseRequest,
+        allow_guest: bool = False,
+        allow_expired: bool = False,
+    ) -> Requester:
+        """Authenticate an incoming request and return its requester.
+
+        Args:
+            request: An HTTP request carrying an access token.
+            allow_guest: If False, an AuthError is raised when the request is
+                made by a guest.
+            allow_expired: If True, allow the request through even if the account
+                is expired, or session token lifetime has ended. Note that
+                /login will deliver access tokens regardless of expiration.
+
+        Returns:
+            The requester that the request was authenticated as.
+        Raises:
+            InvalidClientCredentialsError if no user by that token exists or the token
+                is invalid.
+            AuthError if access is denied for the user in the access token
+        """
+        # Capture the enclosing span before our own child span becomes active.
+        servlet_span = active_span()
+        with start_active_span("get_user_by_req"):
+            requester = await self._wrapped_get_user_by_req(
+                request, allow_guest, allow_expired
+            )
+            if not servlet_span:
+                return requester
+
+            entity = requester.authenticated_entity
+            if entity in self._force_tracing_for_users:
+                # Request tracing is enabled for this user. It's too late for the
+                # get_user_by_req span to inherit the setting, so force it on for
+                # both that span and the parent (which will be the servlet span).
+                force_tracing()
+                force_tracing(servlet_span)
+
+            servlet_span.set_tag("authenticated_entity", entity)
+            servlet_span.set_tag("user_id", requester.user.to_string())
+            if requester.device_id is not None:
+                servlet_span.set_tag("device_id", requester.device_id)
+            if requester.app_service is not None:
+                servlet_span.set_tag("appservice_id", requester.app_service.id)
+            return requester
+
+    @cancellable
+    async def _wrapped_get_user_by_req(
+        self,
+        request: SynapseRequest,
+        allow_guest: bool,
+        allow_expired: bool,
+    ) -> Requester:
+        """Authenticate the request; does the actual work for get_user_by_req.
+
+        Resolves the requester, tracks the client IP and enforces guest/expiry rules.
+        """
+        try:
+            ip_addr = request.getClientAddress().host
+            user_agent = get_request_user_agent(request)
+
+            access_token = self.get_access_token_from_request(request)
+
+            # First check if it could be a request from an appservice
+            requester = await self._get_appservice_user(request)
+            if not requester:
+                # If not, it should be from a regular user
+                requester = await self.get_user_by_access_token(
+                    access_token, allow_expired=allow_expired
+                )
+
+                # Deny the request if the user account has expired.
+                # This check is only done for regular users, not appservice ones.
+                if not allow_expired:
+                    if await self._account_validity_handler.is_user_expired(
+                        requester.user.to_string()
+                    ):
+                        # Raise the error if either an account validity module has determined
+                        # the account has expired, or the legacy account validity
+                        # implementation is enabled and determined the account has expired
+                        raise AuthError(
+                            403,
+                            "User account has expired",
+                            errcode=Codes.EXPIRED_ACCOUNT,
+                        )
+
+            if ip_addr and (
+                not requester.app_service or self._track_appservice_user_ips
+            ):
+                # XXX(quenting): I'm 95% confident that we could skip setting the
+                # device_id to "dummy-device" for appservices, and that the only impact
+                # would be some rows which would not deduplicate in the 'user_ips'
+                # table during the transition
+                recorded_device_id = (
+                    "dummy-device"
+                    if requester.device_id is None and requester.app_service is not None
+                    else requester.device_id
+                )
+                await self.store.insert_client_ip(
+                    user_id=requester.authenticated_entity,
+                    access_token=access_token,
+                    ip=ip_addr,
+                    user_agent=user_agent,
+                    device_id=recorded_device_id,
+                )
+
+                # Track also the puppeted user client IP if enabled and the user is puppeting
+                if (
+                    requester.user.to_string() != requester.authenticated_entity
+                    and self._track_puppeted_user_ips
+                ):
+                    await self.store.insert_client_ip(
+                        user_id=requester.user.to_string(),
+                        access_token=access_token,
+                        ip=ip_addr,
+                        user_agent=user_agent,
+                        device_id=requester.device_id,
+                    )
+
+            if requester.is_guest and not allow_guest:
+                raise AuthError(
+                    403,
+                    "Guest access not allowed",
+                    errcode=Codes.GUEST_ACCESS_FORBIDDEN,
+                )
+            # Remember the requester on the request object before returning it.
+            request.requester = requester
+            return requester
+        except KeyError:
+            raise MissingClientTokenError()
+
+    @cancellable
+    async def _get_appservice_user(self, request: Request) -> Optional[Requester]:
+        """
+        Given a request, reads the request parameters to determine:
+        - whether it's an application service that's making this request
+        - what user the application service should be treated as controlling
+          (the user_id URI parameter allows an application service to masquerade
+          any applicable user in its namespace)
+        - what device the application service should be treated as controlling
+          (the device_id[^1] URI parameter allows an application service to masquerade
+          as any device that exists for the relevant user)
+
+        [^1] Unstable and provided by MSC3202.
+             Must use `org.matrix.msc3202.device_id` in place of `device_id` for now.
+
+        Returns:
+            the appservice `Requester` of that request, or None if it is not from one
+
+        Postconditions:
+        - The `app_service` field in the returned `Requester` is set
+        - The `user_id` field in the returned `Requester` is either the application
+          service sender or the controlled user set by the `user_id` URI parameter
+        - The returned application service is permitted to control the returned user ID.
+        - The returned device ID, if present, has been checked to be a valid device ID
+          for the returned user ID.
+        """
+        DEVICE_ID_ARG_NAME = b"org.matrix.msc3202.device_id"
+
+        app_service = self.store.get_app_service_by_token(
+            self.get_access_token_from_request(request)
+        )
+        if app_service is None:
+            return None
+        # Requests from outside the appservice's IP whitelist (if any) are ignored.
+        if app_service.ip_range_whitelist:
+            ip_address = IPAddress(request.getClientAddress().host)
+            if ip_address not in app_service.ip_range_whitelist:
+                return None
+
+        # This will always be set by the time Twisted calls us.
+        assert request.args is not None
+
+        if b"user_id" in request.args:
+            effective_user_id = request.args[b"user_id"][0].decode("utf8")
+            await self.validate_appservice_can_control_user_id(
+                app_service, effective_user_id
+            )
+        else:
+            effective_user_id = app_service.sender
+
+        effective_device_id: Optional[str] = None
+
+        if (
+            self.hs.config.experimental.msc3202_device_masquerading_enabled
+            and DEVICE_ID_ARG_NAME in request.args
+        ):
+            effective_device_id = request.args[DEVICE_ID_ARG_NAME][0].decode("utf8")
+            # We only just set this so it can't be None!
+            assert effective_device_id is not None
+            device_opt = await self.store.get_device(
+                effective_user_id, effective_device_id
+            )
+            if device_opt is None:
+                # For now, use 400 M_EXCLUSIVE if the device doesn't exist.
+                # This is an open thread of discussion on MSC3202 as of 2021-12-09.
+                raise AuthError(
+                    400,
+                    f"Application service trying to use a device that doesn't exist ('{effective_device_id}' for {effective_user_id})",
+                    Codes.EXCLUSIVE,
+                )
+
+        return create_requester(
+            effective_user_id, app_service=app_service, device_id=effective_device_id
+        )
+
+    async def get_user_by_access_token(
+        self,
+        token: str,
+        allow_expired: bool = False,
+    ) -> Requester:
+        """Resolve an access token to the requester it belongs to.
+
+        The token is first looked up in the database as an opaque token; if it
+        is not found there, it is verified as a guest macaroon instead.
+
+        Args:
+            token: The access token to get the user by
+            allow_expired: If False, raises an InvalidClientTokenError
+                if the token is expired
+
+        Returns:
+            A requester describing the user that the token belongs to.
+
+        Raises:
+            InvalidClientTokenError if a user by that token exists, but the token is
+                expired
+            InvalidClientCredentialsError if no user by that token exists or the token
+                is invalid
+        """
+
+        token_info = await self.store.get_user_by_access_token(token)
+        if token_info:
+            if not allow_expired:
+                expiry_ms = token_info.valid_until_ms
+                if expiry_ms is not None and expiry_ms < self.clock.time_msec():
+                    # The token was valid once but has since expired:
+                    # soft-logout the user.
+                    raise InvalidClientTokenError(
+                        msg="Access token has expired", soft_logout=True
+                    )
+
+            # Record that the token was used. This is used to invalidate old
+            # refresh tokens after some time.
+            await self.store.mark_access_token_as_used(token_info.token_id)
+
+            return create_requester(
+                user_id=token_info.user_id,
+                access_token_id=token_info.token_id,
+                is_guest=token_info.is_guest,
+                shadow_banned=token_info.shadow_banned,
+                device_id=token_info.device_id,
+                authenticated_entity=token_info.token_owner,
+            )
+
+        # Not an opaque token known to the database, so it could still be a
+        # macaroon for a guest.
+        try:
+            guest_user_id = self._macaroon_generator.verify_guest_token(token)
+
+            # Guest access tokens are not stored in the database (there can
+            # only be one access token per guest, anyway).
+            #
+            # In order to prevent guest access tokens being used as regular
+            # user access tokens (and hence getting around the invalidation
+            # process), we look up the user id and check that it is indeed
+            # a guest user.
+            #
+            # It would of course be much easier to store guest access
+            # tokens in the database as well, but that would break existing
+            # guest tokens.
+            stored_user = await self.store.get_user_by_id(guest_user_id)
+            if not stored_user:
+                raise InvalidClientTokenError("Unknown user_id %s" % guest_user_id)
+            if not stored_user["is_guest"]:
+                raise InvalidClientTokenError(
+                    "Guest access token used for regular user"
+                )
+
+            return create_requester(
+                user_id=guest_user_id,
+                is_guest=True,
+                # all guests get the same device id
+                device_id=GUEST_DEVICE_ID,
+                authenticated_entity=guest_user_id,
+            )
+        except (
+            pymacaroons.exceptions.MacaroonException,
+            TypeError,
+            ValueError,
+        ) as exc:
+            # Log the underlying failure, but only tell the client the token is invalid.
+            logger.warning(
+                "Invalid access token in auth: %s %s.",
+                type(exc),
+                exc,
+            )
+            raise InvalidClientTokenError("Invalid access token passed.")
+
+    async def is_server_admin(self, requester: Requester) -> bool:
+        """Report whether the requester is a local server admin.
+
+        Args:
+            requester: The user making the request, according to the access token.
+        Returns:
+            True if the store records the requesting user as a server admin.
+        """
+        is_admin = await self.store.is_server_admin(requester.user)
+        return is_admin
diff --git a/synapse/server.py b/synapse/server.py
index cce5fb66ff..df88af12a9 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -31,6 +31,7 @@ from twisted.web.iweb import IPolicyForHTTPS
 from twisted.web.resource import Resource
 
 from synapse.api.auth import Auth
+from synapse.api.auth.internal import InternalAuth
 from synapse.api.auth_blocking import AuthBlocking
 from synapse.api.filtering import Filtering
 from synapse.api.ratelimiting import Ratelimiter, RequestRatelimiter
@@ -427,7 +428,7 @@ class HomeServer(metaclass=abc.ABCMeta):
 
     @cache_in_self
     def get_auth(self) -> Auth:
-        return Auth(self)
+        return InternalAuth(self)
 
     @cache_in_self
     def get_auth_blocking(self) -> AuthBlocking:
diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py
index 6e36e73f0d..3dac52d178 100644
--- a/tests/api/test_auth.py
+++ b/tests/api/test_auth.py
@@ -18,7 +18,7 @@ import pymacaroons
 
 from twisted.test.proto_helpers import MemoryReactor
 
-from synapse.api.auth import Auth
+from synapse.api.auth.internal import InternalAuth
 from synapse.api.auth_blocking import AuthBlocking
 from synapse.api.constants import UserTypes
 from synapse.api.errors import (
@@ -48,7 +48,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         # have been called by the HomeserverTestCase machinery.
         hs.datastores.main = self.store  # type: ignore[union-attr]
         hs.get_auth_handler().store = self.store
-        self.auth = Auth(hs)
+        self.auth = InternalAuth(hs)
 
         # AuthBlocking reads from the hs' config on initialization. We need to
         # modify its config instead of the hs'
diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py
index 73822b07a5..8d8584609b 100644
--- a/tests/handlers/test_register.py
+++ b/tests/handlers/test_register.py
@@ -17,7 +17,7 @@ from unittest.mock import Mock
 
 from twisted.test.proto_helpers import MemoryReactor
 
-from synapse.api.auth import Auth
+from synapse.api.auth.internal import InternalAuth
 from synapse.api.constants import UserTypes
 from synapse.api.errors import (
     CodeMessageException,
@@ -683,7 +683,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         request = Mock(args={})
         request.args[b"access_token"] = [token.encode("ascii")]
         request.requestHeaders.getRawHeaders = mock_getRawHeaders()
-        auth = Auth(self.hs)
+        auth = InternalAuth(self.hs)
         requester = self.get_success(auth.get_user_by_req(request))
 
         self.assertTrue(requester.shadow_banned)
diff --git a/tests/test_state.py b/tests/test_state.py
index ddf59916b1..7a49b87953 100644
--- a/tests/test_state.py
+++ b/tests/test_state.py
@@ -28,7 +28,7 @@ from unittest.mock import Mock
 
 from twisted.internet import defer
 
-from synapse.api.auth import Auth
+from synapse.api.auth.internal import InternalAuth
 from synapse.api.constants import EventTypes, Membership
 from synapse.api.room_versions import RoomVersions
 from synapse.events import EventBase, make_event_from_dict
@@ -240,7 +240,7 @@ class StateTestCase(unittest.TestCase):
         hs.get_macaroon_generator.return_value = MacaroonGenerator(
             clock, "tesths", b"verysecret"
         )
-        hs.get_auth.return_value = Auth(hs)
+        hs.get_auth.return_value = InternalAuth(hs)
         hs.get_state_resolution_handler = lambda: StateResolutionHandler(hs)
         hs.get_storage_controllers.return_value = storage_controllers
 
-- 
cgit 1.5.1


From 765244faeef9e20c573d2c7935f05f76aeca1c28 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Tue, 13 Sep 2022 17:54:32 +0200
Subject: Initial MSC3964 support: delegation of auth to OIDC server

---
 synapse/api/auth/oauth_delegated.py | 227 ++++++++++++++++++++++++++++++++++++
 synapse/config/auth.py              |  30 ++++-
 synapse/server.py                   |   4 +
 3 files changed, 260 insertions(+), 1 deletion(-)
 create mode 100644 synapse/api/auth/oauth_delegated.py

(limited to 'synapse')

diff --git a/synapse/api/auth/oauth_delegated.py b/synapse/api/auth/oauth_delegated.py
new file mode 100644
index 0000000000..b3b5c29a94
--- /dev/null
+++ b/synapse/api/auth/oauth_delegated.py
@@ -0,0 +1,227 @@
+# Copyright 2023 The Matrix.org Foundation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
+from urllib.parse import urlencode
+
+from authlib.oauth2 import ClientAuth
+from authlib.oauth2.auth import encode_client_secret_basic, encode_client_secret_post
+from authlib.oauth2.rfc7523 import ClientSecretJWT, PrivateKeyJWT, private_key_jwt_sign
+from authlib.oauth2.rfc7662 import IntrospectionToken
+from authlib.oidc.discovery import OpenIDProviderMetadata, get_well_known_url
+
+from twisted.web.client import readBody
+from twisted.web.http_headers import Headers
+
+from synapse.api.auth.base import BaseAuth
+from synapse.api.errors import AuthError, StoreError
+from synapse.http.site import SynapseRequest
+from synapse.logging.context import make_deferred_yieldable
+from synapse.types import Requester, UserID, create_requester
+from synapse.util import json_decoder
+from synapse.util.caches.cached_call import RetryOnExceptionCachedCall
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+def scope_to_list(scope: str) -> List[str]:
+    """Split a whitespace-delimited scope string into its (non-empty) tokens."""
+    return scope.split()
+
+
+class PrivateKeyJWTWithKid(PrivateKeyJWT):
+    """`private_key_jwt` client auth method that also sends a `kid` JWT header.
+
+    Some providers (Keycloak) need that header to pick the verification key.
+    """
+
+    def sign(self, auth: Any, token_endpoint: str) -> bytes:
+        key = auth.client_secret
+        return private_key_jwt_sign(
+            key,
+            client_id=auth.client_id,
+            token_endpoint=token_endpoint,
+            claims=self.claims,
+            header={"kid": key["kid"]},
+        )
+
+
+class OAuthDelegatedAuth(BaseAuth):
+    """Auth implementation that validates access tokens via OIDC introspection."""
+
+    # Client authentication methods accepted for `client_auth_method`.
+    AUTH_METHODS = {
+        "client_secret_post": encode_client_secret_post,
+        "client_secret_basic": encode_client_secret_basic,
+        "client_secret_jwt": ClientSecretJWT(),
+        "private_key_jwt": PrivateKeyJWTWithKid(),
+    }
+
+    def __init__(self, hs: "HomeServer"):
+        """Check the OAuth delegation config and set up client authentication."""
+        super().__init__(hs)
+        config = hs.config.auth
+        self._config = config
+        assert config.oauth_delegation_enabled, "OAuth delegation is not enabled"
+        assert config.oauth_delegation_issuer, "No issuer provided"
+        assert config.oauth_delegation_client_id, "No client_id provided"
+        assert config.oauth_delegation_client_secret, "No client_secret provided"
+        method = config.oauth_delegation_client_auth_method
+        assert method in OAuthDelegatedAuth.AUTH_METHODS, "Invalid client_auth_method"
+
+        self._http_client = hs.get_proxied_http_client()
+        self._hostname = hs.hostname
+
+        self._issuer_metadata = RetryOnExceptionCachedCall(self._load_metadata)
+        self._client_auth = ClientAuth(
+            config.oauth_delegation_client_id,
+            config.oauth_delegation_client_secret,
+            OAuthDelegatedAuth.AUTH_METHODS[method],
+        )
+
+    async def _load_metadata(self) -> OpenIDProviderMetadata:
+        """Return the issuer metadata, from config if set, else from discovery."""
+        static_metadata = self._config.oauth_delegation_issuer_metadata
+        if static_metadata is not None:
+            return OpenIDProviderMetadata(**static_metadata)
+        url = get_well_known_url(self._config.oauth_delegation_issuer, external=True)
+        response = await self._http_client.get_json(url)
+        # TODO: validate the introspection endpoint (validate_introspection_endpoint).
+        return OpenIDProviderMetadata(**response)
+
+    async def _introspect_token(self, token: str) -> IntrospectionToken:
+        """POST `token` to the issuer's introspection endpoint and parse the reply."""
+        metadata = await self._issuer_metadata.get()
+        introspection_endpoint = metadata.get("introspection_endpoint")
+        raw_headers: Dict[str, str] = {
+            "Content-Type": "application/x-www-form-urlencoded",
+            "User-Agent": str(self._http_client.user_agent, "utf-8"),
+            "Accept": "application/json",
+        }
+        body = urlencode({"token": token, "token_type_hint": "access_token"}, True)
+
+        # Fill the body/headers with credentials
+        uri, raw_headers, body = self._client_auth.prepare(
+            method="POST", uri=introspection_endpoint, headers=raw_headers, body=body
+        )
+        headers = Headers({k: [v] for (k, v) in raw_headers.items()})
+
+        # We're not using the SimpleHttpClient util methods as we don't want to
+        # check the HTTP status code and we do the body encoding ourselves.
+        response = await self._http_client.request(
+            method="POST",
+            uri=uri,
+            data=body.encode("utf-8"),
+            headers=headers,
+        )
+        resp_body = await make_deferred_yieldable(readBody(response))
+
+        # TODO: Let's not worry about 5xx errors & co. for now and just try
+        # decoding that as JSON. We should also do some validation of the response
+        resp = json_decoder.decode(resp_body.decode("utf-8"))
+        return IntrospectionToken(**resp)
+
+    async def get_user_by_req(
+        self,
+        request: SynapseRequest,
+        allow_guest: bool = False,
+        allow_expired: bool = False,
+    ) -> Requester:
+        """Authenticate `request` by introspecting the access token it carries."""
+        access_token = self.get_access_token_from_request(request)
+        return await self.get_user_by_access_token(access_token, allow_expired)
+
+    async def get_user_by_access_token(
+        self,
+        token: str,
+        allow_expired: bool = False,
+    ) -> Requester:
+        """Introspect `token` and return the requester it identifies.
+
+        The user, and the device named in the token's scope (if any), are
+        created on the fly when they do not exist yet.
+
+        Args:
+            token: The access token to introspect.
+            allow_expired: Not consulted by this implementation.
+        Returns:
+            A requester for the user (and device, if any) behind the token.
+        Raises:
+            AuthError: If the token is not active (403), or if the username
+                claim is invalid or the user cannot be created (500).
+        """
+        introspection_result = await self._introspect_token(token)
+
+        logger.info("Introspection result: %r", introspection_result)
+
+        # TODO: introspection verification should be more extensive, especially:
+        #   - verify the scopes
+        #   - verify the audience
+        if not introspection_result.get("active"):
+            raise AuthError(403, "Invalid access token")
+
+        # TODO: claim mapping should be configurable
+        username = introspection_result.get("username")
+        if not isinstance(username, str):
+            raise AuthError(500, "Invalid username claim in the introspection result")
+
+        # Look for the device ID in the scope
+        device_id: Optional[str] = None
+        for tok in scope_to_list(introspection_result.get("scope", "")):
+            if tok.startswith("urn:matrix:org.matrix.msc2967.client:device:"):
+                parts = tok.split(":")
+                if len(parts) == 5:
+                    device_id = parts[4]
+
+        user_id = UserID(username, self._hostname)
+        user_id_str = user_id.to_string()
+        user_info = await self.store.get_userinfo_by_id(user_id=user_id_str)
+
+        # If the user does not exist, we should create it on the fly
+        # TODO: we could use SCIM to provision users ahead of time and listen
+        # for SCIM SET events if those ever become standard:
+        # https://datatracker.ietf.org/doc/html/draft-hunt-scim-notify-00
+        if not user_info:
+            await self.store.register_user(user_id=user_id_str)
+            user_info = await self.store.get_userinfo_by_id(user_id=user_id_str)
+            if not user_info:
+                raise AuthError(500, "Could not create user on the fly")
+
+        if device_id:
+            # Create the device on the fly if it does not exist. A missing device
+            # may surface as a `None` result or as a StoreError, so handle both.
+            try:
+                device = await self.store.get_device(
+                    user_id=user_id_str, device_id=device_id
+                )
+            except StoreError:
+                device = None
+            if device is None:
+                await self.store.store_device(
+                    user_id=user_id_str,
+                    device_id=device_id,
+                    initial_device_display_name="OIDC-native client",
+                )
+
+        # TODO: there is a few things missing in the requester here, which still need
+        # to be figured out, like:
+        #   - impersonation, with the `authenticated_entity`, which is used for
+        #     rate-limiting, MAU limits, etc.
+        #   - shadow-banning, with the `shadow_banned` flag
+        #   - a proper solution for appservices, which still needs to be figured out in
+        #     the context of MSC3861
+        return create_requester(user_id=user_id, device_id=device_id)
diff --git a/synapse/config/auth.py b/synapse/config/auth.py
index 35774962c0..25b5cc60dc 100644
--- a/synapse/config/auth.py
+++ b/synapse/config/auth.py
@@ -14,9 +14,11 @@
 # limitations under the License.
 from typing import Any
 
+from authlib.jose.rfc7517 import JsonWebKey
+
 from synapse.types import JsonDict
 
-from ._base import Config
+from ._base import Config, ConfigError
 
 
 class AuthConfig(Config):
@@ -53,3 +55,29 @@ class AuthConfig(Config):
         self.ui_auth_session_timeout = self.parse_duration(
             ui_auth.get("session_timeout", 0)
         )
+
+        oauth_delegation = config.get("oauth_delegation", {})
+        self.oauth_delegation_enabled = oauth_delegation.get("enabled", False)
+        self.oauth_delegation_issuer = oauth_delegation.get("issuer", "")
+        self.oauth_delegation_issuer_metadata = oauth_delegation.get("issuer_metadata")
+        self.oauth_delegation_account = oauth_delegation.get("account", "")
+        self.oauth_delegation_client_id = oauth_delegation.get("client_id", "")
+        self.oauth_delegation_client_secret = oauth_delegation.get("client_secret", "")
+        self.oauth_delegation_client_auth_method = oauth_delegation.get(
+            "client_auth_method", "client_secret_post"
+        )
+
+        self.password_enabled = password_config.get(
+            "enabled", not self.oauth_delegation_enabled
+        )
+
+        if self.oauth_delegation_client_auth_method == "private_key_jwt":
+            self.oauth_delegation_client_secret = JsonWebKey.import_key(
+                self.oauth_delegation_client_secret
+            )
+
+        # If we are delegating via OAuth then password cannot be supported as well
+        if self.oauth_delegation_enabled and self.password_enabled:
+            raise ConfigError(
+                "Password auth cannot be enabled when OAuth delegation is enabled"
+            )
diff --git a/synapse/server.py b/synapse/server.py
index df88af12a9..1c82500f30 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -428,6 +428,10 @@ class HomeServer(metaclass=abc.ABCMeta):
 
     @cache_in_self
     def get_auth(self) -> Auth:
+        if self.config.auth.oauth_delegation_enabled:
+            from synapse.api.auth.oauth_delegated import OAuthDelegatedAuth
+
+            return OAuthDelegatedAuth(self)
         return InternalAuth(self)
 
     @cache_in_self
-- 
cgit 1.5.1


From 8f576aa462684e13b20dc380e759a76e6db821b6 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Tue, 16 May 2023 15:36:40 +0200
Subject: Expose the public keys used for client authentication on an endpoint

---
 synapse/rest/synapse/client/__init__.py |  6 +++
 synapse/rest/synapse/client/jwks.py     | 72 +++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+)
 create mode 100644 synapse/rest/synapse/client/jwks.py

(limited to 'synapse')

diff --git a/synapse/rest/synapse/client/__init__.py b/synapse/rest/synapse/client/__init__.py
index e55924f597..dcfd0ad6aa 100644
--- a/synapse/rest/synapse/client/__init__.py
+++ b/synapse/rest/synapse/client/__init__.py
@@ -46,6 +46,12 @@ def build_synapse_client_resource_tree(hs: "HomeServer") -> Mapping[str, Resourc
         "/_synapse/client/unsubscribe": UnsubscribeResource(hs),
     }
 
+    # Expose the JWKS endpoint if OAuth2 delegation is enabled
+    if hs.config.auth.oauth_delegation_enabled:
+        from synapse.rest.synapse.client.jwks import JwksResource
+
+        resources["/_synapse/jwks"] = JwksResource(hs)
+
     # provider-specific SSO bits. Only load these if they are enabled, since they
     # rely on optional dependencies.
     if hs.config.oidc.oidc_enabled:
diff --git a/synapse/rest/synapse/client/jwks.py b/synapse/rest/synapse/client/jwks.py
new file mode 100644
index 0000000000..818585843e
--- /dev/null
+++ b/synapse/rest/synapse/client/jwks.py
@@ -0,0 +1,72 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import TYPE_CHECKING, Tuple
+
+from synapse.http.server import DirectServeJsonResource
+from synapse.http.site import SynapseRequest
+from synapse.types import JsonDict
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+class JwksResource(DirectServeJsonResource):
+    def __init__(self, hs: "HomeServer"):
+        from authlib.jose.rfc7517 import Key
+
+        super().__init__(extract_context=True)
+
+        # Parameters that are allowed to be exposed in the public key.
+        # This is done manually, because authlib's private to public key conversion
+        # is unreliable depending on the version. Instead, we just serialize the private
+        # key and only keep the public parameters.
+        # List from https://www.iana.org/assignments/jose/jose.xhtml#web-key-parameters
+        public_parameters = {
+            "kty",
+            "use",
+            "key_ops",
+            "alg",
+            "kid",
+            "x5u",
+            "x5c",
+            "x5t",
+            "x5t#S256",
+            "crv",
+            "x",
+            "y",
+            "n",
+            "e",
+            "ext",
+        }
+
+        secret = hs.config.auth.oauth_delegation_client_secret
+
+        if isinstance(secret, Key):
+            private_key = secret.as_dict()
+            public_key = {
+                k: v for k, v in private_key.items() if k in public_parameters
+            }
+            keys = [public_key]
+        else:
+            keys = []
+
+        self.res = {
+            "keys": keys,
+        }
+
+    async def _async_render_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
+        return 200, self.res
-- 
cgit 1.5.1


From e82ec6d00819253d15d22a41ba3b75ad77dce98f Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quentingliech@gmail.com>
Date: Thu, 18 Nov 2021 15:21:00 +0100
Subject: MSC2965: OIDC Provider discovery via well-known document

---
 synapse/rest/well_known.py | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'synapse')

diff --git a/synapse/rest/well_known.py b/synapse/rest/well_known.py
index e2174fdfea..fd3b17a5ad 100644
--- a/synapse/rest/well_known.py
+++ b/synapse/rest/well_known.py
@@ -44,6 +44,15 @@ class WellKnownBuilder:
                 "base_url": self._config.registration.default_identity_server
             }
 
+        if self._config.auth.oauth_delegation_enabled:
+            result["org.matrix.msc2965.authentication"] = {
+                "issuer": self._config.auth.oauth_delegation_issuer
+            }
+            if self._config.auth.oauth_delegation_account != "":
+                result["org.matrix.msc2965.authentication"][
+                    "account"
+                ] = self._config.auth.oauth_delegation_account
+
         if self._config.server.extra_well_known_client_content:
             for (
                 key,
-- 
cgit 1.5.1


From c5cf1b421d8e0d765f812880ff41fe5d244a0919 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Fri, 17 Jun 2022 16:58:05 +0200
Subject: Save the scopes in the requester

---
 synapse/api/auth/oauth_delegated.py | 1 +
 synapse/types/__init__.py           | 8 ++++++++
 tests/api/test_auth.py              | 2 ++
 3 files changed, 11 insertions(+)

(limited to 'synapse')

diff --git a/synapse/api/auth/oauth_delegated.py b/synapse/api/auth/oauth_delegated.py
index b3b5c29a94..2715127e32 100644
--- a/synapse/api/auth/oauth_delegated.py
+++ b/synapse/api/auth/oauth_delegated.py
@@ -224,4 +224,5 @@ class OAuthDelegatedAuth(BaseAuth):
         return create_requester(
             user_id=user_id,
             device_id=device_id,
+            scope=scope,
         )
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index 42baf8ac6b..dfc95e8ebb 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -131,6 +131,7 @@ class Requester:
     user: "UserID"
     access_token_id: Optional[int]
     is_guest: bool
+    scope: Set[str]
     shadow_banned: bool
     device_id: Optional[str]
     app_service: Optional["ApplicationService"]
@@ -147,6 +148,7 @@ class Requester:
             "user_id": self.user.to_string(),
             "access_token_id": self.access_token_id,
             "is_guest": self.is_guest,
+            "scope": list(self.scope),
             "shadow_banned": self.shadow_banned,
             "device_id": self.device_id,
             "app_server_id": self.app_service.id if self.app_service else None,
@@ -175,6 +177,7 @@ class Requester:
             user=UserID.from_string(input["user_id"]),
             access_token_id=input["access_token_id"],
             is_guest=input["is_guest"],
+            scope=set(input["scope"]),
             shadow_banned=input["shadow_banned"],
             device_id=input["device_id"],
             app_service=appservice,
@@ -186,6 +189,7 @@ def create_requester(
     user_id: Union[str, "UserID"],
     access_token_id: Optional[int] = None,
     is_guest: bool = False,
+    scope: StrCollection = (),
     shadow_banned: bool = False,
     device_id: Optional[str] = None,
     app_service: Optional["ApplicationService"] = None,
@@ -199,6 +203,7 @@ def create_requester(
         access_token_id:  *ID* of the access token used for this
             request, or None if it came via the appservice API or similar
         is_guest:  True if the user making this request is a guest user
+        scope:  the scope of the access token used for this request, if any
         shadow_banned:  True if the user making this request is shadow-banned.
         device_id:  device_id which was set at authentication time
         app_service:  the AS requesting on behalf of the user
@@ -215,10 +220,13 @@ def create_requester(
     if authenticated_entity is None:
         authenticated_entity = user_id.to_string()
 
+    scope = set(scope)
+
     return Requester(
         user_id,
         access_token_id,
         is_guest,
+        scope,
         shadow_banned,
         device_id,
         app_service,
diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py
index 3dac52d178..cdb0048122 100644
--- a/tests/api/test_auth.py
+++ b/tests/api/test_auth.py
@@ -426,6 +426,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
             access_token_id=None,
             device_id="FOOBAR",
             is_guest=False,
+            scope=set(),
             shadow_banned=False,
             app_service=appservice,
             authenticated_entity="@appservice:server",
@@ -456,6 +457,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
             access_token_id=None,
             device_id="FOOBAR",
             is_guest=False,
+            scope=set(),
             shadow_banned=False,
             app_service=appservice,
             authenticated_entity="@appservice:server",
-- 
cgit 1.5.1


From 7628dbf4e9b48d9714ccbd0530af579d9c290fed Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Mon, 20 Jun 2022 11:17:48 +0200
Subject: Handle the Synapse admin scope

---
 synapse/api/auth/oauth_delegated.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'synapse')

diff --git a/synapse/api/auth/oauth_delegated.py b/synapse/api/auth/oauth_delegated.py
index 2715127e32..ff1f395e58 100644
--- a/synapse/api/auth/oauth_delegated.py
+++ b/synapse/api/auth/oauth_delegated.py
@@ -140,6 +140,9 @@ class OAuthDelegatedAuth(BaseAuth):
         resp = json_decoder.decode(resp_body.decode("utf-8"))
         return IntrospectionToken(**resp)
 
+    async def is_server_admin(self, requester: Requester) -> bool:
+        return "urn:synapse:admin:*" in requester.scope
+
     async def get_user_by_req(
         self,
         request: SynapseRequest,
-- 
cgit 1.5.1


From f9cd549f6485620381443f2b4b75a1bd0a88d39f Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Tue, 13 Sep 2022 16:13:20 +0200
Subject: Record the `sub` claims as an external_id

---
 synapse/api/auth/oauth_delegated.py | 59 +++++++++++++++++++++++--------------
 1 file changed, 37 insertions(+), 22 deletions(-)

(limited to 'synapse')

diff --git a/synapse/api/auth/oauth_delegated.py b/synapse/api/auth/oauth_delegated.py
index ff1f395e58..5565ef0a1a 100644
--- a/synapse/api/auth/oauth_delegated.py
+++ b/synapse/api/auth/oauth_delegated.py
@@ -68,6 +68,8 @@ class OAuthDelegatedAuth(BaseAuth):
         "private_key_jwt": PrivateKeyJWTWithKid(),
     }
 
+    EXTERNAL_ID_PROVIDER = "oauth-delegated"
+
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
 
@@ -170,13 +172,42 @@ class OAuthDelegatedAuth(BaseAuth):
                 "Invalid access token",
             )
 
-        # TODO: claim mapping should be configurable
-        username: Optional[str] = introspection_result.get("username")
-        if username is None or not isinstance(username, str):
-            raise AuthError(
-                500,
-                "Invalid username claim in the introspection result",
+        # Match via the sub claim
+        sub: Optional[str] = introspection_result.get("sub")
+        if sub is None:
+            raise AuthError(500, "Invalid sub claim in the introspection result")
+
+        user_id_str = await self.store.get_user_by_external_id(
+            OAuthDelegatedAuth.EXTERNAL_ID_PROVIDER, sub
+        )
+        if user_id_str is None:
+            # If we could not find a user via the external_id, it either does not exist,
+            # or the external_id was never recorded
+
+            # TODO: claim mapping should be configurable
+            username: Optional[str] = introspection_result.get("username")
+            if username is None or not isinstance(username, str):
+                raise AuthError(
+                    500,
+                    "Invalid username claim in the introspection result",
+                )
+            user_id = UserID(username, self._hostname)
+
+            # First try to find a user from the username claim
+            user_info = await self.store.get_userinfo_by_id(user_id=user_id.to_string())
+            if user_info is None:
+                # If the user does not exist, we should create it on the fly
+                # TODO: we could use SCIM to provision users ahead of time and listen
+                # for SCIM SET events if those ever become standard:
+                # https://datatracker.ietf.org/doc/html/draft-hunt-scim-notify-00
+                await self.store.register_user(user_id=user_id.to_string())
+
+            # And record the sub as external_id
+            await self.store.record_user_external_id(
+                OAuthDelegatedAuth.EXTERNAL_ID_PROVIDER, sub, user_id.to_string()
             )
+        else:
+            user_id = UserID.from_string(user_id_str)
 
         # Let's look at the scope
         scope: List[str] = scope_to_list(introspection_result.get("scope", ""))
@@ -188,22 +219,6 @@ class OAuthDelegatedAuth(BaseAuth):
                 if len(parts) == 5:
                     device_id = parts[4]
 
-        user_id = UserID(username, self._hostname)
-        user_info = await self.store.get_userinfo_by_id(user_id=user_id.to_string())
-
-        # If the user does not exist, we should create it on the fly
-        # TODO: we could use SCIM to provision users ahead of time and listen
-        # for SCIM SET events if those ever become standard:
-        # https://datatracker.ietf.org/doc/html/draft-hunt-scim-notify-00
-        if not user_info:
-            await self.store.register_user(user_id=user_id.to_string())
-            user_info = await self.store.get_userinfo_by_id(user_id=user_id.to_string())
-            if not user_info:
-                raise AuthError(
-                    500,
-                    "Could not create user on the fly",
-                )
-
         if device_id:
             # Create the device on the fly if it does not exist
             try:
-- 
cgit 1.5.1


From d20669971a5be17776a2991c77f5348662bb3902 Mon Sep 17 00:00:00 2001
From: Hugh Nimmo-Smith <hughns@matrix.org>
Date: Tue, 20 Sep 2022 12:54:18 +0100
Subject: Use `name` claim as display name when registering users on the fly.

This makes it so that the `name` claim obtained when introspecting the token
is used as the display name when registering a user on the fly.
---
 synapse/api/auth/oauth_delegated.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'synapse')

diff --git a/synapse/api/auth/oauth_delegated.py b/synapse/api/auth/oauth_delegated.py
index 5565ef0a1a..9e01e3fadc 100644
--- a/synapse/api/auth/oauth_delegated.py
+++ b/synapse/api/auth/oauth_delegated.py
@@ -200,7 +200,14 @@ class OAuthDelegatedAuth(BaseAuth):
                 # TODO: we could use SCIM to provision users ahead of time and listen
                 # for SCIM SET events if those ever become standard:
                 # https://datatracker.ietf.org/doc/html/draft-hunt-scim-notify-00
-                await self.store.register_user(user_id=user_id.to_string())
+
+                # TODO: claim mapping should be configurable
+                # If present, use the name claim as the displayname
+                name: Optional[str] = introspection_result.get("name")
+
+                await self.store.register_user(
+                    user_id=user_id.to_string(), create_profile_with_displayname=name
+                )
 
             # And record the sub as external_id
             await self.store.record_user_external_id(
-- 
cgit 1.5.1


From a1374b5c70fc8520930a1777dc131403812d7967 Mon Sep 17 00:00:00 2001
From: Hugh Nimmo-Smith <hughns@matrix.org>
Date: Wed, 16 Nov 2022 11:05:05 +0000
Subject: MSC2967: Check access token scope for use as user and add guest
 support

---
 synapse/api/auth/oauth_delegated.py | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

(limited to 'synapse')

diff --git a/synapse/api/auth/oauth_delegated.py b/synapse/api/auth/oauth_delegated.py
index 9e01e3fadc..cfa178218c 100644
--- a/synapse/api/auth/oauth_delegated.py
+++ b/synapse/api/auth/oauth_delegated.py
@@ -25,7 +25,7 @@ from twisted.web.client import readBody
 from twisted.web.http_headers import Headers
 
 from synapse.api.auth.base import BaseAuth
-from synapse.api.errors import AuthError, StoreError
+from synapse.api.errors import AuthError, InvalidClientTokenError, StoreError
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import make_deferred_yieldable
 from synapse.types import Requester, UserID, create_requester
@@ -164,18 +164,29 @@ class OAuthDelegatedAuth(BaseAuth):
         logger.info(f"Introspection result: {introspection_result!r}")
 
         # TODO: introspection verification should be more extensive, especially:
-        #   - verify the scopes
         #   - verify the audience
         if not introspection_result.get("active"):
-            raise AuthError(
-                403,
-                "Invalid access token",
-            )
+            raise InvalidClientTokenError("Token is not active")
+
+        # Let's look at the scope
+        scope: List[str] = scope_to_list(introspection_result.get("scope", ""))
+
+        # Determine type of user based on presence of particular scopes
+        has_admin_scope = "urn:synapse:admin:*" in scope
+        has_user_scope = "urn:matrix:org.matrix.msc2967.client:api:*" in scope
+        has_guest_scope = "urn:matrix:org.matrix.msc2967.client:api:guest" in scope
+        is_user = has_user_scope or has_admin_scope
+        is_guest = has_guest_scope and not is_user
+
+        if not is_user and not is_guest:
+            raise InvalidClientTokenError("No scope in token granting user rights")
 
         # Match via the sub claim
         sub: Optional[str] = introspection_result.get("sub")
         if sub is None:
-            raise AuthError(500, "Invalid sub claim in the introspection result")
+            raise InvalidClientTokenError(
+                "Invalid sub claim in the introspection result"
+            )
 
         user_id_str = await self.store.get_user_by_external_id(
             OAuthDelegatedAuth.EXTERNAL_ID_PROVIDER, sub
@@ -216,10 +227,8 @@ class OAuthDelegatedAuth(BaseAuth):
         else:
             user_id = UserID.from_string(user_id_str)
 
-        # Let's look at the scope
-        scope: List[str] = scope_to_list(introspection_result.get("scope", ""))
-        device_id = None
         # Find device_id in scope
+        device_id = None
         for tok in scope:
             if tok.startswith("urn:matrix:org.matrix.msc2967.client:device:"):
                 parts = tok.split(":")
@@ -250,4 +259,5 @@ class OAuthDelegatedAuth(BaseAuth):
             user_id=user_id,
             device_id=device_id,
             scope=scope,
+            is_guest=is_guest,
         )
-- 
cgit 1.5.1


From 5fe96082d09d1af3dc33b62b6a47a6baca02703c Mon Sep 17 00:00:00 2001
From: Hugh Nimmo-Smith <hughns@matrix.org>
Date: Thu, 17 Nov 2022 14:34:11 +0000
Subject: Actually enforce guest + return www-authenticate header

---
 synapse/api/auth/oauth_delegated.py     | 18 ++++++++++++--
 synapse/api/errors.py                   | 28 ++++++++++++++++++---
 synapse/http/server.py                  |  6 +++++
 tests/handlers/test_oauth_delegation.py | 43 ++++++++++++++++++++++++++++++---
 4 files changed, 87 insertions(+), 8 deletions(-)

(limited to 'synapse')

diff --git a/synapse/api/auth/oauth_delegated.py b/synapse/api/auth/oauth_delegated.py
index cfa178218c..9cb6eb7f79 100644
--- a/synapse/api/auth/oauth_delegated.py
+++ b/synapse/api/auth/oauth_delegated.py
@@ -25,7 +25,12 @@ from twisted.web.client import readBody
 from twisted.web.http_headers import Headers
 
 from synapse.api.auth.base import BaseAuth
-from synapse.api.errors import AuthError, InvalidClientTokenError, StoreError
+from synapse.api.errors import (
+    AuthError,
+    InvalidClientTokenError,
+    OAuthInsufficientScopeError,
+    StoreError,
+)
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import make_deferred_yieldable
 from synapse.types import Requester, UserID, create_requester
@@ -152,7 +157,16 @@ class OAuthDelegatedAuth(BaseAuth):
         allow_expired: bool = False,
     ) -> Requester:
         access_token = self.get_access_token_from_request(request)
-        return await self.get_user_by_access_token(access_token, allow_expired)
+
+        # TODO: we probably want to assert the allow_guest inside this call so that we don't provision the user if they don't have enough permission:
+        requester = await self.get_user_by_access_token(access_token, allow_expired)
+
+        if not allow_guest and requester.is_guest:
+            raise OAuthInsufficientScopeError(
+                ["urn:matrix:org.matrix.msc2967.client:api:*"]
+            )
+
+        return requester
 
     async def get_user_by_access_token(
         self,
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index 8c7c94b045..af894243f8 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -119,14 +119,20 @@ class Codes(str, Enum):
 
 
 class CodeMessageException(RuntimeError):
-    """An exception with integer code and message string attributes.
+    """An exception with integer code, a message string attributes and optional headers.
 
     Attributes:
         code: HTTP error code
         msg: string describing the error
+        headers: optional response headers to send
     """
 
-    def __init__(self, code: Union[int, HTTPStatus], msg: str):
+    def __init__(
+        self,
+        code: Union[int, HTTPStatus],
+        msg: str,
+        headers: Optional[Dict[str, str]] = None,
+    ):
         super().__init__("%d: %s" % (code, msg))
 
         # Some calls to this method pass instances of http.HTTPStatus for `code`.
@@ -137,6 +143,7 @@ class CodeMessageException(RuntimeError):
         # To eliminate this behaviour, we convert them to their integer equivalents here.
         self.code = int(code)
         self.msg = msg
+        self.headers = headers
 
 
 class RedirectException(CodeMessageException):
@@ -182,6 +189,7 @@ class SynapseError(CodeMessageException):
         msg: str,
         errcode: str = Codes.UNKNOWN,
         additional_fields: Optional[Dict] = None,
+        headers: Optional[Dict[str, str]] = None,
     ):
         """Constructs a synapse error.
 
@@ -190,7 +198,7 @@ class SynapseError(CodeMessageException):
             msg: The human-readable error message.
             errcode: The matrix error code e.g 'M_FORBIDDEN'
         """
-        super().__init__(code, msg)
+        super().__init__(code, msg, headers)
         self.errcode = errcode
         if additional_fields is None:
             self._additional_fields: Dict = {}
@@ -335,6 +343,20 @@ class AuthError(SynapseError):
         super().__init__(code, msg, errcode, additional_fields)
 
 
+class OAuthInsufficientScopeError(SynapseError):
+    """An error raised when the caller does not have sufficient scope to perform the requested action"""
+
+    def __init__(
+        self,
+        required_scopes: List[str],
+    ):
+        headers = {
+            "WWW-Authenticate": 'Bearer error="insufficient_scope", scope="%s"'
+            % (" ".join(required_scopes))
+        }
+        super().__init__(401, "Insufficient scope", Codes.FORBIDDEN, None, headers)
+
+
 class UnstableSpecAuthError(AuthError):
     """An error raised when a new error code is being proposed to replace a previous one.
     This error will return a "org.matrix.unstable.errcode" property with the new error code,
diff --git a/synapse/http/server.py b/synapse/http/server.py
index 101dc2e747..04768c6a23 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -111,6 +111,9 @@ def return_json_error(
         exc: SynapseError = f.value  # type: ignore
         error_code = exc.code
         error_dict = exc.error_dict(config)
+        if exc.headers is not None:
+            for header, value in exc.headers.items():
+                request.setHeader(header, value)
         logger.info("%s SynapseError: %s - %s", request, error_code, exc.msg)
     elif f.check(CancelledError):
         error_code = HTTP_STATUS_REQUEST_CANCELLED
@@ -172,6 +175,9 @@ def return_html_error(
         cme: CodeMessageException = f.value  # type: ignore
         code = cme.code
         msg = cme.msg
+        if cme.headers is not None:
+            for header, value in cme.headers.items():
+                request.setHeader(header, value)
 
         if isinstance(cme, RedirectException):
             logger.info("%s redirect to %s", request, cme.location)
diff --git a/tests/handlers/test_oauth_delegation.py b/tests/handlers/test_oauth_delegation.py
index 54f4894819..bca9db1626 100644
--- a/tests/handlers/test_oauth_delegation.py
+++ b/tests/handlers/test_oauth_delegation.py
@@ -17,7 +17,8 @@ from urllib.parse import parse_qs
 
 from twisted.test.proto_helpers import MemoryReactor
 
-from synapse.api.errors import InvalidClientTokenError
+from synapse.api.errors import InvalidClientTokenError, OAuthInsufficientScopeError
+from synapse.rest.client import devices
 from synapse.server import HomeServer
 from synapse.types import JsonDict
 from synapse.util import Clock
@@ -82,6 +83,10 @@ async def get_json(url: str) -> JsonDict:
 
 @skip_unless(HAS_AUTHLIB, "requires authlib")
 class MSC3861OAuthDelegation(HomeserverTestCase):
+    servlets = [
+        devices.register_servlets,
+    ]
+
     def default_config(self) -> Dict[str, Any]:
         config = super().default_config()
         config["public_baseurl"] = BASE_URL
@@ -314,7 +319,37 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
         )
         self.assertEqual(requester.device_id, DEVICE)
 
-    def test_active_guest_with_device(self) -> None:
+    def test_active_guest_not_allowed(self) -> None:
+        """The handler should return an insufficient scope error."""
+
+        self.http_client.request = simple_async_mock(
+            return_value=FakeResponse.json(
+                code=200,
+                payload={
+                    "active": True,
+                    "sub": SUBJECT,
+                    "scope": " ".join([MATRIX_GUEST_SCOPE, MATRIX_DEVICE_SCOPE]),
+                    "username": USERNAME,
+                },
+            )
+        )
+        request = Mock(args={})
+        request.args[b"access_token"] = [b"mockAccessToken"]
+        request.requestHeaders.getRawHeaders = mock_getRawHeaders()
+        error = self.get_failure(
+            self.auth.get_user_by_req(request), OAuthInsufficientScopeError
+        )
+        self.http_client.get_json.assert_called_once_with(WELL_KNOWN)
+        self.http_client.request.assert_called_once_with(
+            method="POST", uri=INTROSPECTION_ENDPOINT, data=ANY, headers=ANY
+        )
+        self._assertParams()
+        self.assertEqual(
+            getattr(error.value, "headers", {})["WWW-Authenticate"],
+            'Bearer error="insufficient_scope", scope="urn:matrix:org.matrix.msc2967.client:api:*"',
+        )
+
+    def test_active_guest_allowed(self) -> None:
         """The handler should return a requester with guest user rights and a device ID."""
 
         self.http_client.request = simple_async_mock(
@@ -331,7 +366,9 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
         request = Mock(args={})
         request.args[b"access_token"] = [b"mockAccessToken"]
         request.requestHeaders.getRawHeaders = mock_getRawHeaders()
-        requester = self.get_success(self.auth.get_user_by_req(request))
+        requester = self.get_success(
+            self.auth.get_user_by_req(request, allow_guest=True)
+        )
         self.http_client.get_json.assert_called_once_with(WELL_KNOWN)
         self.http_client.request.assert_called_once_with(
             method="POST", uri=INTROSPECTION_ENDPOINT, data=ANY, headers=ANY
-- 
cgit 1.5.1


From 31691d61511d41286272d779727502e396ce86eb Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Wed, 10 May 2023 16:08:43 +0200
Subject: Disable account related endpoints when using OAuth delegation

---
 synapse/handlers/auth.py                |   8 +-
 synapse/rest/client/account.py          |  24 +++--
 synapse/rest/client/devices.py          |  11 +-
 synapse/rest/client/keys.py             |  30 +++++-
 synapse/rest/client/login.py            |   3 +
 synapse/rest/client/logout.py           |   3 +
 synapse/rest/client/register.py         |   3 +
 tests/handlers/test_oauth_delegation.py | 180 +++++++++++++++++++++++++++++++-
 8 files changed, 243 insertions(+), 19 deletions(-)

(limited to 'synapse')

diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index d001f2fb2f..a53984be33 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -274,6 +274,8 @@ class AuthHandler:
         # response.
         self._extra_attributes: Dict[str, SsoLoginExtraAttributes] = {}
 
+        self.oauth_delegation_enabled = hs.config.auth.oauth_delegation_enabled
+
     async def validate_user_via_ui_auth(
         self,
         requester: Requester,
@@ -322,8 +324,12 @@ class AuthHandler:
 
             LimitExceededError if the ratelimiter's failed request count for this
                 user is too high to proceed
-
         """
+        if self.oauth_delegation_enabled:
+            raise SynapseError(
+                HTTPStatus.INTERNAL_SERVER_ERROR, "UIA shouldn't be used with MSC3861"
+            )
+
         if not requester.access_token_id:
             raise ValueError("Cannot validate a user without an access token")
         if can_skip_ui_auth and self._ui_auth_session_timeout:
diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py
index 3d0c55daa0..ccd1f7509c 100644
--- a/synapse/rest/client/account.py
+++ b/synapse/rest/client/account.py
@@ -27,6 +27,7 @@ from synapse.api.constants import LoginType
 from synapse.api.errors import (
     Codes,
     InteractiveAuthIncompleteError,
+    NotFoundError,
     SynapseError,
     ThreepidValidationError,
 )
@@ -600,6 +601,9 @@ class ThreepidRestServlet(RestServlet):
     # ThreePidBindRestServelet.PostBody with an `alias_generator` to handle
     # `threePidCreds` versus `three_pid_creds`.
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
+        if self.hs.config.auth.oauth_delegation_enabled:
+            raise NotFoundError(errcode=Codes.UNRECOGNIZED)
+
         if not self.hs.config.registration.enable_3pid_changes:
             raise SynapseError(
                 400, "3PID changes are disabled on this server", Codes.FORBIDDEN
@@ -890,19 +894,21 @@ class AccountStatusRestServlet(RestServlet):
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     if hs.config.worker.worker_app is None:
-        EmailPasswordRequestTokenRestServlet(hs).register(http_server)
-        PasswordRestServlet(hs).register(http_server)
-        DeactivateAccountRestServlet(hs).register(http_server)
-        EmailThreepidRequestTokenRestServlet(hs).register(http_server)
-        MsisdnThreepidRequestTokenRestServlet(hs).register(http_server)
-        AddThreepidEmailSubmitTokenServlet(hs).register(http_server)
-        AddThreepidMsisdnSubmitTokenServlet(hs).register(http_server)
+        if not hs.config.auth.oauth_delegation_enabled:
+            EmailPasswordRequestTokenRestServlet(hs).register(http_server)
+            DeactivateAccountRestServlet(hs).register(http_server)
+            PasswordRestServlet(hs).register(http_server)
+            EmailThreepidRequestTokenRestServlet(hs).register(http_server)
+            MsisdnThreepidRequestTokenRestServlet(hs).register(http_server)
+            AddThreepidEmailSubmitTokenServlet(hs).register(http_server)
+            AddThreepidMsisdnSubmitTokenServlet(hs).register(http_server)
     ThreepidRestServlet(hs).register(http_server)
     if hs.config.worker.worker_app is None:
-        ThreepidAddRestServlet(hs).register(http_server)
         ThreepidBindRestServlet(hs).register(http_server)
         ThreepidUnbindRestServlet(hs).register(http_server)
-        ThreepidDeleteRestServlet(hs).register(http_server)
+        if not hs.config.auth.oauth_delegation_enabled:
+            ThreepidAddRestServlet(hs).register(http_server)
+            ThreepidDeleteRestServlet(hs).register(http_server)
     WhoamiRestServlet(hs).register(http_server)
 
     if hs.config.worker.worker_app is None and hs.config.experimental.msc3720_enabled:
diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py
index e97d0bf475..00e9bff43f 100644
--- a/synapse/rest/client/devices.py
+++ b/synapse/rest/client/devices.py
@@ -19,7 +19,7 @@ from typing import TYPE_CHECKING, List, Optional, Tuple
 from pydantic import Extra, StrictStr
 
 from synapse.api import errors
-from synapse.api.errors import NotFoundError
+from synapse.api.errors import NotFoundError, UnrecognizedRequestError
 from synapse.handlers.device import DeviceHandler
 from synapse.http.server import HttpServer
 from synapse.http.servlet import (
@@ -135,6 +135,7 @@ class DeviceRestServlet(RestServlet):
         self.device_handler = handler
         self.auth_handler = hs.get_auth_handler()
         self._msc3852_enabled = hs.config.experimental.msc3852_enabled
+        self.oauth_delegation_enabled = hs.config.auth.oauth_delegation_enabled
 
     async def on_GET(
         self, request: SynapseRequest, device_id: str
@@ -166,6 +167,9 @@ class DeviceRestServlet(RestServlet):
     async def on_DELETE(
         self, request: SynapseRequest, device_id: str
     ) -> Tuple[int, JsonDict]:
+        if self.oauth_delegation_enabled:
+            raise UnrecognizedRequestError(code=404)
+
         requester = await self.auth.get_user_by_req(request)
 
         try:
@@ -344,7 +348,10 @@ class ClaimDehydratedDeviceServlet(RestServlet):
 
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
-    if hs.config.worker.worker_app is None:
+    if (
+        hs.config.worker.worker_app is None
+        and not hs.config.auth.oauth_delegation_enabled
+    ):
         DeleteDevicesRestServlet(hs).register(http_server)
     DevicesRestServlet(hs).register(http_server)
     if hs.config.worker.worker_app is None:
diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py
index 413edd8a4d..c3ca83c0c8 100644
--- a/synapse/rest/client/keys.py
+++ b/synapse/rest/client/keys.py
@@ -17,9 +17,10 @@
 import logging
 import re
 from collections import Counter
+from http import HTTPStatus
 from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
 
-from synapse.api.errors import InvalidAPICallError, SynapseError
+from synapse.api.errors import Codes, InvalidAPICallError, SynapseError
 from synapse.http.server import HttpServer
 from synapse.http.servlet import (
     RestServlet,
@@ -375,9 +376,29 @@ class SigningKeyUploadServlet(RestServlet):
         user_id = requester.user.to_string()
         body = parse_json_object_from_request(request)
 
-        if self.hs.config.experimental.msc3967_enabled:
-            if await self.e2e_keys_handler.is_cross_signing_set_up_for_user(user_id):
-                # If we already have a master key then cross signing is set up and we require UIA to reset
+        is_cross_signing_setup = (
+            await self.e2e_keys_handler.is_cross_signing_set_up_for_user(user_id)
+        )
+
+        # Before MSC3967 we required UIA both when setting up cross signing for the
+        # first time and when resetting the device signing key. With MSC3967 we only
+        # require UIA when resetting cross-signing, and not when setting up the first
+        # time. Because there is no UIA in MSC3861, for now we throw an error if the
+        # user tries to reset the device signing key when MSC3861 is enabled, but allow
+        # first-time setup.
+        if self.hs.config.auth.oauth_delegation_enabled:
+            # There is no way to reset the device signing key with MSC3861
+            if is_cross_signing_setup:
+                raise SynapseError(
+                    HTTPStatus.NOT_IMPLEMENTED,
+                    "Resetting cross signing keys is not yet supported with MSC3861",
+                    Codes.UNRECOGNIZED,
+                )
+            # But first-time setup is fine
+
+        elif self.hs.config.experimental.msc3967_enabled:
+            # If we already have a master key then cross signing is set up and we require UIA to reset
+            if is_cross_signing_setup:
                 await self.auth_handler.validate_user_via_ui_auth(
                     requester,
                     request,
@@ -387,6 +408,7 @@ class SigningKeyUploadServlet(RestServlet):
                     can_skip_ui_auth=False,
                 )
             # Otherwise we don't require UIA since we are setting up cross signing for first time
+
         else:
             # Previous behaviour is to always require UIA but allow it to be skipped
             await self.auth_handler.validate_user_via_ui_auth(
diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py
index 6ca61ffbd0..4d0eabcb84 100644
--- a/synapse/rest/client/login.py
+++ b/synapse/rest/client/login.py
@@ -633,6 +633,9 @@ class CasTicketServlet(RestServlet):
 
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
+    if hs.config.auth.oauth_delegation_enabled:
+        return
+
     LoginRestServlet(hs).register(http_server)
     if (
         hs.config.worker.worker_app is None
diff --git a/synapse/rest/client/logout.py b/synapse/rest/client/logout.py
index 6d34625ad5..b64a6d5961 100644
--- a/synapse/rest/client/logout.py
+++ b/synapse/rest/client/logout.py
@@ -80,5 +80,8 @@ class LogoutAllRestServlet(RestServlet):
 
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
+    if hs.config.auth.oauth_delegation_enabled:
+        return
+
     LogoutRestServlet(hs).register(http_server)
     LogoutAllRestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py
index 7f84a17e29..6866988c38 100644
--- a/synapse/rest/client/register.py
+++ b/synapse/rest/client/register.py
@@ -955,6 +955,9 @@ def _calculate_registration_flows(
 
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
+    if hs.config.auth.oauth_delegation_enabled:
+        return
+
     if hs.config.worker.worker_app is None:
         EmailRegisterRequestTokenRestServlet(hs).register(http_server)
         MsisdnRegisterRequestTokenRestServlet(hs).register(http_server)
diff --git a/tests/handlers/test_oauth_delegation.py b/tests/handlers/test_oauth_delegation.py
index bca9db1626..ee1bc5ca7a 100644
--- a/tests/handlers/test_oauth_delegation.py
+++ b/tests/handlers/test_oauth_delegation.py
@@ -11,14 +11,27 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Dict
+
+from http import HTTPStatus
+from typing import Any, Dict, Union
 from unittest.mock import ANY, Mock
 from urllib.parse import parse_qs
 
+from signedjson.key import (
+    encode_verify_key_base64,
+    generate_signing_key,
+    get_verify_key,
+)
+from signedjson.sign import sign_json
+
 from twisted.test.proto_helpers import MemoryReactor
 
-from synapse.api.errors import InvalidClientTokenError, OAuthInsufficientScopeError
-from synapse.rest.client import devices
+from synapse.api.errors import (
+    Codes,
+    InvalidClientTokenError,
+    OAuthInsufficientScopeError,
+)
+from synapse.rest.client import account, devices, keys, login, logout, register
 from synapse.server import HomeServer
 from synapse.types import JsonDict
 from synapse.util import Clock
@@ -57,6 +70,7 @@ DEVICE = "AABBCCDD"
 MATRIX_DEVICE_SCOPE = "urn:matrix:org.matrix.msc2967.client:device:" + DEVICE
 SUBJECT = "abc-def-ghi"
 USERNAME = "test-user"
+USER_ID = "@" + USERNAME + ":" + SERVER_NAME
 
 
 async def get_json(url: str) -> JsonDict:
@@ -84,7 +98,12 @@ async def get_json(url: str) -> JsonDict:
 @skip_unless(HAS_AUTHLIB, "requires authlib")
 class MSC3861OAuthDelegation(HomeserverTestCase):
     servlets = [
+        account.register_servlets,
         devices.register_servlets,
+        keys.register_servlets,
+        register.register_servlets,
+        login.register_servlets,
+        logout.register_servlets,
     ]
 
     def default_config(self) -> Dict[str, Any]:
@@ -380,3 +399,158 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
             get_awaitable_result(self.auth.is_server_admin(requester)), False
         )
         self.assertEqual(requester.device_id, DEVICE)
+
+    def make_device_keys(self, user_id: str, device_id: str) -> JsonDict:
+        # We only generate a master key to simplify the test.
+        master_signing_key = generate_signing_key(device_id)
+        master_verify_key = encode_verify_key_base64(get_verify_key(master_signing_key))
+
+        return {
+            "master_key": sign_json(
+                {
+                    "user_id": user_id,
+                    "usage": ["master"],
+                    "keys": {"ed25519:" + master_verify_key: master_verify_key},
+                },
+                user_id,
+                master_signing_key,
+            ),
+        }
+
+    def test_cross_signing(self) -> None:
+        """Try uploading device keys with OAuth delegation enabled."""
+
+        self.http_client.request = simple_async_mock(
+            return_value=FakeResponse.json(
+                code=200,
+                payload={
+                    "active": True,
+                    "sub": SUBJECT,
+                    "scope": " ".join([MATRIX_USER_SCOPE, MATRIX_DEVICE_SCOPE]),
+                    "username": USERNAME,
+                },
+            )
+        )
+        keys_upload_body = self.make_device_keys(USER_ID, DEVICE)
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/v3/keys/device_signing/upload",
+            keys_upload_body,
+            access_token="mockAccessToken",
+        )
+
+        self.assertEqual(channel.code, 200, channel.json_body)
+
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/v3/keys/device_signing/upload",
+            keys_upload_body,
+            access_token="mockAccessToken",
+        )
+
+        self.assertEqual(channel.code, HTTPStatus.NOT_IMPLEMENTED, channel.json_body)
+
+    def expect_unauthorized(
+        self, method: str, path: str, content: Union[bytes, str, JsonDict] = ""
+    ) -> None:
+        channel = self.make_request(method, path, content, shorthand=False)
+
+        self.assertEqual(channel.code, 401, channel.json_body)
+
+    def expect_unrecognized(
+        self, method: str, path: str, content: Union[bytes, str, JsonDict] = ""
+    ) -> None:
+        channel = self.make_request(method, path, content)
+
+        self.assertEqual(channel.code, 404, channel.json_body)
+        self.assertEqual(
+            channel.json_body["errcode"], Codes.UNRECOGNIZED, channel.json_body
+        )
+
+    def test_uia_endpoints(self) -> None:
+        """Test that endpoints that were removed in MSC2964 are no longer available."""
+
+        # This is just an endpoint that should remain visible (but requires auth):
+        self.expect_unauthorized("GET", "/_matrix/client/v3/devices")
+
+        # This remains usable, but will require a uia scope:
+        self.expect_unauthorized(
+            "POST", "/_matrix/client/v3/keys/device_signing/upload"
+        )
+
+    def test_3pid_endpoints(self) -> None:
+        """Test that 3pid account management endpoints that were removed in MSC2964 are no longer available."""
+
+        # Remains and requires auth:
+        self.expect_unauthorized("GET", "/_matrix/client/v3/account/3pid")
+        self.expect_unauthorized(
+            "POST",
+            "/_matrix/client/v3/account/3pid/bind",
+            {
+                "client_secret": "foo",
+                "id_access_token": "bar",
+                "id_server": "foo",
+                "sid": "bar",
+            },
+        )
+        self.expect_unauthorized("POST", "/_matrix/client/v3/account/3pid/unbind", {})
+
+        # These are gone:
+        self.expect_unrecognized(
+            "POST", "/_matrix/client/v3/account/3pid"
+        )  # deprecated
+        self.expect_unrecognized("POST", "/_matrix/client/v3/account/3pid/add")
+        self.expect_unrecognized("POST", "/_matrix/client/v3/account/3pid/delete")
+        self.expect_unrecognized(
+            "POST", "/_matrix/client/v3/account/3pid/email/requestToken"
+        )
+        self.expect_unrecognized(
+            "POST", "/_matrix/client/v3/account/3pid/msisdn/requestToken"
+        )
+
+    def test_account_management_endpoints_removed(self) -> None:
+        """Test that account management endpoints that were removed in MSC2964 are no longer available."""
+        self.expect_unrecognized("POST", "/_matrix/client/v3/account/deactivate")
+        self.expect_unrecognized("POST", "/_matrix/client/v3/account/password")
+        self.expect_unrecognized(
+            "POST", "/_matrix/client/v3/account/password/email/requestToken"
+        )
+        self.expect_unrecognized(
+            "POST", "/_matrix/client/v3/account/password/msisdn/requestToken"
+        )
+
+    def test_registration_endpoints_removed(self) -> None:
+        """Test that registration endpoints that were removed in MSC2964 are no longer available."""
+        self.expect_unrecognized(
+            "GET", "/_matrix/client/v1/register/m.login.registration_token/validity"
+        )
+        self.expect_unrecognized("POST", "/_matrix/client/v3/register")
+        self.expect_unrecognized("GET", "/_matrix/client/v3/register")
+        self.expect_unrecognized("GET", "/_matrix/client/v3/register/available")
+        self.expect_unrecognized(
+            "POST", "/_matrix/client/v3/register/email/requestToken"
+        )
+        self.expect_unrecognized(
+            "POST", "/_matrix/client/v3/register/msisdn/requestToken"
+        )
+
+    def test_session_management_endpoints_removed(self) -> None:
+        """Test that session management endpoints that were removed in MSC2964 are no longer available."""
+        self.expect_unrecognized("GET", "/_matrix/client/v3/login")
+        self.expect_unrecognized("POST", "/_matrix/client/v3/login")
+        self.expect_unrecognized("GET", "/_matrix/client/v3/login/sso/redirect")
+        self.expect_unrecognized("POST", "/_matrix/client/v3/logout")
+        self.expect_unrecognized("POST", "/_matrix/client/v3/logout/all")
+        self.expect_unrecognized("POST", "/_matrix/client/v3/refresh")
+        self.expect_unrecognized("GET", "/_matrix/static/client/login")
+
+    def test_device_management_endpoints_removed(self) -> None:
+        """Test that device management endpoints that were removed in MSC2964 are no longer available."""
+        self.expect_unrecognized("POST", "/_matrix/client/v3/delete_devices")
+        self.expect_unrecognized("DELETE", "/_matrix/client/v3/devices/{DEVICE}")
+
+    def test_openid_endpoints_removed(self) -> None:
+        """Test that OpenID id_token endpoints that were removed in MSC2964 are no longer available."""
+        self.expect_unrecognized(
+            "POST", "/_matrix/client/v3/user/{USERNAME}/openid/request_token"
+        )
-- 
cgit 1.5.1


From 249f4a338dde0c1bcde5e14121d8d9fa156f185f Mon Sep 17 00:00:00 2001
From: Hugh Nimmo-Smith <hughns@matrix.org>
Date: Tue, 9 May 2023 16:20:04 +0200
Subject: Refactor config to be an experimental feature

Also enforce you can't combine it with incompatible config options
---
 synapse/api/auth/msc3861_delegated.py   | 280 ++++++++++++++++++++++++++++++++
 synapse/api/auth/oauth_delegated.py     | 277 -------------------------------
 synapse/config/auth.py                  |  39 +----
 synapse/config/experimental.py          | 193 +++++++++++++++++++++-
 synapse/handlers/auth.py                |   4 +-
 synapse/module_api/__init__.py          |   7 +
 synapse/rest/client/account.py          |   6 +-
 synapse/rest/client/devices.py          |   6 +-
 synapse/rest/client/keys.py             |   2 +-
 synapse/rest/client/login.py            |   2 +-
 synapse/rest/client/logout.py           |   2 +-
 synapse/rest/client/register.py         |   2 +-
 synapse/rest/synapse/client/__init__.py |   2 +-
 synapse/rest/synapse/client/jwks.py     |   8 +-
 synapse/rest/well_known.py              |   9 +-
 synapse/server.py                       |   6 +-
 tests/config/test_oauth_delegation.py   | 202 +++++++++++++++++++++++
 tests/handlers/test_oauth_delegation.py |  15 +-
 tests/rest/test_well_known.py           |  17 +-
 19 files changed, 731 insertions(+), 348 deletions(-)
 create mode 100644 synapse/api/auth/msc3861_delegated.py
 delete mode 100644 synapse/api/auth/oauth_delegated.py
 create mode 100644 tests/config/test_oauth_delegation.py

(limited to 'synapse')

diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py
new file mode 100644
index 0000000000..4ca3280bd3
--- /dev/null
+++ b/synapse/api/auth/msc3861_delegated.py
@@ -0,0 +1,280 @@
+# Copyright 2023 The Matrix.org Foundation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
+from urllib.parse import urlencode
+
+from authlib.oauth2 import ClientAuth
+from authlib.oauth2.auth import encode_client_secret_basic, encode_client_secret_post
+from authlib.oauth2.rfc7523 import ClientSecretJWT, PrivateKeyJWT, private_key_jwt_sign
+from authlib.oauth2.rfc7662 import IntrospectionToken
+from authlib.oidc.discovery import OpenIDProviderMetadata, get_well_known_url
+
+from twisted.web.client import readBody
+from twisted.web.http_headers import Headers
+
+from synapse.api.auth.base import BaseAuth
+from synapse.api.errors import (
+    AuthError,
+    InvalidClientTokenError,
+    OAuthInsufficientScopeError,
+    StoreError,
+)
+from synapse.http.site import SynapseRequest
+from synapse.logging.context import make_deferred_yieldable
+from synapse.types import Requester, UserID, create_requester
+from synapse.util import json_decoder
+from synapse.util.caches.cached_call import RetryOnExceptionCachedCall
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+def scope_to_list(scope: str) -> List[str]:
+    """Convert a scope string to a list of scope tokens"""
+    return scope.strip().split(" ")
+
+
+class PrivateKeyJWTWithKid(PrivateKeyJWT):
+    """An implementation of the private_key_jwt client auth method that includes a kid header.
+
+    This is needed because some providers (Keycloak) require the kid header to figure
+    out which key to use to verify the signature.
+    """
+
+    def sign(self, auth: Any, token_endpoint: str) -> bytes:
+        return private_key_jwt_sign(
+            auth.client_secret,
+            client_id=auth.client_id,
+            token_endpoint=token_endpoint,
+            claims=self.claims,
+            header={"kid": auth.client_secret["kid"]},
+        )
+
+
+class MSC3861DelegatedAuth(BaseAuth):
+    AUTH_METHODS = {
+        "client_secret_post": encode_client_secret_post,
+        "client_secret_basic": encode_client_secret_basic,
+        "client_secret_jwt": ClientSecretJWT(),
+        "private_key_jwt": PrivateKeyJWTWithKid(),
+    }
+
+    EXTERNAL_ID_PROVIDER = "oauth-delegated"
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__(hs)
+
+        self._config = hs.config.experimental.msc3861
+        auth_method = MSC3861DelegatedAuth.AUTH_METHODS.get(
+            self._config.client_auth_method.value, None
+        )
+        # Those assertions are already checked when parsing the config
+        assert self._config.enabled, "OAuth delegation is not enabled"
+        assert self._config.issuer, "No issuer provided"
+        assert self._config.client_id, "No client_id provided"
+        assert auth_method is not None, "Invalid client_auth_method provided"
+
+        self._http_client = hs.get_proxied_http_client()
+        self._hostname = hs.hostname
+
+        self._issuer_metadata = RetryOnExceptionCachedCall(self._load_metadata)
+
+        if isinstance(auth_method, PrivateKeyJWTWithKid):
+            # Use the JWK as the client secret when using the private_key_jwt method
+            assert self._config.jwk, "No JWK provided"
+            self._client_auth = ClientAuth(
+                self._config.client_id, self._config.jwk, auth_method
+            )
+        else:
+            # Else use the client secret
+            assert self._config.client_secret, "No client_secret provided"
+            self._client_auth = ClientAuth(
+                self._config.client_id, self._config.client_secret, auth_method
+            )
+
+    async def _load_metadata(self) -> OpenIDProviderMetadata:
+        if self._config.issuer_metadata is not None:
+            return OpenIDProviderMetadata(**self._config.issuer_metadata)
+        url = get_well_known_url(self._config.issuer, external=True)
+        response = await self._http_client.get_json(url)
+        metadata = OpenIDProviderMetadata(**response)
+        # metadata.validate_introspection_endpoint()
+        return metadata
+
+    async def _introspect_token(self, token: str) -> IntrospectionToken:
+        metadata = await self._issuer_metadata.get()
+        introspection_endpoint = metadata.get("introspection_endpoint")
+        raw_headers: Dict[str, str] = {
+            "Content-Type": "application/x-www-form-urlencoded",
+            "User-Agent": str(self._http_client.user_agent, "utf-8"),
+            "Accept": "application/json",
+        }
+
+        args = {"token": token, "token_type_hint": "access_token"}
+        body = urlencode(args, True)
+
+        # Fill the body/headers with credentials
+        uri, raw_headers, body = self._client_auth.prepare(
+            method="POST", uri=introspection_endpoint, headers=raw_headers, body=body
+        )
+        headers = Headers({k: [v] for (k, v) in raw_headers.items()})
+
+        # Do the actual request
+        # We're not using the SimpleHttpClient util methods as we don't want to
+        # check the HTTP status code and we do the body encoding ourselves.
+        response = await self._http_client.request(
+            method="POST",
+            uri=uri,
+            data=body.encode("utf-8"),
+            headers=headers,
+        )
+
+        resp_body = await make_deferred_yieldable(readBody(response))
+        # TODO: Let's not worry about 5xx errors & co. for now and just try
+        # decoding that as JSON. We should also do some validation of the
+        # response
+        resp = json_decoder.decode(resp_body.decode("utf-8"))
+        return IntrospectionToken(**resp)
+
+    async def is_server_admin(self, requester: Requester) -> bool:
+        return "urn:synapse:admin:*" in requester.scope
+
+    async def get_user_by_req(
+        self,
+        request: SynapseRequest,
+        allow_guest: bool = False,
+        allow_expired: bool = False,
+    ) -> Requester:
+        access_token = self.get_access_token_from_request(request)
+
+        # TODO: we probably want to assert the allow_guest inside this call so that we don't provision the user if they don't have enough permission:
+        requester = await self.get_user_by_access_token(access_token, allow_expired)
+
+        if not allow_guest and requester.is_guest:
+            raise OAuthInsufficientScopeError(
+                ["urn:matrix:org.matrix.msc2967.client:api:*"]
+            )
+
+        return requester
+
+    async def get_user_by_access_token(
+        self,
+        token: str,
+        allow_expired: bool = False,
+    ) -> Requester:
+        introspection_result = await self._introspect_token(token)
+
+        logger.info(f"Introspection result: {introspection_result!r}")
+
+        # TODO: introspection verification should be more extensive, especially:
+        #   - verify the audience
+        if not introspection_result.get("active"):
+            raise InvalidClientTokenError("Token is not active")
+
+        # Let's look at the scope
+        scope: List[str] = scope_to_list(introspection_result.get("scope", ""))
+
+        # Determine type of user based on presence of particular scopes
+        has_admin_scope = "urn:synapse:admin:*" in scope
+        has_user_scope = "urn:matrix:org.matrix.msc2967.client:api:*" in scope
+        has_guest_scope = "urn:matrix:org.matrix.msc2967.client:api:guest" in scope
+        is_user = has_user_scope or has_admin_scope
+        is_guest = has_guest_scope and not is_user
+
+        if not is_user and not is_guest:
+            raise InvalidClientTokenError("No scope in token granting user rights")
+
+        # Match via the sub claim
+        sub: Optional[str] = introspection_result.get("sub")
+        if sub is None:
+            raise InvalidClientTokenError(
+                "Invalid sub claim in the introspection result"
+            )
+
+        user_id_str = await self.store.get_user_by_external_id(
+            MSC3861DelegatedAuth.EXTERNAL_ID_PROVIDER, sub
+        )
+        if user_id_str is None:
+            # If we could not find a user via the external_id, it either does not exist,
+            # or the external_id was never recorded
+
+            # TODO: claim mapping should be configurable
+            username: Optional[str] = introspection_result.get("username")
+            if username is None or not isinstance(username, str):
+                raise AuthError(
+                    500,
+                    "Invalid username claim in the introspection result",
+                )
+            user_id = UserID(username, self._hostname)
+
+            # First try to find a user from the username claim
+            user_info = await self.store.get_userinfo_by_id(user_id=user_id.to_string())
+            if user_info is None:
+                # If the user does not exist, we should create it on the fly
+                # TODO: we could use SCIM to provision users ahead of time and listen
+                # for SCIM SET events if those ever become standard:
+                # https://datatracker.ietf.org/doc/html/draft-hunt-scim-notify-00
+
+                # TODO: claim mapping should be configurable
+                # If present, use the name claim as the displayname
+                name: Optional[str] = introspection_result.get("name")
+
+                await self.store.register_user(
+                    user_id=user_id.to_string(), create_profile_with_displayname=name
+                )
+
+            # And record the sub as external_id
+            await self.store.record_user_external_id(
+                MSC3861DelegatedAuth.EXTERNAL_ID_PROVIDER, sub, user_id.to_string()
+            )
+        else:
+            user_id = UserID.from_string(user_id_str)
+
+        # Find device_id in scope
+        device_id = None
+        for tok in scope:
+            if tok.startswith("urn:matrix:org.matrix.msc2967.client:device:"):
+                parts = tok.split(":")
+                if len(parts) == 5:
+                    device_id = parts[4]
+
+        if device_id:
+            # Create the device on the fly if it does not exist
+            try:
+                await self.store.get_device(
+                    user_id=user_id.to_string(), device_id=device_id
+                )
+            except StoreError:
+                await self.store.store_device(
+                    user_id=user_id.to_string(),
+                    device_id=device_id,
+                    initial_device_display_name="OIDC-native client",
+                )
+
+        # TODO: there are a few things missing in the requester here, which still need
+        # to be figured out, like:
+        #   - impersonation, with the `authenticated_entity`, which is used for
+        #     rate-limiting, MAU limits, etc.
+        #   - shadow-banning, with the `shadow_banned` flag
+        #   - a proper solution for appservices, which still needs to be figured out in
+        #     the context of MSC3861
+        return create_requester(
+            user_id=user_id,
+            device_id=device_id,
+            scope=scope,
+            is_guest=is_guest,
+        )
diff --git a/synapse/api/auth/oauth_delegated.py b/synapse/api/auth/oauth_delegated.py
deleted file mode 100644
index 9cb6eb7f79..0000000000
--- a/synapse/api/auth/oauth_delegated.py
+++ /dev/null
@@ -1,277 +0,0 @@
-# Copyright 2023 The Matrix.org Foundation.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import logging
-from typing import TYPE_CHECKING, Any, Dict, List, Optional
-from urllib.parse import urlencode
-
-from authlib.oauth2 import ClientAuth
-from authlib.oauth2.auth import encode_client_secret_basic, encode_client_secret_post
-from authlib.oauth2.rfc7523 import ClientSecretJWT, PrivateKeyJWT, private_key_jwt_sign
-from authlib.oauth2.rfc7662 import IntrospectionToken
-from authlib.oidc.discovery import OpenIDProviderMetadata, get_well_known_url
-
-from twisted.web.client import readBody
-from twisted.web.http_headers import Headers
-
-from synapse.api.auth.base import BaseAuth
-from synapse.api.errors import (
-    AuthError,
-    InvalidClientTokenError,
-    OAuthInsufficientScopeError,
-    StoreError,
-)
-from synapse.http.site import SynapseRequest
-from synapse.logging.context import make_deferred_yieldable
-from synapse.types import Requester, UserID, create_requester
-from synapse.util import json_decoder
-from synapse.util.caches.cached_call import RetryOnExceptionCachedCall
-
-if TYPE_CHECKING:
-    from synapse.server import HomeServer
-
-logger = logging.getLogger(__name__)
-
-
-def scope_to_list(scope: str) -> List[str]:
-    """Convert a scope string to a list of scope tokens"""
-    return scope.strip().split(" ")
-
-
-class PrivateKeyJWTWithKid(PrivateKeyJWT):
-    """An implementation of the private_key_jwt client auth method that includes a kid header.
-
-    This is needed because some providers (Keycloak) require the kid header to figure
-    out which key to use to verify the signature.
-    """
-
-    def sign(self, auth: Any, token_endpoint: str) -> bytes:
-        return private_key_jwt_sign(
-            auth.client_secret,
-            client_id=auth.client_id,
-            token_endpoint=token_endpoint,
-            claims=self.claims,
-            header={"kid": auth.client_secret["kid"]},
-        )
-
-
-class OAuthDelegatedAuth(BaseAuth):
-    AUTH_METHODS = {
-        "client_secret_post": encode_client_secret_post,
-        "client_secret_basic": encode_client_secret_basic,
-        "client_secret_jwt": ClientSecretJWT(),
-        "private_key_jwt": PrivateKeyJWTWithKid(),
-    }
-
-    EXTERNAL_ID_PROVIDER = "oauth-delegated"
-
-    def __init__(self, hs: "HomeServer"):
-        super().__init__(hs)
-
-        self._config = hs.config.auth
-        assert self._config.oauth_delegation_enabled, "OAuth delegation is not enabled"
-        assert self._config.oauth_delegation_issuer, "No issuer provided"
-        assert self._config.oauth_delegation_client_id, "No client_id provided"
-        assert self._config.oauth_delegation_client_secret, "No client_secret provided"
-        assert (
-            self._config.oauth_delegation_client_auth_method
-            in OAuthDelegatedAuth.AUTH_METHODS
-        ), "Invalid client_auth_method"
-
-        self._http_client = hs.get_proxied_http_client()
-        self._hostname = hs.hostname
-
-        self._issuer_metadata = RetryOnExceptionCachedCall(self._load_metadata)
-        secret = self._config.oauth_delegation_client_secret
-        self._client_auth = ClientAuth(
-            self._config.oauth_delegation_client_id,
-            secret,
-            OAuthDelegatedAuth.AUTH_METHODS[
-                self._config.oauth_delegation_client_auth_method
-            ],
-        )
-
-    async def _load_metadata(self) -> OpenIDProviderMetadata:
-        if self._config.oauth_delegation_issuer_metadata is not None:
-            return OpenIDProviderMetadata(
-                **self._config.oauth_delegation_issuer_metadata
-            )
-        url = get_well_known_url(self._config.oauth_delegation_issuer, external=True)
-        response = await self._http_client.get_json(url)
-        metadata = OpenIDProviderMetadata(**response)
-        # metadata.validate_introspection_endpoint()
-        return metadata
-
-    async def _introspect_token(self, token: str) -> IntrospectionToken:
-        metadata = await self._issuer_metadata.get()
-        introspection_endpoint = metadata.get("introspection_endpoint")
-        raw_headers: Dict[str, str] = {
-            "Content-Type": "application/x-www-form-urlencoded",
-            "User-Agent": str(self._http_client.user_agent, "utf-8"),
-            "Accept": "application/json",
-        }
-
-        args = {"token": token, "token_type_hint": "access_token"}
-        body = urlencode(args, True)
-
-        # Fill the body/headers with credentials
-        uri, raw_headers, body = self._client_auth.prepare(
-            method="POST", uri=introspection_endpoint, headers=raw_headers, body=body
-        )
-        headers = Headers({k: [v] for (k, v) in raw_headers.items()})
-
-        # Do the actual request
-        # We're not using the SimpleHttpClient util methods as we don't want to
-        # check the HTTP status code and we do the body encoding ourself.
-        response = await self._http_client.request(
-            method="POST",
-            uri=uri,
-            data=body.encode("utf-8"),
-            headers=headers,
-        )
-
-        resp_body = await make_deferred_yieldable(readBody(response))
-        # TODO: Let's not worry about 5xx errors & co. for now and just try
-        # decoding that as JSON. We should also do some validation of the
-        # response
-        resp = json_decoder.decode(resp_body.decode("utf-8"))
-        return IntrospectionToken(**resp)
-
-    async def is_server_admin(self, requester: Requester) -> bool:
-        return "urn:synapse:admin:*" in requester.scope
-
-    async def get_user_by_req(
-        self,
-        request: SynapseRequest,
-        allow_guest: bool = False,
-        allow_expired: bool = False,
-    ) -> Requester:
-        access_token = self.get_access_token_from_request(request)
-
-        # TODO: we probably want to assert the allow_guest inside this call so that we don't provision the user if they don't have enough permission:
-        requester = await self.get_user_by_access_token(access_token, allow_expired)
-
-        if not allow_guest and requester.is_guest:
-            raise OAuthInsufficientScopeError(
-                ["urn:matrix:org.matrix.msc2967.client:api:*"]
-            )
-
-        return requester
-
-    async def get_user_by_access_token(
-        self,
-        token: str,
-        allow_expired: bool = False,
-    ) -> Requester:
-        introspection_result = await self._introspect_token(token)
-
-        logger.info(f"Introspection result: {introspection_result!r}")
-
-        # TODO: introspection verification should be more extensive, especially:
-        #   - verify the audience
-        if not introspection_result.get("active"):
-            raise InvalidClientTokenError("Token is not active")
-
-        # Let's look at the scope
-        scope: List[str] = scope_to_list(introspection_result.get("scope", ""))
-
-        # Determine type of user based on presence of particular scopes
-        has_admin_scope = "urn:synapse:admin:*" in scope
-        has_user_scope = "urn:matrix:org.matrix.msc2967.client:api:*" in scope
-        has_guest_scope = "urn:matrix:org.matrix.msc2967.client:api:guest" in scope
-        is_user = has_user_scope or has_admin_scope
-        is_guest = has_guest_scope and not is_user
-
-        if not is_user and not is_guest:
-            raise InvalidClientTokenError("No scope in token granting user rights")
-
-        # Match via the sub claim
-        sub: Optional[str] = introspection_result.get("sub")
-        if sub is None:
-            raise InvalidClientTokenError(
-                "Invalid sub claim in the introspection result"
-            )
-
-        user_id_str = await self.store.get_user_by_external_id(
-            OAuthDelegatedAuth.EXTERNAL_ID_PROVIDER, sub
-        )
-        if user_id_str is None:
-            # If we could not find a user via the external_id, it either does not exist,
-            # or the external_id was never recorded
-
-            # TODO: claim mapping should be configurable
-            username: Optional[str] = introspection_result.get("username")
-            if username is None or not isinstance(username, str):
-                raise AuthError(
-                    500,
-                    "Invalid username claim in the introspection result",
-                )
-            user_id = UserID(username, self._hostname)
-
-            # First try to find a user from the username claim
-            user_info = await self.store.get_userinfo_by_id(user_id=user_id.to_string())
-            if user_info is None:
-                # If the user does not exist, we should create it on the fly
-                # TODO: we could use SCIM to provision users ahead of time and listen
-                # for SCIM SET events if those ever become standard:
-                # https://datatracker.ietf.org/doc/html/draft-hunt-scim-notify-00
-
-                # TODO: claim mapping should be configurable
-                # If present, use the name claim as the displayname
-                name: Optional[str] = introspection_result.get("name")
-
-                await self.store.register_user(
-                    user_id=user_id.to_string(), create_profile_with_displayname=name
-                )
-
-            # And record the sub as external_id
-            await self.store.record_user_external_id(
-                OAuthDelegatedAuth.EXTERNAL_ID_PROVIDER, sub, user_id.to_string()
-            )
-        else:
-            user_id = UserID.from_string(user_id_str)
-
-        # Find device_id in scope
-        device_id = None
-        for tok in scope:
-            if tok.startswith("urn:matrix:org.matrix.msc2967.client:device:"):
-                parts = tok.split(":")
-                if len(parts) == 5:
-                    device_id = parts[4]
-
-        if device_id:
-            # Create the device on the fly if it does not exist
-            try:
-                await self.store.get_device(
-                    user_id=user_id.to_string(), device_id=device_id
-                )
-            except StoreError:
-                await self.store.store_device(
-                    user_id=user_id.to_string(),
-                    device_id=device_id,
-                    initial_device_display_name="OIDC-native client",
-                )
-
-        # TODO: there is a few things missing in the requester here, which still need
-        # to be figured out, like:
-        #   - impersonation, with the `authenticated_entity`, which is used for
-        #     rate-limiting, MAU limits, etc.
-        #   - shadow-banning, with the `shadow_banned` flag
-        #   - a proper solution for appservices, which still needs to be figured out in
-        #     the context of MSC3861
-        return create_requester(
-            user_id=user_id,
-            device_id=device_id,
-            scope=scope,
-            is_guest=is_guest,
-        )
diff --git a/synapse/config/auth.py b/synapse/config/auth.py
index 25b5cc60dc..12e853980e 100644
--- a/synapse/config/auth.py
+++ b/synapse/config/auth.py
@@ -14,11 +14,9 @@
 # limitations under the License.
 from typing import Any
 
-from authlib.jose.rfc7517 import JsonWebKey
-
 from synapse.types import JsonDict
 
-from ._base import Config, ConfigError
+from ._base import Config
 
 
 class AuthConfig(Config):
@@ -31,7 +29,14 @@ class AuthConfig(Config):
         if password_config is None:
             password_config = {}
 
-        passwords_enabled = password_config.get("enabled", True)
+        # password_config.enabled defaults to True unless msc3861 is enabled (null-safe lookup).
+        msc3861_enabled = (
+            (config.get("experimental_features") or {})
+            .get("msc3861", {})
+            .get("enabled", False)
+        )
+        passwords_enabled = password_config.get("enabled", not msc3861_enabled)
+
         # 'only_for_reauth' allows users who have previously set a password to use it,
         # even though passwords would otherwise be disabled.
         passwords_for_reauth_only = passwords_enabled == "only_for_reauth"
@@ -55,29 +60,3 @@ class AuthConfig(Config):
         self.ui_auth_session_timeout = self.parse_duration(
             ui_auth.get("session_timeout", 0)
         )
-
-        oauth_delegation = config.get("oauth_delegation", {})
-        self.oauth_delegation_enabled = oauth_delegation.get("enabled", False)
-        self.oauth_delegation_issuer = oauth_delegation.get("issuer", "")
-        self.oauth_delegation_issuer_metadata = oauth_delegation.get("issuer_metadata")
-        self.oauth_delegation_account = oauth_delegation.get("account", "")
-        self.oauth_delegation_client_id = oauth_delegation.get("client_id", "")
-        self.oauth_delegation_client_secret = oauth_delegation.get("client_secret", "")
-        self.oauth_delegation_client_auth_method = oauth_delegation.get(
-            "client_auth_method", "client_secret_post"
-        )
-
-        self.password_enabled = password_config.get(
-            "enabled", not self.oauth_delegation_enabled
-        )
-
-        if self.oauth_delegation_client_auth_method == "private_key_jwt":
-            self.oauth_delegation_client_secret = JsonWebKey.import_key(
-                self.oauth_delegation_client_secret
-            )
-
-        # If we are delegating via OAuth then password cannot be supported as well
-        if self.oauth_delegation_enabled and self.password_enabled:
-            raise ConfigError(
-                "Password auth cannot be enabled when OAuth delegation is enabled"
-            )
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index d769b7f668..b9607975f9 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -12,15 +12,196 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Optional
+import enum
+from typing import TYPE_CHECKING, Any, Optional
 
 import attr
+import attr.validators
 
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersions
 from synapse.config import ConfigError
-from synapse.config._base import Config
+from synapse.config._base import Config, RootConfig
 from synapse.types import JsonDict
 
+# Determine whether authlib is installed.
+try:
+    import authlib  # noqa: F401
+
+    HAS_AUTHLIB = True
+except ImportError:
+    HAS_AUTHLIB = False
+
+if TYPE_CHECKING:
+    # Only import this if we're type checking, as it might not be installed at runtime.
+    from authlib.jose.rfc7517 import JsonWebKey
+
+
+class ClientAuthMethod(enum.Enum):
+    """List of supported client auth methods."""
+
+    CLIENT_SECRET_POST = "client_secret_post"
+    CLIENT_SECRET_BASIC = "client_secret_basic"
+    CLIENT_SECRET_JWT = "client_secret_jwt"
+    PRIVATE_KEY_JWT = "private_key_jwt"
+
+
+def _parse_jwks(jwks: Optional[JsonDict]) -> Optional["JsonWebKey"]:
+    """Parse a JWK dict into a JsonWebKey, passing ``None`` through unchanged."""
+
+    if jwks is None:
+        return None
+    # Imported here, not at module level, as authlib might not be installed at runtime.
+    from authlib.jose.rfc7517 import JsonWebKey
+
+    return JsonWebKey.import_key(jwks)
+
+
+@attr.s(slots=True, frozen=True)
+class MSC3861:
+    """Configuration for MSC3861: Matrix architecture change to delegate authentication via OIDC"""
+
+    enabled: bool = attr.ib(default=False, validator=attr.validators.instance_of(bool))
+    """Whether to enable MSC3861 auth delegation."""
+
+    @enabled.validator
+    def _check_enabled(self, attribute: attr.Attribute, value: bool) -> None:
+        # Only allow enabling MSC3861 if authlib is installed
+        if value and not HAS_AUTHLIB:
+            raise ConfigError(
+                "MSC3861 is enabled but authlib is not installed. "
+                "Please install authlib to use MSC3861."
+            )
+
+    issuer: str = attr.ib(default="", validator=attr.validators.instance_of(str))
+    """The URL of the OIDC Provider."""
+
+    issuer_metadata: Optional[JsonDict] = attr.ib(default=None)
+    """The issuer metadata to use, otherwise discovered from /.well-known/openid-configuration as per MSC2965."""
+
+    client_id: str = attr.ib(
+        default="",
+        validator=attr.validators.instance_of(str),
+    )
+    """The client ID to use when calling the introspection endpoint."""
+
+    client_auth_method: ClientAuthMethod = attr.ib(
+        default=ClientAuthMethod.CLIENT_SECRET_POST, converter=ClientAuthMethod
+    )
+    """The auth method used when calling the introspection endpoint."""
+
+    client_secret: Optional[str] = attr.ib(
+        default=None,
+        validator=attr.validators.optional(attr.validators.instance_of(str)),
+    )
+    """
+    The client secret to use when calling the introspection endpoint,
+    when using any of the client_secret_* client auth methods.
+    """
+
+    jwk: Optional["JsonWebKey"] = attr.ib(default=None, converter=_parse_jwks)
+    """
+    The JWKS to use when calling the introspection endpoint,
+    when using the private_key_jwt client auth method.
+    """
+
+    @client_auth_method.validator
+    def _check_client_auth_method(
+        self, attribute: attr.Attribute, value: ClientAuthMethod
+    ) -> None:
+        """Require the credentials matching the chosen auth method when MSC3861 is enabled."""
+        if not self.enabled:
+            return
+
+        if value == ClientAuthMethod.PRIVATE_KEY_JWT and self.jwk is None:
+            raise ConfigError(
+                "A JWKS must be provided when using the private_key_jwt client auth method"
+            )
+        # Use value.value below so the message names the configured string, not the enum member.
+        if (
+            value
+            in (
+                ClientAuthMethod.CLIENT_SECRET_POST,
+                ClientAuthMethod.CLIENT_SECRET_BASIC,
+                ClientAuthMethod.CLIENT_SECRET_JWT,
+            )
+            and self.client_secret is None
+        ):
+            raise ConfigError(
+                f"A client secret must be provided when using the {value.value} client auth method"
+            )
+
+    account_management_url: Optional[str] = attr.ib(
+        default=None,
+        validator=attr.validators.optional(attr.validators.instance_of(str)),
+    )
+    """The URL of the My Account page on the OIDC Provider as per MSC2965."""
+
+    def check_config_conflicts(self, root: RootConfig) -> None:
+        """Checks for any configuration conflicts with other parts of Synapse.
+
+        Raises:
+            ConfigError: If there are any configuration conflicts.
+        """
+
+        if not self.enabled:
+            return
+
+        if (
+            root.auth.password_enabled_for_reauth
+            or root.auth.password_enabled_for_login
+        ):
+            raise ConfigError(
+                "Password auth cannot be enabled when OAuth delegation is enabled"
+            )
+
+        if root.registration.enable_registration:
+            raise ConfigError(
+                "Registration cannot be enabled when OAuth delegation is enabled"
+            )
+
+        if (
+            root.oidc.oidc_enabled
+            or root.saml2.saml2_enabled
+            or root.cas.cas_enabled
+            or root.jwt.jwt_enabled
+        ):
+            raise ConfigError("SSO cannot be enabled when OAuth delegation is enabled")
+
+        if bool(root.authproviders.password_providers):
+            raise ConfigError(
+                "Password auth providers cannot be enabled when OAuth delegation is enabled"
+            )
+
+        if root.captcha.enable_registration_captcha:
+            raise ConfigError(
+                "CAPTCHA cannot be enabled when OAuth delegation is enabled"
+            )
+
+        if root.experimental.msc3882_enabled:
+            raise ConfigError(
+                "MSC3882 cannot be enabled when OAuth delegation is enabled"
+            )
+
+        if root.registration.refresh_token_lifetime:
+            raise ConfigError(
+                "refresh_token_lifetime cannot be set when OAuth delegation is enabled"
+            )
+
+        if root.registration.nonrefreshable_access_token_lifetime:
+            raise ConfigError(
+                "nonrefreshable_access_token_lifetime cannot be set when OAuth delegation is enabled"
+            )
+
+        if root.registration.session_lifetime:
+            raise ConfigError(
+                "session_lifetime cannot be set when OAuth delegation is enabled"
+            )
+
+        if not root.experimental.msc3970_enabled:
+            raise ConfigError(
+                "experimental_features.msc3970_enabled must be 'true' when OAuth delegation is enabled"
+            )
+
 
 @attr.s(auto_attribs=True, frozen=True, slots=True)
 class MSC3866Config:
@@ -182,8 +363,14 @@ class ExperimentalConfig(Config):
             "msc3981_recurse_relations", False
         )
 
+        # MSC3861: Matrix architecture change to delegate authentication via OIDC
+        self.msc3861 = MSC3861(**experimental.get("msc3861", {}))
+
         # MSC3970: Scope transaction IDs to devices
-        self.msc3970_enabled = experimental.get("msc3970_enabled", False)
+        self.msc3970_enabled = experimental.get("msc3970_enabled", self.msc3861.enabled)
+
+        # Check that none of the other config options conflict with MSC3861 when enabled
+        self.msc3861.check_config_conflicts(self.root)
 
         # MSC4009: E.164 Matrix IDs
         self.msc4009_e164_mxids = experimental.get("msc4009_e164_mxids", False)
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index a53984be33..4f986d90cb 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -274,7 +274,7 @@ class AuthHandler:
         # response.
         self._extra_attributes: Dict[str, SsoLoginExtraAttributes] = {}
 
-        self.oauth_delegation_enabled = hs.config.auth.oauth_delegation_enabled
+        self.msc3861_oauth_delegation_enabled = hs.config.experimental.msc3861.enabled
 
     async def validate_user_via_ui_auth(
         self,
@@ -325,7 +325,7 @@ class AuthHandler:
             LimitExceededError if the ratelimiter's failed request count for this
                 user is too high to proceed
         """
-        if self.oauth_delegation_enabled:
+        if self.msc3861_oauth_delegation_enabled:
             raise SynapseError(
                 HTTPStatus.INTERNAL_SERVER_ERROR, "UIA shouldn't be used with MSC3861"
             )
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 0e9f366cba..134bd2e620 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -38,6 +38,7 @@ from twisted.web.resource import Resource
 
 from synapse.api import errors
 from synapse.api.errors import SynapseError
+from synapse.config import ConfigError
 from synapse.events import EventBase
 from synapse.events.presence_router import (
     GET_INTERESTED_USERS_CALLBACK,
@@ -252,6 +253,7 @@ class ModuleApi:
         self._device_handler = hs.get_device_handler()
         self.custom_template_dir = hs.config.server.custom_template_directory
         self._callbacks = hs.get_module_api_callbacks()
+        self.msc3861_oauth_delegation_enabled = hs.config.experimental.msc3861.enabled
 
         try:
             app_name = self._hs.config.email.email_app_name
@@ -419,6 +421,11 @@ class ModuleApi:
 
         Added in Synapse v1.46.0.
         """
+        if self.msc3861_oauth_delegation_enabled:
+            raise ConfigError(
+                "Cannot use password auth provider callbacks when OAuth delegation is enabled"
+            )
+
         return self._password_auth_provider.register_password_auth_provider_callbacks(
             check_3pid_auth=check_3pid_auth,
             on_logged_out=on_logged_out,
diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py
index ccd1f7509c..679ab9f266 100644
--- a/synapse/rest/client/account.py
+++ b/synapse/rest/client/account.py
@@ -601,7 +601,7 @@ class ThreepidRestServlet(RestServlet):
     # ThreePidBindRestServelet.PostBody with an `alias_generator` to handle
     # `threePidCreds` versus `three_pid_creds`.
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
-        if self.hs.config.auth.oauth_delegation_enabled:
+        if self.hs.config.experimental.msc3861.enabled:
             raise NotFoundError(errcode=Codes.UNRECOGNIZED)
 
         if not self.hs.config.registration.enable_3pid_changes:
@@ -894,7 +894,7 @@ class AccountStatusRestServlet(RestServlet):
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     if hs.config.worker.worker_app is None:
-        if not hs.config.auth.oauth_delegation_enabled:
+        if not hs.config.experimental.msc3861.enabled:
             EmailPasswordRequestTokenRestServlet(hs).register(http_server)
             DeactivateAccountRestServlet(hs).register(http_server)
             PasswordRestServlet(hs).register(http_server)
@@ -906,7 +906,7 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     if hs.config.worker.worker_app is None:
         ThreepidBindRestServlet(hs).register(http_server)
         ThreepidUnbindRestServlet(hs).register(http_server)
-        if not hs.config.auth.oauth_delegation_enabled:
+        if not hs.config.experimental.msc3861.enabled:
             ThreepidAddRestServlet(hs).register(http_server)
             ThreepidDeleteRestServlet(hs).register(http_server)
     WhoamiRestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py
index 00e9bff43f..38dff9703f 100644
--- a/synapse/rest/client/devices.py
+++ b/synapse/rest/client/devices.py
@@ -135,7 +135,7 @@ class DeviceRestServlet(RestServlet):
         self.device_handler = handler
         self.auth_handler = hs.get_auth_handler()
         self._msc3852_enabled = hs.config.experimental.msc3852_enabled
-        self.oauth_delegation_enabled = hs.config.auth.oauth_delegation_enabled
+        self._msc3861_oauth_delegation_enabled = hs.config.experimental.msc3861.enabled
 
     async def on_GET(
         self, request: SynapseRequest, device_id: str
@@ -167,7 +167,7 @@ class DeviceRestServlet(RestServlet):
     async def on_DELETE(
         self, request: SynapseRequest, device_id: str
     ) -> Tuple[int, JsonDict]:
-        if self.oauth_delegation_enabled:
+        if self._msc3861_oauth_delegation_enabled:
             raise UnrecognizedRequestError(code=404)
 
         requester = await self.auth.get_user_by_req(request)
@@ -350,7 +350,7 @@ class ClaimDehydratedDeviceServlet(RestServlet):
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     if (
         hs.config.worker.worker_app is None
-        and not hs.config.auth.oauth_delegation_enabled
+        and not hs.config.experimental.msc3861.enabled
     ):
         DeleteDevicesRestServlet(hs).register(http_server)
     DevicesRestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py
index c3ca83c0c8..70b8be1aa2 100644
--- a/synapse/rest/client/keys.py
+++ b/synapse/rest/client/keys.py
@@ -386,7 +386,7 @@ class SigningKeyUploadServlet(RestServlet):
         # time. Because there is no UIA in MSC3861, for now we throw an error if the
         # user tries to reset the device signing key when MSC3861 is enabled, but allow
         # first-time setup.
-        if self.hs.config.auth.oauth_delegation_enabled:
+        if self.hs.config.experimental.msc3861.enabled:
             # There is no way to reset the device signing key with MSC3861
             if is_cross_signing_setup:
                 raise SynapseError(
diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py
index 4d0eabcb84..d4dc2462b9 100644
--- a/synapse/rest/client/login.py
+++ b/synapse/rest/client/login.py
@@ -633,7 +633,7 @@ class CasTicketServlet(RestServlet):
 
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
-    if hs.config.auth.oauth_delegation_enabled:
+    if hs.config.experimental.msc3861.enabled:
         return
 
     LoginRestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/logout.py b/synapse/rest/client/logout.py
index b64a6d5961..94ad90942f 100644
--- a/synapse/rest/client/logout.py
+++ b/synapse/rest/client/logout.py
@@ -80,7 +80,7 @@ class LogoutAllRestServlet(RestServlet):
 
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
-    if hs.config.auth.oauth_delegation_enabled:
+    if hs.config.experimental.msc3861.enabled:
         return
 
     LogoutRestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py
index 6866988c38..f8fb0e1dee 100644
--- a/synapse/rest/client/register.py
+++ b/synapse/rest/client/register.py
@@ -955,7 +955,7 @@ def _calculate_registration_flows(
 
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
-    if hs.config.auth.oauth_delegation_enabled:
+    if hs.config.experimental.msc3861.enabled:
         return
 
     if hs.config.worker.worker_app is None:
diff --git a/synapse/rest/synapse/client/__init__.py b/synapse/rest/synapse/client/__init__.py
index dcfd0ad6aa..57335fb913 100644
--- a/synapse/rest/synapse/client/__init__.py
+++ b/synapse/rest/synapse/client/__init__.py
@@ -47,7 +47,7 @@ def build_synapse_client_resource_tree(hs: "HomeServer") -> Mapping[str, Resourc
     }
 
     # Expose the JWKS endpoint if OAuth2 delegation is enabled
-    if hs.config.auth.oauth_delegation_enabled:
+    if hs.config.experimental.msc3861.enabled:
         from synapse.rest.synapse.client.jwks import JwksResource
 
         resources["/_synapse/jwks"] = JwksResource(hs)
diff --git a/synapse/rest/synapse/client/jwks.py b/synapse/rest/synapse/client/jwks.py
index 818585843e..7c0a1223fb 100644
--- a/synapse/rest/synapse/client/jwks.py
+++ b/synapse/rest/synapse/client/jwks.py
@@ -26,8 +26,6 @@ logger = logging.getLogger(__name__)
 
 class JwksResource(DirectServeJsonResource):
     def __init__(self, hs: "HomeServer"):
-        from authlib.jose.rfc7517 import Key
-
         super().__init__(extract_context=True)
 
         # Parameters that are allowed to be exposed in the public key.
@@ -53,10 +51,10 @@ class JwksResource(DirectServeJsonResource):
             "ext",
         }
 
-        secret = hs.config.auth.oauth_delegation_client_secret
+        key = hs.config.experimental.msc3861.jwk
 
-        if isinstance(secret, Key):
-            private_key = secret.as_dict()
+        if key is not None:
+            private_key = key.as_dict()
             public_key = {
                 k: v for k, v in private_key.items() if k in public_parameters
             }
diff --git a/synapse/rest/well_known.py b/synapse/rest/well_known.py
index fd3b17a5ad..b8b4b5379b 100644
--- a/synapse/rest/well_known.py
+++ b/synapse/rest/well_known.py
@@ -44,14 +44,15 @@ class WellKnownBuilder:
                 "base_url": self._config.registration.default_identity_server
             }
 
-        if self._config.auth.oauth_delegation_enabled:
+        # We use the MSC3861 values as they are used by multiple MSCs
+        if self._config.experimental.msc3861.enabled:
             result["org.matrix.msc2965.authentication"] = {
-                "issuer": self._config.auth.oauth_delegation_issuer
+                "issuer": self._config.experimental.msc3861.issuer
             }
-            if self._config.auth.oauth_delegation_account != "":
+            if self._config.experimental.msc3861.account_management_url is not None:
                 result["org.matrix.msc2965.authentication"][
                     "account"
-                ] = self._config.auth.oauth_delegation_account
+                ] = self._config.experimental.msc3861.account_management_url
 
         if self._config.server.extra_well_known_client_content:
             for (
diff --git a/synapse/server.py b/synapse/server.py
index 1c82500f30..0f36ef69cb 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -428,10 +428,10 @@ class HomeServer(metaclass=abc.ABCMeta):
 
     @cache_in_self
     def get_auth(self) -> Auth:
-        if self.config.auth.oauth_delegation_enabled:
-            from synapse.api.auth.oauth_delegated import OAuthDelegatedAuth
+        if self.config.experimental.msc3861.enabled:
+            from synapse.api.auth.msc3861_delegated import MSC3861DelegatedAuth
 
-            return OAuthDelegatedAuth(self)
+            return MSC3861DelegatedAuth(self)
         return InternalAuth(self)
 
     @cache_in_self
diff --git a/tests/config/test_oauth_delegation.py b/tests/config/test_oauth_delegation.py
new file mode 100644
index 0000000000..c5fc6d6ebb
--- /dev/null
+++ b/tests/config/test_oauth_delegation.py
@@ -0,0 +1,202 @@
+# Copyright 2023 Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict
+from unittest.mock import Mock
+
+from synapse.config import ConfigError
+from synapse.module_api import ModuleApi
+from synapse.types import JsonDict
+
+from tests.server import get_clock
+from tests.unittest import HomeserverTestCase, override_config, skip_unless
+
+try:
+    import authlib  # noqa: F401
+
+    HAS_AUTHLIB = True
+except ImportError:
+    HAS_AUTHLIB = False
+
+
+# These are a few constants that are used as config parameters in the tests.
+SERVER_NAME = "test"
+ISSUER = "https://issuer/"
+CLIENT_ID = "test-client-id"
+CLIENT_SECRET = "test-client-secret"
+BASE_URL = "https://synapse/"
+
+
+class CustomAuthModule:
+    """A module which registers a password auth provider."""
+
+    @staticmethod
+    def parse_config(config: JsonDict) -> None:
+        pass
+
+    def __init__(self, config: None, api: ModuleApi):
+        api.register_password_auth_provider_callbacks(
+            auth_checkers={("m.login.password", ("password",)): Mock()},
+        )
+
+
+@skip_unless(HAS_AUTHLIB, "requires authlib")
+class MSC3861OAuthDelegation(HomeserverTestCase):
+    """Test that the Homeserver fails to initialize if the config is invalid."""
+
+    def setUp(self) -> None:
+        self.reactor, self.clock = get_clock()
+        self._hs_args = {"clock": self.clock, "reactor": self.reactor}
+
+    def default_config(self) -> Dict[str, Any]:
+        config = super().default_config()
+        config["public_baseurl"] = BASE_URL
+        if "experimental_features" not in config:
+            config["experimental_features"] = {}
+        config["experimental_features"]["msc3861"] = {
+            "enabled": True,
+            "issuer": ISSUER,
+            "client_id": CLIENT_ID,
+            "client_auth_method": "client_secret_post",
+            "client_secret": CLIENT_SECRET,
+        }
+        return config
+
+    def test_registration_cannot_be_enabled(self) -> None:
+        with self.assertRaises(ConfigError):
+            self.setup_test_homeserver()
+
+    @override_config(
+        {
+            "enable_registration": False,
+            "password_config": {
+                "enabled": True,
+            },
+        }
+    )
+    def test_password_config_cannot_be_enabled(self) -> None:
+        with self.assertRaises(ConfigError):
+            self.setup_test_homeserver()
+
+    @override_config(
+        {
+            "enable_registration": False,
+            "oidc_providers": [
+                {
+                    "idp_id": "microsoft",
+                    "idp_name": "Microsoft",
+                    "issuer": "https://login.microsoftonline.com/<tenant id>/v2.0",
+                    "client_id": "<client id>",
+                    "client_secret": "<client secret>",
+                    "scopes": ["openid", "profile"],
+                    "authorization_endpoint": "https://login.microsoftonline.com/<tenant id>/oauth2/v2.0/authorize",
+                    "token_endpoint": "https://login.microsoftonline.com/<tenant id>/oauth2/v2.0/token",
+                    "userinfo_endpoint": "https://graph.microsoft.com/oidc/userinfo",
+                }
+            ],
+        }
+    )
+    def test_oidc_sso_cannot_be_enabled(self) -> None:
+        with self.assertRaises(ConfigError):
+            self.setup_test_homeserver()
+
+    @override_config(
+        {
+            "enable_registration": False,
+            "cas_config": {
+                "enabled": True,
+                "server_url": "https://cas-server.com",
+                "displayname_attribute": "name",
+                "required_attributes": {"userGroup": "staff", "department": "None"},
+            },
+        }
+    )
+    def test_cas_sso_cannot_be_enabled(self) -> None:
+        with self.assertRaises(ConfigError):
+            self.setup_test_homeserver()
+
+    @override_config(
+        {
+            "enable_registration": False,
+            "modules": [
+                {
+                    "module": f"{__name__}.{CustomAuthModule.__qualname__}",
+                    "config": {},
+                }
+            ],
+        }
+    )
+    def test_auth_providers_cannot_be_enabled(self) -> None:
+        with self.assertRaises(ConfigError):
+            self.setup_test_homeserver()
+
+    @override_config(
+        {
+            "enable_registration": False,
+            "jwt_config": {
+                "enabled": True,
+                "secret": "my-secret-token",
+                "algorithm": "HS256",
+            },
+        }
+    )
+    def test_jwt_auth_cannot_be_enabled(self) -> None:
+        with self.assertRaises(ConfigError):
+            self.setup_test_homeserver()
+
+    @override_config(
+        {
+            "enable_registration": False,
+            "experimental_features": {
+                "msc3882_enabled": True,
+            },
+        }
+    )
+    def test_msc3882_auth_cannot_be_enabled(self) -> None:
+        with self.assertRaises(ConfigError):
+            self.setup_test_homeserver()
+
+    @override_config(
+        {
+            "enable_registration": False,
+            "recaptcha_public_key": "test",
+            "recaptcha_private_key": "test",
+            "enable_registration_captcha": True,
+        }
+    )
+    def test_captcha_cannot_be_enabled(self) -> None:
+        with self.assertRaises(ConfigError):
+            self.setup_test_homeserver()
+
+    @override_config(
+        {
+            "enable_registration": False,
+            "refresh_token_lifetime": "24h",
+            "refreshable_access_token_lifetime": "10m",
+            "nonrefreshable_access_token_lifetime": "24h",
+        }
+    )
+    def test_refreshable_tokens_cannot_be_enabled(self) -> None:
+        with self.assertRaises(ConfigError):
+            self.setup_test_homeserver()
+
+    @override_config(
+        {
+            "enable_registration": False,
+            "session_lifetime": "24h",
+        }
+    )
+    def test_session_lifetime_cannot_be_set(self) -> None:
+        with self.assertRaises(ConfigError):
+            self.setup_test_homeserver()
diff --git a/tests/handlers/test_oauth_delegation.py b/tests/handlers/test_oauth_delegation.py
index ee1bc5ca7a..081fef51ec 100644
--- a/tests/handlers/test_oauth_delegation.py
+++ b/tests/handlers/test_oauth_delegation.py
@@ -109,12 +109,15 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
     def default_config(self) -> Dict[str, Any]:
         config = super().default_config()
         config["public_baseurl"] = BASE_URL
-        config["oauth_delegation"] = {
-            "enabled": True,
-            "issuer": ISSUER,
-            "client_id": CLIENT_ID,
-            "client_auth_method": "client_secret_post",
-            "client_secret": CLIENT_SECRET,
+        config["disable_registration"] = True
+        config["experimental_features"] = {
+            "msc3861": {
+                "enabled": True,
+                "issuer": ISSUER,
+                "client_id": CLIENT_ID,
+                "client_auth_method": "client_secret_post",
+                "client_secret": CLIENT_SECRET,
+            }
         }
         return config
 
diff --git a/tests/rest/test_well_known.py b/tests/rest/test_well_known.py
index 34333d88df..377243a170 100644
--- a/tests/rest/test_well_known.py
+++ b/tests/rest/test_well_known.py
@@ -108,14 +108,17 @@ class WellKnownTests(unittest.HomeserverTestCase):
     @unittest.override_config(
         {
             "public_baseurl": "https://homeserver",  # this is only required so that client well known is served
-            "oauth_delegation": {
-                "enabled": True,
-                "issuer": "https://issuer",
-                "account": "https://my-account.issuer",
-                "client_id": "id",
-                "client_auth_method": "client_secret_post",
-                "client_secret": "secret",
+            "experimental_features": {
+                "msc3861": {
+                    "enabled": True,
+                    "issuer": "https://issuer",
+                    "account_management_url": "https://my-account.issuer",
+                    "client_id": "id",
+                    "client_auth_method": "client_secret_post",
+                    "client_secret": "secret",
+                },
             },
+            "disable_registration": True,
         }
     )
     def test_client_well_known_msc3861_oauth_delegation(self) -> None:
-- 
cgit 1.5.1


From c008b44b4f7bb3604be77709c62e6ec78389f8ed Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Tue, 4 Apr 2023 18:11:17 +0200
Subject: Add an admin token for MAS -> Synapse calls

---
 synapse/api/auth/msc3861_delegated.py | 15 +++++++++++++++
 synapse/config/experimental.py        |  9 +++++++++
 2 files changed, 24 insertions(+)

(limited to 'synapse')

diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py
index 4ca3280bd3..a84b7730b3 100644
--- a/synapse/api/auth/msc3861_delegated.py
+++ b/synapse/api/auth/msc3861_delegated.py
@@ -90,6 +90,7 @@ class MSC3861DelegatedAuth(BaseAuth):
 
         self._http_client = hs.get_proxied_http_client()
         self._hostname = hs.hostname
+        self._admin_token = self._config.admin_token
 
         self._issuer_metadata = RetryOnExceptionCachedCall(self._load_metadata)
 
@@ -176,6 +177,20 @@ class MSC3861DelegatedAuth(BaseAuth):
         token: str,
         allow_expired: bool = False,
     ) -> Requester:
+        if self._admin_token is not None and token == self._admin_token:
+            # XXX: This is a temporary solution so that the admin API can be called by
+            # the OIDC provider. This will be removed once we have OIDC client
+            # credentials grant support in matrix-authentication-service.
+            logging.info("Admin toked used")
+            # XXX: that user doesn't exist and won't be provisioned.
+            # This is mostly fine for admin calls, but we should also think about doing
+            # requesters without a user_id.
+            admin_user = UserID("__oidc_admin", self._hostname)
+            return create_requester(
+                user_id=admin_user,
+                scope=["urn:synapse:admin:*"],
+            )
+
         introspection_result = await self._introspect_token(token)
 
         logger.info(f"Introspection result: {introspection_result!r}")
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index b9607975f9..d4dff22b0b 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -136,6 +136,15 @@ class MSC3861:
     )
     """The URL of the My Account page on the OIDC Provider as per MSC2965."""
 
+    admin_token: Optional[str] = attr.ib(
+        default=None,
+        validator=attr.validators.optional(attr.validators.instance_of(str)),
+    )
+    """
+    A token that should be considered as an admin token.
+    This is used by the OIDC provider, to make admin calls to Synapse.
+    """
+
     def check_config_conflicts(self, root: RootConfig) -> None:
         """Checks for any configuration conflicts with other parts of Synapse.
 
-- 
cgit 1.5.1


From 4d0231b3648d5d70a8e0f4d99a0c040f12f15669 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Tue, 16 May 2023 10:52:37 +0200
Subject: Make AS tokens work & allow ASes to /register

---
 synapse/api/auth/base.py                | 80 +++++++++++++++++++++++++++++++-
 synapse/api/auth/internal.py            | 82 +--------------------------------
 synapse/api/auth/msc3861_delegated.py   |  9 +++-
 synapse/rest/client/register.py         | 69 +++++++++++++++++++++++++++
 tests/handlers/test_oauth_delegation.py |  4 +-
 5 files changed, 159 insertions(+), 85 deletions(-)

(limited to 'synapse')

diff --git a/synapse/api/auth/base.py b/synapse/api/auth/base.py
index 240f2b90de..9321d6f186 100644
--- a/synapse/api/auth/base.py
+++ b/synapse/api/auth/base.py
@@ -14,6 +14,8 @@
 import logging
 from typing import TYPE_CHECKING, Optional, Tuple
 
+from netaddr import IPAddress
+
 from twisted.web.server import Request
 
 from synapse import event_auth
@@ -26,7 +28,8 @@ from synapse.api.errors import (
 )
 from synapse.appservice import ApplicationService
 from synapse.logging.opentracing import trace
-from synapse.types import Requester
+from synapse.types import Requester, create_requester
+from synapse.util.cancellation import cancellable
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -271,3 +274,78 @@ class BaseAuth:
                 raise MissingClientTokenError()
 
             return query_params[0].decode("ascii")
+
+    @cancellable
+    async def get_appservice_user(
+        self, request: Request, access_token: str
+    ) -> Optional[Requester]:
+        """
+        Given a request, reads the request parameters to determine:
+        - whether it's an application service that's making this request
+        - what user the application service should be treated as controlling
+          (the user_id URI parameter allows an application service to masquerade
+          as any applicable user in its namespace)
+        - what device the application service should be treated as controlling
+          (the device_id[^1] URI parameter allows an application service to masquerade
+          as any device that exists for the relevant user)
+
+        [^1] Unstable and provided by MSC3202.
+             Must use `org.matrix.msc3202.device_id` in place of `device_id` for now.
+
+        Returns:
+            the application service `Requester` of that request
+
+        Postconditions:
+        - The `app_service` field in the returned `Requester` is set
+        - The `user_id` field in the returned `Requester` is either the application
+          service sender or the controlled user set by the `user_id` URI parameter
+        - The returned application service is permitted to control the returned user ID.
+        - The returned device ID, if present, has been checked to be a valid device ID
+          for the returned user ID.
+        """
+        DEVICE_ID_ARG_NAME = b"org.matrix.msc3202.device_id"
+
+        app_service = self.store.get_app_service_by_token(access_token)
+        if app_service is None:
+            return None
+
+        if app_service.ip_range_whitelist:
+            ip_address = IPAddress(request.getClientAddress().host)
+            if ip_address not in app_service.ip_range_whitelist:
+                return None
+
+        # This will always be set by the time Twisted calls us.
+        assert request.args is not None
+
+        if b"user_id" in request.args:
+            effective_user_id = request.args[b"user_id"][0].decode("utf8")
+            await self.validate_appservice_can_control_user_id(
+                app_service, effective_user_id
+            )
+        else:
+            effective_user_id = app_service.sender
+
+        effective_device_id: Optional[str] = None
+
+        if (
+            self.hs.config.experimental.msc3202_device_masquerading_enabled
+            and DEVICE_ID_ARG_NAME in request.args
+        ):
+            effective_device_id = request.args[DEVICE_ID_ARG_NAME][0].decode("utf8")
+            # We only just set this so it can't be None!
+            assert effective_device_id is not None
+            device_opt = await self.store.get_device(
+                effective_user_id, effective_device_id
+            )
+            if device_opt is None:
+                # For now, use 400 M_EXCLUSIVE if the device doesn't exist.
+                # This is an open thread of discussion on MSC3202 as of 2021-12-09.
+                raise AuthError(
+                    400,
+                    f"Application service trying to use a device that doesn't exist ('{effective_device_id}' for {effective_user_id})",
+                    Codes.EXCLUSIVE,
+                )
+
+        return create_requester(
+            effective_user_id, app_service=app_service, device_id=effective_device_id
+        )
diff --git a/synapse/api/auth/internal.py b/synapse/api/auth/internal.py
index 813d537e53..e2ae198b19 100644
--- a/synapse/api/auth/internal.py
+++ b/synapse/api/auth/internal.py
@@ -12,12 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 
 import pymacaroons
-from netaddr import IPAddress
-
-from twisted.web.server import Request
 
 from synapse.api.errors import (
     AuthError,
@@ -122,7 +119,7 @@ class InternalAuth(BaseAuth):
             access_token = self.get_access_token_from_request(request)
 
             # First check if it could be a request from an appservice
-            requester = await self._get_appservice_user(request)
+            requester = await self.get_appservice_user(request, access_token)
             if not requester:
                 # If not, it should be from a regular user
                 requester = await self.get_user_by_access_token(
@@ -189,81 +186,6 @@ class InternalAuth(BaseAuth):
         except KeyError:
             raise MissingClientTokenError()
 
-    @cancellable
-    async def _get_appservice_user(self, request: Request) -> Optional[Requester]:
-        """
-        Given a request, reads the request parameters to determine:
-        - whether it's an application service that's making this request
-        - what user the application service should be treated as controlling
-          (the user_id URI parameter allows an application service to masquerade
-          any applicable user in its namespace)
-        - what device the application service should be treated as controlling
-          (the device_id[^1] URI parameter allows an application service to masquerade
-          as any device that exists for the relevant user)
-
-        [^1] Unstable and provided by MSC3202.
-             Must use `org.matrix.msc3202.device_id` in place of `device_id` for now.
-
-        Returns:
-            the application service `Requester` of that request
-
-        Postconditions:
-        - The `app_service` field in the returned `Requester` is set
-        - The `user_id` field in the returned `Requester` is either the application
-          service sender or the controlled user set by the `user_id` URI parameter
-        - The returned application service is permitted to control the returned user ID.
-        - The returned device ID, if present, has been checked to be a valid device ID
-          for the returned user ID.
-        """
-        DEVICE_ID_ARG_NAME = b"org.matrix.msc3202.device_id"
-
-        app_service = self.store.get_app_service_by_token(
-            self.get_access_token_from_request(request)
-        )
-        if app_service is None:
-            return None
-
-        if app_service.ip_range_whitelist:
-            ip_address = IPAddress(request.getClientAddress().host)
-            if ip_address not in app_service.ip_range_whitelist:
-                return None
-
-        # This will always be set by the time Twisted calls us.
-        assert request.args is not None
-
-        if b"user_id" in request.args:
-            effective_user_id = request.args[b"user_id"][0].decode("utf8")
-            await self.validate_appservice_can_control_user_id(
-                app_service, effective_user_id
-            )
-        else:
-            effective_user_id = app_service.sender
-
-        effective_device_id: Optional[str] = None
-
-        if (
-            self.hs.config.experimental.msc3202_device_masquerading_enabled
-            and DEVICE_ID_ARG_NAME in request.args
-        ):
-            effective_device_id = request.args[DEVICE_ID_ARG_NAME][0].decode("utf8")
-            # We only just set this so it can't be None!
-            assert effective_device_id is not None
-            device_opt = await self.store.get_device(
-                effective_user_id, effective_device_id
-            )
-            if device_opt is None:
-                # For now, use 400 M_EXCLUSIVE if the device doesn't exist.
-                # This is an open thread of discussion on MSC3202 as of 2021-12-09.
-                raise AuthError(
-                    400,
-                    f"Application service trying to use a device that doesn't exist ('{effective_device_id}' for {effective_user_id})",
-                    Codes.EXCLUSIVE,
-                )
-
-        return create_requester(
-            effective_user_id, app_service=app_service, device_id=effective_device_id
-        )
-
     async def get_user_by_access_token(
         self,
         token: str,
diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py
index a84b7730b3..b84dce2563 100644
--- a/synapse/api/auth/msc3861_delegated.py
+++ b/synapse/api/auth/msc3861_delegated.py
@@ -162,14 +162,19 @@ class MSC3861DelegatedAuth(BaseAuth):
     ) -> Requester:
         access_token = self.get_access_token_from_request(request)
 
-        # TODO: we probably want to assert the allow_guest inside this call so that we don't provision the user if they don't have enough permission:
-        requester = await self.get_user_by_access_token(access_token, allow_expired)
+        requester = await self.get_appservice_user(request, access_token)
+        if not requester:
+            # TODO: we probably want to assert the allow_guest inside this call
+            # so that we don't provision the user if they don't have enough permission:
+            requester = await self.get_user_by_access_token(access_token, allow_expired)
 
         if not allow_guest and requester.is_guest:
             raise OAuthInsufficientScopeError(
                 ["urn:matrix:org.matrix.msc2967.client:api:*"]
             )
 
+        request.requester = requester
+
         return requester
 
     async def get_user_by_access_token(
diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py
index f8fb0e1dee..d59669f0b6 100644
--- a/synapse/rest/client/register.py
+++ b/synapse/rest/client/register.py
@@ -869,6 +869,74 @@ class RegisterRestServlet(RestServlet):
         return 200, result
 
 
+class RegisterAppServiceOnlyRestServlet(RestServlet):
+    """An alternative registration API endpoint that only allows ASes to register
+
+    This replaces the regular /register endpoint when MSC3861 is enabled. There are
+    two notable differences with the regular /register endpoint:
+     - It only allows the `m.login.application_service` login type
+     - It does not create a device or access token for the just-registered user
+
+    Note that the exact behaviour of this endpoint is not yet finalised. It should be
+    just good enough to make most ASes work.
+    """
+
+    PATTERNS = client_patterns("/register$")
+    CATEGORY = "Registration/login requests"
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__()
+
+        self.auth = hs.get_auth()
+        self.registration_handler = hs.get_registration_handler()
+        self.ratelimiter = hs.get_registration_ratelimiter()
+
+    @interactive_auth_handler
+    async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
+        body = parse_json_object_from_request(request)
+
+        client_addr = request.getClientAddress().host
+
+        await self.ratelimiter.ratelimit(None, client_addr, update=False)
+
+        kind = parse_string(request, "kind", default="user")
+
+        if kind == "guest":
+            raise SynapseError(403, "Guest access is disabled")
+        elif kind != "user":
+            raise UnrecognizedRequestError(
+                f"Do not understand membership kind: {kind}",
+            )
+
+        # Pull out the provided username and do basic sanity checks early since
+        # the auth layer will store these in sessions.
+        desired_username = body.get("username")
+        if not isinstance(desired_username, str) or len(desired_username) > 512:
+            raise SynapseError(400, "Invalid username")
+
+        # Allow only ASes to use this API.
+        if body.get("type") != APP_SERVICE_REGISTRATION_TYPE:
+            raise SynapseError(403, "Non-application service registration type")
+
+        if not self.auth.has_access_token(request):
+            raise SynapseError(
+                400,
+                "Appservice token must be provided when using a type of m.login.application_service",
+            )
+
+        # XXX we should check that desired_username is valid. Currently
+        # we give appservices carte blanche for any insanity in mxids,
+        # because the IRC bridges rely on being able to register stupid
+        # IDs.
+
+        as_token = self.auth.get_access_token_from_request(request)
+
+        user_id = await self.registration_handler.appservice_register(
+            desired_username, as_token
+        )
+        return 200, {"user_id": user_id}
+
+
 def _calculate_registration_flows(
     config: HomeServerConfig, auth_handler: AuthHandler
 ) -> List[List[str]]:
@@ -956,6 +1024,7 @@ def _calculate_registration_flows(
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     if hs.config.experimental.msc3861.enabled:
+        RegisterAppServiceOnlyRestServlet(hs).register(http_server)
         return
 
     if hs.config.worker.worker_app is None:
diff --git a/tests/handlers/test_oauth_delegation.py b/tests/handlers/test_oauth_delegation.py
index 081fef51ec..e53020a58a 100644
--- a/tests/handlers/test_oauth_delegation.py
+++ b/tests/handlers/test_oauth_delegation.py
@@ -527,8 +527,8 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
         self.expect_unrecognized(
             "GET", "/_matrix/client/v1/register/m.login.registration_token/validity"
         )
-        self.expect_unrecognized("POST", "/_matrix/client/v3/register")
-        self.expect_unrecognized("GET", "/_matrix/client/v3/register")
+        # This is still available for AS registrations
+        # self.expect_unrecognized("POST", "/_matrix/client/v3/register")
         self.expect_unrecognized("GET", "/_matrix/client/v3/register/available")
         self.expect_unrecognized(
             "POST", "/_matrix/client/v3/register/email/requestToken"
-- 
cgit 1.5.1


From e343125b3880bfc55223735a784eb1894db5e9be Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Wed, 10 May 2023 18:05:06 +0200
Subject: Disable incompatible Admin API endpoints

---
 synapse/rest/admin/__init__.py          | 21 +++++++++++++--------
 synapse/rest/admin/users.py             |  8 ++++++++
 tests/handlers/test_oauth_delegation.py | 19 +++++++++++++++++++
 3 files changed, 40 insertions(+), 8 deletions(-)

(limited to 'synapse')

diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index c729364839..fe8177ed4d 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -257,9 +257,11 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     DeleteRoomStatusByRoomIdRestServlet(hs).register(http_server)
     JoinRoomAliasServlet(hs).register(http_server)
     VersionServlet(hs).register(http_server)
-    UserAdminServlet(hs).register(http_server)
+    if not hs.config.experimental.msc3861.enabled:
+        UserAdminServlet(hs).register(http_server)
     UserMembershipRestServlet(hs).register(http_server)
-    UserTokenRestServlet(hs).register(http_server)
+    if not hs.config.experimental.msc3861.enabled:
+        UserTokenRestServlet(hs).register(http_server)
     UserRestServletV2(hs).register(http_server)
     UsersRestServletV2(hs).register(http_server)
     UserMediaStatisticsRestServlet(hs).register(http_server)
@@ -274,9 +276,10 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     RoomEventContextServlet(hs).register(http_server)
     RateLimitRestServlet(hs).register(http_server)
     UsernameAvailableRestServlet(hs).register(http_server)
-    ListRegistrationTokensRestServlet(hs).register(http_server)
-    NewRegistrationTokenRestServlet(hs).register(http_server)
-    RegistrationTokenRestServlet(hs).register(http_server)
+    if not hs.config.experimental.msc3861.enabled:
+        ListRegistrationTokensRestServlet(hs).register(http_server)
+        NewRegistrationTokenRestServlet(hs).register(http_server)
+        RegistrationTokenRestServlet(hs).register(http_server)
     DestinationMembershipRestServlet(hs).register(http_server)
     DestinationResetConnectionRestServlet(hs).register(http_server)
     DestinationRestServlet(hs).register(http_server)
@@ -306,10 +309,12 @@ def register_servlets_for_client_rest_resource(
     # The following resources can only be run on the main process.
     if hs.config.worker.worker_app is None:
         DeactivateAccountRestServlet(hs).register(http_server)
-        ResetPasswordRestServlet(hs).register(http_server)
+        if not hs.config.experimental.msc3861.enabled:
+            ResetPasswordRestServlet(hs).register(http_server)
     SearchUsersRestServlet(hs).register(http_server)
-    UserRegisterServlet(hs).register(http_server)
-    AccountValidityRenewServlet(hs).register(http_server)
+    if not hs.config.experimental.msc3861.enabled:
+        UserRegisterServlet(hs).register(http_server)
+        AccountValidityRenewServlet(hs).register(http_server)
 
     # Load the media repo ones if we're using them. Otherwise load the servlets which
     # don't need a media repo (typically readonly admin APIs).
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 932333ae57..407fe9c804 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -71,6 +71,7 @@ class UsersRestServletV2(RestServlet):
         self.auth = hs.get_auth()
         self.admin_handler = hs.get_admin_handler()
         self._msc3866_enabled = hs.config.experimental.msc3866.enabled
+        self._msc3861_enabled = hs.config.experimental.msc3861.enabled
 
     async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         await assert_requester_is_admin(self.auth, request)
@@ -94,7 +95,14 @@ class UsersRestServletV2(RestServlet):
 
         user_id = parse_string(request, "user_id")
         name = parse_string(request, "name")
+
         guests = parse_boolean(request, "guests", default=True)
+        if self._msc3861_enabled and guests:
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST,
+                "The guests parameter is not supported when MSC3861 is enabled.",
+                errcode=Codes.INVALID_PARAM,
+            )
         deactivated = parse_boolean(request, "deactivated", default=False)
 
         # If support for MSC3866 is not enabled, apply no filtering based on the
diff --git a/tests/handlers/test_oauth_delegation.py b/tests/handlers/test_oauth_delegation.py
index e53020a58a..b79c43a424 100644
--- a/tests/handlers/test_oauth_delegation.py
+++ b/tests/handlers/test_oauth_delegation.py
@@ -31,6 +31,7 @@ from synapse.api.errors import (
     InvalidClientTokenError,
     OAuthInsufficientScopeError,
 )
+from synapse.rest import admin
 from synapse.rest.client import account, devices, keys, login, logout, register
 from synapse.server import HomeServer
 from synapse.types import JsonDict
@@ -104,6 +105,7 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
         register.register_servlets,
         login.register_servlets,
         logout.register_servlets,
+        admin.register_servlets,
     ]
 
     def default_config(self) -> Dict[str, Any]:
@@ -557,3 +559,20 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
         self.expect_unrecognized(
             "POST", "/_matrix/client/v3/user/{USERNAME}/openid/request_token"
         )
+
+    def test_admin_api_endpoints_removed(self) -> None:
+        """Test that admin API endpoints that were removed in MSC2964 are no longer available."""
+        self.expect_unrecognized("GET", "/_synapse/admin/v1/registration_tokens")
+        self.expect_unrecognized("POST", "/_synapse/admin/v1/registration_tokens/new")
+        self.expect_unrecognized("GET", "/_synapse/admin/v1/registration_tokens/abcd")
+        self.expect_unrecognized("PUT", "/_synapse/admin/v1/registration_tokens/abcd")
+        self.expect_unrecognized(
+            "DELETE", "/_synapse/admin/v1/registration_tokens/abcd"
+        )
+        self.expect_unrecognized("POST", "/_synapse/admin/v1/reset_password/foo")
+        self.expect_unrecognized("POST", "/_synapse/admin/v1/users/foo/login")
+        self.expect_unrecognized("GET", "/_synapse/admin/v1/register")
+        self.expect_unrecognized("POST", "/_synapse/admin/v1/register")
+        self.expect_unrecognized("GET", "/_synapse/admin/v1/users/foo/admin")
+        self.expect_unrecognized("PUT", "/_synapse/admin/v1/users/foo/admin")
+        self.expect_unrecognized("POST", "/_synapse/admin/v1/account_validity/validity")
-- 
cgit 1.5.1


From 14a5be9c4d69b5669792f2cdc658c266847a8c4a Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Mon, 22 May 2023 15:48:57 +0200
Subject: Handle errors when introspecting tokens

This returns a proper 503 when the introspection endpoint is not working
for some reason, which should avoid logging out clients in those cases.
---
 synapse/api/auth/msc3861_delegated.py   | 42 +++++++++++++++++++++++++++++----
 tests/handlers/test_oauth_delegation.py | 35 +++++++++++++++++++++++++++
 tests/test_utils/__init__.py            |  4 ++--
 3 files changed, 74 insertions(+), 7 deletions(-)

(limited to 'synapse')

diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py
index b84dce2563..82c66691da 100644
--- a/synapse/api/auth/msc3861_delegated.py
+++ b/synapse/api/auth/msc3861_delegated.py
@@ -27,9 +27,11 @@ from twisted.web.http_headers import Headers
 from synapse.api.auth.base import BaseAuth
 from synapse.api.errors import (
     AuthError,
+    HttpResponseException,
     InvalidClientTokenError,
     OAuthInsufficientScopeError,
     StoreError,
+    SynapseError,
 )
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import make_deferred_yieldable
@@ -117,6 +119,21 @@ class MSC3861DelegatedAuth(BaseAuth):
         return metadata
 
     async def _introspect_token(self, token: str) -> IntrospectionToken:
+        """
+        Send a token to the introspection endpoint and return the introspection response
+
+        Parameters:
+            token: The token to introspect
+
+        Raises:
+            HttpResponseException: If the introspection endpoint returns a non-2xx response
+            ValueError: If the introspection endpoint returns an invalid JSON response
+            JSONDecodeError: If the introspection endpoint returns a non-JSON response
+            Exception: If the HTTP request fails
+
+        Returns:
+            The introspection response
+        """
         metadata = await self._issuer_metadata.get()
         introspection_endpoint = metadata.get("introspection_endpoint")
         raw_headers: Dict[str, str] = {
@@ -136,7 +153,7 @@ class MSC3861DelegatedAuth(BaseAuth):
 
         # Do the actual request
         # We're not using the SimpleHttpClient util methods as we don't want to
-        # check the HTTP status code and we do the body encoding ourself.
+        # check the HTTP status code, and we do the body encoding ourselves.
         response = await self._http_client.request(
             method="POST",
             uri=uri,
@@ -145,10 +162,21 @@ class MSC3861DelegatedAuth(BaseAuth):
         )
 
         resp_body = await make_deferred_yieldable(readBody(response))
-        # TODO: Let's not worry about 5xx errors & co. for now and just try
-        # decoding that as JSON. We should also do some validation of the
-        # response
+
+        if response.code < 200 or response.code >= 300:
+            raise HttpResponseException(
+                response.code,
+                response.phrase.decode("ascii", errors="replace"),
+                resp_body,
+            )
+
         resp = json_decoder.decode(resp_body.decode("utf-8"))
+
+        if not isinstance(resp, dict):
+            raise ValueError(
+                "The introspection endpoint returned an invalid JSON response."
+            )
+
         return IntrospectionToken(**resp)
 
     async def is_server_admin(self, requester: Requester) -> bool:
@@ -196,7 +224,11 @@ class MSC3861DelegatedAuth(BaseAuth):
                 scope=["urn:synapse:admin:*"],
             )
 
-        introspection_result = await self._introspect_token(token)
+        try:
+            introspection_result = await self._introspect_token(token)
+        except Exception:
+            logger.exception("Failed to introspect token")
+            raise SynapseError(503, "Unable to introspect the access token")
 
         logger.info(f"Introspection result: {introspection_result!r}")
 
diff --git a/tests/handlers/test_oauth_delegation.py b/tests/handlers/test_oauth_delegation.py
index b79c43a424..16ce2c069d 100644
--- a/tests/handlers/test_oauth_delegation.py
+++ b/tests/handlers/test_oauth_delegation.py
@@ -30,6 +30,7 @@ from synapse.api.errors import (
     Codes,
     InvalidClientTokenError,
     OAuthInsufficientScopeError,
+    SynapseError,
 )
 from synapse.rest import admin
 from synapse.rest.client import account, devices, keys, login, logout, register
@@ -405,6 +406,40 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
         )
         self.assertEqual(requester.device_id, DEVICE)
 
+    def test_unavailable_introspection_endpoint(self) -> None:
+        """The handler should return a 503 Service Unavailable error."""
+        request = Mock(args={})
+        request.args[b"access_token"] = [b"mockAccessToken"]
+        request.requestHeaders.getRawHeaders = mock_getRawHeaders()
+
+        # The introspection endpoint is returning an error.
+        self.http_client.request = simple_async_mock(
+            return_value=FakeResponse(code=500, body=b"Internal Server Error")
+        )
+        error = self.get_failure(self.auth.get_user_by_req(request), SynapseError)
+        self.assertEqual(error.value.code, 503)
+
+        # The introspection endpoint request fails.
+        self.http_client.request = simple_async_mock(raises=Exception())
+        error = self.get_failure(self.auth.get_user_by_req(request), SynapseError)
+        self.assertEqual(error.value.code, 503)
+
+        # The introspection endpoint does not return a JSON object.
+        self.http_client.request = simple_async_mock(
+            return_value=FakeResponse.json(
+                code=200, payload=["this is an array", "not an object"]
+            )
+        )
+        error = self.get_failure(self.auth.get_user_by_req(request), SynapseError)
+        self.assertEqual(error.value.code, 503)
+
+        # The introspection endpoint does not return valid JSON.
+        self.http_client.request = simple_async_mock(
+            return_value=FakeResponse(code=200, body=b"this is not valid JSON")
+        )
+        error = self.get_failure(self.auth.get_user_by_req(request), SynapseError)
+        self.assertEqual(error.value.code, 503)
+
     def make_device_keys(self, user_id: str, device_id: str) -> JsonDict:
         # We only generate a master key to simplify the test.
         master_signing_key = generate_signing_key(device_id)
diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py
index e5dae670a7..c8cc841d95 100644
--- a/tests/test_utils/__init__.py
+++ b/tests/test_utils/__init__.py
@@ -33,7 +33,7 @@ from twisted.web.http import RESPONSES
 from twisted.web.http_headers import Headers
 from twisted.web.iweb import IResponse
 
-from synapse.types import JsonDict
+from synapse.types import JsonSerializable
 
 if TYPE_CHECKING:
     from sys import UnraisableHookArgs
@@ -145,7 +145,7 @@ class FakeResponse:  # type: ignore[misc]
         protocol.connectionLost(Failure(ResponseDone()))
 
     @classmethod
-    def json(cls, *, code: int = 200, payload: JsonDict) -> "FakeResponse":
+    def json(cls, *, code: int = 200, payload: JsonSerializable) -> "FakeResponse":
         headers = Headers({"Content-Type": ["application/json"]})
         body = json.dumps(payload).encode("utf-8")
         return cls(code=code, body=body, headers=headers)
-- 
cgit 1.5.1


From 98afc57d59df118a13f894fc66f206bc7409e14a Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Mon, 22 May 2023 17:17:49 +0200
Subject: Make OIDC scope constants

---
 synapse/api/auth/msc3861_delegated.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

(limited to 'synapse')

diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py
index 82c66691da..5b0e678c0f 100644
--- a/synapse/api/auth/msc3861_delegated.py
+++ b/synapse/api/auth/msc3861_delegated.py
@@ -44,6 +44,15 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
+# Scope as defined by MSC2967
+# https://github.com/matrix-org/matrix-spec-proposals/pull/2967
+SCOPE_MATRIX_API = "urn:matrix:org.matrix.msc2967.client:api:*"
+SCOPE_MATRIX_GUEST = "urn:matrix:org.matrix.msc2967.client:api:guest"
+SCOPE_MATRIX_DEVICE_PREFIX = "urn:matrix:org.matrix.msc2967.client:device:"
+
+# Scope which allows access to the Synapse admin API
+SCOPE_SYNAPSE_ADMIN = "urn:synapse:admin:*"
+
 
 def scope_to_list(scope: str) -> List[str]:
     """Convert a scope string to a list of scope tokens"""
@@ -197,9 +206,7 @@ class MSC3861DelegatedAuth(BaseAuth):
             requester = await self.get_user_by_access_token(access_token, allow_expired)
 
         if not allow_guest and requester.is_guest:
-            raise OAuthInsufficientScopeError(
-                ["urn:matrix:org.matrix.msc2967.client:api:*"]
-            )
+            raise OAuthInsufficientScopeError([SCOPE_MATRIX_API])
 
         request.requester = requester
 
@@ -241,9 +248,9 @@ class MSC3861DelegatedAuth(BaseAuth):
         scope: List[str] = scope_to_list(introspection_result.get("scope", ""))
 
         # Determine type of user based on presence of particular scopes
-        has_admin_scope = "urn:synapse:admin:*" in scope
-        has_user_scope = "urn:matrix:org.matrix.msc2967.client:api:*" in scope
-        has_guest_scope = "urn:matrix:org.matrix.msc2967.client:api:guest" in scope
+        has_admin_scope = SCOPE_SYNAPSE_ADMIN in scope
+        has_user_scope = SCOPE_MATRIX_API in scope
+        has_guest_scope = SCOPE_MATRIX_GUEST in scope
         is_user = has_user_scope or has_admin_scope
         is_guest = has_guest_scope and not is_user
 
@@ -299,10 +306,8 @@ class MSC3861DelegatedAuth(BaseAuth):
         # Find device_id in scope
         device_id = None
         for tok in scope:
-            if tok.startswith("urn:matrix:org.matrix.msc2967.client:device:"):
-                parts = tok.split(":")
-                if len(parts) == 5:
-                    device_id = parts[4]
+            if tok.startswith(SCOPE_MATRIX_DEVICE_PREFIX):
+                device_id = tok[len(SCOPE_MATRIX_DEVICE_PREFIX) :]
 
         if device_id:
             # Create the device on the fly if it does not exist
-- 
cgit 1.5.1


From f739bde962daa9bc425c8343f35993ae889dbc67 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Tue, 23 May 2023 16:59:53 +0200
Subject: Reject tokens with multiple device scopes

---
 synapse/api/auth/msc3861_delegated.py   | 30 ++++++++++++++++++++++++------
 tests/handlers/test_oauth_delegation.py | 29 ++++++++++++++++++++++++++++-
 2 files changed, 52 insertions(+), 7 deletions(-)

(limited to 'synapse')

diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py
index 5b0e678c0f..e4b16c0b5c 100644
--- a/synapse/api/auth/msc3861_delegated.py
+++ b/synapse/api/auth/msc3861_delegated.py
@@ -303,13 +303,31 @@ class MSC3861DelegatedAuth(BaseAuth):
         else:
             user_id = UserID.from_string(user_id_str)
 
-        # Find device_id in scope
-        device_id = None
-        for tok in scope:
-            if tok.startswith(SCOPE_MATRIX_DEVICE_PREFIX):
-                device_id = tok[len(SCOPE_MATRIX_DEVICE_PREFIX) :]
+        # Find device_ids in scope
+        # We only allow a single device_id in the scope, so we find them all in the
+        # scope list, and raise if there is more than one. The OIDC server should be
+        # the one enforcing valid scopes, so we raise a 500 if we find an invalid scope.
+        device_ids = [
+            tok[len(SCOPE_MATRIX_DEVICE_PREFIX) :]
+            for tok in scope
+            if tok.startswith(SCOPE_MATRIX_DEVICE_PREFIX)
+        ]
+
+        if len(device_ids) > 1:
+            raise AuthError(
+                500,
+                "Multiple device IDs in scope",
+            )
+
+        device_id = device_ids[0] if device_ids else None
+        if device_id is not None:
+            # Sanity check the device_id
+            if len(device_id) > 255 or len(device_id) < 1:
+                raise AuthError(
+                    500,
+                    "Invalid device ID in scope",
+                )
 
-        if device_id:
             # Create the device on the fly if it does not exist
             try:
                 await self.store.get_device(
diff --git a/tests/handlers/test_oauth_delegation.py b/tests/handlers/test_oauth_delegation.py
index 16ce2c069d..0641535512 100644
--- a/tests/handlers/test_oauth_delegation.py
+++ b/tests/handlers/test_oauth_delegation.py
@@ -27,6 +27,7 @@ from signedjson.sign import sign_json
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.errors import (
+    AuthError,
     Codes,
     InvalidClientTokenError,
     OAuthInsufficientScopeError,
@@ -68,8 +69,9 @@ INTROSPECTION_ENDPOINT = ISSUER + "introspect"
 SYNAPSE_ADMIN_SCOPE = "urn:synapse:admin:*"
 MATRIX_USER_SCOPE = "urn:matrix:org.matrix.msc2967.client:api:*"
 MATRIX_GUEST_SCOPE = "urn:matrix:org.matrix.msc2967.client:api:guest"
+MATRIX_DEVICE_SCOPE_PREFIX = "urn:matrix:org.matrix.msc2967.client:device:"
 DEVICE = "AABBCCDD"
-MATRIX_DEVICE_SCOPE = "urn:matrix:org.matrix.msc2967.client:device:" + DEVICE
+MATRIX_DEVICE_SCOPE = MATRIX_DEVICE_SCOPE_PREFIX + DEVICE
 SUBJECT = "abc-def-ghi"
 USERNAME = "test-user"
 USER_ID = "@" + USERNAME + ":" + SERVER_NAME
@@ -344,6 +346,31 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
         )
         self.assertEqual(requester.device_id, DEVICE)
 
+    def test_multiple_devices(self) -> None:
+        """The handler should raise an error if multiple devices are found in the scope."""
+
+        self.http_client.request = simple_async_mock(
+            return_value=FakeResponse.json(
+                code=200,
+                payload={
+                    "active": True,
+                    "sub": SUBJECT,
+                    "scope": " ".join(
+                        [
+                            MATRIX_USER_SCOPE,
+                            f"{MATRIX_DEVICE_SCOPE_PREFIX}AABBCC",
+                            f"{MATRIX_DEVICE_SCOPE_PREFIX}DDEEFF",
+                        ]
+                    ),
+                    "username": USERNAME,
+                },
+            )
+        )
+        request = Mock(args={})
+        request.args[b"access_token"] = [b"mockAccessToken"]
+        request.requestHeaders.getRawHeaders = mock_getRawHeaders()
+        self.get_failure(self.auth.get_user_by_req(request), AuthError)
+
     def test_active_guest_not_allowed(self) -> None:
         """The handler should return an insufficient scope error."""
 
-- 
cgit 1.5.1


From 32a2f050042531ad4673b42789e833e9cd307740 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Fri, 26 May 2023 14:50:19 +0200
Subject: Make the config tests spawn the homeserver only when needed

---
 synapse/config/experimental.py        |  40 ++--
 tests/config/test_oauth_delegation.py | 348 +++++++++++++++-------------------
 2 files changed, 182 insertions(+), 206 deletions(-)

(limited to 'synapse')

diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index d4dff22b0b..1d189b2e26 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -69,7 +69,8 @@ class MSC3861:
         if value and not HAS_AUTHLIB:
             raise ConfigError(
                 "MSC3861 is enabled but authlib is not installed. "
-                "Please install authlib to use MSC3861."
+                "Please install authlib to use MSC3861.",
+                ("experimental", "msc3861", "enabled"),
             )
 
     issuer: str = attr.ib(default="", validator=attr.validators.instance_of(str))
@@ -114,7 +115,8 @@ class MSC3861:
 
         if value == ClientAuthMethod.PRIVATE_KEY_JWT and self.jwk is None:
             raise ConfigError(
-                "A JWKS must be provided when using the private_key_jwt client auth method"
+                "A JWKS must be provided when using the private_key_jwt client auth method",
+                ("experimental", "msc3861", "client_auth_method"),
             )
 
         if (
@@ -127,7 +129,8 @@ class MSC3861:
             and self.client_secret is None
         ):
             raise ConfigError(
-                f"A client secret must be provided when using the {value} client auth method"
+                f"A client secret must be provided when using the {value} client auth method",
+                ("experimental", "msc3861", "client_auth_method"),
             )
 
     account_management_url: Optional[str] = attr.ib(
@@ -160,12 +163,14 @@ class MSC3861:
             or root.auth.password_enabled_for_login
         ):
             raise ConfigError(
-                "Password auth cannot be enabled when OAuth delegation is enabled"
+                "Password auth cannot be enabled when OAuth delegation is enabled",
+                ("password_config", "enabled"),
             )
 
         if root.registration.enable_registration:
             raise ConfigError(
-                "Registration cannot be enabled when OAuth delegation is enabled"
+                "Registration cannot be enabled when OAuth delegation is enabled",
+                ("enable_registration",),
             )
 
         if (
@@ -183,32 +188,38 @@ class MSC3861:
 
         if root.captcha.enable_registration_captcha:
             raise ConfigError(
-                "CAPTCHA cannot be enabled when OAuth delegation is enabled"
+                "CAPTCHA cannot be enabled when OAuth delegation is enabled",
+                ("captcha", "enable_registration_captcha"),
             )
 
         if root.experimental.msc3882_enabled:
             raise ConfigError(
-                "MSC3882 cannot be enabled when OAuth delegation is enabled"
+                "MSC3882 cannot be enabled when OAuth delegation is enabled",
+                ("experimental_features", "msc3882_enabled"),
             )
 
         if root.registration.refresh_token_lifetime:
             raise ConfigError(
-                "refresh_token_lifetime cannot be set when OAuth delegation is enabled"
+                "refresh_token_lifetime cannot be set when OAuth delegation is enabled",
+                ("refresh_token_lifetime",),
             )
 
         if root.registration.nonrefreshable_access_token_lifetime:
             raise ConfigError(
-                "nonrefreshable_access_token_lifetime cannot be set when OAuth delegation is enabled"
+                "nonrefreshable_access_token_lifetime cannot be set when OAuth delegation is enabled",
+                ("nonrefreshable_access_token_lifetime",),
             )
 
         if root.registration.session_lifetime:
             raise ConfigError(
-                "session_lifetime cannot be set when OAuth delegation is enabled"
+                "session_lifetime cannot be set when OAuth delegation is enabled",
+                ("session_lifetime",),
             )
 
         if not root.experimental.msc3970_enabled:
             raise ConfigError(
-                "experimental_features.msc3970_enabled must be 'true' when OAuth delegation is enabled"
+                "experimental_features.msc3970_enabled must be 'true' when OAuth delegation is enabled",
+                ("experimental_features", "msc3970_enabled"),
             )
 
 
@@ -373,7 +384,12 @@ class ExperimentalConfig(Config):
         )
 
         # MSC3861: Matrix architecture change to delegate authentication via OIDC
-        self.msc3861 = MSC3861(**experimental.get("msc3861", {}))
+        try:
+            self.msc3861 = MSC3861(**experimental.get("msc3861", {}))
+        except ValueError as exc:
+            raise ConfigError(
+                "Invalid MSC3861 configuration", ("experimental", "msc3861")
+            ) from exc
 
         # MSC3970: Scope transaction IDs to devices
         self.msc3970_enabled = experimental.get("msc3970_enabled", self.msc3861.enabled)
diff --git a/tests/config/test_oauth_delegation.py b/tests/config/test_oauth_delegation.py
index 6d294e0144..2ead721b00 100644
--- a/tests/config/test_oauth_delegation.py
+++ b/tests/config/test_oauth_delegation.py
@@ -12,15 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict
 from unittest.mock import Mock
 
 from synapse.config import ConfigError
+from synapse.config.homeserver import HomeServerConfig
 from synapse.module_api import ModuleApi
 from synapse.types import JsonDict
 
-from tests.server import get_clock
-from tests.unittest import HomeserverTestCase, override_config, skip_unless
+from tests.server import get_clock, setup_test_homeserver
+from tests.unittest import TestCase, skip_unless
+from tests.utils import default_config
 
 try:
     import authlib  # noqa: F401
@@ -51,45 +52,15 @@ class CustomAuthModule:
         )
 
 
-def _dict_merge(merge_dict: dict, into_dict: dict) -> None:
-    """Do a deep merge of two dicts
-
-    Recursively merges `merge_dict` into `into_dict`:
-      * For keys where both `merge_dict` and `into_dict` have a dict value, the values
-        are recursively merged
-      * For all other keys, the values in `into_dict` (if any) are overwritten with
-        the value from `merge_dict`.
-
-    Args:
-        merge_dict: dict to merge
-        into_dict: target dict to be modified
-    """
-    for k, v in merge_dict.items():
-        if k not in into_dict:
-            into_dict[k] = v
-            continue
-
-        current_val = into_dict[k]
-
-        if isinstance(v, dict) and isinstance(current_val, dict):
-            _dict_merge(v, current_val)
-            continue
-
-        # otherwise we just overwrite
-        into_dict[k] = v
-
-
 @skip_unless(HAS_AUTHLIB, "requires authlib")
-class MSC3861OAuthDelegation(HomeserverTestCase):
+class MSC3861OAuthDelegation(TestCase):
     """Test that the Homeserver fails to initialize if the config is invalid."""
 
     def setUp(self) -> None:
-        self.reactor, self.clock = get_clock()
-        self._hs_args = {"clock": self.clock, "reactor": self.reactor}
-
-    def default_config(self) -> Dict[str, Any]:
-        default_extra_config = {
+        self.config_dict: JsonDict = {
+            **default_config("test"),
             "public_baseurl": BASE_URL,
+            "enable_registration": False,
             "experimental_features": {
                 "msc3861": {
                     "enabled": True,
@@ -100,198 +71,187 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
                 }
             },
         }
-        _dict_merge(
-            {} if self._extra_config is None else self._extra_config,
-            default_extra_config,
-        )
-        self._extra_config = default_extra_config
-        return super().default_config()
 
-    @override_config(
-        {
-            "enable_registration": False,
-        }
-    )
+    def parse_config(self) -> HomeServerConfig:
+        config = HomeServerConfig()
+        config.parse_config_dict(self.config_dict, "", "")
+        return config
+
     def test_client_secret_post_works(self) -> None:
-        self.setup_test_homeserver()
+        self.config_dict["experimental_features"]["msc3861"].update(
+            client_auth_method="client_secret_post",
+            client_secret=CLIENT_SECRET,
+        )
+
+        self.parse_config()
+
+    def test_client_secret_post_requires_client_secret(self) -> None:
+        self.config_dict["experimental_features"]["msc3861"].update(
+            client_auth_method="client_secret_post",
+            client_secret=None,
+        )
+
+        with self.assertRaises(ConfigError):
+            self.parse_config()
+
+    def test_client_secret_basic_works(self) -> None:
+        self.config_dict["experimental_features"]["msc3861"].update(
+            client_auth_method="client_secret_basic",
+            client_secret=CLIENT_SECRET,
+        )
+
+        self.parse_config()
+
+    def test_client_secret_basic_requires_client_secret(self) -> None:
+        self.config_dict["experimental_features"]["msc3861"].update(
+            client_auth_method="client_secret_basic",
+            client_secret=None,
+        )
+
+        with self.assertRaises(ConfigError):
+            self.parse_config()
+
+    def test_client_secret_jwt_works(self) -> None:
+        self.config_dict["experimental_features"]["msc3861"].update(
+            client_auth_method="client_secret_jwt",
+            client_secret=CLIENT_SECRET,
+        )
+
+        self.parse_config()
+
+    def test_client_secret_jwt_requires_client_secret(self) -> None:
+        self.config_dict["experimental_features"]["msc3861"].update(
+            client_auth_method="client_secret_jwt",
+            client_secret=None,
+        )
+
+        with self.assertRaises(ConfigError):
+            self.parse_config()
 
-    @override_config(
-        {
-            "enable_registration": False,
-            "experimental_features": {
-                "msc3861": {
-                    "client_auth_method": "invalid",
-                }
-            },
-        }
-    )
     def test_invalid_client_auth_method(self) -> None:
-        with self.assertRaises(ValueError):
-            self.setup_test_homeserver()
+        self.config_dict["experimental_features"]["msc3861"].update(
+            client_auth_method="invalid",
+        )
 
-    @override_config(
-        {
-            "enable_registration": False,
-            "experimental_features": {
-                "msc3861": {
-                    "client_auth_method": "private_key_jwt",
-                }
-            },
-        }
-    )
-    def test_invalid_private_key_jwt(self) -> None:
         with self.assertRaises(ConfigError):
-            self.setup_test_homeserver()
+            self.parse_config()
+
+    def test_private_key_jwt_requires_jwk(self) -> None:
+        self.config_dict["experimental_features"]["msc3861"].update(
+            client_auth_method="private_key_jwt",
+        )
+
+        with self.assertRaises(ConfigError):
+            self.parse_config()
 
-    @override_config(
-        {
-            "enable_registration": False,
-            "experimental_features": {
-                "msc3861": {
-                    "client_auth_method": "private_key_jwt",
-                    "jwk": {
-                        "p": "-frVdP_tZ-J_nIR6HNMDq1N7aunwm51nAqNnhqIyuA8ikx7LlQED1tt2LD3YEvYyW8nxE2V95HlCRZXQPMiRJBFOsbmYkzl2t-MpavTaObB_fct_JqcRtdXddg4-_ihdjRDwUOreq_dpWh6MIKsC3UyekfkHmeEJg5YpOTL15j8",
-                        "kty": "RSA",
-                        "q": "oFw-Enr_YozQB1ab-kawn4jY3yHi8B1nSmYT0s8oTCflrmps5BFJfCkHL5ij3iY15z0o2m0N-jjB1oSJ98O4RayEEYNQlHnTNTl0kRIWzpoqblHUIxVcahIpP_xTovBJzwi8XXoLGqHOOMA-r40LSyVgP2Ut8D9qBwV6_UfT0LU",
-                        "d": "WFkDPYo4b4LIS64D_QtQfGGuAObPvc3HFfp9VZXyq3SJR58XZRHE0jqtlEMNHhOTgbMYS3w8nxPQ_qVzY-5hs4fIanwvB64mAoOGl0qMHO65DTD_WsGFwzYClJPBVniavkLE2Hmpu8IGe6lGliN8vREC6_4t69liY-XcN_ECboVtC2behKkLOEASOIMuS7YcKAhTJFJwkl1dqDlliEn5A4u4xy7nuWQz3juB1OFdKlwGA5dfhDNglhoLIwNnkLsUPPFO-WB5ZNEW35xxHOToxj4bShvDuanVA6mJPtTKjz0XibjB36bj_nF_j7EtbE2PdGJ2KevAVgElR4lqS4ISgQ",
-                        "e": "AQAB",
-                        "kid": "test",
-                        "qi": "cPfNk8l8W5exVNNea4d7QZZ8Qr8LgHghypYAxz8PQh1fNa8Ya1SNUDVzC2iHHhszxxA0vB9C7jGze8dBrvnzWYF1XvQcqNIVVgHhD57R1Nm3dj2NoHIKe0Cu4bCUtP8xnZQUN4KX7y4IIcgRcBWG1hT6DEYZ4BxqicnBXXNXAUI",
-                        "dp": "dKlMHvslV1sMBQaKWpNb3gPq0B13TZhqr3-E2_8sPlvJ3fD8P4CmwwnOn50JDuhY3h9jY5L06sBwXjspYISVv8hX-ndMLkEeF3lrJeA5S70D8rgakfZcPIkffm3tlf1Ok3v5OzoxSv3-67Df4osMniyYwDUBCB5Oq1tTx77xpU8",
-                        "dq": "S4ooU1xNYYcjl9FcuJEEMqKsRrAXzzSKq6laPTwIp5dDwt2vXeAm1a4eDHXC-6rUSZGt5PbqVqzV4s-cjnJMI8YYkIdjNg4NSE1Ac_YpeDl3M3Colb5CQlU7yUB7xY2bt0NOOFp9UJZYJrOo09mFMGjy5eorsbitoZEbVqS3SuE",
-                        "n": "nJbYKqFwnURKimaviyDFrNLD3gaKR1JW343Qem25VeZxoMq1665RHVoO8n1oBm4ClZdjIiZiVdpyqzD5-Ow12YQgQEf1ZHP3CCcOQQhU57Rh5XvScTe5IxYVkEW32IW2mp_CJ6WfjYpfeL4azarVk8H3Vr59d1rSrKTVVinVdZer9YLQyC_rWAQNtHafPBMrf6RYiNGV9EiYn72wFIXlLlBYQ9Fx7bfe1PaL6qrQSsZP3_rSpuvVdLh1lqGeCLR0pyclA9uo5m2tMyCXuuGQLbA_QJm5xEc7zd-WFdux2eXF045oxnSZ_kgQt-pdN7AxGWOVvwoTf9am6mSkEdv6iw",
-                    },
-                }
-            },
-        }
-    )
     def test_private_key_jwt_works(self) -> None:
-        self.setup_test_homeserver()
+        self.config_dict["experimental_features"]["msc3861"].update(
+            client_auth_method="private_key_jwt",
+            jwk={
+                "p": "-frVdP_tZ-J_nIR6HNMDq1N7aunwm51nAqNnhqIyuA8ikx7LlQED1tt2LD3YEvYyW8nxE2V95HlCRZXQPMiRJBFOsbmYkzl2t-MpavTaObB_fct_JqcRtdXddg4-_ihdjRDwUOreq_dpWh6MIKsC3UyekfkHmeEJg5YpOTL15j8",
+                "kty": "RSA",
+                "q": "oFw-Enr_YozQB1ab-kawn4jY3yHi8B1nSmYT0s8oTCflrmps5BFJfCkHL5ij3iY15z0o2m0N-jjB1oSJ98O4RayEEYNQlHnTNTl0kRIWzpoqblHUIxVcahIpP_xTovBJzwi8XXoLGqHOOMA-r40LSyVgP2Ut8D9qBwV6_UfT0LU",
+                "d": "WFkDPYo4b4LIS64D_QtQfGGuAObPvc3HFfp9VZXyq3SJR58XZRHE0jqtlEMNHhOTgbMYS3w8nxPQ_qVzY-5hs4fIanwvB64mAoOGl0qMHO65DTD_WsGFwzYClJPBVniavkLE2Hmpu8IGe6lGliN8vREC6_4t69liY-XcN_ECboVtC2behKkLOEASOIMuS7YcKAhTJFJwkl1dqDlliEn5A4u4xy7nuWQz3juB1OFdKlwGA5dfhDNglhoLIwNnkLsUPPFO-WB5ZNEW35xxHOToxj4bShvDuanVA6mJPtTKjz0XibjB36bj_nF_j7EtbE2PdGJ2KevAVgElR4lqS4ISgQ",
+                "e": "AQAB",
+                "kid": "test",
+                "qi": "cPfNk8l8W5exVNNea4d7QZZ8Qr8LgHghypYAxz8PQh1fNa8Ya1SNUDVzC2iHHhszxxA0vB9C7jGze8dBrvnzWYF1XvQcqNIVVgHhD57R1Nm3dj2NoHIKe0Cu4bCUtP8xnZQUN4KX7y4IIcgRcBWG1hT6DEYZ4BxqicnBXXNXAUI",
+                "dp": "dKlMHvslV1sMBQaKWpNb3gPq0B13TZhqr3-E2_8sPlvJ3fD8P4CmwwnOn50JDuhY3h9jY5L06sBwXjspYISVv8hX-ndMLkEeF3lrJeA5S70D8rgakfZcPIkffm3tlf1Ok3v5OzoxSv3-67Df4osMniyYwDUBCB5Oq1tTx77xpU8",
+                "dq": "S4ooU1xNYYcjl9FcuJEEMqKsRrAXzzSKq6laPTwIp5dDwt2vXeAm1a4eDHXC-6rUSZGt5PbqVqzV4s-cjnJMI8YYkIdjNg4NSE1Ac_YpeDl3M3Colb5CQlU7yUB7xY2bt0NOOFp9UJZYJrOo09mFMGjy5eorsbitoZEbVqS3SuE",
+                "n": "nJbYKqFwnURKimaviyDFrNLD3gaKR1JW343Qem25VeZxoMq1665RHVoO8n1oBm4ClZdjIiZiVdpyqzD5-Ow12YQgQEf1ZHP3CCcOQQhU57Rh5XvScTe5IxYVkEW32IW2mp_CJ6WfjYpfeL4azarVk8H3Vr59d1rSrKTVVinVdZer9YLQyC_rWAQNtHafPBMrf6RYiNGV9EiYn72wFIXlLlBYQ9Fx7bfe1PaL6qrQSsZP3_rSpuvVdLh1lqGeCLR0pyclA9uo5m2tMyCXuuGQLbA_QJm5xEc7zd-WFdux2eXF045oxnSZ_kgQt-pdN7AxGWOVvwoTf9am6mSkEdv6iw",
+            },
+        )
+        self.parse_config()
 
     def test_registration_cannot_be_enabled(self) -> None:
+        self.config_dict["enable_registration"] = True
         with self.assertRaises(ConfigError):
-            self.setup_test_homeserver()
+            self.parse_config()
 
-    @override_config(
-        {
-            "enable_registration": False,
-            "password_config": {
-                "enabled": True,
-            },
-        }
-    )
     def test_password_config_cannot_be_enabled(self) -> None:
+        self.config_dict["password_config"] = {"enabled": True}
         with self.assertRaises(ConfigError):
-            self.setup_test_homeserver()
+            self.parse_config()
 
-    @override_config(
-        {
-            "enable_registration": False,
-            "oidc_providers": [
-                {
-                    "idp_id": "microsoft",
-                    "idp_name": "Microsoft",
-                    "issuer": "https://login.microsoftonline.com/<tenant id>/v2.0",
-                    "client_id": "<client id>",
-                    "client_secret": "<client secret>",
-                    "scopes": ["openid", "profile"],
-                    "authorization_endpoint": "https://login.microsoftonline.com/<tenant id>/oauth2/v2.0/authorize",
-                    "token_endpoint": "https://login.microsoftonline.com/<tenant id>/oauth2/v2.0/token",
-                    "userinfo_endpoint": "https://graph.microsoft.com/oidc/userinfo",
-                }
-            ],
-        }
-    )
     def test_oidc_sso_cannot_be_enabled(self) -> None:
+        self.config_dict["oidc_providers"] = [
+            {
+                "idp_id": "microsoft",
+                "idp_name": "Microsoft",
+                "issuer": "https://login.microsoftonline.com/<tenant id>/v2.0",
+                "client_id": "<client id>",
+                "client_secret": "<client secret>",
+                "scopes": ["openid", "profile"],
+                "authorization_endpoint": "https://login.microsoftonline.com/<tenant id>/oauth2/v2.0/authorize",
+                "token_endpoint": "https://login.microsoftonline.com/<tenant id>/oauth2/v2.0/token",
+                "userinfo_endpoint": "https://graph.microsoft.com/oidc/userinfo",
+            }
+        ]
+
         with self.assertRaises(ConfigError):
-            self.setup_test_homeserver()
+            self.parse_config()
 
-    @override_config(
-        {
-            "enable_registration": False,
-            "cas_config": {
-                "enabled": True,
-                "server_url": "https://cas-server.com",
-                "displayname_attribute": "name",
-                "required_attributes": {"userGroup": "staff", "department": "None"},
-            },
-        }
-    )
     def test_cas_sso_cannot_be_enabled(self) -> None:
+        self.config_dict["cas_config"] = {
+            "enabled": True,
+            "server_url": "https://cas-server.com",
+            "displayname_attribute": "name",
+            "required_attributes": {"userGroup": "staff", "department": "None"},
+        }
+
         with self.assertRaises(ConfigError):
-            self.setup_test_homeserver()
+            self.parse_config()
 
-    @override_config(
-        {
-            "enable_registration": False,
-            "modules": [
-                {
-                    "module": f"{__name__}.{CustomAuthModule.__qualname__}",
-                    "config": {},
-                }
-            ],
-        }
-    )
     def test_auth_providers_cannot_be_enabled(self) -> None:
+        self.config_dict["modules"] = [
+            {
+                "module": f"{__name__}.{CustomAuthModule.__qualname__}",
+                "config": {},
+            }
+        ]
+
+        # This requires actually setting up an HS, as the module will be run on setup,
+        # which should raise as the module tries to register an auth provider
+        config = self.parse_config()
+        reactor, clock = get_clock()
         with self.assertRaises(ConfigError):
-            self.setup_test_homeserver()
+            setup_test_homeserver(
+                self.addCleanup, reactor=reactor, clock=clock, config=config
+            )
 
-    @override_config(
-        {
-            "enable_registration": False,
-            "jwt_config": {
-                "enabled": True,
-                "secret": "my-secret-token",
-                "algorithm": "HS256",
-            },
-        }
-    )
     def test_jwt_auth_cannot_be_enabled(self) -> None:
+        self.config_dict["jwt_config"] = {
+            "enabled": True,
+            "secret": "my-secret-token",
+            "algorithm": "HS256",
+        }
+
         with self.assertRaises(ConfigError):
-            self.setup_test_homeserver()
+            self.parse_config()
 
-    @override_config(
-        {
-            "enable_registration": False,
-            "experimental_features": {
-                "msc3882_enabled": True,
-            },
-        }
-    )
     def test_msc3882_auth_cannot_be_enabled(self) -> None:
+        self.config_dict["experimental_features"]["msc3882_enabled"] = True
         with self.assertRaises(ConfigError):
-            self.setup_test_homeserver()
+            self.parse_config()
 
-    @override_config(
-        {
-            "enable_registration": False,
-            "recaptcha_public_key": "test",
-            "recaptcha_private_key": "test",
-            "enable_registration_captcha": True,
-        }
-    )
     def test_captcha_cannot_be_enabled(self) -> None:
+        self.config_dict.update(
+            enable_registration_captcha=True,
+            recaptcha_public_key="test",
+            recaptcha_private_key="test",
+        )
         with self.assertRaises(ConfigError):
-            self.setup_test_homeserver()
+            self.parse_config()
 
-    @override_config(
-        {
-            "enable_registration": False,
-            "refresh_token_lifetime": "24h",
-            "refreshable_access_token_lifetime": "10m",
-            "nonrefreshable_access_token_lifetime": "24h",
-        }
-    )
     def test_refreshable_tokens_cannot_be_enabled(self) -> None:
+        self.config_dict.update(
+            refresh_token_lifetime="24h",
+            refreshable_access_token_lifetime="10m",
+            nonrefreshable_access_token_lifetime="24h",
+        )
         with self.assertRaises(ConfigError):
-            self.setup_test_homeserver()
+            self.parse_config()
 
-    @override_config(
-        {
-            "enable_registration": False,
-            "session_lifetime": "24h",
-        }
-    )
     def test_session_lifetime_cannot_be_set(self) -> None:
+        self.config_dict["session_lifetime"] = "24h"
         with self.assertRaises(ConfigError):
-            self.setup_test_homeserver()
+            self.parse_config()
-- 
cgit 1.5.1


From ceb3dd77db0d3ce992d40175c3f53f6b6ddfa168 Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Fri, 26 May 2023 15:16:34 +0200
Subject: Enforce that an admin token also has the basic Matrix API scope

---
 synapse/api/auth/msc3861_delegated.py   |  7 ++-----
 tests/handlers/test_oauth_delegation.py | 26 +++++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 6 deletions(-)

(limited to 'synapse')

diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py
index e4b16c0b5c..31c1de0119 100644
--- a/synapse/api/auth/msc3861_delegated.py
+++ b/synapse/api/auth/msc3861_delegated.py
@@ -248,13 +248,10 @@ class MSC3861DelegatedAuth(BaseAuth):
         scope: List[str] = scope_to_list(introspection_result.get("scope", ""))
 
         # Determine type of user based on presence of particular scopes
-        has_admin_scope = SCOPE_SYNAPSE_ADMIN in scope
         has_user_scope = SCOPE_MATRIX_API in scope
         has_guest_scope = SCOPE_MATRIX_GUEST in scope
-        is_user = has_user_scope or has_admin_scope
-        is_guest = has_guest_scope and not is_user
 
-        if not is_user and not is_guest:
+        if not has_user_scope and not has_guest_scope:
             raise InvalidClientTokenError("No scope in token granting user rights")
 
         # Match via the sub claim
@@ -351,5 +348,5 @@ class MSC3861DelegatedAuth(BaseAuth):
             user_id=user_id,
             device_id=device_id,
             scope=scope,
-            is_guest=is_guest,
+            is_guest=(has_guest_scope and not has_user_scope),
         )
diff --git a/tests/handlers/test_oauth_delegation.py b/tests/handlers/test_oauth_delegation.py
index 0641535512..6309d7b36e 100644
--- a/tests/handlers/test_oauth_delegation.py
+++ b/tests/handlers/test_oauth_delegation.py
@@ -224,6 +224,30 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
         )
         self._assertParams()
 
+    def test_active_admin_not_user(self) -> None:
+        """The handler should raise when the scope has admin right but not user."""
+
+        self.http_client.request = simple_async_mock(
+            return_value=FakeResponse.json(
+                code=200,
+                payload={
+                    "active": True,
+                    "sub": SUBJECT,
+                    "scope": " ".join([SYNAPSE_ADMIN_SCOPE]),
+                    "username": USERNAME,
+                },
+            )
+        )
+        request = Mock(args={})
+        request.args[b"access_token"] = [b"mockAccessToken"]
+        request.requestHeaders.getRawHeaders = mock_getRawHeaders()
+        self.get_failure(self.auth.get_user_by_req(request), InvalidClientTokenError)
+        self.http_client.get_json.assert_called_once_with(WELL_KNOWN)
+        self.http_client.request.assert_called_once_with(
+            method="POST", uri=INTROSPECTION_ENDPOINT, data=ANY, headers=ANY
+        )
+        self._assertParams()
+
     def test_active_admin(self) -> None:
         """The handler should return a requester with admin rights."""
 
@@ -233,7 +257,7 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
                 payload={
                     "active": True,
                     "sub": SUBJECT,
-                    "scope": " ".join([SYNAPSE_ADMIN_SCOPE]),
+                    "scope": " ".join([SYNAPSE_ADMIN_SCOPE, MATRIX_USER_SCOPE]),
                     "username": USERNAME,
                 },
             )
-- 
cgit 1.5.1


From c01343de43b86eb4a6c055547369d07c198a435f Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 31 May 2023 07:18:29 -0400
Subject: Add stricter mypy options (#15694)

Enable warn_unused_configs, strict_concatenate, disallow_subclassing_any,
and disallow_incomplete_defs.
---
 changelog.d/15694.misc                  |  1 +
 mypy.ini                                | 23 ++++++++++++++++++++---
 synapse/api/auth/msc3861_delegated.py   |  2 +-
 synapse/federation/federation_server.py |  4 ++--
 synapse/handlers/oidc.py                |  2 +-
 synapse/handlers/pagination.py          |  4 ++--
 synapse/http/server.py                  | 14 +++++++-------
 synapse/util/__init__.py                |  4 ++--
 synapse/util/async_helpers.py           |  2 +-
 synapse/util/caches/lrucache.py         |  6 ++----
 tests/server.py                         |  2 +-
 11 files changed, 40 insertions(+), 24 deletions(-)
 create mode 100644 changelog.d/15694.misc

(limited to 'synapse')

diff --git a/changelog.d/15694.misc b/changelog.d/15694.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/15694.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/mypy.ini b/mypy.ini
index a7ec66196d..56cd1d560e 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -2,17 +2,29 @@
 namespace_packages = True
 plugins = pydantic.mypy, mypy_zope:plugin, scripts-dev/mypy_synapse_plugin.py
 follow_imports = normal
-check_untyped_defs = True
 show_error_codes = True
 show_traceback = True
 mypy_path = stubs
 warn_unreachable = True
-warn_unused_ignores = True
 local_partial_types = True
 no_implicit_optional = True
+
+# Strict checks, see mypy --help
+warn_unused_configs = True
+# disallow_any_generics = True
+disallow_subclassing_any = True
+# disallow_untyped_calls = True
 disallow_untyped_defs = True
-strict_equality = True
+disallow_incomplete_defs = True
+# check_untyped_defs = True
+# disallow_untyped_decorators = True
 warn_redundant_casts = True
+warn_unused_ignores = True
+# warn_return_any = True
+# no_implicit_reexport = True
+strict_equality = True
+strict_concatenate = True
+
 # Run mypy type checking with the minimum supported Python version to catch new usage
 # that isn't backwards-compatible (types, overloads, etc).
 python_version = 3.8
@@ -31,6 +43,7 @@ warn_unused_ignores = False
 
 [mypy-synapse.util.caches.treecache]
 disallow_untyped_defs = False
+disallow_incomplete_defs = False
 
 ;; Dependencies without annotations
 ;; Before ignoring a module, check to see if type stubs are available.
@@ -40,6 +53,7 @@ disallow_untyped_defs = False
 ;; which we can pull in as a dev dependency by adding to `pyproject.toml`'s
 ;; `[tool.poetry.dev-dependencies]` list.
 
+# https://github.com/lepture/authlib/issues/460
 [mypy-authlib.*]
 ignore_missing_imports = True
 
@@ -49,9 +63,11 @@ ignore_missing_imports = True
 [mypy-lxml]
 ignore_missing_imports = True
 
+# https://github.com/msgpack/msgpack-python/issues/448
 [mypy-msgpack]
 ignore_missing_imports = True
 
+# https://github.com/wolever/parameterized/issues/143
 [mypy-parameterized.*]
 ignore_missing_imports = True
 
@@ -73,6 +89,7 @@ ignore_missing_imports = True
 [mypy-srvlookup.*]
 ignore_missing_imports = True
 
+# https://github.com/twisted/treq/pull/366
 [mypy-treq.*]
 ignore_missing_imports = True
 
diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py
index 31c1de0119..bd4fc9c0ee 100644
--- a/synapse/api/auth/msc3861_delegated.py
+++ b/synapse/api/auth/msc3861_delegated.py
@@ -59,7 +59,7 @@ def scope_to_list(scope: str) -> List[str]:
     return scope.strip().split(" ")
 
 
-class PrivateKeyJWTWithKid(PrivateKeyJWT):
+class PrivateKeyJWTWithKid(PrivateKeyJWT):  # type: ignore[misc]
     """An implementation of the private_key_jwt client auth method that includes a kid header.
 
     This is needed because some providers (Keycloak) require the kid header to figure
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index e17cb840de..149351dda0 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -515,7 +515,7 @@ class FederationServer(FederationBase):
                     logger.error(
                         "Failed to handle PDU %s",
                         event_id,
-                        exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore
+                        exc_info=(f.type, f.value, f.getTracebackObject()),
                     )
                     return {"error": str(e)}
 
@@ -1247,7 +1247,7 @@ class FederationServer(FederationBase):
                     logger.error(
                         "Failed to handle PDU %s",
                         event.event_id,
-                        exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore
+                        exc_info=(f.type, f.value, f.getTracebackObject()),
                     )
 
                 received_ts = await self.store.remove_received_event_from_staging(
diff --git a/synapse/handlers/oidc.py b/synapse/handlers/oidc.py
index e7e0b5e049..24b68e0301 100644
--- a/synapse/handlers/oidc.py
+++ b/synapse/handlers/oidc.py
@@ -1354,7 +1354,7 @@ class OidcProvider:
         finish_request(request)
 
 
-class LogoutToken(JWTClaims):
+class LogoutToken(JWTClaims):  # type: ignore[misc]
     """
     Holds and verify claims of a logout token, as per
     https://openid.net/specs/openid-connect-backchannel-1_0.html#LogoutToken
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index 63b35c8d62..d5257acb7d 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -360,7 +360,7 @@ class PaginationHandler:
         except Exception:
             f = Failure()
             logger.error(
-                "[purge] failed", exc_info=(f.type, f.value, f.getTracebackObject())  # type: ignore
+                "[purge] failed", exc_info=(f.type, f.value, f.getTracebackObject())
             )
             self._purges_by_id[purge_id].status = PurgeStatus.STATUS_FAILED
             self._purges_by_id[purge_id].error = f.getErrorMessage()
@@ -689,7 +689,7 @@ class PaginationHandler:
             f = Failure()
             logger.error(
                 "failed",
-                exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore
+                exc_info=(f.type, f.value, f.getTracebackObject()),
             )
             self._delete_by_id[delete_id].status = DeleteStatus.STATUS_FAILED
             self._delete_by_id[delete_id].error = f.getErrorMessage()
diff --git a/synapse/http/server.py b/synapse/http/server.py
index 04768c6a23..933172c873 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -108,7 +108,7 @@ def return_json_error(
 
     if f.check(SynapseError):
         # mypy doesn't understand that f.check asserts the type.
-        exc: SynapseError = f.value  # type: ignore
+        exc: SynapseError = f.value
         error_code = exc.code
         error_dict = exc.error_dict(config)
         if exc.headers is not None:
@@ -124,7 +124,7 @@ def return_json_error(
                 "Got cancellation before client disconnection from %r: %r",
                 request.request_metrics.name,
                 request,
-                exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore[arg-type]
+                exc_info=(f.type, f.value, f.getTracebackObject()),
             )
     else:
         error_code = 500
@@ -134,7 +134,7 @@ def return_json_error(
             "Failed handle request via %r: %r",
             request.request_metrics.name,
             request,
-            exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore[arg-type]
+            exc_info=(f.type, f.value, f.getTracebackObject()),
         )
 
     # Only respond with an error response if we haven't already started writing,
@@ -172,7 +172,7 @@ def return_html_error(
     """
     if f.check(CodeMessageException):
         # mypy doesn't understand that f.check asserts the type.
-        cme: CodeMessageException = f.value  # type: ignore
+        cme: CodeMessageException = f.value
         code = cme.code
         msg = cme.msg
         if cme.headers is not None:
@@ -189,7 +189,7 @@ def return_html_error(
             logger.error(
                 "Failed handle request %r",
                 request,
-                exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore[arg-type]
+                exc_info=(f.type, f.value, f.getTracebackObject()),
             )
     elif f.check(CancelledError):
         code = HTTP_STATUS_REQUEST_CANCELLED
@@ -199,7 +199,7 @@ def return_html_error(
             logger.error(
                 "Got cancellation before client disconnection when handling request %r",
                 request,
-                exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore[arg-type]
+                exc_info=(f.type, f.value, f.getTracebackObject()),
             )
     else:
         code = HTTPStatus.INTERNAL_SERVER_ERROR
@@ -208,7 +208,7 @@ def return_html_error(
         logger.error(
             "Failed handle request %r",
             request,
-            exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore[arg-type]
+            exc_info=(f.type, f.value, f.getTracebackObject()),
         )
 
     if isinstance(error_template, str):
diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py
index 9ddd26ccaa..7ea0c4c36b 100644
--- a/synapse/util/__init__.py
+++ b/synapse/util/__init__.py
@@ -76,7 +76,7 @@ def unwrapFirstError(failure: Failure) -> Failure:
     # the subFailure's value, which will do a better job of preserving stacktraces.
     # (actually, you probably want to use yieldable_gather_results anyway)
     failure.trap(defer.FirstError)
-    return failure.value.subFailure  # type: ignore[union-attr]  # Issue in Twisted's annotations
+    return failure.value.subFailure
 
 
 P = ParamSpec("P")
@@ -178,7 +178,7 @@ def log_failure(
     """
 
     logger.error(
-        msg, exc_info=(failure.type, failure.value, failure.getTracebackObject())  # type: ignore[arg-type]
+        msg, exc_info=(failure.type, failure.value, failure.getTracebackObject())
     )
 
     if not consumeErrors:
diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py
index 01e3cd46f6..4041e49e71 100644
--- a/synapse/util/async_helpers.py
+++ b/synapse/util/async_helpers.py
@@ -138,7 +138,7 @@ class ObservableDeferred(Generic[_T], AbstractObservableDeferred[_T]):
             for observer in observers:
                 # This is a little bit of magic to correctly propagate stack
                 # traces when we `await` on one of the observer deferreds.
-                f.value.__failure__ = f  # type: ignore[union-attr]
+                f.value.__failure__ = f
                 try:
                     observer.errback(f)
                 except Exception as e:
diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py
index 452d5d04c1..ed0da17227 100644
--- a/synapse/util/caches/lrucache.py
+++ b/synapse/util/caches/lrucache.py
@@ -93,10 +93,8 @@ VT = TypeVar("VT")
 # a general type var, distinct from either KT or VT
 T = TypeVar("T")
 
-P = TypeVar("P")
 
-
-class _TimedListNode(ListNode[P]):
+class _TimedListNode(ListNode[T]):
     """A `ListNode` that tracks last access time."""
 
     __slots__ = ["last_access_ts_secs"]
@@ -821,7 +819,7 @@ class AsyncLruCache(Generic[KT, VT]):
     utilize external cache systems that require await behaviour to be created.
     """
 
-    def __init__(self, *args, **kwargs):  # type: ignore
+    def __init__(self, *args: Any, **kwargs: Any):
         self._lru_cache: LruCache[KT, VT] = LruCache(*args, **kwargs)
 
     async def get(
diff --git a/tests/server.py b/tests/server.py
index 7296f0a552..a12c3e3b9a 100644
--- a/tests/server.py
+++ b/tests/server.py
@@ -642,7 +642,7 @@ def _make_test_homeserver_synchronous(server: HomeServer) -> None:
         pool.runWithConnection = runWithConnection  # type: ignore[assignment]
         pool.runInteraction = runInteraction  # type: ignore[assignment]
         # Replace the thread pool with a threadless 'thread' pool
-        pool.threadpool = ThreadPool(clock._reactor)  # type: ignore[assignment]
+        pool.threadpool = ThreadPool(clock._reactor)
         pool.running = True
 
     # We've just changed the Databases to run DB transactions on the same
-- 
cgit 1.5.1


From daf3a679089770e00d1b70d8ed2f91ab108b73e3 Mon Sep 17 00:00:00 2001
From: Gabriel Féron <g@leirbag.net>
Date: Wed, 31 May 2023 15:18:37 +0200
Subject: Add get_canonical_room_alias to module API (#15450)

Co-authored-by: Boxdot <d@zerovolt.org>
---
 changelog.d/15450.feature            |  1 +
 synapse/module_api/__init__.py       | 27 +++++++++++++++++++++++++++
 synapse/storage/controllers/state.py |  2 +-
 3 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15450.feature

(limited to 'synapse')

diff --git a/changelog.d/15450.feature b/changelog.d/15450.feature
new file mode 100644
index 0000000000..2102381143
--- /dev/null
+++ b/changelog.d/15450.feature
@@ -0,0 +1 @@
+Support resolving a room's [canonical alias](https://spec.matrix.org/v1.7/client-server-api/#mroomcanonical_alias) via the module API.
\ No newline at end of file
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 134bd2e620..a8d6224a45 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -122,6 +122,7 @@ from synapse.types import (
     JsonMapping,
     Requester,
     RoomAlias,
+    RoomID,
     StateMap,
     UserID,
     UserInfo,
@@ -1570,6 +1571,32 @@ class ModuleApi:
             start_timestamp, end_timestamp
         )
 
+    async def get_canonical_room_alias(self, room_id: RoomID) -> Optional[RoomAlias]:
+        """
+        Retrieve the given room's current canonical alias.
+
+        A room may declare an alias as "canonical", meaning that it is the
+        preferred alias to use when referring to the room. This function
+        retrieves that alias from the room's state.
+
+        Added in Synapse v1.86.0.
+
+        Args:
+            room_id: The Room ID to find the alias of.
+
+        Returns:
+            None if the room ID does not exist, or if the room exists but has no canonical alias.
+            Otherwise, the parsed room alias.
+        """
+        room_alias_str = (
+            await self._storage_controllers.state.get_canonical_alias_for_room(
+                room_id.to_string()
+            )
+        )
+        if room_alias_str:
+            return RoomAlias.from_string(room_alias_str)
+        return None
+
     async def lookup_room_alias(self, room_alias: str) -> Tuple[str, List[str]]:
         """
         Get the room ID associated with a room alias.
diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py
index 7089b0a1d8..233df7cce2 100644
--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@@ -485,7 +485,7 @@ class StateStorageController:
         if not event:
             return None
 
-        return event.content.get("canonical_alias")
+        return event.content.get("alias")
 
     @trace
     @tag_args
-- 
cgit 1.5.1


From 11e15d79b8a0af593fd9467e0cc7f8a9dfcb6c4f Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Wed, 31 May 2023 13:59:56 +0000
Subject: Fix a performance issue introduced in Synapse v1.83.0 which meant
 that purging rooms was very slow and database-intensive. (#15693)

* Add indices required to efficiently validate new foreign key constraints on stream_ordering

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 changelog.d/15693.bugfix                           |  1 +
 synapse/storage/databases/state/bg_updates.py      | 31 ++++++++++++++++++++++
 .../77/14bg_indices_event_stream_ordering.sql      | 20 ++++++++++++++
 3 files changed, 52 insertions(+)
 create mode 100644 changelog.d/15693.bugfix
 create mode 100644 synapse/storage/schema/main/delta/77/14bg_indices_event_stream_ordering.sql

(limited to 'synapse')

diff --git a/changelog.d/15693.bugfix b/changelog.d/15693.bugfix
new file mode 100644
index 0000000000..d0325de007
--- /dev/null
+++ b/changelog.d/15693.bugfix
@@ -0,0 +1 @@
+Fix a performance issue introduced in Synapse v1.83.0 which meant that purging rooms was very slow and database-intensive.
\ No newline at end of file
diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py
index 86eb1a8a08..5b8ba436d4 100644
--- a/synapse/storage/databases/state/bg_updates.py
+++ b/synapse/storage/databases/state/bg_updates.py
@@ -261,6 +261,16 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore):
     STATE_GROUPS_ROOM_INDEX_UPDATE_NAME = "state_groups_room_id_idx"
     STATE_GROUP_EDGES_UNIQUE_INDEX_UPDATE_NAME = "state_group_edges_unique_idx"
 
+    CURRENT_STATE_EVENTS_STREAM_ORDERING_INDEX_UPDATE_NAME = (
+        "current_state_events_stream_ordering_idx"
+    )
+    ROOM_MEMBERSHIPS_STREAM_ORDERING_INDEX_UPDATE_NAME = (
+        "room_memberships_stream_ordering_idx"
+    )
+    LOCAL_CURRENT_MEMBERSHIP_STREAM_ORDERING_INDEX_UPDATE_NAME = (
+        "local_current_membership_stream_ordering_idx"
+    )
+
     def __init__(
         self,
         database: DatabasePool,
@@ -297,6 +307,27 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore):
             replaces_index="state_group_edges_idx",
         )
 
+        # These indices are needed to validate the foreign key constraint
+        # when events are deleted.
+        self.db_pool.updates.register_background_index_update(
+            self.CURRENT_STATE_EVENTS_STREAM_ORDERING_INDEX_UPDATE_NAME,
+            index_name="current_state_events_stream_ordering_idx",
+            table="current_state_events",
+            columns=["event_stream_ordering"],
+        )
+        self.db_pool.updates.register_background_index_update(
+            self.ROOM_MEMBERSHIPS_STREAM_ORDERING_INDEX_UPDATE_NAME,
+            index_name="room_memberships_stream_ordering_idx",
+            table="room_memberships",
+            columns=["event_stream_ordering"],
+        )
+        self.db_pool.updates.register_background_index_update(
+            self.LOCAL_CURRENT_MEMBERSHIP_STREAM_ORDERING_INDEX_UPDATE_NAME,
+            index_name="local_current_membership_stream_ordering_idx",
+            table="local_current_membership",
+            columns=["event_stream_ordering"],
+        )
+
     async def _background_deduplicate_state(
         self, progress: dict, batch_size: int
     ) -> int:
diff --git a/synapse/storage/schema/main/delta/77/14bg_indices_event_stream_ordering.sql b/synapse/storage/schema/main/delta/77/14bg_indices_event_stream_ordering.sql
new file mode 100644
index 0000000000..ec8cd522ec
--- /dev/null
+++ b/synapse/storage/schema/main/delta/77/14bg_indices_event_stream_ordering.sql
@@ -0,0 +1,20 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (ordering, update_name, progress_json)
+    VALUES
+        (7714, 'current_state_events_stream_ordering_idx', '{}'),
+        (7714, 'local_current_membership_stream_ordering_idx', '{}'),
+        (7714, 'room_memberships_stream_ordering_idx', '{}');
-- 
cgit 1.5.1


From 6f18812bb044a2959fdc9881c328578adb7b33f2 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 31 May 2023 13:06:57 -0400
Subject: Add stubs package for lxml. (#15697)

The stubs have some issues so this has some generous casts
and ignores in it, but it is better than not having stubs.

Note that, confusingly, Element is a function which creates
_Element instances (and similarly for Comment).
---
 changelog.d/15697.misc               |  1 +
 mypy.ini                             |  3 --
 poetry.lock                          | 25 +++++++++---
 pyproject.toml                       |  1 +
 synapse/media/oembed.py              | 32 +++++++++------
 synapse/media/preview_html.py        | 79 +++++++++++++++++++++++++-----------
 tests/media/test_html_preview.py     | 18 +++++++-
 tests/media/test_oembed.py           |  2 +-
 tests/media/test_url_previewer.py    |  2 +-
 tests/rest/media/test_url_preview.py |  2 +-
 10 files changed, 117 insertions(+), 48 deletions(-)
 create mode 100644 changelog.d/15697.misc

(limited to 'synapse')

diff --git a/changelog.d/15697.misc b/changelog.d/15697.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/15697.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/mypy.ini b/mypy.ini
index 56cd1d560e..1038b7d8c7 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -60,9 +60,6 @@ ignore_missing_imports = True
 [mypy-ijson.*]
 ignore_missing_imports = True
 
-[mypy-lxml]
-ignore_missing_imports = True
-
 # https://github.com/msgpack/msgpack-python/issues/448
 [mypy-msgpack]
 ignore_missing_imports = True
diff --git a/poetry.lock b/poetry.lock
index 0879e64cf1..d8964f5719 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.
 
 [[package]]
 name = "alabaster"
@@ -1215,6 +1215,21 @@ html5 = ["html5lib"]
 htmlsoup = ["BeautifulSoup4"]
 source = ["Cython (>=0.29.7)"]
 
+[[package]]
+name = "lxml-stubs"
+version = "0.4.0"
+description = "Type annotations for the lxml package"
+category = "dev"
+optional = false
+python-versions = "*"
+files = [
+    {file = "lxml-stubs-0.4.0.tar.gz", hash = "sha256:184877b42127256abc2b932ba8bd0ab5ea80bd0b0fee618d16daa40e0b71abee"},
+    {file = "lxml_stubs-0.4.0-py3-none-any.whl", hash = "sha256:3b381e9e82397c64ea3cc4d6f79d1255d015f7b114806d4826218805c10ec003"},
+]
+
+[package.extras]
+test = ["coverage[toml] (==5.2)", "pytest (>=6.0.0)", "pytest-mypy-plugins (==1.9.3)"]
+
 [[package]]
 name = "markdown-it-py"
 version = "2.2.0"
@@ -3409,22 +3424,22 @@ docs = ["Sphinx", "repoze.sphinx.autointerface"]
 test = ["zope.i18nmessageid", "zope.testing", "zope.testrunner"]
 
 [extras]
-all = ["matrix-synapse-ldap3", "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", "pysaml2", "authlib", "lxml", "sentry-sdk", "jaeger-client", "opentracing", "txredisapi", "hiredis", "Pympler", "pyicu"]
+all = ["Pympler", "authlib", "hiredis", "jaeger-client", "lxml", "matrix-synapse-ldap3", "opentracing", "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", "pyicu", "pysaml2", "sentry-sdk", "txredisapi"]
 cache-memory = ["Pympler"]
 jwt = ["authlib"]
 matrix-synapse-ldap3 = ["matrix-synapse-ldap3"]
 oidc = ["authlib"]
 opentracing = ["jaeger-client", "opentracing"]
 postgres = ["psycopg2", "psycopg2cffi", "psycopg2cffi-compat"]
-redis = ["txredisapi", "hiredis"]
+redis = ["hiredis", "txredisapi"]
 saml2 = ["pysaml2"]
 sentry = ["sentry-sdk"]
 systemd = ["systemd-python"]
-test = ["parameterized", "idna"]
+test = ["idna", "parameterized"]
 url-preview = ["lxml"]
 user-search = ["pyicu"]
 
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.7.1"
-content-hash = "ef3a16dd66177f7141239e1a2d3e07cc14c08f1e4e0c5127184d022bc062da52"
+content-hash = "7ad11e62a675e09444cf33ca2de3216fc4efc5874a2575e54d95d577a52439d3"
diff --git a/pyproject.toml b/pyproject.toml
index 7227bc7523..4476f57ca7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -314,6 +314,7 @@ black = ">=22.3.0"
 ruff = "0.0.265"
 
 # Typechecking
+lxml-stubs = ">=0.4.0"
 mypy = "*"
 mypy-zope = "*"
 types-bleach = ">=4.1.0"
diff --git a/synapse/media/oembed.py b/synapse/media/oembed.py
index c0eaf04be5..5ad9eec80b 100644
--- a/synapse/media/oembed.py
+++ b/synapse/media/oembed.py
@@ -14,7 +14,7 @@
 import html
 import logging
 import urllib.parse
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, List, Optional, cast
 
 import attr
 
@@ -98,7 +98,7 @@ class OEmbedProvider:
         # No match.
         return None
 
-    def autodiscover_from_html(self, tree: "etree.Element") -> Optional[str]:
+    def autodiscover_from_html(self, tree: "etree._Element") -> Optional[str]:
         """
         Search an HTML document for oEmbed autodiscovery information.
 
@@ -109,18 +109,22 @@ class OEmbedProvider:
             The URL to use for oEmbed information, or None if no URL was found.
         """
         # Search for link elements with the proper rel and type attributes.
-        for tag in tree.xpath(
-            "//link[@rel='alternate'][@type='application/json+oembed']"
+        # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+        for tag in cast(
+            List["etree._Element"],
+            tree.xpath("//link[@rel='alternate'][@type='application/json+oembed']"),
         ):
             if "href" in tag.attrib:
-                return tag.attrib["href"]
+                return cast(str, tag.attrib["href"])
 
         # Some providers (e.g. Flickr) use alternative instead of alternate.
-        for tag in tree.xpath(
-            "//link[@rel='alternative'][@type='application/json+oembed']"
+        # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+        for tag in cast(
+            List["etree._Element"],
+            tree.xpath("//link[@rel='alternative'][@type='application/json+oembed']"),
         ):
             if "href" in tag.attrib:
-                return tag.attrib["href"]
+                return cast(str, tag.attrib["href"])
 
         return None
 
@@ -212,11 +216,12 @@ class OEmbedProvider:
         return OEmbedResult(open_graph_response, author_name, cache_age)
 
 
-def _fetch_urls(tree: "etree.Element", tag_name: str) -> List[str]:
+def _fetch_urls(tree: "etree._Element", tag_name: str) -> List[str]:
     results = []
-    for tag in tree.xpath("//*/" + tag_name):
+    # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+    for tag in cast(List["etree._Element"], tree.xpath("//*/" + tag_name)):
         if "src" in tag.attrib:
-            results.append(tag.attrib["src"])
+            results.append(cast(str, tag.attrib["src"]))
     return results
 
 
@@ -244,11 +249,12 @@ def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) ->
     parser = etree.HTMLParser(recover=True, encoding="utf-8")
 
     # Attempt to parse the body. If this fails, log and return no metadata.
-    tree = etree.fromstring(html_body, parser)
+    # TODO Develop of lxml-stubs has this correct.
+    tree = etree.fromstring(html_body, parser)  # type: ignore[arg-type]
 
     # The data was successfully parsed, but no tree was found.
     if tree is None:
-        return
+        return  # type: ignore[unreachable]
 
     # Attempt to find interesting URLs (images, videos, embeds).
     if "og:image" not in open_graph_response:
diff --git a/synapse/media/preview_html.py b/synapse/media/preview_html.py
index 516d0434f0..1bc7ccb7f3 100644
--- a/synapse/media/preview_html.py
+++ b/synapse/media/preview_html.py
@@ -24,6 +24,7 @@ from typing import (
     Optional,
     Set,
     Union,
+    cast,
 )
 
 if TYPE_CHECKING:
@@ -115,7 +116,7 @@ def _get_html_media_encodings(
 
 def decode_body(
     body: bytes, uri: str, content_type: Optional[str] = None
-) -> Optional["etree.Element"]:
+) -> Optional["etree._Element"]:
     """
     This uses lxml to parse the HTML document.
 
@@ -152,11 +153,12 @@ def decode_body(
 
     # Attempt to parse the body. Returns None if the body was successfully
     # parsed, but no tree was found.
-    return etree.fromstring(body, parser)
+    # TODO Develop of lxml-stubs has this correct.
+    return etree.fromstring(body, parser)  # type: ignore[arg-type]
 
 
 def _get_meta_tags(
-    tree: "etree.Element",
+    tree: "etree._Element",
     property: str,
     prefix: str,
     property_mapper: Optional[Callable[[str], Optional[str]]] = None,
@@ -175,9 +177,15 @@ def _get_meta_tags(
     Returns:
         A map of tag name to value.
     """
+    # This actually returns Dict[str, str], but the caller sets this as a variable
+    # which is Dict[str, Optional[str]].
     results: Dict[str, Optional[str]] = {}
-    for tag in tree.xpath(
-        f"//*/meta[starts-with(@{property}, '{prefix}:')][@content][not(@content='')]"
+    # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+    for tag in cast(
+        List["etree._Element"],
+        tree.xpath(
+            f"//*/meta[starts-with(@{property}, '{prefix}:')][@content][not(@content='')]"
+        ),
     ):
         # if we've got more than 50 tags, someone is taking the piss
         if len(results) >= 50:
@@ -187,14 +195,15 @@ def _get_meta_tags(
             )
             return {}
 
-        key = tag.attrib[property]
+        key = cast(str, tag.attrib[property])
         if property_mapper:
-            key = property_mapper(key)
+            new_key = property_mapper(key)
             # None is a special value used to ignore a value.
-            if key is None:
+            if new_key is None:
                 continue
+            key = new_key
 
-        results[key] = tag.attrib["content"]
+        results[key] = cast(str, tag.attrib["content"])
 
     return results
 
@@ -219,7 +228,7 @@ def _map_twitter_to_open_graph(key: str) -> Optional[str]:
     return "og" + key[7:]
 
 
-def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]:
+def parse_html_to_open_graph(tree: "etree._Element") -> Dict[str, Optional[str]]:
     """
     Parse the HTML document into an Open Graph response.
 
@@ -276,24 +285,36 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]:
 
     if "og:title" not in og:
         # Attempt to find a title from the title tag, or the biggest header on the page.
-        title = tree.xpath("((//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1])/text()")
+        # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+        title = cast(
+            List["etree._ElementUnicodeResult"],
+            tree.xpath("((//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1])/text()"),
+        )
         if title:
             og["og:title"] = title[0].strip()
         else:
             og["og:title"] = None
 
     if "og:image" not in og:
-        meta_image = tree.xpath(
-            "//*/meta[translate(@itemprop, 'IMAGE', 'image')='image'][not(@content='')]/@content[1]"
+        # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+        meta_image = cast(
+            List["etree._ElementUnicodeResult"],
+            tree.xpath(
+                "//*/meta[translate(@itemprop, 'IMAGE', 'image')='image'][not(@content='')]/@content[1]"
+            ),
         )
         # If a meta image is found, use it.
         if meta_image:
             og["og:image"] = meta_image[0]
         else:
             # Try to find images which are larger than 10px by 10px.
+            # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
             #
             # TODO: consider inlined CSS styles as well as width & height attribs
-            images = tree.xpath("//img[@src][number(@width)>10][number(@height)>10]")
+            images = cast(
+                List["etree._Element"],
+                tree.xpath("//img[@src][number(@width)>10][number(@height)>10]"),
+            )
             images = sorted(
                 images,
                 key=lambda i: (
@@ -302,20 +323,29 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]:
             )
             # If no images were found, try to find *any* images.
             if not images:
-                images = tree.xpath("//img[@src][1]")
+                # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+                images = cast(List["etree._Element"], tree.xpath("//img[@src][1]"))
             if images:
-                og["og:image"] = images[0].attrib["src"]
+                og["og:image"] = cast(str, images[0].attrib["src"])
 
             # Finally, fallback to the favicon if nothing else.
             else:
-                favicons = tree.xpath("//link[@href][contains(@rel, 'icon')]/@href[1]")
+                # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+                favicons = cast(
+                    List["etree._ElementUnicodeResult"],
+                    tree.xpath("//link[@href][contains(@rel, 'icon')]/@href[1]"),
+                )
                 if favicons:
                     og["og:image"] = favicons[0]
 
     if "og:description" not in og:
         # Check the first meta description tag for content.
-        meta_description = tree.xpath(
-            "//*/meta[translate(@name, 'DESCRIPTION', 'description')='description'][not(@content='')]/@content[1]"
+        # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this.
+        meta_description = cast(
+            List["etree._ElementUnicodeResult"],
+            tree.xpath(
+                "//*/meta[translate(@name, 'DESCRIPTION', 'description')='description'][not(@content='')]/@content[1]"
+            ),
         )
         # If a meta description is found with content, use it.
         if meta_description:
@@ -332,7 +362,7 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]:
     return og
 
 
-def parse_html_description(tree: "etree.Element") -> Optional[str]:
+def parse_html_description(tree: "etree._Element") -> Optional[str]:
     """
     Calculate a text description based on an HTML document.
 
@@ -368,6 +398,9 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
         "canvas",
         "img",
         "picture",
+        # etree.Comment is a function which creates an etree._Comment element.
+        # The "tag" attribute of an etree._Comment instance is confusingly the
+        # etree.Comment function instead of a string.
         etree.Comment,
     }
 
@@ -381,8 +414,8 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
 
 
 def _iterate_over_text(
-    tree: Optional["etree.Element"],
-    tags_to_ignore: Set[Union[str, "etree.Comment"]],
+    tree: Optional["etree._Element"],
+    tags_to_ignore: Set[object],
     stack_limit: int = 1024,
 ) -> Generator[str, None, None]:
     """Iterate over the tree returning text nodes in a depth first fashion,
@@ -402,7 +435,7 @@ def _iterate_over_text(
 
     # This is a stack whose items are elements to iterate over *or* strings
     # to be returned.
-    elements: List[Union[str, "etree.Element"]] = [tree]
+    elements: List[Union[str, "etree._Element"]] = [tree]
     while elements:
         el = elements.pop()
 
diff --git a/tests/media/test_html_preview.py b/tests/media/test_html_preview.py
index e7da75db3e..ea84bb3d3d 100644
--- a/tests/media/test_html_preview.py
+++ b/tests/media/test_html_preview.py
@@ -24,7 +24,7 @@ from tests import unittest
 try:
     import lxml
 except ImportError:
-    lxml = None
+    lxml = None  # type: ignore[assignment]
 
 
 class SummarizeTestCase(unittest.TestCase):
@@ -160,6 +160,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         """
 
         tree = decode_body(html, "http://example.com/test.html")
+        assert tree is not None
         og = parse_html_to_open_graph(tree)
 
         self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
@@ -176,6 +177,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         """
 
         tree = decode_body(html, "http://example.com/test.html")
+        assert tree is not None
         og = parse_html_to_open_graph(tree)
 
         self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
@@ -195,6 +197,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         """
 
         tree = decode_body(html, "http://example.com/test.html")
+        assert tree is not None
         og = parse_html_to_open_graph(tree)
 
         self.assertEqual(
@@ -217,6 +220,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         """
 
         tree = decode_body(html, "http://example.com/test.html")
+        assert tree is not None
         og = parse_html_to_open_graph(tree)
 
         self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
@@ -231,6 +235,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         """
 
         tree = decode_body(html, "http://example.com/test.html")
+        assert tree is not None
         og = parse_html_to_open_graph(tree)
 
         self.assertEqual(og, {"og:title": None, "og:description": "Some text."})
@@ -246,6 +251,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         """
 
         tree = decode_body(html, "http://example.com/test.html")
+        assert tree is not None
         og = parse_html_to_open_graph(tree)
 
         self.assertEqual(og, {"og:title": "Title", "og:description": "Title"})
@@ -261,6 +267,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         """
 
         tree = decode_body(html, "http://example.com/test.html")
+        assert tree is not None
         og = parse_html_to_open_graph(tree)
 
         self.assertEqual(og, {"og:title": "Title", "og:description": "Some text."})
@@ -281,6 +288,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         """
 
         tree = decode_body(html, "http://example.com/test.html")
+        assert tree is not None
         og = parse_html_to_open_graph(tree)
 
         self.assertEqual(og, {"og:title": "Title", "og:description": "Finally!"})
@@ -296,6 +304,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         """
 
         tree = decode_body(html, "http://example.com/test.html")
+        assert tree is not None
         og = parse_html_to_open_graph(tree)
 
         self.assertEqual(og, {"og:title": None, "og:description": "Some text."})
@@ -324,6 +333,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         <head><title>Foo</title></head><body>Some text.</body></html>
         """.strip()
         tree = decode_body(html, "http://example.com/test.html")
+        assert tree is not None
         og = parse_html_to_open_graph(tree)
         self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
 
@@ -338,6 +348,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         </html>
         """
         tree = decode_body(html, "http://example.com/test.html", "invalid-encoding")
+        assert tree is not None
         og = parse_html_to_open_graph(tree)
         self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
 
@@ -353,6 +364,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         </html>
         """
         tree = decode_body(html, "http://example.com/test.html")
+        assert tree is not None
         og = parse_html_to_open_graph(tree)
         self.assertEqual(og, {"og:title": "ÿÿ Foo", "og:description": "Some text."})
 
@@ -367,6 +379,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         </html>
         """
         tree = decode_body(html, "http://example.com/test.html")
+        assert tree is not None
         og = parse_html_to_open_graph(tree)
         self.assertEqual(og, {"og:title": "ó", "og:description": "Some text."})
 
@@ -380,6 +393,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         </html>
         """
         tree = decode_body(html, "http://example.com/test.html")
+        assert tree is not None
         og = parse_html_to_open_graph(tree)
         self.assertEqual(
             og,
@@ -401,6 +415,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         </html>
         """
         tree = decode_body(html, "http://example.com/test.html")
+        assert tree is not None
         og = parse_html_to_open_graph(tree)
         self.assertEqual(
             og,
@@ -419,6 +434,7 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         with a cheeky SVG</svg></u> and <strong>some</strong> tail text</b></a>
         """
         tree = decode_body(html, "http://example.com/test.html")
+        assert tree is not None
         og = parse_html_to_open_graph(tree)
         self.assertEqual(
             og,
diff --git a/tests/media/test_oembed.py b/tests/media/test_oembed.py
index c8bf8421da..3bc19cb1cc 100644
--- a/tests/media/test_oembed.py
+++ b/tests/media/test_oembed.py
@@ -28,7 +28,7 @@ from tests.unittest import HomeserverTestCase
 try:
     import lxml
 except ImportError:
-    lxml = None
+    lxml = None  # type: ignore[assignment]
 
 
 class OEmbedTests(HomeserverTestCase):
diff --git a/tests/media/test_url_previewer.py b/tests/media/test_url_previewer.py
index 3c4c7d6765..46ecde5344 100644
--- a/tests/media/test_url_previewer.py
+++ b/tests/media/test_url_previewer.py
@@ -24,7 +24,7 @@ from tests.unittest import override_config
 try:
     import lxml
 except ImportError:
-    lxml = None
+    lxml = None  # type: ignore[assignment]
 
 
 class URLPreviewTests(unittest.HomeserverTestCase):
diff --git a/tests/rest/media/test_url_preview.py b/tests/rest/media/test_url_preview.py
index 170fb0534a..05d5e39cab 100644
--- a/tests/rest/media/test_url_preview.py
+++ b/tests/rest/media/test_url_preview.py
@@ -40,7 +40,7 @@ from tests.test_utils import SMALL_PNG
 try:
     import lxml
 except ImportError:
-    lxml = None
+    lxml = None  # type: ignore[assignment]
 
 
 class URLPreviewTests(unittest.HomeserverTestCase):
-- 
cgit 1.5.1


From 6d9e2fd8782a6610d6daf499d141e67f476b2f8c Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Wed, 31 May 2023 15:13:48 -0700
Subject: Speed up background jobs populate_full_user_id_user_filters and
 populate_full_user_id_profiles (#15700)

---
 changelog.d/15700.misc                      | 1 +
 synapse/storage/databases/main/filtering.py | 2 +-
 synapse/storage/databases/main/profile.py   | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15700.misc

(limited to 'synapse')

diff --git a/changelog.d/15700.misc b/changelog.d/15700.misc
new file mode 100644
index 0000000000..e96bc681aa
--- /dev/null
+++ b/changelog.d/15700.misc
@@ -0,0 +1 @@
+Speed up background jobs `populate_full_user_id_user_filters` and `populate_full_user_id_profiles`.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/filtering.py b/synapse/storage/databases/main/filtering.py
index da31eb44dc..f777777cbf 100644
--- a/synapse/storage/databases/main/filtering.py
+++ b/synapse/storage/databases/main/filtering.py
@@ -71,7 +71,7 @@ class FilteringWorkerStore(SQLBaseStore):
                     SELECT user_id FROM user_filters
                     WHERE user_id > ?
                     ORDER BY user_id
-                    LIMIT 1 OFFSET 50
+                    LIMIT 1 OFFSET 1000
                   """
             txn.execute(sql, (lower_bound_id,))
             res = txn.fetchone()
diff --git a/synapse/storage/databases/main/profile.py b/synapse/storage/databases/main/profile.py
index 65c92bef51..21d54c7a7a 100644
--- a/synapse/storage/databases/main/profile.py
+++ b/synapse/storage/databases/main/profile.py
@@ -65,7 +65,7 @@ class ProfileWorkerStore(SQLBaseStore):
                     SELECT user_id FROM profiles
                     WHERE user_id > ?
                     ORDER BY user_id
-                    LIMIT 1 OFFSET 50
+                    LIMIT 1 OFFSET 1000
                   """
             txn.execute(sql, (lower_bound_id,))
             res = txn.fetchone()
-- 
cgit 1.5.1


From d1693f03626391097b59ea9568cd8a869ed89569 Mon Sep 17 00:00:00 2001
From: Hugh Nimmo-Smith <hughns@users.noreply.github.com>
Date: Thu, 1 Jun 2023 13:52:51 +0100
Subject: Implement stable support for MSC3882 to allow an existing
 device/session to generate a login token for use on a new device/session
 (#15388)

Implements stable support for MSC3882; this involves updating Synapse's support to
match what the MSC / the spec says.

Continue to support the unstable version to allow clients to transition.
---
 changelog.d/15388.feature                        |  1 +
 docs/usage/configuration/config_documentation.md | 65 ++++++++++++++--------
 synapse/config/auth.py                           | 10 ++++
 synapse/config/experimental.py                   | 13 +----
 synapse/rest/client/capabilities.py              |  3 +
 synapse/rest/client/login.py                     | 31 ++++++++---
 synapse/rest/client/login_token_request.py       | 47 +++++++++++-----
 synapse/rest/client/versions.py                  |  4 +-
 tests/config/test_oauth_delegation.py            |  4 +-
 tests/rest/client/test_capabilities.py           | 28 ++++++++++
 tests/rest/client/test_login.py                  | 23 ++++++++
 tests/rest/client/test_login_token_request.py    | 71 ++++++++++++++++++------
 12 files changed, 225 insertions(+), 75 deletions(-)
 create mode 100644 changelog.d/15388.feature

(limited to 'synapse')

diff --git a/changelog.d/15388.feature b/changelog.d/15388.feature
new file mode 100644
index 0000000000..6cc55cafa2
--- /dev/null
+++ b/changelog.d/15388.feature
@@ -0,0 +1 @@
+Stable support for [MSC3882](https://github.com/matrix-org/matrix-spec-proposals/pull/3882) to allow an existing device/session to generate a login token for use on a new device/session.
\ No newline at end of file
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 5ede6d0a82..0cf6e075ff 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -2570,7 +2570,50 @@ Example configuration:
 ```yaml
 nonrefreshable_access_token_lifetime: 24h
 ```
+---
+### `ui_auth`
+
+The amount of time to allow a user-interactive authentication session to be active.
 
+This defaults to 0, meaning the user is queried for their credentials
+before every action, but this can be overridden to allow a single
+validation to be re-used.  This weakens the protections afforded by
+the user-interactive authentication process, by allowing for multiple
+(and potentially different) operations to use the same validation session.
+
+This is ignored for potentially "dangerous" operations (including
+deactivating an account, modifying an account password, adding a 3PID,
+and minting additional login tokens).
+
+Use the `session_timeout` sub-option here to change the time allowed for credential validation.
+
+Example configuration:
+```yaml
+ui_auth:
+    session_timeout: "15s"
+```
+---
+### `login_via_existing_session`
+
+Matrix supports the ability of an existing session to mint a login token for
+another client.
+
+Synapse disables this by default as it has security ramifications -- a malicious
+client could use the mechanism to spawn more than one session.
+
+The duration of time the generated token is valid for can be configured with the
+`token_timeout` sub-option.
+
+User-interactive authentication is required when this is enabled unless the
+`require_ui_auth` sub-option is set to `False`.
+
+Example configuration:
+```yaml
+login_via_existing_session:
+    enabled: true
+    require_ui_auth: false
+    token_timeout: "5m"
+```
 ---
 ## Metrics
 Config options related to metrics.
@@ -3415,28 +3458,6 @@ password_config:
       require_uppercase: true
 ```
 ---
-### `ui_auth`
-
-The amount of time to allow a user-interactive authentication session to be active.
-
-This defaults to 0, meaning the user is queried for their credentials
-before every action, but this can be overridden to allow a single
-validation to be re-used.  This weakens the protections afforded by
-the user-interactive authentication process, by allowing for multiple
-(and potentially different) operations to use the same validation session.
-
-This is ignored for potentially "dangerous" operations (including
-deactivating an account, modifying an account password, and
-adding a 3PID).
-
-Use the `session_timeout` sub-option here to change the time allowed for credential validation.
-
-Example configuration:
-```yaml
-ui_auth:
-    session_timeout: "15s"
-```
----
 ## Push
 Configuration settings related to push notifications
 
diff --git a/synapse/config/auth.py b/synapse/config/auth.py
index 12e853980e..c7ab428f28 100644
--- a/synapse/config/auth.py
+++ b/synapse/config/auth.py
@@ -60,3 +60,13 @@ class AuthConfig(Config):
         self.ui_auth_session_timeout = self.parse_duration(
             ui_auth.get("session_timeout", 0)
         )
+
+        # Logging in with an existing session.
+        login_via_existing = config.get("login_via_existing_session", {})
+        self.login_via_existing_enabled = login_via_existing.get("enabled", False)
+        self.login_via_existing_require_ui_auth = login_via_existing.get(
+            "require_ui_auth", True
+        )
+        self.login_via_existing_token_timeout = self.parse_duration(
+            login_via_existing.get("token_timeout", "5m")
+        )
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 1d189b2e26..a9e002cf08 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -192,10 +192,10 @@ class MSC3861:
                 ("captcha", "enable_registration_captcha"),
             )
 
-        if root.experimental.msc3882_enabled:
+        if root.auth.login_via_existing_enabled:
             raise ConfigError(
-                "MSC3882 cannot be enabled when OAuth delegation is enabled",
-                ("experimental_features", "msc3882_enabled"),
+                "Login via existing session cannot be enabled when OAuth delegation is enabled",
+                ("login_via_existing_session", "enabled"),
             )
 
         if root.registration.refresh_token_lifetime:
@@ -319,13 +319,6 @@ class ExperimentalConfig(Config):
         # MSC3881: Remotely toggle push notifications for another client
         self.msc3881_enabled: bool = experimental.get("msc3881_enabled", False)
 
-        # MSC3882: Allow an existing session to sign in a new session
-        self.msc3882_enabled: bool = experimental.get("msc3882_enabled", False)
-        self.msc3882_ui_auth: bool = experimental.get("msc3882_ui_auth", True)
-        self.msc3882_token_timeout = self.parse_duration(
-            experimental.get("msc3882_token_timeout", "5m")
-        )
-
         # MSC3874: Filtering /messages with rel_types / not_rel_types.
         self.msc3874_enabled: bool = experimental.get("msc3874_enabled", False)
 
diff --git a/synapse/rest/client/capabilities.py b/synapse/rest/client/capabilities.py
index 0dbf8f6818..3154b9f77e 100644
--- a/synapse/rest/client/capabilities.py
+++ b/synapse/rest/client/capabilities.py
@@ -65,6 +65,9 @@ class CapabilitiesRestServlet(RestServlet):
                 "m.3pid_changes": {
                     "enabled": self.config.registration.enable_3pid_changes
                 },
+                "m.get_login_token": {
+                    "enabled": self.config.auth.login_via_existing_enabled,
+                },
             }
         }
 
diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py
index d4dc2462b9..6493b00bb8 100644
--- a/synapse/rest/client/login.py
+++ b/synapse/rest/client/login.py
@@ -104,6 +104,9 @@ class LoginRestServlet(RestServlet):
             and hs.config.experimental.msc3866.require_approval_for_new_accounts
         )
 
+        # Whether get login token is enabled.
+        self._get_login_token_enabled = hs.config.auth.login_via_existing_enabled
+
         self.auth = hs.get_auth()
 
         self.clock = hs.get_clock()
@@ -142,6 +145,9 @@ class LoginRestServlet(RestServlet):
             # to SSO.
             flows.append({"type": LoginRestServlet.CAS_TYPE})
 
+        # The login token flow requires m.login.token to be advertised.
+        support_login_token_flow = self._get_login_token_enabled
+
         if self.cas_enabled or self.saml2_enabled or self.oidc_enabled:
             flows.append(
                 {
@@ -153,14 +159,23 @@ class LoginRestServlet(RestServlet):
                 }
             )
 
-            # While it's valid for us to advertise this login type generally,
-            # synapse currently only gives out these tokens as part of the
-            # SSO login flow.
-            # Generally we don't want to advertise login flows that clients
-            # don't know how to implement, since they (currently) will always
-            # fall back to the fallback API if they don't understand one of the
-            # login flow types returned.
-            flows.append({"type": LoginRestServlet.TOKEN_TYPE})
+            # SSO requires a login token to be generated, so we need to advertise that flow
+            support_login_token_flow = True
+
+        # While it's valid for us to advertise this login type generally,
+        # synapse currently only gives out these tokens as part of the
+        # SSO login flow or as part of login via an existing session.
+        #
+        # Generally we don't want to advertise login flows that clients
+        # don't know how to implement, since they (currently) will always
+        # fall back to the fallback API if they don't understand one of the
+        # login flow types returned.
+        if support_login_token_flow:
+            tokenTypeFlow: Dict[str, Any] = {"type": LoginRestServlet.TOKEN_TYPE}
+            # If the login token flow is enabled advertise the get_login_token flag.
+            if self._get_login_token_enabled:
+                tokenTypeFlow["get_login_token"] = True
+            flows.append(tokenTypeFlow)
 
         flows.extend({"type": t} for t in self.auth_handler.get_supported_login_types())
 
diff --git a/synapse/rest/client/login_token_request.py b/synapse/rest/client/login_token_request.py
index 43ea21d5e6..b1629f94a5 100644
--- a/synapse/rest/client/login_token_request.py
+++ b/synapse/rest/client/login_token_request.py
@@ -15,6 +15,7 @@
 import logging
 from typing import TYPE_CHECKING, Tuple
 
+from synapse.api.ratelimiting import Ratelimiter
 from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_json_object_from_request
 from synapse.http.site import SynapseRequest
@@ -33,7 +34,7 @@ class LoginTokenRequestServlet(RestServlet):
 
     Request:
 
-    POST /login/token HTTP/1.1
+    POST /login/get_token HTTP/1.1
     Content-Type: application/json
 
     {}
@@ -43,30 +44,45 @@ class LoginTokenRequestServlet(RestServlet):
     HTTP/1.1 200 OK
     {
         "login_token": "ABDEFGH",
-        "expires_in": 3600,
+        "expires_in_ms": 3600000,
     }
     """
 
-    PATTERNS = client_patterns(
-        "/org.matrix.msc3882/login/token$", releases=[], v1=False, unstable=True
-    )
+    PATTERNS = [
+        *client_patterns(
+            "/login/get_token$", releases=["v1"], v1=False, unstable=False
+        ),
+        # TODO: this is no longer needed once unstable MSC3882 does not need to be supported:
+        *client_patterns(
+            "/org.matrix.msc3882/login/token$", releases=[], v1=False, unstable=True
+        ),
+    ]
 
     def __init__(self, hs: "HomeServer"):
         super().__init__()
         self.auth = hs.get_auth()
-        self.store = hs.get_datastores().main
-        self.clock = hs.get_clock()
-        self.server_name = hs.config.server.server_name
+        self._main_store = hs.get_datastores().main
         self.auth_handler = hs.get_auth_handler()
-        self.token_timeout = hs.config.experimental.msc3882_token_timeout
-        self.ui_auth = hs.config.experimental.msc3882_ui_auth
+        self.token_timeout = hs.config.auth.login_via_existing_token_timeout
+        self._require_ui_auth = hs.config.auth.login_via_existing_require_ui_auth
+
+        # Ratelimit aggressively to a maximum of 1 request per minute.
+        #
+        # This endpoint can be used to spawn additional sessions and could be
+        # abused by a malicious client to create many sessions.
+        self._ratelimiter = Ratelimiter(
+            store=self._main_store,
+            clock=hs.get_clock(),
+            rate_hz=1 / 60,
+            burst_count=1,
+        )
 
     @interactive_auth_handler
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
         body = parse_json_object_from_request(request)
 
-        if self.ui_auth:
+        if self._require_ui_auth:
             await self.auth_handler.validate_user_via_ui_auth(
                 requester,
                 request,
@@ -75,9 +91,12 @@ class LoginTokenRequestServlet(RestServlet):
                 can_skip_ui_auth=False,  # Don't allow skipping of UI auth
             )
 
+        # Ensure that this endpoint isn't being used too often. (Ensure this is
+        # done *after* UI auth.)
+        await self._ratelimiter.ratelimit(None, requester.user.to_string().lower())
+
         login_token = await self.auth_handler.create_login_token_for_user_id(
             user_id=requester.user.to_string(),
-            auth_provider_id="org.matrix.msc3882.login_token_request",
             duration_ms=self.token_timeout,
         )
 
@@ -85,11 +104,13 @@ class LoginTokenRequestServlet(RestServlet):
             200,
             {
                 "login_token": login_token,
+                # TODO: this is no longer needed once unstable MSC3882 does not need to be supported:
                 "expires_in": self.token_timeout // 1000,
+                "expires_in_ms": self.token_timeout,
             },
         )
 
 
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
-    if hs.config.experimental.msc3882_enabled:
+    if hs.config.auth.login_via_existing_enabled:
         LoginTokenRequestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 32df054f56..547bf34df1 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -113,8 +113,8 @@ class VersionsRestServlet(RestServlet):
                     "fi.mau.msc2815": self.config.experimental.msc2815_enabled,
                     # Adds a ping endpoint for appservices to check HS->AS connection
                     "fi.mau.msc2659.stable": True,  # TODO: remove when "v1.7" is added above
-                    # Adds support for login token requests as per MSC3882
-                    "org.matrix.msc3882": self.config.experimental.msc3882_enabled,
+                    # TODO: this is no longer needed once unstable MSC3882 does not need to be supported:
+                    "org.matrix.msc3882": self.config.auth.login_via_existing_enabled,
                     # Adds support for remotely enabling/disabling pushers, as per MSC3881
                     "org.matrix.msc3881": self.config.experimental.msc3881_enabled,
                     # Adds support for filtering /messages by event relation.
diff --git a/tests/config/test_oauth_delegation.py b/tests/config/test_oauth_delegation.py
index 2ead721b00..f57c813a58 100644
--- a/tests/config/test_oauth_delegation.py
+++ b/tests/config/test_oauth_delegation.py
@@ -228,8 +228,8 @@ class MSC3861OAuthDelegation(TestCase):
         with self.assertRaises(ConfigError):
             self.parse_config()
 
-    def test_msc3882_auth_cannot_be_enabled(self) -> None:
-        self.config_dict["experimental_features"]["msc3882_enabled"] = True
+    def test_login_via_existing_session_cannot_be_enabled(self) -> None:
+        self.config_dict["login_via_existing_session"] = {"enabled": True}
         with self.assertRaises(ConfigError):
             self.parse_config()
 
diff --git a/tests/rest/client/test_capabilities.py b/tests/rest/client/test_capabilities.py
index c16e8d43f4..cf23430f6a 100644
--- a/tests/rest/client/test_capabilities.py
+++ b/tests/rest/client/test_capabilities.py
@@ -186,3 +186,31 @@ class CapabilitiesTestCase(unittest.HomeserverTestCase):
             self.assertGreater(len(details["support"]), 0)
             for room_version in details["support"]:
                 self.assertTrue(room_version in KNOWN_ROOM_VERSIONS, str(room_version))
+
+    def test_get_get_token_login_fields_when_disabled(self) -> None:
+        """By default login via an existing session is disabled."""
+        access_token = self.get_success(
+            self.auth_handler.create_access_token_for_user_id(
+                self.user, device_id=None, valid_until_ms=None
+            )
+        )
+
+        channel = self.make_request("GET", self.url, access_token=access_token)
+        capabilities = channel.json_body["capabilities"]
+
+        self.assertEqual(channel.code, HTTPStatus.OK)
+        self.assertFalse(capabilities["m.get_login_token"]["enabled"])
+
+    @override_config({"login_via_existing_session": {"enabled": True}})
+    def test_get_get_token_login_fields_when_enabled(self) -> None:
+        access_token = self.get_success(
+            self.auth_handler.create_access_token_for_user_id(
+                self.user, device_id=None, valid_until_ms=None
+            )
+        )
+
+        channel = self.make_request("GET", self.url, access_token=access_token)
+        capabilities = channel.json_body["capabilities"]
+
+        self.assertEqual(channel.code, HTTPStatus.OK)
+        self.assertTrue(capabilities["m.get_login_token"]["enabled"])
diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py
index dc32982e22..f3c3bc69a9 100644
--- a/tests/rest/client/test_login.py
+++ b/tests/rest/client/test_login.py
@@ -446,6 +446,29 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase):
             ApprovalNoticeMedium.NONE, channel.json_body["approval_notice_medium"]
         )
 
+    def test_get_login_flows_with_login_via_existing_disabled(self) -> None:
+        """GET /login should not return m.login.token when login via existing session is disabled"""
+        channel = self.make_request("GET", "/_matrix/client/r0/login")
+        self.assertEqual(channel.code, 200, channel.result)
+
+        flows = {flow["type"]: flow for flow in channel.json_body["flows"]}
+        self.assertNotIn("m.login.token", flows)
+
+    @override_config({"login_via_existing_session": {"enabled": True}})
+    def test_get_login_flows_with_login_via_existing_enabled(self) -> None:
+        """GET /login should return m.login.token with get_login_token true"""
+        channel = self.make_request("GET", "/_matrix/client/r0/login")
+        self.assertEqual(channel.code, 200, channel.result)
+
+        self.assertCountEqual(
+            channel.json_body["flows"],
+            [
+                {"type": "m.login.token", "get_login_token": True},
+                {"type": "m.login.password"},
+                {"type": "m.login.application_service"},
+            ],
+        )
+
 
 @skip_unless(has_saml2 and HAS_OIDC, "Requires SAML2 and OIDC")
 class MultiSSOTestCase(unittest.HomeserverTestCase):
diff --git a/tests/rest/client/test_login_token_request.py b/tests/rest/client/test_login_token_request.py
index b8187db982..f05e619aa8 100644
--- a/tests/rest/client/test_login_token_request.py
+++ b/tests/rest/client/test_login_token_request.py
@@ -15,14 +15,14 @@
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.rest import admin
-from synapse.rest.client import login, login_token_request
+from synapse.rest.client import login, login_token_request, versions
 from synapse.server import HomeServer
 from synapse.util import Clock
 
 from tests import unittest
 from tests.unittest import override_config
 
-endpoint = "/_matrix/client/unstable/org.matrix.msc3882/login/token"
+GET_TOKEN_ENDPOINT = "/_matrix/client/v1/login/get_token"
 
 
 class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase):
@@ -30,6 +30,7 @@ class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase):
         login.register_servlets,
         admin.register_servlets,
         login_token_request.register_servlets,
+        versions.register_servlets,  # TODO: remove once unstable revision 0 support is removed
     ]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
@@ -46,26 +47,26 @@ class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase):
         self.password = "password"
 
     def test_disabled(self) -> None:
-        channel = self.make_request("POST", endpoint, {}, access_token=None)
+        channel = self.make_request("POST", GET_TOKEN_ENDPOINT, {}, access_token=None)
         self.assertEqual(channel.code, 404)
 
         self.register_user(self.user, self.password)
         token = self.login(self.user, self.password)
 
-        channel = self.make_request("POST", endpoint, {}, access_token=token)
+        channel = self.make_request("POST", GET_TOKEN_ENDPOINT, {}, access_token=token)
         self.assertEqual(channel.code, 404)
 
-    @override_config({"experimental_features": {"msc3882_enabled": True}})
+    @override_config({"login_via_existing_session": {"enabled": True}})
     def test_require_auth(self) -> None:
-        channel = self.make_request("POST", endpoint, {}, access_token=None)
+        channel = self.make_request("POST", GET_TOKEN_ENDPOINT, {}, access_token=None)
         self.assertEqual(channel.code, 401)
 
-    @override_config({"experimental_features": {"msc3882_enabled": True}})
+    @override_config({"login_via_existing_session": {"enabled": True}})
     def test_uia_on(self) -> None:
         user_id = self.register_user(self.user, self.password)
         token = self.login(self.user, self.password)
 
-        channel = self.make_request("POST", endpoint, {}, access_token=token)
+        channel = self.make_request("POST", GET_TOKEN_ENDPOINT, {}, access_token=token)
         self.assertEqual(channel.code, 401)
         self.assertIn({"stages": ["m.login.password"]}, channel.json_body["flows"])
 
@@ -80,9 +81,9 @@ class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase):
             },
         }
 
-        channel = self.make_request("POST", endpoint, uia, access_token=token)
+        channel = self.make_request("POST", GET_TOKEN_ENDPOINT, uia, access_token=token)
         self.assertEqual(channel.code, 200)
-        self.assertEqual(channel.json_body["expires_in"], 300)
+        self.assertEqual(channel.json_body["expires_in_ms"], 300000)
 
         login_token = channel.json_body["login_token"]
 
@@ -95,15 +96,15 @@ class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase):
         self.assertEqual(channel.json_body["user_id"], user_id)
 
     @override_config(
-        {"experimental_features": {"msc3882_enabled": True, "msc3882_ui_auth": False}}
+        {"login_via_existing_session": {"enabled": True, "require_ui_auth": False}}
     )
     def test_uia_off(self) -> None:
         user_id = self.register_user(self.user, self.password)
         token = self.login(self.user, self.password)
 
-        channel = self.make_request("POST", endpoint, {}, access_token=token)
+        channel = self.make_request("POST", GET_TOKEN_ENDPOINT, {}, access_token=token)
         self.assertEqual(channel.code, 200)
-        self.assertEqual(channel.json_body["expires_in"], 300)
+        self.assertEqual(channel.json_body["expires_in_ms"], 300000)
 
         login_token = channel.json_body["login_token"]
 
@@ -117,10 +118,10 @@ class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase):
 
     @override_config(
         {
-            "experimental_features": {
-                "msc3882_enabled": True,
-                "msc3882_ui_auth": False,
-                "msc3882_token_timeout": "15s",
+            "login_via_existing_session": {
+                "enabled": True,
+                "require_ui_auth": False,
+                "token_timeout": "15s",
             }
         }
     )
@@ -128,6 +129,40 @@ class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase):
         self.register_user(self.user, self.password)
         token = self.login(self.user, self.password)
 
-        channel = self.make_request("POST", endpoint, {}, access_token=token)
+        channel = self.make_request("POST", GET_TOKEN_ENDPOINT, {}, access_token=token)
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(channel.json_body["expires_in_ms"], 15000)
+
+    @override_config(
+        {
+            "login_via_existing_session": {
+                "enabled": True,
+                "require_ui_auth": False,
+                "token_timeout": "15s",
+            }
+        }
+    )
+    def test_unstable_support(self) -> None:
+        # TODO: remove once support for unstable MSC3882 is no longer needed
+
+        # check feature is advertised in versions response:
+        channel = self.make_request(
+            "GET", "/_matrix/client/versions", {}, access_token=None
+        )
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(
+            channel.json_body["unstable_features"]["org.matrix.msc3882"], True
+        )
+
+        self.register_user(self.user, self.password)
+        token = self.login(self.user, self.password)
+
+        # check feature is available via the unstable endpoint and returns an expires_in value in seconds
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/unstable/org.matrix.msc3882/login/token",
+            {},
+            access_token=token,
+        )
         self.assertEqual(channel.code, 200)
         self.assertEqual(channel.json_body["expires_in"], 15)
-- 
cgit 1.5.1


From 5ed0e8c61f6b46289fdc5609e8e573b67c2c1982 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 1 Jun 2023 14:25:20 +0100
Subject: Cache requests for user's devices from federation (#15675)

This should mitigate the issue where lots of different servers request
the same user's devices all at once.
---
 changelog.d/15675.misc                            |  1 +
 synapse/storage/databases/main/devices.py         |  4 ++
 synapse/storage/databases/main/end_to_end_keys.py | 67 ++++++++++++++++++++++-
 3 files changed, 70 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15675.misc

(limited to 'synapse')

diff --git a/changelog.d/15675.misc b/changelog.d/15675.misc
new file mode 100644
index 0000000000..05538fdbef
--- /dev/null
+++ b/changelog.d/15675.misc
@@ -0,0 +1 @@
+Cache requests for user's devices over federation.
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index a67fdb3c22..f677d048aa 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1941,6 +1941,10 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
             user_id,
             stream_ids[-1],
         )
+        txn.call_after(
+            self._get_e2e_device_keys_for_federation_query_inner.invalidate,
+            (user_id,),
+        )
 
         min_stream_id = stream_ids[0]
 
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index 4bc391f213..91ae9c457d 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -16,6 +16,7 @@
 import abc
 from typing import (
     TYPE_CHECKING,
+    Any,
     Collection,
     Dict,
     Iterable,
@@ -39,6 +40,7 @@ from synapse.appservice import (
     TransactionUnusedFallbackKeys,
 )
 from synapse.logging.opentracing import log_kv, set_tag, trace
+from synapse.replication.tcp.streams._base import DeviceListsStream
 from synapse.storage._base import SQLBaseStore, db_to_json
 from synapse.storage.database import (
     DatabasePool,
@@ -104,6 +106,23 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
             self.hs.config.federation.allow_device_name_lookup_over_federation
         )
 
+    def process_replication_rows(
+        self,
+        stream_name: str,
+        instance_name: str,
+        token: int,
+        rows: Iterable[Any],
+    ) -> None:
+        if stream_name == DeviceListsStream.NAME:
+            for row in rows:
+                assert isinstance(row, DeviceListsStream.DeviceListsStreamRow)
+                if row.entity.startswith("@"):
+                    self._get_e2e_device_keys_for_federation_query_inner.invalidate(
+                        (row.entity,)
+                    )
+
+        super().process_replication_rows(stream_name, instance_name, token, rows)
+
     async def get_e2e_device_keys_for_federation_query(
         self, user_id: str
     ) -> Tuple[int, List[JsonDict]]:
@@ -114,6 +133,50 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
         """
         now_stream_id = self.get_device_stream_token()
 
+        # We need to be careful with the caching here, as we need to always
+        # return *all* persisted devices, however there may be a lag between a
+        # new device being persisted and the cache being invalidated.
+        cached_results = (
+            self._get_e2e_device_keys_for_federation_query_inner.cache.get_immediate(
+                user_id, None
+            )
+        )
+        if cached_results is not None:
+            # Check that there have been no new devices added by another worker
+            # after the cache. This should be quick as there should be few rows
+            # with a higher stream ordering.
+            #
+            # Note that we invalidate based on the device stream, so we only
+            # have to check for potential invalidations after the
+            # `now_stream_id`.
+            sql = """
+                SELECT user_id FROM device_lists_stream
+                WHERE stream_id >= ? AND user_id = ?
+            """
+            rows = await self.db_pool.execute(
+                "get_e2e_device_keys_for_federation_query_check",
+                None,
+                sql,
+                now_stream_id,
+                user_id,
+            )
+            if not rows:
+                # No new rows, so cache is still valid.
+                return now_stream_id, cached_results
+
+            # There are new rows, so let's invalidate the cache and run the query.
+            self._get_e2e_device_keys_for_federation_query_inner.invalidate((user_id,))
+
+        results = await self._get_e2e_device_keys_for_federation_query_inner(user_id)
+
+        return now_stream_id, results
+
+    @cached(iterable=True)
+    async def _get_e2e_device_keys_for_federation_query_inner(
+        self, user_id: str
+    ) -> List[JsonDict]:
+        """Get all devices (with any device keys) for a user"""
+
         devices = await self.get_e2e_device_keys_and_signatures([(user_id, None)])
 
         if devices:
@@ -134,9 +197,9 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
 
                 results.append(result)
 
-            return now_stream_id, results
+            return results
 
-        return now_stream_id, []
+        return []
 
     @trace
     @cancellable
-- 
cgit 1.5.1


From 30a5076da8ad776c150ad2745b5f34b4446012e0 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Thu, 1 Jun 2023 21:27:18 -0500
Subject: Log when events are (unexpectedly) filtered out of responses in tests
 (#14213)

See https://github.com/matrix-org/synapse/pull/14095#discussion_r990335492

This is useful because when we see that a relevant event is an `outlier` or `soft-failed`, that's a good indicator explaining why it's unexpectedly not showing up. `filter_events_for_client` is used in `/sync`, `/messages`, `/context` which are all common end-to-end assertion touch points (also notifications, relations).
---
 changelog.d/14213.misc                |  1 +
 docker/README.md                      |  3 ++-
 docker/conf/log.config                | 30 ++++++++++++++++++++++++------
 docker/configure_workers_and_start.py |  3 +++
 scripts-dev/complement.sh             |  4 ++++
 synapse/visibility.py                 | 14 +++++++-------
 tests/test_utils/logging_setup.py     | 12 ++++++++++++
 7 files changed, 53 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/14213.misc

(limited to 'synapse')

diff --git a/changelog.d/14213.misc b/changelog.d/14213.misc
new file mode 100644
index 0000000000..b0689f3d15
--- /dev/null
+++ b/changelog.d/14213.misc
@@ -0,0 +1 @@
+Log when events are (maybe unexpectedly) filtered out of responses in tests.
diff --git a/docker/README.md b/docker/README.md
index eda3221c23..08372e95c6 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -73,7 +73,8 @@ The following environment variables are supported in `generate` mode:
   will log sensitive information such as access tokens.
   This should not be needed unless you are a developer attempting to debug something
   particularly tricky.
-
+* `SYNAPSE_LOG_TESTING`: if set, Synapse will log additional information useful
+  for testing.
 
 ## Postgres
 
diff --git a/docker/conf/log.config b/docker/conf/log.config
index 90b5179838..5772321202 100644
--- a/docker/conf/log.config
+++ b/docker/conf/log.config
@@ -49,17 +49,35 @@ handlers:
     class: logging.StreamHandler
     formatter: precise
 
-{% if not SYNAPSE_LOG_SENSITIVE %}
-{#
-  If SYNAPSE_LOG_SENSITIVE is unset, then override synapse.storage.SQL to INFO
-  so that DEBUG entries (containing sensitive information) are not emitted.
-#}
 loggers:
+    # This is just here so we can leave `loggers` in the config regardless of whether
+    # we configure other loggers below (avoid empty yaml dict error).
+    _placeholder:
+        level: "INFO"
+
+    {% if not SYNAPSE_LOG_SENSITIVE %}
+    {#
+      If SYNAPSE_LOG_SENSITIVE is unset, then override synapse.storage.SQL to INFO
+      so that DEBUG entries (containing sensitive information) are not emitted.
+    #}
     synapse.storage.SQL:
         # beware: increasing this to DEBUG will make synapse log sensitive
         # information such as access tokens.
         level: INFO
-{% endif %}
+    {% endif %}
+
+    {% if SYNAPSE_LOG_TESTING %}
+    {#
+      If Synapse is under test, log a few more useful things for a developer
+      attempting to debug something particularly tricky.
+
+      With `synapse.visibility.filtered_event_debug`, it logs when events are (maybe
+      unexpectedly) filtered out of responses in tests. It's just nice to be able to
+      look at the CI log and figure out why an event isn't being returned.
+    #}
+    synapse.visibility.filtered_event_debug:
+        level: DEBUG
+    {% endif %}
 
 root:
     level: {{ SYNAPSE_LOG_LEVEL or "INFO" }}
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index 79b5b87397..87a740e3d4 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -40,6 +40,8 @@
 #         log level. INFO is the default.
 #   * SYNAPSE_LOG_SENSITIVE: If unset, SQL and SQL values won't be logged,
 #         regardless of the SYNAPSE_LOG_LEVEL setting.
+#   * SYNAPSE_LOG_TESTING: if set, Synapse will log additional information useful
+#     for testing.
 #
 # NOTE: According to Complement's ENTRYPOINT expectations for a homeserver image (as defined
 # in the project's README), this script may be run multiple times, and functionality should
@@ -947,6 +949,7 @@ def generate_worker_log_config(
     extra_log_template_args["SYNAPSE_LOG_SENSITIVE"] = environ.get(
         "SYNAPSE_LOG_SENSITIVE"
     )
+    extra_log_template_args["SYNAPSE_LOG_TESTING"] = environ.get("SYNAPSE_LOG_TESTING")
 
     # Render and write the file
     log_config_filepath = f"/conf/workers/{worker_name}.log.config"
diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh
index cba2799f15..131f26234e 100755
--- a/scripts-dev/complement.sh
+++ b/scripts-dev/complement.sh
@@ -269,6 +269,10 @@ if [[ -n "$SYNAPSE_TEST_LOG_LEVEL" ]]; then
   export PASS_SYNAPSE_LOG_SENSITIVE=1
 fi
 
+# Log a few more useful things for a developer attempting to debug something
+# particularly tricky.
+export PASS_SYNAPSE_LOG_TESTING=1
+
 # Run the tests!
 echo "Images built; running complement"
 cd "$COMPLEMENT_DIR"
diff --git a/synapse/visibility.py b/synapse/visibility.py
index 468e22f8f6..fc71dc92a4 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -41,7 +41,7 @@ from synapse.types.state import StateFilter
 from synapse.util import Clock
 
 logger = logging.getLogger(__name__)
-
+filtered_event_logger = logging.getLogger("synapse.visibility.filtered_event_debug")
 
 VISIBILITY_PRIORITY = (
     HistoryVisibility.WORLD_READABLE,
@@ -97,8 +97,8 @@ async def filter_events_for_client(
     events_before_filtering = events
     events = [e for e in events if not e.internal_metadata.is_soft_failed()]
     if len(events_before_filtering) != len(events):
-        if logger.isEnabledFor(logging.DEBUG):
-            logger.debug(
+        if filtered_event_logger.isEnabledFor(logging.DEBUG):
+            filtered_event_logger.debug(
                 "filter_events_for_client: Filtered out soft-failed events: Before=%s, After=%s",
                 [event.event_id for event in events_before_filtering],
                 [event.event_id for event in events],
@@ -319,7 +319,7 @@ def _check_client_allowed_to_see_event(
             _check_filter_send_to_client(event, clock, retention_policy, sender_ignored)
             == _CheckFilter.DENIED
         ):
-            logger.debug(
+            filtered_event_logger.debug(
                 "_check_client_allowed_to_see_event(event=%s): Filtered out event because `_check_filter_send_to_client` returned `_CheckFilter.DENIED`",
                 event.event_id,
             )
@@ -341,7 +341,7 @@ def _check_client_allowed_to_see_event(
             )
             return event
 
-        logger.debug(
+        filtered_event_logger.debug(
             "_check_client_allowed_to_see_event(event=%s): Filtered out event because it's an outlier",
             event.event_id,
         )
@@ -367,7 +367,7 @@ def _check_client_allowed_to_see_event(
 
     membership_result = _check_membership(user_id, event, visibility, state, is_peeking)
     if not membership_result.allowed:
-        logger.debug(
+        filtered_event_logger.debug(
             "_check_client_allowed_to_see_event(event=%s): Filtered out event because the user can't see the event because of their membership, membership_result.allowed=%s membership_result.joined=%s",
             event.event_id,
             membership_result.allowed,
@@ -378,7 +378,7 @@ def _check_client_allowed_to_see_event(
     # If the sender has been erased and the user was not joined at the time, we
     # must only return the redacted form.
     if sender_erased and not membership_result.joined:
-        logger.debug(
+        filtered_event_logger.debug(
             "_check_client_allowed_to_see_event(event=%s): Returning pruned event because `sender_erased` and the user was not joined at the time",
             event.event_id,
         )
diff --git a/tests/test_utils/logging_setup.py b/tests/test_utils/logging_setup.py
index c37f205ed0..199bb06a81 100644
--- a/tests/test_utils/logging_setup.py
+++ b/tests/test_utils/logging_setup.py
@@ -53,4 +53,16 @@ def setup_logging() -> None:
     log_level = os.environ.get("SYNAPSE_TEST_LOG_LEVEL", "ERROR")
     root_logger.setLevel(log_level)
 
+    # In order to not add noise by default (since we only log ERROR messages for trial
+    # tests as configured above), we only enable this for developers who are looking
+    # for more INFO or DEBUG output.
+    if root_logger.isEnabledFor(logging.INFO):
+        # Log when events are (maybe unexpectedly) filtered out of responses in tests. It's
+        # just nice to be able to look at the CI log and figure out why an event isn't being
+        # returned.
+        logging.getLogger("synapse.visibility.filtered_event_debug").setLevel(
+            logging.DEBUG
+        )
+
+    # Blow away the pyo3-log cache so that it reloads the configuration.
     reset_logging_config()
-- 
cgit 1.5.1


From e0f2429d137c74059f5b7f151297e28dbfd82d48 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Fri, 2 Jun 2023 15:13:50 +0200
Subject: Add a catch-all * to the supported relation types when redacting
 (#15705)

This is an update to the MSC3912 implementation.
---
 changelog.d/15705.feature                   |   1 +
 synapse/handlers/relations.py               |  16 +++--
 synapse/storage/databases/main/relations.py |  30 ++++++++
 tests/rest/client/test_redactions.py        | 104 +++++++++++++++++++++++++++-
 4 files changed, 143 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/15705.feature

(limited to 'synapse')

diff --git a/changelog.d/15705.feature b/changelog.d/15705.feature
new file mode 100644
index 0000000000..e3cbb5a12e
--- /dev/null
+++ b/changelog.d/15705.feature
@@ -0,0 +1 @@
+Add a catch-all * to the supported relation types when redacting an event and its related events. This is an update to [MSC3912](https://github.com/matrix-org/matrix-spec-proposals/pull/3861) implementation.
diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py
index 4824635162..db97f7aede 100644
--- a/synapse/handlers/relations.py
+++ b/synapse/handlers/relations.py
@@ -205,16 +205,22 @@ class RelationsHandler:
             event_id: The event IDs to look and redact relations of.
             initial_redaction_event: The redaction for the event referred to by
                 event_id.
-            relation_types: The types of relations to look for.
+            relation_types: The types of relations to look for. If "*" is in the list,
+                all related events will be redacted regardless of the type.
 
         Raises:
             ShadowBanError if the requester is shadow-banned
         """
-        related_event_ids = (
-            await self._main_store.get_all_relations_for_event_with_types(
-                event_id, relation_types
+        if "*" in relation_types:
+            related_event_ids = await self._main_store.get_all_relations_for_event(
+                event_id
+            )
+        else:
+            related_event_ids = (
+                await self._main_store.get_all_relations_for_event_with_types(
+                    event_id, relation_types
+                )
             )
-        )
 
         for related_event_id in related_event_ids:
             try:
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index 4a6c6c724d..96908f14ba 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -365,6 +365,36 @@ class RelationsWorkerStore(SQLBaseStore):
             func=get_all_relation_ids_for_event_with_types_txn,
         )
 
+    async def get_all_relations_for_event(
+        self,
+        event_id: str,
+    ) -> List[str]:
+        """Get the event IDs of all events that have a relation to the given event.
+
+        Args:
+            event_id: The event for which to look for related events.
+
+        Returns:
+            A list of the IDs of the events that relate to the given event.
+        """
+
+        def get_all_relation_ids_for_event_txn(
+            txn: LoggingTransaction,
+        ) -> List[str]:
+            rows = self.db_pool.simple_select_list_txn(
+                txn=txn,
+                table="event_relations",
+                keyvalues={"relates_to_id": event_id},
+                retcols=["event_id"],
+            )
+
+            return [row["event_id"] for row in rows]
+
+        return await self.db_pool.runInteraction(
+            desc="get_all_relation_ids_for_event",
+            func=get_all_relation_ids_for_event_txn,
+        )
+
     async def event_includes_relation(self, event_id: str) -> bool:
         """Check if the given event relates to another event.
 
diff --git a/tests/rest/client/test_redactions.py b/tests/rest/client/test_redactions.py
index 84a60c0b07..b43e95292c 100644
--- a/tests/rest/client/test_redactions.py
+++ b/tests/rest/client/test_redactions.py
@@ -217,9 +217,9 @@ class RedactionsTestCase(HomeserverTestCase):
             self._redact_event(self.mod_access_token, self.room_id, msg_id)
 
     @override_config({"experimental_features": {"msc3912_enabled": True}})
-    def test_redact_relations(self) -> None:
-        """Tests that we can redact the relations of an event at the same time as the
-        event itself.
+    def test_redact_relations_with_types(self) -> None:
+        """Tests that we can redact the relations of an event of specific types
+        at the same time as the event itself.
         """
         # Send a root event.
         res = self.helper.send_event(
@@ -317,6 +317,104 @@ class RedactionsTestCase(HomeserverTestCase):
         )
         self.assertNotIn("redacted_because", event_dict, event_dict)
 
+    @override_config({"experimental_features": {"msc3912_enabled": True}})
+    def test_redact_all_relations(self) -> None:
+        """Tests that we can redact all the relations of an event at the same time as the
+        event itself.
+        """
+        # Send a root event.
+        res = self.helper.send_event(
+            room_id=self.room_id,
+            type=EventTypes.Message,
+            content={"msgtype": "m.text", "body": "hello"},
+            tok=self.mod_access_token,
+        )
+        root_event_id = res["event_id"]
+
+        # Send an edit to this root event.
+        res = self.helper.send_event(
+            room_id=self.room_id,
+            type=EventTypes.Message,
+            content={
+                "body": " * hello world",
+                "m.new_content": {
+                    "body": "hello world",
+                    "msgtype": "m.text",
+                },
+                "m.relates_to": {
+                    "event_id": root_event_id,
+                    "rel_type": RelationTypes.REPLACE,
+                },
+                "msgtype": "m.text",
+            },
+            tok=self.mod_access_token,
+        )
+        edit_event_id = res["event_id"]
+
+        # Also send a threaded message whose root is the same as the edit's.
+        res = self.helper.send_event(
+            room_id=self.room_id,
+            type=EventTypes.Message,
+            content={
+                "msgtype": "m.text",
+                "body": "message 1",
+                "m.relates_to": {
+                    "event_id": root_event_id,
+                    "rel_type": RelationTypes.THREAD,
+                },
+            },
+            tok=self.mod_access_token,
+        )
+        threaded_event_id = res["event_id"]
+
+        # Also send a reaction, again with the same root.
+        res = self.helper.send_event(
+            room_id=self.room_id,
+            type=EventTypes.Reaction,
+            content={
+                "m.relates_to": {
+                    "rel_type": RelationTypes.ANNOTATION,
+                    "event_id": root_event_id,
+                    "key": "👍",
+                }
+            },
+            tok=self.mod_access_token,
+        )
+        reaction_event_id = res["event_id"]
+
+        # Redact the root event, specifying that we also want to delete all events that
+        # relate to it.
+        self._redact_event(
+            self.mod_access_token,
+            self.room_id,
+            root_event_id,
+            with_relations=["*"],
+        )
+
+        # Check that the root event got redacted.
+        event_dict = self.helper.get_event(
+            self.room_id, root_event_id, self.mod_access_token
+        )
+        self.assertIn("redacted_because", event_dict, event_dict)
+
+        # Check that the edit got redacted.
+        event_dict = self.helper.get_event(
+            self.room_id, edit_event_id, self.mod_access_token
+        )
+        self.assertIn("redacted_because", event_dict, event_dict)
+
+        # Check that the threaded message got redacted.
+        event_dict = self.helper.get_event(
+            self.room_id, threaded_event_id, self.mod_access_token
+        )
+        self.assertIn("redacted_because", event_dict, event_dict)
+
+        # Check that the reaction got redacted.
+        event_dict = self.helper.get_event(
+            self.room_id, reaction_event_id, self.mod_access_token
+        )
+        self.assertIn("redacted_because", event_dict, event_dict)
+
     @override_config({"experimental_features": {"msc3912_enabled": True}})
     def test_redact_relations_no_perms(self) -> None:
         """Tests that, when redacting a message along with its relations, if not all
-- 
cgit 1.5.1


From d0c4257f14addbf0c9072c2e34ae1c8294716ed5 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Fri, 2 Jun 2023 17:24:13 -0700
Subject: `N + 3`: Read from column `full_user_id` rather than `user_id` of
 tables `profiles` and `user_filters`  (#15649)

---
 changelog.d/15649.misc                             |  1 +
 synapse/api/filtering.py                           |  4 +-
 synapse/handlers/account_validity.py               |  2 +-
 synapse/handlers/admin.py                          |  2 +-
 synapse/handlers/auth.py                           |  2 +-
 synapse/handlers/deactivate_account.py             |  2 +-
 synapse/handlers/profile.py                        | 26 ++----
 synapse/handlers/register.py                       |  2 +-
 synapse/module_api/__init__.py                     |  4 +-
 synapse/push/mailer.py                             |  2 +-
 synapse/rest/client/filter.py                      |  2 +-
 synapse/rest/client/sync.py                        |  2 +-
 synapse/storage/databases/main/filtering.py        | 12 +--
 synapse/storage/databases/main/profile.py          | 12 +--
 synapse/storage/schema/__init__.py                 |  5 +-
 .../delta/78/01_validate_and_update_profiles.py    | 92 +++++++++++++++++++++
 .../78/02_validate_and_update_user_filters.py      | 95 ++++++++++++++++++++++
 tests/api/test_filtering.py                        | 25 ++----
 tests/handlers/test_profile.py                     | 28 ++-----
 tests/module_api/test_api.py                       |  6 +-
 tests/rest/client/test_filter.py                   |  4 +-
 tests/storage/test_profile.py                      | 17 ++--
 22 files changed, 252 insertions(+), 95 deletions(-)
 create mode 100644 changelog.d/15649.misc
 create mode 100644 synapse/storage/schema/main/delta/78/01_validate_and_update_profiles.py
 create mode 100644 synapse/storage/schema/main/delta/78/02_validate_and_update_user_filters.py

(limited to 'synapse')

diff --git a/changelog.d/15649.misc b/changelog.d/15649.misc
new file mode 100644
index 0000000000..fca38abe0f
--- /dev/null
+++ b/changelog.d/15649.misc
@@ -0,0 +1 @@
+Read from column `full_user_id` rather than `user_id` of tables `profiles` and `user_filters`.
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index 82aeef8d19..0995ecbe83 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -152,9 +152,9 @@ class Filtering:
         self.DEFAULT_FILTER_COLLECTION = FilterCollection(hs, {})
 
     async def get_user_filter(
-        self, user_localpart: str, filter_id: Union[int, str]
+        self, user_id: UserID, filter_id: Union[int, str]
     ) -> "FilterCollection":
-        result = await self.store.get_user_filter(user_localpart, filter_id)
+        result = await self.store.get_user_filter(user_id, filter_id)
         return FilterCollection(self._hs, result)
 
     def add_user_filter(self, user_id: UserID, user_filter: JsonDict) -> Awaitable[int]:
diff --git a/synapse/handlers/account_validity.py b/synapse/handlers/account_validity.py
index 4aa4ebf7e4..f1a7a05df6 100644
--- a/synapse/handlers/account_validity.py
+++ b/synapse/handlers/account_validity.py
@@ -164,7 +164,7 @@ class AccountValidityHandler:
 
         try:
             user_display_name = await self.store.get_profile_displayname(
-                UserID.from_string(user_id).localpart
+                UserID.from_string(user_id)
             )
             if user_display_name is None:
                 user_display_name = user_id
diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py
index b06f25b03c..119c7f8384 100644
--- a/synapse/handlers/admin.py
+++ b/synapse/handlers/admin.py
@@ -89,7 +89,7 @@ class AdminHandler:
         }
 
         # Add additional user metadata
-        profile = await self._store.get_profileinfo(user.localpart)
+        profile = await self._store.get_profileinfo(user)
         threepids = await self._store.user_get_threepids(user.to_string())
         external_ids = [
             ({"auth_provider": auth_provider, "external_id": external_id})
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 4f986d90cb..59ecafa6a0 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -1759,7 +1759,7 @@ class AuthHandler:
             return
 
         user_profile_data = await self.store.get_profileinfo(
-            UserID.from_string(registered_user_id).localpart
+            UserID.from_string(registered_user_id)
         )
 
         # Store any extra attributes which will be passed in the login response.
diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py
index f299b89a1b..67adeae6a7 100644
--- a/synapse/handlers/deactivate_account.py
+++ b/synapse/handlers/deactivate_account.py
@@ -297,5 +297,5 @@ class DeactivateAccountHandler:
         # Add the user to the directory, if necessary. Note that
         # this must be done after the user is re-activated, because
         # deactivated users are excluded from the user directory.
-        profile = await self.store.get_profileinfo(user.localpart)
+        profile = await self.store.get_profileinfo(user)
         await self.user_directory_handler.handle_local_profile_change(user_id, profile)
diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py
index a9160c87e3..a7f8c5e636 100644
--- a/synapse/handlers/profile.py
+++ b/synapse/handlers/profile.py
@@ -67,7 +67,7 @@ class ProfileHandler:
         target_user = UserID.from_string(user_id)
 
         if self.hs.is_mine(target_user):
-            profileinfo = await self.store.get_profileinfo(target_user.localpart)
+            profileinfo = await self.store.get_profileinfo(target_user)
             if profileinfo.display_name is None:
                 raise SynapseError(404, "Profile was not found", Codes.NOT_FOUND)
 
@@ -99,9 +99,7 @@ class ProfileHandler:
     async def get_displayname(self, target_user: UserID) -> Optional[str]:
         if self.hs.is_mine(target_user):
             try:
-                displayname = await self.store.get_profile_displayname(
-                    target_user.localpart
-                )
+                displayname = await self.store.get_profile_displayname(target_user)
             except StoreError as e:
                 if e.code == 404:
                     raise SynapseError(404, "Profile was not found", Codes.NOT_FOUND)
@@ -147,7 +145,7 @@ class ProfileHandler:
             raise AuthError(400, "Cannot set another user's displayname")
 
         if not by_admin and not self.hs.config.registration.enable_set_displayname:
-            profile = await self.store.get_profileinfo(target_user.localpart)
+            profile = await self.store.get_profileinfo(target_user)
             if profile.display_name:
                 raise SynapseError(
                     400,
@@ -180,7 +178,7 @@ class ProfileHandler:
 
         await self.store.set_profile_displayname(target_user, displayname_to_set)
 
-        profile = await self.store.get_profileinfo(target_user.localpart)
+        profile = await self.store.get_profileinfo(target_user)
         await self.user_directory_handler.handle_local_profile_change(
             target_user.to_string(), profile
         )
@@ -194,9 +192,7 @@ class ProfileHandler:
     async def get_avatar_url(self, target_user: UserID) -> Optional[str]:
         if self.hs.is_mine(target_user):
             try:
-                avatar_url = await self.store.get_profile_avatar_url(
-                    target_user.localpart
-                )
+                avatar_url = await self.store.get_profile_avatar_url(target_user)
             except StoreError as e:
                 if e.code == 404:
                     raise SynapseError(404, "Profile was not found", Codes.NOT_FOUND)
@@ -241,7 +237,7 @@ class ProfileHandler:
             raise AuthError(400, "Cannot set another user's avatar_url")
 
         if not by_admin and not self.hs.config.registration.enable_set_avatar_url:
-            profile = await self.store.get_profileinfo(target_user.localpart)
+            profile = await self.store.get_profileinfo(target_user)
             if profile.avatar_url:
                 raise SynapseError(
                     400, "Changing avatar is disabled on this server", Codes.FORBIDDEN
@@ -272,7 +268,7 @@ class ProfileHandler:
 
         await self.store.set_profile_avatar_url(target_user, avatar_url_to_set)
 
-        profile = await self.store.get_profileinfo(target_user.localpart)
+        profile = await self.store.get_profileinfo(target_user)
         await self.user_directory_handler.handle_local_profile_change(
             target_user.to_string(), profile
         )
@@ -369,14 +365,10 @@ class ProfileHandler:
         response = {}
         try:
             if just_field is None or just_field == "displayname":
-                response["displayname"] = await self.store.get_profile_displayname(
-                    user.localpart
-                )
+                response["displayname"] = await self.store.get_profile_displayname(user)
 
             if just_field is None or just_field == "avatar_url":
-                response["avatar_url"] = await self.store.get_profile_avatar_url(
-                    user.localpart
-                )
+                response["avatar_url"] = await self.store.get_profile_avatar_url(user)
         except StoreError as e:
             if e.code == 404:
                 raise SynapseError(404, "Profile was not found", Codes.NOT_FOUND)
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index c80946c2e9..a2d3f03061 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -315,7 +315,7 @@ class RegistrationHandler:
                 approved=approved,
             )
 
-            profile = await self.store.get_profileinfo(localpart)
+            profile = await self.store.get_profileinfo(user)
             await self.user_directory_handler.handle_local_profile_change(
                 user_id, profile
             )
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index a8d6224a45..84b2aef620 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -655,7 +655,9 @@ class ModuleApi:
         Returns:
             The profile information (i.e. display name and avatar URL).
         """
-        return await self._store.get_profileinfo(localpart)
+        server_name = self._hs.hostname
+        user_id = UserID.from_string(f"@{localpart}:{server_name}")
+        return await self._store.get_profileinfo(user_id)
 
     async def get_threepids_for_user(self, user_id: str) -> List[Dict[str, str]]:
         """Look up the threepids (email addresses and phone numbers) associated with the
diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py
index 491a09b71d..79e0627b6a 100644
--- a/synapse/push/mailer.py
+++ b/synapse/push/mailer.py
@@ -247,7 +247,7 @@ class Mailer:
 
         try:
             user_display_name = await self.store.get_profile_displayname(
-                UserID.from_string(user_id).localpart
+                UserID.from_string(user_id)
             )
             if user_display_name is None:
                 user_display_name = user_id
diff --git a/synapse/rest/client/filter.py b/synapse/rest/client/filter.py
index 04561f36d7..5da1e511a2 100644
--- a/synapse/rest/client/filter.py
+++ b/synapse/rest/client/filter.py
@@ -58,7 +58,7 @@ class GetFilterRestServlet(RestServlet):
 
         try:
             filter_collection = await self.filtering.get_user_filter(
-                user_localpart=target_user.localpart, filter_id=filter_id_int
+                user_id=target_user, filter_id=filter_id_int
             )
         except StoreError as e:
             if e.code != 404:
diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py
index 03b0578945..d7854ed4fd 100644
--- a/synapse/rest/client/sync.py
+++ b/synapse/rest/client/sync.py
@@ -178,7 +178,7 @@ class SyncRestServlet(RestServlet):
         else:
             try:
                 filter_collection = await self.filtering.get_user_filter(
-                    user.localpart, filter_id
+                    user, filter_id
                 )
             except StoreError as err:
                 if err.code != 404:
diff --git a/synapse/storage/databases/main/filtering.py b/synapse/storage/databases/main/filtering.py
index f777777cbf..fff417f9e3 100644
--- a/synapse/storage/databases/main/filtering.py
+++ b/synapse/storage/databases/main/filtering.py
@@ -145,7 +145,7 @@ class FilteringWorkerStore(SQLBaseStore):
 
     @cached(num_args=2)
     async def get_user_filter(
-        self, user_localpart: str, filter_id: Union[int, str]
+        self, user_id: UserID, filter_id: Union[int, str]
     ) -> JsonDict:
         # filter_id is BIGINT UNSIGNED, so if it isn't a number, fail
         # with a coherent error message rather than 500 M_UNKNOWN.
@@ -156,7 +156,7 @@ class FilteringWorkerStore(SQLBaseStore):
 
         def_json = await self.db_pool.simple_select_one_onecol(
             table="user_filters",
-            keyvalues={"user_id": user_localpart, "filter_id": filter_id},
+            keyvalues={"full_user_id": user_id.to_string(), "filter_id": filter_id},
             retcol="filter_json",
             allow_none=False,
             desc="get_user_filter",
@@ -172,15 +172,15 @@ class FilteringWorkerStore(SQLBaseStore):
         def _do_txn(txn: LoggingTransaction) -> int:
             sql = (
                 "SELECT filter_id FROM user_filters "
-                "WHERE user_id = ? AND filter_json = ?"
+                "WHERE full_user_id = ? AND filter_json = ?"
             )
-            txn.execute(sql, (user_id.localpart, bytearray(def_json)))
+            txn.execute(sql, (user_id.to_string(), bytearray(def_json)))
             filter_id_response = txn.fetchone()
             if filter_id_response is not None:
                 return filter_id_response[0]
 
-            sql = "SELECT MAX(filter_id) FROM user_filters WHERE user_id = ?"
-            txn.execute(sql, (user_id.localpart,))
+            sql = "SELECT MAX(filter_id) FROM user_filters WHERE full_user_id = ?"
+            txn.execute(sql, (user_id.to_string(),))
             max_id = cast(Tuple[Optional[int]], txn.fetchone())[0]
             if max_id is None:
                 filter_id = 0
diff --git a/synapse/storage/databases/main/profile.py b/synapse/storage/databases/main/profile.py
index 21d54c7a7a..3ba9cc8853 100644
--- a/synapse/storage/databases/main/profile.py
+++ b/synapse/storage/databases/main/profile.py
@@ -137,11 +137,11 @@ class ProfileWorkerStore(SQLBaseStore):
 
         return 50
 
-    async def get_profileinfo(self, user_localpart: str) -> ProfileInfo:
+    async def get_profileinfo(self, user_id: UserID) -> ProfileInfo:
         try:
             profile = await self.db_pool.simple_select_one(
                 table="profiles",
-                keyvalues={"user_id": user_localpart},
+                keyvalues={"full_user_id": user_id.to_string()},
                 retcols=("displayname", "avatar_url"),
                 desc="get_profileinfo",
             )
@@ -156,18 +156,18 @@ class ProfileWorkerStore(SQLBaseStore):
             avatar_url=profile["avatar_url"], display_name=profile["displayname"]
         )
 
-    async def get_profile_displayname(self, user_localpart: str) -> Optional[str]:
+    async def get_profile_displayname(self, user_id: UserID) -> Optional[str]:
         return await self.db_pool.simple_select_one_onecol(
             table="profiles",
-            keyvalues={"user_id": user_localpart},
+            keyvalues={"full_user_id": user_id.to_string()},
             retcol="displayname",
             desc="get_profile_displayname",
         )
 
-    async def get_profile_avatar_url(self, user_localpart: str) -> Optional[str]:
+    async def get_profile_avatar_url(self, user_id: UserID) -> Optional[str]:
         return await self.db_pool.simple_select_one_onecol(
             table="profiles",
-            keyvalues={"user_id": user_localpart},
+            keyvalues={"full_user_id": user_id.to_string()},
             retcol="avatar_url",
             desc="get_profile_avatar_url",
         )
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 5cc786f030..fc190a8b13 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 77  # remember to update the list below when updating
+SCHEMA_VERSION = 78  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -103,6 +103,9 @@ Changes in SCHEMA_VERSION = 76:
 
 Changes in SCHEMA_VERSION = 77
     - (Postgres) Add NOT VALID CHECK (full_user_id IS NOT NULL) to tables profiles and user_filters
+
+Changes in SCHEMA_VERSION = 78
+    - Validate check (full_user_id IS NOT NULL) on tables profiles and user_filters
 """
 
 
diff --git a/synapse/storage/schema/main/delta/78/01_validate_and_update_profiles.py b/synapse/storage/schema/main/delta/78/01_validate_and_update_profiles.py
new file mode 100644
index 0000000000..8398d8f548
--- /dev/null
+++ b/synapse/storage/schema/main/delta/78/01_validate_and_update_profiles.py
@@ -0,0 +1,92 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from synapse.config.homeserver import HomeServerConfig
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
+
+
+def run_upgrade(
+    cur: LoggingTransaction,
+    database_engine: BaseDatabaseEngine,
+    config: HomeServerConfig,
+) -> None:
+    """
+    Step 3 of the staged replacement of `profiles.user_id` by `full_user_id` (the
+    database schema docs describe the whole sequence): make `full_user_id`
+    mandatory, via a table rebuild on SQLite or constraint validation on Postgres.
+    """
+    server_name = config.server.server_name
+
+    if not isinstance(database_engine, PostgresEngine):
+        # SQLite needs the table rewritten in order to add the constraint.
+        # Clear out a temporary table that an earlier failed attempt may have left.
+        cur.execute("DROP TABLE IF EXISTS temp_profiles")
+
+        build_sql = """
+        CREATE TABLE temp_profiles (
+            full_user_id text NOT NULL,
+            user_id text,
+            displayname text,
+            avatar_url text,
+            UNIQUE (full_user_id),
+            UNIQUE (user_id)
+        )
+        """
+        cur.execute(build_sql)
+
+        populate_sql = """
+        INSERT INTO temp_profiles (
+            user_id,
+            displayname,
+            avatar_url,
+            full_user_id)
+            SELECT user_id, displayname, avatar_url, '@' || user_id || ':' || ? FROM profiles
+        """
+        cur.execute(populate_sql, (f"{server_name}",))
+
+        remove_old_sql = """
+        DROP TABLE profiles
+        """
+        cur.execute(remove_old_sql)
+
+        swap_in_sql = """
+        ALTER TABLE temp_profiles RENAME to profiles
+        """
+        cur.execute(swap_in_sql)
+
+    else:
+        # Look for rows that still lack a full_user_id.
+        find_unmigrated_sql = """
+        SELECT user_id from profiles WHERE full_user_id IS NULL
+        """
+        cur.execute(find_unmigrated_sql)
+
+        if cur.fetchall():
+            # The background job left some rows behind; fill them in now so that
+            # the constraint can be validated afterwards.
+            backfill_sql = """
+            UPDATE profiles
+            SET full_user_id = '@' || user_id || ?
+            WHERE user_id IN (
+                SELECT user_id FROM profiles WHERE full_user_id IS NULL
+            )
+            """
+            cur.execute(backfill_sql, (f":{server_name}",))
+
+        # With no NULL rows left, the constraint can be validated.
+        enforce_sql = """
+        ALTER TABLE profiles VALIDATE CONSTRAINT full_user_id_not_null
+        """
+        cur.execute(enforce_sql)
diff --git a/synapse/storage/schema/main/delta/78/02_validate_and_update_user_filters.py b/synapse/storage/schema/main/delta/78/02_validate_and_update_user_filters.py
new file mode 100644
index 0000000000..8ef63335e7
--- /dev/null
+++ b/synapse/storage/schema/main/delta/78/02_validate_and_update_user_filters.py
@@ -0,0 +1,95 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from synapse.config.homeserver import HomeServerConfig
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
+
+
+def run_upgrade(
+    cur: LoggingTransaction,
+    database_engine: BaseDatabaseEngine,
+    config: HomeServerConfig,
+) -> None:
+    """
+    Part 3 of a multi-step migration to drop the column `user_id` and replace it with
+    `full_user_id`. See the database schema docs for more information on the full
+    migration steps.
+
+    Postgres backfills leftover NULL `full_user_id` rows, then validates the NOT NULL
+    check; SQLite rebuilds `user_filters` with `full_user_id` declared NOT NULL.
+    """
+    hostname = config.server.server_name
+
+    if isinstance(database_engine, PostgresEngine):
+        # check if the constraint can be validated
+        check_sql = """
+        SELECT user_id from user_filters WHERE full_user_id IS NULL
+        """
+        cur.execute(check_sql)
+        res = cur.fetchall()
+
+        if res:
+            # there are rows the background job missed, finish them here before we validate constraint
+            process_rows_sql = """
+            UPDATE user_filters
+            SET full_user_id = '@' || user_id || ?
+            WHERE user_id IN (
+                SELECT user_id FROM user_filters WHERE full_user_id IS NULL
+            )
+            """
+            cur.execute(process_rows_sql, (f":{hostname}",))
+
+        # Now we can validate
+        validate_sql = """
+        ALTER TABLE user_filters VALIDATE CONSTRAINT full_user_id_not_null
+        """
+        cur.execute(validate_sql)
+
+    else:
+        # in SQLite we need to rewrite the table to add the constraint.
+        # First drop any temporary table that might be here from a previous failed migration.
+        cur.execute("DROP TABLE IF EXISTS temp_user_filters")
+
+        # No UNIQUE on either user column: one user may own several filters.
+        create_sql = """
+        CREATE TABLE temp_user_filters (
+            full_user_id text NOT NULL,
+            user_id text NOT NULL,
+            filter_id bigint NOT NULL,
+            filter_json bytea NOT NULL
+        )
+        """
+        cur.execute(create_sql)
+
+        copy_sql = """
+        INSERT INTO temp_user_filters (
+            user_id,
+            filter_id,
+            filter_json,
+            full_user_id)
+            SELECT user_id, filter_id, filter_json, '@' || user_id || ':' || ? FROM user_filters
+        """
+        cur.execute(copy_sql, (f"{hostname}",))
+
+        cur.execute("DROP TABLE user_filters")
+        cur.execute("ALTER TABLE temp_user_filters RENAME to user_filters")
+
+        # SQLite index names are schema-wide: an index of this name on the old table
+        # would turn IF NOT EXISTS into a silent no-op, so create it after the swap.
+        index_sql = """
+        CREATE UNIQUE INDEX IF NOT EXISTS user_filters_unique ON
+            user_filters (user_id, filter_id)
+        """
+        cur.execute(index_sql)
diff --git a/tests/api/test_filtering.py b/tests/api/test_filtering.py
index aa6af5ad7b..868f0c6995 100644
--- a/tests/api/test_filtering.py
+++ b/tests/api/test_filtering.py
@@ -35,7 +35,6 @@ from tests.events.test_utils import MockEvent
 
 user_id = UserID.from_string("@test_user:test")
 user2_id = UserID.from_string("@test_user2:test")
-user_localpart = "test_user"
 
 
 class FilteringTestCase(unittest.HomeserverTestCase):
@@ -449,9 +448,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         ]
 
         user_filter = self.get_success(
-            self.filtering.get_user_filter(
-                user_localpart=user_localpart, filter_id=filter_id
-            )
+            self.filtering.get_user_filter(user_id=user_id, filter_id=filter_id)
         )
 
         results = self.get_success(user_filter.filter_presence(presence_states))
@@ -479,9 +476,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         ]
 
         user_filter = self.get_success(
-            self.filtering.get_user_filter(
-                user_localpart=user_localpart + "2", filter_id=filter_id
-            )
+            self.filtering.get_user_filter(user_id=user2_id, filter_id=filter_id)
         )
 
         results = self.get_success(user_filter.filter_presence(presence_states))
@@ -498,9 +493,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         events = [event]
 
         user_filter = self.get_success(
-            self.filtering.get_user_filter(
-                user_localpart=user_localpart, filter_id=filter_id
-            )
+            self.filtering.get_user_filter(user_id=user_id, filter_id=filter_id)
         )
 
         results = self.get_success(user_filter.filter_room_state(events=events))
@@ -519,9 +512,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         events = [event]
 
         user_filter = self.get_success(
-            self.filtering.get_user_filter(
-                user_localpart=user_localpart, filter_id=filter_id
-            )
+            self.filtering.get_user_filter(user_id=user_id, filter_id=filter_id)
         )
 
         results = self.get_success(user_filter.filter_room_state(events))
@@ -603,9 +594,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
             user_filter_json,
             (
                 self.get_success(
-                    self.datastore.get_user_filter(
-                        user_localpart=user_localpart, filter_id=0
-                    )
+                    self.datastore.get_user_filter(user_id=user_id, filter_id=0)
                 )
             ),
         )
@@ -620,9 +609,7 @@ class FilteringTestCase(unittest.HomeserverTestCase):
         )
 
         filter = self.get_success(
-            self.filtering.get_user_filter(
-                user_localpart=user_localpart, filter_id=filter_id
-            )
+            self.filtering.get_user_filter(user_id=user_id, filter_id=filter_id)
         )
 
         self.assertEqual(filter.get_filter_json(), user_filter_json)
diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py
index 64a9a22afe..196ceb0b82 100644
--- a/tests/handlers/test_profile.py
+++ b/tests/handlers/test_profile.py
@@ -80,11 +80,7 @@ class ProfileTestCase(unittest.HomeserverTestCase):
         )
 
         self.assertEqual(
-            (
-                self.get_success(
-                    self.store.get_profile_displayname(self.frank.localpart)
-                )
-            ),
+            (self.get_success(self.store.get_profile_displayname(self.frank))),
             "Frank Jr.",
         )
 
@@ -96,11 +92,7 @@ class ProfileTestCase(unittest.HomeserverTestCase):
         )
 
         self.assertEqual(
-            (
-                self.get_success(
-                    self.store.get_profile_displayname(self.frank.localpart)
-                )
-            ),
+            (self.get_success(self.store.get_profile_displayname(self.frank))),
             "Frank",
         )
 
@@ -112,7 +104,7 @@ class ProfileTestCase(unittest.HomeserverTestCase):
         )
 
         self.assertIsNone(
-            self.get_success(self.store.get_profile_displayname(self.frank.localpart))
+            self.get_success(self.store.get_profile_displayname(self.frank))
         )
 
     def test_set_my_name_if_disabled(self) -> None:
@@ -122,11 +114,7 @@ class ProfileTestCase(unittest.HomeserverTestCase):
         self.get_success(self.store.set_profile_displayname(self.frank, "Frank"))
 
         self.assertEqual(
-            (
-                self.get_success(
-                    self.store.get_profile_displayname(self.frank.localpart)
-                )
-            ),
+            (self.get_success(self.store.get_profile_displayname(self.frank))),
             "Frank",
         )
 
@@ -201,7 +189,7 @@ class ProfileTestCase(unittest.HomeserverTestCase):
         )
 
         self.assertEqual(
-            (self.get_success(self.store.get_profile_avatar_url(self.frank.localpart))),
+            (self.get_success(self.store.get_profile_avatar_url(self.frank))),
             "http://my.server/pic.gif",
         )
 
@@ -215,7 +203,7 @@ class ProfileTestCase(unittest.HomeserverTestCase):
         )
 
         self.assertEqual(
-            (self.get_success(self.store.get_profile_avatar_url(self.frank.localpart))),
+            (self.get_success(self.store.get_profile_avatar_url(self.frank))),
             "http://my.server/me.png",
         )
 
@@ -229,7 +217,7 @@ class ProfileTestCase(unittest.HomeserverTestCase):
         )
 
         self.assertIsNone(
-            (self.get_success(self.store.get_profile_avatar_url(self.frank.localpart))),
+            (self.get_success(self.store.get_profile_avatar_url(self.frank))),
         )
 
     def test_set_my_avatar_if_disabled(self) -> None:
@@ -241,7 +229,7 @@ class ProfileTestCase(unittest.HomeserverTestCase):
         )
 
         self.assertEqual(
-            (self.get_success(self.store.get_profile_avatar_url(self.frank.localpart))),
+            (self.get_success(self.store.get_profile_avatar_url(self.frank))),
             "http://my.server/me.png",
         )
 
diff --git a/tests/module_api/test_api.py b/tests/module_api/test_api.py
index bff7114cd8..b3310abe1b 100644
--- a/tests/module_api/test_api.py
+++ b/tests/module_api/test_api.py
@@ -28,7 +28,7 @@ from synapse.module_api import ModuleApi
 from synapse.rest import admin
 from synapse.rest.client import login, notifications, presence, profile, room
 from synapse.server import HomeServer
-from synapse.types import JsonDict, create_requester
+from synapse.types import JsonDict, UserID, create_requester
 from synapse.util import Clock
 
 from tests.events.test_presence_router import send_presence_update, sync_presence
@@ -103,7 +103,9 @@ class ModuleApiTestCase(BaseModuleApiTestCase):
         self.assertEqual(email["added_at"], 0)
 
         # Check that the displayname was assigned
-        displayname = self.get_success(self.store.get_profile_displayname("bob"))
+        displayname = self.get_success(
+            self.store.get_profile_displayname(UserID.from_string("@bob:test"))
+        )
         self.assertEqual(displayname, "Bobberino")
 
     def test_can_register_admin_user(self) -> None:
diff --git a/tests/rest/client/test_filter.py b/tests/rest/client/test_filter.py
index 9faa9de050..a2d5d340be 100644
--- a/tests/rest/client/test_filter.py
+++ b/tests/rest/client/test_filter.py
@@ -46,7 +46,9 @@ class FilterTestCase(unittest.HomeserverTestCase):
         self.assertEqual(channel.code, 200)
         self.assertEqual(channel.json_body, {"filter_id": "0"})
         filter = self.get_success(
-            self.store.get_user_filter(user_localpart="apple", filter_id=0)
+            self.store.get_user_filter(
+                user_id=UserID.from_string(FilterTestCase.user_id), filter_id=0
+            )
         )
         self.pump()
         self.assertEqual(filter, self.EXAMPLE_FILTER)
diff --git a/tests/storage/test_profile.py b/tests/storage/test_profile.py
index f9cf0fcb82..fe5bb77913 100644
--- a/tests/storage/test_profile.py
+++ b/tests/storage/test_profile.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.server import HomeServer
@@ -35,18 +36,14 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual(
             "Frank",
-            (
-                self.get_success(
-                    self.store.get_profile_displayname(self.u_frank.localpart)
-                )
-            ),
+            (self.get_success(self.store.get_profile_displayname(self.u_frank))),
         )
 
         # test set to None
         self.get_success(self.store.set_profile_displayname(self.u_frank, None))
 
         self.assertIsNone(
-            self.get_success(self.store.get_profile_displayname(self.u_frank.localpart))
+            self.get_success(self.store.get_profile_displayname(self.u_frank))
         )
 
     def test_avatar_url(self) -> None:
@@ -58,18 +55,14 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual(
             "http://my.site/here",
-            (
-                self.get_success(
-                    self.store.get_profile_avatar_url(self.u_frank.localpart)
-                )
-            ),
+            (self.get_success(self.store.get_profile_avatar_url(self.u_frank))),
         )
 
         # test set to None
         self.get_success(self.store.set_profile_avatar_url(self.u_frank, None))
 
         self.assertIsNone(
-            self.get_success(self.store.get_profile_avatar_url(self.u_frank.localpart))
+            self.get_success(self.store.get_profile_avatar_url(self.u_frank))
         )
 
     def test_profiles_bg_migration(self) -> None:
-- 
cgit 1.5.1


From f9561b9e37e4cbd97a71dd10549f1f03d3f01b5e Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Mon, 5 Jun 2023 23:38:52 -0500
Subject: Some house keeping on `maybe_backfill()` functions (#15709)

---
 changelog.d/15709.misc         |  1 +
 synapse/handlers/federation.py | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)
 create mode 100644 changelog.d/15709.misc

(limited to 'synapse')

diff --git a/changelog.d/15709.misc b/changelog.d/15709.misc
new file mode 100644
index 0000000000..e9ce84a940
--- /dev/null
+++ b/changelog.d/15709.misc
@@ -0,0 +1 @@
+Update docstring and traces on `maybe_backfill()` functions.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 2eb28d55ac..57d6b70cff 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -200,6 +200,7 @@ class FederationHandler:
             )
 
     @trace
+    @tag_args
     async def maybe_backfill(
         self, room_id: str, current_depth: int, limit: int
     ) -> bool:
@@ -214,6 +215,9 @@ class FederationHandler:
             limit: The number of events that the pagination request will
                 return. This is used as part of the heuristic to decide if we
                 should back paginate.
+
+        Returns:
+            True if we actually tried to backfill something, otherwise False.
         """
         # Starting the processing time here so we can include the room backfill
         # linearizer lock queue in the timing
@@ -227,6 +231,8 @@ class FederationHandler:
                 processing_start_time=processing_start_time,
             )
 
+    @trace
+    @tag_args
     async def _maybe_backfill_inner(
         self,
         room_id: str,
@@ -247,6 +253,9 @@ class FederationHandler:
             limit: The max number of events to request from the remote federated server.
             processing_start_time: The time when `maybe_backfill` started processing.
                 Only used for timing. If `None`, no timing observation will be made.
+
+        Returns:
+            True if we actually tried to backfill something, otherwise False.
         """
         backwards_extremities = [
             _BackfillPoint(event_id, depth, _BackfillPointType.BACKWARDS_EXTREMITY)
@@ -302,6 +311,14 @@ class FederationHandler:
             len(sorted_backfill_points),
             sorted_backfill_points,
         )
+        set_tag(
+            SynapseTags.RESULT_PREFIX + "sorted_backfill_points",
+            str(sorted_backfill_points),
+        )
+        set_tag(
+            SynapseTags.RESULT_PREFIX + "sorted_backfill_points.length",
+            str(len(sorted_backfill_points)),
+        )
 
         # If we have no backfill points lower than the `current_depth` then
         # either we can a) bail or b) still attempt to backfill. We opt to try
-- 
cgit 1.5.1


From f880e64b11bd03d1ebd710b34b541d5b2e044baa Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 6 Jun 2023 04:11:07 -0400
Subject: Stabilize support for MSC3952: Intentional mentions. (#15520)

---
 changelog.d/15520.feature                   |  1 +
 rust/benches/evaluator.rs                   |  3 ---
 rust/src/push/base_rules.rs                 |  8 +++----
 rust/src/push/evaluator.rs                  | 10 ++++-----
 rust/src/push/mod.rs                        |  7 ------
 stubs/synapse/synapse_rust/push.pyi         |  1 -
 synapse/api/constants.py                    |  2 +-
 synapse/config/experimental.py              |  5 -----
 synapse/events/validator.py                 |  9 ++------
 synapse/push/bulk_push_rule_evaluator.py    |  8 +------
 synapse/rest/client/versions.py             |  2 --
 synapse/storage/databases/main/push_rule.py |  1 -
 tests/push/test_bulk_push_rule_evaluator.py | 34 +++++++++++------------------
 13 files changed, 27 insertions(+), 64 deletions(-)
 create mode 100644 changelog.d/15520.feature

(limited to 'synapse')

diff --git a/changelog.d/15520.feature b/changelog.d/15520.feature
new file mode 100644
index 0000000000..f4fd40ab94
--- /dev/null
+++ b/changelog.d/15520.feature
@@ -0,0 +1 @@
+Enable support for [MSC3952](https://github.com/matrix-org/matrix-spec-proposals/pull/3952): intentional mentions.
diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs
index 64e13f6486..c2f33258a4 100644
--- a/rust/benches/evaluator.rs
+++ b/rust/benches/evaluator.rs
@@ -13,8 +13,6 @@
 // limitations under the License.
 
 #![feature(test)]
-use std::collections::BTreeSet;
-
 use synapse::push::{
     evaluator::PushRuleEvaluator, Condition, EventMatchCondition, FilteredPushRules, JsonValue,
     PushRules, SimpleJsonValue,
@@ -197,7 +195,6 @@ fn bench_eval_message(b: &mut Bencher) {
         false,
         false,
         false,
-        false,
     );
 
     b.iter(|| eval.run(&rules, Some("bob"), Some("person")));
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index 51372e1553..9d6c304d92 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -142,11 +142,11 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
         default_enabled: true,
     },
     PushRule {
-        rule_id: Cow::Borrowed(".org.matrix.msc3952.is_user_mention"),
+        rule_id: Cow::Borrowed("global/override/.m.is_user_mention"),
         priority_class: 5,
         conditions: Cow::Borrowed(&[Condition::Known(
             KnownCondition::ExactEventPropertyContainsType(EventPropertyIsTypeCondition {
-                key: Cow::Borrowed("content.org\\.matrix\\.msc3952\\.mentions.user_ids"),
+                key: Cow::Borrowed("content.m\\.mentions.user_ids"),
                 value_type: Cow::Borrowed(&EventMatchPatternType::UserId),
             }),
         )]),
@@ -163,11 +163,11 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
         default_enabled: true,
     },
     PushRule {
-        rule_id: Cow::Borrowed(".org.matrix.msc3952.is_room_mention"),
+        rule_id: Cow::Borrowed("global/override/.m.is_room_mention"),
         priority_class: 5,
         conditions: Cow::Borrowed(&[
             Condition::Known(KnownCondition::EventPropertyIs(EventPropertyIsCondition {
-                key: Cow::Borrowed("content.org\\.matrix\\.msc3952\\.mentions.room"),
+                key: Cow::Borrowed("content.m\\.mentions.room"),
                 value: Cow::Borrowed(&SimpleJsonValue::Bool(true)),
             })),
             Condition::Known(KnownCondition::SenderNotificationPermission {
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index 2d7c4c06be..59c53b1776 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -70,7 +70,9 @@ pub struct PushRuleEvaluator {
     /// The "content.body", if any.
     body: String,
 
-    /// True if the event has a mentions property and MSC3952 support is enabled.
+    /// True if the event has an m.mentions property. (Note that this is a separate
+    /// flag instead of checking flattened_keys since the m.mentions property
+    /// might be an empty map and not appear in flattened_keys.)
     has_mentions: bool,
 
     /// The number of users in the room.
@@ -155,9 +157,7 @@ impl PushRuleEvaluator {
             let rule_id = &push_rule.rule_id().to_string();
 
             // For backwards-compatibility the legacy mention rules are disabled
-            // if the event contains the 'm.mentions' property (and if the
-            // experimental feature is enabled, both of these are represented
-            // by the has_mentions flag).
+            // if the event contains the 'm.mentions' property.
             if self.has_mentions
                 && (rule_id == "global/override/.m.rule.contains_display_name"
                     || rule_id == "global/content/.m.rule.contains_user_name"
@@ -562,7 +562,7 @@ fn test_requires_room_version_supports_condition() {
     };
     let rules = PushRules::new(vec![custom_rule]);
     result = evaluator.run(
-        &FilteredPushRules::py_new(rules, BTreeMap::new(), true, false, true, false, false),
+        &FilteredPushRules::py_new(rules, BTreeMap::new(), true, false, true, false),
         None,
         None,
     );
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index f19d3c739f..514980579b 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -527,7 +527,6 @@ pub struct FilteredPushRules {
     msc1767_enabled: bool,
     msc3381_polls_enabled: bool,
     msc3664_enabled: bool,
-    msc3952_intentional_mentions: bool,
     msc3958_suppress_edits_enabled: bool,
 }
 
@@ -540,7 +539,6 @@ impl FilteredPushRules {
         msc1767_enabled: bool,
         msc3381_polls_enabled: bool,
         msc3664_enabled: bool,
-        msc3952_intentional_mentions: bool,
         msc3958_suppress_edits_enabled: bool,
     ) -> Self {
         Self {
@@ -549,7 +547,6 @@ impl FilteredPushRules {
             msc1767_enabled,
             msc3381_polls_enabled,
             msc3664_enabled,
-            msc3952_intentional_mentions,
             msc3958_suppress_edits_enabled,
         }
     }
@@ -587,10 +584,6 @@ impl FilteredPushRules {
                     return false;
                 }
 
-                if !self.msc3952_intentional_mentions && rule.rule_id.contains("org.matrix.msc3952")
-                {
-                    return false;
-                }
                 if !self.msc3958_suppress_edits_enabled
                     && rule.rule_id == "global/override/.com.beeper.suppress_edits"
                 {
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index 5d0ce4b1a4..d573a37b9a 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -46,7 +46,6 @@ class FilteredPushRules:
         msc1767_enabled: bool,
         msc3381_polls_enabled: bool,
         msc3664_enabled: bool,
-        msc3952_intentional_mentions: bool,
         msc3958_suppress_edits_enabled: bool,
     ): ...
     def rules(self) -> Collection[Tuple[PushRule, bool]]: ...
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index cde9a2ecef..faf0770c66 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -236,7 +236,7 @@ class EventContentFields:
     AUTHORISING_USER: Final = "join_authorised_via_users_server"
 
     # Use for mentioning users.
-    MSC3952_MENTIONS: Final = "org.matrix.msc3952.mentions"
+    MENTIONS: Final = "m.mentions"
 
     # an unspecced field added to to-device messages to identify them uniquely-ish
     TO_DEVICE_MSGID: Final = "org.matrix.msgid"
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index a9e002cf08..1d5b5ded45 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -358,11 +358,6 @@ class ExperimentalConfig(Config):
         # MSC3391: Removing account data.
         self.msc3391_enabled = experimental.get("msc3391_enabled", False)
 
-        # MSC3952: Intentional mentions, this depends on MSC3966.
-        self.msc3952_intentional_mentions = experimental.get(
-            "msc3952_intentional_mentions", False
-        )
-
         # MSC3959: Do not generate notifications for edits.
         self.msc3958_supress_edit_notifs = experimental.get(
             "msc3958_supress_edit_notifs", False
diff --git a/synapse/events/validator.py b/synapse/events/validator.py
index 47203209db..9278f1a1aa 100644
--- a/synapse/events/validator.py
+++ b/synapse/events/validator.py
@@ -134,13 +134,8 @@ class EventValidator:
                 )
 
         # If the event contains a mentions key, validate it.
-        if (
-            EventContentFields.MSC3952_MENTIONS in event.content
-            and config.experimental.msc3952_intentional_mentions
-        ):
-            validate_json_object(
-                event.content[EventContentFields.MSC3952_MENTIONS], Mentions
-            )
+        if EventContentFields.MENTIONS in event.content:
+            validate_json_object(event.content[EventContentFields.MENTIONS], Mentions)
 
     def _validate_retention(self, event: EventBase) -> None:
         """Checks that an event that defines the retention policy for a room respects the
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 320084f5f5..33002cc0f2 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -120,9 +120,6 @@ class BulkPushRuleEvaluator:
         self.should_calculate_push_rules = self.hs.config.push.enable_push
 
         self._related_event_match_enabled = self.hs.config.experimental.msc3664_enabled
-        self._intentional_mentions_enabled = (
-            self.hs.config.experimental.msc3952_intentional_mentions
-        )
 
         self.room_push_rule_cache_metrics = register_cache(
             "cache",
@@ -390,10 +387,7 @@ class BulkPushRuleEvaluator:
                         del notification_levels[key]
 
         # Pull out any user and room mentions.
-        has_mentions = (
-            self._intentional_mentions_enabled
-            and EventContentFields.MSC3952_MENTIONS in event.content
-        )
+        has_mentions = EventContentFields.MENTIONS in event.content
 
         evaluator = PushRuleEvaluator(
             _flatten_dict(event),
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 547bf34df1..1910648755 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -124,8 +124,6 @@ class VersionsRestServlet(RestServlet):
                     is not None,
                     # Adds support for relation-based redactions as per MSC3912.
                     "org.matrix.msc3912": self.config.experimental.msc3912_enabled,
-                    # Adds support for unstable "intentional mentions" behaviour.
-                    "org.matrix.msc3952_intentional_mentions": self.config.experimental.msc3952_intentional_mentions,
                     # Whether recursively provide relations is supported.
                     "org.matrix.msc3981": self.config.experimental.msc3981_recurse_relations,
                     # Adds support for deleting account data.
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index 9f862f00c1..e098ceea3c 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -88,7 +88,6 @@ def _load_rules(
         msc1767_enabled=experimental_config.msc1767_enabled,
         msc3664_enabled=experimental_config.msc3664_enabled,
         msc3381_polls_enabled=experimental_config.msc3381_polls_enabled,
-        msc3952_intentional_mentions=experimental_config.msc3952_intentional_mentions,
         msc3958_suppress_edits_enabled=experimental_config.msc3958_supress_edit_notifs,
     )
 
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
index 9501096a77..1e06f86071 100644
--- a/tests/push/test_bulk_push_rule_evaluator.py
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -228,7 +228,6 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         )
         return len(result) > 0
 
-    @override_config({"experimental_features": {"msc3952_intentional_mentions": True}})
     def test_user_mentions(self) -> None:
         """Test the behavior of an event which includes invalid user mentions."""
         bulk_evaluator = BulkPushRuleEvaluator(self.hs)
@@ -237,9 +236,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         self.assertFalse(self._create_and_process(bulk_evaluator))
         # An empty mentions field should not notify.
         self.assertFalse(
-            self._create_and_process(
-                bulk_evaluator, {EventContentFields.MSC3952_MENTIONS: {}}
-            )
+            self._create_and_process(bulk_evaluator, {EventContentFields.MENTIONS: {}})
         )
 
         # Non-dict mentions should be ignored.
@@ -253,7 +250,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
             for mentions in (None, True, False, 1, "foo", []):
                 self.assertFalse(
                     self._create_and_process(
-                        bulk_evaluator, {EventContentFields.MSC3952_MENTIONS: mentions}
+                        bulk_evaluator, {EventContentFields.MENTIONS: mentions}
                     )
                 )
 
@@ -262,7 +259,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
                 self.assertFalse(
                     self._create_and_process(
                         bulk_evaluator,
-                        {EventContentFields.MSC3952_MENTIONS: {"user_ids": mentions}},
+                        {EventContentFields.MENTIONS: {"user_ids": mentions}},
                     )
                 )
 
@@ -270,14 +267,14 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         self.assertTrue(
             self._create_and_process(
                 bulk_evaluator,
-                {EventContentFields.MSC3952_MENTIONS: {"user_ids": [self.alice]}},
+                {EventContentFields.MENTIONS: {"user_ids": [self.alice]}},
             )
         )
         self.assertTrue(
             self._create_and_process(
                 bulk_evaluator,
                 {
-                    EventContentFields.MSC3952_MENTIONS: {
+                    EventContentFields.MENTIONS: {
                         "user_ids": ["@another:test", self.alice]
                     }
                 },
@@ -288,11 +285,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         self.assertTrue(
             self._create_and_process(
                 bulk_evaluator,
-                {
-                    EventContentFields.MSC3952_MENTIONS: {
-                        "user_ids": [self.alice, self.alice]
-                    }
-                },
+                {EventContentFields.MENTIONS: {"user_ids": [self.alice, self.alice]}},
             )
         )
 
@@ -307,7 +300,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
                 self._create_and_process(
                     bulk_evaluator,
                     {
-                        EventContentFields.MSC3952_MENTIONS: {
+                        EventContentFields.MENTIONS: {
                             "user_ids": [None, True, False, {}, []]
                         }
                     },
@@ -317,7 +310,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
                 self._create_and_process(
                     bulk_evaluator,
                     {
-                        EventContentFields.MSC3952_MENTIONS: {
+                        EventContentFields.MENTIONS: {
                             "user_ids": [None, True, False, {}, [], self.alice]
                         }
                     },
@@ -331,12 +324,11 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
                 {
                     "body": self.alice,
                     "msgtype": "m.text",
-                    EventContentFields.MSC3952_MENTIONS: {},
+                    EventContentFields.MENTIONS: {},
                 },
             )
         )
 
-    @override_config({"experimental_features": {"msc3952_intentional_mentions": True}})
     def test_room_mentions(self) -> None:
         """Test the behavior of an event which includes invalid room mentions."""
         bulk_evaluator = BulkPushRuleEvaluator(self.hs)
@@ -344,7 +336,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         # Room mentions from those without power should not notify.
         self.assertFalse(
             self._create_and_process(
-                bulk_evaluator, {EventContentFields.MSC3952_MENTIONS: {"room": True}}
+                bulk_evaluator, {EventContentFields.MENTIONS: {"room": True}}
             )
         )
 
@@ -358,7 +350,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         )
         self.assertTrue(
             self._create_and_process(
-                bulk_evaluator, {EventContentFields.MSC3952_MENTIONS: {"room": True}}
+                bulk_evaluator, {EventContentFields.MENTIONS: {"room": True}}
             )
         )
 
@@ -374,7 +366,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
                 self.assertFalse(
                     self._create_and_process(
                         bulk_evaluator,
-                        {EventContentFields.MSC3952_MENTIONS: {"room": mentions}},
+                        {EventContentFields.MENTIONS: {"room": mentions}},
                     )
                 )
 
@@ -385,7 +377,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
                 {
                     "body": "@room",
                     "msgtype": "m.text",
-                    EventContentFields.MSC3952_MENTIONS: {},
+                    EventContentFields.MENTIONS: {},
                 },
             )
         )
-- 
cgit 1.5.1


From dfd77f426e3e4a66dd027db7078ed0345a4c74dd Mon Sep 17 00:00:00 2001
From: Sean Quah <8349537+squahtx@users.noreply.github.com>
Date: Tue, 6 Jun 2023 12:32:29 +0100
Subject: Remove some unused `server_name` fields (#15723)

Signed-off-by: Sean Quah <seanq@matrix.org>
---
 changelog.d/15723.misc                | 1 +
 synapse/handlers/presence.py          | 1 -
 synapse/handlers/read_marker.py       | 1 -
 synapse/handlers/room.py              | 1 -
 synapse/handlers/stats.py             | 1 -
 synapse/rest/media/upload_resource.py | 1 -
 6 files changed, 1 insertion(+), 5 deletions(-)
 create mode 100644 changelog.d/15723.misc

(limited to 'synapse')

diff --git a/changelog.d/15723.misc b/changelog.d/15723.misc
new file mode 100644
index 0000000000..ba331adca7
--- /dev/null
+++ b/changelog.d/15723.misc
@@ -0,0 +1 @@
+Removed some unused fields.
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 4ad2233573..0a219b7962 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -648,7 +648,6 @@ class PresenceHandler(BasePresenceHandler):
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
         self.hs = hs
-        self.server_name = hs.hostname
         self.wheel_timer: WheelTimer[str] = WheelTimer()
         self.notifier = hs.get_notifier()
         self._presence_enabled = hs.config.server.use_presence
diff --git a/synapse/handlers/read_marker.py b/synapse/handlers/read_marker.py
index 49a497a860..df5a4f3e22 100644
--- a/synapse/handlers/read_marker.py
+++ b/synapse/handlers/read_marker.py
@@ -27,7 +27,6 @@ logger = logging.getLogger(__name__)
 
 class ReadMarkerHandler:
     def __init__(self, hs: "HomeServer"):
-        self.server_name = hs.config.server.server_name
         self.store = hs.get_datastores().main
         self.account_data_handler = hs.get_account_data_handler()
         self.read_marker_linearizer = Linearizer(name="read_marker")
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 5e1702d78a..cb957f2033 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -1490,7 +1490,6 @@ class RoomContextHandler:
 
 class TimestampLookupHandler:
     def __init__(self, hs: "HomeServer"):
-        self.server_name = hs.hostname
         self.store = hs.get_datastores().main
         self.state_handler = hs.get_state_handler()
         self.federation_client = hs.get_federation_client()
diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py
index 5c01482acf..7cabf7980a 100644
--- a/synapse/handlers/stats.py
+++ b/synapse/handlers/stats.py
@@ -42,7 +42,6 @@ class StatsHandler:
         self.store = hs.get_datastores().main
         self._storage_controllers = hs.get_storage_controllers()
         self.state = hs.get_state_handler()
-        self.server_name = hs.hostname
         self.clock = hs.get_clock()
         self.notifier = hs.get_notifier()
         self.is_mine_id = hs.is_mine_id
diff --git a/synapse/rest/media/upload_resource.py b/synapse/rest/media/upload_resource.py
index 697348613b..043e8d6077 100644
--- a/synapse/rest/media/upload_resource.py
+++ b/synapse/rest/media/upload_resource.py
@@ -39,7 +39,6 @@ class UploadResource(DirectServeJsonResource):
         self.filepaths = media_repo.filepaths
         self.store = hs.get_datastores().main
         self.clock = hs.get_clock()
-        self.server_name = hs.hostname
         self.auth = hs.get_auth()
         self.max_upload_size = hs.config.media.max_upload_size
         self.clock = hs.get_clock()
-- 
cgit 1.5.1


From 6ee96e936646d6ccc55dc076f62f8cf518c90d1e Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Tue, 6 Jun 2023 13:16:03 -0700
Subject: Improve performance of user directory search (#15729)

---
 changelog.d/15729.misc                           |  1 +
 synapse/storage/databases/main/user_directory.py | 12 ++++++++----
 2 files changed, 9 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/15729.misc

(limited to 'synapse')

diff --git a/changelog.d/15729.misc b/changelog.d/15729.misc
new file mode 100644
index 0000000000..3940254305
--- /dev/null
+++ b/changelog.d/15729.misc
@@ -0,0 +1 @@
+Improve performance of user directory search.
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index a0319575f0..b0a06baf4f 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -1061,12 +1061,15 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
             # The array of numbers are the weights for the various part of the
             # search: (domain, _, display name, localpart)
             sql = """
+                WITH matching_users AS (
+                    SELECT user_id, vector FROM user_directory_search WHERE vector @@ to_tsquery('simple', ?)
+                    LIMIT 10000
+                )
                 SELECT d.user_id AS user_id, display_name, avatar_url
-                FROM user_directory_search as t
+                FROM matching_users as t
                 INNER JOIN user_directory AS d USING (user_id)
                 WHERE
                     %(where_clause)s
-                    AND vector @@ to_tsquery('simple', ?)
                 ORDER BY
                     (CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END)
                     * (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END)
@@ -1095,8 +1098,9 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
                 "order_case_statements": " ".join(additional_ordering_statements),
             }
             args = (
-                join_args
-                + (full_query, exact_query, prefix_query)
+                (full_query,)
+                + join_args
+                + (exact_query, prefix_query)
                 + ordering_arguments
                 + (limit + 1,)
             )
-- 
cgit 1.5.1


From 33c3550887f412f015cf651db82a9082bb12cd9e Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 6 Jun 2023 16:25:03 -0500
Subject: Add context for when/why to use the `long_retries` option when
 sending Federation requests (#15721)

---
 changelog.d/15721.misc                 |  1 +
 synapse/http/matrixfederationclient.py | 11 +++++++++--
 2 files changed, 10 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15721.misc

(limited to 'synapse')

diff --git a/changelog.d/15721.misc b/changelog.d/15721.misc
new file mode 100644
index 0000000000..f4d892daf9
--- /dev/null
+++ b/changelog.d/15721.misc
@@ -0,0 +1 @@
+Add context for when/why to use the `long_retries` option when sending Federation requests.
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index 9094dab0fe..abb5ae5815 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -499,8 +499,15 @@ class MatrixFederationHttpClient:
                 Note that the above intervals are *in addition* to the time spent
                 waiting for the request to complete (up to `timeout` ms).
 
-                NB: the long retry algorithm takes over 20 minutes to complete, with
-                a default timeout of 60s!
+                NB: the long retry algorithm takes over 20 minutes to complete, with a
+                default timeout of 60s! It's best not to use the `long_retries` option
+                for something that is blocking a client so we don't make them wait for
+                aaaaages, whereas some things like sending transactions (server to
+                server) we can be a lot more lenient but its very fuzzy / hand-wavey.
+
+                In the future, we could be more intelligent about doing this sort of
+                thing by looking at things with the bigger picture in mind,
+                https://github.com/matrix-org/synapse/issues/8917
 
             ignore_backoff: true to ignore the historical backoff data
                 and try the request anyway.
-- 
cgit 1.5.1


From 4e6390cb10676d3f621319663587f49baa57bedc Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 6 Jun 2023 16:26:12 -0500
Subject: Update error to more plainly explain we can only authorize our own
 events (#15725)

---
 changelog.d/15725.misc                  | 1 +
 synapse/federation/federation_server.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15725.misc

(limited to 'synapse')

diff --git a/changelog.d/15725.misc b/changelog.d/15725.misc
new file mode 100644
index 0000000000..6c7a8a41d8
--- /dev/null
+++ b/changelog.d/15725.misc
@@ -0,0 +1 @@
+Update federation error to more plainly explain we can only authorize our own membership events.
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 149351dda0..9425b32507 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -944,7 +944,7 @@ class FederationServer(FederationBase):
             if not self._is_mine_server_name(authorising_server):
                 raise SynapseError(
                     400,
-                    f"Cannot authorise request from resident server: {authorising_server}",
+                    f"Cannot authorise membership event for {authorising_server}. We can only authorise requests from our own homeserver",
                 )
 
             event.signatures.update(
-- 
cgit 1.5.1


From 8bfded81f3378ab6333f174e182f2aae6ef01f49 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 6 Jun 2023 17:39:22 -0500
Subject: Trace functions which return `Awaitable` (#15650)

---
 changelog.d/15650.misc            |  1 +
 synapse/logging/opentracing.py    | 37 +++++++++++++++++++++++----------
 tests/logging/test_opentracing.py | 43 +++++++++++++++++++++++++++++----------
 3 files changed, 59 insertions(+), 22 deletions(-)
 create mode 100644 changelog.d/15650.misc

(limited to 'synapse')

diff --git a/changelog.d/15650.misc b/changelog.d/15650.misc
new file mode 100644
index 0000000000..9bbad113e1
--- /dev/null
+++ b/changelog.d/15650.misc
@@ -0,0 +1 @@
+Add support for tracing functions which return `Awaitable`s.
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index c70eee649c..75217e3f45 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -171,6 +171,7 @@ from functools import wraps
 from typing import (
     TYPE_CHECKING,
     Any,
+    Awaitable,
     Callable,
     Collection,
     ContextManager,
@@ -903,6 +904,7 @@ def _custom_sync_async_decorator(
     """
 
     if inspect.iscoroutinefunction(func):
+        # For this branch, we handle async functions like `async def func() -> RInner`.
         # In this branch, R = Awaitable[RInner], for some other type RInner
         @wraps(func)
         async def _wrapper(
@@ -914,15 +916,16 @@ def _custom_sync_async_decorator(
                 return await func(*args, **kwargs)  # type: ignore[misc]
 
     else:
-        # The other case here handles both sync functions and those
-        # decorated with inlineDeferred.
+        # The other case here handles sync functions including those decorated with
+        # `@defer.inlineCallbacks` or that return a `Deferred` or other `Awaitable`.
         @wraps(func)
-        def _wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
+        def _wrapper(*args: P.args, **kwargs: P.kwargs) -> Any:
             scope = wrapping_logic(func, *args, **kwargs)
             scope.__enter__()
 
             try:
                 result = func(*args, **kwargs)
+
                 if isinstance(result, defer.Deferred):
 
                     def call_back(result: R) -> R:
@@ -930,20 +933,32 @@ def _custom_sync_async_decorator(
                         return result
 
                     def err_back(result: R) -> R:
+                        # TODO: Pass the error details into `scope.__exit__(...)` for
+                        #       consistency with the other paths.
                         scope.__exit__(None, None, None)
                         return result
 
                     result.addCallbacks(call_back, err_back)
 
+                elif inspect.isawaitable(result):
+
+                    async def wrap_awaitable() -> Any:
+                        try:
+                            assert isinstance(result, Awaitable)
+                            awaited_result = await result
+                            scope.__exit__(None, None, None)
+                            return awaited_result
+                        except Exception as e:
+                            scope.__exit__(type(e), None, e.__traceback__)
+                            raise
+
+                    # The original method returned an awaitable, eg. a coroutine, so we
+                    # create another awaitable wrapping it that calls
+                    # `scope.__exit__(...)`.
+                    return wrap_awaitable()
                 else:
-                    if inspect.isawaitable(result):
-                        logger.error(
-                            "@trace may not have wrapped %s correctly! "
-                            "The function is not async but returned a %s.",
-                            func.__qualname__,
-                            type(result).__name__,
-                        )
-
+                    # Just a simple sync function so we can just exit the scope and
+                    # return the result without any fuss.
                     scope.__exit__(None, None, None)
 
                 return result
diff --git a/tests/logging/test_opentracing.py b/tests/logging/test_opentracing.py
index e28ba84cc2..1bc7d64ad9 100644
--- a/tests/logging/test_opentracing.py
+++ b/tests/logging/test_opentracing.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import cast
+from typing import Awaitable, cast
 
 from twisted.internet import defer
 from twisted.test.proto_helpers import MemoryReactorClock
@@ -227,8 +227,6 @@ class LogContextScopeManagerTestCase(TestCase):
         Test whether we can use `@trace_with_opname` (`@trace`) and `@tag_args`
         with functions that return deferreds
         """
-        reactor = MemoryReactorClock()
-
         with LoggingContext("root context"):
 
             @trace_with_opname("fixture_deferred_func", tracer=self._tracer)
@@ -240,9 +238,6 @@ class LogContextScopeManagerTestCase(TestCase):
 
             result_d1 = fixture_deferred_func()
 
-            # let the tasks complete
-            reactor.pump((2,) * 8)
-
             self.assertEqual(self.successResultOf(result_d1), "foo")
 
         # the span should have been reported
@@ -256,8 +251,6 @@ class LogContextScopeManagerTestCase(TestCase):
         Test whether we can use `@trace_with_opname` (`@trace`) and `@tag_args`
         with async functions
         """
-        reactor = MemoryReactorClock()
-
         with LoggingContext("root context"):
 
             @trace_with_opname("fixture_async_func", tracer=self._tracer)
@@ -267,9 +260,6 @@ class LogContextScopeManagerTestCase(TestCase):
 
             d1 = defer.ensureDeferred(fixture_async_func())
 
-            # let the tasks complete
-            reactor.pump((2,) * 8)
-
             self.assertEqual(self.successResultOf(d1), "foo")
 
         # the span should have been reported
@@ -277,3 +267,34 @@ class LogContextScopeManagerTestCase(TestCase):
             [span.operation_name for span in self._reporter.get_spans()],
             ["fixture_async_func"],
         )
+
+    def test_trace_decorator_awaitable_return(self) -> None:
+        """
+        Test whether we can use `@trace_with_opname` (`@trace`) and `@tag_args`
+        with functions that return an awaitable (e.g. a coroutine)
+        """
+        with LoggingContext("root context"):
+            # Something we can return without `await` to get a coroutine
+            async def fixture_async_func() -> str:
+                return "foo"
+
+            # The actual kind of function we want to test that returns an awaitable
+            @trace_with_opname("fixture_awaitable_return_func", tracer=self._tracer)
+            @tag_args
+            def fixture_awaitable_return_func() -> Awaitable[str]:
+                return fixture_async_func()
+
+            # Something we can run with `defer.ensureDeferred(runner())` and pump the
+            # whole async tasks through to completion.
+            async def runner() -> str:
+                return await fixture_awaitable_return_func()
+
+            d1 = defer.ensureDeferred(runner())
+
+            self.assertEqual(self.successResultOf(d1), "foo")
+
+        # the span should have been reported
+        self.assertEqual(
+            [span.operation_name for span in self._reporter.get_spans()],
+            ["fixture_awaitable_return_func"],
+        )
-- 
cgit 1.5.1


From 9d911b0da651893e0b67cb3506e18582cb0d95b5 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 6 Jun 2023 22:19:57 -0500
Subject: No need for the extra join since `membership` is built-in to
 `current_state_events` (#15731)

This helps with the upstream `is_host_joined()` and `is_host_invited()` functions.

`membership` was added to `current_state_events` in https://github.com/matrix-org/synapse/pull/5706 and forced in https://github.com/matrix-org/synapse/pull/13745
---
 changelog.d/15731.misc                       | 1 +
 synapse/storage/databases/main/roommember.py | 7 +++----
 2 files changed, 4 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/15731.misc

(limited to 'synapse')

diff --git a/changelog.d/15731.misc b/changelog.d/15731.misc
new file mode 100644
index 0000000000..906bc26962
--- /dev/null
+++ b/changelog.d/15731.misc
@@ -0,0 +1 @@
+Remove redundant table join with `room_memberships` when doing a `is_host_joined()`/`is_host_invited()` call (`membership` is already part of the `current_state_events`).
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index ae9c201b87..1b8ec67f54 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -927,11 +927,10 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
             raise Exception("Invalid host name")
 
         sql = """
-            SELECT state_key FROM current_state_events AS c
-            INNER JOIN room_memberships AS m USING (event_id)
-            WHERE m.membership = ?
+            SELECT state_key FROM current_state_events
+            WHERE membership = ?
                 AND type = 'm.room.member'
-                AND c.room_id = ?
+                AND room_id = ?
                 AND state_key LIKE ?
             LIMIT 1
         """
-- 
cgit 1.5.1


From a701c089fa2a345243985a765506a52b50e50963 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 7 Jun 2023 10:50:32 +0100
Subject: Fix schema delta error in 1.85 (#15738)

There appears to be a race where you can end up with entries in
`event_push_summary` with both a `NULL` and `main` thread ID.

Fixes #15736

Introduced in #15597
---
 changelog.d/15738.bugfix                                          | 1 +
 .../schema/main/delta/77/05thread_notifications_backfill.sql      | 8 ++++++++
 2 files changed, 9 insertions(+)
 create mode 100644 changelog.d/15738.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15738.bugfix b/changelog.d/15738.bugfix
new file mode 100644
index 0000000000..7129ab0782
--- /dev/null
+++ b/changelog.d/15738.bugfix
@@ -0,0 +1 @@
+Fix bug in schema delta that broke upgrades for some deployments. Introduced in v1.85.0.
diff --git a/synapse/storage/schema/main/delta/77/05thread_notifications_backfill.sql b/synapse/storage/schema/main/delta/77/05thread_notifications_backfill.sql
index ce6f9ff937..b09aa817ae 100644
--- a/synapse/storage/schema/main/delta/77/05thread_notifications_backfill.sql
+++ b/synapse/storage/schema/main/delta/77/05thread_notifications_backfill.sql
@@ -21,6 +21,14 @@ DELETE FROM background_updates WHERE update_name = 'event_push_backfill_thread_i
 -- Overwrite any null thread_id values.
 UPDATE event_push_actions_staging SET thread_id = 'main' WHERE thread_id IS NULL;
 UPDATE event_push_actions SET thread_id = 'main' WHERE thread_id IS NULL;
+
+-- Empirically we can end up with entries in the push summary table with both a
+-- `NULL` and `main` thread ID, which causes the update below to fail. We fudge
+-- this by deleting any `NULL` rows that have a corresponding `main`.
+DELETE FROM event_push_summary AS a WHERE thread_id IS NULL AND EXISTS (
+    SELECT 1 FROM event_push_summary AS b
+    WHERE b.thread_id = 'main' AND a.user_id = b.user_id AND a.room_id = b.room_id
+);
 UPDATE event_push_summary SET thread_id = 'main' WHERE thread_id IS NULL;
 
 -- Drop the background updates to calculate the indexes used to find null thread_ids.
-- 
cgit 1.5.1


From f7c6553ebce51a46f1c78aa0a3fc6cc1effb346d Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 7 Jun 2023 13:02:42 +0100
Subject: Fix schema delta error in 1.85 (#15739)

Some users seem to have multiple rows per user / room with a null thread
ID, which we need to handle.
---
 changelog.d/15739.bugfix                                 |  1 +
 .../main/delta/77/05thread_notifications_backfill.sql    | 16 ++++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15739.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15739.bugfix b/changelog.d/15739.bugfix
new file mode 100644
index 0000000000..7129ab0782
--- /dev/null
+++ b/changelog.d/15739.bugfix
@@ -0,0 +1 @@
+Fix bug in schema delta that broke upgrades for some deployments. Introduced in v1.85.0.
diff --git a/synapse/storage/schema/main/delta/77/05thread_notifications_backfill.sql b/synapse/storage/schema/main/delta/77/05thread_notifications_backfill.sql
index b09aa817ae..a5da7a17a0 100644
--- a/synapse/storage/schema/main/delta/77/05thread_notifications_backfill.sql
+++ b/synapse/storage/schema/main/delta/77/05thread_notifications_backfill.sql
@@ -23,13 +23,25 @@ UPDATE event_push_actions_staging SET thread_id = 'main' WHERE thread_id IS NULL
 UPDATE event_push_actions SET thread_id = 'main' WHERE thread_id IS NULL;
 
 -- Empirically we can end up with entries in the push summary table with both a
--- `NULL` and `main` thread ID, which causes the update below to fail. We fudge
+-- `NULL` and `main` thread ID, which causes the insert below to fail. We fudge
 -- this by deleting any `NULL` rows that have a corresponding `main`.
 DELETE FROM event_push_summary AS a WHERE thread_id IS NULL AND EXISTS (
     SELECT 1 FROM event_push_summary AS b
     WHERE b.thread_id = 'main' AND a.user_id = b.user_id AND a.room_id = b.room_id
 );
-UPDATE event_push_summary SET thread_id = 'main' WHERE thread_id IS NULL;
+-- Copy the NULL threads to have a 'main' thread ID.
+--
+-- Note: Some people seem to have duplicate rows with a `NULL` thread ID, in
+-- which case we just fudge it with using MAX of the values. The counts *may* be
+-- wrong for such rooms, but a) its an edge case, and b) they'll be fixed when
+-- the user reads the room.
+INSERT INTO event_push_summary (user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id)
+    SELECT user_id, room_id, MAX(notif_count), MAX(stream_ordering), MAX(unread_count), MAX(last_receipt_stream_ordering), 'main'
+    FROM event_push_summary
+    WHERE thread_id IS NULL
+    GROUP BY user_id, room_id, thread_id;
+
+DELETE FROM event_push_summary AS a WHERE thread_id IS NULL;
 
 -- Drop the background updates to calculate the indexes used to find null thread_ids.
 DELETE FROM background_updates WHERE update_name = 'event_push_actions_thread_id_null';
-- 
cgit 1.5.1


From 5c24d7b9ebd8dec2c76dac5118cee22a1bb1032a Mon Sep 17 00:00:00 2001
From: Grant McLean <grant@catalyst.net.nz>
Date: Thu, 8 Jun 2023 03:21:25 +1200
Subject: Check required power levels earlier in createRoom handler. (#15695)

* Check required power levels earlier in createRoom handler.

- If a server was configured to reject the creation of rooms with E2EE
  enabled (by specifying an unattainably high power level for
  "m.room.encryption" in default_power_level_content_override), the 403
  error was not being triggered until after the room was created and
  before the "m.room.power_levels" event was sent.  This allowed a user to
  access the partially-configured room and complete the setup of E2EE
  and power levels manually.

- This change causes the power level overrides to be checked earlier and
  the request to be rejected before the user gains access to the room.

- A new `_validate_room_config` method is added to contain checks that
  should be run before a room is created.

- The new test case confirms that a user request is rejected by the new
  validation method.

Signed-off-by: Grant McLean <grant@catalyst.net.nz>

* Add a changelog file.

* Formatting fix for black.

* Remove unneeded line from test.

---------

Signed-off-by: Grant McLean <grant@catalyst.net.nz>
---
 changelog.d/15695.bugfix        |  1 +
 synapse/handlers/room.py        | 76 +++++++++++++++++++++++++++++++++--------
 tests/rest/client/test_rooms.py | 37 ++++++++++++++++++++
 3 files changed, 100 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/15695.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15695.bugfix b/changelog.d/15695.bugfix
new file mode 100644
index 0000000000..99bf1fe05e
--- /dev/null
+++ b/changelog.d/15695.bugfix
@@ -0,0 +1 @@
+Check permissions for enabling encryption earlier during room creation to avoid creating broken rooms.
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index cb957f2033..bf907b7881 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -872,6 +872,8 @@ class RoomCreationHandler:
         visibility = config.get("visibility", "private")
         is_public = visibility == "public"
 
+        self._validate_room_config(config, visibility)
+
         room_id = await self._generate_and_create_room_id(
             creator_id=user_id,
             is_public=is_public,
@@ -1111,20 +1113,7 @@ class RoomCreationHandler:
 
             return new_event, new_unpersisted_context
 
-        visibility = room_config.get("visibility", "private")
-        preset_config = room_config.get(
-            "preset",
-            RoomCreationPreset.PRIVATE_CHAT
-            if visibility == "private"
-            else RoomCreationPreset.PUBLIC_CHAT,
-        )
-
-        try:
-            config = self._presets_dict[preset_config]
-        except KeyError:
-            raise SynapseError(
-                400, f"'{preset_config}' is not a valid preset", errcode=Codes.BAD_JSON
-            )
+        preset_config, config = self._room_preset_config(room_config)
 
         # MSC2175 removes the creator field from the create event.
         if not room_version.msc2175_implicit_room_creator:
@@ -1306,6 +1295,65 @@ class RoomCreationHandler:
         assert last_event.internal_metadata.stream_ordering is not None
         return last_event.internal_metadata.stream_ordering, last_event.event_id, depth
 
+    def _validate_room_config(
+        self,
+        config: JsonDict,
+        visibility: str,
+    ) -> None:
+        """Checks configuration parameters for a /createRoom request.
+
+        If validation detects invalid parameters an exception may be raised to
+        cause room creation to be aborted and an error response to be returned
+        to the client.
+
+        Args:
+            config: A dict of configuration options. Originally from the body of
+                the /createRoom request
+            visibility: One of "public" or "private"
+        """
+
+        # Validate the requested preset, raise a 400 error if not valid
+        preset_name, preset_config = self._room_preset_config(config)
+
+        # If the user is trying to create an encrypted room and this is forbidden
+        # by the configured default_power_level_content_override, then reject the
+        # request before the room is created.
+        raw_initial_state = config.get("initial_state", [])
+        room_encryption_event = any(
+            s.get("type", "") == EventTypes.RoomEncryption for s in raw_initial_state
+        )
+
+        if preset_config["encrypted"] or room_encryption_event:
+            if self._default_power_level_content_override:
+                override = self._default_power_level_content_override.get(preset_name)
+                if override is not None:
+                    event_levels = override.get("events", {})
+                    room_admin_level = event_levels.get(EventTypes.PowerLevels, 100)
+                    encryption_level = event_levels.get(EventTypes.RoomEncryption, 100)
+                    if encryption_level > room_admin_level:
+                        raise SynapseError(
+                            403,
+                            f"You cannot create an encrypted room. user_level ({room_admin_level}) < send_level ({encryption_level})",
+                        )
+
+    def _room_preset_config(self, room_config: JsonDict) -> Tuple[str, dict]:
+        # The spec says rooms should default to private visibility if
+        # `visibility` is not specified.
+        visibility = room_config.get("visibility", "private")
+        preset_name = room_config.get(
+            "preset",
+            RoomCreationPreset.PRIVATE_CHAT
+            if visibility == "private"
+            else RoomCreationPreset.PUBLIC_CHAT,
+        )
+        try:
+            preset_config = self._presets_dict[preset_name]
+        except KeyError:
+            raise SynapseError(
+                400, f"'{preset_name}' is not a valid preset", errcode=Codes.BAD_JSON
+            )
+        return preset_name, preset_config
+
     def _generate_room_id(self) -> str:
         """Generates a random room ID.
 
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index 4d39c89f6f..f1b4e1ad2f 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -1941,6 +1941,43 @@ class RoomPowerLevelOverridesInPracticeTestCase(RoomBase):
             channel.json_body["error"],
         )
 
+    @unittest.override_config(
+        {
+            "default_power_level_content_override": {
+                "private_chat": {
+                    "events": {
+                        "m.room.avatar": 50,
+                        "m.room.canonical_alias": 50,
+                        "m.room.encryption": 999,
+                        "m.room.history_visibility": 100,
+                        "m.room.name": 50,
+                        "m.room.power_levels": 100,
+                        "m.room.server_acl": 100,
+                        "m.room.tombstone": 100,
+                    },
+                    "events_default": 0,
+                },
+            }
+        },
+    )
+    def test_config_override_blocks_encrypted_room(self) -> None:
+        # Given the server has config for private_chats,
+
+        # When I attempt to create an encrypted private_chat room
+        channel = self.make_request(
+            "POST",
+            "/createRoom",
+            '{"creation_content": {"m.federate": false},"name": "Secret Private Room","preset": "private_chat","initial_state": [{"type": "m.room.encryption","state_key": "","content": {"algorithm": "m.megolm.v1.aes-sha2"}}]}',
+        )
+
+        # Then I am not allowed because the required power level is unattainable
+        self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.result["body"])
+        self.assertEqual(
+            "You cannot create an encrypted room. "
+            + "user_level (100) < send_level (999)",
+            channel.json_body["error"],
+        )
+
 
 class RoomInitialSyncTestCase(RoomBase):
     """Tests /rooms/$room_id/initialSync."""
-- 
cgit 1.5.1


From e536f02f68135a8494f80ded75d1a53b98cbcb8d Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Wed, 7 Jun 2023 11:47:01 -0500
Subject: Remove superfluous `room_memberships` join from background update
 (#15733)

Spawning from https://github.com/matrix-org/synapse/pull/15731
---
 changelog.d/15733.misc                       | 1 +
 synapse/storage/databases/main/roommember.py | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 changelog.d/15733.misc

(limited to 'synapse')

diff --git a/changelog.d/15733.misc b/changelog.d/15733.misc
new file mode 100644
index 0000000000..3ae7be3c27
--- /dev/null
+++ b/changelog.d/15733.misc
@@ -0,0 +1 @@
+Remove superfluous `room_memberships` join from background update.
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 1b8ec67f54..582875c91a 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -1460,7 +1460,6 @@ class RoomMemberBackgroundUpdateStore(SQLBaseStore):
                 SELECT stream_ordering, event_id, events.room_id, event_json.json
                 FROM events
                 INNER JOIN event_json USING (event_id)
-                INNER JOIN room_memberships USING (event_id)
                 WHERE ? <= stream_ordering AND stream_ordering < ?
                 AND type = 'm.room.member'
                 ORDER BY stream_ordering DESC
-- 
cgit 1.5.1


From d162aecaac52fb467822e319e4c3c5b216c33ca9 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 7 Jun 2023 18:12:23 +0100
Subject: Quick & dirty metric for background update status (#15740)

* Quick & dirty metric for background update status

* Changelog

* Remove debug

Co-authored-by: Mathieu Velten <mathieuv@matrix.org>

* Actually write to _aborted

---------

Co-authored-by: Mathieu Velten <mathieuv@matrix.org>
---
 changelog.d/15740.feature             |  1 +
 synapse/metrics/__init__.py           |  2 ++
 synapse/storage/background_updates.py | 30 ++++++++++++++++++++++++++++++
 synapse/storage/database.py           |  8 +++++++-
 4 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15740.feature

(limited to 'synapse')

diff --git a/changelog.d/15740.feature b/changelog.d/15740.feature
new file mode 100644
index 0000000000..fed342ea55
--- /dev/null
+++ b/changelog.d/15740.feature
@@ -0,0 +1 @@
+Expose a metric reporting the database background update status.
diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index 8ce5887229..39fc629937 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -77,6 +77,8 @@ RegistryProxy = cast(CollectorRegistry, _RegistryProxy)
 
 @attr.s(slots=True, hash=True, auto_attribs=True)
 class LaterGauge(Collector):
+    """A Gauge which periodically calls a user-provided callback to produce metrics."""
+
     name: str
     desc: str
     labels: Optional[Sequence[str]] = attr.ib(hash=False)
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index ca085ef800..edc97a9d61 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+from enum import IntEnum
 from types import TracebackType
 from typing import (
     TYPE_CHECKING,
@@ -136,6 +137,15 @@ class BackgroundUpdatePerformance:
             return float(self.total_item_count) / float(self.total_duration_ms)
 
 
+class UpdaterStatus(IntEnum):
+    # Use negative values for error conditions.
+    ABORTED = -1
+    DISABLED = 0
+    NOT_STARTED = 1
+    RUNNING_UPDATE = 2
+    COMPLETE = 3
+
+
 class BackgroundUpdater:
     """Background updates are updates to the database that run in the
     background. Each update processes a batch of data at once. We attempt to
@@ -158,11 +168,16 @@ class BackgroundUpdater:
 
         self._background_update_performance: Dict[str, BackgroundUpdatePerformance] = {}
         self._background_update_handlers: Dict[str, _BackgroundUpdateHandler] = {}
+        # TODO: all these bool flags make me feel icky---can we combine into a status
+        # enum?
         self._all_done = False
 
         # Whether we're currently running updates
         self._running = False
 
+        # Marker to be set if we abort and halt all background updates.
+        self._aborted = False
+
         # Whether background updates are enabled. This allows us to
         # enable/disable background updates via the admin API.
         self.enabled = True
@@ -175,6 +190,20 @@ class BackgroundUpdater:
         self.sleep_duration_ms = hs.config.background_updates.sleep_duration_ms
         self.sleep_enabled = hs.config.background_updates.sleep_enabled
 
+    def get_status(self) -> UpdaterStatus:
+        """An integer summarising the updater status. Used as a metric."""
+        if self._aborted:
+            return UpdaterStatus.ABORTED
+        # TODO: a status for "have seen at least one failure, but haven't aborted yet".
+        if not self.enabled:
+            return UpdaterStatus.DISABLED
+
+        if self._all_done:
+            return UpdaterStatus.COMPLETE
+        if self._running:
+            return UpdaterStatus.RUNNING_UPDATE
+        return UpdaterStatus.NOT_STARTED
+
     def register_update_controller_callbacks(
         self,
         on_update: ON_UPDATE_CALLBACK,
@@ -296,6 +325,7 @@ class BackgroundUpdater:
                 except Exception:
                     back_to_back_failures += 1
                     if back_to_back_failures >= 5:
+                        self._aborted = True
                         raise RuntimeError(
                             "5 back-to-back background update failures; aborting."
                         )
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index bdaa508dbe..10fa6c4802 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -54,7 +54,7 @@ from synapse.logging.context import (
     current_context,
     make_deferred_yieldable,
 )
-from synapse.metrics import register_threadpool
+from synapse.metrics import LaterGauge, register_threadpool
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.background_updates import BackgroundUpdater
 from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine
@@ -547,6 +547,12 @@ class DatabasePool:
         self._db_pool = make_pool(hs.get_reactor(), database_config, engine)
 
         self.updates = BackgroundUpdater(hs, self)
+        LaterGauge(
+            "synapse_background_update_status",
+            "Background update status",
+            [],
+            self.updates.get_status,
+        )
 
         self._previous_txn_total_time = 0.0
         self._current_txn_total_time = 0.0
-- 
cgit 1.5.1


From 733342ad3ef271a2c5bd4ba442a15fa3be3dab30 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 8 Jun 2023 13:03:48 +0100
Subject: Fix using TLS for replication (#15746)

Fixes #15744.
---
 changelog.d/15746.bugfix         | 1 +
 synapse/http/replicationagent.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15746.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15746.bugfix b/changelog.d/15746.bugfix
new file mode 100644
index 0000000000..8d3e22f2e5
--- /dev/null
+++ b/changelog.d/15746.bugfix
@@ -0,0 +1 @@
+Fix regression where using TLS for replication did not work. Introduced in v1.85.0.
diff --git a/synapse/http/replicationagent.py b/synapse/http/replicationagent.py
index 800f21873d..d6ba6f0e57 100644
--- a/synapse/http/replicationagent.py
+++ b/synapse/http/replicationagent.py
@@ -76,7 +76,7 @@ class ReplicationEndpointFactory:
                 endpoint = wrapClientTLS(
                     # The 'port' argument below isn't actually used by the function
                     self.context_factory.creatorForNetloc(
-                        self.instance_map[worker_name].host,
+                        self.instance_map[worker_name].host.encode("utf-8"),
                         self.instance_map[worker_name].port,
                     ),
                     endpoint,
-- 
cgit 1.5.1


From c485ed1c5a4c62ae555531cfd001a5e5f8bc2e44 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 8 Jun 2023 13:14:40 +0100
Subject: Clear event caches when we purge history (#15609)

This should help a little with #13476

---------

Co-authored-by: Patrick Cloke <patrickc@matrix.org>
---
 changelog.d/15609.bugfix                           |   1 +
 synapse/storage/_base.py                           |  31 +++++
 synapse/storage/databases/main/cache.py            | 134 +++++++++++++++++++++
 synapse/storage/databases/main/events_worker.py    |   9 ++
 synapse/storage/databases/main/purge_events.py     |   8 +-
 synapse/util/caches/lrucache.py                    |   2 +-
 tests/handlers/test_sync.py                        |   2 +-
 tests/rest/client/test_read_marker.py              |   3 -
 tests/storage/databases/main/test_events_worker.py |   8 +-
 9 files changed, 184 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/15609.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15609.bugfix b/changelog.d/15609.bugfix
new file mode 100644
index 0000000000..b5a990cfec
--- /dev/null
+++ b/changelog.d/15609.bugfix
@@ -0,0 +1 @@
+Correctly clear caches when we delete a room.
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 481fec72fe..fe4a763411 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -86,9 +86,14 @@ class SQLBaseStore(metaclass=ABCMeta):
             room_id: Room where state changed
             members_changed: The user_ids of members that have changed
         """
+
+        # XXX: If you add something to this function make sure you add it to
+        # `_invalidate_state_caches_all` as well.
+
         # If there were any membership changes, purge the appropriate caches.
         for host in {get_domain_from_id(u) for u in members_changed}:
             self._attempt_to_invalidate_cache("is_host_joined", (room_id, host))
+            self._attempt_to_invalidate_cache("is_host_invited", (room_id, host))
         if members_changed:
             self._attempt_to_invalidate_cache("get_users_in_room", (room_id,))
             self._attempt_to_invalidate_cache("get_current_hosts_in_room", (room_id,))
@@ -117,6 +122,32 @@ class SQLBaseStore(metaclass=ABCMeta):
         self._attempt_to_invalidate_cache("get_room_summary", (room_id,))
         self._attempt_to_invalidate_cache("get_partial_current_state_ids", (room_id,))
 
+    def _invalidate_state_caches_all(self, room_id: str) -> None:
+        """Invalidates caches that are based on the current state, but does
+        not stream invalidations down replication.
+
+        Same as `_invalidate_state_caches`, except that it works when we don't know
+        which memberships have changed.
+
+        Args:
+            room_id: Room where state changed
+        """
+        self._attempt_to_invalidate_cache("get_partial_current_state_ids", (room_id,))
+        self._attempt_to_invalidate_cache("get_users_in_room", (room_id,))
+        self._attempt_to_invalidate_cache("is_host_invited", None)
+        self._attempt_to_invalidate_cache("is_host_joined", None)
+        self._attempt_to_invalidate_cache("get_current_hosts_in_room", (room_id,))
+        self._attempt_to_invalidate_cache("get_users_in_room_with_profiles", (room_id,))
+        self._attempt_to_invalidate_cache("get_number_joined_users_in_room", (room_id,))
+        self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,))
+        self._attempt_to_invalidate_cache("does_pair_of_users_share_a_room", None)
+        self._attempt_to_invalidate_cache("get_user_in_room_with_profile", None)
+        self._attempt_to_invalidate_cache(
+            "get_rooms_for_user_with_stream_ordering", None
+        )
+        self._attempt_to_invalidate_cache("get_rooms_for_user", None)
+        self._attempt_to_invalidate_cache("get_room_summary", (room_id,))
+
     def _attempt_to_invalidate_cache(
         self, cache_name: str, key: Optional[Collection[Any]]
     ) -> bool:
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index 46fa0a73f9..6e1c7d681f 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -46,6 +46,12 @@ logger = logging.getLogger(__name__)
 # based on the current state when notifying workers over replication.
 CURRENT_STATE_CACHE_NAME = "cs_cache_fake"
 
+# As above, but for invalidating event caches on history deletion
+PURGE_HISTORY_CACHE_NAME = "ph_cache_fake"
+
+# As above, but for invalidating room caches on room deletion
+DELETE_ROOM_CACHE_NAME = "dr_cache_fake"
+
 
 class CacheInvalidationWorkerStore(SQLBaseStore):
     def __init__(
@@ -175,6 +181,23 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
                     room_id = row.keys[0]
                     members_changed = set(row.keys[1:])
                     self._invalidate_state_caches(room_id, members_changed)
+                elif row.cache_func == PURGE_HISTORY_CACHE_NAME:
+                    if row.keys is None:
+                        raise Exception(
+                            "Can't send an 'invalidate all' for 'purge history' cache"
+                        )
+
+                    room_id = row.keys[0]
+                    self._invalidate_caches_for_room_events(room_id)
+                elif row.cache_func == DELETE_ROOM_CACHE_NAME:
+                    if row.keys is None:
+                        raise Exception(
+                            "Can't send an 'invalidate all' for 'delete room' cache"
+                        )
+
+                    room_id = row.keys[0]
+                    self._invalidate_caches_for_room_events(room_id)
+                    self._invalidate_caches_for_room(room_id)
                 else:
                     self._attempt_to_invalidate_cache(row.cache_func, row.keys)
 
@@ -226,6 +249,9 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
         relates_to: Optional[str],
         backfilled: bool,
     ) -> None:
+        # XXX: If you add something to this function make sure you add it to
+        # `_invalidate_caches_for_room_events` as well.
+
         # This invalidates any local in-memory cached event objects, the original
         # process triggering the invalidation is responsible for clearing any external
         # cached objects.
@@ -271,6 +297,106 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
             self._attempt_to_invalidate_cache("get_thread_participated", (relates_to,))
             self._attempt_to_invalidate_cache("get_threads", (room_id,))
 
+    def _invalidate_caches_for_room_events_and_stream(
+        self, txn: LoggingTransaction, room_id: str
+    ) -> None:
+        """Invalidate caches associated with events in a room, and stream to
+        replication.
+
+        Used when we delete events in a room, but don't know which events we've
+        deleted.
+        """
+
+        self._send_invalidation_to_replication(txn, PURGE_HISTORY_CACHE_NAME, [room_id])
+        txn.call_after(self._invalidate_caches_for_room_events, room_id)
+
+    def _invalidate_caches_for_room_events(self, room_id: str) -> None:
+        """Invalidate caches associated with events in a room, but do not stream
+        the invalidation to replication.
+
+        Used when we delete events in a room, but don't know which events we've
+        deleted.
+        """
+
+        self._invalidate_local_get_event_cache_all()  # type: ignore[attr-defined]
+
+        self._attempt_to_invalidate_cache("have_seen_event", (room_id,))
+        self._attempt_to_invalidate_cache("get_latest_event_ids_in_room", (room_id,))
+        self._attempt_to_invalidate_cache(
+            "get_unread_event_push_actions_by_room_for_user", (room_id,)
+        )
+
+        self._attempt_to_invalidate_cache("_get_membership_from_event_id", None)
+        self._attempt_to_invalidate_cache("get_relations_for_event", None)
+        self._attempt_to_invalidate_cache("get_applicable_edit", None)
+        self._attempt_to_invalidate_cache("get_thread_id", None)
+        self._attempt_to_invalidate_cache("get_thread_id_for_receipts", None)
+        self._attempt_to_invalidate_cache("get_invited_rooms_for_local_user", None)
+        self._attempt_to_invalidate_cache(
+            "get_rooms_for_user_with_stream_ordering", None
+        )
+        self._attempt_to_invalidate_cache("get_rooms_for_user", None)
+        self._attempt_to_invalidate_cache("get_references_for_event", None)
+        self._attempt_to_invalidate_cache("get_thread_summary", None)
+        self._attempt_to_invalidate_cache("get_thread_participated", None)
+        self._attempt_to_invalidate_cache("get_threads", (room_id,))
+
+        self._attempt_to_invalidate_cache("_get_state_group_for_event", None)
+
+        self._attempt_to_invalidate_cache("get_event_ordering", None)
+        self._attempt_to_invalidate_cache("is_partial_state_event", None)
+        self._attempt_to_invalidate_cache("_get_joined_profile_from_event_id", None)
+
+    def _invalidate_caches_for_room_and_stream(
+        self, txn: LoggingTransaction, room_id: str
+    ) -> None:
+        """Invalidate caches associated with rooms, and stream to replication.
+
+        Used when we delete rooms.
+        """
+
+        self._send_invalidation_to_replication(txn, DELETE_ROOM_CACHE_NAME, [room_id])
+        txn.call_after(self._invalidate_caches_for_room, room_id)
+
+    def _invalidate_caches_for_room(self, room_id: str) -> None:
+        """Invalidate caches associated with rooms.
+
+        Used when we delete rooms.
+        """
+
+        # If we've deleted the room then we also need to purge all event caches.
+        self._invalidate_caches_for_room_events(room_id)
+
+        self._attempt_to_invalidate_cache("get_account_data_for_room", None)
+        self._attempt_to_invalidate_cache("get_account_data_for_room_and_type", None)
+        self._attempt_to_invalidate_cache("get_aliases_for_room", (room_id,))
+        self._attempt_to_invalidate_cache("get_latest_event_ids_in_room", (room_id,))
+        self._attempt_to_invalidate_cache("_get_forward_extremeties_for_room", None)
+        self._attempt_to_invalidate_cache(
+            "get_unread_event_push_actions_by_room_for_user", (room_id,)
+        )
+        self._attempt_to_invalidate_cache(
+            "_get_linearized_receipts_for_room", (room_id,)
+        )
+        self._attempt_to_invalidate_cache("is_room_blocked", (room_id,))
+        self._attempt_to_invalidate_cache("get_retention_policy_for_room", (room_id,))
+        self._attempt_to_invalidate_cache(
+            "_get_partial_state_servers_at_join", (room_id,)
+        )
+        self._attempt_to_invalidate_cache("is_partial_state_room", (room_id,))
+        self._attempt_to_invalidate_cache("get_invited_rooms_for_local_user", None)
+        self._attempt_to_invalidate_cache(
+            "get_current_hosts_in_room_ordered", (room_id,)
+        )
+        self._attempt_to_invalidate_cache("did_forget", None)
+        self._attempt_to_invalidate_cache("get_forgotten_rooms_for_user", None)
+        self._attempt_to_invalidate_cache("_get_membership_from_event_id", None)
+        self._attempt_to_invalidate_cache("get_room_version_id", (room_id,))
+
+        # And delete state caches.
+
+        self._invalidate_state_caches_all(room_id)
+
     async def invalidate_cache_and_stream(
         self, cache_name: str, keys: Tuple[Any, ...]
     ) -> None:
@@ -377,6 +503,14 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
                 "Can't stream invalidate all with magic current state cache"
             )
 
+        if cache_name == PURGE_HISTORY_CACHE_NAME and keys is None:
+            raise Exception(
+                "Can't stream invalidate all with magic purge history cache"
+            )
+
+        if cache_name == DELETE_ROOM_CACHE_NAME and keys is None:
+            raise Exception("Can't stream invalidate all with magic delete room cache")
+
         if isinstance(self.database_engine, PostgresEngine):
             assert self._cache_id_gen is not None
 
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index a39bc90974..d93ffc4efa 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -903,6 +903,15 @@ class EventsWorkerStore(SQLBaseStore):
         self._event_ref.pop(event_id, None)
         self._current_event_fetches.pop(event_id, None)
 
+    def _invalidate_local_get_event_cache_all(self) -> None:
+        """Clears the in-memory get event caches.
+
+        Used when we purge room history.
+        """
+        self._get_event_cache.clear()
+        self._event_ref.clear()
+        self._current_event_fetches.clear()
+
     async def _get_events_from_cache(
         self, events: Iterable[str], update_metrics: bool = True
     ) -> Dict[str, EventCacheEntry]:
diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py
index efbd3e75d9..9773c1fcd2 100644
--- a/synapse/storage/databases/main/purge_events.py
+++ b/synapse/storage/databases/main/purge_events.py
@@ -308,6 +308,8 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
 
         logger.info("[purge] done")
 
+        self._invalidate_caches_for_room_events_and_stream(txn, room_id)
+
         return referenced_state_groups
 
     async def purge_room(self, room_id: str) -> List[int]:
@@ -485,10 +487,6 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
         #       index on them. In any case we should be clearing out 'stream' tables
         #       periodically anyway (#5888)
 
-        # TODO: we could probably usefully do a bunch more cache invalidation here
-
-        # XXX: as with purge_history, this is racy, but no worse than other races
-        #   that already exist.
-        self._invalidate_cache_and_stream(txn, self.have_seen_event, (room_id,))
+        self._invalidate_caches_for_room_and_stream(txn, room_id)
 
         return state_groups
diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py
index ed0da17227..6137c85e10 100644
--- a/synapse/util/caches/lrucache.py
+++ b/synapse/util/caches/lrucache.py
@@ -862,5 +862,5 @@ class AsyncLruCache(Generic[KT, VT]):
     async def contains(self, key: KT) -> bool:
         return self._lru_cache.contains(key)
 
-    async def clear(self) -> None:
+    def clear(self) -> None:
         self._lru_cache.clear()
diff --git a/tests/handlers/test_sync.py b/tests/handlers/test_sync.py
index 0d9a3de92a..9f035a02dc 100644
--- a/tests/handlers/test_sync.py
+++ b/tests/handlers/test_sync.py
@@ -163,7 +163,7 @@ class SyncTestCase(tests.unittest.HomeserverTestCase):
         # Blow away caches (supported room versions can only change due to a restart).
         self.store.get_rooms_for_user_with_stream_ordering.invalidate_all()
         self.store.get_rooms_for_user.invalidate_all()
-        self.get_success(self.store._get_event_cache.clear())
+        self.store._get_event_cache.clear()
         self.store._event_ref.clear()
 
         # The rooms should be excluded from the sync response.
diff --git a/tests/rest/client/test_read_marker.py b/tests/rest/client/test_read_marker.py
index 0eedcdb476..5cdd5694a0 100644
--- a/tests/rest/client/test_read_marker.py
+++ b/tests/rest/client/test_read_marker.py
@@ -131,9 +131,6 @@ class ReadMarkerTestCase(unittest.HomeserverTestCase):
         event = self.get_success(self.store.get_event(event_id_1, allow_none=True))
         assert event is None
 
-        # TODO See https://github.com/matrix-org/synapse/issues/13476
-        self.store.get_event_ordering.invalidate_all()
-
         # Test moving the read marker to a newer event
         event_id_2 = send_message()
         channel = self.make_request(
diff --git a/tests/storage/databases/main/test_events_worker.py b/tests/storage/databases/main/test_events_worker.py
index 9606ecc43b..788500e38f 100644
--- a/tests/storage/databases/main/test_events_worker.py
+++ b/tests/storage/databases/main/test_events_worker.py
@@ -188,7 +188,7 @@ class EventCacheTestCase(unittest.HomeserverTestCase):
         self.event_id = res["event_id"]
 
         # Reset the event cache so the tests start with it empty
-        self.get_success(self.store._get_event_cache.clear())
+        self.store._get_event_cache.clear()
 
     def test_simple(self) -> None:
         """Test that we cache events that we pull from the DB."""
@@ -205,7 +205,7 @@ class EventCacheTestCase(unittest.HomeserverTestCase):
         """
 
         # Reset the event cache
-        self.get_success(self.store._get_event_cache.clear())
+        self.store._get_event_cache.clear()
 
         with LoggingContext("test") as ctx:
             # We keep hold of the event event though we never use it.
@@ -215,7 +215,7 @@ class EventCacheTestCase(unittest.HomeserverTestCase):
             self.assertEqual(ctx.get_resource_usage().evt_db_fetch_count, 1)
 
         # Reset the event cache
-        self.get_success(self.store._get_event_cache.clear())
+        self.store._get_event_cache.clear()
 
         with LoggingContext("test") as ctx:
             self.get_success(self.store.get_event(self.event_id))
@@ -390,7 +390,7 @@ class GetEventCancellationTestCase(unittest.HomeserverTestCase):
         self.event_id = res["event_id"]
 
         # Reset the event cache so the tests start with it empty
-        self.get_success(self.store._get_event_cache.clear())
+        self.store._get_event_cache.clear()
 
     @contextmanager
     def blocking_get_event_calls(
-- 
cgit 1.5.1


From d84e66144dc12dacf71c987a2ba802dd59c0b68e Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Fri, 9 Jun 2023 00:00:46 -0700
Subject: Allow for the configuration of max request retries and min/max retry
 delays in the matrix federation client (#12504)

Co-authored-by: Mathieu Velten <mathieuv@matrix.org>
Co-authored-by: Erik Johnston <erik@matrix.org>
---
 changelog.d/12504.misc                           |  1 +
 docs/usage/configuration/config_documentation.md | 26 ++++++++++++++++++++++++
 synapse/config/federation.py                     | 10 +++++++++
 synapse/http/matrixfederationclient.py           | 21 +++++++++++--------
 tests/http/test_matrixfederationclient.py        | 20 +++++++++++++++++-
 5 files changed, 68 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/12504.misc

(limited to 'synapse')

diff --git a/changelog.d/12504.misc b/changelog.d/12504.misc
new file mode 100644
index 0000000000..0bebaa213d
--- /dev/null
+++ b/changelog.d/12504.misc
@@ -0,0 +1 @@
+Allow for the configuration of max request retries and min/max retry delays in the matrix federation client.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 0cf6e075ff..8426de0417 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1196,6 +1196,32 @@ Example configuration:
 allow_device_name_lookup_over_federation: true
 ```
 ---
+### `federation`
+
+The federation section defines some sub-options related to federation.
+
+The following options are related to configuring timeout and retry logic for one request,
+independently of the others.
+Short retry algorithm is used when something or someone will wait for the request to have an
+answer, while long retry is used for requests that happen in the background,
+like sending a federation transaction.
+
+* `client_timeout`: timeout for the federation requests in seconds. Default to 60s.
+* `max_short_retry_delay`: maximum delay to be used for the short retry algo in seconds. Default to 2s.
+* `max_long_retry_delay`: maximum delay to be used for the long retry algo in seconds. Default to 60s.
+* `max_short_retries`: maximum number of retries for the short retry algo. Default to 3 attempts.
+* `max_long_retries`: maximum number of retries for the long retry algo. Default to 10 attempts.
+
+Example configuration:
+```yaml
+federation:
+  client_timeout: 180
+  max_short_retry_delay: 7
+  max_long_retry_delay: 100
+  max_short_retries: 5
+  max_long_retries: 20
+```
+---
 ## Caching
 
 Options related to caching.
diff --git a/synapse/config/federation.py b/synapse/config/federation.py
index 336fca578a..d21f7fd02a 100644
--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@@ -22,6 +22,8 @@ class FederationConfig(Config):
     section = "federation"
 
     def read_config(self, config: JsonDict, **kwargs: Any) -> None:
+        federation_config = config.setdefault("federation", {})
+
         # FIXME: federation_domain_whitelist needs sytests
         self.federation_domain_whitelist: Optional[dict] = None
         federation_domain_whitelist = config.get("federation_domain_whitelist", None)
@@ -49,5 +51,13 @@ class FederationConfig(Config):
             "allow_device_name_lookup_over_federation", False
         )
 
+        # Allow for the configuration of timeout, max request retries
+        # and min/max retry delays in the matrix federation client.
+        self.client_timeout = federation_config.get("client_timeout", 60)
+        self.max_long_retry_delay = federation_config.get("max_long_retry_delay", 60)
+        self.max_short_retry_delay = federation_config.get("max_short_retry_delay", 2)
+        self.max_long_retries = federation_config.get("max_long_retries", 10)
+        self.max_short_retries = federation_config.get("max_short_retries", 3)
+
 
 _METRICS_FOR_DOMAINS_SCHEMA = {"type": "array", "items": {"type": "string"}}
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index abb5ae5815..ed36825b67 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -95,8 +95,6 @@ incoming_responses_counter = Counter(
 )
 
 
-MAX_LONG_RETRIES = 10
-MAX_SHORT_RETRIES = 3
 MAXINT = sys.maxsize
 
 
@@ -406,7 +404,12 @@ class MatrixFederationHttpClient:
         self.clock = hs.get_clock()
         self._store = hs.get_datastores().main
         self.version_string_bytes = hs.version_string.encode("ascii")
-        self.default_timeout = 60
+        self.default_timeout = hs.config.federation.client_timeout
+
+        self.max_long_retry_delay = hs.config.federation.max_long_retry_delay
+        self.max_short_retry_delay = hs.config.federation.max_short_retry_delay
+        self.max_long_retries = hs.config.federation.max_long_retries
+        self.max_short_retries = hs.config.federation.max_short_retries
 
         self._cooperator = Cooperator(scheduler=_make_scheduler(self.reactor))
 
@@ -583,9 +586,9 @@ class MatrixFederationHttpClient:
             # XXX: Would be much nicer to retry only at the transaction-layer
             # (once we have reliable transactions in place)
             if long_retries:
-                retries_left = MAX_LONG_RETRIES
+                retries_left = self.max_long_retries
             else:
-                retries_left = MAX_SHORT_RETRIES
+                retries_left = self.max_short_retries
 
             url_bytes = request.uri
             url_str = url_bytes.decode("ascii")
@@ -730,12 +733,12 @@ class MatrixFederationHttpClient:
 
                     if retries_left and not timeout:
                         if long_retries:
-                            delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left)
-                            delay = min(delay, 60)
+                            delay = 4 ** (self.max_long_retries + 1 - retries_left)
+                            delay = min(delay, self.max_long_retry_delay)
                             delay *= random.uniform(0.8, 1.4)
                         else:
-                            delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left)
-                            delay = min(delay, 2)
+                            delay = 0.5 * 2 ** (self.max_short_retries - retries_left)
+                            delay = min(delay, self.max_short_retry_delay)
                             delay *= random.uniform(0.8, 1.4)
 
                         logger.debug(
diff --git a/tests/http/test_matrixfederationclient.py b/tests/http/test_matrixfederationclient.py
index 0dfc03ce50..8565f8ac64 100644
--- a/tests/http/test_matrixfederationclient.py
+++ b/tests/http/test_matrixfederationclient.py
@@ -40,7 +40,7 @@ from synapse.server import HomeServer
 from synapse.util import Clock
 
 from tests.server import FakeTransport
-from tests.unittest import HomeserverTestCase
+from tests.unittest import HomeserverTestCase, override_config
 
 
 def check_logcontext(context: LoggingContextOrSentinel) -> None:
@@ -640,3 +640,21 @@ class FederationClientTests(HomeserverTestCase):
             self.cl.build_auth_headers(
                 b"", b"GET", b"https://example.com", destination_is=b""
             )
+
+    @override_config(
+        {
+            "federation": {
+                "client_timeout": 180,
+                "max_long_retry_delay": 100,
+                "max_short_retry_delay": 7,
+                "max_long_retries": 20,
+                "max_short_retries": 5,
+            }
+        }
+    )
+    def test_configurable_retry_and_delay_values(self) -> None:
+        self.assertEqual(self.cl.default_timeout, 180)
+        self.assertEqual(self.cl.max_long_retry_delay, 100)
+        self.assertEqual(self.cl.max_short_retry_delay, 7)
+        self.assertEqual(self.cl.max_long_retries, 20)
+        self.assertEqual(self.cl.max_short_retries, 5)
-- 
cgit 1.5.1


From fcc3ca37e1b404981d9a0d6f2708e14407775b97 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Fri, 9 Jun 2023 15:39:49 -0500
Subject: Backfill in the background if we're doing it "just because" (#15710)

Fix https://github.com/matrix-org/synapse/issues/15702
---
 changelog.d/15710.feature      |  1 +
 synapse/handlers/federation.py | 18 ++++++++++++++----
 2 files changed, 15 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/15710.feature

(limited to 'synapse')

diff --git a/changelog.d/15710.feature b/changelog.d/15710.feature
new file mode 100644
index 0000000000..fe77a2fef6
--- /dev/null
+++ b/changelog.d/15710.feature
@@ -0,0 +1 @@
+Speed up `/messages` by backfilling in the background when there are no backward extremities where we are directly paginating.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 57d6b70cff..b7b5e21020 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -320,14 +320,21 @@ class FederationHandler:
             str(len(sorted_backfill_points)),
         )
 
-        # If we have no backfill points lower than the `current_depth` then
-        # either we can a) bail or b) still attempt to backfill. We opt to try
-        # backfilling anyway just in case we do get relevant events.
+        # If we have no backfill points lower than the `current_depth` then either we
+        # can a) bail or b) still attempt to backfill. We opt to try backfilling anyway
+        # just in case we do get relevant events. This is good for eventual consistency
+        # sake but we don't need to block the client for something that is just as
+        # likely not to return anything relevant so we backfill in the background. The
+        # only way this could return something relevant is if we discover a new branch
+        # of history that extends all the way back to where we are currently paginating
+        # and it's within the 100 events that are returned from `/backfill`.
         if not sorted_backfill_points and current_depth != MAX_DEPTH:
             logger.debug(
                 "_maybe_backfill_inner: all backfill points are *after* current depth. Trying again with later backfill points."
             )
-            return await self._maybe_backfill_inner(
+            run_as_background_process(
+                "_maybe_backfill_inner_anyway_with_max_depth",
+                self._maybe_backfill_inner,
                 room_id=room_id,
                 # We use `MAX_DEPTH` so that we find all backfill points next
                 # time (all events are below the `MAX_DEPTH`)
@@ -338,6 +345,9 @@ class FederationHandler:
                 # overall otherwise the smaller one will throw off the results.
                 processing_start_time=None,
             )
+            # We return `False` because we're backfilling in the background and there
+            # are no new events immediately for the caller to know about yet.
+            return False
 
         # Even after recursing with `MAX_DEPTH`, we didn't find any
         # backward extremities to backfill from.
-- 
cgit 1.5.1


From df945e0d7cd3cc78e54002115ae5e5793ed0a116 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 13 Jun 2023 12:07:55 -0400
Subject: Fix MSC3983 support: Use the unstable /keys/claim federation endpoint
 if multiple keys are requested (#15755)

---
 changelog.d/15755.misc                  | 1 +
 synapse/federation/federation_client.py | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15755.misc

(limited to 'synapse')

diff --git a/changelog.d/15755.misc b/changelog.d/15755.misc
new file mode 100644
index 0000000000..a65340d380
--- /dev/null
+++ b/changelog.d/15755.misc
@@ -0,0 +1 @@
+Fix requesting multiple keys at once over federation, related to [MSC3983](https://github.com/matrix-org/matrix-spec-proposals/pull/3983).
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index a2cf3a96c6..e5359ca558 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -260,7 +260,9 @@ class FederationClient(FederationBase):
         use_unstable = False
         for user_id, one_time_keys in query.items():
             for device_id, algorithms in one_time_keys.items():
-                if any(count > 1 for count in algorithms.values()):
+                # If more than one key is requested (across all algorithms), attempt to
+                # use the unstable endpoint.
+                if sum(algorithms.values()) > 1:
                     use_unstable = True
                 if algorithms:
                     # For the stable query, choose only the first algorithm.
@@ -296,6 +298,7 @@ class FederationClient(FederationBase):
         else:
             logger.debug("Skipping unstable claim client keys API")
 
+        # TODO Potentially attempt multiple queries and combine the results?
         return await self.transport_layer.claim_client_keys(
             user, destination, content, timeout
         )
-- 
cgit 1.5.1


From 0757d59ec4f3275e30907825b4dfb0fdbdce9006 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 13 Jun 2023 12:31:08 -0500
Subject: Avoid backfill when we already have messages to return (#15737)

We now only block the client to backfill when we see a large gap in the events (more than 2 events missing in a row according to `depth`), more than 3 single-event holes, or not enough messages to fill the response. Otherwise, we return the messages directly to the client and backfill in the background for eventual consistency sake.

Fix https://github.com/matrix-org/synapse/issues/15696
---
 changelog.d/15737.feature      |   1 +
 synapse/handlers/pagination.py | 137 ++++++++++++++++++++++++++++++++++-------
 2 files changed, 116 insertions(+), 22 deletions(-)
 create mode 100644 changelog.d/15737.feature

(limited to 'synapse')

diff --git a/changelog.d/15737.feature b/changelog.d/15737.feature
new file mode 100644
index 0000000000..9a547b5ebd
--- /dev/null
+++ b/changelog.d/15737.feature
@@ -0,0 +1 @@
+Improve `/messages` response time by avoiding backfill when we already have messages to return.
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index d5257acb7d..19b8728db9 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -40,6 +40,11 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
+# How many single event gaps we tolerate returning in a `/messages` response before we
+# backfill and try to fill in the history. This is an arbitrarily picked number so feel
+# free to tune it in the future.
+BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD = 3
+
 
 @attr.s(slots=True, auto_attribs=True)
 class PurgeStatus:
@@ -486,35 +491,35 @@ class PaginationHandler:
                         room_id, room_token.stream
                     )
 
-                if not use_admin_priviledge and membership == Membership.LEAVE:
-                    # If they have left the room then clamp the token to be before
-                    # they left the room, to save the effort of loading from the
-                    # database.
-
-                    # This is only None if the room is world_readable, in which
-                    # case "JOIN" would have been returned.
-                    assert member_event_id
+            # If they have left the room then clamp the token to be before
+            # they left the room, to save the effort of loading from the
+            # database.
+            if (
+                pagin_config.direction == Direction.BACKWARDS
+                and not use_admin_priviledge
+                and membership == Membership.LEAVE
+            ):
+                # This is only None if the room is world_readable, in which case
+                # "Membership.JOIN" would have been returned and we should never hit
+                # this branch.
+                assert member_event_id
+
+                leave_token = await self.store.get_topological_token_for_event(
+                    member_event_id
+                )
+                assert leave_token.topological is not None
 
-                    leave_token = await self.store.get_topological_token_for_event(
-                        member_event_id
+                if leave_token.topological < curr_topo:
+                    from_token = from_token.copy_and_replace(
+                        StreamKeyType.ROOM, leave_token
                     )
-                    assert leave_token.topological is not None
-
-                    if leave_token.topological < curr_topo:
-                        from_token = from_token.copy_and_replace(
-                            StreamKeyType.ROOM, leave_token
-                        )
-
-                await self.hs.get_federation_handler().maybe_backfill(
-                    room_id,
-                    curr_topo,
-                    limit=pagin_config.limit,
-                )
 
             to_room_key = None
             if pagin_config.to_token:
                 to_room_key = pagin_config.to_token.room_key
 
+            # Initially fetch the events from the database. With any luck, we can return
+            # these without blocking on backfill (handled below).
             events, next_key = await self.store.paginate_room_events(
                 room_id=room_id,
                 from_key=from_token.room_key,
@@ -524,6 +529,94 @@ class PaginationHandler:
                 event_filter=event_filter,
             )
 
+            if pagin_config.direction == Direction.BACKWARDS:
+                # We use a `Set` because there can be multiple events at a given depth
+                # and we only care about looking at the unique continuum of depths to
+                # find gaps.
+                event_depths: Set[int] = {event.depth for event in events}
+                sorted_event_depths = sorted(event_depths)
+
+                # Inspect the depths of the returned events to see if there are any gaps
+                found_big_gap = False
+                number_of_gaps = 0
+                previous_event_depth = (
+                    sorted_event_depths[0] if len(sorted_event_depths) > 0 else 0
+                )
+                for event_depth in sorted_event_depths:
+                    # We don't expect a negative depth but we'll just deal with it in
+                    # any case by taking the absolute value to get the true gap between
+                    # any two integers.
+                    depth_gap = abs(event_depth - previous_event_depth)
+                    # A `depth_gap` of 1 is a normal continuous chain to the next event
+                    # (1 <-- 2 <-- 3) so anything larger indicates a missing event (it's
+                    # also possible there is no event at a given depth but we can't ever
+                    # know that for sure)
+                    if depth_gap > 1:
+                        number_of_gaps += 1
+
+                    # We only tolerate a small number of single-event long gaps in the
+                    # returned events because those are most likely just events we've
+                    # failed to pull in the past. Anything longer than that is probably
+                    # a sign that we're missing a decent chunk of history and we should
+                    # try to backfill it.
+                    #
+                    # XXX: It's possible we could tolerate longer gaps if we checked
+                    # that a given event's `prev_events` is one that has failed pull
+                    # attempts and we could just treat it like a dead branch of history
+                    # for now or at least something that we don't need to block the
+                    # client on to try pulling.
+                    #
+                    # XXX: If we had something like MSC3871 to indicate gaps in the
+                    # timeline to the client, we could also get away with any sized gap
+                    # and just have the client refetch the holes as they see fit.
+                    if depth_gap > 2:
+                        found_big_gap = True
+                        break
+                    previous_event_depth = event_depth
+
+                # Backfill in the foreground if we found a big gap, have too many holes,
+                # or we don't have enough events to fill the limit that the client asked
+                # for.
+                missing_too_many_events = (
+                    number_of_gaps > BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD
+                )
+                not_enough_events_to_fill_response = len(events) < pagin_config.limit
+                if (
+                    found_big_gap
+                    or missing_too_many_events
+                    or not_enough_events_to_fill_response
+                ):
+                    did_backfill = (
+                        await self.hs.get_federation_handler().maybe_backfill(
+                            room_id,
+                            curr_topo,
+                            limit=pagin_config.limit,
+                        )
+                    )
+
+                    # If we did backfill something, refetch the events from the database to
+                    # catch anything new that might have been added since we last fetched.
+                    if did_backfill:
+                        events, next_key = await self.store.paginate_room_events(
+                            room_id=room_id,
+                            from_key=from_token.room_key,
+                            to_key=to_room_key,
+                            direction=pagin_config.direction,
+                            limit=pagin_config.limit,
+                            event_filter=event_filter,
+                        )
+                else:
+                    # Otherwise, we can backfill in the background for eventual
+                    # consistency's sake but we don't need to block the client waiting
+                    # for a costly federation call and processing.
+                    run_as_background_process(
+                        "maybe_backfill_in_the_background",
+                        self.hs.get_federation_handler().maybe_backfill,
+                        room_id,
+                        curr_topo,
+                        limit=pagin_config.limit,
+                    )
+
             next_token = from_token.copy_and_replace(StreamKeyType.ROOM, next_key)
 
         # if no events are returned from pagination, that implies
-- 
cgit 1.5.1


From 59ec4a0dc1404991935e3c29abe548affa0446bf Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Tue, 13 Jun 2023 19:51:47 +0200
Subject: Fix MSC3983 support: only one OTK per device was returned through
 federation (#15770)

---
 changelog.d/15770.bugfix                | 1 +
 synapse/federation/federation_server.py | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15770.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15770.bugfix b/changelog.d/15770.bugfix
new file mode 100644
index 0000000000..a65340d380
--- /dev/null
+++ b/changelog.d/15770.bugfix
@@ -0,0 +1 @@
+Fix requesting multiple keys at once over federation, related to [MSC3983](https://github.com/matrix-org/matrix-spec-proposals/pull/3983).
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 9425b32507..61fa3b30af 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -1016,7 +1016,9 @@ class FederationServer(FederationBase):
             for user_id, device_keys in result.items():
                 for device_id, keys in device_keys.items():
                     for key_id, key in keys.items():
-                        json_result.setdefault(user_id, {})[device_id] = {key_id: key}
+                        json_result.setdefault(user_id, {}).setdefault(device_id, {})[
+                            key_id
+                        ] = key
 
         logger.info(
             "Claimed one-time-keys: %s",
-- 
cgit 1.5.1


From 553f2f53e7930d721d2070ffa45af6397f5ecb60 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Tue, 13 Jun 2023 13:22:06 -0700
Subject: Replace `EventContext` fields `prev_group` and `delta_ids` with field
 `state_group_deltas` (#15233)

---
 changelog.d/15233.misc                        |   1 +
 synapse/events/snapshot.py                    | 159 ++++++++++++++++++--------
 synapse/storage/controllers/persist_events.py |   5 +-
 tests/events/test_snapshot.py                 |   3 +-
 tests/storage/test_event_chain.py             |   5 +-
 tests/test_state.py                           |  11 +-
 6 files changed, 126 insertions(+), 58 deletions(-)
 create mode 100644 changelog.d/15233.misc

(limited to 'synapse')

diff --git a/changelog.d/15233.misc b/changelog.d/15233.misc
new file mode 100644
index 0000000000..1dff00bf3c
--- /dev/null
+++ b/changelog.d/15233.misc
@@ -0,0 +1 @@
+Replace `EventContext` fields `prev_group` and `delta_ids` with field `state_group_deltas`.
diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py
index e7e8225b8e..a43498ed4d 100644
--- a/synapse/events/snapshot.py
+++ b/synapse/events/snapshot.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, List, Optional, Tuple
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
 
 import attr
 from immutabledict import immutabledict
@@ -107,33 +107,32 @@ class EventContext(UnpersistedEventContextBase):
         state_delta_due_to_event: If `state_group` and `state_group_before_event` are not None
             then this is the delta of the state between the two groups.
 
-        prev_group: If it is known, ``state_group``'s prev_group. Note that this being
-            None does not necessarily mean that ``state_group`` does not have
-            a prev_group!
+        state_group_deltas: If not empty, this is a dict collecting a mapping of the state
+            difference between state groups.
 
-            If the event is a state event, this is normally the same as
-            ``state_group_before_event``.
+            The keys are a tuple of two integers: the initial group and final state group.
+            The corresponding value is a state map representing the state delta between
+            these state groups.
 
-            If ``state_group`` is None (ie, the event is an outlier), ``prev_group``
-            will always also be ``None``.
+            The dictionary is expected to have at most two entries with state groups of:
 
-            Note that this *not* (necessarily) the state group associated with
-            ``_prev_state_ids``.
+            1. The state group before the event and after the event.
+            2. The state group preceding the state group before the event and the
+               state group before the event.
 
-        delta_ids: If ``prev_group`` is not None, the state delta between ``prev_group``
-            and ``state_group``.
+            This information is collected and stored as part of an optimization for persisting
+            events.
 
         partial_state: if True, we may be storing this event with a temporary,
             incomplete state.
     """
 
     _storage: "StorageControllers"
+    state_group_deltas: Dict[Tuple[int, int], StateMap[str]]
     rejected: Optional[str] = None
     _state_group: Optional[int] = None
     state_group_before_event: Optional[int] = None
     _state_delta_due_to_event: Optional[StateMap[str]] = None
-    prev_group: Optional[int] = None
-    delta_ids: Optional[StateMap[str]] = None
     app_service: Optional[ApplicationService] = None
 
     partial_state: bool = False
@@ -145,16 +144,14 @@ class EventContext(UnpersistedEventContextBase):
         state_group_before_event: Optional[int],
         state_delta_due_to_event: Optional[StateMap[str]],
         partial_state: bool,
-        prev_group: Optional[int] = None,
-        delta_ids: Optional[StateMap[str]] = None,
+        state_group_deltas: Dict[Tuple[int, int], StateMap[str]],
     ) -> "EventContext":
         return EventContext(
             storage=storage,
             state_group=state_group,
             state_group_before_event=state_group_before_event,
             state_delta_due_to_event=state_delta_due_to_event,
-            prev_group=prev_group,
-            delta_ids=delta_ids,
+            state_group_deltas=state_group_deltas,
             partial_state=partial_state,
         )
 
@@ -163,7 +160,7 @@ class EventContext(UnpersistedEventContextBase):
         storage: "StorageControllers",
     ) -> "EventContext":
         """Return an EventContext instance suitable for persisting an outlier event"""
-        return EventContext(storage=storage)
+        return EventContext(storage=storage, state_group_deltas={})
 
     async def persist(self, event: EventBase) -> "EventContext":
         return self
@@ -183,13 +180,15 @@ class EventContext(UnpersistedEventContextBase):
             "state_group": self._state_group,
             "state_group_before_event": self.state_group_before_event,
             "rejected": self.rejected,
-            "prev_group": self.prev_group,
+            "state_group_deltas": _encode_state_group_delta(self.state_group_deltas),
             "state_delta_due_to_event": _encode_state_dict(
                 self._state_delta_due_to_event
             ),
-            "delta_ids": _encode_state_dict(self.delta_ids),
             "app_service_id": self.app_service.id if self.app_service else None,
             "partial_state": self.partial_state,
+            # add dummy delta_ids and prev_group for backwards compatibility
+            "delta_ids": None,
+            "prev_group": None,
         }
 
     @staticmethod
@@ -204,17 +203,24 @@ class EventContext(UnpersistedEventContextBase):
         Returns:
             The event context.
         """
+        # workaround for backwards/forwards compatibility: if the input doesn't have a value
+        # for "state_group_deltas" just assign an empty dict
+        state_group_deltas = input.get("state_group_deltas", None)
+        if state_group_deltas:
+            state_group_deltas = _decode_state_group_delta(state_group_deltas)
+        else:
+            state_group_deltas = {}
+
         context = EventContext(
             # We use the state_group and prev_state_id stuff to pull the
             # current_state_ids out of the DB and construct prev_state_ids.
             storage=storage,
             state_group=input["state_group"],
             state_group_before_event=input["state_group_before_event"],
-            prev_group=input["prev_group"],
+            state_group_deltas=state_group_deltas,
             state_delta_due_to_event=_decode_state_dict(
                 input["state_delta_due_to_event"]
             ),
-            delta_ids=_decode_state_dict(input["delta_ids"]),
             rejected=input["rejected"],
             partial_state=input.get("partial_state", False),
         )
@@ -349,7 +355,7 @@ class UnpersistedEventContext(UnpersistedEventContextBase):
     _storage: "StorageControllers"
     state_group_before_event: Optional[int]
     state_group_after_event: Optional[int]
-    state_delta_due_to_event: Optional[dict]
+    state_delta_due_to_event: Optional[StateMap[str]]
     prev_group_for_state_group_before_event: Optional[int]
     delta_ids_to_state_group_before_event: Optional[StateMap[str]]
     partial_state: bool
@@ -380,26 +386,16 @@ class UnpersistedEventContext(UnpersistedEventContextBase):
 
         events_and_persisted_context = []
         for event, unpersisted_context in amended_events_and_context:
-            if event.is_state():
-                context = EventContext(
-                    storage=unpersisted_context._storage,
-                    state_group=unpersisted_context.state_group_after_event,
-                    state_group_before_event=unpersisted_context.state_group_before_event,
-                    state_delta_due_to_event=unpersisted_context.state_delta_due_to_event,
-                    partial_state=unpersisted_context.partial_state,
-                    prev_group=unpersisted_context.state_group_before_event,
-                    delta_ids=unpersisted_context.state_delta_due_to_event,
-                )
-            else:
-                context = EventContext(
-                    storage=unpersisted_context._storage,
-                    state_group=unpersisted_context.state_group_after_event,
-                    state_group_before_event=unpersisted_context.state_group_before_event,
-                    state_delta_due_to_event=unpersisted_context.state_delta_due_to_event,
-                    partial_state=unpersisted_context.partial_state,
-                    prev_group=unpersisted_context.prev_group_for_state_group_before_event,
-                    delta_ids=unpersisted_context.delta_ids_to_state_group_before_event,
-                )
+            state_group_deltas = unpersisted_context._build_state_group_deltas()
+
+            context = EventContext(
+                storage=unpersisted_context._storage,
+                state_group=unpersisted_context.state_group_after_event,
+                state_group_before_event=unpersisted_context.state_group_before_event,
+                state_delta_due_to_event=unpersisted_context.state_delta_due_to_event,
+                partial_state=unpersisted_context.partial_state,
+                state_group_deltas=state_group_deltas,
+            )
             events_and_persisted_context.append((event, context))
         return events_and_persisted_context
 
@@ -452,11 +448,11 @@ class UnpersistedEventContext(UnpersistedEventContextBase):
 
         # if the event isn't a state event the state group doesn't change
         if not self.state_delta_due_to_event:
-            state_group_after_event = self.state_group_before_event
+            self.state_group_after_event = self.state_group_before_event
 
         # otherwise if it is a state event we need to get a state group for it
         else:
-            state_group_after_event = await self._storage.state.store_state_group(
+            self.state_group_after_event = await self._storage.state.store_state_group(
                 event.event_id,
                 event.room_id,
                 prev_group=self.state_group_before_event,
@@ -464,16 +460,81 @@ class UnpersistedEventContext(UnpersistedEventContextBase):
                 current_state_ids=None,
             )
 
+        state_group_deltas = self._build_state_group_deltas()
+
         return EventContext.with_state(
             storage=self._storage,
-            state_group=state_group_after_event,
+            state_group=self.state_group_after_event,
             state_group_before_event=self.state_group_before_event,
             state_delta_due_to_event=self.state_delta_due_to_event,
+            state_group_deltas=state_group_deltas,
             partial_state=self.partial_state,
-            prev_group=self.state_group_before_event,
-            delta_ids=self.state_delta_due_to_event,
         )
 
+    def _build_state_group_deltas(self) -> Dict[Tuple[int, int], StateMap]:
+        """
+        Collect deltas between the state groups associated with this context
+        """
+        state_group_deltas = {}
+
+        # if we know the state group before the event and after the event, add them and the
+        # state delta between them to state_group_deltas
+        if self.state_group_before_event and self.state_group_after_event:
+            # if we have the state groups we should have the delta
+            assert self.state_delta_due_to_event is not None
+            state_group_deltas[
+                (
+                    self.state_group_before_event,
+                    self.state_group_after_event,
+                )
+            ] = self.state_delta_due_to_event
+
+        # the state group before the event may also have a state group which precedes it, if
+        # we have that and the state group before the event, add them and the state
+        # delta between them to state_group_deltas
+        if (
+            self.prev_group_for_state_group_before_event
+            and self.state_group_before_event
+        ):
+            # if we have both state groups we should have the delta between them
+            assert self.delta_ids_to_state_group_before_event is not None
+            state_group_deltas[
+                (
+                    self.prev_group_for_state_group_before_event,
+                    self.state_group_before_event,
+                )
+            ] = self.delta_ids_to_state_group_before_event
+
+        return state_group_deltas
+
+
+def _encode_state_group_delta(
+    state_group_delta: Dict[Tuple[int, int], StateMap[str]]
+) -> List[Tuple[int, int, Optional[List[Tuple[str, str, str]]]]]:
+    if not state_group_delta:
+        return []
+
+    state_group_delta_encoded = []
+    for key, value in state_group_delta.items():
+        state_group_delta_encoded.append((key[0], key[1], _encode_state_dict(value)))
+
+    return state_group_delta_encoded
+
+
+def _decode_state_group_delta(
+    input: List[Tuple[int, int, List[Tuple[str, str, str]]]]
+) -> Dict[Tuple[int, int], StateMap[str]]:
+    if not input:
+        return {}
+
+    state_group_deltas = {}
+    for state_group_1, state_group_2, state_dict in input:
+        state_map = _decode_state_dict(state_dict)
+        assert state_map is not None
+        state_group_deltas[(state_group_1, state_group_2)] = state_map
+
+    return state_group_deltas
+
 
 def _encode_state_dict(
     state_dict: Optional[StateMap[str]],
diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py
index f1d2c71c91..35c0680365 100644
--- a/synapse/storage/controllers/persist_events.py
+++ b/synapse/storage/controllers/persist_events.py
@@ -839,9 +839,8 @@ class EventsPersistenceStorageController:
                         "group" % (ev.event_id,)
                     )
                 continue
-
-            if ctx.prev_group:
-                state_group_deltas[(ctx.prev_group, ctx.state_group)] = ctx.delta_ids
+            if ctx.state_group_deltas:
+                state_group_deltas.update(ctx.state_group_deltas)
 
         # We need to map the event_ids to their state groups. First, let's
         # check if the event is one we're persisting, in which case we can
diff --git a/tests/events/test_snapshot.py b/tests/events/test_snapshot.py
index 6687c28e8f..b5e42f9600 100644
--- a/tests/events/test_snapshot.py
+++ b/tests/events/test_snapshot.py
@@ -101,8 +101,7 @@ class TestEventContext(unittest.HomeserverTestCase):
         self.assertEqual(
             context.state_group_before_event, d_context.state_group_before_event
         )
-        self.assertEqual(context.prev_group, d_context.prev_group)
-        self.assertEqual(context.delta_ids, d_context.delta_ids)
+        self.assertEqual(context.state_group_deltas, d_context.state_group_deltas)
         self.assertEqual(context.app_service, d_context.app_service)
 
         self.assertEqual(
diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py
index e39b63edac..48ebfadaab 100644
--- a/tests/storage/test_event_chain.py
+++ b/tests/storage/test_event_chain.py
@@ -401,7 +401,10 @@ class EventChainStoreTestCase(HomeserverTestCase):
             assert persist_events_store is not None
             persist_events_store._store_event_txn(
                 txn,
-                [(e, EventContext(self.hs.get_storage_controllers())) for e in events],
+                [
+                    (e, EventContext(self.hs.get_storage_controllers(), {}))
+                    for e in events
+                ],
             )
 
             # Actually call the function that calculates the auth chain stuff.
diff --git a/tests/test_state.py b/tests/test_state.py
index 7a49b87953..eded38c766 100644
--- a/tests/test_state.py
+++ b/tests/test_state.py
@@ -555,10 +555,15 @@ class StateTestCase(unittest.TestCase):
             (e.event_id for e in old_state + [event]), current_state_ids.values()
         )
 
-        self.assertIsNotNone(context.state_group_before_event)
+        assert context.state_group_before_event is not None
+        assert context.state_group is not None
+        self.assertEqual(
+            context.state_group_deltas.get(
+                (context.state_group_before_event, context.state_group)
+            ),
+            {(event.type, event.state_key): event.event_id},
+        )
         self.assertNotEqual(context.state_group_before_event, context.state_group)
-        self.assertEqual(context.state_group_before_event, context.prev_group)
-        self.assertEqual({("state", ""): event.event_id}, context.delta_ids)
 
     @defer.inlineCallbacks
     def test_trivial_annotate_message(
-- 
cgit 1.5.1


From 8ddb2de55387d54bac53138f374f55c7608991ce Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 13 Jun 2023 16:34:54 -0500
Subject: Document `looping_call()` functionality that will wait for the given
 function to finish before scheduling another (#15772)

Thanks to @erikjohnston for clarifying this in https://github.com/matrix-org/synapse/pull/15743#discussion_r1226544457

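We don't have to worry about calls stacking up if the given function takes longer than the scheduled interval: when the function returns an awaitable/deferred, the next call isn't scheduled until that awaitable has finished.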
---
 changelog.d/15772.doc    | 1 +
 synapse/util/__init__.py | 5 +++++
 2 files changed, 6 insertions(+)
 create mode 100644 changelog.d/15772.doc

(limited to 'synapse')

diff --git a/changelog.d/15772.doc b/changelog.d/15772.doc
new file mode 100644
index 0000000000..4d6c933c71
--- /dev/null
+++ b/changelog.d/15772.doc
@@ -0,0 +1 @@
+Document `looping_call()` functionality that will wait for the given function to finish before scheduling another.
diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py
index 7ea0c4c36b..9f3b8741c1 100644
--- a/synapse/util/__init__.py
+++ b/synapse/util/__init__.py
@@ -116,6 +116,11 @@ class Clock:
 
         Waits `msec` initially before calling `f` for the first time.
 
+        If the function given to `looping_call` returns an awaitable/deferred, the next
+        call isn't scheduled until after the returned awaitable has finished. We get
+        this functionality thanks to this function being a thin wrapper around
+        `twisted.internet.task.LoopingCall`.
+
         Note that the function will be called with no logcontext, so if it is anything
         other than trivial, you probably want to wrap it in run_as_background_process.
 
-- 
cgit 1.5.1


From 21fea6b7493533985f7fa14924949514b5a356e2 Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Wed, 14 Jun 2023 03:42:18 -0500
Subject: Prefill events after invalidate not before when persisting events
 (#15758)

Fixes #15757
---
 changelog.d/15758.bugfix                           |  1 +
 synapse/storage/databases/main/events.py           | 15 +++++--
 synapse/storage/databases/main/events_worker.py    |  2 +-
 synapse/util/caches/lrucache.py                    |  8 +++-
 tests/storage/databases/main/test_events_worker.py | 49 ++++++++++++++++++++++
 5 files changed, 70 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/15758.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15758.bugfix b/changelog.d/15758.bugfix
new file mode 100644
index 0000000000..cabe25ca24
--- /dev/null
+++ b/changelog.d/15758.bugfix
@@ -0,0 +1 @@
+Avoid invalidating a cache that was just prefilled.
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index e2e6eb479f..44af3357af 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1729,13 +1729,22 @@ class PersistEventsStore:
             if not row["rejects"] and not row["redacts"]:
                 to_prefill.append(EventCacheEntry(event=event, redacted_event=None))
 
-        async def prefill() -> None:
+        async def external_prefill() -> None:
             for cache_entry in to_prefill:
-                await self.store._get_event_cache.set(
+                await self.store._get_event_cache.set_external(
                     (cache_entry.event.event_id,), cache_entry
                 )
 
-        txn.async_call_after(prefill)
+        def local_prefill() -> None:
+            for cache_entry in to_prefill:
+                self.store._get_event_cache.set_local(
+                    (cache_entry.event.event_id,), cache_entry
+                )
+
+        # The order these are called here is not as important as knowing that after the
+        # transaction is finished, the async_call_after will run before the call_after.
+        txn.async_call_after(external_prefill)
+        txn.call_after(local_prefill)
 
     def _store_redaction(self, txn: LoggingTransaction, event: EventBase) -> None:
         assert event.redacts is not None
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index d93ffc4efa..7e7648c951 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -883,7 +883,7 @@ class EventsWorkerStore(SQLBaseStore):
 
     async def _invalidate_async_get_event_cache(self, event_id: str) -> None:
         """
-        Invalidates an event in the asyncronous get event cache, which may be remote.
+        Invalidates an event in the asynchronous get event cache, which may be remote.
 
         Arguments:
             event_id: the event ID to invalidate
diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py
index 6137c85e10..be6554319a 100644
--- a/synapse/util/caches/lrucache.py
+++ b/synapse/util/caches/lrucache.py
@@ -842,7 +842,13 @@ class AsyncLruCache(Generic[KT, VT]):
         return self._lru_cache.get(key, update_metrics=update_metrics)
 
     async def set(self, key: KT, value: VT) -> None:
-        self._lru_cache.set(key, value)
+        # This will add the entries in the correct order, local first external second
+        self.set_local(key, value)
+        await self.set_external(key, value)
+
+    async def set_external(self, key: KT, value: VT) -> None:
+        # This method should add an entry to any configured external cache, in this case noop.
+        pass
 
     def set_local(self, key: KT, value: VT) -> None:
         self._lru_cache.set(key, value)
diff --git a/tests/storage/databases/main/test_events_worker.py b/tests/storage/databases/main/test_events_worker.py
index 788500e38f..b223dc750b 100644
--- a/tests/storage/databases/main/test_events_worker.py
+++ b/tests/storage/databases/main/test_events_worker.py
@@ -139,6 +139,55 @@ class HaveSeenEventsTestCase(unittest.HomeserverTestCase):
             # That should result in a single db query to lookup
             self.assertEqual(ctx.get_resource_usage().db_txn_count, 1)
 
+    def test_persisting_event_prefills_get_event_cache(self) -> None:
+        """
+        Test to make sure that the `_get_event_cache` is prefilled after we persist an
+        event and returns the updated value.
+        """
+        event, event_context = self.get_success(
+            create_event(
+                self.hs,
+                room_id=self.room_id,
+                sender=self.user,
+                type="test_event_type",
+                content={"body": "conflabulation"},
+            )
+        )
+
+        # First, check `_get_event_cache` for the event we just made
+        # to verify it's not in the cache.
+        res = self.store._get_event_cache.get_local((event.event_id,))
+        self.assertEqual(res, None, "Event was cached when it should not have been.")
+
+        with LoggingContext(name="test") as ctx:
+            # Persist the event which should invalidate then prefill the
+            # `_get_event_cache` so we don't return stale values.
+            # Side Note: Apparently, persisting an event isn't a transaction in the
+            # sense that it is recorded in the LoggingContext
+            persistence = self.hs.get_storage_controllers().persistence
+            assert persistence is not None
+            self.get_success(
+                persistence.persist_event(
+                    event,
+                    event_context,
+                )
+            )
+
+            # Check `_get_event_cache` again and we should see the updated fact
+            # that we now have the event cached after persisting it.
+            res = self.store._get_event_cache.get_local((event.event_id,))
+            self.assertEqual(res.event, event, "Event not cached as expected.")  # type: ignore
+
+            # Try and fetch the event from the database.
+            self.get_success(self.store.get_event(event.event_id))
+
+            # Verify that the database hit was avoided.
+            self.assertEqual(
+                ctx.get_resource_usage().evt_db_fetch_count,
+                0,
+                "Database was hit, which would not happen if event was cached.",
+            )
+
     def test_invalidate_cache_by_room_id(self) -> None:
         """
         Test to make sure that all events associated with the given `(room_id,)`
-- 
cgit 1.5.1


From 14f9d9b4520099118f009ae4f4c6b11b779af499 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Wed, 14 Jun 2023 11:53:55 +0200
Subject: Fix empty scope when having version mismatch between workers (#15774)

---
 changelog.d/15774.bugfix  | 1 +
 synapse/types/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15774.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15774.bugfix b/changelog.d/15774.bugfix
new file mode 100644
index 0000000000..c24d6c25e4
--- /dev/null
+++ b/changelog.d/15774.bugfix
@@ -0,0 +1 @@
+Fix an error when having workers of different versions running.
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index dfc95e8ebb..095be070e0 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -177,7 +177,7 @@ class Requester:
             user=UserID.from_string(input["user_id"]),
             access_token_id=input["access_token_id"],
             is_guest=input["is_guest"],
-            scope=set(input["scope"]),
+            scope=set(input.get("scope", [])),
             shadow_banned=input["shadow_banned"],
             device_id=input["device_id"],
             app_service=appservice,
-- 
cgit 1.5.1


From ef0d3d7bd941b497ad8291c58bcc53700e08b999 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Wed, 14 Jun 2023 11:55:09 +0200
Subject: Revert "Allow for the configuration of max request retries and
 min/max retry delays in the matrix federation client (#12504)"

This reverts commit d84e66144dc12dacf71c987a2ba802dd59c0b68e.
---
 CHANGES.md                                       |  1 -
 docs/usage/configuration/config_documentation.md | 26 ------------------------
 synapse/config/federation.py                     | 10 ---------
 synapse/http/matrixfederationclient.py           | 21 ++++++++-----------
 tests/http/test_matrixfederationclient.py        | 20 +-----------------
 5 files changed, 10 insertions(+), 68 deletions(-)

(limited to 'synapse')

diff --git a/CHANGES.md b/CHANGES.md
index 5412581eef..d898593664 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -30,7 +30,6 @@ Improved Documentation
 Internal Changes
 ----------------
 
-- Allow for the configuration of max request retries and min/max retry delays in the matrix federation client. ([\#12504](https://github.com/matrix-org/synapse/issues/12504))
 - Log when events are (maybe unexpectedly) filtered out of responses in tests. ([\#14213](https://github.com/matrix-org/synapse/issues/14213))
 - Read from column `full_user_id` rather than `user_id` of tables `profiles` and `user_filters`. ([\#15649](https://github.com/matrix-org/synapse/issues/15649))
 - Add support for tracing functions which return `Awaitable`s. ([\#15650](https://github.com/matrix-org/synapse/issues/15650))
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 8426de0417..0cf6e075ff 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1196,32 +1196,6 @@ Example configuration:
 allow_device_name_lookup_over_federation: true
 ```
 ---
-### `federation`
-
-The federation section defines some sub-options related to federation.
-
-The following options are related to configuring timeout and retry logic for one request,
-independently of the others.
-Short retry algorithm is used when something or someone will wait for the request to have an
-answer, while long retry is used for requests that happen in the background,
-like sending a federation transaction.
-
-* `client_timeout`: timeout for the federation requests in seconds. Default to 60s.
-* `max_short_retry_delay`: maximum delay to be used for the short retry algo in seconds. Default to 2s.
-* `max_long_retry_delay`: maximum delay to be used for the short retry algo in seconds. Default to 60s.
-* `max_short_retries`: maximum number of retries for the short retry algo. Default to 3 attempts.
-* `max_long_retries`: maximum number of retries for the long retry algo. Default to 10 attempts.
-
-Example configuration:
-```yaml
-federation:
-  client_timeout: 180
-  max_short_retry_delay: 7
-  max_long_retry_delay: 100
-  max_short_retries: 5
-  max_long_retries: 20
-```
----
 ## Caching
 
 Options related to caching.
diff --git a/synapse/config/federation.py b/synapse/config/federation.py
index d21f7fd02a..336fca578a 100644
--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@@ -22,8 +22,6 @@ class FederationConfig(Config):
     section = "federation"
 
     def read_config(self, config: JsonDict, **kwargs: Any) -> None:
-        federation_config = config.setdefault("federation", {})
-
         # FIXME: federation_domain_whitelist needs sytests
         self.federation_domain_whitelist: Optional[dict] = None
         federation_domain_whitelist = config.get("federation_domain_whitelist", None)
@@ -51,13 +49,5 @@ class FederationConfig(Config):
             "allow_device_name_lookup_over_federation", False
         )
 
-        # Allow for the configuration of timeout, max request retries
-        # and min/max retry delays in the matrix federation client.
-        self.client_timeout = federation_config.get("client_timeout", 60)
-        self.max_long_retry_delay = federation_config.get("max_long_retry_delay", 60)
-        self.max_short_retry_delay = federation_config.get("max_short_retry_delay", 2)
-        self.max_long_retries = federation_config.get("max_long_retries", 10)
-        self.max_short_retries = federation_config.get("max_short_retries", 3)
-
 
 _METRICS_FOR_DOMAINS_SCHEMA = {"type": "array", "items": {"type": "string"}}
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index ed36825b67..abb5ae5815 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -95,6 +95,8 @@ incoming_responses_counter = Counter(
 )
 
 
+MAX_LONG_RETRIES = 10
+MAX_SHORT_RETRIES = 3
 MAXINT = sys.maxsize
 
 
@@ -404,12 +406,7 @@ class MatrixFederationHttpClient:
         self.clock = hs.get_clock()
         self._store = hs.get_datastores().main
         self.version_string_bytes = hs.version_string.encode("ascii")
-        self.default_timeout = hs.config.federation.client_timeout
-
-        self.max_long_retry_delay = hs.config.federation.max_long_retry_delay
-        self.max_short_retry_delay = hs.config.federation.max_short_retry_delay
-        self.max_long_retries = hs.config.federation.max_long_retries
-        self.max_short_retries = hs.config.federation.max_short_retries
+        self.default_timeout = 60
 
         self._cooperator = Cooperator(scheduler=_make_scheduler(self.reactor))
 
@@ -586,9 +583,9 @@ class MatrixFederationHttpClient:
             # XXX: Would be much nicer to retry only at the transaction-layer
             # (once we have reliable transactions in place)
             if long_retries:
-                retries_left = self.max_long_retries
+                retries_left = MAX_LONG_RETRIES
             else:
-                retries_left = self.max_short_retries
+                retries_left = MAX_SHORT_RETRIES
 
             url_bytes = request.uri
             url_str = url_bytes.decode("ascii")
@@ -733,12 +730,12 @@ class MatrixFederationHttpClient:
 
                     if retries_left and not timeout:
                         if long_retries:
-                            delay = 4 ** (self.max_long_retries + 1 - retries_left)
-                            delay = min(delay, self.max_long_retry_delay)
+                            delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left)
+                            delay = min(delay, 60)
                             delay *= random.uniform(0.8, 1.4)
                         else:
-                            delay = 0.5 * 2 ** (self.max_short_retries - retries_left)
-                            delay = min(delay, self.max_short_retry_delay)
+                            delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left)
+                            delay = min(delay, 2)
                             delay *= random.uniform(0.8, 1.4)
 
                         logger.debug(
diff --git a/tests/http/test_matrixfederationclient.py b/tests/http/test_matrixfederationclient.py
index 8565f8ac64..0dfc03ce50 100644
--- a/tests/http/test_matrixfederationclient.py
+++ b/tests/http/test_matrixfederationclient.py
@@ -40,7 +40,7 @@ from synapse.server import HomeServer
 from synapse.util import Clock
 
 from tests.server import FakeTransport
-from tests.unittest import HomeserverTestCase, override_config
+from tests.unittest import HomeserverTestCase
 
 
 def check_logcontext(context: LoggingContextOrSentinel) -> None:
@@ -640,21 +640,3 @@ class FederationClientTests(HomeserverTestCase):
             self.cl.build_auth_headers(
                 b"", b"GET", b"https://example.com", destination_is=b""
             )
-
-    @override_config(
-        {
-            "federation": {
-                "client_timeout": 180,
-                "max_long_retry_delay": 100,
-                "max_short_retry_delay": 7,
-                "max_long_retries": 20,
-                "max_short_retries": 5,
-            }
-        }
-    )
-    def test_configurable_retry_and_delay_values(self) -> None:
-        self.assertEqual(self.cl.default_timeout, 180)
-        self.assertEqual(self.cl.max_long_retry_delay, 100)
-        self.assertEqual(self.cl.max_short_retry_delay, 7)
-        self.assertEqual(self.cl.max_long_retries, 20)
-        self.assertEqual(self.cl.max_short_retries, 5)
-- 
cgit 1.5.1


From 1404f68a03e684434dea6b2a9a5c2739c719549d Mon Sep 17 00:00:00 2001
From: Tulir Asokan <tulir@maunium.net>
Date: Wed, 14 Jun 2023 17:42:33 +0300
Subject: Fix joining rooms through aliases where the alias server isn't a real
 homeserver (#15776)

---
 changelog.d/15776.bugfix        | 1 +
 synapse/handlers/room_member.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15776.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15776.bugfix b/changelog.d/15776.bugfix
new file mode 100644
index 0000000000..f146a85f1a
--- /dev/null
+++ b/changelog.d/15776.bugfix
@@ -0,0 +1 @@
+Fix joining rooms through aliases where the alias server isn't a real homeserver. Contributed by @tulir @ Beeper.
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index af0ca5c26d..55df34bd06 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -1498,7 +1498,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         # put the server which owns the alias at the front of the server list.
         if room_alias.domain in servers:
             servers.remove(room_alias.domain)
-        servers.insert(0, room_alias.domain)
+            servers.insert(0, room_alias.domain)
 
         return RoomID.from_string(room_id), servers
 
-- 
cgit 1.5.1


From d93912042191d30ff1f7aa41d9f0779a609caca8 Mon Sep 17 00:00:00 2001
From: Josh Qou <97894002+joshqou@users.noreply.github.com>
Date: Thu, 15 Jun 2023 14:23:27 +0100
Subject: Fix unsafe hotserving behaviour for non-multimedia uploads. (#15680)

* Fix unsafe hotserving behaviour for non-multimedia uploads.

* invert disposition assert

* test_media_storage.py: run lint

* test_base.py: /inline/attachment/s

* Only return attachment for disposition type, update tests

* Update synapse/media/_base.py

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

* Update changelog.d/15680.bugfix

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>

* add attribution

* Update changelog.

---------

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
---
 changelog.d/15680.bugfix          |  1 +
 synapse/media/_base.py            | 15 ++++++++++++---
 tests/media/test_base.py          | 12 ++++++------
 tests/media/test_media_storage.py | 20 ++++++++++----------
 4 files changed, 29 insertions(+), 19 deletions(-)
 create mode 100644 changelog.d/15680.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15680.bugfix b/changelog.d/15680.bugfix
new file mode 100644
index 0000000000..04ac19b4ec
--- /dev/null
+++ b/changelog.d/15680.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where media files were served in an unsafe manner. Contributed by @joshqou.
diff --git a/synapse/media/_base.py b/synapse/media/_base.py
index ef8334ae25..20cb8b9010 100644
--- a/synapse/media/_base.py
+++ b/synapse/media/_base.py
@@ -152,6 +152,9 @@ def add_file_headers(
         content_type = media_type
 
     request.setHeader(b"Content-Type", content_type.encode("UTF-8"))
+
+    # Use a Content-Disposition of attachment to force download of media.
+    disposition = "attachment"
     if upload_name:
         # RFC6266 section 4.1 [1] defines both `filename` and `filename*`.
         #
@@ -173,11 +176,17 @@ def add_file_headers(
         # correctly interpret those as of 0.99.2 and (b) they are a bit of a pain and we
         # may as well just do the filename* version.
         if _can_encode_filename_as_token(upload_name):
-            disposition = "inline; filename=%s" % (upload_name,)
+            disposition = "%s; filename=%s" % (
+                disposition,
+                upload_name,
+            )
         else:
-            disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),)
+            disposition = "%s; filename*=utf-8''%s" % (
+                disposition,
+                _quote(upload_name),
+            )
 
-        request.setHeader(b"Content-Disposition", disposition.encode("ascii"))
+    request.setHeader(b"Content-Disposition", disposition.encode("ascii"))
 
     # cache for at least a day.
     # XXX: we might want to turn this off for data we don't want to
diff --git a/tests/media/test_base.py b/tests/media/test_base.py
index 66498c744d..4728c80969 100644
--- a/tests/media/test_base.py
+++ b/tests/media/test_base.py
@@ -20,12 +20,12 @@ from tests import unittest
 class GetFileNameFromHeadersTests(unittest.TestCase):
     # input -> expected result
     TEST_CASES = {
-        b"inline; filename=abc.txt": "abc.txt",
-        b'inline; filename="azerty"': "azerty",
-        b'inline; filename="aze%20rty"': "aze%20rty",
-        b'inline; filename="aze"rty"': 'aze"rty',
-        b'inline; filename="azer;ty"': "azer;ty",
-        b"inline; filename*=utf-8''foo%C2%A3bar": "foo£bar",
+        b"attachment; filename=abc.txt": "abc.txt",
+        b'attachment; filename="azerty"': "azerty",
+        b'attachment; filename="aze%20rty"': "aze%20rty",
+        b'attachment; filename="aze"rty"': 'aze"rty',
+        b'attachment; filename="azer;ty"': "azer;ty",
+        b"attachment; filename*=utf-8''foo%C2%A3bar": "foo£bar",
     }
 
     def tests(self) -> None:
diff --git a/tests/media/test_media_storage.py b/tests/media/test_media_storage.py
index f0f2da65db..ea0051dde4 100644
--- a/tests/media/test_media_storage.py
+++ b/tests/media/test_media_storage.py
@@ -317,7 +317,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
 
     def test_handle_missing_content_type(self) -> None:
         channel = self._req(
-            b"inline; filename=out" + self.test_image.extension,
+            b"attachment; filename=out" + self.test_image.extension,
             include_content_type=False,
         )
         headers = channel.headers
@@ -331,7 +331,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
         If the filename is filename=<ascii> then Synapse will decode it as an
         ASCII string, and use filename= in the response.
         """
-        channel = self._req(b"inline; filename=out" + self.test_image.extension)
+        channel = self._req(b"attachment; filename=out" + self.test_image.extension)
 
         headers = channel.headers
         self.assertEqual(
@@ -339,7 +339,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
         )
         self.assertEqual(
             headers.getRawHeaders(b"Content-Disposition"),
-            [b"inline; filename=out" + self.test_image.extension],
+            [b"attachment; filename=out" + self.test_image.extension],
         )
 
     def test_disposition_filenamestar_utf8escaped(self) -> None:
@@ -350,7 +350,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
         """
         filename = parse.quote("\u2603".encode()).encode("ascii")
         channel = self._req(
-            b"inline; filename*=utf-8''" + filename + self.test_image.extension
+            b"attachment; filename*=utf-8''" + filename + self.test_image.extension
         )
 
         headers = channel.headers
@@ -359,13 +359,13 @@ class MediaRepoTests(unittest.HomeserverTestCase):
         )
         self.assertEqual(
             headers.getRawHeaders(b"Content-Disposition"),
-            [b"inline; filename*=utf-8''" + filename + self.test_image.extension],
+            [b"attachment; filename*=utf-8''" + filename + self.test_image.extension],
         )
 
     def test_disposition_none(self) -> None:
         """
-        If there is no filename, one isn't passed on in the Content-Disposition
-        of the request.
+        If there is no filename, Content-Disposition should only
+        be a disposition type.
         """
         channel = self._req(None)
 
@@ -373,7 +373,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
         self.assertEqual(
             headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
         )
-        self.assertEqual(headers.getRawHeaders(b"Content-Disposition"), None)
+        self.assertEqual(headers.getRawHeaders(b"Content-Disposition"), [b"attachment"])
 
     def test_thumbnail_crop(self) -> None:
         """Test that a cropped remote thumbnail is available."""
@@ -612,7 +612,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
         Tests that the `X-Robots-Tag` header is present, which informs web crawlers
         to not index, archive, or follow links in media.
         """
-        channel = self._req(b"inline; filename=out" + self.test_image.extension)
+        channel = self._req(b"attachment; filename=out" + self.test_image.extension)
 
         headers = channel.headers
         self.assertEqual(
@@ -625,7 +625,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
         Test that the Cross-Origin-Resource-Policy header is set to "cross-origin"
         allowing web clients to embed media from the downloads API.
         """
-        channel = self._req(b"inline; filename=out" + self.test_image.extension)
+        channel = self._req(b"attachment; filename=out" + self.test_image.extension)
 
         headers = channel.headers
 
-- 
cgit 1.5.1


From f63d4a3a65e95d3845c43a9dd2893605b06f164a Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Fri, 16 Jun 2023 12:15:12 +0200
Subject: Regularly try to wake up dests instead of waiting for next PDU/EDU
 (#15743)

---
 changelog.d/15743.misc                       |  1 +
 synapse/federation/sender/__init__.py        | 34 +++++++++++++---------------
 tests/federation/test_federation_catch_up.py | 22 ++++++++----------
 3 files changed, 26 insertions(+), 31 deletions(-)
 create mode 100644 changelog.d/15743.misc

(limited to 'synapse')

diff --git a/changelog.d/15743.misc b/changelog.d/15743.misc
new file mode 100644
index 0000000000..b95eed929e
--- /dev/null
+++ b/changelog.d/15743.misc
@@ -0,0 +1 @@
+Regularly try to send transactions to other servers after they failed instead of waiting for a new event to be available before trying.
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index f3bdc5a4d2..97abbdee18 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -109,10 +109,8 @@ was enabled*, Catch-Up Mode is exited and we return to `_transaction_transmissio
 
 If a remote server is unreachable over federation, we back off from that server,
 with an exponentially-increasing retry interval.
-Whilst we don't automatically retry after the interval, we prevent making new attempts
-until such time as the back-off has cleared.
-Once the back-off is cleared and a new PDU or EDU arrives for transmission, the transmission
-loop resumes and empties the queue by making federation requests.
+We automatically retry after the retry interval expires (roughly, the logic to do so
+being triggered every minute).
 
 If the backoff grows too large (> 1 hour), the in-memory queue is emptied (to prevent
 unbounded growth) and Catch-Up Mode is entered.
@@ -145,7 +143,6 @@ from prometheus_client import Counter
 from typing_extensions import Literal
 
 from twisted.internet import defer
-from twisted.internet.interfaces import IDelayedCall
 
 import synapse.metrics
 from synapse.api.presence import UserPresenceState
@@ -184,14 +181,18 @@ sent_pdus_destination_dist_total = Counter(
     "Total number of PDUs queued for sending across all destinations",
 )
 
-# Time (in s) after Synapse's startup that we will begin to wake up destinations
-# that have catch-up outstanding.
-CATCH_UP_STARTUP_DELAY_SEC = 15
+# Time (in s) to wait before trying to wake up destinations that have
+# catch-up outstanding. This will also be the delay applied at startup
+# before trying the same.
+# Please note that rate limiting still applies, so while the loop is
+# executed every X seconds the destinations may not be woken up because
+# they are being rate limited following previous attempt failures.
+WAKEUP_RETRY_PERIOD_SEC = 60
 
 # Time (in s) to wait in between waking up each destination, i.e. one destination
-# will be woken up every <x> seconds after Synapse's startup until we have woken
-# every destination has outstanding catch-up.
-CATCH_UP_STARTUP_INTERVAL_SEC = 5
+# will be woken up every <x> seconds until we have woken every destination
+# that has outstanding catch-up.
+WAKEUP_INTERVAL_BETWEEN_DESTINATIONS_SEC = 5
 
 
 class AbstractFederationSender(metaclass=abc.ABCMeta):
@@ -415,12 +416,10 @@ class FederationSender(AbstractFederationSender):
             / hs.config.ratelimiting.federation_rr_transactions_per_room_per_second
         )
 
-        # wake up destinations that have outstanding PDUs to be caught up
-        self._catchup_after_startup_timer: Optional[
-            IDelayedCall
-        ] = self.clock.call_later(
-            CATCH_UP_STARTUP_DELAY_SEC,
+        # Regularly wake up destinations that have outstanding PDUs to be caught up
+        self.clock.looping_call(
             run_as_background_process,
+            WAKEUP_RETRY_PERIOD_SEC * 1000.0,
             "wake_destinations_needing_catchup",
             self._wake_destinations_needing_catchup,
         )
@@ -966,7 +965,6 @@ class FederationSender(AbstractFederationSender):
 
             if not destinations_to_wake:
                 # finished waking all destinations!
-                self._catchup_after_startup_timer = None
                 break
 
             last_processed = destinations_to_wake[-1]
@@ -983,4 +981,4 @@ class FederationSender(AbstractFederationSender):
                     last_processed,
                 )
                 self.wake_destination(destination)
-                await self.clock.sleep(CATCH_UP_STARTUP_INTERVAL_SEC)
+                await self.clock.sleep(WAKEUP_INTERVAL_BETWEEN_DESTINATIONS_SEC)
diff --git a/tests/federation/test_federation_catch_up.py b/tests/federation/test_federation_catch_up.py
index 391ae51707..b290b020a2 100644
--- a/tests/federation/test_federation_catch_up.py
+++ b/tests/federation/test_federation_catch_up.py
@@ -431,28 +431,24 @@ class FederationCatchUpTestCases(FederatingHomeserverTestCase):
         # ACT: call _wake_destinations_needing_catchup
 
         # patch wake_destination to just count the destinations instead
-        woken = []
+        woken = set()
 
         def wake_destination_track(destination: str) -> None:
-            woken.append(destination)
+            woken.add(destination)
 
         self.federation_sender.wake_destination = wake_destination_track  # type: ignore[assignment]
 
-        # cancel the pre-existing timer for _wake_destinations_needing_catchup
-        # this is because we are calling it manually rather than waiting for it
-        # to be called automatically
-        assert self.federation_sender._catchup_after_startup_timer is not None
-        self.federation_sender._catchup_after_startup_timer.cancel()
-
-        self.get_success(
-            self.federation_sender._wake_destinations_needing_catchup(), by=5.0
-        )
+        # We wait quite long so that all dests can be woken up, since there is a delay
+        # between them.
+        self.pump(by=5.0)
 
         # ASSERT (_wake_destinations_needing_catchup):
         # - all remotes are woken up, save for zzzerver
         self.assertNotIn("zzzerver", woken)
-        # - all destinations are woken exactly once; they appear once in woken.
-        self.assertCountEqual(woken, server_names[:-1])
+        # - all destinations are woken, potentially more than once, since the
+        # wake up is called regularly and we don't ack in this test that a transaction
+        # has been successfully sent.
+        self.assertCountEqual(woken, set(server_names[:-1]))
 
     def test_not_latest_event(self) -> None:
         """Test that we send the latest event in the room even if its not ours."""
-- 
cgit 1.5.1


From 0618bf94cdc56631e670b4e93e4dfaeae2162e73 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Fri, 16 Jun 2023 14:17:02 +0200
Subject: push rules: fix internal conversion from _type to value (#15781)

Also fix wrong rule names for `is_user_mention` and `is_room_mention`.
---
 changelog.d/15781.bugfix                  |  1 +
 rust/src/push/base_rules.rs               |  4 +-
 synapse/push/clientformat.py              | 26 +++++-------
 tests/rest/client/test_push_rule_attrs.py | 67 +++++++++++++++++++++++++++++++
 4 files changed, 81 insertions(+), 17 deletions(-)
 create mode 100644 changelog.d/15781.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15781.bugfix b/changelog.d/15781.bugfix
new file mode 100644
index 0000000000..5faf59afee
--- /dev/null
+++ b/changelog.d/15781.bugfix
@@ -0,0 +1 @@
+Fix a bug in push rules handling leading to an invalid (per spec) `is_user_mention` rule sent to clients. Also fix wrong rule names for `is_user_mention` and `is_room_mention`.
\ No newline at end of file
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index 9d6c304d92..7eea9313f0 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -142,7 +142,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
         default_enabled: true,
     },
     PushRule {
-        rule_id: Cow::Borrowed("global/override/.m.is_user_mention"),
+        rule_id: Cow::Borrowed("global/override/.m.rule.is_user_mention"),
         priority_class: 5,
         conditions: Cow::Borrowed(&[Condition::Known(
             KnownCondition::ExactEventPropertyContainsType(EventPropertyIsTypeCondition {
@@ -163,7 +163,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
         default_enabled: true,
     },
     PushRule {
-        rule_id: Cow::Borrowed("global/override/.m.is_room_mention"),
+        rule_id: Cow::Borrowed("global/override/.m.rule.is_room_mention"),
         priority_class: 5,
         conditions: Cow::Borrowed(&[
             Condition::Known(KnownCondition::EventPropertyIs(EventPropertyIsCondition {
diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py
index 88b52c26a0..735cef0aed 100644
--- a/synapse/push/clientformat.py
+++ b/synapse/push/clientformat.py
@@ -41,12 +41,7 @@ def format_push_rules_for_user(
 
         rulearray.append(template_rule)
 
-        for type_key in ("pattern", "value"):
-            type_value = template_rule.pop(f"{type_key}_type", None)
-            if type_value == "user_id":
-                template_rule[type_key] = user.to_string()
-            elif type_value == "user_localpart":
-                template_rule[type_key] = user.localpart
+        _convert_type_to_value(template_rule, user)
 
         template_rule["enabled"] = enabled
 
@@ -63,19 +58,20 @@ def format_push_rules_for_user(
         for c in template_rule["conditions"]:
             c.pop("_cache_key", None)
 
-            pattern_type = c.pop("pattern_type", None)
-            if pattern_type == "user_id":
-                c["pattern"] = user.to_string()
-            elif pattern_type == "user_localpart":
-                c["pattern"] = user.localpart
-
-            sender_type = c.pop("sender_type", None)
-            if sender_type == "user_id":
-                c["sender"] = user.to_string()
+            _convert_type_to_value(c, user)
 
     return rules
 
 
+def _convert_type_to_value(rule_or_cond: Dict[str, Any], user: UserID) -> None:
+    for type_key in ("pattern", "value"):
+        type_value = rule_or_cond.pop(f"{type_key}_type", None)
+        if type_value == "user_id":
+            rule_or_cond[type_key] = user.to_string()
+        elif type_value == "user_localpart":
+            rule_or_cond[type_key] = user.localpart
+
+
 def _add_empty_priority_class_arrays(d: Dict[str, list]) -> Dict[str, list]:
     for pc in PRIORITY_CLASS_MAP.keys():
         d[pc] = []
diff --git a/tests/rest/client/test_push_rule_attrs.py b/tests/rest/client/test_push_rule_attrs.py
index 4f875b9289..5aca74475f 100644
--- a/tests/rest/client/test_push_rule_attrs.py
+++ b/tests/rest/client/test_push_rule_attrs.py
@@ -412,3 +412,70 @@ class PushRuleAttributesTestCase(HomeserverTestCase):
         )
         self.assertEqual(channel.code, 404)
         self.assertEqual(channel.json_body["errcode"], Codes.NOT_FOUND)
+
+    def test_contains_user_name(self) -> None:
+        """
+        Tests that the `contains_user_name` rule is present and has the proper value in `pattern`.
+        """
+        username = "bob"
+        self.register_user(username, "pass")
+        token = self.login(username, "pass")
+
+        channel = self.make_request(
+            "GET",
+            "/pushrules/global/content/.m.rule.contains_user_name",
+            access_token=token,
+        )
+
+        self.assertEqual(channel.code, 200)
+
+        self.assertEqual(
+            {
+                "rule_id": ".m.rule.contains_user_name",
+                "default": True,
+                "enabled": True,
+                "pattern": username,
+                "actions": [
+                    "notify",
+                    {"set_tweak": "highlight"},
+                    {"set_tweak": "sound", "value": "default"},
+                ],
+            },
+            channel.json_body,
+        )
+
+    def test_is_user_mention(self) -> None:
+        """
+        Tests that the `is_user_mention` rule is present and has the proper value in `value`.
+        """
+        user = self.register_user("bob", "pass")
+        token = self.login("bob", "pass")
+
+        channel = self.make_request(
+            "GET",
+            "/pushrules/global/override/.m.rule.is_user_mention",
+            access_token=token,
+        )
+
+        self.assertEqual(channel.code, 200)
+
+        self.assertEqual(
+            {
+                "rule_id": ".m.rule.is_user_mention",
+                "default": True,
+                "enabled": True,
+                "conditions": [
+                    {
+                        "kind": "event_property_contains",
+                        "key": "content.m\\.mentions.user_ids",
+                        "value": user,
+                    }
+                ],
+                "actions": [
+                    "notify",
+                    {"set_tweak": "highlight"},
+                    {"set_tweak": "sound", "value": "default"},
+                ],
+            },
+            channel.json_body,
+        )
-- 
cgit 1.5.1


From 2ac6c3bbb535677bd62b3df425dd1755dba79b66 Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Fri, 16 Jun 2023 15:25:44 +0100
Subject: Don't always lock "user_ips" table when performing non-native upsert
 (#15788)

---
 changelog.d/15788.bugfix    | 1 +
 synapse/storage/database.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15788.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15788.bugfix b/changelog.d/15788.bugfix
new file mode 100644
index 0000000000..d22aae7baf
--- /dev/null
+++ b/changelog.d/15788.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in 1.57.0 where the wrong table would be locked on updating database rows when using SQLite as the database backend.
\ No newline at end of file
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 10fa6c4802..7e49ae11bc 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -1529,7 +1529,7 @@ class DatabasePool:
         # Lock the table just once, to prevent it being done once per row.
         # Note that, according to Postgres' documentation, once obtained,
         # the lock is held for the remainder of the current transaction.
-        self.engine.lock_table(txn, "user_ips")
+        self.engine.lock_table(txn, table)
 
         for keyv, valv in zip(key_values, value_values):
             _keys = dict(zip(key_names, keyv))
-- 
cgit 1.5.1


From 0f02f0b4da92229e88e27a92ea3bfa523457bfc1 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Fri, 16 Jun 2023 14:12:24 -0500
Subject: Remove experimental MSC2716 implementation to incrementally import
 history into existing rooms (#15748)

Context for why we're removing the implementation:

 - https://github.com/matrix-org/matrix-spec-proposals/pull/2716#issuecomment-1487441010
 - https://github.com/matrix-org/matrix-spec-proposals/pull/2716#issuecomment-1504262734

Anyone wanting to continue MSC2716 should also address these leftover tasks: https://github.com/matrix-org/synapse/issues/10737

Closes https://github.com/matrix-org/synapse/issues/10737 in the sense that it is no longer necessary to track those things.
---
 changelog.d/15748.removal                          |   1 +
 .../complement/conf/workers-shared-extra.yaml.j2   |   2 -
 docker/configure_workers_and_start.py              |   1 -
 docs/workers.md                                    |   1 -
 scripts-dev/complement.sh                          |   4 -
 synapse/api/constants.py                           |  14 -
 synapse/api/room_versions.py                       |  61 ---
 synapse/app/generic_worker.py                      |   2 -
 synapse/config/experimental.py                     |   3 -
 synapse/event_auth.py                              |  40 --
 synapse/events/__init__.py                         |   9 -
 synapse/events/utils.py                            |   9 -
 synapse/handlers/federation.py                     |  33 +-
 synapse/handlers/federation_event.py               | 109 -----
 synapse/handlers/message.py                        | 168 +-------
 synapse/handlers/room_batch.py                     | 466 ---------------------
 synapse/handlers/room_member.py                    |  54 +--
 synapse/push/bulk_push_rule_evaluator.py           |   1 -
 synapse/rest/__init__.py                           |   2 -
 synapse/rest/client/room_batch.py                  | 254 -----------
 synapse/rest/client/versions.py                    |   2 -
 synapse/server.py                                  |   5 -
 synapse/storage/databases/main/__init__.py         |   2 -
 synapse/storage/databases/main/event_federation.py | 211 +---------
 synapse/storage/databases/main/events.py           | 125 ------
 synapse/storage/databases/main/room_batch.py       |  47 ---
 tests/rest/client/test_room_batch.py               | 302 -------------
 tests/storage/test_event_federation.py             | 211 ----------
 28 files changed, 36 insertions(+), 2103 deletions(-)
 create mode 100644 changelog.d/15748.removal
 delete mode 100644 synapse/handlers/room_batch.py
 delete mode 100644 synapse/rest/client/room_batch.py
 delete mode 100644 synapse/storage/databases/main/room_batch.py
 delete mode 100644 tests/rest/client/test_room_batch.py

(limited to 'synapse')

diff --git a/changelog.d/15748.removal b/changelog.d/15748.removal
new file mode 100644
index 0000000000..dcb9780178
--- /dev/null
+++ b/changelog.d/15748.removal
@@ -0,0 +1 @@
+Remove experimental [MSC2716](https://github.com/matrix-org/matrix-spec-proposals/pull/2716) implementation to incrementally import history into existing rooms.
diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2
index 63acf86a46..2b11b487f6 100644
--- a/docker/complement/conf/workers-shared-extra.yaml.j2
+++ b/docker/complement/conf/workers-shared-extra.yaml.j2
@@ -92,8 +92,6 @@ allow_device_name_lookup_over_federation: true
 ## Experimental Features ##
 
 experimental_features:
-  # Enable history backfilling support
-  msc2716_enabled: true
   # client-side support for partial state in /send_join responses
   faster_joins: true
   # Enable support for polls
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index 87a740e3d4..62fb88daab 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -244,7 +244,6 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
             "^/_matrix/client/(api/v1|r0|v3|unstable)/join/",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/knock/",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/profile/",
-            "^/_matrix/client/(v1|unstable/org.matrix.msc2716)/rooms/.*/batch_send",
         ],
         "shared_extra_conf": {},
         "worker_extra_conf": "",
diff --git a/docs/workers.md b/docs/workers.md
index 991814c0bc..735128762a 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -232,7 +232,6 @@ information.
     ^/_matrix/client/v1/rooms/.*/hierarchy$
     ^/_matrix/client/(v1|unstable)/rooms/.*/relations/
     ^/_matrix/client/v1/rooms/.*/threads$
-    ^/_matrix/client/unstable/org.matrix.msc2716/rooms/.*/batch_send$
     ^/_matrix/client/unstable/im.nheko.summary/rooms/.*/summary$
     ^/_matrix/client/(r0|v3|unstable)/account/3pid$
     ^/_matrix/client/(r0|v3|unstable)/account/whoami$
diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh
index 131f26234e..24b83cfeb6 100755
--- a/scripts-dev/complement.sh
+++ b/scripts-dev/complement.sh
@@ -246,10 +246,6 @@ else
   else
     export PASS_SYNAPSE_COMPLEMENT_DATABASE=sqlite
   fi
-
-  # The tests for importing historical messages (MSC2716)
-  # only pass with monoliths, currently.
-  test_tags="$test_tags,msc2716"
 fi
 
 if [[ -n "$ASYNCIO_REACTOR" ]]; then
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index faf0770c66..dc32553d0c 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -123,10 +123,6 @@ class EventTypes:
     SpaceChild: Final = "m.space.child"
     SpaceParent: Final = "m.space.parent"
 
-    MSC2716_INSERTION: Final = "org.matrix.msc2716.insertion"
-    MSC2716_BATCH: Final = "org.matrix.msc2716.batch"
-    MSC2716_MARKER: Final = "org.matrix.msc2716.marker"
-
     Reaction: Final = "m.reaction"
 
 
@@ -222,16 +218,6 @@ class EventContentFields:
     # Used in m.room.guest_access events.
     GUEST_ACCESS: Final = "guest_access"
 
-    # Used on normal messages to indicate they were historically imported after the fact
-    MSC2716_HISTORICAL: Final = "org.matrix.msc2716.historical"
-    # For "insertion" events to indicate what the next batch ID should be in
-    # order to connect to it
-    MSC2716_NEXT_BATCH_ID: Final = "next_batch_id"
-    # Used on "batch" events to indicate which insertion event it connects to
-    MSC2716_BATCH_ID: Final = "batch_id"
-    # For "marker" events
-    MSC2716_INSERTION_EVENT_REFERENCE: Final = "insertion_event_reference"
-
     # The authorising user for joining a restricted room.
     AUTHORISING_USER: Final = "join_authorised_via_users_server"
 
diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py
index c5c71e242f..25c105a4c8 100644
--- a/synapse/api/room_versions.py
+++ b/synapse/api/room_versions.py
@@ -91,11 +91,6 @@ class RoomVersion:
     # MSC2403: Allows join_rules to be set to 'knock', changes auth rules to allow sending
     # m.room.membership event with membership 'knock'.
     msc2403_knocking: bool
-    # MSC2716: Adds m.room.power_levels -> content.historical field to control
-    # whether "insertion", "chunk", "marker" events can be sent
-    msc2716_historical: bool
-    # MSC2716: Adds support for redacting "insertion", "chunk", and "marker" events
-    msc2716_redactions: bool
     # MSC3389: Protect relation information from redaction.
     msc3389_relation_redactions: bool
     # MSC3787: Adds support for a `knock_restricted` join rule, mixing concepts of
@@ -130,8 +125,6 @@ class RoomVersions:
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
         msc2403_knocking=False,
-        msc2716_historical=False,
-        msc2716_redactions=False,
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
@@ -153,8 +146,6 @@ class RoomVersions:
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
         msc2403_knocking=False,
-        msc2716_historical=False,
-        msc2716_redactions=False,
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
@@ -176,8 +167,6 @@ class RoomVersions:
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
         msc2403_knocking=False,
-        msc2716_historical=False,
-        msc2716_redactions=False,
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
@@ -199,8 +188,6 @@ class RoomVersions:
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
         msc2403_knocking=False,
-        msc2716_historical=False,
-        msc2716_redactions=False,
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
@@ -222,8 +209,6 @@ class RoomVersions:
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
         msc2403_knocking=False,
-        msc2716_historical=False,
-        msc2716_redactions=False,
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
@@ -245,8 +230,6 @@ class RoomVersions:
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
         msc2403_knocking=False,
-        msc2716_historical=False,
-        msc2716_redactions=False,
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
@@ -268,8 +251,6 @@ class RoomVersions:
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
         msc2403_knocking=False,
-        msc2716_historical=False,
-        msc2716_redactions=False,
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
@@ -291,8 +272,6 @@ class RoomVersions:
         msc3083_join_rules=False,
         msc3375_redaction_rules=False,
         msc2403_knocking=True,
-        msc2716_historical=False,
-        msc2716_redactions=False,
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
@@ -314,8 +293,6 @@ class RoomVersions:
         msc3083_join_rules=True,
         msc3375_redaction_rules=False,
         msc2403_knocking=True,
-        msc2716_historical=False,
-        msc2716_redactions=False,
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
@@ -337,8 +314,6 @@ class RoomVersions:
         msc3083_join_rules=True,
         msc3375_redaction_rules=True,
         msc2403_knocking=True,
-        msc2716_historical=False,
-        msc2716_redactions=False,
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
@@ -360,8 +335,6 @@ class RoomVersions:
         msc3083_join_rules=True,
         msc3375_redaction_rules=True,
         msc2403_knocking=True,
-        msc2716_historical=False,
-        msc2716_redactions=False,
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=False,
@@ -383,8 +356,6 @@ class RoomVersions:
         msc3083_join_rules=True,
         msc3375_redaction_rules=True,
         msc2403_knocking=True,
-        msc2716_historical=False,
-        msc2716_redactions=False,
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=False,
         msc3667_int_only_power_levels=False,
@@ -406,8 +377,6 @@ class RoomVersions:
         msc3083_join_rules=True,
         msc3375_redaction_rules=True,
         msc2403_knocking=True,
-        msc2716_historical=False,
-        msc2716_redactions=False,
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=True,
@@ -415,29 +384,6 @@ class RoomVersions:
         msc3931_push_features=(),
         msc3989_redaction_rules=False,
     )
-    MSC2716v4 = RoomVersion(
-        "org.matrix.msc2716v4",
-        RoomDisposition.UNSTABLE,
-        EventFormatVersions.ROOM_V4_PLUS,
-        StateResolutionVersions.V2,
-        enforce_key_validity=True,
-        special_case_aliases_auth=False,
-        strict_canonicaljson=True,
-        limit_notifications_power_levels=True,
-        msc2175_implicit_room_creator=False,
-        msc2176_redaction_rules=False,
-        msc3083_join_rules=False,
-        msc3375_redaction_rules=False,
-        msc2403_knocking=True,
-        msc2716_historical=True,
-        msc2716_redactions=True,
-        msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=False,
-        msc3667_int_only_power_levels=False,
-        msc3821_redaction_rules=False,
-        msc3931_push_features=(),
-        msc3989_redaction_rules=False,
-    )
     MSC1767v10 = RoomVersion(
         # MSC1767 (Extensible Events) based on room version "10"
         "org.matrix.msc1767.10",
@@ -453,8 +399,6 @@ class RoomVersions:
         msc3083_join_rules=True,
         msc3375_redaction_rules=True,
         msc2403_knocking=True,
-        msc2716_historical=False,
-        msc2716_redactions=False,
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=True,
@@ -476,8 +420,6 @@ class RoomVersions:
         msc3083_join_rules=True,
         msc3375_redaction_rules=True,
         msc2403_knocking=True,
-        msc2716_historical=False,
-        msc2716_redactions=False,
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=True,
@@ -500,8 +442,6 @@ class RoomVersions:
         msc3083_join_rules=True,
         msc3375_redaction_rules=True,
         msc2403_knocking=True,
-        msc2716_historical=False,
-        msc2716_redactions=False,
         msc3389_relation_redactions=False,
         msc3787_knock_restricted_join_rule=True,
         msc3667_int_only_power_levels=True,
@@ -526,7 +466,6 @@ KNOWN_ROOM_VERSIONS: Dict[str, RoomVersion] = {
         RoomVersions.V9,
         RoomVersions.MSC3787,
         RoomVersions.V10,
-        RoomVersions.MSC2716v4,
         RoomVersions.MSC3989,
         RoomVersions.MSC3820opt2,
     )
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 909ebccf78..7406c3948c 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -83,7 +83,6 @@ from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
 from synapse.storage.databases.main.registration import RegistrationWorkerStore
 from synapse.storage.databases.main.relations import RelationsWorkerStore
 from synapse.storage.databases.main.room import RoomWorkerStore
-from synapse.storage.databases.main.room_batch import RoomBatchStore
 from synapse.storage.databases.main.roommember import RoomMemberWorkerStore
 from synapse.storage.databases.main.search import SearchStore
 from synapse.storage.databases.main.session import SessionStore
@@ -120,7 +119,6 @@ class GenericWorkerStore(
     # the races it creates aren't too bad.
     KeyStore,
     RoomWorkerStore,
-    RoomBatchStore,
     DirectoryWorkerStore,
     PushRulesWorkerStore,
     ApplicationServiceTransactionWorkerStore,
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 1d5b5ded45..8e0f5356b4 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -247,9 +247,6 @@ class ExperimentalConfig(Config):
         # MSC3026 (busy presence state)
         self.msc3026_enabled: bool = experimental.get("msc3026_enabled", False)
 
-        # MSC2716 (importing historical messages)
-        self.msc2716_enabled: bool = experimental.get("msc2716_enabled", False)
-
         # MSC3244 (room version capabilities)
         self.msc3244_enabled: bool = experimental.get("msc3244_enabled", True)
 
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index b4b43ec4d7..3aaf53dfbd 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -339,13 +339,6 @@ def check_state_dependent_auth_rules(
     if event.type == EventTypes.Redaction:
         check_redaction(event.room_version, event, auth_dict)
 
-    if (
-        event.type == EventTypes.MSC2716_INSERTION
-        or event.type == EventTypes.MSC2716_BATCH
-        or event.type == EventTypes.MSC2716_MARKER
-    ):
-        check_historical(event.room_version, event, auth_dict)
-
     logger.debug("Allowing! %s", event)
 
 
@@ -365,7 +358,6 @@ LENIENT_EVENT_BYTE_LIMITS_ROOM_VERSIONS = {
     RoomVersions.V9,
     RoomVersions.MSC3787,
     RoomVersions.V10,
-    RoomVersions.MSC2716v4,
     RoomVersions.MSC1767v10,
 }
 
@@ -823,38 +815,6 @@ def check_redaction(
     raise AuthError(403, "You don't have permission to redact events")
 
 
-def check_historical(
-    room_version_obj: RoomVersion,
-    event: "EventBase",
-    auth_events: StateMap["EventBase"],
-) -> None:
-    """Check whether the event sender is allowed to send historical related
-    events like "insertion", "batch", and "marker".
-
-    Returns:
-        None
-
-    Raises:
-        AuthError if the event sender is not allowed to send historical related events
-        ("insertion", "batch", and "marker").
-    """
-    # Ignore the auth checks in room versions that do not support historical
-    # events
-    if not room_version_obj.msc2716_historical:
-        return
-
-    user_level = get_user_power_level(event.user_id, auth_events)
-
-    historical_level = get_named_level(auth_events, "historical", 100)
-
-    if user_level < historical_level:
-        raise UnstableSpecAuthError(
-            403,
-            'You don\'t have permission to send send historical related events ("insertion", "batch", and "marker")',
-            errcode=Codes.INSUFFICIENT_POWER,
-        )
-
-
 def _check_power_levels(
     room_version_obj: RoomVersion,
     event: "EventBase",
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index de7e5be42b..75b62adb33 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -198,7 +198,6 @@ class _EventInternalMetadata:
     soft_failed: DictProperty[bool] = DictProperty("soft_failed")
     proactively_send: DictProperty[bool] = DictProperty("proactively_send")
     redacted: DictProperty[bool] = DictProperty("redacted")
-    historical: DictProperty[bool] = DictProperty("historical")
 
     txn_id: DictProperty[str] = DictProperty("txn_id")
     """The transaction ID, if it was set when the event was created."""
@@ -288,14 +287,6 @@ class _EventInternalMetadata:
         """
         return self._dict.get("redacted", False)
 
-    def is_historical(self) -> bool:
-        """Whether this is a historical message.
-        This is used by the batchsend historical message endpoint and
-        is needed to and mark the event as backfilled and skip some checks
-        like push notifications.
-        """
-        return self._dict.get("historical", False)
-
     def is_notifiable(self) -> bool:
         """Whether this event can trigger a push notification"""
         return not self.is_outlier() or self.is_out_of_band_membership()
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index e7b7b78b84..a55efcca56 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -164,21 +164,12 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
         if room_version.msc2176_redaction_rules:
             add_fields("invite")
 
-        if room_version.msc2716_historical:
-            add_fields("historical")
-
     elif event_type == EventTypes.Aliases and room_version.special_case_aliases_auth:
         add_fields("aliases")
     elif event_type == EventTypes.RoomHistoryVisibility:
         add_fields("history_visibility")
     elif event_type == EventTypes.Redaction and room_version.msc2176_redaction_rules:
         add_fields("redacts")
-    elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_INSERTION:
-        add_fields(EventContentFields.MSC2716_NEXT_BATCH_ID)
-    elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_BATCH:
-        add_fields(EventContentFields.MSC2716_BATCH_ID)
-    elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_MARKER:
-        add_fields(EventContentFields.MSC2716_INSERTION_EVENT_REFERENCE)
 
     # Protect the rel_type and event_id fields under the m.relates_to field.
     if room_version.msc3389_relation_redactions:
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index b7b5e21020..cc5ed97730 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -105,14 +105,12 @@ backfill_processing_before_timer = Histogram(
 )
 
 
+# TODO: We can refactor this away now that there is only one backfill point type again
 class _BackfillPointType(Enum):
     # a regular backwards extremity (ie, an event which we don't yet have, but which
     # is referred to by other events in the DAG)
     BACKWARDS_EXTREMITY = enum.auto()
 
-    # an MSC2716 "insertion event"
-    INSERTION_PONT = enum.auto()
-
 
 @attr.s(slots=True, auto_attribs=True, frozen=True)
 class _BackfillPoint:
@@ -273,32 +271,10 @@ class FederationHandler:
             )
         ]
 
-        insertion_events_to_be_backfilled: List[_BackfillPoint] = []
-        if self.hs.config.experimental.msc2716_enabled:
-            insertion_events_to_be_backfilled = [
-                _BackfillPoint(event_id, depth, _BackfillPointType.INSERTION_PONT)
-                for event_id, depth in await self.store.get_insertion_event_backward_extremities_in_room(
-                    room_id=room_id,
-                    current_depth=current_depth,
-                    # We only need to end up with 5 extremities combined with
-                    # the backfill points to make the `/backfill` request ...
-                    # (see the other comment above for more context).
-                    limit=50,
-                )
-            ]
-        logger.debug(
-            "_maybe_backfill_inner: backwards_extremities=%s insertion_events_to_be_backfilled=%s",
-            backwards_extremities,
-            insertion_events_to_be_backfilled,
-        )
-
         # we now have a list of potential places to backpaginate from. We prefer to
         # start with the most recent (ie, max depth), so let's sort the list.
         sorted_backfill_points: List[_BackfillPoint] = sorted(
-            itertools.chain(
-                backwards_extremities,
-                insertion_events_to_be_backfilled,
-            ),
+            backwards_extremities,
             key=lambda e: -int(e.depth),
         )
 
@@ -411,10 +387,7 @@ class FederationHandler:
             #   event but not anything before it. This would require looking at the
             #   state *before* the event, ignoring the special casing certain event
             #   types have.
-            if bp.type == _BackfillPointType.INSERTION_PONT:
-                event_ids_to_check = [bp.event_id]
-            else:
-                event_ids_to_check = await self.store.get_successor_events(bp.event_id)
+            event_ids_to_check = await self.store.get_successor_events(bp.event_id)
 
             events_to_check = await self.store.get_events_as_list(
                 event_ids_to_check,
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 42141d3670..d32d224d56 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -601,18 +601,6 @@ class FederationEventHandler:
                 room_id, [(event, context)]
             )
 
-            # If we're joining the room again, check if there is new marker
-            # state indicating that there is new history imported somewhere in
-            # the DAG. Multiple markers can exist in the current state with
-            # unique state_keys.
-            #
-            # Do this after the state from the remote join was persisted (via
-            # `persist_events_and_notify`). Otherwise we can run into a
-            # situation where the create event doesn't exist yet in the
-            # `current_state_events`
-            for e in state:
-                await self._handle_marker_event(origin, e)
-
             return stream_id_after_persist
 
     async def update_state_for_partial_state_event(
@@ -915,13 +903,6 @@ class FederationEventHandler:
             )
         )
 
-        # We construct the event lists in source order from `/backfill` response because
-        # it's a) easiest, but also b) the order in which we process things matters for
-        # MSC2716 historical batches because many historical events are all at the same
-        # `depth` and we rely on the tenuous sort that the other server gave us and hope
-        # they're doing their best. The brittle nature of this ordering for historical
-        # messages over federation is one of the reasons why we don't want to continue
-        # on MSC2716 until we have online topological ordering.
         events_with_failed_pull_attempts, fresh_events = partition(
             new_events, lambda e: e.event_id in event_ids_with_failed_pull_attempts
         )
@@ -1460,8 +1441,6 @@ class FederationEventHandler:
 
         await self._run_push_actions_and_persist_event(event, context, backfilled)
 
-        await self._handle_marker_event(origin, event)
-
         if backfilled or context.rejected:
             return
 
@@ -1559,94 +1538,6 @@ class FederationEventHandler:
         except Exception:
             logger.exception("Failed to resync device for %s", sender)
 
-    @trace
-    async def _handle_marker_event(self, origin: str, marker_event: EventBase) -> None:
-        """Handles backfilling the insertion event when we receive a marker
-        event that points to one.
-
-        Args:
-            origin: Origin of the event. Will be called to get the insertion event
-            marker_event: The event to process
-        """
-
-        if marker_event.type != EventTypes.MSC2716_MARKER:
-            # Not a marker event
-            return
-
-        if marker_event.rejected_reason is not None:
-            # Rejected event
-            return
-
-        # Skip processing a marker event if the room version doesn't
-        # support it or the event is not from the room creator.
-        room_version = await self._store.get_room_version(marker_event.room_id)
-        create_event = await self._store.get_create_event_for_room(marker_event.room_id)
-        if not room_version.msc2175_implicit_room_creator:
-            room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR)
-        else:
-            room_creator = create_event.sender
-        if not room_version.msc2716_historical and (
-            not self._config.experimental.msc2716_enabled
-            or marker_event.sender != room_creator
-        ):
-            return
-
-        logger.debug("_handle_marker_event: received %s", marker_event)
-
-        insertion_event_id = marker_event.content.get(
-            EventContentFields.MSC2716_INSERTION_EVENT_REFERENCE
-        )
-
-        if insertion_event_id is None:
-            # Nothing to retrieve then (invalid marker)
-            return
-
-        already_seen_insertion_event = await self._store.have_seen_event(
-            marker_event.room_id, insertion_event_id
-        )
-        if already_seen_insertion_event:
-            # No need to process a marker again if we have already seen the
-            # insertion event that it was pointing to
-            return
-
-        logger.debug(
-            "_handle_marker_event: backfilling insertion event %s", insertion_event_id
-        )
-
-        await self._get_events_and_persist(
-            origin,
-            marker_event.room_id,
-            [insertion_event_id],
-        )
-
-        insertion_event = await self._store.get_event(
-            insertion_event_id, allow_none=True
-        )
-        if insertion_event is None:
-            logger.warning(
-                "_handle_marker_event: server %s didn't return insertion event %s for marker %s",
-                origin,
-                insertion_event_id,
-                marker_event.event_id,
-            )
-            return
-
-        logger.debug(
-            "_handle_marker_event: succesfully backfilled insertion event %s from marker event %s",
-            insertion_event,
-            marker_event,
-        )
-
-        await self._store.insert_insertion_extremity(
-            insertion_event_id, marker_event.room_id
-        )
-
-        logger.debug(
-            "_handle_marker_event: insertion extremity added for %s from marker event %s",
-            insertion_event,
-            marker_event,
-        )
-
     async def backfill_event_id(
         self, destinations: List[str], room_id: str, event_id: str
     ) -> PulledPduInfo:
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 0b61c2272b..4292b47037 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -60,7 +60,6 @@ from synapse.replication.http.send_event import ReplicationSendEventRestServlet
 from synapse.replication.http.send_events import ReplicationSendEventsRestServlet
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
 from synapse.types import (
-    MutableStateMap,
     PersistedEventPosition,
     Requester,
     RoomAlias,
@@ -573,7 +572,6 @@ class EventCreationHandler:
         state_event_ids: Optional[List[str]] = None,
         require_consent: bool = True,
         outlier: bool = False,
-        historical: bool = False,
         depth: Optional[int] = None,
         state_map: Optional[StateMap[str]] = None,
         for_batch: bool = False,
@@ -599,7 +597,7 @@ class EventCreationHandler:
             allow_no_prev_events: Whether to allow this event to be created an empty
                 list of prev_events. Normally this is prohibited just because most
                 events should have a prev_event and we should only use this in special
-                cases like MSC2716.
+                cases (previously useful for MSC2716).
             prev_event_ids:
                 the forward extremities to use as the prev_events for the
                 new event.
@@ -614,13 +612,10 @@ class EventCreationHandler:
                 If non-None, prev_event_ids must also be provided.
 
             state_event_ids:
-                The full state at a given event. This is used particularly by the MSC2716
-                /batch_send endpoint. One use case is with insertion events which float at
-                the beginning of a historical batch and don't have any `prev_events` to
-                derive from; we add all of these state events as the explicit state so the
-                rest of the historical batch can inherit the same state and state_group.
-                This should normally be left as None, which will cause the auth_event_ids
-                to be calculated based on the room state at the prev_events.
+                The full state at a given event. This was previously used particularly
+                by the MSC2716 /batch_send endpoint. This should normally be left as
+                None, which will cause the auth_event_ids to be calculated based on the
+                room state at the prev_events.
 
             require_consent: Whether to check if the requester has
                 consented to the privacy policy.
@@ -629,10 +624,6 @@ class EventCreationHandler:
                 it's from an arbitrary point and floating in the DAG as
                 opposed to being inline with the current DAG.
 
-            historical: Indicates whether the message is being inserted
-                back in time around some existing events. This is used to skip
-                a few checks and mark the event as backfilled.
-
             depth: Override the depth used to order the event in the DAG.
                 Should normally be set to None, which will cause the depth to be calculated
                 based on the prev_events.
@@ -717,8 +708,6 @@ class EventCreationHandler:
 
         builder.internal_metadata.outlier = outlier
 
-        builder.internal_metadata.historical = historical
-
         event, unpersisted_context = await self.create_new_client_event(
             builder=builder,
             requester=requester,
@@ -947,7 +936,6 @@ class EventCreationHandler:
         txn_id: Optional[str] = None,
         ignore_shadow_ban: bool = False,
         outlier: bool = False,
-        historical: bool = False,
         depth: Optional[int] = None,
     ) -> Tuple[EventBase, int]:
         """
@@ -961,19 +949,16 @@ class EventCreationHandler:
             allow_no_prev_events: Whether to allow this event to be created an empty
                 list of prev_events. Normally this is prohibited just because most
                 events should have a prev_event and we should only use this in special
-                cases like MSC2716.
+                cases (previously useful for MSC2716).
             prev_event_ids:
                 The event IDs to use as the prev events.
                 Should normally be left as None to automatically request them
                 from the database.
             state_event_ids:
-                The full state at a given event. This is used particularly by the MSC2716
-                /batch_send endpoint. One use case is with insertion events which float at
-                the beginning of a historical batch and don't have any `prev_events` to
-                derive from; we add all of these state events as the explicit state so the
-                rest of the historical batch can inherit the same state and state_group.
-                This should normally be left as None, which will cause the auth_event_ids
-                to be calculated based on the room state at the prev_events.
+                The full state at a given event. This was previously used particularly
+                by the MSC2716 /batch_send endpoint. This should normally be left as
+                None, which will cause the auth_event_ids to be calculated based on the
+                room state at the prev_events.
             ratelimit: Whether to rate limit this send.
             txn_id: The transaction ID.
             ignore_shadow_ban: True if shadow-banned users should be allowed to
@@ -981,9 +966,6 @@ class EventCreationHandler:
             outlier: Indicates whether the event is an `outlier`, i.e. if
                 it's from an arbitrary point and floating in the DAG as
                 opposed to being inline with the current DAG.
-            historical: Indicates whether the message is being inserted
-                back in time around some existing events. This is used to skip
-                a few checks and mark the event as backfilled.
             depth: Override the depth used to order the event in the DAG.
                 Should normally be set to None, which will cause the depth to be calculated
                 based on the prev_events.
@@ -1053,7 +1035,6 @@ class EventCreationHandler:
                     prev_event_ids=prev_event_ids,
                     state_event_ids=state_event_ids,
                     outlier=outlier,
-                    historical=historical,
                     depth=depth,
                 )
                 context = await unpersisted_context.persist(event)
@@ -1145,7 +1126,7 @@ class EventCreationHandler:
             allow_no_prev_events: Whether to allow this event to be created an empty
                 list of prev_events. Normally this is prohibited just because most
                 events should have a prev_event and we should only use this in special
-                cases like MSC2716.
+                cases (previously useful for MSC2716).
             prev_event_ids:
                 the forward extremities to use as the prev_events for the
                 new event.
@@ -1158,13 +1139,10 @@ class EventCreationHandler:
                 based on the room state at the prev_events.
 
             state_event_ids:
-                The full state at a given event. This is used particularly by the MSC2716
-                /batch_send endpoint. One use case is with insertion events which float at
-                the beginning of a historical batch and don't have any `prev_events` to
-                derive from; we add all of these state events as the explicit state so the
-                rest of the historical batch can inherit the same state and state_group.
-                This should normally be left as None, which will cause the auth_event_ids
-                to be calculated based on the room state at the prev_events.
+                The full state at a given event. This was previously used particularly
+                by the MSC2716 /batch_send endpoint. This should normally be left as
+                None, which will cause the auth_event_ids to be calculated based on the
+                room state at the prev_events.
 
             depth: Override the depth used to order the event in the DAG.
                 Should normally be set to None, which will cause the depth to be calculated
@@ -1261,52 +1239,6 @@ class EventCreationHandler:
             if builder.internal_metadata.outlier:
                 event.internal_metadata.outlier = True
                 context = EventContext.for_outlier(self._storage_controllers)
-            elif (
-                event.type == EventTypes.MSC2716_INSERTION
-                and state_event_ids
-                and builder.internal_metadata.is_historical()
-            ):
-                # Add explicit state to the insertion event so it has state to derive
-                # from even though it's floating with no `prev_events`. The rest of
-                # the batch can derive from this state and state_group.
-                #
-                # TODO(faster_joins): figure out how this works, and make sure that the
-                #   old state is complete.
-                #   https://github.com/matrix-org/synapse/issues/13003
-                metadata = await self.store.get_metadata_for_events(state_event_ids)
-
-                state_map_for_event: MutableStateMap[str] = {}
-                for state_id in state_event_ids:
-                    data = metadata.get(state_id)
-                    if data is None:
-                        # We're trying to persist a new historical batch of events
-                        # with the given state, e.g. via
-                        # `RoomBatchSendEventRestServlet`. The state can be inferred
-                        # by Synapse or set directly by the client.
-                        #
-                        # Either way, we should have persisted all the state before
-                        # getting here.
-                        raise Exception(
-                            f"State event {state_id} not found in DB,"
-                            " Synapse should have persisted it before using it."
-                        )
-
-                    if data.state_key is None:
-                        raise Exception(
-                            f"Trying to set non-state event {state_id} as state"
-                        )
-
-                    state_map_for_event[(data.event_type, data.state_key)] = state_id
-
-                # TODO(faster_joins): check how MSC2716 works and whether we can have
-                #   partial state here
-                #   https://github.com/matrix-org/synapse/issues/13003
-                context = await self.state.calculate_context_info(
-                    event,
-                    state_ids_before_event=state_map_for_event,
-                    partial_state=False,
-                )
-
             else:
                 context = await self.state.calculate_context_info(event)
 
@@ -1876,28 +1808,6 @@ class EventCreationHandler:
                             403, "Redacting server ACL events is not permitted"
                         )
 
-                    # Add a little safety stop-gap to prevent people from trying to
-                    # redact MSC2716 related events when they're in a room version
-                    # which does not support it yet. We allow people to use MSC2716
-                    # events in existing room versions but only from the room
-                    # creator since it does not require any changes to the auth
-                    # rules and in effect, the redaction algorithm . In the
-                    # supported room version, we add the `historical` power level to
-                    # auth the MSC2716 related events and adjust the redaction
-                    # algorthim to keep the `historical` field around (redacting an
-                    # event should only strip fields which don't affect the
-                    # structural protocol level).
-                    is_msc2716_event = (
-                        original_event.type == EventTypes.MSC2716_INSERTION
-                        or original_event.type == EventTypes.MSC2716_BATCH
-                        or original_event.type == EventTypes.MSC2716_MARKER
-                    )
-                    if not room_version_obj.msc2716_historical and is_msc2716_event:
-                        raise AuthError(
-                            403,
-                            "Redacting MSC2716 events is not supported in this room version",
-                        )
-
                 event_types = event_auth.auth_types_for_event(event.room_version, event)
                 prev_state_ids = await context.get_prev_state_ids(
                     StateFilter.from_types(event_types)
@@ -1935,58 +1845,12 @@ class EventCreationHandler:
                 if prev_state_ids:
                     raise AuthError(403, "Changing the room create event is forbidden")
 
-            if event.type == EventTypes.MSC2716_INSERTION:
-                room_version = await self.store.get_room_version_id(event.room_id)
-                room_version_obj = KNOWN_ROOM_VERSIONS[room_version]
-
-                create_event = await self.store.get_create_event_for_room(event.room_id)
-                if not room_version_obj.msc2175_implicit_room_creator:
-                    room_creator = create_event.content.get(
-                        EventContentFields.ROOM_CREATOR
-                    )
-                else:
-                    room_creator = create_event.sender
-
-                # Only check an insertion event if the room version
-                # supports it or the event is from the room creator.
-                if room_version_obj.msc2716_historical or (
-                    self.config.experimental.msc2716_enabled
-                    and event.sender == room_creator
-                ):
-                    next_batch_id = event.content.get(
-                        EventContentFields.MSC2716_NEXT_BATCH_ID
-                    )
-                    conflicting_insertion_event_id = None
-                    if next_batch_id:
-                        conflicting_insertion_event_id = (
-                            await self.store.get_insertion_event_id_by_batch_id(
-                                event.room_id, next_batch_id
-                            )
-                        )
-                    if conflicting_insertion_event_id is not None:
-                        # The current insertion event that we're processing is invalid
-                        # because an insertion event already exists in the room with the
-                        # same next_batch_id. We can't allow multiple because the batch
-                        # pointing will get weird, e.g. we can't determine which insertion
-                        # event the batch event is pointing to.
-                        raise SynapseError(
-                            HTTPStatus.BAD_REQUEST,
-                            "Another insertion event already exists with the same next_batch_id",
-                            errcode=Codes.INVALID_PARAM,
-                        )
-
-            # Mark any `m.historical` messages as backfilled so they don't appear
-            # in `/sync` and have the proper decrementing `stream_ordering` as we import
-            backfilled = False
-            if event.internal_metadata.is_historical():
-                backfilled = True
-
         assert self._storage_controllers.persistence is not None
         (
             persisted_events,
             max_stream_token,
         ) = await self._storage_controllers.persistence.persist_events(
-            events_and_context, backfilled=backfilled
+            events_and_context,
         )
 
         events_and_pos = []
diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py
deleted file mode 100644
index bf9df60218..0000000000
--- a/synapse/handlers/room_batch.py
+++ /dev/null
@@ -1,466 +0,0 @@
-import logging
-from typing import TYPE_CHECKING, List, Tuple
-
-from synapse.api.constants import EventContentFields, EventTypes
-from synapse.appservice import ApplicationService
-from synapse.http.servlet import assert_params_in_dict
-from synapse.types import JsonDict, Requester, UserID, create_requester
-from synapse.util.stringutils import random_string
-
-if TYPE_CHECKING:
-    from synapse.server import HomeServer
-
-logger = logging.getLogger(__name__)
-
-
-class RoomBatchHandler:
-    def __init__(self, hs: "HomeServer"):
-        self.hs = hs
-        self.store = hs.get_datastores().main
-        self._state_storage_controller = hs.get_storage_controllers().state
-        self.event_creation_handler = hs.get_event_creation_handler()
-        self.room_member_handler = hs.get_room_member_handler()
-        self.auth = hs.get_auth()
-
-    async def inherit_depth_from_prev_ids(self, prev_event_ids: List[str]) -> int:
-        """Finds the depth which would sort it after the most-recent
-        prev_event_id but before the successors of those events. If no
-        successors are found, we assume it's an historical extremity part of the
-        current batch and use the same depth of the prev_event_ids.
-
-        Args:
-            prev_event_ids: List of prev event IDs
-
-        Returns:
-            Inherited depth
-        """
-        (
-            most_recent_prev_event_id,
-            most_recent_prev_event_depth,
-        ) = await self.store.get_max_depth_of(prev_event_ids)
-
-        # We want to insert the historical event after the `prev_event` but before the successor event
-        #
-        # We inherit depth from the successor event instead of the `prev_event`
-        # because events returned from `/messages` are first sorted by `topological_ordering`
-        # which is just the `depth` and then tie-break with `stream_ordering`.
-        #
-        # We mark these inserted historical events as "backfilled" which gives them a
-        # negative `stream_ordering`. If we use the same depth as the `prev_event`,
-        # then our historical event will tie-break and be sorted before the `prev_event`
-        # when it should come after.
-        #
-        # We want to use the successor event depth so they appear after `prev_event` because
-        # it has a larger `depth` but before the successor event because the `stream_ordering`
-        # is negative before the successor event.
-        assert most_recent_prev_event_id is not None
-        successor_event_ids = await self.store.get_successor_events(
-            most_recent_prev_event_id
-        )
-
-        # If we can't find any successor events, then it's a forward extremity of
-        # historical messages and we can just inherit from the previous historical
-        # event which we can already assume has the correct depth where we want
-        # to insert into.
-        if not successor_event_ids:
-            depth = most_recent_prev_event_depth
-        else:
-            (
-                _,
-                oldest_successor_depth,
-            ) = await self.store.get_min_depth_of(successor_event_ids)
-
-            depth = oldest_successor_depth
-
-        return depth
-
-    def create_insertion_event_dict(
-        self, sender: str, room_id: str, origin_server_ts: int
-    ) -> JsonDict:
-        """Creates an event dict for an "insertion" event with the proper fields
-        and a random batch ID.
-
-        Args:
-            sender: The event author MXID
-            room_id: The room ID that the event belongs to
-            origin_server_ts: Timestamp when the event was sent
-
-        Returns:
-            The new event dictionary to insert.
-        """
-
-        next_batch_id = random_string(8)
-        insertion_event = {
-            "type": EventTypes.MSC2716_INSERTION,
-            "sender": sender,
-            "room_id": room_id,
-            "content": {
-                EventContentFields.MSC2716_NEXT_BATCH_ID: next_batch_id,
-                EventContentFields.MSC2716_HISTORICAL: True,
-            },
-            "origin_server_ts": origin_server_ts,
-        }
-
-        return insertion_event
-
-    async def create_requester_for_user_id_from_app_service(
-        self, user_id: str, app_service: ApplicationService
-    ) -> Requester:
-        """Creates a new requester for the given user_id
-        and validates that the app service is allowed to control
-        the given user.
-
-        Args:
-            user_id: The author MXID that the app service is controlling
-            app_service: The app service that controls the user
-
-        Returns:
-            Requester object
-        """
-
-        await self.auth.validate_appservice_can_control_user_id(app_service, user_id)
-
-        return create_requester(user_id, app_service=app_service)
-
-    async def get_most_recent_full_state_ids_from_event_id_list(
-        self, event_ids: List[str]
-    ) -> List[str]:
-        """Find the most recent event_id and grab the full state at that event.
-        We will use this as a base to auth our historical messages against.
-
-        Args:
-            event_ids: List of event ID's to look at
-
-        Returns:
-            List of event ID's
-        """
-
-        (
-            most_recent_event_id,
-            _,
-        ) = await self.store.get_max_depth_of(event_ids)
-        # mapping from (type, state_key) -> state_event_id
-        assert most_recent_event_id is not None
-        prev_state_map = await self._state_storage_controller.get_state_ids_for_event(
-            most_recent_event_id
-        )
-        # List of state event ID's
-        full_state_ids = list(prev_state_map.values())
-
-        return full_state_ids
-
-    async def persist_state_events_at_start(
-        self,
-        state_events_at_start: List[JsonDict],
-        room_id: str,
-        initial_state_event_ids: List[str],
-        app_service_requester: Requester,
-    ) -> List[str]:
-        """Takes all `state_events_at_start` event dictionaries and creates/persists
-        them in a floating state event chain which don't resolve into the current room
-        state. They are floating because they reference no prev_events which disconnects
-        them from the normal DAG.
-
-        Args:
-            state_events_at_start:
-            room_id: Room where you want the events persisted in.
-            initial_state_event_ids:
-                The base set of state for the historical batch which the floating
-                state chain will derive from. This should probably be the state
-                from the `prev_event` defined by `/batch_send?prev_event_id=$abc`.
-            app_service_requester: The requester of an application service.
-
-        Returns:
-            List of state event ID's we just persisted
-        """
-        assert app_service_requester.app_service
-
-        state_event_ids_at_start = []
-        state_event_ids = initial_state_event_ids.copy()
-
-        # Make the state events float off on their own by specifying no
-        # prev_events for the first one in the chain so we don't have a bunch of
-        # `@mxid joined the room` noise between each batch.
-        prev_event_ids_for_state_chain: List[str] = []
-
-        for index, state_event in enumerate(state_events_at_start):
-            assert_params_in_dict(
-                state_event, ["type", "origin_server_ts", "content", "sender"]
-            )
-
-            logger.debug(
-                "RoomBatchSendEventRestServlet inserting state_event=%s", state_event
-            )
-
-            event_dict = {
-                "type": state_event["type"],
-                "origin_server_ts": state_event["origin_server_ts"],
-                "content": state_event["content"],
-                "room_id": room_id,
-                "sender": state_event["sender"],
-                "state_key": state_event["state_key"],
-            }
-
-            # Mark all events as historical
-            event_dict["content"][EventContentFields.MSC2716_HISTORICAL] = True
-
-            # TODO: This is pretty much the same as some other code to handle inserting state in this file
-            if event_dict["type"] == EventTypes.Member:
-                membership = event_dict["content"].get("membership", None)
-                event_id, _ = await self.room_member_handler.update_membership(
-                    await self.create_requester_for_user_id_from_app_service(
-                        state_event["sender"], app_service_requester.app_service
-                    ),
-                    target=UserID.from_string(event_dict["state_key"]),
-                    room_id=room_id,
-                    action=membership,
-                    content=event_dict["content"],
-                    historical=True,
-                    # Only the first event in the state chain should be floating.
-                    # The rest should hang off each other in a chain.
-                    allow_no_prev_events=index == 0,
-                    prev_event_ids=prev_event_ids_for_state_chain,
-                    # The first event in the state chain is floating with no
-                    # `prev_events` which means it can't derive state from
-                    # anywhere automatically. So we need to set some state
-                    # explicitly.
-                    #
-                    # Make sure to use a copy of this list because we modify it
-                    # later in the loop here. Otherwise it will be the same
-                    # reference and also update in the event when we append
-                    # later.
-                    state_event_ids=state_event_ids.copy(),
-                )
-            else:
-                (
-                    event,
-                    _,
-                ) = await self.event_creation_handler.create_and_send_nonmember_event(
-                    await self.create_requester_for_user_id_from_app_service(
-                        state_event["sender"], app_service_requester.app_service
-                    ),
-                    event_dict,
-                    historical=True,
-                    # Only the first event in the state chain should be floating.
-                    # The rest should hang off each other in a chain.
-                    allow_no_prev_events=index == 0,
-                    prev_event_ids=prev_event_ids_for_state_chain,
-                    # The first event in the state chain is floating with no
-                    # `prev_events` which means it can't derive state from
-                    # anywhere automatically. So we need to set some state
-                    # explicitly.
-                    #
-                    # Make sure to use a copy of this list because we modify it
-                    # later in the loop here. Otherwise it will be the same
-                    # reference and also update in the event when we append later.
-                    state_event_ids=state_event_ids.copy(),
-                )
-                event_id = event.event_id
-
-            state_event_ids_at_start.append(event_id)
-            state_event_ids.append(event_id)
-            # Connect all the state in a floating chain
-            prev_event_ids_for_state_chain = [event_id]
-
-        return state_event_ids_at_start
-
-    async def persist_historical_events(
-        self,
-        events_to_create: List[JsonDict],
-        room_id: str,
-        inherited_depth: int,
-        initial_state_event_ids: List[str],
-        app_service_requester: Requester,
-    ) -> List[str]:
-        """Create and persists all events provided sequentially. Handles the
-        complexity of creating events in chronological order so they can
-        reference each other by prev_event but still persists in
-        reverse-chronoloical order so they have the correct
-        (topological_ordering, stream_ordering) and sort correctly from
-        /messages.
-
-        Args:
-            events_to_create: List of historical events to create in JSON
-                dictionary format.
-            room_id: Room where you want the events persisted in.
-            inherited_depth: The depth to create the events at (you will
-                probably by calling inherit_depth_from_prev_ids(...)).
-            initial_state_event_ids:
-                This is used to set explicit state for the insertion event at
-                the start of the historical batch since it's floating with no
-                prev_events to derive state from automatically.
-            app_service_requester: The requester of an application service.
-
-        Returns:
-            List of persisted event IDs
-        """
-        assert app_service_requester.app_service
-
-        # We expect the first event in a historical batch to be an insertion event
-        assert events_to_create[0]["type"] == EventTypes.MSC2716_INSERTION
-        # We expect the last event in a historical batch to be an batch event
-        assert events_to_create[-1]["type"] == EventTypes.MSC2716_BATCH
-
-        # Make the historical event chain float off on its own by specifying no
-        # prev_events for the first event in the chain which causes the HS to
-        # ask for the state at the start of the batch later.
-        prev_event_ids: List[str] = []
-
-        event_ids = []
-        events_to_persist = []
-        for index, ev in enumerate(events_to_create):
-            assert_params_in_dict(ev, ["type", "origin_server_ts", "content", "sender"])
-
-            assert self.hs.is_mine_id(ev["sender"]), "User must be our own: %s" % (
-                ev["sender"],
-            )
-
-            event_dict = {
-                "type": ev["type"],
-                "origin_server_ts": ev["origin_server_ts"],
-                "content": ev["content"],
-                "room_id": room_id,
-                "sender": ev["sender"],  # requester.user.to_string(),
-                "prev_events": prev_event_ids.copy(),
-            }
-
-            # Mark all events as historical
-            event_dict["content"][EventContentFields.MSC2716_HISTORICAL] = True
-
-            event, unpersisted_context = await self.event_creation_handler.create_event(
-                await self.create_requester_for_user_id_from_app_service(
-                    ev["sender"], app_service_requester.app_service
-                ),
-                event_dict,
-                # Only the first event (which is the insertion event) in the
-                # chain should be floating. The rest should hang off each other
-                # in a chain.
-                allow_no_prev_events=index == 0,
-                prev_event_ids=event_dict.get("prev_events"),
-                # Since the first event (which is the insertion event) in the
-                # chain is floating with no `prev_events`, it can't derive state
-                # from anywhere automatically. So we need to set some state
-                # explicitly.
-                state_event_ids=initial_state_event_ids if index == 0 else None,
-                historical=True,
-                depth=inherited_depth,
-            )
-            context = await unpersisted_context.persist(event)
-            assert context._state_group
-
-            # Normally this is done when persisting the event but we have to
-            # pre-emptively do it here because we create all the events first,
-            # then persist them in another pass below. And we want to share
-            # state_groups across the whole batch so this lookup needs to work
-            # for the next event in the batch in this loop.
-            await self.store.store_state_group_id_for_event_id(
-                event_id=event.event_id,
-                state_group_id=context._state_group,
-            )
-
-            logger.debug(
-                "RoomBatchSendEventRestServlet inserting event=%s, prev_event_ids=%s",
-                event,
-                prev_event_ids,
-            )
-
-            events_to_persist.append((event, context))
-            event_id = event.event_id
-
-            event_ids.append(event_id)
-            prev_event_ids = [event_id]
-
-        # Persist events in reverse-chronological order so they have the
-        # correct stream_ordering as they are backfilled (which decrements).
-        # Events are sorted by (topological_ordering, stream_ordering)
-        # where topological_ordering is just depth.
-        for event, context in reversed(events_to_persist):
-            # This call can't raise `PartialStateConflictError` since we forbid
-            # use of the historical batch API during partial state
-            await self.event_creation_handler.handle_new_client_event(
-                await self.create_requester_for_user_id_from_app_service(
-                    event.sender, app_service_requester.app_service
-                ),
-                events_and_context=[(event, context)],
-            )
-
-        return event_ids
-
-    async def handle_batch_of_events(
-        self,
-        events_to_create: List[JsonDict],
-        room_id: str,
-        batch_id_to_connect_to: str,
-        inherited_depth: int,
-        initial_state_event_ids: List[str],
-        app_service_requester: Requester,
-    ) -> Tuple[List[str], str]:
-        """
-        Handles creating and persisting all of the historical events as well as
-        insertion and batch meta events to make the batch navigable in the DAG.
-
-        Args:
-            events_to_create: List of historical events to create in JSON
-                dictionary format.
-            room_id: Room where you want the events created in.
-            batch_id_to_connect_to: The batch_id from the insertion event you
-                want this batch to connect to.
-            inherited_depth: The depth to create the events at (you will
-                probably by calling inherit_depth_from_prev_ids(...)).
-            initial_state_event_ids:
-                This is used to set explicit state for the insertion event at
-                the start of the historical batch since it's floating with no
-                prev_events to derive state from automatically. This should
-                probably be the state from the `prev_event` defined by
-                `/batch_send?prev_event_id=$abc` plus the outcome of
-                `persist_state_events_at_start`
-            app_service_requester: The requester of an application service.
-
-        Returns:
-            Tuple containing a list of created events and the next_batch_id
-        """
-
-        # Connect this current batch to the insertion event from the previous batch
-        last_event_in_batch = events_to_create[-1]
-        batch_event = {
-            "type": EventTypes.MSC2716_BATCH,
-            "sender": app_service_requester.user.to_string(),
-            "room_id": room_id,
-            "content": {
-                EventContentFields.MSC2716_BATCH_ID: batch_id_to_connect_to,
-                EventContentFields.MSC2716_HISTORICAL: True,
-            },
-            # Since the batch event is put at the end of the batch,
-            # where the newest-in-time event is, copy the origin_server_ts from
-            # the last event we're inserting
-            "origin_server_ts": last_event_in_batch["origin_server_ts"],
-        }
-        # Add the batch event to the end of the batch (newest-in-time)
-        events_to_create.append(batch_event)
-
-        # Add an "insertion" event to the start of each batch (next to the oldest-in-time
-        # event in the batch) so the next batch can be connected to this one.
-        insertion_event = self.create_insertion_event_dict(
-            sender=app_service_requester.user.to_string(),
-            room_id=room_id,
-            # Since the insertion event is put at the start of the batch,
-            # where the oldest-in-time event is, copy the origin_server_ts from
-            # the first event we're inserting
-            origin_server_ts=events_to_create[0]["origin_server_ts"],
-        )
-        next_batch_id = insertion_event["content"][
-            EventContentFields.MSC2716_NEXT_BATCH_ID
-        ]
-        # Prepend the insertion event to the start of the batch (oldest-in-time)
-        events_to_create = [insertion_event] + events_to_create
-
-        # Create and persist all of the historical events
-        event_ids = await self.persist_historical_events(
-            events_to_create=events_to_create,
-            room_id=room_id,
-            inherited_depth=inherited_depth,
-            initial_state_event_ids=initial_state_event_ids,
-            app_service_requester=app_service_requester,
-        )
-
-        return event_ids, next_batch_id
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 55df34bd06..82e4fa7363 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -362,7 +362,6 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         content: Optional[dict] = None,
         require_consent: bool = True,
         outlier: bool = False,
-        historical: bool = False,
         origin_server_ts: Optional[int] = None,
     ) -> Tuple[str, int]:
         """
@@ -378,16 +377,13 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             allow_no_prev_events: Whether to allow this event to be created an empty
                 list of prev_events. Normally this is prohibited just because most
                 events should have a prev_event and we should only use this in special
-                cases like MSC2716.
+                cases (previously useful for MSC2716).
             prev_event_ids: The event IDs to use as the prev events
             state_event_ids:
-                The full state at a given event. This is used particularly by the MSC2716
-                /batch_send endpoint. One use case is the historical `state_events_at_start`;
-                since each is marked as an `outlier`, the `EventContext.for_outlier()` won't
-                have any `state_ids` set and therefore can't derive any state even though the
-                prev_events are set so we need to set them ourself via this argument.
-                This should normally be left as None, which will cause the auth_event_ids
-                to be calculated based on the room state at the prev_events.
+                The full state at a given event. This was previously used particularly
+                by the MSC2716 /batch_send endpoint. This should normally be left as
+                None, which will cause the auth_event_ids to be calculated based on the
+                room state at the prev_events.
             depth: Override the depth used to order the event in the DAG.
                 Should normally be set to None, which will cause the depth to be calculated
                 based on the prev_events.
@@ -400,9 +396,6 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             outlier: Indicates whether the event is an `outlier`, i.e. if
                 it's from an arbitrary point and floating in the DAG as
                 opposed to being inline with the current DAG.
-            historical: Indicates whether the message is being inserted
-                back in time around some existing events. This is used to skip
-                a few checks and mark the event as backfilled.
             origin_server_ts: The origin_server_ts to use if a new event is created. Uses
                 the current timestamp if set to None.
 
@@ -477,7 +470,6 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                     depth=depth,
                     require_consent=require_consent,
                     outlier=outlier,
-                    historical=historical,
                 )
                 context = await unpersisted_context.persist(event)
                 prev_state_ids = await context.get_prev_state_ids(
@@ -585,7 +577,6 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         new_room: bool = False,
         require_consent: bool = True,
         outlier: bool = False,
-        historical: bool = False,
         allow_no_prev_events: bool = False,
         prev_event_ids: Optional[List[str]] = None,
         state_event_ids: Optional[List[str]] = None,
@@ -610,22 +601,16 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             outlier: Indicates whether the event is an `outlier`, i.e. if
                 it's from an arbitrary point and floating in the DAG as
                 opposed to being inline with the current DAG.
-            historical: Indicates whether the message is being inserted
-                back in time around some existing events. This is used to skip
-                a few checks and mark the event as backfilled.
             allow_no_prev_events: Whether to allow this event to be created an empty
                 list of prev_events. Normally this is prohibited just because most
                 events should have a prev_event and we should only use this in special
-                cases like MSC2716.
+                cases (previously useful for MSC2716).
             prev_event_ids: The event IDs to use as the prev events
             state_event_ids:
-                The full state at a given event. This is used particularly by the MSC2716
-                /batch_send endpoint. One use case is the historical `state_events_at_start`;
-                since each is marked as an `outlier`, the `EventContext.for_outlier()` won't
-                have any `state_ids` set and therefore can't derive any state even though the
-                prev_events are set so we need to set them ourself via this argument.
-                This should normally be left as None, which will cause the auth_event_ids
-                to be calculated based on the room state at the prev_events.
+                The full state at a given event. This was previously used particularly
+                by the MSC2716 /batch_send endpoint. This should normally be left as
+                None, which will cause the auth_event_ids to be calculated based on the
+                room state at the prev_events.
             depth: Override the depth used to order the event in the DAG.
                 Should normally be set to None, which will cause the depth to be calculated
                 based on the prev_events.
@@ -667,7 +652,6 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                         new_room=new_room,
                         require_consent=require_consent,
                         outlier=outlier,
-                        historical=historical,
                         allow_no_prev_events=allow_no_prev_events,
                         prev_event_ids=prev_event_ids,
                         state_event_ids=state_event_ids,
@@ -691,7 +675,6 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         new_room: bool = False,
         require_consent: bool = True,
         outlier: bool = False,
-        historical: bool = False,
         allow_no_prev_events: bool = False,
         prev_event_ids: Optional[List[str]] = None,
         state_event_ids: Optional[List[str]] = None,
@@ -718,22 +701,16 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             outlier: Indicates whether the event is an `outlier`, i.e. if
                 it's from an arbitrary point and floating in the DAG as
                 opposed to being inline with the current DAG.
-            historical: Indicates whether the message is being inserted
-                back in time around some existing events. This is used to skip
-                a few checks and mark the event as backfilled.
             allow_no_prev_events: Whether to allow this event to be created an empty
                 list of prev_events. Normally this is prohibited just because most
                 events should have a prev_event and we should only use this in special
-                cases like MSC2716.
+                cases (previously useful for MSC2716).
             prev_event_ids: The event IDs to use as the prev events
             state_event_ids:
-                The full state at a given event. This is used particularly by the MSC2716
-                /batch_send endpoint. One use case is the historical `state_events_at_start`;
-                since each is marked as an `outlier`, the `EventContext.for_outlier()` won't
-                have any `state_ids` set and therefore can't derive any state even though the
-                prev_events are set so we need to set them ourself via this argument.
-                This should normally be left as None, which will cause the auth_event_ids
-                to be calculated based on the room state at the prev_events.
+                The full state at a given event. This was previously used particularly
+                by the MSC2716 /batch_send endpoint. This should normally be left as
+                None, which will cause the auth_event_ids to be calculated based on the
+                room state at the prev_events.
             depth: Override the depth used to order the event in the DAG.
                 Should normally be set to None, which will cause the depth to be calculated
                 based on the prev_events.
@@ -877,7 +854,6 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                 content=content,
                 require_consent=require_consent,
                 outlier=outlier,
-                historical=historical,
                 origin_server_ts=origin_server_ts,
             )
 
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 33002cc0f2..67377c647b 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -322,7 +322,6 @@ class BulkPushRuleEvaluator:
     ) -> None:
         if (
             not event.internal_metadata.is_notifiable()
-            or event.internal_metadata.is_historical()
             or event.room_id in self.hs.config.server.rooms_to_exclude_from_sync
         ):
             # Push rules for events that aren't notifiable can't be processed by this and
diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py
index 1af8d99d20..df0845edb2 100644
--- a/synapse/rest/__init__.py
+++ b/synapse/rest/__init__.py
@@ -48,7 +48,6 @@ from synapse.rest.client import (
     rendezvous,
     report_event,
     room,
-    room_batch,
     room_keys,
     room_upgrade_rest_servlet,
     sendtodevice,
@@ -132,7 +131,6 @@ class ClientRestResource(JsonResource):
         user_directory.register_servlets(hs, client_resource)
         if is_main_process:
             room_upgrade_rest_servlet.register_servlets(hs, client_resource)
-        room_batch.register_servlets(hs, client_resource)
         capabilities.register_servlets(hs, client_resource)
         if is_main_process:
             account_validity.register_servlets(hs, client_resource)
diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py
deleted file mode 100644
index 69f85112d8..0000000000
--- a/synapse/rest/client/room_batch.py
+++ /dev/null
@@ -1,254 +0,0 @@
-# Copyright 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-import re
-from http import HTTPStatus
-from typing import TYPE_CHECKING, Tuple
-
-from synapse.api.constants import EventContentFields
-from synapse.api.errors import AuthError, Codes, SynapseError
-from synapse.http.server import HttpServer
-from synapse.http.servlet import (
-    RestServlet,
-    assert_params_in_dict,
-    parse_json_object_from_request,
-    parse_string,
-    parse_strings_from_args,
-)
-from synapse.http.site import SynapseRequest
-from synapse.types import JsonDict
-
-if TYPE_CHECKING:
-    from synapse.server import HomeServer
-
-logger = logging.getLogger(__name__)
-
-
-class RoomBatchSendEventRestServlet(RestServlet):
-    """
-    API endpoint which can insert a batch of events historically back in time
-    next to the given `prev_event`.
-
-    `batch_id` comes from `next_batch_id `in the response of the batch send
-    endpoint and is derived from the "insertion" events added to each batch.
-    It's not required for the first batch send.
-
-    `state_events_at_start` is used to define the historical state events
-    needed to auth the events like join events. These events will float
-    outside of the normal DAG as outlier's and won't be visible in the chat
-    history which also allows us to insert multiple batches without having a bunch
-    of `@mxid joined the room` noise between each batch.
-
-    `events` is chronological list of events you want to insert.
-    There is a reverse-chronological constraint on batches so once you insert
-    some messages, you can only insert older ones after that.
-    tldr; Insert batches from your most recent history -> oldest history.
-
-    POST /_matrix/client/unstable/org.matrix.msc2716/rooms/<roomID>/batch_send?prev_event_id=<eventID>&batch_id=<batchID>
-    {
-        "events": [ ... ],
-        "state_events_at_start": [ ... ]
-    }
-    """
-
-    PATTERNS = (
-        re.compile(
-            "^/_matrix/client/unstable/org.matrix.msc2716"
-            "/rooms/(?P<room_id>[^/]*)/batch_send$"
-        ),
-    )
-    CATEGORY = "Client API requests"
-
-    def __init__(self, hs: "HomeServer"):
-        super().__init__()
-        self.store = hs.get_datastores().main
-        self.event_creation_handler = hs.get_event_creation_handler()
-        self.auth = hs.get_auth()
-        self.room_batch_handler = hs.get_room_batch_handler()
-
-    async def on_POST(
-        self, request: SynapseRequest, room_id: str
-    ) -> Tuple[int, JsonDict]:
-        requester = await self.auth.get_user_by_req(request, allow_guest=False)
-
-        if not requester.app_service:
-            raise AuthError(
-                HTTPStatus.FORBIDDEN,
-                "Only application services can use the /batchsend endpoint",
-            )
-
-        body = parse_json_object_from_request(request)
-        assert_params_in_dict(body, ["state_events_at_start", "events"])
-
-        assert request.args is not None
-        prev_event_ids_from_query = parse_strings_from_args(
-            request.args, "prev_event_id"
-        )
-        batch_id_from_query = parse_string(request, "batch_id")
-
-        if prev_event_ids_from_query is None:
-            raise SynapseError(
-                HTTPStatus.BAD_REQUEST,
-                "prev_event query parameter is required when inserting historical messages back in time",
-                errcode=Codes.MISSING_PARAM,
-            )
-
-        if await self.store.is_partial_state_room(room_id):
-            raise SynapseError(
-                HTTPStatus.BAD_REQUEST,
-                "Cannot insert history batches until we have fully joined the room",
-                errcode=Codes.UNABLE_DUE_TO_PARTIAL_STATE,
-            )
-
-        # Verify the batch_id_from_query corresponds to an actual insertion event
-        # and have the batch connected.
-        if batch_id_from_query:
-            corresponding_insertion_event_id = (
-                await self.store.get_insertion_event_id_by_batch_id(
-                    room_id, batch_id_from_query
-                )
-            )
-            if corresponding_insertion_event_id is None:
-                raise SynapseError(
-                    HTTPStatus.BAD_REQUEST,
-                    "No insertion event corresponds to the given ?batch_id",
-                    errcode=Codes.INVALID_PARAM,
-                )
-
-        # Make sure that the prev_event_ids exist and aren't outliers - ie, they are
-        # regular parts of the room DAG where we know the state.
-        non_outlier_prev_events = await self.store.have_events_in_timeline(
-            prev_event_ids_from_query
-        )
-        for prev_event_id in prev_event_ids_from_query:
-            if prev_event_id not in non_outlier_prev_events:
-                raise SynapseError(
-                    HTTPStatus.BAD_REQUEST,
-                    "prev_event %s does not exist, or is an outlier" % (prev_event_id,),
-                    errcode=Codes.INVALID_PARAM,
-                )
-
-        # For the event we are inserting next to (`prev_event_ids_from_query`),
-        # find the most recent state events that allowed that message to be
-        # sent. We will use that as a base to auth our historical messages
-        # against.
-        state_event_ids = await self.room_batch_handler.get_most_recent_full_state_ids_from_event_id_list(
-            prev_event_ids_from_query
-        )
-
-        state_event_ids_at_start = []
-        # Create and persist all of the state events that float off on their own
-        # before the batch. These will most likely be all of the invite/member
-        # state events used to auth the upcoming historical messages.
-        if body["state_events_at_start"]:
-            state_event_ids_at_start = (
-                await self.room_batch_handler.persist_state_events_at_start(
-                    state_events_at_start=body["state_events_at_start"],
-                    room_id=room_id,
-                    initial_state_event_ids=state_event_ids,
-                    app_service_requester=requester,
-                )
-            )
-            # Update our ongoing auth event ID list with all of the new state we
-            # just created
-            state_event_ids.extend(state_event_ids_at_start)
-
-        inherited_depth = await self.room_batch_handler.inherit_depth_from_prev_ids(
-            prev_event_ids_from_query
-        )
-
-        events_to_create = body["events"]
-
-        # Figure out which batch to connect to. If they passed in
-        # batch_id_from_query let's use it. The batch ID passed in comes
-        # from the batch_id in the "insertion" event from the previous batch.
-        last_event_in_batch = events_to_create[-1]
-        base_insertion_event = None
-        if batch_id_from_query:
-            batch_id_to_connect_to = batch_id_from_query
-        # Otherwise, create an insertion event to act as a starting point.
-        #
-        # We don't always have an insertion event to start hanging more history
-        # off of (ideally there would be one in the main DAG, but that's not the
-        # case if we're wanting to add history to e.g. existing rooms without
-        # an insertion event), in which case we just create a new insertion event
-        # that can then get pointed to by a "marker" event later.
-        else:
-            base_insertion_event_dict = (
-                self.room_batch_handler.create_insertion_event_dict(
-                    sender=requester.user.to_string(),
-                    room_id=room_id,
-                    origin_server_ts=last_event_in_batch["origin_server_ts"],
-                )
-            )
-            base_insertion_event_dict["prev_events"] = prev_event_ids_from_query.copy()
-
-            (
-                base_insertion_event,
-                _,
-            ) = await self.event_creation_handler.create_and_send_nonmember_event(
-                await self.room_batch_handler.create_requester_for_user_id_from_app_service(
-                    base_insertion_event_dict["sender"],
-                    requester.app_service,
-                ),
-                base_insertion_event_dict,
-                prev_event_ids=base_insertion_event_dict.get("prev_events"),
-                # Also set the explicit state here because we want to resolve
-                # any `state_events_at_start` here too. It's not strictly
-                # necessary to accomplish anything but if someone asks for the
-                # state at this point, we probably want to show them the
-                # historical state that was part of this batch.
-                state_event_ids=state_event_ids,
-                historical=True,
-                depth=inherited_depth,
-            )
-
-            batch_id_to_connect_to = base_insertion_event.content[
-                EventContentFields.MSC2716_NEXT_BATCH_ID
-            ]
-
-        # Create and persist all of the historical events as well as insertion
-        # and batch meta events to make the batch navigable in the DAG.
-        event_ids, next_batch_id = await self.room_batch_handler.handle_batch_of_events(
-            events_to_create=events_to_create,
-            room_id=room_id,
-            batch_id_to_connect_to=batch_id_to_connect_to,
-            inherited_depth=inherited_depth,
-            initial_state_event_ids=state_event_ids,
-            app_service_requester=requester,
-        )
-
-        insertion_event_id = event_ids[0]
-        batch_event_id = event_ids[-1]
-        historical_event_ids = event_ids[1:-1]
-
-        response_dict = {
-            "state_event_ids": state_event_ids_at_start,
-            "event_ids": historical_event_ids,
-            "next_batch_id": next_batch_id,
-            "insertion_event_id": insertion_event_id,
-            "batch_event_id": batch_event_id,
-        }
-        if base_insertion_event is not None:
-            response_dict["base_insertion_event_id"] = base_insertion_event.event_id
-
-        return HTTPStatus.OK, response_dict
-
-
-def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
-    msc2716_enabled = hs.config.experimental.msc2716_enabled
-
-    if msc2716_enabled:
-        RoomBatchSendEventRestServlet(hs).register(http_server)
diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py
index 1910648755..95400ba570 100644
--- a/synapse/rest/client/versions.py
+++ b/synapse/rest/client/versions.py
@@ -102,8 +102,6 @@ class VersionsRestServlet(RestServlet):
                     "org.matrix.msc2285.stable": True,  # TODO: Remove when MSC2285 becomes a part of the spec
                     # Supports filtering of /publicRooms by room type as per MSC3827
                     "org.matrix.msc3827.stable": True,
-                    # Adds support for importing historical messages as per MSC2716
-                    "org.matrix.msc2716": self.config.experimental.msc2716_enabled,
                     # Adds support for thread relations, per MSC3440.
                     "org.matrix.msc3440.stable": True,  # TODO: remove when "v1.3" is added above
                     # Support for thread read receipts & notification counts.
diff --git a/synapse/server.py b/synapse/server.py
index 0f36ef69cb..b72b76a38b 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -91,7 +91,6 @@ from synapse.handlers.room import (
     RoomShutdownHandler,
     TimestampLookupHandler,
 )
-from synapse.handlers.room_batch import RoomBatchHandler
 from synapse.handlers.room_list import RoomListHandler
 from synapse.handlers.room_member import (
     RoomForgetterHandler,
@@ -492,10 +491,6 @@ class HomeServer(metaclass=abc.ABCMeta):
     def get_room_creation_handler(self) -> RoomCreationHandler:
         return RoomCreationHandler(self)
 
-    @cache_in_self
-    def get_room_batch_handler(self) -> RoomBatchHandler:
-        return RoomBatchHandler(self)
-
     @cache_in_self
     def get_room_shutdown_handler(self) -> RoomShutdownHandler:
         return RoomShutdownHandler(self)
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index 0032a92f49..3a10c265c9 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -61,7 +61,6 @@ from .registration import RegistrationStore
 from .rejections import RejectionsStore
 from .relations import RelationsStore
 from .room import RoomStore
-from .room_batch import RoomBatchStore
 from .roommember import RoomMemberStore
 from .search import SearchStore
 from .session import SessionStore
@@ -87,7 +86,6 @@ class DataStore(
     DeviceStore,
     RoomMemberStore,
     RoomStore,
-    RoomBatchStore,
     RegistrationStore,
     ProfileStore,
     PresenceStore,
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 2681917d0b..8b6e3c1dc7 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -31,7 +31,7 @@ from typing import (
 import attr
 from prometheus_client import Counter, Gauge
 
-from synapse.api.constants import MAX_DEPTH, EventTypes
+from synapse.api.constants import MAX_DEPTH
 from synapse.api.errors import StoreError
 from synapse.api.room_versions import EventFormatVersions, RoomVersion
 from synapse.events import EventBase, make_event_from_dict
@@ -891,124 +891,6 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
             room_id,
         )
 
-    @trace
-    async def get_insertion_event_backward_extremities_in_room(
-        self,
-        room_id: str,
-        current_depth: int,
-        limit: int,
-    ) -> List[Tuple[str, int]]:
-        """
-        Get the insertion events we know about that we haven't backfilled yet
-        along with the approximate depth. Only returns insertion events that are
-        at a depth lower than or equal to the `current_depth`. Sorted by depth,
-        highest to lowest (descending) so the closest events to the
-        `current_depth` are first in the list.
-
-        We ignore insertion events that are newer than the user's current scroll
-        position (ie, those with depth greater than `current_depth`) as:
-            1. we don't really care about getting events that have happened
-               after our current position; and
-            2. by the nature of paginating and scrolling back, we have likely
-               previously tried and failed to backfill from that insertion event, so
-               to avoid getting "stuck" requesting the same backfill repeatedly
-               we drop those insertion event.
-
-        Args:
-            room_id: Room where we want to find the oldest events
-            current_depth: The depth at the user's current scrollback position
-            limit: The max number of insertion event extremities to return
-
-        Returns:
-            List of (event_id, depth) tuples. Sorted by depth, highest to lowest
-            (descending) so the closest events to the `current_depth` are first
-            in the list.
-        """
-
-        def get_insertion_event_backward_extremities_in_room_txn(
-            txn: LoggingTransaction, room_id: str
-        ) -> List[Tuple[str, int]]:
-            if isinstance(self.database_engine, PostgresEngine):
-                least_function = "LEAST"
-            elif isinstance(self.database_engine, Sqlite3Engine):
-                least_function = "MIN"
-            else:
-                raise RuntimeError("Unknown database engine")
-
-            sql = f"""
-                SELECT
-                    insertion_event_extremity.event_id, event.depth
-                /* We only want insertion events that are also marked as backwards extremities */
-                FROM insertion_event_extremities AS insertion_event_extremity
-                /* Get the depth of the insertion event from the events table */
-                INNER JOIN events AS event USING (event_id)
-                /**
-                 * We use this info to make sure we don't retry to use a backfill point
-                 * if we've already attempted to backfill from it recently.
-                 */
-                LEFT JOIN event_failed_pull_attempts AS failed_backfill_attempt_info
-                ON
-                    failed_backfill_attempt_info.room_id = insertion_event_extremity.room_id
-                    AND failed_backfill_attempt_info.event_id = insertion_event_extremity.event_id
-                WHERE
-                    insertion_event_extremity.room_id = ?
-                    /**
-                     * We only want extremities that are older than or at
-                     * the same position of the given `current_depth` (where older
-                     * means less than the given depth) because we're looking backwards
-                     * from the `current_depth` when backfilling.
-                     *
-                     *                         current_depth (ignore events that come after this, ignore 2-4)
-                     *                         |
-                     *                         ▼
-                     * <oldest-in-time> [0]<--[1]<--[2]<--[3]<--[4] <newest-in-time>
-                     */
-                    AND event.depth <= ? /* current_depth */
-                    /**
-                     * Exponential back-off (up to the upper bound) so we don't retry the
-                     * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc
-                     *
-                     * We use `1 << n` as a power of 2 equivalent for compatibility
-                     * with older SQLites. The left shift equivalent only works with
-                     * powers of 2 because left shift is a binary operation (base-2).
-                     * Otherwise, we would use `power(2, n)` or the power operator, `2^n`.
-                     */
-                    AND (
-                        failed_backfill_attempt_info.event_id IS NULL
-                        OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + (
-                            (1 << {least_function}(failed_backfill_attempt_info.num_attempts, ? /* max doubling steps */))
-                            * ? /* step */
-                        )
-                    )
-                /**
-                 * Sort from highest (closest to the `current_depth`) to the lowest depth
-                 * because the closest are most relevant to backfill from first.
-                 * Then tie-break on alphabetical order of the event_ids so we get a
-                 * consistent ordering which is nice when asserting things in tests.
-                 */
-                ORDER BY event.depth DESC, insertion_event_extremity.event_id DESC
-                LIMIT ?
-            """
-
-            txn.execute(
-                sql,
-                (
-                    room_id,
-                    current_depth,
-                    self._clock.time_msec(),
-                    BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS,
-                    BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_MILLISECONDS,
-                    limit,
-                ),
-            )
-            return cast(List[Tuple[str, int]], txn.fetchall())
-
-        return await self.db_pool.runInteraction(
-            "get_insertion_event_backward_extremities_in_room",
-            get_insertion_event_backward_extremities_in_room_txn,
-            room_id,
-        )
-
     async def get_max_depth_of(
         self, event_ids: Collection[str]
     ) -> Tuple[Optional[str], int]:
@@ -1280,50 +1162,6 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
 
         return event_ids
 
-    def _get_connected_batch_event_backfill_results_txn(
-        self, txn: LoggingTransaction, insertion_event_id: str, limit: int
-    ) -> List[BackfillQueueNavigationItem]:
-        """
-        Find any batch connections of a given insertion event.
-        A batch event points at a insertion event via:
-        batch_event.content[MSC2716_BATCH_ID] -> insertion_event.content[MSC2716_NEXT_BATCH_ID]
-
-        Args:
-            txn: The database transaction to use
-            insertion_event_id: The event ID to navigate from. We will find
-                batch events that point back at this insertion event.
-            limit: Max number of event ID's to query for and return
-
-        Returns:
-            List of batch events that the backfill queue can process
-        """
-        batch_connection_query = """
-            SELECT e.depth, e.stream_ordering, c.event_id, e.type FROM insertion_events AS i
-            /* Find the batch that connects to the given insertion event */
-            INNER JOIN batch_events AS c
-            ON i.next_batch_id = c.batch_id
-            /* Get the depth of the batch start event from the events table */
-            INNER JOIN events AS e ON c.event_id = e.event_id
-            /* Find an insertion event which matches the given event_id */
-            WHERE i.event_id = ?
-            LIMIT ?
-        """
-
-        # Find any batch connections for the given insertion event
-        txn.execute(
-            batch_connection_query,
-            (insertion_event_id, limit),
-        )
-        return [
-            BackfillQueueNavigationItem(
-                depth=row[0],
-                stream_ordering=row[1],
-                event_id=row[2],
-                type=row[3],
-            )
-            for row in txn
-        ]
-
     def _get_connected_prev_event_backfill_results_txn(
         self, txn: LoggingTransaction, event_id: str, limit: int
     ) -> List[BackfillQueueNavigationItem]:
@@ -1472,40 +1310,6 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
 
             event_id_results.add(event_id)
 
-            # Try and find any potential historical batches of message history.
-            if self.hs.config.experimental.msc2716_enabled:
-                # We need to go and try to find any batch events connected
-                # to a given insertion event (by batch_id). If we find any, we'll
-                # add them to the queue and navigate up the DAG like normal in the
-                # next iteration of the loop.
-                if event_type == EventTypes.MSC2716_INSERTION:
-                    # Find any batch connections for the given insertion event
-                    connected_batch_event_backfill_results = (
-                        self._get_connected_batch_event_backfill_results_txn(
-                            txn, event_id, limit - len(event_id_results)
-                        )
-                    )
-                    logger.debug(
-                        "_get_backfill_events(room_id=%s): connected_batch_event_backfill_results=%s",
-                        room_id,
-                        connected_batch_event_backfill_results,
-                    )
-                    for (
-                        connected_batch_event_backfill_item
-                    ) in connected_batch_event_backfill_results:
-                        if (
-                            connected_batch_event_backfill_item.event_id
-                            not in event_id_results
-                        ):
-                            queue.put(
-                                (
-                                    -connected_batch_event_backfill_item.depth,
-                                    -connected_batch_event_backfill_item.stream_ordering,
-                                    connected_batch_event_backfill_item.event_id,
-                                    connected_batch_event_backfill_item.type,
-                                )
-                            )
-
             # Now we just look up the DAG by prev_events as normal
             connected_prev_event_backfill_results = (
                 self._get_connected_prev_event_backfill_results_txn(
@@ -1748,19 +1552,6 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
             _delete_old_forward_extrem_cache_txn,
         )
 
-    @trace
-    async def insert_insertion_extremity(self, event_id: str, room_id: str) -> None:
-        await self.db_pool.simple_upsert(
-            table="insertion_event_extremities",
-            keyvalues={"event_id": event_id},
-            values={
-                "event_id": event_id,
-                "room_id": room_id,
-            },
-            insertion_values={},
-            desc="insert_insertion_extremity",
-        )
-
     async def insert_received_event_to_staging(
         self, origin: str, event: EventBase
     ) -> None:
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 44af3357af..5c9db7554e 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1664,9 +1664,6 @@ class PersistEventsStore:
 
             self._handle_event_relations(txn, event)
 
-            self._handle_insertion_event(txn, event)
-            self._handle_batch_event(txn, event)
-
             # Store the labels for this event.
             labels = event.content.get(EventContentFields.LABELS)
             if labels:
@@ -1927,128 +1924,6 @@ class PersistEventsStore:
                 ),
             )
 
-    def _handle_insertion_event(
-        self, txn: LoggingTransaction, event: EventBase
-    ) -> None:
-        """Handles keeping track of insertion events and edges/connections.
-        Part of MSC2716.
-
-        Args:
-            txn: The database transaction object
-            event: The event to process
-        """
-
-        if event.type != EventTypes.MSC2716_INSERTION:
-            # Not a insertion event
-            return
-
-        # Skip processing an insertion event if the room version doesn't
-        # support it or the event is not from the room creator.
-        room_version = self.store.get_room_version_txn(txn, event.room_id)
-        room_creator = self.db_pool.simple_select_one_onecol_txn(
-            txn,
-            table="rooms",
-            keyvalues={"room_id": event.room_id},
-            retcol="creator",
-            allow_none=True,
-        )
-        if not room_version.msc2716_historical and (
-            not self.hs.config.experimental.msc2716_enabled
-            or event.sender != room_creator
-        ):
-            return
-
-        next_batch_id = event.content.get(EventContentFields.MSC2716_NEXT_BATCH_ID)
-        if next_batch_id is None:
-            # Invalid insertion event without next batch ID
-            return
-
-        logger.debug(
-            "_handle_insertion_event (next_batch_id=%s) %s", next_batch_id, event
-        )
-
-        # Keep track of the insertion event and the batch ID
-        self.db_pool.simple_insert_txn(
-            txn,
-            table="insertion_events",
-            values={
-                "event_id": event.event_id,
-                "room_id": event.room_id,
-                "next_batch_id": next_batch_id,
-            },
-        )
-
-        # Insert an edge for every prev_event connection
-        for prev_event_id in event.prev_event_ids():
-            self.db_pool.simple_insert_txn(
-                txn,
-                table="insertion_event_edges",
-                values={
-                    "event_id": event.event_id,
-                    "room_id": event.room_id,
-                    "insertion_prev_event_id": prev_event_id,
-                },
-            )
-
-    def _handle_batch_event(self, txn: LoggingTransaction, event: EventBase) -> None:
-        """Handles inserting the batch edges/connections between the batch event
-        and an insertion event. Part of MSC2716.
-
-        Args:
-            txn: The database transaction object
-            event: The event to process
-        """
-
-        if event.type != EventTypes.MSC2716_BATCH:
-            # Not a batch event
-            return
-
-        # Skip processing a batch event if the room version doesn't
-        # support it or the event is not from the room creator.
-        room_version = self.store.get_room_version_txn(txn, event.room_id)
-        room_creator = self.db_pool.simple_select_one_onecol_txn(
-            txn,
-            table="rooms",
-            keyvalues={"room_id": event.room_id},
-            retcol="creator",
-            allow_none=True,
-        )
-        if not room_version.msc2716_historical and (
-            not self.hs.config.experimental.msc2716_enabled
-            or event.sender != room_creator
-        ):
-            return
-
-        batch_id = event.content.get(EventContentFields.MSC2716_BATCH_ID)
-        if batch_id is None:
-            # Invalid batch event without a batch ID
-            return
-
-        logger.debug("_handle_batch_event batch_id=%s %s", batch_id, event)
-
-        # Keep track of the insertion event and the batch ID
-        self.db_pool.simple_insert_txn(
-            txn,
-            table="batch_events",
-            values={
-                "event_id": event.event_id,
-                "room_id": event.room_id,
-                "batch_id": batch_id,
-            },
-        )
-
-        # When we receive an event with a `batch_id` referencing the
-        # `next_batch_id` of the insertion event, we can remove it from the
-        # `insertion_event_extremities` table.
-        sql = """
-            DELETE FROM insertion_event_extremities WHERE event_id IN (
-                SELECT event_id FROM insertion_events
-                WHERE next_batch_id = ?
-            )
-        """
-
-        txn.execute(sql, (batch_id,))
-
     def _handle_redact_relations(
         self, txn: LoggingTransaction, room_id: str, redacted_event_id: str
     ) -> None:
diff --git a/synapse/storage/databases/main/room_batch.py b/synapse/storage/databases/main/room_batch.py
deleted file mode 100644
index 131f357d04..0000000000
--- a/synapse/storage/databases/main/room_batch.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright 2021 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Optional
-
-from synapse.storage._base import SQLBaseStore
-
-
-class RoomBatchStore(SQLBaseStore):
-    async def get_insertion_event_id_by_batch_id(
-        self, room_id: str, batch_id: str
-    ) -> Optional[str]:
-        """Retrieve a insertion event ID.
-
-        Args:
-            batch_id: The batch ID of the insertion event to retrieve.
-
-        Returns:
-            The event_id of an insertion event, or None if there is no known
-            insertion event for the given insertion event.
-        """
-        return await self.db_pool.simple_select_one_onecol(
-            table="insertion_events",
-            keyvalues={"room_id": room_id, "next_batch_id": batch_id},
-            retcol="event_id",
-            allow_none=True,
-        )
-
-    async def store_state_group_id_for_event_id(
-        self, event_id: str, state_group_id: int
-    ) -> None:
-        await self.db_pool.simple_upsert(
-            table="event_to_state_groups",
-            keyvalues={"event_id": event_id},
-            values={"state_group": state_group_id, "event_id": event_id},
-        )
diff --git a/tests/rest/client/test_room_batch.py b/tests/rest/client/test_room_batch.py
deleted file mode 100644
index 9d5cb60d16..0000000000
--- a/tests/rest/client/test_room_batch.py
+++ /dev/null
@@ -1,302 +0,0 @@
-import logging
-from typing import List, Tuple
-from unittest.mock import Mock, patch
-
-from twisted.test.proto_helpers import MemoryReactor
-
-from synapse.api.constants import EventContentFields, EventTypes
-from synapse.appservice import ApplicationService
-from synapse.rest import admin
-from synapse.rest.client import login, register, room, room_batch, sync
-from synapse.server import HomeServer
-from synapse.types import JsonDict, RoomStreamToken
-from synapse.util import Clock
-
-from tests import unittest
-
-logger = logging.getLogger(__name__)
-
-
-def _create_join_state_events_for_batch_send_request(
-    virtual_user_ids: List[str],
-    insert_time: int,
-) -> List[JsonDict]:
-    return [
-        {
-            "type": EventTypes.Member,
-            "sender": virtual_user_id,
-            "origin_server_ts": insert_time,
-            "content": {
-                "membership": "join",
-                "displayname": "display-name-for-%s" % (virtual_user_id,),
-            },
-            "state_key": virtual_user_id,
-        }
-        for virtual_user_id in virtual_user_ids
-    ]
-
-
-def _create_message_events_for_batch_send_request(
-    virtual_user_id: str, insert_time: int, count: int
-) -> List[JsonDict]:
-    return [
-        {
-            "type": EventTypes.Message,
-            "sender": virtual_user_id,
-            "origin_server_ts": insert_time,
-            "content": {
-                "msgtype": "m.text",
-                "body": "Historical %d" % (i),
-                EventContentFields.MSC2716_HISTORICAL: True,
-            },
-        }
-        for i in range(count)
-    ]
-
-
-class RoomBatchTestCase(unittest.HomeserverTestCase):
-    """Test importing batches of historical messages."""
-
-    servlets = [
-        admin.register_servlets_for_client_rest_resource,
-        room_batch.register_servlets,
-        room.register_servlets,
-        register.register_servlets,
-        login.register_servlets,
-        sync.register_servlets,
-    ]
-
-    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        config = self.default_config()
-
-        self.appservice = ApplicationService(
-            token="i_am_an_app_service",
-            id="1234",
-            namespaces={"users": [{"regex": r"@as_user.*", "exclusive": True}]},
-            # Note: this user does not have to match the regex above
-            sender="@as_main:test",
-        )
-
-        mock_load_appservices = Mock(return_value=[self.appservice])
-        with patch(
-            "synapse.storage.databases.main.appservice.load_appservices",
-            mock_load_appservices,
-        ):
-            hs = self.setup_test_homeserver(config=config)
-        return hs
-
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        self.clock = clock
-        self._storage_controllers = hs.get_storage_controllers()
-
-        self.virtual_user_id, _ = self.register_appservice_user(
-            "as_user_potato", self.appservice.token
-        )
-
-    def _create_test_room(self) -> Tuple[str, str, str, str]:
-        room_id = self.helper.create_room_as(
-            self.appservice.sender, tok=self.appservice.token
-        )
-
-        res_a = self.helper.send_event(
-            room_id=room_id,
-            type=EventTypes.Message,
-            content={
-                "msgtype": "m.text",
-                "body": "A",
-            },
-            tok=self.appservice.token,
-        )
-        event_id_a = res_a["event_id"]
-
-        res_b = self.helper.send_event(
-            room_id=room_id,
-            type=EventTypes.Message,
-            content={
-                "msgtype": "m.text",
-                "body": "B",
-            },
-            tok=self.appservice.token,
-        )
-        event_id_b = res_b["event_id"]
-
-        res_c = self.helper.send_event(
-            room_id=room_id,
-            type=EventTypes.Message,
-            content={
-                "msgtype": "m.text",
-                "body": "C",
-            },
-            tok=self.appservice.token,
-        )
-        event_id_c = res_c["event_id"]
-
-        return room_id, event_id_a, event_id_b, event_id_c
-
-    @unittest.override_config({"experimental_features": {"msc2716_enabled": True}})
-    def test_same_state_groups_for_whole_historical_batch(self) -> None:
-        """Make sure that when using the `/batch_send` endpoint to import a
-        bunch of historical messages, it re-uses the same `state_group` across
-        the whole batch. This is an easy optimization to make sure we're getting
-        right because the state for the whole batch is contained in
-        `state_events_at_start` and can be shared across everything.
-        """
-
-        time_before_room = int(self.clock.time_msec())
-        room_id, event_id_a, _, _ = self._create_test_room()
-
-        channel = self.make_request(
-            "POST",
-            "/_matrix/client/unstable/org.matrix.msc2716/rooms/%s/batch_send?prev_event_id=%s"
-            % (room_id, event_id_a),
-            content={
-                "events": _create_message_events_for_batch_send_request(
-                    self.virtual_user_id, time_before_room, 3
-                ),
-                "state_events_at_start": _create_join_state_events_for_batch_send_request(
-                    [self.virtual_user_id], time_before_room
-                ),
-            },
-            access_token=self.appservice.token,
-        )
-        self.assertEqual(channel.code, 200, channel.result)
-
-        # Get the historical event IDs that we just imported
-        historical_event_ids = channel.json_body["event_ids"]
-        self.assertEqual(len(historical_event_ids), 3)
-
-        # Fetch the state_groups
-        state_group_map = self.get_success(
-            self._storage_controllers.state.get_state_groups_ids(
-                room_id, historical_event_ids
-            )
-        )
-
-        # We expect all of the historical events to be using the same state_group
-        # so there should only be a single state_group here!
-        self.assertEqual(
-            len(state_group_map.keys()),
-            1,
-            "Expected a single state_group to be returned by saw state_groups=%s"
-            % (state_group_map.keys(),),
-        )
-
-    @unittest.override_config({"experimental_features": {"msc2716_enabled": True}})
-    def test_sync_while_batch_importing(self) -> None:
-        """
-        Make sure that /sync correctly returns full room state when a user joins
-        during ongoing batch backfilling.
-        See: https://github.com/matrix-org/synapse/issues/12281
-        """
-        # Create user who will be invited & join room
-        user_id = self.register_user("beep", "test")
-        user_tok = self.login("beep", "test")
-
-        time_before_room = int(self.clock.time_msec())
-
-        # Create a room with some events
-        room_id, _, _, _ = self._create_test_room()
-        # Invite the user
-        self.helper.invite(
-            room_id, src=self.appservice.sender, tok=self.appservice.token, targ=user_id
-        )
-
-        # Create another room, send a bunch of events to advance the stream token
-        other_room_id = self.helper.create_room_as(
-            self.appservice.sender, tok=self.appservice.token
-        )
-        for _ in range(5):
-            self.helper.send_event(
-                room_id=other_room_id,
-                type=EventTypes.Message,
-                content={"msgtype": "m.text", "body": "C"},
-                tok=self.appservice.token,
-            )
-
-        # Join the room as the normal user
-        self.helper.join(room_id, user_id, tok=user_tok)
-
-        # Create an event to hang the historical batch from - In order to see
-        # the failure case originally reported in #12281, the historical batch
-        # must be hung from the most recent event in the room so the base
-        # insertion event ends up with the highest `topogological_ordering`
-        # (`depth`) in the room but will have a negative `stream_ordering`
-        # because it's a `historical` event. Previously, when assembling the
-        # `state` for the `/sync` response, the bugged logic would sort by
-        # `topological_ordering` descending and pick up the base insertion
-        # event because it has a negative `stream_ordering` below the given
-        # pagination token. Now we properly sort by `stream_ordering`
-        # descending which puts `historical` events with a negative
-        # `stream_ordering` way at the bottom and aren't selected as expected.
-        response = self.helper.send_event(
-            room_id=room_id,
-            type=EventTypes.Message,
-            content={
-                "msgtype": "m.text",
-                "body": "C",
-            },
-            tok=self.appservice.token,
-        )
-        event_to_hang_id = response["event_id"]
-
-        channel = self.make_request(
-            "POST",
-            "/_matrix/client/unstable/org.matrix.msc2716/rooms/%s/batch_send?prev_event_id=%s"
-            % (room_id, event_to_hang_id),
-            content={
-                "events": _create_message_events_for_batch_send_request(
-                    self.virtual_user_id, time_before_room, 3
-                ),
-                "state_events_at_start": _create_join_state_events_for_batch_send_request(
-                    [self.virtual_user_id], time_before_room
-                ),
-            },
-            access_token=self.appservice.token,
-        )
-        self.assertEqual(channel.code, 200, channel.result)
-
-        # Now we need to find the invite + join events stream tokens so we can sync between
-        main_store = self.hs.get_datastores().main
-        events, next_key = self.get_success(
-            main_store.get_recent_events_for_room(
-                room_id,
-                50,
-                end_token=main_store.get_room_max_token(),
-            ),
-        )
-        invite_event_position = None
-        for event in events:
-            if (
-                event.type == "m.room.member"
-                and event.content["membership"] == "invite"
-            ):
-                invite_event_position = self.get_success(
-                    main_store.get_topological_token_for_event(event.event_id)
-                )
-                break
-
-        assert invite_event_position is not None, "No invite event found"
-
-        # Remove the topological order from the token by re-creating w/stream only
-        invite_event_position = RoomStreamToken(None, invite_event_position.stream)
-
-        # Sync everything after this token
-        since_token = self.get_success(invite_event_position.to_string(main_store))
-        sync_response = self.make_request(
-            "GET",
-            f"/sync?since={since_token}",
-            access_token=user_tok,
-        )
-
-        # Assert that, for this room, the user was considered to have joined and thus
-        # receives the full state history
-        state_event_types = [
-            event["type"]
-            for event in sync_response.json_body["rooms"]["join"][room_id]["state"][
-                "events"
-            ]
-        ]
-
-        assert (
-            "m.room.create" in state_event_types
-        ), "Missing room full state in sync response"
diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index 4b8d8328d7..0f3b0744f1 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -20,7 +20,6 @@ from parameterized import parameterized
 
 from twisted.test.proto_helpers import MemoryReactor
 
-from synapse.api.constants import EventTypes
 from synapse.api.room_versions import (
     KNOWN_ROOM_VERSIONS,
     EventFormatVersions,
@@ -924,216 +923,6 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
         self.assertEqual(backfill_event_ids, ["b3", "b2", "b1"])
 
-    def _setup_room_for_insertion_backfill_tests(self) -> _BackfillSetupInfo:
-        """
-        Sets up a room with various insertion event backward extremities to test
-        backfill functions against.
-
-        Returns:
-            _BackfillSetupInfo including the `room_id` to test against and
-            `depth_map` of events in the room
-        """
-        room_id = "!backfill-room-test:some-host"
-
-        depth_map: Dict[str, int] = {
-            "1": 1,
-            "2": 2,
-            "insertion_eventA": 3,
-            "3": 4,
-            "insertion_eventB": 5,
-            "4": 6,
-            "5": 7,
-        }
-
-        def populate_db(txn: LoggingTransaction) -> None:
-            # Insert the room to satisfy the foreign key constraint of
-            # `event_failed_pull_attempts`
-            self.store.db_pool.simple_insert_txn(
-                txn,
-                "rooms",
-                {
-                    "room_id": room_id,
-                    "creator": "room_creator_user_id",
-                    "is_public": True,
-                    "room_version": "6",
-                },
-            )
-
-            # Insert our server events
-            stream_ordering = 0
-            for event_id, depth in depth_map.items():
-                self.store.db_pool.simple_insert_txn(
-                    txn,
-                    table="events",
-                    values={
-                        "event_id": event_id,
-                        "type": EventTypes.MSC2716_INSERTION
-                        if event_id.startswith("insertion_event")
-                        else "test_regular_type",
-                        "room_id": room_id,
-                        "depth": depth,
-                        "topological_ordering": depth,
-                        "stream_ordering": stream_ordering,
-                        "processed": True,
-                        "outlier": False,
-                    },
-                )
-
-                if event_id.startswith("insertion_event"):
-                    self.store.db_pool.simple_insert_txn(
-                        txn,
-                        table="insertion_event_extremities",
-                        values={
-                            "event_id": event_id,
-                            "room_id": room_id,
-                        },
-                    )
-
-                stream_ordering += 1
-
-        self.get_success(
-            self.store.db_pool.runInteraction(
-                "_setup_room_for_insertion_backfill_tests_populate_db",
-                populate_db,
-            )
-        )
-
-        return _BackfillSetupInfo(room_id=room_id, depth_map=depth_map)
-
-    def test_get_insertion_event_backward_extremities_in_room(self) -> None:
-        """
-        Test to make sure only insertion event backward extremities that are
-        older and come before the `current_depth` are returned.
-        """
-        setup_info = self._setup_room_for_insertion_backfill_tests()
-        room_id = setup_info.room_id
-        depth_map = setup_info.depth_map
-
-        # Try at "insertion_eventB"
-        backfill_points = self.get_success(
-            self.store.get_insertion_event_backward_extremities_in_room(
-                room_id, depth_map["insertion_eventB"], limit=100
-            )
-        )
-        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
-        self.assertEqual(backfill_event_ids, ["insertion_eventB", "insertion_eventA"])
-
-        # Try at "insertion_eventA"
-        backfill_points = self.get_success(
-            self.store.get_insertion_event_backward_extremities_in_room(
-                room_id, depth_map["insertion_eventA"], limit=100
-            )
-        )
-        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
-        # Event "2" has a depth of 2 but is not included here because we only
-        # know the approximate depth of 5 from our event "3".
-        self.assertListEqual(backfill_event_ids, ["insertion_eventA"])
-
-    def test_get_insertion_event_backward_extremities_in_room_excludes_events_we_have_attempted(
-        self,
-    ) -> None:
-        """
-        Test to make sure that insertion events we have attempted to backfill
-        (and within backoff timeout duration) do not show up as an event to
-        backfill again.
-        """
-        setup_info = self._setup_room_for_insertion_backfill_tests()
-        room_id = setup_info.room_id
-        depth_map = setup_info.depth_map
-
-        # Record some attempts to backfill these events which will make
-        # `get_insertion_event_backward_extremities_in_room` exclude them
-        # because we haven't passed the backoff interval.
-        self.get_success(
-            self.store.record_event_failed_pull_attempt(
-                room_id, "insertion_eventA", "fake cause"
-            )
-        )
-
-        # No time has passed since we attempted to backfill ^
-
-        # Try at "insertion_eventB"
-        backfill_points = self.get_success(
-            self.store.get_insertion_event_backward_extremities_in_room(
-                room_id, depth_map["insertion_eventB"], limit=100
-            )
-        )
-        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
-        # Only the backfill points that we didn't record earlier exist here.
-        self.assertEqual(backfill_event_ids, ["insertion_eventB"])
-
-    def test_get_insertion_event_backward_extremities_in_room_attempted_event_retry_after_backoff_duration(
-        self,
-    ) -> None:
-        """
-        Test to make sure after we fake attempt to backfill event
-        "insertion_eventA" many times, we can see retry and see the
-        "insertion_eventA" again after the backoff timeout duration has
-        exceeded.
-        """
-        setup_info = self._setup_room_for_insertion_backfill_tests()
-        room_id = setup_info.room_id
-        depth_map = setup_info.depth_map
-
-        # Record some attempts to backfill these events which will make
-        # `get_backfill_points_in_room` exclude them because we
-        # haven't passed the backoff interval.
-        self.get_success(
-            self.store.record_event_failed_pull_attempt(
-                room_id, "insertion_eventB", "fake cause"
-            )
-        )
-        self.get_success(
-            self.store.record_event_failed_pull_attempt(
-                room_id, "insertion_eventA", "fake cause"
-            )
-        )
-        self.get_success(
-            self.store.record_event_failed_pull_attempt(
-                room_id, "insertion_eventA", "fake cause"
-            )
-        )
-        self.get_success(
-            self.store.record_event_failed_pull_attempt(
-                room_id, "insertion_eventA", "fake cause"
-            )
-        )
-        self.get_success(
-            self.store.record_event_failed_pull_attempt(
-                room_id, "insertion_eventA", "fake cause"
-            )
-        )
-
-        # Now advance time by 2 hours and we should only be able to see
-        # "insertion_eventB" because we have waited long enough for the single
-        # attempt (2^1 hours) but we still shouldn't see "insertion_eventA"
-        # because we haven't waited long enough for this many attempts.
-        self.reactor.advance(datetime.timedelta(hours=2).total_seconds())
-
-        # Try at "insertion_eventA" and make sure that "insertion_eventA" is not
-        # in the list because we've already attempted many times
-        backfill_points = self.get_success(
-            self.store.get_insertion_event_backward_extremities_in_room(
-                room_id, depth_map["insertion_eventA"], limit=100
-            )
-        )
-        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
-        self.assertEqual(backfill_event_ids, [])
-
-        # Now advance time by 20 hours (above 2^4 because we made 4 attemps) and
-        # see if we can now backfill it
-        self.reactor.advance(datetime.timedelta(hours=20).total_seconds())
-
-        # Try at "insertion_eventA" again after we advanced enough time and we
-        # should see "insertion_eventA" again
-        backfill_points = self.get_success(
-            self.store.get_insertion_event_backward_extremities_in_room(
-                room_id, depth_map["insertion_eventA"], limit=100
-            )
-        )
-        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
-        self.assertEqual(backfill_event_ids, ["insertion_eventA"])
-
     def test_get_event_ids_with_failed_pull_attempts(self) -> None:
         """
         Test to make sure we properly get event_ids based on whether they have any
-- 
cgit 1.5.1


From 887fa4b66b038c886634a3eef92af108e391be34 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 20 Jun 2023 04:05:31 -0500
Subject: Switch from `matrix://` to `matrix-federation://` scheme for internal
 Synapse routing of outbound federation traffic (#15806)

`matrix://` is a registered, specced scheme nowadays and no longer makes sense
for our internal-to-Synapse use case.
([discussion](https://github.com/matrix-org/synapse/pull/15773#discussion_r1227598679))
---
 changelog.d/15806.misc                             |  1 +
 contrib/lnav/synapse-log-format.json               |  2 +-
 scripts-dev/federation_client.py                   |  4 +--
 synapse/http/federation/matrix_federation_agent.py | 14 ++++----
 synapse/http/matrixfederationclient.py             |  9 ++++-
 tests/federation/test_federation_client.py         |  4 +--
 .../federation/test_matrix_federation_agent.py     | 38 ++++++++++++----------
 7 files changed, 43 insertions(+), 29 deletions(-)
 create mode 100644 changelog.d/15806.misc

(limited to 'synapse')

diff --git a/changelog.d/15806.misc b/changelog.d/15806.misc
new file mode 100644
index 0000000000..80d0eb2f8f
--- /dev/null
+++ b/changelog.d/15806.misc
@@ -0,0 +1 @@
+Switch from `matrix://` to `matrix-federation://` scheme for internal Synapse routing of outbound federation traffic.
diff --git a/contrib/lnav/synapse-log-format.json b/contrib/lnav/synapse-log-format.json
index ad7017ee5e..649cd623e8 100644
--- a/contrib/lnav/synapse-log-format.json
+++ b/contrib/lnav/synapse-log-format.json
@@ -29,7 +29,7 @@
         "level": "error"
       },
       {
-        "line": "my-matrix-server-federation-sender-1 | 2023-01-25 20:56:20,995 - synapse.http.matrixfederationclient - 709 - WARNING - federation_transaction_transmission_loop-3 - {PUT-O-3} [example.com] Request failed: PUT matrix://example.com/_matrix/federation/v1/send/1674680155797: HttpResponseException('403: Forbidden')",
+        "line": "my-matrix-server-federation-sender-1 | 2023-01-25 20:56:20,995 - synapse.http.matrixfederationclient - 709 - WARNING - federation_transaction_transmission_loop-3 - {PUT-O-3} [example.com] Request failed: PUT matrix-federation://example.com/_matrix/federation/v1/send/1674680155797: HttpResponseException('403: Forbidden')",
         "level": "warning"
       },
       {
diff --git a/scripts-dev/federation_client.py b/scripts-dev/federation_client.py
index b1d5e2e616..63f0b25ddd 100755
--- a/scripts-dev/federation_client.py
+++ b/scripts-dev/federation_client.py
@@ -136,11 +136,11 @@ def request(
         authorization_headers.append(header)
         print("Authorization: %s" % header, file=sys.stderr)
 
-    dest = "matrix://%s%s" % (destination, path)
+    dest = "matrix-federation://%s%s" % (destination, path)
     print("Requesting %s" % dest, file=sys.stderr)
 
     s = requests.Session()
-    s.mount("matrix://", MatrixConnectionAdapter())
+    s.mount("matrix-federation://", MatrixConnectionAdapter())
 
     headers: Dict[str, str] = {
         "Authorization": authorization_headers[0],
diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py
index 7e8cf31682..91a24efcd0 100644
--- a/synapse/http/federation/matrix_federation_agent.py
+++ b/synapse/http/federation/matrix_federation_agent.py
@@ -51,8 +51,10 @@ logger = logging.getLogger(__name__)
 @implementer(IAgent)
 class MatrixFederationAgent:
     """An Agent-like thing which provides a `request` method which correctly
-    handles resolving matrix server names when using matrix://. Handles standard
-    https URIs as normal.
+    handles resolving matrix server names when using `matrix-federation://`. Handles
+    standard https URIs as normal. The `matrix-federation://` scheme is internal to
+    Synapse and we purposely want to avoid colliding with the `matrix://` URL scheme
+    which is now specced.
 
     Doesn't implement any retries. (Those are done in MatrixFederationHttpClient.)
 
@@ -167,14 +169,14 @@ class MatrixFederationAgent:
         # There must be a valid hostname.
         assert parsed_uri.hostname
 
-        # If this is a matrix:// URI check if the server has delegated matrix
+        # If this is a matrix-federation:// URI check if the server has delegated matrix
         # traffic using well-known delegation.
         #
         # We have to do this here and not in the endpoint as we need to rewrite
         # the host header with the delegated server name.
         delegated_server = None
         if (
-            parsed_uri.scheme == b"matrix"
+            parsed_uri.scheme == b"matrix-federation"
             and not _is_ip_literal(parsed_uri.hostname)
             and not parsed_uri.port
         ):
@@ -250,7 +252,7 @@ class MatrixHostnameEndpointFactory:
 
 @implementer(IStreamClientEndpoint)
 class MatrixHostnameEndpoint:
-    """An endpoint that resolves matrix:// URLs using Matrix server name
+    """An endpoint that resolves matrix-federation:// URLs using Matrix server name
     resolution (i.e. via SRV). Does not check for well-known delegation.
 
     Args:
@@ -379,7 +381,7 @@ class MatrixHostnameEndpoint:
         connect to.
         """
 
-        if self._parsed_uri.scheme != b"matrix":
+        if self._parsed_uri.scheme != b"matrix-federation":
             return [Server(host=self._parsed_uri.host, port=self._parsed_uri.port)]
 
         # Note: We don't do well-known lookup as that needs to have happened
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index abb5ae5815..fc0101808d 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -174,7 +174,14 @@ class MatrixFederationRequest:
 
         # The object is frozen so we can pre-compute this.
         uri = urllib.parse.urlunparse(
-            (b"matrix", destination_bytes, path_bytes, None, query_bytes, b"")
+            (
+                b"matrix-federation",
+                destination_bytes,
+                path_bytes,
+                None,
+                query_bytes,
+                b"",
+            )
         )
         object.__setattr__(self, "uri", uri)
 
diff --git a/tests/federation/test_federation_client.py b/tests/federation/test_federation_client.py
index 91694e4fca..a45ab83683 100644
--- a/tests/federation/test_federation_client.py
+++ b/tests/federation/test_federation_client.py
@@ -124,7 +124,7 @@ class FederationClientTest(FederatingHomeserverTestCase):
         # check the right call got made to the agent
         self._mock_agent.request.assert_called_once_with(
             b"GET",
-            b"matrix://yet.another.server/_matrix/federation/v1/state/%21room_id?event_id=event_id",
+            b"matrix-federation://yet.another.server/_matrix/federation/v1/state/%21room_id?event_id=event_id",
             headers=mock.ANY,
             bodyProducer=None,
         )
@@ -232,7 +232,7 @@ class FederationClientTest(FederatingHomeserverTestCase):
         # check the right call got made to the agent
         self._mock_agent.request.assert_called_once_with(
             b"GET",
-            b"matrix://yet.another.server/_matrix/federation/v1/event/event_id",
+            b"matrix-federation://yet.another.server/_matrix/federation/v1/event/event_id",
             headers=mock.ANY,
             bodyProducer=None,
         )
diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py
index 105b4caefa..aed2a4c07a 100644
--- a/tests/http/federation/test_matrix_federation_agent.py
+++ b/tests/http/federation/test_matrix_federation_agent.py
@@ -292,7 +292,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.agent = self._make_agent()
 
         self.reactor.lookups["testserv"] = "1.2.3.4"
-        test_d = self._make_get_request(b"matrix://testserv:8448/foo/bar")
+        test_d = self._make_get_request(b"matrix-federation://testserv:8448/foo/bar")
 
         # Nothing happened yet
         self.assertNoResult(test_d)
@@ -393,7 +393,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
 
         self.reactor.lookups["testserv"] = "1.2.3.4"
         self.reactor.lookups["proxy.com"] = "9.9.9.9"
-        test_d = self._make_get_request(b"matrix://testserv:8448/foo/bar")
+        test_d = self._make_get_request(b"matrix-federation://testserv:8448/foo/bar")
 
         # Nothing happened yet
         self.assertNoResult(test_d)
@@ -532,7 +532,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         # there will be a getaddrinfo on the IP
         self.reactor.lookups["1.2.3.4"] = "1.2.3.4"
 
-        test_d = self._make_get_request(b"matrix://1.2.3.4/foo/bar")
+        test_d = self._make_get_request(b"matrix-federation://1.2.3.4/foo/bar")
 
         # Nothing happened yet
         self.assertNoResult(test_d)
@@ -568,7 +568,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         # there will be a getaddrinfo on the IP
         self.reactor.lookups["::1"] = "::1"
 
-        test_d = self._make_get_request(b"matrix://[::1]/foo/bar")
+        test_d = self._make_get_request(b"matrix-federation://[::1]/foo/bar")
 
         # Nothing happened yet
         self.assertNoResult(test_d)
@@ -604,7 +604,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         # there will be a getaddrinfo on the IP
         self.reactor.lookups["::1"] = "::1"
 
-        test_d = self._make_get_request(b"matrix://[::1]:80/foo/bar")
+        test_d = self._make_get_request(b"matrix-federation://[::1]:80/foo/bar")
 
         # Nothing happened yet
         self.assertNoResult(test_d)
@@ -639,7 +639,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.mock_resolver.resolve_service.side_effect = generate_resolve_service([])
         self.reactor.lookups["testserv1"] = "1.2.3.4"
 
-        test_d = self._make_get_request(b"matrix://testserv1/foo/bar")
+        test_d = self._make_get_request(b"matrix-federation://testserv1/foo/bar")
 
         # Nothing happened yet
         self.assertNoResult(test_d)
@@ -693,7 +693,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         # there will be a getaddrinfo on the IP
         self.reactor.lookups["1.2.3.5"] = "1.2.3.5"
 
-        test_d = self._make_get_request(b"matrix://1.2.3.5/foo/bar")
+        test_d = self._make_get_request(b"matrix-federation://1.2.3.5/foo/bar")
 
         # Nothing happened yet
         self.assertNoResult(test_d)
@@ -725,7 +725,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.mock_resolver.resolve_service.side_effect = generate_resolve_service([])
         self.reactor.lookups["testserv"] = "1.2.3.4"
 
-        test_d = self._make_get_request(b"matrix://testserv/foo/bar")
+        test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar")
 
         # Nothing happened yet
         self.assertNoResult(test_d)
@@ -780,7 +780,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.reactor.lookups["testserv"] = "1.2.3.4"
         self.reactor.lookups["target-server"] = "1::f"
 
-        test_d = self._make_get_request(b"matrix://testserv/foo/bar")
+        test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar")
 
         # Nothing happened yet
         self.assertNoResult(test_d)
@@ -844,7 +844,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.reactor.lookups["testserv"] = "1.2.3.4"
         self.reactor.lookups["target-server"] = "1::f"
 
-        test_d = self._make_get_request(b"matrix://testserv/foo/bar")
+        test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar")
 
         # Nothing happened yet
         self.assertNoResult(test_d)
@@ -933,7 +933,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.mock_resolver.resolve_service.side_effect = generate_resolve_service([])
         self.reactor.lookups["testserv"] = "1.2.3.4"
 
-        test_d = self._make_get_request(b"matrix://testserv/foo/bar")
+        test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar")
 
         # Nothing happened yet
         self.assertNoResult(test_d)
@@ -1009,7 +1009,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
             ),
         )
 
-        test_d = agent.request(b"GET", b"matrix://testserv/foo/bar")
+        test_d = agent.request(b"GET", b"matrix-federation://testserv/foo/bar")
 
         # Nothing happened yet
         self.assertNoResult(test_d)
@@ -1042,7 +1042,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         )
         self.reactor.lookups["srvtarget"] = "1.2.3.4"
 
-        test_d = self._make_get_request(b"matrix://testserv/foo/bar")
+        test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar")
 
         # Nothing happened yet
         self.assertNoResult(test_d)
@@ -1082,7 +1082,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.reactor.lookups["testserv"] = "1.2.3.4"
         self.reactor.lookups["srvtarget"] = "5.6.7.8"
 
-        test_d = self._make_get_request(b"matrix://testserv/foo/bar")
+        test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar")
 
         # Nothing happened yet
         self.assertNoResult(test_d)
@@ -1143,7 +1143,9 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.reactor.lookups["xn--bcher-kva.com"] = "1.2.3.4"
 
         # this is idna for bücher.com
-        test_d = self._make_get_request(b"matrix://xn--bcher-kva.com/foo/bar")
+        test_d = self._make_get_request(
+            b"matrix-federation://xn--bcher-kva.com/foo/bar"
+        )
 
         # Nothing happened yet
         self.assertNoResult(test_d)
@@ -1204,7 +1206,9 @@ class MatrixFederationAgentTests(unittest.TestCase):
         )
         self.reactor.lookups["xn--trget-3qa.com"] = "1.2.3.4"
 
-        test_d = self._make_get_request(b"matrix://xn--bcher-kva.com/foo/bar")
+        test_d = self._make_get_request(
+            b"matrix-federation://xn--bcher-kva.com/foo/bar"
+        )
 
         # Nothing happened yet
         self.assertNoResult(test_d)
@@ -1411,7 +1415,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         )
         self.reactor.lookups["target.com"] = "1.2.3.4"
 
-        test_d = self._make_get_request(b"matrix://testserv/foo/bar")
+        test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar")
 
         # Nothing happened yet
         self.assertNoResult(test_d)
-- 
cgit 1.5.1


From 496f73103df838795b0e98f8c1c7337468e41abc Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Wed, 21 Jun 2023 10:41:11 +0200
Subject: Allow for the configuration of max request retries and min/max retry
 delays in the matrix federation client (#15783)

---
 changelog.d/15783.misc                           |  1 +
 docs/usage/configuration/config_documentation.md | 26 +++++++++++
 synapse/config/federation.py                     | 16 +++++++
 synapse/http/matrixfederationclient.py           | 59 +++++++++++++++---------
 tests/http/test_matrixfederationclient.py        | 20 +++++++-
 5 files changed, 100 insertions(+), 22 deletions(-)
 create mode 100644 changelog.d/15783.misc

(limited to 'synapse')

diff --git a/changelog.d/15783.misc b/changelog.d/15783.misc
new file mode 100644
index 0000000000..0bebaa213d
--- /dev/null
+++ b/changelog.d/15783.misc
@@ -0,0 +1 @@
+Allow for the configuration of max request retries and min/max retry delays in the matrix federation client.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 0cf6e075ff..26d7c7900c 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1196,6 +1196,32 @@ Example configuration:
 allow_device_name_lookup_over_federation: true
 ```
 ---
+### `federation`
+
+The federation section defines some sub-options related to federation.
+
+The following options configure the timeout and retry logic applied to each request,
+independently of the others.
+The short retry algorithm is used when something or someone is waiting for the response to
+the request, while the long retry algorithm is used for requests that happen in the
+background, like sending a federation transaction.
+
+* `client_timeout`: timeout for the federation requests. Defaults to 60s.
+* `max_short_retry_delay`: maximum delay to be used for the short retry algo. Defaults to 2s.
+* `max_long_retry_delay`: maximum delay to be used for the long retry algo. Defaults to 60s.
+* `max_short_retries`: maximum number of retries for the short retry algo. Defaults to 3 attempts.
+* `max_long_retries`: maximum number of retries for the long retry algo. Defaults to 10 attempts.
+
+Example configuration:
+```yaml
+federation:
+  client_timeout: 180s
+  max_short_retry_delay: 7s
+  max_long_retry_delay: 100s
+  max_short_retries: 5
+  max_long_retries: 20
+```
+---
 ## Caching
 
 Options related to caching.
diff --git a/synapse/config/federation.py b/synapse/config/federation.py
index 336fca578a..0e1cb8b6e3 100644
--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@@ -22,6 +22,8 @@ class FederationConfig(Config):
     section = "federation"
 
     def read_config(self, config: JsonDict, **kwargs: Any) -> None:
+        federation_config = config.setdefault("federation", {})
+
         # FIXME: federation_domain_whitelist needs sytests
         self.federation_domain_whitelist: Optional[dict] = None
         federation_domain_whitelist = config.get("federation_domain_whitelist", None)
@@ -49,5 +51,19 @@ class FederationConfig(Config):
             "allow_device_name_lookup_over_federation", False
         )
 
+        # Allow for the configuration of timeout, max request retries
+        # and min/max retry delays in the matrix federation client.
+        self.client_timeout_ms = Config.parse_duration(
+            federation_config.get("client_timeout", "60s")
+        )
+        self.max_long_retry_delay_ms = Config.parse_duration(
+            federation_config.get("max_long_retry_delay", "60s")
+        )
+        self.max_short_retry_delay_ms = Config.parse_duration(
+            federation_config.get("max_short_retry_delay", "2s")
+        )
+        self.max_long_retries = federation_config.get("max_long_retries", 10)
+        self.max_short_retries = federation_config.get("max_short_retries", 3)
+
 
 _METRICS_FOR_DOMAINS_SCHEMA = {"type": "array", "items": {"type": "string"}}
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index fc0101808d..cc4e258b0f 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -95,8 +95,6 @@ incoming_responses_counter = Counter(
 )
 
 
-MAX_LONG_RETRIES = 10
-MAX_SHORT_RETRIES = 3
 MAXINT = sys.maxsize
 
 
@@ -413,7 +411,16 @@ class MatrixFederationHttpClient:
         self.clock = hs.get_clock()
         self._store = hs.get_datastores().main
         self.version_string_bytes = hs.version_string.encode("ascii")
-        self.default_timeout = 60
+        self.default_timeout_seconds = hs.config.federation.client_timeout_ms / 1000
+
+        self.max_long_retry_delay_seconds = (
+            hs.config.federation.max_long_retry_delay_ms / 1000
+        )
+        self.max_short_retry_delay_seconds = (
+            hs.config.federation.max_short_retry_delay_ms / 1000
+        )
+        self.max_long_retries = hs.config.federation.max_long_retries
+        self.max_short_retries = hs.config.federation.max_short_retries
 
         self._cooperator = Cooperator(scheduler=_make_scheduler(self.reactor))
 
@@ -542,10 +549,10 @@ class MatrixFederationHttpClient:
             logger.exception(f"Invalid destination: {request.destination}.")
             raise FederationDeniedError(request.destination)
 
-        if timeout:
+        if timeout is not None:
             _sec_timeout = timeout / 1000
         else:
-            _sec_timeout = self.default_timeout
+            _sec_timeout = self.default_timeout_seconds
 
         if (
             self.hs.config.federation.federation_domain_whitelist is not None
@@ -590,9 +597,9 @@ class MatrixFederationHttpClient:
             # XXX: Would be much nicer to retry only at the transaction-layer
             # (once we have reliable transactions in place)
             if long_retries:
-                retries_left = MAX_LONG_RETRIES
+                retries_left = self.max_long_retries
             else:
-                retries_left = MAX_SHORT_RETRIES
+                retries_left = self.max_short_retries
 
             url_bytes = request.uri
             url_str = url_bytes.decode("ascii")
@@ -737,24 +744,34 @@ class MatrixFederationHttpClient:
 
                     if retries_left and not timeout:
                         if long_retries:
-                            delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left)
-                            delay = min(delay, 60)
-                            delay *= random.uniform(0.8, 1.4)
+                            delay_seconds = 4 ** (
+                                self.max_long_retries + 1 - retries_left
+                            )
+                            delay_seconds = min(
+                                delay_seconds, self.max_long_retry_delay_seconds
+                            )
+                            delay_seconds *= random.uniform(0.8, 1.4)
                         else:
-                            delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left)
-                            delay = min(delay, 2)
-                            delay *= random.uniform(0.8, 1.4)
+                            delay_seconds = 0.5 * 2 ** (
+                                self.max_short_retries - retries_left
+                            )
+                            delay_seconds = min(
+                                delay_seconds, self.max_short_retry_delay_seconds
+                            )
+                            delay_seconds *= random.uniform(0.8, 1.4)
 
                         logger.debug(
                             "{%s} [%s] Waiting %ss before re-sending...",
                             request.txn_id,
                             request.destination,
-                            delay,
+                            delay_seconds,
                         )
 
                         # Sleep for the calculated delay, or wake up immediately
                         # if we get notified that the server is back up.
-                        await self._sleeper.sleep(request.destination, delay * 1000)
+                        await self._sleeper.sleep(
+                            request.destination, delay_seconds * 1000
+                        )
                         retries_left -= 1
                     else:
                         raise
@@ -953,7 +970,7 @@ class MatrixFederationHttpClient:
         if timeout is not None:
             _sec_timeout = timeout / 1000
         else:
-            _sec_timeout = self.default_timeout
+            _sec_timeout = self.default_timeout_seconds
 
         if parser is None:
             parser = cast(ByteParser[T], JsonParser())
@@ -1031,10 +1048,10 @@ class MatrixFederationHttpClient:
             ignore_backoff=ignore_backoff,
         )
 
-        if timeout:
+        if timeout is not None:
             _sec_timeout = timeout / 1000
         else:
-            _sec_timeout = self.default_timeout
+            _sec_timeout = self.default_timeout_seconds
 
         body = await _handle_response(
             self.reactor, _sec_timeout, request, response, start_ms, parser=JsonParser()
@@ -1142,7 +1159,7 @@ class MatrixFederationHttpClient:
         if timeout is not None:
             _sec_timeout = timeout / 1000
         else:
-            _sec_timeout = self.default_timeout
+            _sec_timeout = self.default_timeout_seconds
 
         if parser is None:
             parser = cast(ByteParser[T], JsonParser())
@@ -1218,7 +1235,7 @@ class MatrixFederationHttpClient:
         if timeout is not None:
             _sec_timeout = timeout / 1000
         else:
-            _sec_timeout = self.default_timeout
+            _sec_timeout = self.default_timeout_seconds
 
         body = await _handle_response(
             self.reactor, _sec_timeout, request, response, start_ms, parser=JsonParser()
@@ -1270,7 +1287,7 @@ class MatrixFederationHttpClient:
 
         try:
             d = read_body_with_max_size(response, output_stream, max_size)
-            d.addTimeout(self.default_timeout, self.reactor)
+            d.addTimeout(self.default_timeout_seconds, self.reactor)
             length = await make_deferred_yieldable(d)
         except BodyExceededMaxSize:
             msg = "Requested file is too large > %r bytes" % (max_size,)
diff --git a/tests/http/test_matrixfederationclient.py b/tests/http/test_matrixfederationclient.py
index 0dfc03ce50..b5f4a60fe5 100644
--- a/tests/http/test_matrixfederationclient.py
+++ b/tests/http/test_matrixfederationclient.py
@@ -40,7 +40,7 @@ from synapse.server import HomeServer
 from synapse.util import Clock
 
 from tests.server import FakeTransport
-from tests.unittest import HomeserverTestCase
+from tests.unittest import HomeserverTestCase, override_config
 
 
 def check_logcontext(context: LoggingContextOrSentinel) -> None:
@@ -640,3 +640,21 @@ class FederationClientTests(HomeserverTestCase):
             self.cl.build_auth_headers(
                 b"", b"GET", b"https://example.com", destination_is=b""
             )
+
+    @override_config(
+        {
+            "federation": {
+                "client_timeout": "180s",
+                "max_long_retry_delay": "100s",
+                "max_short_retry_delay": "7s",
+                "max_long_retries": 20,
+                "max_short_retries": 5,
+            }
+        }
+    )
+    def test_configurable_retry_and_delay_values(self) -> None:
+        self.assertEqual(self.cl.default_timeout_seconds, 180)
+        self.assertEqual(self.cl.max_long_retry_delay_seconds, 100)
+        self.assertEqual(self.cl.max_short_retry_delay_seconds, 7)
+        self.assertEqual(self.cl.max_long_retries, 20)
+        self.assertEqual(self.cl.max_short_retries, 5)
-- 
cgit 1.5.1


From 289ce3b8d946c4c4964fac2ff020a0535dead4f0 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 21 Jun 2023 14:20:46 +0100
Subject: Fix harmless exception in port DB script (#15814)

The port DB script would try and run database background tasks, which
could fail if the data they acted on was in the process of being ported.
These exceptions were non fatal.

Fixes #15789
---
 changelog.d/15814.misc              | 1 +
 synapse/_scripts/synapse_port_db.py | 3 +++
 2 files changed, 4 insertions(+)
 create mode 100644 changelog.d/15814.misc

(limited to 'synapse')

diff --git a/changelog.d/15814.misc b/changelog.d/15814.misc
new file mode 100644
index 0000000000..8e1107212f
--- /dev/null
+++ b/changelog.d/15814.misc
@@ -0,0 +1 @@
+Fix harmless exceptions being printed when running the port DB script.
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 27fee3d9a9..a803ada8ad 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -1369,6 +1369,9 @@ def main() -> None:
         sys.stderr.write("Database must use the 'psycopg2' connector.\n")
         sys.exit(3)
 
+    # Don't run the background tasks that get started by the data stores.
+    hs_config["run_background_tasks_on"] = "some_other_process"
+
     config = HomeServerConfig()
     config.parse_config_dict(hs_config, "", "")
 
-- 
cgit 1.5.1


From e0c39d6bb526b01368393ae5d2173c8e6d39b60f Mon Sep 17 00:00:00 2001
From: Nicolas Werner <89468146+nico-famedly@users.noreply.github.com>
Date: Wed, 21 Jun 2023 15:56:31 +0200
Subject: Fix forgotten rooms missing in initial sync (#15815)

If you leave a room and forget it, then rejoin it, the room would be
missing from the next initial sync.

fixes #13262

Signed-off-by: Nicolas Werner <n.werner@famedly.com>
---
 changelog.d/15815.bugfix                |  1 +
 synapse/storage/databases/main/cache.py | 13 +++++++++++++
 tests/handlers/test_room_member.py      | 21 +++++++++++++++++++++
 3 files changed, 35 insertions(+)
 create mode 100644 changelog.d/15815.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15815.bugfix b/changelog.d/15815.bugfix
new file mode 100644
index 0000000000..e20b5acac1
--- /dev/null
+++ b/changelog.d/15815.bugfix
@@ -0,0 +1 @@
+Fix forgotten rooms missing from initial sync after rejoining them. Contributed by Nico from Famedly.
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index 6e1c7d681f..c940f864d1 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -289,6 +289,17 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
             )
             self._attempt_to_invalidate_cache("get_rooms_for_user", (state_key,))
 
+            self._attempt_to_invalidate_cache(
+                "did_forget",
+                (
+                    state_key,
+                    room_id,
+                ),
+            )
+            self._attempt_to_invalidate_cache(
+                "get_forgotten_rooms_for_user", (state_key,)
+            )
+
         if relates_to:
             self._attempt_to_invalidate_cache("get_relations_for_event", (relates_to,))
             self._attempt_to_invalidate_cache("get_references_for_event", (relates_to,))
@@ -336,6 +347,8 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
             "get_rooms_for_user_with_stream_ordering", None
         )
         self._attempt_to_invalidate_cache("get_rooms_for_user", None)
+        self._attempt_to_invalidate_cache("did_forget", None)
+        self._attempt_to_invalidate_cache("get_forgotten_rooms_for_user", None)
         self._attempt_to_invalidate_cache("get_references_for_event", None)
         self._attempt_to_invalidate_cache("get_thread_summary", None)
         self._attempt_to_invalidate_cache("get_thread_participated", None)
diff --git a/tests/handlers/test_room_member.py b/tests/handlers/test_room_member.py
index a444d822cd..41199ffa29 100644
--- a/tests/handlers/test_room_member.py
+++ b/tests/handlers/test_room_member.py
@@ -333,6 +333,27 @@ class RoomMemberMasterHandlerTestCase(HomeserverTestCase):
             self.get_success(self.store.is_locally_forgotten_room(self.room_id))
         )
 
+    def test_leave_and_unforget(self) -> None:
+        """Tests if rejoining a room unforgets the room, so that it shows up in sync again."""
+        self.helper.join(self.room_id, user=self.bob, tok=self.bob_token)
+
+        # alice is not the last room member that leaves and forgets the room
+        self.helper.leave(self.room_id, user=self.alice, tok=self.alice_token)
+        self.get_success(self.handler.forget(self.alice_ID, self.room_id))
+        self.assertTrue(
+            self.get_success(self.store.did_forget(self.alice, self.room_id))
+        )
+
+        self.helper.join(self.room_id, user=self.alice, tok=self.alice_token)
+        self.assertFalse(
+            self.get_success(self.store.did_forget(self.alice, self.room_id))
+        )
+
+        # the server has not forgotten the room
+        self.assertFalse(
+            self.get_success(self.store.is_locally_forgotten_room(self.room_id))
+        )
+
     @override_config({"forget_rooms_on_leave": True})
     def test_leave_and_auto_forget(self) -> None:
         """Tests the `forget_rooms_on_leave` config option."""
-- 
cgit 1.5.1


From 25c55a9d2267b09da3e18090ee538782d6771a27 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 26 Jun 2023 15:12:20 +0100
Subject: Add login spam checker API (#15838)

---
 changelog.d/15838.feature                          |   1 +
 docs/modules/spam_checker_callbacks.md             |  36 +++++++
 synapse/http/site.py                               |  11 +++
 synapse/module_api/__init__.py                     |   3 +
 .../module_api/callbacks/spamchecker_callbacks.py  |  80 +++++++++++++++
 synapse/rest/client/login.py                       |  52 +++++++++-
 tests/rest/client/test_login.py                    | 108 ++++++++++++++++++++-
 7 files changed, 285 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/15838.feature

(limited to 'synapse')

diff --git a/changelog.d/15838.feature b/changelog.d/15838.feature
new file mode 100644
index 0000000000..04c77bd723
--- /dev/null
+++ b/changelog.d/15838.feature
@@ -0,0 +1 @@
+Add spam checker module API for logins.
diff --git a/docs/modules/spam_checker_callbacks.md b/docs/modules/spam_checker_callbacks.md
index 1a0c6ec954..ffdfe6082e 100644
--- a/docs/modules/spam_checker_callbacks.md
+++ b/docs/modules/spam_checker_callbacks.md
@@ -348,6 +348,42 @@ callback returns `False`, Synapse falls through to the next one. The value of th
 callback that does not return `False` will be used. If this happens, Synapse will not call
 any of the subsequent implementations of this callback.
 
+
+### `check_login_for_spam`
+
+_First introduced in Synapse v1.87.0_
+
+```python
+async def check_login_for_spam(
+    user_id: str,
+    device_id: Optional[str],
+    initial_display_name: Optional[str],
+    request_info: Collection[Tuple[Optional[str], str]],
+    auth_provider_id: Optional[str] = None,
+) -> Union["synapse.module_api.NOT_SPAM", "synapse.module_api.errors.Codes"]
+```
+
+Called when a user logs in.
+
+The arguments passed to this callback are:
+
+* `user_id`: The user ID the user is logging in with
+* `device_id`: The device ID the user is re-logging into.
+* `initial_display_name`: The device display name, if any.
+* `request_info`: A collection of tuples, whose first item is a user agent, and whose
+  second item is an IP address. These user agents and IP addresses are the ones that were
+  used during the login process.
+* `auth_provider_id`: The identifier of the SSO authentication provider, if any.
+
+If multiple modules implement this callback, they will be considered in order. If a
+callback returns `synapse.module_api.NOT_SPAM`, Synapse falls through to the next one.
+The value of the first callback that does not return `synapse.module_api.NOT_SPAM` will
+be used. If this happens, Synapse will not call any of the subsequent implementations of
+this callback.
+
+*Note:* This will not be called when a user registers.
+
+
 ## Example
 
 The example below is a module that implements the spam checker callback
diff --git a/synapse/http/site.py b/synapse/http/site.py
index c530966ef3..5b5a7c1e59 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -521,6 +521,11 @@ class SynapseRequest(Request):
         else:
             return self.getClientAddress().host
 
+    def request_info(self) -> "RequestInfo":
+        h = self.getHeader(b"User-Agent")
+        user_agent = h.decode("ascii", "replace") if h else None
+        return RequestInfo(user_agent=user_agent, ip=self.get_client_ip_if_available())
+
 
 class XForwardedForRequest(SynapseRequest):
     """Request object which honours proxy headers
@@ -661,3 +666,9 @@ class SynapseSite(Site):
 
     def log(self, request: SynapseRequest) -> None:
         pass
+
+
+@attr.s(auto_attribs=True, frozen=True, slots=True)
+class RequestInfo:
+    user_agent: Optional[str]
+    ip: str
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 84b2aef620..95f7800111 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -80,6 +80,7 @@ from synapse.module_api.callbacks.account_validity_callbacks import (
 )
 from synapse.module_api.callbacks.spamchecker_callbacks import (
     CHECK_EVENT_FOR_SPAM_CALLBACK,
+    CHECK_LOGIN_FOR_SPAM_CALLBACK,
     CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK,
     CHECK_REGISTRATION_FOR_SPAM_CALLBACK,
     CHECK_USERNAME_FOR_SPAM_CALLBACK,
@@ -302,6 +303,7 @@ class ModuleApi:
             CHECK_REGISTRATION_FOR_SPAM_CALLBACK
         ] = None,
         check_media_file_for_spam: Optional[CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK] = None,
+        check_login_for_spam: Optional[CHECK_LOGIN_FOR_SPAM_CALLBACK] = None,
     ) -> None:
         """Registers callbacks for spam checking capabilities.
 
@@ -319,6 +321,7 @@ class ModuleApi:
             check_username_for_spam=check_username_for_spam,
             check_registration_for_spam=check_registration_for_spam,
             check_media_file_for_spam=check_media_file_for_spam,
+            check_login_for_spam=check_login_for_spam,
         )
 
     def register_account_validity_callbacks(
diff --git a/synapse/module_api/callbacks/spamchecker_callbacks.py b/synapse/module_api/callbacks/spamchecker_callbacks.py
index 4456d1b81e..7cee442145 100644
--- a/synapse/module_api/callbacks/spamchecker_callbacks.py
+++ b/synapse/module_api/callbacks/spamchecker_callbacks.py
@@ -196,6 +196,26 @@ CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK = Callable[
         ]
     ],
 ]
+CHECK_LOGIN_FOR_SPAM_CALLBACK = Callable[
+    [
+        str,
+        Optional[str],
+        Optional[str],
+        Collection[Tuple[Optional[str], str]],
+        Optional[str],
+    ],
+    Awaitable[
+        Union[
+            Literal["NOT_SPAM"],
+            Codes,
+            # Highly experimental, not officially part of the spamchecker API, may
+            # disappear without warning depending on the results of ongoing
+            # experiments.
+            # Use this to return additional information as part of an error.
+            Tuple[Codes, JsonDict],
+        ]
+    ],
+]
 
 
 def load_legacy_spam_checkers(hs: "synapse.server.HomeServer") -> None:
@@ -315,6 +335,7 @@ class SpamCheckerModuleApiCallbacks:
         self._check_media_file_for_spam_callbacks: List[
             CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK
         ] = []
+        self._check_login_for_spam_callbacks: List[CHECK_LOGIN_FOR_SPAM_CALLBACK] = []
 
     def register_callbacks(
         self,
@@ -335,6 +356,7 @@ class SpamCheckerModuleApiCallbacks:
             CHECK_REGISTRATION_FOR_SPAM_CALLBACK
         ] = None,
         check_media_file_for_spam: Optional[CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK] = None,
+        check_login_for_spam: Optional[CHECK_LOGIN_FOR_SPAM_CALLBACK] = None,
     ) -> None:
         """Register callbacks from module for each hook."""
         if check_event_for_spam is not None:
@@ -378,6 +400,9 @@ class SpamCheckerModuleApiCallbacks:
         if check_media_file_for_spam is not None:
             self._check_media_file_for_spam_callbacks.append(check_media_file_for_spam)
 
+        if check_login_for_spam is not None:
+            self._check_login_for_spam_callbacks.append(check_login_for_spam)
+
     @trace
     async def check_event_for_spam(
         self, event: "synapse.events.EventBase"
@@ -819,3 +844,58 @@ class SpamCheckerModuleApiCallbacks:
                     return synapse.api.errors.Codes.FORBIDDEN, {}
 
         return self.NOT_SPAM
+
+    async def check_login_for_spam(
+        self,
+        user_id: str,
+        device_id: Optional[str],
+        initial_display_name: Optional[str],
+        request_info: Collection[Tuple[Optional[str], str]],
+        auth_provider_id: Optional[str] = None,
+    ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]:
+        """Checks if we should allow the given login request.
+
+        Args:
+            user_id: The request user ID
+            request_info: List of tuples of user agent and IP that
+                were used during the login process.
+            auth_provider_id: The SSO IdP the user used, e.g "oidc", "saml",
+                "cas". If any. Note this does not include users registered
+                via a password provider.
+
+        Returns:
+            Enum for how the request should be handled
+        """
+
+        for callback in self._check_login_for_spam_callbacks:
+            with Measure(
+                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
+            ):
+                res = await delay_cancellation(
+                    callback(
+                        user_id,
+                        device_id,
+                        initial_display_name,
+                        request_info,
+                        auth_provider_id,
+                    )
+                )
+                # Normalize return values to `Codes` or `"NOT_SPAM"`.
+                if res is self.NOT_SPAM:
+                    continue
+                elif isinstance(res, synapse.api.errors.Codes):
+                    return res, {}
+                elif (
+                    isinstance(res, tuple)
+                    and len(res) == 2
+                    and isinstance(res[0], synapse.api.errors.Codes)
+                    and isinstance(res[1], dict)
+                ):
+                    return res
+                else:
+                    logger.warning(
+                        "Module returned invalid value, rejecting login as spam"
+                    )
+                    return synapse.api.errors.Codes.FORBIDDEN, {}
+
+        return self.NOT_SPAM
diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py
index 6493b00bb8..d724c68920 100644
--- a/synapse/rest/client/login.py
+++ b/synapse/rest/client/login.py
@@ -50,7 +50,7 @@ from synapse.http.servlet import (
     parse_json_object_from_request,
     parse_string,
 )
-from synapse.http.site import SynapseRequest
+from synapse.http.site import RequestInfo, SynapseRequest
 from synapse.rest.client._base import client_patterns
 from synapse.rest.well_known import WellKnownBuilder
 from synapse.types import JsonDict, UserID
@@ -114,6 +114,7 @@ class LoginRestServlet(RestServlet):
         self.auth_handler = self.hs.get_auth_handler()
         self.registration_handler = hs.get_registration_handler()
         self._sso_handler = hs.get_sso_handler()
+        self._spam_checker = hs.get_module_api_callbacks().spam_checker
 
         self._well_known_builder = WellKnownBuilder(hs)
         self._address_ratelimiter = Ratelimiter(
@@ -197,6 +198,8 @@ class LoginRestServlet(RestServlet):
             self._refresh_tokens_enabled and client_requested_refresh_token
         )
 
+        request_info = request.request_info()
+
         try:
             if login_submission["type"] == LoginRestServlet.APPSERVICE_TYPE:
                 requester = await self.auth.get_user_by_req(request)
@@ -216,6 +219,7 @@ class LoginRestServlet(RestServlet):
                     login_submission,
                     appservice,
                     should_issue_refresh_token=should_issue_refresh_token,
+                    request_info=request_info,
                 )
             elif (
                 self.jwt_enabled
@@ -227,6 +231,7 @@ class LoginRestServlet(RestServlet):
                 result = await self._do_jwt_login(
                     login_submission,
                     should_issue_refresh_token=should_issue_refresh_token,
+                    request_info=request_info,
                 )
             elif login_submission["type"] == LoginRestServlet.TOKEN_TYPE:
                 await self._address_ratelimiter.ratelimit(
@@ -235,6 +240,7 @@ class LoginRestServlet(RestServlet):
                 result = await self._do_token_login(
                     login_submission,
                     should_issue_refresh_token=should_issue_refresh_token,
+                    request_info=request_info,
                 )
             else:
                 await self._address_ratelimiter.ratelimit(
@@ -243,6 +249,7 @@ class LoginRestServlet(RestServlet):
                 result = await self._do_other_login(
                     login_submission,
                     should_issue_refresh_token=should_issue_refresh_token,
+                    request_info=request_info,
                 )
         except KeyError:
             raise SynapseError(400, "Missing JSON keys.")
@@ -265,6 +272,8 @@ class LoginRestServlet(RestServlet):
         login_submission: JsonDict,
         appservice: ApplicationService,
         should_issue_refresh_token: bool = False,
+        *,
+        request_info: RequestInfo,
     ) -> LoginResponse:
         identifier = login_submission.get("identifier")
         logger.info("Got appservice login request with identifier: %r", identifier)
@@ -300,10 +309,15 @@ class LoginRestServlet(RestServlet):
             # The user represented by an appservice's configured sender_localpart
             # is not actually created in Synapse.
             should_check_deactivated=qualified_user_id != appservice.sender,
+            request_info=request_info,
         )
 
     async def _do_other_login(
-        self, login_submission: JsonDict, should_issue_refresh_token: bool = False
+        self,
+        login_submission: JsonDict,
+        should_issue_refresh_token: bool = False,
+        *,
+        request_info: RequestInfo,
     ) -> LoginResponse:
         """Handle non-token/saml/jwt logins
 
@@ -333,6 +347,7 @@ class LoginRestServlet(RestServlet):
             login_submission,
             callback,
             should_issue_refresh_token=should_issue_refresh_token,
+            request_info=request_info,
         )
         return result
 
@@ -347,6 +362,8 @@ class LoginRestServlet(RestServlet):
         should_issue_refresh_token: bool = False,
         auth_provider_session_id: Optional[str] = None,
         should_check_deactivated: bool = True,
+        *,
+        request_info: RequestInfo,
     ) -> LoginResponse:
         """Called when we've successfully authed the user and now need to
         actually login them in (e.g. create devices). This gets called on
@@ -371,6 +388,7 @@ class LoginRestServlet(RestServlet):
 
                 This exists purely for appservice's configured sender_localpart
                 which doesn't have an associated user in the database.
+            request_info: The user agent/IP address of the user.
 
         Returns:
             Dictionary of account information after successful login.
@@ -417,6 +435,22 @@ class LoginRestServlet(RestServlet):
                 )
 
         initial_display_name = login_submission.get("initial_device_display_name")
+        spam_check = await self._spam_checker.check_login_for_spam(
+            user_id,
+            device_id=device_id,
+            initial_display_name=initial_display_name,
+            request_info=[(request_info.user_agent, request_info.ip)],
+            auth_provider_id=auth_provider_id,
+        )
+        if spam_check != self._spam_checker.NOT_SPAM:
+            logger.info("Blocking login due to spam checker")
+            raise SynapseError(
+                403,
+                msg="Login was blocked by the server",
+                errcode=spam_check[0],
+                additional_fields=spam_check[1],
+            )
+
         (
             device_id,
             access_token,
@@ -451,7 +485,11 @@ class LoginRestServlet(RestServlet):
         return result
 
     async def _do_token_login(
-        self, login_submission: JsonDict, should_issue_refresh_token: bool = False
+        self,
+        login_submission: JsonDict,
+        should_issue_refresh_token: bool = False,
+        *,
+        request_info: RequestInfo,
     ) -> LoginResponse:
         """
         Handle token login.
@@ -474,10 +512,15 @@ class LoginRestServlet(RestServlet):
             auth_provider_id=res.auth_provider_id,
             should_issue_refresh_token=should_issue_refresh_token,
             auth_provider_session_id=res.auth_provider_session_id,
+            request_info=request_info,
         )
 
     async def _do_jwt_login(
-        self, login_submission: JsonDict, should_issue_refresh_token: bool = False
+        self,
+        login_submission: JsonDict,
+        should_issue_refresh_token: bool = False,
+        *,
+        request_info: RequestInfo,
     ) -> LoginResponse:
         """
         Handle the custom JWT login.
@@ -496,6 +539,7 @@ class LoginRestServlet(RestServlet):
             login_submission,
             create_non_existent_users=True,
             should_issue_refresh_token=should_issue_refresh_token,
+            request_info=request_info,
         )
 
 
diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py
index f3c3bc69a9..ffbc13bb8d 100644
--- a/tests/rest/client/test_login.py
+++ b/tests/rest/client/test_login.py
@@ -13,11 +13,12 @@
 # limitations under the License.
 import time
 import urllib.parse
-from typing import Any, Dict, List, Optional
+from typing import Any, Collection, Dict, List, Optional, Tuple, Union
 from unittest.mock import Mock
 from urllib.parse import urlencode
 
 import pymacaroons
+from typing_extensions import Literal
 
 from twisted.test.proto_helpers import MemoryReactor
 from twisted.web.resource import Resource
@@ -26,11 +27,12 @@ import synapse.rest.admin
 from synapse.api.constants import ApprovalNoticeMedium, LoginType
 from synapse.api.errors import Codes
 from synapse.appservice import ApplicationService
+from synapse.module_api import ModuleApi
 from synapse.rest.client import devices, login, logout, register
 from synapse.rest.client.account import WhoamiRestServlet
 from synapse.rest.synapse.client import build_synapse_client_resource_tree
 from synapse.server import HomeServer
-from synapse.types import create_requester
+from synapse.types import JsonDict, create_requester
 from synapse.util import Clock
 
 from tests import unittest
@@ -88,6 +90,56 @@ ADDITIONAL_LOGIN_FLOWS = [
 ]
 
 
+class TestSpamChecker:
+    def __init__(self, config: None, api: ModuleApi):
+        api.register_spam_checker_callbacks(
+            check_login_for_spam=self.check_login_for_spam,
+        )
+
+    @staticmethod
+    def parse_config(config: JsonDict) -> None:
+        return None
+
+    async def check_login_for_spam(
+        self,
+        user_id: str,
+        device_id: Optional[str],
+        initial_display_name: Optional[str],
+        request_info: Collection[Tuple[Optional[str], str]],
+        auth_provider_id: Optional[str] = None,
+    ) -> Union[
+        Literal["NOT_SPAM"],
+        Tuple["synapse.module_api.errors.Codes", JsonDict],
+    ]:
+        return "NOT_SPAM"
+
+
+class DenyAllSpamChecker:
+    def __init__(self, config: None, api: ModuleApi):
+        api.register_spam_checker_callbacks(
+            check_login_for_spam=self.check_login_for_spam,
+        )
+
+    @staticmethod
+    def parse_config(config: JsonDict) -> None:
+        return None
+
+    async def check_login_for_spam(
+        self,
+        user_id: str,
+        device_id: Optional[str],
+        initial_display_name: Optional[str],
+        request_info: Collection[Tuple[Optional[str], str]],
+        auth_provider_id: Optional[str] = None,
+    ) -> Union[
+        Literal["NOT_SPAM"],
+        Tuple["synapse.module_api.errors.Codes", JsonDict],
+    ]:
+        # Return an odd set of values to ensure that they get correctly passed
+        # to the client.
+        return Codes.LIMIT_EXCEEDED, {"extra": "value"}
+
+
 class LoginRestServletTestCase(unittest.HomeserverTestCase):
     servlets = [
         synapse.rest.admin.register_servlets_for_client_rest_resource,
@@ -469,6 +521,58 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase):
             ],
         )
 
+    @override_config(
+        {
+            "modules": [
+                {
+                    "module": TestSpamChecker.__module__
+                    + "."
+                    + TestSpamChecker.__qualname__
+                }
+            ]
+        }
+    )
+    def test_spam_checker_allow(self) -> None:
+        """Check that adding a spam checker doesn't break login."""
+        self.register_user("kermit", "monkey")
+
+        body = {"type": "m.login.password", "user": "kermit", "password": "monkey"}
+
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/r0/login",
+            body,
+        )
+        self.assertEqual(channel.code, 200, channel.result)
+
+    @override_config(
+        {
+            "modules": [
+                {
+                    "module": DenyAllSpamChecker.__module__
+                    + "."
+                    + DenyAllSpamChecker.__qualname__
+                }
+            ]
+        }
+    )
+    def test_spam_checker_deny(self) -> None:
+        """Check that a spam checker can reject a login with a custom error."""
+
+        self.register_user("kermit", "monkey")
+
+        body = {"type": "m.login.password", "user": "kermit", "password": "monkey"}
+
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/r0/login",
+            body,
+        )
+        self.assertEqual(channel.code, 403, channel.result)
+        self.assertDictContainsSubset(
+            {"errcode": Codes.LIMIT_EXCEEDED, "extra": "value"}, channel.json_body
+        )
+
 
 @skip_unless(has_saml2 and HAS_OIDC, "Requires SAML2 and OIDC")
 class MultiSSOTestCase(unittest.HomeserverTestCase):
-- 
cgit 1.5.1


From 78cfa55dad911e667b5a9b613e232eb72410382f Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Tue, 27 Jun 2023 01:41:42 -0700
Subject: Fix sqlite `user_filters` upgrade (#15817)

---
 changelog.d/15817.bugfix                           |  1 +
 .../78/02_validate_and_update_user_filters.py      |  4 +-
 .../78/03_remove_unused_indexes_user_filters.py    | 65 ++++++++++++++++++++++
 .../78/04_add_full_user_id_index_user_filters.py   | 25 +++++++++
 4 files changed, 92 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/15817.bugfix
 create mode 100644 synapse/storage/schema/main/delta/78/03_remove_unused_indexes_user_filters.py
 create mode 100644 synapse/storage/schema/main/delta/78/04_add_full_user_id_index_user_filters.py

(limited to 'synapse')

diff --git a/changelog.d/15817.bugfix b/changelog.d/15817.bugfix
new file mode 100644
index 0000000000..2b025730ad
--- /dev/null
+++ b/changelog.d/15817.bugfix
@@ -0,0 +1 @@
+Fix sqlite `user_filters` upgrade introduced in v1.86.0.
diff --git a/synapse/storage/schema/main/delta/78/02_validate_and_update_user_filters.py b/synapse/storage/schema/main/delta/78/02_validate_and_update_user_filters.py
index 8ef63335e7..e148ed26f2 100644
--- a/synapse/storage/schema/main/delta/78/02_validate_and_update_user_filters.py
+++ b/synapse/storage/schema/main/delta/78/02_validate_and_update_user_filters.py
@@ -61,9 +61,7 @@ def run_upgrade(
             full_user_id text NOT NULL,
             user_id text NOT NULL,
             filter_id bigint NOT NULL,
-            filter_json bytea NOT NULL,
-            UNIQUE (full_user_id),
-            UNIQUE (user_id)
+            filter_json bytea NOT NULL
         )
         """
         cur.execute(create_sql)
diff --git a/synapse/storage/schema/main/delta/78/03_remove_unused_indexes_user_filters.py b/synapse/storage/schema/main/delta/78/03_remove_unused_indexes_user_filters.py
new file mode 100644
index 0000000000..f5ba1c3fd4
--- /dev/null
+++ b/synapse/storage/schema/main/delta/78/03_remove_unused_indexes_user_filters.py
@@ -0,0 +1,65 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+from synapse.config.homeserver import HomeServerConfig
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, Sqlite3Engine
+
+
+def run_update(
+    cur: LoggingTransaction,
+    database_engine: BaseDatabaseEngine,
+    config: HomeServerConfig,
+) -> None:
+    """
+    Fix to drop unused indexes caused by incorrectly adding UNIQUE constraint to
+    columns `user_id` and `full_user_id` of table `user_filters` in previous migration.
+    """
+
+    if isinstance(database_engine, Sqlite3Engine):
+        cur.execute("DROP TABLE IF EXISTS temp_user_filters")
+        create_sql = """
+        CREATE TABLE temp_user_filters (
+            full_user_id text NOT NULL,
+            user_id text NOT NULL,
+            filter_id bigint NOT NULL,
+            filter_json bytea NOT NULL
+        )
+        """
+        cur.execute(create_sql)
+
+        copy_sql = """
+        INSERT INTO temp_user_filters (
+            user_id,
+            filter_id,
+            filter_json,
+            full_user_id)
+            SELECT user_id, filter_id, filter_json, full_user_id FROM user_filters
+        """
+        cur.execute(copy_sql)
+
+        drop_sql = """
+        DROP TABLE user_filters
+        """
+        cur.execute(drop_sql)
+
+        rename_sql = """
+        ALTER TABLE temp_user_filters RENAME to user_filters
+        """
+        cur.execute(rename_sql)
+
+        index_sql = """
+        CREATE UNIQUE INDEX IF NOT EXISTS user_filters_unique ON
+        user_filters (user_id, filter_id)
+        """
+        cur.execute(index_sql)
diff --git a/synapse/storage/schema/main/delta/78/04_add_full_user_id_index_user_filters.py b/synapse/storage/schema/main/delta/78/04_add_full_user_id_index_user_filters.py
new file mode 100644
index 0000000000..97fecc2bd9
--- /dev/null
+++ b/synapse/storage/schema/main/delta/78/04_add_full_user_id_index_user_filters.py
@@ -0,0 +1,25 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, Sqlite3Engine
+
+
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
+    if isinstance(database_engine, Sqlite3Engine):
+        idx_sql = """
+        CREATE UNIQUE INDEX IF NOT EXISTS user_filters_full_user_id_unique ON
+        user_filters (full_user_id, filter_id)
+        """
+        cur.execute(idx_sql)
-- 
cgit 1.5.1


From 53aa26eddc772a6719bf0da64b0684c333294d05 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Mon, 3 Jul 2023 10:38:57 +0000
Subject: Add a timeout that aborts any Postgres statement taking more than 1
 hour. (#15853)

* Add a timeout to Postgres statements

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 changelog.d/15853.misc              |  1 +
 synapse/storage/engines/postgres.py | 13 +++++++++++++
 2 files changed, 14 insertions(+)
 create mode 100644 changelog.d/15853.misc

(limited to 'synapse')

diff --git a/changelog.d/15853.misc b/changelog.d/15853.misc
new file mode 100644
index 0000000000..3e9516b1ad
--- /dev/null
+++ b/changelog.d/15853.misc
@@ -0,0 +1 @@
+Add a timeout that aborts any Postgres statement taking more than 1 hour.
\ No newline at end of file
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index b350f57ccb..05a72dc554 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -45,6 +45,15 @@ class PostgresEngine(
 
         psycopg2.extensions.register_adapter(bytes, _disable_bytes_adapter)
         self.synchronous_commit: bool = database_config.get("synchronous_commit", True)
+        # Set the statement timeout to 1 hour by default.
+        # Any query taking more than 1 hour should probably be considered a bug;
+        # most of the time this is a sign that work needs to be split up or that
+        # some degenerate query plan has been created and the client has probably
+        # timed out/walked off anyway.
+        # This is in milliseconds.
+        self.statement_timeout: Optional[int] = database_config.get(
+            "statement_timeout", 60 * 60 * 1000
+        )
         self._version: Optional[int] = None  # unknown as yet
 
         self.isolation_level_map: Mapping[int, int] = {
@@ -157,6 +166,10 @@ class PostgresEngine(
         if not self.synchronous_commit:
             cursor.execute("SET synchronous_commit TO OFF")
 
+        # Abort really long-running statements and turn them into errors.
+        if self.statement_timeout is not None:
+            cursor.execute("SET statement_timeout TO ?", (self.statement_timeout,))
+
         cursor.close()
         db_conn.commit()
 
-- 
cgit 1.5.1


From 07d7cbfe69c239a7ffe5668c1166799370eef0d6 Mon Sep 17 00:00:00 2001
From: pacien <pacien@users.noreply.github.com>
Date: Mon, 3 Jul 2023 16:39:38 +0200
Subject: devices: use combined ANY clause for faster cleanup (#15861)

Old device entries for the same user were being removed in individual
SQL commands, making the batch take way longer than necessary.

This combines the commands into a single one with an IN/ANY clause.

Example of log entry before the change, regularly observed with
"log_min_duration_statement = 10000" in PostgreSQL's config:

    LOG:  duration: 42538.282 ms  statement:
    DELETE FROM device_lists_stream
    WHERE user_id = '@someone' AND device_id = 'someid1'
    AND stream_id < 123456789
    ;
    DELETE FROM device_lists_stream
    WHERE user_id = '@someone' AND device_id = 'someid2'
    AND stream_id < 123456789
    ;
    [repeated for each device ID of that user, potentially a lot...]

With the patch applied on my instance for the past couple of days, I
no longer notice overly long statements of that particular kind.

Signed-off-by: pacien <pacien.trangirard@pacien.net>
---
 changelog.d/15861.misc                    |  1 +
 synapse/storage/databases/main/devices.py | 14 +++++++++-----
 2 files changed, 10 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/15861.misc

(limited to 'synapse')

diff --git a/changelog.d/15861.misc b/changelog.d/15861.misc
new file mode 100644
index 0000000000..6f320eab81
--- /dev/null
+++ b/changelog.d/15861.misc
@@ -0,0 +1 @@
+Optimised cleanup of old entries in device_lists_stream.
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index f677d048aa..d9df437e51 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1950,12 +1950,16 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
 
         # Delete older entries in the table, as we really only care about
         # when the latest change happened.
-        txn.execute_batch(
-            """
+        cleanup_obsolete_stmt = """
             DELETE FROM device_lists_stream
-            WHERE user_id = ? AND device_id = ? AND stream_id < ?
-            """,
-            [(user_id, device_id, min_stream_id) for device_id in device_ids],
+            WHERE user_id = ? AND stream_id < ? AND %s
+        """
+        device_ids_clause, device_ids_args = make_in_list_sql_clause(
+            txn.database_engine, "device_id", device_ids
+        )
+        txn.execute(
+            cleanup_obsolete_stmt % (device_ids_clause,),
+            [user_id, min_stream_id] + device_ids_args,
         )
 
         self.db_pool.simple_insert_many_txn(
-- 
cgit 1.5.1


From c8e81898b66086ee8bdfd18bd24452c26033e480 Mon Sep 17 00:00:00 2001
From: Michael Weimann <michaelw@element.io>
Date: Wed, 5 Jul 2023 00:03:20 +0200
Subject: Add not_user_type param to the list accounts admin API (#15844)

Signed-off-by: Michael Weimann <michaelw@element.io>
---
 changelog.d/15844.feature                  |  1 +
 docs/admin_api/user_admin_api.md           |  3 ++
 synapse/rest/admin/users.py                |  9 ++++
 synapse/storage/databases/main/__init__.py | 37 ++++++++++++++
 tests/rest/admin/test_user.py              | 78 ++++++++++++++++++++++++++++++
 5 files changed, 128 insertions(+)
 create mode 100644 changelog.d/15844.feature

(limited to 'synapse')

diff --git a/changelog.d/15844.feature b/changelog.d/15844.feature
new file mode 100644
index 0000000000..c220055d41
--- /dev/null
+++ b/changelog.d/15844.feature
@@ -0,0 +1 @@
+Add `not_user_type` param to the list accounts admin API.
diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md
index 229942b311..f17e60b1cb 100644
--- a/docs/admin_api/user_admin_api.md
+++ b/docs/admin_api/user_admin_api.md
@@ -242,6 +242,9 @@ The following parameters should be set in the URL:
 
 - `dir` - Direction of media order. Either `f` for forwards or `b` for backwards.
   Setting this value to `b` will reverse the above sort order. Defaults to `f`.
+- `not_user_type` - Exclude certain user types, such as bot users, from the request.
+   Can be provided multiple times. Possible values are `bot`, `support` or "empty string".
+   "empty string" here means to exclude users without a type.
 
 Caution. The database only has indexes on the columns `name` and `creation_ts`.
 This means that if a different sort order is used (`is_guest`, `admin`,
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 407fe9c804..e0257daa75 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -28,6 +28,7 @@ from synapse.http.servlet import (
     parse_integer,
     parse_json_object_from_request,
     parse_string,
+    parse_strings_from_args,
 )
 from synapse.http.site import SynapseRequest
 from synapse.rest.admin._base import (
@@ -64,6 +65,9 @@ class UsersRestServletV2(RestServlet):
     The parameter `guests` can be used to exclude guest users.
     The parameter `deactivated` can be used to include deactivated users.
     The parameter `order_by` can be used to order the result.
+    The parameter `not_user_type` can be used to exclude certain user types.
+    Possible values are `bot`, `support` or "empty string".
+    "empty string" here means to exclude users without a type.
     """
 
     def __init__(self, hs: "HomeServer"):
@@ -131,6 +135,10 @@ class UsersRestServletV2(RestServlet):
 
         direction = parse_enum(request, "dir", Direction, default=Direction.FORWARDS)
 
+        # twisted.web.server.Request.args is incorrectly defined as Optional[Any]
+        args: Dict[bytes, List[bytes]] = request.args  # type: ignore
+        not_user_types = parse_strings_from_args(args, "not_user_type")
+
         users, total = await self.store.get_users_paginate(
             start,
             limit,
@@ -141,6 +149,7 @@ class UsersRestServletV2(RestServlet):
             order_by,
             direction,
             approved,
+            not_user_types,
         )
 
         # If support for MSC3866 is not enabled, don't show the approval flag.
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index 3a10c265c9..80c0304b19 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -19,6 +19,7 @@ from typing import TYPE_CHECKING, List, Optional, Tuple, cast
 
 from synapse.api.constants import Direction
 from synapse.config.homeserver import HomeServerConfig
+from synapse.storage._base import make_in_list_sql_clause
 from synapse.storage.database import (
     DatabasePool,
     LoggingDatabaseConnection,
@@ -170,6 +171,7 @@ class DataStore(
         order_by: str = UserSortOrder.NAME.value,
         direction: Direction = Direction.FORWARDS,
         approved: bool = True,
+        not_user_types: Optional[List[str]] = None,
     ) -> Tuple[List[JsonDict], int]:
         """Function to retrieve a paginated list of users from
         users list. This will return a json list of users and the
@@ -185,6 +187,7 @@ class DataStore(
             order_by: the sort order of the returned list
             direction: sort ascending or descending
             approved: whether to include approved users
+            not_user_types: list of user types to exclude
         Returns:
             A tuple of a list of mappings from user to information and a count of total users.
         """
@@ -222,6 +225,40 @@ class DataStore(
                 # be already existing users that we consider as already approved.
                 filters.append("approved IS FALSE")
 
+            if not_user_types:
+                if len(not_user_types) == 1 and not_user_types[0] == "":
+                    # Only exclude NULL type users
+                    filters.append("user_type IS NOT NULL")
+                else:
+                    not_user_types_has_empty = False
+                    not_user_types_without_empty = []
+
+                    for not_user_type in not_user_types:
+                        if not_user_type == "":
+                            not_user_types_has_empty = True
+                        else:
+                            not_user_types_without_empty.append(not_user_type)
+
+                    not_user_type_clause, not_user_type_args = make_in_list_sql_clause(
+                        self.database_engine,
+                        "u.user_type",
+                        not_user_types_without_empty,
+                    )
+
+                    if not_user_types_has_empty:
+                        # NULL values should be excluded.
+                        # They evaluate to false > nothing to do here.
+                        filters.append("NOT %s" % (not_user_type_clause))
+                    else:
+                        # NULL values should *not* be excluded.
+                        # Add a special predicate to the query.
+                        filters.append(
+                            "(NOT %s OR %s IS NULL)"
+                            % (not_user_type_clause, "u.user_type")
+                        )
+
+                    args.extend(not_user_type_args)
+
             where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else ""
 
             sql_base = f"""
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index 434bb56d44..a17a1bb1d8 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -933,6 +933,84 @@ class UsersListTestCase(unittest.HomeserverTestCase):
         self.assertEqual(1, len(non_admin_user_ids), non_admin_user_ids)
         self.assertEqual(not_approved_user, non_admin_user_ids[0])
 
+    def test_filter_not_user_types(self) -> None:
+        """Tests that the endpoint handles the not_user_type param"""
+
+        regular_user_id = self.register_user("normalo", "secret")
+
+        bot_user_id = self.register_user("robo", "secret")
+        self.make_request(
+            "PUT",
+            "/_synapse/admin/v2/users/" + urllib.parse.quote(bot_user_id),
+            {"user_type": UserTypes.BOT},
+            access_token=self.admin_user_tok,
+        )
+
+        support_user_id = self.register_user("foo", "secret")
+        self.make_request(
+            "PUT",
+            "/_synapse/admin/v2/users/" + urllib.parse.quote(support_user_id),
+            {"user_type": UserTypes.SUPPORT},
+            access_token=self.admin_user_tok,
+        )
+
+        def test_user_type(
+            expected_user_ids: List[str], not_user_types: Optional[List[str]] = None
+        ) -> None:
+            """Runs a test for the not_user_types param
+            Args:
+                expected_user_ids: Ids of the users that are expected to be returned
+                not_user_types: List of values for the not_user_types param
+            """
+
+            user_type_query = ""
+
+            if not_user_types is not None:
+                user_type_query = "&".join(
+                    [f"not_user_type={u}" for u in not_user_types]
+                )
+
+            test_url = f"{self.url}?{user_type_query}"
+            channel = self.make_request(
+                "GET",
+                test_url,
+                access_token=self.admin_user_tok,
+            )
+
+            self.assertEqual(200, channel.code)
+            self.assertEqual(channel.json_body["total"], len(expected_user_ids))
+            self.assertEqual(
+                expected_user_ids,
+                [u["name"] for u in channel.json_body["users"]],
+            )
+
+        # Request without user_types →  all users expected
+        test_user_type([self.admin_user, support_user_id, regular_user_id, bot_user_id])
+
+        # Request and exclude bot users
+        test_user_type(
+            [self.admin_user, support_user_id, regular_user_id],
+            not_user_types=[UserTypes.BOT],
+        )
+
+        # Request and exclude bot and support users
+        test_user_type(
+            [self.admin_user, regular_user_id],
+            not_user_types=[UserTypes.BOT, UserTypes.SUPPORT],
+        )
+
+        # Request and exclude empty user types →  only expected the bot and support user
+        test_user_type([support_user_id, bot_user_id], not_user_types=[""])
+
+        # Request and exclude empty user types and bots →  only expected the support user
+        test_user_type([support_user_id], not_user_types=["", UserTypes.BOT])
+
+        # Request and exclude a custom type (neither support nor bot) →  expect all users
+        test_user_type(
+            [self.admin_user, support_user_id, regular_user_id, bot_user_id],
+            not_user_types=["custom"],
+        )
+
     def test_erasure_status(self) -> None:
         # Create a new user.
         user_id = self.register_user("eraseme", "eraseme")
-- 
cgit 1.5.1


From c303eca8cc31e5eb9edb10019f02c3a9e39a47ab Mon Sep 17 00:00:00 2001
From: an0nfunc <40771419+an0nfunc@users.noreply.github.com>
Date: Wed, 5 Jul 2023 10:52:12 +0200
Subject: use Image.LANCZOS instead of Image.ANTIALIAS for thumbnail resize
 (#15876)

Image.ANTIALIAS is not defined in current pillow releases. Since ANTIALIAS was just using LANCZOS anyways, this is just a cosmetic change, but makes synapse work with most recent pillow releases.

Signed-off-by: Giovanni Harting <539@idlegandalf.com>
---
 changelog.d/15876.bugfix     | 1 +
 synapse/media/thumbnailer.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15876.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15876.bugfix b/changelog.d/15876.bugfix
new file mode 100644
index 0000000000..9dbae04c4f
--- /dev/null
+++ b/changelog.d/15876.bugfix
@@ -0,0 +1 @@
+Correctly resize thumbnails with pillow version >=10.
diff --git a/synapse/media/thumbnailer.py b/synapse/media/thumbnailer.py
index f909a4fb9a..73d2272f05 100644
--- a/synapse/media/thumbnailer.py
+++ b/synapse/media/thumbnailer.py
@@ -131,7 +131,7 @@ class Thumbnailer:
             else:
                 with self.image:
                     self.image = self.image.convert("RGB")
-        return self.image.resize((width, height), Image.ANTIALIAS)
+        return self.image.resize((width, height), Image.LANCZOS)
 
     def scale(self, width: int, height: int, output_type: str) -> BytesIO:
         """Rescales the image to the given dimensions.
-- 
cgit 1.5.1


From 95a96b21eb98c638ae36814ec74ba468226e373c Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 5 Jul 2023 10:43:19 +0100
Subject: Add foreign key constraint to `event_forward_extremities`. (#15751)

---
 changelog.d/15751.misc                             |   1 +
 synapse/_scripts/synapse_port_db.py                |   2 +
 synapse/storage/background_updates.py              | 335 ++++++++++++++++++++-
 synapse/storage/database.py                        |  37 +++
 synapse/storage/databases/main/event_federation.py |  10 +
 synapse/storage/databases/main/events.py           |  12 +-
 .../delta/78/03event_extremities_constraints.py    |  51 ++++
 tests/storage/test_background_update.py            | 227 +++++++++++++-
 tests/storage/test_event_federation.py             |  35 ++-
 9 files changed, 699 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/15751.misc
 create mode 100644 synapse/storage/schema/main/delta/78/03event_extremities_constraints.py

(limited to 'synapse')

diff --git a/changelog.d/15751.misc b/changelog.d/15751.misc
new file mode 100644
index 0000000000..e0ecea6c2f
--- /dev/null
+++ b/changelog.d/15751.misc
@@ -0,0 +1 @@
+Add foreign key constraint to `event_forward_extremities`.
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index a803ada8ad..e126a2e0c5 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -61,6 +61,7 @@ from synapse.storage.databases.main.deviceinbox import DeviceInboxBackgroundUpda
 from synapse.storage.databases.main.devices import DeviceBackgroundUpdateStore
 from synapse.storage.databases.main.e2e_room_keys import EndToEndRoomKeyBackgroundStore
 from synapse.storage.databases.main.end_to_end_keys import EndToEndKeyBackgroundStore
+from synapse.storage.databases.main.event_federation import EventFederationWorkerStore
 from synapse.storage.databases.main.event_push_actions import EventPushActionsStore
 from synapse.storage.databases.main.events_bg_updates import (
     EventsBackgroundUpdatesStore,
@@ -239,6 +240,7 @@ class Store(
     PresenceBackgroundUpdateStore,
     ReceiptsBackgroundUpdateStore,
     RelationsWorkerStore,
+    EventFederationWorkerStore,
 ):
     def execute(self, f: Callable[..., R], *args: Any, **kwargs: Any) -> Awaitable[R]:
         return self.db_pool.runInteraction(f.__name__, f, *args, **kwargs)
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index edc97a9d61..5dce0a0159 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -11,8 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import abc
 import logging
-from enum import IntEnum
+from enum import Enum, IntEnum
 from types import TracebackType
 from typing import (
     TYPE_CHECKING,
@@ -24,12 +25,16 @@ from typing import (
     Iterable,
     List,
     Optional,
+    Sequence,
+    Tuple,
     Type,
 )
 
 import attr
+from pydantic import BaseModel
 
 from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.storage.engines import PostgresEngine
 from synapse.storage.types import Connection, Cursor
 from synapse.types import JsonDict
 from synapse.util import Clock, json_encoder
@@ -48,6 +53,78 @@ DEFAULT_BATCH_SIZE_CALLBACK = Callable[[str, str], Awaitable[int]]
 MIN_BATCH_SIZE_CALLBACK = Callable[[str, str], Awaitable[int]]
 
 
+class Constraint(metaclass=abc.ABCMeta):
+    """Base class representing different constraints.
+
+    Used by `register_background_validate_constraint_and_delete_rows`.
+    """
+
+    @abc.abstractmethod
+    def make_check_clause(self, table: str) -> str:
+        """Returns an SQL expression that checks the row passes the constraint."""
+        pass
+
+    @abc.abstractmethod
+    def make_constraint_clause_postgres(self) -> str:
+        """Returns an SQL clause for creating the constraint.
+
+        Only used on Postgres DBs
+        """
+        pass
+
+
+@attr.s(auto_attribs=True)
+class ForeignKeyConstraint(Constraint):
+    """A foreign key constraint.
+
+    Attributes:
+        referenced_table: The "parent" table name.
+        columns: The list of mappings of columns from table to referenced table
+    """
+
+    referenced_table: str
+    columns: Sequence[Tuple[str, str]]
+
+    def make_check_clause(self, table: str) -> str:
+        join_clause = " AND ".join(
+            f"{col1} = {table}.{col2}" for col1, col2 in self.columns
+        )
+        return f"EXISTS (SELECT 1 FROM {self.referenced_table} WHERE {join_clause})"
+
+    def make_constraint_clause_postgres(self) -> str:
+        column1_list = ", ".join(col1 for col1, col2 in self.columns)
+        column2_list = ", ".join(col2 for col1, col2 in self.columns)
+        return f"FOREIGN KEY ({column1_list}) REFERENCES {self.referenced_table} ({column2_list})"
+
+
+@attr.s(auto_attribs=True)
+class NotNullConstraint(Constraint):
+    """A NOT NULL column constraint"""
+
+    column: str
+
+    def make_check_clause(self, table: str) -> str:
+        return f"{self.column} IS NOT NULL"
+
+    def make_constraint_clause_postgres(self) -> str:
+        return f"CHECK ({self.column} IS NOT NULL)"
+
+
+class ValidateConstraintProgress(BaseModel):
+    """The format of the progress JSON for validate constraint background
+    updates.
+
+    Used by `register_background_validate_constraint_and_delete_rows`.
+    """
+
+    class State(str, Enum):
+        check = "check"
+        validate = "validate"
+
+    state: State = State.validate
+    lower_bound: Sequence[Any] = ()
+
+
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class _BackgroundUpdateHandler:
     """A handler for a given background update.
@@ -740,6 +817,179 @@ class BackgroundUpdater:
         logger.info("Adding index %s to %s", index_name, table)
         await self.db_pool.runWithConnection(runner)
 
+    def register_background_validate_constraint_and_delete_rows(
+        self,
+        update_name: str,
+        table: str,
+        constraint_name: str,
+        constraint: Constraint,
+        unique_columns: Sequence[str],
+    ) -> None:
+        """Helper for store classes to do a background validate constraint, and
+        delete rows that do not pass the constraint check.
+
+        Note: This deletes rows that don't match the constraint. This may not be
+        appropriate in all situations, and so the suitability of using this
+        method should be considered on a case-by-case basis.
+
+        This only applies on PostgreSQL.
+
+        For SQLite the table gets recreated as part of the schema delta and the
+        data is copied over synchronously (or whatever the correct way to
+        describe it is).
+
+        Args:
+            update_name: The name of the background update.
+            table: The table with the invalid constraint.
+            constraint_name: The name of the constraint
+            constraint: A `Constraint` object matching the type of constraint.
+            unique_columns: A sequence of columns that form a unique constraint
+              on the table. Used to iterate over the table.
+        """
+
+        assert isinstance(
+            self.db_pool.engine, engines.PostgresEngine
+        ), "validate constraint background update registered for non-Postres database"
+
+        async def updater(progress: JsonDict, batch_size: int) -> int:
+            return await self.validate_constraint_and_delete_in_background(
+                update_name=update_name,
+                table=table,
+                constraint_name=constraint_name,
+                constraint=constraint,
+                unique_columns=unique_columns,
+                progress=progress,
+                batch_size=batch_size,
+            )
+
+        self._background_update_handlers[update_name] = _BackgroundUpdateHandler(
+            updater, oneshot=True
+        )
+
+    async def validate_constraint_and_delete_in_background(
+        self,
+        update_name: str,
+        table: str,
+        constraint_name: str,
+        constraint: Constraint,
+        unique_columns: Sequence[str],
+        progress: JsonDict,
+        batch_size: int,
+    ) -> int:
+        """Validates a table constraint that has been marked as `NOT VALID`,
+        deleting rows that don't pass the constraint check.
+
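+        Args:
+            update_name: The name of the background update.
+            table: The table with the invalid constraint.
+            constraint_name: The name of the constraint
+            constraint: A `Constraint` object matching the type of constraint.
+            unique_columns: A sequence of columns that form a unique constraint
+              on the table. Used to iterate over the table.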
+        """
+
+        # We validate the constraint by:
+        #   1. Trying to validate the constraint as is. If this succeeds then
+        #      we're done.
+        #   2. Otherwise, we manually scan the table to remove rows that don't
+        #      match the constraint.
+        #   3. We try re-validating the constraint.
+
+        parsed_progress = ValidateConstraintProgress.parse_obj(progress)
+
+        if parsed_progress.state == ValidateConstraintProgress.State.check:
+            return_columns = ", ".join(unique_columns)
+            order_columns = ", ".join(unique_columns)
+
+            where_clause = ""
+            args: List[Any] = []
+            if parsed_progress.lower_bound:
+                where_clause = f"""WHERE ({order_columns}) > ({", ".join("?" for _ in unique_columns)})"""
+                args.extend(parsed_progress.lower_bound)
+
+            args.append(batch_size)
+
+            sql = f"""
+                SELECT
+                    {return_columns},
+                    {constraint.make_check_clause(table)} AS check
+                FROM {table}
+                {where_clause}
+                ORDER BY {order_columns}
+                LIMIT ?
+            """
+
+            def validate_constraint_in_background_check(
+                txn: "LoggingTransaction",
+            ) -> None:
+                txn.execute(sql, args)
+                rows = txn.fetchall()
+
+                new_progress = parsed_progress.copy()
+
+                if not rows:
+                    new_progress.state = ValidateConstraintProgress.State.validate
+                    self._background_update_progress_txn(
+                        txn, update_name, new_progress.dict()
+                    )
+                    return
+
+                new_progress.lower_bound = rows[-1][:-1]
+
+                to_delete = [row[:-1] for row in rows if not row[-1]]
+
+                if to_delete:
+                    logger.warning(
+                        "Deleting %d rows that do not pass new constraint",
+                        len(to_delete),
+                    )
+
+                    self.db_pool.simple_delete_many_batch_txn(
+                        txn, table=table, keys=unique_columns, values=to_delete
+                    )
+
+                self._background_update_progress_txn(
+                    txn, update_name, new_progress.dict()
+                )
+
+            await self.db_pool.runInteraction(
+                "validate_constraint_in_background_check",
+                validate_constraint_in_background_check,
+            )
+
+            return batch_size
+
+        elif parsed_progress.state == ValidateConstraintProgress.State.validate:
+            sql = f"ALTER TABLE {table} VALIDATE CONSTRAINT {constraint_name}"
+
+            def validate_constraint_in_background_validate(
+                txn: "LoggingTransaction",
+            ) -> None:
+                txn.execute(sql)
+
+            try:
+                await self.db_pool.runInteraction(
+                    "validate_constraint_in_background_validate",
+                    validate_constraint_in_background_validate,
+                )
+
+                await self._end_background_update(update_name)
+            except self.db_pool.engine.module.IntegrityError as e:
+                # If we get an integrity error here, then we go back and recheck the table.
+                logger.warning("Integrity error when validating constraint: %s", e)
+                await self._background_update_progress(
+                    update_name,
+                    ValidateConstraintProgress(
+                        state=ValidateConstraintProgress.State.check
+                    ).dict(),
+                )
+
+            return batch_size
+        else:
+            raise Exception(
+                f"Unrecognized state '{parsed_progress.state}' when trying to validate_constraint_and_delete_in_background"
+            )
+
     async def _end_background_update(self, update_name: str) -> None:
         """Removes a completed background update task from the queue.
 
@@ -795,3 +1045,86 @@ class BackgroundUpdater:
             keyvalues={"update_name": update_name},
             updatevalues={"progress_json": progress_json},
         )
+
+
+def run_validate_constraint_and_delete_rows_schema_delta(
+    txn: "LoggingTransaction",
+    ordering: int,
+    update_name: str,
+    table: str,
+    constraint_name: str,
+    constraint: Constraint,
+    sqlite_table_name: str,
+    sqlite_table_schema: str,
+) -> None:
+    """Runs a schema delta to add a constraint to the table. This should be run
+    in a schema delta file.
+
+    For PostgreSQL the constraint is added and validated in the background.
+
+    For SQLite the table is recreated and data copied across immediately. This
+    is done by the caller passing in a script to create the new table. Note that
+    table indexes and triggers are copied over automatically.
+
+    There must be a corresponding call to
+    `register_background_validate_constraint_and_delete_rows` to register the
+    background update in one of the data store classes.
+
+    Args:
+        txn, ordering, update_name: For adding a row to background_updates table.
+        table: The table to add constraint to.
+        constraint_name, constraint: The new constraint's name and `Constraint` object.
+        sqlite_table_name: For SQLite the name of the empty copy of table.
+        sqlite_table_schema: A SQL script for creating the above table.
+    """
+
+    if isinstance(txn.database_engine, PostgresEngine):
+        # For postgres we can just add the constraint and mark it as NOT VALID,
+        # and then insert a background update to go and check the validity in
+        # the background.
+        txn.execute(
+            f"""
+            ALTER TABLE {table}
+            ADD CONSTRAINT {constraint_name} {constraint.make_constraint_clause_postgres()}
+            NOT VALID
+            """
+        )
+
+        txn.execute(
+            "INSERT INTO background_updates (ordering, update_name, progress_json) VALUES (?, ?, '{}')",
+            (ordering, update_name),
+        )
+    else:
+        # For SQLite, we:
+        #   1. fetch all indexes/triggers/etc related to the table
+        #   2. create an empty copy of the table
+        #   3. copy across the rows (that satisfy the check)
+        #   4. replace the old table with the new table.
+        #   5. add back all the indexes/triggers/etc
+
+        # Fetch the indexes/triggers/etc. Note that `sql` column being null is
+        # due to indexes being auto created based on the class definition (e.g.
+        # PRIMARY KEY), and so don't need to be recreated.
+        txn.execute(
+            """
+            SELECT sql FROM sqlite_master
+            WHERE tbl_name = ? AND type != 'table' AND sql IS NOT NULL
+            """,
+            (table,),
+        )
+        extras = [row[0] for row in txn]
+
+        txn.execute(sqlite_table_schema)
+
+        sql = f"""
+            INSERT INTO {sqlite_table_name} SELECT * FROM {table}
+            WHERE {constraint.make_check_clause(table)}
+        """
+
+        txn.execute(sql)
+
+        txn.execute(f"DROP TABLE {table}")
+        txn.execute(f"ALTER TABLE {sqlite_table_name} RENAME TO {table}")
+
+        for extra in extras:
+            txn.execute(extra)
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 7e49ae11bc..a1c8fb0f46 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -2313,6 +2313,43 @@ class DatabasePool:
 
         return txn.rowcount
 
+    @staticmethod
+    def simple_delete_many_batch_txn(
+        txn: LoggingTransaction,
+        table: str,
+        keys: Collection[str],
+        values: Iterable[Iterable[Any]],
+    ) -> None:
+        """Executes a DELETE query on the named table.
+
+        The input is given as a list of rows, where each row is a list of values.
+        (Actually any iterable is fine.)
+
+        Args:
+            txn: The transaction to use.
+            table: string giving the table name
+            keys: list of column names
+            values: for each row, a list of values in the same order as `keys`
+        """
+
+        if isinstance(txn.database_engine, PostgresEngine):
+            # We use `execute_values` as it can be a lot faster than `execute_batch`,
+            # but it's only available on postgres.
+            sql = "DELETE FROM %s WHERE (%s) IN (VALUES ?)" % (
+                table,
+                ", ".join(k for k in keys),
+            )
+
+            txn.execute_values(sql, values, fetch=False)
+        else:
+            sql = "DELETE FROM %s WHERE (%s) = (%s)" % (
+                table,
+                ", ".join(k for k in keys),
+                ", ".join("?" for _ in keys),
+            )
+
+            txn.execute_batch(sql, values)
+
     def get_cache_dict(
         self,
         db_conn: LoggingDatabaseConnection,
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 8b6e3c1dc7..dabe603c8c 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -38,6 +38,7 @@ from synapse.events import EventBase, make_event_from_dict
 from synapse.logging.opentracing import tag_args, trace
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
+from synapse.storage.background_updates import ForeignKeyConstraint
 from synapse.storage.database import (
     DatabasePool,
     LoggingDatabaseConnection,
@@ -140,6 +141,15 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
 
         self._clock.looping_call(self._get_stats_for_federation_staging, 30 * 1000)
 
+        if isinstance(self.database_engine, PostgresEngine):
+            self.db_pool.updates.register_background_validate_constraint_and_delete_rows(
+                update_name="event_forward_extremities_event_id_foreign_key_constraint_update",
+                table="event_forward_extremities",
+                constraint_name="event_forward_extremities_event_id",
+                constraint=ForeignKeyConstraint("events", [("event_id", "event_id")]),
+                unique_columns=("event_id", "room_id"),
+            )
+
     async def get_auth_chain(
         self, room_id: str, event_ids: Collection[str], include_given: bool = False
     ) -> List[EventBase]:
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 5c9db7554e..2b83a69426 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -415,12 +415,6 @@ class PersistEventsStore:
                 backfilled=False,
             )
 
-        self._update_forward_extremities_txn(
-            txn,
-            new_forward_extremities=new_forward_extremities,
-            max_stream_order=max_stream_order,
-        )
-
         # Ensure that we don't have the same event twice.
         events_and_contexts = self._filter_events_and_contexts_for_duplicates(
             events_and_contexts
@@ -439,6 +433,12 @@ class PersistEventsStore:
 
         self._store_event_txn(txn, events_and_contexts=events_and_contexts)
 
+        self._update_forward_extremities_txn(
+            txn,
+            new_forward_extremities=new_forward_extremities,
+            max_stream_order=max_stream_order,
+        )
+
         self._persist_transaction_ids_txn(txn, events_and_contexts)
 
         # Insert into event_to_state_groups.
diff --git a/synapse/storage/schema/main/delta/78/03event_extremities_constraints.py b/synapse/storage/schema/main/delta/78/03event_extremities_constraints.py
new file mode 100644
index 0000000000..f12e2a8f3e
--- /dev/null
+++ b/synapse/storage/schema/main/delta/78/03event_extremities_constraints.py
@@ -0,0 +1,51 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+This migration adds foreign key constraint to `event_forward_extremities` table.
+"""
+from synapse.storage.background_updates import (
+    ForeignKeyConstraint,
+    run_validate_constraint_and_delete_rows_schema_delta,
+)
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine
+
+FORWARD_EXTREMITIES_TABLE_SCHEMA = """
+    CREATE TABLE event_forward_extremities2(
+        event_id TEXT NOT NULL,
+        room_id TEXT NOT NULL,
+        UNIQUE (event_id, room_id),
+        CONSTRAINT event_forward_extremities_event_id FOREIGN KEY (event_id) REFERENCES events (event_id)
+    )
+"""
+
+
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
+    run_validate_constraint_and_delete_rows_schema_delta(
+        cur,
+        ordering=7803,
+        update_name="event_forward_extremities_event_id_foreign_key_constraint_update",
+        table="event_forward_extremities",
+        constraint_name="event_forward_extremities_event_id",
+        constraint=ForeignKeyConstraint("events", [("event_id", "event_id")]),
+        sqlite_table_name="event_forward_extremities2",
+        sqlite_table_schema=FORWARD_EXTREMITIES_TABLE_SCHEMA,
+    )
+
+    # We can't add a similar constraint to `event_backward_extremities` as the
+    # events in there don't exist in the `events` table and `event_edges`
+    # doesn't have a unique constraint on `prev_event_id` (so we can't make a
+    # foreign key point to it).
diff --git a/tests/storage/test_background_update.py b/tests/storage/test_background_update.py
index fd619b64d4..6ca546f3f7 100644
--- a/tests/storage/test_background_update.py
+++ b/tests/storage/test_background_update.py
@@ -20,7 +20,14 @@ from twisted.internet.defer import Deferred, ensureDeferred
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.server import HomeServer
-from synapse.storage.background_updates import BackgroundUpdater
+from synapse.storage.background_updates import (
+    BackgroundUpdater,
+    ForeignKeyConstraint,
+    NotNullConstraint,
+    run_validate_constraint_and_delete_rows_schema_delta,
+)
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import PostgresEngine, Sqlite3Engine
 from synapse.types import JsonDict
 from synapse.util import Clock
 
@@ -404,3 +411,221 @@ class BackgroundUpdateControllerTestCase(unittest.HomeserverTestCase):
         self.pump()
         self._update_ctx_manager.__aexit__.assert_called()
         self.get_success(do_update_d)
+
+
+class BackgroundUpdateValidateConstraintTestCase(unittest.HomeserverTestCase):
+    """Tests the validate constraint and delete background handlers."""
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.updates: BackgroundUpdater = self.hs.get_datastores().main.db_pool.updates
+        # the base test class should have run the real bg updates for us
+        self.assertTrue(
+            self.get_success(self.updates.has_completed_background_updates())
+        )
+
+        self.store = self.hs.get_datastores().main
+
+    def test_not_null_constraint(self) -> None:
+        # Create the initial tables, where we have some invalid data.
+        """Tests adding a not null constraint."""
+        table_sql = """
+            CREATE TABLE test_constraint(
+                a INT PRIMARY KEY,
+                b INT
+            );
+        """
+        self.get_success(
+            self.store.db_pool.execute(
+                "test_not_null_constraint", lambda _: None, table_sql
+            )
+        )
+
+        # We add an index so that we can check that it's correctly recreated when
+        # using SQLite.
+        index_sql = "CREATE INDEX test_index ON test_constraint(a)"
+        self.get_success(
+            self.store.db_pool.execute(
+                "test_not_null_constraint", lambda _: None, index_sql
+            )
+        )
+
+        self.get_success(
+            self.store.db_pool.simple_insert("test_constraint", {"a": 1, "b": 1})
+        )
+        self.get_success(
+            self.store.db_pool.simple_insert("test_constraint", {"a": 2, "b": None})
+        )
+        self.get_success(
+            self.store.db_pool.simple_insert("test_constraint", {"a": 3, "b": 3})
+        )
+
+        # Now lets do the migration
+
+        table2_sqlite = """
+            CREATE TABLE test_constraint2(
+                a INT PRIMARY KEY,
+                b INT,
+                CONSTRAINT test_constraint_name CHECK (b is NOT NULL)
+            );
+        """
+
+        def delta(txn: LoggingTransaction) -> None:
+            run_validate_constraint_and_delete_rows_schema_delta(
+                txn,
+                ordering=1000,
+                update_name="test_bg_update",
+                table="test_constraint",
+                constraint_name="test_constraint_name",
+                constraint=NotNullConstraint("b"),
+                sqlite_table_name="test_constraint2",
+                sqlite_table_schema=table2_sqlite,
+            )
+
+        self.get_success(
+            self.store.db_pool.runInteraction(
+                "test_not_null_constraint",
+                delta,
+            )
+        )
+
+        if isinstance(self.store.database_engine, PostgresEngine):
+            # Postgres uses a background update
+            self.updates.register_background_validate_constraint_and_delete_rows(
+                "test_bg_update",
+                table="test_constraint",
+                constraint_name="test_constraint_name",
+                constraint=NotNullConstraint("b"),
+                unique_columns=["a"],
+            )
+
+            # Tell the DataStore that it hasn't finished all updates yet
+            self.store.db_pool.updates._all_done = False
+
+            # Now let's actually drive the updates to completion
+            self.wait_for_background_updates()
+
+        # Check the correct values are in the new table.
+        rows = self.get_success(
+            self.store.db_pool.simple_select_list(
+                table="test_constraint",
+                keyvalues={},
+                retcols=("a", "b"),
+            )
+        )
+
+        self.assertCountEqual(rows, [{"a": 1, "b": 1}, {"a": 3, "b": 3}])
+
+        # And check that invalid rows get correctly rejected.
+        self.get_failure(
+            self.store.db_pool.simple_insert("test_constraint", {"a": 2, "b": None}),
+            exc=self.store.database_engine.module.IntegrityError,
+        )
+
+        # Check the index is still there for SQLite.
+        if isinstance(self.store.database_engine, Sqlite3Engine):
+            # Ensure the index exists in the schema.
+            self.get_success(
+                self.store.db_pool.simple_select_one_onecol(
+                    table="sqlite_master",
+                    keyvalues={"tbl_name": "test_constraint"},
+                    retcol="name",
+                )
+            )
+
+    def test_foreign_constraint(self) -> None:
+        """Tests adding a foreign key constraint."""
+
+        # Create the initial tables, where we have some invalid data.
+        base_sql = """
+            CREATE TABLE base_table(
+                b INT PRIMARY KEY
+            );
+        """
+
+        table_sql = """
+            CREATE TABLE test_constraint(
+                a INT PRIMARY KEY,
+                b INT NOT NULL
+            );
+        """
+        self.get_success(
+            self.store.db_pool.execute(
+                "test_foreign_key_constraint", lambda _: None, base_sql
+            )
+        )
+        self.get_success(
+            self.store.db_pool.execute(
+                "test_foreign_key_constraint", lambda _: None, table_sql
+            )
+        )
+
+        self.get_success(self.store.db_pool.simple_insert("base_table", {"b": 1}))
+        self.get_success(
+            self.store.db_pool.simple_insert("test_constraint", {"a": 1, "b": 1})
+        )
+        self.get_success(
+            self.store.db_pool.simple_insert("test_constraint", {"a": 2, "b": 2})
+        )
+        self.get_success(self.store.db_pool.simple_insert("base_table", {"b": 3}))
+        self.get_success(
+            self.store.db_pool.simple_insert("test_constraint", {"a": 3, "b": 3})
+        )
+
+        table2_sqlite = """
+            CREATE TABLE test_constraint2(
+                a INT PRIMARY KEY,
+                b INT NOT NULL,
+                CONSTRAINT test_constraint_name FOREIGN KEY (b) REFERENCES base_table (b)
+            );
+        """
+
+        def delta(txn: LoggingTransaction) -> None:
+            run_validate_constraint_and_delete_rows_schema_delta(
+                txn,
+                ordering=1000,
+                update_name="test_bg_update",
+                table="test_constraint",
+                constraint_name="test_constraint_name",
+                constraint=ForeignKeyConstraint("base_table", [("b", "b")]),
+                sqlite_table_name="test_constraint2",
+                sqlite_table_schema=table2_sqlite,
+            )
+
+        self.get_success(
+            self.store.db_pool.runInteraction(
+                "test_foreign_key_constraint",
+                delta,
+            )
+        )
+
+        if isinstance(self.store.database_engine, PostgresEngine):
+            # Postgres uses a background update
+            self.updates.register_background_validate_constraint_and_delete_rows(
+                "test_bg_update",
+                table="test_constraint",
+                constraint_name="test_constraint_name",
+                constraint=ForeignKeyConstraint("base_table", [("b", "b")]),
+                unique_columns=["a"],
+            )
+
+            # Tell the DataStore that it hasn't finished all updates yet
+            self.store.db_pool.updates._all_done = False
+
+            # Now let's actually drive the updates to completion
+            self.wait_for_background_updates()
+
+        # Check the correct values are in the new table.
+        rows = self.get_success(
+            self.store.db_pool.simple_select_list(
+                table="test_constraint",
+                keyvalues={},
+                retcols=("a", "b"),
+            )
+        )
+        self.assertCountEqual(rows, [{"a": 1, "b": 1}, {"a": 3, "b": 3}])
+
+        # And check that invalid rows get correctly rejected.
+        self.get_failure(
+            self.store.db_pool.simple_insert("test_constraint", {"a": 2, "b": 2}),
+            exc=self.store.database_engine.module.IntegrityError,
+        )
diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index 0f3b0744f1..9c151a5e62 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -20,6 +20,7 @@ from parameterized import parameterized
 
 from twisted.test.proto_helpers import MemoryReactor
 
+from synapse.api.constants import EventTypes
 from synapse.api.room_versions import (
     KNOWN_ROOM_VERSIONS,
     EventFormatVersions,
@@ -98,8 +99,32 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         room2 = "#room2"
         room3 = "#room3"
 
-        def insert_event(txn: Cursor, i: int, room_id: str) -> None:
+        def insert_event(txn: LoggingTransaction, i: int, room_id: str) -> None:
             event_id = "$event_%i:local" % i
+
+            # We need to insert into events table to get around the foreign key constraint.
+            self.store.db_pool.simple_insert_txn(
+                txn,
+                table="events",
+                values={
+                    "instance_name": "master",
+                    "stream_ordering": self.store._stream_id_gen.get_next_txn(txn),
+                    "topological_ordering": 1,
+                    "depth": 1,
+                    "event_id": event_id,
+                    "room_id": room_id,
+                    "type": EventTypes.Message,
+                    "processed": True,
+                    "outlier": False,
+                    "origin_server_ts": 0,
+                    "received_ts": 0,
+                    "sender": "@user:local",
+                    "contains_url": False,
+                    "state_key": None,
+                    "rejection_reason": None,
+                },
+            )
+
             txn.execute(
                 (
                     "INSERT INTO event_forward_extremities (room_id, event_id) "
@@ -113,10 +138,14 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
                 self.store.db_pool.runInteraction("insert", insert_event, i, room1)
             )
             self.get_success(
-                self.store.db_pool.runInteraction("insert", insert_event, i, room2)
+                self.store.db_pool.runInteraction(
+                    "insert", insert_event, i + 100, room2
+                )
             )
             self.get_success(
-                self.store.db_pool.runInteraction("insert", insert_event, i, room3)
+                self.store.db_pool.runInteraction(
+                    "insert", insert_event, i + 200, room3
+                )
             )
 
         # Test simple case
-- 
cgit 1.5.1


From 4cf9f92f395e8c448b94eccb48fbfe2e7e61d7cd Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Wed, 5 Jul 2023 05:44:02 -0500
Subject: Fix could not serialize access due to concurrent `DELETE` from
 presence_stream (#15826)

* Change update_presence to have an isolation level of READ_COMMITTED

* changelog
---
 changelog.d/15826.misc                     | 1 +
 synapse/storage/databases/main/presence.py | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15826.misc

(limited to 'synapse')

diff --git a/changelog.d/15826.misc b/changelog.d/15826.misc
new file mode 100644
index 0000000000..88903f3f7c
--- /dev/null
+++ b/changelog.d/15826.misc
@@ -0,0 +1 @@
+Use lower isolation level when cleaning old presence stream data to avoid serialization errors.
diff --git a/synapse/storage/databases/main/presence.py b/synapse/storage/databases/main/presence.py
index beb210f8ee..b51d20ac26 100644
--- a/synapse/storage/databases/main/presence.py
+++ b/synapse/storage/databases/main/presence.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, cast
 
 from synapse.api.presence import PresenceState, UserPresenceState
@@ -24,6 +23,7 @@ from synapse.storage.database import (
 )
 from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
 from synapse.storage.engines import PostgresEngine
+from synapse.storage.engines._base import IsolationLevel
 from synapse.storage.types import Connection
 from synapse.storage.util.id_generators import (
     AbstractStreamIdGenerator,
@@ -115,11 +115,16 @@ class PresenceStore(PresenceBackgroundUpdateStore, CacheInvalidationWorkerStore)
         )
 
         async with stream_ordering_manager as stream_orderings:
+            # Run the interaction with an isolation level of READ_COMMITTED to avoid
+            # serialization errors (and rollbacks) in the database. This way it will
+            # ignore new rows during the DELETE, but will pick them up the next time
+            # this is run. Currently, that is between 5-60 seconds.
             await self.db_pool.runInteraction(
                 "update_presence",
                 self._update_presence_txn,
                 stream_orderings,
                 presence_states,
+                isolation_level=IsolationLevel.READ_COMMITTED,
             )
 
         return stream_orderings[-1], self._presence_id_gen.get_current_token()
-- 
cgit 1.5.1


From ce857c05d5f6fa6b66c4a59c4917c440c9b98047 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Wed, 5 Jul 2023 10:22:21 -0500
Subject: Add tracing to media `/upload` endpoint (#15850)

Add tracing instrumentation to media `/upload` code paths to investigate https://github.com/matrix-org/synapse/issues/15841
---
 changelog.d/15850.misc                                | 1 +
 synapse/media/media_repository.py                     | 3 +++
 synapse/media/media_storage.py                        | 7 +++++++
 synapse/media/storage_provider.py                     | 5 +++++
 synapse/media/thumbnailer.py                          | 5 +++++
 synapse/module_api/callbacks/spamchecker_callbacks.py | 1 +
 synapse/storage/databases/main/media_repository.py    | 5 +++++
 7 files changed, 27 insertions(+)
 create mode 100644 changelog.d/15850.misc

(limited to 'synapse')

diff --git a/changelog.d/15850.misc b/changelog.d/15850.misc
new file mode 100644
index 0000000000..0e49ab23fe
--- /dev/null
+++ b/changelog.d/15850.misc
@@ -0,0 +1 @@
+Add tracing to media `/upload` code paths.
diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py
index e81c987b10..4b750c700b 100644
--- a/synapse/media/media_repository.py
+++ b/synapse/media/media_repository.py
@@ -35,6 +35,7 @@ from synapse.api.errors import (
 from synapse.config.repository import ThumbnailRequirement
 from synapse.http.site import SynapseRequest
 from synapse.logging.context import defer_to_thread
+from synapse.logging.opentracing import trace
 from synapse.media._base import (
     FileInfo,
     Responder,
@@ -174,6 +175,7 @@ class MediaRepository:
         else:
             self.recently_accessed_locals.add(media_id)
 
+    @trace
     async def create_content(
         self,
         media_type: str,
@@ -710,6 +712,7 @@ class MediaRepository:
         # Could not generate thumbnail.
         return None
 
+    @trace
     async def _generate_thumbnails(
         self,
         server_name: Optional[str],
diff --git a/synapse/media/media_storage.py b/synapse/media/media_storage.py
index a819d95407..eebcbc48e8 100644
--- a/synapse/media/media_storage.py
+++ b/synapse/media/media_storage.py
@@ -38,6 +38,7 @@ from twisted.protocols.basic import FileSender
 
 from synapse.api.errors import NotFoundError
 from synapse.logging.context import defer_to_thread, make_deferred_yieldable
+from synapse.logging.opentracing import trace
 from synapse.util import Clock
 from synapse.util.file_consumer import BackgroundFileConsumer
 
@@ -76,6 +77,7 @@ class MediaStorage:
         self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
         self.clock = hs.get_clock()
 
+    @trace
     async def store_file(self, source: IO, file_info: FileInfo) -> str:
         """Write `source` to the on disk media store, and also any other
         configured storage providers
@@ -95,10 +97,12 @@ class MediaStorage:
 
         return fname
 
+    @trace
     async def write_to_file(self, source: IO, output: IO) -> None:
         """Asynchronously write the `source` to `output`."""
         await defer_to_thread(self.reactor, _write_file_synchronously, source, output)
 
+    @trace
     @contextlib.contextmanager
     def store_into_file(
         self, file_info: FileInfo
@@ -214,6 +218,7 @@ class MediaStorage:
 
         return None
 
+    @trace
     async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str:
         """Ensures that the given file is in the local cache. Attempts to
         download it from storage providers if it isn't.
@@ -259,6 +264,7 @@ class MediaStorage:
 
         raise NotFoundError()
 
+    @trace
     def _file_info_to_path(self, file_info: FileInfo) -> str:
         """Converts file_info into a relative path.
 
@@ -301,6 +307,7 @@ class MediaStorage:
         return self.filepaths.local_media_filepath_rel(file_info.file_id)
 
 
+@trace
 def _write_file_synchronously(source: IO, dest: IO) -> None:
     """Write `source` to the file like `dest` synchronously. Should be called
     from a thread.
diff --git a/synapse/media/storage_provider.py b/synapse/media/storage_provider.py
index 1c9b71d69c..0aea3a7a0d 100644
--- a/synapse/media/storage_provider.py
+++ b/synapse/media/storage_provider.py
@@ -20,6 +20,7 @@ from typing import TYPE_CHECKING, Callable, Optional
 
 from synapse.config._base import Config
 from synapse.logging.context import defer_to_thread, run_in_background
+from synapse.logging.opentracing import trace
 from synapse.util.async_helpers import maybe_awaitable
 
 from ._base import FileInfo, Responder
@@ -86,6 +87,7 @@ class StorageProviderWrapper(StorageProvider):
     def __str__(self) -> str:
         return "StorageProviderWrapper[%s]" % (self.backend,)
 
+    @trace
     async def store_file(self, path: str, file_info: FileInfo) -> None:
         if not file_info.server_name and not self.store_local:
             return None
@@ -114,6 +116,7 @@ class StorageProviderWrapper(StorageProvider):
 
             run_in_background(store)
 
+    @trace
     async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
         if file_info.url_cache:
             # Files in the URL preview cache definitely aren't stored here,
@@ -141,6 +144,7 @@ class FileStorageProviderBackend(StorageProvider):
     def __str__(self) -> str:
         return "FileStorageProviderBackend[%s]" % (self.base_directory,)
 
+    @trace
     async def store_file(self, path: str, file_info: FileInfo) -> None:
         """See StorageProvider.store_file"""
 
@@ -159,6 +163,7 @@ class FileStorageProviderBackend(StorageProvider):
             backup_fname,
         )
 
+    @trace
     async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
         """See StorageProvider.fetch"""
 
diff --git a/synapse/media/thumbnailer.py b/synapse/media/thumbnailer.py
index 73d2272f05..2bfa58ceee 100644
--- a/synapse/media/thumbnailer.py
+++ b/synapse/media/thumbnailer.py
@@ -19,6 +19,8 @@ from typing import Optional, Tuple, Type
 
 from PIL import Image
 
+from synapse.logging.opentracing import trace
+
 logger = logging.getLogger(__name__)
 
 EXIF_ORIENTATION_TAG = 0x0112
@@ -82,6 +84,7 @@ class Thumbnailer:
             # A lot of parsing errors can happen when parsing EXIF
             logger.info("Error parsing image EXIF information: %s", e)
 
+    @trace
     def transpose(self) -> Tuple[int, int]:
         """Transpose the image using its EXIF Orientation tag
 
@@ -133,6 +136,7 @@ class Thumbnailer:
                     self.image = self.image.convert("RGB")
         return self.image.resize((width, height), Image.LANCZOS)
 
+    @trace
     def scale(self, width: int, height: int, output_type: str) -> BytesIO:
         """Rescales the image to the given dimensions.
 
@@ -142,6 +146,7 @@ class Thumbnailer:
         with self._resize(width, height) as scaled:
             return self._encode_image(scaled, output_type)
 
+    @trace
     def crop(self, width: int, height: int, output_type: str) -> BytesIO:
         """Rescales and crops the image to the given dimensions preserving
         aspect::
diff --git a/synapse/module_api/callbacks/spamchecker_callbacks.py b/synapse/module_api/callbacks/spamchecker_callbacks.py
index 7cee442145..e191450323 100644
--- a/synapse/module_api/callbacks/spamchecker_callbacks.py
+++ b/synapse/module_api/callbacks/spamchecker_callbacks.py
@@ -788,6 +788,7 @@ class SpamCheckerModuleApiCallbacks:
 
         return RegistrationBehaviour.ALLOW
 
+    @trace
     async def check_media_file_for_spam(
         self, file_wrapper: ReadableFileWrapper, file_info: FileInfo
     ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]:
diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py
index fa8be214ce..8cebeb5189 100644
--- a/synapse/storage/databases/main/media_repository.py
+++ b/synapse/storage/databases/main/media_repository.py
@@ -27,6 +27,7 @@ from typing import (
 )
 
 from synapse.api.constants import Direction
+from synapse.logging.opentracing import trace
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.database import (
     DatabasePool,
@@ -328,6 +329,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
             "get_local_media_ids", _get_local_media_ids_txn
         )
 
+    @trace
     async def store_local_media(
         self,
         media_id: str,
@@ -447,6 +449,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
             desc="get_local_media_thumbnails",
         )
 
+    @trace
     async def store_local_thumbnail(
         self,
         media_id: str,
@@ -568,6 +571,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
             desc="get_remote_media_thumbnails",
         )
 
+    @trace
     async def get_remote_media_thumbnail(
         self,
         origin: str,
@@ -599,6 +603,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
             desc="get_remote_media_thumbnail",
         )
 
+    @trace
     async def store_remote_media_thumbnail(
         self,
         origin: str,
-- 
cgit 1.5.1


From 39d131b016673bbd4d3c28095c8838b8c6dc0953 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 5 Jul 2023 17:25:00 +0100
Subject: Add basic read/write lock (#15782)

---
 changelog.d/15782.misc                             |   1 +
 synapse/_scripts/synapse_port_db.py                |   9 +-
 synapse/storage/databases/main/lock.py             | 224 ++++++++++++----
 .../78/04_read_write_locks_triggers.sql.postgres   | 152 +++++++++++
 .../78/04_read_write_locks_triggers.sql.sqlite     | 119 +++++++++
 tests/storage/databases/main/test_lock.py          | 283 ++++++++++++++++++++-
 6 files changed, 730 insertions(+), 58 deletions(-)
 create mode 100644 changelog.d/15782.misc
 create mode 100644 synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.sqlite

(limited to 'synapse')

diff --git a/changelog.d/15782.misc b/changelog.d/15782.misc
new file mode 100644
index 0000000000..aae493b973
--- /dev/null
+++ b/changelog.d/15782.misc
@@ -0,0 +1 @@
+Add read/write style cross-worker locks.
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index e126a2e0c5..7c4aa0afa2 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -197,6 +197,11 @@ IGNORED_TABLES = {
     "ui_auth_sessions",
     "ui_auth_sessions_credentials",
     "ui_auth_sessions_ips",
+    # Ignore the worker locks tables, as a) there shouldn't be any acquired locks
+    # after porting, and b) the circular foreign key constraints make it hard to
+    # port.
+    "worker_read_write_locks_mode",
+    "worker_read_write_locks",
 }
 
 
@@ -805,7 +810,9 @@ class Porter:
             )
             # Map from table name to args passed to `handle_table`, i.e. a tuple
             # of: `postgres_size`, `table_size`, `forward_chunk`, `backward_chunk`.
-            tables_to_port_info_map = {r[0]: r[1:] for r in setup_res}
+            tables_to_port_info_map = {
+                r[0]: r[1:] for r in setup_res if r[0] not in IGNORED_TABLES
+            }
 
             # Step 5. Do the copying.
             #
diff --git a/synapse/storage/databases/main/lock.py b/synapse/storage/databases/main/lock.py
index 7270ef09da..c89b4f7919 100644
--- a/synapse/storage/databases/main/lock.py
+++ b/synapse/storage/databases/main/lock.py
@@ -25,6 +25,7 @@ from synapse.storage.database import (
     LoggingDatabaseConnection,
     LoggingTransaction,
 )
+from synapse.storage.engines import PostgresEngine
 from synapse.util import Clock
 from synapse.util.stringutils import random_string
 
@@ -68,12 +69,20 @@ class LockStore(SQLBaseStore):
         self._reactor = hs.get_reactor()
         self._instance_name = hs.get_instance_id()
 
-        # A map from `(lock_name, lock_key)` to the token of any locks that we
-        # think we currently hold.
-        self._live_tokens: WeakValueDictionary[
+        # A map from `(lock_name, lock_key)` to lock that we think we
+        # currently hold.
+        self._live_lock_tokens: WeakValueDictionary[
             Tuple[str, str], Lock
         ] = WeakValueDictionary()
 
+        # A map from `(lock_name, lock_key, token)` to read/write lock that we
+        # think we currently hold. For a given lock_name/lock_key, there can be
+        # multiple read locks at a time but only one write lock (no mixing read
+        # and write locks at the same time).
+        self._live_read_write_lock_tokens: WeakValueDictionary[
+            Tuple[str, str, str], Lock
+        ] = WeakValueDictionary()
+
         # When we shut down we want to remove the locks. Technically this can
         # lead to a race, as we may drop the lock while we are still processing.
         # However, a) it should be a small window, b) the lock is best effort
@@ -91,11 +100,13 @@ class LockStore(SQLBaseStore):
         """Called when the server is shutting down"""
         logger.info("Dropping held locks due to shutdown")
 
-        # We need to take a copy of the tokens dict as dropping the locks will
-        # cause the dictionary to change.
-        locks = dict(self._live_tokens)
+        # We need to take a copy of the locks as dropping the locks will cause
+        # the dictionary to change.
+        locks = list(self._live_lock_tokens.values()) + list(
+            self._live_read_write_lock_tokens.values()
+        )
 
-        for lock in locks.values():
+        for lock in locks:
             await lock.release()
 
         logger.info("Dropped locks due to shutdown")
@@ -122,7 +133,7 @@ class LockStore(SQLBaseStore):
         """
 
         # Check if this process has taken out a lock and if it's still valid.
-        lock = self._live_tokens.get((lock_name, lock_key))
+        lock = self._live_lock_tokens.get((lock_name, lock_key))
         if lock and await lock.is_still_valid():
             return None
 
@@ -176,61 +187,111 @@ class LockStore(SQLBaseStore):
             self._reactor,
             self._clock,
             self,
+            read_write=False,
             lock_name=lock_name,
             lock_key=lock_key,
             token=token,
         )
 
-        self._live_tokens[(lock_name, lock_key)] = lock
+        self._live_lock_tokens[(lock_name, lock_key)] = lock
 
         return lock
 
-    async def _is_lock_still_valid(
-        self, lock_name: str, lock_key: str, token: str
-    ) -> bool:
-        """Checks whether this instance still holds the lock."""
-        last_renewed_ts = await self.db_pool.simple_select_one_onecol(
-            table="worker_locks",
-            keyvalues={
-                "lock_name": lock_name,
-                "lock_key": lock_key,
-                "token": token,
-            },
-            retcol="last_renewed_ts",
-            allow_none=True,
-            desc="is_lock_still_valid",
-        )
-        return (
-            last_renewed_ts is not None
-            and self._clock.time_msec() - _LOCK_TIMEOUT_MS < last_renewed_ts
-        )
+    async def try_acquire_read_write_lock(
+        self,
+        lock_name: str,
+        lock_key: str,
+        write: bool,
+    ) -> Optional["Lock"]:
+        """Try to acquire a lock for the given name/key. Will return an async
+        context manager if the lock is successfully acquired, which *must* be
+        used (otherwise the lock will leak).
+        """
 
-    async def _renew_lock(self, lock_name: str, lock_key: str, token: str) -> None:
-        """Attempt to renew the lock if we still hold it."""
-        await self.db_pool.simple_update(
-            table="worker_locks",
-            keyvalues={
-                "lock_name": lock_name,
-                "lock_key": lock_key,
-                "token": token,
-            },
-            updatevalues={"last_renewed_ts": self._clock.time_msec()},
-            desc="renew_lock",
-        )
+        now = self._clock.time_msec()
+        token = random_string(6)
 
-    async def _drop_lock(self, lock_name: str, lock_key: str, token: str) -> None:
-        """Attempt to drop the lock, if we still hold it"""
-        await self.db_pool.simple_delete(
-            table="worker_locks",
-            keyvalues={
-                "lock_name": lock_name,
-                "lock_key": lock_key,
-                "token": token,
-            },
-            desc="drop_lock",
+        def _try_acquire_read_write_lock_txn(txn: LoggingTransaction) -> None:
+            # We attempt to acquire the lock by inserting into
+            # `worker_read_write_locks` and seeing if that fails any
+            # constraints. If it doesn't then we have acquired the lock,
+            # otherwise we haven't.
+            #
+            # Before that though we clear the table of any stale locks.
+
+            delete_sql = """
+                DELETE FROM worker_read_write_locks
+                    WHERE last_renewed_ts < ? AND lock_name = ? AND lock_key = ?;
+            """
+
+            insert_sql = """
+                INSERT INTO worker_read_write_locks (lock_name, lock_key, write_lock, instance_name, token, last_renewed_ts)
+                VALUES (?, ?, ?, ?, ?, ?)
+            """
+
+            if isinstance(self.database_engine, PostgresEngine):
+                # For Postgres we can send these queries at the same time.
+                txn.execute(
+                    delete_sql + ";" + insert_sql,
+                    (
+                        # DELETE args
+                        now - _LOCK_TIMEOUT_MS,
+                        lock_name,
+                        lock_key,
+                        # INSERT args
+                        lock_name,
+                        lock_key,
+                        write,
+                        self._instance_name,
+                        token,
+                        now,
+                    ),
+                )
+            else:
+                # For SQLite these need to be two queries.
+                txn.execute(
+                    delete_sql,
+                    (
+                        now - _LOCK_TIMEOUT_MS,
+                        lock_name,
+                        lock_key,
+                    ),
+                )
+                txn.execute(
+                    insert_sql,
+                    (
+                        lock_name,
+                        lock_key,
+                        write,
+                        self._instance_name,
+                        token,
+                        now,
+                    ),
+                )
+
+            return
+
+        try:
+            await self.db_pool.runInteraction(
+                "try_acquire_read_write_lock",
+                _try_acquire_read_write_lock_txn,
+            )
+        except self.database_engine.module.IntegrityError:
+            return None
+
+        lock = Lock(
+            self._reactor,
+            self._clock,
+            self,
+            read_write=True,
+            lock_name=lock_name,
+            lock_key=lock_key,
+            token=token,
         )
 
-        self._live_tokens.pop((lock_name, lock_key), None)
+        self._live_read_write_lock_tokens[(lock_name, lock_key, token)] = lock
+
+        return lock
 
 
 class Lock:
@@ -259,6 +320,7 @@ class Lock:
         reactor: IReactorCore,
         clock: Clock,
         store: LockStore,
+        read_write: bool,
         lock_name: str,
         lock_key: str,
         token: str,
@@ -266,13 +328,23 @@ class Lock:
         self._reactor = reactor
         self._clock = clock
         self._store = store
+        self._read_write = read_write
         self._lock_name = lock_name
         self._lock_key = lock_key
 
         self._token = token
 
+        self._table = "worker_read_write_locks" if read_write else "worker_locks"
+
         self._looping_call = clock.looping_call(
-            self._renew, _RENEWAL_INTERVAL_MS, store, lock_name, lock_key, token
+            self._renew,
+            _RENEWAL_INTERVAL_MS,
+            store,
+            clock,
+            read_write,
+            lock_name,
+            lock_key,
+            token,
         )
 
         self._dropped = False
@@ -281,6 +353,8 @@ class Lock:
     @wrap_as_background_process("Lock._renew")
     async def _renew(
         store: LockStore,
+        clock: Clock,
+        read_write: bool,
         lock_name: str,
         lock_key: str,
         token: str,
@@ -291,12 +365,34 @@ class Lock:
         don't end up with a reference to `self` in the reactor, which would stop
         this from being cleaned up if we dropped the context manager.
         """
-        await store._renew_lock(lock_name, lock_key, token)
+        table = "worker_read_write_locks" if read_write else "worker_locks"
+        await store.db_pool.simple_update(
+            table=table,
+            keyvalues={
+                "lock_name": lock_name,
+                "lock_key": lock_key,
+                "token": token,
+            },
+            updatevalues={"last_renewed_ts": clock.time_msec()},
+            desc="renew_lock",
+        )
 
     async def is_still_valid(self) -> bool:
         """Check if the lock is still held by us"""
-        return await self._store._is_lock_still_valid(
-            self._lock_name, self._lock_key, self._token
+        last_renewed_ts = await self._store.db_pool.simple_select_one_onecol(
+            table=self._table,
+            keyvalues={
+                "lock_name": self._lock_name,
+                "lock_key": self._lock_key,
+                "token": self._token,
+            },
+            retcol="last_renewed_ts",
+            allow_none=True,
+            desc="is_lock_still_valid",
+        )
+        return (
+            last_renewed_ts is not None
+            and self._clock.time_msec() - _LOCK_TIMEOUT_MS < last_renewed_ts
         )
 
     async def __aenter__(self) -> None:
@@ -325,7 +421,23 @@ class Lock:
         if self._looping_call.running:
             self._looping_call.stop()
 
-        await self._store._drop_lock(self._lock_name, self._lock_key, self._token)
+        await self._store.db_pool.simple_delete(
+            table=self._table,
+            keyvalues={
+                "lock_name": self._lock_name,
+                "lock_key": self._lock_key,
+                "token": self._token,
+            },
+            desc="drop_lock",
+        )
+
+        if self._read_write:
+            self._store._live_read_write_lock_tokens.pop(
+                (self._lock_name, self._lock_key, self._token), None
+            )
+        else:
+            self._store._live_lock_tokens.pop((self._lock_name, self._lock_key), None)
+
         self._dropped = True
 
     def __del__(self) -> None:
diff --git a/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.postgres b/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.postgres
new file mode 100644
index 0000000000..e1a41be9c9
--- /dev/null
+++ b/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.postgres
@@ -0,0 +1,152 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- We implement read/write style locks by using two tables with mutual foreign
+-- key constraints. Note that this implementation is vulnerable to starving
+-- writers if read locks repeatedly get acquired.
+--
+-- The first table (`worker_read_write_locks_mode`) indicates that a given lock
+-- has either been acquired in read mode *or* write mode, but not both. This is
+-- enforced by the unique constraint. Each instance of a lock being acquired is
+-- associated with a random `token`.
+--
+-- The second table (`worker_read_write_locks`) tracks who has currently
+-- acquired a given lock. For a given lock_name/lock_key, there can be multiple
+-- read locks at a time but only one write lock (no mixing read and write locks
+-- at the same time).
+--
+-- The foreign key from the second to first table enforces that for any given
+-- lock the second table cannot have a mix of rows with read or write.
+--
+-- The foreign key from the first to second table enforces that we don't have a
+-- row for a lock in the first table if not in the second table.
+--
+--
+-- Furthermore, we add some triggers to automatically keep the first table up to
+-- date when inserting/deleting from the second table. This reduces the number
+-- of round trips needed to acquire and release locks, as those operations
+-- simply become an INSERT or DELETE. These triggers are added in a separate
+-- delta due to database specific syntax.
+
+
+-- A table to track whether a lock is currently acquired, and if so whether it's
+-- in read or write mode.
+CREATE TABLE worker_read_write_locks_mode (
+    lock_name TEXT NOT NULL,
+    lock_key TEXT NOT NULL,
+    -- Whether this lock is in read (false) or write (true) mode
+    write_lock BOOLEAN NOT NULL,
+    -- A token that has currently acquired the lock. We need this so that we can
+    -- add a foreign key constraint from this table to `worker_read_write_locks`.
+    token TEXT NOT NULL
+);
+
+-- Ensure that we can only have one row per lock
+CREATE UNIQUE INDEX worker_read_write_locks_mode_key ON worker_read_write_locks_mode (lock_name, lock_key);
+-- We need this (redundant) constraint so that we can have a foreign key
+-- constraint against this table.
+CREATE UNIQUE INDEX worker_read_write_locks_mode_type ON worker_read_write_locks_mode (lock_name, lock_key, write_lock);
+
+
+-- A table to track who has currently acquired a given lock.
+CREATE TABLE worker_read_write_locks (
+    lock_name TEXT NOT NULL,
+    lock_key TEXT NOT NULL,
+    -- We write the instance name to ease manual debugging, we don't ever read
+    -- from it.
+    -- Note: instance names aren't guaranteed to be unique.
+    instance_name TEXT NOT NULL,
+    -- Whether the process has taken out a "read" or a "write" lock.
+    write_lock BOOLEAN NOT NULL,
+    -- A random string generated each time an instance takes out a lock. Used by
+    -- the instance to tell whether the lock is still held by it (e.g. in the
+    -- case where the process stalls for a long time the lock may time out and
+    -- be taken out by another instance, at which point the original instance
+    -- can tell it no longer holds the lock as the tokens no longer match).
+    token TEXT NOT NULL,
+    last_renewed_ts BIGINT NOT NULL,
+
+    -- This constraint ensures that a given lock has only been acquired in read
+    -- xor write mode, but not both.
+    FOREIGN KEY (lock_name, lock_key, write_lock) REFERENCES worker_read_write_locks_mode (lock_name, lock_key, write_lock)
+);
+
+CREATE UNIQUE INDEX worker_read_write_locks_key ON worker_read_write_locks (lock_name, lock_key, token);
+-- Ensures that only one instance can acquire a lock in write mode at a time.
+CREATE UNIQUE INDEX worker_read_write_locks_write ON worker_read_write_locks (lock_name, lock_key) WHERE write_lock;
+
+
+-- Add a foreign key constraint to ensure that if a lock is in
+-- `worker_read_write_locks_mode` then there must be a corresponding row in
+-- `worker_read_write_locks` (i.e. we don't accidentally end up with a row in
+-- `worker_read_write_locks_mode` when the lock is not currently acquired).
+--
+-- We only add to PostgreSQL as SQLite does not support adding constraints
+-- after table creation, and so doesn't support "circular" foreign key
+-- constraints.
+ALTER TABLE worker_read_write_locks_mode ADD CONSTRAINT worker_read_write_locks_mode_foreign
+    FOREIGN KEY (lock_name, lock_key, token) REFERENCES worker_read_write_locks(lock_name, lock_key, token) DEFERRABLE INITIALLY DEFERRED;
+
+
+-- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try
+-- and acquire a lock, i.e. insert into `worker_read_write_locks`.
+CREATE OR REPLACE FUNCTION upsert_read_write_lock_parent() RETURNS trigger AS $$
+BEGIN
+    INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
+        VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
+        ON CONFLICT (lock_name, lock_key)
+        DO NOTHING;
+    RETURN NEW;
+END
+$$
+LANGUAGE plpgsql;
+
+CREATE TRIGGER upsert_read_write_lock_parent_trigger BEFORE INSERT ON worker_read_write_locks
+    FOR EACH ROW
+    EXECUTE PROCEDURE upsert_read_write_lock_parent();
+
+
+-- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
+-- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
+-- update the `worker_read_write_locks_mode.token` to match another instance
+-- that has currently acquired the lock, or we delete the row if nobody has
+-- currently acquired a lock.
+CREATE OR REPLACE FUNCTION delete_read_write_lock_parent() RETURNS trigger AS $$
+DECLARE
+    new_token TEXT;
+BEGIN
+    SELECT token INTO new_token FROM worker_read_write_locks
+        WHERE
+            lock_name = OLD.lock_name
+            AND lock_key = OLD.lock_key;
+
+    IF NOT FOUND THEN
+        DELETE FROM worker_read_write_locks_mode
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
+    ELSE
+        UPDATE worker_read_write_locks_mode
+            SET token = new_token
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
+    END IF;
+
+    RETURN NEW;
+END
+$$
+LANGUAGE plpgsql;
+
+CREATE TRIGGER delete_read_write_lock_parent_trigger AFTER DELETE ON worker_read_write_locks
+    FOR EACH ROW
+    EXECUTE PROCEDURE delete_read_write_lock_parent();
diff --git a/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.sqlite b/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.sqlite
new file mode 100644
index 0000000000..be2dfbbb8a
--- /dev/null
+++ b/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.sqlite
@@ -0,0 +1,119 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- c.f. the postgres version for context. The tables and constraints are the
+-- same, however they need to be defined slightly differently to work around how
+-- each database handles circular foreign key references.
+
+
+
+-- A table to track whether a lock is currently acquired, and if so whether it's
+-- in read or write mode.
+CREATE TABLE worker_read_write_locks_mode (
+    lock_name TEXT NOT NULL,
+    lock_key TEXT NOT NULL,
+    -- Whether this lock is in read (false) or write (true) mode
+    write_lock BOOLEAN NOT NULL,
+    -- A token that has currently acquired the lock. We need this so that we can
+    -- add a foreign key constraint from this table to `worker_read_write_locks`.
+    token TEXT NOT NULL,
+    -- Add a foreign key constraint to ensure that if a lock is in
+    -- `worker_read_write_locks_mode` then there must be a corresponding row in
+    -- `worker_read_write_locks` (i.e. we don't accidentally end up with a row in
+    -- `worker_read_write_locks_mode` when the lock is not currently acquired).
+    FOREIGN KEY (lock_name, lock_key, token) REFERENCES worker_read_write_locks(lock_name, lock_key, token) DEFERRABLE INITIALLY DEFERRED
+);
+
+-- Ensure that we can only have one row per lock
+CREATE UNIQUE INDEX worker_read_write_locks_mode_key ON worker_read_write_locks_mode (lock_name, lock_key);
+-- We need this (redundant) constraint so that we can have a foreign key
+-- constraint against this table.
+CREATE UNIQUE INDEX worker_read_write_locks_mode_type ON worker_read_write_locks_mode (lock_name, lock_key, write_lock);
+
+
+-- A table to track who has currently acquired a given lock.
+CREATE TABLE worker_read_write_locks (
+    lock_name TEXT NOT NULL,
+    lock_key TEXT NOT NULL,
+    -- We write the instance name to ease manual debugging, we don't ever read
+    -- from it.
+    -- Note: instance names aren't guaranteed to be unique.
+    instance_name TEXT NOT NULL,
+    -- Whether the process has taken out a "read" or a "write" lock.
+    write_lock BOOLEAN NOT NULL,
+    -- A random string generated each time an instance takes out a lock. Used by
+    -- the instance to tell whether the lock is still held by it (e.g. in the
+    -- case where the process stalls for a long time the lock may time out and
+    -- be taken out by another instance, at which point the original instance
+    -- can tell it no longer holds the lock as the tokens no longer match).
+    token TEXT NOT NULL,
+    last_renewed_ts BIGINT NOT NULL,
+
+    -- This constraint ensures that a given lock has only been acquired in read
+    -- xor write mode, but not both.
+    FOREIGN KEY (lock_name, lock_key, write_lock) REFERENCES worker_read_write_locks_mode (lock_name, lock_key, write_lock)
+);
+
+CREATE UNIQUE INDEX worker_read_write_locks_key ON worker_read_write_locks (lock_name, lock_key, token);
+-- Ensures that only one instance can acquire a lock in write mode at a time.
+CREATE UNIQUE INDEX worker_read_write_locks_write ON worker_read_write_locks (lock_name, lock_key) WHERE write_lock;
+
+
+-- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try
+-- and acquire a lock, i.e. insert into `worker_read_write_locks`.
+CREATE TRIGGER IF NOT EXISTS upsert_read_write_lock_parent_trigger
+BEFORE INSERT ON worker_read_write_locks
+FOR EACH ROW
+BEGIN
+    -- First ensure that `worker_read_write_locks_mode` doesn't have stale
+    -- entries in it, as on SQLite we don't have the foreign key constraint to
+    -- enforce this.
+    DELETE FROM worker_read_write_locks_mode
+        WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key
+        AND NOT EXISTS (
+            SELECT 1 FROM worker_read_write_locks
+            WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key
+        );
+
+    INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
+        VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
+        ON CONFLICT (lock_name, lock_key)
+        DO NOTHING;
+END;
+
+-- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
+-- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
+-- update the `worker_read_write_locks_mode.token` to match another instance
+-- that has currently acquired the lock, or we delete the row if nobody has
+-- currently acquired a lock.
+CREATE TRIGGER IF NOT EXISTS delete_read_write_lock_parent_trigger
+AFTER DELETE ON worker_read_write_locks
+FOR EACH ROW
+BEGIN
+    DELETE FROM worker_read_write_locks_mode
+        WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
+        AND NOT EXISTS (
+            SELECT 1 FROM worker_read_write_locks
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
+        );
+
+    UPDATE worker_read_write_locks_mode
+        SET token = (
+            SELECT token FROM worker_read_write_locks
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
+        )
+        WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
+END;
diff --git a/tests/storage/databases/main/test_lock.py b/tests/storage/databases/main/test_lock.py
index 56cb49d9b5..ad454f6dd8 100644
--- a/tests/storage/databases/main/test_lock.py
+++ b/tests/storage/databases/main/test_lock.py
@@ -166,4 +166,285 @@ class LockTestCase(unittest.HomeserverTestCase):
         # Now call the shutdown code
         self.get_success(self.store._on_shutdown())
 
-        self.assertEqual(self.store._live_tokens, {})
+        self.assertEqual(self.store._live_lock_tokens, {})
+
+
+class ReadWriteLockTestCase(unittest.HomeserverTestCase):
+    """Test the read/write lock implementation."""
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.store = hs.get_datastores().main
+
+    def test_acquire_write_contention(self) -> None:
+        """Test that we can only acquire one write lock at a time"""
+        # Track the number of tasks holding the lock.
+        # Should be at most 1.
+        in_lock = 0
+        max_in_lock = 0
+
+        release_lock: "Deferred[None]" = Deferred()
+
+        async def task() -> None:
+            nonlocal in_lock
+            nonlocal max_in_lock
+
+            lock = await self.store.try_acquire_read_write_lock(
+                "name", "key", write=True
+            )
+            if not lock:
+                return
+
+            async with lock:
+                in_lock += 1
+                max_in_lock = max(max_in_lock, in_lock)
+
+                # Block to allow other tasks to attempt to take the lock.
+                await release_lock
+
+                in_lock -= 1
+
+        # Start 3 tasks.
+        task1 = defer.ensureDeferred(task())
+        task2 = defer.ensureDeferred(task())
+        task3 = defer.ensureDeferred(task())
+
+        # Give the reactor a kick so that the database transaction returns.
+        self.pump()
+
+        release_lock.callback(None)
+
+        # Run the tasks to completion.
+        # To work around `Linearizer`s using a different reactor to sleep when
+        # contended (#12841), we call `runUntilCurrent` on
+        # `twisted.internet.reactor`, which is a different reactor to that used
+        # by the homeserver.
+        assert isinstance(reactor, ReactorBase)
+        self.get_success(task1)
+        reactor.runUntilCurrent()
+        self.get_success(task2)
+        reactor.runUntilCurrent()
+        self.get_success(task3)
+
+        # At most one task should have held the lock at a time.
+        self.assertEqual(max_in_lock, 1)
+
+    def test_acquire_multiple_reads(self) -> None:
+        """Test that we can acquire multiple read locks at a time"""
+        # Track the number of tasks holding the lock.
+        in_lock = 0
+        max_in_lock = 0
+
+        release_lock: "Deferred[None]" = Deferred()
+
+        async def task() -> None:
+            nonlocal in_lock
+            nonlocal max_in_lock
+
+            lock = await self.store.try_acquire_read_write_lock(
+                "name", "key", write=False
+            )
+            if not lock:
+                return
+
+            async with lock:
+                in_lock += 1
+                max_in_lock = max(max_in_lock, in_lock)
+
+                # Block to allow other tasks to attempt to take the lock.
+                await release_lock
+
+                in_lock -= 1
+
+        # Start 3 tasks.
+        task1 = defer.ensureDeferred(task())
+        task2 = defer.ensureDeferred(task())
+        task3 = defer.ensureDeferred(task())
+
+        # Give the reactor a kick so that the database transaction returns.
+        self.pump()
+
+        release_lock.callback(None)
+
+        # Run the tasks to completion.
+        # To work around `Linearizer`s using a different reactor to sleep when
+        # contended (#12841), we call `runUntilCurrent` on
+        # `twisted.internet.reactor`, which is a different reactor to that used
+        # by the homeserver.
+        assert isinstance(reactor, ReactorBase)
+        self.get_success(task1)
+        reactor.runUntilCurrent()
+        self.get_success(task2)
+        reactor.runUntilCurrent()
+        self.get_success(task3)
+
+        # All three tasks should have held the read lock at the same time.
+        self.assertEqual(max_in_lock, 3)
+
+    def test_write_lock_acquired(self) -> None:
+        """Test that we can take out a write lock and that while we hold it
+        nobody else can take it out.
+        """
+        # First to acquire this lock, so it should complete
+        lock = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key", write=True)
+        )
+        assert lock is not None
+
+        # Enter the context manager
+        self.get_success(lock.__aenter__())
+
+        # Attempting to acquire the lock again fails, as both read and write.
+        lock2 = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key", write=True)
+        )
+        self.assertIsNone(lock2)
+
+        lock3 = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key", write=False)
+        )
+        self.assertIsNone(lock3)
+
+        # Calling `is_still_valid` reports true.
+        self.assertTrue(self.get_success(lock.is_still_valid()))
+
+        # Drop the lock
+        self.get_success(lock.__aexit__(None, None, None))
+
+        # We can now acquire the lock again.
+        lock4 = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key", write=True)
+        )
+        assert lock4 is not None
+        self.get_success(lock4.__aenter__())
+        self.get_success(lock4.__aexit__(None, None, None))
+
+    def test_read_lock_acquired(self) -> None:
+        """Test that we can take out a read lock and that while we hold it
+        only other reads can use it.
+        """
+        # First to acquire this lock, so it should complete
+        lock = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key", write=False)
+        )
+        assert lock is not None
+
+        # Enter the context manager
+        self.get_success(lock.__aenter__())
+
+        # Attempting to acquire the write lock fails
+        lock2 = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key", write=True)
+        )
+        self.assertIsNone(lock2)
+
+        # Attempting to acquire a read lock succeeds
+        lock3 = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key", write=False)
+        )
+        assert lock3 is not None
+        self.get_success(lock3.__aenter__())
+
+        # Calling `is_still_valid` reports true.
+        self.assertTrue(self.get_success(lock.is_still_valid()))
+
+        # Drop the first lock
+        self.get_success(lock.__aexit__(None, None, None))
+
+        # Attempting to acquire the write lock still fails, as lock3 is still
+        # active.
+        lock4 = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key", write=True)
+        )
+        self.assertIsNone(lock4)
+
+        # Drop the still open third lock
+        self.get_success(lock3.__aexit__(None, None, None))
+
+        # We can now acquire the lock again.
+        lock5 = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key", write=True)
+        )
+        assert lock5 is not None
+        self.get_success(lock5.__aenter__())
+        self.get_success(lock5.__aexit__(None, None, None))
+
+    def test_maintain_lock(self) -> None:
+        """Test that we don't time out locks while they're still active (lock is
+        renewed in the background if the process is still alive)"""
+
+        lock = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key", write=True)
+        )
+        assert lock is not None
+
+        self.get_success(lock.__aenter__())
+
+        # Wait for ages with the lock, we should not be able to get the lock.
+        self.reactor.advance(5 * _LOCK_TIMEOUT_MS / 1000)
+        self.pump()
+
+        lock2 = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key", write=True)
+        )
+        self.assertIsNone(lock2)
+
+        self.get_success(lock.__aexit__(None, None, None))
+
+    def test_timeout_lock(self) -> None:
+        """Test that we time out locks if they're not updated for ages"""
+
+        lock = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key", write=True)
+        )
+        assert lock is not None
+
+        self.get_success(lock.__aenter__())
+
+        # We simulate the process getting stuck by cancelling the looping call
+        # that keeps the lock active.
+        lock._looping_call.stop()
+
+        # Wait for the lock to timeout.
+        self.reactor.advance(2 * _LOCK_TIMEOUT_MS / 1000)
+
+        lock2 = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key", write=True)
+        )
+        self.assertIsNotNone(lock2)
+
+        self.assertFalse(self.get_success(lock.is_still_valid()))
+
+    def test_drop(self) -> None:
+        """Test that dropping the context manager means we stop renewing the lock"""
+
+        lock = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key", write=True)
+        )
+        self.assertIsNotNone(lock)
+
+        del lock
+
+        # Wait for the lock to timeout.
+        self.reactor.advance(2 * _LOCK_TIMEOUT_MS / 1000)
+
+        lock2 = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key", write=True)
+        )
+        self.assertIsNotNone(lock2)
+
+    def test_shutdown(self) -> None:
+        """Test that shutting down Synapse releases the locks"""
+        # Acquire two locks
+        lock = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key", write=True)
+        )
+        self.assertIsNotNone(lock)
+        lock2 = self.get_success(
+            self.store.try_acquire_read_write_lock("name", "key2", write=True)
+        )
+        self.assertIsNotNone(lock2)
+
+        # Now call the shutdown code
+        self.get_success(self.store._on_shutdown())
+
+        self.assertEqual(self.store._live_read_write_lock_tokens, {})
-- 
cgit 1.5.1


From 561d06b481176f61ed12f5a4723b127ff8624662 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Wed, 5 Jul 2023 18:45:42 -0500
Subject: Remove support for Python 3.7 (#15851)

Fix https://github.com/matrix-org/synapse/issues/15836
---
 .ci/scripts/calculate_jobs.py           | 11 ++++---
 .github/workflows/release-artifacts.yml |  2 +-
 .github/workflows/tests.yml             |  6 ++--
 changelog.d/15851.removal               |  1 +
 docker/Dockerfile-dhvirtualenv          | 50 ++++++++++++++----------------
 docs/setup/installation.md              |  2 +-
 docs/upgrade.md                         | 12 +++++++
 poetry.lock                             | 55 ++-------------------------------
 pyproject.toml                          |  5 +--
 synapse/__init__.py                     |  4 +--
 synapse/util/check_dependencies.py      |  5 +--
 tests/metrics/test_metrics.py           | 10 ++----
 12 files changed, 55 insertions(+), 108 deletions(-)
 create mode 100644 changelog.d/15851.removal

(limited to 'synapse')

diff --git a/.ci/scripts/calculate_jobs.py b/.ci/scripts/calculate_jobs.py
index b41ec0b6e2..c2c18b48e3 100755
--- a/.ci/scripts/calculate_jobs.py
+++ b/.ci/scripts/calculate_jobs.py
@@ -29,11 +29,12 @@ IS_PR = os.environ["GITHUB_REF"].startswith("refs/pull/")
 
 # First calculate the various trial jobs.
 #
-# For each type of test we only run on Py3.7 on PRs
+# For PRs, we only run each type of test with the oldest Python version supported (which
+# is Python 3.8 right now)
 
 trial_sqlite_tests = [
     {
-        "python-version": "3.7",
+        "python-version": "3.8",
         "database": "sqlite",
         "extras": "all",
     }
@@ -46,13 +47,13 @@ if not IS_PR:
             "database": "sqlite",
             "extras": "all",
         }
-        for version in ("3.8", "3.9", "3.10", "3.11")
+        for version in ("3.9", "3.10", "3.11")
     )
 
 
 trial_postgres_tests = [
     {
-        "python-version": "3.7",
+        "python-version": "3.8",
         "database": "postgres",
         "postgres-version": "11",
         "extras": "all",
@@ -71,7 +72,7 @@ if not IS_PR:
 
 trial_no_extra_tests = [
     {
-        "python-version": "3.7",
+        "python-version": "3.8",
         "database": "sqlite",
         "extras": "",
     }
diff --git a/.github/workflows/release-artifacts.yml b/.github/workflows/release-artifacts.yml
index 0981200401..f331f67d97 100644
--- a/.github/workflows/release-artifacts.yml
+++ b/.github/workflows/release-artifacts.yml
@@ -144,7 +144,7 @@ jobs:
 
       - name: Only build a single wheel on PR
         if: startsWith(github.ref, 'refs/pull/')
-        run: echo "CIBW_BUILD="cp37-manylinux_${{ matrix.arch }}"" >> $GITHUB_ENV
+        run: echo "CIBW_BUILD="cp38-manylinux_${{ matrix.arch }}"" >> $GITHUB_ENV
 
       - name: Build wheels
         run: python -m cibuildwheel --output-dir wheelhouse
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 6c22984997..0a01e82984 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -320,7 +320,7 @@ jobs:
 
       - uses: actions/setup-python@v4
         with:
-          python-version: '3.7'
+          python-version: '3.8'
 
       - name: Prepare old deps
         if: steps.cache-poetry-old-deps.outputs.cache-hit != 'true'
@@ -362,7 +362,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["pypy-3.7"]
+        python-version: ["pypy-3.8"]
         extras: ["all"]
 
     steps:
@@ -477,7 +477,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - python-version: "3.7"
+          - python-version: "3.8"
             postgres-version: "11"
 
           - python-version: "3.11"
diff --git a/changelog.d/15851.removal b/changelog.d/15851.removal
new file mode 100644
index 0000000000..e08df4c136
--- /dev/null
+++ b/changelog.d/15851.removal
@@ -0,0 +1 @@
+Remove support for Python 3.7.
diff --git a/docker/Dockerfile-dhvirtualenv b/docker/Dockerfile-dhvirtualenv
index 861129ebc2..b7679924c2 100644
--- a/docker/Dockerfile-dhvirtualenv
+++ b/docker/Dockerfile-dhvirtualenv
@@ -28,12 +28,12 @@ FROM docker.io/library/${distro} as builder
 
 RUN apt-get update -qq -o Acquire::Languages=none
 RUN env DEBIAN_FRONTEND=noninteractive apt-get install \
-        -yqq --no-install-recommends \
-        build-essential \
-        ca-certificates \
-        devscripts \
-        equivs \
-        wget
+    -yqq --no-install-recommends \
+    build-essential \
+    ca-certificates \
+    devscripts \
+    equivs \
+    wget
 
 # fetch and unpack the package
 # We are temporarily using a fork of dh-virtualenv due to an incompatibility with Python 3.11, which ships with
@@ -62,33 +62,29 @@ FROM docker.io/library/${distro}
 ARG distro=""
 ENV distro ${distro}
 
-# Python < 3.7 assumes LANG="C" means ASCII-only and throws on printing unicode
-# http://bugs.python.org/issue19846
-ENV LANG C.UTF-8
-
 # Install the build dependencies
 #
 # NB: keep this list in sync with the list of build-deps in debian/control
 # TODO: it would be nice to do that automatically.
 RUN apt-get update -qq -o Acquire::Languages=none \
     && env DEBIAN_FRONTEND=noninteractive apt-get install \
-        -yqq --no-install-recommends -o Dpkg::Options::=--force-unsafe-io \
-        build-essential \
-        curl \
-        debhelper \
-        devscripts \
-        libsystemd-dev \
-        lsb-release \
-        pkg-config \
-        python3-dev \
-        python3-pip \
-        python3-setuptools \
-        python3-venv \
-        sqlite3 \
-        libpq-dev \
-        libicu-dev \
-        pkg-config \
-        xmlsec1
+    -yqq --no-install-recommends -o Dpkg::Options::=--force-unsafe-io \
+    build-essential \
+    curl \
+    debhelper \
+    devscripts \
+    libsystemd-dev \
+    lsb-release \
+    pkg-config \
+    python3-dev \
+    python3-pip \
+    python3-setuptools \
+    python3-venv \
+    sqlite3 \
+    libpq-dev \
+    libicu-dev \
+    pkg-config \
+    xmlsec1
 
 # Install rust and ensure it's in the PATH
 ENV RUSTUP_HOME=/rust
diff --git a/docs/setup/installation.md b/docs/setup/installation.md
index 86e506a3e2..4ca8c6b697 100644
--- a/docs/setup/installation.md
+++ b/docs/setup/installation.md
@@ -200,7 +200,7 @@ When following this route please make sure that the [Platform-specific prerequis
 System requirements:
 
 - POSIX-compliant system (tested on Linux & OS X)
-- Python 3.7 or later, up to Python 3.11.
+- Python 3.8 or later, up to Python 3.11.
 - At least 1GB of free RAM if you want to join large public rooms like #matrix:matrix.org
 
 If building on an uncommon architecture for which pre-built wheels are
diff --git a/docs/upgrade.md b/docs/upgrade.md
index 4cd38b1393..384f4010b4 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -87,6 +87,18 @@ process, for example:
     wget https://packages.matrix.org/debian/pool/main/m/matrix-synapse-py3/matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     ```
+
+# Upgrading to v1.88.0
+
+## Minimum supported Python version
+
+The minimum supported Python version has been increased from v3.7 to v3.8.
+You will need Python 3.8 or later to run Synapse v1.88.0 (due out July 18th, 2023).
+
+If you use current versions of the Matrix.org-distributed Debian
+packages or Docker images, no action is required.
+
+
 # Upgrading to v1.86.0
 
 ## Minimum supported Rust version
diff --git a/poetry.lock b/poetry.lock
index 9aaf5c7de7..c62337053e 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -41,9 +41,6 @@ files = [
     {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"},
 ]
 
-[package.dependencies]
-importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
-
 [package.extras]
 cov = ["attrs[tests]", "coverage[toml] (>=5.3)"]
 dev = ["attrs[docs,tests]", "pre-commit"]
@@ -190,7 +187,6 @@ packaging = ">=22.0"
 pathspec = ">=0.9.0"
 platformdirs = ">=2"
 tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
-typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\" and implementation_name == \"cpython\""}
 typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}
 
 [package.extras]
@@ -412,7 +408,6 @@ files = [
 
 [package.dependencies]
 colorama = {version = "*", markers = "platform_system == \"Windows\""}
-importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
 
 [[package]]
 name = "click-default-group"
@@ -601,7 +596,6 @@ files = [
 
 [package.dependencies]
 gitdb = ">=4.0.1,<5"
-typing-extensions = {version = ">=3.7.4.3", markers = "python_version < \"3.8\""}
 
 [[package]]
 name = "hiredis"
@@ -847,7 +841,6 @@ files = [
 ]
 
 [package.dependencies]
-typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""}
 zipp = ">=0.5"
 
 [package.extras]
@@ -987,11 +980,9 @@ files = [
 
 [package.dependencies]
 attrs = ">=17.4.0"
-importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
 importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""}
 pkgutil-resolve-name = {version = ">=1.3.10", markers = "python_version < \"3.9\""}
 pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2"
-typing-extensions = {version = "*", markers = "python_version < \"3.8\""}
 
 [package.extras]
 format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"]
@@ -1199,7 +1190,6 @@ files = [
 
 [package.dependencies]
 mdurl = ">=0.1,<1.0"
-typing_extensions = {version = ">=3.7.4", markers = "python_version < \"3.8\""}
 
 [package.extras]
 benchmarking = ["psutil", "pytest", "pytest-benchmark"]
@@ -1283,7 +1273,6 @@ files = [
 
 [package.dependencies]
 attrs = "*"
-importlib-metadata = {version = ">=1.4", markers = "python_version < \"3.8\""}
 
 [package.extras]
 dev = ["aiounittest", "black (==22.3.0)", "build (==0.8.0)", "flake8 (==4.0.1)", "isort (==5.9.3)", "mypy (==0.910)", "tox", "twine (==4.0.1)", "twisted"]
@@ -1459,7 +1448,6 @@ files = [
 [package.dependencies]
 mypy-extensions = ">=0.4.3"
 tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
-typed-ast = {version = ">=1.4.0,<2", markers = "python_version < \"3.8\""}
 typing-extensions = ">=3.10"
 
 [package.extras]
@@ -1721,9 +1709,6 @@ files = [
     {file = "platformdirs-3.1.1.tar.gz", hash = "sha256:024996549ee88ec1a9aa99ff7f8fc819bb59e2c3477b410d90a16d32d6e707aa"},
 ]
 
-[package.dependencies]
-typing-extensions = {version = ">=4.4", markers = "python_version < \"3.8\""}
-
 [package.extras]
 docs = ["furo (>=2022.12.7)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.22,!=1.23.4)"]
 test = ["appdirs (==1.4.4)", "covdefaults (>=2.2.2)", "pytest (>=7.2.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"]
@@ -2060,7 +2045,6 @@ files = [
 [package.dependencies]
 cryptography = ">=3.1"
 defusedxml = "*"
-importlib-metadata = {version = ">=1.7.0", markers = "python_version < \"3.8\""}
 importlib-resources = {version = "*", markers = "python_version < \"3.9\""}
 pyopenssl = "*"
 python-dateutil = "*"
@@ -2410,9 +2394,7 @@ files = [
 
 [package.dependencies]
 canonicaljson = ">=1.0.0"
-importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
 pynacl = ">=0.3.0"
-typing-extensions = {version = ">=3.5", markers = "python_version < \"3.8\""}
 unpaddedbase64 = ">=1.0.1"
 
 [package.extras]
@@ -2852,39 +2834,6 @@ files = [
 six = "*"
 twisted = "*"
 
-[[package]]
-name = "typed-ast"
-version = "1.5.4"
-description = "a fork of Python 2 and 3 ast modules with type comment support"
-optional = false
-python-versions = ">=3.6"
-files = [
-    {file = "typed_ast-1.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:669dd0c4167f6f2cd9f57041e03c3c2ebf9063d0757dc89f79ba1daa2bfca9d4"},
-    {file = "typed_ast-1.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:211260621ab1cd7324e0798d6be953d00b74e0428382991adfddb352252f1d62"},
-    {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:267e3f78697a6c00c689c03db4876dd1efdfea2f251a5ad6555e82a26847b4ac"},
-    {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c542eeda69212fa10a7ada75e668876fdec5f856cd3d06829e6aa64ad17c8dfe"},
-    {file = "typed_ast-1.5.4-cp310-cp310-win_amd64.whl", hash = "sha256:a9916d2bb8865f973824fb47436fa45e1ebf2efd920f2b9f99342cb7fab93f72"},
-    {file = "typed_ast-1.5.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:79b1e0869db7c830ba6a981d58711c88b6677506e648496b1f64ac7d15633aec"},
-    {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a94d55d142c9265f4ea46fab70977a1944ecae359ae867397757d836ea5a3f47"},
-    {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:183afdf0ec5b1b211724dfef3d2cad2d767cbefac291f24d69b00546c1837fb6"},
-    {file = "typed_ast-1.5.4-cp36-cp36m-win_amd64.whl", hash = "sha256:639c5f0b21776605dd6c9dbe592d5228f021404dafd377e2b7ac046b0349b1a1"},
-    {file = "typed_ast-1.5.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cf4afcfac006ece570e32d6fa90ab74a17245b83dfd6655a6f68568098345ff6"},
-    {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed855bbe3eb3715fca349c80174cfcfd699c2f9de574d40527b8429acae23a66"},
-    {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6778e1b2f81dfc7bc58e4b259363b83d2e509a65198e85d5700dfae4c6c8ff1c"},
-    {file = "typed_ast-1.5.4-cp37-cp37m-win_amd64.whl", hash = "sha256:0261195c2062caf107831e92a76764c81227dae162c4f75192c0d489faf751a2"},
-    {file = "typed_ast-1.5.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2efae9db7a8c05ad5547d522e7dbe62c83d838d3906a3716d1478b6c1d61388d"},
-    {file = "typed_ast-1.5.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7d5d014b7daa8b0bf2eaef684295acae12b036d79f54178b92a2b6a56f92278f"},
-    {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:370788a63915e82fd6f212865a596a0fefcbb7d408bbbb13dea723d971ed8bdc"},
-    {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4e964b4ff86550a7a7d56345c7864b18f403f5bd7380edf44a3c1fb4ee7ac6c6"},
-    {file = "typed_ast-1.5.4-cp38-cp38-win_amd64.whl", hash = "sha256:683407d92dc953c8a7347119596f0b0e6c55eb98ebebd9b23437501b28dcbb8e"},
-    {file = "typed_ast-1.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4879da6c9b73443f97e731b617184a596ac1235fe91f98d279a7af36c796da35"},
-    {file = "typed_ast-1.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e123d878ba170397916557d31c8f589951e353cc95fb7f24f6bb69adc1a8a97"},
-    {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebd9d7f80ccf7a82ac5f88c521115cc55d84e35bf8b446fcd7836eb6b98929a3"},
-    {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98f80dee3c03455e92796b58b98ff6ca0b2a6f652120c263efdba4d6c5e58f72"},
-    {file = "typed_ast-1.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:0fdbcf2fef0ca421a3f5912555804296f0b0960f0418c440f5d6d3abb549f3e1"},
-    {file = "typed_ast-1.5.4.tar.gz", hash = "sha256:39e21ceb7388e4bb37f4c679d72707ed46c2fbf2a5609b8b8ebc4b067d977df2"},
-]
-
 [[package]]
 name = "types-bleach"
 version = "6.0.0.3"
@@ -3293,5 +3242,5 @@ user-search = ["pyicu"]
 
 [metadata]
 lock-version = "2.0"
-python-versions = "^3.7.1"
-content-hash = "7f31754a1009d7b6c9a1bd7221a0b243ffd510f362c28f0da417aaac16757a87"
+python-versions = "^3.8.0"
+content-hash = "0832381cc9e7065e8d95c810d732aa031b98d55cf188719989b12d841993e62e"
diff --git a/pyproject.toml b/pyproject.toml
index 192a07756b..a6e3a935a9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -147,7 +147,7 @@ synapse_review_recent_signups = "synapse._scripts.review_recent_signups:main"
 update_synapse_database = "synapse._scripts.update_synapse_database:main"
 
 [tool.poetry.dependencies]
-python = "^3.7.1"
+python = "^3.8.0"
 
 # Mandatory Dependencies
 # ----------------------
@@ -203,9 +203,6 @@ ijson = ">=3.1.4"
 matrix-common = "^1.3.0"
 # We need packaging.requirements.Requirement, added in 16.1.
 packaging = ">=16.1"
-# At the time of writing, we only use functions from the version `importlib.metadata`
-# which shipped in Python 3.8. This corresponds to version 1.4 of the backport.
-importlib_metadata = { version = ">=1.4", python = "<3.8" }
 # This is the most recent version of Pydantic with available on common distros.
 # We are currently incompatible with >=2.0.0: (https://github.com/matrix-org/synapse/issues/15858)
 pydantic = "^1.7.4"
diff --git a/synapse/__init__.py b/synapse/__init__.py
index b97ee59f15..6c1801862b 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -25,8 +25,8 @@ from synapse.util.rust import check_rust_lib_up_to_date
 from synapse.util.stringutils import strtobool
 
 # Check that we're not running on an unsupported Python version.
-if sys.version_info < (3, 7):
-    print("Synapse requires Python 3.7 or above.")
+if sys.version_info < (3, 8):
+    print("Synapse requires Python 3.8 or above.")
     sys.exit(1)
 
 # Allow using the asyncio reactor via env var.
diff --git a/synapse/util/check_dependencies.py b/synapse/util/check_dependencies.py
index 1c0fde4966..114130a08f 100644
--- a/synapse/util/check_dependencies.py
+++ b/synapse/util/check_dependencies.py
@@ -21,16 +21,13 @@ require. But this is probably just symptomatic of Python's package management.
 """
 
 import logging
+from importlib import metadata
 from typing import Iterable, NamedTuple, Optional
 
 from packaging.requirements import Requirement
 
 DISTRIBUTION_NAME = "matrix-synapse"
 
-try:
-    from importlib import metadata
-except ImportError:
-    import importlib_metadata as metadata  # type: ignore[no-redef]
 
 __all__ = ["check_requirements"]
 
diff --git a/tests/metrics/test_metrics.py b/tests/metrics/test_metrics.py
index 7c3656d049..d14876826c 100644
--- a/tests/metrics/test_metrics.py
+++ b/tests/metrics/test_metrics.py
@@ -12,19 +12,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from importlib import metadata
 from typing import Dict, Tuple
-
-from typing_extensions import Protocol
-
-try:
-    from importlib import metadata
-except ImportError:
-    import importlib_metadata as metadata  # type: ignore[no-redef]
-
 from unittest.mock import patch
 
 from pkg_resources import parse_version
 from prometheus_client.core import Sample
+from typing_extensions import Protocol
 
 from synapse.app._base import _set_prometheus_client_use_created_metrics
 from synapse.metrics import REGISTRY, InFlightGauge, generate_latest
-- 
cgit 1.5.1


From b07b14b494ae1dd564b4c44f844c9a9545b3d08a Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Wed, 5 Jul 2023 18:53:55 -0500
Subject: Federation outbound proxy (#15773)

Allow configuring the set of workers to proxy outbound federation traffic through (`outbound_federation_restricted_to`).

This is useful when you have a worker setup with `federation_sender` instances responsible for sending outbound federation requests and want to make sure *all* outbound federation traffic goes through those instances. Before this change, the generic workers would still contact federation themselves for things like profile lookups, backfill, etc. This PR allows you to set stricter access controls/firewall rules for all workers and only allow the `federation_sender` instances to contact the outside world.

The original code is from @erikjohnston's branches which I've gotten in-shape to merge.
---
 changelog.d/15773.feature                         |   1 +
 docs/usage/configuration/config_documentation.md  |  31 ++-
 docs/workers.md                                   |  20 ++
 synapse/app/_base.py                              |   2 +
 synapse/app/generic_worker.py                     |   1 +
 synapse/app/homeserver.py                         |   1 +
 synapse/config/workers.py                         |  40 +++-
 synapse/http/client.py                            |   7 +-
 synapse/http/matrixfederationclient.py            | 132 +++++++++++-
 synapse/http/proxy.py                             | 249 ++++++++++++++++++++++
 synapse/http/proxyagent.py                        |  79 ++++++-
 synapse/http/server.py                            |  55 ++---
 synapse/http/site.py                              |  26 ++-
 tests/app/test_openid_listener.py                 |   8 +-
 tests/handlers/test_device.py                     |   3 +-
 tests/handlers/test_federation.py                 |   2 +-
 tests/handlers/test_presence.py                   |   1 -
 tests/handlers/test_typing.py                     |  10 +
 tests/http/test_matrixfederationclient.py         | 189 +++++++++++++++-
 tests/http/test_proxy.py                          |  53 +++++
 tests/replication/_base.py                        |   3 +-
 tests/replication/test_federation_sender_shard.py |  22 +-
 tests/rest/client/test_presence.py                |   1 -
 tests/rest/client/test_rooms.py                   |   2 -
 tests/storage/test_e2e_room_keys.py               |   2 +-
 tests/storage/test_purge.py                       |   2 +-
 tests/storage/test_rollback_worker.py             |   4 +-
 tests/test_server.py                              |  33 +--
 tests/unittest.py                                 |   1 +
 29 files changed, 890 insertions(+), 90 deletions(-)
 create mode 100644 changelog.d/15773.feature
 create mode 100644 synapse/http/proxy.py
 create mode 100644 tests/http/test_proxy.py

(limited to 'synapse')

diff --git a/changelog.d/15773.feature b/changelog.d/15773.feature
new file mode 100644
index 0000000000..0d77fae2dc
--- /dev/null
+++ b/changelog.d/15773.feature
@@ -0,0 +1 @@
+Allow configuring the set of workers that outbound federation traffic is proxied through, via `outbound_federation_restricted_to`.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 26d7c7900c..89a92c4682 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3930,13 +3930,14 @@ federation_sender_instances:
 ---
 ### `instance_map`
 
-When using workers this should be a map from [`worker_name`](#worker_name) to the
-HTTP replication listener of the worker, if configured, and to the main process.
-Each worker declared under [`stream_writers`](../../workers.md#stream-writers) needs
-a HTTP replication listener, and that listener should be included in the `instance_map`.
-The main process also needs an entry on the `instance_map`, and it should be listed under
-`main` **if even one other worker exists**. Ensure the port matches with what is declared 
-inside the `listener` block for a `replication` listener.
+When using workers this should be a map from [`worker_name`](#worker_name) to the HTTP
+replication listener of the worker, if configured, and to the main process. Each worker
+declared under [`stream_writers`](../../workers.md#stream-writers) or
+[`outbound_federation_restricted_to`](#outbound_federation_restricted_to) needs an HTTP replication listener, and that
+listener should be included in the `instance_map`. The main process also needs an entry
+on the `instance_map`, and it should be listed under `main` **if even one other worker
+exists**. Ensure the port matches with what is declared inside the `listener` block for
+a `replication` listener.
 
 
 Example configuration:
@@ -3966,6 +3967,22 @@ stream_writers:
   typing: worker1
 ```
 ---
+### `outbound_federation_restricted_to`
+
+When using workers, you can restrict outbound federation traffic to only go through a
+specific subset of workers. Any worker specified here must also be in the
+[`instance_map`](#instance_map).
+
+```yaml
+outbound_federation_restricted_to:
+  - federation_sender1
+  - federation_sender2
+```
+
+Also see the [worker
+documentation](../../workers.md#restrict-outbound-federation-traffic-to-a-specific-set-of-workers)
+for more info.
+---
 ### `run_background_tasks_on`
 
 The [worker](../../workers.md#background-tasks) that is used to run
diff --git a/docs/workers.md b/docs/workers.md
index 735128762a..303e0f0e7a 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -531,6 +531,26 @@ the stream writer for the `presence` stream:
 
     ^/_matrix/client/(api/v1|r0|v3|unstable)/presence/
 
+#### Restrict outbound federation traffic to a specific set of workers
+
+The `outbound_federation_restricted_to` configuration is useful to make sure outbound
+federation traffic only goes through a specified subset of workers. This allows you to
+set more strict access controls (like a firewall) for all workers and only allow the
+`federation_sender` workers to contact the outside world.
+
+```yaml
+instance_map:
+    main:
+        host: localhost
+        port: 8030
+    federation_sender1:
+        host: localhost
+        port: 8034
+
+outbound_federation_restricted_to:
+  - federation_sender1
+```
+
 #### Background tasks
 
 There is also support for moving background tasks to a separate
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 936b1b0430..938ab40f27 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -386,6 +386,7 @@ def listen_unix(
 
 
 def listen_http(
+    hs: "HomeServer",
     listener_config: ListenerConfig,
     root_resource: Resource,
     version_string: str,
@@ -406,6 +407,7 @@ def listen_http(
         version_string,
         max_request_body_size=max_request_body_size,
         reactor=reactor,
+        federation_agent=hs.get_federation_http_client().agent,
     )
 
     if isinstance(listener_config, TCPListenerConfig):
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 7406c3948c..dc79efcc14 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -221,6 +221,7 @@ class GenericWorkerServer(HomeServer):
         root_resource = create_resource_tree(resources, OptionsResource())
 
         _base.listen_http(
+            self,
             listener_config,
             root_resource,
             self.version_string,
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 84236ac299..f188c7265a 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -139,6 +139,7 @@ class SynapseHomeServer(HomeServer):
             root_resource = OptionsResource()
 
         ports = listen_http(
+            self,
             listener_config,
             create_resource_tree(resources, root_resource),
             self.version_string,
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 38e13dd7b5..0b9789160c 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -15,7 +15,7 @@
 
 import argparse
 import logging
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List, Optional, Union
 
 import attr
 from pydantic import BaseModel, Extra, StrictBool, StrictInt, StrictStr
@@ -148,6 +148,27 @@ class WriterLocations:
     )
 
 
+@attr.s(auto_attribs=True)
+class OutboundFederationRestrictedTo:
+    """Whether we limit outbound federation to a certain set of instances.
+
+    Attributes:
+        instances: optional list of instances that can make outbound federation
+            requests. If None then all instances can make federation requests.
+        locations: list of instance locations to connect to proxy via.
+    """
+
+    instances: Optional[List[str]]
+    locations: List[InstanceLocationConfig] = attr.Factory(list)
+
+    def __contains__(self, instance: str) -> bool:
+        # It feels a bit dirty to return `True` if `instances` is `None`, but it makes
+        # sense in downstream usage in the sense that if
+        # `outbound_federation_restricted_to` is not configured, then any instance can
+        # talk to federation (no restrictions so always return `True`).
+        return self.instances is None or instance in self.instances
+
+
 class WorkerConfig(Config):
     """The workers are processes run separately to the main synapse process.
     They have their own pid_file and listener configuration. They use the
@@ -357,6 +378,23 @@ class WorkerConfig(Config):
             new_option_name="update_user_directory_from_worker",
         )
 
+        outbound_federation_restricted_to = config.get(
+            "outbound_federation_restricted_to", None
+        )
+        self.outbound_federation_restricted_to = OutboundFederationRestrictedTo(
+            outbound_federation_restricted_to
+        )
+        if outbound_federation_restricted_to:
+            for instance in outbound_federation_restricted_to:
+                if instance not in self.instance_map:
+                    raise ConfigError(
+                        "Instance %r is configured in 'outbound_federation_restricted_to' but does not appear in `instance_map` config."
+                        % (instance,)
+                    )
+                self.outbound_federation_restricted_to.locations.append(
+                    self.instance_map[instance]
+                )
+
     def _should_this_worker_perform_duty(
         self,
         config: Dict[str, Any],
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 09ea93e10d..ca2cdbc6e2 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -1037,7 +1037,12 @@ class _ReadBodyWithMaxSizeProtocol(protocol.Protocol):
         if reason.check(ResponseDone):
             self.deferred.callback(self.length)
         elif reason.check(PotentialDataLoss):
-            # stolen from https://github.com/twisted/treq/pull/49/files
+            # This applies to requests which don't set `Content-Length` or a
+            # `Transfer-Encoding` in the response because in this case the end of the
+            # response is indicated by the connection being closed, an event which may
+            # also be due to a transient network problem or other error. But since this
+            # behavior is expected of some servers (like YouTube), let's ignore it.
+            # Stolen from https://github.com/twisted/treq/pull/49/files
             # http://twistedmatrix.com/trac/ticket/4840
             self.deferred.callback(self.length)
         else:
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index cc4e258b0f..b00396fdc7 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -50,7 +50,7 @@ from twisted.internet.interfaces import IReactorTime
 from twisted.internet.task import Cooperator
 from twisted.web.client import ResponseFailed
 from twisted.web.http_headers import Headers
-from twisted.web.iweb import IBodyProducer, IResponse
+from twisted.web.iweb import IAgent, IBodyProducer, IResponse
 
 import synapse.metrics
 import synapse.util.retryutils
@@ -72,6 +72,7 @@ from synapse.http.client import (
     read_body_with_max_size,
 )
 from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent
+from synapse.http.proxyagent import ProxyAgent
 from synapse.http.types import QueryParams
 from synapse.logging import opentracing
 from synapse.logging.context import make_deferred_yieldable, run_in_background
@@ -393,17 +394,32 @@ class MatrixFederationHttpClient:
         if hs.config.server.user_agent_suffix:
             user_agent = "%s %s" % (user_agent, hs.config.server.user_agent_suffix)
 
-        federation_agent = MatrixFederationAgent(
-            self.reactor,
-            tls_client_options_factory,
-            user_agent.encode("ascii"),
-            hs.config.server.federation_ip_range_allowlist,
-            hs.config.server.federation_ip_range_blocklist,
+        outbound_federation_restricted_to = (
+            hs.config.worker.outbound_federation_restricted_to
         )
+        if hs.get_instance_name() in outbound_federation_restricted_to:
+            # Talk to federation directly
+            federation_agent: IAgent = MatrixFederationAgent(
+                self.reactor,
+                tls_client_options_factory,
+                user_agent.encode("ascii"),
+                hs.config.server.federation_ip_range_allowlist,
+                hs.config.server.federation_ip_range_blocklist,
+            )
+        else:
+            # We need to talk to federation via the proxy via one of the configured
+            # locations
+            federation_proxies = outbound_federation_restricted_to.locations
+            federation_agent = ProxyAgent(
+                self.reactor,
+                self.reactor,
+                tls_client_options_factory,
+                federation_proxies=federation_proxies,
+            )
 
         # Use a BlocklistingAgentWrapper to prevent circumventing the IP
         # blocking via IP literals in server names
-        self.agent = BlocklistingAgentWrapper(
+        self.agent: IAgent = BlocklistingAgentWrapper(
             federation_agent,
             ip_blocklist=hs.config.server.federation_ip_range_blocklist,
         )
@@ -412,7 +428,6 @@ class MatrixFederationHttpClient:
         self._store = hs.get_datastores().main
         self.version_string_bytes = hs.version_string.encode("ascii")
         self.default_timeout_seconds = hs.config.federation.client_timeout_ms / 1000
-
         self.max_long_retry_delay_seconds = (
             hs.config.federation.max_long_retry_delay_ms / 1000
         )
@@ -1131,6 +1146,101 @@ class MatrixFederationHttpClient:
             Succeeds when we get a 2xx HTTP response. The
             result will be the decoded JSON body.
 
+        Raises:
+            HttpResponseException: If we get an HTTP response code >= 300
+                (except 429).
+            NotRetryingDestination: If we are not yet ready to retry this
+                server.
+            FederationDeniedError: If this destination is not on our
+                federation whitelist
+            RequestSendFailed: If there were problems connecting to the
+                remote, due to e.g. DNS failures, connection timeouts etc.
+        """
+        json_dict, _ = await self.get_json_with_headers(
+            destination=destination,
+            path=path,
+            args=args,
+            retry_on_dns_fail=retry_on_dns_fail,
+            timeout=timeout,
+            ignore_backoff=ignore_backoff,
+            try_trailing_slash_on_400=try_trailing_slash_on_400,
+            parser=parser,
+        )
+        return json_dict
+
+    @overload
+    async def get_json_with_headers(
+        self,
+        destination: str,
+        path: str,
+        args: Optional[QueryParams] = None,
+        retry_on_dns_fail: bool = True,
+        timeout: Optional[int] = None,
+        ignore_backoff: bool = False,
+        try_trailing_slash_on_400: bool = False,
+        parser: Literal[None] = None,
+    ) -> Tuple[JsonDict, Dict[bytes, List[bytes]]]:
+        ...
+
+    @overload
+    async def get_json_with_headers(
+        self,
+        destination: str,
+        path: str,
+        args: Optional[QueryParams] = ...,
+        retry_on_dns_fail: bool = ...,
+        timeout: Optional[int] = ...,
+        ignore_backoff: bool = ...,
+        try_trailing_slash_on_400: bool = ...,
+        parser: ByteParser[T] = ...,
+    ) -> Tuple[T, Dict[bytes, List[bytes]]]:
+        ...
+
+    async def get_json_with_headers(
+        self,
+        destination: str,
+        path: str,
+        args: Optional[QueryParams] = None,
+        retry_on_dns_fail: bool = True,
+        timeout: Optional[int] = None,
+        ignore_backoff: bool = False,
+        try_trailing_slash_on_400: bool = False,
+        parser: Optional[ByteParser[T]] = None,
+    ) -> Tuple[Union[JsonDict, T], Dict[bytes, List[bytes]]]:
+        """GETs some json from the given host homeserver and path
+
+        Args:
+            destination: The remote server to send the HTTP request to.
+
+            path: The HTTP path.
+
+            args: A dictionary used to create query strings, defaults to
+                None.
+
+            retry_on_dns_fail: true if the request should be retried on DNS failures
+
+            timeout: number of milliseconds to wait for the response.
+                self._default_timeout (60s) by default.
+
+                Note that we may make several attempts to send the request; this
+                timeout applies to the time spent waiting for response headers for
+                *each* attempt (including connection time) as well as the time spent
+                reading the response body after a 200 response.
+
+            ignore_backoff: true to ignore the historical backoff data
+                and try the request anyway.
+
+            try_trailing_slash_on_400: True if on a 400 M_UNRECOGNIZED
+                response we should try appending a trailing slash to the end of
+                the request. Workaround for #3622 in Synapse <= v0.99.3.
+
+            parser: The parser to use to decode the response. Defaults to
+                parsing as JSON.
+
+        Returns:
+            Succeeds when we get a 2xx HTTP response. The result will be a tuple of the
+            decoded JSON body and a dict of the response headers.
+
         Raises:
             HttpResponseException: If we get an HTTP response code >= 300
                 (except 429).
@@ -1156,6 +1266,8 @@ class MatrixFederationHttpClient:
             timeout=timeout,
         )
 
+        headers = dict(response.headers.getAllRawHeaders())
+
         if timeout is not None:
             _sec_timeout = timeout / 1000
         else:
@@ -1173,7 +1285,7 @@ class MatrixFederationHttpClient:
             parser=parser,
         )
 
-        return body
+        return body, headers
 
     async def delete_json(
         self,
diff --git a/synapse/http/proxy.py b/synapse/http/proxy.py
new file mode 100644
index 0000000000..0874d67760
--- /dev/null
+++ b/synapse/http/proxy.py
@@ -0,0 +1,249 @@
+#  Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+import json
+import logging
+import urllib.parse
+from typing import TYPE_CHECKING, Any, Optional, Set, Tuple, cast
+
+from twisted.internet import protocol
+from twisted.internet.interfaces import ITCPTransport
+from twisted.internet.protocol import connectionDone
+from twisted.python import failure
+from twisted.python.failure import Failure
+from twisted.web.client import ResponseDone
+from twisted.web.http_headers import Headers
+from twisted.web.iweb import IAgent, IResponse
+from twisted.web.resource import IResource
+from twisted.web.server import Site
+
+from synapse.api.errors import Codes
+from synapse.http import QuieterFileBodyProducer
+from synapse.http.server import _AsyncResource
+from synapse.logging.context import make_deferred_yieldable, run_in_background
+from synapse.types import ISynapseReactor
+from synapse.util.async_helpers import timeout_deferred
+
+if TYPE_CHECKING:
+    from synapse.http.site import SynapseRequest
+
+logger = logging.getLogger(__name__)
+
+# "Hop-by-hop" headers (as opposed to "end-to-end" headers) as defined by RFC2616
+# section 13.5.1 and referenced in RFC9110 section 7.6.1. These are meant to only be
+# consumed by the immediate recipient and not be forwarded on.
+HOP_BY_HOP_HEADERS = {
+    "Connection",
+    "Keep-Alive",
+    "Proxy-Authenticate",
+    "Proxy-Authorization",
+    "TE",
+    "Trailers",
+    "Transfer-Encoding",
+    "Upgrade",
+}
+
+
+def parse_connection_header_value(
+    connection_header_value: Optional[bytes],
+) -> Set[str]:
+    """
+    Parse the `Connection` header to determine which headers should not be copied
+    over from the remote response.
+
+    As defined by RFC2616 section 14.10 and RFC9110 section 7.6.1
+
+    Example: `Connection: close, X-Foo, X-Bar` will return `{"Close", "X-Foo", "X-Bar"}`
+
+    Even though "close" is a special directive, let's treat it as just another
+    header for simplicity. If people want to check for this directive, they can simply
+    check for `"Close" in headers`.
+
+    Args:
+        connection_header_value: The value of the `Connection` header.
+
+    Returns:
+        The set of header names that should not be copied over from the remote response.
+        The header names are normalized to canonical capitalization.
+    """
+    headers = Headers()
+    extra_headers_to_remove: Set[str] = set()
+    if connection_header_value:
+        extra_headers_to_remove = {
+            headers._canonicalNameCaps(connection_option.strip()).decode("ascii")
+            for connection_option in connection_header_value.split(b",")
+        }
+
+    return extra_headers_to_remove
+
+
+class ProxyResource(_AsyncResource):
+    """
+    A stub resource that proxies any requests with a `matrix-federation://` scheme
+    through the given `federation_agent` to the remote homeserver and ferries back the
+    info.
+    """
+
+    isLeaf = True
+
+    def __init__(self, reactor: ISynapseReactor, federation_agent: IAgent):
+        super().__init__(True)
+
+        self.reactor = reactor
+        self.agent = federation_agent
+
+    async def _async_render(self, request: "SynapseRequest") -> Tuple[int, Any]:
+        uri = urllib.parse.urlparse(request.uri)
+        assert uri.scheme == b"matrix-federation"
+
+        headers = Headers()
+        for header_name in (b"User-Agent", b"Authorization", b"Content-Type"):
+            header_value = request.getHeader(header_name)
+            if header_value:
+                headers.addRawHeader(header_name, header_value)
+
+        request_deferred = run_in_background(
+            self.agent.request,
+            request.method,
+            request.uri,
+            headers=headers,
+            bodyProducer=QuieterFileBodyProducer(request.content),
+        )
+        request_deferred = timeout_deferred(
+            request_deferred,
+            # This should be set longer than the timeout in `MatrixFederationHttpClient`
+            # so that it has enough time to complete and pass us the data before we give
+            # up.
+            timeout=90,
+            reactor=self.reactor,
+        )
+
+        response = await make_deferred_yieldable(request_deferred)
+
+        return response.code, response
+
+    def _send_response(
+        self,
+        request: "SynapseRequest",
+        code: int,
+        response_object: Any,
+    ) -> None:
+        response = cast(IResponse, response_object)
+        response_headers = cast(Headers, response.headers)
+
+        request.setResponseCode(code)
+
+        # The `Connection` header also defines which headers should not be copied over.
+        connection_header = response_headers.getRawHeaders(b"connection")
+        extra_headers_to_remove = parse_connection_header_value(
+            connection_header[0] if connection_header else None
+        )
+
+        # Copy headers.
+        for k, v in response_headers.getAllRawHeaders():
+            # Do not copy over any hop-by-hop headers. These are meant to only be
+            # consumed by the immediate recipient and not be forwarded on.
+            header_key = k.decode("ascii")
+            if (
+                header_key in HOP_BY_HOP_HEADERS
+                or header_key in extra_headers_to_remove
+            ):
+                continue
+
+            request.responseHeaders.setRawHeaders(k, v)
+
+        response.deliverBody(_ProxyResponseBody(request))
+
+    def _send_error_response(
+        self,
+        f: failure.Failure,
+        request: "SynapseRequest",
+    ) -> None:
+        request.setResponseCode(502)
+        request.setHeader(b"Content-Type", b"application/json")
+        request.write(
+            (
+                json.dumps(
+                    {
+                        "errcode": Codes.UNKNOWN,
+                        "err": "ProxyResource: Error when proxying request: %s %s -> %s"
+                        % (
+                            request.method.decode("ascii"),
+                            request.uri.decode("ascii"),
+                            f,
+                        ),
+                    }
+                )
+            ).encode()
+        )
+        request.finish()
+
+
+class _ProxyResponseBody(protocol.Protocol):
+    """
+    A protocol that proxies the given remote response data back out to the given local
+    request.
+    """
+
+    transport: Optional[ITCPTransport] = None
+
+    def __init__(self, request: "SynapseRequest") -> None:
+        self._request = request
+
+    def dataReceived(self, data: bytes) -> None:
+        # Avoid sending response data to the local request that already disconnected
+        if self._request._disconnected and self.transport is not None:
+            # Close the connection (forcefully) since all the data will get
+            # discarded anyway.
+            self.transport.abortConnection()
+            return
+
+        self._request.write(data)
+
+    def connectionLost(self, reason: Failure = connectionDone) -> None:
+        # If the local request is already finished (successfully or failed), don't
+        # worry about sending anything back.
+        if self._request.finished:
+            return
+
+        if reason.check(ResponseDone):
+            self._request.finish()
+        else:
+            # Abort the underlying request since our remote request also failed.
+            self._request.transport.abortConnection()
+
+
+class ProxySite(Site):
+    """
+    Proxies any requests with a `matrix-federation://` scheme through the given
+    `federation_agent`. Otherwise, behaves like a normal `Site`.
+    """
+
+    def __init__(
+        self,
+        resource: IResource,
+        reactor: ISynapseReactor,
+        federation_agent: IAgent,
+    ):
+        super().__init__(resource, reactor=reactor)
+
+        self._proxy_resource = ProxyResource(reactor, federation_agent)
+
+    def getResourceFor(self, request: "SynapseRequest") -> IResource:
+        uri = urllib.parse.urlparse(request.uri)
+        if uri.scheme == b"matrix-federation":
+            return self._proxy_resource
+
+        return super().getResourceFor(request)
diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py
index 7bdc4acae7..1fa3adbef2 100644
--- a/synapse/http/proxyagent.py
+++ b/synapse/http/proxyagent.py
@@ -12,8 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+import random
 import re
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Collection, Dict, List, Optional, Sequence, Tuple
 from urllib.parse import urlparse
 from urllib.request import (  # type: ignore[attr-defined]
     getproxies_environment,
@@ -24,7 +25,12 @@ from zope.interface import implementer
 
 from twisted.internet import defer
 from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
-from twisted.internet.interfaces import IReactorCore, IStreamClientEndpoint
+from twisted.internet.interfaces import (
+    IProtocol,
+    IProtocolFactory,
+    IReactorCore,
+    IStreamClientEndpoint,
+)
 from twisted.python.failure import Failure
 from twisted.web.client import (
     URI,
@@ -36,8 +42,10 @@ from twisted.web.error import SchemeNotSupported
 from twisted.web.http_headers import Headers
 from twisted.web.iweb import IAgent, IBodyProducer, IPolicyForHTTPS, IResponse
 
+from synapse.config.workers import InstanceLocationConfig
 from synapse.http import redact_uri
 from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint, ProxyCredentials
+from synapse.logging.context import run_in_background
 
 logger = logging.getLogger(__name__)
 
@@ -74,6 +82,10 @@ class ProxyAgent(_AgentBase):
         use_proxy: Whether proxy settings should be discovered and used
             from conventional environment variables.
 
+        federation_proxies: An optional list of locations to proxy outbound federation
+            traffic through (only requests that use the `matrix-federation://` scheme
+            will be proxied).
+
     Raises:
         ValueError if use_proxy is set and the environment variables
             contain an invalid proxy specification.
@@ -89,6 +101,7 @@ class ProxyAgent(_AgentBase):
         bindAddress: Optional[bytes] = None,
         pool: Optional[HTTPConnectionPool] = None,
         use_proxy: bool = False,
+        federation_proxies: Collection[InstanceLocationConfig] = (),
     ):
         contextFactory = contextFactory or BrowserLikePolicyForHTTPS()
 
@@ -127,6 +140,27 @@ class ProxyAgent(_AgentBase):
         self._policy_for_https = contextFactory
         self._reactor = reactor
 
+        self._federation_proxy_endpoint: Optional[IStreamClientEndpoint] = None
+        if federation_proxies:
+            endpoints = []
+            for federation_proxy in federation_proxies:
+                endpoint = HostnameEndpoint(
+                    self.proxy_reactor,
+                    federation_proxy.host,
+                    federation_proxy.port,
+                )
+
+                if federation_proxy.tls:
+                    tls_connection_creator = self._policy_for_https.creatorForNetloc(
+                        federation_proxy.host,
+                        federation_proxy.port,
+                    )
+                    endpoint = wrapClientTLS(tls_connection_creator, endpoint)
+
+                endpoints.append(endpoint)
+
+            self._federation_proxy_endpoint = _ProxyEndpoints(endpoints)
+
     def request(
         self,
         method: bytes,
@@ -214,6 +248,14 @@ class ProxyAgent(_AgentBase):
                 parsed_uri.port,
                 self.https_proxy_creds,
             )
+        elif (
+            parsed_uri.scheme == b"matrix-federation"
+            and self._federation_proxy_endpoint
+        ):
+            # Cache *all* connections under the same key, since we are only
+            # connecting to a single destination, the proxy:
+            endpoint = self._federation_proxy_endpoint
+            request_path = uri
         else:
             # not using a proxy
             endpoint = HostnameEndpoint(
@@ -233,6 +275,11 @@ class ProxyAgent(_AgentBase):
             endpoint = wrapClientTLS(tls_connection_creator, endpoint)
         elif parsed_uri.scheme == b"http":
             pass
+        elif (
+            parsed_uri.scheme == b"matrix-federation"
+            and self._federation_proxy_endpoint
+        ):
+            pass
         else:
             return defer.fail(
                 Failure(
@@ -337,3 +384,31 @@ def parse_proxy(
         credentials = ProxyCredentials(b"".join([url.username, b":", url.password]))
 
     return url.scheme, url.hostname, url.port or default_port, credentials
+
+
+@implementer(IStreamClientEndpoint)
+class _ProxyEndpoints:
+    """An endpoint that randomly iterates through a given list of endpoints at
+    each connection attempt.
+    """
+
+    def __init__(self, endpoints: Sequence[IStreamClientEndpoint]) -> None:
+        assert endpoints
+        self._endpoints = endpoints
+
+    def connect(
+        self, protocol_factory: IProtocolFactory
+    ) -> "defer.Deferred[IProtocol]":
+        """Implements IStreamClientEndpoint interface"""
+
+        return run_in_background(self._do_connect, protocol_factory)
+
+    async def _do_connect(self, protocol_factory: IProtocolFactory) -> IProtocol:
+        failures: List[Failure] = []
+        for endpoint in random.sample(self._endpoints, k=len(self._endpoints)):
+            try:
+                return await endpoint.connect(protocol_factory)
+            except Exception:
+                failures.append(Failure())
+
+        failures.pop().raiseException()
diff --git a/synapse/http/server.py b/synapse/http/server.py
index 933172c873..ff3153a9d9 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -18,6 +18,7 @@ import html
 import logging
 import types
 import urllib
+import urllib.parse
 from http import HTTPStatus
 from http.client import FOUND
 from inspect import isawaitable
@@ -65,7 +66,6 @@ from synapse.api.errors import (
     UnrecognizedRequestError,
 )
 from synapse.config.homeserver import HomeServerConfig
-from synapse.http.site import SynapseRequest
 from synapse.logging.context import defer_to_thread, preserve_fn, run_in_background
 from synapse.logging.opentracing import active_span, start_active_span, trace_servlet
 from synapse.util import json_encoder
@@ -76,6 +76,7 @@ from synapse.util.iterutils import chunk_seq
 if TYPE_CHECKING:
     import opentracing
 
+    from synapse.http.site import SynapseRequest
     from synapse.server import HomeServer
 
 logger = logging.getLogger(__name__)
@@ -102,7 +103,7 @@ HTTP_STATUS_REQUEST_CANCELLED = 499
 
 
 def return_json_error(
-    f: failure.Failure, request: SynapseRequest, config: Optional[HomeServerConfig]
+    f: failure.Failure, request: "SynapseRequest", config: Optional[HomeServerConfig]
 ) -> None:
     """Sends a JSON error response to clients."""
 
@@ -220,8 +221,8 @@ def return_html_error(
 
 
 def wrap_async_request_handler(
-    h: Callable[["_AsyncResource", SynapseRequest], Awaitable[None]]
-) -> Callable[["_AsyncResource", SynapseRequest], "defer.Deferred[None]"]:
+    h: Callable[["_AsyncResource", "SynapseRequest"], Awaitable[None]]
+) -> Callable[["_AsyncResource", "SynapseRequest"], "defer.Deferred[None]"]:
     """Wraps an async request handler so that it calls request.processing.
 
     This helps ensure that work done by the request handler after the request is completed
@@ -235,7 +236,7 @@ def wrap_async_request_handler(
     """
 
     async def wrapped_async_request_handler(
-        self: "_AsyncResource", request: SynapseRequest
+        self: "_AsyncResource", request: "SynapseRequest"
     ) -> None:
         with request.processing():
             await h(self, request)
@@ -300,7 +301,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
 
         self._extract_context = extract_context
 
-    def render(self, request: SynapseRequest) -> int:
+    def render(self, request: "SynapseRequest") -> int:
         """This gets called by twisted every time someone sends us a request."""
         request.render_deferred = defer.ensureDeferred(
             self._async_render_wrapper(request)
@@ -308,7 +309,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
         return NOT_DONE_YET
 
     @wrap_async_request_handler
-    async def _async_render_wrapper(self, request: SynapseRequest) -> None:
+    async def _async_render_wrapper(self, request: "SynapseRequest") -> None:
         """This is a wrapper that delegates to `_async_render` and handles
         exceptions, return values, metrics, etc.
         """
@@ -326,9 +327,15 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
             # of our stack, and thus gives us a sensible stack
             # trace.
             f = failure.Failure()
+            logger.exception(
+                "Error handling request",
+                exc_info=(f.type, f.value, f.getTracebackObject()),
+            )
             self._send_error_response(f, request)
 
-    async def _async_render(self, request: SynapseRequest) -> Optional[Tuple[int, Any]]:
+    async def _async_render(
+        self, request: "SynapseRequest"
+    ) -> Optional[Tuple[int, Any]]:
         """Delegates to `_async_render_<METHOD>` methods, or returns a 400 if
         no appropriate method exists. Can be overridden in sub classes for
         different routing.
@@ -358,7 +365,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
     @abc.abstractmethod
     def _send_response(
         self,
-        request: SynapseRequest,
+        request: "SynapseRequest",
         code: int,
         response_object: Any,
     ) -> None:
@@ -368,7 +375,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
     def _send_error_response(
         self,
         f: failure.Failure,
-        request: SynapseRequest,
+        request: "SynapseRequest",
     ) -> None:
         raise NotImplementedError()
 
@@ -384,7 +391,7 @@ class DirectServeJsonResource(_AsyncResource):
 
     def _send_response(
         self,
-        request: SynapseRequest,
+        request: "SynapseRequest",
         code: int,
         response_object: Any,
     ) -> None:
@@ -401,7 +408,7 @@ class DirectServeJsonResource(_AsyncResource):
     def _send_error_response(
         self,
         f: failure.Failure,
-        request: SynapseRequest,
+        request: "SynapseRequest",
     ) -> None:
         """Implements _AsyncResource._send_error_response"""
         return_json_error(f, request, None)
@@ -473,7 +480,7 @@ class JsonResource(DirectServeJsonResource):
             )
 
     def _get_handler_for_request(
-        self, request: SynapseRequest
+        self, request: "SynapseRequest"
     ) -> Tuple[ServletCallback, str, Dict[str, str]]:
         """Finds a callback method to handle the given request.
 
@@ -503,7 +510,7 @@ class JsonResource(DirectServeJsonResource):
         # Huh. No one wanted to handle that? Fiiiiiine.
         raise UnrecognizedRequestError(code=404)
 
-    async def _async_render(self, request: SynapseRequest) -> Tuple[int, Any]:
+    async def _async_render(self, request: "SynapseRequest") -> Tuple[int, Any]:
         callback, servlet_classname, group_dict = self._get_handler_for_request(request)
 
         request.is_render_cancellable = is_function_cancellable(callback)
@@ -535,7 +542,7 @@ class JsonResource(DirectServeJsonResource):
     def _send_error_response(
         self,
         f: failure.Failure,
-        request: SynapseRequest,
+        request: "SynapseRequest",
     ) -> None:
         """Implements _AsyncResource._send_error_response"""
         return_json_error(f, request, self.hs.config)
@@ -551,7 +558,7 @@ class DirectServeHtmlResource(_AsyncResource):
 
     def _send_response(
         self,
-        request: SynapseRequest,
+        request: "SynapseRequest",
         code: int,
         response_object: Any,
     ) -> None:
@@ -565,7 +572,7 @@ class DirectServeHtmlResource(_AsyncResource):
     def _send_error_response(
         self,
         f: failure.Failure,
-        request: SynapseRequest,
+        request: "SynapseRequest",
     ) -> None:
         """Implements _AsyncResource._send_error_response"""
         return_html_error(f, request, self.ERROR_TEMPLATE)
@@ -592,7 +599,7 @@ class UnrecognizedRequestResource(resource.Resource):
     errcode of M_UNRECOGNIZED.
     """
 
-    def render(self, request: SynapseRequest) -> int:
+    def render(self, request: "SynapseRequest") -> int:
         f = failure.Failure(UnrecognizedRequestError(code=404))
         return_json_error(f, request, None)
         # A response has already been sent but Twisted requires either NOT_DONE_YET
@@ -622,7 +629,7 @@ class RootRedirect(resource.Resource):
 class OptionsResource(resource.Resource):
     """Responds to OPTION requests for itself and all children."""
 
-    def render_OPTIONS(self, request: SynapseRequest) -> bytes:
+    def render_OPTIONS(self, request: "SynapseRequest") -> bytes:
         request.setResponseCode(204)
         request.setHeader(b"Content-Length", b"0")
 
@@ -737,7 +744,7 @@ def _encode_json_bytes(json_object: object) -> bytes:
 
 
 def respond_with_json(
-    request: SynapseRequest,
+    request: "SynapseRequest",
     code: int,
     json_object: Any,
     send_cors: bool = False,
@@ -787,7 +794,7 @@ def respond_with_json(
 
 
 def respond_with_json_bytes(
-    request: SynapseRequest,
+    request: "SynapseRequest",
     code: int,
     json_bytes: bytes,
     send_cors: bool = False,
@@ -825,7 +832,7 @@ def respond_with_json_bytes(
 
 
 async def _async_write_json_to_request_in_thread(
-    request: SynapseRequest,
+    request: "SynapseRequest",
     json_encoder: Callable[[Any], bytes],
     json_object: Any,
 ) -> None:
@@ -883,7 +890,7 @@ def _write_bytes_to_request(request: Request, bytes_to_write: bytes) -> None:
     _ByteProducer(request, bytes_generator)
 
 
-def set_cors_headers(request: SynapseRequest) -> None:
+def set_cors_headers(request: "SynapseRequest") -> None:
     """Set the CORS headers so that javascript running in a web browsers can
     use this API
 
@@ -981,7 +988,7 @@ def set_clickjacking_protection_headers(request: Request) -> None:
 
 
 def respond_with_redirect(
-    request: SynapseRequest, url: bytes, statusCode: int = FOUND, cors: bool = False
+    request: "SynapseRequest", url: bytes, statusCode: int = FOUND, cors: bool = False
 ) -> None:
     """
     Write a 302 (or other specified status code) response to the request, if it is still alive.
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 5b5a7c1e59..0ee2598345 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -21,25 +21,28 @@ from zope.interface import implementer
 
 from twisted.internet.address import UNIXAddress
 from twisted.internet.defer import Deferred
-from twisted.internet.interfaces import IAddress, IReactorTime
+from twisted.internet.interfaces import IAddress
 from twisted.python.failure import Failure
 from twisted.web.http import HTTPChannel
+from twisted.web.iweb import IAgent
 from twisted.web.resource import IResource, Resource
-from twisted.web.server import Request, Site
+from twisted.web.server import Request
 
 from synapse.config.server import ListenerConfig
 from synapse.http import get_request_user_agent, redact_uri
+from synapse.http.proxy import ProxySite
 from synapse.http.request_metrics import RequestMetrics, requests_counter
 from synapse.logging.context import (
     ContextRequest,
     LoggingContext,
     PreserveLoggingContext,
 )
-from synapse.types import Requester
+from synapse.types import ISynapseReactor, Requester
 
 if TYPE_CHECKING:
     import opentracing
 
+
 logger = logging.getLogger(__name__)
 
 _next_request_seq = 0
@@ -102,7 +105,7 @@ class SynapseRequest(Request):
         # A boolean indicating whether `render_deferred` should be cancelled if the
         # client disconnects early. Expected to be set by the coroutine started by
         # `Resource.render`, if rendering is asynchronous.
-        self.is_render_cancellable = False
+        self.is_render_cancellable: bool = False
 
         global _next_request_seq
         self.request_seq = _next_request_seq
@@ -601,7 +604,7 @@ class _XForwardedForAddress:
     host: str
 
 
-class SynapseSite(Site):
+class SynapseSite(ProxySite):
     """
     Synapse-specific twisted http Site
 
@@ -623,7 +626,8 @@ class SynapseSite(Site):
         resource: IResource,
         server_version_string: str,
         max_request_body_size: int,
-        reactor: IReactorTime,
+        reactor: ISynapseReactor,
+        federation_agent: IAgent,
     ):
         """
 
@@ -638,7 +642,11 @@ class SynapseSite(Site):
                 dropping the connection
             reactor: reactor to be used to manage connection timeouts
         """
-        Site.__init__(self, resource, reactor=reactor)
+        super().__init__(
+            resource=resource,
+            reactor=reactor,
+            federation_agent=federation_agent,
+        )
 
         self.site_tag = site_tag
         self.reactor = reactor
@@ -649,7 +657,9 @@ class SynapseSite(Site):
 
         request_id_header = config.http_options.request_id_header
 
-        self.experimental_cors_msc3886 = config.http_options.experimental_cors_msc3886
+        self.experimental_cors_msc3886: bool = (
+            config.http_options.experimental_cors_msc3886
+        )
 
         def request_factory(channel: HTTPChannel, queued: bool) -> Request:
             return request_class(
diff --git a/tests/app/test_openid_listener.py b/tests/app/test_openid_listener.py
index 5a965f233b..21c5309740 100644
--- a/tests/app/test_openid_listener.py
+++ b/tests/app/test_openid_listener.py
@@ -31,9 +31,7 @@ from tests.unittest import HomeserverTestCase
 
 class FederationReaderOpenIDListenerTests(HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver(
-            federation_http_client=None, homeserver_to_use=GenericWorkerServer
-        )
+        hs = self.setup_test_homeserver(homeserver_to_use=GenericWorkerServer)
         return hs
 
     def default_config(self) -> JsonDict:
@@ -91,9 +89,7 @@ class FederationReaderOpenIDListenerTests(HomeserverTestCase):
 @patch("synapse.app.homeserver.KeyResource", new=Mock())
 class SynapseHomeserverOpenIDListenerTests(HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver(
-            federation_http_client=None, homeserver_to_use=SynapseHomeServer
-        )
+        hs = self.setup_test_homeserver(homeserver_to_use=SynapseHomeServer)
         return hs
 
     @parameterized.expand(
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index ee48f9e546..66215af2b8 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -41,7 +41,6 @@ class DeviceTestCase(unittest.HomeserverTestCase):
         self.appservice_api = mock.Mock()
         hs = self.setup_test_homeserver(
             "server",
-            federation_http_client=None,
             application_service_api=self.appservice_api,
         )
         handler = hs.get_device_handler()
@@ -401,7 +400,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
 
 class DehydrationTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver("server", federation_http_client=None)
+        hs = self.setup_test_homeserver("server")
         handler = hs.get_device_handler()
         assert isinstance(handler, DeviceHandler)
         self.handler = handler
diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py
index bf0862ed54..5f11d5df11 100644
--- a/tests/handlers/test_federation.py
+++ b/tests/handlers/test_federation.py
@@ -57,7 +57,7 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase):
     ]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver(federation_http_client=None)
+        hs = self.setup_test_homeserver()
         self.handler = hs.get_federation_handler()
         self.store = hs.get_datastores().main
         return hs
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index 19f5322317..fd66d573d2 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -993,7 +993,6 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         hs = self.setup_test_homeserver(
             "server",
-            federation_http_client=None,
             federation_sender=Mock(spec=FederationSender),
         )
         return hs
diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py
index 94518a7196..5da1d95f0b 100644
--- a/tests/handlers/test_typing.py
+++ b/tests/handlers/test_typing.py
@@ -17,6 +17,8 @@ import json
 from typing import Dict, List, Set
 from unittest.mock import ANY, Mock, call
 
+from netaddr import IPSet
+
 from twisted.test.proto_helpers import MemoryReactor
 from twisted.web.resource import Resource
 
@@ -24,6 +26,7 @@ from synapse.api.constants import EduTypes
 from synapse.api.errors import AuthError
 from synapse.federation.transport.server import TransportLayerServer
 from synapse.handlers.typing import TypingWriterHandler
+from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent
 from synapse.server import HomeServer
 from synapse.types import JsonDict, Requester, UserID, create_requester
 from synapse.util import Clock
@@ -76,6 +79,13 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
         # we mock out the federation client too
         self.mock_federation_client = Mock(spec=["put_json"])
         self.mock_federation_client.put_json.return_value = make_awaitable((200, "OK"))
+        self.mock_federation_client.agent = MatrixFederationAgent(
+            reactor,
+            tls_client_options_factory=None,
+            user_agent=b"SynapseInTrialTest/0.0.0",
+            ip_allowlist=None,
+            ip_blocklist=IPSet(),
+        )
 
         # the tests assume that we are starting at unix time 1000
         reactor.pump((1000,))
diff --git a/tests/http/test_matrixfederationclient.py b/tests/http/test_matrixfederationclient.py
index b5f4a60fe5..a8b9737d1f 100644
--- a/tests/http/test_matrixfederationclient.py
+++ b/tests/http/test_matrixfederationclient.py
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Generator
-from unittest.mock import Mock
+from typing import Any, Dict, Generator
+from unittest.mock import ANY, Mock, create_autospec
 
 from netaddr import IPSet
 from parameterized import parameterized
@@ -21,10 +21,11 @@ from twisted.internet import defer
 from twisted.internet.defer import Deferred, TimeoutError
 from twisted.internet.error import ConnectingCancelledError, DNSLookupError
 from twisted.test.proto_helpers import MemoryReactor, StringTransport
-from twisted.web.client import ResponseNeverReceived
+from twisted.web.client import Agent, ResponseNeverReceived
 from twisted.web.http import HTTPChannel
+from twisted.web.http_headers import Headers
 
-from synapse.api.errors import RequestSendFailed
+from synapse.api.errors import HttpResponseException, RequestSendFailed
 from synapse.http.matrixfederationclient import (
     ByteParser,
     MatrixFederationHttpClient,
@@ -39,7 +40,9 @@ from synapse.logging.context import (
 from synapse.server import HomeServer
 from synapse.util import Clock
 
+from tests.replication._base import BaseMultiWorkerStreamTestCase
 from tests.server import FakeTransport
+from tests.test_utils import FakeResponse
 from tests.unittest import HomeserverTestCase, override_config
 
 
@@ -658,3 +661,181 @@ class FederationClientTests(HomeserverTestCase):
         self.assertEqual(self.cl.max_short_retry_delay_seconds, 7)
         self.assertEqual(self.cl.max_long_retries, 20)
         self.assertEqual(self.cl.max_short_retries, 5)
+
+
+class FederationClientProxyTests(BaseMultiWorkerStreamTestCase):
+    def default_config(self) -> Dict[str, Any]:
+        conf = super().default_config()
+        conf["instance_map"] = {
+            "main": {"host": "testserv", "port": 8765},
+            "federation_sender": {"host": "testserv", "port": 1001},
+        }
+        return conf
+
+    @override_config({"outbound_federation_restricted_to": ["federation_sender"]})
+    def test_proxy_requests_through_federation_sender_worker(self) -> None:
+        """
+        Test that all outbound federation requests go through the `federation_sender`
+        worker
+        """
+        # Mock out the `MatrixFederationHttpClient` of the `federation_sender` instance
+        # so we can act like some remote server responding to requests
+        mock_client_on_federation_sender = Mock()
+        mock_agent_on_federation_sender = create_autospec(Agent, spec_set=True)
+        mock_client_on_federation_sender.agent = mock_agent_on_federation_sender
+
+        # Create the `federation_sender` worker
+        self.federation_sender = self.make_worker_hs(
+            "synapse.app.generic_worker",
+            {"worker_name": "federation_sender"},
+            federation_http_client=mock_client_on_federation_sender,
+        )
+
+        # Fake `remoteserv:8008` responding to requests
+        mock_agent_on_federation_sender.request.side_effect = (
+            lambda *args, **kwargs: defer.succeed(
+                FakeResponse.json(
+                    payload={
+                        "foo": "bar",
+                    }
+                )
+            )
+        )
+
+        # This federation request from the main process should be proxied through the
+        # `federation_sender` worker off to the remote server
+        test_request_from_main_process_d = defer.ensureDeferred(
+            self.hs.get_federation_http_client().get_json("remoteserv:8008", "foo/bar")
+        )
+
+        # Pump the reactor so our deferred goes through the motions
+        self.pump()
+
+        # Make sure that the request was proxied through the `federation_sender` worker
+        mock_agent_on_federation_sender.request.assert_called_once_with(
+            b"GET",
+            b"matrix-federation://remoteserv:8008/foo/bar",
+            headers=ANY,
+            bodyProducer=ANY,
+        )
+
+        # Make sure the response is as expected back on the main worker
+        res = self.successResultOf(test_request_from_main_process_d)
+        self.assertEqual(res, {"foo": "bar"})
+
+    @override_config({"outbound_federation_restricted_to": ["federation_sender"]})
+    def test_proxy_request_with_network_error_through_federation_sender_worker(
+        self,
+    ) -> None:
+        """
+        Test that when the outbound federation request fails with a network related
+        error, a sensible error makes its way back to the main process.
+        """
+        # Mock out the `MatrixFederationHttpClient` of the `federation_sender` instance
+        # so we can act like some remote server responding to requests
+        mock_client_on_federation_sender = Mock()
+        mock_agent_on_federation_sender = create_autospec(Agent, spec_set=True)
+        mock_client_on_federation_sender.agent = mock_agent_on_federation_sender
+
+        # Create the `federation_sender` worker
+        self.federation_sender = self.make_worker_hs(
+            "synapse.app.generic_worker",
+            {"worker_name": "federation_sender"},
+            federation_http_client=mock_client_on_federation_sender,
+        )
+
+        # Fake `remoteserv:8008` responding to requests
+        mock_agent_on_federation_sender.request.side_effect = (
+            lambda *args, **kwargs: defer.fail(ResponseNeverReceived("fake error"))
+        )
+
+        # This federation request from the main process should be proxied through the
+        # `federation_sender` worker off to the remote server
+        test_request_from_main_process_d = defer.ensureDeferred(
+            self.hs.get_federation_http_client().get_json("remoteserv:8008", "foo/bar")
+        )
+
+        # Pump the reactor so our deferred goes through the motions. We pump with 10
+        # seconds (0.1 * 100) so the `MatrixFederationHttpClient` runs out of retries
+        # and finally passes along the error response.
+        self.pump(0.1)
+
+        # Make sure that the request was proxied through the `federation_sender` worker
+        mock_agent_on_federation_sender.request.assert_called_with(
+            b"GET",
+            b"matrix-federation://remoteserv:8008/foo/bar",
+            headers=ANY,
+            bodyProducer=ANY,
+        )
+
+        # Make sure we get some sort of error back on the main worker
+        failure_res = self.failureResultOf(test_request_from_main_process_d)
+        self.assertIsInstance(failure_res.value, RequestSendFailed)
+        self.assertIsInstance(failure_res.value.inner_exception, HttpResponseException)
+
+    @override_config({"outbound_federation_restricted_to": ["federation_sender"]})
+    def test_proxy_requests_and_discards_hop_by_hop_headers(self) -> None:
+        """
+        Test to make sure hop-by-hop headers and additional headers defined in the
+        `Connection` header are discarded when proxying requests
+        """
+        # Mock out the `MatrixFederationHttpClient` of the `federation_sender` instance
+        # so we can act like some remote server responding to requests
+        mock_client_on_federation_sender = Mock()
+        mock_agent_on_federation_sender = create_autospec(Agent, spec_set=True)
+        mock_client_on_federation_sender.agent = mock_agent_on_federation_sender
+
+        # Create the `federation_sender` worker
+        self.federation_sender = self.make_worker_hs(
+            "synapse.app.generic_worker",
+            {"worker_name": "federation_sender"},
+            federation_http_client=mock_client_on_federation_sender,
+        )
+
+        # Fake `remoteserv:8008` responding to requests
+        mock_agent_on_federation_sender.request.side_effect = lambda *args, **kwargs: defer.succeed(
+            FakeResponse(
+                code=200,
+                body=b'{"foo": "bar"}',
+                headers=Headers(
+                    {
+                        "Content-Type": ["application/json"],
+                        "Connection": ["close, X-Foo, X-Bar"],
+                        # Should be removed because it's defined in the `Connection` header
+                        "X-Foo": ["foo"],
+                        "X-Bar": ["bar"],
+                        # Should be removed because it's a hop-by-hop header
+                        "Proxy-Authorization": "abcdef",
+                    }
+                ),
+            )
+        )
+
+        # This federation request from the main process should be proxied through the
+        # `federation_sender` worker off to the remote server
+        test_request_from_main_process_d = defer.ensureDeferred(
+            self.hs.get_federation_http_client().get_json_with_headers(
+                "remoteserv:8008", "foo/bar"
+            )
+        )
+
+        # Pump the reactor so our deferred goes through the motions
+        self.pump()
+
+        # Make sure that the request was proxied through the `federation_sender` worker
+        mock_agent_on_federation_sender.request.assert_called_once_with(
+            b"GET",
+            b"matrix-federation://remoteserv:8008/foo/bar",
+            headers=ANY,
+            bodyProducer=ANY,
+        )
+
+        res, headers = self.successResultOf(test_request_from_main_process_d)
+        header_names = set(headers.keys())
+
+        # Make sure the response does not include the hop-by-hop headers
+        self.assertNotIn(b"X-Foo", header_names)
+        self.assertNotIn(b"X-Bar", header_names)
+        self.assertNotIn(b"Proxy-Authorization", header_names)
+        # Make sure the response is as expected back on the main worker
+        self.assertEqual(res, {"foo": "bar"})
diff --git a/tests/http/test_proxy.py b/tests/http/test_proxy.py
new file mode 100644
index 0000000000..0dc9ba8e05
--- /dev/null
+++ b/tests/http/test_proxy.py
@@ -0,0 +1,53 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Set
+
+from parameterized import parameterized
+
+from synapse.http.proxy import parse_connection_header_value
+
+from tests.unittest import TestCase
+
+
+class ProxyTests(TestCase):
+    @parameterized.expand(
+        [
+            [b"close, X-Foo, X-Bar", {"Close", "X-Foo", "X-Bar"}],
+            # No whitespace
+            [b"close,X-Foo,X-Bar", {"Close", "X-Foo", "X-Bar"}],
+            # More whitespace
+            [b"close,    X-Foo,      X-Bar", {"Close", "X-Foo", "X-Bar"}],
+            # "close" directive is not the first position
+            [b"X-Foo, X-Bar, close", {"X-Foo", "X-Bar", "Close"}],
+            # Normalizes header capitalization
+            [b"keep-alive, x-fOo, x-bAr", {"Keep-Alive", "X-Foo", "X-Bar"}],
+            # Handles header names with whitespace
+            [
+                b"keep-alive, x  foo, x bar",
+                {"Keep-Alive", "X  foo", "X bar"},
+            ],
+        ]
+    )
+    def test_parse_connection_header_value(
+        self,
+        connection_header_value: bytes,
+        expected_extra_headers_to_remove: Set[str],
+    ) -> None:
+        """
+        Tests that the connection header value is parsed correctly
+        """
+        self.assertEqual(
+            expected_extra_headers_to_remove,
+            parse_connection_header_value(connection_header_value),
+        )
diff --git a/tests/replication/_base.py b/tests/replication/_base.py
index eb9b1f1cd9..96badc46b0 100644
--- a/tests/replication/_base.py
+++ b/tests/replication/_base.py
@@ -69,10 +69,10 @@ class BaseStreamTestCase(unittest.HomeserverTestCase):
         # Make a new HomeServer object for the worker
         self.reactor.lookups["testserv"] = "1.2.3.4"
         self.worker_hs = self.setup_test_homeserver(
-            federation_http_client=None,
             homeserver_to_use=GenericWorkerServer,
             config=self._get_worker_hs_config(),
             reactor=self.reactor,
+            federation_http_client=None,
         )
 
         # Since we use sqlite in memory databases we need to make sure the
@@ -380,6 +380,7 @@ class BaseMultiWorkerStreamTestCase(unittest.HomeserverTestCase):
             server_version_string="1",
             max_request_body_size=8192,
             reactor=self.reactor,
+            federation_agent=worker_hs.get_federation_http_client().agent,
         )
 
         worker_hs.get_replication_command_handler().start_replication(worker_hs)
diff --git a/tests/replication/test_federation_sender_shard.py b/tests/replication/test_federation_sender_shard.py
index 08703206a9..a324b4d31d 100644
--- a/tests/replication/test_federation_sender_shard.py
+++ b/tests/replication/test_federation_sender_shard.py
@@ -14,14 +14,18 @@
 import logging
 from unittest.mock import Mock
 
+from netaddr import IPSet
+
 from synapse.api.constants import EventTypes, Membership
 from synapse.events.builder import EventBuilderFactory
 from synapse.handlers.typing import TypingWriterHandler
+from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent
 from synapse.rest.admin import register_servlets_for_client_rest_resource
 from synapse.rest.client import login, room
 from synapse.types import UserID, create_requester
 
 from tests.replication._base import BaseMultiWorkerStreamTestCase
+from tests.server import get_clock
 from tests.test_utils import make_awaitable
 
 logger = logging.getLogger(__name__)
@@ -41,13 +45,25 @@ class FederationSenderTestCase(BaseMultiWorkerStreamTestCase):
         room.register_servlets,
     ]
 
+    def setUp(self) -> None:
+        super().setUp()
+
+        reactor, _ = get_clock()
+        self.matrix_federation_agent = MatrixFederationAgent(
+            reactor,
+            tls_client_options_factory=None,
+            user_agent=b"SynapseInTrialTest/0.0.0",
+            ip_allowlist=None,
+            ip_blocklist=IPSet(),
+        )
+
     def test_send_event_single_sender(self) -> None:
         """Test that using a single federation sender worker correctly sends a
         new event.
         """
         mock_client = Mock(spec=["put_json"])
         mock_client.put_json.return_value = make_awaitable({})
-
+        mock_client.agent = self.matrix_federation_agent
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {
@@ -78,6 +94,7 @@ class FederationSenderTestCase(BaseMultiWorkerStreamTestCase):
         """
         mock_client1 = Mock(spec=["put_json"])
         mock_client1.put_json.return_value = make_awaitable({})
+        mock_client1.agent = self.matrix_federation_agent
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {
@@ -92,6 +109,7 @@ class FederationSenderTestCase(BaseMultiWorkerStreamTestCase):
 
         mock_client2 = Mock(spec=["put_json"])
         mock_client2.put_json.return_value = make_awaitable({})
+        mock_client2.agent = self.matrix_federation_agent
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {
@@ -145,6 +163,7 @@ class FederationSenderTestCase(BaseMultiWorkerStreamTestCase):
         """
         mock_client1 = Mock(spec=["put_json"])
         mock_client1.put_json.return_value = make_awaitable({})
+        mock_client1.agent = self.matrix_federation_agent
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {
@@ -159,6 +178,7 @@ class FederationSenderTestCase(BaseMultiWorkerStreamTestCase):
 
         mock_client2 = Mock(spec=["put_json"])
         mock_client2.put_json.return_value = make_awaitable({})
+        mock_client2.agent = self.matrix_federation_agent
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {
diff --git a/tests/rest/client/test_presence.py b/tests/rest/client/test_presence.py
index dcbb125a3b..e12098102b 100644
--- a/tests/rest/client/test_presence.py
+++ b/tests/rest/client/test_presence.py
@@ -40,7 +40,6 @@ class PresenceTestCase(unittest.HomeserverTestCase):
 
         hs = self.setup_test_homeserver(
             "red",
-            federation_http_client=None,
             federation_client=Mock(),
             presence_handler=self.presence_handler,
         )
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index f1b4e1ad2f..d013e75d55 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -67,8 +67,6 @@ class RoomBase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         self.hs = self.setup_test_homeserver(
             "red",
-            federation_http_client=None,
-            federation_client=Mock(),
         )
 
         self.hs.get_federation_handler = Mock()  # type: ignore[assignment]
diff --git a/tests/storage/test_e2e_room_keys.py b/tests/storage/test_e2e_room_keys.py
index 9cb326d90a..f6df31aba4 100644
--- a/tests/storage/test_e2e_room_keys.py
+++ b/tests/storage/test_e2e_room_keys.py
@@ -31,7 +31,7 @@ room_key: RoomKey = {
 
 class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver("server", federation_http_client=None)
+        hs = self.setup_test_homeserver("server")
         self.store = hs.get_datastores().main
         return hs
 
diff --git a/tests/storage/test_purge.py b/tests/storage/test_purge.py
index 857e2caf2e..0282673167 100644
--- a/tests/storage/test_purge.py
+++ b/tests/storage/test_purge.py
@@ -27,7 +27,7 @@ class PurgeTests(HomeserverTestCase):
     servlets = [room.register_servlets]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver("server", federation_http_client=None)
+        hs = self.setup_test_homeserver("server")
         return hs
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
diff --git a/tests/storage/test_rollback_worker.py b/tests/storage/test_rollback_worker.py
index 6861d3a6c9..809c9f175d 100644
--- a/tests/storage/test_rollback_worker.py
+++ b/tests/storage/test_rollback_worker.py
@@ -45,9 +45,7 @@ def fake_listdir(filepath: str) -> List[str]:
 
 class WorkerSchemaTests(HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver(
-            federation_http_client=None, homeserver_to_use=GenericWorkerServer
-        )
+        hs = self.setup_test_homeserver(homeserver_to_use=GenericWorkerServer)
         return hs
 
     def default_config(self) -> JsonDict:
diff --git a/tests/test_server.py b/tests/test_server.py
index e266c06a2c..fe5afebdcd 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -38,7 +38,7 @@ from tests.http.server._base import test_disconnect
 from tests.server import (
     FakeChannel,
     FakeSite,
-    ThreadedMemoryReactorClock,
+    get_clock,
     make_request,
     setup_test_homeserver,
 )
@@ -46,12 +46,11 @@ from tests.server import (
 
 class JsonResourceTests(unittest.TestCase):
     def setUp(self) -> None:
-        self.reactor = ThreadedMemoryReactorClock()
-        self.hs_clock = Clock(self.reactor)
+        reactor, clock = get_clock()
+        self.reactor = reactor
         self.homeserver = setup_test_homeserver(
             self.addCleanup,
-            federation_http_client=None,
-            clock=self.hs_clock,
+            clock=clock,
             reactor=self.reactor,
         )
 
@@ -209,7 +208,13 @@ class JsonResourceTests(unittest.TestCase):
 
 class OptionsResourceTests(unittest.TestCase):
     def setUp(self) -> None:
-        self.reactor = ThreadedMemoryReactorClock()
+        reactor, clock = get_clock()
+        self.reactor = reactor
+        self.homeserver = setup_test_homeserver(
+            self.addCleanup,
+            clock=clock,
+            reactor=self.reactor,
+        )
 
         class DummyResource(Resource):
             isLeaf = True
@@ -242,6 +247,7 @@ class OptionsResourceTests(unittest.TestCase):
             "1.0",
             max_request_body_size=4096,
             reactor=self.reactor,
+            federation_agent=self.homeserver.get_federation_http_client().agent,
         )
 
         # render the request and return the channel
@@ -344,7 +350,8 @@ class WrapHtmlRequestHandlerTests(unittest.TestCase):
             await self.callback(request)
 
     def setUp(self) -> None:
-        self.reactor = ThreadedMemoryReactorClock()
+        reactor, _ = get_clock()
+        self.reactor = reactor
 
     def test_good_response(self) -> None:
         async def callback(request: SynapseRequest) -> None:
@@ -462,9 +469,9 @@ class DirectServeJsonResourceCancellationTests(unittest.TestCase):
     """Tests for `DirectServeJsonResource` cancellation."""
 
     def setUp(self) -> None:
-        self.reactor = ThreadedMemoryReactorClock()
-        self.clock = Clock(self.reactor)
-        self.resource = CancellableDirectServeJsonResource(self.clock)
+        reactor, clock = get_clock()
+        self.reactor = reactor
+        self.resource = CancellableDirectServeJsonResource(clock)
         self.site = FakeSite(self.resource, self.reactor)
 
     def test_cancellable_disconnect(self) -> None:
@@ -496,9 +503,9 @@ class DirectServeHtmlResourceCancellationTests(unittest.TestCase):
     """Tests for `DirectServeHtmlResource` cancellation."""
 
     def setUp(self) -> None:
-        self.reactor = ThreadedMemoryReactorClock()
-        self.clock = Clock(self.reactor)
-        self.resource = CancellableDirectServeHtmlResource(self.clock)
+        reactor, clock = get_clock()
+        self.reactor = reactor
+        self.resource = CancellableDirectServeHtmlResource(clock)
         self.site = FakeSite(self.resource, self.reactor)
 
     def test_cancellable_disconnect(self) -> None:
diff --git a/tests/unittest.py b/tests/unittest.py
index c73195b32b..334a95a917 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -358,6 +358,7 @@ class HomeserverTestCase(TestCase):
             server_version_string="1",
             max_request_body_size=4096,
             reactor=self.reactor,
+            federation_agent=self.hs.get_federation_http_client().agent,
         )
 
         from tests.rest.client.utils import RestHelper
-- 
cgit 1.5.1


From f19dd39dfc04d3533198d8a52fab6ed49e6d5066 Mon Sep 17 00:00:00 2001
From: sarthak shah <75999816+sarthakshah65@users.noreply.github.com>
Date: Thu, 6 Jul 2023 20:58:09 +0530
Subject: Update link to the clients webpage, fix #15825 (#15874)

---
 changelog.d/15874.misc    | 1 +
 synapse/static/index.html | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15874.misc

(limited to 'synapse')

diff --git a/changelog.d/15874.misc b/changelog.d/15874.misc
new file mode 100644
index 0000000000..0d434bef5d
--- /dev/null
+++ b/changelog.d/15874.misc
@@ -0,0 +1 @@
+Updated the link on the home page from https://matrix.org/docs/projects/try-matrix-now.html#clients to https://matrix.org/ecosystem/clients/.
diff --git a/synapse/static/index.html b/synapse/static/index.html
index bf46df9097..297a7877f3 100644
--- a/synapse/static/index.html
+++ b/synapse/static/index.html
@@ -48,7 +48,7 @@
     </div>
     <h1>It works! Synapse is running</h1>
     <p>Your Synapse server is listening on this port and is ready for messages.</p>
-    <p>To use this server you'll need <a href="https://matrix.org/docs/projects/try-matrix-now.html#clients" target="_blank" rel="noopener noreferrer">a Matrix client</a>.
+    <p>To use this server you'll need <a href="https://matrix.org/ecosystem/clients/" target="_blank" rel="noopener noreferrer">a Matrix client</a>.
     </p>
     <p>Welcome to the Matrix universe :)</p>
     <hr>
-- 
cgit 1.5.1


From 2481b7dfa41c1c890346136f04344a4e1660ef32 Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Fri, 7 Jul 2023 02:45:25 -0500
Subject: Remove `worker_replication_*` deprecated settings, with helpful
 errors on startup (#15860)

Co-authored-by: reivilibre <oliverw@matrix.org>
---
 changelog.d/15860.removal                        |  1 +
 docs/upgrade.md                                  | 15 +++++++
 docs/usage/configuration/config_documentation.md | 45 ---------------------
 docs/workers.md                                  |  3 --
 synapse/config/workers.py                        | 50 ++++++++++++++----------
 tests/app/test_homeserver_start.py               |  6 +--
 tests/config/test_workers.py                     | 27 +------------
 7 files changed, 49 insertions(+), 98 deletions(-)
 create mode 100644 changelog.d/15860.removal

(limited to 'synapse')

diff --git a/changelog.d/15860.removal b/changelog.d/15860.removal
new file mode 100644
index 0000000000..1993bf0299
--- /dev/null
+++ b/changelog.d/15860.removal
@@ -0,0 +1 @@
+Remove deprecated `worker_replication_host`, `worker_replication_http_port` and `worker_replication_http_tls` configuration options.
diff --git a/docs/upgrade.md b/docs/upgrade.md
index 384f4010b4..b94d13c4da 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -98,6 +98,21 @@ You will need Python 3.8 to run Synapse v1.88.0 (due out July 18th, 2023).
 If you use current versions of the Matrix.org-distributed Debian
 packages or Docker images, no action is required.
 
+## Removal of `worker_replication_*` settings
+
+As mentioned previously in [Upgrading to v1.84.0](#upgrading-to-v1840), the following deprecated settings
+are being removed in this release of Synapse:
+
+* [`worker_replication_host`](https://matrix-org.github.io/synapse/v1.86/usage/configuration/config_documentation.html#worker_replication_host)
+* [`worker_replication_http_port`](https://matrix-org.github.io/synapse/v1.86/usage/configuration/config_documentation.html#worker_replication_http_port)
+* [`worker_replication_http_tls`](https://matrix-org.github.io/synapse/v1.86/usage/configuration/config_documentation.html#worker_replication_http_tls)
+
+Please ensure that you have migrated to using `main` on your shared configuration's `instance_map`
+(or create one if necessary). This is required if you have ***any*** workers at all;
+administrators of single-process (monolith) installations don't need to do anything.
+
+For an illustrative example, please see [Upgrading to v1.84.0](#upgrading-to-v1840) below.
+
 
 # Upgrading to v1.86.0
 
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 89a92c4682..04e8390ffe 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -4107,51 +4107,6 @@ Example configuration:
 worker_name: generic_worker1
 ```
 ---
-### `worker_replication_host`
-*Deprecated as of version 1.84.0. Place `host` under `main` entry on the [`instance_map`](#instance_map) in your shared yaml configuration instead.*
-
-The HTTP replication endpoint that it should talk to on the main Synapse process.
-The main Synapse process defines this with a `replication` resource in
-[`listeners` option](#listeners).
-
-Example configuration:
-```yaml
-worker_replication_host: 127.0.0.1
-```
----
-### `worker_replication_http_port`
-*Deprecated as of version 1.84.0. Place `port` under `main` entry on the [`instance_map`](#instance_map) in your shared yaml configuration instead.*
-
-The HTTP replication port that it should talk to on the main Synapse process.
-The main Synapse process defines this with a `replication` resource in
-[`listeners` option](#listeners).
-
-Example configuration:
-```yaml
-worker_replication_http_port: 9093
-```
----
-### `worker_replication_http_tls`
-*Deprecated as of version 1.84.0. Place `tls` under `main` entry on the [`instance_map`](#instance_map) in your shared yaml configuration instead.*
-
-Whether TLS should be used for talking to the HTTP replication port on the main
-Synapse process.
-The main Synapse process defines this with the `tls` option on its [listener](#listeners) that
-has the `replication` resource enabled.
-
-**Please note:** by default, it is not safe to expose replication ports to the
-public Internet, even with TLS enabled.
-See [`worker_replication_secret`](#worker_replication_secret).
-
-Defaults to `false`.
-
-*Added in Synapse 1.72.0.*
-
-Example configuration:
-```yaml
-worker_replication_http_tls: true
-```
----
 ### `worker_listeners`
 
 A worker can handle HTTP requests. To do so, a `worker_listeners` option
diff --git a/docs/workers.md b/docs/workers.md
index 303e0f0e7a..03415c6eb3 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -145,9 +145,6 @@ In the config file for each worker, you must specify:
    with an `http` listener.
  * **Synapse 1.72 and older:** if handling the `^/_matrix/client/v3/keys/upload` endpoint, the HTTP URI for
    the main process (`worker_main_http_uri`). This config option is no longer required and is ignored when running Synapse 1.73 and newer.
- * **Synapse 1.83 and older:** The HTTP replication endpoint that the worker should talk to on the main synapse process
-   ([`worker_replication_host`](usage/configuration/config_documentation.md#worker_replication_host) and
-   [`worker_replication_http_port`](usage/configuration/config_documentation.md#worker_replication_http_port)). If using Synapse 1.84 and newer, these are not needed if `main` is defined on the [shared configuration](#shared-configuration) `instance_map`
 
 For example:
 
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 0b9789160c..5c81eb5c67 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -41,11 +41,17 @@ Synapse version. Please use ``%s: name_of_worker`` instead.
 
 _MISSING_MAIN_PROCESS_INSTANCE_MAP_DATA = """
 Missing data for a worker to connect to main process. Please include '%s' in the
-`instance_map` declared in your shared yaml configuration, or optionally(as a deprecated
-solution) in every worker's yaml as various `worker_replication_*` settings as defined
-in workers documentation here:
+`instance_map` declared in your shared yaml configuration as defined in configuration
+documentation here:
+`https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#instance_map`
+"""
+
+WORKER_REPLICATION_SETTING_DEPRECATED_MESSAGE = """
+'%s' is no longer a supported worker setting, please place '%s' onto your shared
+configuration under `main` inside the `instance_map`. See workers documentation here:
 `https://matrix-org.github.io/synapse/latest/workers.html#worker-configuration`
 """
+
 # This allows for a handy knob when it's time to change from 'master' to
 # something with less 'history'
 MAIN_PROCESS_INSTANCE_NAME = "master"
@@ -237,22 +243,37 @@ class WorkerConfig(Config):
         )
 
         # A map from instance name to host/port of their HTTP replication endpoint.
-        # Check if the main process is declared. Inject it into the map if it's not,
-        # based first on if a 'main' block is declared then on 'worker_replication_*'
-        # data. If both are available, default to instance_map. The main process
-        # itself doesn't need this data as it would never have to talk to itself.
+        # Check if the main process is declared. The main process itself doesn't need
+        # this data as it would never have to talk to itself.
         instance_map: Dict[str, Any] = config.get("instance_map", {})
 
         if self.instance_name is not MAIN_PROCESS_INSTANCE_NAME:
+            # TODO: The next 3 condition blocks can be deleted after some time has
+            #  passed and we're ready to stop checking for these settings.
             # The host used to connect to the main synapse
             main_host = config.get("worker_replication_host", None)
+            if main_host:
+                raise ConfigError(
+                    WORKER_REPLICATION_SETTING_DEPRECATED_MESSAGE
+                    % ("worker_replication_host", main_host)
+                )
 
             # The port on the main synapse for HTTP replication endpoint
             main_port = config.get("worker_replication_http_port")
+            if main_port:
+                raise ConfigError(
+                    WORKER_REPLICATION_SETTING_DEPRECATED_MESSAGE
+                    % ("worker_replication_http_port", main_port)
+                )
 
             # The tls mode on the main synapse for HTTP replication endpoint.
             # For backward compatibility this defaults to False.
             main_tls = config.get("worker_replication_http_tls", False)
+            if main_tls:
+                raise ConfigError(
+                    WORKER_REPLICATION_SETTING_DEPRECATED_MESSAGE
+                    % ("worker_replication_http_tls", main_tls)
+                )
 
             # For now, accept 'main' in the instance_map, but the replication system
             # expects 'master', force that into being until it's changed later.
@@ -262,22 +283,9 @@ class WorkerConfig(Config):
                 ]
                 del instance_map[MAIN_PROCESS_INSTANCE_MAP_NAME]
 
-            # This is the backwards compatibility bit that handles the
-            # worker_replication_* bits using setdefault() to not overwrite anything.
-            elif main_host is not None and main_port is not None:
-                instance_map.setdefault(
-                    MAIN_PROCESS_INSTANCE_NAME,
-                    {
-                        "host": main_host,
-                        "port": main_port,
-                        "tls": main_tls,
-                    },
-                )
-
             else:
                 # If we've gotten here, it means that the main process is not on the
-                # instance_map and that not enough worker_replication_* variables
-                # were declared in the worker's yaml.
+                # instance_map.
                 raise ConfigError(
                     _MISSING_MAIN_PROCESS_INSTANCE_MAP_DATA
                     % MAIN_PROCESS_INSTANCE_MAP_NAME
diff --git a/tests/app/test_homeserver_start.py b/tests/app/test_homeserver_start.py
index cd117b7394..0201933b04 100644
--- a/tests/app/test_homeserver_start.py
+++ b/tests/app/test_homeserver_start.py
@@ -25,9 +25,9 @@ class HomeserverAppStartTestCase(ConfigFileTestCase):
         # Add a blank line as otherwise the next addition ends up on a line with a comment
         self.add_lines_to_config(["  "])
         self.add_lines_to_config(["worker_app: test_worker_app"])
-        self.add_lines_to_config(["worker_replication_host: 127.0.0.1"])
-        self.add_lines_to_config(["worker_replication_http_port: 0"])
-
+        self.add_lines_to_config(["worker_log_config: /data/logconfig.config"])
+        self.add_lines_to_config(["instance_map:"])
+        self.add_lines_to_config(["  main:", "    host: 127.0.0.1", "    port: 1234"])
         # Ensure that starting master process with worker config raises an exception
         with self.assertRaises(ConfigError):
             synapse.app.homeserver.setup(["-c", self.config_file])
diff --git a/tests/config/test_workers.py b/tests/config/test_workers.py
index 086359fd71..2a643ae4f3 100644
--- a/tests/config/test_workers.py
+++ b/tests/config/test_workers.py
@@ -17,7 +17,7 @@ from unittest.mock import Mock
 from immutabledict import immutabledict
 
 from synapse.config import ConfigError
-from synapse.config.workers import InstanceLocationConfig, WorkerConfig
+from synapse.config.workers import WorkerConfig
 
 from tests.unittest import TestCase
 
@@ -323,28 +323,3 @@ class WorkerDutyConfigTestCase(TestCase):
         )
         self.assertTrue(worker2_config.should_notify_appservices)
         self.assertFalse(worker2_config.should_update_user_directory)
-
-    def test_worker_instance_map_compat(self) -> None:
-        """
-        Test that `worker_replication_*` settings are compatibly handled by
-        adding them to the instance map as a `main` entry.
-        """
-
-        worker1_config = self._make_worker_config(
-            worker_app="synapse.app.generic_worker",
-            worker_name="worker1",
-            extras={
-                "notify_appservices_from_worker": "worker2",
-                "update_user_directory_from_worker": "worker1",
-                "worker_replication_host": "127.0.0.42",
-                "worker_replication_http_port": 1979,
-            },
-        )
-        self.assertEqual(
-            worker1_config.instance_map,
-            {
-                "master": InstanceLocationConfig(
-                    host="127.0.0.42", port=1979, tls=False
-                ),
-            },
-        )
-- 
cgit 1.5.1


From f25b0f88081bb436bef914983cff7087b54eba5f Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Fri, 7 Jul 2023 09:23:27 -0700
Subject: Stop writing to column `user_id` of tables `profiles` and
 `user_filters` (#15787)

---
 changelog.d/15787.misc                             |  1 +
 synapse/storage/database.py                        |  2 +
 synapse/storage/databases/main/__init__.py         |  6 +-
 synapse/storage/databases/main/filtering.py        |  5 +-
 synapse/storage/databases/main/profile.py          | 12 +--
 synapse/storage/schema/__init__.py                 |  9 ++-
 .../79/01_drop_user_id_constraint_profiles.py      | 50 ++++++++++++
 .../79/02_drop_user_id_constraint_user_filters.py  | 54 +++++++++++++
 tests/storage/test_profile.py                      | 63 ---------------
 tests/storage/test_user_filters.py                 | 94 ----------------------
 10 files changed, 123 insertions(+), 173 deletions(-)
 create mode 100644 changelog.d/15787.misc
 create mode 100644 synapse/storage/schema/main/delta/79/01_drop_user_id_constraint_profiles.py
 create mode 100644 synapse/storage/schema/main/delta/79/02_drop_user_id_constraint_user_filters.py
 delete mode 100644 tests/storage/test_user_filters.py

(limited to 'synapse')

diff --git a/changelog.d/15787.misc b/changelog.d/15787.misc
new file mode 100644
index 0000000000..bd7536d36e
--- /dev/null
+++ b/changelog.d/15787.misc
@@ -0,0 +1 @@
+Stop writing to column `user_id` of tables `profiles` and `user_filters`.
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index a1c8fb0f46..c9d687fb2f 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -98,6 +98,8 @@ UNIQUE_INDEX_BACKGROUND_UPDATES = {
     "event_push_summary": "event_push_summary_unique_index2",
     "receipts_linearized": "receipts_linearized_unique_index",
     "receipts_graph": "receipts_graph_unique_index",
+    "profiles": "profiles_full_user_id_key_idx",
+    "user_filters": "full_users_filters_unique_idx",
 }
 
 
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index 80c0304b19..b6028853c9 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -15,7 +15,7 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, List, Optional, Tuple, cast
+from typing import TYPE_CHECKING, List, Optional, Tuple, Union, cast
 
 from synapse.api.constants import Direction
 from synapse.config.homeserver import HomeServerConfig
@@ -196,7 +196,7 @@ class DataStore(
             txn: LoggingTransaction,
         ) -> Tuple[List[JsonDict], int]:
             filters = []
-            args = [self.hs.config.server.server_name]
+            args: List[Union[str, int]] = []
 
             # Set ordering
             order_by_column = UserSortOrder(order_by).value
@@ -263,7 +263,7 @@ class DataStore(
 
             sql_base = f"""
                 FROM users as u
-                LEFT JOIN profiles AS p ON u.name = '@' || p.user_id || ':' || ?
+                LEFT JOIN profiles AS p ON u.name = p.full_user_id
                 LEFT JOIN erased_users AS eu ON u.name = eu.user_id
                 {where_clause}
                 """
diff --git a/synapse/storage/databases/main/filtering.py b/synapse/storage/databases/main/filtering.py
index fff417f9e3..75f7fe8756 100644
--- a/synapse/storage/databases/main/filtering.py
+++ b/synapse/storage/databases/main/filtering.py
@@ -188,14 +188,13 @@ class FilteringWorkerStore(SQLBaseStore):
                 filter_id = max_id + 1
 
             sql = (
-                "INSERT INTO user_filters (full_user_id, user_id, filter_id, filter_json)"
-                "VALUES(?, ?, ?, ?)"
+                "INSERT INTO user_filters (full_user_id, filter_id, filter_json)"
+                "VALUES(?, ?, ?)"
             )
             txn.execute(
                 sql,
                 (
                     user_id.to_string(),
-                    user_id.localpart,
                     filter_id,
                     bytearray(def_json),
                 ),
diff --git a/synapse/storage/databases/main/profile.py b/synapse/storage/databases/main/profile.py
index 3ba9cc8853..660a5507b7 100644
--- a/synapse/storage/databases/main/profile.py
+++ b/synapse/storage/databases/main/profile.py
@@ -173,10 +173,9 @@ class ProfileWorkerStore(SQLBaseStore):
         )
 
     async def create_profile(self, user_id: UserID) -> None:
-        user_localpart = user_id.localpart
         await self.db_pool.simple_insert(
             table="profiles",
-            values={"user_id": user_localpart, "full_user_id": user_id.to_string()},
+            values={"full_user_id": user_id.to_string()},
             desc="create_profile",
         )
 
@@ -191,13 +190,11 @@ class ProfileWorkerStore(SQLBaseStore):
             new_displayname: The new display name. If this is None, the user's display
                 name is removed.
         """
-        user_localpart = user_id.localpart
         await self.db_pool.simple_upsert(
             table="profiles",
-            keyvalues={"user_id": user_localpart},
+            keyvalues={"full_user_id": user_id.to_string()},
             values={
                 "displayname": new_displayname,
-                "full_user_id": user_id.to_string(),
             },
             desc="set_profile_displayname",
         )
@@ -213,11 +210,10 @@ class ProfileWorkerStore(SQLBaseStore):
             new_avatar_url: The new avatar URL. If this is None, the user's avatar is
                 removed.
         """
-        user_localpart = user_id.localpart
         await self.db_pool.simple_upsert(
             table="profiles",
-            keyvalues={"user_id": user_localpart},
-            values={"avatar_url": new_avatar_url, "full_user_id": user_id.to_string()},
+            keyvalues={"full_user_id": user_id.to_string()},
+            values={"avatar_url": new_avatar_url},
             desc="set_profile_avatar_url",
         )
 
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index fc190a8b13..6d14963c0a 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 78  # remember to update the list below when updating
+SCHEMA_VERSION = 79  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -106,6 +106,9 @@ Changes in SCHEMA_VERSION = 77
 
 Changes in SCHEMA_VERSION = 78
     - Validate check (full_user_id IS NOT NULL) on tables profiles and user_filters
+
+Changes in SCHEMA_VERSION = 79
+    - We no longer write to column user_id of tables profiles and user_filters
 """
 
 
@@ -118,7 +121,9 @@ SCHEMA_COMPAT_VERSION = (
     #
     # insertions to the column `full_user_id` of tables profiles and user_filters can no
     # longer be null
-    76
+    #
+    # we no longer write to column `user_id` of tables profiles and user_filters
+    78
 )
 """Limit on how far the synapse codebase can be rolled back without breaking db compat
 
diff --git a/synapse/storage/schema/main/delta/79/01_drop_user_id_constraint_profiles.py b/synapse/storage/schema/main/delta/79/01_drop_user_id_constraint_profiles.py
new file mode 100644
index 0000000000..3541266f7d
--- /dev/null
+++ b/synapse/storage/schema/main/delta/79/01_drop_user_id_constraint_profiles.py
@@ -0,0 +1,50 @@
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
+
+
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
+    """
+    Update to drop the NOT NULL constraint on column user_id so that we can cease to
+    write to it without inserts to other columns triggering the constraint
+    """
+
+    if isinstance(database_engine, PostgresEngine):
+        drop_sql = """
+        ALTER TABLE profiles ALTER COLUMN user_id DROP NOT NULL
+        """
+        cur.execute(drop_sql)
+    else:
+        # irritatingly in SQLite we need to rewrite the table to drop the constraint.
+        cur.execute("DROP TABLE IF EXISTS temp_profiles")
+
+        create_sql = """
+        CREATE TABLE temp_profiles (
+            full_user_id text NOT NULL,
+            user_id text,
+            displayname text,
+            avatar_url text,
+            UNIQUE (full_user_id),
+            UNIQUE (user_id)
+        )
+        """
+        cur.execute(create_sql)
+
+        copy_sql = """
+        INSERT INTO temp_profiles (
+            user_id,
+            displayname,
+            avatar_url,
+            full_user_id)
+            SELECT user_id, displayname, avatar_url, full_user_id FROM profiles
+        """
+        cur.execute(copy_sql)
+
+        drop_sql = """
+        DROP TABLE profiles
+        """
+        cur.execute(drop_sql)
+
+        rename_sql = """
+        ALTER TABLE temp_profiles RENAME to profiles
+        """
+        cur.execute(rename_sql)
diff --git a/synapse/storage/schema/main/delta/79/02_drop_user_id_constraint_user_filters.py b/synapse/storage/schema/main/delta/79/02_drop_user_id_constraint_user_filters.py
new file mode 100644
index 0000000000..8e7569c470
--- /dev/null
+++ b/synapse/storage/schema/main/delta/79/02_drop_user_id_constraint_user_filters.py
@@ -0,0 +1,54 @@
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
+
+
+def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
+    """
+    Update to drop the NOT NULL constraint on column user_id so that we can cease to
+    write to it without inserts to other columns triggering the constraint
+    """
+    if isinstance(database_engine, PostgresEngine):
+        drop_sql = """
+        ALTER TABLE user_filters ALTER COLUMN user_id DROP NOT NULL
+        """
+        cur.execute(drop_sql)
+
+    else:
+        # irritatingly in SQLite we need to rewrite the table to drop the constraint.
+        cur.execute("DROP TABLE IF EXISTS temp_user_filters")
+
+        create_sql = """
+        CREATE TABLE temp_user_filters (
+            full_user_id text NOT NULL,
+            user_id text,
+            filter_id bigint NOT NULL,
+            filter_json bytea NOT NULL
+        )
+        """
+        cur.execute(create_sql)
+
+        index_sql = """
+            CREATE UNIQUE INDEX IF NOT EXISTS user_filters_full_user_id_unique ON
+            temp_user_filters (full_user_id, filter_id)
+        """
+        cur.execute(index_sql)
+
+        copy_sql = """
+            INSERT INTO temp_user_filters (
+                user_id,
+                filter_id,
+                filter_json,
+                full_user_id)
+            SELECT user_id, filter_id, filter_json, full_user_id FROM user_filters
+        """
+        cur.execute(copy_sql)
+
+        drop_sql = """
+        DROP TABLE user_filters
+        """
+        cur.execute(drop_sql)
+
+        rename_sql = """
+        ALTER TABLE temp_user_filters RENAME to user_filters
+        """
+        cur.execute(rename_sql)
diff --git a/tests/storage/test_profile.py b/tests/storage/test_profile.py
index fe5bb77913..bbe8bd88bc 100644
--- a/tests/storage/test_profile.py
+++ b/tests/storage/test_profile.py
@@ -15,8 +15,6 @@
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.server import HomeServer
-from synapse.storage.database import LoggingTransaction
-from synapse.storage.engines import PostgresEngine
 from synapse.types import UserID
 from synapse.util import Clock
 
@@ -64,64 +62,3 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase):
         self.assertIsNone(
             self.get_success(self.store.get_profile_avatar_url(self.u_frank))
         )
-
-    def test_profiles_bg_migration(self) -> None:
-        """
-        Test background job that copies entries from column user_id to full_user_id, adding
-        the hostname in the process.
-        """
-        updater = self.hs.get_datastores().main.db_pool.updates
-
-        # drop the constraint so we can insert nulls in full_user_id to populate the test
-        if isinstance(self.store.database_engine, PostgresEngine):
-
-            def f(txn: LoggingTransaction) -> None:
-                txn.execute(
-                    "ALTER TABLE profiles DROP CONSTRAINT full_user_id_not_null"
-                )
-
-            self.get_success(self.store.db_pool.runInteraction("", f))
-
-        for i in range(0, 70):
-            self.get_success(
-                self.store.db_pool.simple_insert(
-                    "profiles",
-                    {"user_id": f"hello{i:02}"},
-                )
-            )
-
-        # re-add the constraint so that when it's validated it actually exists
-        if isinstance(self.store.database_engine, PostgresEngine):
-
-            def f(txn: LoggingTransaction) -> None:
-                txn.execute(
-                    "ALTER TABLE profiles ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID"
-                )
-
-            self.get_success(self.store.db_pool.runInteraction("", f))
-
-        self.get_success(
-            self.store.db_pool.simple_insert(
-                "background_updates",
-                values={
-                    "update_name": "populate_full_user_id_profiles",
-                    "progress_json": "{}",
-                },
-            )
-        )
-
-        self.get_success(
-            updater.run_background_updates(False),
-        )
-
-        expected_values = []
-        for i in range(0, 70):
-            expected_values.append((f"@hello{i:02}:{self.hs.hostname}",))
-
-        res = self.get_success(
-            self.store.db_pool.execute(
-                "", None, "SELECT full_user_id from profiles ORDER BY full_user_id"
-            )
-        )
-        self.assertEqual(len(res), len(expected_values))
-        self.assertEqual(res, expected_values)
diff --git a/tests/storage/test_user_filters.py b/tests/storage/test_user_filters.py
deleted file mode 100644
index bab802f56e..0000000000
--- a/tests/storage/test_user_filters.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright 2023 The Matrix.org Foundation C.I.C
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-from twisted.test.proto_helpers import MemoryReactor
-
-from synapse.server import HomeServer
-from synapse.storage.database import LoggingTransaction
-from synapse.storage.engines import PostgresEngine
-from synapse.util import Clock
-
-from tests import unittest
-
-
-class UserFiltersStoreTestCase(unittest.HomeserverTestCase):
-    """
-    Test background migration that copies entries from column user_id to full_user_id, adding
-    the hostname in the process.
-    """
-
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        self.store = hs.get_datastores().main
-
-    def test_bg_migration(self) -> None:
-        updater = self.hs.get_datastores().main.db_pool.updates
-
-        # drop the constraint so we can insert nulls in full_user_id to populate the test
-        if isinstance(self.store.database_engine, PostgresEngine):
-
-            def f(txn: LoggingTransaction) -> None:
-                txn.execute(
-                    "ALTER TABLE user_filters DROP CONSTRAINT full_user_id_not_null"
-                )
-
-            self.get_success(self.store.db_pool.runInteraction("", f))
-
-        for i in range(0, 70):
-            self.get_success(
-                self.store.db_pool.simple_insert(
-                    "user_filters",
-                    {
-                        "user_id": f"hello{i:02}",
-                        "filter_id": i,
-                        "filter_json": bytearray(i),
-                    },
-                )
-            )
-
-        # re-add the constraint so that when it's validated it actually exists
-        if isinstance(self.store.database_engine, PostgresEngine):
-
-            def f(txn: LoggingTransaction) -> None:
-                txn.execute(
-                    "ALTER TABLE user_filters ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID"
-                )
-
-            self.get_success(self.store.db_pool.runInteraction("", f))
-
-        self.get_success(
-            self.store.db_pool.simple_insert(
-                "background_updates",
-                values={
-                    "update_name": "populate_full_user_id_user_filters",
-                    "progress_json": "{}",
-                },
-            )
-        )
-
-        self.get_success(
-            updater.run_background_updates(False),
-        )
-
-        expected_values = []
-        for i in range(0, 70):
-            expected_values.append((f"@hello{i:02}:{self.hs.hostname}",))
-
-        res = self.get_success(
-            self.store.db_pool.execute(
-                "", None, "SELECT full_user_id from user_filters ORDER BY full_user_id"
-            )
-        )
-        self.assertEqual(len(res), len(expected_values))
-        self.assertEqual(res, expected_values)
-- 
cgit 1.5.1


From e55a9b3e41e73f34fda781b9374935c4623f7ea9 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 10 Jul 2023 16:24:42 +0100
Subject: Fix downgrading to previous version of Synapse (#15907)

We do this by marking the constraint as deferrable.
---
 changelog.d/15907.misc                                         |  1 +
 synapse/storage/background_updates.py                          |  7 ++++++-
 synapse/storage/databases/main/event_federation.py             |  4 +++-
 .../schema/main/delta/78/03event_extremities_constraints.py    | 10 ++++++++--
 tests/storage/test_background_update.py                        |  8 ++++++--
 5 files changed, 24 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/15907.misc

(limited to 'synapse')

diff --git a/changelog.d/15907.misc b/changelog.d/15907.misc
new file mode 100644
index 0000000000..e0ecea6c2f
--- /dev/null
+++ b/changelog.d/15907.misc
@@ -0,0 +1 @@
+Add foreign key constraint to `event_forward_extremities`.
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index 5dce0a0159..2d5ddc3e7b 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -80,10 +80,14 @@ class ForeignKeyConstraint(Constraint):
     Attributes:
         referenced_table: The "parent" table name.
         columns: The list of mappings of columns from table to referenced table
+        deferred: Whether to defer checking of the constraint to the end of the
+            transaction. This is useful for e.g. backwards compatibility where
+            an older version inserted data in the wrong order.
     """
 
     referenced_table: str
     columns: Sequence[Tuple[str, str]]
+    deferred: bool
 
     def make_check_clause(self, table: str) -> str:
         join_clause = " AND ".join(
@@ -94,7 +98,8 @@ class ForeignKeyConstraint(Constraint):
     def make_constraint_clause_postgres(self) -> str:
         column1_list = ", ".join(col1 for col1, col2 in self.columns)
         column2_list = ", ".join(col2 for col1, col2 in self.columns)
-        return f"FOREIGN KEY ({column1_list}) REFERENCES {self.referenced_table} ({column2_list})"
+        defer_clause = " DEFERRABLE INITIALLY DEFERRED" if self.deferred else ""
+        return f"FOREIGN KEY ({column1_list}) REFERENCES {self.referenced_table} ({column2_list}) {defer_clause}"
 
 
 @attr.s(auto_attribs=True)
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index dabe603c8c..b2cda52ce5 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -146,7 +146,9 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
                 update_name="event_forward_extremities_event_id_foreign_key_constraint_update",
                 table="event_forward_extremities",
                 constraint_name="event_forward_extremities_event_id",
-                constraint=ForeignKeyConstraint("events", [("event_id", "event_id")]),
+                constraint=ForeignKeyConstraint(
+                    "events", [("event_id", "event_id")], deferred=True
+                ),
                 unique_columns=("event_id", "room_id"),
             )
 
diff --git a/synapse/storage/schema/main/delta/78/03event_extremities_constraints.py b/synapse/storage/schema/main/delta/78/03event_extremities_constraints.py
index f12e2a8f3e..bf8c57dbe8 100644
--- a/synapse/storage/schema/main/delta/78/03event_extremities_constraints.py
+++ b/synapse/storage/schema/main/delta/78/03event_extremities_constraints.py
@@ -28,19 +28,25 @@ FORWARD_EXTREMITIES_TABLE_SCHEMA = """
         event_id TEXT NOT NULL,
         room_id TEXT NOT NULL,
         UNIQUE (event_id, room_id),
-        CONSTRAINT event_forward_extremities_event_id FOREIGN KEY (event_id) REFERENCES events (event_id)
+        CONSTRAINT event_forward_extremities_event_id FOREIGN KEY (event_id) REFERENCES events (event_id) DEFERRABLE INITIALLY DEFERRED
     )
 """
 
 
 def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
+    # We mark this as a deferred constraint, as the previous version of Synapse
+    # inserted the event into the forward extremities *before* the events table.
+    # By marking as deferred we ensure that downgrading to the previous version
+    # will continue to work.
     run_validate_constraint_and_delete_rows_schema_delta(
         cur,
         ordering=7803,
         update_name="event_forward_extremities_event_id_foreign_key_constraint_update",
         table="event_forward_extremities",
         constraint_name="event_forward_extremities_event_id",
-        constraint=ForeignKeyConstraint("events", [("event_id", "event_id")]),
+        constraint=ForeignKeyConstraint(
+            "events", [("event_id", "event_id")], deferred=True
+        ),
         sqlite_table_name="event_forward_extremities2",
         sqlite_table_schema=FORWARD_EXTREMITIES_TABLE_SCHEMA,
     )
diff --git a/tests/storage/test_background_update.py b/tests/storage/test_background_update.py
index 6ca546f3f7..a4a823a252 100644
--- a/tests/storage/test_background_update.py
+++ b/tests/storage/test_background_update.py
@@ -586,7 +586,9 @@ class BackgroundUpdateValidateConstraintTestCase(unittest.HomeserverTestCase):
                 update_name="test_bg_update",
                 table="test_constraint",
                 constraint_name="test_constraint_name",
-                constraint=ForeignKeyConstraint("base_table", [("b", "b")]),
+                constraint=ForeignKeyConstraint(
+                    "base_table", [("b", "b")], deferred=False
+                ),
                 sqlite_table_name="test_constraint2",
                 sqlite_table_schema=table2_sqlite,
             )
@@ -604,7 +606,9 @@ class BackgroundUpdateValidateConstraintTestCase(unittest.HomeserverTestCase):
                 "test_bg_update",
                 table="test_constraint",
                 constraint_name="test_constraint_name",
-                constraint=ForeignKeyConstraint("base_table", [("b", "b")]),
+                constraint=ForeignKeyConstraint(
+                    "base_table", [("b", "b")], deferred=False
+                ),
                 unique_columns=["a"],
             )
 
-- 
cgit 1.5.1


From c9bf644fa0c2c06f8143b14ccdb655feebed97df Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Mon, 10 Jul 2023 11:10:20 -0500
Subject: Revert "Federation outbound proxy" (#15910)

Revert "Federation outbound proxy (#15773)"

This reverts commit b07b14b494ae1dd564b4c44f844c9a9545b3d08a.
---
 changelog.d/15773.feature                         |   1 -
 docs/usage/configuration/config_documentation.md  |  31 +--
 docs/workers.md                                   |  20 --
 synapse/app/_base.py                              |   2 -
 synapse/app/generic_worker.py                     |   1 -
 synapse/app/homeserver.py                         |   1 -
 synapse/config/workers.py                         |  40 +---
 synapse/http/client.py                            |   7 +-
 synapse/http/matrixfederationclient.py            | 132 +-----------
 synapse/http/proxy.py                             | 249 ----------------------
 synapse/http/proxyagent.py                        |  79 +------
 synapse/http/server.py                            |  55 +++--
 synapse/http/site.py                              |  26 +--
 tests/app/test_openid_listener.py                 |   8 +-
 tests/handlers/test_device.py                     |   3 +-
 tests/handlers/test_federation.py                 |   2 +-
 tests/handlers/test_presence.py                   |   1 +
 tests/handlers/test_typing.py                     |  10 -
 tests/http/test_matrixfederationclient.py         | 189 +---------------
 tests/http/test_proxy.py                          |  53 -----
 tests/replication/_base.py                        |   3 +-
 tests/replication/test_federation_sender_shard.py |  22 +-
 tests/rest/client/test_presence.py                |   1 +
 tests/rest/client/test_rooms.py                   |   2 +
 tests/storage/test_e2e_room_keys.py               |   2 +-
 tests/storage/test_purge.py                       |   2 +-
 tests/storage/test_rollback_worker.py             |   4 +-
 tests/test_server.py                              |  33 ++-
 tests/unittest.py                                 |   1 -
 29 files changed, 90 insertions(+), 890 deletions(-)
 delete mode 100644 changelog.d/15773.feature
 delete mode 100644 synapse/http/proxy.py
 delete mode 100644 tests/http/test_proxy.py

(limited to 'synapse')

diff --git a/changelog.d/15773.feature b/changelog.d/15773.feature
deleted file mode 100644
index 0d77fae2dc..0000000000
--- a/changelog.d/15773.feature
+++ /dev/null
@@ -1 +0,0 @@
-Allow configuring the set of workers to proxy outbound federation traffic through via `outbound_federation_restricted_to`.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 04e8390ffe..ff59cbccc1 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3930,14 +3930,13 @@ federation_sender_instances:
 ---
 ### `instance_map`
 
-When using workers this should be a map from [`worker_name`](#worker_name) to the HTTP
-replication listener of the worker, if configured, and to the main process. Each worker
-declared under [`stream_writers`](../../workers.md#stream-writers) and
-[`outbound_federation_restricted_to`](#outbound_federation_restricted_to) needs a HTTP replication listener, and that
-listener should be included in the `instance_map`. The main process also needs an entry
-on the `instance_map`, and it should be listed under `main` **if even one other worker
-exists**. Ensure the port matches with what is declared inside the `listener` block for
-a `replication` listener.
+When using workers this should be a map from [`worker_name`](#worker_name) to the
+HTTP replication listener of the worker, if configured, and to the main process.
+Each worker declared under [`stream_writers`](../../workers.md#stream-writers) needs
+a HTTP replication listener, and that listener should be included in the `instance_map`.
+The main process also needs an entry on the `instance_map`, and it should be listed under
+`main` **if even one other worker exists**. Ensure the port matches with what is declared 
+inside the `listener` block for a `replication` listener.
 
 
 Example configuration:
@@ -3967,22 +3966,6 @@ stream_writers:
   typing: worker1
 ```
 ---
-### `outbound_federation_restricted_to`
-
-When using workers, you can restrict outbound federation traffic to only go through a
-specific subset of workers. Any worker specified here must also be in the
-[`instance_map`](#instance_map).
-
-```yaml
-outbound_federation_restricted_to:
-  - federation_sender1
-  - federation_sender2
-```
-
-Also see the [worker
-documentation](../../workers.md#restrict-outbound-federation-traffic-to-a-specific-set-of-workers)
-for more info.
----
 ### `run_background_tasks_on`
 
 The [worker](../../workers.md#background-tasks) that is used to run
diff --git a/docs/workers.md b/docs/workers.md
index 03415c6eb3..828f082e75 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -528,26 +528,6 @@ the stream writer for the `presence` stream:
 
     ^/_matrix/client/(api/v1|r0|v3|unstable)/presence/
 
-#### Restrict outbound federation traffic to a specific set of workers
-
-The `outbound_federation_restricted_to` configuration is useful to make sure outbound
-federation traffic only goes through a specified subset of workers. This allows you to
-set more strict access controls (like a firewall) for all workers and only allow the
-`federation_sender`'s to contact the outside world.
-
-```yaml
-instance_map:
-    main:
-        host: localhost
-        port: 8030
-    federation_sender1:
-        host: localhost
-        port: 8034
-
-outbound_federation_restricted_to:
-  - federation_sender1
-```
-
 #### Background tasks
 
 There is also support for moving background tasks to a separate
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 938ab40f27..936b1b0430 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -386,7 +386,6 @@ def listen_unix(
 
 
 def listen_http(
-    hs: "HomeServer",
     listener_config: ListenerConfig,
     root_resource: Resource,
     version_string: str,
@@ -407,7 +406,6 @@ def listen_http(
         version_string,
         max_request_body_size=max_request_body_size,
         reactor=reactor,
-        federation_agent=hs.get_federation_http_client().agent,
     )
 
     if isinstance(listener_config, TCPListenerConfig):
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index dc79efcc14..7406c3948c 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -221,7 +221,6 @@ class GenericWorkerServer(HomeServer):
         root_resource = create_resource_tree(resources, OptionsResource())
 
         _base.listen_http(
-            self,
             listener_config,
             root_resource,
             self.version_string,
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index f188c7265a..84236ac299 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -139,7 +139,6 @@ class SynapseHomeServer(HomeServer):
             root_resource = OptionsResource()
 
         ports = listen_http(
-            self,
             listener_config,
             create_resource_tree(resources, root_resource),
             self.version_string,
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 5c81eb5c67..ccfe75eaf3 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -15,7 +15,7 @@
 
 import argparse
 import logging
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Union
 
 import attr
 from pydantic import BaseModel, Extra, StrictBool, StrictInt, StrictStr
@@ -154,27 +154,6 @@ class WriterLocations:
     )
 
 
-@attr.s(auto_attribs=True)
-class OutboundFederationRestrictedTo:
-    """Whether we limit outbound federation to a certain set of instances.
-
-    Attributes:
-        instances: optional list of instances that can make outbound federation
-            requests. If None then all instances can make federation requests.
-        locations: list of instance locations to connect to proxy via.
-    """
-
-    instances: Optional[List[str]]
-    locations: List[InstanceLocationConfig] = attr.Factory(list)
-
-    def __contains__(self, instance: str) -> bool:
-        # It feels a bit dirty to return `True` if `instances` is `None`, but it makes
-        # sense in downstream usage in the sense that if
-        # `outbound_federation_restricted_to` is not configured, then any instance can
-        # talk to federation (no restrictions so always return `True`).
-        return self.instances is None or instance in self.instances
-
-
 class WorkerConfig(Config):
     """The workers are processes run separately to the main synapse process.
     They have their own pid_file and listener configuration. They use the
@@ -386,23 +365,6 @@ class WorkerConfig(Config):
             new_option_name="update_user_directory_from_worker",
         )
 
-        outbound_federation_restricted_to = config.get(
-            "outbound_federation_restricted_to", None
-        )
-        self.outbound_federation_restricted_to = OutboundFederationRestrictedTo(
-            outbound_federation_restricted_to
-        )
-        if outbound_federation_restricted_to:
-            for instance in outbound_federation_restricted_to:
-                if instance not in self.instance_map:
-                    raise ConfigError(
-                        "Instance %r is configured in 'outbound_federation_restricted_to' but does not appear in `instance_map` config."
-                        % (instance,)
-                    )
-                self.outbound_federation_restricted_to.locations.append(
-                    self.instance_map[instance]
-                )
-
     def _should_this_worker_perform_duty(
         self,
         config: Dict[str, Any],
diff --git a/synapse/http/client.py b/synapse/http/client.py
index ca2cdbc6e2..09ea93e10d 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -1037,12 +1037,7 @@ class _ReadBodyWithMaxSizeProtocol(protocol.Protocol):
         if reason.check(ResponseDone):
             self.deferred.callback(self.length)
         elif reason.check(PotentialDataLoss):
-            # This applies to requests which don't set `Content-Length` or a
-            # `Transfer-Encoding` in the response because in this case the end of the
-            # response is indicated by the connection being closed, an event which may
-            # also be due to a transient network problem or other error. But since this
-            # behavior is expected of some servers (like YouTube), let's ignore it.
-            # Stolen from https://github.com/twisted/treq/pull/49/files
+            # stolen from https://github.com/twisted/treq/pull/49/files
             # http://twistedmatrix.com/trac/ticket/4840
             self.deferred.callback(self.length)
         else:
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index b00396fdc7..cc4e258b0f 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -50,7 +50,7 @@ from twisted.internet.interfaces import IReactorTime
 from twisted.internet.task import Cooperator
 from twisted.web.client import ResponseFailed
 from twisted.web.http_headers import Headers
-from twisted.web.iweb import IAgent, IBodyProducer, IResponse
+from twisted.web.iweb import IBodyProducer, IResponse
 
 import synapse.metrics
 import synapse.util.retryutils
@@ -72,7 +72,6 @@ from synapse.http.client import (
     read_body_with_max_size,
 )
 from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent
-from synapse.http.proxyagent import ProxyAgent
 from synapse.http.types import QueryParams
 from synapse.logging import opentracing
 from synapse.logging.context import make_deferred_yieldable, run_in_background
@@ -394,32 +393,17 @@ class MatrixFederationHttpClient:
         if hs.config.server.user_agent_suffix:
             user_agent = "%s %s" % (user_agent, hs.config.server.user_agent_suffix)
 
-        outbound_federation_restricted_to = (
-            hs.config.worker.outbound_federation_restricted_to
+        federation_agent = MatrixFederationAgent(
+            self.reactor,
+            tls_client_options_factory,
+            user_agent.encode("ascii"),
+            hs.config.server.federation_ip_range_allowlist,
+            hs.config.server.federation_ip_range_blocklist,
         )
-        if hs.get_instance_name() in outbound_federation_restricted_to:
-            # Talk to federation directly
-            federation_agent: IAgent = MatrixFederationAgent(
-                self.reactor,
-                tls_client_options_factory,
-                user_agent.encode("ascii"),
-                hs.config.server.federation_ip_range_allowlist,
-                hs.config.server.federation_ip_range_blocklist,
-            )
-        else:
-            # We need to talk to federation via the proxy via one of the configured
-            # locations
-            federation_proxies = outbound_federation_restricted_to.locations
-            federation_agent = ProxyAgent(
-                self.reactor,
-                self.reactor,
-                tls_client_options_factory,
-                federation_proxies=federation_proxies,
-            )
 
         # Use a BlocklistingAgentWrapper to prevent circumventing the IP
         # blocking via IP literals in server names
-        self.agent: IAgent = BlocklistingAgentWrapper(
+        self.agent = BlocklistingAgentWrapper(
             federation_agent,
             ip_blocklist=hs.config.server.federation_ip_range_blocklist,
         )
@@ -428,6 +412,7 @@ class MatrixFederationHttpClient:
         self._store = hs.get_datastores().main
         self.version_string_bytes = hs.version_string.encode("ascii")
         self.default_timeout_seconds = hs.config.federation.client_timeout_ms / 1000
+
         self.max_long_retry_delay_seconds = (
             hs.config.federation.max_long_retry_delay_ms / 1000
         )
@@ -1146,101 +1131,6 @@ class MatrixFederationHttpClient:
             Succeeds when we get a 2xx HTTP response. The
             result will be the decoded JSON body.
 
-        Raises:
-            HttpResponseException: If we get an HTTP response code >= 300
-                (except 429).
-            NotRetryingDestination: If we are not yet ready to retry this
-                server.
-            FederationDeniedError: If this destination is not on our
-                federation whitelist
-            RequestSendFailed: If there were problems connecting to the
-                remote, due to e.g. DNS failures, connection timeouts etc.
-        """
-        json_dict, _ = await self.get_json_with_headers(
-            destination=destination,
-            path=path,
-            args=args,
-            retry_on_dns_fail=retry_on_dns_fail,
-            timeout=timeout,
-            ignore_backoff=ignore_backoff,
-            try_trailing_slash_on_400=try_trailing_slash_on_400,
-            parser=parser,
-        )
-        return json_dict
-
-    @overload
-    async def get_json_with_headers(
-        self,
-        destination: str,
-        path: str,
-        args: Optional[QueryParams] = None,
-        retry_on_dns_fail: bool = True,
-        timeout: Optional[int] = None,
-        ignore_backoff: bool = False,
-        try_trailing_slash_on_400: bool = False,
-        parser: Literal[None] = None,
-    ) -> Tuple[JsonDict, Dict[bytes, List[bytes]]]:
-        ...
-
-    @overload
-    async def get_json_with_headers(
-        self,
-        destination: str,
-        path: str,
-        args: Optional[QueryParams] = ...,
-        retry_on_dns_fail: bool = ...,
-        timeout: Optional[int] = ...,
-        ignore_backoff: bool = ...,
-        try_trailing_slash_on_400: bool = ...,
-        parser: ByteParser[T] = ...,
-    ) -> Tuple[T, Dict[bytes, List[bytes]]]:
-        ...
-
-    async def get_json_with_headers(
-        self,
-        destination: str,
-        path: str,
-        args: Optional[QueryParams] = None,
-        retry_on_dns_fail: bool = True,
-        timeout: Optional[int] = None,
-        ignore_backoff: bool = False,
-        try_trailing_slash_on_400: bool = False,
-        parser: Optional[ByteParser[T]] = None,
-    ) -> Tuple[Union[JsonDict, T], Dict[bytes, List[bytes]]]:
-        """GETs some json from the given host homeserver and path
-
-        Args:
-            destination: The remote server to send the HTTP request to.
-
-            path: The HTTP path.
-
-            args: A dictionary used to create query strings, defaults to
-                None.
-
-            retry_on_dns_fail: true if the request should be retried on DNS failures
-
-            timeout: number of milliseconds to wait for the response.
-                self._default_timeout (60s) by default.
-
-                Note that we may make several attempts to send the request; this
-                timeout applies to the time spent waiting for response headers for
-                *each* attempt (including connection time) as well as the time spent
-                reading the response body after a 200 response.
-
-            ignore_backoff: true to ignore the historical backoff data
-                and try the request anyway.
-
-            try_trailing_slash_on_400: True if on a 400 M_UNRECOGNIZED
-                response we should try appending a trailing slash to the end of
-                the request. Workaround for #3622 in Synapse <= v0.99.3.
-
-            parser: The parser to use to decode the response. Defaults to
-                parsing as JSON.
-
-        Returns:
-            Succeeds when we get a 2xx HTTP response. The result will be a tuple of the
-            decoded JSON body and a dict of the response headers.
-
         Raises:
             HttpResponseException: If we get an HTTP response code >= 300
                 (except 429).
@@ -1266,8 +1156,6 @@ class MatrixFederationHttpClient:
             timeout=timeout,
         )
 
-        headers = dict(response.headers.getAllRawHeaders())
-
         if timeout is not None:
             _sec_timeout = timeout / 1000
         else:
@@ -1285,7 +1173,7 @@ class MatrixFederationHttpClient:
             parser=parser,
         )
 
-        return body, headers
+        return body
 
     async def delete_json(
         self,
diff --git a/synapse/http/proxy.py b/synapse/http/proxy.py
deleted file mode 100644
index 0874d67760..0000000000
--- a/synapse/http/proxy.py
+++ /dev/null
@@ -1,249 +0,0 @@
-#  Copyright 2023 The Matrix.org Foundation C.I.C.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-
-import json
-import logging
-import urllib.parse
-from typing import TYPE_CHECKING, Any, Optional, Set, Tuple, cast
-
-from twisted.internet import protocol
-from twisted.internet.interfaces import ITCPTransport
-from twisted.internet.protocol import connectionDone
-from twisted.python import failure
-from twisted.python.failure import Failure
-from twisted.web.client import ResponseDone
-from twisted.web.http_headers import Headers
-from twisted.web.iweb import IAgent, IResponse
-from twisted.web.resource import IResource
-from twisted.web.server import Site
-
-from synapse.api.errors import Codes
-from synapse.http import QuieterFileBodyProducer
-from synapse.http.server import _AsyncResource
-from synapse.logging.context import make_deferred_yieldable, run_in_background
-from synapse.types import ISynapseReactor
-from synapse.util.async_helpers import timeout_deferred
-
-if TYPE_CHECKING:
-    from synapse.http.site import SynapseRequest
-
-logger = logging.getLogger(__name__)
-
-# "Hop-by-hop" headers (as opposed to "end-to-end" headers) as defined by RFC2616
-# section 13.5.1 and referenced in RFC9110 section 7.6.1. These are meant to only be
-# consumed by the immediate recipient and not be forwarded on.
-HOP_BY_HOP_HEADERS = {
-    "Connection",
-    "Keep-Alive",
-    "Proxy-Authenticate",
-    "Proxy-Authorization",
-    "TE",
-    "Trailers",
-    "Transfer-Encoding",
-    "Upgrade",
-}
-
-
-def parse_connection_header_value(
-    connection_header_value: Optional[bytes],
-) -> Set[str]:
-    """
-    Parse the `Connection` header to determine which headers we should not be copied
-    over from the remote response.
-
-    As defined by RFC2616 section 14.10 and RFC9110 section 7.6.1
-
-    Example: `Connection: close, X-Foo, X-Bar` will return `{"Close", "X-Foo", "X-Bar"}`
-
-    Even though "close" is a special directive, let's just treat it as just another
-    header for simplicity. If people want to check for this directive, they can simply
-    check for `"Close" in headers`.
-
-    Args:
-        connection_header_value: The value of the `Connection` header.
-
-    Returns:
-        The set of header names that should not be copied over from the remote response.
-        The keys are capitalized in canonical capitalization.
-    """
-    headers = Headers()
-    extra_headers_to_remove: Set[str] = set()
-    if connection_header_value:
-        extra_headers_to_remove = {
-            headers._canonicalNameCaps(connection_option.strip()).decode("ascii")
-            for connection_option in connection_header_value.split(b",")
-        }
-
-    return extra_headers_to_remove
-
-
-class ProxyResource(_AsyncResource):
-    """
-    A stub resource that proxies any requests with a `matrix-federation://` scheme
-    through the given `federation_agent` to the remote homeserver and ferries back the
-    info.
-    """
-
-    isLeaf = True
-
-    def __init__(self, reactor: ISynapseReactor, federation_agent: IAgent):
-        super().__init__(True)
-
-        self.reactor = reactor
-        self.agent = federation_agent
-
-    async def _async_render(self, request: "SynapseRequest") -> Tuple[int, Any]:
-        uri = urllib.parse.urlparse(request.uri)
-        assert uri.scheme == b"matrix-federation"
-
-        headers = Headers()
-        for header_name in (b"User-Agent", b"Authorization", b"Content-Type"):
-            header_value = request.getHeader(header_name)
-            if header_value:
-                headers.addRawHeader(header_name, header_value)
-
-        request_deferred = run_in_background(
-            self.agent.request,
-            request.method,
-            request.uri,
-            headers=headers,
-            bodyProducer=QuieterFileBodyProducer(request.content),
-        )
-        request_deferred = timeout_deferred(
-            request_deferred,
-            # This should be set longer than the timeout in `MatrixFederationHttpClient`
-            # so that it has enough time to complete and pass us the data before we give
-            # up.
-            timeout=90,
-            reactor=self.reactor,
-        )
-
-        response = await make_deferred_yieldable(request_deferred)
-
-        return response.code, response
-
-    def _send_response(
-        self,
-        request: "SynapseRequest",
-        code: int,
-        response_object: Any,
-    ) -> None:
-        response = cast(IResponse, response_object)
-        response_headers = cast(Headers, response.headers)
-
-        request.setResponseCode(code)
-
-        # The `Connection` header also defines which headers should not be copied over.
-        connection_header = response_headers.getRawHeaders(b"connection")
-        extra_headers_to_remove = parse_connection_header_value(
-            connection_header[0] if connection_header else None
-        )
-
-        # Copy headers.
-        for k, v in response_headers.getAllRawHeaders():
-            # Do not copy over any hop-by-hop headers. These are meant to only be
-            # consumed by the immediate recipient and not be forwarded on.
-            header_key = k.decode("ascii")
-            if (
-                header_key in HOP_BY_HOP_HEADERS
-                or header_key in extra_headers_to_remove
-            ):
-                continue
-
-            request.responseHeaders.setRawHeaders(k, v)
-
-        response.deliverBody(_ProxyResponseBody(request))
-
-    def _send_error_response(
-        self,
-        f: failure.Failure,
-        request: "SynapseRequest",
-    ) -> None:
-        request.setResponseCode(502)
-        request.setHeader(b"Content-Type", b"application/json")
-        request.write(
-            (
-                json.dumps(
-                    {
-                        "errcode": Codes.UNKNOWN,
-                        "err": "ProxyResource: Error when proxying request: %s %s -> %s"
-                        % (
-                            request.method.decode("ascii"),
-                            request.uri.decode("ascii"),
-                            f,
-                        ),
-                    }
-                )
-            ).encode()
-        )
-        request.finish()
-
-
-class _ProxyResponseBody(protocol.Protocol):
-    """
-    A protocol that proxies the given remote response data back out to the given local
-    request.
-    """
-
-    transport: Optional[ITCPTransport] = None
-
-    def __init__(self, request: "SynapseRequest") -> None:
-        self._request = request
-
-    def dataReceived(self, data: bytes) -> None:
-        # Avoid sending response data to the local request that already disconnected
-        if self._request._disconnected and self.transport is not None:
-            # Close the connection (forcefully) since all the data will get
-            # discarded anyway.
-            self.transport.abortConnection()
-            return
-
-        self._request.write(data)
-
-    def connectionLost(self, reason: Failure = connectionDone) -> None:
-        # If the local request is already finished (successfully or failed), don't
-        # worry about sending anything back.
-        if self._request.finished:
-            return
-
-        if reason.check(ResponseDone):
-            self._request.finish()
-        else:
-            # Abort the underlying request since our remote request also failed.
-            self._request.transport.abortConnection()
-
-
-class ProxySite(Site):
-    """
-    Proxies any requests with a `matrix-federation://` scheme through the given
-    `federation_agent`. Otherwise, behaves like a normal `Site`.
-    """
-
-    def __init__(
-        self,
-        resource: IResource,
-        reactor: ISynapseReactor,
-        federation_agent: IAgent,
-    ):
-        super().__init__(resource, reactor=reactor)
-
-        self._proxy_resource = ProxyResource(reactor, federation_agent)
-
-    def getResourceFor(self, request: "SynapseRequest") -> IResource:
-        uri = urllib.parse.urlparse(request.uri)
-        if uri.scheme == b"matrix-federation":
-            return self._proxy_resource
-
-        return super().getResourceFor(request)
diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py
index 1fa3adbef2..7bdc4acae7 100644
--- a/synapse/http/proxyagent.py
+++ b/synapse/http/proxyagent.py
@@ -12,9 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-import random
 import re
-from typing import Any, Collection, Dict, List, Optional, Sequence, Tuple
+from typing import Any, Dict, Optional, Tuple
 from urllib.parse import urlparse
 from urllib.request import (  # type: ignore[attr-defined]
     getproxies_environment,
@@ -25,12 +24,7 @@ from zope.interface import implementer
 
 from twisted.internet import defer
 from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
-from twisted.internet.interfaces import (
-    IProtocol,
-    IProtocolFactory,
-    IReactorCore,
-    IStreamClientEndpoint,
-)
+from twisted.internet.interfaces import IReactorCore, IStreamClientEndpoint
 from twisted.python.failure import Failure
 from twisted.web.client import (
     URI,
@@ -42,10 +36,8 @@ from twisted.web.error import SchemeNotSupported
 from twisted.web.http_headers import Headers
 from twisted.web.iweb import IAgent, IBodyProducer, IPolicyForHTTPS, IResponse
 
-from synapse.config.workers import InstanceLocationConfig
 from synapse.http import redact_uri
 from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint, ProxyCredentials
-from synapse.logging.context import run_in_background
 
 logger = logging.getLogger(__name__)
 
@@ -82,10 +74,6 @@ class ProxyAgent(_AgentBase):
         use_proxy: Whether proxy settings should be discovered and used
             from conventional environment variables.
 
-        federation_proxies: An optional list of locations to proxy outbound federation
-            traffic through (only requests that use the `matrix-federation://` scheme
-            will be proxied).
-
     Raises:
         ValueError if use_proxy is set and the environment variables
             contain an invalid proxy specification.
@@ -101,7 +89,6 @@ class ProxyAgent(_AgentBase):
         bindAddress: Optional[bytes] = None,
         pool: Optional[HTTPConnectionPool] = None,
         use_proxy: bool = False,
-        federation_proxies: Collection[InstanceLocationConfig] = (),
     ):
         contextFactory = contextFactory or BrowserLikePolicyForHTTPS()
 
@@ -140,27 +127,6 @@ class ProxyAgent(_AgentBase):
         self._policy_for_https = contextFactory
         self._reactor = reactor
 
-        self._federation_proxy_endpoint: Optional[IStreamClientEndpoint] = None
-        if federation_proxies:
-            endpoints = []
-            for federation_proxy in federation_proxies:
-                endpoint = HostnameEndpoint(
-                    self.proxy_reactor,
-                    federation_proxy.host,
-                    federation_proxy.port,
-                )
-
-                if federation_proxy.tls:
-                    tls_connection_creator = self._policy_for_https.creatorForNetloc(
-                        federation_proxy.host,
-                        federation_proxy.port,
-                    )
-                    endpoint = wrapClientTLS(tls_connection_creator, endpoint)
-
-                endpoints.append(endpoint)
-
-            self._federation_proxy_endpoint = _ProxyEndpoints(endpoints)
-
     def request(
         self,
         method: bytes,
@@ -248,14 +214,6 @@ class ProxyAgent(_AgentBase):
                 parsed_uri.port,
                 self.https_proxy_creds,
             )
-        elif (
-            parsed_uri.scheme == b"matrix-federation"
-            and self._federation_proxy_endpoint
-        ):
-            # Cache *all* connections under the same key, since we are only
-            # connecting to a single destination, the proxy:
-            endpoint = self._federation_proxy_endpoint
-            request_path = uri
         else:
             # not using a proxy
             endpoint = HostnameEndpoint(
@@ -275,11 +233,6 @@ class ProxyAgent(_AgentBase):
             endpoint = wrapClientTLS(tls_connection_creator, endpoint)
         elif parsed_uri.scheme == b"http":
             pass
-        elif (
-            parsed_uri.scheme == b"matrix-federation"
-            and self._federation_proxy_endpoint
-        ):
-            pass
         else:
             return defer.fail(
                 Failure(
@@ -384,31 +337,3 @@ def parse_proxy(
         credentials = ProxyCredentials(b"".join([url.username, b":", url.password]))
 
     return url.scheme, url.hostname, url.port or default_port, credentials
-
-
-@implementer(IStreamClientEndpoint)
-class _ProxyEndpoints:
-    """An endpoint that randomly iterates through a given list of endpoints at
-    each connection attempt.
-    """
-
-    def __init__(self, endpoints: Sequence[IStreamClientEndpoint]) -> None:
-        assert endpoints
-        self._endpoints = endpoints
-
-    def connect(
-        self, protocol_factory: IProtocolFactory
-    ) -> "defer.Deferred[IProtocol]":
-        """Implements IStreamClientEndpoint interface"""
-
-        return run_in_background(self._do_connect, protocol_factory)
-
-    async def _do_connect(self, protocol_factory: IProtocolFactory) -> IProtocol:
-        failures: List[Failure] = []
-        for endpoint in random.sample(self._endpoints, k=len(self._endpoints)):
-            try:
-                return await endpoint.connect(protocol_factory)
-            except Exception:
-                failures.append(Failure())
-
-        failures.pop().raiseException()
diff --git a/synapse/http/server.py b/synapse/http/server.py
index ff3153a9d9..933172c873 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -18,7 +18,6 @@ import html
 import logging
 import types
 import urllib
-import urllib.parse
 from http import HTTPStatus
 from http.client import FOUND
 from inspect import isawaitable
@@ -66,6 +65,7 @@ from synapse.api.errors import (
     UnrecognizedRequestError,
 )
 from synapse.config.homeserver import HomeServerConfig
+from synapse.http.site import SynapseRequest
 from synapse.logging.context import defer_to_thread, preserve_fn, run_in_background
 from synapse.logging.opentracing import active_span, start_active_span, trace_servlet
 from synapse.util import json_encoder
@@ -76,7 +76,6 @@ from synapse.util.iterutils import chunk_seq
 if TYPE_CHECKING:
     import opentracing
 
-    from synapse.http.site import SynapseRequest
     from synapse.server import HomeServer
 
 logger = logging.getLogger(__name__)
@@ -103,7 +102,7 @@ HTTP_STATUS_REQUEST_CANCELLED = 499
 
 
 def return_json_error(
-    f: failure.Failure, request: "SynapseRequest", config: Optional[HomeServerConfig]
+    f: failure.Failure, request: SynapseRequest, config: Optional[HomeServerConfig]
 ) -> None:
     """Sends a JSON error response to clients."""
 
@@ -221,8 +220,8 @@ def return_html_error(
 
 
 def wrap_async_request_handler(
-    h: Callable[["_AsyncResource", "SynapseRequest"], Awaitable[None]]
-) -> Callable[["_AsyncResource", "SynapseRequest"], "defer.Deferred[None]"]:
+    h: Callable[["_AsyncResource", SynapseRequest], Awaitable[None]]
+) -> Callable[["_AsyncResource", SynapseRequest], "defer.Deferred[None]"]:
     """Wraps an async request handler so that it calls request.processing.
 
     This helps ensure that work done by the request handler after the request is completed
@@ -236,7 +235,7 @@ def wrap_async_request_handler(
     """
 
     async def wrapped_async_request_handler(
-        self: "_AsyncResource", request: "SynapseRequest"
+        self: "_AsyncResource", request: SynapseRequest
     ) -> None:
         with request.processing():
             await h(self, request)
@@ -301,7 +300,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
 
         self._extract_context = extract_context
 
-    def render(self, request: "SynapseRequest") -> int:
+    def render(self, request: SynapseRequest) -> int:
         """This gets called by twisted every time someone sends us a request."""
         request.render_deferred = defer.ensureDeferred(
             self._async_render_wrapper(request)
@@ -309,7 +308,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
         return NOT_DONE_YET
 
     @wrap_async_request_handler
-    async def _async_render_wrapper(self, request: "SynapseRequest") -> None:
+    async def _async_render_wrapper(self, request: SynapseRequest) -> None:
         """This is a wrapper that delegates to `_async_render` and handles
         exceptions, return values, metrics, etc.
         """
@@ -327,15 +326,9 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
             # of our stack, and thus gives us a sensible stack
             # trace.
             f = failure.Failure()
-            logger.exception(
-                "Error handling request",
-                exc_info=(f.type, f.value, f.getTracebackObject()),
-            )
             self._send_error_response(f, request)
 
-    async def _async_render(
-        self, request: "SynapseRequest"
-    ) -> Optional[Tuple[int, Any]]:
+    async def _async_render(self, request: SynapseRequest) -> Optional[Tuple[int, Any]]:
         """Delegates to `_async_render_<METHOD>` methods, or returns a 400 if
         no appropriate method exists. Can be overridden in sub classes for
         different routing.
@@ -365,7 +358,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
     @abc.abstractmethod
     def _send_response(
         self,
-        request: "SynapseRequest",
+        request: SynapseRequest,
         code: int,
         response_object: Any,
     ) -> None:
@@ -375,7 +368,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
     def _send_error_response(
         self,
         f: failure.Failure,
-        request: "SynapseRequest",
+        request: SynapseRequest,
     ) -> None:
         raise NotImplementedError()
 
@@ -391,7 +384,7 @@ class DirectServeJsonResource(_AsyncResource):
 
     def _send_response(
         self,
-        request: "SynapseRequest",
+        request: SynapseRequest,
         code: int,
         response_object: Any,
     ) -> None:
@@ -408,7 +401,7 @@ class DirectServeJsonResource(_AsyncResource):
     def _send_error_response(
         self,
         f: failure.Failure,
-        request: "SynapseRequest",
+        request: SynapseRequest,
     ) -> None:
         """Implements _AsyncResource._send_error_response"""
         return_json_error(f, request, None)
@@ -480,7 +473,7 @@ class JsonResource(DirectServeJsonResource):
             )
 
     def _get_handler_for_request(
-        self, request: "SynapseRequest"
+        self, request: SynapseRequest
     ) -> Tuple[ServletCallback, str, Dict[str, str]]:
         """Finds a callback method to handle the given request.
 
@@ -510,7 +503,7 @@ class JsonResource(DirectServeJsonResource):
         # Huh. No one wanted to handle that? Fiiiiiine.
         raise UnrecognizedRequestError(code=404)
 
-    async def _async_render(self, request: "SynapseRequest") -> Tuple[int, Any]:
+    async def _async_render(self, request: SynapseRequest) -> Tuple[int, Any]:
         callback, servlet_classname, group_dict = self._get_handler_for_request(request)
 
         request.is_render_cancellable = is_function_cancellable(callback)
@@ -542,7 +535,7 @@ class JsonResource(DirectServeJsonResource):
     def _send_error_response(
         self,
         f: failure.Failure,
-        request: "SynapseRequest",
+        request: SynapseRequest,
     ) -> None:
         """Implements _AsyncResource._send_error_response"""
         return_json_error(f, request, self.hs.config)
@@ -558,7 +551,7 @@ class DirectServeHtmlResource(_AsyncResource):
 
     def _send_response(
         self,
-        request: "SynapseRequest",
+        request: SynapseRequest,
         code: int,
         response_object: Any,
     ) -> None:
@@ -572,7 +565,7 @@ class DirectServeHtmlResource(_AsyncResource):
     def _send_error_response(
         self,
         f: failure.Failure,
-        request: "SynapseRequest",
+        request: SynapseRequest,
     ) -> None:
         """Implements _AsyncResource._send_error_response"""
         return_html_error(f, request, self.ERROR_TEMPLATE)
@@ -599,7 +592,7 @@ class UnrecognizedRequestResource(resource.Resource):
     errcode of M_UNRECOGNIZED.
     """
 
-    def render(self, request: "SynapseRequest") -> int:
+    def render(self, request: SynapseRequest) -> int:
         f = failure.Failure(UnrecognizedRequestError(code=404))
         return_json_error(f, request, None)
         # A response has already been sent but Twisted requires either NOT_DONE_YET
@@ -629,7 +622,7 @@ class RootRedirect(resource.Resource):
 class OptionsResource(resource.Resource):
     """Responds to OPTION requests for itself and all children."""
 
-    def render_OPTIONS(self, request: "SynapseRequest") -> bytes:
+    def render_OPTIONS(self, request: SynapseRequest) -> bytes:
         request.setResponseCode(204)
         request.setHeader(b"Content-Length", b"0")
 
@@ -744,7 +737,7 @@ def _encode_json_bytes(json_object: object) -> bytes:
 
 
 def respond_with_json(
-    request: "SynapseRequest",
+    request: SynapseRequest,
     code: int,
     json_object: Any,
     send_cors: bool = False,
@@ -794,7 +787,7 @@ def respond_with_json(
 
 
 def respond_with_json_bytes(
-    request: "SynapseRequest",
+    request: SynapseRequest,
     code: int,
     json_bytes: bytes,
     send_cors: bool = False,
@@ -832,7 +825,7 @@ def respond_with_json_bytes(
 
 
 async def _async_write_json_to_request_in_thread(
-    request: "SynapseRequest",
+    request: SynapseRequest,
     json_encoder: Callable[[Any], bytes],
     json_object: Any,
 ) -> None:
@@ -890,7 +883,7 @@ def _write_bytes_to_request(request: Request, bytes_to_write: bytes) -> None:
     _ByteProducer(request, bytes_generator)
 
 
-def set_cors_headers(request: "SynapseRequest") -> None:
+def set_cors_headers(request: SynapseRequest) -> None:
     """Set the CORS headers so that javascript running in a web browsers can
     use this API
 
@@ -988,7 +981,7 @@ def set_clickjacking_protection_headers(request: Request) -> None:
 
 
 def respond_with_redirect(
-    request: "SynapseRequest", url: bytes, statusCode: int = FOUND, cors: bool = False
+    request: SynapseRequest, url: bytes, statusCode: int = FOUND, cors: bool = False
 ) -> None:
     """
     Write a 302 (or other specified status code) response to the request, if it is still alive.
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 0ee2598345..5b5a7c1e59 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -21,28 +21,25 @@ from zope.interface import implementer
 
 from twisted.internet.address import UNIXAddress
 from twisted.internet.defer import Deferred
-from twisted.internet.interfaces import IAddress
+from twisted.internet.interfaces import IAddress, IReactorTime
 from twisted.python.failure import Failure
 from twisted.web.http import HTTPChannel
-from twisted.web.iweb import IAgent
 from twisted.web.resource import IResource, Resource
-from twisted.web.server import Request
+from twisted.web.server import Request, Site
 
 from synapse.config.server import ListenerConfig
 from synapse.http import get_request_user_agent, redact_uri
-from synapse.http.proxy import ProxySite
 from synapse.http.request_metrics import RequestMetrics, requests_counter
 from synapse.logging.context import (
     ContextRequest,
     LoggingContext,
     PreserveLoggingContext,
 )
-from synapse.types import ISynapseReactor, Requester
+from synapse.types import Requester
 
 if TYPE_CHECKING:
     import opentracing
 
-
 logger = logging.getLogger(__name__)
 
 _next_request_seq = 0
@@ -105,7 +102,7 @@ class SynapseRequest(Request):
         # A boolean indicating whether `render_deferred` should be cancelled if the
         # client disconnects early. Expected to be set by the coroutine started by
         # `Resource.render`, if rendering is asynchronous.
-        self.is_render_cancellable: bool = False
+        self.is_render_cancellable = False
 
         global _next_request_seq
         self.request_seq = _next_request_seq
@@ -604,7 +601,7 @@ class _XForwardedForAddress:
     host: str
 
 
-class SynapseSite(ProxySite):
+class SynapseSite(Site):
     """
     Synapse-specific twisted http Site
 
@@ -626,8 +623,7 @@ class SynapseSite(ProxySite):
         resource: IResource,
         server_version_string: str,
         max_request_body_size: int,
-        reactor: ISynapseReactor,
-        federation_agent: IAgent,
+        reactor: IReactorTime,
     ):
         """
 
@@ -642,11 +638,7 @@ class SynapseSite(ProxySite):
                 dropping the connection
             reactor: reactor to be used to manage connection timeouts
         """
-        super().__init__(
-            resource=resource,
-            reactor=reactor,
-            federation_agent=federation_agent,
-        )
+        Site.__init__(self, resource, reactor=reactor)
 
         self.site_tag = site_tag
         self.reactor = reactor
@@ -657,9 +649,7 @@ class SynapseSite(ProxySite):
 
         request_id_header = config.http_options.request_id_header
 
-        self.experimental_cors_msc3886: bool = (
-            config.http_options.experimental_cors_msc3886
-        )
+        self.experimental_cors_msc3886 = config.http_options.experimental_cors_msc3886
 
         def request_factory(channel: HTTPChannel, queued: bool) -> Request:
             return request_class(
diff --git a/tests/app/test_openid_listener.py b/tests/app/test_openid_listener.py
index 21c5309740..5a965f233b 100644
--- a/tests/app/test_openid_listener.py
+++ b/tests/app/test_openid_listener.py
@@ -31,7 +31,9 @@ from tests.unittest import HomeserverTestCase
 
 class FederationReaderOpenIDListenerTests(HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver(homeserver_to_use=GenericWorkerServer)
+        hs = self.setup_test_homeserver(
+            federation_http_client=None, homeserver_to_use=GenericWorkerServer
+        )
         return hs
 
     def default_config(self) -> JsonDict:
@@ -89,7 +91,9 @@ class FederationReaderOpenIDListenerTests(HomeserverTestCase):
 @patch("synapse.app.homeserver.KeyResource", new=Mock())
 class SynapseHomeserverOpenIDListenerTests(HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver(homeserver_to_use=SynapseHomeServer)
+        hs = self.setup_test_homeserver(
+            federation_http_client=None, homeserver_to_use=SynapseHomeServer
+        )
         return hs
 
     @parameterized.expand(
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index 66215af2b8..ee48f9e546 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -41,6 +41,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
         self.appservice_api = mock.Mock()
         hs = self.setup_test_homeserver(
             "server",
+            federation_http_client=None,
             application_service_api=self.appservice_api,
         )
         handler = hs.get_device_handler()
@@ -400,7 +401,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
 
 class DehydrationTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver("server")
+        hs = self.setup_test_homeserver("server", federation_http_client=None)
         handler = hs.get_device_handler()
         assert isinstance(handler, DeviceHandler)
         self.handler = handler
diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py
index 5f11d5df11..bf0862ed54 100644
--- a/tests/handlers/test_federation.py
+++ b/tests/handlers/test_federation.py
@@ -57,7 +57,7 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase):
     ]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver()
+        hs = self.setup_test_homeserver(federation_http_client=None)
         self.handler = hs.get_federation_handler()
         self.store = hs.get_datastores().main
         return hs
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index fd66d573d2..19f5322317 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -993,6 +993,7 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         hs = self.setup_test_homeserver(
             "server",
+            federation_http_client=None,
             federation_sender=Mock(spec=FederationSender),
         )
         return hs
diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py
index 5da1d95f0b..94518a7196 100644
--- a/tests/handlers/test_typing.py
+++ b/tests/handlers/test_typing.py
@@ -17,8 +17,6 @@ import json
 from typing import Dict, List, Set
 from unittest.mock import ANY, Mock, call
 
-from netaddr import IPSet
-
 from twisted.test.proto_helpers import MemoryReactor
 from twisted.web.resource import Resource
 
@@ -26,7 +24,6 @@ from synapse.api.constants import EduTypes
 from synapse.api.errors import AuthError
 from synapse.federation.transport.server import TransportLayerServer
 from synapse.handlers.typing import TypingWriterHandler
-from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent
 from synapse.server import HomeServer
 from synapse.types import JsonDict, Requester, UserID, create_requester
 from synapse.util import Clock
@@ -79,13 +76,6 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
         # we mock out the federation client too
         self.mock_federation_client = Mock(spec=["put_json"])
         self.mock_federation_client.put_json.return_value = make_awaitable((200, "OK"))
-        self.mock_federation_client.agent = MatrixFederationAgent(
-            reactor,
-            tls_client_options_factory=None,
-            user_agent=b"SynapseInTrialTest/0.0.0",
-            ip_allowlist=None,
-            ip_blocklist=IPSet(),
-        )
 
         # the tests assume that we are starting at unix time 1000
         reactor.pump((1000,))
diff --git a/tests/http/test_matrixfederationclient.py b/tests/http/test_matrixfederationclient.py
index a8b9737d1f..b5f4a60fe5 100644
--- a/tests/http/test_matrixfederationclient.py
+++ b/tests/http/test_matrixfederationclient.py
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Dict, Generator
-from unittest.mock import ANY, Mock, create_autospec
+from typing import Generator
+from unittest.mock import Mock
 
 from netaddr import IPSet
 from parameterized import parameterized
@@ -21,11 +21,10 @@ from twisted.internet import defer
 from twisted.internet.defer import Deferred, TimeoutError
 from twisted.internet.error import ConnectingCancelledError, DNSLookupError
 from twisted.test.proto_helpers import MemoryReactor, StringTransport
-from twisted.web.client import Agent, ResponseNeverReceived
+from twisted.web.client import ResponseNeverReceived
 from twisted.web.http import HTTPChannel
-from twisted.web.http_headers import Headers
 
-from synapse.api.errors import HttpResponseException, RequestSendFailed
+from synapse.api.errors import RequestSendFailed
 from synapse.http.matrixfederationclient import (
     ByteParser,
     MatrixFederationHttpClient,
@@ -40,9 +39,7 @@ from synapse.logging.context import (
 from synapse.server import HomeServer
 from synapse.util import Clock
 
-from tests.replication._base import BaseMultiWorkerStreamTestCase
 from tests.server import FakeTransport
-from tests.test_utils import FakeResponse
 from tests.unittest import HomeserverTestCase, override_config
 
 
@@ -661,181 +658,3 @@ class FederationClientTests(HomeserverTestCase):
         self.assertEqual(self.cl.max_short_retry_delay_seconds, 7)
         self.assertEqual(self.cl.max_long_retries, 20)
         self.assertEqual(self.cl.max_short_retries, 5)
-
-
-class FederationClientProxyTests(BaseMultiWorkerStreamTestCase):
-    def default_config(self) -> Dict[str, Any]:
-        conf = super().default_config()
-        conf["instance_map"] = {
-            "main": {"host": "testserv", "port": 8765},
-            "federation_sender": {"host": "testserv", "port": 1001},
-        }
-        return conf
-
-    @override_config({"outbound_federation_restricted_to": ["federation_sender"]})
-    def test_proxy_requests_through_federation_sender_worker(self) -> None:
-        """
-        Test that all outbound federation requests go through the `federation_sender`
-        worker
-        """
-        # Mock out the `MatrixFederationHttpClient` of the `federation_sender` instance
-        # so we can act like some remote server responding to requests
-        mock_client_on_federation_sender = Mock()
-        mock_agent_on_federation_sender = create_autospec(Agent, spec_set=True)
-        mock_client_on_federation_sender.agent = mock_agent_on_federation_sender
-
-        # Create the `federation_sender` worker
-        self.federation_sender = self.make_worker_hs(
-            "synapse.app.generic_worker",
-            {"worker_name": "federation_sender"},
-            federation_http_client=mock_client_on_federation_sender,
-        )
-
-        # Fake `remoteserv:8008` responding to requests
-        mock_agent_on_federation_sender.request.side_effect = (
-            lambda *args, **kwargs: defer.succeed(
-                FakeResponse.json(
-                    payload={
-                        "foo": "bar",
-                    }
-                )
-            )
-        )
-
-        # This federation request from the main process should be proxied through the
-        # `federation_sender` worker off to the remote server
-        test_request_from_main_process_d = defer.ensureDeferred(
-            self.hs.get_federation_http_client().get_json("remoteserv:8008", "foo/bar")
-        )
-
-        # Pump the reactor so our deferred goes through the motions
-        self.pump()
-
-        # Make sure that the request was proxied through the `federation_sender` worker
-        mock_agent_on_federation_sender.request.assert_called_once_with(
-            b"GET",
-            b"matrix-federation://remoteserv:8008/foo/bar",
-            headers=ANY,
-            bodyProducer=ANY,
-        )
-
-        # Make sure the response is as expected back on the main worker
-        res = self.successResultOf(test_request_from_main_process_d)
-        self.assertEqual(res, {"foo": "bar"})
-
-    @override_config({"outbound_federation_restricted_to": ["federation_sender"]})
-    def test_proxy_request_with_network_error_through_federation_sender_worker(
-        self,
-    ) -> None:
-        """
-        Test that when the outbound federation request fails with a network related
-        error, a sensible error makes its way back to the main process.
-        """
-        # Mock out the `MatrixFederationHttpClient` of the `federation_sender` instance
-        # so we can act like some remote server responding to requests
-        mock_client_on_federation_sender = Mock()
-        mock_agent_on_federation_sender = create_autospec(Agent, spec_set=True)
-        mock_client_on_federation_sender.agent = mock_agent_on_federation_sender
-
-        # Create the `federation_sender` worker
-        self.federation_sender = self.make_worker_hs(
-            "synapse.app.generic_worker",
-            {"worker_name": "federation_sender"},
-            federation_http_client=mock_client_on_federation_sender,
-        )
-
-        # Fake `remoteserv:8008` responding to requests
-        mock_agent_on_federation_sender.request.side_effect = (
-            lambda *args, **kwargs: defer.fail(ResponseNeverReceived("fake error"))
-        )
-
-        # This federation request from the main process should be proxied through the
-        # `federation_sender` worker off to the remote server
-        test_request_from_main_process_d = defer.ensureDeferred(
-            self.hs.get_federation_http_client().get_json("remoteserv:8008", "foo/bar")
-        )
-
-        # Pump the reactor so our deferred goes through the motions. We pump with 10
-        # seconds (0.1 * 100) so the `MatrixFederationHttpClient` runs out of retries
-        # and finally passes along the error response.
-        self.pump(0.1)
-
-        # Make sure that the request was proxied through the `federation_sender` worker
-        mock_agent_on_federation_sender.request.assert_called_with(
-            b"GET",
-            b"matrix-federation://remoteserv:8008/foo/bar",
-            headers=ANY,
-            bodyProducer=ANY,
-        )
-
-        # Make sure we get some sort of error back on the main worker
-        failure_res = self.failureResultOf(test_request_from_main_process_d)
-        self.assertIsInstance(failure_res.value, RequestSendFailed)
-        self.assertIsInstance(failure_res.value.inner_exception, HttpResponseException)
-
-    @override_config({"outbound_federation_restricted_to": ["federation_sender"]})
-    def test_proxy_requests_and_discards_hop_by_hop_headers(self) -> None:
-        """
-        Test to make sure hop-by-hop headers and addional headers defined in the
-        `Connection` header are discarded when proxying requests
-        """
-        # Mock out the `MatrixFederationHttpClient` of the `federation_sender` instance
-        # so we can act like some remote server responding to requests
-        mock_client_on_federation_sender = Mock()
-        mock_agent_on_federation_sender = create_autospec(Agent, spec_set=True)
-        mock_client_on_federation_sender.agent = mock_agent_on_federation_sender
-
-        # Create the `federation_sender` worker
-        self.federation_sender = self.make_worker_hs(
-            "synapse.app.generic_worker",
-            {"worker_name": "federation_sender"},
-            federation_http_client=mock_client_on_federation_sender,
-        )
-
-        # Fake `remoteserv:8008` responding to requests
-        mock_agent_on_federation_sender.request.side_effect = lambda *args, **kwargs: defer.succeed(
-            FakeResponse(
-                code=200,
-                body=b'{"foo": "bar"}',
-                headers=Headers(
-                    {
-                        "Content-Type": ["application/json"],
-                        "Connection": ["close, X-Foo, X-Bar"],
-                        # Should be removed because it's defined in the `Connection` header
-                        "X-Foo": ["foo"],
-                        "X-Bar": ["bar"],
-                        # Should be removed because it's a hop-by-hop header
-                        "Proxy-Authorization": "abcdef",
-                    }
-                ),
-            )
-        )
-
-        # This federation request from the main process should be proxied through the
-        # `federation_sender` worker off to the remote server
-        test_request_from_main_process_d = defer.ensureDeferred(
-            self.hs.get_federation_http_client().get_json_with_headers(
-                "remoteserv:8008", "foo/bar"
-            )
-        )
-
-        # Pump the reactor so our deferred goes through the motions
-        self.pump()
-
-        # Make sure that the request was proxied through the `federation_sender` worker
-        mock_agent_on_federation_sender.request.assert_called_once_with(
-            b"GET",
-            b"matrix-federation://remoteserv:8008/foo/bar",
-            headers=ANY,
-            bodyProducer=ANY,
-        )
-
-        res, headers = self.successResultOf(test_request_from_main_process_d)
-        header_names = set(headers.keys())
-
-        # Make sure the response does not include the hop-by-hop headers
-        self.assertNotIn(b"X-Foo", header_names)
-        self.assertNotIn(b"X-Bar", header_names)
-        self.assertNotIn(b"Proxy-Authorization", header_names)
-        # Make sure the response is as expected back on the main worker
-        self.assertEqual(res, {"foo": "bar"})
diff --git a/tests/http/test_proxy.py b/tests/http/test_proxy.py
deleted file mode 100644
index 0dc9ba8e05..0000000000
--- a/tests/http/test_proxy.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# Copyright 2023 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from typing import Set
-
-from parameterized import parameterized
-
-from synapse.http.proxy import parse_connection_header_value
-
-from tests.unittest import TestCase
-
-
-class ProxyTests(TestCase):
-    @parameterized.expand(
-        [
-            [b"close, X-Foo, X-Bar", {"Close", "X-Foo", "X-Bar"}],
-            # No whitespace
-            [b"close,X-Foo,X-Bar", {"Close", "X-Foo", "X-Bar"}],
-            # More whitespace
-            [b"close,    X-Foo,      X-Bar", {"Close", "X-Foo", "X-Bar"}],
-            # "close" directive in not the first position
-            [b"X-Foo, X-Bar, close", {"X-Foo", "X-Bar", "Close"}],
-            # Normalizes header capitalization
-            [b"keep-alive, x-fOo, x-bAr", {"Keep-Alive", "X-Foo", "X-Bar"}],
-            # Handles header names with whitespace
-            [
-                b"keep-alive, x  foo, x bar",
-                {"Keep-Alive", "X  foo", "X bar"},
-            ],
-        ]
-    )
-    def test_parse_connection_header_value(
-        self,
-        connection_header_value: bytes,
-        expected_extra_headers_to_remove: Set[str],
-    ) -> None:
-        """
-        Tests that the connection header value is parsed correctly
-        """
-        self.assertEqual(
-            expected_extra_headers_to_remove,
-            parse_connection_header_value(connection_header_value),
-        )
diff --git a/tests/replication/_base.py b/tests/replication/_base.py
index 96badc46b0..eb9b1f1cd9 100644
--- a/tests/replication/_base.py
+++ b/tests/replication/_base.py
@@ -69,10 +69,10 @@ class BaseStreamTestCase(unittest.HomeserverTestCase):
         # Make a new HomeServer object for the worker
         self.reactor.lookups["testserv"] = "1.2.3.4"
         self.worker_hs = self.setup_test_homeserver(
+            federation_http_client=None,
             homeserver_to_use=GenericWorkerServer,
             config=self._get_worker_hs_config(),
             reactor=self.reactor,
-            federation_http_client=None,
         )
 
         # Since we use sqlite in memory databases we need to make sure the
@@ -380,7 +380,6 @@ class BaseMultiWorkerStreamTestCase(unittest.HomeserverTestCase):
             server_version_string="1",
             max_request_body_size=8192,
             reactor=self.reactor,
-            federation_agent=worker_hs.get_federation_http_client().agent,
         )
 
         worker_hs.get_replication_command_handler().start_replication(worker_hs)
diff --git a/tests/replication/test_federation_sender_shard.py b/tests/replication/test_federation_sender_shard.py
index a324b4d31d..08703206a9 100644
--- a/tests/replication/test_federation_sender_shard.py
+++ b/tests/replication/test_federation_sender_shard.py
@@ -14,18 +14,14 @@
 import logging
 from unittest.mock import Mock
 
-from netaddr import IPSet
-
 from synapse.api.constants import EventTypes, Membership
 from synapse.events.builder import EventBuilderFactory
 from synapse.handlers.typing import TypingWriterHandler
-from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent
 from synapse.rest.admin import register_servlets_for_client_rest_resource
 from synapse.rest.client import login, room
 from synapse.types import UserID, create_requester
 
 from tests.replication._base import BaseMultiWorkerStreamTestCase
-from tests.server import get_clock
 from tests.test_utils import make_awaitable
 
 logger = logging.getLogger(__name__)
@@ -45,25 +41,13 @@ class FederationSenderTestCase(BaseMultiWorkerStreamTestCase):
         room.register_servlets,
     ]
 
-    def setUp(self) -> None:
-        super().setUp()
-
-        reactor, _ = get_clock()
-        self.matrix_federation_agent = MatrixFederationAgent(
-            reactor,
-            tls_client_options_factory=None,
-            user_agent=b"SynapseInTrialTest/0.0.0",
-            ip_allowlist=None,
-            ip_blocklist=IPSet(),
-        )
-
     def test_send_event_single_sender(self) -> None:
         """Test that using a single federation sender worker correctly sends a
         new event.
         """
         mock_client = Mock(spec=["put_json"])
         mock_client.put_json.return_value = make_awaitable({})
-        mock_client.agent = self.matrix_federation_agent
+
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {
@@ -94,7 +78,6 @@ class FederationSenderTestCase(BaseMultiWorkerStreamTestCase):
         """
         mock_client1 = Mock(spec=["put_json"])
         mock_client1.put_json.return_value = make_awaitable({})
-        mock_client1.agent = self.matrix_federation_agent
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {
@@ -109,7 +92,6 @@ class FederationSenderTestCase(BaseMultiWorkerStreamTestCase):
 
         mock_client2 = Mock(spec=["put_json"])
         mock_client2.put_json.return_value = make_awaitable({})
-        mock_client2.agent = self.matrix_federation_agent
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {
@@ -163,7 +145,6 @@ class FederationSenderTestCase(BaseMultiWorkerStreamTestCase):
         """
         mock_client1 = Mock(spec=["put_json"])
         mock_client1.put_json.return_value = make_awaitable({})
-        mock_client1.agent = self.matrix_federation_agent
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {
@@ -178,7 +159,6 @@ class FederationSenderTestCase(BaseMultiWorkerStreamTestCase):
 
         mock_client2 = Mock(spec=["put_json"])
         mock_client2.put_json.return_value = make_awaitable({})
-        mock_client2.agent = self.matrix_federation_agent
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {
diff --git a/tests/rest/client/test_presence.py b/tests/rest/client/test_presence.py
index e12098102b..dcbb125a3b 100644
--- a/tests/rest/client/test_presence.py
+++ b/tests/rest/client/test_presence.py
@@ -40,6 +40,7 @@ class PresenceTestCase(unittest.HomeserverTestCase):
 
         hs = self.setup_test_homeserver(
             "red",
+            federation_http_client=None,
             federation_client=Mock(),
             presence_handler=self.presence_handler,
         )
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index d013e75d55..f1b4e1ad2f 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -67,6 +67,8 @@ class RoomBase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         self.hs = self.setup_test_homeserver(
             "red",
+            federation_http_client=None,
+            federation_client=Mock(),
         )
 
         self.hs.get_federation_handler = Mock()  # type: ignore[assignment]
diff --git a/tests/storage/test_e2e_room_keys.py b/tests/storage/test_e2e_room_keys.py
index f6df31aba4..9cb326d90a 100644
--- a/tests/storage/test_e2e_room_keys.py
+++ b/tests/storage/test_e2e_room_keys.py
@@ -31,7 +31,7 @@ room_key: RoomKey = {
 
 class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver("server")
+        hs = self.setup_test_homeserver("server", federation_http_client=None)
         self.store = hs.get_datastores().main
         return hs
 
diff --git a/tests/storage/test_purge.py b/tests/storage/test_purge.py
index 0282673167..857e2caf2e 100644
--- a/tests/storage/test_purge.py
+++ b/tests/storage/test_purge.py
@@ -27,7 +27,7 @@ class PurgeTests(HomeserverTestCase):
     servlets = [room.register_servlets]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver("server")
+        hs = self.setup_test_homeserver("server", federation_http_client=None)
         return hs
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
diff --git a/tests/storage/test_rollback_worker.py b/tests/storage/test_rollback_worker.py
index 809c9f175d..6861d3a6c9 100644
--- a/tests/storage/test_rollback_worker.py
+++ b/tests/storage/test_rollback_worker.py
@@ -45,7 +45,9 @@ def fake_listdir(filepath: str) -> List[str]:
 
 class WorkerSchemaTests(HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver(homeserver_to_use=GenericWorkerServer)
+        hs = self.setup_test_homeserver(
+            federation_http_client=None, homeserver_to_use=GenericWorkerServer
+        )
         return hs
 
     def default_config(self) -> JsonDict:
diff --git a/tests/test_server.py b/tests/test_server.py
index fe5afebdcd..e266c06a2c 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -38,7 +38,7 @@ from tests.http.server._base import test_disconnect
 from tests.server import (
     FakeChannel,
     FakeSite,
-    get_clock,
+    ThreadedMemoryReactorClock,
     make_request,
     setup_test_homeserver,
 )
@@ -46,11 +46,12 @@ from tests.server import (
 
 class JsonResourceTests(unittest.TestCase):
     def setUp(self) -> None:
-        reactor, clock = get_clock()
-        self.reactor = reactor
+        self.reactor = ThreadedMemoryReactorClock()
+        self.hs_clock = Clock(self.reactor)
         self.homeserver = setup_test_homeserver(
             self.addCleanup,
-            clock=clock,
+            federation_http_client=None,
+            clock=self.hs_clock,
             reactor=self.reactor,
         )
 
@@ -208,13 +209,7 @@ class JsonResourceTests(unittest.TestCase):
 
 class OptionsResourceTests(unittest.TestCase):
     def setUp(self) -> None:
-        reactor, clock = get_clock()
-        self.reactor = reactor
-        self.homeserver = setup_test_homeserver(
-            self.addCleanup,
-            clock=clock,
-            reactor=self.reactor,
-        )
+        self.reactor = ThreadedMemoryReactorClock()
 
         class DummyResource(Resource):
             isLeaf = True
@@ -247,7 +242,6 @@ class OptionsResourceTests(unittest.TestCase):
             "1.0",
             max_request_body_size=4096,
             reactor=self.reactor,
-            federation_agent=self.homeserver.get_federation_http_client().agent,
         )
 
         # render the request and return the channel
@@ -350,8 +344,7 @@ class WrapHtmlRequestHandlerTests(unittest.TestCase):
             await self.callback(request)
 
     def setUp(self) -> None:
-        reactor, _ = get_clock()
-        self.reactor = reactor
+        self.reactor = ThreadedMemoryReactorClock()
 
     def test_good_response(self) -> None:
         async def callback(request: SynapseRequest) -> None:
@@ -469,9 +462,9 @@ class DirectServeJsonResourceCancellationTests(unittest.TestCase):
     """Tests for `DirectServeJsonResource` cancellation."""
 
     def setUp(self) -> None:
-        reactor, clock = get_clock()
-        self.reactor = reactor
-        self.resource = CancellableDirectServeJsonResource(clock)
+        self.reactor = ThreadedMemoryReactorClock()
+        self.clock = Clock(self.reactor)
+        self.resource = CancellableDirectServeJsonResource(self.clock)
         self.site = FakeSite(self.resource, self.reactor)
 
     def test_cancellable_disconnect(self) -> None:
@@ -503,9 +496,9 @@ class DirectServeHtmlResourceCancellationTests(unittest.TestCase):
     """Tests for `DirectServeHtmlResource` cancellation."""
 
     def setUp(self) -> None:
-        reactor, clock = get_clock()
-        self.reactor = reactor
-        self.resource = CancellableDirectServeHtmlResource(clock)
+        self.reactor = ThreadedMemoryReactorClock()
+        self.clock = Clock(self.reactor)
+        self.resource = CancellableDirectServeHtmlResource(self.clock)
         self.site = FakeSite(self.resource, self.reactor)
 
     def test_cancellable_disconnect(self) -> None:
diff --git a/tests/unittest.py b/tests/unittest.py
index 334a95a917..c73195b32b 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -358,7 +358,6 @@ class HomeserverTestCase(TestCase):
             server_version_string="1",
             max_request_body_size=4096,
             reactor=self.reactor,
-            federation_agent=self.hs.get_federation_http_client().agent,
         )
 
         from tests.rest.client.utils import RestHelper
-- 
cgit 1.5.1


From 2328e90fbb65216ff84a08834d3cd99573bccdff Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Mon, 10 Jul 2023 17:23:11 -0500
Subject: Make the media `/upload` tracing less ambiguous (#15888)

A lot of the functions have the same name in this space like `store_file`,
and we also do it multiple times for different reasons (main media repo,
other storage providers, thumbnails, etc) so it's good to differentiate
them so your head doesn't explode.

Follow-up to https://github.com/matrix-org/synapse/pull/15850

Adds tracing instrumentation to the media `/upload` code paths to help investigate https://github.com/matrix-org/synapse/issues/15841
---
 changelog.d/15888.misc            |  1 +
 synapse/media/media_storage.py    | 78 ++++++++++++++++++++++++---------------
 synapse/media/storage_provider.py | 25 +++++++------
 3 files changed, 62 insertions(+), 42 deletions(-)
 create mode 100644 changelog.d/15888.misc

(limited to 'synapse')

diff --git a/changelog.d/15888.misc b/changelog.d/15888.misc
new file mode 100644
index 0000000000..0e49ab23fe
--- /dev/null
+++ b/changelog.d/15888.misc
@@ -0,0 +1 @@
+Add tracing to media `/upload` code paths.
diff --git a/synapse/media/media_storage.py b/synapse/media/media_storage.py
index eebcbc48e8..a17ccb3d80 100644
--- a/synapse/media/media_storage.py
+++ b/synapse/media/media_storage.py
@@ -38,7 +38,7 @@ from twisted.protocols.basic import FileSender
 
 from synapse.api.errors import NotFoundError
 from synapse.logging.context import defer_to_thread, make_deferred_yieldable
-from synapse.logging.opentracing import trace
+from synapse.logging.opentracing import start_active_span, trace, trace_with_opname
 from synapse.util import Clock
 from synapse.util.file_consumer import BackgroundFileConsumer
 
@@ -77,7 +77,7 @@ class MediaStorage:
         self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
         self.clock = hs.get_clock()
 
-    @trace
+    @trace_with_opname("MediaStorage.store_file")
     async def store_file(self, source: IO, file_info: FileInfo) -> str:
         """Write `source` to the on disk media store, and also any other
         configured storage providers
@@ -91,18 +91,19 @@ class MediaStorage:
         """
 
         with self.store_into_file(file_info) as (f, fname, finish_cb):
-            # Write to the main repository
+            # Write to the main media repository
             await self.write_to_file(source, f)
+            # Write to the other storage providers
             await finish_cb()
 
         return fname
 
-    @trace
+    @trace_with_opname("MediaStorage.write_to_file")
     async def write_to_file(self, source: IO, output: IO) -> None:
         """Asynchronously write the `source` to `output`."""
         await defer_to_thread(self.reactor, _write_file_synchronously, source, output)
 
-    @trace
+    @trace_with_opname("MediaStorage.store_into_file")
     @contextlib.contextmanager
     def store_into_file(
         self, file_info: FileInfo
@@ -117,9 +118,9 @@ class MediaStorage:
         fname can be used to read the contents from after upload, e.g. to
         generate thumbnails.
 
-        finish_cb must be called and waited on after the file has been
-        successfully been written to. Should not be called if there was an
-        error.
+        finish_cb must be called and waited on after the file has been successfully been
+        written to. Should not be called if there was an error. Checks for spam and
+        stores the file into the configured storage providers.
 
         Args:
             file_info: Info about the file to store
@@ -139,35 +140,48 @@ class MediaStorage:
 
         finished_called = [False]
 
+        main_media_repo_write_trace_scope = start_active_span(
+            "writing to main media repo"
+        )
+        main_media_repo_write_trace_scope.__enter__()
+
         try:
             with open(fname, "wb") as f:
 
                 async def finish() -> None:
-                    # Ensure that all writes have been flushed and close the
-                    # file.
-                    f.flush()
-                    f.close()
-
-                    spam_check = await self._spam_checker_module_callbacks.check_media_file_for_spam(
-                        ReadableFileWrapper(self.clock, fname), file_info
-                    )
-                    if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
-                        logger.info("Blocking media due to spam checker")
-                        # Note that we'll delete the stored media, due to the
-                        # try/except below. The media also won't be stored in
-                        # the DB.
-                        # We currently ignore any additional field returned by
-                        # the spam-check API.
-                        raise SpamMediaException(errcode=spam_check[0])
-
-                    for provider in self.storage_providers:
-                        await provider.store_file(path, file_info)
-
-                    finished_called[0] = True
+                    # When someone calls finish, we assume they are done writing to the main media repo
+                    main_media_repo_write_trace_scope.__exit__(None, None, None)
+
+                    with start_active_span("writing to other storage providers"):
+                        # Ensure that all writes have been flushed and close the
+                        # file.
+                        f.flush()
+                        f.close()
+
+                        spam_check = await self._spam_checker_module_callbacks.check_media_file_for_spam(
+                            ReadableFileWrapper(self.clock, fname), file_info
+                        )
+                        if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
+                            logger.info("Blocking media due to spam checker")
+                            # Note that we'll delete the stored media, due to the
+                            # try/except below. The media also won't be stored in
+                            # the DB.
+                            # We currently ignore any additional field returned by
+                            # the spam-check API.
+                            raise SpamMediaException(errcode=spam_check[0])
+
+                        for provider in self.storage_providers:
+                            with start_active_span(str(provider)):
+                                await provider.store_file(path, file_info)
+
+                        finished_called[0] = True
 
                 yield f, fname, finish
         except Exception as e:
             try:
+                main_media_repo_write_trace_scope.__exit__(
+                    type(e), None, e.__traceback__
+                )
                 os.remove(fname)
             except Exception:
                 pass
@@ -175,7 +189,11 @@ class MediaStorage:
             raise e from None
 
         if not finished_called:
-            raise Exception("Finished callback not called")
+            exc = Exception("Finished callback not called")
+            main_media_repo_write_trace_scope.__exit__(
+                type(exc), None, exc.__traceback__
+            )
+            raise exc
 
     async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]:
         """Attempts to fetch media described by file_info from the local cache
diff --git a/synapse/media/storage_provider.py b/synapse/media/storage_provider.py
index 0aea3a7a0d..70a45cfd5b 100644
--- a/synapse/media/storage_provider.py
+++ b/synapse/media/storage_provider.py
@@ -20,7 +20,7 @@ from typing import TYPE_CHECKING, Callable, Optional
 
 from synapse.config._base import Config
 from synapse.logging.context import defer_to_thread, run_in_background
-from synapse.logging.opentracing import trace
+from synapse.logging.opentracing import start_active_span, trace_with_opname
 from synapse.util.async_helpers import maybe_awaitable
 
 from ._base import FileInfo, Responder
@@ -87,7 +87,7 @@ class StorageProviderWrapper(StorageProvider):
     def __str__(self) -> str:
         return "StorageProviderWrapper[%s]" % (self.backend,)
 
-    @trace
+    @trace_with_opname("StorageProviderWrapper.store_file")
     async def store_file(self, path: str, file_info: FileInfo) -> None:
         if not file_info.server_name and not self.store_local:
             return None
@@ -116,7 +116,7 @@ class StorageProviderWrapper(StorageProvider):
 
             run_in_background(store)
 
-    @trace
+    @trace_with_opname("StorageProviderWrapper.fetch")
     async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
         if file_info.url_cache:
             # Files in the URL preview cache definitely aren't stored here,
@@ -144,7 +144,7 @@ class FileStorageProviderBackend(StorageProvider):
     def __str__(self) -> str:
         return "FileStorageProviderBackend[%s]" % (self.base_directory,)
 
-    @trace
+    @trace_with_opname("FileStorageProviderBackend.store_file")
     async def store_file(self, path: str, file_info: FileInfo) -> None:
         """See StorageProvider.store_file"""
 
@@ -156,14 +156,15 @@ class FileStorageProviderBackend(StorageProvider):
 
         # mypy needs help inferring the type of the second parameter, which is generic
         shutil_copyfile: Callable[[str, str], str] = shutil.copyfile
-        await defer_to_thread(
-            self.hs.get_reactor(),
-            shutil_copyfile,
-            primary_fname,
-            backup_fname,
-        )
-
-    @trace
+        with start_active_span("shutil_copyfile"):
+            await defer_to_thread(
+                self.hs.get_reactor(),
+                shutil_copyfile,
+                primary_fname,
+                backup_fname,
+            )
+
+    @trace_with_opname("FileStorageProviderBackend.fetch")
     async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
         """See StorageProvider.fetch"""
 
-- 
cgit 1.5.1


From b516d919995f3bf36045263376628ff0aa298095 Mon Sep 17 00:00:00 2001
From: Michael Telatynski <7t3chguy@gmail.com>
Date: Tue, 11 Jul 2023 09:18:50 +0100
Subject: Add `Server` to Access-Control-Expose-Headers header (#15908)

---
 changelog.d/15908.misc | 1 +
 synapse/http/server.py | 2 +-
 tests/test_server.py   | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15908.misc

(limited to 'synapse')

diff --git a/changelog.d/15908.misc b/changelog.d/15908.misc
new file mode 100644
index 0000000000..3ab8674e03
--- /dev/null
+++ b/changelog.d/15908.misc
@@ -0,0 +1 @@
+Add `Server` to Access-Control-Expose-Headers header.
diff --git a/synapse/http/server.py b/synapse/http/server.py
index 933172c873..e411ac7e62 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -910,7 +910,7 @@ def set_cors_headers(request: SynapseRequest) -> None:
         )
         request.setHeader(
             b"Access-Control-Expose-Headers",
-            b"Synapse-Trace-Id",
+            b"Synapse-Trace-Id, Server",
         )
 
 
diff --git a/tests/test_server.py b/tests/test_server.py
index e266c06a2c..dc491e06ed 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -268,7 +268,7 @@ class OptionsResourceTests(unittest.TestCase):
         )
         self.assertEqual(
             channel.headers.getRawHeaders(b"Access-Control-Expose-Headers"),
-            [b"Synapse-Trace-Id"],
+            [b"Synapse-Trace-Id, Server"],
         )
 
     def _check_cors_msc3886_headers(self, channel: FakeChannel) -> None:
-- 
cgit 1.5.1


From a4243183f0b500f9f30f2d24af19f30a99f65f63 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 11 Jul 2023 12:21:00 -0400
Subject: Add + as an allowed character for Matrix IDs (MSC4009) (#15911)

---
 changelog.d/15911.feature       |  1 +
 synapse/config/experimental.py  |  3 ---
 synapse/handlers/register.py    |  9 ++-------
 synapse/handlers/saml.py        |  4 ++--
 synapse/handlers/sso.py         |  6 ++----
 synapse/types/__init__.py       | 22 +++++-----------------
 tests/handlers/test_register.py | 11 +++++------
 7 files changed, 17 insertions(+), 39 deletions(-)
 create mode 100644 changelog.d/15911.feature

(limited to 'synapse')

diff --git a/changelog.d/15911.feature b/changelog.d/15911.feature
new file mode 100644
index 0000000000..b24077c6c3
--- /dev/null
+++ b/changelog.d/15911.feature
@@ -0,0 +1 @@
+Allow `+` in Matrix IDs, per [MSC4009](https://github.com/matrix-org/matrix-spec-proposals/pull/4009).
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 8e0f5356b4..0970f22a75 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -382,9 +382,6 @@ class ExperimentalConfig(Config):
         # Check that none of the other config options conflict with MSC3861 when enabled
         self.msc3861.check_config_conflicts(self.root)
 
-        # MSC4009: E.164 Matrix IDs
-        self.msc4009_e164_mxids = experimental.get("msc4009_e164_mxids", False)
-
         # MSC4010: Do not allow setting m.push_rules account data.
         self.msc4010_push_rules_account_data = experimental.get(
             "msc4010_push_rules_account_data", False
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index a2d3f03061..3a55056df5 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -143,15 +143,10 @@ class RegistrationHandler:
         assigned_user_id: Optional[str] = None,
         inhibit_user_in_use_error: bool = False,
     ) -> None:
-        if types.contains_invalid_mxid_characters(
-            localpart, self.hs.config.experimental.msc4009_e164_mxids
-        ):
-            extra_chars = (
-                "=_-./+" if self.hs.config.experimental.msc4009_e164_mxids else "=_-./"
-            )
+        if types.contains_invalid_mxid_characters(localpart):
             raise SynapseError(
                 400,
-                f"User ID can only contain characters a-z, 0-9, or '{extra_chars}'",
+                "User ID can only contain characters a-z, 0-9, or '=_-./+'",
                 Codes.INVALID_USERNAME,
             )
 
diff --git a/synapse/handlers/saml.py b/synapse/handlers/saml.py
index 874860d461..6083c9f4b5 100644
--- a/synapse/handlers/saml.py
+++ b/synapse/handlers/saml.py
@@ -27,9 +27,9 @@ from synapse.http.servlet import parse_string
 from synapse.http.site import SynapseRequest
 from synapse.module_api import ModuleApi
 from synapse.types import (
+    MXID_LOCALPART_ALLOWED_CHARACTERS,
     UserID,
     map_username_to_mxid_localpart,
-    mxid_localpart_allowed_characters,
 )
 from synapse.util.iterutils import chunk_seq
 
@@ -371,7 +371,7 @@ class SamlHandler:
 
 
 DOT_REPLACE_PATTERN = re.compile(
-    "[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters)),)
+    "[^%s]" % (re.escape("".join(MXID_LOCALPART_ALLOWED_CHARACTERS)),)
 )
 
 
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index c3a51722bd..4d29328a74 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -225,8 +225,6 @@ class SsoHandler:
 
         self._consent_at_registration = hs.config.consent.user_consent_at_registration
 
-        self._e164_mxids = hs.config.experimental.msc4009_e164_mxids
-
     def register_identity_provider(self, p: SsoIdentityProvider) -> None:
         p_id = p.idp_id
         assert p_id not in self._identity_providers
@@ -713,7 +711,7 @@ class SsoHandler:
         # Since the localpart is provided via a potentially untrusted module,
         # ensure the MXID is valid before registering.
         if not attributes.localpart or contains_invalid_mxid_characters(
-            attributes.localpart, self._e164_mxids
+            attributes.localpart
         ):
             raise MappingException("localpart is invalid: %s" % (attributes.localpart,))
 
@@ -946,7 +944,7 @@ class SsoHandler:
             localpart,
         )
 
-        if contains_invalid_mxid_characters(localpart, self._e164_mxids):
+        if contains_invalid_mxid_characters(localpart):
             raise SynapseError(400, "localpart is invalid: %s" % (localpart,))
         user_id = UserID(localpart, self._server_name).to_string()
         user_infos = await self._store.get_users_by_id_case_insensitive(user_id)
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index 095be070e0..fdfd465c8d 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -348,22 +348,15 @@ class EventID(DomainSpecificString):
     SIGIL = "$"
 
 
-mxid_localpart_allowed_characters = set(
-    "_-./=" + string.ascii_lowercase + string.digits
+MXID_LOCALPART_ALLOWED_CHARACTERS = set(
+    "_-./=+" + string.ascii_lowercase + string.digits
 )
-# MSC4007 adds the + to the allowed characters.
-#
-# TODO If this was accepted, update the SSO code to support this, see the callers
-#      of map_username_to_mxid_localpart.
-extended_mxid_localpart_allowed_characters = mxid_localpart_allowed_characters | {"+"}
 
 # Guest user IDs are purely numeric.
 GUEST_USER_ID_PATTERN = re.compile(r"^\d+$")
 
 
-def contains_invalid_mxid_characters(
-    localpart: str, use_extended_character_set: bool
-) -> bool:
+def contains_invalid_mxid_characters(localpart: str) -> bool:
     """Check for characters not allowed in an mxid or groupid localpart
 
     Args:
@@ -374,12 +367,7 @@ def contains_invalid_mxid_characters(
     Returns:
         True if there are any naughty characters
     """
-    allowed_characters = (
-        extended_mxid_localpart_allowed_characters
-        if use_extended_character_set
-        else mxid_localpart_allowed_characters
-    )
-    return any(c not in allowed_characters for c in localpart)
+    return any(c not in MXID_LOCALPART_ALLOWED_CHARACTERS for c in localpart)
 
 
 UPPER_CASE_PATTERN = re.compile(b"[A-Z_]")
@@ -396,7 +384,7 @@ UPPER_CASE_PATTERN = re.compile(b"[A-Z_]")
 #    bytes rather than strings
 #
 NON_MXID_CHARACTER_PATTERN = re.compile(
-    ("[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters - {"="})),)).encode(
+    ("[^%s]" % (re.escape("".join(MXID_LOCALPART_ALLOWED_CHARACTERS - {"="})),)).encode(
         "ascii"
     )
 )
diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py
index 8d8584609b..54eeec228e 100644
--- a/tests/handlers/test_register.py
+++ b/tests/handlers/test_register.py
@@ -587,17 +587,16 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         self.assertFalse(self.get_success(d))
 
     def test_invalid_user_id(self) -> None:
-        invalid_user_id = "+abcd"
+        invalid_user_id = "^abcd"
         self.get_failure(
             self.handler.register_user(localpart=invalid_user_id), SynapseError
         )
 
-    @override_config({"experimental_features": {"msc4009_e164_mxids": True}})
-    def text_extended_user_ids(self) -> None:
-        """+ should be allowed according to MSC4009."""
-        valid_user_id = "+1234"
+    def test_special_chars(self) -> None:
+        """Ensure that characters which are allowed in Matrix IDs work."""
+        valid_user_id = "a1234_-./=+"
         user_id = self.get_success(self.handler.register_user(localpart=valid_user_id))
-        self.assertEqual(user_id, valid_user_id)
+        self.assertEqual(user_id, f"@{valid_user_id}:test")
 
     def test_invalid_user_id_length(self) -> None:
         invalid_user_id = "x" * 256
-- 
cgit 1.5.1


From 224ef0b669fdd85925d66deb38ba1b51c5aaa1bd Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Tue, 11 Jul 2023 13:08:06 -0500
Subject: Unix Sockets for HTTP Replication (#15708)

Unix socket support for `federation` and `client` Listeners has existed for a little while now (since [1.81.0](https://github.com/matrix-org/synapse/pull/15353)), but there was one last holdout before it could be complete: HTTP Replication communication. This should finish it up. The Listeners would always have worked, but there would have been no way to talk to them.

---------

Co-authored-by: Eric Eastwood <madlittlemods@gmail.com>
Co-authored-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
Co-authored-by: Eric Eastwood <erice@element.io>
---
 changelog.d/15708.feature                        |   1 +
 docker/conf-workers/nginx.conf.j2                |   4 +
 docker/conf-workers/shared.yaml.j2               |   3 +
 docker/conf-workers/supervisord.conf.j2          |   4 +
 docker/conf-workers/worker.yaml.j2               |   4 +
 docker/conf/homeserver.yaml                      |  10 ++-
 docker/configure_workers_and_start.py            | 104 +++++++++++++++++------
 docs/development/contributing_guide.md           |   1 +
 docs/usage/configuration/config_documentation.md |  52 +++++++++++-
 docs/workers.md                                  |   9 +-
 scripts-dev/complement.sh                        |   4 +
 synapse/config/workers.py                        |  24 +++++-
 synapse/http/replicationagent.py                 |  47 ++++++----
 synapse/logging/opentracing.py                   |   6 +-
 tests/replication/_base.py                       |   7 +-
 tests/server.py                                  |  32 ++++++-
 16 files changed, 260 insertions(+), 52 deletions(-)
 create mode 100644 changelog.d/15708.feature

(limited to 'synapse')

diff --git a/changelog.d/15708.feature b/changelog.d/15708.feature
new file mode 100644
index 0000000000..06a6c959ab
--- /dev/null
+++ b/changelog.d/15708.feature
@@ -0,0 +1 @@
+Add Unix Socket support for HTTP Replication Listeners. Document and provide usage instructions for utilizing Unix sockets in Synapse. Contributed by Jason Little.
diff --git a/docker/conf-workers/nginx.conf.j2 b/docker/conf-workers/nginx.conf.j2
index 967fc65e79..d1e02af723 100644
--- a/docker/conf-workers/nginx.conf.j2
+++ b/docker/conf-workers/nginx.conf.j2
@@ -35,7 +35,11 @@ server {
 
     # Send all other traffic to the main process
     location ~* ^(\\/_matrix|\\/_synapse) {
+{% if using_unix_sockets %}
+        proxy_pass http://unix:/run/main_public.sock;
+{% else %}
         proxy_pass http://localhost:8080;
+{% endif %}
         proxy_set_header X-Forwarded-For $remote_addr;
         proxy_set_header X-Forwarded-Proto $scheme;
         proxy_set_header Host $host;
diff --git a/docker/conf-workers/shared.yaml.j2 b/docker/conf-workers/shared.yaml.j2
index 92d25386dc..1dfc60ad11 100644
--- a/docker/conf-workers/shared.yaml.j2
+++ b/docker/conf-workers/shared.yaml.j2
@@ -6,6 +6,9 @@
 {% if enable_redis %}
 redis:
     enabled: true
+    {% if using_unix_sockets %}
+    path: /tmp/redis.sock
+    {% endif %}
 {% endif %}
 
 {% if appservice_registrations is not none %}
diff --git a/docker/conf-workers/supervisord.conf.j2 b/docker/conf-workers/supervisord.conf.j2
index 9f1e03cfc0..da93358051 100644
--- a/docker/conf-workers/supervisord.conf.j2
+++ b/docker/conf-workers/supervisord.conf.j2
@@ -19,7 +19,11 @@ username=www-data
 autorestart=true
 
 [program:redis]
+{% if using_unix_sockets %}
+command=/usr/local/bin/prefix-log /usr/local/bin/redis-server --unixsocket /tmp/redis.sock
+{% else %}
 command=/usr/local/bin/prefix-log /usr/local/bin/redis-server
+{% endif %}
 priority=1
 stdout_logfile=/dev/stdout
 stdout_logfile_maxbytes=0
diff --git a/docker/conf-workers/worker.yaml.j2 b/docker/conf-workers/worker.yaml.j2
index 44c6e413cf..29ec74b4ea 100644
--- a/docker/conf-workers/worker.yaml.j2
+++ b/docker/conf-workers/worker.yaml.j2
@@ -8,7 +8,11 @@ worker_name: "{{ name }}"
 
 worker_listeners:
   - type: http
+{% if using_unix_sockets %}
+    path: "/run/worker.{{ port }}"
+{% else %}
     port: {{ port }}
+{% endif %}
 {% if listener_resources %}
     resources:
       - names:
diff --git a/docker/conf/homeserver.yaml b/docker/conf/homeserver.yaml
index f10f78a48c..c46b955d63 100644
--- a/docker/conf/homeserver.yaml
+++ b/docker/conf/homeserver.yaml
@@ -36,12 +36,17 @@ listeners:
 
   # Allow configuring in case we want to reverse proxy 8008
   # using another process in the same container
+{% if SYNAPSE_USE_UNIX_SOCKET %}
+  # Unix sockets don't care about TLS or IP addresses or ports
+  - path: '/run/main_public.sock'
+    type: http
+{% else %}
   - port: {{ SYNAPSE_HTTP_PORT or 8008 }}
     tls: false
     bind_addresses: ['::']
     type: http
     x_forwarded: false
-
+{% endif %}
     resources:
       - names: [client]
         compress: true
@@ -57,8 +62,11 @@ database:
     user: "{{ POSTGRES_USER or "synapse" }}"
     password: "{{ POSTGRES_PASSWORD }}"
     database: "{{ POSTGRES_DB or "synapse" }}"
+{% if not SYNAPSE_USE_UNIX_SOCKET %}
+{# Synapse will use a default unix socket for Postgres when host/port is not specified (behavior from `psycopg2`). #}
     host: "{{ POSTGRES_HOST or "db" }}"
     port: "{{ POSTGRES_PORT or "5432" }}"
+{% endif %}
     cp_min: 5
     cp_max: 10
 {% else %}
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index 62fb88daab..dc824038b5 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -74,6 +74,9 @@ MAIN_PROCESS_HTTP_LISTENER_PORT = 8080
 MAIN_PROCESS_INSTANCE_NAME = "main"
 MAIN_PROCESS_LOCALHOST_ADDRESS = "127.0.0.1"
 MAIN_PROCESS_REPLICATION_PORT = 9093
+# Obviously, these would only be used with the UNIX socket option
+MAIN_PROCESS_UNIX_SOCKET_PUBLIC_PATH = "/run/main_public.sock"
+MAIN_PROCESS_UNIX_SOCKET_PRIVATE_PATH = "/run/main_private.sock"
 
 # A simple name used as a placeholder in the WORKERS_CONFIG below. This will be replaced
 # during processing with the name of the worker.
@@ -407,11 +410,15 @@ def add_worker_roles_to_shared_config(
         )
 
         # Map of stream writer instance names to host/ports combos
-        instance_map[worker_name] = {
-            "host": "localhost",
-            "port": worker_port,
-        }
-
+        if os.environ.get("SYNAPSE_USE_UNIX_SOCKET", False):
+            instance_map[worker_name] = {
+                "path": f"/run/worker.{worker_port}",
+            }
+        else:
+            instance_map[worker_name] = {
+                "host": "localhost",
+                "port": worker_port,
+            }
     # Update the list of stream writers. It's convenient that the name of the worker
     # type is the same as the stream to write. Iterate over the whole list in case there
     # is more than one.
@@ -423,10 +430,15 @@ def add_worker_roles_to_shared_config(
 
             # Map of stream writer instance names to host/ports combos
             # For now, all stream writers need http replication ports
-            instance_map[worker_name] = {
-                "host": "localhost",
-                "port": worker_port,
-            }
+            if os.environ.get("SYNAPSE_USE_UNIX_SOCKET", False):
+                instance_map[worker_name] = {
+                    "path": f"/run/worker.{worker_port}",
+                }
+            else:
+                instance_map[worker_name] = {
+                    "host": "localhost",
+                    "port": worker_port,
+                }
 
 
 def merge_worker_template_configs(
@@ -718,17 +730,29 @@ def generate_worker_files(
     # Note that yaml cares about indentation, so care should be taken to insert lines
     # into files at the correct indentation below.
 
+    # Convenience helper for if using unix sockets instead of host:port
+    using_unix_sockets = environ.get("SYNAPSE_USE_UNIX_SOCKET", False)
     # First read the original config file and extract the listeners block. Then we'll
     # add another listener for replication. Later we'll write out the result to the
     # shared config file.
-    listeners = [
-        {
-            "port": MAIN_PROCESS_REPLICATION_PORT,
-            "bind_address": MAIN_PROCESS_LOCALHOST_ADDRESS,
-            "type": "http",
-            "resources": [{"names": ["replication"]}],
-        }
-    ]
+    listeners: List[Any]
+    if using_unix_sockets:
+        listeners = [
+            {
+                "path": MAIN_PROCESS_UNIX_SOCKET_PRIVATE_PATH,
+                "type": "http",
+                "resources": [{"names": ["replication"]}],
+            }
+        ]
+    else:
+        listeners = [
+            {
+                "port": MAIN_PROCESS_REPLICATION_PORT,
+                "bind_address": MAIN_PROCESS_LOCALHOST_ADDRESS,
+                "type": "http",
+                "resources": [{"names": ["replication"]}],
+            }
+        ]
     with open(config_path) as file_stream:
         original_config = yaml.safe_load(file_stream)
         original_listeners = original_config.get("listeners")
@@ -769,7 +793,17 @@ def generate_worker_files(
 
     # A list of internal endpoints to healthcheck, starting with the main process
     # which exists even if no workers do.
-    healthcheck_urls = ["http://localhost:8080/health"]
+    # This list ends up being part of the command line to curl (curl added support for
+    # Unix sockets in version 7.40).
+    if using_unix_sockets:
+        healthcheck_urls = [
+            f"--unix-socket {MAIN_PROCESS_UNIX_SOCKET_PUBLIC_PATH} "
+            # The scheme and hostname from the following URL are ignored.
+            # The only thing that matters is the path `/health`
+            "http://localhost/health"
+        ]
+    else:
+        healthcheck_urls = ["http://localhost:8080/health"]
 
     # Get the set of all worker types that we have configured
     all_worker_types_in_use = set(chain(*requested_worker_types.values()))
@@ -806,8 +840,12 @@ def generate_worker_files(
         # given worker_type needs to stay assigned and not be replaced.
         worker_config["shared_extra_conf"].update(shared_config)
         shared_config = worker_config["shared_extra_conf"]
-
-        healthcheck_urls.append("http://localhost:%d/health" % (worker_port,))
+        if using_unix_sockets:
+            healthcheck_urls.append(
+                f"--unix-socket /run/worker.{worker_port} http://localhost/health"
+            )
+        else:
+            healthcheck_urls.append("http://localhost:%d/health" % (worker_port,))
 
         # Update the shared config with sharding-related options if necessary
         add_worker_roles_to_shared_config(
@@ -826,6 +864,7 @@ def generate_worker_files(
             "/conf/workers/{name}.yaml".format(name=worker_name),
             **worker_config,
             worker_log_config_filepath=log_config_filepath,
+            using_unix_sockets=using_unix_sockets,
         )
 
         # Save this worker's port number to the correct nginx upstreams
@@ -846,8 +885,13 @@ def generate_worker_files(
     nginx_upstream_config = ""
     for upstream_worker_base_name, upstream_worker_ports in nginx_upstreams.items():
         body = ""
-        for port in upstream_worker_ports:
-            body += f"    server localhost:{port};\n"
+        if using_unix_sockets:
+            for port in upstream_worker_ports:
+                body += f"    server unix:/run/worker.{port};\n"
+
+        else:
+            for port in upstream_worker_ports:
+                body += f"    server localhost:{port};\n"
 
         # Add to the list of configured upstreams
         nginx_upstream_config += NGINX_UPSTREAM_CONFIG_BLOCK.format(
@@ -877,10 +921,15 @@ def generate_worker_files(
     # If there are workers, add the main process to the instance_map too.
     if workers_in_use:
         instance_map = shared_config.setdefault("instance_map", {})
-        instance_map[MAIN_PROCESS_INSTANCE_NAME] = {
-            "host": MAIN_PROCESS_LOCALHOST_ADDRESS,
-            "port": MAIN_PROCESS_REPLICATION_PORT,
-        }
+        if using_unix_sockets:
+            instance_map[MAIN_PROCESS_INSTANCE_NAME] = {
+                "path": MAIN_PROCESS_UNIX_SOCKET_PRIVATE_PATH,
+            }
+        else:
+            instance_map[MAIN_PROCESS_INSTANCE_NAME] = {
+                "host": MAIN_PROCESS_LOCALHOST_ADDRESS,
+                "port": MAIN_PROCESS_REPLICATION_PORT,
+            }
 
     # Shared homeserver config
     convert(
@@ -890,6 +939,7 @@ def generate_worker_files(
         appservice_registrations=appservice_registrations,
         enable_redis=workers_in_use,
         workers_in_use=workers_in_use,
+        using_unix_sockets=using_unix_sockets,
     )
 
     # Nginx config
@@ -900,6 +950,7 @@ def generate_worker_files(
         upstream_directives=nginx_upstream_config,
         tls_cert_path=os.environ.get("SYNAPSE_TLS_CERT"),
         tls_key_path=os.environ.get("SYNAPSE_TLS_KEY"),
+        using_unix_sockets=using_unix_sockets,
     )
 
     # Supervisord config
@@ -909,6 +960,7 @@ def generate_worker_files(
         "/etc/supervisor/supervisord.conf",
         main_config_path=config_path,
         enable_redis=workers_in_use,
+        using_unix_sockets=using_unix_sockets,
     )
 
     convert(
diff --git a/docs/development/contributing_guide.md b/docs/development/contributing_guide.md
index e9210b1776..698687b91f 100644
--- a/docs/development/contributing_guide.md
+++ b/docs/development/contributing_guide.md
@@ -370,6 +370,7 @@ The above will run a monolithic (single-process) Synapse with SQLite as the data
     See the [worker documentation](../workers.md) for additional information on workers.
 - Passing `ASYNCIO_REACTOR=1` as an environment variable to use the Twisted asyncio reactor instead of the default one.
 - Passing `PODMAN=1` will use the [podman](https://podman.io/) container runtime, instead of docker.
+- Passing `UNIX_SOCKETS=1` will utilise Unix socket functionality for Synapse, Redis, and Postgres (when applicable).
 
 To increase the log level for the tests, set `SYNAPSE_TEST_LOG_LEVEL`, e.g:
 ```sh
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index ff59cbccc1..d9286e83bc 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -462,6 +462,20 @@ See the docs [request log format](../administration/request_log.md).
 * `additional_resources`: Only valid for an 'http' listener. A map of
    additional endpoints which should be loaded via dynamic modules.
 
+Unix socket support (_Added in Synapse 1.88.0_):
+* `path`: A path and filename for a Unix socket. Make sure it is located in a
+  directory with read and write permissions, and that it already exists (the directory
+  will not be created). Defaults to `None`.
+  * **Note**: The `path` and `port` options cannot both be used for the same
+    `listener`.
+  * The `x_forwarded` option defaults to true when using Unix sockets and can be omitted.
+  * Other options that would not make sense to use with a UNIX socket, such as
+    `bind_addresses` and `tls`, will be ignored and can be removed.
+* `mode`: The file permissions to set on the UNIX socket. Defaults to `666`.
+* **Note:** Must be set as `type: http` (does not support `metrics` and `manhole`).
+  Also make sure that `metrics` is not included in `resources` -> `names`.
+
+
 Valid resource names are:
 
 * `client`: the client-server API (/_matrix/client), and the synapse admin API (/_synapse/admin). Also implies `media` and `static`.
@@ -474,7 +488,7 @@ Valid resource names are:
 
 * `media`: the media API (/_matrix/media).
 
-* `metrics`: the metrics interface. See [here](../../metrics-howto.md).
+* `metrics`: the metrics interface. See [here](../../metrics-howto.md). (Not compatible with Unix sockets)
 
 * `openid`: OpenID authentication. See [here](../../openid.md).
 
@@ -533,6 +547,22 @@ listeners:
     bind_addresses: ['::1', '127.0.0.1']
     type: manhole
 ```
+Example configuration #3:
+```yaml
+listeners:
+  # Unix socket listener: Ideal for Synapse deployments behind a reverse proxy, offering
+  # lightweight interprocess communication without TCP/IP overhead, avoiding port
+  # conflicts, and providing enhanced security through system file permissions.
+  #
+  # Note that x_forwarded will default to true, when using a UNIX socket. Please see
+  # https://matrix-org.github.io/synapse/latest/reverse_proxy.html.
+  #
+  - path: /var/run/synapse/main_public.sock
+    type: http
+    resources:
+      - names: [client, federation]
+```
+
 ---
 ### `manhole_settings`
 
@@ -3949,6 +3979,14 @@ instance_map:
     host: localhost
     port: 8034
 ```
+Example configuration (#2, for UNIX sockets):
+```yaml
+instance_map:
+  main:
+    path: /var/run/synapse/main_replication.sock
+  worker1:
+    path: /var/run/synapse/worker1_replication.sock
+```
 ---
 ### `stream_writers`
 
@@ -4108,6 +4146,18 @@ worker_listeners:
     resources:
       - names: [client, federation]
 ```
+Example configuration (#2, using UNIX sockets with a `replication` listener):
+```yaml
+worker_listeners:
+  - type: http
+    path: /var/run/synapse/worker_public.sock
+    resources:
+      - names: [client, federation]
+  - type: http
+    path: /var/run/synapse/worker_replication.sock
+    resources:
+      - names: [replication]
+```
 ---
 ### `worker_manhole`
 
diff --git a/docs/workers.md b/docs/workers.md
index 828f082e75..735cd3f18d 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -95,9 +95,12 @@ for the main process
 * Secondly, you need to enable
 [redis-based replication](usage/configuration/config_documentation.md#redis)
 * You will need to add an [`instance_map`](usage/configuration/config_documentation.md#instance_map) 
-with the `main` process defined, as well as the relevant connection information from 
-it's HTTP `replication` listener (defined in step 1 above). Note that the `host` defined 
-is the address the worker needs to look for the `main` process at, not necessarily the same address that is bound to.
+with the `main` process defined, as well as the relevant connection information from
+its HTTP `replication` listener (defined in step 1 above).
+  * Note that the `host` defined is the address the worker needs to look for the `main`
+  process at, not necessarily the same address that it is bound to.
+  * If you are using Unix sockets for the `replication` resource, make sure to
+  use a `path` to the socket file instead of a `port`.
 * Optionally, a [shared secret](usage/configuration/config_documentation.md#worker_replication_secret)
 can be used to authenticate HTTP traffic between workers. For example:
 
diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh
index 24b83cfeb6..fea76cb5af 100755
--- a/scripts-dev/complement.sh
+++ b/scripts-dev/complement.sh
@@ -253,6 +253,10 @@ if [[ -n "$ASYNCIO_REACTOR" ]]; then
   export PASS_SYNAPSE_COMPLEMENT_USE_ASYNCIO_REACTOR=true
 fi
 
+if [[ -n "$UNIX_SOCKETS" ]]; then
+  # Enable full on Unix socket mode for Synapse, Redis and Postgresql
+  export PASS_SYNAPSE_USE_UNIX_SOCKET=1
+fi
 
 if [[ -n "$SYNAPSE_TEST_LOG_LEVEL" ]]; then
   # Set the log level to what is desired
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index ccfe75eaf3..e55ca12a36 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -94,7 +94,7 @@ class ConfigModel(BaseModel):
         allow_mutation = False
 
 
-class InstanceLocationConfig(ConfigModel):
+class InstanceTcpLocationConfig(ConfigModel):
     """The host and port to talk to an instance via HTTP replication."""
 
     host: StrictStr
@@ -110,6 +110,23 @@ class InstanceLocationConfig(ConfigModel):
         return f"{self.host}:{self.port}"
 
 
+class InstanceUnixLocationConfig(ConfigModel):
+    """The socket file to talk to an instance via HTTP replication."""
+
+    path: StrictStr
+
+    def scheme(self) -> str:
+        """Hardcode a retrievable scheme"""
+        return "unix"
+
+    def netloc(self) -> str:
+        """Nicely format the address location data"""
+        return f"{self.path}"
+
+
+InstanceLocationConfig = Union[InstanceTcpLocationConfig, InstanceUnixLocationConfig]
+
+
 @attr.s
 class WriterLocations:
     """Specifies the instances that write various streams.
@@ -270,9 +287,12 @@ class WorkerConfig(Config):
                     % MAIN_PROCESS_INSTANCE_MAP_NAME
                 )
 
+        # type-ignore: the expression `Union[A, B]` is not a Type[Union[A, B]] currently
         self.instance_map: Dict[
             str, InstanceLocationConfig
-        ] = parse_and_validate_mapping(instance_map, InstanceLocationConfig)
+        ] = parse_and_validate_mapping(
+            instance_map, InstanceLocationConfig  # type: ignore[arg-type]
+        )
 
         # Map from type of streams to source, c.f. WriterLocations.
         writers = config.get("stream_writers") or {}
diff --git a/synapse/http/replicationagent.py b/synapse/http/replicationagent.py
index d6ba6f0e57..3ba2f22dfd 100644
--- a/synapse/http/replicationagent.py
+++ b/synapse/http/replicationagent.py
@@ -18,7 +18,11 @@ from typing import Dict, Optional
 from zope.interface import implementer
 
 from twisted.internet import defer
-from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
+from twisted.internet.endpoints import (
+    HostnameEndpoint,
+    UNIXClientEndpoint,
+    wrapClientTLS,
+)
 from twisted.internet.interfaces import IStreamClientEndpoint
 from twisted.python.failure import Failure
 from twisted.web.client import URI, HTTPConnectionPool, _AgentBase
@@ -32,7 +36,11 @@ from twisted.web.iweb import (
     IResponse,
 )
 
-from synapse.config.workers import InstanceLocationConfig
+from synapse.config.workers import (
+    InstanceLocationConfig,
+    InstanceTcpLocationConfig,
+    InstanceUnixLocationConfig,
+)
 from synapse.types import ISynapseReactor
 
 logger = logging.getLogger(__name__)
@@ -40,7 +48,7 @@ logger = logging.getLogger(__name__)
 
 @implementer(IAgentEndpointFactory)
 class ReplicationEndpointFactory:
-    """Connect to a given TCP socket"""
+    """Connect to a given TCP or UNIX socket"""
 
     def __init__(
         self,
@@ -64,24 +72,27 @@ class ReplicationEndpointFactory:
         # The given URI has a special scheme and includes the worker name. The
         # actual connection details are pulled from the instance map.
         worker_name = uri.netloc.decode("utf-8")
-        scheme = self.instance_map[worker_name].scheme()
+        location_config = self.instance_map[worker_name]
+        scheme = location_config.scheme()
 
-        if scheme in ("http", "https"):
+        if isinstance(location_config, InstanceTcpLocationConfig):
             endpoint = HostnameEndpoint(
                 self.reactor,
-                self.instance_map[worker_name].host,
-                self.instance_map[worker_name].port,
+                location_config.host,
+                location_config.port,
             )
             if scheme == "https":
                 endpoint = wrapClientTLS(
                     # The 'port' argument below isn't actually used by the function
                     self.context_factory.creatorForNetloc(
-                        self.instance_map[worker_name].host.encode("utf-8"),
-                        self.instance_map[worker_name].port,
+                        location_config.host.encode("utf-8"),
+                        location_config.port,
                     ),
                     endpoint,
                 )
             return endpoint
+        elif isinstance(location_config, InstanceUnixLocationConfig):
+            return UNIXClientEndpoint(self.reactor, location_config.path)
         else:
             raise SchemeNotSupported(f"Unsupported scheme: {scheme}")
 
@@ -138,13 +149,16 @@ class ReplicationAgent(_AgentBase):
         An existing connection from the connection pool may be used or a new
         one may be created.
 
-        Currently, HTTP and HTTPS schemes are supported in uri.
+        Currently, HTTP, HTTPS and UNIX schemes are supported in uri.
 
         This is copied from twisted.web.client.Agent, except:
 
-        * It uses a different pool key (combining the host & port).
-        * It does not call _ensureValidURI(...) since it breaks on some
-          UNIX paths.
+        * It uses a different pool key (combining the scheme with either host & port or
+          socket path).
+        * It does not call _ensureValidURI(...) as the strictness of IDNA2008 is not
+          required when using a worker's name as a 'hostname' for Synapse HTTP
+          Replication machinery. Specifically, this allows a range of ascii characters
+          such as '+' and '_' in hostnames/worker's names.
 
         See: twisted.web.iweb.IAgent.request
         """
@@ -154,9 +168,12 @@ class ReplicationAgent(_AgentBase):
         except SchemeNotSupported:
             return defer.fail(Failure())
 
+        worker_name = parsedURI.netloc.decode("utf-8")
+        key_scheme = self._endpointFactory.instance_map[worker_name].scheme()
+        key_netloc = self._endpointFactory.instance_map[worker_name].netloc()
         # This sets the Pool key to be:
-        #  (http(s), <host:ip>)
-        key = (parsedURI.scheme, parsedURI.netloc)
+        #  (http(s), <host:port>) or (unix, <socket_path>)
+        key = (key_scheme, key_netloc)
 
         # _requestWithEndpoint comes from _AgentBase class
         return self._requestWithEndpoint(
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index 75217e3f45..be910128aa 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -1070,7 +1070,7 @@ def trace_servlet(
         tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER,
         tags.HTTP_METHOD: request.get_method(),
         tags.HTTP_URL: request.get_redacted_uri(),
-        tags.PEER_HOST_IPV6: request.getClientAddress().host,
+        tags.PEER_HOST_IPV6: request.get_client_ip_if_available(),
     }
 
     request_name = request.request_metrics.name
@@ -1091,9 +1091,11 @@ def trace_servlet(
             # with JsonResource).
             scope.span.set_operation_name(request.request_metrics.name)
 
+            # Mypy seems to think that start_context.tag below can be Optional[str], but
+            # that doesn't appear to be correct and works in practice.
             request_tags[
                 SynapseTags.REQUEST_TAG
-            ] = request.request_metrics.start_context.tag
+            ] = request.request_metrics.start_context.tag  # type: ignore[assignment]
 
             # set the tags *after* the servlet completes, in case it decided to
             # prioritise the span (tags will get dropped on unprioritised spans)
diff --git a/tests/replication/_base.py b/tests/replication/_base.py
index eb9b1f1cd9..39aadb9ed5 100644
--- a/tests/replication/_base.py
+++ b/tests/replication/_base.py
@@ -22,6 +22,7 @@ from twisted.test.proto_helpers import MemoryReactor
 from twisted.web.resource import Resource
 
 from synapse.app.generic_worker import GenericWorkerServer
+from synapse.config.workers import InstanceTcpLocationConfig, InstanceUnixLocationConfig
 from synapse.http.site import SynapseRequest, SynapseSite
 from synapse.replication.http import ReplicationRestResource
 from synapse.replication.tcp.client import ReplicationDataHandler
@@ -339,7 +340,7 @@ class BaseMultiWorkerStreamTestCase(unittest.HomeserverTestCase):
         # `_handle_http_replication_attempt` like we do with the master HS.
         instance_name = worker_hs.get_instance_name()
         instance_loc = worker_hs.config.worker.instance_map.get(instance_name)
-        if instance_loc:
+        if instance_loc and isinstance(instance_loc, InstanceTcpLocationConfig):
             # Ensure the host is one that has a fake DNS entry.
             if instance_loc.host not in self.reactor.lookups:
                 raise Exception(
@@ -360,6 +361,10 @@ class BaseMultiWorkerStreamTestCase(unittest.HomeserverTestCase):
                 instance_loc.port,
                 lambda: self._handle_http_replication_attempt(worker_hs, port),
             )
+        elif instance_loc and isinstance(instance_loc, InstanceUnixLocationConfig):
+            raise Exception(
+                "Unix sockets are not supported for unit tests at this time."
+            )
 
         store = worker_hs.get_datastores().main
         store.db_pool._db_pool = self.database_pool._db_pool
diff --git a/tests/server.py b/tests/server.py
index a12c3e3b9a..c84a524e8c 100644
--- a/tests/server.py
+++ b/tests/server.py
@@ -53,6 +53,7 @@ from twisted.internet.interfaces import (
     IConnector,
     IConsumer,
     IHostnameResolver,
+    IListeningPort,
     IProducer,
     IProtocol,
     IPullProducer,
@@ -62,7 +63,7 @@ from twisted.internet.interfaces import (
     IResolverSimple,
     ITransport,
 )
-from twisted.internet.protocol import ClientFactory, DatagramProtocol
+from twisted.internet.protocol import ClientFactory, DatagramProtocol, Factory
 from twisted.python import threadpool
 from twisted.python.failure import Failure
 from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactorClock
@@ -523,6 +524,35 @@ class ThreadedMemoryReactorClock(MemoryReactorClock):
         """
         self._tcp_callbacks[(host, port)] = callback
 
+    def connectUNIX(
+        self,
+        address: str,
+        factory: ClientFactory,
+        timeout: float = 30,
+        checkPID: int = 0,
+    ) -> IConnector:
+        """
+        Unix sockets aren't supported for unit tests yet. Make it obvious to any
+        developer trying it out that they will need to do some work before being able
+        to use it in tests.
+        """
+        raise Exception("Unix sockets are not implemented for tests yet, sorry.")
+
+    def listenUNIX(
+        self,
+        address: str,
+        factory: Factory,
+        backlog: int = 50,
+        mode: int = 0o666,
+        wantPID: int = 0,
+    ) -> IListeningPort:
+        """
+        Unix sockets aren't supported for unit tests yet. Make it obvious to any
+        developer trying it out that they will need to do some work before being able
+        to use it in tests.
+        """
+        raise Exception("Unix sockets are not implemented for tests, sorry")
+
     def connectTCP(
         self,
         host: str,
-- 
cgit 1.5.1


From ae391db777af0146ad8b50fa5bcbd7cc39c0d886 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 11 Jul 2023 17:12:41 -0500
Subject: Better warning in logs when we fail to fetch an alias (#15922)

**Before:**
```
Error retrieving alias
```

**After:**
```
Error retrieving alias #foo:bar -> 401 Unauthorized
```

*Spawning from creating the [manual testing strategy for the outbound federation proxy](https://github.com/matrix-org/synapse/pull/15773).*
---
 changelog.d/15922.misc        | 1 +
 synapse/handlers/directory.py | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15922.misc

(limited to 'synapse')

diff --git a/changelog.d/15922.misc b/changelog.d/15922.misc
new file mode 100644
index 0000000000..93fc644877
--- /dev/null
+++ b/changelog.d/15922.misc
@@ -0,0 +1 @@
+Add details to warning in log when we fail to fetch an alias.
diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py
index 1e0623c7f8..623a4e7b1d 100644
--- a/synapse/handlers/directory.py
+++ b/synapse/handlers/directory.py
@@ -277,7 +277,9 @@ class DirectoryHandler:
             except RequestSendFailed:
                 raise SynapseError(502, "Failed to fetch alias")
             except CodeMessageException as e:
-                logging.warning("Error retrieving alias")
+                logging.warning(
+                    "Error retrieving alias %s -> %s %s", room_alias, e.code, e.msg
+                )
                 if e.code == 404:
                     fed_result = None
                 else:
-- 
cgit 1.5.1


From 8eb7bb975eed0250aa8be5e8fb70c586cbff6b37 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Wed, 12 Jul 2023 11:09:13 +0200
Subject: Mark get_user_in_directory private since only used in tests (#15884)

---
 changelog.d/15884.misc                           |  1 +
 synapse/storage/databases/main/user_directory.py |  9 +--------
 tests/handlers/test_user_directory.py            | 18 +++++++++---------
 tests/rest/admin/test_user.py                    |  6 +++---
 4 files changed, 14 insertions(+), 20 deletions(-)
 create mode 100644 changelog.d/15884.misc

(limited to 'synapse')

diff --git a/changelog.d/15884.misc b/changelog.d/15884.misc
new file mode 100644
index 0000000000..8e73a9a6cd
--- /dev/null
+++ b/changelog.d/15884.misc
@@ -0,0 +1 @@
+Mark `get_user_in_directory` private since it is only used in tests. Also remove the cache from it.
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index b0a06baf4f..924022c95c 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -62,7 +62,6 @@ from synapse.types import (
     get_domain_from_id,
     get_localpart_from_id,
 )
-from synapse.util.caches.descriptors import cached
 
 logger = logging.getLogger(__name__)
 
@@ -771,9 +770,6 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
             # This should be unreachable.
             raise Exception("Unrecognized database engine")
 
-        for p in profiles:
-            txn.call_after(self.get_user_in_directory.invalidate, (p.user_id,))
-
     async def add_users_who_share_private_room(
         self, room_id: str, user_id_tuples: Iterable[Tuple[str, str]]
     ) -> None:
@@ -831,14 +827,12 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
             txn.execute(f"{truncate} user_directory_search")
             txn.execute(f"{truncate} users_in_public_rooms")
             txn.execute(f"{truncate} users_who_share_private_rooms")
-            txn.call_after(self.get_user_in_directory.invalidate_all)
 
         await self.db_pool.runInteraction(
             "delete_all_from_user_dir", _delete_all_from_user_dir_txn
         )
 
-    @cached()
-    async def get_user_in_directory(self, user_id: str) -> Optional[Mapping[str, str]]:
+    async def _get_user_in_directory(self, user_id: str) -> Optional[Mapping[str, str]]:
         return await self.db_pool.simple_select_one(
             table="user_directory",
             keyvalues={"user_id": user_id},
@@ -900,7 +894,6 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
                 table="users_who_share_private_rooms",
                 keyvalues={"other_user_id": user_id},
             )
-            txn.call_after(self.get_user_in_directory.invalidate, (user_id,))
 
         await self.db_pool.runInteraction(
             "remove_from_user_dir", _remove_from_user_dir_txn
diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py
index 15a7dc6818..9785dd698b 100644
--- a/tests/handlers/test_user_directory.py
+++ b/tests/handlers/test_user_directory.py
@@ -356,7 +356,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
                 support_user_id, ProfileInfo("I love support me", None)
             )
         )
-        profile = self.get_success(self.store.get_user_in_directory(support_user_id))
+        profile = self.get_success(self.store._get_user_in_directory(support_user_id))
         self.assertIsNone(profile)
         display_name = "display_name"
 
@@ -364,7 +364,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         self.get_success(
             self.handler.handle_local_profile_change(regular_user_id, profile_info)
         )
-        profile = self.get_success(self.store.get_user_in_directory(regular_user_id))
+        profile = self.get_success(self.store._get_user_in_directory(regular_user_id))
         assert profile is not None
         self.assertTrue(profile["display_name"] == display_name)
 
@@ -383,7 +383,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         )
 
         # profile is in directory
-        profile = self.get_success(self.store.get_user_in_directory(r_user_id))
+        profile = self.get_success(self.store._get_user_in_directory(r_user_id))
         assert profile is not None
         self.assertTrue(profile["display_name"] == display_name)
 
@@ -392,7 +392,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         self.get_success(self.handler.handle_local_user_deactivated(r_user_id))
 
         # profile is not in directory
-        profile = self.get_success(self.store.get_user_in_directory(r_user_id))
+        profile = self.get_success(self.store._get_user_in_directory(r_user_id))
         self.assertIsNone(profile)
 
         # update profile after deactivation
@@ -401,7 +401,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         )
 
         # profile is furthermore not in directory
-        profile = self.get_success(self.store.get_user_in_directory(r_user_id))
+        profile = self.get_success(self.store._get_user_in_directory(r_user_id))
         self.assertIsNone(profile)
 
     def test_handle_local_profile_change_with_appservice_user(self) -> None:
@@ -411,7 +411,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         )
 
         # profile is not in directory
-        profile = self.get_success(self.store.get_user_in_directory(as_user_id))
+        profile = self.get_success(self.store._get_user_in_directory(as_user_id))
         self.assertIsNone(profile)
 
         # update profile
@@ -421,13 +421,13 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
         )
 
         # profile is still not in directory
-        profile = self.get_success(self.store.get_user_in_directory(as_user_id))
+        profile = self.get_success(self.store._get_user_in_directory(as_user_id))
         self.assertIsNone(profile)
 
     def test_handle_local_profile_change_with_appservice_sender(self) -> None:
         # profile is not in directory
         profile = self.get_success(
-            self.store.get_user_in_directory(self.appservice.sender)
+            self.store._get_user_in_directory(self.appservice.sender)
         )
         self.assertIsNone(profile)
 
@@ -441,7 +441,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase):
 
         # profile is still not in directory
         profile = self.get_success(
-            self.store.get_user_in_directory(self.appservice.sender)
+            self.store._get_user_in_directory(self.appservice.sender)
         )
         self.assertIsNone(profile)
 
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index a17a1bb1d8..6f7b4bf642 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -2472,7 +2472,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         """
 
         # is in user directory
-        profile = self.get_success(self.store.get_user_in_directory(self.other_user))
+        profile = self.get_success(self.store._get_user_in_directory(self.other_user))
         assert profile is not None
         self.assertTrue(profile["display_name"] == "User")
 
@@ -2489,7 +2489,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         self.assertTrue(channel.json_body["deactivated"])
 
         # is not in user directory
-        profile = self.get_success(self.store.get_user_in_directory(self.other_user))
+        profile = self.get_success(self.store._get_user_in_directory(self.other_user))
         self.assertIsNone(profile)
 
         # Set new displayname user
@@ -2506,7 +2506,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         self.assertEqual("Foobar", channel.json_body["displayname"])
 
         # is not in user directory
-        profile = self.get_success(self.store.get_user_in_directory(self.other_user))
+        profile = self.get_success(self.store._get_user_in_directory(self.other_user))
         self.assertIsNone(profile)
 
     def test_reactivate_user(self) -> None:
-- 
cgit 1.5.1


From 36c6b92bfc6570b7b8f3d0416ec4a47a3b7846d3 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 12 Jul 2023 12:02:11 +0100
Subject: Fix push for invites received over federation (#15820)

---
 changelog.d/15820.bugfix   |  1 +
 synapse/push/push_tools.py | 37 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15820.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15820.bugfix b/changelog.d/15820.bugfix
new file mode 100644
index 0000000000..d259d32061
--- /dev/null
+++ b/changelog.d/15820.bugfix
@@ -0,0 +1 @@
+Fix long-standing bug where remote invites weren't correctly pushed.
diff --git a/synapse/push/push_tools.py b/synapse/push/push_tools.py
index 7ee07e4bee..a94a6e97c1 100644
--- a/synapse/push/push_tools.py
+++ b/synapse/push/push_tools.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 from typing import Dict
 
+from synapse.api.constants import EventTypes, Membership
 from synapse.events import EventBase
 from synapse.push.presentable_names import calculate_room_name, name_from_member_event
 from synapse.storage.controllers import StorageControllers
@@ -49,7 +50,41 @@ async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) -
 async def get_context_for_event(
     storage: StorageControllers, ev: EventBase, user_id: str
 ) -> Dict[str, str]:
-    ctx = {}
+    ctx: Dict[str, str] = {}
+
+    if ev.internal_metadata.outlier:
+        # We don't have state for outliers, so we can't compute the context
+        # except for invites and knocks. (Such events are known as 'out-of-band
+        # memberships' for the user).
+        if ev.type != EventTypes.Member:
+            return ctx
+
+        # We might be able to pull out the display name for the sender straight
+        # from the membership event
+        event_display_name = ev.content.get("displayname")
+        if event_display_name and ev.state_key == ev.sender:
+            ctx["sender_display_name"] = event_display_name
+
+        room_state = []
+        if ev.content.get("membership") == Membership.INVITE:
+            room_state = ev.unsigned.get("invite_room_state", [])
+        elif ev.content.get("membership") == Membership.KNOCK:
+            room_state = ev.unsigned.get("knock_room_state", [])
+
+        # Ideally we'd reuse the logic in `calculate_room_name`, but that gets
+        # complicated to handle partial events vs pulling events from the DB.
+        for state_dict in room_state:
+            type_tuple = (state_dict["type"], state_dict.get("state_key"))
+            if type_tuple == (EventTypes.Member, ev.sender):
+                display_name = state_dict["content"].get("displayname")
+                if display_name:
+                    ctx["sender_display_name"] = display_name
+            elif type_tuple == (EventTypes.Name, ""):
+                room_name = state_dict["content"].get("name")
+                if room_name:
+                    ctx["name"] = room_name
+
+        return ctx
 
     room_state_ids = await storage.state.get_state_ids_for_event(ev.event_id)
 
-- 
cgit 1.5.1


From 5bdf01fccdee521390a03ea5a148eded7d0ad426 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 12 Jul 2023 08:39:25 -0400
Subject: Fix running with an empty experimental features section. (#15925)

---
 changelog.d/15925.bugfix | 1 +
 synapse/config/auth.py   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15925.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15925.bugfix b/changelog.d/15925.bugfix
new file mode 100644
index 0000000000..e3ef783576
--- /dev/null
+++ b/changelog.d/15925.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in 1.86.0 where Synapse would fail to start with an empty `experimental_features` configuration setting.
diff --git a/synapse/config/auth.py b/synapse/config/auth.py
index c7ab428f28..3b4c77f572 100644
--- a/synapse/config/auth.py
+++ b/synapse/config/auth.py
@@ -31,7 +31,7 @@ class AuthConfig(Config):
 
         # The default value of password_config.enabled is True, unless msc3861 is enabled.
         msc3861_enabled = (
-            config.get("experimental_features", {})
+            (config.get("experimental_features") or {})
             .get("msc3861", {})
             .get("enabled", False)
         )
-- 
cgit 1.5.1


From 204b66c203564a019f1ecb4fb3909bfb375ce615 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 12 Jul 2023 10:30:05 -0400
Subject: Remove unneeded __init__. (#15926)

Remove an __init__ which only calls super() without changing the
input arguments.
---
 changelog.d/15926.misc                            | 1 +
 synapse/federation/transport/server/federation.py | 9 ---------
 2 files changed, 1 insertion(+), 9 deletions(-)
 create mode 100644 changelog.d/15926.misc

(limited to 'synapse')

diff --git a/changelog.d/15926.misc b/changelog.d/15926.misc
new file mode 100644
index 0000000000..bf4c0fa5d0
--- /dev/null
+++ b/changelog.d/15926.misc
@@ -0,0 +1 @@
+Remove unneeded `__init__`.
diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py
index 3a744e25be..3248953b48 100644
--- a/synapse/federation/transport/server/federation.py
+++ b/synapse/federation/transport/server/federation.py
@@ -432,15 +432,6 @@ class FederationV2SendJoinServlet(BaseFederationServerServlet):
 
     PREFIX = FEDERATION_V2_PREFIX
 
-    def __init__(
-        self,
-        hs: "HomeServer",
-        authenticator: Authenticator,
-        ratelimiter: FederationRateLimiter,
-        server_name: str,
-    ):
-        super().__init__(hs, authenticator, ratelimiter, server_name)
-
     async def on_PUT(
         self,
         origin: str,
-- 
cgit 1.5.1


From 20ae617d1417f8dd52e20b3a20cb01b4c2fd87c9 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 13 Jul 2023 07:23:56 -0400
Subject: Stop accepting 'user' parameter for application service registration.
 (#15928)

This is unspecced, but has existed for a very long time.
---
 changelog.d/15928.removal       |  1 +
 docs/upgrade.md                 | 10 ++++++++++
 synapse/rest/client/register.py | 12 ++++--------
 3 files changed, 15 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/15928.removal

(limited to 'synapse')

diff --git a/changelog.d/15928.removal b/changelog.d/15928.removal
new file mode 100644
index 0000000000..5563213d31
--- /dev/null
+++ b/changelog.d/15928.removal
@@ -0,0 +1 @@
+Remove support for calling the `/register` endpoint with an unspecced `user` property for application services.
diff --git a/docs/upgrade.md b/docs/upgrade.md
index b94d13c4da..5dde6c769e 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -88,6 +88,16 @@ process, for example:
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     ```
 
+# Upgrading to v1.89.0
+
+## Removal of unspecced `user` property for `/register`
+
+Application services can no longer call `/register` with a `user` property to create new users.
+The standard `username` property should be used instead. See the
+[Application Service specification](https://spec.matrix.org/v1.7/application-service-api/#server-admin-style-permissions)
+for more information.
+
+
 # Upgrading to v1.88.0
 
 ## Minimum supported Python version
diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py
index d59669f0b6..77e3b91b79 100644
--- a/synapse/rest/client/register.py
+++ b/synapse/rest/client/register.py
@@ -462,9 +462,9 @@ class RegisterRestServlet(RestServlet):
         # the auth layer will store these in sessions.
         desired_username = None
         if "username" in body:
-            if not isinstance(body["username"], str) or len(body["username"]) > 512:
-                raise SynapseError(400, "Invalid username")
             desired_username = body["username"]
+            if not isinstance(desired_username, str) or len(desired_username) > 512:
+                raise SynapseError(400, "Invalid username")
 
         # fork off as soon as possible for ASes which have completely
         # different registration flows to normal users
@@ -477,11 +477,6 @@ class RegisterRestServlet(RestServlet):
                     "Appservice token must be provided when using a type of m.login.application_service",
                 )
 
-            # Set the desired user according to the AS API (which uses the
-            # 'user' key not 'username'). Since this is a new addition, we'll
-            # fallback to 'username' if they gave one.
-            desired_username = body.get("user", desired_username)
-
             # XXX we should check that desired_username is valid. Currently
             # we give appservices carte blanche for any insanity in mxids,
             # because the IRC bridges rely on being able to register stupid
@@ -489,7 +484,8 @@ class RegisterRestServlet(RestServlet):
 
             access_token = self.auth.get_access_token_from_request(request)
 
-            if not isinstance(desired_username, str):
+            # Desired username is either a string or None.
+            if desired_username is None:
                 raise SynapseError(400, "Desired Username is missing or not a string")
 
             result = await self._do_appservice_registration(
-- 
cgit 1.5.1


From 1c802de626de3293049206cb788af15cbc8ea17f Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 18 Jul 2023 03:49:21 -0500
Subject: Re-introduce the outbound federation proxy (#15913)

Allow configuring the set of workers to proxy outbound federation traffic through (`outbound_federation_restricted_to`).

This is useful when you have a worker setup with `federation_sender` instances responsible for sending outbound federation requests and want to make sure *all* outbound federation traffic goes through those instances. Before this change, the generic workers would still contact federation themselves for things like profile lookups, backfill, etc. This PR allows you to set more strict access controls/firewall for all workers and only allow the `federation_sender`'s to contact the outside world.
---
 changelog.d/15913.feature                         |   1 +
 docs/usage/configuration/config_documentation.md  |  33 ++-
 docs/workers.md                                   |  24 ++
 synapse/api/errors.py                             |   7 +
 synapse/app/_base.py                              |   2 +
 synapse/app/generic_worker.py                     |   1 +
 synapse/app/homeserver.py                         |   1 +
 synapse/config/workers.py                         |  45 +++-
 synapse/http/client.py                            |   7 +-
 synapse/http/connectproxyclient.py                |  20 +-
 synapse/http/matrixfederationclient.py            | 142 ++++++++++-
 synapse/http/proxy.py                             | 283 +++++++++++++++++++++
 synapse/http/proxyagent.py                        | 141 ++++++++++-
 synapse/http/server.py                            |  55 +++--
 synapse/http/site.py                              |  27 +-
 tests/app/test_openid_listener.py                 |   8 +-
 tests/handlers/test_device.py                     |   3 +-
 tests/handlers/test_federation.py                 |   2 +-
 tests/handlers/test_presence.py                   |   1 -
 tests/handlers/test_typing.py                     |  10 +
 tests/http/test_matrixfederationclient.py         | 284 +++++++++++++++++++++-
 tests/http/test_proxy.py                          |  53 ++++
 tests/http/test_proxyagent.py                     |   4 +-
 tests/replication/_base.py                        |   3 +-
 tests/replication/test_federation_sender_shard.py |  22 +-
 tests/rest/client/test_presence.py                |   1 -
 tests/rest/client/test_rooms.py                   |   2 -
 tests/storage/test_e2e_room_keys.py               |   2 +-
 tests/storage/test_purge.py                       |   2 +-
 tests/storage/test_rollback_worker.py             |   4 +-
 tests/test_server.py                              |  33 ++-
 tests/unittest.py                                 |   1 +
 32 files changed, 1128 insertions(+), 96 deletions(-)
 create mode 100644 changelog.d/15913.feature
 create mode 100644 synapse/http/proxy.py
 create mode 100644 tests/http/test_proxy.py

(limited to 'synapse')

diff --git a/changelog.d/15913.feature b/changelog.d/15913.feature
new file mode 100644
index 0000000000..0d77fae2dc
--- /dev/null
+++ b/changelog.d/15913.feature
@@ -0,0 +1 @@
+Allow configuring the set of workers to proxy outbound federation traffic through via `outbound_federation_restricted_to`.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 22cd1772dc..4e6fcd085a 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3960,13 +3960,14 @@ federation_sender_instances:
 ---
 ### `instance_map`
 
-When using workers this should be a map from [`worker_name`](#worker_name) to the
-HTTP replication listener of the worker, if configured, and to the main process.
-Each worker declared under [`stream_writers`](../../workers.md#stream-writers) needs
-a HTTP replication listener, and that listener should be included in the `instance_map`.
-The main process also needs an entry on the `instance_map`, and it should be listed under
-`main` **if even one other worker exists**. Ensure the port matches with what is declared 
-inside the `listener` block for a `replication` listener.
+When using workers this should be a map from [`worker_name`](#worker_name) to the HTTP
+replication listener of the worker, if configured, and to the main process. Each worker
+declared under [`stream_writers`](../../workers.md#stream-writers) and
+[`outbound_federation_restricted_to`](#outbound_federation_restricted_to) needs a HTTP
+replication listener, and that listener should be included in the `instance_map`. The
+main process also needs an entry on the `instance_map`, and it should be listed under
+`main` **if even one other worker exists**. Ensure the port matches with what is
+declared inside the `listener` block for a `replication` listener.
 
 
 Example configuration:
@@ -4004,6 +4005,24 @@ stream_writers:
   typing: worker1
 ```
 ---
+### `outbound_federation_restricted_to`
+
+When using workers, you can restrict outbound federation traffic to only go through a
+specific subset of workers. Any worker specified here must also be in the
+[`instance_map`](#instance_map).
+[`worker_replication_secret`](#worker_replication_secret) must also be configured to
+authorize inter-worker communication.
+
+```yaml
+outbound_federation_restricted_to:
+  - federation_sender1
+  - federation_sender2
+```
+
+Also see the [worker
+documentation](../../workers.md#restrict-outbound-federation-traffic-to-a-specific-set-of-workers)
+for more info.
+---
 ### `run_background_tasks_on`
 
 The [worker](../../workers.md#background-tasks) that is used to run
diff --git a/docs/workers.md b/docs/workers.md
index cf9c0add82..24bd22724e 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -531,6 +531,30 @@ the stream writer for the `presence` stream:
 
     ^/_matrix/client/(api/v1|r0|v3|unstable)/presence/
 
+#### Restrict outbound federation traffic to a specific set of workers
+
+The
+[`outbound_federation_restricted_to`](usage/configuration/config_documentation.md#outbound_federation_restricted_to)
+configuration is useful to make sure outbound federation traffic only goes through a
+specified subset of workers. This allows you to set more strict access controls (like a
+firewall) for all workers and only allow the `federation_sender` workers to contact the
+outside world.
+
+```yaml
+instance_map:
+    main:
+        host: localhost
+        port: 8030
+    federation_sender1:
+        host: localhost
+        port: 8034
+
+outbound_federation_restricted_to:
+  - federation_sender1
+
+worker_replication_secret: "secret_secret"
+```
+
 #### Background tasks
 
 There is also support for moving background tasks to a separate
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index af894243f8..3546aaf7c3 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -217,6 +217,13 @@ class InvalidAPICallError(SynapseError):
         super().__init__(HTTPStatus.BAD_REQUEST, msg, Codes.BAD_JSON)
 
 
+class InvalidProxyCredentialsError(SynapseError):
+    """Error raised when the proxy credentials are invalid."""
+
+    def __init__(self, msg: str, errcode: str = Codes.UNKNOWN):
+        super().__init__(401, msg, errcode)
+
+
 class ProxiedRequestError(SynapseError):
     """An error from a general matrix endpoint, eg. from a proxied Matrix API call.
 
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index 936b1b0430..a94b57a671 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -386,6 +386,7 @@ def listen_unix(
 
 
 def listen_http(
+    hs: "HomeServer",
     listener_config: ListenerConfig,
     root_resource: Resource,
     version_string: str,
@@ -406,6 +407,7 @@ def listen_http(
         version_string,
         max_request_body_size=max_request_body_size,
         reactor=reactor,
+        hs=hs,
     )
 
     if isinstance(listener_config, TCPListenerConfig):
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index 7406c3948c..dc79efcc14 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -221,6 +221,7 @@ class GenericWorkerServer(HomeServer):
         root_resource = create_resource_tree(resources, OptionsResource())
 
         _base.listen_http(
+            self,
             listener_config,
             root_resource,
             self.version_string,
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 84236ac299..f188c7265a 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -139,6 +139,7 @@ class SynapseHomeServer(HomeServer):
             root_resource = OptionsResource()
 
         ports = listen_http(
+            self,
             listener_config,
             create_resource_tree(resources, root_resource),
             self.version_string,
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index e55ca12a36..6567fb6bb0 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -15,7 +15,7 @@
 
 import argparse
 import logging
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List, Optional, Union
 
 import attr
 from pydantic import BaseModel, Extra, StrictBool, StrictInt, StrictStr
@@ -171,6 +171,27 @@ class WriterLocations:
     )
 
 
+@attr.s(auto_attribs=True)
+class OutboundFederationRestrictedTo:
+    """Whether we limit outbound federation to a certain set of instances.
+
+    Attributes:
+        instances: optional list of instances that can make outbound federation
+            requests. If None then all instances can make federation requests.
+        locations: list of instance locations to connect to proxy via.
+    """
+
+    instances: Optional[List[str]]
+    locations: List[InstanceLocationConfig] = attr.Factory(list)
+
+    def __contains__(self, instance: str) -> bool:
+        # It feels a bit dirty to return `True` if `instances` is `None`, but it makes
+        # sense in downstream usage in the sense that if
+        # `outbound_federation_restricted_to` is not configured, then any instance can
+        # talk to federation (no restrictions so always return `True`).
+        return self.instances is None or instance in self.instances
+
+
 class WorkerConfig(Config):
     """The workers are processes run separately to the main synapse process.
     They have their own pid_file and listener configuration. They use the
@@ -385,6 +406,28 @@ class WorkerConfig(Config):
             new_option_name="update_user_directory_from_worker",
         )
 
+        outbound_federation_restricted_to = config.get(
+            "outbound_federation_restricted_to", None
+        )
+        self.outbound_federation_restricted_to = OutboundFederationRestrictedTo(
+            outbound_federation_restricted_to
+        )
+        if outbound_federation_restricted_to:
+            if not self.worker_replication_secret:
+                raise ConfigError(
+                    "`worker_replication_secret` must be configured when using `outbound_federation_restricted_to`."
+                )
+
+            for instance in outbound_federation_restricted_to:
+                if instance not in self.instance_map:
+                    raise ConfigError(
+                        "Instance %r is configured in 'outbound_federation_restricted_to' but does not appear in `instance_map` config."
+                        % (instance,)
+                    )
+                self.outbound_federation_restricted_to.locations.append(
+                    self.instance_map[instance]
+                )
+
     def _should_this_worker_perform_duty(
         self,
         config: Dict[str, Any],
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 09ea93e10d..ca2cdbc6e2 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -1037,7 +1037,12 @@ class _ReadBodyWithMaxSizeProtocol(protocol.Protocol):
         if reason.check(ResponseDone):
             self.deferred.callback(self.length)
         elif reason.check(PotentialDataLoss):
-            # stolen from https://github.com/twisted/treq/pull/49/files
+            # This applies to requests which don't set `Content-Length` or a
+            # `Transfer-Encoding` in the response because in this case the end of the
+            # response is indicated by the connection being closed, an event which may
+            # also be due to a transient network problem or other error. But since this
+            # behavior is expected of some servers (like YouTube), let's ignore it.
+            # Stolen from https://github.com/twisted/treq/pull/49/files
             # http://twistedmatrix.com/trac/ticket/4840
             self.deferred.callback(self.length)
         else:
diff --git a/synapse/http/connectproxyclient.py b/synapse/http/connectproxyclient.py
index 23a60af171..636efc33e8 100644
--- a/synapse/http/connectproxyclient.py
+++ b/synapse/http/connectproxyclient.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import abc
 import base64
 import logging
 from typing import Optional, Union
@@ -39,8 +40,14 @@ class ProxyConnectError(ConnectError):
     pass
 
 
-@attr.s(auto_attribs=True)
 class ProxyCredentials:
+    @abc.abstractmethod
+    def as_proxy_authorization_value(self) -> bytes:
+        raise NotImplementedError()
+
+
+@attr.s(auto_attribs=True)
+class BasicProxyCredentials(ProxyCredentials):
     username_password: bytes
 
     def as_proxy_authorization_value(self) -> bytes:
@@ -55,6 +62,17 @@ class ProxyCredentials:
         return b"Basic " + base64.encodebytes(self.username_password)
 
 
+@attr.s(auto_attribs=True)
+class BearerProxyCredentials(ProxyCredentials):
+    access_token: bytes
+
+    def as_proxy_authorization_value(self) -> bytes:
+        """
+        Return the value for a Proxy-Authorization header (i.e. 'Bearer xxx').
+        """
+        return b"Bearer " + self.access_token
+
+
 @implementer(IStreamClientEndpoint)
 class HTTPConnectProxyEndpoint:
     """An Endpoint implementation which will send a CONNECT request to an http proxy
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index cc4e258b0f..583c03447c 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -50,7 +50,7 @@ from twisted.internet.interfaces import IReactorTime
 from twisted.internet.task import Cooperator
 from twisted.web.client import ResponseFailed
 from twisted.web.http_headers import Headers
-from twisted.web.iweb import IBodyProducer, IResponse
+from twisted.web.iweb import IAgent, IBodyProducer, IResponse
 
 import synapse.metrics
 import synapse.util.retryutils
@@ -71,7 +71,9 @@ from synapse.http.client import (
     encode_query_args,
     read_body_with_max_size,
 )
+from synapse.http.connectproxyclient import BearerProxyCredentials
 from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent
+from synapse.http.proxyagent import ProxyAgent
 from synapse.http.types import QueryParams
 from synapse.logging import opentracing
 from synapse.logging.context import make_deferred_yieldable, run_in_background
@@ -393,17 +395,41 @@ class MatrixFederationHttpClient:
         if hs.config.server.user_agent_suffix:
             user_agent = "%s %s" % (user_agent, hs.config.server.user_agent_suffix)
 
-        federation_agent = MatrixFederationAgent(
-            self.reactor,
-            tls_client_options_factory,
-            user_agent.encode("ascii"),
-            hs.config.server.federation_ip_range_allowlist,
-            hs.config.server.federation_ip_range_blocklist,
+        outbound_federation_restricted_to = (
+            hs.config.worker.outbound_federation_restricted_to
         )
+        if hs.get_instance_name() in outbound_federation_restricted_to:
+            # Talk to federation directly
+            federation_agent: IAgent = MatrixFederationAgent(
+                self.reactor,
+                tls_client_options_factory,
+                user_agent.encode("ascii"),
+                hs.config.server.federation_ip_range_allowlist,
+                hs.config.server.federation_ip_range_blocklist,
+            )
+        else:
+            proxy_authorization_secret = hs.config.worker.worker_replication_secret
+            assert (
+                proxy_authorization_secret is not None
+            ), "`worker_replication_secret` must be set when using `outbound_federation_restricted_to` (used to authenticate requests across workers)"
+            federation_proxy_credentials = BearerProxyCredentials(
+                proxy_authorization_secret.encode("ascii")
+            )
+
+            # We need to talk to federation via the proxy via one of the configured
+            # locations
+            federation_proxy_locations = outbound_federation_restricted_to.locations
+            federation_agent = ProxyAgent(
+                self.reactor,
+                self.reactor,
+                tls_client_options_factory,
+                federation_proxy_locations=federation_proxy_locations,
+                federation_proxy_credentials=federation_proxy_credentials,
+            )
 
         # Use a BlocklistingAgentWrapper to prevent circumventing the IP
         # blocking via IP literals in server names
-        self.agent = BlocklistingAgentWrapper(
+        self.agent: IAgent = BlocklistingAgentWrapper(
             federation_agent,
             ip_blocklist=hs.config.server.federation_ip_range_blocklist,
         )
@@ -412,7 +438,6 @@ class MatrixFederationHttpClient:
         self._store = hs.get_datastores().main
         self.version_string_bytes = hs.version_string.encode("ascii")
         self.default_timeout_seconds = hs.config.federation.client_timeout_ms / 1000
-
         self.max_long_retry_delay_seconds = (
             hs.config.federation.max_long_retry_delay_ms / 1000
         )
@@ -1131,6 +1156,101 @@ class MatrixFederationHttpClient:
             Succeeds when we get a 2xx HTTP response. The
             result will be the decoded JSON body.
 
+        Raises:
+            HttpResponseException: If we get an HTTP response code >= 300
+                (except 429).
+            NotRetryingDestination: If we are not yet ready to retry this
+                server.
+            FederationDeniedError: If this destination is not on our
+                federation whitelist
+            RequestSendFailed: If there were problems connecting to the
+                remote, due to e.g. DNS failures, connection timeouts etc.
+        """
+        json_dict, _ = await self.get_json_with_headers(
+            destination=destination,
+            path=path,
+            args=args,
+            retry_on_dns_fail=retry_on_dns_fail,
+            timeout=timeout,
+            ignore_backoff=ignore_backoff,
+            try_trailing_slash_on_400=try_trailing_slash_on_400,
+            parser=parser,
+        )
+        return json_dict
+
+    @overload
+    async def get_json_with_headers(
+        self,
+        destination: str,
+        path: str,
+        args: Optional[QueryParams] = None,
+        retry_on_dns_fail: bool = True,
+        timeout: Optional[int] = None,
+        ignore_backoff: bool = False,
+        try_trailing_slash_on_400: bool = False,
+        parser: Literal[None] = None,
+    ) -> Tuple[JsonDict, Dict[bytes, List[bytes]]]:
+        ...
+
+    @overload
+    async def get_json_with_headers(
+        self,
+        destination: str,
+        path: str,
+        args: Optional[QueryParams] = ...,
+        retry_on_dns_fail: bool = ...,
+        timeout: Optional[int] = ...,
+        ignore_backoff: bool = ...,
+        try_trailing_slash_on_400: bool = ...,
+        parser: ByteParser[T] = ...,
+    ) -> Tuple[T, Dict[bytes, List[bytes]]]:
+        ...
+
+    async def get_json_with_headers(
+        self,
+        destination: str,
+        path: str,
+        args: Optional[QueryParams] = None,
+        retry_on_dns_fail: bool = True,
+        timeout: Optional[int] = None,
+        ignore_backoff: bool = False,
+        try_trailing_slash_on_400: bool = False,
+        parser: Optional[ByteParser[T]] = None,
+    ) -> Tuple[Union[JsonDict, T], Dict[bytes, List[bytes]]]:
+        """GETs some json from the given host homeserver and path
+
+        Args:
+            destination: The remote server to send the HTTP request to.
+
+            path: The HTTP path.
+
+            args: A dictionary used to create query strings, defaults to
+                None.
+
+            retry_on_dns_fail: true if the request should be retried on DNS failures
+
+            timeout: number of milliseconds to wait for the response.
+                self._default_timeout (60s) by default.
+
+                Note that we may make several attempts to send the request; this
+                timeout applies to the time spent waiting for response headers for
+                *each* attempt (including connection time) as well as the time spent
+                reading the response body after a 200 response.
+
+            ignore_backoff: true to ignore the historical backoff data
+                and try the request anyway.
+
+            try_trailing_slash_on_400: True if on a 400 M_UNRECOGNIZED
+                response we should try appending a trailing slash to the end of
+                the request. Workaround for #3622 in Synapse <= v0.99.3.
+
+            parser: The parser to use to decode the response. Defaults to
+                parsing as JSON.
+
+        Returns:
+            Succeeds when we get a 2xx HTTP response. The result will be a tuple of the
+            decoded JSON body and a dict of the response headers.
+
         Raises:
             HttpResponseException: If we get an HTTP response code >= 300
                 (except 429).
@@ -1156,6 +1276,8 @@ class MatrixFederationHttpClient:
             timeout=timeout,
         )
 
+        headers = dict(response.headers.getAllRawHeaders())
+
         if timeout is not None:
             _sec_timeout = timeout / 1000
         else:
@@ -1173,7 +1295,7 @@ class MatrixFederationHttpClient:
             parser=parser,
         )
 
-        return body
+        return body, headers
 
     async def delete_json(
         self,
diff --git a/synapse/http/proxy.py b/synapse/http/proxy.py
new file mode 100644
index 0000000000..c9f51e51bc
--- /dev/null
+++ b/synapse/http/proxy.py
@@ -0,0 +1,283 @@
+#  Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+import json
+import logging
+import urllib.parse
+from typing import TYPE_CHECKING, Any, Optional, Set, Tuple, cast
+
+from twisted.internet import protocol
+from twisted.internet.interfaces import ITCPTransport
+from twisted.internet.protocol import connectionDone
+from twisted.python import failure
+from twisted.python.failure import Failure
+from twisted.web.client import ResponseDone
+from twisted.web.http_headers import Headers
+from twisted.web.iweb import IResponse
+from twisted.web.resource import IResource
+from twisted.web.server import Request, Site
+
+from synapse.api.errors import Codes, InvalidProxyCredentialsError
+from synapse.http import QuieterFileBodyProducer
+from synapse.http.server import _AsyncResource
+from synapse.logging.context import make_deferred_yieldable, run_in_background
+from synapse.types import ISynapseReactor
+from synapse.util.async_helpers import timeout_deferred
+
+if TYPE_CHECKING:
+    from synapse.http.site import SynapseRequest
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+# "Hop-by-hop" headers (as opposed to "end-to-end" headers) as defined by RFC2616
+# section 13.5.1 and referenced in RFC9110 section 7.6.1. These are meant to only be
+# consumed by the immediate recipient and not be forwarded on.
+HOP_BY_HOP_HEADERS = {
+    "Connection",
+    "Keep-Alive",
+    "Proxy-Authenticate",
+    "Proxy-Authorization",
+    "TE",
+    "Trailers",
+    "Transfer-Encoding",
+    "Upgrade",
+}
+
+
+def parse_connection_header_value(
+    connection_header_value: Optional[bytes],
+) -> Set[str]:
+    """
+    Parse the `Connection` header to determine which headers should not be copied
+    over from the remote response.
+
+    As defined by RFC2616 section 14.10 and RFC9110 section 7.6.1
+
+    Example: `Connection: close, X-Foo, X-Bar` will return `{"Close", "X-Foo", "X-Bar"}`
+
+    Even though "close" is a special directive, let's treat it as just another
+    header for simplicity. If people want to check for this directive, they can simply
+    check for `"Close" in headers`.
+
+    Args:
+        connection_header_value: The value of the `Connection` header.
+
+    Returns:
+        The set of header names that should not be copied over from the remote response.
+        The keys are capitalized in canonical capitalization.
+    """
+    headers = Headers()
+    extra_headers_to_remove: Set[str] = set()
+    if connection_header_value:
+        extra_headers_to_remove = {
+            headers._canonicalNameCaps(connection_option.strip()).decode("ascii")
+            for connection_option in connection_header_value.split(b",")
+        }
+
+    return extra_headers_to_remove
+
+
+class ProxyResource(_AsyncResource):
+    """
+    A stub resource that proxies any requests with a `matrix-federation://` scheme
+    through the given `federation_agent` to the remote homeserver and ferries back the
+    info.
+    """
+
+    isLeaf = True
+
+    def __init__(self, reactor: ISynapseReactor, hs: "HomeServer"):
+        super().__init__(True)
+
+        self.reactor = reactor
+        self.agent = hs.get_federation_http_client().agent
+
+        self._proxy_authorization_secret = hs.config.worker.worker_replication_secret
+
+    def _check_auth(self, request: Request) -> None:
+        # The `matrix-federation://` proxy functionality can only be used with auth.
+        # Protect against homeserver admins forgetting to configure a secret.
+        assert self._proxy_authorization_secret is not None
+
+        # Get the authorization header.
+        auth_headers = request.requestHeaders.getRawHeaders(b"Proxy-Authorization")
+
+        if not auth_headers:
+            raise InvalidProxyCredentialsError(
+                "Missing Proxy-Authorization header.", Codes.MISSING_TOKEN
+            )
+        if len(auth_headers) > 1:
+            raise InvalidProxyCredentialsError(
+                "Too many Proxy-Authorization headers.", Codes.UNAUTHORIZED
+            )
+        parts = auth_headers[0].split(b" ")
+        if parts[0] == b"Bearer" and len(parts) == 2:
+            received_secret = parts[1].decode("ascii")
+            if self._proxy_authorization_secret == received_secret:
+                # Success!
+                return
+
+        raise InvalidProxyCredentialsError(
+            "Invalid Proxy-Authorization header.", Codes.UNAUTHORIZED
+        )
+
+    async def _async_render(self, request: "SynapseRequest") -> Tuple[int, Any]:
+        uri = urllib.parse.urlparse(request.uri)
+        assert uri.scheme == b"matrix-federation"
+
+        # Check the authorization headers before handling the request.
+        self._check_auth(request)
+
+        headers = Headers()
+        for header_name in (b"User-Agent", b"Authorization", b"Content-Type"):
+            header_value = request.getHeader(header_name)
+            if header_value:
+                headers.addRawHeader(header_name, header_value)
+
+        request_deferred = run_in_background(
+            self.agent.request,
+            request.method,
+            request.uri,
+            headers=headers,
+            bodyProducer=QuieterFileBodyProducer(request.content),
+        )
+        request_deferred = timeout_deferred(
+            request_deferred,
+            # This should be set longer than the timeout in `MatrixFederationHttpClient`
+            # so that it has enough time to complete and pass us the data before we give
+            # up.
+            timeout=90,
+            reactor=self.reactor,
+        )
+
+        response = await make_deferred_yieldable(request_deferred)
+
+        return response.code, response
+
+    def _send_response(
+        self,
+        request: "SynapseRequest",
+        code: int,
+        response_object: Any,
+    ) -> None:
+        response = cast(IResponse, response_object)
+        response_headers = cast(Headers, response.headers)
+
+        request.setResponseCode(code)
+
+        # The `Connection` header also defines which headers should not be copied over.
+        connection_header = response_headers.getRawHeaders(b"connection")
+        extra_headers_to_remove = parse_connection_header_value(
+            connection_header[0] if connection_header else None
+        )
+
+        # Copy headers.
+        for k, v in response_headers.getAllRawHeaders():
+            # Do not copy over any hop-by-hop headers. These are meant to only be
+            # consumed by the immediate recipient and not be forwarded on.
+            header_key = k.decode("ascii")
+            if (
+                header_key in HOP_BY_HOP_HEADERS
+                or header_key in extra_headers_to_remove
+            ):
+                continue
+
+            request.responseHeaders.setRawHeaders(k, v)
+
+        response.deliverBody(_ProxyResponseBody(request))
+
+    def _send_error_response(
+        self,
+        f: failure.Failure,
+        request: "SynapseRequest",
+    ) -> None:
+        if isinstance(f.value, InvalidProxyCredentialsError):
+            error_response_code = f.value.code
+            error_response_json = {"errcode": f.value.errcode, "err": f.value.msg}
+        else:
+            error_response_code = 502
+            error_response_json = {
+                "errcode": Codes.UNKNOWN,
+                "err": "ProxyResource: Error when proxying request: %s %s -> %s"
+                % (
+                    request.method.decode("ascii"),
+                    request.uri.decode("ascii"),
+                    f,
+                ),
+            }
+
+        request.setResponseCode(error_response_code)
+        request.setHeader(b"Content-Type", b"application/json")
+        request.write((json.dumps(error_response_json)).encode())
+        request.finish()
+
+
+class _ProxyResponseBody(protocol.Protocol):
+    """
+    A protocol that proxies the given remote response data back out to the given local
+    request.
+    """
+
+    transport: Optional[ITCPTransport] = None
+
+    def __init__(self, request: "SynapseRequest") -> None:
+        self._request = request
+
+    def dataReceived(self, data: bytes) -> None:
+        # Avoid sending response data to a local request that has already disconnected
+        if self._request._disconnected and self.transport is not None:
+            # Close the connection (forcefully) since all the data will get
+            # discarded anyway.
+            self.transport.abortConnection()
+            return
+
+        self._request.write(data)
+
+    def connectionLost(self, reason: Failure = connectionDone) -> None:
+        # If the local request is already finished (successfully or failed), don't
+        # worry about sending anything back.
+        if self._request.finished:
+            return
+
+        if reason.check(ResponseDone):
+            self._request.finish()
+        else:
+            # Abort the underlying request since our remote request also failed.
+            self._request.transport.abortConnection()
+
+
+class ProxySite(Site):
+    """
+    Proxies any requests with a `matrix-federation://` scheme through the given
+    `federation_agent`. Otherwise, behaves like a normal `Site`.
+    """
+
+    def __init__(
+        self,
+        resource: IResource,
+        reactor: ISynapseReactor,
+        hs: "HomeServer",
+    ):
+        super().__init__(resource, reactor=reactor)
+
+        self._proxy_resource = ProxyResource(reactor, hs=hs)
+
+    def getResourceFor(self, request: "SynapseRequest") -> IResource:
+        uri = urllib.parse.urlparse(request.uri)
+        if uri.scheme == b"matrix-federation":
+            return self._proxy_resource
+
+        return super().getResourceFor(request)
diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py
index 7bdc4acae7..59ab8fad35 100644
--- a/synapse/http/proxyagent.py
+++ b/synapse/http/proxyagent.py
@@ -12,8 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+import random
 import re
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Collection, Dict, List, Optional, Sequence, Tuple
 from urllib.parse import urlparse
 from urllib.request import (  # type: ignore[attr-defined]
     getproxies_environment,
@@ -23,8 +24,17 @@ from urllib.request import (  # type: ignore[attr-defined]
 from zope.interface import implementer
 
 from twisted.internet import defer
-from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
-from twisted.internet.interfaces import IReactorCore, IStreamClientEndpoint
+from twisted.internet.endpoints import (
+    HostnameEndpoint,
+    UNIXClientEndpoint,
+    wrapClientTLS,
+)
+from twisted.internet.interfaces import (
+    IProtocol,
+    IProtocolFactory,
+    IReactorCore,
+    IStreamClientEndpoint,
+)
 from twisted.python.failure import Failure
 from twisted.web.client import (
     URI,
@@ -36,8 +46,18 @@ from twisted.web.error import SchemeNotSupported
 from twisted.web.http_headers import Headers
 from twisted.web.iweb import IAgent, IBodyProducer, IPolicyForHTTPS, IResponse
 
+from synapse.config.workers import (
+    InstanceLocationConfig,
+    InstanceTcpLocationConfig,
+    InstanceUnixLocationConfig,
+)
 from synapse.http import redact_uri
-from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint, ProxyCredentials
+from synapse.http.connectproxyclient import (
+    BasicProxyCredentials,
+    HTTPConnectProxyEndpoint,
+    ProxyCredentials,
+)
+from synapse.logging.context import run_in_background
 
 logger = logging.getLogger(__name__)
 
@@ -74,6 +94,14 @@ class ProxyAgent(_AgentBase):
         use_proxy: Whether proxy settings should be discovered and used
             from conventional environment variables.
 
+        federation_proxy_locations: An optional list of locations to proxy outbound federation
+            traffic through (only requests that use the `matrix-federation://` scheme
+            will be proxied).
+
+        federation_proxy_credentials: Required if `federation_proxy_locations` is set. The
+            credentials to use when proxying outbound federation traffic through another
+            worker.
+
     Raises:
         ValueError if use_proxy is set and the environment variables
             contain an invalid proxy specification.
@@ -89,6 +117,8 @@ class ProxyAgent(_AgentBase):
         bindAddress: Optional[bytes] = None,
         pool: Optional[HTTPConnectionPool] = None,
         use_proxy: bool = False,
+        federation_proxy_locations: Collection[InstanceLocationConfig] = (),
+        federation_proxy_credentials: Optional[ProxyCredentials] = None,
     ):
         contextFactory = contextFactory or BrowserLikePolicyForHTTPS()
 
@@ -127,6 +157,47 @@ class ProxyAgent(_AgentBase):
         self._policy_for_https = contextFactory
         self._reactor = reactor
 
+        self._federation_proxy_endpoint: Optional[IStreamClientEndpoint] = None
+        self._federation_proxy_credentials: Optional[ProxyCredentials] = None
+        if federation_proxy_locations:
+            assert (
+                federation_proxy_credentials is not None
+            ), "`federation_proxy_credentials` are required when using `federation_proxy_locations`"
+
+            endpoints: List[IStreamClientEndpoint] = []
+            for federation_proxy_location in federation_proxy_locations:
+                endpoint: IStreamClientEndpoint
+                if isinstance(federation_proxy_location, InstanceTcpLocationConfig):
+                    endpoint = HostnameEndpoint(
+                        self.proxy_reactor,
+                        federation_proxy_location.host,
+                        federation_proxy_location.port,
+                    )
+                    if federation_proxy_location.tls:
+                        tls_connection_creator = (
+                            self._policy_for_https.creatorForNetloc(
+                                federation_proxy_location.host.encode("utf-8"),
+                                federation_proxy_location.port,
+                            )
+                        )
+                        endpoint = wrapClientTLS(tls_connection_creator, endpoint)
+
+                elif isinstance(federation_proxy_location, InstanceUnixLocationConfig):
+                    endpoint = UNIXClientEndpoint(
+                        self.proxy_reactor, federation_proxy_location.path
+                    )
+
+                else:
+                    # It is supremely unlikely we ever hit this
+                    raise SchemeNotSupported(
+                        f"Unknown type of Endpoint requested, check {federation_proxy_location}"
+                    )
+
+                endpoints.append(endpoint)
+
+            self._federation_proxy_endpoint = _RandomSampleEndpoints(endpoints)
+            self._federation_proxy_credentials = federation_proxy_credentials
+
     def request(
         self,
         method: bytes,
@@ -214,6 +285,25 @@ class ProxyAgent(_AgentBase):
                 parsed_uri.port,
                 self.https_proxy_creds,
             )
+        elif (
+            parsed_uri.scheme == b"matrix-federation"
+            and self._federation_proxy_endpoint
+        ):
+            assert (
+                self._federation_proxy_credentials is not None
+            ), "`federation_proxy_credentials` are required when using `federation_proxy_locations`"
+
+            # Set a Proxy-Authorization header
+            if headers is None:
+                headers = Headers()
+            # We always need authentication for the outbound federation proxy
+            headers.addRawHeader(
+                b"Proxy-Authorization",
+                self._federation_proxy_credentials.as_proxy_authorization_value(),
+            )
+
+            endpoint = self._federation_proxy_endpoint
+            request_path = uri
         else:
             # not using a proxy
             endpoint = HostnameEndpoint(
@@ -233,6 +323,11 @@ class ProxyAgent(_AgentBase):
             endpoint = wrapClientTLS(tls_connection_creator, endpoint)
         elif parsed_uri.scheme == b"http":
             pass
+        elif (
+            parsed_uri.scheme == b"matrix-federation"
+            and self._federation_proxy_endpoint
+        ):
+            pass
         else:
             return defer.fail(
                 Failure(
@@ -334,6 +429,42 @@ def parse_proxy(
 
     credentials = None
     if url.username and url.password:
-        credentials = ProxyCredentials(b"".join([url.username, b":", url.password]))
+        credentials = BasicProxyCredentials(
+            b"".join([url.username, b":", url.password])
+        )
 
     return url.scheme, url.hostname, url.port or default_port, credentials
+
+
+@implementer(IStreamClientEndpoint)
+class _RandomSampleEndpoints:
+    """An endpoint that randomly iterates through a given list of endpoints at
+    each connection attempt.
+    """
+
+    def __init__(
+        self,
+        endpoints: Sequence[IStreamClientEndpoint],
+    ) -> None:
+        assert endpoints
+        self._endpoints = endpoints
+
+    def __repr__(self) -> str:
+        return f"<_RandomSampleEndpoints endpoints={self._endpoints}>"
+
+    def connect(
+        self, protocol_factory: IProtocolFactory
+    ) -> "defer.Deferred[IProtocol]":
+        """Implements IStreamClientEndpoint interface"""
+
+        return run_in_background(self._do_connect, protocol_factory)
+
+    async def _do_connect(self, protocol_factory: IProtocolFactory) -> IProtocol:
+        failures: List[Failure] = []
+        for endpoint in random.sample(self._endpoints, k=len(self._endpoints)):
+            try:
+                return await endpoint.connect(protocol_factory)
+            except Exception:
+                failures.append(Failure())
+
+        failures.pop().raiseException()
diff --git a/synapse/http/server.py b/synapse/http/server.py
index e411ac7e62..f592600880 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -18,6 +18,7 @@ import html
 import logging
 import types
 import urllib
+import urllib.parse
 from http import HTTPStatus
 from http.client import FOUND
 from inspect import isawaitable
@@ -65,7 +66,6 @@ from synapse.api.errors import (
     UnrecognizedRequestError,
 )
 from synapse.config.homeserver import HomeServerConfig
-from synapse.http.site import SynapseRequest
 from synapse.logging.context import defer_to_thread, preserve_fn, run_in_background
 from synapse.logging.opentracing import active_span, start_active_span, trace_servlet
 from synapse.util import json_encoder
@@ -76,6 +76,7 @@ from synapse.util.iterutils import chunk_seq
 if TYPE_CHECKING:
     import opentracing
 
+    from synapse.http.site import SynapseRequest
     from synapse.server import HomeServer
 
 logger = logging.getLogger(__name__)
@@ -102,7 +103,7 @@ HTTP_STATUS_REQUEST_CANCELLED = 499
 
 
 def return_json_error(
-    f: failure.Failure, request: SynapseRequest, config: Optional[HomeServerConfig]
+    f: failure.Failure, request: "SynapseRequest", config: Optional[HomeServerConfig]
 ) -> None:
     """Sends a JSON error response to clients."""
 
@@ -220,8 +221,8 @@ def return_html_error(
 
 
 def wrap_async_request_handler(
-    h: Callable[["_AsyncResource", SynapseRequest], Awaitable[None]]
-) -> Callable[["_AsyncResource", SynapseRequest], "defer.Deferred[None]"]:
+    h: Callable[["_AsyncResource", "SynapseRequest"], Awaitable[None]]
+) -> Callable[["_AsyncResource", "SynapseRequest"], "defer.Deferred[None]"]:
     """Wraps an async request handler so that it calls request.processing.
 
     This helps ensure that work done by the request handler after the request is completed
@@ -235,7 +236,7 @@ def wrap_async_request_handler(
     """
 
     async def wrapped_async_request_handler(
-        self: "_AsyncResource", request: SynapseRequest
+        self: "_AsyncResource", request: "SynapseRequest"
     ) -> None:
         with request.processing():
             await h(self, request)
@@ -300,7 +301,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
 
         self._extract_context = extract_context
 
-    def render(self, request: SynapseRequest) -> int:
+    def render(self, request: "SynapseRequest") -> int:
         """This gets called by twisted every time someone sends us a request."""
         request.render_deferred = defer.ensureDeferred(
             self._async_render_wrapper(request)
@@ -308,7 +309,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
         return NOT_DONE_YET
 
     @wrap_async_request_handler
-    async def _async_render_wrapper(self, request: SynapseRequest) -> None:
+    async def _async_render_wrapper(self, request: "SynapseRequest") -> None:
         """This is a wrapper that delegates to `_async_render` and handles
         exceptions, return values, metrics, etc.
         """
@@ -326,9 +327,15 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
             # of our stack, and thus gives us a sensible stack
             # trace.
             f = failure.Failure()
+            logger.exception(
+                "Error handling request",
+                exc_info=(f.type, f.value, f.getTracebackObject()),
+            )
             self._send_error_response(f, request)
 
-    async def _async_render(self, request: SynapseRequest) -> Optional[Tuple[int, Any]]:
+    async def _async_render(
+        self, request: "SynapseRequest"
+    ) -> Optional[Tuple[int, Any]]:
         """Delegates to `_async_render_<METHOD>` methods, or returns a 400 if
         no appropriate method exists. Can be overridden in sub classes for
         different routing.
@@ -358,7 +365,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
     @abc.abstractmethod
     def _send_response(
         self,
-        request: SynapseRequest,
+        request: "SynapseRequest",
         code: int,
         response_object: Any,
     ) -> None:
@@ -368,7 +375,7 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
     def _send_error_response(
         self,
         f: failure.Failure,
-        request: SynapseRequest,
+        request: "SynapseRequest",
     ) -> None:
         raise NotImplementedError()
 
@@ -384,7 +391,7 @@ class DirectServeJsonResource(_AsyncResource):
 
     def _send_response(
         self,
-        request: SynapseRequest,
+        request: "SynapseRequest",
         code: int,
         response_object: Any,
     ) -> None:
@@ -401,7 +408,7 @@ class DirectServeJsonResource(_AsyncResource):
     def _send_error_response(
         self,
         f: failure.Failure,
-        request: SynapseRequest,
+        request: "SynapseRequest",
     ) -> None:
         """Implements _AsyncResource._send_error_response"""
         return_json_error(f, request, None)
@@ -473,7 +480,7 @@ class JsonResource(DirectServeJsonResource):
             )
 
     def _get_handler_for_request(
-        self, request: SynapseRequest
+        self, request: "SynapseRequest"
     ) -> Tuple[ServletCallback, str, Dict[str, str]]:
         """Finds a callback method to handle the given request.
 
@@ -503,7 +510,7 @@ class JsonResource(DirectServeJsonResource):
         # Huh. No one wanted to handle that? Fiiiiiine.
         raise UnrecognizedRequestError(code=404)
 
-    async def _async_render(self, request: SynapseRequest) -> Tuple[int, Any]:
+    async def _async_render(self, request: "SynapseRequest") -> Tuple[int, Any]:
         callback, servlet_classname, group_dict = self._get_handler_for_request(request)
 
         request.is_render_cancellable = is_function_cancellable(callback)
@@ -535,7 +542,7 @@ class JsonResource(DirectServeJsonResource):
     def _send_error_response(
         self,
         f: failure.Failure,
-        request: SynapseRequest,
+        request: "SynapseRequest",
     ) -> None:
         """Implements _AsyncResource._send_error_response"""
         return_json_error(f, request, self.hs.config)
@@ -551,7 +558,7 @@ class DirectServeHtmlResource(_AsyncResource):
 
     def _send_response(
         self,
-        request: SynapseRequest,
+        request: "SynapseRequest",
         code: int,
         response_object: Any,
     ) -> None:
@@ -565,7 +572,7 @@ class DirectServeHtmlResource(_AsyncResource):
     def _send_error_response(
         self,
         f: failure.Failure,
-        request: SynapseRequest,
+        request: "SynapseRequest",
     ) -> None:
         """Implements _AsyncResource._send_error_response"""
         return_html_error(f, request, self.ERROR_TEMPLATE)
@@ -592,7 +599,7 @@ class UnrecognizedRequestResource(resource.Resource):
     errcode of M_UNRECOGNIZED.
     """
 
-    def render(self, request: SynapseRequest) -> int:
+    def render(self, request: "SynapseRequest") -> int:
         f = failure.Failure(UnrecognizedRequestError(code=404))
         return_json_error(f, request, None)
         # A response has already been sent but Twisted requires either NOT_DONE_YET
@@ -622,7 +629,7 @@ class RootRedirect(resource.Resource):
 class OptionsResource(resource.Resource):
     """Responds to OPTION requests for itself and all children."""
 
-    def render_OPTIONS(self, request: SynapseRequest) -> bytes:
+    def render_OPTIONS(self, request: "SynapseRequest") -> bytes:
         request.setResponseCode(204)
         request.setHeader(b"Content-Length", b"0")
 
@@ -737,7 +744,7 @@ def _encode_json_bytes(json_object: object) -> bytes:
 
 
 def respond_with_json(
-    request: SynapseRequest,
+    request: "SynapseRequest",
     code: int,
     json_object: Any,
     send_cors: bool = False,
@@ -787,7 +794,7 @@ def respond_with_json(
 
 
 def respond_with_json_bytes(
-    request: SynapseRequest,
+    request: "SynapseRequest",
     code: int,
     json_bytes: bytes,
     send_cors: bool = False,
@@ -825,7 +832,7 @@ def respond_with_json_bytes(
 
 
 async def _async_write_json_to_request_in_thread(
-    request: SynapseRequest,
+    request: "SynapseRequest",
     json_encoder: Callable[[Any], bytes],
     json_object: Any,
 ) -> None:
@@ -883,7 +890,7 @@ def _write_bytes_to_request(request: Request, bytes_to_write: bytes) -> None:
     _ByteProducer(request, bytes_generator)
 
 
-def set_cors_headers(request: SynapseRequest) -> None:
+def set_cors_headers(request: "SynapseRequest") -> None:
     """Set the CORS headers so that javascript running in a web browsers can
     use this API
 
@@ -981,7 +988,7 @@ def set_clickjacking_protection_headers(request: Request) -> None:
 
 
 def respond_with_redirect(
-    request: SynapseRequest, url: bytes, statusCode: int = FOUND, cors: bool = False
+    request: "SynapseRequest", url: bytes, statusCode: int = FOUND, cors: bool = False
 ) -> None:
     """
     Write a 302 (or other specified status code) response to the request, if it is still alive.
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 5b5a7c1e59..a388d6cf7f 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -21,25 +21,29 @@ from zope.interface import implementer
 
 from twisted.internet.address import UNIXAddress
 from twisted.internet.defer import Deferred
-from twisted.internet.interfaces import IAddress, IReactorTime
+from twisted.internet.interfaces import IAddress
 from twisted.python.failure import Failure
 from twisted.web.http import HTTPChannel
 from twisted.web.resource import IResource, Resource
-from twisted.web.server import Request, Site
+from twisted.web.server import Request
 
 from synapse.config.server import ListenerConfig
 from synapse.http import get_request_user_agent, redact_uri
+from synapse.http.proxy import ProxySite
 from synapse.http.request_metrics import RequestMetrics, requests_counter
 from synapse.logging.context import (
     ContextRequest,
     LoggingContext,
     PreserveLoggingContext,
 )
-from synapse.types import Requester
+from synapse.types import ISynapseReactor, Requester
 
 if TYPE_CHECKING:
     import opentracing
 
+    from synapse.server import HomeServer
+
+
 logger = logging.getLogger(__name__)
 
 _next_request_seq = 0
@@ -102,7 +106,7 @@ class SynapseRequest(Request):
         # A boolean indicating whether `render_deferred` should be cancelled if the
         # client disconnects early. Expected to be set by the coroutine started by
         # `Resource.render`, if rendering is asynchronous.
-        self.is_render_cancellable = False
+        self.is_render_cancellable: bool = False
 
         global _next_request_seq
         self.request_seq = _next_request_seq
@@ -601,7 +605,7 @@ class _XForwardedForAddress:
     host: str
 
 
-class SynapseSite(Site):
+class SynapseSite(ProxySite):
     """
     Synapse-specific twisted http Site
 
@@ -623,7 +627,8 @@ class SynapseSite(Site):
         resource: IResource,
         server_version_string: str,
         max_request_body_size: int,
-        reactor: IReactorTime,
+        reactor: ISynapseReactor,
+        hs: "HomeServer",
     ):
         """
 
@@ -638,7 +643,11 @@ class SynapseSite(Site):
                 dropping the connection
             reactor: reactor to be used to manage connection timeouts
         """
-        Site.__init__(self, resource, reactor=reactor)
+        super().__init__(
+            resource=resource,
+            reactor=reactor,
+            hs=hs,
+        )
 
         self.site_tag = site_tag
         self.reactor = reactor
@@ -649,7 +658,9 @@ class SynapseSite(Site):
 
         request_id_header = config.http_options.request_id_header
 
-        self.experimental_cors_msc3886 = config.http_options.experimental_cors_msc3886
+        self.experimental_cors_msc3886: bool = (
+            config.http_options.experimental_cors_msc3886
+        )
 
         def request_factory(channel: HTTPChannel, queued: bool) -> Request:
             return request_class(
diff --git a/tests/app/test_openid_listener.py b/tests/app/test_openid_listener.py
index 5a965f233b..21c5309740 100644
--- a/tests/app/test_openid_listener.py
+++ b/tests/app/test_openid_listener.py
@@ -31,9 +31,7 @@ from tests.unittest import HomeserverTestCase
 
 class FederationReaderOpenIDListenerTests(HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver(
-            federation_http_client=None, homeserver_to_use=GenericWorkerServer
-        )
+        hs = self.setup_test_homeserver(homeserver_to_use=GenericWorkerServer)
         return hs
 
     def default_config(self) -> JsonDict:
@@ -91,9 +89,7 @@ class FederationReaderOpenIDListenerTests(HomeserverTestCase):
 @patch("synapse.app.homeserver.KeyResource", new=Mock())
 class SynapseHomeserverOpenIDListenerTests(HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver(
-            federation_http_client=None, homeserver_to_use=SynapseHomeServer
-        )
+        hs = self.setup_test_homeserver(homeserver_to_use=SynapseHomeServer)
         return hs
 
     @parameterized.expand(
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index ee48f9e546..66215af2b8 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -41,7 +41,6 @@ class DeviceTestCase(unittest.HomeserverTestCase):
         self.appservice_api = mock.Mock()
         hs = self.setup_test_homeserver(
             "server",
-            federation_http_client=None,
             application_service_api=self.appservice_api,
         )
         handler = hs.get_device_handler()
@@ -401,7 +400,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
 
 class DehydrationTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver("server", federation_http_client=None)
+        hs = self.setup_test_homeserver("server")
         handler = hs.get_device_handler()
         assert isinstance(handler, DeviceHandler)
         self.handler = handler
diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py
index bf0862ed54..5f11d5df11 100644
--- a/tests/handlers/test_federation.py
+++ b/tests/handlers/test_federation.py
@@ -57,7 +57,7 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase):
     ]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver(federation_http_client=None)
+        hs = self.setup_test_homeserver()
         self.handler = hs.get_federation_handler()
         self.store = hs.get_datastores().main
         return hs
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index 19f5322317..fd66d573d2 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -993,7 +993,6 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         hs = self.setup_test_homeserver(
             "server",
-            federation_http_client=None,
             federation_sender=Mock(spec=FederationSender),
         )
         return hs
diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py
index 94518a7196..5da1d95f0b 100644
--- a/tests/handlers/test_typing.py
+++ b/tests/handlers/test_typing.py
@@ -17,6 +17,8 @@ import json
 from typing import Dict, List, Set
 from unittest.mock import ANY, Mock, call
 
+from netaddr import IPSet
+
 from twisted.test.proto_helpers import MemoryReactor
 from twisted.web.resource import Resource
 
@@ -24,6 +26,7 @@ from synapse.api.constants import EduTypes
 from synapse.api.errors import AuthError
 from synapse.federation.transport.server import TransportLayerServer
 from synapse.handlers.typing import TypingWriterHandler
+from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent
 from synapse.server import HomeServer
 from synapse.types import JsonDict, Requester, UserID, create_requester
 from synapse.util import Clock
@@ -76,6 +79,13 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
         # we mock out the federation client too
         self.mock_federation_client = Mock(spec=["put_json"])
         self.mock_federation_client.put_json.return_value = make_awaitable((200, "OK"))
+        self.mock_federation_client.agent = MatrixFederationAgent(
+            reactor,
+            tls_client_options_factory=None,
+            user_agent=b"SynapseInTrialTest/0.0.0",
+            ip_allowlist=None,
+            ip_blocklist=IPSet(),
+        )
 
         # the tests assume that we are starting at unix time 1000
         reactor.pump((1000,))
diff --git a/tests/http/test_matrixfederationclient.py b/tests/http/test_matrixfederationclient.py
index b5f4a60fe5..ab94f3f67a 100644
--- a/tests/http/test_matrixfederationclient.py
+++ b/tests/http/test_matrixfederationclient.py
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Generator
-from unittest.mock import Mock
+from typing import Any, Dict, Generator
+from unittest.mock import ANY, Mock, create_autospec
 
 from netaddr import IPSet
 from parameterized import parameterized
@@ -21,10 +21,12 @@ from twisted.internet import defer
 from twisted.internet.defer import Deferred, TimeoutError
 from twisted.internet.error import ConnectingCancelledError, DNSLookupError
 from twisted.test.proto_helpers import MemoryReactor, StringTransport
-from twisted.web.client import ResponseNeverReceived
+from twisted.web.client import Agent, ResponseNeverReceived
 from twisted.web.http import HTTPChannel
+from twisted.web.http_headers import Headers
 
-from synapse.api.errors import RequestSendFailed
+from synapse.api.errors import HttpResponseException, RequestSendFailed
+from synapse.config._base import ConfigError
 from synapse.http.matrixfederationclient import (
     ByteParser,
     MatrixFederationHttpClient,
@@ -39,7 +41,9 @@ from synapse.logging.context import (
 from synapse.server import HomeServer
 from synapse.util import Clock
 
+from tests.replication._base import BaseMultiWorkerStreamTestCase
 from tests.server import FakeTransport
+from tests.test_utils import FakeResponse
 from tests.unittest import HomeserverTestCase, override_config
 
 
@@ -658,3 +662,275 @@ class FederationClientTests(HomeserverTestCase):
         self.assertEqual(self.cl.max_short_retry_delay_seconds, 7)
         self.assertEqual(self.cl.max_long_retries, 20)
         self.assertEqual(self.cl.max_short_retries, 5)
+
+
+class FederationClientProxyTests(BaseMultiWorkerStreamTestCase):
+    def default_config(self) -> Dict[str, Any]:
+        conf = super().default_config()
+        conf["instance_map"] = {
+            "main": {"host": "testserv", "port": 8765},
+            "federation_sender": {"host": "testserv", "port": 1001},
+        }
+        return conf
+
+    @override_config(
+        {
+            "outbound_federation_restricted_to": ["federation_sender"],
+            "worker_replication_secret": "secret",
+        }
+    )
+    def test_proxy_requests_through_federation_sender_worker(self) -> None:
+        """
+        Test that all outbound federation requests go through the `federation_sender`
+        worker
+        """
+        # Mock out the `MatrixFederationHttpClient` of the `federation_sender` instance
+        # so we can act like some remote server responding to requests
+        mock_client_on_federation_sender = Mock()
+        mock_agent_on_federation_sender = create_autospec(Agent, spec_set=True)
+        mock_client_on_federation_sender.agent = mock_agent_on_federation_sender
+
+        # Create the `federation_sender` worker
+        self.make_worker_hs(
+            "synapse.app.generic_worker",
+            {"worker_name": "federation_sender"},
+            federation_http_client=mock_client_on_federation_sender,
+        )
+
+        # Fake `remoteserv:8008` responding to requests
+        mock_agent_on_federation_sender.request.side_effect = (
+            lambda *args, **kwargs: defer.succeed(
+                FakeResponse.json(
+                    payload={
+                        "foo": "bar",
+                    }
+                )
+            )
+        )
+
+        # This federation request from the main process should be proxied through the
+        # `federation_sender` worker off to the remote server
+        test_request_from_main_process_d = defer.ensureDeferred(
+            self.hs.get_federation_http_client().get_json("remoteserv:8008", "foo/bar")
+        )
+
+        # Pump the reactor so our deferred goes through the motions
+        self.pump()
+
+        # Make sure that the request was proxied through the `federation_sender` worker
+        mock_agent_on_federation_sender.request.assert_called_once_with(
+            b"GET",
+            b"matrix-federation://remoteserv:8008/foo/bar",
+            headers=ANY,
+            bodyProducer=ANY,
+        )
+
+        # Make sure the response is as expected back on the main worker
+        res = self.successResultOf(test_request_from_main_process_d)
+        self.assertEqual(res, {"foo": "bar"})
+
+    @override_config(
+        {
+            "outbound_federation_restricted_to": ["federation_sender"],
+            "worker_replication_secret": "secret",
+        }
+    )
+    def test_proxy_request_with_network_error_through_federation_sender_worker(
+        self,
+    ) -> None:
+        """
+        Test that when the outbound federation request fails with a network related
+        error, a sensible error makes its way back to the main process.
+        """
+        # Mock out the `MatrixFederationHttpClient` of the `federation_sender` instance
+        # so we can act like some remote server responding to requests
+        mock_client_on_federation_sender = Mock()
+        mock_agent_on_federation_sender = create_autospec(Agent, spec_set=True)
+        mock_client_on_federation_sender.agent = mock_agent_on_federation_sender
+
+        # Create the `federation_sender` worker
+        self.make_worker_hs(
+            "synapse.app.generic_worker",
+            {"worker_name": "federation_sender"},
+            federation_http_client=mock_client_on_federation_sender,
+        )
+
+        # Fake `remoteserv:8008` responding to requests
+        mock_agent_on_federation_sender.request.side_effect = (
+            lambda *args, **kwargs: defer.fail(ResponseNeverReceived("fake error"))
+        )
+
+        # This federation request from the main process should be proxied through the
+        # `federation_sender` worker off to the remote server
+        test_request_from_main_process_d = defer.ensureDeferred(
+            self.hs.get_federation_http_client().get_json("remoteserv:8008", "foo/bar")
+        )
+
+        # Pump the reactor so our deferred goes through the motions. We pump with 10
+        # seconds (0.1 * 100) so the `MatrixFederationHttpClient` runs out of retries
+        # and finally passes along the error response.
+        self.pump(0.1)
+
+        # Make sure that the request was proxied through the `federation_sender` worker
+        mock_agent_on_federation_sender.request.assert_called_with(
+            b"GET",
+            b"matrix-federation://remoteserv:8008/foo/bar",
+            headers=ANY,
+            bodyProducer=ANY,
+        )
+
+        # Make sure we get some sort of error back on the main worker
+        failure_res = self.failureResultOf(test_request_from_main_process_d)
+        self.assertIsInstance(failure_res.value, RequestSendFailed)
+        self.assertIsInstance(failure_res.value.inner_exception, HttpResponseException)
+        self.assertEqual(failure_res.value.inner_exception.code, 502)
+
+    @override_config(
+        {
+            "outbound_federation_restricted_to": ["federation_sender"],
+            "worker_replication_secret": "secret",
+        }
+    )
+    def test_proxy_requests_and_discards_hop_by_hop_headers(self) -> None:
+        """
+        Test to make sure hop-by-hop headers and additional headers defined in the
+        `Connection` header are discarded when proxying requests
+        """
+        # Mock out the `MatrixFederationHttpClient` of the `federation_sender` instance
+        # so we can act like some remote server responding to requests
+        mock_client_on_federation_sender = Mock()
+        mock_agent_on_federation_sender = create_autospec(Agent, spec_set=True)
+        mock_client_on_federation_sender.agent = mock_agent_on_federation_sender
+
+        # Create the `federation_sender` worker
+        self.make_worker_hs(
+            "synapse.app.generic_worker",
+            {"worker_name": "federation_sender"},
+            federation_http_client=mock_client_on_federation_sender,
+        )
+
+        # Fake `remoteserv:8008` responding to requests
+        mock_agent_on_federation_sender.request.side_effect = lambda *args, **kwargs: defer.succeed(
+            FakeResponse(
+                code=200,
+                body=b'{"foo": "bar"}',
+                headers=Headers(
+                    {
+                        "Content-Type": ["application/json"],
+                        "Connection": ["close, X-Foo, X-Bar"],
+                        # Should be removed because it's defined in the `Connection` header
+                        "X-Foo": ["foo"],
+                        "X-Bar": ["bar"],
+                        # Should be removed because it's a hop-by-hop header
+                        "Proxy-Authorization": "abcdef",
+                    }
+                ),
+            )
+        )
+
+        # This federation request from the main process should be proxied through the
+        # `federation_sender` worker off to the remote server
+        test_request_from_main_process_d = defer.ensureDeferred(
+            self.hs.get_federation_http_client().get_json_with_headers(
+                "remoteserv:8008", "foo/bar"
+            )
+        )
+
+        # Pump the reactor so our deferred goes through the motions
+        self.pump()
+
+        # Make sure that the request was proxied through the `federation_sender` worker
+        mock_agent_on_federation_sender.request.assert_called_once_with(
+            b"GET",
+            b"matrix-federation://remoteserv:8008/foo/bar",
+            headers=ANY,
+            bodyProducer=ANY,
+        )
+
+        res, headers = self.successResultOf(test_request_from_main_process_d)
+        header_names = set(headers.keys())
+
+        # Make sure the response does not include the hop-by-hop headers
+        self.assertNotIn(b"X-Foo", header_names)
+        self.assertNotIn(b"X-Bar", header_names)
+        self.assertNotIn(b"Proxy-Authorization", header_names)
+        # Make sure the response is as expected back on the main worker
+        self.assertEqual(res, {"foo": "bar"})
+
+    @override_config(
+        {
+            "outbound_federation_restricted_to": ["federation_sender"],
+            # `worker_replication_secret` is set here so that the test setup is able to pass
+            # but the actual homeserver creation test is in the test body below
+            "worker_replication_secret": "secret",
+        }
+    )
+    def test_not_able_to_proxy_requests_through_federation_sender_worker_when_no_secret_configured(
+        self,
+    ) -> None:
+        """
+        Test that we aren't able to proxy any outbound federation requests when
+        `worker_replication_secret` is not configured.
+        """
+        with self.assertRaises(ConfigError):
+            # Create the `federation_sender` worker
+            self.make_worker_hs(
+                "synapse.app.generic_worker",
+                {
+                    "worker_name": "federation_sender",
+                    # Test that we aren't able to proxy any outbound federation requests
+                    # when `worker_replication_secret` is not configured.
+                    "worker_replication_secret": None,
+                },
+            )
+
+    @override_config(
+        {
+            "outbound_federation_restricted_to": ["federation_sender"],
+            "worker_replication_secret": "secret",
+        }
+    )
+    def test_not_able_to_proxy_requests_through_federation_sender_worker_when_wrong_auth_given(
+        self,
+    ) -> None:
+        """
+        Test that we aren't able to proxy any outbound federation requests when the
+        wrong authorization is given.
+        """
+        # Mock out the `MatrixFederationHttpClient` of the `federation_sender` instance
+        # so we can act like some remote server responding to requests
+        mock_client_on_federation_sender = Mock()
+        mock_agent_on_federation_sender = create_autospec(Agent, spec_set=True)
+        mock_client_on_federation_sender.agent = mock_agent_on_federation_sender
+
+        # Create the `federation_sender` worker
+        self.make_worker_hs(
+            "synapse.app.generic_worker",
+            {
+                "worker_name": "federation_sender",
+                # Test that we aren't able to proxy any outbound federation requests
+                # when `worker_replication_secret` is wrong.
+                "worker_replication_secret": "wrong",
+            },
+            federation_http_client=mock_client_on_federation_sender,
+        )
+
+        # This federation request from the main process should be proxied through the
+        # `federation_sender` worker off but will fail here because it's using the wrong
+        # authorization.
+        test_request_from_main_process_d = defer.ensureDeferred(
+            self.hs.get_federation_http_client().get_json("remoteserv:8008", "foo/bar")
+        )
+
+        # Pump the reactor so our deferred goes through the motions. We pump with 10
+        # seconds (0.1 * 100) so the `MatrixFederationHttpClient` runs out of retries
+        # and finally passes along the error response.
+        self.pump(0.1)
+
+        # Make sure that the request was *NOT* proxied through the `federation_sender`
+        # worker
+        mock_agent_on_federation_sender.request.assert_not_called()
+
+        failure_res = self.failureResultOf(test_request_from_main_process_d)
+        self.assertIsInstance(failure_res.value, HttpResponseException)
+        self.assertEqual(failure_res.value.code, 401)
diff --git a/tests/http/test_proxy.py b/tests/http/test_proxy.py
new file mode 100644
index 0000000000..0dc9ba8e05
--- /dev/null
+++ b/tests/http/test_proxy.py
@@ -0,0 +1,53 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Set
+
+from parameterized import parameterized
+
+from synapse.http.proxy import parse_connection_header_value
+
+from tests.unittest import TestCase
+
+
+class ProxyTests(TestCase):
+    @parameterized.expand(
+        [
+            [b"close, X-Foo, X-Bar", {"Close", "X-Foo", "X-Bar"}],
+            # No whitespace
+            [b"close,X-Foo,X-Bar", {"Close", "X-Foo", "X-Bar"}],
+            # More whitespace
+            [b"close,    X-Foo,      X-Bar", {"Close", "X-Foo", "X-Bar"}],
+            # "close" directive is not in the first position
+            [b"X-Foo, X-Bar, close", {"X-Foo", "X-Bar", "Close"}],
+            # Normalizes header capitalization
+            [b"keep-alive, x-fOo, x-bAr", {"Keep-Alive", "X-Foo", "X-Bar"}],
+            # Handles header names with whitespace
+            [
+                b"keep-alive, x  foo, x bar",
+                {"Keep-Alive", "X  foo", "X bar"},
+            ],
+        ]
+    )
+    def test_parse_connection_header_value(
+        self,
+        connection_header_value: bytes,
+        expected_extra_headers_to_remove: Set[str],
+    ) -> None:
+        """
+        Tests that the connection header value is parsed correctly
+        """
+        self.assertEqual(
+            expected_extra_headers_to_remove,
+            parse_connection_header_value(connection_header_value),
+        )
diff --git a/tests/http/test_proxyagent.py b/tests/http/test_proxyagent.py
index e0ae5a88ff..8164b0b78e 100644
--- a/tests/http/test_proxyagent.py
+++ b/tests/http/test_proxyagent.py
@@ -33,7 +33,7 @@ from twisted.protocols.tls import TLSMemoryBIOFactory, TLSMemoryBIOProtocol
 from twisted.web.http import HTTPChannel
 
 from synapse.http.client import BlocklistingReactorWrapper
-from synapse.http.connectproxyclient import ProxyCredentials
+from synapse.http.connectproxyclient import BasicProxyCredentials
 from synapse.http.proxyagent import ProxyAgent, parse_proxy
 
 from tests.http import (
@@ -205,7 +205,7 @@ class ProxyParserTests(TestCase):
         """
         proxy_cred = None
         if expected_credentials:
-            proxy_cred = ProxyCredentials(expected_credentials)
+            proxy_cred = BasicProxyCredentials(expected_credentials)
         self.assertEqual(
             (
                 expected_scheme,
diff --git a/tests/replication/_base.py b/tests/replication/_base.py
index 39aadb9ed5..6712ac485d 100644
--- a/tests/replication/_base.py
+++ b/tests/replication/_base.py
@@ -70,10 +70,10 @@ class BaseStreamTestCase(unittest.HomeserverTestCase):
         # Make a new HomeServer object for the worker
         self.reactor.lookups["testserv"] = "1.2.3.4"
         self.worker_hs = self.setup_test_homeserver(
-            federation_http_client=None,
             homeserver_to_use=GenericWorkerServer,
             config=self._get_worker_hs_config(),
             reactor=self.reactor,
+            federation_http_client=None,
         )
 
         # Since we use sqlite in memory databases we need to make sure the
@@ -385,6 +385,7 @@ class BaseMultiWorkerStreamTestCase(unittest.HomeserverTestCase):
             server_version_string="1",
             max_request_body_size=8192,
             reactor=self.reactor,
+            hs=worker_hs,
         )
 
         worker_hs.get_replication_command_handler().start_replication(worker_hs)
diff --git a/tests/replication/test_federation_sender_shard.py b/tests/replication/test_federation_sender_shard.py
index 08703206a9..a324b4d31d 100644
--- a/tests/replication/test_federation_sender_shard.py
+++ b/tests/replication/test_federation_sender_shard.py
@@ -14,14 +14,18 @@
 import logging
 from unittest.mock import Mock
 
+from netaddr import IPSet
+
 from synapse.api.constants import EventTypes, Membership
 from synapse.events.builder import EventBuilderFactory
 from synapse.handlers.typing import TypingWriterHandler
+from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent
 from synapse.rest.admin import register_servlets_for_client_rest_resource
 from synapse.rest.client import login, room
 from synapse.types import UserID, create_requester
 
 from tests.replication._base import BaseMultiWorkerStreamTestCase
+from tests.server import get_clock
 from tests.test_utils import make_awaitable
 
 logger = logging.getLogger(__name__)
@@ -41,13 +45,25 @@ class FederationSenderTestCase(BaseMultiWorkerStreamTestCase):
         room.register_servlets,
     ]
 
+    def setUp(self) -> None:
+        super().setUp()
+
+        reactor, _ = get_clock()
+        self.matrix_federation_agent = MatrixFederationAgent(
+            reactor,
+            tls_client_options_factory=None,
+            user_agent=b"SynapseInTrialTest/0.0.0",
+            ip_allowlist=None,
+            ip_blocklist=IPSet(),
+        )
+
     def test_send_event_single_sender(self) -> None:
         """Test that using a single federation sender worker correctly sends a
         new event.
         """
         mock_client = Mock(spec=["put_json"])
         mock_client.put_json.return_value = make_awaitable({})
-
+        mock_client.agent = self.matrix_federation_agent
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {
@@ -78,6 +94,7 @@ class FederationSenderTestCase(BaseMultiWorkerStreamTestCase):
         """
         mock_client1 = Mock(spec=["put_json"])
         mock_client1.put_json.return_value = make_awaitable({})
+        mock_client1.agent = self.matrix_federation_agent
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {
@@ -92,6 +109,7 @@ class FederationSenderTestCase(BaseMultiWorkerStreamTestCase):
 
         mock_client2 = Mock(spec=["put_json"])
         mock_client2.put_json.return_value = make_awaitable({})
+        mock_client2.agent = self.matrix_federation_agent
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {
@@ -145,6 +163,7 @@ class FederationSenderTestCase(BaseMultiWorkerStreamTestCase):
         """
         mock_client1 = Mock(spec=["put_json"])
         mock_client1.put_json.return_value = make_awaitable({})
+        mock_client1.agent = self.matrix_federation_agent
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {
@@ -159,6 +178,7 @@ class FederationSenderTestCase(BaseMultiWorkerStreamTestCase):
 
         mock_client2 = Mock(spec=["put_json"])
         mock_client2.put_json.return_value = make_awaitable({})
+        mock_client2.agent = self.matrix_federation_agent
         self.make_worker_hs(
             "synapse.app.generic_worker",
             {
diff --git a/tests/rest/client/test_presence.py b/tests/rest/client/test_presence.py
index dcbb125a3b..e12098102b 100644
--- a/tests/rest/client/test_presence.py
+++ b/tests/rest/client/test_presence.py
@@ -40,7 +40,6 @@ class PresenceTestCase(unittest.HomeserverTestCase):
 
         hs = self.setup_test_homeserver(
             "red",
-            federation_http_client=None,
             federation_client=Mock(),
             presence_handler=self.presence_handler,
         )
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index f1b4e1ad2f..d013e75d55 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -67,8 +67,6 @@ class RoomBase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         self.hs = self.setup_test_homeserver(
             "red",
-            federation_http_client=None,
-            federation_client=Mock(),
         )
 
         self.hs.get_federation_handler = Mock()  # type: ignore[assignment]
diff --git a/tests/storage/test_e2e_room_keys.py b/tests/storage/test_e2e_room_keys.py
index 9cb326d90a..f6df31aba4 100644
--- a/tests/storage/test_e2e_room_keys.py
+++ b/tests/storage/test_e2e_room_keys.py
@@ -31,7 +31,7 @@ room_key: RoomKey = {
 
 class E2eRoomKeysHandlerTestCase(unittest.HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver("server", federation_http_client=None)
+        hs = self.setup_test_homeserver("server")
         self.store = hs.get_datastores().main
         return hs
 
diff --git a/tests/storage/test_purge.py b/tests/storage/test_purge.py
index 857e2caf2e..0282673167 100644
--- a/tests/storage/test_purge.py
+++ b/tests/storage/test_purge.py
@@ -27,7 +27,7 @@ class PurgeTests(HomeserverTestCase):
     servlets = [room.register_servlets]
 
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver("server", federation_http_client=None)
+        hs = self.setup_test_homeserver("server")
         return hs
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
diff --git a/tests/storage/test_rollback_worker.py b/tests/storage/test_rollback_worker.py
index 6861d3a6c9..809c9f175d 100644
--- a/tests/storage/test_rollback_worker.py
+++ b/tests/storage/test_rollback_worker.py
@@ -45,9 +45,7 @@ def fake_listdir(filepath: str) -> List[str]:
 
 class WorkerSchemaTests(HomeserverTestCase):
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        hs = self.setup_test_homeserver(
-            federation_http_client=None, homeserver_to_use=GenericWorkerServer
-        )
+        hs = self.setup_test_homeserver(homeserver_to_use=GenericWorkerServer)
         return hs
 
     def default_config(self) -> JsonDict:
diff --git a/tests/test_server.py b/tests/test_server.py
index dc491e06ed..36162cd1f5 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -38,7 +38,7 @@ from tests.http.server._base import test_disconnect
 from tests.server import (
     FakeChannel,
     FakeSite,
-    ThreadedMemoryReactorClock,
+    get_clock,
     make_request,
     setup_test_homeserver,
 )
@@ -46,12 +46,11 @@ from tests.server import (
 
 class JsonResourceTests(unittest.TestCase):
     def setUp(self) -> None:
-        self.reactor = ThreadedMemoryReactorClock()
-        self.hs_clock = Clock(self.reactor)
+        reactor, clock = get_clock()
+        self.reactor = reactor
         self.homeserver = setup_test_homeserver(
             self.addCleanup,
-            federation_http_client=None,
-            clock=self.hs_clock,
+            clock=clock,
             reactor=self.reactor,
         )
 
@@ -209,7 +208,13 @@ class JsonResourceTests(unittest.TestCase):
 
 class OptionsResourceTests(unittest.TestCase):
     def setUp(self) -> None:
-        self.reactor = ThreadedMemoryReactorClock()
+        reactor, clock = get_clock()
+        self.reactor = reactor
+        self.homeserver = setup_test_homeserver(
+            self.addCleanup,
+            clock=clock,
+            reactor=self.reactor,
+        )
 
         class DummyResource(Resource):
             isLeaf = True
@@ -242,6 +247,7 @@ class OptionsResourceTests(unittest.TestCase):
             "1.0",
             max_request_body_size=4096,
             reactor=self.reactor,
+            hs=self.homeserver,
         )
 
         # render the request and return the channel
@@ -344,7 +350,8 @@ class WrapHtmlRequestHandlerTests(unittest.TestCase):
             await self.callback(request)
 
     def setUp(self) -> None:
-        self.reactor = ThreadedMemoryReactorClock()
+        reactor, _ = get_clock()
+        self.reactor = reactor
 
     def test_good_response(self) -> None:
         async def callback(request: SynapseRequest) -> None:
@@ -462,9 +469,9 @@ class DirectServeJsonResourceCancellationTests(unittest.TestCase):
     """Tests for `DirectServeJsonResource` cancellation."""
 
     def setUp(self) -> None:
-        self.reactor = ThreadedMemoryReactorClock()
-        self.clock = Clock(self.reactor)
-        self.resource = CancellableDirectServeJsonResource(self.clock)
+        reactor, clock = get_clock()
+        self.reactor = reactor
+        self.resource = CancellableDirectServeJsonResource(clock)
         self.site = FakeSite(self.resource, self.reactor)
 
     def test_cancellable_disconnect(self) -> None:
@@ -496,9 +503,9 @@ class DirectServeHtmlResourceCancellationTests(unittest.TestCase):
     """Tests for `DirectServeHtmlResource` cancellation."""
 
     def setUp(self) -> None:
-        self.reactor = ThreadedMemoryReactorClock()
-        self.clock = Clock(self.reactor)
-        self.resource = CancellableDirectServeHtmlResource(self.clock)
+        reactor, clock = get_clock()
+        self.reactor = reactor
+        self.resource = CancellableDirectServeHtmlResource(clock)
         self.site = FakeSite(self.resource, self.reactor)
 
     def test_cancellable_disconnect(self) -> None:
diff --git a/tests/unittest.py b/tests/unittest.py
index c73195b32b..b0721e060c 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -358,6 +358,7 @@ class HomeserverTestCase(TestCase):
             server_version_string="1",
             max_request_body_size=4096,
             reactor=self.reactor,
+            hs=self.hs,
         )
 
         from tests.rest.client.utils import RestHelper
-- 
cgit 1.5.1


From 199c2709479a833e0dc01d19773031c3d5fa63fb Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Tue, 18 Jul 2023 04:36:40 -0500
Subject: Add a locality to a few presence metrics (#15952)

---
 changelog.d/15952.misc       |  1 +
 synapse/handlers/presence.py | 37 ++++++++++++++++++++++++-------------
 2 files changed, 25 insertions(+), 13 deletions(-)
 create mode 100644 changelog.d/15952.misc

(limited to 'synapse')

diff --git a/changelog.d/15952.misc b/changelog.d/15952.misc
new file mode 100644
index 0000000000..c4160977cb
--- /dev/null
+++ b/changelog.d/15952.misc
@@ -0,0 +1 @@
+Update presence metrics to differentiate remote vs local users.
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 0a219b7962..cd7df0525f 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -95,13 +95,12 @@ bump_active_time_counter = Counter("synapse_handler_presence_bump_active_time",
 get_updates_counter = Counter("synapse_handler_presence_get_updates", "", ["type"])
 
 notify_reason_counter = Counter(
-    "synapse_handler_presence_notify_reason", "", ["reason"]
+    "synapse_handler_presence_notify_reason", "", ["locality", "reason"]
 )
 state_transition_counter = Counter(
-    "synapse_handler_presence_state_transition", "", ["from", "to"]
+    "synapse_handler_presence_state_transition", "", ["locality", "from", "to"]
 )
 
-
 # If a user was last active in the last LAST_ACTIVE_GRANULARITY, consider them
 # "currently_active"
 LAST_ACTIVE_GRANULARITY = 60 * 1000
@@ -567,8 +566,8 @@ class WorkerPresenceHandler(BasePresenceHandler):
         for new_state in states:
             old_state = self.user_to_current_state.get(new_state.user_id)
             self.user_to_current_state[new_state.user_id] = new_state
-
-            if not old_state or should_notify(old_state, new_state):
+            is_mine = self.is_mine_id(new_state.user_id)
+            if not old_state or should_notify(old_state, new_state, is_mine):
                 state_to_notify.append(new_state)
 
         stream_id = token
@@ -1499,23 +1498,31 @@ class PresenceHandler(BasePresenceHandler):
             )
 
 
-def should_notify(old_state: UserPresenceState, new_state: UserPresenceState) -> bool:
+def should_notify(
+    old_state: UserPresenceState, new_state: UserPresenceState, is_mine: bool
+) -> bool:
     """Decides if a presence state change should be sent to interested parties."""
+    user_location = "remote"
+    if is_mine:
+        user_location = "local"
+
     if old_state == new_state:
         return False
 
     if old_state.status_msg != new_state.status_msg:
-        notify_reason_counter.labels("status_msg_change").inc()
+        notify_reason_counter.labels(user_location, "status_msg_change").inc()
         return True
 
     if old_state.state != new_state.state:
-        notify_reason_counter.labels("state_change").inc()
-        state_transition_counter.labels(old_state.state, new_state.state).inc()
+        notify_reason_counter.labels(user_location, "state_change").inc()
+        state_transition_counter.labels(
+            user_location, old_state.state, new_state.state
+        ).inc()
         return True
 
     if old_state.state == PresenceState.ONLINE:
         if new_state.currently_active != old_state.currently_active:
-            notify_reason_counter.labels("current_active_change").inc()
+            notify_reason_counter.labels(user_location, "current_active_change").inc()
             return True
 
         if (
@@ -1524,12 +1531,16 @@ def should_notify(old_state: UserPresenceState, new_state: UserPresenceState) ->
         ):
             # Only notify about last active bumps if we're not currently active
             if not new_state.currently_active:
-                notify_reason_counter.labels("last_active_change_online").inc()
+                notify_reason_counter.labels(
+                    user_location, "last_active_change_online"
+                ).inc()
                 return True
 
     elif new_state.last_active_ts - old_state.last_active_ts > LAST_ACTIVE_GRANULARITY:
         # Always notify for a transition where last active gets bumped.
-        notify_reason_counter.labels("last_active_change_not_online").inc()
+        notify_reason_counter.labels(
+            user_location, "last_active_change_not_online"
+        ).inc()
         return True
 
     return False
@@ -1989,7 +2000,7 @@ def handle_update(
         )
 
     # Check whether the change was something worth notifying about
-    if should_notify(prev_state, new_state):
+    if should_notify(prev_state, new_state, is_mine):
         new_state = new_state.copy_and_replace(last_federation_update_ts=now)
         persist_and_notify = True
 
-- 
cgit 1.5.1


From e625c3dca0b8147a204fe812af36b784473a6b50 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Tue, 18 Jul 2023 03:44:09 -0700
Subject: Revert "Stop writing to column `user_id` of tables `profiles` and
 `user_filters`" (#15953)

* Revert "Stop writing to column `user_id` of tables `profiles` and `user_filters` (#15787)"

This reverts commit f25b0f88081bb436bef914983cff7087b54eba5f.

* newsfragment
---
 changelog.d/15953.misc                             |  1 +
 synapse/storage/database.py                        |  2 -
 synapse/storage/databases/main/__init__.py         |  6 +-
 synapse/storage/databases/main/filtering.py        |  5 +-
 synapse/storage/databases/main/profile.py          | 12 ++-
 synapse/storage/schema/__init__.py                 |  9 +--
 .../79/01_drop_user_id_constraint_profiles.py      | 50 ------------
 .../79/02_drop_user_id_constraint_user_filters.py  | 54 -------------
 tests/storage/test_profile.py                      | 63 +++++++++++++++
 tests/storage/test_user_filters.py                 | 94 ++++++++++++++++++++++
 10 files changed, 174 insertions(+), 122 deletions(-)
 create mode 100644 changelog.d/15953.misc
 delete mode 100644 synapse/storage/schema/main/delta/79/01_drop_user_id_constraint_profiles.py
 delete mode 100644 synapse/storage/schema/main/delta/79/02_drop_user_id_constraint_user_filters.py
 create mode 100644 tests/storage/test_user_filters.py

(limited to 'synapse')

diff --git a/changelog.d/15953.misc b/changelog.d/15953.misc
new file mode 100644
index 0000000000..a20e78178f
--- /dev/null
+++ b/changelog.d/15953.misc
@@ -0,0 +1 @@
+Revert "Stop writing to column user_id of tables profiles and user_filters`.
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index c9d687fb2f..a1c8fb0f46 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -98,8 +98,6 @@ UNIQUE_INDEX_BACKGROUND_UPDATES = {
     "event_push_summary": "event_push_summary_unique_index2",
     "receipts_linearized": "receipts_linearized_unique_index",
     "receipts_graph": "receipts_graph_unique_index",
-    "profiles": "profiles_full_user_id_key_idx",
-    "user_filters": "full_users_filters_unique_idx",
 }
 
 
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index b6028853c9..80c0304b19 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -15,7 +15,7 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, List, Optional, Tuple, Union, cast
+from typing import TYPE_CHECKING, List, Optional, Tuple, cast
 
 from synapse.api.constants import Direction
 from synapse.config.homeserver import HomeServerConfig
@@ -196,7 +196,7 @@ class DataStore(
             txn: LoggingTransaction,
         ) -> Tuple[List[JsonDict], int]:
             filters = []
-            args: List[Union[str, int]] = []
+            args = [self.hs.config.server.server_name]
 
             # Set ordering
             order_by_column = UserSortOrder(order_by).value
@@ -263,7 +263,7 @@ class DataStore(
 
             sql_base = f"""
                 FROM users as u
-                LEFT JOIN profiles AS p ON u.name = p.full_user_id
+                LEFT JOIN profiles AS p ON u.name = '@' || p.user_id || ':' || ?
                 LEFT JOIN erased_users AS eu ON u.name = eu.user_id
                 {where_clause}
                 """
diff --git a/synapse/storage/databases/main/filtering.py b/synapse/storage/databases/main/filtering.py
index 75f7fe8756..fff417f9e3 100644
--- a/synapse/storage/databases/main/filtering.py
+++ b/synapse/storage/databases/main/filtering.py
@@ -188,13 +188,14 @@ class FilteringWorkerStore(SQLBaseStore):
                 filter_id = max_id + 1
 
             sql = (
-                "INSERT INTO user_filters (full_user_id, filter_id, filter_json)"
-                "VALUES(?, ?, ?)"
+                "INSERT INTO user_filters (full_user_id, user_id, filter_id, filter_json)"
+                "VALUES(?, ?, ?, ?)"
             )
             txn.execute(
                 sql,
                 (
                     user_id.to_string(),
+                    user_id.localpart,
                     filter_id,
                     bytearray(def_json),
                 ),
diff --git a/synapse/storage/databases/main/profile.py b/synapse/storage/databases/main/profile.py
index 660a5507b7..3ba9cc8853 100644
--- a/synapse/storage/databases/main/profile.py
+++ b/synapse/storage/databases/main/profile.py
@@ -173,9 +173,10 @@ class ProfileWorkerStore(SQLBaseStore):
         )
 
     async def create_profile(self, user_id: UserID) -> None:
+        user_localpart = user_id.localpart
         await self.db_pool.simple_insert(
             table="profiles",
-            values={"full_user_id": user_id.to_string()},
+            values={"user_id": user_localpart, "full_user_id": user_id.to_string()},
             desc="create_profile",
         )
 
@@ -190,11 +191,13 @@ class ProfileWorkerStore(SQLBaseStore):
             new_displayname: The new display name. If this is None, the user's display
                 name is removed.
         """
+        user_localpart = user_id.localpart
         await self.db_pool.simple_upsert(
             table="profiles",
-            keyvalues={"full_user_id": user_id.to_string()},
+            keyvalues={"user_id": user_localpart},
             values={
                 "displayname": new_displayname,
+                "full_user_id": user_id.to_string(),
             },
             desc="set_profile_displayname",
         )
@@ -210,10 +213,11 @@ class ProfileWorkerStore(SQLBaseStore):
             new_avatar_url: The new avatar URL. If this is None, the user's avatar is
                 removed.
         """
+        user_localpart = user_id.localpart
         await self.db_pool.simple_upsert(
             table="profiles",
-            keyvalues={"full_user_id": user_id.to_string()},
-            values={"avatar_url": new_avatar_url},
+            keyvalues={"user_id": user_localpart},
+            values={"avatar_url": new_avatar_url, "full_user_id": user_id.to_string()},
             desc="set_profile_avatar_url",
         )
 
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 6d14963c0a..fc190a8b13 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 79  # remember to update the list below when updating
+SCHEMA_VERSION = 78  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -106,9 +106,6 @@ Changes in SCHEMA_VERSION = 77
 
 Changes in SCHEMA_VERSION = 78
     - Validate check (full_user_id IS NOT NULL) on tables profiles and user_filters
-
-Changes in SCHEMA_VERSION = 79
-    - We no longer write to column user_id of tables profiles and user_filters
 """
 
 
@@ -121,9 +118,7 @@ SCHEMA_COMPAT_VERSION = (
     #
     # insertions to the column `full_user_id` of tables profiles and user_filters can no
     # longer be null
-    #
-    # we no longer write to column `full_user_id` of tables profiles and user_filters
-    78
+    76
 )
 """Limit on how far the synapse codebase can be rolled back without breaking db compat
 
diff --git a/synapse/storage/schema/main/delta/79/01_drop_user_id_constraint_profiles.py b/synapse/storage/schema/main/delta/79/01_drop_user_id_constraint_profiles.py
deleted file mode 100644
index 3541266f7d..0000000000
--- a/synapse/storage/schema/main/delta/79/01_drop_user_id_constraint_profiles.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from synapse.storage.database import LoggingTransaction
-from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
-
-
-def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
-    """
-    Update to drop the NOT NULL constraint on column user_id so that we can cease to
-    write to it without inserts to other columns triggering the constraint
-    """
-
-    if isinstance(database_engine, PostgresEngine):
-        drop_sql = """
-        ALTER TABLE profiles ALTER COLUMN user_id DROP NOT NULL
-        """
-        cur.execute(drop_sql)
-    else:
-        # irritatingly in SQLite we need to rewrite the table to drop the constraint.
-        cur.execute("DROP TABLE IF EXISTS temp_profiles")
-
-        create_sql = """
-        CREATE TABLE temp_profiles (
-            full_user_id text NOT NULL,
-            user_id text,
-            displayname text,
-            avatar_url text,
-            UNIQUE (full_user_id),
-            UNIQUE (user_id)
-        )
-        """
-        cur.execute(create_sql)
-
-        copy_sql = """
-        INSERT INTO temp_profiles (
-            user_id,
-            displayname,
-            avatar_url,
-            full_user_id)
-            SELECT user_id, displayname, avatar_url, full_user_id FROM profiles
-        """
-        cur.execute(copy_sql)
-
-        drop_sql = """
-        DROP TABLE profiles
-        """
-        cur.execute(drop_sql)
-
-        rename_sql = """
-        ALTER TABLE temp_profiles RENAME to profiles
-        """
-        cur.execute(rename_sql)
diff --git a/synapse/storage/schema/main/delta/79/02_drop_user_id_constraint_user_filters.py b/synapse/storage/schema/main/delta/79/02_drop_user_id_constraint_user_filters.py
deleted file mode 100644
index 8e7569c470..0000000000
--- a/synapse/storage/schema/main/delta/79/02_drop_user_id_constraint_user_filters.py
+++ /dev/null
@@ -1,54 +0,0 @@
-from synapse.storage.database import LoggingTransaction
-from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
-
-
-def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
-    """
-    Update to drop the NOT NULL constraint on column user_id so that we can cease to
-    write to it without inserts to other columns triggering the constraint
-    """
-    if isinstance(database_engine, PostgresEngine):
-        drop_sql = """
-        ALTER TABLE user_filters ALTER COLUMN user_id DROP NOT NULL
-        """
-        cur.execute(drop_sql)
-
-    else:
-        # irritatingly in SQLite we need to rewrite the table to drop the constraint.
-        cur.execute("DROP TABLE IF EXISTS temp_user_filters")
-
-        create_sql = """
-        CREATE TABLE temp_user_filters (
-            full_user_id text NOT NULL,
-            user_id text,
-            filter_id bigint NOT NULL,
-            filter_json bytea NOT NULL
-        )
-        """
-        cur.execute(create_sql)
-
-        index_sql = """
-            CREATE UNIQUE INDEX IF NOT EXISTS user_filters_full_user_id_unique ON
-            temp_user_filters (full_user_id, filter_id)
-        """
-        cur.execute(index_sql)
-
-        copy_sql = """
-            INSERT INTO temp_user_filters (
-                user_id,
-                filter_id,
-                filter_json,
-                full_user_id)
-            SELECT user_id, filter_id, filter_json, full_user_id FROM user_filters
-        """
-        cur.execute(copy_sql)
-
-        drop_sql = """
-        DROP TABLE user_filters
-        """
-        cur.execute(drop_sql)
-
-        rename_sql = """
-        ALTER TABLE temp_user_filters RENAME to user_filters
-        """
-        cur.execute(rename_sql)
diff --git a/tests/storage/test_profile.py b/tests/storage/test_profile.py
index bbe8bd88bc..fe5bb77913 100644
--- a/tests/storage/test_profile.py
+++ b/tests/storage/test_profile.py
@@ -15,6 +15,8 @@
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.server import HomeServer
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import PostgresEngine
 from synapse.types import UserID
 from synapse.util import Clock
 
@@ -62,3 +64,64 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase):
         self.assertIsNone(
             self.get_success(self.store.get_profile_avatar_url(self.u_frank))
         )
+
+    def test_profiles_bg_migration(self) -> None:
+        """
+        Test background job that copies entries from column user_id to full_user_id, adding
+        the hostname in the process.
+        """
+        updater = self.hs.get_datastores().main.db_pool.updates
+
+        # drop the constraint so we can insert nulls in full_user_id to populate the test
+        if isinstance(self.store.database_engine, PostgresEngine):
+
+            def f(txn: LoggingTransaction) -> None:
+                txn.execute(
+                    "ALTER TABLE profiles DROP CONSTRAINT full_user_id_not_null"
+                )
+
+            self.get_success(self.store.db_pool.runInteraction("", f))
+
+        for i in range(0, 70):
+            self.get_success(
+                self.store.db_pool.simple_insert(
+                    "profiles",
+                    {"user_id": f"hello{i:02}"},
+                )
+            )
+
+        # re-add the constraint so that when it's validated it actually exists
+        if isinstance(self.store.database_engine, PostgresEngine):
+
+            def f(txn: LoggingTransaction) -> None:
+                txn.execute(
+                    "ALTER TABLE profiles ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID"
+                )
+
+            self.get_success(self.store.db_pool.runInteraction("", f))
+
+        self.get_success(
+            self.store.db_pool.simple_insert(
+                "background_updates",
+                values={
+                    "update_name": "populate_full_user_id_profiles",
+                    "progress_json": "{}",
+                },
+            )
+        )
+
+        self.get_success(
+            updater.run_background_updates(False),
+        )
+
+        expected_values = []
+        for i in range(0, 70):
+            expected_values.append((f"@hello{i:02}:{self.hs.hostname}",))
+
+        res = self.get_success(
+            self.store.db_pool.execute(
+                "", None, "SELECT full_user_id from profiles ORDER BY full_user_id"
+            )
+        )
+        self.assertEqual(len(res), len(expected_values))
+        self.assertEqual(res, expected_values)
diff --git a/tests/storage/test_user_filters.py b/tests/storage/test_user_filters.py
new file mode 100644
index 0000000000..bab802f56e
--- /dev/null
+++ b/tests/storage/test_user_filters.py
@@ -0,0 +1,94 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.server import HomeServer
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import PostgresEngine
+from synapse.util import Clock
+
+from tests import unittest
+
+
+class UserFiltersStoreTestCase(unittest.HomeserverTestCase):
+    """
+    Test background migration that copies entries from column user_id to full_user_id, adding
+    the hostname in the process.
+    """
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.store = hs.get_datastores().main
+
+    def test_bg_migration(self) -> None:
+        updater = self.hs.get_datastores().main.db_pool.updates
+
+        # drop the constraint so we can insert nulls in full_user_id to populate the test
+        if isinstance(self.store.database_engine, PostgresEngine):
+
+            def f(txn: LoggingTransaction) -> None:
+                txn.execute(
+                    "ALTER TABLE user_filters DROP CONSTRAINT full_user_id_not_null"
+                )
+
+            self.get_success(self.store.db_pool.runInteraction("", f))
+
+        for i in range(0, 70):
+            self.get_success(
+                self.store.db_pool.simple_insert(
+                    "user_filters",
+                    {
+                        "user_id": f"hello{i:02}",
+                        "filter_id": i,
+                        "filter_json": bytearray(i),
+                    },
+                )
+            )
+
+        # re-add the constraint so that when it's validated it actually exists
+        if isinstance(self.store.database_engine, PostgresEngine):
+
+            def f(txn: LoggingTransaction) -> None:
+                txn.execute(
+                    "ALTER TABLE user_filters ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID"
+                )
+
+            self.get_success(self.store.db_pool.runInteraction("", f))
+
+        self.get_success(
+            self.store.db_pool.simple_insert(
+                "background_updates",
+                values={
+                    "update_name": "populate_full_user_id_user_filters",
+                    "progress_json": "{}",
+                },
+            )
+        )
+
+        self.get_success(
+            updater.run_background_updates(False),
+        )
+
+        expected_values = []
+        for i in range(0, 70):
+            expected_values.append((f"@hello{i:02}:{self.hs.hostname}",))
+
+        res = self.get_success(
+            self.store.db_pool.execute(
+                "", None, "SELECT full_user_id from user_filters ORDER BY full_user_id"
+            )
+        )
+        self.assertEqual(len(res), len(expected_values))
+        self.assertEqual(res, expected_values)
-- 
cgit 1.5.1


From 6d81aec09febe86532235141e84c4ea0b3f56049 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 18 Jul 2023 08:44:59 -0400
Subject: Support room version 11 (#15912)

And fix a bug in the implementation of the updated redaction
format (MSC2174) where the top-level redacts field was not
properly added for backwards-compatibility.
---
 changelog.d/15912.feature                |   1 +
 scripts-dev/complement.sh                |   2 +-
 synapse/api/room_versions.py             | 329 ++++++++++---------------------
 synapse/event_auth.py                    |  28 ++-
 synapse/events/__init__.py               |   2 +-
 synapse/events/builder.py                |   2 +-
 synapse/events/utils.py                  |  31 +--
 synapse/federation/federation_base.py    |   2 +-
 synapse/federation/federation_client.py  |   6 +-
 synapse/federation/federation_server.py  |   6 +-
 synapse/handlers/event_auth.py           |   4 +-
 synapse/handlers/federation.py           |   2 +-
 synapse/handlers/room.py                 |   2 +-
 synapse/handlers/room_summary.py         |   4 +-
 synapse/push/bulk_push_rule_evaluator.py |   2 +-
 synapse/rest/client/room.py              |   4 +-
 synapse/storage/databases/main/room.py   |   2 +-
 tests/events/test_utils.py               |  30 ++-
 tests/rest/client/test_redactions.py     |  21 +-
 19 files changed, 190 insertions(+), 290 deletions(-)
 create mode 100644 changelog.d/15912.feature

(limited to 'synapse')

diff --git a/changelog.d/15912.feature b/changelog.d/15912.feature
new file mode 100644
index 0000000000..0faed11eda
--- /dev/null
+++ b/changelog.d/15912.feature
@@ -0,0 +1 @@
+Support room version 11 from [MSC3820](https://github.com/matrix-org/matrix-spec-proposals/pull/3820).
diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh
index fea76cb5af..8416b55674 100755
--- a/scripts-dev/complement.sh
+++ b/scripts-dev/complement.sh
@@ -214,7 +214,7 @@ fi
 
 extra_test_args=()
 
-test_tags="synapse_blacklist,msc3787,msc3874,msc3890,msc3391,msc3930,faster_joins"
+test_tags="synapse_blacklist,msc3874,msc3890,msc3391,msc3930,faster_joins"
 
 # All environment variables starting with PASS_ will be shared.
 # (The prefix is stripped off before reaching the container.)
diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py
index 25c105a4c8..e7662d5b99 100644
--- a/synapse/api/room_versions.py
+++ b/synapse/api/room_versions.py
@@ -78,36 +78,29 @@ class RoomVersion:
     # MSC2209: Check 'notifications' key while verifying
     # m.room.power_levels auth rules.
     limit_notifications_power_levels: bool
-    # MSC2175: No longer include the creator in m.room.create events.
-    msc2175_implicit_room_creator: bool
-    # MSC2174/MSC2176: Apply updated redaction rules algorithm, move redacts to
-    # content property.
-    msc2176_redaction_rules: bool
-    # MSC3083: Support the 'restricted' join_rule.
-    msc3083_join_rules: bool
-    # MSC3375: Support for the proper redaction rules for MSC3083. This mustn't
-    #          be enabled if MSC3083 is not.
-    msc3375_redaction_rules: bool
-    # MSC2403: Allows join_rules to be set to 'knock', changes auth rules to allow sending
-    # m.room.membership event with membership 'knock'.
-    msc2403_knocking: bool
+    # No longer include the creator in m.room.create events.
+    implicit_room_creator: bool
+    # Apply updated redaction rules algorithm from room version 11.
+    updated_redaction_rules: bool
+    # Support the 'restricted' join rule.
+    restricted_join_rule: bool
+    # Support for the proper redaction rules for the restricted join rule. This requires
+    # restricted_join_rule to be enabled.
+    restricted_join_rule_fix: bool
+    # Support the 'knock' join rule.
+    knock_join_rule: bool
     # MSC3389: Protect relation information from redaction.
     msc3389_relation_redactions: bool
-    # MSC3787: Adds support for a `knock_restricted` join rule, mixing concepts of
-    # knocks and restricted join rules into the same join condition.
-    msc3787_knock_restricted_join_rule: bool
-    # MSC3667: Enforce integer power levels
-    msc3667_int_only_power_levels: bool
-    # MSC3821: Do not redact the third_party_invite content field for membership events.
-    msc3821_redaction_rules: bool
+    # Support the 'knock_restricted' join rule.
+    knock_restricted_join_rule: bool
+    # Enforce integer power levels
+    enforce_int_power_levels: bool
     # MSC3931: Adds a push rule condition for "room version feature flags", making
     # some push rules room version dependent. Note that adding a flag to this list
     # is not enough to mark it "supported": the push rule evaluator also needs to
     # support the flag. Unknown flags are ignored by the evaluator, making conditions
     # fail if used.
     msc3931_push_features: Tuple[str, ...]  # values from PushRuleRoomFlag
-    # MSC3989: Redact the origin field.
-    msc3989_redaction_rules: bool
 
 
 class RoomVersions:
@@ -120,17 +113,15 @@ class RoomVersions:
         special_case_aliases_auth=True,
         strict_canonicaljson=False,
         limit_notifications_power_levels=False,
-        msc2175_implicit_room_creator=False,
-        msc2176_redaction_rules=False,
-        msc3083_join_rules=False,
-        msc3375_redaction_rules=False,
-        msc2403_knocking=False,
+        implicit_room_creator=False,
+        updated_redaction_rules=False,
+        restricted_join_rule=False,
+        restricted_join_rule_fix=False,
+        knock_join_rule=False,
         msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=False,
-        msc3667_int_only_power_levels=False,
-        msc3821_redaction_rules=False,
+        knock_restricted_join_rule=False,
+        enforce_int_power_levels=False,
         msc3931_push_features=(),
-        msc3989_redaction_rules=False,
     )
     V2 = RoomVersion(
         "2",
@@ -141,17 +132,15 @@ class RoomVersions:
         special_case_aliases_auth=True,
         strict_canonicaljson=False,
         limit_notifications_power_levels=False,
-        msc2175_implicit_room_creator=False,
-        msc2176_redaction_rules=False,
-        msc3083_join_rules=False,
-        msc3375_redaction_rules=False,
-        msc2403_knocking=False,
+        implicit_room_creator=False,
+        updated_redaction_rules=False,
+        restricted_join_rule=False,
+        restricted_join_rule_fix=False,
+        knock_join_rule=False,
         msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=False,
-        msc3667_int_only_power_levels=False,
-        msc3821_redaction_rules=False,
+        knock_restricted_join_rule=False,
+        enforce_int_power_levels=False,
         msc3931_push_features=(),
-        msc3989_redaction_rules=False,
     )
     V3 = RoomVersion(
         "3",
@@ -162,17 +151,15 @@ class RoomVersions:
         special_case_aliases_auth=True,
         strict_canonicaljson=False,
         limit_notifications_power_levels=False,
-        msc2175_implicit_room_creator=False,
-        msc2176_redaction_rules=False,
-        msc3083_join_rules=False,
-        msc3375_redaction_rules=False,
-        msc2403_knocking=False,
+        implicit_room_creator=False,
+        updated_redaction_rules=False,
+        restricted_join_rule=False,
+        restricted_join_rule_fix=False,
+        knock_join_rule=False,
         msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=False,
-        msc3667_int_only_power_levels=False,
-        msc3821_redaction_rules=False,
+        knock_restricted_join_rule=False,
+        enforce_int_power_levels=False,
         msc3931_push_features=(),
-        msc3989_redaction_rules=False,
     )
     V4 = RoomVersion(
         "4",
@@ -183,17 +170,15 @@ class RoomVersions:
         special_case_aliases_auth=True,
         strict_canonicaljson=False,
         limit_notifications_power_levels=False,
-        msc2175_implicit_room_creator=False,
-        msc2176_redaction_rules=False,
-        msc3083_join_rules=False,
-        msc3375_redaction_rules=False,
-        msc2403_knocking=False,
+        implicit_room_creator=False,
+        updated_redaction_rules=False,
+        restricted_join_rule=False,
+        restricted_join_rule_fix=False,
+        knock_join_rule=False,
         msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=False,
-        msc3667_int_only_power_levels=False,
-        msc3821_redaction_rules=False,
+        knock_restricted_join_rule=False,
+        enforce_int_power_levels=False,
         msc3931_push_features=(),
-        msc3989_redaction_rules=False,
     )
     V5 = RoomVersion(
         "5",
@@ -204,17 +189,15 @@ class RoomVersions:
         special_case_aliases_auth=True,
         strict_canonicaljson=False,
         limit_notifications_power_levels=False,
-        msc2175_implicit_room_creator=False,
-        msc2176_redaction_rules=False,
-        msc3083_join_rules=False,
-        msc3375_redaction_rules=False,
-        msc2403_knocking=False,
+        implicit_room_creator=False,
+        updated_redaction_rules=False,
+        restricted_join_rule=False,
+        restricted_join_rule_fix=False,
+        knock_join_rule=False,
         msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=False,
-        msc3667_int_only_power_levels=False,
-        msc3821_redaction_rules=False,
+        knock_restricted_join_rule=False,
+        enforce_int_power_levels=False,
         msc3931_push_features=(),
-        msc3989_redaction_rules=False,
     )
     V6 = RoomVersion(
         "6",
@@ -225,38 +208,15 @@ class RoomVersions:
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
-        msc2175_implicit_room_creator=False,
-        msc2176_redaction_rules=False,
-        msc3083_join_rules=False,
-        msc3375_redaction_rules=False,
-        msc2403_knocking=False,
+        implicit_room_creator=False,
+        updated_redaction_rules=False,
+        restricted_join_rule=False,
+        restricted_join_rule_fix=False,
+        knock_join_rule=False,
         msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=False,
-        msc3667_int_only_power_levels=False,
-        msc3821_redaction_rules=False,
+        knock_restricted_join_rule=False,
+        enforce_int_power_levels=False,
         msc3931_push_features=(),
-        msc3989_redaction_rules=False,
-    )
-    MSC2176 = RoomVersion(
-        "org.matrix.msc2176",
-        RoomDisposition.UNSTABLE,
-        EventFormatVersions.ROOM_V4_PLUS,
-        StateResolutionVersions.V2,
-        enforce_key_validity=True,
-        special_case_aliases_auth=False,
-        strict_canonicaljson=True,
-        limit_notifications_power_levels=True,
-        msc2175_implicit_room_creator=False,
-        msc2176_redaction_rules=True,
-        msc3083_join_rules=False,
-        msc3375_redaction_rules=False,
-        msc2403_knocking=False,
-        msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=False,
-        msc3667_int_only_power_levels=False,
-        msc3821_redaction_rules=False,
-        msc3931_push_features=(),
-        msc3989_redaction_rules=False,
     )
     V7 = RoomVersion(
         "7",
@@ -267,17 +227,15 @@ class RoomVersions:
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
-        msc2175_implicit_room_creator=False,
-        msc2176_redaction_rules=False,
-        msc3083_join_rules=False,
-        msc3375_redaction_rules=False,
-        msc2403_knocking=True,
+        implicit_room_creator=False,
+        updated_redaction_rules=False,
+        restricted_join_rule=False,
+        restricted_join_rule_fix=False,
+        knock_join_rule=True,
         msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=False,
-        msc3667_int_only_power_levels=False,
-        msc3821_redaction_rules=False,
+        knock_restricted_join_rule=False,
+        enforce_int_power_levels=False,
         msc3931_push_features=(),
-        msc3989_redaction_rules=False,
     )
     V8 = RoomVersion(
         "8",
@@ -288,17 +246,15 @@ class RoomVersions:
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
-        msc2175_implicit_room_creator=False,
-        msc2176_redaction_rules=False,
-        msc3083_join_rules=True,
-        msc3375_redaction_rules=False,
-        msc2403_knocking=True,
+        implicit_room_creator=False,
+        updated_redaction_rules=False,
+        restricted_join_rule=True,
+        restricted_join_rule_fix=False,
+        knock_join_rule=True,
         msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=False,
-        msc3667_int_only_power_levels=False,
-        msc3821_redaction_rules=False,
+        knock_restricted_join_rule=False,
+        enforce_int_power_levels=False,
         msc3931_push_features=(),
-        msc3989_redaction_rules=False,
     )
     V9 = RoomVersion(
         "9",
@@ -309,59 +265,15 @@ class RoomVersions:
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
-        msc2175_implicit_room_creator=False,
-        msc2176_redaction_rules=False,
-        msc3083_join_rules=True,
-        msc3375_redaction_rules=True,
-        msc2403_knocking=True,
-        msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=False,
-        msc3667_int_only_power_levels=False,
-        msc3821_redaction_rules=False,
-        msc3931_push_features=(),
-        msc3989_redaction_rules=False,
-    )
-    MSC3787 = RoomVersion(
-        "org.matrix.msc3787",
-        RoomDisposition.UNSTABLE,
-        EventFormatVersions.ROOM_V4_PLUS,
-        StateResolutionVersions.V2,
-        enforce_key_validity=True,
-        special_case_aliases_auth=False,
-        strict_canonicaljson=True,
-        limit_notifications_power_levels=True,
-        msc2175_implicit_room_creator=False,
-        msc2176_redaction_rules=False,
-        msc3083_join_rules=True,
-        msc3375_redaction_rules=True,
-        msc2403_knocking=True,
-        msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=True,
-        msc3667_int_only_power_levels=False,
-        msc3821_redaction_rules=False,
-        msc3931_push_features=(),
-        msc3989_redaction_rules=False,
-    )
-    MSC3821 = RoomVersion(
-        "org.matrix.msc3821.opt1",
-        RoomDisposition.UNSTABLE,
-        EventFormatVersions.ROOM_V4_PLUS,
-        StateResolutionVersions.V2,
-        enforce_key_validity=True,
-        special_case_aliases_auth=False,
-        strict_canonicaljson=True,
-        limit_notifications_power_levels=True,
-        msc2175_implicit_room_creator=False,
-        msc2176_redaction_rules=False,
-        msc3083_join_rules=True,
-        msc3375_redaction_rules=True,
-        msc2403_knocking=True,
+        implicit_room_creator=False,
+        updated_redaction_rules=False,
+        restricted_join_rule=True,
+        restricted_join_rule_fix=True,
+        knock_join_rule=True,
         msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=False,
-        msc3667_int_only_power_levels=False,
-        msc3821_redaction_rules=True,
+        knock_restricted_join_rule=False,
+        enforce_int_power_levels=False,
         msc3931_push_features=(),
-        msc3989_redaction_rules=False,
     )
     V10 = RoomVersion(
         "10",
@@ -372,17 +284,15 @@ class RoomVersions:
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
-        msc2175_implicit_room_creator=False,
-        msc2176_redaction_rules=False,
-        msc3083_join_rules=True,
-        msc3375_redaction_rules=True,
-        msc2403_knocking=True,
+        implicit_room_creator=False,
+        updated_redaction_rules=False,
+        restricted_join_rule=True,
+        restricted_join_rule_fix=True,
+        knock_join_rule=True,
         msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=True,
-        msc3667_int_only_power_levels=True,
-        msc3821_redaction_rules=False,
+        knock_restricted_join_rule=True,
+        enforce_int_power_levels=True,
         msc3931_push_features=(),
-        msc3989_redaction_rules=False,
     )
     MSC1767v10 = RoomVersion(
         # MSC1767 (Extensible Events) based on room version "10"
@@ -394,60 +304,34 @@ class RoomVersions:
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
-        msc2175_implicit_room_creator=False,
-        msc2176_redaction_rules=False,
-        msc3083_join_rules=True,
-        msc3375_redaction_rules=True,
-        msc2403_knocking=True,
+        implicit_room_creator=False,
+        updated_redaction_rules=False,
+        restricted_join_rule=True,
+        restricted_join_rule_fix=True,
+        knock_join_rule=True,
         msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=True,
-        msc3667_int_only_power_levels=True,
-        msc3821_redaction_rules=False,
+        knock_restricted_join_rule=True,
+        enforce_int_power_levels=True,
         msc3931_push_features=(PushRuleRoomFlag.EXTENSIBLE_EVENTS,),
-        msc3989_redaction_rules=False,
     )
-    MSC3989 = RoomVersion(
-        "org.matrix.msc3989",
-        RoomDisposition.UNSTABLE,
-        EventFormatVersions.ROOM_V4_PLUS,
-        StateResolutionVersions.V2,
-        enforce_key_validity=True,
-        special_case_aliases_auth=False,
-        strict_canonicaljson=True,
-        limit_notifications_power_levels=True,
-        msc2175_implicit_room_creator=False,
-        msc2176_redaction_rules=False,
-        msc3083_join_rules=True,
-        msc3375_redaction_rules=True,
-        msc2403_knocking=True,
-        msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=True,
-        msc3667_int_only_power_levels=True,
-        msc3821_redaction_rules=False,
-        msc3931_push_features=(),
-        msc3989_redaction_rules=True,
-    )
-    MSC3820opt2 = RoomVersion(
-        # Based upon v10
-        "org.matrix.msc3820.opt2",
-        RoomDisposition.UNSTABLE,
+    V11 = RoomVersion(
+        "11",
+        RoomDisposition.STABLE,
         EventFormatVersions.ROOM_V4_PLUS,
         StateResolutionVersions.V2,
         enforce_key_validity=True,
         special_case_aliases_auth=False,
         strict_canonicaljson=True,
         limit_notifications_power_levels=True,
-        msc2175_implicit_room_creator=True,  # Used by MSC3820
-        msc2176_redaction_rules=True,  # Used by MSC3820
-        msc3083_join_rules=True,
-        msc3375_redaction_rules=True,
-        msc2403_knocking=True,
+        implicit_room_creator=True,  # Used by MSC3820
+        updated_redaction_rules=True,  # Used by MSC3820
+        restricted_join_rule=True,
+        restricted_join_rule_fix=True,
+        knock_join_rule=True,
         msc3389_relation_redactions=False,
-        msc3787_knock_restricted_join_rule=True,
-        msc3667_int_only_power_levels=True,
-        msc3821_redaction_rules=True,  # Used by MSC3820
+        knock_restricted_join_rule=True,
+        enforce_int_power_levels=True,
         msc3931_push_features=(),
-        msc3989_redaction_rules=True,  # Used by MSC3820
     )
 
 
@@ -460,14 +344,11 @@ KNOWN_ROOM_VERSIONS: Dict[str, RoomVersion] = {
         RoomVersions.V4,
         RoomVersions.V5,
         RoomVersions.V6,
-        RoomVersions.MSC2176,
         RoomVersions.V7,
         RoomVersions.V8,
         RoomVersions.V9,
-        RoomVersions.MSC3787,
         RoomVersions.V10,
-        RoomVersions.MSC3989,
-        RoomVersions.MSC3820opt2,
+        RoomVersions.V11,
     )
 }
 
@@ -496,12 +377,12 @@ MSC3244_CAPABILITIES = {
         RoomVersionCapability(
             "knock",
             RoomVersions.V7,
-            lambda room_version: room_version.msc2403_knocking,
+            lambda room_version: room_version.knock_join_rule,
         ),
         RoomVersionCapability(
             "restricted",
             RoomVersions.V9,
-            lambda room_version: room_version.msc3083_join_rules,
+            lambda room_version: room_version.restricted_join_rule,
         ),
     )
 }
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index 3aaf53dfbd..3a260a492b 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -126,7 +126,7 @@ def validate_event_for_room_version(event: "EventBase") -> None:
             raise AuthError(403, "Event not signed by sending server")
 
     is_invite_via_allow_rule = (
-        event.room_version.msc3083_join_rules
+        event.room_version.restricted_join_rule
         and event.type == EventTypes.Member
         and event.membership == Membership.JOIN
         and EventContentFields.AUTHORISING_USER in event.content
@@ -352,11 +352,9 @@ LENIENT_EVENT_BYTE_LIMITS_ROOM_VERSIONS = {
     RoomVersions.V4,
     RoomVersions.V5,
     RoomVersions.V6,
-    RoomVersions.MSC2176,
     RoomVersions.V7,
     RoomVersions.V8,
     RoomVersions.V9,
-    RoomVersions.MSC3787,
     RoomVersions.V10,
     RoomVersions.MSC1767v10,
 }
@@ -449,7 +447,7 @@ def _check_create(event: "EventBase") -> None:
 
     # 1.4 If content has no creator field, reject if the room version requires it.
     if (
-        not event.room_version.msc2175_implicit_room_creator
+        not event.room_version.implicit_room_creator
         and EventContentFields.ROOM_CREATOR not in event.content
     ):
         raise AuthError(403, "Create event lacks a 'creator' property")
@@ -486,7 +484,7 @@ def _is_membership_change_allowed(
         key = (EventTypes.Create, "")
         create = auth_events.get(key)
         if create and event.prev_event_ids()[0] == create.event_id:
-            if room_version.msc2175_implicit_room_creator:
+            if room_version.implicit_room_creator:
                 creator = create.sender
             else:
                 creator = create.content[EventContentFields.ROOM_CREATOR]
@@ -509,7 +507,7 @@ def _is_membership_change_allowed(
     caller_invited = caller and caller.membership == Membership.INVITE
     caller_knocked = (
         caller
-        and room_version.msc2403_knocking
+        and room_version.knock_join_rule
         and caller.membership == Membership.KNOCK
     )
 
@@ -609,9 +607,9 @@ def _is_membership_change_allowed(
         elif join_rule == JoinRules.PUBLIC:
             pass
         elif (
-            room_version.msc3083_join_rules and join_rule == JoinRules.RESTRICTED
+            room_version.restricted_join_rule and join_rule == JoinRules.RESTRICTED
         ) or (
-            room_version.msc3787_knock_restricted_join_rule
+            room_version.knock_restricted_join_rule
             and join_rule == JoinRules.KNOCK_RESTRICTED
         ):
             # This is the same as public, but the event must contain a reference
@@ -641,9 +639,9 @@ def _is_membership_change_allowed(
 
         elif (
             join_rule == JoinRules.INVITE
-            or (room_version.msc2403_knocking and join_rule == JoinRules.KNOCK)
+            or (room_version.knock_join_rule and join_rule == JoinRules.KNOCK)
             or (
-                room_version.msc3787_knock_restricted_join_rule
+                room_version.knock_restricted_join_rule
                 and join_rule == JoinRules.KNOCK_RESTRICTED
             )
         ):
@@ -677,9 +675,9 @@ def _is_membership_change_allowed(
                 "You don't have permission to ban",
                 errcode=Codes.INSUFFICIENT_POWER,
             )
-    elif room_version.msc2403_knocking and Membership.KNOCK == membership:
+    elif room_version.knock_join_rule and Membership.KNOCK == membership:
         if join_rule != JoinRules.KNOCK and (
-            not room_version.msc3787_knock_restricted_join_rule
+            not room_version.knock_restricted_join_rule
             or join_rule != JoinRules.KNOCK_RESTRICTED
         ):
             raise AuthError(403, "You don't have permission to knock")
@@ -836,7 +834,7 @@ def _check_power_levels(
     # Reject events with stringy power levels if required by room version
     if (
         event.type == EventTypes.PowerLevels
-        and room_version_obj.msc3667_int_only_power_levels
+        and room_version_obj.enforce_int_power_levels
     ):
         for k, v in event.content.items():
             if k in {
@@ -972,7 +970,7 @@ def get_user_power_level(user_id: str, auth_events: StateMap["EventBase"]) -> in
         key = (EventTypes.Create, "")
         create_event = auth_events.get(key)
         if create_event is not None:
-            if create_event.room_version.msc2175_implicit_room_creator:
+            if create_event.room_version.implicit_room_creator:
                 creator = create_event.sender
             else:
                 creator = create_event.content[EventContentFields.ROOM_CREATOR]
@@ -1110,7 +1108,7 @@ def auth_types_for_event(
                 )
                 auth_types.add(key)
 
-        if room_version.msc3083_join_rules and membership == Membership.JOIN:
+        if room_version.restricted_join_rule and membership == Membership.JOIN:
             if EventContentFields.AUTHORISING_USER in event.content:
                 key = (
                     EventTypes.Member,
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 75b62adb33..35257a3b1b 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -346,7 +346,7 @@ class EventBase(metaclass=abc.ABCMeta):
     @property
     def redacts(self) -> Optional[str]:
         """MSC2176 moved the redacts field into the content."""
-        if self.room_version.msc2176_redaction_rules:
+        if self.room_version.updated_redaction_rules:
             return self.content.get("redacts")
         return self.get("redacts")
 
diff --git a/synapse/events/builder.py b/synapse/events/builder.py
index a254548c6c..14ea0e6640 100644
--- a/synapse/events/builder.py
+++ b/synapse/events/builder.py
@@ -175,7 +175,7 @@ class EventBuilder:
 
         # MSC2174 moves the redacts property to the content, it is invalid to
         # provide it as a top-level property.
-        if self._redacts is not None and not self.room_version.msc2176_redaction_rules:
+        if self._redacts is not None and not self.room_version.updated_redaction_rules:
             event_dict["redacts"] = self._redacts
 
         if self._origin_server_ts is not None:
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index a55efcca56..ecfc5c0568 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -108,13 +108,9 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
         "origin_server_ts",
     ]
 
-    # Room versions from before MSC2176 had additional allowed keys.
-    if not room_version.msc2176_redaction_rules:
-        allowed_keys.extend(["prev_state", "membership"])
-
-    # Room versions before MSC3989 kept the origin field.
-    if not room_version.msc3989_redaction_rules:
-        allowed_keys.append("origin")
+    # Earlier room versions had additional allowed keys.
+    if not room_version.updated_redaction_rules:
+        allowed_keys.extend(["prev_state", "membership", "origin"])
 
     event_type = event_dict["type"]
 
@@ -127,9 +123,9 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
 
     if event_type == EventTypes.Member:
         add_fields("membership")
-        if room_version.msc3375_redaction_rules:
+        if room_version.restricted_join_rule_fix:
             add_fields(EventContentFields.AUTHORISING_USER)
-        if room_version.msc3821_redaction_rules:
+        if room_version.updated_redaction_rules:
             # Preserve the signed field under third_party_invite.
             third_party_invite = event_dict["content"].get("third_party_invite")
             if isinstance(third_party_invite, collections.abc.Mapping):
@@ -141,13 +137,13 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
 
     elif event_type == EventTypes.Create:
         # MSC2176 rules state that create events cannot be redacted.
-        if room_version.msc2176_redaction_rules:
+        if room_version.updated_redaction_rules:
             return event_dict
 
         add_fields("creator")
     elif event_type == EventTypes.JoinRules:
         add_fields("join_rule")
-        if room_version.msc3083_join_rules:
+        if room_version.restricted_join_rule:
             add_fields("allow")
     elif event_type == EventTypes.PowerLevels:
         add_fields(
@@ -161,14 +157,14 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
             "redact",
         )
 
-        if room_version.msc2176_redaction_rules:
+        if room_version.updated_redaction_rules:
             add_fields("invite")
 
     elif event_type == EventTypes.Aliases and room_version.special_case_aliases_auth:
         add_fields("aliases")
     elif event_type == EventTypes.RoomHistoryVisibility:
         add_fields("history_visibility")
-    elif event_type == EventTypes.Redaction and room_version.msc2176_redaction_rules:
+    elif event_type == EventTypes.Redaction and room_version.updated_redaction_rules:
         add_fields("redacts")
 
     # Protect the rel_type and event_id fields under the m.relates_to field.
@@ -477,6 +473,15 @@ def serialize_event(
     if config.as_client_event:
         d = config.event_format(d)
 
+    # If the event is a redaction, copy the redacts field from the content to
+    # top-level for backwards compatibility.
+    if (
+        e.type == EventTypes.Redaction
+        and e.room_version.updated_redaction_rules
+        and e.redacts is not None
+    ):
+        d["redacts"] = e.redacts
+
     only_event_fields = config.only_event_fields
     if only_event_fields:
         if not isinstance(only_event_fields, list) or not all(
diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py
index b77022b406..31e0260b83 100644
--- a/synapse/federation/federation_base.py
+++ b/synapse/federation/federation_base.py
@@ -231,7 +231,7 @@ async def _check_sigs_on_pdu(
     # If this is a join event for a restricted room it may have been authorised
     # via a different server from the sending server. Check those signatures.
     if (
-        room_version.msc3083_join_rules
+        room_version.restricted_join_rule
         and pdu.type == EventTypes.Member
         and pdu.membership == Membership.JOIN
         and EventContentFields.AUTHORISING_USER in pdu.content
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index e5359ca558..89bd597409 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -983,7 +983,7 @@ class FederationClient(FederationBase):
             if not room_version:
                 raise UnsupportedRoomVersionError()
 
-            if not room_version.msc2403_knocking and membership == Membership.KNOCK:
+            if not room_version.knock_join_rule and membership == Membership.KNOCK:
                 raise SynapseError(
                     400,
                     "This room version does not support knocking",
@@ -1069,7 +1069,7 @@ class FederationClient(FederationBase):
             # * Ensure the signatures are good.
             #
             # Otherwise, fallback to the provided event.
-            if room_version.msc3083_join_rules and response.event:
+            if room_version.restricted_join_rule and response.event:
                 event = response.event
 
                 valid_pdu = await self._check_sigs_and_hash_and_fetch_one(
@@ -1195,7 +1195,7 @@ class FederationClient(FederationBase):
 
         # MSC3083 defines additional error codes for room joins.
         failover_errcodes = None
-        if room_version.msc3083_join_rules:
+        if room_version.restricted_join_rule:
             failover_errcodes = (
                 Codes.UNABLE_AUTHORISE_JOIN,
                 Codes.UNABLE_TO_GRANT_JOIN,
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 61fa3b30af..fa61dd8c10 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -806,7 +806,7 @@ class FederationServer(FederationBase):
             raise IncompatibleRoomVersionError(room_version=room_version.identifier)
 
         # Check that this room supports knocking as defined by its room version
-        if not room_version.msc2403_knocking:
+        if not room_version.knock_join_rule:
             raise SynapseError(
                 403,
                 "This room version does not support knocking",
@@ -909,7 +909,7 @@ class FederationServer(FederationBase):
                 errcode=Codes.NOT_FOUND,
             )
 
-        if membership_type == Membership.KNOCK and not room_version.msc2403_knocking:
+        if membership_type == Membership.KNOCK and not room_version.knock_join_rule:
             raise SynapseError(
                 403,
                 "This room version does not support knocking",
@@ -933,7 +933,7 @@ class FederationServer(FederationBase):
         # the event is valid to be sent into the room. Currently this is only done
         # if the user is being joined via restricted join rules.
         if (
-            room_version.msc3083_join_rules
+            room_version.restricted_join_rule
             and event.membership == Membership.JOIN
             and EventContentFields.AUTHORISING_USER in event.content
         ):
diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py
index 3e37c0cbe2..82a7617a08 100644
--- a/synapse/handlers/event_auth.py
+++ b/synapse/handlers/event_auth.py
@@ -277,7 +277,7 @@ class EventAuthHandler:
             True if the proper room version and join rules are set for restricted access.
         """
         # This only applies to room versions which support the new join rule.
-        if not room_version.msc3083_join_rules:
+        if not room_version.restricted_join_rule:
             return False
 
         # If there's no join rule, then it defaults to invite (so this doesn't apply).
@@ -292,7 +292,7 @@ class EventAuthHandler:
             return True
 
         # also check for MSC3787 behaviour
-        if room_version.msc3787_knock_restricted_join_rule:
+        if room_version.knock_restricted_join_rule:
             return content_join_rule == JoinRules.KNOCK_RESTRICTED
 
         return False
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index cc5ed97730..15b9fbe44a 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -957,7 +957,7 @@ class FederationHandler:
         # Note that this requires the /send_join request to come back to the
         # same server.
         prev_event_ids = None
-        if room_version.msc3083_join_rules:
+        if room_version.restricted_join_rule:
             # Note that the room's state can change out from under us and render our
             # nice join rules-conformant event non-conformant by the time we build the
             # event. When this happens, our validation at the end fails and we respond
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index bf907b7881..0513e28aab 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -1116,7 +1116,7 @@ class RoomCreationHandler:
         preset_config, config = self._room_preset_config(room_config)
 
         # MSC2175 removes the creator field from the create event.
-        if not room_version.msc2175_implicit_room_creator:
+        if not room_version.implicit_room_creator:
             creation_content["creator"] = creator_id
         creation_event, unpersisted_creation_context = await create_event(
             EventTypes.Create, creation_content, False
diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py
index 807245160d..dad3e23470 100644
--- a/synapse/handlers/room_summary.py
+++ b/synapse/handlers/room_summary.py
@@ -564,9 +564,9 @@ class RoomSummaryHandler:
             join_rule = join_rules_event.content.get("join_rule")
             if (
                 join_rule == JoinRules.PUBLIC
-                or (room_version.msc2403_knocking and join_rule == JoinRules.KNOCK)
+                or (room_version.knock_join_rule and join_rule == JoinRules.KNOCK)
                 or (
-                    room_version.msc3787_knock_restricted_join_rule
+                    room_version.knock_restricted_join_rule
                     and join_rule == JoinRules.KNOCK_RESTRICTED
                 )
             ):
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 67377c647b..990c079c81 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -375,7 +375,7 @@ class BulkPushRuleEvaluator:
         # _get_power_levels_and_sender_level in its call to get_user_power_level
         # (even for room V10.)
         notification_levels = power_levels.get("notifications", {})
-        if not event.room_version.msc3667_int_only_power_levels:
+        if not event.room_version.enforce_int_power_levels:
             keys = list(notification_levels.keys())
             for key in keys:
                 level = notification_levels.get(key, SENTINEL)
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index 951bd033f5..dc498001e4 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -1117,7 +1117,7 @@ class RoomRedactEventRestServlet(TransactionRestServlet):
         # Ensure the redacts property in the content matches the one provided in
         # the URL.
         room_version = await self._store.get_room_version(room_id)
-        if room_version.msc2176_redaction_rules:
+        if room_version.updated_redaction_rules:
             if "redacts" in content and content["redacts"] != event_id:
                 raise SynapseError(
                     400,
@@ -1151,7 +1151,7 @@ class RoomRedactEventRestServlet(TransactionRestServlet):
                     "sender": requester.user.to_string(),
                 }
                 # Earlier room versions had a top-level redacts property.
-                if not room_version.msc2176_redaction_rules:
+                if not room_version.updated_redaction_rules:
                     event_dict["redacts"] = event_id
 
                 (
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index ca8be8c80d..830658f328 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -2136,7 +2136,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore):
             raise StoreError(400, "No create event in state")
 
         # Before MSC2175, the room creator was a separate field.
-        if not room_version.msc2175_implicit_room_creator:
+        if not room_version.implicit_room_creator:
             room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR)
 
             if not isinstance(room_creator, str):
diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py
index c9a610db9a..6a52af4d82 100644
--- a/tests/events/test_utils.py
+++ b/tests/events/test_utils.py
@@ -140,18 +140,16 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
             },
         )
 
-        # As of MSC2176 we now redact the membership and prev_states keys.
+        # As of room version 11 we now redact the membership, prev_state, and origin keys.
         self.run_test(
-            {"type": "A", "prev_state": "prev_state", "membership": "join"},
-            {"type": "A", "content": {}, "signatures": {}, "unsigned": {}},
-            room_version=RoomVersions.MSC2176,
-        )
-
-        # As of MSC3989 we now redact the origin key.
-        self.run_test(
-            {"type": "A", "origin": "example.com"},
+            {
+                "type": "A",
+                "prev_state": "prev_state",
+                "membership": "join",
+                "origin": "example.com",
+            },
             {"type": "A", "content": {}, "signatures": {}, "unsigned": {}},
-            room_version=RoomVersions.MSC3989,
+            room_version=RoomVersions.V11,
         )
 
     def test_unsigned(self) -> None:
@@ -236,7 +234,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
                 "signatures": {},
                 "unsigned": {},
             },
-            room_version=RoomVersions.MSC2176,
+            room_version=RoomVersions.V11,
         )
 
     def test_power_levels(self) -> None:
@@ -286,7 +284,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
                 "signatures": {},
                 "unsigned": {},
             },
-            room_version=RoomVersions.MSC2176,
+            room_version=RoomVersions.V11,
         )
 
     def test_alias_event(self) -> None:
@@ -349,7 +347,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
                 "signatures": {},
                 "unsigned": {},
             },
-            room_version=RoomVersions.MSC2176,
+            room_version=RoomVersions.V11,
         )
 
     def test_join_rules(self) -> None:
@@ -472,7 +470,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
                 "signatures": {},
                 "unsigned": {},
             },
-            room_version=RoomVersions.MSC3821,
+            room_version=RoomVersions.V11,
         )
 
         # Ensure this doesn't break if an invalid field is sent.
@@ -491,7 +489,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
                 "signatures": {},
                 "unsigned": {},
             },
-            room_version=RoomVersions.MSC3821,
+            room_version=RoomVersions.V11,
         )
 
         self.run_test(
@@ -509,7 +507,7 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
                 "signatures": {},
                 "unsigned": {},
             },
-            room_version=RoomVersions.MSC3821,
+            room_version=RoomVersions.V11,
         )
 
     def test_relations(self) -> None:
diff --git a/tests/rest/client/test_redactions.py b/tests/rest/client/test_redactions.py
index b43e95292c..6028886bd6 100644
--- a/tests/rest/client/test_redactions.py
+++ b/tests/rest/client/test_redactions.py
@@ -20,6 +20,8 @@ from synapse.api.room_versions import RoomVersions
 from synapse.rest import admin
 from synapse.rest.client import login, room, sync
 from synapse.server import HomeServer
+from synapse.storage._base import db_to_json
+from synapse.storage.database import LoggingTransaction
 from synapse.types import JsonDict
 from synapse.util import Clock
 
@@ -573,7 +575,7 @@ class RedactionsTestCase(HomeserverTestCase):
         room_id = self.helper.create_room_as(
             self.mod_user_id,
             tok=self.mod_access_token,
-            room_version=RoomVersions.MSC2176.identifier,
+            room_version=RoomVersions.V11.identifier,
         )
 
         # Create an event.
@@ -597,5 +599,20 @@ class RedactionsTestCase(HomeserverTestCase):
         redact_event = timeline[-1]
         self.assertEqual(redact_event["type"], EventTypes.Redaction)
         # The redacts key should be in the content.
-        self.assertNotIn("redacts", redact_event)
         self.assertEquals(redact_event["content"]["redacts"], event_id)
+
+        # It should also be copied as the top-level redacts field for backwards
+        # compatibility.
+        self.assertEquals(redact_event["redacts"], event_id)
+
+        # But it isn't actually part of the event.
+        def get_event(txn: LoggingTransaction) -> JsonDict:
+            return db_to_json(
+                main_datastore._fetch_event_rows(txn, [event_id])[event_id].json
+            )
+
+        main_datastore = self.hs.get_datastores().main
+        event_json = self.get_success(
+            main_datastore.db_pool.runInteraction("get_event", get_event)
+        )
+        self.assertNotIn("redacts", event_json)
-- 
cgit 1.5.1


From cb6e2c6cc7f45b3d4f5516b49741d133e7b2b1c3 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Tue, 18 Jul 2023 16:59:27 -0700
Subject: Fix background schema updates failing over a large upgrade gap
 (#15887)

---
 changelog.d/15887.misc                             |  1 +
 .../78/05_mitigate_stream_ordering_update_race.py  | 70 ++++++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 changelog.d/15887.misc
 create mode 100644 synapse/storage/schema/main/delta/78/05_mitigate_stream_ordering_update_race.py

(limited to 'synapse')

diff --git a/changelog.d/15887.misc b/changelog.d/15887.misc
new file mode 100644
index 0000000000..7c1005078e
--- /dev/null
+++ b/changelog.d/15887.misc
@@ -0,0 +1 @@
+Fix background schema updates failing over a large upgrade gap.
diff --git a/synapse/storage/schema/main/delta/78/05_mitigate_stream_ordering_update_race.py b/synapse/storage/schema/main/delta/78/05_mitigate_stream_ordering_update_race.py
new file mode 100644
index 0000000000..1a22f6a404
--- /dev/null
+++ b/synapse/storage/schema/main/delta/78/05_mitigate_stream_ordering_update_race.py
@@ -0,0 +1,70 @@
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
+
+
+def run_create(
+    cur: LoggingTransaction,
+    database_engine: BaseDatabaseEngine,
+) -> None:
+    """
+    An attempt to mitigate a painful race between foreground and background updates
+    touching the `stream_ordering` column of the events table. More info can be found
+    at https://github.com/matrix-org/synapse/issues/15677.
+    """
+
+    # technically the bg update we're concerned with below should only have been added in
+    # postgres but it doesn't hurt to be extra careful
+    if isinstance(database_engine, PostgresEngine):
+        select_sql = """
+            SELECT 1 FROM background_updates
+                WHERE update_name = 'replace_stream_ordering_column'
+        """
+        cur.execute(select_sql)
+        res = cur.fetchone()
+
+        # if the background update `replace_stream_ordering_column` is still pending, we need
+        # to drop the foreign key constraints added in 7403, and re-add them referencing the
+        # column `stream_ordering2`, with the idea that they will be preserved when the column
+        # is renamed `stream_ordering` after the background update has finished
+        if res:
+            drop_cse_sql = """
+            ALTER TABLE current_state_events DROP CONSTRAINT event_stream_ordering_fkey
+            """
+            cur.execute(drop_cse_sql)
+
+            drop_lcm_sql = """
+            ALTER TABLE local_current_membership DROP CONSTRAINT event_stream_ordering_fkey
+            """
+            cur.execute(drop_lcm_sql)
+
+            drop_rm_sql = """
+            ALTER TABLE room_memberships DROP CONSTRAINT event_stream_ordering_fkey
+            """
+            cur.execute(drop_rm_sql)
+
+            add_cse_sql = """
+            ALTER TABLE current_state_events ADD CONSTRAINT event_stream_ordering_fkey
+            FOREIGN KEY (event_stream_ordering) REFERENCES events(stream_ordering2) NOT VALID;
+            """
+            cur.execute(add_cse_sql)
+
+            add_lcm_sql = """
+            ALTER TABLE local_current_membership ADD CONSTRAINT event_stream_ordering_fkey
+            FOREIGN KEY (event_stream_ordering) REFERENCES events(stream_ordering2) NOT VALID;
+            """
+            cur.execute(add_lcm_sql)
+
+            add_rm_sql = """
+            ALTER TABLE room_memberships ADD CONSTRAINT event_stream_ordering_fkey
+            FOREIGN KEY (event_stream_ordering) REFERENCES events(stream_ordering2) NOT VALID;
+            """
+            cur.execute(add_rm_sql)
-- 
cgit 1.5.1


From 40a3583ba14cc32f63154afc9e2c9b1058697f16 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 19 Jul 2023 12:06:38 +0100
Subject: Fix race in triggers for read/write locks. (#15933)

---
 changelog.d/15933.misc                             |  1 +
 .../78/04_read_write_locks_triggers.sql.postgres   | 51 ----------------
 .../78/04_read_write_locks_triggers.sql.sqlite     | 47 ---------------
 .../79/03_read_write_locks_triggers.sql.postgres   | 69 ++++++++++++++++++++++
 .../79/03_read_write_locks_triggers.sql.sqlite     | 65 ++++++++++++++++++++
 5 files changed, 135 insertions(+), 98 deletions(-)
 create mode 100644 changelog.d/15933.misc
 create mode 100644 synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.sqlite

(limited to 'synapse')

diff --git a/changelog.d/15933.misc b/changelog.d/15933.misc
new file mode 100644
index 0000000000..8457994c68
--- /dev/null
+++ b/changelog.d/15933.misc
@@ -0,0 +1 @@
+Fix bug with read/write lock implementation. This is currently unused so has no observable effects.
diff --git a/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.postgres b/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.postgres
index e1a41be9c9..e1cc3469a4 100644
--- a/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.postgres
+++ b/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.postgres
@@ -99,54 +99,3 @@ CREATE UNIQUE INDEX worker_read_write_locks_write ON worker_read_write_locks (lo
 -- constraints.
 ALTER TABLE worker_read_write_locks_mode ADD CONSTRAINT worker_read_write_locks_mode_foreign
     FOREIGN KEY (lock_name, lock_key, token) REFERENCES worker_read_write_locks(lock_name, lock_key, token) DEFERRABLE INITIALLY DEFERRED;
-
-
--- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try
--- and acquire a lock, i.e. insert into `worker_read_write_locks`,
-CREATE OR REPLACE FUNCTION upsert_read_write_lock_parent() RETURNS trigger AS $$
-BEGIN
-    INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
-        VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
-        ON CONFLICT (lock_name, lock_key)
-        DO NOTHING;
-    RETURN NEW;
-END
-$$
-LANGUAGE plpgsql;
-
-CREATE TRIGGER upsert_read_write_lock_parent_trigger BEFORE INSERT ON worker_read_write_locks
-    FOR EACH ROW
-    EXECUTE PROCEDURE upsert_read_write_lock_parent();
-
-
--- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
--- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
--- update the `worker_read_write_locks_mode.token` to match another instance
--- that has currently acquired the lock, or we delete the row if nobody has
--- currently acquired a lock.
-CREATE OR REPLACE FUNCTION delete_read_write_lock_parent() RETURNS trigger AS $$
-DECLARE
-    new_token TEXT;
-BEGIN
-    SELECT token INTO new_token FROM worker_read_write_locks
-        WHERE
-            lock_name = OLD.lock_name
-            AND lock_key = OLD.lock_key;
-
-    IF NOT FOUND THEN
-        DELETE FROM worker_read_write_locks_mode
-            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
-    ELSE
-        UPDATE worker_read_write_locks_mode
-            SET token = new_token
-            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
-    END IF;
-
-    RETURN NEW;
-END
-$$
-LANGUAGE plpgsql;
-
-CREATE TRIGGER delete_read_write_lock_parent_trigger AFTER DELETE ON worker_read_write_locks
-    FOR EACH ROW
-    EXECUTE PROCEDURE delete_read_write_lock_parent();
diff --git a/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.sqlite b/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.sqlite
index be2dfbbb8a..b15432f576 100644
--- a/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.sqlite
+++ b/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.sqlite
@@ -70,50 +70,3 @@ CREATE TABLE worker_read_write_locks (
 CREATE UNIQUE INDEX worker_read_write_locks_key ON worker_read_write_locks (lock_name, lock_key, token);
 -- Ensures that only one instance can acquire a lock in write mode at a time.
 CREATE UNIQUE INDEX worker_read_write_locks_write ON worker_read_write_locks (lock_name, lock_key) WHERE write_lock;
-
-
--- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try
--- and acquire a lock, i.e. insert into `worker_read_write_locks`,
-CREATE TRIGGER IF NOT EXISTS upsert_read_write_lock_parent_trigger
-BEFORE INSERT ON worker_read_write_locks
-FOR EACH ROW
-BEGIN
-    -- First ensure that `worker_read_write_locks_mode` doesn't have stale
-    -- entries in it, as on SQLite we don't have the foreign key constraint to
-    -- enforce this.
-    DELETE FROM worker_read_write_locks_mode
-        WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key
-        AND NOT EXISTS (
-            SELECT 1 FROM worker_read_write_locks
-            WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key
-        );
-
-    INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
-        VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
-        ON CONFLICT (lock_name, lock_key)
-        DO NOTHING;
-END;
-
--- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
--- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
--- update the `worker_read_write_locks_mode.token` to match another instance
--- that has currently acquired the lock, or we delete the row if nobody has
--- currently acquired a lock.
-CREATE TRIGGER IF NOT EXISTS delete_read_write_lock_parent_trigger
-AFTER DELETE ON worker_read_write_locks
-FOR EACH ROW
-BEGIN
-    DELETE FROM worker_read_write_locks_mode
-        WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
-        AND NOT EXISTS (
-            SELECT 1 FROM worker_read_write_locks
-            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
-        );
-
-    UPDATE worker_read_write_locks_mode
-        SET token = (
-            SELECT token FROM worker_read_write_locks
-            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
-        )
-        WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
-END;
diff --git a/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.postgres b/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.postgres
new file mode 100644
index 0000000000..ea3496ef2d
--- /dev/null
+++ b/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.postgres
@@ -0,0 +1,69 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Fix up the triggers that were in `78/04_read_write_locks_triggers.sql`
+
+-- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try
+-- and acquire a lock, i.e. insert into `worker_read_write_locks`,
+CREATE OR REPLACE FUNCTION upsert_read_write_lock_parent() RETURNS trigger AS $$
+BEGIN
+    INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
+        VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
+        ON CONFLICT (lock_name, lock_key)
+        DO UPDATE SET write_lock = NEW.write_lock, token = NEW.token;
+    RETURN NEW;
+END
+$$
+LANGUAGE plpgsql;
+
+DROP TRIGGER IF EXISTS upsert_read_write_lock_parent_trigger ON worker_read_write_locks;
+CREATE TRIGGER upsert_read_write_lock_parent_trigger BEFORE INSERT ON worker_read_write_locks
+    FOR EACH ROW
+    EXECUTE PROCEDURE upsert_read_write_lock_parent();
+
+
+-- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
+-- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
+-- update the `worker_read_write_locks_mode.token` to match another instance
+-- that has currently acquired the lock, or we delete the row if nobody has
+-- currently acquired a lock.
+CREATE OR REPLACE FUNCTION delete_read_write_lock_parent() RETURNS trigger AS $$
+DECLARE
+    new_token TEXT;
+BEGIN
+    SELECT token INTO new_token FROM worker_read_write_locks
+        WHERE
+            lock_name = OLD.lock_name
+            AND lock_key = OLD.lock_key
+        LIMIT 1 FOR UPDATE;
+
+    IF NOT FOUND THEN
+        DELETE FROM worker_read_write_locks_mode
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key AND token = OLD.token;
+    ELSE
+        UPDATE worker_read_write_locks_mode
+            SET token = new_token
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
+    END IF;
+
+    RETURN NEW;
+END
+$$
+LANGUAGE plpgsql;
+
+DROP TRIGGER IF EXISTS delete_read_write_lock_parent_trigger ON worker_read_write_locks;
+CREATE TRIGGER delete_read_write_lock_parent_trigger AFTER DELETE ON worker_read_write_locks
+    FOR EACH ROW
+    EXECUTE PROCEDURE delete_read_write_lock_parent();
diff --git a/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.sqlite b/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.sqlite
new file mode 100644
index 0000000000..acb1a77c80
--- /dev/null
+++ b/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.sqlite
@@ -0,0 +1,65 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Fix up the triggers that were in `78/04_read_write_locks_triggers.sql`
+
+-- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try
+-- and acquire a lock, i.e. insert into `worker_read_write_locks`,
+DROP TRIGGER IF EXISTS upsert_read_write_lock_parent_trigger;
+CREATE TRIGGER IF NOT EXISTS upsert_read_write_lock_parent_trigger
+BEFORE INSERT ON worker_read_write_locks
+FOR EACH ROW
+BEGIN
+    -- First ensure that `worker_read_write_locks_mode` doesn't have stale
+    -- entries in it, as on SQLite we don't have the foreign key constraint to
+    -- enforce this.
+    DELETE FROM worker_read_write_locks_mode
+        WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key
+        AND NOT EXISTS (
+            SELECT 1 FROM worker_read_write_locks
+            WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key
+        );
+
+    INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
+        VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
+        ON CONFLICT (lock_name, lock_key)
+        DO UPDATE SET write_lock = NEW.write_lock, token = NEW.token;
+END;
+
+-- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
+-- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
+-- update the `worker_read_write_locks_mode.token` to match another instance
+-- that has currently acquired the lock, or we delete the row if nobody has
+-- currently acquired a lock.
+DROP TRIGGER IF EXISTS delete_read_write_lock_parent_trigger;
+CREATE TRIGGER IF NOT EXISTS delete_read_write_lock_parent_trigger
+AFTER DELETE ON worker_read_write_locks
+FOR EACH ROW
+BEGIN
+    DELETE FROM worker_read_write_locks_mode
+        WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
+            AND token = OLD.token
+        AND NOT EXISTS (
+            SELECT 1 FROM worker_read_write_locks
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
+        );
+
+    UPDATE worker_read_write_locks_mode
+        SET token = (
+            SELECT token FROM worker_read_write_locks
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
+        )
+        WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
+END;
-- 
cgit 1.5.1


From 19796e20aab31272176e24ec23be9a18cc6680a5 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 19 Jul 2023 13:17:08 +0100
Subject: Fix bad merge of #15933 (#15958)

This was because we reverted the bump of the schema version, so we were not applying the new deltas.
---
 changelog.d/15958.misc                             |  1 +
 .../78/06_read_write_locks_triggers.sql.postgres   | 69 ++++++++++++++++++++++
 .../78/06_read_write_locks_triggers.sql.sqlite     | 65 ++++++++++++++++++++
 .../79/03_read_write_locks_triggers.sql.postgres   | 69 ----------------------
 .../79/03_read_write_locks_triggers.sql.sqlite     | 65 --------------------
 5 files changed, 135 insertions(+), 134 deletions(-)
 create mode 100644 changelog.d/15958.misc
 create mode 100644 synapse/storage/schema/main/delta/78/06_read_write_locks_triggers.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/78/06_read_write_locks_triggers.sql.sqlite
 delete mode 100644 synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.postgres
 delete mode 100644 synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.sqlite

(limited to 'synapse')

diff --git a/changelog.d/15958.misc b/changelog.d/15958.misc
new file mode 100644
index 0000000000..8457994c68
--- /dev/null
+++ b/changelog.d/15958.misc
@@ -0,0 +1 @@
+Fix bug with read/write lock implementation. This is currently unused so has no observable effects.
diff --git a/synapse/storage/schema/main/delta/78/06_read_write_locks_triggers.sql.postgres b/synapse/storage/schema/main/delta/78/06_read_write_locks_triggers.sql.postgres
new file mode 100644
index 0000000000..ea3496ef2d
--- /dev/null
+++ b/synapse/storage/schema/main/delta/78/06_read_write_locks_triggers.sql.postgres
@@ -0,0 +1,69 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Fix up the triggers that were in `78/04_read_write_locks_triggers.sql`
+
+-- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try
+-- and acquire a lock, i.e. insert into `worker_read_write_locks`,
+CREATE OR REPLACE FUNCTION upsert_read_write_lock_parent() RETURNS trigger AS $$
+BEGIN
+    INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
+        VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
+        ON CONFLICT (lock_name, lock_key)
+        DO UPDATE SET write_lock = NEW.write_lock, token = NEW.token;
+    RETURN NEW;
+END
+$$
+LANGUAGE plpgsql;
+
+DROP TRIGGER IF EXISTS upsert_read_write_lock_parent_trigger ON worker_read_write_locks;
+CREATE TRIGGER upsert_read_write_lock_parent_trigger BEFORE INSERT ON worker_read_write_locks
+    FOR EACH ROW
+    EXECUTE PROCEDURE upsert_read_write_lock_parent();
+
+
+-- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
+-- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
+-- update the `worker_read_write_locks_mode.token` to match another instance
+-- that has currently acquired the lock, or we delete the row if nobody has
+-- currently acquired a lock.
+CREATE OR REPLACE FUNCTION delete_read_write_lock_parent() RETURNS trigger AS $$
+DECLARE
+    new_token TEXT;
+BEGIN
+    SELECT token INTO new_token FROM worker_read_write_locks
+        WHERE
+            lock_name = OLD.lock_name
+            AND lock_key = OLD.lock_key
+        LIMIT 1 FOR UPDATE;
+
+    IF NOT FOUND THEN
+        DELETE FROM worker_read_write_locks_mode
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key AND token = OLD.token;
+    ELSE
+        UPDATE worker_read_write_locks_mode
+            SET token = new_token
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
+    END IF;
+
+    RETURN NEW;
+END
+$$
+LANGUAGE plpgsql;
+
+DROP TRIGGER IF EXISTS delete_read_write_lock_parent_trigger ON worker_read_write_locks;
+CREATE TRIGGER delete_read_write_lock_parent_trigger AFTER DELETE ON worker_read_write_locks
+    FOR EACH ROW
+    EXECUTE PROCEDURE delete_read_write_lock_parent();
diff --git a/synapse/storage/schema/main/delta/78/06_read_write_locks_triggers.sql.sqlite b/synapse/storage/schema/main/delta/78/06_read_write_locks_triggers.sql.sqlite
new file mode 100644
index 0000000000..acb1a77c80
--- /dev/null
+++ b/synapse/storage/schema/main/delta/78/06_read_write_locks_triggers.sql.sqlite
@@ -0,0 +1,65 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Fix up the triggers that were in `78/04_read_write_locks_triggers.sql`
+
+-- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try
+-- and acquire a lock, i.e. insert into `worker_read_write_locks`,
+DROP TRIGGER IF EXISTS upsert_read_write_lock_parent_trigger;
+CREATE TRIGGER IF NOT EXISTS upsert_read_write_lock_parent_trigger
+BEFORE INSERT ON worker_read_write_locks
+FOR EACH ROW
+BEGIN
+    -- First ensure that `worker_read_write_locks_mode` doesn't have stale
+    -- entries in it, as on SQLite we don't have the foreign key constraint to
+    -- enforce this.
+    DELETE FROM worker_read_write_locks_mode
+        WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key
+        AND NOT EXISTS (
+            SELECT 1 FROM worker_read_write_locks
+            WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key
+        );
+
+    INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
+        VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
+        ON CONFLICT (lock_name, lock_key)
+        DO UPDATE SET write_lock = NEW.write_lock, token = NEW.token;
+END;
+
+-- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
+-- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
+-- update the `worker_read_write_locks_mode.token` to match another instance
+-- that has currently acquired the lock, or we delete the row if nobody has
+-- currently acquired a lock.
+DROP TRIGGER IF EXISTS delete_read_write_lock_parent_trigger;
+CREATE TRIGGER IF NOT EXISTS delete_read_write_lock_parent_trigger
+AFTER DELETE ON worker_read_write_locks
+FOR EACH ROW
+BEGIN
+    DELETE FROM worker_read_write_locks_mode
+        WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
+            AND token = OLD.token
+        AND NOT EXISTS (
+            SELECT 1 FROM worker_read_write_locks
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
+        );
+
+    UPDATE worker_read_write_locks_mode
+        SET token = (
+            SELECT token FROM worker_read_write_locks
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
+        )
+        WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
+END;
diff --git a/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.postgres b/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.postgres
deleted file mode 100644
index ea3496ef2d..0000000000
--- a/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.postgres
+++ /dev/null
@@ -1,69 +0,0 @@
-/* Copyright 2023 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- Fix up the triggers that were in `78/04_read_write_locks_triggers.sql`
-
--- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try
--- and acquire a lock, i.e. insert into `worker_read_write_locks`,
-CREATE OR REPLACE FUNCTION upsert_read_write_lock_parent() RETURNS trigger AS $$
-BEGIN
-    INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
-        VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
-        ON CONFLICT (lock_name, lock_key)
-        DO UPDATE SET write_lock = NEW.write_lock, token = NEW.token;
-    RETURN NEW;
-END
-$$
-LANGUAGE plpgsql;
-
-DROP TRIGGER IF EXISTS upsert_read_write_lock_parent_trigger ON worker_read_write_locks;
-CREATE TRIGGER upsert_read_write_lock_parent_trigger BEFORE INSERT ON worker_read_write_locks
-    FOR EACH ROW
-    EXECUTE PROCEDURE upsert_read_write_lock_parent();
-
-
--- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
--- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
--- update the `worker_read_write_locks_mode.token` to match another instance
--- that has currently acquired the lock, or we delete the row if nobody has
--- currently acquired a lock.
-CREATE OR REPLACE FUNCTION delete_read_write_lock_parent() RETURNS trigger AS $$
-DECLARE
-    new_token TEXT;
-BEGIN
-    SELECT token INTO new_token FROM worker_read_write_locks
-        WHERE
-            lock_name = OLD.lock_name
-            AND lock_key = OLD.lock_key
-        LIMIT 1 FOR UPDATE;
-
-    IF NOT FOUND THEN
-        DELETE FROM worker_read_write_locks_mode
-            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key AND token = OLD.token;
-    ELSE
-        UPDATE worker_read_write_locks_mode
-            SET token = new_token
-            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
-    END IF;
-
-    RETURN NEW;
-END
-$$
-LANGUAGE plpgsql;
-
-DROP TRIGGER IF EXISTS delete_read_write_lock_parent_trigger ON worker_read_write_locks;
-CREATE TRIGGER delete_read_write_lock_parent_trigger AFTER DELETE ON worker_read_write_locks
-    FOR EACH ROW
-    EXECUTE PROCEDURE delete_read_write_lock_parent();
diff --git a/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.sqlite b/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.sqlite
deleted file mode 100644
index acb1a77c80..0000000000
--- a/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.sqlite
+++ /dev/null
@@ -1,65 +0,0 @@
-/* Copyright 2023 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- Fix up the triggers that were in `78/04_read_write_locks_triggers.sql`
-
--- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try
--- and acquire a lock, i.e. insert into `worker_read_write_locks`,
-DROP TRIGGER IF EXISTS upsert_read_write_lock_parent_trigger;
-CREATE TRIGGER IF NOT EXISTS upsert_read_write_lock_parent_trigger
-BEFORE INSERT ON worker_read_write_locks
-FOR EACH ROW
-BEGIN
-    -- First ensure that `worker_read_write_locks_mode` doesn't have stale
-    -- entries in it, as on SQLite we don't have the foreign key constraint to
-    -- enforce this.
-    DELETE FROM worker_read_write_locks_mode
-        WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key
-        AND NOT EXISTS (
-            SELECT 1 FROM worker_read_write_locks
-            WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key
-        );
-
-    INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
-        VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
-        ON CONFLICT (lock_name, lock_key)
-        DO UPDATE SET write_lock = NEW.write_lock, token = NEW.token;
-END;
-
--- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
--- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
--- update the `worker_read_write_locks_mode.token` to match another instance
--- that has currently acquired the lock, or we delete the row if nobody has
--- currently acquired a lock.
-DROP TRIGGER IF EXISTS delete_read_write_lock_parent_trigger;
-CREATE TRIGGER IF NOT EXISTS delete_read_write_lock_parent_trigger
-AFTER DELETE ON worker_read_write_locks
-FOR EACH ROW
-BEGIN
-    DELETE FROM worker_read_write_locks_mode
-        WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
-            AND token = OLD.token
-        AND NOT EXISTS (
-            SELECT 1 FROM worker_read_write_locks
-            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
-        );
-
-    UPDATE worker_read_write_locks_mode
-        SET token = (
-            SELECT token FROM worker_read_write_locks
-            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
-        )
-        WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
-END;
-- 
cgit 1.5.1


From 67f9e5293ea6650b2ec284c0b7503f3f3eade94b Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 19 Jul 2023 18:00:33 +0100
Subject: Ensure a long state res does not starve CPU (#15960)

We do this by yielding to the reactor in hot loops.
---
 changelog.d/15960.misc | 1 +
 synapse/state/v2.py    | 9 ++++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15960.misc

(limited to 'synapse')

diff --git a/changelog.d/15960.misc b/changelog.d/15960.misc
new file mode 100644
index 0000000000..7cac24a3c5
--- /dev/null
+++ b/changelog.d/15960.misc
@@ -0,0 +1 @@
+Ensure a long state res does not starve CPU by occasionally yielding to the reactor.
diff --git a/synapse/state/v2.py b/synapse/state/v2.py
index 1b9d7d8457..44c49274a9 100644
--- a/synapse/state/v2.py
+++ b/synapse/state/v2.py
@@ -667,7 +667,7 @@ async def _mainline_sort(
     order_map = {}
     for idx, ev_id in enumerate(event_ids, start=1):
         depth = await _get_mainline_depth_for_event(
-            event_map[ev_id], mainline_map, event_map, state_res_store
+            clock, event_map[ev_id], mainline_map, event_map, state_res_store
         )
         order_map[ev_id] = (depth, event_map[ev_id].origin_server_ts, ev_id)
 
@@ -682,6 +682,7 @@ async def _mainline_sort(
 
 
 async def _get_mainline_depth_for_event(
+    clock: Clock,
     event: EventBase,
     mainline_map: Dict[str, int],
     event_map: Dict[str, EventBase],
@@ -704,6 +705,7 @@ async def _get_mainline_depth_for_event(
 
     # We do an iterative search, replacing `event with the power level in its
     # auth events (if any)
+    idx = 0
     while tmp_event:
         depth = mainline_map.get(tmp_event.event_id)
         if depth is not None:
@@ -720,6 +722,11 @@ async def _get_mainline_depth_for_event(
                 tmp_event = aev
                 break
 
+        idx += 1
+
+        if idx % _AWAIT_AFTER_ITERATIONS == 0:
+            await clock.sleep(0)
+
     # Didn't find a power level auth event, so we just return 0
     return 0
 
-- 
cgit 1.5.1


From ad52db3b5cbf8b78b10a82ce45313c606b244fee Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 20 Jul 2023 10:46:37 +0100
Subject: Reduce the amount of state we pull out (#15968)

---
 changelog.d/15968.misc          |  1 +
 synapse/handlers/federation.py  |  6 ++----
 synapse/handlers/message.py     |  4 ++--
 synapse/handlers/room_member.py | 15 +++++++++------
 4 files changed, 14 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/15968.misc

(limited to 'synapse')

diff --git a/changelog.d/15968.misc b/changelog.d/15968.misc
new file mode 100644
index 0000000000..af7132cc72
--- /dev/null
+++ b/changelog.d/15968.misc
@@ -0,0 +1 @@
+Reduce the amount of state we pull out.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 15b9fbe44a..2b93b8c621 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -1581,9 +1581,7 @@ class FederationHandler:
             event.content["third_party_invite"]["signed"]["token"],
         )
         original_invite = None
-        prev_state_ids = await context.get_prev_state_ids(
-            StateFilter.from_types([(EventTypes.ThirdPartyInvite, None)])
-        )
+        prev_state_ids = await context.get_prev_state_ids(StateFilter.from_types([key]))
         original_invite_id = prev_state_ids.get(key)
         if original_invite_id:
             original_invite = await self.store.get_event(
@@ -1636,7 +1634,7 @@ class FederationHandler:
         token = signed["token"]
 
         prev_state_ids = await context.get_prev_state_ids(
-            StateFilter.from_types([(EventTypes.ThirdPartyInvite, None)])
+            StateFilter.from_types([(EventTypes.ThirdPartyInvite, token)])
         )
         invite_event_id = prev_state_ids.get((EventTypes.ThirdPartyInvite, token))
 
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 4292b47037..9910716bc6 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -738,7 +738,7 @@ class EventCreationHandler:
                 prev_event_id = state_map.get((EventTypes.Member, event.sender))
             else:
                 prev_state_ids = await unpersisted_context.get_prev_state_ids(
-                    StateFilter.from_types([(EventTypes.Member, None)])
+                    StateFilter.from_types([(EventTypes.Member, event.sender)])
                 )
                 prev_event_id = prev_state_ids.get((EventTypes.Member, event.sender))
             prev_event = (
@@ -860,7 +860,7 @@ class EventCreationHandler:
             return None
 
         prev_state_ids = await context.get_prev_state_ids(
-            StateFilter.from_types([(event.type, None)])
+            StateFilter.from_types([(event.type, event.state_key)])
         )
         prev_event_id = prev_state_ids.get((event.type, event.state_key))
         if not prev_event_id:
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 82e4fa7363..496e701f13 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -473,7 +473,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                 )
                 context = await unpersisted_context.persist(event)
                 prev_state_ids = await context.get_prev_state_ids(
-                    StateFilter.from_types([(EventTypes.Member, None)])
+                    StateFilter.from_types([(EventTypes.Member, user_id)])
                 )
 
                 prev_member_event_id = prev_state_ids.get(
@@ -1340,7 +1340,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             requester = types.create_requester(target_user)
 
         prev_state_ids = await context.get_prev_state_ids(
-            StateFilter.from_types([(EventTypes.GuestAccess, None)])
+            StateFilter.from_types([(EventTypes.GuestAccess, "")])
         )
         if event.membership == Membership.JOIN:
             if requester.is_guest:
@@ -1362,11 +1362,14 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
             ratelimit=ratelimit,
         )
 
-        prev_member_event_id = prev_state_ids.get(
-            (EventTypes.Member, event.state_key), None
-        )
-
         if event.membership == Membership.LEAVE:
+            prev_state_ids = await context.get_prev_state_ids(
+                StateFilter.from_types([(EventTypes.Member, event.state_key)])
+            )
+            prev_member_event_id = prev_state_ids.get(
+                (EventTypes.Member, event.state_key), None
+            )
+
             if prev_member_event_id:
                 prev_member_event = await self.store.get_event(prev_member_event_id)
                 if prev_member_event.membership == Membership.JOIN:
-- 
cgit 1.5.1


From fd44053b84e6519b7425f295e71e1084111bec46 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 20 Jul 2023 11:07:58 +0100
Subject: Don't log exceptions for every non-200 response (#15969)

Introduced in #15913
---
 changelog.d/15969.feature | 1 +
 synapse/http/server.py    | 4 ----
 2 files changed, 1 insertion(+), 4 deletions(-)
 create mode 100644 changelog.d/15969.feature

(limited to 'synapse')

diff --git a/changelog.d/15969.feature b/changelog.d/15969.feature
new file mode 100644
index 0000000000..0d77fae2dc
--- /dev/null
+++ b/changelog.d/15969.feature
@@ -0,0 +1 @@
+Allow configuring the set of workers to proxy outbound federation traffic through via `outbound_federation_restricted_to`.
diff --git a/synapse/http/server.py b/synapse/http/server.py
index f592600880..5109cec983 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -327,10 +327,6 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta):
             # of our stack, and thus gives us a sensible stack
             # trace.
             f = failure.Failure()
-            logger.exception(
-                "Error handling request",
-                exc_info=(f.type, f.value, f.getTracebackObject()),
-            )
             self._send_error_response(f, request)
 
     async def _async_render(
-- 
cgit 1.5.1


From fc1e534e411174d730ca3c0c7e4d2ef7fd8be56b Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 20 Jul 2023 15:51:28 +0100
Subject: Speed up updating state in large rooms (#15971)

This should speed up updating state in rooms with lots of state.
---
 changelog.d/15971.misc                       |   1 +
 synapse/handlers/message.py                  |   9 +-
 synapse/state/__init__.py                    |   3 +-
 synapse/storage/controllers/state.py         | 137 ++++++++++++++++++++++++++-
 synapse/storage/databases/main/roommember.py | 122 ------------------------
 5 files changed, 141 insertions(+), 131 deletions(-)
 create mode 100644 changelog.d/15971.misc

(limited to 'synapse')

diff --git a/changelog.d/15971.misc b/changelog.d/15971.misc
new file mode 100644
index 0000000000..4afd8922fc
--- /dev/null
+++ b/changelog.d/15971.misc
@@ -0,0 +1 @@
+Speed up updating state in large rooms.
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 9910716bc6..fff0b5fa12 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -1565,12 +1565,11 @@ class EventCreationHandler:
                 if state_entry.state_group in self._external_cache_joined_hosts_updates:
                     return
 
-                state = await state_entry.get_state(
-                    self._storage_controllers.state, StateFilter.all()
-                )
                 with opentracing.start_active_span("get_joined_hosts"):
-                    joined_hosts = await self.store.get_joined_hosts(
-                        event.room_id, state, state_entry
+                    joined_hosts = (
+                        await self._storage_controllers.state.get_joined_hosts(
+                            event.room_id, state_entry
+                        )
                     )
 
                 # Note that the expiry times must be larger than the expiry time in
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index 9bc0c3b7b9..1b91cf5eaa 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -268,8 +268,7 @@ class StateHandler:
             The hosts in the room at the given events
         """
         entry = await self.resolve_state_groups_for_events(room_id, event_ids)
-        state = await entry.get_state(self._state_storage_controller, StateFilter.all())
-        return await self.store.get_joined_hosts(room_id, state, entry)
+        return await self._state_storage_controller.get_joined_hosts(room_id, entry)
 
     @trace
     @tag_args
diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py
index 233df7cce2..278c7832ba 100644
--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+from itertools import chain
 from typing import (
     TYPE_CHECKING,
     AbstractSet,
@@ -19,14 +20,16 @@ from typing import (
     Callable,
     Collection,
     Dict,
+    FrozenSet,
     Iterable,
     List,
     Mapping,
     Optional,
     Tuple,
+    Union,
 )
 
-from synapse.api.constants import EventTypes
+from synapse.api.constants import EventTypes, Membership
 from synapse.events import EventBase
 from synapse.logging.opentracing import tag_args, trace
 from synapse.storage.roommember import ProfileInfo
@@ -34,14 +37,20 @@ from synapse.storage.util.partial_state_events_tracker import (
     PartialCurrentStateTracker,
     PartialStateEventsTracker,
 )
-from synapse.types import MutableStateMap, StateMap
+from synapse.types import MutableStateMap, StateMap, get_domain_from_id
 from synapse.types.state import StateFilter
+from synapse.util.async_helpers import Linearizer
+from synapse.util.caches import intern_string
+from synapse.util.caches.descriptors import cached
 from synapse.util.cancellation import cancellable
+from synapse.util.metrics import Measure
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
+    from synapse.state import _StateCacheEntry
     from synapse.storage.databases import Databases
 
+
 logger = logging.getLogger(__name__)
 
 
@@ -52,10 +61,15 @@ class StateStorageController:
 
     def __init__(self, hs: "HomeServer", stores: "Databases"):
         self._is_mine_id = hs.is_mine_id
+        self._clock = hs.get_clock()
         self.stores = stores
         self._partial_state_events_tracker = PartialStateEventsTracker(stores.main)
         self._partial_state_room_tracker = PartialCurrentStateTracker(stores.main)
 
+        # Used by `_get_joined_hosts` to ensure only one thing mutates the cache
+        # at a time. Keyed by room_id.
+        self._joined_host_linearizer = Linearizer("_JoinedHostsCache")
+
     def notify_event_un_partial_stated(self, event_id: str) -> None:
         self._partial_state_events_tracker.notify_un_partial_stated(event_id)
 
@@ -627,3 +641,122 @@ class StateStorageController:
         await self._partial_state_room_tracker.await_full_state(room_id)
 
         return await self.stores.main.get_users_in_room_with_profiles(room_id)
+
+    async def get_joined_hosts(
+        self, room_id: str, state_entry: "_StateCacheEntry"
+    ) -> FrozenSet[str]:
+        state_group: Union[object, int] = state_entry.state_group
+        if not state_group:
+            # If state_group is None it means it has yet to be assigned a
+            # state group, so we need to make sure that calls with a state_group
+            # of None don't hit previous cached calls with a None state_group.
+            # To do this we set the state_group to a new object as object() != object()
+            state_group = object()
+
+        assert state_group is not None
+        with Measure(self._clock, "get_joined_hosts"):
+            return await self._get_joined_hosts(
+                room_id, state_group, state_entry=state_entry
+            )
+
+    @cached(num_args=2, max_entries=10000, iterable=True)
+    async def _get_joined_hosts(
+        self,
+        room_id: str,
+        state_group: Union[object, int],
+        state_entry: "_StateCacheEntry",
+    ) -> FrozenSet[str]:
+        # We don't use `state_group`, it's there so that we can cache based on
+        # it. However, it's important that it's never None, since two
+        # current_state's with a state_group of None are likely to be different.
+        #
+        # The `state_group` must match the `state_entry.state_group` (if not None).
+        assert state_group is not None
+        assert state_entry.state_group is None or state_entry.state_group == state_group
+
+        # We use a secondary cache of previous work to allow us to build up the
+        # joined hosts for the given state group based on previous state groups.
+        #
+        # We cache one object per room containing the results of the last state
+        # group we got joined hosts for. The idea is that generally
+        # `get_joined_hosts` is called with the "current" state group for the
+        # room, and so consecutive calls will be for consecutive state groups
+        # which point to the previous state group.
+        cache = await self.stores.main._get_joined_hosts_cache(room_id)
+
+        # If the state group in the cache matches, we already have the data we need.
+        if state_entry.state_group == cache.state_group:
+            return frozenset(cache.hosts_to_joined_users)
+
+        # Since we'll mutate the cache we need to lock.
+        async with self._joined_host_linearizer.queue(room_id):
+            if state_entry.state_group == cache.state_group:
+                # Same state group, so nothing to do. We've already checked for
+                # this above, but the cache may have changed while waiting on
+                # the lock.
+                pass
+            elif state_entry.prev_group == cache.state_group:
+                # The cached work is for the previous state group, so we work out
+                # the delta.
+                assert state_entry.delta_ids is not None
+                for (typ, state_key), event_id in state_entry.delta_ids.items():
+                    if typ != EventTypes.Member:
+                        continue
+
+                    host = intern_string(get_domain_from_id(state_key))
+                    user_id = state_key
+                    known_joins = cache.hosts_to_joined_users.setdefault(host, set())
+
+                    event = await self.stores.main.get_event(event_id)
+                    if event.membership == Membership.JOIN:
+                        known_joins.add(user_id)
+                    else:
+                        known_joins.discard(user_id)
+
+                        if not known_joins:
+                            cache.hosts_to_joined_users.pop(host, None)
+            else:
+                # The cache doesn't match the state group or prev state group,
+                # so we calculate the result from first principles.
+                #
+                # We need to fetch all hosts joined to the room according to `state` by
+                # inspecting all join memberships in `state`. However, if the `state` is
+                # relatively recent then many of its events are likely to be held in
+                # the current state of the room, which is easily available and likely
+                # cached.
+                #
+                # We therefore compute the set of `state` events not in the
+                # current state and only fetch those.
+                current_memberships = (
+                    await self.stores.main._get_approximate_current_memberships_in_room(
+                        room_id
+                    )
+                )
+                unknown_state_events = {}
+                joined_users_in_current_state = []
+
+                state = await state_entry.get_state(
+                    self, StateFilter.from_types([(EventTypes.Member, None)])
+                )
+
+                for (type, state_key), event_id in state.items():
+                    if event_id not in current_memberships:
+                        unknown_state_events[type, state_key] = event_id
+                    elif current_memberships[event_id] == Membership.JOIN:
+                        joined_users_in_current_state.append(state_key)
+
+                joined_user_ids = await self.stores.main.get_joined_user_ids_from_state(
+                    room_id, unknown_state_events
+                )
+
+                cache.hosts_to_joined_users = {}
+                for user_id in chain(joined_user_ids, joined_users_in_current_state):
+                    host = intern_string(get_domain_from_id(user_id))
+                    cache.hosts_to_joined_users.setdefault(host, set()).add(user_id)
+
+            if state_entry.state_group:
+                cache.state_group = state_entry.state_group
+            else:
+                cache.state_group = object()
+
+        return frozenset(cache.hosts_to_joined_users)
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 582875c91a..fff259f74c 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from itertools import chain
 from typing import (
     TYPE_CHECKING,
     AbstractSet,
@@ -57,15 +56,12 @@ from synapse.types import (
     StrCollection,
     get_domain_from_id,
 )
-from synapse.util.async_helpers import Linearizer
-from synapse.util.caches import intern_string
 from synapse.util.caches.descriptors import _CacheContext, cached, cachedList
 from synapse.util.iterutils import batch_iter
 from synapse.util.metrics import Measure
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
-    from synapse.state import _StateCacheEntry
 
 logger = logging.getLogger(__name__)
 
@@ -91,10 +87,6 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
     ):
         super().__init__(database, db_conn, hs)
 
-        # Used by `_get_joined_hosts` to ensure only one thing mutates the cache
-        # at a time. Keyed by room_id.
-        self._joined_host_linearizer = Linearizer("_JoinedHostsCache")
-
         self._server_notices_mxid = hs.config.servernotices.server_notices_mxid
 
         if (
@@ -1057,120 +1049,6 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
             "get_current_hosts_in_room_ordered", get_current_hosts_in_room_ordered_txn
         )
 
-    async def get_joined_hosts(
-        self, room_id: str, state: StateMap[str], state_entry: "_StateCacheEntry"
-    ) -> FrozenSet[str]:
-        state_group: Union[object, int] = state_entry.state_group
-        if not state_group:
-            # If state_group is None it means it has yet to be assigned a
-            # state group, i.e. we need to make sure that calls with a state_group
-            # of None don't hit previous cached calls with a None state_group.
-            # To do this we set the state_group to a new object as object() != object()
-            state_group = object()
-
-        assert state_group is not None
-        with Measure(self._clock, "get_joined_hosts"):
-            return await self._get_joined_hosts(
-                room_id, state_group, state, state_entry=state_entry
-            )
-
-    @cached(num_args=2, max_entries=10000, iterable=True)
-    async def _get_joined_hosts(
-        self,
-        room_id: str,
-        state_group: Union[object, int],
-        state: StateMap[str],
-        state_entry: "_StateCacheEntry",
-    ) -> FrozenSet[str]:
-        # We don't use `state_group`, it's there so that we can cache based on
-        # it. However, its important that its never None, since two
-        # current_state's with a state_group of None are likely to be different.
-        #
-        # The `state_group` must match the `state_entry.state_group` (if not None).
-        assert state_group is not None
-        assert state_entry.state_group is None or state_entry.state_group == state_group
-
-        # We use a secondary cache of previous work to allow us to build up the
-        # joined hosts for the given state group based on previous state groups.
-        #
-        # We cache one object per room containing the results of the last state
-        # group we got joined hosts for. The idea is that generally
-        # `get_joined_hosts` is called with the "current" state group for the
-        # room, and so consecutive calls will be for consecutive state groups
-        # which point to the previous state group.
-        cache = await self._get_joined_hosts_cache(room_id)
-
-        # If the state group in the cache matches, we already have the data we need.
-        if state_entry.state_group == cache.state_group:
-            return frozenset(cache.hosts_to_joined_users)
-
-        # Since we'll mutate the cache we need to lock.
-        async with self._joined_host_linearizer.queue(room_id):
-            if state_entry.state_group == cache.state_group:
-                # Same state group, so nothing to do. We've already checked for
-                # this above, but the cache may have changed while waiting on
-                # the lock.
-                pass
-            elif state_entry.prev_group == cache.state_group:
-                # The cached work is for the previous state group, so we work out
-                # the delta.
-                assert state_entry.delta_ids is not None
-                for (typ, state_key), event_id in state_entry.delta_ids.items():
-                    if typ != EventTypes.Member:
-                        continue
-
-                    host = intern_string(get_domain_from_id(state_key))
-                    user_id = state_key
-                    known_joins = cache.hosts_to_joined_users.setdefault(host, set())
-
-                    event = await self.get_event(event_id)
-                    if event.membership == Membership.JOIN:
-                        known_joins.add(user_id)
-                    else:
-                        known_joins.discard(user_id)
-
-                        if not known_joins:
-                            cache.hosts_to_joined_users.pop(host, None)
-            else:
-                # The cache doesn't match the state group or prev state group,
-                # so we calculate the result from first principles.
-                #
-                # We need to fetch all hosts joined to the room according to `state` by
-                # inspecting all join memberships in `state`. However, if the `state` is
-                # relatively recent then many of its events are likely to be held in
-                # the current state of the room, which is easily available and likely
-                # cached.
-                #
-                # We therefore compute the set of `state` events not in the
-                # current state and only fetch those.
-                current_memberships = (
-                    await self._get_approximate_current_memberships_in_room(room_id)
-                )
-                unknown_state_events = {}
-                joined_users_in_current_state = []
-
-                for (type, state_key), event_id in state.items():
-                    if event_id not in current_memberships:
-                        unknown_state_events[type, state_key] = event_id
-                    elif current_memberships[event_id] == Membership.JOIN:
-                        joined_users_in_current_state.append(state_key)
-
-                joined_user_ids = await self.get_joined_user_ids_from_state(
-                    room_id, unknown_state_events
-                )
-
-                cache.hosts_to_joined_users = {}
-                for user_id in chain(joined_user_ids, joined_users_in_current_state):
-                    host = intern_string(get_domain_from_id(user_id))
-                    cache.hosts_to_joined_users.setdefault(host, set()).add(user_id)
-
-            if state_entry.state_group:
-                cache.state_group = state_entry.state_group
-            else:
-                cache.state_group = object()
-
-        return frozenset(cache.hosts_to_joined_users)
-
     async def _get_approximate_current_memberships_in_room(
         self, room_id: str
     ) -> Mapping[str, Optional[str]]:
-- 
cgit 1.5.1


From f08d05dd2ce8ab38240cfa691b07a27cff0356e9 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Sun, 23 Jul 2023 16:30:54 -0700
Subject: Actually stop reading from column `user_id` of table `profiles`
 (#15955)

---
 changelog.d/15955.misc                           |  1 +
 synapse/storage/databases/main/__init__.py       |  4 ++--
 synapse/storage/databases/main/stats.py          |  4 ++--
 synapse/storage/databases/main/user_directory.py | 13 ++++++-------
 tests/rest/admin/test_user.py                    |  2 +-
 5 files changed, 12 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/15955.misc

(limited to 'synapse')

diff --git a/changelog.d/15955.misc b/changelog.d/15955.misc
new file mode 100644
index 0000000000..dc4f687e0a
--- /dev/null
+++ b/changelog.d/15955.misc
@@ -0,0 +1 @@
+Stop reading from column `user_id` of table `profiles`.
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index 80c0304b19..be67d1ff22 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -196,7 +196,7 @@ class DataStore(
             txn: LoggingTransaction,
         ) -> Tuple[List[JsonDict], int]:
             filters = []
-            args = [self.hs.config.server.server_name]
+            args: list = []
 
             # Set ordering
             order_by_column = UserSortOrder(order_by).value
@@ -263,7 +263,7 @@ class DataStore(
 
             sql_base = f"""
                 FROM users as u
-                LEFT JOIN profiles AS p ON u.name = '@' || p.user_id || ':' || ?
+                LEFT JOIN profiles AS p ON u.name = p.full_user_id
                 LEFT JOIN erased_users AS eu ON u.name = eu.user_id
                 {where_clause}
                 """
diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py
index 97c4dc2603..f34b7ce8f4 100644
--- a/synapse/storage/databases/main/stats.py
+++ b/synapse/storage/databases/main/stats.py
@@ -697,7 +697,7 @@ class StatsStore(StateDeltasStore):
             txn: LoggingTransaction,
         ) -> Tuple[List[JsonDict], int]:
             filters = []
-            args = [self.hs.config.server.server_name]
+            args: list = []
 
             if search_term:
                 filters.append("(lmr.user_id LIKE ? OR displayname LIKE ?)")
@@ -733,7 +733,7 @@ class StatsStore(StateDeltasStore):
 
             sql_base = """
                 FROM local_media_repository as lmr
-                LEFT JOIN profiles AS p ON lmr.user_id = '@' || p.user_id || ':' || ?
+                LEFT JOIN profiles AS p ON lmr.user_id = p.full_user_id
                 {}
                 GROUP BY lmr.user_id, displayname
             """.format(
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index 924022c95c..2a136f2ff6 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -409,23 +409,22 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
                 txn, users_to_work_on
             )
 
-            # Next fetch their profiles. Note that the `user_id` here is the
-            # *localpart*, and that not all users have profiles.
+            # Next fetch their profiles. Note that not all users have profiles.
             profile_rows = self.db_pool.simple_select_many_txn(
                 txn,
                 table="profiles",
-                column="user_id",
-                iterable=[get_localpart_from_id(u) for u in users_to_insert],
+                column="full_user_id",
+                iterable=list(users_to_insert),
                 retcols=(
-                    "user_id",
+                    "full_user_id",
                     "displayname",
                     "avatar_url",
                 ),
                 keyvalues={},
             )
             profiles = {
-                f"@{row['user_id']}:{self.server_name}": _UserDirProfile(
-                    f"@{row['user_id']}:{self.server_name}",
+                row["full_user_id"]: _UserDirProfile(
+                    row["full_user_id"],
                     row["displayname"],
                     row["avatar_url"],
                 )
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index 6f7b4bf642..9af9db6e3e 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -1418,7 +1418,7 @@ class DeactivateAccountTestCase(unittest.HomeserverTestCase):
         # To test deactivation for users without a profile, we delete the profile information for our user.
         self.get_success(
             self.store.db_pool.simple_delete_one(
-                table="profiles", keyvalues={"user_id": "user"}
+                table="profiles", keyvalues={"full_user_id": "@user:test"}
             )
         )
 
-- 
cgit 1.5.1


From 5c7364fea57e24ae3ce2ac833a3521abd58312db Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Sun, 23 Jul 2023 16:32:01 -0700
Subject: Properly handle redactions of creation events (#15973)

---
 changelog.d/15973.bugfix   | 1 +
 synapse/events/utils.py    | 8 +++++---
 tests/events/test_utils.py | 9 +++++++--
 3 files changed, 13 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/15973.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15973.bugfix b/changelog.d/15973.bugfix
new file mode 100644
index 0000000000..c9280d0037
--- /dev/null
+++ b/changelog.d/15973.bugfix
@@ -0,0 +1 @@
+Properly handle redactions of creation events.
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index ecfc5c0568..c890833b1d 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -136,11 +136,13 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
                     ]
 
     elif event_type == EventTypes.Create:
-        # MSC2176 rules state that create events cannot be redacted.
         if room_version.updated_redaction_rules:
-            return event_dict
+            # MSC2176 rules state that create events cannot have their `content` redacted.
+            new_content = event_dict["content"]
+        elif not room_version.implicit_room_creator:
+            # Some room versions give meaning to `creator`
+            add_fields("creator")
 
-        add_fields("creator")
     elif event_type == EventTypes.JoinRules:
         add_fields("join_rule")
         if room_version.restricted_join_rule:
diff --git a/tests/events/test_utils.py b/tests/events/test_utils.py
index 6a52af4d82..978612e432 100644
--- a/tests/events/test_utils.py
+++ b/tests/events/test_utils.py
@@ -225,9 +225,14 @@ class PruneEventTestCase(stdlib_unittest.TestCase):
             },
         )
 
-        # After MSC2176, create events get nothing redacted.
+        # After MSC2176, create events should preserve field `content`
         self.run_test(
-            {"type": "m.room.create", "content": {"not_a_real_key": True}},
+            {
+                "type": "m.room.create",
+                "content": {"not_a_real_key": True},
+                "origin": "some_homeserver",
+                "nonsense_field": "some_random_garbage",
+            },
             {
                 "type": "m.room.create",
                 "content": {"not_a_real_key": True},
-- 
cgit 1.5.1


From 654902a7583d20d7e0b57dc4634fbe573ff99993 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 24 Jul 2023 13:43:43 +0100
Subject: Resync stale devices in background (#15975)

This is so we don't block responding to federation transactions while we
try to fetch the device lists.
---
 changelog.d/15975.bugfix   | 1 +
 synapse/handlers/device.py | 9 ++++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15975.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15975.bugfix b/changelog.d/15975.bugfix
new file mode 100644
index 0000000000..59738cca0a
--- /dev/null
+++ b/changelog.d/15975.bugfix
@@ -0,0 +1 @@
+Fix bug where resyncing stale device lists could block responding to federation transactions, and thus delay receiving new data from the remote server.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 5d12a39e26..d73d9dca08 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -1124,7 +1124,14 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
                 )
 
             if resync:
-                await self.multi_user_device_resync([user_id])
+                # We mark as stale up front in case we get restarted.
+                await self.store.mark_remote_users_device_caches_as_stale([user_id])
+                run_as_background_process(
+                    "_maybe_retry_device_resync",
+                    self.multi_user_device_resync,
+                    [user_id],
+                    False,
+                )
             else:
                 # Simply update the single device, since we know that is the only
                 # change (because of the single prev_id matching the current cache)
-- 
cgit 1.5.1


From 641ff9ef7eaa7f1a632b983f4d36bb28dc23484d Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Mon, 24 Jul 2023 08:23:19 -0700
Subject: Support MSC3814: Dehydrated Devices (#15929)

Signed-off-by: Nicolas Werner <n.werner@famedly.com>
Co-authored-by: Nicolas Werner <n.werner@famedly.com>
Co-authored-by: Nicolas Werner <89468146+nico-famedly@users.noreply.github.com>
Co-authored-by: Hubert Chathi <hubert@uhoreg.ca>
---
 changelog.d/15929.feature         |   1 +
 synapse/config/experimental.py    |  21 ++++
 synapse/handlers/device.py        |   4 +-
 synapse/handlers/devicemessage.py | 108 +++++++++++++++++-
 synapse/rest/client/devices.py    | 232 +++++++++++++++++++++++++++++++++++++-
 tests/handlers/test_device.py     |  99 +++++++++++++++-
 tests/rest/client/test_devices.py | 150 +++++++++++++++++++++++-
 7 files changed, 603 insertions(+), 12 deletions(-)
 create mode 100644 changelog.d/15929.feature

(limited to 'synapse')

diff --git a/changelog.d/15929.feature b/changelog.d/15929.feature
new file mode 100644
index 0000000000..c3aaeae66e
--- /dev/null
+++ b/changelog.d/15929.feature
@@ -0,0 +1 @@
+Implement [MSC3814](https://github.com/matrix-org/matrix-spec-proposals/pull/3814), dehydrated devices v2/shrivelled sessions and move [MSC2697](https://github.com/matrix-org/matrix-spec-proposals/pull/2697) behind a config flag. Contributed by Nico from Famedly and H-Shay.
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 0970f22a75..1695ed8ca3 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -247,6 +247,27 @@ class ExperimentalConfig(Config):
         # MSC3026 (busy presence state)
         self.msc3026_enabled: bool = experimental.get("msc3026_enabled", False)
 
+        # MSC2697 (device dehydration)
+        # Enabled by default since this option was added after adding the feature.
+        # It is not recommended that both MSC2697 and MSC3814 both be enabled at
+        # once.
+        self.msc2697_enabled: bool = experimental.get("msc2697_enabled", True)
+
+        # MSC3814 (dehydrated devices with SSSS)
+        # This is an alternative method to achieve the same goals as MSC2697.
+        # It is not recommended that both MSC2697 and MSC3814 both be enabled at
+        # once.
+        self.msc3814_enabled: bool = experimental.get("msc3814_enabled", False)
+
+        if self.msc2697_enabled and self.msc3814_enabled:
+            raise ConfigError(
+                "MSC2697 and MSC3814 should not both be enabled.",
+                (
+                    "experimental_features",
+                    "msc3814_enabled",
+                ),
+            )
+
         # MSC3244 (room version capabilities)
         self.msc3244_enabled: bool = experimental.get("msc3244_enabled", True)
 
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index d73d9dca08..f3a713f5fa 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -653,6 +653,7 @@ class DeviceHandler(DeviceWorkerHandler):
     async def store_dehydrated_device(
         self,
         user_id: str,
+        device_id: Optional[str],
         device_data: JsonDict,
         initial_device_display_name: Optional[str] = None,
     ) -> str:
@@ -661,6 +662,7 @@ class DeviceHandler(DeviceWorkerHandler):
 
         Args:
             user_id: the user that we are storing the device for
+            device_id: device id supplied by client
             device_data: the dehydrated device information
             initial_device_display_name: The display name to use for the device
         Returns:
@@ -668,7 +670,7 @@ class DeviceHandler(DeviceWorkerHandler):
         """
         device_id = await self.check_device_registered(
             user_id,
-            None,
+            device_id,
             initial_device_display_name,
         )
         old_device_id = await self.store.store_dehydrated_device(
diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py
index 3caf9b31cc..15e94a03cb 100644
--- a/synapse/handlers/devicemessage.py
+++ b/synapse/handlers/devicemessage.py
@@ -13,10 +13,11 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, Any, Dict
+from http import HTTPStatus
+from typing import TYPE_CHECKING, Any, Dict, Optional
 
 from synapse.api.constants import EduTypes, EventContentFields, ToDeviceEventTypes
-from synapse.api.errors import SynapseError
+from synapse.api.errors import Codes, SynapseError
 from synapse.api.ratelimiting import Ratelimiter
 from synapse.logging.context import run_in_background
 from synapse.logging.opentracing import (
@@ -48,6 +49,9 @@ class DeviceMessageHandler:
         self.store = hs.get_datastores().main
         self.notifier = hs.get_notifier()
         self.is_mine = hs.is_mine
+        if hs.config.experimental.msc3814_enabled:
+            self.event_sources = hs.get_event_sources()
+            self.device_handler = hs.get_device_handler()
 
         # We only need to poke the federation sender explicitly if its on the
         # same instance. Other federation sender instances will get notified by
@@ -303,3 +307,103 @@ class DeviceMessageHandler:
                 # Enqueue a new federation transaction to send the new
                 # device messages to each remote destination.
                 self.federation_sender.send_device_messages(destination)
+
+    async def get_events_for_dehydrated_device(
+        self,
+        requester: Requester,
+        device_id: str,
+        since_token: Optional[str],
+        limit: int,
+    ) -> JsonDict:
+        """Fetches up to `limit` events sent to `device_id` starting from `since_token`
+        and returns the new since token. If there are no more messages, returns an empty
+        array.
+
+        Args:
+            requester: the user requesting the messages
+            device_id: ID of the dehydrated device
+            since_token: stream id to start from when fetching messages
+            limit: the number of messages to fetch
+        Returns:
+            A dict containing the to-device messages, as well as a token that the client
+            can provide in the next call to fetch the next batch of messages
+        """
+
+        user_id = requester.user.to_string()
+
+        # only allow fetching messages for the dehydrated device id currently associated
+        # with the user
+        dehydrated_device = await self.device_handler.get_dehydrated_device(user_id)
+        if dehydrated_device is None:
+            raise SynapseError(
+                HTTPStatus.FORBIDDEN,
+                "No dehydrated device exists",
+                Codes.FORBIDDEN,
+            )
+
+        dehydrated_device_id, _ = dehydrated_device
+        if device_id != dehydrated_device_id:
+            raise SynapseError(
+                HTTPStatus.FORBIDDEN,
+                "You may only fetch messages for your dehydrated device",
+                Codes.FORBIDDEN,
+            )
+
+        since_stream_id = 0
+        if since_token:
+            if not since_token.startswith("d"):
+                raise SynapseError(
+                    HTTPStatus.BAD_REQUEST,
+                    "from parameter %r has an invalid format" % (since_token,),
+                    errcode=Codes.INVALID_PARAM,
+                )
+
+            try:
+                since_stream_id = int(since_token[1:])
+            except Exception:
+                raise SynapseError(
+                    HTTPStatus.BAD_REQUEST,
+                    "from parameter %r has an invalid format" % (since_token,),
+                    errcode=Codes.INVALID_PARAM,
+                )
+
+            # if we have a since token, delete any to-device messages before that token
+            # (since we now know that the device has received them)
+            deleted = await self.store.delete_messages_for_device(
+                user_id, device_id, since_stream_id
+            )
+            logger.debug(
+                "Deleted %d to-device messages up to %d for user_id %s device_id %s",
+                deleted,
+                since_stream_id,
+                user_id,
+                device_id,
+            )
+
+        to_token = self.event_sources.get_current_token().to_device_key
+
+        messages, stream_id = await self.store.get_messages_for_device(
+            user_id, device_id, since_stream_id, to_token, limit
+        )
+
+        for message in messages:
+            # Remove the message id before sending to client
+            message_id = message.pop("message_id", None)
+            if message_id:
+                set_tag(SynapseTags.TO_DEVICE_EDU_ID, message_id)
+
+        logger.debug(
+            "Returning %d to-device messages between %d and %d (current token: %d) for "
+            "dehydrated device %s, user_id %s",
+            len(messages),
+            since_stream_id,
+            stream_id,
+            to_token,
+            device_id,
+            user_id,
+        )
+
+        return {
+            "events": messages,
+            "next_batch": f"d{stream_id}",
+        }
diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py
index 38dff9703f..690d2ec406 100644
--- a/synapse/rest/client/devices.py
+++ b/synapse/rest/client/devices.py
@@ -14,19 +14,22 @@
 # limitations under the License.
 
 import logging
+from http import HTTPStatus
 from typing import TYPE_CHECKING, List, Optional, Tuple
 
 from pydantic import Extra, StrictStr
 
 from synapse.api import errors
-from synapse.api.errors import NotFoundError, UnrecognizedRequestError
+from synapse.api.errors import NotFoundError, SynapseError, UnrecognizedRequestError
 from synapse.handlers.device import DeviceHandler
 from synapse.http.server import HttpServer
 from synapse.http.servlet import (
     RestServlet,
     parse_and_validate_json_object_from_request,
+    parse_integer,
 )
 from synapse.http.site import SynapseRequest
+from synapse.replication.http.devices import ReplicationUploadKeysForUserRestServlet
 from synapse.rest.client._base import client_patterns, interactive_auth_handler
 from synapse.rest.client.models import AuthenticationData
 from synapse.rest.models import RequestBodyModel
@@ -229,6 +232,8 @@ class DehydratedDeviceDataModel(RequestBodyModel):
 class DehydratedDeviceServlet(RestServlet):
     """Retrieve or store a dehydrated device.
 
+    Implements either MSC2697 or MSC3814.
+
     GET /org.matrix.msc2697.v2/dehydrated_device
 
     HTTP/1.1 200 OK
@@ -261,9 +266,7 @@ class DehydratedDeviceServlet(RestServlet):
 
     """
 
-    PATTERNS = client_patterns("/org.matrix.msc2697.v2/dehydrated_device$", releases=())
-
-    def __init__(self, hs: "HomeServer"):
+    def __init__(self, hs: "HomeServer", msc2697: bool = True):
         super().__init__()
         self.hs = hs
         self.auth = hs.get_auth()
@@ -271,6 +274,13 @@ class DehydratedDeviceServlet(RestServlet):
         assert isinstance(handler, DeviceHandler)
         self.device_handler = handler
 
+        self.PATTERNS = client_patterns(
+            "/org.matrix.msc2697.v2/dehydrated_device$"
+            if msc2697
+            else "/org.matrix.msc3814.v1/dehydrated_device$",
+            releases=(),
+        )
+
     async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
         dehydrated_device = await self.device_handler.get_dehydrated_device(
@@ -293,6 +303,7 @@ class DehydratedDeviceServlet(RestServlet):
 
         device_id = await self.device_handler.store_dehydrated_device(
             requester.user.to_string(),
+            None,
             submission.device_data.dict(),
             submission.initial_device_display_name,
         )
@@ -347,6 +358,210 @@ class ClaimDehydratedDeviceServlet(RestServlet):
         return 200, result
 
 
+class DehydratedDeviceEventsServlet(RestServlet):
+    PATTERNS = client_patterns(
+        "/org.matrix.msc3814.v1/dehydrated_device/(?P<device_id>[^/]*)/events$",
+        releases=(),
+    )
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__()
+        self.message_handler = hs.get_device_message_handler()
+        self.auth = hs.get_auth()
+        self.store = hs.get_datastores().main
+
+    class PostBody(RequestBodyModel):
+        next_batch: Optional[StrictStr]
+
+    async def on_POST(
+        self, request: SynapseRequest, device_id: str
+    ) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request)
+
+        next_batch = parse_and_validate_json_object_from_request(
+            request, self.PostBody
+        ).next_batch
+        limit = parse_integer(request, "limit", 100)
+
+        msgs = await self.message_handler.get_events_for_dehydrated_device(
+            requester=requester,
+            device_id=device_id,
+            since_token=next_batch,
+            limit=limit,
+        )
+
+        return 200, msgs
+
+
+class DehydratedDeviceV2Servlet(RestServlet):
+    """Upload, retrieve, or delete a dehydrated device.
+
+    GET /org.matrix.msc3814.v1/dehydrated_device
+
+    HTTP/1.1 200 OK
+    Content-Type: application/json
+
+    {
+      "device_id": "dehydrated_device_id",
+      "device_data": {
+        "algorithm": "org.matrix.msc2697.v1.dehydration.v1.olm",
+        "account": "dehydrated_device"
+      }
+    }
+
+    PUT /org.matrix.msc3814.v1/dehydrated_device
+    Content-Type: application/json
+
+    {
+        "device_id": "dehydrated_device_id",
+        "device_data": {
+            "algorithm": "org.matrix.msc2697.v1.dehydration.v1.olm",
+            "account": "dehydrated_device"
+        },
+        "device_keys": {
+            "user_id": "<user_id>",
+            "device_id": "<device_id>",
+            "valid_until_ts": <millisecond_timestamp>,
+            "algorithms": [
+                "m.olm.curve25519-aes-sha2",
+            ]
+            "keys": {
+                "<algorithm>:<device_id>": "<key_base64>",
+            },
+            "signatures:" {
+                "<user_id>" {
+                    "<algorithm>:<device_id>": "<signature_base64>"
+                }
+            }
+        },
+        "fallback_keys": {
+            "<algorithm>:<device_id>": "<key_base64>",
+            "signed_<algorithm>:<device_id>": {
+                "fallback": true,
+                "key": "<key_base64>",
+                "signatures": {
+                    "<user_id>": {
+                        "<algorithm>:<device_id>": "<key_base64>"
+                    }
+                }
+            }
+        }
+        "one_time_keys": {
+            "<algorithm>:<key_id>": "<key_base64>"
+        },
+
+    }
+
+    HTTP/1.1 200 OK
+    Content-Type: application/json
+
+    {
+      "device_id": "dehydrated_device_id"
+    }
+
+    DELETE /org.matrix.msc3814.v1/dehydrated_device
+
+    HTTP/1.1 200 OK
+    Content-Type: application/json
+
+    {
+      "device_id": "dehydrated_device_id",
+    }
+    """
+
+    PATTERNS = [
+        *client_patterns("/org.matrix.msc3814.v1/dehydrated_device$", releases=()),
+    ]
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__()
+        self.hs = hs
+        self.auth = hs.get_auth()
+        handler = hs.get_device_handler()
+        assert isinstance(handler, DeviceHandler)
+        self.e2e_keys_handler = hs.get_e2e_keys_handler()
+        self.device_handler = handler
+
+        if hs.config.worker.worker_app is None:
+            # if main process
+            self.key_uploader = self.e2e_keys_handler.upload_keys_for_user
+        else:
+            # then a worker
+            self.key_uploader = ReplicationUploadKeysForUserRestServlet.make_client(hs)
+
+    async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request)
+
+        dehydrated_device = await self.device_handler.get_dehydrated_device(
+            requester.user.to_string()
+        )
+
+        if dehydrated_device is not None:
+            (device_id, device_data) = dehydrated_device
+            result = {"device_id": device_id, "device_data": device_data}
+            return 200, result
+        else:
+            raise errors.NotFoundError("No dehydrated device available")
+
+    async def on_DELETE(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
+        requester = await self.auth.get_user_by_req(request)
+
+        dehydrated_device = await self.device_handler.get_dehydrated_device(
+            requester.user.to_string()
+        )
+
+        if dehydrated_device is not None:
+            (device_id, device_data) = dehydrated_device
+
+            result = await self.device_handler.rehydrate_device(
+                requester.user.to_string(),
+                self.auth.get_access_token_from_request(request),
+                device_id,
+            )
+
+            result = {"device_id": device_id}
+
+            return 200, result
+        else:
+            raise errors.NotFoundError("No dehydrated device available")
+
+    class PutBody(RequestBodyModel):
+        device_data: DehydratedDeviceDataModel
+        device_id: StrictStr
+        initial_device_display_name: Optional[StrictStr]
+
+        class Config:
+            extra = Extra.allow
+
+    async def on_PUT(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
+        submission = parse_and_validate_json_object_from_request(request, self.PutBody)
+        requester = await self.auth.get_user_by_req(request)
+        user_id = requester.user.to_string()
+
+        device_info = submission.dict()
+        if "device_keys" not in device_info.keys():
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST,
+                "Device key(s) not found, these must be provided.",
+            )
+
+        # TODO: Those two operations, creating a device and storing the
+        # device's keys should be atomic.
+        device_id = await self.device_handler.store_dehydrated_device(
+            requester.user.to_string(),
+            submission.device_id,
+            submission.device_data.dict(),
+            submission.initial_device_display_name,
+        )
+
+        # TODO: Do we need to do something with the result here?
+        await self.key_uploader(
+            user_id=user_id, device_id=submission.device_id, keys=submission.dict()
+        )
+
+        return 200, {"device_id": device_id}
+
+
 def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     if (
         hs.config.worker.worker_app is None
@@ -354,7 +569,12 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     ):
         DeleteDevicesRestServlet(hs).register(http_server)
     DevicesRestServlet(hs).register(http_server)
+
     if hs.config.worker.worker_app is None:
         DeviceRestServlet(hs).register(http_server)
-        DehydratedDeviceServlet(hs).register(http_server)
-        ClaimDehydratedDeviceServlet(hs).register(http_server)
+        if hs.config.experimental.msc2697_enabled:
+            DehydratedDeviceServlet(hs, msc2697=True).register(http_server)
+            ClaimDehydratedDeviceServlet(hs).register(http_server)
+        if hs.config.experimental.msc3814_enabled:
+            DehydratedDeviceV2Servlet(hs).register(http_server)
+            DehydratedDeviceEventsServlet(hs).register(http_server)
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index 66215af2b8..647ee09279 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -17,15 +17,18 @@
 from typing import Optional
 from unittest import mock
 
+from twisted.internet.defer import ensureDeferred
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.constants import RoomEncryptionAlgorithms
 from synapse.api.errors import NotFoundError, SynapseError
 from synapse.appservice import ApplicationService
 from synapse.handlers.device import MAX_DEVICE_DISPLAY_NAME_LEN, DeviceHandler
+from synapse.rest import admin
+from synapse.rest.client import devices, login, register
 from synapse.server import HomeServer
 from synapse.storage.databases.main.appservice import _make_exclusive_regex
-from synapse.types import JsonDict
+from synapse.types import JsonDict, create_requester
 from synapse.util import Clock
 
 from tests import unittest
@@ -399,11 +402,19 @@ class DeviceTestCase(unittest.HomeserverTestCase):
 
 
 class DehydrationTestCase(unittest.HomeserverTestCase):
+    servlets = [
+        admin.register_servlets_for_client_rest_resource,
+        login.register_servlets,
+        register.register_servlets,
+        devices.register_servlets,
+    ]
+
     def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
         hs = self.setup_test_homeserver("server")
         handler = hs.get_device_handler()
         assert isinstance(handler, DeviceHandler)
         self.handler = handler
+        self.message_handler = hs.get_device_message_handler()
         self.registration = hs.get_registration_handler()
         self.auth = hs.get_auth()
         self.store = hs.get_datastores().main
@@ -418,6 +429,7 @@ class DehydrationTestCase(unittest.HomeserverTestCase):
         stored_dehydrated_device_id = self.get_success(
             self.handler.store_dehydrated_device(
                 user_id=user_id,
+                device_id=None,
                 device_data={"device_data": {"foo": "bar"}},
                 initial_device_display_name="dehydrated device",
             )
@@ -481,3 +493,88 @@ class DehydrationTestCase(unittest.HomeserverTestCase):
         ret = self.get_success(self.handler.get_dehydrated_device(user_id=user_id))
 
         self.assertIsNone(ret)
+
+    @unittest.override_config(
+        {"experimental_features": {"msc2697_enabled": False, "msc3814_enabled": True}}
+    )
+    def test_dehydrate_v2_and_fetch_events(self) -> None:
+        user_id = "@boris:server"
+
+        self.get_success(self.store.register_user(user_id, "foobar"))
+
+        # First check if we can store and fetch a dehydrated device
+        stored_dehydrated_device_id = self.get_success(
+            self.handler.store_dehydrated_device(
+                user_id=user_id,
+                device_id=None,
+                device_data={"device_data": {"foo": "bar"}},
+                initial_device_display_name="dehydrated device",
+            )
+        )
+
+        device_info = self.get_success(
+            self.handler.get_dehydrated_device(user_id=user_id)
+        )
+        assert device_info is not None
+        retrieved_device_id, device_data = device_info
+        self.assertEqual(retrieved_device_id, stored_dehydrated_device_id)
+        self.assertEqual(device_data, {"device_data": {"foo": "bar"}})
+
+        # Create a new login for the user
+        device_id, access_token, _expiration_time, _refresh_token = self.get_success(
+            self.registration.register_device(
+                user_id=user_id,
+                device_id=None,
+                initial_display_name="new device",
+            )
+        )
+
+        requester = create_requester(user_id, device_id=device_id)
+
+        # Fetching messages for a non-existing device should return an error
+        self.get_failure(
+            self.message_handler.get_events_for_dehydrated_device(
+                requester=requester,
+                device_id="not the right device ID",
+                since_token=None,
+                limit=10,
+            ),
+            SynapseError,
+        )
+
+        # Send a message to the dehydrated device
+        ensureDeferred(
+            self.message_handler.send_device_message(
+                requester=requester,
+                message_type="test.message",
+                messages={user_id: {stored_dehydrated_device_id: {"body": "foo"}}},
+            )
+        )
+        self.pump()
+
+        # Fetch the message of the dehydrated device
+        res = self.get_success(
+            self.message_handler.get_events_for_dehydrated_device(
+                requester=requester,
+                device_id=stored_dehydrated_device_id,
+                since_token=None,
+                limit=10,
+            )
+        )
+
+        self.assertTrue(len(res["next_batch"]) > 1)
+        self.assertEqual(len(res["events"]), 1)
+        self.assertEqual(res["events"][0]["content"]["body"], "foo")
+
+        # Fetch the message of the dehydrated device again, which should return nothing
+        # and delete the old messages
+        res = self.get_success(
+            self.message_handler.get_events_for_dehydrated_device(
+                requester=requester,
+                device_id=stored_dehydrated_device_id,
+                since_token=res["next_batch"],
+                limit=10,
+            )
+        )
+        self.assertTrue(len(res["next_batch"]) > 1)
+        self.assertEqual(len(res["events"]), 0)
diff --git a/tests/rest/client/test_devices.py b/tests/rest/client/test_devices.py
index d80eea17d3..b7d420cfec 100644
--- a/tests/rest/client/test_devices.py
+++ b/tests/rest/client/test_devices.py
@@ -13,12 +13,14 @@
 # limitations under the License.
 from http import HTTPStatus
 
+from twisted.internet.defer import ensureDeferred
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.errors import NotFoundError
 from synapse.rest import admin, devices, room, sync
-from synapse.rest.client import account, login, register
+from synapse.rest.client import account, keys, login, register
 from synapse.server import HomeServer
+from synapse.types import JsonDict, create_requester
 from synapse.util import Clock
 
 from tests import unittest
@@ -208,8 +210,13 @@ class DehydratedDeviceTestCase(unittest.HomeserverTestCase):
         login.register_servlets,
         register.register_servlets,
         devices.register_servlets,
+        keys.register_servlets,
     ]
 
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.registration = hs.get_registration_handler()
+        self.message_handler = hs.get_device_message_handler()
+
     def test_PUT(self) -> None:
         """Sanity-check that we can PUT a dehydrated device.
 
@@ -226,7 +233,21 @@ class DehydratedDeviceTestCase(unittest.HomeserverTestCase):
                 "device_data": {
                     "algorithm": "org.matrix.msc2697.v1.dehydration.v1.olm",
                     "account": "dehydrated_device",
-                }
+                },
+                "device_keys": {
+                    "user_id": "@alice:test",
+                    "device_id": "device1",
+                    "valid_until_ts": "80",
+                    "algorithms": [
+                        "m.olm.curve25519-aes-sha2",
+                    ],
+                    "keys": {
+                        "<algorithm>:<device_id>": "<key_base64>",
+                    },
+                    "signatures": {
+                        "<user_id>": {"<algorithm>:<device_id>": "<signature_base64>"}
+                    },
+                },
             },
             access_token=token,
             shorthand=False,
@@ -234,3 +255,128 @@ class DehydratedDeviceTestCase(unittest.HomeserverTestCase):
         self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body)
         device_id = channel.json_body.get("device_id")
         self.assertIsInstance(device_id, str)
+
+    @unittest.override_config(
+        {"experimental_features": {"msc2697_enabled": False, "msc3814_enabled": True}}
+    )
+    def test_dehydrate_msc3814(self) -> None:
+        user = self.register_user("mikey", "pass")
+        token = self.login(user, "pass", device_id="device1")
+        content: JsonDict = {
+            "device_data": {
+                "algorithm": "m.dehydration.v1.olm",
+            },
+            "device_id": "device1",
+            "initial_device_display_name": "foo bar",
+            "device_keys": {
+                "user_id": "@mikey:test",
+                "device_id": "device1",
+                "valid_until_ts": "80",
+                "algorithms": [
+                    "m.olm.curve25519-aes-sha2",
+                ],
+                "keys": {
+                    "<algorithm>:<device_id>": "<key_base64>",
+                },
+                "signatures": {
+                    "<user_id>": {"<algorithm>:<device_id>": "<signature_base64>"}
+                },
+            },
+        }
+        channel = self.make_request(
+            "PUT",
+            "_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device",
+            content=content,
+            access_token=token,
+            shorthand=False,
+        )
+        self.assertEqual(channel.code, 200)
+        device_id = channel.json_body.get("device_id")
+        assert device_id is not None
+        self.assertIsInstance(device_id, str)
+        self.assertEqual("device1", device_id)
+
+        # test that we can now GET the dehydrated device info
+        channel = self.make_request(
+            "GET",
+            "_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device",
+            access_token=token,
+            shorthand=False,
+        )
+        self.assertEqual(channel.code, 200)
+        returned_device_id = channel.json_body.get("device_id")
+        self.assertEqual(returned_device_id, device_id)
+        device_data = channel.json_body.get("device_data")
+        expected_device_data = {
+            "algorithm": "m.dehydration.v1.olm",
+        }
+        self.assertEqual(device_data, expected_device_data)
+
+        # create another device for the user
+        (
+            new_device_id,
+            _,
+            _,
+            _,
+        ) = self.get_success(
+            self.registration.register_device(
+                user_id=user,
+                device_id=None,
+                initial_display_name="new device",
+            )
+        )
+        requester = create_requester(user, device_id=new_device_id)
+
+        # Send a message to the dehydrated device
+        ensureDeferred(
+            self.message_handler.send_device_message(
+                requester=requester,
+                message_type="test.message",
+                messages={user: {device_id: {"body": "test_message"}}},
+            )
+        )
+        self.pump()
+
+        # make sure we can fetch the message with our dehydrated device id
+        channel = self.make_request(
+            "POST",
+            f"_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device/{device_id}/events",
+            content={},
+            access_token=token,
+            shorthand=False,
+        )
+        self.assertEqual(channel.code, 200)
+        expected_content = {"body": "test_message"}
+        self.assertEqual(channel.json_body["events"][0]["content"], expected_content)
+        next_batch_token = channel.json_body.get("next_batch")
+
+        # fetch messages again and make sure that the message was deleted and we are returned an
+        # empty array
+        content = {"next_batch": next_batch_token}
+        channel = self.make_request(
+            "POST",
+            f"_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device/{device_id}/events",
+            content=content,
+            access_token=token,
+            shorthand=False,
+        )
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(channel.json_body["events"], [])
+
+        # make sure we can delete the dehydrated device
+        channel = self.make_request(
+            "DELETE",
+            "_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device",
+            access_token=token,
+            shorthand=False,
+        )
+        self.assertEqual(channel.code, 200)
+
+        # ...and after deleting it is no longer available
+        channel = self.make_request(
+            "GET",
+            "_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device",
+            access_token=token,
+            shorthand=False,
+        )
+        self.assertEqual(channel.code, 404)
-- 
cgit 1.5.1


From 8ebfd577e237eb7b364a692c88e14bc8820980d1 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Wed, 26 Jul 2023 14:51:44 +0200
Subject: Bump DB version to 79 since synapse v1.88 was already there (#15998)

---
 changelog.d/15998.bugfix                           |   1 +
 synapse/storage/schema/__init__.py                 |   6 +-
 .../78/04_read_write_locks_triggers.sql.postgres   | 101 --------------------
 .../78/04_read_write_locks_triggers.sql.sqlite     |  72 ---------------
 .../78/05_mitigate_stream_ordering_update_race.py  |  70 --------------
 .../78/06_read_write_locks_triggers.sql.postgres   |  69 --------------
 .../78/06_read_write_locks_triggers.sql.sqlite     |  65 -------------
 .../79/03_read_write_locks_triggers.sql.postgres   | 102 +++++++++++++++++++++
 .../79/03_read_write_locks_triggers.sql.sqlite     |  72 +++++++++++++++
 .../79/04_mitigate_stream_ordering_update_race.py  |  70 ++++++++++++++
 .../79/05_read_write_locks_triggers.sql.postgres   |  69 ++++++++++++++
 .../79/05_read_write_locks_triggers.sql.sqlite     |  65 +++++++++++++
 12 files changed, 384 insertions(+), 378 deletions(-)
 create mode 100644 changelog.d/15998.bugfix
 delete mode 100644 synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.postgres
 delete mode 100644 synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.sqlite
 delete mode 100644 synapse/storage/schema/main/delta/78/05_mitigate_stream_ordering_update_race.py
 delete mode 100644 synapse/storage/schema/main/delta/78/06_read_write_locks_triggers.sql.postgres
 delete mode 100644 synapse/storage/schema/main/delta/78/06_read_write_locks_triggers.sql.sqlite
 create mode 100644 synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.sqlite
 create mode 100644 synapse/storage/schema/main/delta/79/04_mitigate_stream_ordering_update_race.py
 create mode 100644 synapse/storage/schema/main/delta/79/05_read_write_locks_triggers.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/79/05_read_write_locks_triggers.sql.sqlite

(limited to 'synapse')

diff --git a/changelog.d/15998.bugfix b/changelog.d/15998.bugfix
new file mode 100644
index 0000000000..b4ad8d776b
--- /dev/null
+++ b/changelog.d/15998.bugfix
@@ -0,0 +1 @@
+Internal changelog to be removed.
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index fc190a8b13..d3ec648f6d 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 78  # remember to update the list below when updating
+SCHEMA_VERSION = 79  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -106,6 +106,10 @@ Changes in SCHEMA_VERSION = 77
 
 Changes in SCHEMA_VERSION = 78
     - Validate check (full_user_id IS NOT NULL) on tables profiles and user_filters
+
+Changes in SCHEMA_VERSION = 79
+    - Add tables to handle in DB read-write locks.
+    - Add some mitigations for a painful race between foreground and background updates, cf #15677.
 """
 
 
diff --git a/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.postgres b/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.postgres
deleted file mode 100644
index e1cc3469a4..0000000000
--- a/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.postgres
+++ /dev/null
@@ -1,101 +0,0 @@
-/* Copyright 2023 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
--- We implement read/write style locks by using two tables with mutual foreign
--- key constraints. Note that this implementation is vulnerable to starving
--- writers if read locks repeatedly get acquired.
---
--- The first table (`worker_read_write_locks_mode`) indicates that a given lock
--- has either been acquired in read mode *or* write mode, but not both. This is
--- enforced by the unique constraint. Each instance of a lock being acquired is
--- associated with a random `token`.
---
--- The second table (`worker_read_write_locks`) tracks who has currently
--- acquired a given lock. For a given lock_name/lock_key, there can be multiple
--- read locks at a time but only one write lock (no mixing read and write locks
--- at the same time).
---
--- The foreign key from the second to first table enforces that for any given
--- lock the second table cannot have a mix of rows with read or write.
---
--- The foreign key from the first to second table enforces that we don't have a
--- row for a lock in the first table if not in the second table.
---
---
--- Furthermore, we add some triggers to automatically keep the first table up to
--- date when inserting/deleting from the second table. This reduces the number
--- of round trips needed to acquire and release locks, as those operations
--- simply become an INSERT or DELETE. These triggers are added in a separate
--- delta due to database specific syntax.
-
-
--- A table to track whether a lock is currently acquired, and if so whether its
--- in read or write mode.
-CREATE TABLE worker_read_write_locks_mode (
-    lock_name TEXT NOT NULL,
-    lock_key TEXT NOT NULL,
-    -- Whether this lock is in read (false) or write (true) mode
-    write_lock BOOLEAN NOT NULL,
-    -- A token that has currently acquired the lock. We need this so that we can
-    -- add a foreign constraint from this table to `worker_read_write_locks`.
-    token TEXT NOT NULL
-);
-
--- Ensure that we can only have one row per lock
-CREATE UNIQUE INDEX worker_read_write_locks_mode_key ON worker_read_write_locks_mode (lock_name, lock_key);
--- We need this (redundant) constraint so that we can have a foreign key
--- constraint against this table.
-CREATE UNIQUE INDEX worker_read_write_locks_mode_type ON worker_read_write_locks_mode (lock_name, lock_key, write_lock);
-
-
--- A table to track who has currently acquired a given lock.
-CREATE TABLE worker_read_write_locks (
-    lock_name TEXT NOT NULL,
-    lock_key TEXT NOT NULL,
-    -- We write the instance name to ease manual debugging, we don't ever read
-    -- from it.
-    -- Note: instance names aren't guarenteed to be unique.
-    instance_name TEXT NOT NULL,
-    -- Whether the process has taken out a "read" or a "write" lock.
-    write_lock BOOLEAN NOT NULL,
-    -- A random string generated each time an instance takes out a lock. Used by
-    -- the instance to tell whether the lock is still held by it (e.g. in the
-    -- case where the process stalls for a long time the lock may time out and
-    -- be taken out by another instance, at which point the original instance
-    -- can tell it no longer holds the lock as the tokens no longer match).
-    token TEXT NOT NULL,
-    last_renewed_ts BIGINT NOT NULL,
-
-    -- This constraint ensures that a given lock has only been acquired in read
-    -- xor write mode, but not both.
-    FOREIGN KEY (lock_name, lock_key, write_lock) REFERENCES worker_read_write_locks_mode (lock_name, lock_key, write_lock)
-);
-
-CREATE UNIQUE INDEX worker_read_write_locks_key ON worker_read_write_locks (lock_name, lock_key, token);
--- Ensures that only one instance can acquire a lock in write mode at a time.
-CREATE UNIQUE INDEX worker_read_write_locks_write ON worker_read_write_locks (lock_name, lock_key) WHERE write_lock;
-
-
--- Add a foreign key constraint to ensure that if a lock is in
--- `worker_read_write_locks_mode` then there must be a corresponding row in
--- `worker_read_write_locks` (i.e. we don't accidentally end up with a row in
--- `worker_read_write_locks_mode` when the lock is not currently acquired).
---
--- We only add to PostgreSQL as SQLite does not support adding constraints
--- after table creation, and so doesn't support "circular" foreign key
--- constraints.
-ALTER TABLE worker_read_write_locks_mode ADD CONSTRAINT worker_read_write_locks_mode_foreign
-    FOREIGN KEY (lock_name, lock_key, token) REFERENCES worker_read_write_locks(lock_name, lock_key, token) DEFERRABLE INITIALLY DEFERRED;
diff --git a/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.sqlite b/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.sqlite
deleted file mode 100644
index b15432f576..0000000000
--- a/synapse/storage/schema/main/delta/78/04_read_write_locks_triggers.sql.sqlite
+++ /dev/null
@@ -1,72 +0,0 @@
-/* Copyright 2023 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
--- c.f. the postgres version for context. The tables and constraints are the
--- same, however they need to be defined slightly differently to work around how
--- each database handles circular foreign key references.
-
-
-
--- A table to track whether a lock is currently acquired, and if so whether its
--- in read or write mode.
-CREATE TABLE worker_read_write_locks_mode (
-    lock_name TEXT NOT NULL,
-    lock_key TEXT NOT NULL,
-    -- Whether this lock is in read (false) or write (true) mode
-    write_lock BOOLEAN NOT NULL,
-    -- A token that has currently acquired the lock. We need this so that we can
-    -- add a foreign constraint from this table to `worker_read_write_locks`.
-    token TEXT NOT NULL,
-    -- Add a foreign key constraint to ensure that if a lock is in
-    -- `worker_read_write_locks_mode` then there must be a corresponding row in
-    -- `worker_read_write_locks` (i.e. we don't accidentally end up with a row in
-    -- `worker_read_write_locks_mode` when the lock is not currently acquired).
-    FOREIGN KEY (lock_name, lock_key, token) REFERENCES worker_read_write_locks(lock_name, lock_key, token) DEFERRABLE INITIALLY DEFERRED
-);
-
--- Ensure that we can only have one row per lock
-CREATE UNIQUE INDEX worker_read_write_locks_mode_key ON worker_read_write_locks_mode (lock_name, lock_key);
--- We need this (redundant) constraint so that we can have a foreign key
--- constraint against this table.
-CREATE UNIQUE INDEX worker_read_write_locks_mode_type ON worker_read_write_locks_mode (lock_name, lock_key, write_lock);
-
-
--- A table to track who has currently acquired a given lock.
-CREATE TABLE worker_read_write_locks (
-    lock_name TEXT NOT NULL,
-    lock_key TEXT NOT NULL,
-    -- We write the instance name to ease manual debugging, we don't ever read
-    -- from it.
-    -- Note: instance names aren't guarenteed to be unique.
-    instance_name TEXT NOT NULL,
-    -- Whether the process has taken out a "read" or a "write" lock.
-    write_lock BOOLEAN NOT NULL,
-    -- A random string generated each time an instance takes out a lock. Used by
-    -- the instance to tell whether the lock is still held by it (e.g. in the
-    -- case where the process stalls for a long time the lock may time out and
-    -- be taken out by another instance, at which point the original instance
-    -- can tell it no longer holds the lock as the tokens no longer match).
-    token TEXT NOT NULL,
-    last_renewed_ts BIGINT NOT NULL,
-
-    -- This constraint ensures that a given lock has only been acquired in read
-    -- xor write mode, but not both.
-    FOREIGN KEY (lock_name, lock_key, write_lock) REFERENCES worker_read_write_locks_mode (lock_name, lock_key, write_lock)
-);
-
-CREATE UNIQUE INDEX worker_read_write_locks_key ON worker_read_write_locks (lock_name, lock_key, token);
--- Ensures that only one instance can acquire a lock in write mode at a time.
-CREATE UNIQUE INDEX worker_read_write_locks_write ON worker_read_write_locks (lock_name, lock_key) WHERE write_lock;
diff --git a/synapse/storage/schema/main/delta/78/05_mitigate_stream_ordering_update_race.py b/synapse/storage/schema/main/delta/78/05_mitigate_stream_ordering_update_race.py
deleted file mode 100644
index 1a22f6a404..0000000000
--- a/synapse/storage/schema/main/delta/78/05_mitigate_stream_ordering_update_race.py
+++ /dev/null
@@ -1,70 +0,0 @@
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-
-from synapse.storage.database import LoggingTransaction
-from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
-
-
-def run_create(
-    cur: LoggingTransaction,
-    database_engine: BaseDatabaseEngine,
-) -> None:
-    """
-    An attempt to mitigate a painful race between foreground and background updates
-    touching the `stream_ordering` column of the events table. More info can be found
-    at https://github.com/matrix-org/synapse/issues/15677.
-    """
-
-    # technically the bg update we're concerned with below should only have been added in
-    # postgres but it doesn't hurt to be extra careful
-    if isinstance(database_engine, PostgresEngine):
-        select_sql = """
-            SELECT 1 FROM background_updates
-                WHERE update_name = 'replace_stream_ordering_column'
-        """
-        cur.execute(select_sql)
-        res = cur.fetchone()
-
-        # if the background update `replace_stream_ordering_column` is still pending, we need
-        # to drop the indexes added in 7403, and re-add them to the column `stream_ordering2`
-        # with the idea that they will be preserved when the column is renamed `stream_ordering`
-        # after the background update has finished
-        if res:
-            drop_cse_sql = """
-            ALTER TABLE current_state_events DROP CONSTRAINT event_stream_ordering_fkey
-            """
-            cur.execute(drop_cse_sql)
-
-            drop_lcm_sql = """
-            ALTER TABLE local_current_membership DROP CONSTRAINT event_stream_ordering_fkey
-            """
-            cur.execute(drop_lcm_sql)
-
-            drop_rm_sql = """
-            ALTER TABLE room_memberships DROP CONSTRAINT event_stream_ordering_fkey
-            """
-            cur.execute(drop_rm_sql)
-
-            add_cse_sql = """
-            ALTER TABLE current_state_events ADD CONSTRAINT event_stream_ordering_fkey
-            FOREIGN KEY (event_stream_ordering) REFERENCES events(stream_ordering2) NOT VALID;
-            """
-            cur.execute(add_cse_sql)
-
-            add_lcm_sql = """
-            ALTER TABLE local_current_membership ADD CONSTRAINT event_stream_ordering_fkey
-            FOREIGN KEY (event_stream_ordering) REFERENCES events(stream_ordering2) NOT VALID;
-            """
-            cur.execute(add_lcm_sql)
-
-            add_rm_sql = """
-            ALTER TABLE room_memberships ADD CONSTRAINT event_stream_ordering_fkey
-            FOREIGN KEY (event_stream_ordering) REFERENCES events(stream_ordering2) NOT VALID;
-            """
-            cur.execute(add_rm_sql)
diff --git a/synapse/storage/schema/main/delta/78/06_read_write_locks_triggers.sql.postgres b/synapse/storage/schema/main/delta/78/06_read_write_locks_triggers.sql.postgres
deleted file mode 100644
index ea3496ef2d..0000000000
--- a/synapse/storage/schema/main/delta/78/06_read_write_locks_triggers.sql.postgres
+++ /dev/null
@@ -1,69 +0,0 @@
-/* Copyright 2023 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- Fix up the triggers that were in `78/04_read_write_locks_triggers.sql`
-
--- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try
--- and acquire a lock, i.e. insert into `worker_read_write_locks`,
-CREATE OR REPLACE FUNCTION upsert_read_write_lock_parent() RETURNS trigger AS $$
-BEGIN
-    INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
-        VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
-        ON CONFLICT (lock_name, lock_key)
-        DO UPDATE SET write_lock = NEW.write_lock, token = NEW.token;
-    RETURN NEW;
-END
-$$
-LANGUAGE plpgsql;
-
-DROP TRIGGER IF EXISTS upsert_read_write_lock_parent_trigger ON worker_read_write_locks;
-CREATE TRIGGER upsert_read_write_lock_parent_trigger BEFORE INSERT ON worker_read_write_locks
-    FOR EACH ROW
-    EXECUTE PROCEDURE upsert_read_write_lock_parent();
-
-
--- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
--- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
--- update the `worker_read_write_locks_mode.token` to match another instance
--- that has currently acquired the lock, or we delete the row if nobody has
--- currently acquired a lock.
-CREATE OR REPLACE FUNCTION delete_read_write_lock_parent() RETURNS trigger AS $$
-DECLARE
-    new_token TEXT;
-BEGIN
-    SELECT token INTO new_token FROM worker_read_write_locks
-        WHERE
-            lock_name = OLD.lock_name
-            AND lock_key = OLD.lock_key
-        LIMIT 1 FOR UPDATE;
-
-    IF NOT FOUND THEN
-        DELETE FROM worker_read_write_locks_mode
-            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key AND token = OLD.token;
-    ELSE
-        UPDATE worker_read_write_locks_mode
-            SET token = new_token
-            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
-    END IF;
-
-    RETURN NEW;
-END
-$$
-LANGUAGE plpgsql;
-
-DROP TRIGGER IF EXISTS delete_read_write_lock_parent_trigger ON worker_read_write_locks;
-CREATE TRIGGER delete_read_write_lock_parent_trigger AFTER DELETE ON worker_read_write_locks
-    FOR EACH ROW
-    EXECUTE PROCEDURE delete_read_write_lock_parent();
diff --git a/synapse/storage/schema/main/delta/78/06_read_write_locks_triggers.sql.sqlite b/synapse/storage/schema/main/delta/78/06_read_write_locks_triggers.sql.sqlite
deleted file mode 100644
index acb1a77c80..0000000000
--- a/synapse/storage/schema/main/delta/78/06_read_write_locks_triggers.sql.sqlite
+++ /dev/null
@@ -1,65 +0,0 @@
-/* Copyright 2023 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- Fix up the triggers that were in `78/04_read_write_locks_triggers.sql`
-
--- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try
--- and acquire a lock, i.e. insert into `worker_read_write_locks`,
-DROP TRIGGER IF EXISTS upsert_read_write_lock_parent_trigger;
-CREATE TRIGGER IF NOT EXISTS upsert_read_write_lock_parent_trigger
-BEFORE INSERT ON worker_read_write_locks
-FOR EACH ROW
-BEGIN
-    -- First ensure that `worker_read_write_locks_mode` doesn't have stale
-    -- entries in it, as on SQLite we don't have the foreign key constraint to
-    -- enforce this.
-    DELETE FROM worker_read_write_locks_mode
-        WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key
-        AND NOT EXISTS (
-            SELECT 1 FROM worker_read_write_locks
-            WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key
-        );
-
-    INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
-        VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
-        ON CONFLICT (lock_name, lock_key)
-        DO UPDATE SET write_lock = NEW.write_lock, token = NEW.token;
-END;
-
--- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
--- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
--- update the `worker_read_write_locks_mode.token` to match another instance
--- that has currently acquired the lock, or we delete the row if nobody has
--- currently acquired a lock.
-DROP TRIGGER IF EXISTS delete_read_write_lock_parent_trigger;
-CREATE TRIGGER IF NOT EXISTS delete_read_write_lock_parent_trigger
-AFTER DELETE ON worker_read_write_locks
-FOR EACH ROW
-BEGIN
-    DELETE FROM worker_read_write_locks_mode
-        WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
-            AND token = OLD.token
-        AND NOT EXISTS (
-            SELECT 1 FROM worker_read_write_locks
-            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
-        );
-
-    UPDATE worker_read_write_locks_mode
-        SET token = (
-            SELECT token FROM worker_read_write_locks
-            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
-        )
-        WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
-END;
diff --git a/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.postgres b/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.postgres
new file mode 100644
index 0000000000..7df07ab0da
--- /dev/null
+++ b/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.postgres
@@ -0,0 +1,102 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- We implement read/write style locks by using two tables with mutual foreign
+-- key constraints. Note that this implementation is vulnerable to starving
+-- writers if read locks repeatedly get acquired.
+--
+-- The first table (`worker_read_write_locks_mode`) indicates that a given lock
+-- has either been acquired in read mode *or* write mode, but not both. This is
+-- enforced by the unique constraint. Each instance of a lock being acquired is
+-- associated with a random `token`.
+--
+-- The second table (`worker_read_write_locks`) tracks who has currently
+-- acquired a given lock. For a given lock_name/lock_key, there can be multiple
+-- read locks at a time but only one write lock (no mixing read and write locks
+-- at the same time).
+--
+-- The foreign key from the second to first table enforces that for any given
+-- lock the second table cannot have a mix of rows with read or write.
+--
+-- The foreign key from the first to second table enforces that we don't have a
+-- row for a lock in the first table if not in the second table.
+--
+--
+-- Furthermore, we add some triggers to automatically keep the first table up to
+-- date when inserting/deleting from the second table. This reduces the number
+-- of round trips needed to acquire and release locks, as those operations
+-- simply become an INSERT or DELETE. These triggers are added in a separate
+-- delta due to database specific syntax.
+
+
+-- A table to track whether a lock is currently acquired, and if so whether it's
+-- in read or write mode.
+CREATE TABLE IF NOT EXISTS worker_read_write_locks_mode (
+    lock_name TEXT NOT NULL,
+    lock_key TEXT NOT NULL,
+    -- Whether this lock is in read (false) or write (true) mode
+    write_lock BOOLEAN NOT NULL,
+    -- A token that has currently acquired the lock. We need this so that we can
+    -- add a foreign constraint from this table to `worker_read_write_locks`.
+    token TEXT NOT NULL
+);
+
+-- Ensure that we can only have one row per lock
+CREATE UNIQUE INDEX IF NOT EXISTS worker_read_write_locks_mode_key ON worker_read_write_locks_mode (lock_name, lock_key);
+-- We need this (redundant) constraint so that we can have a foreign key
+-- constraint against this table.
+CREATE UNIQUE INDEX IF NOT EXISTS worker_read_write_locks_mode_type ON worker_read_write_locks_mode (lock_name, lock_key, write_lock);
+
+
+-- A table to track who has currently acquired a given lock.
+CREATE TABLE IF NOT EXISTS worker_read_write_locks (
+    lock_name TEXT NOT NULL,
+    lock_key TEXT NOT NULL,
+    -- We write the instance name to ease manual debugging, we don't ever read
+    -- from it.
+    -- Note: instance names aren't guaranteed to be unique.
+    instance_name TEXT NOT NULL,
+    -- Whether the process has taken out a "read" or a "write" lock.
+    write_lock BOOLEAN NOT NULL,
+    -- A random string generated each time an instance takes out a lock. Used by
+    -- the instance to tell whether the lock is still held by it (e.g. in the
+    -- case where the process stalls for a long time the lock may time out and
+    -- be taken out by another instance, at which point the original instance
+    -- can tell it no longer holds the lock as the tokens no longer match).
+    token TEXT NOT NULL,
+    last_renewed_ts BIGINT NOT NULL,
+
+    -- This constraint ensures that a given lock has only been acquired in read
+    -- xor write mode, but not both.
+    FOREIGN KEY (lock_name, lock_key, write_lock) REFERENCES worker_read_write_locks_mode (lock_name, lock_key, write_lock)
+);
+
+CREATE UNIQUE INDEX IF NOT EXISTS worker_read_write_locks_key ON worker_read_write_locks (lock_name, lock_key, token);
+-- Ensures that only one instance can acquire a lock in write mode at a time.
+CREATE UNIQUE INDEX IF NOT EXISTS worker_read_write_locks_write ON worker_read_write_locks (lock_name, lock_key) WHERE write_lock;
+
+
+-- Add a foreign key constraint to ensure that if a lock is in
+-- `worker_read_write_locks_mode` then there must be a corresponding row in
+-- `worker_read_write_locks` (i.e. we don't accidentally end up with a row in
+-- `worker_read_write_locks_mode` when the lock is not currently acquired).
+--
+-- We only add to PostgreSQL as SQLite does not support adding constraints
+-- after table creation, and so doesn't support "circular" foreign key
+-- constraints.
+ALTER TABLE worker_read_write_locks_mode DROP CONSTRAINT IF EXISTS worker_read_write_locks_mode_foreign;
+ALTER TABLE worker_read_write_locks_mode ADD CONSTRAINT worker_read_write_locks_mode_foreign
+    FOREIGN KEY (lock_name, lock_key, token) REFERENCES worker_read_write_locks(lock_name, lock_key, token) DEFERRABLE INITIALLY DEFERRED;
diff --git a/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.sqlite b/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.sqlite
new file mode 100644
index 0000000000..95f9dbf120
--- /dev/null
+++ b/synapse/storage/schema/main/delta/79/03_read_write_locks_triggers.sql.sqlite
@@ -0,0 +1,72 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- c.f. the postgres version for context. The tables and constraints are the
+-- same, however they need to be defined slightly differently to work around how
+-- each database handles circular foreign key references.
+
+
+
+-- A table to track whether a lock is currently acquired, and if so whether it's
+-- in read or write mode.
+CREATE TABLE IF NOT EXISTS worker_read_write_locks_mode (
+    lock_name TEXT NOT NULL,
+    lock_key TEXT NOT NULL,
+    -- Whether this lock is in read (false) or write (true) mode
+    write_lock BOOLEAN NOT NULL,
+    -- A token that has currently acquired the lock. We need this so that we can
+    -- add a foreign constraint from this table to `worker_read_write_locks`.
+    token TEXT NOT NULL,
+    -- Add a foreign key constraint to ensure that if a lock is in
+    -- `worker_read_write_locks_mode` then there must be a corresponding row in
+    -- `worker_read_write_locks` (i.e. we don't accidentally end up with a row in
+    -- `worker_read_write_locks_mode` when the lock is not currently acquired).
+    FOREIGN KEY (lock_name, lock_key, token) REFERENCES worker_read_write_locks(lock_name, lock_key, token) DEFERRABLE INITIALLY DEFERRED
+);
+
+-- Ensure that we can only have one row per lock
+CREATE UNIQUE INDEX IF NOT EXISTS worker_read_write_locks_mode_key ON worker_read_write_locks_mode (lock_name, lock_key);
+-- We need this (redundant) constraint so that we can have a foreign key
+-- constraint against this table.
+CREATE UNIQUE INDEX IF NOT EXISTS worker_read_write_locks_mode_type ON worker_read_write_locks_mode (lock_name, lock_key, write_lock);
+
+
+-- A table to track who has currently acquired a given lock.
+CREATE TABLE IF NOT EXISTS worker_read_write_locks (
+    lock_name TEXT NOT NULL,
+    lock_key TEXT NOT NULL,
+    -- We write the instance name to ease manual debugging, we don't ever read
+    -- from it.
+    -- Note: instance names aren't guaranteed to be unique.
+    instance_name TEXT NOT NULL,
+    -- Whether the process has taken out a "read" or a "write" lock.
+    write_lock BOOLEAN NOT NULL,
+    -- A random string generated each time an instance takes out a lock. Used by
+    -- the instance to tell whether the lock is still held by it (e.g. in the
+    -- case where the process stalls for a long time the lock may time out and
+    -- be taken out by another instance, at which point the original instance
+    -- can tell it no longer holds the lock as the tokens no longer match).
+    token TEXT NOT NULL,
+    last_renewed_ts BIGINT NOT NULL,
+
+    -- This constraint ensures that a given lock has only been acquired in read
+    -- xor write mode, but not both.
+    FOREIGN KEY (lock_name, lock_key, write_lock) REFERENCES worker_read_write_locks_mode (lock_name, lock_key, write_lock)
+);
+
+CREATE UNIQUE INDEX IF NOT EXISTS worker_read_write_locks_key ON worker_read_write_locks (lock_name, lock_key, token);
+-- Ensures that only one instance can acquire a lock in write mode at a time.
+CREATE UNIQUE INDEX IF NOT EXISTS worker_read_write_locks_write ON worker_read_write_locks (lock_name, lock_key) WHERE write_lock;
diff --git a/synapse/storage/schema/main/delta/79/04_mitigate_stream_ordering_update_race.py b/synapse/storage/schema/main/delta/79/04_mitigate_stream_ordering_update_race.py
new file mode 100644
index 0000000000..ae63585847
--- /dev/null
+++ b/synapse/storage/schema/main/delta/79/04_mitigate_stream_ordering_update_race.py
@@ -0,0 +1,70 @@
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
+
+
+def run_create(
+    cur: LoggingTransaction,
+    database_engine: BaseDatabaseEngine,
+) -> None:
+    """
+    An attempt to mitigate a painful race between foreground and background updates
+    touching the `stream_ordering` column of the events table. More info can be found
+    at https://github.com/matrix-org/synapse/issues/15677.
+    """
+
+    # technically the bg update we're concerned with below should only have been added in
+    # postgres but it doesn't hurt to be extra careful
+    if isinstance(database_engine, PostgresEngine):
+        select_sql = """
+            SELECT 1 FROM background_updates
+                WHERE update_name = 'replace_stream_ordering_column'
+        """
+        cur.execute(select_sql)
+        res = cur.fetchone()
+
+        # if the background update `replace_stream_ordering_column` is still pending, we need
+        # to drop the foreign key constraints added in 7403, and re-add them against the column
+        # `stream_ordering2`, with the idea that they will be preserved when that column is
+        # renamed to `stream_ordering` after the background update has finished
+        if res:
+            drop_cse_sql = """
+            ALTER TABLE current_state_events DROP CONSTRAINT IF EXISTS event_stream_ordering_fkey
+            """
+            cur.execute(drop_cse_sql)
+
+            drop_lcm_sql = """
+            ALTER TABLE local_current_membership DROP CONSTRAINT IF EXISTS event_stream_ordering_fkey
+            """
+            cur.execute(drop_lcm_sql)
+
+            drop_rm_sql = """
+            ALTER TABLE room_memberships DROP CONSTRAINT IF EXISTS event_stream_ordering_fkey
+            """
+            cur.execute(drop_rm_sql)
+
+            add_cse_sql = """
+            ALTER TABLE current_state_events ADD CONSTRAINT event_stream_ordering_fkey
+            FOREIGN KEY (event_stream_ordering) REFERENCES events(stream_ordering2) NOT VALID;
+            """
+            cur.execute(add_cse_sql)
+
+            add_lcm_sql = """
+            ALTER TABLE local_current_membership ADD CONSTRAINT event_stream_ordering_fkey
+            FOREIGN KEY (event_stream_ordering) REFERENCES events(stream_ordering2) NOT VALID;
+            """
+            cur.execute(add_lcm_sql)
+
+            add_rm_sql = """
+            ALTER TABLE room_memberships ADD CONSTRAINT event_stream_ordering_fkey
+            FOREIGN KEY (event_stream_ordering) REFERENCES events(stream_ordering2) NOT VALID;
+            """
+            cur.execute(add_rm_sql)
diff --git a/synapse/storage/schema/main/delta/79/05_read_write_locks_triggers.sql.postgres b/synapse/storage/schema/main/delta/79/05_read_write_locks_triggers.sql.postgres
new file mode 100644
index 0000000000..ea3496ef2d
--- /dev/null
+++ b/synapse/storage/schema/main/delta/79/05_read_write_locks_triggers.sql.postgres
@@ -0,0 +1,69 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Fix up the triggers that were in `78/04_read_write_locks_triggers.sql`
+
+-- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try
+-- and acquire a lock, i.e. insert into `worker_read_write_locks`.
+CREATE OR REPLACE FUNCTION upsert_read_write_lock_parent() RETURNS trigger AS $$
+BEGIN
+    INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
+        VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
+        ON CONFLICT (lock_name, lock_key)
+        DO UPDATE SET write_lock = NEW.write_lock, token = NEW.token;
+    RETURN NEW;
+END
+$$
+LANGUAGE plpgsql;
+
+DROP TRIGGER IF EXISTS upsert_read_write_lock_parent_trigger ON worker_read_write_locks;
+CREATE TRIGGER upsert_read_write_lock_parent_trigger BEFORE INSERT ON worker_read_write_locks
+    FOR EACH ROW
+    EXECUTE PROCEDURE upsert_read_write_lock_parent();
+
+
+-- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
+-- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
+-- update the `worker_read_write_locks_mode.token` to match another instance
+-- that has currently acquired the lock, or we delete the row if nobody has
+-- currently acquired a lock.
+CREATE OR REPLACE FUNCTION delete_read_write_lock_parent() RETURNS trigger AS $$
+DECLARE
+    new_token TEXT;
+BEGIN
+    SELECT token INTO new_token FROM worker_read_write_locks
+        WHERE
+            lock_name = OLD.lock_name
+            AND lock_key = OLD.lock_key
+        LIMIT 1 FOR UPDATE;
+    -- No remaining holder: drop the mode row. Otherwise repoint it at a remaining holder's token.
+    IF NOT FOUND THEN
+        DELETE FROM worker_read_write_locks_mode
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key AND token = OLD.token;
+    ELSE
+        UPDATE worker_read_write_locks_mode
+            SET token = new_token
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
+    END IF;
+    -- NEW is NULL for DELETE, and the result of an AFTER row trigger is ignored, so return NULL.
+    RETURN NULL;
+END
+$$
+LANGUAGE plpgsql;
+
+DROP TRIGGER IF EXISTS delete_read_write_lock_parent_trigger ON worker_read_write_locks;
+CREATE TRIGGER delete_read_write_lock_parent_trigger AFTER DELETE ON worker_read_write_locks
+    FOR EACH ROW
+    EXECUTE PROCEDURE delete_read_write_lock_parent();
diff --git a/synapse/storage/schema/main/delta/79/05_read_write_locks_triggers.sql.sqlite b/synapse/storage/schema/main/delta/79/05_read_write_locks_triggers.sql.sqlite
new file mode 100644
index 0000000000..acb1a77c80
--- /dev/null
+++ b/synapse/storage/schema/main/delta/79/05_read_write_locks_triggers.sql.sqlite
@@ -0,0 +1,65 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Fix up the triggers that were in `78/04_read_write_locks_triggers.sql`
+
+-- Add a trigger to UPSERT into `worker_read_write_locks_mode` whenever we try
+-- and acquire a lock, i.e. insert into `worker_read_write_locks`.
+DROP TRIGGER IF EXISTS upsert_read_write_lock_parent_trigger;
+CREATE TRIGGER IF NOT EXISTS upsert_read_write_lock_parent_trigger
+BEFORE INSERT ON worker_read_write_locks
+FOR EACH ROW
+BEGIN
+    -- First ensure that `worker_read_write_locks_mode` doesn't have stale
+    -- entries in it, as on SQLite we don't have the foreign key constraint to
+    -- enforce this.
+    DELETE FROM worker_read_write_locks_mode
+        WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key
+        AND NOT EXISTS (
+            SELECT 1 FROM worker_read_write_locks
+            WHERE lock_name = NEW.lock_name AND lock_key = NEW.lock_key
+        );
+
+    INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
+        VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
+        ON CONFLICT (lock_name, lock_key)
+        DO UPDATE SET write_lock = NEW.write_lock, token = NEW.token;
+END;
+
+-- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
+-- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
+-- update the `worker_read_write_locks_mode.token` to match another instance
+-- that has currently acquired the lock, or we delete the row if nobody has
+-- currently acquired a lock.
+DROP TRIGGER IF EXISTS delete_read_write_lock_parent_trigger;
+CREATE TRIGGER IF NOT EXISTS delete_read_write_lock_parent_trigger
+AFTER DELETE ON worker_read_write_locks
+FOR EACH ROW
+BEGIN
+    DELETE FROM worker_read_write_locks_mode
+        WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
+            AND token = OLD.token
+        AND NOT EXISTS (
+            SELECT 1 FROM worker_read_write_locks
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
+        );
+
+    UPDATE worker_read_write_locks_mode
+        SET token = (
+            SELECT token FROM worker_read_write_locks
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key
+        )
+        WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
+END;
-- 
cgit 1.5.1


From 58f830511486271da72543dd20676b702bc52b2f Mon Sep 17 00:00:00 2001
From: Anshul Madnawat <100751856+anshulm333@users.noreply.github.com>
Date: Thu, 27 Jul 2023 00:15:47 +0530
Subject: Inline SQL queries using boolean parameters (#15525)

SQLite now supports TRUE and FALSE constants, simplify some
queries by inlining those instead of passing them as arguments.
---
 changelog.d/15525.misc                             |  1 +
 synapse/storage/databases/main/event_federation.py |  3 +--
 synapse/storage/databases/main/events.py           | 12 ++++++------
 synapse/storage/databases/main/purge_events.py     |  9 ++++-----
 synapse/storage/databases/main/push_rule.py        |  6 +++---
 synapse/storage/databases/main/registration.py     |  4 ++--
 synapse/storage/databases/main/room.py             | 10 +++++-----
 synapse/storage/databases/main/stream.py           |  4 ++--
 8 files changed, 24 insertions(+), 25 deletions(-)
 create mode 100644 changelog.d/15525.misc

(limited to 'synapse')

diff --git a/changelog.d/15525.misc b/changelog.d/15525.misc
new file mode 100644
index 0000000000..67ab0cf62f
--- /dev/null
+++ b/changelog.d/15525.misc
@@ -0,0 +1 @@
+Update SQL queries to inline boolean parameters as supported in SQLite 3.27.
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index b2cda52ce5..534dc32413 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -843,7 +843,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
                      * because the schema change is in a background update, it's not
                      * necessarily safe to assume that it will have been completed.
                      */
-                    AND edge.is_state is ? /* False */
+                    AND edge.is_state is FALSE
                     /**
                      * We only want backwards extremities that are older than or at
                      * the same position of the given `current_depth` (where older
@@ -886,7 +886,6 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
                 sql,
                 (
                     room_id,
-                    False,
                     current_depth,
                     self._clock.time_msec(),
                     BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS,
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 2b83a69426..bd3f14fb71 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1455,8 +1455,8 @@ class PersistEventsStore:
                     },
                 )
 
-                sql = "UPDATE events SET outlier = ? WHERE event_id = ?"
-                txn.execute(sql, (False, event.event_id))
+                sql = "UPDATE events SET outlier = FALSE WHERE event_id = ?"
+                txn.execute(sql, (event.event_id,))
 
                 # Update the event_backward_extremities table now that this
                 # event isn't an outlier any more.
@@ -1549,13 +1549,13 @@ class PersistEventsStore:
             for event, _ in events_and_contexts
             if not event.internal_metadata.is_redacted()
         ]
-        sql = "UPDATE redactions SET have_censored = ? WHERE "
+        sql = "UPDATE redactions SET have_censored = FALSE WHERE "
         clause, args = make_in_list_sql_clause(
             self.database_engine,
             "redacts",
             unredacted_events,
         )
-        txn.execute(sql + clause, [False] + args)
+        txn.execute(sql + clause, args)
 
         self.db_pool.simple_insert_many_txn(
             txn,
@@ -2318,14 +2318,14 @@ class PersistEventsStore:
             "   SELECT 1 FROM events"
             "   LEFT JOIN event_edges edge"
             "   ON edge.event_id = events.event_id"
-            "   WHERE events.event_id = ? AND events.room_id = ? AND (events.outlier = ? OR edge.event_id IS NULL)"
+            "   WHERE events.event_id = ? AND events.room_id = ? AND (events.outlier = FALSE OR edge.event_id IS NULL)"
             " )"
         )
 
         txn.execute_batch(
             query,
             [
-                (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id, False)
+                (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id)
                 for ev in events
                 for e_id in ev.prev_event_ids()
                 if not ev.internal_metadata.is_outlier()
diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py
index 9773c1fcd2..b52f48cf04 100644
--- a/synapse/storage/databases/main/purge_events.py
+++ b/synapse/storage/databases/main/purge_events.py
@@ -249,12 +249,11 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
         # Mark all state and own events as outliers
         logger.info("[purge] marking remaining events as outliers")
         txn.execute(
-            "UPDATE events SET outlier = ?"
+            "UPDATE events SET outlier = TRUE"
             " WHERE event_id IN ("
-            "    SELECT event_id FROM events_to_purge "
-            "    WHERE NOT should_delete"
-            ")",
-            (True,),
+            "   SELECT event_id FROM events_to_purge "
+            "   WHERE NOT should_delete"
+            ")"
         )
 
         # synapse tries to take out an exclusive lock on room_depth whenever it
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index e098ceea3c..c13c0bc7d7 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -560,19 +560,19 @@ class PushRuleStore(PushRulesWorkerStore):
         if isinstance(self.database_engine, PostgresEngine):
             sql = """
                 INSERT INTO push_rules_enable (id, user_name, rule_id, enabled)
-                VALUES (?, ?, ?, ?)
+                VALUES (?, ?, ?, 1)
                 ON CONFLICT DO NOTHING
             """
         elif isinstance(self.database_engine, Sqlite3Engine):
             sql = """
                 INSERT OR IGNORE INTO push_rules_enable (id, user_name, rule_id, enabled)
-                VALUES (?, ?, ?, ?)
+                VALUES (?, ?, ?, 1)
             """
         else:
             raise RuntimeError("Unknown database engine")
 
         new_enable_id = self._push_rules_enable_id_gen.get_next()
-        txn.execute(sql, (new_enable_id, user_id, rule_id, 1))
+        txn.execute(sql, (new_enable_id, user_id, rule_id))
 
     async def delete_push_rule(self, user_id: str, rule_id: str) -> None:
         """
diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py
index 676d03bb7e..c582cf0573 100644
--- a/synapse/storage/databases/main/registration.py
+++ b/synapse/storage/databases/main/registration.py
@@ -454,9 +454,9 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
         ) -> List[Tuple[str, int]]:
             sql = (
                 "SELECT user_id, expiration_ts_ms FROM account_validity"
-                " WHERE email_sent = ? AND (expiration_ts_ms - ?) <= ?"
+                " WHERE email_sent = FALSE AND (expiration_ts_ms - ?) <= ?"
             )
-            values = [False, now_ms, renew_at]
+            values = [now_ms, renew_at]
             txn.execute(sql, values)
             return cast(List[Tuple[str, int]], txn.fetchall())
 
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index 830658f328..719e11aea6 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -936,11 +936,11 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
             JOIN event_json USING (room_id, event_id)
             WHERE room_id = ?
                 %(where_clause)s
-                AND contains_url = ? AND outlier = ?
+                AND contains_url = TRUE AND outlier = FALSE
             ORDER BY stream_ordering DESC
             LIMIT ?
         """
-        txn.execute(sql % {"where_clause": ""}, (room_id, True, False, 100))
+        txn.execute(sql % {"where_clause": ""}, (room_id, 100))
 
         local_media_mxcs = []
         remote_media_mxcs = []
@@ -976,7 +976,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
 
             txn.execute(
                 sql % {"where_clause": "AND stream_ordering < ?"},
-                (room_id, next_token, True, False, 100),
+                (room_id, next_token, 100),
             )
 
         return local_media_mxcs, remote_media_mxcs
@@ -1086,9 +1086,9 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
 
         # set quarantine
         if quarantined_by is not None:
-            sql += "AND safe_from_quarantine = ?"
+            sql += "AND safe_from_quarantine = FALSE"
             txn.executemany(
-                sql, [(quarantined_by, media_id, False) for media_id in local_mxcs]
+                sql, [(quarantined_by, media_id) for media_id in local_mxcs]
             )
         # remove from quarantine
         else:
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
index 92cbe262a6..5a3611c415 100644
--- a/synapse/storage/databases/main/stream.py
+++ b/synapse/storage/databases/main/stream.py
@@ -1401,7 +1401,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
             `to_token`), or `limit` is zero.
         """
 
-        args = [False, room_id]
+        args: List[Any] = [room_id]
 
         order, from_bound, to_bound = generate_pagination_bounds(
             direction, from_token, to_token
@@ -1475,7 +1475,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
                 event.topological_ordering, event.stream_ordering
             FROM events AS event
             %(join_clause)s
-            WHERE event.outlier = ? AND event.room_id = ? AND %(bounds)s
+            WHERE event.outlier = FALSE AND event.room_id = ? AND %(bounds)s
             ORDER BY event.topological_ordering %(order)s,
             event.stream_ordering %(order)s LIMIT ?
         """ % {
-- 
cgit 1.5.1


From f98f4f2e16a01928e0d442fef4669a1e3fca9b0f Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Wed, 26 Jul 2023 12:59:47 -0700
Subject: Remove support for legacy application service paths (#15964)

---
 changelog.d/15964.removal    |  1 +
 synapse/appservice/api.py    | 82 ++++++--------------------------------------
 tests/appservice/test_api.py | 53 ----------------------------
 3 files changed, 12 insertions(+), 124 deletions(-)
 create mode 100644 changelog.d/15964.removal

(limited to 'synapse')

diff --git a/changelog.d/15964.removal b/changelog.d/15964.removal
new file mode 100644
index 0000000000..7613afe505
--- /dev/null
+++ b/changelog.d/15964.removal
@@ -0,0 +1 @@
+Remove support for legacy application service paths.
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index 5fb3d5083d..359999f680 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -17,8 +17,6 @@ import urllib.parse
 from typing import (
     TYPE_CHECKING,
     Any,
-    Awaitable,
-    Callable,
     Dict,
     Iterable,
     List,
@@ -30,7 +28,7 @@ from typing import (
 )
 
 from prometheus_client import Counter
-from typing_extensions import Concatenate, ParamSpec, TypeGuard
+from typing_extensions import ParamSpec, TypeGuard
 
 from synapse.api.constants import EventTypes, Membership, ThirdPartyEntityKind
 from synapse.api.errors import CodeMessageException, HttpResponseException
@@ -80,9 +78,7 @@ sent_todevice_counter = Counter(
 
 HOUR_IN_MS = 60 * 60 * 1000
 
-
 APP_SERVICE_PREFIX = "/_matrix/app/v1"
-APP_SERVICE_UNSTABLE_PREFIX = "/_matrix/app/unstable"
 
 P = ParamSpec("P")
 R = TypeVar("R")
@@ -128,47 +124,6 @@ class ApplicationServiceApi(SimpleHttpClient):
             hs.get_clock(), "as_protocol_meta", timeout_ms=HOUR_IN_MS
         )
 
-    async def _send_with_fallbacks(
-        self,
-        service: "ApplicationService",
-        prefixes: List[str],
-        path: str,
-        func: Callable[Concatenate[str, P], Awaitable[R]],
-        *args: P.args,
-        **kwargs: P.kwargs,
-    ) -> R:
-        """
-        Attempt to call an application service with multiple paths, falling back
-        until one succeeds.
-
-        Args:
-            service: The appliacation service, this provides the base URL.
-            prefixes: A last of paths to try in order for the requests.
-            path: A suffix to append to each prefix.
-            func: The function to call, the first argument will be the full
-                endpoint to fetch. Other arguments are provided by args/kwargs.
-
-        Returns:
-            The return value of func.
-        """
-        for i, prefix in enumerate(prefixes, start=1):
-            uri = f"{service.url}{prefix}{path}"
-            try:
-                return await func(uri, *args, **kwargs)
-            except HttpResponseException as e:
-                # If an error is received that is due to an unrecognised path,
-                # fallback to next path (if one exists). Otherwise, consider it
-                # a legitimate error and raise.
-                if i < len(prefixes) and is_unknown_endpoint(e):
-                    continue
-                raise
-            except Exception:
-                # Unexpected exceptions get sent to the caller.
-                raise
-
-        # The function should always exit via the return or raise above this.
-        raise RuntimeError("Unexpected fallback behaviour. This should never be seen.")
-
     async def query_user(self, service: "ApplicationService", user_id: str) -> bool:
         if service.url is None:
             return False
@@ -177,11 +132,8 @@ class ApplicationServiceApi(SimpleHttpClient):
         assert service.hs_token is not None
 
         try:
-            response = await self._send_with_fallbacks(
-                service,
-                [APP_SERVICE_PREFIX, ""],
-                f"/users/{urllib.parse.quote(user_id)}",
-                self.get_json,
+            response = await self.get_json(
+                f"{service.url}{APP_SERVICE_PREFIX}/users/{urllib.parse.quote(user_id)}",
                 {"access_token": service.hs_token},
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
@@ -203,11 +155,8 @@ class ApplicationServiceApi(SimpleHttpClient):
         assert service.hs_token is not None
 
         try:
-            response = await self._send_with_fallbacks(
-                service,
-                [APP_SERVICE_PREFIX, ""],
-                f"/rooms/{urllib.parse.quote(alias)}",
-                self.get_json,
+            response = await self.get_json(
+                f"{service.url}{APP_SERVICE_PREFIX}/rooms/{urllib.parse.quote(alias)}",
                 {"access_token": service.hs_token},
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
@@ -245,11 +194,8 @@ class ApplicationServiceApi(SimpleHttpClient):
                 **fields,
                 b"access_token": service.hs_token,
             }
-            response = await self._send_with_fallbacks(
-                service,
-                [APP_SERVICE_PREFIX, APP_SERVICE_UNSTABLE_PREFIX],
-                f"/thirdparty/{kind}/{urllib.parse.quote(protocol)}",
-                self.get_json,
+            response = await self.get_json(
+                f"{service.url}{APP_SERVICE_PREFIX}/thirdparty/{kind}/{urllib.parse.quote(protocol)}",
                 args=args,
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
@@ -285,11 +231,8 @@ class ApplicationServiceApi(SimpleHttpClient):
             # This is required by the configuration.
             assert service.hs_token is not None
             try:
-                info = await self._send_with_fallbacks(
-                    service,
-                    [APP_SERVICE_PREFIX, APP_SERVICE_UNSTABLE_PREFIX],
-                    f"/thirdparty/protocol/{urllib.parse.quote(protocol)}",
-                    self.get_json,
+                info = await self.get_json(
+                    f"{service.url}{APP_SERVICE_PREFIX}/thirdparty/protocol/{urllib.parse.quote(protocol)}",
                     {"access_token": service.hs_token},
                     headers={"Authorization": [f"Bearer {service.hs_token}"]},
                 )
@@ -401,11 +344,8 @@ class ApplicationServiceApi(SimpleHttpClient):
                 }
 
         try:
-            await self._send_with_fallbacks(
-                service,
-                [APP_SERVICE_PREFIX, ""],
-                f"/transactions/{urllib.parse.quote(str(txn_id))}",
-                self.put_json,
+            await self.put_json(
+                f"{service.url}{APP_SERVICE_PREFIX}/transactions/{urllib.parse.quote(str(txn_id))}",
                 json_body=body,
                 args={"access_token": service.hs_token},
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
diff --git a/tests/appservice/test_api.py b/tests/appservice/test_api.py
index 15fce165b6..807dc2f21c 100644
--- a/tests/appservice/test_api.py
+++ b/tests/appservice/test_api.py
@@ -16,7 +16,6 @@ from unittest.mock import Mock
 
 from twisted.test.proto_helpers import MemoryReactor
 
-from synapse.api.errors import HttpResponseException
 from synapse.appservice import ApplicationService
 from synapse.server import HomeServer
 from synapse.types import JsonDict
@@ -107,58 +106,6 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
         self.assertEqual(self.request_url, URL_LOCATION)
         self.assertEqual(result, SUCCESS_RESULT_LOCATION)
 
-    def test_fallback(self) -> None:
-        """
-        Tests that the fallback to legacy URLs works.
-        """
-        SUCCESS_RESULT_USER = [
-            {
-                "protocol": PROTOCOL,
-                "userid": "@a:user",
-                "fields": {
-                    "more": "fields",
-                },
-            }
-        ]
-
-        URL_USER = f"{URL}/_matrix/app/v1/thirdparty/user/{PROTOCOL}"
-        FALLBACK_URL_USER = f"{URL}/_matrix/app/unstable/thirdparty/user/{PROTOCOL}"
-
-        self.request_url = None
-        self.v1_seen = False
-
-        async def get_json(
-            url: str,
-            args: Mapping[Any, Any],
-            headers: Mapping[Union[str, bytes], Sequence[Union[str, bytes]]],
-        ) -> List[JsonDict]:
-            # Ensure the access token is passed as both a header and query arg.
-            if not headers.get("Authorization") or not args.get(b"access_token"):
-                raise RuntimeError("Access token not provided")
-
-            self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"])
-            self.assertEqual(args.get(b"access_token"), TOKEN)
-            self.request_url = url
-            if url == URL_USER:
-                self.v1_seen = True
-                raise HttpResponseException(404, "NOT_FOUND", b"NOT_FOUND")
-            elif url == FALLBACK_URL_USER:
-                return SUCCESS_RESULT_USER
-            else:
-                raise RuntimeError(
-                    "URL provided was invalid. This should never be seen."
-                )
-
-        # We assign to a method, which mypy doesn't like.
-        self.api.get_json = Mock(side_effect=get_json)  # type: ignore[assignment]
-
-        result = self.get_success(
-            self.api.query_3pe(self.service, "user", PROTOCOL, {b"some": [b"field"]})
-        )
-        self.assertTrue(self.v1_seen)
-        self.assertEqual(self.request_url, FALLBACK_URL_USER)
-        self.assertEqual(result, SUCCESS_RESULT_USER)
-
     def test_claim_keys(self) -> None:
         """
         Tests that the /keys/claim response is properly parsed for missing
-- 
cgit 1.5.1


From a719b703d9bd0dade2565ddcad0e2f3a7a9d4c37 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Thu, 27 Jul 2023 15:45:05 +0200
Subject: Fix 404 on /profile when the display name is empty but not the avatar
 (#16012)

---
 changelog.d/16012.bugfix       |  1 +
 synapse/handlers/profile.py    |  2 +-
 tests/handlers/test_profile.py | 10 ++++++++++
 3 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/16012.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16012.bugfix b/changelog.d/16012.bugfix
new file mode 100644
index 0000000000..44ca9377ff
--- /dev/null
+++ b/changelog.d/16012.bugfix
@@ -0,0 +1 @@
+Fix 404 not found code returned on profile endpoint when the display name is empty but not the avatar URL.
diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py
index a7f8c5e636..c7fe101cd9 100644
--- a/synapse/handlers/profile.py
+++ b/synapse/handlers/profile.py
@@ -68,7 +68,7 @@ class ProfileHandler:
 
         if self.hs.is_mine(target_user):
             profileinfo = await self.store.get_profileinfo(target_user)
-            if profileinfo.display_name is None:
+            if profileinfo.display_name is None and profileinfo.avatar_url is None:
                 raise SynapseError(404, "Profile was not found", Codes.NOT_FOUND)
 
             return {
diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py
index 196ceb0b82..ec2f5d30be 100644
--- a/tests/handlers/test_profile.py
+++ b/tests/handlers/test_profile.py
@@ -179,6 +179,16 @@ class ProfileTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual("http://my.server/me.png", avatar_url)
 
+    def test_get_profile_empty_displayname(self) -> None:
+        self.get_success(self.store.set_profile_displayname(self.frank, None))
+        self.get_success(
+            self.store.set_profile_avatar_url(self.frank, "http://my.server/me.png")
+        )
+
+        profile = self.get_success(self.handler.get_profile(self.frank.to_string()))
+
+        self.assertEqual("http://my.server/me.png", profile["avatar_url"])
+
     def test_set_my_avatar(self) -> None:
         self.get_success(
             self.handler.set_avatar_url(
-- 
cgit 1.5.1


From 68b2611783ab00fdad567654a95492442722c106 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Thu, 27 Jul 2023 15:08:46 -0700
Subject: Clarify comment on key uploads over replication (#16016)

---
 changelog.d/16016.doc               | 2 ++
 synapse/replication/http/devices.py | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/16016.doc

(limited to 'synapse')

diff --git a/changelog.d/16016.doc b/changelog.d/16016.doc
new file mode 100644
index 0000000000..e677058c2d
--- /dev/null
+++ b/changelog.d/16016.doc
@@ -0,0 +1,2 @@
+Clarify comment on the keys/upload over replication endpoint.
+
diff --git a/synapse/replication/http/devices.py b/synapse/replication/http/devices.py
index f874f072f9..73f3de3642 100644
--- a/synapse/replication/http/devices.py
+++ b/synapse/replication/http/devices.py
@@ -107,8 +107,7 @@ class ReplicationUploadKeysForUserRestServlet(ReplicationEndpoint):
     Calls to e2e_keys_handler.upload_keys_for_user(user_id, device_id, keys) on
     the main process to accomplish this.
 
-    Defined in https://spec.matrix.org/v1.4/client-server-api/#post_matrixclientv3keysupload
-    Request format(borrowed and expanded from KeyUploadServlet):
+    Request format for this endpoint (borrowed and expanded from KeyUploadServlet):
 
         POST /_synapse/replication/upload_keys_for_user
 
@@ -117,6 +116,7 @@ class ReplicationUploadKeysForUserRestServlet(ReplicationEndpoint):
         "device_id": "<device_id>",
         "keys": {
             ....this part can be found in KeyUploadServlet in rest/client/keys.py....
+            or as defined in https://spec.matrix.org/v1.4/client-server-api/#post_matrixclientv3keysupload
         }
     }
 
-- 
cgit 1.5.1


From ae55cc1e6bc6527d0e359a823c474f5c9ed4382e Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Mon, 31 Jul 2023 10:58:03 +0100
Subject: Add ability to wait for locks and add locks to purge history / room
 deletion (#15791)

c.f. #13476
---
 changelog.d/15791.bugfix                         |   1 +
 synapse/federation/federation_server.py          |  17 +-
 synapse/handlers/message.py                      |  38 ++-
 synapse/handlers/pagination.py                   |  23 +-
 synapse/handlers/room_member.py                  |  45 +--
 synapse/handlers/worker_lock.py                  | 333 +++++++++++++++++++++++
 synapse/notifier.py                              |  16 ++
 synapse/replication/tcp/commands.py              |  33 +++
 synapse/replication/tcp/handler.py               |  22 ++
 synapse/rest/client/room_upgrade_rest_servlet.py |  11 +-
 synapse/server.py                                |   5 +
 synapse/storage/controllers/persist_events.py    |  27 +-
 synapse/storage/databases/main/lock.py           | 190 ++++++++-----
 tests/handlers/test_worker_lock.py               |  74 +++++
 tests/rest/client/test_rooms.py                  |   4 +-
 tests/storage/databases/main/test_lock.py        |  52 ++++
 16 files changed, 783 insertions(+), 108 deletions(-)
 create mode 100644 changelog.d/15791.bugfix
 create mode 100644 synapse/handlers/worker_lock.py
 create mode 100644 tests/handlers/test_worker_lock.py

(limited to 'synapse')

diff --git a/changelog.d/15791.bugfix b/changelog.d/15791.bugfix
new file mode 100644
index 0000000000..182634b62f
--- /dev/null
+++ b/changelog.d/15791.bugfix
@@ -0,0 +1 @@
+Fix bug where purging history and paginating simultaneously could lead to database corruption when using workers.
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index fa61dd8c10..a90d99c4d6 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -63,6 +63,7 @@ from synapse.federation.federation_base import (
 )
 from synapse.federation.persistence import TransactionActions
 from synapse.federation.units import Edu, Transaction
+from synapse.handlers.worker_lock import DELETE_ROOM_LOCK_NAME
 from synapse.http.servlet import assert_params_in_dict
 from synapse.logging.context import (
     make_deferred_yieldable,
@@ -137,6 +138,7 @@ class FederationServer(FederationBase):
         self._event_auth_handler = hs.get_event_auth_handler()
         self._room_member_handler = hs.get_room_member_handler()
         self._e2e_keys_handler = hs.get_e2e_keys_handler()
+        self._worker_lock_handler = hs.get_worker_locks_handler()
 
         self._state_storage_controller = hs.get_storage_controllers().state
 
@@ -1236,9 +1238,18 @@ class FederationServer(FederationBase):
                 logger.info("handling received PDU in room %s: %s", room_id, event)
                 try:
                     with nested_logging_context(event.event_id):
-                        await self._federation_event_handler.on_receive_pdu(
-                            origin, event
-                        )
+                        # We're taking out a lock within a lock, which could
+                        # lead to deadlocks if we're not careful. However, it is
+                        # safe on this occasion as we only ever take a write
+                        # lock when deleting a room, which we would never do
+                        # while holding the `_INBOUND_EVENT_HANDLING_LOCK_NAME`
+                        # lock.
+                        async with self._worker_lock_handler.acquire_read_write_lock(
+                            DELETE_ROOM_LOCK_NAME, room_id, write=False
+                        ):
+                            await self._federation_event_handler.on_receive_pdu(
+                                origin, event
+                            )
                 except FederationError as e:
                     # XXX: Ideally we'd inform the remote we failed to process
                     # the event, but we can't return an error in the transaction
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index fff0b5fa12..187dedae7d 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -53,6 +53,7 @@ from synapse.events.snapshot import EventContext, UnpersistedEventContextBase
 from synapse.events.utils import SerializeEventConfig, maybe_upsert_event_field
 from synapse.events.validator import EventValidator
 from synapse.handlers.directory import DirectoryHandler
+from synapse.handlers.worker_lock import DELETE_ROOM_LOCK_NAME
 from synapse.logging import opentracing
 from synapse.logging.context import make_deferred_yieldable, run_in_background
 from synapse.metrics.background_process_metrics import run_as_background_process
@@ -485,6 +486,7 @@ class EventCreationHandler:
         self._events_shard_config = self.config.worker.events_shard_config
         self._instance_name = hs.get_instance_name()
         self._notifier = hs.get_notifier()
+        self._worker_lock_handler = hs.get_worker_locks_handler()
 
         self.room_prejoin_state_types = self.hs.config.api.room_prejoin_state
 
@@ -1010,6 +1012,37 @@ class EventCreationHandler:
                         event.internal_metadata.stream_ordering,
                     )
 
+        async with self._worker_lock_handler.acquire_read_write_lock(
+            DELETE_ROOM_LOCK_NAME, room_id, write=False
+        ):
+            return await self._create_and_send_nonmember_event_locked(
+                requester=requester,
+                event_dict=event_dict,
+                allow_no_prev_events=allow_no_prev_events,
+                prev_event_ids=prev_event_ids,
+                state_event_ids=state_event_ids,
+                ratelimit=ratelimit,
+                txn_id=txn_id,
+                ignore_shadow_ban=ignore_shadow_ban,
+                outlier=outlier,
+                depth=depth,
+            )
+
+    async def _create_and_send_nonmember_event_locked(
+        self,
+        requester: Requester,
+        event_dict: dict,
+        allow_no_prev_events: bool = False,
+        prev_event_ids: Optional[List[str]] = None,
+        state_event_ids: Optional[List[str]] = None,
+        ratelimit: bool = True,
+        txn_id: Optional[str] = None,
+        ignore_shadow_ban: bool = False,
+        outlier: bool = False,
+        depth: Optional[int] = None,
+    ) -> Tuple[EventBase, int]:
+        room_id = event_dict["room_id"]
+
         # If we don't have any prev event IDs specified then we need to
         # check that the host is in the room (as otherwise populating the
         # prev events will fail), at which point we may as well check the
@@ -1923,7 +1956,10 @@ class EventCreationHandler:
         )
 
         for room_id in room_ids:
-            dummy_event_sent = await self._send_dummy_event_for_room(room_id)
+            async with self._worker_lock_handler.acquire_read_write_lock(
+                DELETE_ROOM_LOCK_NAME, room_id, write=False
+            ):
+                dummy_event_sent = await self._send_dummy_event_for_room(room_id)
 
             if not dummy_event_sent:
                 # Did not find a valid user in the room, so remove from future attempts
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index 19b8728db9..da34658470 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -46,6 +46,11 @@ logger = logging.getLogger(__name__)
 BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD = 3
 
 
+PURGE_HISTORY_LOCK_NAME = "purge_history_lock"
+
+DELETE_ROOM_LOCK_NAME = "delete_room_lock"
+
+
 @attr.s(slots=True, auto_attribs=True)
 class PurgeStatus:
     """Object tracking the status of a purge request
@@ -142,6 +147,7 @@ class PaginationHandler:
         self._server_name = hs.hostname
         self._room_shutdown_handler = hs.get_room_shutdown_handler()
         self._relations_handler = hs.get_relations_handler()
+        self._worker_locks = hs.get_worker_locks_handler()
 
         self.pagination_lock = ReadWriteLock()
         # IDs of rooms in which there currently an active purge *or delete* operation.
@@ -356,7 +362,9 @@ class PaginationHandler:
         """
         self._purges_in_progress_by_room.add(room_id)
         try:
-            async with self.pagination_lock.write(room_id):
+            async with self._worker_locks.acquire_read_write_lock(
+                PURGE_HISTORY_LOCK_NAME, room_id, write=True
+            ):
                 await self._storage_controllers.purge_events.purge_history(
                     room_id, token, delete_local_events
                 )
@@ -412,7 +420,10 @@ class PaginationHandler:
             room_id: room to be purged
             force: set true to skip checking for joined users.
         """
-        async with self.pagination_lock.write(room_id):
+        async with self._worker_locks.acquire_multi_read_write_lock(
+            [(PURGE_HISTORY_LOCK_NAME, room_id), (DELETE_ROOM_LOCK_NAME, room_id)],
+            write=True,
+        ):
             # first check that we have no users in this room
             if not force:
                 joined = await self.store.is_host_joined(room_id, self._server_name)
@@ -471,7 +482,9 @@ class PaginationHandler:
 
         room_token = from_token.room_key
 
-        async with self.pagination_lock.read(room_id):
+        async with self._worker_locks.acquire_read_write_lock(
+            PURGE_HISTORY_LOCK_NAME, room_id, write=False
+        ):
             (membership, member_event_id) = (None, None)
             if not use_admin_priviledge:
                 (
@@ -747,7 +760,9 @@ class PaginationHandler:
 
         self._purges_in_progress_by_room.add(room_id)
         try:
-            async with self.pagination_lock.write(room_id):
+            async with self._worker_locks.acquire_read_write_lock(
+                PURGE_HISTORY_LOCK_NAME, room_id, write=True
+            ):
                 self._delete_by_id[delete_id].status = DeleteStatus.STATUS_SHUTTING_DOWN
                 self._delete_by_id[
                     delete_id
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 496e701f13..6cca2ec344 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -39,6 +39,7 @@ from synapse.events import EventBase
 from synapse.events.snapshot import EventContext
 from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN
 from synapse.handlers.state_deltas import MatchChange, StateDeltasHandler
+from synapse.handlers.worker_lock import DELETE_ROOM_LOCK_NAME
 from synapse.logging import opentracing
 from synapse.metrics import event_processing_positions
 from synapse.metrics.background_process_metrics import run_as_background_process
@@ -94,6 +95,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         self.event_creation_handler = hs.get_event_creation_handler()
         self.account_data_handler = hs.get_account_data_handler()
         self.event_auth_handler = hs.get_event_auth_handler()
+        self._worker_lock_handler = hs.get_worker_locks_handler()
 
         self.member_linearizer: Linearizer = Linearizer(name="member")
         self.member_as_limiter = Linearizer(max_count=10, name="member_as_limiter")
@@ -638,26 +640,29 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         # by application services), and then by room ID.
         async with self.member_as_limiter.queue(as_id):
             async with self.member_linearizer.queue(key):
-                with opentracing.start_active_span("update_membership_locked"):
-                    result = await self.update_membership_locked(
-                        requester,
-                        target,
-                        room_id,
-                        action,
-                        txn_id=txn_id,
-                        remote_room_hosts=remote_room_hosts,
-                        third_party_signed=third_party_signed,
-                        ratelimit=ratelimit,
-                        content=content,
-                        new_room=new_room,
-                        require_consent=require_consent,
-                        outlier=outlier,
-                        allow_no_prev_events=allow_no_prev_events,
-                        prev_event_ids=prev_event_ids,
-                        state_event_ids=state_event_ids,
-                        depth=depth,
-                        origin_server_ts=origin_server_ts,
-                    )
+                async with self._worker_lock_handler.acquire_read_write_lock(
+                    DELETE_ROOM_LOCK_NAME, room_id, write=False
+                ):
+                    with opentracing.start_active_span("update_membership_locked"):
+                        result = await self.update_membership_locked(
+                            requester,
+                            target,
+                            room_id,
+                            action,
+                            txn_id=txn_id,
+                            remote_room_hosts=remote_room_hosts,
+                            third_party_signed=third_party_signed,
+                            ratelimit=ratelimit,
+                            content=content,
+                            new_room=new_room,
+                            require_consent=require_consent,
+                            outlier=outlier,
+                            allow_no_prev_events=allow_no_prev_events,
+                            prev_event_ids=prev_event_ids,
+                            state_event_ids=state_event_ids,
+                            depth=depth,
+                            origin_server_ts=origin_server_ts,
+                        )
 
         return result
 
diff --git a/synapse/handlers/worker_lock.py b/synapse/handlers/worker_lock.py
new file mode 100644
index 0000000000..72df773a86
--- /dev/null
+++ b/synapse/handlers/worker_lock.py
@@ -0,0 +1,333 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import random
+from types import TracebackType
+from typing import (
+    TYPE_CHECKING,
+    AsyncContextManager,
+    Collection,
+    Dict,
+    Optional,
+    Tuple,
+    Type,
+    Union,
+)
+from weakref import WeakSet
+
+import attr
+
+from twisted.internet import defer
+from twisted.internet.interfaces import IReactorTime
+
+from synapse.logging.context import PreserveLoggingContext
+from synapse.logging.opentracing import start_active_span
+from synapse.metrics.background_process_metrics import wrap_as_background_process
+from synapse.storage.databases.main.lock import Lock, LockStore
+from synapse.util.async_helpers import timeout_deferred
+
+if TYPE_CHECKING:
+    from synapse.logging.opentracing import opentracing
+    from synapse.server import HomeServer
+
+
+DELETE_ROOM_LOCK_NAME = "delete_room_lock"
+
+
+class WorkerLocksHandler:
+    """A class for waiting on taking out locks, rather than using the storage
+    functions directly (which don't support awaiting).
+    """
+
+    def __init__(self, hs: "HomeServer") -> None:
+        self._reactor = hs.get_reactor()
+        self._store = hs.get_datastores().main
+        self._clock = hs.get_clock()
+        self._notifier = hs.get_notifier()
+        self._instance_name = hs.get_instance_name()
+
+        # Map from lock name/key to set of `WaitingLock` that are active for
+        # that lock.
+        self._locks: Dict[
+            Tuple[str, str], WeakSet[Union[WaitingLock, WaitingMultiLock]]
+        ] = {}
+
+        self._clock.looping_call(self._cleanup_locks, 30_000)
+
+        self._notifier.add_lock_released_callback(self._on_lock_released)
+
+    def acquire_lock(self, lock_name: str, lock_key: str) -> "WaitingLock":
+        """Acquire a standard lock, returns a context manager that will block
+        until the lock is acquired.
+
+        Note: Care must be taken to avoid deadlocks. In particular, this
+        function does *not* timeout.
+
+        Usage:
+            async with handler.acquire_lock(name, key):
+                # Do work while holding the lock...
+        """
+
+        lock = WaitingLock(
+            reactor=self._reactor,
+            store=self._store,
+            handler=self,
+            lock_name=lock_name,
+            lock_key=lock_key,
+            write=None,
+        )
+
+        self._locks.setdefault((lock_name, lock_key), WeakSet()).add(lock)
+
+        return lock
+
+    def acquire_read_write_lock(
+        self,
+        lock_name: str,
+        lock_key: str,
+        *,
+        write: bool,
+    ) -> "WaitingLock":
+        """Acquire a read/write lock, returns a context manager that will block
+        until the lock is acquired.
+
+        Note: Care must be taken to avoid deadlocks. In particular, this
+        function does *not* timeout.
+
+        Usage:
+            async with handler.acquire_read_write_lock(name, key, write=True):
+                # Do work while holding the lock...
+        """
+
+        lock = WaitingLock(
+            reactor=self._reactor,
+            store=self._store,
+            handler=self,
+            lock_name=lock_name,
+            lock_key=lock_key,
+            write=write,
+        )
+
+        self._locks.setdefault((lock_name, lock_key), WeakSet()).add(lock)
+
+        return lock
+
+    def acquire_multi_read_write_lock(
+        self,
+        lock_names: Collection[Tuple[str, str]],
+        *,
+        write: bool,
+    ) -> "WaitingMultiLock":
+        """Acquires multiple read/write locks at once, returns a context manager
+        that will block until all the locks are acquired.
+
+        This will try and acquire all locks at once, and will never hold on to a
+        subset of the locks. (This avoids accidentally creating deadlocks).
+
+        Note: Care must be taken to avoid deadlocks. In particular, this
+        function does *not* timeout.
+        """
+
+        lock = WaitingMultiLock(
+            lock_names=lock_names,
+            write=write,
+            reactor=self._reactor,
+            store=self._store,
+            handler=self,
+        )
+
+        for lock_name, lock_key in lock_names:
+            self._locks.setdefault((lock_name, lock_key), WeakSet()).add(lock)
+
+        return lock
+
+    def notify_lock_released(self, lock_name: str, lock_key: str) -> None:
+        """Notify that a lock has been released.
+
+        Pokes both the notifier and replication.
+        """
+
+        self._notifier.notify_lock_released(self._instance_name, lock_name, lock_key)
+
+    def _on_lock_released(
+        self, instance_name: str, lock_name: str, lock_key: str
+    ) -> None:
+        """Called when a lock has been released.
+
+        Wakes up any locks that might be waiting on this.
+        """
+        locks = self._locks.get((lock_name, lock_key))
+        if not locks:
+            return
+
+        def _wake_deferred(deferred: defer.Deferred) -> None:
+            if not deferred.called:
+                deferred.callback(None)
+
+        for lock in locks:
+            self._clock.call_later(0, _wake_deferred, lock.deferred)
+
+    @wrap_as_background_process("_cleanup_locks")
+    async def _cleanup_locks(self) -> None:
+        """Periodically cleans out stale entries in the locks map"""
+        self._locks = {key: value for key, value in self._locks.items() if value}
+
+
+@attr.s(auto_attribs=True, eq=False)
+class WaitingLock:
+    """Async context manager that waits until the named lock can be acquired."""
+
+    reactor: IReactorTime
+    store: LockStore
+    handler: WorkerLocksHandler
+    lock_name: str
+    lock_key: str
+    write: Optional[bool]  # None: plain lock; otherwise a read/write lock
+    deferred: "defer.Deferred[None]" = attr.Factory(defer.Deferred)
+    _inner_lock: Optional[Lock] = None
+    _retry_interval: float = 0.1
+    _lock_span: "opentracing.Scope" = attr.Factory(
+        lambda: start_active_span("WaitingLock.lock")
+    )
+
+    async def __aenter__(self) -> None:
+        self._lock_span.__enter__()
+
+        with start_active_span("WaitingLock.waiting_for_lock"):
+            while self._inner_lock is None:
+                self.deferred = defer.Deferred()
+
+                if self.write is not None:
+                    lock = await self.store.try_acquire_read_write_lock(
+                        self.lock_name, self.lock_key, write=self.write
+                    )
+                else:
+                    lock = await self.store.try_acquire_lock(
+                        self.lock_name, self.lock_key
+                    )
+
+                if lock:
+                    self._inner_lock = lock
+                    break
+
+                try:
+                    # Wait until we get notified that the lock might have been
+                    # released (by the deferred being resolved). We also
+                    # periodically wake up in case the lock was released but we
+                    # weren't notified.
+                    with PreserveLoggingContext():
+                        await timeout_deferred(
+                            deferred=self.deferred,
+                            timeout=self._get_next_retry_interval(),
+                            reactor=self.reactor,
+                        )
+                except Exception:
+                    pass  # Either way we go round the loop and try again.
+
+        return await self._inner_lock.__aenter__()
+
+    async def __aexit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc: Optional[BaseException],
+        tb: Optional[TracebackType],
+    ) -> Optional[bool]:
+        assert self._inner_lock
+        self.handler.notify_lock_released(self.lock_name, self.lock_key)
+
+        try:
+            return await self._inner_lock.__aexit__(exc_type, exc, tb)
+        finally:
+            self._lock_span.__exit__(exc_type, exc, tb)
+
+    def _get_next_retry_interval(self) -> float:
+        """Return this wait (+/-10% jitter) and double the next one, capped at 5s."""
+        interval = self._retry_interval
+        self._retry_interval = min(5, interval * 2)
+        return interval * random.uniform(0.9, 1.1)
+
+
+@attr.s(auto_attribs=True, eq=False)
+class WaitingMultiLock:
+    """Async context manager that waits until all the given locks are acquired."""
+
+    lock_names: Collection[Tuple[str, str]]  # (lock_name, lock_key) pairs
+    write: bool
+
+    reactor: IReactorTime
+    store: LockStore
+    handler: WorkerLocksHandler
+
+    deferred: "defer.Deferred[None]" = attr.Factory(defer.Deferred)
+
+    _inner_lock_cm: Optional[AsyncContextManager] = None
+    _retry_interval: float = 0.1
+    _lock_span: "opentracing.Scope" = attr.Factory(
+        lambda: start_active_span("WaitingLock.lock")
+    )
+
+    async def __aenter__(self) -> None:
+        self._lock_span.__enter__()
+
+        with start_active_span("WaitingLock.waiting_for_lock"):
+            while self._inner_lock_cm is None:
+                self.deferred = defer.Deferred()
+
+                lock_cm = await self.store.try_acquire_multi_read_write_lock(
+                    self.lock_names, write=self.write
+                )
+
+                if lock_cm:
+                    self._inner_lock_cm = lock_cm
+                    break
+
+                try:
+                    # Wait until we get notified that the lock might have been
+                    # released (by the deferred being resolved). We also
+                    # periodically wake up in case the lock was released but we
+                    # weren't notified.
+                    with PreserveLoggingContext():
+                        await timeout_deferred(
+                            deferred=self.deferred,
+                            timeout=self._get_next_retry_interval(),
+                            reactor=self.reactor,
+                        )
+                except Exception:
+                    pass  # Either way we go round the loop and try again.
+
+        assert self._inner_lock_cm
+        await self._inner_lock_cm.__aenter__()
+
+    async def __aexit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc: Optional[BaseException],
+        tb: Optional[TracebackType],
+    ) -> Optional[bool]:
+        assert self._inner_lock_cm
+
+        # Wake up anything waiting on any of these locks.
+        for lock_name, lock_key in self.lock_names:
+            self.handler.notify_lock_released(lock_name, lock_key)
+
+        try:
+            return await self._inner_lock_cm.__aexit__(exc_type, exc, tb)
+        finally:
+            self._lock_span.__exit__(exc_type, exc, tb)
+
+    def _get_next_retry_interval(self) -> float:
+        """Return this wait (+/-10% jitter) and double the next one, capped at 5s."""
+        interval = self._retry_interval
+        self._retry_interval = min(5, interval * 2)
+        return interval * random.uniform(0.9, 1.1)
diff --git a/synapse/notifier.py b/synapse/notifier.py
index 897272ad5b..68115bca70 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -234,6 +234,9 @@ class Notifier:
 
         self._third_party_rules = hs.get_module_api_callbacks().third_party_event_rules
 
+        # List of callbacks to be notified when a lock is released
+        self._lock_released_callback: List[Callable[[str, str, str], None]] = []
+
         self.clock = hs.get_clock()
         self.appservice_handler = hs.get_application_service_handler()
         self._pusher_pool = hs.get_pusherpool()
@@ -785,6 +788,19 @@ class Notifier:
         # that any in flight requests can be immediately retried.
         self._federation_client.wake_destination(server)
 
+    def add_lock_released_callback(
+        self, callback: Callable[[str, str, str], None]
+    ) -> None:
+        """Add a function to be called whenever we are notified about a released lock."""
+        self._lock_released_callback.append(callback)
+
+    def notify_lock_released(
+        self, instance_name: str, lock_name: str, lock_key: str
+    ) -> None:
+        """Notify the callbacks that a lock has been released."""
+        for cb in self._lock_released_callback:
+            cb(instance_name, lock_name, lock_key)
+
 
 @attr.s(auto_attribs=True)
 class ReplicationNotifier:
diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py
index 32f52e54d8..10f5c98ff8 100644
--- a/synapse/replication/tcp/commands.py
+++ b/synapse/replication/tcp/commands.py
@@ -422,6 +422,36 @@ class RemoteServerUpCommand(_SimpleCommand):
     NAME = "REMOTE_SERVER_UP"
 
 
+class LockReleasedCommand(Command):
+    """Sent to inform other instances that a given lock has been dropped.
+
+    Format::
+
+        LOCK_RELEASED ["<instance_name>", "<lock_name>", "<lock_key>"]
+    """
+
+    NAME = "LOCK_RELEASED"
+
+    def __init__(
+        self,
+        instance_name: str,
+        lock_name: str,
+        lock_key: str,
+    ):
+        self.instance_name = instance_name
+        self.lock_name = lock_name
+        self.lock_key = lock_key
+
+    @classmethod
+    def from_line(cls: Type["LockReleasedCommand"], line: str) -> "LockReleasedCommand":
+        instance_name, lock_name, lock_key = json_decoder.decode(line)
+
+        return cls(instance_name, lock_name, lock_key)
+
+    def to_line(self) -> str:
+        return json_encoder.encode([self.instance_name, self.lock_name, self.lock_key])
+
+
 _COMMANDS: Tuple[Type[Command], ...] = (
     ServerCommand,
     RdataCommand,
@@ -435,6 +465,7 @@ _COMMANDS: Tuple[Type[Command], ...] = (
     UserIpCommand,
     RemoteServerUpCommand,
     ClearUserSyncsCommand,
+    LockReleasedCommand,
 )
 
 # Map of command name to command type.
@@ -448,6 +479,7 @@ VALID_SERVER_COMMANDS = (
     ErrorCommand.NAME,
     PingCommand.NAME,
     RemoteServerUpCommand.NAME,
+    LockReleasedCommand.NAME,
 )
 
 # The commands the client is allowed to send
@@ -461,6 +493,7 @@ VALID_CLIENT_COMMANDS = (
     UserIpCommand.NAME,
     ErrorCommand.NAME,
     RemoteServerUpCommand.NAME,
+    LockReleasedCommand.NAME,
 )
 
 
diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py
index 5d108fe11b..a2cabba7b1 100644
--- a/synapse/replication/tcp/handler.py
+++ b/synapse/replication/tcp/handler.py
@@ -39,6 +39,7 @@ from synapse.replication.tcp.commands import (
     ClearUserSyncsCommand,
     Command,
     FederationAckCommand,
+    LockReleasedCommand,
     PositionCommand,
     RdataCommand,
     RemoteServerUpCommand,
@@ -248,6 +249,9 @@ class ReplicationCommandHandler:
         if self._is_master or self._should_insert_client_ips:
             self.subscribe_to_channel("USER_IP")
 
+        if hs.config.redis.redis_enabled:
+            self._notifier.add_lock_released_callback(self.on_lock_released)
+
     def subscribe_to_channel(self, channel_name: str) -> None:
         """
         Indicates that we wish to subscribe to a Redis channel by name.
@@ -648,6 +652,17 @@ class ReplicationCommandHandler:
 
         self._notifier.notify_remote_server_up(cmd.data)
 
+    def on_LOCK_RELEASED(
+        self, conn: IReplicationConnection, cmd: LockReleasedCommand
+    ) -> None:
+        """Called when we get a new LOCK_RELEASED command."""
+        if cmd.instance_name == self._instance_name:
+            return
+
+        self._notifier.notify_lock_released(
+            cmd.instance_name, cmd.lock_name, cmd.lock_key
+        )
+
     def new_connection(self, connection: IReplicationConnection) -> None:
         """Called when we have a new connection."""
         self._connections.append(connection)
@@ -754,6 +769,13 @@ class ReplicationCommandHandler:
         """
         self.send_command(RdataCommand(stream_name, self._instance_name, token, data))
 
+    def on_lock_released(
+        self, instance_name: str, lock_name: str, lock_key: str
+    ) -> None:
+        """Called when we released a lock and should notify other instances."""
+        if instance_name == self._instance_name:
+            self.send_command(LockReleasedCommand(instance_name, lock_name, lock_key))
+
 
 UpdateToken = TypeVar("UpdateToken")
 UpdateRow = TypeVar("UpdateRow")
diff --git a/synapse/rest/client/room_upgrade_rest_servlet.py b/synapse/rest/client/room_upgrade_rest_servlet.py
index 6a7792e18b..4a5d9e13e7 100644
--- a/synapse/rest/client/room_upgrade_rest_servlet.py
+++ b/synapse/rest/client/room_upgrade_rest_servlet.py
@@ -17,6 +17,7 @@ from typing import TYPE_CHECKING, Tuple
 
 from synapse.api.errors import Codes, ShadowBanError, SynapseError
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
+from synapse.handlers.worker_lock import DELETE_ROOM_LOCK_NAME
 from synapse.http.server import HttpServer
 from synapse.http.servlet import (
     RestServlet,
@@ -60,6 +61,7 @@ class RoomUpgradeRestServlet(RestServlet):
         self._hs = hs
         self._room_creation_handler = hs.get_room_creation_handler()
         self._auth = hs.get_auth()
+        self._worker_lock_handler = hs.get_worker_locks_handler()
 
     async def on_POST(
         self, request: SynapseRequest, room_id: str
@@ -78,9 +80,12 @@ class RoomUpgradeRestServlet(RestServlet):
             )
 
         try:
-            new_room_id = await self._room_creation_handler.upgrade_room(
-                requester, room_id, new_version
-            )
+            async with self._worker_lock_handler.acquire_read_write_lock(
+                DELETE_ROOM_LOCK_NAME, room_id, write=False
+            ):
+                new_room_id = await self._room_creation_handler.upgrade_room(
+                    requester, room_id, new_version
+                )
         except ShadowBanError:
             # Generate a random room ID.
             new_room_id = stringutils.random_string(18)
diff --git a/synapse/server.py b/synapse/server.py
index b72b76a38b..8430f99ef2 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -107,6 +107,7 @@ from synapse.handlers.stats import StatsHandler
 from synapse.handlers.sync import SyncHandler
 from synapse.handlers.typing import FollowerTypingHandler, TypingWriterHandler
 from synapse.handlers.user_directory import UserDirectoryHandler
+from synapse.handlers.worker_lock import WorkerLocksHandler
 from synapse.http.client import (
     InsecureInterceptableContextFactory,
     ReplicationClient,
@@ -912,3 +913,7 @@ class HomeServer(metaclass=abc.ABCMeta):
     def get_common_usage_metrics_manager(self) -> CommonUsageMetricsManager:
         """Usage metrics shared between phone home stats and the prometheus exporter."""
         return CommonUsageMetricsManager(self)
+
+    @cache_in_self
+    def get_worker_locks_handler(self) -> WorkerLocksHandler:
+        return WorkerLocksHandler(self)
diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py
index 35c0680365..35cd1089d6 100644
--- a/synapse/storage/controllers/persist_events.py
+++ b/synapse/storage/controllers/persist_events.py
@@ -45,6 +45,7 @@ from twisted.internet import defer
 from synapse.api.constants import EventTypes, Membership
 from synapse.events import EventBase
 from synapse.events.snapshot import EventContext
+from synapse.handlers.worker_lock import DELETE_ROOM_LOCK_NAME
 from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable
 from synapse.logging.opentracing import (
     SynapseTags,
@@ -338,6 +339,7 @@ class EventsPersistenceStorageController:
         )
         self._state_resolution_handler = hs.get_state_resolution_handler()
         self._state_controller = state_controller
+        self.hs = hs
 
     async def _process_event_persist_queue_task(
         self,
@@ -350,15 +352,22 @@ class EventsPersistenceStorageController:
             A dictionary of event ID to event ID we didn't persist as we already
             had another event persisted with the same TXN ID.
         """
-        if isinstance(task, _PersistEventsTask):
-            return await self._persist_event_batch(room_id, task)
-        elif isinstance(task, _UpdateCurrentStateTask):
-            await self._update_current_state(room_id, task)
-            return {}
-        else:
-            raise AssertionError(
-                f"Found an unexpected task type in event persistence queue: {task}"
-            )
+
+        # Ensure that the room can't be deleted while we're persisting events to
+        # it. We might already have taken out the lock, but since this is just a
+        # "read" lock it's inherently reentrant.
+        async with self.hs.get_worker_locks_handler().acquire_read_write_lock(
+            DELETE_ROOM_LOCK_NAME, room_id, write=False
+        ):
+            if isinstance(task, _PersistEventsTask):
+                return await self._persist_event_batch(room_id, task)
+            elif isinstance(task, _UpdateCurrentStateTask):
+                await self._update_current_state(room_id, task)
+                return {}
+            else:
+                raise AssertionError(
+                    f"Found an unexpected task type in event persistence queue: {task}"
+                )
 
     @trace
     async def persist_events(
diff --git a/synapse/storage/databases/main/lock.py b/synapse/storage/databases/main/lock.py
index c89b4f7919..1680bf6168 100644
--- a/synapse/storage/databases/main/lock.py
+++ b/synapse/storage/databases/main/lock.py
@@ -12,8 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+from contextlib import AsyncExitStack
 from types import TracebackType
-from typing import TYPE_CHECKING, Optional, Set, Tuple, Type
+from typing import TYPE_CHECKING, Collection, Optional, Set, Tuple, Type
 from weakref import WeakValueDictionary
 
 from twisted.internet.interfaces import IReactorCore
@@ -208,76 +209,85 @@ class LockStore(SQLBaseStore):
         used (otherwise the lock will leak).
         """
 
+        try:
+            lock = await self.db_pool.runInteraction(
+                "try_acquire_read_write_lock",
+                self._try_acquire_read_write_lock_txn,
+                lock_name,
+                lock_key,
+                write,
+            )
+        except self.database_engine.module.IntegrityError:
+            return None
+
+        return lock
+
+    def _try_acquire_read_write_lock_txn(
+        self,
+        txn: LoggingTransaction,
+        lock_name: str,
+        lock_key: str,
+        write: bool,
+    ) -> "Lock":
+        # We attempt to acquire the lock by inserting into
+        # `worker_read_write_locks` and seeing if that fails any
+        # constraints. If it doesn't then we have acquired the lock,
+        # otherwise we haven't.
+        #
+        # Before that though we clear the table of any stale locks.
+
         now = self._clock.time_msec()
         token = random_string(6)
 
-        def _try_acquire_read_write_lock_txn(txn: LoggingTransaction) -> None:
-            # We attempt to acquire the lock by inserting into
-            # `worker_read_write_locks` and seeing if that fails any
-            # constraints. If it doesn't then we have acquired the lock,
-            # otherwise we haven't.
-            #
-            # Before that though we clear the table of any stale locks.
-
-            delete_sql = """
-                DELETE FROM worker_read_write_locks
-                    WHERE last_renewed_ts < ? AND lock_name = ? AND lock_key = ?;
-            """
-
-            insert_sql = """
-                INSERT INTO worker_read_write_locks (lock_name, lock_key, write_lock, instance_name, token, last_renewed_ts)
-                VALUES (?, ?, ?, ?, ?, ?)
-            """
-
-            if isinstance(self.database_engine, PostgresEngine):
-                # For Postgres we can send these queries at the same time.
-                txn.execute(
-                    delete_sql + ";" + insert_sql,
-                    (
-                        # DELETE args
-                        now - _LOCK_TIMEOUT_MS,
-                        lock_name,
-                        lock_key,
-                        # UPSERT args
-                        lock_name,
-                        lock_key,
-                        write,
-                        self._instance_name,
-                        token,
-                        now,
-                    ),
-                )
-            else:
-                # For SQLite these need to be two queries.
-                txn.execute(
-                    delete_sql,
-                    (
-                        now - _LOCK_TIMEOUT_MS,
-                        lock_name,
-                        lock_key,
-                    ),
-                )
-                txn.execute(
-                    insert_sql,
-                    (
-                        lock_name,
-                        lock_key,
-                        write,
-                        self._instance_name,
-                        token,
-                        now,
-                    ),
-                )
+        delete_sql = """
+            DELETE FROM worker_read_write_locks
+                WHERE last_renewed_ts < ? AND lock_name = ? AND lock_key = ?;
+        """
 
-            return
+        insert_sql = """
+            INSERT INTO worker_read_write_locks (lock_name, lock_key, write_lock, instance_name, token, last_renewed_ts)
+            VALUES (?, ?, ?, ?, ?, ?)
+        """
 
-        try:
-            await self.db_pool.runInteraction(
-                "try_acquire_read_write_lock",
-                _try_acquire_read_write_lock_txn,
+        if isinstance(self.database_engine, PostgresEngine):
+            # For Postgres we can send these queries at the same time.
+            txn.execute(
+                delete_sql + ";" + insert_sql,
+                (
+                    # DELETE args
+                    now - _LOCK_TIMEOUT_MS,
+                    lock_name,
+                    lock_key,
+                    # INSERT args
+                    lock_name,
+                    lock_key,
+                    write,
+                    self._instance_name,
+                    token,
+                    now,
+                ),
+            )
+        else:
+            # For SQLite these need to be two queries.
+            txn.execute(
+                delete_sql,
+                (
+                    now - _LOCK_TIMEOUT_MS,
+                    lock_name,
+                    lock_key,
+                ),
+            )
+            txn.execute(
+                insert_sql,
+                (
+                    lock_name,
+                    lock_key,
+                    write,
+                    self._instance_name,
+                    token,
+                    now,
+                ),
             )
-        except self.database_engine.module.IntegrityError:
-            return None
 
         lock = Lock(
             self._reactor,
@@ -289,10 +299,58 @@ class LockStore(SQLBaseStore):
             token=token,
         )
 
-        self._live_read_write_lock_tokens[(lock_name, lock_key, token)] = lock
+        def set_lock() -> None:
+            self._live_read_write_lock_tokens[(lock_name, lock_key, token)] = lock
+
+        txn.call_after(set_lock)
 
         return lock
 
+    async def try_acquire_multi_read_write_lock(
+        self,
+        lock_names: Collection[Tuple[str, str]],
+        write: bool,
+    ) -> Optional[AsyncExitStack]:
+        """Try to acquire multiple locks for the given names/keys. Will return
+        an async context manager if the locks are successfully acquired, which
+        *must* be used (otherwise the lock will leak).
+
+        If only a subset of the locks can be acquired then it will immediately
+        drop them and return `None`.
+        """
+        try:
+            locks = await self.db_pool.runInteraction(
+                "try_acquire_multi_read_write_lock",
+                self._try_acquire_multi_read_write_lock_txn,
+                lock_names,
+                write,
+            )
+        except self.database_engine.module.IntegrityError:
+            return None
+
+        stack = AsyncExitStack()
+
+        for lock in locks:
+            await stack.enter_async_context(lock)
+
+        return stack
+
+    def _try_acquire_multi_read_write_lock_txn(
+        self,
+        txn: LoggingTransaction,
+        lock_names: Collection[Tuple[str, str]],
+        write: bool,
+    ) -> Collection["Lock"]:
+        """Try to take each of the given locks using the one transaction `txn`.
+
+        Errors raised while taking any lock (e.g. the `IntegrityError` the
+        caller catches) are not handled here and propagate out.
+        """
+        return [
+            self._try_acquire_read_write_lock_txn(txn, name, key, write)
+            for name, key in lock_names
+        ]
+
 
 class Lock:
     """An async context manager that manages an acquired lock, ensuring it is
diff --git a/tests/handlers/test_worker_lock.py b/tests/handlers/test_worker_lock.py
new file mode 100644
index 0000000000..73e548726c
--- /dev/null
+++ b/tests/handlers/test_worker_lock.py
@@ -0,0 +1,74 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from twisted.internet import defer
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.server import HomeServer
+from synapse.util import Clock
+
+from tests import unittest
+from tests.replication._base import BaseMultiWorkerStreamTestCase
+
+
+class WorkerLockTestCase(unittest.HomeserverTestCase):
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
+        self.worker_lock_handler = self.hs.get_worker_locks_handler()
+
+    def test_wait_for_lock_locally(self) -> None:
+        """Test waiting for a lock on a single worker"""
+
+        lock1 = self.worker_lock_handler.acquire_lock("name", "key")
+        self.get_success(lock1.__aenter__())
+
+        lock2 = self.worker_lock_handler.acquire_lock("name", "key")
+        d2 = defer.ensureDeferred(lock2.__aenter__())
+        self.assertNoResult(d2)
+
+        self.get_success(lock1.__aexit__(None, None, None))
+
+        self.get_success(d2)
+        self.get_success(lock2.__aexit__(None, None, None))
+
+
+class WorkerLockWorkersTestCase(BaseMultiWorkerStreamTestCase):
+    def prepare(
+        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
+    ) -> None:
+        self.main_worker_lock_handler = self.hs.get_worker_locks_handler()
+
+    def test_wait_for_lock_worker(self) -> None:
+        """Test waiting for a lock on another worker"""
+
+        worker = self.make_worker_hs(
+            "synapse.app.generic_worker",
+            extra_config={
+                "redis": {"enabled": True},
+            },
+        )
+        worker_lock_handler = worker.get_worker_locks_handler()
+
+        lock1 = self.main_worker_lock_handler.acquire_lock("name", "key")
+        self.get_success(lock1.__aenter__())
+
+        lock2 = worker_lock_handler.acquire_lock("name", "key")
+        d2 = defer.ensureDeferred(lock2.__aenter__())
+        self.assertNoResult(d2)
+
+        self.get_success(lock1.__aexit__(None, None, None))
+
+        self.get_success(d2)
+        self.get_success(lock2.__aexit__(None, None, None))
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index d013e75d55..4f6347be15 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -711,7 +711,7 @@ class RoomsCreateTestCase(RoomBase):
         self.assertEqual(HTTPStatus.OK, channel.code, channel.result)
         self.assertTrue("room_id" in channel.json_body)
         assert channel.resource_usage is not None
-        self.assertEqual(30, channel.resource_usage.db_txn_count)
+        self.assertEqual(32, channel.resource_usage.db_txn_count)
 
     def test_post_room_initial_state(self) -> None:
         # POST with initial_state config key, expect new room id
@@ -724,7 +724,7 @@ class RoomsCreateTestCase(RoomBase):
         self.assertEqual(HTTPStatus.OK, channel.code, channel.result)
         self.assertTrue("room_id" in channel.json_body)
         assert channel.resource_usage is not None
-        self.assertEqual(32, channel.resource_usage.db_txn_count)
+        self.assertEqual(34, channel.resource_usage.db_txn_count)
 
     def test_post_room_visibility_key(self) -> None:
         # POST with visibility config key, expect new room id
diff --git a/tests/storage/databases/main/test_lock.py b/tests/storage/databases/main/test_lock.py
index ad454f6dd8..383da83dfb 100644
--- a/tests/storage/databases/main/test_lock.py
+++ b/tests/storage/databases/main/test_lock.py
@@ -448,3 +448,55 @@ class ReadWriteLockTestCase(unittest.HomeserverTestCase):
         self.get_success(self.store._on_shutdown())
 
         self.assertEqual(self.store._live_read_write_lock_tokens, {})
+
+    def test_acquire_multiple_locks(self) -> None:
+        """Tests that acquiring multiple locks at once works."""
+
+        # Take out multiple locks and ensure that we can't get those locks out
+        # again.
+        lock = self.get_success(
+            self.store.try_acquire_multi_read_write_lock(
+                [("name1", "key1"), ("name2", "key2")], write=True
+            )
+        )
+        self.assertIsNotNone(lock)
+
+        assert lock is not None
+        self.get_success(lock.__aenter__())
+
+        lock2 = self.get_success(
+            self.store.try_acquire_read_write_lock("name1", "key1", write=True)
+        )
+        self.assertIsNone(lock2)
+
+        lock3 = self.get_success(
+            self.store.try_acquire_read_write_lock("name2", "key2", write=False)
+        )
+        self.assertIsNone(lock3)
+
+        # Overlapping lock attempts will fail, and won't take out any locks.
+        lock4 = self.get_success(
+            self.store.try_acquire_multi_read_write_lock(
+                [("name1", "key1"), ("name3", "key3")], write=True
+            )
+        )
+        self.assertIsNone(lock4)
+
+        lock5 = self.get_success(
+            self.store.try_acquire_read_write_lock("name3", "key3", write=True)
+        )
+        self.assertIsNotNone(lock5)
+        assert lock5 is not None
+        self.get_success(lock5.__aenter__())
+        self.get_success(lock5.__aexit__(None, None, None))
+
+        # Once we release the lock we can take out the locks again.
+        self.get_success(lock.__aexit__(None, None, None))
+
+        lock6 = self.get_success(
+            self.store.try_acquire_read_write_lock("name1", "key1", write=True)
+        )
+        self.assertIsNotNone(lock6)
+        assert lock6 is not None
+        self.get_success(lock6.__aenter__())
+        self.get_success(lock6.__aexit__(None, None, None))
-- 
cgit 1.5.1


From b7695ac38843d679b7121495729e0d433c37688e Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 31 Jul 2023 08:44:45 -0400
Subject: Combine duplicated code for calculating an event ID from a txn ID
 (#16023)

Refactoring related to stabilization of MSC3970, refactor to combine
code which has the same logic.
---
 changelog.d/16023.misc          |  1 +
 synapse/handlers/message.py     | 39 +++++++++++++++++++++++++++++++--------
 synapse/handlers/room_member.py | 28 ++++------------------------
 3 files changed, 36 insertions(+), 32 deletions(-)
 create mode 100644 changelog.d/16023.misc

(limited to 'synapse')

diff --git a/changelog.d/16023.misc b/changelog.d/16023.misc
new file mode 100644
index 0000000000..ee732318e4
--- /dev/null
+++ b/changelog.d/16023.misc
@@ -0,0 +1 @@
+Combine duplicated code.
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 187dedae7d..c656e07d37 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -878,14 +878,13 @@ class EventCreationHandler:
                 return prev_event
         return None
 
-    async def get_event_from_transaction(
+    async def get_event_id_from_transaction(
         self,
         requester: Requester,
         txn_id: str,
         room_id: str,
-    ) -> Optional[EventBase]:
-        """For the given transaction ID and room ID, check if there is a matching event.
-        If so, fetch it and return it.
+    ) -> Optional[str]:
+        """For the given transaction ID and room ID, check if there is a matching event ID.
 
         Args:
             requester: The requester making the request in the context of which we want
@@ -894,8 +893,9 @@ class EventCreationHandler:
             room_id: The room ID.
 
         Returns:
-            An event if one could be found, None otherwise.
+            An event ID if one could be found, None otherwise.
         """
+        existing_event_id = None
 
         if self._msc3970_enabled and requester.device_id:
             # When MSC3970 is enabled, we lookup for events sent by the same device first,
@@ -909,7 +909,7 @@ class EventCreationHandler:
                 )
             )
             if existing_event_id:
-                return await self.store.get_event(existing_event_id)
+                return existing_event_id
 
         # Pre-MSC3970, we looked up for events that were sent by the same session by
         # using the access token ID.
@@ -922,9 +922,32 @@ class EventCreationHandler:
                     txn_id,
                 )
             )
-            if existing_event_id:
-                return await self.store.get_event(existing_event_id)
 
+        return existing_event_id
+
+    async def get_event_from_transaction(
+        self,
+        requester: Requester,
+        txn_id: str,
+        room_id: str,
+    ) -> Optional[EventBase]:
+        """For the given transaction ID and room ID, check if there is a matching event.
+        If so, fetch it and return it.
+
+        Args:
+            requester: The requester making the request in the context of which we want
+                to fetch the event.
+            txn_id: The transaction ID.
+            room_id: The room ID.
+
+        Returns:
+            An event if one could be found, None otherwise.
+        """
+        existing_event_id = await self.get_event_id_from_transaction(
+            requester, txn_id, room_id
+        )
+        if existing_event_id:
+            return await self.store.get_event(existing_event_id)
         return None
 
     async def create_and_send_nonmember_event(
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 6cca2ec344..e3cdf2bc61 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -176,8 +176,6 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         self.request_ratelimiter = hs.get_request_ratelimiter()
         hs.get_notifier().add_new_join_in_room_callback(self._on_user_joined_room)
 
-        self._msc3970_enabled = hs.config.experimental.msc3970_enabled
-
     def _on_user_joined_room(self, event_id: str, room_id: str) -> None:
         """Notify the rate limiter that a room join has occurred.
 
@@ -418,29 +416,11 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         # do this check just before we persist an event as well, but may as well
         # do it up front for efficiency.)
         if txn_id:
-            existing_event_id = None
-            if self._msc3970_enabled and requester.device_id:
-                # When MSC3970 is enabled, we lookup for events sent by the same device
-                # first, and fallback to the old behaviour if none were found.
-                existing_event_id = (
-                    await self.store.get_event_id_from_transaction_id_and_device_id(
-                        room_id,
-                        requester.user.to_string(),
-                        requester.device_id,
-                        txn_id,
-                    )
+            existing_event_id = (
+                await self.event_creation_handler.get_event_id_from_transaction(
+                    requester, txn_id, room_id
                 )
-
-            if requester.access_token_id and not existing_event_id:
-                existing_event_id = (
-                    await self.store.get_event_id_from_transaction_id_and_token_id(
-                        room_id,
-                        requester.user.to_string(),
-                        requester.access_token_id,
-                        txn_id,
-                    )
-                )
-
+            )
             if existing_event_id:
                 event_pos = await self.store.get_position_for_event(existing_event_id)
                 return existing_event_id, event_pos.stream
-- 
cgit 1.5.1


From 7cbb2a00d1ed07d42c6fa1fb226db512cd2a6b90 Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Tue, 1 Aug 2023 07:10:49 -0500
Subject: Add metrics tracking for eviction to ResponseCache (#16028)

Track whether the ResponseCache is evicting due to invalidation
or due to time.
---
 changelog.d/16028.misc                |  1 +
 synapse/util/caches/response_cache.py | 10 ++++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/16028.misc

(limited to 'synapse')

diff --git a/changelog.d/16028.misc b/changelog.d/16028.misc
new file mode 100644
index 0000000000..3a1e9fef09
--- /dev/null
+++ b/changelog.d/16028.misc
@@ -0,0 +1 @@
+Collect additional metrics from `ResponseCache` for eviction.
diff --git a/synapse/util/caches/response_cache.py b/synapse/util/caches/response_cache.py
index 340e5e9145..0cb46700a9 100644
--- a/synapse/util/caches/response_cache.py
+++ b/synapse/util/caches/response_cache.py
@@ -36,7 +36,7 @@ from synapse.logging.opentracing import (
 )
 from synapse.util import Clock
 from synapse.util.async_helpers import AbstractObservableDeferred, ObservableDeferred
-from synapse.util.caches import register_cache
+from synapse.util.caches import EvictionReason, register_cache
 
 logger = logging.getLogger(__name__)
 
@@ -167,7 +167,7 @@ class ResponseCache(Generic[KV]):
             # the should_cache bit, we leave it in the cache for now and schedule
             # its removal later.
             if self.timeout_sec and context.should_cache:
-                self.clock.call_later(self.timeout_sec, self.unset, key)
+                self.clock.call_later(self.timeout_sec, self._entry_timeout, key)
             else:
                 # otherwise, remove the result immediately.
                 self.unset(key)
@@ -185,6 +185,12 @@ class ResponseCache(Generic[KV]):
         Args:
             key: key used to remove the cached value
         """
+        self._metrics.inc_evictions(EvictionReason.invalidation)
+        self._result_cache.pop(key, None)
+
+    def _entry_timeout(self, key: KV) -> None:
+        """For the call_later to remove from the cache"""
+        self._metrics.inc_evictions(EvictionReason.time)
         self._result_cache.pop(key, None)
 
     async def wrap(
-- 
cgit 1.5.1


From 5eb3fd785bdbf2ae07031f13a6ac5fb578adc338 Mon Sep 17 00:00:00 2001
From: Mohit Rathee <mohit.rathee2505@gmail.com>
Date: Tue, 1 Aug 2023 18:44:02 +0530
Subject: Trim whitespace when setting display names (#16031)

---
 changelog.d/16031.bugfix          |  1 +
 synapse/handlers/profile.py       |  2 +-
 tests/rest/client/test_profile.py | 12 ++++++++++++
 3 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/16031.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16031.bugfix b/changelog.d/16031.bugfix
new file mode 100644
index 0000000000..e48bf3975c
--- /dev/null
+++ b/changelog.d/16031.bugfix
@@ -0,0 +1 @@
+Remove leading and trailing spaces when setting a display name.
diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py
index c7fe101cd9..c2109036ec 100644
--- a/synapse/handlers/profile.py
+++ b/synapse/handlers/profile.py
@@ -163,7 +163,7 @@ class ProfileHandler:
                 400, "Displayname is too long (max %i)" % (MAX_DISPLAYNAME_LEN,)
             )
 
-        displayname_to_set: Optional[str] = new_displayname
+        displayname_to_set: Optional[str] = new_displayname.strip()
         if new_displayname == "":
             displayname_to_set = None
 
diff --git a/tests/rest/client/test_profile.py b/tests/rest/client/test_profile.py
index 27c93ad761..ecae092b47 100644
--- a/tests/rest/client/test_profile.py
+++ b/tests/rest/client/test_profile.py
@@ -68,6 +68,18 @@ class ProfileTestCase(unittest.HomeserverTestCase):
         res = self._get_displayname()
         self.assertEqual(res, "test")
 
+    def test_set_displayname_with_extra_spaces(self) -> None:
+        channel = self.make_request(
+            "PUT",
+            "/profile/%s/displayname" % (self.owner,),
+            content={"displayname": "  test  "},
+            access_token=self.owner_tok,
+        )
+        self.assertEqual(channel.code, 200, channel.result)
+
+        res = self._get_displayname()
+        self.assertEqual(res, "test")
+
     def test_set_displayname_noauth(self) -> None:
         channel = self.make_request(
             "PUT",
-- 
cgit 1.5.1


From 90ad836ed8f4b701580213a89f2befb742c88b5e Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 1 Aug 2023 10:36:33 -0400
Subject: Properly setup the additional sequences in the portdb script.
 (#16043)

The un_partial_stated_event_stream_sequence and
application_services_txn_id_seq were never properly configured
in the portdb script, resulting in an error on start-up.
---
 changelog.d/16043.bugfix            |  1 +
 synapse/_scripts/synapse_port_db.py | 18 +++++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/16043.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16043.bugfix b/changelog.d/16043.bugfix
new file mode 100644
index 0000000000..78c0f3455a
--- /dev/null
+++ b/changelog.d/16043.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where the `synapse_port_db` failed to configure sequences for application services and partial stated rooms.
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 7c4aa0afa2..22c84fbd5b 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -761,7 +761,7 @@ class Porter:
 
             # Step 2. Set up sequences
             #
-            # We do this before porting the tables so that event if we fail half
+            # We do this before porting the tables so that even if we fail half
             # way through the postgres DB always have sequences that are greater
             # than their respective tables. If we don't then creating the
             # `DataStore` object will fail due to the inconsistency.
@@ -769,6 +769,10 @@ class Porter:
             await self._setup_state_group_id_seq()
             await self._setup_user_id_seq()
             await self._setup_events_stream_seqs()
+            await self._setup_sequence(
+                "un_partial_stated_event_stream_sequence",
+                ("un_partial_stated_event_stream",),
+            )
             await self._setup_sequence(
                 "device_inbox_sequence", ("device_inbox", "device_federation_outbox")
             )
@@ -779,6 +783,11 @@ class Porter:
             await self._setup_sequence("receipts_sequence", ("receipts_linearized",))
             await self._setup_sequence("presence_stream_sequence", ("presence_stream",))
             await self._setup_auth_chain_sequence()
+            await self._setup_sequence(
+                "application_services_txn_id_seq",
+                ("application_services_txns",),
+                "txn_id",
+            )
 
             # Step 3. Get tables.
             self.progress.set_state("Fetching tables")
@@ -1083,7 +1092,10 @@ class Porter:
         )
 
     async def _setup_sequence(
-        self, sequence_name: str, stream_id_tables: Iterable[str]
+        self,
+        sequence_name: str,
+        stream_id_tables: Iterable[str],
+        column_name: str = "stream_id",
     ) -> None:
         """Set a sequence to the correct value."""
         current_stream_ids = []
@@ -1093,7 +1105,7 @@ class Porter:
                 await self.sqlite_store.db_pool.simple_select_one_onecol(
                     table=stream_id_table,
                     keyvalues={},
-                    retcol="COALESCE(MAX(stream_id), 1)",
+                    retcol=f"COALESCE(MAX({column_name}), 1)",
                     allow_none=True,
                 ),
             )
-- 
cgit 1.5.1


From 4f5bccbbba13ba10412497cb92a1460535cf7a25 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 2 Aug 2023 11:35:54 -0400
Subject: Add forward-compatibility for the redacts property (MSC2174).
 (#16013)

The location of the redacts field changes in room version 11. Ensure
it is copied to the *new* location for *old* room versions for
forwards-compatibility with clients.

Note that copying it to the *old* location for the *new* room version
was previously handled.
---
 changelog.d/16013.misc               |  1 +
 synapse/events/utils.py              | 18 +++++-----
 tests/rest/client/test_redactions.py | 67 +++++++++++++++++++++++++++---------
 3 files changed, 61 insertions(+), 25 deletions(-)
 create mode 100644 changelog.d/16013.misc

(limited to 'synapse')

diff --git a/changelog.d/16013.misc b/changelog.d/16013.misc
new file mode 100644
index 0000000000..bd161e13ed
--- /dev/null
+++ b/changelog.d/16013.misc
@@ -0,0 +1 @@
+Properly overwrite the `redacts` content-property for forwards-compatibility with room versions 1 through 10.
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index c890833b1d..967a6c245b 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -475,14 +475,16 @@ def serialize_event(
     if config.as_client_event:
         d = config.event_format(d)
 
-    # If the event is a redaction, copy the redacts field from the content to
-    # top-level for backwards compatibility.
-    if (
-        e.type == EventTypes.Redaction
-        and e.room_version.updated_redaction_rules
-        and e.redacts is not None
-    ):
-        d["redacts"] = e.redacts
+    # If the event is a redaction, the field with the redacted event ID appears
+    # in a different location depending on the room version. e.redacts handles
+    # fetching from the proper location; copy it to the other location for forwards-
+    # and backwards-compatibility with clients.
+    if e.type == EventTypes.Redaction and e.redacts is not None:
+        if e.room_version.updated_redaction_rules:
+            d["redacts"] = e.redacts
+        else:
+            d["content"] = dict(d["content"])
+            d["content"]["redacts"] = e.redacts
 
     only_event_fields = config.only_event_fields
     if only_event_fields:
diff --git a/tests/rest/client/test_redactions.py b/tests/rest/client/test_redactions.py
index 6028886bd6..180b635ea6 100644
--- a/tests/rest/client/test_redactions.py
+++ b/tests/rest/client/test_redactions.py
@@ -13,10 +13,12 @@
 # limitations under the License.
 from typing import List, Optional
 
+from parameterized import parameterized
+
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.constants import EventTypes, RelationTypes
-from synapse.api.room_versions import RoomVersions
+from synapse.api.room_versions import RoomVersion, RoomVersions
 from synapse.rest import admin
 from synapse.rest.client import login, room, sync
 from synapse.server import HomeServer
@@ -569,50 +571,81 @@ class RedactionsTestCase(HomeserverTestCase):
         self.assertIn("body", event_dict["content"], event_dict)
         self.assertEqual("I'm in a thread!", event_dict["content"]["body"])
 
-    def test_content_redaction(self) -> None:
-        """MSC2174 moved the redacts property to the content."""
+    @parameterized.expand(
+        [
+            # Tuples of:
+            #   Room version
+            #   Boolean: True if the redaction event content should include the event ID.
+            #   Boolean: true if the resulting redaction event is expected to include the
+            #            event ID in the content.
+            (RoomVersions.V10, False, False),
+            (RoomVersions.V11, True, True),
+            (RoomVersions.V11, False, True),
+        ]
+    )
+    def test_redaction_content(
+        self, room_version: RoomVersion, include_content: bool, expect_content: bool
+    ) -> None:
+        """
+        Room version 11 moved the redacts property to the content.
+
+        Ensure that the event gets created properly and that the Client-Server
+        API serves the proper backwards-compatible version.
+        """
         # Create a room with the newer room version.
         room_id = self.helper.create_room_as(
             self.mod_user_id,
             tok=self.mod_access_token,
-            room_version=RoomVersions.V11.identifier,
+            room_version=room_version.identifier,
         )
 
         # Create an event.
         b = self.helper.send(room_id=room_id, tok=self.mod_access_token)
         event_id = b["event_id"]
 
-        # Attempt to redact it with a bogus event ID.
-        self._redact_event(
+        # Ensure the event ID in the URL and the content must match.
+        if include_content:
+            self._redact_event(
+                self.mod_access_token,
+                room_id,
+                event_id,
+                expect_code=400,
+                content={"redacts": "foo"},
+            )
+
+        # Redact it for real.
+        result = self._redact_event(
             self.mod_access_token,
             room_id,
             event_id,
-            expect_code=400,
-            content={"redacts": "foo"},
+            content={"redacts": event_id} if include_content else {},
         )
-
-        # Redact it for real.
-        self._redact_event(self.mod_access_token, room_id, event_id)
+        redaction_event_id = result["event_id"]
 
         # Sync the room, to get the id of the create event
         timeline = self._sync_room_timeline(self.mod_access_token, room_id)
         redact_event = timeline[-1]
         self.assertEqual(redact_event["type"], EventTypes.Redaction)
-        # The redacts key should be in the content.
+        # The event ID should be under both the content and top-level redacts keys.
         self.assertEquals(redact_event["content"]["redacts"], event_id)
-
-        # It should also be copied as the top-level redacts field for backwards
-        # compatibility.
         self.assertEquals(redact_event["redacts"], event_id)
 
         # But it isn't actually part of the event.
         def get_event(txn: LoggingTransaction) -> JsonDict:
             return db_to_json(
-                main_datastore._fetch_event_rows(txn, [event_id])[event_id].json
+                main_datastore._fetch_event_rows(txn, [redaction_event_id])[
+                    redaction_event_id
+                ].json
             )
 
         main_datastore = self.hs.get_datastores().main
         event_json = self.get_success(
             main_datastore.db_pool.runInteraction("get_event", get_event)
         )
-        self.assertNotIn("redacts", event_json)
+        self.assertEquals(event_json["type"], EventTypes.Redaction)
+        if expect_content:
+            self.assertNotIn("redacts", event_json)
+            self.assertEquals(event_json["content"]["redacts"], event_id)
+        else:
+            self.assertEquals(event_json["redacts"], event_id)
+            self.assertNotIn("redacts", event_json["content"])
-- 
cgit 1.5.1


From 9c462f18a4b6969f627349d956b9161968ab8252 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Thu, 3 Aug 2023 12:42:19 +0000
Subject: Allow modules to check whether the current worker is configured to
 run background tasks. (#15991)

---
 changelog.d/15991.misc         |  1 +
 synapse/module_api/__init__.py | 12 ++++++++++++
 2 files changed, 13 insertions(+)
 create mode 100644 changelog.d/15991.misc

(limited to 'synapse')

diff --git a/changelog.d/15991.misc b/changelog.d/15991.misc
new file mode 100644
index 0000000000..18f388cff8
--- /dev/null
+++ b/changelog.d/15991.misc
@@ -0,0 +1 @@
+Allow modules to check whether the current worker is configured to run background tasks.
\ No newline at end of file
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 95f7800111..ba1a925003 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -1230,6 +1230,18 @@ class ModuleApi:
                 f,
             )
 
+    def should_run_background_tasks(self) -> bool:
+        """
+        Return true if and only if the current worker is configured to run
+        background tasks.
+        There should only be one worker configured to run background tasks, so
+        this is helpful when you need to only run a task on one worker but don't
+        have any other good way to choose which one.
+
+        Added in Synapse v1.89.0.
+        """
+        return self._hs.config.worker.run_background_tasks
+
     async def sleep(self, seconds: float) -> None:
         """Sleeps for the given number of seconds.
 
-- 
cgit 1.5.1


From f0a860908ba0309c89c9dba452d99b4f9c6928f7 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Thu, 3 Aug 2023 20:36:55 +0200
Subject: Allow config of the backoff algorithm for the federation client.
 (#15754)

Adds three new configuration variables:

* destination_min_retry_interval is identical to before (10 minutes).
* destination_retry_multiplier is now 2 instead of 5, so the maximum value will
  be reached more slowly.
* destination_max_retry_interval is one week instead of (essentially) infinity.

Capping this will cause destinations to continue to be retried sometimes instead
of being lost forever. The previous value was 2 ^ 62 milliseconds.
---
 changelog.d/15754.misc                           |  1 +
 docs/usage/configuration/config_documentation.md | 11 +++++++++
 synapse/config/federation.py                     | 18 +++++++++++++++
 synapse/util/retryutils.py                       | 29 +++++++++++++-----------
 tests/storage/test_transactions.py               |  9 ++++++--
 tests/util/test_retryutils.py                    | 22 +++++++++---------
 6 files changed, 64 insertions(+), 26 deletions(-)
 create mode 100644 changelog.d/15754.misc

(limited to 'synapse')

diff --git a/changelog.d/15754.misc b/changelog.d/15754.misc
new file mode 100644
index 0000000000..4314d415a3
--- /dev/null
+++ b/changelog.d/15754.misc
@@ -0,0 +1 @@
+Allow for the configuration of the backoff algorithm for federation destinations.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 4e6fcd085a..c32608da2b 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1242,6 +1242,14 @@ like sending a federation transaction.
 * `max_short_retries`: maximum number of retries for the short retry algo. Default to 3 attempts.
 * `max_long_retries`: maximum number of retries for the long retry algo. Default to 10 attempts.
 
+The following options control the retry logic when communicating with a specific homeserver destination.
+Unlike the previous configuration options, these values apply across all requests
+for a given destination and the state of the backoff is stored in the database.
+
+* `destination_min_retry_interval`: the initial backoff, after the first request fails. Defaults to 10m.
+* `destination_retry_multiplier`: how much we multiply the backoff by after each subsequent fail. Defaults to 2.
+* `destination_max_retry_interval`: a cap on the backoff. Defaults to a week.
+
 Example configuration:
 ```yaml
 federation:
@@ -1250,6 +1258,9 @@ federation:
   max_long_retry_delay: 100s
   max_short_retries: 5
   max_long_retries: 20
+  destination_min_retry_interval: 30s
+  destination_retry_multiplier: 5
+  destination_max_retry_interval: 12h
 ```
 ---
 ## Caching
diff --git a/synapse/config/federation.py b/synapse/config/federation.py
index 0e1cb8b6e3..97636039b8 100644
--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@@ -65,5 +65,23 @@ class FederationConfig(Config):
         self.max_long_retries = federation_config.get("max_long_retries", 10)
         self.max_short_retries = federation_config.get("max_short_retries", 3)
 
+        # Allow for the configuration of the backoff algorithm used
+        # when trying to reach an unavailable destination.
+        # Unlike the previous configuration options, these values apply across
+        # multiple requests and the state of the backoff is stored in the database.
+        self.destination_min_retry_interval_ms = Config.parse_duration(
+            federation_config.get("destination_min_retry_interval", "10m")
+        )
+        self.destination_retry_multiplier = federation_config.get(
+            "destination_retry_multiplier", 2
+        )
+        self.destination_max_retry_interval_ms = min(
+            Config.parse_duration(
+                federation_config.get("destination_max_retry_interval", "7d")
+            ),
+            # Set a hard-limit to not overflow the database column.
+            2**62,
+        )
+
 
 _METRICS_FOR_DOMAINS_SCHEMA = {"type": "array", "items": {"type": "string"}}
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index dcc037b982..27e9fc976c 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -27,15 +27,6 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
-# the initial backoff, after the first transaction fails
-MIN_RETRY_INTERVAL = 10 * 60 * 1000
-
-# how much we multiply the backoff by after each subsequent fail
-RETRY_MULTIPLIER = 5
-
-# a cap on the backoff. (Essentially none)
-MAX_RETRY_INTERVAL = 2**62
-
 
 class NotRetryingDestination(Exception):
     def __init__(self, retry_last_ts: int, retry_interval: int, destination: str):
@@ -169,6 +160,16 @@ class RetryDestinationLimiter:
         self.notifier = notifier
         self.replication_client = replication_client
 
+        self.destination_min_retry_interval_ms = (
+            self.store.hs.config.federation.destination_min_retry_interval_ms
+        )
+        self.destination_retry_multiplier = (
+            self.store.hs.config.federation.destination_retry_multiplier
+        )
+        self.destination_max_retry_interval_ms = (
+            self.store.hs.config.federation.destination_max_retry_interval_ms
+        )
+
     def __enter__(self) -> None:
         pass
 
@@ -220,13 +221,15 @@ class RetryDestinationLimiter:
             # We couldn't connect.
             if self.retry_interval:
                 self.retry_interval = int(
-                    self.retry_interval * RETRY_MULTIPLIER * random.uniform(0.8, 1.4)
+                    self.retry_interval
+                    * self.destination_retry_multiplier
+                    * random.uniform(0.8, 1.4)
                 )
 
-                if self.retry_interval >= MAX_RETRY_INTERVAL:
-                    self.retry_interval = MAX_RETRY_INTERVAL
+                if self.retry_interval >= self.destination_max_retry_interval_ms:
+                    self.retry_interval = self.destination_max_retry_interval_ms
             else:
-                self.retry_interval = MIN_RETRY_INTERVAL
+                self.retry_interval = self.destination_min_retry_interval_ms
 
             logger.info(
                 "Connection to %s was unsuccessful (%s(%s)); backoff now %i",
diff --git a/tests/storage/test_transactions.py b/tests/storage/test_transactions.py
index 2fab84a529..ef06b50dbb 100644
--- a/tests/storage/test_transactions.py
+++ b/tests/storage/test_transactions.py
@@ -17,7 +17,6 @@ from twisted.test.proto_helpers import MemoryReactor
 from synapse.server import HomeServer
 from synapse.storage.databases.main.transactions import DestinationRetryTimings
 from synapse.util import Clock
-from synapse.util.retryutils import MAX_RETRY_INTERVAL
 
 from tests.unittest import HomeserverTestCase
 
@@ -57,8 +56,14 @@ class TransactionStoreTestCase(HomeserverTestCase):
         self.get_success(d)
 
     def test_large_destination_retry(self) -> None:
+        max_retry_interval_ms = (
+            self.hs.config.federation.destination_max_retry_interval_ms
+        )
         d = self.store.set_destination_retry_timings(
-            "example.com", MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL, MAX_RETRY_INTERVAL
+            "example.com",
+            max_retry_interval_ms,
+            max_retry_interval_ms,
+            max_retry_interval_ms,
         )
         self.get_success(d)
 
diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py
index 5f8f4e76b5..1277e1a865 100644
--- a/tests/util/test_retryutils.py
+++ b/tests/util/test_retryutils.py
@@ -11,12 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from synapse.util.retryutils import (
-    MIN_RETRY_INTERVAL,
-    RETRY_MULTIPLIER,
-    NotRetryingDestination,
-    get_retry_limiter,
-)
+from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter
 
 from tests.unittest import HomeserverTestCase
 
@@ -42,6 +37,11 @@ class RetryLimiterTestCase(HomeserverTestCase):
 
         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))
 
+        min_retry_interval_ms = (
+            self.hs.config.federation.destination_min_retry_interval_ms
+        )
+        retry_multiplier = self.hs.config.federation.destination_retry_multiplier
+
         self.pump(1)
         try:
             with limiter:
@@ -57,7 +57,7 @@ class RetryLimiterTestCase(HomeserverTestCase):
         assert new_timings is not None
         self.assertEqual(new_timings.failure_ts, failure_ts)
         self.assertEqual(new_timings.retry_last_ts, failure_ts)
-        self.assertEqual(new_timings.retry_interval, MIN_RETRY_INTERVAL)
+        self.assertEqual(new_timings.retry_interval, min_retry_interval_ms)
 
         # now if we try again we should get a failure
         self.get_failure(
@@ -68,7 +68,7 @@ class RetryLimiterTestCase(HomeserverTestCase):
         # advance the clock and try again
         #
 
-        self.pump(MIN_RETRY_INTERVAL)
+        self.pump(min_retry_interval_ms)
         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))
 
         self.pump(1)
@@ -87,16 +87,16 @@ class RetryLimiterTestCase(HomeserverTestCase):
         self.assertEqual(new_timings.failure_ts, failure_ts)
         self.assertEqual(new_timings.retry_last_ts, retry_ts)
         self.assertGreaterEqual(
-            new_timings.retry_interval, MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 0.5
+            new_timings.retry_interval, min_retry_interval_ms * retry_multiplier * 0.5
         )
         self.assertLessEqual(
-            new_timings.retry_interval, MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0
+            new_timings.retry_interval, min_retry_interval_ms * retry_multiplier * 2.0
         )
 
         #
         # one more go, with success
         #
-        self.reactor.advance(MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0)
+        self.reactor.advance(min_retry_interval_ms * retry_multiplier * 2.0)
         limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))
 
         self.pump(1)
-- 
cgit 1.5.1


From 0a5f4f766514b84aff84ff17dffd5301a437c797 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Thu, 3 Aug 2023 11:43:51 -0700
Subject: Move support for application service query parameter authorization
 behind a configuration option (#16017)

---
 changelog.d/16017.removal                        |  1 +
 docs/upgrade.md                                  | 16 ++++-
 docs/usage/configuration/config_documentation.md | 14 ++++
 synapse/appservice/api.py                        | 34 +++++++---
 synapse/config/appservice.py                     |  8 +++
 tests/appservice/test_api.py                     | 85 ++++++++++++++++++++++--
 6 files changed, 144 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/16017.removal

(limited to 'synapse')

diff --git a/changelog.d/16017.removal b/changelog.d/16017.removal
new file mode 100644
index 0000000000..6b72442892
--- /dev/null
+++ b/changelog.d/16017.removal
@@ -0,0 +1 @@
+Move support for application service query parameter authorization behind a configuration option.
diff --git a/docs/upgrade.md b/docs/upgrade.md
index 5dde6c769e..f50a279e98 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -88,6 +88,21 @@ process, for example:
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     ```
 
+# Upgrading to v1.90.0
+
+## App service query parameter authorization is now a configuration option
+
+Synapse v1.81.0 deprecated application service authorization via query parameters as this is
+considered insecure - and from Synapse v1.71.0 forwards the application service token has also been sent via 
+[the `Authorization` header](https://spec.matrix.org/v1.6/application-service-api/#authorization), making the insecure
+query parameter authorization redundant. Since removing the ability to continue to use query parameters could break 
+backwards compatibility it has now been put behind a configuration option, `use_appservice_legacy_authorization`.  
+This option defaults to false, but can be activated by adding 
+```yaml
+use_appservice_legacy_authorization: true 
+```
+to your configuration.
+
 # Upgrading to v1.89.0
 
 ## Removal of unspecced `user` property for `/register`
@@ -97,7 +112,6 @@ The standard `username` property should be used instead. See the
 [Application Service specification](https://spec.matrix.org/v1.7/application-service-api/#server-admin-style-permissions)
 for more information.
 
-
 # Upgrading to v1.88.0
 
 ## Minimum supported Python version
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index c32608da2b..2987c9332d 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -2848,6 +2848,20 @@ Example configuration:
 ```yaml
 track_appservice_user_ips: true
 ```
+---
+### `use_appservice_legacy_authorization`
+
+Whether to send the application service access tokens via the `access_token` query parameter
+per older versions of the Matrix specification. Defaults to false. Set to true to enable sending
+access tokens via a query parameter.
+
+**Enabling this option is considered insecure and is not recommended.**
+
+Example configuration:
+```yaml
+use_appservice_legacy_authorization: true 
+```
+
 ---
 ### `macaroon_secret_key`
 
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index 359999f680..de7a94bf26 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -16,7 +16,6 @@ import logging
 import urllib.parse
 from typing import (
     TYPE_CHECKING,
-    Any,
     Dict,
     Iterable,
     List,
@@ -25,6 +24,7 @@ from typing import (
     Sequence,
     Tuple,
     TypeVar,
+    Union,
 )
 
 from prometheus_client import Counter
@@ -119,6 +119,7 @@ class ApplicationServiceApi(SimpleHttpClient):
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
         self.clock = hs.get_clock()
+        self.config = hs.config.appservice
 
         self.protocol_meta_cache: ResponseCache[Tuple[str, str]] = ResponseCache(
             hs.get_clock(), "as_protocol_meta", timeout_ms=HOUR_IN_MS
@@ -132,9 +133,12 @@ class ApplicationServiceApi(SimpleHttpClient):
         assert service.hs_token is not None
 
         try:
+            args = None
+            if self.config.use_appservice_legacy_authorization:
+                args = {"access_token": service.hs_token}
             response = await self.get_json(
                 f"{service.url}{APP_SERVICE_PREFIX}/users/{urllib.parse.quote(user_id)}",
-                {"access_token": service.hs_token},
+                args,
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
             if response is not None:  # just an empty json object
@@ -155,9 +159,12 @@ class ApplicationServiceApi(SimpleHttpClient):
         assert service.hs_token is not None
 
         try:
+            args = None
+            if self.config.use_appservice_legacy_authorization:
+                args = {"access_token": service.hs_token}
             response = await self.get_json(
                 f"{service.url}{APP_SERVICE_PREFIX}/rooms/{urllib.parse.quote(alias)}",
-                {"access_token": service.hs_token},
+                args,
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
             if response is not None:  # just an empty json object
@@ -190,10 +197,12 @@ class ApplicationServiceApi(SimpleHttpClient):
         assert service.hs_token is not None
 
         try:
-            args: Mapping[Any, Any] = {
-                **fields,
-                b"access_token": service.hs_token,
-            }
+            args: Mapping[bytes, Union[List[bytes], str]] = fields
+            if self.config.use_appservice_legacy_authorization:
+                args = {
+                    **fields,
+                    b"access_token": service.hs_token,
+                }
             response = await self.get_json(
                 f"{service.url}{APP_SERVICE_PREFIX}/thirdparty/{kind}/{urllib.parse.quote(protocol)}",
                 args=args,
@@ -231,9 +240,12 @@ class ApplicationServiceApi(SimpleHttpClient):
             # This is required by the configuration.
             assert service.hs_token is not None
             try:
+                args = None
+                if self.config.use_appservice_legacy_authorization:
+                    args = {"access_token": service.hs_token}
                 info = await self.get_json(
                     f"{service.url}{APP_SERVICE_PREFIX}/thirdparty/protocol/{urllib.parse.quote(protocol)}",
-                    {"access_token": service.hs_token},
+                    args,
                     headers={"Authorization": [f"Bearer {service.hs_token}"]},
                 )
 
@@ -344,10 +356,14 @@ class ApplicationServiceApi(SimpleHttpClient):
                 }
 
         try:
+            args = None
+            if self.config.use_appservice_legacy_authorization:
+                args = {"access_token": service.hs_token}
+
             await self.put_json(
                 f"{service.url}{APP_SERVICE_PREFIX}/transactions/{urllib.parse.quote(str(txn_id))}",
                 json_body=body,
-                args={"access_token": service.hs_token},
+                args=args,
                 headers={"Authorization": [f"Bearer {service.hs_token}"]},
             )
             if logger.isEnabledFor(logging.DEBUG):
diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py
index c2710fdf04..919f81a9b7 100644
--- a/synapse/config/appservice.py
+++ b/synapse/config/appservice.py
@@ -43,6 +43,14 @@ class AppServiceConfig(Config):
             )
 
         self.track_appservice_user_ips = config.get("track_appservice_user_ips", False)
+        self.use_appservice_legacy_authorization = config.get(
+            "use_appservice_legacy_authorization", False
+        )
+        if self.use_appservice_legacy_authorization:
+            logger.warning(
+                "The use of appservice legacy authorization via query params is deprecated"
+                " and should be considered insecure."
+            )
 
 
 def load_appservices(
diff --git a/tests/appservice/test_api.py b/tests/appservice/test_api.py
index 807dc2f21c..3c635e3dcb 100644
--- a/tests/appservice/test_api.py
+++ b/tests/appservice/test_api.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, List, Mapping, Sequence, Union
+from typing import Any, List, Mapping, Optional, Sequence, Union
 from unittest.mock import Mock
 
 from twisted.test.proto_helpers import MemoryReactor
@@ -22,6 +22,7 @@ from synapse.types import JsonDict
 from synapse.util import Clock
 
 from tests import unittest
+from tests.unittest import override_config
 
 PROTOCOL = "myproto"
 TOKEN = "myastoken"
@@ -39,7 +40,7 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
             hs_token=TOKEN,
         )
 
-    def test_query_3pe_authenticates_token(self) -> None:
+    def test_query_3pe_authenticates_token_via_header(self) -> None:
         """
         Tests that 3pe queries to the appservice are authenticated
         with the appservice's token.
@@ -74,12 +75,88 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
             args: Mapping[Any, Any],
             headers: Mapping[Union[str, bytes], Sequence[Union[str, bytes]]],
         ) -> List[JsonDict]:
-            # Ensure the access token is passed as both a header and query arg.
-            if not headers.get("Authorization") or not args.get(b"access_token"):
+            # Ensure the access token is passed as a header.
+            if not headers or not headers.get("Authorization"):
                 raise RuntimeError("Access token not provided")
+            # ... and not as a query param
+            if b"access_token" in args:
+                raise RuntimeError(
+                    "Access token should not be passed as a query param."
+                )
 
             self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"])
+            self.request_url = url
+            if url == URL_USER:
+                return SUCCESS_RESULT_USER
+            elif url == URL_LOCATION:
+                return SUCCESS_RESULT_LOCATION
+            else:
+                raise RuntimeError(
+                    "URL provided was invalid. This should never be seen."
+                )
+
+        # We assign to a method, which mypy doesn't like.
+        self.api.get_json = Mock(side_effect=get_json)  # type: ignore[assignment]
+
+        result = self.get_success(
+            self.api.query_3pe(self.service, "user", PROTOCOL, {b"some": [b"field"]})
+        )
+        self.assertEqual(self.request_url, URL_USER)
+        self.assertEqual(result, SUCCESS_RESULT_USER)
+        result = self.get_success(
+            self.api.query_3pe(
+                self.service, "location", PROTOCOL, {b"some": [b"field"]}
+            )
+        )
+        self.assertEqual(self.request_url, URL_LOCATION)
+        self.assertEqual(result, SUCCESS_RESULT_LOCATION)
+
+    @override_config({"use_appservice_legacy_authorization": True})
+    def test_query_3pe_authenticates_token_via_param(self) -> None:
+        """
+        Tests that 3pe queries to the appservice are authenticated
+        with the appservice's token.
+        """
+
+        SUCCESS_RESULT_USER = [
+            {
+                "protocol": PROTOCOL,
+                "userid": "@a:user",
+                "fields": {
+                    "more": "fields",
+                },
+            }
+        ]
+        SUCCESS_RESULT_LOCATION = [
+            {
+                "protocol": PROTOCOL,
+                "alias": "#a:room",
+                "fields": {
+                    "more": "fields",
+                },
+            }
+        ]
+
+        URL_USER = f"{URL}/_matrix/app/v1/thirdparty/user/{PROTOCOL}"
+        URL_LOCATION = f"{URL}/_matrix/app/v1/thirdparty/location/{PROTOCOL}"
+
+        self.request_url = None
+
+        async def get_json(
+            url: str,
+            args: Mapping[Any, Any],
+            headers: Optional[
+                Mapping[Union[str, bytes], Sequence[Union[str, bytes]]]
+            ] = None,
+        ) -> List[JsonDict]:
+            # Ensure the access token is passed as both a query param and in the headers.
+            if not args.get(b"access_token"):
+                raise RuntimeError("Access token should be provided in query params.")
+            if not headers or not headers.get("Authorization"):
+                raise RuntimeError("Access token should be provided in auth headers.")
+
             self.assertEqual(args.get(b"access_token"), TOKEN)
+            self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"])
             self.request_url = url
             if url == URL_USER:
                 return SUCCESS_RESULT_USER
-- 
cgit 1.5.1


From d98a43d9226cbb4b9ab5ad3abd9b630548c2f09f Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 4 Aug 2023 07:47:18 -0400
Subject: Stabilize support for MSC3970: updated transaction semantics (scope
 to `device_id`) (#15629)

For now this maintains compatibility with old Synapses by falling back
to using transaction semantics on a per-access token. A future version
of Synapse will drop support for this.
---
 changelog.d/15629.feature                |  1 +
 synapse/config/experimental.py           |  9 -------
 synapse/events/utils.py                  | 42 ++++++++++++++++----------------
 synapse/handlers/message.py              | 12 ++++-----
 synapse/rest/client/transactions.py      | 12 ++++-----
 synapse/server.py                        |  4 +--
 synapse/storage/databases/main/events.py | 15 +++++-------
 synapse/storage/schema/__init__.py       |  5 +++-
 synapse/types/__init__.py                |  7 +++---
 9 files changed, 48 insertions(+), 59 deletions(-)
 create mode 100644 changelog.d/15629.feature

(limited to 'synapse')

diff --git a/changelog.d/15629.feature b/changelog.d/15629.feature
new file mode 100644
index 0000000000..16264effca
--- /dev/null
+++ b/changelog.d/15629.feature
@@ -0,0 +1 @@
+Scope transaction IDs to devices (implement [MSC3970](https://github.com/matrix-org/matrix-spec-proposals/pull/3970)).
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 1695ed8ca3..ac9449b18f 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -216,12 +216,6 @@ class MSC3861:
                 ("session_lifetime",),
             )
 
-        if not root.experimental.msc3970_enabled:
-            raise ConfigError(
-                "experimental_features.msc3970_enabled must be 'true' when OAuth delegation is enabled",
-                ("experimental_features", "msc3970_enabled"),
-            )
-
 
 @attr.s(auto_attribs=True, frozen=True, slots=True)
 class MSC3866Config:
@@ -397,9 +391,6 @@ class ExperimentalConfig(Config):
                 "Invalid MSC3861 configuration", ("experimental", "msc3861")
             ) from exc
 
-        # MSC3970: Scope transaction IDs to devices
-        self.msc3970_enabled = experimental.get("msc3970_enabled", self.msc3861.enabled)
-
         # Check that none of the other config options conflict with MSC3861 when enabled
         self.msc3861.check_config_conflicts(self.root)
 
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index 967a6c245b..52acb21955 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -394,7 +394,6 @@ def serialize_event(
     time_now_ms: int,
     *,
     config: SerializeEventConfig = _DEFAULT_SERIALIZE_EVENT_CONFIG,
-    msc3970_enabled: bool = False,
 ) -> JsonDict:
     """Serialize event for clients
 
@@ -402,8 +401,6 @@ def serialize_event(
         e
         time_now_ms
         config: Event serialization config
-        msc3970_enabled: Whether MSC3970 is enabled. It changes whether we should
-            include the `transaction_id` in the event's `unsigned` section.
 
     Returns:
         The serialized event dictionary.
@@ -429,38 +426,46 @@ def serialize_event(
             e.unsigned["redacted_because"],
             time_now_ms,
             config=config,
-            msc3970_enabled=msc3970_enabled,
         )
 
     # If we have a txn_id saved in the internal_metadata, we should include it in the
     # unsigned section of the event if it was sent by the same session as the one
     # requesting the event.
     txn_id: Optional[str] = getattr(e.internal_metadata, "txn_id", None)
-    if txn_id is not None and config.requester is not None:
-        # For the MSC3970 rules to be applied, we *need* to have the device ID in the
-        # event internal metadata. Since we were not recording them before, if it hasn't
-        # been recorded, we fallback to the old behaviour.
+    if (
+        txn_id is not None
+        and config.requester is not None
+        and config.requester.user.to_string() == e.sender
+    ):
+        # Some events do not have the device ID stored in the internal metadata,
+        # this includes old events as well as those created by appservice, guests,
+        # or with tokens minted with the admin API. For those events, fallback
+        # to using the access token instead.
         event_device_id: Optional[str] = getattr(e.internal_metadata, "device_id", None)
-        if msc3970_enabled and event_device_id is not None:
+        if event_device_id is not None:
             if event_device_id == config.requester.device_id:
                 d["unsigned"]["transaction_id"] = txn_id
 
         else:
-            # The pre-MSC3970 behaviour is to only include the transaction ID if the
-            # event was sent from the same access token. For regular users, we can use
-            # the access token ID to determine this. For guests, we can't, but since
-            # each guest only has one access token, we can just check that the event was
-            # sent by the same user as the one requesting the event.
+            # Fallback behaviour: only include the transaction ID if the event
+            # was sent from the same access token.
+            #
+            # For regular users, the access token ID can be used to determine this.
+            # This includes access tokens minted with the admin API.
+            #
+            # For guests and appservice users, we can't check the access token ID
+            # so assume it is the same session.
             event_token_id: Optional[int] = getattr(
                 e.internal_metadata, "token_id", None
             )
-            if config.requester.user.to_string() == e.sender and (
+            if (
                 (
                     event_token_id is not None
                     and config.requester.access_token_id is not None
                     and event_token_id == config.requester.access_token_id
                 )
                 or config.requester.is_guest
+                or config.requester.app_service
             ):
                 d["unsigned"]["transaction_id"] = txn_id
 
@@ -504,9 +509,6 @@ class EventClientSerializer:
     clients.
     """
 
-    def __init__(self, *, msc3970_enabled: bool = False):
-        self._msc3970_enabled = msc3970_enabled
-
     def serialize_event(
         self,
         event: Union[JsonDict, EventBase],
@@ -531,9 +533,7 @@ class EventClientSerializer:
         if not isinstance(event, EventBase):
             return event
 
-        serialized_event = serialize_event(
-            event, time_now, config=config, msc3970_enabled=self._msc3970_enabled
-        )
+        serialized_event = serialize_event(event, time_now, config=config)
 
         # Check if there are any bundled aggregations to include with the event.
         if bundle_aggregations:
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index c656e07d37..d485f21e49 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -561,8 +561,6 @@ class EventCreationHandler:
                 expiry_ms=30 * 60 * 1000,
             )
 
-        self._msc3970_enabled = hs.config.experimental.msc3970_enabled
-
     async def create_event(
         self,
         requester: Requester,
@@ -897,9 +895,8 @@ class EventCreationHandler:
         """
         existing_event_id = None
 
-        if self._msc3970_enabled and requester.device_id:
-            # When MSC3970 is enabled, we lookup for events sent by the same device first,
-            # and fallback to the old behaviour if none were found.
+        # According to the spec, transactions are scoped to a user's device ID.
+        if requester.device_id:
             existing_event_id = (
                 await self.store.get_event_id_from_transaction_id_and_device_id(
                     room_id,
@@ -911,8 +908,9 @@ class EventCreationHandler:
             if existing_event_id:
                 return existing_event_id
 
-        # Pre-MSC3970, we looked up for events that were sent by the same session by
-        # using the access token ID.
+        # Some requesters don't have device IDs (appservice, guests, and access
+        # tokens minted with the admin API), fallback to checking the access token
+        # ID, which should be close enough.
         if requester.access_token_id:
             existing_event_id = (
                 await self.store.get_event_id_from_transaction_id_and_token_id(
diff --git a/synapse/rest/client/transactions.py b/synapse/rest/client/transactions.py
index 0d8a63d8be..3d814c404d 100644
--- a/synapse/rest/client/transactions.py
+++ b/synapse/rest/client/transactions.py
@@ -50,8 +50,6 @@ class HttpTransactionCache:
         # for at *LEAST* 30 mins, and at *MOST* 60 mins.
         self.cleaner = self.clock.looping_call(self._cleanup, CLEANUP_PERIOD_MS)
 
-        self._msc3970_enabled = hs.config.experimental.msc3970_enabled
-
     def _get_transaction_key(self, request: IRequest, requester: Requester) -> Hashable:
         """A helper function which returns a transaction key that can be used
         with TransactionCache for idempotent requests.
@@ -78,18 +76,20 @@ class HttpTransactionCache:
         elif requester.app_service is not None:
             return (path, "appservice", requester.app_service.id)
 
-        # With MSC3970, we use the user ID and device ID as the transaction key
-        elif self._msc3970_enabled:
+        # Use the user ID and device ID as the transaction key.
+        elif requester.device_id:
             assert requester.user, "Requester must have a user"
             assert requester.device_id, "Requester must have a device_id"
             return (path, "user", requester.user, requester.device_id)
 
-        # Otherwise, the pre-MSC3970 behaviour is to use the access token ID
+        # Some requesters don't have device IDs, these are mostly handled above
+        # (appservice and guest users), but does not cover access tokens minted
+        # by the admin API. Use the access token ID instead.
         else:
             assert (
                 requester.access_token_id is not None
             ), "Requester must have an access_token_id"
-            return (path, "user", requester.access_token_id)
+            return (path, "user_admin", requester.access_token_id)
 
     def fetch_or_execute_request(
         self,
diff --git a/synapse/server.py b/synapse/server.py
index 8430f99ef2..e753ff0377 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -785,9 +785,7 @@ class HomeServer(metaclass=abc.ABCMeta):
 
     @cache_in_self
     def get_event_client_serializer(self) -> EventClientSerializer:
-        return EventClientSerializer(
-            msc3970_enabled=self.config.experimental.msc3970_enabled
-        )
+        return EventClientSerializer()
 
     @cache_in_self
     def get_password_policy_handler(self) -> PasswordPolicyHandler:
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index bd3f14fb71..c1353b18c1 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -127,8 +127,6 @@ class PersistEventsStore:
         self._backfill_id_gen: AbstractStreamIdGenerator = self.store._backfill_id_gen
         self._stream_id_gen: AbstractStreamIdGenerator = self.store._stream_id_gen
 
-        self._msc3970_enabled = hs.config.experimental.msc3970_enabled
-
     @trace
     async def _persist_events_and_state_updates(
         self,
@@ -1012,9 +1010,11 @@ class PersistEventsStore:
                         )
                     )
 
-        # Pre-MSC3970, we rely on the access_token_id to scope the txn_id for events.
-        # Since this is an experimental flag, we still store the mapping even if the
-        # flag is disabled.
+        # Synapse usually relies on the device_id to scope transactions for events,
+        # except for users without device IDs (appservice, guests, and access
+        # tokens minted with the admin API) which use the access token ID instead.
+        #
+        # TODO https://github.com/matrix-org/synapse/issues/16042
         if to_insert_token_id:
             self.db_pool.simple_insert_many_txn(
                 txn,
@@ -1030,10 +1030,7 @@ class PersistEventsStore:
                 values=to_insert_token_id,
             )
 
-        # With MSC3970, we rely on the device_id instead to scope the txn_id for events.
-        # We're only inserting if MSC3970 is *enabled*, because else the pre-MSC3970
-        # behaviour would allow for a UNIQUE constraint violation on this table
-        if to_insert_device_id and self._msc3970_enabled:
+        if to_insert_device_id:
             self.db_pool.simple_insert_many_txn(
                 txn,
                 table="event_txn_id_device_id",
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index d3ec648f6d..7de9949a5b 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 79  # remember to update the list below when updating
+SCHEMA_VERSION = 80  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -110,6 +110,9 @@ Changes in SCHEMA_VERSION = 78
 Changes in SCHEMA_VERSION = 79
     - Add tables to handle in DB read-write locks.
     - Add some mitigations for a painful race between foreground and background updates, cf #15677.
+
+Changes in SCHEMA_VERSION = 80
+    - The event_txn_id_device_id is always written to for new events.
 """
 
 
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index fdfd465c8d..39a1ae4ac3 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -117,11 +117,12 @@ class Requester:
 
     Attributes:
         user:  id of the user making the request
-        access_token_id:  *ID* of the access token used for this
-            request, or None if it came via the appservice API or similar
+        access_token_id:  *ID* of the access token used for this request, or
+            None for appservices, guests, and tokens generated by the admin API
         is_guest:  True if the user making this request is a guest user
         shadow_banned:  True if the user making this request has been shadow-banned.
-        device_id:  device_id which was set at authentication time
+        device_id:  device_id which was set at authentication time, or
+            None for appservices, guests, and tokens generated by the admin API
         app_service:  the AS requesting on behalf of the user
         authenticated_entity: The entity that authenticated when making the request.
             This is different to the user_id when an admin user or the server is
-- 
cgit 1.5.1


From 84ae2e3f6fb86115df767bb2f1fb16ac2fbaa7c3 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Fri, 4 Aug 2023 10:49:54 -0700
Subject: Fix deletion for Dehydrated Devices (#16046)

---
 changelog.d/16046.bugfix          |   1 +
 synapse/handlers/device.py        |  16 +++++
 synapse/rest/client/devices.py    |  14 ++--
 tests/rest/client/test_devices.py | 139 +++++++++++++++++++++++++++++++++++++-
 4 files changed, 165 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/16046.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16046.bugfix b/changelog.d/16046.bugfix
new file mode 100644
index 0000000000..ce5a9ae4b5
--- /dev/null
+++ b/changelog.d/16046.bugfix
@@ -0,0 +1 @@
+Fix deletion in dehydrated devices v2.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index f3a713f5fa..b7bf70a72d 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -722,6 +722,22 @@ class DeviceHandler(DeviceWorkerHandler):
 
         return {"success": True}
 
+    async def delete_dehydrated_device(self, user_id: str, device_id: str) -> None:
+        """
+        Delete a stored dehydrated device.
+
+        Args:
+            user_id: the user_id to delete the device from
+            device_id: id of the dehydrated device to delete
+        """
+        success = await self.store.remove_dehydrated_device(user_id, device_id)
+
+        if not success:
+            raise errors.NotFoundError()
+
+        await self.delete_devices(user_id, [device_id])
+        await self.store.delete_e2e_keys_by_device(user_id=user_id, device_id=device_id)
+
     @wrap_as_background_process("_handle_new_device_update_async")
     async def _handle_new_device_update_async(self) -> None:
         """Called when we have a new local device list update that we need to
diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py
index 690d2ec406..dd3f7fd666 100644
--- a/synapse/rest/client/devices.py
+++ b/synapse/rest/client/devices.py
@@ -513,10 +513,8 @@ class DehydratedDeviceV2Servlet(RestServlet):
         if dehydrated_device is not None:
             (device_id, device_data) = dehydrated_device
 
-            result = await self.device_handler.rehydrate_device(
-                requester.user.to_string(),
-                self.auth.get_access_token_from_request(request),
-                device_id,
+            await self.device_handler.delete_dehydrated_device(
+                requester.user.to_string(), device_id
             )
 
             result = {"device_id": device_id}
@@ -538,6 +536,14 @@ class DehydratedDeviceV2Servlet(RestServlet):
         requester = await self.auth.get_user_by_req(request)
         user_id = requester.user.to_string()
 
+        old_dehydrated_device = await self.device_handler.get_dehydrated_device(user_id)
+
+        # if an old device exists, delete it before creating a new one
+        if old_dehydrated_device:
+            await self.device_handler.delete_dehydrated_device(
+                user_id, old_dehydrated_device[0]
+            )
+
         device_info = submission.dict()
         if "device_keys" not in device_info.keys():
             raise SynapseError(
diff --git a/tests/rest/client/test_devices.py b/tests/rest/client/test_devices.py
index b7d420cfec..3cf29c10ea 100644
--- a/tests/rest/client/test_devices.py
+++ b/tests/rest/client/test_devices.py
@@ -379,4 +379,141 @@ class DehydratedDeviceTestCase(unittest.HomeserverTestCase):
             access_token=token,
             shorthand=False,
         )
-        self.assertEqual(channel.code, 404)
+        self.assertEqual(channel.code, 401)
+
+    @unittest.override_config(
+        {"experimental_features": {"msc2697_enabled": False, "msc3814_enabled": True}}
+    )
+    def test_msc3814_dehydrated_device_delete_works(self) -> None:
+        user = self.register_user("mikey", "pass")
+        token = self.login(user, "pass", device_id="device1")
+        content: JsonDict = {
+            "device_data": {
+                "algorithm": "m.dehydration.v1.olm",
+            },
+            "device_id": "device2",
+            "initial_device_display_name": "foo bar",
+            "device_keys": {
+                "user_id": "@mikey:test",
+                "device_id": "device2",
+                "valid_until_ts": "80",
+                "algorithms": [
+                    "m.olm.curve25519-aes-sha2",
+                ],
+                "keys": {
+                    "<algorithm>:<device_id>": "<key_base64>",
+                },
+                "signatures": {
+                    "<user_id>": {"<algorithm>:<device_id>": "<signature_base64>"}
+                },
+            },
+        }
+        channel = self.make_request(
+            "PUT",
+            "_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device",
+            content=content,
+            access_token=token,
+            shorthand=False,
+        )
+        self.assertEqual(channel.code, 200)
+        device_id = channel.json_body.get("device_id")
+        assert device_id is not None
+        self.assertIsInstance(device_id, str)
+        self.assertEqual("device2", device_id)
+
+        # ensure that keys were uploaded and available
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/r0/keys/query",
+            {
+                "device_keys": {
+                    user: ["device2"],
+                },
+            },
+            token,
+        )
+        self.assertEqual(
+            channel.json_body["device_keys"][user]["device2"]["keys"],
+            {
+                "<algorithm>:<device_id>": "<key_base64>",
+            },
+        )
+
+        # delete the dehydrated device
+        channel = self.make_request(
+            "DELETE",
+            "_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device",
+            access_token=token,
+            shorthand=False,
+        )
+        self.assertEqual(channel.code, 200)
+
+        # ensure that keys are no longer available for deleted device
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/r0/keys/query",
+            {
+                "device_keys": {
+                    user: ["device2"],
+                },
+            },
+            token,
+        )
+        self.assertEqual(channel.json_body["device_keys"], {"@mikey:test": {}})
+
+        # check that an old device is deleted when user PUTs a new device
+        # First, create a device
+        content["device_id"] = "device3"
+        content["device_keys"]["device_id"] = "device3"
+        channel = self.make_request(
+            "PUT",
+            "_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device",
+            content=content,
+            access_token=token,
+            shorthand=False,
+        )
+        self.assertEqual(channel.code, 200)
+        device_id = channel.json_body.get("device_id")
+        assert device_id is not None
+        self.assertIsInstance(device_id, str)
+        self.assertEqual("device3", device_id)
+
+        # create a second device without deleting first device
+        content["device_id"] = "device4"
+        content["device_keys"]["device_id"] = "device4"
+        channel = self.make_request(
+            "PUT",
+            "_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device",
+            content=content,
+            access_token=token,
+            shorthand=False,
+        )
+        self.assertEqual(channel.code, 200)
+        device_id = channel.json_body.get("device_id")
+        assert device_id is not None
+        self.assertIsInstance(device_id, str)
+        self.assertEqual("device4", device_id)
+
+        # check that the second device that was created is what is returned when we GET
+        channel = self.make_request(
+            "GET",
+            "_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device",
+            access_token=token,
+            shorthand=False,
+        )
+        self.assertEqual(channel.code, 200)
+        returned_device_id = channel.json_body["device_id"]
+        self.assertEqual(returned_device_id, "device4")
+
+        # and that if we query the keys for the first device they are not there
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/r0/keys/query",
+            {
+                "device_keys": {
+                    user: ["device3"],
+                },
+            },
+            token,
+        )
+        self.assertEqual(channel.json_body["device_keys"], {"@mikey:test": {}})
-- 
cgit 1.5.1


From 81a6f8c9ae0241afa9973da2f53efc2467e61c6b Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Mon, 7 Aug 2023 10:37:08 -0700
Subject: Drop backwards compat hack for event serialization (#16069)

---
 changelog.d/16069.misc     |  1 +
 synapse/events/snapshot.py | 12 +-----------
 2 files changed, 2 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/16069.misc

(limited to 'synapse')

diff --git a/changelog.d/16069.misc b/changelog.d/16069.misc
new file mode 100644
index 0000000000..f59ead8638
--- /dev/null
+++ b/changelog.d/16069.misc
@@ -0,0 +1 @@
+Drop backwards compat hack for event serialization.
diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py
index a43498ed4d..a9e3d4e556 100644
--- a/synapse/events/snapshot.py
+++ b/synapse/events/snapshot.py
@@ -186,9 +186,6 @@ class EventContext(UnpersistedEventContextBase):
             ),
             "app_service_id": self.app_service.id if self.app_service else None,
             "partial_state": self.partial_state,
-            # add dummy delta_ids and prev_group for backwards compatibility
-            "delta_ids": None,
-            "prev_group": None,
         }
 
     @staticmethod
@@ -203,13 +200,6 @@ class EventContext(UnpersistedEventContextBase):
         Returns:
             The event context.
         """
-        # workaround for backwards/forwards compatibility: if the input doesn't have a value
-        # for "state_group_deltas" just assign an empty dict
-        state_group_deltas = input.get("state_group_deltas", None)
-        if state_group_deltas:
-            state_group_deltas = _decode_state_group_delta(state_group_deltas)
-        else:
-            state_group_deltas = {}
 
         context = EventContext(
             # We use the state_group and prev_state_id stuff to pull the
@@ -217,7 +207,7 @@ class EventContext(UnpersistedEventContextBase):
             storage=storage,
             state_group=input["state_group"],
             state_group_before_event=input["state_group_before_event"],
-            state_group_deltas=state_group_deltas,
+            state_group_deltas=_decode_state_group_delta(input["state_group_deltas"]),
             state_delta_due_to_event=_decode_state_dict(
                 input["state_delta_due_to_event"]
             ),
-- 
cgit 1.5.1


From 8af3f33d84b0f63cb9baab6c8616983222d75307 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Mon, 7 Aug 2023 10:52:15 -0700
Subject: Fix endpoint improperly declaring support for MSC3814 (#16068)

---
 changelog.d/16068.misc         |  1 +
 synapse/rest/client/devices.py | 18 ++++++++----------
 2 files changed, 9 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/16068.misc

(limited to 'synapse')

diff --git a/changelog.d/16068.misc b/changelog.d/16068.misc
new file mode 100644
index 0000000000..341426a746
--- /dev/null
+++ b/changelog.d/16068.misc
@@ -0,0 +1 @@
+Fix endpoint improperly declaring support for MSC3814.
diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py
index dd3f7fd666..51f17f80da 100644
--- a/synapse/rest/client/devices.py
+++ b/synapse/rest/client/devices.py
@@ -232,7 +232,7 @@ class DehydratedDeviceDataModel(RequestBodyModel):
 class DehydratedDeviceServlet(RestServlet):
     """Retrieve or store a dehydrated device.
 
-    Implements either MSC2697 or MSC3814.
+    Implements MSC2697.
 
     GET /org.matrix.msc2697.v2/dehydrated_device
 
@@ -266,7 +266,12 @@ class DehydratedDeviceServlet(RestServlet):
 
     """
 
-    def __init__(self, hs: "HomeServer", msc2697: bool = True):
+    PATTERNS = client_patterns(
+        "/org.matrix.msc2697.v2/dehydrated_device$",
+        releases=(),
+    )
+
+    def __init__(self, hs: "HomeServer"):
         super().__init__()
         self.hs = hs
         self.auth = hs.get_auth()
@@ -274,13 +279,6 @@ class DehydratedDeviceServlet(RestServlet):
         assert isinstance(handler, DeviceHandler)
         self.device_handler = handler
 
-        self.PATTERNS = client_patterns(
-            "/org.matrix.msc2697.v2/dehydrated_device$"
-            if msc2697
-            else "/org.matrix.msc3814.v1/dehydrated_device$",
-            releases=(),
-        )
-
     async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
         dehydrated_device = await self.device_handler.get_dehydrated_device(
@@ -579,7 +577,7 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     if hs.config.worker.worker_app is None:
         DeviceRestServlet(hs).register(http_server)
         if hs.config.experimental.msc2697_enabled:
-            DehydratedDeviceServlet(hs, msc2697=True).register(http_server)
+            DehydratedDeviceServlet(hs).register(http_server)
             ClaimDehydratedDeviceServlet(hs).register(http_server)
         if hs.config.experimental.msc3814_enabled:
             DehydratedDeviceV2Servlet(hs).register(http_server)
-- 
cgit 1.5.1


From f3dc6dc19f902b638c164097342136010f0769d1 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Tue, 8 Aug 2023 10:10:07 +0000
Subject: Remove old rows from the `cache_invalidation_stream_by_instance`
 table automatically. (This table is not used when Synapse is configured to
 use SQLite.) (#15868)

* Add a cache invalidation clean-up task

* Run the cache invalidation stream clean-up on the background worker

* Tune down

* call_later is in millis!

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

* fixup! Add a cache invalidation clean-up task

* Update synapse/storage/databases/main/cache.py

Co-authored-by: Eric Eastwood <erice@element.io>

* Update synapse/storage/databases/main/cache.py

Co-authored-by: Eric Eastwood <erice@element.io>

* MILLISEC -> MS

* Expand on comment

* Move and tweak comment about Postgres

* Use `wrap_as_background_process`

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
Co-authored-by: Eric Eastwood <erice@element.io>
---
 changelog.d/15868.feature               |   1 +
 synapse/storage/databases/main/cache.py | 130 ++++++++++++++++++++++++++++++++
 2 files changed, 131 insertions(+)
 create mode 100644 changelog.d/15868.feature

(limited to 'synapse')

diff --git a/changelog.d/15868.feature b/changelog.d/15868.feature
new file mode 100644
index 0000000000..a866bf5774
--- /dev/null
+++ b/changelog.d/15868.feature
@@ -0,0 +1 @@
+Remove old rows from the `cache_invalidation_stream_by_instance` table automatically (this table is unused in SQLite).
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index c940f864d1..2fbd389c71 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -18,6 +18,8 @@ import logging
 from typing import TYPE_CHECKING, Any, Collection, Iterable, List, Optional, Tuple
 
 from synapse.api.constants import EventTypes
+from synapse.config._base import Config
+from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.replication.tcp.streams import BackfillStream, CachesStream
 from synapse.replication.tcp.streams.events import (
     EventsStream,
@@ -52,6 +54,21 @@ PURGE_HISTORY_CACHE_NAME = "ph_cache_fake"
 # As above, but for invalidating room caches on room deletion
 DELETE_ROOM_CACHE_NAME = "dr_cache_fake"
 
+# How long between cache invalidation table cleanups, once we have caught up
+# with the backlog.
+REGULAR_CLEANUP_INTERVAL_MS = Config.parse_duration("1h")
+
+# How long between cache invalidation table cleanups, before we have caught
+# up with the backlog.
+CATCH_UP_CLEANUP_INTERVAL_MS = Config.parse_duration("1m")
+
+# Maximum number of cache invalidation rows to delete at once.
+CLEAN_UP_MAX_BATCH_SIZE = 20_000
+
+# Keep cache invalidations for 7 days
+# (This is likely to be quite excessive.)
+RETENTION_PERIOD_OF_CACHE_INVALIDATIONS_MS = Config.parse_duration("7d")
+
 
 class CacheInvalidationWorkerStore(SQLBaseStore):
     def __init__(
@@ -98,6 +115,18 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
         else:
             self._cache_id_gen = None
 
+        # Occasionally clean up the cache invalidations stream table by deleting
+        # old rows.
+        # This is only applicable when Postgres is in use; this table is unused
+        # and not populated at all when SQLite is the active database engine.
+        if hs.config.worker.run_background_tasks and isinstance(
+            self.database_engine, PostgresEngine
+        ):
+            self.hs.get_clock().call_later(
+                CATCH_UP_CLEANUP_INTERVAL_MS / 1000,
+                self._clean_up_cache_invalidation_wrapper,
+            )
+
     async def get_all_updated_caches(
         self, instance_name: str, last_id: int, current_id: int, limit: int
     ) -> Tuple[List[Tuple[int, tuple]], int, bool]:
@@ -554,3 +583,104 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
             return self._cache_id_gen.get_current_token_for_writer(instance_name)
         else:
             return 0
+
+    @wrap_as_background_process("clean_up_old_cache_invalidations")
+    async def _clean_up_cache_invalidation_wrapper(self) -> None:
+        """
+        Clean up cache invalidation stream table entries occasionally.
+        If we are behind (i.e. there are entries old enough to
+        be deleted but too many of them to be deleted in one go),
+        then we run slightly more frequently.
+        """
+        delete_up_to: int = (
+            self.hs.get_clock().time_msec() - RETENTION_PERIOD_OF_CACHE_INVALIDATIONS_MS
+        )
+
+        in_backlog = await self._clean_up_batch_of_old_cache_invalidations(delete_up_to)
+
+        # Vary how long we wait before calling again depending on whether we
+        # are still sifting through backlog or we have caught up.
+        if in_backlog:
+            next_interval = CATCH_UP_CLEANUP_INTERVAL_MS
+        else:
+            next_interval = REGULAR_CLEANUP_INTERVAL_MS
+
+        self.hs.get_clock().call_later(
+            next_interval / 1000, self._clean_up_cache_invalidation_wrapper
+        )
+
+    async def _clean_up_batch_of_old_cache_invalidations(
+        self, delete_up_to_millisec: int
+    ) -> bool:
+        """
+        Remove old rows from the `cache_invalidation_stream_by_instance` table automatically (this table is unused in SQLite).
+
+        Up to `CLEAN_UP_MAX_BATCH_SIZE` rows will be deleted at once.
+
+        Returns true if and only if we were limited by batch size (i.e. we are in backlog:
+        there are more things to clean up).
+        """
+
+        def _clean_up_batch_of_old_cache_invalidations_txn(
+            txn: LoggingTransaction,
+        ) -> bool:
+            # First get the earliest stream ID
+            txn.execute(
+                """
+                SELECT stream_id FROM cache_invalidation_stream_by_instance
+                ORDER BY stream_id ASC
+                LIMIT 1
+                """
+            )
+            row = txn.fetchone()
+            if row is None:
+                return False
+            earliest_stream_id: int = row[0]
+
+            # Then find the last stream ID of the range we will delete
+            txn.execute(
+                """
+                SELECT stream_id FROM cache_invalidation_stream_by_instance
+                WHERE stream_id <= ? AND invalidation_ts <= ?
+                ORDER BY stream_id DESC
+                LIMIT 1
+                """,
+                (earliest_stream_id + CLEAN_UP_MAX_BATCH_SIZE, delete_up_to_millisec),
+            )
+            row = txn.fetchone()
+            if row is None:
+                return False
+            cutoff_stream_id: int = row[0]
+
+            # Determine whether we are caught up or still catching up
+            txn.execute(
+                """
+                SELECT invalidation_ts FROM cache_invalidation_stream_by_instance
+                WHERE stream_id > ?
+                ORDER BY stream_id ASC
+                LIMIT 1
+                """,
+                (cutoff_stream_id,),
+            )
+            row = txn.fetchone()
+            if row is None:
+                in_backlog = False
+            else:
+                # We are in backlog if the next row could have been deleted
+                # if we didn't have such a small batch size
+                in_backlog = row[0] <= delete_up_to_millisec
+
+            txn.execute(
+                """
+                DELETE FROM cache_invalidation_stream_by_instance
+                WHERE ? <= stream_id AND stream_id <= ?
+                """,
+                (earliest_stream_id, cutoff_stream_id),
+            )
+
+            return in_backlog
+
+        return await self.db_pool.runInteraction(
+            "clean_up_old_cache_invalidations",
+            _clean_up_batch_of_old_cache_invalidations_txn,
+        )
-- 
cgit 1.5.1


From a476d5048b96d6f9422f3d31d3c14a5247855715 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Tue, 8 Aug 2023 10:53:49 +0000
Subject: Allow modules to schedule delayed background calls. (#15993)

* Add a module API function to provide `call_later`

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

* Add comments

* Update version number

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 changelog.d/15993.misc         |  1 +
 synapse/module_api/__init__.py | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)
 create mode 100644 changelog.d/15993.misc

(limited to 'synapse')

diff --git a/changelog.d/15993.misc b/changelog.d/15993.misc
new file mode 100644
index 0000000000..35ead05157
--- /dev/null
+++ b/changelog.d/15993.misc
@@ -0,0 +1 @@
+Allow modules to schedule delayed background calls.
\ No newline at end of file
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index ba1a925003..acee1dafd3 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -34,6 +34,7 @@ import jinja2
 from typing_extensions import ParamSpec
 
 from twisted.internet import defer
+from twisted.internet.interfaces import IDelayedCall
 from twisted.web.resource import Resource
 
 from synapse.api import errors
@@ -1242,6 +1243,46 @@ class ModuleApi:
         """
         return self._hs.config.worker.run_background_tasks
 
+    def delayed_background_call(
+        self,
+        msec: float,
+        f: Callable,
+        *args: object,
+        desc: Optional[str] = None,
+        **kwargs: object,
+    ) -> IDelayedCall:
+        """Wraps a function as a background process and calls it in a given number of milliseconds.
+
+        The scheduled call is not persistent: if the current Synapse instance is
+        restarted before the call is made, the call will not be made.
+
+        Added in Synapse v1.90.0.
+
+        Args:
+            msec: How long to wait before calling, in milliseconds.
+            f: The function to call once. f can be either synchronous or
+                asynchronous, and must follow Synapse's logcontext rules.
+                More info about logcontexts is available at
+                https://matrix-org.github.io/synapse/latest/log_contexts.html
+            *args: Positional arguments to pass to function.
+            desc: The background task's description. Defaults to the function's name.
+            **kwargs: Keyword arguments to pass to function.
+
+        Returns:
+            IDelayedCall handle from twisted, which allows cancelling the delayed call if desired.
+        """
+
+        if desc is None:
+            desc = f.__name__
+
+        return self._clock.call_later(
+            # convert ms to seconds as needed by call_later.
+            msec * 0.001,
+            run_as_background_process,
+            desc,
+            lambda: maybe_awaitable(f(*args, **kwargs)),
+        )
+
     async def sleep(self, seconds: float) -> None:
         """Sleeps for the given number of seconds.
 
-- 
cgit 1.5.1


From 0328b56468fe12c4d86ef636b60964527a510160 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Tue, 8 Aug 2023 12:04:46 -0700
Subject: Support MSC3814: Dehydrated Devices Part 2 (#16010)

---
 changelog.d/16010.misc                            |   1 +
 synapse/handlers/device.py                        |  14 +-
 synapse/handlers/devicemessage.py                 |  13 --
 synapse/rest/client/devices.py                    |  16 +-
 synapse/storage/databases/main/devices.py         |  51 ++++++-
 synapse/storage/databases/main/end_to_end_keys.py | 170 ++++++++++++++--------
 tests/handlers/test_device.py                     |   9 +-
 tests/rest/client/test_devices.py                 |  77 +++++++++-
 8 files changed, 254 insertions(+), 97 deletions(-)
 create mode 100644 changelog.d/16010.misc

(limited to 'synapse')

diff --git a/changelog.d/16010.misc b/changelog.d/16010.misc
new file mode 100644
index 0000000000..1e1a148069
--- /dev/null
+++ b/changelog.d/16010.misc
@@ -0,0 +1 @@
+Update dehydrated devices implementation.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index b7bf70a72d..5ae427d52c 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -385,6 +385,7 @@ class DeviceHandler(DeviceWorkerHandler):
         self.federation_sender = hs.get_federation_sender()
         self._account_data_handler = hs.get_account_data_handler()
         self._storage_controllers = hs.get_storage_controllers()
+        self.db_pool = hs.get_datastores().main.db_pool
 
         self.device_list_updater = DeviceListUpdater(hs, self)
 
@@ -656,15 +657,17 @@ class DeviceHandler(DeviceWorkerHandler):
         device_id: Optional[str],
         device_data: JsonDict,
         initial_device_display_name: Optional[str] = None,
+        keys_for_device: Optional[JsonDict] = None,
     ) -> str:
-        """Store a dehydrated device for a user.  If the user had a previous
-        dehydrated device, it is removed.
+        """Store a dehydrated device for a user, optionally storing the keys associated with
+        it as well.  If the user had a previous dehydrated device, it is removed.
 
         Args:
             user_id: the user that we are storing the device for
             device_id: device id supplied by client
             device_data: the dehydrated device information
             initial_device_display_name: The display name to use for the device
+            keys_for_device: keys for the dehydrated device
         Returns:
             device id of the dehydrated device
         """
@@ -673,11 +676,16 @@ class DeviceHandler(DeviceWorkerHandler):
             device_id,
             initial_device_display_name,
         )
+
+        time_now = self.clock.time_msec()
+
         old_device_id = await self.store.store_dehydrated_device(
-            user_id, device_id, device_data
+            user_id, device_id, device_data, time_now, keys_for_device
         )
+
         if old_device_id is not None:
             await self.delete_devices(user_id, [old_device_id])
+
         return device_id
 
     async def rehydrate_device(
diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py
index 15e94a03cb..17ff8821d9 100644
--- a/synapse/handlers/devicemessage.py
+++ b/synapse/handlers/devicemessage.py
@@ -367,19 +367,6 @@ class DeviceMessageHandler:
                     errcode=Codes.INVALID_PARAM,
                 )
 
-            # if we have a since token, delete any to-device messages before that token
-            # (since we now know that the device has received them)
-            deleted = await self.store.delete_messages_for_device(
-                user_id, device_id, since_stream_id
-            )
-            logger.debug(
-                "Deleted %d to-device messages up to %d for user_id %s device_id %s",
-                deleted,
-                since_stream_id,
-                user_id,
-                device_id,
-            )
-
         to_token = self.event_sources.get_current_token().to_device_key
 
         messages, stream_id = await self.store.get_messages_for_device(
diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py
index 51f17f80da..925f037743 100644
--- a/synapse/rest/client/devices.py
+++ b/synapse/rest/client/devices.py
@@ -29,7 +29,6 @@ from synapse.http.servlet import (
     parse_integer,
 )
 from synapse.http.site import SynapseRequest
-from synapse.replication.http.devices import ReplicationUploadKeysForUserRestServlet
 from synapse.rest.client._base import client_patterns, interactive_auth_handler
 from synapse.rest.client.models import AuthenticationData
 from synapse.rest.models import RequestBodyModel
@@ -480,13 +479,6 @@ class DehydratedDeviceV2Servlet(RestServlet):
         self.e2e_keys_handler = hs.get_e2e_keys_handler()
         self.device_handler = handler
 
-        if hs.config.worker.worker_app is None:
-            # if main process
-            self.key_uploader = self.e2e_keys_handler.upload_keys_for_user
-        else:
-            # then a worker
-            self.key_uploader = ReplicationUploadKeysForUserRestServlet.make_client(hs)
-
     async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
 
@@ -549,18 +541,12 @@ class DehydratedDeviceV2Servlet(RestServlet):
                 "Device key(s) not found, these must be provided.",
             )
 
-        # TODO: Those two operations, creating a device and storing the
-        # device's keys should be atomic.
         device_id = await self.device_handler.store_dehydrated_device(
             requester.user.to_string(),
             submission.device_id,
             submission.device_data.dict(),
             submission.initial_device_display_name,
-        )
-
-        # TODO: Do we need to do something with the result here?
-        await self.key_uploader(
-            user_id=user_id, device_id=submission.device_id, keys=submission.dict()
+            device_info,
         )
 
         return 200, {"device_id": device_id}
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index d9df437e51..e4162f846b 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -28,6 +28,7 @@ from typing import (
     cast,
 )
 
+from canonicaljson import encode_canonical_json
 from typing_extensions import Literal
 
 from synapse.api.constants import EduTypes
@@ -1188,8 +1189,42 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
         )
 
     def _store_dehydrated_device_txn(
-        self, txn: LoggingTransaction, user_id: str, device_id: str, device_data: str
+        self,
+        txn: LoggingTransaction,
+        user_id: str,
+        device_id: str,
+        device_data: str,
+        time: int,
+        keys: Optional[JsonDict] = None,
     ) -> Optional[str]:
+        # TODO: make keys non-optional once support for msc2697 is dropped
+        if keys:
+            device_keys = keys.get("device_keys", None)
+            if device_keys:
+                # Type ignore - this function is defined on EndToEndKeyStore which we do
+                # have access to due to hs.get_datastore() "magic"
+                self._set_e2e_device_keys_txn(  # type: ignore[attr-defined]
+                    txn, user_id, device_id, time, device_keys
+                )
+
+            one_time_keys = keys.get("one_time_keys", None)
+            if one_time_keys:
+                key_list = []
+                for key_id, key_obj in one_time_keys.items():
+                    algorithm, key_id = key_id.split(":")
+                    key_list.append(
+                        (
+                            algorithm,
+                            key_id,
+                            encode_canonical_json(key_obj).decode("ascii"),
+                        )
+                    )
+                self._add_e2e_one_time_keys_txn(txn, user_id, device_id, time, key_list)
+
+            fallback_keys = keys.get("fallback_keys", None)
+            if fallback_keys:
+                self._set_e2e_fallback_keys_txn(txn, user_id, device_id, fallback_keys)
+
         old_device_id = self.db_pool.simple_select_one_onecol_txn(
             txn,
             table="dehydrated_devices",
@@ -1203,10 +1238,16 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
             keyvalues={"user_id": user_id},
             values={"device_id": device_id, "device_data": device_data},
         )
+
         return old_device_id
 
     async def store_dehydrated_device(
-        self, user_id: str, device_id: str, device_data: JsonDict
+        self,
+        user_id: str,
+        device_id: str,
+        device_data: JsonDict,
+        time_now: int,
+        keys: Optional[dict] = None,
     ) -> Optional[str]:
         """Store a dehydrated device for a user.
 
@@ -1214,15 +1255,21 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
             user_id: the user that we are storing the device for
             device_id: the ID of the dehydrated device
             device_data: the dehydrated device information
+            time_now: current time at the request in milliseconds
+            keys: keys for the dehydrated device
+
         Returns:
             device id of the user's previous dehydrated device, if any
         """
+
         return await self.db_pool.runInteraction(
             "store_dehydrated_device_txn",
             self._store_dehydrated_device_txn,
             user_id,
             device_id,
             json_encoder.encode(device_data),
+            time_now,
+            keys,
         )
 
     async def remove_dehydrated_device(self, user_id: str, device_id: str) -> bool:
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index 91ae9c457d..b49dea577c 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -522,36 +522,57 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
             new_keys: keys to add - each a tuple of (algorithm, key_id, key json)
         """
 
-        def _add_e2e_one_time_keys(txn: LoggingTransaction) -> None:
-            set_tag("user_id", user_id)
-            set_tag("device_id", device_id)
-            set_tag("new_keys", str(new_keys))
-            # We are protected from race between lookup and insertion due to
-            # a unique constraint. If there is a race of two calls to
-            # `add_e2e_one_time_keys` then they'll conflict and we will only
-            # insert one set.
-            self.db_pool.simple_insert_many_txn(
-                txn,
-                table="e2e_one_time_keys_json",
-                keys=(
-                    "user_id",
-                    "device_id",
-                    "algorithm",
-                    "key_id",
-                    "ts_added_ms",
-                    "key_json",
-                ),
-                values=[
-                    (user_id, device_id, algorithm, key_id, time_now, json_bytes)
-                    for algorithm, key_id, json_bytes in new_keys
-                ],
-            )
-            self._invalidate_cache_and_stream(
-                txn, self.count_e2e_one_time_keys, (user_id, device_id)
-            )
-
         await self.db_pool.runInteraction(
-            "add_e2e_one_time_keys_insert", _add_e2e_one_time_keys
+            "add_e2e_one_time_keys_insert",
+            self._add_e2e_one_time_keys_txn,
+            user_id,
+            device_id,
+            time_now,
+            new_keys,
+        )
+
+    def _add_e2e_one_time_keys_txn(
+        self,
+        txn: LoggingTransaction,
+        user_id: str,
+        device_id: str,
+        time_now: int,
+        new_keys: Iterable[Tuple[str, str, str]],
+    ) -> None:
+        """Insert some new one time keys for a device. Errors if any of the keys already exist.
+
+        Args:
+            user_id: id of user to add keys for
+            device_id: id of device to add keys for
+            time_now: insertion time to record (ms since epoch)
+            new_keys: keys to add - each a tuple of (algorithm, key_id, key json) - note
+                that the key JSON must be in canonical JSON form
+        """
+        set_tag("user_id", user_id)
+        set_tag("device_id", device_id)
+        set_tag("new_keys", str(new_keys))
+        # We are protected from race between lookup and insertion due to
+        # a unique constraint. If there is a race of two calls to
+        # `add_e2e_one_time_keys` then they'll conflict and we will only
+        # insert one set.
+        self.db_pool.simple_insert_many_txn(
+            txn,
+            table="e2e_one_time_keys_json",
+            keys=(
+                "user_id",
+                "device_id",
+                "algorithm",
+                "key_id",
+                "ts_added_ms",
+                "key_json",
+            ),
+            values=[
+                (user_id, device_id, algorithm, key_id, time_now, json_bytes)
+                for algorithm, key_id, json_bytes in new_keys
+            ],
+        )
+        self._invalidate_cache_and_stream(
+            txn, self.count_e2e_one_time_keys, (user_id, device_id)
         )
 
     @cached(max_entries=10000)
@@ -723,6 +744,14 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
         device_id: str,
         fallback_keys: JsonDict,
     ) -> None:
+        """Set the user's e2e fallback keys.
+
+        Args:
+            user_id: the user whose keys are being set
+            device_id: the device whose keys are being set
+            fallback_keys: the keys to set.  This is a map from key ID (which is
+                    of the form "algorithm:id") to key data.
+        """
         # fallback_keys will usually only have one item in it, so using a for
         # loop (as opposed to calling simple_upsert_many_txn) won't be too bad
         # FIXME: make sure that only one key per algorithm is uploaded
@@ -1304,42 +1333,69 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore):
     ) -> bool:
         """Stores device keys for a device. Returns whether there was a change
         or the keys were already in the database.
+
+        Args:
+            user_id: user_id of the user to store keys for
+            device_id: device_id of the device to store keys for
+            time_now: time at the request to store the keys
+            device_keys: the keys to store
         """
 
-        def _set_e2e_device_keys_txn(txn: LoggingTransaction) -> bool:
-            set_tag("user_id", user_id)
-            set_tag("device_id", device_id)
-            set_tag("time_now", time_now)
-            set_tag("device_keys", str(device_keys))
+        return await self.db_pool.runInteraction(
+            "set_e2e_device_keys",
+            self._set_e2e_device_keys_txn,
+            user_id,
+            device_id,
+            time_now,
+            device_keys,
+        )
 
-            old_key_json = self.db_pool.simple_select_one_onecol_txn(
-                txn,
-                table="e2e_device_keys_json",
-                keyvalues={"user_id": user_id, "device_id": device_id},
-                retcol="key_json",
-                allow_none=True,
-            )
+    def _set_e2e_device_keys_txn(
+        self,
+        txn: LoggingTransaction,
+        user_id: str,
+        device_id: str,
+        time_now: int,
+        device_keys: JsonDict,
+    ) -> bool:
+        """Stores device keys for a device. Returns whether there was a change
+        or the keys were already in the database.
 
-            # In py3 we need old_key_json to match new_key_json type. The DB
-            # returns unicode while encode_canonical_json returns bytes.
-            new_key_json = encode_canonical_json(device_keys).decode("utf-8")
+        Args:
+             user_id: user_id of the user to store keys for
+             device_id: device_id of the device to store keys for
+             time_now: time at the request to store the keys
+             device_keys: the keys to store
+        """
+        set_tag("user_id", user_id)
+        set_tag("device_id", device_id)
+        set_tag("time_now", time_now)
+        set_tag("device_keys", str(device_keys))
+
+        old_key_json = self.db_pool.simple_select_one_onecol_txn(
+            txn,
+            table="e2e_device_keys_json",
+            keyvalues={"user_id": user_id, "device_id": device_id},
+            retcol="key_json",
+            allow_none=True,
+        )
 
-            if old_key_json == new_key_json:
-                log_kv({"Message": "Device key already stored."})
-                return False
+        # In py3 we need old_key_json to match new_key_json type. The DB
+        # returns unicode while encode_canonical_json returns bytes.
+        new_key_json = encode_canonical_json(device_keys).decode("utf-8")
 
-            self.db_pool.simple_upsert_txn(
-                txn,
-                table="e2e_device_keys_json",
-                keyvalues={"user_id": user_id, "device_id": device_id},
-                values={"ts_added_ms": time_now, "key_json": new_key_json},
-            )
-            log_kv({"message": "Device keys stored."})
-            return True
+        if old_key_json == new_key_json:
+            log_kv({"Message": "Device key already stored."})
+            return False
 
-        return await self.db_pool.runInteraction(
-            "set_e2e_device_keys", _set_e2e_device_keys_txn
+        self.db_pool.simple_upsert_txn(
+            txn,
+            table="e2e_device_keys_json",
+            keyvalues={"user_id": user_id, "device_id": device_id},
+            values={"ts_added_ms": time_now, "key_json": new_key_json},
         )
+        log_kv({"message": "Device keys stored."})
+        return True
 
     async def delete_e2e_keys_by_device(self, user_id: str, device_id: str) -> None:
         def delete_e2e_keys_by_device_txn(txn: LoggingTransaction) -> None:
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index 647ee09279..e1e58fa6e6 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -566,15 +566,16 @@ class DehydrationTestCase(unittest.HomeserverTestCase):
         self.assertEqual(len(res["events"]), 1)
         self.assertEqual(res["events"][0]["content"]["body"], "foo")
 
-        # Fetch the message of the dehydrated device again, which should return nothing
-        # and delete the old messages
+        # Fetch the message of the dehydrated device again, which should return
+        # the same message as it has not been deleted
         res = self.get_success(
             self.message_handler.get_events_for_dehydrated_device(
                 requester=requester,
                 device_id=stored_dehydrated_device_id,
-                since_token=res["next_batch"],
+                since_token=None,
                 limit=10,
             )
         )
         self.assertTrue(len(res["next_batch"]) > 1)
-        self.assertEqual(len(res["events"]), 0)
+        self.assertEqual(len(res["events"]), 1)
+        self.assertEqual(res["events"][0]["content"]["body"], "foo")
diff --git a/tests/rest/client/test_devices.py b/tests/rest/client/test_devices.py
index 3cf29c10ea..60099f8c59 100644
--- a/tests/rest/client/test_devices.py
+++ b/tests/rest/client/test_devices.py
@@ -20,7 +20,7 @@ from synapse.api.errors import NotFoundError
 from synapse.rest import admin, devices, room, sync
 from synapse.rest.client import account, keys, login, register
 from synapse.server import HomeServer
-from synapse.types import JsonDict, create_requester
+from synapse.types import JsonDict, UserID, create_requester
 from synapse.util import Clock
 
 from tests import unittest
@@ -282,6 +282,17 @@ class DehydratedDeviceTestCase(unittest.HomeserverTestCase):
                     "<user_id>": {"<algorithm>:<device_id>": "<signature_base64>"}
                 },
             },
+            "fallback_keys": {
+                "alg1:device1": "f4llb4ckk3y",
+                "signed_<algorithm>:<device_id>": {
+                    "fallback": "true",
+                    "key": "f4llb4ckk3y",
+                    "signatures": {
+                        "<user_id>": {"<algorithm>:<device_id>": "<key_base64>"}
+                    },
+                },
+            },
+            "one_time_keys": {"alg1:k1": "0net1m3k3y"},
         }
         channel = self.make_request(
             "PUT",
@@ -312,6 +323,55 @@ class DehydratedDeviceTestCase(unittest.HomeserverTestCase):
         }
         self.assertEqual(device_data, expected_device_data)
 
+        # test that the keys are correctly uploaded
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/r0/keys/query",
+            {
+                "device_keys": {
+                    user: ["device1"],
+                },
+            },
+            token,
+        )
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(
+            channel.json_body["device_keys"][user][device_id]["keys"],
+            content["device_keys"]["keys"],
+        )
+        # first claim should return the onetime key we uploaded
+        res = self.get_success(
+            self.hs.get_e2e_keys_handler().claim_one_time_keys(
+                {user: {device_id: {"alg1": 1}}},
+                UserID.from_string(user),
+                timeout=None,
+                always_include_fallback_keys=False,
+            )
+        )
+        self.assertEqual(
+            res,
+            {
+                "failures": {},
+                "one_time_keys": {user: {device_id: {"alg1:k1": "0net1m3k3y"}}},
+            },
+        )
+        # second claim should return fallback key
+        res2 = self.get_success(
+            self.hs.get_e2e_keys_handler().claim_one_time_keys(
+                {user: {device_id: {"alg1": 1}}},
+                UserID.from_string(user),
+                timeout=None,
+                always_include_fallback_keys=False,
+            )
+        )
+        self.assertEqual(
+            res2,
+            {
+                "failures": {},
+                "one_time_keys": {user: {device_id: {"alg1:device1": "f4llb4ckk3y"}}},
+            },
+        )
+
         # create another device for the user
         (
             new_device_id,
@@ -348,10 +408,21 @@ class DehydratedDeviceTestCase(unittest.HomeserverTestCase):
         self.assertEqual(channel.code, 200)
         expected_content = {"body": "test_message"}
         self.assertEqual(channel.json_body["events"][0]["content"], expected_content)
+
+        # fetch messages again and make sure that the message was not deleted
+        channel = self.make_request(
+            "POST",
+            f"_matrix/client/unstable/org.matrix.msc3814.v1/dehydrated_device/{device_id}/events",
+            content={},
+            access_token=token,
+            shorthand=False,
+        )
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(channel.json_body["events"][0]["content"], expected_content)
         next_batch_token = channel.json_body.get("next_batch")
 
-        # fetch messages again and make sure that the message was deleted and we are returned an
-        # empty array
+        # make sure fetching messages with next batch token works - there are no unfetched
+        # messages so we should receive an empty array
         content = {"next_batch": next_batch_token}
         channel = self.make_request(
             "POST",
-- 
cgit 1.5.1


From dac97642e41f3f4bc0deff0c80b6a3f7acb4dbc0 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Thu, 10 Aug 2023 11:10:55 +0200
Subject: Implements admin API to lock an user (MSC3939) (#15870)

---
 changelog.d/15870.feature                          |   1 +
 docs/admin_api/user_admin_api.md                   |   1 +
 docs/usage/configuration/config_documentation.md   |   2 +
 synapse/_scripts/synapse_port_db.py                |   2 +-
 synapse/api/auth/__init__.py                       |   1 +
 synapse/api/auth/internal.py                       |  15 ++-
 synapse/api/auth/msc3861_delegated.py              |  13 +++
 synapse/api/errors.py                              |   2 +
 synapse/config/user_directory.py                   |   1 +
 synapse/handlers/admin.py                          |   1 +
 synapse/handlers/user_directory.py                 |   5 +-
 synapse/rest/admin/users.py                        |  17 ++++
 synapse/rest/client/logout.py                      |   8 +-
 synapse/storage/databases/main/registration.py     |  62 +++++++++++-
 synapse/storage/databases/main/user_directory.py   |  11 +-
 .../schema/main/delta/80/01_users_alter_locked.sql |  16 +++
 tests/api/test_auth.py                             |   3 +
 tests/rest/admin/test_user.py                      | 111 ++++++++++++++++++++-
 tests/storage/test_registration.py                 |   1 +
 19 files changed, 262 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/15870.feature
 create mode 100644 synapse/storage/schema/main/delta/80/01_users_alter_locked.sql

(limited to 'synapse')

diff --git a/changelog.d/15870.feature b/changelog.d/15870.feature
new file mode 100644
index 0000000000..527220d637
--- /dev/null
+++ b/changelog.d/15870.feature
@@ -0,0 +1 @@
+Implements an admin API to lock a user without deactivating them. Based on [MSC3939](https://github.com/matrix-org/matrix-spec-proposals/pull/3939).
diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md
index ac4f635099..c269ce6af0 100644
--- a/docs/admin_api/user_admin_api.md
+++ b/docs/admin_api/user_admin_api.md
@@ -146,6 +146,7 @@ Body parameters:
 - `admin` - **bool**, optional, defaults to `false`. Whether the user is a homeserver administrator,
   granting them access to the Admin API, among other things.
 - `deactivated` - **bool**, optional. If unspecified, deactivation state will be left unchanged.
+- `locked` - **bool**, optional. If unspecified, locked state will be left unchanged.
 
   Note: the `password` field must also be set if both of the following are true:
   - `deactivated` is set to `false` and the user was previously deactivated (you are reactivating this user)
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 2987c9332d..a17a8c2900 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3631,6 +3631,7 @@ This option has the following sub-options:
 * `prefer_local_users`: Defines whether to prefer local users in search query results.
    If set to true, local users are more likely to appear above remote users when searching the
    user directory. Defaults to false.
+* `show_locked_users`: Defines whether to show locked users in search query results. Defaults to false.
 
 Example configuration:
 ```yaml
@@ -3638,6 +3639,7 @@ user_directory:
     enabled: false
     search_all_users: true
     prefer_local_users: true
+    show_locked_users: true
 ```
 ---
 ### `user_consent`
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 22c84fbd5b..1300aaf63c 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -123,7 +123,7 @@ BOOLEAN_COLUMNS = {
     "redactions": ["have_censored"],
     "room_stats_state": ["is_federatable"],
     "rooms": ["is_public", "has_auth_chain_index"],
-    "users": ["shadow_banned", "approved"],
+    "users": ["shadow_banned", "approved", "locked"],
     "un_partial_stated_event_stream": ["rejection_status_changed"],
     "users_who_share_rooms": ["share_private"],
     "per_user_experimental_features": ["enabled"],
diff --git a/synapse/api/auth/__init__.py b/synapse/api/auth/__init__.py
index 90cfe39d76..bb3f50f2dd 100644
--- a/synapse/api/auth/__init__.py
+++ b/synapse/api/auth/__init__.py
@@ -60,6 +60,7 @@ class Auth(Protocol):
         request: SynapseRequest,
         allow_guest: bool = False,
         allow_expired: bool = False,
+        allow_locked: bool = False,
     ) -> Requester:
         """Get a registered user's ID.
 
diff --git a/synapse/api/auth/internal.py b/synapse/api/auth/internal.py
index e2ae198b19..6a5fd44ec0 100644
--- a/synapse/api/auth/internal.py
+++ b/synapse/api/auth/internal.py
@@ -58,6 +58,7 @@ class InternalAuth(BaseAuth):
         request: SynapseRequest,
         allow_guest: bool = False,
         allow_expired: bool = False,
+        allow_locked: bool = False,
     ) -> Requester:
         """Get a registered user's ID.
 
@@ -79,7 +80,7 @@ class InternalAuth(BaseAuth):
         parent_span = active_span()
         with start_active_span("get_user_by_req"):
             requester = await self._wrapped_get_user_by_req(
-                request, allow_guest, allow_expired
+                request, allow_guest, allow_expired, allow_locked
             )
 
             if parent_span:
@@ -107,6 +108,7 @@ class InternalAuth(BaseAuth):
         request: SynapseRequest,
         allow_guest: bool,
         allow_expired: bool,
+        allow_locked: bool,
     ) -> Requester:
         """Helper for get_user_by_req
 
@@ -126,6 +128,17 @@ class InternalAuth(BaseAuth):
                     access_token, allow_expired=allow_expired
                 )
 
+                # Deny the request if the user account is locked.
+                if not allow_locked and await self.store.get_user_locked_status(
+                    requester.user.to_string()
+                ):
+                    raise AuthError(
+                        401,
+                        "User account has been locked",
+                        errcode=Codes.USER_LOCKED,
+                        additional_fields={"soft_logout": True},
+                    )
+
                 # Deny the request if the user account has expired.
                 # This check is only done for regular users, not appservice ones.
                 if not allow_expired:
diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py
index bd4fc9c0ee..9524102a30 100644
--- a/synapse/api/auth/msc3861_delegated.py
+++ b/synapse/api/auth/msc3861_delegated.py
@@ -27,6 +27,7 @@ from twisted.web.http_headers import Headers
 from synapse.api.auth.base import BaseAuth
 from synapse.api.errors import (
     AuthError,
+    Codes,
     HttpResponseException,
     InvalidClientTokenError,
     OAuthInsufficientScopeError,
@@ -196,6 +197,7 @@ class MSC3861DelegatedAuth(BaseAuth):
         request: SynapseRequest,
         allow_guest: bool = False,
         allow_expired: bool = False,
+        allow_locked: bool = False,
     ) -> Requester:
         access_token = self.get_access_token_from_request(request)
 
@@ -205,6 +207,17 @@ class MSC3861DelegatedAuth(BaseAuth):
             # so that we don't provision the user if they don't have enough permission:
             requester = await self.get_user_by_access_token(access_token, allow_expired)
 
+            # Deny the request if the user account is locked.
+            if not allow_locked and await self.store.get_user_locked_status(
+                requester.user.to_string()
+            ):
+                raise AuthError(
+                    401,
+                    "User account has been locked",
+                    errcode=Codes.USER_LOCKED,
+                    additional_fields={"soft_logout": True},
+                )
+
         if not allow_guest and requester.is_guest:
             raise OAuthInsufficientScopeError([SCOPE_MATRIX_API])
 
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index 3546aaf7c3..7ffd72c42c 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -80,6 +80,8 @@ class Codes(str, Enum):
     WEAK_PASSWORD = "M_WEAK_PASSWORD"
     INVALID_SIGNATURE = "M_INVALID_SIGNATURE"
     USER_DEACTIVATED = "M_USER_DEACTIVATED"
+    # USER_LOCKED = "M_USER_LOCKED"
+    USER_LOCKED = "ORG_MATRIX_MSC3939_USER_LOCKED"
 
     # Part of MSC3848
     # https://github.com/matrix-org/matrix-spec-proposals/pull/3848
diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py
index c9e18b91e9..f60ec2ea66 100644
--- a/synapse/config/user_directory.py
+++ b/synapse/config/user_directory.py
@@ -35,3 +35,4 @@ class UserDirectoryConfig(Config):
         self.user_directory_search_prefer_local_users = user_directory_config.get(
             "prefer_local_users", False
         )
+        self.show_locked_users = user_directory_config.get("show_locked_users", False)
diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py
index 119c7f8384..0e812a6d8b 100644
--- a/synapse/handlers/admin.py
+++ b/synapse/handlers/admin.py
@@ -67,6 +67,7 @@ class AdminHandler:
             "name",
             "admin",
             "deactivated",
+            "locked",
             "shadow_banned",
             "creation_ts",
             "appservice_id",
diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py
index 05197edc95..a0f5568000 100644
--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -94,6 +94,7 @@ class UserDirectoryHandler(StateDeltasHandler):
         self.is_mine_id = hs.is_mine_id
         self.update_user_directory = hs.config.worker.should_update_user_directory
         self.search_all_users = hs.config.userdirectory.user_directory_search_all_users
+        self.show_locked_users = hs.config.userdirectory.show_locked_users
         self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
         self._hs = hs
 
@@ -144,7 +145,9 @@ class UserDirectoryHandler(StateDeltasHandler):
                     ]
                 }
         """
-        results = await self.store.search_user_dir(user_id, search_term, limit)
+        results = await self.store.search_user_dir(
+            user_id, search_term, limit, self.show_locked_users
+        )
 
         # Remove any spammy users from the results.
         non_spammy_users = []
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index e0257daa75..04d9ef25b7 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -280,6 +280,17 @@ class UserRestServletV2(RestServlet):
                 HTTPStatus.BAD_REQUEST, "'deactivated' parameter is not of type boolean"
             )
 
+        lock = body.get("locked", False)
+        if not isinstance(lock, bool):
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST, "'locked' parameter is not of type boolean"
+            )
+
+        if deactivate and lock:
+            raise SynapseError(
+                HTTPStatus.BAD_REQUEST, "An user can't be deactivated and locked"
+            )
+
         approved: Optional[bool] = None
         if "approved" in body and self._msc3866_enabled:
             approved = body["approved"]
@@ -397,6 +408,12 @@ class UserRestServletV2(RestServlet):
                         target_user.to_string()
                     )
 
+            if "locked" in body:
+                if lock and not user["locked"]:
+                    await self.store.set_user_locked_status(user_id, True)
+                elif not lock and user["locked"]:
+                    await self.store.set_user_locked_status(user_id, False)
+
             if "user_type" in body:
                 await self.store.set_user_type(target_user, user_type)
 
diff --git a/synapse/rest/client/logout.py b/synapse/rest/client/logout.py
index 94ad90942f..2e104d4888 100644
--- a/synapse/rest/client/logout.py
+++ b/synapse/rest/client/logout.py
@@ -40,7 +40,9 @@ class LogoutRestServlet(RestServlet):
         self._device_handler = handler
 
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
-        requester = await self.auth.get_user_by_req(request, allow_expired=True)
+        requester = await self.auth.get_user_by_req(
+            request, allow_expired=True, allow_locked=True
+        )
 
         if requester.device_id is None:
             # The access token wasn't associated with a device.
@@ -67,7 +69,9 @@ class LogoutAllRestServlet(RestServlet):
         self._device_handler = handler
 
     async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
-        requester = await self.auth.get_user_by_req(request, allow_expired=True)
+        requester = await self.auth.get_user_by_req(
+            request, allow_expired=True, allow_locked=True
+        )
         user_id = requester.user.to_string()
 
         # first delete all of the user's devices
diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py
index c582cf0573..d3a01d526f 100644
--- a/synapse/storage/databases/main/registration.py
+++ b/synapse/storage/databases/main/registration.py
@@ -205,7 +205,8 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
                     name, password_hash, is_guest, admin, consent_version, consent_ts,
                     consent_server_notice_sent, appservice_id, creation_ts, user_type,
                     deactivated, COALESCE(shadow_banned, FALSE) AS shadow_banned,
-                    COALESCE(approved, TRUE) AS approved
+                    COALESCE(approved, TRUE) AS approved,
+                    COALESCE(locked, FALSE) AS locked
                 FROM users
                 WHERE name = ?
                 """,
@@ -230,10 +231,15 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
             # want to make sure we're returning the right type of data.
             # Note: when adding a column name to this list, be wary of NULLable columns,
             # since NULL values will be turned into False.
-            boolean_columns = ["admin", "deactivated", "shadow_banned", "approved"]
+            boolean_columns = [
+                "admin",
+                "deactivated",
+                "shadow_banned",
+                "approved",
+                "locked",
+            ]
             for column in boolean_columns:
-                if not isinstance(row[column], bool):
-                    row[column] = bool(row[column])
+                row[column] = bool(row[column])
 
         return row
 
@@ -1116,6 +1122,27 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
         # Convert the integer into a boolean.
         return res == 1
 
+    @cached()
+    async def get_user_locked_status(self, user_id: str) -> bool:
+        """Retrieve the value for the `locked` property for the provided user.
+
+        Args:
+            user_id: The ID of the user to retrieve the status for.
+
+        Returns:
+            True if the user is locked, False otherwise.
+        """
+
+        res = await self.db_pool.simple_select_one_onecol(
+            table="users",
+            keyvalues={"name": user_id},
+            retcol="locked",
+            desc="get_user_locked_status",
+        )
+
+        # Convert the potential integer into a boolean.
+        return bool(res)
+
     async def get_threepid_validation_session(
         self,
         medium: Optional[str],
@@ -2111,6 +2138,33 @@ class RegistrationBackgroundUpdateStore(RegistrationWorkerStore):
         self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,))
         txn.call_after(self.is_guest.invalidate, (user_id,))
 
+    async def set_user_locked_status(self, user_id: str, locked: bool) -> None:
+        """Set the `locked` property for the provided user to the provided value.
+
+        Args:
+            user_id: The ID of the user to set the status for.
+            locked: The value to set for `locked`.
+        """
+
+        await self.db_pool.runInteraction(
+            "set_user_locked_status",
+            self.set_user_locked_status_txn,
+            user_id,
+            locked,
+        )
+
+    def set_user_locked_status_txn(
+        self, txn: LoggingTransaction, user_id: str, locked: bool
+    ) -> None:
+        self.db_pool.simple_update_one_txn(
+            txn=txn,
+            table="users",
+            keyvalues={"name": user_id},
+            updatevalues={"locked": locked},
+        )
+        self._invalidate_cache_and_stream(txn, self.get_user_locked_status, (user_id,))
+        self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,))
+
     def update_user_approval_status_txn(
         self, txn: LoggingTransaction, user_id: str, approved: bool
     ) -> None:
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index 2a136f2ff6..f0dc31fee6 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -995,7 +995,11 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
         )
 
     async def search_user_dir(
-        self, user_id: str, search_term: str, limit: int
+        self,
+        user_id: str,
+        search_term: str,
+        limit: int,
+        show_locked_users: bool = False,
     ) -> SearchResult:
         """Searches for users in directory
 
@@ -1029,6 +1033,9 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
                 )
             """
 
+        if not show_locked_users:
+            where_clause += " AND (u.locked IS NULL OR u.locked = FALSE)"
+
         # We allow manipulating the ranking algorithm by injecting statements
         # based on config options.
         additional_ordering_statements = []
@@ -1060,6 +1067,7 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
                 SELECT d.user_id AS user_id, display_name, avatar_url
                 FROM matching_users as t
                 INNER JOIN user_directory AS d USING (user_id)
+                LEFT JOIN users AS u ON t.user_id = u.name
                 WHERE
                     %(where_clause)s
                 ORDER BY
@@ -1115,6 +1123,7 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
                 SELECT d.user_id AS user_id, display_name, avatar_url
                 FROM user_directory_search as t
                 INNER JOIN user_directory AS d USING (user_id)
+                LEFT JOIN users AS u ON t.user_id = u.name
                 WHERE
                     %(where_clause)s
                     AND value MATCH ?
diff --git a/synapse/storage/schema/main/delta/80/01_users_alter_locked.sql b/synapse/storage/schema/main/delta/80/01_users_alter_locked.sql
new file mode 100644
index 0000000000..21c7971441
--- /dev/null
+++ b/synapse/storage/schema/main/delta/80/01_users_alter_locked.sql
@@ -0,0 +1,16 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE users ADD locked BOOLEAN DEFAULT FALSE NOT NULL;
diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py
index cdb0048122..ce96574915 100644
--- a/tests/api/test_auth.py
+++ b/tests/api/test_auth.py
@@ -69,6 +69,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         )
         self.store.get_user_by_access_token = simple_async_mock(user_info)
         self.store.mark_access_token_as_used = simple_async_mock(None)
+        self.store.get_user_locked_status = simple_async_mock(False)
 
         request = Mock(args={})
         request.args[b"access_token"] = [self.test_token]
@@ -293,6 +294,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
         )
         self.store.insert_client_ip = simple_async_mock(None)
         self.store.mark_access_token_as_used = simple_async_mock(None)
+        self.store.get_user_locked_status = simple_async_mock(False)
         request = Mock(args={})
         request.getClientAddress.return_value.host = "127.0.0.1"
         request.args[b"access_token"] = [self.test_token]
@@ -311,6 +313,7 @@ class AuthTestCase(unittest.HomeserverTestCase):
                 token_used=True,
             )
         )
+        self.store.get_user_locked_status = simple_async_mock(False)
         self.store.insert_client_ip = simple_async_mock(None)
         self.store.mark_access_token_as_used = simple_async_mock(None)
         request = Mock(args={})
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index 9af9db6e3e..41a959b4d6 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -29,7 +29,16 @@ from synapse.api.constants import ApprovalNoticeMedium, LoginType, UserTypes
 from synapse.api.errors import Codes, HttpResponseException, ResourceLimitError
 from synapse.api.room_versions import RoomVersions
 from synapse.media.filepath import MediaFilePaths
-from synapse.rest.client import devices, login, logout, profile, register, room, sync
+from synapse.rest.client import (
+    devices,
+    login,
+    logout,
+    profile,
+    register,
+    room,
+    sync,
+    user_directory,
+)
 from synapse.server import HomeServer
 from synapse.types import JsonDict, UserID, create_requester
 from synapse.util import Clock
@@ -1477,6 +1486,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         login.register_servlets,
         sync.register_servlets,
         register.register_servlets,
+        user_directory.register_servlets,
     ]
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
@@ -2464,6 +2474,105 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         # This key was removed intentionally. Ensure it is not accidentally re-included.
         self.assertNotIn("password_hash", channel.json_body)
 
+    def test_locked_user(self) -> None:
+        # User can sync
+        channel = self.make_request(
+            "GET",
+            "/_matrix/client/v3/sync",
+            access_token=self.other_user_token,
+        )
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+
+        # Lock user
+        channel = self.make_request(
+            "PUT",
+            self.url_other_user,
+            access_token=self.admin_user_tok,
+            content={"locked": True},
+        )
+
+        # User is not authorized to sync anymore
+        channel = self.make_request(
+            "GET",
+            "/_matrix/client/v3/sync",
+            access_token=self.other_user_token,
+        )
+        self.assertEqual(401, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.USER_LOCKED, channel.json_body["errcode"])
+        self.assertTrue(channel.json_body["soft_logout"])
+
+    @override_config({"user_directory": {"enabled": True, "search_all_users": True}})
+    def test_locked_user_not_in_user_dir(self) -> None:
+        # User is available in the user dir
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/v3/user_directory/search",
+            {"search_term": self.other_user},
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertIn("results", channel.json_body)
+        self.assertEqual(1, len(channel.json_body["results"]))
+
+        # Lock user
+        channel = self.make_request(
+            "PUT",
+            self.url_other_user,
+            access_token=self.admin_user_tok,
+            content={"locked": True},
+        )
+
+        # User is not available anymore in the user dir
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/v3/user_directory/search",
+            {"search_term": self.other_user},
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertIn("results", channel.json_body)
+        self.assertEqual(0, len(channel.json_body["results"]))
+
+    @override_config(
+        {
+            "user_directory": {
+                "enabled": True,
+                "search_all_users": True,
+                "show_locked_users": True,
+            }
+        }
+    )
+    def test_locked_user_in_user_dir_with_show_locked_users_option(self) -> None:
+        # User is available in the user dir
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/v3/user_directory/search",
+            {"search_term": self.other_user},
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertIn("results", channel.json_body)
+        self.assertEqual(1, len(channel.json_body["results"]))
+
+        # Lock user
+        channel = self.make_request(
+            "PUT",
+            self.url_other_user,
+            access_token=self.admin_user_tok,
+            content={"locked": True},
+        )
+
+        # User is still available in the user dir
+        channel = self.make_request(
+            "POST",
+            "/_matrix/client/v3/user_directory/search",
+            {"search_term": self.other_user},
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertIn("results", channel.json_body)
+        self.assertEqual(1, len(channel.json_body["results"]))
+
     @override_config({"user_directory": {"enabled": True, "search_all_users": True}})
     def test_change_name_deactivate_user_user_directory(self) -> None:
         """
diff --git a/tests/storage/test_registration.py b/tests/storage/test_registration.py
index 05ea802008..ba41459d08 100644
--- a/tests/storage/test_registration.py
+++ b/tests/storage/test_registration.py
@@ -48,6 +48,7 @@ class RegistrationStoreTestCase(HomeserverTestCase):
                 "creation_ts": 0,
                 "user_type": None,
                 "deactivated": 0,
+                "locked": 0,
                 "shadow_banned": 0,
                 "approved": 1,
             },
-- 
cgit 1.5.1


From efd4d06d7694e269f1d85e697104e742a984da18 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 10 Aug 2023 07:39:46 -0400
Subject: Clean-up presence code (#16092)

Misc. clean-ups to:

* Use keyword arguments.
* Return early (reducing indentation) in some functions.
* Remove duplicated / unused code.
* Use wrap_as_background_process.
---
 changelog.d/16092.misc       |   1 +
 synapse/handlers/presence.py | 169 +++++++++++++++++++------------------------
 2 files changed, 76 insertions(+), 94 deletions(-)
 create mode 100644 changelog.d/16092.misc

(limited to 'synapse')

diff --git a/changelog.d/16092.misc b/changelog.d/16092.misc
new file mode 100644
index 0000000000..b520807771
--- /dev/null
+++ b/changelog.d/16092.misc
@@ -0,0 +1 @@
+Clean-up the presence code.
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index cd7df0525f..11dff724e6 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -30,7 +30,6 @@ from types import TracebackType
 from typing import (
     TYPE_CHECKING,
     Any,
-    Awaitable,
     Callable,
     Collection,
     Dict,
@@ -54,7 +53,10 @@ from synapse.appservice import ApplicationService
 from synapse.events.presence_router import PresenceRouter
 from synapse.logging.context import run_in_background
 from synapse.metrics import LaterGauge
-from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.metrics.background_process_metrics import (
+    run_as_background_process,
+    wrap_as_background_process,
+)
 from synapse.replication.http.presence import (
     ReplicationBumpPresenceActiveTime,
     ReplicationPresenceSetState,
@@ -141,6 +143,8 @@ class BasePresenceHandler(abc.ABC):
         self.state = hs.get_state_handler()
         self.is_mine_id = hs.is_mine_id
 
+        self._presence_enabled = hs.config.server.use_presence
+
         self._federation = None
         if hs.should_send_federation():
             self._federation = hs.get_federation_sender()
@@ -149,6 +153,15 @@ class BasePresenceHandler(abc.ABC):
 
         self._busy_presence_enabled = hs.config.experimental.msc3026_enabled
 
+        self.VALID_PRESENCE: Tuple[str, ...] = (
+            PresenceState.ONLINE,
+            PresenceState.UNAVAILABLE,
+            PresenceState.OFFLINE,
+        )
+
+        if self._busy_presence_enabled:
+            self.VALID_PRESENCE += (PresenceState.BUSY,)
+
         active_presence = self.store.take_presence_startup_info()
         self.user_to_current_state = {state.user_id: state for state in active_presence}
 
@@ -395,8 +408,6 @@ class WorkerPresenceHandler(BasePresenceHandler):
 
         self._presence_writer_instance = hs.config.worker.writers.presence[0]
 
-        self._presence_enabled = hs.config.server.use_presence
-
         # Route presence EDUs to the right worker
         hs.get_federation_registry().register_instances_for_edu(
             EduTypes.PRESENCE,
@@ -421,8 +432,6 @@ class WorkerPresenceHandler(BasePresenceHandler):
             self.send_stop_syncing, UPDATE_SYNCING_USERS_MS
         )
 
-        self._busy_presence_enabled = hs.config.experimental.msc3026_enabled
-
         hs.get_reactor().addSystemEventTrigger(
             "before",
             "shutdown",
@@ -490,7 +499,9 @@ class WorkerPresenceHandler(BasePresenceHandler):
             # what the spec wants: see comment in the BasePresenceHandler version
             # of this function.
             await self.set_state(
-                UserID.from_string(user_id), {"presence": presence_state}, True
+                UserID.from_string(user_id),
+                {"presence": presence_state},
+                ignore_status_msg=True,
             )
 
         curr_sync = self._user_to_num_current_syncs.get(user_id, 0)
@@ -601,22 +612,13 @@ class WorkerPresenceHandler(BasePresenceHandler):
         """
         presence = state["presence"]
 
-        valid_presence = (
-            PresenceState.ONLINE,
-            PresenceState.UNAVAILABLE,
-            PresenceState.OFFLINE,
-            PresenceState.BUSY,
-        )
-
-        if presence not in valid_presence or (
-            presence == PresenceState.BUSY and not self._busy_presence_enabled
-        ):
+        if presence not in self.VALID_PRESENCE:
             raise SynapseError(400, "Invalid presence state")
 
         user_id = target_user.to_string()
 
         # If presence is disabled, no-op
-        if not self.hs.config.server.use_presence:
+        if not self._presence_enabled:
             return
 
         # Proxy request to instance that writes presence
@@ -633,7 +635,7 @@ class WorkerPresenceHandler(BasePresenceHandler):
         with the app.
         """
         # If presence is disabled, no-op
-        if not self.hs.config.server.use_presence:
+        if not self._presence_enabled:
             return
 
         # Proxy request to instance that writes presence
@@ -649,7 +651,6 @@ class PresenceHandler(BasePresenceHandler):
         self.hs = hs
         self.wheel_timer: WheelTimer[str] = WheelTimer()
         self.notifier = hs.get_notifier()
-        self._presence_enabled = hs.config.server.use_presence
 
         federation_registry = hs.get_federation_registry()
 
@@ -700,8 +701,6 @@ class PresenceHandler(BasePresenceHandler):
             self._on_shutdown,
         )
 
-        self._next_serial = 1
-
         # Keeps track of the number of *ongoing* syncs on this process. While
         # this is non zero a user will never go offline.
         self.user_to_num_current_syncs: Dict[str, int] = {}
@@ -723,21 +722,16 @@ class PresenceHandler(BasePresenceHandler):
             # Start a LoopingCall in 30s that fires every 5s.
             # The initial delay is to allow disconnected clients a chance to
             # reconnect before we treat them as offline.
-            def run_timeout_handler() -> Awaitable[None]:
-                return run_as_background_process(
-                    "handle_presence_timeouts", self._handle_timeouts
-                )
-
             self.clock.call_later(
-                30, self.clock.looping_call, run_timeout_handler, 5000
+                30, self.clock.looping_call, self._handle_timeouts, 5000
             )
 
-            def run_persister() -> Awaitable[None]:
-                return run_as_background_process(
-                    "persist_presence_changes", self._persist_unpersisted_changes
-                )
-
-            self.clock.call_later(60, self.clock.looping_call, run_persister, 60 * 1000)
+            self.clock.call_later(
+                60,
+                self.clock.looping_call,
+                self._persist_unpersisted_changes,
+                60 * 1000,
+            )
 
         LaterGauge(
             "synapse_handlers_presence_wheel_timer_size",
@@ -783,6 +777,7 @@ class PresenceHandler(BasePresenceHandler):
             )
         logger.info("Finished _on_shutdown")
 
+    @wrap_as_background_process("persist_presence_changes")
     async def _persist_unpersisted_changes(self) -> None:
         """We periodically persist the unpersisted changes, as otherwise they
         may stack up and slow down shutdown times.
@@ -898,6 +893,7 @@ class PresenceHandler(BasePresenceHandler):
                         states, [destination]
                     )
 
+    @wrap_as_background_process("handle_presence_timeouts")
     async def _handle_timeouts(self) -> None:
         """Checks the presence of users that have timed out and updates as
         appropriate.
@@ -955,7 +951,7 @@ class PresenceHandler(BasePresenceHandler):
         with the app.
         """
         # If presence is disabled, no-op
-        if not self.hs.config.server.use_presence:
+        if not self._presence_enabled:
             return
 
         user_id = user.to_string()
@@ -990,56 +986,51 @@ class PresenceHandler(BasePresenceHandler):
                 client that is being used by a user.
             presence_state: The presence state indicated in the sync request
         """
-        # Override if it should affect the user's presence, if presence is
-        # disabled.
-        if not self.hs.config.server.use_presence:
-            affect_presence = False
+        if not affect_presence or not self._presence_enabled:
+            return _NullContextManager()
 
-        if affect_presence:
-            curr_sync = self.user_to_num_current_syncs.get(user_id, 0)
-            self.user_to_num_current_syncs[user_id] = curr_sync + 1
+        curr_sync = self.user_to_num_current_syncs.get(user_id, 0)
+        self.user_to_num_current_syncs[user_id] = curr_sync + 1
 
-            prev_state = await self.current_state_for_user(user_id)
+        prev_state = await self.current_state_for_user(user_id)
 
-            # If they're busy then they don't stop being busy just by syncing,
-            # so just update the last sync time.
-            if prev_state.state != PresenceState.BUSY:
-                # XXX: We set_state separately here and just update the last_active_ts above
-                # This keeps the logic as similar as possible between the worker and single
-                # process modes. Using set_state will actually cause last_active_ts to be
-                # updated always, which is not what the spec calls for, but synapse has done
-                # this for... forever, I think.
-                await self.set_state(
-                    UserID.from_string(user_id), {"presence": presence_state}, True
-                )
-                # Retrieve the new state for the logic below. This should come from the
-                # in-memory cache.
-                prev_state = await self.current_state_for_user(user_id)
+        # If they're busy then they don't stop being busy just by syncing,
+        # so just update the last sync time.
+        if prev_state.state != PresenceState.BUSY:
+            # XXX: We set_state separately here and just update the last_active_ts above
+            # This keeps the logic as similar as possible between the worker and single
+            # process modes. Using set_state will actually cause last_active_ts to be
+            # updated always, which is not what the spec calls for, but synapse has done
+            # this for... forever, I think.
+            await self.set_state(
+                UserID.from_string(user_id),
+                {"presence": presence_state},
+                ignore_status_msg=True,
+            )
+            # Retrieve the new state for the logic below. This should come from the
+            # in-memory cache.
+            prev_state = await self.current_state_for_user(user_id)
 
-            # To keep the single process behaviour consistent with worker mode, run the
-            # same logic as `update_external_syncs_row`, even though it looks weird.
-            if prev_state.state == PresenceState.OFFLINE:
-                await self._update_states(
-                    [
-                        prev_state.copy_and_replace(
-                            state=PresenceState.ONLINE,
-                            last_active_ts=self.clock.time_msec(),
-                            last_user_sync_ts=self.clock.time_msec(),
-                        )
-                    ]
-                )
-            # otherwise, set the new presence state & update the last sync time,
-            # but don't update last_active_ts as this isn't an indication that
-            # they've been active (even though it's probably been updated by
-            # set_state above)
-            else:
-                await self._update_states(
-                    [
-                        prev_state.copy_and_replace(
-                            last_user_sync_ts=self.clock.time_msec()
-                        )
-                    ]
-                )
+        # To keep the single process behaviour consistent with worker mode, run the
+        # same logic as `update_external_syncs_row`, even though it looks weird.
+        if prev_state.state == PresenceState.OFFLINE:
+            await self._update_states(
+                [
+                    prev_state.copy_and_replace(
+                        state=PresenceState.ONLINE,
+                        last_active_ts=self.clock.time_msec(),
+                        last_user_sync_ts=self.clock.time_msec(),
+                    )
+                ]
+            )
+        # otherwise, set the new presence state & update the last sync time,
+        # but don't update last_active_ts as this isn't an indication that
+        # they've been active (even though it's probably been updated by
+        # set_state above)
+        else:
+            await self._update_states(
+                [prev_state.copy_and_replace(last_user_sync_ts=self.clock.time_msec())]
+            )
 
         async def _end() -> None:
             try:
@@ -1061,8 +1052,7 @@ class PresenceHandler(BasePresenceHandler):
             try:
                 yield
             finally:
-                if affect_presence:
-                    run_in_background(_end)
+                run_in_background(_end)
 
         return _user_syncing()
 
@@ -1229,20 +1219,11 @@ class PresenceHandler(BasePresenceHandler):
         status_msg = state.get("status_msg", None)
         presence = state["presence"]
 
-        valid_presence = (
-            PresenceState.ONLINE,
-            PresenceState.UNAVAILABLE,
-            PresenceState.OFFLINE,
-            PresenceState.BUSY,
-        )
-
-        if presence not in valid_presence or (
-            presence == PresenceState.BUSY and not self._busy_presence_enabled
-        ):
+        if presence not in self.VALID_PRESENCE:
             raise SynapseError(400, "Invalid presence state")
 
         # If presence is disabled, no-op
-        if not self.hs.config.server.use_presence:
+        if not self._presence_enabled:
             return
 
         user_id = target_user.to_string()
-- 
cgit 1.5.1


From 7f4b41369049c143919d229670087df69edb9602 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Thu, 10 Aug 2023 17:28:31 +0000
Subject: Fix the type annotation on `run_db_interaction` in the Module API.
 (#16089)

* Fix the method signature of `run_db_interaction` on the module API

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 changelog.d/16089.misc         | 1 +
 synapse/module_api/__init__.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/16089.misc

(limited to 'synapse')

diff --git a/changelog.d/16089.misc b/changelog.d/16089.misc
new file mode 100644
index 0000000000..8c302e6884
--- /dev/null
+++ b/changelog.d/16089.misc
@@ -0,0 +1 @@
+Fix the type annotation on `run_db_interaction` in the Module API.
\ No newline at end of file
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index acee1dafd3..9ad8e038ae 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -31,7 +31,7 @@ from typing import (
 
 import attr
 import jinja2
-from typing_extensions import ParamSpec
+from typing_extensions import Concatenate, ParamSpec
 
 from twisted.internet import defer
 from twisted.internet.interfaces import IDelayedCall
@@ -885,7 +885,7 @@ class ModuleApi:
     def run_db_interaction(
         self,
         desc: str,
-        func: Callable[P, T],
+        func: Callable[Concatenate[LoggingTransaction, P], T],
         *args: P.args,
         **kwargs: P.kwargs,
     ) -> "defer.Deferred[T]":
-- 
cgit 1.5.1


From 614efc488b1a25dfa32256930c5acc896c88d92f Mon Sep 17 00:00:00 2001
From: Nick Mills-Barrett <nick@beeper.com>
Date: Fri, 11 Aug 2023 12:37:09 +0100
Subject: Add linearizer on user ID to push rule PUT/DELETE requests (#16052)

See: #16053

Signed off by Nick @ Beeper (@Fizzadar)
---
 changelog.d/16052.bugfix         |  1 +
 synapse/rest/client/push_rule.py | 28 ++++++++++++++++++++++------
 2 files changed, 23 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/16052.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16052.bugfix b/changelog.d/16052.bugfix
new file mode 100644
index 0000000000..3c7a60f226
--- /dev/null
+++ b/changelog.d/16052.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where concurrent requests to change a user's push rules could cause a deadlock. Contributed by Nick @ Beeper (@fizzadar).
diff --git a/synapse/rest/client/push_rule.py b/synapse/rest/client/push_rule.py
index 5c9fece3ba..5ed3b83a03 100644
--- a/synapse/rest/client/push_rule.py
+++ b/synapse/rest/client/push_rule.py
@@ -32,6 +32,7 @@ from synapse.push.rulekinds import PRIORITY_CLASS_MAP
 from synapse.rest.client._base import client_patterns
 from synapse.storage.push_rule import InconsistentRuleException, RuleNotFoundException
 from synapse.types import JsonDict
+from synapse.util.async_helpers import Linearizer
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -53,26 +54,32 @@ class PushRuleRestServlet(RestServlet):
         self.notifier = hs.get_notifier()
         self._is_worker = hs.config.worker.worker_app is not None
         self._push_rules_handler = hs.get_push_rules_handler()
+        self._push_rule_linearizer = Linearizer(name="push_rules")
 
     async def on_PUT(self, request: SynapseRequest, path: str) -> Tuple[int, JsonDict]:
         if self._is_worker:
             raise Exception("Cannot handle PUT /push_rules on worker")
 
+        requester = await self.auth.get_user_by_req(request)
+        user_id = requester.user.to_string()
+
+        async with self._push_rule_linearizer.queue(user_id):
+            return await self.handle_put(request, path, user_id)
+
+    async def handle_put(
+        self, request: SynapseRequest, path: str, user_id: str
+    ) -> Tuple[int, JsonDict]:
         spec = _rule_spec_from_path(path.split("/"))
         try:
             priority_class = _priority_class_from_spec(spec)
         except InvalidRuleException as e:
             raise SynapseError(400, str(e))
 
-        requester = await self.auth.get_user_by_req(request)
-
         if "/" in spec.rule_id or "\\" in spec.rule_id:
             raise SynapseError(400, "rule_id may not contain slashes")
 
         content = parse_json_value_from_request(request)
 
-        user_id = requester.user.to_string()
-
         if spec.attr:
             try:
                 await self._push_rules_handler.set_rule_attr(user_id, spec, content)
@@ -126,11 +133,20 @@ class PushRuleRestServlet(RestServlet):
         if self._is_worker:
             raise Exception("Cannot handle DELETE /push_rules on worker")
 
-        spec = _rule_spec_from_path(path.split("/"))
-
         requester = await self.auth.get_user_by_req(request)
         user_id = requester.user.to_string()
 
+        async with self._push_rule_linearizer.queue(user_id):
+            return await self.handle_delete(request, path, user_id)
+
+    async def handle_delete(
+        self,
+        request: SynapseRequest,
+        path: str,
+        user_id: str,
+    ) -> Tuple[int, JsonDict]:
+        spec = _rule_spec_from_path(path.split("/"))
+
         namespaced_rule_id = f"global/{spec.template}/{spec.rule_id}"
 
         try:
-- 
cgit 1.5.1


From 9ff84bccbb152460913d63d4b8e9dffc220adfea Mon Sep 17 00:00:00 2001
From: Gabriel Rodríguez <gabrc52@gmail.com>
Date: Fri, 11 Aug 2023 16:15:17 -0400
Subject: Allow customizing IdP name and icon for SAML and CAS (#16094)

---
 changelog.d/16094.feature                        |  1 +
 docs/usage/configuration/config_documentation.md | 22 +++++++++++++++++++++-
 synapse/config/cas.py                            |  4 ++++
 synapse/config/saml2.py                          |  6 ++++++
 synapse/handlers/cas.py                          | 11 ++++++-----
 synapse/handlers/saml.py                         | 11 ++++++-----
 6 files changed, 44 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/16094.feature

(limited to 'synapse')

diff --git a/changelog.d/16094.feature b/changelog.d/16094.feature
new file mode 100644
index 0000000000..3be71badb9
--- /dev/null
+++ b/changelog.d/16094.feature
@@ -0,0 +1 @@
+Allow customising the IdP display name, icon, and brand for SAML and CAS providers (in addition to OIDC providers).
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index a17a8c2900..6601bba9f2 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3025,6 +3025,16 @@ enable SAML login. You can either put your entire pysaml config inline using the
 option, or you can specify a path to a psyaml config file with the sub-option `config_path`.
 This setting has the following sub-options:
 
+* `idp_name`: A user-facing name for this identity provider, which is used to
+   offer the user a choice of login mechanisms.
+* `idp_icon`: An optional icon for this identity provider, which is presented
+   by clients and Synapse's own IdP picker page. If given, must be an
+   MXC URI of the format `mxc://<server-name>/<media-id>`. (An easy way to
+   obtain such an MXC URI is to upload an image to an (unencrypted) room
+   and then copy the "url" from the source of the event.)
+* `idp_brand`: An optional brand for this identity provider, allowing clients
+   to style the login flow according to the identity provider in question.
+   See the [spec](https://spec.matrix.org/latest/) for possible options here.
 * `sp_config`: the configuration for the pysaml2 Service Provider. See pysaml2 docs for format of config.
    Default values will be used for the `entityid` and `service` settings,
    so it is not normally necessary to specify them unless you need to
@@ -3176,7 +3186,7 @@ Options for each entry include:
 
 * `idp_icon`: An optional icon for this identity provider, which is presented
    by clients and Synapse's own IdP picker page. If given, must be an
-   MXC URI of the format mxc://<server-name>/<media-id>. (An easy way to
+   MXC URI of the format `mxc://<server-name>/<media-id>`. (An easy way to
    obtain such an MXC URI is to upload an image to an (unencrypted) room
    and then copy the "url" from the source of the event.)
 
@@ -3391,6 +3401,16 @@ Enable Central Authentication Service (CAS) for registration and login.
 Has the following sub-options:
 * `enabled`: Set this to true to enable authorization against a CAS server.
    Defaults to false.
+* `idp_name`: A user-facing name for this identity provider, which is used to
+   offer the user a choice of login mechanisms.
+* `idp_icon`: An optional icon for this identity provider, which is presented
+   by clients and Synapse's own IdP picker page. If given, must be an
+   MXC URI of the format `mxc://<server-name>/<media-id>`. (An easy way to
+   obtain such an MXC URI is to upload an image to an (unencrypted) room
+   and then copy the "url" from the source of the event.)
+* `idp_brand`: An optional brand for this identity provider, allowing clients
+   to style the login flow according to the identity provider in question.
+   See the [spec](https://spec.matrix.org/latest/) for possible options here.
 * `server_url`: The URL of the CAS authorization endpoint.
 * `displayname_attribute`: The attribute of the CAS response to use as the display name.
    If no name is given here, no displayname will be set.
diff --git a/synapse/config/cas.py b/synapse/config/cas.py
index 9152c06bd6..c4e63e7411 100644
--- a/synapse/config/cas.py
+++ b/synapse/config/cas.py
@@ -47,6 +47,10 @@ class CasConfig(Config):
                 required_attributes
             )
 
+            self.idp_name = cas_config.get("idp_name", "CAS")
+            self.idp_icon = cas_config.get("idp_icon")
+            self.idp_brand = cas_config.get("idp_brand")
+
         else:
             self.cas_server_url = None
             self.cas_service_url = None
diff --git a/synapse/config/saml2.py b/synapse/config/saml2.py
index 49ca663dde..c69e24cf26 100644
--- a/synapse/config/saml2.py
+++ b/synapse/config/saml2.py
@@ -89,8 +89,14 @@ class SAML2Config(Config):
             "grandfathered_mxid_source_attribute", "uid"
         )
 
+        # refers to a SAML IdP entity ID
         self.saml2_idp_entityid = saml2_config.get("idp_entityid", None)
 
+        # IdP properties for Matrix clients
+        self.idp_name = saml2_config.get("idp_name", "SAML")
+        self.idp_icon = saml2_config.get("idp_icon")
+        self.idp_brand = saml2_config.get("idp_brand")
+
         # user_mapping_provider may be None if the key is present but has no value
         ump_dict = saml2_config.get("user_mapping_provider") or {}
 
diff --git a/synapse/handlers/cas.py b/synapse/handlers/cas.py
index fc467bc7c1..5c71637038 100644
--- a/synapse/handlers/cas.py
+++ b/synapse/handlers/cas.py
@@ -76,12 +76,13 @@ class CasHandler:
         self.idp_id = "cas"
 
         # user-facing name of this auth provider
-        self.idp_name = "CAS"
+        self.idp_name = hs.config.cas.idp_name
 
-        # we do not currently support brands/icons for CAS auth, but this is required by
-        # the SsoIdentityProvider protocol type.
-        self.idp_icon = None
-        self.idp_brand = None
+        # MXC URI for icon for this auth provider
+        self.idp_icon = hs.config.cas.idp_icon
+
+        # optional brand identifier for this auth provider
+        self.idp_brand = hs.config.cas.idp_brand
 
         self._sso_handler = hs.get_sso_handler()
 
diff --git a/synapse/handlers/saml.py b/synapse/handlers/saml.py
index 6083c9f4b5..d00035c332 100644
--- a/synapse/handlers/saml.py
+++ b/synapse/handlers/saml.py
@@ -74,12 +74,13 @@ class SamlHandler:
         self.idp_id = "saml"
 
         # user-facing name of this auth provider
-        self.idp_name = "SAML"
+        self.idp_name = hs.config.saml2.idp_name
 
-        # we do not currently support icons/brands for SAML auth, but this is required by
-        # the SsoIdentityProvider protocol type.
-        self.idp_icon = None
-        self.idp_brand = None
+        # MXC URI for icon for this auth provider
+        self.idp_icon = hs.config.saml2.idp_icon
+
+        # optional brand identifier for this auth provider
+        self.idp_brand = hs.config.saml2.idp_brand
 
         # a map from saml session id to Saml2SessionData object
         self._outstanding_requests_dict: Dict[str, Saml2SessionData] = {}
-- 
cgit 1.5.1


From ad3f43be9a597dd4fdf59e0a95e4630e7b9502fe Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 15 Aug 2023 08:11:20 -0400
Subject: Run pyupgrade for python 3.7 & 3.8. (#16110)

---
 changelog.d/16110.misc                             |  1 +
 contrib/cmdclient/console.py                       |  2 +-
 docker/configure_workers_and_start.py              |  2 +-
 docker/start.py                                    |  2 +-
 scripts-dev/build_debian_packages.py               |  2 +-
 scripts-dev/check_schema_delta.py                  |  2 +-
 scripts-dev/federation_client.py                   |  2 +-
 scripts-dev/release.py                             |  1 -
 scripts-dev/sign_json.py                           |  2 +-
 synapse/__init__.py                                |  8 +++-
 synapse/_scripts/synapse_port_db.py                |  6 +--
 synapse/_scripts/update_synapse_database.py        |  2 +-
 synapse/api/constants.py                           |  3 +-
 synapse/handlers/presence.py                       |  2 +-
 synapse/handlers/sso.py                            |  5 ++-
 synapse/handlers/stats.py                          | 12 ++++--
 synapse/handlers/sync.py                           |  8 ++--
 synapse/logging/_remote.py                         |  3 +-
 .../module_api/callbacks/spamchecker_callbacks.py  | 48 ++++++----------------
 synapse/replication/tcp/handler.py                 |  2 +-
 synapse/storage/databases/main/filtering.py        |  3 +-
 synapse/storage/databases/main/keys.py             |  2 +-
 synapse/storage/databases/main/stats.py            |  3 +-
 synapse/storage/engines/_base.py                   |  2 +-
 synapse/storage/prepare_database.py                | 12 +++++-
 synapse/types/__init__.py                          |  3 +-
 synapse/util/async_helpers.py                      |  3 +-
 synapse/util/macaroons.py                          |  2 +-
 synapse/util/ratelimitutils.py                     |  2 +-
 synapse/visibility.py                              |  2 +-
 tests/app/test_phone_stats_home.py                 |  2 +-
 tests/crypto/test_keyring.py                       |  2 +-
 .../federation/test_matrix_federation_agent.py     |  2 +-
 tests/module_api/test_api.py                       |  2 +-
 tests/replication/test_multi_media_repo.py         |  2 +-
 tests/rest/client/test_redactions.py               | 10 ++---
 tests/rest/client/test_relations.py                | 38 ++++++++---------
 tests/rest/client/test_rooms.py                    |  6 +--
 tests/server.py                                    |  3 +-
 tests/storage/test_appservice.py                   |  6 +--
 tests/storage/test_main.py                         |  2 +-
 tests/storage/test_room_search.py                  |  8 ++--
 tests/test_visibility.py                           |  2 +-
 43 files changed, 113 insertions(+), 121 deletions(-)
 create mode 100644 changelog.d/16110.misc

(limited to 'synapse')

diff --git a/changelog.d/16110.misc b/changelog.d/16110.misc
new file mode 100644
index 0000000000..68efe86ddc
--- /dev/null
+++ b/changelog.d/16110.misc
@@ -0,0 +1 @@
+Run `pyupgrade` for Python 3.8+.
diff --git a/contrib/cmdclient/console.py b/contrib/cmdclient/console.py
index 895b2a7af1..710fe25699 100755
--- a/contrib/cmdclient/console.py
+++ b/contrib/cmdclient/console.py
@@ -769,7 +769,7 @@ def main(server_url, identity_server_url, username, token, config_path):
     global CONFIG_JSON
     CONFIG_JSON = config_path  # bit cheeky, but just overwrite the global
     try:
-        with open(config_path, "r") as config:
+        with open(config_path) as config:
             syn_cmd.config = json.load(config)
             try:
                 http_client.verbose = "on" == syn_cmd.config["verbose"]
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index dc824038b5..400a7515aa 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -861,7 +861,7 @@ def generate_worker_files(
         # Then a worker config file
         convert(
             "/conf/worker.yaml.j2",
-            "/conf/workers/{name}.yaml".format(name=worker_name),
+            f"/conf/workers/{worker_name}.yaml",
             **worker_config,
             worker_log_config_filepath=log_config_filepath,
             using_unix_sockets=using_unix_sockets,
diff --git a/docker/start.py b/docker/start.py
index ebcc599f04..aebc7e4aaa 100755
--- a/docker/start.py
+++ b/docker/start.py
@@ -82,7 +82,7 @@ def generate_config_from_template(
                 with open(filename) as handle:
                     value = handle.read()
             else:
-                log("Generating a random secret for {}".format(secret))
+                log(f"Generating a random secret for {secret}")
                 value = codecs.encode(os.urandom(32), "hex").decode()
                 with open(filename, "w") as handle:
                     handle.write(value)
diff --git a/scripts-dev/build_debian_packages.py b/scripts-dev/build_debian_packages.py
index bb89ba581c..c03e3418c0 100755
--- a/scripts-dev/build_debian_packages.py
+++ b/scripts-dev/build_debian_packages.py
@@ -47,7 +47,7 @@ can be passed on the commandline for debugging.
 projdir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 
 
-class Builder(object):
+class Builder:
     def __init__(
         self,
         redirect_stdout: bool = False,
diff --git a/scripts-dev/check_schema_delta.py b/scripts-dev/check_schema_delta.py
index fee4a8bd3d..467be96fdf 100755
--- a/scripts-dev/check_schema_delta.py
+++ b/scripts-dev/check_schema_delta.py
@@ -43,7 +43,7 @@ def main(force_colors: bool) -> None:
     diffs: List[git.Diff] = repo.remote().refs.develop.commit.diff(None)
 
     # Get the schema version of the local file to check against current schema on develop
-    with open("synapse/storage/schema/__init__.py", "r") as file:
+    with open("synapse/storage/schema/__init__.py") as file:
         local_schema = file.read()
     new_locals: Dict[str, Any] = {}
     exec(local_schema, new_locals)
diff --git a/scripts-dev/federation_client.py b/scripts-dev/federation_client.py
index 63f0b25ddd..5ad334b4d8 100755
--- a/scripts-dev/federation_client.py
+++ b/scripts-dev/federation_client.py
@@ -247,7 +247,7 @@ def main() -> None:
 
 
 def read_args_from_config(args: argparse.Namespace) -> None:
-    with open(args.config, "r") as fh:
+    with open(args.config) as fh:
         config = yaml.safe_load(fh)
 
         if not args.server_name:
diff --git a/scripts-dev/release.py b/scripts-dev/release.py
index 89ffba8d92..4ac8eaa889 100755
--- a/scripts-dev/release.py
+++ b/scripts-dev/release.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
 # Copyright 2020 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/scripts-dev/sign_json.py b/scripts-dev/sign_json.py
index bb217799fb..00cbaf68f5 100755
--- a/scripts-dev/sign_json.py
+++ b/scripts-dev/sign_json.py
@@ -145,7 +145,7 @@ Example usage:
 
 
 def read_args_from_config(args: argparse.Namespace) -> None:
-    with open(args.config, "r") as fh:
+    with open(args.config) as fh:
         config = yaml.safe_load(fh)
         if not args.server_name:
             args.server_name = config["server_name"]
diff --git a/synapse/__init__.py b/synapse/__init__.py
index 6c1801862b..2f9c22a833 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -25,7 +25,11 @@ from synapse.util.rust import check_rust_lib_up_to_date
 from synapse.util.stringutils import strtobool
 
 # Check that we're not running on an unsupported Python version.
-if sys.version_info < (3, 8):
+#
+# Note that we use an (unneeded) variable here so that pyupgrade doesn't nuke the
+# if-statement completely.
+py_version = sys.version_info
+if py_version < (3, 8):
     print("Synapse requires Python 3.8 or above.")
     sys.exit(1)
 
@@ -78,7 +82,7 @@ try:
 except ImportError:
     pass
 
-import synapse.util
+import synapse.util  # noqa: E402
 
 __version__ = synapse.util.SYNAPSE_VERSION
 
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 1300aaf63c..49242800b8 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -1205,10 +1205,10 @@ class CursesProgress(Progress):
         self.total_processed = 0
         self.total_remaining = 0
 
-        super(CursesProgress, self).__init__()
+        super().__init__()
 
     def update(self, table: str, num_done: int) -> None:
-        super(CursesProgress, self).update(table, num_done)
+        super().update(table, num_done)
 
         self.total_processed = 0
         self.total_remaining = 0
@@ -1304,7 +1304,7 @@ class TerminalProgress(Progress):
     """Just prints progress to the terminal"""
 
     def update(self, table: str, num_done: int) -> None:
-        super(TerminalProgress, self).update(table, num_done)
+        super().update(table, num_done)
 
         data = self.tables[table]
 
diff --git a/synapse/_scripts/update_synapse_database.py b/synapse/_scripts/update_synapse_database.py
index 0adf94bba6..f97aecf8d5 100644
--- a/synapse/_scripts/update_synapse_database.py
+++ b/synapse/_scripts/update_synapse_database.py
@@ -38,7 +38,7 @@ class MockHomeserver(HomeServer):
     DATASTORE_CLASS = DataStore  # type: ignore [assignment]
 
     def __init__(self, config: HomeServerConfig):
-        super(MockHomeserver, self).__init__(
+        super().__init__(
             hostname=config.server.server_name,
             config=config,
             reactor=reactor,
diff --git a/synapse/api/constants.py b/synapse/api/constants.py
index dc32553d0c..bf311b636d 100644
--- a/synapse/api/constants.py
+++ b/synapse/api/constants.py
@@ -18,8 +18,7 @@
 """Contains constants from the specification."""
 
 import enum
-
-from typing_extensions import Final
+from typing import Final
 
 # the max size of a (canonical-json-encoded) event
 MAX_PDU_SIZE = 65536
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 11dff724e6..e8e9db4b91 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -32,6 +32,7 @@ from typing import (
     Any,
     Callable,
     Collection,
+    ContextManager,
     Dict,
     Generator,
     Iterable,
@@ -43,7 +44,6 @@ from typing import (
 )
 
 from prometheus_client import Counter
-from typing_extensions import ContextManager
 
 import synapse.metrics
 from synapse.api.constants import EduTypes, EventTypes, Membership, PresenceState
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index 4d29328a74..e9a544e754 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -24,13 +24,14 @@ from typing import (
     Iterable,
     List,
     Mapping,
+    NoReturn,
     Optional,
     Set,
 )
 from urllib.parse import urlencode
 
 import attr
-from typing_extensions import NoReturn, Protocol
+from typing_extensions import Protocol
 
 from twisted.web.iweb import IRequest
 from twisted.web.server import Request
@@ -791,7 +792,7 @@ class SsoHandler:
 
             if code != 200:
                 raise Exception(
-                    "GET request to download sso avatar image returned {}".format(code)
+                    f"GET request to download sso avatar image returned {code}"
                 )
 
             # upload name includes hash of the image file's content so that we can
diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py
index 7cabf7980a..3dde19fc81 100644
--- a/synapse/handlers/stats.py
+++ b/synapse/handlers/stats.py
@@ -14,9 +14,15 @@
 # limitations under the License.
 import logging
 from collections import Counter
-from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Tuple
-
-from typing_extensions import Counter as CounterType
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Counter as CounterType,
+    Dict,
+    Iterable,
+    Optional,
+    Tuple,
+)
 
 from synapse.api.constants import EventContentFields, EventTypes, Membership
 from synapse.metrics import event_processing_positions
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index c010405be6..8174248387 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -1442,11 +1442,9 @@ class SyncHandler:
 
         # Now we have our list of joined room IDs, exclude as configured and freeze
         joined_room_ids = frozenset(
-            (
-                room_id
-                for room_id in mutable_joined_room_ids
-                if room_id not in mutable_rooms_to_exclude
-            )
+            room_id
+            for room_id in mutable_joined_room_ids
+            if room_id not in mutable_rooms_to_exclude
         )
 
         logger.debug(
diff --git a/synapse/logging/_remote.py b/synapse/logging/_remote.py
index 5a61b21eaf..284fbac524 100644
--- a/synapse/logging/_remote.py
+++ b/synapse/logging/_remote.py
@@ -18,10 +18,9 @@ import traceback
 from collections import deque
 from ipaddress import IPv4Address, IPv6Address, ip_address
 from math import floor
-from typing import Callable, Optional
+from typing import Callable, Deque, Optional
 
 import attr
-from typing_extensions import Deque
 from zope.interface import implementer
 
 from twisted.application.internet import ClientService
diff --git a/synapse/module_api/callbacks/spamchecker_callbacks.py b/synapse/module_api/callbacks/spamchecker_callbacks.py
index e191450323..32db7cce8d 100644
--- a/synapse/module_api/callbacks/spamchecker_callbacks.py
+++ b/synapse/module_api/callbacks/spamchecker_callbacks.py
@@ -426,9 +426,7 @@ class SpamCheckerModuleApiCallbacks:
                 generally discouraged as it doesn't support internationalization.
         """
         for callback in self._check_event_for_spam_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
+            with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"):
                 res = await delay_cancellation(callback(event))
                 if res is False or res == self.NOT_SPAM:
                     # This spam-checker accepts the event.
@@ -481,9 +479,7 @@ class SpamCheckerModuleApiCallbacks:
             True if the event should be silently dropped
         """
         for callback in self._should_drop_federated_event_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
+            with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"):
                 res: Union[bool, str] = await delay_cancellation(callback(event))
             if res:
                 return res
@@ -505,9 +501,7 @@ class SpamCheckerModuleApiCallbacks:
             NOT_SPAM if the operation is permitted, [Codes, Dict] otherwise.
         """
         for callback in self._user_may_join_room_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
+            with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"):
                 res = await delay_cancellation(callback(user_id, room_id, is_invited))
                 # Normalize return values to `Codes` or `"NOT_SPAM"`.
                 if res is True or res is self.NOT_SPAM:
@@ -546,9 +540,7 @@ class SpamCheckerModuleApiCallbacks:
             NOT_SPAM if the operation is permitted, Codes otherwise.
         """
         for callback in self._user_may_invite_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
+            with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"):
                 res = await delay_cancellation(
                     callback(inviter_userid, invitee_userid, room_id)
                 )
@@ -593,9 +585,7 @@ class SpamCheckerModuleApiCallbacks:
             NOT_SPAM if the operation is permitted, Codes otherwise.
         """
         for callback in self._user_may_send_3pid_invite_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
+            with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"):
                 res = await delay_cancellation(
                     callback(inviter_userid, medium, address, room_id)
                 )
@@ -630,9 +620,7 @@ class SpamCheckerModuleApiCallbacks:
             userid: The ID of the user attempting to create a room
         """
         for callback in self._user_may_create_room_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
+            with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"):
                 res = await delay_cancellation(callback(userid))
                 if res is True or res is self.NOT_SPAM:
                     continue
@@ -666,9 +654,7 @@ class SpamCheckerModuleApiCallbacks:
 
         """
         for callback in self._user_may_create_room_alias_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
+            with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"):
                 res = await delay_cancellation(callback(userid, room_alias))
                 if res is True or res is self.NOT_SPAM:
                     continue
@@ -701,9 +687,7 @@ class SpamCheckerModuleApiCallbacks:
             room_id: The ID of the room that would be published
         """
         for callback in self._user_may_publish_room_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
+            with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"):
                 res = await delay_cancellation(callback(userid, room_id))
                 if res is True or res is self.NOT_SPAM:
                     continue
@@ -742,9 +726,7 @@ class SpamCheckerModuleApiCallbacks:
             True if the user is spammy.
         """
         for callback in self._check_username_for_spam_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
+            with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"):
                 # Make a copy of the user profile object to ensure the spam checker cannot
                 # modify it.
                 res = await delay_cancellation(callback(user_profile.copy()))
@@ -776,9 +758,7 @@ class SpamCheckerModuleApiCallbacks:
         """
 
         for callback in self._check_registration_for_spam_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
+            with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"):
                 behaviour = await delay_cancellation(
                     callback(email_threepid, username, request_info, auth_provider_id)
                 )
@@ -820,9 +800,7 @@ class SpamCheckerModuleApiCallbacks:
         """
 
         for callback in self._check_media_file_for_spam_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
+            with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"):
                 res = await delay_cancellation(callback(file_wrapper, file_info))
                 # Normalize return values to `Codes` or `"NOT_SPAM"`.
                 if res is False or res is self.NOT_SPAM:
@@ -869,9 +847,7 @@ class SpamCheckerModuleApiCallbacks:
         """
 
         for callback in self._check_login_for_spam_callbacks:
-            with Measure(
-                self.clock, "{}.{}".format(callback.__module__, callback.__qualname__)
-            ):
+            with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"):
                 res = await delay_cancellation(
                     callback(
                         user_id,
diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py
index a2cabba7b1..38adcbe1d0 100644
--- a/synapse/replication/tcp/handler.py
+++ b/synapse/replication/tcp/handler.py
@@ -17,6 +17,7 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Awaitable,
+    Deque,
     Dict,
     Iterable,
     Iterator,
@@ -29,7 +30,6 @@ from typing import (
 )
 
 from prometheus_client import Counter
-from typing_extensions import Deque
 
 from twisted.internet.protocol import ReconnectingClientFactory
 
diff --git a/synapse/storage/databases/main/filtering.py b/synapse/storage/databases/main/filtering.py
index fff417f9e3..047de6283a 100644
--- a/synapse/storage/databases/main/filtering.py
+++ b/synapse/storage/databases/main/filtering.py
@@ -13,10 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional, Tuple, Union, cast
+from typing import TYPE_CHECKING, Optional, Tuple, Union, cast
 
 from canonicaljson import encode_canonical_json
-from typing_extensions import TYPE_CHECKING
 
 from synapse.api.errors import Codes, StoreError, SynapseError
 from synapse.storage._base import SQLBaseStore, db_to_json
diff --git a/synapse/storage/databases/main/keys.py b/synapse/storage/databases/main/keys.py
index 1666e3c43b..cea32a034a 100644
--- a/synapse/storage/databases/main/keys.py
+++ b/synapse/storage/databases/main/keys.py
@@ -188,7 +188,7 @@ class KeyStore(SQLBaseStore):
         # invalidate takes a tuple corresponding to the params of
         # _get_server_keys_json. _get_server_keys_json only takes one
         # param, which is itself the 2-tuple (server_name, key_id).
-        self._get_server_keys_json.invalidate((((server_name, key_id),)))
+        self._get_server_keys_json.invalidate(((server_name, key_id),))
 
     @cached()
     def _get_server_keys_json(
diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py
index f34b7ce8f4..6298f0984d 100644
--- a/synapse/storage/databases/main/stats.py
+++ b/synapse/storage/databases/main/stats.py
@@ -19,6 +19,7 @@ from itertools import chain
 from typing import (
     TYPE_CHECKING,
     Any,
+    Counter,
     Dict,
     Iterable,
     List,
@@ -28,8 +29,6 @@ from typing import (
     cast,
 )
 
-from typing_extensions import Counter
-
 from twisted.internet.defer import DeferredLock
 
 from synapse.api.constants import Direction, EventContentFields, EventTypes, Membership
diff --git a/synapse/storage/engines/_base.py b/synapse/storage/engines/_base.py
index 0363cdc038..0b5b3bf03e 100644
--- a/synapse/storage/engines/_base.py
+++ b/synapse/storage/engines/_base.py
@@ -145,5 +145,5 @@ class BaseDatabaseEngine(Generic[ConnectionType, CursorType], metaclass=abc.ABCM
 
         This is not provided by DBAPI2, and so needs engine-specific support.
         """
-        with open(filepath, "rt") as f:
+        with open(filepath) as f:
             cls.executescript(cursor, f.read())
diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index 38b7abd801..31501fd573 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -16,10 +16,18 @@ import logging
 import os
 import re
 from collections import Counter
-from typing import Collection, Generator, Iterable, List, Optional, TextIO, Tuple
+from typing import (
+    Collection,
+    Counter as CounterType,
+    Generator,
+    Iterable,
+    List,
+    Optional,
+    TextIO,
+    Tuple,
+)
 
 import attr
-from typing_extensions import Counter as CounterType
 
 from synapse.config.homeserver import HomeServerConfig
 from synapse.storage.database import LoggingDatabaseConnection, LoggingTransaction
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index 39a1ae4ac3..073f682aca 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -21,6 +21,7 @@ from typing import (
     Any,
     ClassVar,
     Dict,
+    Final,
     List,
     Mapping,
     Match,
@@ -38,7 +39,7 @@ import attr
 from immutabledict import immutabledict
 from signedjson.key import decode_verify_key_bytes
 from signedjson.types import VerifyKey
-from typing_extensions import Final, TypedDict
+from typing_extensions import TypedDict
 from unpaddedbase64 import decode_base64
 from zope.interface import Interface
 
diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py
index 4041e49e71..943ad54456 100644
--- a/synapse/util/async_helpers.py
+++ b/synapse/util/async_helpers.py
@@ -22,6 +22,7 @@ import logging
 from contextlib import asynccontextmanager
 from typing import (
     Any,
+    AsyncContextManager,
     AsyncIterator,
     Awaitable,
     Callable,
@@ -42,7 +43,7 @@ from typing import (
 )
 
 import attr
-from typing_extensions import AsyncContextManager, Concatenate, Literal, ParamSpec
+from typing_extensions import Concatenate, Literal, ParamSpec
 
 from twisted.internet import defer
 from twisted.internet.defer import CancelledError
diff --git a/synapse/util/macaroons.py b/synapse/util/macaroons.py
index 644c341e8c..db6c40a3e1 100644
--- a/synapse/util/macaroons.py
+++ b/synapse/util/macaroons.py
@@ -218,7 +218,7 @@ class MacaroonGenerator:
         # to avoid validating those as guest tokens, we explicitely verify if
         # the macaroon includes the "guest = true" caveat.
         is_guest = any(
-            (caveat.caveat_id == "guest = true" for caveat in macaroon.caveats)
+            caveat.caveat_id == "guest = true" for caveat in macaroon.caveats
         )
 
         if not is_guest:
diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py
index 2ad55ac13e..cde4a0780f 100644
--- a/synapse/util/ratelimitutils.py
+++ b/synapse/util/ratelimitutils.py
@@ -20,6 +20,7 @@ import typing
 from typing import (
     Any,
     Callable,
+    ContextManager,
     DefaultDict,
     Dict,
     Iterator,
@@ -33,7 +34,6 @@ from typing import (
 from weakref import WeakSet
 
 from prometheus_client.core import Counter
-from typing_extensions import ContextManager
 
 from twisted.internet import defer
 
diff --git a/synapse/visibility.py b/synapse/visibility.py
index fc71dc92a4..eac10f6438 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -17,6 +17,7 @@ from enum import Enum, auto
 from typing import (
     Collection,
     Dict,
+    Final,
     FrozenSet,
     List,
     Mapping,
@@ -27,7 +28,6 @@ from typing import (
 )
 
 import attr
-from typing_extensions import Final
 
 from synapse.api.constants import EventTypes, HistoryVisibility, Membership
 from synapse.events import EventBase
diff --git a/tests/app/test_phone_stats_home.py b/tests/app/test_phone_stats_home.py
index 9305b758d7..93af614def 100644
--- a/tests/app/test_phone_stats_home.py
+++ b/tests/app/test_phone_stats_home.py
@@ -26,7 +26,7 @@ class PhoneHomeR30V2TestCase(HomeserverTestCase):
     def make_homeserver(
         self, reactor: ThreadedMemoryReactorClock, clock: Clock
     ) -> HomeServer:
-        hs = super(PhoneHomeR30V2TestCase, self).make_homeserver(reactor, clock)
+        hs = super().make_homeserver(reactor, clock)
 
         # We don't want our tests to actually report statistics, so check
         # that it's not enabled
diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py
index 7c63b2ea4c..fdfd4f911d 100644
--- a/tests/crypto/test_keyring.py
+++ b/tests/crypto/test_keyring.py
@@ -312,7 +312,7 @@ class KeyringTestCase(unittest.HomeserverTestCase):
             [("server9", get_key_id(key1))]
         )
         result = self.get_success(d)
-        self.assertEquals(result[("server9", get_key_id(key1))].valid_until_ts, 0)
+        self.assertEqual(result[("server9", get_key_id(key1))].valid_until_ts, 0)
 
     def test_verify_json_dedupes_key_requests(self) -> None:
         """Two requests for the same key should be deduped."""
diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py
index aed2a4c07a..6a0b5fc0bd 100644
--- a/tests/http/federation/test_matrix_federation_agent.py
+++ b/tests/http/federation/test_matrix_federation_agent.py
@@ -514,7 +514,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.assertEqual(response.code, 200)
 
         # Send the body
-        request.write('{ "a": 1 }'.encode("ascii"))
+        request.write(b'{ "a": 1 }')
         request.finish()
 
         self.reactor.pump((0.1,))
diff --git a/tests/module_api/test_api.py b/tests/module_api/test_api.py
index b3310abe1b..fe631d7ecb 100644
--- a/tests/module_api/test_api.py
+++ b/tests/module_api/test_api.py
@@ -757,7 +757,7 @@ class ModuleApiTestCase(BaseModuleApiTestCase):
         self.assertEqual(channel.json_body["creator"], user_id)
 
         # Check room alias.
-        self.assertEquals(room_alias, f"#foo-bar:{self.module_api.server_name}")
+        self.assertEqual(room_alias, f"#foo-bar:{self.module_api.server_name}")
 
         # Let's try a room with no alias.
         room_id, room_alias = self.get_success(
diff --git a/tests/replication/test_multi_media_repo.py b/tests/replication/test_multi_media_repo.py
index 1527b4a82d..6e78daa830 100644
--- a/tests/replication/test_multi_media_repo.py
+++ b/tests/replication/test_multi_media_repo.py
@@ -116,7 +116,7 @@ class MediaRepoShardTestCase(BaseMultiWorkerStreamTestCase):
         self.assertEqual(request.method, b"GET")
         self.assertEqual(
             request.path,
-            f"/_matrix/media/r0/download/{target}/{media_id}".encode("utf-8"),
+            f"/_matrix/media/r0/download/{target}/{media_id}".encode(),
         )
         self.assertEqual(
             request.requestHeaders.getRawHeaders(b"host"), [target.encode("utf-8")]
diff --git a/tests/rest/client/test_redactions.py b/tests/rest/client/test_redactions.py
index 180b635ea6..4e0a387bd3 100644
--- a/tests/rest/client/test_redactions.py
+++ b/tests/rest/client/test_redactions.py
@@ -627,8 +627,8 @@ class RedactionsTestCase(HomeserverTestCase):
         redact_event = timeline[-1]
         self.assertEqual(redact_event["type"], EventTypes.Redaction)
         # The redacts key should be in the content and the redacts keys.
-        self.assertEquals(redact_event["content"]["redacts"], event_id)
-        self.assertEquals(redact_event["redacts"], event_id)
+        self.assertEqual(redact_event["content"]["redacts"], event_id)
+        self.assertEqual(redact_event["redacts"], event_id)
 
         # But it isn't actually part of the event.
         def get_event(txn: LoggingTransaction) -> JsonDict:
@@ -642,10 +642,10 @@ class RedactionsTestCase(HomeserverTestCase):
         event_json = self.get_success(
             main_datastore.db_pool.runInteraction("get_event", get_event)
         )
-        self.assertEquals(event_json["type"], EventTypes.Redaction)
+        self.assertEqual(event_json["type"], EventTypes.Redaction)
         if expect_content:
             self.assertNotIn("redacts", event_json)
-            self.assertEquals(event_json["content"]["redacts"], event_id)
+            self.assertEqual(event_json["content"]["redacts"], event_id)
         else:
-            self.assertEquals(event_json["redacts"], event_id)
+            self.assertEqual(event_json["redacts"], event_id)
             self.assertNotIn("redacts", event_json["content"])
diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py
index 75439416c1..9bfe913e45 100644
--- a/tests/rest/client/test_relations.py
+++ b/tests/rest/client/test_relations.py
@@ -129,7 +129,7 @@ class BaseRelationsTestCase(unittest.HomeserverTestCase):
             f"/_matrix/client/v1/rooms/{self.room}/relations/{self.parent_id}",
             access_token=self.user_token,
         )
-        self.assertEquals(200, channel.code, channel.json_body)
+        self.assertEqual(200, channel.code, channel.json_body)
         return [ev["event_id"] for ev in channel.json_body["chunk"]]
 
     def _get_bundled_aggregations(self) -> JsonDict:
@@ -142,7 +142,7 @@ class BaseRelationsTestCase(unittest.HomeserverTestCase):
             f"/_matrix/client/v3/rooms/{self.room}/event/{self.parent_id}",
             access_token=self.user_token,
         )
-        self.assertEquals(200, channel.code, channel.json_body)
+        self.assertEqual(200, channel.code, channel.json_body)
         return channel.json_body["unsigned"].get("m.relations", {})
 
     def _find_event_in_chunk(self, events: List[JsonDict]) -> JsonDict:
@@ -1602,7 +1602,7 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
             f"/_matrix/client/v1/rooms/{self.room}/threads",
             access_token=self.user_token,
         )
-        self.assertEquals(200, channel.code, channel.json_body)
+        self.assertEqual(200, channel.code, channel.json_body)
         threads = channel.json_body["chunk"]
         return [
             (
@@ -1634,7 +1634,7 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
         ##################################################
         # Check the test data is configured as expected. #
         ##################################################
-        self.assertEquals(self._get_related_events(), list(reversed(thread_replies)))
+        self.assertEqual(self._get_related_events(), list(reversed(thread_replies)))
         relations = self._get_bundled_aggregations()
         self.assertDictContainsSubset(
             {"count": 3, "current_user_participated": True},
@@ -1655,7 +1655,7 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
         self._redact(thread_replies.pop())
 
         # The thread should still exist, but the latest event should be updated.
-        self.assertEquals(self._get_related_events(), list(reversed(thread_replies)))
+        self.assertEqual(self._get_related_events(), list(reversed(thread_replies)))
         relations = self._get_bundled_aggregations()
         self.assertDictContainsSubset(
             {"count": 2, "current_user_participated": True},
@@ -1674,7 +1674,7 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
         self._redact(thread_replies.pop(0))
 
         # Nothing should have changed (except the thread count).
-        self.assertEquals(self._get_related_events(), thread_replies)
+        self.assertEqual(self._get_related_events(), thread_replies)
         relations = self._get_bundled_aggregations()
         self.assertDictContainsSubset(
             {"count": 1, "current_user_participated": True},
@@ -1691,11 +1691,11 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
         # Redact the last remaining event. #
         ####################################
         self._redact(thread_replies.pop(0))
-        self.assertEquals(thread_replies, [])
+        self.assertEqual(thread_replies, [])
 
         # The event should no longer be considered a thread.
-        self.assertEquals(self._get_related_events(), [])
-        self.assertEquals(self._get_bundled_aggregations(), {})
+        self.assertEqual(self._get_related_events(), [])
+        self.assertEqual(self._get_bundled_aggregations(), {})
         self.assertEqual(self._get_threads(), [])
 
     def test_redact_parent_edit(self) -> None:
@@ -1749,8 +1749,8 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
         # The relations are returned.
         event_ids = self._get_related_events()
         relations = self._get_bundled_aggregations()
-        self.assertEquals(event_ids, [related_event_id])
-        self.assertEquals(
+        self.assertEqual(event_ids, [related_event_id])
+        self.assertEqual(
             relations[RelationTypes.REFERENCE],
             {"chunk": [{"event_id": related_event_id}]},
         )
@@ -1772,7 +1772,7 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
         # The unredacted relation should still exist.
         event_ids = self._get_related_events()
         relations = self._get_bundled_aggregations()
-        self.assertEquals(len(event_ids), 1)
+        self.assertEqual(len(event_ids), 1)
         self.assertDictContainsSubset(
             {
                 "count": 1,
@@ -1816,7 +1816,7 @@ class ThreadsTestCase(BaseRelationsTestCase):
             f"/_matrix/client/v1/rooms/{self.room}/threads",
             access_token=self.user_token,
         )
-        self.assertEquals(200, channel.code, channel.json_body)
+        self.assertEqual(200, channel.code, channel.json_body)
         threads = self._get_threads(channel.json_body)
         self.assertEqual(threads, [(thread_2, reply_2), (thread_1, reply_1)])
 
@@ -1829,7 +1829,7 @@ class ThreadsTestCase(BaseRelationsTestCase):
             f"/_matrix/client/v1/rooms/{self.room}/threads",
             access_token=self.user_token,
         )
-        self.assertEquals(200, channel.code, channel.json_body)
+        self.assertEqual(200, channel.code, channel.json_body)
         # Tuple of (thread ID, latest event ID) for each thread.
         threads = self._get_threads(channel.json_body)
         self.assertEqual(threads, [(thread_1, reply_3), (thread_2, reply_2)])
@@ -1850,7 +1850,7 @@ class ThreadsTestCase(BaseRelationsTestCase):
             f"/_matrix/client/v1/rooms/{self.room}/threads?limit=1",
             access_token=self.user_token,
         )
-        self.assertEquals(200, channel.code, channel.json_body)
+        self.assertEqual(200, channel.code, channel.json_body)
         thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]]
         self.assertEqual(thread_roots, [thread_2])
 
@@ -1864,7 +1864,7 @@ class ThreadsTestCase(BaseRelationsTestCase):
             f"/_matrix/client/v1/rooms/{self.room}/threads?limit=1&from={next_batch}",
             access_token=self.user_token,
         )
-        self.assertEquals(200, channel.code, channel.json_body)
+        self.assertEqual(200, channel.code, channel.json_body)
         thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]]
         self.assertEqual(thread_roots, [thread_1], channel.json_body)
 
@@ -1899,7 +1899,7 @@ class ThreadsTestCase(BaseRelationsTestCase):
             f"/_matrix/client/v1/rooms/{self.room}/threads",
             access_token=self.user_token,
         )
-        self.assertEquals(200, channel.code, channel.json_body)
+        self.assertEqual(200, channel.code, channel.json_body)
         thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]]
         self.assertEqual(
             thread_roots, [thread_3, thread_2, thread_1], channel.json_body
@@ -1911,7 +1911,7 @@ class ThreadsTestCase(BaseRelationsTestCase):
             f"/_matrix/client/v1/rooms/{self.room}/threads?include=participated",
             access_token=self.user_token,
         )
-        self.assertEquals(200, channel.code, channel.json_body)
+        self.assertEqual(200, channel.code, channel.json_body)
         thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]]
         self.assertEqual(thread_roots, [thread_2, thread_1], channel.json_body)
 
@@ -1943,6 +1943,6 @@ class ThreadsTestCase(BaseRelationsTestCase):
             f"/_matrix/client/v1/rooms/{self.room}/threads",
             access_token=self.user_token,
         )
-        self.assertEquals(200, channel.code, channel.json_body)
+        self.assertEqual(200, channel.code, channel.json_body)
         thread_roots = [ev["event_id"] for ev in channel.json_body["chunk"]]
         self.assertEqual(thread_roots, [thread_1], channel.json_body)
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index 4f6347be15..88e579dc39 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -1362,7 +1362,7 @@ class RoomAppserviceTsParamTestCase(unittest.HomeserverTestCase):
 
         # Ensure the event was persisted with the correct timestamp.
         res = self.get_success(self.main_store.get_event(event_id))
-        self.assertEquals(ts, res.origin_server_ts)
+        self.assertEqual(ts, res.origin_server_ts)
 
     def test_send_state_event_ts(self) -> None:
         """Test sending a state event with a custom timestamp."""
@@ -1384,7 +1384,7 @@ class RoomAppserviceTsParamTestCase(unittest.HomeserverTestCase):
 
         # Ensure the event was persisted with the correct timestamp.
         res = self.get_success(self.main_store.get_event(event_id))
-        self.assertEquals(ts, res.origin_server_ts)
+        self.assertEqual(ts, res.origin_server_ts)
 
     def test_send_membership_event_ts(self) -> None:
         """Test sending a membership event with a custom timestamp."""
@@ -1406,7 +1406,7 @@ class RoomAppserviceTsParamTestCase(unittest.HomeserverTestCase):
 
         # Ensure the event was persisted with the correct timestamp.
         res = self.get_success(self.main_store.get_event(event_id))
-        self.assertEquals(ts, res.origin_server_ts)
+        self.assertEqual(ts, res.origin_server_ts)
 
 
 class RoomJoinRatelimitTestCase(RoomBase):
diff --git a/tests/server.py b/tests/server.py
index c84a524e8c..481fe34c5c 100644
--- a/tests/server.py
+++ b/tests/server.py
@@ -26,6 +26,7 @@ from typing import (
     Any,
     Awaitable,
     Callable,
+    Deque,
     Dict,
     Iterable,
     List,
@@ -41,7 +42,7 @@ from typing import (
 from unittest.mock import Mock
 
 import attr
-from typing_extensions import Deque, ParamSpec
+from typing_extensions import ParamSpec
 from zope.interface import implementer
 
 from twisted.internet import address, threads, udp
diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py
index 5e1324a169..71302facd1 100644
--- a/tests/storage/test_appservice.py
+++ b/tests/storage/test_appservice.py
@@ -40,7 +40,7 @@ from tests.test_utils import make_awaitable
 
 class ApplicationServiceStoreTestCase(unittest.HomeserverTestCase):
     def setUp(self) -> None:
-        super(ApplicationServiceStoreTestCase, self).setUp()
+        super().setUp()
 
         self.as_yaml_files: List[str] = []
 
@@ -71,7 +71,7 @@ class ApplicationServiceStoreTestCase(unittest.HomeserverTestCase):
             except Exception:
                 pass
 
-        super(ApplicationServiceStoreTestCase, self).tearDown()
+        super().tearDown()
 
     def _add_appservice(
         self, as_token: str, id: str, url: str, hs_token: str, sender: str
@@ -110,7 +110,7 @@ class ApplicationServiceStoreTestCase(unittest.HomeserverTestCase):
 
 class ApplicationServiceTransactionStoreTestCase(unittest.HomeserverTestCase):
     def setUp(self) -> None:
-        super(ApplicationServiceTransactionStoreTestCase, self).setUp()
+        super().setUp()
         self.as_yaml_files: List[str] = []
 
         self.hs.config.appservice.app_service_config_files = self.as_yaml_files
diff --git a/tests/storage/test_main.py b/tests/storage/test_main.py
index 27f450e22d..b8823d6993 100644
--- a/tests/storage/test_main.py
+++ b/tests/storage/test_main.py
@@ -20,7 +20,7 @@ from tests import unittest
 
 class DataStoreTestCase(unittest.HomeserverTestCase):
     def setUp(self) -> None:
-        super(DataStoreTestCase, self).setUp()
+        super().setUp()
 
         self.store = self.hs.get_datastores().main
 
diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py
index f183c38477..52ffa91c81 100644
--- a/tests/storage/test_room_search.py
+++ b/tests/storage/test_room_search.py
@@ -318,14 +318,14 @@ class MessageSearchTest(HomeserverTestCase):
             result = self.get_success(
                 store.search_msgs([self.room_id], query, ["content.body"])
             )
-            self.assertEquals(
+            self.assertEqual(
                 result["count"],
                 1 if expect_to_contain else 0,
                 f"expected '{query}' to match '{self.PHRASE}'"
                 if expect_to_contain
                 else f"'{query}' unexpectedly matched '{self.PHRASE}'",
             )
-            self.assertEquals(
+            self.assertEqual(
                 len(result["results"]),
                 1 if expect_to_contain else 0,
                 "results array length should match count",
@@ -336,14 +336,14 @@ class MessageSearchTest(HomeserverTestCase):
             result = self.get_success(
                 store.search_rooms([self.room_id], query, ["content.body"], 10)
             )
-            self.assertEquals(
+            self.assertEqual(
                 result["count"],
                 1 if expect_to_contain else 0,
                 f"expected '{query}' to match '{self.PHRASE}'"
                 if expect_to_contain
                 else f"'{query}' unexpectedly matched '{self.PHRASE}'",
             )
-            self.assertEquals(
+            self.assertEqual(
                 len(result["results"]),
                 1 if expect_to_contain else 0,
                 "results array length should match count",
diff --git a/tests/test_visibility.py b/tests/test_visibility.py
index 9ed330f554..a46c29ddf4 100644
--- a/tests/test_visibility.py
+++ b/tests/test_visibility.py
@@ -31,7 +31,7 @@ TEST_ROOM_ID = "!TEST:ROOM"
 
 class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
     def setUp(self) -> None:
-        super(FilterEventsForServerTestCase, self).setUp()
+        super().setUp()
         self.event_creation_handler = self.hs.get_event_creation_handler()
         self.event_builder_factory = self.hs.get_event_builder_factory()
         self._storage_controllers = self.hs.get_storage_controllers()
-- 
cgit 1.5.1


From 47c629bb27c0a479068ed5da184dffe7a6cb0fca Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 15 Aug 2023 17:07:13 +0100
Subject: Attempt to fix twisted trunk (#16115)

---
 .github/workflows/twisted_trunk.yml | 5 ++++-
 changelog.d/16115.misc              | 1 +
 mypy.ini                            | 7 +++++++
 synapse/util/manhole.py             | 4 +++-
 4 files changed, 15 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/16115.misc

(limited to 'synapse')

diff --git a/.github/workflows/twisted_trunk.yml b/.github/workflows/twisted_trunk.yml
index f7a4ee7c13..67ccc03f6e 100644
--- a/.github/workflows/twisted_trunk.yml
+++ b/.github/workflows/twisted_trunk.yml
@@ -5,6 +5,9 @@ on:
     - cron: 0 8 * * *
 
   workflow_dispatch:
+    # NB: inputs are only present when this workflow is dispatched manually.
+    # (The default below is the default field value in the form to trigger
+    # a manual dispatch). Otherwise the inputs will evaluate to null.
     inputs:
       twisted_ref:
         description: Commit, branch or tag to checkout from upstream Twisted.
@@ -49,7 +52,7 @@ jobs:
           extras: "all"
       - run: |
           poetry remove twisted
-          poetry add --extras tls git+https://github.com/twisted/twisted.git#${{ inputs.twisted_ref }}
+          poetry add --extras tls git+https://github.com/twisted/twisted.git#${{ inputs.twisted_ref || 'trunk' }}
           poetry install --no-interaction --extras "all test"
       - name: Remove warn_unused_ignores from mypy config
         run: sed '/warn_unused_ignores = True/d' -i mypy.ini
diff --git a/changelog.d/16115.misc b/changelog.d/16115.misc
new file mode 100644
index 0000000000..f325d2a31d
--- /dev/null
+++ b/changelog.d/16115.misc
@@ -0,0 +1 @@
+Attempt to fix the twisted trunk job.
diff --git a/mypy.ini b/mypy.ini
index 1038b7d8c7..311a951aa8 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -45,6 +45,13 @@ warn_unused_ignores = False
 disallow_untyped_defs = False
 disallow_incomplete_defs = False
 
+[mypy-synapse.util.manhole]
+# This module imports something from Twisted which has a bad annotation in Twisted trunk,
+# but is unannotated in Twisted's latest release. We want to type-ignore the problem 
+# in the twisted trunk job, even though it has no effect on normal mypy runs.
+warn_unused_ignores = False
+
+
 ;; Dependencies without annotations
 ;; Before ignoring a module, check to see if type stubs are available.
 ;; The `typeshed` project maintains stubs here:
diff --git a/synapse/util/manhole.py b/synapse/util/manhole.py
index 48b8195ca1..8cb766860e 100644
--- a/synapse/util/manhole.py
+++ b/synapse/util/manhole.py
@@ -98,7 +98,9 @@ def manhole(settings: ManholeConfig, globals: Dict[str, Any]) -> ServerFactory:
         SynapseManhole, dict(globals, __name__="__console__")
     )
 
-    factory = manhole_ssh.ConchFactory(portal.Portal(rlm, [checker]))
+    # type-ignore: This is an error in Twisted's annotations. See
+    # https://github.com/twisted/twisted/issues/11812 and /11813 .
+    factory = manhole_ssh.ConchFactory(portal.Portal(rlm, [checker]))  # type: ignore[arg-type]
 
     # conch has the wrong type on these dicts (says bytes to bytes,
     # should be bytes to Keys judging by how it's used).
-- 
cgit 1.5.1


From 8c3bcea2da4939e21a99f72d6c3995186bc4b80d Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Wed, 16 Aug 2023 16:19:54 +0200
Subject: Rename pagination&purge locks and add comments explaining them
 (#16112)

---
 changelog.d/16112.misc                           |  1 +
 synapse/federation/federation_server.py          |  4 ++--
 synapse/handlers/message.py                      |  6 +++---
 synapse/handlers/pagination.py                   | 19 ++++++++++++-------
 synapse/handlers/room_member.py                  |  4 ++--
 synapse/handlers/worker_lock.py                  |  6 +++++-
 synapse/rest/client/room_upgrade_rest_servlet.py |  4 ++--
 synapse/storage/controllers/persist_events.py    |  4 ++--
 8 files changed, 29 insertions(+), 19 deletions(-)
 create mode 100644 changelog.d/16112.misc

(limited to 'synapse')

diff --git a/changelog.d/16112.misc b/changelog.d/16112.misc
new file mode 100644
index 0000000000..05a58c1348
--- /dev/null
+++ b/changelog.d/16112.misc
@@ -0,0 +1 @@
+Rename pagination and purge locks and add comments to explain why they exist and how they work.
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index a90d99c4d6..f9915e5a3f 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -63,7 +63,7 @@ from synapse.federation.federation_base import (
 )
 from synapse.federation.persistence import TransactionActions
 from synapse.federation.units import Edu, Transaction
-from synapse.handlers.worker_lock import DELETE_ROOM_LOCK_NAME
+from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME
 from synapse.http.servlet import assert_params_in_dict
 from synapse.logging.context import (
     make_deferred_yieldable,
@@ -1245,7 +1245,7 @@ class FederationServer(FederationBase):
                         # while holding the `_INBOUND_EVENT_HANDLING_LOCK_NAME`
                         # lock.
                         async with self._worker_lock_handler.acquire_read_write_lock(
-                            DELETE_ROOM_LOCK_NAME, room_id, write=False
+                            NEW_EVENT_DURING_PURGE_LOCK_NAME, room_id, write=False
                         ):
                             await self._federation_event_handler.on_receive_pdu(
                                 origin, event
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index d485f21e49..a74db1dccf 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -53,7 +53,7 @@ from synapse.events.snapshot import EventContext, UnpersistedEventContextBase
 from synapse.events.utils import SerializeEventConfig, maybe_upsert_event_field
 from synapse.events.validator import EventValidator
 from synapse.handlers.directory import DirectoryHandler
-from synapse.handlers.worker_lock import DELETE_ROOM_LOCK_NAME
+from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME
 from synapse.logging import opentracing
 from synapse.logging.context import make_deferred_yieldable, run_in_background
 from synapse.metrics.background_process_metrics import run_as_background_process
@@ -1034,7 +1034,7 @@ class EventCreationHandler:
                     )
 
         async with self._worker_lock_handler.acquire_read_write_lock(
-            DELETE_ROOM_LOCK_NAME, room_id, write=False
+            NEW_EVENT_DURING_PURGE_LOCK_NAME, room_id, write=False
         ):
             return await self._create_and_send_nonmember_event_locked(
                 requester=requester,
@@ -1978,7 +1978,7 @@ class EventCreationHandler:
 
         for room_id in room_ids:
             async with self._worker_lock_handler.acquire_read_write_lock(
-                DELETE_ROOM_LOCK_NAME, room_id, write=False
+                NEW_EVENT_DURING_PURGE_LOCK_NAME, room_id, write=False
             ):
                 dummy_event_sent = await self._send_dummy_event_for_room(room_id)
 
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index da34658470..1be6ebc6d9 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -24,6 +24,7 @@ from synapse.api.errors import SynapseError
 from synapse.api.filtering import Filter
 from synapse.events.utils import SerializeEventConfig
 from synapse.handlers.room import ShutdownRoomResponse
+from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME
 from synapse.logging.opentracing import trace
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.rest.admin._base import assert_user_is_admin
@@ -46,9 +47,10 @@ logger = logging.getLogger(__name__)
 BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD = 3
 
 
-PURGE_HISTORY_LOCK_NAME = "purge_history_lock"
-
-DELETE_ROOM_LOCK_NAME = "delete_room_lock"
+# This is used to avoid purging a room several times at the same moment,
+# and also paginating during a purge. Pagination can trigger backfill,
+# which would create old events locally, and would potentially clash with the room delete.
+PURGE_PAGINATION_LOCK_NAME = "purge_pagination_lock"
 
 
 @attr.s(slots=True, auto_attribs=True)
@@ -363,7 +365,7 @@ class PaginationHandler:
         self._purges_in_progress_by_room.add(room_id)
         try:
             async with self._worker_locks.acquire_read_write_lock(
-                PURGE_HISTORY_LOCK_NAME, room_id, write=True
+                PURGE_PAGINATION_LOCK_NAME, room_id, write=True
             ):
                 await self._storage_controllers.purge_events.purge_history(
                     room_id, token, delete_local_events
@@ -421,7 +423,10 @@ class PaginationHandler:
             force: set true to skip checking for joined users.
         """
         async with self._worker_locks.acquire_multi_read_write_lock(
-            [(PURGE_HISTORY_LOCK_NAME, room_id), (DELETE_ROOM_LOCK_NAME, room_id)],
+            [
+                (PURGE_PAGINATION_LOCK_NAME, room_id),
+                (NEW_EVENT_DURING_PURGE_LOCK_NAME, room_id),
+            ],
             write=True,
         ):
             # first check that we have no users in this room
@@ -483,7 +488,7 @@ class PaginationHandler:
         room_token = from_token.room_key
 
         async with self._worker_locks.acquire_read_write_lock(
-            PURGE_HISTORY_LOCK_NAME, room_id, write=False
+            PURGE_PAGINATION_LOCK_NAME, room_id, write=False
         ):
             (membership, member_event_id) = (None, None)
             if not use_admin_priviledge:
@@ -761,7 +766,7 @@ class PaginationHandler:
         self._purges_in_progress_by_room.add(room_id)
         try:
             async with self._worker_locks.acquire_read_write_lock(
-                PURGE_HISTORY_LOCK_NAME, room_id, write=True
+                PURGE_PAGINATION_LOCK_NAME, room_id, write=True
             ):
                 self._delete_by_id[delete_id].status = DeleteStatus.STATUS_SHUTTING_DOWN
                 self._delete_by_id[
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index e3cdf2bc61..1d8d4a72e7 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -39,7 +39,7 @@ from synapse.events import EventBase
 from synapse.events.snapshot import EventContext
 from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN
 from synapse.handlers.state_deltas import MatchChange, StateDeltasHandler
-from synapse.handlers.worker_lock import DELETE_ROOM_LOCK_NAME
+from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME
 from synapse.logging import opentracing
 from synapse.metrics import event_processing_positions
 from synapse.metrics.background_process_metrics import run_as_background_process
@@ -621,7 +621,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         async with self.member_as_limiter.queue(as_id):
             async with self.member_linearizer.queue(key):
                 async with self._worker_lock_handler.acquire_read_write_lock(
-                    DELETE_ROOM_LOCK_NAME, room_id, write=False
+                    NEW_EVENT_DURING_PURGE_LOCK_NAME, room_id, write=False
                 ):
                     with opentracing.start_active_span("update_membership_locked"):
                         result = await self.update_membership_locked(
diff --git a/synapse/handlers/worker_lock.py b/synapse/handlers/worker_lock.py
index 72df773a86..58efe7116b 100644
--- a/synapse/handlers/worker_lock.py
+++ b/synapse/handlers/worker_lock.py
@@ -42,7 +42,11 @@ if TYPE_CHECKING:
     from synapse.server import HomeServer
 
 
-DELETE_ROOM_LOCK_NAME = "delete_room_lock"
+# This lock is used to avoid creating an event while we are purging the room.
+# We take a read lock when creating an event, and a write one when purging a room.
+# This is because it is fine to create several events concurrently, since referenced events
+# will not disappear under our feet as long as we don't delete the room.
+NEW_EVENT_DURING_PURGE_LOCK_NAME = "new_event_during_purge_lock"
 
 
 class WorkerLocksHandler:
diff --git a/synapse/rest/client/room_upgrade_rest_servlet.py b/synapse/rest/client/room_upgrade_rest_servlet.py
index 4a5d9e13e7..b1f6b5d1b7 100644
--- a/synapse/rest/client/room_upgrade_rest_servlet.py
+++ b/synapse/rest/client/room_upgrade_rest_servlet.py
@@ -17,7 +17,7 @@ from typing import TYPE_CHECKING, Tuple
 
 from synapse.api.errors import Codes, ShadowBanError, SynapseError
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
-from synapse.handlers.worker_lock import DELETE_ROOM_LOCK_NAME
+from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME
 from synapse.http.server import HttpServer
 from synapse.http.servlet import (
     RestServlet,
@@ -81,7 +81,7 @@ class RoomUpgradeRestServlet(RestServlet):
 
         try:
             async with self._worker_lock_handler.acquire_read_write_lock(
-                DELETE_ROOM_LOCK_NAME, room_id, write=False
+                NEW_EVENT_DURING_PURGE_LOCK_NAME, room_id, write=False
             ):
                 new_room_id = await self._room_creation_handler.upgrade_room(
                     requester, room_id, new_version
diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py
index 35cd1089d6..abd1d149db 100644
--- a/synapse/storage/controllers/persist_events.py
+++ b/synapse/storage/controllers/persist_events.py
@@ -45,7 +45,7 @@ from twisted.internet import defer
 from synapse.api.constants import EventTypes, Membership
 from synapse.events import EventBase
 from synapse.events.snapshot import EventContext
-from synapse.handlers.worker_lock import DELETE_ROOM_LOCK_NAME
+from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME
 from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable
 from synapse.logging.opentracing import (
     SynapseTags,
@@ -357,7 +357,7 @@ class EventsPersistenceStorageController:
         # it. We might already have taken out the lock, but since this is just a
         # "read" lock its inherently reentrant.
         async with self.hs.get_worker_locks_handler().acquire_read_write_lock(
-            DELETE_ROOM_LOCK_NAME, room_id, write=False
+            NEW_EVENT_DURING_PURGE_LOCK_NAME, room_id, write=False
         ):
             if isinstance(task, _PersistEventsTask):
                 return await self._persist_event_batch(room_id, task)
-- 
cgit 1.5.1


From 8a4fb7a6baf32a35c24056474d98fed525522b78 Mon Sep 17 00:00:00 2001
From: Matthew Ma <mtthman@amazon.com>
Date: Thu, 17 Aug 2023 02:22:50 -0700
Subject: Disable caching in /sync corner case (#16080)

Fixes #15502
---
 changelog.d/16080.bugfix |  1 +
 synapse/handlers/sync.py | 20 ++++++++++----------
 2 files changed, 11 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/16080.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16080.bugfix b/changelog.d/16080.bugfix
new file mode 100644
index 0000000000..1ad6fb3c52
--- /dev/null
+++ b/changelog.d/16080.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug in `/sync` where timeout=0 does not skip caching, resulting in slow calls in cases where there are no new changes. Contributed by @PlasmaIntec.
\ No newline at end of file
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 8174248387..60a9f341b5 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -387,16 +387,16 @@ class SyncHandler:
                 from_token=since_token,
             )
 
-            # if nothing has happened in any of the users' rooms since /sync was called,
-            # the resultant next_batch will be the same as since_token (since the result
-            # is generated when wait_for_events is first called, and not regenerated
-            # when wait_for_events times out).
-            #
-            # If that happens, we mustn't cache it, so that when the client comes back
-            # with the same cache token, we don't immediately return the same empty
-            # result, causing a tightloop. (#8518)
-            if result.next_batch == since_token:
-                cache_context.should_cache = False
+        # if nothing has happened in any of the users' rooms since /sync was called,
+        # the resultant next_batch will be the same as since_token (since the result
+        # is generated when wait_for_events is first called, and not regenerated
+        # when wait_for_events times out).
+        #
+        # If that happens, we mustn't cache it, so that when the client comes back
+        # with the same cache token, we don't immediately return the same empty
+        # result, causing a tightloop. (#8518)
+        if result.next_batch == since_token:
+            cache_context.should_cache = False
 
         if result:
             if sync_config.filter_collection.lazy_load_members():
-- 
cgit 1.5.1


From 0377cb4fab27c717bc75ea27225c950b0215c152 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Thu, 17 Aug 2023 02:30:02 -0700
Subject: Override global statement timeout when creating indexes in Postgres
 (#16085)

---
 changelog.d/16085.misc                |  1 +
 synapse/storage/background_updates.py | 12 ++++++++++++
 2 files changed, 13 insertions(+)
 create mode 100644 changelog.d/16085.misc

(limited to 'synapse')

diff --git a/changelog.d/16085.misc b/changelog.d/16085.misc
new file mode 100644
index 0000000000..7b7a95edd4
--- /dev/null
+++ b/changelog.d/16085.misc
@@ -0,0 +1 @@
+Override global statement timeout when creating indexes in Postgres.
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index 2d5ddc3e7b..ddca0af1da 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -238,6 +238,7 @@ class BackgroundUpdater:
     def __init__(self, hs: "HomeServer", database: "DatabasePool"):
         self._clock = hs.get_clock()
         self.db_pool = database
+        self.hs = hs
 
         self._database_name = database.name()
 
@@ -758,6 +759,11 @@ class BackgroundUpdater:
                 logger.debug("[SQL] %s", sql)
                 c.execute(sql)
 
+                # override the global statement timeout to avoid accidentally squashing
+                # a long-running index creation process
+                timeout_sql = "SET SESSION statement_timeout = 0"
+                c.execute(timeout_sql)
+
                 sql = (
                     "CREATE %(unique)s INDEX CONCURRENTLY %(name)s"
                     " ON %(table)s"
@@ -778,6 +784,12 @@ class BackgroundUpdater:
                     logger.debug("[SQL] %s", sql)
                     c.execute(sql)
             finally:
+                # mypy ignore - `statement_timeout` is defined on PostgresEngine
+                # reset the global timeout to the default
+                default_timeout = self.db_pool.engine.statement_timeout  # type: ignore[attr-defined]
+                undo_timeout_sql = f"SET statement_timeout = {default_timeout}"
+                conn.cursor().execute(undo_timeout_sql)
+
                 conn.set_session(autocommit=False)  # type: ignore
 
         def create_index_sqlite(conn: Connection) -> None:
-- 
cgit 1.5.1


From eb0dbab15b119eab7721bc03ac1cfc7f6b638bb3 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Thu, 17 Aug 2023 14:07:57 +0100
Subject: Fix database performance of read/write worker locks (#16061)

We were seeing serialization errors when taking out multiple read locks.

The transactions were retried, so this isn't causing any failures.

Introduced in #15782.
---
 changelog.d/16061.misc                    |  1 +
 synapse/storage/databases/main/lock.py    | 87 +++++++++++++------------------
 tests/storage/databases/main/test_lock.py |  7 +--
 3 files changed, 40 insertions(+), 55 deletions(-)
 create mode 100644 changelog.d/16061.misc

(limited to 'synapse')

diff --git a/changelog.d/16061.misc b/changelog.d/16061.misc
new file mode 100644
index 0000000000..37928b670f
--- /dev/null
+++ b/changelog.d/16061.misc
@@ -0,0 +1 @@
+Fix database performance of read/write worker locks.
diff --git a/synapse/storage/databases/main/lock.py b/synapse/storage/databases/main/lock.py
index 1680bf6168..54d40e7a3a 100644
--- a/synapse/storage/databases/main/lock.py
+++ b/synapse/storage/databases/main/lock.py
@@ -26,7 +26,6 @@ from synapse.storage.database import (
     LoggingDatabaseConnection,
     LoggingTransaction,
 )
-from synapse.storage.engines import PostgresEngine
 from synapse.util import Clock
 from synapse.util.stringutils import random_string
 
@@ -96,6 +95,10 @@ class LockStore(SQLBaseStore):
 
         self._acquiring_locks: Set[Tuple[str, str]] = set()
 
+        self._clock.looping_call(
+            self._reap_stale_read_write_locks, _LOCK_TIMEOUT_MS / 10.0
+        )
+
     @wrap_as_background_process("LockStore._on_shutdown")
     async def _on_shutdown(self) -> None:
         """Called when the server is shutting down"""
@@ -216,6 +219,7 @@ class LockStore(SQLBaseStore):
                 lock_name,
                 lock_key,
                 write,
+                db_autocommit=True,
             )
         except self.database_engine.module.IntegrityError:
             return None
@@ -233,61 +237,22 @@ class LockStore(SQLBaseStore):
         # `worker_read_write_locks` and seeing if that fails any
         # constraints. If it doesn't then we have acquired the lock,
         # otherwise we haven't.
-        #
-        # Before that though we clear the table of any stale locks.
 
         now = self._clock.time_msec()
         token = random_string(6)
 
-        delete_sql = """
-            DELETE FROM worker_read_write_locks
-                WHERE last_renewed_ts < ? AND lock_name = ? AND lock_key = ?;
-        """
-
-        insert_sql = """
-            INSERT INTO worker_read_write_locks (lock_name, lock_key, write_lock, instance_name, token, last_renewed_ts)
-            VALUES (?, ?, ?, ?, ?, ?)
-        """
-
-        if isinstance(self.database_engine, PostgresEngine):
-            # For Postgres we can send these queries at the same time.
-            txn.execute(
-                delete_sql + ";" + insert_sql,
-                (
-                    # DELETE args
-                    now - _LOCK_TIMEOUT_MS,
-                    lock_name,
-                    lock_key,
-                    # UPSERT args
-                    lock_name,
-                    lock_key,
-                    write,
-                    self._instance_name,
-                    token,
-                    now,
-                ),
-            )
-        else:
-            # For SQLite these need to be two queries.
-            txn.execute(
-                delete_sql,
-                (
-                    now - _LOCK_TIMEOUT_MS,
-                    lock_name,
-                    lock_key,
-                ),
-            )
-            txn.execute(
-                insert_sql,
-                (
-                    lock_name,
-                    lock_key,
-                    write,
-                    self._instance_name,
-                    token,
-                    now,
-                ),
-            )
+        self.db_pool.simple_insert_txn(
+            txn,
+            table="worker_read_write_locks",
+            values={
+                "lock_name": lock_name,
+                "lock_key": lock_key,
+                "write_lock": write,
+                "instance_name": self._instance_name,
+                "token": token,
+                "last_renewed_ts": now,
+            },
+        )
 
         lock = Lock(
             self._reactor,
@@ -351,6 +316,24 @@ class LockStore(SQLBaseStore):
 
         return locks
 
+    @wrap_as_background_process("_reap_stale_read_write_locks")
+    async def _reap_stale_read_write_locks(self) -> None:
+        delete_sql = """
+            DELETE FROM worker_read_write_locks
+                WHERE last_renewed_ts < ?
+        """
+
+        def reap_stale_read_write_locks_txn(txn: LoggingTransaction) -> None:
+            txn.execute(delete_sql, (self._clock.time_msec() - _LOCK_TIMEOUT_MS,))
+            if txn.rowcount:
+                logger.info("Reaped %d stale locks", txn.rowcount)
+
+        await self.db_pool.runInteraction(
+            "_reap_stale_read_write_locks",
+            reap_stale_read_write_locks_txn,
+            db_autocommit=True,
+        )
+
 
 class Lock:
     """An async context manager that manages an acquired lock, ensuring it is
diff --git a/tests/storage/databases/main/test_lock.py b/tests/storage/databases/main/test_lock.py
index 383da83dfb..f541f1d6be 100644
--- a/tests/storage/databases/main/test_lock.py
+++ b/tests/storage/databases/main/test_lock.py
@@ -12,13 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+
 from twisted.internet import defer, reactor
 from twisted.internet.base import ReactorBase
 from twisted.internet.defer import Deferred
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.server import HomeServer
-from synapse.storage.databases.main.lock import _LOCK_TIMEOUT_MS
+from synapse.storage.databases.main.lock import _LOCK_TIMEOUT_MS, _RENEWAL_INTERVAL_MS
 from synapse.util import Clock
 
 from tests import unittest
@@ -380,8 +381,8 @@ class ReadWriteLockTestCase(unittest.HomeserverTestCase):
         self.get_success(lock.__aenter__())
 
         # Wait for ages with the lock, we should not be able to get the lock.
-        self.reactor.advance(5 * _LOCK_TIMEOUT_MS / 1000)
-        self.pump()
+        for _ in range(0, 10):
+            self.reactor.advance((_RENEWAL_INTERVAL_MS / 1000))
 
         lock2 = self.get_success(
             self.store.try_acquire_read_write_lock("name", "key", write=True)
-- 
cgit 1.5.1


From 54a51ff6c1a1d9c7174e239acdd2dee7bed744f7 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Thu, 17 Aug 2023 10:53:10 -0700
Subject: Cache token introspection response from OIDC provider (#16117)

---
 changelog.d/16117.misc                  |  1 +
 synapse/api/auth/msc3861_delegated.py   | 40 +++++++++++++++++++--
 tests/handlers/test_oauth_delegation.py | 62 +++++++++++++++++++++++++++++++++
 3 files changed, 101 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/16117.misc

(limited to 'synapse')

diff --git a/changelog.d/16117.misc b/changelog.d/16117.misc
new file mode 100644
index 0000000000..f33fa6dc17
--- /dev/null
+++ b/changelog.d/16117.misc
@@ -0,0 +1 @@
+Cache token introspection response from OIDC provider.
diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py
index 9524102a30..3a516093f5 100644
--- a/synapse/api/auth/msc3861_delegated.py
+++ b/synapse/api/auth/msc3861_delegated.py
@@ -39,6 +39,7 @@ from synapse.logging.context import make_deferred_yieldable
 from synapse.types import Requester, UserID, create_requester
 from synapse.util import json_decoder
 from synapse.util.caches.cached_call import RetryOnExceptionCachedCall
+from synapse.util.caches.expiringcache import ExpiringCache
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -106,6 +107,14 @@ class MSC3861DelegatedAuth(BaseAuth):
 
         self._issuer_metadata = RetryOnExceptionCachedCall(self._load_metadata)
 
+        self._clock = hs.get_clock()
+        self._token_cache: ExpiringCache[str, IntrospectionToken] = ExpiringCache(
+            cache_name="introspection_token_cache",
+            clock=self._clock,
+            max_len=10000,
+            expiry_ms=5 * 60 * 1000,
+        )
+
         if isinstance(auth_method, PrivateKeyJWTWithKid):
             # Use the JWK as the client secret when using the private_key_jwt method
             assert self._config.jwk, "No JWK provided"
@@ -144,6 +153,20 @@ class MSC3861DelegatedAuth(BaseAuth):
         Returns:
             The introspection response
         """
+        # check the cache before doing a request
+        introspection_token = self._token_cache.get(token, None)
+
+        if introspection_token:
+            # check the expiration field of the token (if it exists)
+            exp = introspection_token.get("exp", None)
+            if exp:
+                time_now = self._clock.time()
+                expired = time_now > exp
+                if not expired:
+                    return introspection_token
+            else:
+                return introspection_token
+
         metadata = await self._issuer_metadata.get()
         introspection_endpoint = metadata.get("introspection_endpoint")
         raw_headers: Dict[str, str] = {
@@ -157,7 +180,10 @@ class MSC3861DelegatedAuth(BaseAuth):
 
         # Fill the body/headers with credentials
         uri, raw_headers, body = self._client_auth.prepare(
-            method="POST", uri=introspection_endpoint, headers=raw_headers, body=body
+            method="POST",
+            uri=introspection_endpoint,
+            headers=raw_headers,
+            body=body,
         )
         headers = Headers({k: [v] for (k, v) in raw_headers.items()})
 
@@ -187,7 +213,17 @@ class MSC3861DelegatedAuth(BaseAuth):
                 "The introspection endpoint returned an invalid JSON response."
             )
 
-        return IntrospectionToken(**resp)
+        expiration = resp.get("exp", None)
+        if expiration:
+            if self._clock.time() > expiration:
+                raise InvalidClientTokenError("Token is expired.")
+
+        introspection_token = IntrospectionToken(**resp)
+
+        # add token to cache
+        self._token_cache[token] = introspection_token
+
+        return introspection_token
 
     async def is_server_admin(self, requester: Requester) -> bool:
         return "urn:synapse:admin:*" in requester.scope
diff --git a/tests/handlers/test_oauth_delegation.py b/tests/handlers/test_oauth_delegation.py
index 6309d7b36e..82c26e303f 100644
--- a/tests/handlers/test_oauth_delegation.py
+++ b/tests/handlers/test_oauth_delegation.py
@@ -491,6 +491,68 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
         error = self.get_failure(self.auth.get_user_by_req(request), SynapseError)
         self.assertEqual(error.value.code, 503)
 
+    def test_introspection_token_cache(self) -> None:
+        access_token = "open_sesame"
+        self.http_client.request = simple_async_mock(
+            return_value=FakeResponse.json(
+                code=200,
+                payload={"active": "true", "scope": "guest", "jti": access_token},
+            )
+        )
+
+        # first call should cache response
+        # Mypy ignores below are due to mypy not understanding the dynamic substitution of msc3861 auth code
+        # for regular auth code via the config
+        self.get_success(
+            self.auth._introspect_token(access_token)  # type: ignore[attr-defined]
+        )
+        introspection_token = self.auth._token_cache.get(access_token)  # type: ignore[attr-defined]
+        self.assertEqual(introspection_token["jti"], access_token)
+        # there's been one http request
+        self.http_client.request.assert_called_once()
+
+        # second call should pull from cache, there should still be only one http request
+        token = self.get_success(self.auth._introspect_token(access_token))  # type: ignore[attr-defined]
+        self.http_client.request.assert_called_once()
+        self.assertEqual(token["jti"], access_token)
+
+        # advance past five minutes and check that cache expired - there should be more than one http call now
+        self.reactor.advance(360)
+        token_2 = self.get_success(self.auth._introspect_token(access_token))  # type: ignore[attr-defined]
+        self.assertEqual(self.http_client.request.call_count, 2)
+        self.assertEqual(token_2["jti"], access_token)
+
+        # test that if a cached token is expired, a fresh token will be pulled from authorizing server - first add a
+        # token with a soon-to-expire `exp` field to the cache
+        self.http_client.request = simple_async_mock(
+            return_value=FakeResponse.json(
+                code=200,
+                payload={
+                    "active": "true",
+                    "scope": "guest",
+                    "jti": "stale",
+                    "exp": self.clock.time() + 100,
+                },
+            )
+        )
+        self.get_success(
+            self.auth._introspect_token("stale")  # type: ignore[attr-defined]
+        )
+        introspection_token = self.auth._token_cache.get("stale")  # type: ignore[attr-defined]
+        self.assertEqual(introspection_token["jti"], "stale")
+        self.assertEqual(self.http_client.request.call_count, 1)
+
+        # advance the reactor past the token expiry but less than the cache expiry
+        self.reactor.advance(120)
+        self.assertEqual(self.auth._token_cache.get("stale"), introspection_token)  # type: ignore[attr-defined]
+
+        # check that the next call causes another http request (which will fail because the token is technically expired
+        # but the important thing is we discard the token from the cache and try the network)
+        self.get_failure(
+            self.auth._introspect_token("stale"), InvalidClientTokenError  # type: ignore[attr-defined]
+        )
+        self.assertEqual(self.http_client.request.call_count, 2)
+
     def make_device_keys(self, user_id: str, device_id: str) -> JsonDict:
         # We only generate a master key to simplify the test.
         master_signing_key = generate_signing_key(device_id)
-- 
cgit 1.5.1


From 0aba4a4eaac778ad75509fe20733b27bfc86fd9d Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 18 Aug 2023 11:05:01 +0100
Subject: Add cache to `get_server_keys_json_for_remote` (#16123)

---
 changelog.d/16123.misc                     |   1 +
 synapse/rest/key/v2/remote_key_resource.py |  44 +++++-----
 synapse/storage/databases/main/keys.py     | 132 +++++++++++++++++++----------
 synapse/storage/keys.py                    |   7 ++
 tests/crypto/test_keyring.py               |  61 +++++--------
 5 files changed, 144 insertions(+), 101 deletions(-)
 create mode 100644 changelog.d/16123.misc

(limited to 'synapse')

diff --git a/changelog.d/16123.misc b/changelog.d/16123.misc
new file mode 100644
index 0000000000..b7c6b7c2f2
--- /dev/null
+++ b/changelog.d/16123.misc
@@ -0,0 +1 @@
+Add cache to `get_server_keys_json_for_remote`.
diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py
index 8f3865d412..981fd1f58a 100644
--- a/synapse/rest/key/v2/remote_key_resource.py
+++ b/synapse/rest/key/v2/remote_key_resource.py
@@ -14,7 +14,7 @@
 
 import logging
 import re
-from typing import TYPE_CHECKING, Dict, Optional, Set, Tuple
+from typing import TYPE_CHECKING, Dict, Mapping, Optional, Set, Tuple
 
 from signedjson.sign import sign_json
 
@@ -27,6 +27,7 @@ from synapse.http.servlet import (
     parse_integer,
     parse_json_object_from_request,
 )
+from synapse.storage.keys import FetchKeyResultForRemote
 from synapse.types import JsonDict
 from synapse.util import json_decoder
 from synapse.util.async_helpers import yieldable_gather_results
@@ -157,14 +158,22 @@ class RemoteKey(RestServlet):
     ) -> JsonDict:
         logger.info("Handling query for keys %r", query)
 
-        store_queries = []
+        server_keys: Dict[Tuple[str, str], Optional[FetchKeyResultForRemote]] = {}
         for server_name, key_ids in query.items():
-            if not key_ids:
-                key_ids = (None,)
-            for key_id in key_ids:
-                store_queries.append((server_name, key_id, None))
+            if key_ids:
+                results: Mapping[
+                    str, Optional[FetchKeyResultForRemote]
+                ] = await self.store.get_server_keys_json_for_remote(
+                    server_name, key_ids
+                )
+            else:
+                results = await self.store.get_all_server_keys_json_for_remote(
+                    server_name
+                )
 
-        cached = await self.store.get_server_keys_json_for_remote(store_queries)
+            server_keys.update(
+                ((server_name, key_id), res) for key_id, res in results.items()
+            )
 
         json_results: Set[bytes] = set()
 
@@ -173,23 +182,20 @@ class RemoteKey(RestServlet):
         # Map server_name->key_id->int. Note that the value of the int is unused.
         # XXX: why don't we just use a set?
         cache_misses: Dict[str, Dict[str, int]] = {}
-        for (server_name, key_id, _), key_results in cached.items():
-            results = [(result["ts_added_ms"], result) for result in key_results]
-
-            if key_id is None:
+        for (server_name, key_id), key_result in server_keys.items():
+            if not query[server_name]:
                 # all keys were requested. Just return what we have without worrying
                 # about validity
-                for _, result in results:
-                    # Cast to bytes since postgresql returns a memoryview.
-                    json_results.add(bytes(result["key_json"]))
+                if key_result:
+                    json_results.add(key_result.key_json)
                 continue
 
             miss = False
-            if not results:
+            if key_result is None:
                 miss = True
             else:
-                ts_added_ms, most_recent_result = max(results)
-                ts_valid_until_ms = most_recent_result["ts_valid_until_ms"]
+                ts_added_ms = key_result.added_ts
+                ts_valid_until_ms = key_result.valid_until_ts
                 req_key = query.get(server_name, {}).get(key_id, {})
                 req_valid_until = req_key.get("minimum_valid_until_ts")
                 if req_valid_until is not None:
@@ -235,8 +241,8 @@ class RemoteKey(RestServlet):
                         ts_valid_until_ms,
                         time_now_ms,
                     )
-                # Cast to bytes since postgresql returns a memoryview.
-                json_results.add(bytes(most_recent_result["key_json"]))
+
+                json_results.add(key_result.key_json)
 
             if miss and query_remote_on_cache_miss:
                 # only bother attempting to fetch keys from servers on our whitelist
diff --git a/synapse/storage/databases/main/keys.py b/synapse/storage/databases/main/keys.py
index cea32a034a..a3b4744855 100644
--- a/synapse/storage/databases/main/keys.py
+++ b/synapse/storage/databases/main/keys.py
@@ -16,14 +16,13 @@
 import itertools
 import json
 import logging
-from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple
+from typing import Dict, Iterable, Mapping, Optional, Tuple
 
 from signedjson.key import decode_verify_key_bytes
 from unpaddedbase64 import decode_base64
 
-from synapse.storage._base import SQLBaseStore
-from synapse.storage.database import LoggingTransaction
-from synapse.storage.keys import FetchKeyResult
+from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
+from synapse.storage.keys import FetchKeyResult, FetchKeyResultForRemote
 from synapse.storage.types import Cursor
 from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.iterutils import batch_iter
@@ -34,7 +33,7 @@ logger = logging.getLogger(__name__)
 db_binary_type = memoryview
 
 
-class KeyStore(SQLBaseStore):
+class KeyStore(CacheInvalidationWorkerStore):
     """Persistence for signature verification keys"""
 
     @cached()
@@ -188,7 +187,12 @@ class KeyStore(SQLBaseStore):
         # invalidate takes a tuple corresponding to the params of
         # _get_server_keys_json. _get_server_keys_json only takes one
         # param, which is itself the 2-tuple (server_name, key_id).
-        self._get_server_keys_json.invalidate(((server_name, key_id),))
+        await self.invalidate_cache_and_stream(
+            "_get_server_keys_json", ((server_name, key_id),)
+        )
+        await self.invalidate_cache_and_stream(
+            "get_server_key_json_for_remote", (server_name, key_id)
+        )
 
     @cached()
     def _get_server_keys_json(
@@ -253,47 +257,87 @@ class KeyStore(SQLBaseStore):
 
         return await self.db_pool.runInteraction("get_server_keys_json", _txn)
 
-    async def get_server_keys_json_for_remote(
-        self, server_keys: Iterable[Tuple[str, Optional[str], Optional[str]]]
-    ) -> Dict[Tuple[str, Optional[str], Optional[str]], List[Dict[str, Any]]]:
-        """Retrieve the key json for a list of server_keys and key ids.
-        If no keys are found for a given server, key_id and source then
-        that server, key_id, and source triplet entry will be an empty list.
-        The JSON is returned as a byte array so that it can be efficiently
-        used in an HTTP response.
+    @cached()
+    def get_server_key_json_for_remote(
+        self,
+        server_name: str,
+        key_id: str,
+    ) -> Optional[FetchKeyResultForRemote]:
+        raise NotImplementedError()
 
-        Args:
-            server_keys: List of (server_name, key_id, source) triplets.
+    @cachedList(
+        cached_method_name="get_server_key_json_for_remote", list_name="key_ids"
+    )
+    async def get_server_keys_json_for_remote(
+        self, server_name: str, key_ids: Iterable[str]
+    ) -> Dict[str, Optional[FetchKeyResultForRemote]]:
+        """Fetch the cached keys for the given server/key IDs.
 
-        Returns:
-            A mapping from (server_name, key_id, source) triplets to a list of dicts
+        If we have multiple entries for a given key ID, returns the most recent.
         """
+        rows = await self.db_pool.simple_select_many_batch(
+            table="server_keys_json",
+            column="key_id",
+            iterable=key_ids,
+            keyvalues={"server_name": server_name},
+            retcols=(
+                "key_id",
+                "from_server",
+                "ts_added_ms",
+                "ts_valid_until_ms",
+                "key_json",
+            ),
+            desc="get_server_keys_json_for_remote",
+        )
 
-        def _get_server_keys_json_txn(
-            txn: LoggingTransaction,
-        ) -> Dict[Tuple[str, Optional[str], Optional[str]], List[Dict[str, Any]]]:
-            results = {}
-            for server_name, key_id, from_server in server_keys:
-                keyvalues = {"server_name": server_name}
-                if key_id is not None:
-                    keyvalues["key_id"] = key_id
-                if from_server is not None:
-                    keyvalues["from_server"] = from_server
-                rows = self.db_pool.simple_select_list_txn(
-                    txn,
-                    "server_keys_json",
-                    keyvalues=keyvalues,
-                    retcols=(
-                        "key_id",
-                        "from_server",
-                        "ts_added_ms",
-                        "ts_valid_until_ms",
-                        "key_json",
-                    ),
-                )
-                results[(server_name, key_id, from_server)] = rows
-            return results
+        if not rows:
+            return {}
+
+        # We sort the rows so that the most recently added entry is picked up.
+        rows.sort(key=lambda r: r["ts_added_ms"])
+
+        return {
+            row["key_id"]: FetchKeyResultForRemote(
+                # Cast to bytes since postgresql returns a memoryview.
+                key_json=bytes(row["key_json"]),
+                valid_until_ts=row["ts_valid_until_ms"],
+                added_ts=row["ts_added_ms"],
+            )
+            for row in rows
+        }
 
-        return await self.db_pool.runInteraction(
-            "get_server_keys_json", _get_server_keys_json_txn
+    async def get_all_server_keys_json_for_remote(
+        self,
+        server_name: str,
+    ) -> Dict[str, FetchKeyResultForRemote]:
+        """Fetch the cached keys for the given server.
+
+        If we have multiple entries for a given key ID, returns the most recent.
+        """
+        rows = await self.db_pool.simple_select_list(
+            table="server_keys_json",
+            keyvalues={"server_name": server_name},
+            retcols=(
+                "key_id",
+                "from_server",
+                "ts_added_ms",
+                "ts_valid_until_ms",
+                "key_json",
+            ),
+            desc="get_server_keys_json_for_remote",
         )
+
+        if not rows:
+            return {}
+
+        rows.sort(key=lambda r: r["ts_added_ms"])
+
+        return {
+            row["key_id"]: FetchKeyResultForRemote(
+                # Cast to bytes since postgresql returns a memoryview.
+                key_json=bytes(row["key_json"]),
+                valid_until_ts=row["ts_valid_until_ms"],
+                added_ts=row["ts_added_ms"],
+            )
+            for row in rows
+        }
diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py
index 71584f3f74..e74b2269d2 100644
--- a/synapse/storage/keys.py
+++ b/synapse/storage/keys.py
@@ -25,3 +25,10 @@ logger = logging.getLogger(__name__)
 class FetchKeyResult:
     verify_key: VerifyKey  # the key itself
     valid_until_ts: int  # how long we can use this key for
+
+
+@attr.s(slots=True, frozen=True, auto_attribs=True)
+class FetchKeyResultForRemote:
+    key_json: bytes  # the full key JSON
+    valid_until_ts: int  # how long we can use this key for, in milliseconds.
+    added_ts: int  # When we added this key, in milliseconds.
diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py
index fdfd4f911d..2be341ac7b 100644
--- a/tests/crypto/test_keyring.py
+++ b/tests/crypto/test_keyring.py
@@ -456,24 +456,19 @@ class ServerKeyFetcherTestCase(unittest.HomeserverTestCase):
         self.assertEqual(k.verify_key.version, "ver1")
 
         # check that the perspectives store is correctly updated
-        lookup_triplet = (SERVER_NAME, testverifykey_id, None)
         key_json = self.get_success(
             self.hs.get_datastores().main.get_server_keys_json_for_remote(
-                [lookup_triplet]
+                SERVER_NAME, [testverifykey_id]
             )
         )
-        res_keys = key_json[lookup_triplet]
-        self.assertEqual(len(res_keys), 1)
-        res = res_keys[0]
-        self.assertEqual(res["key_id"], testverifykey_id)
-        self.assertEqual(res["from_server"], SERVER_NAME)
-        self.assertEqual(res["ts_added_ms"], self.reactor.seconds() * 1000)
-        self.assertEqual(res["ts_valid_until_ms"], VALID_UNTIL_TS)
+        res = key_json[testverifykey_id]
+        self.assertIsNotNone(res)
+        assert res is not None
+        self.assertEqual(res.added_ts, self.reactor.seconds() * 1000)
+        self.assertEqual(res.valid_until_ts, VALID_UNTIL_TS)
 
         # we expect it to be encoded as canonical json *before* it hits the db
-        self.assertEqual(
-            bytes(res["key_json"]), canonicaljson.encode_canonical_json(response)
-        )
+        self.assertEqual(res.key_json, canonicaljson.encode_canonical_json(response))
 
         # change the server name: the result should be ignored
         response["server_name"] = "OTHER_SERVER"
@@ -576,23 +571,18 @@ class PerspectivesKeyFetcherTestCase(unittest.HomeserverTestCase):
         self.assertEqual(k.verify_key.version, "ver1")
 
         # check that the perspectives store is correctly updated
-        lookup_triplet = (SERVER_NAME, testverifykey_id, None)
         key_json = self.get_success(
             self.hs.get_datastores().main.get_server_keys_json_for_remote(
-                [lookup_triplet]
+                SERVER_NAME, [testverifykey_id]
             )
         )
-        res_keys = key_json[lookup_triplet]
-        self.assertEqual(len(res_keys), 1)
-        res = res_keys[0]
-        self.assertEqual(res["key_id"], testverifykey_id)
-        self.assertEqual(res["from_server"], self.mock_perspective_server.server_name)
-        self.assertEqual(res["ts_added_ms"], self.reactor.seconds() * 1000)
-        self.assertEqual(res["ts_valid_until_ms"], VALID_UNTIL_TS)
-
-        self.assertEqual(
-            bytes(res["key_json"]), canonicaljson.encode_canonical_json(response)
-        )
+        res = key_json[testverifykey_id]
+        self.assertIsNotNone(res)
+        assert res is not None
+        self.assertEqual(res.added_ts, self.reactor.seconds() * 1000)
+        self.assertEqual(res.valid_until_ts, VALID_UNTIL_TS)
+
+        self.assertEqual(res.key_json, canonicaljson.encode_canonical_json(response))
 
     def test_get_multiple_keys_from_perspectives(self) -> None:
         """Check that we can correctly request multiple keys for the same server"""
@@ -699,23 +689,18 @@ class PerspectivesKeyFetcherTestCase(unittest.HomeserverTestCase):
         self.assertEqual(k.verify_key.version, "ver1")
 
         # check that the perspectives store is correctly updated
-        lookup_triplet = (SERVER_NAME, testverifykey_id, None)
         key_json = self.get_success(
             self.hs.get_datastores().main.get_server_keys_json_for_remote(
-                [lookup_triplet]
+                SERVER_NAME, [testverifykey_id]
             )
         )
-        res_keys = key_json[lookup_triplet]
-        self.assertEqual(len(res_keys), 1)
-        res = res_keys[0]
-        self.assertEqual(res["key_id"], testverifykey_id)
-        self.assertEqual(res["from_server"], self.mock_perspective_server.server_name)
-        self.assertEqual(res["ts_added_ms"], self.reactor.seconds() * 1000)
-        self.assertEqual(res["ts_valid_until_ms"], VALID_UNTIL_TS)
-
-        self.assertEqual(
-            bytes(res["key_json"]), canonicaljson.encode_canonical_json(response)
-        )
+        res = key_json[testverifykey_id]
+        self.assertIsNotNone(res)
+        assert res is not None
+        self.assertEqual(res.added_ts, self.reactor.seconds() * 1000)
+        self.assertEqual(res.valid_until_ts, VALID_UNTIL_TS)
+
+        self.assertEqual(res.key_json, canonicaljson.encode_canonical_json(response))
 
     def test_invalid_perspectives_responses(self) -> None:
         """Check that invalid responses from the perspectives server are rejected"""
-- 
cgit 1.5.1


From 6130afb862c6547e5e279353fc032c4d63fe14d2 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 18 Aug 2023 12:16:00 +0100
Subject: Add response time metrics for introspection requests (#16131)

See #16119
---
 changelog.d/16131.misc                |  1 +
 synapse/api/auth/msc3861_delegated.py | 34 +++++++++++++++++++++++++++-------
 2 files changed, 28 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/16131.misc

(limited to 'synapse')

diff --git a/changelog.d/16131.misc b/changelog.d/16131.misc
new file mode 100644
index 0000000000..4f04699512
--- /dev/null
+++ b/changelog.d/16131.misc
@@ -0,0 +1 @@
+Add response time metrics for introspection requests for delegated auth.
diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py
index 3a516093f5..18875f2c81 100644
--- a/synapse/api/auth/msc3861_delegated.py
+++ b/synapse/api/auth/msc3861_delegated.py
@@ -20,6 +20,7 @@ from authlib.oauth2.auth import encode_client_secret_basic, encode_client_secret
 from authlib.oauth2.rfc7523 import ClientSecretJWT, PrivateKeyJWT, private_key_jwt_sign
 from authlib.oauth2.rfc7662 import IntrospectionToken
 from authlib.oidc.discovery import OpenIDProviderMetadata, get_well_known_url
+from prometheus_client import Histogram
 
 from twisted.web.client import readBody
 from twisted.web.http_headers import Headers
@@ -46,6 +47,13 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
+introspection_response_timer = Histogram(
+    "synapse_api_auth_delegated_introspection_response",
+    "Time taken to get a response for an introspection request",
+    ["code"],
+)
+
+
 # Scope as defined by MSC2967
 # https://github.com/matrix-org/matrix-spec-proposals/pull/2967
 SCOPE_MATRIX_API = "urn:matrix:org.matrix.msc2967.client:api:*"
@@ -190,14 +198,26 @@ class MSC3861DelegatedAuth(BaseAuth):
         # Do the actual request
         # We're not using the SimpleHttpClient util methods as we don't want to
         # check the HTTP status code, and we do the body encoding ourselves.
-        response = await self._http_client.request(
-            method="POST",
-            uri=uri,
-            data=body.encode("utf-8"),
-            headers=headers,
-        )
 
-        resp_body = await make_deferred_yieldable(readBody(response))
+        start_time = self._clock.time()
+        try:
+            response = await self._http_client.request(
+                method="POST",
+                uri=uri,
+                data=body.encode("utf-8"),
+                headers=headers,
+            )
+
+            resp_body = await make_deferred_yieldable(readBody(response))
+        except Exception:
+            end_time = self._clock.time()
+            introspection_response_timer.labels("ERR").observe(end_time - start_time)
+            raise
+
+        end_time = self._clock.time()
+        introspection_response_timer.labels(response.code).observe(
+            end_time - start_time
+        )
 
         if response.code < 200 or response.code >= 300:
             raise HttpResponseException(
-- 
cgit 1.5.1


From 54317d34b76adb1e8f694acd91f631b3abe38947 Mon Sep 17 00:00:00 2001
From: Alexander Fechler <141915399+afechler@users.noreply.github.com>
Date: Fri, 18 Aug 2023 13:26:38 +0200
Subject: Allow filtering for admins in the list accounts admin API (#16114)

---
 changelog.d/16114.feature                  |  1 +
 docs/admin_api/user_admin_api.md           |  2 ++
 synapse/rest/admin/users.py                |  3 +++
 synapse/storage/databases/main/__init__.py | 10 ++++++++
 tests/rest/admin/test_user.py              | 38 ++++++++++++++++++++++++++++++
 5 files changed, 54 insertions(+)
 create mode 100644 changelog.d/16114.feature

(limited to 'synapse')

diff --git a/changelog.d/16114.feature b/changelog.d/16114.feature
new file mode 100644
index 0000000000..e937a3b029
--- /dev/null
+++ b/changelog.d/16114.feature
@@ -0,0 +1 @@
+Add an `admins` query parameter to the [List Accounts](https://matrix-org.github.io/synapse/v1.91/admin_api/user_admin_api.html#list-accounts) [admin API](https://matrix-org.github.io/synapse/v1.91/usage/administration/admin_api/index.html), to include only admins or to exclude admins in user queries.
\ No newline at end of file
diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md
index c269ce6af0..99abfea3a0 100644
--- a/docs/admin_api/user_admin_api.md
+++ b/docs/admin_api/user_admin_api.md
@@ -219,6 +219,8 @@ The following parameters should be set in the URL:
   **or** displaynames that contain this value.
 - `guests` - string representing a bool - Is optional and if `false` will **exclude** guest users.
   Defaults to `true` to include guest users.
+- `admins` - Optional flag to filter admins. If `true`, only admins are queried. If `false`, admins are excluded from 
+  the query. When the flag is absent (the default), **both** admins and non-admins are included in the search results.
 - `deactivated` - string representing a bool - Is optional and if `true` will **include** deactivated users.
   Defaults to `false` to exclude deactivated users.
 - `limit` - string representing a positive integer - Is optional but is used for pagination,
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 04d9ef25b7..240e6254b0 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -109,6 +109,8 @@ class UsersRestServletV2(RestServlet):
             )
         deactivated = parse_boolean(request, "deactivated", default=False)
 
+        admins = parse_boolean(request, "admins")
+
         # If support for MSC3866 is not enabled, apply no filtering based on the
         # `approved` column.
         if self._msc3866_enabled:
@@ -146,6 +148,7 @@ class UsersRestServletV2(RestServlet):
             name,
             guests,
             deactivated,
+            admins,
             order_by,
             direction,
             approved,
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index be67d1ff22..e17f25e87a 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -168,6 +168,7 @@ class DataStore(
         name: Optional[str] = None,
         guests: bool = True,
         deactivated: bool = False,
+        admins: Optional[bool] = None,
         order_by: str = UserSortOrder.NAME.value,
         direction: Direction = Direction.FORWARDS,
         approved: bool = True,
@@ -184,6 +185,9 @@ class DataStore(
             name: search for local part of user_id or display name
             guests: whether to in include guest users
             deactivated: whether to include deactivated users
+            admins: Optional flag to filter admins. If true, only admins are queried.
+                    If false, admins are excluded from the query. When it is
+                    None (the default), both admins and non-admins are queried.
             order_by: the sort order of the returned list
             direction: sort ascending or descending
             approved: whether to include approved users
@@ -220,6 +224,12 @@ class DataStore(
             if not deactivated:
                 filters.append("deactivated = 0")
 
+            if admins is not None:
+                if admins:
+                    filters.append("admin = 1")
+                else:
+                    filters.append("admin = 0")
+
             if not approved:
                 # We ignore NULL values for the approved flag because these should only
                 # be already existing users that we consider as already approved.
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index 41a959b4d6..feb81844ae 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -879,6 +879,44 @@ class UsersListTestCase(unittest.HomeserverTestCase):
         self._order_test([self.admin_user, user1, user2], "creation_ts", "f")
         self._order_test([user2, user1, self.admin_user], "creation_ts", "b")
 
+    def test_filter_admins(self) -> None:
+        """
+        Tests whether the various values of the query parameter `admins` lead to the
+        expected result set.
+        """
+
+        # Register an additional non admin user
+        self.register_user("user", "pass", admin=False)
+
+        # Query all users
+        channel = self.make_request(
+            "GET",
+            f"{self.url}",
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(200, channel.code, channel.result)
+        self.assertEqual(2, channel.json_body["total"])
+
+        # Query only admin users
+        channel = self.make_request(
+            "GET",
+            f"{self.url}?admins=true",
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(200, channel.code, channel.result)
+        self.assertEqual(1, channel.json_body["total"])
+        self.assertEqual(1, channel.json_body["users"][0]["admin"])
+
+        # Query only non admin users
+        channel = self.make_request(
+            "GET",
+            f"{self.url}?admins=false",
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(200, channel.code, channel.result)
+        self.assertEqual(1, channel.json_body["total"])
+        self.assertFalse(channel.json_body["users"][0]["admin"])
+
     @override_config(
         {
             "experimental_features": {
-- 
cgit 1.5.1


From 2d15e396843879bb514a148097cbddf10f50655c Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Fri, 18 Aug 2023 15:46:46 +0200
Subject: MSC3861: allow impersonation by an admin using a query param (#16132)

---
 changelog.d/16132.misc                  |  1 +
 synapse/api/auth/msc3861_delegated.py   | 25 ++++++++++++++++++++---
 tests/handlers/test_oauth_delegation.py | 35 +++++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/16132.misc

(limited to 'synapse')

diff --git a/changelog.d/16132.misc b/changelog.d/16132.misc
new file mode 100644
index 0000000000..aca26079d8
--- /dev/null
+++ b/changelog.d/16132.misc
@@ -0,0 +1 @@
+MSC3861: allow impersonation by an admin user using `_oidc_admin_impersonate_user_id` query parameter.
diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py
index 18875f2c81..4bdfe31b22 100644
--- a/synapse/api/auth/msc3861_delegated.py
+++ b/synapse/api/auth/msc3861_delegated.py
@@ -246,7 +246,7 @@ class MSC3861DelegatedAuth(BaseAuth):
         return introspection_token
 
     async def is_server_admin(self, requester: Requester) -> bool:
-        return "urn:synapse:admin:*" in requester.scope
+        return SCOPE_SYNAPSE_ADMIN in requester.scope
 
     async def get_user_by_req(
         self,
@@ -263,6 +263,25 @@ class MSC3861DelegatedAuth(BaseAuth):
             # so that we don't provision the user if they don't have enough permission:
             requester = await self.get_user_by_access_token(access_token, allow_expired)
 
+            # Allow impersonation by an admin user using `_oidc_admin_impersonate_user_id` query parameter
+            if request.args is not None:
+                user_id_params = request.args.get(b"_oidc_admin_impersonate_user_id")
+                if user_id_params:
+                    if await self.is_server_admin(requester):
+                        user_id_str = user_id_params[0].decode("ascii")
+                        impersonated_user_id = UserID.from_string(user_id_str)
+                        logging.info(f"Admin impersonation of user {user_id_str}")
+                        requester = create_requester(
+                            user_id=impersonated_user_id,
+                            scope=[SCOPE_MATRIX_API],
+                            authenticated_entity=requester.user.to_string(),
+                        )
+                    else:
+                        raise AuthError(
+                            401,
+                            "Impersonation not possible by a non admin user",
+                        )
+
             # Deny the request if the user account is locked.
             if not allow_locked and await self.store.get_user_locked_status(
                 requester.user.to_string()
@@ -290,14 +309,14 @@ class MSC3861DelegatedAuth(BaseAuth):
             # XXX: This is a temporary solution so that the admin API can be called by
             # the OIDC provider. This will be removed once we have OIDC client
             # credentials grant support in matrix-authentication-service.
-            logging.info("Admin toked used")
+            logging.info("Admin token used")
             # XXX: that user doesn't exist and won't be provisioned.
             # This is mostly fine for admin calls, but we should also think about doing
             # requesters without a user_id.
             admin_user = UserID("__oidc_admin", self._hostname)
             return create_requester(
                 user_id=admin_user,
-                scope=["urn:synapse:admin:*"],
+                scope=[SCOPE_SYNAPSE_ADMIN],
             )
 
         try:
diff --git a/tests/handlers/test_oauth_delegation.py b/tests/handlers/test_oauth_delegation.py
index 82c26e303f..1456b675a7 100644
--- a/tests/handlers/test_oauth_delegation.py
+++ b/tests/handlers/test_oauth_delegation.py
@@ -340,6 +340,41 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
             get_awaitable_result(self.auth.is_server_admin(requester)), False
         )
 
+    def test_active_user_admin_impersonation(self) -> None:
+        """The handler should return a requester with normal user rights and a
+        user ID matching the `_oidc_admin_impersonate_user_id` query param."""
+
+        self.http_client.request = simple_async_mock(
+            return_value=FakeResponse.json(
+                code=200,
+                payload={
+                    "active": True,
+                    "sub": SUBJECT,
+                    "scope": " ".join([SYNAPSE_ADMIN_SCOPE, MATRIX_USER_SCOPE]),
+                    "username": USERNAME,
+                },
+            )
+        )
+        request = Mock(args={})
+        request.args[b"access_token"] = [b"mockAccessToken"]
+        impersonated_user_id = f"@{USERNAME}:{SERVER_NAME}"
+        request.args[b"_oidc_admin_impersonate_user_id"] = [
+            impersonated_user_id.encode("ascii")
+        ]
+        request.requestHeaders.getRawHeaders = mock_getRawHeaders()
+        requester = self.get_success(self.auth.get_user_by_req(request))
+        self.http_client.get_json.assert_called_once_with(WELL_KNOWN)
+        self.http_client.request.assert_called_once_with(
+            method="POST", uri=INTROSPECTION_ENDPOINT, data=ANY, headers=ANY
+        )
+        self._assertParams()
+        self.assertEqual(requester.user.to_string(), impersonated_user_id)
+        self.assertEqual(requester.is_guest, False)
+        self.assertEqual(requester.device_id, None)
+        self.assertEqual(
+            get_awaitable_result(self.auth.is_server_admin(requester)), False
+        )
+
     def test_active_user_with_device(self) -> None:
         """The handler should return a requester with normal user rights and a device ID."""
 
-- 
cgit 1.5.1


From bd558a6dc369b6f5d06ab6fd2500faa216a45883 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 18 Aug 2023 15:32:06 +0100
Subject: Speed up state res in rare case we don't have all events (#16116)

If we don't have all the auth events in a room then not all state events will have a chain cover index. Even so, we can still use the chain cover index on the events that do have it, rather than bailing and using the slower functions.

This situation should not arise for newly persisted rooms, as we check we have the full auth chain for each event, but can happen for existing rooms.

c.f. #15245
---
 changelog.d/16116.bugfix                           |   1 +
 synapse/storage/databases/main/event_federation.py | 184 ++++++++++++++--
 tests/storage/test_event_federation.py             | 241 +++++++++++++++++----
 3 files changed, 355 insertions(+), 71 deletions(-)
 create mode 100644 changelog.d/16116.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16116.bugfix b/changelog.d/16116.bugfix
new file mode 100644
index 0000000000..f57a26ae39
--- /dev/null
+++ b/changelog.d/16116.bugfix
@@ -0,0 +1 @@
+Fix performance of state resolutions for large, old rooms that did not have the full auth chain persisted.
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 534dc32413..fab7008a8f 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -452,33 +452,56 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
         # sets.
         seen_chains: Set[int] = set()
 
-        sql = """
-            SELECT event_id, chain_id, sequence_number
-            FROM event_auth_chains
-            WHERE %s
-        """
-        for batch in batch_iter(initial_events, 1000):
-            clause, args = make_in_list_sql_clause(
-                txn.database_engine, "event_id", batch
-            )
-            txn.execute(sql % (clause,), args)
+        # Fetch the chain cover index for the initial set of events we're
+        # considering.
+        def fetch_chain_info(events_to_fetch: Collection[str]) -> None:
+            sql = """
+                SELECT event_id, chain_id, sequence_number
+                FROM event_auth_chains
+                WHERE %s
+            """
+            for batch in batch_iter(events_to_fetch, 1000):
+                clause, args = make_in_list_sql_clause(
+                    txn.database_engine, "event_id", batch
+                )
+                txn.execute(sql % (clause,), args)
 
-            for event_id, chain_id, sequence_number in txn:
-                chain_info[event_id] = (chain_id, sequence_number)
-                seen_chains.add(chain_id)
-                chain_to_event.setdefault(chain_id, {})[sequence_number] = event_id
+                for event_id, chain_id, sequence_number in txn:
+                    chain_info[event_id] = (chain_id, sequence_number)
+                    seen_chains.add(chain_id)
+                    chain_to_event.setdefault(chain_id, {})[sequence_number] = event_id
+
+        fetch_chain_info(initial_events)
 
         # Check that we actually have a chain ID for all the events.
         events_missing_chain_info = initial_events.difference(chain_info)
+
+        # The result set to return, i.e. the auth chain difference.
+        result: Set[str] = set()
+
         if events_missing_chain_info:
-            # This can happen due to e.g. downgrade/upgrade of the server. We
-            # raise an exception and fall back to the previous algorithm.
-            logger.info(
-                "Unexpectedly found that events don't have chain IDs in room %s: %s",
+            # For some reason we have events we haven't calculated the chain
+            # index for, so we need to handle those separately. This should only
+            # happen for older rooms where the server doesn't have all the auth
+            # events.
+            result = self._fixup_auth_chain_difference_sets(
+                txn,
                 room_id,
-                events_missing_chain_info,
+                state_sets=state_sets,
+                events_missing_chain_info=events_missing_chain_info,
+                events_that_have_chain_index=chain_info,
             )
-            raise _NoChainCoverIndex(room_id)
+
+            # We now need to refetch any events that we have added to the state
+            # sets.
+            new_events_to_fetch = {
+                event_id
+                for state_set in state_sets
+                for event_id in state_set
+                if event_id not in initial_events
+            }
+
+            fetch_chain_info(new_events_to_fetch)
 
         # Corresponds to `state_sets`, except as a map from chain ID to max
         # sequence number reachable from the state set.
@@ -487,8 +510,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
             chains: Dict[int, int] = {}
             set_to_chain.append(chains)
 
-            for event_id in state_set:
-                chain_id, seq_no = chain_info[event_id]
+            for state_id in state_set:
+                chain_id, seq_no = chain_info[state_id]
 
                 chains[chain_id] = max(seq_no, chains.get(chain_id, 0))
 
@@ -532,7 +555,6 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
         # from *any* state set and the minimum sequence number reachable from
         # *all* state sets. Events in that range are in the auth chain
         # difference.
-        result = set()
 
         # Mapping from chain ID to the range of sequence numbers that should be
         # pulled from the database.
@@ -588,6 +610,122 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
 
         return result
 
+    def _fixup_auth_chain_difference_sets(
+        self,
+        txn: LoggingTransaction,
+        room_id: str,
+        state_sets: List[Set[str]],
+        events_missing_chain_info: Set[str],
+        events_that_have_chain_index: Collection[str],
+    ) -> Set[str]:
+        """Helper for `_get_auth_chain_difference_using_cover_index_txn` to
+        handle the case where we haven't calculated the chain cover index for
+        all events.
+
+        This modifies `state_sets` so that they only include events that have a
+        chain cover index, and returns a set of event IDs that are part of the
+        auth difference.
+        """
+
+        # This works similarly to the handling of unpersisted events in
+        # `synapse.state.v2_get_auth_chain_difference`. We use the observation
+        # that if you can split the set of events into two classes X and Y,
+        # where no events in Y have events in X in their auth chain, then we can
+        # calculate the auth difference by considering X and Y separately.
+        #
+        # We do this in three steps:
+        #   1. Compute the set of events without chain cover index belonging to
+        #      the auth difference.
+        #   2. Replacing the un-indexed events in the state_sets with their auth
+        #      events, recursively, until the state_sets contain only indexed
+        #      events. We can then calculate the auth difference of those state
+        #      sets using the chain cover index.
+        #   3. Add the results of 1 and 2 together.
+
+        # By construction we know that all events that we haven't persisted the
+        # chain cover index for are contained in
+        # `event_auth_chain_to_calculate`, so we pull out the events from those
+        # rather than doing recursive queries to walk the auth chain.
+        #
+        # We pull out those events with their auth events, which gives us enough
+        # information to construct the auth chain of an event up to auth events
+        # that have the chain cover index.
+        sql = """
+            SELECT tc.event_id, ea.auth_id, eac.chain_id IS NOT NULL
+            FROM event_auth_chain_to_calculate AS tc
+            LEFT JOIN event_auth AS ea USING (event_id)
+            LEFT JOIN event_auth_chains AS eac ON (ea.auth_id = eac.event_id)
+            WHERE tc.room_id = ?
+        """
+        txn.execute(sql, (room_id,))
+        event_to_auth_ids: Dict[str, Set[str]] = {}
+        events_that_have_chain_index = set(events_that_have_chain_index)
+        for event_id, auth_id, auth_id_has_chain in txn:
+            s = event_to_auth_ids.setdefault(event_id, set())
+            if auth_id is not None:
+                s.add(auth_id)
+                if auth_id_has_chain:
+                    events_that_have_chain_index.add(auth_id)
+
+        if events_missing_chain_info - event_to_auth_ids.keys():
+            # Uh oh, we somehow haven't correctly done the chain cover index,
+            # bail and fall back to the old method.
+            logger.info(
+                "Unexpectedly found that events don't have chain IDs in room %s: %s",
+                room_id,
+                events_missing_chain_info - event_to_auth_ids.keys(),
+            )
+            raise _NoChainCoverIndex(room_id)
+
+        # Create a map from event IDs we care about to their partial auth chain.
+        event_id_to_partial_auth_chain: Dict[str, Set[str]] = {}
+        for event_id, auth_ids in event_to_auth_ids.items():
+            if not any(event_id in state_set for state_set in state_sets):
+                continue
+
+            processing = set(auth_ids)
+            to_add = set()
+            while processing:
+                auth_id = processing.pop()
+                to_add.add(auth_id)
+
+                sub_auth_ids = event_to_auth_ids.get(auth_id)
+                if sub_auth_ids is None:
+                    continue
+
+                processing.update(sub_auth_ids - to_add)
+
+            event_id_to_partial_auth_chain[event_id] = to_add
+
+        # Now we do two things:
+        #   1. Update the state sets to only include indexed events; and
+        #   2. Create a new list containing the auth chains of the un-indexed
+        #      events
+        unindexed_state_sets: List[Set[str]] = []
+        for state_set in state_sets:
+            unindexed_state_set = set()
+            for event_id, auth_chain in event_id_to_partial_auth_chain.items():
+                if event_id not in state_set:
+                    continue
+
+                unindexed_state_set.add(event_id)
+
+                state_set.discard(event_id)
+                state_set.difference_update(auth_chain)
+                for auth_id in auth_chain:
+                    if auth_id in events_that_have_chain_index:
+                        state_set.add(auth_id)
+                    else:
+                        unindexed_state_set.add(auth_id)
+
+            unindexed_state_sets.append(unindexed_state_set)
+
+        # Calculate and return the auth difference of the un-indexed events.
+        union = unindexed_state_sets[0].union(*unindexed_state_sets[1:])
+        intersection = unindexed_state_sets[0].intersection(*unindexed_state_sets[1:])
+
+        return union - intersection
+
     def _get_auth_chain_difference_txn(
         self, txn: LoggingTransaction, state_sets: List[Set[str]]
     ) -> Set[str]:
diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index 9c151a5e62..7a4ecab2d5 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -13,7 +13,19 @@
 # limitations under the License.
 
 import datetime
-from typing import Dict, List, Tuple, Union, cast
+from typing import (
+    Collection,
+    Dict,
+    FrozenSet,
+    Iterable,
+    List,
+    Mapping,
+    Set,
+    Tuple,
+    TypeVar,
+    Union,
+    cast,
+)
 
 import attr
 from parameterized import parameterized
@@ -38,6 +50,138 @@ from synapse.util import Clock, json_encoder
 import tests.unittest
 import tests.utils
 
+# The silly auth graph we use to test the auth difference algorithm,
+# where the top are the most recent events.
+#
+#   A   B
+#    \ /
+#  D  E
+#  \  |
+#   ` F   C
+#     |  /|
+#     G ´ |
+#     | \ |
+#     H   I
+#     |   |
+#     K   J
+
+AUTH_GRAPH: Dict[str, List[str]] = {
+    "a": ["e"],
+    "b": ["e"],
+    "c": ["g", "i"],
+    "d": ["f"],
+    "e": ["f"],
+    "f": ["g"],
+    "g": ["h", "i"],
+    "h": ["k"],
+    "i": ["j"],
+    "k": [],
+    "j": [],
+}
+
+DEPTH_GRAPH = {
+    "a": 7,
+    "b": 7,
+    "c": 4,
+    "d": 6,
+    "e": 6,
+    "f": 5,
+    "g": 3,
+    "h": 2,
+    "i": 2,
+    "k": 1,
+    "j": 1,
+}
+
+T = TypeVar("T")
+
+
+def get_all_topologically_sorted_orders(
+    nodes: Iterable[T],
+    graph: Mapping[T, Collection[T]],
+) -> List[List[T]]:
+    """Given a set of nodes and a graph, return all possible topological
+    orderings.
+    """
+
+    # This is implemented by Kahn's algorithm, and forking execution each time
+    # we have a choice over which node to consider next.
+
+    degree_map = {node: 0 for node in nodes}
+    reverse_graph: Dict[T, Set[T]] = {}
+
+    for node, edges in graph.items():
+        if node not in degree_map:
+            continue
+
+        for edge in set(edges):
+            if edge in degree_map:
+                degree_map[node] += 1
+
+            reverse_graph.setdefault(edge, set()).add(node)
+        reverse_graph.setdefault(node, set())
+
+    zero_degree = [node for node, degree in degree_map.items() if degree == 0]
+
+    return _get_all_topologically_sorted_orders_inner(
+        reverse_graph, zero_degree, degree_map
+    )
+
+
+def _get_all_topologically_sorted_orders_inner(
+    reverse_graph: Dict[T, Set[T]],
+    zero_degree: List[T],
+    degree_map: Dict[T, int],
+) -> List[List[T]]:
+    new_paths = []
+
+    # Rather than only choosing *one* item from the list of nodes with zero
+    # degree, we "fork" execution and run the algorithm for each node in the
+    # zero-degree list.
+    for node in zero_degree:
+        new_degree_map = degree_map.copy()
+        new_zero_degree = zero_degree.copy()
+        new_zero_degree.remove(node)
+
+        for edge in reverse_graph.get(node, []):
+            if edge in new_degree_map:
+                new_degree_map[edge] -= 1
+                if new_degree_map[edge] == 0:
+                    new_zero_degree.append(edge)
+
+        paths = _get_all_topologically_sorted_orders_inner(
+            reverse_graph, new_zero_degree, new_degree_map
+        )
+        for path in paths:
+            path.insert(0, node)
+
+        new_paths.extend(paths)
+
+    if not new_paths:
+        return [[]]
+
+    return new_paths
+
+
+def get_all_topologically_consistent_subsets(
+    nodes: Iterable[T],
+    graph: Mapping[T, Collection[T]],
+) -> Set[FrozenSet[T]]:
+    """Get all subsets of the graph where if node N is in the subgraph, then all
+    nodes that can reach that node (i.e. every X for which a path X -> N exists)
+    are in the subgraph.
+    """
+    all_topological_orderings = get_all_topologically_sorted_orders(nodes, graph)
+
+    graph_subsets = set()
+    for ordering in all_topological_orderings:
+        ordering.reverse()
+
+        for idx in range(len(ordering)):
+            graph_subsets.add(frozenset(ordering[:idx]))
+
+    return graph_subsets
+
 
 @attr.s(auto_attribs=True, frozen=True, slots=True)
 class _BackfillSetupInfo:
@@ -172,49 +316,6 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
     def _setup_auth_chain(self, use_chain_cover_index: bool) -> str:
         room_id = "@ROOM:local"
 
-        # The silly auth graph we use to test the auth difference algorithm,
-        # where the top are the most recent events.
-        #
-        #   A   B
-        #    \ /
-        #  D  E
-        #  \  |
-        #   ` F   C
-        #     |  /|
-        #     G ´ |
-        #     | \ |
-        #     H   I
-        #     |   |
-        #     K   J
-
-        auth_graph: Dict[str, List[str]] = {
-            "a": ["e"],
-            "b": ["e"],
-            "c": ["g", "i"],
-            "d": ["f"],
-            "e": ["f"],
-            "f": ["g"],
-            "g": ["h", "i"],
-            "h": ["k"],
-            "i": ["j"],
-            "k": [],
-            "j": [],
-        }
-
-        depth_map = {
-            "a": 7,
-            "b": 7,
-            "c": 4,
-            "d": 6,
-            "e": 6,
-            "f": 5,
-            "g": 3,
-            "h": 2,
-            "i": 2,
-            "k": 1,
-            "j": 1,
-        }
-
         # Mark the room as maybe having a cover index.
 
         def store_room(txn: LoggingTransaction) -> None:
@@ -238,9 +339,9 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         def insert_event(txn: LoggingTransaction) -> None:
             stream_ordering = 0
 
-            for event_id in auth_graph:
+            for event_id in AUTH_GRAPH:
                 stream_ordering += 1
-                depth = depth_map[event_id]
+                depth = DEPTH_GRAPH[event_id]
 
                 self.store.db_pool.simple_insert_txn(
                     txn,
@@ -260,8 +361,8 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
             self.persist_events._persist_event_auth_chain_txn(
                 txn,
                 [
-                    cast(EventBase, FakeEvent(event_id, room_id, auth_graph[event_id]))
-                    for event_id in auth_graph
+                    cast(EventBase, FakeEvent(event_id, room_id, AUTH_GRAPH[event_id]))
+                    for event_id in AUTH_GRAPH
                 ],
             )
 
@@ -344,7 +445,51 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         room_id = self._setup_auth_chain(use_chain_cover_index)
 
         # Now actually test that various combinations give the right result:
+        self.assert_auth_diff_is_expected(room_id)
+
+    @parameterized.expand(
+        [
+            [graph_subset]
+            for graph_subset in get_all_topologically_consistent_subsets(
+                AUTH_GRAPH, AUTH_GRAPH
+            )
+        ]
+    )
+    def test_auth_difference_partial(self, graph_subset: Collection[str]) -> None:
+        """Test that if we only have a chain cover index on a partial subset of
+        the room we still get the correct auth chain difference.
+
+        We do this by removing the chain cover index for every valid subset of the
+        graph.
+        """
+        room_id = self._setup_auth_chain(True)
+
+        for event_id in graph_subset:
+            # Remove chain cover from that event.
+            self.get_success(
+                self.store.db_pool.simple_delete(
+                    table="event_auth_chains",
+                    keyvalues={"event_id": event_id},
+                    desc="test_auth_difference_partial_remove",
+                )
+            )
+            self.get_success(
+                self.store.db_pool.simple_insert(
+                    table="event_auth_chain_to_calculate",
+                    values={
+                        "event_id": event_id,
+                        "room_id": room_id,
+                        "type": "",
+                        "state_key": "",
+                    },
+                    desc="test_auth_difference_partial_remove",
+                )
+            )
+
+        self.assert_auth_diff_is_expected(room_id)
 
+    def assert_auth_diff_is_expected(self, room_id: str) -> None:
+        """Assert the auth chain difference returns the correct answers."""
         difference = self.get_success(
             self.store.get_auth_chain_difference(room_id, [{"a"}, {"b"}])
         )
-- 
cgit 1.5.1


From 358896e1b835bf693ef40d4cf9f10077432e935b Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Mon, 21 Aug 2023 14:17:13 +0200
Subject: Implements a task scheduler for resumable potentially long running
 tasks (#15891)

---
 changelog.d/15891.feature                          |   1 +
 synapse/app/generic_worker.py                      |   2 +
 synapse/server.py                                  |   7 +-
 synapse/storage/databases/main/__init__.py         |   2 +
 synapse/storage/databases/main/task_scheduler.py   | 202 ++++++++++++
 synapse/storage/schema/__init__.py                 |   1 +
 .../schema/main/delta/80/02_scheduled_tasks.sql    |  28 ++
 synapse/types/__init__.py                          |  39 +++
 synapse/util/task_scheduler.py                     | 364 +++++++++++++++++++++
 tests/util/test_task_scheduler.py                  | 186 +++++++++++
 10 files changed, 831 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/15891.feature
 create mode 100644 synapse/storage/databases/main/task_scheduler.py
 create mode 100644 synapse/storage/schema/main/delta/80/02_scheduled_tasks.sql
 create mode 100644 synapse/util/task_scheduler.py
 create mode 100644 tests/util/test_task_scheduler.py

(limited to 'synapse')

diff --git a/changelog.d/15891.feature b/changelog.d/15891.feature
new file mode 100644
index 0000000000..5024b5adc4
--- /dev/null
+++ b/changelog.d/15891.feature
@@ -0,0 +1 @@
+Implements a task scheduler for resumable potentially long running tasks.
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index dc79efcc14..d25e3548e0 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -91,6 +91,7 @@ from synapse.storage.databases.main.state import StateGroupWorkerStore
 from synapse.storage.databases.main.stats import StatsStore
 from synapse.storage.databases.main.stream import StreamWorkerStore
 from synapse.storage.databases.main.tags import TagsWorkerStore
+from synapse.storage.databases.main.task_scheduler import TaskSchedulerWorkerStore
 from synapse.storage.databases.main.transactions import TransactionWorkerStore
 from synapse.storage.databases.main.ui_auth import UIAuthWorkerStore
 from synapse.storage.databases.main.user_directory import UserDirectoryStore
@@ -144,6 +145,7 @@ class GenericWorkerStore(
     TransactionWorkerStore,
     LockStore,
     SessionStore,
+    TaskSchedulerWorkerStore,
 ):
     # Properties that multiple storage classes define. Tell mypy what the
     # expected type is.
diff --git a/synapse/server.py b/synapse/server.py
index e753ff0377..7cdd3ea3c2 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -142,6 +142,7 @@ from synapse.util.distributor import Distributor
 from synapse.util.macaroons import MacaroonGenerator
 from synapse.util.ratelimitutils import FederationRateLimiter
 from synapse.util.stringutils import random_string
+from synapse.util.task_scheduler import TaskScheduler
 
 logger = logging.getLogger(__name__)
 
@@ -360,6 +361,7 @@ class HomeServer(metaclass=abc.ABCMeta):
         """
         for i in self.REQUIRED_ON_BACKGROUND_TASK_STARTUP:
             getattr(self, "get_" + i + "_handler")()
+        self.get_task_scheduler()
 
     def get_reactor(self) -> ISynapseReactor:
         """
@@ -912,6 +914,9 @@ class HomeServer(metaclass=abc.ABCMeta):
         """Usage metrics shared between phone home stats and the prometheus exporter."""
         return CommonUsageMetricsManager(self)
 
-    @cache_in_self
     def get_worker_locks_handler(self) -> WorkerLocksHandler:
         return WorkerLocksHandler(self)
+
+    @cache_in_self
+    def get_task_scheduler(self) -> TaskScheduler:
+        return TaskScheduler(self)
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index e17f25e87a..a85633efcd 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -70,6 +70,7 @@ from .state import StateStore
 from .stats import StatsStore
 from .stream import StreamWorkerStore
 from .tags import TagsStore
+from .task_scheduler import TaskSchedulerWorkerStore
 from .transactions import TransactionWorkerStore
 from .ui_auth import UIAuthStore
 from .user_directory import UserDirectoryStore
@@ -127,6 +128,7 @@ class DataStore(
     CacheInvalidationWorkerStore,
     LockStore,
     SessionStore,
+    TaskSchedulerWorkerStore,
 ):
     def __init__(
         self,
diff --git a/synapse/storage/databases/main/task_scheduler.py b/synapse/storage/databases/main/task_scheduler.py
new file mode 100644
index 0000000000..1fb3180c3c
--- /dev/null
+++ b/synapse/storage/databases/main/task_scheduler.py
@@ -0,0 +1,202 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
+
+from synapse.storage._base import SQLBaseStore, db_to_json
+from synapse.storage.database import (
+    DatabasePool,
+    LoggingDatabaseConnection,
+    LoggingTransaction,
+    make_in_list_sql_clause,
+)
+from synapse.types import JsonDict, JsonMapping, ScheduledTask, TaskStatus
+from synapse.util import json_encoder
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+
+class TaskSchedulerWorkerStore(SQLBaseStore):
+    def __init__(
+        self,
+        database: DatabasePool,
+        db_conn: LoggingDatabaseConnection,
+        hs: "HomeServer",
+    ):
+        super().__init__(database, db_conn, hs)
+
+    @staticmethod
+    def _convert_row_to_task(row: Dict[str, Any]) -> ScheduledTask:
+        row["status"] = TaskStatus(row["status"])
+        if row["params"] is not None:
+            row["params"] = db_to_json(row["params"])
+        if row["result"] is not None:
+            row["result"] = db_to_json(row["result"])
+        return ScheduledTask(**row)
+
+    async def get_scheduled_tasks(
+        self,
+        *,
+        actions: Optional[List[str]] = None,
+        resource_id: Optional[str] = None,
+        statuses: Optional[List[TaskStatus]] = None,
+        max_timestamp: Optional[int] = None,
+    ) -> List[ScheduledTask]:
+        """Get a list of scheduled tasks from the DB.
+
+        Args:
+            actions: Limit the returned tasks to those specific action names
+            resource_id: Limit the returned tasks to the specific resource id, if specified
+            statuses: Limit the returned tasks to the specific statuses
+            max_timestamp: Limit the returned tasks to the ones that have
+                a timestamp less than or equal to the specified one
+
+        Returns: a list of `ScheduledTask`, ordered by increasing timestamps
+        """
+
+        def get_scheduled_tasks_txn(txn: LoggingTransaction) -> List[Dict[str, Any]]:
+            clauses: List[str] = []
+            args: List[Any] = []
+            if resource_id:
+                clauses.append("resource_id = ?")
+                args.append(resource_id)
+            if actions is not None:
+                clause, temp_args = make_in_list_sql_clause(
+                    txn.database_engine, "action", actions
+                )
+                clauses.append(clause)
+                args.extend(temp_args)
+            if statuses is not None:
+                clause, temp_args = make_in_list_sql_clause(
+                    txn.database_engine, "status", statuses
+                )
+                clauses.append(clause)
+                args.extend(temp_args)
+            if max_timestamp is not None:
+                clauses.append("timestamp <= ?")
+                args.append(max_timestamp)
+
+            sql = "SELECT * FROM scheduled_tasks"
+            if clauses:
+                sql = sql + " WHERE " + " AND ".join(clauses)
+            # The leading space keeps ORDER BY separate from the table name / last clause.
+            sql = sql + " ORDER BY timestamp"
+
+            txn.execute(sql, args)
+            return self.db_pool.cursor_to_dict(txn)
+
+        rows = await self.db_pool.runInteraction(
+            "get_scheduled_tasks", get_scheduled_tasks_txn
+        )
+        return [TaskSchedulerWorkerStore._convert_row_to_task(row) for row in rows]
+
+    async def insert_scheduled_task(self, task: ScheduledTask) -> None:
+        """Insert a specified `ScheduledTask` in the DB.
+
+        Args:
+            task: the `ScheduledTask` to insert
+        """
+        await self.db_pool.simple_insert(
+            "scheduled_tasks",
+            {
+                "id": task.id,
+                "action": task.action,
+                "status": task.status,
+                "timestamp": task.timestamp,
+                "resource_id": task.resource_id,
+                "params": None
+                if task.params is None
+                else json_encoder.encode(task.params),
+                "result": None
+                if task.result is None
+                else json_encoder.encode(task.result),
+                "error": task.error,
+            },
+            desc="insert_scheduled_task",
+        )
+
+    async def update_scheduled_task(
+        self,
+        id: str,
+        timestamp: int,
+        *,
+        status: Optional[TaskStatus] = None,
+        result: Optional[JsonMapping] = None,
+        error: Optional[str] = None,
+    ) -> bool:
+        """Update a scheduled task in the DB with some new value(s).
+
+        Args:
+            id: id of the `ScheduledTask` to update
+            timestamp: new timestamp of the task
+            status: new status of the task
+            result: new result of the task
+            error: new error of the task
+
+        Returns: `False` if no matching row was found, `True` otherwise
+        """
+        updatevalues: JsonDict = {"timestamp": timestamp}
+        if status is not None:
+            updatevalues["status"] = status
+        if result is not None:
+            updatevalues["result"] = json_encoder.encode(result)
+        if error is not None:
+            updatevalues["error"] = error
+        nb_rows = await self.db_pool.simple_update(
+            "scheduled_tasks",
+            {"id": id},
+            updatevalues,
+            desc="update_scheduled_task",
+        )
+        return nb_rows > 0
+
+    async def get_scheduled_task(self, id: str) -> Optional[ScheduledTask]:
+        """Get a specific `ScheduledTask` from its id.
+
+        Args:
+            id: the id of the task to retrieve
+
+        Returns: the task if available, `None` otherwise
+        """
+        row = await self.db_pool.simple_select_one(
+            table="scheduled_tasks",
+            keyvalues={"id": id},
+            retcols=(
+                "id",
+                "action",
+                "status",
+                "timestamp",
+                "resource_id",
+                "params",
+                "result",
+                "error",
+            ),
+            allow_none=True,
+            desc="get_scheduled_task",
+        )
+
+        return TaskSchedulerWorkerStore._convert_row_to_task(row) if row else None
+
+    async def delete_scheduled_task(self, id: str) -> None:
+        """Delete a specific task from its id.
+
+        Args:
+            id: the id of the task to delete
+        """
+        await self.db_pool.simple_delete(
+            "scheduled_tasks",
+            keyvalues={"id": id},
+            desc="delete_scheduled_task",
+        )
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 7de9949a5b..649d3c8e9f 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -113,6 +113,7 @@ Changes in SCHEMA_VERSION = 79
 
 Changes in SCHEMA_VERSION = 80
     - The event_txn_id_device_id is always written to for new events.
+    - Add tables for the task scheduler.
 """
 
 
diff --git a/synapse/storage/schema/main/delta/80/02_scheduled_tasks.sql b/synapse/storage/schema/main/delta/80/02_scheduled_tasks.sql
new file mode 100644
index 0000000000..286d109ed7
--- /dev/null
+++ b/synapse/storage/schema/main/delta/80/02_scheduled_tasks.sql
@@ -0,0 +1,28 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- cf ScheduledTask docstring for the meaning of the fields.
+CREATE TABLE IF NOT EXISTS scheduled_tasks(
+    id TEXT PRIMARY KEY,
+    action TEXT NOT NULL,
+    status TEXT NOT NULL,
+    timestamp BIGINT NOT NULL,
+    resource_id TEXT,
+    params TEXT,
+    result TEXT,
+    error TEXT
+);
+
+CREATE INDEX IF NOT EXISTS scheduled_tasks_status ON scheduled_tasks(status);
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index 073f682aca..e750417189 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -15,6 +15,7 @@
 import abc
 import re
 import string
+from enum import Enum
 from typing import (
     TYPE_CHECKING,
     AbstractSet,
@@ -969,3 +970,41 @@ class UserProfile(TypedDict):
 class RetentionPolicy:
     min_lifetime: Optional[int] = None
     max_lifetime: Optional[int] = None
+
+
+class TaskStatus(str, Enum):
+    """Status of a scheduled task"""
+
+    # Task is scheduled but not active
+    SCHEDULED = "scheduled"
+    # Task is active and probably running, and if not
+    # will be run on next scheduler loop run
+    ACTIVE = "active"
+    # Task has completed successfully
+    COMPLETE = "complete"
+    # Task is over and either returned a failed status, or had an exception
+    FAILED = "failed"
+
+
+@attr.s(auto_attribs=True, frozen=True, slots=True)
+class ScheduledTask:
+    """Description of a scheduled task"""
+
+    # Id used to identify the task
+    id: str
+    # Name of the action to be run by this task
+    action: str
+    # Current status of this task
+    status: TaskStatus
+    # If the status is SCHEDULED then this represents when it should be launched,
+    # otherwise it represents the last time this task got a change of state.
+    # In milliseconds since epoch in system time timezone, usually UTC.
+    timestamp: int
+    # Optionally bind a task to some resource id for easy retrieval
+    resource_id: Optional[str]
+    # Optional parameters that will be passed to the function ran by the task
+    params: Optional[JsonMapping]
+    # Optional result that can be updated by the running task
+    result: Optional[JsonMapping]
+    # Optional error that should be assigned a value when the status is FAILED
+    error: Optional[str]
diff --git a/synapse/util/task_scheduler.py b/synapse/util/task_scheduler.py
new file mode 100644
index 0000000000..773a8327f6
--- /dev/null
+++ b/synapse/util/task_scheduler.py
@@ -0,0 +1,364 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import TYPE_CHECKING, Awaitable, Callable, Dict, List, Optional, Set, Tuple
+
+from prometheus_client import Gauge
+
+from twisted.python.failure import Failure
+
+from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.types import JsonMapping, ScheduledTask, TaskStatus
+from synapse.util.stringutils import random_string
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+running_tasks_gauge = Gauge(
+    "synapse_scheduler_running_tasks",
+    "The number of concurrent running tasks handled by the TaskScheduler",
+)
+
+
+class TaskScheduler:
+    """
+    This is a simple task scheduler aimed at resumable tasks: usually we use `run_in_background`
+    to launch a background task, or Twisted `deferLater` if we want to do so later on.
+
+    The problem with that is that the tasks will just stop and never be resumed if synapse
+    is stopped for whatever reason.
+
+    How this works:
+    - A function mapped to a named action should first be registered with `register_action`.
+    This function will be called when trying to resume tasks after a synapse shutdown,
+    so this registration should happen when synapse is initialised, NOT right before scheduling
+    a task.
+    - A task can then be launched using this named action with `schedule_task`. A `params` dict
+    can be passed, and it will be available to the registered function when launched. This task
+    can be launched either now-ish, or later on by giving a `timestamp` parameter.
+
+    The function may call `update_task` at any time to update the `result` of the task,
+    and this can be used to resume the task at a specific point and/or to convey a result to
+    the code launching the task.
+    You can also specify the `result` (and/or an `error`) when returning from the function.
+
+    The reconciliation loop runs every minute (see `SCHEDULE_INTERVAL_MS`), so this is not a
+    precise scheduler. When wanting to launch now, the launch will still not happen before the next loop run.
+
+    Tasks will be run on the worker specified with `run_background_tasks_on` config,
+    or the main one by default.
+    There is a limit of 10 concurrent tasks, so tasks may be delayed if the pool is already
+    full. In this regard, please take great care that scheduled tasks can actually finish.
+    For now there is no mechanism to stop a running task if it is stuck.
+    """
+
+    # Precision of the scheduler, evaluation of tasks to run will only happen
+    # every `SCHEDULE_INTERVAL_MS` ms
+    SCHEDULE_INTERVAL_MS = 1 * 60 * 1000  # 1mn
+    # Time before a complete or failed task is deleted from the DB
+    KEEP_TASKS_FOR_MS = 7 * 24 * 60 * 60 * 1000  # 1 week
+    # Maximum number of tasks that can run at the same time
+    MAX_CONCURRENT_RUNNING_TASKS = 10
+    # Time from the last task update after which we will log a warning
+    LAST_UPDATE_BEFORE_WARNING_MS = 24 * 60 * 60 * 1000  # 24hrs
+
+    def __init__(self, hs: "HomeServer"):
+        self._store = hs.get_datastores().main
+        self._clock = hs.get_clock()
+        self._running_tasks: Set[str] = set()
+        # A map between action names and their registered function
+        self._actions: Dict[
+            str,
+            Callable[
+                [ScheduledTask, bool],
+                Awaitable[Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]],
+            ],
+        ] = {}
+        self._run_background_tasks = hs.config.worker.run_background_tasks
+
+        if self._run_background_tasks:
+            self._clock.looping_call(
+                run_as_background_process,
+                TaskScheduler.SCHEDULE_INTERVAL_MS,
+                "handle_scheduled_tasks",
+                self._handle_scheduled_tasks,
+            )
+
+    def register_action(
+        self,
+        function: Callable[
+            [ScheduledTask, bool],
+            Awaitable[Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]],
+        ],
+        action_name: str,
+    ) -> None:
+        """Register a function to be executed when an action is scheduled with
+        the specified action name.
+
+        Actions need to be registered as early as possible so that a resumed action
+        can find its matching function. It's usually better to NOT do that right before
+        calling `schedule_task` but rather in an `__init__` method.
+
+        Args:
+            function: The function to be executed for this action. The parameters
+                passed to the function when launched are the `ScheduledTask` being run,
+                and a `first_launch` boolean to signal if it's a resumed task or the first
+                launch of it. The function should return a tuple of new `status`, `result`
+                and `error` as specified in `ScheduledTask`.
+            action_name: The name of the action to be associated with the function
+        """
+        self._actions[action_name] = function
+
+    async def schedule_task(
+        self,
+        action: str,
+        *,
+        resource_id: Optional[str] = None,
+        timestamp: Optional[int] = None,
+        params: Optional[JsonMapping] = None,
+    ) -> str:
+        """Schedule a new potentially resumable task. A function matching the specified
+        `action` should have been previously registered with `register_action`.
+
+        Args:
+            action: the name of a previously registered action
+            resource_id: a task can be associated with a resource id to facilitate
+                getting all tasks associated with a specific resource
+            timestamp: if `None`, the task will be launched as soon as possible, otherwise it
+                will be launched as soon as possible after the `timestamp` value.
+                Note that this scheduler is not meant to be precise, and the scheduling
+                could be delayed if too many tasks are already running
+            params: a set of parameters that can be easily accessed from inside the
+                executed function
+
+        Returns:
+            The id of the scheduled task
+        """
+        if action not in self._actions:
+            raise Exception(
+                f"No function associated with action {action} of the scheduled task"
+            )
+
+        if timestamp is None or timestamp < self._clock.time_msec():
+            timestamp = self._clock.time_msec()
+
+        task = ScheduledTask(
+            random_string(16),
+            action,
+            TaskStatus.SCHEDULED,
+            timestamp,
+            resource_id,
+            params,
+            result=None,
+            error=None,
+        )
+        await self._store.insert_scheduled_task(task)
+
+        return task.id
+
+    async def update_task(
+        self,
+        id: str,
+        *,
+        timestamp: Optional[int] = None,
+        status: Optional[TaskStatus] = None,
+        result: Optional[JsonMapping] = None,
+        error: Optional[str] = None,
+    ) -> bool:
+        """Update some task associated values. This is exposed publicly so it can
+        be used inside task functions, mainly to update the result and be able to
+        resume a task at a specific step after a restart of synapse.
+
+        It can also be used to stage a task, by setting the `status` to `SCHEDULED` with
+        a new timestamp.
+
+        The `status` can only be set to `ACTIVE` or `SCHEDULED`, `COMPLETE` and `FAILED`
+        are terminal status and can only be set by returning it in the function.
+
+        Args:
+            id: the id of the task to update
+            timestamp: useful to schedule a new stage of the task at a later date
+            status: the new `TaskStatus` of the task
+            result: the new result of the task
+            error: the new error of the task
+        """
+        if status == TaskStatus.COMPLETE or status == TaskStatus.FAILED:
+            raise Exception(
+                "update_task can't be called with a FAILED or COMPLETE status"
+            )
+
+        if timestamp is None:
+            timestamp = self._clock.time_msec()
+        return await self._store.update_scheduled_task(
+            id,
+            timestamp,
+            status=status,
+            result=result,
+            error=error,
+        )
+
+    async def get_task(self, id: str) -> Optional[ScheduledTask]:
+        """Get a specific task description by id.
+
+        Args:
+            id: the id of the task to retrieve
+
+        Returns:
+            The task information or `None` if it doesn't exist or it has
+            already been removed because it's too old.
+        """
+        return await self._store.get_scheduled_task(id)
+
+    async def get_tasks(
+        self,
+        *,
+        actions: Optional[List[str]] = None,
+        resource_id: Optional[str] = None,
+        statuses: Optional[List[TaskStatus]] = None,
+        max_timestamp: Optional[int] = None,
+    ) -> List[ScheduledTask]:
+        """Get a list of tasks. Returns all the tasks if no args is provided.
+
+        If an arg is `None` all tasks matching the other args will be selected.
+        If an arg is an empty list, the corresponding value of the task needs
+        to be `None` to be selected.
+
+        Args:
+            actions: Limit the returned tasks to those specific action names
+            resource_id: Limit the returned tasks to the specific resource id, if specified
+            statuses: Limit the returned tasks to the specific statuses
+            max_timestamp: Limit the returned tasks to the ones that have
+                a timestamp inferior to the specified one
+
+        Returns
+            A list of `ScheduledTask`, ordered by increasing timestamps
+        """
+        return await self._store.get_scheduled_tasks(
+            actions=actions,
+            resource_id=resource_id,
+            statuses=statuses,
+            max_timestamp=max_timestamp,
+        )
+
+    async def delete_task(self, id: str) -> None:
+        """Delete a task. Running tasks can't be deleted.
+
+        Can only be called from the worker handling the task scheduling.
+
+        Args:
+            id: id of the task to delete
+        """
+        if self.task_is_running(id):
+            raise Exception(f"Task {id} is currently running and can't be deleted")
+        await self._store.delete_scheduled_task(id)
+
+    def task_is_running(self, id: str) -> bool:
+        """Check if a task is currently running.
+
+        Can only be called from the worker handling the task scheduling.
+
+        Args:
+            id: id of the task to check
+        """
+        assert self._run_background_tasks
+        return id in self._running_tasks
+
+    async def _handle_scheduled_tasks(self) -> None:
+        """Main loop taking care of launching tasks and cleaning up old ones."""
+        await self._launch_scheduled_tasks()
+        await self._clean_scheduled_tasks()
+
+    async def _launch_scheduled_tasks(self) -> None:
+        """Retrieve and launch scheduled tasks that should be running at that time."""
+        for task in await self.get_tasks(statuses=[TaskStatus.ACTIVE]):
+            if not self.task_is_running(task.id):
+                if (
+                    len(self._running_tasks)
+                    < TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS
+                ):
+                    await self._launch_task(task, first_launch=False)
+            else:
+                if (
+                    self._clock.time_msec()
+                    > task.timestamp + TaskScheduler.LAST_UPDATE_BEFORE_WARNING_MS
+                ):
+                    logger.warning(
+                        f"Task {task.id} (action {task.action}) has seen no update for more than 24h and may be stuck"
+                    )
+        for task in await self.get_tasks(
+            statuses=[TaskStatus.SCHEDULED], max_timestamp=self._clock.time_msec()
+        ):
+            if (
+                not self.task_is_running(task.id)
+                and len(self._running_tasks)
+                < TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS
+            ):
+                await self._launch_task(task, first_launch=True)
+
+        running_tasks_gauge.set(len(self._running_tasks))
+
+    async def _clean_scheduled_tasks(self) -> None:
+        """Clean old complete or failed jobs to avoid clutter the DB."""
+        for task in await self._store.get_scheduled_tasks(
+            statuses=[TaskStatus.FAILED, TaskStatus.COMPLETE]
+        ):
+            # FAILED and COMPLETE tasks should never be running
+            assert not self.task_is_running(task.id)
+            if (
+                self._clock.time_msec()
+                > task.timestamp + TaskScheduler.KEEP_TASKS_FOR_MS
+            ):
+                await self._store.delete_scheduled_task(task.id)
+
+    async def _launch_task(self, task: ScheduledTask, first_launch: bool) -> None:
+        """Launch a scheduled task now.
+
+        Args:
+            task: the task to launch
+            first_launch: `True` if it's the first time it is launched, `False` otherwise
+        """
+        assert task.action in self._actions
+
+        function = self._actions[task.action]
+
+        async def wrapper() -> None:
+            try:
+                (status, result, error) = await function(task, first_launch)
+            except Exception:
+                f = Failure()
+                logger.error(
+                    f"scheduled task {task.id} failed",
+                    exc_info=(f.type, f.value, f.getTracebackObject()),
+                )
+                status = TaskStatus.FAILED
+                result = None
+                error = f.getErrorMessage()
+
+            await self._store.update_scheduled_task(
+                task.id,
+                self._clock.time_msec(),
+                status=status,
+                result=result,
+                error=error,
+            )
+            self._running_tasks.remove(task.id)
+
+        self._running_tasks.add(task.id)
+        await self.update_task(task.id, status=TaskStatus.ACTIVE)
+        description = f"{task.id}-{task.action}"
+        run_as_background_process(description, wrapper)
diff --git a/tests/util/test_task_scheduler.py b/tests/util/test_task_scheduler.py
new file mode 100644
index 0000000000..3a97559bf0
--- /dev/null
+++ b/tests/util/test_task_scheduler.py
@@ -0,0 +1,186 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional, Tuple
+
+from twisted.internet.task import deferLater
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.server import HomeServer
+from synapse.types import JsonMapping, ScheduledTask, TaskStatus
+from synapse.util import Clock
+from synapse.util.task_scheduler import TaskScheduler
+
+from tests import unittest
+
+
+class TestTaskScheduler(unittest.HomeserverTestCase):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.task_scheduler = hs.get_task_scheduler()
+        self.task_scheduler.register_action(self._test_task, "_test_task")
+        self.task_scheduler.register_action(self._sleeping_task, "_sleeping_task")
+        self.task_scheduler.register_action(self._raising_task, "_raising_task")
+        self.task_scheduler.register_action(self._resumable_task, "_resumable_task")
+
+    async def _test_task(
+        self, task: ScheduledTask, first_launch: bool
+    ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
+        # This test task will copy the parameters to the result
+        result = None
+        if task.params:
+            result = task.params
+        return (TaskStatus.COMPLETE, result, None)
+
+    def test_schedule_task(self) -> None:
+        """Schedule a task in the future with some parameters to be copied as a result and check it executed correctly.
+        Also check that it gets removed after `KEEP_TASKS_FOR_MS`."""
+        timestamp = self.clock.time_msec() + 30 * 1000
+        task_id = self.get_success(
+            self.task_scheduler.schedule_task(
+                "_test_task",
+                timestamp=timestamp,
+                params={"val": 1},
+            )
+        )
+
+        task = self.get_success(self.task_scheduler.get_task(task_id))
+        assert task is not None
+        self.assertEqual(task.status, TaskStatus.SCHEDULED)
+        self.assertIsNone(task.result)
+
+        # The timestamp being 30s after now, the task should have been executed
+        # after the first scheduling loop is run
+        self.reactor.advance(TaskScheduler.SCHEDULE_INTERVAL_MS / 1000)
+
+        task = self.get_success(self.task_scheduler.get_task(task_id))
+        assert task is not None
+        self.assertEqual(task.status, TaskStatus.COMPLETE)
+        assert task.result is not None
+        # The passed parameter should have been copied to the result
+        self.assertTrue(task.result.get("val") == 1)
+
+        # Let's wait for the complete task to be deleted and hence unavailable
+        self.reactor.advance((TaskScheduler.KEEP_TASKS_FOR_MS / 1000) + 1)
+
+        task = self.get_success(self.task_scheduler.get_task(task_id))
+        self.assertIsNone(task)
+
+    async def _sleeping_task(
+        self, task: ScheduledTask, first_launch: bool
+    ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
+        # Sleep for a second
+        await deferLater(self.reactor, 1, lambda: None)
+        return TaskStatus.COMPLETE, None, None
+
+    def test_schedule_lot_of_tasks(self) -> None:
+        """Schedule more than `TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS` tasks and check the behavior."""
+        timestamp = self.clock.time_msec() + 30 * 1000
+        task_ids = []
+        for i in range(TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS + 1):
+            task_ids.append(
+                self.get_success(
+                    self.task_scheduler.schedule_task(
+                        "_sleeping_task",
+                        timestamp=timestamp,
+                        params={"val": i},
+                    )
+                )
+            )
+
+        # The timestamp being 30s after now, the task should have been executed
+        # after the first scheduling loop is run
+        self.reactor.advance((TaskScheduler.SCHEDULE_INTERVAL_MS / 1000))
+
+        # This is to give the time to the sleeping tasks to finish
+        self.reactor.advance(1)
+
+        # Check that only MAX_CONCURRENT_RUNNING_TASKS tasks have run and that one
+        # is still scheduled.
+        tasks = [
+            self.get_success(self.task_scheduler.get_task(task_id))
+            for task_id in task_ids
+        ]
+
+        self.assertEquals(
+            len(
+                [t for t in tasks if t is not None and t.status == TaskStatus.COMPLETE]
+            ),
+            TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS,
+        )
+
+        scheduled_tasks = [
+            t for t in tasks if t is not None and t.status == TaskStatus.SCHEDULED
+        ]
+        self.assertEquals(len(scheduled_tasks), 1)
+
+        self.reactor.advance((TaskScheduler.SCHEDULE_INTERVAL_MS / 1000))
+        self.reactor.advance(1)
+
+        # Check that the last task has been properly executed after the next scheduler loop run
+        prev_scheduled_task = self.get_success(
+            self.task_scheduler.get_task(scheduled_tasks[0].id)
+        )
+        assert prev_scheduled_task is not None
+        self.assertEquals(
+            prev_scheduled_task.status,
+            TaskStatus.COMPLETE,
+        )
+
+    async def _raising_task(
+        self, task: ScheduledTask, first_launch: bool
+    ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
+        raise Exception("raising")
+
+    def test_schedule_raising_task(self) -> None:
+        """Schedule a task raising an exception and check it runs to failure and reports the exception content."""
+        task_id = self.get_success(self.task_scheduler.schedule_task("_raising_task"))
+
+        self.reactor.advance((TaskScheduler.SCHEDULE_INTERVAL_MS / 1000))
+
+        task = self.get_success(self.task_scheduler.get_task(task_id))
+        assert task is not None
+        self.assertEqual(task.status, TaskStatus.FAILED)
+        self.assertEqual(task.error, "raising")
+
+    async def _resumable_task(
+        self, task: ScheduledTask, first_launch: bool
+    ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
+        if task.result and "in_progress" in task.result:
+            return TaskStatus.COMPLETE, {"success": True}, None
+        else:
+            await self.task_scheduler.update_task(task.id, result={"in_progress": True})
+            # Await forever to simulate an aborted task because of a restart
+            await deferLater(self.reactor, 2**16, lambda: None)
+            # This should never be called
+            return TaskStatus.ACTIVE, None, None
+
+    def test_schedule_resumable_task(self) -> None:
+        """Schedule a resumable task and check that it gets properly resumed and completed after simulating a synapse restart."""
+        task_id = self.get_success(self.task_scheduler.schedule_task("_resumable_task"))
+
+        self.reactor.advance((TaskScheduler.SCHEDULE_INTERVAL_MS / 1000))
+
+        task = self.get_success(self.task_scheduler.get_task(task_id))
+        assert task is not None
+        self.assertEqual(task.status, TaskStatus.ACTIVE)
+
+        # Simulate a synapse restart by emptying the list of running tasks
+        self.task_scheduler._running_tasks = set()
+        self.reactor.advance((TaskScheduler.SCHEDULE_INTERVAL_MS / 1000))
+
+        task = self.get_success(self.task_scheduler.get_task(task_id))
+        assert task is not None
+        self.assertEqual(task.status, TaskStatus.COMPLETE)
+        assert task.result is not None
+        self.assertTrue(task.result.get("success"))
-- 
cgit 1.5.1


From d6ae4041a4c014a8c234f1afccc80867bf5b7df0 Mon Sep 17 00:00:00 2001
From: Maximilian Bosch <maximilian@mbosch.me>
Date: Mon, 21 Aug 2023 21:32:17 +0200
Subject: Add `client_secret_path` as alternative for `client_secret` for OIDC
 config (#16030)

---
 changelog.d/16030.feature                        |  1 +
 docs/usage/configuration/config_documentation.md |  8 ++++++++
 synapse/config/oidc.py                           | 16 +++++++++++++++-
 3 files changed, 24 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/16030.feature

(limited to 'synapse')

diff --git a/changelog.d/16030.feature b/changelog.d/16030.feature
new file mode 100644
index 0000000000..c2f068085f
--- /dev/null
+++ b/changelog.d/16030.feature
@@ -0,0 +1 @@
+Allow specifying `client_secret_path` as an alternative to `client_secret` for OIDC providers. This avoids leaking the client secret in the homeserver config. Contributed by @Ma27.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 6601bba9f2..743c51d76a 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3204,6 +3204,14 @@ Options for each entry include:
 
 * `client_secret`: oauth2 client secret to use. May be omitted if
   `client_secret_jwt_key` is given, or if `client_auth_method` is 'none'.
+  Must be omitted if `client_secret_path` is specified.
+
+* `client_secret_path`: path to the oauth2 client secret to use. With that
+   it's not necessary to leak secrets into the config file itself.
+   Mutually exclusive with `client_secret`. Can be omitted if
+   `client_secret_jwt_key` is specified.
+
+   *Added in Synapse 1.91.0.*
 
 * `client_secret_jwt_key`: Alternative to client_secret: details of a key used
    to create a JSON Web Token to be used as an OAuth2 client secret. If
diff --git a/synapse/config/oidc.py b/synapse/config/oidc.py
index 77c1d1dc8e..574d6afb95 100644
--- a/synapse/config/oidc.py
+++ b/synapse/config/oidc.py
@@ -280,6 +280,20 @@ def _parse_oidc_config_dict(
         for x in oidc_config.get("attribute_requirements", [])
     ]
 
+    # Read from either `client_secret_path` or `client_secret`. If both exist, error.
+    client_secret = oidc_config.get("client_secret")
+    client_secret_path = oidc_config.get("client_secret_path")
+    if client_secret_path is not None:
+        if client_secret is None:
+            client_secret = read_file(
+                client_secret_path, config_path + ("client_secret_path",)
+            ).rstrip("\n")
+        else:
+            raise ConfigError(
+                "Cannot specify both client_secret and client_secret_path",
+                config_path + ("client_secret",),
+            )
+
     return OidcProviderConfig(
         idp_id=idp_id,
         idp_name=oidc_config.get("idp_name", "OIDC"),
@@ -288,7 +302,7 @@ def _parse_oidc_config_dict(
         discover=oidc_config.get("discover", True),
         issuer=oidc_config["issuer"],
         client_id=oidc_config["client_id"],
-        client_secret=oidc_config.get("client_secret"),
+        client_secret=client_secret,
         client_secret_jwt_key=client_secret_jwt_key,
         client_auth_method=oidc_config.get("client_auth_method", "client_secret_basic"),
         pkce_method=oidc_config.get("pkce_method", "auto"),
-- 
cgit 1.5.1


From 7dbac123f98a2d59d09a63efe4543ee850a8d630 Mon Sep 17 00:00:00 2001
From: Hugh Nimmo-Smith <hughns@users.noreply.github.com>
Date: Tue, 22 Aug 2023 12:42:08 +0100
Subject: Disallow user_consent where experimental MSC3861 is enabled (#16127)

---
 changelog.d/16127.bugfix              |  1 +
 synapse/config/experimental.py        |  7 +++++++
 tests/config/test_oauth_delegation.py | 16 ++++++++++++++++
 3 files changed, 24 insertions(+)
 create mode 100644 changelog.d/16127.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16127.bugfix b/changelog.d/16127.bugfix
new file mode 100644
index 0000000000..0308fdfd45
--- /dev/null
+++ b/changelog.d/16127.bugfix
@@ -0,0 +1 @@
+User consent features cannot be enabled when using experimental MSC3861.
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index ac9449b18f..d4cf9a0555 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -173,6 +173,13 @@ class MSC3861:
                 ("enable_registration",),
             )
 
+        # We only need to test the user consent version, as it must be set if the user_consent section was present in the config
+        if root.consent.user_consent_version is not None:
+            raise ConfigError(
+                "User consent cannot be enabled when OAuth delegation is enabled",
+                ("user_consent",),
+            )
+
         if (
             root.oidc.oidc_enabled
             or root.saml2.saml2_enabled
diff --git a/tests/config/test_oauth_delegation.py b/tests/config/test_oauth_delegation.py
index f57c813a58..35f7b85dc7 100644
--- a/tests/config/test_oauth_delegation.py
+++ b/tests/config/test_oauth_delegation.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
 from unittest.mock import Mock
 
 from synapse.config import ConfigError
@@ -167,6 +168,21 @@ class MSC3861OAuthDelegation(TestCase):
         with self.assertRaises(ConfigError):
             self.parse_config()
 
+    def test_user_consent_cannot_be_enabled(self) -> None:
+        tmpdir = self.mktemp()
+        os.mkdir(tmpdir)
+        self.config_dict["user_consent"] = {
+            "require_at_registration": True,
+            "version": "1",
+            "template_dir": tmpdir,
+            "server_notice_content": {
+                "msgtype": "m.text",
+                "body": "foo",
+            },
+        }
+        with self.assertRaises(ConfigError):
+            self.parse_config()
+
     def test_password_config_cannot_be_enabled(self) -> None:
         self.config_dict["password_config"] = {"enabled": True}
         with self.assertRaises(ConfigError):
-- 
cgit 1.5.1


From 69048f7b4848ab6a4ae6cb233f8cbf36d73c0ba1 Mon Sep 17 00:00:00 2001
From: Shay <hillerys@element.io>
Date: Tue, 22 Aug 2023 07:15:34 -0700
Subject: Add an admin endpoint to allow authorizing server to signal token
 revocations (#16125)

---
 changelog.d/16125.misc                             |  1 +
 synapse/api/auth/msc3861_delegated.py              | 13 +++++
 synapse/replication/tcp/client.py                  | 12 +++++
 synapse/rest/admin/__init__.py                     |  3 ++
 synapse/rest/admin/oidc.py                         | 55 +++++++++++++++++++
 synapse/storage/databases/main/cache.py            | 13 +++++
 synapse/storage/databases/main/devices.py          |  9 ++++
 synapse/util/caches/expiringcache.py               | 22 ++++++++
 tests/handlers/test_oauth_delegation.py            | 34 +++++++++++-
 tests/replication/test_intro_token_invalidation.py | 62 ++++++++++++++++++++++
 10 files changed, 223 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/16125.misc
 create mode 100644 synapse/rest/admin/oidc.py
 create mode 100644 tests/replication/test_intro_token_invalidation.py

(limited to 'synapse')

diff --git a/changelog.d/16125.misc b/changelog.d/16125.misc
new file mode 100644
index 0000000000..2f1bf23108
--- /dev/null
+++ b/changelog.d/16125.misc
@@ -0,0 +1 @@
+Add an admin endpoint to allow authorizing server to signal token revocations.
diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py
index 4bdfe31b22..14cba50c90 100644
--- a/synapse/api/auth/msc3861_delegated.py
+++ b/synapse/api/auth/msc3861_delegated.py
@@ -438,3 +438,16 @@ class MSC3861DelegatedAuth(BaseAuth):
             scope=scope,
             is_guest=(has_guest_scope and not has_user_scope),
         )
+
+    def invalidate_cached_tokens(self, keys: List[str]) -> None:
+        """
+        Invalidate the entries in the introspection token cache corresponding to the given keys
+        """
+        for key in keys:
+            self._token_cache.invalidate(key)
+
+    def invalidate_token_cache(self) -> None:
+        """
+        Invalidate the entire token cache.
+        """
+        self._token_cache.invalidate_all()
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 139f57cf86..04e8cff6ea 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -26,6 +26,7 @@ from synapse.logging.context import PreserveLoggingContext, make_deferred_yielda
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.replication.tcp.streams import (
     AccountDataStream,
+    CachesStream,
     DeviceListsStream,
     PushersStream,
     PushRulesStream,
@@ -73,6 +74,7 @@ class ReplicationDataHandler:
         self._instance_name = hs.get_instance_name()
         self._typing_handler = hs.get_typing_handler()
         self._state_storage_controller = hs.get_storage_controllers().state
+        self.auth = hs.get_auth()
 
         self._notify_pushers = hs.config.worker.start_pushers
         self._pusher_pool = hs.get_pusherpool()
@@ -218,6 +220,16 @@ class ReplicationDataHandler:
                 self._state_storage_controller.notify_event_un_partial_stated(
                     row.event_id
                 )
+        # invalidate the introspection token cache
+        elif stream_name == CachesStream.NAME:
+            for row in rows:
+                if row.cache_func == "introspection_token_invalidation":
+                    if row.keys[0] is None:
+                        # invalidate the whole cache
+                        # mypy ignore - the token cache is defined on MSC3861DelegatedAuth
+                        self.auth.invalidate_token_cache()  # type: ignore[attr-defined]
+                    else:
+                        self.auth.invalidate_cached_tokens(row.keys)  # type: ignore[attr-defined]
 
         await self._presence_handler.process_replication_rows(
             stream_name, instance_name, token, rows
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index fe8177ed4d..55e752fda8 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -47,6 +47,7 @@ from synapse.rest.admin.federation import (
     ListDestinationsRestServlet,
 )
 from synapse.rest.admin.media import ListMediaInRoom, register_servlets_for_media_repo
+from synapse.rest.admin.oidc import OIDCTokenRevocationRestServlet
 from synapse.rest.admin.registration_tokens import (
     ListRegistrationTokensRestServlet,
     NewRegistrationTokenRestServlet,
@@ -297,6 +298,8 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     BackgroundUpdateRestServlet(hs).register(http_server)
     BackgroundUpdateStartJobRestServlet(hs).register(http_server)
     ExperimentalFeaturesRestServlet(hs).register(http_server)
+    if hs.config.experimental.msc3861.enabled:
+        OIDCTokenRevocationRestServlet(hs).register(http_server)
 
 
 def register_servlets_for_client_rest_resource(
diff --git a/synapse/rest/admin/oidc.py b/synapse/rest/admin/oidc.py
new file mode 100644
index 0000000000..64d2d40550
--- /dev/null
+++ b/synapse/rest/admin/oidc.py
@@ -0,0 +1,55 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+from http import HTTPStatus
+from typing import TYPE_CHECKING, Dict, Tuple
+
+from synapse.http.servlet import RestServlet
+from synapse.http.site import SynapseRequest
+from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+
+class OIDCTokenRevocationRestServlet(RestServlet):
+    """
+    Delete a given token introspection response - identified by the `jti` field - from the
+    introspection token cache when a token is revoked at the authorizing server
+    """
+
+    PATTERNS = admin_patterns("/OIDC_token_revocation/(?P<token_id>[^/]*)")
+
+    def __init__(self, hs: "HomeServer"):
+        super().__init__()
+        auth = hs.get_auth()
+
+        # If this endpoint is loaded then we must have enabled delegated auth.
+        from synapse.api.auth.msc3861_delegated import MSC3861DelegatedAuth
+
+        assert isinstance(auth, MSC3861DelegatedAuth)
+
+        self.auth = auth
+        self.store = hs.get_datastores().main
+
+    async def on_DELETE(
+        self, request: SynapseRequest, token_id: str
+    ) -> Tuple[HTTPStatus, Dict]:
+        await assert_requester_is_admin(self.auth, request)
+
+        self.auth._token_cache.invalidate(token_id)
+
+        # make sure we invalidate the cache on any workers
+        await self.store.stream_introspection_token_invalidation((token_id,))
+
+        return HTTPStatus.OK, {}
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index 2fbd389c71..18905e07b6 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -584,6 +584,19 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
         else:
             return 0
 
+    async def stream_introspection_token_invalidation(
+        self, key: Tuple[Optional[str]]
+    ) -> None:
+        """
+        Stream an invalidation request for the introspection token cache to workers
+
+        Args:
+            key: token_id of the introspection token to remove from the cache
+        """
+        await self.send_invalidation_to_replication(
+            "introspection_token_invalidation", key
+        )
+
     @wrap_as_background_process("clean_up_old_cache_invalidations")
     async def _clean_up_cache_invalidation_wrapper(self) -> None:
         """
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index e4162f846b..fa69a4a298 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -33,6 +33,7 @@ from typing_extensions import Literal
 
 from synapse.api.constants import EduTypes
 from synapse.api.errors import Codes, StoreError
+from synapse.config.homeserver import HomeServerConfig
 from synapse.logging.opentracing import (
     get_active_span_text_map,
     set_tag,
@@ -1663,6 +1664,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         self.device_id_exists_cache: LruCache[
             Tuple[str, str], Literal[True]
         ] = LruCache(cache_name="device_id_exists", max_size=10000)
+        self.config: HomeServerConfig = hs.config
 
     async def store_device(
         self,
@@ -1784,6 +1786,13 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         for device_id in device_ids:
             self.device_id_exists_cache.invalidate((user_id, device_id))
 
+        # TODO: don't nuke the entire cache once there is a way to associate
+        #  device_id -> introspection_token
+        if self.config.experimental.msc3861.enabled:
+            # mypy ignore - the token cache is defined on MSC3861DelegatedAuth
+            self.auth._token_cache.invalidate_all()  # type: ignore[attr-defined]
+            await self.stream_introspection_token_invalidation((None,))
+
     async def update_device(
         self, user_id: str, device_id: str, new_display_name: Optional[str] = None
     ) -> None:
diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py
index 01ad02af67..9a3e10ddee 100644
--- a/synapse/util/caches/expiringcache.py
+++ b/synapse/util/caches/expiringcache.py
@@ -140,6 +140,20 @@ class ExpiringCache(Generic[KT, VT]):
 
         return value.value
 
+    def invalidate(self, key: KT) -> None:
+        """
+        Remove the given key from the cache.
+        """
+
+        value = self._cache.pop(key, None)
+        if value:
+            if self.iterable:
+                self.metrics.inc_evictions(
+                    EvictionReason.invalidation, len(value.value)
+                )
+            else:
+                self.metrics.inc_evictions(EvictionReason.invalidation)
+
     def __contains__(self, key: KT) -> bool:
         return key in self._cache
 
@@ -193,6 +207,14 @@ class ExpiringCache(Generic[KT, VT]):
             len(self),
         )
 
+    def invalidate_all(self) -> None:
+        """
+        Remove all items from the cache.
+        """
+        keys = set(self._cache.keys())
+        for key in keys:
+            self._cache.pop(key)
+
     def __len__(self) -> int:
         if self.iterable:
             return sum(len(entry.value) for entry in self._cache.values())
diff --git a/tests/handlers/test_oauth_delegation.py b/tests/handlers/test_oauth_delegation.py
index 1456b675a7..b891e84690 100644
--- a/tests/handlers/test_oauth_delegation.py
+++ b/tests/handlers/test_oauth_delegation.py
@@ -14,7 +14,7 @@
 
 from http import HTTPStatus
 from typing import Any, Dict, Union
-from unittest.mock import ANY, Mock
+from unittest.mock import ANY, AsyncMock, Mock
 from urllib.parse import parse_qs
 
 from signedjson.key import (
@@ -588,6 +588,38 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
         )
         self.assertEqual(self.http_client.request.call_count, 2)
 
+    def test_revocation_endpoint(self) -> None:
+        # mock introspection response and then admin verification response
+        self.http_client.request = AsyncMock(
+            side_effect=[
+                FakeResponse.json(
+                    code=200, payload={"active": True, "jti": "open_sesame"}
+                ),
+                FakeResponse.json(
+                    code=200,
+                    payload={
+                        "active": True,
+                        "sub": SUBJECT,
+                        "scope": " ".join([SYNAPSE_ADMIN_SCOPE, MATRIX_USER_SCOPE]),
+                        "username": USERNAME,
+                    },
+                ),
+            ]
+        )
+
+        # cache a token to delete
+        introspection_token = self.get_success(
+            self.auth._introspect_token("open_sesame")  # type: ignore[attr-defined]
+        )
+        self.assertEqual(self.auth._token_cache.get("open_sesame"), introspection_token)  # type: ignore[attr-defined]
+
+        # delete the revoked token
+        introspection_token_id = "open_sesame"
+        url = f"/_synapse/admin/v1/OIDC_token_revocation/{introspection_token_id}"
+        channel = self.make_request("DELETE", url, access_token="mockAccessToken")
+        self.assertEqual(channel.code, 200)
+        self.assertEqual(self.auth._token_cache.get("open_sesame"), None)  # type: ignore[attr-defined]
+
     def make_device_keys(self, user_id: str, device_id: str) -> JsonDict:
         # We only generate a master key to simplify the test.
         master_signing_key = generate_signing_key(device_id)
diff --git a/tests/replication/test_intro_token_invalidation.py b/tests/replication/test_intro_token_invalidation.py
new file mode 100644
index 0000000000..f90678b6b1
--- /dev/null
+++ b/tests/replication/test_intro_token_invalidation.py
@@ -0,0 +1,62 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict
+
+import synapse.rest.admin._base
+
+from tests.replication._base import BaseMultiWorkerStreamTestCase
+
+
+class IntrospectionTokenCacheInvalidationTestCase(BaseMultiWorkerStreamTestCase):
+    servlets = [synapse.rest.admin.register_servlets]
+
+    def default_config(self) -> Dict[str, Any]:
+        config = super().default_config()
+        config["disable_registration"] = True
+        config["experimental_features"] = {
+            "msc3861": {
+                "enabled": True,
+                "issuer": "some_dude",
+                "client_id": "ID",
+                "client_auth_method": "client_secret_post",
+                "client_secret": "secret",
+            }
+        }
+        return config
+
+    def test_stream_introspection_token_invalidation(self) -> None:
+        worker_hs = self.make_worker_hs("synapse.app.generic_worker")
+        auth = worker_hs.get_auth()
+        store = self.hs.get_datastores().main
+
+        # add a token to the cache on the worker
+        auth._token_cache["open_sesame"] = "intro_token"  # type: ignore[attr-defined]
+
+        # stream the invalidation from the master
+        self.get_success(
+            store.stream_introspection_token_invalidation(("open_sesame",))
+        )
+
+        # check that the cache on the worker was invalidated
+        self.assertEqual(auth._token_cache.get("open_sesame"), None)  # type: ignore[attr-defined]
+
+        # test invalidating whole cache
+        for i in range(0, 5):
+            auth._token_cache[f"open_sesame_{i}"] = f"intro_token_{i}"  # type: ignore[attr-defined]
+        self.assertEqual(len(auth._token_cache), 5)  # type: ignore[attr-defined]
+
+        self.get_success(store.stream_introspection_token_invalidation((None,)))
+
+        self.assertEqual(len(auth._token_cache), 0)  # type: ignore[attr-defined]
-- 
cgit 1.5.1


From 0ba17777be81ba9457defb407112b664042a14d2 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Tue, 22 Aug 2023 16:47:59 +0200
Subject: Disable `m.3pid_changes` capability when MSC3861 is enabled. (#16134)

---
 changelog.d/16127.bugfix              |  2 +-
 changelog.d/16134.bugfix              |  1 +
 synapse/config/experimental.py        |  6 ++++++
 synapse/config/registration.py        | 11 ++++++++++-
 tests/config/test_oauth_delegation.py |  5 +++++
 5 files changed, 23 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/16134.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16127.bugfix b/changelog.d/16127.bugfix
index 0308fdfd45..9ce5f4a705 100644
--- a/changelog.d/16127.bugfix
+++ b/changelog.d/16127.bugfix
@@ -1 +1 @@
-User consent features cannot be enabled when using experimental MSC3861.
+User consent and 3-PID changes capability cannot be enabled when using experimental [MSC3861](https://github.com/matrix-org/matrix-spec-proposals/pull/3861) support.
diff --git a/changelog.d/16134.bugfix b/changelog.d/16134.bugfix
new file mode 100644
index 0000000000..9ce5f4a705
--- /dev/null
+++ b/changelog.d/16134.bugfix
@@ -0,0 +1 @@
+User consent and 3-PID changes capability cannot be enabled when using experimental [MSC3861](https://github.com/matrix-org/matrix-spec-proposals/pull/3861) support.
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index d4cf9a0555..277ea4675b 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -223,6 +223,12 @@ class MSC3861:
                 ("session_lifetime",),
             )
 
+        if root.registration.enable_3pid_changes:
+            raise ConfigError(
+                "enable_3pid_changes cannot be enabled when OAuth delegation is enabled",
+                ("enable_3pid_changes",),
+            )
+
 
 @attr.s(auto_attribs=True, frozen=True, slots=True)
 class MSC3866Config:
diff --git a/synapse/config/registration.py b/synapse/config/registration.py
index df1d83dfaa..b8ad6fbc06 100644
--- a/synapse/config/registration.py
+++ b/synapse/config/registration.py
@@ -133,7 +133,16 @@ class RegistrationConfig(Config):
 
         self.enable_set_displayname = config.get("enable_set_displayname", True)
         self.enable_set_avatar_url = config.get("enable_set_avatar_url", True)
-        self.enable_3pid_changes = config.get("enable_3pid_changes", True)
+
+        # The default value of enable_3pid_changes is True, unless msc3861 is enabled.
+        msc3861_enabled = (
+            (config.get("experimental_features") or {})
+            .get("msc3861", {})
+            .get("enabled", False)
+        )
+        self.enable_3pid_changes = config.get(
+            "enable_3pid_changes", not msc3861_enabled
+        )
 
         self.disable_msisdn_registration = config.get(
             "disable_msisdn_registration", False
diff --git a/tests/config/test_oauth_delegation.py b/tests/config/test_oauth_delegation.py
index 35f7b85dc7..5c91031746 100644
--- a/tests/config/test_oauth_delegation.py
+++ b/tests/config/test_oauth_delegation.py
@@ -271,3 +271,8 @@ class MSC3861OAuthDelegation(TestCase):
         self.config_dict["session_lifetime"] = "24h"
         with self.assertRaises(ConfigError):
             self.parse_config()
+
+    def test_enable_3pid_changes_cannot_be_enabled(self) -> None:
+        self.config_dict["enable_3pid_changes"] = True
+        with self.assertRaises(ConfigError):
+            self.parse_config()
-- 
cgit 1.5.1


From 803f63df1c52237a23cb68c1b2a8402200a7216d Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Tue, 22 Aug 2023 16:11:22 +0100
Subject: Fix perf of `wait_for_stream_positions` (#16148)

---
 changelog.d/16148.bugfix          |  1 +
 synapse/replication/tcp/client.py | 19 ++++++++++++-------
 2 files changed, 13 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/16148.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16148.bugfix b/changelog.d/16148.bugfix
new file mode 100644
index 0000000000..fea316f856
--- /dev/null
+++ b/changelog.d/16148.bugfix
@@ -0,0 +1 @@
+Fix performance degradation when there are a lot of in-flight replication requests.
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 04e8cff6ea..3b88dc68ea 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -14,7 +14,9 @@
 """A replication client for use by synapse workers.
 """
 import logging
-from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Tuple
+from typing import TYPE_CHECKING, Dict, Iterable, Optional, Set, Tuple
+
+from sortedcontainers import SortedList
 
 from twisted.internet import defer
 from twisted.internet.defer import Deferred
@@ -86,7 +88,9 @@ class ReplicationDataHandler:
 
         # Map from stream and instance to list of deferreds waiting for the stream to
         # arrive at a particular position. The lists are sorted by stream position.
-        self._streams_to_waiters: Dict[Tuple[str, str], List[Tuple[int, Deferred]]] = {}
+        self._streams_to_waiters: Dict[
+            Tuple[str, str], SortedList[Tuple[int, Deferred]]
+        ] = {}
 
     async def on_rdata(
         self, stream_name: str, instance_name: str, token: int, rows: list
@@ -238,7 +242,9 @@ class ReplicationDataHandler:
         # Notify any waiting deferreds. The list is ordered by position so we
         # just iterate through the list until we reach a position that is
         # greater than the received row position.
-        waiting_list = self._streams_to_waiters.get((stream_name, instance_name), [])
+        waiting_list = self._streams_to_waiters.get((stream_name, instance_name))
+        if not waiting_list:
+            return
 
         # Index of first item with a position after the current token, i.e we
         # have called all deferreds before this index. If not overwritten by
@@ -262,7 +268,7 @@ class ReplicationDataHandler:
 
         # Drop all entries in the waiting list that were called in the above
         # loop. (This maintains the order so no need to resort)
-        waiting_list[:] = waiting_list[index_of_first_deferred_not_called:]
+        del waiting_list[:index_of_first_deferred_not_called]
 
         for deferred in deferreds_to_callback:
             try:
@@ -322,11 +328,10 @@ class ReplicationDataHandler:
         )
 
         waiting_list = self._streams_to_waiters.setdefault(
-            (stream_name, instance_name), []
+            (stream_name, instance_name), SortedList(key=lambda t: t[0])
         )
 
-        waiting_list.append((position, deferred))
-        waiting_list.sort(key=lambda t: t[0])
+        waiting_list.add((position, deferred))
 
         # We measure here to get in flight counts and average waiting time.
         with Measure(self._clock, "repl.wait_for_stream_position"):
-- 
cgit 1.5.1


From 3b3fed7229c8110870aefd4de740724fc607a46c Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 23 Aug 2023 09:23:22 +0100
Subject: Increase perf of read/write locks (#16149)

We do this by marking the tables as `UNLOGGED` in PostgreSQL.
---
 changelog.d/16149.misc                             |  1 +
 .../80/02_read_write_locks_unlogged.sql.postgres   | 30 ++++++++++++++++++++++
 2 files changed, 31 insertions(+)
 create mode 100644 changelog.d/16149.misc
 create mode 100644 synapse/storage/schema/main/delta/80/02_read_write_locks_unlogged.sql.postgres

(limited to 'synapse')

diff --git a/changelog.d/16149.misc b/changelog.d/16149.misc
new file mode 100644
index 0000000000..8b6674d2aa
--- /dev/null
+++ b/changelog.d/16149.misc
@@ -0,0 +1 @@
+Increase performance of read/write locks.
diff --git a/synapse/storage/schema/main/delta/80/02_read_write_locks_unlogged.sql.postgres b/synapse/storage/schema/main/delta/80/02_read_write_locks_unlogged.sql.postgres
new file mode 100644
index 0000000000..5b5dbf2687
--- /dev/null
+++ b/synapse/storage/schema/main/delta/80/02_read_write_locks_unlogged.sql.postgres
@@ -0,0 +1,30 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Mark the worker_read_write_locks* tables as UNLOGGED, to increase
+-- performance. This means that we don't replicate the tables, and they get
+-- truncated on a crash. This is acceptable as a) in those cases it's likely
+-- that Synapse needs to be stopped/restarted anyway, and b) the locks are
+-- considered best-effort anyway.
+
+-- We need to remove and recreate the circular foreign key references, as
+-- UNLOGGED tables can't reference normal tables.
+ALTER TABLE worker_read_write_locks_mode DROP CONSTRAINT IF EXISTS worker_read_write_locks_mode_foreign;
+
+ALTER TABLE worker_read_write_locks SET UNLOGGED;
+ALTER TABLE worker_read_write_locks_mode SET UNLOGGED;
+
+ALTER TABLE worker_read_write_locks_mode ADD CONSTRAINT worker_read_write_locks_mode_foreign
+    FOREIGN KEY (lock_name, lock_key, token) REFERENCES worker_read_write_locks(lock_name, lock_key, token) DEFERRABLE INITIALLY DEFERRED;
-- 
cgit 1.5.1


From dffe095642b071dcac4907cc97944886e9fbd5b2 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 23 Aug 2023 09:23:41 +0100
Subject: Only lock when we're backfilling (#16159)

---
 changelog.d/16159.misc         |   1 +
 synapse/handlers/federation.py |  35 ++++--
 synapse/handlers/pagination.py | 267 ++++++++++++++++++++---------------------
 3 files changed, 157 insertions(+), 146 deletions(-)
 create mode 100644 changelog.d/16159.misc

(limited to 'synapse')

diff --git a/changelog.d/16159.misc b/changelog.d/16159.misc
new file mode 100644
index 0000000000..04cdd1afaf
--- /dev/null
+++ b/changelog.d/16159.misc
@@ -0,0 +1 @@
+Reduce scope of locks when paginating to alleviate DB contention.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 2b93b8c621..29cd45550a 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -60,6 +60,7 @@ from synapse.events import EventBase
 from synapse.events.snapshot import EventContext, UnpersistedEventContextBase
 from synapse.events.validator import EventValidator
 from synapse.federation.federation_client import InvalidResponseError
+from synapse.handlers.pagination import PURGE_PAGINATION_LOCK_NAME
 from synapse.http.servlet import assert_params_in_dict
 from synapse.logging.context import nested_logging_context
 from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace
@@ -152,6 +153,7 @@ class FederationHandler:
         self._device_handler = hs.get_device_handler()
         self._bulk_push_rule_evaluator = hs.get_bulk_push_rule_evaluator()
         self._notifier = hs.get_notifier()
+        self._worker_locks = hs.get_worker_locks_handler()
 
         self._clean_room_for_join_client = ReplicationCleanRoomRestServlet.make_client(
             hs
@@ -200,7 +202,7 @@ class FederationHandler:
     @trace
     @tag_args
     async def maybe_backfill(
-        self, room_id: str, current_depth: int, limit: int
+        self, room_id: str, current_depth: int, limit: int, record_time: bool = True
     ) -> bool:
         """Checks the database to see if we should backfill before paginating,
         and if so do.
@@ -213,21 +215,25 @@ class FederationHandler:
             limit: The number of events that the pagination request will
                 return. This is used as part of the heuristic to decide if we
                 should back paginate.
+            record_time: Whether to record the time it takes to backfill.
 
         Returns:
             True if we actually tried to backfill something, otherwise False.
         """
         # Starting the processing time here so we can include the room backfill
         # linearizer lock queue in the timing
-        processing_start_time = self.clock.time_msec()
+        processing_start_time = self.clock.time_msec() if record_time else 0
 
         async with self._room_backfill.queue(room_id):
-            return await self._maybe_backfill_inner(
-                room_id,
-                current_depth,
-                limit,
-                processing_start_time=processing_start_time,
-            )
+            async with self._worker_locks.acquire_read_write_lock(
+                PURGE_PAGINATION_LOCK_NAME, room_id, write=False
+            ):
+                return await self._maybe_backfill_inner(
+                    room_id,
+                    current_depth,
+                    limit,
+                    processing_start_time=processing_start_time,
+                )
 
     @trace
     @tag_args
@@ -305,12 +311,21 @@ class FederationHandler:
         # of history that extends all the way back to where we are currently paginating
         # and it's within the 100 events that are returned from `/backfill`.
         if not sorted_backfill_points and current_depth != MAX_DEPTH:
+            # Check that we actually have later backfill points, if not just return.
+            have_later_backfill_points = await self.store.get_backfill_points_in_room(
+                room_id=room_id,
+                current_depth=MAX_DEPTH,
+                limit=1,
+            )
+            if not have_later_backfill_points:
+                return False
+
             logger.debug(
                 "_maybe_backfill_inner: all backfill points are *after* current depth. Trying again with later backfill points."
             )
             run_as_background_process(
                 "_maybe_backfill_inner_anyway_with_max_depth",
-                self._maybe_backfill_inner,
+                self.maybe_backfill,
                 room_id=room_id,
                 # We use `MAX_DEPTH` so that we find all backfill points next
                 # time (all events are below the `MAX_DEPTH`)
@@ -319,7 +334,7 @@ class FederationHandler:
                 # We don't want to start another timing observation from this
                 # nested recursive call. The top-most call can record the time
                 # overall otherwise the smaller one will throw off the results.
-                processing_start_time=None,
+                record_time=False,
             )
             # We return `False` because we're backfilling in the background and there is
             # no new events immediately for the caller to know about yet.
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index 1be6ebc6d9..e5ac9096cc 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -487,155 +487,150 @@ class PaginationHandler:
 
         room_token = from_token.room_key
 
-        async with self._worker_locks.acquire_read_write_lock(
-            PURGE_PAGINATION_LOCK_NAME, room_id, write=False
-        ):
-            (membership, member_event_id) = (None, None)
-            if not use_admin_priviledge:
-                (
-                    membership,
-                    member_event_id,
-                ) = await self.auth.check_user_in_room_or_world_readable(
-                    room_id, requester, allow_departed_users=True
+        (membership, member_event_id) = (None, None)
+        if not use_admin_priviledge:
+            (
+                membership,
+                member_event_id,
+            ) = await self.auth.check_user_in_room_or_world_readable(
+                room_id, requester, allow_departed_users=True
+            )
+
+        if pagin_config.direction == Direction.BACKWARDS:
+            # if we're going backwards, we might need to backfill. This
+            # requires that we have a topo token.
+            if room_token.topological:
+                curr_topo = room_token.topological
+            else:
+                curr_topo = await self.store.get_current_topological_token(
+                    room_id, room_token.stream
                 )
 
-            if pagin_config.direction == Direction.BACKWARDS:
-                # if we're going backwards, we might need to backfill. This
-                # requires that we have a topo token.
-                if room_token.topological:
-                    curr_topo = room_token.topological
-                else:
-                    curr_topo = await self.store.get_current_topological_token(
-                        room_id, room_token.stream
-                    )
+        # If they have left the room then clamp the token to be before
+        # they left the room, to save the effort of loading from the
+        # database.
+        if (
+            pagin_config.direction == Direction.BACKWARDS
+            and not use_admin_priviledge
+            and membership == Membership.LEAVE
+        ):
+            # This is only None if the room is world_readable, in which case
+            # "Membership.JOIN" would have been returned and we should never hit
+            # this branch.
+            assert member_event_id
 
-            # If they have left the room then clamp the token to be before
-            # they left the room, to save the effort of loading from the
-            # database.
-            if (
-                pagin_config.direction == Direction.BACKWARDS
-                and not use_admin_priviledge
-                and membership == Membership.LEAVE
-            ):
-                # This is only None if the room is world_readable, in which case
-                # "Membership.JOIN" would have been returned and we should never hit
-                # this branch.
-                assert member_event_id
+            leave_token = await self.store.get_topological_token_for_event(
+                member_event_id
+            )
+            assert leave_token.topological is not None
 
-                leave_token = await self.store.get_topological_token_for_event(
-                    member_event_id
+            if leave_token.topological < curr_topo:
+                from_token = from_token.copy_and_replace(
+                    StreamKeyType.ROOM, leave_token
                 )
-                assert leave_token.topological is not None
 
-                if leave_token.topological < curr_topo:
-                    from_token = from_token.copy_and_replace(
-                        StreamKeyType.ROOM, leave_token
-                    )
+        to_room_key = None
+        if pagin_config.to_token:
+            to_room_key = pagin_config.to_token.room_key
+
+        # Initially fetch the events from the database. With any luck, we can return
+        # these without blocking on backfill (handled below).
+        events, next_key = await self.store.paginate_room_events(
+            room_id=room_id,
+            from_key=from_token.room_key,
+            to_key=to_room_key,
+            direction=pagin_config.direction,
+            limit=pagin_config.limit,
+            event_filter=event_filter,
+        )
 
-            to_room_key = None
-            if pagin_config.to_token:
-                to_room_key = pagin_config.to_token.room_key
-
-            # Initially fetch the events from the database. With any luck, we can return
-            # these without blocking on backfill (handled below).
-            events, next_key = await self.store.paginate_room_events(
-                room_id=room_id,
-                from_key=from_token.room_key,
-                to_key=to_room_key,
-                direction=pagin_config.direction,
-                limit=pagin_config.limit,
-                event_filter=event_filter,
+        if pagin_config.direction == Direction.BACKWARDS:
+            # We use a `Set` because there can be multiple events at a given depth
+            # and we only care about looking at the unique continuum of depths to
+            # find gaps.
+            event_depths: Set[int] = {event.depth for event in events}
+            sorted_event_depths = sorted(event_depths)
+
+            # Inspect the depths of the returned events to see if there are any gaps
+            found_big_gap = False
+            number_of_gaps = 0
+            previous_event_depth = (
+                sorted_event_depths[0] if len(sorted_event_depths) > 0 else 0
             )
-
-            if pagin_config.direction == Direction.BACKWARDS:
-                # We use a `Set` because there can be multiple events at a given depth
-                # and we only care about looking at the unique continum of depths to
-                # find gaps.
-                event_depths: Set[int] = {event.depth for event in events}
-                sorted_event_depths = sorted(event_depths)
-
-                # Inspect the depths of the returned events to see if there are any gaps
-                found_big_gap = False
-                number_of_gaps = 0
-                previous_event_depth = (
-                    sorted_event_depths[0] if len(sorted_event_depths) > 0 else 0
-                )
-                for event_depth in sorted_event_depths:
-                    # We don't expect a negative depth but we'll just deal with it in
-                    # any case by taking the absolute value to get the true gap between
-                    # any two integers.
-                    depth_gap = abs(event_depth - previous_event_depth)
-                    # A `depth_gap` of 1 is a normal continuous chain to the next event
-                    # (1 <-- 2 <-- 3) so anything larger indicates a missing event (it's
-                    # also possible there is no event at a given depth but we can't ever
-                    # know that for sure)
-                    if depth_gap > 1:
-                        number_of_gaps += 1
-
-                    # We only tolerate a small number single-event long gaps in the
-                    # returned events because those are most likely just events we've
-                    # failed to pull in the past. Anything longer than that is probably
-                    # a sign that we're missing a decent chunk of history and we should
-                    # try to backfill it.
-                    #
-                    # XXX: It's possible we could tolerate longer gaps if we checked
-                    # that a given events `prev_events` is one that has failed pull
-                    # attempts and we could just treat it like a dead branch of history
-                    # for now or at least something that we don't need the block the
-                    # client on to try pulling.
-                    #
-                    # XXX: If we had something like MSC3871 to indicate gaps in the
-                    # timeline to the client, we could also get away with any sized gap
-                    # and just have the client refetch the holes as they see fit.
-                    if depth_gap > 2:
-                        found_big_gap = True
-                        break
-                    previous_event_depth = event_depth
-
-                # Backfill in the foreground if we found a big gap, have too many holes,
-                # or we don't have enough events to fill the limit that the client asked
-                # for.
-                missing_too_many_events = (
-                    number_of_gaps > BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD
+            for event_depth in sorted_event_depths:
+                # We don't expect a negative depth but we'll just deal with it in
+                # any case by taking the absolute value to get the true gap between
+                # any two integers.
+                depth_gap = abs(event_depth - previous_event_depth)
+                # A `depth_gap` of 1 is a normal continuous chain to the next event
+                # (1 <-- 2 <-- 3) so anything larger indicates a missing event (it's
+                # also possible there is no event at a given depth but we can't ever
+                # know that for sure)
+                if depth_gap > 1:
+                    number_of_gaps += 1
+
+                # We only tolerate a small number single-event long gaps in the
+                # returned events because those are most likely just events we've
+                # failed to pull in the past. Anything longer than that is probably
+                # a sign that we're missing a decent chunk of history and we should
+                # try to backfill it.
+                #
+                # XXX: It's possible we could tolerate longer gaps if we checked
+                # that a given events `prev_events` is one that has failed pull
+                # attempts and we could just treat it like a dead branch of history
+                # for now or at least something that we don't need the block the
+                # client on to try pulling.
+                #
+                # XXX: If we had something like MSC3871 to indicate gaps in the
+                # timeline to the client, we could also get away with any sized gap
+                # and just have the client refetch the holes as they see fit.
+                if depth_gap > 2:
+                    found_big_gap = True
+                    break
+                previous_event_depth = event_depth
+
+            # Backfill in the foreground if we found a big gap, have too many holes,
+            # or we don't have enough events to fill the limit that the client asked
+            # for.
+            missing_too_many_events = (
+                number_of_gaps > BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD
+            )
+            not_enough_events_to_fill_response = len(events) < pagin_config.limit
+            if (
+                found_big_gap
+                or missing_too_many_events
+                or not_enough_events_to_fill_response
+            ):
+                did_backfill = await self.hs.get_federation_handler().maybe_backfill(
+                    room_id,
+                    curr_topo,
+                    limit=pagin_config.limit,
                 )
-                not_enough_events_to_fill_response = len(events) < pagin_config.limit
-                if (
-                    found_big_gap
-                    or missing_too_many_events
-                    or not_enough_events_to_fill_response
-                ):
-                    did_backfill = (
-                        await self.hs.get_federation_handler().maybe_backfill(
-                            room_id,
-                            curr_topo,
-                            limit=pagin_config.limit,
-                        )
-                    )
 
-                    # If we did backfill something, refetch the events from the database to
-                    # catch anything new that might have been added since we last fetched.
-                    if did_backfill:
-                        events, next_key = await self.store.paginate_room_events(
-                            room_id=room_id,
-                            from_key=from_token.room_key,
-                            to_key=to_room_key,
-                            direction=pagin_config.direction,
-                            limit=pagin_config.limit,
-                            event_filter=event_filter,
-                        )
-                else:
-                    # Otherwise, we can backfill in the background for eventual
-                    # consistency's sake but we don't need to block the client waiting
-                    # for a costly federation call and processing.
-                    run_as_background_process(
-                        "maybe_backfill_in_the_background",
-                        self.hs.get_federation_handler().maybe_backfill,
-                        room_id,
-                        curr_topo,
+                # If we did backfill something, refetch the events from the database to
+                # catch anything new that might have been added since we last fetched.
+                if did_backfill:
+                    events, next_key = await self.store.paginate_room_events(
+                        room_id=room_id,
+                        from_key=from_token.room_key,
+                        to_key=to_room_key,
+                        direction=pagin_config.direction,
                         limit=pagin_config.limit,
+                        event_filter=event_filter,
                     )
+            else:
+                # Otherwise, we can backfill in the background for eventual
+                # consistency's sake but we don't need to block the client waiting
+                # for a costly federation call and processing.
+                run_as_background_process(
+                    "maybe_backfill_in_the_background",
+                    self.hs.get_federation_handler().maybe_backfill,
+                    room_id,
+                    curr_topo,
+                    limit=pagin_config.limit,
+                )
 
-            next_token = from_token.copy_and_replace(StreamKeyType.ROOM, next_key)
+        next_token = from_token.copy_and_replace(StreamKeyType.ROOM, next_key)
 
         # if no events are returned from pagination, that implies
         # we have reached the end of the available events.
-- 
cgit 1.5.1


From 19a1cda084342034cc92c88c0376cbcadbf8e2a0 Mon Sep 17 00:00:00 2001
From: "DeepBlueV7.X" <nicolas.werner@hotmail.de>
Date: Wed, 23 Aug 2023 08:35:23 +0000
Subject: Properly update retry_last_ts when hitting the maximum retry interval
 (#16156)

* Properly update retry_last_ts when hitting the maximum retry interval

This was broken in 1.87 when the maximum retry interval got changed from
almost infinite to a week (and made configurable).

fixes #16101

Signed-off-by: Nicolas Werner <nicolas.werner@hotmail.de>

* Add changelog

* Change fix + add test

* Add comment

---------

Signed-off-by: Nicolas Werner <nicolas.werner@hotmail.de>
Co-authored-by: Mathieu Velten <mathieuv@matrix.org>
---
 changelog.d/16156.bugfix                       |  1 +
 synapse/storage/databases/main/transactions.py |  4 +-
 tests/util/test_retryutils.py                  | 51 ++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/16156.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16156.bugfix b/changelog.d/16156.bugfix
new file mode 100644
index 0000000000..17284297cf
--- /dev/null
+++ b/changelog.d/16156.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in 1.87 where synapse would send an excessive amount of federation requests to servers which have been offline for a long time. Contributed by Nico.
diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py
index c3bd36efc9..48e4b0ba3c 100644
--- a/synapse/storage/databases/main/transactions.py
+++ b/synapse/storage/databases/main/transactions.py
@@ -242,6 +242,8 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
     ) -> None:
         # Upsert retry time interval if retry_interval is zero (i.e. we're
         # resetting it) or greater than the existing retry interval.
+        # We also upsert when the new retry interval is the same as the existing one,
+        # since it will be the case when `destination_max_retry_interval` is reached.
         #
         # WARNING: This is executed in autocommit, so we shouldn't add any more
         # SQL calls in here (without being very careful).
@@ -257,7 +259,7 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
                 WHERE
                     EXCLUDED.retry_interval = 0
                     OR destinations.retry_interval IS NULL
-                    OR destinations.retry_interval < EXCLUDED.retry_interval
+                    OR destinations.retry_interval <= EXCLUDED.retry_interval
         """
 
         txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval))
diff --git a/tests/util/test_retryutils.py b/tests/util/test_retryutils.py
index 1277e1a865..4bcd17a6fc 100644
--- a/tests/util/test_retryutils.py
+++ b/tests/util/test_retryutils.py
@@ -108,3 +108,54 @@ class RetryLimiterTestCase(HomeserverTestCase):
 
         new_timings = self.get_success(store.get_destination_retry_timings("test_dest"))
         self.assertIsNone(new_timings)
+
+    def test_max_retry_interval(self) -> None:
+        """Test that `destination_max_retry_interval` setting works as expected"""
+        store = self.hs.get_datastores().main
+
+        destination_max_retry_interval_ms = (
+            self.hs.config.federation.destination_max_retry_interval_ms
+        )
+
+        self.get_success(get_retry_limiter("test_dest", self.clock, store))
+        self.pump(1)
+
+        failure_ts = self.clock.time_msec()
+
+        # Simulate reaching destination_max_retry_interval
+        self.get_success(
+            store.set_destination_retry_timings(
+                "test_dest",
+                failure_ts=failure_ts,
+                retry_last_ts=failure_ts,
+                retry_interval=destination_max_retry_interval_ms,
+            )
+        )
+
+        # Check it fails
+        self.get_failure(
+            get_retry_limiter("test_dest", self.clock, store), NotRetryingDestination
+        )
+
+        # Get past retry_interval and we can try again, and still throw an error to continue the backoff
+        self.reactor.advance(destination_max_retry_interval_ms / 1000 + 1)
+        limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))
+        self.pump(1)
+        try:
+            with limiter:
+                self.pump(1)
+                raise AssertionError("argh")
+        except AssertionError:
+            pass
+
+        self.pump()
+
+        # retry_interval does not increase and stays at destination_max_retry_interval_ms
+        new_timings = self.get_success(store.get_destination_retry_timings("test_dest"))
+        assert new_timings is not None
+        self.assertEqual(new_timings.retry_interval, destination_max_retry_interval_ms)
+
+        # Check it fails
+        self.get_failure(
+            get_retry_limiter("test_dest", self.clock, store), NotRetryingDestination
+        )
-- 
cgit 1.5.1


From 873971a8b9b4cbbc141df570e76a02c7b4b9b9c0 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Wed, 23 Aug 2023 13:37:51 +0200
Subject: Task scheduler: mark task as active if we are scheduling ASAP
 (#16165)

---
 changelog.d/16165.misc                           | 1 +
 synapse/storage/databases/main/task_scheduler.py | 2 +-
 synapse/util/task_scheduler.py                   | 4 +++-
 3 files changed, 5 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/16165.misc

(limited to 'synapse')

diff --git a/changelog.d/16165.misc b/changelog.d/16165.misc
new file mode 100644
index 0000000000..b4d514d249
--- /dev/null
+++ b/changelog.d/16165.misc
@@ -0,0 +1 @@
+Task scheduler: mark task as active if we are scheduling as soon as possible.
diff --git a/synapse/storage/databases/main/task_scheduler.py b/synapse/storage/databases/main/task_scheduler.py
index 1fb3180c3c..9ab120eea9 100644
--- a/synapse/storage/databases/main/task_scheduler.py
+++ b/synapse/storage/databases/main/task_scheduler.py
@@ -92,7 +92,7 @@ class TaskSchedulerWorkerStore(SQLBaseStore):
             if clauses:
                 sql = sql + " WHERE " + " AND ".join(clauses)
 
-            sql = sql + "ORDER BY timestamp"
+            sql = sql + " ORDER BY timestamp"
 
             txn.execute(sql, args)
             return self.db_pool.cursor_to_dict(txn)
diff --git a/synapse/util/task_scheduler.py b/synapse/util/task_scheduler.py
index 773a8327f6..4aea64b338 100644
--- a/synapse/util/task_scheduler.py
+++ b/synapse/util/task_scheduler.py
@@ -154,13 +154,15 @@ class TaskScheduler:
                 f"No function associated with action {action} of the scheduled task"
             )
 
+        status = TaskStatus.SCHEDULED
         if timestamp is None or timestamp < self._clock.time_msec():
             timestamp = self._clock.time_msec()
+            status = TaskStatus.ACTIVE
 
         task = ScheduledTask(
             random_string(16),
             action,
-            TaskStatus.SCHEDULED,
+            status,
             timestamp,
             resource_id,
             params,
-- 
cgit 1.5.1


From 86ecd341ec93167fbb5a335237c1cd629e7256a2 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Wed, 23 Aug 2023 13:04:46 +0100
Subject: Always update `retry_last_ts` (#16164)

---
 changelog.d/16164.bugfix                       | 1 +
 synapse/storage/databases/main/transactions.py | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/16164.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16164.bugfix b/changelog.d/16164.bugfix
new file mode 100644
index 0000000000..17284297cf
--- /dev/null
+++ b/changelog.d/16164.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in 1.87 where synapse would send an excessive amount of federation requests to servers which have been offline for a long time. Contributed by Nico.
diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py
index 48e4b0ba3c..860bbf7c0f 100644
--- a/synapse/storage/databases/main/transactions.py
+++ b/synapse/storage/databases/main/transactions.py
@@ -242,8 +242,6 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
     ) -> None:
         # Upsert retry time interval if retry_interval is zero (i.e. we're
         # resetting it) or greater than the existing retry interval.
-        # We also upsert when the new retry interval is the same as the existing one,
-        # since it will be the case when `destination_max_retry_interval` is reached.
         #
         # WARNING: This is executed in autocommit, so we shouldn't add any more
         # SQL calls in here (without being very careful).
@@ -258,8 +256,10 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
                     retry_interval = EXCLUDED.retry_interval
                 WHERE
                     EXCLUDED.retry_interval = 0
+                    OR EXCLUDED.retry_last_ts = 0
                     OR destinations.retry_interval IS NULL
-                    OR destinations.retry_interval <= EXCLUDED.retry_interval
+                    OR destinations.retry_interval < EXCLUDED.retry_interval
+                    OR destinations.retry_last_ts < EXCLUDED.retry_last_ts
         """
 
         txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval))
-- 
cgit 1.5.1


From 7cd79ce0519964bf52a3f88d6fd8a5cc5dff5c6c Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Wed, 23 Aug 2023 13:45:19 +0100
Subject: Reduce DB contention on worker locks (#16160)

---
 changelog.d/16160.misc                             |  1 +
 .../80/03_read_write_locks_triggers.sql.postgres   | 37 ++++++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 changelog.d/16160.misc
 create mode 100644 synapse/storage/schema/main/delta/80/03_read_write_locks_triggers.sql.postgres

(limited to 'synapse')

diff --git a/changelog.d/16160.misc b/changelog.d/16160.misc
new file mode 100644
index 0000000000..78803b7bcd
--- /dev/null
+++ b/changelog.d/16160.misc
@@ -0,0 +1 @@
+Reduce DB contention on worker locks.
diff --git a/synapse/storage/schema/main/delta/80/03_read_write_locks_triggers.sql.postgres b/synapse/storage/schema/main/delta/80/03_read_write_locks_triggers.sql.postgres
new file mode 100644
index 0000000000..31de5bfa18
--- /dev/null
+++ b/synapse/storage/schema/main/delta/80/03_read_write_locks_triggers.sql.postgres
@@ -0,0 +1,37 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Fix up the triggers that were in `78/04_read_write_locks_triggers.sql`
+
+-- Reduce the number of writes we do on this table.
+--
+-- Note that we still want to lock the row here (i.e. still do a `DO UPDATE
+-- SET`) so that we serialize updates.
+CREATE OR REPLACE FUNCTION upsert_read_write_lock_parent() RETURNS trigger AS $$
+BEGIN
+    INSERT INTO worker_read_write_locks_mode (lock_name, lock_key, write_lock, token)
+        VALUES (NEW.lock_name, NEW.lock_key, NEW.write_lock, NEW.token)
+        ON CONFLICT (lock_name, lock_key)
+        DO UPDATE SET write_lock = NEW.write_lock
+            WHERE OLD.write_lock != NEW.write_lock;
+    RETURN NEW;
+END
+$$
+LANGUAGE plpgsql;
+
+DROP TRIGGER IF EXISTS upsert_read_write_lock_parent_trigger ON worker_read_write_locks;
+CREATE TRIGGER upsert_read_write_lock_parent_trigger BEFORE INSERT ON worker_read_write_locks
+    FOR EACH ROW
+    EXECUTE PROCEDURE upsert_read_write_lock_parent();
-- 
cgit 1.5.1


From 4adaba9acf224e14171a8a4b9c98ef0791c4a1e3 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Wed, 23 Aug 2023 13:45:25 +0100
Subject: Fix rare deadlock when using read/write locks (#16133)

---
 changelog.d/16133.bugfix                           |  1 +
 .../80/02_read_write_locks_deadlock.sql.postgres   | 37 ++++++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 changelog.d/16133.bugfix
 create mode 100644 synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres

(limited to 'synapse')

diff --git a/changelog.d/16133.bugfix b/changelog.d/16133.bugfix
new file mode 100644
index 0000000000..ed8830692f
--- /dev/null
+++ b/changelog.d/16133.bugfix
@@ -0,0 +1 @@
+Fix a rare race that could block new events from being sent for up to two minutes. Introduced in v1.90.0.
diff --git a/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres b/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres
new file mode 100644
index 0000000000..401c42e18a
--- /dev/null
+++ b/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres
@@ -0,0 +1,37 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- To avoid the possibility of a deadlock, lock the
+-- `worker_read_write_locks_mode` table so that we serialize inserts/deletes
+-- for a specific lock name/key.
+
+CREATE OR REPLACE FUNCTION delete_read_write_lock_parent_before() RETURNS trigger AS $$
+BEGIN
+    -- `PERFORM` is a `SELECT` which discards the rows.
+    PERFORM * FROM worker_read_write_locks_mode
+        WHERE
+            lock_name = OLD.lock_name
+            AND lock_key = OLD.lock_key
+        FOR UPDATE;
+
+    RETURN OLD;
+END
+$$
+LANGUAGE plpgsql;
+
+DROP TRIGGER IF EXISTS delete_read_write_lock_parent_before_trigger ON worker_read_write_locks;
+CREATE TRIGGER delete_read_write_lock_parent_before_trigger BEFORE DELETE ON worker_read_write_locks
+    FOR EACH ROW
+    EXECUTE PROCEDURE delete_read_write_lock_parent_before();
-- 
cgit 1.5.1


From ec662bbe413bd976af97f099ea4f11dafaf98b3e Mon Sep 17 00:00:00 2001
From: Neil Johnson <neil@matrix.org>
Date: Wed, 23 Aug 2023 14:00:34 +0100
Subject: Filter out unwanted user_agents from udv. (#16124)

---
 changelog.d/16124.bugfix                     |  1 +
 synapse/storage/databases/main/client_ips.py |  5 +++
 tests/storage/test_client_ips.py             | 65 ++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+)
 create mode 100644 changelog.d/16124.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16124.bugfix b/changelog.d/16124.bugfix
new file mode 100644
index 0000000000..fb1d501a2f
--- /dev/null
+++ b/changelog.d/16124.bugfix
@@ -0,0 +1 @@
+Filter out user agent references to the sliding sync proxy and rust-sdk from the user_daily_visits table to ensure that Element X can be represented fully.
diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py
index 0df160d2b0..d8d333e11d 100644
--- a/synapse/storage/databases/main/client_ips.py
+++ b/synapse/storage/databases/main/client_ips.py
@@ -579,6 +579,11 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore, MonthlyActiveUsersWorke
         device_id: Optional[str],
         now: Optional[int] = None,
     ) -> None:
+        # The sync proxy continuously triggers /sync even if the user is not
+        # present so should be excluded from user_ips entries.
+        if user_agent == "sync-v3-proxy-":
+            return
+
         if not now:
             now = int(self._clock.time_msec())
         key = (user_id, access_token, ip)
diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py
index cd0079871c..209d68b40b 100644
--- a/tests/storage/test_client_ips.py
+++ b/tests/storage/test_client_ips.py
@@ -654,6 +654,71 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
             r,
         )
 
+    def test_invalid_user_agents_are_ignored(self) -> None:
+        # First make sure we have completed all updates.
+        self.wait_for_background_updates()
+
+        user_id1 = "@user1:id"
+        user_id2 = "@user2:id"
+        device_id1 = "MY_DEVICE1"
+        device_id2 = "MY_DEVICE2"
+        access_token1 = "access_token1"
+        access_token2 = "access_token2"
+
+        # Insert a user IP 1
+        self.get_success(
+            self.store.store_device(
+                user_id1,
+                device_id1,
+                "display name1",
+            )
+        )
+        # Insert a user IP 2
+        self.get_success(
+            self.store.store_device(
+                user_id2,
+                device_id2,
+                "display name2",
+            )
+        )
+
+        self.get_success(
+            self.store.insert_client_ip(
+                user_id1, access_token1, "ip", "sync-v3-proxy-", device_id1
+            )
+        )
+        self.get_success(
+            self.store.insert_client_ip(
+                user_id2, access_token2, "ip", "user_agent", device_id2
+            )
+        )
+        # Force persisting to disk
+        self.reactor.advance(200)
+
+        # We should see that in the DB
+        result = self.get_success(
+            self.store.db_pool.simple_select_list(
+                table="user_ips",
+                keyvalues={},
+                retcols=["access_token", "ip", "user_agent", "device_id", "last_seen"],
+                desc="get_user_ip_and_agents",
+            )
+        )
+
+        # ensure user1 is filtered out
+        self.assertEqual(
+            result,
+            [
+                {
+                    "access_token": access_token2,
+                    "ip": "ip",
+                    "user_agent": "user_agent",
+                    "device_id": device_id2,
+                    "last_seen": 0,
+                }
+            ],
+        )
+
 
 class ClientIpAuthTestCase(unittest.HomeserverTestCase):
     servlets = [
-- 
cgit 1.5.1


From 18279631e9555bd9032b993074e62c7af886d9cd Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Wed, 23 Aug 2023 16:24:30 +0100
Subject: Fix rare deadlock when using read/write locks (#16169)

---
 changelog.d/16169.bugfix                           |  1 +
 .../80/02_read_write_locks_deadlock.sql.postgres   | 37 -----------
 .../80/04_read_write_locks_deadlock.sql.postgres   | 71 ++++++++++++++++++++++
 3 files changed, 72 insertions(+), 37 deletions(-)
 create mode 100644 changelog.d/16169.bugfix
 delete mode 100644 synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres
 create mode 100644 synapse/storage/schema/main/delta/80/04_read_write_locks_deadlock.sql.postgres

(limited to 'synapse')

diff --git a/changelog.d/16169.bugfix b/changelog.d/16169.bugfix
new file mode 100644
index 0000000000..ed8830692f
--- /dev/null
+++ b/changelog.d/16169.bugfix
@@ -0,0 +1 @@
+Fix a rare race that could block new events from being sent for up to two minutes. Introduced in v1.90.0.
diff --git a/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres b/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres
deleted file mode 100644
index 401c42e18a..0000000000
--- a/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright 2023 The Matrix.org Foundation C.I.C
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
--- To avoid the possibility of a deadlock, lock the
--- `worker_read_write_locks_mode` table so that we serialize inserts/deletes
--- for a specific lock name/key.
-
-CREATE OR REPLACE FUNCTION delete_read_write_lock_parent_before() RETURNS trigger AS $$
-BEGIN
-    -- `PERFORM` is a `SELECT` which discards the rows.
-    PERFORM * FROM worker_read_write_locks_mode
-        WHERE
-            lock_name = OLD.lock_name
-            AND lock_key = OLD.lock_key
-        FOR UPDATE;
-
-    RETURN OLD;
-END
-$$
-LANGUAGE plpgsql;
-
-DROP TRIGGER IF EXISTS delete_read_write_lock_parent_before_trigger ON worker_read_write_locks;
-CREATE TRIGGER delete_read_write_lock_parent_before_trigger BEFORE DELETE ON worker_read_write_locks
-    FOR EACH ROW
-    EXECUTE PROCEDURE delete_read_write_lock_parent_before();
diff --git a/synapse/storage/schema/main/delta/80/04_read_write_locks_deadlock.sql.postgres b/synapse/storage/schema/main/delta/80/04_read_write_locks_deadlock.sql.postgres
new file mode 100644
index 0000000000..0eb459c0b9
--- /dev/null
+++ b/synapse/storage/schema/main/delta/80/04_read_write_locks_deadlock.sql.postgres
@@ -0,0 +1,71 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- Remove a previous attempt to avoid deadlocks
+DROP TRIGGER IF EXISTS delete_read_write_lock_parent_before_trigger ON worker_read_write_locks;
+DROP FUNCTION IF EXISTS delete_read_write_lock_parent_before;
+
+
+-- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock
+-- is released (i.e. a row deleted from `worker_read_write_locks`). Either we
+-- update the `worker_read_write_locks_mode.token` to match another instance
+-- that has currently acquired the lock, or we delete the row if nobody has
+-- currently acquired a lock.
+CREATE OR REPLACE FUNCTION delete_read_write_lock_parent() RETURNS trigger AS $$
+DECLARE
+    new_token TEXT;
+    mode_row_token TEXT;
+BEGIN
+    -- Only update the token in `_mode` if it's our token. This prevents
+    -- deadlocks.
+    --
+    -- We shove the token into `mode_row_token`, as otherwise postgres complains
+    -- we're not using the returned data.
+    SELECT token INTO mode_row_token FROM worker_read_write_locks_mode
+        WHERE
+            lock_name = OLD.lock_name
+            AND lock_key = OLD.lock_key
+            AND token = OLD.token
+        FOR UPDATE;
+
+    IF NOT FOUND THEN
+        RETURN NEW;
+    END IF;
+
+    SELECT token INTO new_token FROM worker_read_write_locks
+        WHERE
+            lock_name = OLD.lock_name
+            AND lock_key = OLD.lock_key
+        LIMIT 1 FOR UPDATE SKIP LOCKED;
+
+    IF NOT FOUND THEN
+        DELETE FROM worker_read_write_locks_mode
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key AND token = OLD.token;
+    ELSE
+        UPDATE worker_read_write_locks_mode
+            SET token = new_token
+            WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key;
+    END IF;
+
+    RETURN NEW;
+END
+$$
+LANGUAGE plpgsql;
+
+DROP TRIGGER IF EXISTS delete_read_write_lock_parent_trigger ON worker_read_write_locks;
+CREATE TRIGGER delete_read_write_lock_parent_trigger AFTER DELETE ON worker_read_write_locks
+    FOR EACH ROW
+    EXECUTE PROCEDURE delete_read_write_lock_parent();
-- 
cgit 1.5.1


From 33fa82a34cb0001787889be88c3817688ce2f76d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 23 Aug 2023 13:22:34 -0400
Subject: Stabilize support for MSC3958 (suppress notifications from edits).
 (#16113)

---
 changelog.d/16113.feature                   | 1 +
 rust/benches/evaluator.rs                   | 1 -
 rust/src/push/base_rules.rs                 | 2 +-
 rust/src/push/evaluator.rs                  | 2 +-
 rust/src/push/mod.rs                        | 9 ---------
 stubs/synapse/synapse_rust/push.pyi         | 1 -
 synapse/config/experimental.py              | 5 -----
 synapse/storage/databases/main/push_rule.py | 1 -
 tests/push/test_bulk_push_rule_evaluator.py | 1 -
 9 files changed, 3 insertions(+), 20 deletions(-)
 create mode 100644 changelog.d/16113.feature

(limited to 'synapse')

diff --git a/changelog.d/16113.feature b/changelog.d/16113.feature
new file mode 100644
index 0000000000..69fdaaebac
--- /dev/null
+++ b/changelog.d/16113.feature
@@ -0,0 +1 @@
+Suppress notifications from message edits per [MSC3958](https://github.com/matrix-org/matrix-spec-proposals/pull/3958).
diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs
index 6e1eab2a3b..14071105a0 100644
--- a/rust/benches/evaluator.rs
+++ b/rust/benches/evaluator.rs
@@ -197,7 +197,6 @@ fn bench_eval_message(b: &mut Bencher) {
         false,
         false,
         false,
-        false,
     );
 
     b.iter(|| eval.run(&rules, Some("bob"), Some("person")));
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index 00baceda91..59fd27665a 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -228,7 +228,7 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
     // We don't want to notify on edits *unless* the edit directly mentions a
     // user, which is handled above.
     PushRule {
-        rule_id: Cow::Borrowed("global/override/.org.matrix.msc3958.suppress_edits"),
+        rule_id: Cow::Borrowed("global/override/.m.rule.suppress_edits"),
         priority_class: 5,
         conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventPropertyIs(
             EventPropertyIsCondition {
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index 48e670478b..5b9bf9b26a 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -564,7 +564,7 @@ fn test_requires_room_version_supports_condition() {
     };
     let rules = PushRules::new(vec![custom_rule]);
     result = evaluator.run(
-        &FilteredPushRules::py_new(rules, BTreeMap::new(), true, false, true, false),
+        &FilteredPushRules::py_new(rules, BTreeMap::new(), true, false, true),
         None,
         None,
     );
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index 829fb79d0e..8e91f506cc 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -527,7 +527,6 @@ pub struct FilteredPushRules {
     msc1767_enabled: bool,
     msc3381_polls_enabled: bool,
     msc3664_enabled: bool,
-    msc3958_suppress_edits_enabled: bool,
 }
 
 #[pymethods]
@@ -539,7 +538,6 @@ impl FilteredPushRules {
         msc1767_enabled: bool,
         msc3381_polls_enabled: bool,
         msc3664_enabled: bool,
-        msc3958_suppress_edits_enabled: bool,
     ) -> Self {
         Self {
             push_rules,
@@ -547,7 +545,6 @@ impl FilteredPushRules {
             msc1767_enabled,
             msc3381_polls_enabled,
             msc3664_enabled,
-            msc3958_suppress_edits_enabled,
         }
     }
 
@@ -584,12 +581,6 @@ impl FilteredPushRules {
                     return false;
                 }
 
-                if !self.msc3958_suppress_edits_enabled
-                    && rule.rule_id == "global/override/.org.matrix.msc3958.suppress_edits"
-                {
-                    return false;
-                }
-
                 true
             })
             .map(|r| {
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index d573a37b9a..1f432d4ecf 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -46,7 +46,6 @@ class FilteredPushRules:
         msc1767_enabled: bool,
         msc3381_polls_enabled: bool,
         msc3664_enabled: bool,
-        msc3958_suppress_edits_enabled: bool,
     ): ...
     def rules(self) -> Collection[Tuple[PushRule, bool]]: ...
 
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 277ea4675b..84d6dd13af 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -383,11 +383,6 @@ class ExperimentalConfig(Config):
         # MSC3391: Removing account data.
         self.msc3391_enabled = experimental.get("msc3391_enabled", False)
 
-        # MSC3959: Do not generate notifications for edits.
-        self.msc3958_supress_edit_notifs = experimental.get(
-            "msc3958_supress_edit_notifs", False
-        )
-
         # MSC3967: Do not require UIA when first uploading cross signing keys
         self.msc3967_enabled = experimental.get("msc3967_enabled", False)
 
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index c13c0bc7d7..bec0dc2afe 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -88,7 +88,6 @@ def _load_rules(
         msc1767_enabled=experimental_config.msc1767_enabled,
         msc3664_enabled=experimental_config.msc3664_enabled,
         msc3381_polls_enabled=experimental_config.msc3381_polls_enabled,
-        msc3958_suppress_edits_enabled=experimental_config.msc3958_supress_edit_notifs,
     )
 
     return filtered_rules
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
index 829b9df83d..937e6ebb7d 100644
--- a/tests/push/test_bulk_push_rule_evaluator.py
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -382,7 +382,6 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
             )
         )
 
-    @override_config({"experimental_features": {"msc3958_supress_edit_notifs": True}})
     def test_suppress_edits(self) -> None:
         """Under the default push rules, event edits should not generate notifications."""
         bulk_evaluator = BulkPushRuleEvaluator(self.hs)
-- 
cgit 1.5.1


From 0538e3e2dba8ff5bbc13f11d796e696f6ba8a7c7 Mon Sep 17 00:00:00 2001
From: Will Hunt <will@half-shot.uk>
Date: Thu, 24 Aug 2023 15:40:26 +0100
Subject: Add `Retry-After` to M_LIMIT_EXCEEDED error responses (#16136)

Implements MSC4041 behind an experimental configuration flag.
---
 changelog.d/16136.feature       |  1 +
 synapse/api/errors.py           | 10 +++++++++-
 synapse/config/experimental.py  |  9 +++++++++
 tests/api/test_errors.py        | 36 ++++++++++++++++++++++++++++++++++++
 tests/rest/client/test_login.py | 24 ++++++++++++++++++------
 5 files changed, 73 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/16136.feature
 create mode 100644 tests/api/test_errors.py

(limited to 'synapse')

diff --git a/changelog.d/16136.feature b/changelog.d/16136.feature
new file mode 100644
index 0000000000..4ad98a88c3
--- /dev/null
+++ b/changelog.d/16136.feature
@@ -0,0 +1 @@
+Return a `Retry-After` with `M_LIMIT_EXCEEDED` error responses.
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index 7ffd72c42c..578e798773 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -16,6 +16,7 @@
 """Contains exceptions and error codes."""
 
 import logging
+import math
 import typing
 from enum import Enum
 from http import HTTPStatus
@@ -503,6 +504,8 @@ class InvalidCaptchaError(SynapseError):
 class LimitExceededError(SynapseError):
     """A client has sent too many requests and is being throttled."""
 
+    include_retry_after_header = False
+
     def __init__(
         self,
         code: int = 429,
@@ -510,7 +513,12 @@ class LimitExceededError(SynapseError):
         retry_after_ms: Optional[int] = None,
         errcode: str = Codes.LIMIT_EXCEEDED,
     ):
-        super().__init__(code, msg, errcode)
+        headers = (
+            {"Retry-After": str(math.ceil(retry_after_ms / 1000))}
+            if self.include_retry_after_header and retry_after_ms is not None
+            else None
+        )
+        super().__init__(code, msg, errcode, headers=headers)
         self.retry_after_ms = retry_after_ms
 
     def error_dict(self, config: Optional["HomeServerConfig"]) -> "JsonDict":
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index 84d6dd13af..cabe0d4397 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -18,6 +18,7 @@ from typing import TYPE_CHECKING, Any, Optional
 import attr
 import attr.validators
 
+from synapse.api.errors import LimitExceededError
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersions
 from synapse.config import ConfigError
 from synapse.config._base import Config, RootConfig
@@ -406,3 +407,11 @@ class ExperimentalConfig(Config):
         self.msc4010_push_rules_account_data = experimental.get(
             "msc4010_push_rules_account_data", False
         )
+
+        # MSC4041: Use HTTP header Retry-After to enable library-assisted retry handling
+        #
+        # This is a bit hacky, but the most reasonable way to *always* include the
+        # headers.
+        LimitExceededError.include_retry_after_header = experimental.get(
+            "msc4041_enabled", False
+        )
diff --git a/tests/api/test_errors.py b/tests/api/test_errors.py
new file mode 100644
index 0000000000..319abfe63d
--- /dev/null
+++ b/tests/api/test_errors.py
@@ -0,0 +1,36 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.api.errors import LimitExceededError
+
+from tests import unittest
+
+
+class ErrorsTestCase(unittest.TestCase):
+    # Create a sub-class to avoid mutating the class-level property.
+    class LimitExceededErrorHeaders(LimitExceededError):
+        include_retry_after_header = True
+
+    def test_limit_exceeded_header(self) -> None:
+        err = ErrorsTestCase.LimitExceededErrorHeaders(retry_after_ms=100)
+        self.assertEqual(err.error_dict(None).get("retry_after_ms"), 100)
+        assert err.headers is not None
+        self.assertEqual(err.headers.get("Retry-After"), "1")
+
+    def test_limit_exceeded_rounding(self) -> None:
+        err = ErrorsTestCase.LimitExceededErrorHeaders(retry_after_ms=3001)
+        self.assertEqual(err.error_dict(None).get("retry_after_ms"), 3001)
+        assert err.headers is not None
+        self.assertEqual(err.headers.get("Retry-After"), "4")
diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py
index ffbc13bb8d..62c32cae5e 100644
--- a/tests/rest/client/test_login.py
+++ b/tests/rest/client/test_login.py
@@ -169,7 +169,8 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase):
                 # which sets these values to 10000, but as we're overriding the entire
                 # rc_login dict here, we need to set this manually as well
                 "account": {"per_second": 10000, "burst_count": 10000},
-            }
+            },
+            "experimental_features": {"msc4041_enabled": True},
         }
     )
     def test_POST_ratelimiting_per_address(self) -> None:
@@ -189,12 +190,15 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase):
             if i == 5:
                 self.assertEqual(channel.code, 429, msg=channel.result)
                 retry_after_ms = int(channel.json_body["retry_after_ms"])
+                retry_header = channel.headers.getRawHeaders("Retry-After")
             else:
                 self.assertEqual(channel.code, 200, msg=channel.result)
 
         # Since we're ratelimiting at 1 request/min, retry_after_ms should be lower
         # than 1min.
-        self.assertTrue(retry_after_ms < 6000)
+        self.assertLess(retry_after_ms, 6000)
+        assert retry_header
+        self.assertLessEqual(int(retry_header[0]), 6)
 
         self.reactor.advance(retry_after_ms / 1000.0 + 1.0)
 
@@ -217,7 +221,8 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase):
                 # which sets these values to 10000, but as we're overriding the entire
                 # rc_login dict here, we need to set this manually as well
                 "address": {"per_second": 10000, "burst_count": 10000},
-            }
+            },
+            "experimental_features": {"msc4041_enabled": True},
         }
     )
     def test_POST_ratelimiting_per_account(self) -> None:
@@ -234,12 +239,15 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase):
             if i == 5:
                 self.assertEqual(channel.code, 429, msg=channel.result)
                 retry_after_ms = int(channel.json_body["retry_after_ms"])
+                retry_header = channel.headers.getRawHeaders("Retry-After")
             else:
                 self.assertEqual(channel.code, 200, msg=channel.result)
 
         # Since we're ratelimiting at 1 request/min, retry_after_ms should be lower
         # than 1min.
-        self.assertTrue(retry_after_ms < 6000)
+        self.assertLess(retry_after_ms, 6000)
+        assert retry_header
+        self.assertLessEqual(int(retry_header[0]), 6)
 
         self.reactor.advance(retry_after_ms / 1000.0)
 
@@ -262,7 +270,8 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase):
                 # rc_login dict here, we need to set this manually as well
                 "address": {"per_second": 10000, "burst_count": 10000},
                 "failed_attempts": {"per_second": 0.17, "burst_count": 5},
-            }
+            },
+            "experimental_features": {"msc4041_enabled": True},
         }
     )
     def test_POST_ratelimiting_per_account_failed_attempts(self) -> None:
@@ -279,12 +288,15 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase):
             if i == 5:
                 self.assertEqual(channel.code, 429, msg=channel.result)
                 retry_after_ms = int(channel.json_body["retry_after_ms"])
+                retry_header = channel.headers.getRawHeaders("Retry-After")
             else:
                 self.assertEqual(channel.code, 403, msg=channel.result)
 
         # Since we're ratelimiting at 1 request/min, retry_after_ms should be lower
         # than 1min.
-        self.assertTrue(retry_after_ms < 6000)
+        self.assertLess(retry_after_ms, 6000)
+        assert retry_header
+        self.assertLessEqual(int(retry_header[0]), 6)
 
         self.reactor.advance(retry_after_ms / 1000.0 + 1.0)
 
-- 
cgit 1.5.1


From e691243e191d9dad2bcbf55f9659d007f75fd28e Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 24 Aug 2023 15:53:07 +0100
Subject: Fix typechecking with twisted trunk (#16121)

---
 .github/workflows/twisted_trunk.yml   |  4 ++--
 changelog.d/16121.misc                |  1 +
 synapse/handlers/message.py           | 32 ++++++++++++++++----------------
 synapse/logging/context.py            | 19 ++++++++++---------
 synapse/util/caches/deferred_cache.py |  2 +-
 tests/util/test_async_helpers.py      | 14 ++++++--------
 6 files changed, 36 insertions(+), 36 deletions(-)
 create mode 100644 changelog.d/16121.misc

(limited to 'synapse')

diff --git a/.github/workflows/twisted_trunk.yml b/.github/workflows/twisted_trunk.yml
index 67ccc03f6e..7d629a4ed0 100644
--- a/.github/workflows/twisted_trunk.yml
+++ b/.github/workflows/twisted_trunk.yml
@@ -54,8 +54,8 @@ jobs:
           poetry remove twisted
           poetry add --extras tls git+https://github.com/twisted/twisted.git#${{ inputs.twisted_ref || 'trunk' }}
           poetry install --no-interaction --extras "all test"
-      - name: Remove warn_unused_ignores from mypy config
-        run: sed '/warn_unused_ignores = True/d' -i mypy.ini
+      - name: Remove unhelpful options from mypy config
+        run: sed -e '/warn_unused_ignores = True/d' -e '/warn_redundant_casts = True/d' -i mypy.ini
       - run: poetry run mypy
 
   trial:
diff --git a/changelog.d/16121.misc b/changelog.d/16121.misc
new file mode 100644
index 0000000000..f325d2a31d
--- /dev/null
+++ b/changelog.d/16121.misc
@@ -0,0 +1 @@
+Attempt to fix the twisted trunk job.
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index a74db1dccf..3184bfb047 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -1474,23 +1474,23 @@ class EventCreationHandler:
 
         # We now persist the event (and update the cache in parallel, since we
         # don't want to block on it).
-        event, context = events_and_context[0]
+        #
+        # Note: mypy gets confused if we inline `deferreds` and check with twisted#11770.
+        # Some kind of bug in mypy's deduction?
+        deferreds = (
+            run_in_background(
+                self._persist_events,
+                requester=requester,
+                events_and_context=events_and_context,
+                ratelimit=ratelimit,
+                extra_users=extra_users,
+            ),
+            run_in_background(
+                self.cache_joined_hosts_for_events, events_and_context
+            ).addErrback(log_failure, "cache_joined_hosts_for_event failed"),
+        )
         result, _ = await make_deferred_yieldable(
-            gather_results(
-                (
-                    run_in_background(
-                        self._persist_events,
-                        requester=requester,
-                        events_and_context=events_and_context,
-                        ratelimit=ratelimit,
-                        extra_users=extra_users,
-                    ),
-                    run_in_background(
-                        self.cache_joined_hosts_for_events, events_and_context
-                    ).addErrback(log_failure, "cache_joined_hosts_for_event failed"),
-                ),
-                consumeErrors=True,
-            )
+            gather_results(deferreds, consumeErrors=True)
         ).addErrback(unwrapFirstError)
 
         return result
diff --git a/synapse/logging/context.py b/synapse/logging/context.py
index f62bea968f..64c6ae4512 100644
--- a/synapse/logging/context.py
+++ b/synapse/logging/context.py
@@ -809,23 +809,24 @@ def run_in_background(  # type: ignore[misc]
 
     # `res` may be a coroutine, `Deferred`, some other kind of awaitable, or a plain
     # value. Convert it to a `Deferred`.
+    d: "defer.Deferred[R]"
     if isinstance(res, typing.Coroutine):
         # Wrap the coroutine in a `Deferred`.
-        res = defer.ensureDeferred(res)
+        d = defer.ensureDeferred(res)
     elif isinstance(res, defer.Deferred):
-        pass
+        d = res
     elif isinstance(res, Awaitable):
         # `res` is probably some kind of completed awaitable, such as a `DoneAwaitable`
         # or `Future` from `make_awaitable`.
-        res = defer.ensureDeferred(_unwrap_awaitable(res))
+        d = defer.ensureDeferred(_unwrap_awaitable(res))
     else:
         # `res` is a plain value. Wrap it in a `Deferred`.
-        res = defer.succeed(res)
+        d = defer.succeed(res)
 
-    if res.called and not res.paused:
+    if d.called and not d.paused:
         # The function should have maintained the logcontext, so we can
         # optimise out the messing about
-        return res
+        return d
 
     # The function may have reset the context before returning, so
     # we need to restore it now.
@@ -843,8 +844,8 @@ def run_in_background(  # type: ignore[misc]
     # which is supposed to have a single entry and exit point. But
     # by spawning off another deferred, we are effectively
     # adding a new exit point.)
-    res.addBoth(_set_context_cb, ctx)
-    return res
+    d.addBoth(_set_context_cb, ctx)
+    return d
 
 
 T = TypeVar("T")
@@ -877,7 +878,7 @@ def make_deferred_yieldable(deferred: "defer.Deferred[T]") -> "defer.Deferred[T]
 ResultT = TypeVar("ResultT")
 
 
-def _set_context_cb(result: ResultT, context: LoggingContext) -> ResultT:
+def _set_context_cb(result: ResultT, context: LoggingContextOrSentinel) -> ResultT:
     """A callback function which just sets the logging context"""
     set_current_context(context)
     return result
diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py
index bf7bd351e0..029eedcc6f 100644
--- a/synapse/util/caches/deferred_cache.py
+++ b/synapse/util/caches/deferred_cache.py
@@ -470,7 +470,7 @@ class CacheMultipleEntries(CacheEntry[KT, VT]):
     def deferred(self, key: KT) -> "defer.Deferred[VT]":
         if not self._deferred:
             self._deferred = ObservableDeferred(defer.Deferred(), consumeErrors=True)
-        return self._deferred.observe().addCallback(lambda res: res.get(key))
+        return self._deferred.observe().addCallback(lambda res: res[key])
 
     def add_invalidation_callback(
         self, key: KT, callback: Optional[Callable[[], None]]
diff --git a/tests/util/test_async_helpers.py b/tests/util/test_async_helpers.py
index 91cac9822a..05983ed434 100644
--- a/tests/util/test_async_helpers.py
+++ b/tests/util/test_async_helpers.py
@@ -60,11 +60,9 @@ class ObservableDeferredTest(TestCase):
         observer1.addBoth(check_called_first)
 
         # store the results
-        results: List[Optional[ObservableDeferred[int]]] = [None, None]
+        results: List[Optional[int]] = [None, None]
 
-        def check_val(
-            res: ObservableDeferred[int], idx: int
-        ) -> ObservableDeferred[int]:
+        def check_val(res: int, idx: int) -> int:
             results[idx] = res
             return res
 
@@ -93,14 +91,14 @@ class ObservableDeferredTest(TestCase):
         observer1.addBoth(check_called_first)
 
         # store the results
-        results: List[Optional[ObservableDeferred[str]]] = [None, None]
+        results: List[Optional[Failure]] = [None, None]
 
-        def check_val(res: ObservableDeferred[str], idx: int) -> None:
+        def check_failure(res: Failure, idx: int) -> None:
             results[idx] = res
             return None
 
-        observer1.addErrback(check_val, 0)
-        observer2.addErrback(check_val, 1)
+        observer1.addErrback(check_failure, 0)
+        observer2.addErrback(check_failure, 1)
 
         try:
             raise Exception("gah!")
-- 
cgit 1.5.1


From aeeca2a62ebfb601efa7930acae0897c8d3e43df Mon Sep 17 00:00:00 2001
From: Aurélien Grimpard <aurelien@grimpard.net>
Date: Thu, 24 Aug 2023 22:11:23 +0200
Subject: Add configuration setting for CAS protocol version (#15816)

---
 changelog.d/15816.feature                        |  1 +
 docs/usage/configuration/config_documentation.md |  2 ++
 synapse/config/cas.py                            | 13 ++++++++++++-
 synapse/handlers/cas.py                          |  6 +++++-
 4 files changed, 20 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15816.feature

(limited to 'synapse')

diff --git a/changelog.d/15816.feature b/changelog.d/15816.feature
new file mode 100644
index 0000000000..9248dd6792
--- /dev/null
+++ b/changelog.d/15816.feature
@@ -0,0 +1 @@
+Add configuration setting for CAS protocol version. Contributed by Aurélien Grimpard.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 743c51d76a..235f873860 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3420,6 +3420,7 @@ Has the following sub-options:
    to style the login flow according to the identity provider in question.
    See the [spec](https://spec.matrix.org/latest/) for possible options here.
 * `server_url`: The URL of the CAS authorization endpoint.
+* `protocol_version`: The CAS protocol version (1, 2 or 3). Defaults to none. Version 3 is required if you want to use `required_attributes`.
 * `displayname_attribute`: The attribute of the CAS response to use as the display name.
    If no name is given here, no displayname will be set.
 * `required_attributes`:  It is possible to configure Synapse to only allow logins if CAS attributes
@@ -3433,6 +3434,7 @@ Example configuration:
 cas_config:
   enabled: true
   server_url: "https://cas-server.com"
+  protocol_version: 3
   displayname_attribute: name
   required_attributes:
     userGroup: "staff"
diff --git a/synapse/config/cas.py b/synapse/config/cas.py
index c4e63e7411..6e2d9addbf 100644
--- a/synapse/config/cas.py
+++ b/synapse/config/cas.py
@@ -18,7 +18,7 @@ from typing import Any, List
 from synapse.config.sso import SsoAttributeRequirement
 from synapse.types import JsonDict
 
-from ._base import Config
+from ._base import Config, ConfigError
 from ._util import validate_config
 
 
@@ -41,6 +41,16 @@ class CasConfig(Config):
             public_baseurl = self.root.server.public_baseurl
             self.cas_service_url = public_baseurl + "_matrix/client/r0/login/cas/ticket"
 
+            self.cas_protocol_version = cas_config.get("protocol_version")
+            if (
+                self.cas_protocol_version is not None
+                and self.cas_protocol_version not in [1, 2, 3]
+            ):
+                raise ConfigError(
+                    "Unsupported CAS protocol version %s (only versions 1, 2, 3 are supported)"
+                    % (self.cas_protocol_version,),
+                    ("cas_config", "protocol_version"),
+                )
             self.cas_displayname_attribute = cas_config.get("displayname_attribute")
             required_attributes = cas_config.get("required_attributes") or {}
             self.cas_required_attributes = _parsed_required_attributes_def(
@@ -54,6 +64,7 @@ class CasConfig(Config):
         else:
             self.cas_server_url = None
             self.cas_service_url = None
+            self.cas_protocol_version = None
             self.cas_displayname_attribute = None
             self.cas_required_attributes = []
 
diff --git a/synapse/handlers/cas.py b/synapse/handlers/cas.py
index 5c71637038..a850545453 100644
--- a/synapse/handlers/cas.py
+++ b/synapse/handlers/cas.py
@@ -67,6 +67,7 @@ class CasHandler:
 
         self._cas_server_url = hs.config.cas.cas_server_url
         self._cas_service_url = hs.config.cas.cas_service_url
+        self._cas_protocol_version = hs.config.cas.cas_protocol_version
         self._cas_displayname_attribute = hs.config.cas.cas_displayname_attribute
         self._cas_required_attributes = hs.config.cas.cas_required_attributes
 
@@ -121,7 +122,10 @@ class CasHandler:
         Returns:
             The parsed CAS response.
         """
-        uri = self._cas_server_url + "/proxyValidate"
+        if self._cas_protocol_version == 3:
+            uri = self._cas_server_url + "/p3/proxyValidate"
+        else:
+            uri = self._cas_server_url + "/proxyValidate"
         args = {
             "ticket": ticket,
             "service": self._build_service_param(service_args),
-- 
cgit 1.5.1


From fcf7a5759efd9bd81838baf298e80e79218f3bf0 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 25 Aug 2023 12:11:40 -0400
Subject: Send proper JSON POST data to /publicRooms (#16185)

The include_all_networks was previously sent in the JSON body as
string "true" and "false" instead of boolean true and false.
---
 changelog.d/16185.bugfix               |  1 +
 synapse/federation/transport/client.py | 16 ++++++----------
 2 files changed, 7 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/16185.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16185.bugfix b/changelog.d/16185.bugfix
new file mode 100644
index 0000000000..e62c9c7a0d
--- /dev/null
+++ b/changelog.d/16185.bugfix
@@ -0,0 +1 @@
+Fix a spec compliance issue where requests to the `/publicRooms` federation API would specify `include_all_networks` as a string.
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 0b17f713ea..5ce3f345cb 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -475,13 +475,11 @@ class TransportLayerClient:
         See synapse.federation.federation_client.FederationClient.get_public_rooms for
         more information.
         """
+        path = _create_v1_path("/publicRooms")
+
         if search_filter:
             # this uses MSC2197 (Search Filtering over Federation)
-            path = _create_v1_path("/publicRooms")
-
-            data: Dict[str, Any] = {
-                "include_all_networks": "true" if include_all_networks else "false"
-            }
+            data: Dict[str, Any] = {"include_all_networks": include_all_networks}
             if third_party_instance_id:
                 data["third_party_instance_id"] = third_party_instance_id
             if limit:
@@ -505,17 +503,15 @@ class TransportLayerClient:
                     )
                 raise
         else:
-            path = _create_v1_path("/publicRooms")
-
             args: Dict[str, Union[str, Iterable[str]]] = {
                 "include_all_networks": "true" if include_all_networks else "false"
             }
             if third_party_instance_id:
-                args["third_party_instance_id"] = (third_party_instance_id,)
+                args["third_party_instance_id"] = third_party_instance_id
             if limit:
-                args["limit"] = [str(limit)]
+                args["limit"] = str(limit)
             if since_token:
-                args["since"] = [since_token]
+                args["since"] = since_token
 
             try:
                 response = await self.client.get_json(
-- 
cgit 1.5.1


From 82699428e392f63c269e8b5d4d4c4d0afc11684b Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 25 Aug 2023 14:10:31 -0400
Subject: Validate input to POST /key/v2/query endpoint. (#16183)

To avoid 500 internal server errors with garbage input.
---
 changelog.d/16183.misc                     |  1 +
 synapse/rest/key/v2/remote_key_resource.py | 39 ++++++++++++++++++++++--------
 2 files changed, 30 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/16183.misc

(limited to 'synapse')

diff --git a/changelog.d/16183.misc b/changelog.d/16183.misc
new file mode 100644
index 0000000000..305d5baa6e
--- /dev/null
+++ b/changelog.d/16183.misc
@@ -0,0 +1 @@
+Improve error reporting of invalid data passed to `/_matrix/key/v2/query`.
diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py
index 981fd1f58a..0aaa838d04 100644
--- a/synapse/rest/key/v2/remote_key_resource.py
+++ b/synapse/rest/key/v2/remote_key_resource.py
@@ -16,6 +16,7 @@ import logging
 import re
 from typing import TYPE_CHECKING, Dict, Mapping, Optional, Set, Tuple
 
+from pydantic import Extra, StrictInt, StrictStr
 from signedjson.sign import sign_json
 
 from twisted.web.server import Request
@@ -24,9 +25,10 @@ from synapse.crypto.keyring import ServerKeyFetcher
 from synapse.http.server import HttpServer
 from synapse.http.servlet import (
     RestServlet,
+    parse_and_validate_json_object_from_request,
     parse_integer,
-    parse_json_object_from_request,
 )
+from synapse.rest.models import RequestBodyModel
 from synapse.storage.keys import FetchKeyResultForRemote
 from synapse.types import JsonDict
 from synapse.util import json_decoder
@@ -38,6 +40,13 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
+class _KeyQueryCriteriaDataModel(RequestBodyModel):
+    class Config:
+        extra = Extra.allow
+
+    minimum_valid_until_ts: Optional[StrictInt]
+
+
 class RemoteKey(RestServlet):
     """HTTP resource for retrieving the TLS certificate and NACL signature
     verification keys for a collection of servers. Checks that the reported
@@ -96,6 +105,9 @@ class RemoteKey(RestServlet):
 
     CATEGORY = "Federation requests"
 
+    class PostBody(RequestBodyModel):
+        server_keys: Dict[StrictStr, Dict[StrictStr, _KeyQueryCriteriaDataModel]]
+
     def __init__(self, hs: "HomeServer"):
         self.fetcher = ServerKeyFetcher(hs)
         self.store = hs.get_datastores().main
@@ -137,24 +149,29 @@ class RemoteKey(RestServlet):
             )
 
             minimum_valid_until_ts = parse_integer(request, "minimum_valid_until_ts")
-            arguments = {}
-            if minimum_valid_until_ts is not None:
-                arguments["minimum_valid_until_ts"] = minimum_valid_until_ts
-            query = {server: {key_id: arguments}}
+            query = {
+                server: {
+                    key_id: _KeyQueryCriteriaDataModel(
+                        minimum_valid_until_ts=minimum_valid_until_ts
+                    )
+                }
+            }
         else:
             query = {server: {}}
 
         return 200, await self.query_keys(query, query_remote_on_cache_miss=True)
 
     async def on_POST(self, request: Request) -> Tuple[int, JsonDict]:
-        content = parse_json_object_from_request(request)
+        content = parse_and_validate_json_object_from_request(request, self.PostBody)
 
-        query = content["server_keys"]
+        query = content.server_keys
 
         return 200, await self.query_keys(query, query_remote_on_cache_miss=True)
 
     async def query_keys(
-        self, query: JsonDict, query_remote_on_cache_miss: bool = False
+        self,
+        query: Dict[str, Dict[str, _KeyQueryCriteriaDataModel]],
+        query_remote_on_cache_miss: bool = False,
     ) -> JsonDict:
         logger.info("Handling query for keys %r", query)
 
@@ -196,8 +213,10 @@ class RemoteKey(RestServlet):
             else:
                 ts_added_ms = key_result.added_ts
                 ts_valid_until_ms = key_result.valid_until_ts
-                req_key = query.get(server_name, {}).get(key_id, {})
-                req_valid_until = req_key.get("minimum_valid_until_ts")
+                req_key = query.get(server_name, {}).get(
+                    key_id, _KeyQueryCriteriaDataModel(minimum_valid_until_ts=None)
+                )
+                req_valid_until = req_key.minimum_valid_until_ts
                 if req_valid_until is not None:
                     if ts_valid_until_ms < req_valid_until:
                         logger.debug(
-- 
cgit 1.5.1


From 84f441f88f51d3f94e1616e1e5507df0dadb6de8 Mon Sep 17 00:00:00 2001
From: V02460 <V02460@gmail.com>
Date: Fri, 25 Aug 2023 21:05:10 +0200
Subject: Prepare unit tests for Python 3.12 (#16099)

---
 .ci/scripts/calculate_jobs.py         |  3 +--
 changelog.d/16099.misc                |  1 +
 poetry.lock                           | 22 +++++++++----------
 synapse/logging/_terse_json.py        |  1 +
 tests/handlers/test_device.py         | 24 ++++++++++-----------
 tests/rest/client/test_login.py       |  5 +++--
 tests/rest/client/test_register.py    |  8 +++----
 tests/rest/client/test_relations.py   | 40 ++++++++++++++++++-----------------
 tests/storage/test_client_ips.py      | 30 +++++++++++++-------------
 tests/storage/test_devices.py         | 18 ++++++++--------
 tests/storage/test_end_to_end_keys.py | 10 ++++++---
 tests/storage/test_room.py            | 12 +++++------
 tests/test_terms_auth.py              |  4 +++-
 13 files changed, 94 insertions(+), 84 deletions(-)
 create mode 100644 changelog.d/16099.misc

(limited to 'synapse')

diff --git a/.ci/scripts/calculate_jobs.py b/.ci/scripts/calculate_jobs.py
index 50e11e6504..661887e209 100755
--- a/.ci/scripts/calculate_jobs.py
+++ b/.ci/scripts/calculate_jobs.py
@@ -47,10 +47,9 @@ if not IS_PR:
             "database": "sqlite",
             "extras": "all",
         }
-        for version in ("3.9", "3.10", "3.11")
+        for version in ("3.9", "3.10", "3.11", "3.12.0-rc.1")
     )
 
-
 trial_postgres_tests = [
     {
         "python-version": "3.8",
diff --git a/changelog.d/16099.misc b/changelog.d/16099.misc
new file mode 100644
index 0000000000..d0e2811366
--- /dev/null
+++ b/changelog.d/16099.misc
@@ -0,0 +1 @@
+Prepare unit tests for Python 3.12.
diff --git a/poetry.lock b/poetry.lock
index e62c10da9f..796890c3d8 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
 
 [[package]]
 name = "alabaster"
@@ -544,13 +544,13 @@ files = [
 
 [[package]]
 name = "elementpath"
-version = "4.1.0"
+version = "4.1.5"
 description = "XPath 1.0/2.0/3.0/3.1 parsers and selectors for ElementTree and lxml"
 optional = true
 python-versions = ">=3.7"
 files = [
-    {file = "elementpath-4.1.0-py3-none-any.whl", hash = "sha256:2b1b524223d70fd6dd63a36b9bc32e4919c96a272c2d1454094c4d85086bc6f8"},
-    {file = "elementpath-4.1.0.tar.gz", hash = "sha256:dbd7eba3cf0b3b4934f627ba24851a3e0798ef2bc9104555a4cd831f2e6e8e14"},
+    {file = "elementpath-4.1.5-py3-none-any.whl", hash = "sha256:2ac1a2fb31eb22bbbf817f8cf6752f844513216263f0e3892c8e79782fe4bb55"},
+    {file = "elementpath-4.1.5.tar.gz", hash = "sha256:c2d6dc524b29ef751ecfc416b0627668119d8812441c555d7471da41d4bacb8d"},
 ]
 
 [package.extras]
@@ -3207,22 +3207,22 @@ files = [
 
 [[package]]
 name = "xmlschema"
-version = "2.2.2"
+version = "2.4.0"
 description = "An XML Schema validator and decoder"
 optional = true
 python-versions = ">=3.7"
 files = [
-    {file = "xmlschema-2.2.2-py3-none-any.whl", hash = "sha256:557f3632b54b6ff10576736bba62e43db84eb60f6465a83818576cd9ffcc1799"},
-    {file = "xmlschema-2.2.2.tar.gz", hash = "sha256:0caa96668807b4b51c42a0fe2b6610752bc59f069615df3e34dcfffb962973fd"},
+    {file = "xmlschema-2.4.0-py3-none-any.whl", hash = "sha256:dc87be0caaa61f42649899189aab2fd8e0d567f2cf548433ba7b79278d231a4a"},
+    {file = "xmlschema-2.4.0.tar.gz", hash = "sha256:d74cd0c10866ac609e1ef94a5a69b018ad16e39077bc6393408b40c6babee793"},
 ]
 
 [package.dependencies]
-elementpath = ">=4.0.0,<5.0.0"
+elementpath = ">=4.1.5,<5.0.0"
 
 [package.extras]
-codegen = ["elementpath (>=4.0.0,<5.0.0)", "jinja2"]
-dev = ["Sphinx", "coverage", "elementpath (>=4.0.0,<5.0.0)", "flake8", "jinja2", "lxml", "lxml-stubs", "memory-profiler", "mypy", "sphinx-rtd-theme", "tox"]
-docs = ["Sphinx", "elementpath (>=4.0.0,<5.0.0)", "jinja2", "sphinx-rtd-theme"]
+codegen = ["elementpath (>=4.1.5,<5.0.0)", "jinja2"]
+dev = ["Sphinx", "coverage", "elementpath (>=4.1.5,<5.0.0)", "flake8", "jinja2", "lxml", "lxml-stubs", "memory-profiler", "mypy", "sphinx-rtd-theme", "tox"]
+docs = ["Sphinx", "elementpath (>=4.1.5,<5.0.0)", "jinja2", "sphinx-rtd-theme"]
 
 [[package]]
 name = "zipp"
diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py
index b78d6e17c9..98c6038ff2 100644
--- a/synapse/logging/_terse_json.py
+++ b/synapse/logging/_terse_json.py
@@ -44,6 +44,7 @@ _IGNORED_LOG_RECORD_ATTRIBUTES = {
     "processName",
     "relativeCreated",
     "stack_info",
+    "taskName",
     "thread",
     "threadName",
 }
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index dca539d203..55a4f95ef3 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -122,50 +122,50 @@ class DeviceTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual(3, len(res))
         device_map = {d["device_id"]: d for d in res}
-        self.assertDictContainsSubset(
+        self.assertLessEqual(
             {
                 "user_id": user1,
                 "device_id": "xyz",
                 "display_name": "display 0",
                 "last_seen_ip": None,
                 "last_seen_ts": None,
-            },
-            device_map["xyz"],
+            }.items(),
+            device_map["xyz"].items(),
         )
-        self.assertDictContainsSubset(
+        self.assertLessEqual(
             {
                 "user_id": user1,
                 "device_id": "fco",
                 "display_name": "display 1",
                 "last_seen_ip": "ip1",
                 "last_seen_ts": 1000000,
-            },
-            device_map["fco"],
+            }.items(),
+            device_map["fco"].items(),
         )
-        self.assertDictContainsSubset(
+        self.assertLessEqual(
             {
                 "user_id": user1,
                 "device_id": "abc",
                 "display_name": "display 2",
                 "last_seen_ip": "ip3",
                 "last_seen_ts": 3000000,
-            },
-            device_map["abc"],
+            }.items(),
+            device_map["abc"].items(),
         )
 
     def test_get_device(self) -> None:
         self._record_users()
 
         res = self.get_success(self.handler.get_device(user1, "abc"))
-        self.assertDictContainsSubset(
+        self.assertLessEqual(
             {
                 "user_id": user1,
                 "device_id": "abc",
                 "display_name": "display 2",
                 "last_seen_ip": "ip3",
                 "last_seen_ts": 3000000,
-            },
-            res,
+            }.items(),
+            res.items(),
         )
 
     def test_delete_device(self) -> None:
diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py
index 62c32cae5e..a2a6589564 100644
--- a/tests/rest/client/test_login.py
+++ b/tests/rest/client/test_login.py
@@ -581,8 +581,9 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase):
             body,
         )
         self.assertEqual(channel.code, 403, channel.result)
-        self.assertDictContainsSubset(
-            {"errcode": Codes.LIMIT_EXCEEDED, "extra": "value"}, channel.json_body
+        self.assertLessEqual(
+            {"errcode": Codes.LIMIT_EXCEEDED, "extra": "value"}.items(),
+            channel.json_body.items(),
         )
 
 
diff --git a/tests/rest/client/test_register.py b/tests/rest/client/test_register.py
index b228dba861..c33393dc28 100644
--- a/tests/rest/client/test_register.py
+++ b/tests/rest/client/test_register.py
@@ -75,7 +75,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual(channel.code, 200, msg=channel.result)
         det_data = {"user_id": user_id, "home_server": self.hs.hostname}
-        self.assertDictContainsSubset(det_data, channel.json_body)
+        self.assertLessEqual(det_data.items(), channel.json_body.items())
 
     def test_POST_appservice_registration_no_type(self) -> None:
         as_token = "i_am_an_app_service"
@@ -136,7 +136,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase):
             "device_id": device_id,
         }
         self.assertEqual(channel.code, 200, msg=channel.result)
-        self.assertDictContainsSubset(det_data, channel.json_body)
+        self.assertLessEqual(det_data.items(), channel.json_body.items())
 
     @override_config({"enable_registration": False})
     def test_POST_disabled_registration(self) -> None:
@@ -157,7 +157,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase):
 
         det_data = {"home_server": self.hs.hostname, "device_id": "guest_device"}
         self.assertEqual(channel.code, 200, msg=channel.result)
-        self.assertDictContainsSubset(det_data, channel.json_body)
+        self.assertLessEqual(det_data.items(), channel.json_body.items())
 
     def test_POST_disabled_guest_registration(self) -> None:
         self.hs.config.registration.allow_guest_access = False
@@ -267,7 +267,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase):
             "device_id": device_id,
         }
         self.assertEqual(channel.code, 200, msg=channel.result)
-        self.assertDictContainsSubset(det_data, channel.json_body)
+        self.assertLessEqual(det_data.items(), channel.json_body.items())
 
         # Check the `completed` counter has been incremented and pending is 0
         res = self.get_success(
diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py
index d3f6191996..61773fb28c 100644
--- a/tests/rest/client/test_relations.py
+++ b/tests/rest/client/test_relations.py
@@ -570,7 +570,7 @@ class RelationsTestCase(BaseRelationsTestCase):
         )
         self.assertEqual(200, channel.code, channel.json_body)
         event_result = channel.json_body
-        self.assertDictContainsSubset(original_body, event_result["content"])
+        self.assertLessEqual(original_body.items(), event_result["content"].items())
 
         # also check /context, which returns the *edited* event
         channel = self.make_request(
@@ -587,14 +587,14 @@ class RelationsTestCase(BaseRelationsTestCase):
             (context_result, "/context"),
         ):
             # The reference metadata should still be intact.
-            self.assertDictContainsSubset(
+            self.assertLessEqual(
                 {
                     "m.relates_to": {
                         "event_id": self.parent_id,
                         "rel_type": "m.reference",
                     }
-                },
-                result_event_dict["content"],
+                }.items(),
+                result_event_dict["content"].items(),
                 desc,
             )
 
@@ -1372,9 +1372,11 @@ class BundledAggregationsTestCase(BaseRelationsTestCase):
         latest_event_in_thread = thread_summary["latest_event"]
         # The latest event in the thread should have the edit appear under the
         # bundled aggregations.
-        self.assertDictContainsSubset(
-            {"event_id": edit_event_id, "sender": "@alice:test"},
-            latest_event_in_thread["unsigned"]["m.relations"][RelationTypes.REPLACE],
+        self.assertLessEqual(
+            {"event_id": edit_event_id, "sender": "@alice:test"}.items(),
+            latest_event_in_thread["unsigned"]["m.relations"][
+                RelationTypes.REPLACE
+            ].items(),
         )
 
     def test_aggregation_get_event_for_annotation(self) -> None:
@@ -1637,9 +1639,9 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
         ##################################################
         self.assertEqual(self._get_related_events(), list(reversed(thread_replies)))
         relations = self._get_bundled_aggregations()
-        self.assertDictContainsSubset(
-            {"count": 3, "current_user_participated": True},
-            relations[RelationTypes.THREAD],
+        self.assertLessEqual(
+            {"count": 3, "current_user_participated": True}.items(),
+            relations[RelationTypes.THREAD].items(),
         )
         # The latest event is the last sent event.
         self.assertEqual(
@@ -1658,9 +1660,9 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
         # The thread should still exist, but the latest event should be updated.
         self.assertEqual(self._get_related_events(), list(reversed(thread_replies)))
         relations = self._get_bundled_aggregations()
-        self.assertDictContainsSubset(
-            {"count": 2, "current_user_participated": True},
-            relations[RelationTypes.THREAD],
+        self.assertLessEqual(
+            {"count": 2, "current_user_participated": True}.items(),
+            relations[RelationTypes.THREAD].items(),
         )
         # And the latest event is the last unredacted event.
         self.assertEqual(
@@ -1677,9 +1679,9 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
         # Nothing should have changed (except the thread count).
         self.assertEqual(self._get_related_events(), thread_replies)
         relations = self._get_bundled_aggregations()
-        self.assertDictContainsSubset(
-            {"count": 1, "current_user_participated": True},
-            relations[RelationTypes.THREAD],
+        self.assertLessEqual(
+            {"count": 1, "current_user_participated": True}.items(),
+            relations[RelationTypes.THREAD].items(),
         )
         # And the latest event is the last unredacted event.
         self.assertEqual(
@@ -1774,12 +1776,12 @@ class RelationRedactionTestCase(BaseRelationsTestCase):
         event_ids = self._get_related_events()
         relations = self._get_bundled_aggregations()
         self.assertEqual(len(event_ids), 1)
-        self.assertDictContainsSubset(
+        self.assertLessEqual(
             {
                 "count": 1,
                 "current_user_participated": True,
-            },
-            relations[RelationTypes.THREAD],
+            }.items(),
+            relations[RelationTypes.THREAD].items(),
         )
         self.assertEqual(
             relations[RelationTypes.THREAD]["latest_event"]["event_id"],
diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py
index 12e24d4dbd..6b9692c486 100644
--- a/tests/storage/test_client_ips.py
+++ b/tests/storage/test_client_ips.py
@@ -65,15 +65,15 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
         )
 
         r = result[(user_id, device_id)]
-        self.assertDictContainsSubset(
+        self.assertLessEqual(
             {
                 "user_id": user_id,
                 "device_id": device_id,
                 "ip": "ip",
                 "user_agent": "user_agent",
                 "last_seen": 12345678000,
-            },
-            r,
+            }.items(),
+            r.items(),
         )
 
     def test_insert_new_client_ip_none_device_id(self) -> None:
@@ -526,15 +526,15 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
         )
 
         r = result[(user_id, device_id)]
-        self.assertDictContainsSubset(
+        self.assertLessEqual(
             {
                 "user_id": user_id,
                 "device_id": device_id,
                 "ip": None,
                 "user_agent": None,
                 "last_seen": None,
-            },
-            r,
+            }.items(),
+            r.items(),
         )
 
         # Register the background update to run again.
@@ -561,15 +561,15 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
         )
 
         r = result[(user_id, device_id)]
-        self.assertDictContainsSubset(
+        self.assertLessEqual(
             {
                 "user_id": user_id,
                 "device_id": device_id,
                 "ip": "ip",
                 "user_agent": "user_agent",
                 "last_seen": 0,
-            },
-            r,
+            }.items(),
+            r.items(),
         )
 
     def test_old_user_ips_pruned(self) -> None:
@@ -640,15 +640,15 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase):
         )
 
         r = result2[(user_id, device_id)]
-        self.assertDictContainsSubset(
+        self.assertLessEqual(
             {
                 "user_id": user_id,
                 "device_id": device_id,
                 "ip": "ip",
                 "user_agent": "user_agent",
                 "last_seen": 0,
-            },
-            r,
+            }.items(),
+            r.items(),
         )
 
     def test_invalid_user_agents_are_ignored(self) -> None:
@@ -777,13 +777,13 @@ class ClientIpAuthTestCase(unittest.HomeserverTestCase):
             self.store.get_last_client_ip_by_device(self.user_id, device_id)
         )
         r = result[(self.user_id, device_id)]
-        self.assertDictContainsSubset(
+        self.assertLessEqual(
             {
                 "user_id": self.user_id,
                 "device_id": device_id,
                 "ip": expected_ip,
                 "user_agent": "Mozzila pizza",
                 "last_seen": 123456100,
-            },
-            r,
+            }.items(),
+            r.items(),
         )
diff --git a/tests/storage/test_devices.py b/tests/storage/test_devices.py
index f03807c8f9..58ab41cf26 100644
--- a/tests/storage/test_devices.py
+++ b/tests/storage/test_devices.py
@@ -58,13 +58,13 @@ class DeviceStoreTestCase(HomeserverTestCase):
 
         res = self.get_success(self.store.get_device("user_id", "device_id"))
         assert res is not None
-        self.assertDictContainsSubset(
+        self.assertLessEqual(
             {
                 "user_id": "user_id",
                 "device_id": "device_id",
                 "display_name": "display_name",
-            },
-            res,
+            }.items(),
+            res.items(),
         )
 
     def test_get_devices_by_user(self) -> None:
@@ -80,21 +80,21 @@ class DeviceStoreTestCase(HomeserverTestCase):
 
         res = self.get_success(self.store.get_devices_by_user("user_id"))
         self.assertEqual(2, len(res.keys()))
-        self.assertDictContainsSubset(
+        self.assertLessEqual(
             {
                 "user_id": "user_id",
                 "device_id": "device1",
                 "display_name": "display_name 1",
-            },
-            res["device1"],
+            }.items(),
+            res["device1"].items(),
         )
-        self.assertDictContainsSubset(
+        self.assertLessEqual(
             {
                 "user_id": "user_id",
                 "device_id": "device2",
                 "display_name": "display_name 2",
-            },
-            res["device2"],
+            }.items(),
+            res["device2"].items(),
         )
 
     def test_count_devices_by_users(self) -> None:
diff --git a/tests/storage/test_end_to_end_keys.py b/tests/storage/test_end_to_end_keys.py
index 5fde3b9c78..2033377b52 100644
--- a/tests/storage/test_end_to_end_keys.py
+++ b/tests/storage/test_end_to_end_keys.py
@@ -38,7 +38,7 @@ class EndToEndKeyStoreTestCase(HomeserverTestCase):
         self.assertIn("user", res)
         self.assertIn("device", res["user"])
         dev = res["user"]["device"]
-        self.assertDictContainsSubset(json, dev)
+        self.assertLessEqual(json.items(), dev.items())
 
     def test_reupload_key(self) -> None:
         now = 1470174257070
@@ -71,8 +71,12 @@ class EndToEndKeyStoreTestCase(HomeserverTestCase):
         self.assertIn("user", res)
         self.assertIn("device", res["user"])
         dev = res["user"]["device"]
-        self.assertDictContainsSubset(
-            {"key": "value", "unsigned": {"device_display_name": "display_name"}}, dev
+        self.assertLessEqual(
+            {
+                "key": "value",
+                "unsigned": {"device_display_name": "display_name"},
+            }.items(),
+            dev.items(),
         )
 
     def test_multiple_devices(self) -> None:
diff --git a/tests/storage/test_room.py b/tests/storage/test_room.py
index 71ec74eadc..1e27f2c275 100644
--- a/tests/storage/test_room.py
+++ b/tests/storage/test_room.py
@@ -44,13 +44,13 @@ class RoomStoreTestCase(HomeserverTestCase):
     def test_get_room(self) -> None:
         res = self.get_success(self.store.get_room(self.room.to_string()))
         assert res is not None
-        self.assertDictContainsSubset(
+        self.assertLessEqual(
             {
                 "room_id": self.room.to_string(),
                 "creator": self.u_creator.to_string(),
                 "is_public": True,
-            },
-            res,
+            }.items(),
+            res.items(),
         )
 
     def test_get_room_unknown_room(self) -> None:
@@ -59,13 +59,13 @@ class RoomStoreTestCase(HomeserverTestCase):
     def test_get_room_with_stats(self) -> None:
         res = self.get_success(self.store.get_room_with_stats(self.room.to_string()))
         assert res is not None
-        self.assertDictContainsSubset(
+        self.assertLessEqual(
             {
                 "room_id": self.room.to_string(),
                 "creator": self.u_creator.to_string(),
                 "public": True,
-            },
-            res,
+            }.items(),
+            res.items(),
         )
 
     def test_get_room_with_stats_unknown_room(self) -> None:
diff --git a/tests/test_terms_auth.py b/tests/test_terms_auth.py
index 52424aa087..64a49488c6 100644
--- a/tests/test_terms_auth.py
+++ b/tests/test_terms_auth.py
@@ -85,7 +85,9 @@ class TermsTestCase(unittest.HomeserverTestCase):
             }
         }
         self.assertIsInstance(channel.json_body["params"], dict)
-        self.assertDictContainsSubset(channel.json_body["params"], expected_params)
+        self.assertLessEqual(
+            channel.json_body["params"].items(), expected_params.items()
+        )
 
         # We have to complete the dummy auth stage before completing the terms stage
         request_data = {
-- 
cgit 1.5.1


From 501da8ecd8f056fb953fbccb43fc60ba9edb91d5 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Mon, 28 Aug 2023 16:03:51 +0200
Subject: Task scheduler: add replication notify for new task to launch ASAP
 (#16184)

---
 changelog.d/16184.misc              |  1 +
 synapse/replication/tcp/commands.py | 12 +++++
 synapse/replication/tcp/handler.py  | 18 ++++++++
 synapse/util/task_scheduler.py      | 92 +++++++++++++++++--------------------
 tests/util/test_task_scheduler.py   | 58 +++++++++++++++--------
 5 files changed, 114 insertions(+), 67 deletions(-)
 create mode 100644 changelog.d/16184.misc

(limited to 'synapse')

diff --git a/changelog.d/16184.misc b/changelog.d/16184.misc
new file mode 100644
index 0000000000..3c0baddfe1
--- /dev/null
+++ b/changelog.d/16184.misc
@@ -0,0 +1 @@
+Task scheduler: add replication notify for new task to launch ASAP.
diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py
index 10f5c98ff8..58a871c6d9 100644
--- a/synapse/replication/tcp/commands.py
+++ b/synapse/replication/tcp/commands.py
@@ -452,6 +452,17 @@ class LockReleasedCommand(Command):
         return json_encoder.encode([self.instance_name, self.lock_name, self.lock_key])
 
 
+class NewActiveTaskCommand(_SimpleCommand):
+    """Sent to inform instance handling background tasks that a new active task is available to run.
+
+    Format::
+
+        NEW_ACTIVE_TASK "<task_id>"
+    """
+
+    NAME = "NEW_ACTIVE_TASK"
+
+
 _COMMANDS: Tuple[Type[Command], ...] = (
     ServerCommand,
     RdataCommand,
@@ -466,6 +477,7 @@ _COMMANDS: Tuple[Type[Command], ...] = (
     RemoteServerUpCommand,
     ClearUserSyncsCommand,
     LockReleasedCommand,
+    NewActiveTaskCommand,
 )
 
 # Map of command name to command type.
diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py
index 38adcbe1d0..92c5a55acc 100644
--- a/synapse/replication/tcp/handler.py
+++ b/synapse/replication/tcp/handler.py
@@ -40,6 +40,7 @@ from synapse.replication.tcp.commands import (
     Command,
     FederationAckCommand,
     LockReleasedCommand,
+    NewActiveTaskCommand,
     PositionCommand,
     RdataCommand,
     RemoteServerUpCommand,
@@ -238,6 +239,10 @@ class ReplicationCommandHandler:
         if self._is_master:
             self._server_notices_sender = hs.get_server_notices_sender()
 
+        self._task_scheduler = None
+        if hs.config.worker.run_background_tasks:
+            self._task_scheduler = hs.get_task_scheduler()
+
         if hs.config.redis.redis_enabled:
             # If we're using Redis, it's the background worker that should
             # receive USER_IP commands and store the relevant client IPs.
@@ -663,6 +668,15 @@ class ReplicationCommandHandler:
             cmd.instance_name, cmd.lock_name, cmd.lock_key
         )
 
+    async def on_NEW_ACTIVE_TASK(
+        self, conn: IReplicationConnection, cmd: NewActiveTaskCommand
+    ) -> None:
+        """Called when we get a NEW_ACTIVE_TASK command."""
+        if self._task_scheduler:
+            task = await self._task_scheduler.get_task(cmd.data)
+            if task:
+                await self._task_scheduler._launch_task(task)
+
     def new_connection(self, connection: IReplicationConnection) -> None:
         """Called when we have a new connection."""
         self._connections.append(connection)
@@ -776,6 +790,10 @@ class ReplicationCommandHandler:
         if instance_name == self._instance_name:
             self.send_command(LockReleasedCommand(instance_name, lock_name, lock_key))
 
+    def send_new_active_task(self, task_id: str) -> None:
+        """Called when a new task has been scheduled for immediate launch and is ACTIVE."""
+        self.send_command(NewActiveTaskCommand(task_id))
+
 
 UpdateToken = TypeVar("UpdateToken")
 UpdateRow = TypeVar("UpdateRow")
diff --git a/synapse/util/task_scheduler.py b/synapse/util/task_scheduler.py
index 4aea64b338..9e89aeb748 100644
--- a/synapse/util/task_scheduler.py
+++ b/synapse/util/task_scheduler.py
@@ -57,14 +57,13 @@ class TaskScheduler:
     the code launching the task.
     You can also specify the `result` (and/or an `error`) when returning from the function.
 
-    The reconciliation loop runs every 5 mns, so this is not a precise scheduler. When wanting
-    to launch now, the launch will still not happen before the next loop run.
-
-    Tasks will be run on the worker specified with `run_background_tasks_on` config,
-    or the main one by default.
+    The reconciliation loop runs every minute, so this is not a precise scheduler.
     There is a limit of 10 concurrent tasks, so tasks may be delayed if the pool is already
     full. In this regard, please take great care that scheduled tasks can actually finished.
     For now there is no mechanism to stop a running task if it is stuck.
+
+    Tasks will be run on the worker specified with `run_background_tasks_on` config,
+    or the main one by default.
     """
 
     # Precision of the scheduler, evaluation of tasks to run will only happen
@@ -85,7 +84,7 @@ class TaskScheduler:
         self._actions: Dict[
             str,
             Callable[
-                [ScheduledTask, bool],
+                [ScheduledTask],
                 Awaitable[Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]],
             ],
         ] = {}
@@ -98,11 +97,13 @@ class TaskScheduler:
                 "handle_scheduled_tasks",
                 self._handle_scheduled_tasks,
             )
+        else:
+            self.replication_client = hs.get_replication_command_handler()
 
     def register_action(
         self,
         function: Callable[
-            [ScheduledTask, bool],
+            [ScheduledTask],
             Awaitable[Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]],
         ],
         action_name: str,
@@ -115,10 +116,9 @@ class TaskScheduler:
         calling `schedule_task` but rather in an `__init__` method.
 
         Args:
-            function: The function to be executed for this action. The parameters
-                passed to the function when launched are the `ScheduledTask` being run,
-                and a `first_launch` boolean to signal if it's a resumed task or the first
-                launch of it. The function should return a tuple of new `status`, `result`
+            function: The function to be executed for this action. The parameter
+                passed to the function when launched is the `ScheduledTask` being run.
+                The function should return a tuple of new `status`, `result`
                 and `error` as specified in `ScheduledTask`.
             action_name: The name of the action to be associated with the function
         """
@@ -171,6 +171,12 @@ class TaskScheduler:
         )
         await self._store.insert_scheduled_task(task)
 
+        if status == TaskStatus.ACTIVE:
+            if self._run_background_tasks:
+                await self._launch_task(task)
+            else:
+                self.replication_client.send_new_active_task(task.id)
+
         return task.id
 
     async def update_task(
@@ -265,21 +271,13 @@ class TaskScheduler:
         Args:
             id: id of the task to delete
         """
-        if self.task_is_running(id):
-            raise Exception(f"Task {id} is currently running and can't be deleted")
+        task = await self.get_task(id)
+        if task is None:
+            raise Exception(f"Task {id} does not exist")
+        if task.status == TaskStatus.ACTIVE:
+            raise Exception(f"Task {id} is currently ACTIVE and can't be deleted")
         await self._store.delete_scheduled_task(id)
 
-    def task_is_running(self, id: str) -> bool:
-        """Check if a task is currently running.
-
-        Can only be called from the worker handling the task scheduling.
-
-        Args:
-            id: id of the task to check
-        """
-        assert self._run_background_tasks
-        return id in self._running_tasks
-
     async def _handle_scheduled_tasks(self) -> None:
         """Main loop taking care of launching tasks and cleaning up old ones."""
         await self._launch_scheduled_tasks()
@@ -288,29 +286,11 @@ class TaskScheduler:
     async def _launch_scheduled_tasks(self) -> None:
         """Retrieve and launch scheduled tasks that should be running at that time."""
         for task in await self.get_tasks(statuses=[TaskStatus.ACTIVE]):
-            if not self.task_is_running(task.id):
-                if (
-                    len(self._running_tasks)
-                    < TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS
-                ):
-                    await self._launch_task(task, first_launch=False)
-            else:
-                if (
-                    self._clock.time_msec()
-                    > task.timestamp + TaskScheduler.LAST_UPDATE_BEFORE_WARNING_MS
-                ):
-                    logger.warn(
-                        f"Task {task.id} (action {task.action}) has seen no update for more than 24h and may be stuck"
-                    )
+            await self._launch_task(task)
         for task in await self.get_tasks(
             statuses=[TaskStatus.SCHEDULED], max_timestamp=self._clock.time_msec()
         ):
-            if (
-                not self.task_is_running(task.id)
-                and len(self._running_tasks)
-                < TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS
-            ):
-                await self._launch_task(task, first_launch=True)
+            await self._launch_task(task)
 
         running_tasks_gauge.set(len(self._running_tasks))
 
@@ -320,27 +300,27 @@ class TaskScheduler:
             statuses=[TaskStatus.FAILED, TaskStatus.COMPLETE]
         ):
             # FAILED and COMPLETE tasks should never be running
-            assert not self.task_is_running(task.id)
+            assert task.id not in self._running_tasks
             if (
                 self._clock.time_msec()
                 > task.timestamp + TaskScheduler.KEEP_TASKS_FOR_MS
             ):
                 await self._store.delete_scheduled_task(task.id)
 
-    async def _launch_task(self, task: ScheduledTask, first_launch: bool) -> None:
+    async def _launch_task(self, task: ScheduledTask) -> None:
         """Launch a scheduled task now.
 
         Args:
             task: the task to launch
-            first_launch: `True` if it's the first time is launched, `False` otherwise
         """
-        assert task.action in self._actions
+        assert self._run_background_tasks
 
+        assert task.action in self._actions
         function = self._actions[task.action]
 
         async def wrapper() -> None:
             try:
-                (status, result, error) = await function(task, first_launch)
+                (status, result, error) = await function(task)
             except Exception:
                 f = Failure()
                 logger.error(
@@ -360,6 +340,20 @@ class TaskScheduler:
             )
             self._running_tasks.remove(task.id)
 
+        if len(self._running_tasks) >= TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS:
+            return
+
+        if (
+            self._clock.time_msec()
+            > task.timestamp + TaskScheduler.LAST_UPDATE_BEFORE_WARNING_MS
+        ):
+            logger.warn(
+                f"Task {task.id} (action {task.action}) has seen no update for more than 24h and may be stuck"
+            )
+
+        if task.id in self._running_tasks:
+            return
+
         self._running_tasks.add(task.id)
         await self.update_task(task.id, status=TaskStatus.ACTIVE)
         description = f"{task.id}-{task.action}"
diff --git a/tests/util/test_task_scheduler.py b/tests/util/test_task_scheduler.py
index 3a97559bf0..8665aeb50c 100644
--- a/tests/util/test_task_scheduler.py
+++ b/tests/util/test_task_scheduler.py
@@ -22,10 +22,11 @@ from synapse.types import JsonMapping, ScheduledTask, TaskStatus
 from synapse.util import Clock
 from synapse.util.task_scheduler import TaskScheduler
 
-from tests import unittest
+from tests.replication._base import BaseMultiWorkerStreamTestCase
+from tests.unittest import HomeserverTestCase, override_config
 
 
-class TestTaskScheduler(unittest.HomeserverTestCase):
+class TestTaskScheduler(HomeserverTestCase):
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.task_scheduler = hs.get_task_scheduler()
         self.task_scheduler.register_action(self._test_task, "_test_task")
@@ -34,7 +35,7 @@ class TestTaskScheduler(unittest.HomeserverTestCase):
         self.task_scheduler.register_action(self._resumable_task, "_resumable_task")
 
     async def _test_task(
-        self, task: ScheduledTask, first_launch: bool
+        self, task: ScheduledTask
     ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
         # This test task will copy the parameters to the result
         result = None
@@ -77,7 +78,7 @@ class TestTaskScheduler(unittest.HomeserverTestCase):
         self.assertIsNone(task)
 
     async def _sleeping_task(
-        self, task: ScheduledTask, first_launch: bool
+        self, task: ScheduledTask
     ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
         # Sleep for a second
         await deferLater(self.reactor, 1, lambda: None)
@@ -85,24 +86,18 @@ class TestTaskScheduler(unittest.HomeserverTestCase):
 
     def test_schedule_lot_of_tasks(self) -> None:
         """Schedule more than `TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS` tasks and check the behavior."""
-        timestamp = self.clock.time_msec() + 30 * 1000
         task_ids = []
         for i in range(TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS + 1):
             task_ids.append(
                 self.get_success(
                     self.task_scheduler.schedule_task(
                         "_sleeping_task",
-                        timestamp=timestamp,
                         params={"val": i},
                     )
                 )
             )
 
-        # The timestamp being 30s after now the task should been executed
-        # after the first scheduling loop is run
-        self.reactor.advance((TaskScheduler.SCHEDULE_INTERVAL_MS / 1000))
-
-        # This is to give the time to the sleeping tasks to finish
+        # Give the active tasks time to finish
         self.reactor.advance(1)
 
         # Check that only MAX_CONCURRENT_RUNNING_TASKS tasks has run and that one
@@ -120,10 +115,11 @@ class TestTaskScheduler(unittest.HomeserverTestCase):
         )
 
         scheduled_tasks = [
-            t for t in tasks if t is not None and t.status == TaskStatus.SCHEDULED
+            t for t in tasks if t is not None and t.status == TaskStatus.ACTIVE
         ]
         self.assertEquals(len(scheduled_tasks), 1)
 
+        # We need to wait for the next run of the scheduler loop
         self.reactor.advance((TaskScheduler.SCHEDULE_INTERVAL_MS / 1000))
         self.reactor.advance(1)
 
@@ -138,7 +134,7 @@ class TestTaskScheduler(unittest.HomeserverTestCase):
         )
 
     async def _raising_task(
-        self, task: ScheduledTask, first_launch: bool
+        self, task: ScheduledTask
     ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
         raise Exception("raising")
 
@@ -146,15 +142,13 @@ class TestTaskScheduler(unittest.HomeserverTestCase):
         """Schedule a task raising an exception and check it runs to failure and report exception content."""
         task_id = self.get_success(self.task_scheduler.schedule_task("_raising_task"))
 
-        self.reactor.advance((TaskScheduler.SCHEDULE_INTERVAL_MS / 1000))
-
         task = self.get_success(self.task_scheduler.get_task(task_id))
         assert task is not None
         self.assertEqual(task.status, TaskStatus.FAILED)
         self.assertEqual(task.error, "raising")
 
     async def _resumable_task(
-        self, task: ScheduledTask, first_launch: bool
+        self, task: ScheduledTask
     ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
         if task.result and "in_progress" in task.result:
             return TaskStatus.COMPLETE, {"success": True}, None
@@ -169,8 +163,6 @@ class TestTaskScheduler(unittest.HomeserverTestCase):
         """Schedule a resumable task and check that it gets properly resumed and complete after simulating a synapse restart."""
         task_id = self.get_success(self.task_scheduler.schedule_task("_resumable_task"))
 
-        self.reactor.advance((TaskScheduler.SCHEDULE_INTERVAL_MS / 1000))
-
         task = self.get_success(self.task_scheduler.get_task(task_id))
         assert task is not None
         self.assertEqual(task.status, TaskStatus.ACTIVE)
@@ -184,3 +176,33 @@ class TestTaskScheduler(unittest.HomeserverTestCase):
         self.assertEqual(task.status, TaskStatus.COMPLETE)
         assert task.result is not None
         self.assertTrue(task.result.get("success"))
+
+
+class TestTaskSchedulerWithBackgroundWorker(BaseMultiWorkerStreamTestCase):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.task_scheduler = hs.get_task_scheduler()
+        self.task_scheduler.register_action(self._test_task, "_test_task")
+
+    async def _test_task(
+        self, task: ScheduledTask
+    ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
+        return (TaskStatus.COMPLETE, None, None)
+
+    @override_config({"run_background_tasks_on": "worker1"})
+    def test_schedule_task(self) -> None:
+        """Check that a task scheduled to run now is launched right away on the background worker."""
+        bg_worker_hs = self.make_worker_hs(
+            "synapse.app.generic_worker",
+            extra_config={"worker_name": "worker1"},
+        )
+        bg_worker_hs.get_task_scheduler().register_action(self._test_task, "_test_task")
+
+        task_id = self.get_success(
+            self.task_scheduler.schedule_task(
+                "_test_task",
+            )
+        )
+
+        task = self.get_success(self.task_scheduler.get_task(task_id))
+        assert task is not None
+        self.assertEqual(task.status, TaskStatus.COMPLETE)
-- 
cgit 1.5.1


From 1bf143699c0ac8dd53111bfca4628f126d65210d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 28 Aug 2023 11:03:23 -0400
Subject: Combine logic about not overriding BUSY presence. (#16170)

Simplify some of the presence code by reducing duplicated code between
worker & non-worker modes.

The main change is to push some of the logic from `user_syncing` into
`set_state`. This is done by passing whether the user is setting the presence
via a `/sync` with a new `is_sync` flag to `set_state`. If this is `true` some
additional logic is performed:

* Don't override `busy` presence.
* Update the `last_user_sync_ts`.
* Never update the status message.
---
 changelog.d/16170.misc               |   1 +
 synapse/handlers/presence.py         | 155 ++++++++++++++---------------------
 synapse/replication/http/presence.py |  10 +--
 tests/handlers/test_presence.py      |  37 +++++++--
 4 files changed, 99 insertions(+), 104 deletions(-)
 create mode 100644 changelog.d/16170.misc

(limited to 'synapse')

diff --git a/changelog.d/16170.misc b/changelog.d/16170.misc
new file mode 100644
index 0000000000..c950b54367
--- /dev/null
+++ b/changelog.d/16170.misc
@@ -0,0 +1 @@
+Simplify presence code when using workers.
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index e8e9db4b91..c395dcdb43 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -151,15 +151,13 @@ class BasePresenceHandler(abc.ABC):
 
         self._federation_queue = PresenceFederationQueue(hs, self)
 
-        self._busy_presence_enabled = hs.config.experimental.msc3026_enabled
-
         self.VALID_PRESENCE: Tuple[str, ...] = (
             PresenceState.ONLINE,
             PresenceState.UNAVAILABLE,
             PresenceState.OFFLINE,
         )
 
-        if self._busy_presence_enabled:
+        if hs.config.experimental.msc3026_enabled:
             self.VALID_PRESENCE += (PresenceState.BUSY,)
 
         active_presence = self.store.take_presence_startup_info()
@@ -255,17 +253,19 @@ class BasePresenceHandler(abc.ABC):
         self,
         target_user: UserID,
         state: JsonDict,
-        ignore_status_msg: bool = False,
         force_notify: bool = False,
+        is_sync: bool = False,
     ) -> None:
         """Set the presence state of the user.
 
         Args:
             target_user: The ID of the user to set the presence state of.
             state: The presence state as a JSON dictionary.
-            ignore_status_msg: True to ignore the "status_msg" field of the `state` dict.
-                If False, the user's current status will be updated.
             force_notify: Whether to force notification of the update to clients.
+            is_sync: True if this update was from a sync, which results in
+                *not* overriding a previously set BUSY status, updating the
+                user's last_user_sync_ts, and ignoring the "status_msg" field of
+                the `state` dict.
         """
 
     @abc.abstractmethod
@@ -491,23 +491,18 @@ class WorkerPresenceHandler(BasePresenceHandler):
         if not affect_presence or not self._presence_enabled:
             return _NullContextManager()
 
-        prev_state = await self.current_state_for_user(user_id)
-        if prev_state.state != PresenceState.BUSY:
-            # We set state here but pass ignore_status_msg = True as we don't want to
-            # cause the status message to be cleared.
-            # Note that this causes last_active_ts to be incremented which is not
-            # what the spec wants: see comment in the BasePresenceHandler version
-            # of this function.
-            await self.set_state(
-                UserID.from_string(user_id),
-                {"presence": presence_state},
-                ignore_status_msg=True,
-            )
+        # Note that this causes last_active_ts to be incremented which is not
+        # what the spec wants.
+        await self.set_state(
+            UserID.from_string(user_id),
+            state={"presence": presence_state},
+            is_sync=True,
+        )
 
         curr_sync = self._user_to_num_current_syncs.get(user_id, 0)
         self._user_to_num_current_syncs[user_id] = curr_sync + 1
 
-        # If we went from no in flight sync to some, notify replication
+        # If this is the first in-flight sync, notify replication
         if self._user_to_num_current_syncs[user_id] == 1:
             self.mark_as_coming_online(user_id)
 
@@ -518,7 +513,7 @@ class WorkerPresenceHandler(BasePresenceHandler):
             if user_id in self._user_to_num_current_syncs:
                 self._user_to_num_current_syncs[user_id] -= 1
 
-                # If we went from one in flight sync to non, notify replication
+                # If there are no more in-flight syncs, notify replication
                 if self._user_to_num_current_syncs[user_id] == 0:
                     self.mark_as_going_offline(user_id)
 
@@ -598,17 +593,19 @@ class WorkerPresenceHandler(BasePresenceHandler):
         self,
         target_user: UserID,
         state: JsonDict,
-        ignore_status_msg: bool = False,
         force_notify: bool = False,
+        is_sync: bool = False,
     ) -> None:
         """Set the presence state of the user.
 
         Args:
             target_user: The ID of the user to set the presence state of.
             state: The presence state as a JSON dictionary.
-            ignore_status_msg: True to ignore the "status_msg" field of the `state` dict.
-                If False, the user's current status will be updated.
             force_notify: Whether to force notification of the update to clients.
+            is_sync: True if this update was from a sync, which results in
+                *not* overriding a previously set BUSY status, updating the
+                user's last_user_sync_ts, and ignoring the "status_msg" field of
+                the `state` dict.
         """
         presence = state["presence"]
 
@@ -626,8 +623,8 @@ class WorkerPresenceHandler(BasePresenceHandler):
             instance_name=self._presence_writer_instance,
             user_id=user_id,
             state=state,
-            ignore_status_msg=ignore_status_msg,
             force_notify=force_notify,
+            is_sync=is_sync,
         )
 
     async def bump_presence_active_time(self, user: UserID) -> None:
@@ -992,45 +989,13 @@ class PresenceHandler(BasePresenceHandler):
         curr_sync = self.user_to_num_current_syncs.get(user_id, 0)
         self.user_to_num_current_syncs[user_id] = curr_sync + 1
 
-        prev_state = await self.current_state_for_user(user_id)
-
-        # If they're busy then they don't stop being busy just by syncing,
-        # so just update the last sync time.
-        if prev_state.state != PresenceState.BUSY:
-            # XXX: We set_state separately here and just update the last_active_ts above
-            # This keeps the logic as similar as possible between the worker and single
-            # process modes. Using set_state will actually cause last_active_ts to be
-            # updated always, which is not what the spec calls for, but synapse has done
-            # this for... forever, I think.
-            await self.set_state(
-                UserID.from_string(user_id),
-                {"presence": presence_state},
-                ignore_status_msg=True,
-            )
-            # Retrieve the new state for the logic below. This should come from the
-            # in-memory cache.
-            prev_state = await self.current_state_for_user(user_id)
-
-        # To keep the single process behaviour consistent with worker mode, run the
-        # same logic as `update_external_syncs_row`, even though it looks weird.
-        if prev_state.state == PresenceState.OFFLINE:
-            await self._update_states(
-                [
-                    prev_state.copy_and_replace(
-                        state=PresenceState.ONLINE,
-                        last_active_ts=self.clock.time_msec(),
-                        last_user_sync_ts=self.clock.time_msec(),
-                    )
-                ]
-            )
-        # otherwise, set the new presence state & update the last sync time,
-        # but don't update last_active_ts as this isn't an indication that
-        # they've been active (even though it's probably been updated by
-        # set_state above)
-        else:
-            await self._update_states(
-                [prev_state.copy_and_replace(last_user_sync_ts=self.clock.time_msec())]
-            )
+        # Note that this causes last_active_ts to be incremented which is not
+        # what the spec wants.
+        await self.set_state(
+            UserID.from_string(user_id),
+            state={"presence": presence_state},
+            is_sync=True,
+        )
 
         async def _end() -> None:
             try:
@@ -1080,32 +1045,27 @@ class PresenceHandler(BasePresenceHandler):
                 process_id, set()
             )
 
-            updates = []
+            # USER_SYNC is sent when a user starts or stops syncing on a remote
+            # process. (But only for the initial and last device.)
+            #
+            # When a user *starts* syncing it also calls set_state(...) which
+            # will update the state, last_active_ts, and last_user_sync_ts.
+            # Simply ensure the user is tracked as syncing in this case.
+            #
+            # When a user *stops* syncing, update the last_user_sync_ts and mark
+            # them as no longer syncing. Note this doesn't quite match the
+            # monolith behaviour, which updates last_user_sync_ts at the end of
+            # every sync, not just the last in-flight sync.
             if is_syncing and user_id not in process_presence:
-                if prev_state.state == PresenceState.OFFLINE:
-                    updates.append(
-                        prev_state.copy_and_replace(
-                            state=PresenceState.ONLINE,
-                            last_active_ts=sync_time_msec,
-                            last_user_sync_ts=sync_time_msec,
-                        )
-                    )
-                else:
-                    updates.append(
-                        prev_state.copy_and_replace(last_user_sync_ts=sync_time_msec)
-                    )
                 process_presence.add(user_id)
-            elif user_id in process_presence:
-                updates.append(
-                    prev_state.copy_and_replace(last_user_sync_ts=sync_time_msec)
+            elif not is_syncing and user_id in process_presence:
+                new_state = prev_state.copy_and_replace(
+                    last_user_sync_ts=sync_time_msec
                 )
+                await self._update_states([new_state])
 
-            if not is_syncing:
                 process_presence.discard(user_id)
 
-            if updates:
-                await self._update_states(updates)
-
             self.external_process_last_updated_ms[process_id] = self.clock.time_msec()
 
     async def update_external_syncs_clear(self, process_id: str) -> None:
@@ -1204,17 +1164,19 @@ class PresenceHandler(BasePresenceHandler):
         self,
         target_user: UserID,
         state: JsonDict,
-        ignore_status_msg: bool = False,
         force_notify: bool = False,
+        is_sync: bool = False,
     ) -> None:
         """Set the presence state of the user.
 
         Args:
             target_user: The ID of the user to set the presence state of.
             state: The presence state as a JSON dictionary.
-            ignore_status_msg: True to ignore the "status_msg" field of the `state` dict.
-                If False, the user's current status will be updated.
             force_notify: Whether to force notification of the update to clients.
+            is_sync: True if this update was from a sync, which results in
+                *not* overriding a previously set BUSY status, updating the
+                user's last_user_sync_ts, and ignoring the "status_msg" field of
+                the `state` dict.
         """
         status_msg = state.get("status_msg", None)
         presence = state["presence"]
@@ -1227,18 +1189,27 @@ class PresenceHandler(BasePresenceHandler):
             return
 
         user_id = target_user.to_string()
+        now = self.clock.time_msec()
 
         prev_state = await self.current_state_for_user(user_id)
 
+        # Syncs do not override a previous presence of busy.
+        #
+        # TODO: This is a hack for lack of multi-device support. Unfortunately
+        # removing this requires coordination with clients.
+        if prev_state.state == PresenceState.BUSY and is_sync:
+            presence = PresenceState.BUSY
+
         new_fields = {"state": presence}
 
-        if not ignore_status_msg:
-            new_fields["status_msg"] = status_msg
+        if presence == PresenceState.ONLINE or presence == PresenceState.BUSY:
+            new_fields["last_active_ts"] = now
 
-        if presence == PresenceState.ONLINE or (
-            presence == PresenceState.BUSY and self._busy_presence_enabled
-        ):
-            new_fields["last_active_ts"] = self.clock.time_msec()
+        if is_sync:
+            new_fields["last_user_sync_ts"] = now
+        else:
+            # Syncs do not override the status message.
+            new_fields["status_msg"] = status_msg
 
         await self._update_states(
             [prev_state.copy_and_replace(**new_fields)], force_notify=force_notify
diff --git a/synapse/replication/http/presence.py b/synapse/replication/http/presence.py
index db16aac9c2..a24fb9310b 100644
--- a/synapse/replication/http/presence.py
+++ b/synapse/replication/http/presence.py
@@ -73,8 +73,8 @@ class ReplicationPresenceSetState(ReplicationEndpoint):
 
         {
             "state": { ... },
-            "ignore_status_msg": false,
-            "force_notify": false
+            "force_notify": false,
+            "is_sync": false
         }
 
         200 OK
@@ -96,13 +96,13 @@ class ReplicationPresenceSetState(ReplicationEndpoint):
     async def _serialize_payload(  # type: ignore[override]
         user_id: str,
         state: JsonDict,
-        ignore_status_msg: bool = False,
         force_notify: bool = False,
+        is_sync: bool = False,
     ) -> JsonDict:
         return {
             "state": state,
-            "ignore_status_msg": ignore_status_msg,
             "force_notify": force_notify,
+            "is_sync": is_sync,
         }
 
     async def _handle_request(  # type: ignore[override]
@@ -111,8 +111,8 @@ class ReplicationPresenceSetState(ReplicationEndpoint):
         await self._presence_handler.set_state(
             UserID.from_string(user_id),
             content["state"],
-            content["ignore_status_msg"],
             content["force_notify"],
+            content.get("is_sync", False),
         )
 
         return (200, {})
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index 1aebcc16ad..a3fdcf7f93 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -641,13 +641,20 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         """Test that if an external process doesn't update the records for a while
         we time out their syncing users presence.
         """
-        process_id = "1"
 
-        # Notify handler that a user is now syncing.
+        # Create a worker and use it to handle /sync traffic instead.
+        # This is used to test that presence changes get replicated from workers
+        # to the main process correctly.
+        worker_to_sync_against = self.make_worker_hs(
+            "synapse.app.generic_worker", {"worker_name": "synchrotron"}
+        )
+        worker_presence_handler = worker_to_sync_against.get_presence_handler()
+
         self.get_success(
-            self.presence_handler.update_external_syncs_row(
-                process_id, self.user_id, True, self.clock.time_msec()
-            )
+            worker_presence_handler.user_syncing(
+                self.user_id, True, PresenceState.ONLINE
+            ),
+            by=0.1,
         )
 
         # Check that if we wait a while without telling the handler the user has
@@ -820,7 +827,7 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
             # This is used to test that presence changes get replicated from workers
             # to the main process correctly.
             worker_to_sync_against = self.make_worker_hs(
-                "synapse.app.generic_worker", {"worker_name": "presence_writer"}
+                "synapse.app.generic_worker", {"worker_name": "synchrotron"}
             )
 
         # Set presence to BUSY
@@ -832,7 +839,8 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         self.get_success(
             worker_to_sync_against.get_presence_handler().user_syncing(
                 self.user_id, True, PresenceState.ONLINE
-            )
+            ),
+            by=0.1,
         )
 
         # Check against the main process that the user's presence did not change.
@@ -840,6 +848,21 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         # we should still be busy
         self.assertEqual(state.state, PresenceState.BUSY)
 
+        # Advance such that the device would be discarded if it was not busy,
+        # then pump so that the _handle_timeouts function is called.
+        self.reactor.advance(IDLE_TIMER / 1000)
+        self.reactor.pump([5])
+
+        # The account should still be busy.
+        state = self.get_success(self.presence_handler.get_state(self.user_id_obj))
+        self.assertEqual(state.state, PresenceState.BUSY)
+
+        # Ensure that a /presence call can set the user *off* busy.
+        self._set_presencestate_with_status_msg(PresenceState.ONLINE, status_msg)
+
+        state = self.get_success(self.presence_handler.get_state(self.user_id_obj))
+        self.assertEqual(state.state, PresenceState.ONLINE)
+
     def _set_presencestate_with_status_msg(
         self, state: str, status_msg: Optional[str]
     ) -> None:
-- 
cgit 1.5.1


From 40901af5e096cb10ab69141875b071b4ea4ed1e0 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 28 Aug 2023 13:08:49 -0400
Subject: Pass the device ID around in the presence handler (#16171)

Refactoring to pass the device ID (in addition to the user ID) through
the presence handler (specifically the `user_syncing`, `set_state`,
and `bump_presence_active_time` methods and their replication
versions).
---
 changelog.d/16171.misc               |  1 +
 synapse/handlers/events.py           |  1 +
 synapse/handlers/message.py          |  9 ++++---
 synapse/handlers/presence.py         | 46 +++++++++++++++++++++++++++++-------
 synapse/replication/http/presence.py | 11 +++++----
 synapse/rest/client/presence.py      |  2 +-
 synapse/rest/client/read_marker.py   |  4 +++-
 synapse/rest/client/receipts.py      |  4 +++-
 synapse/rest/client/room.py          |  4 +++-
 synapse/rest/client/sync.py          |  1 +
 tests/handlers/test_presence.py      | 38 ++++++++++++++++++++---------
 11 files changed, 91 insertions(+), 30 deletions(-)
 create mode 100644 changelog.d/16171.misc

(limited to 'synapse')

diff --git a/changelog.d/16171.misc b/changelog.d/16171.misc
new file mode 100644
index 0000000000..4d709cb56e
--- /dev/null
+++ b/changelog.d/16171.misc
@@ -0,0 +1 @@
+Track per-device information in the presence code.
diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py
index 33359f6ed7..d12803bf0f 100644
--- a/synapse/handlers/events.py
+++ b/synapse/handlers/events.py
@@ -67,6 +67,7 @@ class EventStreamHandler:
 
         context = await presence_handler.user_syncing(
             requester.user.to_string(),
+            requester.device_id,
             affect_presence=affect_presence,
             presence_state=PresenceState.ONLINE,
         )
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 3184bfb047..4a15c76a7b 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -1921,7 +1921,10 @@ class EventCreationHandler:
                 # We don't want to block sending messages on any presence code. This
                 # matters as sometimes presence code can take a while.
                 run_as_background_process(
-                    "bump_presence_active_time", self._bump_active_time, requester.user
+                    "bump_presence_active_time",
+                    self._bump_active_time,
+                    requester.user,
+                    requester.device_id,
                 )
 
         async def _notify() -> None:
@@ -1958,10 +1961,10 @@ class EventCreationHandler:
         logger.info("maybe_kick_guest_users %r", current_state)
         await self.hs.get_room_member_handler().kick_guest_users(current_state)
 
-    async def _bump_active_time(self, user: UserID) -> None:
+    async def _bump_active_time(self, user: UserID, device_id: Optional[str]) -> None:
         try:
             presence = self.hs.get_presence_handler()
-            await presence.bump_presence_active_time(user)
+            await presence.bump_presence_active_time(user, device_id)
         except Exception:
             logger.exception("Error bumping presence active time")
 
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index c395dcdb43..50c68c86ce 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -165,7 +165,11 @@ class BasePresenceHandler(abc.ABC):
 
     @abc.abstractmethod
     async def user_syncing(
-        self, user_id: str, affect_presence: bool, presence_state: str
+        self,
+        user_id: str,
+        device_id: Optional[str],
+        affect_presence: bool,
+        presence_state: str,
     ) -> ContextManager[None]:
         """Returns a context manager that should surround any stream requests
         from the user.
@@ -176,6 +180,7 @@ class BasePresenceHandler(abc.ABC):
 
         Args:
             user_id: the user that is starting a sync
+            device_id: the user's device that is starting a sync
             affect_presence: If false this function will be a no-op.
                 Useful for streams that are not associated with an actual
                 client that is being used by a user.
@@ -252,6 +257,7 @@ class BasePresenceHandler(abc.ABC):
     async def set_state(
         self,
         target_user: UserID,
+        device_id: Optional[str],
         state: JsonDict,
         force_notify: bool = False,
         is_sync: bool = False,
@@ -260,6 +266,7 @@ class BasePresenceHandler(abc.ABC):
 
         Args:
             target_user: The ID of the user to set the presence state of.
+            device_id: the device that the user is setting the presence state of.
             state: The presence state as a JSON dictionary.
             force_notify: Whether to force notification of the update to clients.
             is_sync: True if this update was from a sync, which results in
@@ -269,7 +276,9 @@ class BasePresenceHandler(abc.ABC):
         """
 
     @abc.abstractmethod
-    async def bump_presence_active_time(self, user: UserID) -> None:
+    async def bump_presence_active_time(
+        self, user: UserID, device_id: Optional[str]
+    ) -> None:
         """We've seen the user do something that indicates they're interacting
         with the app.
         """
@@ -381,7 +390,9 @@ class BasePresenceHandler(abc.ABC):
         # We set force_notify=True here so that this presence update is guaranteed to
         # increment the presence stream ID (which resending the current user's presence
         # otherwise would not do).
-        await self.set_state(UserID.from_string(user_id), state, force_notify=True)
+        await self.set_state(
+            UserID.from_string(user_id), None, state, force_notify=True
+        )
 
     async def is_visible(self, observed_user: UserID, observer_user: UserID) -> bool:
         raise NotImplementedError(
@@ -481,7 +492,11 @@ class WorkerPresenceHandler(BasePresenceHandler):
                 self.send_user_sync(user_id, False, last_sync_ms)
 
     async def user_syncing(
-        self, user_id: str, affect_presence: bool, presence_state: str
+        self,
+        user_id: str,
+        device_id: Optional[str],
+        affect_presence: bool,
+        presence_state: str,
     ) -> ContextManager[None]:
         """Record that a user is syncing.
 
@@ -495,6 +510,7 @@ class WorkerPresenceHandler(BasePresenceHandler):
         # what the spec wants.
         await self.set_state(
             UserID.from_string(user_id),
+            device_id,
             state={"presence": presence_state},
             is_sync=True,
         )
@@ -592,6 +608,7 @@ class WorkerPresenceHandler(BasePresenceHandler):
     async def set_state(
         self,
         target_user: UserID,
+        device_id: Optional[str],
         state: JsonDict,
         force_notify: bool = False,
         is_sync: bool = False,
@@ -600,6 +617,7 @@ class WorkerPresenceHandler(BasePresenceHandler):
 
         Args:
             target_user: The ID of the user to set the presence state of.
+            device_id: the device that the user is setting the presence state of.
             state: The presence state as a JSON dictionary.
             force_notify: Whether to force notification of the update to clients.
             is_sync: True if this update was from a sync, which results in
@@ -622,12 +640,15 @@ class WorkerPresenceHandler(BasePresenceHandler):
         await self._set_state_client(
             instance_name=self._presence_writer_instance,
             user_id=user_id,
+            device_id=device_id,
             state=state,
             force_notify=force_notify,
             is_sync=is_sync,
         )
 
-    async def bump_presence_active_time(self, user: UserID) -> None:
+    async def bump_presence_active_time(
+        self, user: UserID, device_id: Optional[str]
+    ) -> None:
         """We've seen the user do something that indicates they're interacting
         with the app.
         """
@@ -638,7 +659,9 @@ class WorkerPresenceHandler(BasePresenceHandler):
         # Proxy request to instance that writes presence
         user_id = user.to_string()
         await self._bump_active_client(
-            instance_name=self._presence_writer_instance, user_id=user_id
+            instance_name=self._presence_writer_instance,
+            user_id=user_id,
+            device_id=device_id,
         )
 
 
@@ -943,7 +966,9 @@ class PresenceHandler(BasePresenceHandler):
 
         return await self._update_states(changes)
 
-    async def bump_presence_active_time(self, user: UserID) -> None:
+    async def bump_presence_active_time(
+        self, user: UserID, device_id: Optional[str]
+    ) -> None:
         """We've seen the user do something that indicates they're interacting
         with the app.
         """
@@ -966,6 +991,7 @@ class PresenceHandler(BasePresenceHandler):
     async def user_syncing(
         self,
         user_id: str,
+        device_id: Optional[str],
         affect_presence: bool = True,
         presence_state: str = PresenceState.ONLINE,
     ) -> ContextManager[None]:
@@ -977,7 +1003,8 @@ class PresenceHandler(BasePresenceHandler):
         when users disconnect/reconnect.
 
         Args:
-            user_id
+            user_id: the user that is starting a sync
+            device_id: the user's device that is starting a sync
             affect_presence: If false this function will be a no-op.
                 Useful for streams that are not associated with an actual
                 client that is being used by a user.
@@ -993,6 +1020,7 @@ class PresenceHandler(BasePresenceHandler):
         # what the spec wants.
         await self.set_state(
             UserID.from_string(user_id),
+            device_id,
             state={"presence": presence_state},
             is_sync=True,
         )
@@ -1163,6 +1191,7 @@ class PresenceHandler(BasePresenceHandler):
     async def set_state(
         self,
         target_user: UserID,
+        device_id: Optional[str],
         state: JsonDict,
         force_notify: bool = False,
         is_sync: bool = False,
@@ -1171,6 +1200,7 @@ class PresenceHandler(BasePresenceHandler):
 
         Args:
             target_user: The ID of the user to set the presence state of.
+            device_id: the device that the user is setting the presence state of.
             state: The presence state as a JSON dictionary.
             force_notify: Whether to force notification of the update to clients.
             is_sync: True if this update was from a sync, which results in
diff --git a/synapse/replication/http/presence.py b/synapse/replication/http/presence.py
index a24fb9310b..6c9e79fb07 100644
--- a/synapse/replication/http/presence.py
+++ b/synapse/replication/http/presence.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, Tuple
+from typing import TYPE_CHECKING, Optional, Tuple
 
 from twisted.web.server import Request
 
@@ -51,14 +51,14 @@ class ReplicationBumpPresenceActiveTime(ReplicationEndpoint):
         self._presence_handler = hs.get_presence_handler()
 
     @staticmethod
-    async def _serialize_payload(user_id: str) -> JsonDict:  # type: ignore[override]
-        return {}
+    async def _serialize_payload(user_id: str, device_id: Optional[str]) -> JsonDict:  # type: ignore[override]
+        return {"device_id": device_id}
 
     async def _handle_request(  # type: ignore[override]
         self, request: Request, content: JsonDict, user_id: str
     ) -> Tuple[int, JsonDict]:
         await self._presence_handler.bump_presence_active_time(
-            UserID.from_string(user_id)
+            UserID.from_string(user_id), content.get("device_id")
         )
 
         return (200, {})
@@ -95,11 +95,13 @@ class ReplicationPresenceSetState(ReplicationEndpoint):
     @staticmethod
     async def _serialize_payload(  # type: ignore[override]
         user_id: str,
+        device_id: Optional[str],
         state: JsonDict,
         force_notify: bool = False,
         is_sync: bool = False,
     ) -> JsonDict:
         return {
+            "device_id": device_id,
             "state": state,
             "force_notify": force_notify,
             "is_sync": is_sync,
@@ -110,6 +112,7 @@ class ReplicationPresenceSetState(ReplicationEndpoint):
     ) -> Tuple[int, JsonDict]:
         await self._presence_handler.set_state(
             UserID.from_string(user_id),
+            content.get("device_id"),
             content["state"],
             content["force_notify"],
             content.get("is_sync", False),
diff --git a/synapse/rest/client/presence.py b/synapse/rest/client/presence.py
index 8e193330f8..d578faa969 100644
--- a/synapse/rest/client/presence.py
+++ b/synapse/rest/client/presence.py
@@ -97,7 +97,7 @@ class PresenceStatusRestServlet(RestServlet):
             raise SynapseError(400, "Unable to parse state")
 
         if self._use_presence:
-            await self.presence_handler.set_state(user, state)
+            await self.presence_handler.set_state(user, requester.device_id, state)
 
         return 200, {}
 
diff --git a/synapse/rest/client/read_marker.py b/synapse/rest/client/read_marker.py
index 4f96e51eeb..1707e51972 100644
--- a/synapse/rest/client/read_marker.py
+++ b/synapse/rest/client/read_marker.py
@@ -52,7 +52,9 @@ class ReadMarkerRestServlet(RestServlet):
     ) -> Tuple[int, JsonDict]:
         requester = await self.auth.get_user_by_req(request)
 
-        await self.presence_handler.bump_presence_active_time(requester.user)
+        await self.presence_handler.bump_presence_active_time(
+            requester.user, requester.device_id
+        )
 
         body = parse_json_object_from_request(request)
 
diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py
index 316e7b9982..869a374459 100644
--- a/synapse/rest/client/receipts.py
+++ b/synapse/rest/client/receipts.py
@@ -94,7 +94,9 @@ class ReceiptRestServlet(RestServlet):
                     Codes.INVALID_PARAM,
                 )
 
-        await self.presence_handler.bump_presence_active_time(requester.user)
+        await self.presence_handler.bump_presence_active_time(
+            requester.user, requester.device_id
+        )
 
         if receipt_type == ReceiptTypes.FULLY_READ:
             await self.read_marker_handler.received_client_read_marker(
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index dc498001e4..553938ce9d 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -1229,7 +1229,9 @@ class RoomTypingRestServlet(RestServlet):
 
         content = parse_json_object_from_request(request)
 
-        await self.presence_handler.bump_presence_active_time(requester.user)
+        await self.presence_handler.bump_presence_active_time(
+            requester.user, requester.device_id
+        )
 
         # Limit timeout to stop people from setting silly typing timeouts.
         timeout = min(content.get("timeout", 30000), 120000)
diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py
index d7854ed4fd..42bdd3bb10 100644
--- a/synapse/rest/client/sync.py
+++ b/synapse/rest/client/sync.py
@@ -205,6 +205,7 @@ class SyncRestServlet(RestServlet):
 
         context = await self.presence_handler.user_syncing(
             user.to_string(),
+            requester.device_id,
             affect_presence=affect_presence,
             presence_state=set_presence,
         )
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index a3fdcf7f93..a987267308 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -524,6 +524,7 @@ class PresenceHandlerInitTestCase(unittest.HomeserverTestCase):
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.user_id = f"@test:{self.hs.config.server.server_name}"
+        self.device_id = "dev-1"
 
         # Move the reactor to the initial time.
         self.reactor.advance(1000)
@@ -608,7 +609,10 @@ class PresenceHandlerInitTestCase(unittest.HomeserverTestCase):
         self.reactor.advance(SYNC_ONLINE_TIMEOUT / 1000 / 2)
         self.get_success(
             presence_handler.user_syncing(
-                self.user_id, sync_state != PresenceState.OFFLINE, sync_state
+                self.user_id,
+                self.device_id,
+                sync_state != PresenceState.OFFLINE,
+                sync_state,
             )
         )
 
@@ -632,6 +636,7 @@ class PresenceHandlerInitTestCase(unittest.HomeserverTestCase):
 class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
     user_id = "@test:server"
     user_id_obj = UserID.from_string(user_id)
+    device_id = "dev-1"
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.presence_handler = hs.get_presence_handler()
@@ -652,7 +657,7 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
 
         self.get_success(
             worker_presence_handler.user_syncing(
-                self.user_id, True, PresenceState.ONLINE
+                self.user_id, self.device_id, True, PresenceState.ONLINE
             ),
             by=0.1,
         )
@@ -708,7 +713,7 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         # Mark user as offline
         self.get_success(
             self.presence_handler.set_state(
-                self.user_id_obj, {"presence": PresenceState.OFFLINE}
+                self.user_id_obj, self.device_id, {"presence": PresenceState.OFFLINE}
             )
         )
 
@@ -740,7 +745,7 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         # Mark user as online again
         self.get_success(
             self.presence_handler.set_state(
-                self.user_id_obj, {"presence": PresenceState.ONLINE}
+                self.user_id_obj, self.device_id, {"presence": PresenceState.ONLINE}
             )
         )
 
@@ -769,7 +774,7 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
 
         self.get_success(
             self.presence_handler.user_syncing(
-                self.user_id, False, PresenceState.ONLINE
+                self.user_id, self.device_id, False, PresenceState.ONLINE
             )
         )
 
@@ -786,7 +791,9 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         self._set_presencestate_with_status_msg(PresenceState.UNAVAILABLE, status_msg)
 
         self.get_success(
-            self.presence_handler.user_syncing(self.user_id, True, PresenceState.ONLINE)
+            self.presence_handler.user_syncing(
+                self.user_id, self.device_id, True, PresenceState.ONLINE
+            )
         )
 
         state = self.get_success(self.presence_handler.get_state(self.user_id_obj))
@@ -800,7 +807,9 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         self._set_presencestate_with_status_msg(PresenceState.UNAVAILABLE, status_msg)
 
         self.get_success(
-            self.presence_handler.user_syncing(self.user_id, True, PresenceState.ONLINE)
+            self.presence_handler.user_syncing(
+                self.user_id, self.device_id, True, PresenceState.ONLINE
+            )
         )
 
         state = self.get_success(self.presence_handler.get_state(self.user_id_obj))
@@ -838,7 +847,7 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         # /presence/*.
         self.get_success(
             worker_to_sync_against.get_presence_handler().user_syncing(
-                self.user_id, True, PresenceState.ONLINE
+                self.user_id, self.device_id, True, PresenceState.ONLINE
             ),
             by=0.1,
         )
@@ -875,6 +884,7 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         self.get_success(
             self.presence_handler.set_state(
                 self.user_id_obj,
+                self.device_id,
                 {"presence": state, "status_msg": status_msg},
             )
         )
@@ -1116,7 +1126,9 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
         # Mark test2 as online, test will be offline with a last_active of 0
         self.get_success(
             self.presence_handler.set_state(
-                UserID.from_string("@test2:server"), {"presence": PresenceState.ONLINE}
+                UserID.from_string("@test2:server"),
+                "dev-1",
+                {"presence": PresenceState.ONLINE},
             )
         )
         self.reactor.pump([0])  # Wait for presence updates to be handled
@@ -1163,7 +1175,9 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
         # Mark test as online
         self.get_success(
             self.presence_handler.set_state(
-                UserID.from_string("@test:server"), {"presence": PresenceState.ONLINE}
+                UserID.from_string("@test:server"),
+                "dev-1",
+                {"presence": PresenceState.ONLINE},
             )
         )
 
@@ -1171,7 +1185,9 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
         # Note we don't join them to the room yet
         self.get_success(
             self.presence_handler.set_state(
-                UserID.from_string("@test2:server"), {"presence": PresenceState.ONLINE}
+                UserID.from_string("@test2:server"),
+                "dev-1",
+                {"presence": PresenceState.ONLINE},
             )
         )
 
-- 
cgit 1.5.1


From 692ee2af190a82f2484427d0be773a0ff5282be1 Mon Sep 17 00:00:00 2001
From: Chen Zhang <c.expecto.patronum@gmail.com>
Date: Tue, 29 Aug 2023 02:37:09 -0700
Subject: Fix inaccurate error message while trying to ban or unban a user with
 the same or higher PL (#16205)

---
 changelog.d/16205.bugfix | 1 +
 synapse/event_auth.py    | 8 +++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/16205.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16205.bugfix b/changelog.d/16205.bugfix
new file mode 100644
index 0000000000..97ac92a148
--- /dev/null
+++ b/changelog.d/16205.bugfix
@@ -0,0 +1 @@
+Fix inaccurate error message while attempting to ban or unban a user with the same or higher PL by splitting the conditional statements. Contributed by @leviosacz.
\ No newline at end of file
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index 3a260a492b..531bb74f07 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -669,12 +669,18 @@ def _is_membership_change_allowed(
                     errcode=Codes.INSUFFICIENT_POWER,
                 )
     elif Membership.BAN == membership:
-        if user_level < ban_level or user_level <= target_level:
+        if user_level < ban_level:
             raise UnstableSpecAuthError(
                 403,
                 "You don't have permission to ban",
                 errcode=Codes.INSUFFICIENT_POWER,
             )
+        elif user_level <= target_level:
+            raise UnstableSpecAuthError(
+                403,
+                "You don't have permission to ban this user",
+                errcode=Codes.INSUFFICIENT_POWER,
+            )
     elif room_version.knock_join_rule and Membership.KNOCK == membership:
         if join_rule != JoinRules.KNOCK and (
             not room_version.knock_restricted_join_rule
-- 
cgit 1.5.1


From 63b51ef3fbb548cdc7899720f8c40bb65756f655 Mon Sep 17 00:00:00 2001
From: Evilham <github@evilham.com>
Date: Tue, 29 Aug 2023 15:33:58 +0200
Subject: Support IPv6-only SMTP servers (#16155)

Use Twisted HostnameEndpoint to connect to SMTP servers (instead
of connectTCP/connectSSL) which properly supports IPv6-only servers.
---
 changelog.d/16155.bugfix          |  1 +
 synapse/handlers/send_email.py    | 28 +++++++---------
 tests/handlers/test_send_email.py | 69 +++++++++++++++++++++++++++++++++------
 tests/server.py                   | 54 +++++++++++++++++++++++++++++-
 tests/unittest.py                 |  2 +-
 5 files changed, 125 insertions(+), 29 deletions(-)
 create mode 100644 changelog.d/16155.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16155.bugfix b/changelog.d/16155.bugfix
new file mode 100644
index 0000000000..8b2dc04006
--- /dev/null
+++ b/changelog.d/16155.bugfix
@@ -0,0 +1 @@
+Fix IPv6-related bugs on SMTP settings, adding groundwork to fix similar issues. Contributed by @evilham and @telmich (ungleich.ch).
diff --git a/synapse/handlers/send_email.py b/synapse/handlers/send_email.py
index 804cc6e81e..05e21509de 100644
--- a/synapse/handlers/send_email.py
+++ b/synapse/handlers/send_email.py
@@ -23,9 +23,11 @@ from pkg_resources import parse_version
 
 import twisted
 from twisted.internet.defer import Deferred
-from twisted.internet.interfaces import IOpenSSLContextFactory
+from twisted.internet.endpoints import HostnameEndpoint
+from twisted.internet.interfaces import IOpenSSLContextFactory, IProtocolFactory
 from twisted.internet.ssl import optionsForClientTLS
 from twisted.mail.smtp import ESMTPSender, ESMTPSenderFactory
+from twisted.protocols.tls import TLSMemoryBIOFactory
 
 from synapse.logging.context import make_deferred_yieldable
 from synapse.types import ISynapseReactor
@@ -97,6 +99,7 @@ async def _sendmail(
             **kwargs,
         )
 
+    factory: IProtocolFactory
     if _is_old_twisted:
         # before twisted 21.2, we have to override the ESMTPSender protocol to disable
         # TLS
@@ -110,22 +113,13 @@ async def _sendmail(
         factory = build_sender_factory(hostname=smtphost if enable_tls else None)
 
     if force_tls:
-        reactor.connectSSL(
-            smtphost,
-            smtpport,
-            factory,
-            optionsForClientTLS(smtphost),
-            timeout=30,
-            bindAddress=None,
-        )
-    else:
-        reactor.connectTCP(
-            smtphost,
-            smtpport,
-            factory,
-            timeout=30,
-            bindAddress=None,
-        )
+        factory = TLSMemoryBIOFactory(optionsForClientTLS(smtphost), True, factory)
+
+    endpoint = HostnameEndpoint(
+        reactor, smtphost, smtpport, timeout=30, bindAddress=None
+    )
+
+    await make_deferred_yieldable(endpoint.connect(factory))
 
     await make_deferred_yieldable(d)
 
diff --git a/tests/handlers/test_send_email.py b/tests/handlers/test_send_email.py
index 8b6e4a40b6..a066745d70 100644
--- a/tests/handlers/test_send_email.py
+++ b/tests/handlers/test_send_email.py
@@ -13,19 +13,40 @@
 # limitations under the License.
 
 
-from typing import Callable, List, Tuple
+from typing import Callable, List, Tuple, Type, Union
+from unittest.mock import patch
 
 from zope.interface import implementer
 
 from twisted.internet import defer
-from twisted.internet.address import IPv4Address
+from twisted.internet._sslverify import ClientTLSOptions
+from twisted.internet.address import IPv4Address, IPv6Address
 from twisted.internet.defer import ensureDeferred
+from twisted.internet.interfaces import IProtocolFactory
+from twisted.internet.ssl import ContextFactory
 from twisted.mail import interfaces, smtp
 
 from tests.server import FakeTransport
 from tests.unittest import HomeserverTestCase, override_config
 
 
+def TestingESMTPTLSClientFactory(
+    contextFactory: ContextFactory,
+    _connectWrapped: bool,
+    wrappedProtocol: IProtocolFactory,
+) -> IProtocolFactory:
+    """We use this to pass through in testing without using TLS, but
+    saving the context information to check that it would have happened.
+
+    Note that this is what the MemoryReactor does on connectSSL.
+    It only saves the contextFactory, but starts the connection with the
+    underlying Factory.
+    See: L{twisted.internet.testing.MemoryReactor.connectSSL}"""
+
+    wrappedProtocol._testingContextFactory = contextFactory  # type: ignore[attr-defined]
+    return wrappedProtocol
+
+
 @implementer(interfaces.IMessageDelivery)
 class _DummyMessageDelivery:
     def __init__(self) -> None:
@@ -75,7 +96,13 @@ class _DummyMessage:
         pass
 
 
-class SendEmailHandlerTestCase(HomeserverTestCase):
+class SendEmailHandlerTestCaseIPv4(HomeserverTestCase):
+    ip_class: Union[Type[IPv4Address], Type[IPv6Address]] = IPv4Address
+
+    def setUp(self) -> None:
+        super().setUp()
+        self.reactor.lookups["localhost"] = "127.0.0.1"
+
     def test_send_email(self) -> None:
         """Happy-path test that we can send email to a non-TLS server."""
         h = self.hs.get_send_email_handler()
@@ -89,7 +116,7 @@ class SendEmailHandlerTestCase(HomeserverTestCase):
         (host, port, client_factory, _timeout, _bindAddress) = self.reactor.tcpClients[
             0
         ]
-        self.assertEqual(host, "localhost")
+        self.assertEqual(host, self.reactor.lookups["localhost"])
         self.assertEqual(port, 25)
 
         # wire it up to an SMTP server
@@ -105,7 +132,9 @@ class SendEmailHandlerTestCase(HomeserverTestCase):
             FakeTransport(
                 client_protocol,
                 self.reactor,
-                peer_address=IPv4Address("TCP", "127.0.0.1", 1234),
+                peer_address=self.ip_class(
+                    "TCP", self.reactor.lookups["localhost"], 1234
+                ),
             )
         )
 
@@ -118,6 +147,10 @@ class SendEmailHandlerTestCase(HomeserverTestCase):
         self.assertEqual(str(user), "foo@bar.com")
         self.assertIn(b"Subject: test subject", msg)
 
+    @patch(
+        "synapse.handlers.send_email.TLSMemoryBIOFactory",
+        TestingESMTPTLSClientFactory,
+    )
     @override_config(
         {
             "email": {
@@ -135,17 +168,23 @@ class SendEmailHandlerTestCase(HomeserverTestCase):
             )
         )
         # there should be an attempt to connect to localhost:465
-        self.assertEqual(len(self.reactor.sslClients), 1)
+        self.assertEqual(len(self.reactor.tcpClients), 1)
         (
             host,
             port,
             client_factory,
-            contextFactory,
             _timeout,
             _bindAddress,
-        ) = self.reactor.sslClients[0]
-        self.assertEqual(host, "localhost")
+        ) = self.reactor.tcpClients[0]
+        self.assertEqual(host, self.reactor.lookups["localhost"])
         self.assertEqual(port, 465)
+        # We need to make sure that TLS is happening
+        self.assertIsInstance(
+            client_factory._wrappedFactory._testingContextFactory,
+            ClientTLSOptions,
+        )
+        # And since we use endpoints, they go through reactor.connectTCP
+        # which works differently to connectSSL on the testing reactor
 
         # wire it up to an SMTP server
         message_delivery = _DummyMessageDelivery()
@@ -160,7 +199,9 @@ class SendEmailHandlerTestCase(HomeserverTestCase):
             FakeTransport(
                 client_protocol,
                 self.reactor,
-                peer_address=IPv4Address("TCP", "127.0.0.1", 1234),
+                peer_address=self.ip_class(
+                    "TCP", self.reactor.lookups["localhost"], 1234
+                ),
             )
         )
 
@@ -172,3 +213,11 @@ class SendEmailHandlerTestCase(HomeserverTestCase):
         user, msg = message_delivery.messages.pop()
         self.assertEqual(str(user), "foo@bar.com")
         self.assertIn(b"Subject: test subject", msg)
+
+
+class SendEmailHandlerTestCaseIPv6(SendEmailHandlerTestCaseIPv4):
+    ip_class = IPv6Address
+
+    def setUp(self) -> None:
+        super().setUp()
+        self.reactor.lookups["localhost"] = "::1"
diff --git a/tests/server.py b/tests/server.py
index ff03d28864..659ccce838 100644
--- a/tests/server.py
+++ b/tests/server.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import hashlib
+import ipaddress
 import json
 import logging
 import os
@@ -45,7 +46,7 @@ import attr
 from typing_extensions import ParamSpec
 from zope.interface import implementer
 
-from twisted.internet import address, threads, udp
+from twisted.internet import address, tcp, threads, udp
 from twisted.internet._resolver import SimpleResolverComplexifier
 from twisted.internet.defer import Deferred, fail, maybeDeferred, succeed
 from twisted.internet.error import DNSLookupError
@@ -567,6 +568,8 @@ class ThreadedMemoryReactorClock(MemoryReactorClock):
         conn = super().connectTCP(
             host, port, factory, timeout=timeout, bindAddress=None
         )
+        if self.lookups and host in self.lookups:
+            validate_connector(conn, self.lookups[host])
 
         callback = self._tcp_callbacks.get((host, port))
         if callback:
@@ -599,6 +602,55 @@ class ThreadedMemoryReactorClock(MemoryReactorClock):
             super().advance(0)
 
 
+def validate_connector(connector: tcp.Connector, expected_ip: str) -> None:
+    """Try to validate the obtained connector as it would happen when
+    synapse is running and the connection will be established.
+
+    This method will raise a useful exception when necessary, else it will
+    just do nothing.
+
+    This is in order to help catch quirks related to reactor.connectTCP,
+    since when called directly, the connector's destination will be of type
+    IPv4Address, with the hostname as the literal host that was given (which
+    could be an IPv6-only host or an IPv6 literal).
+
+    But when called from reactor.connectTCP *through* e.g. an Endpoint, the
+    connector's destination will contain the specific IP address with the
+    correct network stack class.
+
+    Note that testing code paths that use connectTCP directly should not be
+    affected by this check, unless they specifically add a test with a
+    matching reactor.lookups[HOSTNAME] = "IPv6Literal", where reactor is of
+    type ThreadedMemoryReactorClock.
+    For an example of implementing such tests, see tests/handlers/test_send_email.py.
+    """
+    destination = connector.getDestination()
+
+    # We use address.IPv{4,6}Address to check what the reactor thinks it is
+    # sending but check for validity with ipaddress.IPv{4,6}Address
+    # because they fail with IPs on the wrong network stack.
+    cls_mapping = {
+        address.IPv4Address: ipaddress.IPv4Address,
+        address.IPv6Address: ipaddress.IPv6Address,
+    }
+
+    cls = cls_mapping.get(destination.__class__)
+
+    if cls is not None:
+        try:
+            cls(expected_ip)
+        except Exception as exc:
+            raise ValueError(
+                "Invalid IP type and resolution for %s. Expected %s to be %s"
+                % (destination, expected_ip, cls.__name__)
+            ) from exc
+    else:
+        raise ValueError(
+            "Unknown address type %s for %s"
+            % (destination.__class__.__name__, destination)
+        )
+
+
 class ThreadPool:
     """
     Threadless thread pool.
diff --git a/tests/unittest.py b/tests/unittest.py
index b0721e060c..40672a4415 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -313,7 +313,7 @@ class HomeserverTestCase(TestCase):
         servlets: List of servlet registration function.
         user_id (str): The user ID to assume if auth is hijacked.
         hijack_auth: Whether to hijack auth to return the user specified
-        in user_id.
+           in user_id.
     """
 
     hijack_auth: ClassVar[bool] = True
-- 
cgit 1.5.1


From 001fc7bd199b335f628908a0c91e44967cef2c2b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 29 Aug 2023 09:41:43 -0400
Subject: Bump ruff from 0.0.277 to 0.0.286 (#16198)

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 poetry.lock                               | 38 +++++++++++++++----------------
 pyproject.toml                            |  2 +-
 synapse/config/_base.py                   |  8 +++----
 synapse/config/appservice.py              |  2 +-
 synapse/event_auth.py                     |  4 ++--
 synapse/events/utils.py                   |  4 ++--
 synapse/events/validator.py               |  4 ++--
 synapse/federation/federation_base.py     |  2 +-
 synapse/federation/federation_client.py   |  2 +-
 synapse/handlers/message.py               |  2 +-
 synapse/http/matrixfederationclient.py    |  2 +-
 synapse/media/oembed.py                   |  2 +-
 synapse/media/thumbnailer.py              |  2 +-
 synapse/push/bulk_push_rule_evaluator.py  |  8 +++++--
 synapse/rest/admin/__init__.py            |  2 +-
 synapse/rest/admin/registration_tokens.py | 21 +++++++++++------
 synapse/rest/admin/users.py               |  7 ++++--
 synapse/rest/client/report_event.py       |  2 +-
 synapse/storage/databases/main/events.py  |  6 ++---
 19 files changed, 67 insertions(+), 53 deletions(-)

(limited to 'synapse')

diff --git a/poetry.lock b/poetry.lock
index 70b443069c..1d37c88328 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2324,28 +2324,28 @@ files = [
 
 [[package]]
 name = "ruff"
-version = "0.0.277"
+version = "0.0.286"
 description = "An extremely fast Python linter, written in Rust."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "ruff-0.0.277-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:3250b24333ef419b7a232080d9724ccc4d2da1dbbe4ce85c4caa2290d83200f8"},
-    {file = "ruff-0.0.277-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:3e60605e07482183ba1c1b7237eca827bd6cbd3535fe8a4ede28cbe2a323cb97"},
-    {file = "ruff-0.0.277-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7baa97c3d7186e5ed4d5d4f6834d759a27e56cf7d5874b98c507335f0ad5aadb"},
-    {file = "ruff-0.0.277-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:74e4b206cb24f2e98a615f87dbe0bde18105217cbcc8eb785bb05a644855ba50"},
-    {file = "ruff-0.0.277-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:479864a3ccd8a6a20a37a6e7577bdc2406868ee80b1e65605478ad3b8eb2ba0b"},
-    {file = "ruff-0.0.277-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:468bfb0a7567443cec3d03cf408d6f562b52f30c3c29df19927f1e0e13a40cd7"},
-    {file = "ruff-0.0.277-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f32ec416c24542ca2f9cc8c8b65b84560530d338aaf247a4a78e74b99cd476b4"},
-    {file = "ruff-0.0.277-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:14a7b2f00f149c5a295f188a643ac25226ff8a4d08f7a62b1d4b0a1dc9f9b85c"},
-    {file = "ruff-0.0.277-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9879f59f763cc5628aa01c31ad256a0f4dc61a29355c7315b83c2a5aac932b5"},
-    {file = "ruff-0.0.277-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:f612e0a14b3d145d90eb6ead990064e22f6f27281d847237560b4e10bf2251f3"},
-    {file = "ruff-0.0.277-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:323b674c98078be9aaded5b8b51c0d9c424486566fb6ec18439b496ce79e5998"},
-    {file = "ruff-0.0.277-py3-none-musllinux_1_2_i686.whl", hash = "sha256:3a43fbe026ca1a2a8c45aa0d600a0116bec4dfa6f8bf0c3b871ecda51ef2b5dd"},
-    {file = "ruff-0.0.277-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:734165ea8feb81b0d53e3bf523adc2413fdb76f1264cde99555161dd5a725522"},
-    {file = "ruff-0.0.277-py3-none-win32.whl", hash = "sha256:88d0f2afb2e0c26ac1120e7061ddda2a566196ec4007bd66d558f13b374b9efc"},
-    {file = "ruff-0.0.277-py3-none-win_amd64.whl", hash = "sha256:6fe81732f788894a00f6ade1fe69e996cc9e485b7c35b0f53fb00284397284b2"},
-    {file = "ruff-0.0.277-py3-none-win_arm64.whl", hash = "sha256:2d4444c60f2e705c14cd802b55cd2b561d25bf4311702c463a002392d3116b22"},
-    {file = "ruff-0.0.277.tar.gz", hash = "sha256:2dab13cdedbf3af6d4427c07f47143746b6b95d9e4a254ac369a0edb9280a0d2"},
+    {file = "ruff-0.0.286-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:8e22cb557e7395893490e7f9cfea1073d19a5b1dd337f44fd81359b2767da4e9"},
+    {file = "ruff-0.0.286-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:68ed8c99c883ae79a9133cb1a86d7130feee0397fdf5ba385abf2d53e178d3fa"},
+    {file = "ruff-0.0.286-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8301f0bb4ec1a5b29cfaf15b83565136c47abefb771603241af9d6038f8981e8"},
+    {file = "ruff-0.0.286-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:acc4598f810bbc465ce0ed84417ac687e392c993a84c7eaf3abf97638701c1ec"},
+    {file = "ruff-0.0.286-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88c8e358b445eb66d47164fa38541cfcc267847d1e7a92dd186dddb1a0a9a17f"},
+    {file = "ruff-0.0.286-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:0433683d0c5dbcf6162a4beb2356e820a593243f1fa714072fec15e2e4f4c939"},
+    {file = "ruff-0.0.286-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddb61a0c4454cbe4623f4a07fef03c5ae921fe04fede8d15c6e36703c0a73b07"},
+    {file = "ruff-0.0.286-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:47549c7c0be24c8ae9f2bce6f1c49fbafea83bca80142d118306f08ec7414041"},
+    {file = "ruff-0.0.286-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:559aa793149ac23dc4310f94f2c83209eedb16908a0343663be19bec42233d25"},
+    {file = "ruff-0.0.286-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:d73cfb1c3352e7aa0ce6fb2321f36fa1d4a2c48d2ceac694cb03611ddf0e4db6"},
+    {file = "ruff-0.0.286-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:3dad93b1f973c6d1db4b6a5da8690c5625a3fa32bdf38e543a6936e634b83dc3"},
+    {file = "ruff-0.0.286-py3-none-musllinux_1_2_i686.whl", hash = "sha256:26afc0851f4fc3738afcf30f5f8b8612a31ac3455cb76e611deea80f5c0bf3ce"},
+    {file = "ruff-0.0.286-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:9b6b116d1c4000de1b9bf027131dbc3b8a70507788f794c6b09509d28952c512"},
+    {file = "ruff-0.0.286-py3-none-win32.whl", hash = "sha256:556e965ac07c1e8c1c2d759ac512e526ecff62c00fde1a046acb088d3cbc1a6c"},
+    {file = "ruff-0.0.286-py3-none-win_amd64.whl", hash = "sha256:5d295c758961376c84aaa92d16e643d110be32add7465e197bfdaec5a431a107"},
+    {file = "ruff-0.0.286-py3-none-win_arm64.whl", hash = "sha256:1d6142d53ab7f164204b3133d053c4958d4d11ec3a39abf23a40b13b0784e3f0"},
+    {file = "ruff-0.0.286.tar.gz", hash = "sha256:f1e9d169cce81a384a26ee5bb8c919fe9ae88255f39a1a69fd1ebab233a85ed2"},
 ]
 
 [[package]]
@@ -3339,4 +3339,4 @@ user-search = ["pyicu"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.8.0"
-content-hash = "0a8c6605e7e1d0ac7188a5d02b47a029bfb0f917458b87cb40755911442383d8"
+content-hash = "87163d8994d09d3a7983ff647a9987d4277a3966dee48741437f4e98bca7e6db"
diff --git a/pyproject.toml b/pyproject.toml
index 499dd9532d..2f1277ab52 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -312,7 +312,7 @@ all = [
 # This helps prevents merge conflicts when running a batch of dependabot updates.
 isort = ">=5.10.1"
 black = ">=22.7.0"
-ruff = "0.0.277"
+ruff = "0.0.286"
 
 # Typechecking
 lxml-stubs = ">=0.4.0"
diff --git a/synapse/config/_base.py b/synapse/config/_base.py
index 1d268a1817..69a8318127 100644
--- a/synapse/config/_base.py
+++ b/synapse/config/_base.py
@@ -186,9 +186,9 @@ class Config:
             TypeError, if given something other than an integer or a string
             ValueError: if given a string not of the form described above.
         """
-        if type(value) is int:
+        if type(value) is int:  # noqa: E721
             return value
-        elif type(value) is str:
+        elif isinstance(value, str):
             sizes = {"K": 1024, "M": 1024 * 1024}
             size = 1
             suffix = value[-1]
@@ -218,9 +218,9 @@ class Config:
             TypeError, if given something other than an integer or a string
             ValueError: if given a string not of the form described above.
         """
-        if type(value) is int:
+        if type(value) is int:  # noqa: E721
             return value
-        elif type(value) is str:
+        elif isinstance(value, str):
             second = 1000
             minute = 60 * second
             hour = 60 * minute
diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py
index 919f81a9b7..a70dfbf41f 100644
--- a/synapse/config/appservice.py
+++ b/synapse/config/appservice.py
@@ -34,7 +34,7 @@ class AppServiceConfig(Config):
     def read_config(self, config: JsonDict, **kwargs: Any) -> None:
         self.app_service_config_files = config.get("app_service_config_files", [])
         if not isinstance(self.app_service_config_files, list) or not all(
-            type(x) is str for x in self.app_service_config_files
+            isinstance(x, str) for x in self.app_service_config_files
         ):
             raise ConfigError(
                 "Expected '%s' to be a list of AS config files:"
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index 531bb74f07..2ac9f8b309 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -852,11 +852,11 @@ def _check_power_levels(
                 "kick",
                 "invite",
             }:
-                if type(v) is not int:
+                if type(v) is not int:  # noqa: E721
                     raise SynapseError(400, f"{v!r} must be an integer.")
             if k in {"events", "notifications", "users"}:
                 if not isinstance(v, collections.abc.Mapping) or not all(
-                    type(v) is int for v in v.values()
+                    type(v) is int for v in v.values()  # noqa: E721
                 ):
                     raise SynapseError(
                         400,
diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index 52acb21955..53af423a5a 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -702,7 +702,7 @@ def _copy_power_level_value_as_integer(
     :raises TypeError: if `old_value` is neither an integer nor a base-10 string
         representation of an integer.
     """
-    if type(old_value) is int:
+    if type(old_value) is int:  # noqa: E721
         power_levels[key] = old_value
         return
 
@@ -730,7 +730,7 @@ def validate_canonicaljson(value: Any) -> None:
     * Floats
     * NaN, Infinity, -Infinity
     """
-    if type(value) is int:
+    if type(value) is int:  # noqa: E721
         if value < CANONICALJSON_MIN_INT or CANONICALJSON_MAX_INT < value:
             raise SynapseError(400, "JSON integer out of range", Codes.BAD_JSON)
 
diff --git a/synapse/events/validator.py b/synapse/events/validator.py
index 9278f1a1aa..34625dd7a1 100644
--- a/synapse/events/validator.py
+++ b/synapse/events/validator.py
@@ -151,7 +151,7 @@ class EventValidator:
         max_lifetime = event.content.get("max_lifetime")
 
         if min_lifetime is not None:
-            if type(min_lifetime) is not int:
+            if type(min_lifetime) is not int:  # noqa: E721
                 raise SynapseError(
                     code=400,
                     msg="'min_lifetime' must be an integer",
@@ -159,7 +159,7 @@ class EventValidator:
                 )
 
         if max_lifetime is not None:
-            if type(max_lifetime) is not int:
+            if type(max_lifetime) is not int:  # noqa: E721
                 raise SynapseError(
                     code=400,
                     msg="'max_lifetime' must be an integer",
diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py
index 31e0260b83..d4e7dd45a9 100644
--- a/synapse/federation/federation_base.py
+++ b/synapse/federation/federation_base.py
@@ -280,7 +280,7 @@ def event_from_pdu_json(pdu_json: JsonDict, room_version: RoomVersion) -> EventB
         _strip_unsigned_values(pdu_json)
 
     depth = pdu_json["depth"]
-    if type(depth) is not int:
+    if type(depth) is not int:  # noqa: E721
         raise SynapseError(400, "Depth %r not an intger" % (depth,), Codes.BAD_JSON)
 
     if depth < 0:
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 89bd597409..607013f121 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -1891,7 +1891,7 @@ class TimestampToEventResponse:
             )
 
         origin_server_ts = d.get("origin_server_ts")
-        if type(origin_server_ts) is not int:
+        if type(origin_server_ts) is not int:  # noqa: E721
             raise ValueError(
                 "Invalid response: 'origin_server_ts' must be a int but received %r"
                 % origin_server_ts
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 4a15c76a7b..187c3e6cc0 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -379,7 +379,7 @@ class MessageHandler:
         """
 
         expiry_ts = event.content.get(EventContentFields.SELF_DESTRUCT_AFTER)
-        if type(expiry_ts) is not int or event.is_state():
+        if type(expiry_ts) is not int or event.is_state():  # noqa: E721
             return
 
         # _schedule_expiry_for_event won't actually schedule anything if there's already
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index 583c03447c..11342ccac8 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -243,7 +243,7 @@ class LegacyJsonSendParser(_BaseJsonParser[Tuple[int, JsonDict]]):
         return (
             isinstance(v, list)
             and len(v) == 2
-            and type(v[0]) == int
+            and type(v[0]) == int  # noqa: E721
             and isinstance(v[1], dict)
         )
 
diff --git a/synapse/media/oembed.py b/synapse/media/oembed.py
index 5ad9eec80b..2ce842c98d 100644
--- a/synapse/media/oembed.py
+++ b/synapse/media/oembed.py
@@ -204,7 +204,7 @@ class OEmbedProvider:
                 calc_description_and_urls(open_graph_response, oembed["html"])
             for size in ("width", "height"):
                 val = oembed.get(size)
-                if type(val) is int:
+                if type(val) is int:  # noqa: E721
                     open_graph_response[f"og:video:{size}"] = val
 
         elif oembed_type == "link":
diff --git a/synapse/media/thumbnailer.py b/synapse/media/thumbnailer.py
index 2bfa58ceee..d8979813b3 100644
--- a/synapse/media/thumbnailer.py
+++ b/synapse/media/thumbnailer.py
@@ -78,7 +78,7 @@ class Thumbnailer:
             image_exif = self.image._getexif()  # type: ignore
             if image_exif is not None:
                 image_orientation = image_exif.get(EXIF_ORIENTATION_TAG)
-                assert type(image_orientation) is int
+                assert type(image_orientation) is int  # noqa: E721
                 self.transpose_method = EXIF_TRANSPOSE_MAPPINGS.get(image_orientation)
         except Exception as e:
             # A lot of parsing errors can happen when parsing EXIF
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 990c079c81..554634579e 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -379,7 +379,7 @@ class BulkPushRuleEvaluator:
             keys = list(notification_levels.keys())
             for key in keys:
                 level = notification_levels.get(key, SENTINEL)
-                if level is not SENTINEL and type(level) is not int:
+                if level is not SENTINEL and type(level) is not int:  # noqa: E721
                     try:
                         notification_levels[key] = int(level)
                     except (TypeError, ValueError):
@@ -472,7 +472,11 @@ StateGroup = Union[object, int]
 
 
 def _is_simple_value(value: Any) -> bool:
-    return isinstance(value, (bool, str)) or type(value) is int or value is None
+    return (
+        isinstance(value, (bool, str))
+        or type(value) is int  # noqa: E721
+        or value is None
+    )
 
 
 def _flatten_dict(
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index 55e752fda8..94170715fb 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -157,7 +157,7 @@ class PurgeHistoryRestServlet(RestServlet):
             logger.info("[purge] purging up to token %s (event_id %s)", token, event_id)
         elif "purge_up_to_ts" in body:
             ts = body["purge_up_to_ts"]
-            if type(ts) is not int:
+            if type(ts) is not int:  # noqa: E721
                 raise SynapseError(
                     HTTPStatus.BAD_REQUEST,
                     "purge_up_to_ts must be an int",
diff --git a/synapse/rest/admin/registration_tokens.py b/synapse/rest/admin/registration_tokens.py
index 95e751288b..ffce92d45e 100644
--- a/synapse/rest/admin/registration_tokens.py
+++ b/synapse/rest/admin/registration_tokens.py
@@ -143,7 +143,7 @@ class NewRegistrationTokenRestServlet(RestServlet):
         else:
             # Get length of token to generate (default is 16)
             length = body.get("length", 16)
-            if type(length) is not int:
+            if type(length) is not int:  # noqa: E721
                 raise SynapseError(
                     HTTPStatus.BAD_REQUEST,
                     "length must be an integer",
@@ -163,7 +163,8 @@ class NewRegistrationTokenRestServlet(RestServlet):
 
         uses_allowed = body.get("uses_allowed", None)
         if not (
-            uses_allowed is None or (type(uses_allowed) is int and uses_allowed >= 0)
+            uses_allowed is None
+            or (type(uses_allowed) is int and uses_allowed >= 0)  # noqa: E721
         ):
             raise SynapseError(
                 HTTPStatus.BAD_REQUEST,
@@ -172,13 +173,16 @@ class NewRegistrationTokenRestServlet(RestServlet):
             )
 
         expiry_time = body.get("expiry_time", None)
-        if type(expiry_time) not in (int, type(None)):
+        if expiry_time is not None and type(expiry_time) is not int:  # noqa: E721
             raise SynapseError(
                 HTTPStatus.BAD_REQUEST,
                 "expiry_time must be an integer or null",
                 Codes.INVALID_PARAM,
             )
-        if type(expiry_time) is int and expiry_time < self.clock.time_msec():
+        if (
+            type(expiry_time) is int  # noqa: E721
+            and expiry_time < self.clock.time_msec()
+        ):
             raise SynapseError(
                 HTTPStatus.BAD_REQUEST,
                 "expiry_time must not be in the past",
@@ -283,7 +287,7 @@ class RegistrationTokenRestServlet(RestServlet):
             uses_allowed = body["uses_allowed"]
             if not (
                 uses_allowed is None
-                or (type(uses_allowed) is int and uses_allowed >= 0)
+                or (type(uses_allowed) is int and uses_allowed >= 0)  # noqa: E721
             ):
                 raise SynapseError(
                     HTTPStatus.BAD_REQUEST,
@@ -294,13 +298,16 @@ class RegistrationTokenRestServlet(RestServlet):
 
         if "expiry_time" in body:
             expiry_time = body["expiry_time"]
-            if type(expiry_time) not in (int, type(None)):
+            if expiry_time is not None and type(expiry_time) is not int:  # noqa: E721
                 raise SynapseError(
                     HTTPStatus.BAD_REQUEST,
                     "expiry_time must be an integer or null",
                     Codes.INVALID_PARAM,
                 )
-            if type(expiry_time) is int and expiry_time < self.clock.time_msec():
+            if (
+                type(expiry_time) is int  # noqa: E721
+                and expiry_time < self.clock.time_msec()
+            ):
                 raise SynapseError(
                     HTTPStatus.BAD_REQUEST,
                     "expiry_time must not be in the past",
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 240e6254b0..625a47ec1a 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -1172,14 +1172,17 @@ class RateLimitRestServlet(RestServlet):
         messages_per_second = body.get("messages_per_second", 0)
         burst_count = body.get("burst_count", 0)
 
-        if type(messages_per_second) is not int or messages_per_second < 0:
+        if (
+            type(messages_per_second) is not int  # noqa: E721
+            or messages_per_second < 0
+        ):
             raise SynapseError(
                 HTTPStatus.BAD_REQUEST,
                 "%r parameter must be a positive int" % (messages_per_second,),
                 errcode=Codes.INVALID_PARAM,
             )
 
-        if type(burst_count) is not int or burst_count < 0:
+        if type(burst_count) is not int or burst_count < 0:  # noqa: E721
             raise SynapseError(
                 HTTPStatus.BAD_REQUEST,
                 "%r parameter must be a positive int" % (burst_count,),
diff --git a/synapse/rest/client/report_event.py b/synapse/rest/client/report_event.py
index ac1a63ca27..ee93e459f6 100644
--- a/synapse/rest/client/report_event.py
+++ b/synapse/rest/client/report_event.py
@@ -55,7 +55,7 @@ class ReportEventRestServlet(RestServlet):
                 "Param 'reason' must be a string",
                 Codes.BAD_JSON,
             )
-        if type(body.get("score", 0)) is not int:
+        if type(body.get("score", 0)) is not int:  # noqa: E721
             raise SynapseError(
                 HTTPStatus.BAD_REQUEST,
                 "Param 'score' must be an integer",
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index c1353b18c1..c784612f59 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -1671,7 +1671,7 @@ class PersistEventsStore:
             if self._ephemeral_messages_enabled:
                 # If there's an expiry timestamp on the event, store it.
                 expiry_ts = event.content.get(EventContentFields.SELF_DESTRUCT_AFTER)
-                if type(expiry_ts) is int and not event.is_state():
+                if type(expiry_ts) is int and not event.is_state():  # noqa: E721
                     self._insert_event_expiry_txn(txn, event.event_id, expiry_ts)
 
         # Insert into the room_memberships table.
@@ -2039,10 +2039,10 @@ class PersistEventsStore:
         ):
             if (
                 "min_lifetime" in event.content
-                and type(event.content["min_lifetime"]) is not int
+                and type(event.content["min_lifetime"]) is not int  # noqa: E721
             ) or (
                 "max_lifetime" in event.content
-                and type(event.content["max_lifetime"]) is not int
+                and type(event.content["max_lifetime"]) is not int  # noqa: E721
             ):
                 # Ignore the event if one of the value isn't an integer.
                 return
-- 
cgit 1.5.1


From 9ec3da06daf70b5e799545a6e12ead4846559d80 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 29 Aug 2023 10:38:56 -0400
Subject: Bump mypy-zope & mypy. (#16188)

---
 changelog.d/16188.misc                             |  1 +
 poetry.lock                                        | 72 +++++++++++-----------
 synapse/_scripts/synapse_port_db.py                |  9 ++-
 synapse/logging/opentracing.py                     | 14 ++---
 synapse/storage/database.py                        | 17 ++++-
 synapse/util/check_dependencies.py                 |  6 +-
 tests/appservice/test_api.py                       |  6 +-
 tests/federation/test_complexity.py                | 24 ++++----
 tests/federation/test_federation_catch_up.py       |  4 +-
 tests/federation/test_federation_sender.py         |  4 +-
 tests/federation/transport/test_knocking.py        |  4 +-
 tests/handlers/test_appservice.py                  | 10 +--
 tests/handlers/test_cas.py                         |  8 +--
 tests/handlers/test_e2e_keys.py                    |  4 +-
 tests/handlers/test_federation.py                  |  4 +-
 tests/handlers/test_oidc.py                        |  4 +-
 tests/handlers/test_password_providers.py          |  2 +-
 tests/handlers/test_register.py                    |  6 +-
 tests/handlers/test_saml.py                        | 14 ++---
 tests/handlers/test_typing.py                      | 26 ++++----
 tests/logging/test_terse_json.py                   |  2 +-
 tests/module_api/test_api.py                       |  4 +-
 tests/push/test_bulk_push_rule_evaluator.py        |  2 +-
 tests/replication/storage/test_events.py           |  2 +-
 tests/rest/admin/test_user.py                      |  4 +-
 tests/rest/admin/test_username_available.py        |  2 +-
 tests/rest/client/test_account.py                  |  2 +-
 tests/rest/client/test_events.py                   |  2 +-
 tests/rest/client/test_filter.py                   |  4 +-
 tests/rest/client/test_rooms.py                    | 12 ++--
 tests/rest/client/test_shadow_banned.py            |  2 +-
 tests/rest/client/test_third_party_rules.py        |  2 +-
 tests/server.py                                    |  2 +-
 .../test_resource_limits_server_notices.py         | 30 ++++-----
 tests/storage/test_appservice.py                   |  2 +-
 tests/storage/test_monthly_active_users.py         | 12 ++--
 tests/test_federation.py                           |  6 +-
 tests/test_state.py                                |  4 +-
 tests/unittest.py                                  |  6 +-
 39 files changed, 180 insertions(+), 161 deletions(-)
 create mode 100644 changelog.d/16188.misc

(limited to 'synapse')

diff --git a/changelog.d/16188.misc b/changelog.d/16188.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/16188.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/poetry.lock b/poetry.lock
index 1d37c88328..6d63d71b2c 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1445,43 +1445,43 @@ files = [
 
 [[package]]
 name = "mypy"
-version = "1.0.1"
+version = "1.4.1"
 description = "Optional static typing for Python"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "mypy-1.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:71a808334d3f41ef011faa5a5cd8153606df5fc0b56de5b2e89566c8093a0c9a"},
-    {file = "mypy-1.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:920169f0184215eef19294fa86ea49ffd4635dedfdea2b57e45cb4ee85d5ccaf"},
-    {file = "mypy-1.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27a0f74a298769d9fdc8498fcb4f2beb86f0564bcdb1a37b58cbbe78e55cf8c0"},
-    {file = "mypy-1.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:65b122a993d9c81ea0bfde7689b3365318a88bde952e4dfa1b3a8b4ac05d168b"},
-    {file = "mypy-1.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:5deb252fd42a77add936b463033a59b8e48eb2eaec2976d76b6878d031933fe4"},
-    {file = "mypy-1.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2013226d17f20468f34feddd6aae4635a55f79626549099354ce641bc7d40262"},
-    {file = "mypy-1.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:48525aec92b47baed9b3380371ab8ab6e63a5aab317347dfe9e55e02aaad22e8"},
-    {file = "mypy-1.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c96b8a0c019fe29040d520d9257d8c8f122a7343a8307bf8d6d4a43f5c5bfcc8"},
-    {file = "mypy-1.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:448de661536d270ce04f2d7dddaa49b2fdba6e3bd8a83212164d4174ff43aa65"},
-    {file = "mypy-1.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:d42a98e76070a365a1d1c220fcac8aa4ada12ae0db679cb4d910fabefc88b994"},
-    {file = "mypy-1.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e64f48c6176e243ad015e995de05af7f22bbe370dbb5b32bd6988438ec873919"},
-    {file = "mypy-1.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fdd63e4f50e3538617887e9aee91855368d9fc1dea30da743837b0df7373bc4"},
-    {file = "mypy-1.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:dbeb24514c4acbc78d205f85dd0e800f34062efcc1f4a4857c57e4b4b8712bff"},
-    {file = "mypy-1.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a2948c40a7dd46c1c33765718936669dc1f628f134013b02ff5ac6c7ef6942bf"},
-    {file = "mypy-1.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5bc8d6bd3b274dd3846597855d96d38d947aedba18776aa998a8d46fabdaed76"},
-    {file = "mypy-1.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:17455cda53eeee0a4adb6371a21dd3dbf465897de82843751cf822605d152c8c"},
-    {file = "mypy-1.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e831662208055b006eef68392a768ff83596035ffd6d846786578ba1714ba8f6"},
-    {file = "mypy-1.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e60d0b09f62ae97a94605c3f73fd952395286cf3e3b9e7b97f60b01ddfbbda88"},
-    {file = "mypy-1.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:0af4f0e20706aadf4e6f8f8dc5ab739089146b83fd53cb4a7e0e850ef3de0bb6"},
-    {file = "mypy-1.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:24189f23dc66f83b839bd1cce2dfc356020dfc9a8bae03978477b15be61b062e"},
-    {file = "mypy-1.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93a85495fb13dc484251b4c1fd7a5ac370cd0d812bbfc3b39c1bafefe95275d5"},
-    {file = "mypy-1.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f546ac34093c6ce33f6278f7c88f0f147a4849386d3bf3ae193702f4fe31407"},
-    {file = "mypy-1.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c6c2ccb7af7154673c591189c3687b013122c5a891bb5651eca3db8e6c6c55bd"},
-    {file = "mypy-1.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:15b5a824b58c7c822c51bc66308e759243c32631896743f030daf449fe3677f3"},
-    {file = "mypy-1.0.1-py3-none-any.whl", hash = "sha256:eda5c8b9949ed411ff752b9a01adda31afe7eae1e53e946dbdf9db23865e66c4"},
-    {file = "mypy-1.0.1.tar.gz", hash = "sha256:28cea5a6392bb43d266782983b5a4216c25544cd7d80be681a155ddcdafd152d"},
-]
-
-[package.dependencies]
-mypy-extensions = ">=0.4.3"
+    {file = "mypy-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:566e72b0cd6598503e48ea610e0052d1b8168e60a46e0bfd34b3acf2d57f96a8"},
+    {file = "mypy-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ca637024ca67ab24a7fd6f65d280572c3794665eaf5edcc7e90a866544076878"},
+    {file = "mypy-1.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dde1d180cd84f0624c5dcaaa89c89775550a675aff96b5848de78fb11adabcd"},
+    {file = "mypy-1.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8c4d8e89aa7de683e2056a581ce63c46a0c41e31bd2b6d34144e2c80f5ea53dc"},
+    {file = "mypy-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:bfdca17c36ae01a21274a3c387a63aa1aafe72bff976522886869ef131b937f1"},
+    {file = "mypy-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7549fbf655e5825d787bbc9ecf6028731973f78088fbca3a1f4145c39ef09462"},
+    {file = "mypy-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:98324ec3ecf12296e6422939e54763faedbfcc502ea4a4c38502082711867258"},
+    {file = "mypy-1.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:141dedfdbfe8a04142881ff30ce6e6653c9685b354876b12e4fe6c78598b45e2"},
+    {file = "mypy-1.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8207b7105829eca6f3d774f64a904190bb2231de91b8b186d21ffd98005f14a7"},
+    {file = "mypy-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:16f0db5b641ba159eff72cff08edc3875f2b62b2fa2bc24f68c1e7a4e8232d01"},
+    {file = "mypy-1.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:470c969bb3f9a9efcedbadcd19a74ffb34a25f8e6b0e02dae7c0e71f8372f97b"},
+    {file = "mypy-1.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5952d2d18b79f7dc25e62e014fe5a23eb1a3d2bc66318df8988a01b1a037c5b"},
+    {file = "mypy-1.4.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:190b6bab0302cec4e9e6767d3eb66085aef2a1cc98fe04936d8a42ed2ba77bb7"},
+    {file = "mypy-1.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:9d40652cc4fe33871ad3338581dca3297ff5f2213d0df345bcfbde5162abf0c9"},
+    {file = "mypy-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:01fd2e9f85622d981fd9063bfaef1aed6e336eaacca00892cd2d82801ab7c042"},
+    {file = "mypy-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2460a58faeea905aeb1b9b36f5065f2dc9a9c6e4c992a6499a2360c6c74ceca3"},
+    {file = "mypy-1.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2746d69a8196698146a3dbe29104f9eb6a2a4d8a27878d92169a6c0b74435b6"},
+    {file = "mypy-1.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ae704dcfaa180ff7c4cfbad23e74321a2b774f92ca77fd94ce1049175a21c97f"},
+    {file = "mypy-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:43d24f6437925ce50139a310a64b2ab048cb2d3694c84c71c3f2a1626d8101dc"},
+    {file = "mypy-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c482e1246726616088532b5e964e39765b6d1520791348e6c9dc3af25b233828"},
+    {file = "mypy-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:43b592511672017f5b1a483527fd2684347fdffc041c9ef53428c8dc530f79a3"},
+    {file = "mypy-1.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:34a9239d5b3502c17f07fd7c0b2ae6b7dd7d7f6af35fbb5072c6208e76295816"},
+    {file = "mypy-1.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5703097c4936bbb9e9bce41478c8d08edd2865e177dc4c52be759f81ee4dd26c"},
+    {file = "mypy-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:e02d700ec8d9b1859790c0475df4e4092c7bf3272a4fd2c9f33d87fac4427b8f"},
+    {file = "mypy-1.4.1-py3-none-any.whl", hash = "sha256:45d32cec14e7b97af848bddd97d85ea4f0db4d5a149ed9676caa4eb2f7402bb4"},
+    {file = "mypy-1.4.1.tar.gz", hash = "sha256:9bbcd9ab8ea1f2e1c8031c21445b511442cc45c89951e49bbf852cbb70755b1b"},
+]
+
+[package.dependencies]
+mypy-extensions = ">=1.0.0"
 tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
-typing-extensions = ">=3.10"
+typing-extensions = ">=4.1.0"
 
 [package.extras]
 dmypy = ["psutil (>=4.0)"]
@@ -1502,17 +1502,17 @@ files = [
 
 [[package]]
 name = "mypy-zope"
-version = "0.9.1"
+version = "1.0.0"
 description = "Plugin for mypy to support zope interfaces"
 optional = false
 python-versions = "*"
 files = [
-    {file = "mypy-zope-0.9.1.tar.gz", hash = "sha256:4c87dbc71fec35f6533746ecdf9d400cd9281338d71c16b5676bb5ed00a97ca2"},
-    {file = "mypy_zope-0.9.1-py3-none-any.whl", hash = "sha256:733d4399affe9e61e332ce9c4049418d6775c39b473e4b9f409d51c207c1b71a"},
+    {file = "mypy-zope-1.0.0.tar.gz", hash = "sha256:be815c2fcb5333aa87e8ec682029ad3214142fe2a05ea383f9ff2d77c98008b7"},
+    {file = "mypy_zope-1.0.0-py3-none-any.whl", hash = "sha256:9732e9b2198f2aec3343b38a51905ff49d44dc9e39e8e8bc6fc490b232388209"},
 ]
 
 [package.dependencies]
-mypy = ">=1.0.0,<1.1.0"
+mypy = ">=1.0.0,<1.5.0"
 "zope.interface" = "*"
 "zope.schema" = "*"
 
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 49242800b8..ab2b29cf1b 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -482,7 +482,10 @@ class Porter:
                         do_backward[0] = False
 
                 if forward_rows or backward_rows:
-                    headers = [column[0] for column in txn.description]
+                    assert txn.description is not None
+                    headers: Optional[List[str]] = [
+                        column[0] for column in txn.description
+                    ]
                 else:
                     headers = None
 
@@ -544,6 +547,7 @@ class Porter:
             def r(txn: LoggingTransaction) -> Tuple[List[str], List[Tuple]]:
                 txn.execute(select, (forward_chunk, self.batch_size))
                 rows = txn.fetchall()
+                assert txn.description is not None
                 headers = [column[0] for column in txn.description]
 
                 return headers, rows
@@ -919,7 +923,8 @@ class Porter:
         def r(txn: LoggingTransaction) -> Tuple[List[str], List[Tuple]]:
             txn.execute(select)
             rows = txn.fetchall()
-            headers: List[str] = [column[0] for column in txn.description]
+            assert txn.description is not None
+            headers = [column[0] for column in txn.description]
 
             ts_ind = headers.index("ts")
 
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index be910128aa..5c3045e197 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -910,10 +910,10 @@ def _custom_sync_async_decorator(
         async def _wrapper(
             *args: P.args, **kwargs: P.kwargs
         ) -> Any:  # Return type is RInner
-            with wrapping_logic(func, *args, **kwargs):
-                # type-ignore: func() returns R, but mypy doesn't know that R is
-                # Awaitable here.
-                return await func(*args, **kwargs)  # type: ignore[misc]
+            # type-ignore: func() returns R, but mypy doesn't know that R is
+            # Awaitable here.
+            with wrapping_logic(func, *args, **kwargs):  # type: ignore[arg-type]
+                return await func(*args, **kwargs)
 
     else:
         # The other case here handles sync functions including those decorated with
@@ -980,8 +980,7 @@ def trace_with_opname(
     See the module's doc string for usage examples.
     """
 
-    # type-ignore: mypy bug, see https://github.com/python/mypy/issues/12909
-    @contextlib.contextmanager  # type: ignore[arg-type]
+    @contextlib.contextmanager
     def _wrapping_logic(
         func: Callable[P, R], *args: P.args, **kwargs: P.kwargs
     ) -> Generator[None, None, None]:
@@ -1024,8 +1023,7 @@ def tag_args(func: Callable[P, R]) -> Callable[P, R]:
     if not opentracing:
         return func
 
-    # type-ignore: mypy bug, see https://github.com/python/mypy/issues/12909
-    @contextlib.contextmanager  # type: ignore[arg-type]
+    @contextlib.contextmanager
     def _wrapping_logic(
         func: Callable[P, R], *args: P.args, **kwargs: P.kwargs
     ) -> Generator[None, None, None]:
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index a1c8fb0f46..55ac313f33 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -31,6 +31,7 @@ from typing import (
     Iterator,
     List,
     Optional,
+    Sequence,
     Tuple,
     Type,
     TypeVar,
@@ -358,7 +359,21 @@ class LoggingTransaction:
         return self.txn.rowcount
 
     @property
-    def description(self) -> Any:
+    def description(
+        self,
+    ) -> Optional[
+        Sequence[
+            Tuple[
+                str,
+                Optional[Any],
+                Optional[int],
+                Optional[int],
+                Optional[int],
+                Optional[int],
+                Optional[int],
+            ]
+        ]
+    ]:
         return self.txn.description
 
     def execute_batch(self, sql: str, args: Iterable[Iterable[Any]]) -> None:
diff --git a/synapse/util/check_dependencies.py b/synapse/util/check_dependencies.py
index 114130a08f..f7cead9e12 100644
--- a/synapse/util/check_dependencies.py
+++ b/synapse/util/check_dependencies.py
@@ -51,9 +51,9 @@ class DependencyException(Exception):
 
 
 DEV_EXTRAS = {"lint", "mypy", "test", "dev"}
-RUNTIME_EXTRAS = (
-    set(metadata.metadata(DISTRIBUTION_NAME).get_all("Provides-Extra")) - DEV_EXTRAS
-)
+ALL_EXTRAS = metadata.metadata(DISTRIBUTION_NAME).get_all("Provides-Extra")
+assert ALL_EXTRAS is not None
+RUNTIME_EXTRAS = set(ALL_EXTRAS) - DEV_EXTRAS
 VERSION = metadata.version(DISTRIBUTION_NAME)
 
 
diff --git a/tests/appservice/test_api.py b/tests/appservice/test_api.py
index 3c635e3dcb..75fb5fae6b 100644
--- a/tests/appservice/test_api.py
+++ b/tests/appservice/test_api.py
@@ -96,7 +96,7 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
                 )
 
         # We assign to a method, which mypy doesn't like.
-        self.api.get_json = Mock(side_effect=get_json)  # type: ignore[assignment]
+        self.api.get_json = Mock(side_effect=get_json)  # type: ignore[method-assign]
 
         result = self.get_success(
             self.api.query_3pe(self.service, "user", PROTOCOL, {b"some": [b"field"]})
@@ -168,7 +168,7 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
                 )
 
         # We assign to a method, which mypy doesn't like.
-        self.api.get_json = Mock(side_effect=get_json)  # type: ignore[assignment]
+        self.api.get_json = Mock(side_effect=get_json)  # type: ignore[method-assign]
 
         result = self.get_success(
             self.api.query_3pe(self.service, "user", PROTOCOL, {b"some": [b"field"]})
@@ -215,7 +215,7 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
             return RESPONSE
 
         # We assign to a method, which mypy doesn't like.
-        self.api.post_json_get_json = Mock(side_effect=post_json_get_json)  # type: ignore[assignment]
+        self.api.post_json_get_json = Mock(side_effect=post_json_get_json)  # type: ignore[method-assign]
 
         MISSING_KEYS = [
             # Known user, known device, missing algorithm.
diff --git a/tests/federation/test_complexity.py b/tests/federation/test_complexity.py
index 5b58fb13b5..73a2766baf 100644
--- a/tests/federation/test_complexity.py
+++ b/tests/federation/test_complexity.py
@@ -57,7 +57,7 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase):
         async def get_current_state_event_counts(room_id: str) -> int:
             return int(500 * 1.23)
 
-        store.get_current_state_event_counts = get_current_state_event_counts  # type: ignore[assignment]
+        store.get_current_state_event_counts = get_current_state_event_counts  # type: ignore[method-assign]
 
         # Get the room complexity again -- make sure it's our artificial value
         channel = self.make_signed_federation_request(
@@ -74,8 +74,8 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase):
         fed_transport = self.hs.get_federation_transport_client()
 
         # Mock out some things, because we don't want to test the whole join
-        fed_transport.client.get_json = AsyncMock(return_value={"v1": 9999})  # type: ignore[assignment]
-        handler.federation_handler.do_invite_join = AsyncMock(  # type: ignore[assignment]
+        fed_transport.client.get_json = AsyncMock(return_value={"v1": 9999})  # type: ignore[method-assign]
+        handler.federation_handler.do_invite_join = AsyncMock(  # type: ignore[method-assign]
             return_value=("", 1)
         )
 
@@ -105,8 +105,8 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase):
         fed_transport = self.hs.get_federation_transport_client()
 
         # Mock out some things, because we don't want to test the whole join
-        fed_transport.client.get_json = AsyncMock(return_value={"v1": 9999})  # type: ignore[assignment]
-        handler.federation_handler.do_invite_join = AsyncMock(  # type: ignore[assignment]
+        fed_transport.client.get_json = AsyncMock(return_value={"v1": 9999})  # type: ignore[method-assign]
+        handler.federation_handler.do_invite_join = AsyncMock(  # type: ignore[method-assign]
             return_value=("", 1)
         )
 
@@ -142,8 +142,8 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase):
         fed_transport = self.hs.get_federation_transport_client()
 
         # Mock out some things, because we don't want to test the whole join
-        fed_transport.client.get_json = AsyncMock(return_value=None)  # type: ignore[assignment]
-        handler.federation_handler.do_invite_join = AsyncMock(  # type: ignore[assignment]
+        fed_transport.client.get_json = AsyncMock(return_value=None)  # type: ignore[method-assign]
+        handler.federation_handler.do_invite_join = AsyncMock(  # type: ignore[method-assign]
             return_value=("", 1)
         )
 
@@ -151,7 +151,7 @@ class RoomComplexityTests(unittest.FederatingHomeserverTestCase):
         async def get_current_state_event_counts(room_id: str) -> int:
             return 600
 
-        self.hs.get_datastores().main.get_current_state_event_counts = get_current_state_event_counts  # type: ignore[assignment]
+        self.hs.get_datastores().main.get_current_state_event_counts = get_current_state_event_counts  # type: ignore[method-assign]
 
         d = handler._remote_join(
             create_requester(u1),
@@ -199,8 +199,8 @@ class RoomComplexityAdminTests(unittest.FederatingHomeserverTestCase):
         fed_transport = self.hs.get_federation_transport_client()
 
         # Mock out some things, because we don't want to test the whole join
-        fed_transport.client.get_json = AsyncMock(return_value={"v1": 9999})  # type: ignore[assignment]
-        handler.federation_handler.do_invite_join = AsyncMock(  # type: ignore[assignment]
+        fed_transport.client.get_json = AsyncMock(return_value={"v1": 9999})  # type: ignore[method-assign]
+        handler.federation_handler.do_invite_join = AsyncMock(  # type: ignore[method-assign]
             return_value=("", 1)
         )
 
@@ -229,8 +229,8 @@ class RoomComplexityAdminTests(unittest.FederatingHomeserverTestCase):
         fed_transport = self.hs.get_federation_transport_client()
 
         # Mock out some things, because we don't want to test the whole join
-        fed_transport.client.get_json = AsyncMock(return_value={"v1": 9999})  # type: ignore[assignment]
-        handler.federation_handler.do_invite_join = AsyncMock(  # type: ignore[assignment]
+        fed_transport.client.get_json = AsyncMock(return_value={"v1": 9999})  # type: ignore[method-assign]
+        handler.federation_handler.do_invite_join = AsyncMock(  # type: ignore[method-assign]
             return_value=("", 1)
         )
 
diff --git a/tests/federation/test_federation_catch_up.py b/tests/federation/test_federation_catch_up.py
index 40318aa1b6..75ae740b43 100644
--- a/tests/federation/test_federation_catch_up.py
+++ b/tests/federation/test_federation_catch_up.py
@@ -50,7 +50,7 @@ class FederationCatchUpTestCases(FederatingHomeserverTestCase):
         # This mock is crucial for destination_rooms to be populated.
         # TODO: this seems to no longer be the case---tests pass with this mock
         # commented out.
-        state_storage_controller.get_current_hosts_in_room = AsyncMock(  # type: ignore[assignment]
+        state_storage_controller.get_current_hosts_in_room = AsyncMock(  # type: ignore[method-assign]
             return_value={"test", "host2"}
         )
 
@@ -436,7 +436,7 @@ class FederationCatchUpTestCases(FederatingHomeserverTestCase):
         def wake_destination_track(destination: str) -> None:
             woken.add(destination)
 
-        self.federation_sender.wake_destination = wake_destination_track  # type: ignore[assignment]
+        self.federation_sender.wake_destination = wake_destination_track  # type: ignore[method-assign]
 
         # We wait quite long so that all dests can be woken up, since there is a delay
         # between them.
diff --git a/tests/federation/test_federation_sender.py b/tests/federation/test_federation_sender.py
index 5ea4a75a9f..7bd3d06859 100644
--- a/tests/federation/test_federation_sender.py
+++ b/tests/federation/test_federation_sender.py
@@ -47,11 +47,11 @@ class FederationSenderReceiptsTestCases(HomeserverTestCase):
             federation_transport_client=self.federation_transport_client,
         )
 
-        hs.get_storage_controllers().state.get_current_hosts_in_room = AsyncMock(  # type: ignore[assignment]
+        hs.get_storage_controllers().state.get_current_hosts_in_room = AsyncMock(  # type: ignore[method-assign]
             return_value={"test", "host2"}
         )
 
-        hs.get_storage_controllers().state.get_current_hosts_in_room_or_partial_state_approximation = (  # type: ignore[assignment]
+        hs.get_storage_controllers().state.get_current_hosts_in_room_or_partial_state_approximation = (  # type: ignore[method-assign]
             hs.get_storage_controllers().state.get_current_hosts_in_room
         )
 
diff --git a/tests/federation/transport/test_knocking.py b/tests/federation/transport/test_knocking.py
index 70209ab090..3f42f79f26 100644
--- a/tests/federation/transport/test_knocking.py
+++ b/tests/federation/transport/test_knocking.py
@@ -218,7 +218,7 @@ class FederationKnockingTestCase(
         ) -> EventBase:
             return pdu
 
-        homeserver.get_federation_server()._check_sigs_and_hash = (  # type: ignore[assignment]
+        homeserver.get_federation_server()._check_sigs_and_hash = (  # type: ignore[method-assign]
             approve_all_signature_checking
         )
 
@@ -229,7 +229,7 @@ class FederationKnockingTestCase(
         ) -> None:
             pass
 
-        homeserver.get_federation_event_handler()._check_event_auth = _check_event_auth  # type: ignore[assignment]
+        homeserver.get_federation_event_handler()._check_event_auth = _check_event_auth  # type: ignore[method-assign]
 
         return super().prepare(reactor, clock, homeserver)
 
diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py
index 4bd0facd65..46d022092e 100644
--- a/tests/handlers/test_appservice.py
+++ b/tests/handlers/test_appservice.py
@@ -400,11 +400,11 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase):
         # Mock the ApplicationServiceScheduler's _TransactionController's send method so that
         # we can track any outgoing ephemeral events
         self.send_mock = AsyncMock()
-        hs.get_application_service_handler().scheduler.txn_ctrl.send = self.send_mock  # type: ignore[assignment]
+        hs.get_application_service_handler().scheduler.txn_ctrl.send = self.send_mock  # type: ignore[method-assign]
 
         # Mock out application services, and allow defining our own in tests
         self._services: List[ApplicationService] = []
-        self.hs.get_datastores().main.get_app_services = Mock(  # type: ignore[assignment]
+        self.hs.get_datastores().main.get_app_services = Mock(  # type: ignore[method-assign]
             return_value=self._services
         )
 
@@ -898,11 +898,11 @@ class ApplicationServicesHandlerDeviceListsTestCase(unittest.HomeserverTestCase)
         # Mock ApplicationServiceApi's put_json, so we can verify the raw JSON that
         # will be sent over the wire
         self.put_json = AsyncMock()
-        hs.get_application_service_api().put_json = self.put_json  # type: ignore[assignment]
+        hs.get_application_service_api().put_json = self.put_json  # type: ignore[method-assign]
 
         # Mock out application services, and allow defining our own in tests
         self._services: List[ApplicationService] = []
-        self.hs.get_datastores().main.get_app_services = Mock(  # type: ignore[assignment]
+        self.hs.get_datastores().main.get_app_services = Mock(  # type: ignore[method-assign]
             return_value=self._services
         )
 
@@ -1004,7 +1004,7 @@ class ApplicationServicesHandlerOtkCountsTestCase(unittest.HomeserverTestCase):
         # Mock the ApplicationServiceScheduler's _TransactionController's send method so that
         # we can track what's going out
         self.send_mock = AsyncMock()
-        hs.get_application_service_handler().scheduler.txn_ctrl.send = self.send_mock  # type: ignore[assignment]  # We assign to a method.
+        hs.get_application_service_handler().scheduler.txn_ctrl.send = self.send_mock  # type: ignore[method-assign]  # We assign to a method.
 
         # Define an application service for the tests
         self._service_token = "VERYSECRET"
diff --git a/tests/handlers/test_cas.py b/tests/handlers/test_cas.py
index 2cb24add20..8582b1cd1e 100644
--- a/tests/handlers/test_cas.py
+++ b/tests/handlers/test_cas.py
@@ -60,7 +60,7 @@ class CasHandlerTestCase(HomeserverTestCase):
 
         # stub out the auth handler
         auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[assignment]
+        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[method-assign]
 
         cas_response = CasResponse("test_user", {})
         request = _mock_request()
@@ -88,7 +88,7 @@ class CasHandlerTestCase(HomeserverTestCase):
 
         # stub out the auth handler
         auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[assignment]
+        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[method-assign]
 
         # Map a user via SSO.
         cas_response = CasResponse("test_user", {})
@@ -128,7 +128,7 @@ class CasHandlerTestCase(HomeserverTestCase):
 
         # stub out the auth handler
         auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[assignment]
+        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[method-assign]
 
         cas_response = CasResponse("föö", {})
         request = _mock_request()
@@ -159,7 +159,7 @@ class CasHandlerTestCase(HomeserverTestCase):
 
         # stub out the auth handler
         auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[assignment]
+        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[method-assign]
 
         # The response doesn't have the proper userGroup or department.
         cas_response = CasResponse("test_user", {})
diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py
index 7917766a08..c5556f2844 100644
--- a/tests/handlers/test_e2e_keys.py
+++ b/tests/handlers/test_e2e_keys.py
@@ -800,7 +800,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         remote_master_key = "85T7JXPFBAySB/jwby4S3lBPTqY3+Zg53nYuGmu1ggY"
         remote_self_signing_key = "QeIiFEjluPBtI7WQdG365QKZcFs9kqmHir6RBD0//nQ"
 
-        self.hs.get_federation_client().query_client_keys = mock.AsyncMock(  # type: ignore[assignment]
+        self.hs.get_federation_client().query_client_keys = mock.AsyncMock(  # type: ignore[method-assign]
             return_value={
                 "device_keys": {remote_user_id: {}},
                 "master_keys": {
@@ -876,7 +876,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase):
         remote_master_key = "85T7JXPFBAySB/jwby4S3lBPTqY3+Zg53nYuGmu1ggY"
         remote_self_signing_key = "QeIiFEjluPBtI7WQdG365QKZcFs9kqmHir6RBD0//nQ"
 
-        self.hs.get_federation_client().query_user_devices = mock.AsyncMock(  # type: ignore[assignment]
+        self.hs.get_federation_client().query_user_devices = mock.AsyncMock(  # type: ignore[method-assign]
             return_value={
                 "user_id": remote_user_id,
                 "stream_id": 1,
diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py
index bd743b3578..21d63ab1f2 100644
--- a/tests/handlers/test_federation.py
+++ b/tests/handlers/test_federation.py
@@ -371,14 +371,14 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase):
         # We mock out the FederationClient.backfill method, to pretend that a remote
         # server has returned our fake event.
         federation_client_backfill_mock = AsyncMock(return_value=[event])
-        self.hs.get_federation_client().backfill = federation_client_backfill_mock  # type: ignore[assignment]
+        self.hs.get_federation_client().backfill = federation_client_backfill_mock  # type: ignore[method-assign]
 
         # We also mock the persist method with a side effect of itself. This allows us
         # to track when it has been called while preserving its function.
         persist_events_and_notify_mock = Mock(
             side_effect=self.hs.get_federation_event_handler().persist_events_and_notify
         )
-        self.hs.get_federation_event_handler().persist_events_and_notify = (  # type: ignore[assignment]
+        self.hs.get_federation_event_handler().persist_events_and_notify = (  # type: ignore[method-assign]
             persist_events_and_notify_mock
         )
 
diff --git a/tests/handlers/test_oidc.py b/tests/handlers/test_oidc.py
index 9b2c7812cc..e797aaae00 100644
--- a/tests/handlers/test_oidc.py
+++ b/tests/handlers/test_oidc.py
@@ -157,7 +157,7 @@ class OidcHandlerTestCase(HomeserverTestCase):
         sso_handler = hs.get_sso_handler()
         # Mock the render error method.
         self.render_error = Mock(return_value=None)
-        sso_handler.render_error = self.render_error  # type: ignore[assignment]
+        sso_handler.render_error = self.render_error  # type: ignore[method-assign]
 
         # Reduce the number of attempts when generating MXIDs.
         sso_handler._MAP_USERNAME_RETRIES = 3
@@ -165,7 +165,7 @@ class OidcHandlerTestCase(HomeserverTestCase):
         auth_handler = hs.get_auth_handler()
         # Mock the complete SSO login method.
         self.complete_sso_login = AsyncMock()
-        auth_handler.complete_sso_login = self.complete_sso_login  # type: ignore[assignment]
+        auth_handler.complete_sso_login = self.complete_sso_login  # type: ignore[method-assign]
 
         return hs
 
diff --git a/tests/handlers/test_password_providers.py b/tests/handlers/test_password_providers.py
index 4496370c3f..11ec8c7f11 100644
--- a/tests/handlers/test_password_providers.py
+++ b/tests/handlers/test_password_providers.py
@@ -830,7 +830,7 @@ class PasswordAuthProviderTests(unittest.HomeserverTestCase):
             username: The username to use for the test.
             registration: Whether to test with registration URLs.
         """
-        self.hs.get_identity_handler().send_threepid_validation = AsyncMock(  # type: ignore[assignment]
+        self.hs.get_identity_handler().send_threepid_validation = AsyncMock(  # type: ignore[method-assign]
             return_value=0
         )
 
diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py
index a04234829f..e9fbf32c7c 100644
--- a/tests/handlers/test_register.py
+++ b/tests/handlers/test_register.py
@@ -202,7 +202,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
 
     @override_config({"limit_usage_by_mau": True})
     def test_get_or_create_user_mau_not_blocked(self) -> None:
-        self.store.count_monthly_users = AsyncMock(  # type: ignore[assignment]
+        self.store.count_monthly_users = AsyncMock(  # type: ignore[method-assign]
             return_value=self.hs.config.server.max_mau_value - 1
         )
         # Ensure does not throw exception
@@ -299,7 +299,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
     def test_auto_create_auto_join_rooms_when_user_is_the_first_real_user(self) -> None:
         room_alias_str = "#room:test"
 
-        self.store.count_real_users = AsyncMock(return_value=1)  # type: ignore[assignment]
+        self.store.count_real_users = AsyncMock(return_value=1)  # type: ignore[method-assign]
         self.store.is_real_user = AsyncMock(return_value=True)
         user_id = self.get_success(self.handler.register_user(localpart="real"))
         rooms = self.get_success(self.store.get_rooms_for_user(user_id))
@@ -314,7 +314,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
     def test_auto_create_auto_join_rooms_when_user_is_not_the_first_real_user(
         self,
     ) -> None:
-        self.store.count_real_users = AsyncMock(return_value=2)  # type: ignore[assignment]
+        self.store.count_real_users = AsyncMock(return_value=2)  # type: ignore[method-assign]
         self.store.is_real_user = AsyncMock(return_value=True)
         user_id = self.get_success(self.handler.register_user(localpart="real"))
         rooms = self.get_success(self.store.get_rooms_for_user(user_id))
diff --git a/tests/handlers/test_saml.py b/tests/handlers/test_saml.py
index 6e666d7bed..00f4e181e8 100644
--- a/tests/handlers/test_saml.py
+++ b/tests/handlers/test_saml.py
@@ -133,7 +133,7 @@ class SamlHandlerTestCase(HomeserverTestCase):
 
         # stub out the auth handler
         auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[assignment]
+        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[method-assign]
 
         # send a mocked-up SAML response to the callback
         saml_response = FakeAuthnResponse({"uid": "test_user", "username": "test_user"})
@@ -163,7 +163,7 @@ class SamlHandlerTestCase(HomeserverTestCase):
 
         # stub out the auth handler
         auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[assignment]
+        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[method-assign]
 
         # Map a user via SSO.
         saml_response = FakeAuthnResponse(
@@ -205,11 +205,11 @@ class SamlHandlerTestCase(HomeserverTestCase):
 
         # stub out the auth handler
         auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[assignment]
+        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[method-assign]
 
         # mock out the error renderer too
         sso_handler = self.hs.get_sso_handler()
-        sso_handler.render_error = Mock(return_value=None)  # type: ignore[assignment]
+        sso_handler.render_error = Mock(return_value=None)  # type: ignore[method-assign]
 
         saml_response = FakeAuthnResponse({"uid": "test", "username": "föö"})
         request = _mock_request()
@@ -226,9 +226,9 @@ class SamlHandlerTestCase(HomeserverTestCase):
 
         # stub out the auth handler and error renderer
         auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[assignment]
+        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[method-assign]
         sso_handler = self.hs.get_sso_handler()
-        sso_handler.render_error = Mock(return_value=None)  # type: ignore[assignment]
+        sso_handler.render_error = Mock(return_value=None)  # type: ignore[method-assign]
 
         # register a user to occupy the first-choice MXID
         store = self.hs.get_datastores().main
@@ -311,7 +311,7 @@ class SamlHandlerTestCase(HomeserverTestCase):
 
         # stub out the auth handler
         auth_handler = self.hs.get_auth_handler()
-        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[assignment]
+        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[method-assign]
 
         # The response doesn't have the proper userGroup or department.
         saml_response = FakeAuthnResponse({"uid": "test_user", "username": "test_user"})
diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py
index d776526bc1..2a295da3a0 100644
--- a/tests/handlers/test_typing.py
+++ b/tests/handlers/test_typing.py
@@ -122,15 +122,15 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
 
         self.datastore.get_destination_retry_timings = AsyncMock(return_value=None)
 
-        self.datastore.get_device_updates_by_remote = AsyncMock(  # type: ignore[assignment]
+        self.datastore.get_device_updates_by_remote = AsyncMock(  # type: ignore[method-assign]
             return_value=(0, [])
         )
 
-        self.datastore.get_destination_last_successful_stream_ordering = AsyncMock(  # type: ignore[assignment]
+        self.datastore.get_destination_last_successful_stream_ordering = AsyncMock(  # type: ignore[method-assign]
             return_value=None
         )
 
-        self.datastore.get_received_txn_response = AsyncMock(  # type: ignore[assignment]
+        self.datastore.get_received_txn_response = AsyncMock(  # type: ignore[method-assign]
             return_value=None
         )
 
@@ -143,25 +143,25 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
                 raise AuthError(401, "User is not in the room")
             return None
 
-        hs.get_auth().check_user_in_room = Mock(  # type: ignore[assignment]
+        hs.get_auth().check_user_in_room = Mock(  # type: ignore[method-assign]
             side_effect=check_user_in_room
         )
 
         async def check_host_in_room(room_id: str, server_name: str) -> bool:
             return room_id == ROOM_ID
 
-        hs.get_event_auth_handler().is_host_in_room = Mock(  # type: ignore[assignment]
+        hs.get_event_auth_handler().is_host_in_room = Mock(  # type: ignore[method-assign]
             side_effect=check_host_in_room
         )
 
         async def get_current_hosts_in_room(room_id: str) -> Set[str]:
             return {member.domain for member in self.room_members}
 
-        hs.get_storage_controllers().state.get_current_hosts_in_room = Mock(  # type: ignore[assignment]
+        hs.get_storage_controllers().state.get_current_hosts_in_room = Mock(  # type: ignore[method-assign]
             side_effect=get_current_hosts_in_room
         )
 
-        hs.get_storage_controllers().state.get_current_hosts_in_room_or_partial_state_approximation = Mock(  # type: ignore[assignment]
+        hs.get_storage_controllers().state.get_current_hosts_in_room_or_partial_state_approximation = Mock(  # type: ignore[method-assign]
             side_effect=get_current_hosts_in_room
         )
 
@@ -170,24 +170,24 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
 
         self.datastore.get_users_in_room = Mock(side_effect=get_users_in_room)
 
-        self.datastore.get_user_directory_stream_pos = AsyncMock(  # type: ignore[assignment]
+        self.datastore.get_user_directory_stream_pos = AsyncMock(  # type: ignore[method-assign]
             # we deliberately return a non-None stream pos to avoid
             # doing an initial_sync
             return_value=1
         )
 
-        self.datastore.get_partial_current_state_deltas = Mock(return_value=(0, None))  # type: ignore[assignment]
+        self.datastore.get_partial_current_state_deltas = Mock(return_value=(0, None))  # type: ignore[method-assign]
 
-        self.datastore.get_to_device_stream_token = Mock(  # type: ignore[assignment]
+        self.datastore.get_to_device_stream_token = Mock(  # type: ignore[method-assign]
             return_value=0
         )
-        self.datastore.get_new_device_msgs_for_remote = AsyncMock(  # type: ignore[assignment]
+        self.datastore.get_new_device_msgs_for_remote = AsyncMock(  # type: ignore[method-assign]
             return_value=([], 0)
         )
-        self.datastore.delete_device_msgs_for_remote = AsyncMock(  # type: ignore[assignment]
+        self.datastore.delete_device_msgs_for_remote = AsyncMock(  # type: ignore[method-assign]
             return_value=None
         )
-        self.datastore.set_received_txn_response = AsyncMock(  # type: ignore[assignment]
+        self.datastore.set_received_txn_response = AsyncMock(  # type: ignore[method-assign]
             return_value=None
         )
 
diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py
index fa27f1279a..c379853e20 100644
--- a/tests/logging/test_terse_json.py
+++ b/tests/logging/test_terse_json.py
@@ -164,7 +164,7 @@ class TerseJsonTestCase(LoggerCleanupMixin, TestCase):
         # Call requestReceived to finish instantiating the object.
         request.content = BytesIO()
         # Partially skip some internal processing of SynapseRequest.
-        request._started_processing = Mock()  # type: ignore[assignment]
+        request._started_processing = Mock()  # type: ignore[method-assign]
         request.request_metrics = Mock(spec=["name"])
         with patch.object(Request, "render"):
             request.requestReceived(b"POST", b"/_matrix/client/versions", b"1.1")
diff --git a/tests/module_api/test_api.py b/tests/module_api/test_api.py
index 9ce9326190..172fc3a736 100644
--- a/tests/module_api/test_api.py
+++ b/tests/module_api/test_api.py
@@ -233,7 +233,7 @@ class ModuleApiTestCase(BaseModuleApiTestCase):
     def test_sending_events_into_room(self) -> None:
         """Tests that a module can send events into a room"""
         # Mock out create_and_send_nonmember_event to check whether events are being sent
-        self.event_creation_handler.create_and_send_nonmember_event = Mock(  # type: ignore[assignment]
+        self.event_creation_handler.create_and_send_nonmember_event = Mock(  # type: ignore[method-assign]
             spec=[],
             side_effect=self.event_creation_handler.create_and_send_nonmember_event,
         )
@@ -579,7 +579,7 @@ class ModuleApiTestCase(BaseModuleApiTestCase):
         # Necessary to fake a remote join.
         fake_stream_id = 1
         mocked_remote_join = AsyncMock(return_value=("fake-event-id", fake_stream_id))
-        self.hs.get_room_member_handler()._remote_join = mocked_remote_join  # type: ignore[assignment]
+        self.hs.get_room_member_handler()._remote_join = mocked_remote_join  # type: ignore[method-assign]
         fake_remote_host = f"{self.module_api.server_name}-remote"
 
         # Given that the join is to be faked, we expect the relevant join event not to
diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py
index a3880ac171..7c23b77e0a 100644
--- a/tests/push/test_bulk_push_rule_evaluator.py
+++ b/tests/push/test_bulk_push_rule_evaluator.py
@@ -190,7 +190,7 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase):
         # Mock the method which calculates push rules -- we do this instead of
         # e.g. checking the results in the database because we want to ensure
         # that code isn't even running.
-        bulk_evaluator._action_for_event_by_user = AsyncMock()  # type: ignore[assignment]
+        bulk_evaluator._action_for_event_by_user = AsyncMock()  # type: ignore[method-assign]
 
         # Ensure no actions are generated!
         self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)]))
diff --git a/tests/replication/storage/test_events.py b/tests/replication/storage/test_events.py
index f7c6417a09..af25815fa5 100644
--- a/tests/replication/storage/test_events.py
+++ b/tests/replication/storage/test_events.py
@@ -58,7 +58,7 @@ def patch__eq__(cls: object) -> Callable[[], None]:
 
     def unpatch() -> None:
         if eq is not None:
-            cls.__eq__ = eq  # type: ignore[assignment]
+            cls.__eq__ = eq  # type: ignore[method-assign]
 
     return unpatch
 
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index 339a41c7e1..2f6bd0d74f 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -71,8 +71,8 @@ class UserRegisterTestCase(unittest.HomeserverTestCase):
 
         self.hs.config.registration.registration_shared_secret = "shared"
 
-        self.hs.get_media_repository = Mock()  # type: ignore[assignment]
-        self.hs.get_deactivate_account_handler = Mock()  # type: ignore[assignment]
+        self.hs.get_media_repository = Mock()  # type: ignore[method-assign]
+        self.hs.get_deactivate_account_handler = Mock()  # type: ignore[method-assign]
 
         return self.hs
 
diff --git a/tests/rest/admin/test_username_available.py b/tests/rest/admin/test_username_available.py
index 6c04e6c56c..4c69d224b8 100644
--- a/tests/rest/admin/test_username_available.py
+++ b/tests/rest/admin/test_username_available.py
@@ -50,7 +50,7 @@ class UsernameAvailableTestCase(unittest.HomeserverTestCase):
             )
 
         handler = self.hs.get_registration_handler()
-        handler.check_username = check_username  # type: ignore[assignment]
+        handler.check_username = check_username  # type: ignore[method-assign]
 
     def test_username_available(self) -> None:
         """
diff --git a/tests/rest/client/test_account.py b/tests/rest/client/test_account.py
index ac19f3c6da..e9f495e206 100644
--- a/tests/rest/client/test_account.py
+++ b/tests/rest/client/test_account.py
@@ -1346,7 +1346,7 @@ class AccountStatusTestCase(unittest.HomeserverTestCase):
                 return {}
 
         # Register a mock that will return the expected result depending on the remote.
-        self.hs.get_federation_http_client().post_json = Mock(side_effect=post_json)  # type: ignore[assignment]
+        self.hs.get_federation_http_client().post_json = Mock(side_effect=post_json)  # type: ignore[method-assign]
 
         # Check that we've got the correct response from the client-side endpoint.
         self._test_status(
diff --git a/tests/rest/client/test_events.py b/tests/rest/client/test_events.py
index 54df2a252c..141e0f57a3 100644
--- a/tests/rest/client/test_events.py
+++ b/tests/rest/client/test_events.py
@@ -45,7 +45,7 @@ class EventStreamPermissionsTestCase(unittest.HomeserverTestCase):
 
         hs = self.setup_test_homeserver(config=config)
 
-        hs.get_federation_handler = Mock()  # type: ignore[assignment]
+        hs.get_federation_handler = Mock()  # type: ignore[method-assign]
 
         return hs
 
diff --git a/tests/rest/client/test_filter.py b/tests/rest/client/test_filter.py
index a2d5d340be..90a8df147c 100644
--- a/tests/rest/client/test_filter.py
+++ b/tests/rest/client/test_filter.py
@@ -65,14 +65,14 @@ class FilterTestCase(unittest.HomeserverTestCase):
 
     def test_add_filter_non_local_user(self) -> None:
         _is_mine = self.hs.is_mine
-        self.hs.is_mine = lambda target_user: False  # type: ignore[assignment]
+        self.hs.is_mine = lambda target_user: False  # type: ignore[method-assign]
         channel = self.make_request(
             "POST",
             "/_matrix/client/r0/user/%s/filter" % (self.user_id),
             self.EXAMPLE_FILTER_JSON,
         )
 
-        self.hs.is_mine = _is_mine  # type: ignore[assignment]
+        self.hs.is_mine = _is_mine  # type: ignore[method-assign]
         self.assertEqual(channel.code, 403)
         self.assertEqual(channel.json_body["errcode"], Codes.FORBIDDEN)
 
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index 53182459e4..47c1d38ad7 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -68,7 +68,7 @@ class RoomBase(unittest.HomeserverTestCase):
             "red",
         )
 
-        self.hs.get_federation_handler = Mock()  # type: ignore[assignment]
+        self.hs.get_federation_handler = Mock()  # type: ignore[method-assign]
         self.hs.get_federation_handler.return_value.maybe_backfill = AsyncMock(
             return_value=None
         )
@@ -76,7 +76,7 @@ class RoomBase(unittest.HomeserverTestCase):
         async def _insert_client_ip(*args: Any, **kwargs: Any) -> None:
             return None
 
-        self.hs.get_datastores().main.insert_client_ip = _insert_client_ip  # type: ignore[assignment]
+        self.hs.get_datastores().main.insert_client_ip = _insert_client_ip  # type: ignore[method-assign]
 
         return self.hs
 
@@ -3413,8 +3413,8 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase):
         # a remote IS. We keep the mock for make_and_store_3pid_invite around so we
         # can check its call_count later on during the test.
         make_invite_mock = AsyncMock(return_value=(Mock(event_id="abc"), 0))
-        self.hs.get_room_member_handler()._make_and_store_3pid_invite = make_invite_mock  # type: ignore[assignment]
-        self.hs.get_identity_handler().lookup_3pid = AsyncMock(  # type: ignore[assignment]
+        self.hs.get_room_member_handler()._make_and_store_3pid_invite = make_invite_mock  # type: ignore[method-assign]
+        self.hs.get_identity_handler().lookup_3pid = AsyncMock(  # type: ignore[method-assign]
             return_value=None,
         )
 
@@ -3477,8 +3477,8 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase):
         # a remote IS. We keep the mock for make_and_store_3pid_invite around so we
         # can check its call_count later on during the test.
         make_invite_mock = AsyncMock(return_value=(Mock(event_id="abc"), 0))
-        self.hs.get_room_member_handler()._make_and_store_3pid_invite = make_invite_mock  # type: ignore[assignment]
-        self.hs.get_identity_handler().lookup_3pid = AsyncMock(  # type: ignore[assignment]
+        self.hs.get_room_member_handler()._make_and_store_3pid_invite = make_invite_mock  # type: ignore[method-assign]
+        self.hs.get_identity_handler().lookup_3pid = AsyncMock(  # type: ignore[method-assign]
             return_value=None,
         )
 
diff --git a/tests/rest/client/test_shadow_banned.py b/tests/rest/client/test_shadow_banned.py
index 8d2cdf8751..9aecf88e41 100644
--- a/tests/rest/client/test_shadow_banned.py
+++ b/tests/rest/client/test_shadow_banned.py
@@ -84,7 +84,7 @@ class RoomTestCase(_ShadowBannedBase):
     def test_invite_3pid(self) -> None:
         """Ensure that a 3PID invite does not attempt to contact the identity server."""
         identity_handler = self.hs.get_identity_handler()
-        identity_handler.lookup_3pid = Mock(  # type: ignore[assignment]
+        identity_handler.lookup_3pid = Mock(  # type: ignore[method-assign]
             side_effect=AssertionError("This should not get called")
         )
 
diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py
index da37fcb045..57eb713b15 100644
--- a/tests/rest/client/test_third_party_rules.py
+++ b/tests/rest/client/test_third_party_rules.py
@@ -117,7 +117,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase):
         async def _check_event_auth(origin: Any, event: Any, context: Any) -> None:
             pass
 
-        hs.get_federation_event_handler()._check_event_auth = _check_event_auth  # type: ignore[assignment]
+        hs.get_federation_event_handler()._check_event_auth = _check_event_auth  # type: ignore[method-assign]
 
         return hs
 
diff --git a/tests/server.py b/tests/server.py
index 659ccce838..08633fe640 100644
--- a/tests/server.py
+++ b/tests/server.py
@@ -722,7 +722,7 @@ def _make_test_homeserver_synchronous(server: HomeServer) -> None:
                 **kwargs,
             )
 
-        pool.runWithConnection = runWithConnection  # type: ignore[assignment]
+        pool.runWithConnection = runWithConnection  # type: ignore[method-assign]
         pool.runInteraction = runInteraction  # type: ignore[assignment]
         # Replace the thread pool with a threadless 'thread' pool
         pool.threadpool = ThreadPool(clock._reactor)
diff --git a/tests/server_notices/test_resource_limits_server_notices.py b/tests/server_notices/test_resource_limits_server_notices.py
index 47c53a5475..17f428bfc5 100644
--- a/tests/server_notices/test_resource_limits_server_notices.py
+++ b/tests/server_notices/test_resource_limits_server_notices.py
@@ -69,7 +69,7 @@ class TestResourceLimitsServerNotices(unittest.HomeserverTestCase):
         self._rlsn = rlsn
 
         self._rlsn._store.user_last_seen_monthly_active = AsyncMock(return_value=1000)
-        self._rlsn._server_notices_manager.send_notice = AsyncMock(  # type: ignore[assignment]
+        self._rlsn._server_notices_manager.send_notice = AsyncMock(  # type: ignore[method-assign]
             return_value=Mock()
         )
         self._send_notice = self._rlsn._server_notices_manager.send_notice
@@ -82,8 +82,8 @@ class TestResourceLimitsServerNotices(unittest.HomeserverTestCase):
         self._rlsn._server_notices_manager.maybe_get_notice_room_for_user = AsyncMock(
             return_value="!something:localhost"
         )
-        self._rlsn._store.add_tag_to_room = AsyncMock(return_value=None)  # type: ignore[assignment]
-        self._rlsn._store.get_tags_for_room = AsyncMock(return_value={})  # type: ignore[assignment]
+        self._rlsn._store.add_tag_to_room = AsyncMock(return_value=None)  # type: ignore[method-assign]
+        self._rlsn._store.get_tags_for_room = AsyncMock(return_value={})  # type: ignore[method-assign]
 
     @override_config({"hs_disabled": True})
     def test_maybe_send_server_notice_disabled_hs(self) -> None:
@@ -100,13 +100,13 @@ class TestResourceLimitsServerNotices(unittest.HomeserverTestCase):
     def test_maybe_send_server_notice_to_user_remove_blocked_notice(self) -> None:
         """Test when user has blocked notice, but should have it removed"""
 
-        self._rlsn._auth_blocking.check_auth_blocking = AsyncMock(  # type: ignore[assignment]
+        self._rlsn._auth_blocking.check_auth_blocking = AsyncMock(  # type: ignore[method-assign]
             return_value=None
         )
         mock_event = Mock(
             type=EventTypes.Message, content={"msgtype": ServerNoticeMsgType}
         )
-        self._rlsn._store.get_events = AsyncMock(  # type: ignore[assignment]
+        self._rlsn._store.get_events = AsyncMock(  # type: ignore[method-assign]
             return_value={"123": mock_event}
         )
         self.get_success(self._rlsn.maybe_send_server_notice_to_user(self.user_id))
@@ -122,7 +122,7 @@ class TestResourceLimitsServerNotices(unittest.HomeserverTestCase):
         """
         Test when user has blocked notice, but notice ought to be there (NOOP)
         """
-        self._rlsn._auth_blocking.check_auth_blocking = AsyncMock(  # type: ignore[assignment]
+        self._rlsn._auth_blocking.check_auth_blocking = AsyncMock(  # type: ignore[method-assign]
             return_value=None,
             side_effect=ResourceLimitError(403, "foo"),
         )
@@ -130,7 +130,7 @@ class TestResourceLimitsServerNotices(unittest.HomeserverTestCase):
         mock_event = Mock(
             type=EventTypes.Message, content={"msgtype": ServerNoticeMsgType}
         )
-        self._rlsn._store.get_events = AsyncMock(  # type: ignore[assignment]
+        self._rlsn._store.get_events = AsyncMock(  # type: ignore[method-assign]
             return_value={"123": mock_event}
         )
 
@@ -142,7 +142,7 @@ class TestResourceLimitsServerNotices(unittest.HomeserverTestCase):
         """
         Test when user does not have blocked notice, but should have one
         """
-        self._rlsn._auth_blocking.check_auth_blocking = AsyncMock(  # type: ignore[assignment]
+        self._rlsn._auth_blocking.check_auth_blocking = AsyncMock(  # type: ignore[method-assign]
             return_value=None,
             side_effect=ResourceLimitError(403, "foo"),
         )
@@ -155,7 +155,7 @@ class TestResourceLimitsServerNotices(unittest.HomeserverTestCase):
         """
         Test when user does not have blocked notice, nor should they (NOOP)
         """
-        self._rlsn._auth_blocking.check_auth_blocking = AsyncMock(  # type: ignore[assignment]
+        self._rlsn._auth_blocking.check_auth_blocking = AsyncMock(  # type: ignore[method-assign]
             return_value=None
         )
 
@@ -168,7 +168,7 @@ class TestResourceLimitsServerNotices(unittest.HomeserverTestCase):
         Test when user is not part of the MAU cohort - this should not ever
         happen - but ...
         """
-        self._rlsn._auth_blocking.check_auth_blocking = AsyncMock(  # type: ignore[assignment]
+        self._rlsn._auth_blocking.check_auth_blocking = AsyncMock(  # type: ignore[method-assign]
             return_value=None
         )
         self._rlsn._store.user_last_seen_monthly_active = AsyncMock(return_value=None)
@@ -184,7 +184,7 @@ class TestResourceLimitsServerNotices(unittest.HomeserverTestCase):
         Test that when server is over MAU limit and alerting is suppressed, then
         an alert message is not sent into the room
         """
-        self._rlsn._auth_blocking.check_auth_blocking = AsyncMock(  # type: ignore[assignment]
+        self._rlsn._auth_blocking.check_auth_blocking = AsyncMock(  # type: ignore[method-assign]
             return_value=None,
             side_effect=ResourceLimitError(
                 403, "foo", limit_type=LimitBlockingTypes.MONTHLY_ACTIVE_USER
@@ -199,7 +199,7 @@ class TestResourceLimitsServerNotices(unittest.HomeserverTestCase):
         """
         Test that when a server is disabled, that MAU limit alerting is ignored.
         """
-        self._rlsn._auth_blocking.check_auth_blocking = AsyncMock(  # type: ignore[assignment]
+        self._rlsn._auth_blocking.check_auth_blocking = AsyncMock(  # type: ignore[method-assign]
             return_value=None,
             side_effect=ResourceLimitError(
                 403, "foo", limit_type=LimitBlockingTypes.HS_DISABLED
@@ -218,21 +218,21 @@ class TestResourceLimitsServerNotices(unittest.HomeserverTestCase):
         When the room is already in a blocked state, test that when alerting
         is suppressed that the room is returned to an unblocked state.
         """
-        self._rlsn._auth_blocking.check_auth_blocking = AsyncMock(  # type: ignore[assignment]
+        self._rlsn._auth_blocking.check_auth_blocking = AsyncMock(  # type: ignore[method-assign]
             return_value=None,
             side_effect=ResourceLimitError(
                 403, "foo", limit_type=LimitBlockingTypes.MONTHLY_ACTIVE_USER
             ),
         )
 
-        self._rlsn._is_room_currently_blocked = AsyncMock(  # type: ignore[assignment]
+        self._rlsn._is_room_currently_blocked = AsyncMock(  # type: ignore[method-assign]
             return_value=(True, [])
         )
 
         mock_event = Mock(
             type=EventTypes.Message, content={"msgtype": ServerNoticeMsgType}
         )
-        self._rlsn._store.get_events = AsyncMock(  # type: ignore[assignment]
+        self._rlsn._store.get_events = AsyncMock(  # type: ignore[method-assign]
             return_value={"123": mock_event}
         )
         self.get_success(self._rlsn.maybe_send_server_notice_to_user(self.user_id))
diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py
index 48f39df9fe..cbce26a725 100644
--- a/tests/storage/test_appservice.py
+++ b/tests/storage/test_appservice.py
@@ -338,7 +338,7 @@ class ApplicationServiceTransactionStoreTestCase(unittest.HomeserverTestCase):
 
         # we aren't testing store._base stuff here, so mock this out
         # (ignore needed because Mypy won't allow us to assign to a method otherwise)
-        self.store.get_events_as_list = AsyncMock(return_value=events)  # type: ignore[assignment]
+        self.store.get_events_as_list = AsyncMock(return_value=events)  # type: ignore[method-assign]
 
         self.get_success(self._insert_txn(self.as_list[1]["id"], 9, other_events))
         self.get_success(self._insert_txn(service.id, 10, events))
diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py
index 0bf706ba08..49366440ce 100644
--- a/tests/storage/test_monthly_active_users.py
+++ b/tests/storage/test_monthly_active_users.py
@@ -252,7 +252,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         )
         self.get_success(d)
 
-        self.store.upsert_monthly_active_user = AsyncMock(return_value=None)  # type: ignore[assignment]
+        self.store.upsert_monthly_active_user = AsyncMock(return_value=None)  # type: ignore[method-assign]
 
         d = self.store.populate_monthly_active_users(user_id)
         self.get_success(d)
@@ -260,9 +260,9 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         self.store.upsert_monthly_active_user.assert_not_called()
 
     def test_populate_monthly_users_should_update(self) -> None:
-        self.store.upsert_monthly_active_user = AsyncMock(return_value=None)  # type: ignore[assignment]
+        self.store.upsert_monthly_active_user = AsyncMock(return_value=None)  # type: ignore[method-assign]
 
-        self.store.is_trial_user = AsyncMock(return_value=False)  # type: ignore[assignment]
+        self.store.is_trial_user = AsyncMock(return_value=False)  # type: ignore[method-assign]
 
         self.store.user_last_seen_monthly_active = AsyncMock(return_value=None)
         d = self.store.populate_monthly_active_users("user_id")
@@ -271,9 +271,9 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
         self.store.upsert_monthly_active_user.assert_called_once()
 
     def test_populate_monthly_users_should_not_update(self) -> None:
-        self.store.upsert_monthly_active_user = AsyncMock(return_value=None)  # type: ignore[assignment]
+        self.store.upsert_monthly_active_user = AsyncMock(return_value=None)  # type: ignore[method-assign]
 
-        self.store.is_trial_user = AsyncMock(return_value=False)  # type: ignore[assignment]
+        self.store.is_trial_user = AsyncMock(return_value=False)  # type: ignore[method-assign]
         self.store.user_last_seen_monthly_active = AsyncMock(
             return_value=self.hs.get_clock().time_msec()
         )
@@ -356,7 +356,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase):
 
     @override_config({"limit_usage_by_mau": False, "mau_stats_only": False})
     def test_no_users_when_not_tracking(self) -> None:
-        self.store.upsert_monthly_active_user = AsyncMock(return_value=None)  # type: ignore[assignment]
+        self.store.upsert_monthly_active_user = AsyncMock(return_value=None)  # type: ignore[method-assign]
 
         self.get_success(self.store.populate_monthly_active_users("@user:sever"))
 
diff --git a/tests/test_federation.py b/tests/test_federation.py
index 779f70467b..f8ade6da38 100644
--- a/tests/test_federation.py
+++ b/tests/test_federation.py
@@ -80,7 +80,7 @@ class MessageAcceptTests(unittest.HomeserverTestCase):
         ) -> None:
             pass
 
-        federation_event_handler._check_event_auth = _check_event_auth  # type: ignore[assignment]
+        federation_event_handler._check_event_auth = _check_event_auth  # type: ignore[method-assign]
         self.client = self.hs.get_federation_client()
 
         async def _check_sigs_and_hash_for_pulled_events_and_fetch(
@@ -190,7 +190,7 @@ class MessageAcceptTests(unittest.HomeserverTestCase):
 
         # Register the mock on the federation client.
         federation_client = self.hs.get_federation_client()
-        federation_client.query_user_devices = Mock(side_effect=query_user_devices)  # type: ignore[assignment]
+        federation_client.query_user_devices = Mock(side_effect=query_user_devices)  # type: ignore[method-assign]
 
         # Register a mock on the store so that the incoming update doesn't fail because
         # we don't share a room with the user.
@@ -240,7 +240,7 @@ class MessageAcceptTests(unittest.HomeserverTestCase):
 
         # Register mock device list retrieval on the federation client.
         federation_client = self.hs.get_federation_client()
-        federation_client.query_user_devices = AsyncMock(  # type: ignore[assignment]
+        federation_client.query_user_devices = AsyncMock(  # type: ignore[method-assign]
             return_value={
                 "user_id": remote_user_id,
                 "stream_id": 1,
diff --git a/tests/test_state.py b/tests/test_state.py
index eded38c766..9c8679cc1d 100644
--- a/tests/test_state.py
+++ b/tests/test_state.py
@@ -714,7 +714,7 @@ class StateTestCase(unittest.TestCase):
         store = _DummyStore()
         store.register_events(old_state_1)
         store.register_events(old_state_2)
-        self.dummy_store.get_events = store.get_events  # type: ignore[assignment]
+        self.dummy_store.get_events = store.get_events  # type: ignore[method-assign]
 
         context: EventContext
         context = yield self._get_context(
@@ -773,7 +773,7 @@ class StateTestCase(unittest.TestCase):
         store = _DummyStore()
         store.register_events(old_state_1)
         store.register_events(old_state_2)
-        self.dummy_store.get_events = store.get_events  # type: ignore[assignment]
+        self.dummy_store.get_events = store.get_events  # type: ignore[method-assign]
 
         context: EventContext
         context = yield self._get_context(
diff --git a/tests/unittest.py b/tests/unittest.py
index 40672a4415..5d3640d8ac 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -395,9 +395,9 @@ class HomeserverTestCase(TestCase):
                     )
 
                 # Type ignore: mypy doesn't like us assigning to methods.
-                self.hs.get_auth().get_user_by_req = get_requester  # type: ignore[assignment]
-                self.hs.get_auth().get_user_by_access_token = get_requester  # type: ignore[assignment]
-                self.hs.get_auth().get_access_token_from_request = Mock(return_value=token)  # type: ignore[assignment]
+                self.hs.get_auth().get_user_by_req = get_requester  # type: ignore[method-assign]
+                self.hs.get_auth().get_user_by_access_token = get_requester  # type: ignore[method-assign]
+                self.hs.get_auth().get_access_token_from_request = Mock(return_value=token)  # type: ignore[method-assign]
 
         if self.needs_threadpool:
             self.reactor.threadpool = ThreadPool()  # type: ignore[assignment]
-- 
cgit 1.5.1


From e9235d92f2a3cde489a4d24303e7868a93f3fb4d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 29 Aug 2023 11:44:07 -0400
Subject: Track currently syncing users by device for presence (#16172)

Refactoring to use both the user ID & the device ID when tracking
the currently syncing users in the presence handler.

This is done both locally and over replication. Note that the device
ID is discarded but will be used in a future change.
---
 changelog.d/16172.misc              |   1 +
 synapse/handlers/presence.py        | 155 +++++++++++++++++++++++-------------
 synapse/replication/tcp/commands.py |  17 +++-
 synapse/replication/tcp/handler.py  |  19 +++--
 4 files changed, 129 insertions(+), 63 deletions(-)
 create mode 100644 changelog.d/16172.misc

(limited to 'synapse')

diff --git a/changelog.d/16172.misc b/changelog.d/16172.misc
new file mode 100644
index 0000000000..4d709cb56e
--- /dev/null
+++ b/changelog.d/16172.misc
@@ -0,0 +1 @@
+Track per-device information in the presence code.
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 50c68c86ce..2f841863ae 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -23,6 +23,7 @@ The methods that define policy are:
 """
 import abc
 import contextlib
+import itertools
 import logging
 from bisect import bisect
 from contextlib import contextmanager
@@ -188,15 +189,17 @@ class BasePresenceHandler(abc.ABC):
         """
 
     @abc.abstractmethod
-    def get_currently_syncing_users_for_replication(self) -> Iterable[str]:
-        """Get an iterable of syncing users on this worker, to send to the presence handler
+    def get_currently_syncing_users_for_replication(
+        self,
+    ) -> Iterable[Tuple[str, Optional[str]]]:
+        """Get an iterable of syncing users and devices on this worker, to send to the presence handler
 
         This is called when a replication connection is established. It should return
-        a list of user ids, which are then sent as USER_SYNC commands to inform the
-        process handling presence about those users.
+        a list of tuples of user ID & device ID, which are then sent as USER_SYNC commands
+        to inform the process handling presence about those users/devices.
 
         Returns:
-            An iterable of user_id strings.
+            An iterable of tuples of user ID and device ID.
         """
 
     async def get_state(self, target_user: UserID) -> UserPresenceState:
@@ -284,7 +287,12 @@ class BasePresenceHandler(abc.ABC):
         """
 
     async def update_external_syncs_row(  # noqa: B027 (no-op by design)
-        self, process_id: str, user_id: str, is_syncing: bool, sync_time_msec: int
+        self,
+        process_id: str,
+        user_id: str,
+        device_id: Optional[str],
+        is_syncing: bool,
+        sync_time_msec: int,
     ) -> None:
         """Update the syncing users for an external process as a delta.
 
@@ -295,6 +303,7 @@ class BasePresenceHandler(abc.ABC):
                 syncing against. This allows synapse to process updates
                 as user start and stop syncing against a given process.
             user_id: The user who has started or stopped syncing
+            device_id: The user's device that has started or stopped syncing
             is_syncing: Whether or not the user is now syncing
             sync_time_msec: Time in ms when the user was last syncing
         """
@@ -425,16 +434,18 @@ class WorkerPresenceHandler(BasePresenceHandler):
             hs.config.worker.writers.presence,
         )
 
-        # The number of ongoing syncs on this process, by user id.
+        # The number of ongoing syncs on this process, by (user ID, device ID).
         # Empty if _presence_enabled is false.
-        self._user_to_num_current_syncs: Dict[str, int] = {}
+        self._user_device_to_num_current_syncs: Dict[
+            Tuple[str, Optional[str]], int
+        ] = {}
 
         self.notifier = hs.get_notifier()
         self.instance_id = hs.get_instance_id()
 
-        # user_id -> last_sync_ms. Lists the users that have stopped syncing but
-        # we haven't notified the presence writer of that yet
-        self.users_going_offline: Dict[str, int] = {}
+        # (user_id, device_id) -> last_sync_ms. Lists the devices that have stopped
+        # syncing but we haven't notified the presence writer of that yet
+        self._user_devices_going_offline: Dict[Tuple[str, Optional[str]], int] = {}
 
         self._bump_active_client = ReplicationBumpPresenceActiveTime.make_client(hs)
         self._set_state_client = ReplicationPresenceSetState.make_client(hs)
@@ -457,39 +468,47 @@ class WorkerPresenceHandler(BasePresenceHandler):
                 ClearUserSyncsCommand(self.instance_id)
             )
 
-    def send_user_sync(self, user_id: str, is_syncing: bool, last_sync_ms: int) -> None:
+    def send_user_sync(
+        self,
+        user_id: str,
+        device_id: Optional[str],
+        is_syncing: bool,
+        last_sync_ms: int,
+    ) -> None:
         if self._presence_enabled:
             self.hs.get_replication_command_handler().send_user_sync(
-                self.instance_id, user_id, is_syncing, last_sync_ms
+                self.instance_id, user_id, device_id, is_syncing, last_sync_ms
             )
 
-    def mark_as_coming_online(self, user_id: str) -> None:
+    def mark_as_coming_online(self, user_id: str, device_id: Optional[str]) -> None:
         """A user has started syncing. Send a UserSync to the presence writer,
         unless they had recently stopped syncing.
         """
-        going_offline = self.users_going_offline.pop(user_id, None)
+        going_offline = self._user_devices_going_offline.pop((user_id, device_id), None)
         if not going_offline:
             # Safe to skip because we haven't yet told the presence writer they
             # were offline
-            self.send_user_sync(user_id, True, self.clock.time_msec())
+            self.send_user_sync(user_id, device_id, True, self.clock.time_msec())
 
-    def mark_as_going_offline(self, user_id: str) -> None:
+    def mark_as_going_offline(self, user_id: str, device_id: Optional[str]) -> None:
         """A user has stopped syncing. We wait before notifying the presence
         writer as its likely they'll come back soon. This allows us to avoid
         sending a stopped syncing immediately followed by a started syncing
         notification to the presence writer
         """
-        self.users_going_offline[user_id] = self.clock.time_msec()
+        self._user_devices_going_offline[(user_id, device_id)] = self.clock.time_msec()
 
     def send_stop_syncing(self) -> None:
         """Check if there are any users who have stopped syncing a while ago and
         haven't come back yet. If there are poke the presence writer about them.
         """
         now = self.clock.time_msec()
-        for user_id, last_sync_ms in list(self.users_going_offline.items()):
+        for (user_id, device_id), last_sync_ms in list(
+            self._user_devices_going_offline.items()
+        ):
             if now - last_sync_ms > UPDATE_SYNCING_USERS_MS:
-                self.users_going_offline.pop(user_id, None)
-                self.send_user_sync(user_id, False, last_sync_ms)
+                self._user_devices_going_offline.pop((user_id, device_id), None)
+                self.send_user_sync(user_id, device_id, False, last_sync_ms)
 
     async def user_syncing(
         self,
@@ -515,23 +534,23 @@ class WorkerPresenceHandler(BasePresenceHandler):
             is_sync=True,
         )
 
-        curr_sync = self._user_to_num_current_syncs.get(user_id, 0)
-        self._user_to_num_current_syncs[user_id] = curr_sync + 1
+        curr_sync = self._user_device_to_num_current_syncs.get((user_id, device_id), 0)
+        self._user_device_to_num_current_syncs[(user_id, device_id)] = curr_sync + 1
 
         # If this is the first in-flight sync, notify replication
-        if self._user_to_num_current_syncs[user_id] == 1:
-            self.mark_as_coming_online(user_id)
+        if self._user_device_to_num_current_syncs[(user_id, device_id)] == 1:
+            self.mark_as_coming_online(user_id, device_id)
 
         def _end() -> None:
             # We check that the user_id is in user_to_num_current_syncs because
             # user_to_num_current_syncs may have been cleared if we are
             # shutting down.
-            if user_id in self._user_to_num_current_syncs:
-                self._user_to_num_current_syncs[user_id] -= 1
+            if (user_id, device_id) in self._user_device_to_num_current_syncs:
+                self._user_device_to_num_current_syncs[(user_id, device_id)] -= 1
 
                 # If there are no more in-flight syncs, notify replication
-                if self._user_to_num_current_syncs[user_id] == 0:
-                    self.mark_as_going_offline(user_id)
+                if self._user_device_to_num_current_syncs[(user_id, device_id)] == 0:
+                    self.mark_as_going_offline(user_id, device_id)
 
         @contextlib.contextmanager
         def _user_syncing() -> Generator[None, None, None]:
@@ -598,10 +617,12 @@ class WorkerPresenceHandler(BasePresenceHandler):
         # If this is a federation sender, notify about presence updates.
         await self.maybe_send_presence_to_interested_destinations(state_to_notify)
 
-    def get_currently_syncing_users_for_replication(self) -> Iterable[str]:
+    def get_currently_syncing_users_for_replication(
+        self,
+    ) -> Iterable[Tuple[str, Optional[str]]]:
         return [
-            user_id
-            for user_id, count in self._user_to_num_current_syncs.items()
+            user_id_device_id
+            for user_id_device_id, count in self._user_device_to_num_current_syncs.items()
             if count > 0
         ]
 
@@ -723,17 +744,23 @@ class PresenceHandler(BasePresenceHandler):
 
         # Keeps track of the number of *ongoing* syncs on this process. While
         # this is non zero a user will never go offline.
-        self.user_to_num_current_syncs: Dict[str, int] = {}
+        self._user_device_to_num_current_syncs: Dict[
+            Tuple[str, Optional[str]], int
+        ] = {}
 
         # Keeps track of the number of *ongoing* syncs on other processes.
+        #
         # While any sync is ongoing on another process the user will never
         # go offline.
+        #
         # Each process has a unique identifier and an update frequency. If
         # no update is received from that process within the update period then
         # we assume that all the sync requests on that process have stopped.
-        # Stored as a dict from process_id to set of user_id, and a dict of
-        # process_id to millisecond timestamp last updated.
-        self.external_process_to_current_syncs: Dict[str, Set[str]] = {}
+        # Stored as a dict from process_id to set of (user_id, device_id), and
+        # a dict of process_id to millisecond timestamp last updated.
+        self.external_process_to_current_syncs: Dict[
+            str, Set[Tuple[str, Optional[str]]]
+        ] = {}
         self.external_process_last_updated_ms: Dict[str, int] = {}
 
         self.external_sync_linearizer = Linearizer(name="external_sync_linearizer")
@@ -938,7 +965,10 @@ class PresenceHandler(BasePresenceHandler):
             # that were syncing on that process to see if they need to be timed
             # out.
             users_to_check.update(
-                self.external_process_to_current_syncs.pop(process_id, ())
+                user_id
+                for user_id, device_id in self.external_process_to_current_syncs.pop(
+                    process_id, ()
+                )
             )
             self.external_process_last_updated_ms.pop(process_id)
 
@@ -951,11 +981,15 @@ class PresenceHandler(BasePresenceHandler):
 
         syncing_user_ids = {
             user_id
-            for user_id, count in self.user_to_num_current_syncs.items()
+            for (user_id, _), count in self._user_device_to_num_current_syncs.items()
             if count
         }
-        for user_ids in self.external_process_to_current_syncs.values():
-            syncing_user_ids.update(user_ids)
+        syncing_user_ids.update(
+            user_id
+            for user_id, _ in itertools.chain(
+                *self.external_process_to_current_syncs.values()
+            )
+        )
 
         changes = handle_timeouts(
             states,
@@ -1013,8 +1047,8 @@ class PresenceHandler(BasePresenceHandler):
         if not affect_presence or not self._presence_enabled:
             return _NullContextManager()
 
-        curr_sync = self.user_to_num_current_syncs.get(user_id, 0)
-        self.user_to_num_current_syncs[user_id] = curr_sync + 1
+        curr_sync = self._user_device_to_num_current_syncs.get((user_id, device_id), 0)
+        self._user_device_to_num_current_syncs[(user_id, device_id)] = curr_sync + 1
 
         # Note that this causes last_active_ts to be incremented which is not
         # what the spec wants.
@@ -1027,7 +1061,7 @@ class PresenceHandler(BasePresenceHandler):
 
         async def _end() -> None:
             try:
-                self.user_to_num_current_syncs[user_id] -= 1
+                self._user_device_to_num_current_syncs[(user_id, device_id)] -= 1
 
                 prev_state = await self.current_state_for_user(user_id)
                 await self._update_states(
@@ -1049,12 +1083,19 @@ class PresenceHandler(BasePresenceHandler):
 
         return _user_syncing()
 
-    def get_currently_syncing_users_for_replication(self) -> Iterable[str]:
+    def get_currently_syncing_users_for_replication(
+        self,
+    ) -> Iterable[Tuple[str, Optional[str]]]:
         # since we are the process handling presence, there is nothing to do here.
         return []
 
     async def update_external_syncs_row(
-        self, process_id: str, user_id: str, is_syncing: bool, sync_time_msec: int
+        self,
+        process_id: str,
+        user_id: str,
+        device_id: Optional[str],
+        is_syncing: bool,
+        sync_time_msec: int,
     ) -> None:
         """Update the syncing users for an external process as a delta.
 
@@ -1063,6 +1104,7 @@ class PresenceHandler(BasePresenceHandler):
                 syncing against. This allows synapse to process updates
                 as user start and stop syncing against a given process.
             user_id: The user who has started or stopped syncing
+            device_id: The user's device that has started or stopped syncing
             is_syncing: Whether or not the user is now syncing
             sync_time_msec: Time in ms when the user was last syncing
         """
@@ -1073,26 +1115,27 @@ class PresenceHandler(BasePresenceHandler):
                 process_id, set()
             )
 
-            # USER_SYNC is sent when a user starts or stops syncing on a remote
-            # process. (But only for the initial and last device.)
+            # USER_SYNC is sent when a user's device starts or stops syncing on
+            # a remote process. (But only for the initial and last sync for that
+            # device.)
             #
-            # When a user *starts* syncing it also calls set_state(...) which
+            # When a device *starts* syncing it also calls set_state(...) which
             # will update the state, last_active_ts, and last_user_sync_ts.
-            # Simply ensure the user is tracked as syncing in this case.
+            # Simply ensure the user & device is tracked as syncing in this case.
             #
-            # When a user *stops* syncing, update the last_user_sync_ts and mark
+            # When a device *stops* syncing, update the last_user_sync_ts and mark
             # them as no longer syncing. Note this doesn't quite match the
             # monolith behaviour, which updates last_user_sync_ts at the end of
             # every sync, not just the last in-flight sync.
-            if is_syncing and user_id not in process_presence:
-                process_presence.add(user_id)
-            elif not is_syncing and user_id in process_presence:
+            if is_syncing and (user_id, device_id) not in process_presence:
+                process_presence.add((user_id, device_id))
+            elif not is_syncing and (user_id, device_id) in process_presence:
                 new_state = prev_state.copy_and_replace(
                     last_user_sync_ts=sync_time_msec
                 )
                 await self._update_states([new_state])
 
-                process_presence.discard(user_id)
+                process_presence.discard((user_id, device_id))
 
             self.external_process_last_updated_ms[process_id] = self.clock.time_msec()
 
@@ -1106,7 +1149,9 @@ class PresenceHandler(BasePresenceHandler):
             process_presence = self.external_process_to_current_syncs.pop(
                 process_id, set()
             )
-            prev_states = await self.current_state_for_users(process_presence)
+            prev_states = await self.current_state_for_users(
+                {user_id for user_id, device_id in process_presence}
+            )
             time_now_ms = self.clock.time_msec()
 
             await self._update_states(
diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py
index 58a871c6d9..e616b5e1c8 100644
--- a/synapse/replication/tcp/commands.py
+++ b/synapse/replication/tcp/commands.py
@@ -267,27 +267,38 @@ class UserSyncCommand(Command):
     NAME = "USER_SYNC"
 
     def __init__(
-        self, instance_id: str, user_id: str, is_syncing: bool, last_sync_ms: int
+        self,
+        instance_id: str,
+        user_id: str,
+        device_id: Optional[str],
+        is_syncing: bool,
+        last_sync_ms: int,
     ):
         self.instance_id = instance_id
         self.user_id = user_id
+        self.device_id = device_id
         self.is_syncing = is_syncing
         self.last_sync_ms = last_sync_ms
 
     @classmethod
     def from_line(cls: Type["UserSyncCommand"], line: str) -> "UserSyncCommand":
-        instance_id, user_id, state, last_sync_ms = line.split(" ", 3)
+        device_id: Optional[str]
+        instance_id, user_id, device_id, state, last_sync_ms = line.split(" ", 4)
+
+        if device_id == "None":
+            device_id = None
 
         if state not in ("start", "end"):
             raise Exception("Invalid USER_SYNC state %r" % (state,))
 
-        return cls(instance_id, user_id, state == "start", int(last_sync_ms))
+        return cls(instance_id, user_id, device_id, state == "start", int(last_sync_ms))
 
     def to_line(self) -> str:
         return " ".join(
             (
                 self.instance_id,
                 self.user_id,
+                str(self.device_id),
                 "start" if self.is_syncing else "end",
                 str(self.last_sync_ms),
             )
diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py
index 92c5a55acc..d9045d7b73 100644
--- a/synapse/replication/tcp/handler.py
+++ b/synapse/replication/tcp/handler.py
@@ -428,7 +428,11 @@ class ReplicationCommandHandler:
 
         if self._is_presence_writer:
             return self._presence_handler.update_external_syncs_row(
-                cmd.instance_id, cmd.user_id, cmd.is_syncing, cmd.last_sync_ms
+                cmd.instance_id,
+                cmd.user_id,
+                cmd.device_id,
+                cmd.is_syncing,
+                cmd.last_sync_ms,
             )
         else:
             return None
@@ -699,9 +703,9 @@ class ReplicationCommandHandler:
         )
 
         now = self._clock.time_msec()
-        for user_id in currently_syncing:
+        for user_id, device_id in currently_syncing:
             connection.send_command(
-                UserSyncCommand(self._instance_id, user_id, True, now)
+                UserSyncCommand(self._instance_id, user_id, device_id, True, now)
             )
 
     def lost_connection(self, connection: IReplicationConnection) -> None:
@@ -753,11 +757,16 @@ class ReplicationCommandHandler:
         self.send_command(FederationAckCommand(self._instance_name, token))
 
     def send_user_sync(
-        self, instance_id: str, user_id: str, is_syncing: bool, last_sync_ms: int
+        self,
+        instance_id: str,
+        user_id: str,
+        device_id: Optional[str],
+        is_syncing: bool,
+        last_sync_ms: int,
     ) -> None:
         """Poke the master that a user has started/stopped syncing."""
         self.send_command(
-            UserSyncCommand(instance_id, user_id, is_syncing, last_sync_ms)
+            UserSyncCommand(instance_id, user_id, device_id, is_syncing, last_sync_ms)
         )
 
     def send_user_ip(
-- 
cgit 1.5.1


From 62a1a9be52f4bc79b112f9841ddb3d03b8efccba Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Wed, 30 Aug 2023 00:39:39 +0100
Subject: Describe which rate limiter was hit in logs (#16135)

---
 changelog.d/16135.misc                     |   1 +
 synapse/api/errors.py                      |  14 ++-
 synapse/api/ratelimiting.py                |  20 +++--
 synapse/config/ratelimiting.py             | 132 +++++++++++++++++++----------
 synapse/handlers/auth.py                   |   8 +-
 synapse/handlers/devicemessage.py          |   3 +-
 synapse/handlers/identity.py               |   6 +-
 synapse/handlers/room_member.py            |  21 ++---
 synapse/handlers/room_summary.py           |   5 +-
 synapse/http/server.py                     |   8 +-
 synapse/rest/client/login.py               |   6 +-
 synapse/rest/client/login_token_request.py |  10 ++-
 synapse/rest/client/register.py            |   3 +-
 synapse/server.py                          |   3 +-
 synapse/util/ratelimitutils.py             |   3 +-
 tests/api/test_errors.py                   |  15 +++-
 tests/api/test_ratelimiting.py             |  67 +++++++++------
 tests/config/test_ratelimiting.py          |  31 +++++++
 18 files changed, 235 insertions(+), 121 deletions(-)
 create mode 100644 changelog.d/16135.misc

(limited to 'synapse')

diff --git a/changelog.d/16135.misc b/changelog.d/16135.misc
new file mode 100644
index 0000000000..cba8733d02
--- /dev/null
+++ b/changelog.d/16135.misc
@@ -0,0 +1 @@
+Describe which rate limiter was hit in logs.
diff --git a/synapse/api/errors.py b/synapse/api/errors.py
index 578e798773..fdb2955be8 100644
--- a/synapse/api/errors.py
+++ b/synapse/api/errors.py
@@ -211,6 +211,11 @@ class SynapseError(CodeMessageException):
     def error_dict(self, config: Optional["HomeServerConfig"]) -> "JsonDict":
         return cs_error(self.msg, self.errcode, **self._additional_fields)
 
+    @property
+    def debug_context(self) -> Optional[str]:
+        """Override this to add debugging context that shouldn't be sent to clients."""
+        return None
+
 
 class InvalidAPICallError(SynapseError):
     """You called an existing API endpoint, but fed that endpoint
@@ -508,8 +513,8 @@ class LimitExceededError(SynapseError):
 
     def __init__(
         self,
+        limiter_name: str,
         code: int = 429,
-        msg: str = "Too Many Requests",
         retry_after_ms: Optional[int] = None,
         errcode: str = Codes.LIMIT_EXCEEDED,
     ):
@@ -518,12 +523,17 @@ class LimitExceededError(SynapseError):
             if self.include_retry_after_header and retry_after_ms is not None
             else None
         )
-        super().__init__(code, msg, errcode, headers=headers)
+        super().__init__(code, "Too Many Requests", errcode, headers=headers)
         self.retry_after_ms = retry_after_ms
+        self.limiter_name = limiter_name
 
     def error_dict(self, config: Optional["HomeServerConfig"]) -> "JsonDict":
         return cs_error(self.msg, self.errcode, retry_after_ms=self.retry_after_ms)
 
+    @property
+    def debug_context(self) -> Optional[str]:
+        return self.limiter_name
+
 
 class RoomKeysVersionError(SynapseError):
     """A client has tried to upload to a non-current version of the room_keys store"""
diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py
index 511790c7c5..887b214d64 100644
--- a/synapse/api/ratelimiting.py
+++ b/synapse/api/ratelimiting.py
@@ -61,12 +61,16 @@ class Ratelimiter:
     """
 
     def __init__(
-        self, store: DataStore, clock: Clock, rate_hz: float, burst_count: int
+        self,
+        store: DataStore,
+        clock: Clock,
+        cfg: RatelimitSettings,
     ):
         self.clock = clock
-        self.rate_hz = rate_hz
-        self.burst_count = burst_count
+        self.rate_hz = cfg.per_second
+        self.burst_count = cfg.burst_count
         self.store = store
+        self._limiter_name = cfg.key
 
         # An ordered dictionary representing the token buckets tracked by this rate
         # limiter. Each entry maps a key of arbitrary type to a tuple representing:
@@ -305,7 +309,8 @@ class Ratelimiter:
 
         if not allowed:
             raise LimitExceededError(
-                retry_after_ms=int(1000 * (time_allowed - time_now_s))
+                limiter_name=self._limiter_name,
+                retry_after_ms=int(1000 * (time_allowed - time_now_s)),
             )
 
 
@@ -322,7 +327,9 @@ class RequestRatelimiter:
 
         # The rate_hz and burst_count are overridden on a per-user basis
         self.request_ratelimiter = Ratelimiter(
-            store=self.store, clock=self.clock, rate_hz=0, burst_count=0
+            store=self.store,
+            clock=self.clock,
+            cfg=RatelimitSettings(key=rc_message.key, per_second=0, burst_count=0),
         )
         self._rc_message = rc_message
 
@@ -332,8 +339,7 @@ class RequestRatelimiter:
             self.admin_redaction_ratelimiter: Optional[Ratelimiter] = Ratelimiter(
                 store=self.store,
                 clock=self.clock,
-                rate_hz=rc_admin_redaction.per_second,
-                burst_count=rc_admin_redaction.burst_count,
+                cfg=rc_admin_redaction,
             )
         else:
             self.admin_redaction_ratelimiter = None
diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py
index a5514e70a2..4efbaeac0d 100644
--- a/synapse/config/ratelimiting.py
+++ b/synapse/config/ratelimiting.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, cast
 
 import attr
 
@@ -21,16 +21,47 @@ from synapse.types import JsonDict
 from ._base import Config
 
 
+@attr.s(slots=True, frozen=True, auto_attribs=True)
 class RatelimitSettings:
-    def __init__(
-        self,
-        config: Dict[str, float],
+    key: str
+    per_second: float
+    burst_count: int
+
+    @classmethod
+    def parse(
+        cls,
+        config: Dict[str, Any],
+        key: str,
         defaults: Optional[Dict[str, float]] = None,
-    ):
+    ) -> "RatelimitSettings":
+        """Parse config[key] as a new-style rate limiter config.
+
+        The key may refer to a nested dictionary using a full stop (.) to separate
+        each nested key. For example, use the key "a.b.c" to parse the following:
+
+        a:
+          b:
+            c:
+              per_second: 10
+              burst_count: 200
+
+        If this lookup fails, we'll fall back to the defaults.
+        """
         defaults = defaults or {"per_second": 0.17, "burst_count": 3.0}
 
-        self.per_second = config.get("per_second", defaults["per_second"])
-        self.burst_count = int(config.get("burst_count", defaults["burst_count"]))
+        rl_config = config
+        for part in key.split("."):
+            rl_config = rl_config.get(part, {})
+
+        # By this point we should have hit the rate limiter parameters.
+        # We don't actually check this though!
+        rl_config = cast(Dict[str, float], rl_config)
+
+        return cls(
+            key=key,
+            per_second=rl_config.get("per_second", defaults["per_second"]),
+            burst_count=int(rl_config.get("burst_count", defaults["burst_count"])),
+        )
 
 
 @attr.s(auto_attribs=True)
@@ -49,15 +80,14 @@ class RatelimitConfig(Config):
         # Load the new-style messages config if it exists. Otherwise fall back
         # to the old method.
         if "rc_message" in config:
-            self.rc_message = RatelimitSettings(
-                config["rc_message"], defaults={"per_second": 0.2, "burst_count": 10.0}
+            self.rc_message = RatelimitSettings.parse(
+                config, "rc_message", defaults={"per_second": 0.2, "burst_count": 10.0}
             )
         else:
             self.rc_message = RatelimitSettings(
-                {
-                    "per_second": config.get("rc_messages_per_second", 0.2),
-                    "burst_count": config.get("rc_message_burst_count", 10.0),
-                }
+                key="rc_messages",
+                per_second=config.get("rc_messages_per_second", 0.2),
+                burst_count=config.get("rc_message_burst_count", 10.0),
             )
 
         # Load the new-style federation config, if it exists. Otherwise, fall
@@ -79,51 +109,59 @@ class RatelimitConfig(Config):
                 }
             )
 
-        self.rc_registration = RatelimitSettings(config.get("rc_registration", {}))
+        self.rc_registration = RatelimitSettings.parse(config, "rc_registration", {})
 
-        self.rc_registration_token_validity = RatelimitSettings(
-            config.get("rc_registration_token_validity", {}),
+        self.rc_registration_token_validity = RatelimitSettings.parse(
+            config,
+            "rc_registration_token_validity",
             defaults={"per_second": 0.1, "burst_count": 5},
         )
 
         # It is reasonable to login with a bunch of devices at once (i.e. when
         # setting up an account), but it is *not* valid to continually be
         # logging into new devices.
-        rc_login_config = config.get("rc_login", {})
-        self.rc_login_address = RatelimitSettings(
-            rc_login_config.get("address", {}),
+        self.rc_login_address = RatelimitSettings.parse(
+            config,
+            "rc_login.address",
             defaults={"per_second": 0.003, "burst_count": 5},
         )
-        self.rc_login_account = RatelimitSettings(
-            rc_login_config.get("account", {}),
+        self.rc_login_account = RatelimitSettings.parse(
+            config,
+            "rc_login.account",
             defaults={"per_second": 0.003, "burst_count": 5},
         )
-        self.rc_login_failed_attempts = RatelimitSettings(
-            rc_login_config.get("failed_attempts", {})
+        self.rc_login_failed_attempts = RatelimitSettings.parse(
+            config,
+            "rc_login.failed_attempts",
+            {},
         )
 
         self.federation_rr_transactions_per_room_per_second = config.get(
             "federation_rr_transactions_per_room_per_second", 50
         )
 
-        rc_admin_redaction = config.get("rc_admin_redaction")
         self.rc_admin_redaction = None
-        if rc_admin_redaction:
-            self.rc_admin_redaction = RatelimitSettings(rc_admin_redaction)
+        if "rc_admin_redaction" in config:
+            self.rc_admin_redaction = RatelimitSettings.parse(
+                config, "rc_admin_redaction", {}
+            )
 
-        self.rc_joins_local = RatelimitSettings(
-            config.get("rc_joins", {}).get("local", {}),
+        self.rc_joins_local = RatelimitSettings.parse(
+            config,
+            "rc_joins.local",
             defaults={"per_second": 0.1, "burst_count": 10},
         )
-        self.rc_joins_remote = RatelimitSettings(
-            config.get("rc_joins", {}).get("remote", {}),
+        self.rc_joins_remote = RatelimitSettings.parse(
+            config,
+            "rc_joins.remote",
             defaults={"per_second": 0.01, "burst_count": 10},
         )
 
         # Track the rate of joins to a given room. If there are too many, temporarily
         # prevent local joins and remote joins via this server.
-        self.rc_joins_per_room = RatelimitSettings(
-            config.get("rc_joins_per_room", {}),
+        self.rc_joins_per_room = RatelimitSettings.parse(
+            config,
+            "rc_joins_per_room",
             defaults={"per_second": 1, "burst_count": 10},
         )
 
@@ -132,31 +170,37 @@ class RatelimitConfig(Config):
         # * For requests received over federation this is keyed by the origin.
         #
         # Note that this isn't exposed in the configuration as it is obscure.
-        self.rc_key_requests = RatelimitSettings(
-            config.get("rc_key_requests", {}),
+        self.rc_key_requests = RatelimitSettings.parse(
+            config,
+            "rc_key_requests",
             defaults={"per_second": 20, "burst_count": 100},
         )
 
-        self.rc_3pid_validation = RatelimitSettings(
-            config.get("rc_3pid_validation") or {},
+        self.rc_3pid_validation = RatelimitSettings.parse(
+            config,
+            "rc_3pid_validation",
             defaults={"per_second": 0.003, "burst_count": 5},
         )
 
-        self.rc_invites_per_room = RatelimitSettings(
-            config.get("rc_invites", {}).get("per_room", {}),
+        self.rc_invites_per_room = RatelimitSettings.parse(
+            config,
+            "rc_invites.per_room",
             defaults={"per_second": 0.3, "burst_count": 10},
         )
-        self.rc_invites_per_user = RatelimitSettings(
-            config.get("rc_invites", {}).get("per_user", {}),
+        self.rc_invites_per_user = RatelimitSettings.parse(
+            config,
+            "rc_invites.per_user",
             defaults={"per_second": 0.003, "burst_count": 5},
         )
 
-        self.rc_invites_per_issuer = RatelimitSettings(
-            config.get("rc_invites", {}).get("per_issuer", {}),
+        self.rc_invites_per_issuer = RatelimitSettings.parse(
+            config,
+            "rc_invites.per_issuer",
             defaults={"per_second": 0.3, "burst_count": 10},
         )
 
-        self.rc_third_party_invite = RatelimitSettings(
-            config.get("rc_third_party_invite", {}),
+        self.rc_third_party_invite = RatelimitSettings.parse(
+            config,
+            "rc_third_party_invite",
             defaults={"per_second": 0.0025, "burst_count": 5},
         )
diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py
index 59ecafa6a0..2b0c505130 100644
--- a/synapse/handlers/auth.py
+++ b/synapse/handlers/auth.py
@@ -218,19 +218,17 @@ class AuthHandler:
         self._failed_uia_attempts_ratelimiter = Ratelimiter(
             store=self.store,
             clock=self.clock,
-            rate_hz=self.hs.config.ratelimiting.rc_login_failed_attempts.per_second,
-            burst_count=self.hs.config.ratelimiting.rc_login_failed_attempts.burst_count,
+            cfg=self.hs.config.ratelimiting.rc_login_failed_attempts,
         )
 
         # The number of seconds to keep a UI auth session active.
         self._ui_auth_session_timeout = hs.config.auth.ui_auth_session_timeout
 
-        # Ratelimitier for failed /login attempts
+        # Ratelimiter for failed /login attempts
         self._failed_login_attempts_ratelimiter = Ratelimiter(
             store=self.store,
             clock=hs.get_clock(),
-            rate_hz=self.hs.config.ratelimiting.rc_login_failed_attempts.per_second,
-            burst_count=self.hs.config.ratelimiting.rc_login_failed_attempts.burst_count,
+            cfg=self.hs.config.ratelimiting.rc_login_failed_attempts,
         )
 
         self._clock = self.hs.get_clock()
diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py
index 17ff8821d9..798c7039f9 100644
--- a/synapse/handlers/devicemessage.py
+++ b/synapse/handlers/devicemessage.py
@@ -90,8 +90,7 @@ class DeviceMessageHandler:
         self._ratelimiter = Ratelimiter(
             store=self.store,
             clock=hs.get_clock(),
-            rate_hz=hs.config.ratelimiting.rc_key_requests.per_second,
-            burst_count=hs.config.ratelimiting.rc_key_requests.burst_count,
+            cfg=hs.config.ratelimiting.rc_key_requests,
         )
 
     async def on_direct_to_device_edu(self, origin: str, content: JsonDict) -> None:
diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py
index 3031384d25..472879c964 100644
--- a/synapse/handlers/identity.py
+++ b/synapse/handlers/identity.py
@@ -66,14 +66,12 @@ class IdentityHandler:
         self._3pid_validation_ratelimiter_ip = Ratelimiter(
             store=self.store,
             clock=hs.get_clock(),
-            rate_hz=hs.config.ratelimiting.rc_3pid_validation.per_second,
-            burst_count=hs.config.ratelimiting.rc_3pid_validation.burst_count,
+            cfg=hs.config.ratelimiting.rc_3pid_validation,
         )
         self._3pid_validation_ratelimiter_address = Ratelimiter(
             store=self.store,
             clock=hs.get_clock(),
-            rate_hz=hs.config.ratelimiting.rc_3pid_validation.per_second,
-            burst_count=hs.config.ratelimiting.rc_3pid_validation.burst_count,
+            cfg=hs.config.ratelimiting.rc_3pid_validation,
         )
 
     async def ratelimit_request_token_requests(
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index 1d8d4a72e7..de0f04e3fe 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -112,8 +112,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         self._join_rate_limiter_local = Ratelimiter(
             store=self.store,
             clock=self.clock,
-            rate_hz=hs.config.ratelimiting.rc_joins_local.per_second,
-            burst_count=hs.config.ratelimiting.rc_joins_local.burst_count,
+            cfg=hs.config.ratelimiting.rc_joins_local,
         )
         # Tracks joins from local users to rooms this server isn't a member of.
         # I.e. joins this server makes by requesting /make_join /send_join from
@@ -121,8 +120,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         self._join_rate_limiter_remote = Ratelimiter(
             store=self.store,
             clock=self.clock,
-            rate_hz=hs.config.ratelimiting.rc_joins_remote.per_second,
-            burst_count=hs.config.ratelimiting.rc_joins_remote.burst_count,
+            cfg=hs.config.ratelimiting.rc_joins_remote,
         )
         # TODO: find a better place to keep this Ratelimiter.
         #   It needs to be
@@ -135,8 +133,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         self._join_rate_per_room_limiter = Ratelimiter(
             store=self.store,
             clock=self.clock,
-            rate_hz=hs.config.ratelimiting.rc_joins_per_room.per_second,
-            burst_count=hs.config.ratelimiting.rc_joins_per_room.burst_count,
+            cfg=hs.config.ratelimiting.rc_joins_per_room,
         )
 
         # Ratelimiter for invites, keyed by room (across all issuers, all
@@ -144,8 +141,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         self._invites_per_room_limiter = Ratelimiter(
             store=self.store,
             clock=self.clock,
-            rate_hz=hs.config.ratelimiting.rc_invites_per_room.per_second,
-            burst_count=hs.config.ratelimiting.rc_invites_per_room.burst_count,
+            cfg=hs.config.ratelimiting.rc_invites_per_room,
         )
 
         # Ratelimiter for invites, keyed by recipient (across all rooms, all
@@ -153,8 +149,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         self._invites_per_recipient_limiter = Ratelimiter(
             store=self.store,
             clock=self.clock,
-            rate_hz=hs.config.ratelimiting.rc_invites_per_user.per_second,
-            burst_count=hs.config.ratelimiting.rc_invites_per_user.burst_count,
+            cfg=hs.config.ratelimiting.rc_invites_per_user,
         )
 
         # Ratelimiter for invites, keyed by issuer (across all rooms, all
@@ -162,15 +157,13 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         self._invites_per_issuer_limiter = Ratelimiter(
             store=self.store,
             clock=self.clock,
-            rate_hz=hs.config.ratelimiting.rc_invites_per_issuer.per_second,
-            burst_count=hs.config.ratelimiting.rc_invites_per_issuer.burst_count,
+            cfg=hs.config.ratelimiting.rc_invites_per_issuer,
         )
 
         self._third_party_invite_limiter = Ratelimiter(
             store=self.store,
             clock=self.clock,
-            rate_hz=hs.config.ratelimiting.rc_third_party_invite.per_second,
-            burst_count=hs.config.ratelimiting.rc_third_party_invite.burst_count,
+            cfg=hs.config.ratelimiting.rc_third_party_invite,
         )
 
         self.request_ratelimiter = hs.get_request_ratelimiter()
diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py
index dad3e23470..dd559b4c45 100644
--- a/synapse/handlers/room_summary.py
+++ b/synapse/handlers/room_summary.py
@@ -35,6 +35,7 @@ from synapse.api.errors import (
     UnsupportedRoomVersionError,
 )
 from synapse.api.ratelimiting import Ratelimiter
+from synapse.config.ratelimiting import RatelimitSettings
 from synapse.events import EventBase
 from synapse.types import JsonDict, Requester, StrCollection
 from synapse.util.caches.response_cache import ResponseCache
@@ -94,7 +95,9 @@ class RoomSummaryHandler:
         self._server_name = hs.hostname
         self._federation_client = hs.get_federation_client()
         self._ratelimiter = Ratelimiter(
-            store=self._store, clock=hs.get_clock(), rate_hz=5, burst_count=10
+            store=self._store,
+            clock=hs.get_clock(),
+            cfg=RatelimitSettings("<room summary>", per_second=5, burst_count=10),
         )
 
         # If a user tries to fetch the same page multiple times in quick succession,
diff --git a/synapse/http/server.py b/synapse/http/server.py
index 5109cec983..3bbf91298e 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -115,7 +115,13 @@ def return_json_error(
         if exc.headers is not None:
             for header, value in exc.headers.items():
                 request.setHeader(header, value)
-        logger.info("%s SynapseError: %s - %s", request, error_code, exc.msg)
+        error_ctx = exc.debug_context
+        if error_ctx:
+            logger.info(
+                "%s SynapseError: %s - %s (%s)", request, error_code, exc.msg, error_ctx
+            )
+        else:
+            logger.info("%s SynapseError: %s - %s", request, error_code, exc.msg)
     elif f.check(CancelledError):
         error_code = HTTP_STATUS_REQUEST_CANCELLED
         error_dict = {"error": "Request cancelled", "errcode": Codes.UNKNOWN}
diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py
index d724c68920..7be327e26f 100644
--- a/synapse/rest/client/login.py
+++ b/synapse/rest/client/login.py
@@ -120,14 +120,12 @@ class LoginRestServlet(RestServlet):
         self._address_ratelimiter = Ratelimiter(
             store=self._main_store,
             clock=hs.get_clock(),
-            rate_hz=self.hs.config.ratelimiting.rc_login_address.per_second,
-            burst_count=self.hs.config.ratelimiting.rc_login_address.burst_count,
+            cfg=self.hs.config.ratelimiting.rc_login_address,
         )
         self._account_ratelimiter = Ratelimiter(
             store=self._main_store,
             clock=hs.get_clock(),
-            rate_hz=self.hs.config.ratelimiting.rc_login_account.per_second,
-            burst_count=self.hs.config.ratelimiting.rc_login_account.burst_count,
+            cfg=self.hs.config.ratelimiting.rc_login_account,
         )
 
         # ensure the CAS/SAML/OIDC handlers are loaded on this worker instance.
diff --git a/synapse/rest/client/login_token_request.py b/synapse/rest/client/login_token_request.py
index b1629f94a5..d189a923b5 100644
--- a/synapse/rest/client/login_token_request.py
+++ b/synapse/rest/client/login_token_request.py
@@ -16,6 +16,7 @@ import logging
 from typing import TYPE_CHECKING, Tuple
 
 from synapse.api.ratelimiting import Ratelimiter
+from synapse.config.ratelimiting import RatelimitSettings
 from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_json_object_from_request
 from synapse.http.site import SynapseRequest
@@ -66,15 +67,18 @@ class LoginTokenRequestServlet(RestServlet):
         self.token_timeout = hs.config.auth.login_via_existing_token_timeout
         self._require_ui_auth = hs.config.auth.login_via_existing_require_ui_auth
 
-        # Ratelimit aggressively to a maxmimum of 1 request per minute.
+        # Ratelimit aggressively to a maximum of 1 request per minute.
         #
         # This endpoint can be used to spawn additional sessions and could be
         # abused by a malicious client to create many sessions.
         self._ratelimiter = Ratelimiter(
             store=self._main_store,
             clock=hs.get_clock(),
-            rate_hz=1 / 60,
-            burst_count=1,
+            cfg=RatelimitSettings(
+                key="<login token request>",
+                per_second=1 / 60,
+                burst_count=1,
+            ),
         )
 
     @interactive_auth_handler
diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py
index 77e3b91b79..132623462a 100644
--- a/synapse/rest/client/register.py
+++ b/synapse/rest/client/register.py
@@ -376,8 +376,7 @@ class RegistrationTokenValidityRestServlet(RestServlet):
         self.ratelimiter = Ratelimiter(
             store=self.store,
             clock=hs.get_clock(),
-            rate_hz=hs.config.ratelimiting.rc_registration_token_validity.per_second,
-            burst_count=hs.config.ratelimiting.rc_registration_token_validity.burst_count,
+            cfg=hs.config.ratelimiting.rc_registration_token_validity,
         )
 
     async def on_GET(self, request: Request) -> Tuple[int, JsonDict]:
diff --git a/synapse/server.py b/synapse/server.py
index 7cdd3ea3c2..fd16dacd0d 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -408,8 +408,7 @@ class HomeServer(metaclass=abc.ABCMeta):
         return Ratelimiter(
             store=self.get_datastores().main,
             clock=self.get_clock(),
-            rate_hz=self.config.ratelimiting.rc_registration.per_second,
-            burst_count=self.config.ratelimiting.rc_registration.burst_count,
+            cfg=self.config.ratelimiting.rc_registration,
         )
 
     @cache_in_self
diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py
index cde4a0780f..f693ba2a8c 100644
--- a/synapse/util/ratelimitutils.py
+++ b/synapse/util/ratelimitutils.py
@@ -291,7 +291,8 @@ class _PerHostRatelimiter:
             if self.metrics_name:
                 rate_limit_reject_counter.labels(self.metrics_name).inc()
             raise LimitExceededError(
-                retry_after_ms=int(self.window_size / self.sleep_limit)
+                limiter_name="rc_federation",
+                retry_after_ms=int(self.window_size / self.sleep_limit),
             )
 
         self.request_times.append(time_now)
diff --git a/tests/api/test_errors.py b/tests/api/test_errors.py
index 319abfe63d..8e159029d9 100644
--- a/tests/api/test_errors.py
+++ b/tests/api/test_errors.py
@@ -1,6 +1,5 @@
 # Copyright 2023 The Matrix.org Foundation C.I.C.
 #
-#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -13,24 +12,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import json
+
 from synapse.api.errors import LimitExceededError
 
 from tests import unittest
 
 
-class ErrorsTestCase(unittest.TestCase):
+class LimitExceededErrorTestCase(unittest.TestCase):
+    def test_key_appears_in_context_but_not_error_dict(self) -> None:
+        err = LimitExceededError("needle")
+        serialised = json.dumps(err.error_dict(None))
+        self.assertIn("needle", err.debug_context)
+        self.assertNotIn("needle", serialised)
+
     # Create a sub-class to avoid mutating the class-level property.
     class LimitExceededErrorHeaders(LimitExceededError):
         include_retry_after_header = True
 
     def test_limit_exceeded_header(self) -> None:
-        err = ErrorsTestCase.LimitExceededErrorHeaders(retry_after_ms=100)
+        err = self.LimitExceededErrorHeaders(limiter_name="test", retry_after_ms=100)
         self.assertEqual(err.error_dict(None).get("retry_after_ms"), 100)
         assert err.headers is not None
         self.assertEqual(err.headers.get("Retry-After"), "1")
 
     def test_limit_exceeded_rounding(self) -> None:
-        err = ErrorsTestCase.LimitExceededErrorHeaders(retry_after_ms=3001)
+        err = self.LimitExceededErrorHeaders(limiter_name="test", retry_after_ms=3001)
         self.assertEqual(err.error_dict(None).get("retry_after_ms"), 3001)
         assert err.headers is not None
         self.assertEqual(err.headers.get("Retry-After"), "4")
diff --git a/tests/api/test_ratelimiting.py b/tests/api/test_ratelimiting.py
index fa6c1c02ce..a24638c9ef 100644
--- a/tests/api/test_ratelimiting.py
+++ b/tests/api/test_ratelimiting.py
@@ -1,5 +1,6 @@
 from synapse.api.ratelimiting import LimitExceededError, Ratelimiter
 from synapse.appservice import ApplicationService
+from synapse.config.ratelimiting import RatelimitSettings
 from synapse.types import create_requester
 
 from tests import unittest
@@ -10,8 +11,7 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         limiter = Ratelimiter(
             store=self.hs.get_datastores().main,
             clock=self.clock,
-            rate_hz=0.1,
-            burst_count=1,
+            cfg=RatelimitSettings(key="", per_second=0.1, burst_count=1),
         )
         allowed, time_allowed = self.get_success_or_raise(
             limiter.can_do_action(None, key="test_id", _time_now_s=0)
@@ -43,8 +43,11 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         limiter = Ratelimiter(
             store=self.hs.get_datastores().main,
             clock=self.clock,
-            rate_hz=0.1,
-            burst_count=1,
+            cfg=RatelimitSettings(
+                key="",
+                per_second=0.1,
+                burst_count=1,
+            ),
         )
         allowed, time_allowed = self.get_success_or_raise(
             limiter.can_do_action(as_requester, _time_now_s=0)
@@ -76,8 +79,11 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         limiter = Ratelimiter(
             store=self.hs.get_datastores().main,
             clock=self.clock,
-            rate_hz=0.1,
-            burst_count=1,
+            cfg=RatelimitSettings(
+                key="",
+                per_second=0.1,
+                burst_count=1,
+            ),
         )
         allowed, time_allowed = self.get_success_or_raise(
             limiter.can_do_action(as_requester, _time_now_s=0)
@@ -101,8 +107,7 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         limiter = Ratelimiter(
             store=self.hs.get_datastores().main,
             clock=self.clock,
-            rate_hz=0.1,
-            burst_count=1,
+            cfg=RatelimitSettings(key="", per_second=0.1, burst_count=1),
         )
 
         # Shouldn't raise
@@ -128,8 +133,7 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         limiter = Ratelimiter(
             store=self.hs.get_datastores().main,
             clock=self.clock,
-            rate_hz=0.1,
-            burst_count=1,
+            cfg=RatelimitSettings(key="", per_second=0.1, burst_count=1),
         )
 
         # First attempt should be allowed
@@ -177,8 +181,7 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         limiter = Ratelimiter(
             store=self.hs.get_datastores().main,
             clock=self.clock,
-            rate_hz=0.1,
-            burst_count=1,
+            cfg=RatelimitSettings(key="", per_second=0.1, burst_count=1),
         )
 
         # First attempt should be allowed
@@ -208,8 +211,7 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         limiter = Ratelimiter(
             store=self.hs.get_datastores().main,
             clock=self.clock,
-            rate_hz=0.1,
-            burst_count=1,
+            cfg=RatelimitSettings(key="", per_second=0.1, burst_count=1),
         )
         self.get_success_or_raise(
             limiter.can_do_action(None, key="test_id_1", _time_now_s=0)
@@ -244,7 +246,11 @@ class TestRatelimiter(unittest.HomeserverTestCase):
             )
         )
 
-        limiter = Ratelimiter(store=store, clock=self.clock, rate_hz=0.1, burst_count=1)
+        limiter = Ratelimiter(
+            store=store,
+            clock=self.clock,
+            cfg=RatelimitSettings("", per_second=0.1, burst_count=1),
+        )
 
         # Shouldn't raise
         for _ in range(20):
@@ -254,8 +260,11 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         limiter = Ratelimiter(
             store=self.hs.get_datastores().main,
             clock=self.clock,
-            rate_hz=0.1,
-            burst_count=3,
+            cfg=RatelimitSettings(
+                key="",
+                per_second=0.1,
+                burst_count=3,
+            ),
         )
         # Test that 4 actions aren't allowed with a maximum burst of 3.
         allowed, time_allowed = self.get_success_or_raise(
@@ -321,8 +330,7 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         limiter = Ratelimiter(
             store=self.hs.get_datastores().main,
             clock=self.clock,
-            rate_hz=0.1,
-            burst_count=3,
+            cfg=RatelimitSettings("", per_second=0.1, burst_count=3),
         )
 
         def consume_at(time: float) -> bool:
@@ -346,8 +354,11 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         limiter = Ratelimiter(
             store=self.hs.get_datastores().main,
             clock=self.clock,
-            rate_hz=0.1,
-            burst_count=3,
+            cfg=RatelimitSettings(
+                "",
+                per_second=0.1,
+                burst_count=3,
+            ),
         )
 
         # Observe two actions, leaving room in the bucket for one more.
@@ -369,8 +380,11 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         limiter = Ratelimiter(
             store=self.hs.get_datastores().main,
             clock=self.clock,
-            rate_hz=0.1,
-            burst_count=3,
+            cfg=RatelimitSettings(
+                "",
+                per_second=0.1,
+                burst_count=3,
+            ),
         )
 
         # Observe three actions, filling up the bucket.
@@ -398,8 +412,11 @@ class TestRatelimiter(unittest.HomeserverTestCase):
         limiter = Ratelimiter(
             store=self.hs.get_datastores().main,
             clock=self.clock,
-            rate_hz=0.1,
-            burst_count=3,
+            cfg=RatelimitSettings(
+                "",
+                per_second=0.1,
+                burst_count=3,
+            ),
         )
 
         # Observe four actions, exceeding the bucket.
diff --git a/tests/config/test_ratelimiting.py b/tests/config/test_ratelimiting.py
index f12147eaa0..0c27dd21e2 100644
--- a/tests/config/test_ratelimiting.py
+++ b/tests/config/test_ratelimiting.py
@@ -12,11 +12,42 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from synapse.config.homeserver import HomeServerConfig
+from synapse.config.ratelimiting import RatelimitSettings
 
 from tests.unittest import TestCase
 from tests.utils import default_config
 
 
+class ParseRatelimitSettingsTestcase(TestCase):
+    def test_depth_1(self) -> None:
+        cfg = {
+            "a": {
+                "per_second": 5,
+                "burst_count": 10,
+            }
+        }
+        parsed = RatelimitSettings.parse(cfg, "a")
+        self.assertEqual(parsed, RatelimitSettings("a", 5, 10))
+
+    def test_depth_2(self) -> None:
+        cfg = {
+            "a": {
+                "b": {
+                    "per_second": 5,
+                    "burst_count": 10,
+                },
+            }
+        }
+        parsed = RatelimitSettings.parse(cfg, "a.b")
+        self.assertEqual(parsed, RatelimitSettings("a.b", 5, 10))
+
+    def test_missing(self) -> None:
+        parsed = RatelimitSettings.parse(
+            {}, "a", defaults={"per_second": 5, "burst_count": 10}
+        )
+        self.assertEqual(parsed, RatelimitSettings("a", 5, 10))
+
+
 class RatelimitConfigTestCase(TestCase):
     def test_parse_rc_federation(self) -> None:
         config_dict = default_config("test")
-- 
cgit 1.5.1


From ebd8374fb5f10f84fc818058100ec7ae284835b3 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 30 Aug 2023 06:10:56 -0400
Subject: Stop writing to the event_txn_id table (#16175)

---
 changelog.d/16175.misc                          |  1 +
 synapse/handlers/message.py                     | 13 --------
 synapse/storage/databases/main/events.py        | 35 +--------------------
 synapse/storage/databases/main/events_worker.py | 41 +++++++++----------------
 synapse/storage/schema/__init__.py              | 16 ++++------
 tests/handlers/test_message.py                  | 15 +++------
 6 files changed, 26 insertions(+), 95 deletions(-)
 create mode 100644 changelog.d/16175.misc

(limited to 'synapse')

diff --git a/changelog.d/16175.misc b/changelog.d/16175.misc
new file mode 100644
index 0000000000..308fbc2259
--- /dev/null
+++ b/changelog.d/16175.misc
@@ -0,0 +1 @@
+Stop using the `event_txn_id` table.
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 187c3e6cc0..d6be18cdef 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -908,19 +908,6 @@ class EventCreationHandler:
             if existing_event_id:
                 return existing_event_id
 
-        # Some requsters don't have device IDs (appservice, guests, and access
-        # tokens minted with the admin API), fallback to checking the access token
-        # ID, which should be close enough.
-        if requester.access_token_id:
-            existing_event_id = (
-                await self.store.get_event_id_from_transaction_id_and_token_id(
-                    room_id,
-                    requester.user.to_string(),
-                    requester.access_token_id,
-                    txn_id,
-                )
-            )
-
         return existing_event_id
 
     async def get_event_from_transaction(
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index c784612f59..0c1ed75240 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -978,26 +978,12 @@ class PersistEventsStore:
         """Persist the mapping from transaction IDs to event IDs (if defined)."""
 
         inserted_ts = self._clock.time_msec()
-        to_insert_token_id: List[Tuple[str, str, str, int, str, int]] = []
         to_insert_device_id: List[Tuple[str, str, str, str, str, int]] = []
         for event, _ in events_and_contexts:
             txn_id = getattr(event.internal_metadata, "txn_id", None)
-            token_id = getattr(event.internal_metadata, "token_id", None)
             device_id = getattr(event.internal_metadata, "device_id", None)
 
             if txn_id is not None:
-                if token_id is not None:
-                    to_insert_token_id.append(
-                        (
-                            event.event_id,
-                            event.room_id,
-                            event.sender,
-                            token_id,
-                            txn_id,
-                            inserted_ts,
-                        )
-                    )
-
                 if device_id is not None:
                     to_insert_device_id.append(
                         (
@@ -1010,26 +996,7 @@ class PersistEventsStore:
                         )
                     )
 
-        # Synapse usually relies on the device_id to scope transactions for events,
-        # except for users without device IDs (appservice, guests, and access
-        # tokens minted with the admin API) which use the access token ID instead.
-        #
-        # TODO https://github.com/matrix-org/synapse/issues/16042
-        if to_insert_token_id:
-            self.db_pool.simple_insert_many_txn(
-                txn,
-                table="event_txn_id",
-                keys=(
-                    "event_id",
-                    "room_id",
-                    "user_id",
-                    "token_id",
-                    "txn_id",
-                    "inserted_ts",
-                ),
-                values=to_insert_token_id,
-            )
-
+        # Synapse relies on the device_id to scope transactions for events.
         if to_insert_device_id:
             self.db_pool.simple_insert_many_txn(
                 txn,
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 7e7648c951..1eb313040e 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -2022,25 +2022,6 @@ class EventsWorkerStore(SQLBaseStore):
             desc="get_next_event_to_expire", func=get_next_event_to_expire_txn
         )
 
-    async def get_event_id_from_transaction_id_and_token_id(
-        self, room_id: str, user_id: str, token_id: int, txn_id: str
-    ) -> Optional[str]:
-        """Look up if we have already persisted an event for the transaction ID,
-        returning the event ID if so.
-        """
-        return await self.db_pool.simple_select_one_onecol(
-            table="event_txn_id",
-            keyvalues={
-                "room_id": room_id,
-                "user_id": user_id,
-                "token_id": token_id,
-                "txn_id": txn_id,
-            },
-            retcol="event_id",
-            allow_none=True,
-            desc="get_event_id_from_transaction_id_and_token_id",
-        )
-
     async def get_event_id_from_transaction_id_and_device_id(
         self, room_id: str, user_id: str, device_id: str, txn_id: str
     ) -> Optional[str]:
@@ -2072,29 +2053,35 @@ class EventsWorkerStore(SQLBaseStore):
         """
 
         mapping = {}
-        txn_id_to_event: Dict[Tuple[str, int, str], str] = {}
+        txn_id_to_event: Dict[Tuple[str, str, str, str], str] = {}
 
         for event in events:
-            token_id = getattr(event.internal_metadata, "token_id", None)
+            device_id = getattr(event.internal_metadata, "device_id", None)
             txn_id = getattr(event.internal_metadata, "txn_id", None)
 
-            if token_id and txn_id:
+            if device_id and txn_id:
                 # Check if this is a duplicate of an event in the given events.
-                existing = txn_id_to_event.get((event.room_id, token_id, txn_id))
+                existing = txn_id_to_event.get(
+                    (event.room_id, event.sender, device_id, txn_id)
+                )
                 if existing:
                     mapping[event.event_id] = existing
                     continue
 
                 # Check if this is a duplicate of an event we've already
                 # persisted.
-                existing = await self.get_event_id_from_transaction_id_and_token_id(
-                    event.room_id, event.sender, token_id, txn_id
+                existing = await self.get_event_id_from_transaction_id_and_device_id(
+                    event.room_id, event.sender, device_id, txn_id
                 )
                 if existing:
                     mapping[event.event_id] = existing
-                    txn_id_to_event[(event.room_id, token_id, txn_id)] = existing
+                    txn_id_to_event[
+                        (event.room_id, event.sender, device_id, txn_id)
+                    ] = existing
                 else:
-                    txn_id_to_event[(event.room_id, token_id, txn_id)] = event.event_id
+                    txn_id_to_event[
+                        (event.room_id, event.sender, device_id, txn_id)
+                    ] = event.event_id
 
         return mapping
 
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 649d3c8e9f..422f11f59e 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 80  # remember to update the list below when updating
+SCHEMA_VERSION = 81  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -114,19 +114,15 @@ Changes in SCHEMA_VERSION = 79
 Changes in SCHEMA_VERSION = 80
     - The event_txn_id_device_id is always written to for new events.
     - Add tables for the task scheduler.
+
+Changes in SCHEMA_VERSION = 81
+    - The event_txn_id is no longer written to for new events.
 """
 
 
 SCHEMA_COMPAT_VERSION = (
-    # Queries against `event_stream_ordering` columns in membership tables must
-    # be disambiguated.
-    #
-    # The threads_id column must written to with non-null values for the
-    # event_push_actions, event_push_actions_staging, and event_push_summary tables.
-    #
-    # insertions to the column `full_user_id` of tables profiles and user_filters can no
-    # longer be null
-    76
+    # The `event_txn_id_device_id` must be written to for new events.
+    80
 )
 """Limit on how far the synapse codebase can be rolled back without breaking db compat
 
diff --git a/tests/handlers/test_message.py b/tests/handlers/test_message.py
index 9691d66b48..1c5897c84e 100644
--- a/tests/handlers/test_message.py
+++ b/tests/handlers/test_message.py
@@ -46,18 +46,11 @@ class EventCreationTestCase(unittest.HomeserverTestCase):
         self._persist_event_storage_controller = persistence
 
         self.user_id = self.register_user("tester", "foobar")
-        self.access_token = self.login("tester", "foobar")
-        self.room_id = self.helper.create_room_as(self.user_id, tok=self.access_token)
-
-        info = self.get_success(
-            self.hs.get_datastores().main.get_user_by_access_token(
-                self.access_token,
-            )
-        )
-        assert info is not None
-        self.token_id = info.token_id
+        device_id = "dev-1"
+        access_token = self.login("tester", "foobar", device_id=device_id)
+        self.room_id = self.helper.create_room_as(self.user_id, tok=access_token)
 
-        self.requester = create_requester(self.user_id, access_token_id=self.token_id)
+        self.requester = create_requester(self.user_id, device_id=device_id)
 
     def _create_and_persist_member_event(self) -> Tuple[EventBase, EventContext]:
         # Create a member event we can use as an auth_event
-- 
cgit 1.5.1


From a2e0d4cd6024462f0067c56f83c2fe5b67da2109 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Wed, 30 Aug 2023 14:18:42 +0100
Subject: Fix rare bug that broke looping calls (#16210)

* Fix rare bug that broke looping calls

We can't interact with the reactor from a non-main thread, so the looping
call is now set up via `reactor.callFromThread`.

Introduced in v1.90.0 / #15791.

* Newsfile
---
 changelog.d/16210.bugfix                  |  1 +
 synapse/storage/databases/main/lock.py    | 36 +++++++++++++++++++------------
 tests/storage/databases/main/test_lock.py |  2 ++
 3 files changed, 25 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/16210.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16210.bugfix b/changelog.d/16210.bugfix
new file mode 100644
index 0000000000..39c35a1fe1
--- /dev/null
+++ b/changelog.d/16210.bugfix
@@ -0,0 +1 @@
+Fix rare bug that broke looping calls, which could lead to e.g. linearly increasing memory usage. Introduced in v1.90.0.
diff --git a/synapse/storage/databases/main/lock.py b/synapse/storage/databases/main/lock.py
index 54d40e7a3a..5a01ec2137 100644
--- a/synapse/storage/databases/main/lock.py
+++ b/synapse/storage/databases/main/lock.py
@@ -17,7 +17,7 @@ from types import TracebackType
 from typing import TYPE_CHECKING, Collection, Optional, Set, Tuple, Type
 from weakref import WeakValueDictionary
 
-from twisted.internet.interfaces import IReactorCore
+from twisted.internet.task import LoopingCall
 
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.storage._base import SQLBaseStore
@@ -26,6 +26,7 @@ from synapse.storage.database import (
     LoggingDatabaseConnection,
     LoggingTransaction,
 )
+from synapse.types import ISynapseReactor
 from synapse.util import Clock
 from synapse.util.stringutils import random_string
 
@@ -358,7 +359,7 @@ class Lock:
 
     def __init__(
         self,
-        reactor: IReactorCore,
+        reactor: ISynapseReactor,
         clock: Clock,
         store: LockStore,
         read_write: bool,
@@ -377,19 +378,25 @@ class Lock:
 
         self._table = "worker_read_write_locks" if read_write else "worker_locks"
 
-        self._looping_call = clock.looping_call(
+        # We might be called from a non-main thread, so we defer setting up the
+        # looping call.
+        self._looping_call: Optional[LoopingCall] = None
+        reactor.callFromThread(self._setup_looping_call)
+
+        self._dropped = False
+
+    def _setup_looping_call(self) -> None:
+        self._looping_call = self._clock.looping_call(
             self._renew,
             _RENEWAL_INTERVAL_MS,
-            store,
-            clock,
-            read_write,
-            lock_name,
-            lock_key,
-            token,
+            self._store,
+            self._clock,
+            self._read_write,
+            self._lock_name,
+            self._lock_key,
+            self._token,
         )
 
-        self._dropped = False
-
     @staticmethod
     @wrap_as_background_process("Lock._renew")
     async def _renew(
@@ -459,7 +466,7 @@ class Lock:
         if self._dropped:
             return
 
-        if self._looping_call.running:
+        if self._looping_call and self._looping_call.running:
             self._looping_call.stop()
 
         await self._store.db_pool.simple_delete(
@@ -486,8 +493,9 @@ class Lock:
             # We should not be dropped without the lock being released (unless
             # we're shutting down), but if we are then let's at least stop
             # renewing the lock.
-            if self._looping_call.running:
-                self._looping_call.stop()
+            if self._looping_call and self._looping_call.running:
+                # We might be called from a non-main thread.
+                self._reactor.callFromThread(self._looping_call.stop)
 
             if self._reactor.running:
                 logger.error(
diff --git a/tests/storage/databases/main/test_lock.py b/tests/storage/databases/main/test_lock.py
index f541f1d6be..650b4941ba 100644
--- a/tests/storage/databases/main/test_lock.py
+++ b/tests/storage/databases/main/test_lock.py
@@ -132,6 +132,7 @@ class LockTestCase(unittest.HomeserverTestCase):
 
         # We simulate the process getting stuck by cancelling the looping call
         # that keeps the lock active.
+        assert lock._looping_call
         lock._looping_call.stop()
 
         # Wait for the lock to timeout.
@@ -403,6 +404,7 @@ class ReadWriteLockTestCase(unittest.HomeserverTestCase):
 
         # We simulate the process getting stuck by cancelling the looping call
         # that keeps the lock active.
+        assert lock._looping_call
         lock._looping_call.stop()
 
         # Wait for the lock to timeout.
-- 
cgit 1.5.1


From 3de82bb2af28f56696a79bf41ccffc81385b6e2c Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Wed, 30 Aug 2023 15:18:34 +0100
Subject: Gracefully handle failing to thumbnail images (#16211)

---
 changelog.d/16211.bugfix          | 1 +
 synapse/__init__.py               | 5 +++++
 synapse/media/media_repository.py | 5 ++++-
 3 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/16211.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16211.bugfix b/changelog.d/16211.bugfix
new file mode 100644
index 0000000000..ab1816386c
--- /dev/null
+++ b/changelog.d/16211.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where uploading images would fail if we could not generate thumbnails for them.
diff --git a/synapse/__init__.py b/synapse/__init__.py
index 2f9c22a833..4a9bbc4d57 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -21,9 +21,14 @@ import os
 import sys
 from typing import Any, Dict
 
+from PIL import ImageFile
+
 from synapse.util.rust import check_rust_lib_up_to_date
 from synapse.util.stringutils import strtobool
 
+# Allow truncated JPEG images to be thumbnailed.
+ImageFile.LOAD_TRUNCATED_IMAGES = True
+
 # Check that we're not running on an unsupported Python version.
 #
 # Note that we use an (unneeded) variable here so that pyupgrade doesn't nuke the
diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py
index 4b750c700b..1b7b014f9a 100644
--- a/synapse/media/media_repository.py
+++ b/synapse/media/media_repository.py
@@ -214,7 +214,10 @@ class MediaRepository:
             user_id=auth_user,
         )
 
-        await self._generate_thumbnails(None, media_id, media_id, media_type)
+        try:
+            await self._generate_thumbnails(None, media_id, media_id, media_type)
+        except Exception as e:
+            logger.info("Failed to generate thumbnails: %s", e)
 
         return MXCUri(self.server_name, media_id)
 
-- 
cgit 1.5.1


From 6525fd65ee52e36929b9c35253c772da16aa2b99 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 1 Sep 2023 12:41:56 +0100
Subject: Log the details of background update failures (#16212)

---
 changelog.d/16212.misc                  |  1 +
 synapse/storage/background_updates.py   |  4 ++--
 tests/storage/test_background_update.py | 24 +++++++++++++++++++++++-
 3 files changed, 26 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/16212.misc

(limited to 'synapse')

diff --git a/changelog.d/16212.misc b/changelog.d/16212.misc
new file mode 100644
index 0000000000..19cf9b102d
--- /dev/null
+++ b/changelog.d/16212.misc
@@ -0,0 +1 @@
+Log the details of background update failures.
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index ddca0af1da..7619f405fa 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -405,14 +405,14 @@ class BackgroundUpdater:
                 try:
                     result = await self.do_next_background_update(sleep)
                     back_to_back_failures = 0
-                except Exception:
+                except Exception as e:
+                    logger.exception("Error doing update: %s", e)
                     back_to_back_failures += 1
                     if back_to_back_failures >= 5:
                         self._aborted = True
                         raise RuntimeError(
                             "5 back-to-back background update failures; aborting."
                         )
-                    logger.exception("Error doing update")
                 else:
                     if result:
                         logger.info(
diff --git a/tests/storage/test_background_update.py b/tests/storage/test_background_update.py
index 52beb4e89d..abf7d0564d 100644
--- a/tests/storage/test_background_update.py
+++ b/tests/storage/test_background_update.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import logging
 from unittest.mock import AsyncMock, Mock
 
 import yaml
@@ -330,6 +330,28 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase):
         self.update_handler.side_effect = update_short
         self.get_success(self.updates.do_next_background_update(False))
 
+    def test_failed_update_logs_exception_details(self) -> None:
+        needle = "RUH ROH RAGGY"
+
+        def failing_update(progress: JsonDict, count: int) -> int:
+            raise Exception(needle)
+
+        self.update_handler.side_effect = failing_update
+        self.update_handler.reset_mock()
+
+        self.get_success(
+            self.store.db_pool.simple_insert(
+                "background_updates",
+                values={"update_name": "test_update", "progress_json": "{}"},
+            )
+        )
+
+        with self.assertLogs(level=logging.ERROR) as logs:
+            # Expect a back-to-back RuntimeError to be raised
+            self.get_failure(self.updates.run_background_updates(False), RuntimeError)
+
+        self.assertTrue(any(needle in log for log in logs.output), logs.output)
+
 
 class BackgroundUpdateControllerTestCase(unittest.HomeserverTestCase):
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-- 
cgit 1.5.1


From 93f2fdd8d1d56a55bddc5b13fd46042ecabea178 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Fri, 1 Sep 2023 13:52:57 +0100
Subject: Fix typo where we ended up with multiple `WorkerLocksHandler`
 (#16220)

I don't think this has caused any actual issues.

Introduced in #15891
---
 changelog.d/16220.misc | 1 +
 synapse/server.py      | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 changelog.d/16220.misc

(limited to 'synapse')

diff --git a/changelog.d/16220.misc b/changelog.d/16220.misc
new file mode 100644
index 0000000000..329e9f76f6
--- /dev/null
+++ b/changelog.d/16220.misc
@@ -0,0 +1 @@
+Fix typo where we ended up with multiple `WorkerLocksHandler`.
diff --git a/synapse/server.py b/synapse/server.py
index fd16dacd0d..71ead524d6 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -913,6 +913,7 @@ class HomeServer(metaclass=abc.ABCMeta):
         """Usage metrics shared between phone home stats and the prometheus exporter."""
         return CommonUsageMetricsManager(self)
 
+    @cache_in_self
     def get_worker_locks_handler(self) -> WorkerLocksHandler:
         return WorkerLocksHandler(self)
 
-- 
cgit 1.5.1


From b85c3485b15900240a61fb5f2dca606adc1ff268 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Fri, 1 Sep 2023 13:52:57 +0100
Subject: Fix typo where we ended up with multiple `WorkerLocksHandler`
 (#16220)

I don't think this has caused any actual issues.

Introduced in #15891
---
 changelog.d/16220.misc | 1 +
 synapse/server.py      | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 changelog.d/16220.misc

(limited to 'synapse')

diff --git a/changelog.d/16220.misc b/changelog.d/16220.misc
new file mode 100644
index 0000000000..329e9f76f6
--- /dev/null
+++ b/changelog.d/16220.misc
@@ -0,0 +1 @@
+Fix typo where we ended up with multiple `WorkerLocksHandler`.
diff --git a/synapse/server.py b/synapse/server.py
index 7cdd3ea3c2..8f5e4fc140 100644
--- a/synapse/server.py
+++ b/synapse/server.py
@@ -914,6 +914,7 @@ class HomeServer(metaclass=abc.ABCMeta):
         """Usage metrics shared between phone home stats and the prometheus exporter."""
         return CommonUsageMetricsManager(self)
 
+    @cache_in_self
     def get_worker_locks_handler(self) -> WorkerLocksHandler:
         return WorkerLocksHandler(self)
 
-- 
cgit 1.5.1


From e9eb26e3aff63545c77980f0f7a0c04bcbccbda0 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Mon, 4 Sep 2023 11:57:59 +0100
Subject: Cache device resync requests over replication (#16241)

---
 changelog.d/16241.misc              | 1 +
 synapse/replication/http/devices.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/16241.misc

(limited to 'synapse')

diff --git a/changelog.d/16241.misc b/changelog.d/16241.misc
new file mode 100644
index 0000000000..0fc5f34c5c
--- /dev/null
+++ b/changelog.d/16241.misc
@@ -0,0 +1 @@
+Cache device resync requests over replication.
diff --git a/synapse/replication/http/devices.py b/synapse/replication/http/devices.py
index 73f3de3642..209833d287 100644
--- a/synapse/replication/http/devices.py
+++ b/synapse/replication/http/devices.py
@@ -62,7 +62,7 @@ class ReplicationMultiUserDevicesResyncRestServlet(ReplicationEndpoint):
 
     NAME = "multi_user_device_resync"
     PATH_ARGS = ()
-    CACHE = False
+    CACHE = True
 
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
-- 
cgit 1.5.1


From f84baecb6f57b5ddb570c43574f774fae5e8afed Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Mon, 4 Sep 2023 14:04:43 +0100
Subject: Don't reset retry timers on "valid" error codes (#16221)

---
 changelog.d/16221.bugfix               |  1 +
 synapse/federation/transport/client.py |  4 +++-
 synapse/http/matrixfederationclient.py |  8 ++++++++
 synapse/util/retryutils.py             | 18 ++++++++++++++++--
 tests/handlers/test_typing.py          |  4 ++--
 5 files changed, 30 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/16221.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16221.bugfix b/changelog.d/16221.bugfix
new file mode 100644
index 0000000000..22678256e4
--- /dev/null
+++ b/changelog.d/16221.bugfix
@@ -0,0 +1 @@
+Fix long-standing bug where we did not correctly back off from servers that had "gone" if they returned 4xx series error codes.
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 5ce3f345cb..b5e4b2680e 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -249,8 +249,10 @@ class TransportLayerClient:
             data=json_data,
             json_data_callback=json_data_callback,
             long_retries=True,
-            backoff_on_404=True,  # If we get a 404 the other side has gone
             try_trailing_slash_on_400=True,
+            # Sending a transaction should always succeed, if it doesn't
+            # then something is wrong and we should backoff.
+            backoff_on_all_error_codes=True,
         )
 
     async def make_query(
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index 11342ccac8..08c7fc1631 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -512,6 +512,7 @@ class MatrixFederationHttpClient:
         long_retries: bool = False,
         ignore_backoff: bool = False,
         backoff_on_404: bool = False,
+        backoff_on_all_error_codes: bool = False,
     ) -> IResponse:
         """
         Sends a request to the given server.
@@ -552,6 +553,7 @@ class MatrixFederationHttpClient:
                 and try the request anyway.
 
             backoff_on_404: Back off if we get a 404
+            backoff_on_all_error_codes: Back off if we get any error response
 
         Returns:
             Resolves with the HTTP response object on success.
@@ -594,6 +596,7 @@ class MatrixFederationHttpClient:
             ignore_backoff=ignore_backoff,
             notifier=self.hs.get_notifier(),
             replication_client=self.hs.get_replication_command_handler(),
+            backoff_on_all_error_codes=backoff_on_all_error_codes,
         )
 
         method_bytes = request.method.encode("ascii")
@@ -889,6 +892,7 @@ class MatrixFederationHttpClient:
         backoff_on_404: bool = False,
         try_trailing_slash_on_400: bool = False,
         parser: Literal[None] = None,
+        backoff_on_all_error_codes: bool = False,
     ) -> JsonDict:
         ...
 
@@ -906,6 +910,7 @@ class MatrixFederationHttpClient:
         backoff_on_404: bool = False,
         try_trailing_slash_on_400: bool = False,
         parser: Optional[ByteParser[T]] = None,
+        backoff_on_all_error_codes: bool = False,
     ) -> T:
         ...
 
@@ -922,6 +927,7 @@ class MatrixFederationHttpClient:
         backoff_on_404: bool = False,
         try_trailing_slash_on_400: bool = False,
         parser: Optional[ByteParser[T]] = None,
+        backoff_on_all_error_codes: bool = False,
     ) -> Union[JsonDict, T]:
         """Sends the specified json data using PUT
 
@@ -957,6 +963,7 @@ class MatrixFederationHttpClient:
                 enabled.
             parser: The parser to use to decode the response. Defaults to
                 parsing as JSON.
+            backoff_on_all_error_codes: Back off if we get any error response
 
         Returns:
             Succeeds when we get a 2xx HTTP response. The
@@ -990,6 +997,7 @@ class MatrixFederationHttpClient:
             ignore_backoff=ignore_backoff,
             long_retries=long_retries,
             timeout=timeout,
+            backoff_on_all_error_codes=backoff_on_all_error_codes,
         )
 
         if timeout is not None:
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index 27e9fc976c..9d2065372c 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -128,6 +128,7 @@ class RetryDestinationLimiter:
         backoff_on_failure: bool = True,
         notifier: Optional["Notifier"] = None,
         replication_client: Optional["ReplicationCommandHandler"] = None,
+        backoff_on_all_error_codes: bool = False,
     ):
         """Marks the destination as "down" if an exception is thrown in the
         context, except for CodeMessageException with code < 500.
@@ -147,6 +148,9 @@ class RetryDestinationLimiter:
 
             backoff_on_failure: set to False if we should not increase the
                 retry interval on a failure.
+
+            backoff_on_all_error_codes: Whether we should back off on any
+                error code.
         """
         self.clock = clock
         self.store = store
@@ -156,6 +160,7 @@ class RetryDestinationLimiter:
         self.retry_interval = retry_interval
         self.backoff_on_404 = backoff_on_404
         self.backoff_on_failure = backoff_on_failure
+        self.backoff_on_all_error_codes = backoff_on_all_error_codes
 
         self.notifier = notifier
         self.replication_client = replication_client
@@ -179,6 +184,7 @@ class RetryDestinationLimiter:
         exc_val: Optional[BaseException],
         exc_tb: Optional[TracebackType],
     ) -> None:
+        success = exc_type is None
         valid_err_code = False
         if exc_type is None:
             valid_err_code = True
@@ -195,7 +201,9 @@ class RetryDestinationLimiter:
             # won't accept our requests for at least a while.
             # 429 is us being aggressively rate limited, so lets rate limit
             # ourselves.
-            if exc_val.code == 404 and self.backoff_on_404:
+            if self.backoff_on_all_error_codes:
+                valid_err_code = False
+            elif exc_val.code == 404 and self.backoff_on_404:
                 valid_err_code = False
             elif exc_val.code in (401, 429):
                 valid_err_code = False
@@ -204,7 +212,7 @@ class RetryDestinationLimiter:
             else:
                 valid_err_code = False
 
-        if valid_err_code:
+        if success:
             # We connected successfully.
             if not self.retry_interval:
                 return
@@ -215,6 +223,12 @@ class RetryDestinationLimiter:
             self.failure_ts = None
             retry_last_ts = 0
             self.retry_interval = 0
+        elif valid_err_code:
+            # We got a potentially valid error code back. We don't reset the
+            # timers though, as the other side might actually be down anyway
+            # (e.g. some deprovisioned servers will always return a 404 or 403,
+            # and we don't want to keep resetting the retry timers for them).
+            return
         elif not self.backoff_on_failure:
             return
         else:
diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py
index 2a295da3a0..43c513b157 100644
--- a/tests/handlers/test_typing.py
+++ b/tests/handlers/test_typing.py
@@ -251,8 +251,8 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
             ),
             json_data_callback=ANY,
             long_retries=True,
-            backoff_on_404=True,
             try_trailing_slash_on_400=True,
+            backoff_on_all_error_codes=True,
         )
 
     def test_started_typing_remote_recv(self) -> None:
@@ -366,7 +366,7 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
             ),
             json_data_callback=ANY,
             long_retries=True,
-            backoff_on_404=True,
+            backoff_on_all_error_codes=True,
             try_trailing_slash_on_400=True,
         )
 
-- 
cgit 1.5.1


From dcb27783417a1161c484525afb839233299b847f Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Mon, 4 Sep 2023 18:13:28 +0200
Subject: Add last_seen_ts to the admin users API (#16218)

---
 changelog.d/16218.feature                      |  1 +
 docs/admin_api/user_admin_api.md               |  2 +
 synapse/handlers/admin.py                      |  1 +
 synapse/rest/admin/users.py                    |  1 +
 synapse/storage/databases/main/__init__.py     |  6 ++-
 synapse/storage/databases/main/registration.py |  7 ++-
 synapse/storage/databases/main/stats.py        |  1 +
 synapse/types/__init__.py                      |  2 +
 tests/rest/admin/test_user.py                  | 60 ++++++++++++++++++++++++++
 tests/storage/test_registration.py             |  1 +
 10 files changed, 80 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/16218.feature

(limited to 'synapse')

diff --git a/changelog.d/16218.feature b/changelog.d/16218.feature
new file mode 100644
index 0000000000..4afd092e88
--- /dev/null
+++ b/changelog.d/16218.feature
@@ -0,0 +1 @@
+Add `last_seen_ts` to the admin users API.
diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md
index 8032e05497..975a7a0da4 100644
--- a/docs/admin_api/user_admin_api.md
+++ b/docs/admin_api/user_admin_api.md
@@ -242,6 +242,7 @@ The following parameters should be set in the URL:
   - `displayname` - Users are ordered alphabetically by `displayname`.
   - `avatar_url` - Users are ordered alphabetically by avatar URL.
   - `creation_ts` - Users are ordered by when the users was created in ms.
+  - `last_seen_ts` - Users are ordered by when the user was last seen in ms.
 
 - `dir` - Direction of media order. Either `f` for forwards or `b` for backwards.
   Setting this value to `b` will reverse the above sort order. Defaults to `f`.
@@ -272,6 +273,7 @@ The following fields are returned in the JSON response body:
   - `displayname` - string - The user's display name if they have set one.
   - `avatar_url` - string -  The user's avatar URL if they have set one.
   - `creation_ts` - integer - The user's creation timestamp in ms.
+  - `last_seen_ts` - integer - The user's last activity timestamp in ms.
 
 - `next_token`: string representing a positive integer - Indication for pagination. See above.
 - `total` - integer - Total number of media.
diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py
index 0e812a6d8b..2f0e5f3b0a 100644
--- a/synapse/handlers/admin.py
+++ b/synapse/handlers/admin.py
@@ -76,6 +76,7 @@ class AdminHandler:
             "consent_ts",
             "user_type",
             "is_guest",
+            "last_seen_ts",
         }
 
         if self._msc3866_enabled:
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 625a47ec1a..91898a5c13 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -132,6 +132,7 @@ class UsersRestServletV2(RestServlet):
                 UserSortOrder.AVATAR_URL.value,
                 UserSortOrder.SHADOW_BANNED.value,
                 UserSortOrder.CREATION_TS.value,
+                UserSortOrder.LAST_SEEN_TS.value,
             ),
         )
 
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index a85633efcd..0836e247ef 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -277,6 +277,10 @@ class DataStore(
                 FROM users as u
                 LEFT JOIN profiles AS p ON u.name = p.full_user_id
                 LEFT JOIN erased_users AS eu ON u.name = eu.user_id
+                LEFT JOIN (
+                    SELECT user_id, MAX(last_seen) AS last_seen_ts
+                    FROM user_ips GROUP BY user_id
+                ) ls ON u.name = ls.user_id
                 {where_clause}
                 """
             sql = "SELECT COUNT(*) as total_users " + sql_base
@@ -286,7 +290,7 @@ class DataStore(
             sql = f"""
                 SELECT name, user_type, is_guest, admin, deactivated, shadow_banned,
                 displayname, avatar_url, creation_ts * 1000 as creation_ts, approved,
-                eu.user_id is not null as erased
+                eu.user_id is not null as erased, last_seen_ts
                 {sql_base}
                 ORDER BY {order_by_column} {order}, u.name ASC
                 LIMIT ? OFFSET ?
diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py
index d3a01d526f..7e85b73e8e 100644
--- a/synapse/storage/databases/main/registration.py
+++ b/synapse/storage/databases/main/registration.py
@@ -206,8 +206,12 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
                     consent_server_notice_sent, appservice_id, creation_ts, user_type,
                     deactivated, COALESCE(shadow_banned, FALSE) AS shadow_banned,
                     COALESCE(approved, TRUE) AS approved,
-                    COALESCE(locked, FALSE) AS locked
+                    COALESCE(locked, FALSE) AS locked, last_seen_ts
                 FROM users
+                LEFT JOIN (
+                    SELECT user_id, MAX(last_seen) AS last_seen_ts
+                    FROM user_ips GROUP BY user_id
+                ) ls ON users.name = ls.user_id
                 WHERE name = ?
                 """,
                 (user_id,),
@@ -268,6 +272,7 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
             is_shadow_banned=bool(user_data["shadow_banned"]),
             user_id=UserID.from_string(user_data["name"]),
             user_type=user_data["user_type"],
+            last_seen_ts=user_data["last_seen_ts"],
         )
 
     async def is_trial_user(self, user_id: str) -> bool:
diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py
index 6298f0984d..3a2966b9e4 100644
--- a/synapse/storage/databases/main/stats.py
+++ b/synapse/storage/databases/main/stats.py
@@ -107,6 +107,7 @@ class UserSortOrder(Enum):
     AVATAR_URL = "avatar_url"
     SHADOW_BANNED = "shadow_banned"
     CREATION_TS = "creation_ts"
+    LAST_SEEN_TS = "last_seen_ts"
 
 
 class StatsStore(StateDeltasStore):
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index e750417189..488714f60c 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -946,6 +946,7 @@ class UserInfo:
         is_guest:  True if the user is a guest user.
         is_shadow_banned:  True if the user has been shadow-banned.
         user_type:  User type (None for normal user, 'support' and 'bot' other options).
+        last_seen_ts:  Last activity timestamp of the user.
     """
 
     user_id: UserID
@@ -958,6 +959,7 @@ class UserInfo:
     is_deactivated: bool
     is_guest: bool
     is_shadow_banned: bool
+    last_seen_ts: Optional[int]
 
 
 class UserProfile(TypedDict):
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index 2f6bd0d74f..761871b933 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -40,6 +40,7 @@ from synapse.rest.client import (
     user_directory,
 )
 from synapse.server import HomeServer
+from synapse.storage.databases.main.client_ips import LAST_SEEN_GRANULARITY
 from synapse.types import JsonDict, UserID, create_requester
 from synapse.util import Clock
 
@@ -456,6 +457,7 @@ class UsersListTestCase(unittest.HomeserverTestCase):
     servlets = [
         synapse.rest.admin.register_servlets,
         login.register_servlets,
+        room.register_servlets,
     ]
     url = "/_synapse/admin/v2/users"
 
@@ -506,6 +508,62 @@ class UsersListTestCase(unittest.HomeserverTestCase):
         # Check that all fields are available
         self._check_fields(channel.json_body["users"])
 
+    def test_last_seen(self) -> None:
+        """
+        Test that last_seen_ts field is properly working.
+        """
+        user1 = self.register_user("u1", "pass")
+        user1_token = self.login("u1", "pass")
+        user2 = self.register_user("u2", "pass")
+        user2_token = self.login("u2", "pass")
+        user3 = self.register_user("u3", "pass")
+        user3_token = self.login("u3", "pass")
+
+        self.helper.create_room_as(self.admin_user, tok=self.admin_user_tok)
+        self.reactor.advance(10)
+        self.helper.create_room_as(user2, tok=user2_token)
+        self.reactor.advance(10)
+        self.helper.create_room_as(user1, tok=user1_token)
+        self.reactor.advance(10)
+        self.helper.create_room_as(user3, tok=user3_token)
+        self.reactor.advance(10)
+
+        channel = self.make_request(
+            "GET",
+            self.url,
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(4, len(channel.json_body["users"]))
+        self.assertEqual(4, channel.json_body["total"])
+
+        admin_last_seen = channel.json_body["users"][0]["last_seen_ts"]
+        user1_last_seen = channel.json_body["users"][1]["last_seen_ts"]
+        user2_last_seen = channel.json_body["users"][2]["last_seen_ts"]
+        user3_last_seen = channel.json_body["users"][3]["last_seen_ts"]
+        self.assertTrue(admin_last_seen > 0 and admin_last_seen < 10000)
+        self.assertTrue(user2_last_seen > 10000 and user2_last_seen < 20000)
+        self.assertTrue(user1_last_seen > 20000 and user1_last_seen < 30000)
+        self.assertTrue(user3_last_seen > 30000 and user3_last_seen < 40000)
+
+        self._order_test([self.admin_user, user2, user1, user3], "last_seen_ts")
+
+        self.reactor.advance(LAST_SEEN_GRANULARITY / 1000)
+        self.helper.create_room_as(user1, tok=user1_token)
+        self.reactor.advance(10)
+
+        channel = self.make_request(
+            "GET",
+            self.url + "/" + user1,
+            access_token=self.admin_user_tok,
+        )
+        self.assertTrue(
+            channel.json_body["last_seen_ts"] > 40000 + LAST_SEEN_GRANULARITY
+        )
+
+        self._order_test([self.admin_user, user2, user3, user1], "last_seen_ts")
+
     def test_search_term(self) -> None:
         """Test that searching for a users works correctly"""
 
@@ -1135,6 +1193,7 @@ class UsersListTestCase(unittest.HomeserverTestCase):
             self.assertIn("displayname", u)
             self.assertIn("avatar_url", u)
             self.assertIn("creation_ts", u)
+            self.assertIn("last_seen_ts", u)
 
     def _create_users(self, number_users: int) -> None:
         """
@@ -3035,6 +3094,7 @@ class UserRestTestCase(unittest.HomeserverTestCase):
         self.assertIn("consent_version", content)
         self.assertIn("consent_ts", content)
         self.assertIn("external_ids", content)
+        self.assertIn("last_seen_ts", content)
 
         # This key was removed intentionally. Ensure it is not accidentally re-included.
         self.assertNotIn("password_hash", content)
diff --git a/tests/storage/test_registration.py b/tests/storage/test_registration.py
index ba41459d08..95c9792d54 100644
--- a/tests/storage/test_registration.py
+++ b/tests/storage/test_registration.py
@@ -51,6 +51,7 @@ class RegistrationStoreTestCase(HomeserverTestCase):
                 "locked": 0,
                 "shadow_banned": 0,
                 "approved": 1,
+                "last_seen_ts": None,
             },
             (self.get_success(self.store.get_user_by_id(self.user_id))),
         )
-- 
cgit 1.5.1


From d35bed8369514fe727b4fe1afb68f48cc8b2655a Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Mon, 4 Sep 2023 17:14:09 +0100
Subject: Don't wake up destination transaction queue if they're not due for
 retry. (#16223)

---
 changelog.d/16223.feature                          |  1 +
 synapse/federation/send_queue.py                   | 12 +--
 synapse/federation/sender/__init__.py              | 86 +++++++++++++++-------
 synapse/federation/sender/per_destination_queue.py |  6 +-
 synapse/handlers/device.py                         | 26 +++----
 synapse/handlers/devicemessage.py                  |  7 +-
 synapse/handlers/presence.py                       | 16 ++--
 synapse/handlers/typing.py                         | 14 +++-
 synapse/module_api/__init__.py                     |  2 +-
 synapse/replication/tcp/client.py                  |  8 +-
 synapse/storage/databases/main/transactions.py     | 26 ++++++-
 synapse/util/retryutils.py                         | 25 +++++++
 tests/federation/test_federation_sender.py         | 27 ++++---
 tests/handlers/test_presence.py                    | 60 ++++++++++++---
 tests/handlers/test_typing.py                      |  2 -
 15 files changed, 228 insertions(+), 90 deletions(-)
 create mode 100644 changelog.d/16223.feature

(limited to 'synapse')

diff --git a/changelog.d/16223.feature b/changelog.d/16223.feature
new file mode 100644
index 0000000000..a52d66658b
--- /dev/null
+++ b/changelog.d/16223.feature
@@ -0,0 +1 @@
+Improve resource usage when sending data to a large number of remote hosts that are marked as "down".
diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py
index fb448f2155..6520795635 100644
--- a/synapse/federation/send_queue.py
+++ b/synapse/federation/send_queue.py
@@ -49,7 +49,7 @@ from synapse.api.presence import UserPresenceState
 from synapse.federation.sender import AbstractFederationSender, FederationSender
 from synapse.metrics import LaterGauge
 from synapse.replication.tcp.streams.federation import FederationStream
-from synapse.types import JsonDict, ReadReceipt, RoomStreamToken
+from synapse.types import JsonDict, ReadReceipt, RoomStreamToken, StrCollection
 from synapse.util.metrics import Measure
 
 from .units import Edu
@@ -229,7 +229,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
         """
         # nothing to do here: the replication listener will handle it.
 
-    def send_presence_to_destinations(
+    async def send_presence_to_destinations(
         self, states: Iterable[UserPresenceState], destinations: Iterable[str]
     ) -> None:
         """As per FederationSender
@@ -245,7 +245,9 @@ class FederationRemoteSendQueue(AbstractFederationSender):
 
         self.notifier.on_new_replication_data()
 
-    def send_device_messages(self, destination: str, immediate: bool = True) -> None:
+    async def send_device_messages(
+        self, destinations: StrCollection, immediate: bool = True
+    ) -> None:
         """As per FederationSender"""
         # We don't need to replicate this as it gets sent down a different
         # stream.
@@ -463,7 +465,7 @@ class ParsedFederationStreamData:
     edus: Dict[str, List[Edu]]
 
 
-def process_rows_for_federation(
+async def process_rows_for_federation(
     transaction_queue: FederationSender,
     rows: List[FederationStream.FederationStreamRow],
 ) -> None:
@@ -496,7 +498,7 @@ def process_rows_for_federation(
         parsed_row.add_to_buffer(buff)
 
     for state, destinations in buff.presence_destinations:
-        transaction_queue.send_presence_to_destinations(
+        await transaction_queue.send_presence_to_destinations(
             states=[state], destinations=destinations
         )
 
diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py
index 97abbdee18..fb20fd8a10 100644
--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
@@ -147,7 +147,10 @@ from twisted.internet import defer
 import synapse.metrics
 from synapse.api.presence import UserPresenceState
 from synapse.events import EventBase
-from synapse.federation.sender.per_destination_queue import PerDestinationQueue
+from synapse.federation.sender.per_destination_queue import (
+    CATCHUP_RETRY_INTERVAL,
+    PerDestinationQueue,
+)
 from synapse.federation.sender.transaction_manager import TransactionManager
 from synapse.federation.units import Edu
 from synapse.logging.context import make_deferred_yieldable, run_in_background
@@ -161,9 +164,10 @@ from synapse.metrics.background_process_metrics import (
     run_as_background_process,
     wrap_as_background_process,
 )
-from synapse.types import JsonDict, ReadReceipt, RoomStreamToken
+from synapse.types import JsonDict, ReadReceipt, RoomStreamToken, StrCollection
 from synapse.util import Clock
 from synapse.util.metrics import Measure
+from synapse.util.retryutils import filter_destinations_by_retry_limiter
 
 if TYPE_CHECKING:
     from synapse.events.presence_router import PresenceRouter
@@ -213,7 +217,7 @@ class AbstractFederationSender(metaclass=abc.ABCMeta):
         raise NotImplementedError()
 
     @abc.abstractmethod
-    def send_presence_to_destinations(
+    async def send_presence_to_destinations(
         self, states: Iterable[UserPresenceState], destinations: Iterable[str]
     ) -> None:
         """Send the given presence states to the given destinations.
@@ -242,9 +246,11 @@ class AbstractFederationSender(metaclass=abc.ABCMeta):
         raise NotImplementedError()
 
     @abc.abstractmethod
-    def send_device_messages(self, destination: str, immediate: bool = True) -> None:
+    async def send_device_messages(
+        self, destinations: StrCollection, immediate: bool = True
+    ) -> None:
         """Tells the sender that a new device message is ready to be sent to the
-        destination. The `immediate` flag specifies whether the messages should
+        destinations. The `immediate` flag specifies whether the messages should
         be tried to be sent immediately, or whether it can be delayed for a
         short while (to aid performance).
         """
@@ -716,6 +722,13 @@ class FederationSender(AbstractFederationSender):
             pdu.internal_metadata.stream_ordering,
         )
 
+        destinations = await filter_destinations_by_retry_limiter(
+            destinations,
+            clock=self.clock,
+            store=self.store,
+            retry_due_within_ms=CATCHUP_RETRY_INTERVAL,
+        )
+
         for destination in destinations:
             self._get_per_destination_queue(destination).send_pdu(pdu)
 
@@ -763,12 +776,20 @@ class FederationSender(AbstractFederationSender):
         domains_set = await self._storage_controllers.state.get_current_hosts_in_room_or_partial_state_approximation(
             room_id
         )
-        domains = [
+        domains: StrCollection = [
             d
             for d in domains_set
             if not self.is_mine_server_name(d)
             and self._federation_shard_config.should_handle(self._instance_name, d)
         ]
+
+        domains = await filter_destinations_by_retry_limiter(
+            domains,
+            clock=self.clock,
+            store=self.store,
+            retry_due_within_ms=CATCHUP_RETRY_INTERVAL,
+        )
+
         if not domains:
             return
 
@@ -816,7 +837,7 @@ class FederationSender(AbstractFederationSender):
         for queue in queues:
             queue.flush_read_receipts_for_room(room_id)
 
-    def send_presence_to_destinations(
+    async def send_presence_to_destinations(
         self, states: Iterable[UserPresenceState], destinations: Iterable[str]
     ) -> None:
         """Send the given presence states to the given destinations.
@@ -831,13 +852,20 @@ class FederationSender(AbstractFederationSender):
         for state in states:
             assert self.is_mine_id(state.user_id)
 
+        destinations = await filter_destinations_by_retry_limiter(
+            [
+                d
+                for d in destinations
+                if self._federation_shard_config.should_handle(self._instance_name, d)
+            ],
+            clock=self.clock,
+            store=self.store,
+            retry_due_within_ms=CATCHUP_RETRY_INTERVAL,
+        )
+
         for destination in destinations:
             if self.is_mine_server_name(destination):
                 continue
-            if not self._federation_shard_config.should_handle(
-                self._instance_name, destination
-            ):
-                continue
 
             self._get_per_destination_queue(destination).send_presence(
                 states, start_loop=False
@@ -896,21 +924,29 @@ class FederationSender(AbstractFederationSender):
         else:
             queue.send_edu(edu)
 
-    def send_device_messages(self, destination: str, immediate: bool = True) -> None:
-        if self.is_mine_server_name(destination):
-            logger.warning("Not sending device update to ourselves")
-            return
-
-        if not self._federation_shard_config.should_handle(
-            self._instance_name, destination
-        ):
-            return
+    async def send_device_messages(
+        self, destinations: StrCollection, immediate: bool = True
+    ) -> None:
+        destinations = await filter_destinations_by_retry_limiter(
+            [
+                destination
+                for destination in destinations
+                if self._federation_shard_config.should_handle(
+                    self._instance_name, destination
+                )
+                and not self.is_mine_server_name(destination)
+            ],
+            clock=self.clock,
+            store=self.store,
+            retry_due_within_ms=CATCHUP_RETRY_INTERVAL,
+        )
 
-        if immediate:
-            self._get_per_destination_queue(destination).attempt_new_transaction()
-        else:
-            self._get_per_destination_queue(destination).mark_new_data()
-            self._destination_wakeup_queue.add_to_queue(destination)
+        for destination in destinations:
+            if immediate:
+                self._get_per_destination_queue(destination).attempt_new_transaction()
+            else:
+                self._get_per_destination_queue(destination).mark_new_data()
+                self._destination_wakeup_queue.add_to_queue(destination)
 
     def wake_destination(self, destination: str) -> None:
         """Called when we want to retry sending transactions to a remote.
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index 31c5c2b7de..9105ba664c 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -59,6 +59,10 @@ sent_edus_by_type = Counter(
 )
 
 
+# If the retry interval is larger than this then we enter "catchup" mode
+CATCHUP_RETRY_INTERVAL = 60 * 60 * 1000
+
+
 class PerDestinationQueue:
     """
     Manages the per-destination transmission queues.
@@ -370,7 +374,7 @@ class PerDestinationQueue:
                 ),
             )
 
-            if e.retry_interval > 60 * 60 * 1000:
+            if e.retry_interval > CATCHUP_RETRY_INTERVAL:
                 # we won't retry for another hour!
                 # (this suggests a significant outage)
                 # We drop pending EDUs because otherwise they will
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 5ae427d52c..763f56dfc1 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -836,17 +836,16 @@ class DeviceHandler(DeviceWorkerHandler):
                             user_id,
                             hosts,
                         )
-                        for host in hosts:
-                            self.federation_sender.send_device_messages(
-                                host, immediate=False
-                            )
-                            # TODO: when called, this isn't in a logging context.
-                            # This leads to log spam, sentry event spam, and massive
-                            # memory usage.
-                            # See https://github.com/matrix-org/synapse/issues/12552.
-                            # log_kv(
-                            #     {"message": "sent device update to host", "host": host}
-                            # )
+                        await self.federation_sender.send_device_messages(
+                            hosts, immediate=False
+                        )
+                        # TODO: when called, this isn't in a logging context.
+                        # This leads to log spam, sentry event spam, and massive
+                        # memory usage.
+                        # See https://github.com/matrix-org/synapse/issues/12552.
+                        # log_kv(
+                        #     {"message": "sent device update to host", "host": host}
+                        # )
 
                     if current_stream_id != stream_id:
                         # Clear the set of hosts we've already sent to as we're
@@ -951,8 +950,9 @@ class DeviceHandler(DeviceWorkerHandler):
 
         # Notify things that device lists need to be sent out.
         self.notifier.notify_replication()
-        for host in potentially_changed_hosts:
-            self.federation_sender.send_device_messages(host, immediate=False)
+        await self.federation_sender.send_device_messages(
+            potentially_changed_hosts, immediate=False
+        )
 
 
 def _update_device_from_client_ips(
diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py
index 798c7039f9..1c79f7a61e 100644
--- a/synapse/handlers/devicemessage.py
+++ b/synapse/handlers/devicemessage.py
@@ -302,10 +302,9 @@ class DeviceMessageHandler:
         )
 
         if self.federation_sender:
-            for destination in remote_messages.keys():
-                # Enqueue a new federation transaction to send the new
-                # device messages to each remote destination.
-                self.federation_sender.send_device_messages(destination)
+            # Enqueue a new federation transaction to send the new
+            # device messages to each remote destination.
+            await self.federation_sender.send_device_messages(remote_messages.keys())
 
     async def get_events_for_dehydrated_device(
         self,
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 2f841863ae..f31e18328b 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -354,7 +354,9 @@ class BasePresenceHandler(abc.ABC):
         )
 
         for destination, host_states in hosts_to_states.items():
-            self._federation.send_presence_to_destinations(host_states, [destination])
+            await self._federation.send_presence_to_destinations(
+                host_states, [destination]
+            )
 
     async def send_full_presence_to_users(self, user_ids: StrCollection) -> None:
         """
@@ -936,7 +938,7 @@ class PresenceHandler(BasePresenceHandler):
                 )
 
                 for destination, states in hosts_to_states.items():
-                    self._federation_queue.send_presence_to_destinations(
+                    await self._federation_queue.send_presence_to_destinations(
                         states, [destination]
                     )
 
@@ -1508,7 +1510,7 @@ class PresenceHandler(BasePresenceHandler):
                 or state.status_msg is not None
             ]
 
-            self._federation_queue.send_presence_to_destinations(
+            await self._federation_queue.send_presence_to_destinations(
                 destinations=newly_joined_remote_hosts,
                 states=states,
             )
@@ -1519,7 +1521,7 @@ class PresenceHandler(BasePresenceHandler):
             prev_remote_hosts or newly_joined_remote_hosts
         ):
             local_states = await self.current_state_for_users(newly_joined_local_users)
-            self._federation_queue.send_presence_to_destinations(
+            await self._federation_queue.send_presence_to_destinations(
                 destinations=prev_remote_hosts | newly_joined_remote_hosts,
                 states=list(local_states.values()),
             )
@@ -2182,7 +2184,7 @@ class PresenceFederationQueue:
         index = bisect(self._queue, (clear_before,))
         self._queue = self._queue[index:]
 
-    def send_presence_to_destinations(
+    async def send_presence_to_destinations(
         self, states: Collection[UserPresenceState], destinations: StrCollection
     ) -> None:
         """Send the presence states to the given destinations.
@@ -2202,7 +2204,7 @@ class PresenceFederationQueue:
             return
 
         if self._federation:
-            self._federation.send_presence_to_destinations(
+            await self._federation.send_presence_to_destinations(
                 states=states,
                 destinations=destinations,
             )
@@ -2325,7 +2327,7 @@ class PresenceFederationQueue:
 
         for host, user_ids in hosts_to_users.items():
             states = await self._presence_handler.current_state_for_users(user_ids)
-            self._federation.send_presence_to_destinations(
+            await self._federation.send_presence_to_destinations(
                 states=states.values(),
                 destinations=[host],
             )
diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py
index 7aeae5319c..4b4227003d 100644
--- a/synapse/handlers/typing.py
+++ b/synapse/handlers/typing.py
@@ -26,9 +26,10 @@ from synapse.metrics.background_process_metrics import (
 )
 from synapse.replication.tcp.streams import TypingStream
 from synapse.streams import EventSource
-from synapse.types import JsonDict, Requester, StreamKeyType, UserID
+from synapse.types import JsonDict, Requester, StrCollection, StreamKeyType, UserID
 from synapse.util.caches.stream_change_cache import StreamChangeCache
 from synapse.util.metrics import Measure
+from synapse.util.retryutils import filter_destinations_by_retry_limiter
 from synapse.util.wheel_timer import WheelTimer
 
 if TYPE_CHECKING:
@@ -150,8 +151,15 @@ class FollowerTypingHandler:
                 now=now, obj=member, then=now + FEDERATION_PING_INTERVAL
             )
 
-            hosts = await self._storage_controllers.state.get_current_hosts_in_room(
-                member.room_id
+            hosts: StrCollection = (
+                await self._storage_controllers.state.get_current_hosts_in_room(
+                    member.room_id
+                )
+            )
+            hosts = await filter_destinations_by_retry_limiter(
+                hosts,
+                clock=self.clock,
+                store=self.store,
             )
             for domain in hosts:
                 if not self.is_mine_server_name(domain):
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 9ad8e038ae..2f00a7ba20 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -1180,7 +1180,7 @@ class ModuleApi:
 
             # Send to remote destinations.
             destination = UserID.from_string(user).domain
-            presence_handler.get_federation_queue().send_presence_to_destinations(
+            await presence_handler.get_federation_queue().send_presence_to_destinations(
                 presence_events, [destination]
             )
 
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 3b88dc68ea..51285e6d33 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -422,7 +422,7 @@ class FederationSenderHandler:
         # The federation stream contains things that we want to send out, e.g.
         # presence, typing, etc.
         if stream_name == "federation":
-            send_queue.process_rows_for_federation(self.federation_sender, rows)
+            await send_queue.process_rows_for_federation(self.federation_sender, rows)
             await self.update_token(token)
 
         # ... and when new receipts happen
@@ -439,16 +439,14 @@ class FederationSenderHandler:
                 for row in rows
                 if not row.entity.startswith("@") and not row.is_signature
             }
-            for host in hosts:
-                self.federation_sender.send_device_messages(host, immediate=False)
+            await self.federation_sender.send_device_messages(hosts, immediate=False)
 
         elif stream_name == ToDeviceStream.NAME:
             # The to_device stream includes stuff to be pushed to both local
             # clients and remote servers, so we ignore entities that start with
             # '@' (since they'll be local users rather than destinations).
             hosts = {row.entity for row in rows if not row.entity.startswith("@")}
-            for host in hosts:
-                self.federation_sender.send_device_messages(host)
+            await self.federation_sender.send_device_messages(hosts)
 
     async def _on_new_receipts(
         self, rows: Iterable[ReceiptsStream.ReceiptsStreamRow]
diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py
index 860bbf7c0f..efd21b5bfc 100644
--- a/synapse/storage/databases/main/transactions.py
+++ b/synapse/storage/databases/main/transactions.py
@@ -14,7 +14,7 @@
 
 import logging
 from enum import Enum
-from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple, cast
+from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, cast
 
 import attr
 from canonicaljson import encode_canonical_json
@@ -28,8 +28,8 @@ from synapse.storage.database import (
     LoggingTransaction,
 )
 from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
-from synapse.types import JsonDict
-from synapse.util.caches.descriptors import cached
+from synapse.types import JsonDict, StrCollection
+from synapse.util.caches.descriptors import cached, cachedList
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -205,6 +205,26 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
         else:
             return None
 
+    @cachedList(
+        cached_method_name="get_destination_retry_timings", list_name="destinations"
+    )
+    async def get_destination_retry_timings_batch(
+        self, destinations: StrCollection
+    ) -> Dict[str, Optional[DestinationRetryTimings]]:
+        rows = await self.db_pool.simple_select_many_batch(
+            table="destinations",
+            iterable=destinations,
+            column="destination",
+            retcols=("destination", "failure_ts", "retry_last_ts", "retry_interval"),
+            desc="get_destination_retry_timings_batch",
+        )
+
+        return {
+            row.pop("destination"): DestinationRetryTimings(**row)
+            for row in rows
+            if row["retry_last_ts"] and row["failure_ts"] and row["retry_interval"]
+        }
+
     async def set_destination_retry_timings(
         self,
         destination: str,
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index 9d2065372c..0e1f907667 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -19,6 +19,7 @@ from typing import TYPE_CHECKING, Any, Optional, Type
 from synapse.api.errors import CodeMessageException
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage import DataStore
+from synapse.types import StrCollection
 from synapse.util import Clock
 
 if TYPE_CHECKING:
@@ -116,6 +117,30 @@ async def get_retry_limiter(
     )
 
 
+async def filter_destinations_by_retry_limiter(
+    destinations: StrCollection,
+    clock: Clock,
+    store: DataStore,
+    retry_due_within_ms: int = 0,
+) -> StrCollection:
+    """Filter down the list of destinations to only those that are either
+    alive or due for a retry (within `retry_due_within_ms`)
+    """
+    if not destinations:
+        return destinations
+
+    retry_timings = await store.get_destination_retry_timings_batch(destinations)
+
+    now = int(clock.time_msec())
+
+    return [
+        destination
+        for destination, timings in retry_timings.items()
+        if timings is None
+        or timings.retry_last_ts + timings.retry_interval <= now + retry_due_within_ms
+    ]
+
+
 class RetryDestinationLimiter:
     def __init__(
         self,
diff --git a/tests/federation/test_federation_sender.py b/tests/federation/test_federation_sender.py
index 7bd3d06859..caf04b54cb 100644
--- a/tests/federation/test_federation_sender.py
+++ b/tests/federation/test_federation_sender.py
@@ -75,7 +75,7 @@ class FederationSenderReceiptsTestCases(HomeserverTestCase):
             thread_id=None,
             data={"ts": 1234},
         )
-        self.successResultOf(defer.ensureDeferred(sender.send_read_receipt(receipt)))
+        self.get_success(sender.send_read_receipt(receipt))
 
         self.pump()
 
@@ -111,6 +111,9 @@ class FederationSenderReceiptsTestCases(HomeserverTestCase):
         # * The same room / user on multiple threads.
         # * A different user in the same room.
         sender = self.hs.get_federation_sender()
+        # Hack so that we have a txn in-flight so we batch up read receipts
+        # below
+        sender.wake_destination("host2")
         for user, thread in (
             ("alice", None),
             ("alice", "thread"),
@@ -125,9 +128,7 @@ class FederationSenderReceiptsTestCases(HomeserverTestCase):
                 thread_id=thread,
                 data={"ts": 1234},
             )
-            self.successResultOf(
-                defer.ensureDeferred(sender.send_read_receipt(receipt))
-            )
+            defer.ensureDeferred(sender.send_read_receipt(receipt))
 
         self.pump()
 
@@ -191,7 +192,7 @@ class FederationSenderReceiptsTestCases(HomeserverTestCase):
             thread_id=None,
             data={"ts": 1234},
         )
-        self.successResultOf(defer.ensureDeferred(sender.send_read_receipt(receipt)))
+        self.get_success(sender.send_read_receipt(receipt))
 
         self.pump()
 
@@ -342,7 +343,9 @@ class FederationSenderDevicesTestCases(HomeserverTestCase):
         self.reactor.advance(1)
 
         # a second call should produce no new device EDUs
-        self.hs.get_federation_sender().send_device_messages("host2")
+        self.get_success(
+            self.hs.get_federation_sender().send_device_messages(["host2"])
+        )
         self.assertEqual(self.edus, [])
 
         # a second device
@@ -550,7 +553,9 @@ class FederationSenderDevicesTestCases(HomeserverTestCase):
 
         # recover the server
         mock_send_txn.side_effect = self.record_transaction
-        self.hs.get_federation_sender().send_device_messages("host2")
+        self.get_success(
+            self.hs.get_federation_sender().send_device_messages(["host2"])
+        )
 
         # We queue up device list updates to be sent over federation, so we
         # advance to clear the queue.
@@ -601,7 +606,9 @@ class FederationSenderDevicesTestCases(HomeserverTestCase):
 
         # recover the server
         mock_send_txn.side_effect = self.record_transaction
-        self.hs.get_federation_sender().send_device_messages("host2")
+        self.get_success(
+            self.hs.get_federation_sender().send_device_messages(["host2"])
+        )
 
         # We queue up device list updates to be sent over federation, so we
         # advance to clear the queue.
@@ -656,7 +663,9 @@ class FederationSenderDevicesTestCases(HomeserverTestCase):
 
         # recover the server
         mock_send_txn.side_effect = self.record_transaction
-        self.hs.get_federation_sender().send_device_messages("host2")
+        self.get_success(
+            self.hs.get_federation_sender().send_device_messages(["host2"])
+        )
 
         # We queue up device list updates to be sent over federation, so we
         # advance to clear the queue.
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index a987267308..88a16193a3 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -909,8 +909,14 @@ class PresenceFederationQueueTestCase(unittest.HomeserverTestCase):
 
         prev_token = self.queue.get_current_token(self.instance_name)
 
-        self.queue.send_presence_to_destinations((state1, state2), ("dest1", "dest2"))
-        self.queue.send_presence_to_destinations((state3,), ("dest3",))
+        self.get_success(
+            self.queue.send_presence_to_destinations(
+                (state1, state2), ("dest1", "dest2")
+            )
+        )
+        self.get_success(
+            self.queue.send_presence_to_destinations((state3,), ("dest3",))
+        )
 
         now_token = self.queue.get_current_token(self.instance_name)
 
@@ -946,11 +952,17 @@ class PresenceFederationQueueTestCase(unittest.HomeserverTestCase):
 
         prev_token = self.queue.get_current_token(self.instance_name)
 
-        self.queue.send_presence_to_destinations((state1, state2), ("dest1", "dest2"))
+        self.get_success(
+            self.queue.send_presence_to_destinations(
+                (state1, state2), ("dest1", "dest2")
+            )
+        )
 
         now_token = self.queue.get_current_token(self.instance_name)
 
-        self.queue.send_presence_to_destinations((state3,), ("dest3",))
+        self.get_success(
+            self.queue.send_presence_to_destinations((state3,), ("dest3",))
+        )
 
         rows, upto_token, limited = self.get_success(
             self.queue.get_replication_rows("master", prev_token, now_token, 10)
@@ -989,8 +1001,14 @@ class PresenceFederationQueueTestCase(unittest.HomeserverTestCase):
 
         prev_token = self.queue.get_current_token(self.instance_name)
 
-        self.queue.send_presence_to_destinations((state1, state2), ("dest1", "dest2"))
-        self.queue.send_presence_to_destinations((state3,), ("dest3",))
+        self.get_success(
+            self.queue.send_presence_to_destinations(
+                (state1, state2), ("dest1", "dest2")
+            )
+        )
+        self.get_success(
+            self.queue.send_presence_to_destinations((state3,), ("dest3",))
+        )
 
         self.reactor.advance(10 * 60 * 1000)
 
@@ -1005,8 +1023,14 @@ class PresenceFederationQueueTestCase(unittest.HomeserverTestCase):
 
         prev_token = self.queue.get_current_token(self.instance_name)
 
-        self.queue.send_presence_to_destinations((state1, state2), ("dest1", "dest2"))
-        self.queue.send_presence_to_destinations((state3,), ("dest3",))
+        self.get_success(
+            self.queue.send_presence_to_destinations(
+                (state1, state2), ("dest1", "dest2")
+            )
+        )
+        self.get_success(
+            self.queue.send_presence_to_destinations((state3,), ("dest3",))
+        )
 
         now_token = self.queue.get_current_token(self.instance_name)
 
@@ -1033,11 +1057,17 @@ class PresenceFederationQueueTestCase(unittest.HomeserverTestCase):
 
         prev_token = self.queue.get_current_token(self.instance_name)
 
-        self.queue.send_presence_to_destinations((state1, state2), ("dest1", "dest2"))
+        self.get_success(
+            self.queue.send_presence_to_destinations(
+                (state1, state2), ("dest1", "dest2")
+            )
+        )
 
         self.reactor.advance(2 * 60 * 1000)
 
-        self.queue.send_presence_to_destinations((state3,), ("dest3",))
+        self.get_success(
+            self.queue.send_presence_to_destinations((state3,), ("dest3",))
+        )
 
         self.reactor.advance(4 * 60 * 1000)
 
@@ -1053,8 +1083,14 @@ class PresenceFederationQueueTestCase(unittest.HomeserverTestCase):
 
         prev_token = self.queue.get_current_token(self.instance_name)
 
-        self.queue.send_presence_to_destinations((state1, state2), ("dest1", "dest2"))
-        self.queue.send_presence_to_destinations((state3,), ("dest3",))
+        self.get_success(
+            self.queue.send_presence_to_destinations(
+                (state1, state2), ("dest1", "dest2")
+            )
+        )
+        self.get_success(
+            self.queue.send_presence_to_destinations((state3,), ("dest3",))
+        )
 
         now_token = self.queue.get_current_token(self.instance_name)
 
diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py
index 43c513b157..95106ec8f3 100644
--- a/tests/handlers/test_typing.py
+++ b/tests/handlers/test_typing.py
@@ -120,8 +120,6 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase):
 
         self.datastore = hs.get_datastores().main
 
-        self.datastore.get_destination_retry_timings = AsyncMock(return_value=None)
-
         self.datastore.get_device_updates_by_remote = AsyncMock(  # type: ignore[method-assign]
             return_value=(0, [])
         )
-- 
cgit 1.5.1


From 757010905ea85333672289a0ac124d41bd923bb3 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 5 Sep 2023 11:14:14 +0000
Subject: Bump twisted from 22.10.0 to 23.8.0 (#16235)

* Bump twisted from 22.10.0 to 23.8.0

Bumps [twisted](https://github.com/twisted/twisted) from 22.10.0 to 23.8.0.
- [Release notes](https://github.com/twisted/twisted/releases)
- [Changelog](https://github.com/twisted/twisted/blob/trunk/NEWS.rst)
- [Commits](https://github.com/twisted/twisted/compare/twisted-22.10.0...twisted-23.8.0)

---
updated-dependencies:
- dependency-name: twisted
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* Fix types

* Fix lint

* Newsfile

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Erik Johnston <erik@matrix.org>
---
 changelog.d/16235.misc           |  1 +
 poetry.lock                      | 37 ++++++++++++++++++-------------------
 synapse/handlers/initial_sync.py |  8 ++------
 synapse/logging/context.py       |  4 ++--
 synapse/util/gai_resolver.py     |  2 +-
 5 files changed, 24 insertions(+), 28 deletions(-)
 create mode 100644 changelog.d/16235.misc

(limited to 'synapse')

diff --git a/changelog.d/16235.misc b/changelog.d/16235.misc
new file mode 100644
index 0000000000..b1533f93b6
--- /dev/null
+++ b/changelog.d/16235.misc
@@ -0,0 +1 @@
+Fix type checking when using the new version of Twisted.
diff --git a/poetry.lock b/poetry.lock
index 1cefabb358..872a863edc 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2866,44 +2866,43 @@ urllib3 = ">=1.26.0"
 
 [[package]]
 name = "twisted"
-version = "22.10.0"
+version = "23.8.0"
 description = "An asynchronous networking framework written in Python"
 optional = false
 python-versions = ">=3.7.1"
 files = [
-    {file = "Twisted-22.10.0-py3-none-any.whl", hash = "sha256:86c55f712cc5ab6f6d64e02503352464f0400f66d4f079096d744080afcccbd0"},
-    {file = "Twisted-22.10.0.tar.gz", hash = "sha256:32acbd40a94f5f46e7b42c109bfae2b302250945561783a8b7a059048f2d4d31"},
+    {file = "twisted-23.8.0-py3-none-any.whl", hash = "sha256:b8bdba145de120ffb36c20e6e071cce984e89fba798611ed0704216fb7f884cd"},
+    {file = "twisted-23.8.0.tar.gz", hash = "sha256:3c73360add17336a622c0d811c2a2ce29866b6e59b1125fd6509b17252098a24"},
 ]
 
 [package.dependencies]
-attrs = ">=19.2.0"
-Automat = ">=0.8.0"
+attrs = ">=21.3.0"
+automat = ">=0.8.0"
 constantly = ">=15.1"
 hyperlink = ">=17.1.1"
 idna = {version = ">=2.4", optional = true, markers = "extra == \"tls\""}
-incremental = ">=21.3.0"
+incremental = ">=22.10.0"
 pyopenssl = {version = ">=21.0.0", optional = true, markers = "extra == \"tls\""}
 service-identity = {version = ">=18.1.0", optional = true, markers = "extra == \"tls\""}
 twisted-iocpsupport = {version = ">=1.0.2,<2", markers = "platform_system == \"Windows\""}
-typing-extensions = ">=3.6.5"
-"zope.interface" = ">=4.4.2"
+typing-extensions = ">=3.10.0"
+zope-interface = ">=5"
 
 [package.extras]
-all-non-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3)", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pywin32 (!=226)", "service-identity (>=18.1.0)"]
-conch = ["appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "cryptography (>=2.6)", "pyasn1"]
-conch-nacl = ["PyNaCl", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "cryptography (>=2.6)", "pyasn1"]
+all-non-platform = ["twisted[conch,contextvars,http2,serial,test,tls]", "twisted[conch,contextvars,http2,serial,test,tls]"]
+conch = ["appdirs (>=1.4.0)", "bcrypt (>=3.1.3)", "cryptography (>=3.3)"]
 contextvars = ["contextvars (>=2.4,<3)"]
-dev = ["coverage (>=6b1,<7)", "pydoctor (>=22.9.0,<22.10.0)", "pyflakes (>=2.2,<3.0)", "python-subunit (>=1.4,<2.0)", "readthedocs-sphinx-ext (>=2.1,<3.0)", "sphinx (>=5.0,<6)", "sphinx-rtd-theme (>=1.0,<2.0)", "towncrier (>=22.8,<23.0)", "twistedchecker (>=0.7,<1.0)"]
-dev-release = ["pydoctor (>=22.9.0,<22.10.0)", "readthedocs-sphinx-ext (>=2.1,<3.0)", "sphinx (>=5.0,<6)", "sphinx-rtd-theme (>=1.0,<2.0)", "towncrier (>=22.8,<23.0)"]
-gtk-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3)", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pygobject", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pywin32 (!=226)", "service-identity (>=18.1.0)"]
+dev = ["coverage (>=6b1,<7)", "pyflakes (>=2.2,<3.0)", "python-subunit (>=1.4,<2.0)", "twisted[dev-release]", "twistedchecker (>=0.7,<1.0)"]
+dev-release = ["pydoctor (>=23.4.0,<23.5.0)", "pydoctor (>=23.4.0,<23.5.0)", "readthedocs-sphinx-ext (>=2.2,<3.0)", "readthedocs-sphinx-ext (>=2.2,<3.0)", "sphinx (>=5,<7)", "sphinx (>=5,<7)", "sphinx-rtd-theme (>=1.2,<2.0)", "sphinx-rtd-theme (>=1.2,<2.0)", "towncrier (>=22.12,<23.0)", "towncrier (>=22.12,<23.0)", "urllib3 (<2)", "urllib3 (<2)"]
+gtk-platform = ["pygobject", "pygobject", "twisted[all-non-platform]", "twisted[all-non-platform]"]
 http2 = ["h2 (>=3.0,<5.0)", "priority (>=1.1.0,<2.0)"]
-macos-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3)", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pyobjc-core", "pyobjc-framework-CFNetwork", "pyobjc-framework-Cocoa", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pywin32 (!=226)", "service-identity (>=18.1.0)"]
-mypy = ["PyHamcrest (>=1.9.0)", "PyNaCl", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3)", "coverage (>=6b1,<7)", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "mypy (==0.930)", "mypy-zope (==0.3.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pydoctor (>=22.9.0,<22.10.0)", "pyflakes (>=2.2,<3.0)", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "python-subunit (>=1.4,<2.0)", "pywin32 (!=226)", "readthedocs-sphinx-ext (>=2.1,<3.0)", "service-identity (>=18.1.0)", "sphinx (>=5.0,<6)", "sphinx-rtd-theme (>=1.0,<2.0)", "towncrier (>=22.8,<23.0)", "twistedchecker (>=0.7,<1.0)", "types-pyOpenSSL", "types-setuptools"]
-osx-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3)", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pyobjc-core", "pyobjc-framework-CFNetwork", "pyobjc-framework-Cocoa", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pywin32 (!=226)", "service-identity (>=18.1.0)"]
+macos-platform = ["pyobjc-core", "pyobjc-core", "pyobjc-framework-cfnetwork", "pyobjc-framework-cfnetwork", "pyobjc-framework-cocoa", "pyobjc-framework-cocoa", "twisted[all-non-platform]", "twisted[all-non-platform]"]
+mypy = ["mypy (==0.981)", "mypy-extensions (==0.4.3)", "mypy-zope (==0.3.11)", "twisted[all-non-platform,dev]", "types-pyopenssl", "types-setuptools"]
+osx-platform = ["twisted[macos-platform]", "twisted[macos-platform]"]
 serial = ["pyserial (>=3.0)", "pywin32 (!=226)"]
-test = ["PyHamcrest (>=1.9.0)", "cython-test-exception-raiser (>=1.0.2,<2)", "hypothesis (>=6.0,<7.0)"]
+test = ["cython-test-exception-raiser (>=1.0.2,<2)", "hypothesis (>=6.56)", "pyhamcrest (>=2)"]
 tls = ["idna (>=2.4)", "pyopenssl (>=21.0.0)", "service-identity (>=18.1.0)"]
-windows-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3)", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pywin32 (!=226)", "pywin32 (!=226)", "service-identity (>=18.1.0)"]
+windows-platform = ["pywin32 (!=226)", "pywin32 (!=226)", "twisted[all-non-platform]", "twisted[all-non-platform]"]
 
 [[package]]
 name = "twisted-iocpsupport"
diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py
index b3be7a86f0..5dc76ef588 100644
--- a/synapse/handlers/initial_sync.py
+++ b/synapse/handlers/initial_sync.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import logging
-from typing import TYPE_CHECKING, List, Optional, Tuple, cast
+from typing import TYPE_CHECKING, List, Optional, Tuple
 
 from synapse.api.constants import (
     AccountDataTypes,
@@ -23,7 +23,6 @@ from synapse.api.constants import (
     Membership,
 )
 from synapse.api.errors import SynapseError
-from synapse.events import EventBase
 from synapse.events.utils import SerializeEventConfig
 from synapse.events.validator import EventValidator
 from synapse.handlers.presence import format_user_presence_state
@@ -35,7 +34,6 @@ from synapse.types import (
     JsonDict,
     Requester,
     RoomStreamToken,
-    StateMap,
     StreamKeyType,
     StreamToken,
     UserID,
@@ -199,9 +197,7 @@ class InitialSyncHandler:
                     deferred_room_state = run_in_background(
                         self._state_storage_controller.get_state_for_events,
                         [event.event_id],
-                    ).addCallback(
-                        lambda states: cast(StateMap[EventBase], states[event.event_id])
-                    )
+                    ).addCallback(lambda states: states[event.event_id])
 
                 (messages, token), current_state = await make_deferred_yieldable(
                     gather_results(
diff --git a/synapse/logging/context.py b/synapse/logging/context.py
index 64c6ae4512..bf7e311026 100644
--- a/synapse/logging/context.py
+++ b/synapse/logging/context.py
@@ -728,7 +728,7 @@ async def _unwrap_awaitable(awaitable: Awaitable[R]) -> R:
 
 
 @overload
-def preserve_fn(  # type: ignore[misc]
+def preserve_fn(
     f: Callable[P, Awaitable[R]],
 ) -> Callable[P, "defer.Deferred[R]"]:
     # The `type: ignore[misc]` above suppresses
@@ -756,7 +756,7 @@ def preserve_fn(
 
 
 @overload
-def run_in_background(  # type: ignore[misc]
+def run_in_background(
     f: Callable[P, Awaitable[R]], *args: P.args, **kwargs: P.kwargs
 ) -> "defer.Deferred[R]":
     # The `type: ignore[misc]` above suppresses
diff --git a/synapse/util/gai_resolver.py b/synapse/util/gai_resolver.py
index 214eb17fbc..fecf829ade 100644
--- a/synapse/util/gai_resolver.py
+++ b/synapse/util/gai_resolver.py
@@ -136,7 +136,7 @@ class GAIResolver:
 
     # The types on IHostnameResolver is incorrect in Twisted, see
     # https://twistedmatrix.com/trac/ticket/10276
-    def resolveHostName(  # type: ignore[override]
+    def resolveHostName(
         self,
         resolutionReceiver: IResolutionReceiver,
         hostName: str,
-- 
cgit 1.5.1


From ea75346f6af8c182a42d1ca29119a10361693a7b Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 5 Sep 2023 09:58:51 -0400
Subject: Track presence state per-device and combine to a user state. (#16066)

Tracks presence on an individual per-device basis and combines
the per-device state into a per-user state. This should help in
situations where a user has multiple devices with conflicting status
(e.g. one is syncing with unavailable and one is syncing with online).

The tie-breaking is done by priority:

    BUSY > ONLINE > UNAVAILABLE > OFFLINE
---
 changelog.d/16066.bugfix        |   1 +
 changelog.d/16170.bugfix        |   1 +
 changelog.d/16170.misc          |   1 -
 changelog.d/16171.bugfix        |   1 +
 changelog.d/16171.misc          |   1 -
 changelog.d/16172.bugfix        |   1 +
 changelog.d/16172.misc          |   1 -
 synapse/api/presence.py         |  43 +++-
 synapse/handlers/presence.py    | 279 ++++++++++++++++++----
 tests/handlers/test_presence.py | 500 +++++++++++++++++++++++++++++++++++++++-
 10 files changed, 765 insertions(+), 64 deletions(-)
 create mode 100644 changelog.d/16066.bugfix
 create mode 100644 changelog.d/16170.bugfix
 delete mode 100644 changelog.d/16170.misc
 create mode 100644 changelog.d/16171.bugfix
 delete mode 100644 changelog.d/16171.misc
 create mode 100644 changelog.d/16172.bugfix
 delete mode 100644 changelog.d/16172.misc

(limited to 'synapse')

diff --git a/changelog.d/16066.bugfix b/changelog.d/16066.bugfix
new file mode 100644
index 0000000000..83649cf42a
--- /dev/null
+++ b/changelog.d/16066.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where multi-device accounts could cause high load due to presence.
diff --git a/changelog.d/16170.bugfix b/changelog.d/16170.bugfix
new file mode 100644
index 0000000000..83649cf42a
--- /dev/null
+++ b/changelog.d/16170.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where multi-device accounts could cause high load due to presence.
diff --git a/changelog.d/16170.misc b/changelog.d/16170.misc
deleted file mode 100644
index c950b54367..0000000000
--- a/changelog.d/16170.misc
+++ /dev/null
@@ -1 +0,0 @@
-Simplify presence code when using workers.
diff --git a/changelog.d/16171.bugfix b/changelog.d/16171.bugfix
new file mode 100644
index 0000000000..83649cf42a
--- /dev/null
+++ b/changelog.d/16171.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where multi-device accounts could cause high load due to presence.
diff --git a/changelog.d/16171.misc b/changelog.d/16171.misc
deleted file mode 100644
index 4d709cb56e..0000000000
--- a/changelog.d/16171.misc
+++ /dev/null
@@ -1 +0,0 @@
-Track per-device information in the presence code.
diff --git a/changelog.d/16172.bugfix b/changelog.d/16172.bugfix
new file mode 100644
index 0000000000..83649cf42a
--- /dev/null
+++ b/changelog.d/16172.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where multi-device accounts could cause high load due to presence.
diff --git a/changelog.d/16172.misc b/changelog.d/16172.misc
deleted file mode 100644
index 4d709cb56e..0000000000
--- a/changelog.d/16172.misc
+++ /dev/null
@@ -1 +0,0 @@
-Track per-device information in the presence code.
diff --git a/synapse/api/presence.py b/synapse/api/presence.py
index b80aa83cb3..b78f419994 100644
--- a/synapse/api/presence.py
+++ b/synapse/api/presence.py
@@ -20,18 +20,53 @@ from synapse.api.constants import PresenceState
 from synapse.types import JsonDict
 
 
+@attr.s(slots=True, auto_attribs=True)
+class UserDevicePresenceState:
+    """
+    Represents the current presence state of a user's device.
+
+    user_id: The user ID.
+    device_id: The user's device ID.
+    state: The presence state, see PresenceState.
+    last_active_ts: Time in msec that the device last interacted with server.
+    last_sync_ts: Time in msec that the device last *completed* a sync
+        (or event stream).
+    """
+
+    user_id: str
+    device_id: Optional[str]
+    state: str
+    last_active_ts: int
+    last_sync_ts: int
+
+    @classmethod
+    def default(
+        cls, user_id: str, device_id: Optional[str]
+    ) -> "UserDevicePresenceState":
+        """Returns a default presence state."""
+        return cls(
+            user_id=user_id,
+            device_id=device_id,
+            state=PresenceState.OFFLINE,
+            last_active_ts=0,
+            last_sync_ts=0,
+        )
+
+
 @attr.s(slots=True, frozen=True, auto_attribs=True)
 class UserPresenceState:
     """Represents the current presence state of the user.
 
-    user_id
-    last_active: Time in msec that the user last interacted with server.
-    last_federation_update: Time in msec since either a) we sent a presence
+    user_id: The user ID.
+    state: The presence state, see PresenceState.
+    last_active_ts: Time in msec that the user last interacted with server.
+    last_federation_update_ts: Time in msec since either a) we sent a presence
         update to other servers or b) we received a presence update, depending
         on if is a local user or not.
-    last_user_sync: Time in msec that the user last *completed* a sync
+    last_user_sync_ts: Time in msec that the user last *completed* a sync
         (or event stream).
     status_msg: User set status message.
+    currently_active: True if the user is currently syncing.
     """
 
     user_id: str
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index f31e18328b..80190838b7 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -13,13 +13,56 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""This module is responsible for keeping track of presence status of local
+"""
+This module is responsible for keeping track of presence status of local
 and remote users.
 
 The methods that define policy are:
     - PresenceHandler._update_states
     - PresenceHandler._handle_timeouts
     - should_notify
+
+# Tracking local presence
+
+For local users, presence is tracked on a per-device basis. When a user has multiple
+devices the user presence state is derived by coalescing the presence from each
+device:
+
+    BUSY > ONLINE > UNAVAILABLE > OFFLINE
+
+The time that each device was last active and last synced is tracked in order to
+automatically downgrade a device's presence state:
+
+    A device may move from ONLINE -> UNAVAILABLE, if it has not been active for
+    a period of time.
+
+    A device may go from any state -> OFFLINE, if it is not active and has not
+    synced for a period of time.
+
+The timeouts are handled using a wheel timer, which has coarse buckets. Timings
+do not need to be exact.
+
+Generally a device's presence state is updated whenever a user syncs (via the
+set_presence parameter), when the presence API is called, or if "pro-active"
+events occur, including:
+
+* Sending an event, receipt, read marker.
+* Updating typing status.
+
+The busy state has special status in that it is not downgraded by a call to
+sync with a lower priority state *and* it takes a long period of time to transition
+to offline.
+
+# Persisting (and restoring) presence
+
+For all users, presence is persisted on a per-user basis. Data is kept in-memory
+and persisted periodically. When Synapse starts each worker loads the current
+presence state and then tracks the presence stream to keep itself up-to-date.
+
+When restoring presence for local users a pseudo-device is created to match the
+user state; this device follows the normal timeout logic (see above) and will
+automatically be replaced with any information from currently available devices.
+
 """
 import abc
 import contextlib
@@ -30,6 +73,7 @@ from contextlib import contextmanager
 from types import TracebackType
 from typing import (
     TYPE_CHECKING,
+    AbstractSet,
     Any,
     Callable,
     Collection,
@@ -49,7 +93,7 @@ from prometheus_client import Counter
 import synapse.metrics
 from synapse.api.constants import EduTypes, EventTypes, Membership, PresenceState
 from synapse.api.errors import SynapseError
-from synapse.api.presence import UserPresenceState
+from synapse.api.presence import UserDevicePresenceState, UserPresenceState
 from synapse.appservice import ApplicationService
 from synapse.events.presence_router import PresenceRouter
 from synapse.logging.context import run_in_background
@@ -162,6 +206,7 @@ class BasePresenceHandler(abc.ABC):
             self.VALID_PRESENCE += (PresenceState.BUSY,)
 
         active_presence = self.store.take_presence_startup_info()
+        # The combined status across all user devices.
         self.user_to_current_state = {state.user_id: state for state in active_presence}
 
     @abc.abstractmethod
@@ -708,9 +753,27 @@ class PresenceHandler(BasePresenceHandler):
             lambda: len(self.user_to_current_state),
         )
 
+        # The per-device presence state, maps user to devices to per-device presence state.
+        self._user_to_device_to_current_state: Dict[
+            str, Dict[Optional[str], UserDevicePresenceState]
+        ] = {}
+
         now = self.clock.time_msec()
         if self._presence_enabled:
             for state in self.user_to_current_state.values():
+                # Create a pseudo-device to properly handle timeouts. This will
+                # be overridden by any "real" devices within SYNC_ONLINE_TIMEOUT.
+                pseudo_device_id = None
+                self._user_to_device_to_current_state[state.user_id] = {
+                    pseudo_device_id: UserDevicePresenceState(
+                        user_id=state.user_id,
+                        device_id=pseudo_device_id,
+                        state=state.state,
+                        last_active_ts=state.last_active_ts,
+                        last_sync_ts=state.last_user_sync_ts,
+                    )
+                }
+
                 self.wheel_timer.insert(
                     now=now, obj=state.user_id, then=state.last_active_ts + IDLE_TIMER
                 )
@@ -752,7 +815,7 @@ class PresenceHandler(BasePresenceHandler):
 
         # Keeps track of the number of *ongoing* syncs on other processes.
         #
-        # While any sync is ongoing on another process the user will never
+        # While any sync is ongoing on another process the user's device will never
         # go offline.
         #
         # Each process has a unique identifier and an update frequency. If
@@ -981,22 +1044,21 @@ class PresenceHandler(BasePresenceHandler):
 
         timers_fired_counter.inc(len(states))
 
-        syncing_user_ids = {
-            user_id
-            for (user_id, _), count in self._user_device_to_num_current_syncs.items()
+        # Set of user ID & device IDs which are currently syncing.
+        syncing_user_devices = {
+            user_id_device_id
+            for user_id_device_id, count in self._user_device_to_num_current_syncs.items()
             if count
         }
-        syncing_user_ids.update(
-            user_id
-            for user_id, _ in itertools.chain(
-                *self.external_process_to_current_syncs.values()
-            )
+        syncing_user_devices.update(
+            itertools.chain(*self.external_process_to_current_syncs.values())
         )
 
         changes = handle_timeouts(
             states,
             is_mine_fn=self.is_mine_id,
-            syncing_user_ids=syncing_user_ids,
+            syncing_user_devices=syncing_user_devices,
+            user_to_devices=self._user_to_device_to_current_state,
             now=now,
         )
 
@@ -1016,11 +1078,26 @@ class PresenceHandler(BasePresenceHandler):
 
         bump_active_time_counter.inc()
 
-        prev_state = await self.current_state_for_user(user_id)
+        now = self.clock.time_msec()
+
+        # Update the device information & mark the device as online if it was
+        # unavailable.
+        devices = self._user_to_device_to_current_state.setdefault(user_id, {})
+        device_state = devices.setdefault(
+            device_id,
+            UserDevicePresenceState.default(user_id, device_id),
+        )
+        device_state.last_active_ts = now
+        if device_state.state == PresenceState.UNAVAILABLE:
+            device_state.state = PresenceState.ONLINE
 
-        new_fields: Dict[str, Any] = {"last_active_ts": self.clock.time_msec()}
-        if prev_state.state == PresenceState.UNAVAILABLE:
-            new_fields["state"] = PresenceState.ONLINE
+        # Update the user state, this will always update last_active_ts and
+        # might update the presence state.
+        prev_state = await self.current_state_for_user(user_id)
+        new_fields: Dict[str, Any] = {
+            "last_active_ts": now,
+            "state": _combine_device_states(devices.values()),
+        }
 
         await self._update_states([prev_state.copy_and_replace(**new_fields)])
 
@@ -1132,6 +1209,12 @@ class PresenceHandler(BasePresenceHandler):
             if is_syncing and (user_id, device_id) not in process_presence:
                 process_presence.add((user_id, device_id))
             elif not is_syncing and (user_id, device_id) in process_presence:
+                devices = self._user_to_device_to_current_state.setdefault(user_id, {})
+                device_state = devices.setdefault(
+                    device_id, UserDevicePresenceState.default(user_id, device_id)
+                )
+                device_state.last_sync_ts = sync_time_msec
+
                 new_state = prev_state.copy_and_replace(
                     last_user_sync_ts=sync_time_msec
                 )
@@ -1151,11 +1234,24 @@ class PresenceHandler(BasePresenceHandler):
             process_presence = self.external_process_to_current_syncs.pop(
                 process_id, set()
             )
-            prev_states = await self.current_state_for_users(
-                {user_id for user_id, device_id in process_presence}
-            )
+
             time_now_ms = self.clock.time_msec()
 
+            # Mark each device as having a last sync time.
+            updated_users = set()
+            for user_id, device_id in process_presence:
+                device_state = self._user_to_device_to_current_state.setdefault(
+                    user_id, {}
+                ).setdefault(
+                    device_id, UserDevicePresenceState.default(user_id, device_id)
+                )
+
+                device_state.last_sync_ts = time_now_ms
+                updated_users.add(user_id)
+
+            # Update each user (and insert into the appropriate timers to check if
+            # they've gone offline).
+            prev_states = await self.current_state_for_users(updated_users)
             await self._update_states(
                 [
                     prev_state.copy_and_replace(last_user_sync_ts=time_now_ms)
@@ -1277,6 +1373,20 @@ class PresenceHandler(BasePresenceHandler):
         if prev_state.state == PresenceState.BUSY and is_sync:
             presence = PresenceState.BUSY
 
+        # Update the device specific information.
+        devices = self._user_to_device_to_current_state.setdefault(user_id, {})
+        device_state = devices.setdefault(
+            device_id,
+            UserDevicePresenceState.default(user_id, device_id),
+        )
+        device_state.state = presence
+        device_state.last_active_ts = now
+        if is_sync:
+            device_state.last_sync_ts = now
+
+        # Based on the state of each user's device calculate the new presence state.
+        presence = _combine_device_states(devices.values())
+
         new_fields = {"state": presence}
 
         if presence == PresenceState.ONLINE or presence == PresenceState.BUSY:
@@ -1873,7 +1983,8 @@ class PresenceEventSource(EventSource[int, UserPresenceState]):
 def handle_timeouts(
     user_states: List[UserPresenceState],
     is_mine_fn: Callable[[str], bool],
-    syncing_user_ids: Set[str],
+    syncing_user_devices: AbstractSet[Tuple[str, Optional[str]]],
+    user_to_devices: Dict[str, Dict[Optional[str], UserDevicePresenceState]],
     now: int,
 ) -> List[UserPresenceState]:
     """Checks the presence of users that have timed out and updates as
@@ -1882,7 +1993,8 @@ def handle_timeouts(
     Args:
         user_states: List of UserPresenceState's to check.
         is_mine_fn: Function that returns if a user_id is ours
-        syncing_user_ids: Set of user_ids with active syncs.
+        syncing_user_devices: A set of (user ID, device ID) tuples with active syncs.
+        user_to_devices: A map of user ID to device ID to UserDevicePresenceState.
         now: Current time in ms.
 
     Returns:
@@ -1891,9 +2003,16 @@ def handle_timeouts(
     changes = {}  # Actual changes we need to notify people about
 
     for state in user_states:
-        is_mine = is_mine_fn(state.user_id)
-
-        new_state = handle_timeout(state, is_mine, syncing_user_ids, now)
+        user_id = state.user_id
+        is_mine = is_mine_fn(user_id)
+
+        new_state = handle_timeout(
+            state,
+            is_mine,
+            syncing_user_devices,
+            user_to_devices.get(user_id, {}),
+            now,
+        )
         if new_state:
             changes[state.user_id] = new_state
 
@@ -1901,14 +2020,19 @@ def handle_timeouts(
 
 
 def handle_timeout(
-    state: UserPresenceState, is_mine: bool, syncing_user_ids: Set[str], now: int
+    state: UserPresenceState,
+    is_mine: bool,
+    syncing_device_ids: AbstractSet[Tuple[str, Optional[str]]],
+    user_devices: Dict[Optional[str], UserDevicePresenceState],
+    now: int,
 ) -> Optional[UserPresenceState]:
     """Checks the presence of the user to see if any of the timers have elapsed
 
     Args:
-        state
+        state: UserPresenceState to check.
         is_mine: Whether the user is ours
-        syncing_user_ids: Set of user_ids with active syncs.
+        syncing_device_ids: A set of (user ID, device ID) tuples with active syncs.
+        user_devices: A map of device ID to UserDevicePresenceState.
         now: Current time in ms.
 
     Returns:
@@ -1919,34 +2043,55 @@ def handle_timeout(
         return None
 
     changed = False
-    user_id = state.user_id
 
     if is_mine:
-        if state.state == PresenceState.ONLINE:
-            if now - state.last_active_ts > IDLE_TIMER:
-                # Currently online, but last activity ages ago so auto
-                # idle
-                state = state.copy_and_replace(state=PresenceState.UNAVAILABLE)
-                changed = True
-            elif now - state.last_active_ts > LAST_ACTIVE_GRANULARITY:
-                # So that we send down a notification that we've
-                # stopped updating.
+        # Check per-device whether the device should be considered idle or offline
+        # due to timeouts.
+        device_changed = False
+        offline_devices = []
+        for device_id, device_state in user_devices.items():
+            if device_state.state == PresenceState.ONLINE:
+                if now - device_state.last_active_ts > IDLE_TIMER:
+                    # Currently online, but last activity ages ago so auto
+                    # idle
+                    device_state.state = PresenceState.UNAVAILABLE
+                    device_changed = True
+
+            # If there has been no sync for a while (and none ongoing),
+            # set presence to offline.
+            if (state.user_id, device_id) not in syncing_device_ids:
+                # If the user has done something recently but hasn't synced,
+                # don't set them as offline.
+                sync_or_active = max(
+                    device_state.last_sync_ts, device_state.last_active_ts
+                )
+
+                if now - sync_or_active > SYNC_ONLINE_TIMEOUT:
+                    # Mark the device as going offline.
+                    offline_devices.append(device_id)
+                    device_changed = True
+
+        # Offline devices are not needed and do not add information.
+        for device_id in offline_devices:
+            user_devices.pop(device_id)
+
+        # If the presence state of the devices changed, then (maybe) update
+        # the user's overall presence state.
+        if device_changed:
+            new_presence = _combine_device_states(user_devices.values())
+            if new_presence != state.state:
+                state = state.copy_and_replace(state=new_presence)
                 changed = True
 
+        if now - state.last_active_ts > LAST_ACTIVE_GRANULARITY:
+            # So that we send down a notification that we've
+            # stopped updating.
+            changed = True
+
         if now - state.last_federation_update_ts > FEDERATION_PING_INTERVAL:
             # Need to send ping to other servers to ensure they don't
             # timeout and set us to offline
             changed = True
-
-        # If there are have been no sync for a while (and none ongoing),
-        # set presence to offline
-        if user_id not in syncing_user_ids:
-            # If the user has done something recently but hasn't synced,
-            # don't set them as offline.
-            sync_or_active = max(state.last_user_sync_ts, state.last_active_ts)
-            if now - sync_or_active > SYNC_ONLINE_TIMEOUT:
-                state = state.copy_and_replace(state=PresenceState.OFFLINE)
-                changed = True
     else:
         # We expect to be poked occasionally by the other side.
         # This is to protect against forgetful/buggy servers, so that
@@ -2036,6 +2181,46 @@ def handle_update(
     return new_state, persist_and_notify, federation_ping
 
 
+PRESENCE_BY_PRIORITY = {
+    PresenceState.BUSY: 4,
+    PresenceState.ONLINE: 3,
+    PresenceState.UNAVAILABLE: 2,
+    PresenceState.OFFLINE: 1,
+}
+
+
+def _combine_device_states(
+    device_states: Iterable[UserDevicePresenceState],
+) -> str:
+    """
+    Find the device to use presence information from.
+
+    Orders devices by priority, then last_active_ts.
+
+    Args:
+        device_states: An iterable of device presence states
+
+    Return:
+        The combined presence state.
+    """
+
+    # Based on (all) the user's devices calculate the new presence state.
+    presence = PresenceState.OFFLINE
+    last_active_ts = -1
+
+    # Find the device to use the presence state of based on the presence priority,
+    # but tie-break with how recently the device has been seen.
+    for device_state in device_states:
+        if (PRESENCE_BY_PRIORITY[device_state.state], device_state.last_active_ts) > (
+            PRESENCE_BY_PRIORITY[presence],
+            last_active_ts,
+        ):
+            presence = device_state.state
+            last_active_ts = device_state.last_active_ts
+
+    return presence
+
+
 async def get_interested_parties(
     store: DataStore, presence_router: PresenceRouter, states: List[UserPresenceState]
 ) -> Tuple[Dict[str, List[UserPresenceState]], Dict[str, List[UserPresenceState]]]:
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index 88a16193a3..914415740a 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -21,7 +21,7 @@ from signedjson.key import generate_signing_key
 from twisted.test.proto_helpers import MemoryReactor
 
 from synapse.api.constants import EventTypes, Membership, PresenceState
-from synapse.api.presence import UserPresenceState
+from synapse.api.presence import UserDevicePresenceState, UserPresenceState
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.events.builder import EventBuilder
 from synapse.federation.sender import FederationSender
@@ -352,6 +352,7 @@ class PresenceTimeoutTestCase(unittest.TestCase):
 
     def test_idle_timer(self) -> None:
         user_id = "@foo:bar"
+        device_id = "dev-1"
         status_msg = "I'm here!"
         now = 5000000
 
@@ -362,8 +363,21 @@ class PresenceTimeoutTestCase(unittest.TestCase):
             last_user_sync_ts=now,
             status_msg=status_msg,
         )
+        device_state = UserDevicePresenceState(
+            user_id=user_id,
+            device_id=device_id,
+            state=state.state,
+            last_active_ts=state.last_active_ts,
+            last_sync_ts=state.last_user_sync_ts,
+        )
 
-        new_state = handle_timeout(state, is_mine=True, syncing_user_ids=set(), now=now)
+        new_state = handle_timeout(
+            state,
+            is_mine=True,
+            syncing_device_ids=set(),
+            user_devices={device_id: device_state},
+            now=now,
+        )
 
         self.assertIsNotNone(new_state)
         assert new_state is not None
@@ -376,6 +390,7 @@ class PresenceTimeoutTestCase(unittest.TestCase):
         presence state into unavailable.
         """
         user_id = "@foo:bar"
+        device_id = "dev-1"
         status_msg = "I'm here!"
         now = 5000000
 
@@ -386,8 +401,21 @@ class PresenceTimeoutTestCase(unittest.TestCase):
             last_user_sync_ts=now,
             status_msg=status_msg,
         )
+        device_state = UserDevicePresenceState(
+            user_id=user_id,
+            device_id=device_id,
+            state=state.state,
+            last_active_ts=state.last_active_ts,
+            last_sync_ts=state.last_user_sync_ts,
+        )
 
-        new_state = handle_timeout(state, is_mine=True, syncing_user_ids=set(), now=now)
+        new_state = handle_timeout(
+            state,
+            is_mine=True,
+            syncing_device_ids=set(),
+            user_devices={device_id: device_state},
+            now=now,
+        )
 
         self.assertIsNotNone(new_state)
         assert new_state is not None
@@ -396,6 +424,7 @@ class PresenceTimeoutTestCase(unittest.TestCase):
 
     def test_sync_timeout(self) -> None:
         user_id = "@foo:bar"
+        device_id = "dev-1"
         status_msg = "I'm here!"
         now = 5000000
 
@@ -406,8 +435,21 @@ class PresenceTimeoutTestCase(unittest.TestCase):
             last_user_sync_ts=now - SYNC_ONLINE_TIMEOUT - 1,
             status_msg=status_msg,
         )
+        device_state = UserDevicePresenceState(
+            user_id=user_id,
+            device_id=device_id,
+            state=state.state,
+            last_active_ts=state.last_active_ts,
+            last_sync_ts=state.last_user_sync_ts,
+        )
 
-        new_state = handle_timeout(state, is_mine=True, syncing_user_ids=set(), now=now)
+        new_state = handle_timeout(
+            state,
+            is_mine=True,
+            syncing_device_ids=set(),
+            user_devices={device_id: device_state},
+            now=now,
+        )
 
         self.assertIsNotNone(new_state)
         assert new_state is not None
@@ -416,6 +458,7 @@ class PresenceTimeoutTestCase(unittest.TestCase):
 
     def test_sync_online(self) -> None:
         user_id = "@foo:bar"
+        device_id = "dev-1"
         status_msg = "I'm here!"
         now = 5000000
 
@@ -426,9 +469,20 @@ class PresenceTimeoutTestCase(unittest.TestCase):
             last_user_sync_ts=now - SYNC_ONLINE_TIMEOUT - 1,
             status_msg=status_msg,
         )
+        device_state = UserDevicePresenceState(
+            user_id=user_id,
+            device_id=device_id,
+            state=state.state,
+            last_active_ts=state.last_active_ts,
+            last_sync_ts=state.last_user_sync_ts,
+        )
 
         new_state = handle_timeout(
-            state, is_mine=True, syncing_user_ids={user_id}, now=now
+            state,
+            is_mine=True,
+            syncing_device_ids={(user_id, device_id)},
+            user_devices={device_id: device_state},
+            now=now,
         )
 
         self.assertIsNotNone(new_state)
@@ -438,6 +492,7 @@ class PresenceTimeoutTestCase(unittest.TestCase):
 
     def test_federation_ping(self) -> None:
         user_id = "@foo:bar"
+        device_id = "dev-1"
         status_msg = "I'm here!"
         now = 5000000
 
@@ -449,14 +504,28 @@ class PresenceTimeoutTestCase(unittest.TestCase):
             last_federation_update_ts=now - FEDERATION_PING_INTERVAL - 1,
             status_msg=status_msg,
         )
+        device_state = UserDevicePresenceState(
+            user_id=user_id,
+            device_id=device_id,
+            state=state.state,
+            last_active_ts=state.last_active_ts,
+            last_sync_ts=state.last_user_sync_ts,
+        )
 
-        new_state = handle_timeout(state, is_mine=True, syncing_user_ids=set(), now=now)
+        new_state = handle_timeout(
+            state,
+            is_mine=True,
+            syncing_device_ids=set(),
+            user_devices={device_id: device_state},
+            now=now,
+        )
 
         self.assertIsNotNone(new_state)
         self.assertEqual(state, new_state)
 
     def test_no_timeout(self) -> None:
         user_id = "@foo:bar"
+        device_id = "dev-1"
         now = 5000000
 
         state = UserPresenceState.default(user_id)
@@ -466,8 +535,21 @@ class PresenceTimeoutTestCase(unittest.TestCase):
             last_user_sync_ts=now,
             last_federation_update_ts=now,
         )
+        device_state = UserDevicePresenceState(
+            user_id=user_id,
+            device_id=device_id,
+            state=state.state,
+            last_active_ts=state.last_active_ts,
+            last_sync_ts=state.last_user_sync_ts,
+        )
 
-        new_state = handle_timeout(state, is_mine=True, syncing_user_ids=set(), now=now)
+        new_state = handle_timeout(
+            state,
+            is_mine=True,
+            syncing_device_ids=set(),
+            user_devices={device_id: device_state},
+            now=now,
+        )
 
         self.assertIsNone(new_state)
 
@@ -485,8 +567,9 @@ class PresenceTimeoutTestCase(unittest.TestCase):
             status_msg=status_msg,
         )
 
+        # Note that this is a remote user so we do not have their device information.
         new_state = handle_timeout(
-            state, is_mine=False, syncing_user_ids=set(), now=now
+            state, is_mine=False, syncing_device_ids=set(), user_devices={}, now=now
         )
 
         self.assertIsNotNone(new_state)
@@ -496,6 +579,7 @@ class PresenceTimeoutTestCase(unittest.TestCase):
 
     def test_last_active(self) -> None:
         user_id = "@foo:bar"
+        device_id = "dev-1"
         status_msg = "I'm here!"
         now = 5000000
 
@@ -507,8 +591,21 @@ class PresenceTimeoutTestCase(unittest.TestCase):
             last_federation_update_ts=now,
             status_msg=status_msg,
         )
+        device_state = UserDevicePresenceState(
+            user_id=user_id,
+            device_id=device_id,
+            state=state.state,
+            last_active_ts=state.last_active_ts,
+            last_sync_ts=state.last_user_sync_ts,
+        )
 
-        new_state = handle_timeout(state, is_mine=True, syncing_user_ids=set(), now=now)
+        new_state = handle_timeout(
+            state,
+            is_mine=True,
+            syncing_device_ids=set(),
+            user_devices={device_id: device_state},
+            now=now,
+        )
 
         self.assertIsNotNone(new_state)
         self.assertEqual(state, new_state)
@@ -579,7 +676,7 @@ class PresenceHandlerInitTestCase(unittest.HomeserverTestCase):
         [
             (PresenceState.BUSY, PresenceState.BUSY),
             (PresenceState.ONLINE, PresenceState.ONLINE),
-            (PresenceState.UNAVAILABLE, PresenceState.UNAVAILABLE),
+            (PresenceState.UNAVAILABLE, PresenceState.ONLINE),
             # Offline syncs don't update the state.
             (PresenceState.OFFLINE, PresenceState.ONLINE),
         ]
@@ -800,6 +897,389 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
         # we should now be online
         self.assertEqual(state.state, PresenceState.ONLINE)
 
+    @parameterized.expand(
+        # A list of tuples of 5 strings and a boolean:
+        #
+        # * The presence state of device 1.
+        # * The presence state of device 2.
+        # * The expected user presence state after both devices have synced.
+        # * The expected user presence state after device 1 has idled.
+        # * The expected user presence state after device 2 has idled.
+        # * True to use workers, False a monolith.
+        [
+            (*cases, workers)
+            for workers in (False, True)
+            for cases in [
+                # If both devices have the same state, online should eventually idle.
+                # Otherwise, the state doesn't change.
+                (
+                    PresenceState.ONLINE,
+                    PresenceState.ONLINE,
+                    PresenceState.ONLINE,
+                    PresenceState.ONLINE,
+                    PresenceState.UNAVAILABLE,
+                ),
+                (
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.UNAVAILABLE,
+                ),
+                (
+                    PresenceState.OFFLINE,
+                    PresenceState.OFFLINE,
+                    PresenceState.OFFLINE,
+                    PresenceState.OFFLINE,
+                    PresenceState.OFFLINE,
+                ),
+                # If the second device has a "lower" state it should fallback to it.
+                (
+                    PresenceState.ONLINE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.ONLINE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.UNAVAILABLE,
+                ),
+                (
+                    PresenceState.ONLINE,
+                    PresenceState.OFFLINE,
+                    PresenceState.ONLINE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.UNAVAILABLE,
+                ),
+                (
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.OFFLINE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.UNAVAILABLE,
+                ),
+                # If the second device has a "higher" state it should override.
+                (
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.ONLINE,
+                    PresenceState.ONLINE,
+                    PresenceState.ONLINE,
+                    PresenceState.UNAVAILABLE,
+                ),
+                (
+                    PresenceState.OFFLINE,
+                    PresenceState.ONLINE,
+                    PresenceState.ONLINE,
+                    PresenceState.ONLINE,
+                    PresenceState.UNAVAILABLE,
+                ),
+                (
+                    PresenceState.OFFLINE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.UNAVAILABLE,
+                ),
+            ]
+        ],
+        name_func=lambda testcase_func, param_num, params: f"{testcase_func.__name__}_{param_num}_{'workers' if params.args[5] else 'monolith'}",
+    )
+    @unittest.override_config({"experimental_features": {"msc3026_enabled": True}})
+    def test_set_presence_from_syncing_multi_device(
+        self,
+        dev_1_state: str,
+        dev_2_state: str,
+        expected_state_1: str,
+        expected_state_2: str,
+        expected_state_3: str,
+        test_with_workers: bool,
+    ) -> None:
+        """
+        Test the behaviour of multiple devices syncing at the same time.
+
+        Roughly the user's presence state should be set to the "highest" priority
+        of all the devices. When a device then goes offline its state should be
+        discarded and the next highest should win.
+
+        Note that these tests use the idle timer (and don't close the syncs), it
+        is unlikely that a *single* sync would last this long, but is close enough
+        to continually syncing with that current state.
+        """
+        user_id = f"@test:{self.hs.config.server.server_name}"
+
+        # By default, we call /sync against the main process.
+        worker_presence_handler = self.presence_handler
+        if test_with_workers:
+            # Create a worker and use it to handle /sync traffic instead.
+            # This is used to test that presence changes get replicated from workers
+            # to the main process correctly.
+            worker_to_sync_against = self.make_worker_hs(
+                "synapse.app.generic_worker", {"worker_name": "synchrotron"}
+            )
+            worker_presence_handler = worker_to_sync_against.get_presence_handler()
+
+        # 1. Sync with the first device.
+        self.get_success(
+            worker_presence_handler.user_syncing(
+                user_id,
+                "dev-1",
+                affect_presence=dev_1_state != PresenceState.OFFLINE,
+                presence_state=dev_1_state,
+            ),
+            by=0.01,
+        )
+
+        # 2. Wait half the idle timer.
+        self.reactor.advance(IDLE_TIMER / 1000 / 2)
+        self.reactor.pump([0.1])
+
+        # 3. Sync with the second device.
+        self.get_success(
+            worker_presence_handler.user_syncing(
+                user_id,
+                "dev-2",
+                affect_presence=dev_2_state != PresenceState.OFFLINE,
+                presence_state=dev_2_state,
+            ),
+            by=0.01,
+        )
+
+        # 4. Assert the expected presence state.
+        state = self.get_success(
+            self.presence_handler.get_state(UserID.from_string(user_id))
+        )
+        self.assertEqual(state.state, expected_state_1)
+        if test_with_workers:
+            state = self.get_success(
+                worker_presence_handler.get_state(UserID.from_string(user_id))
+            )
+            self.assertEqual(state.state, expected_state_1)
+
+        # When testing with workers, make another random sync (with any *different*
+        # user) to keep the process information from expiring.
+        #
+        # This is due to EXTERNAL_PROCESS_EXPIRY being equivalent to IDLE_TIMER.
+        if test_with_workers:
+            with self.get_success(
+                worker_presence_handler.user_syncing(
+                    f"@other-user:{self.hs.config.server.server_name}",
+                    "dev-3",
+                    affect_presence=True,
+                    presence_state=PresenceState.ONLINE,
+                ),
+                by=0.01,
+            ):
+                pass
+
+        # 5. Advance such that the first device should be discarded (the idle timer),
+        # then pump so the _handle_timeouts function is called.
+        self.reactor.advance(IDLE_TIMER / 1000 / 2)
+        self.reactor.pump([0.01])
+
+        # 6. Assert the expected presence state.
+        state = self.get_success(
+            self.presence_handler.get_state(UserID.from_string(user_id))
+        )
+        self.assertEqual(state.state, expected_state_2)
+        if test_with_workers:
+            state = self.get_success(
+                worker_presence_handler.get_state(UserID.from_string(user_id))
+            )
+            self.assertEqual(state.state, expected_state_2)
+
+        # 7. Advance such that the second device should be discarded (half the idle timer),
+        # then pump so the _handle_timeouts function is called.
+        self.reactor.advance(IDLE_TIMER / 1000 / 2)
+        self.reactor.pump([0.1])
+
+        # 8. The devices are still "syncing" (the sync context managers were never
+        # closed), so might idle.
+        state = self.get_success(
+            self.presence_handler.get_state(UserID.from_string(user_id))
+        )
+        self.assertEqual(state.state, expected_state_3)
+        if test_with_workers:
+            state = self.get_success(
+                worker_presence_handler.get_state(UserID.from_string(user_id))
+            )
+            self.assertEqual(state.state, expected_state_3)
+
+    @parameterized.expand(
+        # A list of tuples of 4 strings and a boolean:
+        #
+        # * The presence state of device 1.
+        # * The presence state of device 2.
+        # * The expected user presence state after both devices have synced.
+        # * The expected user presence state after device 1 has stopped syncing.
+        # * True to use workers, False a monolith.
+        [
+            (*cases, workers)
+            for workers in (False, True)
+            for cases in [
+                # If both devices have the same state, nothing exciting should happen.
+                (
+                    PresenceState.ONLINE,
+                    PresenceState.ONLINE,
+                    PresenceState.ONLINE,
+                    PresenceState.ONLINE,
+                ),
+                (
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.UNAVAILABLE,
+                ),
+                (
+                    PresenceState.OFFLINE,
+                    PresenceState.OFFLINE,
+                    PresenceState.OFFLINE,
+                    PresenceState.OFFLINE,
+                ),
+                # If the second device has a "lower" state it should fallback to it.
+                (
+                    PresenceState.ONLINE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.ONLINE,
+                    PresenceState.UNAVAILABLE,
+                ),
+                (
+                    PresenceState.ONLINE,
+                    PresenceState.OFFLINE,
+                    PresenceState.ONLINE,
+                    PresenceState.OFFLINE,
+                ),
+                (
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.OFFLINE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.OFFLINE,
+                ),
+                # If the second device has a "higher" state it should override.
+                (
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.ONLINE,
+                    PresenceState.ONLINE,
+                    PresenceState.ONLINE,
+                ),
+                (
+                    PresenceState.OFFLINE,
+                    PresenceState.ONLINE,
+                    PresenceState.ONLINE,
+                    PresenceState.ONLINE,
+                ),
+                (
+                    PresenceState.OFFLINE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.UNAVAILABLE,
+                ),
+            ]
+        ],
+        name_func=lambda testcase_func, param_num, params: f"{testcase_func.__name__}_{param_num}_{'workers' if params.args[4] else 'monolith'}",
+    )
+    @unittest.override_config({"experimental_features": {"msc3026_enabled": True}})
+    def test_set_presence_from_non_syncing_multi_device(
+        self,
+        dev_1_state: str,
+        dev_2_state: str,
+        expected_state_1: str,
+        expected_state_2: str,
+        test_with_workers: bool,
+    ) -> None:
+        """
+        Test the behaviour of multiple devices syncing at the same time.
+
+        Roughly the user's presence state should be set to the "highest" priority
+        of all the devices. When a device then goes offline its state should be
+        discarded and the next highest should win.
+
+        Note that this test closes each sync (by exiting its context manager)
+        and then advances past the sync timeout, rather than the idle timer, so
+        that the state of the device which stopped syncing is discarded.
+        """
+        user_id = f"@test:{self.hs.config.server.server_name}"
+
+        # By default, we call /sync against the main process.
+        worker_presence_handler = self.presence_handler
+        if test_with_workers:
+            # Create a worker and use it to handle /sync traffic instead.
+            # This is used to test that presence changes get replicated from workers
+            # to the main process correctly.
+            worker_to_sync_against = self.make_worker_hs(
+                "synapse.app.generic_worker", {"worker_name": "synchrotron"}
+            )
+            worker_presence_handler = worker_to_sync_against.get_presence_handler()
+
+        # 1. Sync with the first device.
+        sync_1 = self.get_success(
+            worker_presence_handler.user_syncing(
+                user_id,
+                "dev-1",
+                affect_presence=dev_1_state != PresenceState.OFFLINE,
+                presence_state=dev_1_state,
+            ),
+            by=0.1,
+        )
+
+        # 2. Sync with the second device.
+        sync_2 = self.get_success(
+            worker_presence_handler.user_syncing(
+                user_id,
+                "dev-2",
+                affect_presence=dev_2_state != PresenceState.OFFLINE,
+                presence_state=dev_2_state,
+            ),
+            by=0.1,
+        )
+
+        # 3. Assert the expected presence state.
+        state = self.get_success(
+            self.presence_handler.get_state(UserID.from_string(user_id))
+        )
+        self.assertEqual(state.state, expected_state_1)
+        if test_with_workers:
+            state = self.get_success(
+                worker_presence_handler.get_state(UserID.from_string(user_id))
+            )
+            self.assertEqual(state.state, expected_state_1)
+
+        # 4. Disconnect the first device.
+        with sync_1:
+            pass
+
+        # 5. Advance such that the first device should be discarded (the sync timeout),
+        # then pump so the _handle_timeouts function is called.
+        self.reactor.advance(SYNC_ONLINE_TIMEOUT / 1000)
+        self.reactor.pump([5])
+
+        # 6. Assert the expected presence state.
+        state = self.get_success(
+            self.presence_handler.get_state(UserID.from_string(user_id))
+        )
+        self.assertEqual(state.state, expected_state_2)
+        if test_with_workers:
+            state = self.get_success(
+                worker_presence_handler.get_state(UserID.from_string(user_id))
+            )
+            self.assertEqual(state.state, expected_state_2)
+
+        # 7. Disconnect the second device.
+        with sync_2:
+            pass
+
+        # 8. Advance such that the second device should be discarded (the sync timeout),
+        # then pump so _handle_timeouts function to called.
+        self.reactor.advance(SYNC_ONLINE_TIMEOUT / 1000)
+        self.reactor.pump([5])
+
+        # 9. There are no more devices, should be offline.
+        state = self.get_success(
+            self.presence_handler.get_state(UserID.from_string(user_id))
+        )
+        self.assertEqual(state.state, PresenceState.OFFLINE)
+        if test_with_workers:
+            state = self.get_success(
+                worker_presence_handler.get_state(UserID.from_string(user_id))
+            )
+            self.assertEqual(state.state, PresenceState.OFFLINE)
+
     def test_set_presence_from_syncing_keeps_status(self) -> None:
         """Test that presence set by syncing retains status message"""
         status_msg = "I'm here!"
-- 
cgit 1.5.1


From 8b5013dcbc5db16f0f771898da493e812be6fc8a Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 5 Sep 2023 10:39:38 -0400
Subject: Time out busy presence status & test multi-device busy (#16174)

Add a (long) timeout after which a "busy" device is no longer considered online.
This does *not* match MSC3026, but is a reasonable thing for an
implementation to do.

Expands tests for the (unstable) busy presence with multiple devices.
---
 changelog.d/16174.bugfix        |   1 +
 synapse/handlers/presence.py    |  19 +++++++-
 tests/handlers/test_presence.py | 104 ++++++++++++++++++++++++++++++++++++++--
 3 files changed, 120 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/16174.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16174.bugfix b/changelog.d/16174.bugfix
new file mode 100644
index 0000000000..83649cf42a
--- /dev/null
+++ b/changelog.d/16174.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where multi-device accounts could cause high load due to presence.
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 80190838b7..a4b05b72e7 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -155,6 +155,8 @@ LAST_ACTIVE_GRANULARITY = 60 * 1000
 # How long to wait until a new /events or /sync request before assuming
 # the client has gone.
 SYNC_ONLINE_TIMEOUT = 30 * 1000
+# Busy status waits longer, but does eventually go offline.
+BUSY_ONLINE_TIMEOUT = 60 * 60 * 1000
 
 # How long to wait before marking the user as idle. Compared against last active
 IDLE_TIMER = 5 * 60 * 1000
@@ -2066,7 +2068,15 @@ def handle_timeout(
                     device_state.last_sync_ts, device_state.last_active_ts
                 )
 
-                if now - sync_or_active > SYNC_ONLINE_TIMEOUT:
+                # Implementations aren't meant to timeout a device with a busy
+                # state, but it needs to timeout *eventually* or else the user
+                # will be stuck in that state.
+                online_timeout = (
+                    BUSY_ONLINE_TIMEOUT
+                    if device_state.state == PresenceState.BUSY
+                    else SYNC_ONLINE_TIMEOUT
+                )
+                if now - sync_or_active > online_timeout:
                     # Mark the device as going offline.
                     offline_devices.append(device_id)
                     device_changed = True
@@ -2166,6 +2176,13 @@ def handle_update(
                 new_state = new_state.copy_and_replace(last_federation_update_ts=now)
                 federation_ping = True
 
+        if new_state.state == PresenceState.BUSY:
+            wheel_timer.insert(
+                now=now,
+                obj=user_id,
+                then=new_state.last_user_sync_ts + BUSY_ONLINE_TIMEOUT,
+            )
+
     else:
         wheel_timer.insert(
             now=now,
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index 914415740a..638787b029 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -26,6 +26,7 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.events.builder import EventBuilder
 from synapse.federation.sender import FederationSender
 from synapse.handlers.presence import (
+    BUSY_ONLINE_TIMEOUT,
     EXTERNAL_PROCESS_EXPIRY,
     FEDERATION_PING_INTERVAL,
     FEDERATION_TIMEOUT,
@@ -912,6 +913,13 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
             for cases in [
                 # If both devices have the same state, online should eventually idle.
                 # Otherwise, the state doesn't change.
+                (
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
                 (
                     PresenceState.ONLINE,
                     PresenceState.ONLINE,
@@ -933,7 +941,29 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
                     PresenceState.OFFLINE,
                     PresenceState.OFFLINE,
                 ),
-                # If the second device has a "lower" state it should fallback to it.
+                # If the second device has a "lower" state it should fallback to it,
+                # except for "busy" which overrides.
+                (
+                    PresenceState.BUSY,
+                    PresenceState.ONLINE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
+                    PresenceState.BUSY,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
+                    PresenceState.BUSY,
+                    PresenceState.OFFLINE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
                 (
                     PresenceState.ONLINE,
                     PresenceState.UNAVAILABLE,
@@ -956,6 +986,27 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
                     PresenceState.UNAVAILABLE,
                 ),
                 # If the second device has a "higher" state it should override.
+                (
+                    PresenceState.ONLINE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
+                    PresenceState.OFFLINE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
                 (
                     PresenceState.UNAVAILABLE,
                     PresenceState.ONLINE,
@@ -1114,6 +1165,12 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
             for workers in (False, True)
             for cases in [
                 # If both devices have the same state, nothing exciting should happen.
+                (
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
                 (
                     PresenceState.ONLINE,
                     PresenceState.ONLINE,
@@ -1132,7 +1189,26 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
                     PresenceState.OFFLINE,
                     PresenceState.OFFLINE,
                 ),
-                # If the second device has a "lower" state it should fallback to it.
+                # If the second device has a "lower" state it should fallback to it,
+                # except for "busy" which overrides.
+                (
+                    PresenceState.BUSY,
+                    PresenceState.ONLINE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
+                    PresenceState.BUSY,
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
+                    PresenceState.BUSY,
+                    PresenceState.OFFLINE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
                 (
                     PresenceState.ONLINE,
                     PresenceState.UNAVAILABLE,
@@ -1152,6 +1228,24 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
                     PresenceState.OFFLINE,
                 ),
                 # If the second device has a "higher" state it should override.
+                (
+                    PresenceState.ONLINE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
+                    PresenceState.UNAVAILABLE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
+                (
+                    PresenceState.OFFLINE,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                    PresenceState.BUSY,
+                ),
                 (
                     PresenceState.UNAVAILABLE,
                     PresenceState.ONLINE,
@@ -1266,7 +1360,11 @@ class PresenceHandlerTestCase(BaseMultiWorkerStreamTestCase):
 
         # 8. Advance such that the second device should be discarded (the sync timeout),
         # then pump so _handle_timeouts function to called.
-        self.reactor.advance(SYNC_ONLINE_TIMEOUT / 1000)
+        if dev_1_state == PresenceState.BUSY or dev_2_state == PresenceState.BUSY:
+            timeout = BUSY_ONLINE_TIMEOUT
+        else:
+            timeout = SYNC_ONLINE_TIMEOUT
+        self.reactor.advance(timeout / 1000)
         self.reactor.pump([5])
 
         # 9. There are no more devices, should be offline.
-- 
cgit 1.5.1


From a2b8814d64714e00acee662d81206944a9a6a56d Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 5 Sep 2023 12:11:05 -0400
Subject: Fix incorrect docstring for Ratelimiter. (#16255)

---
 changelog.d/16255.misc      | 1 +
 synapse/api/ratelimiting.py | 7 ++++---
 2 files changed, 5 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/16255.misc

(limited to 'synapse')

diff --git a/changelog.d/16255.misc b/changelog.d/16255.misc
new file mode 100644
index 0000000000..94d6aff1d6
--- /dev/null
+++ b/changelog.d/16255.misc
@@ -0,0 +1 @@
+Fix incorrect docstring for `Ratelimiter`.
diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py
index 887b214d64..02ae45e8b3 100644
--- a/synapse/api/ratelimiting.py
+++ b/synapse/api/ratelimiting.py
@@ -40,7 +40,7 @@ class Ratelimiter:
     - the cost C of this request in tokens.
     Then, if there is room in the bucket for C tokens (T + C <= `burst_count`),
     the request is permitted and `cost` tokens are added to the bucket.
-    Otherwise the request is denied, and the bucket continues to hold T tokens.
+    Otherwise, the request is denied, and the bucket continues to hold T tokens.
 
     This means that the limiter enforces an average request frequency of `rate_hz`,
     while accumulating a buffer of up to `burst_count` requests which can be consumed
@@ -55,9 +55,10 @@ class Ratelimiter:
     request.
 
     Args:
+        store: The datastore providing get_ratelimit_for_user.
         clock: A homeserver clock, for retrieving the current time
-        rate_hz: The long term number of actions that can be performed in a second.
-        burst_count: How many actions that can be performed before being limited.
+        cfg: The ratelimit configuration for this rate limiter including the
+            allowed rate and burst count.
     """
 
     def __init__(
-- 
cgit 1.5.1


From c9cec2daed00406b5337a8ce7064e3394ceaf656 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Tue, 5 Sep 2023 20:27:41 +0100
Subject: Fix bug where we kept re-requesting a remote server's key repeatedly.
  (#16257)

* Correctly handle multiple rows per server/key

* Newsfile
---
 changelog.d/16257.bugfix               |  1 +
 synapse/storage/databases/main/keys.py | 17 +++++++++++------
 2 files changed, 12 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/16257.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16257.bugfix b/changelog.d/16257.bugfix
new file mode 100644
index 0000000000..28a5319749
--- /dev/null
+++ b/changelog.d/16257.bugfix
@@ -0,0 +1 @@
+Fix long-standing bug where we kept re-requesting a remote server's key repeatedly, potentially causing delays in receiving events over federation.
diff --git a/synapse/storage/databases/main/keys.py b/synapse/storage/databases/main/keys.py
index a3b4744855..57aa4921e1 100644
--- a/synapse/storage/databases/main/keys.py
+++ b/synapse/storage/databases/main/keys.py
@@ -221,12 +221,17 @@ class KeyStore(CacheInvalidationWorkerStore):
             """Processes a batch of keys to fetch, and adds the result to `keys`."""
 
             # batch_iter always returns tuples so it's safe to do len(batch)
-            sql = """
-            SELECT server_name, key_id, key_json, ts_valid_until_ms
-            FROM server_keys_json WHERE 1=0
-            """ + " OR (server_name=? AND key_id=?)" * len(
-                batch
-            )
+            where_clause = " OR (server_name=? AND key_id=?)" * len(batch)
+
+            # `server_keys_json` can have multiple entries per server (one per
+            # remote server we fetched from, if using perspectives). Order by
+            # `ts_added_ms` so the most recently fetched one always wins.
+            sql = f"""
+                SELECT server_name, key_id, key_json, ts_valid_until_ms
+                FROM server_keys_json WHERE 1=0
+                {where_clause}
+                ORDER BY ts_added_ms
+            """
 
             txn.execute(sql, tuple(itertools.chain.from_iterable(batch)))
 
-- 
cgit 1.5.1


From b1d71c687ae55ce67e4cfc82c475e61f959dfeb0 Mon Sep 17 00:00:00 2001
From: Travis Ralston <travisr@matrix.org>
Date: Tue, 5 Sep 2023 13:45:39 -0600
Subject: Add MSC4040 `matrix-fed` service lookups (#16137)

---
 changelog.d/16137.feature                          |   1 +
 scripts-dev/federation_client.py                   |  12 +
 synapse/http/federation/matrix_federation_agent.py |  29 +-
 .../federation/test_matrix_federation_agent.py     | 323 +++++++++++++++++++--
 4 files changed, 331 insertions(+), 34 deletions(-)
 create mode 100644 changelog.d/16137.feature

(limited to 'synapse')

diff --git a/changelog.d/16137.feature b/changelog.d/16137.feature
new file mode 100644
index 0000000000..bba6f161cd
--- /dev/null
+++ b/changelog.d/16137.feature
@@ -0,0 +1 @@
+Support resolving homeservers using `matrix-fed` DNS SRV records from [MSC4040](https://github.com/matrix-org/matrix-spec-proposals/pull/4040).
diff --git a/scripts-dev/federation_client.py b/scripts-dev/federation_client.py
index 5ad334b4d8..e8baeac5e2 100755
--- a/scripts-dev/federation_client.py
+++ b/scripts-dev/federation_client.py
@@ -329,6 +329,17 @@ class MatrixConnectionAdapter(HTTPAdapter):
                 raise ValueError("Invalid host:port '%s'" % (server_name,))
             return out[0], port, out[0]
 
+        # Look up SRV for Matrix 1.8 `matrix-fed` service first
+        try:
+            srv = srvlookup.lookup("matrix-fed", "tcp", server_name)[0]
+            print(
+                f"SRV lookup on _matrix-fed._tcp.{server_name} gave {srv}",
+                file=sys.stderr,
+            )
+            return srv.host, srv.port, server_name
+        except Exception:
+            pass
+        # Fall back to deprecated `matrix` service
         try:
             srv = srvlookup.lookup("matrix", "tcp", server_name)[0]
             print(
@@ -337,6 +348,7 @@ class MatrixConnectionAdapter(HTTPAdapter):
             )
             return srv.host, srv.port, server_name
         except Exception:
+            # Fall even further back to just port 8448
             return server_name, 8448, server_name
 
     @staticmethod
diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py
index 91a24efcd0..a3a396bb37 100644
--- a/synapse/http/federation/matrix_federation_agent.py
+++ b/synapse/http/federation/matrix_federation_agent.py
@@ -399,15 +399,34 @@ class MatrixHostnameEndpoint:
         if port or _is_ip_literal(host):
             return [Server(host, port or 8448)]
 
+        # Check _matrix-fed._tcp SRV record.
         logger.debug("Looking up SRV record for %s", host.decode(errors="replace"))
+        server_list = await self._srv_resolver.resolve_service(
+            b"_matrix-fed._tcp." + host
+        )
+
+        if server_list:
+            if logger.isEnabledFor(logging.DEBUG):
+                logger.debug(
+                    "Got %s from SRV lookup for %s",
+                    ", ".join(map(str, server_list)),
+                    host.decode(errors="replace"),
+                )
+            return server_list
+
+        # No _matrix-fed._tcp SRV record, fall back to legacy _matrix._tcp SRV record.
+        logger.debug(
+            "Looking up deprecated SRV record for %s", host.decode(errors="replace")
+        )
         server_list = await self._srv_resolver.resolve_service(b"_matrix._tcp." + host)
 
         if server_list:
-            logger.debug(
-                "Got %s from SRV lookup for %s",
-                ", ".join(map(str, server_list)),
-                host.decode(errors="replace"),
-            )
+            if logger.isEnabledFor(logging.DEBUG):
+                logger.debug(
+                    "Got %s from deprecated SRV lookup for %s",
+                    ", ".join(map(str, server_list)),
+                    host.decode(errors="replace"),
+                )
             return server_list
 
         # No SRV records, so we fallback to host and 8448
diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py
index 0d17f2fe5b..9f63fa6fa8 100644
--- a/tests/http/federation/test_matrix_federation_agent.py
+++ b/tests/http/federation/test_matrix_federation_agent.py
@@ -15,7 +15,7 @@ import base64
 import logging
 import os
 from typing import Generator, List, Optional, cast
-from unittest.mock import AsyncMock, patch
+from unittest.mock import AsyncMock, call, patch
 
 import treq
 from netaddr import IPSet
@@ -651,9 +651,9 @@ class MatrixFederationAgentTests(unittest.TestCase):
         # .well-known request fails.
         self.reactor.pump((0.4,))
 
-        # now there should be a SRV lookup
-        self.mock_resolver.resolve_service.assert_called_once_with(
-            b"_matrix._tcp.testserv1"
+        # now there should be two SRV lookups
+        self.mock_resolver.resolve_service.assert_has_calls(
+            [call(b"_matrix-fed._tcp.testserv1"), call(b"_matrix._tcp.testserv1")]
         )
 
         # we should fall back to a direct connection
@@ -737,9 +737,9 @@ class MatrixFederationAgentTests(unittest.TestCase):
         # .well-known request fails.
         self.reactor.pump((0.4,))
 
-        # now there should be a SRV lookup
-        self.mock_resolver.resolve_service.assert_called_once_with(
-            b"_matrix._tcp.testserv"
+        # now there should be two SRV lookups
+        self.mock_resolver.resolve_service.assert_has_calls(
+            [call(b"_matrix-fed._tcp.testserv"), call(b"_matrix._tcp.testserv")]
         )
 
         # we should fall back to a direct connection
@@ -788,9 +788,12 @@ class MatrixFederationAgentTests(unittest.TestCase):
             content=b'{ "m.server": "target-server" }',
         )
 
-        # there should be a SRV lookup
-        self.mock_resolver.resolve_service.assert_called_once_with(
-            b"_matrix._tcp.target-server"
+        # there should be two SRV lookups
+        self.mock_resolver.resolve_service.assert_has_calls(
+            [
+                call(b"_matrix-fed._tcp.target-server"),
+                call(b"_matrix._tcp.target-server"),
+            ]
         )
 
         # now we should get a connection to the target server
@@ -878,9 +881,12 @@ class MatrixFederationAgentTests(unittest.TestCase):
 
         self.reactor.pump((0.1,))
 
-        # there should be a SRV lookup
-        self.mock_resolver.resolve_service.assert_called_once_with(
-            b"_matrix._tcp.target-server"
+        # there should be two SRV lookups
+        self.mock_resolver.resolve_service.assert_has_calls(
+            [
+                call(b"_matrix-fed._tcp.target-server"),
+                call(b"_matrix._tcp.target-server"),
+            ]
         )
 
         # now we should get a connection to the target server
@@ -942,9 +948,9 @@ class MatrixFederationAgentTests(unittest.TestCase):
             client_factory, expected_sni=b"testserv", content=b"NOT JSON"
         )
 
-        # now there should be a SRV lookup
-        self.mock_resolver.resolve_service.assert_called_once_with(
-            b"_matrix._tcp.testserv"
+        # now there should be two SRV lookups
+        self.mock_resolver.resolve_service.assert_has_calls(
+            [call(b"_matrix-fed._tcp.testserv"), call(b"_matrix._tcp.testserv")]
         )
 
         # we should fall back to a direct connection
@@ -1016,14 +1022,14 @@ class MatrixFederationAgentTests(unittest.TestCase):
         # there should be no requests
         self.assertEqual(len(http_proto.requests), 0)
 
-        # and there should be a SRV lookup instead
-        self.mock_resolver.resolve_service.assert_called_once_with(
-            b"_matrix._tcp.testserv"
+        # and there should be two SRV lookups instead
+        self.mock_resolver.resolve_service.assert_has_calls(
+            [call(b"_matrix-fed._tcp.testserv"), call(b"_matrix._tcp.testserv")]
         )
 
     def test_get_hostname_srv(self) -> None:
         """
-        Test the behaviour when there is a single SRV record
+        Test the behaviour when there is a single SRV record for _matrix-fed.
         """
         self.agent = self._make_agent()
 
@@ -1039,7 +1045,51 @@ class MatrixFederationAgentTests(unittest.TestCase):
 
         # the request for a .well-known will have failed with a DNS lookup error.
         self.mock_resolver.resolve_service.assert_called_once_with(
-            b"_matrix._tcp.testserv"
+            b"_matrix-fed._tcp.testserv"
+        )
+
+        # Make sure treq is trying to connect
+        clients = self.reactor.tcpClients
+        self.assertEqual(len(clients), 1)
+        (host, port, client_factory, _timeout, _bindAddress) = clients[0]
+        self.assertEqual(host, "1.2.3.4")
+        self.assertEqual(port, 8443)
+
+        # make a test server, and wire up the client
+        http_server = self._make_connection(client_factory, expected_sni=b"testserv")
+
+        self.assertEqual(len(http_server.requests), 1)
+        request = http_server.requests[0]
+        self.assertEqual(request.method, b"GET")
+        self.assertEqual(request.path, b"/foo/bar")
+        self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"testserv"])
+
+        # finish the request
+        request.finish()
+        self.reactor.pump((0.1,))
+        self.successResultOf(test_d)
+
+    def test_get_hostname_srv_legacy(self) -> None:
+        """
+        Test the behaviour when there is a single SRV record for _matrix.
+        """
+        self.agent = self._make_agent()
+
+        # Return no entries for the _matrix-fed lookup, and a response for _matrix.
+        self.mock_resolver.resolve_service.side_effect = [
+            [],
+            [Server(host=b"srvtarget", port=8443)],
+        ]
+        self.reactor.lookups["srvtarget"] = "1.2.3.4"
+
+        test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar")
+
+        # Nothing happened yet
+        self.assertNoResult(test_d)
+
+        # the request for a .well-known will have failed with a DNS lookup error.
+        self.mock_resolver.resolve_service.assert_has_calls(
+            [call(b"_matrix-fed._tcp.testserv"), call(b"_matrix._tcp.testserv")]
         )
 
         # Make sure treq is trying to connect
@@ -1065,7 +1115,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
 
     def test_get_well_known_srv(self) -> None:
         """Test the behaviour when the .well-known redirects to a place where there
-        is a SRV.
+        is a _matrix-fed SRV record.
         """
         self.agent = self._make_agent()
 
@@ -1096,7 +1146,72 @@ class MatrixFederationAgentTests(unittest.TestCase):
 
         # there should be a SRV lookup
         self.mock_resolver.resolve_service.assert_called_once_with(
-            b"_matrix._tcp.target-server"
+            b"_matrix-fed._tcp.target-server"
+        )
+
+        # now we should get a connection to the target of the SRV record
+        self.assertEqual(len(clients), 2)
+        (host, port, client_factory, _timeout, _bindAddress) = clients[1]
+        self.assertEqual(host, "5.6.7.8")
+        self.assertEqual(port, 8443)
+
+        # make a test server, and wire up the client
+        http_server = self._make_connection(
+            client_factory, expected_sni=b"target-server"
+        )
+
+        self.assertEqual(len(http_server.requests), 1)
+        request = http_server.requests[0]
+        self.assertEqual(request.method, b"GET")
+        self.assertEqual(request.path, b"/foo/bar")
+        self.assertEqual(
+            request.requestHeaders.getRawHeaders(b"host"), [b"target-server"]
+        )
+
+        # finish the request
+        request.finish()
+        self.reactor.pump((0.1,))
+        self.successResultOf(test_d)
+
+    def test_get_well_known_srv_legacy(self) -> None:
+        """Test the behaviour when the .well-known redirects to a place where there
+        is a _matrix SRV record.
+        """
+        self.agent = self._make_agent()
+
+        self.reactor.lookups["testserv"] = "1.2.3.4"
+        self.reactor.lookups["srvtarget"] = "5.6.7.8"
+
+        test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar")
+
+        # Nothing happened yet
+        self.assertNoResult(test_d)
+
+        # there should be an attempt to connect on port 443 for the .well-known
+        clients = self.reactor.tcpClients
+        self.assertEqual(len(clients), 1)
+        (host, port, client_factory, _timeout, _bindAddress) = clients[0]
+        self.assertEqual(host, "1.2.3.4")
+        self.assertEqual(port, 443)
+
+        # Return no entries for the _matrix-fed lookup, and a response for _matrix.
+        self.mock_resolver.resolve_service.side_effect = [
+            [],
+            [Server(host=b"srvtarget", port=8443)],
+        ]
+
+        self._handle_well_known_connection(
+            client_factory,
+            expected_sni=b"testserv",
+            content=b'{ "m.server": "target-server" }',
+        )
+
+        # there should be two SRV lookups
+        self.mock_resolver.resolve_service.assert_has_calls(
+            [
+                call(b"_matrix-fed._tcp.target-server"),
+                call(b"_matrix._tcp.target-server"),
+            ]
         )
 
         # now we should get a connection to the target of the SRV record
@@ -1158,8 +1273,11 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.reactor.pump((0.4,))
 
         # now there should have been a SRV lookup
-        self.mock_resolver.resolve_service.assert_called_once_with(
-            b"_matrix._tcp.xn--bcher-kva.com"
+        self.mock_resolver.resolve_service.assert_has_calls(
+            [
+                call(b"_matrix-fed._tcp.xn--bcher-kva.com"),
+                call(b"_matrix._tcp.xn--bcher-kva.com"),
+            ]
         )
 
         # We should fall back to port 8448
@@ -1188,7 +1306,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.successResultOf(test_d)
 
     def test_idna_srv_target(self) -> None:
-        """test the behaviour when the target of a SRV record has idna chars"""
+        """test the behaviour when the target of a _matrix-fed SRV record has idna chars"""
         self.agent = self._make_agent()
 
         self.mock_resolver.resolve_service.return_value = [
@@ -1204,7 +1322,57 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.assertNoResult(test_d)
 
         self.mock_resolver.resolve_service.assert_called_once_with(
-            b"_matrix._tcp.xn--bcher-kva.com"
+            b"_matrix-fed._tcp.xn--bcher-kva.com"
+        )
+
+        # Make sure treq is trying to connect
+        clients = self.reactor.tcpClients
+        self.assertEqual(len(clients), 1)
+        (host, port, client_factory, _timeout, _bindAddress) = clients[0]
+        self.assertEqual(host, "1.2.3.4")
+        self.assertEqual(port, 8443)
+
+        # make a test server, and wire up the client
+        http_server = self._make_connection(
+            client_factory, expected_sni=b"xn--bcher-kva.com"
+        )
+
+        self.assertEqual(len(http_server.requests), 1)
+        request = http_server.requests[0]
+        self.assertEqual(request.method, b"GET")
+        self.assertEqual(request.path, b"/foo/bar")
+        self.assertEqual(
+            request.requestHeaders.getRawHeaders(b"host"), [b"xn--bcher-kva.com"]
+        )
+
+        # finish the request
+        request.finish()
+        self.reactor.pump((0.1,))
+        self.successResultOf(test_d)
+
+    def test_idna_srv_target_legacy(self) -> None:
+        """test the behaviour when the target of a _matrix SRV record has idna chars"""
+        self.agent = self._make_agent()
+
+        # Return no entries for the _matrix-fed lookup, and a response for _matrix.
+        self.mock_resolver.resolve_service.side_effect = [
+            [],
+            [Server(host=b"xn--trget-3qa.com", port=8443)],
+        ]  # târget.com
+        self.reactor.lookups["xn--trget-3qa.com"] = "1.2.3.4"
+
+        test_d = self._make_get_request(
+            b"matrix-federation://xn--bcher-kva.com/foo/bar"
+        )
+
+        # Nothing happened yet
+        self.assertNoResult(test_d)
+
+        self.mock_resolver.resolve_service.assert_has_calls(
+            [
+                call(b"_matrix-fed._tcp.xn--bcher-kva.com"),
+                call(b"_matrix._tcp.xn--bcher-kva.com"),
+            ]
         )
 
         # Make sure treq is trying to connect
@@ -1394,7 +1562,7 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.assertIsNone(r.delegated_server)
 
     def test_srv_fallbacks(self) -> None:
-        """Test that other SRV results are tried if the first one fails."""
+        """Test that other SRV results are tried if the first one fails for _matrix-fed SRV."""
         self.agent = self._make_agent()
 
         self.mock_resolver.resolve_service.return_value = [
@@ -1409,7 +1577,67 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.assertNoResult(test_d)
 
         self.mock_resolver.resolve_service.assert_called_once_with(
-            b"_matrix._tcp.testserv"
+            b"_matrix-fed._tcp.testserv"
+        )
+
+        # We should see an attempt to connect to the first server
+        clients = self.reactor.tcpClients
+        self.assertEqual(len(clients), 1)
+        (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0)
+        self.assertEqual(host, "1.2.3.4")
+        self.assertEqual(port, 8443)
+
+        # Fail the connection
+        client_factory.clientConnectionFailed(None, Exception("nope"))
+
+        # There's a 300ms delay in HostnameEndpoint
+        self.reactor.pump((0.4,))
+
+        # Hasn't failed yet
+        self.assertNoResult(test_d)
+
+        # We should now see an attempt to connect to the second server
+        clients = self.reactor.tcpClients
+        self.assertEqual(len(clients), 1)
+        (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0)
+        self.assertEqual(host, "1.2.3.4")
+        self.assertEqual(port, 8444)
+
+        # make a test server, and wire up the client
+        http_server = self._make_connection(client_factory, expected_sni=b"testserv")
+
+        self.assertEqual(len(http_server.requests), 1)
+        request = http_server.requests[0]
+        self.assertEqual(request.method, b"GET")
+        self.assertEqual(request.path, b"/foo/bar")
+        self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"testserv"])
+
+        # finish the request
+        request.finish()
+        self.reactor.pump((0.1,))
+        self.successResultOf(test_d)
+
+    def test_srv_fallbacks_legacy(self) -> None:
+        """Test that other SRV results are tried if the first one fails for _matrix SRV."""
+        self.agent = self._make_agent()
+
+        # Return no entries for the _matrix-fed lookup, and a response for _matrix.
+        self.mock_resolver.resolve_service.side_effect = [
+            [],
+            [
+                Server(host=b"target.com", port=8443),
+                Server(host=b"target.com", port=8444),
+            ],
+        ]
+        self.reactor.lookups["target.com"] = "1.2.3.4"
+
+        test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar")
+
+        # Nothing happened yet
+        self.assertNoResult(test_d)
+
+        self.mock_resolver.resolve_service.assert_has_calls(
+            [call(b"_matrix-fed._tcp.testserv"), call(b"_matrix._tcp.testserv")]
         )
 
         # We should see an attempt to connect to the first server
@@ -1449,6 +1677,43 @@ class MatrixFederationAgentTests(unittest.TestCase):
         self.reactor.pump((0.1,))
         self.successResultOf(test_d)
 
+    def test_srv_no_fallback_to_legacy(self) -> None:
+        """Test that _matrix SRV results are not tried if the _matrix-fed one fails."""
+        self.agent = self._make_agent()
+
+        # Return a failing entry for _matrix-fed.
+        self.mock_resolver.resolve_service.side_effect = [
+            [Server(host=b"target.com", port=8443)],
+            [],
+        ]
+        self.reactor.lookups["target.com"] = "1.2.3.4"
+
+        test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar")
+
+        # Nothing happened yet
+        self.assertNoResult(test_d)
+
+        # Only the _matrix-fed is checked, _matrix is ignored.
+        self.mock_resolver.resolve_service.assert_called_once_with(
+            b"_matrix-fed._tcp.testserv"
+        )
+
+        # We should see an attempt to connect to the first server
+        clients = self.reactor.tcpClients
+        self.assertEqual(len(clients), 1)
+        (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0)
+        self.assertEqual(host, "1.2.3.4")
+        self.assertEqual(port, 8443)
+
+        # Fail the connection
+        client_factory.clientConnectionFailed(None, Exception("nope"))
+
+        # There's a 300ms delay in HostnameEndpoint
+        self.reactor.pump((0.4,))
+
+        # Failed to resolve a server.
+        self.assertFailure(test_d, Exception)
+
 
 class TestCachePeriodFromHeaders(unittest.TestCase):
     def test_cache_control(self) -> None:
-- 
cgit 1.5.1


From 1e571cd66437ea2455c203dafb94c20ba48cdcc1 Mon Sep 17 00:00:00 2001
From: Will Hunt <will@half-shot.uk>
Date: Tue, 5 Sep 2023 20:46:57 +0100
Subject: Fix appservices being unable to handle to_device messages for
 multiple users (#16251)

---
 changelog.d/16251.bugfix                      |   1 +
 synapse/storage/databases/main/deviceinbox.py |   2 +-
 tests/handlers/test_appservice.py             | 125 ++++++++++++++++++++++++++
 3 files changed, 127 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/16251.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16251.bugfix b/changelog.d/16251.bugfix
new file mode 100644
index 0000000000..6d3157c7aa
--- /dev/null
+++ b/changelog.d/16251.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where appservices using MSC2409 to receive to_device messages would only get messages for one user.
\ No newline at end of file
diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py
index b471fcb064..271cdf923c 100644
--- a/synapse/storage/databases/main/deviceinbox.py
+++ b/synapse/storage/databases/main/deviceinbox.py
@@ -349,7 +349,7 @@ class DeviceInboxWorkerStore(SQLBaseStore):
                     table="devices",
                     column="user_id",
                     iterable=user_ids_to_query,
-                    keyvalues={"user_id": user_id, "hidden": False},
+                    keyvalues={"hidden": False},
                     retcols=("device_id",),
                 )
 
diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py
index 46d022092e..a7e6cdd66a 100644
--- a/tests/handlers/test_appservice.py
+++ b/tests/handlers/test_appservice.py
@@ -422,6 +422,18 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase):
             "exclusive_as_user", "password", self.exclusive_as_user_device_id
         )
 
+        self.exclusive_as_user_2_device_id = "exclusive_as_device_2"
+        self.exclusive_as_user_2 = self.register_user("exclusive_as_user_2", "password")
+        self.exclusive_as_user_2_token = self.login(
+            "exclusive_as_user_2", "password", self.exclusive_as_user_2_device_id
+        )
+
+        self.exclusive_as_user_3_device_id = "exclusive_as_device_3"
+        self.exclusive_as_user_3 = self.register_user("exclusive_as_user_3", "password")
+        self.exclusive_as_user_3_token = self.login(
+            "exclusive_as_user_3", "password", self.exclusive_as_user_3_device_id
+        )
+
     def _notify_interested_services(self) -> None:
         # This is normally set in `notify_interested_services` but we need to call the
         # internal async version so the reactor gets pushed to completion.
@@ -849,6 +861,119 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase):
         for count in service_id_to_message_count.values():
             self.assertEqual(count, number_of_messages)
 
+    @unittest.override_config(
+        {"experimental_features": {"msc2409_to_device_messages_enabled": True}}
+    )
+    def test_application_services_receive_local_to_device_for_many_users(self) -> None:
+        """
+        Test that when a user sends a to-device message to many users
+        in an application service's user namespace, the
+        application service will receive all of them.
+        """
+        interested_appservice = self._register_application_service(
+            namespaces={
+                ApplicationService.NS_USERS: [
+                    {
+                        "regex": "@exclusive_as_user:.+",
+                        "exclusive": True,
+                    },
+                    {
+                        "regex": "@exclusive_as_user_2:.+",
+                        "exclusive": True,
+                    },
+                    {
+                        "regex": "@exclusive_as_user_3:.+",
+                        "exclusive": True,
+                    },
+                ],
+            },
+        )
+
+        # Have local_user send a to-device message to exclusive_as_users
+        message_content = {"some_key": "some really interesting value"}
+        chan = self.make_request(
+            "PUT",
+            "/_matrix/client/r0/sendToDevice/m.room_key_request/3",
+            content={
+                "messages": {
+                    self.exclusive_as_user: {
+                        self.exclusive_as_user_device_id: message_content
+                    },
+                    self.exclusive_as_user_2: {
+                        self.exclusive_as_user_2_device_id: message_content
+                    },
+                    self.exclusive_as_user_3: {
+                        self.exclusive_as_user_3_device_id: message_content
+                    },
+                }
+            },
+            access_token=self.local_user_token,
+        )
+        self.assertEqual(chan.code, 200, chan.result)
+
+        # Have each exclusive_as_user send a to-device message to local_user
+        for user_token in [
+            self.exclusive_as_user_token,
+            self.exclusive_as_user_2_token,
+            self.exclusive_as_user_3_token,
+        ]:
+            chan = self.make_request(
+                "PUT",
+                "/_matrix/client/r0/sendToDevice/m.room_key_request/4",
+                content={
+                    "messages": {
+                        self.local_user: {self.local_user_device_id: message_content}
+                    }
+                },
+                access_token=user_token,
+            )
+            self.assertEqual(chan.code, 200, chan.result)
+
+        # Check if our application service - that is interested in exclusive_as_user - received
+        # the to-device message as part of an AS transaction.
+        # Only the local_user -> exclusive_as_user to-device message should have been forwarded to the AS.
+        #
+        # The uninterested application service should not have been notified at all.
+        self.send_mock.assert_called_once()
+        (
+            service,
+            _events,
+            _ephemeral,
+            to_device_messages,
+            _otks,
+            _fbks,
+            _device_list_summary,
+        ) = self.send_mock.call_args[0]
+
+        # Assert that the transaction was sent to the interested application service
+        self.assertEqual(service, interested_appservice)
+
+        # Assert expected number of messages
+        self.assertEqual(len(to_device_messages), 3)
+
+        for device_msg in to_device_messages:
+            self.assertEqual(device_msg["type"], "m.room_key_request")
+            self.assertEqual(device_msg["sender"], self.local_user)
+            self.assertEqual(device_msg["content"], message_content)
+
+        self.assertEqual(to_device_messages[0]["to_user_id"], self.exclusive_as_user)
+        self.assertEqual(
+            to_device_messages[0]["to_device_id"],
+            self.exclusive_as_user_device_id,
+        )
+
+        self.assertEqual(to_device_messages[1]["to_user_id"], self.exclusive_as_user_2)
+        self.assertEqual(
+            to_device_messages[1]["to_device_id"],
+            self.exclusive_as_user_2_device_id,
+        )
+
+        self.assertEqual(to_device_messages[2]["to_user_id"], self.exclusive_as_user_3)
+        self.assertEqual(
+            to_device_messages[2]["to_device_id"],
+            self.exclusive_as_user_3_device_id,
+        )
+
     def _register_application_service(
         self,
         namespaces: Optional[Dict[str, Iterable[Dict]]] = None,
-- 
cgit 1.5.1


From 4f1840a88ad3a93244fc23149c56245704eab824 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Wed, 6 Sep 2023 09:30:53 +0200
Subject: Delete device messages asynchronously and in staged batches (#16240)

---
 changelog.d/16240.misc                             |  1 +
 synapse/handlers/device.py                         | 48 ++++++++++++++++++++++
 synapse/handlers/presence.py                       |  4 +-
 synapse/handlers/sync.py                           | 16 ++++++--
 synapse/storage/databases/main/deviceinbox.py      | 26 +++++++++---
 synapse/storage/databases/main/devices.py          |  8 ----
 synapse/storage/databases/main/receipts.py         |  6 +--
 synapse/storage/engines/_base.py                   |  6 +++
 synapse/storage/engines/postgres.py                |  4 ++
 synapse/storage/engines/sqlite.py                  |  4 ++
 .../schema/main/delta/48/group_unique_indexes.py   |  4 +-
 synapse/util/task_scheduler.py                     | 17 ++++----
 tests/handlers/test_device.py                      | 47 +++++++++++++++++++++
 13 files changed, 154 insertions(+), 37 deletions(-)
 create mode 100644 changelog.d/16240.misc

(limited to 'synapse')

diff --git a/changelog.d/16240.misc b/changelog.d/16240.misc
new file mode 100644
index 0000000000..4f266c1fb0
--- /dev/null
+++ b/changelog.d/16240.misc
@@ -0,0 +1 @@
+Delete device messages asynchronously and in staged batches using the task scheduler.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 763f56dfc1..9e52af5f13 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -43,9 +43,12 @@ from synapse.metrics.background_process_metrics import (
 )
 from synapse.types import (
     JsonDict,
+    JsonMapping,
+    ScheduledTask,
     StrCollection,
     StreamKeyType,
     StreamToken,
+    TaskStatus,
     UserID,
     get_domain_from_id,
     get_verify_key_from_cross_signing_key,
@@ -62,6 +65,7 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
+DELETE_DEVICE_MSGS_TASK_NAME = "delete_device_messages"
 MAX_DEVICE_DISPLAY_NAME_LEN = 100
 DELETE_STALE_DEVICES_INTERVAL_MS = 24 * 60 * 60 * 1000
 
@@ -78,6 +82,7 @@ class DeviceWorkerHandler:
         self._appservice_handler = hs.get_application_service_handler()
         self._state_storage = hs.get_storage_controllers().state
         self._auth_handler = hs.get_auth_handler()
+        self._event_sources = hs.get_event_sources()
         self.server_name = hs.hostname
         self._msc3852_enabled = hs.config.experimental.msc3852_enabled
         self._query_appservices_for_keys = (
@@ -386,6 +391,7 @@ class DeviceHandler(DeviceWorkerHandler):
         self._account_data_handler = hs.get_account_data_handler()
         self._storage_controllers = hs.get_storage_controllers()
         self.db_pool = hs.get_datastores().main.db_pool
+        self._task_scheduler = hs.get_task_scheduler()
 
         self.device_list_updater = DeviceListUpdater(hs, self)
 
@@ -419,6 +425,10 @@ class DeviceHandler(DeviceWorkerHandler):
                 self._delete_stale_devices,
             )
 
+        self._task_scheduler.register_action(
+            self._delete_device_messages, DELETE_DEVICE_MSGS_TASK_NAME
+        )
+
     def _check_device_name_length(self, name: Optional[str]) -> None:
         """
         Checks whether a device name is longer than the maximum allowed length.
@@ -530,6 +540,7 @@ class DeviceHandler(DeviceWorkerHandler):
             user_id: The user to delete devices from.
             device_ids: The list of device IDs to delete
         """
+        to_device_stream_id = self._event_sources.get_current_token().to_device_key
 
         try:
             await self.store.delete_devices(user_id, device_ids)
@@ -559,12 +570,49 @@ class DeviceHandler(DeviceWorkerHandler):
                     f"org.matrix.msc3890.local_notification_settings.{device_id}",
                 )
 
+            # Delete device messages asynchronously and in batches using the task scheduler
+            await self._task_scheduler.schedule_task(
+                DELETE_DEVICE_MSGS_TASK_NAME,
+                resource_id=device_id,
+                params={
+                    "user_id": user_id,
+                    "device_id": device_id,
+                    "up_to_stream_id": to_device_stream_id,
+                },
+            )
+
         # Pushers are deleted after `delete_access_tokens_for_user` is called so that
         # modules using `on_logged_out` hook can use them if needed.
         await self.hs.get_pusherpool().remove_pushers_by_devices(user_id, device_ids)
 
         await self.notify_device_update(user_id, device_ids)
 
+    DEVICE_MSGS_DELETE_BATCH_LIMIT = 100
+
+    async def _delete_device_messages(
+        self,
+        task: ScheduledTask,
+    ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
+        """Scheduler task to delete device messages in batch of `DEVICE_MSGS_DELETE_BATCH_LIMIT`."""
+        assert task.params is not None
+        user_id = task.params["user_id"]
+        device_id = task.params["device_id"]
+        up_to_stream_id = task.params["up_to_stream_id"]
+
+        res = await self.store.delete_messages_for_device(
+            user_id=user_id,
+            device_id=device_id,
+            up_to_stream_id=up_to_stream_id,
+            limit=DeviceHandler.DEVICE_MSGS_DELETE_BATCH_LIMIT,
+        )
+
+        if res < DeviceHandler.DEVICE_MSGS_DELETE_BATCH_LIMIT:
+            return TaskStatus.COMPLETE, None, None
+        else:
+            # There are probably still device messages to be deleted, let's keep the task active and it will be run
+            # again in a subsequent scheduler loop run (probably the next one, if not too many tasks are running).
+            return TaskStatus.ACTIVE, None, None
+
     async def update_device(self, user_id: str, device_id: str, content: dict) -> None:
         """Update the given device
 
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index a4b05b72e7..375c7d0901 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -183,6 +183,7 @@ class BasePresenceHandler(abc.ABC):
     writer"""
 
     def __init__(self, hs: "HomeServer"):
+        self.hs = hs
         self.clock = hs.get_clock()
         self.store = hs.get_datastores().main
         self._storage_controllers = hs.get_storage_controllers()
@@ -473,8 +474,6 @@ class _NullContextManager(ContextManager[None]):
 class WorkerPresenceHandler(BasePresenceHandler):
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
-        self.hs = hs
-
         self._presence_writer_instance = hs.config.worker.writers.presence[0]
 
         # Route presence EDUs to the right worker
@@ -738,7 +737,6 @@ class WorkerPresenceHandler(BasePresenceHandler):
 class PresenceHandler(BasePresenceHandler):
     def __init__(self, hs: "HomeServer"):
         super().__init__(hs)
-        self.hs = hs
         self.wheel_timer: WheelTimer[str] = WheelTimer()
         self.notifier = hs.get_notifier()
 
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 60a9f341b5..0ccd7d250c 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -40,6 +40,7 @@ from synapse.api.filtering import FilterCollection
 from synapse.api.presence import UserPresenceState
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.events import EventBase
+from synapse.handlers.device import DELETE_DEVICE_MSGS_TASK_NAME
 from synapse.handlers.relations import BundledAggregations
 from synapse.logging import issue9533_logger
 from synapse.logging.context import current_context
@@ -268,6 +269,7 @@ class SyncHandler:
         self._storage_controllers = hs.get_storage_controllers()
         self._state_storage_controller = self._storage_controllers.state
         self._device_handler = hs.get_device_handler()
+        self._task_scheduler = hs.get_task_scheduler()
 
         self.should_calculate_push_rules = hs.config.push.enable_push
 
@@ -360,11 +362,19 @@ class SyncHandler:
         # (since we now know that the device has received them)
         if since_token is not None:
             since_stream_id = since_token.to_device_key
-            deleted = await self.store.delete_messages_for_device(
-                sync_config.user.to_string(), sync_config.device_id, since_stream_id
+            # Delete device messages asynchronously and in batches using the task scheduler
+            await self._task_scheduler.schedule_task(
+                DELETE_DEVICE_MSGS_TASK_NAME,
+                resource_id=sync_config.device_id,
+                params={
+                    "user_id": sync_config.user.to_string(),
+                    "device_id": sync_config.device_id,
+                    "up_to_stream_id": since_stream_id,
+                },
             )
             logger.debug(
-                "Deleted %d to-device messages up to %d", deleted, since_stream_id
+                "Deletion of to-device messages up to %d scheduled",
+                since_stream_id,
             )
 
         if timeout == 0 or since_token is None or full_state:
diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py
index 271cdf923c..744e98c6d0 100644
--- a/synapse/storage/databases/main/deviceinbox.py
+++ b/synapse/storage/databases/main/deviceinbox.py
@@ -445,13 +445,18 @@ class DeviceInboxWorkerStore(SQLBaseStore):
 
     @trace
     async def delete_messages_for_device(
-        self, user_id: str, device_id: Optional[str], up_to_stream_id: int
+        self,
+        user_id: str,
+        device_id: Optional[str],
+        up_to_stream_id: int,
+        limit: int,
     ) -> int:
         """
         Args:
             user_id: The recipient user_id.
             device_id: The recipient device_id.
             up_to_stream_id: Where to delete messages up to.
+            limit: maximum number of messages to delete
 
         Returns:
             The number of messages deleted.
@@ -472,12 +477,16 @@ class DeviceInboxWorkerStore(SQLBaseStore):
                 log_kv({"message": "No changes in cache since last check"})
                 return 0
 
+        ROW_ID_NAME = self.database_engine.row_id_name
+
         def delete_messages_for_device_txn(txn: LoggingTransaction) -> int:
-            sql = (
-                "DELETE FROM device_inbox"
-                " WHERE user_id = ? AND device_id = ?"
-                " AND stream_id <= ?"
-            )
+            sql = f"""
+                DELETE FROM device_inbox WHERE {ROW_ID_NAME} IN (
+                  SELECT {ROW_ID_NAME} FROM device_inbox
+                  WHERE user_id = ? AND device_id = ? AND stream_id <= ?
+                  LIMIT {limit}
+                )
+                """
             txn.execute(sql, (user_id, device_id, up_to_stream_id))
             return txn.rowcount
 
@@ -487,6 +496,11 @@ class DeviceInboxWorkerStore(SQLBaseStore):
 
         log_kv({"message": f"deleted {count} messages for device", "count": count})
 
+        # In this case we don't know if we hit the limit or the delete is complete
+        # so let's not update the cache.
+        if count == limit:
+            return count
+
         # Update the cache, ensuring that we only ever increase the value
         updated_last_deleted_stream_id = self._last_device_delete_cache.get(
             (user_id, device_id), 0
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index fa69a4a298..7208fc8b33 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -1766,14 +1766,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
                 keyvalues={"user_id": user_id, "hidden": False},
             )
 
-            self.db_pool.simple_delete_many_txn(
-                txn,
-                table="device_inbox",
-                column="device_id",
-                values=device_ids,
-                keyvalues={"user_id": user_id},
-            )
-
             self.db_pool.simple_delete_many_txn(
                 txn,
                 table="device_auth_providers",
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index 5ee5c7ad9f..e4d10ff250 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -939,11 +939,7 @@ class ReceiptsBackgroundUpdateStore(SQLBaseStore):
         receipts."""
 
         def _remote_duplicate_receipts_txn(txn: LoggingTransaction) -> None:
-            if isinstance(self.database_engine, PostgresEngine):
-                ROW_ID_NAME = "ctid"
-            else:
-                ROW_ID_NAME = "rowid"
-
+            ROW_ID_NAME = self.database_engine.row_id_name
             # Identify any duplicate receipts arising from
             # https://github.com/matrix-org/synapse/issues/14406.
             # The following query takes less than a minute on matrix.org.
diff --git a/synapse/storage/engines/_base.py b/synapse/storage/engines/_base.py
index 0b5b3bf03e..b1a2418cbd 100644
--- a/synapse/storage/engines/_base.py
+++ b/synapse/storage/engines/_base.py
@@ -100,6 +100,12 @@ class BaseDatabaseEngine(Generic[ConnectionType, CursorType], metaclass=abc.ABCM
         """Gets a string giving the server version. For example: '3.22.0'"""
         ...
 
+    @property
+    @abc.abstractmethod
+    def row_id_name(self) -> str:
+        """Gets the literal name representing a row id for this engine."""
+        ...
+
     @abc.abstractmethod
     def in_transaction(self, conn: ConnectionType) -> bool:
         """Whether the connection is currently in a transaction."""
diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index 05a72dc554..6309363217 100644
--- a/synapse/storage/engines/postgres.py
+++ b/synapse/storage/engines/postgres.py
@@ -211,6 +211,10 @@ class PostgresEngine(
         else:
             return "%i.%i.%i" % (numver / 10000, (numver % 10000) / 100, numver % 100)
 
+    @property
+    def row_id_name(self) -> str:
+        return "ctid"
+
     def in_transaction(self, conn: psycopg2.extensions.connection) -> bool:
         return conn.status != psycopg2.extensions.STATUS_READY
 
diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py
index ca8c59297c..802069e1e1 100644
--- a/synapse/storage/engines/sqlite.py
+++ b/synapse/storage/engines/sqlite.py
@@ -123,6 +123,10 @@ class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection, sqlite3.Cursor]):
         """Gets a string giving the server version. For example: '3.22.0'."""
         return "%i.%i.%i" % sqlite3.sqlite_version_info
 
+    @property
+    def row_id_name(self) -> str:
+        return "rowid"
+
     def in_transaction(self, conn: sqlite3.Connection) -> bool:
         return conn.in_transaction
 
diff --git a/synapse/storage/schema/main/delta/48/group_unique_indexes.py b/synapse/storage/schema/main/delta/48/group_unique_indexes.py
index ad2da4c8af..622686d28f 100644
--- a/synapse/storage/schema/main/delta/48/group_unique_indexes.py
+++ b/synapse/storage/schema/main/delta/48/group_unique_indexes.py
@@ -14,7 +14,7 @@
 
 
 from synapse.storage.database import LoggingTransaction
-from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
+from synapse.storage.engines import BaseDatabaseEngine
 from synapse.storage.prepare_database import get_statements
 
 FIX_INDEXES = """
@@ -37,7 +37,7 @@ CREATE INDEX group_rooms_r_idx ON group_rooms(room_id);
 
 
 def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None:
-    rowid = "ctid" if isinstance(database_engine, PostgresEngine) else "rowid"
+    rowid = database_engine.row_id_name
 
     # remove duplicates from group_users & group_invites tables
     cur.execute(
diff --git a/synapse/util/task_scheduler.py b/synapse/util/task_scheduler.py
index 9e89aeb748..9b2581e51a 100644
--- a/synapse/util/task_scheduler.py
+++ b/synapse/util/task_scheduler.py
@@ -77,6 +77,7 @@ class TaskScheduler:
     LAST_UPDATE_BEFORE_WARNING_MS = 24 * 60 * 60 * 1000  # 24hrs
 
     def __init__(self, hs: "HomeServer"):
+        self._hs = hs
         self._store = hs.get_datastores().main
         self._clock = hs.get_clock()
         self._running_tasks: Set[str] = set()
@@ -97,8 +98,6 @@ class TaskScheduler:
                 "handle_scheduled_tasks",
                 self._handle_scheduled_tasks,
             )
-        else:
-            self.replication_client = hs.get_replication_command_handler()
 
     def register_action(
         self,
@@ -133,7 +132,7 @@ class TaskScheduler:
         params: Optional[JsonMapping] = None,
     ) -> str:
         """Schedule a new potentially resumable task. A function matching the specified
-        `action` should have been previously registered with `register_action`.
+        `action` should have been registered with `register_action` before the task is run.
 
         Args:
             action: the name of a previously registered action
@@ -149,11 +148,6 @@ class TaskScheduler:
         Returns:
             The id of the scheduled task
         """
-        if action not in self._actions:
-            raise Exception(
-                f"No function associated with action {action} of the scheduled task"
-            )
-
         status = TaskStatus.SCHEDULED
         if timestamp is None or timestamp < self._clock.time_msec():
             timestamp = self._clock.time_msec()
@@ -175,7 +169,7 @@ class TaskScheduler:
             if self._run_background_tasks:
                 await self._launch_task(task)
             else:
-                self.replication_client.send_new_active_task(task.id)
+                self._hs.get_replication_command_handler().send_new_active_task(task.id)
 
         return task.id
 
@@ -315,7 +309,10 @@ class TaskScheduler:
         """
         assert self._run_background_tasks
 
-        assert task.action in self._actions
+        if task.action not in self._actions:
+            raise Exception(
+                f"No function associated with action {task.action} of the scheduled task {task.id}"
+            )
         function = self._actions[task.action]
 
         async def wrapper() -> None:
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index 55a4f95ef3..9659a4a355 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -30,6 +30,7 @@ from synapse.server import HomeServer
 from synapse.storage.databases.main.appservice import _make_exclusive_regex
 from synapse.types import JsonDict, create_requester
 from synapse.util import Clock
+from synapse.util.task_scheduler import TaskScheduler
 
 from tests import unittest
 from tests.unittest import override_config
@@ -49,6 +50,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
         assert isinstance(handler, DeviceHandler)
         self.handler = handler
         self.store = hs.get_datastores().main
+        self.device_message_handler = hs.get_device_message_handler()
         return hs
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
@@ -211,6 +213,51 @@ class DeviceTestCase(unittest.HomeserverTestCase):
         )
         self.assertIsNone(res)
 
+    def test_delete_device_and_big_device_inbox(self) -> None:
+        """Check that deleting a big device inbox is staged and batched asynchronously."""
+        DEVICE_ID = "abc"
+        sender = "@sender:" + self.hs.hostname
+        receiver = "@receiver:" + self.hs.hostname
+        self._record_user(sender, DEVICE_ID, DEVICE_ID)
+        self._record_user(receiver, DEVICE_ID, DEVICE_ID)
+
+        # queue a bunch of messages in the inbox
+        requester = create_requester(sender, device_id=DEVICE_ID)
+        for i in range(0, DeviceHandler.DEVICE_MSGS_DELETE_BATCH_LIMIT + 10):
+            self.get_success(
+                self.device_message_handler.send_device_message(
+                    requester, "message_type", {receiver: {"*": {"val": i}}}
+                )
+            )
+
+        # delete the device
+        self.get_success(self.handler.delete_devices(receiver, [DEVICE_ID]))
+
+        # messages should be deleted up to DEVICE_MSGS_DELETE_BATCH_LIMIT straight away
+        res = self.get_success(
+            self.store.db_pool.simple_select_list(
+                table="device_inbox",
+                keyvalues={"user_id": receiver},
+                retcols=("user_id", "device_id", "stream_id"),
+                desc="get_device_id_from_device_inbox",
+            )
+        )
+        self.assertEqual(10, len(res))
+
+        # wait for the task scheduler to do a second delete pass
+        self.reactor.advance(TaskScheduler.SCHEDULE_INTERVAL_MS / 1000)
+
+        # remaining messages should now be deleted
+        res = self.get_success(
+            self.store.db_pool.simple_select_list(
+                table="device_inbox",
+                keyvalues={"user_id": receiver},
+                retcols=("user_id", "device_id", "stream_id"),
+                desc="get_device_id_from_device_inbox",
+            )
+        )
+        self.assertEqual(0, len(res))
+
     def test_update_device(self) -> None:
         self._record_users()
 
-- 
cgit 1.5.1


From 698f6fa2508dbff1a4353d57da60be5d13bbd61d Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Wed, 6 Sep 2023 10:50:07 +0000
Subject: Allow modules to delete rooms. (#15997)

* Allow user_id to be optional for room deletion

* Add module API method to delete a room

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

* Don't worry about the case block=True && requester_user_id is None

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 changelog.d/15997.misc                                      |  1 +
 synapse/handlers/pagination.py                              | 12 ++++++++++--
 synapse/handlers/room.py                                    | 10 +++++++++-
 synapse/module_api/__init__.py                              | 13 +++++++++++++
 .../callbacks/third_party_event_rules_callbacks.py          | 11 ++++++++---
 5 files changed, 41 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/15997.misc

(limited to 'synapse')

diff --git a/changelog.d/15997.misc b/changelog.d/15997.misc
new file mode 100644
index 0000000000..94768c3cb8
--- /dev/null
+++ b/changelog.d/15997.misc
@@ -0,0 +1 @@
+Allow modules to delete rooms.
\ No newline at end of file
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index e5ac9096cc..19cf5a2b43 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -713,7 +713,7 @@ class PaginationHandler:
         self,
         delete_id: str,
         room_id: str,
-        requester_user_id: str,
+        requester_user_id: Optional[str],
         new_room_user_id: Optional[str] = None,
         new_room_name: Optional[str] = None,
         message: Optional[str] = None,
@@ -732,6 +732,10 @@ class PaginationHandler:
             requester_user_id:
                 User who requested the action. Will be recorded as putting the room on the
                 blocking list.
+                If None, the action was not manually requested but instead
+                triggered automatically, e.g. through a Synapse module
+                or some other policy.
+                MUST NOT be None if block=True.
             new_room_user_id:
                 If set, a new room will be created with this user ID
                 as the creator and admin, and all users in the old room will be
@@ -818,7 +822,7 @@ class PaginationHandler:
     def start_shutdown_and_purge_room(
         self,
         room_id: str,
-        requester_user_id: str,
+        requester_user_id: Optional[str],
         new_room_user_id: Optional[str] = None,
         new_room_name: Optional[str] = None,
         message: Optional[str] = None,
@@ -833,6 +837,10 @@ class PaginationHandler:
             requester_user_id:
                 User who requested the action and put the room on the
                 blocking list.
+                If None, the action was not manually requested but instead
+                triggered automatically, e.g. through a Synapse module
+                or some other policy.
+                MUST NOT be None if block=True.
             new_room_user_id:
                 If set, a new room will be created with this user ID
                 as the creator and admin, and all users in the old room will be
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 0513e28aab..7a762c8511 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -1787,7 +1787,7 @@ class RoomShutdownHandler:
     async def shutdown_room(
         self,
         room_id: str,
-        requester_user_id: str,
+        requester_user_id: Optional[str],
         new_room_user_id: Optional[str] = None,
         new_room_name: Optional[str] = None,
         message: Optional[str] = None,
@@ -1811,6 +1811,10 @@ class RoomShutdownHandler:
             requester_user_id:
                 User who requested the action and put the room on the
                 blocking list.
+                If None, the action was not manually requested but instead
+                triggered automatically, e.g. through a Synapse module
+                or some other policy.
+                MUST NOT be None if block=True.
             new_room_user_id:
                 If set, a new room will be created with this user ID
                 as the creator and admin, and all users in the old room will be
@@ -1863,6 +1867,10 @@ class RoomShutdownHandler:
 
         # Action the block first (even if the room doesn't exist yet)
         if block:
+            if requester_user_id is None:
+                raise ValueError(
+                    "shutdown_room: block=True not allowed when requester_user_id is None."
+                )
             # This will work even if the room is already blocked, but that is
             # desirable in case the first attempt at blocking the room failed below.
             await self.store.block_room(room_id, requester_user_id)
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 2f00a7ba20..d6efe10a28 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -1730,6 +1730,19 @@ class ModuleApi:
         room_alias_str = room_alias.to_string() if room_alias else None
         return room_id, room_alias_str
 
+    async def delete_room(self, room_id: str) -> None:
+        """
+        Schedules the deletion of a room from Synapse's database.
+
+        If the room is already being deleted, this method does nothing.
+        This method does not wait for the room to be deleted.
+
+        Added in Synapse v1.89.0.
+        """
+        # Future extensions to this method might want to e.g. allow use of `force_purge`.
+        # TODO In the future we should make sure this is persistent.
+        self._hs.get_pagination_handler().start_shutdown_and_purge_room(room_id, None)
+
     async def set_displayname(
         self,
         user_id: UserID,
diff --git a/synapse/module_api/callbacks/third_party_event_rules_callbacks.py b/synapse/module_api/callbacks/third_party_event_rules_callbacks.py
index 911f37ba42..ecaeef3511 100644
--- a/synapse/module_api/callbacks/third_party_event_rules_callbacks.py
+++ b/synapse/module_api/callbacks/third_party_event_rules_callbacks.py
@@ -40,7 +40,7 @@ CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK = Callable[
     [str, StateMap[EventBase], str], Awaitable[bool]
 ]
 ON_NEW_EVENT_CALLBACK = Callable[[EventBase, StateMap[EventBase]], Awaitable]
-CHECK_CAN_SHUTDOWN_ROOM_CALLBACK = Callable[[str, str], Awaitable[bool]]
+CHECK_CAN_SHUTDOWN_ROOM_CALLBACK = Callable[[Optional[str], str], Awaitable[bool]]
 CHECK_CAN_DEACTIVATE_USER_CALLBACK = Callable[[str, bool], Awaitable[bool]]
 ON_PROFILE_UPDATE_CALLBACK = Callable[[str, ProfileInfo, bool, bool], Awaitable]
 ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK = Callable[[str, bool, bool], Awaitable]
@@ -429,12 +429,17 @@ class ThirdPartyEventRulesModuleApiCallbacks:
                     "Failed to run module API callback %s: %s", callback, e
                 )
 
-    async def check_can_shutdown_room(self, user_id: str, room_id: str) -> bool:
+    async def check_can_shutdown_room(
+        self, user_id: Optional[str], room_id: str
+    ) -> bool:
         """Intercept requests to shutdown a room. If `False` is returned, the
          room must not be shut down.
 
         Args:
-            requester: The ID of the user requesting the shutdown.
+            user_id: The ID of the user requesting the shutdown.
+                If no user ID is supplied, then the room is being shut down through
+                some mechanism other than a user's request, e.g. through a module's
+                request.
             room_id: The ID of the room.
         """
         for callback in self._check_can_shutdown_room_callbacks:
-- 
cgit 1.5.1


From e937e2111a45d0cb3ecc973f95dafafecb6e9c36 Mon Sep 17 00:00:00 2001
From: reivilibre <oliverw@matrix.org>
Date: Wed, 6 Sep 2023 13:01:10 +0000
Subject: Add the ability to use `G` (GiB) and `T` (TiB) suffixes in
 configuration options that refer to numbers of bytes. (#16219)

* Add more suffixes to `parse_size`

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
---
 changelog.d/16219.feature                        | 1 +
 docs/usage/configuration/config_documentation.md | 4 +++-
 synapse/config/_base.py                          | 7 ++++---
 3 files changed, 8 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/16219.feature

(limited to 'synapse')

diff --git a/changelog.d/16219.feature b/changelog.d/16219.feature
new file mode 100644
index 0000000000..c789f2abb7
--- /dev/null
+++ b/changelog.d/16219.feature
@@ -0,0 +1 @@
+Add the ability to use `G` (GiB) and `T` (TiB) suffixes in configuration options that refer to numbers of bytes.
\ No newline at end of file
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 0b1725816e..97fd1beb39 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -25,8 +25,10 @@ messages from the database after 5 minutes, rather than 5 months.
 
 In addition, configuration options referring to size use the following suffixes:
 
-* `M` = MiB, or 1,048,576 bytes
 * `K` = KiB, or 1024 bytes
+* `M` = MiB, or 1,048,576 bytes
+* `G` = GiB, or 1,073,741,824 bytes
+* `T` = TiB, or 1,099,511,627,776 bytes
 
 For example, setting `max_avatar_size: 10M` means that Synapse will not accept files larger than 10,485,760 bytes
 for a user avatar.
diff --git a/synapse/config/_base.py b/synapse/config/_base.py
index 69a8318127..58856839e1 100644
--- a/synapse/config/_base.py
+++ b/synapse/config/_base.py
@@ -179,8 +179,9 @@ class Config:
 
         If an integer is provided it is treated as bytes and is unchanged.
 
-        String byte sizes can have a suffix of 'K' or `M`, representing kibibytes and
-        mebibytes respectively. No suffix is understood as a plain byte count.
+        String byte sizes can have a suffix of 'K', `M`, `G` or `T`,
+        representing kibibytes, mebibytes, gibibytes and tebibytes respectively.
+        No suffix is understood as a plain byte count.
 
         Raises:
             TypeError, if given something other than an integer or a string
@@ -189,7 +190,7 @@ class Config:
         if type(value) is int:  # noqa: E721
             return value
         elif isinstance(value, str):
-            sizes = {"K": 1024, "M": 1024 * 1024}
+            sizes = {"K": 1024, "M": 1024 * 1024, "G": 1024**3, "T": 1024**4}
             size = 1
             suffix = value[-1]
             if suffix in sizes:
-- 
cgit 1.5.1


From 1940d990a345b44839039b3f6a9ee3f26757eb0e Mon Sep 17 00:00:00 2001
From: Quentin Gliech <quenting@element.io>
Date: Wed, 6 Sep 2023 16:19:51 +0200
Subject: Revert MSC3861 introspection cache, admin impersonation and account
 lock (#16258)

---
 changelog.d/16258.bugfix                           |   1 +
 synapse/api/auth/msc3861_delegated.py              |  91 +-----------
 synapse/replication/tcp/client.py                  |  12 --
 synapse/rest/admin/__init__.py                     |   3 -
 synapse/rest/admin/oidc.py                         |  55 --------
 synapse/storage/databases/main/cache.py            |  13 --
 synapse/storage/databases/main/devices.py          |   9 --
 synapse/util/caches/expiringcache.py               |  22 ---
 tests/handlers/test_oauth_delegation.py            | 154 ++++-----------------
 tests/replication/test_intro_token_invalidation.py |  62 ---------
 10 files changed, 31 insertions(+), 391 deletions(-)
 create mode 100644 changelog.d/16258.bugfix
 delete mode 100644 synapse/rest/admin/oidc.py
 delete mode 100644 tests/replication/test_intro_token_invalidation.py

(limited to 'synapse')

diff --git a/changelog.d/16258.bugfix b/changelog.d/16258.bugfix
new file mode 100644
index 0000000000..02ba9598a2
--- /dev/null
+++ b/changelog.d/16258.bugfix
@@ -0,0 +1 @@
+Revert MSC3861 introspection cache, admin impersonation and account lock.
\ No newline at end of file
diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py
index 14cba50c90..ef5d3f9b81 100644
--- a/synapse/api/auth/msc3861_delegated.py
+++ b/synapse/api/auth/msc3861_delegated.py
@@ -28,7 +28,6 @@ from twisted.web.http_headers import Headers
 from synapse.api.auth.base import BaseAuth
 from synapse.api.errors import (
     AuthError,
-    Codes,
     HttpResponseException,
     InvalidClientTokenError,
     OAuthInsufficientScopeError,
@@ -40,7 +39,6 @@ from synapse.logging.context import make_deferred_yieldable
 from synapse.types import Requester, UserID, create_requester
 from synapse.util import json_decoder
 from synapse.util.caches.cached_call import RetryOnExceptionCachedCall
-from synapse.util.caches.expiringcache import ExpiringCache
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -109,20 +107,13 @@ class MSC3861DelegatedAuth(BaseAuth):
         assert self._config.client_id, "No client_id provided"
         assert auth_method is not None, "Invalid client_auth_method provided"
 
+        self._clock = hs.get_clock()
         self._http_client = hs.get_proxied_http_client()
         self._hostname = hs.hostname
         self._admin_token = self._config.admin_token
 
         self._issuer_metadata = RetryOnExceptionCachedCall(self._load_metadata)
 
-        self._clock = hs.get_clock()
-        self._token_cache: ExpiringCache[str, IntrospectionToken] = ExpiringCache(
-            cache_name="introspection_token_cache",
-            clock=self._clock,
-            max_len=10000,
-            expiry_ms=5 * 60 * 1000,
-        )
-
         if isinstance(auth_method, PrivateKeyJWTWithKid):
             # Use the JWK as the client secret when using the private_key_jwt method
             assert self._config.jwk, "No JWK provided"
@@ -161,20 +152,6 @@ class MSC3861DelegatedAuth(BaseAuth):
         Returns:
             The introspection response
         """
-        # check the cache before doing a request
-        introspection_token = self._token_cache.get(token, None)
-
-        if introspection_token:
-            # check the expiration field of the token (if it exists)
-            exp = introspection_token.get("exp", None)
-            if exp:
-                time_now = self._clock.time()
-                expired = time_now > exp
-                if not expired:
-                    return introspection_token
-            else:
-                return introspection_token
-
         metadata = await self._issuer_metadata.get()
         introspection_endpoint = metadata.get("introspection_endpoint")
         raw_headers: Dict[str, str] = {
@@ -188,10 +165,7 @@ class MSC3861DelegatedAuth(BaseAuth):
 
         # Fill the body/headers with credentials
         uri, raw_headers, body = self._client_auth.prepare(
-            method="POST",
-            uri=introspection_endpoint,
-            headers=raw_headers,
-            body=body,
+            method="POST", uri=introspection_endpoint, headers=raw_headers, body=body
         )
         headers = Headers({k: [v] for (k, v) in raw_headers.items()})
 
@@ -233,20 +207,10 @@ class MSC3861DelegatedAuth(BaseAuth):
                 "The introspection endpoint returned an invalid JSON response."
             )
 
-        expiration = resp.get("exp", None)
-        if expiration:
-            if self._clock.time() > expiration:
-                raise InvalidClientTokenError("Token is expired.")
-
-        introspection_token = IntrospectionToken(**resp)
-
-        # add token to cache
-        self._token_cache[token] = introspection_token
-
-        return introspection_token
+        return IntrospectionToken(**resp)
 
     async def is_server_admin(self, requester: Requester) -> bool:
-        return SCOPE_SYNAPSE_ADMIN in requester.scope
+        return "urn:synapse:admin:*" in requester.scope
 
     async def get_user_by_req(
         self,
@@ -263,36 +227,6 @@ class MSC3861DelegatedAuth(BaseAuth):
             # so that we don't provision the user if they don't have enough permission:
             requester = await self.get_user_by_access_token(access_token, allow_expired)
 
-            # Allow impersonation by an admin user using `_oidc_admin_impersonate_user_id` query parameter
-            if request.args is not None:
-                user_id_params = request.args.get(b"_oidc_admin_impersonate_user_id")
-                if user_id_params:
-                    if await self.is_server_admin(requester):
-                        user_id_str = user_id_params[0].decode("ascii")
-                        impersonated_user_id = UserID.from_string(user_id_str)
-                        logging.info(f"Admin impersonation of user {user_id_str}")
-                        requester = create_requester(
-                            user_id=impersonated_user_id,
-                            scope=[SCOPE_MATRIX_API],
-                            authenticated_entity=requester.user.to_string(),
-                        )
-                    else:
-                        raise AuthError(
-                            401,
-                            "Impersonation not possible by a non admin user",
-                        )
-
-            # Deny the request if the user account is locked.
-            if not allow_locked and await self.store.get_user_locked_status(
-                requester.user.to_string()
-            ):
-                raise AuthError(
-                    401,
-                    "User account has been locked",
-                    errcode=Codes.USER_LOCKED,
-                    additional_fields={"soft_logout": True},
-                )
-
         if not allow_guest and requester.is_guest:
             raise OAuthInsufficientScopeError([SCOPE_MATRIX_API])
 
@@ -309,14 +243,14 @@ class MSC3861DelegatedAuth(BaseAuth):
             # XXX: This is a temporary solution so that the admin API can be called by
             # the OIDC provider. This will be removed once we have OIDC client
             # credentials grant support in matrix-authentication-service.
-            logging.info("Admin token used")
+            logging.info("Admin toked used")
             # XXX: that user doesn't exist and won't be provisioned.
             # This is mostly fine for admin calls, but we should also think about doing
             # requesters without a user_id.
             admin_user = UserID("__oidc_admin", self._hostname)
             return create_requester(
                 user_id=admin_user,
-                scope=[SCOPE_SYNAPSE_ADMIN],
+                scope=["urn:synapse:admin:*"],
             )
 
         try:
@@ -438,16 +372,3 @@ class MSC3861DelegatedAuth(BaseAuth):
             scope=scope,
             is_guest=(has_guest_scope and not has_user_scope),
         )
-
-    def invalidate_cached_tokens(self, keys: List[str]) -> None:
-        """
-        Invalidate the entry(s) in the introspection token cache corresponding to the given key
-        """
-        for key in keys:
-            self._token_cache.invalidate(key)
-
-    def invalidate_token_cache(self) -> None:
-        """
-        Invalidate the entire token cache.
-        """
-        self._token_cache.invalidate_all()
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 3b88dc68ea..078c8d7074 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -28,7 +28,6 @@ from synapse.logging.context import PreserveLoggingContext, make_deferred_yielda
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.replication.tcp.streams import (
     AccountDataStream,
-    CachesStream,
     DeviceListsStream,
     PushersStream,
     PushRulesStream,
@@ -76,7 +75,6 @@ class ReplicationDataHandler:
         self._instance_name = hs.get_instance_name()
         self._typing_handler = hs.get_typing_handler()
         self._state_storage_controller = hs.get_storage_controllers().state
-        self.auth = hs.get_auth()
 
         self._notify_pushers = hs.config.worker.start_pushers
         self._pusher_pool = hs.get_pusherpool()
@@ -224,16 +222,6 @@ class ReplicationDataHandler:
                 self._state_storage_controller.notify_event_un_partial_stated(
                     row.event_id
                 )
-        # invalidate the introspection token cache
-        elif stream_name == CachesStream.NAME:
-            for row in rows:
-                if row.cache_func == "introspection_token_invalidation":
-                    if row.keys[0] is None:
-                        # invalidate the whole cache
-                        # mypy ignore - the token cache is defined on MSC3861DelegatedAuth
-                        self.auth.invalidate_token_cache()  # type: ignore[attr-defined]
-                    else:
-                        self.auth.invalidate_cached_tokens(row.keys)  # type: ignore[attr-defined]
 
         await self._presence_handler.process_replication_rows(
             stream_name, instance_name, token, rows
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index 55e752fda8..fe8177ed4d 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -47,7 +47,6 @@ from synapse.rest.admin.federation import (
     ListDestinationsRestServlet,
 )
 from synapse.rest.admin.media import ListMediaInRoom, register_servlets_for_media_repo
-from synapse.rest.admin.oidc import OIDCTokenRevocationRestServlet
 from synapse.rest.admin.registration_tokens import (
     ListRegistrationTokensRestServlet,
     NewRegistrationTokenRestServlet,
@@ -298,8 +297,6 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
     BackgroundUpdateRestServlet(hs).register(http_server)
     BackgroundUpdateStartJobRestServlet(hs).register(http_server)
     ExperimentalFeaturesRestServlet(hs).register(http_server)
-    if hs.config.experimental.msc3861.enabled:
-        OIDCTokenRevocationRestServlet(hs).register(http_server)
 
 
 def register_servlets_for_client_rest_resource(
diff --git a/synapse/rest/admin/oidc.py b/synapse/rest/admin/oidc.py
deleted file mode 100644
index 64d2d40550..0000000000
--- a/synapse/rest/admin/oidc.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright 2023 The Matrix.org Foundation C.I.C
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-from http import HTTPStatus
-from typing import TYPE_CHECKING, Dict, Tuple
-
-from synapse.http.servlet import RestServlet
-from synapse.http.site import SynapseRequest
-from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin
-
-if TYPE_CHECKING:
-    from synapse.server import HomeServer
-
-
-class OIDCTokenRevocationRestServlet(RestServlet):
-    """
-    Delete a given token introspection response - identified by the `jti` field - from the
-    introspection token cache when a token is revoked at the authorizing server
-    """
-
-    PATTERNS = admin_patterns("/OIDC_token_revocation/(?P<token_id>[^/]*)")
-
-    def __init__(self, hs: "HomeServer"):
-        super().__init__()
-        auth = hs.get_auth()
-
-        # If this endpoint is loaded then we must have enabled delegated auth.
-        from synapse.api.auth.msc3861_delegated import MSC3861DelegatedAuth
-
-        assert isinstance(auth, MSC3861DelegatedAuth)
-
-        self.auth = auth
-        self.store = hs.get_datastores().main
-
-    async def on_DELETE(
-        self, request: SynapseRequest, token_id: str
-    ) -> Tuple[HTTPStatus, Dict]:
-        await assert_requester_is_admin(self.auth, request)
-
-        self.auth._token_cache.invalidate(token_id)
-
-        # make sure we invalidate the cache on any workers
-        await self.store.stream_introspection_token_invalidation((token_id,))
-
-        return HTTPStatus.OK, {}
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
index 18905e07b6..2fbd389c71 100644
--- a/synapse/storage/databases/main/cache.py
+++ b/synapse/storage/databases/main/cache.py
@@ -584,19 +584,6 @@ class CacheInvalidationWorkerStore(SQLBaseStore):
         else:
             return 0
 
-    async def stream_introspection_token_invalidation(
-        self, key: Tuple[Optional[str]]
-    ) -> None:
-        """
-        Stream an invalidation request for the introspection token cache to workers
-
-        Args:
-            key: token_id of the introspection token to remove from the cache
-        """
-        await self.send_invalidation_to_replication(
-            "introspection_token_invalidation", key
-        )
-
     @wrap_as_background_process("clean_up_old_cache_invalidations")
     async def _clean_up_cache_invalidation_wrapper(self) -> None:
         """
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index fa69a4a298..e4162f846b 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -33,7 +33,6 @@ from typing_extensions import Literal
 
 from synapse.api.constants import EduTypes
 from synapse.api.errors import Codes, StoreError
-from synapse.config.homeserver import HomeServerConfig
 from synapse.logging.opentracing import (
     get_active_span_text_map,
     set_tag,
@@ -1664,7 +1663,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         self.device_id_exists_cache: LruCache[
             Tuple[str, str], Literal[True]
         ] = LruCache(cache_name="device_id_exists", max_size=10000)
-        self.config: HomeServerConfig = hs.config
 
     async def store_device(
         self,
@@ -1786,13 +1784,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
         for device_id in device_ids:
             self.device_id_exists_cache.invalidate((user_id, device_id))
 
-        # TODO: don't nuke the entire cache once there is a way to associate
-        #  device_id -> introspection_token
-        if self.config.experimental.msc3861.enabled:
-            # mypy ignore - the token cache is defined on MSC3861DelegatedAuth
-            self.auth._token_cache.invalidate_all()  # type: ignore[attr-defined]
-            await self.stream_introspection_token_invalidation((None,))
-
     async def update_device(
         self, user_id: str, device_id: str, new_display_name: Optional[str] = None
     ) -> None:
diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py
index 9a3e10ddee..01ad02af67 100644
--- a/synapse/util/caches/expiringcache.py
+++ b/synapse/util/caches/expiringcache.py
@@ -140,20 +140,6 @@ class ExpiringCache(Generic[KT, VT]):
 
         return value.value
 
-    def invalidate(self, key: KT) -> None:
-        """
-        Remove the given key from the cache.
-        """
-
-        value = self._cache.pop(key, None)
-        if value:
-            if self.iterable:
-                self.metrics.inc_evictions(
-                    EvictionReason.invalidation, len(value.value)
-                )
-            else:
-                self.metrics.inc_evictions(EvictionReason.invalidation)
-
     def __contains__(self, key: KT) -> bool:
         return key in self._cache
 
@@ -207,14 +193,6 @@ class ExpiringCache(Generic[KT, VT]):
             len(self),
         )
 
-    def invalidate_all(self) -> None:
-        """
-        Remove all items from the cache.
-        """
-        keys = set(self._cache.keys())
-        for key in keys:
-            self._cache.pop(key)
-
     def __len__(self) -> int:
         if self.iterable:
             return sum(len(entry.value) for entry in self._cache.values())
diff --git a/tests/handlers/test_oauth_delegation.py b/tests/handlers/test_oauth_delegation.py
index b891e84690..503277cdff 100644
--- a/tests/handlers/test_oauth_delegation.py
+++ b/tests/handlers/test_oauth_delegation.py
@@ -14,7 +14,7 @@
 
 from http import HTTPStatus
 from typing import Any, Dict, Union
-from unittest.mock import ANY, AsyncMock, Mock
+from unittest.mock import ANY, Mock
 from urllib.parse import parse_qs
 
 from signedjson.key import (
@@ -122,6 +122,7 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
                 "client_id": CLIENT_ID,
                 "client_auth_method": "client_secret_post",
                 "client_secret": CLIENT_SECRET,
+                "admin_token": "admin_token_value",
             }
         }
         return config
@@ -340,41 +341,6 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
             get_awaitable_result(self.auth.is_server_admin(requester)), False
         )
 
-    def test_active_user_admin_impersonation(self) -> None:
-        """The handler should return a requester with normal user rights
-        and an user ID matching the one specified in query param `user_id`"""
-
-        self.http_client.request = simple_async_mock(
-            return_value=FakeResponse.json(
-                code=200,
-                payload={
-                    "active": True,
-                    "sub": SUBJECT,
-                    "scope": " ".join([SYNAPSE_ADMIN_SCOPE, MATRIX_USER_SCOPE]),
-                    "username": USERNAME,
-                },
-            )
-        )
-        request = Mock(args={})
-        request.args[b"access_token"] = [b"mockAccessToken"]
-        impersonated_user_id = f"@{USERNAME}:{SERVER_NAME}"
-        request.args[b"_oidc_admin_impersonate_user_id"] = [
-            impersonated_user_id.encode("ascii")
-        ]
-        request.requestHeaders.getRawHeaders = mock_getRawHeaders()
-        requester = self.get_success(self.auth.get_user_by_req(request))
-        self.http_client.get_json.assert_called_once_with(WELL_KNOWN)
-        self.http_client.request.assert_called_once_with(
-            method="POST", uri=INTROSPECTION_ENDPOINT, data=ANY, headers=ANY
-        )
-        self._assertParams()
-        self.assertEqual(requester.user.to_string(), impersonated_user_id)
-        self.assertEqual(requester.is_guest, False)
-        self.assertEqual(requester.device_id, None)
-        self.assertEqual(
-            get_awaitable_result(self.auth.is_server_admin(requester)), False
-        )
-
     def test_active_user_with_device(self) -> None:
         """The handler should return a requester with normal user rights and a device ID."""
 
@@ -526,100 +492,6 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
         error = self.get_failure(self.auth.get_user_by_req(request), SynapseError)
         self.assertEqual(error.value.code, 503)
 
-    def test_introspection_token_cache(self) -> None:
-        access_token = "open_sesame"
-        self.http_client.request = simple_async_mock(
-            return_value=FakeResponse.json(
-                code=200,
-                payload={"active": "true", "scope": "guest", "jti": access_token},
-            )
-        )
-
-        # first call should cache response
-        # Mpyp ignores below are due to mypy not understanding the dynamic substitution of msc3861 auth code
-        # for regular auth code via the config
-        self.get_success(
-            self.auth._introspect_token(access_token)  # type: ignore[attr-defined]
-        )
-        introspection_token = self.auth._token_cache.get(access_token)  # type: ignore[attr-defined]
-        self.assertEqual(introspection_token["jti"], access_token)
-        # there's been one http request
-        self.http_client.request.assert_called_once()
-
-        # second call should pull from cache, there should still be only one http request
-        token = self.get_success(self.auth._introspect_token(access_token))  # type: ignore[attr-defined]
-        self.http_client.request.assert_called_once()
-        self.assertEqual(token["jti"], access_token)
-
-        # advance past five minutes and check that cache expired - there should be more than one http call now
-        self.reactor.advance(360)
-        token_2 = self.get_success(self.auth._introspect_token(access_token))  # type: ignore[attr-defined]
-        self.assertEqual(self.http_client.request.call_count, 2)
-        self.assertEqual(token_2["jti"], access_token)
-
-        # test that if a cached token is expired, a fresh token will be pulled from authorizing server - first add a
-        # token with a soon-to-expire `exp` field to the cache
-        self.http_client.request = simple_async_mock(
-            return_value=FakeResponse.json(
-                code=200,
-                payload={
-                    "active": "true",
-                    "scope": "guest",
-                    "jti": "stale",
-                    "exp": self.clock.time() + 100,
-                },
-            )
-        )
-        self.get_success(
-            self.auth._introspect_token("stale")  # type: ignore[attr-defined]
-        )
-        introspection_token = self.auth._token_cache.get("stale")  # type: ignore[attr-defined]
-        self.assertEqual(introspection_token["jti"], "stale")
-        self.assertEqual(self.http_client.request.call_count, 1)
-
-        # advance the reactor past the token expiry but less than the cache expiry
-        self.reactor.advance(120)
-        self.assertEqual(self.auth._token_cache.get("stale"), introspection_token)  # type: ignore[attr-defined]
-
-        # check that the next call causes another http request (which will fail because the token is technically expired
-        # but the important thing is we discard the token from the cache and try the network)
-        self.get_failure(
-            self.auth._introspect_token("stale"), InvalidClientTokenError  # type: ignore[attr-defined]
-        )
-        self.assertEqual(self.http_client.request.call_count, 2)
-
-    def test_revocation_endpoint(self) -> None:
-        # mock introspection response and then admin verification response
-        self.http_client.request = AsyncMock(
-            side_effect=[
-                FakeResponse.json(
-                    code=200, payload={"active": True, "jti": "open_sesame"}
-                ),
-                FakeResponse.json(
-                    code=200,
-                    payload={
-                        "active": True,
-                        "sub": SUBJECT,
-                        "scope": " ".join([SYNAPSE_ADMIN_SCOPE, MATRIX_USER_SCOPE]),
-                        "username": USERNAME,
-                    },
-                ),
-            ]
-        )
-
-        # cache a token to delete
-        introspection_token = self.get_success(
-            self.auth._introspect_token("open_sesame")  # type: ignore[attr-defined]
-        )
-        self.assertEqual(self.auth._token_cache.get("open_sesame"), introspection_token)  # type: ignore[attr-defined]
-
-        # delete the revoked token
-        introspection_token_id = "open_sesame"
-        url = f"/_synapse/admin/v1/OIDC_token_revocation/{introspection_token_id}"
-        channel = self.make_request("DELETE", url, access_token="mockAccessToken")
-        self.assertEqual(channel.code, 200)
-        self.assertEqual(self.auth._token_cache.get("open_sesame"), None)  # type: ignore[attr-defined]
-
     def make_device_keys(self, user_id: str, device_id: str) -> JsonDict:
         # We only generate a master key to simplify the test.
         master_signing_key = generate_signing_key(device_id)
@@ -791,3 +663,25 @@ class MSC3861OAuthDelegation(HomeserverTestCase):
         self.expect_unrecognized("GET", "/_synapse/admin/v1/users/foo/admin")
         self.expect_unrecognized("PUT", "/_synapse/admin/v1/users/foo/admin")
         self.expect_unrecognized("POST", "/_synapse/admin/v1/account_validity/validity")
+
+    def test_admin_token(self) -> None:
+        """The handler should return a requester with admin rights when admin_token is used."""
+        self.http_client.request = simple_async_mock(
+            return_value=FakeResponse.json(code=200, payload={"active": False}),
+        )
+
+        request = Mock(args={})
+        request.args[b"access_token"] = [b"admin_token_value"]
+        request.requestHeaders.getRawHeaders = mock_getRawHeaders()
+        requester = self.get_success(self.auth.get_user_by_req(request))
+        self.assertEqual(
+            requester.user.to_string(), "@%s:%s" % ("__oidc_admin", SERVER_NAME)
+        )
+        self.assertEqual(requester.is_guest, False)
+        self.assertEqual(requester.device_id, None)
+        self.assertEqual(
+            get_awaitable_result(self.auth.is_server_admin(requester)), True
+        )
+
+        # There should be no call to the introspection endpoint
+        self.http_client.request.assert_not_called()
diff --git a/tests/replication/test_intro_token_invalidation.py b/tests/replication/test_intro_token_invalidation.py
deleted file mode 100644
index f90678b6b1..0000000000
--- a/tests/replication/test_intro_token_invalidation.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright 2023 The Matrix.org Foundation C.I.C.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Any, Dict
-
-import synapse.rest.admin._base
-
-from tests.replication._base import BaseMultiWorkerStreamTestCase
-
-
-class IntrospectionTokenCacheInvalidationTestCase(BaseMultiWorkerStreamTestCase):
-    servlets = [synapse.rest.admin.register_servlets]
-
-    def default_config(self) -> Dict[str, Any]:
-        config = super().default_config()
-        config["disable_registration"] = True
-        config["experimental_features"] = {
-            "msc3861": {
-                "enabled": True,
-                "issuer": "some_dude",
-                "client_id": "ID",
-                "client_auth_method": "client_secret_post",
-                "client_secret": "secret",
-            }
-        }
-        return config
-
-    def test_stream_introspection_token_invalidation(self) -> None:
-        worker_hs = self.make_worker_hs("synapse.app.generic_worker")
-        auth = worker_hs.get_auth()
-        store = self.hs.get_datastores().main
-
-        # add a token to the cache on the worker
-        auth._token_cache["open_sesame"] = "intro_token"  # type: ignore[attr-defined]
-
-        # stream the invalidation from the master
-        self.get_success(
-            store.stream_introspection_token_invalidation(("open_sesame",))
-        )
-
-        # check that the cache on the worker was invalidated
-        self.assertEqual(auth._token_cache.get("open_sesame"), None)  # type: ignore[attr-defined]
-
-        # test invalidating whole cache
-        for i in range(0, 5):
-            auth._token_cache[f"open_sesame_{i}"] = f"intro_token_{i}"  # type: ignore[attr-defined]
-        self.assertEqual(len(auth._token_cache), 5)  # type: ignore[attr-defined]
-
-        self.get_success(store.stream_introspection_token_invalidation((None,)))
-
-        self.assertEqual(len(auth._token_cache), 0)  # type: ignore[attr-defined]
-- 
cgit 1.5.1


From fe69e7f617199f51eb97f510a0a934fdcf02fbad Mon Sep 17 00:00:00 2001
From: Aurélien Grimpard <aurelien@grimpard.net>
Date: Wed, 6 Sep 2023 20:32:24 +0200
Subject: Handle "registration_enabled" parameter for CAS (#16262)

Similar to OIDC, CAS providers can now disable registration such
that only existing users are able to login via SSO.
---
 changelog.d/16262.feature                        |  1 +
 docs/usage/configuration/config_documentation.md |  7 +++++++
 synapse/config/cas.py                            |  3 +++
 synapse/handlers/cas.py                          |  2 ++
 tests/handlers/test_cas.py                       | 17 +++++++++++++++++
 5 files changed, 30 insertions(+)
 create mode 100644 changelog.d/16262.feature

(limited to 'synapse')

diff --git a/changelog.d/16262.feature b/changelog.d/16262.feature
new file mode 100644
index 0000000000..7c8e7e349b
--- /dev/null
+++ b/changelog.d/16262.feature
@@ -0,0 +1 @@
+Add the ability to enable/disable registrations when in the CAS flow. Contributed by Aurélien Grimpard.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 97fd1beb39..42df53d52b 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -3430,6 +3430,12 @@ Has the following sub-options:
    and the values must match the given value. Alternately if the given value
    is `None` then any value is allowed (the attribute just must exist).
    All of the listed attributes must match for the login to be permitted.
+* `enable_registration`: set to 'false' to disable automatic registration of new
+   users. This allows the CAS SSO flow to be limited to sign in only, rather than
+   automatically registering users that have a valid SSO login but do not have
+   a pre-registered account. Defaults to true.
+
+   *Added in Synapse 1.93.0.*
 
 Example configuration:
 ```yaml
@@ -3441,6 +3447,7 @@ cas_config:
   required_attributes:
     userGroup: "staff"
     department: None
+  enable_registration: true
 ```
 ---
 ### `sso`
diff --git a/synapse/config/cas.py b/synapse/config/cas.py
index 6e2d9addbf..bbc8f43073 100644
--- a/synapse/config/cas.py
+++ b/synapse/config/cas.py
@@ -57,6 +57,8 @@ class CasConfig(Config):
                 required_attributes
             )
 
+            self.cas_enable_registration = cas_config.get("enable_registration", True)
+
             self.idp_name = cas_config.get("idp_name", "CAS")
             self.idp_icon = cas_config.get("idp_icon")
             self.idp_brand = cas_config.get("idp_brand")
@@ -67,6 +69,7 @@ class CasConfig(Config):
             self.cas_protocol_version = None
             self.cas_displayname_attribute = None
             self.cas_required_attributes = []
+            self.cas_enable_registration = False
 
 
 # CAS uses a legacy required attributes mapping, not the one provided by
diff --git a/synapse/handlers/cas.py b/synapse/handlers/cas.py
index a850545453..b5b8b9bd35 100644
--- a/synapse/handlers/cas.py
+++ b/synapse/handlers/cas.py
@@ -70,6 +70,7 @@ class CasHandler:
         self._cas_protocol_version = hs.config.cas.cas_protocol_version
         self._cas_displayname_attribute = hs.config.cas.cas_displayname_attribute
         self._cas_required_attributes = hs.config.cas.cas_required_attributes
+        self._cas_enable_registration = hs.config.cas.cas_enable_registration
 
         self._http_client = hs.get_proxied_http_client()
 
@@ -395,4 +396,5 @@ class CasHandler:
             client_redirect_url,
             cas_response_to_user_attributes,
             grandfather_existing_users,
+            registration_enabled=self._cas_enable_registration,
         )
diff --git a/tests/handlers/test_cas.py b/tests/handlers/test_cas.py
index 8582b1cd1e..13e2cd153a 100644
--- a/tests/handlers/test_cas.py
+++ b/tests/handlers/test_cas.py
@@ -197,6 +197,23 @@ class CasHandlerTestCase(HomeserverTestCase):
             auth_provider_session_id=None,
         )
 
+    @override_config({"cas_config": {"enable_registration": False}})
+    def test_map_cas_user_does_not_register_new_user(self) -> None:
+        """Ensures new users are not registered if the `enable_registration` flag is disabled."""
+
+        # stub out the auth handler
+        auth_handler = self.hs.get_auth_handler()
+        auth_handler.complete_sso_login = AsyncMock()  # type: ignore[method-assign]
+
+        cas_response = CasResponse("test_user", {})
+        request = _mock_request()
+        self.get_success(
+            self.handler._handle_cas_response(request, cas_response, "redirect_uri", "")
+        )
+
+        # check that, as expected, the auth handler was not called
+        auth_handler.complete_sso_login.assert_not_called()
+
 
 def _mock_request() -> Mock:
     """Returns a mock which will stand in as a SynapseRequest"""
-- 
cgit 1.5.1


From 13e9cad537a16108b0cb544ccdc24e7dc2ca33ae Mon Sep 17 00:00:00 2001
From: Marcel <MTRNord@users.noreply.github.com>
Date: Wed, 6 Sep 2023 21:19:17 +0200
Subject: Send the opentracing span information to appservices (#16227)

---
 changelog.d/16227.feature    |  1 +
 synapse/appservice/api.py    | 32 ++++++++++++++++++++++++--------
 tests/appservice/test_api.py | 18 ++++++++++++------
 3 files changed, 37 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/16227.feature

(limited to 'synapse')

diff --git a/changelog.d/16227.feature b/changelog.d/16227.feature
new file mode 100644
index 0000000000..510062b622
--- /dev/null
+++ b/changelog.d/16227.feature
@@ -0,0 +1 @@
+Add span information to requests sent to appservices. Contributed by MTRNord.
\ No newline at end of file
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index de7a94bf26..b1523be208 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -40,6 +40,7 @@ from synapse.appservice import (
 from synapse.events import EventBase
 from synapse.events.utils import SerializeEventConfig, serialize_event
 from synapse.http.client import SimpleHttpClient, is_unknown_endpoint
+from synapse.logging import opentracing
 from synapse.types import DeviceListUpdates, JsonDict, ThirdPartyInstanceID
 from synapse.util.caches.response_cache import ResponseCache
 
@@ -125,6 +126,17 @@ class ApplicationServiceApi(SimpleHttpClient):
             hs.get_clock(), "as_protocol_meta", timeout_ms=HOUR_IN_MS
         )
 
+    def _get_headers(self, service: "ApplicationService") -> Dict[bytes, List[bytes]]:
+        """This makes sure we always have the auth header and opentracing headers set."""
+
+        # This is also ensured before in the functions. However this is needed to please
+        # the typechecks.
+        assert service.hs_token is not None
+
+        headers = {b"Authorization": [b"Bearer " + service.hs_token.encode("ascii")]}
+        opentracing.inject_header_dict(headers, check_destination=False)
+        return headers
+
     async def query_user(self, service: "ApplicationService", user_id: str) -> bool:
         if service.url is None:
             return False
@@ -136,10 +148,11 @@ class ApplicationServiceApi(SimpleHttpClient):
             args = None
             if self.config.use_appservice_legacy_authorization:
                 args = {"access_token": service.hs_token}
+
             response = await self.get_json(
                 f"{service.url}{APP_SERVICE_PREFIX}/users/{urllib.parse.quote(user_id)}",
                 args,
-                headers={"Authorization": [f"Bearer {service.hs_token}"]},
+                headers=self._get_headers(service),
             )
             if response is not None:  # just an empty json object
                 return True
@@ -162,10 +175,11 @@ class ApplicationServiceApi(SimpleHttpClient):
             args = None
             if self.config.use_appservice_legacy_authorization:
                 args = {"access_token": service.hs_token}
+
             response = await self.get_json(
                 f"{service.url}{APP_SERVICE_PREFIX}/rooms/{urllib.parse.quote(alias)}",
                 args,
-                headers={"Authorization": [f"Bearer {service.hs_token}"]},
+                headers=self._get_headers(service),
             )
             if response is not None:  # just an empty json object
                 return True
@@ -203,10 +217,11 @@ class ApplicationServiceApi(SimpleHttpClient):
                     **fields,
                     b"access_token": service.hs_token,
                 }
+
             response = await self.get_json(
                 f"{service.url}{APP_SERVICE_PREFIX}/thirdparty/{kind}/{urllib.parse.quote(protocol)}",
                 args=args,
-                headers={"Authorization": [f"Bearer {service.hs_token}"]},
+                headers=self._get_headers(service),
             )
             if not isinstance(response, list):
                 logger.warning(
@@ -243,10 +258,11 @@ class ApplicationServiceApi(SimpleHttpClient):
                 args = None
                 if self.config.use_appservice_legacy_authorization:
                     args = {"access_token": service.hs_token}
+
                 info = await self.get_json(
                     f"{service.url}{APP_SERVICE_PREFIX}/thirdparty/protocol/{urllib.parse.quote(protocol)}",
                     args,
-                    headers={"Authorization": [f"Bearer {service.hs_token}"]},
+                    headers=self._get_headers(service),
                 )
 
                 if not _is_valid_3pe_metadata(info):
@@ -283,7 +299,7 @@ class ApplicationServiceApi(SimpleHttpClient):
         await self.post_json_get_json(
             uri=f"{service.url}{APP_SERVICE_PREFIX}/ping",
             post_json={"transaction_id": txn_id},
-            headers={"Authorization": [f"Bearer {service.hs_token}"]},
+            headers=self._get_headers(service),
         )
 
     async def push_bulk(
@@ -364,7 +380,7 @@ class ApplicationServiceApi(SimpleHttpClient):
                 f"{service.url}{APP_SERVICE_PREFIX}/transactions/{urllib.parse.quote(str(txn_id))}",
                 json_body=body,
                 args=args,
-                headers={"Authorization": [f"Bearer {service.hs_token}"]},
+                headers=self._get_headers(service),
             )
             if logger.isEnabledFor(logging.DEBUG):
                 logger.debug(
@@ -437,7 +453,7 @@ class ApplicationServiceApi(SimpleHttpClient):
             response = await self.post_json_get_json(
                 uri,
                 body,
-                headers={"Authorization": [f"Bearer {service.hs_token}"]},
+                headers=self._get_headers(service),
             )
         except HttpResponseException as e:
             # The appservice doesn't support this endpoint.
@@ -498,7 +514,7 @@ class ApplicationServiceApi(SimpleHttpClient):
             response = await self.post_json_get_json(
                 uri,
                 query,
-                headers={"Authorization": [f"Bearer {service.hs_token}"]},
+                headers=self._get_headers(service),
             )
         except HttpResponseException as e:
             # The appservice doesn't support this endpoint.
diff --git a/tests/appservice/test_api.py b/tests/appservice/test_api.py
index 75fb5fae6b..366b6fd5f0 100644
--- a/tests/appservice/test_api.py
+++ b/tests/appservice/test_api.py
@@ -76,7 +76,7 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
             headers: Mapping[Union[str, bytes], Sequence[Union[str, bytes]]],
         ) -> List[JsonDict]:
             # Ensure the access token is passed as a header.
-            if not headers or not headers.get("Authorization"):
+            if not headers or not headers.get(b"Authorization"):
                 raise RuntimeError("Access token not provided")
             # ... and not as a query param
             if b"access_token" in args:
@@ -84,7 +84,9 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
                     "Access token should not be passed as a query param."
                 )
 
-            self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"])
+            self.assertEqual(
+                headers.get(b"Authorization"), [f"Bearer {TOKEN}".encode()]
+            )
             self.request_url = url
             if url == URL_USER:
                 return SUCCESS_RESULT_USER
@@ -152,11 +154,13 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
             # Ensure the access token is passed as a both a query param and in the headers.
             if not args.get(b"access_token"):
                 raise RuntimeError("Access token should be provided in query params.")
-            if not headers or not headers.get("Authorization"):
+            if not headers or not headers.get(b"Authorization"):
                 raise RuntimeError("Access token should be provided in auth headers.")
 
             self.assertEqual(args.get(b"access_token"), TOKEN)
-            self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"])
+            self.assertEqual(
+                headers.get(b"Authorization"), [f"Bearer {TOKEN}".encode()]
+            )
             self.request_url = url
             if url == URL_USER:
                 return SUCCESS_RESULT_USER
@@ -208,10 +212,12 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase):
             headers: Mapping[Union[str, bytes], Sequence[Union[str, bytes]]],
         ) -> JsonDict:
             # Ensure the access token is passed as both a header and query arg.
-            if not headers.get("Authorization"):
+            if not headers.get(b"Authorization"):
                 raise RuntimeError("Access token not provided")
 
-            self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"])
+            self.assertEqual(
+                headers.get(b"Authorization"), [f"Bearer {TOKEN}".encode()]
+            )
             return RESPONSE
 
         # We assign to a method, which mypy doesn't like.
-- 
cgit 1.5.1


From 8940d1b28ecbaf9185459e2af62169ecf39a96f5 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Thu, 7 Sep 2023 10:26:07 +0100
Subject: Add `/notifications` endpoint to workers (#16265)

---
 changelog.d/16265.feature                          |  1 +
 docker/configure_workers_and_start.py              |  1 +
 docs/workers.md                                    |  1 +
 synapse/rest/__init__.py                           |  2 +-
 synapse/rest/client/notifications.py               |  2 +
 .../storage/databases/main/event_push_actions.py   | 72 +++++++++++-----------
 6 files changed, 42 insertions(+), 37 deletions(-)
 create mode 100644 changelog.d/16265.feature

(limited to 'synapse')

diff --git a/changelog.d/16265.feature b/changelog.d/16265.feature
new file mode 100644
index 0000000000..3ffa16dbcb
--- /dev/null
+++ b/changelog.d/16265.feature
@@ -0,0 +1 @@
+Allow `/notifications` endpoint to be routed to workers.
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py
index 400a7515aa..62952e6b26 100755
--- a/docker/configure_workers_and_start.py
+++ b/docker/configure_workers_and_start.py
@@ -183,6 +183,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
             "^/_matrix/client/(r0|v3|unstable)/password_policy$",
             "^/_matrix/client/(api/v1|r0|v3|unstable)/directory/room/.*$",
             "^/_matrix/client/(r0|v3|unstable)/capabilities$",
+            "^/_matrix/client/(r0|v3|unstable)/notifications$",
         ],
         "shared_extra_conf": {},
         "worker_extra_conf": "",
diff --git a/docs/workers.md b/docs/workers.md
index 24bd22724e..dc76b073de 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -246,6 +246,7 @@ information.
     ^/_matrix/client/(r0|v3|unstable)/user/.*/filter(/|$)
     ^/_matrix/client/(api/v1|r0|v3|unstable)/directory/room/.*$
     ^/_matrix/client/(r0|v3|unstable)/capabilities$
+    ^/_matrix/client/(r0|v3|unstable)/notifications$
 
     # Encryption requests
     ^/_matrix/client/(r0|v3|unstable)/keys/query$
diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py
index df0845edb2..1be9c47c61 100644
--- a/synapse/rest/__init__.py
+++ b/synapse/rest/__init__.py
@@ -123,7 +123,7 @@ class ClientRestResource(JsonResource):
         if is_main_process:
             report_event.register_servlets(hs, client_resource)
             openid.register_servlets(hs, client_resource)
-            notifications.register_servlets(hs, client_resource)
+        notifications.register_servlets(hs, client_resource)
         devices.register_servlets(hs, client_resource)
         if is_main_process:
             thirdparty.register_servlets(hs, client_resource)
diff --git a/synapse/rest/client/notifications.py b/synapse/rest/client/notifications.py
index ea10042569..e7fe1332e7 100644
--- a/synapse/rest/client/notifications.py
+++ b/synapse/rest/client/notifications.py
@@ -36,6 +36,8 @@ logger = logging.getLogger(__name__)
 class NotificationsServlet(RestServlet):
     PATTERNS = client_patterns("/notifications$")
 
+    CATEGORY = "Client API requests"
+
     def __init__(self, hs: "HomeServer"):
         super().__init__()
         self.store = hs.get_datastores().main
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 07bda7d6be..b958a39aeb 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -1740,42 +1740,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             # We sleep to ensure that we don't overwhelm the DB.
             await self._clock.sleep(1.0)
 
-
-class EventPushActionsStore(EventPushActionsWorkerStore):
-    EPA_HIGHLIGHT_INDEX = "epa_highlight_index"
-
-    def __init__(
-        self,
-        database: DatabasePool,
-        db_conn: LoggingDatabaseConnection,
-        hs: "HomeServer",
-    ):
-        super().__init__(database, db_conn, hs)
-
-        self.db_pool.updates.register_background_index_update(
-            self.EPA_HIGHLIGHT_INDEX,
-            index_name="event_push_actions_u_highlight",
-            table="event_push_actions",
-            columns=["user_id", "stream_ordering"],
-        )
-
-        self.db_pool.updates.register_background_index_update(
-            "event_push_actions_highlights_index",
-            index_name="event_push_actions_highlights_index",
-            table="event_push_actions",
-            columns=["user_id", "room_id", "topological_ordering", "stream_ordering"],
-            where_clause="highlight=1",
-        )
-
-        # Add index to make deleting old push actions faster.
-        self.db_pool.updates.register_background_index_update(
-            "event_push_actions_stream_highlight_index",
-            index_name="event_push_actions_stream_highlight_index",
-            table="event_push_actions",
-            columns=["highlight", "stream_ordering"],
-            where_clause="highlight=0",
-        )
-
     async def get_push_actions_for_user(
         self,
         user_id: str,
@@ -1834,6 +1798,42 @@ class EventPushActionsStore(EventPushActionsWorkerStore):
         ]
 
 
+class EventPushActionsStore(EventPushActionsWorkerStore):
+    EPA_HIGHLIGHT_INDEX = "epa_highlight_index"
+
+    def __init__(
+        self,
+        database: DatabasePool,
+        db_conn: LoggingDatabaseConnection,
+        hs: "HomeServer",
+    ):
+        super().__init__(database, db_conn, hs)
+
+        self.db_pool.updates.register_background_index_update(
+            self.EPA_HIGHLIGHT_INDEX,
+            index_name="event_push_actions_u_highlight",
+            table="event_push_actions",
+            columns=["user_id", "stream_ordering"],
+        )
+
+        self.db_pool.updates.register_background_index_update(
+            "event_push_actions_highlights_index",
+            index_name="event_push_actions_highlights_index",
+            table="event_push_actions",
+            columns=["user_id", "room_id", "topological_ordering", "stream_ordering"],
+            where_clause="highlight=1",
+        )
+
+        # Add index to make deleting old push actions faster.
+        self.db_pool.updates.register_background_index_update(
+            "event_push_actions_stream_highlight_index",
+            index_name="event_push_actions_stream_highlight_index",
+            table="event_push_actions",
+            columns=["highlight", "stream_ordering"],
+            where_clause="highlight=0",
+        )
+
+
 def _action_has_highlight(actions: Collection[Union[Mapping, str]]) -> bool:
     for action in actions:
         if not isinstance(action, dict):
-- 
cgit 1.5.1


From 1cd410a7833984ef69a7dcecf8997f4c45d609cd Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Thu, 7 Sep 2023 13:45:43 +0100
Subject: Recheck if remote device is cached before requesting it (#16252)

This fixes a bug where we could get stuck re-requesting the device over
replication again and again.
---
 changelog.d/16252.bugfix                  |  1 +
 synapse/handlers/device.py                | 21 +++++++++++++++------
 synapse/replication/http/devices.py       |  4 ++--
 synapse/storage/databases/main/devices.py | 26 +++++++++++++++++---------
 4 files changed, 35 insertions(+), 17 deletions(-)
 create mode 100644 changelog.d/16252.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16252.bugfix b/changelog.d/16252.bugfix
new file mode 100644
index 0000000000..881bc00e61
--- /dev/null
+++ b/changelog.d/16252.bugfix
@@ -0,0 +1 @@
+Fix bug when using workers where Synapse could end up re-requesting the same remote device repeatedly.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 9e52af5f13..9356ae998e 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -1030,7 +1030,7 @@ class DeviceListWorkerUpdater:
 
     async def multi_user_device_resync(
         self, user_ids: List[str], mark_failed_as_stale: bool = True
-    ) -> Dict[str, Optional[JsonDict]]:
+    ) -> Dict[str, Optional[JsonMapping]]:
         """
         Like `user_device_resync` but operates on multiple users **from the same origin**
         at once.
@@ -1059,6 +1059,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
         self._notifier = hs.get_notifier()
 
         self._remote_edu_linearizer = Linearizer(name="remote_device_list")
+        self._resync_linearizer = Linearizer(name="remote_device_resync")
 
         # user_id -> list of updates waiting to be handled.
         self._pending_updates: Dict[
@@ -1301,7 +1302,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
 
     async def multi_user_device_resync(
         self, user_ids: List[str], mark_failed_as_stale: bool = True
-    ) -> Dict[str, Optional[JsonDict]]:
+    ) -> Dict[str, Optional[JsonMapping]]:
         """
         Like `user_device_resync` but operates on multiple users **from the same origin**
         at once.
@@ -1321,9 +1322,11 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
         failed = set()
         # TODO(Perf): Actually batch these up
         for user_id in user_ids:
-            user_result, user_failed = await self._user_device_resync_returning_failed(
-                user_id
-            )
+            async with self._resync_linearizer.queue(user_id):
+                (
+                    user_result,
+                    user_failed,
+                ) = await self._user_device_resync_returning_failed(user_id)
             result[user_id] = user_result
             if user_failed:
                 failed.add(user_id)
@@ -1335,7 +1338,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
 
     async def _user_device_resync_returning_failed(
         self, user_id: str
-    ) -> Tuple[Optional[JsonDict], bool]:
+    ) -> Tuple[Optional[JsonMapping], bool]:
         """Fetches all devices for a user and updates the device cache with them.
 
         Args:
@@ -1348,6 +1351,12 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
               e.g. due to a connection problem.
             - True iff the resync failed and the device list should be marked as stale.
         """
+        # Check that we haven't gone and fetched the devices since we last
+        # checked if we needed to resync these device lists.
+        if await self.store.get_users_whose_devices_are_cached([user_id]):
+            cached = await self.store.get_cached_devices_for_user(user_id)
+            return cached, False
+
         logger.debug("Attempting to resync the device list for %s", user_id)
         log_kv({"message": "Doing resync to update device list."})
         # Fetch all devices for the user.
diff --git a/synapse/replication/http/devices.py b/synapse/replication/http/devices.py
index 209833d287..b8198e059c 100644
--- a/synapse/replication/http/devices.py
+++ b/synapse/replication/http/devices.py
@@ -20,7 +20,7 @@ from twisted.web.server import Request
 from synapse.http.server import HttpServer
 from synapse.logging.opentracing import active_span
 from synapse.replication.http._base import ReplicationEndpoint
-from synapse.types import JsonDict
+from synapse.types import JsonDict, JsonMapping
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -82,7 +82,7 @@ class ReplicationMultiUserDevicesResyncRestServlet(ReplicationEndpoint):
 
     async def _handle_request(  # type: ignore[override]
         self, request: Request, content: JsonDict
-    ) -> Tuple[int, Dict[str, Optional[JsonDict]]]:
+    ) -> Tuple[int, Dict[str, Optional[JsonMapping]]]:
         user_ids: List[str] = content["user_ids"]
 
         logger.info("Resync for %r", user_ids)
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 324fdfa892..70faf4b1ec 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -759,18 +759,10 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
             mapping of user_id -> device_id -> device_info.
         """
         unique_user_ids = user_ids | {user_id for user_id, _ in user_and_device_ids}
-        user_map = await self.get_device_list_last_stream_id_for_remotes(
-            list(unique_user_ids)
-        )
 
-        # We go and check if any of the users need to have their device lists
-        # resynced. If they do then we remove them from the cached list.
-        users_needing_resync = await self.get_user_ids_requiring_device_list_resync(
+        user_ids_in_cache = await self.get_users_whose_devices_are_cached(
             unique_user_ids
         )
-        user_ids_in_cache = {
-            user_id for user_id, stream_id in user_map.items() if stream_id
-        } - users_needing_resync
         user_ids_not_in_cache = unique_user_ids - user_ids_in_cache
 
         # First fetch all the users which all devices are to be returned.
@@ -792,6 +784,22 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
 
         return user_ids_not_in_cache, results
 
+    async def get_users_whose_devices_are_cached(
+        self, user_ids: StrCollection
+    ) -> Set[str]:
+        """Checks which of the given users we have cached the devices for."""
+        user_map = await self.get_device_list_last_stream_id_for_remotes(user_ids)
+
+        # We go and check if any of the users need to have their device lists
+        # resynced. If they do then we remove them from the cached list.
+        users_needing_resync = await self.get_user_ids_requiring_device_list_resync(
+            user_ids
+        )
+        user_ids_in_cache = {
+            user_id for user_id, stream_id in user_map.items() if stream_id
+        } - users_needing_resync
+        return user_ids_in_cache
+
     @cached(num_args=2, tree=True)
     async def _get_cached_user_device(self, user_id: str, device_id: str) -> JsonDict:
         content = await self.db_pool.simple_select_one_onecol(
-- 
cgit 1.5.1


From d23c394669660a7226c818f222a76ec0905e126e Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Fri, 8 Sep 2023 13:06:00 +0100
Subject: Reduce CPU overhead of change password endpoint  (#16264)

---
 changelog.d/16264.misc         |   1 +
 synapse/rest/client/account.py | 112 ++++++++++++++++++++---------------------
 2 files changed, 55 insertions(+), 58 deletions(-)
 create mode 100644 changelog.d/16264.misc

(limited to 'synapse')

diff --git a/changelog.d/16264.misc b/changelog.d/16264.misc
new file mode 100644
index 0000000000..a744434bef
--- /dev/null
+++ b/changelog.d/16264.misc
@@ -0,0 +1 @@
+Reduce CPU overhead of change password endpoint.
diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py
index 679ab9f266..196b292890 100644
--- a/synapse/rest/client/account.py
+++ b/synapse/rest/client/account.py
@@ -179,85 +179,81 @@ class PasswordRestServlet(RestServlet):
         #
         # In the second case, we require a password to confirm their identity.
 
-        requester = None
-        if self.auth.has_access_token(request):
-            requester = await self.auth.get_user_by_req(request)
-            try:
+        try:
+            requester = None
+            if self.auth.has_access_token(request):
+                requester = await self.auth.get_user_by_req(request)
                 params, session_id = await self.auth_handler.validate_user_via_ui_auth(
                     requester,
                     request,
                     body.dict(exclude_unset=True),
                     "modify your account password",
                 )
-            except InteractiveAuthIncompleteError as e:
-                # The user needs to provide more steps to complete auth, but
-                # they're not required to provide the password again.
-                #
-                # If a password is available now, hash the provided password and
-                # store it for later.
-                if new_password:
-                    new_password_hash = await self.auth_handler.hash(new_password)
-                    await self.auth_handler.set_session_data(
-                        e.session_id,
-                        UIAuthSessionDataConstants.PASSWORD_HASH,
-                        new_password_hash,
-                    )
-                raise
-            user_id = requester.user.to_string()
-        else:
-            try:
+                user_id = requester.user.to_string()
+            else:
                 result, params, session_id = await self.auth_handler.check_ui_auth(
                     [[LoginType.EMAIL_IDENTITY]],
                     request,
                     body.dict(exclude_unset=True),
                     "modify your account password",
                 )
-            except InteractiveAuthIncompleteError as e:
-                # The user needs to provide more steps to complete auth, but
-                # they're not required to provide the password again.
-                #
-                # If a password is available now, hash the provided password and
-                # store it for later.
-                if new_password:
-                    new_password_hash = await self.auth_handler.hash(new_password)
-                    await self.auth_handler.set_session_data(
-                        e.session_id,
-                        UIAuthSessionDataConstants.PASSWORD_HASH,
-                        new_password_hash,
+
+                if LoginType.EMAIL_IDENTITY in result:
+                    threepid = result[LoginType.EMAIL_IDENTITY]
+                    if "medium" not in threepid or "address" not in threepid:
+                        raise SynapseError(500, "Malformed threepid")
+                    if threepid["medium"] == "email":
+                        # For emails, canonicalise the address.
+                        # We store all email addresses canonicalised in the DB.
+                        # (See add_threepid in synapse/handlers/auth.py)
+                        try:
+                            threepid["address"] = validate_email(threepid["address"])
+                        except ValueError as e:
+                            raise SynapseError(400, str(e))
+                    # if using email, we must know about the email they're authing with!
+                    threepid_user_id = await self.datastore.get_user_id_by_threepid(
+                        threepid["medium"], threepid["address"]
                     )
+                    if not threepid_user_id:
+                        raise SynapseError(
+                            404, "Email address not found", Codes.NOT_FOUND
+                        )
+                    user_id = threepid_user_id
+                else:
+                    logger.error("Auth succeeded but no known type! %r", result.keys())
+                    raise SynapseError(500, "", Codes.UNKNOWN)
+
+        except InteractiveAuthIncompleteError as e:
+            # The user needs to provide more steps to complete auth, but
+            # they're not required to provide the password again.
+            #
+            # If a password is available now, hash the provided password and
+            # store it for later. We only do this if we don't already have the
+            # password hash stored, to avoid repeatedly hashing the password.
+
+            if not new_password:
                 raise
 
-            if LoginType.EMAIL_IDENTITY in result:
-                threepid = result[LoginType.EMAIL_IDENTITY]
-                if "medium" not in threepid or "address" not in threepid:
-                    raise SynapseError(500, "Malformed threepid")
-                if threepid["medium"] == "email":
-                    # For emails, canonicalise the address.
-                    # We store all email addresses canonicalised in the DB.
-                    # (See add_threepid in synapse/handlers/auth.py)
-                    try:
-                        threepid["address"] = validate_email(threepid["address"])
-                    except ValueError as e:
-                        raise SynapseError(400, str(e))
-                # if using email, we must know about the email they're authing with!
-                threepid_user_id = await self.datastore.get_user_id_by_threepid(
-                    threepid["medium"], threepid["address"]
-                )
-                if not threepid_user_id:
-                    raise SynapseError(404, "Email address not found", Codes.NOT_FOUND)
-                user_id = threepid_user_id
-            else:
-                logger.error("Auth succeeded but no known type! %r", result.keys())
-                raise SynapseError(500, "", Codes.UNKNOWN)
+            existing_session_password_hash = await self.auth_handler.get_session_data(
+                e.session_id, UIAuthSessionDataConstants.PASSWORD_HASH, None
+            )
+            if existing_session_password_hash:
+                raise
+
+            new_password_hash = await self.auth_handler.hash(new_password)
+            await self.auth_handler.set_session_data(
+                e.session_id,
+                UIAuthSessionDataConstants.PASSWORD_HASH,
+                new_password_hash,
+            )
+            raise
 
         # If we have a password in this request, prefer it. Otherwise, use the
         # password hash from an earlier request.
         if new_password:
             password_hash: Optional[str] = await self.auth_handler.hash(new_password)
         elif session_id is not None:
-            password_hash = await self.auth_handler.get_session_data(
-                session_id, UIAuthSessionDataConstants.PASSWORD_HASH, None
-            )
+            password_hash = existing_session_password_hash
         else:
             # UI validation was skipped, but the request did not include a new
             # password.
-- 
cgit 1.5.1


From 69b74d9330e42fc91a9c7423d00a06cd6d3732bf Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 8 Sep 2023 08:57:56 -0400
Subject: Avoid temporary storage of sensitive information. (#16272)

During the UI auth process, avoid storing sensitive information
into the database.
---
 changelog.d/16272.bugfix          |  1 +
 synapse/rest/client/account.py    |  4 ++--
 tests/rest/client/test_account.py | 13 +++++++++++++
 3 files changed, 16 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/16272.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16272.bugfix b/changelog.d/16272.bugfix
new file mode 100644
index 0000000000..afb22a999f
--- /dev/null
+++ b/changelog.d/16272.bugfix
@@ -0,0 +1 @@
+Avoid temporary storage of sensitive information.
diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py
index 196b292890..49cd0805fd 100644
--- a/synapse/rest/client/account.py
+++ b/synapse/rest/client/account.py
@@ -186,7 +186,7 @@ class PasswordRestServlet(RestServlet):
                 params, session_id = await self.auth_handler.validate_user_via_ui_auth(
                     requester,
                     request,
-                    body.dict(exclude_unset=True),
+                    body.dict(exclude_unset=True, exclude={"new_password"}),
                     "modify your account password",
                 )
                 user_id = requester.user.to_string()
@@ -194,7 +194,7 @@ class PasswordRestServlet(RestServlet):
                 result, params, session_id = await self.auth_handler.check_ui_auth(
                     [[LoginType.EMAIL_IDENTITY]],
                     request,
-                    body.dict(exclude_unset=True),
+                    body.dict(exclude_unset=True, exclude={"new_password"}),
                     "modify your account password",
                 )
 
diff --git a/tests/rest/client/test_account.py b/tests/rest/client/test_account.py
index e9f495e206..4a0eca5b30 100644
--- a/tests/rest/client/test_account.py
+++ b/tests/rest/client/test_account.py
@@ -31,6 +31,7 @@ from synapse.rest import admin
 from synapse.rest.client import account, login, register, room
 from synapse.rest.synapse.client.password_reset import PasswordResetSubmitTokenResource
 from synapse.server import HomeServer
+from synapse.storage._base import db_to_json
 from synapse.types import JsonDict, UserID
 from synapse.util import Clock
 
@@ -134,6 +135,18 @@ class PasswordResetTestCase(unittest.HomeserverTestCase):
         # Assert we can't log in with the old password
         self.attempt_wrong_password_login("kermit", old_password)
 
+        # Check that the UI Auth information doesn't store the password in the database.
+        #
+        # Note that we don't have the UI Auth session ID, so just pull out the single
+        # row.
+        ui_auth_data = self.get_success(
+            self.store.db_pool.simple_select_one(
+                "ui_auth_sessions", keyvalues={}, retcols=("clientdict",)
+            )
+        )
+        client_dict = db_to_json(ui_auth_data["clientdict"])
+        self.assertNotIn("new_password", client_dict)
+
     @override_config({"rc_3pid_validation": {"burst_count": 3}})
     def test_ratelimit_by_email(self) -> None:
         """Test that we ratelimit /requestToken for the same email."""
-- 
cgit 1.5.1


From f43d99462413b0b572da2e52037db8b1135f5ea6 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Fri, 8 Sep 2023 14:43:01 +0100
Subject: Fix bug with new task scheduler using lots of CPU. (#16278)

Using the new `TaskScheduler` meant that we'd create lots of new
metrics (due to adding task ID to the desc of background process),
resulting in requests for metrics taking an increasing amount of CPU.
---
 changelog.d/16278.misc         |  1 +
 synapse/util/task_scheduler.py | 43 +++++++++++++++++++++---------------------
 2 files changed, 23 insertions(+), 21 deletions(-)
 create mode 100644 changelog.d/16278.misc

(limited to 'synapse')

diff --git a/changelog.d/16278.misc b/changelog.d/16278.misc
new file mode 100644
index 0000000000..e82a470c45
--- /dev/null
+++ b/changelog.d/16278.misc
@@ -0,0 +1 @@
+Fix using the new task scheduler causing lots of CPU to be used.
diff --git a/synapse/util/task_scheduler.py b/synapse/util/task_scheduler.py
index 9b2581e51a..b7de201bde 100644
--- a/synapse/util/task_scheduler.py
+++ b/synapse/util/task_scheduler.py
@@ -19,6 +19,7 @@ from prometheus_client import Gauge
 
 from twisted.python.failure import Failure
 
+from synapse.logging.context import nested_logging_context
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.types import JsonMapping, ScheduledTask, TaskStatus
 from synapse.util.stringutils import random_string
@@ -316,26 +317,27 @@ class TaskScheduler:
         function = self._actions[task.action]
 
         async def wrapper() -> None:
-            try:
-                (status, result, error) = await function(task)
-            except Exception:
-                f = Failure()
-                logger.error(
-                    f"scheduled task {task.id} failed",
-                    exc_info=(f.type, f.value, f.getTracebackObject()),
+            with nested_logging_context(task.id):
+                try:
+                    (status, result, error) = await function(task)
+                except Exception:
+                    f = Failure()
+                    logger.error(
+                        f"scheduled task {task.id} failed",
+                        exc_info=(f.type, f.value, f.getTracebackObject()),
+                    )
+                    status = TaskStatus.FAILED
+                    result = None
+                    error = f.getErrorMessage()
+
+                await self._store.update_scheduled_task(
+                    task.id,
+                    self._clock.time_msec(),
+                    status=status,
+                    result=result,
+                    error=error,
                 )
-                status = TaskStatus.FAILED
-                result = None
-                error = f.getErrorMessage()
-
-            await self._store.update_scheduled_task(
-                task.id,
-                self._clock.time_msec(),
-                status=status,
-                result=result,
-                error=error,
-            )
-            self._running_tasks.remove(task.id)
+                self._running_tasks.remove(task.id)
 
         if len(self._running_tasks) >= TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS:
             return
@@ -353,5 +355,4 @@ class TaskScheduler:
 
         self._running_tasks.add(task.id)
         await self.update_task(task.id, status=TaskStatus.ACTIVE)
-        description = f"{task.id}-{task.action}"
-        run_as_background_process(description, wrapper)
+        run_as_background_process(task.action, wrapper)
-- 
cgit 1.5.1


From c1c6c95d72b5c9fc6c0e527eeb6b9d3a59889b16 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 8 Sep 2023 14:50:13 +0100
Subject: Log values at DEBUG level with execute_values (#16281)

---
 changelog.d/16281.misc      | 1 +
 synapse/storage/database.py | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/16281.misc

(limited to 'synapse')

diff --git a/changelog.d/16281.misc b/changelog.d/16281.misc
new file mode 100644
index 0000000000..de48396aff
--- /dev/null
+++ b/changelog.d/16281.misc
@@ -0,0 +1 @@
+Include values in SQL debug when using `execute_values` with Postgres.
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 55ac313f33..6c5fcdcec3 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -422,10 +422,11 @@ class LoggingTransaction:
         return self._do_execute(
             # TODO: is it safe for values to be Iterable[Iterable[Any]] here?
             # https://www.psycopg.org/docs/extras.html?highlight=execute_batch#psycopg2.extras.execute_values says values should be Sequence[Sequence]
-            lambda the_sql: execute_values(
-                self.txn, the_sql, values, template=template, fetch=fetch
+            lambda the_sql, the_values: execute_values(
+                self.txn, the_sql, the_values, template=template, fetch=fetch
             ),
             sql,
+            values,
         )
 
     def execute(self, sql: str, parameters: SQLQueryParameters = ()) -> None:
-- 
cgit 1.5.1


From aa483cb4c905bbe483ffe8e8a8f439655a57481b Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Fri, 8 Sep 2023 11:24:36 -0400
Subject: Update ruff config (#16283)

Enable additional checks & clean-up unneeded configuration.
---
 changelog.d/16283.misc                          |  1 +
 contrib/cmdclient/http.py                       |  2 --
 docker/start.py                                 |  2 +-
 pyproject.toml                                  | 28 +++++++++++++++----------
 scripts-dev/mypy_synapse_plugin.py              |  7 ++++---
 synapse/_scripts/update_synapse_database.py     |  1 -
 synapse/events/snapshot.py                      |  2 --
 synapse/media/url_previewer.py                  |  4 +---
 synapse/storage/background_updates.py           |  2 --
 synmark/suites/logging.py                       |  2 +-
 tests/handlers/test_device.py                   |  2 +-
 tests/handlers/test_federation.py               |  2 +-
 tests/logging/test_remote_handler.py            | 12 +++++------
 tests/replication/tcp/streams/test_to_device.py |  2 +-
 tests/rest/admin/test_federation.py             |  6 +++---
 tests/rest/client/test_account.py               |  2 +-
 tests/rest/client/test_login.py                 |  8 +++----
 tests/rest/client/test_register.py              |  6 +++---
 tests/storage/databases/main/test_lock.py       |  2 +-
 tests/storage/test_event_chain.py               |  6 +++---
 tests/storage/test_event_federation.py          |  6 +++---
 tests/storage/test_profile.py                   |  4 ++--
 tests/storage/test_txn_limit.py                 |  2 +-
 tests/storage/test_user_filters.py              |  4 ++--
 tests/test_visibility.py                        |  8 +++----
 tests/util/caches/test_descriptors.py           |  4 ++--
 26 files changed, 63 insertions(+), 64 deletions(-)
 create mode 100644 changelog.d/16283.misc

(limited to 'synapse')

diff --git a/changelog.d/16283.misc b/changelog.d/16283.misc
new file mode 100644
index 0000000000..4b9d6f76ae
--- /dev/null
+++ b/changelog.d/16283.misc
@@ -0,0 +1 @@
+Enable additional linting checks.
diff --git a/contrib/cmdclient/http.py b/contrib/cmdclient/http.py
index 1310f078e3..508de5dcbd 100644
--- a/contrib/cmdclient/http.py
+++ b/contrib/cmdclient/http.py
@@ -37,7 +37,6 @@ class HttpClient:
             Deferred: Succeeds when we get a 2xx HTTP response. The result
             will be the decoded JSON body.
         """
-        pass
 
     def get_json(self, url, args=None):
         """Gets some json from the given host homeserver and path
@@ -53,7 +52,6 @@ class HttpClient:
             Deferred: Succeeds when we get a 2xx HTTP response. The result
             will be the decoded JSON body.
         """
-        pass
 
 
 class TwistedHttpClient(HttpClient):
diff --git a/docker/start.py b/docker/start.py
index aebc7e4aaa..12c444da9a 100755
--- a/docker/start.py
+++ b/docker/start.py
@@ -239,7 +239,7 @@ def main(args: List[str], environ: MutableMapping[str, str]) -> None:
         log("Could not find %s, will not use" % (jemallocpath,))
 
     # if there are no config files passed to synapse, try adding the default file
-    if not any(p.startswith("--config-path") or p.startswith("-c") for p in args):
+    if not any(p.startswith(("--config-path", "-c")) for p in args):
         config_dir = environ.get("SYNAPSE_CONFIG_DIR", "/data")
         config_path = environ.get(
             "SYNAPSE_CONFIG_PATH", config_dir + "/homeserver.yaml"
diff --git a/pyproject.toml b/pyproject.toml
index 5b43abe907..8747782b29 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,33 +43,39 @@ target-version = ['py38', 'py39', 'py310', 'py311']
 [tool.ruff]
 line-length = 88
 
-# See https://github.com/charliermarsh/ruff/#pycodestyle
+# See https://beta.ruff.rs/docs/rules/#error-e
 # for error codes. The ones we ignore are:
-#  E731: do not assign a lambda expression, use a def
 #  E501: Line too long (black enforces this for us)
+#  E731: do not assign a lambda expression, use a def
 #
 # flake8-bugbear compatible checks. Its error codes are described at
-# https://github.com/charliermarsh/ruff/#flake8-bugbear
-#  B019: Use of functools.lru_cache or functools.cache on methods can lead to memory leaks
+# https://beta.ruff.rs/docs/rules/#flake8-bugbear-b
 #  B023: Functions defined inside a loop must not use variables redefined in the loop
-#  B024: Abstract base class with no abstract method.
 ignore = [
-    "B019",
     "B023",
-    "B024",
     "E501",
     "E731",
 ]
 select = [
-    # pycodestyle checks.
+    # pycodestyle
     "E",
     "W",
-    # pyflakes checks.
+    # pyflakes
     "F",
-    # flake8-bugbear checks.
+    # flake8-bugbear
     "B0",
-    # flake8-comprehensions checks.
+    # flake8-comprehensions
     "C4",
+    # flake8-2020
+    "YTT",
+    # flake8-slots
+    "SLOT",
+    # flake8-debugger
+    "T10",
+    # flake8-pie
+    "PIE",
+    # flake8-executable
+    "EXE",
 ]
 
 [tool.isort]
diff --git a/scripts-dev/mypy_synapse_plugin.py b/scripts-dev/mypy_synapse_plugin.py
index 8058e9c993..a0b3854f1b 100644
--- a/scripts-dev/mypy_synapse_plugin.py
+++ b/scripts-dev/mypy_synapse_plugin.py
@@ -30,9 +30,10 @@ class SynapsePlugin(Plugin):
         self, fullname: str
     ) -> Optional[Callable[[MethodSigContext], CallableType]]:
         if fullname.startswith(
-            "synapse.util.caches.descriptors.CachedFunction.__call__"
-        ) or fullname.startswith(
-            "synapse.util.caches.descriptors._LruCachedFunction.__call__"
+            (
+                "synapse.util.caches.descriptors.CachedFunction.__call__",
+                "synapse.util.caches.descriptors._LruCachedFunction.__call__",
+            )
         ):
             return cached_function_method_signature
         return None
diff --git a/synapse/_scripts/update_synapse_database.py b/synapse/_scripts/update_synapse_database.py
index f97aecf8d5..992ae43881 100644
--- a/synapse/_scripts/update_synapse_database.py
+++ b/synapse/_scripts/update_synapse_database.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # Copyright 2019 The Matrix.org Foundation C.I.C.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py
index a9e3d4e556..5bdfa3a8ac 100644
--- a/synapse/events/snapshot.py
+++ b/synapse/events/snapshot.py
@@ -55,7 +55,6 @@ class UnpersistedEventContextBase(ABC):
         A method to convert an UnpersistedEventContext to an EventContext, suitable for
         sending to the database with the associated event.
         """
-        pass
 
     @abstractmethod
     async def get_prev_state_ids(
@@ -69,7 +68,6 @@ class UnpersistedEventContextBase(ABC):
             state_filter: specifies the type of state event to fetch from DB, example:
             EventTypes.JoinRules
         """
-        pass
 
 
 @attr.s(slots=True, auto_attribs=True)
diff --git a/synapse/media/url_previewer.py b/synapse/media/url_previewer.py
index 70b32cee17..9b5a3dd5f4 100644
--- a/synapse/media/url_previewer.py
+++ b/synapse/media/url_previewer.py
@@ -846,9 +846,7 @@ def _is_media(content_type: str) -> bool:
 
 def _is_html(content_type: str) -> bool:
     content_type = content_type.lower()
-    return content_type.startswith("text/html") or content_type.startswith(
-        "application/xhtml"
-    )
+    return content_type.startswith(("text/html", "application/xhtml"))
 
 
 def _is_json(content_type: str) -> bool:
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index 7619f405fa..99ebd96f84 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -62,7 +62,6 @@ class Constraint(metaclass=abc.ABCMeta):
     @abc.abstractmethod
     def make_check_clause(self, table: str) -> str:
         """Returns an SQL expression that checks the row passes the constraint."""
-        pass
 
     @abc.abstractmethod
     def make_constraint_clause_postgres(self) -> str:
@@ -70,7 +69,6 @@ class Constraint(metaclass=abc.ABCMeta):
 
         Only used on Postgres DBs
         """
-        pass
 
 
 @attr.s(auto_attribs=True)
diff --git a/synmark/suites/logging.py b/synmark/suites/logging.py
index 8beb077e0a..04e5b29dc9 100644
--- a/synmark/suites/logging.py
+++ b/synmark/suites/logging.py
@@ -112,7 +112,7 @@ async def main(reactor, loops):
     start = perf_counter()
 
     # Send a bunch of useful messages
-    for i in range(0, loops):
+    for i in range(loops):
         logger.info("test message %s", i)
 
         if len(handler._buffer) == handler.maximum_buffer:
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index 9659a4a355..79d327499b 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -223,7 +223,7 @@ class DeviceTestCase(unittest.HomeserverTestCase):
 
         # queue a bunch of messages in the inbox
         requester = create_requester(sender, device_id=DEVICE_ID)
-        for i in range(0, DeviceHandler.DEVICE_MSGS_DELETE_BATCH_LIMIT + 10):
+        for i in range(DeviceHandler.DEVICE_MSGS_DELETE_BATCH_LIMIT + 10):
             self.get_success(
                 self.device_message_handler.send_device_message(
                     requester, "message_type", {receiver: {"*": {"val": i}}}
diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py
index 21d63ab1f2..4fc0742413 100644
--- a/tests/handlers/test_federation.py
+++ b/tests/handlers/test_federation.py
@@ -262,7 +262,7 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase):
             if (ev.type, ev.state_key)
             in {("m.room.create", ""), ("m.room.member", remote_server_user_id)}
         ]
-        for _ in range(0, 8):
+        for _ in range(8):
             event = make_event_from_dict(
                 self.add_hashes_and_signatures_from_other_server(
                     {
diff --git a/tests/logging/test_remote_handler.py b/tests/logging/test_remote_handler.py
index 5191e31a8a..45eac100bf 100644
--- a/tests/logging/test_remote_handler.py
+++ b/tests/logging/test_remote_handler.py
@@ -78,11 +78,11 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
         logger = self.get_logger(handler)
 
         # Send some debug messages
-        for i in range(0, 3):
+        for i in range(3):
             logger.debug("debug %s" % (i,))
 
         # Send a bunch of useful messages
-        for i in range(0, 7):
+        for i in range(7):
             logger.info("info %s" % (i,))
 
         # The last debug message pushes it past the maximum buffer
@@ -108,15 +108,15 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
         logger = self.get_logger(handler)
 
         # Send some debug messages
-        for i in range(0, 3):
+        for i in range(3):
             logger.debug("debug %s" % (i,))
 
         # Send a bunch of useful messages
-        for i in range(0, 10):
+        for i in range(10):
             logger.warning("warn %s" % (i,))
 
         # Send a bunch of info messages
-        for i in range(0, 3):
+        for i in range(3):
             logger.info("info %s" % (i,))
 
         # The last debug message pushes it past the maximum buffer
@@ -144,7 +144,7 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
         logger = self.get_logger(handler)
 
         # Send a bunch of useful messages
-        for i in range(0, 20):
+        for i in range(20):
             logger.warning("warn %s" % (i,))
 
         # Allow the reconnection
diff --git a/tests/replication/tcp/streams/test_to_device.py b/tests/replication/tcp/streams/test_to_device.py
index fb9eac668f..ab379e8cf1 100644
--- a/tests/replication/tcp/streams/test_to_device.py
+++ b/tests/replication/tcp/streams/test_to_device.py
@@ -49,7 +49,7 @@ class ToDeviceStreamTestCase(BaseStreamTestCase):
 
         # add messages to the device inbox for user1 up until the
         # limit defined for a stream update batch
-        for i in range(0, _STREAM_UPDATE_TARGET_ROW_COUNT):
+        for i in range(_STREAM_UPDATE_TARGET_ROW_COUNT):
             msg["content"] = {"device": {}}
             messages = {user1: {"device": msg}}
 
diff --git a/tests/rest/admin/test_federation.py b/tests/rest/admin/test_federation.py
index 4c7864c629..0e2824d1b5 100644
--- a/tests/rest/admin/test_federation.py
+++ b/tests/rest/admin/test_federation.py
@@ -510,7 +510,7 @@ class FederationTestCase(unittest.HomeserverTestCase):
         Args:
             number_destinations: Number of destinations to be created
         """
-        for i in range(0, number_destinations):
+        for i in range(number_destinations):
             dest = f"sub{i}.example.com"
             self._create_destination(dest, 50, 50, 50, 100)
 
@@ -690,7 +690,7 @@ class DestinationMembershipTestCase(unittest.HomeserverTestCase):
         self._check_fields(channel_desc.json_body["rooms"])
 
         # test that both lists have different directions
-        for i in range(0, number_rooms):
+        for i in range(number_rooms):
             self.assertEqual(
                 channel_asc.json_body["rooms"][i]["room_id"],
                 channel_desc.json_body["rooms"][number_rooms - 1 - i]["room_id"],
@@ -777,7 +777,7 @@ class DestinationMembershipTestCase(unittest.HomeserverTestCase):
         Args:
             number_rooms: Number of rooms to be created
         """
-        for _ in range(0, number_rooms):
+        for _ in range(number_rooms):
             room_id = self.helper.create_room_as(
                 self.admin_user, tok=self.admin_user_tok
             )
diff --git a/tests/rest/client/test_account.py b/tests/rest/client/test_account.py
index 4a0eca5b30..cffbda9a7d 100644
--- a/tests/rest/client/test_account.py
+++ b/tests/rest/client/test_account.py
@@ -575,7 +575,7 @@ class DeactivateTestCase(unittest.HomeserverTestCase):
 
         # create a bunch of users and add keys for them
         users = []
-        for i in range(0, 20):
+        for i in range(20):
             user_id = self.register_user("missPiggy" + str(i), "test")
             users.append((user_id,))
 
diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py
index a2a6589564..768d7ad4c2 100644
--- a/tests/rest/client/test_login.py
+++ b/tests/rest/client/test_login.py
@@ -176,10 +176,10 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase):
     def test_POST_ratelimiting_per_address(self) -> None:
         # Create different users so we're sure not to be bothered by the per-user
         # ratelimiter.
-        for i in range(0, 6):
+        for i in range(6):
             self.register_user("kermit" + str(i), "monkey")
 
-        for i in range(0, 6):
+        for i in range(6):
             params = {
                 "type": "m.login.password",
                 "identifier": {"type": "m.id.user", "user": "kermit" + str(i)},
@@ -228,7 +228,7 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase):
     def test_POST_ratelimiting_per_account(self) -> None:
         self.register_user("kermit", "monkey")
 
-        for i in range(0, 6):
+        for i in range(6):
             params = {
                 "type": "m.login.password",
                 "identifier": {"type": "m.id.user", "user": "kermit"},
@@ -277,7 +277,7 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase):
     def test_POST_ratelimiting_per_account_failed_attempts(self) -> None:
         self.register_user("kermit", "monkey")
 
-        for i in range(0, 6):
+        for i in range(6):
             params = {
                 "type": "m.login.password",
                 "identifier": {"type": "m.id.user", "user": "kermit"},
diff --git a/tests/rest/client/test_register.py b/tests/rest/client/test_register.py
index c33393dc28..ba4e017a0e 100644
--- a/tests/rest/client/test_register.py
+++ b/tests/rest/client/test_register.py
@@ -169,7 +169,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase):
 
     @override_config({"rc_registration": {"per_second": 0.17, "burst_count": 5}})
     def test_POST_ratelimiting_guest(self) -> None:
-        for i in range(0, 6):
+        for i in range(6):
             url = self.url + b"?kind=guest"
             channel = self.make_request(b"POST", url, b"{}")
 
@@ -187,7 +187,7 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase):
 
     @override_config({"rc_registration": {"per_second": 0.17, "burst_count": 5}})
     def test_POST_ratelimiting(self) -> None:
-        for i in range(0, 6):
+        for i in range(6):
             request_data = {
                 "username": "kermit" + str(i),
                 "password": "monkey",
@@ -1223,7 +1223,7 @@ class RegistrationTokenValidityRestServletTestCase(unittest.HomeserverTestCase):
     def test_GET_ratelimiting(self) -> None:
         token = "1234"
 
-        for i in range(0, 6):
+        for i in range(6):
             channel = self.make_request(
                 b"GET",
                 f"{self.url}?token={token}",
diff --git a/tests/storage/databases/main/test_lock.py b/tests/storage/databases/main/test_lock.py
index 650b4941ba..35f77052a7 100644
--- a/tests/storage/databases/main/test_lock.py
+++ b/tests/storage/databases/main/test_lock.py
@@ -382,7 +382,7 @@ class ReadWriteLockTestCase(unittest.HomeserverTestCase):
         self.get_success(lock.__aenter__())
 
         # Wait for ages with the lock, we should not be able to get the lock.
-        for _ in range(0, 10):
+        for _ in range(10):
             self.reactor.advance((_RENEWAL_INTERVAL_MS / 1000))
 
         lock2 = self.get_success(
diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py
index 48ebfadaab..b55dd07f14 100644
--- a/tests/storage/test_event_chain.py
+++ b/tests/storage/test_event_chain.py
@@ -664,7 +664,7 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
 
         # Add a bunch of state so that it takes multiple iterations of the
         # background update to process the room.
-        for i in range(0, 150):
+        for i in range(150):
             self.helper.send_state(
                 room_id, event_type="m.test", body={"index": i}, tok=self.token
             )
@@ -718,12 +718,12 @@ class EventChainBackgroundUpdateTestCase(HomeserverTestCase):
 
         # Add a bunch of state so that it takes multiple iterations of the
         # background update to process the room.
-        for i in range(0, 150):
+        for i in range(150):
             self.helper.send_state(
                 room_id1, event_type="m.test", body={"index": i}, tok=self.token
             )
 
-        for i in range(0, 150):
+        for i in range(150):
             self.helper.send_state(
                 room_id2, event_type="m.test", body={"index": i}, tok=self.token
             )
diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index 7a4ecab2d5..d3e20f44b2 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -227,7 +227,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
                 (room_id, event_id),
             )
 
-        for i in range(0, 20):
+        for i in range(20):
             self.get_success(
                 self.store.db_pool.runInteraction("insert", insert_event, i)
             )
@@ -235,7 +235,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
         # this should get the last ten
         r = self.get_success(self.store.get_prev_events_for_room(room_id))
         self.assertEqual(10, len(r))
-        for i in range(0, 10):
+        for i in range(10):
             self.assertEqual("$event_%i:local" % (19 - i), r[i])
 
     def test_get_rooms_with_many_extremities(self) -> None:
@@ -277,7 +277,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
                 (room_id, event_id),
             )
 
-        for i in range(0, 20):
+        for i in range(20):
             self.get_success(
                 self.store.db_pool.runInteraction("insert", insert_event, i, room1)
             )
diff --git a/tests/storage/test_profile.py b/tests/storage/test_profile.py
index fe5bb77913..95f99f4130 100644
--- a/tests/storage/test_profile.py
+++ b/tests/storage/test_profile.py
@@ -82,7 +82,7 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase):
 
             self.get_success(self.store.db_pool.runInteraction("", f))
 
-        for i in range(0, 70):
+        for i in range(70):
             self.get_success(
                 self.store.db_pool.simple_insert(
                     "profiles",
@@ -115,7 +115,7 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase):
         )
 
         expected_values = []
-        for i in range(0, 70):
+        for i in range(70):
             expected_values.append((f"@hello{i:02}:{self.hs.hostname}",))
 
         res = self.get_success(
diff --git a/tests/storage/test_txn_limit.py b/tests/storage/test_txn_limit.py
index 15ea4770bd..22f074982f 100644
--- a/tests/storage/test_txn_limit.py
+++ b/tests/storage/test_txn_limit.py
@@ -38,5 +38,5 @@ class SQLTransactionLimitTestCase(unittest.HomeserverTestCase):
         db_pool = self.hs.get_datastores().databases[0]
 
         # force txn limit to roll over at least once
-        for _ in range(0, 1001):
+        for _ in range(1001):
             self.get_success_or_raise(db_pool.runInteraction("test_select", do_select))
diff --git a/tests/storage/test_user_filters.py b/tests/storage/test_user_filters.py
index bab802f56e..d4637d9d1e 100644
--- a/tests/storage/test_user_filters.py
+++ b/tests/storage/test_user_filters.py
@@ -45,7 +45,7 @@ class UserFiltersStoreTestCase(unittest.HomeserverTestCase):
 
             self.get_success(self.store.db_pool.runInteraction("", f))
 
-        for i in range(0, 70):
+        for i in range(70):
             self.get_success(
                 self.store.db_pool.simple_insert(
                     "user_filters",
@@ -82,7 +82,7 @@ class UserFiltersStoreTestCase(unittest.HomeserverTestCase):
         )
 
         expected_values = []
-        for i in range(0, 70):
+        for i in range(70):
             expected_values.append((f"@hello{i:02}:{self.hs.hostname}",))
 
         res = self.get_success(
diff --git a/tests/test_visibility.py b/tests/test_visibility.py
index a46c29ddf4..434902c3f0 100644
--- a/tests/test_visibility.py
+++ b/tests/test_visibility.py
@@ -51,12 +51,12 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
 
         # before we do that, we persist some other events to act as state.
         self._inject_visibility("@admin:hs", "joined")
-        for i in range(0, 10):
+        for i in range(10):
             self._inject_room_member("@resident%i:hs" % i)
 
         events_to_filter = []
 
-        for i in range(0, 10):
+        for i in range(10):
             user = "@user%i:%s" % (i, "test_server" if i == 5 else "other_server")
             evt = self._inject_room_member(user, extra_content={"a": "b"})
             events_to_filter.append(evt)
@@ -74,7 +74,7 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
         )
 
         # the result should be 5 redacted events, and 5 unredacted events.
-        for i in range(0, 5):
+        for i in range(5):
             self.assertEqual(events_to_filter[i].event_id, filtered[i].event_id)
             self.assertNotIn("a", filtered[i].content)
 
@@ -177,7 +177,7 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
             )
         )
 
-        for i in range(0, len(events_to_filter)):
+        for i in range(len(events_to_filter)):
             self.assertEqual(
                 events_to_filter[i].event_id,
                 filtered[i].event_id,
diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py
index 064f4987df..168419f440 100644
--- a/tests/util/caches/test_descriptors.py
+++ b/tests/util/caches/test_descriptors.py
@@ -623,14 +623,14 @@ class CacheDecoratorTestCase(unittest.HomeserverTestCase):
 
         a = A()
 
-        for k in range(0, 12):
+        for k in range(12):
             yield a.func(k)
 
         self.assertEqual(callcount[0], 12)
 
         # There must have been at least 2 evictions, meaning if we calculate
         # all 12 values again, we must get called at least 2 more times
-        for k in range(0, 12):
+        for k in range(12):
             yield a.func(k)
 
         self.assertTrue(
-- 
cgit 1.5.1


From edd83f23b710f0caae05d5766b474de3b6f24e9e Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 8 Sep 2023 19:29:38 +0100
Subject: Improve type hints for attrs classes (#16276)

---
 changelog.d/16276.misc                        |  1 +
 synapse/config/oembed.py                      |  2 +-
 synapse/storage/controllers/persist_events.py |  8 +++-----
 synapse/util/async_helpers.py                 | 25 +++++++++++--------------
 synapse/util/caches/dictionary_cache.py       | 10 ++++------
 synapse/util/caches/expiringcache.py          | 20 ++++++++++++--------
 synapse/util/caches/ttlcache.py               | 10 +++++-----
 7 files changed, 37 insertions(+), 39 deletions(-)
 create mode 100644 changelog.d/16276.misc

(limited to 'synapse')

diff --git a/changelog.d/16276.misc b/changelog.d/16276.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/16276.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/synapse/config/oembed.py b/synapse/config/oembed.py
index d7959639ee..59bc0b55f4 100644
--- a/synapse/config/oembed.py
+++ b/synapse/config/oembed.py
@@ -30,7 +30,7 @@ class OEmbedEndpointConfig:
     # The API endpoint to fetch.
     api_endpoint: str
     # The patterns to match.
-    url_patterns: List[Pattern]
+    url_patterns: List[Pattern[str]]
     # The supported formats.
     formats: Optional[List[str]]
 
diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py
index abd1d149db..6864f93090 100644
--- a/synapse/storage/controllers/persist_events.py
+++ b/synapse/storage/controllers/persist_events.py
@@ -154,12 +154,13 @@ class _UpdateCurrentStateTask:
 
 
 _EventPersistQueueTask = Union[_PersistEventsTask, _UpdateCurrentStateTask]
+_PersistResult = TypeVar("_PersistResult")
 
 
 @attr.s(auto_attribs=True, slots=True)
-class _EventPersistQueueItem:
+class _EventPersistQueueItem(Generic[_PersistResult]):
     task: _EventPersistQueueTask
-    deferred: ObservableDeferred
+    deferred: ObservableDeferred[_PersistResult]
 
     parent_opentracing_span_contexts: List = attr.ib(factory=list)
     """A list of opentracing spans waiting for this batch"""
@@ -168,9 +169,6 @@ class _EventPersistQueueItem:
     """The opentracing span under which the persistence actually happened"""
 
 
-_PersistResult = TypeVar("_PersistResult")
-
-
 class _EventPeristenceQueue(Generic[_PersistResult]):
     """Queues up tasks so that they can be processed with only one concurrent
     transaction per room.
diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py
index 943ad54456..0cbeb0c365 100644
--- a/synapse/util/async_helpers.py
+++ b/synapse/util/async_helpers.py
@@ -19,6 +19,7 @@ import collections
 import inspect
 import itertools
 import logging
+import typing
 from contextlib import asynccontextmanager
 from typing import (
     Any,
@@ -29,6 +30,7 @@ from typing import (
     Collection,
     Coroutine,
     Dict,
+    Generator,
     Generic,
     Hashable,
     Iterable,
@@ -398,7 +400,7 @@ class _LinearizerEntry:
     # The number of things executing.
     count: int
     # Deferreds for the things blocked from executing.
-    deferreds: collections.OrderedDict
+    deferreds: typing.OrderedDict["defer.Deferred[None]", Literal[1]]
 
 
 class Linearizer:
@@ -717,30 +719,25 @@ def timeout_deferred(
     return new_d
 
 
-# This class can't be generic because it uses slots with attrs.
-# See: https://github.com/python-attrs/attrs/issues/313
 @attr.s(slots=True, frozen=True, auto_attribs=True)
-class DoneAwaitable:  # should be: Generic[R]
+class DoneAwaitable(Awaitable[R]):
     """Simple awaitable that returns the provided value."""
 
-    value: Any  # should be: R
+    value: R
 
-    def __await__(self) -> Any:
-        return self
-
-    def __iter__(self) -> "DoneAwaitable":
-        return self
-
-    def __next__(self) -> None:
-        raise StopIteration(self.value)
+    def __await__(self) -> Generator[Any, None, R]:
+        yield None
+        return self.value
 
 
 def maybe_awaitable(value: Union[Awaitable[R], R]) -> Awaitable[R]:
     """Convert a value to an awaitable if not already an awaitable."""
     if inspect.isawaitable(value):
-        assert isinstance(value, Awaitable)
         return value
 
+    # For some reason mypy doesn't deduce that value is not Awaitable here, even though
+    # inspect.isawaitable returns a TypeGuard.
+    assert not isinstance(value, Awaitable)
     return DoneAwaitable(value)
 
 
diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py
index 5eaf70c7ab..2fbc7b1e6c 100644
--- a/synapse/util/caches/dictionary_cache.py
+++ b/synapse/util/caches/dictionary_cache.py
@@ -14,7 +14,7 @@
 import enum
 import logging
 import threading
-from typing import Any, Dict, Generic, Iterable, Optional, Set, Tuple, TypeVar, Union
+from typing import Dict, Generic, Iterable, Optional, Set, Tuple, TypeVar, Union
 
 import attr
 from typing_extensions import Literal
@@ -33,10 +33,8 @@ DKT = TypeVar("DKT")
 DV = TypeVar("DV")
 
 
-# This class can't be generic because it uses slots with attrs.
-# See: https://github.com/python-attrs/attrs/issues/313
 @attr.s(slots=True, frozen=True, auto_attribs=True)
-class DictionaryEntry:  # should be: Generic[DKT, DV].
+class DictionaryEntry(Generic[DKT, DV]):
     """Returned when getting an entry from the cache
 
     If `full` is true then `known_absent` will be the empty set.
@@ -50,8 +48,8 @@ class DictionaryEntry:  # should be: Generic[DKT, DV].
     """
 
     full: bool
-    known_absent: Set[Any]  # should be: Set[DKT]
-    value: Dict[Any, Any]  # should be: Dict[DKT, DV]
+    known_absent: Set[DKT]
+    value: Dict[DKT, DV]
 
     def __len__(self) -> int:
         return len(self.value)
diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py
index 01ad02af67..8e4c34039d 100644
--- a/synapse/util/caches/expiringcache.py
+++ b/synapse/util/caches/expiringcache.py
@@ -14,7 +14,7 @@
 
 import logging
 from collections import OrderedDict
-from typing import Any, Generic, Optional, TypeVar, Union, overload
+from typing import Any, Generic, Iterable, Optional, TypeVar, Union, overload
 
 import attr
 from typing_extensions import Literal
@@ -73,7 +73,7 @@ class ExpiringCache(Generic[KT, VT]):
         self._expiry_ms = expiry_ms
         self._reset_expiry_on_get = reset_expiry_on_get
 
-        self._cache: OrderedDict[KT, _CacheEntry] = OrderedDict()
+        self._cache: OrderedDict[KT, _CacheEntry[VT]] = OrderedDict()
 
         self.iterable = iterable
 
@@ -100,7 +100,10 @@ class ExpiringCache(Generic[KT, VT]):
         while self._max_size and len(self) > self._max_size:
             _key, value = self._cache.popitem(last=False)
             if self.iterable:
-                self.metrics.inc_evictions(EvictionReason.size, len(value.value))
+                # type-ignore, here and below: if self.iterable is true, then the value
+                # type VT should be Sized (i.e. have a __len__ method). We don't enforce
+                # this via the type system at present.
+                self.metrics.inc_evictions(EvictionReason.size, len(value.value))  # type: ignore[arg-type]
             else:
                 self.metrics.inc_evictions(EvictionReason.size)
 
@@ -134,7 +137,7 @@ class ExpiringCache(Generic[KT, VT]):
             return default
 
         if self.iterable:
-            self.metrics.inc_evictions(EvictionReason.invalidation, len(value.value))
+            self.metrics.inc_evictions(EvictionReason.invalidation, len(value.value))  # type: ignore[arg-type]
         else:
             self.metrics.inc_evictions(EvictionReason.invalidation)
 
@@ -182,7 +185,7 @@ class ExpiringCache(Generic[KT, VT]):
         for k in keys_to_delete:
             value = self._cache.pop(k)
             if self.iterable:
-                self.metrics.inc_evictions(EvictionReason.time, len(value.value))
+                self.metrics.inc_evictions(EvictionReason.time, len(value.value))  # type: ignore[arg-type]
             else:
                 self.metrics.inc_evictions(EvictionReason.time)
 
@@ -195,7 +198,8 @@ class ExpiringCache(Generic[KT, VT]):
 
     def __len__(self) -> int:
         if self.iterable:
-            return sum(len(entry.value) for entry in self._cache.values())
+            g: Iterable[int] = (len(entry.value) for entry in self._cache.values())  # type: ignore[arg-type]
+            return sum(g)
         else:
             return len(self._cache)
 
@@ -218,6 +222,6 @@ class ExpiringCache(Generic[KT, VT]):
 
 
 @attr.s(slots=True, auto_attribs=True)
-class _CacheEntry:
+class _CacheEntry(Generic[VT]):
     time: int
-    value: Any
+    value: VT
diff --git a/synapse/util/caches/ttlcache.py b/synapse/util/caches/ttlcache.py
index f6b3ee31e4..48a6e4a906 100644
--- a/synapse/util/caches/ttlcache.py
+++ b/synapse/util/caches/ttlcache.py
@@ -35,10 +35,10 @@ class TTLCache(Generic[KT, VT]):
 
     def __init__(self, cache_name: str, timer: Callable[[], float] = time.time):
         # map from key to _CacheEntry
-        self._data: Dict[KT, _CacheEntry] = {}
+        self._data: Dict[KT, _CacheEntry[KT, VT]] = {}
 
         # the _CacheEntries, sorted by expiry time
-        self._expiry_list: SortedList[_CacheEntry] = SortedList()
+        self._expiry_list: SortedList[_CacheEntry[KT, VT]] = SortedList()
 
         self._timer = timer
 
@@ -160,11 +160,11 @@ class TTLCache(Generic[KT, VT]):
 
 
 @attr.s(frozen=True, slots=True, auto_attribs=True)
-class _CacheEntry:  # Should be Generic[KT, VT]. See python-attrs/attrs#313
+class _CacheEntry(Generic[KT, VT]):
     """TTLCache entry"""
 
     # expiry_time is the first attribute, so that entries are sorted by expiry.
     expiry_time: float
     ttl: float
-    key: Any  # should be KT
-    value: Any  # should be VT
+    key: KT
+    value: VT
-- 
cgit 1.5.1


From 151e4bbc45dbf7b767b1a6a74ffb4cd7889ccf78 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Mon, 11 Sep 2023 13:11:02 +0100
Subject: Filter out down hosts when retrying fetching device lists (#16298)

---
 changelog.d/16298.misc     |  1 +
 synapse/handlers/device.py | 15 ++++++++++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/16298.misc

(limited to 'synapse')

diff --git a/changelog.d/16298.misc b/changelog.d/16298.misc
new file mode 100644
index 0000000000..75b546d424
--- /dev/null
+++ b/changelog.d/16298.misc
@@ -0,0 +1 @@
+Don't try refetching device lists for users on remote hosts that are marked as "down".
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 9356ae998e..9d240ad4ee 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -58,7 +58,10 @@ from synapse.util.async_helpers import Linearizer
 from synapse.util.caches.expiringcache import ExpiringCache
 from synapse.util.cancellation import cancellable
 from synapse.util.metrics import measure_func
-from synapse.util.retryutils import NotRetryingDestination
+from synapse.util.retryutils import (
+    NotRetryingDestination,
+    filter_destinations_by_retry_limiter,
+)
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -1269,8 +1272,18 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
             self._resync_retry_in_progress = True
             # Get all of the users that need resyncing.
             need_resync = await self.store.get_user_ids_requiring_device_list_resync()
+
+            # Filter out users whose host is marked as "down" up front.
+            hosts = await filter_destinations_by_retry_limiter(
+                {get_domain_from_id(u) for u in need_resync}, self.clock, self.store
+            )
+            hosts = set(hosts)
+
             # Iterate over the set of user IDs.
             for user_id in need_resync:
+                if get_domain_from_id(user_id) not in hosts:
+                    continue
+
                 try:
                     # Try to resync the current user's devices list.
                     result = (await self.multi_user_device_resync([user_id], False))[
-- 
cgit 1.5.1


From 9400dc05357b4272425c7be47ceeced26fa3f28c Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 11 Sep 2023 09:49:48 -0400
Subject: Add the List-Unsubscribe header for notification emails. (#16274)

Adds both the List-Unsubscribe (RFC2369) and List-Unsubscribe-Post (RFC8058)
headers to push notification emails, which together should:

* Show an "Unsubscribe" link in the MUA UI when viewing Synapse notification emails.
* Enable "one-click" unsubscribe (the user never leaves their MUA, which automatically
  makes a POST request to the specified endpoint).
---
 changelog.d/16274.feature                  |  1 +
 synapse/handlers/send_email.py             | 10 +++++-
 synapse/push/mailer.py                     | 33 +++++++++++++++---
 synapse/rest/synapse/client/unsubscribe.py | 17 +++++++++
 tests/push/test_email.py                   | 55 ++++++++++++++++++++++++++++++
 5 files changed, 110 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/16274.feature

(limited to 'synapse')

diff --git a/changelog.d/16274.feature b/changelog.d/16274.feature
new file mode 100644
index 0000000000..0d9da2bbef
--- /dev/null
+++ b/changelog.d/16274.feature
@@ -0,0 +1 @@
+Enable users to easily unsubscribe from notification emails via the `List-Unsubscribe` header.
diff --git a/synapse/handlers/send_email.py b/synapse/handlers/send_email.py
index 05e21509de..4f5fe62fe8 100644
--- a/synapse/handlers/send_email.py
+++ b/synapse/handlers/send_email.py
@@ -17,7 +17,7 @@ import logging
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from io import BytesIO
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any, Dict, Optional
 
 from pkg_resources import parse_version
 
@@ -151,6 +151,7 @@ class SendEmailHandler:
         app_name: str,
         html: str,
         text: str,
+        additional_headers: Optional[Dict[str, str]] = None,
     ) -> None:
         """Send a multipart email with the given information.
 
@@ -160,6 +161,7 @@ class SendEmailHandler:
             app_name: The app name to include in the From header.
             html: The HTML content to include in the email.
             text: The plain text content to include in the email.
+            additional_headers: A map of additional headers to include.
         """
         try:
             from_string = self._from % {"app": app_name}
@@ -181,6 +183,7 @@ class SendEmailHandler:
         multipart_msg["To"] = email_address
         multipart_msg["Date"] = email.utils.formatdate()
         multipart_msg["Message-ID"] = email.utils.make_msgid()
+
         # Discourage automatic responses to Synapse's emails.
         # Per RFC 3834, automatic responses should not be sent if the "Auto-Submitted"
         # header is present with any value other than "no". See
@@ -194,6 +197,11 @@ class SendEmailHandler:
         #    https://stackoverflow.com/a/25324691/5252017
         #    https://stackoverflow.com/a/61646381/5252017
         multipart_msg["X-Auto-Response-Suppress"] = "All"
+
+        if additional_headers:
+            for header, value in additional_headers.items():
+                multipart_msg[header] = value
+
         multipart_msg.attach(text_part)
         multipart_msg.attach(html_part)
 
diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py
index 79e0627b6a..b6cad18c2d 100644
--- a/synapse/push/mailer.py
+++ b/synapse/push/mailer.py
@@ -298,20 +298,26 @@ class Mailer:
                 notifs_by_room, state_by_room, notif_events, reason
             )
 
+        unsubscribe_link = self._make_unsubscribe_link(user_id, app_id, email_address)
+
         template_vars: TemplateVars = {
             "user_display_name": user_display_name,
-            "unsubscribe_link": self._make_unsubscribe_link(
-                user_id, app_id, email_address
-            ),
+            "unsubscribe_link": unsubscribe_link,
             "summary_text": summary_text,
             "rooms": rooms,
             "reason": reason,
         }
 
-        await self.send_email(email_address, summary_text, template_vars)
+        await self.send_email(
+            email_address, summary_text, template_vars, unsubscribe_link
+        )
 
     async def send_email(
-        self, email_address: str, subject: str, extra_template_vars: TemplateVars
+        self,
+        email_address: str,
+        subject: str,
+        extra_template_vars: TemplateVars,
+        unsubscribe_link: Optional[str] = None,
     ) -> None:
         """Send an email with the given information and template text"""
         template_vars: TemplateVars = {
@@ -330,6 +336,23 @@ class Mailer:
             app_name=self.app_name,
             html=html_text,
             text=plain_text,
+            # Include the List-Unsubscribe header which some clients render in the UI.
+            # Per RFC 2369, this can be a URL or mailto URL. See
+            #     https://www.rfc-editor.org/rfc/rfc2369.html#section-3.2
+            #
+            # It is preferred to use email, but Synapse doesn't support incoming email.
+            #
+            # Also include the List-Unsubscribe-Post header from RFC 8058. See
+            #     https://www.rfc-editor.org/rfc/rfc8058.html#section-3.1
+            #
+            # Note that many email clients will not render the unsubscribe link
+            # unless DKIM, etc. is properly set up.
+            additional_headers={
+                "List-Unsubscribe-Post": "List-Unsubscribe=One-Click",
+                "List-Unsubscribe": f"<{unsubscribe_link}>",
+            }
+            if unsubscribe_link
+            else None,
         )
 
     async def _get_room_vars(
diff --git a/synapse/rest/synapse/client/unsubscribe.py b/synapse/rest/synapse/client/unsubscribe.py
index 60321018f9..050fd7bba1 100644
--- a/synapse/rest/synapse/client/unsubscribe.py
+++ b/synapse/rest/synapse/client/unsubscribe.py
@@ -38,6 +38,10 @@ class UnsubscribeResource(DirectServeHtmlResource):
         self.macaroon_generator = hs.get_macaroon_generator()
 
     async def _async_render_GET(self, request: SynapseRequest) -> None:
+        """
+        Handle a user opening an unsubscribe link in the browser, either via an
+        HTML/Text email or via the List-Unsubscribe header.
+        """
         token = parse_string(request, "access_token", required=True)
         app_id = parse_string(request, "app_id", required=True)
         pushkey = parse_string(request, "pushkey", required=True)
@@ -62,3 +66,16 @@ class UnsubscribeResource(DirectServeHtmlResource):
             200,
             UnsubscribeResource.SUCCESS_HTML,
         )
+
+    async def _async_render_POST(self, request: SynapseRequest) -> None:
+        """
+        Handle a mail user agent POSTing to the unsubscribe URL via the
+        List-Unsubscribe & List-Unsubscribe-Post headers.
+        """
+
+        # TODO Assert that the body has a single field
+
+        # Assert the body has form encoded key/value pair of
+        # List-Unsubscribe=One-Click.
+
+        await self._async_render_GET(request)
diff --git a/tests/push/test_email.py b/tests/push/test_email.py
index 4b5c96aeae..73a430ddc6 100644
--- a/tests/push/test_email.py
+++ b/tests/push/test_email.py
@@ -13,10 +13,12 @@
 # limitations under the License.
 import email.message
 import os
+from http import HTTPStatus
 from typing import Any, Dict, List, Sequence, Tuple
 
 import attr
 import pkg_resources
+from parameterized import parameterized
 
 from twisted.internet.defer import Deferred
 from twisted.test.proto_helpers import MemoryReactor
@@ -25,9 +27,11 @@ import synapse.rest.admin
 from synapse.api.errors import Codes, SynapseError
 from synapse.push.emailpusher import EmailPusher
 from synapse.rest.client import login, room
+from synapse.rest.synapse.client.unsubscribe import UnsubscribeResource
 from synapse.server import HomeServer
 from synapse.util import Clock
 
+from tests.server import FakeSite, make_request
 from tests.unittest import HomeserverTestCase
 
 
@@ -175,6 +179,57 @@ class EmailPusherTests(HomeserverTestCase):
 
         self._check_for_mail()
 
+    @parameterized.expand([(False,), (True,)])
+    def test_unsubscribe(self, use_post: bool) -> None:
+        # Create a simple room with two users
+        room = self.helper.create_room_as(self.user_id, tok=self.access_token)
+        self.helper.invite(
+            room=room, src=self.user_id, tok=self.access_token, targ=self.others[0].id
+        )
+        self.helper.join(room=room, user=self.others[0].id, tok=self.others[0].token)
+
+        # The other user sends a single message.
+        self.helper.send(room, body="Hi!", tok=self.others[0].token)
+
+        # We should get emailed about that message
+        args, kwargs = self._check_for_mail()
+
+        # That email should contain an unsubscribe link in the body and header.
+        msg: bytes = args[5]
+
+        # Multipart: plain text, base 64 encoded; html, base 64 encoded
+        multipart_msg = email.message_from_bytes(msg)
+        txt = multipart_msg.get_payload()[0].get_payload(decode=True).decode()
+        html = multipart_msg.get_payload()[1].get_payload(decode=True).decode()
+        self.assertIn("/_synapse/client/unsubscribe", txt)
+        self.assertIn("/_synapse/client/unsubscribe", html)
+
+        # The unsubscribe headers should exist.
+        assert multipart_msg.get("List-Unsubscribe") is not None
+        self.assertIsNotNone(multipart_msg.get("List-Unsubscribe-Post"))
+
+        # Open the unsubscribe link.
+        unsubscribe_link = multipart_msg["List-Unsubscribe"].strip("<>")
+        unsubscribe_resource = UnsubscribeResource(self.hs)
+        channel = make_request(
+            self.reactor,
+            FakeSite(unsubscribe_resource, self.reactor),
+            "POST" if use_post else "GET",
+            unsubscribe_link,
+            shorthand=False,
+        )
+        self.assertEqual(HTTPStatus.OK, channel.code, channel.result)
+
+        # Ensure the pusher was removed.
+        pushers = list(
+            self.get_success(
+                self.hs.get_datastores().main.get_pushers_by(
+                    {"user_name": self.user_id}
+                )
+            )
+        )
+        self.assertEqual(pushers, [])
+
     def test_invite_sends_email(self) -> None:
         # Create a room and invite the user to it
         room = self.helper.create_room_as(self.others[0].id, tok=self.others[0].token)
-- 
cgit 1.5.1


From 2b35626b6b7aed52a626734a5a85fe77c847251d Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Tue, 12 Sep 2023 11:08:04 +0100
Subject: Refactor storing of server keys (#16261)

---
 changelog.d/16261.misc                 |   1 +
 synapse/crypto/keyring.py              |  35 ++----
 synapse/storage/databases/main/keys.py | 219 +++++++++++----------------------
 tests/crypto/test_keyring.py           |  53 ++------
 tests/storage/test_keys.py             | 137 ---------------------
 tests/unittest.py                      |  26 ++--
 6 files changed, 106 insertions(+), 365 deletions(-)
 create mode 100644 changelog.d/16261.misc
 delete mode 100644 tests/storage/test_keys.py

(limited to 'synapse')

diff --git a/changelog.d/16261.misc b/changelog.d/16261.misc
new file mode 100644
index 0000000000..d3ad59ca4a
--- /dev/null
+++ b/changelog.d/16261.misc
@@ -0,0 +1 @@
+Simplify server key storage.
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index 260aab3241..fe86f54d80 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -23,12 +23,7 @@ from signedjson.key import (
     get_verify_key,
     is_signing_algorithm_supported,
 )
-from signedjson.sign import (
-    SignatureVerifyException,
-    encode_canonical_json,
-    signature_ids,
-    verify_signed_json,
-)
+from signedjson.sign import SignatureVerifyException, signature_ids, verify_signed_json
 from signedjson.types import VerifyKey
 from unpaddedbase64 import decode_base64
 
@@ -596,24 +591,12 @@ class BaseV2KeyFetcher(KeyFetcher):
                     verify_key=verify_key, valid_until_ts=key_data["expired_ts"]
                 )
 
-        key_json_bytes = encode_canonical_json(response_json)
-
-        await make_deferred_yieldable(
-            defer.gatherResults(
-                [
-                    run_in_background(
-                        self.store.store_server_keys_json,
-                        server_name=server_name,
-                        key_id=key_id,
-                        from_server=from_server,
-                        ts_now_ms=time_added_ms,
-                        ts_expires_ms=ts_valid_until_ms,
-                        key_json_bytes=key_json_bytes,
-                    )
-                    for key_id in verify_keys
-                ],
-                consumeErrors=True,
-            ).addErrback(unwrapFirstError)
+        await self.store.store_server_keys_response(
+            server_name=server_name,
+            from_server=from_server,
+            ts_added_ms=time_added_ms,
+            verify_keys=verify_keys,
+            response_json=response_json,
         )
 
         return verify_keys
@@ -775,10 +758,6 @@ class PerspectivesKeyFetcher(BaseV2KeyFetcher):
 
             keys.setdefault(server_name, {}).update(processed_response)
 
-        await self.store.store_server_signature_keys(
-            perspective_name, time_now_ms, added_keys
-        )
-
         return keys
 
     def _validate_perspectives_response(
diff --git a/synapse/storage/databases/main/keys.py b/synapse/storage/databases/main/keys.py
index 57aa4921e1..41563371dc 100644
--- a/synapse/storage/databases/main/keys.py
+++ b/synapse/storage/databases/main/keys.py
@@ -16,14 +16,17 @@
 import itertools
 import json
 import logging
-from typing import Dict, Iterable, Mapping, Optional, Tuple
+from typing import Dict, Iterable, Optional, Tuple
 
+from canonicaljson import encode_canonical_json
 from signedjson.key import decode_verify_key_bytes
 from unpaddedbase64 import decode_base64
 
+from synapse.storage.database import LoggingTransaction
 from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
 from synapse.storage.keys import FetchKeyResult, FetchKeyResultForRemote
 from synapse.storage.types import Cursor
+from synapse.types import JsonDict
 from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.iterutils import batch_iter
 
@@ -36,162 +39,84 @@ db_binary_type = memoryview
 class KeyStore(CacheInvalidationWorkerStore):
     """Persistence for signature verification keys"""
 
-    @cached()
-    def _get_server_signature_key(
-        self, server_name_and_key_id: Tuple[str, str]
-    ) -> FetchKeyResult:
-        raise NotImplementedError()
-
-    @cachedList(
-        cached_method_name="_get_server_signature_key",
-        list_name="server_name_and_key_ids",
-    )
-    async def get_server_signature_keys(
-        self, server_name_and_key_ids: Iterable[Tuple[str, str]]
-    ) -> Dict[Tuple[str, str], FetchKeyResult]:
-        """
-        Args:
-            server_name_and_key_ids:
-                iterable of (server_name, key-id) tuples to fetch keys for
-
-        Returns:
-            A map from (server_name, key_id) -> FetchKeyResult, or None if the
-            key is unknown
-        """
-        keys = {}
-
-        def _get_keys(txn: Cursor, batch: Tuple[Tuple[str, str], ...]) -> None:
-            """Processes a batch of keys to fetch, and adds the result to `keys`."""
-
-            # batch_iter always returns tuples so it's safe to do len(batch)
-            sql = """
-            SELECT server_name, key_id, verify_key, ts_valid_until_ms
-            FROM server_signature_keys WHERE 1=0
-            """ + " OR (server_name=? AND key_id=?)" * len(
-                batch
-            )
-
-            txn.execute(sql, tuple(itertools.chain.from_iterable(batch)))
-
-            for row in txn:
-                server_name, key_id, key_bytes, ts_valid_until_ms = row
-
-                if ts_valid_until_ms is None:
-                    # Old keys may be stored with a ts_valid_until_ms of null,
-                    # in which case we treat this as if it was set to `0`, i.e.
-                    # it won't match key requests that define a minimum
-                    # `ts_valid_until_ms`.
-                    ts_valid_until_ms = 0
-
-                keys[(server_name, key_id)] = FetchKeyResult(
-                    verify_key=decode_verify_key_bytes(key_id, bytes(key_bytes)),
-                    valid_until_ts=ts_valid_until_ms,
-                )
-
-        def _txn(txn: Cursor) -> Dict[Tuple[str, str], FetchKeyResult]:
-            for batch in batch_iter(server_name_and_key_ids, 50):
-                _get_keys(txn, batch)
-            return keys
-
-        return await self.db_pool.runInteraction("get_server_signature_keys", _txn)
-
-    async def store_server_signature_keys(
+    async def store_server_keys_response(
         self,
+        server_name: str,
         from_server: str,
         ts_added_ms: int,
-        verify_keys: Mapping[Tuple[str, str], FetchKeyResult],
+        verify_keys: Dict[str, FetchKeyResult],
+        response_json: JsonDict,
     ) -> None:
-        """Stores NACL verification keys for remote servers.
+        """Stores the keys for the given server that we got from `from_server`.
+
         Args:
-            from_server: Where the verification keys were looked up
-            ts_added_ms: The time to record that the key was added
-            verify_keys:
-                keys to be stored. Each entry is a triplet of
-                (server_name, key_id, key).
+            server_name: The owner of the keys
+            from_server: Which server we got the keys from
+            ts_added_ms: When we're adding the keys
+            verify_keys: The decoded keys
+            response_json: The full *signed* response JSON that contains the keys.
         """
-        key_values = []
-        value_values = []
-        invalidations = []
-        for (server_name, key_id), fetch_result in verify_keys.items():
-            key_values.append((server_name, key_id))
-            value_values.append(
-                (
-                    from_server,
-                    ts_added_ms,
-                    fetch_result.valid_until_ts,
-                    db_binary_type(fetch_result.verify_key.encode()),
-                )
-            )
-            # invalidate takes a tuple corresponding to the params of
-            # _get_server_signature_key. _get_server_signature_key only takes one
-            # param, which is itself the 2-tuple (server_name, key_id).
-            invalidations.append((server_name, key_id))
 
-        await self.db_pool.simple_upsert_many(
-            table="server_signature_keys",
-            key_names=("server_name", "key_id"),
-            key_values=key_values,
-            value_names=(
-                "from_server",
-                "ts_added_ms",
-                "ts_valid_until_ms",
-                "verify_key",
-            ),
-            value_values=value_values,
-            desc="store_server_signature_keys",
-        )
+        key_json_bytes = encode_canonical_json(response_json)
+
+        def store_server_keys_response_txn(txn: LoggingTransaction) -> None:
+            self.db_pool.simple_upsert_many_txn(
+                txn,
+                table="server_signature_keys",
+                key_names=("server_name", "key_id"),
+                key_values=[(server_name, key_id) for key_id in verify_keys],
+                value_names=(
+                    "from_server",
+                    "ts_added_ms",
+                    "ts_valid_until_ms",
+                    "verify_key",
+                ),
+                value_values=[
+                    (
+                        from_server,
+                        ts_added_ms,
+                        fetch_result.valid_until_ts,
+                        db_binary_type(fetch_result.verify_key.encode()),
+                    )
+                    for fetch_result in verify_keys.values()
+                ],
+            )
 
-        invalidate = self._get_server_signature_key.invalidate
-        for i in invalidations:
-            invalidate((i,))
+            self.db_pool.simple_upsert_many_txn(
+                txn,
+                table="server_keys_json",
+                key_names=("server_name", "key_id", "from_server"),
+                key_values=[
+                    (server_name, key_id, from_server) for key_id in verify_keys
+                ],
+                value_names=(
+                    "ts_added_ms",
+                    "ts_valid_until_ms",
+                    "key_json",
+                ),
+                value_values=[
+                    (
+                        ts_added_ms,
+                        fetch_result.valid_until_ts,
+                        db_binary_type(key_json_bytes),
+                    )
+                    for fetch_result in verify_keys.values()
+                ],
+            )
 
-    async def store_server_keys_json(
-        self,
-        server_name: str,
-        key_id: str,
-        from_server: str,
-        ts_now_ms: int,
-        ts_expires_ms: int,
-        key_json_bytes: bytes,
-    ) -> None:
-        """Stores the JSON bytes for a set of keys from a server
-        The JSON should be signed by the originating server, the intermediate
-        server, and by this server. Updates the value for the
-        (server_name, key_id, from_server) triplet if one already existed.
-        Args:
-            server_name: The name of the server.
-            key_id: The identifier of the key this JSON is for.
-            from_server: The server this JSON was fetched from.
-            ts_now_ms: The time now in milliseconds.
-            ts_valid_until_ms: The time when this json stops being valid.
-            key_json_bytes: The encoded JSON.
-        """
-        await self.db_pool.simple_upsert(
-            table="server_keys_json",
-            keyvalues={
-                "server_name": server_name,
-                "key_id": key_id,
-                "from_server": from_server,
-            },
-            values={
-                "server_name": server_name,
-                "key_id": key_id,
-                "from_server": from_server,
-                "ts_added_ms": ts_now_ms,
-                "ts_valid_until_ms": ts_expires_ms,
-                "key_json": db_binary_type(key_json_bytes),
-            },
-            desc="store_server_keys_json",
-        )
+            # invalidate takes a tuple corresponding to the params of
+            # _get_server_keys_json. _get_server_keys_json only takes one
+            # param, which is itself the 2-tuple (server_name, key_id).
+            for key_id in verify_keys:
+                self._invalidate_cache_and_stream(
+                    txn, self._get_server_keys_json, ((server_name, key_id),)
+                )
+                self._invalidate_cache_and_stream(
+                    txn, self.get_server_key_json_for_remote, (server_name, key_id)
+                )
 
-        # invalidate takes a tuple corresponding to the params of
-        # _get_server_keys_json. _get_server_keys_json only takes one
-        # param, which is itself the 2-tuple (server_name, key_id).
-        await self.invalidate_cache_and_stream(
-            "_get_server_keys_json", ((server_name, key_id),)
-        )
-        await self.invalidate_cache_and_stream(
-            "get_server_key_json_for_remote", (server_name, key_id)
+        await self.db_pool.runInteraction(
+            "store_server_keys_response", store_server_keys_response_txn
         )
 
     @cached()
diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py
index f93ba5d4cf..c5700771b0 100644
--- a/tests/crypto/test_keyring.py
+++ b/tests/crypto/test_keyring.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 import time
 from typing import Any, Dict, List, Optional, cast
-from unittest.mock import AsyncMock, Mock
+from unittest.mock import Mock
 
 import attr
 import canonicaljson
@@ -189,23 +189,24 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         kr = keyring.Keyring(self.hs)
 
         key1 = signedjson.key.generate_signing_key("1")
-        r = self.hs.get_datastores().main.store_server_keys_json(
+        r = self.hs.get_datastores().main.store_server_keys_response(
             "server9",
-            get_key_id(key1),
             from_server="test",
-            ts_now_ms=int(time.time() * 1000),
-            ts_expires_ms=1000,
+            ts_added_ms=int(time.time() * 1000),
+            verify_keys={
+                get_key_id(key1): FetchKeyResult(
+                    verify_key=get_verify_key(key1), valid_until_ts=1000
+                )
+            },
             # The entire response gets signed & stored, just include the bits we
             # care about.
-            key_json_bytes=canonicaljson.encode_canonical_json(
-                {
-                    "verify_keys": {
-                        get_key_id(key1): {
-                            "key": encode_verify_key_base64(get_verify_key(key1))
-                        }
+            response_json={
+                "verify_keys": {
+                    get_key_id(key1): {
+                        "key": encode_verify_key_base64(get_verify_key(key1))
                     }
                 }
-            ),
+            },
         )
         self.get_success(r)
 
@@ -285,34 +286,6 @@ class KeyringTestCase(unittest.HomeserverTestCase):
         d = kr.verify_json_for_server(self.hs.hostname, json1, 0)
         self.get_success(d)
 
-    def test_verify_json_for_server_with_null_valid_until_ms(self) -> None:
-        """Tests that we correctly handle key requests for keys we've stored
-        with a null `ts_valid_until_ms`
-        """
-        mock_fetcher = Mock()
-        mock_fetcher.get_keys = AsyncMock(return_value={})
-
-        key1 = signedjson.key.generate_signing_key("1")
-        r = self.hs.get_datastores().main.store_server_signature_keys(
-            "server9",
-            int(time.time() * 1000),
-            # None is not a valid value in FetchKeyResult, but we're abusing this
-            # API to insert null values into the database. The nulls get converted
-            # to 0 when fetched in KeyStore.get_server_signature_keys.
-            {("server9", get_key_id(key1)): FetchKeyResult(get_verify_key(key1), None)},  # type: ignore[arg-type]
-        )
-        self.get_success(r)
-
-        json1: JsonDict = {}
-        signedjson.sign.sign_json(json1, "server9", key1)
-
-        # should succeed on a signed object with a 0 minimum_valid_until_ms
-        d = self.hs.get_datastores().main.get_server_signature_keys(
-            [("server9", get_key_id(key1))]
-        )
-        result = self.get_success(d)
-        self.assertEqual(result[("server9", get_key_id(key1))].valid_until_ts, 0)
-
     def test_verify_json_dedupes_key_requests(self) -> None:
         """Two requests for the same key should be deduped."""
         key1 = signedjson.key.generate_signing_key("1")
diff --git a/tests/storage/test_keys.py b/tests/storage/test_keys.py
deleted file mode 100644
index 5d7c13e6d0..0000000000
--- a/tests/storage/test_keys.py
+++ /dev/null
@@ -1,137 +0,0 @@
-# Copyright 2017 Vector Creations Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import signedjson.key
-import signedjson.types
-import unpaddedbase64
-
-from synapse.storage.keys import FetchKeyResult
-
-import tests.unittest
-
-
-def decode_verify_key_base64(
-    key_id: str, key_base64: str
-) -> signedjson.types.VerifyKey:
-    key_bytes = unpaddedbase64.decode_base64(key_base64)
-    return signedjson.key.decode_verify_key_bytes(key_id, key_bytes)
-
-
-KEY_1 = decode_verify_key_base64(
-    "ed25519:key1", "fP5l4JzpZPq/zdbBg5xx6lQGAAOM9/3w94cqiJ5jPrw"
-)
-KEY_2 = decode_verify_key_base64(
-    "ed25519:key2", "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw"
-)
-
-
-class KeyStoreTestCase(tests.unittest.HomeserverTestCase):
-    def test_get_server_signature_keys(self) -> None:
-        store = self.hs.get_datastores().main
-
-        key_id_1 = "ed25519:key1"
-        key_id_2 = "ed25519:KEY_ID_2"
-        self.get_success(
-            store.store_server_signature_keys(
-                "from_server",
-                10,
-                {
-                    ("server1", key_id_1): FetchKeyResult(KEY_1, 100),
-                    ("server1", key_id_2): FetchKeyResult(KEY_2, 200),
-                },
-            )
-        )
-
-        res = self.get_success(
-            store.get_server_signature_keys(
-                [
-                    ("server1", key_id_1),
-                    ("server1", key_id_2),
-                    ("server1", "ed25519:key3"),
-                ]
-            )
-        )
-
-        self.assertEqual(len(res.keys()), 3)
-        res1 = res[("server1", key_id_1)]
-        self.assertEqual(res1.verify_key, KEY_1)
-        self.assertEqual(res1.verify_key.version, "key1")
-        self.assertEqual(res1.valid_until_ts, 100)
-
-        res2 = res[("server1", key_id_2)]
-        self.assertEqual(res2.verify_key, KEY_2)
-        # version comes from the ID it was stored with
-        self.assertEqual(res2.verify_key.version, "KEY_ID_2")
-        self.assertEqual(res2.valid_until_ts, 200)
-
-        # non-existent result gives None
-        self.assertIsNone(res[("server1", "ed25519:key3")])
-
-    def test_cache(self) -> None:
-        """Check that updates correctly invalidate the cache."""
-
-        store = self.hs.get_datastores().main
-
-        key_id_1 = "ed25519:key1"
-        key_id_2 = "ed25519:key2"
-
-        self.get_success(
-            store.store_server_signature_keys(
-                "from_server",
-                0,
-                {
-                    ("srv1", key_id_1): FetchKeyResult(KEY_1, 100),
-                    ("srv1", key_id_2): FetchKeyResult(KEY_2, 200),
-                },
-            )
-        )
-
-        res = self.get_success(
-            store.get_server_signature_keys([("srv1", key_id_1), ("srv1", key_id_2)])
-        )
-        self.assertEqual(len(res.keys()), 2)
-
-        res1 = res[("srv1", key_id_1)]
-        self.assertEqual(res1.verify_key, KEY_1)
-        self.assertEqual(res1.valid_until_ts, 100)
-
-        res2 = res[("srv1", key_id_2)]
-        self.assertEqual(res2.verify_key, KEY_2)
-        self.assertEqual(res2.valid_until_ts, 200)
-
-        # we should be able to look up the same thing again without a db hit
-        res = self.get_success(store.get_server_signature_keys([("srv1", key_id_1)]))
-        self.assertEqual(len(res.keys()), 1)
-        self.assertEqual(res[("srv1", key_id_1)].verify_key, KEY_1)
-
-        new_key_2 = signedjson.key.get_verify_key(
-            signedjson.key.generate_signing_key("key2")
-        )
-        d = store.store_server_signature_keys(
-            "from_server", 10, {("srv1", key_id_2): FetchKeyResult(new_key_2, 300)}
-        )
-        self.get_success(d)
-
-        res = self.get_success(
-            store.get_server_signature_keys([("srv1", key_id_1), ("srv1", key_id_2)])
-        )
-        self.assertEqual(len(res.keys()), 2)
-
-        res1 = res[("srv1", key_id_1)]
-        self.assertEqual(res1.verify_key, KEY_1)
-        self.assertEqual(res1.valid_until_ts, 100)
-
-        res2 = res[("srv1", key_id_2)]
-        self.assertEqual(res2.verify_key, new_key_2)
-        self.assertEqual(res2.valid_until_ts, 300)
diff --git a/tests/unittest.py b/tests/unittest.py
index 5d3640d8ac..dbaff361b4 100644
--- a/tests/unittest.py
+++ b/tests/unittest.py
@@ -70,6 +70,7 @@ from synapse.logging.context import (
 )
 from synapse.rest import RegisterServletsFunc
 from synapse.server import HomeServer
+from synapse.storage.keys import FetchKeyResult
 from synapse.types import JsonDict, Requester, UserID, create_requester
 from synapse.util import Clock
 from synapse.util.httpresourcetree import create_resource_tree
@@ -858,23 +859,22 @@ class FederatingHomeserverTestCase(HomeserverTestCase):
         verify_key_id = "%s:%s" % (verify_key.alg, verify_key.version)
 
         self.get_success(
-            hs.get_datastores().main.store_server_keys_json(
+            hs.get_datastores().main.store_server_keys_response(
                 self.OTHER_SERVER_NAME,
-                verify_key_id,
                 from_server=self.OTHER_SERVER_NAME,
-                ts_now_ms=clock.time_msec(),
-                ts_expires_ms=clock.time_msec() + 10000,
-                key_json_bytes=canonicaljson.encode_canonical_json(
-                    {
-                        "verify_keys": {
-                            verify_key_id: {
-                                "key": signedjson.key.encode_verify_key_base64(
-                                    verify_key
-                                )
-                            }
+                ts_added_ms=clock.time_msec(),
+                verify_keys={
+                    verify_key_id: FetchKeyResult(
+                        verify_key=verify_key, valid_until_ts=clock.time_msec() + 10000
+                    ),
+                },
+                response_json={
+                    "verify_keys": {
+                        verify_key_id: {
+                            "key": signedjson.key.encode_verify_key_base64(verify_key)
                         }
                     }
-                ),
+                },
             )
         )
 
-- 
cgit 1.5.1


From 16ef6f1e3c8d0cfe959e4209fd04528658383ab4 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 12 Sep 2023 07:12:31 -0400
Subject: Stop purging tables which are slated for removal. (#16273)

---
 changelog.d/16273.misc                         | 1 +
 synapse/storage/databases/main/purge_events.py | 4 ----
 synapse/storage/schema/__init__.py             | 6 +++++-
 3 files changed, 6 insertions(+), 5 deletions(-)
 create mode 100644 changelog.d/16273.misc

(limited to 'synapse')

diff --git a/changelog.d/16273.misc b/changelog.d/16273.misc
new file mode 100644
index 0000000000..19882f6754
--- /dev/null
+++ b/changelog.d/16273.misc
@@ -0,0 +1 @@
+Stop purging from tables slated for removal.
diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py
index b52f48cf04..dea0e0458c 100644
--- a/synapse/storage/databases/main/purge_events.py
+++ b/synapse/storage/databases/main/purge_events.py
@@ -450,10 +450,6 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
             "e2e_room_keys",
             "event_push_summary",
             "pusher_throttle",
-            "insertion_events",
-            "insertion_event_extremities",
-            "insertion_event_edges",
-            "batch_events",
             "room_account_data",
             "room_tags",
             # "rooms" happens last, to keep the foreign keys in the other tables
diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 422f11f59e..5b50bd66bc 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 81  # remember to update the list below when updating
+SCHEMA_VERSION = 82  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -117,6 +117,10 @@ Changes in SCHEMA_VERSION = 80
 
 Changes in SCHEMA_VERSION = 81
     - The event_txn_id is no longer written to for new events.
+
+Changes in SCHEMA_VERSION = 82
+    - The insertion_events, insertion_event_extremities, insertion_event_edges, and
+      batch_events tables are no longer purged in preparation for their removal.
 """
 
 
-- 
cgit 1.5.1


From ba48c563c98966400488c8972d2e9964f9510399 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 12 Sep 2023 07:16:09 -0400
Subject: Bump mypy from 1.4.1 to 1.5.1. (#16300)

---
 changelog.d/16300.misc         |  1 +
 mypy.ini                       |  1 -
 poetry.lock                    | 68 ++++++++++++++++++------------------------
 synapse/logging/opentracing.py | 10 ++-----
 4 files changed, 32 insertions(+), 48 deletions(-)
 create mode 100644 changelog.d/16300.misc

(limited to 'synapse')

diff --git a/changelog.d/16300.misc b/changelog.d/16300.misc
new file mode 100644
index 0000000000..8cc2e52369
--- /dev/null
+++ b/changelog.d/16300.misc
@@ -0,0 +1 @@
+Bump mypy from 1.4.1 to 1.5.1.
diff --git a/mypy.ini b/mypy.ini
index fb5f44c939..88aea301b9 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -23,7 +23,6 @@ warn_unused_ignores = True
 # warn_return_any = True
 # no_implicit_reexport = True
 strict_equality = True
-strict_concatenate = True
 
 # Run mypy type checking with the minimum supported Python version to catch new usage
 # that isn't backwards-compatible (types, overloads, etc).
diff --git a/poetry.lock b/poetry.lock
index e4cea28282..c01312579e 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1445,37 +1445,38 @@ files = [
 
 [[package]]
 name = "mypy"
-version = "1.4.1"
+version = "1.5.1"
 description = "Optional static typing for Python"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "mypy-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:566e72b0cd6598503e48ea610e0052d1b8168e60a46e0bfd34b3acf2d57f96a8"},
-    {file = "mypy-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ca637024ca67ab24a7fd6f65d280572c3794665eaf5edcc7e90a866544076878"},
-    {file = "mypy-1.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dde1d180cd84f0624c5dcaaa89c89775550a675aff96b5848de78fb11adabcd"},
-    {file = "mypy-1.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8c4d8e89aa7de683e2056a581ce63c46a0c41e31bd2b6d34144e2c80f5ea53dc"},
-    {file = "mypy-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:bfdca17c36ae01a21274a3c387a63aa1aafe72bff976522886869ef131b937f1"},
-    {file = "mypy-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7549fbf655e5825d787bbc9ecf6028731973f78088fbca3a1f4145c39ef09462"},
-    {file = "mypy-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:98324ec3ecf12296e6422939e54763faedbfcc502ea4a4c38502082711867258"},
-    {file = "mypy-1.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:141dedfdbfe8a04142881ff30ce6e6653c9685b354876b12e4fe6c78598b45e2"},
-    {file = "mypy-1.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8207b7105829eca6f3d774f64a904190bb2231de91b8b186d21ffd98005f14a7"},
-    {file = "mypy-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:16f0db5b641ba159eff72cff08edc3875f2b62b2fa2bc24f68c1e7a4e8232d01"},
-    {file = "mypy-1.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:470c969bb3f9a9efcedbadcd19a74ffb34a25f8e6b0e02dae7c0e71f8372f97b"},
-    {file = "mypy-1.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5952d2d18b79f7dc25e62e014fe5a23eb1a3d2bc66318df8988a01b1a037c5b"},
-    {file = "mypy-1.4.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:190b6bab0302cec4e9e6767d3eb66085aef2a1cc98fe04936d8a42ed2ba77bb7"},
-    {file = "mypy-1.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:9d40652cc4fe33871ad3338581dca3297ff5f2213d0df345bcfbde5162abf0c9"},
-    {file = "mypy-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:01fd2e9f85622d981fd9063bfaef1aed6e336eaacca00892cd2d82801ab7c042"},
-    {file = "mypy-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2460a58faeea905aeb1b9b36f5065f2dc9a9c6e4c992a6499a2360c6c74ceca3"},
-    {file = "mypy-1.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2746d69a8196698146a3dbe29104f9eb6a2a4d8a27878d92169a6c0b74435b6"},
-    {file = "mypy-1.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ae704dcfaa180ff7c4cfbad23e74321a2b774f92ca77fd94ce1049175a21c97f"},
-    {file = "mypy-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:43d24f6437925ce50139a310a64b2ab048cb2d3694c84c71c3f2a1626d8101dc"},
-    {file = "mypy-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c482e1246726616088532b5e964e39765b6d1520791348e6c9dc3af25b233828"},
-    {file = "mypy-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:43b592511672017f5b1a483527fd2684347fdffc041c9ef53428c8dc530f79a3"},
-    {file = "mypy-1.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:34a9239d5b3502c17f07fd7c0b2ae6b7dd7d7f6af35fbb5072c6208e76295816"},
-    {file = "mypy-1.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5703097c4936bbb9e9bce41478c8d08edd2865e177dc4c52be759f81ee4dd26c"},
-    {file = "mypy-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:e02d700ec8d9b1859790c0475df4e4092c7bf3272a4fd2c9f33d87fac4427b8f"},
-    {file = "mypy-1.4.1-py3-none-any.whl", hash = "sha256:45d32cec14e7b97af848bddd97d85ea4f0db4d5a149ed9676caa4eb2f7402bb4"},
-    {file = "mypy-1.4.1.tar.gz", hash = "sha256:9bbcd9ab8ea1f2e1c8031c21445b511442cc45c89951e49bbf852cbb70755b1b"},
+    {file = "mypy-1.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f33592ddf9655a4894aef22d134de7393e95fcbdc2d15c1ab65828eee5c66c70"},
+    {file = "mypy-1.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:258b22210a4a258ccd077426c7a181d789d1121aca6db73a83f79372f5569ae0"},
+    {file = "mypy-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9ec1f695f0c25986e6f7f8778e5ce61659063268836a38c951200c57479cc12"},
+    {file = "mypy-1.5.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:abed92d9c8f08643c7d831300b739562b0a6c9fcb028d211134fc9ab20ccad5d"},
+    {file = "mypy-1.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:a156e6390944c265eb56afa67c74c0636f10283429171018446b732f1a05af25"},
+    {file = "mypy-1.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6ac9c21bfe7bc9f7f1b6fae441746e6a106e48fc9de530dea29e8cd37a2c0cc4"},
+    {file = "mypy-1.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:51cb1323064b1099e177098cb939eab2da42fea5d818d40113957ec954fc85f4"},
+    {file = "mypy-1.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:596fae69f2bfcb7305808c75c00f81fe2829b6236eadda536f00610ac5ec2243"},
+    {file = "mypy-1.5.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:32cb59609b0534f0bd67faebb6e022fe534bdb0e2ecab4290d683d248be1b275"},
+    {file = "mypy-1.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:159aa9acb16086b79bbb0016145034a1a05360626046a929f84579ce1666b315"},
+    {file = "mypy-1.5.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f6b0e77db9ff4fda74de7df13f30016a0a663928d669c9f2c057048ba44f09bb"},
+    {file = "mypy-1.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:26f71b535dfc158a71264e6dc805a9f8d2e60b67215ca0bfa26e2e1aa4d4d373"},
+    {file = "mypy-1.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fc3a600f749b1008cc75e02b6fb3d4db8dbcca2d733030fe7a3b3502902f161"},
+    {file = "mypy-1.5.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:26fb32e4d4afa205b24bf645eddfbb36a1e17e995c5c99d6d00edb24b693406a"},
+    {file = "mypy-1.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:82cb6193de9bbb3844bab4c7cf80e6227d5225cc7625b068a06d005d861ad5f1"},
+    {file = "mypy-1.5.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4a465ea2ca12804d5b34bb056be3a29dc47aea5973b892d0417c6a10a40b2d65"},
+    {file = "mypy-1.5.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9fece120dbb041771a63eb95e4896791386fe287fefb2837258925b8326d6160"},
+    {file = "mypy-1.5.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d28ddc3e3dfeab553e743e532fb95b4e6afad51d4706dd22f28e1e5e664828d2"},
+    {file = "mypy-1.5.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:57b10c56016adce71fba6bc6e9fd45d8083f74361f629390c556738565af8eeb"},
+    {file = "mypy-1.5.1-cp38-cp38-win_amd64.whl", hash = "sha256:ff0cedc84184115202475bbb46dd99f8dcb87fe24d5d0ddfc0fe6b8575c88d2f"},
+    {file = "mypy-1.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8f772942d372c8cbac575be99f9cc9d9fb3bd95c8bc2de6c01411e2c84ebca8a"},
+    {file = "mypy-1.5.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5d627124700b92b6bbaa99f27cbe615c8ea7b3402960f6372ea7d65faf376c14"},
+    {file = "mypy-1.5.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:361da43c4f5a96173220eb53340ace68cda81845cd88218f8862dfb0adc8cddb"},
+    {file = "mypy-1.5.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:330857f9507c24de5c5724235e66858f8364a0693894342485e543f5b07c8693"},
+    {file = "mypy-1.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:c543214ffdd422623e9fedd0869166c2f16affe4ba37463975043ef7d2ea8770"},
+    {file = "mypy-1.5.1-py3-none-any.whl", hash = "sha256:f757063a83970d67c444f6e01d9550a7402322af3557ce7630d3c957386fa8f5"},
+    {file = "mypy-1.5.1.tar.gz", hash = "sha256:b031b9601f1060bf1281feab89697324726ba0c0bae9d7cd7ab4b690940f0b92"},
 ]
 
 [package.dependencies]
@@ -1486,7 +1487,6 @@ typing-extensions = ">=4.1.0"
 [package.extras]
 dmypy = ["psutil (>=4.0)"]
 install-types = ["pip"]
-python2 = ["typed-ast (>=1.4.0,<2)"]
 reports = ["lxml"]
 
 [[package]]
@@ -2077,7 +2077,6 @@ files = [
     {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
     {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
     {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
-    {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
     {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
     {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
     {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
@@ -2085,15 +2084,8 @@ files = [
     {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
     {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
     {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
-    {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
     {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
     {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
-    {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
-    {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
-    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
-    {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
-    {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
-    {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
     {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
     {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
     {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@@ -2110,7 +2102,6 @@ files = [
     {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
     {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
     {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
-    {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
     {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
     {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
     {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
@@ -2118,7 +2109,6 @@ files = [
     {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
     {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
     {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
-    {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
     {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
     {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
     {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index 5c3045e197..4454fe29a5 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -991,11 +991,7 @@ def trace_with_opname(
         if not opentracing:
             return func
 
-        # type-ignore: mypy seems to be confused by the ParamSpecs here.
-        # I think the problem is https://github.com/python/mypy/issues/12909
-        return _custom_sync_async_decorator(
-            func, _wrapping_logic  # type: ignore[arg-type]
-        )
+        return _custom_sync_async_decorator(func, _wrapping_logic)
 
     return _decorator
 
@@ -1040,9 +1036,7 @@ def tag_args(func: Callable[P, R]) -> Callable[P, R]:
         set_tag(SynapseTags.FUNC_KWARGS, str(kwargs))
         yield
 
-    # type-ignore: mypy seems to be confused by the ParamSpecs here.
-    # I think the problem is https://github.com/python/mypy/issues/12909
-    return _custom_sync_async_decorator(func, _wrapping_logic)  # type: ignore[arg-type]
+    return _custom_sync_async_decorator(func, _wrapping_logic)
 
 
 @contextlib.contextmanager
-- 
cgit 1.5.1


From ab13fb08bf7c20a992ec2796c72d0fbb2a06545c Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Wed, 13 Sep 2023 10:51:50 +0100
Subject: Improve logging of replication (#16309)

---
 changelog.d/16309.misc              | 1 +
 synapse/replication/tcp/handler.py  | 2 +-
 synapse/replication/tcp/resource.py | 7 ++++++-
 3 files changed, 8 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/16309.misc

(limited to 'synapse')

diff --git a/changelog.d/16309.misc b/changelog.d/16309.misc
new file mode 100644
index 0000000000..bef5563ee9
--- /dev/null
+++ b/changelog.d/16309.misc
@@ -0,0 +1 @@
+Small improvements to logging in replication code.
diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py
index d9045d7b73..5642666411 100644
--- a/synapse/replication/tcp/handler.py
+++ b/synapse/replication/tcp/handler.py
@@ -644,7 +644,7 @@ class ReplicationCommandHandler:
                     [stream.parse_row(row) for row in rows],
                 )
 
-            logger.info("Caught up with stream '%s' to %i", stream_name, cmd.new_token)
+        logger.info("Caught up with stream '%s' to %i", stream_name, cmd.new_token)
 
         # We've now caught up to position sent to us, notify handler.
         await self._replication_data_handler.on_position(
diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py
index 347467d863..1d9a29d22e 100644
--- a/synapse/replication/tcp/resource.py
+++ b/synapse/replication/tcp/resource.py
@@ -191,7 +191,12 @@ class ReplicationStreamer:
 
                         if updates:
                             logger.info(
-                                "Streaming: %s -> %s", stream.NAME, updates[-1][0]
+                                "Streaming: %s -> %s (limited: %s, updates: %s, max token: %s)",
+                                stream.NAME,
+                                updates[-1][0],
+                                limited,
+                                len(updates),
+                                current_token,
                             )
                             stream_updates_counter.labels(stream.NAME).inc(len(updates))
 
-- 
cgit 1.5.1


From be3c7b08a3e6888e60497a80ebd143bd4df9a719 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Wed, 13 Sep 2023 11:54:16 +0100
Subject: Fix deleting device inbox when using background worker (#16311)

Introduced in #16240

The action for the task was only defined on the "master" handler, rather than the base worker one.
---
 changelog.d/16311.misc     |  1 +
 synapse/handlers/device.py | 62 +++++++++++++++++++++++-----------------------
 2 files changed, 32 insertions(+), 31 deletions(-)
 create mode 100644 changelog.d/16311.misc

(limited to 'synapse')

diff --git a/changelog.d/16311.misc b/changelog.d/16311.misc
new file mode 100644
index 0000000000..4f266c1fb0
--- /dev/null
+++ b/changelog.d/16311.misc
@@ -0,0 +1 @@
+Delete device messages asynchronously and in staged batches using the task scheduler.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 9d240ad4ee..e2ae3da67e 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -91,9 +91,14 @@ class DeviceWorkerHandler:
         self._query_appservices_for_keys = (
             hs.config.experimental.msc3984_appservice_key_query
         )
+        self._task_scheduler = hs.get_task_scheduler()
 
         self.device_list_updater = DeviceListWorkerUpdater(hs)
 
+        self._task_scheduler.register_action(
+            self._delete_device_messages, DELETE_DEVICE_MSGS_TASK_NAME
+        )
+
     @trace
     async def get_devices_by_user(self, user_id: str) -> List[JsonDict]:
         """
@@ -383,6 +388,32 @@ class DeviceWorkerHandler:
             "Trying handling device list state for partial join: not supported on workers."
         )
 
+    DEVICE_MSGS_DELETE_BATCH_LIMIT = 100
+
+    async def _delete_device_messages(
+        self,
+        task: ScheduledTask,
+    ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
+        """Scheduler task to delete device messages in batch of `DEVICE_MSGS_DELETE_BATCH_LIMIT`."""
+        assert task.params is not None
+        user_id = task.params["user_id"]
+        device_id = task.params["device_id"]
+        up_to_stream_id = task.params["up_to_stream_id"]
+
+        res = await self.store.delete_messages_for_device(
+            user_id=user_id,
+            device_id=device_id,
+            up_to_stream_id=up_to_stream_id,
+            limit=DeviceHandler.DEVICE_MSGS_DELETE_BATCH_LIMIT,
+        )
+
+        if res < DeviceHandler.DEVICE_MSGS_DELETE_BATCH_LIMIT:
+            return TaskStatus.COMPLETE, None, None
+        else:
+            # There is probably still device messages to be deleted, let's keep the task active and it will be run
+            # again in a subsequent scheduler loop run (probably the next one, if not too many tasks are running).
+            return TaskStatus.ACTIVE, None, None
+
 
 class DeviceHandler(DeviceWorkerHandler):
     device_list_updater: "DeviceListUpdater"
@@ -394,7 +425,6 @@ class DeviceHandler(DeviceWorkerHandler):
         self._account_data_handler = hs.get_account_data_handler()
         self._storage_controllers = hs.get_storage_controllers()
         self.db_pool = hs.get_datastores().main.db_pool
-        self._task_scheduler = hs.get_task_scheduler()
 
         self.device_list_updater = DeviceListUpdater(hs, self)
 
@@ -428,10 +458,6 @@ class DeviceHandler(DeviceWorkerHandler):
                 self._delete_stale_devices,
             )
 
-        self._task_scheduler.register_action(
-            self._delete_device_messages, DELETE_DEVICE_MSGS_TASK_NAME
-        )
-
     def _check_device_name_length(self, name: Optional[str]) -> None:
         """
         Checks whether a device name is longer than the maximum allowed length.
@@ -590,32 +616,6 @@ class DeviceHandler(DeviceWorkerHandler):
 
         await self.notify_device_update(user_id, device_ids)
 
-    DEVICE_MSGS_DELETE_BATCH_LIMIT = 100
-
-    async def _delete_device_messages(
-        self,
-        task: ScheduledTask,
-    ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
-        """Scheduler task to delete device messages in batch of `DEVICE_MSGS_DELETE_BATCH_LIMIT`."""
-        assert task.params is not None
-        user_id = task.params["user_id"]
-        device_id = task.params["device_id"]
-        up_to_stream_id = task.params["up_to_stream_id"]
-
-        res = await self.store.delete_messages_for_device(
-            user_id=user_id,
-            device_id=device_id,
-            up_to_stream_id=up_to_stream_id,
-            limit=DeviceHandler.DEVICE_MSGS_DELETE_BATCH_LIMIT,
-        )
-
-        if res < DeviceHandler.DEVICE_MSGS_DELETE_BATCH_LIMIT:
-            return TaskStatus.COMPLETE, None, None
-        else:
-            # There is probably still device messages to be deleted, let's keep the task active and it will be run
-            # again in a subsequent scheduler loop run (probably the next one, if not too many tasks are running).
-            return TaskStatus.ACTIVE, None, None
-
     async def update_device(self, user_id: str, device_id: str, content: dict) -> None:
         """Update the given device
 
-- 
cgit 1.5.1


From e9addf6a01ab173bcf0aeeae35d7052a5bde9454 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Wed, 13 Sep 2023 11:59:44 +0100
Subject: Don't schedule an async task on every sync (#16312)

---
 changelog.d/16312.misc   |  1 +
 synapse/handlers/sync.py | 37 ++++++++++++++++++++++++++-----------
 2 files changed, 27 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/16312.misc

(limited to 'synapse')

diff --git a/changelog.d/16312.misc b/changelog.d/16312.misc
new file mode 100644
index 0000000000..4f266c1fb0
--- /dev/null
+++ b/changelog.d/16312.misc
@@ -0,0 +1 @@
+Delete device messages asynchronously and in staged batches using the task scheduler.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 0ccd7d250c..f1f19666d7 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -362,21 +362,36 @@ class SyncHandler:
         # (since we now know that the device has received them)
         if since_token is not None:
             since_stream_id = since_token.to_device_key
-            # Delete device messages asynchronously and in batches using the task scheduler
-            await self._task_scheduler.schedule_task(
-                DELETE_DEVICE_MSGS_TASK_NAME,
-                resource_id=sync_config.device_id,
-                params={
-                    "user_id": sync_config.user.to_string(),
-                    "device_id": sync_config.device_id,
-                    "up_to_stream_id": since_stream_id,
-                },
+            # Fast path: delete a limited number of to-device messages up front.
+            # We do this to avoid the overhead of scheduling a task for every
+            # sync.
+            device_deletion_limit = 100
+            deleted = await self.store.delete_messages_for_device(
+                sync_config.user.to_string(),
+                sync_config.device_id,
+                since_stream_id,
+                limit=device_deletion_limit,
             )
             logger.debug(
-                "Deletion of to-device messages up to %d scheduled",
-                since_stream_id,
+                "Deleted %d to-device messages up to %d", deleted, since_stream_id
             )
 
+            # If we hit the limit, schedule a background task to delete the rest.
+            if deleted >= device_deletion_limit:
+                await self._task_scheduler.schedule_task(
+                    DELETE_DEVICE_MSGS_TASK_NAME,
+                    resource_id=sync_config.device_id,
+                    params={
+                        "user_id": sync_config.user.to_string(),
+                        "device_id": sync_config.device_id,
+                        "up_to_stream_id": since_stream_id,
+                    },
+                )
+                logger.debug(
+                    "Deletion of to-device messages up to %d scheduled",
+                    since_stream_id,
+                )
+
         if timeout == 0 or since_token is None or full_state:
             # we are going to return immediately, so don't bother calling
             # notifier.wait_for_events.
-- 
cgit 1.5.1


From d38d0dffc94b6269ed7ff5163d60958be3e6c304 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 13 Sep 2023 07:57:19 -0400
Subject: Use StrCollection in additional places. (#16301)

---
 changelog.d/16301.misc                             |  1 +
 synapse/app/_base.py                               | 12 ++++----
 synapse/config/_base.py                            |  3 +-
 synapse/events/__init__.py                         |  5 ++--
 synapse/events/builder.py                          |  8 +++---
 synapse/events/validator.py                        |  6 ++--
 synapse/http/client.py                             |  5 ++--
 synapse/http/servlet.py                            | 33 +++++++++++-----------
 synapse/metrics/__init__.py                        |  8 +++---
 synapse/notifier.py                                |  6 ++--
 synapse/rest/client/_base.py                       |  4 +--
 synapse/state/__init__.py                          | 13 ++++-----
 synapse/state/v1.py                                |  5 ++--
 synapse/state/v2.py                                |  7 ++---
 synapse/storage/databases/main/event_federation.py |  4 +--
 synapse/visibility.py                              |  6 ++--
 16 files changed, 59 insertions(+), 67 deletions(-)
 create mode 100644 changelog.d/16301.misc

(limited to 'synapse')

diff --git a/changelog.d/16301.misc b/changelog.d/16301.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/16301.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index a94b57a671..9ac7e4313e 100644
--- a/synapse/app/_base.py
+++ b/synapse/app/_base.py
@@ -27,9 +27,7 @@ from typing import (
     Any,
     Awaitable,
     Callable,
-    Collection,
     Dict,
-    Iterable,
     List,
     NoReturn,
     Optional,
@@ -76,7 +74,7 @@ from synapse.module_api.callbacks.spamchecker_callbacks import load_legacy_spam_
 from synapse.module_api.callbacks.third_party_event_rules_callbacks import (
     load_legacy_third_party_event_rules,
 )
-from synapse.types import ISynapseReactor
+from synapse.types import ISynapseReactor, StrCollection
 from synapse.util import SYNAPSE_VERSION
 from synapse.util.caches.lrucache import setup_expire_lru_cache_entries
 from synapse.util.daemonize import daemonize_process
@@ -278,7 +276,7 @@ def register_start(
     reactor.callWhenRunning(lambda: defer.ensureDeferred(wrapper()))
 
 
-def listen_metrics(bind_addresses: Iterable[str], port: int) -> None:
+def listen_metrics(bind_addresses: StrCollection, port: int) -> None:
     """
     Start Prometheus metrics server.
     """
@@ -315,7 +313,7 @@ def _set_prometheus_client_use_created_metrics(new_value: bool) -> None:
 
 
 def listen_manhole(
-    bind_addresses: Collection[str],
+    bind_addresses: StrCollection,
     port: int,
     manhole_settings: ManholeConfig,
     manhole_globals: dict,
@@ -339,7 +337,7 @@ def listen_manhole(
 
 
 def listen_tcp(
-    bind_addresses: Collection[str],
+    bind_addresses: StrCollection,
     port: int,
     factory: ServerFactory,
     reactor: IReactorTCP = reactor,
@@ -448,7 +446,7 @@ def listen_http(
 
 
 def listen_ssl(
-    bind_addresses: Collection[str],
+    bind_addresses: StrCollection,
     port: int,
     factory: ServerFactory,
     context_factory: IOpenSSLContextFactory,
diff --git a/synapse/config/_base.py b/synapse/config/_base.py
index 58856839e1..c5816105f4 100644
--- a/synapse/config/_base.py
+++ b/synapse/config/_base.py
@@ -26,7 +26,6 @@ from textwrap import dedent
 from typing import (
     Any,
     ClassVar,
-    Collection,
     Dict,
     Iterable,
     Iterator,
@@ -384,7 +383,7 @@ class RootConfig:
 
     config_classes: List[Type[Config]] = []
 
-    def __init__(self, config_files: Collection[str] = ()):
+    def __init__(self, config_files: StrSequence = ()):
         # Capture absolute paths here, so we can reload config after we daemonize.
         self.config_files = [os.path.abspath(path) for path in config_files]
 
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 35257a3b1b..3c1777b7ec 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -25,7 +25,6 @@ from typing import (
     Iterable,
     List,
     Optional,
-    Sequence,
     Tuple,
     Type,
     TypeVar,
@@ -408,7 +407,7 @@ class EventBase(metaclass=abc.ABCMeta):
     def keys(self) -> Iterable[str]:
         return self._dict.keys()
 
-    def prev_event_ids(self) -> Sequence[str]:
+    def prev_event_ids(self) -> List[str]:
         """Returns the list of prev event IDs. The order matches the order
         specified in the event, though there is no meaning to it.
 
@@ -553,7 +552,7 @@ class FrozenEventV2(EventBase):
         self._event_id = "$" + encode_base64(compute_event_reference_hash(self)[1])
         return self._event_id
 
-    def prev_event_ids(self) -> Sequence[str]:
+    def prev_event_ids(self) -> List[str]:
         """Returns the list of prev event IDs. The order matches the order
         specified in the event, though there is no meaning to it.
 
diff --git a/synapse/events/builder.py b/synapse/events/builder.py
index 14ea0e6640..1165c017ba 100644
--- a/synapse/events/builder.py
+++ b/synapse/events/builder.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import TYPE_CHECKING, Any, Collection, Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 
 import attr
 from signedjson.types import SigningKey
@@ -28,7 +28,7 @@ from synapse.event_auth import auth_types_for_event
 from synapse.events import EventBase, _EventInternalMetadata, make_event_from_dict
 from synapse.state import StateHandler
 from synapse.storage.databases.main import DataStore
-from synapse.types import EventID, JsonDict
+from synapse.types import EventID, JsonDict, StrCollection
 from synapse.types.state import StateFilter
 from synapse.util import Clock
 from synapse.util.stringutils import random_string
@@ -103,7 +103,7 @@ class EventBuilder:
 
     async def build(
         self,
-        prev_event_ids: Collection[str],
+        prev_event_ids: StrCollection,
         auth_event_ids: Optional[List[str]],
         depth: Optional[int] = None,
     ) -> EventBase:
@@ -136,7 +136,7 @@ class EventBuilder:
 
         format_version = self.room_version.event_format
         # The types of auth/prev events changes between event versions.
-        prev_events: Union[Collection[str], List[Tuple[str, Dict[str, str]]]]
+        prev_events: Union[StrCollection, List[Tuple[str, Dict[str, str]]]]
         auth_events: Union[List[str], List[Tuple[str, Dict[str, str]]]]
         if format_version == EventFormatVersions.ROOM_V1_V2:
             auth_events = await self._store.add_event_hashes(auth_event_ids)
diff --git a/synapse/events/validator.py b/synapse/events/validator.py
index 34625dd7a1..5da50cb0d2 100644
--- a/synapse/events/validator.py
+++ b/synapse/events/validator.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import collections.abc
-from typing import Iterable, List, Type, Union, cast
+from typing import List, Type, Union, cast
 
 import jsonschema
 from pydantic import Field, StrictBool, StrictStr
@@ -36,7 +36,7 @@ from synapse.events.utils import (
 from synapse.federation.federation_server import server_matches_acl_event
 from synapse.http.servlet import validate_json_object
 from synapse.rest.models import RequestBodyModel
-from synapse.types import EventID, JsonDict, RoomID, UserID
+from synapse.types import EventID, JsonDict, RoomID, StrCollection, UserID
 
 
 class EventValidator:
@@ -225,7 +225,7 @@ class EventValidator:
 
             self._ensure_state_event(event)
 
-    def _ensure_strings(self, d: JsonDict, keys: Iterable[str]) -> None:
+    def _ensure_strings(self, d: JsonDict, keys: StrCollection) -> None:
         for s in keys:
             if s not in d:
                 raise SynapseError(400, "'%s' not in content" % (s,))
diff --git a/synapse/http/client.py b/synapse/http/client.py
index ca2cdbc6e2..c750e03b36 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -78,7 +78,7 @@ from synapse.http.replicationagent import ReplicationAgent
 from synapse.http.types import QueryParams
 from synapse.logging.context import make_deferred_yieldable, run_in_background
 from synapse.logging.opentracing import set_tag, start_active_span, tags
-from synapse.types import ISynapseReactor
+from synapse.types import ISynapseReactor, StrSequence
 from synapse.util import json_decoder
 from synapse.util.async_helpers import timeout_deferred
 
@@ -108,10 +108,9 @@ RawHeaders = Union[Mapping[str, "RawHeaderValue"], Mapping[bytes, "RawHeaderValu
 # the value actually has to be a List, but List is invariant so we can't specify that
 # the entries can either be Lists or bytes.
 RawHeaderValue = Union[
-    List[str],
+    StrSequence,
     List[bytes],
     List[Union[str, bytes]],
-    Tuple[str, ...],
     Tuple[bytes, ...],
     Tuple[Union[str, bytes], ...],
 ]
diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py
index fc62793628..5d79d31579 100644
--- a/synapse/http/servlet.py
+++ b/synapse/http/servlet.py
@@ -18,7 +18,6 @@ import logging
 from http import HTTPStatus
 from typing import (
     TYPE_CHECKING,
-    Iterable,
     List,
     Mapping,
     Optional,
@@ -38,7 +37,7 @@ from twisted.web.server import Request
 from synapse.api.errors import Codes, SynapseError
 from synapse.http import redact_uri
 from synapse.http.server import HttpServer
-from synapse.types import JsonDict, RoomAlias, RoomID
+from synapse.types import JsonDict, RoomAlias, RoomID, StrCollection
 from synapse.util import json_decoder
 
 if TYPE_CHECKING:
@@ -340,7 +339,7 @@ def parse_string(
     name: str,
     default: str,
     *,
-    allowed_values: Optional[Iterable[str]] = None,
+    allowed_values: Optional[StrCollection] = None,
     encoding: str = "ascii",
 ) -> str:
     ...
@@ -352,7 +351,7 @@ def parse_string(
     name: str,
     *,
     required: Literal[True],
-    allowed_values: Optional[Iterable[str]] = None,
+    allowed_values: Optional[StrCollection] = None,
     encoding: str = "ascii",
 ) -> str:
     ...
@@ -365,7 +364,7 @@ def parse_string(
     *,
     default: Optional[str] = None,
     required: bool = False,
-    allowed_values: Optional[Iterable[str]] = None,
+    allowed_values: Optional[StrCollection] = None,
     encoding: str = "ascii",
 ) -> Optional[str]:
     ...
@@ -376,7 +375,7 @@ def parse_string(
     name: str,
     default: Optional[str] = None,
     required: bool = False,
-    allowed_values: Optional[Iterable[str]] = None,
+    allowed_values: Optional[StrCollection] = None,
     encoding: str = "ascii",
 ) -> Optional[str]:
     """
@@ -485,7 +484,7 @@ def parse_enum(
 
 def _parse_string_value(
     value: bytes,
-    allowed_values: Optional[Iterable[str]],
+    allowed_values: Optional[StrCollection],
     name: str,
     encoding: str,
 ) -> str:
@@ -511,7 +510,7 @@ def parse_strings_from_args(
     args: Mapping[bytes, Sequence[bytes]],
     name: str,
     *,
-    allowed_values: Optional[Iterable[str]] = None,
+    allowed_values: Optional[StrCollection] = None,
     encoding: str = "ascii",
 ) -> Optional[List[str]]:
     ...
@@ -523,7 +522,7 @@ def parse_strings_from_args(
     name: str,
     default: List[str],
     *,
-    allowed_values: Optional[Iterable[str]] = None,
+    allowed_values: Optional[StrCollection] = None,
     encoding: str = "ascii",
 ) -> List[str]:
     ...
@@ -535,7 +534,7 @@ def parse_strings_from_args(
     name: str,
     *,
     required: Literal[True],
-    allowed_values: Optional[Iterable[str]] = None,
+    allowed_values: Optional[StrCollection] = None,
     encoding: str = "ascii",
 ) -> List[str]:
     ...
@@ -548,7 +547,7 @@ def parse_strings_from_args(
     default: Optional[List[str]] = None,
     *,
     required: bool = False,
-    allowed_values: Optional[Iterable[str]] = None,
+    allowed_values: Optional[StrCollection] = None,
     encoding: str = "ascii",
 ) -> Optional[List[str]]:
     ...
@@ -559,7 +558,7 @@ def parse_strings_from_args(
     name: str,
     default: Optional[List[str]] = None,
     required: bool = False,
-    allowed_values: Optional[Iterable[str]] = None,
+    allowed_values: Optional[StrCollection] = None,
     encoding: str = "ascii",
 ) -> Optional[List[str]]:
     """
@@ -610,7 +609,7 @@ def parse_string_from_args(
     name: str,
     default: Optional[str] = None,
     *,
-    allowed_values: Optional[Iterable[str]] = None,
+    allowed_values: Optional[StrCollection] = None,
     encoding: str = "ascii",
 ) -> Optional[str]:
     ...
@@ -623,7 +622,7 @@ def parse_string_from_args(
     default: Optional[str] = None,
     *,
     required: Literal[True],
-    allowed_values: Optional[Iterable[str]] = None,
+    allowed_values: Optional[StrCollection] = None,
     encoding: str = "ascii",
 ) -> str:
     ...
@@ -635,7 +634,7 @@ def parse_string_from_args(
     name: str,
     default: Optional[str] = None,
     required: bool = False,
-    allowed_values: Optional[Iterable[str]] = None,
+    allowed_values: Optional[StrCollection] = None,
     encoding: str = "ascii",
 ) -> Optional[str]:
     ...
@@ -646,7 +645,7 @@ def parse_string_from_args(
     name: str,
     default: Optional[str] = None,
     required: bool = False,
-    allowed_values: Optional[Iterable[str]] = None,
+    allowed_values: Optional[StrCollection] = None,
     encoding: str = "ascii",
 ) -> Optional[str]:
     """
@@ -821,7 +820,7 @@ def parse_and_validate_json_object_from_request(
     return validate_json_object(content, model_type)
 
 
-def assert_params_in_dict(body: JsonDict, required: Iterable[str]) -> None:
+def assert_params_in_dict(body: JsonDict, required: StrCollection) -> None:
     absent = []
     for k in required:
         if k not in body:
diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index 39fc629937..3cf2fbc3e2 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -25,7 +25,6 @@ from typing import (
     Iterable,
     Mapping,
     Optional,
-    Sequence,
     Set,
     Tuple,
     Type,
@@ -49,6 +48,7 @@ import synapse.metrics._reactor_metrics  # noqa: F401
 from synapse.metrics._gc import MIN_TIME_BETWEEN_GCS, install_gc_manager
 from synapse.metrics._twisted_exposition import MetricsResource, generate_latest
 from synapse.metrics._types import Collector
+from synapse.types import StrSequence
 from synapse.util import SYNAPSE_VERSION
 
 logger = logging.getLogger(__name__)
@@ -81,7 +81,7 @@ class LaterGauge(Collector):
 
     name: str
     desc: str
-    labels: Optional[Sequence[str]] = attr.ib(hash=False)
+    labels: Optional[StrSequence] = attr.ib(hash=False)
     # callback: should either return a value (if there are no labels for this metric),
     # or dict mapping from a label tuple to a value
     caller: Callable[
@@ -143,8 +143,8 @@ class InFlightGauge(Generic[MetricsEntry], Collector):
         self,
         name: str,
         desc: str,
-        labels: Sequence[str],
-        sub_metrics: Sequence[str],
+        labels: StrSequence,
+        sub_metrics: StrSequence,
     ):
         self.name = name
         self.desc = desc
diff --git a/synapse/notifier.py b/synapse/notifier.py
index 68115bca70..fc39e5c963 100644
--- a/synapse/notifier.py
+++ b/synapse/notifier.py
@@ -104,7 +104,7 @@ class _NotifierUserStream:
     def __init__(
         self,
         user_id: str,
-        rooms: Collection[str],
+        rooms: StrCollection,
         current_token: StreamToken,
         time_now_ms: int,
     ):
@@ -457,7 +457,7 @@ class Notifier:
         stream_key: str,
         new_token: Union[int, RoomStreamToken],
         users: Optional[Collection[Union[str, UserID]]] = None,
-        rooms: Optional[Collection[str]] = None,
+        rooms: Optional[StrCollection] = None,
     ) -> None:
         """Used to inform listeners that something has happened event wise.
 
@@ -529,7 +529,7 @@ class Notifier:
         user_id: str,
         timeout: int,
         callback: Callable[[StreamToken, StreamToken], Awaitable[T]],
-        room_ids: Optional[Collection[str]] = None,
+        room_ids: Optional[StrCollection] = None,
         from_token: StreamToken = StreamToken.START,
     ) -> T:
         """Wait until the callback returns a non empty response or the
diff --git a/synapse/rest/client/_base.py b/synapse/rest/client/_base.py
index 5c1c19e1f3..73c568ef75 100644
--- a/synapse/rest/client/_base.py
+++ b/synapse/rest/client/_base.py
@@ -20,14 +20,14 @@ from typing import Any, Awaitable, Callable, Iterable, Pattern, Tuple, TypeVar,
 
 from synapse.api.errors import InteractiveAuthIncompleteError
 from synapse.api.urls import CLIENT_API_PREFIX
-from synapse.types import JsonDict
+from synapse.types import JsonDict, StrCollection
 
 logger = logging.getLogger(__name__)
 
 
 def client_patterns(
     path_regex: str,
-    releases: Iterable[str] = ("r0", "v3"),
+    releases: StrCollection = ("r0", "v3"),
     unstable: bool = True,
     v1: bool = False,
 ) -> Iterable[Pattern]:
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index 1b91cf5eaa..e977ed1044 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -20,7 +20,6 @@ from typing import (
     Any,
     Awaitable,
     Callable,
-    Collection,
     DefaultDict,
     Dict,
     FrozenSet,
@@ -49,7 +48,7 @@ from synapse.logging.opentracing import tag_args, trace
 from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet
 from synapse.state import v1, v2
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
-from synapse.types import StateMap
+from synapse.types import StateMap, StrCollection
 from synapse.types.state import StateFilter
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches.expiringcache import ExpiringCache
@@ -197,7 +196,7 @@ class StateHandler:
     async def compute_state_after_events(
         self,
         room_id: str,
-        event_ids: Collection[str],
+        event_ids: StrCollection,
         state_filter: Optional[StateFilter] = None,
         await_full_state: bool = True,
     ) -> StateMap[str]:
@@ -231,7 +230,7 @@ class StateHandler:
         return await ret.get_state(self._state_storage_controller, state_filter)
 
     async def get_current_user_ids_in_room(
-        self, room_id: str, latest_event_ids: Collection[str]
+        self, room_id: str, latest_event_ids: StrCollection
     ) -> Set[str]:
         """
         Get the users IDs who are currently in a room.
@@ -256,7 +255,7 @@ class StateHandler:
         return await self.store.get_joined_user_ids_from_state(room_id, state)
 
     async def get_hosts_in_room_at_events(
-        self, room_id: str, event_ids: Collection[str]
+        self, room_id: str, event_ids: StrCollection
     ) -> FrozenSet[str]:
         """Get the hosts that were in a room at the given event ids
 
@@ -470,7 +469,7 @@ class StateHandler:
     @trace
     @measure_func()
     async def resolve_state_groups_for_events(
-        self, room_id: str, event_ids: Collection[str], await_full_state: bool = True
+        self, room_id: str, event_ids: StrCollection, await_full_state: bool = True
     ) -> _StateCacheEntry:
         """Given a list of event_ids this method fetches the state at each
         event, resolves conflicts between them and returns them.
@@ -882,7 +881,7 @@ class StateResolutionStore:
     store: "DataStore"
 
     def get_events(
-        self, event_ids: Collection[str], allow_rejected: bool = False
+        self, event_ids: StrCollection, allow_rejected: bool = False
     ) -> Awaitable[Dict[str, EventBase]]:
         """Get events from the database
 
diff --git a/synapse/state/v1.py b/synapse/state/v1.py
index 500e384695..c76a2f082e 100644
--- a/synapse/state/v1.py
+++ b/synapse/state/v1.py
@@ -17,7 +17,6 @@ import logging
 from typing import (
     Awaitable,
     Callable,
-    Collection,
     Dict,
     Iterable,
     List,
@@ -32,7 +31,7 @@ from synapse.api.constants import EventTypes
 from synapse.api.errors import AuthError
 from synapse.api.room_versions import RoomVersion
 from synapse.events import EventBase
-from synapse.types import MutableStateMap, StateMap
+from synapse.types import MutableStateMap, StateMap, StrCollection
 
 logger = logging.getLogger(__name__)
 
@@ -45,7 +44,7 @@ async def resolve_events_with_store(
     room_version: RoomVersion,
     state_sets: Sequence[StateMap[str]],
     event_map: Optional[Dict[str, EventBase]],
-    state_map_factory: Callable[[Collection[str]], Awaitable[Dict[str, EventBase]]],
+    state_map_factory: Callable[[StrCollection], Awaitable[Dict[str, EventBase]]],
 ) -> StateMap[str]:
     """
     Args:
diff --git a/synapse/state/v2.py b/synapse/state/v2.py
index 44c49274a9..1752f95db8 100644
--- a/synapse/state/v2.py
+++ b/synapse/state/v2.py
@@ -19,7 +19,6 @@ from typing import (
     Any,
     Awaitable,
     Callable,
-    Collection,
     Dict,
     Generator,
     Iterable,
@@ -39,7 +38,7 @@ from synapse.api.constants import EventTypes
 from synapse.api.errors import AuthError
 from synapse.api.room_versions import RoomVersion
 from synapse.events import EventBase
-from synapse.types import MutableStateMap, StateMap
+from synapse.types import MutableStateMap, StateMap, StrCollection
 
 logger = logging.getLogger(__name__)
 
@@ -56,7 +55,7 @@ class StateResolutionStore(Protocol):
     # This is usually synapse.state.StateResolutionStore, but it's replaced with a
     # TestStateResolutionStore in tests.
     def get_events(
-        self, event_ids: Collection[str], allow_rejected: bool = False
+        self, event_ids: StrCollection, allow_rejected: bool = False
     ) -> Awaitable[Dict[str, EventBase]]:
         ...
 
@@ -366,7 +365,7 @@ async def _get_auth_chain_difference(
         union = unpersisted_set_ids[0].union(*unpersisted_set_ids[1:])
         intersection = unpersisted_set_ids[0].intersection(*unpersisted_set_ids[1:])
 
-        auth_difference_unpersisted_part: Collection[str] = union - intersection
+        auth_difference_unpersisted_part: StrCollection = union - intersection
     else:
         auth_difference_unpersisted_part = ()
         state_sets_ids = [set(state_set.values()) for state_set in state_sets]
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index fab7008a8f..09de8f55e2 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -47,7 +47,7 @@ from synapse.storage.database import (
 from synapse.storage.databases.main.events_worker import EventsWorkerStore
 from synapse.storage.databases.main.signatures import SignatureWorkerStore
 from synapse.storage.engines import PostgresEngine, Sqlite3Engine
-from synapse.types import JsonDict, StrCollection
+from synapse.types import JsonDict, StrCollection, StrSequence
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached
 from synapse.util.caches.lrucache import LruCache
@@ -1179,7 +1179,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
         )
 
     @cached(max_entries=5000, iterable=True)
-    async def get_latest_event_ids_in_room(self, room_id: str) -> Sequence[str]:
+    async def get_latest_event_ids_in_room(self, room_id: str) -> StrSequence:
         return await self.db_pool.simple_select_onecol(
             table="event_forward_extremities",
             keyvalues={"room_id": room_id},
diff --git a/synapse/visibility.py b/synapse/visibility.py
index eac10f6438..f15fdd8314 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -36,7 +36,7 @@ from synapse.events.utils import prune_event
 from synapse.logging.opentracing import trace
 from synapse.storage.controllers import StorageControllers
 from synapse.storage.databases.main import DataStore
-from synapse.types import RetentionPolicy, StateMap, get_domain_from_id
+from synapse.types import RetentionPolicy, StateMap, StrCollection, get_domain_from_id
 from synapse.types.state import StateFilter
 from synapse.util import Clock
 
@@ -150,12 +150,12 @@ async def filter_events_for_client(
 
 async def filter_event_for_clients_with_state(
     store: DataStore,
-    user_ids: Collection[str],
+    user_ids: StrCollection,
     event: EventBase,
     context: EventContext,
     is_peeking: bool = False,
     filter_send_to_client: bool = True,
-) -> Collection[str]:
+) -> StrCollection:
     """
     Checks to see if an event is visible to the users in the list at the time of
     the event.
-- 
cgit 1.5.1


From 7afb5e041004bab8b0aaf7909ce3c7a9ef80077f Mon Sep 17 00:00:00 2001
From: Hanadi <hanadi.tamimi@gmail.com>
Date: Wed, 13 Sep 2023 14:33:39 +0200
Subject: Fix using dehydrated devices (MSC2697) & refresh tokens (#16288)

Refresh tokens were not moved to the rehydrated device, unlike the
access token, which was already being moved correctly. This left the
refresh tokens invalid after rehydration.
---
 changelog.d/16288.bugfix                       |  1 +
 synapse/handlers/device.py                     |  7 ++++---
 synapse/storage/databases/main/registration.py | 20 ++++++++++++++++++++
 tests/handlers/test_device.py                  | 10 +++++++++-
 4 files changed, 34 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/16288.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16288.bugfix b/changelog.d/16288.bugfix
new file mode 100644
index 0000000000..f08d10d1f3
--- /dev/null
+++ b/changelog.d/16288.bugfix
@@ -0,0 +1 @@
+Fix bug introduced in Synapse 1.49.0 when using dehydrated devices ([MSC2697](https://github.com/matrix-org/matrix-spec-proposals/pull/2697)) and refresh tokens. Contributed by Hanadi.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index e2ae3da67e..0d3d5ebc86 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -758,12 +758,13 @@ class DeviceHandler(DeviceWorkerHandler):
 
         # If the dehydrated device was successfully deleted (the device ID
         # matched the stored dehydrated device), then modify the access
-        # token to use the dehydrated device's ID and copy the old device
-        # display name to the dehydrated device, and destroy the old device
-        # ID
+        # token and refresh token to use the dehydrated device's ID and
+        # copy the old device display name to the dehydrated device,
+        # and destroy the old device ID
         old_device_id = await self.store.set_device_for_access_token(
             access_token, device_id
         )
+        await self.store.set_device_for_refresh_token(user_id, old_device_id, device_id)
         old_device = await self.store.get_device(user_id, old_device_id)
         if old_device is None:
             raise errors.NotFoundError()
diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py
index 7e85b73e8e..e34156dc55 100644
--- a/synapse/storage/databases/main/registration.py
+++ b/synapse/storage/databases/main/registration.py
@@ -2312,6 +2312,26 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore):
 
         return next_id
 
+    async def set_device_for_refresh_token(
+        self, user_id: str, old_device_id: str, device_id: str
+    ) -> None:
+        """Moves refresh tokens from old device to current device
+
+        Args:
+            user_id: The user of the devices.
+            old_device_id: The old device.
+            device_id: The new device ID.
+        Returns:
+            None
+        """
+
+        await self.db_pool.simple_update(
+            "refresh_tokens",
+            keyvalues={"user_id": user_id, "device_id": old_device_id},
+            updatevalues={"device_id": device_id},
+            desc="set_device_for_refresh_token",
+        )
+
     def _set_device_for_access_token_txn(
         self, txn: LoggingTransaction, token: str, device_id: str
     ) -> str:
diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py
index 79d327499b..d4ed068357 100644
--- a/tests/handlers/test_device.py
+++ b/tests/handlers/test_device.py
@@ -461,6 +461,7 @@ class DehydrationTestCase(unittest.HomeserverTestCase):
         self.message_handler = hs.get_device_message_handler()
         self.registration = hs.get_registration_handler()
         self.auth = hs.get_auth()
+        self.auth_handler = hs.get_auth_handler()
         self.store = hs.get_datastores().main
         return hs
 
@@ -487,11 +488,12 @@ class DehydrationTestCase(unittest.HomeserverTestCase):
         self.assertEqual(device_data, {"device_data": {"foo": "bar"}})
 
         # Create a new login for the user and dehydrated the device
-        device_id, access_token, _expiration_time, _refresh_token = self.get_success(
+        device_id, access_token, _expiration_time, refresh_token = self.get_success(
             self.registration.register_device(
                 user_id=user_id,
                 device_id=None,
                 initial_display_name="new device",
+                should_issue_refresh_token=True,
             )
         )
 
@@ -522,6 +524,12 @@ class DehydrationTestCase(unittest.HomeserverTestCase):
 
         self.assertEqual(user_info.device_id, retrieved_device_id)
 
+        # make sure the user device has the refresh token
+        assert refresh_token is not None
+        self.get_success(
+            self.auth_handler.refresh_token(refresh_token, 5 * 60 * 1000, 5 * 60 * 1000)
+        )
+
         # make sure the device has the display name that was set from the login
         res = self.get_success(self.handler.get_device(user_id, retrieved_device_id))
 
-- 
cgit 1.5.1


From 032cf84f524a972f38977a67d61163f08d9dcf2a Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Wed, 13 Sep 2023 16:17:06 +0100
Subject: Remove a reference cycle in background process (#16314)

---
 changelog.d/16314.misc                        |  1 +
 synapse/metrics/background_process_metrics.py | 21 ++++++++++++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/16314.misc

(limited to 'synapse')

diff --git a/changelog.d/16314.misc b/changelog.d/16314.misc
new file mode 100644
index 0000000000..a32b07112a
--- /dev/null
+++ b/changelog.d/16314.misc
@@ -0,0 +1 @@
+Remove a reference cycle in background processes.
diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py
index 9ea4e23b31..f1f1f0cdf9 100644
--- a/synapse/metrics/background_process_metrics.py
+++ b/synapse/metrics/background_process_metrics.py
@@ -322,13 +322,21 @@ class BackgroundProcessLoggingContext(LoggingContext):
         if instance_id is None:
             instance_id = id(self)
         super().__init__("%s-%s" % (name, instance_id))
-        self._proc = _BackgroundProcess(name, self)
+        self._proc: Optional[_BackgroundProcess] = _BackgroundProcess(name, self)
 
     def start(self, rusage: "Optional[resource.struct_rusage]") -> None:
         """Log context has started running (again)."""
 
         super().start(rusage)
 
+        if self._proc is None:
+            logger.error(
+                "Background process re-entered without a proc: %s",
+                self.name,
+                stack_info=True,
+            )
+            return
+
         # We've become active again so we make sure we're in the list of active
         # procs. (Note that "start" here means we've become active, as opposed
         # to starting for the first time.)
@@ -345,6 +353,14 @@ class BackgroundProcessLoggingContext(LoggingContext):
 
         super().__exit__(type, value, traceback)
 
+        if self._proc is None:
+            logger.error(
+                "Background process exited without a proc: %s",
+                self.name,
+                stack_info=True,
+            )
+            return
+
         # The background process has finished. We explicitly remove and manually
         # update the metrics here so that if nothing is scraping metrics the set
         # doesn't infinitely grow.
@@ -352,3 +368,6 @@ class BackgroundProcessLoggingContext(LoggingContext):
             _background_processes_active_since_last_scrape.discard(self._proc)
 
         self._proc.update_metrics()
+
+        # Set proc to None to break the reference cycle.
+        self._proc = None
-- 
cgit 1.5.1


From 954921736b88de25c775c519a206449e46b3bf07 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Thu, 14 Sep 2023 12:46:30 +0100
Subject: Refactor `get_user_by_id` (#16316)

---
 changelog.d/16316.misc                           |  1 +
 synapse/api/auth/internal.py                     |  2 +-
 synapse/api/auth/msc3861_delegated.py            |  2 +-
 synapse/handlers/account.py                      |  2 +-
 synapse/handlers/admin.py                        | 49 +++++++--------
 synapse/handlers/message.py                      |  6 +-
 synapse/module_api/__init__.py                   |  4 +-
 synapse/rest/consent/consent_resource.py         |  2 +-
 synapse/server_notices/consent_server_notices.py |  6 +-
 synapse/storage/databases/main/client_ips.py     | 11 ++++
 synapse/storage/databases/main/registration.py   | 76 +++++++-----------------
 synapse/types/__init__.py                        | 10 +++-
 tests/api/test_auth.py                           | 12 +++-
 tests/storage/test_registration.py               | 48 +++++++--------
 14 files changed, 108 insertions(+), 123 deletions(-)
 create mode 100644 changelog.d/16316.misc

(limited to 'synapse')

diff --git a/changelog.d/16316.misc b/changelog.d/16316.misc
new file mode 100644
index 0000000000..aa0644f278
--- /dev/null
+++ b/changelog.d/16316.misc
@@ -0,0 +1 @@
+Refactor `get_user_by_id`.
diff --git a/synapse/api/auth/internal.py b/synapse/api/auth/internal.py
index 6a5fd44ec0..a75f6f2cc4 100644
--- a/synapse/api/auth/internal.py
+++ b/synapse/api/auth/internal.py
@@ -268,7 +268,7 @@ class InternalAuth(BaseAuth):
             stored_user = await self.store.get_user_by_id(user_id)
             if not stored_user:
                 raise InvalidClientTokenError("Unknown user_id %s" % user_id)
-            if not stored_user["is_guest"]:
+            if not stored_user.is_guest:
                 raise InvalidClientTokenError(
                     "Guest access token used for regular user"
                 )
diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py
index ef5d3f9b81..31bb035cc8 100644
--- a/synapse/api/auth/msc3861_delegated.py
+++ b/synapse/api/auth/msc3861_delegated.py
@@ -300,7 +300,7 @@ class MSC3861DelegatedAuth(BaseAuth):
             user_id = UserID(username, self._hostname)
 
             # First try to find a user from the username claim
-            user_info = await self.store.get_userinfo_by_id(user_id=user_id.to_string())
+            user_info = await self.store.get_user_by_id(user_id=user_id.to_string())
             if user_info is None:
                 # If the user does not exist, we should create it on the fly
                 # TODO: we could use SCIM to provision users ahead of time and listen
diff --git a/synapse/handlers/account.py b/synapse/handlers/account.py
index c05a14304c..fa043cca86 100644
--- a/synapse/handlers/account.py
+++ b/synapse/handlers/account.py
@@ -102,7 +102,7 @@ class AccountHandler:
         """
         status = {"exists": False}
 
-        userinfo = await self._main_store.get_userinfo_by_id(user_id.to_string())
+        userinfo = await self._main_store.get_user_by_id(user_id.to_string())
 
         if userinfo is not None:
             status = {
diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py
index 2f0e5f3b0a..7092ff3449 100644
--- a/synapse/handlers/admin.py
+++ b/synapse/handlers/admin.py
@@ -18,7 +18,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Set
 
 from synapse.api.constants import Direction, Membership
 from synapse.events import EventBase
-from synapse.types import JsonDict, RoomStreamToken, StateMap, UserID
+from synapse.types import JsonDict, RoomStreamToken, StateMap, UserID, UserInfo
 from synapse.visibility import filter_events_for_client
 
 if TYPE_CHECKING:
@@ -57,38 +57,30 @@ class AdminHandler:
 
     async def get_user(self, user: UserID) -> Optional[JsonDict]:
         """Function to get user details"""
-        user_info_dict = await self._store.get_user_by_id(user.to_string())
-        if user_info_dict is None:
+        user_info: Optional[UserInfo] = await self._store.get_user_by_id(
+            user.to_string()
+        )
+        if user_info is None:
             return None
 
-        # Restrict returned information to a known set of fields. This prevents additional
-        # fields added to get_user_by_id from modifying Synapse's external API surface.
-        user_info_to_return = {
-            "name",
-            "admin",
-            "deactivated",
-            "locked",
-            "shadow_banned",
-            "creation_ts",
-            "appservice_id",
-            "consent_server_notice_sent",
-            "consent_version",
-            "consent_ts",
-            "user_type",
-            "is_guest",
-            "last_seen_ts",
+        user_info_dict = {
+            "name": user.to_string(),
+            "admin": user_info.is_admin,
+            "deactivated": user_info.is_deactivated,
+            "locked": user_info.locked,
+            "shadow_banned": user_info.is_shadow_banned,
+            "creation_ts": user_info.creation_ts,
+            "appservice_id": user_info.appservice_id,
+            "consent_server_notice_sent": user_info.consent_server_notice_sent,
+            "consent_version": user_info.consent_version,
+            "consent_ts": user_info.consent_ts,
+            "user_type": user_info.user_type,
+            "is_guest": user_info.is_guest,
         }
 
         if self._msc3866_enabled:
             # Only include the approved flag if support for MSC3866 is enabled.
-            user_info_to_return.add("approved")
-
-        # Restrict returned keys to a known set.
-        user_info_dict = {
-            key: value
-            for key, value in user_info_dict.items()
-            if key in user_info_to_return
-        }
+            user_info_dict["approved"] = user_info.approved
 
         # Add additional user metadata
         profile = await self._store.get_profileinfo(user)
@@ -105,6 +97,9 @@ class AdminHandler:
         user_info_dict["external_ids"] = external_ids
         user_info_dict["erased"] = await self._store.is_user_erased(user.to_string())
 
+        last_seen_ts = await self._store.get_last_seen_for_user_id(user.to_string())
+        user_info_dict["last_seen_ts"] = last_seen_ts
+
         return user_info_dict
 
     async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> Any:
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index d6be18cdef..c036578a3d 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -828,13 +828,13 @@ class EventCreationHandler:
 
         u = await self.store.get_user_by_id(user_id)
         assert u is not None
-        if u["user_type"] in (UserTypes.SUPPORT, UserTypes.BOT):
+        if u.user_type in (UserTypes.SUPPORT, UserTypes.BOT):
             # support and bot users are not required to consent
             return
-        if u["appservice_id"] is not None:
+        if u.appservice_id is not None:
             # users registered by an appservice are exempt
             return
-        if u["consent_version"] == self.config.consent.user_consent_version:
+        if u.consent_version == self.config.consent.user_consent_version:
             return
 
         consent_uri = self._consent_uri_builder.build_user_consent_uri(user.localpart)
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index d6efe10a28..7ec202be23 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -572,7 +572,7 @@ class ModuleApi:
         Returns:
             UserInfo object if a user was found, otherwise None
         """
-        return await self._store.get_userinfo_by_id(user_id)
+        return await self._store.get_user_by_id(user_id)
 
     async def get_user_by_req(
         self,
@@ -1878,7 +1878,7 @@ class AccountDataManager:
             raise TypeError(f"new_data must be a dict; got {type(new_data).__name__}")
 
         # Ensure the user exists, so we don't just write to users that aren't there.
-        if await self._store.get_userinfo_by_id(user_id) is None:
+        if await self._store.get_user_by_id(user_id) is None:
             raise ValueError(f"User {user_id} does not exist on this server.")
 
         await self._handler.add_account_data_for_user(user_id, data_type, new_data)
diff --git a/synapse/rest/consent/consent_resource.py b/synapse/rest/consent/consent_resource.py
index 25f9ea285b..88d3ec1baf 100644
--- a/synapse/rest/consent/consent_resource.py
+++ b/synapse/rest/consent/consent_resource.py
@@ -129,7 +129,7 @@ class ConsentResource(DirectServeHtmlResource):
             if u is None:
                 raise NotFoundError("Unknown user")
 
-            has_consented = u["consent_version"] == version
+            has_consented = u.consent_version == version
             userhmac = userhmac_bytes.decode("ascii")
 
         try:
diff --git a/synapse/server_notices/consent_server_notices.py b/synapse/server_notices/consent_server_notices.py
index 94025ba41f..a879b6505e 100644
--- a/synapse/server_notices/consent_server_notices.py
+++ b/synapse/server_notices/consent_server_notices.py
@@ -79,15 +79,15 @@ class ConsentServerNotices:
             if u is None:
                 return
 
-            if u["is_guest"] and not self._send_to_guests:
+            if u.is_guest and not self._send_to_guests:
                 # don't send to guests
                 return
 
-            if u["consent_version"] == self._current_consent_version:
+            if u.consent_version == self._current_consent_version:
                 # user has already consented
                 return
 
-            if u["consent_server_notice_sent"] == self._current_consent_version:
+            if u.consent_server_notice_sent == self._current_consent_version:
                 # we've already sent a notice to the user
                 return
 
diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py
index d8d333e11d..7da47c3dd7 100644
--- a/synapse/storage/databases/main/client_ips.py
+++ b/synapse/storage/databases/main/client_ips.py
@@ -764,3 +764,14 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore, MonthlyActiveUsersWorke
                     }
 
         return list(results.values())
+
+    async def get_last_seen_for_user_id(self, user_id: str) -> Optional[int]:
+        """Get the last seen timestamp for a user, if we have it."""
+
+        return await self.db_pool.simple_select_one_onecol(
+            table="user_ips",
+            keyvalues={"user_id": user_id},
+            retcol="MAX(last_seen)",
+            allow_none=True,
+            desc="get_last_seen_for_user_id",
+        )
diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py
index e34156dc55..cc964604e2 100644
--- a/synapse/storage/databases/main/registration.py
+++ b/synapse/storage/databases/main/registration.py
@@ -16,7 +16,7 @@
 import logging
 import random
 import re
-from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple, Union, cast
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast
 
 import attr
 
@@ -192,8 +192,8 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
             )
 
     @cached()
-    async def get_user_by_id(self, user_id: str) -> Optional[Mapping[str, Any]]:
-        """Deprecated: use get_userinfo_by_id instead"""
+    async def get_user_by_id(self, user_id: str) -> Optional[UserInfo]:
+        """Returns info about the user account, if it exists."""
 
         def get_user_by_id_txn(txn: LoggingTransaction) -> Optional[Dict[str, Any]]:
             # We could technically use simple_select_one here, but it would not perform
@@ -202,16 +202,12 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
             txn.execute(
                 """
                 SELECT
-                    name, password_hash, is_guest, admin, consent_version, consent_ts,
+                    name, is_guest, admin, consent_version, consent_ts,
                     consent_server_notice_sent, appservice_id, creation_ts, user_type,
                     deactivated, COALESCE(shadow_banned, FALSE) AS shadow_banned,
                     COALESCE(approved, TRUE) AS approved,
-                    COALESCE(locked, FALSE) AS locked, last_seen_ts
+                    COALESCE(locked, FALSE) AS locked
                 FROM users
-                LEFT JOIN (
-                    SELECT user_id, MAX(last_seen) AS last_seen_ts
-                    FROM user_ips GROUP BY user_id
-                ) ls ON users.name = ls.user_id
                 WHERE name = ?
                 """,
                 (user_id,),
@@ -228,51 +224,23 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
             desc="get_user_by_id",
             func=get_user_by_id_txn,
         )
-
-        if row is not None:
-            # If we're using SQLite our boolean values will be integers. Because we
-            # present some of this data as is to e.g. server admins via REST APIs, we
-            # want to make sure we're returning the right type of data.
-            # Note: when adding a column name to this list, be wary of NULLable columns,
-            # since NULL values will be turned into False.
-            boolean_columns = [
-                "admin",
-                "deactivated",
-                "shadow_banned",
-                "approved",
-                "locked",
-            ]
-            for column in boolean_columns:
-                row[column] = bool(row[column])
-
-        return row
-
-    async def get_userinfo_by_id(self, user_id: str) -> Optional[UserInfo]:
-        """Get a UserInfo object for a user by user ID.
-
-        Note! Currently uses the cache of `get_user_by_id`. Once that deprecated method is removed,
-        this method should be cached.
-
-        Args:
-             user_id: The user to fetch user info for.
-        Returns:
-            `UserInfo` object if user found, otherwise `None`.
-        """
-        user_data = await self.get_user_by_id(user_id)
-        if not user_data:
+        if row is None:
             return None
+
         return UserInfo(
-            appservice_id=user_data["appservice_id"],
-            consent_server_notice_sent=user_data["consent_server_notice_sent"],
-            consent_version=user_data["consent_version"],
-            creation_ts=user_data["creation_ts"],
-            is_admin=bool(user_data["admin"]),
-            is_deactivated=bool(user_data["deactivated"]),
-            is_guest=bool(user_data["is_guest"]),
-            is_shadow_banned=bool(user_data["shadow_banned"]),
-            user_id=UserID.from_string(user_data["name"]),
-            user_type=user_data["user_type"],
-            last_seen_ts=user_data["last_seen_ts"],
+            appservice_id=row["appservice_id"],
+            consent_server_notice_sent=row["consent_server_notice_sent"],
+            consent_version=row["consent_version"],
+            consent_ts=row["consent_ts"],
+            creation_ts=row["creation_ts"],
+            is_admin=bool(row["admin"]),
+            is_deactivated=bool(row["deactivated"]),
+            is_guest=bool(row["is_guest"]),
+            is_shadow_banned=bool(row["shadow_banned"]),
+            user_id=UserID.from_string(row["name"]),
+            user_type=row["user_type"],
+            approved=bool(row["approved"]),
+            locked=bool(row["locked"]),
         )
 
     async def is_trial_user(self, user_id: str) -> bool:
@@ -290,10 +258,10 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore):
 
         now = self._clock.time_msec()
         days = self.config.server.mau_appservice_trial_days.get(
-            info["appservice_id"], self.config.server.mau_trial_days
+            info.appservice_id, self.config.server.mau_trial_days
         )
         trial_duration_ms = days * 24 * 60 * 60 * 1000
-        is_trial = (now - info["creation_ts"] * 1000) < trial_duration_ms
+        is_trial = (now - info.creation_ts * 1000) < trial_duration_ms
         return is_trial
 
     @cached()
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index 488714f60c..76b0e3e694 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -933,33 +933,37 @@ def get_verify_key_from_cross_signing_key(
 
 @attr.s(auto_attribs=True, frozen=True, slots=True)
 class UserInfo:
-    """Holds information about a user. Result of get_userinfo_by_id.
+    """Holds information about a user. Result of get_user_by_id.
 
     Attributes:
         user_id:  ID of the user.
         appservice_id:  Application service ID that created this user.
         consent_server_notice_sent:  Version of policy documents the user has been sent.
         consent_version:  Version of policy documents the user has consented to.
+        consent_ts: Time the user consented
         creation_ts:  Creation timestamp of the user.
         is_admin:  True if the user is an admin.
         is_deactivated:  True if the user has been deactivated.
         is_guest:  True if the user is a guest user.
         is_shadow_banned:  True if the user has been shadow-banned.
         user_type:  User type (None for normal user, 'support' and 'bot' other options).
-        last_seen_ts:  Last activity timestamp of the user.
+        approved: If the user has been "approved" to register on the server.
+        locked: Whether the user's account has been locked
     """
 
     user_id: UserID
     appservice_id: Optional[int]
     consent_server_notice_sent: Optional[str]
     consent_version: Optional[str]
+    consent_ts: Optional[int]
     user_type: Optional[str]
     creation_ts: int
     is_admin: bool
     is_deactivated: bool
     is_guest: bool
     is_shadow_banned: bool
-    last_seen_ts: Optional[int]
+    approved: bool
+    locked: bool
 
 
 class UserProfile(TypedDict):
diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py
index dcd01d5688..e00d7215df 100644
--- a/tests/api/test_auth.py
+++ b/tests/api/test_auth.py
@@ -188,8 +188,11 @@ class AuthTestCase(unittest.HomeserverTestCase):
         )
         app_service.is_interested_in_user = Mock(return_value=True)
         self.store.get_app_service_by_token = Mock(return_value=app_service)
-        # This just needs to return a truth-y value.
-        self.store.get_user_by_id = AsyncMock(return_value={"is_guest": False})
+
+        class FakeUserInfo:
+            is_guest = False
+
+        self.store.get_user_by_id = AsyncMock(return_value=FakeUserInfo())
         self.store.get_user_by_access_token = AsyncMock(return_value=None)
 
         request = Mock(args={})
@@ -341,7 +344,10 @@ class AuthTestCase(unittest.HomeserverTestCase):
         )
 
     def test_get_guest_user_from_macaroon(self) -> None:
-        self.store.get_user_by_id = AsyncMock(return_value={"is_guest": True})
+        class FakeUserInfo:
+            is_guest = True
+
+        self.store.get_user_by_id = AsyncMock(return_value=FakeUserInfo())
         self.store.get_user_by_access_token = AsyncMock(return_value=None)
 
         user_id = "@baldrick:matrix.org"
diff --git a/tests/storage/test_registration.py b/tests/storage/test_registration.py
index 95c9792d54..0cca34d355 100644
--- a/tests/storage/test_registration.py
+++ b/tests/storage/test_registration.py
@@ -16,7 +16,7 @@ from twisted.test.proto_helpers import MemoryReactor
 from synapse.api.constants import UserTypes
 from synapse.api.errors import ThreepidValidationError
 from synapse.server import HomeServer
-from synapse.types import JsonDict, UserID
+from synapse.types import JsonDict, UserID, UserInfo
 from synapse.util import Clock
 
 from tests.unittest import HomeserverTestCase, override_config
@@ -35,24 +35,22 @@ class RegistrationStoreTestCase(HomeserverTestCase):
         self.get_success(self.store.register_user(self.user_id, self.pwhash))
 
         self.assertEqual(
-            {
+            UserInfo(
                 # TODO(paul): Surely this field should be 'user_id', not 'name'
-                "name": self.user_id,
-                "password_hash": self.pwhash,
-                "admin": 0,
-                "is_guest": 0,
-                "consent_version": None,
-                "consent_ts": None,
-                "consent_server_notice_sent": None,
-                "appservice_id": None,
-                "creation_ts": 0,
-                "user_type": None,
-                "deactivated": 0,
-                "locked": 0,
-                "shadow_banned": 0,
-                "approved": 1,
-                "last_seen_ts": None,
-            },
+                user_id=UserID.from_string(self.user_id),
+                is_admin=False,
+                is_guest=False,
+                consent_server_notice_sent=None,
+                consent_ts=None,
+                consent_version=None,
+                appservice_id=None,
+                creation_ts=0,
+                user_type=None,
+                is_deactivated=False,
+                locked=False,
+                is_shadow_banned=False,
+                approved=True,
+            ),
             (self.get_success(self.store.get_user_by_id(self.user_id))),
         )
 
@@ -65,9 +63,11 @@ class RegistrationStoreTestCase(HomeserverTestCase):
 
         user = self.get_success(self.store.get_user_by_id(self.user_id))
         assert user
-        self.assertEqual(user["consent_version"], "1")
-        self.assertGreater(user["consent_ts"], before_consent)
-        self.assertLess(user["consent_ts"], self.clock.time_msec())
+        self.assertEqual(user.consent_version, "1")
+        self.assertIsNotNone(user.consent_ts)
+        assert user.consent_ts is not None
+        self.assertGreater(user.consent_ts, before_consent)
+        self.assertLess(user.consent_ts, self.clock.time_msec())
 
     def test_add_tokens(self) -> None:
         self.get_success(self.store.register_user(self.user_id, self.pwhash))
@@ -215,7 +215,7 @@ class ApprovalRequiredRegistrationTestCase(HomeserverTestCase):
 
         user = self.get_success(self.store.get_user_by_id(self.user_id))
         assert user is not None
-        self.assertTrue(user["approved"])
+        self.assertTrue(user.approved)
 
         approved = self.get_success(self.store.is_user_approved(self.user_id))
         self.assertTrue(approved)
@@ -228,7 +228,7 @@ class ApprovalRequiredRegistrationTestCase(HomeserverTestCase):
 
         user = self.get_success(self.store.get_user_by_id(self.user_id))
         assert user is not None
-        self.assertFalse(user["approved"])
+        self.assertFalse(user.approved)
 
         approved = self.get_success(self.store.is_user_approved(self.user_id))
         self.assertFalse(approved)
@@ -248,7 +248,7 @@ class ApprovalRequiredRegistrationTestCase(HomeserverTestCase):
         user = self.get_success(self.store.get_user_by_id(self.user_id))
         self.assertIsNotNone(user)
         assert user is not None
-        self.assertEqual(user["approved"], 1)
+        self.assertEqual(user.approved, 1)
 
         approved = self.get_success(self.store.is_user_approved(self.user_id))
         self.assertTrue(approved)
-- 
cgit 1.5.1


From e9e2904eb2c0b73eb4154faf41bd360e6168cc92 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Thu, 14 Sep 2023 14:56:07 +0100
Subject: Speed up deleting to-device messages task (#16318)

---
 changelog.d/16318.misc     |  1 +
 synapse/handlers/device.py | 27 ++++++++++++++-------------
 2 files changed, 15 insertions(+), 13 deletions(-)
 create mode 100644 changelog.d/16318.misc

(limited to 'synapse')

diff --git a/changelog.d/16318.misc b/changelog.d/16318.misc
new file mode 100644
index 0000000000..1433a2f246
--- /dev/null
+++ b/changelog.d/16318.misc
@@ -0,0 +1 @@
+Speed up task to delete to-device messages.
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 0d3d5ebc86..86ad96d030 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -388,7 +388,8 @@ class DeviceWorkerHandler:
             "Trying handling device list state for partial join: not supported on workers."
         )
 
-    DEVICE_MSGS_DELETE_BATCH_LIMIT = 100
+    DEVICE_MSGS_DELETE_BATCH_LIMIT = 1000
+    DEVICE_MSGS_DELETE_SLEEP_MS = 1000
 
     async def _delete_device_messages(
         self,
@@ -400,19 +401,19 @@ class DeviceWorkerHandler:
         device_id = task.params["device_id"]
         up_to_stream_id = task.params["up_to_stream_id"]
 
-        res = await self.store.delete_messages_for_device(
-            user_id=user_id,
-            device_id=device_id,
-            up_to_stream_id=up_to_stream_id,
-            limit=DeviceHandler.DEVICE_MSGS_DELETE_BATCH_LIMIT,
-        )
+        # Delete the messages in batches to avoid too much DB load.
+        while True:
+            res = await self.store.delete_messages_for_device(
+                user_id=user_id,
+                device_id=device_id,
+                up_to_stream_id=up_to_stream_id,
+                limit=DeviceHandler.DEVICE_MSGS_DELETE_BATCH_LIMIT,
+            )
 
-        if res < DeviceHandler.DEVICE_MSGS_DELETE_BATCH_LIMIT:
-            return TaskStatus.COMPLETE, None, None
-        else:
-            # There is probably still device messages to be deleted, let's keep the task active and it will be run
-            # again in a subsequent scheduler loop run (probably the next one, if not too many tasks are running).
-            return TaskStatus.ACTIVE, None, None
+            if res < DeviceHandler.DEVICE_MSGS_DELETE_BATCH_LIMIT:
+                return TaskStatus.COMPLETE, None, None
+
+            await self.clock.sleep(DeviceHandler.DEVICE_MSGS_DELETE_SLEEP_MS / 1000.0)
 
 
 class DeviceHandler(DeviceWorkerHandler):
-- 
cgit 1.5.1


From 329597022ee02516e5cbee11fcd566e05609b724 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Thu, 14 Sep 2023 16:20:47 +0100
Subject: Some minor performance fixes for task scheduler (#16313)

---
 changelog.d/16313.misc                             |   1 +
 synapse/replication/tcp/handler.py                 |   6 +-
 synapse/storage/databases/main/task_scheduler.py   |   6 ++
 .../main/delta/82/02_scheduled_tasks_index.sql     |  16 ++++
 synapse/util/task_scheduler.py                     | 100 ++++++++++++++-------
 5 files changed, 95 insertions(+), 34 deletions(-)
 create mode 100644 changelog.d/16313.misc
 create mode 100644 synapse/storage/schema/main/delta/82/02_scheduled_tasks_index.sql

(limited to 'synapse')

diff --git a/changelog.d/16313.misc b/changelog.d/16313.misc
new file mode 100644
index 0000000000..4f266c1fb0
--- /dev/null
+++ b/changelog.d/16313.misc
@@ -0,0 +1 @@
+Delete device messages asynchronously and in staged batches using the task scheduler.
diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py
index 5642666411..b668bb5da1 100644
--- a/synapse/replication/tcp/handler.py
+++ b/synapse/replication/tcp/handler.py
@@ -672,14 +672,12 @@ class ReplicationCommandHandler:
             cmd.instance_name, cmd.lock_name, cmd.lock_key
         )
 
-    async def on_NEW_ACTIVE_TASK(
+    def on_NEW_ACTIVE_TASK(
         self, conn: IReplicationConnection, cmd: NewActiveTaskCommand
     ) -> None:
         """Called when get a new NEW_ACTIVE_TASK command."""
         if self._task_scheduler:
-            task = await self._task_scheduler.get_task(cmd.data)
-            if task:
-                await self._task_scheduler._launch_task(task)
+            self._task_scheduler.launch_task_by_id(cmd.data)
 
     def new_connection(self, connection: IReplicationConnection) -> None:
         """Called when we have a new connection."""
diff --git a/synapse/storage/databases/main/task_scheduler.py b/synapse/storage/databases/main/task_scheduler.py
index 9ab120eea9..5c5372a825 100644
--- a/synapse/storage/databases/main/task_scheduler.py
+++ b/synapse/storage/databases/main/task_scheduler.py
@@ -53,6 +53,7 @@ class TaskSchedulerWorkerStore(SQLBaseStore):
         resource_id: Optional[str] = None,
         statuses: Optional[List[TaskStatus]] = None,
         max_timestamp: Optional[int] = None,
+        limit: Optional[int] = None,
     ) -> List[ScheduledTask]:
         """Get a list of scheduled tasks from the DB.
 
@@ -62,6 +63,7 @@ class TaskSchedulerWorkerStore(SQLBaseStore):
             statuses: Limit the returned tasks to the specific statuses
             max_timestamp: Limit the returned tasks to the ones that have
                 a timestamp inferior to the specified one
+            limit: Only return `limit` number of rows if set.
 
         Returns: a list of `ScheduledTask`, ordered by increasing timestamps
         """
@@ -94,6 +96,10 @@ class TaskSchedulerWorkerStore(SQLBaseStore):
 
             sql = sql + " ORDER BY timestamp"
 
+            if limit is not None:
+                sql += " LIMIT ?"
+                args.append(limit)
+
             txn.execute(sql, args)
             return self.db_pool.cursor_to_dict(txn)
 
diff --git a/synapse/storage/schema/main/delta/82/02_scheduled_tasks_index.sql b/synapse/storage/schema/main/delta/82/02_scheduled_tasks_index.sql
new file mode 100644
index 0000000000..6b90275139
--- /dev/null
+++ b/synapse/storage/schema/main/delta/82/02_scheduled_tasks_index.sql
@@ -0,0 +1,16 @@
+/* Copyright 2023 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE INDEX IF NOT EXISTS scheduled_tasks_timestamp ON scheduled_tasks(timestamp);
diff --git a/synapse/util/task_scheduler.py b/synapse/util/task_scheduler.py
index b7de201bde..caf13b3474 100644
--- a/synapse/util/task_scheduler.py
+++ b/synapse/util/task_scheduler.py
@@ -15,12 +15,14 @@
 import logging
 from typing import TYPE_CHECKING, Awaitable, Callable, Dict, List, Optional, Set, Tuple
 
-from prometheus_client import Gauge
-
 from twisted.python.failure import Failure
 
 from synapse.logging.context import nested_logging_context
-from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.metrics import LaterGauge
+from synapse.metrics.background_process_metrics import (
+    run_as_background_process,
+    wrap_as_background_process,
+)
 from synapse.types import JsonMapping, ScheduledTask, TaskStatus
 from synapse.util.stringutils import random_string
 
@@ -30,12 +32,6 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
-running_tasks_gauge = Gauge(
-    "synapse_scheduler_running_tasks",
-    "The number of concurrent running tasks handled by the TaskScheduler",
-)
-
-
 class TaskScheduler:
     """
     This is a simple task sheduler aimed at resumable tasks: usually we use `run_in_background`
@@ -70,6 +66,8 @@ class TaskScheduler:
     # Precision of the scheduler, evaluation of tasks to run will only happen
     # every `SCHEDULE_INTERVAL_MS` ms
     SCHEDULE_INTERVAL_MS = 1 * 60 * 1000  # 1mn
+    # How often to clean up old tasks.
+    CLEANUP_INTERVAL_MS = 30 * 60 * 1000
     # Time before a complete or failed task is deleted from the DB
     KEEP_TASKS_FOR_MS = 7 * 24 * 60 * 60 * 1000  # 1 week
     # Maximum number of tasks that can run at the same time
@@ -92,14 +90,26 @@ class TaskScheduler:
         ] = {}
         self._run_background_tasks = hs.config.worker.run_background_tasks
 
+        # Flag to make sure we only try and launch new tasks once at a time.
+        self._launching_new_tasks = False
+
         if self._run_background_tasks:
             self._clock.looping_call(
-                run_as_background_process,
+                self._launch_scheduled_tasks,
+                TaskScheduler.SCHEDULE_INTERVAL_MS,
+            )
+            self._clock.looping_call(
+                self._clean_scheduled_tasks,
                 TaskScheduler.SCHEDULE_INTERVAL_MS,
-                "handle_scheduled_tasks",
-                self._handle_scheduled_tasks,
             )
 
+        LaterGauge(
+            "synapse_scheduler_running_tasks",
+            "The number of concurrent running tasks handled by the TaskScheduler",
+            labels=None,
+            caller=lambda: len(self._running_tasks),
+        )
+
     def register_action(
         self,
         function: Callable[
@@ -234,6 +244,7 @@ class TaskScheduler:
         resource_id: Optional[str] = None,
         statuses: Optional[List[TaskStatus]] = None,
         max_timestamp: Optional[int] = None,
+        limit: Optional[int] = None,
     ) -> List[ScheduledTask]:
         """Get a list of tasks. Returns all the tasks if no args is provided.
 
@@ -247,6 +258,7 @@ class TaskScheduler:
             statuses: Limit the returned tasks to the specific statuses
             max_timestamp: Limit the returned tasks to the ones that have
                 a timestamp inferior to the specified one
+            limit: Only return `limit` number of rows if set.
 
         Returns
             A list of `ScheduledTask`, ordered by increasing timestamps
@@ -256,6 +268,7 @@ class TaskScheduler:
             resource_id=resource_id,
             statuses=statuses,
             max_timestamp=max_timestamp,
+            limit=limit,
         )
 
     async def delete_task(self, id: str) -> None:
@@ -273,34 +286,58 @@ class TaskScheduler:
             raise Exception(f"Task {id} is currently ACTIVE and can't be deleted")
         await self._store.delete_scheduled_task(id)
 
-    async def _handle_scheduled_tasks(self) -> None:
-        """Main loop taking care of launching tasks and cleaning up old ones."""
-        await self._launch_scheduled_tasks()
-        await self._clean_scheduled_tasks()
+    def launch_task_by_id(self, id: str) -> None:
+        """Try launching the task with the given ID."""
+        # Don't bother trying to launch new tasks if we're already at capacity.
+        if len(self._running_tasks) >= TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS:
+            return
+
+        run_as_background_process("launch_task_by_id", self._launch_task_by_id, id)
+
+    async def _launch_task_by_id(self, id: str) -> None:
+        """Helper async function for `launch_task_by_id`."""
+        task = await self.get_task(id)
+        if task:
+            await self._launch_task(task)
 
+    @wrap_as_background_process("launch_scheduled_tasks")
     async def _launch_scheduled_tasks(self) -> None:
         """Retrieve and launch scheduled tasks that should be running at that time."""
-        for task in await self.get_tasks(statuses=[TaskStatus.ACTIVE]):
-            await self._launch_task(task)
-        for task in await self.get_tasks(
-            statuses=[TaskStatus.SCHEDULED], max_timestamp=self._clock.time_msec()
-        ):
-            await self._launch_task(task)
+        # Don't bother trying to launch new tasks if we're already at capacity.
+        if len(self._running_tasks) >= TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS:
+            return
+
+        if self._launching_new_tasks:
+            return
 
-        running_tasks_gauge.set(len(self._running_tasks))
+        self._launching_new_tasks = True
 
+        try:
+            for task in await self.get_tasks(
+                statuses=[TaskStatus.ACTIVE], limit=self.MAX_CONCURRENT_RUNNING_TASKS
+            ):
+                await self._launch_task(task)
+            for task in await self.get_tasks(
+                statuses=[TaskStatus.SCHEDULED],
+                max_timestamp=self._clock.time_msec(),
+                limit=self.MAX_CONCURRENT_RUNNING_TASKS,
+            ):
+                await self._launch_task(task)
+
+        finally:
+            self._launching_new_tasks = False
+
+    @wrap_as_background_process("clean_scheduled_tasks")
     async def _clean_scheduled_tasks(self) -> None:
         """Clean old complete or failed jobs to avoid clutter the DB."""
+        now = self._clock.time_msec()
         for task in await self._store.get_scheduled_tasks(
-            statuses=[TaskStatus.FAILED, TaskStatus.COMPLETE]
+            statuses=[TaskStatus.FAILED, TaskStatus.COMPLETE],
+            max_timestamp=now - TaskScheduler.KEEP_TASKS_FOR_MS,
         ):
             # FAILED and COMPLETE tasks should never be running
             assert task.id not in self._running_tasks
-            if (
-                self._clock.time_msec()
-                > task.timestamp + TaskScheduler.KEEP_TASKS_FOR_MS
-            ):
-                await self._store.delete_scheduled_task(task.id)
+            await self._store.delete_scheduled_task(task.id)
 
     async def _launch_task(self, task: ScheduledTask) -> None:
         """Launch a scheduled task now.
@@ -339,6 +376,9 @@ class TaskScheduler:
                 )
                 self._running_tasks.remove(task.id)
 
+            # Try launch a new task since we've finished with this one.
+            self._clock.call_later(1, self._launch_scheduled_tasks)
+
         if len(self._running_tasks) >= TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS:
             return
 
@@ -355,4 +395,4 @@ class TaskScheduler:
 
         self._running_tasks.add(task.id)
         await self.update_task(task.id, status=TaskStatus.ACTIVE)
-        run_as_background_process(task.action, wrapper)
+        run_as_background_process(f"task-{task.action}", wrapper)
-- 
cgit 1.5.1


From edec0b93cabbe5e03d658a2aa4c2c1b79cf8e85e Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Fri, 15 Sep 2023 09:10:24 +0100
Subject: Only use literal strings for process names (#16315)

---
 changelog.d/16315.misc                        |  1 +
 synapse/appservice/scheduler.py               | 13 ++++---------
 synapse/metrics/background_process_metrics.py |  7 +++++--
 synapse/util/caches/expiringcache.py          |  4 +---
 4 files changed, 11 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/16315.misc

(limited to 'synapse')

diff --git a/changelog.d/16315.misc b/changelog.d/16315.misc
new file mode 100644
index 0000000000..d88782c979
--- /dev/null
+++ b/changelog.d/16315.misc
@@ -0,0 +1 @@
+Only use literal strings for background process names.
diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py
index 3a319b0d42..79f95f7653 100644
--- a/synapse/appservice/scheduler.py
+++ b/synapse/appservice/scheduler.py
@@ -200,9 +200,7 @@ class _ServiceQueuer:
         if service.id in self.requests_in_flight:
             return
 
-        run_as_background_process(
-            "as-sender-%s" % (service.id,), self._send_request, service
-        )
+        run_as_background_process("as-sender", self._send_request, service)
 
     async def _send_request(self, service: ApplicationService) -> None:
         # sanity-check: we shouldn't get here if this service already has a sender
@@ -478,14 +476,11 @@ class _Recoverer:
         self.backoff_counter = 1
 
     def recover(self) -> None:
-        def _retry() -> None:
-            run_as_background_process(
-                "as-recoverer-%s" % (self.service.id,), self.retry
-            )
-
         delay = 2**self.backoff_counter
         logger.info("Scheduling retries on %s in %fs", self.service.id, delay)
-        self.clock.call_later(delay, _retry)
+        self.clock.call_later(
+            delay, run_as_background_process, "as-recoverer", self.retry
+        )
 
     def _backoff(self) -> None:
         # cap the backoff to be around 8.5min => (2^9) = 512 secs
diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py
index f1f1f0cdf9..fceb7a9f3c 100644
--- a/synapse/metrics/background_process_metrics.py
+++ b/synapse/metrics/background_process_metrics.py
@@ -48,6 +48,9 @@ from synapse.metrics._types import Collector
 if TYPE_CHECKING:
     import resource
 
+    # Old versions don't have `LiteralString`
+    from typing_extensions import LiteralString
+
 
 logger = logging.getLogger(__name__)
 
@@ -191,7 +194,7 @@ R = TypeVar("R")
 
 
 def run_as_background_process(
-    desc: str,
+    desc: "LiteralString",
     func: Callable[..., Awaitable[Optional[R]]],
     *args: Any,
     bg_start_span: bool = True,
@@ -259,7 +262,7 @@ P = ParamSpec("P")
 
 
 def wrap_as_background_process(
-    desc: str,
+    desc: "LiteralString",
 ) -> Callable[
     [Callable[P, Awaitable[Optional[R]]]],
     Callable[P, "defer.Deferred[Optional[R]]"],
diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py
index 8e4c34039d..e73cf66080 100644
--- a/synapse/util/caches/expiringcache.py
+++ b/synapse/util/caches/expiringcache.py
@@ -84,9 +84,7 @@ class ExpiringCache(Generic[KT, VT]):
             return
 
         def f() -> "defer.Deferred[None]":
-            return run_as_background_process(
-                "prune_cache_%s" % self._cache_name, self._prune_cache
-            )
+            return run_as_background_process("prune_cache", self._prune_cache)
 
         self._clock.looping_call(f, self._expiry_ms / 2)
 
-- 
cgit 1.5.1


From 2a0f86f88fdb3d450212541ba7db57df6a184ae3 Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Fri, 15 Sep 2023 03:16:45 -0500
Subject: Convert `_insert_graph_receipts_txn` to `simple_upsert` (#16299)

---
 changelog.d/16299.misc                     |  1 +
 synapse/storage/database.py                |  3 +++
 synapse/storage/databases/main/receipts.py | 23 +++++++++--------------
 3 files changed, 13 insertions(+), 14 deletions(-)
 create mode 100644 changelog.d/16299.misc

(limited to 'synapse')

diff --git a/changelog.d/16299.misc b/changelog.d/16299.misc
new file mode 100644
index 0000000000..d454669151
--- /dev/null
+++ b/changelog.d/16299.misc
@@ -0,0 +1 @@
+Refactor `receipts_graph` Postgres transactions to stop error messages.
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 6c5fcdcec3..697bc5651c 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -1193,6 +1193,7 @@ class DatabasePool:
         keyvalues: Dict[str, Any],
         values: Dict[str, Any],
         insertion_values: Optional[Dict[str, Any]] = None,
+        where_clause: Optional[str] = None,
         desc: str = "simple_upsert",
     ) -> bool:
         """Insert a row with values + insertion_values; on conflict, update with values.
@@ -1243,6 +1244,7 @@ class DatabasePool:
             keyvalues: The unique key columns and their new values
             values: The nonunique columns and their new values
             insertion_values: additional key/values to use only when inserting
+            where_clause: An index predicate to apply to the upsert.
             desc: description of the transaction, for logging and metrics
         Returns:
             Returns True if a row was inserted or updated (i.e. if `values` is
@@ -1263,6 +1265,7 @@ class DatabasePool:
                     keyvalues,
                     values,
                     insertion_values,
+                    where_clause,
                     db_autocommit=autocommit,
                 )
             except self.engine.module.IntegrityError as e:
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index e4d10ff250..a074c43989 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -795,9 +795,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
             now - event_ts,
         )
 
-        await self.db_pool.runInteraction(
-            "insert_graph_receipt",
-            self._insert_graph_receipt_txn,
+        await self._insert_graph_receipt(
             room_id,
             receipt_type,
             user_id,
@@ -810,9 +808,8 @@ class ReceiptsWorkerStore(SQLBaseStore):
 
         return stream_id, max_persisted_id
 
-    def _insert_graph_receipt_txn(
+    async def _insert_graph_receipt(
         self,
-        txn: LoggingTransaction,
         room_id: str,
         receipt_type: str,
         user_id: str,
@@ -822,13 +819,6 @@ class ReceiptsWorkerStore(SQLBaseStore):
     ) -> None:
         assert self._can_write_to_receipts
 
-        txn.call_after(
-            self._get_receipts_for_user_with_orderings.invalidate,
-            (user_id, receipt_type),
-        )
-        # FIXME: This shouldn't invalidate the whole cache
-        txn.call_after(self._get_linearized_receipts_for_room.invalidate, (room_id,))
-
         keyvalues = {
             "room_id": room_id,
             "receipt_type": receipt_type,
@@ -840,8 +830,8 @@ class ReceiptsWorkerStore(SQLBaseStore):
         else:
             keyvalues["thread_id"] = thread_id
 
-        self.db_pool.simple_upsert_txn(
-            txn,
+        await self.db_pool.simple_upsert(
+            desc="insert_graph_receipt",
             table="receipts_graph",
             keyvalues=keyvalues,
             values={
@@ -851,6 +841,11 @@ class ReceiptsWorkerStore(SQLBaseStore):
             where_clause=where_clause,
         )
 
+        self._get_receipts_for_user_with_orderings.invalidate((user_id, receipt_type))
+
+        # FIXME: This shouldn't invalidate the whole cache
+        self._get_linearized_receipts_for_room.invalidate((room_id,))
+
 
 class ReceiptsBackgroundUpdateStore(SQLBaseStore):
     POPULATE_RECEIPT_EVENT_STREAM_ORDERING = "populate_event_stream_ordering"
-- 
cgit 1.5.1


From dd44ee00b6cf4d900e56857039320660400cff37 Mon Sep 17 00:00:00 2001
From: Mathieu Velten <mathieuv@matrix.org>
Date: Fri, 15 Sep 2023 15:37:44 +0200
Subject: Add automatic purge after all users forget a room (#15488)

 Also add restore of purge/shutdown rooms after a synapse restart.

Co-authored-by:  Eric Eastwood <erice@matrix.org>
Co-authored-by: Erik Johnston <erikj@matrix.org>
---
 changelog.d/15488.feature                        |   1 +
 docs/usage/configuration/config_documentation.md |  11 +
 synapse/app/generic_worker.py                    |   2 +
 synapse/config/server.py                         |  11 +
 synapse/handlers/pagination.py                   | 464 ++++++++---------------
 synapse/handlers/room.py                         | 177 +++++----
 synapse/handlers/room_member.py                  |  30 +-
 synapse/module_api/__init__.py                   |  13 +-
 synapse/rest/admin/__init__.py                   |  20 +-
 synapse/rest/admin/rooms.py                      |  78 ++--
 tests/rest/admin/test_room.py                    | 159 +++++++-
 tests/rest/admin/test_server_notice.py           |  20 +-
 tests/rest/client/test_rooms.py                  |   6 +-
 13 files changed, 542 insertions(+), 450 deletions(-)
 create mode 100644 changelog.d/15488.feature

(limited to 'synapse')

diff --git a/changelog.d/15488.feature b/changelog.d/15488.feature
new file mode 100644
index 0000000000..8684d84192
--- /dev/null
+++ b/changelog.d/15488.feature
@@ -0,0 +1 @@
+Add automatic purge after all users have forgotten a room. Also add restore of purge/shutdown rooms after a Synapse restart.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index a06b3d8a06..885a7bf0a3 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -936,6 +936,17 @@ Example configuration:
 redaction_retention_period: 28d
 ```
 ---
+### `forgotten_room_retention_period`
+
+How long to keep locally forgotten rooms before purging them from the DB.
+
+Defaults to `null`, meaning it's disabled.
+
+Example configuration:
+```yaml
+forgotten_room_retention_period: 28d
+```
+---
 ### `user_ips_max_age`
 
 How long to track users' last seen time and IPs in the database.
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index d25e3548e0..f7c80eee21 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -77,6 +77,7 @@ from synapse.storage.databases.main.monthly_active_users import (
 )
 from synapse.storage.databases.main.presence import PresenceStore
 from synapse.storage.databases.main.profile import ProfileWorkerStore
+from synapse.storage.databases.main.purge_events import PurgeEventsStore
 from synapse.storage.databases.main.push_rule import PushRulesWorkerStore
 from synapse.storage.databases.main.pusher import PusherWorkerStore
 from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
@@ -134,6 +135,7 @@ class GenericWorkerStore(
     RelationsWorkerStore,
     EventFederationWorkerStore,
     EventPushActionsWorkerStore,
+    PurgeEventsStore,
     StateGroupWorkerStore,
     SignatureWorkerStore,
     UserErasureWorkerStore,
diff --git a/synapse/config/server.py b/synapse/config/server.py
index b46fa51593..72d30da300 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -486,6 +486,17 @@ class ServerConfig(Config):
         else:
             self.redaction_retention_period = None
 
+        # How long to keep locally forgotten rooms before purging them from the DB.
+        forgotten_room_retention_period = config.get(
+            "forgotten_room_retention_period", None
+        )
+        if forgotten_room_retention_period is not None:
+            self.forgotten_room_retention_period: Optional[int] = self.parse_duration(
+                forgotten_room_retention_period
+            )
+        else:
+            self.forgotten_room_retention_period = None
+
         # How long to keep entries in the `users_ips` table.
         user_ips_max_age = config.get("user_ips_max_age", "28d")
         if user_ips_max_age is not None:
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index 19cf5a2b43..878f267a4e 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -13,9 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import TYPE_CHECKING, Dict, List, Optional, Set
-
-import attr
+from typing import TYPE_CHECKING, List, Optional, Set, Tuple, cast
 
 from twisted.python.failure import Failure
 
@@ -23,16 +21,22 @@ from synapse.api.constants import Direction, EventTypes, Membership
 from synapse.api.errors import SynapseError
 from synapse.api.filtering import Filter
 from synapse.events.utils import SerializeEventConfig
-from synapse.handlers.room import ShutdownRoomResponse
+from synapse.handlers.room import ShutdownRoomParams, ShutdownRoomResponse
 from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME
 from synapse.logging.opentracing import trace
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.rest.admin._base import assert_user_is_admin
 from synapse.streams.config import PaginationConfig
-from synapse.types import JsonDict, Requester, StrCollection, StreamKeyType
+from synapse.types import (
+    JsonDict,
+    JsonMapping,
+    Requester,
+    ScheduledTask,
+    StreamKeyType,
+    TaskStatus,
+)
 from synapse.types.state import StateFilter
 from synapse.util.async_helpers import ReadWriteLock
-from synapse.util.stringutils import random_string
 from synapse.visibility import filter_events_for_client
 
 if TYPE_CHECKING:
@@ -53,80 +57,11 @@ BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD = 3
 PURGE_PAGINATION_LOCK_NAME = "purge_pagination_lock"
 
 
-@attr.s(slots=True, auto_attribs=True)
-class PurgeStatus:
-    """Object tracking the status of a purge request
-
-    This class contains information on the progress of a purge request, for
-    return by get_purge_status.
-    """
-
-    STATUS_ACTIVE = 0
-    STATUS_COMPLETE = 1
-    STATUS_FAILED = 2
-
-    STATUS_TEXT = {
-        STATUS_ACTIVE: "active",
-        STATUS_COMPLETE: "complete",
-        STATUS_FAILED: "failed",
-    }
-
-    # Save the error message if an error occurs
-    error: str = ""
-
-    # Tracks whether this request has completed. One of STATUS_{ACTIVE,COMPLETE,FAILED}.
-    status: int = STATUS_ACTIVE
-
-    def asdict(self) -> JsonDict:
-        ret = {"status": PurgeStatus.STATUS_TEXT[self.status]}
-        if self.error:
-            ret["error"] = self.error
-        return ret
-
-
-@attr.s(slots=True, auto_attribs=True)
-class DeleteStatus:
-    """Object tracking the status of a delete room request
+PURGE_HISTORY_ACTION_NAME = "purge_history"
 
-    This class contains information on the progress of a delete room request, for
-    return by get_delete_status.
-    """
+PURGE_ROOM_ACTION_NAME = "purge_room"
 
-    STATUS_PURGING = 0
-    STATUS_COMPLETE = 1
-    STATUS_FAILED = 2
-    STATUS_SHUTTING_DOWN = 3
-
-    STATUS_TEXT = {
-        STATUS_PURGING: "purging",
-        STATUS_COMPLETE: "complete",
-        STATUS_FAILED: "failed",
-        STATUS_SHUTTING_DOWN: "shutting_down",
-    }
-
-    # Tracks whether this request has completed.
-    # One of STATUS_{PURGING,COMPLETE,FAILED,SHUTTING_DOWN}.
-    status: int = STATUS_PURGING
-
-    # Save the error message if an error occurs
-    error: str = ""
-
-    # Saves the result of an action to give it back to REST API
-    shutdown_room: ShutdownRoomResponse = {
-        "kicked_users": [],
-        "failed_to_kick_users": [],
-        "local_aliases": [],
-        "new_room_id": None,
-    }
-
-    def asdict(self) -> JsonDict:
-        ret = {
-            "status": DeleteStatus.STATUS_TEXT[self.status],
-            "shutdown_room": self.shutdown_room,
-        }
-        if self.error:
-            ret["error"] = self.error
-        return ret
+SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME = "shutdown_and_purge_room"
 
 
 class PaginationHandler:
@@ -136,9 +71,6 @@ class PaginationHandler:
     paginating during a purge.
     """
 
-    # when to remove a completed deletion/purge from the results map
-    CLEAR_PURGE_AFTER_MS = 1000 * 3600 * 24  # 24 hours
-
     def __init__(self, hs: "HomeServer"):
         self.hs = hs
         self.auth = hs.get_auth()
@@ -150,17 +82,11 @@ class PaginationHandler:
         self._room_shutdown_handler = hs.get_room_shutdown_handler()
         self._relations_handler = hs.get_relations_handler()
         self._worker_locks = hs.get_worker_locks_handler()
+        self._task_scheduler = hs.get_task_scheduler()
 
         self.pagination_lock = ReadWriteLock()
         # IDs of rooms in which there currently an active purge *or delete* operation.
         self._purges_in_progress_by_room: Set[str] = set()
-        # map from purge id to PurgeStatus
-        self._purges_by_id: Dict[str, PurgeStatus] = {}
-        # map from purge id to DeleteStatus
-        self._delete_by_id: Dict[str, DeleteStatus] = {}
-        # map from room id to delete ids
-        # Dict[`room_id`, List[`delete_id`]]
-        self._delete_by_room: Dict[str, List[str]] = {}
         self._event_serializer = hs.get_event_client_serializer()
 
         self._retention_default_max_lifetime = (
@@ -173,6 +99,9 @@ class PaginationHandler:
         self._retention_allowed_lifetime_max = (
             hs.config.retention.retention_allowed_lifetime_max
         )
+        self._forgotten_room_retention_period = (
+            hs.config.server.forgotten_room_retention_period
+        )
         self._is_master = hs.config.worker.worker_app is None
 
         if hs.config.retention.retention_enabled and self._is_master:
@@ -189,6 +118,14 @@ class PaginationHandler:
                     job.longest_max_lifetime,
                 )
 
+        self._task_scheduler.register_action(
+            self._purge_history, PURGE_HISTORY_ACTION_NAME
+        )
+        self._task_scheduler.register_action(self._purge_room, PURGE_ROOM_ACTION_NAME)
+        self._task_scheduler.register_action(
+            self._shutdown_and_purge_room, SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME
+        )
+
     async def purge_history_for_rooms_in_range(
         self, min_ms: Optional[int], max_ms: Optional[int]
     ) -> None:
@@ -224,7 +161,7 @@ class PaginationHandler:
             include_null = False
 
         logger.info(
-            "[purge] Running purge job for %s < max_lifetime <= %s (include NULLs = %s)",
+            "[purge] Running retention purge job for %s < max_lifetime <= %s (include NULLs = %s)",
             min_ms,
             max_ms,
             include_null,
@@ -239,10 +176,10 @@ class PaginationHandler:
         for room_id, retention_policy in rooms.items():
             logger.info("[purge] Attempting to purge messages in room %s", room_id)
 
-            if room_id in self._purges_in_progress_by_room:
+            if len(await self.get_delete_tasks_by_room(room_id, only_active=True)) > 0:
                 logger.warning(
-                    "[purge] not purging room %s as there's an ongoing purge running"
-                    " for this room",
+                    "[purge] not purging room %s for retention as there's an ongoing purge"
+                    " running for this room",
                     room_id,
                 )
                 continue
@@ -295,27 +232,20 @@ class PaginationHandler:
             (stream, topo, _event_id) = r
             token = "t%d-%d" % (topo, stream)
 
-            purge_id = random_string(16)
-
-            self._purges_by_id[purge_id] = PurgeStatus()
-
-            logger.info(
-                "Starting purging events in room %s (purge_id %s)" % (room_id, purge_id)
-            )
+            logger.info("Starting purging events in room %s", room_id)
 
             # We want to purge everything, including local events, and to run the purge in
             # the background so that it's not blocking any other operation apart from
             # other purges in the same room.
             run_as_background_process(
-                "_purge_history",
-                self._purge_history,
-                purge_id,
+                PURGE_HISTORY_ACTION_NAME,
+                self.purge_history,
                 room_id,
                 token,
                 True,
             )
 
-    def start_purge_history(
+    async def start_purge_history(
         self, room_id: str, token: str, delete_local_events: bool = False
     ) -> str:
         """Start off a history purge on a room.
@@ -329,40 +259,58 @@ class PaginationHandler:
         Returns:
             unique ID for this purge transaction.
         """
-        if room_id in self._purges_in_progress_by_room:
-            raise SynapseError(
-                400, "History purge already in progress for %s" % (room_id,)
-            )
-
-        purge_id = random_string(16)
+        purge_id = await self._task_scheduler.schedule_task(
+            PURGE_HISTORY_ACTION_NAME,
+            resource_id=room_id,
+            params={"token": token, "delete_local_events": delete_local_events},
+        )
 
         # we log the purge_id here so that it can be tied back to the
         # request id in the log lines.
         logger.info("[purge] starting purge_id %s", purge_id)
 
-        self._purges_by_id[purge_id] = PurgeStatus()
-        run_as_background_process(
-            "purge_history",
-            self._purge_history,
-            purge_id,
-            room_id,
-            token,
-            delete_local_events,
-        )
         return purge_id
 
     async def _purge_history(
-        self, purge_id: str, room_id: str, token: str, delete_local_events: bool
-    ) -> None:
+        self,
+        task: ScheduledTask,
+    ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
+        """
+        Scheduler action to purge some history of a room.
+        """
+        if (
+            task.resource_id is None
+            or task.params is None
+            or "token" not in task.params
+            or "delete_local_events" not in task.params
+        ):
+            return (
+                TaskStatus.FAILED,
+                None,
+                "Not enough parameters passed to _purge_history",
+            )
+        err = await self.purge_history(
+            task.resource_id,
+            task.params["token"],
+            task.params["delete_local_events"],
+        )
+        if err is not None:
+            return TaskStatus.FAILED, None, err
+        return TaskStatus.COMPLETE, None, None
+
+    async def purge_history(
+        self,
+        room_id: str,
+        token: str,
+        delete_local_events: bool,
+    ) -> Optional[str]:
         """Carry out a history purge on a room.
 
         Args:
-            purge_id: The ID for this purge.
             room_id: The room to purge from
             token: topological token to delete events before
             delete_local_events: True to delete local events as well as remote ones
         """
-        self._purges_in_progress_by_room.add(room_id)
         try:
             async with self._worker_locks.acquire_read_write_lock(
                 PURGE_PAGINATION_LOCK_NAME, room_id, write=True
@@ -371,57 +319,68 @@ class PaginationHandler:
                     room_id, token, delete_local_events
                 )
             logger.info("[purge] complete")
-            self._purges_by_id[purge_id].status = PurgeStatus.STATUS_COMPLETE
+            return None
         except Exception:
             f = Failure()
             logger.error(
                 "[purge] failed", exc_info=(f.type, f.value, f.getTracebackObject())
             )
-            self._purges_by_id[purge_id].status = PurgeStatus.STATUS_FAILED
-            self._purges_by_id[purge_id].error = f.getErrorMessage()
-        finally:
-            self._purges_in_progress_by_room.discard(room_id)
-
-            # remove the purge from the list 24 hours after it completes
-            def clear_purge() -> None:
-                del self._purges_by_id[purge_id]
-
-            self.hs.get_reactor().callLater(
-                PaginationHandler.CLEAR_PURGE_AFTER_MS / 1000, clear_purge
-            )
-
-    def get_purge_status(self, purge_id: str) -> Optional[PurgeStatus]:
-        """Get the current status of an active purge
+            return f.getErrorMessage()
 
-        Args:
-            purge_id: purge_id returned by start_purge_history
-        """
-        return self._purges_by_id.get(purge_id)
-
-    def get_delete_status(self, delete_id: str) -> Optional[DeleteStatus]:
+    async def get_delete_task(self, delete_id: str) -> Optional[ScheduledTask]:
         """Get the current status of an active deleting
 
         Args:
             delete_id: delete_id returned by start_shutdown_and_purge_room
+                or start_purge_history.
         """
-        return self._delete_by_id.get(delete_id)
+        return await self._task_scheduler.get_task(delete_id)
 
-    def get_delete_ids_by_room(self, room_id: str) -> Optional[StrCollection]:
-        """Get all active delete ids by room
+    async def get_delete_tasks_by_room(
+        self, room_id: str, only_active: Optional[bool] = False
+    ) -> List[ScheduledTask]:
+        """Get complete, failed or active delete tasks by room
 
         Args:
             room_id: room_id that is deleted
+            only_active: if True, completed and failed tasks will be omitted
+        """
+        statuses = [TaskStatus.ACTIVE]
+        if not only_active:
+            statuses += [TaskStatus.COMPLETE, TaskStatus.FAILED]
+
+        return await self._task_scheduler.get_tasks(
+            actions=[PURGE_ROOM_ACTION_NAME, SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME],
+            resource_id=room_id,
+            statuses=statuses,
+        )
+
+    async def _purge_room(
+        self,
+        task: ScheduledTask,
+    ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
+        """
+        Scheduler action to purge a room.
         """
-        return self._delete_by_room.get(room_id)
+        if not task.resource_id:
+            raise Exception("No room id passed to purge_room task")
+        params = task.params if task.params else {}
+        await self.purge_room(task.resource_id, params.get("force", False))
+        return TaskStatus.COMPLETE, None, None
 
-    async def purge_room(self, room_id: str, force: bool = False) -> None:
+    async def purge_room(
+        self,
+        room_id: str,
+        force: bool,
+    ) -> None:
         """Purge the given room from the database.
-        This function is part the delete room v1 API.
 
         Args:
             room_id: room to be purged
             force: set true to skip checking for joined users.
         """
+        logger.info("starting purge room_id=%s force=%s", room_id, force)
+
         async with self._worker_locks.acquire_multi_read_write_lock(
             [
                 (PURGE_PAGINATION_LOCK_NAME, room_id),
@@ -430,13 +389,20 @@ class PaginationHandler:
             write=True,
         ):
             # first check that we have no users in this room
-            if not force:
-                joined = await self.store.is_host_joined(room_id, self._server_name)
-                if joined:
+            joined = await self.store.is_host_joined(room_id, self._server_name)
+            if joined:
+                if force:
+                    logger.info(
+                        "force-purging room %s with some local users still joined",
+                        room_id,
+                    )
+                else:
                     raise SynapseError(400, "Users are still joined to this room")
 
             await self._storage_controllers.purge_events.purge_room(room_id)
 
+        logger.info("purge complete for room_id %s", room_id)
+
     @trace
     async def get_messages(
         self,
@@ -711,177 +677,72 @@ class PaginationHandler:
 
     async def _shutdown_and_purge_room(
         self,
-        delete_id: str,
-        room_id: str,
-        requester_user_id: Optional[str],
-        new_room_user_id: Optional[str] = None,
-        new_room_name: Optional[str] = None,
-        message: Optional[str] = None,
-        block: bool = False,
-        purge: bool = True,
-        force_purge: bool = False,
-    ) -> None:
+        task: ScheduledTask,
+    ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]:
         """
-        Shuts down and purges a room.
-
-        See `RoomShutdownHandler.shutdown_room` for details of creation of the new room
-
-        Args:
-            delete_id: The ID for this delete.
-            room_id: The ID of the room to shut down.
-            requester_user_id:
-                User who requested the action. Will be recorded as putting the room on the
-                blocking list.
-                If None, the action was not manually requested but instead
-                triggered automatically, e.g. through a Synapse module
-                or some other policy.
-                MUST NOT be None if block=True.
-            new_room_user_id:
-                If set, a new room will be created with this user ID
-                as the creator and admin, and all users in the old room will be
-                moved into that room. If not set, no new room will be created
-                and the users will just be removed from the old room.
-            new_room_name:
-                A string representing the name of the room that new users will
-                be invited to. Defaults to `Content Violation Notification`
-            message:
-                A string containing the first message that will be sent as
-                `new_room_user_id` in the new room. Ideally this will clearly
-                convey why the original room was shut down.
-                Defaults to `Sharing illegal content on this server is not
-                permitted and rooms in violation will be blocked.`
-            block:
-                If set to `true`, this room will be added to a blocking list,
-                preventing future attempts to join the room. Defaults to `false`.
-            purge:
-                If set to `true`, purge the given room from the database.
-            force_purge:
-                If set to `true`, the room will be purged from database
-                also if it fails to remove some users from room.
-
-        Saves a `RoomShutdownHandler.ShutdownRoomResponse` in `DeleteStatus`:
+        Scheduler action to shutdown and purge a room.
         """
+        if task.resource_id is None or task.params is None:
+            raise Exception(
+                "No room id and/or no parameters passed to shutdown_and_purge_room task"
+            )
 
-        self._purges_in_progress_by_room.add(room_id)
-        try:
-            async with self._worker_locks.acquire_read_write_lock(
-                PURGE_PAGINATION_LOCK_NAME, room_id, write=True
-            ):
-                self._delete_by_id[delete_id].status = DeleteStatus.STATUS_SHUTTING_DOWN
-                self._delete_by_id[
-                    delete_id
-                ].shutdown_room = await self._room_shutdown_handler.shutdown_room(
-                    room_id=room_id,
-                    requester_user_id=requester_user_id,
-                    new_room_user_id=new_room_user_id,
-                    new_room_name=new_room_name,
-                    message=message,
-                    block=block,
-                )
-                self._delete_by_id[delete_id].status = DeleteStatus.STATUS_PURGING
+        room_id = task.resource_id
 
-                if purge:
-                    logger.info("starting purge room_id %s", room_id)
+        async def update_result(result: Optional[JsonMapping]) -> None:
+            await self._task_scheduler.update_task(task.id, result=result)
 
-                    # first check that we have no users in this room
-                    if not force_purge:
-                        joined = await self.store.is_host_joined(
-                            room_id, self._server_name
-                        )
-                        if joined:
-                            raise SynapseError(
-                                400, "Users are still joined to this room"
-                            )
+        shutdown_result = (
+            cast(ShutdownRoomResponse, task.result) if task.result else None
+        )
 
-                    await self._storage_controllers.purge_events.purge_room(room_id)
+        shutdown_result = await self._room_shutdown_handler.shutdown_room(
+            room_id,
+            cast(ShutdownRoomParams, task.params),
+            shutdown_result,
+            update_result,
+        )
 
-            logger.info("purge complete for room_id %s", room_id)
-            self._delete_by_id[delete_id].status = DeleteStatus.STATUS_COMPLETE
-        except Exception:
-            f = Failure()
-            logger.error(
-                "failed",
-                exc_info=(f.type, f.value, f.getTracebackObject()),
-            )
-            self._delete_by_id[delete_id].status = DeleteStatus.STATUS_FAILED
-            self._delete_by_id[delete_id].error = f.getErrorMessage()
-        finally:
-            self._purges_in_progress_by_room.discard(room_id)
-
-            # remove the delete from the list 24 hours after it completes
-            def clear_delete() -> None:
-                del self._delete_by_id[delete_id]
-                self._delete_by_room[room_id].remove(delete_id)
-                if not self._delete_by_room[room_id]:
-                    del self._delete_by_room[room_id]
-
-            self.hs.get_reactor().callLater(
-                PaginationHandler.CLEAR_PURGE_AFTER_MS / 1000, clear_delete
+        if task.params.get("purge", False):
+            await self.purge_room(
+                room_id,
+                task.params.get("force_purge", False),
             )
 
-    def start_shutdown_and_purge_room(
+        return (TaskStatus.COMPLETE, shutdown_result, None)
+
+    async def start_shutdown_and_purge_room(
         self,
         room_id: str,
-        requester_user_id: Optional[str],
-        new_room_user_id: Optional[str] = None,
-        new_room_name: Optional[str] = None,
-        message: Optional[str] = None,
-        block: bool = False,
-        purge: bool = True,
-        force_purge: bool = False,
+        shutdown_params: ShutdownRoomParams,
     ) -> str:
         """Start off shut down and purge on a room.
 
         Args:
             room_id: The ID of the room to shut down.
-            requester_user_id:
-                User who requested the action and put the room on the
-                blocking list.
-                If None, the action was not manually requested but instead
-                triggered automatically, e.g. through a Synapse module
-                or some other policy.
-                MUST NOT be None if block=True.
-            new_room_user_id:
-                If set, a new room will be created with this user ID
-                as the creator and admin, and all users in the old room will be
-                moved into that room. If not set, no new room will be created
-                and the users will just be removed from the old room.
-            new_room_name:
-                A string representing the name of the room that new users will
-                be invited to. Defaults to `Content Violation Notification`
-            message:
-                A string containing the first message that will be sent as
-                `new_room_user_id` in the new room. Ideally this will clearly
-                convey why the original room was shut down.
-                Defaults to `Sharing illegal content on this server is not
-                permitted and rooms in violation will be blocked.`
-            block:
-                If set to `true`, this room will be added to a blocking list,
-                preventing future attempts to join the room. Defaults to `false`.
-            purge:
-                If set to `true`, purge the given room from the database.
-            force_purge:
-                If set to `true`, the room will be purged from database
-                also if it fails to remove some users from room.
+            shutdown_params: parameters for the shutdown
 
         Returns:
             unique ID for this delete transaction.
         """
-        if room_id in self._purges_in_progress_by_room:
-            raise SynapseError(
-                400, "History purge already in progress for %s" % (room_id,)
-            )
+        if len(await self.get_delete_tasks_by_room(room_id, only_active=True)) > 0:
+            raise SynapseError(400, "Purge already in progress for %s" % (room_id,))
 
         # This check is double to `RoomShutdownHandler.shutdown_room`
         # But here the requester get a direct response / error with HTTP request
         # and do not have to check the purge status
+        new_room_user_id = shutdown_params["new_room_user_id"]
         if new_room_user_id is not None:
             if not self.hs.is_mine_id(new_room_user_id):
                 raise SynapseError(
                     400, "User must be our own: %s" % (new_room_user_id,)
                 )
 
-        delete_id = random_string(16)
+        delete_id = await self._task_scheduler.schedule_task(
+            SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME,
+            resource_id=room_id,
+            params=shutdown_params,
+        )
 
         # we log the delete_id here so that it can be tied back to the
         # request id in the log lines.
@@ -891,19 +752,4 @@ class PaginationHandler:
             delete_id,
         )
 
-        self._delete_by_id[delete_id] = DeleteStatus()
-        self._delete_by_room.setdefault(room_id, []).append(delete_id)
-        run_as_background_process(
-            "shutdown_and_purge_room",
-            self._shutdown_and_purge_room,
-            delete_id,
-            room_id,
-            requester_user_id,
-            new_room_user_id,
-            new_room_name,
-            message,
-            block,
-            purge,
-            force_purge,
-        )
         return delete_id
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 7a762c8511..a0c3b16819 100644
--- a/synapse/handlers/room.py
+++ b/synapse/handlers/room.py
@@ -20,7 +20,7 @@ import random
 import string
 from collections import OrderedDict
 from http import HTTPStatus
-from typing import TYPE_CHECKING, Any, Awaitable, Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Awaitable, Callable, Dict, List, Optional, Tuple
 
 import attr
 from typing_extensions import TypedDict
@@ -54,11 +54,11 @@ from synapse.events import EventBase
 from synapse.events.snapshot import UnpersistedEventContext
 from synapse.events.utils import copy_and_fixup_power_levels_contents
 from synapse.handlers.relations import BundledAggregations
-from synapse.module_api import NOT_SPAM
 from synapse.rest.admin._base import assert_user_is_admin
 from synapse.streams import EventSource
 from synapse.types import (
     JsonDict,
+    JsonMapping,
     MutableStateMap,
     Requester,
     RoomAlias,
@@ -454,7 +454,7 @@ class RoomCreationHandler:
         spam_check = await self._spam_checker_module_callbacks.user_may_create_room(
             user_id
         )
-        if spam_check != NOT_SPAM:
+        if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
             raise SynapseError(
                 403,
                 "You are not permitted to create rooms",
@@ -768,7 +768,7 @@ class RoomCreationHandler:
             spam_check = await self._spam_checker_module_callbacks.user_may_create_room(
                 user_id
             )
-            if spam_check != NOT_SPAM:
+            if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
                 raise SynapseError(
                     403,
                     "You are not permitted to create rooms",
@@ -1750,6 +1750,45 @@ class RoomEventSource(EventSource[RoomStreamToken, EventBase]):
         return self.store.get_current_room_stream_token_for_room_id(room_id)
 
 
+class ShutdownRoomParams(TypedDict):
+    """
+    Attributes:
+        requester_user_id:
+            User who requested the action, recorded as having put the room on the
+            blocking list. MUST NOT be None if `block` is set.
+        new_room_user_id:
+            If set, a new room will be created with this user ID
+            as the creator and admin, and all users in the old room will be
+            moved into that room. If not set, no new room will be created
+            and the users will just be removed from the old room.
+        new_room_name:
+            A string representing the name of the room that new users will
+            be invited to. Defaults to `Content Violation Notification`
+        message:
+            A string containing the first message that will be sent as
+            `new_room_user_id` in the new room. Ideally this will clearly
+            convey why the original room was shut down.
+            Defaults to `Sharing illegal content on this server is not
+            permitted and rooms in violation will be blocked.`
+        block:
+            If set to `true`, this room will be added to a blocking list,
+            preventing future attempts to join the room. Defaults to `false`.
+        purge:
+            If set to `true`, purge the given room from the database.
+        force_purge:
+            If set to `true`, the room will be purged from database
+            even if there are still users joined to the room.
+    """
+
+    requester_user_id: Optional[str]
+    new_room_user_id: Optional[str]
+    new_room_name: Optional[str]
+    message: Optional[str]
+    block: bool
+    purge: bool
+    force_purge: bool
+
+
 class ShutdownRoomResponse(TypedDict):
     """
     Attributes:
@@ -1787,12 +1826,12 @@ class RoomShutdownHandler:
     async def shutdown_room(
         self,
         room_id: str,
-        requester_user_id: Optional[str],
-        new_room_user_id: Optional[str] = None,
-        new_room_name: Optional[str] = None,
-        message: Optional[str] = None,
-        block: bool = False,
-    ) -> ShutdownRoomResponse:
+        params: ShutdownRoomParams,
+        result: Optional[ShutdownRoomResponse] = None,
+        update_result_fct: Optional[
+            Callable[[Optional[JsonMapping]], Awaitable[None]]
+        ] = None,
+    ) -> Optional[ShutdownRoomResponse]:
         """
         Shuts down a room. Moves all local users and room aliases automatically
         to a new room if `new_room_user_id` is set. Otherwise local users only
@@ -1808,52 +1847,23 @@ class RoomShutdownHandler:
 
         Args:
             room_id: The ID of the room to shut down.
-            requester_user_id:
-                User who requested the action and put the room on the
-                blocking list.
-                If None, the action was not manually requested but instead
-                triggered automatically, e.g. through a Synapse module
-                or some other policy.
-                MUST NOT be None if block=True.
-            new_room_user_id:
-                If set, a new room will be created with this user ID
-                as the creator and admin, and all users in the old room will be
-                moved into that room. If not set, no new room will be created
-                and the users will just be removed from the old room.
-            new_room_name:
-                A string representing the name of the room that new users will
-                be invited to. Defaults to `Content Violation Notification`
-            message:
-                A string containing the first message that will be sent as
-                `new_room_user_id` in the new room. Ideally this will clearly
-                convey why the original room was shut down.
-                Defaults to `Sharing illegal content on this server is not
-                permitted and rooms in violation will be blocked.`
-            block:
-                If set to `True`, users will be prevented from joining the old
-                room. This option can also be used to pre-emptively block a room,
-                even if it's unknown to this homeserver. In this case, the room
-                will be blocked, and no further action will be taken. If `False`,
-                attempting to delete an unknown room is invalid.
-
-                Defaults to `False`.
-
-        Returns: a dict containing the following keys:
-            kicked_users: An array of users (`user_id`) that were kicked.
-            failed_to_kick_users:
-                An array of users (`user_id`) that that were not kicked.
-            local_aliases:
-                An array of strings representing the local aliases that were
-                migrated from the old room to the new.
-            new_room_id:
-                A string representing the room ID of the new room, or None if
-                no such room was created.
-        """
+            params: parameters for the shutdown, cf `ShutdownRoomParams`
+            result: current status of the shutdown, if it was interrupted;
+                when omitted, the shutdown starts from an empty result
+            update_result_fct: function called when `result` is updated locally
 
-        if not new_room_name:
-            new_room_name = self.DEFAULT_ROOM_NAME
-        if not message:
-            message = self.DEFAULT_MESSAGE
+        Returns: a dict matching `ShutdownRoomResponse`.
+        """
+        requester_user_id = params["requester_user_id"]
+        new_room_user_id = params["new_room_user_id"]
+        block = params["block"]
+
+        new_room_name = (
+            params["new_room_name"]
+            if params["new_room_name"]
+            else self.DEFAULT_ROOM_NAME
+        )
+        message = params["message"] if params["message"] else self.DEFAULT_MESSAGE
 
         if not RoomID.is_valid(room_id):
             raise SynapseError(400, "%s is not a legal room ID" % (room_id,))
@@ -1865,6 +1875,17 @@ class RoomShutdownHandler:
                 403, "Shutdown of this room is forbidden", Codes.FORBIDDEN
             )
 
+        result = (
+            result
+            if result
+            else {
+                "kicked_users": [],
+                "failed_to_kick_users": [],
+                "local_aliases": [],
+                "new_room_id": None,
+            }
+        )
+
         # Action the block first (even if the room doesn't exist yet)
         if block:
             if requester_user_id is None:
@@ -1877,14 +1898,10 @@ class RoomShutdownHandler:
 
         if not await self.store.get_room(room_id):
             # if we don't know about the room, there is nothing left to do.
-            return {
-                "kicked_users": [],
-                "failed_to_kick_users": [],
-                "local_aliases": [],
-                "new_room_id": None,
-            }
+            return result
 
-        if new_room_user_id is not None:
+        new_room_id = result.get("new_room_id")
+        if new_room_user_id is not None and new_room_id is None:
             if not self.hs.is_mine_id(new_room_user_id):
                 raise SynapseError(
                     400, "User must be our own: %s" % (new_room_user_id,)
@@ -1904,6 +1921,10 @@ class RoomShutdownHandler:
                 ratelimit=False,
             )
 
+            result["new_room_id"] = new_room_id
+            if update_result_fct:
+                await update_result_fct(result)
+
             logger.info(
                 "Shutting down room %r, joining to new room: %r", room_id, new_room_id
             )
@@ -1917,12 +1938,9 @@ class RoomShutdownHandler:
                 stream_id,
             )
         else:
-            new_room_id = None
             logger.info("Shutting down room %r", room_id)
 
         users = await self.store.get_users_in_room(room_id)
-        kicked_users = []
-        failed_to_kick_users = []
         for user_id in users:
             if not self.hs.is_mine_id(user_id):
                 continue
@@ -1951,7 +1969,9 @@ class RoomShutdownHandler:
                     stream_id,
                 )
 
-                await self.room_member_handler.forget(target_requester.user, room_id)
+                await self.room_member_handler.forget(
+                    target_requester.user, room_id, do_not_schedule_purge=True
+                )
 
                 # Join users to new room
                 if new_room_user_id:
@@ -1966,15 +1986,23 @@ class RoomShutdownHandler:
                         require_consent=False,
                     )
 
-                kicked_users.append(user_id)
+                result["kicked_users"].append(user_id)
+                if update_result_fct:
+                    await update_result_fct(result)
             except Exception:
                 logger.exception(
                     "Failed to leave old room and join new room for %r", user_id
                 )
-                failed_to_kick_users.append(user_id)
+                result["failed_to_kick_users"].append(user_id)
+                if update_result_fct:
+                    await update_result_fct(result)
 
         # Send message in new room and move aliases
         if new_room_user_id:
+            room_creator_requester = create_requester(
+                new_room_user_id, authenticated_entity=requester_user_id
+            )
+
             await self.event_creation_handler.create_and_send_nonmember_event(
                 room_creator_requester,
                 {
@@ -1986,18 +2014,15 @@ class RoomShutdownHandler:
                 ratelimit=False,
             )
 
-            aliases_for_room = await self.store.get_aliases_for_room(room_id)
+            result["local_aliases"] = list(
+                await self.store.get_aliases_for_room(room_id)
+            )
 
             assert new_room_id is not None
             await self.store.update_aliases_for_room(
                 room_id, new_room_id, requester_user_id
             )
         else:
-            aliases_for_room = []
+            result["local_aliases"] = []
 
-        return {
-            "kicked_users": kicked_users,
-            "failed_to_kick_users": failed_to_kick_users,
-            "local_aliases": list(aliases_for_room),
-            "new_room_id": new_room_id,
-        }
+        return result
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index de0f04e3fe..90343c2306 100644
--- a/synapse/handlers/room_member.py
+++ b/synapse/handlers/room_member.py
@@ -37,13 +37,13 @@ from synapse.api.ratelimiting import Ratelimiter
 from synapse.event_auth import get_named_level, get_power_level_event
 from synapse.events import EventBase
 from synapse.events.snapshot import EventContext
+from synapse.handlers.pagination import PURGE_ROOM_ACTION_NAME
 from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN
 from synapse.handlers.state_deltas import MatchChange, StateDeltasHandler
 from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME
 from synapse.logging import opentracing
 from synapse.metrics import event_processing_positions
 from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.module_api import NOT_SPAM
 from synapse.types import (
     JsonDict,
     Requester,
@@ -169,6 +169,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         self.request_ratelimiter = hs.get_request_ratelimiter()
         hs.get_notifier().add_new_join_in_room_callback(self._on_user_joined_room)
 
+        self._forgotten_room_retention_period = (
+            hs.config.server.forgotten_room_retention_period
+        )
+
     def _on_user_joined_room(self, event_id: str, room_id: str) -> None:
         """Notify the rate limiter that a room join has occurred.
 
@@ -278,7 +282,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         """
         raise NotImplementedError()
 
-    async def forget(self, user: UserID, room_id: str) -> None:
+    async def forget(
+        self, user: UserID, room_id: str, do_not_schedule_purge: bool = False
+    ) -> None:
         user_id = user.to_string()
 
         member = await self._storage_controllers.state.get_current_state_event(
@@ -298,6 +304,20 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
         # the table `current_state_events` and `get_current_state_events` is `None`.
         await self.store.forget(user_id, room_id)
 
+        # If everyone locally has left the room, then there is no reason for us to keep the
+        # room around, so we schedule a purge once the retention period has elapsed.
+        if (
+            not do_not_schedule_purge
+            and self._forgotten_room_retention_period
+            and await self.store.is_locally_forgotten_room(room_id)
+        ):
+            await self.hs.get_task_scheduler().schedule_task(
+                PURGE_ROOM_ACTION_NAME,
+                resource_id=room_id,
+                timestamp=self.clock.time_msec()
+                + self._forgotten_room_retention_period,
+            )
+
     async def ratelimit_multiple_invites(
         self,
         requester: Optional[Requester],
@@ -804,7 +824,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                 spam_check = await self._spam_checker_module_callbacks.user_may_invite(
                     requester.user.to_string(), target_id, room_id
                 )
-                if spam_check != NOT_SPAM:
+                if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
                     logger.info("Blocking invite due to spam checker")
                     block_invite_result = spam_check
 
@@ -939,7 +959,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                         target.to_string(), room_id, is_invited=inviter is not None
                     )
                 )
-                if spam_check != NOT_SPAM:
+                if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
                     raise SynapseError(
                         403,
                         "Not allowed to join this room",
@@ -1557,7 +1577,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
                     room_id=room_id,
                 )
             )
-            if spam_check != NOT_SPAM:
+            if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
                 raise SynapseError(
                     403,
                     "Cannot send threepid invite",
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 7ec202be23..65e2aca456 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -1741,7 +1741,18 @@ class ModuleApi:
         """
         # Future extensions to this method might want to e.g. allow use of `force_purge`.
         # TODO In the future we should make sure this is persistent.
-        self._hs.get_pagination_handler().start_shutdown_and_purge_room(room_id, None)
+        await self._hs.get_pagination_handler().start_shutdown_and_purge_room(
+            room_id,
+            {
+                "new_room_user_id": None,
+                "new_room_name": None,
+                "message": None,
+                "requester_user_id": None,
+                "block": False,
+                "purge": True,
+                "force_purge": False,
+            },
+        )
 
     async def set_displayname(
         self,
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index 0d42c89ff7..7d0b4b55a0 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -21,6 +21,7 @@ from http import HTTPStatus
 from typing import TYPE_CHECKING, Optional, Tuple
 
 from synapse.api.errors import Codes, NotFoundError, SynapseError
+from synapse.handlers.pagination import PURGE_HISTORY_ACTION_NAME
 from synapse.http.server import HttpServer, JsonResource
 from synapse.http.servlet import RestServlet, parse_json_object_from_request
 from synapse.http.site import SynapseRequest
@@ -93,7 +94,7 @@ from synapse.rest.admin.users import (
     UserTokenRestServlet,
     WhoisRestServlet,
 )
-from synapse.types import JsonDict, RoomStreamToken
+from synapse.types import JsonDict, RoomStreamToken, TaskStatus
 from synapse.util import SYNAPSE_VERSION
 
 if TYPE_CHECKING:
@@ -196,7 +197,7 @@ class PurgeHistoryRestServlet(RestServlet):
                 errcode=Codes.BAD_JSON,
             )
 
-        purge_id = self.pagination_handler.start_purge_history(
+        purge_id = await self.pagination_handler.start_purge_history(
             room_id, token, delete_local_events=delete_local_events
         )
 
@@ -215,11 +216,20 @@ class PurgeHistoryStatusRestServlet(RestServlet):
     ) -> Tuple[int, JsonDict]:
         await assert_requester_is_admin(self.auth, request)
 
-        purge_status = self.pagination_handler.get_purge_status(purge_id)
-        if purge_status is None:
+        purge_task = await self.pagination_handler.get_delete_task(purge_id)
+        if purge_task is None or purge_task.action != PURGE_HISTORY_ACTION_NAME:
             raise NotFoundError("purge id '%s' not found" % purge_id)
 
-        return HTTPStatus.OK, purge_status.asdict()
+        result: JsonDict = {
+            "status": purge_task.status
+            if purge_task.status == TaskStatus.COMPLETE
+            or purge_task.status == TaskStatus.FAILED
+            else "active",
+        }
+        if purge_task.error:
+            result["error"] = purge_task.error
+
+        return HTTPStatus.OK, result
 
 
 ########################################################################################
diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py
index 1d65560265..436718c8b2 100644
--- a/synapse/rest/admin/rooms.py
+++ b/synapse/rest/admin/rooms.py
@@ -19,6 +19,10 @@ from urllib import parse as urlparse
 from synapse.api.constants import Direction, EventTypes, JoinRules, Membership
 from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError
 from synapse.api.filtering import Filter
+from synapse.handlers.pagination import (
+    PURGE_ROOM_ACTION_NAME,
+    SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME,
+)
 from synapse.http.servlet import (
     ResolveRoomIdMixin,
     RestServlet,
@@ -36,7 +40,7 @@ from synapse.rest.admin._base import (
 )
 from synapse.storage.databases.main.room import RoomSortOrder
 from synapse.streams.config import PaginationConfig
-from synapse.types import JsonDict, RoomID, UserID, create_requester
+from synapse.types import JsonDict, RoomID, ScheduledTask, UserID, create_requester
 from synapse.types.state import StateFilter
 from synapse.util import json_decoder
 
@@ -117,20 +121,30 @@ class RoomRestV2Servlet(RestServlet):
                 403, "Shutdown of this room is forbidden", Codes.FORBIDDEN
             )
 
-        delete_id = self._pagination_handler.start_shutdown_and_purge_room(
+        delete_id = await self._pagination_handler.start_shutdown_and_purge_room(
             room_id=room_id,
-            new_room_user_id=content.get("new_room_user_id"),
-            new_room_name=content.get("room_name"),
-            message=content.get("message"),
-            requester_user_id=requester.user.to_string(),
-            block=block,
-            purge=purge,
-            force_purge=force_purge,
+            shutdown_params={
+                "new_room_user_id": content.get("new_room_user_id"),
+                "new_room_name": content.get("room_name"),
+                "message": content.get("message"),
+                "requester_user_id": requester.user.to_string(),
+                "block": block,
+                "purge": purge,
+                "force_purge": force_purge,
+            },
         )
 
         return HTTPStatus.OK, {"delete_id": delete_id}
 
 
+def _convert_delete_task_to_response(task: ScheduledTask) -> JsonDict:
+    return {
+        "delete_id": task.id,
+        "status": task.status,
+        "shutdown_room": task.result,
+    }
+
+
 class DeleteRoomStatusByRoomIdRestServlet(RestServlet):
     """Get the status of the delete room background task."""
 
@@ -150,21 +164,16 @@ class DeleteRoomStatusByRoomIdRestServlet(RestServlet):
                 HTTPStatus.BAD_REQUEST, "%s is not a legal room ID" % (room_id,)
             )
 
-        delete_ids = self._pagination_handler.get_delete_ids_by_room(room_id)
-        if delete_ids is None:
-            raise NotFoundError("No delete task for room_id '%s' found" % room_id)
+        delete_tasks = await self._pagination_handler.get_delete_tasks_by_room(room_id)
 
-        response = []
-        for delete_id in delete_ids:
-            delete = self._pagination_handler.get_delete_status(delete_id)
-            if delete:
-                response += [
-                    {
-                        "delete_id": delete_id,
-                        **delete.asdict(),
-                    }
-                ]
-        return HTTPStatus.OK, {"results": cast(JsonDict, response)}
+        if delete_tasks:
+            return HTTPStatus.OK, {
+                "results": [
+                    _convert_delete_task_to_response(task) for task in delete_tasks
+                ],
+            }
+        else:
+            raise NotFoundError("No delete task for room_id '%s' found" % room_id)
 
 
 class DeleteRoomStatusByDeleteIdRestServlet(RestServlet):
@@ -181,11 +190,14 @@ class DeleteRoomStatusByDeleteIdRestServlet(RestServlet):
     ) -> Tuple[int, JsonDict]:
         await assert_requester_is_admin(self._auth, request)
 
-        delete_status = self._pagination_handler.get_delete_status(delete_id)
-        if delete_status is None:
+        delete_task = await self._pagination_handler.get_delete_task(delete_id)
+        if delete_task is None or (
+            delete_task.action != PURGE_ROOM_ACTION_NAME
+            and delete_task.action != SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME
+        ):
             raise NotFoundError("delete id '%s' not found" % delete_id)
 
-        return HTTPStatus.OK, cast(JsonDict, delete_status.asdict())
+        return HTTPStatus.OK, _convert_delete_task_to_response(delete_task)
 
 
 class ListRoomRestServlet(RestServlet):
@@ -349,11 +361,15 @@ class RoomRestServlet(RestServlet):
 
         ret = await room_shutdown_handler.shutdown_room(
             room_id=room_id,
-            new_room_user_id=content.get("new_room_user_id"),
-            new_room_name=content.get("room_name"),
-            message=content.get("message"),
-            requester_user_id=requester.user.to_string(),
-            block=block,
+            params={
+                "new_room_user_id": content.get("new_room_user_id"),
+                "new_room_name": content.get("room_name"),
+                "message": content.get("message"),
+                "requester_user_id": requester.user.to_string(),
+                "block": block,
+                "purge": purge,
+                "force_purge": force_purge,
+            },
         )
 
         # Purge room
diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py
index eb50086c50..6ed451d7c4 100644
--- a/tests/rest/admin/test_room.py
+++ b/tests/rest/admin/test_room.py
@@ -15,26 +15,34 @@ import json
 import time
 import urllib.parse
 from typing import List, Optional
-from unittest.mock import Mock
+from unittest.mock import AsyncMock, Mock
 
 from parameterized import parameterized
 
+from twisted.internet.task import deferLater
 from twisted.test.proto_helpers import MemoryReactor
 
 import synapse.rest.admin
 from synapse.api.constants import EventTypes, Membership, RoomTypes
 from synapse.api.errors import Codes
-from synapse.handlers.pagination import PaginationHandler, PurgeStatus
+from synapse.handlers.pagination import (
+    PURGE_ROOM_ACTION_NAME,
+    SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME,
+)
 from synapse.rest.client import directory, events, login, room
 from synapse.server import HomeServer
+from synapse.types import UserID
 from synapse.util import Clock
-from synapse.util.stringutils import random_string
+from synapse.util.task_scheduler import TaskScheduler
 
 from tests import unittest
 
 """Tests admin REST events for /rooms paths."""
 
 
+ONE_HOUR_IN_S = 3600
+
+
 class DeleteRoomTestCase(unittest.HomeserverTestCase):
     servlets = [
         synapse.rest.admin.register_servlets,
@@ -46,6 +54,7 @@ class DeleteRoomTestCase(unittest.HomeserverTestCase):
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.event_creation_handler = hs.get_event_creation_handler()
+        self.task_scheduler = hs.get_task_scheduler()
         hs.config.consent.user_consent_version = "1"
 
         consent_uri_builder = Mock()
@@ -476,6 +485,7 @@ class DeleteRoomV2TestCase(unittest.HomeserverTestCase):
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.event_creation_handler = hs.get_event_creation_handler()
+        self.task_scheduler = hs.get_task_scheduler()
         hs.config.consent.user_consent_version = "1"
 
         consent_uri_builder = Mock()
@@ -502,6 +512,9 @@ class DeleteRoomV2TestCase(unittest.HomeserverTestCase):
         )
         self.url_status_by_delete_id = "/_synapse/admin/v2/rooms/delete_status/"
 
+        self.room_member_handler = hs.get_room_member_handler()
+        self.pagination_handler = hs.get_pagination_handler()
+
     @parameterized.expand(
         [
             ("DELETE", "/_synapse/admin/v2/rooms/%s"),
@@ -661,7 +674,7 @@ class DeleteRoomV2TestCase(unittest.HomeserverTestCase):
         delete_id1 = channel.json_body["delete_id"]
 
         # go ahead
-        self.reactor.advance(PaginationHandler.CLEAR_PURGE_AFTER_MS / 1000 / 2)
+        self.reactor.advance(TaskScheduler.KEEP_TASKS_FOR_MS / 1000 / 2)
 
         # second task
         channel = self.make_request(
@@ -686,12 +699,14 @@ class DeleteRoomV2TestCase(unittest.HomeserverTestCase):
         self.assertEqual(2, len(channel.json_body["results"]))
         self.assertEqual("complete", channel.json_body["results"][0]["status"])
         self.assertEqual("complete", channel.json_body["results"][1]["status"])
-        self.assertEqual(delete_id1, channel.json_body["results"][0]["delete_id"])
-        self.assertEqual(delete_id2, channel.json_body["results"][1]["delete_id"])
+        delete_ids = {delete_id1, delete_id2}
+        self.assertTrue(channel.json_body["results"][0]["delete_id"] in delete_ids)
+        delete_ids.remove(channel.json_body["results"][0]["delete_id"])
+        self.assertTrue(channel.json_body["results"][1]["delete_id"] in delete_ids)
 
         # get status after more than clearing time for first task
         # second task is not cleared
-        self.reactor.advance(PaginationHandler.CLEAR_PURGE_AFTER_MS / 1000 / 2)
+        self.reactor.advance(TaskScheduler.KEEP_TASKS_FOR_MS / 1000 / 2)
 
         channel = self.make_request(
             "GET",
@@ -705,7 +720,7 @@ class DeleteRoomV2TestCase(unittest.HomeserverTestCase):
         self.assertEqual(delete_id2, channel.json_body["results"][0]["delete_id"])
 
         # get status after more than clearing time for all tasks
-        self.reactor.advance(PaginationHandler.CLEAR_PURGE_AFTER_MS / 1000 / 2)
+        self.reactor.advance(TaskScheduler.KEEP_TASKS_FOR_MS / 1000 / 2)
 
         channel = self.make_request(
             "GET",
@@ -721,6 +736,13 @@ class DeleteRoomV2TestCase(unittest.HomeserverTestCase):
 
         body = {"new_room_user_id": self.admin_user}
 
+        # Mock PaginationHandler.purge_room to sleep for 100s, so we have time to do a second call
+        # before the purge is over. Note that it doesn't purge anymore, but we don't care.
+        async def purge_room(room_id: str, force: bool) -> None:
+            await deferLater(self.hs.get_reactor(), 100, lambda: None)
+
+        self.pagination_handler.purge_room = AsyncMock(side_effect=purge_room)  # type: ignore[method-assign]
+
         # first call to delete room
         # and do not wait for finish the task
         first_channel = self.make_request(
@@ -728,7 +750,6 @@ class DeleteRoomV2TestCase(unittest.HomeserverTestCase):
             self.url.encode("ascii"),
             content=body,
             access_token=self.admin_user_tok,
-            await_result=False,
         )
 
         # second call to delete room
@@ -742,7 +763,7 @@ class DeleteRoomV2TestCase(unittest.HomeserverTestCase):
         self.assertEqual(400, second_channel.code, msg=second_channel.json_body)
         self.assertEqual(Codes.UNKNOWN, second_channel.json_body["errcode"])
         self.assertEqual(
-            f"History purge already in progress for {self.room_id}",
+            f"Purge already in progress for {self.room_id}",
             second_channel.json_body["error"],
         )
 
@@ -751,6 +772,9 @@ class DeleteRoomV2TestCase(unittest.HomeserverTestCase):
         self.assertEqual(200, first_channel.code, msg=first_channel.json_body)
         self.assertIn("delete_id", first_channel.json_body)
 
+        # wait for purge_room to finish
+        self.pump(1)
+
         # check status after finish the task
         self._test_result(
             first_channel.json_body["delete_id"],
@@ -972,6 +996,115 @@ class DeleteRoomV2TestCase(unittest.HomeserverTestCase):
         # Assert we can no longer peek into the room
         self._assert_peek(self.room_id, expect_code=403)
 
+    @unittest.override_config({"forgotten_room_retention_period": "1d"})
+    def test_purge_forgotten_room(self) -> None:
+        # Create a test room
+        room_id = self.helper.create_room_as(
+            self.admin_user,
+            tok=self.admin_user_tok,
+        )
+
+        self.helper.leave(room_id, user=self.admin_user, tok=self.admin_user_tok)
+        self.get_success(
+            self.room_member_handler.forget(
+                UserID.from_string(self.admin_user), room_id
+            )
+        )
+
+        # Test that room is not yet purged
+        with self.assertRaises(AssertionError):
+            self._is_purged(room_id)
+
+        # Advance 24 hours in the future, past the `forgotten_room_retention_period`
+        self.reactor.advance(24 * ONE_HOUR_IN_S)
+
+        self._is_purged(room_id)
+
+    def test_scheduled_purge_room(self) -> None:
+        # Create a test room
+        room_id = self.helper.create_room_as(
+            self.admin_user,
+            tok=self.admin_user_tok,
+        )
+        self.helper.leave(room_id, user=self.admin_user, tok=self.admin_user_tok)
+
+        # Schedule a purge 10 seconds in the future
+        self.get_success(
+            self.task_scheduler.schedule_task(
+                PURGE_ROOM_ACTION_NAME,
+                resource_id=room_id,
+                timestamp=self.clock.time_msec() + 10 * 1000,
+            )
+        )
+
+        # Test that room is not yet purged
+        with self.assertRaises(AssertionError):
+            self._is_purged(room_id)
+
+        # Wait for next scheduler run
+        self.reactor.advance(TaskScheduler.SCHEDULE_INTERVAL_MS)
+
+        self._is_purged(room_id)
+
+    def test_schedule_shutdown_room(self) -> None:
+        # Create a test room
+        room_id = self.helper.create_room_as(
+            self.other_user,
+            tok=self.other_user_tok,
+        )
+
+        # Schedule a shutdown 10 seconds in the future
+        delete_id = self.get_success(
+            self.task_scheduler.schedule_task(
+                SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME,
+                resource_id=room_id,
+                params={
+                    "requester_user_id": self.admin_user,
+                    "new_room_user_id": self.admin_user,
+                    "new_room_name": None,
+                    "message": None,
+                    "block": False,
+                    "purge": True,
+                    "force_purge": True,
+                },
+                timestamp=self.clock.time_msec() + 10 * 1000,
+            )
+        )
+
+        # Test that room is not yet shutdown
+        self._is_member(room_id, self.other_user)
+
+        # Test that room is not yet purged
+        with self.assertRaises(AssertionError):
+            self._is_purged(room_id)
+
+        # Wait for next scheduler run
+        self.reactor.advance(TaskScheduler.SCHEDULE_INTERVAL_MS)
+
+        # Test that all users have been kicked (room is shut down)
+        self._has_no_members(room_id)
+
+        self._is_purged(room_id)
+
+        # Retrieve delete results
+        result = self.make_request(
+            "GET",
+            self.url_status_by_delete_id + delete_id,
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(200, result.code, msg=result.json_body)
+
+        # Check that the user is in kicked_users
+        self.assertIn(
+            self.other_user, result.json_body["shutdown_room"]["kicked_users"]
+        )
+
+        new_room_id = result.json_body["shutdown_room"]["new_room_id"]
+        self.assertTrue(new_room_id)
+
+        # Check that the user is actually in the new room
+        self._is_member(new_room_id, self.other_user)
+
     def _is_blocked(self, room_id: str, expect: bool = True) -> None:
         """Assert that the room is blocked or not"""
         d = self.store.is_room_blocked(room_id)
@@ -1034,7 +1167,6 @@ class DeleteRoomV2TestCase(unittest.HomeserverTestCase):
             kicked_user: a user_id which is kicked from the room
             expect_new_room: if we expect that a new room was created
         """
-
         # get information by room_id
         channel_room_id = self.make_request(
             "GET",
@@ -1957,11 +2089,8 @@ class RoomMessagesTestCase(unittest.HomeserverTestCase):
         self.assertEqual(len(chunk), 2, [event["content"] for event in chunk])
 
         # Purge every event before the second event.
-        purge_id = random_string(16)
-        pagination_handler._purges_by_id[purge_id] = PurgeStatus()
         self.get_success(
-            pagination_handler._purge_history(
-                purge_id=purge_id,
+            pagination_handler.purge_history(
                 room_id=self.room_id,
                 token=second_token_str,
                 delete_local_events=True,
diff --git a/tests/rest/admin/test_server_notice.py b/tests/rest/admin/test_server_notice.py
index 28b999573e..dfd14f5751 100644
--- a/tests/rest/admin/test_server_notice.py
+++ b/tests/rest/admin/test_server_notice.py
@@ -22,6 +22,7 @@ from synapse.server import HomeServer
 from synapse.storage.roommember import RoomsForUser
 from synapse.types import JsonDict
 from synapse.util import Clock
+from synapse.util.stringutils import random_string
 
 from tests import unittest
 from tests.unittest import override_config
@@ -413,11 +414,24 @@ class ServerNoticeTestCase(unittest.HomeserverTestCase):
         self.assertEqual(messages[0]["content"]["body"], "test msg one")
         self.assertEqual(messages[0]["sender"], "@notices:test")
 
+        random_string(16)
+
         # shut down and purge room
         self.get_success(
-            self.room_shutdown_handler.shutdown_room(first_room_id, self.admin_user)
-        )
-        self.get_success(self.pagination_handler.purge_room(first_room_id))
+            self.room_shutdown_handler.shutdown_room(
+                first_room_id,
+                {
+                    "requester_user_id": self.admin_user,
+                    "new_room_user_id": None,
+                    "new_room_name": None,
+                    "message": None,
+                    "block": False,
+                    "purge": True,
+                    "force_purge": False,
+                },
+            )
+        )
+        self.get_success(self.pagination_handler.purge_room(first_room_id, force=False))
 
         # user is not member anymore
         self._check_invite_and_join_status(self.other_user, 0, 0)
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py
index 47c1d38ad7..7627823d3f 100644
--- a/tests/rest/client/test_rooms.py
+++ b/tests/rest/client/test_rooms.py
@@ -41,7 +41,6 @@ from synapse.api.errors import Codes, HttpResponseException
 from synapse.appservice import ApplicationService
 from synapse.events import EventBase
 from synapse.events.snapshot import EventContext
-from synapse.handlers.pagination import PurgeStatus
 from synapse.rest import admin
 from synapse.rest.client import account, directory, login, profile, register, room, sync
 from synapse.server import HomeServer
@@ -2086,11 +2085,8 @@ class RoomMessageListTestCase(RoomBase):
         self.assertEqual(len(chunk), 2, [event["content"] for event in chunk])
 
         # Purge every event before the second event.
-        purge_id = random_string(16)
-        pagination_handler._purges_by_id[purge_id] = PurgeStatus()
         self.get_success(
-            pagination_handler._purge_history(
-                purge_id=purge_id,
+            pagination_handler.purge_history(
                 room_id=self.room_id,
                 token=second_token_str,
                 delete_local_events=True,
-- 
cgit 1.5.1


From 6946209e671ec278d7648434500aeb2639c8c3c3 Mon Sep 17 00:00:00 2001
From: José Joaquín Atria <jjatria@gmail.com>
Date: Mon, 18 Sep 2023 12:32:01 +0100
Subject: Set email charset as utf-8 rather than utf8 (#16329)

---
 changelog.d/16329.bugfix       | 1 +
 synapse/handlers/send_email.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/16329.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16329.bugfix b/changelog.d/16329.bugfix
new file mode 100644
index 0000000000..2f1f7e8ffe
--- /dev/null
+++ b/changelog.d/16329.bugfix
@@ -0,0 +1 @@
+Use standard name for UTF-8 charset in emails.
diff --git a/synapse/handlers/send_email.py b/synapse/handlers/send_email.py
index 4f5fe62fe8..657d9b3559 100644
--- a/synapse/handlers/send_email.py
+++ b/synapse/handlers/send_email.py
@@ -174,8 +174,8 @@ class SendEmailHandler:
         if raw_to == "":
             raise RuntimeError("Invalid 'to' address")
 
-        html_part = MIMEText(html, "html", "utf8")
-        text_part = MIMEText(text, "plain", "utf8")
+        html_part = MIMEText(html, "html", "utf-8")
+        text_part = MIMEText(text, "plain", "utf-8")
 
         multipart_msg = MIMEMultipart("alternative")
         multipart_msg["Subject"] = subject
-- 
cgit 1.5.1


From 63d28a88c1d18c64ea7e23b6dd7483e6d5dcf881 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 18 Sep 2023 09:02:12 -0400
Subject: Additional validation of receipts (#16327)

Reject invalid receipts with a reasonable error message &
expand tests for receipts.
---
 changelog.d/16327.bugfix           |   1 +
 synapse/handlers/receipts.py       |  26 ++++-
 synapse/rest/client/read_marker.py |   2 +-
 synapse/rest/client/receipts.py    |   2 +-
 tests/rest/client/test_receipts.py | 221 +++++++++++++++++++++++++++++++++++--
 tests/rest/client/test_sync.py     | 154 +-------------------------
 6 files changed, 241 insertions(+), 165 deletions(-)
 create mode 100644 changelog.d/16327.bugfix

(limited to 'synapse')

diff --git a/changelog.d/16327.bugfix b/changelog.d/16327.bugfix
new file mode 100644
index 0000000000..be3d1b4f21
--- /dev/null
+++ b/changelog.d/16327.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where invalid receipts would be accepted.
diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py
index 2bacdebfb5..c7edada353 100644
--- a/synapse/handlers/receipts.py
+++ b/synapse/handlers/receipts.py
@@ -37,6 +37,8 @@ class ReceiptsHandler:
         self.server_name = hs.config.server.server_name
         self.store = hs.get_datastores().main
         self.event_auth_handler = hs.get_event_auth_handler()
+        self.event_handler = hs.get_event_handler()
+        self._storage_controllers = hs.get_storage_controllers()
 
         self.hs = hs
 
@@ -81,6 +83,20 @@ class ReceiptsHandler:
                 )
                 continue
 
+            # Let's check that the origin server is in the room before accepting the receipt.
+            # We don't want to block waiting on a partial state so take an
+            # approximation if needed.
+            domains = await self._storage_controllers.state.get_current_hosts_in_room_or_partial_state_approximation(
+                room_id
+            )
+            if origin not in domains:
+                logger.info(
+                    "Ignoring receipt for room %r from server %s as they're not in the room",
+                    room_id,
+                    origin,
+                )
+                continue
+
             for receipt_type, users in room_values.items():
                 for user_id, user_values in users.items():
                     if get_domain_from_id(user_id) != origin:
@@ -158,17 +174,23 @@ class ReceiptsHandler:
         self,
         room_id: str,
         receipt_type: str,
-        user_id: str,
+        user_id: UserID,
         event_id: str,
         thread_id: Optional[str],
     ) -> None:
         """Called when a client tells us a local user has read up to the given
         event_id in the room.
         """
+
+        # Ensure the room/event exists, this will raise an error if the user
+        # cannot view the event.
+        if not await self.event_handler.get_event(user_id, room_id, event_id):
+            return
+
         receipt = ReadReceipt(
             room_id=room_id,
             receipt_type=receipt_type,
-            user_id=user_id,
+            user_id=user_id.to_string(),
             event_ids=[event_id],
             thread_id=thread_id,
             data={"ts": int(self.clock.time_msec())},
diff --git a/synapse/rest/client/read_marker.py b/synapse/rest/client/read_marker.py
index 1707e51972..15e4d56cdb 100644
--- a/synapse/rest/client/read_marker.py
+++ b/synapse/rest/client/read_marker.py
@@ -84,7 +84,7 @@ class ReadMarkerRestServlet(RestServlet):
                 await self.receipts_handler.received_client_receipt(
                     room_id,
                     receipt_type,
-                    user_id=requester.user.to_string(),
+                    user_id=requester.user,
                     event_id=event_id,
                     # Setting the thread ID is not possible with the /read_markers endpoint.
                     thread_id=None,
diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py
index 869a374459..814d075faf 100644
--- a/synapse/rest/client/receipts.py
+++ b/synapse/rest/client/receipts.py
@@ -108,7 +108,7 @@ class ReceiptRestServlet(RestServlet):
             await self.receipts_handler.received_client_receipt(
                 room_id,
                 receipt_type,
-                user_id=requester.user.to_string(),
+                user_id=requester.user,
                 event_id=event_id,
                 thread_id=thread_id,
             )
diff --git a/tests/rest/client/test_receipts.py b/tests/rest/client/test_receipts.py
index 2a7fcea386..ec638c89b7 100644
--- a/tests/rest/client/test_receipts.py
+++ b/tests/rest/client/test_receipts.py
@@ -11,11 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from http import HTTPStatus
+from typing import Optional
+
 from twisted.test.proto_helpers import MemoryReactor
 
 import synapse.rest.admin
-from synapse.rest.client import login, receipts, register
+from synapse.api.constants import EduTypes, EventTypes, HistoryVisibility, ReceiptTypes
+from synapse.rest.client import login, receipts, room, sync
 from synapse.server import HomeServer
+from synapse.types import JsonDict
 from synapse.util import Clock
 
 from tests import unittest
@@ -24,30 +29,113 @@ from tests import unittest
 class ReceiptsTestCase(unittest.HomeserverTestCase):
     servlets = [
         login.register_servlets,
-        register.register_servlets,
         receipts.register_servlets,
         synapse.rest.admin.register_servlets,
+        room.register_servlets,
+        sync.register_servlets,
     ]
 
     def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        self.owner = self.register_user("owner", "pass")
-        self.owner_tok = self.login("owner", "pass")
+        self.url = "/sync?since=%s"
+        self.next_batch = "s0"
+
+        # Register the first user
+        self.user_id = self.register_user("kermit", "monkey")
+        self.tok = self.login("kermit", "monkey")
+
+        # Create the room
+        self.room_id = self.helper.create_room_as(self.user_id, tok=self.tok)
+
+        # Register the second user
+        self.user2 = self.register_user("kermit2", "monkey")
+        self.tok2 = self.login("kermit2", "monkey")
+
+        # Join the second user
+        self.helper.join(room=self.room_id, user=self.user2, tok=self.tok2)
 
     def test_send_receipt(self) -> None:
+        # Send a message.
+        res = self.helper.send(self.room_id, body="hello", tok=self.tok)
+
+        # Send a read receipt
+        channel = self.make_request(
+            "POST",
+            f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ}/{res['event_id']}",
+            {},
+            access_token=self.tok2,
+        )
+        self.assertEqual(channel.code, 200)
+        self.assertNotEqual(self._get_read_receipt(), None)
+
+    def test_send_receipt_unknown_event(self) -> None:
+        """Receipts sent for unknown events are ignored to not break message retention."""
+        # Attempt to send a receipt to an unknown room.
         channel = self.make_request(
             "POST",
             "/rooms/!abc:beep/receipt/m.read/$def",
             content={},
-            access_token=self.owner_tok,
+            access_token=self.tok2,
+        )
+        self.assertEqual(channel.code, 200, channel.result)
+        self.assertIsNone(self._get_read_receipt())
+
+        # Attempt to send a receipt to an unknown event.
+        channel = self.make_request(
+            "POST",
+            f"/rooms/{self.room_id}/receipt/m.read/$def",
+            content={},
+            access_token=self.tok2,
         )
         self.assertEqual(channel.code, 200, channel.result)
+        self.assertIsNone(self._get_read_receipt())
+
+    def test_send_receipt_unviewable_event(self) -> None:
+        """Receipts sent for unviewable events are errors."""
+        # Create a room where new users can't see events from before their join
+        # & send events into it.
+        room_id = self.helper.create_room_as(
+            self.user_id,
+            tok=self.tok,
+            extra_content={
+                "preset": "private_chat",
+                "initial_state": [
+                    {
+                        "content": {"history_visibility": HistoryVisibility.JOINED},
+                        "state_key": "",
+                        "type": EventTypes.RoomHistoryVisibility,
+                    }
+                ],
+            },
+        )
+        res = self.helper.send(room_id, body="hello", tok=self.tok)
+
+        # Attempt to send a receipt from the wrong user.
+        channel = self.make_request(
+            "POST",
+            f"/rooms/{room_id}/receipt/{ReceiptTypes.READ}/{res['event_id']}",
+            content={},
+            access_token=self.tok2,
+        )
+        self.assertEqual(channel.code, 403, channel.result)
+
+        # Join the user to the room, but they still can't see the event.
+        self.helper.invite(room_id, self.user_id, self.user2, tok=self.tok)
+        self.helper.join(room=room_id, user=self.user2, tok=self.tok2)
+
+        channel = self.make_request(
+            "POST",
+            f"/rooms/{room_id}/receipt/{ReceiptTypes.READ}/{res['event_id']}",
+            content={},
+            access_token=self.tok2,
+        )
+        self.assertEqual(channel.code, 403, channel.result)
 
     def test_send_receipt_invalid_room_id(self) -> None:
         channel = self.make_request(
             "POST",
             "/rooms/not-a-room-id/receipt/m.read/$def",
             content={},
-            access_token=self.owner_tok,
+            access_token=self.tok,
         )
         self.assertEqual(channel.code, 400, channel.result)
         self.assertEqual(
@@ -59,7 +147,7 @@ class ReceiptsTestCase(unittest.HomeserverTestCase):
             "POST",
             "/rooms/!abc:beep/receipt/m.read/not-an-event-id",
             content={},
-            access_token=self.owner_tok,
+            access_token=self.tok,
         )
         self.assertEqual(channel.code, 400, channel.result)
         self.assertEqual(
@@ -71,6 +159,123 @@ class ReceiptsTestCase(unittest.HomeserverTestCase):
             "POST",
             "/rooms/!abc:beep/receipt/invalid-receipt-type/$def",
             content={},
-            access_token=self.owner_tok,
+            access_token=self.tok,
         )
         self.assertEqual(channel.code, 400, channel.result)
+
+    def test_private_read_receipts(self) -> None:
+        # Send a message as the first user
+        res = self.helper.send(self.room_id, body="hello", tok=self.tok)
+
+        # Send a private read receipt to tell the server the first user's message was read
+        channel = self.make_request(
+            "POST",
+            f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ_PRIVATE}/{res['event_id']}",
+            {},
+            access_token=self.tok2,
+        )
+        self.assertEqual(channel.code, 200)
+
+        # Test that the first user can't see the other user's private read receipt
+        self.assertIsNone(self._get_read_receipt())
+
+    def test_public_receipt_can_override_private(self) -> None:
+        """
+        Sending a public read receipt to the same event which has a private read
+        receipt should cause that receipt to become public.
+        """
+        # Send a message as the first user
+        res = self.helper.send(self.room_id, body="hello", tok=self.tok)
+
+        # Send a private read receipt
+        channel = self.make_request(
+            "POST",
+            f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ_PRIVATE}/{res['event_id']}",
+            {},
+            access_token=self.tok2,
+        )
+        self.assertEqual(channel.code, 200)
+        self.assertIsNone(self._get_read_receipt())
+
+        # Send a public read receipt
+        channel = self.make_request(
+            "POST",
+            f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ}/{res['event_id']}",
+            {},
+            access_token=self.tok2,
+        )
+        self.assertEqual(channel.code, 200)
+
+        # Test that we did override the private read receipt
+        self.assertNotEqual(self._get_read_receipt(), None)
+
+    def test_private_receipt_cannot_override_public(self) -> None:
+        """
+        Sending a private read receipt to the same event which has a public read
+        receipt should cause no change.
+        """
+        # Send a message as the first user
+        res = self.helper.send(self.room_id, body="hello", tok=self.tok)
+
+        # Send a public read receipt
+        channel = self.make_request(
+            "POST",
+            f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ}/{res['event_id']}",
+            {},
+            access_token=self.tok2,
+        )
+        self.assertEqual(channel.code, 200)
+        self.assertNotEqual(self._get_read_receipt(), None)
+
+        # Send a private read receipt
+        channel = self.make_request(
+            "POST",
+            f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ_PRIVATE}/{res['event_id']}",
+            {},
+            access_token=self.tok2,
+        )
+        self.assertEqual(channel.code, 200)
+
+        # Test that we didn't override the public read receipt
+        self.assertIsNone(self._get_read_receipt())
+
+    def test_read_receipt_with_empty_body_is_rejected(self) -> None:
+        # Send a message as the first user
+        res = self.helper.send(self.room_id, body="hello", tok=self.tok)
+
+        # Send a read receipt for this message with an empty body
+        channel = self.make_request(
+            "POST",
+            f"/rooms/{self.room_id}/receipt/m.read/{res['event_id']}",
+            access_token=self.tok2,
+        )
+        self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST)
+        self.assertEqual(channel.json_body["errcode"], "M_NOT_JSON", channel.json_body)
+
+    def _get_read_receipt(self) -> Optional[JsonDict]:
+        """Syncs and returns the read receipt."""
+
+        # Checks if event is a read receipt
+        def is_read_receipt(event: JsonDict) -> bool:
+            return event["type"] == EduTypes.RECEIPT
+
+        # Sync
+        channel = self.make_request(
+            "GET",
+            self.url % self.next_batch,
+            access_token=self.tok,
+        )
+        self.assertEqual(channel.code, 200)
+
+        # Store the next batch for the next request.
+        self.next_batch = channel.json_body["next_batch"]
+
+        if channel.json_body.get("rooms", None) is None:
+            return None
+
+        # Return the read receipt
+        ephemeral_events = channel.json_body["rooms"]["join"][self.room_id][
+            "ephemeral"
+        ]["events"]
+        receipt_event = filter(is_read_receipt, ephemeral_events)
+        return next(receipt_event, None)
diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py
index 9c876c7a32..d60665254e 100644
--- a/tests/rest/client/test_sync.py
+++ b/tests/rest/client/test_sync.py
@@ -13,8 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import json
-from http import HTTPStatus
-from typing import List, Optional
+from typing import List
 
 from parameterized import parameterized
 
@@ -22,7 +21,6 @@ from twisted.test.proto_helpers import MemoryReactor
 
 import synapse.rest.admin
 from synapse.api.constants import (
-    EduTypes,
     EventContentFields,
     EventTypes,
     ReceiptTypes,
@@ -376,156 +374,6 @@ class SyncKnockTestCase(KnockingStrippedStateEventHelperMixin):
         )
 
 
-class ReadReceiptsTestCase(unittest.HomeserverTestCase):
-    servlets = [
-        synapse.rest.admin.register_servlets,
-        login.register_servlets,
-        receipts.register_servlets,
-        room.register_servlets,
-        sync.register_servlets,
-    ]
-
-    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
-        config = self.default_config()
-
-        return self.setup_test_homeserver(config=config)
-
-    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
-        self.url = "/sync?since=%s"
-        self.next_batch = "s0"
-
-        # Register the first user
-        self.user_id = self.register_user("kermit", "monkey")
-        self.tok = self.login("kermit", "monkey")
-
-        # Create the room
-        self.room_id = self.helper.create_room_as(self.user_id, tok=self.tok)
-
-        # Register the second user
-        self.user2 = self.register_user("kermit2", "monkey")
-        self.tok2 = self.login("kermit2", "monkey")
-
-        # Join the second user
-        self.helper.join(room=self.room_id, user=self.user2, tok=self.tok2)
-
-    def test_private_read_receipts(self) -> None:
-        # Send a message as the first user
-        res = self.helper.send(self.room_id, body="hello", tok=self.tok)
-
-        # Send a private read receipt to tell the server the first user's message was read
-        channel = self.make_request(
-            "POST",
-            f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ_PRIVATE}/{res['event_id']}",
-            {},
-            access_token=self.tok2,
-        )
-        self.assertEqual(channel.code, 200)
-
-        # Test that the first user can't see the other user's private read receipt
-        self.assertIsNone(self._get_read_receipt())
-
-    def test_public_receipt_can_override_private(self) -> None:
-        """
-        Sending a public read receipt to the same event which has a private read
-        receipt should cause that receipt to become public.
-        """
-        # Send a message as the first user
-        res = self.helper.send(self.room_id, body="hello", tok=self.tok)
-
-        # Send a private read receipt
-        channel = self.make_request(
-            "POST",
-            f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ_PRIVATE}/{res['event_id']}",
-            {},
-            access_token=self.tok2,
-        )
-        self.assertEqual(channel.code, 200)
-        self.assertIsNone(self._get_read_receipt())
-
-        # Send a public read receipt
-        channel = self.make_request(
-            "POST",
-            f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ}/{res['event_id']}",
-            {},
-            access_token=self.tok2,
-        )
-        self.assertEqual(channel.code, 200)
-
-        # Test that we did override the private read receipt
-        self.assertNotEqual(self._get_read_receipt(), None)
-
-    def test_private_receipt_cannot_override_public(self) -> None:
-        """
-        Sending a private read receipt to the same event which has a public read
-        receipt should cause no change.
-        """
-        # Send a message as the first user
-        res = self.helper.send(self.room_id, body="hello", tok=self.tok)
-
-        # Send a public read receipt
-        channel = self.make_request(
-            "POST",
-            f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ}/{res['event_id']}",
-            {},
-            access_token=self.tok2,
-        )
-        self.assertEqual(channel.code, 200)
-        self.assertNotEqual(self._get_read_receipt(), None)
-
-        # Send a private read receipt
-        channel = self.make_request(
-            "POST",
-            f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ_PRIVATE}/{res['event_id']}",
-            {},
-            access_token=self.tok2,
-        )
-        self.assertEqual(channel.code, 200)
-
-        # Test that we didn't override the public read receipt
-        self.assertIsNone(self._get_read_receipt())
-
-    def test_read_receipt_with_empty_body_is_rejected(self) -> None:
-        # Send a message as the first user
-        res = self.helper.send(self.room_id, body="hello", tok=self.tok)
-
-        # Send a read receipt for this message with an empty body
-        channel = self.make_request(
-            "POST",
-            f"/rooms/{self.room_id}/receipt/m.read/{res['event_id']}",
-            access_token=self.tok2,
-        )
-        self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST)
-        self.assertEqual(channel.json_body["errcode"], "M_NOT_JSON", channel.json_body)
-
-    def _get_read_receipt(self) -> Optional[JsonDict]:
-        """Syncs and returns the read receipt."""
-
-        # Checks if event is a read receipt
-        def is_read_receipt(event: JsonDict) -> bool:
-            return event["type"] == EduTypes.RECEIPT
-
-        # Sync
-        channel = self.make_request(
-            "GET",
-            self.url % self.next_batch,
-            access_token=self.tok,
-        )
-        self.assertEqual(channel.code, 200)
-
-        # Store the next batch for the next request.
-        self.next_batch = channel.json_body["next_batch"]
-
-        if channel.json_body.get("rooms", None) is None:
-            return None
-
-        # Return the read receipt
-        ephemeral_events = channel.json_body["rooms"]["join"][self.room_id][
-            "ephemeral"
-        ]["events"]
-        receipt_event = filter(is_read_receipt, ephemeral_events)
-        return next(receipt_event, None)
-
-
 class UnreadMessagesTestCase(unittest.HomeserverTestCase):
     servlets = [
         synapse.rest.admin.register_servlets,
-- 
cgit 1.5.1


From 85bfd4735e0b6e31d530f692d7113b4fec89e6b3 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 18 Sep 2023 09:29:05 -0400
Subject: Return an immutable value from get_latest_event_ids_in_room. (#16326)

---
 changelog.d/16326.misc                             |  1 +
 synapse/events/builder.py                          |  2 +-
 synapse/handlers/federation_event.py               |  8 +++----
 synapse/storage/controllers/persist_events.py      |  9 ++++----
 synapse/storage/databases/main/event_federation.py |  8 ++++---
 synapse/storage/databases/main/events.py           |  2 +-
 tests/handlers/test_presence.py                    |  2 +-
 tests/replication/storage/test_events.py           |  4 ++--
 tests/replication/tcp/streams/test_events.py       | 10 ++++-----
 tests/replication/test_federation_sender_shard.py  |  2 +-
 tests/storage/test_cleanup_extrems.py              | 14 ++++++------
 tests/test_federation.py                           | 26 ++++++++++++++--------
 12 files changed, 48 insertions(+), 40 deletions(-)
 create mode 100644 changelog.d/16326.misc

(limited to 'synapse')

diff --git a/changelog.d/16326.misc b/changelog.d/16326.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/16326.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/synapse/events/builder.py b/synapse/events/builder.py
index 1165c017ba..43469b170f 100644
--- a/synapse/events/builder.py
+++ b/synapse/events/builder.py
@@ -103,7 +103,7 @@ class EventBuilder:
 
     async def build(
         self,
-        prev_event_ids: StrCollection,
+        prev_event_ids: List[str],
         auth_event_ids: Optional[List[str]],
         depth: Optional[int] = None,
     ) -> EventBase:
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index d32d224d56..eedde97ab0 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -723,12 +723,11 @@ class FederationEventHandler:
         if not prevs - seen:
             return
 
-        latest_list = await self._store.get_latest_event_ids_in_room(room_id)
+        latest_frozen = await self._store.get_latest_event_ids_in_room(room_id)
 
         # We add the prev events that we have seen to the latest
         # list to ensure the remote server doesn't give them to us
-        latest = set(latest_list)
-        latest |= seen
+        latest = seen | latest_frozen
 
         logger.info(
             "Requesting missing events between %s and %s",
@@ -1976,8 +1975,7 @@ class FederationEventHandler:
             # partial and full state and may not be accurate.
             return
 
-        extrem_ids_list = await self._store.get_latest_event_ids_in_room(event.room_id)
-        extrem_ids = set(extrem_ids_list)
+        extrem_ids = await self._store.get_latest_event_ids_in_room(event.room_id)
         prev_event_ids = set(event.prev_event_ids())
 
         if extrem_ids == prev_event_ids:
diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py
index 6864f93090..f39ae2d635 100644
--- a/synapse/storage/controllers/persist_events.py
+++ b/synapse/storage/controllers/persist_events.py
@@ -19,6 +19,7 @@ import logging
 from collections import deque
 from typing import (
     TYPE_CHECKING,
+    AbstractSet,
     Any,
     Awaitable,
     Callable,
@@ -618,7 +619,7 @@ class EventsPersistenceStorageController:
                         )
 
                     for room_id, ev_ctx_rm in events_by_room.items():
-                        latest_event_ids = set(
+                        latest_event_ids = (
                             await self.main_store.get_latest_event_ids_in_room(room_id)
                         )
                         new_latest_event_ids = await self._calculate_new_extremities(
@@ -740,7 +741,7 @@ class EventsPersistenceStorageController:
         self,
         room_id: str,
         event_contexts: List[Tuple[EventBase, EventContext]],
-        latest_event_ids: Collection[str],
+        latest_event_ids: AbstractSet[str],
     ) -> Set[str]:
         """Calculates the new forward extremities for a room given events to
         persist.
@@ -758,8 +759,6 @@ class EventsPersistenceStorageController:
             and not event.internal_metadata.is_soft_failed()
         ]
 
-        latest_event_ids = set(latest_event_ids)
-
         # start with the existing forward extremities
         result = set(latest_event_ids)
 
@@ -798,7 +797,7 @@ class EventsPersistenceStorageController:
         self,
         room_id: str,
         events_context: List[Tuple[EventBase, EventContext]],
-        old_latest_event_ids: Set[str],
+        old_latest_event_ids: AbstractSet[str],
         new_latest_event_ids: Set[str],
     ) -> Tuple[Optional[StateMap[str]], Optional[StateMap[str]], Set[str]]:
         """Calculate the current state dict after adding some new events to
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 09de8f55e2..afffa54985 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -19,6 +19,7 @@ from typing import (
     TYPE_CHECKING,
     Collection,
     Dict,
+    FrozenSet,
     Iterable,
     List,
     Optional,
@@ -47,7 +48,7 @@ from synapse.storage.database import (
 from synapse.storage.databases.main.events_worker import EventsWorkerStore
 from synapse.storage.databases.main.signatures import SignatureWorkerStore
 from synapse.storage.engines import PostgresEngine, Sqlite3Engine
-from synapse.types import JsonDict, StrCollection, StrSequence
+from synapse.types import JsonDict, StrCollection
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached
 from synapse.util.caches.lrucache import LruCache
@@ -1179,13 +1180,14 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
         )
 
     @cached(max_entries=5000, iterable=True)
-    async def get_latest_event_ids_in_room(self, room_id: str) -> StrSequence:
-        return await self.db_pool.simple_select_onecol(
+    async def get_latest_event_ids_in_room(self, room_id: str) -> FrozenSet[str]:
+        event_ids = await self.db_pool.simple_select_onecol(
             table="event_forward_extremities",
             keyvalues={"room_id": room_id},
             retcol="event_id",
             desc="get_latest_event_ids_in_room",
         )
+        return frozenset(event_ids)
 
     async def get_min_depth(self, room_id: str) -> Optional[int]:
         """For the given room, get the minimum depth we have seen for it."""
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 0c1ed75240..bc8474a589 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -222,7 +222,7 @@ class PersistEventsStore:
 
             for room_id, latest_event_ids in new_forward_extremities.items():
                 self.store.get_latest_event_ids_in_room.prefill(
-                    (room_id,), list(latest_event_ids)
+                    (room_id,), frozenset(latest_event_ids)
                 )
 
     async def _get_events_which_are_prevs(self, event_ids: Iterable[str]) -> List[str]:
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index 638787b029..41c8c44e02 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -1858,7 +1858,7 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase):
         )
 
         event = self.get_success(
-            builder.build(prev_event_ids=prev_event_ids, auth_event_ids=None)
+            builder.build(prev_event_ids=list(prev_event_ids), auth_event_ids=None)
         )
 
         self.get_success(self.federation_event_handler.on_receive_pdu(hostname, event))
diff --git a/tests/replication/storage/test_events.py b/tests/replication/storage/test_events.py
index af25815fa5..33c277a38a 100644
--- a/tests/replication/storage/test_events.py
+++ b/tests/replication/storage/test_events.py
@@ -90,7 +90,7 @@ class EventsWorkerStoreTestCase(BaseWorkerStoreTestCase):
     def test_get_latest_event_ids_in_room(self) -> None:
         create = self.persist(type="m.room.create", key="", creator=USER_ID)
         self.replicate()
-        self.check("get_latest_event_ids_in_room", (ROOM_ID,), [create.event_id])
+        self.check("get_latest_event_ids_in_room", (ROOM_ID,), {create.event_id})
 
         join = self.persist(
             type="m.room.member",
@@ -99,7 +99,7 @@ class EventsWorkerStoreTestCase(BaseWorkerStoreTestCase):
             prev_events=[(create.event_id, {})],
         )
         self.replicate()
-        self.check("get_latest_event_ids_in_room", (ROOM_ID,), [join.event_id])
+        self.check("get_latest_event_ids_in_room", (ROOM_ID,), {join.event_id})
 
     def test_redactions(self) -> None:
         self.persist(type="m.room.create", key="", creator=USER_ID)
diff --git a/tests/replication/tcp/streams/test_events.py b/tests/replication/tcp/streams/test_events.py
index 65ef4bb160..128fc3e046 100644
--- a/tests/replication/tcp/streams/test_events.py
+++ b/tests/replication/tcp/streams/test_events.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, List, Optional, Sequence
+from typing import Any, List, Optional
 
 from twisted.test.proto_helpers import MemoryReactor
 
@@ -139,7 +139,7 @@ class EventsStreamTestCase(BaseStreamTestCase):
         )
 
         # this is the point in the DAG where we make a fork
-        fork_point: Sequence[str] = self.get_success(
+        fork_point = self.get_success(
             self.hs.get_datastores().main.get_latest_event_ids_in_room(self.room_id)
         )
 
@@ -294,7 +294,7 @@ class EventsStreamTestCase(BaseStreamTestCase):
         )
 
         # this is the point in the DAG where we make a fork
-        fork_point: Sequence[str] = self.get_success(
+        fork_point = self.get_success(
             self.hs.get_datastores().main.get_latest_event_ids_in_room(self.room_id)
         )
 
@@ -316,14 +316,14 @@ class EventsStreamTestCase(BaseStreamTestCase):
         self.test_handler.received_rdata_rows.clear()
 
         # now roll back all that state by de-modding the users
-        prev_events = fork_point
+        prev_events = list(fork_point)
         pl_events = []
         for u in user_ids:
             pls["users"][u] = 0
             e = self.get_success(
                 inject_event(
                     self.hs,
-                    prev_event_ids=list(prev_events),
+                    prev_event_ids=prev_events,
                     type=EventTypes.PowerLevels,
                     state_key="",
                     sender=self.user_id,
diff --git a/tests/replication/test_federation_sender_shard.py b/tests/replication/test_federation_sender_shard.py
index 9b28cd474f..59f4fdc70b 100644
--- a/tests/replication/test_federation_sender_shard.py
+++ b/tests/replication/test_federation_sender_shard.py
@@ -261,7 +261,7 @@ class FederationSenderTestCase(BaseMultiWorkerStreamTestCase):
 
         builder = factory.for_room_version(room_version, event_dict)
         join_event = self.get_success(
-            builder.build(prev_event_ids=prev_event_ids, auth_event_ids=None)
+            builder.build(prev_event_ids=list(prev_event_ids), auth_event_ids=None)
         )
 
         self.get_success(federation.on_send_membership_event(remote_server, join_event))
diff --git a/tests/storage/test_cleanup_extrems.py b/tests/storage/test_cleanup_extrems.py
index 7de109966d..ceb9597dd3 100644
--- a/tests/storage/test_cleanup_extrems.py
+++ b/tests/storage/test_cleanup_extrems.py
@@ -120,7 +120,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
             self.store.get_latest_event_ids_in_room(self.room_id)
         )
 
-        self.assertEqual(latest_event_ids, [event_id_4])
+        self.assertEqual(latest_event_ids, {event_id_4})
 
     def test_basic_cleanup(self) -> None:
         """Test that extremities are correctly calculated in the presence of
@@ -147,7 +147,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
         latest_event_ids = self.get_success(
             self.store.get_latest_event_ids_in_room(self.room_id)
         )
-        self.assertEqual(set(latest_event_ids), {event_id_a, event_id_b})
+        self.assertEqual(latest_event_ids, {event_id_a, event_id_b})
 
         # Run the background update and check it did the right thing
         self.run_background_update()
@@ -155,7 +155,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
         latest_event_ids = self.get_success(
             self.store.get_latest_event_ids_in_room(self.room_id)
         )
-        self.assertEqual(latest_event_ids, [event_id_b])
+        self.assertEqual(latest_event_ids, {event_id_b})
 
     def test_chain_of_fail_cleanup(self) -> None:
         """Test that extremities are correctly calculated in the presence of
@@ -185,7 +185,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
         latest_event_ids = self.get_success(
             self.store.get_latest_event_ids_in_room(self.room_id)
         )
-        self.assertEqual(set(latest_event_ids), {event_id_a, event_id_b})
+        self.assertEqual(latest_event_ids, {event_id_a, event_id_b})
 
         # Run the background update and check it did the right thing
         self.run_background_update()
@@ -193,7 +193,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
         latest_event_ids = self.get_success(
             self.store.get_latest_event_ids_in_room(self.room_id)
         )
-        self.assertEqual(latest_event_ids, [event_id_b])
+        self.assertEqual(latest_event_ids, {event_id_b})
 
     def test_forked_graph_cleanup(self) -> None:
         r"""Test that extremities are correctly calculated in the presence of
@@ -240,7 +240,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
         latest_event_ids = self.get_success(
             self.store.get_latest_event_ids_in_room(self.room_id)
         )
-        self.assertEqual(set(latest_event_ids), {event_id_a, event_id_b, event_id_c})
+        self.assertEqual(latest_event_ids, {event_id_a, event_id_b, event_id_c})
 
         # Run the background update and check it did the right thing
         self.run_background_update()
@@ -248,7 +248,7 @@ class CleanupExtremBackgroundUpdateStoreTestCase(HomeserverTestCase):
         latest_event_ids = self.get_success(
             self.store.get_latest_event_ids_in_room(self.room_id)
         )
-        self.assertEqual(set(latest_event_ids), {event_id_b, event_id_c})
+        self.assertEqual(latest_event_ids, {event_id_b, event_id_c})
 
 
 class CleanupExtremDummyEventsTestCase(HomeserverTestCase):
diff --git a/tests/test_federation.py b/tests/test_federation.py
index f8ade6da38..1b0504709e 100644
--- a/tests/test_federation.py
+++ b/tests/test_federation.py
@@ -51,9 +51,15 @@ class MessageAcceptTests(unittest.HomeserverTestCase):
         self.store = self.hs.get_datastores().main
 
         # Figure out what the most recent event is
-        most_recent = self.get_success(
-            self.hs.get_datastores().main.get_latest_event_ids_in_room(self.room_id)
-        )[0]
+        most_recent = next(
+            iter(
+                self.get_success(
+                    self.hs.get_datastores().main.get_latest_event_ids_in_room(
+                        self.room_id
+                    )
+                )
+            )
+        )
 
         join_event = make_event_from_dict(
             {
@@ -100,8 +106,8 @@ class MessageAcceptTests(unittest.HomeserverTestCase):
 
         # Make sure we actually joined the room
         self.assertEqual(
-            self.get_success(self.store.get_latest_event_ids_in_room(self.room_id))[0],
-            "$join:test.serv",
+            self.get_success(self.store.get_latest_event_ids_in_room(self.room_id)),
+            {"$join:test.serv"},
         )
 
     def test_cant_hide_direct_ancestors(self) -> None:
@@ -127,9 +133,11 @@ class MessageAcceptTests(unittest.HomeserverTestCase):
         self.http_client.post_json = post_json
 
         # Figure out what the most recent event is
-        most_recent = self.get_success(
-            self.store.get_latest_event_ids_in_room(self.room_id)
-        )[0]
+        most_recent = next(
+            iter(
+                self.get_success(self.store.get_latest_event_ids_in_room(self.room_id))
+            )
+        )
 
         # Now lie about an event
         lying_event = make_event_from_dict(
@@ -165,7 +173,7 @@ class MessageAcceptTests(unittest.HomeserverTestCase):
 
         # Make sure the invalid event isn't there
         extrem = self.get_success(self.store.get_latest_event_ids_in_room(self.room_id))
-        self.assertEqual(extrem[0], "$join:test.serv")
+        self.assertEqual(extrem, {"$join:test.serv"})
 
     def test_retry_device_list_resync(self) -> None:
         """Tests that device lists are marked as stale if they couldn't be synced, and
-- 
cgit 1.5.1


From c1e244c8f70ff1a23e358e1608c555f9722dee1f Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 18 Sep 2023 09:55:04 -0400
Subject: Make cached account data/tags/admin types immutable (#16325)

---
 changelog.d/16325.misc                             |  1 +
 synapse/app/admin_cmd.py                           | 14 +++++------
 synapse/handlers/admin.py                          | 18 +++++++--------
 synapse/handlers/sync.py                           | 27 +++++++++++++---------
 synapse/rest/admin/users.py                        |  8 +++----
 synapse/rest/client/account_data.py                | 10 ++++----
 synapse/storage/databases/main/account_data.py     | 14 +++++------
 .../databases/main/experimental_features.py        |  7 +++---
 synapse/storage/databases/main/tags.py             |  6 ++---
 9 files changed, 55 insertions(+), 50 deletions(-)
 create mode 100644 changelog.d/16325.misc

(limited to 'synapse')

diff --git a/changelog.d/16325.misc b/changelog.d/16325.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/16325.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py
index f9aada269a..aa24f7da6c 100644
--- a/synapse/app/admin_cmd.py
+++ b/synapse/app/admin_cmd.py
@@ -17,7 +17,7 @@ import logging
 import os
 import sys
 import tempfile
-from typing import List, Mapping, Optional
+from typing import List, Mapping, Optional, Sequence
 
 from twisted.internet import defer, task
 
@@ -57,7 +57,7 @@ from synapse.storage.databases.main.state import StateGroupWorkerStore
 from synapse.storage.databases.main.stream import StreamWorkerStore
 from synapse.storage.databases.main.tags import TagsWorkerStore
 from synapse.storage.databases.main.user_erasure_store import UserErasureWorkerStore
-from synapse.types import JsonDict, StateMap
+from synapse.types import JsonMapping, StateMap
 from synapse.util import SYNAPSE_VERSION
 from synapse.util.logcontext import LoggingContext
 
@@ -198,7 +198,7 @@ class FileExfiltrationWriter(ExfiltrationWriter):
             for event in state.values():
                 json.dump(event, fp=f)
 
-    def write_profile(self, profile: JsonDict) -> None:
+    def write_profile(self, profile: JsonMapping) -> None:
         user_directory = os.path.join(self.base_directory, "user_data")
         os.makedirs(user_directory, exist_ok=True)
         profile_file = os.path.join(user_directory, "profile")
@@ -206,7 +206,7 @@ class FileExfiltrationWriter(ExfiltrationWriter):
         with open(profile_file, "a") as f:
             json.dump(profile, fp=f)
 
-    def write_devices(self, devices: List[JsonDict]) -> None:
+    def write_devices(self, devices: Sequence[JsonMapping]) -> None:
         user_directory = os.path.join(self.base_directory, "user_data")
         os.makedirs(user_directory, exist_ok=True)
         device_file = os.path.join(user_directory, "devices")
@@ -215,7 +215,7 @@ class FileExfiltrationWriter(ExfiltrationWriter):
             with open(device_file, "a") as f:
                 json.dump(device, fp=f)
 
-    def write_connections(self, connections: List[JsonDict]) -> None:
+    def write_connections(self, connections: Sequence[JsonMapping]) -> None:
         user_directory = os.path.join(self.base_directory, "user_data")
         os.makedirs(user_directory, exist_ok=True)
         connection_file = os.path.join(user_directory, "connections")
@@ -225,7 +225,7 @@ class FileExfiltrationWriter(ExfiltrationWriter):
                 json.dump(connection, fp=f)
 
     def write_account_data(
-        self, file_name: str, account_data: Mapping[str, JsonDict]
+        self, file_name: str, account_data: Mapping[str, JsonMapping]
     ) -> None:
         account_data_directory = os.path.join(
             self.base_directory, "user_data", "account_data"
@@ -237,7 +237,7 @@ class FileExfiltrationWriter(ExfiltrationWriter):
         with open(account_data_file, "a") as f:
             json.dump(account_data, fp=f)
 
-    def write_media_id(self, media_id: str, media_metadata: JsonDict) -> None:
+    def write_media_id(self, media_id: str, media_metadata: JsonMapping) -> None:
         file_directory = os.path.join(self.base_directory, "media_ids")
         os.makedirs(file_directory, exist_ok=True)
         media_id_file = os.path.join(file_directory, media_id)
diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py
index 7092ff3449..ba9704a065 100644
--- a/synapse/handlers/admin.py
+++ b/synapse/handlers/admin.py
@@ -14,11 +14,11 @@
 
 import abc
 import logging
-from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Set
+from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Sequence, Set
 
 from synapse.api.constants import Direction, Membership
 from synapse.events import EventBase
-from synapse.types import JsonDict, RoomStreamToken, StateMap, UserID, UserInfo
+from synapse.types import JsonMapping, RoomStreamToken, StateMap, UserID, UserInfo
 from synapse.visibility import filter_events_for_client
 
 if TYPE_CHECKING:
@@ -35,7 +35,7 @@ class AdminHandler:
         self._state_storage_controller = self._storage_controllers.state
         self._msc3866_enabled = hs.config.experimental.msc3866.enabled
 
-    async def get_whois(self, user: UserID) -> JsonDict:
+    async def get_whois(self, user: UserID) -> JsonMapping:
         connections = []
 
         sessions = await self._store.get_user_ip_and_agents(user)
@@ -55,7 +55,7 @@ class AdminHandler:
 
         return ret
 
-    async def get_user(self, user: UserID) -> Optional[JsonDict]:
+    async def get_user(self, user: UserID) -> Optional[JsonMapping]:
         """Function to get user details"""
         user_info: Optional[UserInfo] = await self._store.get_user_by_id(
             user.to_string()
@@ -344,7 +344,7 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta):
         raise NotImplementedError()
 
     @abc.abstractmethod
-    def write_profile(self, profile: JsonDict) -> None:
+    def write_profile(self, profile: JsonMapping) -> None:
         """Write the profile of a user.
 
         Args:
@@ -353,7 +353,7 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta):
         raise NotImplementedError()
 
     @abc.abstractmethod
-    def write_devices(self, devices: List[JsonDict]) -> None:
+    def write_devices(self, devices: Sequence[JsonMapping]) -> None:
         """Write the devices of a user.
 
         Args:
@@ -362,7 +362,7 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta):
         raise NotImplementedError()
 
     @abc.abstractmethod
-    def write_connections(self, connections: List[JsonDict]) -> None:
+    def write_connections(self, connections: Sequence[JsonMapping]) -> None:
         """Write the connections of a user.
 
         Args:
@@ -372,7 +372,7 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta):
 
     @abc.abstractmethod
     def write_account_data(
-        self, file_name: str, account_data: Mapping[str, JsonDict]
+        self, file_name: str, account_data: Mapping[str, JsonMapping]
     ) -> None:
         """Write the account data of a user.
 
@@ -383,7 +383,7 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta):
         raise NotImplementedError()
 
     @abc.abstractmethod
-    def write_media_id(self, media_id: str, media_metadata: JsonDict) -> None:
+    def write_media_id(self, media_id: str, media_metadata: JsonMapping) -> None:
         """Write the media's metadata of a user.
         Exports only the metadata, as this can be fetched from the database via
         read only. In order to access the files, a connection to the correct
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index f1f19666d7..1a4d394eda 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -57,6 +57,7 @@ from synapse.storage.roommember import MemberSummary
 from synapse.types import (
     DeviceListUpdates,
     JsonDict,
+    JsonMapping,
     MutableStateMap,
     Requester,
     RoomStreamToken,
@@ -1793,19 +1794,23 @@ class SyncHandler:
             )
 
             if push_rules_changed:
-                global_account_data = dict(global_account_data)
-                global_account_data[
-                    AccountDataTypes.PUSH_RULES
-                ] = await self._push_rules_handler.push_rules_for_user(sync_config.user)
+                global_account_data = {
+                    AccountDataTypes.PUSH_RULES: await self._push_rules_handler.push_rules_for_user(
+                        sync_config.user
+                    ),
+                    **global_account_data,
+                }
         else:
             all_global_account_data = await self.store.get_global_account_data_for_user(
                 user_id
             )
 
-            global_account_data = dict(all_global_account_data)
-            global_account_data[
-                AccountDataTypes.PUSH_RULES
-            ] = await self._push_rules_handler.push_rules_for_user(sync_config.user)
+            global_account_data = {
+                AccountDataTypes.PUSH_RULES: await self._push_rules_handler.push_rules_for_user(
+                    sync_config.user
+                ),
+                **all_global_account_data,
+            }
 
         account_data_for_user = (
             await sync_config.filter_collection.filter_global_account_data(
@@ -1909,7 +1914,7 @@ class SyncHandler:
             blocks_all_rooms
             or sync_result_builder.sync_config.filter_collection.blocks_all_room_account_data()
         ):
-            account_data_by_room: Mapping[str, Mapping[str, JsonDict]] = {}
+            account_data_by_room: Mapping[str, Mapping[str, JsonMapping]] = {}
         elif since_token and not sync_result_builder.full_state:
             account_data_by_room = (
                 await self.store.get_updated_room_account_data_for_user(
@@ -2349,8 +2354,8 @@ class SyncHandler:
         sync_result_builder: "SyncResultBuilder",
         room_builder: "RoomSyncResultBuilder",
         ephemeral: List[JsonDict],
-        tags: Optional[Mapping[str, Mapping[str, Any]]],
-        account_data: Mapping[str, JsonDict],
+        tags: Optional[Mapping[str, JsonMapping]],
+        account_data: Mapping[str, JsonMapping],
         always_include: bool = False,
     ) -> None:
         """Populates the `joined` and `archived` section of `sync_result_builder`
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 91898a5c13..9aaa88e229 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -39,7 +39,7 @@ from synapse.rest.admin._base import (
 from synapse.rest.client._base import client_patterns
 from synapse.storage.databases.main.registration import ExternalIDReuseException
 from synapse.storage.databases.main.stats import UserSortOrder
-from synapse.types import JsonDict, UserID
+from synapse.types import JsonDict, JsonMapping, UserID
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -211,7 +211,7 @@ class UserRestServletV2(RestServlet):
 
     async def on_GET(
         self, request: SynapseRequest, user_id: str
-    ) -> Tuple[int, JsonDict]:
+    ) -> Tuple[int, JsonMapping]:
         await assert_requester_is_admin(self.auth, request)
 
         target_user = UserID.from_string(user_id)
@@ -226,7 +226,7 @@ class UserRestServletV2(RestServlet):
 
     async def on_PUT(
         self, request: SynapseRequest, user_id: str
-    ) -> Tuple[int, JsonDict]:
+    ) -> Tuple[int, JsonMapping]:
         requester = await self.auth.get_user_by_req(request)
         await assert_user_is_admin(self.auth, requester)
 
@@ -658,7 +658,7 @@ class WhoisRestServlet(RestServlet):
 
     async def on_GET(
         self, request: SynapseRequest, user_id: str
-    ) -> Tuple[int, JsonDict]:
+    ) -> Tuple[int, JsonMapping]:
         target_user = UserID.from_string(user_id)
         requester = await self.auth.get_user_by_req(request)
 
diff --git a/synapse/rest/client/account_data.py b/synapse/rest/client/account_data.py
index b1f9e9dc9b..ce0c4e7742 100644
--- a/synapse/rest/client/account_data.py
+++ b/synapse/rest/client/account_data.py
@@ -20,7 +20,7 @@ from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError
 from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_json_object_from_request
 from synapse.http.site import SynapseRequest
-from synapse.types import JsonDict, RoomID
+from synapse.types import JsonDict, JsonMapping, RoomID
 
 from ._base import client_patterns
 
@@ -95,7 +95,7 @@ class AccountDataServlet(RestServlet):
 
     async def on_GET(
         self, request: SynapseRequest, user_id: str, account_data_type: str
-    ) -> Tuple[int, JsonDict]:
+    ) -> Tuple[int, JsonMapping]:
         requester = await self.auth.get_user_by_req(request)
         if user_id != requester.user.to_string():
             raise AuthError(403, "Cannot get account data for other users.")
@@ -106,7 +106,7 @@ class AccountDataServlet(RestServlet):
             and account_data_type == AccountDataTypes.PUSH_RULES
         ):
             account_data: Optional[
-                JsonDict
+                JsonMapping
             ] = await self._push_rules_handler.push_rules_for_user(requester.user)
         else:
             account_data = await self.store.get_global_account_data_by_type_for_user(
@@ -236,7 +236,7 @@ class RoomAccountDataServlet(RestServlet):
         user_id: str,
         room_id: str,
         account_data_type: str,
-    ) -> Tuple[int, JsonDict]:
+    ) -> Tuple[int, JsonMapping]:
         requester = await self.auth.get_user_by_req(request)
         if user_id != requester.user.to_string():
             raise AuthError(403, "Cannot get account data for other users.")
@@ -253,7 +253,7 @@ class RoomAccountDataServlet(RestServlet):
             self._hs.config.experimental.msc4010_push_rules_account_data
             and account_data_type == AccountDataTypes.PUSH_RULES
         ):
-            account_data: Optional[JsonDict] = {}
+            account_data: Optional[JsonMapping] = {}
         else:
             account_data = await self.store.get_account_data_for_room_and_type(
                 user_id, room_id, account_data_type
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 8f7bdbc61a..80f146dd53 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -43,7 +43,7 @@ from synapse.storage.util.id_generators import (
     MultiWriterIdGenerator,
     StreamIdGenerator,
 )
-from synapse.types import JsonDict
+from synapse.types import JsonDict, JsonMapping
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached
 from synapse.util.caches.stream_change_cache import StreamChangeCache
@@ -119,7 +119,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
     @cached()
     async def get_global_account_data_for_user(
         self, user_id: str
-    ) -> Mapping[str, JsonDict]:
+    ) -> Mapping[str, JsonMapping]:
         """
         Get all the global client account_data for a user.
 
@@ -164,7 +164,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
     @cached()
     async def get_room_account_data_for_user(
         self, user_id: str
-    ) -> Mapping[str, Mapping[str, JsonDict]]:
+    ) -> Mapping[str, Mapping[str, JsonMapping]]:
         """
         Get all of the per-room client account_data for a user.
 
@@ -213,7 +213,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
     @cached(num_args=2, max_entries=5000, tree=True)
     async def get_global_account_data_by_type_for_user(
         self, user_id: str, data_type: str
-    ) -> Optional[JsonDict]:
+    ) -> Optional[JsonMapping]:
         """
         Returns:
             The account data.
@@ -265,7 +265,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
     @cached(num_args=2, tree=True)
     async def get_account_data_for_room(
         self, user_id: str, room_id: str
-    ) -> Mapping[str, JsonDict]:
+    ) -> Mapping[str, JsonMapping]:
         """Get all the client account_data for a user for a room.
 
         Args:
@@ -296,7 +296,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
     @cached(num_args=3, max_entries=5000, tree=True)
     async def get_account_data_for_room_and_type(
         self, user_id: str, room_id: str, account_data_type: str
-    ) -> Optional[JsonDict]:
+    ) -> Optional[JsonMapping]:
         """Get the client account_data of given type for a user for a room.
 
         Args:
@@ -394,7 +394,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
 
     async def get_updated_global_account_data_for_user(
         self, user_id: str, stream_id: int
-    ) -> Dict[str, JsonDict]:
+    ) -> Mapping[str, JsonMapping]:
         """Get all the global account_data that's changed for a user.
 
         Args:
diff --git a/synapse/storage/databases/main/experimental_features.py b/synapse/storage/databases/main/experimental_features.py
index cf3226ae5a..654f924019 100644
--- a/synapse/storage/databases/main/experimental_features.py
+++ b/synapse/storage/databases/main/experimental_features.py
@@ -12,11 +12,10 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 
-from typing import TYPE_CHECKING, Dict
+from typing import TYPE_CHECKING, Dict, FrozenSet
 
 from synapse.storage.database import DatabasePool, LoggingDatabaseConnection
 from synapse.storage.databases.main import CacheInvalidationWorkerStore
-from synapse.types import StrCollection
 from synapse.util.caches.descriptors import cached
 
 if TYPE_CHECKING:
@@ -34,7 +33,7 @@ class ExperimentalFeaturesStore(CacheInvalidationWorkerStore):
         super().__init__(database, db_conn, hs)
 
     @cached()
-    async def list_enabled_features(self, user_id: str) -> StrCollection:
+    async def list_enabled_features(self, user_id: str) -> FrozenSet[str]:
         """
         Checks to see what features are enabled for a given user
         Args:
@@ -49,7 +48,7 @@ class ExperimentalFeaturesStore(CacheInvalidationWorkerStore):
             ["feature"],
         )
 
-        return [feature["feature"] for feature in enabled]
+        return frozenset(feature["feature"] for feature in enabled)
 
     async def set_features_for_user(
         self,
diff --git a/synapse/storage/databases/main/tags.py b/synapse/storage/databases/main/tags.py
index c149a9eacb..61403a98cf 100644
--- a/synapse/storage/databases/main/tags.py
+++ b/synapse/storage/databases/main/tags.py
@@ -23,7 +23,7 @@ from synapse.storage._base import db_to_json
 from synapse.storage.database import LoggingTransaction
 from synapse.storage.databases.main.account_data import AccountDataWorkerStore
 from synapse.storage.util.id_generators import AbstractStreamIdGenerator
-from synapse.types import JsonDict
+from synapse.types import JsonDict, JsonMapping
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached
 
@@ -34,7 +34,7 @@ class TagsWorkerStore(AccountDataWorkerStore):
     @cached()
     async def get_tags_for_user(
         self, user_id: str
-    ) -> Mapping[str, Mapping[str, JsonDict]]:
+    ) -> Mapping[str, Mapping[str, JsonMapping]]:
         """Get all the tags for a user.
 
 
@@ -109,7 +109,7 @@ class TagsWorkerStore(AccountDataWorkerStore):
 
     async def get_updated_tags(
         self, user_id: str, stream_id: int
-    ) -> Mapping[str, Mapping[str, JsonDict]]:
+    ) -> Mapping[str, Mapping[str, JsonMapping]]:
         """Get all the tags for the rooms where the tags have changed since the
         given version
 
-- 
cgit 1.5.1


From eef2b9e34418e902baab1e730eb805eb56034cc2 Mon Sep 17 00:00:00 2001
From: Hanadi <hanadi.tamimi@gmail.com>
Date: Mon, 18 Sep 2023 16:37:51 +0200
Subject: Filter locked users in the admin API (#16328)

Co-authored-by: Hanadi Tamimi <hanadi.tamimi@sdui.de>
---
 changelog.d/16328.feature                  |  1 +
 docs/admin_api/user_admin_api.md           | 17 ++++++++++++-----
 synapse/rest/admin/users.py                |  6 +++++-
 synapse/storage/databases/main/__init__.py |  7 ++++++-
 synapse/storage/databases/main/stats.py    |  1 +
 tests/rest/admin/test_user.py              | 26 ++++++++++++++++++++++++++
 6 files changed, 51 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/16328.feature

(limited to 'synapse')

diff --git a/changelog.d/16328.feature b/changelog.d/16328.feature
new file mode 100644
index 0000000000..9fadf766cc
--- /dev/null
+++ b/changelog.d/16328.feature
@@ -0,0 +1 @@
+Report whether a user is `locked` in the [List Accounts admin API](https://matrix-org.github.io/synapse/latest/admin_api/user_admin_api.html#list-accounts), and exclude locked users by default.
diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md
index 975a7a0da4..f83facabe4 100644
--- a/docs/admin_api/user_admin_api.md
+++ b/docs/admin_api/user_admin_api.md
@@ -54,7 +54,8 @@ It returns a JSON body like the following:
             "external_id": "<user_id_provider_2>"
         }
     ],
-    "user_type": null
+    "user_type": null,
+    "locked": false
 }
 ```
 
@@ -103,7 +104,8 @@ with a body of:
     ],
     "admin": false,
     "deactivated": false,
-    "user_type": null
+    "user_type": null,
+    "locked": false
 }
 ```
 
@@ -184,7 +186,8 @@ A response body like the following is returned:
             "shadow_banned": 0,
             "displayname": "<User One>",
             "avatar_url": null,
-            "creation_ts": 1560432668000
+            "creation_ts": 1560432668000,
+            "locked": false
         }, {
             "name": "<user_id2>",
             "is_guest": 0,
@@ -195,7 +198,8 @@ A response body like the following is returned:
             "shadow_banned": 0,
             "displayname": "<User Two>",
             "avatar_url": "<avatar_url>",
-            "creation_ts": 1561550621000
+            "creation_ts": 1561550621000,
+            "locked": false
         }
     ],
     "next_token": "100",
@@ -249,6 +253,8 @@ The following parameters should be set in the URL:
 - `not_user_type` - Exclude certain user types, such as bot users, from the request.
    Can be provided multiple times. Possible values are `bot`, `support` or "empty string".
    "empty string" here means to exclude users without a type.
+- `locked` - string representing a bool - Is optional and if `true` will **include** locked users.
+  Defaults to `false` to exclude locked users. Note: Introduced in v1.93.
 
 Caution. The database only has indexes on the columns `name` and `creation_ts`.
 This means that if a different sort order is used (`is_guest`, `admin`,
@@ -274,10 +280,11 @@ The following fields are returned in the JSON response body:
   - `avatar_url` - string -  The user's avatar URL if they have set one.
   - `creation_ts` - integer - The user's creation timestamp in ms.
   - `last_seen_ts` - integer - The user's last activity timestamp in ms.
-
+  - `locked` - bool - Whether the user has been marked as locked. Note: Introduced in v1.93.
 - `next_token`: string representing a positive integer - Indication for pagination. See above.
 - `total` - integer - Total number of media.
 
+*Added in Synapse 1.93:* the `locked` query parameter and response field.
 
 ## Query current sessions for a user
 
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 9aaa88e229..5b743a1d03 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -66,6 +66,7 @@ class UsersRestServletV2(RestServlet):
     The parameter `deactivated` can be used to include deactivated users.
     The parameter `order_by` can be used to order the result.
     The parameter `not_user_type` can be used to exclude certain user types.
+    The parameter `locked` can be used to include locked users.
     Possible values are `bot`, `support` or "empty string".
     "empty string" here means to exclude users without a type.
     """
@@ -107,8 +108,9 @@ class UsersRestServletV2(RestServlet):
                 "The guests parameter is not supported when MSC3861 is enabled.",
                 errcode=Codes.INVALID_PARAM,
             )
-        deactivated = parse_boolean(request, "deactivated", default=False)
 
+        deactivated = parse_boolean(request, "deactivated", default=False)
+        locked = parse_boolean(request, "locked", default=False)
         admins = parse_boolean(request, "admins")
 
         # If support for MSC3866 is not enabled, apply no filtering based on the
@@ -133,6 +135,7 @@ class UsersRestServletV2(RestServlet):
                 UserSortOrder.SHADOW_BANNED.value,
                 UserSortOrder.CREATION_TS.value,
                 UserSortOrder.LAST_SEEN_TS.value,
+                UserSortOrder.LOCKED.value,
             ),
         )
 
@@ -154,6 +157,7 @@ class UsersRestServletV2(RestServlet):
             direction,
             approved,
             not_user_types,
+            locked,
         )
 
         # If support for MSC3866 is not enabled, don't show the approval flag.
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index 0836e247ef..101403578c 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -175,6 +175,7 @@ class DataStore(
         direction: Direction = Direction.FORWARDS,
         approved: bool = True,
         not_user_types: Optional[List[str]] = None,
+        locked: bool = False,
     ) -> Tuple[List[JsonDict], int]:
         """Function to retrieve a paginated list of users from
         users list. This will return a json list of users and the
@@ -194,6 +195,7 @@ class DataStore(
             direction: sort ascending or descending
             approved: whether to include approved users
             not_user_types: list of user types to exclude
+            locked: whether to include locked users
         Returns:
             A tuple of a list of mappings from user to information and a count of total users.
         """
@@ -226,6 +228,9 @@ class DataStore(
             if not deactivated:
                 filters.append("deactivated = 0")
 
+            if not locked:
+                filters.append("locked IS FALSE")
+
             if admins is not None:
                 if admins:
                     filters.append("admin = 1")
@@ -290,7 +295,7 @@ class DataStore(
             sql = f"""
                 SELECT name, user_type, is_guest, admin, deactivated, shadow_banned,
                 displayname, avatar_url, creation_ts * 1000 as creation_ts, approved,
-                eu.user_id is not null as erased, last_seen_ts
+                eu.user_id is not null as erased, last_seen_ts, locked
                 {sql_base}
                 ORDER BY {order_by_column} {order}, u.name ASC
                 LIMIT ? OFFSET ?
diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py
index 3a2966b9e4..9d403919e4 100644
--- a/synapse/storage/databases/main/stats.py
+++ b/synapse/storage/databases/main/stats.py
@@ -108,6 +108,7 @@ class UserSortOrder(Enum):
     SHADOW_BANNED = "shadow_banned"
     CREATION_TS = "creation_ts"
     LAST_SEEN_TS = "last_seen_ts"
+    LOCKED = "locked"
 
 
 class StatsStore(StateDeltasStore):
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index 761871b933..b326ad2c90 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -1146,6 +1146,32 @@ class UsersListTestCase(unittest.HomeserverTestCase):
         users = {user["name"]: user for user in channel.json_body["users"]}
         self.assertIs(users[user_id]["erased"], True)
 
+    def test_filter_locked(self) -> None:
+        # Create a new user.
+        user_id = self.register_user("lockme", "lockme")
+
+        # Lock them
+        self.get_success(self.store.set_user_locked_status(user_id, True))
+
+        # Locked user should appear in list users API
+        channel = self.make_request(
+            "GET",
+            self.url + "?locked=true",
+            access_token=self.admin_user_tok,
+        )
+        users = {user["name"]: user for user in channel.json_body["users"]}
+        self.assertIn(user_id, users)
+        self.assertTrue(users[user_id]["locked"])
+
+        # Locked user should not appear in list users API
+        channel = self.make_request(
+            "GET",
+            self.url + "?locked=false",
+            access_token=self.admin_user_tok,
+        )
+        users = {user["name"]: user for user in channel.json_body["users"]}
+        self.assertNotIn(user_id, users)
+
     def _order_test(
         self,
         expected_user_list: List[str],
-- 
cgit 1.5.1


From eee2b6642ddb28a6e0f850b958baa39eb74dc891 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 18 Sep 2023 11:30:43 -0400
Subject: Bump ruff from 0.0.286 to 0.0.290 (#16342)

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Patrick Cloke <patrickc@matrix.org>
---
 poetry.lock                                        | 38 +++++++++++-----------
 pyproject.toml                                     |  2 +-
 .../storage/databases/main/event_push_actions.py   |  5 +--
 synapse/storage/databases/main/events.py           | 10 +-----
 4 files changed, 22 insertions(+), 33 deletions(-)

(limited to 'synapse')

diff --git a/poetry.lock b/poetry.lock
index 17d0993a8b..8264e814b4 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2332,28 +2332,28 @@ files = [
 
 [[package]]
 name = "ruff"
-version = "0.0.286"
+version = "0.0.290"
 description = "An extremely fast Python linter, written in Rust."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "ruff-0.0.286-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:8e22cb557e7395893490e7f9cfea1073d19a5b1dd337f44fd81359b2767da4e9"},
-    {file = "ruff-0.0.286-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:68ed8c99c883ae79a9133cb1a86d7130feee0397fdf5ba385abf2d53e178d3fa"},
-    {file = "ruff-0.0.286-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8301f0bb4ec1a5b29cfaf15b83565136c47abefb771603241af9d6038f8981e8"},
-    {file = "ruff-0.0.286-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:acc4598f810bbc465ce0ed84417ac687e392c993a84c7eaf3abf97638701c1ec"},
-    {file = "ruff-0.0.286-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88c8e358b445eb66d47164fa38541cfcc267847d1e7a92dd186dddb1a0a9a17f"},
-    {file = "ruff-0.0.286-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:0433683d0c5dbcf6162a4beb2356e820a593243f1fa714072fec15e2e4f4c939"},
-    {file = "ruff-0.0.286-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddb61a0c4454cbe4623f4a07fef03c5ae921fe04fede8d15c6e36703c0a73b07"},
-    {file = "ruff-0.0.286-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:47549c7c0be24c8ae9f2bce6f1c49fbafea83bca80142d118306f08ec7414041"},
-    {file = "ruff-0.0.286-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:559aa793149ac23dc4310f94f2c83209eedb16908a0343663be19bec42233d25"},
-    {file = "ruff-0.0.286-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:d73cfb1c3352e7aa0ce6fb2321f36fa1d4a2c48d2ceac694cb03611ddf0e4db6"},
-    {file = "ruff-0.0.286-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:3dad93b1f973c6d1db4b6a5da8690c5625a3fa32bdf38e543a6936e634b83dc3"},
-    {file = "ruff-0.0.286-py3-none-musllinux_1_2_i686.whl", hash = "sha256:26afc0851f4fc3738afcf30f5f8b8612a31ac3455cb76e611deea80f5c0bf3ce"},
-    {file = "ruff-0.0.286-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:9b6b116d1c4000de1b9bf027131dbc3b8a70507788f794c6b09509d28952c512"},
-    {file = "ruff-0.0.286-py3-none-win32.whl", hash = "sha256:556e965ac07c1e8c1c2d759ac512e526ecff62c00fde1a046acb088d3cbc1a6c"},
-    {file = "ruff-0.0.286-py3-none-win_amd64.whl", hash = "sha256:5d295c758961376c84aaa92d16e643d110be32add7465e197bfdaec5a431a107"},
-    {file = "ruff-0.0.286-py3-none-win_arm64.whl", hash = "sha256:1d6142d53ab7f164204b3133d053c4958d4d11ec3a39abf23a40b13b0784e3f0"},
-    {file = "ruff-0.0.286.tar.gz", hash = "sha256:f1e9d169cce81a384a26ee5bb8c919fe9ae88255f39a1a69fd1ebab233a85ed2"},
+    {file = "ruff-0.0.290-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:0e2b09ac4213b11a3520221083866a5816616f3ae9da123037b8ab275066fbac"},
+    {file = "ruff-0.0.290-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:4ca6285aa77b3d966be32c9a3cd531655b3d4a0171e1f9bf26d66d0372186767"},
+    {file = "ruff-0.0.290-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35e3550d1d9f2157b0fcc77670f7bb59154f223bff281766e61bdd1dd854e0c5"},
+    {file = "ruff-0.0.290-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d748c8bd97874f5751aed73e8dde379ce32d16338123d07c18b25c9a2796574a"},
+    {file = "ruff-0.0.290-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:982af5ec67cecd099e2ef5e238650407fb40d56304910102d054c109f390bf3c"},
+    {file = "ruff-0.0.290-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:bbd37352cea4ee007c48a44c9bc45a21f7ba70a57edfe46842e346651e2b995a"},
+    {file = "ruff-0.0.290-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d9be6351b7889462912e0b8185a260c0219c35dfd920fb490c7f256f1d8313e"},
+    {file = "ruff-0.0.290-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75cdc7fe32dcf33b7cec306707552dda54632ac29402775b9e212a3c16aad5e6"},
+    {file = "ruff-0.0.290-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb07f37f7aecdbbc91d759c0c09870ce0fb3eed4025eebedf9c4b98c69abd527"},
+    {file = "ruff-0.0.290-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:2ab41bc0ba359d3f715fc7b705bdeef19c0461351306b70a4e247f836b9350ed"},
+    {file = "ruff-0.0.290-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:150bf8050214cea5b990945b66433bf9a5e0cef395c9bc0f50569e7de7540c86"},
+    {file = "ruff-0.0.290-py3-none-musllinux_1_2_i686.whl", hash = "sha256:75386ebc15fe5467248c039f5bf6a0cfe7bfc619ffbb8cd62406cd8811815fca"},
+    {file = "ruff-0.0.290-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:ac93eadf07bc4ab4c48d8bb4e427bf0f58f3a9c578862eb85d99d704669f5da0"},
+    {file = "ruff-0.0.290-py3-none-win32.whl", hash = "sha256:461fbd1fb9ca806d4e3d5c745a30e185f7cf3ca77293cdc17abb2f2a990ad3f7"},
+    {file = "ruff-0.0.290-py3-none-win_amd64.whl", hash = "sha256:f1f49f5ec967fd5778813780b12a5650ab0ebcb9ddcca28d642c689b36920796"},
+    {file = "ruff-0.0.290-py3-none-win_arm64.whl", hash = "sha256:ae5a92dfbdf1f0c689433c223f8dac0782c2b2584bd502dfdbc76475669f1ba1"},
+    {file = "ruff-0.0.290.tar.gz", hash = "sha256:949fecbc5467bb11b8db810a7fa53c7e02633856ee6bd1302b2f43adcd71b88d"},
 ]
 
 [[package]]
@@ -3347,4 +3347,4 @@ user-search = ["pyicu"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.8.0"
-content-hash = "4a3a82becd89b91e76e2bc2f8ba72123f665c517d9b841d9a34cd01b83a1adc3"
+content-hash = "104f108b3c966be05e17cf9975b4061942b354fe9a57cbf7372371fd56b1bf24"
diff --git a/pyproject.toml b/pyproject.toml
index 7f1e773159..de4dd61ea5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -320,7 +320,7 @@ all = [
 # This helps prevents merge conflicts when running a batch of dependabot updates.
 isort = ">=5.10.1"
 black = ">=22.7.0"
-ruff = "0.0.286"
+ruff = "0.0.290"
 
 # Typechecking
 lxml-stubs = ">=0.4.0"
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index b958a39aeb..ba99e63d26 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -1599,10 +1599,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
             txn,
             table="event_push_summary",
             key_names=("user_id", "room_id", "thread_id"),
-            key_values=[
-                (user_id, room_id, thread_id)
-                for user_id, room_id, thread_id in summaries
-            ],
+            key_values=list(summaries),
             value_names=("notif_count", "unread_count", "stream_ordering"),
             value_values=[
                 (
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index bc8474a589..790d058c43 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -827,15 +827,7 @@ class PersistEventsStore:
                 "target_chain_id",
                 "target_sequence_number",
             ),
-            values=[
-                (source_id, source_seq, target_id, target_seq)
-                for (
-                    source_id,
-                    source_seq,
-                    target_id,
-                    target_seq,
-                ) in chain_links.get_additions()
-            ],
+            values=list(chain_links.get_additions()),
         )
 
     @staticmethod
-- 
cgit 1.5.1


From d7c89c5908f714aa6a142a89da08fafc597ffe0e Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 19 Sep 2023 15:26:44 -0400
Subject: Return immutable objects for cachedList decorators (#16350)

---
 changelog.d/16350.misc                             |  1 +
 synapse/appservice/__init__.py                     |  6 +++---
 synapse/appservice/api.py                          |  6 +++---
 synapse/appservice/scheduler.py                    | 18 ++++++++--------
 synapse/handlers/appservice.py                     |  9 ++++----
 synapse/handlers/e2e_keys.py                       | 24 ++++++++-------------
 synapse/handlers/initial_sync.py                   |  3 ++-
 synapse/handlers/receipts.py                       | 13 +++++------
 synapse/handlers/sync.py                           |  4 ++--
 synapse/handlers/typing.py                         | 17 ++++++++++-----
 synapse/push/bulk_push_rule_evaluator.py           |  2 +-
 synapse/storage/databases/main/appservice.py       |  6 +++---
 synapse/storage/databases/main/devices.py          | 23 ++++++++++++++------
 synapse/storage/databases/main/end_to_end_keys.py  | 25 +++++++++++-----------
 synapse/storage/databases/main/events_worker.py    |  5 +++--
 synapse/storage/databases/main/keys.py             |  6 +++---
 synapse/storage/databases/main/presence.py         | 14 ++++++++++--
 synapse/storage/databases/main/push_rule.py        |  2 +-
 synapse/storage/databases/main/receipts.py         | 14 ++++++------
 synapse/storage/databases/main/relations.py        |  6 +++---
 synapse/storage/databases/main/roommember.py       |  8 +++----
 synapse/storage/databases/main/state.py            | 14 ++++++++++--
 synapse/storage/databases/main/transactions.py     |  4 ++--
 .../storage/databases/main/user_erasure_store.py   |  4 ++--
 24 files changed, 134 insertions(+), 100 deletions(-)
 create mode 100644 changelog.d/16350.misc

(limited to 'synapse')

diff --git a/changelog.d/16350.misc b/changelog.d/16350.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/16350.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py
index 2260a8f589..6f4aa53c93 100644
--- a/synapse/appservice/__init__.py
+++ b/synapse/appservice/__init__.py
@@ -23,7 +23,7 @@ from netaddr import IPSet
 
 from synapse.api.constants import EventTypes
 from synapse.events import EventBase
-from synapse.types import DeviceListUpdates, JsonDict, UserID
+from synapse.types import DeviceListUpdates, JsonDict, JsonMapping, UserID
 from synapse.util.caches.descriptors import _CacheContext, cached
 
 if TYPE_CHECKING:
@@ -379,8 +379,8 @@ class AppServiceTransaction:
         service: ApplicationService,
         id: int,
         events: Sequence[EventBase],
-        ephemeral: List[JsonDict],
-        to_device_messages: List[JsonDict],
+        ephemeral: List[JsonMapping],
+        to_device_messages: List[JsonMapping],
         one_time_keys_count: TransactionOneTimeKeysCount,
         unused_fallback_keys: TransactionUnusedFallbackKeys,
         device_list_summary: DeviceListUpdates,
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index b1523be208..c42e1f11aa 100644
--- a/synapse/appservice/api.py
+++ b/synapse/appservice/api.py
@@ -41,7 +41,7 @@ from synapse.events import EventBase
 from synapse.events.utils import SerializeEventConfig, serialize_event
 from synapse.http.client import SimpleHttpClient, is_unknown_endpoint
 from synapse.logging import opentracing
-from synapse.types import DeviceListUpdates, JsonDict, ThirdPartyInstanceID
+from synapse.types import DeviceListUpdates, JsonDict, JsonMapping, ThirdPartyInstanceID
 from synapse.util.caches.response_cache import ResponseCache
 
 if TYPE_CHECKING:
@@ -306,8 +306,8 @@ class ApplicationServiceApi(SimpleHttpClient):
         self,
         service: "ApplicationService",
         events: Sequence[EventBase],
-        ephemeral: List[JsonDict],
-        to_device_messages: List[JsonDict],
+        ephemeral: List[JsonMapping],
+        to_device_messages: List[JsonMapping],
         one_time_keys_count: TransactionOneTimeKeysCount,
         unused_fallback_keys: TransactionUnusedFallbackKeys,
         device_list_summary: DeviceListUpdates,
diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py
index 79f95f7653..18a30bc376 100644
--- a/synapse/appservice/scheduler.py
+++ b/synapse/appservice/scheduler.py
@@ -73,7 +73,7 @@ from synapse.events import EventBase
 from synapse.logging.context import run_in_background
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.databases.main import DataStore
-from synapse.types import DeviceListUpdates, JsonDict
+from synapse.types import DeviceListUpdates, JsonMapping
 from synapse.util import Clock
 
 if TYPE_CHECKING:
@@ -121,8 +121,8 @@ class ApplicationServiceScheduler:
         self,
         appservice: ApplicationService,
         events: Optional[Collection[EventBase]] = None,
-        ephemeral: Optional[Collection[JsonDict]] = None,
-        to_device_messages: Optional[Collection[JsonDict]] = None,
+        ephemeral: Optional[Collection[JsonMapping]] = None,
+        to_device_messages: Optional[Collection[JsonMapping]] = None,
         device_list_summary: Optional[DeviceListUpdates] = None,
     ) -> None:
         """
@@ -180,9 +180,9 @@ class _ServiceQueuer:
         # dict of {service_id: [events]}
         self.queued_events: Dict[str, List[EventBase]] = {}
         # dict of {service_id: [events]}
-        self.queued_ephemeral: Dict[str, List[JsonDict]] = {}
+        self.queued_ephemeral: Dict[str, List[JsonMapping]] = {}
         # dict of {service_id: [to_device_message_json]}
-        self.queued_to_device_messages: Dict[str, List[JsonDict]] = {}
+        self.queued_to_device_messages: Dict[str, List[JsonMapping]] = {}
         # dict of {service_id: [device_list_summary]}
         self.queued_device_list_summaries: Dict[str, List[DeviceListUpdates]] = {}
 
@@ -293,8 +293,8 @@ class _ServiceQueuer:
         self,
         service: ApplicationService,
         events: Iterable[EventBase],
-        ephemerals: Iterable[JsonDict],
-        to_device_messages: Iterable[JsonDict],
+        ephemerals: Iterable[JsonMapping],
+        to_device_messages: Iterable[JsonMapping],
     ) -> Tuple[TransactionOneTimeKeysCount, TransactionUnusedFallbackKeys]:
         """
         Given a list of the events, ephemeral messages and to-device messages,
@@ -364,8 +364,8 @@ class _TransactionController:
         self,
         service: ApplicationService,
         events: Sequence[EventBase],
-        ephemeral: Optional[List[JsonDict]] = None,
-        to_device_messages: Optional[List[JsonDict]] = None,
+        ephemeral: Optional[List[JsonMapping]] = None,
+        to_device_messages: Optional[List[JsonMapping]] = None,
         one_time_keys_count: Optional[TransactionOneTimeKeysCount] = None,
         unused_fallback_keys: Optional[TransactionUnusedFallbackKeys] = None,
         device_list_summary: Optional[DeviceListUpdates] = None,
diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py
index 6429545c98..7de7bd3289 100644
--- a/synapse/handlers/appservice.py
+++ b/synapse/handlers/appservice.py
@@ -46,6 +46,7 @@ from synapse.storage.databases.main.directory import RoomAliasMapping
 from synapse.types import (
     DeviceListUpdates,
     JsonDict,
+    JsonMapping,
     RoomAlias,
     RoomStreamToken,
     StreamKeyType,
@@ -397,7 +398,7 @@ class ApplicationServicesHandler:
 
     async def _handle_typing(
         self, service: ApplicationService, new_token: int
-    ) -> List[JsonDict]:
+    ) -> List[JsonMapping]:
         """
         Return the typing events since the given stream token that the given application
         service should receive.
@@ -432,7 +433,7 @@ class ApplicationServicesHandler:
 
     async def _handle_receipts(
         self, service: ApplicationService, new_token: int
-    ) -> List[JsonDict]:
+    ) -> List[JsonMapping]:
         """
         Return the latest read receipts that the given application service should receive.
 
@@ -471,7 +472,7 @@ class ApplicationServicesHandler:
         service: ApplicationService,
         users: Collection[Union[str, UserID]],
         new_token: Optional[int],
-    ) -> List[JsonDict]:
+    ) -> List[JsonMapping]:
         """
         Return the latest presence updates that the given application service should receive.
 
@@ -491,7 +492,7 @@ class ApplicationServicesHandler:
             A list of json dictionaries containing data derived from the presence events
             that should be sent to the given application service.
         """
-        events: List[JsonDict] = []
+        events: List[JsonMapping] = []
         presence_source = self.event_sources.sources.presence
         from_key = await self.store.get_type_stream_id_for_appservice(
             service, "presence"
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index ad075497c8..8c6432035d 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Optional, Tuple
+from typing import TYPE_CHECKING, Dict, Iterable, List, Mapping, Optional, Tuple
 
 import attr
 from canonicaljson import encode_canonical_json
@@ -31,6 +31,7 @@ from synapse.logging.context import make_deferred_yieldable, run_in_background
 from synapse.logging.opentracing import log_kv, set_tag, tag_args, trace
 from synapse.types import (
     JsonDict,
+    JsonMapping,
     UserID,
     get_domain_from_id,
     get_verify_key_from_cross_signing_key,
@@ -272,11 +273,7 @@ class E2eKeysHandler:
                 delay_cancellation=True,
             )
 
-            ret = {"device_keys": results, "failures": failures}
-
-            ret.update(cross_signing_keys)
-
-            return ret
+            return {"device_keys": results, "failures": failures, **cross_signing_keys}
 
     @trace
     async def _query_devices_for_destination(
@@ -408,7 +405,7 @@ class E2eKeysHandler:
     @cancellable
     async def get_cross_signing_keys_from_cache(
         self, query: Iterable[str], from_user_id: Optional[str]
-    ) -> Dict[str, Dict[str, dict]]:
+    ) -> Dict[str, Dict[str, JsonMapping]]:
         """Get cross-signing keys for users from the database
 
         Args:
@@ -551,16 +548,13 @@ class E2eKeysHandler:
                 self.config.federation.allow_device_name_lookup_over_federation
             ),
         )
-        ret = {"device_keys": res}
 
         # add in the cross-signing keys
         cross_signing_keys = await self.get_cross_signing_keys_from_cache(
             device_keys_query, None
         )
 
-        ret.update(cross_signing_keys)
-
-        return ret
+        return {"device_keys": res, **cross_signing_keys}
 
     async def claim_local_one_time_keys(
         self,
@@ -1127,7 +1121,7 @@ class E2eKeysHandler:
         user_id: str,
         master_key_id: str,
         signed_master_key: JsonDict,
-        stored_master_key: JsonDict,
+        stored_master_key: JsonMapping,
         devices: Dict[str, Dict[str, JsonDict]],
     ) -> List["SignatureListItem"]:
         """Check signatures of a user's master key made by their devices.
@@ -1278,7 +1272,7 @@ class E2eKeysHandler:
 
     async def _get_e2e_cross_signing_verify_key(
         self, user_id: str, key_type: str, from_user_id: Optional[str] = None
-    ) -> Tuple[JsonDict, str, VerifyKey]:
+    ) -> Tuple[JsonMapping, str, VerifyKey]:
         """Fetch locally or remotely query for a cross-signing public key.
 
         First, attempt to fetch the cross-signing public key from storage.
@@ -1333,7 +1327,7 @@ class E2eKeysHandler:
         self,
         user: UserID,
         desired_key_type: str,
-    ) -> Optional[Tuple[Dict[str, Any], str, VerifyKey]]:
+    ) -> Optional[Tuple[JsonMapping, str, VerifyKey]]:
         """Queries cross-signing keys for a remote user and saves them to the database
 
         Only the key specified by `key_type` will be returned, while all retrieved keys
@@ -1474,7 +1468,7 @@ def _check_device_signature(
     user_id: str,
     verify_key: VerifyKey,
     signed_device: JsonDict,
-    stored_device: JsonDict,
+    stored_device: JsonMapping,
 ) -> None:
     """Check that a signature on a device or cross-signing key is correct and
     matches the copy of the device/key that we have stored.  Throws an
diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py
index 5dc76ef588..5737f8014d 100644
--- a/synapse/handlers/initial_sync.py
+++ b/synapse/handlers/initial_sync.py
@@ -32,6 +32,7 @@ from synapse.storage.roommember import RoomsForUser
 from synapse.streams.config import PaginationConfig
 from synapse.types import (
     JsonDict,
+    JsonMapping,
     Requester,
     RoomStreamToken,
     StreamKeyType,
@@ -454,7 +455,7 @@ class InitialSyncHandler:
                 for s in states
             ]
 
-        async def get_receipts() -> List[JsonDict]:
+        async def get_receipts() -> List[JsonMapping]:
             receipts = await self.store.get_linearized_receipts_for_room(
                 room_id, to_key=now_token.receipt_key
             )
diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py
index c7edada353..a7a29b758b 100644
--- a/synapse/handlers/receipts.py
+++ b/synapse/handlers/receipts.py
@@ -19,6 +19,7 @@ from synapse.appservice import ApplicationService
 from synapse.streams import EventSource
 from synapse.types import (
     JsonDict,
+    JsonMapping,
     ReadReceipt,
     StreamKeyType,
     UserID,
@@ -204,15 +205,15 @@ class ReceiptsHandler:
             await self.federation_sender.send_read_receipt(receipt)
 
 
-class ReceiptEventSource(EventSource[int, JsonDict]):
+class ReceiptEventSource(EventSource[int, JsonMapping]):
     def __init__(self, hs: "HomeServer"):
         self.store = hs.get_datastores().main
         self.config = hs.config
 
     @staticmethod
     def filter_out_private_receipts(
-        rooms: Sequence[JsonDict], user_id: str
-    ) -> List[JsonDict]:
+        rooms: Sequence[JsonMapping], user_id: str
+    ) -> List[JsonMapping]:
         """
         Filters a list of serialized receipts (as returned by /sync and /initialSync)
         and removes private read receipts of other users.
@@ -229,7 +230,7 @@ class ReceiptEventSource(EventSource[int, JsonDict]):
             The same as rooms, but filtered.
         """
 
-        result = []
+        result: List[JsonMapping] = []
 
         # Iterate through each room's receipt content.
         for room in rooms:
@@ -282,7 +283,7 @@ class ReceiptEventSource(EventSource[int, JsonDict]):
         room_ids: Iterable[str],
         is_guest: bool,
         explicit_room_id: Optional[str] = None,
-    ) -> Tuple[List[JsonDict], int]:
+    ) -> Tuple[List[JsonMapping], int]:
         from_key = int(from_key)
         to_key = self.get_current_key()
 
@@ -301,7 +302,7 @@ class ReceiptEventSource(EventSource[int, JsonDict]):
 
     async def get_new_events_as(
         self, from_key: int, to_key: int, service: ApplicationService
-    ) -> Tuple[List[JsonDict], int]:
+    ) -> Tuple[List[JsonMapping], int]:
         """Returns a set of new read receipt events that an appservice
         may be interested in.
 
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 1a4d394eda..7bd42f635f 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -235,7 +235,7 @@ class SyncResult:
     archived: List[ArchivedSyncResult]
     to_device: List[JsonDict]
     device_lists: DeviceListUpdates
-    device_one_time_keys_count: JsonDict
+    device_one_time_keys_count: JsonMapping
     device_unused_fallback_key_types: List[str]
 
     def __bool__(self) -> bool:
@@ -1558,7 +1558,7 @@ class SyncHandler:
 
         logger.debug("Fetching OTK data")
         device_id = sync_config.device_id
-        one_time_keys_count: JsonDict = {}
+        one_time_keys_count: JsonMapping = {}
         unused_fallback_key_types: List[str] = []
         if device_id:
             # TODO: We should have a way to let clients differentiate between the states of:
diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py
index 4b4227003d..bdefa7f26f 100644
--- a/synapse/handlers/typing.py
+++ b/synapse/handlers/typing.py
@@ -26,7 +26,14 @@ from synapse.metrics.background_process_metrics import (
 )
 from synapse.replication.tcp.streams import TypingStream
 from synapse.streams import EventSource
-from synapse.types import JsonDict, Requester, StrCollection, StreamKeyType, UserID
+from synapse.types import (
+    JsonDict,
+    JsonMapping,
+    Requester,
+    StrCollection,
+    StreamKeyType,
+    UserID,
+)
 from synapse.util.caches.stream_change_cache import StreamChangeCache
 from synapse.util.metrics import Measure
 from synapse.util.retryutils import filter_destinations_by_retry_limiter
@@ -487,7 +494,7 @@ class TypingWriterHandler(FollowerTypingHandler):
         raise Exception("Typing writer instance got typing info over replication")
 
 
-class TypingNotificationEventSource(EventSource[int, JsonDict]):
+class TypingNotificationEventSource(EventSource[int, JsonMapping]):
     def __init__(self, hs: "HomeServer"):
         self._main_store = hs.get_datastores().main
         self.clock = hs.get_clock()
@@ -497,7 +504,7 @@ class TypingNotificationEventSource(EventSource[int, JsonDict]):
         #
         self.get_typing_handler = hs.get_typing_handler
 
-    def _make_event_for(self, room_id: str) -> JsonDict:
+    def _make_event_for(self, room_id: str) -> JsonMapping:
         typing = self.get_typing_handler()._room_typing[room_id]
         return {
             "type": EduTypes.TYPING,
@@ -507,7 +514,7 @@ class TypingNotificationEventSource(EventSource[int, JsonDict]):
 
     async def get_new_events_as(
         self, from_key: int, service: ApplicationService
-    ) -> Tuple[List[JsonDict], int]:
+    ) -> Tuple[List[JsonMapping], int]:
         """Returns a set of new typing events that an appservice
         may be interested in.
 
@@ -551,7 +558,7 @@ class TypingNotificationEventSource(EventSource[int, JsonDict]):
         room_ids: Iterable[str],
         is_guest: bool,
         explicit_room_id: Optional[str] = None,
-    ) -> Tuple[List[JsonDict], int]:
+    ) -> Tuple[List[JsonMapping], int]:
         with Measure(self.clock, "typing.get_new_events"):
             from_key = int(from_key)
             handler = self.get_typing_handler()
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 554634579e..14784312dc 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -131,7 +131,7 @@ class BulkPushRuleEvaluator:
     async def _get_rules_for_event(
         self,
         event: EventBase,
-    ) -> Dict[str, FilteredPushRules]:
+    ) -> Mapping[str, FilteredPushRules]:
         """Get the push rules for all users who may need to be notified about
         the event.
 
diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py
index 484db175d0..0553a0621a 100644
--- a/synapse/storage/databases/main/appservice.py
+++ b/synapse/storage/databases/main/appservice.py
@@ -45,7 +45,7 @@ from synapse.storage.databases.main.events_worker import EventsWorkerStore
 from synapse.storage.databases.main.roommember import RoomMemberWorkerStore
 from synapse.storage.types import Cursor
 from synapse.storage.util.sequence import build_sequence_generator
-from synapse.types import DeviceListUpdates, JsonDict
+from synapse.types import DeviceListUpdates, JsonMapping
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import _CacheContext, cached
 
@@ -268,8 +268,8 @@ class ApplicationServiceTransactionWorkerStore(
         self,
         service: ApplicationService,
         events: Sequence[EventBase],
-        ephemeral: List[JsonDict],
-        to_device_messages: List[JsonDict],
+        ephemeral: List[JsonMapping],
+        to_device_messages: List[JsonMapping],
         one_time_keys_count: TransactionOneTimeKeysCount,
         unused_fallback_keys: TransactionUnusedFallbackKeys,
         device_list_summary: DeviceListUpdates,
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index 70faf4b1ec..df596f35f9 100644
--- a/synapse/storage/databases/main/devices.py
+++ b/synapse/storage/databases/main/devices.py
@@ -55,7 +55,12 @@ from synapse.storage.util.id_generators import (
     AbstractStreamIdGenerator,
     StreamIdGenerator,
 )
-from synapse.types import JsonDict, StrCollection, get_verify_key_from_cross_signing_key
+from synapse.types import (
+    JsonDict,
+    JsonMapping,
+    StrCollection,
+    get_verify_key_from_cross_signing_key,
+)
 from synapse.util import json_decoder, json_encoder
 from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.caches.lrucache import LruCache
@@ -746,7 +751,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
     @cancellable
     async def get_user_devices_from_cache(
         self, user_ids: Set[str], user_and_device_ids: List[Tuple[str, str]]
-    ) -> Tuple[Set[str], Dict[str, Mapping[str, JsonDict]]]:
+    ) -> Tuple[Set[str], Dict[str, Mapping[str, JsonMapping]]]:
         """Get the devices (and keys if any) for remote users from the cache.
 
         Args:
@@ -766,13 +771,13 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
         user_ids_not_in_cache = unique_user_ids - user_ids_in_cache
 
         # First fetch all the users which all devices are to be returned.
-        results: Dict[str, Mapping[str, JsonDict]] = {}
+        results: Dict[str, Mapping[str, JsonMapping]] = {}
         for user_id in user_ids:
             if user_id in user_ids_in_cache:
                 results[user_id] = await self.get_cached_devices_for_user(user_id)
         # Then fetch all device-specific requests, but skip users we've already
         # fetched all devices for.
-        device_specific_results: Dict[str, Dict[str, JsonDict]] = {}
+        device_specific_results: Dict[str, Dict[str, JsonMapping]] = {}
         for user_id, device_id in user_and_device_ids:
             if user_id in user_ids_in_cache and user_id not in user_ids:
                 device = await self._get_cached_user_device(user_id, device_id)
@@ -801,7 +806,9 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
         return user_ids_in_cache
 
     @cached(num_args=2, tree=True)
-    async def _get_cached_user_device(self, user_id: str, device_id: str) -> JsonDict:
+    async def _get_cached_user_device(
+        self, user_id: str, device_id: str
+    ) -> JsonMapping:
         content = await self.db_pool.simple_select_one_onecol(
             table="device_lists_remote_cache",
             keyvalues={"user_id": user_id, "device_id": device_id},
@@ -811,7 +818,9 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
         return db_to_json(content)
 
     @cached()
-    async def get_cached_devices_for_user(self, user_id: str) -> Mapping[str, JsonDict]:
+    async def get_cached_devices_for_user(
+        self, user_id: str
+    ) -> Mapping[str, JsonMapping]:
         devices = await self.db_pool.simple_select_list(
             table="device_lists_remote_cache",
             keyvalues={"user_id": user_id},
@@ -1042,7 +1051,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore):
     )
     async def get_device_list_last_stream_id_for_remotes(
         self, user_ids: Iterable[str]
-    ) -> Dict[str, Optional[str]]:
+    ) -> Mapping[str, Optional[str]]:
         rows = await self.db_pool.simple_select_many_batch(
             table="device_lists_remote_extremeties",
             column="user_id",
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
index b49dea577c..89fac23f93 100644
--- a/synapse/storage/databases/main/end_to_end_keys.py
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -52,7 +52,7 @@ from synapse.storage.database import (
 from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
 from synapse.storage.engines import PostgresEngine
 from synapse.storage.util.id_generators import StreamIdGenerator
-from synapse.types import JsonDict
+from synapse.types import JsonDict, JsonMapping
 from synapse.util import json_decoder, json_encoder
 from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.cancellation import cancellable
@@ -125,7 +125,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
 
     async def get_e2e_device_keys_for_federation_query(
         self, user_id: str
-    ) -> Tuple[int, List[JsonDict]]:
+    ) -> Tuple[int, Sequence[JsonMapping]]:
         """Get all devices (with any device keys) for a user
 
         Returns:
@@ -174,7 +174,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
     @cached(iterable=True)
     async def _get_e2e_device_keys_for_federation_query_inner(
         self, user_id: str
-    ) -> List[JsonDict]:
+    ) -> Sequence[JsonMapping]:
         """Get all devices (with any device keys) for a user"""
 
         devices = await self.get_e2e_device_keys_and_signatures([(user_id, None)])
@@ -578,7 +578,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
     @cached(max_entries=10000)
     async def count_e2e_one_time_keys(
         self, user_id: str, device_id: str
-    ) -> Dict[str, int]:
+    ) -> Mapping[str, int]:
         """Count the number of one time keys the server has for a device
         Returns:
             A mapping from algorithm to number of keys for that algorithm.
@@ -812,7 +812,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
 
     async def get_e2e_cross_signing_key(
         self, user_id: str, key_type: str, from_user_id: Optional[str] = None
-    ) -> Optional[JsonDict]:
+    ) -> Optional[JsonMapping]:
         """Returns a user's cross-signing key.
 
         Args:
@@ -833,7 +833,9 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
         return user_keys.get(key_type)
 
     @cached(num_args=1)
-    def _get_bare_e2e_cross_signing_keys(self, user_id: str) -> Mapping[str, JsonDict]:
+    def _get_bare_e2e_cross_signing_keys(
+        self, user_id: str
+    ) -> Mapping[str, JsonMapping]:
         """Dummy function.  Only used to make a cache for
         _get_bare_e2e_cross_signing_keys_bulk.
         """
@@ -846,7 +848,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
     )
     async def _get_bare_e2e_cross_signing_keys_bulk(
         self, user_ids: Iterable[str]
-    ) -> Dict[str, Optional[Mapping[str, JsonDict]]]:
+    ) -> Mapping[str, Optional[Mapping[str, JsonMapping]]]:
         """Returns the cross-signing keys for a set of users.  The output of this
         function should be passed to _get_e2e_cross_signing_signatures_txn if
         the signatures for the calling user need to be fetched.
@@ -860,15 +862,12 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
             their user ID will map to None.
 
         """
-        result = await self.db_pool.runInteraction(
+        return await self.db_pool.runInteraction(
             "get_bare_e2e_cross_signing_keys_bulk",
             self._get_bare_e2e_cross_signing_keys_bulk_txn,
             user_ids,
         )
 
-        # The `Optional` comes from the `@cachedList` decorator.
-        return cast(Dict[str, Optional[Mapping[str, JsonDict]]], result)
-
     def _get_bare_e2e_cross_signing_keys_bulk_txn(
         self,
         txn: LoggingTransaction,
@@ -1026,7 +1025,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
     @cancellable
     async def get_e2e_cross_signing_keys_bulk(
         self, user_ids: List[str], from_user_id: Optional[str] = None
-    ) -> Dict[str, Optional[Mapping[str, JsonDict]]]:
+    ) -> Mapping[str, Optional[Mapping[str, JsonMapping]]]:
         """Returns the cross-signing keys for a set of users.
 
         Args:
@@ -1043,7 +1042,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker
 
         if from_user_id:
             result = cast(
-                Dict[str, Optional[Mapping[str, JsonDict]]],
+                Dict[str, Optional[Mapping[str, JsonMapping]]],
                 await self.db_pool.runInteraction(
                     "get_e2e_cross_signing_signatures",
                     self._get_e2e_cross_signing_signatures_txn,
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 1eb313040e..b788d70fc5 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -24,6 +24,7 @@ from typing import (
     Dict,
     Iterable,
     List,
+    Mapping,
     MutableMapping,
     Optional,
     Set,
@@ -1633,7 +1634,7 @@ class EventsWorkerStore(SQLBaseStore):
         self,
         room_id: str,
         event_ids: Collection[str],
-    ) -> Dict[str, bool]:
+    ) -> Mapping[str, bool]:
         """Helper for have_seen_events
 
         Returns:
@@ -2325,7 +2326,7 @@ class EventsWorkerStore(SQLBaseStore):
     @cachedList(cached_method_name="is_partial_state_event", list_name="event_ids")
     async def get_partial_state_events(
         self, event_ids: Collection[str]
-    ) -> Dict[str, bool]:
+    ) -> Mapping[str, bool]:
         """Checks which of the given events have partial state
 
         Args:
diff --git a/synapse/storage/databases/main/keys.py b/synapse/storage/databases/main/keys.py
index 41563371dc..889c578b9c 100644
--- a/synapse/storage/databases/main/keys.py
+++ b/synapse/storage/databases/main/keys.py
@@ -16,7 +16,7 @@
 import itertools
 import json
 import logging
-from typing import Dict, Iterable, Optional, Tuple
+from typing import Dict, Iterable, Mapping, Optional, Tuple
 
 from canonicaljson import encode_canonical_json
 from signedjson.key import decode_verify_key_bytes
@@ -130,7 +130,7 @@ class KeyStore(CacheInvalidationWorkerStore):
     )
     async def get_server_keys_json(
         self, server_name_and_key_ids: Iterable[Tuple[str, str]]
-    ) -> Dict[Tuple[str, str], FetchKeyResult]:
+    ) -> Mapping[Tuple[str, str], FetchKeyResult]:
         """
         Args:
             server_name_and_key_ids:
@@ -200,7 +200,7 @@ class KeyStore(CacheInvalidationWorkerStore):
     )
     async def get_server_keys_json_for_remote(
         self, server_name: str, key_ids: Iterable[str]
-    ) -> Dict[str, Optional[FetchKeyResultForRemote]]:
+    ) -> Mapping[str, Optional[FetchKeyResultForRemote]]:
         """Fetch the cached keys for the given server/key IDs.
 
         If we have multiple entries for a given key ID, returns the most recent.
diff --git a/synapse/storage/databases/main/presence.py b/synapse/storage/databases/main/presence.py
index b51d20ac26..194b4e031f 100644
--- a/synapse/storage/databases/main/presence.py
+++ b/synapse/storage/databases/main/presence.py
@@ -11,7 +11,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, cast
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Iterable,
+    List,
+    Mapping,
+    Optional,
+    Tuple,
+    cast,
+)
 
 from synapse.api.presence import PresenceState, UserPresenceState
 from synapse.replication.tcp.streams import PresenceStream
@@ -249,7 +259,7 @@ class PresenceStore(PresenceBackgroundUpdateStore, CacheInvalidationWorkerStore)
     )
     async def get_presence_for_users(
         self, user_ids: Iterable[str]
-    ) -> Dict[str, UserPresenceState]:
+    ) -> Mapping[str, UserPresenceState]:
         rows = await self.db_pool.simple_select_many_batch(
             table="presence_stream",
             column="user_id",
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index bec0dc2afe..af69944008 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -216,7 +216,7 @@ class PushRulesWorkerStore(
     @cachedList(cached_method_name="get_push_rules_for_user", list_name="user_ids")
     async def bulk_get_push_rules(
         self, user_ids: Collection[str]
-    ) -> Dict[str, FilteredPushRules]:
+    ) -> Mapping[str, FilteredPushRules]:
         if not user_ids:
             return {}
 
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index a074c43989..0231f9407b 100644
--- a/synapse/storage/databases/main/receipts.py
+++ b/synapse/storage/databases/main/receipts.py
@@ -43,7 +43,7 @@ from synapse.storage.util.id_generators import (
     MultiWriterIdGenerator,
     StreamIdGenerator,
 )
-from synapse.types import JsonDict
+from synapse.types import JsonDict, JsonMapping
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached, cachedList
 from synapse.util.caches.stream_change_cache import StreamChangeCache
@@ -218,7 +218,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
     @cached()
     async def _get_receipts_for_user_with_orderings(
         self, user_id: str, receipt_type: str
-    ) -> JsonDict:
+    ) -> JsonMapping:
         """
         Fetch receipts for all rooms that the given user is joined to.
 
@@ -258,7 +258,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
 
     async def get_linearized_receipts_for_rooms(
         self, room_ids: Iterable[str], to_key: int, from_key: Optional[int] = None
-    ) -> List[dict]:
+    ) -> List[JsonMapping]:
         """Get receipts for multiple rooms for sending to clients.
 
         Args:
@@ -287,7 +287,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
 
     async def get_linearized_receipts_for_room(
         self, room_id: str, to_key: int, from_key: Optional[int] = None
-    ) -> Sequence[JsonDict]:
+    ) -> Sequence[JsonMapping]:
         """Get receipts for a single room for sending to clients.
 
         Args:
@@ -310,7 +310,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
     @cached(tree=True)
     async def _get_linearized_receipts_for_room(
         self, room_id: str, to_key: int, from_key: Optional[int] = None
-    ) -> Sequence[JsonDict]:
+    ) -> Sequence[JsonMapping]:
         """See get_linearized_receipts_for_room"""
 
         def f(txn: LoggingTransaction) -> List[Dict[str, Any]]:
@@ -353,7 +353,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
     )
     async def _get_linearized_receipts_for_rooms(
         self, room_ids: Collection[str], to_key: int, from_key: Optional[int] = None
-    ) -> Dict[str, Sequence[JsonDict]]:
+    ) -> Mapping[str, Sequence[JsonMapping]]:
         if not room_ids:
             return {}
 
@@ -415,7 +415,7 @@ class ReceiptsWorkerStore(SQLBaseStore):
     )
     async def get_linearized_receipts_for_all_rooms(
         self, to_key: int, from_key: Optional[int] = None
-    ) -> Mapping[str, JsonDict]:
+    ) -> Mapping[str, JsonMapping]:
         """Get receipts for all rooms between two stream_ids, up
         to a limit of the latest 100 read receipts.
 
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index 96908f14ba..6ba9c9651f 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -519,7 +519,7 @@ class RelationsWorkerStore(SQLBaseStore):
     @cachedList(cached_method_name="get_applicable_edit", list_name="event_ids")
     async def get_applicable_edits(
         self, event_ids: Collection[str]
-    ) -> Dict[str, Optional[EventBase]]:
+    ) -> Mapping[str, Optional[EventBase]]:
         """Get the most recent edit (if any) that has happened for the given
         events.
 
@@ -605,7 +605,7 @@ class RelationsWorkerStore(SQLBaseStore):
     @cachedList(cached_method_name="get_thread_summary", list_name="event_ids")
     async def get_thread_summaries(
         self, event_ids: Collection[str]
-    ) -> Dict[str, Optional[Tuple[int, EventBase]]]:
+    ) -> Mapping[str, Optional[Tuple[int, EventBase]]]:
         """Get the number of threaded replies and the latest reply (if any) for the given events.
 
         Args:
@@ -779,7 +779,7 @@ class RelationsWorkerStore(SQLBaseStore):
     @cachedList(cached_method_name="get_thread_participated", list_name="event_ids")
     async def get_threads_participated(
         self, event_ids: Collection[str], user_id: str
-    ) -> Dict[str, bool]:
+    ) -> Mapping[str, bool]:
         """Get whether the requesting user participated in the given threads.
 
         This is separate from get_thread_summaries since that can be cached across
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index fff259f74c..7b503dd697 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -191,7 +191,7 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
     )
     async def get_subset_users_in_room_with_profiles(
         self, room_id: str, user_ids: Collection[str]
-    ) -> Dict[str, ProfileInfo]:
+    ) -> Mapping[str, ProfileInfo]:
         """Get a mapping from user ID to profile information for a list of users
         in a given room.
 
@@ -676,7 +676,7 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
     )
     async def _get_rooms_for_users(
         self, user_ids: Collection[str]
-    ) -> Dict[str, FrozenSet[str]]:
+    ) -> Mapping[str, FrozenSet[str]]:
         """A batched version of `get_rooms_for_user`.
 
         Returns:
@@ -881,7 +881,7 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
     )
     async def _get_user_ids_from_membership_event_ids(
         self, event_ids: Iterable[str]
-    ) -> Dict[str, Optional[str]]:
+    ) -> Mapping[str, Optional[str]]:
         """For given set of member event_ids check if they point to a join
         event.
 
@@ -1191,7 +1191,7 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
     )
     async def get_membership_from_event_ids(
         self, member_event_ids: Iterable[str]
-    ) -> Dict[str, Optional[EventIdMembership]]:
+    ) -> Mapping[str, Optional[EventIdMembership]]:
         """Get user_id and membership of a set of event IDs.
 
         Returns:
diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py
index ebb2ae964f..5eaaff5b68 100644
--- a/synapse/storage/databases/main/state.py
+++ b/synapse/storage/databases/main/state.py
@@ -14,7 +14,17 @@
 # limitations under the License.
 import collections.abc
 import logging
-from typing import TYPE_CHECKING, Any, Collection, Dict, Iterable, Optional, Set, Tuple
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Collection,
+    Dict,
+    Iterable,
+    Mapping,
+    Optional,
+    Set,
+    Tuple,
+)
 
 import attr
 
@@ -372,7 +382,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
     )
     async def _get_state_group_for_events(
         self, event_ids: Collection[str]
-    ) -> Dict[str, int]:
+    ) -> Mapping[str, int]:
         """Returns mapping event_id -> state_group.
 
         Raises:
diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py
index efd21b5bfc..8f70eff809 100644
--- a/synapse/storage/databases/main/transactions.py
+++ b/synapse/storage/databases/main/transactions.py
@@ -14,7 +14,7 @@
 
 import logging
 from enum import Enum
-from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, cast
+from typing import TYPE_CHECKING, Iterable, List, Mapping, Optional, Tuple, cast
 
 import attr
 from canonicaljson import encode_canonical_json
@@ -210,7 +210,7 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
     )
     async def get_destination_retry_timings_batch(
         self, destinations: StrCollection
-    ) -> Dict[str, Optional[DestinationRetryTimings]]:
+    ) -> Mapping[str, Optional[DestinationRetryTimings]]:
         rows = await self.db_pool.simple_select_many_batch(
             table="destinations",
             iterable=destinations,
diff --git a/synapse/storage/databases/main/user_erasure_store.py b/synapse/storage/databases/main/user_erasure_store.py
index f79006533f..06fcbe5e54 100644
--- a/synapse/storage/databases/main/user_erasure_store.py
+++ b/synapse/storage/databases/main/user_erasure_store.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict, Iterable
+from typing import Iterable, Mapping
 
 from synapse.storage.database import LoggingTransaction
 from synapse.storage.databases.main import CacheInvalidationWorkerStore
@@ -40,7 +40,7 @@ class UserErasureWorkerStore(CacheInvalidationWorkerStore):
         return bool(result)
 
     @cachedList(cached_method_name="is_user_erased", list_name="user_ids")
-    async def are_users_erased(self, user_ids: Iterable[str]) -> Dict[str, bool]:
+    async def are_users_erased(self, user_ids: Iterable[str]) -> Mapping[str, bool]:
         """
         Checks which users in a list have requested erasure
 
-- 
cgit 1.5.1


From 7ec0a141b4bdda0fa67cb1f2af7f321b9963f0b8 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Wed, 20 Sep 2023 07:48:55 -0400
Subject: Convert more cached return values to immutable types (#16356)

---
 changelog.d/16356.misc                       |  1 +
 synapse/api/filtering.py                     |  8 +++----
 synapse/federation/federation_client.py      |  4 ++--
 synapse/handlers/federation_event.py         |  2 +-
 synapse/handlers/relations.py                | 14 +++++++++--
 synapse/rest/client/filter.py                |  4 ++--
 synapse/storage/controllers/state.py         |  2 +-
 synapse/storage/databases/main/filtering.py  |  4 ++--
 synapse/storage/databases/main/relations.py  |  4 ++--
 synapse/storage/databases/main/roommember.py | 10 ++++----
 tests/util/caches/test_descriptors.py        | 35 +++++++++++++++-------------
 11 files changed, 52 insertions(+), 36 deletions(-)
 create mode 100644 changelog.d/16356.misc

(limited to 'synapse')

diff --git a/changelog.d/16356.misc b/changelog.d/16356.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/16356.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py
index 0995ecbe83..74ee8e9f3f 100644
--- a/synapse/api/filtering.py
+++ b/synapse/api/filtering.py
@@ -37,7 +37,7 @@ from synapse.api.constants import EduTypes, EventContentFields
 from synapse.api.errors import SynapseError
 from synapse.api.presence import UserPresenceState
 from synapse.events import EventBase, relation_from_event
-from synapse.types import JsonDict, RoomID, UserID
+from synapse.types import JsonDict, JsonMapping, RoomID, UserID
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
@@ -191,7 +191,7 @@ FilterEvent = TypeVar("FilterEvent", EventBase, UserPresenceState, JsonDict)
 
 
 class FilterCollection:
-    def __init__(self, hs: "HomeServer", filter_json: JsonDict):
+    def __init__(self, hs: "HomeServer", filter_json: JsonMapping):
         self._filter_json = filter_json
 
         room_filter_json = self._filter_json.get("room", {})
@@ -219,7 +219,7 @@ class FilterCollection:
     def __repr__(self) -> str:
         return "<FilterCollection %s>" % (json.dumps(self._filter_json),)
 
-    def get_filter_json(self) -> JsonDict:
+    def get_filter_json(self) -> JsonMapping:
         return self._filter_json
 
     def timeline_limit(self) -> int:
@@ -313,7 +313,7 @@ class FilterCollection:
 
 
 class Filter:
-    def __init__(self, hs: "HomeServer", filter_json: JsonDict):
+    def __init__(self, hs: "HomeServer", filter_json: JsonMapping):
         self._hs = hs
         self._store = hs.get_datastores().main
         self.filter_json = filter_json
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 607013f121..c8bc46415d 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -64,7 +64,7 @@ from synapse.federation.transport.client import SendJoinResponse
 from synapse.http.client import is_unknown_endpoint
 from synapse.http.types import QueryParams
 from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, tag_args, trace
-from synapse.types import JsonDict, UserID, get_domain_from_id
+from synapse.types import JsonDict, StrCollection, UserID, get_domain_from_id
 from synapse.util.async_helpers import concurrently_execute
 from synapse.util.caches.expiringcache import ExpiringCache
 from synapse.util.retryutils import NotRetryingDestination
@@ -1704,7 +1704,7 @@ class FederationClient(FederationBase):
     async def timestamp_to_event(
         self,
         *,
-        destinations: List[str],
+        destinations: StrCollection,
         room_id: str,
         timestamp: int,
         direction: Direction,
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index eedde97ab0..7c62cdfaef 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -1538,7 +1538,7 @@ class FederationEventHandler:
             logger.exception("Failed to resync device for %s", sender)
 
     async def backfill_event_id(
-        self, destinations: List[str], room_id: str, event_id: str
+        self, destinations: StrCollection, room_id: str, event_id: str
     ) -> PulledPduInfo:
         """Backfill a single event and persist it as a non-outlier which means
         we also pull in all of the state and auth events necessary for it.
diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py
index db97f7aede..9b13448cdd 100644
--- a/synapse/handlers/relations.py
+++ b/synapse/handlers/relations.py
@@ -13,7 +13,17 @@
 # limitations under the License.
 import enum
 import logging
-from typing import TYPE_CHECKING, Collection, Dict, FrozenSet, Iterable, List, Optional
+from typing import (
+    TYPE_CHECKING,
+    Collection,
+    Dict,
+    FrozenSet,
+    Iterable,
+    List,
+    Mapping,
+    Optional,
+    Sequence,
+)
 
 import attr
 
@@ -245,7 +255,7 @@ class RelationsHandler:
 
     async def get_references_for_events(
         self, event_ids: Collection[str], ignored_users: FrozenSet[str] = frozenset()
-    ) -> Dict[str, List[_RelatedEvent]]:
+    ) -> Mapping[str, Sequence[_RelatedEvent]]:
         """Get a list of references to the given events.
 
         Args:
diff --git a/synapse/rest/client/filter.py b/synapse/rest/client/filter.py
index 5da1e511a2..b5879496db 100644
--- a/synapse/rest/client/filter.py
+++ b/synapse/rest/client/filter.py
@@ -19,7 +19,7 @@ from synapse.api.errors import AuthError, NotFoundError, StoreError, SynapseErro
 from synapse.http.server import HttpServer
 from synapse.http.servlet import RestServlet, parse_json_object_from_request
 from synapse.http.site import SynapseRequest
-from synapse.types import JsonDict, UserID
+from synapse.types import JsonDict, JsonMapping, UserID
 
 from ._base import client_patterns, set_timeline_upper_limit
 
@@ -41,7 +41,7 @@ class GetFilterRestServlet(RestServlet):
 
     async def on_GET(
         self, request: SynapseRequest, user_id: str, filter_id: str
-    ) -> Tuple[int, JsonDict]:
+    ) -> Tuple[int, JsonMapping]:
         target_user = UserID.from_string(user_id)
         requester = await self.auth.get_user_by_req(request)
 
diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py
index 278c7832ba..10d219c045 100644
--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@@ -582,7 +582,7 @@ class StateStorageController:
 
     @trace
     @tag_args
-    async def get_current_hosts_in_room_ordered(self, room_id: str) -> List[str]:
+    async def get_current_hosts_in_room_ordered(self, room_id: str) -> Tuple[str, ...]:
         """Get current hosts in room based on current state.
 
         Blocks until we have full state for the given room. This only happens for rooms
diff --git a/synapse/storage/databases/main/filtering.py b/synapse/storage/databases/main/filtering.py
index 047de6283a..7d94685caf 100644
--- a/synapse/storage/databases/main/filtering.py
+++ b/synapse/storage/databases/main/filtering.py
@@ -25,7 +25,7 @@ from synapse.storage.database import (
     LoggingTransaction,
 )
 from synapse.storage.engines import PostgresEngine
-from synapse.types import JsonDict, UserID
+from synapse.types import JsonDict, JsonMapping, UserID
 from synapse.util.caches.descriptors import cached
 
 if TYPE_CHECKING:
@@ -145,7 +145,7 @@ class FilteringWorkerStore(SQLBaseStore):
     @cached(num_args=2)
     async def get_user_filter(
         self, user_id: UserID, filter_id: Union[int, str]
-    ) -> JsonDict:
+    ) -> JsonMapping:
         # filter_id is BIGINT UNSIGNED, so if it isn't a number, fail
         # with a coherent error message rather than 500 M_UNKNOWN.
         try:
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index 6ba9c9651f..b67f780c10 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -465,7 +465,7 @@ class RelationsWorkerStore(SQLBaseStore):
     @cachedList(cached_method_name="get_references_for_event", list_name="event_ids")
     async def get_references_for_events(
         self, event_ids: Collection[str]
-    ) -> Mapping[str, Optional[List[_RelatedEvent]]]:
+    ) -> Mapping[str, Optional[Sequence[_RelatedEvent]]]:
         """Get a list of references to the given events.
 
         Args:
@@ -931,7 +931,7 @@ class RelationsWorkerStore(SQLBaseStore):
         room_id: str,
         limit: int = 5,
         from_token: Optional[ThreadsNextBatch] = None,
-    ) -> Tuple[List[str], Optional[ThreadsNextBatch]]:
+    ) -> Tuple[Sequence[str], Optional[ThreadsNextBatch]]:
         """Get a list of thread IDs, ordered by topological ordering of their
         latest reply.
 
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 7b503dd697..3755773faa 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -984,7 +984,7 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
         )
 
     @cached(iterable=True, max_entries=10000)
-    async def get_current_hosts_in_room_ordered(self, room_id: str) -> List[str]:
+    async def get_current_hosts_in_room_ordered(self, room_id: str) -> Tuple[str, ...]:
         """
         Get current hosts in room based on current state.
 
@@ -1013,12 +1013,14 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
             # `get_users_in_room` rather than funky SQL.
 
             domains = await self.get_current_hosts_in_room(room_id)
-            return list(domains)
+            return tuple(domains)
 
         # For PostgreSQL we can use a regex to pull out the domains from the
         # joined users in `current_state_events` via regex.
 
-        def get_current_hosts_in_room_ordered_txn(txn: LoggingTransaction) -> List[str]:
+        def get_current_hosts_in_room_ordered_txn(
+            txn: LoggingTransaction,
+        ) -> Tuple[str, ...]:
             # Returns a list of servers currently joined in the room sorted by
             # longest in the room first (aka. with the lowest depth). The
             # heuristic of sorting by servers who have been in the room the
@@ -1043,7 +1045,7 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
             """
             txn.execute(sql, (room_id,))
             # `server_domain` will be `NULL` for malformed MXIDs with no colons.
-            return [d for d, in txn if d is not None]
+            return tuple(d for d, in txn if d is not None)
 
         return await self.db_pool.runInteraction(
             "get_current_hosts_in_room_ordered", get_current_hosts_in_room_ordered_txn
diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py
index 168419f440..7e8725e610 100644
--- a/tests/util/caches/test_descriptors.py
+++ b/tests/util/caches/test_descriptors.py
@@ -15,10 +15,10 @@
 import logging
 from typing import (
     Any,
-    Dict,
     Generator,
     Iterable,
     List,
+    Mapping,
     NoReturn,
     Optional,
     Set,
@@ -96,7 +96,7 @@ class DescriptorTestCase(unittest.TestCase):
                 self.mock = mock.Mock()
 
             @descriptors.cached(num_args=1)
-            def fn(self, arg1: int, arg2: int) -> mock.Mock:
+            def fn(self, arg1: int, arg2: int) -> str:
                 return self.mock(arg1, arg2)
 
         obj = Cls()
@@ -228,8 +228,9 @@ class DescriptorTestCase(unittest.TestCase):
             call_count = 0
 
             @cached()
-            def fn(self, arg1: int) -> Optional[Deferred]:
+            def fn(self, arg1: int) -> Deferred:
                 self.call_count += 1
+                assert self.result is not None
                 return self.result
 
         obj = Cls()
@@ -401,21 +402,21 @@ class DescriptorTestCase(unittest.TestCase):
                 self.mock = mock.Mock()
 
             @descriptors.cached(iterable=True)
-            def fn(self, arg1: int, arg2: int) -> List[str]:
+            def fn(self, arg1: int, arg2: int) -> Tuple[str, ...]:
                 return self.mock(arg1, arg2)
 
         obj = Cls()
 
-        obj.mock.return_value = ["spam", "eggs"]
+        obj.mock.return_value = ("spam", "eggs")
         r = obj.fn(1, 2)
-        self.assertEqual(r.result, ["spam", "eggs"])
+        self.assertEqual(r.result, ("spam", "eggs"))
         obj.mock.assert_called_once_with(1, 2)
         obj.mock.reset_mock()
 
         # a call with different params should call the mock again
-        obj.mock.return_value = ["chips"]
+        obj.mock.return_value = ("chips",)
         r = obj.fn(1, 3)
-        self.assertEqual(r.result, ["chips"])
+        self.assertEqual(r.result, ("chips",))
         obj.mock.assert_called_once_with(1, 3)
         obj.mock.reset_mock()
 
@@ -423,9 +424,9 @@ class DescriptorTestCase(unittest.TestCase):
         self.assertEqual(len(obj.fn.cache.cache), 3)
 
         r = obj.fn(1, 2)
-        self.assertEqual(r.result, ["spam", "eggs"])
+        self.assertEqual(r.result, ("spam", "eggs"))
         r = obj.fn(1, 3)
-        self.assertEqual(r.result, ["chips"])
+        self.assertEqual(r.result, ("chips",))
         obj.mock.assert_not_called()
 
     def test_cache_iterable_with_sync_exception(self) -> None:
@@ -784,7 +785,9 @@ class CachedListDescriptorTestCase(unittest.TestCase):
                 pass
 
             @descriptors.cachedList(cached_method_name="fn", list_name="args1")
-            async def list_fn(self, args1: Iterable[int], arg2: int) -> Dict[int, str]:
+            async def list_fn(
+                self, args1: Iterable[int], arg2: int
+            ) -> Mapping[int, str]:
                 context = current_context()
                 assert isinstance(context, LoggingContext)
                 assert context.name == "c1"
@@ -847,11 +850,11 @@ class CachedListDescriptorTestCase(unittest.TestCase):
                 pass
 
             @descriptors.cachedList(cached_method_name="fn", list_name="args1")
-            def list_fn(self, args1: List[int]) -> "Deferred[dict]":
+            def list_fn(self, args1: List[int]) -> "Deferred[Mapping[int, str]]":
                 return self.mock(args1)
 
         obj = Cls()
-        deferred_result: "Deferred[dict]" = Deferred()
+        deferred_result: "Deferred[Mapping[int, str]]" = Deferred()
         obj.mock.return_value = deferred_result
 
         # start off several concurrent lookups of the same key
@@ -890,7 +893,7 @@ class CachedListDescriptorTestCase(unittest.TestCase):
                 pass
 
             @descriptors.cachedList(cached_method_name="fn", list_name="args1")
-            async def list_fn(self, args1: List[int], arg2: int) -> Dict[int, str]:
+            async def list_fn(self, args1: List[int], arg2: int) -> Mapping[int, str]:
                 # we want this to behave like an asynchronous function
                 await run_on_reactor()
                 return self.mock(args1, arg2)
@@ -929,7 +932,7 @@ class CachedListDescriptorTestCase(unittest.TestCase):
                 pass
 
             @cachedList(cached_method_name="fn", list_name="args")
-            async def list_fn(self, args: List[int]) -> Dict[int, str]:
+            async def list_fn(self, args: List[int]) -> Mapping[int, str]:
                 await complete_lookup
                 return {arg: str(arg) for arg in args}
 
@@ -964,7 +967,7 @@ class CachedListDescriptorTestCase(unittest.TestCase):
                 pass
 
             @cachedList(cached_method_name="fn", list_name="args")
-            async def list_fn(self, args: List[int]) -> Dict[int, str]:
+            async def list_fn(self, args: List[int]) -> Mapping[int, str]:
                 await make_deferred_yieldable(complete_lookup)
                 self.inner_context_was_finished = current_context().finished
                 return {arg: str(arg) for arg in args}
-- 
cgit 1.5.1


From c9a0e1673ac6e5553216775d2bcd28a4630cd026 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 21 Sep 2023 09:47:29 -0400
Subject: Add missing license header. (#16359)

---
 changelog.d/16359.misc |  1 +
 synapse/rest/models.py | 13 +++++++++++++
 2 files changed, 14 insertions(+)
 create mode 100644 changelog.d/16359.misc

(limited to 'synapse')

diff --git a/changelog.d/16359.misc b/changelog.d/16359.misc
new file mode 100644
index 0000000000..8752085fc6
--- /dev/null
+++ b/changelog.d/16359.misc
@@ -0,0 +1 @@
+Add missing licence header.
diff --git a/synapse/rest/models.py b/synapse/rest/models.py
index ac39cda8e5..d47de5c19e 100644
--- a/synapse/rest/models.py
+++ b/synapse/rest/models.py
@@ -1,3 +1,16 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from pydantic import BaseModel, Extra
 
 
-- 
cgit 1.5.1


From 12611bfcddfe87e3bad90ef96a648acc2f1cebf3 Mon Sep 17 00:00:00 2001
From: Maxwell G <maxwell@gtmx.me>
Date: Mon, 25 Sep 2023 10:19:08 -0500
Subject: Add support for pydantic v2 via pydantic.v1 compat module (#16332)

Support for pydantic v1 is maintained alongside the new pydantic v2 support.
---
 changelog.d/16332.misc                     |   1 +
 poetry.lock                                | 203 +++++++++++++++++++++--------
 pyproject.toml                             |  12 +-
 scripts-dev/check_pydantic_models.py       |  98 +++++++++++---
 synapse/_pydantic_compat.py                |  26 ++++
 synapse/config/_util.py                    |  10 +-
 synapse/config/workers.py                  |  10 +-
 synapse/events/validator.py                |  10 +-
 synapse/http/servlet.py                    |  11 +-
 synapse/rest/client/account.py             |   7 +-
 synapse/rest/client/devices.py             |   7 +-
 synapse/rest/client/directory.py           |   8 +-
 synapse/rest/client/models.py              |   7 +-
 synapse/rest/key/v2/remote_key_resource.py |   8 +-
 synapse/rest/models.py                     |   9 +-
 synapse/storage/background_updates.py      |   7 +-
 tests/rest/client/test_models.py           |   8 +-
 17 files changed, 348 insertions(+), 94 deletions(-)
 create mode 100644 changelog.d/16332.misc
 create mode 100644 synapse/_pydantic_compat.py

(limited to 'synapse')

diff --git a/changelog.d/16332.misc b/changelog.d/16332.misc
new file mode 100644
index 0000000000..862d547d60
--- /dev/null
+++ b/changelog.d/16332.misc
@@ -0,0 +1 @@
+Added support for pydantic v2 in addition to pydantic v1. Contributed by Maxwell G (@gotmax23).
diff --git a/poetry.lock b/poetry.lock
index 5d560151fb..9a8be27b6f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -11,6 +11,20 @@ files = [
     {file = "alabaster-0.7.13.tar.gz", hash = "sha256:a27a4a084d5e690e16e01e03ad2b2e552c61a65469419b907243193de1a84ae2"},
 ]
 
+[[package]]
+name = "annotated-types"
+version = "0.5.0"
+description = "Reusable constraint types to use with typing.Annotated"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "annotated_types-0.5.0-py3-none-any.whl", hash = "sha256:58da39888f92c276ad970249761ebea80ba544b77acddaa1a4d6cf78287d45fd"},
+    {file = "annotated_types-0.5.0.tar.gz", hash = "sha256:47cdc3490d9ac1506ce92c7aaa76c579dc3509ff11e098fc867e5130ab7be802"},
+]
+
+[package.dependencies]
+typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""}
+
 [[package]]
 name = "astroid"
 version = "2.15.0"
@@ -1818,55 +1832,140 @@ files = [
 
 [[package]]
 name = "pydantic"
-version = "1.10.11"
-description = "Data validation and settings management using python type hints"
+version = "2.3.0"
+description = "Data validation using Python type hints"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "pydantic-2.3.0-py3-none-any.whl", hash = "sha256:45b5e446c6dfaad9444819a293b921a40e1db1aa61ea08aede0522529ce90e81"},
+    {file = "pydantic-2.3.0.tar.gz", hash = "sha256:1607cc106602284cd4a00882986570472f193fde9cb1259bceeaedb26aa79a6d"},
+]
+
+[package.dependencies]
+annotated-types = ">=0.4.0"
+pydantic-core = "2.6.3"
+typing-extensions = ">=4.6.1"
+
+[package.extras]
+email = ["email-validator (>=2.0.0)"]
+
+[[package]]
+name = "pydantic-core"
+version = "2.6.3"
+description = ""
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pydantic-1.10.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ff44c5e89315b15ff1f7fdaf9853770b810936d6b01a7bcecaa227d2f8fe444f"},
-    {file = "pydantic-1.10.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a6c098d4ab5e2d5b3984d3cb2527e2d6099d3de85630c8934efcfdc348a9760e"},
-    {file = "pydantic-1.10.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16928fdc9cb273c6af00d9d5045434c39afba5f42325fb990add2c241402d151"},
-    {file = "pydantic-1.10.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0588788a9a85f3e5e9ebca14211a496409cb3deca5b6971ff37c556d581854e7"},
-    {file = "pydantic-1.10.11-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e9baf78b31da2dc3d3f346ef18e58ec5f12f5aaa17ac517e2ffd026a92a87588"},
-    {file = "pydantic-1.10.11-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:373c0840f5c2b5b1ccadd9286782852b901055998136287828731868027a724f"},
-    {file = "pydantic-1.10.11-cp310-cp310-win_amd64.whl", hash = "sha256:c3339a46bbe6013ef7bdd2844679bfe500347ac5742cd4019a88312aa58a9847"},
-    {file = "pydantic-1.10.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:08a6c32e1c3809fbc49debb96bf833164f3438b3696abf0fbeceb417d123e6eb"},
-    {file = "pydantic-1.10.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a451ccab49971af043ec4e0d207cbc8cbe53dbf148ef9f19599024076fe9c25b"},
-    {file = "pydantic-1.10.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b02d24f7b2b365fed586ed73582c20f353a4c50e4be9ba2c57ab96f8091ddae"},
-    {file = "pydantic-1.10.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3f34739a89260dfa420aa3cbd069fbcc794b25bbe5c0a214f8fb29e363484b66"},
-    {file = "pydantic-1.10.11-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e297897eb4bebde985f72a46a7552a7556a3dd11e7f76acda0c1093e3dbcf216"},
-    {file = "pydantic-1.10.11-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d185819a7a059550ecb85d5134e7d40f2565f3dd94cfd870132c5f91a89cf58c"},
-    {file = "pydantic-1.10.11-cp311-cp311-win_amd64.whl", hash = "sha256:4400015f15c9b464c9db2d5d951b6a780102cfa5870f2c036d37c23b56f7fc1b"},
-    {file = "pydantic-1.10.11-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2417de68290434461a266271fc57274a138510dca19982336639484c73a07af6"},
-    {file = "pydantic-1.10.11-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:331c031ba1554b974c98679bd0780d89670d6fd6f53f5d70b10bdc9addee1713"},
-    {file = "pydantic-1.10.11-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8268a735a14c308923e8958363e3a3404f6834bb98c11f5ab43251a4e410170c"},
-    {file = "pydantic-1.10.11-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:44e51ba599c3ef227e168424e220cd3e544288c57829520dc90ea9cb190c3248"},
-    {file = "pydantic-1.10.11-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d7781f1d13b19700b7949c5a639c764a077cbbdd4322ed505b449d3ca8edcb36"},
-    {file = "pydantic-1.10.11-cp37-cp37m-win_amd64.whl", hash = "sha256:7522a7666157aa22b812ce14c827574ddccc94f361237ca6ea8bb0d5c38f1629"},
-    {file = "pydantic-1.10.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bc64eab9b19cd794a380179ac0e6752335e9555d214cfcb755820333c0784cb3"},
-    {file = "pydantic-1.10.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8dc77064471780262b6a68fe67e013298d130414d5aaf9b562c33987dbd2cf4f"},
-    {file = "pydantic-1.10.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe429898f2c9dd209bd0632a606bddc06f8bce081bbd03d1c775a45886e2c1cb"},
-    {file = "pydantic-1.10.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:192c608ad002a748e4a0bed2ddbcd98f9b56df50a7c24d9a931a8c5dd053bd3d"},
-    {file = "pydantic-1.10.11-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ef55392ec4bb5721f4ded1096241e4b7151ba6d50a50a80a2526c854f42e6a2f"},
-    {file = "pydantic-1.10.11-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:41e0bb6efe86281623abbeeb0be64eab740c865388ee934cd3e6a358784aca6e"},
-    {file = "pydantic-1.10.11-cp38-cp38-win_amd64.whl", hash = "sha256:265a60da42f9f27e0b1014eab8acd3e53bd0bad5c5b4884e98a55f8f596b2c19"},
-    {file = "pydantic-1.10.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:469adf96c8e2c2bbfa655fc7735a2a82f4c543d9fee97bd113a7fb509bf5e622"},
-    {file = "pydantic-1.10.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e6cbfbd010b14c8a905a7b10f9fe090068d1744d46f9e0c021db28daeb8b6de1"},
-    {file = "pydantic-1.10.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abade85268cc92dff86d6effcd917893130f0ff516f3d637f50dadc22ae93999"},
-    {file = "pydantic-1.10.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9738b0f2e6c70f44ee0de53f2089d6002b10c33264abee07bdb5c7f03038303"},
-    {file = "pydantic-1.10.11-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:787cf23e5a0cde753f2eabac1b2e73ae3844eb873fd1f5bdbff3048d8dbb7604"},
-    {file = "pydantic-1.10.11-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:174899023337b9fc685ac8adaa7b047050616136ccd30e9070627c1aaab53a13"},
-    {file = "pydantic-1.10.11-cp39-cp39-win_amd64.whl", hash = "sha256:1954f8778489a04b245a1e7b8b22a9d3ea8ef49337285693cf6959e4b757535e"},
-    {file = "pydantic-1.10.11-py3-none-any.whl", hash = "sha256:008c5e266c8aada206d0627a011504e14268a62091450210eda7c07fabe6963e"},
-    {file = "pydantic-1.10.11.tar.gz", hash = "sha256:f66d479cf7eb331372c470614be6511eae96f1f120344c25f3f9bb59fb1b5528"},
-]
-
-[package.dependencies]
-typing-extensions = ">=4.2.0"
-
-[package.extras]
-dotenv = ["python-dotenv (>=0.10.4)"]
-email = ["email-validator (>=1.0.3)"]
+    {file = "pydantic_core-2.6.3-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:1a0ddaa723c48af27d19f27f1c73bdc615c73686d763388c8683fe34ae777bad"},
+    {file = "pydantic_core-2.6.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5cfde4fab34dd1e3a3f7f3db38182ab6c95e4ea91cf322242ee0be5c2f7e3d2f"},
+    {file = "pydantic_core-2.6.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5493a7027bfc6b108e17c3383959485087d5942e87eb62bbac69829eae9bc1f7"},
+    {file = "pydantic_core-2.6.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:84e87c16f582f5c753b7f39a71bd6647255512191be2d2dbf49458c4ef024588"},
+    {file = "pydantic_core-2.6.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:522a9c4a4d1924facce7270c84b5134c5cabcb01513213662a2e89cf28c1d309"},
+    {file = "pydantic_core-2.6.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aaafc776e5edc72b3cad1ccedb5fd869cc5c9a591f1213aa9eba31a781be9ac1"},
+    {file = "pydantic_core-2.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a750a83b2728299ca12e003d73d1264ad0440f60f4fc9cee54acc489249b728"},
+    {file = "pydantic_core-2.6.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9e8b374ef41ad5c461efb7a140ce4730661aadf85958b5c6a3e9cf4e040ff4bb"},
+    {file = "pydantic_core-2.6.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b594b64e8568cf09ee5c9501ede37066b9fc41d83d58f55b9952e32141256acd"},
+    {file = "pydantic_core-2.6.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2a20c533cb80466c1d42a43a4521669ccad7cf2967830ac62c2c2f9cece63e7e"},
+    {file = "pydantic_core-2.6.3-cp310-none-win32.whl", hash = "sha256:04fe5c0a43dec39aedba0ec9579001061d4653a9b53a1366b113aca4a3c05ca7"},
+    {file = "pydantic_core-2.6.3-cp310-none-win_amd64.whl", hash = "sha256:6bf7d610ac8f0065a286002a23bcce241ea8248c71988bda538edcc90e0c39ad"},
+    {file = "pydantic_core-2.6.3-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:6bcc1ad776fffe25ea5c187a028991c031a00ff92d012ca1cc4714087e575973"},
+    {file = "pydantic_core-2.6.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:df14f6332834444b4a37685810216cc8fe1fe91f447332cd56294c984ecbff1c"},
+    {file = "pydantic_core-2.6.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0b7486d85293f7f0bbc39b34e1d8aa26210b450bbd3d245ec3d732864009819"},
+    {file = "pydantic_core-2.6.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a892b5b1871b301ce20d40b037ffbe33d1407a39639c2b05356acfef5536d26a"},
+    {file = "pydantic_core-2.6.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:883daa467865e5766931e07eb20f3e8152324f0adf52658f4d302242c12e2c32"},
+    {file = "pydantic_core-2.6.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4eb77df2964b64ba190eee00b2312a1fd7a862af8918ec70fc2d6308f76ac64"},
+    {file = "pydantic_core-2.6.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ce8c84051fa292a5dc54018a40e2a1926fd17980a9422c973e3ebea017aa8da"},
+    {file = "pydantic_core-2.6.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:22134a4453bd59b7d1e895c455fe277af9d9d9fbbcb9dc3f4a97b8693e7e2c9b"},
+    {file = "pydantic_core-2.6.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:02e1c385095efbd997311d85c6021d32369675c09bcbfff3b69d84e59dc103f6"},
+    {file = "pydantic_core-2.6.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d79f1f2f7ebdb9b741296b69049ff44aedd95976bfee38eb4848820628a99b50"},
+    {file = "pydantic_core-2.6.3-cp311-none-win32.whl", hash = "sha256:430ddd965ffd068dd70ef4e4d74f2c489c3a313adc28e829dd7262cc0d2dd1e8"},
+    {file = "pydantic_core-2.6.3-cp311-none-win_amd64.whl", hash = "sha256:84f8bb34fe76c68c9d96b77c60cef093f5e660ef8e43a6cbfcd991017d375950"},
+    {file = "pydantic_core-2.6.3-cp311-none-win_arm64.whl", hash = "sha256:5a2a3c9ef904dcdadb550eedf3291ec3f229431b0084666e2c2aa8ff99a103a2"},
+    {file = "pydantic_core-2.6.3-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:8421cf496e746cf8d6b677502ed9a0d1e4e956586cd8b221e1312e0841c002d5"},
+    {file = "pydantic_core-2.6.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bb128c30cf1df0ab78166ded1ecf876620fb9aac84d2413e8ea1594b588c735d"},
+    {file = "pydantic_core-2.6.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37a822f630712817b6ecc09ccc378192ef5ff12e2c9bae97eb5968a6cdf3b862"},
+    {file = "pydantic_core-2.6.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:240a015102a0c0cc8114f1cba6444499a8a4d0333e178bc504a5c2196defd456"},
+    {file = "pydantic_core-2.6.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f90e5e3afb11268628c89f378f7a1ea3f2fe502a28af4192e30a6cdea1e7d5e"},
+    {file = "pydantic_core-2.6.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:340e96c08de1069f3d022a85c2a8c63529fd88709468373b418f4cf2c949fb0e"},
+    {file = "pydantic_core-2.6.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1480fa4682e8202b560dcdc9eeec1005f62a15742b813c88cdc01d44e85308e5"},
+    {file = "pydantic_core-2.6.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f14546403c2a1d11a130b537dda28f07eb6c1805a43dae4617448074fd49c282"},
+    {file = "pydantic_core-2.6.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a87c54e72aa2ef30189dc74427421e074ab4561cf2bf314589f6af5b37f45e6d"},
+    {file = "pydantic_core-2.6.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f93255b3e4d64785554e544c1c76cd32f4a354fa79e2eeca5d16ac2e7fdd57aa"},
+    {file = "pydantic_core-2.6.3-cp312-none-win32.whl", hash = "sha256:f70dc00a91311a1aea124e5f64569ea44c011b58433981313202c46bccbec0e1"},
+    {file = "pydantic_core-2.6.3-cp312-none-win_amd64.whl", hash = "sha256:23470a23614c701b37252618e7851e595060a96a23016f9a084f3f92f5ed5881"},
+    {file = "pydantic_core-2.6.3-cp312-none-win_arm64.whl", hash = "sha256:1ac1750df1b4339b543531ce793b8fd5c16660a95d13aecaab26b44ce11775e9"},
+    {file = "pydantic_core-2.6.3-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:a53e3195f134bde03620d87a7e2b2f2046e0e5a8195e66d0f244d6d5b2f6d31b"},
+    {file = "pydantic_core-2.6.3-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:f2969e8f72c6236c51f91fbb79c33821d12a811e2a94b7aa59c65f8dbdfad34a"},
+    {file = "pydantic_core-2.6.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:672174480a85386dd2e681cadd7d951471ad0bb028ed744c895f11f9d51b9ebe"},
+    {file = "pydantic_core-2.6.3-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:002d0ea50e17ed982c2d65b480bd975fc41086a5a2f9c924ef8fc54419d1dea3"},
+    {file = "pydantic_core-2.6.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3ccc13afee44b9006a73d2046068d4df96dc5b333bf3509d9a06d1b42db6d8bf"},
+    {file = "pydantic_core-2.6.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:439a0de139556745ae53f9cc9668c6c2053444af940d3ef3ecad95b079bc9987"},
+    {file = "pydantic_core-2.6.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d63b7545d489422d417a0cae6f9898618669608750fc5e62156957e609e728a5"},
+    {file = "pydantic_core-2.6.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b44c42edc07a50a081672e25dfe6022554b47f91e793066a7b601ca290f71e42"},
+    {file = "pydantic_core-2.6.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1c721bfc575d57305dd922e6a40a8fe3f762905851d694245807a351ad255c58"},
+    {file = "pydantic_core-2.6.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:5e4a2cf8c4543f37f5dc881de6c190de08096c53986381daebb56a355be5dfe6"},
+    {file = "pydantic_core-2.6.3-cp37-none-win32.whl", hash = "sha256:d9b4916b21931b08096efed090327f8fe78e09ae8f5ad44e07f5c72a7eedb51b"},
+    {file = "pydantic_core-2.6.3-cp37-none-win_amd64.whl", hash = "sha256:a8acc9dedd304da161eb071cc7ff1326aa5b66aadec9622b2574ad3ffe225525"},
+    {file = "pydantic_core-2.6.3-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:5e9c068f36b9f396399d43bfb6defd4cc99c36215f6ff33ac8b9c14ba15bdf6b"},
+    {file = "pydantic_core-2.6.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e61eae9b31799c32c5f9b7be906be3380e699e74b2db26c227c50a5fc7988698"},
+    {file = "pydantic_core-2.6.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d85463560c67fc65cd86153a4975d0b720b6d7725cf7ee0b2d291288433fc21b"},
+    {file = "pydantic_core-2.6.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9616567800bdc83ce136e5847d41008a1d602213d024207b0ff6cab6753fe645"},
+    {file = "pydantic_core-2.6.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e9b65a55bbabda7fccd3500192a79f6e474d8d36e78d1685496aad5f9dbd92c"},
+    {file = "pydantic_core-2.6.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f468d520f47807d1eb5d27648393519655eadc578d5dd862d06873cce04c4d1b"},
+    {file = "pydantic_core-2.6.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9680dd23055dd874173a3a63a44e7f5a13885a4cfd7e84814be71be24fba83db"},
+    {file = "pydantic_core-2.6.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a718d56c4d55efcfc63f680f207c9f19c8376e5a8a67773535e6f7e80e93170"},
+    {file = "pydantic_core-2.6.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8ecbac050856eb6c3046dea655b39216597e373aa8e50e134c0e202f9c47efec"},
+    {file = "pydantic_core-2.6.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:788be9844a6e5c4612b74512a76b2153f1877cd845410d756841f6c3420230eb"},
+    {file = "pydantic_core-2.6.3-cp38-none-win32.whl", hash = "sha256:07a1aec07333bf5adebd8264047d3dc518563d92aca6f2f5b36f505132399efc"},
+    {file = "pydantic_core-2.6.3-cp38-none-win_amd64.whl", hash = "sha256:621afe25cc2b3c4ba05fff53525156d5100eb35c6e5a7cf31d66cc9e1963e378"},
+    {file = "pydantic_core-2.6.3-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:813aab5bfb19c98ae370952b6f7190f1e28e565909bfc219a0909db168783465"},
+    {file = "pydantic_core-2.6.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:50555ba3cb58f9861b7a48c493636b996a617db1a72c18da4d7f16d7b1b9952b"},
+    {file = "pydantic_core-2.6.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19e20f8baedd7d987bd3f8005c146e6bcbda7cdeefc36fad50c66adb2dd2da48"},
+    {file = "pydantic_core-2.6.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b0a5d7edb76c1c57b95df719af703e796fc8e796447a1da939f97bfa8a918d60"},
+    {file = "pydantic_core-2.6.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f06e21ad0b504658a3a9edd3d8530e8cea5723f6ea5d280e8db8efc625b47e49"},
+    {file = "pydantic_core-2.6.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea053cefa008fda40f92aab937fb9f183cf8752e41dbc7bc68917884454c6362"},
+    {file = "pydantic_core-2.6.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:171a4718860790f66d6c2eda1d95dd1edf64f864d2e9f9115840840cf5b5713f"},
+    {file = "pydantic_core-2.6.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ed7ceca6aba5331ece96c0e328cd52f0dcf942b8895a1ed2642de50800b79d3"},
+    {file = "pydantic_core-2.6.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:acafc4368b289a9f291e204d2c4c75908557d4f36bd3ae937914d4529bf62a76"},
+    {file = "pydantic_core-2.6.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1aa712ba150d5105814e53cb141412217146fedc22621e9acff9236d77d2a5ef"},
+    {file = "pydantic_core-2.6.3-cp39-none-win32.whl", hash = "sha256:44b4f937b992394a2e81a5c5ce716f3dcc1237281e81b80c748b2da6dd5cf29a"},
+    {file = "pydantic_core-2.6.3-cp39-none-win_amd64.whl", hash = "sha256:9b33bf9658cb29ac1a517c11e865112316d09687d767d7a0e4a63d5c640d1b17"},
+    {file = "pydantic_core-2.6.3-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d7050899026e708fb185e174c63ebc2c4ee7a0c17b0a96ebc50e1f76a231c057"},
+    {file = "pydantic_core-2.6.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:99faba727727b2e59129c59542284efebbddade4f0ae6a29c8b8d3e1f437beb7"},
+    {file = "pydantic_core-2.6.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fa159b902d22b283b680ef52b532b29554ea2a7fc39bf354064751369e9dbd7"},
+    {file = "pydantic_core-2.6.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:046af9cfb5384f3684eeb3f58a48698ddab8dd870b4b3f67f825353a14441418"},
+    {file = "pydantic_core-2.6.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:930bfe73e665ebce3f0da2c6d64455098aaa67e1a00323c74dc752627879fc67"},
+    {file = "pydantic_core-2.6.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:85cc4d105747d2aa3c5cf3e37dac50141bff779545ba59a095f4a96b0a460e70"},
+    {file = "pydantic_core-2.6.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b25afe9d5c4f60dcbbe2b277a79be114e2e65a16598db8abee2a2dcde24f162b"},
+    {file = "pydantic_core-2.6.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:e49ce7dc9f925e1fb010fc3d555250139df61fa6e5a0a95ce356329602c11ea9"},
+    {file = "pydantic_core-2.6.3-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:2dd50d6a1aef0426a1d0199190c6c43ec89812b1f409e7fe44cb0fbf6dfa733c"},
+    {file = "pydantic_core-2.6.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6595b0d8c8711e8e1dc389d52648b923b809f68ac1c6f0baa525c6440aa0daa"},
+    {file = "pydantic_core-2.6.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ef724a059396751aef71e847178d66ad7fc3fc969a1a40c29f5aac1aa5f8784"},
+    {file = "pydantic_core-2.6.3-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3c8945a105f1589ce8a693753b908815e0748f6279959a4530f6742e1994dcb6"},
+    {file = "pydantic_core-2.6.3-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c8c6660089a25d45333cb9db56bb9e347241a6d7509838dbbd1931d0e19dbc7f"},
+    {file = "pydantic_core-2.6.3-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:692b4ff5c4e828a38716cfa92667661a39886e71136c97b7dac26edef18767f7"},
+    {file = "pydantic_core-2.6.3-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:f1a5d8f18877474c80b7711d870db0eeef9442691fcdb00adabfc97e183ee0b0"},
+    {file = "pydantic_core-2.6.3-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:3796a6152c545339d3b1652183e786df648ecdf7c4f9347e1d30e6750907f5bb"},
+    {file = "pydantic_core-2.6.3-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:b962700962f6e7a6bd77e5f37320cabac24b4c0f76afeac05e9f93cf0c620014"},
+    {file = "pydantic_core-2.6.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56ea80269077003eaa59723bac1d8bacd2cd15ae30456f2890811efc1e3d4413"},
+    {file = "pydantic_core-2.6.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c0ebbebae71ed1e385f7dfd9b74c1cff09fed24a6df43d326dd7f12339ec34"},
+    {file = "pydantic_core-2.6.3-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:252851b38bad3bfda47b104ffd077d4f9604a10cb06fe09d020016a25107bf98"},
+    {file = "pydantic_core-2.6.3-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:6656a0ae383d8cd7cc94e91de4e526407b3726049ce8d7939049cbfa426518c8"},
+    {file = "pydantic_core-2.6.3-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:d9140ded382a5b04a1c030b593ed9bf3088243a0a8b7fa9f071a5736498c5483"},
+    {file = "pydantic_core-2.6.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:d38bbcef58220f9c81e42c255ef0bf99735d8f11edef69ab0b499da77105158a"},
+    {file = "pydantic_core-2.6.3-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:c9d469204abcca28926cbc28ce98f28e50e488767b084fb3fbdf21af11d3de26"},
+    {file = "pydantic_core-2.6.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:48c1ed8b02ffea4d5c9c220eda27af02b8149fe58526359b3c07eb391cb353a2"},
+    {file = "pydantic_core-2.6.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b2b1bfed698fa410ab81982f681f5b1996d3d994ae8073286515ac4d165c2e7"},
+    {file = "pydantic_core-2.6.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf9d42a71a4d7a7c1f14f629e5c30eac451a6fc81827d2beefd57d014c006c4a"},
+    {file = "pydantic_core-2.6.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4292ca56751aebbe63a84bbfc3b5717abb09b14d4b4442cc43fd7c49a1529efd"},
+    {file = "pydantic_core-2.6.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:7dc2ce039c7290b4ef64334ec7e6ca6494de6eecc81e21cb4f73b9b39991408c"},
+    {file = "pydantic_core-2.6.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:615a31b1629e12445c0e9fc8339b41aaa6cc60bd53bf802d5fe3d2c0cda2ae8d"},
+    {file = "pydantic_core-2.6.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1fa1f6312fb84e8c281f32b39affe81984ccd484da6e9d65b3d18c202c666149"},
+    {file = "pydantic_core-2.6.3.tar.gz", hash = "sha256:1508f37ba9e3ddc0189e6ff4e2228bd2d3c3a4641cbe8c07177162f76ed696c7"},
+]
+
+[package.dependencies]
+typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
 
 [[package]]
 name = "pygithub"
@@ -2071,7 +2170,6 @@ files = [
     {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
     {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
     {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
-    {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
     {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
     {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
     {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
@@ -2079,15 +2177,8 @@ files = [
     {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
     {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
     {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
-    {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
     {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
     {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
-    {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
-    {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
-    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
-    {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
-    {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
-    {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
     {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
     {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
     {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@@ -2104,7 +2195,6 @@ files = [
     {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
     {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
     {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
-    {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
     {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
     {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
     {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
@@ -2112,7 +2202,6 @@ files = [
     {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
     {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
     {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
-    {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
     {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
     {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
     {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
@@ -3345,4 +3434,4 @@ user-search = ["pyicu"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.8.0"
-content-hash = "1481c785665220fbf79613030a581b7289d49b7f336cc136dd87d39048f5446c"
+content-hash = "364c309486e9d93d4da8a1a3784d5ecd7d2a9734cf84dcd4a991f2cd54f0b5b5"
diff --git a/pyproject.toml b/pyproject.toml
index f69336a73f..ea55d81b13 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -209,11 +209,11 @@ cryptography = ">=3.4.7"
 # ijson 3.1.4 fixes a bug with "." in property names
 ijson = ">=3.1.4"
 matrix-common = "^1.3.0"
-# We need packaging.requirements.Requirement, added in 16.1.
-packaging = ">=16.1"
-# This is the most recent version of Pydantic with available on common distros.
-# We are currently incompatible with >=2.0.0: (https://github.com/matrix-org/synapse/issues/15858)
-pydantic = "^1.7.4"
+# We need packaging.version.Version(...).major, added in 20.0.
+packaging = ">=20.0"
+# We support pydantic v1 and pydantic v2 via the pydantic.v1 compat module.
+# See https://github.com/matrix-org/synapse/issues/15858
+pydantic = ">=1.7.4, <3"
 
 # This is for building the rust components during "poetry install", which
 # currently ignores the `build-system.requires` directive (c.f.
@@ -321,6 +321,8 @@ all = [
 isort = ">=5.10.1"
 black = ">=22.7.0"
 ruff = "0.0.290"
+# Type checking only works with the pydantic.v1 compat module from pydantic v2
+pydantic = "^2"
 
 # Typechecking
 lxml-stubs = ">=0.4.0"
diff --git a/scripts-dev/check_pydantic_models.py b/scripts-dev/check_pydantic_models.py
index 9f2b7ded5b..d1cfc9a85c 100755
--- a/scripts-dev/check_pydantic_models.py
+++ b/scripts-dev/check_pydantic_models.py
@@ -36,11 +36,41 @@ import textwrap
 import traceback
 import unittest.mock
 from contextlib import contextmanager
-from typing import Any, Callable, Dict, Generator, List, Set, Type, TypeVar
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Generator,
+    List,
+    Set,
+    Type,
+    TypeVar,
+)
 
 from parameterized import parameterized
-from pydantic import BaseModel as PydanticBaseModel, conbytes, confloat, conint, constr
-from pydantic.typing import get_args
+
+from synapse._pydantic_compat import HAS_PYDANTIC_V2
+
+if TYPE_CHECKING or HAS_PYDANTIC_V2:
+    from pydantic.v1 import (
+        BaseModel as PydanticBaseModel,
+        conbytes,
+        confloat,
+        conint,
+        constr,
+    )
+    from pydantic.v1.typing import get_args
+else:
+    from pydantic import (
+        BaseModel as PydanticBaseModel,
+        conbytes,
+        confloat,
+        conint,
+        constr,
+    )
+    from pydantic.typing import get_args
+
 from typing_extensions import ParamSpec
 
 logger = logging.getLogger(__name__)
@@ -251,7 +281,10 @@ class TestConstrainedTypesPatch(unittest.TestCase):
         with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
             run_test_snippet(
                 """
-                from pydantic import constr
+                try:
+                    from pydantic.v1 import constr
+                except ImportError:
+                    from pydantic import constr
                 constr()
                 """
             )
@@ -269,7 +302,10 @@ class TestConstrainedTypesPatch(unittest.TestCase):
         with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
             run_test_snippet(
                 """
-                from pydantic import *
+                try:
+                    from pydantic.v1 import *
+                except ImportError:
+                    from pydantic import *
                 constr()
                 """
             )
@@ -278,7 +314,10 @@ class TestConstrainedTypesPatch(unittest.TestCase):
         with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
             run_test_snippet(
                 """
-                from pydantic.types import constr
+                try:
+                    from pydantic.v1.types import constr
+                except ImportError:
+                    from pydantic.types import constr
                 constr()
                 """
             )
@@ -287,8 +326,11 @@ class TestConstrainedTypesPatch(unittest.TestCase):
         with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
             run_test_snippet(
                 """
-                import pydantic.types
-                pydantic.types.constr()
+                try:
+                    from pydantic.v1 import types as pydantic_types
+                except ImportError:
+                    from pydantic import types as pydantic_types
+                pydantic_types.constr()
                 """
             )
 
@@ -296,7 +338,10 @@ class TestConstrainedTypesPatch(unittest.TestCase):
         with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
             run_test_snippet(
                 """
-                from pydantic import constr
+                try:
+                    from pydantic.v1 import constr
+                except ImportError:
+                    from pydantic import constr
                 constr(min_length=10)
                 """
             )
@@ -305,7 +350,10 @@ class TestConstrainedTypesPatch(unittest.TestCase):
         with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
             run_test_snippet(
                 """
-                from pydantic import constr
+                try:
+                    from pydantic.v1 import constr
+                except ImportError:
+                    from pydantic import constr
                 constr(strict=False)
                 """
             )
@@ -314,7 +362,10 @@ class TestConstrainedTypesPatch(unittest.TestCase):
         with monkeypatch_pydantic():
             run_test_snippet(
                 """
-                from pydantic import constr
+                try:
+                    from pydantic.v1 import constr
+                except ImportError:
+                    from pydantic import constr
                 constr(strict=True)
                 """
             )
@@ -323,7 +374,10 @@ class TestConstrainedTypesPatch(unittest.TestCase):
         with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
             run_test_snippet(
                 """
-                from pydantic import constr
+                try:
+                    from pydantic.v1 import constr
+                except ImportError:
+                    from pydantic import constr
                 x: constr()
                 """
             )
@@ -332,7 +386,10 @@ class TestConstrainedTypesPatch(unittest.TestCase):
         with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
             run_test_snippet(
                 """
-                from pydantic import BaseModel, conint
+                try:
+                    from pydantic.v1 import BaseModel, conint
+                except ImportError:
+                    from pydantic import BaseModel, conint
                 class C:
                     x: conint()
                 """
@@ -361,7 +418,10 @@ class TestFieldTypeInspection(unittest.TestCase):
             run_test_snippet(
                 f"""
                 from typing import *
-                from pydantic import *
+                try:
+                    from pydantic.v1 import *
+                except ImportError:
+                    from pydantic import *
                 class C(BaseModel):
                     f: {annotation}
                 """
@@ -388,7 +448,10 @@ class TestFieldTypeInspection(unittest.TestCase):
             run_test_snippet(
                 f"""
                 from typing import *
-                from pydantic import *
+                try:
+                    from pydantic.v1 import *
+                except ImportError:
+                    from pydantic import *
                 class C(BaseModel):
                     f: {annotation}
                 """
@@ -398,7 +461,10 @@ class TestFieldTypeInspection(unittest.TestCase):
         with monkeypatch_pydantic(), self.assertRaises(ModelCheckerException):
             run_test_snippet(
                 """
-                from pydantic.main import BaseModel
+                try:
+                    from pydantic.v1.main import BaseModel
+                except ImportError:
+                    from pydantic.main import BaseModel
                 class C(BaseModel):
                     f: str
                 """
diff --git a/synapse/_pydantic_compat.py b/synapse/_pydantic_compat.py
new file mode 100644
index 0000000000..ddff72afa1
--- /dev/null
+++ b/synapse/_pydantic_compat.py
@@ -0,0 +1,26 @@
+# Copyright 2023 Maxwell G <maxwell@gtmx.me>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from packaging.version import Version
+
+try:
+    from pydantic import __version__ as pydantic_version
+except ImportError:
+    import importlib.metadata
+
+    pydantic_version = importlib.metadata.version("pydantic")
+
+HAS_PYDANTIC_V2: bool = Version(pydantic_version).major == 2
+
+__all__ = ("HAS_PYDANTIC_V2",)
diff --git a/synapse/config/_util.py b/synapse/config/_util.py
index acccca413b..746838eee3 100644
--- a/synapse/config/_util.py
+++ b/synapse/config/_util.py
@@ -11,10 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Dict, Type, TypeVar
+from typing import TYPE_CHECKING, Any, Dict, Type, TypeVar
 
 import jsonschema
-from pydantic import BaseModel, ValidationError, parse_obj_as
+
+from synapse._pydantic_compat import HAS_PYDANTIC_V2
+
+if TYPE_CHECKING or HAS_PYDANTIC_V2:
+    from pydantic.v1 import BaseModel, ValidationError, parse_obj_as
+else:
+    from pydantic import BaseModel, ValidationError, parse_obj_as
 
 from synapse.config._base import ConfigError
 from synapse.types import JsonDict, StrSequence
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 6567fb6bb0..f1766088fc 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -15,10 +15,16 @@
 
 import argparse
 import logging
-from typing import Any, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
 
 import attr
-from pydantic import BaseModel, Extra, StrictBool, StrictInt, StrictStr
+
+from synapse._pydantic_compat import HAS_PYDANTIC_V2
+
+if TYPE_CHECKING or HAS_PYDANTIC_V2:
+    from pydantic.v1 import BaseModel, Extra, StrictBool, StrictInt, StrictStr
+else:
+    from pydantic import BaseModel, Extra, StrictBool, StrictInt, StrictStr
 
 from synapse.config._base import (
     Config,
diff --git a/synapse/events/validator.py b/synapse/events/validator.py
index 5da50cb0d2..a637fadfab 100644
--- a/synapse/events/validator.py
+++ b/synapse/events/validator.py
@@ -12,10 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import collections.abc
-from typing import List, Type, Union, cast
+from typing import TYPE_CHECKING, List, Type, Union, cast
 
 import jsonschema
-from pydantic import Field, StrictBool, StrictStr
+
+from synapse._pydantic_compat import HAS_PYDANTIC_V2
+
+if TYPE_CHECKING or HAS_PYDANTIC_V2:
+    from pydantic.v1 import Field, StrictBool, StrictStr
+else:
+    from pydantic import Field, StrictBool, StrictStr
 
 from synapse.api.constants import (
     MAX_ALIAS_LENGTH,
diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py
index 5d79d31579..d9d5655c95 100644
--- a/synapse/http/servlet.py
+++ b/synapse/http/servlet.py
@@ -28,8 +28,15 @@ from typing import (
     overload,
 )
 
-from pydantic import BaseModel, MissingError, PydanticValueError, ValidationError
-from pydantic.error_wrappers import ErrorWrapper
+from synapse._pydantic_compat import HAS_PYDANTIC_V2
+
+if TYPE_CHECKING or HAS_PYDANTIC_V2:
+    from pydantic.v1 import BaseModel, MissingError, PydanticValueError, ValidationError
+    from pydantic.v1.error_wrappers import ErrorWrapper
+else:
+    from pydantic import BaseModel, MissingError, PydanticValueError, ValidationError
+    from pydantic.error_wrappers import ErrorWrapper
+
 from typing_extensions import Literal
 
 from twisted.web.server import Request
diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py
index 49cd0805fd..e74a87af4d 100644
--- a/synapse/rest/client/account.py
+++ b/synapse/rest/client/account.py
@@ -18,7 +18,12 @@ import random
 from typing import TYPE_CHECKING, List, Optional, Tuple
 from urllib.parse import urlparse
 
-from pydantic import StrictBool, StrictStr, constr
+from synapse._pydantic_compat import HAS_PYDANTIC_V2
+
+if TYPE_CHECKING or HAS_PYDANTIC_V2:
+    from pydantic.v1 import StrictBool, StrictStr, constr
+else:
+    from pydantic import StrictBool, StrictStr, constr
 from typing_extensions import Literal
 
 from twisted.web.server import Request
diff --git a/synapse/rest/client/devices.py b/synapse/rest/client/devices.py
index 925f037743..80ae937921 100644
--- a/synapse/rest/client/devices.py
+++ b/synapse/rest/client/devices.py
@@ -17,7 +17,12 @@ import logging
 from http import HTTPStatus
 from typing import TYPE_CHECKING, List, Optional, Tuple
 
-from pydantic import Extra, StrictStr
+from synapse._pydantic_compat import HAS_PYDANTIC_V2
+
+if TYPE_CHECKING or HAS_PYDANTIC_V2:
+    from pydantic.v1 import Extra, StrictStr
+else:
+    from pydantic import Extra, StrictStr
 
 from synapse.api import errors
 from synapse.api.errors import NotFoundError, SynapseError, UnrecognizedRequestError
diff --git a/synapse/rest/client/directory.py b/synapse/rest/client/directory.py
index 570bb52747..82944ca711 100644
--- a/synapse/rest/client/directory.py
+++ b/synapse/rest/client/directory.py
@@ -15,7 +15,13 @@
 import logging
 from typing import TYPE_CHECKING, List, Optional, Tuple
 
-from pydantic import StrictStr
+from synapse._pydantic_compat import HAS_PYDANTIC_V2
+
+if TYPE_CHECKING or HAS_PYDANTIC_V2:
+    from pydantic.v1 import StrictStr
+else:
+    from pydantic import StrictStr
+
 from typing_extensions import Literal
 
 from twisted.web.server import Request
diff --git a/synapse/rest/client/models.py b/synapse/rest/client/models.py
index 3d7940b0fc..880f79473c 100644
--- a/synapse/rest/client/models.py
+++ b/synapse/rest/client/models.py
@@ -13,7 +13,12 @@
 # limitations under the License.
 from typing import TYPE_CHECKING, Dict, Optional
 
-from pydantic import Extra, StrictInt, StrictStr, constr, validator
+from synapse._pydantic_compat import HAS_PYDANTIC_V2
+
+if TYPE_CHECKING or HAS_PYDANTIC_V2:
+    from pydantic.v1 import Extra, StrictInt, StrictStr, constr, validator
+else:
+    from pydantic import Extra, StrictInt, StrictStr, constr, validator
 
 from synapse.rest.models import RequestBodyModel
 from synapse.util.threepids import validate_email
diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py
index 0aaa838d04..48c47058db 100644
--- a/synapse/rest/key/v2/remote_key_resource.py
+++ b/synapse/rest/key/v2/remote_key_resource.py
@@ -16,7 +16,13 @@ import logging
 import re
 from typing import TYPE_CHECKING, Dict, Mapping, Optional, Set, Tuple
 
-from pydantic import Extra, StrictInt, StrictStr
+from synapse._pydantic_compat import HAS_PYDANTIC_V2
+
+if TYPE_CHECKING or HAS_PYDANTIC_V2:
+    from pydantic.v1 import Extra, StrictInt, StrictStr
+else:
+    from pydantic import StrictInt, StrictStr, Extra
+
 from signedjson.sign import sign_json
 
 from twisted.web.server import Request
diff --git a/synapse/rest/models.py b/synapse/rest/models.py
index d47de5c19e..de354a2135 100644
--- a/synapse/rest/models.py
+++ b/synapse/rest/models.py
@@ -11,7 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from pydantic import BaseModel, Extra
+from typing import TYPE_CHECKING
+
+from synapse._pydantic_compat import HAS_PYDANTIC_V2
+
+if TYPE_CHECKING or HAS_PYDANTIC_V2:
+    from pydantic.v1 import BaseModel, Extra
+else:
+    from pydantic import BaseModel, Extra
 
 
 class RequestBodyModel(BaseModel):
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index 99ebd96f84..12829d3d7d 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -31,8 +31,8 @@ from typing import (
 )
 
 import attr
-from pydantic import BaseModel
 
+from synapse._pydantic_compat import HAS_PYDANTIC_V2
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.engines import PostgresEngine
 from synapse.storage.types import Connection, Cursor
@@ -41,6 +41,11 @@ from synapse.util import Clock, json_encoder
 
 from . import engines
 
+if TYPE_CHECKING or HAS_PYDANTIC_V2:
+    from pydantic.v1 import BaseModel
+else:
+    from pydantic import BaseModel
+
 if TYPE_CHECKING:
     from synapse.server import HomeServer
     from synapse.storage.database import DatabasePool, LoggingTransaction
diff --git a/tests/rest/client/test_models.py b/tests/rest/client/test_models.py
index 0b8fcb0c47..524ea6047e 100644
--- a/tests/rest/client/test_models.py
+++ b/tests/rest/client/test_models.py
@@ -12,12 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import unittest as stdlib_unittest
+from typing import TYPE_CHECKING
 
-from pydantic import BaseModel, ValidationError
 from typing_extensions import Literal
 
+from synapse._pydantic_compat import HAS_PYDANTIC_V2
 from synapse.rest.client.models import EmailRequestTokenBody
 
+if TYPE_CHECKING or HAS_PYDANTIC_V2:
+    from pydantic.v1 import BaseModel, ValidationError
+else:
+    from pydantic import BaseModel, ValidationError
+
 
 class ThreepidMediumEnumTestCase(stdlib_unittest.TestCase):
     class Model(BaseModel):
-- 
cgit 1.5.1


From b225acf3e60f2413fd9bc8198ddbecf6d5ad4f84 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 25 Sep 2023 17:48:42 +0100
Subject: Bump types-psycopg2 from 2.9.21.11 to 2.9.21.14 (#16381)

* Bump types-psycopg2 from 2.9.21.11 to 2.9.21.14

Bumps [types-psycopg2](https://github.com/python/typeshed) from 2.9.21.11 to 2.9.21.14.
- [Commits](https://github.com/python/typeshed/commits)

---
updated-dependencies:
- dependency-name: types-psycopg2
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Relax the annotation of Cursor.description

See
https://github.com/matrix-org/synapse/pull/16343#issuecomment-1726083384
for rationale.

* Changelog

* Changelog

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: David Robertson <davidr@element.io>
---
 changelog.d/16381.misc      |  1 +
 poetry.lock                 |  6 +++---
 synapse/storage/database.py | 14 +-------------
 synapse/storage/types.py    | 20 ++++----------------
 4 files changed, 9 insertions(+), 32 deletions(-)
 create mode 100644 changelog.d/16381.misc

(limited to 'synapse')

diff --git a/changelog.d/16381.misc b/changelog.d/16381.misc
new file mode 100644
index 0000000000..a454651952
--- /dev/null
+++ b/changelog.d/16381.misc
@@ -0,0 +1 @@
+Improve type hints, and bump types-psycopg2 from 2.9.21.11 to 2.9.21.14.
diff --git a/poetry.lock b/poetry.lock
index 9a8be27b6f..bf229349cb 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -3093,13 +3093,13 @@ files = [
 
 [[package]]
 name = "types-psycopg2"
-version = "2.9.21.11"
+version = "2.9.21.14"
 description = "Typing stubs for psycopg2"
 optional = false
 python-versions = "*"
 files = [
-    {file = "types-psycopg2-2.9.21.11.tar.gz", hash = "sha256:d5077eacf90e61db8c0b8eea2fdc9d4a97d7aaa16865fb4bd7034a7571520b4d"},
-    {file = "types_psycopg2-2.9.21.11-py3-none-any.whl", hash = "sha256:7a323d7744bc8a882fb5a6f63448e903fc70d3dc0d6da9ec1f9c6c4dc10a7102"},
+    {file = "types-psycopg2-2.9.21.14.tar.gz", hash = "sha256:bf73a0ac4da4e278c89bf1b01fc596d5a5ac7a356cfe6ac0249f47b9e259f868"},
+    {file = "types_psycopg2-2.9.21.14-py3-none-any.whl", hash = "sha256:cd9c5350631f3bc6184ec8d48f2ed31d4ea660f89d0fffe78239450782f383c5"},
 ]
 
 [[package]]
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 697bc5651c..ca894edd5a 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -361,19 +361,7 @@ class LoggingTransaction:
     @property
     def description(
         self,
-    ) -> Optional[
-        Sequence[
-            Tuple[
-                str,
-                Optional[Any],
-                Optional[int],
-                Optional[int],
-                Optional[int],
-                Optional[int],
-                Optional[int],
-            ]
-        ]
-    ]:
+    ) -> Optional[Sequence[Any]]:
         return self.txn.description
 
     def execute_batch(self, sql: str, args: Iterable[Iterable[Any]]) -> None:
diff --git a/synapse/storage/types.py b/synapse/storage/types.py
index 34ac807530..afaeef9a5a 100644
--- a/synapse/storage/types.py
+++ b/synapse/storage/types.py
@@ -53,22 +53,10 @@ class Cursor(Protocol):
     @property
     def description(
         self,
-    ) -> Optional[
-        Sequence[
-            # Note that this is an approximate typing based on sqlite3 and other
-            # drivers, and may not be entirely accurate.
-            # FWIW, the DBAPI 2 spec is: https://peps.python.org/pep-0249/#description
-            Tuple[
-                str,
-                Optional[Any],
-                Optional[int],
-                Optional[int],
-                Optional[int],
-                Optional[int],
-                Optional[int],
-            ]
-        ]
-    ]:
+    ) -> Optional[Sequence[Any]]:
+        # At the time of writing, Synapse only assumes that `column[0]: str` for each
+        # `column in description`. Since this is hard to express in the type system, and
+        # as this is rarely used in Synapse, we deem `column: Any` good enough.
         ...
 
     @property
-- 
cgit 1.5.1


From 2763c49eca483dbb848b70b951891afd57016f17 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Mon, 25 Sep 2023 14:50:47 -0400
Subject: Improve comments in StateGroupBackgroundUpdateStore. (#16383)

---
 changelog.d/16383.misc                        |  1 +
 synapse/storage/databases/state/bg_updates.py | 18 ++++++++++++++++--
 2 files changed, 17 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/16383.misc

(limited to 'synapse')

diff --git a/changelog.d/16383.misc b/changelog.d/16383.misc
new file mode 100644
index 0000000000..d8d84cc184
--- /dev/null
+++ b/changelog.d/16383.misc
@@ -0,0 +1 @@
+Improve comments in `StateGroupBackgroundUpdateStore`.
diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py
index 5b8ba436d4..6ff533a129 100644
--- a/synapse/storage/databases/state/bg_updates.py
+++ b/synapse/storage/databases/state/bg_updates.py
@@ -94,6 +94,18 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore):
         groups: List[int],
         state_filter: Optional[StateFilter] = None,
     ) -> Mapping[int, StateMap[str]]:
+        """
+        Given a number of state groups, fetch the latest state for each group.
+
+        Args:
+            txn: The transaction object.
+            groups: The given state groups that you want to fetch the latest state for.
+            state_filter: The state filter to apply to the state we fetch from the database.
+
+        Returns:
+            Map from state_group to a StateMap at that point.
+        """
+
         state_filter = state_filter or StateFilter.all()
 
         results: Dict[int, MutableStateMap[str]] = {group: {} for group in groups}
@@ -206,8 +218,10 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore):
             if where_clause:
                 where_clause = " AND (%s)" % (where_clause,)
 
-            # We don't use WITH RECURSIVE on sqlite3 as there are distributions
-            # that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
+            # XXX: We could use `WITH RECURSIVE` here since it's supported on SQLite
+            # 3.8.3 or higher and our minimum supported version is greater than that.
+            #
+            # We just haven't put in the time to refactor this.
             for group in groups:
                 next_group: Optional[int] = group
 
-- 
cgit 1.5.1


From 47ffc7e5482cc8d7bc376f362f8db5baddbcf4b3 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Tue, 26 Sep 2023 13:49:44 +0300
Subject: Reduce calls to `send_presence_to_destinations` (#16385)

---
 changelog.d/16385.misc       |  1 +
 synapse/handlers/presence.py | 33 ++++++++++++++++++---------------
 2 files changed, 19 insertions(+), 15 deletions(-)
 create mode 100644 changelog.d/16385.misc

(limited to 'synapse')

diff --git a/changelog.d/16385.misc b/changelog.d/16385.misc
new file mode 100644
index 0000000000..d439a931d6
--- /dev/null
+++ b/changelog.d/16385.misc
@@ -0,0 +1 @@
+Minor performance improvement when sending presence to federated servers.
diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index 375c7d0901..7c7cda3e95 100644
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@@ -401,9 +401,9 @@ class BasePresenceHandler(abc.ABC):
             states,
         )
 
-        for destination, host_states in hosts_to_states.items():
+        for destinations, host_states in hosts_to_states:
             await self._federation.send_presence_to_destinations(
-                host_states, [destination]
+                host_states, destinations
             )
 
     async def send_full_presence_to_users(self, user_ids: StrCollection) -> None:
@@ -1000,9 +1000,9 @@ class PresenceHandler(BasePresenceHandler):
                     list(to_federation_ping.values()),
                 )
 
-                for destination, states in hosts_to_states.items():
+                for destinations, states in hosts_to_states:
                     await self._federation_queue.send_presence_to_destinations(
-                        states, [destination]
+                        states, destinations
                     )
 
     @wrap_as_background_process("handle_presence_timeouts")
@@ -2276,7 +2276,7 @@ async def get_interested_remotes(
     store: DataStore,
     presence_router: PresenceRouter,
     states: List[UserPresenceState],
-) -> Dict[str, Set[UserPresenceState]]:
+) -> List[Tuple[StrCollection, Collection[UserPresenceState]]]:
     """Given a list of presence states figure out which remote servers
     should be sent which.
 
@@ -2290,23 +2290,26 @@ async def get_interested_remotes(
     Returns:
         A map from destinations to presence states to send to that destination.
     """
-    hosts_and_states: Dict[str, Set[UserPresenceState]] = {}
+    hosts_and_states: List[Tuple[StrCollection, Collection[UserPresenceState]]] = []
 
     # First we look up the rooms each user is in (as well as any explicit
     # subscriptions), then for each distinct room we look up the remote
     # hosts in those rooms.
-    room_ids_to_states, users_to_states = await get_interested_parties(
-        store, presence_router, states
-    )
+    for state in states:
+        room_ids = await store.get_rooms_for_user(state.user_id)
+        hosts: Set[str] = set()
+        for room_id in room_ids:
+            room_hosts = await store.get_current_hosts_in_room(room_id)
+            hosts.update(room_hosts)
+        hosts_and_states.append((hosts, [state]))
 
-    for room_id, states in room_ids_to_states.items():
-        hosts = await store.get_current_hosts_in_room(room_id)
-        for host in hosts:
-            hosts_and_states.setdefault(host, set()).update(states)
+    # Ask a presence routing module for any additional parties if one
+    # is loaded.
+    router_users_to_states = await presence_router.get_users_for_states(states)
 
-    for user_id, states in users_to_states.items():
+    for user_id, user_states in router_users_to_states.items():
         host = get_domain_from_id(user_id)
-        hosts_and_states.setdefault(host, set()).update(states)
+        hosts_and_states.append(([host], user_states))
 
     return hosts_and_states
 
-- 
cgit 1.5.1


From 17800a0e9779a1cfd7c9dff79ae331adf8f44f83 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 26 Sep 2023 11:52:19 -0400
Subject: Implement MSC4028: push all encrypted events. (#16361)

This unstable push rule is implemented behind an experimental
configuration flag.
---
 changelog.d/16361.feature                   |  1 +
 rust/benches/evaluator.rs                   |  1 +
 rust/src/push/base_rules.rs                 | 13 +++++++++++++
 rust/src/push/evaluator.rs                  |  2 +-
 rust/src/push/mod.rs                        |  9 +++++++++
 stubs/synapse/synapse_rust/push.pyi         |  1 +
 synapse/config/experimental.py              |  4 ++++
 synapse/storage/databases/main/push_rule.py |  1 +
 8 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/16361.feature

(limited to 'synapse')

diff --git a/changelog.d/16361.feature b/changelog.d/16361.feature
new file mode 100644
index 0000000000..632fff789b
--- /dev/null
+++ b/changelog.d/16361.feature
@@ -0,0 +1 @@
+Experimental support for [MSC4028](https://github.com/matrix-org/matrix-spec-proposals/pull/4028) to push all encrypted events to clients.
diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs
index 14071105a0..6e1eab2a3b 100644
--- a/rust/benches/evaluator.rs
+++ b/rust/benches/evaluator.rs
@@ -197,6 +197,7 @@ fn bench_eval_message(b: &mut Bencher) {
         false,
         false,
         false,
+        false,
     );
 
     b.iter(|| eval.run(&rules, Some("bob"), Some("person")));
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index 59fd27665a..cebc2c079b 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -63,6 +63,19 @@ pub const BASE_PREPEND_OVERRIDE_RULES: &[PushRule] = &[PushRule {
 }];
 
 pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[
+    PushRule {
+        rule_id: Cow::Borrowed("global/override/.org.matrix.msc4028.encrypted_event"),
+        priority_class: 5,
+        conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch(
+            EventMatchCondition {
+                key: Cow::Borrowed("type"),
+                pattern: Cow::Borrowed("m.room.encrypted"),
+            },
+        ))]),
+        actions: Cow::Borrowed(&[Action::Notify]),
+        default: true,
+        default_enabled: false,
+    },
     PushRule {
         rule_id: Cow::Borrowed("global/override/.m.rule.suppress_notices"),
         priority_class: 5,
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
index 5b9bf9b26a..48e670478b 100644
--- a/rust/src/push/evaluator.rs
+++ b/rust/src/push/evaluator.rs
@@ -564,7 +564,7 @@ fn test_requires_room_version_supports_condition() {
     };
     let rules = PushRules::new(vec![custom_rule]);
     result = evaluator.run(
-        &FilteredPushRules::py_new(rules, BTreeMap::new(), true, false, true),
+        &FilteredPushRules::py_new(rules, BTreeMap::new(), true, false, true, false),
         None,
         None,
     );
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index 8e91f506cc..5e1e8e1abb 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -527,6 +527,7 @@ pub struct FilteredPushRules {
     msc1767_enabled: bool,
     msc3381_polls_enabled: bool,
     msc3664_enabled: bool,
+    msc4028_push_encrypted_events: bool,
 }
 
 #[pymethods]
@@ -538,6 +539,7 @@ impl FilteredPushRules {
         msc1767_enabled: bool,
         msc3381_polls_enabled: bool,
         msc3664_enabled: bool,
+        msc4028_push_encrypted_events: bool,
     ) -> Self {
         Self {
             push_rules,
@@ -545,6 +547,7 @@ impl FilteredPushRules {
             msc1767_enabled,
             msc3381_polls_enabled,
             msc3664_enabled,
+            msc4028_push_encrypted_events,
         }
     }
 
@@ -581,6 +584,12 @@ impl FilteredPushRules {
                     return false;
                 }
 
+                if !self.msc4028_push_encrypted_events
+                    && rule.rule_id == "global/override/.org.matrix.msc4028.encrypted_event"
+                {
+                    return false;
+                }
+
                 true
             })
             .map(|r| {
diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi
index 1f432d4ecf..25259ce91d 100644
--- a/stubs/synapse/synapse_rust/push.pyi
+++ b/stubs/synapse/synapse_rust/push.pyi
@@ -46,6 +46,7 @@ class FilteredPushRules:
         msc1767_enabled: bool,
         msc3381_polls_enabled: bool,
         msc3664_enabled: bool,
+        msc4028_push_encrypted_events: bool,
     ): ...
     def rules(self) -> Collection[Tuple[PushRule, bool]]: ...
 
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index cabe0d4397..9f830e7094 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -415,3 +415,7 @@ class ExperimentalConfig(Config):
         LimitExceededError.include_retry_after_header = experimental.get(
             "msc4041_enabled", False
         )
+
+        self.msc4028_push_encrypted_events = experimental.get(
+            "msc4028_push_encrypted_events", False
+        )
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
index af69944008..923166974c 100644
--- a/synapse/storage/databases/main/push_rule.py
+++ b/synapse/storage/databases/main/push_rule.py
@@ -88,6 +88,7 @@ def _load_rules(
         msc1767_enabled=experimental_config.msc1767_enabled,
         msc3664_enabled=experimental_config.msc3664_enabled,
         msc3381_polls_enabled=experimental_config.msc3381_polls_enabled,
+        msc4028_push_encrypted_events=experimental_config.msc4028_push_encrypted_events,
     )
 
     return filtered_rules
-- 
cgit 1.5.1


From f84da3c32ec74cf054e2fd6d10618aa4997cffaa Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Tue, 26 Sep 2023 11:57:50 -0400
Subject: Add a cache around server ACL checking (#16360)

* Pre-compiles the server ACLs onto an object per room and
  invalidates them when new events come in.
* Converts the server ACL checking into Rust.
---
 changelog.d/16360.misc                     |   1 +
 rust/src/acl/mod.rs                        | 102 +++++++++++++++++++++++++++++
 rust/src/lib.rs                            |   2 +
 stubs/synapse/synapse_rust/acl.pyi         |  21 ++++++
 synapse/events/validator.py                |   7 +-
 synapse/federation/federation_server.py    |  76 ++-------------------
 synapse/handlers/federation_event.py       |   6 ++
 synapse/handlers/message.py                |   5 ++
 synapse/replication/tcp/client.py          |   6 ++
 synapse/storage/controllers/state.py       |  59 +++++++++++++++++
 tests/federation/test_federation_server.py |  35 ++++++----
 11 files changed, 235 insertions(+), 85 deletions(-)
 create mode 100644 changelog.d/16360.misc
 create mode 100644 rust/src/acl/mod.rs
 create mode 100644 stubs/synapse/synapse_rust/acl.pyi

(limited to 'synapse')

diff --git a/changelog.d/16360.misc b/changelog.d/16360.misc
new file mode 100644
index 0000000000..b32d7b521e
--- /dev/null
+++ b/changelog.d/16360.misc
@@ -0,0 +1 @@
+Cache server ACL checking.
diff --git a/rust/src/acl/mod.rs b/rust/src/acl/mod.rs
new file mode 100644
index 0000000000..071f2b7732
--- /dev/null
+++ b/rust/src/acl/mod.rs
@@ -0,0 +1,102 @@
+// Copyright 2023 The Matrix.org Foundation C.I.C.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! An implementation of Matrix server ACL rules.
+
+use std::net::Ipv4Addr;
+use std::str::FromStr;
+
+use anyhow::Error;
+use pyo3::prelude::*;
+use regex::Regex;
+
+use crate::push::utils::{glob_to_regex, GlobMatchType};
+
+/// Called when registering modules with python.
+pub fn register_module(py: Python<'_>, m: &PyModule) -> PyResult<()> {
+    let child_module = PyModule::new(py, "acl")?;
+    child_module.add_class::<ServerAclEvaluator>()?;
+
+    m.add_submodule(child_module)?;
+
+    // We need to manually add the module to sys.modules to make `from
+    // synapse.synapse_rust import acl` work.
+    py.import("sys")?
+        .getattr("modules")?
+        .set_item("synapse.synapse_rust.acl", child_module)?;
+
+    Ok(())
+}
+
+#[derive(Debug, Clone)]
+#[pyclass(frozen)]
+pub struct ServerAclEvaluator {
+    allow_ip_literals: bool,
+    allow: Vec<Regex>,
+    deny: Vec<Regex>,
+}
+
+#[pymethods]
+impl ServerAclEvaluator {
+    #[new]
+    pub fn py_new(
+        allow_ip_literals: bool,
+        allow: Vec<&str>,
+        deny: Vec<&str>,
+    ) -> Result<Self, Error> {
+        let allow = allow
+            .iter()
+            .map(|s| glob_to_regex(s, GlobMatchType::Whole))
+            .collect::<Result<_, _>>()?;
+        let deny = deny
+            .iter()
+            .map(|s| glob_to_regex(s, GlobMatchType::Whole))
+            .collect::<Result<_, _>>()?;
+
+        Ok(ServerAclEvaluator {
+            allow_ip_literals,
+            allow,
+            deny,
+        })
+    }
+
+    pub fn server_matches_acl_event(&self, server_name: &str) -> bool {
+        // first of all, check if literal IPs are blocked, and if so, whether the
+        // server name is a literal IP
+        if !self.allow_ip_literals {
+            // check for ipv6 literals. These start with '['.
+            if server_name.starts_with('[') {
+                return false;
+            }
+
+            // check for ipv4 literals. We can just lift the routine from std::net.
+            if Ipv4Addr::from_str(server_name).is_ok() {
+                return false;
+            }
+        }
+
+        // next, check the deny list
+        if self.deny.iter().any(|e| e.is_match(server_name)) {
+            return false;
+        }
+
+        // then the allow list.
+        if self.allow.iter().any(|e| e.is_match(server_name)) {
+            return true;
+        }
+
+        // everything else should be rejected.
+        false
+    }
+}
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index ce67f58611..c44c09bda7 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -2,6 +2,7 @@ use lazy_static::lazy_static;
 use pyo3::prelude::*;
 use pyo3_log::ResetHandle;
 
+pub mod acl;
 pub mod push;
 
 lazy_static! {
@@ -38,6 +39,7 @@ fn synapse_rust(py: Python<'_>, m: &PyModule) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(get_rust_file_digest, m)?)?;
     m.add_function(wrap_pyfunction!(reset_logging_config, m)?)?;
 
+    acl::register_module(py, m)?;
     push::register_module(py, m)?;
 
     Ok(())
diff --git a/stubs/synapse/synapse_rust/acl.pyi b/stubs/synapse/synapse_rust/acl.pyi
new file mode 100644
index 0000000000..e03989b627
--- /dev/null
+++ b/stubs/synapse/synapse_rust/acl.pyi
@@ -0,0 +1,21 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
+
+class ServerAclEvaluator:
+    def __init__(
+        self, allow_ip_literals: bool, allow: List[str], deny: List[str]
+    ) -> None: ...
+    def server_matches_acl_event(self, server_name: str) -> bool: ...
diff --git a/synapse/events/validator.py b/synapse/events/validator.py
index a637fadfab..83d9fb5813 100644
--- a/synapse/events/validator.py
+++ b/synapse/events/validator.py
@@ -39,9 +39,9 @@ from synapse.events.utils import (
     CANONICALJSON_MIN_INT,
     validate_canonicaljson,
 )
-from synapse.federation.federation_server import server_matches_acl_event
 from synapse.http.servlet import validate_json_object
 from synapse.rest.models import RequestBodyModel
+from synapse.storage.controllers.state import server_acl_evaluator_from_event
 from synapse.types import EventID, JsonDict, RoomID, StrCollection, UserID
 
 
@@ -106,7 +106,10 @@ class EventValidator:
             self._validate_retention(event)
 
         elif event.type == EventTypes.ServerACL:
-            if not server_matches_acl_event(config.server.server_name, event):
+            server_acl_evaluator = server_acl_evaluator_from_event(event)
+            if not server_acl_evaluator.server_matches_acl_event(
+                config.server.server_name
+            ):
                 raise SynapseError(
                     400, "Can't create an ACL event that denies the local server"
                 )
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index f9915e5a3f..ec8e770430 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -29,10 +29,8 @@ from typing import (
     Union,
 )
 
-from matrix_common.regex import glob_to_regex
 from prometheus_client import Counter, Gauge, Histogram
 
-from twisted.internet.abstract import isIPAddress
 from twisted.python import failure
 
 from synapse.api.constants import (
@@ -1324,75 +1322,13 @@ class FederationServer(FederationBase):
         Raises:
             AuthError if the server does not match the ACL
         """
-        acl_event = await self._storage_controllers.state.get_current_state_event(
-            room_id, EventTypes.ServerACL, ""
+        server_acl_evaluator = (
+            await self._storage_controllers.state.get_server_acl_for_room(room_id)
         )
-        if not acl_event or server_matches_acl_event(server_name, acl_event):
-            return
-
-        raise AuthError(code=403, msg="Server is banned from room")
-
-
-def server_matches_acl_event(server_name: str, acl_event: EventBase) -> bool:
-    """Check if the given server is allowed by the ACL event
-
-    Args:
-        server_name: name of server, without any port part
-        acl_event: m.room.server_acl event
-
-    Returns:
-        True if this server is allowed by the ACLs
-    """
-    logger.debug("Checking %s against acl %s", server_name, acl_event.content)
-
-    # first of all, check if literal IPs are blocked, and if so, whether the
-    # server name is a literal IP
-    allow_ip_literals = acl_event.content.get("allow_ip_literals", True)
-    if not isinstance(allow_ip_literals, bool):
-        logger.warning("Ignoring non-bool allow_ip_literals flag")
-        allow_ip_literals = True
-    if not allow_ip_literals:
-        # check for ipv6 literals. These start with '['.
-        if server_name[0] == "[":
-            return False
-
-        # check for ipv4 literals. We can just lift the routine from twisted.
-        if isIPAddress(server_name):
-            return False
-
-    # next,  check the deny list
-    deny = acl_event.content.get("deny", [])
-    if not isinstance(deny, (list, tuple)):
-        logger.warning("Ignoring non-list deny ACL %s", deny)
-        deny = []
-    for e in deny:
-        if _acl_entry_matches(server_name, e):
-            # logger.info("%s matched deny rule %s", server_name, e)
-            return False
-
-    # then the allow list.
-    allow = acl_event.content.get("allow", [])
-    if not isinstance(allow, (list, tuple)):
-        logger.warning("Ignoring non-list allow ACL %s", allow)
-        allow = []
-    for e in allow:
-        if _acl_entry_matches(server_name, e):
-            # logger.info("%s matched allow rule %s", server_name, e)
-            return True
-
-    # everything else should be rejected.
-    # logger.info("%s fell through", server_name)
-    return False
-
-
-def _acl_entry_matches(server_name: str, acl_entry: Any) -> bool:
-    if not isinstance(acl_entry, str):
-        logger.warning(
-            "Ignoring non-str ACL entry '%s' (is %s)", acl_entry, type(acl_entry)
-        )
-        return False
-    regex = glob_to_regex(acl_entry)
-    return bool(regex.match(server_name))
+        if server_acl_evaluator and not server_acl_evaluator.server_matches_acl_event(
+            server_name
+        ):
+            raise AuthError(code=403, msg="Server is banned from room")
 
 
 class FederationHandlerRegistry:
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 7c62cdfaef..0cc8e990d9 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -2342,6 +2342,12 @@ class FederationEventHandler:
             # TODO retrieve the previous state, and exclude join -> join transitions
             self._notifier.notify_user_joined_room(event.event_id, event.room_id)
 
+        # If this is a server ACL event, clear the cache in the storage controller.
+        if event.type == EventTypes.ServerACL:
+            self._state_storage_controller.get_server_acl_for_room.invalidate(
+                (event.room_id,)
+            )
+
     def _sanity_check_event(self, ev: EventBase) -> None:
         """
         Do some early sanity checks of a received event
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index c036578a3d..44dbbf81dd 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -1730,6 +1730,11 @@ class EventCreationHandler:
                         event.event_id, event.room_id
                     )
 
+            if event.type == EventTypes.ServerACL:
+                self._storage_controllers.state.get_server_acl_for_room.invalidate(
+                    (event.room_id,)
+                )
+
             await self._maybe_kick_guest_users(event, context)
 
             if event.type == EventTypes.CanonicalAlias:
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index ca8a76f77c..1c7946522a 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -205,6 +205,12 @@ class ReplicationDataHandler:
                     self.notifier.notify_user_joined_room(
                         row.data.event_id, row.data.room_id
                     )
+
+                # If this is a server ACL event, clear the cache in the storage controller.
+                if row.data.type == EventTypes.ServerACL:
+                    self._state_storage_controller.get_server_acl_for_room.invalidate(
+                        (row.data.room_id,)
+                    )
         elif stream_name == UnPartialStatedRoomStream.NAME:
             for row in rows:
                 assert isinstance(row, UnPartialStatedRoomStreamRow)
diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py
index 10d219c045..46957723a1 100644
--- a/synapse/storage/controllers/state.py
+++ b/synapse/storage/controllers/state.py
@@ -37,6 +37,7 @@ from synapse.storage.util.partial_state_events_tracker import (
     PartialCurrentStateTracker,
     PartialStateEventsTracker,
 )
+from synapse.synapse_rust.acl import ServerAclEvaluator
 from synapse.types import MutableStateMap, StateMap, get_domain_from_id
 from synapse.types.state import StateFilter
 from synapse.util.async_helpers import Linearizer
@@ -501,6 +502,31 @@ class StateStorageController:
 
         return event.content.get("alias")
 
+    @cached()
+    async def get_server_acl_for_room(
+        self, room_id: str
+    ) -> Optional[ServerAclEvaluator]:
+        """Get the server ACL evaluator for room, if any
+
+        This does up-front parsing of the content to ignore bad data and pre-compile
+        regular expressions.
+
+        Args:
+            room_id: The room ID
+
+        Returns:
+            The server ACL evaluator, if any
+        """
+
+        acl_event = await self.get_current_state_event(
+            room_id, EventTypes.ServerACL, ""
+        )
+
+        if not acl_event:
+            return None
+
+        return server_acl_evaluator_from_event(acl_event)
+
     @trace
     @tag_args
     async def get_current_state_deltas(
@@ -760,3 +786,36 @@ class StateStorageController:
                 cache.state_group = object()
 
         return frozenset(cache.hosts_to_joined_users)
+
+
+def server_acl_evaluator_from_event(acl_event: EventBase) -> "ServerAclEvaluator":
+    """
+    Create a ServerAclEvaluator from a m.room.server_acl event's content.
+
+    This does up-front parsing of the content to ignore bad data. It then creates
+    the ServerAclEvaluator which will pre-compile regular expressions from the globs.
+    """
+
+    # first of all, parse if literal IPs are blocked.
+    allow_ip_literals = acl_event.content.get("allow_ip_literals", True)
+    if not isinstance(allow_ip_literals, bool):
+        logger.warning("Ignoring non-bool allow_ip_literals flag")
+        allow_ip_literals = True
+
+    # next, parse the deny list by ignoring any non-strings.
+    deny = acl_event.content.get("deny", [])
+    if not isinstance(deny, (list, tuple)):
+        logger.warning("Ignoring non-list deny ACL %s", deny)
+        deny = []
+    else:
+        deny = [s for s in deny if isinstance(s, str)]
+
+    # then the allow list.
+    allow = acl_event.content.get("allow", [])
+    if not isinstance(allow, (list, tuple)):
+        logger.warning("Ignoring non-list allow ACL %s", allow)
+        allow = []
+    else:
+        allow = [s for s in allow if isinstance(s, str)]
+
+    return ServerAclEvaluator(allow_ip_literals, allow, deny)
diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py
index 5c850d1843..1831a5b47a 100644
--- a/tests/federation/test_federation_server.py
+++ b/tests/federation/test_federation_server.py
@@ -22,10 +22,10 @@ from twisted.test.proto_helpers import MemoryReactor
 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.config.server import DEFAULT_ROOM_VERSION
 from synapse.events import EventBase, make_event_from_dict
-from synapse.federation.federation_server import server_matches_acl_event
 from synapse.rest import admin
 from synapse.rest.client import login, room
 from synapse.server import HomeServer
+from synapse.storage.controllers.state import server_acl_evaluator_from_event
 from synapse.types import JsonDict
 from synapse.util import Clock
 
@@ -67,37 +67,46 @@ class ServerACLsTestCase(unittest.TestCase):
         e = _create_acl_event({"allow": ["*"], "deny": ["evil.com"]})
         logging.info("ACL event: %s", e.content)
 
-        self.assertFalse(server_matches_acl_event("evil.com", e))
-        self.assertFalse(server_matches_acl_event("EVIL.COM", e))
+        server_acl_evalutor = server_acl_evaluator_from_event(e)
 
-        self.assertTrue(server_matches_acl_event("evil.com.au", e))
-        self.assertTrue(server_matches_acl_event("honestly.not.evil.com", e))
+        self.assertFalse(server_acl_evalutor.server_matches_acl_event("evil.com"))
+        self.assertFalse(server_acl_evalutor.server_matches_acl_event("EVIL.COM"))
+
+        self.assertTrue(server_acl_evalutor.server_matches_acl_event("evil.com.au"))
+        self.assertTrue(
+            server_acl_evalutor.server_matches_acl_event("honestly.not.evil.com")
+        )
 
     def test_block_ip_literals(self) -> None:
         e = _create_acl_event({"allow_ip_literals": False, "allow": ["*"]})
         logging.info("ACL event: %s", e.content)
 
-        self.assertFalse(server_matches_acl_event("1.2.3.4", e))
-        self.assertTrue(server_matches_acl_event("1a.2.3.4", e))
-        self.assertFalse(server_matches_acl_event("[1:2::]", e))
-        self.assertTrue(server_matches_acl_event("1:2:3:4", e))
+        server_acl_evalutor = server_acl_evaluator_from_event(e)
+
+        self.assertFalse(server_acl_evalutor.server_matches_acl_event("1.2.3.4"))
+        self.assertTrue(server_acl_evalutor.server_matches_acl_event("1a.2.3.4"))
+        self.assertFalse(server_acl_evalutor.server_matches_acl_event("[1:2::]"))
+        self.assertTrue(server_acl_evalutor.server_matches_acl_event("1:2:3:4"))
 
     def test_wildcard_matching(self) -> None:
         e = _create_acl_event({"allow": ["good*.com"]})
+
+        server_acl_evalutor = server_acl_evaluator_from_event(e)
+
         self.assertTrue(
-            server_matches_acl_event("good.com", e),
+            server_acl_evalutor.server_matches_acl_event("good.com"),
             "* matches 0 characters",
         )
         self.assertTrue(
-            server_matches_acl_event("GOOD.COM", e),
+            server_acl_evalutor.server_matches_acl_event("GOOD.COM"),
             "pattern is case-insensitive",
         )
         self.assertTrue(
-            server_matches_acl_event("good.aa.com", e),
+            server_acl_evalutor.server_matches_acl_event("good.aa.com"),
             "* matches several characters, including '.'",
         )
         self.assertFalse(
-            server_matches_acl_event("ishgood.com", e),
+            server_acl_evalutor.server_matches_acl_event("ishgood.com"),
             "pattern does not allow prefixes",
         )
 
-- 
cgit 1.5.1


From cdb89dcefe9f4d7035f898cd77cd514fa69c2673 Mon Sep 17 00:00:00 2001
From: Patrick Cloke <clokep@users.noreply.github.com>
Date: Thu, 28 Sep 2023 07:01:46 -0400
Subject: Improve state types. (#16395)

---
 changelog.d/16395.misc |  1 +
 synapse/state/v2.py    |  5 ++---
 tests/state/test_v2.py | 13 ++++++++-----
 3 files changed, 11 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/16395.misc

(limited to 'synapse')

diff --git a/changelog.d/16395.misc b/changelog.d/16395.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/16395.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/synapse/state/v2.py b/synapse/state/v2.py
index 1752f95db8..b2e63aed1e 100644
--- a/synapse/state/v2.py
+++ b/synapse/state/v2.py
@@ -23,7 +23,6 @@ from typing import (
     Generator,
     Iterable,
     List,
-    Mapping,
     Optional,
     Sequence,
     Set,
@@ -269,7 +268,7 @@ async def _get_power_level_for_sender(
 
 async def _get_auth_chain_difference(
     room_id: str,
-    state_sets: Sequence[Mapping[Any, str]],
+    state_sets: Sequence[StateMap[str]],
     unpersisted_events: Dict[str, EventBase],
     state_res_store: StateResolutionStore,
 ) -> Set[str]:
@@ -405,7 +404,7 @@ def _seperate(
 
     # mypy doesn't understand that discarding None above means that conflicted
     # state is StateMap[Set[str]], not StateMap[Set[Optional[Str]]].
-    return unconflicted_state, conflicted_state  # type: ignore
+    return unconflicted_state, conflicted_state  # type: ignore[return-value]
 
 
 def _is_power_event(event: EventBase) -> bool:
diff --git a/tests/state/test_v2.py b/tests/state/test_v2.py
index 2e3f2318d9..6a2f7584f6 100644
--- a/tests/state/test_v2.py
+++ b/tests/state/test_v2.py
@@ -719,7 +719,10 @@ class AuthChainDifferenceTestCase(unittest.TestCase):
         persisted_events = {a.event_id: a, b.event_id: b}
         unpersited_events = {c.event_id: c}
 
-        state_sets = [{"a": a.event_id, "b": b.event_id}, {"c": c.event_id}]
+        state_sets = [
+            {("a", ""): a.event_id, ("b", ""): b.event_id},
+            {("c", ""): c.event_id},
+        ]
 
         store = TestStateResolutionStore(persisted_events)
 
@@ -774,8 +777,8 @@ class AuthChainDifferenceTestCase(unittest.TestCase):
         unpersited_events = {c.event_id: c, d.event_id: d}
 
         state_sets = [
-            {"a": a.event_id, "b": b.event_id},
-            {"c": c.event_id, "d": d.event_id},
+            {("a", ""): a.event_id, ("b", ""): b.event_id},
+            {("c", ""): c.event_id, ("d", ""): d.event_id},
         ]
 
         store = TestStateResolutionStore(persisted_events)
@@ -841,8 +844,8 @@ class AuthChainDifferenceTestCase(unittest.TestCase):
         unpersited_events = {c.event_id: c, d.event_id: d, e.event_id: e}
 
         state_sets = [
-            {"a": a.event_id, "b": b.event_id, "e": e.event_id},
-            {"c": c.event_id, "d": d.event_id},
+            {("a", ""): a.event_id, ("b", ""): b.event_id, ("e", ""): e.event_id},
+            {("c", ""): c.event_id, ("d", ""): d.event_id},
         ]
 
         store = TestStateResolutionStore(persisted_events)
-- 
cgit 1.5.1


From 79eb6c0cdc15ccb5083368c923653862a4d2d23a Mon Sep 17 00:00:00 2001
From: Will Hunt <will@half-shot.uk>
Date: Fri, 29 Sep 2023 12:19:38 +0100
Subject: Support rendering some media downloads as inline (#15988)

Use an `inline` Content-Disposition header when the media is
"safe" to display inline (some known text, image, video, audio
formats).
---
 changelog.d/15988.feature         |  1 +
 synapse/media/_base.py            | 42 +++++++++++++++++++++++++++++++++++++--
 tests/media/test_base.py          | 29 ++++++++++++++++++++++++++-
 tests/media/test_media_storage.py | 40 ++++++++++++++++++++++++++++++++++---
 4 files changed, 106 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/15988.feature

(limited to 'synapse')

diff --git a/changelog.d/15988.feature b/changelog.d/15988.feature
new file mode 100644
index 0000000000..dee8fa597f
--- /dev/null
+++ b/changelog.d/15988.feature
@@ -0,0 +1 @@
+Render plain text, CSS, CSV, JSON and common image formats in the browser (inline) when requested through the /download endpoint.
\ No newline at end of file
diff --git a/synapse/media/_base.py b/synapse/media/_base.py
index 20cb8b9010..80c448de2b 100644
--- a/synapse/media/_base.py
+++ b/synapse/media/_base.py
@@ -50,6 +50,39 @@ TEXT_CONTENT_TYPES = [
     "text/xml",
 ]
 
+# A list of all content types that are "safe" to be rendered inline in a browser.
+INLINE_CONTENT_TYPES = [
+    "text/css",
+    "text/plain",
+    "text/csv",
+    "application/json",
+    "application/ld+json",
+    # We allow some media files deemed as safe, which comes from the matrix-react-sdk.
+    # https://github.com/matrix-org/matrix-react-sdk/blob/a70fcfd0bcf7f8c85986da18001ea11597989a7c/src/utils/blobs.ts#L51
+    # SVGs are *intentionally* omitted.
+    "image/jpeg",
+    "image/gif",
+    "image/png",
+    "image/apng",
+    "image/webp",
+    "image/avif",
+    "video/mp4",
+    "video/webm",
+    "video/ogg",
+    "video/quicktime",
+    "audio/mp4",
+    "audio/webm",
+    "audio/aac",
+    "audio/mpeg",
+    "audio/ogg",
+    "audio/wave",
+    "audio/wav",
+    "audio/x-wav",
+    "audio/x-pn-wav",
+    "audio/flac",
+    "audio/x-flac",
+]
+
 
 def parse_media_id(request: Request) -> Tuple[str, str, Optional[str]]:
     """Parses the server name, media ID and optional file name from the request URI
@@ -153,8 +186,13 @@ def add_file_headers(
 
     request.setHeader(b"Content-Type", content_type.encode("UTF-8"))
 
-    # Use a Content-Disposition of attachment to force download of media.
-    disposition = "attachment"
+    # A strict subset of content types is allowed to be inlined so that they may
+    # be viewed directly in a browser. Other file types are forced to be downloads.
+    if media_type.lower() in INLINE_CONTENT_TYPES:
+        disposition = "inline"
+    else:
+        disposition = "attachment"
+
     if upload_name:
         # RFC6266 section 4.1 [1] defines both `filename` and `filename*`.
         #
diff --git a/tests/media/test_base.py b/tests/media/test_base.py
index 4728c80969..119d7ba66f 100644
--- a/tests/media/test_base.py
+++ b/tests/media/test_base.py
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from synapse.media._base import get_filename_from_headers
+from unittest.mock import Mock
+
+from synapse.media._base import add_file_headers, get_filename_from_headers
 
 from tests import unittest
 
@@ -36,3 +38,28 @@ class GetFileNameFromHeadersTests(unittest.TestCase):
                 expected,
                 f"expected output for {hdr!r} to be {expected} but was {res}",
             )
+
+
+class AddFileHeadersTests(unittest.TestCase):
+    TEST_CASES = {
+        "text/plain": b"inline; filename=file.name",
+        "text/csv": b"inline; filename=file.name",
+        "image/png": b"inline; filename=file.name",
+        "text/html": b"attachment; filename=file.name",
+        "any/thing": b"attachment; filename=file.name",
+    }
+
+    def test_content_disposition(self) -> None:
+        for media_type, expected in self.TEST_CASES.items():
+            request = Mock()
+            add_file_headers(request, media_type, 0, "file.name")
+            request.setHeader.assert_any_call(b"Content-Disposition", expected)
+
+    def test_no_filename(self) -> None:
+        request = Mock()
+        add_file_headers(request, "text/plain", 0, None)
+        request.setHeader.assert_any_call(b"Content-Disposition", b"inline")
+
+        request.reset_mock()
+        add_file_headers(request, "text/html", 0, None)
+        request.setHeader.assert_any_call(b"Content-Disposition", b"attachment")
diff --git a/tests/media/test_media_storage.py b/tests/media/test_media_storage.py
index ea0051dde4..04fc7bdcef 100644
--- a/tests/media/test_media_storage.py
+++ b/tests/media/test_media_storage.py
@@ -129,6 +129,8 @@ class _TestImage:
             a 404/400 is expected.
         unable_to_thumbnail: True if we expect the thumbnailing to fail (400), or
             False if the thumbnailing should succeed or a normal 404 is expected.
+        is_inline: True if we expect the file to be served using an inline
+            Content-Disposition or False if we expect an attachment.
     """
 
     data: bytes
@@ -138,6 +140,7 @@ class _TestImage:
     expected_scaled: Optional[bytes] = None
     expected_found: bool = True
     unable_to_thumbnail: bool = False
+    is_inline: bool = True
 
 
 @parameterized_class(
@@ -198,6 +201,25 @@ class _TestImage:
                 unable_to_thumbnail=True,
             ),
         ),
+        # An SVG.
+        (
+            _TestImage(
+                b"""<?xml version="1.0"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+
+<svg xmlns="http://www.w3.org/2000/svg"
+     width="400" height="400">
+  <circle cx="100" cy="100" r="50" stroke="black"
+    stroke-width="5" fill="red" />
+</svg>""",
+                b"image/svg",
+                b".svg",
+                expected_found=False,
+                unable_to_thumbnail=True,
+                is_inline=False,
+            ),
+        ),
     ],
 )
 class MediaRepoTests(unittest.HomeserverTestCase):
@@ -339,7 +361,11 @@ class MediaRepoTests(unittest.HomeserverTestCase):
         )
         self.assertEqual(
             headers.getRawHeaders(b"Content-Disposition"),
-            [b"attachment; filename=out" + self.test_image.extension],
+            [
+                (b"inline" if self.test_image.is_inline else b"attachment")
+                + b"; filename=out"
+                + self.test_image.extension
+            ],
         )
 
     def test_disposition_filenamestar_utf8escaped(self) -> None:
@@ -359,7 +385,12 @@ class MediaRepoTests(unittest.HomeserverTestCase):
         )
         self.assertEqual(
             headers.getRawHeaders(b"Content-Disposition"),
-            [b"attachment; filename*=utf-8''" + filename + self.test_image.extension],
+            [
+                (b"inline" if self.test_image.is_inline else b"attachment")
+                + b"; filename*=utf-8''"
+                + filename
+                + self.test_image.extension
+            ],
         )
 
     def test_disposition_none(self) -> None:
@@ -373,7 +404,10 @@ class MediaRepoTests(unittest.HomeserverTestCase):
         self.assertEqual(
             headers.getRawHeaders(b"Content-Type"), [self.test_image.content_type]
         )
-        self.assertEqual(headers.getRawHeaders(b"Content-Disposition"), [b"attachment"])
+        self.assertEqual(
+            headers.getRawHeaders(b"Content-Disposition"),
+            [b"inline" if self.test_image.is_inline else b"attachment"],
+        )
 
     def test_thumbnail_crop(self) -> None:
         """Test that a cropped remote thumbnail is available."""
-- 
cgit 1.5.1


From 20fb08ec803c324a58e0f972935a27debaac133f Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@matrix.org>
Date: Fri, 29 Sep 2023 14:52:48 +0300
Subject: Downgrade repl stream time out error to warning (#16401)

This is because if a worker reaches ~100% CPU then everything starts
lagging and we hit the log line a lot. When it is logged at error level we
invoke Sentry, which has a lot of overhead and then puts even more pressure
on the worker.
---
 changelog.d/16401.misc            | 1 +
 synapse/replication/tcp/client.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/16401.misc

(limited to 'synapse')

diff --git a/changelog.d/16401.misc b/changelog.d/16401.misc
new file mode 100644
index 0000000000..86d2749a08
--- /dev/null
+++ b/changelog.d/16401.misc
@@ -0,0 +1 @@
+Downgrade replication stream time out error log lines to warning.
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index 1c7946522a..f4f2b29e96 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -339,7 +339,7 @@ class ReplicationDataHandler:
             try:
                 await make_deferred_yieldable(deferred)
             except defer.TimeoutError:
-                logger.error(
+                logger.warning(
                     "Timed out waiting for repl stream %r to reach %s (%s)"
                     "; currently at: %s",
                     stream_name,
-- 
cgit 1.5.1